Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg...
[linux-2.6-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Compile-time debug switch for this file.  With RT6_DEBUG >= 3 both
 * RDBG() and RT6_TRACE() expand to printk() calls (RT6_TRACE prefixes
 * KERN_DEBUG); otherwise RDBG() expands to nothing and RT6_TRACE() to an
 * empty do/while statement.
 */
64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
1da177e4
LT
75static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 77static unsigned int ip6_default_advmss(const struct dst_entry *dst);
d33e4553 78static unsigned int ip6_default_mtu(const struct dst_entry *dst);
1da177e4
LT
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
70ceb4f5 90#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 91static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
92 const struct in6_addr *prefix, int prefixlen,
93 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5 94 unsigned pref);
efa2cea0 95static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
96 const struct in6_addr *prefix, int prefixlen,
97 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
98#endif
99
06582540
DM
100static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
101{
102 struct rt6_info *rt = (struct rt6_info *) dst;
103 struct inet_peer *peer;
104 u32 *p = NULL;
105
106 if (!rt->rt6i_peer)
107 rt6_bind_peer(rt, 1);
108
109 peer = rt->rt6i_peer;
110 if (peer) {
111 u32 *old_p = __DST_METRICS_PTR(old);
112 unsigned long prev, new;
113
114 p = peer->metrics;
115 if (inet_metrics_new(peer))
116 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
117
118 new = (unsigned long) p;
119 prev = cmpxchg(&dst->_metrics, old, new);
120
121 if (prev != old) {
122 p = __DST_METRICS_PTR(prev);
123 if (prev & DST_METRICS_READ_ONLY)
124 p = NULL;
125 }
126 }
127 return p;
128}
129
9a7ec3a9 130static struct dst_ops ip6_dst_ops_template = {
1da177e4 131 .family = AF_INET6,
09640e63 132 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
133 .gc = ip6_dst_gc,
134 .gc_thresh = 1024,
135 .check = ip6_dst_check,
0dbaee3b 136 .default_advmss = ip6_default_advmss,
d33e4553 137 .default_mtu = ip6_default_mtu,
06582540 138 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
139 .destroy = ip6_dst_destroy,
140 .ifdown = ip6_dst_ifdown,
141 .negative_advice = ip6_negative_advice,
142 .link_failure = ip6_link_failure,
143 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 144 .local_out = __ip6_local_out,
1da177e4
LT
145};
146
ec831ea7
RD
/* default_mtu hook for blackhole dsts: always reports 0, @dst is unused. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	return 0;
}
151
14e50e57
DM
152static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
153{
154}
155
0972ddb2
HB
156static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
157 unsigned long old)
158{
159 return NULL;
160}
161
14e50e57
DM
162static struct dst_ops ip6_dst_blackhole_ops = {
163 .family = AF_INET6,
09640e63 164 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
165 .destroy = ip6_dst_destroy,
166 .check = ip6_dst_check,
ec831ea7 167 .default_mtu = ip6_blackhole_default_mtu,
214f45c9 168 .default_advmss = ip6_default_advmss,
14e50e57 169 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 170 .cow_metrics = ip6_rt_blackhole_cow_metrics,
14e50e57
DM
171};
172
62fa8a84
DM
173static const u32 ip6_template_metrics[RTAX_MAX] = {
174 [RTAX_HOPLIMIT - 1] = 255,
175};
176
bdb3289f 177static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
178 .dst = {
179 .__refcnt = ATOMIC_INIT(1),
180 .__use = 1,
181 .obsolete = -1,
182 .error = -ENETUNREACH,
d8d1f30b
CG
183 .input = ip6_pkt_discard,
184 .output = ip6_pkt_discard_out,
1da177e4
LT
185 },
186 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 187 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
188 .rt6i_metric = ~(u32) 0,
189 .rt6i_ref = ATOMIC_INIT(1),
190};
191
101367c2
TG
192#ifdef CONFIG_IPV6_MULTIPLE_TABLES
193
6723ab54
DM
194static int ip6_pkt_prohibit(struct sk_buff *skb);
195static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 196
280a34c8 197static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
198 .dst = {
199 .__refcnt = ATOMIC_INIT(1),
200 .__use = 1,
201 .obsolete = -1,
202 .error = -EACCES,
d8d1f30b
CG
203 .input = ip6_pkt_prohibit,
204 .output = ip6_pkt_prohibit_out,
101367c2
TG
205 },
206 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 207 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
208 .rt6i_metric = ~(u32) 0,
209 .rt6i_ref = ATOMIC_INIT(1),
210};
211
bdb3289f 212static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
213 .dst = {
214 .__refcnt = ATOMIC_INIT(1),
215 .__use = 1,
216 .obsolete = -1,
217 .error = -EINVAL,
d8d1f30b
CG
218 .input = dst_discard,
219 .output = dst_discard,
101367c2
TG
220 },
221 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 222 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
223 .rt6i_metric = ~(u32) 0,
224 .rt6i_ref = ATOMIC_INIT(1),
225};
226
227#endif
228
1da177e4 229/* allocate dst with ip6_dst_ops */
5c1e6aa3
DM
230static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
231 struct net_device *dev)
1da177e4 232{
cf911662
DM
233 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, 0);
234
235 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
236
237 return rt;
1da177e4
LT
238}
239
240static void ip6_dst_destroy(struct dst_entry *dst)
241{
242 struct rt6_info *rt = (struct rt6_info *)dst;
243 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 244 struct inet_peer *peer = rt->rt6i_peer;
1da177e4
LT
245
246 if (idev != NULL) {
247 rt->rt6i_idev = NULL;
248 in6_dev_put(idev);
1ab1457c 249 }
b3419363 250 if (peer) {
b3419363
DM
251 rt->rt6i_peer = NULL;
252 inet_putpeer(peer);
253 }
254}
255
6431cbc2
DM
256static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
257
258static u32 rt6_peer_genid(void)
259{
260 return atomic_read(&__rt6_peer_genid);
261}
262
b3419363
DM
263void rt6_bind_peer(struct rt6_info *rt, int create)
264{
265 struct inet_peer *peer;
266
b3419363
DM
267 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
268 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
269 inet_putpeer(peer);
6431cbc2
DM
270 else
271 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
272}
273
274static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
275 int how)
276{
277 struct rt6_info *rt = (struct rt6_info *)dst;
278 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 279 struct net_device *loopback_dev =
c346dca1 280 dev_net(dev)->loopback_dev;
1da177e4 281
5a3e55d6
DL
282 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
283 struct inet6_dev *loopback_idev =
284 in6_dev_get(loopback_dev);
1da177e4
LT
285 if (loopback_idev != NULL) {
286 rt->rt6i_idev = loopback_idev;
287 in6_dev_put(idev);
288 }
289 }
290}
291
292static __inline__ int rt6_check_expired(const struct rt6_info *rt)
293{
a02cec21
ED
294 return (rt->rt6i_flags & RTF_EXPIRES) &&
295 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
296}
297
b71d1d42 298static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 299{
a02cec21
ED
300 return ipv6_addr_type(daddr) &
301 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
302}
303
1da177e4 304/*
c71099ac 305 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
306 */
307
8ed67789
DL
308static inline struct rt6_info *rt6_device_match(struct net *net,
309 struct rt6_info *rt,
b71d1d42 310 const struct in6_addr *saddr,
1da177e4 311 int oif,
d420895e 312 int flags)
1da177e4
LT
313{
314 struct rt6_info *local = NULL;
315 struct rt6_info *sprt;
316
dd3abc4e
YH
317 if (!oif && ipv6_addr_any(saddr))
318 goto out;
319
d8d1f30b 320 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
321 struct net_device *dev = sprt->rt6i_dev;
322
323 if (oif) {
1da177e4
LT
324 if (dev->ifindex == oif)
325 return sprt;
326 if (dev->flags & IFF_LOOPBACK) {
327 if (sprt->rt6i_idev == NULL ||
328 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 329 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 330 continue;
1ab1457c 331 if (local && (!oif ||
1da177e4
LT
332 local->rt6i_idev->dev->ifindex == oif))
333 continue;
334 }
335 local = sprt;
336 }
dd3abc4e
YH
337 } else {
338 if (ipv6_chk_addr(net, saddr, dev,
339 flags & RT6_LOOKUP_F_IFACE))
340 return sprt;
1da177e4 341 }
dd3abc4e 342 }
1da177e4 343
dd3abc4e 344 if (oif) {
1da177e4
LT
345 if (local)
346 return local;
347
d420895e 348 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 349 return net->ipv6.ip6_null_entry;
1da177e4 350 }
dd3abc4e 351out:
1da177e4
LT
352 return rt;
353}
354
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router reachability probing: if the next hop neighbour of @rt is not in
 * a NUD_VALID state and more than rtr_probe_interval has passed since the
 * neighbour entry was last updated, stamp the entry and send a neighbour
 * solicitation for it to the solicited-node multicast address.
 *
 * The state and timestamp are re-checked under neigh->lock; the
 * solicitation itself is sent after the lock has been dropped.
 * A NULL @rt or a route without a next hop is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int probe_due;

	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	probe_due = !(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (probe_due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!probe_due)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
389
1da177e4 390/*
554cfb7e 391 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 392 */
b6f99a21 393static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
394{
395 struct net_device *dev = rt->rt6i_dev;
161980f4 396 if (!oif || dev->ifindex == oif)
554cfb7e 397 return 2;
161980f4
DM
398 if ((dev->flags & IFF_LOOPBACK) &&
399 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
400 return 1;
401 return 0;
554cfb7e 402}
1da177e4 403
b6f99a21 404static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 405{
554cfb7e 406 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 407 int m;
4d0c5911
YH
408 if (rt->rt6i_flags & RTF_NONEXTHOP ||
409 !(rt->rt6i_flags & RTF_GATEWAY))
410 m = 1;
411 else if (neigh) {
554cfb7e
YH
412 read_lock_bh(&neigh->lock);
413 if (neigh->nud_state & NUD_VALID)
4d0c5911 414 m = 2;
398bcbeb
YH
415#ifdef CONFIG_IPV6_ROUTER_PREF
416 else if (neigh->nud_state & NUD_FAILED)
417 m = 0;
418#endif
419 else
ea73ee23 420 m = 1;
554cfb7e 421 read_unlock_bh(&neigh->lock);
398bcbeb
YH
422 } else
423 m = 0;
554cfb7e 424 return m;
1da177e4
LT
425}
426
554cfb7e
YH
427static int rt6_score_route(struct rt6_info *rt, int oif,
428 int strict)
1da177e4 429{
4d0c5911 430 int m, n;
1ab1457c 431
4d0c5911 432 m = rt6_check_dev(rt, oif);
77d16f45 433 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 434 return -1;
ebacaaa0
YH
435#ifdef CONFIG_IPV6_ROUTER_PREF
436 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
437#endif
4d0c5911 438 n = rt6_check_neigh(rt);
557e92ef 439 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
440 return -1;
441 return m;
442}
443
f11e6659
DM
444static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
445 int *mpri, struct rt6_info *match)
554cfb7e 446{
f11e6659
DM
447 int m;
448
449 if (rt6_check_expired(rt))
450 goto out;
451
452 m = rt6_score_route(rt, oif, strict);
453 if (m < 0)
454 goto out;
455
456 if (m > *mpri) {
457 if (strict & RT6_LOOKUP_F_REACHABLE)
458 rt6_probe(match);
459 *mpri = m;
460 match = rt;
461 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
462 rt6_probe(rt);
463 }
464
465out:
466 return match;
467}
468
469static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
470 struct rt6_info *rr_head,
471 u32 metric, int oif, int strict)
472{
473 struct rt6_info *rt, *match;
554cfb7e 474 int mpri = -1;
1da177e4 475
f11e6659
DM
476 match = NULL;
477 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 478 rt = rt->dst.rt6_next)
f11e6659
DM
479 match = find_match(rt, oif, strict, &mpri, match);
480 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 481 rt = rt->dst.rt6_next)
f11e6659 482 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 483
f11e6659
DM
484 return match;
485}
1da177e4 486
f11e6659
DM
487static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
488{
489 struct rt6_info *match, *rt0;
8ed67789 490 struct net *net;
1da177e4 491
f11e6659 492 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 493 __func__, fn->leaf, oif);
554cfb7e 494
f11e6659
DM
495 rt0 = fn->rr_ptr;
496 if (!rt0)
497 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 498
f11e6659 499 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 500
554cfb7e 501 if (!match &&
f11e6659 502 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 503 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 504
554cfb7e 505 /* no entries matched; do round-robin */
f11e6659
DM
506 if (!next || next->rt6i_metric != rt0->rt6i_metric)
507 next = fn->leaf;
508
509 if (next != rt0)
510 fn->rr_ptr = next;
1da177e4 511 }
1da177e4 512
f11e6659 513 RT6_TRACE("%s() => %p\n",
0dc47877 514 __func__, match);
1da177e4 515
c346dca1 516 net = dev_net(rt0->rt6i_dev);
a02cec21 517 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
518}
519
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option (@opt, @len bytes) received on @dev
 * from router @gwaddr.
 *
 * After validating the option, the matching route-info route is looked
 * up and then:
 *   - deleted when the advertised lifetime is 0;
 *   - created when it does not exist and the lifetime is non-zero;
 *   - otherwise its preference bits are refreshed.
 * For a route that exists afterwards, the expiry is set from the
 * lifetime (or cleared for an infinite lifetime) and the reference is
 * released.
 *
 * Returns 0 on success, -EINVAL for a malformed option or an invalid
 * preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* Full 128-bit prefix present: use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
593
/*
 * BACKTRACK(__net, saddr): helper for the lookup functions below.
 *
 * It is not self-contained: it reads and writes the caller's local
 * variables `rt` and `fn`, and jumps to the caller's labels `out` and
 * `restart`.
 *
 * If `rt` is the namespace's null entry, it climbs from `fn` towards the
 * root: at each step it stops with `goto out` when `fn` is flagged
 * RTN_TL_ROOT; otherwise it moves to the parent, or, when the parent has
 * a subtree other than `fn`, to the result of fib6_lookup() in that
 * subtree keyed by `saddr`.  As soon as the new `fn` is flagged
 * RTN_RTINFO it does `goto restart` so the caller retries selection
 * there.  If `rt` is not the null entry the macro does nothing.
 */
8ed67789 594#define BACKTRACK(__net, saddr) \
982f56f3 595do { \
8ed67789 596 if (rt == __net->ipv6.ip6_null_entry) { \
982f56f3 597 struct fib6_node *pn; \
e0eda7bb 598 while (1) { \
982f56f3
YH
599 if (fn->fn_flags & RTN_TL_ROOT) \
600 goto out; \
601 pn = fn->parent; \
602 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
8bce65b9 603 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
982f56f3
YH
604 else \
605 fn = pn; \
606 if (fn->fn_flags & RTN_RTINFO) \
607 goto restart; \
c71099ac 608 } \
c71099ac 609 } \
982f56f3 610} while(0)
c71099ac 611
8ed67789
DL
612static struct rt6_info *ip6_pol_route_lookup(struct net *net,
613 struct fib6_table *table,
4c9483b2 614 struct flowi6 *fl6, int flags)
1da177e4
LT
615{
616 struct fib6_node *fn;
617 struct rt6_info *rt;
618
c71099ac 619 read_lock_bh(&table->tb6_lock);
4c9483b2 620 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
621restart:
622 rt = fn->leaf;
4c9483b2
DM
623 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
624 BACKTRACK(net, &fl6->saddr);
c71099ac 625out:
d8d1f30b 626 dst_use(&rt->dst, jiffies);
c71099ac 627 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
628 return rt;
629
630}
631
9acd9f3a
YH
632struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
633 const struct in6_addr *saddr, int oif, int strict)
c71099ac 634{
4c9483b2
DM
635 struct flowi6 fl6 = {
636 .flowi6_oif = oif,
637 .daddr = *daddr,
c71099ac
TG
638 };
639 struct dst_entry *dst;
77d16f45 640 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 641
adaa70bb 642 if (saddr) {
4c9483b2 643 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
644 flags |= RT6_LOOKUP_F_HAS_SADDR;
645 }
646
4c9483b2 647 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
648 if (dst->error == 0)
649 return (struct rt6_info *) dst;
650
651 dst_release(dst);
652
1da177e4
LT
653 return NULL;
654}
655
7159039a
YH
656EXPORT_SYMBOL(rt6_lookup);
657
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
663
86872cb5 664static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
665{
666 int err;
c71099ac 667 struct fib6_table *table;
1da177e4 668
c71099ac
TG
669 table = rt->rt6i_table;
670 write_lock_bh(&table->tb6_lock);
86872cb5 671 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 672 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
673
674 return err;
675}
676
40e22e8f
TG
677int ip6_ins_rt(struct rt6_info *rt)
678{
4d1169c1 679 struct nl_info info = {
c346dca1 680 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 681 };
528c4ceb 682 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
683}
684
b71d1d42
ED
685static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, const struct in6_addr *daddr,
686 const struct in6_addr *saddr)
1da177e4 687{
1da177e4
LT
688 struct rt6_info *rt;
689
690 /*
691 * Clone the route.
692 */
693
694 rt = ip6_rt_copy(ort);
695
696 if (rt) {
14deae41
DM
697 struct neighbour *neigh;
698 int attempts = !in_softirq();
699
58c4fb86
YH
700 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
701 if (rt->rt6i_dst.plen != 128 &&
702 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
703 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 704 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 705 }
1da177e4 706
58c4fb86 707 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
708 rt->rt6i_dst.plen = 128;
709 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 710 rt->dst.flags |= DST_HOST;
1da177e4
LT
711
712#ifdef CONFIG_IPV6_SUBTREES
713 if (rt->rt6i_src.plen && saddr) {
714 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
715 rt->rt6i_src.plen = 128;
716 }
717#endif
718
14deae41
DM
719 retry:
720 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
721 if (IS_ERR(neigh)) {
722 struct net *net = dev_net(rt->rt6i_dev);
723 int saved_rt_min_interval =
724 net->ipv6.sysctl.ip6_rt_gc_min_interval;
725 int saved_rt_elasticity =
726 net->ipv6.sysctl.ip6_rt_gc_elasticity;
727
728 if (attempts-- > 0) {
729 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
730 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
731
86393e52 732 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
733
734 net->ipv6.sysctl.ip6_rt_gc_elasticity =
735 saved_rt_elasticity;
736 net->ipv6.sysctl.ip6_rt_gc_min_interval =
737 saved_rt_min_interval;
738 goto retry;
739 }
740
741 if (net_ratelimit())
742 printk(KERN_WARNING
7e1b33e5 743 "ipv6: Neighbour table overflow.\n");
d8d1f30b 744 dst_free(&rt->dst);
14deae41
DM
745 return NULL;
746 }
747 rt->rt6i_nexthop = neigh;
1da177e4 748
95a9a5ba 749 }
1da177e4 750
95a9a5ba
YH
751 return rt;
752}
1da177e4 753
b71d1d42 754static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, const struct in6_addr *daddr)
299d9939
YH
755{
756 struct rt6_info *rt = ip6_rt_copy(ort);
757 if (rt) {
758 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
759 rt->rt6i_dst.plen = 128;
760 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 761 rt->dst.flags |= DST_HOST;
299d9939
YH
762 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
763 }
764 return rt;
765}
766
/*
 * Core route resolution for one FIB table, shared by the input and
 * output paths (which pass the incoming or outgoing interface as @oif).
 *
 * Selects a route for the flow with rt6_select(); if the selected route
 * is neither the null entry nor already a cache entry, a per-destination
 * copy is created (rt6_alloc_cow() or rt6_alloc_clone()) and inserted
 * into the table.  The returned route always carries a reference taken
 * with dst_hold(), and has its lastuse/__use fields updated.
 *
 * The locals `fn` and `rt` and the labels `restart` and `out` are used
 * by the BACKTRACK() macro.
 */
8ed67789 767static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 768 struct flowi6 *fl6, int flags)
1da177e4
LT
769{
770 struct fib6_node *fn;
519fbd87 771 struct rt6_info *rt, *nrt;
c71099ac 772 int strict = 0;
1da177e4 773 int attempts = 3;
519fbd87 774 int err;
/* Reachability is only required while forwarding is disabled. */
53b7997f 775 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 776
77d16f45 777 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
778
/* Entry point for a full retry: the table lock is not held here. */
779relookup:
c71099ac 780 read_lock_bh(&table->tb6_lock);
1da177e4 781
/* Retry with the lock still held (used after clearing `reachable`). */
8238dd06 782restart_2:
4c9483b2 783 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
784
/* BACKTRACK() jumps here after moving `fn` to another node. */
785restart:
4acad72d 786 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 787
4c9483b2 788 BACKTRACK(net, &fl6->saddr);
/* Null entry or an existing cache entry: nothing to clone. */
8ed67789 789 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 790 rt->rt6i_flags & RTF_CACHE)
1ddef044 791 goto out;
1da177e4 792
/* Pin the selected route, then drop the lock before allocating. */
d8d1f30b 793 dst_hold(&rt->dst);
c71099ac 794 read_unlock_bh(&table->tb6_lock);
fb9de91e 795
/*
 * No next hop yet (and one is needed): copy and resolve a neighbour.
 * Otherwise, non-host routes get a plain clone; host routes are
 * returned as they are.
 */
519fbd87 796 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 797 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 798 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 799 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
800 else
801 goto out2;
e40cf353 802
/* Switch from the original to the copy (or the null entry on failure). */
d8d1f30b 803 dst_release(&rt->dst);
8ed67789 804 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 805
d8d1f30b 806 dst_hold(&rt->dst);
519fbd87 807 if (nrt) {
40e22e8f 808 err = ip6_ins_rt(nrt);
519fbd87 809 if (!err)
1da177e4 810 goto out2;
1da177e4 811 }
1da177e4 812
/* Give up after three tries and return what we currently hold. */
519fbd87
YH
813 if (--attempts <= 0)
814 goto out2;
815
816 /*
c71099ac 817 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
818 * released someone could insert this route. Relookup.
819 */
d8d1f30b 820 dst_release(&rt->dst);
519fbd87
YH
821 goto relookup;
822
/* Reached with the table lock held. */
823out:
/* First pass required reachability: redo the lookup once without it. */
8238dd06
YH
824 if (reachable) {
825 reachable = 0;
826 goto restart_2;
827 }
d8d1f30b 828 dst_hold(&rt->dst);
c71099ac 829 read_unlock_bh(&table->tb6_lock);
/* Reached with the lock dropped and a reference held on `rt`. */
1da177e4 830out2:
d8d1f30b
CG
831 rt->dst.lastuse = jiffies;
832 rt->dst.__use++;
c71099ac
TG
833
834 return rt;
1da177e4
LT
835}
836
8ed67789 837static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 838 struct flowi6 *fl6, int flags)
4acad72d 839{
4c9483b2 840 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
841}
842
c71099ac
TG
843void ip6_route_input(struct sk_buff *skb)
844{
b71d1d42 845 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 846 struct net *net = dev_net(skb->dev);
adaa70bb 847 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
848 struct flowi6 fl6 = {
849 .flowi6_iif = skb->dev->ifindex,
850 .daddr = iph->daddr,
851 .saddr = iph->saddr,
852 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
853 .flowi6_mark = skb->mark,
854 .flowi6_proto = iph->nexthdr,
c71099ac 855 };
adaa70bb 856
1d6e55f1 857 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 858 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 859
4c9483b2 860 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
c71099ac
TG
861}
862
8ed67789 863static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 864 struct flowi6 *fl6, int flags)
1da177e4 865{
4c9483b2 866 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
867}
868
9c7a4f9c 869struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 870 struct flowi6 *fl6)
c71099ac
TG
871{
872 int flags = 0;
873
4c9483b2 874 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 875 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 876
4c9483b2 877 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 878 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
879 else if (sk)
880 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 881
4c9483b2 882 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
883}
884
7159039a 885EXPORT_SYMBOL(ip6_route_output);
1da177e4 886
2774c131 887struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 888{
5c1e6aa3 889 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
890 struct dst_entry *new = NULL;
891
5c1e6aa3 892 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 893 if (rt) {
cf911662
DM
894 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
895
d8d1f30b 896 new = &rt->dst;
14e50e57 897
14e50e57 898 new->__use = 1;
352e512c
HX
899 new->input = dst_discard;
900 new->output = dst_discard;
14e50e57 901
defb3519 902 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
903 rt->rt6i_idev = ort->rt6i_idev;
904 if (rt->rt6i_idev)
905 in6_dev_hold(rt->rt6i_idev);
906 rt->rt6i_expires = 0;
907
908 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
909 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
910 rt->rt6i_metric = 0;
911
912 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
913#ifdef CONFIG_IPV6_SUBTREES
914 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
915#endif
916
917 dst_free(new);
918 }
919
69ead7af
DM
920 dst_release(dst_orig);
921 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 922}
14e50e57 923
1da177e4
LT
924/*
925 * Destination cache support functions
926 */
927
928static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
929{
930 struct rt6_info *rt;
931
932 rt = (struct rt6_info *) dst;
933
6431cbc2
DM
934 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
935 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
936 if (!rt->rt6i_peer)
937 rt6_bind_peer(rt, 0);
938 rt->rt6i_peer_genid = rt6_peer_genid();
939 }
1da177e4 940 return dst;
6431cbc2 941 }
1da177e4
LT
942 return NULL;
943}
944
945static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
946{
947 struct rt6_info *rt = (struct rt6_info *) dst;
948
949 if (rt) {
54c1a859
YH
950 if (rt->rt6i_flags & RTF_CACHE) {
951 if (rt6_check_expired(rt)) {
952 ip6_del_rt(rt);
953 dst = NULL;
954 }
955 } else {
1da177e4 956 dst_release(dst);
54c1a859
YH
957 dst = NULL;
958 }
1da177e4 959 }
54c1a859 960 return dst;
1da177e4
LT
961}
962
963static void ip6_link_failure(struct sk_buff *skb)
964{
965 struct rt6_info *rt;
966
3ffe533c 967 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 968
adf30907 969 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
970 if (rt) {
971 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 972 dst_set_expires(&rt->dst, 0);
1da177e4
LT
973 rt->rt6i_flags |= RTF_EXPIRES;
974 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
975 rt->rt6i_node->fn_sernum = -1;
976 }
977}
978
979static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
980{
981 struct rt6_info *rt6 = (struct rt6_info*)dst;
982
983 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
984 rt6->rt6i_flags |= RTF_MODIFIED;
985 if (mtu < IPV6_MIN_MTU) {
defb3519 986 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 987 mtu = IPV6_MIN_MTU;
defb3519
DM
988 features |= RTAX_FEATURE_ALLFRAG;
989 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 990 }
defb3519 991 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
992 }
993}
994
0dbaee3b 995static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 996{
0dbaee3b
DM
997 struct net_device *dev = dst->dev;
998 unsigned int mtu = dst_mtu(dst);
999 struct net *net = dev_net(dev);
1000
1da177e4
LT
1001 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1002
5578689a
DL
1003 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1004 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1005
1006 /*
1ab1457c
YH
1007 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1008 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1009 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1010 * rely only on pmtu discovery"
1011 */
1012 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1013 mtu = IPV6_MAXPLEN;
1014 return mtu;
1015}
1016
d33e4553
DM
1017static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1018{
1019 unsigned int mtu = IPV6_MIN_MTU;
1020 struct inet6_dev *idev;
1021
1022 rcu_read_lock();
1023 idev = __in6_dev_get(dst->dev);
1024 if (idev)
1025 mtu = idev->cnf.mtu6;
1026 rcu_read_unlock();
1027
1028 return mtu;
1029}
1030
3b00944c
YH
/*
 * Singly linked list (through dst->next) of dsts created by
 * icmp6_dst_alloc(); entries are removed by icmp6_dst_gc() once their
 * reference count is zero.  icmp6_dst_lock protects the list.
 */
1031static struct dst_entry *icmp6_dst_gc_list;
1032static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1033
3b00944c 1034struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1035 struct neighbour *neigh,
9acd9f3a 1036 const struct in6_addr *addr)
1da177e4
LT
1037{
1038 struct rt6_info *rt;
1039 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1040 struct net *net = dev_net(dev);
1da177e4
LT
1041
1042 if (unlikely(idev == NULL))
1043 return NULL;
1044
5c1e6aa3 1045 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev);
1da177e4
LT
1046 if (unlikely(rt == NULL)) {
1047 in6_dev_put(idev);
1048 goto out;
1049 }
1050
1da177e4
LT
1051 if (neigh)
1052 neigh_hold(neigh);
14deae41 1053 else {
1da177e4 1054 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
1055 if (IS_ERR(neigh))
1056 neigh = NULL;
1057 }
1da177e4 1058
1da177e4
LT
1059 rt->rt6i_idev = idev;
1060 rt->rt6i_nexthop = neigh;
d8d1f30b 1061 atomic_set(&rt->dst.__refcnt, 1);
defb3519 1062 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
d8d1f30b 1063 rt->dst.output = ip6_output;
1da177e4
LT
1064
1065#if 0 /* there's no chance to use these for ndisc */
d8d1f30b 1066 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1ab1457c 1067 ? DST_HOST
1da177e4
LT
1068 : 0;
1069 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1070 rt->rt6i_dst.plen = 128;
1071#endif
1072
3b00944c 1073 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1074 rt->dst.next = icmp6_dst_gc_list;
1075 icmp6_dst_gc_list = &rt->dst;
3b00944c 1076 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1077
5578689a 1078 fib6_force_start_gc(net);
1da177e4
LT
1079
1080out:
d8d1f30b 1081 return &rt->dst;
1da177e4
LT
1082}
1083
3d0f24a7 1084int icmp6_dst_gc(void)
1da177e4 1085{
e9476e95 1086 struct dst_entry *dst, **pprev;
3d0f24a7 1087 int more = 0;
1da177e4 1088
3b00944c
YH
1089 spin_lock_bh(&icmp6_dst_lock);
1090 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1091
1da177e4
LT
1092 while ((dst = *pprev) != NULL) {
1093 if (!atomic_read(&dst->__refcnt)) {
1094 *pprev = dst->next;
1095 dst_free(dst);
1da177e4
LT
1096 } else {
1097 pprev = &dst->next;
3d0f24a7 1098 ++more;
1da177e4
LT
1099 }
1100 }
1101
3b00944c 1102 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1103
3d0f24a7 1104 return more;
1da177e4
LT
1105}
1106
1e493d19
DM
1107static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1108 void *arg)
1109{
1110 struct dst_entry *dst, **pprev;
1111
1112 spin_lock_bh(&icmp6_dst_lock);
1113 pprev = &icmp6_dst_gc_list;
1114 while ((dst = *pprev) != NULL) {
1115 struct rt6_info *rt = (struct rt6_info *) dst;
1116 if (func(rt, arg)) {
1117 *pprev = dst->next;
1118 dst_free(dst);
1119 } else {
1120 pprev = &dst->next;
1121 }
1122 }
1123 spin_unlock_bh(&icmp6_dst_lock);
1124}
1125
569d3645 1126static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1127{
1da177e4 1128 unsigned long now = jiffies;
86393e52 1129 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1130 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1131 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1132 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1133 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1134 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1135 int entries;
7019b78e 1136
fc66f95c 1137 entries = dst_entries_get_fast(ops);
7019b78e 1138 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1139 entries <= rt_max_size)
1da177e4
LT
1140 goto out;
1141
6891a346
BT
1142 net->ipv6.ip6_rt_gc_expire++;
1143 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1144 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1145 entries = dst_entries_get_slow(ops);
1146 if (entries < ops->gc_thresh)
7019b78e 1147 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1148out:
7019b78e 1149 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1150 return entries > rt_max_size;
1da177e4
LT
1151}
1152
1153/* Clean host part of a prefix. Not necessary in radix tree,
1154 but results in cleaner routing tables.
1155
1156 Remove it only when all the things will work!
1157 */
1158
6b75d090 1159int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1160{
5170ae82 1161 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1162 if (hoplimit == 0) {
6b75d090 1163 struct net_device *dev = dst->dev;
c68f24cc
ED
1164 struct inet6_dev *idev;
1165
1166 rcu_read_lock();
1167 idev = __in6_dev_get(dev);
1168 if (idev)
6b75d090 1169 hoplimit = idev->cnf.hop_limit;
c68f24cc 1170 else
53b7997f 1171 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1172 rcu_read_unlock();
1da177e4
LT
1173 }
1174 return hoplimit;
1175}
abbf46ae 1176EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1177
1178/*
1179 *
1180 */
1181
86872cb5 1182int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1183{
1184 int err;
5578689a 1185 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1186 struct rt6_info *rt = NULL;
1187 struct net_device *dev = NULL;
1188 struct inet6_dev *idev = NULL;
c71099ac 1189 struct fib6_table *table;
1da177e4
LT
1190 int addr_type;
1191
86872cb5 1192 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1193 return -EINVAL;
1194#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1195 if (cfg->fc_src_len)
1da177e4
LT
1196 return -EINVAL;
1197#endif
86872cb5 1198 if (cfg->fc_ifindex) {
1da177e4 1199 err = -ENODEV;
5578689a 1200 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1201 if (!dev)
1202 goto out;
1203 idev = in6_dev_get(dev);
1204 if (!idev)
1205 goto out;
1206 }
1207
86872cb5
TG
1208 if (cfg->fc_metric == 0)
1209 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1210
5578689a 1211 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1212 if (table == NULL) {
1213 err = -ENOBUFS;
1214 goto out;
1215 }
1216
5c1e6aa3 1217 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL);
1da177e4
LT
1218
1219 if (rt == NULL) {
1220 err = -ENOMEM;
1221 goto out;
1222 }
1223
d8d1f30b 1224 rt->dst.obsolete = -1;
6f704992
YH
1225 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1226 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1227 0;
1da177e4 1228
86872cb5
TG
1229 if (cfg->fc_protocol == RTPROT_UNSPEC)
1230 cfg->fc_protocol = RTPROT_BOOT;
1231 rt->rt6i_protocol = cfg->fc_protocol;
1232
1233 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1234
1235 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1236 rt->dst.input = ip6_mc_input;
ab79ad14
1237 else if (cfg->fc_flags & RTF_LOCAL)
1238 rt->dst.input = ip6_input;
1da177e4 1239 else
d8d1f30b 1240 rt->dst.input = ip6_forward;
1da177e4 1241
d8d1f30b 1242 rt->dst.output = ip6_output;
1da177e4 1243
86872cb5
TG
1244 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1245 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1246 if (rt->rt6i_dst.plen == 128)
d8d1f30b 1247 rt->dst.flags = DST_HOST;
1da177e4
LT
1248
1249#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1250 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1251 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1252#endif
1253
86872cb5 1254 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1255
1256 /* We cannot add true routes via loopback here,
1257 they would result in kernel looping; promote them to reject routes
1258 */
86872cb5 1259 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1260 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1261 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1262 /* hold loopback dev/idev if we haven't done so. */
5578689a 1263 if (dev != net->loopback_dev) {
1da177e4
LT
1264 if (dev) {
1265 dev_put(dev);
1266 in6_dev_put(idev);
1267 }
5578689a 1268 dev = net->loopback_dev;
1da177e4
LT
1269 dev_hold(dev);
1270 idev = in6_dev_get(dev);
1271 if (!idev) {
1272 err = -ENODEV;
1273 goto out;
1274 }
1275 }
d8d1f30b
CG
1276 rt->dst.output = ip6_pkt_discard_out;
1277 rt->dst.input = ip6_pkt_discard;
1278 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1279 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1280 goto install_route;
1281 }
1282
86872cb5 1283 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1284 const struct in6_addr *gw_addr;
1da177e4
LT
1285 int gwa_type;
1286
86872cb5
TG
1287 gw_addr = &cfg->fc_gateway;
1288 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1289 gwa_type = ipv6_addr_type(gw_addr);
1290
1291 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1292 struct rt6_info *grt;
1293
1294 /* IPv6 strictly inhibits using not link-local
1295 addresses as nexthop address.
1296 Otherwise, router will not able to send redirects.
1297 It is very good, but in some (rare!) circumstances
1298 (SIT, PtP, NBMA NOARP links) it is handy to allow
1299 some exceptions. --ANK
1300 */
1301 err = -EINVAL;
1302 if (!(gwa_type&IPV6_ADDR_UNICAST))
1303 goto out;
1304
5578689a 1305 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1306
1307 err = -EHOSTUNREACH;
1308 if (grt == NULL)
1309 goto out;
1310 if (dev) {
1311 if (dev != grt->rt6i_dev) {
d8d1f30b 1312 dst_release(&grt->dst);
1da177e4
LT
1313 goto out;
1314 }
1315 } else {
1316 dev = grt->rt6i_dev;
1317 idev = grt->rt6i_idev;
1318 dev_hold(dev);
1319 in6_dev_hold(grt->rt6i_idev);
1320 }
1321 if (!(grt->rt6i_flags&RTF_GATEWAY))
1322 err = 0;
d8d1f30b 1323 dst_release(&grt->dst);
1da177e4
LT
1324
1325 if (err)
1326 goto out;
1327 }
1328 err = -EINVAL;
1329 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1330 goto out;
1331 }
1332
1333 err = -ENODEV;
1334 if (dev == NULL)
1335 goto out;
1336
c3968a85
DW
1337 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1338 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1339 err = -EINVAL;
1340 goto out;
1341 }
1342 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1343 rt->rt6i_prefsrc.plen = 128;
1344 } else
1345 rt->rt6i_prefsrc.plen = 0;
1346
86872cb5 1347 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1348 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1349 if (IS_ERR(rt->rt6i_nexthop)) {
1350 err = PTR_ERR(rt->rt6i_nexthop);
1351 rt->rt6i_nexthop = NULL;
1352 goto out;
1353 }
1354 }
1355
86872cb5 1356 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1357
1358install_route:
86872cb5
TG
1359 if (cfg->fc_mx) {
1360 struct nlattr *nla;
1361 int remaining;
1362
1363 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1364 int type = nla_type(nla);
86872cb5
TG
1365
1366 if (type) {
1367 if (type > RTAX_MAX) {
1da177e4
LT
1368 err = -EINVAL;
1369 goto out;
1370 }
86872cb5 1371
defb3519 1372 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1373 }
1da177e4
LT
1374 }
1375 }
1376
d8d1f30b 1377 rt->dst.dev = dev;
1da177e4 1378 rt->rt6i_idev = idev;
c71099ac 1379 rt->rt6i_table = table;
63152fc0 1380
c346dca1 1381 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1382
86872cb5 1383 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1384
1385out:
1386 if (dev)
1387 dev_put(dev);
1388 if (idev)
1389 in6_dev_put(idev);
1390 if (rt)
d8d1f30b 1391 dst_free(&rt->dst);
1da177e4
LT
1392 return err;
1393}
1394
86872cb5 1395static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1396{
1397 int err;
c71099ac 1398 struct fib6_table *table;
c346dca1 1399 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1400
8ed67789 1401 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1402 return -ENOENT;
1403
c71099ac
TG
1404 table = rt->rt6i_table;
1405 write_lock_bh(&table->tb6_lock);
1da177e4 1406
86872cb5 1407 err = fib6_del(rt, info);
d8d1f30b 1408 dst_release(&rt->dst);
1da177e4 1409
c71099ac 1410 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1411
1412 return err;
1413}
1414
e0a1ad73
TG
1415int ip6_del_rt(struct rt6_info *rt)
1416{
4d1169c1 1417 struct nl_info info = {
c346dca1 1418 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1419 };
528c4ceb 1420 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1421}
1422
86872cb5 1423static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1424{
c71099ac 1425 struct fib6_table *table;
1da177e4
LT
1426 struct fib6_node *fn;
1427 struct rt6_info *rt;
1428 int err = -ESRCH;
1429
5578689a 1430 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1431 if (table == NULL)
1432 return err;
1433
1434 read_lock_bh(&table->tb6_lock);
1da177e4 1435
c71099ac 1436 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1437 &cfg->fc_dst, cfg->fc_dst_len,
1438 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1439
1da177e4 1440 if (fn) {
d8d1f30b 1441 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1442 if (cfg->fc_ifindex &&
1da177e4 1443 (rt->rt6i_dev == NULL ||
86872cb5 1444 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1445 continue;
86872cb5
TG
1446 if (cfg->fc_flags & RTF_GATEWAY &&
1447 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1448 continue;
86872cb5 1449 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1450 continue;
d8d1f30b 1451 dst_hold(&rt->dst);
c71099ac 1452 read_unlock_bh(&table->tb6_lock);
1da177e4 1453
86872cb5 1454 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1455 }
1456 }
c71099ac 1457 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1458
1459 return err;
1460}
1461
1462/*
1463 * Handle redirects
1464 */
a6279458 1465struct ip6rd_flowi {
4c9483b2 1466 struct flowi6 fl6;
a6279458
YH
1467 struct in6_addr gateway;
1468};
1469
8ed67789
DL
1470static struct rt6_info *__ip6_route_redirect(struct net *net,
1471 struct fib6_table *table,
4c9483b2 1472 struct flowi6 *fl6,
a6279458 1473 int flags)
1da177e4 1474{
4c9483b2 1475 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1476 struct rt6_info *rt;
e843b9e1 1477 struct fib6_node *fn;
c71099ac 1478
1da177e4 1479 /*
e843b9e1
YH
1480 * Get the "current" route for this destination and
1481 * check if the redirect has come from approriate router.
1482 *
1483 * RFC 2461 specifies that redirects should only be
1484 * accepted if they come from the nexthop to the target.
1485 * Due to the way the routes are chosen, this notion
1486 * is a bit fuzzy and one might need to check all possible
1487 * routes.
1da177e4 1488 */
1da177e4 1489
c71099ac 1490 read_lock_bh(&table->tb6_lock);
4c9483b2 1491 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1492restart:
d8d1f30b 1493 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1494 /*
1495 * Current route is on-link; redirect is always invalid.
1496 *
1497 * Seems, previous statement is not true. It could
1498 * be node, which looks for us as on-link (f.e. proxy ndisc)
1499 * But then router serving it might decide, that we should
1500 * know truth 8)8) --ANK (980726).
1501 */
1502 if (rt6_check_expired(rt))
1503 continue;
1504 if (!(rt->rt6i_flags & RTF_GATEWAY))
1505 continue;
4c9483b2 1506 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
e843b9e1 1507 continue;
a6279458 1508 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1509 continue;
1510 break;
1511 }
a6279458 1512
cb15d9c2 1513 if (!rt)
8ed67789 1514 rt = net->ipv6.ip6_null_entry;
4c9483b2 1515 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1516out:
d8d1f30b 1517 dst_hold(&rt->dst);
a6279458 1518
c71099ac 1519 read_unlock_bh(&table->tb6_lock);
e843b9e1 1520
a6279458
YH
1521 return rt;
1522};
1523
b71d1d42
ED
1524static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1525 const struct in6_addr *src,
1526 const struct in6_addr *gateway,
a6279458
YH
1527 struct net_device *dev)
1528{
adaa70bb 1529 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1530 struct net *net = dev_net(dev);
a6279458 1531 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1532 .fl6 = {
1533 .flowi6_oif = dev->ifindex,
1534 .daddr = *dest,
1535 .saddr = *src,
a6279458 1536 },
a6279458 1537 };
adaa70bb 1538
86c36ce4
BH
1539 ipv6_addr_copy(&rdfl.gateway, gateway);
1540
adaa70bb
TG
1541 if (rt6_need_strict(dest))
1542 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1543
4c9483b2 1544 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1545 flags, __ip6_route_redirect);
a6279458
YH
1546}
1547
b71d1d42
ED
1548void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1549 const struct in6_addr *saddr,
a6279458
YH
1550 struct neighbour *neigh, u8 *lladdr, int on_link)
1551{
1552 struct rt6_info *rt, *nrt = NULL;
1553 struct netevent_redirect netevent;
c346dca1 1554 struct net *net = dev_net(neigh->dev);
a6279458
YH
1555
1556 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1557
8ed67789 1558 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1559 if (net_ratelimit())
1560 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1561 "for redirect target\n");
a6279458 1562 goto out;
1da177e4
LT
1563 }
1564
1da177e4
LT
1565 /*
1566 * We have finally decided to accept it.
1567 */
1568
1ab1457c 1569 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1570 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1571 NEIGH_UPDATE_F_OVERRIDE|
1572 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1573 NEIGH_UPDATE_F_ISROUTER))
1574 );
1575
1576 /*
1577 * Redirect received -> path was valid.
1578 * Look, redirects are sent only in response to data packets,
1579 * so that this nexthop apparently is reachable. --ANK
1580 */
d8d1f30b 1581 dst_confirm(&rt->dst);
1da177e4
LT
1582
1583 /* Duplicate redirect: silently ignore. */
d8d1f30b 1584 if (neigh == rt->dst.neighbour)
1da177e4
LT
1585 goto out;
1586
1587 nrt = ip6_rt_copy(rt);
1588 if (nrt == NULL)
1589 goto out;
1590
1591 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1592 if (on_link)
1593 nrt->rt6i_flags &= ~RTF_GATEWAY;
1594
1595 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1596 nrt->rt6i_dst.plen = 128;
d8d1f30b 1597 nrt->dst.flags |= DST_HOST;
1da177e4
LT
1598
1599 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1600 nrt->rt6i_nexthop = neigh_clone(neigh);
1da177e4 1601
40e22e8f 1602 if (ip6_ins_rt(nrt))
1da177e4
LT
1603 goto out;
1604
d8d1f30b
CG
1605 netevent.old = &rt->dst;
1606 netevent.new = &nrt->dst;
8d71740c
TT
1607 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1608
1da177e4 1609 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1610 ip6_del_rt(rt);
1da177e4
LT
1611 return;
1612 }
1613
1614out:
d8d1f30b 1615 dst_release(&rt->dst);
1da177e4
LT
1616}
1617
1618/*
1619 * Handle ICMP "packet too big" messages
1620 * i.e. Path MTU discovery
1621 */
1622
b71d1d42 1623static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1624 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1625{
1626 struct rt6_info *rt, *nrt;
1627 int allfrag = 0;
d3052b55 1628again:
ae878ae2 1629 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1630 if (rt == NULL)
1631 return;
1632
d3052b55
AV
1633 if (rt6_check_expired(rt)) {
1634 ip6_del_rt(rt);
1635 goto again;
1636 }
1637
d8d1f30b 1638 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1639 goto out;
1640
1641 if (pmtu < IPV6_MIN_MTU) {
1642 /*
1ab1457c 1643 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1644 * MTU (1280) and a fragment header should always be included
1645 * after a node receiving Too Big message reporting PMTU is
1646 * less than the IPv6 Minimum Link MTU.
1647 */
1648 pmtu = IPV6_MIN_MTU;
1649 allfrag = 1;
1650 }
1651
1652 /* New mtu received -> path was valid.
1653 They are sent only in response to data packets,
1654 so that this nexthop apparently is reachable. --ANK
1655 */
d8d1f30b 1656 dst_confirm(&rt->dst);
1da177e4
LT
1657
1658 /* Host route. If it is static, it would be better
1659 not to override it, but add new one, so that
1660 when cache entry will expire old pmtu
1661 would return automatically.
1662 */
1663 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1664 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1665 if (allfrag) {
1666 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1667 features |= RTAX_FEATURE_ALLFRAG;
1668 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1669 }
d8d1f30b 1670 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1671 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1672 goto out;
1673 }
1674
1675 /* Network route.
1676 Two cases are possible:
1677 1. It is connected route. Action: COW
1678 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1679 */
d5315b50 1680 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1681 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1682 else
1683 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1684
d5315b50 1685 if (nrt) {
defb3519
DM
1686 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1687 if (allfrag) {
1688 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1689 features |= RTAX_FEATURE_ALLFRAG;
1690 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1691 }
a1e78363
YH
1692
1693 /* According to RFC 1981, detecting PMTU increase shouldn't be
1694 * happened within 5 mins, the recommended timer is 10 mins.
1695 * Here this route expiration time is set to ip6_rt_mtu_expires
1696 * which is 10 mins. After 10 mins the decreased pmtu is expired
1697 * and detecting PMTU increase will be automatically happened.
1698 */
d8d1f30b 1699 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1700 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1701
40e22e8f 1702 ip6_ins_rt(nrt);
1da177e4 1703 }
1da177e4 1704out:
d8d1f30b 1705 dst_release(&rt->dst);
1da177e4
LT
1706}
1707
b71d1d42 1708void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1709 struct net_device *dev, u32 pmtu)
1710{
1711 struct net *net = dev_net(dev);
1712
1713 /*
1714 * RFC 1981 states that a node "MUST reduce the size of the packets it
1715 * is sending along the path" that caused the Packet Too Big message.
1716 * Since it's not possible in the general case to determine which
1717 * interface was used to send the original packet, we update the MTU
1718 * on the interface that will be used to send future packets. We also
1719 * update the MTU on the interface that received the Packet Too Big in
1720 * case the original packet was forced out that interface with
1721 * SO_BINDTODEVICE or similar. This is the next best thing to the
1722 * correct behaviour, which would be to update the MTU on all
1723 * interfaces.
1724 */
1725 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1726 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1727}
1728
1da177e4
LT
1729/*
1730 * Misc support functions
1731 */
1732
1733static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1734{
c346dca1 1735 struct net *net = dev_net(ort->rt6i_dev);
5c1e6aa3
DM
1736 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1737 ort->dst.dev);
1da177e4
LT
1738
1739 if (rt) {
d8d1f30b
CG
1740 rt->dst.input = ort->dst.input;
1741 rt->dst.output = ort->dst.output;
1742
defb3519 1743 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1744 rt->dst.error = ort->dst.error;
1da177e4
LT
1745 rt->rt6i_idev = ort->rt6i_idev;
1746 if (rt->rt6i_idev)
1747 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1748 rt->dst.lastuse = jiffies;
1da177e4
LT
1749 rt->rt6i_expires = 0;
1750
1751 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1752 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1753 rt->rt6i_metric = 0;
1754
1755 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1756#ifdef CONFIG_IPV6_SUBTREES
1757 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1758#endif
0f6c6392 1759 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1760 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1761 }
1762 return rt;
1763}
1764
70ceb4f5 1765#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1766static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1767 const struct in6_addr *prefix, int prefixlen,
1768 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1769{
1770 struct fib6_node *fn;
1771 struct rt6_info *rt = NULL;
c71099ac
TG
1772 struct fib6_table *table;
1773
efa2cea0 1774 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1775 if (table == NULL)
1776 return NULL;
70ceb4f5 1777
c71099ac
TG
1778 write_lock_bh(&table->tb6_lock);
1779 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1780 if (!fn)
1781 goto out;
1782
d8d1f30b 1783 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1784 if (rt->rt6i_dev->ifindex != ifindex)
1785 continue;
1786 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1787 continue;
1788 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1789 continue;
d8d1f30b 1790 dst_hold(&rt->dst);
70ceb4f5
YH
1791 break;
1792 }
1793out:
c71099ac 1794 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1795 return rt;
1796}
1797
efa2cea0 1798static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1799 const struct in6_addr *prefix, int prefixlen,
1800 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5
YH
1801 unsigned pref)
1802{
86872cb5
TG
1803 struct fib6_config cfg = {
1804 .fc_table = RT6_TABLE_INFO,
238fc7ea 1805 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1806 .fc_ifindex = ifindex,
1807 .fc_dst_len = prefixlen,
1808 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1809 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1810 .fc_nlinfo.pid = 0,
1811 .fc_nlinfo.nlh = NULL,
1812 .fc_nlinfo.nl_net = net,
86872cb5
TG
1813 };
1814
1815 ipv6_addr_copy(&cfg.fc_dst, prefix);
1816 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1817
e317da96
YH
1818 /* We should treat it as a default route if prefix length is 0. */
1819 if (!prefixlen)
86872cb5 1820 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1821
86872cb5 1822 ip6_route_add(&cfg);
70ceb4f5 1823
efa2cea0 1824 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1825}
1826#endif
1827
b71d1d42 1828struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1829{
1da177e4 1830 struct rt6_info *rt;
c71099ac 1831 struct fib6_table *table;
1da177e4 1832
c346dca1 1833 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1834 if (table == NULL)
1835 return NULL;
1da177e4 1836
c71099ac 1837 write_lock_bh(&table->tb6_lock);
d8d1f30b 1838 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1839 if (dev == rt->rt6i_dev &&
045927ff 1840 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1841 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1842 break;
1843 }
1844 if (rt)
d8d1f30b 1845 dst_hold(&rt->dst);
c71099ac 1846 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1847 return rt;
1848}
1849
b71d1d42 1850struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1851 struct net_device *dev,
1852 unsigned int pref)
1da177e4 1853{
86872cb5
TG
1854 struct fib6_config cfg = {
1855 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1856 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1857 .fc_ifindex = dev->ifindex,
1858 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1859 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1860 .fc_nlinfo.pid = 0,
1861 .fc_nlinfo.nlh = NULL,
c346dca1 1862 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1863 };
1da177e4 1864
86872cb5 1865 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1866
86872cb5 1867 ip6_route_add(&cfg);
1da177e4 1868
1da177e4
LT
1869 return rt6_get_dflt_router(gwaddr, dev);
1870}
1871
7b4da532 1872void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1873{
1874 struct rt6_info *rt;
c71099ac
TG
1875 struct fib6_table *table;
1876
1877 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1878 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1879 if (table == NULL)
1880 return;
1da177e4
LT
1881
1882restart:
c71099ac 1883 read_lock_bh(&table->tb6_lock);
d8d1f30b 1884 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1885 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1886 dst_hold(&rt->dst);
c71099ac 1887 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1888 ip6_del_rt(rt);
1da177e4
LT
1889 goto restart;
1890 }
1891 }
c71099ac 1892 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1893}
1894
5578689a
DL
1895static void rtmsg_to_fib6_config(struct net *net,
1896 struct in6_rtmsg *rtmsg,
86872cb5
TG
1897 struct fib6_config *cfg)
1898{
1899 memset(cfg, 0, sizeof(*cfg));
1900
1901 cfg->fc_table = RT6_TABLE_MAIN;
1902 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1903 cfg->fc_metric = rtmsg->rtmsg_metric;
1904 cfg->fc_expires = rtmsg->rtmsg_info;
1905 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1906 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1907 cfg->fc_flags = rtmsg->rtmsg_flags;
1908
5578689a 1909 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1910
86872cb5
TG
1911 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1912 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1913 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1914}
1915
5578689a 1916int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1917{
86872cb5 1918 struct fib6_config cfg;
1da177e4
LT
1919 struct in6_rtmsg rtmsg;
1920 int err;
1921
1922 switch(cmd) {
1923 case SIOCADDRT: /* Add a route */
1924 case SIOCDELRT: /* Delete a route */
1925 if (!capable(CAP_NET_ADMIN))
1926 return -EPERM;
1927 err = copy_from_user(&rtmsg, arg,
1928 sizeof(struct in6_rtmsg));
1929 if (err)
1930 return -EFAULT;
86872cb5 1931
5578689a 1932 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1933
1da177e4
LT
1934 rtnl_lock();
1935 switch (cmd) {
1936 case SIOCADDRT:
86872cb5 1937 err = ip6_route_add(&cfg);
1da177e4
LT
1938 break;
1939 case SIOCDELRT:
86872cb5 1940 err = ip6_route_del(&cfg);
1da177e4
LT
1941 break;
1942 default:
1943 err = -EINVAL;
1944 }
1945 rtnl_unlock();
1946
1947 return err;
3ff50b79 1948 }
1da177e4
LT
1949
1950 return -EINVAL;
1951}
1952
1953/*
1954 * Drop the packet on the floor
1955 */
1956
d5fdd6ba 1957static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1958{
612f09e8 1959 int type;
adf30907 1960 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1961 switch (ipstats_mib_noroutes) {
1962 case IPSTATS_MIB_INNOROUTES:
0660e03f 1963 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1964 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1965 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1966 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1967 break;
1968 }
1969 /* FALLTHROUGH */
1970 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1971 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1972 ipstats_mib_noroutes);
612f09e8
YH
1973 break;
1974 }
3ffe533c 1975 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1976 kfree_skb(skb);
1977 return 0;
1978}
1979
9ce8ade0
TG
1980static int ip6_pkt_discard(struct sk_buff *skb)
1981{
612f09e8 1982 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1983}
1984
20380731 1985static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1986{
adf30907 1987 skb->dev = skb_dst(skb)->dev;
612f09e8 1988 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1989}
1990
6723ab54
DM
1991#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1992
9ce8ade0
TG
1993static int ip6_pkt_prohibit(struct sk_buff *skb)
1994{
612f09e8 1995 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1996}
1997
1998static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1999{
adf30907 2000 skb->dev = skb_dst(skb)->dev;
612f09e8 2001 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2002}
2003
6723ab54
DM
2004#endif
2005
1da177e4
LT
2006/*
2007 * Allocate a dst for local (unicast / anycast) address.
2008 */
2009
2010struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2011 const struct in6_addr *addr,
2012 int anycast)
2013{
c346dca1 2014 struct net *net = dev_net(idev->dev);
5c1e6aa3
DM
2015 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2016 net->loopback_dev);
14deae41 2017 struct neighbour *neigh;
1da177e4 2018
40385653
BG
2019 if (rt == NULL) {
2020 if (net_ratelimit())
2021 pr_warning("IPv6: Maximum number of routes reached,"
2022 " consider increasing route/max_size.\n");
1da177e4 2023 return ERR_PTR(-ENOMEM);
40385653 2024 }
1da177e4 2025
1da177e4
LT
2026 in6_dev_hold(idev);
2027
d8d1f30b
CG
2028 rt->dst.flags = DST_HOST;
2029 rt->dst.input = ip6_input;
2030 rt->dst.output = ip6_output;
1da177e4 2031 rt->rt6i_idev = idev;
d8d1f30b 2032 rt->dst.obsolete = -1;
1da177e4
LT
2033
2034 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2035 if (anycast)
2036 rt->rt6i_flags |= RTF_ANYCAST;
2037 else
1da177e4 2038 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
2039 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2040 if (IS_ERR(neigh)) {
d8d1f30b 2041 dst_free(&rt->dst);
14deae41 2042
29546a64 2043 return ERR_CAST(neigh);
1da177e4 2044 }
14deae41 2045 rt->rt6i_nexthop = neigh;
1da177e4
LT
2046
2047 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2048 rt->rt6i_dst.plen = 128;
5578689a 2049 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2050
d8d1f30b 2051 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2052
2053 return rt;
2054}
2055
c3968a85
DW
2056int ip6_route_get_saddr(struct net *net,
2057 struct rt6_info *rt,
b71d1d42 2058 const struct in6_addr *daddr,
c3968a85
DW
2059 unsigned int prefs,
2060 struct in6_addr *saddr)
2061{
2062 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2063 int err = 0;
2064 if (rt->rt6i_prefsrc.plen)
2065 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2066 else
2067 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2068 daddr, prefs, saddr);
2069 return err;
2070}
2071
/*
 * Walker argument used when an address is deleted and has to be stripped
 * from the preferred-source field of the routes that still name it.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace owning ip6_null_entry */
	struct in6_addr *addr;		/* address being removed */
};
2078
2079static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2080{
2081 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2082 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2083 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2084
2085 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2086 rt != net->ipv6.ip6_null_entry &&
2087 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2088 /* remove prefsrc entry */
2089 rt->rt6i_prefsrc.plen = 0;
2090 }
2091 return 0;
2092}
2093
2094void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2095{
2096 struct net *net = dev_net(ifp->idev->dev);
2097 struct arg_dev_net_ip adni = {
2098 .dev = ifp->idev->dev,
2099 .net = net,
2100 .addr = &ifp->addr,
2101 };
2102 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2103}
2104
8ed67789
DL
/* Walker argument for fib6_ifdown(): which device went away, in which netns. */
struct arg_dev_net {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace owning ip6_null_entry */
};
2109
1da177e4
LT
2110static int fib6_ifdown(struct rt6_info *rt, void *arg)
2111{
bc3ef660 2112 const struct arg_dev_net *adn = arg;
2113 const struct net_device *dev = adn->dev;
8ed67789 2114
bc3ef660 2115 if ((rt->rt6i_dev == dev || dev == NULL) &&
2116 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2117 RT6_TRACE("deleted by ifdown %p\n", rt);
2118 return -1;
2119 }
2120 return 0;
2121}
2122
f3db4851 2123void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2124{
8ed67789
DL
2125 struct arg_dev_net adn = {
2126 .dev = dev,
2127 .net = net,
2128 };
2129
2130 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2131 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2132}
2133
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* the new MTU */
};
2139
2140static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2141{
2142 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2143 struct inet6_dev *idev;
2144
2145 /* In IPv6 pmtu discovery is not optional,
2146 so that RTAX_MTU lock cannot disable it.
2147 We still use this lock to block changes
2148 caused by addrconf/ndisc.
2149 */
2150
2151 idev = __in6_dev_get(arg->dev);
2152 if (idev == NULL)
2153 return 0;
2154
2155 /* For administrative MTU increase, there is no way to discover
2156 IPv6 PMTU increase, so PMTU increase should be updated here.
2157 Since RFC 1981 doesn't include administrative MTU increase
2158 update PMTU increase is a MUST. (i.e. jumbo frame)
2159 */
2160 /*
2161 If new MTU is less than route PMTU, this new MTU will be the
2162 lowest MTU in the path, update the route PMTU to reflect PMTU
2163 decreases; if new MTU is greater than route PMTU, and the
2164 old MTU is the lowest MTU in the path, update the route PMTU
2165 to reflect the increase. In this case if the other nodes' MTU
2166 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2167 PMTU discouvery.
2168 */
2169 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2170 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2171 (dst_mtu(&rt->dst) >= arg->mtu ||
2172 (dst_mtu(&rt->dst) < arg->mtu &&
2173 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2174 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2175 }
1da177e4
LT
2176 return 0;
2177}
2178
2179void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2180{
c71099ac
TG
2181 struct rt6_mtu_change_arg arg = {
2182 .dev = dev,
2183 .mtu = mtu,
2184 };
1da177e4 2185
c346dca1 2186 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2187}
2188
ef7c79ed 2189static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2190 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2191 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2192 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2193 [RTA_PRIORITY] = { .type = NLA_U32 },
2194 [RTA_METRICS] = { .type = NLA_NESTED },
2195};
2196
2197static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2198 struct fib6_config *cfg)
1da177e4 2199{
86872cb5
TG
2200 struct rtmsg *rtm;
2201 struct nlattr *tb[RTA_MAX+1];
2202 int err;
1da177e4 2203
86872cb5
TG
2204 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2205 if (err < 0)
2206 goto errout;
1da177e4 2207
86872cb5
TG
2208 err = -EINVAL;
2209 rtm = nlmsg_data(nlh);
2210 memset(cfg, 0, sizeof(*cfg));
2211
2212 cfg->fc_table = rtm->rtm_table;
2213 cfg->fc_dst_len = rtm->rtm_dst_len;
2214 cfg->fc_src_len = rtm->rtm_src_len;
2215 cfg->fc_flags = RTF_UP;
2216 cfg->fc_protocol = rtm->rtm_protocol;
2217
2218 if (rtm->rtm_type == RTN_UNREACHABLE)
2219 cfg->fc_flags |= RTF_REJECT;
2220
ab79ad14
2221 if (rtm->rtm_type == RTN_LOCAL)
2222 cfg->fc_flags |= RTF_LOCAL;
2223
86872cb5
TG
2224 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2225 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2226 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2227
2228 if (tb[RTA_GATEWAY]) {
2229 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2230 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2231 }
86872cb5
TG
2232
2233 if (tb[RTA_DST]) {
2234 int plen = (rtm->rtm_dst_len + 7) >> 3;
2235
2236 if (nla_len(tb[RTA_DST]) < plen)
2237 goto errout;
2238
2239 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2240 }
86872cb5
TG
2241
2242 if (tb[RTA_SRC]) {
2243 int plen = (rtm->rtm_src_len + 7) >> 3;
2244
2245 if (nla_len(tb[RTA_SRC]) < plen)
2246 goto errout;
2247
2248 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2249 }
86872cb5 2250
c3968a85
DW
2251 if (tb[RTA_PREFSRC])
2252 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2253
86872cb5
TG
2254 if (tb[RTA_OIF])
2255 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2256
2257 if (tb[RTA_PRIORITY])
2258 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2259
2260 if (tb[RTA_METRICS]) {
2261 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2262 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2263 }
86872cb5
TG
2264
2265 if (tb[RTA_TABLE])
2266 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2267
2268 err = 0;
2269errout:
2270 return err;
1da177e4
LT
2271}
2272
c127ea2c 2273static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2274{
86872cb5
TG
2275 struct fib6_config cfg;
2276 int err;
1da177e4 2277
86872cb5
TG
2278 err = rtm_to_fib6_config(skb, nlh, &cfg);
2279 if (err < 0)
2280 return err;
2281
2282 return ip6_route_del(&cfg);
1da177e4
LT
2283}
2284
c127ea2c 2285static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2286{
86872cb5
TG
2287 struct fib6_config cfg;
2288 int err;
1da177e4 2289
86872cb5
TG
2290 err = rtm_to_fib6_config(skb, nlh, &cfg);
2291 if (err < 0)
2292 return err;
2293
2294 return ip6_route_add(&cfg);
1da177e4
LT
2295}
2296
339bf98f
TG
2297static inline size_t rt6_nlmsg_size(void)
2298{
2299 return NLMSG_ALIGN(sizeof(struct rtmsg))
2300 + nla_total_size(16) /* RTA_SRC */
2301 + nla_total_size(16) /* RTA_DST */
2302 + nla_total_size(16) /* RTA_GATEWAY */
2303 + nla_total_size(16) /* RTA_PREFSRC */
2304 + nla_total_size(4) /* RTA_TABLE */
2305 + nla_total_size(4) /* RTA_IIF */
2306 + nla_total_size(4) /* RTA_OIF */
2307 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2308 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2309 + nla_total_size(sizeof(struct rta_cacheinfo));
2310}
2311
191cd582
BH
2312static int rt6_fill_node(struct net *net,
2313 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2314 struct in6_addr *dst, struct in6_addr *src,
2315 int iif, int type, u32 pid, u32 seq,
7bc570c8 2316 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2317{
2318 struct rtmsg *rtm;
2d7202bf 2319 struct nlmsghdr *nlh;
e3703b3d 2320 long expires;
9e762a4a 2321 u32 table;
1da177e4
LT
2322
2323 if (prefix) { /* user wants prefix routes only */
2324 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2325 /* success since this is not a prefix route */
2326 return 1;
2327 }
2328 }
2329
2d7202bf
TG
2330 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2331 if (nlh == NULL)
26932566 2332 return -EMSGSIZE;
2d7202bf
TG
2333
2334 rtm = nlmsg_data(nlh);
1da177e4
LT
2335 rtm->rtm_family = AF_INET6;
2336 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2337 rtm->rtm_src_len = rt->rt6i_src.plen;
2338 rtm->rtm_tos = 0;
c71099ac 2339 if (rt->rt6i_table)
9e762a4a 2340 table = rt->rt6i_table->tb6_id;
c71099ac 2341 else
9e762a4a
PM
2342 table = RT6_TABLE_UNSPEC;
2343 rtm->rtm_table = table;
2d7202bf 2344 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2345 if (rt->rt6i_flags&RTF_REJECT)
2346 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2347 else if (rt->rt6i_flags&RTF_LOCAL)
2348 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2349 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2350 rtm->rtm_type = RTN_LOCAL;
2351 else
2352 rtm->rtm_type = RTN_UNICAST;
2353 rtm->rtm_flags = 0;
2354 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2355 rtm->rtm_protocol = rt->rt6i_protocol;
2356 if (rt->rt6i_flags&RTF_DYNAMIC)
2357 rtm->rtm_protocol = RTPROT_REDIRECT;
2358 else if (rt->rt6i_flags & RTF_ADDRCONF)
2359 rtm->rtm_protocol = RTPROT_KERNEL;
2360 else if (rt->rt6i_flags&RTF_DEFAULT)
2361 rtm->rtm_protocol = RTPROT_RA;
2362
2363 if (rt->rt6i_flags&RTF_CACHE)
2364 rtm->rtm_flags |= RTM_F_CLONED;
2365
2366 if (dst) {
2d7202bf 2367 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2368 rtm->rtm_dst_len = 128;
1da177e4 2369 } else if (rtm->rtm_dst_len)
2d7202bf 2370 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2371#ifdef CONFIG_IPV6_SUBTREES
2372 if (src) {
2d7202bf 2373 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2374 rtm->rtm_src_len = 128;
1da177e4 2375 } else if (rtm->rtm_src_len)
2d7202bf 2376 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2377#endif
7bc570c8
YH
2378 if (iif) {
2379#ifdef CONFIG_IPV6_MROUTE
2380 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2381 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2382 if (err <= 0) {
2383 if (!nowait) {
2384 if (err == 0)
2385 return 0;
2386 goto nla_put_failure;
2387 } else {
2388 if (err == -EMSGSIZE)
2389 goto nla_put_failure;
2390 }
2391 }
2392 } else
2393#endif
2394 NLA_PUT_U32(skb, RTA_IIF, iif);
2395 } else if (dst) {
1da177e4 2396 struct in6_addr saddr_buf;
c3968a85 2397 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2d7202bf 2398 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2399 }
2d7202bf 2400
c3968a85
DW
2401 if (rt->rt6i_prefsrc.plen) {
2402 struct in6_addr saddr_buf;
2403 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2404 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2405 }
2406
defb3519 2407 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2408 goto nla_put_failure;
2409
d8d1f30b
CG
2410 if (rt->dst.neighbour)
2411 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2d7202bf 2412
d8d1f30b 2413 if (rt->dst.dev)
2d7202bf
TG
2414 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2415
2416 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2417
36e3deae
YH
2418 if (!(rt->rt6i_flags & RTF_EXPIRES))
2419 expires = 0;
2420 else if (rt->rt6i_expires - jiffies < INT_MAX)
2421 expires = rt->rt6i_expires - jiffies;
2422 else
2423 expires = INT_MAX;
69cdf8f9 2424
d8d1f30b
CG
2425 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2426 expires, rt->dst.error) < 0)
e3703b3d 2427 goto nla_put_failure;
2d7202bf
TG
2428
2429 return nlmsg_end(skb, nlh);
2430
2431nla_put_failure:
26932566
PM
2432 nlmsg_cancel(skb, nlh);
2433 return -EMSGSIZE;
1da177e4
LT
2434}
2435
1b43af54 2436int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2437{
2438 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2439 int prefix;
2440
2d7202bf
TG
2441 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2442 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2443 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2444 } else
2445 prefix = 0;
2446
191cd582
BH
2447 return rt6_fill_node(arg->net,
2448 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2449 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2450 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2451}
2452
c127ea2c 2453static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2454{
3b1e0a65 2455 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2456 struct nlattr *tb[RTA_MAX+1];
2457 struct rt6_info *rt;
1da177e4 2458 struct sk_buff *skb;
ab364a6f 2459 struct rtmsg *rtm;
4c9483b2 2460 struct flowi6 fl6;
ab364a6f 2461 int err, iif = 0;
1da177e4 2462
ab364a6f
TG
2463 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2464 if (err < 0)
2465 goto errout;
1da177e4 2466
ab364a6f 2467 err = -EINVAL;
4c9483b2 2468 memset(&fl6, 0, sizeof(fl6));
1da177e4 2469
ab364a6f
TG
2470 if (tb[RTA_SRC]) {
2471 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2472 goto errout;
2473
4c9483b2 2474 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
ab364a6f
TG
2475 }
2476
2477 if (tb[RTA_DST]) {
2478 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2479 goto errout;
2480
4c9483b2 2481 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
ab364a6f
TG
2482 }
2483
2484 if (tb[RTA_IIF])
2485 iif = nla_get_u32(tb[RTA_IIF]);
2486
2487 if (tb[RTA_OIF])
4c9483b2 2488 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2489
2490 if (iif) {
2491 struct net_device *dev;
5578689a 2492 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2493 if (!dev) {
2494 err = -ENODEV;
ab364a6f 2495 goto errout;
1da177e4
LT
2496 }
2497 }
2498
ab364a6f
TG
2499 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2500 if (skb == NULL) {
2501 err = -ENOBUFS;
2502 goto errout;
2503 }
1da177e4 2504
ab364a6f
TG
2505 /* Reserve room for dummy headers, this skb can pass
2506 through good chunk of routing engine.
2507 */
459a98ed 2508 skb_reset_mac_header(skb);
ab364a6f 2509 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2510
4c9483b2 2511 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
d8d1f30b 2512 skb_dst_set(skb, &rt->dst);
1da177e4 2513
4c9483b2 2514 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2515 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2516 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2517 if (err < 0) {
ab364a6f
TG
2518 kfree_skb(skb);
2519 goto errout;
1da177e4
LT
2520 }
2521
5578689a 2522 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2523errout:
1da177e4 2524 return err;
1da177e4
LT
2525}
2526
86872cb5 2527void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2528{
2529 struct sk_buff *skb;
5578689a 2530 struct net *net = info->nl_net;
528c4ceb
DL
2531 u32 seq;
2532 int err;
2533
2534 err = -ENOBUFS;
2535 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2536
339bf98f 2537 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2538 if (skb == NULL)
2539 goto errout;
2540
191cd582 2541 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2542 event, info->pid, seq, 0, 0, 0);
26932566
PM
2543 if (err < 0) {
2544 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2545 WARN_ON(err == -EMSGSIZE);
2546 kfree_skb(skb);
2547 goto errout;
2548 }
1ce85fe4
PNA
2549 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2550 info->nlh, gfp_any());
2551 return;
21713ebc
TG
2552errout:
2553 if (err < 0)
5578689a 2554 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2555}
2556
8ed67789
DL
2557static int ip6_route_dev_notify(struct notifier_block *this,
2558 unsigned long event, void *data)
2559{
2560 struct net_device *dev = (struct net_device *)data;
c346dca1 2561 struct net *net = dev_net(dev);
8ed67789
DL
2562
2563 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2564 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2565 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2566#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2567 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2568 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2569 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2570 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2571#endif
2572 }
2573
2574 return NOTIFY_OK;
2575}
2576
1da177e4
LT
2577/*
2578 * /proc
2579 */
2580
2581#ifdef CONFIG_PROC_FS
2582
1da177e4
LT
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): no user of this struct is visible in this part of the
 * file; the seq_file based show routines below do not reference it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2591
2592static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2593{
33120b30 2594 struct seq_file *m = p_arg;
1da177e4 2595
4b7a4274 2596 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2597
2598#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2599 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2600#else
33120b30 2601 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2602#endif
2603
2604 if (rt->rt6i_nexthop) {
4b7a4274 2605 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2606 } else {
33120b30 2607 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2608 }
33120b30 2609 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2610 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2611 rt->dst.__use, rt->rt6i_flags,
33120b30 2612 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2613 return 0;
2614}
2615
33120b30 2616static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2617{
f3db4851
DL
2618 struct net *net = (struct net *)m->private;
2619 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2620 return 0;
2621}
1da177e4 2622
33120b30
AD
2623static int ipv6_route_open(struct inode *inode, struct file *file)
2624{
de05c557 2625 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2626}
2627
33120b30
AD
2628static const struct file_operations ipv6_route_proc_fops = {
2629 .owner = THIS_MODULE,
2630 .open = ipv6_route_open,
2631 .read = seq_read,
2632 .llseek = seq_lseek,
b6fcbdb4 2633 .release = single_release_net,
33120b30
AD
2634};
2635
1da177e4
LT
2636static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2637{
69ddb805 2638 struct net *net = (struct net *)seq->private;
1da177e4 2639 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2640 net->ipv6.rt6_stats->fib_nodes,
2641 net->ipv6.rt6_stats->fib_route_nodes,
2642 net->ipv6.rt6_stats->fib_rt_alloc,
2643 net->ipv6.rt6_stats->fib_rt_entries,
2644 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2645 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2646 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2647
2648 return 0;
2649}
2650
2651static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2652{
de05c557 2653 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2654}
2655
9a32144e 2656static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2657 .owner = THIS_MODULE,
2658 .open = rt6_stats_seq_open,
2659 .read = seq_read,
2660 .llseek = seq_lseek,
b6fcbdb4 2661 .release = single_release_net,
1da177e4
LT
2662};
2663#endif /* CONFIG_PROC_FS */
2664
2665#ifdef CONFIG_SYSCTL
2666
1da177e4 2667static
8d65af78 2668int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2669 void __user *buffer, size_t *lenp, loff_t *ppos)
2670{
c486da34
LAG
2671 struct net *net;
2672 int delay;
2673 if (!write)
1da177e4 2674 return -EINVAL;
c486da34
LAG
2675
2676 net = (struct net *)ctl->extra1;
2677 delay = net->ipv6.sysctl.flush_delay;
2678 proc_dointvec(ctl, write, buffer, lenp, ppos);
2679 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2680 return 0;
1da177e4
LT
2681}
2682
760f2d01 2683ctl_table ipv6_route_table_template[] = {
1ab1457c 2684 {
1da177e4 2685 .procname = "flush",
4990509f 2686 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2687 .maxlen = sizeof(int),
89c8b3a1 2688 .mode = 0200,
6d9f239a 2689 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2690 },
2691 {
1da177e4 2692 .procname = "gc_thresh",
9a7ec3a9 2693 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2694 .maxlen = sizeof(int),
2695 .mode = 0644,
6d9f239a 2696 .proc_handler = proc_dointvec,
1da177e4
LT
2697 },
2698 {
1da177e4 2699 .procname = "max_size",
4990509f 2700 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2701 .maxlen = sizeof(int),
2702 .mode = 0644,
6d9f239a 2703 .proc_handler = proc_dointvec,
1da177e4
LT
2704 },
2705 {
1da177e4 2706 .procname = "gc_min_interval",
4990509f 2707 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2708 .maxlen = sizeof(int),
2709 .mode = 0644,
6d9f239a 2710 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2711 },
2712 {
1da177e4 2713 .procname = "gc_timeout",
4990509f 2714 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2715 .maxlen = sizeof(int),
2716 .mode = 0644,
6d9f239a 2717 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2718 },
2719 {
1da177e4 2720 .procname = "gc_interval",
4990509f 2721 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2722 .maxlen = sizeof(int),
2723 .mode = 0644,
6d9f239a 2724 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2725 },
2726 {
1da177e4 2727 .procname = "gc_elasticity",
4990509f 2728 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2729 .maxlen = sizeof(int),
2730 .mode = 0644,
f3d3f616 2731 .proc_handler = proc_dointvec,
1da177e4
LT
2732 },
2733 {
1da177e4 2734 .procname = "mtu_expires",
4990509f 2735 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2736 .maxlen = sizeof(int),
2737 .mode = 0644,
6d9f239a 2738 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2739 },
2740 {
1da177e4 2741 .procname = "min_adv_mss",
4990509f 2742 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2743 .maxlen = sizeof(int),
2744 .mode = 0644,
f3d3f616 2745 .proc_handler = proc_dointvec,
1da177e4
LT
2746 },
2747 {
1da177e4 2748 .procname = "gc_min_interval_ms",
4990509f 2749 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2750 .maxlen = sizeof(int),
2751 .mode = 0644,
6d9f239a 2752 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2753 },
f8572d8f 2754 { }
1da177e4
LT
2755};
2756
2c8c1e72 2757struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2758{
2759 struct ctl_table *table;
2760
2761 table = kmemdup(ipv6_route_table_template,
2762 sizeof(ipv6_route_table_template),
2763 GFP_KERNEL);
5ee09105
YH
2764
2765 if (table) {
2766 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2767 table[0].extra1 = net;
86393e52 2768 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2769 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2770 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2771 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2772 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2773 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2774 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2775 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2776 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2777 }
2778
760f2d01
DL
2779 return table;
2780}
1da177e4
LT
2781#endif
2782
2c8c1e72 2783static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2784{
633d424b 2785 int ret = -ENOMEM;
8ed67789 2786
86393e52
AD
2787 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2788 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2789
fc66f95c
ED
2790 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2791 goto out_ip6_dst_ops;
2792
8ed67789
DL
2793 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2794 sizeof(*net->ipv6.ip6_null_entry),
2795 GFP_KERNEL);
2796 if (!net->ipv6.ip6_null_entry)
fc66f95c 2797 goto out_ip6_dst_entries;
d8d1f30b 2798 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2799 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2800 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2801 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2802 ip6_template_metrics, true);
8ed67789
DL
2803
2804#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2805 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2806 sizeof(*net->ipv6.ip6_prohibit_entry),
2807 GFP_KERNEL);
68fffc67
PZ
2808 if (!net->ipv6.ip6_prohibit_entry)
2809 goto out_ip6_null_entry;
d8d1f30b 2810 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2811 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2812 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2813 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2814 ip6_template_metrics, true);
8ed67789
DL
2815
2816 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2817 sizeof(*net->ipv6.ip6_blk_hole_entry),
2818 GFP_KERNEL);
68fffc67
PZ
2819 if (!net->ipv6.ip6_blk_hole_entry)
2820 goto out_ip6_prohibit_entry;
d8d1f30b 2821 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2822 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2823 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2824 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2825 ip6_template_metrics, true);
8ed67789
DL
2826#endif
2827
b339a47c
PZ
2828 net->ipv6.sysctl.flush_delay = 0;
2829 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2830 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2831 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2832 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2833 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2834 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2835 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2836
cdb18761
DL
2837#ifdef CONFIG_PROC_FS
2838 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2839 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2840#endif
6891a346
BT
2841 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2842
8ed67789
DL
2843 ret = 0;
2844out:
2845 return ret;
f2fc6a54 2846
68fffc67
PZ
2847#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2848out_ip6_prohibit_entry:
2849 kfree(net->ipv6.ip6_prohibit_entry);
2850out_ip6_null_entry:
2851 kfree(net->ipv6.ip6_null_entry);
2852#endif
fc66f95c
ED
2853out_ip6_dst_entries:
2854 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2855out_ip6_dst_ops:
f2fc6a54 2856 goto out;
cdb18761
DL
2857}
2858
2c8c1e72 2859static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2860{
2861#ifdef CONFIG_PROC_FS
2862 proc_net_remove(net, "ipv6_route");
2863 proc_net_remove(net, "rt6_stats");
2864#endif
8ed67789
DL
2865 kfree(net->ipv6.ip6_null_entry);
2866#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2867 kfree(net->ipv6.ip6_prohibit_entry);
2868 kfree(net->ipv6.ip6_blk_hole_entry);
2869#endif
41bb78b4 2870 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2871}
2872
2873static struct pernet_operations ip6_route_net_ops = {
2874 .init = ip6_route_net_init,
2875 .exit = ip6_route_net_exit,
2876};
2877
8ed67789
DL
2878static struct notifier_block ip6_route_dev_notifier = {
2879 .notifier_call = ip6_route_dev_notify,
2880 .priority = 0,
2881};
2882
433d49c3 2883int __init ip6_route_init(void)
1da177e4 2884{
433d49c3
DL
2885 int ret;
2886
9a7ec3a9
DL
2887 ret = -ENOMEM;
2888 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2889 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2890 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2891 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2892 goto out;
14e50e57 2893
fc66f95c 2894 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2895 if (ret)
bdb3289f 2896 goto out_kmem_cache;
bdb3289f 2897
fc66f95c
ED
2898 ret = register_pernet_subsys(&ip6_route_net_ops);
2899 if (ret)
2900 goto out_dst_entries;
2901
5dc121e9
AE
2902 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2903
8ed67789
DL
2904 /* Registering of the loopback is done before this portion of code,
2905 * the loopback reference in rt6_info will not be taken, do it
2906 * manually for init_net */
d8d1f30b 2907 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2908 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2909 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2910 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2911 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2912 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2913 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2914 #endif
433d49c3
DL
2915 ret = fib6_init();
2916 if (ret)
8ed67789 2917 goto out_register_subsys;
433d49c3 2918
433d49c3
DL
2919 ret = xfrm6_init();
2920 if (ret)
cdb18761 2921 goto out_fib6_init;
c35b7e72 2922
433d49c3
DL
2923 ret = fib6_rules_init();
2924 if (ret)
2925 goto xfrm6_init;
7e5449c2 2926
433d49c3
DL
2927 ret = -ENOBUFS;
2928 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2929 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2930 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2931 goto fib6_rules_init;
c127ea2c 2932
8ed67789 2933 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2934 if (ret)
2935 goto fib6_rules_init;
8ed67789 2936
433d49c3
DL
2937out:
2938 return ret;
2939
2940fib6_rules_init:
433d49c3
DL
2941 fib6_rules_cleanup();
2942xfrm6_init:
433d49c3 2943 xfrm6_fini();
433d49c3 2944out_fib6_init:
433d49c3 2945 fib6_gc_cleanup();
8ed67789
DL
2946out_register_subsys:
2947 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2948out_dst_entries:
2949 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2950out_kmem_cache:
f2fc6a54 2951 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2952 goto out;
1da177e4
LT
2953}
2954
2955void ip6_route_cleanup(void)
2956{
8ed67789 2957 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2958 fib6_rules_cleanup();
1da177e4 2959 xfrm6_fini();
1da177e4 2960 fib6_gc_cleanup();
8ed67789 2961 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 2962 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 2963 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2964}