bnx2x: Update bnx2x version to 1.62.00-5
[linux-2.6-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug level for this file.  At 3 or above RDBG() and RT6_TRACE()
 * expand to printk() calls; below that they expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
1da177e4
LT
75static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 77static unsigned int ip6_default_advmss(const struct dst_entry *dst);
d33e4553 78static unsigned int ip6_default_mtu(const struct dst_entry *dst);
1da177e4
LT
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Helpers used by rt6_route_rcv() to find or create a route-info route. */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix,
					   int prefixlen,
					   struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix,
					   int prefixlen,
					   struct in6_addr *gwaddr,
					   int ifindex);
#endif
99
9a7ec3a9 100static struct dst_ops ip6_dst_ops_template = {
1da177e4 101 .family = AF_INET6,
09640e63 102 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
0dbaee3b 106 .default_advmss = ip6_default_advmss,
d33e4553 107 .default_mtu = ip6_default_mtu,
1da177e4
LT
108 .destroy = ip6_dst_destroy,
109 .ifdown = ip6_dst_ifdown,
110 .negative_advice = ip6_negative_advice,
111 .link_failure = ip6_link_failure,
112 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 113 .local_out = __ip6_local_out,
1da177e4
LT
114};
115
14e50e57
DM
116static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
117{
118}
119
120static struct dst_ops ip6_dst_blackhole_ops = {
121 .family = AF_INET6,
09640e63 122 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
123 .destroy = ip6_dst_destroy,
124 .check = ip6_dst_check,
125 .update_pmtu = ip6_rt_blackhole_update_pmtu,
14e50e57
DM
126};
127
bdb3289f 128static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
129 .dst = {
130 .__refcnt = ATOMIC_INIT(1),
131 .__use = 1,
132 .obsolete = -1,
133 .error = -ENETUNREACH,
d8d1f30b
CG
134 .input = ip6_pkt_discard,
135 .output = ip6_pkt_discard_out,
1da177e4
LT
136 },
137 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 138 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
141};
142
101367c2
TG
143#ifdef CONFIG_IPV6_MULTIPLE_TABLES
144
6723ab54
DM
145static int ip6_pkt_prohibit(struct sk_buff *skb);
146static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 147
280a34c8 148static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
149 .dst = {
150 .__refcnt = ATOMIC_INIT(1),
151 .__use = 1,
152 .obsolete = -1,
153 .error = -EACCES,
d8d1f30b
CG
154 .input = ip6_pkt_prohibit,
155 .output = ip6_pkt_prohibit_out,
101367c2
TG
156 },
157 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 158 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
159 .rt6i_metric = ~(u32) 0,
160 .rt6i_ref = ATOMIC_INIT(1),
161};
162
bdb3289f 163static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
164 .dst = {
165 .__refcnt = ATOMIC_INIT(1),
166 .__use = 1,
167 .obsolete = -1,
168 .error = -EINVAL,
d8d1f30b
CG
169 .input = dst_discard,
170 .output = dst_discard,
101367c2
TG
171 },
172 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 173 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
174 .rt6i_metric = ~(u32) 0,
175 .rt6i_ref = ATOMIC_INIT(1),
176};
177
178#endif
179
1da177e4 180/* allocate dst with ip6_dst_ops */
/* Allocate a dst from @ops and return it viewed as a struct rt6_info. */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt = (struct rt6_info *)dst_alloc(ops);

	return rt;
}
185
186static void ip6_dst_destroy(struct dst_entry *dst)
187{
188 struct rt6_info *rt = (struct rt6_info *)dst;
189 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 190 struct inet_peer *peer = rt->rt6i_peer;
1da177e4
LT
191
192 if (idev != NULL) {
193 rt->rt6i_idev = NULL;
194 in6_dev_put(idev);
1ab1457c 195 }
b3419363 196 if (peer) {
b3419363
DM
197 rt->rt6i_peer = NULL;
198 inet_putpeer(peer);
199 }
200}
201
202void rt6_bind_peer(struct rt6_info *rt, int create)
203{
204 struct inet_peer *peer;
205
b3419363
DM
206 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
207 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
208 inet_putpeer(peer);
1da177e4
LT
209}
210
211static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
212 int how)
213{
214 struct rt6_info *rt = (struct rt6_info *)dst;
215 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 216 struct net_device *loopback_dev =
c346dca1 217 dev_net(dev)->loopback_dev;
1da177e4 218
5a3e55d6
DL
219 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
220 struct inet6_dev *loopback_idev =
221 in6_dev_get(loopback_dev);
1da177e4
LT
222 if (loopback_idev != NULL) {
223 rt->rt6i_idev = loopback_idev;
224 in6_dev_put(idev);
225 }
226 }
227}
228
229static __inline__ int rt6_check_expired(const struct rt6_info *rt)
230{
a02cec21
ED
231 return (rt->rt6i_flags & RTF_EXPIRES) &&
232 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
233}
234
c71099ac
TG
235static inline int rt6_need_strict(struct in6_addr *daddr)
236{
a02cec21
ED
237 return ipv6_addr_type(daddr) &
238 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
239}
240
1da177e4 241/*
c71099ac 242 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
243 */
244
8ed67789
DL
245static inline struct rt6_info *rt6_device_match(struct net *net,
246 struct rt6_info *rt,
dd3abc4e 247 struct in6_addr *saddr,
1da177e4 248 int oif,
d420895e 249 int flags)
1da177e4
LT
250{
251 struct rt6_info *local = NULL;
252 struct rt6_info *sprt;
253
dd3abc4e
YH
254 if (!oif && ipv6_addr_any(saddr))
255 goto out;
256
d8d1f30b 257 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
258 struct net_device *dev = sprt->rt6i_dev;
259
260 if (oif) {
1da177e4
LT
261 if (dev->ifindex == oif)
262 return sprt;
263 if (dev->flags & IFF_LOOPBACK) {
264 if (sprt->rt6i_idev == NULL ||
265 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 266 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 267 continue;
1ab1457c 268 if (local && (!oif ||
1da177e4
LT
269 local->rt6i_idev->dev->ifindex == oif))
270 continue;
271 }
272 local = sprt;
273 }
dd3abc4e
YH
274 } else {
275 if (ipv6_chk_addr(net, saddr, dev,
276 flags & RT6_LOOKUP_F_IFACE))
277 return sprt;
1da177e4 278 }
dd3abc4e 279 }
1da177e4 280
dd3abc4e 281 if (oif) {
1da177e4
LT
282 if (local)
283 return local;
284
d420895e 285 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 286 return net->ipv6.ip6_null_entry;
1da177e4 287 }
dd3abc4e 288out:
1da177e4
LT
289 return rt;
290}
291
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the route's next-hop neighbour is not
 * in a NUD_VALID state and was last updated more than
 * cnf.rtr_probe_interval jiffies ago, send it a neighbour solicitation.
 *
 * @rt may be NULL, in which case nothing is done.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/*
	 * neigh->updated is modified below, so the write side of the lock
	 * is required; the read side does not exclude other updaters.
	 */
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/* Record the attempt first; this is what rate-limits probes. */
		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* Send the solicitation with the lock released. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		write_unlock_bh(&neigh->lock);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF there is no reachability probing. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
326
1da177e4 327/*
554cfb7e 328 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 329 */
b6f99a21 330static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
331{
332 struct net_device *dev = rt->rt6i_dev;
161980f4 333 if (!oif || dev->ifindex == oif)
554cfb7e 334 return 2;
161980f4
DM
335 if ((dev->flags & IFF_LOOPBACK) &&
336 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
337 return 1;
338 return 0;
554cfb7e 339}
1da177e4 340
b6f99a21 341static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 342{
554cfb7e 343 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 344 int m;
4d0c5911
YH
345 if (rt->rt6i_flags & RTF_NONEXTHOP ||
346 !(rt->rt6i_flags & RTF_GATEWAY))
347 m = 1;
348 else if (neigh) {
554cfb7e
YH
349 read_lock_bh(&neigh->lock);
350 if (neigh->nud_state & NUD_VALID)
4d0c5911 351 m = 2;
398bcbeb
YH
352#ifdef CONFIG_IPV6_ROUTER_PREF
353 else if (neigh->nud_state & NUD_FAILED)
354 m = 0;
355#endif
356 else
ea73ee23 357 m = 1;
554cfb7e 358 read_unlock_bh(&neigh->lock);
398bcbeb
YH
359 } else
360 m = 0;
554cfb7e 361 return m;
1da177e4
LT
362}
363
554cfb7e
YH
364static int rt6_score_route(struct rt6_info *rt, int oif,
365 int strict)
1da177e4 366{
4d0c5911 367 int m, n;
1ab1457c 368
4d0c5911 369 m = rt6_check_dev(rt, oif);
77d16f45 370 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 371 return -1;
ebacaaa0
YH
372#ifdef CONFIG_IPV6_ROUTER_PREF
373 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
374#endif
4d0c5911 375 n = rt6_check_neigh(rt);
557e92ef 376 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
377 return -1;
378 return m;
379}
380
f11e6659
DM
381static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
382 int *mpri, struct rt6_info *match)
554cfb7e 383{
f11e6659
DM
384 int m;
385
386 if (rt6_check_expired(rt))
387 goto out;
388
389 m = rt6_score_route(rt, oif, strict);
390 if (m < 0)
391 goto out;
392
393 if (m > *mpri) {
394 if (strict & RT6_LOOKUP_F_REACHABLE)
395 rt6_probe(match);
396 *mpri = m;
397 match = rt;
398 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
399 rt6_probe(rt);
400 }
401
402out:
403 return match;
404}
405
406static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
407 struct rt6_info *rr_head,
408 u32 metric, int oif, int strict)
409{
410 struct rt6_info *rt, *match;
554cfb7e 411 int mpri = -1;
1da177e4 412
f11e6659
DM
413 match = NULL;
414 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 415 rt = rt->dst.rt6_next)
f11e6659
DM
416 match = find_match(rt, oif, strict, &mpri, match);
417 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 418 rt = rt->dst.rt6_next)
f11e6659 419 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 420
f11e6659
DM
421 return match;
422}
1da177e4 423
f11e6659
DM
424static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
425{
426 struct rt6_info *match, *rt0;
8ed67789 427 struct net *net;
1da177e4 428
f11e6659 429 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 430 __func__, fn->leaf, oif);
554cfb7e 431
f11e6659
DM
432 rt0 = fn->rr_ptr;
433 if (!rt0)
434 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 435
f11e6659 436 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 437
554cfb7e 438 if (!match &&
f11e6659 439 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 440 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 441
554cfb7e 442 /* no entries matched; do round-robin */
f11e6659
DM
443 if (!next || next->rt6i_metric != rt0->rt6i_metric)
444 next = fn->leaf;
445
446 if (next != rt0)
447 fn->rr_ptr = next;
1da177e4 448 }
1da177e4 449
f11e6659 450 RT6_TRACE("%s() => %p\n",
0dc47877 451 __func__, match);
1da177e4 452
c346dca1 453 net = dev_net(rt0->rt6i_dev);
a02cec21 454 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
455}
456
70ceb4f5
YH
457#ifdef CONFIG_IPV6_ROUTE_INFO
458int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
459 struct in6_addr *gwaddr)
460{
c346dca1 461 struct net *net = dev_net(dev);
70ceb4f5
YH
462 struct route_info *rinfo = (struct route_info *) opt;
463 struct in6_addr prefix_buf, *prefix;
464 unsigned int pref;
4bed72e4 465 unsigned long lifetime;
70ceb4f5
YH
466 struct rt6_info *rt;
467
468 if (len < sizeof(struct route_info)) {
469 return -EINVAL;
470 }
471
472 /* Sanity check for prefix_len and length */
473 if (rinfo->length > 3) {
474 return -EINVAL;
475 } else if (rinfo->prefix_len > 128) {
476 return -EINVAL;
477 } else if (rinfo->prefix_len > 64) {
478 if (rinfo->length < 2) {
479 return -EINVAL;
480 }
481 } else if (rinfo->prefix_len > 0) {
482 if (rinfo->length < 1) {
483 return -EINVAL;
484 }
485 }
486
487 pref = rinfo->route_pref;
488 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 489 return -EINVAL;
70ceb4f5 490
4bed72e4 491 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
492
493 if (rinfo->length == 3)
494 prefix = (struct in6_addr *)rinfo->prefix;
495 else {
496 /* this function is safe */
497 ipv6_addr_prefix(&prefix_buf,
498 (struct in6_addr *)rinfo->prefix,
499 rinfo->prefix_len);
500 prefix = &prefix_buf;
501 }
502
efa2cea0
DL
503 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
504 dev->ifindex);
70ceb4f5
YH
505
506 if (rt && !lifetime) {
e0a1ad73 507 ip6_del_rt(rt);
70ceb4f5
YH
508 rt = NULL;
509 }
510
511 if (!rt && lifetime)
efa2cea0 512 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
513 pref);
514 else if (rt)
515 rt->rt6i_flags = RTF_ROUTEINFO |
516 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
517
518 if (rt) {
4bed72e4 519 if (!addrconf_finite_timeout(lifetime)) {
70ceb4f5
YH
520 rt->rt6i_flags &= ~RTF_EXPIRES;
521 } else {
522 rt->rt6i_expires = jiffies + HZ * lifetime;
523 rt->rt6i_flags |= RTF_EXPIRES;
524 }
d8d1f30b 525 dst_release(&rt->dst);
70ceb4f5
YH
526 }
527 return 0;
528}
529#endif
530
/*
 * BACKTRACK - retry a lookup further up the tree after a miss.
 *
 * Expects locals `rt` and `fn` and labels `out` and `restart` in the
 * calling function.  When rt is the per-net null entry, repeatedly:
 * jump to `out` if fn is flagged RTN_TL_ROOT; otherwise move to the
 * parent (or, if the parent has a subtree other than fn, to the result
 * of fib6_lookup() on that subtree with @saddr) and jump to `restart`
 * as soon as the new fn is flagged RTN_RTINFO.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
c71099ac 548
8ed67789
DL
549static struct rt6_info *ip6_pol_route_lookup(struct net *net,
550 struct fib6_table *table,
c71099ac 551 struct flowi *fl, int flags)
1da177e4
LT
552{
553 struct fib6_node *fn;
554 struct rt6_info *rt;
555
c71099ac
TG
556 read_lock_bh(&table->tb6_lock);
557 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
558restart:
559 rt = fn->leaf;
dd3abc4e 560 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
8ed67789 561 BACKTRACK(net, &fl->fl6_src);
c71099ac 562out:
d8d1f30b 563 dst_use(&rt->dst, jiffies);
c71099ac 564 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
565 return rt;
566
567}
568
9acd9f3a
YH
569struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
570 const struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
571{
572 struct flowi fl = {
573 .oif = oif,
5811662b 574 .fl6_dst = *daddr,
c71099ac
TG
575 };
576 struct dst_entry *dst;
77d16f45 577 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 578
adaa70bb
TG
579 if (saddr) {
580 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
581 flags |= RT6_LOOKUP_F_HAS_SADDR;
582 }
583
606a2b48 584 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
585 if (dst->error == 0)
586 return (struct rt6_info *) dst;
587
588 dst_release(dst);
589
1da177e4
LT
590 return NULL;
591}
592
7159039a
YH
593EXPORT_SYMBOL(rt6_lookup);
594
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
600
86872cb5 601static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
602{
603 int err;
c71099ac 604 struct fib6_table *table;
1da177e4 605
c71099ac
TG
606 table = rt->rt6i_table;
607 write_lock_bh(&table->tb6_lock);
86872cb5 608 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 609 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
610
611 return err;
612}
613
40e22e8f
TG
614int ip6_ins_rt(struct rt6_info *rt)
615{
4d1169c1 616 struct nl_info info = {
c346dca1 617 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 618 };
528c4ceb 619 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
620}
621
95a9a5ba
YH
622static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
623 struct in6_addr *saddr)
1da177e4 624{
1da177e4
LT
625 struct rt6_info *rt;
626
627 /*
628 * Clone the route.
629 */
630
631 rt = ip6_rt_copy(ort);
632
633 if (rt) {
14deae41
DM
634 struct neighbour *neigh;
635 int attempts = !in_softirq();
636
58c4fb86
YH
637 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
638 if (rt->rt6i_dst.plen != 128 &&
639 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
640 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 641 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 642 }
1da177e4 643
58c4fb86 644 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
645 rt->rt6i_dst.plen = 128;
646 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 647 rt->dst.flags |= DST_HOST;
1da177e4
LT
648
649#ifdef CONFIG_IPV6_SUBTREES
650 if (rt->rt6i_src.plen && saddr) {
651 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
652 rt->rt6i_src.plen = 128;
653 }
654#endif
655
14deae41
DM
656 retry:
657 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
658 if (IS_ERR(neigh)) {
659 struct net *net = dev_net(rt->rt6i_dev);
660 int saved_rt_min_interval =
661 net->ipv6.sysctl.ip6_rt_gc_min_interval;
662 int saved_rt_elasticity =
663 net->ipv6.sysctl.ip6_rt_gc_elasticity;
664
665 if (attempts-- > 0) {
666 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
667 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
668
86393e52 669 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
670
671 net->ipv6.sysctl.ip6_rt_gc_elasticity =
672 saved_rt_elasticity;
673 net->ipv6.sysctl.ip6_rt_gc_min_interval =
674 saved_rt_min_interval;
675 goto retry;
676 }
677
678 if (net_ratelimit())
679 printk(KERN_WARNING
7e1b33e5 680 "ipv6: Neighbour table overflow.\n");
d8d1f30b 681 dst_free(&rt->dst);
14deae41
DM
682 return NULL;
683 }
684 rt->rt6i_nexthop = neigh;
1da177e4 685
95a9a5ba 686 }
1da177e4 687
95a9a5ba
YH
688 return rt;
689}
1da177e4 690
299d9939
YH
691static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
692{
693 struct rt6_info *rt = ip6_rt_copy(ort);
694 if (rt) {
695 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
696 rt->rt6i_dst.plen = 128;
697 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 698 rt->dst.flags |= DST_HOST;
299d9939
YH
699 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
700 }
701 return rt;
702}
703
8ed67789
DL
704static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
705 struct flowi *fl, int flags)
1da177e4
LT
706{
707 struct fib6_node *fn;
519fbd87 708 struct rt6_info *rt, *nrt;
c71099ac 709 int strict = 0;
1da177e4 710 int attempts = 3;
519fbd87 711 int err;
53b7997f 712 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 713
77d16f45 714 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
715
716relookup:
c71099ac 717 read_lock_bh(&table->tb6_lock);
1da177e4 718
8238dd06 719restart_2:
c71099ac 720 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
721
722restart:
4acad72d 723 rt = rt6_select(fn, oif, strict | reachable);
8ed67789
DL
724
725 BACKTRACK(net, &fl->fl6_src);
726 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 727 rt->rt6i_flags & RTF_CACHE)
1ddef044 728 goto out;
1da177e4 729
d8d1f30b 730 dst_hold(&rt->dst);
c71099ac 731 read_unlock_bh(&table->tb6_lock);
fb9de91e 732
519fbd87 733 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 734 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
d80bc0fd 735 else
c71099ac 736 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
e40cf353 737
d8d1f30b 738 dst_release(&rt->dst);
8ed67789 739 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 740
d8d1f30b 741 dst_hold(&rt->dst);
519fbd87 742 if (nrt) {
40e22e8f 743 err = ip6_ins_rt(nrt);
519fbd87 744 if (!err)
1da177e4 745 goto out2;
1da177e4 746 }
1da177e4 747
519fbd87
YH
748 if (--attempts <= 0)
749 goto out2;
750
751 /*
c71099ac 752 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
753 * released someone could insert this route. Relookup.
754 */
d8d1f30b 755 dst_release(&rt->dst);
519fbd87
YH
756 goto relookup;
757
758out:
8238dd06
YH
759 if (reachable) {
760 reachable = 0;
761 goto restart_2;
762 }
d8d1f30b 763 dst_hold(&rt->dst);
c71099ac 764 read_unlock_bh(&table->tb6_lock);
1da177e4 765out2:
d8d1f30b
CG
766 rt->dst.lastuse = jiffies;
767 rt->dst.__use++;
c71099ac
TG
768
769 return rt;
1da177e4
LT
770}
771
8ed67789 772static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4acad72d
PE
773 struct flowi *fl, int flags)
774{
8ed67789 775 return ip6_pol_route(net, table, fl->iif, fl, flags);
4acad72d
PE
776}
777
c71099ac
TG
778void ip6_route_input(struct sk_buff *skb)
779{
0660e03f 780 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 781 struct net *net = dev_net(skb->dev);
adaa70bb 782 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
783 struct flowi fl = {
784 .iif = skb->dev->ifindex,
5811662b
CG
785 .fl6_dst = iph->daddr,
786 .fl6_src = iph->saddr,
787 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
1ab1457c 788 .mark = skb->mark,
c71099ac
TG
789 .proto = iph->nexthdr,
790 };
adaa70bb 791
1d6e55f1 792 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 793 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 794
adf30907 795 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
c71099ac
TG
796}
797
8ed67789 798static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
c71099ac 799 struct flowi *fl, int flags)
1da177e4 800{
8ed67789 801 return ip6_pol_route(net, table, fl->oif, fl, flags);
c71099ac
TG
802}
803
4591db4f
DL
804struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
805 struct flowi *fl)
c71099ac
TG
806{
807 int flags = 0;
808
6057fd78 809 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
77d16f45 810 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 811
adaa70bb
TG
812 if (!ipv6_addr_any(&fl->fl6_src))
813 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
814 else if (sk)
815 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 816
4591db4f 817 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
1da177e4
LT
818}
819
7159039a 820EXPORT_SYMBOL(ip6_route_output);
1da177e4 821
14e50e57
DM
822int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
823{
824 struct rt6_info *ort = (struct rt6_info *) *dstp;
825 struct rt6_info *rt = (struct rt6_info *)
826 dst_alloc(&ip6_dst_blackhole_ops);
827 struct dst_entry *new = NULL;
828
829 if (rt) {
d8d1f30b 830 new = &rt->dst;
14e50e57
DM
831
832 atomic_set(&new->__refcnt, 1);
833 new->__use = 1;
352e512c
HX
834 new->input = dst_discard;
835 new->output = dst_discard;
14e50e57 836
defb3519 837 dst_copy_metrics(new, &ort->dst);
d8d1f30b 838 new->dev = ort->dst.dev;
14e50e57
DM
839 if (new->dev)
840 dev_hold(new->dev);
841 rt->rt6i_idev = ort->rt6i_idev;
842 if (rt->rt6i_idev)
843 in6_dev_hold(rt->rt6i_idev);
844 rt->rt6i_expires = 0;
845
846 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
847 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
848 rt->rt6i_metric = 0;
849
850 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
851#ifdef CONFIG_IPV6_SUBTREES
852 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
853#endif
854
855 dst_free(new);
856 }
857
858 dst_release(*dstp);
859 *dstp = new;
a02cec21 860 return new ? 0 : -ENOMEM;
14e50e57
DM
861}
862EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
863
1da177e4
LT
864/*
865 * Destination cache support functions
866 */
867
868static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
869{
870 struct rt6_info *rt;
871
872 rt = (struct rt6_info *) dst;
873
10414444 874 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1da177e4
LT
875 return dst;
876
877 return NULL;
878}
879
880static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
881{
882 struct rt6_info *rt = (struct rt6_info *) dst;
883
884 if (rt) {
54c1a859
YH
885 if (rt->rt6i_flags & RTF_CACHE) {
886 if (rt6_check_expired(rt)) {
887 ip6_del_rt(rt);
888 dst = NULL;
889 }
890 } else {
1da177e4 891 dst_release(dst);
54c1a859
YH
892 dst = NULL;
893 }
1da177e4 894 }
54c1a859 895 return dst;
1da177e4
LT
896}
897
898static void ip6_link_failure(struct sk_buff *skb)
899{
900 struct rt6_info *rt;
901
3ffe533c 902 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 903
adf30907 904 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
905 if (rt) {
906 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 907 dst_set_expires(&rt->dst, 0);
1da177e4
LT
908 rt->rt6i_flags |= RTF_EXPIRES;
909 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
910 rt->rt6i_node->fn_sernum = -1;
911 }
912}
913
914static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
915{
916 struct rt6_info *rt6 = (struct rt6_info*)dst;
917
918 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
919 rt6->rt6i_flags |= RTF_MODIFIED;
920 if (mtu < IPV6_MIN_MTU) {
defb3519 921 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 922 mtu = IPV6_MIN_MTU;
defb3519
DM
923 features |= RTAX_FEATURE_ALLFRAG;
924 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 925 }
defb3519 926 dst_metric_set(dst, RTAX_MTU, mtu);
8d71740c 927 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
928 }
929}
930
0dbaee3b 931static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 932{
0dbaee3b
DM
933 struct net_device *dev = dst->dev;
934 unsigned int mtu = dst_mtu(dst);
935 struct net *net = dev_net(dev);
936
1da177e4
LT
937 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
938
5578689a
DL
939 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
940 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
941
942 /*
1ab1457c
YH
943 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
944 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
945 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
946 * rely only on pmtu discovery"
947 */
948 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
949 mtu = IPV6_MAXPLEN;
950 return mtu;
951}
952
d33e4553
DM
953static unsigned int ip6_default_mtu(const struct dst_entry *dst)
954{
955 unsigned int mtu = IPV6_MIN_MTU;
956 struct inet6_dev *idev;
957
958 rcu_read_lock();
959 idev = __in6_dev_get(dst->dev);
960 if (idev)
961 mtu = idev->cnf.mtu6;
962 rcu_read_unlock();
963
964 return mtu;
965}
966
3b00944c
YH
967static struct dst_entry *icmp6_dst_gc_list;
968static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 969
3b00944c 970struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 971 struct neighbour *neigh,
9acd9f3a 972 const struct in6_addr *addr)
1da177e4
LT
973{
974 struct rt6_info *rt;
975 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 976 struct net *net = dev_net(dev);
1da177e4
LT
977
978 if (unlikely(idev == NULL))
979 return NULL;
980
86393e52 981 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
982 if (unlikely(rt == NULL)) {
983 in6_dev_put(idev);
984 goto out;
985 }
986
987 dev_hold(dev);
988 if (neigh)
989 neigh_hold(neigh);
14deae41 990 else {
1da177e4 991 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
992 if (IS_ERR(neigh))
993 neigh = NULL;
994 }
1da177e4
LT
995
996 rt->rt6i_dev = dev;
997 rt->rt6i_idev = idev;
998 rt->rt6i_nexthop = neigh;
d8d1f30b 999 atomic_set(&rt->dst.__refcnt, 1);
defb3519 1000 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
d8d1f30b 1001 rt->dst.output = ip6_output;
1da177e4
LT
1002
1003#if 0 /* there's no chance to use these for ndisc */
d8d1f30b 1004 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1ab1457c 1005 ? DST_HOST
1da177e4
LT
1006 : 0;
1007 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1008 rt->rt6i_dst.plen = 128;
1009#endif
1010
3b00944c 1011 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1012 rt->dst.next = icmp6_dst_gc_list;
1013 icmp6_dst_gc_list = &rt->dst;
3b00944c 1014 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1015
5578689a 1016 fib6_force_start_gc(net);
1da177e4
LT
1017
1018out:
d8d1f30b 1019 return &rt->dst;
1da177e4
LT
1020}
1021
3d0f24a7 1022int icmp6_dst_gc(void)
1da177e4
LT
1023{
1024 struct dst_entry *dst, *next, **pprev;
3d0f24a7 1025 int more = 0;
1da177e4
LT
1026
1027 next = NULL;
5d0bbeeb 1028
3b00944c
YH
1029 spin_lock_bh(&icmp6_dst_lock);
1030 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1031
1da177e4
LT
1032 while ((dst = *pprev) != NULL) {
1033 if (!atomic_read(&dst->__refcnt)) {
1034 *pprev = dst->next;
1035 dst_free(dst);
1da177e4
LT
1036 } else {
1037 pprev = &dst->next;
3d0f24a7 1038 ++more;
1da177e4
LT
1039 }
1040 }
1041
3b00944c 1042 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1043
3d0f24a7 1044 return more;
1da177e4
LT
1045}
1046
1e493d19
DM
1047static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1048 void *arg)
1049{
1050 struct dst_entry *dst, **pprev;
1051
1052 spin_lock_bh(&icmp6_dst_lock);
1053 pprev = &icmp6_dst_gc_list;
1054 while ((dst = *pprev) != NULL) {
1055 struct rt6_info *rt = (struct rt6_info *) dst;
1056 if (func(rt, arg)) {
1057 *pprev = dst->next;
1058 dst_free(dst);
1059 } else {
1060 pprev = &dst->next;
1061 }
1062 }
1063 spin_unlock_bh(&icmp6_dst_lock);
1064}
1065
569d3645 1066static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1067{
1da177e4 1068 unsigned long now = jiffies;
86393e52 1069 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1070 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1071 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1072 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1073 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1074 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1075 int entries;
7019b78e 1076
fc66f95c 1077 entries = dst_entries_get_fast(ops);
7019b78e 1078 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1079 entries <= rt_max_size)
1da177e4
LT
1080 goto out;
1081
6891a346
BT
1082 net->ipv6.ip6_rt_gc_expire++;
1083 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1084 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1085 entries = dst_entries_get_slow(ops);
1086 if (entries < ops->gc_thresh)
7019b78e 1087 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1088out:
7019b78e 1089 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1090 return entries > rt_max_size;
1da177e4
LT
1091}
1092
1093/* Clean host part of a prefix. Not necessary in radix tree,
1094 but results in cleaner routing tables.
1095
1096 Remove it only when all the things will work!
1097 */
1098
6b75d090 1099int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1100{
5170ae82 1101 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1102 if (hoplimit == 0) {
6b75d090 1103 struct net_device *dev = dst->dev;
c68f24cc
ED
1104 struct inet6_dev *idev;
1105
1106 rcu_read_lock();
1107 idev = __in6_dev_get(dev);
1108 if (idev)
6b75d090 1109 hoplimit = idev->cnf.hop_limit;
c68f24cc 1110 else
53b7997f 1111 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1112 rcu_read_unlock();
1da177e4
LT
1113 }
1114 return hoplimit;
1115}
abbf46ae 1116EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1117
1118/*
1119 *
1120 */
1121
86872cb5 1122int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1123{
1124 int err;
5578689a 1125 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1126 struct rt6_info *rt = NULL;
1127 struct net_device *dev = NULL;
1128 struct inet6_dev *idev = NULL;
c71099ac 1129 struct fib6_table *table;
1da177e4
LT
1130 int addr_type;
1131
86872cb5 1132 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1133 return -EINVAL;
1134#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1135 if (cfg->fc_src_len)
1da177e4
LT
1136 return -EINVAL;
1137#endif
86872cb5 1138 if (cfg->fc_ifindex) {
1da177e4 1139 err = -ENODEV;
5578689a 1140 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1141 if (!dev)
1142 goto out;
1143 idev = in6_dev_get(dev);
1144 if (!idev)
1145 goto out;
1146 }
1147
86872cb5
TG
1148 if (cfg->fc_metric == 0)
1149 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1150
5578689a 1151 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1152 if (table == NULL) {
1153 err = -ENOBUFS;
1154 goto out;
1155 }
1156
86393e52 1157 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1158
1159 if (rt == NULL) {
1160 err = -ENOMEM;
1161 goto out;
1162 }
1163
d8d1f30b 1164 rt->dst.obsolete = -1;
6f704992
YH
1165 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1166 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1167 0;
1da177e4 1168
86872cb5
TG
1169 if (cfg->fc_protocol == RTPROT_UNSPEC)
1170 cfg->fc_protocol = RTPROT_BOOT;
1171 rt->rt6i_protocol = cfg->fc_protocol;
1172
1173 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1174
1175 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1176 rt->dst.input = ip6_mc_input;
ab79ad14
1177 else if (cfg->fc_flags & RTF_LOCAL)
1178 rt->dst.input = ip6_input;
1da177e4 1179 else
d8d1f30b 1180 rt->dst.input = ip6_forward;
1da177e4 1181
d8d1f30b 1182 rt->dst.output = ip6_output;
1da177e4 1183
86872cb5
TG
1184 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1185 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1186 if (rt->rt6i_dst.plen == 128)
d8d1f30b 1187 rt->dst.flags = DST_HOST;
1da177e4
LT
1188
1189#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1190 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1191 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1192#endif
1193
86872cb5 1194 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1195
1196 /* We cannot add true routes via loopback here,
1197 they would result in kernel looping; promote them to reject routes
1198 */
86872cb5 1199 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1200 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1201 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1202 /* hold loopback dev/idev if we haven't done so. */
5578689a 1203 if (dev != net->loopback_dev) {
1da177e4
LT
1204 if (dev) {
1205 dev_put(dev);
1206 in6_dev_put(idev);
1207 }
5578689a 1208 dev = net->loopback_dev;
1da177e4
LT
1209 dev_hold(dev);
1210 idev = in6_dev_get(dev);
1211 if (!idev) {
1212 err = -ENODEV;
1213 goto out;
1214 }
1215 }
d8d1f30b
CG
1216 rt->dst.output = ip6_pkt_discard_out;
1217 rt->dst.input = ip6_pkt_discard;
1218 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1219 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1220 goto install_route;
1221 }
1222
86872cb5 1223 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1224 struct in6_addr *gw_addr;
1225 int gwa_type;
1226
86872cb5
TG
1227 gw_addr = &cfg->fc_gateway;
1228 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1229 gwa_type = ipv6_addr_type(gw_addr);
1230
1231 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1232 struct rt6_info *grt;
1233
1234 /* IPv6 strictly inhibits using not link-local
1235 addresses as nexthop address.
1236 Otherwise, router will not able to send redirects.
1237 It is very good, but in some (rare!) circumstances
1238 (SIT, PtP, NBMA NOARP links) it is handy to allow
1239 some exceptions. --ANK
1240 */
1241 err = -EINVAL;
1242 if (!(gwa_type&IPV6_ADDR_UNICAST))
1243 goto out;
1244
5578689a 1245 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1246
1247 err = -EHOSTUNREACH;
1248 if (grt == NULL)
1249 goto out;
1250 if (dev) {
1251 if (dev != grt->rt6i_dev) {
d8d1f30b 1252 dst_release(&grt->dst);
1da177e4
LT
1253 goto out;
1254 }
1255 } else {
1256 dev = grt->rt6i_dev;
1257 idev = grt->rt6i_idev;
1258 dev_hold(dev);
1259 in6_dev_hold(grt->rt6i_idev);
1260 }
1261 if (!(grt->rt6i_flags&RTF_GATEWAY))
1262 err = 0;
d8d1f30b 1263 dst_release(&grt->dst);
1da177e4
LT
1264
1265 if (err)
1266 goto out;
1267 }
1268 err = -EINVAL;
1269 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1270 goto out;
1271 }
1272
1273 err = -ENODEV;
1274 if (dev == NULL)
1275 goto out;
1276
86872cb5 1277 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1278 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1279 if (IS_ERR(rt->rt6i_nexthop)) {
1280 err = PTR_ERR(rt->rt6i_nexthop);
1281 rt->rt6i_nexthop = NULL;
1282 goto out;
1283 }
1284 }
1285
86872cb5 1286 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1287
1288install_route:
86872cb5
TG
1289 if (cfg->fc_mx) {
1290 struct nlattr *nla;
1291 int remaining;
1292
1293 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1294 int type = nla_type(nla);
86872cb5
TG
1295
1296 if (type) {
1297 if (type > RTAX_MAX) {
1da177e4
LT
1298 err = -EINVAL;
1299 goto out;
1300 }
86872cb5 1301
defb3519 1302 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1303 }
1da177e4
LT
1304 }
1305 }
1306
d8d1f30b 1307 rt->dst.dev = dev;
1da177e4 1308 rt->rt6i_idev = idev;
c71099ac 1309 rt->rt6i_table = table;
63152fc0 1310
c346dca1 1311 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1312
86872cb5 1313 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1314
1315out:
1316 if (dev)
1317 dev_put(dev);
1318 if (idev)
1319 in6_dev_put(idev);
1320 if (rt)
d8d1f30b 1321 dst_free(&rt->dst);
1da177e4
LT
1322 return err;
1323}
1324
86872cb5 1325static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1326{
1327 int err;
c71099ac 1328 struct fib6_table *table;
c346dca1 1329 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1330
8ed67789 1331 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1332 return -ENOENT;
1333
c71099ac
TG
1334 table = rt->rt6i_table;
1335 write_lock_bh(&table->tb6_lock);
1da177e4 1336
86872cb5 1337 err = fib6_del(rt, info);
d8d1f30b 1338 dst_release(&rt->dst);
1da177e4 1339
c71099ac 1340 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1341
1342 return err;
1343}
1344
e0a1ad73
TG
1345int ip6_del_rt(struct rt6_info *rt)
1346{
4d1169c1 1347 struct nl_info info = {
c346dca1 1348 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1349 };
528c4ceb 1350 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1351}
1352
86872cb5 1353static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1354{
c71099ac 1355 struct fib6_table *table;
1da177e4
LT
1356 struct fib6_node *fn;
1357 struct rt6_info *rt;
1358 int err = -ESRCH;
1359
5578689a 1360 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1361 if (table == NULL)
1362 return err;
1363
1364 read_lock_bh(&table->tb6_lock);
1da177e4 1365
c71099ac 1366 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1367 &cfg->fc_dst, cfg->fc_dst_len,
1368 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1369
1da177e4 1370 if (fn) {
d8d1f30b 1371 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1372 if (cfg->fc_ifindex &&
1da177e4 1373 (rt->rt6i_dev == NULL ||
86872cb5 1374 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1375 continue;
86872cb5
TG
1376 if (cfg->fc_flags & RTF_GATEWAY &&
1377 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1378 continue;
86872cb5 1379 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1380 continue;
d8d1f30b 1381 dst_hold(&rt->dst);
c71099ac 1382 read_unlock_bh(&table->tb6_lock);
1da177e4 1383
86872cb5 1384 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1385 }
1386 }
c71099ac 1387 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1388
1389 return err;
1390}
1391
1392/*
1393 * Handle redirects
1394 */
a6279458
YH
1395struct ip6rd_flowi {
1396 struct flowi fl;
1397 struct in6_addr gateway;
1398};
1399
8ed67789
DL
1400static struct rt6_info *__ip6_route_redirect(struct net *net,
1401 struct fib6_table *table,
a6279458
YH
1402 struct flowi *fl,
1403 int flags)
1da177e4 1404{
a6279458
YH
1405 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1406 struct rt6_info *rt;
e843b9e1 1407 struct fib6_node *fn;
c71099ac 1408
1da177e4 1409 /*
e843b9e1
YH
1410 * Get the "current" route for this destination and
1411 * check if the redirect has come from approriate router.
1412 *
1413 * RFC 2461 specifies that redirects should only be
1414 * accepted if they come from the nexthop to the target.
1415 * Due to the way the routes are chosen, this notion
1416 * is a bit fuzzy and one might need to check all possible
1417 * routes.
1da177e4 1418 */
1da177e4 1419
c71099ac 1420 read_lock_bh(&table->tb6_lock);
a6279458 1421 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1422restart:
d8d1f30b 1423 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1424 /*
1425 * Current route is on-link; redirect is always invalid.
1426 *
1427 * Seems, previous statement is not true. It could
1428 * be node, which looks for us as on-link (f.e. proxy ndisc)
1429 * But then router serving it might decide, that we should
1430 * know truth 8)8) --ANK (980726).
1431 */
1432 if (rt6_check_expired(rt))
1433 continue;
1434 if (!(rt->rt6i_flags & RTF_GATEWAY))
1435 continue;
a6279458 1436 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1437 continue;
a6279458 1438 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1439 continue;
1440 break;
1441 }
a6279458 1442
cb15d9c2 1443 if (!rt)
8ed67789
DL
1444 rt = net->ipv6.ip6_null_entry;
1445 BACKTRACK(net, &fl->fl6_src);
cb15d9c2 1446out:
d8d1f30b 1447 dst_hold(&rt->dst);
a6279458 1448
c71099ac 1449 read_unlock_bh(&table->tb6_lock);
e843b9e1 1450
a6279458
YH
1451 return rt;
1452};
1453
1454static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1455 struct in6_addr *src,
1456 struct in6_addr *gateway,
1457 struct net_device *dev)
1458{
adaa70bb 1459 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1460 struct net *net = dev_net(dev);
a6279458
YH
1461 struct ip6rd_flowi rdfl = {
1462 .fl = {
1463 .oif = dev->ifindex,
5811662b
CG
1464 .fl6_dst = *dest,
1465 .fl6_src = *src,
a6279458 1466 },
a6279458 1467 };
adaa70bb 1468
86c36ce4
BH
1469 ipv6_addr_copy(&rdfl.gateway, gateway);
1470
adaa70bb
TG
1471 if (rt6_need_strict(dest))
1472 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1473
5578689a 1474 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1475 flags, __ip6_route_redirect);
a6279458
YH
1476}
1477
1478void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1479 struct in6_addr *saddr,
1480 struct neighbour *neigh, u8 *lladdr, int on_link)
1481{
1482 struct rt6_info *rt, *nrt = NULL;
1483 struct netevent_redirect netevent;
c346dca1 1484 struct net *net = dev_net(neigh->dev);
a6279458
YH
1485
1486 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1487
8ed67789 1488 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1489 if (net_ratelimit())
1490 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1491 "for redirect target\n");
a6279458 1492 goto out;
1da177e4
LT
1493 }
1494
1da177e4
LT
1495 /*
1496 * We have finally decided to accept it.
1497 */
1498
1ab1457c 1499 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1500 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1501 NEIGH_UPDATE_F_OVERRIDE|
1502 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1503 NEIGH_UPDATE_F_ISROUTER))
1504 );
1505
1506 /*
1507 * Redirect received -> path was valid.
1508 * Look, redirects are sent only in response to data packets,
1509 * so that this nexthop apparently is reachable. --ANK
1510 */
d8d1f30b 1511 dst_confirm(&rt->dst);
1da177e4
LT
1512
1513 /* Duplicate redirect: silently ignore. */
d8d1f30b 1514 if (neigh == rt->dst.neighbour)
1da177e4
LT
1515 goto out;
1516
1517 nrt = ip6_rt_copy(rt);
1518 if (nrt == NULL)
1519 goto out;
1520
1521 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1522 if (on_link)
1523 nrt->rt6i_flags &= ~RTF_GATEWAY;
1524
1525 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1526 nrt->rt6i_dst.plen = 128;
d8d1f30b 1527 nrt->dst.flags |= DST_HOST;
1da177e4
LT
1528
1529 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1530 nrt->rt6i_nexthop = neigh_clone(neigh);
1da177e4 1531
40e22e8f 1532 if (ip6_ins_rt(nrt))
1da177e4
LT
1533 goto out;
1534
d8d1f30b
CG
1535 netevent.old = &rt->dst;
1536 netevent.new = &nrt->dst;
8d71740c
TT
1537 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1538
1da177e4 1539 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1540 ip6_del_rt(rt);
1da177e4
LT
1541 return;
1542 }
1543
1544out:
d8d1f30b 1545 dst_release(&rt->dst);
1da177e4
LT
1546}
1547
1548/*
1549 * Handle ICMP "packet too big" messages
1550 * i.e. Path MTU discovery
1551 */
1552
ae878ae2
1553static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1554 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1555{
1556 struct rt6_info *rt, *nrt;
1557 int allfrag = 0;
d3052b55 1558again:
ae878ae2 1559 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1560 if (rt == NULL)
1561 return;
1562
d3052b55
AV
1563 if (rt6_check_expired(rt)) {
1564 ip6_del_rt(rt);
1565 goto again;
1566 }
1567
d8d1f30b 1568 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1569 goto out;
1570
1571 if (pmtu < IPV6_MIN_MTU) {
1572 /*
1ab1457c 1573 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1574 * MTU (1280) and a fragment header should always be included
1575 * after a node receiving Too Big message reporting PMTU is
1576 * less than the IPv6 Minimum Link MTU.
1577 */
1578 pmtu = IPV6_MIN_MTU;
1579 allfrag = 1;
1580 }
1581
1582 /* New mtu received -> path was valid.
1583 They are sent only in response to data packets,
1584 so that this nexthop apparently is reachable. --ANK
1585 */
d8d1f30b 1586 dst_confirm(&rt->dst);
1da177e4
LT
1587
1588 /* Host route. If it is static, it would be better
1589 not to override it, but add new one, so that
1590 when cache entry will expire old pmtu
1591 would return automatically.
1592 */
1593 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1594 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1595 if (allfrag) {
1596 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1597 features |= RTAX_FEATURE_ALLFRAG;
1598 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1599 }
d8d1f30b 1600 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1601 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1602 goto out;
1603 }
1604
1605 /* Network route.
1606 Two cases are possible:
1607 1. It is connected route. Action: COW
1608 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1609 */
d5315b50 1610 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1611 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1612 else
1613 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1614
d5315b50 1615 if (nrt) {
defb3519
DM
1616 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1617 if (allfrag) {
1618 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1619 features |= RTAX_FEATURE_ALLFRAG;
1620 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1621 }
a1e78363
YH
1622
1623 /* According to RFC 1981, detecting PMTU increase shouldn't be
1624 * happened within 5 mins, the recommended timer is 10 mins.
1625 * Here this route expiration time is set to ip6_rt_mtu_expires
1626 * which is 10 mins. After 10 mins the decreased pmtu is expired
1627 * and detecting PMTU increase will be automatically happened.
1628 */
d8d1f30b 1629 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1630 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1631
40e22e8f 1632 ip6_ins_rt(nrt);
1da177e4 1633 }
1da177e4 1634out:
d8d1f30b 1635 dst_release(&rt->dst);
1da177e4
LT
1636}
1637
ae878ae2
1638void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1639 struct net_device *dev, u32 pmtu)
1640{
1641 struct net *net = dev_net(dev);
1642
1643 /*
1644 * RFC 1981 states that a node "MUST reduce the size of the packets it
1645 * is sending along the path" that caused the Packet Too Big message.
1646 * Since it's not possible in the general case to determine which
1647 * interface was used to send the original packet, we update the MTU
1648 * on the interface that will be used to send future packets. We also
1649 * update the MTU on the interface that received the Packet Too Big in
1650 * case the original packet was forced out that interface with
1651 * SO_BINDTODEVICE or similar. This is the next best thing to the
1652 * correct behaviour, which would be to update the MTU on all
1653 * interfaces.
1654 */
1655 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1656 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1657}
1658
1da177e4
LT
1659/*
1660 * Misc support functions
1661 */
1662
1663static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1664{
c346dca1 1665 struct net *net = dev_net(ort->rt6i_dev);
86393e52 1666 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1667
1668 if (rt) {
d8d1f30b
CG
1669 rt->dst.input = ort->dst.input;
1670 rt->dst.output = ort->dst.output;
1671
defb3519 1672 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b
CG
1673 rt->dst.error = ort->dst.error;
1674 rt->dst.dev = ort->dst.dev;
1675 if (rt->dst.dev)
1676 dev_hold(rt->dst.dev);
1da177e4
LT
1677 rt->rt6i_idev = ort->rt6i_idev;
1678 if (rt->rt6i_idev)
1679 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1680 rt->dst.lastuse = jiffies;
1da177e4
LT
1681 rt->rt6i_expires = 0;
1682
1683 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1684 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1685 rt->rt6i_metric = 0;
1686
1687 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1688#ifdef CONFIG_IPV6_SUBTREES
1689 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1690#endif
c71099ac 1691 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1692 }
1693 return rt;
1694}
1695
70ceb4f5 1696#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1697static struct rt6_info *rt6_get_route_info(struct net *net,
1698 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1699 struct in6_addr *gwaddr, int ifindex)
1700{
1701 struct fib6_node *fn;
1702 struct rt6_info *rt = NULL;
c71099ac
TG
1703 struct fib6_table *table;
1704
efa2cea0 1705 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1706 if (table == NULL)
1707 return NULL;
70ceb4f5 1708
c71099ac
TG
1709 write_lock_bh(&table->tb6_lock);
1710 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1711 if (!fn)
1712 goto out;
1713
d8d1f30b 1714 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1715 if (rt->rt6i_dev->ifindex != ifindex)
1716 continue;
1717 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1718 continue;
1719 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1720 continue;
d8d1f30b 1721 dst_hold(&rt->dst);
70ceb4f5
YH
1722 break;
1723 }
1724out:
c71099ac 1725 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1726 return rt;
1727}
1728
efa2cea0
DL
1729static struct rt6_info *rt6_add_route_info(struct net *net,
1730 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1731 struct in6_addr *gwaddr, int ifindex,
1732 unsigned pref)
1733{
86872cb5
TG
1734 struct fib6_config cfg = {
1735 .fc_table = RT6_TABLE_INFO,
238fc7ea 1736 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1737 .fc_ifindex = ifindex,
1738 .fc_dst_len = prefixlen,
1739 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1740 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1741 .fc_nlinfo.pid = 0,
1742 .fc_nlinfo.nlh = NULL,
1743 .fc_nlinfo.nl_net = net,
86872cb5
TG
1744 };
1745
1746 ipv6_addr_copy(&cfg.fc_dst, prefix);
1747 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1748
e317da96
YH
1749 /* We should treat it as a default route if prefix length is 0. */
1750 if (!prefixlen)
86872cb5 1751 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1752
86872cb5 1753 ip6_route_add(&cfg);
70ceb4f5 1754
efa2cea0 1755 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1756}
1757#endif
1758
1da177e4 1759struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1760{
1da177e4 1761 struct rt6_info *rt;
c71099ac 1762 struct fib6_table *table;
1da177e4 1763
c346dca1 1764 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1765 if (table == NULL)
1766 return NULL;
1da177e4 1767
c71099ac 1768 write_lock_bh(&table->tb6_lock);
d8d1f30b 1769 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1770 if (dev == rt->rt6i_dev &&
045927ff 1771 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1772 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1773 break;
1774 }
1775 if (rt)
d8d1f30b 1776 dst_hold(&rt->dst);
c71099ac 1777 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1778 return rt;
1779}
1780
1781struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1782 struct net_device *dev,
1783 unsigned int pref)
1da177e4 1784{
86872cb5
TG
1785 struct fib6_config cfg = {
1786 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1787 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1788 .fc_ifindex = dev->ifindex,
1789 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1790 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1791 .fc_nlinfo.pid = 0,
1792 .fc_nlinfo.nlh = NULL,
c346dca1 1793 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1794 };
1da177e4 1795
86872cb5 1796 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1797
86872cb5 1798 ip6_route_add(&cfg);
1da177e4 1799
1da177e4
LT
1800 return rt6_get_dflt_router(gwaddr, dev);
1801}
1802
7b4da532 1803void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1804{
1805 struct rt6_info *rt;
c71099ac
TG
1806 struct fib6_table *table;
1807
1808 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1809 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1810 if (table == NULL)
1811 return;
1da177e4
LT
1812
1813restart:
c71099ac 1814 read_lock_bh(&table->tb6_lock);
d8d1f30b 1815 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1816 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1817 dst_hold(&rt->dst);
c71099ac 1818 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1819 ip6_del_rt(rt);
1da177e4
LT
1820 goto restart;
1821 }
1822 }
c71099ac 1823 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1824}
1825
5578689a
DL
1826static void rtmsg_to_fib6_config(struct net *net,
1827 struct in6_rtmsg *rtmsg,
86872cb5
TG
1828 struct fib6_config *cfg)
1829{
1830 memset(cfg, 0, sizeof(*cfg));
1831
1832 cfg->fc_table = RT6_TABLE_MAIN;
1833 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1834 cfg->fc_metric = rtmsg->rtmsg_metric;
1835 cfg->fc_expires = rtmsg->rtmsg_info;
1836 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1837 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1838 cfg->fc_flags = rtmsg->rtmsg_flags;
1839
5578689a 1840 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1841
86872cb5
TG
1842 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1843 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1844 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1845}
1846
5578689a 1847int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1848{
86872cb5 1849 struct fib6_config cfg;
1da177e4
LT
1850 struct in6_rtmsg rtmsg;
1851 int err;
1852
1853 switch(cmd) {
1854 case SIOCADDRT: /* Add a route */
1855 case SIOCDELRT: /* Delete a route */
1856 if (!capable(CAP_NET_ADMIN))
1857 return -EPERM;
1858 err = copy_from_user(&rtmsg, arg,
1859 sizeof(struct in6_rtmsg));
1860 if (err)
1861 return -EFAULT;
86872cb5 1862
5578689a 1863 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1864
1da177e4
LT
1865 rtnl_lock();
1866 switch (cmd) {
1867 case SIOCADDRT:
86872cb5 1868 err = ip6_route_add(&cfg);
1da177e4
LT
1869 break;
1870 case SIOCDELRT:
86872cb5 1871 err = ip6_route_del(&cfg);
1da177e4
LT
1872 break;
1873 default:
1874 err = -EINVAL;
1875 }
1876 rtnl_unlock();
1877
1878 return err;
3ff50b79 1879 }
1da177e4
LT
1880
1881 return -EINVAL;
1882}
1883
1884/*
1885 * Drop the packet on the floor
1886 */
1887
d5fdd6ba 1888static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1889{
612f09e8 1890 int type;
adf30907 1891 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1892 switch (ipstats_mib_noroutes) {
1893 case IPSTATS_MIB_INNOROUTES:
0660e03f 1894 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1895 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1896 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1897 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1898 break;
1899 }
1900 /* FALLTHROUGH */
1901 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1902 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1903 ipstats_mib_noroutes);
612f09e8
YH
1904 break;
1905 }
3ffe533c 1906 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1907 kfree_skb(skb);
1908 return 0;
1909}
1910
9ce8ade0
TG
1911static int ip6_pkt_discard(struct sk_buff *skb)
1912{
612f09e8 1913 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1914}
1915
20380731 1916static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1917{
adf30907 1918 skb->dev = skb_dst(skb)->dev;
612f09e8 1919 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1920}
1921
6723ab54
DM
1922#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1923
9ce8ade0
TG
1924static int ip6_pkt_prohibit(struct sk_buff *skb)
1925{
612f09e8 1926 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1927}
1928
1929static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1930{
adf30907 1931 skb->dev = skb_dst(skb)->dev;
612f09e8 1932 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1933}
1934
6723ab54
DM
1935#endif
1936
1da177e4
LT
1937/*
1938 * Allocate a dst for local (unicast / anycast) address.
1939 */
1940
1941struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1942 const struct in6_addr *addr,
1943 int anycast)
1944{
c346dca1 1945 struct net *net = dev_net(idev->dev);
86393e52 1946 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
14deae41 1947 struct neighbour *neigh;
1da177e4 1948
40385653
BG
1949 if (rt == NULL) {
1950 if (net_ratelimit())
1951 pr_warning("IPv6: Maximum number of routes reached,"
1952 " consider increasing route/max_size.\n");
1da177e4 1953 return ERR_PTR(-ENOMEM);
40385653 1954 }
1da177e4 1955
5578689a 1956 dev_hold(net->loopback_dev);
1da177e4
LT
1957 in6_dev_hold(idev);
1958
d8d1f30b
CG
1959 rt->dst.flags = DST_HOST;
1960 rt->dst.input = ip6_input;
1961 rt->dst.output = ip6_output;
5578689a 1962 rt->rt6i_dev = net->loopback_dev;
1da177e4 1963 rt->rt6i_idev = idev;
defb3519 1964 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
d8d1f30b 1965 rt->dst.obsolete = -1;
1da177e4
LT
1966
1967 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1968 if (anycast)
1969 rt->rt6i_flags |= RTF_ANYCAST;
1970 else
1da177e4 1971 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
1972 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1973 if (IS_ERR(neigh)) {
d8d1f30b 1974 dst_free(&rt->dst);
14deae41
DM
1975
1976 /* We are casting this because that is the return
1977 * value type. But an errno encoded pointer is the
1978 * same regardless of the underlying pointer type,
1979 * and that's what we are returning. So this is OK.
1980 */
1981 return (struct rt6_info *) neigh;
1da177e4 1982 }
14deae41 1983 rt->rt6i_nexthop = neigh;
1da177e4
LT
1984
1985 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1986 rt->rt6i_dst.plen = 128;
5578689a 1987 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 1988
d8d1f30b 1989 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
1990
1991 return rt;
1992}
1993
8ed67789
DL
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any device */
	struct net *net;	/* namespace whose null entry must be kept */
};
1998
1da177e4
LT
1999static int fib6_ifdown(struct rt6_info *rt, void *arg)
2000{
bc3ef660 2001 const struct arg_dev_net *adn = arg;
2002 const struct net_device *dev = adn->dev;
8ed67789 2003
bc3ef660 2004 if ((rt->rt6i_dev == dev || dev == NULL) &&
2005 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2006 RT6_TRACE("deleted by ifdown %p\n", rt);
2007 return -1;
2008 }
2009 return 0;
2010}
2011
f3db4851 2012void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2013{
8ed67789
DL
2014 struct arg_dev_net adn = {
2015 .dev = dev,
2016 .net = net,
2017 };
2018
2019 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2020 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2021}
2022
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2028
2029static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2030{
2031 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2032 struct inet6_dev *idev;
2033
2034 /* In IPv6 pmtu discovery is not optional,
2035 so that RTAX_MTU lock cannot disable it.
2036 We still use this lock to block changes
2037 caused by addrconf/ndisc.
2038 */
2039
2040 idev = __in6_dev_get(arg->dev);
2041 if (idev == NULL)
2042 return 0;
2043
2044 /* For administrative MTU increase, there is no way to discover
2045 IPv6 PMTU increase, so PMTU increase should be updated here.
2046 Since RFC 1981 doesn't include administrative MTU increase
2047 update PMTU increase is a MUST. (i.e. jumbo frame)
2048 */
2049 /*
2050 If new MTU is less than route PMTU, this new MTU will be the
2051 lowest MTU in the path, update the route PMTU to reflect PMTU
2052 decreases; if new MTU is greater than route PMTU, and the
2053 old MTU is the lowest MTU in the path, update the route PMTU
2054 to reflect the increase. In this case if the other nodes' MTU
2055 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2056 PMTU discouvery.
2057 */
2058 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2059 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2060 (dst_mtu(&rt->dst) >= arg->mtu ||
2061 (dst_mtu(&rt->dst) < arg->mtu &&
2062 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2063 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2064 }
1da177e4
LT
2065 return 0;
2066}
2067
2068void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2069{
c71099ac
TG
2070 struct rt6_mtu_change_arg arg = {
2071 .dev = dev,
2072 .mtu = mtu,
2073 };
1da177e4 2074
c346dca1 2075 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2076}
2077
ef7c79ed 2078static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2079 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2080 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2081 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2082 [RTA_PRIORITY] = { .type = NLA_U32 },
2083 [RTA_METRICS] = { .type = NLA_NESTED },
2084};
2085
2086static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2087 struct fib6_config *cfg)
1da177e4 2088{
86872cb5
TG
2089 struct rtmsg *rtm;
2090 struct nlattr *tb[RTA_MAX+1];
2091 int err;
1da177e4 2092
86872cb5
TG
2093 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2094 if (err < 0)
2095 goto errout;
1da177e4 2096
86872cb5
TG
2097 err = -EINVAL;
2098 rtm = nlmsg_data(nlh);
2099 memset(cfg, 0, sizeof(*cfg));
2100
2101 cfg->fc_table = rtm->rtm_table;
2102 cfg->fc_dst_len = rtm->rtm_dst_len;
2103 cfg->fc_src_len = rtm->rtm_src_len;
2104 cfg->fc_flags = RTF_UP;
2105 cfg->fc_protocol = rtm->rtm_protocol;
2106
2107 if (rtm->rtm_type == RTN_UNREACHABLE)
2108 cfg->fc_flags |= RTF_REJECT;
2109
ab79ad14
2110 if (rtm->rtm_type == RTN_LOCAL)
2111 cfg->fc_flags |= RTF_LOCAL;
2112
86872cb5
TG
2113 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2114 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2115 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2116
2117 if (tb[RTA_GATEWAY]) {
2118 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2119 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2120 }
86872cb5
TG
2121
2122 if (tb[RTA_DST]) {
2123 int plen = (rtm->rtm_dst_len + 7) >> 3;
2124
2125 if (nla_len(tb[RTA_DST]) < plen)
2126 goto errout;
2127
2128 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2129 }
86872cb5
TG
2130
2131 if (tb[RTA_SRC]) {
2132 int plen = (rtm->rtm_src_len + 7) >> 3;
2133
2134 if (nla_len(tb[RTA_SRC]) < plen)
2135 goto errout;
2136
2137 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2138 }
86872cb5
TG
2139
2140 if (tb[RTA_OIF])
2141 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2142
2143 if (tb[RTA_PRIORITY])
2144 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2145
2146 if (tb[RTA_METRICS]) {
2147 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2148 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2149 }
86872cb5
TG
2150
2151 if (tb[RTA_TABLE])
2152 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2153
2154 err = 0;
2155errout:
2156 return err;
1da177e4
LT
2157}
2158
c127ea2c 2159static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2160{
86872cb5
TG
2161 struct fib6_config cfg;
2162 int err;
1da177e4 2163
86872cb5
TG
2164 err = rtm_to_fib6_config(skb, nlh, &cfg);
2165 if (err < 0)
2166 return err;
2167
2168 return ip6_route_del(&cfg);
1da177e4
LT
2169}
2170
c127ea2c 2171static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2172{
86872cb5
TG
2173 struct fib6_config cfg;
2174 int err;
1da177e4 2175
86872cb5
TG
2176 err = rtm_to_fib6_config(skb, nlh, &cfg);
2177 if (err < 0)
2178 return err;
2179
2180 return ip6_route_add(&cfg);
1da177e4
LT
2181}
2182
339bf98f
TG
2183static inline size_t rt6_nlmsg_size(void)
2184{
2185 return NLMSG_ALIGN(sizeof(struct rtmsg))
2186 + nla_total_size(16) /* RTA_SRC */
2187 + nla_total_size(16) /* RTA_DST */
2188 + nla_total_size(16) /* RTA_GATEWAY */
2189 + nla_total_size(16) /* RTA_PREFSRC */
2190 + nla_total_size(4) /* RTA_TABLE */
2191 + nla_total_size(4) /* RTA_IIF */
2192 + nla_total_size(4) /* RTA_OIF */
2193 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2194 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2195 + nla_total_size(sizeof(struct rta_cacheinfo));
2196}
2197
191cd582
BH
2198static int rt6_fill_node(struct net *net,
2199 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2200 struct in6_addr *dst, struct in6_addr *src,
2201 int iif, int type, u32 pid, u32 seq,
7bc570c8 2202 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2203{
2204 struct rtmsg *rtm;
2d7202bf 2205 struct nlmsghdr *nlh;
e3703b3d 2206 long expires;
9e762a4a 2207 u32 table;
1da177e4
LT
2208
2209 if (prefix) { /* user wants prefix routes only */
2210 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2211 /* success since this is not a prefix route */
2212 return 1;
2213 }
2214 }
2215
2d7202bf
TG
2216 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2217 if (nlh == NULL)
26932566 2218 return -EMSGSIZE;
2d7202bf
TG
2219
2220 rtm = nlmsg_data(nlh);
1da177e4
LT
2221 rtm->rtm_family = AF_INET6;
2222 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2223 rtm->rtm_src_len = rt->rt6i_src.plen;
2224 rtm->rtm_tos = 0;
c71099ac 2225 if (rt->rt6i_table)
9e762a4a 2226 table = rt->rt6i_table->tb6_id;
c71099ac 2227 else
9e762a4a
PM
2228 table = RT6_TABLE_UNSPEC;
2229 rtm->rtm_table = table;
2d7202bf 2230 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2231 if (rt->rt6i_flags&RTF_REJECT)
2232 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2233 else if (rt->rt6i_flags&RTF_LOCAL)
2234 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2235 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2236 rtm->rtm_type = RTN_LOCAL;
2237 else
2238 rtm->rtm_type = RTN_UNICAST;
2239 rtm->rtm_flags = 0;
2240 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2241 rtm->rtm_protocol = rt->rt6i_protocol;
2242 if (rt->rt6i_flags&RTF_DYNAMIC)
2243 rtm->rtm_protocol = RTPROT_REDIRECT;
2244 else if (rt->rt6i_flags & RTF_ADDRCONF)
2245 rtm->rtm_protocol = RTPROT_KERNEL;
2246 else if (rt->rt6i_flags&RTF_DEFAULT)
2247 rtm->rtm_protocol = RTPROT_RA;
2248
2249 if (rt->rt6i_flags&RTF_CACHE)
2250 rtm->rtm_flags |= RTM_F_CLONED;
2251
2252 if (dst) {
2d7202bf 2253 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2254 rtm->rtm_dst_len = 128;
1da177e4 2255 } else if (rtm->rtm_dst_len)
2d7202bf 2256 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2257#ifdef CONFIG_IPV6_SUBTREES
2258 if (src) {
2d7202bf 2259 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2260 rtm->rtm_src_len = 128;
1da177e4 2261 } else if (rtm->rtm_src_len)
2d7202bf 2262 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2263#endif
7bc570c8
YH
2264 if (iif) {
2265#ifdef CONFIG_IPV6_MROUTE
2266 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2267 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2268 if (err <= 0) {
2269 if (!nowait) {
2270 if (err == 0)
2271 return 0;
2272 goto nla_put_failure;
2273 } else {
2274 if (err == -EMSGSIZE)
2275 goto nla_put_failure;
2276 }
2277 }
2278 } else
2279#endif
2280 NLA_PUT_U32(skb, RTA_IIF, iif);
2281 } else if (dst) {
d8d1f30b 2282 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
1da177e4 2283 struct in6_addr saddr_buf;
191cd582 2284 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2285 dst, 0, &saddr_buf) == 0)
2d7202bf 2286 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2287 }
2d7202bf 2288
defb3519 2289 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2290 goto nla_put_failure;
2291
d8d1f30b
CG
2292 if (rt->dst.neighbour)
2293 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2d7202bf 2294
d8d1f30b 2295 if (rt->dst.dev)
2d7202bf
TG
2296 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2297
2298 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2299
36e3deae
YH
2300 if (!(rt->rt6i_flags & RTF_EXPIRES))
2301 expires = 0;
2302 else if (rt->rt6i_expires - jiffies < INT_MAX)
2303 expires = rt->rt6i_expires - jiffies;
2304 else
2305 expires = INT_MAX;
69cdf8f9 2306
d8d1f30b
CG
2307 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2308 expires, rt->dst.error) < 0)
e3703b3d 2309 goto nla_put_failure;
2d7202bf
TG
2310
2311 return nlmsg_end(skb, nlh);
2312
2313nla_put_failure:
26932566
PM
2314 nlmsg_cancel(skb, nlh);
2315 return -EMSGSIZE;
1da177e4
LT
2316}
2317
1b43af54 2318int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2319{
2320 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2321 int prefix;
2322
2d7202bf
TG
2323 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2324 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2325 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2326 } else
2327 prefix = 0;
2328
191cd582
BH
2329 return rt6_fill_node(arg->net,
2330 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2331 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2332 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2333}
2334
c127ea2c 2335static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2336{
3b1e0a65 2337 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2338 struct nlattr *tb[RTA_MAX+1];
2339 struct rt6_info *rt;
1da177e4 2340 struct sk_buff *skb;
ab364a6f 2341 struct rtmsg *rtm;
1da177e4 2342 struct flowi fl;
ab364a6f 2343 int err, iif = 0;
1da177e4 2344
ab364a6f
TG
2345 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2346 if (err < 0)
2347 goto errout;
1da177e4 2348
ab364a6f 2349 err = -EINVAL;
1da177e4 2350 memset(&fl, 0, sizeof(fl));
1da177e4 2351
ab364a6f
TG
2352 if (tb[RTA_SRC]) {
2353 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2354 goto errout;
2355
2356 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2357 }
2358
2359 if (tb[RTA_DST]) {
2360 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2361 goto errout;
2362
2363 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2364 }
2365
2366 if (tb[RTA_IIF])
2367 iif = nla_get_u32(tb[RTA_IIF]);
2368
2369 if (tb[RTA_OIF])
2370 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2371
2372 if (iif) {
2373 struct net_device *dev;
5578689a 2374 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2375 if (!dev) {
2376 err = -ENODEV;
ab364a6f 2377 goto errout;
1da177e4
LT
2378 }
2379 }
2380
ab364a6f
TG
2381 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2382 if (skb == NULL) {
2383 err = -ENOBUFS;
2384 goto errout;
2385 }
1da177e4 2386
ab364a6f
TG
2387 /* Reserve room for dummy headers, this skb can pass
2388 through good chunk of routing engine.
2389 */
459a98ed 2390 skb_reset_mac_header(skb);
ab364a6f 2391 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2392
8a3edd80 2393 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
d8d1f30b 2394 skb_dst_set(skb, &rt->dst);
1da177e4 2395
191cd582 2396 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2397 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2398 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2399 if (err < 0) {
ab364a6f
TG
2400 kfree_skb(skb);
2401 goto errout;
1da177e4
LT
2402 }
2403
5578689a 2404 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2405errout:
1da177e4 2406 return err;
1da177e4
LT
2407}
2408
86872cb5 2409void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2410{
2411 struct sk_buff *skb;
5578689a 2412 struct net *net = info->nl_net;
528c4ceb
DL
2413 u32 seq;
2414 int err;
2415
2416 err = -ENOBUFS;
2417 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2418
339bf98f 2419 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2420 if (skb == NULL)
2421 goto errout;
2422
191cd582 2423 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2424 event, info->pid, seq, 0, 0, 0);
26932566
PM
2425 if (err < 0) {
2426 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2427 WARN_ON(err == -EMSGSIZE);
2428 kfree_skb(skb);
2429 goto errout;
2430 }
1ce85fe4
PNA
2431 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2432 info->nlh, gfp_any());
2433 return;
21713ebc
TG
2434errout:
2435 if (err < 0)
5578689a 2436 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2437}
2438
8ed67789
DL
2439static int ip6_route_dev_notify(struct notifier_block *this,
2440 unsigned long event, void *data)
2441{
2442 struct net_device *dev = (struct net_device *)data;
c346dca1 2443 struct net *net = dev_net(dev);
8ed67789
DL
2444
2445 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2446 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2447 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2448#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2449 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2450 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2451 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2452 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2453#endif
2454 }
2455
2456 return NOTIFY_OK;
2457}
2458
1da177e4
LT
2459/*
2460 * /proc
2461 */
2462
2463#ifdef CONFIG_PROC_FS
2464
1da177e4
LT
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): no user of this struct is visible in this part of the file;
 * the seq_file based handlers below do not reference it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2473
2474static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2475{
33120b30 2476 struct seq_file *m = p_arg;
1da177e4 2477
4b7a4274 2478 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2479
2480#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2481 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2482#else
33120b30 2483 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2484#endif
2485
2486 if (rt->rt6i_nexthop) {
4b7a4274 2487 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2488 } else {
33120b30 2489 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2490 }
33120b30 2491 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2492 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2493 rt->dst.__use, rt->rt6i_flags,
33120b30 2494 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2495 return 0;
2496}
2497
33120b30 2498static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2499{
f3db4851
DL
2500 struct net *net = (struct net *)m->private;
2501 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2502 return 0;
2503}
1da177e4 2504
33120b30
AD
2505static int ipv6_route_open(struct inode *inode, struct file *file)
2506{
de05c557 2507 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2508}
2509
33120b30
AD
2510static const struct file_operations ipv6_route_proc_fops = {
2511 .owner = THIS_MODULE,
2512 .open = ipv6_route_open,
2513 .read = seq_read,
2514 .llseek = seq_lseek,
b6fcbdb4 2515 .release = single_release_net,
33120b30
AD
2516};
2517
1da177e4
LT
2518static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2519{
69ddb805 2520 struct net *net = (struct net *)seq->private;
1da177e4 2521 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2522 net->ipv6.rt6_stats->fib_nodes,
2523 net->ipv6.rt6_stats->fib_route_nodes,
2524 net->ipv6.rt6_stats->fib_rt_alloc,
2525 net->ipv6.rt6_stats->fib_rt_entries,
2526 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2527 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2528 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2529
2530 return 0;
2531}
2532
2533static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2534{
de05c557 2535 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2536}
2537
9a32144e 2538static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2539 .owner = THIS_MODULE,
2540 .open = rt6_stats_seq_open,
2541 .read = seq_read,
2542 .llseek = seq_lseek,
b6fcbdb4 2543 .release = single_release_net,
1da177e4
LT
2544};
2545#endif /* CONFIG_PROC_FS */
2546
2547#ifdef CONFIG_SYSCTL
2548
1da177e4 2549static
8d65af78 2550int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2551 void __user *buffer, size_t *lenp, loff_t *ppos)
2552{
5b7c931d
DL
2553 struct net *net = current->nsproxy->net_ns;
2554 int delay = net->ipv6.sysctl.flush_delay;
1da177e4 2555 if (write) {
8d65af78 2556 proc_dointvec(ctl, write, buffer, lenp, ppos);
5b7c931d 2557 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2558 return 0;
2559 } else
2560 return -EINVAL;
2561}
2562
760f2d01 2563ctl_table ipv6_route_table_template[] = {
1ab1457c 2564 {
1da177e4 2565 .procname = "flush",
4990509f 2566 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2567 .maxlen = sizeof(int),
89c8b3a1 2568 .mode = 0200,
6d9f239a 2569 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2570 },
2571 {
1da177e4 2572 .procname = "gc_thresh",
9a7ec3a9 2573 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2574 .maxlen = sizeof(int),
2575 .mode = 0644,
6d9f239a 2576 .proc_handler = proc_dointvec,
1da177e4
LT
2577 },
2578 {
1da177e4 2579 .procname = "max_size",
4990509f 2580 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2581 .maxlen = sizeof(int),
2582 .mode = 0644,
6d9f239a 2583 .proc_handler = proc_dointvec,
1da177e4
LT
2584 },
2585 {
1da177e4 2586 .procname = "gc_min_interval",
4990509f 2587 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2588 .maxlen = sizeof(int),
2589 .mode = 0644,
6d9f239a 2590 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2591 },
2592 {
1da177e4 2593 .procname = "gc_timeout",
4990509f 2594 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2595 .maxlen = sizeof(int),
2596 .mode = 0644,
6d9f239a 2597 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2598 },
2599 {
1da177e4 2600 .procname = "gc_interval",
4990509f 2601 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2602 .maxlen = sizeof(int),
2603 .mode = 0644,
6d9f239a 2604 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2605 },
2606 {
1da177e4 2607 .procname = "gc_elasticity",
4990509f 2608 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2609 .maxlen = sizeof(int),
2610 .mode = 0644,
f3d3f616 2611 .proc_handler = proc_dointvec,
1da177e4
LT
2612 },
2613 {
1da177e4 2614 .procname = "mtu_expires",
4990509f 2615 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2616 .maxlen = sizeof(int),
2617 .mode = 0644,
6d9f239a 2618 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2619 },
2620 {
1da177e4 2621 .procname = "min_adv_mss",
4990509f 2622 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2623 .maxlen = sizeof(int),
2624 .mode = 0644,
f3d3f616 2625 .proc_handler = proc_dointvec,
1da177e4
LT
2626 },
2627 {
1da177e4 2628 .procname = "gc_min_interval_ms",
4990509f 2629 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2630 .maxlen = sizeof(int),
2631 .mode = 0644,
6d9f239a 2632 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2633 },
f8572d8f 2634 { }
1da177e4
LT
2635};
2636
2c8c1e72 2637struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2638{
2639 struct ctl_table *table;
2640
2641 table = kmemdup(ipv6_route_table_template,
2642 sizeof(ipv6_route_table_template),
2643 GFP_KERNEL);
5ee09105
YH
2644
2645 if (table) {
2646 table[0].data = &net->ipv6.sysctl.flush_delay;
86393e52 2647 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2648 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2649 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2650 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2651 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2652 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2653 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2654 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2655 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2656 }
2657
760f2d01
DL
2658 return table;
2659}
1da177e4
LT
2660#endif
2661
2c8c1e72 2662static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2663{
633d424b 2664 int ret = -ENOMEM;
8ed67789 2665
86393e52
AD
2666 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2667 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2668
fc66f95c
ED
2669 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2670 goto out_ip6_dst_ops;
2671
8ed67789
DL
2672 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2673 sizeof(*net->ipv6.ip6_null_entry),
2674 GFP_KERNEL);
2675 if (!net->ipv6.ip6_null_entry)
fc66f95c 2676 goto out_ip6_dst_entries;
d8d1f30b 2677 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2678 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2679 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
defb3519 2680 dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);
8ed67789
DL
2681
2682#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2683 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2684 sizeof(*net->ipv6.ip6_prohibit_entry),
2685 GFP_KERNEL);
68fffc67
PZ
2686 if (!net->ipv6.ip6_prohibit_entry)
2687 goto out_ip6_null_entry;
d8d1f30b 2688 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2689 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2690 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
defb3519 2691 dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);
8ed67789
DL
2692
2693 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2694 sizeof(*net->ipv6.ip6_blk_hole_entry),
2695 GFP_KERNEL);
68fffc67
PZ
2696 if (!net->ipv6.ip6_blk_hole_entry)
2697 goto out_ip6_prohibit_entry;
d8d1f30b 2698 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2699 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2700 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
defb3519 2701 dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
8ed67789
DL
2702#endif
2703
b339a47c
PZ
2704 net->ipv6.sysctl.flush_delay = 0;
2705 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2706 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2707 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2708 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2709 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2710 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2711 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2712
cdb18761
DL
2713#ifdef CONFIG_PROC_FS
2714 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2715 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2716#endif
6891a346
BT
2717 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2718
8ed67789
DL
2719 ret = 0;
2720out:
2721 return ret;
f2fc6a54 2722
68fffc67
PZ
2723#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2724out_ip6_prohibit_entry:
2725 kfree(net->ipv6.ip6_prohibit_entry);
2726out_ip6_null_entry:
2727 kfree(net->ipv6.ip6_null_entry);
2728#endif
fc66f95c
ED
2729out_ip6_dst_entries:
2730 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2731out_ip6_dst_ops:
f2fc6a54 2732 goto out;
cdb18761
DL
2733}
2734
2c8c1e72 2735static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2736{
2737#ifdef CONFIG_PROC_FS
2738 proc_net_remove(net, "ipv6_route");
2739 proc_net_remove(net, "rt6_stats");
2740#endif
8ed67789
DL
2741 kfree(net->ipv6.ip6_null_entry);
2742#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2743 kfree(net->ipv6.ip6_prohibit_entry);
2744 kfree(net->ipv6.ip6_blk_hole_entry);
2745#endif
41bb78b4 2746 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2747}
2748
2749static struct pernet_operations ip6_route_net_ops = {
2750 .init = ip6_route_net_init,
2751 .exit = ip6_route_net_exit,
2752};
2753
8ed67789
DL
2754static struct notifier_block ip6_route_dev_notifier = {
2755 .notifier_call = ip6_route_dev_notify,
2756 .priority = 0,
2757};
2758
433d49c3 2759int __init ip6_route_init(void)
1da177e4 2760{
433d49c3
DL
2761 int ret;
2762
9a7ec3a9
DL
2763 ret = -ENOMEM;
2764 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2765 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2766 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2767 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2768 goto out;
14e50e57 2769
fc66f95c 2770 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2771 if (ret)
bdb3289f 2772 goto out_kmem_cache;
bdb3289f 2773
fc66f95c
ED
2774 ret = register_pernet_subsys(&ip6_route_net_ops);
2775 if (ret)
2776 goto out_dst_entries;
2777
5dc121e9
AE
2778 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2779
8ed67789
DL
2780 /* Registering of the loopback is done before this portion of code,
2781 * the loopback reference in rt6_info will not be taken, do it
2782 * manually for init_net */
d8d1f30b 2783 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2784 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2785 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2786 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2787 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2788 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2789 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2790 #endif
433d49c3
DL
2791 ret = fib6_init();
2792 if (ret)
8ed67789 2793 goto out_register_subsys;
433d49c3 2794
433d49c3
DL
2795 ret = xfrm6_init();
2796 if (ret)
cdb18761 2797 goto out_fib6_init;
c35b7e72 2798
433d49c3
DL
2799 ret = fib6_rules_init();
2800 if (ret)
2801 goto xfrm6_init;
7e5449c2 2802
433d49c3
DL
2803 ret = -ENOBUFS;
2804 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2805 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2806 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2807 goto fib6_rules_init;
c127ea2c 2808
8ed67789 2809 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2810 if (ret)
2811 goto fib6_rules_init;
8ed67789 2812
433d49c3
DL
2813out:
2814 return ret;
2815
2816fib6_rules_init:
433d49c3
DL
2817 fib6_rules_cleanup();
2818xfrm6_init:
433d49c3 2819 xfrm6_fini();
433d49c3 2820out_fib6_init:
433d49c3 2821 fib6_gc_cleanup();
8ed67789
DL
2822out_register_subsys:
2823 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2824out_dst_entries:
2825 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2826out_kmem_cache:
f2fc6a54 2827 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2828 goto out;
1da177e4
LT
2829}
2830
2831void ip6_route_cleanup(void)
2832{
8ed67789 2833 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2834 fib6_rules_cleanup();
1da177e4 2835 xfrm6_fini();
1da177e4 2836 fib6_gc_cleanup();
8ed67789 2837 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 2838 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 2839 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2840}