caif_hsi: use dev_dbg not dev_err for reporting
[linux-2.6-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4 28#include <linux/errno.h>
bc3b2d7f 29#include <linux/export.h>
1da177e4
LT
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
7bc570c8 38#include <linux/mroute6.h>
1da177e4 39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
5a0e3ad6 44#include <linux/slab.h>
457c4cbc 45#include <net/net_namespace.h>
1da177e4
LT
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
8d71740c 56#include <net/netevent.h>
21713ebc 57#include <net/netlink.h>
1da177e4
LT
58
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
21efcfa0
ED
65static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
66 const struct in6_addr *dest);
1da177e4 67static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 68static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 69static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
70static struct dst_entry *ip6_negative_advice(struct dst_entry *);
71static void ip6_dst_destroy(struct dst_entry *);
72static void ip6_dst_ifdown(struct dst_entry *,
73 struct net_device *dev, int how);
569d3645 74static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
75
76static int ip6_pkt_discard(struct sk_buff *skb);
77static int ip6_pkt_discard_out(struct sk_buff *skb);
78static void ip6_link_failure(struct sk_buff *skb);
79static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
80
70ceb4f5 81#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 82static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
83 const struct in6_addr *prefix, int prefixlen,
84 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5 85 unsigned pref);
efa2cea0 86static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
87 const struct in6_addr *prefix, int prefixlen,
88 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
89#endif
90
06582540
DM
91static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
92{
93 struct rt6_info *rt = (struct rt6_info *) dst;
94 struct inet_peer *peer;
95 u32 *p = NULL;
96
8e2ec639
YZ
97 if (!(rt->dst.flags & DST_HOST))
98 return NULL;
99
06582540
DM
100 if (!rt->rt6i_peer)
101 rt6_bind_peer(rt, 1);
102
103 peer = rt->rt6i_peer;
104 if (peer) {
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
107
108 p = peer->metrics;
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111
112 new = (unsigned long) p;
113 prev = cmpxchg(&dst->_metrics, old, new);
114
115 if (prev != old) {
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
118 p = NULL;
119 }
120 }
121 return p;
122}
123
39232973
DM
124static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
125{
126 struct in6_addr *p = &rt->rt6i_gateway;
127
a7563f34 128 if (!ipv6_addr_any(p))
39232973
DM
129 return (const void *) p;
130 return daddr;
131}
132
d3aaeb38
DM
133static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
134{
39232973
DM
135 struct rt6_info *rt = (struct rt6_info *) dst;
136 struct neighbour *n;
137
138 daddr = choose_neigh_daddr(rt, daddr);
139 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
140 if (n)
141 return n;
142 return neigh_create(&nd_tbl, daddr, dst->dev);
143}
144
8ade06c6 145static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 146{
8ade06c6
DM
147 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
148 if (!n) {
149 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
150 if (IS_ERR(n))
151 return PTR_ERR(n);
152 }
f83c7790
DM
153 dst_set_neighbour(&rt->dst, n);
154
155 return 0;
d3aaeb38
DM
156}
157
9a7ec3a9 158static struct dst_ops ip6_dst_ops_template = {
1da177e4 159 .family = AF_INET6,
09640e63 160 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
161 .gc = ip6_dst_gc,
162 .gc_thresh = 1024,
163 .check = ip6_dst_check,
0dbaee3b 164 .default_advmss = ip6_default_advmss,
ebb762f2 165 .mtu = ip6_mtu,
06582540 166 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
167 .destroy = ip6_dst_destroy,
168 .ifdown = ip6_dst_ifdown,
169 .negative_advice = ip6_negative_advice,
170 .link_failure = ip6_link_failure,
171 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 172 .local_out = __ip6_local_out,
d3aaeb38 173 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
174};
175
ebb762f2 176static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 177{
618f9bc7
SK
178 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
179
180 return mtu ? : dst->dev->mtu;
ec831ea7
RD
181}
182
14e50e57
DM
183static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
184{
185}
186
0972ddb2
HB
187static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
188 unsigned long old)
189{
190 return NULL;
191}
192
14e50e57
DM
193static struct dst_ops ip6_dst_blackhole_ops = {
194 .family = AF_INET6,
09640e63 195 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
196 .destroy = ip6_dst_destroy,
197 .check = ip6_dst_check,
ebb762f2 198 .mtu = ip6_blackhole_mtu,
214f45c9 199 .default_advmss = ip6_default_advmss,
14e50e57 200 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 201 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 202 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
203};
204
62fa8a84
DM
205static const u32 ip6_template_metrics[RTAX_MAX] = {
206 [RTAX_HOPLIMIT - 1] = 255,
207};
208
bdb3289f 209static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
210 .dst = {
211 .__refcnt = ATOMIC_INIT(1),
212 .__use = 1,
213 .obsolete = -1,
214 .error = -ENETUNREACH,
d8d1f30b
CG
215 .input = ip6_pkt_discard,
216 .output = ip6_pkt_discard_out,
1da177e4
LT
217 },
218 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 219 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
220 .rt6i_metric = ~(u32) 0,
221 .rt6i_ref = ATOMIC_INIT(1),
222};
223
101367c2
TG
224#ifdef CONFIG_IPV6_MULTIPLE_TABLES
225
6723ab54
DM
226static int ip6_pkt_prohibit(struct sk_buff *skb);
227static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 228
280a34c8 229static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
230 .dst = {
231 .__refcnt = ATOMIC_INIT(1),
232 .__use = 1,
233 .obsolete = -1,
234 .error = -EACCES,
d8d1f30b
CG
235 .input = ip6_pkt_prohibit,
236 .output = ip6_pkt_prohibit_out,
101367c2
TG
237 },
238 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 239 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
240 .rt6i_metric = ~(u32) 0,
241 .rt6i_ref = ATOMIC_INIT(1),
242};
243
bdb3289f 244static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
245 .dst = {
246 .__refcnt = ATOMIC_INIT(1),
247 .__use = 1,
248 .obsolete = -1,
249 .error = -EINVAL,
d8d1f30b
CG
250 .input = dst_discard,
251 .output = dst_discard,
101367c2
TG
252 },
253 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 254 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
255 .rt6i_metric = ~(u32) 0,
256 .rt6i_ref = ATOMIC_INIT(1),
257};
258
259#endif
260
1da177e4 261/* allocate dst with ip6_dst_ops */
5c1e6aa3 262static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
957c665f
DM
263 struct net_device *dev,
264 int flags)
1da177e4 265{
957c665f 266 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
cf911662 267
38308473 268 if (rt)
fbe58186 269 memset(&rt->rt6i_table, 0,
38308473 270 sizeof(*rt) - sizeof(struct dst_entry));
cf911662
DM
271
272 return rt;
1da177e4
LT
273}
274
275static void ip6_dst_destroy(struct dst_entry *dst)
276{
277 struct rt6_info *rt = (struct rt6_info *)dst;
278 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 279 struct inet_peer *peer = rt->rt6i_peer;
1da177e4 280
8e2ec639
YZ
281 if (!(rt->dst.flags & DST_HOST))
282 dst_destroy_metrics_generic(dst);
283
38308473 284 if (idev) {
1da177e4
LT
285 rt->rt6i_idev = NULL;
286 in6_dev_put(idev);
1ab1457c 287 }
b3419363 288 if (peer) {
b3419363
DM
289 rt->rt6i_peer = NULL;
290 inet_putpeer(peer);
291 }
292}
293
6431cbc2
DM
294static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
295
296static u32 rt6_peer_genid(void)
297{
298 return atomic_read(&__rt6_peer_genid);
299}
300
b3419363
DM
301void rt6_bind_peer(struct rt6_info *rt, int create)
302{
303 struct inet_peer *peer;
304
b3419363
DM
305 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
306 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
307 inet_putpeer(peer);
6431cbc2
DM
308 else
309 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
310}
311
312static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
313 int how)
314{
315 struct rt6_info *rt = (struct rt6_info *)dst;
316 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 317 struct net_device *loopback_dev =
c346dca1 318 dev_net(dev)->loopback_dev;
1da177e4 319
38308473 320 if (dev != loopback_dev && idev && idev->dev == dev) {
5a3e55d6
DL
321 struct inet6_dev *loopback_idev =
322 in6_dev_get(loopback_dev);
38308473 323 if (loopback_idev) {
1da177e4
LT
324 rt->rt6i_idev = loopback_idev;
325 in6_dev_put(idev);
326 }
327 }
328}
329
330static __inline__ int rt6_check_expired(const struct rt6_info *rt)
331{
a02cec21 332 return (rt->rt6i_flags & RTF_EXPIRES) &&
d1918542 333 time_after(jiffies, rt->dst.expires);
1da177e4
LT
334}
335
b71d1d42 336static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 337{
a02cec21
ED
338 return ipv6_addr_type(daddr) &
339 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
340}
341
1da177e4 342/*
c71099ac 343 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
344 */
345
8ed67789
DL
346static inline struct rt6_info *rt6_device_match(struct net *net,
347 struct rt6_info *rt,
b71d1d42 348 const struct in6_addr *saddr,
1da177e4 349 int oif,
d420895e 350 int flags)
1da177e4
LT
351{
352 struct rt6_info *local = NULL;
353 struct rt6_info *sprt;
354
dd3abc4e
YH
355 if (!oif && ipv6_addr_any(saddr))
356 goto out;
357
d8d1f30b 358 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 359 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
360
361 if (oif) {
1da177e4
LT
362 if (dev->ifindex == oif)
363 return sprt;
364 if (dev->flags & IFF_LOOPBACK) {
38308473 365 if (!sprt->rt6i_idev ||
1da177e4 366 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 367 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 368 continue;
1ab1457c 369 if (local && (!oif ||
1da177e4
LT
370 local->rt6i_idev->dev->ifindex == oif))
371 continue;
372 }
373 local = sprt;
374 }
dd3abc4e
YH
375 } else {
376 if (ipv6_chk_addr(net, saddr, dev,
377 flags & RT6_LOOKUP_F_IFACE))
378 return sprt;
1da177e4 379 }
dd3abc4e 380 }
1da177e4 381
dd3abc4e 382 if (oif) {
1da177e4
LT
383 if (local)
384 return local;
385
d420895e 386 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 387 return net->ipv6.ip6_null_entry;
1da177e4 388 }
dd3abc4e 389out:
1da177e4
LT
390 return rt;
391}
392
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If @rt has a neighbour entry that is not in a NUD_VALID state and the
 * last update is older than the interface's rtr_probe_interval, send a
 * neighbour solicitation to the solicited-node multicast address of the
 * neighbour.  A NULL @rt is accepted and ignored.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute.  The rate limit relies on neigh->updated, so the check
 * and the update of that field are done under the neighbour's *write*
 * lock: with only the read lock held, concurrent callers could all pass
 * the time check and each send a probe, and the field would be written
 * without exclusion.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* Send the solicitation outside the neighbour lock. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
432
1da177e4 433/*
554cfb7e 434 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 435 */
b6f99a21 436static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 437{
d1918542 438 struct net_device *dev = rt->dst.dev;
161980f4 439 if (!oif || dev->ifindex == oif)
554cfb7e 440 return 2;
161980f4
DM
441 if ((dev->flags & IFF_LOOPBACK) &&
442 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
443 return 1;
444 return 0;
554cfb7e 445}
1da177e4 446
b6f99a21 447static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 448{
f2c31e32 449 struct neighbour *neigh;
398bcbeb 450 int m;
f2c31e32
ED
451
452 rcu_read_lock();
27217455 453 neigh = dst_get_neighbour_noref(&rt->dst);
4d0c5911
YH
454 if (rt->rt6i_flags & RTF_NONEXTHOP ||
455 !(rt->rt6i_flags & RTF_GATEWAY))
456 m = 1;
457 else if (neigh) {
554cfb7e
YH
458 read_lock_bh(&neigh->lock);
459 if (neigh->nud_state & NUD_VALID)
4d0c5911 460 m = 2;
398bcbeb
YH
461#ifdef CONFIG_IPV6_ROUTER_PREF
462 else if (neigh->nud_state & NUD_FAILED)
463 m = 0;
464#endif
465 else
ea73ee23 466 m = 1;
554cfb7e 467 read_unlock_bh(&neigh->lock);
398bcbeb
YH
468 } else
469 m = 0;
f2c31e32 470 rcu_read_unlock();
554cfb7e 471 return m;
1da177e4
LT
472}
473
554cfb7e
YH
474static int rt6_score_route(struct rt6_info *rt, int oif,
475 int strict)
1da177e4 476{
4d0c5911 477 int m, n;
1ab1457c 478
4d0c5911 479 m = rt6_check_dev(rt, oif);
77d16f45 480 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 481 return -1;
ebacaaa0
YH
482#ifdef CONFIG_IPV6_ROUTER_PREF
483 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
484#endif
4d0c5911 485 n = rt6_check_neigh(rt);
557e92ef 486 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
487 return -1;
488 return m;
489}
490
f11e6659
DM
491static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
492 int *mpri, struct rt6_info *match)
554cfb7e 493{
f11e6659
DM
494 int m;
495
496 if (rt6_check_expired(rt))
497 goto out;
498
499 m = rt6_score_route(rt, oif, strict);
500 if (m < 0)
501 goto out;
502
503 if (m > *mpri) {
504 if (strict & RT6_LOOKUP_F_REACHABLE)
505 rt6_probe(match);
506 *mpri = m;
507 match = rt;
508 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
509 rt6_probe(rt);
510 }
511
512out:
513 return match;
514}
515
516static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
517 struct rt6_info *rr_head,
518 u32 metric, int oif, int strict)
519{
520 struct rt6_info *rt, *match;
554cfb7e 521 int mpri = -1;
1da177e4 522
f11e6659
DM
523 match = NULL;
524 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 525 rt = rt->dst.rt6_next)
f11e6659
DM
526 match = find_match(rt, oif, strict, &mpri, match);
527 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 528 rt = rt->dst.rt6_next)
f11e6659 529 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 530
f11e6659
DM
531 return match;
532}
1da177e4 533
f11e6659
DM
534static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
535{
536 struct rt6_info *match, *rt0;
8ed67789 537 struct net *net;
1da177e4 538
f11e6659
DM
539 rt0 = fn->rr_ptr;
540 if (!rt0)
541 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 542
f11e6659 543 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 544
554cfb7e 545 if (!match &&
f11e6659 546 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 547 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 548
554cfb7e 549 /* no entries matched; do round-robin */
f11e6659
DM
550 if (!next || next->rt6i_metric != rt0->rt6i_metric)
551 next = fn->leaf;
552
553 if (next != rt0)
554 fn->rr_ptr = next;
1da177e4 555 }
1da177e4 556
d1918542 557 net = dev_net(rt0->dst.dev);
a02cec21 558 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
559}
560
70ceb4f5
YH
561#ifdef CONFIG_IPV6_ROUTE_INFO
562int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 563 const struct in6_addr *gwaddr)
70ceb4f5 564{
c346dca1 565 struct net *net = dev_net(dev);
70ceb4f5
YH
566 struct route_info *rinfo = (struct route_info *) opt;
567 struct in6_addr prefix_buf, *prefix;
568 unsigned int pref;
4bed72e4 569 unsigned long lifetime;
70ceb4f5
YH
570 struct rt6_info *rt;
571
572 if (len < sizeof(struct route_info)) {
573 return -EINVAL;
574 }
575
576 /* Sanity check for prefix_len and length */
577 if (rinfo->length > 3) {
578 return -EINVAL;
579 } else if (rinfo->prefix_len > 128) {
580 return -EINVAL;
581 } else if (rinfo->prefix_len > 64) {
582 if (rinfo->length < 2) {
583 return -EINVAL;
584 }
585 } else if (rinfo->prefix_len > 0) {
586 if (rinfo->length < 1) {
587 return -EINVAL;
588 }
589 }
590
591 pref = rinfo->route_pref;
592 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 593 return -EINVAL;
70ceb4f5 594
4bed72e4 595 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
596
597 if (rinfo->length == 3)
598 prefix = (struct in6_addr *)rinfo->prefix;
599 else {
600 /* this function is safe */
601 ipv6_addr_prefix(&prefix_buf,
602 (struct in6_addr *)rinfo->prefix,
603 rinfo->prefix_len);
604 prefix = &prefix_buf;
605 }
606
efa2cea0
DL
607 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
608 dev->ifindex);
70ceb4f5
YH
609
610 if (rt && !lifetime) {
e0a1ad73 611 ip6_del_rt(rt);
70ceb4f5
YH
612 rt = NULL;
613 }
614
615 if (!rt && lifetime)
efa2cea0 616 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
617 pref);
618 else if (rt)
619 rt->rt6i_flags = RTF_ROUTEINFO |
620 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
621
622 if (rt) {
4bed72e4 623 if (!addrconf_finite_timeout(lifetime)) {
70ceb4f5
YH
624 rt->rt6i_flags &= ~RTF_EXPIRES;
625 } else {
d1918542 626 rt->dst.expires = jiffies + HZ * lifetime;
70ceb4f5
YH
627 rt->rt6i_flags |= RTF_EXPIRES;
628 }
d8d1f30b 629 dst_release(&rt->dst);
70ceb4f5
YH
630 }
631 return 0;
632}
633#endif
634
/*
 * BACKTRACK - climb the fib6 tree after a failed lookup.
 *
 * Must be expanded inside a function that has local variables `rt`
 * (struct rt6_info *) and `fn` (struct fib6_node *) and labels `out`
 * and `restart`.
 *
 * When `rt` is the null entry, walk towards the root: at each step move
 * to the parent, or, if the parent has a source subtree other than the
 * current node, to the result of looking @saddr up in that subtree.
 * Jump to `restart` at the first node flagged RTN_RTINFO, or to `out`
 * once a node flagged RTN_TL_ROOT is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 652
8ed67789
DL
653static struct rt6_info *ip6_pol_route_lookup(struct net *net,
654 struct fib6_table *table,
4c9483b2 655 struct flowi6 *fl6, int flags)
1da177e4
LT
656{
657 struct fib6_node *fn;
658 struct rt6_info *rt;
659
c71099ac 660 read_lock_bh(&table->tb6_lock);
4c9483b2 661 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
662restart:
663 rt = fn->leaf;
4c9483b2
DM
664 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
665 BACKTRACK(net, &fl6->saddr);
c71099ac 666out:
d8d1f30b 667 dst_use(&rt->dst, jiffies);
c71099ac 668 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
669 return rt;
670
671}
672
ea6e574e
FW
673struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
674 int flags)
675{
676 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
677}
678EXPORT_SYMBOL_GPL(ip6_route_lookup);
679
9acd9f3a
YH
680struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
681 const struct in6_addr *saddr, int oif, int strict)
c71099ac 682{
4c9483b2
DM
683 struct flowi6 fl6 = {
684 .flowi6_oif = oif,
685 .daddr = *daddr,
c71099ac
TG
686 };
687 struct dst_entry *dst;
77d16f45 688 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 689
adaa70bb 690 if (saddr) {
4c9483b2 691 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
692 flags |= RT6_LOOKUP_F_HAS_SADDR;
693 }
694
4c9483b2 695 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
696 if (dst->error == 0)
697 return (struct rt6_info *) dst;
698
699 dst_release(dst);
700
1da177e4
LT
701 return NULL;
702}
703
7159039a
YH
704EXPORT_SYMBOL(rt6_lookup);
705
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to it, it may be destroyed.
 */
711
86872cb5 712static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
713{
714 int err;
c71099ac 715 struct fib6_table *table;
1da177e4 716
c71099ac
TG
717 table = rt->rt6i_table;
718 write_lock_bh(&table->tb6_lock);
86872cb5 719 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 720 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
721
722 return err;
723}
724
40e22e8f
TG
725int ip6_ins_rt(struct rt6_info *rt)
726{
4d1169c1 727 struct nl_info info = {
d1918542 728 .nl_net = dev_net(rt->dst.dev),
4d1169c1 729 };
528c4ceb 730 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
731}
732
21efcfa0
ED
733static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
734 const struct in6_addr *daddr,
b71d1d42 735 const struct in6_addr *saddr)
1da177e4 736{
1da177e4
LT
737 struct rt6_info *rt;
738
739 /*
740 * Clone the route.
741 */
742
21efcfa0 743 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
744
745 if (rt) {
14deae41
DM
746 int attempts = !in_softirq();
747
38308473 748 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 749 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 750 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 751 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 752 rt->rt6i_gateway = *daddr;
58c4fb86 753 }
1da177e4 754
1da177e4 755 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
756
757#ifdef CONFIG_IPV6_SUBTREES
758 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 759 rt->rt6i_src.addr = *saddr;
1da177e4
LT
760 rt->rt6i_src.plen = 128;
761 }
762#endif
763
14deae41 764 retry:
8ade06c6 765 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 766 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
767 int saved_rt_min_interval =
768 net->ipv6.sysctl.ip6_rt_gc_min_interval;
769 int saved_rt_elasticity =
770 net->ipv6.sysctl.ip6_rt_gc_elasticity;
771
772 if (attempts-- > 0) {
773 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
774 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
775
86393e52 776 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
777
778 net->ipv6.sysctl.ip6_rt_gc_elasticity =
779 saved_rt_elasticity;
780 net->ipv6.sysctl.ip6_rt_gc_min_interval =
781 saved_rt_min_interval;
782 goto retry;
783 }
784
785 if (net_ratelimit())
786 printk(KERN_WARNING
7e1b33e5 787 "ipv6: Neighbour table overflow.\n");
d8d1f30b 788 dst_free(&rt->dst);
14deae41
DM
789 return NULL;
790 }
95a9a5ba 791 }
1da177e4 792
95a9a5ba
YH
793 return rt;
794}
1da177e4 795
21efcfa0
ED
796static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
797 const struct in6_addr *daddr)
299d9939 798{
21efcfa0
ED
799 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
800
299d9939 801 if (rt) {
299d9939 802 rt->rt6i_flags |= RTF_CACHE;
27217455 803 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
299d9939
YH
804 }
805 return rt;
806}
807
8ed67789 808static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 809 struct flowi6 *fl6, int flags)
1da177e4
LT
810{
811 struct fib6_node *fn;
519fbd87 812 struct rt6_info *rt, *nrt;
c71099ac 813 int strict = 0;
1da177e4 814 int attempts = 3;
519fbd87 815 int err;
53b7997f 816 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 817
77d16f45 818 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
819
820relookup:
c71099ac 821 read_lock_bh(&table->tb6_lock);
1da177e4 822
8238dd06 823restart_2:
4c9483b2 824 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
825
826restart:
4acad72d 827 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 828
4c9483b2 829 BACKTRACK(net, &fl6->saddr);
8ed67789 830 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 831 rt->rt6i_flags & RTF_CACHE)
1ddef044 832 goto out;
1da177e4 833
d8d1f30b 834 dst_hold(&rt->dst);
c71099ac 835 read_unlock_bh(&table->tb6_lock);
fb9de91e 836
27217455 837 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 838 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 839 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 840 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
841 else
842 goto out2;
e40cf353 843
d8d1f30b 844 dst_release(&rt->dst);
8ed67789 845 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 846
d8d1f30b 847 dst_hold(&rt->dst);
519fbd87 848 if (nrt) {
40e22e8f 849 err = ip6_ins_rt(nrt);
519fbd87 850 if (!err)
1da177e4 851 goto out2;
1da177e4 852 }
1da177e4 853
519fbd87
YH
854 if (--attempts <= 0)
855 goto out2;
856
857 /*
c71099ac 858 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
859 * released someone could insert this route. Relookup.
860 */
d8d1f30b 861 dst_release(&rt->dst);
519fbd87
YH
862 goto relookup;
863
864out:
8238dd06
YH
865 if (reachable) {
866 reachable = 0;
867 goto restart_2;
868 }
d8d1f30b 869 dst_hold(&rt->dst);
c71099ac 870 read_unlock_bh(&table->tb6_lock);
1da177e4 871out2:
d8d1f30b
CG
872 rt->dst.lastuse = jiffies;
873 rt->dst.__use++;
c71099ac
TG
874
875 return rt;
1da177e4
LT
876}
877
8ed67789 878static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 879 struct flowi6 *fl6, int flags)
4acad72d 880{
4c9483b2 881 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
882}
883
72331bc0
SL
884static struct dst_entry *ip6_route_input_lookup(struct net *net,
885 struct net_device *dev,
886 struct flowi6 *fl6, int flags)
887{
888 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
889 flags |= RT6_LOOKUP_F_IFACE;
890
891 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
892}
893
c71099ac
TG
894void ip6_route_input(struct sk_buff *skb)
895{
b71d1d42 896 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 897 struct net *net = dev_net(skb->dev);
adaa70bb 898 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
899 struct flowi6 fl6 = {
900 .flowi6_iif = skb->dev->ifindex,
901 .daddr = iph->daddr,
902 .saddr = iph->saddr,
38308473 903 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
904 .flowi6_mark = skb->mark,
905 .flowi6_proto = iph->nexthdr,
c71099ac 906 };
adaa70bb 907
72331bc0 908 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
909}
910
8ed67789 911static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 912 struct flowi6 *fl6, int flags)
1da177e4 913{
4c9483b2 914 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
915}
916
9c7a4f9c 917struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 918 struct flowi6 *fl6)
c71099ac
TG
919{
920 int flags = 0;
921
4c9483b2 922 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 923 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 924
4c9483b2 925 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 926 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
927 else if (sk)
928 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 929
4c9483b2 930 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
931}
932
7159039a 933EXPORT_SYMBOL(ip6_route_output);
1da177e4 934
2774c131 935struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 936{
5c1e6aa3 937 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
938 struct dst_entry *new = NULL;
939
5c1e6aa3 940 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 941 if (rt) {
cf911662
DM
942 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
943
d8d1f30b 944 new = &rt->dst;
14e50e57 945
14e50e57 946 new->__use = 1;
352e512c
HX
947 new->input = dst_discard;
948 new->output = dst_discard;
14e50e57 949
21efcfa0
ED
950 if (dst_metrics_read_only(&ort->dst))
951 new->_metrics = ort->dst._metrics;
952 else
953 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
954 rt->rt6i_idev = ort->rt6i_idev;
955 if (rt->rt6i_idev)
956 in6_dev_hold(rt->rt6i_idev);
d1918542 957 rt->dst.expires = 0;
14e50e57 958
4e3fd7a0 959 rt->rt6i_gateway = ort->rt6i_gateway;
14e50e57
DM
960 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
961 rt->rt6i_metric = 0;
962
963 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
964#ifdef CONFIG_IPV6_SUBTREES
965 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
966#endif
967
968 dst_free(new);
969 }
970
69ead7af
DM
971 dst_release(dst_orig);
972 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 973}
14e50e57 974
1da177e4
LT
975/*
976 * Destination cache support functions
977 */
978
979static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
980{
981 struct rt6_info *rt;
982
983 rt = (struct rt6_info *) dst;
984
6431cbc2
DM
985 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
986 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
987 if (!rt->rt6i_peer)
988 rt6_bind_peer(rt, 0);
989 rt->rt6i_peer_genid = rt6_peer_genid();
990 }
1da177e4 991 return dst;
6431cbc2 992 }
1da177e4
LT
993 return NULL;
994}
995
996static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
997{
998 struct rt6_info *rt = (struct rt6_info *) dst;
999
1000 if (rt) {
54c1a859
YH
1001 if (rt->rt6i_flags & RTF_CACHE) {
1002 if (rt6_check_expired(rt)) {
1003 ip6_del_rt(rt);
1004 dst = NULL;
1005 }
1006 } else {
1da177e4 1007 dst_release(dst);
54c1a859
YH
1008 dst = NULL;
1009 }
1da177e4 1010 }
54c1a859 1011 return dst;
1da177e4
LT
1012}
1013
1014static void ip6_link_failure(struct sk_buff *skb)
1015{
1016 struct rt6_info *rt;
1017
3ffe533c 1018 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1019
adf30907 1020 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1021 if (rt) {
38308473 1022 if (rt->rt6i_flags & RTF_CACHE) {
d8d1f30b 1023 dst_set_expires(&rt->dst, 0);
1da177e4
LT
1024 rt->rt6i_flags |= RTF_EXPIRES;
1025 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1026 rt->rt6i_node->fn_sernum = -1;
1027 }
1028}
1029
1030static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1031{
1032 struct rt6_info *rt6 = (struct rt6_info*)dst;
1033
1034 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1035 rt6->rt6i_flags |= RTF_MODIFIED;
1036 if (mtu < IPV6_MIN_MTU) {
defb3519 1037 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1038 mtu = IPV6_MIN_MTU;
defb3519
DM
1039 features |= RTAX_FEATURE_ALLFRAG;
1040 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1041 }
defb3519 1042 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1043 }
1044}
1045
0dbaee3b 1046static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1047{
0dbaee3b
DM
1048 struct net_device *dev = dst->dev;
1049 unsigned int mtu = dst_mtu(dst);
1050 struct net *net = dev_net(dev);
1051
1da177e4
LT
1052 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1053
5578689a
DL
1054 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1055 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1056
1057 /*
1ab1457c
YH
1058 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1059 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1060 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1061 * rely only on pmtu discovery"
1062 */
1063 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1064 mtu = IPV6_MAXPLEN;
1065 return mtu;
1066}
1067
ebb762f2 1068static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1069{
d33e4553 1070 struct inet6_dev *idev;
618f9bc7
SK
1071 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1072
1073 if (mtu)
1074 return mtu;
1075
1076 mtu = IPV6_MIN_MTU;
d33e4553
DM
1077
1078 rcu_read_lock();
1079 idev = __in6_dev_get(dst->dev);
1080 if (idev)
1081 mtu = idev->cnf.mtu6;
1082 rcu_read_unlock();
1083
1084 return mtu;
1085}
1086
3b00944c
YH
/*
 * icmp6_dst_lock protects icmp6_dst_gc_list, a singly linked list (chained
 * through dst->next) of the dst entries created by icmp6_dst_alloc().
 * icmp6_dst_gc() and icmp6_clean_all() walk and prune the list.
 */
static DEFINE_SPINLOCK(icmp6_dst_lock);
static struct dst_entry *icmp6_dst_gc_list;
5d0bbeeb 1089
3b00944c 1090struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1091 struct neighbour *neigh,
87a11578 1092 struct flowi6 *fl6)
1da177e4 1093{
87a11578 1094 struct dst_entry *dst;
1da177e4
LT
1095 struct rt6_info *rt;
1096 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1097 struct net *net = dev_net(dev);
1da177e4 1098
38308473 1099 if (unlikely(!idev))
122bdf67 1100 return ERR_PTR(-ENODEV);
1da177e4 1101
957c665f 1102 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
38308473 1103 if (unlikely(!rt)) {
1da177e4 1104 in6_dev_put(idev);
87a11578 1105 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1106 goto out;
1107 }
1108
1da177e4
LT
1109 if (neigh)
1110 neigh_hold(neigh);
14deae41 1111 else {
f83c7790 1112 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
b43faac6 1113 if (IS_ERR(neigh)) {
252c3d84 1114 in6_dev_put(idev);
b43faac6
DM
1115 dst_free(&rt->dst);
1116 return ERR_CAST(neigh);
1117 }
14deae41 1118 }
1da177e4 1119
8e2ec639
YZ
1120 rt->dst.flags |= DST_HOST;
1121 rt->dst.output = ip6_output;
69cce1d1 1122 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1123 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1124 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1125 rt->rt6i_dst.plen = 128;
1126 rt->rt6i_idev = idev;
7011687f 1127 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1128
3b00944c 1129 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1130 rt->dst.next = icmp6_dst_gc_list;
1131 icmp6_dst_gc_list = &rt->dst;
3b00944c 1132 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1133
5578689a 1134 fib6_force_start_gc(net);
1da177e4 1135
87a11578
DM
1136 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1137
1da177e4 1138out:
87a11578 1139 return dst;
1da177e4
LT
1140}
1141
3d0f24a7 1142int icmp6_dst_gc(void)
1da177e4 1143{
e9476e95 1144 struct dst_entry *dst, **pprev;
3d0f24a7 1145 int more = 0;
1da177e4 1146
3b00944c
YH
1147 spin_lock_bh(&icmp6_dst_lock);
1148 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1149
1da177e4
LT
1150 while ((dst = *pprev) != NULL) {
1151 if (!atomic_read(&dst->__refcnt)) {
1152 *pprev = dst->next;
1153 dst_free(dst);
1da177e4
LT
1154 } else {
1155 pprev = &dst->next;
3d0f24a7 1156 ++more;
1da177e4
LT
1157 }
1158 }
1159
3b00944c 1160 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1161
3d0f24a7 1162 return more;
1da177e4
LT
1163}
1164
1e493d19
DM
1165static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1166 void *arg)
1167{
1168 struct dst_entry *dst, **pprev;
1169
1170 spin_lock_bh(&icmp6_dst_lock);
1171 pprev = &icmp6_dst_gc_list;
1172 while ((dst = *pprev) != NULL) {
1173 struct rt6_info *rt = (struct rt6_info *) dst;
1174 if (func(rt, arg)) {
1175 *pprev = dst->next;
1176 dst_free(dst);
1177 } else {
1178 pprev = &dst->next;
1179 }
1180 }
1181 spin_unlock_bh(&icmp6_dst_lock);
1182}
1183
569d3645 1184static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1185{
1da177e4 1186 unsigned long now = jiffies;
86393e52 1187 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1188 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1189 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1190 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1191 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1192 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1193 int entries;
7019b78e 1194
fc66f95c 1195 entries = dst_entries_get_fast(ops);
7019b78e 1196 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1197 entries <= rt_max_size)
1da177e4
LT
1198 goto out;
1199
6891a346
BT
1200 net->ipv6.ip6_rt_gc_expire++;
1201 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1202 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1203 entries = dst_entries_get_slow(ops);
1204 if (entries < ops->gc_thresh)
7019b78e 1205 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1206out:
7019b78e 1207 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1208 return entries > rt_max_size;
1da177e4
LT
1209}
1210
1211/* Clean host part of a prefix. Not necessary in radix tree,
1212 but results in cleaner routing tables.
1213
1214 Remove it only when all the things will work!
1215 */
1216
6b75d090 1217int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1218{
5170ae82 1219 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1220 if (hoplimit == 0) {
6b75d090 1221 struct net_device *dev = dst->dev;
c68f24cc
ED
1222 struct inet6_dev *idev;
1223
1224 rcu_read_lock();
1225 idev = __in6_dev_get(dev);
1226 if (idev)
6b75d090 1227 hoplimit = idev->cnf.hop_limit;
c68f24cc 1228 else
53b7997f 1229 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1230 rcu_read_unlock();
1da177e4
LT
1231 }
1232 return hoplimit;
1233}
abbf46ae 1234EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1235
1236/*
1237 *
1238 */
1239
86872cb5 1240int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1241{
1242 int err;
5578689a 1243 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1244 struct rt6_info *rt = NULL;
1245 struct net_device *dev = NULL;
1246 struct inet6_dev *idev = NULL;
c71099ac 1247 struct fib6_table *table;
1da177e4
LT
1248 int addr_type;
1249
86872cb5 1250 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1251 return -EINVAL;
1252#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1253 if (cfg->fc_src_len)
1da177e4
LT
1254 return -EINVAL;
1255#endif
86872cb5 1256 if (cfg->fc_ifindex) {
1da177e4 1257 err = -ENODEV;
5578689a 1258 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1259 if (!dev)
1260 goto out;
1261 idev = in6_dev_get(dev);
1262 if (!idev)
1263 goto out;
1264 }
1265
86872cb5
TG
1266 if (cfg->fc_metric == 0)
1267 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1268
d71314b4 1269 err = -ENOBUFS;
38308473
DM
1270 if (cfg->fc_nlinfo.nlh &&
1271 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1272 table = fib6_get_table(net, cfg->fc_table);
38308473 1273 if (!table) {
d71314b4
MV
1274 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1275 table = fib6_new_table(net, cfg->fc_table);
1276 }
1277 } else {
1278 table = fib6_new_table(net, cfg->fc_table);
1279 }
38308473
DM
1280
1281 if (!table)
c71099ac 1282 goto out;
c71099ac 1283
957c665f 1284 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1da177e4 1285
38308473 1286 if (!rt) {
1da177e4
LT
1287 err = -ENOMEM;
1288 goto out;
1289 }
1290
d8d1f30b 1291 rt->dst.obsolete = -1;
d1918542 1292 rt->dst.expires = (cfg->fc_flags & RTF_EXPIRES) ?
6f704992
YH
1293 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1294 0;
1da177e4 1295
86872cb5
TG
1296 if (cfg->fc_protocol == RTPROT_UNSPEC)
1297 cfg->fc_protocol = RTPROT_BOOT;
1298 rt->rt6i_protocol = cfg->fc_protocol;
1299
1300 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1301
1302 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1303 rt->dst.input = ip6_mc_input;
ab79ad14
1304 else if (cfg->fc_flags & RTF_LOCAL)
1305 rt->dst.input = ip6_input;
1da177e4 1306 else
d8d1f30b 1307 rt->dst.input = ip6_forward;
1da177e4 1308
d8d1f30b 1309 rt->dst.output = ip6_output;
1da177e4 1310
86872cb5
TG
1311 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1312 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1313 if (rt->rt6i_dst.plen == 128)
11d53b49 1314 rt->dst.flags |= DST_HOST;
1da177e4 1315
8e2ec639
YZ
1316 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1317 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1318 if (!metrics) {
1319 err = -ENOMEM;
1320 goto out;
1321 }
1322 dst_init_metrics(&rt->dst, metrics, 0);
1323 }
1da177e4 1324#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1325 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1326 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1327#endif
1328
86872cb5 1329 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1330
1331 /* We cannot add true routes via loopback here,
1332 they would result in kernel looping; promote them to reject routes
1333 */
86872cb5 1334 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1335 (dev && (dev->flags & IFF_LOOPBACK) &&
1336 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1337 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1338 /* hold loopback dev/idev if we haven't done so. */
5578689a 1339 if (dev != net->loopback_dev) {
1da177e4
LT
1340 if (dev) {
1341 dev_put(dev);
1342 in6_dev_put(idev);
1343 }
5578689a 1344 dev = net->loopback_dev;
1da177e4
LT
1345 dev_hold(dev);
1346 idev = in6_dev_get(dev);
1347 if (!idev) {
1348 err = -ENODEV;
1349 goto out;
1350 }
1351 }
d8d1f30b
CG
1352 rt->dst.output = ip6_pkt_discard_out;
1353 rt->dst.input = ip6_pkt_discard;
1354 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1355 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1356 goto install_route;
1357 }
1358
86872cb5 1359 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1360 const struct in6_addr *gw_addr;
1da177e4
LT
1361 int gwa_type;
1362
86872cb5 1363 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1364 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1365 gwa_type = ipv6_addr_type(gw_addr);
1366
1367 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1368 struct rt6_info *grt;
1369
1370 /* IPv6 strictly inhibits using not link-local
1371 addresses as nexthop address.
1372 Otherwise, router will not able to send redirects.
1373 It is very good, but in some (rare!) circumstances
1374 (SIT, PtP, NBMA NOARP links) it is handy to allow
1375 some exceptions. --ANK
1376 */
1377 err = -EINVAL;
38308473 1378 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1379 goto out;
1380
5578689a 1381 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1382
1383 err = -EHOSTUNREACH;
38308473 1384 if (!grt)
1da177e4
LT
1385 goto out;
1386 if (dev) {
d1918542 1387 if (dev != grt->dst.dev) {
d8d1f30b 1388 dst_release(&grt->dst);
1da177e4
LT
1389 goto out;
1390 }
1391 } else {
d1918542 1392 dev = grt->dst.dev;
1da177e4
LT
1393 idev = grt->rt6i_idev;
1394 dev_hold(dev);
1395 in6_dev_hold(grt->rt6i_idev);
1396 }
38308473 1397 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1398 err = 0;
d8d1f30b 1399 dst_release(&grt->dst);
1da177e4
LT
1400
1401 if (err)
1402 goto out;
1403 }
1404 err = -EINVAL;
38308473 1405 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1406 goto out;
1407 }
1408
1409 err = -ENODEV;
38308473 1410 if (!dev)
1da177e4
LT
1411 goto out;
1412
c3968a85
DW
1413 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1414 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1415 err = -EINVAL;
1416 goto out;
1417 }
4e3fd7a0 1418 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1419 rt->rt6i_prefsrc.plen = 128;
1420 } else
1421 rt->rt6i_prefsrc.plen = 0;
1422
86872cb5 1423 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1424 err = rt6_bind_neighbour(rt, dev);
f83c7790 1425 if (err)
1da177e4 1426 goto out;
1da177e4
LT
1427 }
1428
86872cb5 1429 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1430
1431install_route:
86872cb5
TG
1432 if (cfg->fc_mx) {
1433 struct nlattr *nla;
1434 int remaining;
1435
1436 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1437 int type = nla_type(nla);
86872cb5
TG
1438
1439 if (type) {
1440 if (type > RTAX_MAX) {
1da177e4
LT
1441 err = -EINVAL;
1442 goto out;
1443 }
86872cb5 1444
defb3519 1445 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1446 }
1da177e4
LT
1447 }
1448 }
1449
d8d1f30b 1450 rt->dst.dev = dev;
1da177e4 1451 rt->rt6i_idev = idev;
c71099ac 1452 rt->rt6i_table = table;
63152fc0 1453
c346dca1 1454 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1455
86872cb5 1456 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1457
1458out:
1459 if (dev)
1460 dev_put(dev);
1461 if (idev)
1462 in6_dev_put(idev);
1463 if (rt)
d8d1f30b 1464 dst_free(&rt->dst);
1da177e4
LT
1465 return err;
1466}
1467
86872cb5 1468static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1469{
1470 int err;
c71099ac 1471 struct fib6_table *table;
d1918542 1472 struct net *net = dev_net(rt->dst.dev);
1da177e4 1473
8ed67789 1474 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1475 return -ENOENT;
1476
c71099ac
TG
1477 table = rt->rt6i_table;
1478 write_lock_bh(&table->tb6_lock);
1da177e4 1479
86872cb5 1480 err = fib6_del(rt, info);
d8d1f30b 1481 dst_release(&rt->dst);
1da177e4 1482
c71099ac 1483 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1484
1485 return err;
1486}
1487
e0a1ad73
TG
1488int ip6_del_rt(struct rt6_info *rt)
1489{
4d1169c1 1490 struct nl_info info = {
d1918542 1491 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1492 };
528c4ceb 1493 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1494}
1495
86872cb5 1496static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1497{
c71099ac 1498 struct fib6_table *table;
1da177e4
LT
1499 struct fib6_node *fn;
1500 struct rt6_info *rt;
1501 int err = -ESRCH;
1502
5578689a 1503 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1504 if (!table)
c71099ac
TG
1505 return err;
1506
1507 read_lock_bh(&table->tb6_lock);
1da177e4 1508
c71099ac 1509 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1510 &cfg->fc_dst, cfg->fc_dst_len,
1511 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1512
1da177e4 1513 if (fn) {
d8d1f30b 1514 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1515 if (cfg->fc_ifindex &&
d1918542
DM
1516 (!rt->dst.dev ||
1517 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1518 continue;
86872cb5
TG
1519 if (cfg->fc_flags & RTF_GATEWAY &&
1520 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1521 continue;
86872cb5 1522 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1523 continue;
d8d1f30b 1524 dst_hold(&rt->dst);
c71099ac 1525 read_unlock_bh(&table->tb6_lock);
1da177e4 1526
86872cb5 1527 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1528 }
1529 }
c71099ac 1530 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1531
1532 return err;
1533}
1534
1535/*
1536 * Handle redirects
1537 */
a6279458 1538struct ip6rd_flowi {
4c9483b2 1539 struct flowi6 fl6;
a6279458
YH
1540 struct in6_addr gateway;
1541};
1542
8ed67789
DL
1543static struct rt6_info *__ip6_route_redirect(struct net *net,
1544 struct fib6_table *table,
4c9483b2 1545 struct flowi6 *fl6,
a6279458 1546 int flags)
1da177e4 1547{
4c9483b2 1548 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1549 struct rt6_info *rt;
e843b9e1 1550 struct fib6_node *fn;
c71099ac 1551
1da177e4 1552 /*
e843b9e1
YH
1553 * Get the "current" route for this destination and
1554 * check if the redirect has come from approriate router.
1555 *
1556 * RFC 2461 specifies that redirects should only be
1557 * accepted if they come from the nexthop to the target.
1558 * Due to the way the routes are chosen, this notion
1559 * is a bit fuzzy and one might need to check all possible
1560 * routes.
1da177e4 1561 */
1da177e4 1562
c71099ac 1563 read_lock_bh(&table->tb6_lock);
4c9483b2 1564 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1565restart:
d8d1f30b 1566 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1567 /*
1568 * Current route is on-link; redirect is always invalid.
1569 *
1570 * Seems, previous statement is not true. It could
1571 * be node, which looks for us as on-link (f.e. proxy ndisc)
1572 * But then router serving it might decide, that we should
1573 * know truth 8)8) --ANK (980726).
1574 */
1575 if (rt6_check_expired(rt))
1576 continue;
1577 if (!(rt->rt6i_flags & RTF_GATEWAY))
1578 continue;
d1918542 1579 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
e843b9e1 1580 continue;
a6279458 1581 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1582 continue;
1583 break;
1584 }
a6279458 1585
cb15d9c2 1586 if (!rt)
8ed67789 1587 rt = net->ipv6.ip6_null_entry;
4c9483b2 1588 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1589out:
d8d1f30b 1590 dst_hold(&rt->dst);
a6279458 1591
c71099ac 1592 read_unlock_bh(&table->tb6_lock);
e843b9e1 1593
a6279458
YH
1594 return rt;
1595};
1596
b71d1d42
ED
1597static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1598 const struct in6_addr *src,
1599 const struct in6_addr *gateway,
a6279458
YH
1600 struct net_device *dev)
1601{
adaa70bb 1602 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1603 struct net *net = dev_net(dev);
a6279458 1604 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1605 .fl6 = {
1606 .flowi6_oif = dev->ifindex,
1607 .daddr = *dest,
1608 .saddr = *src,
a6279458 1609 },
a6279458 1610 };
adaa70bb 1611
4e3fd7a0 1612 rdfl.gateway = *gateway;
86c36ce4 1613
adaa70bb
TG
1614 if (rt6_need_strict(dest))
1615 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1616
4c9483b2 1617 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1618 flags, __ip6_route_redirect);
a6279458
YH
1619}
1620
b71d1d42
ED
1621void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1622 const struct in6_addr *saddr,
a6279458
YH
1623 struct neighbour *neigh, u8 *lladdr, int on_link)
1624{
1625 struct rt6_info *rt, *nrt = NULL;
1626 struct netevent_redirect netevent;
c346dca1 1627 struct net *net = dev_net(neigh->dev);
a6279458
YH
1628
1629 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1630
8ed67789 1631 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1632 if (net_ratelimit())
1633 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1634 "for redirect target\n");
a6279458 1635 goto out;
1da177e4
LT
1636 }
1637
1da177e4
LT
1638 /*
1639 * We have finally decided to accept it.
1640 */
1641
1ab1457c 1642 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1643 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1644 NEIGH_UPDATE_F_OVERRIDE|
1645 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1646 NEIGH_UPDATE_F_ISROUTER))
1647 );
1648
1649 /*
1650 * Redirect received -> path was valid.
1651 * Look, redirects are sent only in response to data packets,
1652 * so that this nexthop apparently is reachable. --ANK
1653 */
d8d1f30b 1654 dst_confirm(&rt->dst);
1da177e4
LT
1655
1656 /* Duplicate redirect: silently ignore. */
27217455 1657 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1da177e4
LT
1658 goto out;
1659
21efcfa0 1660 nrt = ip6_rt_copy(rt, dest);
38308473 1661 if (!nrt)
1da177e4
LT
1662 goto out;
1663
1664 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1665 if (on_link)
1666 nrt->rt6i_flags &= ~RTF_GATEWAY;
1667
4e3fd7a0 1668 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
69cce1d1 1669 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1670
40e22e8f 1671 if (ip6_ins_rt(nrt))
1da177e4
LT
1672 goto out;
1673
d8d1f30b
CG
1674 netevent.old = &rt->dst;
1675 netevent.new = &nrt->dst;
8d71740c
TT
1676 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1677
38308473 1678 if (rt->rt6i_flags & RTF_CACHE) {
e0a1ad73 1679 ip6_del_rt(rt);
1da177e4
LT
1680 return;
1681 }
1682
1683out:
d8d1f30b 1684 dst_release(&rt->dst);
1da177e4
LT
1685}
1686
1687/*
1688 * Handle ICMP "packet too big" messages
1689 * i.e. Path MTU discovery
1690 */
1691
b71d1d42 1692static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1693 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1694{
1695 struct rt6_info *rt, *nrt;
1696 int allfrag = 0;
d3052b55 1697again:
ae878ae2 1698 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
38308473 1699 if (!rt)
1da177e4
LT
1700 return;
1701
d3052b55
AV
1702 if (rt6_check_expired(rt)) {
1703 ip6_del_rt(rt);
1704 goto again;
1705 }
1706
d8d1f30b 1707 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1708 goto out;
1709
1710 if (pmtu < IPV6_MIN_MTU) {
1711 /*
1ab1457c 1712 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1713 * MTU (1280) and a fragment header should always be included
1714 * after a node receiving Too Big message reporting PMTU is
1715 * less than the IPv6 Minimum Link MTU.
1716 */
1717 pmtu = IPV6_MIN_MTU;
1718 allfrag = 1;
1719 }
1720
1721 /* New mtu received -> path was valid.
1722 They are sent only in response to data packets,
1723 so that this nexthop apparently is reachable. --ANK
1724 */
d8d1f30b 1725 dst_confirm(&rt->dst);
1da177e4
LT
1726
1727 /* Host route. If it is static, it would be better
1728 not to override it, but add new one, so that
1729 when cache entry will expire old pmtu
1730 would return automatically.
1731 */
1732 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1733 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1734 if (allfrag) {
1735 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1736 features |= RTAX_FEATURE_ALLFRAG;
1737 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1738 }
d8d1f30b 1739 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1740 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1741 goto out;
1742 }
1743
1744 /* Network route.
1745 Two cases are possible:
1746 1. It is connected route. Action: COW
1747 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1748 */
27217455 1749 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1750 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1751 else
1752 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1753
d5315b50 1754 if (nrt) {
defb3519
DM
1755 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1756 if (allfrag) {
1757 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1758 features |= RTAX_FEATURE_ALLFRAG;
1759 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1760 }
a1e78363
YH
1761
1762 /* According to RFC 1981, detecting PMTU increase shouldn't be
1763 * happened within 5 mins, the recommended timer is 10 mins.
1764 * Here this route expiration time is set to ip6_rt_mtu_expires
1765 * which is 10 mins. After 10 mins the decreased pmtu is expired
1766 * and detecting PMTU increase will be automatically happened.
1767 */
d8d1f30b 1768 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1769 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1770
40e22e8f 1771 ip6_ins_rt(nrt);
1da177e4 1772 }
1da177e4 1773out:
d8d1f30b 1774 dst_release(&rt->dst);
1da177e4
LT
1775}
1776
b71d1d42 1777void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1778 struct net_device *dev, u32 pmtu)
1779{
1780 struct net *net = dev_net(dev);
1781
1782 /*
1783 * RFC 1981 states that a node "MUST reduce the size of the packets it
1784 * is sending along the path" that caused the Packet Too Big message.
1785 * Since it's not possible in the general case to determine which
1786 * interface was used to send the original packet, we update the MTU
1787 * on the interface that will be used to send future packets. We also
1788 * update the MTU on the interface that received the Packet Too Big in
1789 * case the original packet was forced out that interface with
1790 * SO_BINDTODEVICE or similar. This is the next best thing to the
1791 * correct behaviour, which would be to update the MTU on all
1792 * interfaces.
1793 */
1794 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1795 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1796}
1797
1da177e4
LT
1798/*
1799 * Misc support functions
1800 */
1801
21efcfa0
ED
1802static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1803 const struct in6_addr *dest)
1da177e4 1804{
d1918542 1805 struct net *net = dev_net(ort->dst.dev);
5c1e6aa3 1806 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 1807 ort->dst.dev, 0);
1da177e4
LT
1808
1809 if (rt) {
d8d1f30b
CG
1810 rt->dst.input = ort->dst.input;
1811 rt->dst.output = ort->dst.output;
8e2ec639 1812 rt->dst.flags |= DST_HOST;
d8d1f30b 1813
4e3fd7a0 1814 rt->rt6i_dst.addr = *dest;
8e2ec639 1815 rt->rt6i_dst.plen = 128;
defb3519 1816 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1817 rt->dst.error = ort->dst.error;
1da177e4
LT
1818 rt->rt6i_idev = ort->rt6i_idev;
1819 if (rt->rt6i_idev)
1820 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1821 rt->dst.lastuse = jiffies;
d1918542 1822 rt->dst.expires = 0;
1da177e4 1823
4e3fd7a0 1824 rt->rt6i_gateway = ort->rt6i_gateway;
1da177e4
LT
1825 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1826 rt->rt6i_metric = 0;
1827
1da177e4
LT
1828#ifdef CONFIG_IPV6_SUBTREES
1829 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1830#endif
0f6c6392 1831 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1832 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1833 }
1834 return rt;
1835}
1836
70ceb4f5 1837#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1838static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1839 const struct in6_addr *prefix, int prefixlen,
1840 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1841{
1842 struct fib6_node *fn;
1843 struct rt6_info *rt = NULL;
c71099ac
TG
1844 struct fib6_table *table;
1845
efa2cea0 1846 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1847 if (!table)
c71099ac 1848 return NULL;
70ceb4f5 1849
c71099ac
TG
1850 write_lock_bh(&table->tb6_lock);
1851 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1852 if (!fn)
1853 goto out;
1854
d8d1f30b 1855 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1856 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1857 continue;
1858 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1859 continue;
1860 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1861 continue;
d8d1f30b 1862 dst_hold(&rt->dst);
70ceb4f5
YH
1863 break;
1864 }
1865out:
c71099ac 1866 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1867 return rt;
1868}
1869
efa2cea0 1870static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1871 const struct in6_addr *prefix, int prefixlen,
1872 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5
YH
1873 unsigned pref)
1874{
86872cb5
TG
1875 struct fib6_config cfg = {
1876 .fc_table = RT6_TABLE_INFO,
238fc7ea 1877 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1878 .fc_ifindex = ifindex,
1879 .fc_dst_len = prefixlen,
1880 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1881 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1882 .fc_nlinfo.pid = 0,
1883 .fc_nlinfo.nlh = NULL,
1884 .fc_nlinfo.nl_net = net,
86872cb5
TG
1885 };
1886
4e3fd7a0
AD
1887 cfg.fc_dst = *prefix;
1888 cfg.fc_gateway = *gwaddr;
70ceb4f5 1889
e317da96
YH
1890 /* We should treat it as a default route if prefix length is 0. */
1891 if (!prefixlen)
86872cb5 1892 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1893
86872cb5 1894 ip6_route_add(&cfg);
70ceb4f5 1895
efa2cea0 1896 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1897}
1898#endif
1899
b71d1d42 1900struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1901{
1da177e4 1902 struct rt6_info *rt;
c71099ac 1903 struct fib6_table *table;
1da177e4 1904
c346dca1 1905 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1906 if (!table)
c71099ac 1907 return NULL;
1da177e4 1908
c71099ac 1909 write_lock_bh(&table->tb6_lock);
d8d1f30b 1910 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1911 if (dev == rt->dst.dev &&
045927ff 1912 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1913 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1914 break;
1915 }
1916 if (rt)
d8d1f30b 1917 dst_hold(&rt->dst);
c71099ac 1918 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1919 return rt;
1920}
1921
b71d1d42 1922struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1923 struct net_device *dev,
1924 unsigned int pref)
1da177e4 1925{
86872cb5
TG
1926 struct fib6_config cfg = {
1927 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1928 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1929 .fc_ifindex = dev->ifindex,
1930 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1931 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1932 .fc_nlinfo.pid = 0,
1933 .fc_nlinfo.nlh = NULL,
c346dca1 1934 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1935 };
1da177e4 1936
4e3fd7a0 1937 cfg.fc_gateway = *gwaddr;
1da177e4 1938
86872cb5 1939 ip6_route_add(&cfg);
1da177e4 1940
1da177e4
LT
1941 return rt6_get_dflt_router(gwaddr, dev);
1942}
1943
7b4da532 1944void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1945{
1946 struct rt6_info *rt;
c71099ac
TG
1947 struct fib6_table *table;
1948
1949 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1950 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1951 if (!table)
c71099ac 1952 return;
1da177e4
LT
1953
1954restart:
c71099ac 1955 read_lock_bh(&table->tb6_lock);
d8d1f30b 1956 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1957 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1958 dst_hold(&rt->dst);
c71099ac 1959 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1960 ip6_del_rt(rt);
1da177e4
LT
1961 goto restart;
1962 }
1963 }
c71099ac 1964 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1965}
1966
5578689a
DL
1967static void rtmsg_to_fib6_config(struct net *net,
1968 struct in6_rtmsg *rtmsg,
86872cb5
TG
1969 struct fib6_config *cfg)
1970{
1971 memset(cfg, 0, sizeof(*cfg));
1972
1973 cfg->fc_table = RT6_TABLE_MAIN;
1974 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1975 cfg->fc_metric = rtmsg->rtmsg_metric;
1976 cfg->fc_expires = rtmsg->rtmsg_info;
1977 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1978 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1979 cfg->fc_flags = rtmsg->rtmsg_flags;
1980
5578689a 1981 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1982
4e3fd7a0
AD
1983 cfg->fc_dst = rtmsg->rtmsg_dst;
1984 cfg->fc_src = rtmsg->rtmsg_src;
1985 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1986}
1987
5578689a 1988int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1989{
86872cb5 1990 struct fib6_config cfg;
1da177e4
LT
1991 struct in6_rtmsg rtmsg;
1992 int err;
1993
1994 switch(cmd) {
1995 case SIOCADDRT: /* Add a route */
1996 case SIOCDELRT: /* Delete a route */
1997 if (!capable(CAP_NET_ADMIN))
1998 return -EPERM;
1999 err = copy_from_user(&rtmsg, arg,
2000 sizeof(struct in6_rtmsg));
2001 if (err)
2002 return -EFAULT;
86872cb5 2003
5578689a 2004 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2005
1da177e4
LT
2006 rtnl_lock();
2007 switch (cmd) {
2008 case SIOCADDRT:
86872cb5 2009 err = ip6_route_add(&cfg);
1da177e4
LT
2010 break;
2011 case SIOCDELRT:
86872cb5 2012 err = ip6_route_del(&cfg);
1da177e4
LT
2013 break;
2014 default:
2015 err = -EINVAL;
2016 }
2017 rtnl_unlock();
2018
2019 return err;
3ff50b79 2020 }
1da177e4
LT
2021
2022 return -EINVAL;
2023}
2024
2025/*
2026 * Drop the packet on the floor
2027 */
2028
d5fdd6ba 2029static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2030{
612f09e8 2031 int type;
adf30907 2032 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2033 switch (ipstats_mib_noroutes) {
2034 case IPSTATS_MIB_INNOROUTES:
0660e03f 2035 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2036 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2037 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2038 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2039 break;
2040 }
2041 /* FALLTHROUGH */
2042 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2043 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2044 ipstats_mib_noroutes);
612f09e8
YH
2045 break;
2046 }
3ffe533c 2047 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2048 kfree_skb(skb);
2049 return 0;
2050}
2051
9ce8ade0
TG
2052static int ip6_pkt_discard(struct sk_buff *skb)
2053{
612f09e8 2054 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2055}
2056
20380731 2057static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2058{
adf30907 2059 skb->dev = skb_dst(skb)->dev;
612f09e8 2060 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2061}
2062
6723ab54
DM
2063#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2064
9ce8ade0
TG
2065static int ip6_pkt_prohibit(struct sk_buff *skb)
2066{
612f09e8 2067 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2068}
2069
2070static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2071{
adf30907 2072 skb->dev = skb_dst(skb)->dev;
612f09e8 2073 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2074}
2075
6723ab54
DM
2076#endif
2077
1da177e4
LT
2078/*
2079 * Allocate a dst for local (unicast / anycast) address.
2080 */
2081
2082struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2083 const struct in6_addr *addr,
8f031519 2084 bool anycast)
1da177e4 2085{
c346dca1 2086 struct net *net = dev_net(idev->dev);
5c1e6aa3 2087 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 2088 net->loopback_dev, 0);
f83c7790 2089 int err;
1da177e4 2090
38308473 2091 if (!rt) {
40385653
BG
2092 if (net_ratelimit())
2093 pr_warning("IPv6: Maximum number of routes reached,"
2094 " consider increasing route/max_size.\n");
1da177e4 2095 return ERR_PTR(-ENOMEM);
40385653 2096 }
1da177e4 2097
1da177e4
LT
2098 in6_dev_hold(idev);
2099
11d53b49 2100 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2101 rt->dst.input = ip6_input;
2102 rt->dst.output = ip6_output;
1da177e4 2103 rt->rt6i_idev = idev;
d8d1f30b 2104 rt->dst.obsolete = -1;
1da177e4
LT
2105
2106 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2107 if (anycast)
2108 rt->rt6i_flags |= RTF_ANYCAST;
2109 else
1da177e4 2110 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2111 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2112 if (err) {
d8d1f30b 2113 dst_free(&rt->dst);
f83c7790 2114 return ERR_PTR(err);
1da177e4
LT
2115 }
2116
4e3fd7a0 2117 rt->rt6i_dst.addr = *addr;
1da177e4 2118 rt->rt6i_dst.plen = 128;
5578689a 2119 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2120
d8d1f30b 2121 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2122
2123 return rt;
2124}
2125
c3968a85
DW
2126int ip6_route_get_saddr(struct net *net,
2127 struct rt6_info *rt,
b71d1d42 2128 const struct in6_addr *daddr,
c3968a85
DW
2129 unsigned int prefs,
2130 struct in6_addr *saddr)
2131{
2132 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2133 int err = 0;
2134 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2135 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2136 else
2137 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2138 daddr, prefs, saddr);
2139 return err;
2140}
2141
2142/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace; supplies ip6_null_entry */
	struct in6_addr *addr;		/* address compared with rt6i_prefsrc */
};
2148
2149static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2150{
2151 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2152 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2153 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2154
d1918542 2155 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2156 rt != net->ipv6.ip6_null_entry &&
2157 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2158 /* remove prefsrc entry */
2159 rt->rt6i_prefsrc.plen = 0;
2160 }
2161 return 0;
2162}
2163
2164void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2165{
2166 struct net *net = dev_net(ifp->idev->dev);
2167 struct arg_dev_net_ip adni = {
2168 .dev = ifp->idev->dev,
2169 .net = net,
2170 .addr = &ifp->addr,
2171 };
2172 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2173}
2174
8ed67789
DL
/* Argument bundle handed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace; supplies ip6_null_entry */
};
2179
1da177e4
LT
2180static int fib6_ifdown(struct rt6_info *rt, void *arg)
2181{
bc3ef660 2182 const struct arg_dev_net *adn = arg;
2183 const struct net_device *dev = adn->dev;
8ed67789 2184
d1918542 2185 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2186 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2187 return -1;
c159d30c 2188
1da177e4
LT
2189 return 0;
2190}
2191
f3db4851 2192void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2193{
8ed67789
DL
2194 struct arg_dev_net adn = {
2195 .dev = dev,
2196 .net = net,
2197 };
2198
2199 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2200 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2201}
2202
/* Argument bundle handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* the new MTU */
};
2208
2209static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2210{
2211 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2212 struct inet6_dev *idev;
2213
2214 /* In IPv6 pmtu discovery is not optional,
2215 so that RTAX_MTU lock cannot disable it.
2216 We still use this lock to block changes
2217 caused by addrconf/ndisc.
2218 */
2219
2220 idev = __in6_dev_get(arg->dev);
38308473 2221 if (!idev)
1da177e4
LT
2222 return 0;
2223
2224 /* For administrative MTU increase, there is no way to discover
2225 IPv6 PMTU increase, so PMTU increase should be updated here.
2226 Since RFC 1981 doesn't include administrative MTU increase
2227 update PMTU increase is a MUST. (i.e. jumbo frame)
2228 */
2229 /*
2230 If new MTU is less than route PMTU, this new MTU will be the
2231 lowest MTU in the path, update the route PMTU to reflect PMTU
2232 decreases; if new MTU is greater than route PMTU, and the
2233 old MTU is the lowest MTU in the path, update the route PMTU
2234 to reflect the increase. In this case if the other nodes' MTU
2235 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2236 PMTU discouvery.
2237 */
d1918542 2238 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2239 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2240 (dst_mtu(&rt->dst) >= arg->mtu ||
2241 (dst_mtu(&rt->dst) < arg->mtu &&
2242 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2243 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2244 }
1da177e4
LT
2245 return 0;
2246}
2247
2248void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2249{
c71099ac
TG
2250 struct rt6_mtu_change_arg arg = {
2251 .dev = dev,
2252 .mtu = mtu,
2253 };
1da177e4 2254
c346dca1 2255 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2256}
2257
ef7c79ed 2258static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2259 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2260 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2261 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2262 [RTA_PRIORITY] = { .type = NLA_U32 },
2263 [RTA_METRICS] = { .type = NLA_NESTED },
2264};
2265
2266static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2267 struct fib6_config *cfg)
1da177e4 2268{
86872cb5
TG
2269 struct rtmsg *rtm;
2270 struct nlattr *tb[RTA_MAX+1];
2271 int err;
1da177e4 2272
86872cb5
TG
2273 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2274 if (err < 0)
2275 goto errout;
1da177e4 2276
86872cb5
TG
2277 err = -EINVAL;
2278 rtm = nlmsg_data(nlh);
2279 memset(cfg, 0, sizeof(*cfg));
2280
2281 cfg->fc_table = rtm->rtm_table;
2282 cfg->fc_dst_len = rtm->rtm_dst_len;
2283 cfg->fc_src_len = rtm->rtm_src_len;
2284 cfg->fc_flags = RTF_UP;
2285 cfg->fc_protocol = rtm->rtm_protocol;
2286
2287 if (rtm->rtm_type == RTN_UNREACHABLE)
2288 cfg->fc_flags |= RTF_REJECT;
2289
ab79ad14
2290 if (rtm->rtm_type == RTN_LOCAL)
2291 cfg->fc_flags |= RTF_LOCAL;
2292
86872cb5
TG
2293 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2294 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2295 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2296
2297 if (tb[RTA_GATEWAY]) {
2298 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2299 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2300 }
86872cb5
TG
2301
2302 if (tb[RTA_DST]) {
2303 int plen = (rtm->rtm_dst_len + 7) >> 3;
2304
2305 if (nla_len(tb[RTA_DST]) < plen)
2306 goto errout;
2307
2308 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2309 }
86872cb5
TG
2310
2311 if (tb[RTA_SRC]) {
2312 int plen = (rtm->rtm_src_len + 7) >> 3;
2313
2314 if (nla_len(tb[RTA_SRC]) < plen)
2315 goto errout;
2316
2317 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2318 }
86872cb5 2319
c3968a85
DW
2320 if (tb[RTA_PREFSRC])
2321 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2322
86872cb5
TG
2323 if (tb[RTA_OIF])
2324 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2325
2326 if (tb[RTA_PRIORITY])
2327 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2328
2329 if (tb[RTA_METRICS]) {
2330 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2331 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2332 }
86872cb5
TG
2333
2334 if (tb[RTA_TABLE])
2335 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2336
2337 err = 0;
2338errout:
2339 return err;
1da177e4
LT
2340}
2341
c127ea2c 2342static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2343{
86872cb5
TG
2344 struct fib6_config cfg;
2345 int err;
1da177e4 2346
86872cb5
TG
2347 err = rtm_to_fib6_config(skb, nlh, &cfg);
2348 if (err < 0)
2349 return err;
2350
2351 return ip6_route_del(&cfg);
1da177e4
LT
2352}
2353
c127ea2c 2354static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2355{
86872cb5
TG
2356 struct fib6_config cfg;
2357 int err;
1da177e4 2358
86872cb5
TG
2359 err = rtm_to_fib6_config(skb, nlh, &cfg);
2360 if (err < 0)
2361 return err;
2362
2363 return ip6_route_add(&cfg);
1da177e4
LT
2364}
2365
339bf98f
TG
2366static inline size_t rt6_nlmsg_size(void)
2367{
2368 return NLMSG_ALIGN(sizeof(struct rtmsg))
2369 + nla_total_size(16) /* RTA_SRC */
2370 + nla_total_size(16) /* RTA_DST */
2371 + nla_total_size(16) /* RTA_GATEWAY */
2372 + nla_total_size(16) /* RTA_PREFSRC */
2373 + nla_total_size(4) /* RTA_TABLE */
2374 + nla_total_size(4) /* RTA_IIF */
2375 + nla_total_size(4) /* RTA_OIF */
2376 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2377 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2378 + nla_total_size(sizeof(struct rta_cacheinfo));
2379}
2380
191cd582
BH
2381static int rt6_fill_node(struct net *net,
2382 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2383 struct in6_addr *dst, struct in6_addr *src,
2384 int iif, int type, u32 pid, u32 seq,
7bc570c8 2385 int prefix, int nowait, unsigned int flags)
1da177e4 2386{
346f870b 2387 const struct inet_peer *peer;
1da177e4 2388 struct rtmsg *rtm;
2d7202bf 2389 struct nlmsghdr *nlh;
e3703b3d 2390 long expires;
9e762a4a 2391 u32 table;
f2c31e32 2392 struct neighbour *n;
346f870b 2393 u32 ts, tsage;
1da177e4
LT
2394
2395 if (prefix) { /* user wants prefix routes only */
2396 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2397 /* success since this is not a prefix route */
2398 return 1;
2399 }
2400 }
2401
2d7202bf 2402 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2403 if (!nlh)
26932566 2404 return -EMSGSIZE;
2d7202bf
TG
2405
2406 rtm = nlmsg_data(nlh);
1da177e4
LT
2407 rtm->rtm_family = AF_INET6;
2408 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2409 rtm->rtm_src_len = rt->rt6i_src.plen;
2410 rtm->rtm_tos = 0;
c71099ac 2411 if (rt->rt6i_table)
9e762a4a 2412 table = rt->rt6i_table->tb6_id;
c71099ac 2413 else
9e762a4a
PM
2414 table = RT6_TABLE_UNSPEC;
2415 rtm->rtm_table = table;
2d7202bf 2416 NLA_PUT_U32(skb, RTA_TABLE, table);
38308473 2417 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2418 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2419 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2420 rtm->rtm_type = RTN_LOCAL;
d1918542 2421 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2422 rtm->rtm_type = RTN_LOCAL;
2423 else
2424 rtm->rtm_type = RTN_UNICAST;
2425 rtm->rtm_flags = 0;
2426 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2427 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2428 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2429 rtm->rtm_protocol = RTPROT_REDIRECT;
2430 else if (rt->rt6i_flags & RTF_ADDRCONF)
2431 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2432 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2433 rtm->rtm_protocol = RTPROT_RA;
2434
38308473 2435 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2436 rtm->rtm_flags |= RTM_F_CLONED;
2437
2438 if (dst) {
2d7202bf 2439 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2440 rtm->rtm_dst_len = 128;
1da177e4 2441 } else if (rtm->rtm_dst_len)
2d7202bf 2442 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2443#ifdef CONFIG_IPV6_SUBTREES
2444 if (src) {
2d7202bf 2445 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2446 rtm->rtm_src_len = 128;
1da177e4 2447 } else if (rtm->rtm_src_len)
2d7202bf 2448 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2449#endif
7bc570c8
YH
2450 if (iif) {
2451#ifdef CONFIG_IPV6_MROUTE
2452 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2453 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2454 if (err <= 0) {
2455 if (!nowait) {
2456 if (err == 0)
2457 return 0;
2458 goto nla_put_failure;
2459 } else {
2460 if (err == -EMSGSIZE)
2461 goto nla_put_failure;
2462 }
2463 }
2464 } else
2465#endif
2466 NLA_PUT_U32(skb, RTA_IIF, iif);
2467 } else if (dst) {
1da177e4 2468 struct in6_addr saddr_buf;
c3968a85 2469 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2d7202bf 2470 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2471 }
2d7202bf 2472
c3968a85
DW
2473 if (rt->rt6i_prefsrc.plen) {
2474 struct in6_addr saddr_buf;
4e3fd7a0 2475 saddr_buf = rt->rt6i_prefsrc.addr;
c3968a85
DW
2476 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2477 }
2478
defb3519 2479 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2480 goto nla_put_failure;
2481
f2c31e32 2482 rcu_read_lock();
27217455 2483 n = dst_get_neighbour_noref(&rt->dst);
94f826b8
ED
2484 if (n) {
2485 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2486 rcu_read_unlock();
2487 goto nla_put_failure;
2488 }
2489 }
f2c31e32 2490 rcu_read_unlock();
2d7202bf 2491
d8d1f30b 2492 if (rt->dst.dev)
d1918542 2493 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2d7202bf
TG
2494
2495 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2496
36e3deae
YH
2497 if (!(rt->rt6i_flags & RTF_EXPIRES))
2498 expires = 0;
d1918542
DM
2499 else if (rt->dst.expires - jiffies < INT_MAX)
2500 expires = rt->dst.expires - jiffies;
36e3deae
YH
2501 else
2502 expires = INT_MAX;
69cdf8f9 2503
346f870b
DM
2504 peer = rt->rt6i_peer;
2505 ts = tsage = 0;
2506 if (peer && peer->tcp_ts_stamp) {
2507 ts = peer->tcp_ts;
2508 tsage = get_seconds() - peer->tcp_ts_stamp;
2509 }
2510
2511 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
d8d1f30b 2512 expires, rt->dst.error) < 0)
e3703b3d 2513 goto nla_put_failure;
2d7202bf
TG
2514
2515 return nlmsg_end(skb, nlh);
2516
2517nla_put_failure:
26932566
PM
2518 nlmsg_cancel(skb, nlh);
2519 return -EMSGSIZE;
1da177e4
LT
2520}
2521
1b43af54 2522int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2523{
2524 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2525 int prefix;
2526
2d7202bf
TG
2527 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2528 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2529 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2530 } else
2531 prefix = 0;
2532
191cd582
BH
2533 return rt6_fill_node(arg->net,
2534 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2535 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2536 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2537}
2538
c127ea2c 2539static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2540{
3b1e0a65 2541 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2542 struct nlattr *tb[RTA_MAX+1];
2543 struct rt6_info *rt;
1da177e4 2544 struct sk_buff *skb;
ab364a6f 2545 struct rtmsg *rtm;
4c9483b2 2546 struct flowi6 fl6;
72331bc0 2547 int err, iif = 0, oif = 0;
1da177e4 2548
ab364a6f
TG
2549 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2550 if (err < 0)
2551 goto errout;
1da177e4 2552
ab364a6f 2553 err = -EINVAL;
4c9483b2 2554 memset(&fl6, 0, sizeof(fl6));
1da177e4 2555
ab364a6f
TG
2556 if (tb[RTA_SRC]) {
2557 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2558 goto errout;
2559
4e3fd7a0 2560 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2561 }
2562
2563 if (tb[RTA_DST]) {
2564 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2565 goto errout;
2566
4e3fd7a0 2567 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2568 }
2569
2570 if (tb[RTA_IIF])
2571 iif = nla_get_u32(tb[RTA_IIF]);
2572
2573 if (tb[RTA_OIF])
72331bc0 2574 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2575
2576 if (iif) {
2577 struct net_device *dev;
72331bc0
SL
2578 int flags = 0;
2579
5578689a 2580 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2581 if (!dev) {
2582 err = -ENODEV;
ab364a6f 2583 goto errout;
1da177e4 2584 }
72331bc0
SL
2585
2586 fl6.flowi6_iif = iif;
2587
2588 if (!ipv6_addr_any(&fl6.saddr))
2589 flags |= RT6_LOOKUP_F_HAS_SADDR;
2590
2591 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2592 flags);
2593 } else {
2594 fl6.flowi6_oif = oif;
2595
2596 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2597 }
2598
ab364a6f 2599 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2600 if (!skb) {
ab364a6f
TG
2601 err = -ENOBUFS;
2602 goto errout;
2603 }
1da177e4 2604
ab364a6f
TG
2605 /* Reserve room for dummy headers, this skb can pass
2606 through good chunk of routing engine.
2607 */
459a98ed 2608 skb_reset_mac_header(skb);
ab364a6f 2609 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2610
d8d1f30b 2611 skb_dst_set(skb, &rt->dst);
1da177e4 2612
4c9483b2 2613 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2614 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2615 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2616 if (err < 0) {
ab364a6f
TG
2617 kfree_skb(skb);
2618 goto errout;
1da177e4
LT
2619 }
2620
5578689a 2621 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2622errout:
1da177e4 2623 return err;
1da177e4
LT
2624}
2625
86872cb5 2626void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2627{
2628 struct sk_buff *skb;
5578689a 2629 struct net *net = info->nl_net;
528c4ceb
DL
2630 u32 seq;
2631 int err;
2632
2633 err = -ENOBUFS;
38308473 2634 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2635
339bf98f 2636 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2637 if (!skb)
21713ebc
TG
2638 goto errout;
2639
191cd582 2640 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2641 event, info->pid, seq, 0, 0, 0);
26932566
PM
2642 if (err < 0) {
2643 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2644 WARN_ON(err == -EMSGSIZE);
2645 kfree_skb(skb);
2646 goto errout;
2647 }
1ce85fe4
PNA
2648 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2649 info->nlh, gfp_any());
2650 return;
21713ebc
TG
2651errout:
2652 if (err < 0)
5578689a 2653 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2654}
2655
8ed67789
DL
2656static int ip6_route_dev_notify(struct notifier_block *this,
2657 unsigned long event, void *data)
2658{
2659 struct net_device *dev = (struct net_device *)data;
c346dca1 2660 struct net *net = dev_net(dev);
8ed67789
DL
2661
2662 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2663 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2664 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2665#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2666 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2667 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2668 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2669 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2670#endif
2671 }
2672
2673 return NOTIFY_OK;
2674}
2675
1da177e4
LT
2676/*
2677 * /proc
2678 */
2679
2680#ifdef CONFIG_PROC_FS
2681
1da177e4
LT
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file uses this
 * structure; confirm whether it is still needed.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2690
2691static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2692{
33120b30 2693 struct seq_file *m = p_arg;
69cce1d1 2694 struct neighbour *n;
1da177e4 2695
4b7a4274 2696 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2697
2698#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2699 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2700#else
33120b30 2701 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2702#endif
f2c31e32 2703 rcu_read_lock();
27217455 2704 n = dst_get_neighbour_noref(&rt->dst);
69cce1d1
DM
2705 if (n) {
2706 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2707 } else {
33120b30 2708 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2709 }
f2c31e32 2710 rcu_read_unlock();
33120b30 2711 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2712 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2713 rt->dst.__use, rt->rt6i_flags,
d1918542 2714 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2715 return 0;
2716}
2717
33120b30 2718static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2719{
f3db4851 2720 struct net *net = (struct net *)m->private;
32b293a5 2721 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2722 return 0;
2723}
1da177e4 2724
33120b30
AD
2725static int ipv6_route_open(struct inode *inode, struct file *file)
2726{
de05c557 2727 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2728}
2729
33120b30
AD
2730static const struct file_operations ipv6_route_proc_fops = {
2731 .owner = THIS_MODULE,
2732 .open = ipv6_route_open,
2733 .read = seq_read,
2734 .llseek = seq_lseek,
b6fcbdb4 2735 .release = single_release_net,
33120b30
AD
2736};
2737
1da177e4
LT
2738static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2739{
69ddb805 2740 struct net *net = (struct net *)seq->private;
1da177e4 2741 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2742 net->ipv6.rt6_stats->fib_nodes,
2743 net->ipv6.rt6_stats->fib_route_nodes,
2744 net->ipv6.rt6_stats->fib_rt_alloc,
2745 net->ipv6.rt6_stats->fib_rt_entries,
2746 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2747 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2748 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2749
2750 return 0;
2751}
2752
2753static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2754{
de05c557 2755 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2756}
2757
9a32144e 2758static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2759 .owner = THIS_MODULE,
2760 .open = rt6_stats_seq_open,
2761 .read = seq_read,
2762 .llseek = seq_lseek,
b6fcbdb4 2763 .release = single_release_net,
1da177e4
LT
2764};
2765#endif /* CONFIG_PROC_FS */
2766
2767#ifdef CONFIG_SYSCTL
2768
1da177e4 2769static
8d65af78 2770int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2771 void __user *buffer, size_t *lenp, loff_t *ppos)
2772{
c486da34
LAG
2773 struct net *net;
2774 int delay;
2775 if (!write)
1da177e4 2776 return -EINVAL;
c486da34
LAG
2777
2778 net = (struct net *)ctl->extra1;
2779 delay = net->ipv6.sysctl.flush_delay;
2780 proc_dointvec(ctl, write, buffer, lenp, ppos);
2781 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2782 return 0;
1da177e4
LT
2783}
2784
760f2d01 2785ctl_table ipv6_route_table_template[] = {
1ab1457c 2786 {
1da177e4 2787 .procname = "flush",
4990509f 2788 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2789 .maxlen = sizeof(int),
89c8b3a1 2790 .mode = 0200,
6d9f239a 2791 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2792 },
2793 {
1da177e4 2794 .procname = "gc_thresh",
9a7ec3a9 2795 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2796 .maxlen = sizeof(int),
2797 .mode = 0644,
6d9f239a 2798 .proc_handler = proc_dointvec,
1da177e4
LT
2799 },
2800 {
1da177e4 2801 .procname = "max_size",
4990509f 2802 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2803 .maxlen = sizeof(int),
2804 .mode = 0644,
6d9f239a 2805 .proc_handler = proc_dointvec,
1da177e4
LT
2806 },
2807 {
1da177e4 2808 .procname = "gc_min_interval",
4990509f 2809 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2810 .maxlen = sizeof(int),
2811 .mode = 0644,
6d9f239a 2812 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2813 },
2814 {
1da177e4 2815 .procname = "gc_timeout",
4990509f 2816 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2817 .maxlen = sizeof(int),
2818 .mode = 0644,
6d9f239a 2819 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2820 },
2821 {
1da177e4 2822 .procname = "gc_interval",
4990509f 2823 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2824 .maxlen = sizeof(int),
2825 .mode = 0644,
6d9f239a 2826 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2827 },
2828 {
1da177e4 2829 .procname = "gc_elasticity",
4990509f 2830 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2831 .maxlen = sizeof(int),
2832 .mode = 0644,
f3d3f616 2833 .proc_handler = proc_dointvec,
1da177e4
LT
2834 },
2835 {
1da177e4 2836 .procname = "mtu_expires",
4990509f 2837 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2838 .maxlen = sizeof(int),
2839 .mode = 0644,
6d9f239a 2840 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2841 },
2842 {
1da177e4 2843 .procname = "min_adv_mss",
4990509f 2844 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2845 .maxlen = sizeof(int),
2846 .mode = 0644,
f3d3f616 2847 .proc_handler = proc_dointvec,
1da177e4
LT
2848 },
2849 {
1da177e4 2850 .procname = "gc_min_interval_ms",
4990509f 2851 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2852 .maxlen = sizeof(int),
2853 .mode = 0644,
6d9f239a 2854 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2855 },
f8572d8f 2856 { }
1da177e4
LT
2857};
2858
2c8c1e72 2859struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2860{
2861 struct ctl_table *table;
2862
2863 table = kmemdup(ipv6_route_table_template,
2864 sizeof(ipv6_route_table_template),
2865 GFP_KERNEL);
5ee09105
YH
2866
2867 if (table) {
2868 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2869 table[0].extra1 = net;
86393e52 2870 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2871 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2872 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2873 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2874 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2875 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2876 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2877 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2878 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2879 }
2880
760f2d01
DL
2881 return table;
2882}
1da177e4
LT
2883#endif
2884
2c8c1e72 2885static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2886{
633d424b 2887 int ret = -ENOMEM;
8ed67789 2888
86393e52
AD
2889 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2890 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2891
fc66f95c
ED
2892 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2893 goto out_ip6_dst_ops;
2894
8ed67789
DL
2895 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2896 sizeof(*net->ipv6.ip6_null_entry),
2897 GFP_KERNEL);
2898 if (!net->ipv6.ip6_null_entry)
fc66f95c 2899 goto out_ip6_dst_entries;
d8d1f30b 2900 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2901 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2902 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2903 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2904 ip6_template_metrics, true);
8ed67789
DL
2905
2906#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2907 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2908 sizeof(*net->ipv6.ip6_prohibit_entry),
2909 GFP_KERNEL);
68fffc67
PZ
2910 if (!net->ipv6.ip6_prohibit_entry)
2911 goto out_ip6_null_entry;
d8d1f30b 2912 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2913 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2914 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2915 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2916 ip6_template_metrics, true);
8ed67789
DL
2917
2918 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2919 sizeof(*net->ipv6.ip6_blk_hole_entry),
2920 GFP_KERNEL);
68fffc67
PZ
2921 if (!net->ipv6.ip6_blk_hole_entry)
2922 goto out_ip6_prohibit_entry;
d8d1f30b 2923 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2924 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2925 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2926 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2927 ip6_template_metrics, true);
8ed67789
DL
2928#endif
2929
b339a47c
PZ
2930 net->ipv6.sysctl.flush_delay = 0;
2931 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2932 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2933 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2934 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2935 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2936 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2937 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2938
cdb18761
DL
2939#ifdef CONFIG_PROC_FS
2940 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2941 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2942#endif
6891a346
BT
2943 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2944
8ed67789
DL
2945 ret = 0;
2946out:
2947 return ret;
f2fc6a54 2948
68fffc67
PZ
2949#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2950out_ip6_prohibit_entry:
2951 kfree(net->ipv6.ip6_prohibit_entry);
2952out_ip6_null_entry:
2953 kfree(net->ipv6.ip6_null_entry);
2954#endif
fc66f95c
ED
2955out_ip6_dst_entries:
2956 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2957out_ip6_dst_ops:
f2fc6a54 2958 goto out;
cdb18761
DL
2959}
2960
2c8c1e72 2961static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2962{
2963#ifdef CONFIG_PROC_FS
2964 proc_net_remove(net, "ipv6_route");
2965 proc_net_remove(net, "rt6_stats");
2966#endif
8ed67789
DL
2967 kfree(net->ipv6.ip6_null_entry);
2968#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2969 kfree(net->ipv6.ip6_prohibit_entry);
2970 kfree(net->ipv6.ip6_blk_hole_entry);
2971#endif
41bb78b4 2972 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2973}
2974
2975static struct pernet_operations ip6_route_net_ops = {
2976 .init = ip6_route_net_init,
2977 .exit = ip6_route_net_exit,
2978};
2979
8ed67789
DL
2980static struct notifier_block ip6_route_dev_notifier = {
2981 .notifier_call = ip6_route_dev_notify,
2982 .priority = 0,
2983};
2984
433d49c3 2985int __init ip6_route_init(void)
1da177e4 2986{
433d49c3
DL
2987 int ret;
2988
9a7ec3a9
DL
2989 ret = -ENOMEM;
2990 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2991 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2992 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2993 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2994 goto out;
14e50e57 2995
fc66f95c 2996 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2997 if (ret)
bdb3289f 2998 goto out_kmem_cache;
bdb3289f 2999
fc66f95c
ED
3000 ret = register_pernet_subsys(&ip6_route_net_ops);
3001 if (ret)
3002 goto out_dst_entries;
3003
5dc121e9
AE
3004 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3005
8ed67789
DL
3006 /* Registering of the loopback is done before this portion of code,
3007 * the loopback reference in rt6_info will not be taken, do it
3008 * manually for init_net */
d8d1f30b 3009 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3010 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3011 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3012 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3013 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3014 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3015 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3016 #endif
433d49c3
DL
3017 ret = fib6_init();
3018 if (ret)
8ed67789 3019 goto out_register_subsys;
433d49c3 3020
433d49c3
DL
3021 ret = xfrm6_init();
3022 if (ret)
cdb18761 3023 goto out_fib6_init;
c35b7e72 3024
433d49c3
DL
3025 ret = fib6_rules_init();
3026 if (ret)
3027 goto xfrm6_init;
7e5449c2 3028
433d49c3 3029 ret = -ENOBUFS;
c7ac8679
GR
3030 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3031 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3032 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 3033 goto fib6_rules_init;
c127ea2c 3034
8ed67789 3035 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
3036 if (ret)
3037 goto fib6_rules_init;
8ed67789 3038
433d49c3
DL
3039out:
3040 return ret;
3041
3042fib6_rules_init:
433d49c3
DL
3043 fib6_rules_cleanup();
3044xfrm6_init:
433d49c3 3045 xfrm6_fini();
433d49c3 3046out_fib6_init:
433d49c3 3047 fib6_gc_cleanup();
8ed67789
DL
3048out_register_subsys:
3049 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
3050out_dst_entries:
3051 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3052out_kmem_cache:
f2fc6a54 3053 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3054 goto out;
1da177e4
LT
3055}
3056
3057void ip6_route_cleanup(void)
3058{
8ed67789 3059 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 3060 fib6_rules_cleanup();
1da177e4 3061 xfrm6_fini();
1da177e4 3062 fib6_gc_cleanup();
8ed67789 3063 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3064 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3065 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3066}