ipv6: Use icmpv6_notify() to propagate redirect, instead of rt6_redirect().
[linux-2.6-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
1da177e4
LT
60
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
1716a961 67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 68 const struct in6_addr *dest);
1da177e4 69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 70static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 71static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73static void ip6_dst_destroy(struct dst_entry *);
74static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
569d3645 76static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
77
78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb);
81static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
6e157b6a 82static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb);
1da177e4 83
70ceb4f5 84#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 85static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
86 const struct in6_addr *prefix, int prefixlen,
87 const struct in6_addr *gwaddr, int ifindex,
95c96174 88 unsigned int pref);
efa2cea0 89static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
90 const struct in6_addr *prefix, int prefixlen,
91 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
92#endif
93
06582540
DM
94static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
95{
96 struct rt6_info *rt = (struct rt6_info *) dst;
97 struct inet_peer *peer;
98 u32 *p = NULL;
99
8e2ec639
YZ
100 if (!(rt->dst.flags & DST_HOST))
101 return NULL;
102
fbfe95a4 103 peer = rt6_get_peer_create(rt);
06582540
DM
104 if (peer) {
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
107
108 p = peer->metrics;
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111
112 new = (unsigned long) p;
113 prev = cmpxchg(&dst->_metrics, old, new);
114
115 if (prev != old) {
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
118 p = NULL;
119 }
120 }
121 return p;
122}
123
f894cbf8
DM
124static inline const void *choose_neigh_daddr(struct rt6_info *rt,
125 struct sk_buff *skb,
126 const void *daddr)
39232973
DM
127{
128 struct in6_addr *p = &rt->rt6i_gateway;
129
a7563f34 130 if (!ipv6_addr_any(p))
39232973 131 return (const void *) p;
f894cbf8
DM
132 else if (skb)
133 return &ipv6_hdr(skb)->daddr;
39232973
DM
134 return daddr;
135}
136
f894cbf8
DM
137static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
138 struct sk_buff *skb,
139 const void *daddr)
d3aaeb38 140{
39232973
DM
141 struct rt6_info *rt = (struct rt6_info *) dst;
142 struct neighbour *n;
143
f894cbf8 144 daddr = choose_neigh_daddr(rt, skb, daddr);
39232973 145 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
146 if (n)
147 return n;
148 return neigh_create(&nd_tbl, daddr, dst->dev);
149}
150
8ade06c6 151static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 152{
8ade06c6
DM
153 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
154 if (!n) {
155 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
156 if (IS_ERR(n))
157 return PTR_ERR(n);
158 }
97cac082 159 rt->n = n;
f83c7790
DM
160
161 return 0;
d3aaeb38
DM
162}
163
9a7ec3a9 164static struct dst_ops ip6_dst_ops_template = {
1da177e4 165 .family = AF_INET6,
09640e63 166 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
167 .gc = ip6_dst_gc,
168 .gc_thresh = 1024,
169 .check = ip6_dst_check,
0dbaee3b 170 .default_advmss = ip6_default_advmss,
ebb762f2 171 .mtu = ip6_mtu,
06582540 172 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
173 .destroy = ip6_dst_destroy,
174 .ifdown = ip6_dst_ifdown,
175 .negative_advice = ip6_negative_advice,
176 .link_failure = ip6_link_failure,
177 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 178 .redirect = rt6_do_redirect,
1ac06e03 179 .local_out = __ip6_local_out,
d3aaeb38 180 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
181};
182
ebb762f2 183static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 184{
618f9bc7
SK
185 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
186
187 return mtu ? : dst->dev->mtu;
ec831ea7
RD
188}
189
14e50e57
DM
190static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
191{
192}
193
0972ddb2
HB
194static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
195 unsigned long old)
196{
197 return NULL;
198}
199
14e50e57
DM
200static struct dst_ops ip6_dst_blackhole_ops = {
201 .family = AF_INET6,
09640e63 202 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
203 .destroy = ip6_dst_destroy,
204 .check = ip6_dst_check,
ebb762f2 205 .mtu = ip6_blackhole_mtu,
214f45c9 206 .default_advmss = ip6_default_advmss,
14e50e57 207 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 208 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 209 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
210};
211
62fa8a84
DM
212static const u32 ip6_template_metrics[RTAX_MAX] = {
213 [RTAX_HOPLIMIT - 1] = 255,
214};
215
bdb3289f 216static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
217 .dst = {
218 .__refcnt = ATOMIC_INIT(1),
219 .__use = 1,
220 .obsolete = -1,
221 .error = -ENETUNREACH,
d8d1f30b
CG
222 .input = ip6_pkt_discard,
223 .output = ip6_pkt_discard_out,
1da177e4
LT
224 },
225 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 226 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
227 .rt6i_metric = ~(u32) 0,
228 .rt6i_ref = ATOMIC_INIT(1),
229};
230
101367c2
TG
231#ifdef CONFIG_IPV6_MULTIPLE_TABLES
232
6723ab54
DM
233static int ip6_pkt_prohibit(struct sk_buff *skb);
234static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 235
280a34c8 236static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
237 .dst = {
238 .__refcnt = ATOMIC_INIT(1),
239 .__use = 1,
240 .obsolete = -1,
241 .error = -EACCES,
d8d1f30b
CG
242 .input = ip6_pkt_prohibit,
243 .output = ip6_pkt_prohibit_out,
101367c2
TG
244 },
245 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 246 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
247 .rt6i_metric = ~(u32) 0,
248 .rt6i_ref = ATOMIC_INIT(1),
249};
250
bdb3289f 251static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
252 .dst = {
253 .__refcnt = ATOMIC_INIT(1),
254 .__use = 1,
255 .obsolete = -1,
256 .error = -EINVAL,
d8d1f30b
CG
257 .input = dst_discard,
258 .output = dst_discard,
101367c2
TG
259 },
260 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 261 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
262 .rt6i_metric = ~(u32) 0,
263 .rt6i_ref = ATOMIC_INIT(1),
264};
265
266#endif
267
1da177e4 268/* allocate dst with ip6_dst_ops */
97bab73f 269static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f 270 struct net_device *dev,
8b96d22d
DM
271 int flags,
272 struct fib6_table *table)
1da177e4 273{
97bab73f
DM
274 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
275 0, 0, flags);
cf911662 276
97bab73f 277 if (rt) {
a2de86f6 278 memset(&rt->n, 0,
38308473 279 sizeof(*rt) - sizeof(struct dst_entry));
8b96d22d 280 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
97bab73f 281 }
cf911662 282 return rt;
1da177e4
LT
283}
284
285static void ip6_dst_destroy(struct dst_entry *dst)
286{
287 struct rt6_info *rt = (struct rt6_info *)dst;
288 struct inet6_dev *idev = rt->rt6i_idev;
289
97cac082
DM
290 if (rt->n)
291 neigh_release(rt->n);
292
8e2ec639
YZ
293 if (!(rt->dst.flags & DST_HOST))
294 dst_destroy_metrics_generic(dst);
295
38308473 296 if (idev) {
1da177e4
LT
297 rt->rt6i_idev = NULL;
298 in6_dev_put(idev);
1ab1457c 299 }
1716a961
G
300
301 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
302 dst_release(dst->from);
303
97bab73f
DM
304 if (rt6_has_peer(rt)) {
305 struct inet_peer *peer = rt6_peer_ptr(rt);
b3419363
DM
306 inet_putpeer(peer);
307 }
308}
309
6431cbc2
DM
310static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
311
312static u32 rt6_peer_genid(void)
313{
314 return atomic_read(&__rt6_peer_genid);
315}
316
b3419363
DM
317void rt6_bind_peer(struct rt6_info *rt, int create)
318{
97bab73f 319 struct inet_peer_base *base;
b3419363
DM
320 struct inet_peer *peer;
321
97bab73f
DM
322 base = inetpeer_base_ptr(rt->_rt6i_peer);
323 if (!base)
324 return;
325
326 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
7b34ca2a
DM
327 if (peer) {
328 if (!rt6_set_peer(rt, peer))
329 inet_putpeer(peer);
330 else
331 rt->rt6i_peer_genid = rt6_peer_genid();
332 }
1da177e4
LT
333}
334
335static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
336 int how)
337{
338 struct rt6_info *rt = (struct rt6_info *)dst;
339 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 340 struct net_device *loopback_dev =
c346dca1 341 dev_net(dev)->loopback_dev;
1da177e4 342
97cac082
DM
343 if (dev != loopback_dev) {
344 if (idev && idev->dev == dev) {
345 struct inet6_dev *loopback_idev =
346 in6_dev_get(loopback_dev);
347 if (loopback_idev) {
348 rt->rt6i_idev = loopback_idev;
349 in6_dev_put(idev);
350 }
351 }
352 if (rt->n && rt->n->dev == dev) {
353 rt->n->dev = loopback_dev;
354 dev_hold(loopback_dev);
355 dev_put(dev);
1da177e4
LT
356 }
357 }
358}
359
a50feda5 360static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 361{
1716a961
G
362 struct rt6_info *ort = NULL;
363
364 if (rt->rt6i_flags & RTF_EXPIRES) {
365 if (time_after(jiffies, rt->dst.expires))
a50feda5 366 return true;
1716a961
G
367 } else if (rt->dst.from) {
368 ort = (struct rt6_info *) rt->dst.from;
369 return (ort->rt6i_flags & RTF_EXPIRES) &&
370 time_after(jiffies, ort->dst.expires);
371 }
a50feda5 372 return false;
1da177e4
LT
373}
374
a50feda5 375static bool rt6_need_strict(const struct in6_addr *daddr)
c71099ac 376{
a02cec21
ED
377 return ipv6_addr_type(daddr) &
378 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
379}
380
1da177e4 381/*
c71099ac 382 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
383 */
384
8ed67789
DL
385static inline struct rt6_info *rt6_device_match(struct net *net,
386 struct rt6_info *rt,
b71d1d42 387 const struct in6_addr *saddr,
1da177e4 388 int oif,
d420895e 389 int flags)
1da177e4
LT
390{
391 struct rt6_info *local = NULL;
392 struct rt6_info *sprt;
393
dd3abc4e
YH
394 if (!oif && ipv6_addr_any(saddr))
395 goto out;
396
d8d1f30b 397 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 398 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
399
400 if (oif) {
1da177e4
LT
401 if (dev->ifindex == oif)
402 return sprt;
403 if (dev->flags & IFF_LOOPBACK) {
38308473 404 if (!sprt->rt6i_idev ||
1da177e4 405 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 406 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 407 continue;
1ab1457c 408 if (local && (!oif ||
1da177e4
LT
409 local->rt6i_idev->dev->ifindex == oif))
410 continue;
411 }
412 local = sprt;
413 }
dd3abc4e
YH
414 } else {
415 if (ipv6_chk_addr(net, saddr, dev,
416 flags & RT6_LOOKUP_F_IFACE))
417 return sprt;
1da177e4 418 }
dd3abc4e 419 }
1da177e4 420
dd3abc4e 421 if (oif) {
1da177e4
LT
422 if (local)
423 return local;
424
d420895e 425 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 426 return net->ipv6.ip6_null_entry;
1da177e4 427 }
dd3abc4e 428out:
1da177e4
LT
429 return rt;
430}
431
27097255
YH
432#ifdef CONFIG_IPV6_ROUTER_PREF
433static void rt6_probe(struct rt6_info *rt)
434{
f2c31e32 435 struct neighbour *neigh;
27097255
YH
436 /*
437 * Okay, this does not seem to be appropriate
438 * for now, however, we need to check if it
439 * is really so; aka Router Reachability Probing.
440 *
441 * Router Reachability Probe MUST be rate-limited
442 * to no more than one per minute.
443 */
f2c31e32 444 rcu_read_lock();
97cac082 445 neigh = rt ? rt->n : NULL;
27097255 446 if (!neigh || (neigh->nud_state & NUD_VALID))
f2c31e32 447 goto out;
27097255
YH
448 read_lock_bh(&neigh->lock);
449 if (!(neigh->nud_state & NUD_VALID) &&
52e16356 450 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
27097255
YH
451 struct in6_addr mcaddr;
452 struct in6_addr *target;
453
454 neigh->updated = jiffies;
455 read_unlock_bh(&neigh->lock);
456
457 target = (struct in6_addr *)&neigh->primary_key;
458 addrconf_addr_solict_mult(target, &mcaddr);
d1918542 459 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
f2c31e32 460 } else {
27097255 461 read_unlock_bh(&neigh->lock);
f2c31e32
ED
462 }
463out:
464 rcu_read_unlock();
27097255
YH
465}
466#else
/* Without CONFIG_IPV6_ROUTER_PREF there is no probing: empty stub. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
470#endif
471
1da177e4 472/*
554cfb7e 473 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 474 */
b6f99a21 475static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 476{
d1918542 477 struct net_device *dev = rt->dst.dev;
161980f4 478 if (!oif || dev->ifindex == oif)
554cfb7e 479 return 2;
161980f4
DM
480 if ((dev->flags & IFF_LOOPBACK) &&
481 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
482 return 1;
483 return 0;
554cfb7e 484}
1da177e4 485
b6f99a21 486static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 487{
f2c31e32 488 struct neighbour *neigh;
398bcbeb 489 int m;
f2c31e32
ED
490
491 rcu_read_lock();
97cac082 492 neigh = rt->n;
4d0c5911
YH
493 if (rt->rt6i_flags & RTF_NONEXTHOP ||
494 !(rt->rt6i_flags & RTF_GATEWAY))
495 m = 1;
496 else if (neigh) {
554cfb7e
YH
497 read_lock_bh(&neigh->lock);
498 if (neigh->nud_state & NUD_VALID)
4d0c5911 499 m = 2;
398bcbeb
YH
500#ifdef CONFIG_IPV6_ROUTER_PREF
501 else if (neigh->nud_state & NUD_FAILED)
502 m = 0;
503#endif
504 else
ea73ee23 505 m = 1;
554cfb7e 506 read_unlock_bh(&neigh->lock);
398bcbeb
YH
507 } else
508 m = 0;
f2c31e32 509 rcu_read_unlock();
554cfb7e 510 return m;
1da177e4
LT
511}
512
554cfb7e
YH
513static int rt6_score_route(struct rt6_info *rt, int oif,
514 int strict)
1da177e4 515{
4d0c5911 516 int m, n;
1ab1457c 517
4d0c5911 518 m = rt6_check_dev(rt, oif);
77d16f45 519 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 520 return -1;
ebacaaa0
YH
521#ifdef CONFIG_IPV6_ROUTER_PREF
522 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
523#endif
4d0c5911 524 n = rt6_check_neigh(rt);
557e92ef 525 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
526 return -1;
527 return m;
528}
529
f11e6659
DM
530static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
531 int *mpri, struct rt6_info *match)
554cfb7e 532{
f11e6659
DM
533 int m;
534
535 if (rt6_check_expired(rt))
536 goto out;
537
538 m = rt6_score_route(rt, oif, strict);
539 if (m < 0)
540 goto out;
541
542 if (m > *mpri) {
543 if (strict & RT6_LOOKUP_F_REACHABLE)
544 rt6_probe(match);
545 *mpri = m;
546 match = rt;
547 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
548 rt6_probe(rt);
549 }
550
551out:
552 return match;
553}
554
555static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
556 struct rt6_info *rr_head,
557 u32 metric, int oif, int strict)
558{
559 struct rt6_info *rt, *match;
554cfb7e 560 int mpri = -1;
1da177e4 561
f11e6659
DM
562 match = NULL;
563 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 564 rt = rt->dst.rt6_next)
f11e6659
DM
565 match = find_match(rt, oif, strict, &mpri, match);
566 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 567 rt = rt->dst.rt6_next)
f11e6659 568 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 569
f11e6659
DM
570 return match;
571}
1da177e4 572
f11e6659
DM
573static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
574{
575 struct rt6_info *match, *rt0;
8ed67789 576 struct net *net;
1da177e4 577
f11e6659
DM
578 rt0 = fn->rr_ptr;
579 if (!rt0)
580 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 581
f11e6659 582 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 583
554cfb7e 584 if (!match &&
f11e6659 585 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 586 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 587
554cfb7e 588 /* no entries matched; do round-robin */
f11e6659
DM
589 if (!next || next->rt6i_metric != rt0->rt6i_metric)
590 next = fn->leaf;
591
592 if (next != rt0)
593 fn->rr_ptr = next;
1da177e4 594 }
1da177e4 595
d1918542 596 net = dev_net(rt0->dst.dev);
a02cec21 597 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
598}
599
70ceb4f5
YH
600#ifdef CONFIG_IPV6_ROUTE_INFO
601int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 602 const struct in6_addr *gwaddr)
70ceb4f5 603{
c346dca1 604 struct net *net = dev_net(dev);
70ceb4f5
YH
605 struct route_info *rinfo = (struct route_info *) opt;
606 struct in6_addr prefix_buf, *prefix;
607 unsigned int pref;
4bed72e4 608 unsigned long lifetime;
70ceb4f5
YH
609 struct rt6_info *rt;
610
611 if (len < sizeof(struct route_info)) {
612 return -EINVAL;
613 }
614
615 /* Sanity check for prefix_len and length */
616 if (rinfo->length > 3) {
617 return -EINVAL;
618 } else if (rinfo->prefix_len > 128) {
619 return -EINVAL;
620 } else if (rinfo->prefix_len > 64) {
621 if (rinfo->length < 2) {
622 return -EINVAL;
623 }
624 } else if (rinfo->prefix_len > 0) {
625 if (rinfo->length < 1) {
626 return -EINVAL;
627 }
628 }
629
630 pref = rinfo->route_pref;
631 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 632 return -EINVAL;
70ceb4f5 633
4bed72e4 634 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
635
636 if (rinfo->length == 3)
637 prefix = (struct in6_addr *)rinfo->prefix;
638 else {
639 /* this function is safe */
640 ipv6_addr_prefix(&prefix_buf,
641 (struct in6_addr *)rinfo->prefix,
642 rinfo->prefix_len);
643 prefix = &prefix_buf;
644 }
645
efa2cea0
DL
646 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
647 dev->ifindex);
70ceb4f5
YH
648
649 if (rt && !lifetime) {
e0a1ad73 650 ip6_del_rt(rt);
70ceb4f5
YH
651 rt = NULL;
652 }
653
654 if (!rt && lifetime)
efa2cea0 655 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
656 pref);
657 else if (rt)
658 rt->rt6i_flags = RTF_ROUTEINFO |
659 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
660
661 if (rt) {
1716a961
G
662 if (!addrconf_finite_timeout(lifetime))
663 rt6_clean_expires(rt);
664 else
665 rt6_set_expires(rt, jiffies + HZ * lifetime);
666
d8d1f30b 667 dst_release(&rt->dst);
70ceb4f5
YH
668 }
669 return 0;
670}
671#endif
672
/*
 * BACKTRACK - retry a failed lookup higher up in the tree.
 *
 * Relies on the expansion site providing the variables `rt` and `fn` and
 * the labels `out` and `restart`.  When `rt` is the null entry, walk from
 * `fn` towards the root: at each step move to the parent, or - if the
 * parent has a subtree other than the one we came from - to the result of
 * a lookup of @saddr in that subtree.  Jumps to `restart` at the first
 * node flagged RTN_RTINFO and to `out` once the top-level root is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 690
8ed67789
DL
691static struct rt6_info *ip6_pol_route_lookup(struct net *net,
692 struct fib6_table *table,
4c9483b2 693 struct flowi6 *fl6, int flags)
1da177e4
LT
694{
695 struct fib6_node *fn;
696 struct rt6_info *rt;
697
c71099ac 698 read_lock_bh(&table->tb6_lock);
4c9483b2 699 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
700restart:
701 rt = fn->leaf;
4c9483b2
DM
702 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
703 BACKTRACK(net, &fl6->saddr);
c71099ac 704out:
d8d1f30b 705 dst_use(&rt->dst, jiffies);
c71099ac 706 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
707 return rt;
708
709}
710
ea6e574e
FW
711struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
712 int flags)
713{
714 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
715}
716EXPORT_SYMBOL_GPL(ip6_route_lookup);
717
9acd9f3a
YH
718struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
719 const struct in6_addr *saddr, int oif, int strict)
c71099ac 720{
4c9483b2
DM
721 struct flowi6 fl6 = {
722 .flowi6_oif = oif,
723 .daddr = *daddr,
c71099ac
TG
724 };
725 struct dst_entry *dst;
77d16f45 726 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 727
adaa70bb 728 if (saddr) {
4c9483b2 729 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
730 flags |= RT6_LOOKUP_F_HAS_SADDR;
731 }
732
4c9483b2 733 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
734 if (dst->error == 0)
735 return (struct rt6_info *) dst;
736
737 dst_release(dst);
738
1da177e4
LT
739 return NULL;
740}
741
7159039a
YH
742EXPORT_SYMBOL(rt6_lookup);
743
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to the route, it may be destroyed.
 */
749
86872cb5 750static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
751{
752 int err;
c71099ac 753 struct fib6_table *table;
1da177e4 754
c71099ac
TG
755 table = rt->rt6i_table;
756 write_lock_bh(&table->tb6_lock);
86872cb5 757 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 758 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
759
760 return err;
761}
762
40e22e8f
TG
763int ip6_ins_rt(struct rt6_info *rt)
764{
4d1169c1 765 struct nl_info info = {
d1918542 766 .nl_net = dev_net(rt->dst.dev),
4d1169c1 767 };
528c4ceb 768 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
769}
770
1716a961 771static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 772 const struct in6_addr *daddr,
b71d1d42 773 const struct in6_addr *saddr)
1da177e4 774{
1da177e4
LT
775 struct rt6_info *rt;
776
777 /*
778 * Clone the route.
779 */
780
21efcfa0 781 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
782
783 if (rt) {
14deae41
DM
784 int attempts = !in_softirq();
785
38308473 786 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 787 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 788 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 789 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 790 rt->rt6i_gateway = *daddr;
58c4fb86 791 }
1da177e4 792
1da177e4 793 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
794
795#ifdef CONFIG_IPV6_SUBTREES
796 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 797 rt->rt6i_src.addr = *saddr;
1da177e4
LT
798 rt->rt6i_src.plen = 128;
799 }
800#endif
801
14deae41 802 retry:
8ade06c6 803 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 804 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
805 int saved_rt_min_interval =
806 net->ipv6.sysctl.ip6_rt_gc_min_interval;
807 int saved_rt_elasticity =
808 net->ipv6.sysctl.ip6_rt_gc_elasticity;
809
810 if (attempts-- > 0) {
811 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
812 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
813
86393e52 814 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
815
816 net->ipv6.sysctl.ip6_rt_gc_elasticity =
817 saved_rt_elasticity;
818 net->ipv6.sysctl.ip6_rt_gc_min_interval =
819 saved_rt_min_interval;
820 goto retry;
821 }
822
f3213831 823 net_warn_ratelimited("Neighbour table overflow\n");
d8d1f30b 824 dst_free(&rt->dst);
14deae41
DM
825 return NULL;
826 }
95a9a5ba 827 }
1da177e4 828
95a9a5ba
YH
829 return rt;
830}
1da177e4 831
21efcfa0
ED
832static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
833 const struct in6_addr *daddr)
299d9939 834{
21efcfa0
ED
835 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
836
299d9939 837 if (rt) {
299d9939 838 rt->rt6i_flags |= RTF_CACHE;
97cac082 839 rt->n = neigh_clone(ort->n);
299d9939
YH
840 }
841 return rt;
842}
843
8ed67789 844static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 845 struct flowi6 *fl6, int flags)
1da177e4
LT
846{
847 struct fib6_node *fn;
519fbd87 848 struct rt6_info *rt, *nrt;
c71099ac 849 int strict = 0;
1da177e4 850 int attempts = 3;
519fbd87 851 int err;
53b7997f 852 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 853
77d16f45 854 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
855
856relookup:
c71099ac 857 read_lock_bh(&table->tb6_lock);
1da177e4 858
8238dd06 859restart_2:
4c9483b2 860 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
861
862restart:
4acad72d 863 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 864
4c9483b2 865 BACKTRACK(net, &fl6->saddr);
8ed67789 866 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 867 rt->rt6i_flags & RTF_CACHE)
1ddef044 868 goto out;
1da177e4 869
d8d1f30b 870 dst_hold(&rt->dst);
c71099ac 871 read_unlock_bh(&table->tb6_lock);
fb9de91e 872
97cac082 873 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 874 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 875 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 876 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
877 else
878 goto out2;
e40cf353 879
d8d1f30b 880 dst_release(&rt->dst);
8ed67789 881 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 882
d8d1f30b 883 dst_hold(&rt->dst);
519fbd87 884 if (nrt) {
40e22e8f 885 err = ip6_ins_rt(nrt);
519fbd87 886 if (!err)
1da177e4 887 goto out2;
1da177e4 888 }
1da177e4 889
519fbd87
YH
890 if (--attempts <= 0)
891 goto out2;
892
893 /*
c71099ac 894 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
895 * released someone could insert this route. Relookup.
896 */
d8d1f30b 897 dst_release(&rt->dst);
519fbd87
YH
898 goto relookup;
899
900out:
8238dd06
YH
901 if (reachable) {
902 reachable = 0;
903 goto restart_2;
904 }
d8d1f30b 905 dst_hold(&rt->dst);
c71099ac 906 read_unlock_bh(&table->tb6_lock);
1da177e4 907out2:
d8d1f30b
CG
908 rt->dst.lastuse = jiffies;
909 rt->dst.__use++;
c71099ac
TG
910
911 return rt;
1da177e4
LT
912}
913
8ed67789 914static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 915 struct flowi6 *fl6, int flags)
4acad72d 916{
4c9483b2 917 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
918}
919
72331bc0
SL
920static struct dst_entry *ip6_route_input_lookup(struct net *net,
921 struct net_device *dev,
922 struct flowi6 *fl6, int flags)
923{
924 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
925 flags |= RT6_LOOKUP_F_IFACE;
926
927 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
928}
929
c71099ac
TG
930void ip6_route_input(struct sk_buff *skb)
931{
b71d1d42 932 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 933 struct net *net = dev_net(skb->dev);
adaa70bb 934 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
935 struct flowi6 fl6 = {
936 .flowi6_iif = skb->dev->ifindex,
937 .daddr = iph->daddr,
938 .saddr = iph->saddr,
38308473 939 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
940 .flowi6_mark = skb->mark,
941 .flowi6_proto = iph->nexthdr,
c71099ac 942 };
adaa70bb 943
72331bc0 944 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
945}
946
8ed67789 947static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 948 struct flowi6 *fl6, int flags)
1da177e4 949{
4c9483b2 950 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
951}
952
9c7a4f9c 953struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 954 struct flowi6 *fl6)
c71099ac
TG
955{
956 int flags = 0;
957
4dc27d1c
DM
958 fl6->flowi6_iif = net->loopback_dev->ifindex;
959
4c9483b2 960 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 961 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 962
4c9483b2 963 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 964 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
965 else if (sk)
966 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 967
4c9483b2 968 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
969}
970
7159039a 971EXPORT_SYMBOL(ip6_route_output);
1da177e4 972
2774c131 973struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 974{
5c1e6aa3 975 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
976 struct dst_entry *new = NULL;
977
5c1e6aa3 978 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 979 if (rt) {
cf911662 980 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
97bab73f 981 rt6_init_peer(rt, net->ipv6.peers);
cf911662 982
d8d1f30b 983 new = &rt->dst;
14e50e57 984
14e50e57 985 new->__use = 1;
352e512c
HX
986 new->input = dst_discard;
987 new->output = dst_discard;
14e50e57 988
21efcfa0
ED
989 if (dst_metrics_read_only(&ort->dst))
990 new->_metrics = ort->dst._metrics;
991 else
992 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
993 rt->rt6i_idev = ort->rt6i_idev;
994 if (rt->rt6i_idev)
995 in6_dev_hold(rt->rt6i_idev);
14e50e57 996
4e3fd7a0 997 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
998 rt->rt6i_flags = ort->rt6i_flags;
999 rt6_clean_expires(rt);
14e50e57
DM
1000 rt->rt6i_metric = 0;
1001
1002 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1003#ifdef CONFIG_IPV6_SUBTREES
1004 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1005#endif
1006
1007 dst_free(new);
1008 }
1009
69ead7af
DM
1010 dst_release(dst_orig);
1011 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1012}
14e50e57 1013
1da177e4
LT
1014/*
1015 * Destination cache support functions
1016 */
1017
1018static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1019{
1020 struct rt6_info *rt;
1021
1022 rt = (struct rt6_info *) dst;
1023
6431cbc2
DM
1024 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1025 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
97bab73f 1026 if (!rt6_has_peer(rt))
6431cbc2
DM
1027 rt6_bind_peer(rt, 0);
1028 rt->rt6i_peer_genid = rt6_peer_genid();
1029 }
1da177e4 1030 return dst;
6431cbc2 1031 }
1da177e4
LT
1032 return NULL;
1033}
1034
1035static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1036{
1037 struct rt6_info *rt = (struct rt6_info *) dst;
1038
1039 if (rt) {
54c1a859
YH
1040 if (rt->rt6i_flags & RTF_CACHE) {
1041 if (rt6_check_expired(rt)) {
1042 ip6_del_rt(rt);
1043 dst = NULL;
1044 }
1045 } else {
1da177e4 1046 dst_release(dst);
54c1a859
YH
1047 dst = NULL;
1048 }
1da177e4 1049 }
54c1a859 1050 return dst;
1da177e4
LT
1051}
1052
1053static void ip6_link_failure(struct sk_buff *skb)
1054{
1055 struct rt6_info *rt;
1056
3ffe533c 1057 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1058
adf30907 1059 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1060 if (rt) {
1716a961
G
1061 if (rt->rt6i_flags & RTF_CACHE)
1062 rt6_update_expires(rt, 0);
1063 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1064 rt->rt6i_node->fn_sernum = -1;
1065 }
1066}
1067
1068static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1069{
1070 struct rt6_info *rt6 = (struct rt6_info*)dst;
1071
81aded24 1072 dst_confirm(dst);
1da177e4 1073 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
81aded24
DM
1074 struct net *net = dev_net(dst->dev);
1075
1da177e4
LT
1076 rt6->rt6i_flags |= RTF_MODIFIED;
1077 if (mtu < IPV6_MIN_MTU) {
defb3519 1078 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1079 mtu = IPV6_MIN_MTU;
defb3519
DM
1080 features |= RTAX_FEATURE_ALLFRAG;
1081 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1082 }
defb3519 1083 dst_metric_set(dst, RTAX_MTU, mtu);
81aded24 1084 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1085 }
1086}
1087
42ae66c8
DM
1088void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1089 int oif, u32 mark)
81aded24
DM
1090{
1091 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1092 struct dst_entry *dst;
1093 struct flowi6 fl6;
1094
1095 memset(&fl6, 0, sizeof(fl6));
1096 fl6.flowi6_oif = oif;
1097 fl6.flowi6_mark = mark;
3e12939a 1098 fl6.flowi6_flags = 0;
81aded24
DM
1099 fl6.daddr = iph->daddr;
1100 fl6.saddr = iph->saddr;
1101 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1102
1103 dst = ip6_route_output(net, NULL, &fl6);
1104 if (!dst->error)
1105 ip6_rt_update_pmtu(dst, ntohl(mtu));
1106 dst_release(dst);
1107}
1108EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1109
1110void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1111{
1112 ip6_update_pmtu(skb, sock_net(sk), mtu,
1113 sk->sk_bound_dev_if, sk->sk_mark);
1114}
1115EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1116
3a5ad2ee
DM
1117void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1118{
1119 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1120 struct dst_entry *dst;
1121 struct flowi6 fl6;
1122
1123 memset(&fl6, 0, sizeof(fl6));
1124 fl6.flowi6_oif = oif;
1125 fl6.flowi6_mark = mark;
1126 fl6.flowi6_flags = 0;
1127 fl6.daddr = iph->daddr;
1128 fl6.saddr = iph->saddr;
1129 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1130
1131 dst = ip6_route_output(net, NULL, &fl6);
1132 if (!dst->error)
1133 rt6_do_redirect(dst, skb);
1134 dst_release(dst);
1135}
1136EXPORT_SYMBOL_GPL(ip6_redirect);
1137
1138void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1139{
1140 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1141}
1142EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1143
0dbaee3b 1144static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1145{
0dbaee3b
DM
1146 struct net_device *dev = dst->dev;
1147 unsigned int mtu = dst_mtu(dst);
1148 struct net *net = dev_net(dev);
1149
1da177e4
LT
1150 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1151
5578689a
DL
1152 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1153 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1154
1155 /*
1ab1457c
YH
1156 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1157 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1158 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1159 * rely only on pmtu discovery"
1160 */
1161 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1162 mtu = IPV6_MAXPLEN;
1163 return mtu;
1164}
1165
ebb762f2 1166static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1167{
d33e4553 1168 struct inet6_dev *idev;
618f9bc7
SK
1169 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1170
1171 if (mtu)
1172 return mtu;
1173
1174 mtu = IPV6_MIN_MTU;
d33e4553
DM
1175
1176 rcu_read_lock();
1177 idev = __in6_dev_get(dst->dev);
1178 if (idev)
1179 mtu = idev->cnf.mtu6;
1180 rcu_read_unlock();
1181
1182 return mtu;
1183}
1184
3b00944c
YH
/*
 * Singly linked list (chained through dst->next) of the dst entries created
 * by icmp6_dst_alloc().  icmp6_dst_lock is taken with bottom halves disabled
 * around every insertion into, and every walk of, this list.
 */
1185static struct dst_entry *icmp6_dst_gc_list;
1186static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1187
3b00944c 1188struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1189 struct neighbour *neigh,
87a11578 1190 struct flowi6 *fl6)
1da177e4 1191{
87a11578 1192 struct dst_entry *dst;
1da177e4
LT
1193 struct rt6_info *rt;
1194 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1195 struct net *net = dev_net(dev);
1da177e4 1196
38308473 1197 if (unlikely(!idev))
122bdf67 1198 return ERR_PTR(-ENODEV);
1da177e4 1199
8b96d22d 1200 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1201 if (unlikely(!rt)) {
1da177e4 1202 in6_dev_put(idev);
87a11578 1203 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1204 goto out;
1205 }
1206
1da177e4
LT
1207 if (neigh)
1208 neigh_hold(neigh);
14deae41 1209 else {
f894cbf8 1210 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
b43faac6 1211 if (IS_ERR(neigh)) {
252c3d84 1212 in6_dev_put(idev);
b43faac6
DM
1213 dst_free(&rt->dst);
1214 return ERR_CAST(neigh);
1215 }
14deae41 1216 }
1da177e4 1217
8e2ec639
YZ
1218 rt->dst.flags |= DST_HOST;
1219 rt->dst.output = ip6_output;
97cac082 1220 rt->n = neigh;
d8d1f30b 1221 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1222 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1223 rt->rt6i_dst.plen = 128;
1224 rt->rt6i_idev = idev;
7011687f 1225 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1226
3b00944c 1227 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1228 rt->dst.next = icmp6_dst_gc_list;
1229 icmp6_dst_gc_list = &rt->dst;
3b00944c 1230 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1231
5578689a 1232 fib6_force_start_gc(net);
1da177e4 1233
87a11578
DM
1234 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1235
1da177e4 1236out:
87a11578 1237 return dst;
1da177e4
LT
1238}
1239
3d0f24a7 1240int icmp6_dst_gc(void)
1da177e4 1241{
e9476e95 1242 struct dst_entry *dst, **pprev;
3d0f24a7 1243 int more = 0;
1da177e4 1244
3b00944c
YH
1245 spin_lock_bh(&icmp6_dst_lock);
1246 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1247
1da177e4
LT
1248 while ((dst = *pprev) != NULL) {
1249 if (!atomic_read(&dst->__refcnt)) {
1250 *pprev = dst->next;
1251 dst_free(dst);
1da177e4
LT
1252 } else {
1253 pprev = &dst->next;
3d0f24a7 1254 ++more;
1da177e4
LT
1255 }
1256 }
1257
3b00944c 1258 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1259
3d0f24a7 1260 return more;
1da177e4
LT
1261}
1262
1e493d19
DM
1263static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1264 void *arg)
1265{
1266 struct dst_entry *dst, **pprev;
1267
1268 spin_lock_bh(&icmp6_dst_lock);
1269 pprev = &icmp6_dst_gc_list;
1270 while ((dst = *pprev) != NULL) {
1271 struct rt6_info *rt = (struct rt6_info *) dst;
1272 if (func(rt, arg)) {
1273 *pprev = dst->next;
1274 dst_free(dst);
1275 } else {
1276 pprev = &dst->next;
1277 }
1278 }
1279 spin_unlock_bh(&icmp6_dst_lock);
1280}
1281
569d3645 1282static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1283{
1da177e4 1284 unsigned long now = jiffies;
86393e52 1285 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1286 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1287 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1288 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1289 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1290 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1291 int entries;
7019b78e 1292
fc66f95c 1293 entries = dst_entries_get_fast(ops);
7019b78e 1294 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1295 entries <= rt_max_size)
1da177e4
LT
1296 goto out;
1297
6891a346
BT
1298 net->ipv6.ip6_rt_gc_expire++;
1299 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1300 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1301 entries = dst_entries_get_slow(ops);
1302 if (entries < ops->gc_thresh)
7019b78e 1303 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1304out:
7019b78e 1305 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1306 return entries > rt_max_size;
1da177e4
LT
1307}
1308
/* Clean the host part of a prefix. Not necessary in a radix tree,
   but it results in cleaner routing tables.

   Remove it only once everything else works!
 */
1314
6b75d090 1315int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1316{
5170ae82 1317 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1318 if (hoplimit == 0) {
6b75d090 1319 struct net_device *dev = dst->dev;
c68f24cc
ED
1320 struct inet6_dev *idev;
1321
1322 rcu_read_lock();
1323 idev = __in6_dev_get(dev);
1324 if (idev)
6b75d090 1325 hoplimit = idev->cnf.hop_limit;
c68f24cc 1326 else
53b7997f 1327 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1328 rcu_read_unlock();
1da177e4
LT
1329 }
1330 return hoplimit;
1331}
abbf46ae 1332EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1333
1334/*
1335 *
1336 */
1337
/*
 * ip6_route_add - build a route from @cfg and insert it into its FIB table.
 *
 * Steps, in order:
 *  - reject prefix lengths above 128 (and any source prefix when
 *    CONFIG_IPV6_SUBTREES is off);
 *  - when fc_ifindex is set, take references on the device and its
 *    inet6_dev;
 *  - default fc_metric to IP6_RT_PRIO_USER and fc_protocol to RTPROT_BOOT
 *    when the caller left them zero/unspecified;
 *  - look up or create the table named by fc_table;
 *  - allocate the rt6_info, set expiry, input/output handlers, destination
 *    (and source) prefix and metric;
 *  - reject routes, and non-local routes over a loopback device to a
 *    non-loopback destination, are turned into discard routes bound to the
 *    namespace's loopback device and jump straight to install_route;
 *  - for RTF_GATEWAY routes validate the gateway address and, when it is
 *    not link-local unicast, resolve the device through a lookup of the
 *    gateway itself;
 *  - validate fc_prefsrc, bind a neighbour when required, copy fc_flags;
 *  - apply the netlink metric attributes in fc_mx and hand the route to
 *    __ip6_ins_rt().
 *
 * Note that @cfg is modified: fc_metric, fc_protocol and fc_nlinfo.nl_net
 * may be rewritten.
 *
 * Returns the result of __ip6_ins_rt() once the route is fully built, or a
 * negative errno.  On the error path ("out") the device and inet6_dev
 * references are dropped and the partially built route is freed.
 */
86872cb5 1338int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1339{
1340 int err;
5578689a 1341 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1342 struct rt6_info *rt = NULL;
1343 struct net_device *dev = NULL;
1344 struct inet6_dev *idev = NULL;
c71099ac 1345 struct fib6_table *table;
1da177e4
LT
1346 int addr_type;
1347
86872cb5 1348 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1349 return -EINVAL;
1350#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1351 if (cfg->fc_src_len)
1da177e4
LT
1352 return -EINVAL;
1353#endif
/* Pin the requested device and its inet6_dev; "out" drops both on failure. */
86872cb5 1354 if (cfg->fc_ifindex) {
1da177e4 1355 err = -ENODEV;
5578689a 1356 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1357 if (!dev)
1358 goto out;
1359 idev = in6_dev_get(dev);
1360 if (!idev)
1361 goto out;
1362 }
1363
86872cb5
TG
1364 if (cfg->fc_metric == 0)
1365 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1366
d71314b4 1367 err = -ENOBUFS;
/*
 * A netlink request without NLM_F_CREATE only looks the table up first;
 * if it does not exist a warning is logged and the table is created anyway.
 */
38308473
DM
1368 if (cfg->fc_nlinfo.nlh &&
1369 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1370 table = fib6_get_table(net, cfg->fc_table);
38308473 1371 if (!table) {
f3213831 1372 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1373 table = fib6_new_table(net, cfg->fc_table);
1374 }
1375 } else {
1376 table = fib6_new_table(net, cfg->fc_table);
1377 }
38308473
DM
1378
1379 if (!table)
c71099ac 1380 goto out;
c71099ac 1381
8b96d22d 1382 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1da177e4 1383
38308473 1384 if (!rt) {
1da177e4
LT
1385 err = -ENOMEM;
1386 goto out;
1387 }
1388
d8d1f30b 1389 rt->dst.obsolete = -1;
1716a961
G
1390
1391 if (cfg->fc_flags & RTF_EXPIRES)
1392 rt6_set_expires(rt, jiffies +
1393 clock_t_to_jiffies(cfg->fc_expires));
1394 else
1395 rt6_clean_expires(rt);
1da177e4 1396
86872cb5
TG
1397 if (cfg->fc_protocol == RTPROT_UNSPEC)
1398 cfg->fc_protocol = RTPROT_BOOT;
1399 rt->rt6i_protocol = cfg->fc_protocol;
1400
1401 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1402
/* Input handler: multicast delivery, local delivery, or forwarding. */
1403 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1404 rt->dst.input = ip6_mc_input;
ab79ad14
1405 else if (cfg->fc_flags & RTF_LOCAL)
1406 rt->dst.input = ip6_input;
1da177e4 1407 else
d8d1f30b 1408 rt->dst.input = ip6_forward;
1da177e4 1409
d8d1f30b 1410 rt->dst.output = ip6_output;
1da177e4 1411
86872cb5
TG
1412 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1413 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1414 if (rt->rt6i_dst.plen == 128)
11d53b49 1415 rt->dst.flags |= DST_HOST;
1da177e4 1416
/* Non-host routes with metric attributes get a private, zeroed metrics array. */
8e2ec639
YZ
1417 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1418 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1419 if (!metrics) {
1420 err = -ENOMEM;
1421 goto out;
1422 }
1423 dst_init_metrics(&rt->dst, metrics, 0);
1424 }
1da177e4 1425#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1426 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1427 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1428#endif
1429
86872cb5 1430 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1431
1432 /* We cannot add true routes via loopback here,
1433 they would result in kernel looping; promote them to reject routes
1434 */
86872cb5 1435 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1436 (dev && (dev->flags & IFF_LOOPBACK) &&
1437 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1438 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1439 /* hold loopback dev/idev if we haven't done so. */
5578689a 1440 if (dev != net->loopback_dev) {
1da177e4
LT
1441 if (dev) {
1442 dev_put(dev);
1443 in6_dev_put(idev);
1444 }
5578689a 1445 dev = net->loopback_dev;
1da177e4
LT
1446 dev_hold(dev);
1447 idev = in6_dev_get(dev);
1448 if (!idev) {
1449 err = -ENODEV;
1450 goto out;
1451 }
1452 }
d8d1f30b
CG
1453 rt->dst.output = ip6_pkt_discard_out;
1454 rt->dst.input = ip6_pkt_discard;
1455 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1456 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1457 goto install_route;
1458 }
1459
86872cb5 1460 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1461 const struct in6_addr *gw_addr;
1da177e4
LT
1462 int gwa_type;
1463
86872cb5 1464 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1465 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1466 gwa_type = ipv6_addr_type(gw_addr);
1467
1468 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1469 struct rt6_info *grt;
1470
1471 /* IPv6 strictly inhibits using not link-local
1472 addresses as nexthop address.
1473 Otherwise, router will not able to send redirects.
1474 It is very good, but in some (rare!) circumstances
1475 (SIT, PtP, NBMA NOARP links) it is handy to allow
1476 some exceptions. --ANK
1477 */
1478 err = -EINVAL;
38308473 1479 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1480 goto out;
1481
5578689a 1482 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1483
1484 err = -EHOSTUNREACH;
38308473 1485 if (!grt)
1da177e4
LT
1486 goto out;
1487 if (dev) {
d1918542 1488 if (dev != grt->dst.dev) {
d8d1f30b 1489 dst_release(&grt->dst);
1da177e4
LT
1490 goto out;
1491 }
1492 } else {
d1918542 1493 dev = grt->dst.dev;
1da177e4
LT
1494 idev = grt->rt6i_idev;
1495 dev_hold(dev);
1496 in6_dev_hold(grt->rt6i_idev);
1497 }
/* err stays -EHOSTUNREACH when the route to the gateway is itself a gateway route. */
38308473 1498 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1499 err = 0;
d8d1f30b 1500 dst_release(&grt->dst);
1da177e4
LT
1501
1502 if (err)
1503 goto out;
1504 }
1505 err = -EINVAL;
38308473 1506 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1507 goto out;
1508 }
1509
1510 err = -ENODEV;
38308473 1511 if (!dev)
1da177e4
LT
1512 goto out;
1513
/* A preferred source address is accepted only if ipv6_chk_addr() approves it for dev. */
c3968a85
DW
1514 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1515 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1516 err = -EINVAL;
1517 goto out;
1518 }
4e3fd7a0 1519 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1520 rt->rt6i_prefsrc.plen = 128;
1521 } else
1522 rt->rt6i_prefsrc.plen = 0;
1523
86872cb5 1524 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1525 err = rt6_bind_neighbour(rt, dev);
f83c7790 1526 if (err)
1da177e4 1527 goto out;
1da177e4
LT
1528 }
1529
86872cb5 1530 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1531
/* Also reached directly by discard routes, which skip the gateway, prefsrc and neighbour steps. */
1532install_route:
86872cb5
TG
1533 if (cfg->fc_mx) {
1534 struct nlattr *nla;
1535 int remaining;
1536
1537 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1538 int type = nla_type(nla);
86872cb5
TG
1539
1540 if (type) {
1541 if (type > RTAX_MAX) {
1da177e4
LT
1542 err = -EINVAL;
1543 goto out;
1544 }
86872cb5 1545
defb3519 1546 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1547 }
1da177e4
LT
1548 }
1549 }
1550
/* The dev and idev references taken above are stored in the route from here on. */
d8d1f30b 1551 rt->dst.dev = dev;
1da177e4 1552 rt->rt6i_idev = idev;
c71099ac 1553 rt->rt6i_table = table;
63152fc0 1554
c346dca1 1555 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1556
86872cb5 1557 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1558
1559out:
1560 if (dev)
1561 dev_put(dev);
1562 if (idev)
1563 in6_dev_put(idev);
1564 if (rt)
d8d1f30b 1565 dst_free(&rt->dst);
1da177e4
LT
1566 return err;
1567}
1568
86872cb5 1569static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1570{
1571 int err;
c71099ac 1572 struct fib6_table *table;
d1918542 1573 struct net *net = dev_net(rt->dst.dev);
1da177e4 1574
8ed67789 1575 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1576 return -ENOENT;
1577
c71099ac
TG
1578 table = rt->rt6i_table;
1579 write_lock_bh(&table->tb6_lock);
1da177e4 1580
86872cb5 1581 err = fib6_del(rt, info);
d8d1f30b 1582 dst_release(&rt->dst);
1da177e4 1583
c71099ac 1584 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1585
1586 return err;
1587}
1588
e0a1ad73
TG
1589int ip6_del_rt(struct rt6_info *rt)
1590{
4d1169c1 1591 struct nl_info info = {
d1918542 1592 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1593 };
528c4ceb 1594 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1595}
1596
86872cb5 1597static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1598{
c71099ac 1599 struct fib6_table *table;
1da177e4
LT
1600 struct fib6_node *fn;
1601 struct rt6_info *rt;
1602 int err = -ESRCH;
1603
5578689a 1604 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1605 if (!table)
c71099ac
TG
1606 return err;
1607
1608 read_lock_bh(&table->tb6_lock);
1da177e4 1609
c71099ac 1610 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1611 &cfg->fc_dst, cfg->fc_dst_len,
1612 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1613
1da177e4 1614 if (fn) {
d8d1f30b 1615 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1616 if (cfg->fc_ifindex &&
d1918542
DM
1617 (!rt->dst.dev ||
1618 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1619 continue;
86872cb5
TG
1620 if (cfg->fc_flags & RTF_GATEWAY &&
1621 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1622 continue;
86872cb5 1623 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1624 continue;
d8d1f30b 1625 dst_hold(&rt->dst);
c71099ac 1626 read_unlock_bh(&table->tb6_lock);
1da177e4 1627
86872cb5 1628 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1629 }
1630 }
c71099ac 1631 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1632
1633 return err;
1634}
1635
/*
 * rt6_do_redirect - act on an ICMPv6 Redirect for the route @dst.
 *
 * @dst: route currently used for the redirected destination
 * @skb: buffer holding the Redirect; the ICMPv6 header is read through
 *       icmp6_hdr() and the receiving device through skb->dev
 *
 * The message is dropped (some cases log a rate-limited debug message) when:
 *  - it is too short to hold the ICMPv6 header plus two IPv6 addresses;
 *  - the destination address is multicast;
 *  - target != destination and the target is not a link-local unicast
 *    address (target == destination marks the destination as on-link);
 *  - the receiving device has no inet6_dev, has forwarding enabled, or has
 *    accept_redirects disabled;
 *  - the ND options are invalid or the target link-layer address option
 *    has a bad length;
 *  - @dst is the namespace's null entry.
 *
 * Otherwise @dst is confirmed, a neighbour entry for the target is obtained,
 * and - unless that neighbour is already the one attached to the route -
 * the neighbour is updated, a /128 RTF_CACHE|RTF_DYNAMIC copy of the route
 * for the destination is inserted, NETEVENT_REDIRECT notifiers are called,
 * and the old route is deleted if it was itself a cache entry.
 *
 * NOTE(review): three of the debug messages use the prefix "rt6_redirect"
 * rather than this function's name.
 */
6e157b6a 1636static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
a6279458 1637{
e8599ff4 1638 struct net *net = dev_net(skb->dev);
a6279458 1639 struct netevent_redirect netevent;
e8599ff4
DM
1640 struct rt6_info *rt, *nrt = NULL;
1641 const struct in6_addr *target;
e8599ff4 1642 struct ndisc_options ndopts;
6e157b6a
DM
1643 const struct in6_addr *dest;
1644 struct neighbour *old_neigh;
e8599ff4
DM
1645 struct inet6_dev *in6_dev;
1646 struct neighbour *neigh;
1647 struct icmp6hdr *icmph;
6e157b6a
DM
1648 int optlen, on_link;
1649 u8 *lladdr;
e8599ff4
DM
1650
/* Option bytes left after the ICMPv6 header and the two addresses that follow it. */
1651 optlen = skb->tail - skb->transport_header;
1652 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1653
1654 if (optlen < 0) {
6e157b6a 1655 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
1656 return;
1657 }
1658
/* Target address sits right after the ICMPv6 header, destination right after the target. */
1659 icmph = icmp6_hdr(skb);
1660 target = (const struct in6_addr *) (icmph + 1);
1661 dest = target + 1;
1662
1663 if (ipv6_addr_is_multicast(dest)) {
6e157b6a 1664 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
1665 return;
1666 }
1667
6e157b6a 1668 on_link = 0;
e8599ff4
DM
1669 if (ipv6_addr_equal(dest, target)) {
1670 on_link = 1;
1671 } else if (ipv6_addr_type(target) !=
1672 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 1673 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
1674 return;
1675 }
1676
1677 in6_dev = __in6_dev_get(skb->dev);
1678 if (!in6_dev)
1679 return;
1680 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1681 return;
1682
1683 /* RFC2461 8.1:
1684 * The IP source address of the Redirect MUST be the same as the current
1685 * first-hop router for the specified ICMP Destination Address.
1686 */
1687
1688 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1689 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1690 return;
1691 }
6e157b6a
DM
1692
/* lladdr stays NULL when the Redirect carries no target link-layer address option. */
1693 lladdr = NULL;
e8599ff4
DM
1694 if (ndopts.nd_opts_tgt_lladdr) {
1695 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1696 skb->dev);
1697 if (!lladdr) {
1698 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1699 return;
1700 }
1701 }
1702
6e157b6a
DM
1703 rt = (struct rt6_info *) dst;
1704 if (rt == net->ipv6.ip6_null_entry) {
1705 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 1706 return;
6e157b6a 1707 }
e8599ff4 1708
6e157b6a
DM
1709 /* Redirect received -> path was valid.
1710 * Look, redirects are sent only in response to data packets,
1711 * so that this nexthop apparently is reachable. --ANK
1712 */
1713 dst_confirm(&rt->dst);
a6279458 1714
/* The reference obtained here is released at "out". */
6e157b6a
DM
1715 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1716 if (!neigh)
1717 return;
a6279458 1718
6e157b6a
DM
1719 /* Duplicate redirect: silently ignore. */
1720 old_neigh = rt->n;
1721 if (neigh == old_neigh)
a6279458 1722 goto out;
1da177e4 1723
1da177e4
LT
1724 /*
1725 * We have finally decided to accept it.
1726 */
1727
/* The ISROUTER flags are passed only when the target is a router (not on-link). */
1ab1457c 1728 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1729 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1730 NEIGH_UPDATE_F_OVERRIDE|
1731 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1732 NEIGH_UPDATE_F_ISROUTER))
1733 );
1734
21efcfa0 1735 nrt = ip6_rt_copy(rt, dest);
38308473 1736 if (!nrt)
1da177e4
LT
1737 goto out;
1738
1739 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1740 if (on_link)
1741 nrt->rt6i_flags &= ~RTF_GATEWAY;
1742
4e3fd7a0 1743 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
97cac082 1744 nrt->n = neigh_clone(neigh);
1da177e4 1745
40e22e8f 1746 if (ip6_ins_rt(nrt))
1da177e4
LT
1747 goto out;
1748
d8d1f30b 1749 netevent.old = &rt->dst;
1d248b1c 1750 netevent.old_neigh = old_neigh;
d8d1f30b 1751 netevent.new = &nrt->dst;
1d248b1c
DM
1752 netevent.new_neigh = neigh;
1753 netevent.daddr = dest;
8d71740c
TT
1754 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1755
/* dst_clone() takes the extra reference that ip6_del_rt() drops. */
38308473 1756 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 1757 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 1758 ip6_del_rt(rt);
1da177e4
LT
1759 }
1760
1761out:
e8599ff4 1762 neigh_release(neigh);
6e157b6a
DM
1763}
1764
1da177e4
LT
1765/*
1766 * Misc support functions
1767 */
1768
1716a961 1769static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1770 const struct in6_addr *dest)
1da177e4 1771{
d1918542 1772 struct net *net = dev_net(ort->dst.dev);
8b96d22d
DM
1773 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1774 ort->rt6i_table);
1da177e4
LT
1775
1776 if (rt) {
d8d1f30b
CG
1777 rt->dst.input = ort->dst.input;
1778 rt->dst.output = ort->dst.output;
8e2ec639 1779 rt->dst.flags |= DST_HOST;
d8d1f30b 1780
4e3fd7a0 1781 rt->rt6i_dst.addr = *dest;
8e2ec639 1782 rt->rt6i_dst.plen = 128;
defb3519 1783 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1784 rt->dst.error = ort->dst.error;
1da177e4
LT
1785 rt->rt6i_idev = ort->rt6i_idev;
1786 if (rt->rt6i_idev)
1787 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1788 rt->dst.lastuse = jiffies;
1da177e4 1789
4e3fd7a0 1790 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1791 rt->rt6i_flags = ort->rt6i_flags;
1792 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1793 (RTF_DEFAULT | RTF_ADDRCONF))
1794 rt6_set_from(rt, ort);
1795 else
1796 rt6_clean_expires(rt);
1da177e4
LT
1797 rt->rt6i_metric = 0;
1798
1da177e4
LT
1799#ifdef CONFIG_IPV6_SUBTREES
1800 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1801#endif
0f6c6392 1802 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1803 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1804 }
1805 return rt;
1806}
1807
70ceb4f5 1808#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1809static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1810 const struct in6_addr *prefix, int prefixlen,
1811 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1812{
1813 struct fib6_node *fn;
1814 struct rt6_info *rt = NULL;
c71099ac
TG
1815 struct fib6_table *table;
1816
efa2cea0 1817 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1818 if (!table)
c71099ac 1819 return NULL;
70ceb4f5 1820
c71099ac
TG
1821 write_lock_bh(&table->tb6_lock);
1822 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1823 if (!fn)
1824 goto out;
1825
d8d1f30b 1826 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1827 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1828 continue;
1829 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1830 continue;
1831 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1832 continue;
d8d1f30b 1833 dst_hold(&rt->dst);
70ceb4f5
YH
1834 break;
1835 }
1836out:
c71099ac 1837 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1838 return rt;
1839}
1840
efa2cea0 1841static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1842 const struct in6_addr *prefix, int prefixlen,
1843 const struct in6_addr *gwaddr, int ifindex,
95c96174 1844 unsigned int pref)
70ceb4f5 1845{
86872cb5
TG
1846 struct fib6_config cfg = {
1847 .fc_table = RT6_TABLE_INFO,
238fc7ea 1848 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1849 .fc_ifindex = ifindex,
1850 .fc_dst_len = prefixlen,
1851 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1852 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1853 .fc_nlinfo.pid = 0,
1854 .fc_nlinfo.nlh = NULL,
1855 .fc_nlinfo.nl_net = net,
86872cb5
TG
1856 };
1857
4e3fd7a0
AD
1858 cfg.fc_dst = *prefix;
1859 cfg.fc_gateway = *gwaddr;
70ceb4f5 1860
e317da96
YH
1861 /* We should treat it as a default route if prefix length is 0. */
1862 if (!prefixlen)
86872cb5 1863 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1864
86872cb5 1865 ip6_route_add(&cfg);
70ceb4f5 1866
efa2cea0 1867 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1868}
1869#endif
1870
b71d1d42 1871struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1872{
1da177e4 1873 struct rt6_info *rt;
c71099ac 1874 struct fib6_table *table;
1da177e4 1875
c346dca1 1876 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1877 if (!table)
c71099ac 1878 return NULL;
1da177e4 1879
c71099ac 1880 write_lock_bh(&table->tb6_lock);
d8d1f30b 1881 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1882 if (dev == rt->dst.dev &&
045927ff 1883 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1884 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1885 break;
1886 }
1887 if (rt)
d8d1f30b 1888 dst_hold(&rt->dst);
c71099ac 1889 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1890 return rt;
1891}
1892
b71d1d42 1893struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1894 struct net_device *dev,
1895 unsigned int pref)
1da177e4 1896{
86872cb5
TG
1897 struct fib6_config cfg = {
1898 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1899 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1900 .fc_ifindex = dev->ifindex,
1901 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1902 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1903 .fc_nlinfo.pid = 0,
1904 .fc_nlinfo.nlh = NULL,
c346dca1 1905 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1906 };
1da177e4 1907
4e3fd7a0 1908 cfg.fc_gateway = *gwaddr;
1da177e4 1909
86872cb5 1910 ip6_route_add(&cfg);
1da177e4 1911
1da177e4
LT
1912 return rt6_get_dflt_router(gwaddr, dev);
1913}
1914
7b4da532 1915void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1916{
1917 struct rt6_info *rt;
c71099ac
TG
1918 struct fib6_table *table;
1919
1920 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1921 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1922 if (!table)
c71099ac 1923 return;
1da177e4
LT
1924
1925restart:
c71099ac 1926 read_lock_bh(&table->tb6_lock);
d8d1f30b 1927 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1928 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1929 dst_hold(&rt->dst);
c71099ac 1930 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1931 ip6_del_rt(rt);
1da177e4
LT
1932 goto restart;
1933 }
1934 }
c71099ac 1935 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1936}
1937
5578689a
DL
1938static void rtmsg_to_fib6_config(struct net *net,
1939 struct in6_rtmsg *rtmsg,
86872cb5
TG
1940 struct fib6_config *cfg)
1941{
1942 memset(cfg, 0, sizeof(*cfg));
1943
1944 cfg->fc_table = RT6_TABLE_MAIN;
1945 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1946 cfg->fc_metric = rtmsg->rtmsg_metric;
1947 cfg->fc_expires = rtmsg->rtmsg_info;
1948 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1949 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1950 cfg->fc_flags = rtmsg->rtmsg_flags;
1951
5578689a 1952 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1953
4e3fd7a0
AD
1954 cfg->fc_dst = rtmsg->rtmsg_dst;
1955 cfg->fc_src = rtmsg->rtmsg_src;
1956 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1957}
1958
5578689a 1959int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1960{
86872cb5 1961 struct fib6_config cfg;
1da177e4
LT
1962 struct in6_rtmsg rtmsg;
1963 int err;
1964
1965 switch(cmd) {
1966 case SIOCADDRT: /* Add a route */
1967 case SIOCDELRT: /* Delete a route */
1968 if (!capable(CAP_NET_ADMIN))
1969 return -EPERM;
1970 err = copy_from_user(&rtmsg, arg,
1971 sizeof(struct in6_rtmsg));
1972 if (err)
1973 return -EFAULT;
86872cb5 1974
5578689a 1975 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1976
1da177e4
LT
1977 rtnl_lock();
1978 switch (cmd) {
1979 case SIOCADDRT:
86872cb5 1980 err = ip6_route_add(&cfg);
1da177e4
LT
1981 break;
1982 case SIOCDELRT:
86872cb5 1983 err = ip6_route_del(&cfg);
1da177e4
LT
1984 break;
1985 default:
1986 err = -EINVAL;
1987 }
1988 rtnl_unlock();
1989
1990 return err;
3ff50b79 1991 }
1da177e4
LT
1992
1993 return -EINVAL;
1994}
1995
1996/*
1997 * Drop the packet on the floor
1998 */
1999
d5fdd6ba 2000static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2001{
612f09e8 2002 int type;
adf30907 2003 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2004 switch (ipstats_mib_noroutes) {
2005 case IPSTATS_MIB_INNOROUTES:
0660e03f 2006 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2007 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2008 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2009 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2010 break;
2011 }
2012 /* FALLTHROUGH */
2013 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2014 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2015 ipstats_mib_noroutes);
612f09e8
YH
2016 break;
2017 }
3ffe533c 2018 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2019 kfree_skb(skb);
2020 return 0;
2021}
2022
9ce8ade0
TG
2023static int ip6_pkt_discard(struct sk_buff *skb)
2024{
612f09e8 2025 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2026}
2027
20380731 2028static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2029{
adf30907 2030 skb->dev = skb_dst(skb)->dev;
612f09e8 2031 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2032}
2033
6723ab54
DM
2034#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2035
9ce8ade0
TG
2036static int ip6_pkt_prohibit(struct sk_buff *skb)
2037{
612f09e8 2038 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2039}
2040
2041static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2042{
adf30907 2043 skb->dev = skb_dst(skb)->dev;
612f09e8 2044 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2045}
2046
6723ab54
DM
2047#endif
2048
1da177e4
LT
2049/*
2050 * Allocate a dst for local (unicast / anycast) address.
2051 */
2052
2053struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2054 const struct in6_addr *addr,
8f031519 2055 bool anycast)
1da177e4 2056{
c346dca1 2057 struct net *net = dev_net(idev->dev);
8b96d22d 2058 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
f83c7790 2059 int err;
1da177e4 2060
38308473 2061 if (!rt) {
f3213831 2062 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
1da177e4 2063 return ERR_PTR(-ENOMEM);
40385653 2064 }
1da177e4 2065
1da177e4
LT
2066 in6_dev_hold(idev);
2067
11d53b49 2068 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2069 rt->dst.input = ip6_input;
2070 rt->dst.output = ip6_output;
1da177e4 2071 rt->rt6i_idev = idev;
d8d1f30b 2072 rt->dst.obsolete = -1;
1da177e4
LT
2073
2074 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2075 if (anycast)
2076 rt->rt6i_flags |= RTF_ANYCAST;
2077 else
1da177e4 2078 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2079 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2080 if (err) {
d8d1f30b 2081 dst_free(&rt->dst);
f83c7790 2082 return ERR_PTR(err);
1da177e4
LT
2083 }
2084
4e3fd7a0 2085 rt->rt6i_dst.addr = *addr;
1da177e4 2086 rt->rt6i_dst.plen = 128;
5578689a 2087 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2088
d8d1f30b 2089 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2090
2091 return rt;
2092}
2093
c3968a85
DW
2094int ip6_route_get_saddr(struct net *net,
2095 struct rt6_info *rt,
b71d1d42 2096 const struct in6_addr *daddr,
c3968a85
DW
2097 unsigned int prefs,
2098 struct in6_addr *saddr)
2099{
2100 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2101 int err = 0;
2102 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2103 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2104 else
2105 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2106 daddr, prefs, saddr);
2107 return err;
2108}
2109
2110/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through the table walk. */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace being walked */
	struct in6_addr *addr;		/* address being removed */
};
2116
2117static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2118{
2119 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2120 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2121 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2122
d1918542 2123 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2124 rt != net->ipv6.ip6_null_entry &&
2125 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2126 /* remove prefsrc entry */
2127 rt->rt6i_prefsrc.plen = 0;
2128 }
2129 return 0;
2130}
2131
2132void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2133{
2134 struct net *net = dev_net(ifp->idev->dev);
2135 struct arg_dev_net_ip adni = {
2136 .dev = ifp->idev->dev,
2137 .net = net,
2138 .addr = &ifp->addr,
2139 };
2140 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2141}
2142
8ed67789
DL
/* Argument bundle handed to fib6_ifdown() through the table walk. */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace being walked */
};
2147
1da177e4
LT
2148static int fib6_ifdown(struct rt6_info *rt, void *arg)
2149{
bc3ef660 2150 const struct arg_dev_net *adn = arg;
2151 const struct net_device *dev = adn->dev;
8ed67789 2152
d1918542 2153 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2154 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2155 return -1;
c159d30c 2156
1da177e4
LT
2157 return 0;
2158}
2159
f3db4851 2160void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2161{
8ed67789
DL
2162 struct arg_dev_net adn = {
2163 .dev = dev,
2164 .net = net,
2165 };
2166
2167 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2168 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2169}
2170
/* Argument bundle handed to rt6_mtu_change_route() through the table walk. */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2175
2176static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2177{
2178 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2179 struct inet6_dev *idev;
2180
2181 /* In IPv6 pmtu discovery is not optional,
2182 so that RTAX_MTU lock cannot disable it.
2183 We still use this lock to block changes
2184 caused by addrconf/ndisc.
2185 */
2186
2187 idev = __in6_dev_get(arg->dev);
38308473 2188 if (!idev)
1da177e4
LT
2189 return 0;
2190
2191 /* For administrative MTU increase, there is no way to discover
2192 IPv6 PMTU increase, so PMTU increase should be updated here.
2193 Since RFC 1981 doesn't include administrative MTU increase
2194 update PMTU increase is a MUST. (i.e. jumbo frame)
2195 */
2196 /*
2197 If new MTU is less than route PMTU, this new MTU will be the
2198 lowest MTU in the path, update the route PMTU to reflect PMTU
2199 decreases; if new MTU is greater than route PMTU, and the
2200 old MTU is the lowest MTU in the path, update the route PMTU
2201 to reflect the increase. In this case if the other nodes' MTU
2202 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2203 PMTU discouvery.
2204 */
d1918542 2205 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2206 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2207 (dst_mtu(&rt->dst) >= arg->mtu ||
2208 (dst_mtu(&rt->dst) < arg->mtu &&
2209 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2210 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2211 }
1da177e4
LT
2212 return 0;
2213}
2214
95c96174 2215void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2216{
c71099ac
TG
2217 struct rt6_mtu_change_arg arg = {
2218 .dev = dev,
2219 .mtu = mtu,
2220 };
1da177e4 2221
c346dca1 2222 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2223}
2224
ef7c79ed 2225static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2226 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2227 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2228 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2229 [RTA_PRIORITY] = { .type = NLA_U32 },
2230 [RTA_METRICS] = { .type = NLA_NESTED },
2231};
2232
2233static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2234 struct fib6_config *cfg)
1da177e4 2235{
86872cb5
TG
2236 struct rtmsg *rtm;
2237 struct nlattr *tb[RTA_MAX+1];
2238 int err;
1da177e4 2239
86872cb5
TG
2240 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2241 if (err < 0)
2242 goto errout;
1da177e4 2243
86872cb5
TG
2244 err = -EINVAL;
2245 rtm = nlmsg_data(nlh);
2246 memset(cfg, 0, sizeof(*cfg));
2247
2248 cfg->fc_table = rtm->rtm_table;
2249 cfg->fc_dst_len = rtm->rtm_dst_len;
2250 cfg->fc_src_len = rtm->rtm_src_len;
2251 cfg->fc_flags = RTF_UP;
2252 cfg->fc_protocol = rtm->rtm_protocol;
2253
2254 if (rtm->rtm_type == RTN_UNREACHABLE)
2255 cfg->fc_flags |= RTF_REJECT;
2256
ab79ad14
2257 if (rtm->rtm_type == RTN_LOCAL)
2258 cfg->fc_flags |= RTF_LOCAL;
2259
86872cb5
TG
2260 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2261 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2262 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2263
2264 if (tb[RTA_GATEWAY]) {
2265 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2266 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2267 }
86872cb5
TG
2268
2269 if (tb[RTA_DST]) {
2270 int plen = (rtm->rtm_dst_len + 7) >> 3;
2271
2272 if (nla_len(tb[RTA_DST]) < plen)
2273 goto errout;
2274
2275 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2276 }
86872cb5
TG
2277
2278 if (tb[RTA_SRC]) {
2279 int plen = (rtm->rtm_src_len + 7) >> 3;
2280
2281 if (nla_len(tb[RTA_SRC]) < plen)
2282 goto errout;
2283
2284 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2285 }
86872cb5 2286
c3968a85
DW
2287 if (tb[RTA_PREFSRC])
2288 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2289
86872cb5
TG
2290 if (tb[RTA_OIF])
2291 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2292
2293 if (tb[RTA_PRIORITY])
2294 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2295
2296 if (tb[RTA_METRICS]) {
2297 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2298 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2299 }
86872cb5
TG
2300
2301 if (tb[RTA_TABLE])
2302 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2303
2304 err = 0;
2305errout:
2306 return err;
1da177e4
LT
2307}
2308
c127ea2c 2309static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2310{
86872cb5
TG
2311 struct fib6_config cfg;
2312 int err;
1da177e4 2313
86872cb5
TG
2314 err = rtm_to_fib6_config(skb, nlh, &cfg);
2315 if (err < 0)
2316 return err;
2317
2318 return ip6_route_del(&cfg);
1da177e4
LT
2319}
2320
c127ea2c 2321static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2322{
86872cb5
TG
2323 struct fib6_config cfg;
2324 int err;
1da177e4 2325
86872cb5
TG
2326 err = rtm_to_fib6_config(skb, nlh, &cfg);
2327 if (err < 0)
2328 return err;
2329
2330 return ip6_route_add(&cfg);
1da177e4
LT
2331}
2332
339bf98f
TG
2333static inline size_t rt6_nlmsg_size(void)
2334{
2335 return NLMSG_ALIGN(sizeof(struct rtmsg))
2336 + nla_total_size(16) /* RTA_SRC */
2337 + nla_total_size(16) /* RTA_DST */
2338 + nla_total_size(16) /* RTA_GATEWAY */
2339 + nla_total_size(16) /* RTA_PREFSRC */
2340 + nla_total_size(4) /* RTA_TABLE */
2341 + nla_total_size(4) /* RTA_IIF */
2342 + nla_total_size(4) /* RTA_OIF */
2343 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2344 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2345 + nla_total_size(sizeof(struct rta_cacheinfo));
2346}
2347
191cd582
BH
2348static int rt6_fill_node(struct net *net,
2349 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2350 struct in6_addr *dst, struct in6_addr *src,
2351 int iif, int type, u32 pid, u32 seq,
7bc570c8 2352 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2353{
2354 struct rtmsg *rtm;
2d7202bf 2355 struct nlmsghdr *nlh;
e3703b3d 2356 long expires;
9e762a4a 2357 u32 table;
f2c31e32 2358 struct neighbour *n;
1da177e4
LT
2359
2360 if (prefix) { /* user wants prefix routes only */
2361 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2362 /* success since this is not a prefix route */
2363 return 1;
2364 }
2365 }
2366
2d7202bf 2367 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2368 if (!nlh)
26932566 2369 return -EMSGSIZE;
2d7202bf
TG
2370
2371 rtm = nlmsg_data(nlh);
1da177e4
LT
2372 rtm->rtm_family = AF_INET6;
2373 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2374 rtm->rtm_src_len = rt->rt6i_src.plen;
2375 rtm->rtm_tos = 0;
c71099ac 2376 if (rt->rt6i_table)
9e762a4a 2377 table = rt->rt6i_table->tb6_id;
c71099ac 2378 else
9e762a4a
PM
2379 table = RT6_TABLE_UNSPEC;
2380 rtm->rtm_table = table;
c78679e8
DM
2381 if (nla_put_u32(skb, RTA_TABLE, table))
2382 goto nla_put_failure;
38308473 2383 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2384 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2385 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2386 rtm->rtm_type = RTN_LOCAL;
d1918542 2387 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2388 rtm->rtm_type = RTN_LOCAL;
2389 else
2390 rtm->rtm_type = RTN_UNICAST;
2391 rtm->rtm_flags = 0;
2392 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2393 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2394 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2395 rtm->rtm_protocol = RTPROT_REDIRECT;
2396 else if (rt->rt6i_flags & RTF_ADDRCONF)
2397 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2398 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2399 rtm->rtm_protocol = RTPROT_RA;
2400
38308473 2401 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2402 rtm->rtm_flags |= RTM_F_CLONED;
2403
2404 if (dst) {
c78679e8
DM
2405 if (nla_put(skb, RTA_DST, 16, dst))
2406 goto nla_put_failure;
1ab1457c 2407 rtm->rtm_dst_len = 128;
1da177e4 2408 } else if (rtm->rtm_dst_len)
c78679e8
DM
2409 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2410 goto nla_put_failure;
1da177e4
LT
2411#ifdef CONFIG_IPV6_SUBTREES
2412 if (src) {
c78679e8
DM
2413 if (nla_put(skb, RTA_SRC, 16, src))
2414 goto nla_put_failure;
1ab1457c 2415 rtm->rtm_src_len = 128;
c78679e8
DM
2416 } else if (rtm->rtm_src_len &&
2417 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2418 goto nla_put_failure;
1da177e4 2419#endif
7bc570c8
YH
2420 if (iif) {
2421#ifdef CONFIG_IPV6_MROUTE
2422 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2423 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2424 if (err <= 0) {
2425 if (!nowait) {
2426 if (err == 0)
2427 return 0;
2428 goto nla_put_failure;
2429 } else {
2430 if (err == -EMSGSIZE)
2431 goto nla_put_failure;
2432 }
2433 }
2434 } else
2435#endif
c78679e8
DM
2436 if (nla_put_u32(skb, RTA_IIF, iif))
2437 goto nla_put_failure;
7bc570c8 2438 } else if (dst) {
1da177e4 2439 struct in6_addr saddr_buf;
c78679e8
DM
2440 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2441 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2442 goto nla_put_failure;
1da177e4 2443 }
2d7202bf 2444
c3968a85
DW
2445 if (rt->rt6i_prefsrc.plen) {
2446 struct in6_addr saddr_buf;
4e3fd7a0 2447 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2448 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2449 goto nla_put_failure;
c3968a85
DW
2450 }
2451
defb3519 2452 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2453 goto nla_put_failure;
2454
f2c31e32 2455 rcu_read_lock();
97cac082 2456 n = rt->n;
94f826b8
ED
2457 if (n) {
2458 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2459 rcu_read_unlock();
2460 goto nla_put_failure;
2461 }
2462 }
f2c31e32 2463 rcu_read_unlock();
2d7202bf 2464
c78679e8
DM
2465 if (rt->dst.dev &&
2466 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2467 goto nla_put_failure;
2468 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2469 goto nla_put_failure;
36e3deae
YH
2470 if (!(rt->rt6i_flags & RTF_EXPIRES))
2471 expires = 0;
d1918542
DM
2472 else if (rt->dst.expires - jiffies < INT_MAX)
2473 expires = rt->dst.expires - jiffies;
36e3deae
YH
2474 else
2475 expires = INT_MAX;
69cdf8f9 2476
87a50699 2477 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 2478 goto nla_put_failure;
2d7202bf
TG
2479
2480 return nlmsg_end(skb, nlh);
2481
2482nla_put_failure:
26932566
PM
2483 nlmsg_cancel(skb, nlh);
2484 return -EMSGSIZE;
1da177e4
LT
2485}
2486
1b43af54 2487int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2488{
2489 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2490 int prefix;
2491
2d7202bf
TG
2492 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2493 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2494 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2495 } else
2496 prefix = 0;
2497
191cd582
BH
2498 return rt6_fill_node(arg->net,
2499 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2500 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2501 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2502}
2503
c127ea2c 2504static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2505{
3b1e0a65 2506 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2507 struct nlattr *tb[RTA_MAX+1];
2508 struct rt6_info *rt;
1da177e4 2509 struct sk_buff *skb;
ab364a6f 2510 struct rtmsg *rtm;
4c9483b2 2511 struct flowi6 fl6;
72331bc0 2512 int err, iif = 0, oif = 0;
1da177e4 2513
ab364a6f
TG
2514 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2515 if (err < 0)
2516 goto errout;
1da177e4 2517
ab364a6f 2518 err = -EINVAL;
4c9483b2 2519 memset(&fl6, 0, sizeof(fl6));
1da177e4 2520
ab364a6f
TG
2521 if (tb[RTA_SRC]) {
2522 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2523 goto errout;
2524
4e3fd7a0 2525 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2526 }
2527
2528 if (tb[RTA_DST]) {
2529 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2530 goto errout;
2531
4e3fd7a0 2532 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2533 }
2534
2535 if (tb[RTA_IIF])
2536 iif = nla_get_u32(tb[RTA_IIF]);
2537
2538 if (tb[RTA_OIF])
72331bc0 2539 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2540
2541 if (iif) {
2542 struct net_device *dev;
72331bc0
SL
2543 int flags = 0;
2544
5578689a 2545 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2546 if (!dev) {
2547 err = -ENODEV;
ab364a6f 2548 goto errout;
1da177e4 2549 }
72331bc0
SL
2550
2551 fl6.flowi6_iif = iif;
2552
2553 if (!ipv6_addr_any(&fl6.saddr))
2554 flags |= RT6_LOOKUP_F_HAS_SADDR;
2555
2556 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2557 flags);
2558 } else {
2559 fl6.flowi6_oif = oif;
2560
2561 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2562 }
2563
ab364a6f 2564 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2565 if (!skb) {
2173bff5 2566 dst_release(&rt->dst);
ab364a6f
TG
2567 err = -ENOBUFS;
2568 goto errout;
2569 }
1da177e4 2570
ab364a6f
TG
2571 /* Reserve room for dummy headers, this skb can pass
2572 through good chunk of routing engine.
2573 */
459a98ed 2574 skb_reset_mac_header(skb);
ab364a6f 2575 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2576
d8d1f30b 2577 skb_dst_set(skb, &rt->dst);
1da177e4 2578
4c9483b2 2579 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2580 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2581 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2582 if (err < 0) {
ab364a6f
TG
2583 kfree_skb(skb);
2584 goto errout;
1da177e4
LT
2585 }
2586
5578689a 2587 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2588errout:
1da177e4 2589 return err;
1da177e4
LT
2590}
2591
86872cb5 2592void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2593{
2594 struct sk_buff *skb;
5578689a 2595 struct net *net = info->nl_net;
528c4ceb
DL
2596 u32 seq;
2597 int err;
2598
2599 err = -ENOBUFS;
38308473 2600 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2601
339bf98f 2602 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2603 if (!skb)
21713ebc
TG
2604 goto errout;
2605
191cd582 2606 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2607 event, info->pid, seq, 0, 0, 0);
26932566
PM
2608 if (err < 0) {
2609 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2610 WARN_ON(err == -EMSGSIZE);
2611 kfree_skb(skb);
2612 goto errout;
2613 }
1ce85fe4
PNA
2614 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2615 info->nlh, gfp_any());
2616 return;
21713ebc
TG
2617errout:
2618 if (err < 0)
5578689a 2619 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2620}
2621
8ed67789
DL
2622static int ip6_route_dev_notify(struct notifier_block *this,
2623 unsigned long event, void *data)
2624{
2625 struct net_device *dev = (struct net_device *)data;
c346dca1 2626 struct net *net = dev_net(dev);
8ed67789
DL
2627
2628 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2629 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2630 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2631#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2632 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2633 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2634 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2635 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2636#endif
2637 }
2638
2639 return NOTIFY_OK;
2640}
2641
1da177e4
LT
2642/*
2643 * /proc
2644 */
2645
2646#ifdef CONFIG_PROC_FS
2647
1da177e4
LT
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file uses this
 * struct -- confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2656
2657static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2658{
33120b30 2659 struct seq_file *m = p_arg;
69cce1d1 2660 struct neighbour *n;
1da177e4 2661
4b7a4274 2662 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2663
2664#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2665 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2666#else
33120b30 2667 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2668#endif
f2c31e32 2669 rcu_read_lock();
97cac082 2670 n = rt->n;
69cce1d1
DM
2671 if (n) {
2672 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2673 } else {
33120b30 2674 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2675 }
f2c31e32 2676 rcu_read_unlock();
33120b30 2677 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2678 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2679 rt->dst.__use, rt->rt6i_flags,
d1918542 2680 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2681 return 0;
2682}
2683
33120b30 2684static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2685{
f3db4851 2686 struct net *net = (struct net *)m->private;
32b293a5 2687 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2688 return 0;
2689}
1da177e4 2690
33120b30
AD
2691static int ipv6_route_open(struct inode *inode, struct file *file)
2692{
de05c557 2693 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2694}
2695
33120b30
AD
2696static const struct file_operations ipv6_route_proc_fops = {
2697 .owner = THIS_MODULE,
2698 .open = ipv6_route_open,
2699 .read = seq_read,
2700 .llseek = seq_lseek,
b6fcbdb4 2701 .release = single_release_net,
33120b30
AD
2702};
2703
1da177e4
LT
2704static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2705{
69ddb805 2706 struct net *net = (struct net *)seq->private;
1da177e4 2707 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2708 net->ipv6.rt6_stats->fib_nodes,
2709 net->ipv6.rt6_stats->fib_route_nodes,
2710 net->ipv6.rt6_stats->fib_rt_alloc,
2711 net->ipv6.rt6_stats->fib_rt_entries,
2712 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2713 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2714 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2715
2716 return 0;
2717}
2718
2719static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2720{
de05c557 2721 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2722}
2723
9a32144e 2724static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2725 .owner = THIS_MODULE,
2726 .open = rt6_stats_seq_open,
2727 .read = seq_read,
2728 .llseek = seq_lseek,
b6fcbdb4 2729 .release = single_release_net,
1da177e4
LT
2730};
2731#endif /* CONFIG_PROC_FS */
2732
2733#ifdef CONFIG_SYSCTL
2734
1da177e4 2735static
8d65af78 2736int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2737 void __user *buffer, size_t *lenp, loff_t *ppos)
2738{
c486da34
LAG
2739 struct net *net;
2740 int delay;
2741 if (!write)
1da177e4 2742 return -EINVAL;
c486da34
LAG
2743
2744 net = (struct net *)ctl->extra1;
2745 delay = net->ipv6.sysctl.flush_delay;
2746 proc_dointvec(ctl, write, buffer, lenp, ppos);
2747 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2748 return 0;
1da177e4
LT
2749}
2750
760f2d01 2751ctl_table ipv6_route_table_template[] = {
1ab1457c 2752 {
1da177e4 2753 .procname = "flush",
4990509f 2754 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2755 .maxlen = sizeof(int),
89c8b3a1 2756 .mode = 0200,
6d9f239a 2757 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2758 },
2759 {
1da177e4 2760 .procname = "gc_thresh",
9a7ec3a9 2761 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2762 .maxlen = sizeof(int),
2763 .mode = 0644,
6d9f239a 2764 .proc_handler = proc_dointvec,
1da177e4
LT
2765 },
2766 {
1da177e4 2767 .procname = "max_size",
4990509f 2768 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2769 .maxlen = sizeof(int),
2770 .mode = 0644,
6d9f239a 2771 .proc_handler = proc_dointvec,
1da177e4
LT
2772 },
2773 {
1da177e4 2774 .procname = "gc_min_interval",
4990509f 2775 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2776 .maxlen = sizeof(int),
2777 .mode = 0644,
6d9f239a 2778 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2779 },
2780 {
1da177e4 2781 .procname = "gc_timeout",
4990509f 2782 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2783 .maxlen = sizeof(int),
2784 .mode = 0644,
6d9f239a 2785 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2786 },
2787 {
1da177e4 2788 .procname = "gc_interval",
4990509f 2789 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2790 .maxlen = sizeof(int),
2791 .mode = 0644,
6d9f239a 2792 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2793 },
2794 {
1da177e4 2795 .procname = "gc_elasticity",
4990509f 2796 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2797 .maxlen = sizeof(int),
2798 .mode = 0644,
f3d3f616 2799 .proc_handler = proc_dointvec,
1da177e4
LT
2800 },
2801 {
1da177e4 2802 .procname = "mtu_expires",
4990509f 2803 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2804 .maxlen = sizeof(int),
2805 .mode = 0644,
6d9f239a 2806 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2807 },
2808 {
1da177e4 2809 .procname = "min_adv_mss",
4990509f 2810 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2811 .maxlen = sizeof(int),
2812 .mode = 0644,
f3d3f616 2813 .proc_handler = proc_dointvec,
1da177e4
LT
2814 },
2815 {
1da177e4 2816 .procname = "gc_min_interval_ms",
4990509f 2817 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2818 .maxlen = sizeof(int),
2819 .mode = 0644,
6d9f239a 2820 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2821 },
f8572d8f 2822 { }
1da177e4
LT
2823};
2824
2c8c1e72 2825struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2826{
2827 struct ctl_table *table;
2828
2829 table = kmemdup(ipv6_route_table_template,
2830 sizeof(ipv6_route_table_template),
2831 GFP_KERNEL);
5ee09105
YH
2832
2833 if (table) {
2834 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2835 table[0].extra1 = net;
86393e52 2836 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2837 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2838 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2839 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2840 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2841 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2842 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2843 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2844 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2845 }
2846
760f2d01
DL
2847 return table;
2848}
1da177e4
LT
2849#endif
2850
2c8c1e72 2851static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2852{
633d424b 2853 int ret = -ENOMEM;
8ed67789 2854
86393e52
AD
2855 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2856 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2857
fc66f95c
ED
2858 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2859 goto out_ip6_dst_ops;
2860
8ed67789
DL
2861 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2862 sizeof(*net->ipv6.ip6_null_entry),
2863 GFP_KERNEL);
2864 if (!net->ipv6.ip6_null_entry)
fc66f95c 2865 goto out_ip6_dst_entries;
d8d1f30b 2866 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2867 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2868 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2869 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2870 ip6_template_metrics, true);
8ed67789
DL
2871
2872#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2873 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2874 sizeof(*net->ipv6.ip6_prohibit_entry),
2875 GFP_KERNEL);
68fffc67
PZ
2876 if (!net->ipv6.ip6_prohibit_entry)
2877 goto out_ip6_null_entry;
d8d1f30b 2878 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2879 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2880 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2881 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2882 ip6_template_metrics, true);
8ed67789
DL
2883
2884 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2885 sizeof(*net->ipv6.ip6_blk_hole_entry),
2886 GFP_KERNEL);
68fffc67
PZ
2887 if (!net->ipv6.ip6_blk_hole_entry)
2888 goto out_ip6_prohibit_entry;
d8d1f30b 2889 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2890 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2891 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2892 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2893 ip6_template_metrics, true);
8ed67789
DL
2894#endif
2895
b339a47c
PZ
2896 net->ipv6.sysctl.flush_delay = 0;
2897 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2898 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2899 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2900 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2901 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2902 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2903 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2904
6891a346
BT
2905 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2906
8ed67789
DL
2907 ret = 0;
2908out:
2909 return ret;
f2fc6a54 2910
68fffc67
PZ
2911#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2912out_ip6_prohibit_entry:
2913 kfree(net->ipv6.ip6_prohibit_entry);
2914out_ip6_null_entry:
2915 kfree(net->ipv6.ip6_null_entry);
2916#endif
fc66f95c
ED
2917out_ip6_dst_entries:
2918 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2919out_ip6_dst_ops:
f2fc6a54 2920 goto out;
cdb18761
DL
2921}
2922
2c8c1e72 2923static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 2924{
8ed67789
DL
2925 kfree(net->ipv6.ip6_null_entry);
2926#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2927 kfree(net->ipv6.ip6_prohibit_entry);
2928 kfree(net->ipv6.ip6_blk_hole_entry);
2929#endif
41bb78b4 2930 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2931}
2932
d189634e
TG
2933static int __net_init ip6_route_net_init_late(struct net *net)
2934{
2935#ifdef CONFIG_PROC_FS
2936 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2937 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2938#endif
2939 return 0;
2940}
2941
2942static void __net_exit ip6_route_net_exit_late(struct net *net)
2943{
2944#ifdef CONFIG_PROC_FS
2945 proc_net_remove(net, "ipv6_route");
2946 proc_net_remove(net, "rt6_stats");
2947#endif
2948}
2949
cdb18761
DL
2950static struct pernet_operations ip6_route_net_ops = {
2951 .init = ip6_route_net_init,
2952 .exit = ip6_route_net_exit,
2953};
2954
c3426b47
DM
2955static int __net_init ipv6_inetpeer_init(struct net *net)
2956{
2957 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2958
2959 if (!bp)
2960 return -ENOMEM;
2961 inet_peer_base_init(bp);
2962 net->ipv6.peers = bp;
2963 return 0;
2964}
2965
2966static void __net_exit ipv6_inetpeer_exit(struct net *net)
2967{
2968 struct inet_peer_base *bp = net->ipv6.peers;
2969
2970 net->ipv6.peers = NULL;
56a6b248 2971 inetpeer_invalidate_tree(bp);
c3426b47
DM
2972 kfree(bp);
2973}
2974
2b823f72 2975static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
2976 .init = ipv6_inetpeer_init,
2977 .exit = ipv6_inetpeer_exit,
2978};
2979
d189634e
TG
2980static struct pernet_operations ip6_route_net_late_ops = {
2981 .init = ip6_route_net_init_late,
2982 .exit = ip6_route_net_exit_late,
2983};
2984
8ed67789
DL
2985static struct notifier_block ip6_route_dev_notifier = {
2986 .notifier_call = ip6_route_dev_notify,
2987 .priority = 0,
2988};
2989
433d49c3 2990int __init ip6_route_init(void)
1da177e4 2991{
433d49c3
DL
2992 int ret;
2993
9a7ec3a9
DL
2994 ret = -ENOMEM;
2995 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2996 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2997 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2998 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2999 goto out;
14e50e57 3000
fc66f95c 3001 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3002 if (ret)
bdb3289f 3003 goto out_kmem_cache;
bdb3289f 3004
c3426b47
DM
3005 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3006 if (ret)
e8803b6c 3007 goto out_dst_entries;
2a0c451a 3008
7e52b33b
DM
3009 ret = register_pernet_subsys(&ip6_route_net_ops);
3010 if (ret)
3011 goto out_register_inetpeer;
c3426b47 3012
5dc121e9
AE
3013 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3014
8ed67789
DL
3015 /* Registering of the loopback is done before this portion of code,
3016 * the loopback reference in rt6_info will not be taken, do it
3017 * manually for init_net */
d8d1f30b 3018 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3019 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3020 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3021 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3022 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3023 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3024 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3025 #endif
e8803b6c 3026 ret = fib6_init();
433d49c3 3027 if (ret)
8ed67789 3028 goto out_register_subsys;
433d49c3 3029
433d49c3
DL
3030 ret = xfrm6_init();
3031 if (ret)
e8803b6c 3032 goto out_fib6_init;
c35b7e72 3033
433d49c3
DL
3034 ret = fib6_rules_init();
3035 if (ret)
3036 goto xfrm6_init;
7e5449c2 3037
d189634e
TG
3038 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3039 if (ret)
3040 goto fib6_rules_init;
3041
433d49c3 3042 ret = -ENOBUFS;
c7ac8679
GR
3043 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3044 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3045 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3046 goto out_register_late_subsys;
c127ea2c 3047
8ed67789 3048 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3049 if (ret)
d189634e 3050 goto out_register_late_subsys;
8ed67789 3051
433d49c3
DL
3052out:
3053 return ret;
3054
d189634e
TG
3055out_register_late_subsys:
3056 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3057fib6_rules_init:
433d49c3
DL
3058 fib6_rules_cleanup();
3059xfrm6_init:
433d49c3 3060 xfrm6_fini();
2a0c451a
TG
3061out_fib6_init:
3062 fib6_gc_cleanup();
8ed67789
DL
3063out_register_subsys:
3064 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3065out_register_inetpeer:
3066 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3067out_dst_entries:
3068 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3069out_kmem_cache:
f2fc6a54 3070 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3071 goto out;
1da177e4
LT
3072}
3073
3074void ip6_route_cleanup(void)
3075{
8ed67789 3076 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3077 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3078 fib6_rules_cleanup();
1da177e4 3079 xfrm6_fini();
1da177e4 3080 fib6_gc_cleanup();
c3426b47 3081 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3082 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3083 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3084 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3085}