ipv6: Pull main logic of rt6_redirect() into rt6_do_redirect().
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
1da177e4
LT
60
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
1716a961 67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 68 const struct in6_addr *dest);
1da177e4 69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 70static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 71static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73static void ip6_dst_destroy(struct dst_entry *);
74static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
569d3645 76static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
77
78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb);
81static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
6e157b6a 82static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb);
1da177e4 83
70ceb4f5 84#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 85static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
86 const struct in6_addr *prefix, int prefixlen,
87 const struct in6_addr *gwaddr, int ifindex,
95c96174 88 unsigned int pref);
efa2cea0 89static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
90 const struct in6_addr *prefix, int prefixlen,
91 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
92#endif
93
06582540
DM
94static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
95{
96 struct rt6_info *rt = (struct rt6_info *) dst;
97 struct inet_peer *peer;
98 u32 *p = NULL;
99
8e2ec639
YZ
100 if (!(rt->dst.flags & DST_HOST))
101 return NULL;
102
fbfe95a4 103 peer = rt6_get_peer_create(rt);
06582540
DM
104 if (peer) {
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
107
108 p = peer->metrics;
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111
112 new = (unsigned long) p;
113 prev = cmpxchg(&dst->_metrics, old, new);
114
115 if (prev != old) {
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
118 p = NULL;
119 }
120 }
121 return p;
122}
123
f894cbf8
DM
124static inline const void *choose_neigh_daddr(struct rt6_info *rt,
125 struct sk_buff *skb,
126 const void *daddr)
39232973
DM
127{
128 struct in6_addr *p = &rt->rt6i_gateway;
129
a7563f34 130 if (!ipv6_addr_any(p))
39232973 131 return (const void *) p;
f894cbf8
DM
132 else if (skb)
133 return &ipv6_hdr(skb)->daddr;
39232973
DM
134 return daddr;
135}
136
f894cbf8
DM
137static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
138 struct sk_buff *skb,
139 const void *daddr)
d3aaeb38 140{
39232973
DM
141 struct rt6_info *rt = (struct rt6_info *) dst;
142 struct neighbour *n;
143
f894cbf8 144 daddr = choose_neigh_daddr(rt, skb, daddr);
39232973 145 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
146 if (n)
147 return n;
148 return neigh_create(&nd_tbl, daddr, dst->dev);
149}
150
8ade06c6 151static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 152{
8ade06c6
DM
153 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
154 if (!n) {
155 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
156 if (IS_ERR(n))
157 return PTR_ERR(n);
158 }
97cac082 159 rt->n = n;
f83c7790
DM
160
161 return 0;
d3aaeb38
DM
162}
163
9a7ec3a9 164static struct dst_ops ip6_dst_ops_template = {
1da177e4 165 .family = AF_INET6,
09640e63 166 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
167 .gc = ip6_dst_gc,
168 .gc_thresh = 1024,
169 .check = ip6_dst_check,
0dbaee3b 170 .default_advmss = ip6_default_advmss,
ebb762f2 171 .mtu = ip6_mtu,
06582540 172 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
173 .destroy = ip6_dst_destroy,
174 .ifdown = ip6_dst_ifdown,
175 .negative_advice = ip6_negative_advice,
176 .link_failure = ip6_link_failure,
177 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 178 .redirect = rt6_do_redirect,
1ac06e03 179 .local_out = __ip6_local_out,
d3aaeb38 180 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
181};
182
ebb762f2 183static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 184{
618f9bc7
SK
185 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
186
187 return mtu ? : dst->dev->mtu;
ec831ea7
RD
188}
189
14e50e57
DM
190static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
191{
192}
193
0972ddb2
HB
194static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
195 unsigned long old)
196{
197 return NULL;
198}
199
14e50e57
DM
200static struct dst_ops ip6_dst_blackhole_ops = {
201 .family = AF_INET6,
09640e63 202 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
203 .destroy = ip6_dst_destroy,
204 .check = ip6_dst_check,
ebb762f2 205 .mtu = ip6_blackhole_mtu,
214f45c9 206 .default_advmss = ip6_default_advmss,
14e50e57 207 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 208 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 209 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
210};
211
62fa8a84
DM
212static const u32 ip6_template_metrics[RTAX_MAX] = {
213 [RTAX_HOPLIMIT - 1] = 255,
214};
215
bdb3289f 216static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
217 .dst = {
218 .__refcnt = ATOMIC_INIT(1),
219 .__use = 1,
220 .obsolete = -1,
221 .error = -ENETUNREACH,
d8d1f30b
CG
222 .input = ip6_pkt_discard,
223 .output = ip6_pkt_discard_out,
1da177e4
LT
224 },
225 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 226 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
227 .rt6i_metric = ~(u32) 0,
228 .rt6i_ref = ATOMIC_INIT(1),
229};
230
101367c2
TG
231#ifdef CONFIG_IPV6_MULTIPLE_TABLES
232
6723ab54
DM
233static int ip6_pkt_prohibit(struct sk_buff *skb);
234static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 235
280a34c8 236static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
237 .dst = {
238 .__refcnt = ATOMIC_INIT(1),
239 .__use = 1,
240 .obsolete = -1,
241 .error = -EACCES,
d8d1f30b
CG
242 .input = ip6_pkt_prohibit,
243 .output = ip6_pkt_prohibit_out,
101367c2
TG
244 },
245 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 246 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
247 .rt6i_metric = ~(u32) 0,
248 .rt6i_ref = ATOMIC_INIT(1),
249};
250
bdb3289f 251static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
252 .dst = {
253 .__refcnt = ATOMIC_INIT(1),
254 .__use = 1,
255 .obsolete = -1,
256 .error = -EINVAL,
d8d1f30b
CG
257 .input = dst_discard,
258 .output = dst_discard,
101367c2
TG
259 },
260 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 261 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
262 .rt6i_metric = ~(u32) 0,
263 .rt6i_ref = ATOMIC_INIT(1),
264};
265
266#endif
267
1da177e4 268/* allocate dst with ip6_dst_ops */
97bab73f 269static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f 270 struct net_device *dev,
8b96d22d
DM
271 int flags,
272 struct fib6_table *table)
1da177e4 273{
97bab73f
DM
274 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
275 0, 0, flags);
cf911662 276
97bab73f 277 if (rt) {
a2de86f6 278 memset(&rt->n, 0,
38308473 279 sizeof(*rt) - sizeof(struct dst_entry));
8b96d22d 280 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
97bab73f 281 }
cf911662 282 return rt;
1da177e4
LT
283}
284
285static void ip6_dst_destroy(struct dst_entry *dst)
286{
287 struct rt6_info *rt = (struct rt6_info *)dst;
288 struct inet6_dev *idev = rt->rt6i_idev;
289
97cac082
DM
290 if (rt->n)
291 neigh_release(rt->n);
292
8e2ec639
YZ
293 if (!(rt->dst.flags & DST_HOST))
294 dst_destroy_metrics_generic(dst);
295
38308473 296 if (idev) {
1da177e4
LT
297 rt->rt6i_idev = NULL;
298 in6_dev_put(idev);
1ab1457c 299 }
1716a961
G
300
301 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
302 dst_release(dst->from);
303
97bab73f
DM
304 if (rt6_has_peer(rt)) {
305 struct inet_peer *peer = rt6_peer_ptr(rt);
b3419363
DM
306 inet_putpeer(peer);
307 }
308}
309
6431cbc2
DM
310static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
311
312static u32 rt6_peer_genid(void)
313{
314 return atomic_read(&__rt6_peer_genid);
315}
316
b3419363
DM
317void rt6_bind_peer(struct rt6_info *rt, int create)
318{
97bab73f 319 struct inet_peer_base *base;
b3419363
DM
320 struct inet_peer *peer;
321
97bab73f
DM
322 base = inetpeer_base_ptr(rt->_rt6i_peer);
323 if (!base)
324 return;
325
326 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
7b34ca2a
DM
327 if (peer) {
328 if (!rt6_set_peer(rt, peer))
329 inet_putpeer(peer);
330 else
331 rt->rt6i_peer_genid = rt6_peer_genid();
332 }
1da177e4
LT
333}
334
335static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
336 int how)
337{
338 struct rt6_info *rt = (struct rt6_info *)dst;
339 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 340 struct net_device *loopback_dev =
c346dca1 341 dev_net(dev)->loopback_dev;
1da177e4 342
97cac082
DM
343 if (dev != loopback_dev) {
344 if (idev && idev->dev == dev) {
345 struct inet6_dev *loopback_idev =
346 in6_dev_get(loopback_dev);
347 if (loopback_idev) {
348 rt->rt6i_idev = loopback_idev;
349 in6_dev_put(idev);
350 }
351 }
352 if (rt->n && rt->n->dev == dev) {
353 rt->n->dev = loopback_dev;
354 dev_hold(loopback_dev);
355 dev_put(dev);
1da177e4
LT
356 }
357 }
358}
359
a50feda5 360static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 361{
1716a961
G
362 struct rt6_info *ort = NULL;
363
364 if (rt->rt6i_flags & RTF_EXPIRES) {
365 if (time_after(jiffies, rt->dst.expires))
a50feda5 366 return true;
1716a961
G
367 } else if (rt->dst.from) {
368 ort = (struct rt6_info *) rt->dst.from;
369 return (ort->rt6i_flags & RTF_EXPIRES) &&
370 time_after(jiffies, ort->dst.expires);
371 }
a50feda5 372 return false;
1da177e4
LT
373}
374
a50feda5 375static bool rt6_need_strict(const struct in6_addr *daddr)
c71099ac 376{
a02cec21
ED
377 return ipv6_addr_type(daddr) &
378 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
379}
380
1da177e4 381/*
c71099ac 382 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
383 */
384
8ed67789
DL
385static inline struct rt6_info *rt6_device_match(struct net *net,
386 struct rt6_info *rt,
b71d1d42 387 const struct in6_addr *saddr,
1da177e4 388 int oif,
d420895e 389 int flags)
1da177e4
LT
390{
391 struct rt6_info *local = NULL;
392 struct rt6_info *sprt;
393
dd3abc4e
YH
394 if (!oif && ipv6_addr_any(saddr))
395 goto out;
396
d8d1f30b 397 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 398 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
399
400 if (oif) {
1da177e4
LT
401 if (dev->ifindex == oif)
402 return sprt;
403 if (dev->flags & IFF_LOOPBACK) {
38308473 404 if (!sprt->rt6i_idev ||
1da177e4 405 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 406 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 407 continue;
1ab1457c 408 if (local && (!oif ||
1da177e4
LT
409 local->rt6i_idev->dev->ifindex == oif))
410 continue;
411 }
412 local = sprt;
413 }
dd3abc4e
YH
414 } else {
415 if (ipv6_chk_addr(net, saddr, dev,
416 flags & RT6_LOOKUP_F_IFACE))
417 return sprt;
1da177e4 418 }
dd3abc4e 419 }
1da177e4 420
dd3abc4e 421 if (oif) {
1da177e4
LT
422 if (local)
423 return local;
424
d420895e 425 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 426 return net->ipv6.ip6_null_entry;
1da177e4 427 }
dd3abc4e 428out:
1da177e4
LT
429 return rt;
430}
431
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If @rt has a neighbour that is not in a NUD_VALID state and the last
 * update is older than the interface's rtr_probe_interval, stamp the
 * neighbour and send a Neighbour Solicitation to the solicited-node
 * multicast address of its key. Probing MUST be rate-limited to no more
 * than one per minute.
 *
 * NOTE(review): neigh->updated is written while only the read side of
 * neigh->lock is held - confirm this is intended.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = rt ? rt->n : NULL;
	if (neigh && !(neigh->nud_state & NUD_VALID)) {
		bool due;

		/* re-check the state under the lock before stamping */
		read_lock_bh(&neigh->lock);
		due = !(neigh->nud_state & NUD_VALID) &&
		      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
		if (due)
			neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		if (due) {
			struct in6_addr *target;
			struct in6_addr mcaddr;

			target = (struct in6_addr *)&neigh->primary_key;
			addrconf_addr_solict_mult(target, &mcaddr);
			ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
		}
	}
	rcu_read_unlock();
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
471
1da177e4 472/*
554cfb7e 473 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 474 */
b6f99a21 475static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 476{
d1918542 477 struct net_device *dev = rt->dst.dev;
161980f4 478 if (!oif || dev->ifindex == oif)
554cfb7e 479 return 2;
161980f4
DM
480 if ((dev->flags & IFF_LOOPBACK) &&
481 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
482 return 1;
483 return 0;
554cfb7e 484}
1da177e4 485
b6f99a21 486static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 487{
f2c31e32 488 struct neighbour *neigh;
398bcbeb 489 int m;
f2c31e32
ED
490
491 rcu_read_lock();
97cac082 492 neigh = rt->n;
4d0c5911
YH
493 if (rt->rt6i_flags & RTF_NONEXTHOP ||
494 !(rt->rt6i_flags & RTF_GATEWAY))
495 m = 1;
496 else if (neigh) {
554cfb7e
YH
497 read_lock_bh(&neigh->lock);
498 if (neigh->nud_state & NUD_VALID)
4d0c5911 499 m = 2;
398bcbeb
YH
500#ifdef CONFIG_IPV6_ROUTER_PREF
501 else if (neigh->nud_state & NUD_FAILED)
502 m = 0;
503#endif
504 else
ea73ee23 505 m = 1;
554cfb7e 506 read_unlock_bh(&neigh->lock);
398bcbeb
YH
507 } else
508 m = 0;
f2c31e32 509 rcu_read_unlock();
554cfb7e 510 return m;
1da177e4
LT
511}
512
554cfb7e
YH
513static int rt6_score_route(struct rt6_info *rt, int oif,
514 int strict)
1da177e4 515{
4d0c5911 516 int m, n;
1ab1457c 517
4d0c5911 518 m = rt6_check_dev(rt, oif);
77d16f45 519 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 520 return -1;
ebacaaa0
YH
521#ifdef CONFIG_IPV6_ROUTER_PREF
522 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
523#endif
4d0c5911 524 n = rt6_check_neigh(rt);
557e92ef 525 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
526 return -1;
527 return m;
528}
529
f11e6659
DM
530static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
531 int *mpri, struct rt6_info *match)
554cfb7e 532{
f11e6659
DM
533 int m;
534
535 if (rt6_check_expired(rt))
536 goto out;
537
538 m = rt6_score_route(rt, oif, strict);
539 if (m < 0)
540 goto out;
541
542 if (m > *mpri) {
543 if (strict & RT6_LOOKUP_F_REACHABLE)
544 rt6_probe(match);
545 *mpri = m;
546 match = rt;
547 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
548 rt6_probe(rt);
549 }
550
551out:
552 return match;
553}
554
555static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
556 struct rt6_info *rr_head,
557 u32 metric, int oif, int strict)
558{
559 struct rt6_info *rt, *match;
554cfb7e 560 int mpri = -1;
1da177e4 561
f11e6659
DM
562 match = NULL;
563 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 564 rt = rt->dst.rt6_next)
f11e6659
DM
565 match = find_match(rt, oif, strict, &mpri, match);
566 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 567 rt = rt->dst.rt6_next)
f11e6659 568 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 569
f11e6659
DM
570 return match;
571}
1da177e4 572
f11e6659
DM
573static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
574{
575 struct rt6_info *match, *rt0;
8ed67789 576 struct net *net;
1da177e4 577
f11e6659
DM
578 rt0 = fn->rr_ptr;
579 if (!rt0)
580 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 581
f11e6659 582 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 583
554cfb7e 584 if (!match &&
f11e6659 585 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 586 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 587
554cfb7e 588 /* no entries matched; do round-robin */
f11e6659
DM
589 if (!next || next->rt6i_metric != rt0->rt6i_metric)
590 next = fn->leaf;
591
592 if (next != rt0)
593 fn->rr_ptr = next;
1da177e4 594 }
1da177e4 595
d1918542 596 net = dev_net(rt0->dst.dev);
a02cec21 597 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
598}
599
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option received on @dev from router
 * @gwaddr.
 *
 * @opt points at the option (struct route_info) and @len is its size in
 * bytes. A zero lifetime deletes an existing matching route; a non-zero
 * lifetime adds the route if missing or refreshes the preference bits of
 * an existing one, then (re)arms or clears its expiry.
 *
 * Returns 0 on success, -EINVAL for a malformed option or an invalid
 * preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct net *net = dev_net(dev);
	struct in6_addr prefix_buf, *prefix;
	unsigned int min_length;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;

	if (rinfo->prefix_len > 64)
		min_length = 2;
	else if (rinfo->prefix_len > 0)
		min_length = 1;
	else
		min_length = 0;

	if (rinfo->length < min_length)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* zero lifetime withdraws a route we already know about */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	dst_release(&rt->dst);
	return 0;
}
#endif
672
/*
 * BACKTRACK(__net, saddr) - retry a lookup higher up the fib6 tree.
 *
 * Must be expanded inside a function that has locals `rt` and `fn` and
 * labels `out` and `restart`. If `rt` is the null entry, climb from `fn`
 * towards the root (descending into a parent's source subtree via
 * fib6_lookup() when there is one that is not `fn` itself) until a node
 * with RTN_RTINFO is found, then jump to `restart`. Reaching a node
 * flagged RTN_TL_ROOT jumps to `out` instead.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 690
8ed67789
DL
691static struct rt6_info *ip6_pol_route_lookup(struct net *net,
692 struct fib6_table *table,
4c9483b2 693 struct flowi6 *fl6, int flags)
1da177e4
LT
694{
695 struct fib6_node *fn;
696 struct rt6_info *rt;
697
c71099ac 698 read_lock_bh(&table->tb6_lock);
4c9483b2 699 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
700restart:
701 rt = fn->leaf;
4c9483b2
DM
702 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
703 BACKTRACK(net, &fl6->saddr);
c71099ac 704out:
d8d1f30b 705 dst_use(&rt->dst, jiffies);
c71099ac 706 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
707 return rt;
708
709}
710
ea6e574e
FW
711struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
712 int flags)
713{
714 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
715}
716EXPORT_SYMBOL_GPL(ip6_route_lookup);
717
9acd9f3a
YH
718struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
719 const struct in6_addr *saddr, int oif, int strict)
c71099ac 720{
4c9483b2
DM
721 struct flowi6 fl6 = {
722 .flowi6_oif = oif,
723 .daddr = *daddr,
c71099ac
TG
724 };
725 struct dst_entry *dst;
77d16f45 726 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 727
adaa70bb 728 if (saddr) {
4c9483b2 729 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
730 flags |= RT6_LOOKUP_F_HAS_SADDR;
731 }
732
4c9483b2 733 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
734 if (dst->error == 0)
735 return (struct rt6_info *) dst;
736
737 dst_release(dst);
738
1da177e4
LT
739 return NULL;
740}
741
7159039a
YH
742EXPORT_SYMBOL(rt6_lookup);
743
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
749
86872cb5 750static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
751{
752 int err;
c71099ac 753 struct fib6_table *table;
1da177e4 754
c71099ac
TG
755 table = rt->rt6i_table;
756 write_lock_bh(&table->tb6_lock);
86872cb5 757 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 758 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
759
760 return err;
761}
762
40e22e8f
TG
763int ip6_ins_rt(struct rt6_info *rt)
764{
4d1169c1 765 struct nl_info info = {
d1918542 766 .nl_net = dev_net(rt->dst.dev),
4d1169c1 767 };
528c4ceb 768 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
769}
770
1716a961 771static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 772 const struct in6_addr *daddr,
b71d1d42 773 const struct in6_addr *saddr)
1da177e4 774{
1da177e4
LT
775 struct rt6_info *rt;
776
777 /*
778 * Clone the route.
779 */
780
21efcfa0 781 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
782
783 if (rt) {
14deae41
DM
784 int attempts = !in_softirq();
785
38308473 786 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 787 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 788 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 789 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 790 rt->rt6i_gateway = *daddr;
58c4fb86 791 }
1da177e4 792
1da177e4 793 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
794
795#ifdef CONFIG_IPV6_SUBTREES
796 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 797 rt->rt6i_src.addr = *saddr;
1da177e4
LT
798 rt->rt6i_src.plen = 128;
799 }
800#endif
801
14deae41 802 retry:
8ade06c6 803 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 804 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
805 int saved_rt_min_interval =
806 net->ipv6.sysctl.ip6_rt_gc_min_interval;
807 int saved_rt_elasticity =
808 net->ipv6.sysctl.ip6_rt_gc_elasticity;
809
810 if (attempts-- > 0) {
811 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
812 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
813
86393e52 814 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
815
816 net->ipv6.sysctl.ip6_rt_gc_elasticity =
817 saved_rt_elasticity;
818 net->ipv6.sysctl.ip6_rt_gc_min_interval =
819 saved_rt_min_interval;
820 goto retry;
821 }
822
f3213831 823 net_warn_ratelimited("Neighbour table overflow\n");
d8d1f30b 824 dst_free(&rt->dst);
14deae41
DM
825 return NULL;
826 }
95a9a5ba 827 }
1da177e4 828
95a9a5ba
YH
829 return rt;
830}
1da177e4 831
21efcfa0
ED
832static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
833 const struct in6_addr *daddr)
299d9939 834{
21efcfa0
ED
835 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
836
299d9939 837 if (rt) {
299d9939 838 rt->rt6i_flags |= RTF_CACHE;
97cac082 839 rt->n = neigh_clone(ort->n);
299d9939
YH
840 }
841 return rt;
842}
843
8ed67789 844static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 845 struct flowi6 *fl6, int flags)
1da177e4
LT
846{
847 struct fib6_node *fn;
519fbd87 848 struct rt6_info *rt, *nrt;
c71099ac 849 int strict = 0;
1da177e4 850 int attempts = 3;
519fbd87 851 int err;
53b7997f 852 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 853
77d16f45 854 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
855
856relookup:
c71099ac 857 read_lock_bh(&table->tb6_lock);
1da177e4 858
8238dd06 859restart_2:
4c9483b2 860 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
861
862restart:
4acad72d 863 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 864
4c9483b2 865 BACKTRACK(net, &fl6->saddr);
8ed67789 866 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 867 rt->rt6i_flags & RTF_CACHE)
1ddef044 868 goto out;
1da177e4 869
d8d1f30b 870 dst_hold(&rt->dst);
c71099ac 871 read_unlock_bh(&table->tb6_lock);
fb9de91e 872
97cac082 873 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 874 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 875 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 876 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
877 else
878 goto out2;
e40cf353 879
d8d1f30b 880 dst_release(&rt->dst);
8ed67789 881 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 882
d8d1f30b 883 dst_hold(&rt->dst);
519fbd87 884 if (nrt) {
40e22e8f 885 err = ip6_ins_rt(nrt);
519fbd87 886 if (!err)
1da177e4 887 goto out2;
1da177e4 888 }
1da177e4 889
519fbd87
YH
890 if (--attempts <= 0)
891 goto out2;
892
893 /*
c71099ac 894 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
895 * released someone could insert this route. Relookup.
896 */
d8d1f30b 897 dst_release(&rt->dst);
519fbd87
YH
898 goto relookup;
899
900out:
8238dd06
YH
901 if (reachable) {
902 reachable = 0;
903 goto restart_2;
904 }
d8d1f30b 905 dst_hold(&rt->dst);
c71099ac 906 read_unlock_bh(&table->tb6_lock);
1da177e4 907out2:
d8d1f30b
CG
908 rt->dst.lastuse = jiffies;
909 rt->dst.__use++;
c71099ac
TG
910
911 return rt;
1da177e4
LT
912}
913
8ed67789 914static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 915 struct flowi6 *fl6, int flags)
4acad72d 916{
4c9483b2 917 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
918}
919
72331bc0
SL
920static struct dst_entry *ip6_route_input_lookup(struct net *net,
921 struct net_device *dev,
922 struct flowi6 *fl6, int flags)
923{
924 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
925 flags |= RT6_LOOKUP_F_IFACE;
926
927 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
928}
929
c71099ac
TG
930void ip6_route_input(struct sk_buff *skb)
931{
b71d1d42 932 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 933 struct net *net = dev_net(skb->dev);
adaa70bb 934 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
935 struct flowi6 fl6 = {
936 .flowi6_iif = skb->dev->ifindex,
937 .daddr = iph->daddr,
938 .saddr = iph->saddr,
38308473 939 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
940 .flowi6_mark = skb->mark,
941 .flowi6_proto = iph->nexthdr,
c71099ac 942 };
adaa70bb 943
72331bc0 944 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
945}
946
8ed67789 947static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 948 struct flowi6 *fl6, int flags)
1da177e4 949{
4c9483b2 950 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
951}
952
9c7a4f9c 953struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 954 struct flowi6 *fl6)
c71099ac
TG
955{
956 int flags = 0;
957
4dc27d1c
DM
958 fl6->flowi6_iif = net->loopback_dev->ifindex;
959
4c9483b2 960 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 961 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 962
4c9483b2 963 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 964 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
965 else if (sk)
966 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 967
4c9483b2 968 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
969}
970
7159039a 971EXPORT_SYMBOL(ip6_route_output);
1da177e4 972
2774c131 973struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 974{
5c1e6aa3 975 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
976 struct dst_entry *new = NULL;
977
5c1e6aa3 978 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 979 if (rt) {
cf911662 980 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
97bab73f 981 rt6_init_peer(rt, net->ipv6.peers);
cf911662 982
d8d1f30b 983 new = &rt->dst;
14e50e57 984
14e50e57 985 new->__use = 1;
352e512c
HX
986 new->input = dst_discard;
987 new->output = dst_discard;
14e50e57 988
21efcfa0
ED
989 if (dst_metrics_read_only(&ort->dst))
990 new->_metrics = ort->dst._metrics;
991 else
992 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
993 rt->rt6i_idev = ort->rt6i_idev;
994 if (rt->rt6i_idev)
995 in6_dev_hold(rt->rt6i_idev);
14e50e57 996
4e3fd7a0 997 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
998 rt->rt6i_flags = ort->rt6i_flags;
999 rt6_clean_expires(rt);
14e50e57
DM
1000 rt->rt6i_metric = 0;
1001
1002 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1003#ifdef CONFIG_IPV6_SUBTREES
1004 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1005#endif
1006
1007 dst_free(new);
1008 }
1009
69ead7af
DM
1010 dst_release(dst_orig);
1011 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1012}
14e50e57 1013
1da177e4
LT
1014/*
1015 * Destination cache support functions
1016 */
1017
1018static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1019{
1020 struct rt6_info *rt;
1021
1022 rt = (struct rt6_info *) dst;
1023
6431cbc2
DM
1024 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1025 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
97bab73f 1026 if (!rt6_has_peer(rt))
6431cbc2
DM
1027 rt6_bind_peer(rt, 0);
1028 rt->rt6i_peer_genid = rt6_peer_genid();
1029 }
1da177e4 1030 return dst;
6431cbc2 1031 }
1da177e4
LT
1032 return NULL;
1033}
1034
1035static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1036{
1037 struct rt6_info *rt = (struct rt6_info *) dst;
1038
1039 if (rt) {
54c1a859
YH
1040 if (rt->rt6i_flags & RTF_CACHE) {
1041 if (rt6_check_expired(rt)) {
1042 ip6_del_rt(rt);
1043 dst = NULL;
1044 }
1045 } else {
1da177e4 1046 dst_release(dst);
54c1a859
YH
1047 dst = NULL;
1048 }
1da177e4 1049 }
54c1a859 1050 return dst;
1da177e4
LT
1051}
1052
1053static void ip6_link_failure(struct sk_buff *skb)
1054{
1055 struct rt6_info *rt;
1056
3ffe533c 1057 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1058
adf30907 1059 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1060 if (rt) {
1716a961
G
1061 if (rt->rt6i_flags & RTF_CACHE)
1062 rt6_update_expires(rt, 0);
1063 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1064 rt->rt6i_node->fn_sernum = -1;
1065 }
1066}
1067
1068static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1069{
1070 struct rt6_info *rt6 = (struct rt6_info*)dst;
1071
81aded24 1072 dst_confirm(dst);
1da177e4 1073 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
81aded24
DM
1074 struct net *net = dev_net(dst->dev);
1075
1da177e4
LT
1076 rt6->rt6i_flags |= RTF_MODIFIED;
1077 if (mtu < IPV6_MIN_MTU) {
defb3519 1078 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1079 mtu = IPV6_MIN_MTU;
defb3519
DM
1080 features |= RTAX_FEATURE_ALLFRAG;
1081 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1082 }
defb3519 1083 dst_metric_set(dst, RTAX_MTU, mtu);
81aded24 1084 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1085 }
1086}
1087
42ae66c8
DM
1088void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1089 int oif, u32 mark)
81aded24
DM
1090{
1091 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1092 struct dst_entry *dst;
1093 struct flowi6 fl6;
1094
1095 memset(&fl6, 0, sizeof(fl6));
1096 fl6.flowi6_oif = oif;
1097 fl6.flowi6_mark = mark;
3e12939a 1098 fl6.flowi6_flags = 0;
81aded24
DM
1099 fl6.daddr = iph->daddr;
1100 fl6.saddr = iph->saddr;
1101 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1102
1103 dst = ip6_route_output(net, NULL, &fl6);
1104 if (!dst->error)
1105 ip6_rt_update_pmtu(dst, ntohl(mtu));
1106 dst_release(dst);
1107}
1108EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1109
1110void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1111{
1112 ip6_update_pmtu(skb, sock_net(sk), mtu,
1113 sk->sk_bound_dev_if, sk->sk_mark);
1114}
1115EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1116
0dbaee3b 1117static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1118{
0dbaee3b
DM
1119 struct net_device *dev = dst->dev;
1120 unsigned int mtu = dst_mtu(dst);
1121 struct net *net = dev_net(dev);
1122
1da177e4
LT
1123 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1124
5578689a
DL
1125 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1126 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1127
1128 /*
1ab1457c
YH
1129 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1130 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1131 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1132 * rely only on pmtu discovery"
1133 */
1134 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1135 mtu = IPV6_MAXPLEN;
1136 return mtu;
1137}
1138
ebb762f2 1139static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1140{
d33e4553 1141 struct inet6_dev *idev;
618f9bc7
SK
1142 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1143
1144 if (mtu)
1145 return mtu;
1146
1147 mtu = IPV6_MIN_MTU;
d33e4553
DM
1148
1149 rcu_read_lock();
1150 idev = __in6_dev_get(dst->dev);
1151 if (idev)
1152 mtu = idev->cnf.mtu6;
1153 rcu_read_unlock();
1154
1155 return mtu;
1156}
1157
3b00944c
YH
1158static struct dst_entry *icmp6_dst_gc_list;
1159static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1160
3b00944c 1161struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1162 struct neighbour *neigh,
87a11578 1163 struct flowi6 *fl6)
1da177e4 1164{
87a11578 1165 struct dst_entry *dst;
1da177e4
LT
1166 struct rt6_info *rt;
1167 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1168 struct net *net = dev_net(dev);
1da177e4 1169
38308473 1170 if (unlikely(!idev))
122bdf67 1171 return ERR_PTR(-ENODEV);
1da177e4 1172
8b96d22d 1173 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1174 if (unlikely(!rt)) {
1da177e4 1175 in6_dev_put(idev);
87a11578 1176 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1177 goto out;
1178 }
1179
1da177e4
LT
1180 if (neigh)
1181 neigh_hold(neigh);
14deae41 1182 else {
f894cbf8 1183 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
b43faac6 1184 if (IS_ERR(neigh)) {
252c3d84 1185 in6_dev_put(idev);
b43faac6
DM
1186 dst_free(&rt->dst);
1187 return ERR_CAST(neigh);
1188 }
14deae41 1189 }
1da177e4 1190
8e2ec639
YZ
1191 rt->dst.flags |= DST_HOST;
1192 rt->dst.output = ip6_output;
97cac082 1193 rt->n = neigh;
d8d1f30b 1194 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1195 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1196 rt->rt6i_dst.plen = 128;
1197 rt->rt6i_idev = idev;
7011687f 1198 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1199
3b00944c 1200 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1201 rt->dst.next = icmp6_dst_gc_list;
1202 icmp6_dst_gc_list = &rt->dst;
3b00944c 1203 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1204
5578689a 1205 fib6_force_start_gc(net);
1da177e4 1206
87a11578
DM
1207 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1208
1da177e4 1209out:
87a11578 1210 return dst;
1da177e4
LT
1211}
1212
3d0f24a7 1213int icmp6_dst_gc(void)
1da177e4 1214{
e9476e95 1215 struct dst_entry *dst, **pprev;
3d0f24a7 1216 int more = 0;
1da177e4 1217
3b00944c
YH
1218 spin_lock_bh(&icmp6_dst_lock);
1219 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1220
1da177e4
LT
1221 while ((dst = *pprev) != NULL) {
1222 if (!atomic_read(&dst->__refcnt)) {
1223 *pprev = dst->next;
1224 dst_free(dst);
1da177e4
LT
1225 } else {
1226 pprev = &dst->next;
3d0f24a7 1227 ++more;
1da177e4
LT
1228 }
1229 }
1230
3b00944c 1231 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1232
3d0f24a7 1233 return more;
1da177e4
LT
1234}
1235
1e493d19
DM
1236static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1237 void *arg)
1238{
1239 struct dst_entry *dst, **pprev;
1240
1241 spin_lock_bh(&icmp6_dst_lock);
1242 pprev = &icmp6_dst_gc_list;
1243 while ((dst = *pprev) != NULL) {
1244 struct rt6_info *rt = (struct rt6_info *) dst;
1245 if (func(rt, arg)) {
1246 *pprev = dst->next;
1247 dst_free(dst);
1248 } else {
1249 pprev = &dst->next;
1250 }
1251 }
1252 spin_unlock_bh(&icmp6_dst_lock);
1253}
1254
569d3645 1255static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1256{
1da177e4 1257 unsigned long now = jiffies;
86393e52 1258 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1259 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1260 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1261 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1262 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1263 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1264 int entries;
7019b78e 1265
fc66f95c 1266 entries = dst_entries_get_fast(ops);
7019b78e 1267 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1268 entries <= rt_max_size)
1da177e4
LT
1269 goto out;
1270
6891a346
BT
1271 net->ipv6.ip6_rt_gc_expire++;
1272 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1273 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1274 entries = dst_entries_get_slow(ops);
1275 if (entries < ops->gc_thresh)
7019b78e 1276 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1277out:
7019b78e 1278 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1279 return entries > rt_max_size;
1da177e4
LT
1280}
1281
1282/* Clean host part of a prefix. Not necessary in radix tree,
1283 but results in cleaner routing tables.
1284
1285 Remove it only when all the things will work!
1286 */
1287
6b75d090 1288int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1289{
5170ae82 1290 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1291 if (hoplimit == 0) {
6b75d090 1292 struct net_device *dev = dst->dev;
c68f24cc
ED
1293 struct inet6_dev *idev;
1294
1295 rcu_read_lock();
1296 idev = __in6_dev_get(dev);
1297 if (idev)
6b75d090 1298 hoplimit = idev->cnf.hop_limit;
c68f24cc 1299 else
53b7997f 1300 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1301 rcu_read_unlock();
1da177e4
LT
1302 }
1303 return hoplimit;
1304}
abbf46ae 1305EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1306
1307/*
1308 *
1309 */
1310
86872cb5 1311int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1312{
1313 int err;
5578689a 1314 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1315 struct rt6_info *rt = NULL;
1316 struct net_device *dev = NULL;
1317 struct inet6_dev *idev = NULL;
c71099ac 1318 struct fib6_table *table;
1da177e4
LT
1319 int addr_type;
1320
86872cb5 1321 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1322 return -EINVAL;
1323#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1324 if (cfg->fc_src_len)
1da177e4
LT
1325 return -EINVAL;
1326#endif
86872cb5 1327 if (cfg->fc_ifindex) {
1da177e4 1328 err = -ENODEV;
5578689a 1329 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1330 if (!dev)
1331 goto out;
1332 idev = in6_dev_get(dev);
1333 if (!idev)
1334 goto out;
1335 }
1336
86872cb5
TG
1337 if (cfg->fc_metric == 0)
1338 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1339
d71314b4 1340 err = -ENOBUFS;
38308473
DM
1341 if (cfg->fc_nlinfo.nlh &&
1342 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1343 table = fib6_get_table(net, cfg->fc_table);
38308473 1344 if (!table) {
f3213831 1345 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1346 table = fib6_new_table(net, cfg->fc_table);
1347 }
1348 } else {
1349 table = fib6_new_table(net, cfg->fc_table);
1350 }
38308473
DM
1351
1352 if (!table)
c71099ac 1353 goto out;
c71099ac 1354
8b96d22d 1355 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1da177e4 1356
38308473 1357 if (!rt) {
1da177e4
LT
1358 err = -ENOMEM;
1359 goto out;
1360 }
1361
d8d1f30b 1362 rt->dst.obsolete = -1;
1716a961
G
1363
1364 if (cfg->fc_flags & RTF_EXPIRES)
1365 rt6_set_expires(rt, jiffies +
1366 clock_t_to_jiffies(cfg->fc_expires));
1367 else
1368 rt6_clean_expires(rt);
1da177e4 1369
86872cb5
TG
1370 if (cfg->fc_protocol == RTPROT_UNSPEC)
1371 cfg->fc_protocol = RTPROT_BOOT;
1372 rt->rt6i_protocol = cfg->fc_protocol;
1373
1374 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1375
1376 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1377 rt->dst.input = ip6_mc_input;
ab79ad14
1378 else if (cfg->fc_flags & RTF_LOCAL)
1379 rt->dst.input = ip6_input;
1da177e4 1380 else
d8d1f30b 1381 rt->dst.input = ip6_forward;
1da177e4 1382
d8d1f30b 1383 rt->dst.output = ip6_output;
1da177e4 1384
86872cb5
TG
1385 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1386 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1387 if (rt->rt6i_dst.plen == 128)
11d53b49 1388 rt->dst.flags |= DST_HOST;
1da177e4 1389
8e2ec639
YZ
1390 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1391 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1392 if (!metrics) {
1393 err = -ENOMEM;
1394 goto out;
1395 }
1396 dst_init_metrics(&rt->dst, metrics, 0);
1397 }
1da177e4 1398#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1399 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1400 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1401#endif
1402
86872cb5 1403 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1404
1405 /* We cannot add true routes via loopback here,
1406 they would result in kernel looping; promote them to reject routes
1407 */
86872cb5 1408 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1409 (dev && (dev->flags & IFF_LOOPBACK) &&
1410 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1411 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1412 /* hold loopback dev/idev if we haven't done so. */
5578689a 1413 if (dev != net->loopback_dev) {
1da177e4
LT
1414 if (dev) {
1415 dev_put(dev);
1416 in6_dev_put(idev);
1417 }
5578689a 1418 dev = net->loopback_dev;
1da177e4
LT
1419 dev_hold(dev);
1420 idev = in6_dev_get(dev);
1421 if (!idev) {
1422 err = -ENODEV;
1423 goto out;
1424 }
1425 }
d8d1f30b
CG
1426 rt->dst.output = ip6_pkt_discard_out;
1427 rt->dst.input = ip6_pkt_discard;
1428 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1429 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1430 goto install_route;
1431 }
1432
86872cb5 1433 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1434 const struct in6_addr *gw_addr;
1da177e4
LT
1435 int gwa_type;
1436
86872cb5 1437 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1438 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1439 gwa_type = ipv6_addr_type(gw_addr);
1440
1441 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1442 struct rt6_info *grt;
1443
1444 /* IPv6 strictly inhibits using not link-local
1445 addresses as nexthop address.
1446 Otherwise, router will not able to send redirects.
1447 It is very good, but in some (rare!) circumstances
1448 (SIT, PtP, NBMA NOARP links) it is handy to allow
1449 some exceptions. --ANK
1450 */
1451 err = -EINVAL;
38308473 1452 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1453 goto out;
1454
5578689a 1455 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1456
1457 err = -EHOSTUNREACH;
38308473 1458 if (!grt)
1da177e4
LT
1459 goto out;
1460 if (dev) {
d1918542 1461 if (dev != grt->dst.dev) {
d8d1f30b 1462 dst_release(&grt->dst);
1da177e4
LT
1463 goto out;
1464 }
1465 } else {
d1918542 1466 dev = grt->dst.dev;
1da177e4
LT
1467 idev = grt->rt6i_idev;
1468 dev_hold(dev);
1469 in6_dev_hold(grt->rt6i_idev);
1470 }
38308473 1471 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1472 err = 0;
d8d1f30b 1473 dst_release(&grt->dst);
1da177e4
LT
1474
1475 if (err)
1476 goto out;
1477 }
1478 err = -EINVAL;
38308473 1479 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1480 goto out;
1481 }
1482
1483 err = -ENODEV;
38308473 1484 if (!dev)
1da177e4
LT
1485 goto out;
1486
c3968a85
DW
1487 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1488 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1489 err = -EINVAL;
1490 goto out;
1491 }
4e3fd7a0 1492 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1493 rt->rt6i_prefsrc.plen = 128;
1494 } else
1495 rt->rt6i_prefsrc.plen = 0;
1496
86872cb5 1497 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1498 err = rt6_bind_neighbour(rt, dev);
f83c7790 1499 if (err)
1da177e4 1500 goto out;
1da177e4
LT
1501 }
1502
86872cb5 1503 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1504
1505install_route:
86872cb5
TG
1506 if (cfg->fc_mx) {
1507 struct nlattr *nla;
1508 int remaining;
1509
1510 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1511 int type = nla_type(nla);
86872cb5
TG
1512
1513 if (type) {
1514 if (type > RTAX_MAX) {
1da177e4
LT
1515 err = -EINVAL;
1516 goto out;
1517 }
86872cb5 1518
defb3519 1519 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1520 }
1da177e4
LT
1521 }
1522 }
1523
d8d1f30b 1524 rt->dst.dev = dev;
1da177e4 1525 rt->rt6i_idev = idev;
c71099ac 1526 rt->rt6i_table = table;
63152fc0 1527
c346dca1 1528 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1529
86872cb5 1530 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1531
1532out:
1533 if (dev)
1534 dev_put(dev);
1535 if (idev)
1536 in6_dev_put(idev);
1537 if (rt)
d8d1f30b 1538 dst_free(&rt->dst);
1da177e4
LT
1539 return err;
1540}
1541
86872cb5 1542static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1543{
1544 int err;
c71099ac 1545 struct fib6_table *table;
d1918542 1546 struct net *net = dev_net(rt->dst.dev);
1da177e4 1547
8ed67789 1548 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1549 return -ENOENT;
1550
c71099ac
TG
1551 table = rt->rt6i_table;
1552 write_lock_bh(&table->tb6_lock);
1da177e4 1553
86872cb5 1554 err = fib6_del(rt, info);
d8d1f30b 1555 dst_release(&rt->dst);
1da177e4 1556
c71099ac 1557 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1558
1559 return err;
1560}
1561
e0a1ad73
TG
1562int ip6_del_rt(struct rt6_info *rt)
1563{
4d1169c1 1564 struct nl_info info = {
d1918542 1565 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1566 };
528c4ceb 1567 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1568}
1569
86872cb5 1570static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1571{
c71099ac 1572 struct fib6_table *table;
1da177e4
LT
1573 struct fib6_node *fn;
1574 struct rt6_info *rt;
1575 int err = -ESRCH;
1576
5578689a 1577 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1578 if (!table)
c71099ac
TG
1579 return err;
1580
1581 read_lock_bh(&table->tb6_lock);
1da177e4 1582
c71099ac 1583 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1584 &cfg->fc_dst, cfg->fc_dst_len,
1585 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1586
1da177e4 1587 if (fn) {
d8d1f30b 1588 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1589 if (cfg->fc_ifindex &&
d1918542
DM
1590 (!rt->dst.dev ||
1591 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1592 continue;
86872cb5
TG
1593 if (cfg->fc_flags & RTF_GATEWAY &&
1594 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1595 continue;
86872cb5 1596 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1597 continue;
d8d1f30b 1598 dst_hold(&rt->dst);
c71099ac 1599 read_unlock_bh(&table->tb6_lock);
1da177e4 1600
86872cb5 1601 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1602 }
1603 }
c71099ac 1604 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1605
1606 return err;
1607}
1608
1609/*
1610 * Handle redirects
1611 */
a6279458 1612struct ip6rd_flowi {
4c9483b2 1613 struct flowi6 fl6;
a6279458
YH
1614 struct in6_addr gateway;
1615};
1616
8ed67789
DL
1617static struct rt6_info *__ip6_route_redirect(struct net *net,
1618 struct fib6_table *table,
4c9483b2 1619 struct flowi6 *fl6,
a6279458 1620 int flags)
1da177e4 1621{
4c9483b2 1622 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1623 struct rt6_info *rt;
e843b9e1 1624 struct fib6_node *fn;
c71099ac 1625
1da177e4 1626 /*
e843b9e1
YH
1627 * Get the "current" route for this destination and
1628 * check if the redirect has come from approriate router.
1629 *
1630 * RFC 2461 specifies that redirects should only be
1631 * accepted if they come from the nexthop to the target.
1632 * Due to the way the routes are chosen, this notion
1633 * is a bit fuzzy and one might need to check all possible
1634 * routes.
1da177e4 1635 */
1da177e4 1636
c71099ac 1637 read_lock_bh(&table->tb6_lock);
4c9483b2 1638 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1639restart:
d8d1f30b 1640 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1641 /*
1642 * Current route is on-link; redirect is always invalid.
1643 *
1644 * Seems, previous statement is not true. It could
1645 * be node, which looks for us as on-link (f.e. proxy ndisc)
1646 * But then router serving it might decide, that we should
1647 * know truth 8)8) --ANK (980726).
1648 */
1649 if (rt6_check_expired(rt))
1650 continue;
1651 if (!(rt->rt6i_flags & RTF_GATEWAY))
1652 continue;
d1918542 1653 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
e843b9e1 1654 continue;
a6279458 1655 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1656 continue;
1657 break;
1658 }
a6279458 1659
cb15d9c2 1660 if (!rt)
8ed67789 1661 rt = net->ipv6.ip6_null_entry;
4c9483b2 1662 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1663out:
d8d1f30b 1664 dst_hold(&rt->dst);
a6279458 1665
c71099ac 1666 read_unlock_bh(&table->tb6_lock);
e843b9e1 1667
a6279458
YH
1668 return rt;
1669};
1670
b71d1d42
ED
1671static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1672 const struct in6_addr *src,
1673 const struct in6_addr *gateway,
a6279458
YH
1674 struct net_device *dev)
1675{
adaa70bb 1676 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1677 struct net *net = dev_net(dev);
a6279458 1678 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1679 .fl6 = {
1680 .flowi6_oif = dev->ifindex,
1681 .daddr = *dest,
1682 .saddr = *src,
a6279458 1683 },
a6279458 1684 };
adaa70bb 1685
4e3fd7a0 1686 rdfl.gateway = *gateway;
86c36ce4 1687
adaa70bb
TG
1688 if (rt6_need_strict(dest))
1689 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1690
4c9483b2 1691 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1692 flags, __ip6_route_redirect);
a6279458
YH
1693}
1694
6e157b6a 1695static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
a6279458 1696{
e8599ff4 1697 struct net *net = dev_net(skb->dev);
a6279458 1698 struct netevent_redirect netevent;
e8599ff4
DM
1699 struct rt6_info *rt, *nrt = NULL;
1700 const struct in6_addr *target;
e8599ff4 1701 struct ndisc_options ndopts;
6e157b6a
DM
1702 const struct in6_addr *dest;
1703 struct neighbour *old_neigh;
e8599ff4
DM
1704 struct inet6_dev *in6_dev;
1705 struct neighbour *neigh;
1706 struct icmp6hdr *icmph;
6e157b6a
DM
1707 int optlen, on_link;
1708 u8 *lladdr;
e8599ff4
DM
1709
1710 optlen = skb->tail - skb->transport_header;
1711 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1712
1713 if (optlen < 0) {
6e157b6a 1714 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
1715 return;
1716 }
1717
1718 icmph = icmp6_hdr(skb);
1719 target = (const struct in6_addr *) (icmph + 1);
1720 dest = target + 1;
1721
1722 if (ipv6_addr_is_multicast(dest)) {
6e157b6a 1723 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
1724 return;
1725 }
1726
6e157b6a 1727 on_link = 0;
e8599ff4
DM
1728 if (ipv6_addr_equal(dest, target)) {
1729 on_link = 1;
1730 } else if (ipv6_addr_type(target) !=
1731 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 1732 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
1733 return;
1734 }
1735
1736 in6_dev = __in6_dev_get(skb->dev);
1737 if (!in6_dev)
1738 return;
1739 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1740 return;
1741
1742 /* RFC2461 8.1:
1743 * The IP source address of the Redirect MUST be the same as the current
1744 * first-hop router for the specified ICMP Destination Address.
1745 */
1746
1747 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1748 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1749 return;
1750 }
6e157b6a
DM
1751
1752 lladdr = NULL;
e8599ff4
DM
1753 if (ndopts.nd_opts_tgt_lladdr) {
1754 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1755 skb->dev);
1756 if (!lladdr) {
1757 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1758 return;
1759 }
1760 }
1761
6e157b6a
DM
1762 rt = (struct rt6_info *) dst;
1763 if (rt == net->ipv6.ip6_null_entry) {
1764 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 1765 return;
6e157b6a 1766 }
e8599ff4 1767
6e157b6a
DM
1768 /* Redirect received -> path was valid.
1769 * Look, redirects are sent only in response to data packets,
1770 * so that this nexthop apparently is reachable. --ANK
1771 */
1772 dst_confirm(&rt->dst);
a6279458 1773
6e157b6a
DM
1774 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1775 if (!neigh)
1776 return;
a6279458 1777
6e157b6a
DM
1778 /* Duplicate redirect: silently ignore. */
1779 old_neigh = rt->n;
1780 if (neigh == old_neigh)
a6279458 1781 goto out;
1da177e4 1782
1da177e4
LT
1783 /*
1784 * We have finally decided to accept it.
1785 */
1786
1ab1457c 1787 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1788 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1789 NEIGH_UPDATE_F_OVERRIDE|
1790 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1791 NEIGH_UPDATE_F_ISROUTER))
1792 );
1793
21efcfa0 1794 nrt = ip6_rt_copy(rt, dest);
38308473 1795 if (!nrt)
1da177e4
LT
1796 goto out;
1797
1798 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1799 if (on_link)
1800 nrt->rt6i_flags &= ~RTF_GATEWAY;
1801
4e3fd7a0 1802 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
97cac082 1803 nrt->n = neigh_clone(neigh);
1da177e4 1804
40e22e8f 1805 if (ip6_ins_rt(nrt))
1da177e4
LT
1806 goto out;
1807
d8d1f30b 1808 netevent.old = &rt->dst;
1d248b1c 1809 netevent.old_neigh = old_neigh;
d8d1f30b 1810 netevent.new = &nrt->dst;
1d248b1c
DM
1811 netevent.new_neigh = neigh;
1812 netevent.daddr = dest;
8d71740c
TT
1813 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1814
38308473 1815 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 1816 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 1817 ip6_del_rt(rt);
1da177e4
LT
1818 }
1819
1820out:
e8599ff4 1821 neigh_release(neigh);
6e157b6a
DM
1822}
1823
1824void rt6_redirect(struct sk_buff *skb)
1825{
1826 const struct in6_addr *target;
1827 const struct in6_addr *dest;
1828 const struct in6_addr *src;
1829 const struct in6_addr *saddr;
1830 struct icmp6hdr *icmph;
1831 struct rt6_info *rt;
1832
1833 icmph = icmp6_hdr(skb);
1834 target = (const struct in6_addr *) (icmph + 1);
1835 dest = target + 1;
1836
1837 src = &ipv6_hdr(skb)->daddr;
1838 saddr = &ipv6_hdr(skb)->saddr;
1839
1840 rt = ip6_route_redirect(dest, src, saddr, skb->dev);
1841 rt6_do_redirect(&rt->dst, skb);
d8d1f30b 1842 dst_release(&rt->dst);
1da177e4
LT
1843}
1844
1da177e4
LT
1845/*
1846 * Misc support functions
1847 */
1848
1716a961 1849static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1850 const struct in6_addr *dest)
1da177e4 1851{
d1918542 1852 struct net *net = dev_net(ort->dst.dev);
8b96d22d
DM
1853 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1854 ort->rt6i_table);
1da177e4
LT
1855
1856 if (rt) {
d8d1f30b
CG
1857 rt->dst.input = ort->dst.input;
1858 rt->dst.output = ort->dst.output;
8e2ec639 1859 rt->dst.flags |= DST_HOST;
d8d1f30b 1860
4e3fd7a0 1861 rt->rt6i_dst.addr = *dest;
8e2ec639 1862 rt->rt6i_dst.plen = 128;
defb3519 1863 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1864 rt->dst.error = ort->dst.error;
1da177e4
LT
1865 rt->rt6i_idev = ort->rt6i_idev;
1866 if (rt->rt6i_idev)
1867 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1868 rt->dst.lastuse = jiffies;
1da177e4 1869
4e3fd7a0 1870 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1871 rt->rt6i_flags = ort->rt6i_flags;
1872 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1873 (RTF_DEFAULT | RTF_ADDRCONF))
1874 rt6_set_from(rt, ort);
1875 else
1876 rt6_clean_expires(rt);
1da177e4
LT
1877 rt->rt6i_metric = 0;
1878
1da177e4
LT
1879#ifdef CONFIG_IPV6_SUBTREES
1880 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1881#endif
0f6c6392 1882 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1883 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1884 }
1885 return rt;
1886}
1887
70ceb4f5 1888#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1889static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1890 const struct in6_addr *prefix, int prefixlen,
1891 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1892{
1893 struct fib6_node *fn;
1894 struct rt6_info *rt = NULL;
c71099ac
TG
1895 struct fib6_table *table;
1896
efa2cea0 1897 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1898 if (!table)
c71099ac 1899 return NULL;
70ceb4f5 1900
c71099ac
TG
1901 write_lock_bh(&table->tb6_lock);
1902 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1903 if (!fn)
1904 goto out;
1905
d8d1f30b 1906 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1907 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1908 continue;
1909 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1910 continue;
1911 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1912 continue;
d8d1f30b 1913 dst_hold(&rt->dst);
70ceb4f5
YH
1914 break;
1915 }
1916out:
c71099ac 1917 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1918 return rt;
1919}
1920
efa2cea0 1921static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1922 const struct in6_addr *prefix, int prefixlen,
1923 const struct in6_addr *gwaddr, int ifindex,
95c96174 1924 unsigned int pref)
70ceb4f5 1925{
86872cb5
TG
1926 struct fib6_config cfg = {
1927 .fc_table = RT6_TABLE_INFO,
238fc7ea 1928 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1929 .fc_ifindex = ifindex,
1930 .fc_dst_len = prefixlen,
1931 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1932 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1933 .fc_nlinfo.pid = 0,
1934 .fc_nlinfo.nlh = NULL,
1935 .fc_nlinfo.nl_net = net,
86872cb5
TG
1936 };
1937
4e3fd7a0
AD
1938 cfg.fc_dst = *prefix;
1939 cfg.fc_gateway = *gwaddr;
70ceb4f5 1940
e317da96
YH
1941 /* We should treat it as a default route if prefix length is 0. */
1942 if (!prefixlen)
86872cb5 1943 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1944
86872cb5 1945 ip6_route_add(&cfg);
70ceb4f5 1946
efa2cea0 1947 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1948}
1949#endif
1950
b71d1d42 1951struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1952{
1da177e4 1953 struct rt6_info *rt;
c71099ac 1954 struct fib6_table *table;
1da177e4 1955
c346dca1 1956 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1957 if (!table)
c71099ac 1958 return NULL;
1da177e4 1959
c71099ac 1960 write_lock_bh(&table->tb6_lock);
d8d1f30b 1961 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1962 if (dev == rt->dst.dev &&
045927ff 1963 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1964 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1965 break;
1966 }
1967 if (rt)
d8d1f30b 1968 dst_hold(&rt->dst);
c71099ac 1969 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1970 return rt;
1971}
1972
b71d1d42 1973struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1974 struct net_device *dev,
1975 unsigned int pref)
1da177e4 1976{
86872cb5
TG
1977 struct fib6_config cfg = {
1978 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1979 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1980 .fc_ifindex = dev->ifindex,
1981 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1982 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1983 .fc_nlinfo.pid = 0,
1984 .fc_nlinfo.nlh = NULL,
c346dca1 1985 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1986 };
1da177e4 1987
4e3fd7a0 1988 cfg.fc_gateway = *gwaddr;
1da177e4 1989
86872cb5 1990 ip6_route_add(&cfg);
1da177e4 1991
1da177e4
LT
1992 return rt6_get_dflt_router(gwaddr, dev);
1993}
1994
7b4da532 1995void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1996{
1997 struct rt6_info *rt;
c71099ac
TG
1998 struct fib6_table *table;
1999
2000 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 2001 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 2002 if (!table)
c71099ac 2003 return;
1da177e4
LT
2004
2005restart:
c71099ac 2006 read_lock_bh(&table->tb6_lock);
d8d1f30b 2007 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 2008 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 2009 dst_hold(&rt->dst);
c71099ac 2010 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2011 ip6_del_rt(rt);
1da177e4
LT
2012 goto restart;
2013 }
2014 }
c71099ac 2015 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2016}
2017
5578689a
DL
2018static void rtmsg_to_fib6_config(struct net *net,
2019 struct in6_rtmsg *rtmsg,
86872cb5
TG
2020 struct fib6_config *cfg)
2021{
2022 memset(cfg, 0, sizeof(*cfg));
2023
2024 cfg->fc_table = RT6_TABLE_MAIN;
2025 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2026 cfg->fc_metric = rtmsg->rtmsg_metric;
2027 cfg->fc_expires = rtmsg->rtmsg_info;
2028 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2029 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2030 cfg->fc_flags = rtmsg->rtmsg_flags;
2031
5578689a 2032 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2033
4e3fd7a0
AD
2034 cfg->fc_dst = rtmsg->rtmsg_dst;
2035 cfg->fc_src = rtmsg->rtmsg_src;
2036 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2037}
2038
5578689a 2039int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2040{
86872cb5 2041 struct fib6_config cfg;
1da177e4
LT
2042 struct in6_rtmsg rtmsg;
2043 int err;
2044
2045 switch(cmd) {
2046 case SIOCADDRT: /* Add a route */
2047 case SIOCDELRT: /* Delete a route */
2048 if (!capable(CAP_NET_ADMIN))
2049 return -EPERM;
2050 err = copy_from_user(&rtmsg, arg,
2051 sizeof(struct in6_rtmsg));
2052 if (err)
2053 return -EFAULT;
86872cb5 2054
5578689a 2055 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2056
1da177e4
LT
2057 rtnl_lock();
2058 switch (cmd) {
2059 case SIOCADDRT:
86872cb5 2060 err = ip6_route_add(&cfg);
1da177e4
LT
2061 break;
2062 case SIOCDELRT:
86872cb5 2063 err = ip6_route_del(&cfg);
1da177e4
LT
2064 break;
2065 default:
2066 err = -EINVAL;
2067 }
2068 rtnl_unlock();
2069
2070 return err;
3ff50b79 2071 }
1da177e4
LT
2072
2073 return -EINVAL;
2074}
2075
2076/*
2077 * Drop the packet on the floor
2078 */
2079
d5fdd6ba 2080static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2081{
612f09e8 2082 int type;
adf30907 2083 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2084 switch (ipstats_mib_noroutes) {
2085 case IPSTATS_MIB_INNOROUTES:
0660e03f 2086 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2087 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2088 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2089 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2090 break;
2091 }
2092 /* FALLTHROUGH */
2093 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2094 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2095 ipstats_mib_noroutes);
612f09e8
YH
2096 break;
2097 }
3ffe533c 2098 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2099 kfree_skb(skb);
2100 return 0;
2101}
2102
9ce8ade0
TG
2103static int ip6_pkt_discard(struct sk_buff *skb)
2104{
612f09e8 2105 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2106}
2107
20380731 2108static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2109{
adf30907 2110 skb->dev = skb_dst(skb)->dev;
612f09e8 2111 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2112}
2113
6723ab54
DM
2114#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2115
9ce8ade0
TG
2116static int ip6_pkt_prohibit(struct sk_buff *skb)
2117{
612f09e8 2118 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2119}
2120
2121static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2122{
adf30907 2123 skb->dev = skb_dst(skb)->dev;
612f09e8 2124 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2125}
2126
6723ab54
DM
2127#endif
2128
1da177e4
LT
2129/*
2130 * Allocate a dst for local (unicast / anycast) address.
2131 */
2132
2133struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2134 const struct in6_addr *addr,
8f031519 2135 bool anycast)
1da177e4 2136{
c346dca1 2137 struct net *net = dev_net(idev->dev);
8b96d22d 2138 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
f83c7790 2139 int err;
1da177e4 2140
38308473 2141 if (!rt) {
f3213831 2142 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
1da177e4 2143 return ERR_PTR(-ENOMEM);
40385653 2144 }
1da177e4 2145
1da177e4
LT
2146 in6_dev_hold(idev);
2147
11d53b49 2148 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2149 rt->dst.input = ip6_input;
2150 rt->dst.output = ip6_output;
1da177e4 2151 rt->rt6i_idev = idev;
d8d1f30b 2152 rt->dst.obsolete = -1;
1da177e4
LT
2153
2154 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2155 if (anycast)
2156 rt->rt6i_flags |= RTF_ANYCAST;
2157 else
1da177e4 2158 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2159 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2160 if (err) {
d8d1f30b 2161 dst_free(&rt->dst);
f83c7790 2162 return ERR_PTR(err);
1da177e4
LT
2163 }
2164
4e3fd7a0 2165 rt->rt6i_dst.addr = *addr;
1da177e4 2166 rt->rt6i_dst.plen = 128;
5578689a 2167 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2168
d8d1f30b 2169 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2170
2171 return rt;
2172}
2173
c3968a85
DW
2174int ip6_route_get_saddr(struct net *net,
2175 struct rt6_info *rt,
b71d1d42 2176 const struct in6_addr *daddr,
c3968a85
DW
2177 unsigned int prefs,
2178 struct in6_addr *saddr)
2179{
2180 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2181 int err = 0;
2182 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2183 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2184 else
2185 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2186 daddr, prefs, saddr);
2187 return err;
2188}
2189
/*
 * Walker argument for fib6_remove_prefsrc(): remove deleted ip from
 * prefsrc entries.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address compared against prefsrc */
};
2196
2197static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2198{
2199 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2200 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2201 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2202
d1918542 2203 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2204 rt != net->ipv6.ip6_null_entry &&
2205 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2206 /* remove prefsrc entry */
2207 rt->rt6i_prefsrc.plen = 0;
2208 }
2209 return 0;
2210}
2211
2212void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2213{
2214 struct net *net = dev_net(ifp->idev->dev);
2215 struct arg_dev_net_ip adni = {
2216 .dev = ifp->idev->dev,
2217 .net = net,
2218 .addr = &ifp->addr,
2219 };
2220 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2221}
2222
8ed67789
DL
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
};
2227
1da177e4
LT
2228static int fib6_ifdown(struct rt6_info *rt, void *arg)
2229{
bc3ef660 2230 const struct arg_dev_net *adn = arg;
2231 const struct net_device *dev = adn->dev;
8ed67789 2232
d1918542 2233 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2234 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2235 return -1;
c159d30c 2236
1da177e4
LT
2237 return 0;
2238}
2239
f3db4851 2240void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2241{
8ed67789
DL
2242 struct arg_dev_net adn = {
2243 .dev = dev,
2244 .net = net,
2245 };
2246
2247 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2248 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2249}
2250
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2255
2256static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2257{
2258 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2259 struct inet6_dev *idev;
2260
2261 /* In IPv6 pmtu discovery is not optional,
2262 so that RTAX_MTU lock cannot disable it.
2263 We still use this lock to block changes
2264 caused by addrconf/ndisc.
2265 */
2266
2267 idev = __in6_dev_get(arg->dev);
38308473 2268 if (!idev)
1da177e4
LT
2269 return 0;
2270
2271 /* For administrative MTU increase, there is no way to discover
2272 IPv6 PMTU increase, so PMTU increase should be updated here.
2273 Since RFC 1981 doesn't include administrative MTU increase
2274 update PMTU increase is a MUST. (i.e. jumbo frame)
2275 */
2276 /*
2277 If new MTU is less than route PMTU, this new MTU will be the
2278 lowest MTU in the path, update the route PMTU to reflect PMTU
2279 decreases; if new MTU is greater than route PMTU, and the
2280 old MTU is the lowest MTU in the path, update the route PMTU
2281 to reflect the increase. In this case if the other nodes' MTU
2282 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2283 PMTU discouvery.
2284 */
d1918542 2285 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2286 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2287 (dst_mtu(&rt->dst) >= arg->mtu ||
2288 (dst_mtu(&rt->dst) < arg->mtu &&
2289 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2290 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2291 }
1da177e4
LT
2292 return 0;
2293}
2294
95c96174 2295void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2296{
c71099ac
TG
2297 struct rt6_mtu_change_arg arg = {
2298 .dev = dev,
2299 .mtu = mtu,
2300 };
1da177e4 2301
c346dca1 2302 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2303}
2304
ef7c79ed 2305static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2306 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2307 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2308 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2309 [RTA_PRIORITY] = { .type = NLA_U32 },
2310 [RTA_METRICS] = { .type = NLA_NESTED },
2311};
2312
2313static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2314 struct fib6_config *cfg)
1da177e4 2315{
86872cb5
TG
2316 struct rtmsg *rtm;
2317 struct nlattr *tb[RTA_MAX+1];
2318 int err;
1da177e4 2319
86872cb5
TG
2320 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2321 if (err < 0)
2322 goto errout;
1da177e4 2323
86872cb5
TG
2324 err = -EINVAL;
2325 rtm = nlmsg_data(nlh);
2326 memset(cfg, 0, sizeof(*cfg));
2327
2328 cfg->fc_table = rtm->rtm_table;
2329 cfg->fc_dst_len = rtm->rtm_dst_len;
2330 cfg->fc_src_len = rtm->rtm_src_len;
2331 cfg->fc_flags = RTF_UP;
2332 cfg->fc_protocol = rtm->rtm_protocol;
2333
2334 if (rtm->rtm_type == RTN_UNREACHABLE)
2335 cfg->fc_flags |= RTF_REJECT;
2336
ab79ad14
2337 if (rtm->rtm_type == RTN_LOCAL)
2338 cfg->fc_flags |= RTF_LOCAL;
2339
86872cb5
TG
2340 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2341 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2342 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2343
2344 if (tb[RTA_GATEWAY]) {
2345 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2346 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2347 }
86872cb5
TG
2348
2349 if (tb[RTA_DST]) {
2350 int plen = (rtm->rtm_dst_len + 7) >> 3;
2351
2352 if (nla_len(tb[RTA_DST]) < plen)
2353 goto errout;
2354
2355 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2356 }
86872cb5
TG
2357
2358 if (tb[RTA_SRC]) {
2359 int plen = (rtm->rtm_src_len + 7) >> 3;
2360
2361 if (nla_len(tb[RTA_SRC]) < plen)
2362 goto errout;
2363
2364 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2365 }
86872cb5 2366
c3968a85
DW
2367 if (tb[RTA_PREFSRC])
2368 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2369
86872cb5
TG
2370 if (tb[RTA_OIF])
2371 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2372
2373 if (tb[RTA_PRIORITY])
2374 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2375
2376 if (tb[RTA_METRICS]) {
2377 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2378 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2379 }
86872cb5
TG
2380
2381 if (tb[RTA_TABLE])
2382 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2383
2384 err = 0;
2385errout:
2386 return err;
1da177e4
LT
2387}
2388
c127ea2c 2389static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2390{
86872cb5
TG
2391 struct fib6_config cfg;
2392 int err;
1da177e4 2393
86872cb5
TG
2394 err = rtm_to_fib6_config(skb, nlh, &cfg);
2395 if (err < 0)
2396 return err;
2397
2398 return ip6_route_del(&cfg);
1da177e4
LT
2399}
2400
c127ea2c 2401static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2402{
86872cb5
TG
2403 struct fib6_config cfg;
2404 int err;
1da177e4 2405
86872cb5
TG
2406 err = rtm_to_fib6_config(skb, nlh, &cfg);
2407 if (err < 0)
2408 return err;
2409
2410 return ip6_route_add(&cfg);
1da177e4
LT
2411}
2412
339bf98f
TG
2413static inline size_t rt6_nlmsg_size(void)
2414{
2415 return NLMSG_ALIGN(sizeof(struct rtmsg))
2416 + nla_total_size(16) /* RTA_SRC */
2417 + nla_total_size(16) /* RTA_DST */
2418 + nla_total_size(16) /* RTA_GATEWAY */
2419 + nla_total_size(16) /* RTA_PREFSRC */
2420 + nla_total_size(4) /* RTA_TABLE */
2421 + nla_total_size(4) /* RTA_IIF */
2422 + nla_total_size(4) /* RTA_OIF */
2423 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2424 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2425 + nla_total_size(sizeof(struct rta_cacheinfo));
2426}
2427
191cd582
BH
2428static int rt6_fill_node(struct net *net,
2429 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2430 struct in6_addr *dst, struct in6_addr *src,
2431 int iif, int type, u32 pid, u32 seq,
7bc570c8 2432 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2433{
2434 struct rtmsg *rtm;
2d7202bf 2435 struct nlmsghdr *nlh;
e3703b3d 2436 long expires;
9e762a4a 2437 u32 table;
f2c31e32 2438 struct neighbour *n;
1da177e4
LT
2439
2440 if (prefix) { /* user wants prefix routes only */
2441 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2442 /* success since this is not a prefix route */
2443 return 1;
2444 }
2445 }
2446
2d7202bf 2447 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2448 if (!nlh)
26932566 2449 return -EMSGSIZE;
2d7202bf
TG
2450
2451 rtm = nlmsg_data(nlh);
1da177e4
LT
2452 rtm->rtm_family = AF_INET6;
2453 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2454 rtm->rtm_src_len = rt->rt6i_src.plen;
2455 rtm->rtm_tos = 0;
c71099ac 2456 if (rt->rt6i_table)
9e762a4a 2457 table = rt->rt6i_table->tb6_id;
c71099ac 2458 else
9e762a4a
PM
2459 table = RT6_TABLE_UNSPEC;
2460 rtm->rtm_table = table;
c78679e8
DM
2461 if (nla_put_u32(skb, RTA_TABLE, table))
2462 goto nla_put_failure;
38308473 2463 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2464 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2465 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2466 rtm->rtm_type = RTN_LOCAL;
d1918542 2467 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2468 rtm->rtm_type = RTN_LOCAL;
2469 else
2470 rtm->rtm_type = RTN_UNICAST;
2471 rtm->rtm_flags = 0;
2472 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2473 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2474 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2475 rtm->rtm_protocol = RTPROT_REDIRECT;
2476 else if (rt->rt6i_flags & RTF_ADDRCONF)
2477 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2478 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2479 rtm->rtm_protocol = RTPROT_RA;
2480
38308473 2481 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2482 rtm->rtm_flags |= RTM_F_CLONED;
2483
2484 if (dst) {
c78679e8
DM
2485 if (nla_put(skb, RTA_DST, 16, dst))
2486 goto nla_put_failure;
1ab1457c 2487 rtm->rtm_dst_len = 128;
1da177e4 2488 } else if (rtm->rtm_dst_len)
c78679e8
DM
2489 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2490 goto nla_put_failure;
1da177e4
LT
2491#ifdef CONFIG_IPV6_SUBTREES
2492 if (src) {
c78679e8
DM
2493 if (nla_put(skb, RTA_SRC, 16, src))
2494 goto nla_put_failure;
1ab1457c 2495 rtm->rtm_src_len = 128;
c78679e8
DM
2496 } else if (rtm->rtm_src_len &&
2497 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2498 goto nla_put_failure;
1da177e4 2499#endif
7bc570c8
YH
2500 if (iif) {
2501#ifdef CONFIG_IPV6_MROUTE
2502 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2503 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2504 if (err <= 0) {
2505 if (!nowait) {
2506 if (err == 0)
2507 return 0;
2508 goto nla_put_failure;
2509 } else {
2510 if (err == -EMSGSIZE)
2511 goto nla_put_failure;
2512 }
2513 }
2514 } else
2515#endif
c78679e8
DM
2516 if (nla_put_u32(skb, RTA_IIF, iif))
2517 goto nla_put_failure;
7bc570c8 2518 } else if (dst) {
1da177e4 2519 struct in6_addr saddr_buf;
c78679e8
DM
2520 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2521 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2522 goto nla_put_failure;
1da177e4 2523 }
2d7202bf 2524
c3968a85
DW
2525 if (rt->rt6i_prefsrc.plen) {
2526 struct in6_addr saddr_buf;
4e3fd7a0 2527 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2528 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2529 goto nla_put_failure;
c3968a85
DW
2530 }
2531
defb3519 2532 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2533 goto nla_put_failure;
2534
f2c31e32 2535 rcu_read_lock();
97cac082 2536 n = rt->n;
94f826b8
ED
2537 if (n) {
2538 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2539 rcu_read_unlock();
2540 goto nla_put_failure;
2541 }
2542 }
f2c31e32 2543 rcu_read_unlock();
2d7202bf 2544
c78679e8
DM
2545 if (rt->dst.dev &&
2546 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2547 goto nla_put_failure;
2548 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2549 goto nla_put_failure;
36e3deae
YH
2550 if (!(rt->rt6i_flags & RTF_EXPIRES))
2551 expires = 0;
d1918542
DM
2552 else if (rt->dst.expires - jiffies < INT_MAX)
2553 expires = rt->dst.expires - jiffies;
36e3deae
YH
2554 else
2555 expires = INT_MAX;
69cdf8f9 2556
87a50699 2557 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 2558 goto nla_put_failure;
2d7202bf
TG
2559
2560 return nlmsg_end(skb, nlh);
2561
2562nla_put_failure:
26932566
PM
2563 nlmsg_cancel(skb, nlh);
2564 return -EMSGSIZE;
1da177e4
LT
2565}
2566
1b43af54 2567int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2568{
2569 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2570 int prefix;
2571
2d7202bf
TG
2572 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2573 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2574 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2575 } else
2576 prefix = 0;
2577
191cd582
BH
2578 return rt6_fill_node(arg->net,
2579 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2580 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2581 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2582}
2583
c127ea2c 2584static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2585{
3b1e0a65 2586 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2587 struct nlattr *tb[RTA_MAX+1];
2588 struct rt6_info *rt;
1da177e4 2589 struct sk_buff *skb;
ab364a6f 2590 struct rtmsg *rtm;
4c9483b2 2591 struct flowi6 fl6;
72331bc0 2592 int err, iif = 0, oif = 0;
1da177e4 2593
ab364a6f
TG
2594 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2595 if (err < 0)
2596 goto errout;
1da177e4 2597
ab364a6f 2598 err = -EINVAL;
4c9483b2 2599 memset(&fl6, 0, sizeof(fl6));
1da177e4 2600
ab364a6f
TG
2601 if (tb[RTA_SRC]) {
2602 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2603 goto errout;
2604
4e3fd7a0 2605 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2606 }
2607
2608 if (tb[RTA_DST]) {
2609 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2610 goto errout;
2611
4e3fd7a0 2612 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2613 }
2614
2615 if (tb[RTA_IIF])
2616 iif = nla_get_u32(tb[RTA_IIF]);
2617
2618 if (tb[RTA_OIF])
72331bc0 2619 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2620
2621 if (iif) {
2622 struct net_device *dev;
72331bc0
SL
2623 int flags = 0;
2624
5578689a 2625 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2626 if (!dev) {
2627 err = -ENODEV;
ab364a6f 2628 goto errout;
1da177e4 2629 }
72331bc0
SL
2630
2631 fl6.flowi6_iif = iif;
2632
2633 if (!ipv6_addr_any(&fl6.saddr))
2634 flags |= RT6_LOOKUP_F_HAS_SADDR;
2635
2636 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2637 flags);
2638 } else {
2639 fl6.flowi6_oif = oif;
2640
2641 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2642 }
2643
ab364a6f 2644 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2645 if (!skb) {
2173bff5 2646 dst_release(&rt->dst);
ab364a6f
TG
2647 err = -ENOBUFS;
2648 goto errout;
2649 }
1da177e4 2650
ab364a6f
TG
2651 /* Reserve room for dummy headers, this skb can pass
2652 through good chunk of routing engine.
2653 */
459a98ed 2654 skb_reset_mac_header(skb);
ab364a6f 2655 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2656
d8d1f30b 2657 skb_dst_set(skb, &rt->dst);
1da177e4 2658
4c9483b2 2659 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2660 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2661 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2662 if (err < 0) {
ab364a6f
TG
2663 kfree_skb(skb);
2664 goto errout;
1da177e4
LT
2665 }
2666
5578689a 2667 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2668errout:
1da177e4 2669 return err;
1da177e4
LT
2670}
2671
86872cb5 2672void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2673{
2674 struct sk_buff *skb;
5578689a 2675 struct net *net = info->nl_net;
528c4ceb
DL
2676 u32 seq;
2677 int err;
2678
2679 err = -ENOBUFS;
38308473 2680 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2681
339bf98f 2682 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2683 if (!skb)
21713ebc
TG
2684 goto errout;
2685
191cd582 2686 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2687 event, info->pid, seq, 0, 0, 0);
26932566
PM
2688 if (err < 0) {
2689 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2690 WARN_ON(err == -EMSGSIZE);
2691 kfree_skb(skb);
2692 goto errout;
2693 }
1ce85fe4
PNA
2694 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2695 info->nlh, gfp_any());
2696 return;
21713ebc
TG
2697errout:
2698 if (err < 0)
5578689a 2699 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2700}
2701
8ed67789
DL
2702static int ip6_route_dev_notify(struct notifier_block *this,
2703 unsigned long event, void *data)
2704{
2705 struct net_device *dev = (struct net_device *)data;
c346dca1 2706 struct net *net = dev_net(dev);
8ed67789
DL
2707
2708 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2709 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2710 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2711#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2712 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2713 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2714 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2715 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2716#endif
2717 }
2718
2719 return NOTIFY_OK;
2720}
2721
1da177e4
LT
2722/*
2723 * /proc
2724 */
2725
2726#ifdef CONFIG_PROC_FS
2727
1da177e4
LT
/*
 * NOTE(review): no user of this structure is visible in this part of
 * the file -- confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2736
2737static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2738{
33120b30 2739 struct seq_file *m = p_arg;
69cce1d1 2740 struct neighbour *n;
1da177e4 2741
4b7a4274 2742 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2743
2744#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2745 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2746#else
33120b30 2747 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2748#endif
f2c31e32 2749 rcu_read_lock();
97cac082 2750 n = rt->n;
69cce1d1
DM
2751 if (n) {
2752 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2753 } else {
33120b30 2754 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2755 }
f2c31e32 2756 rcu_read_unlock();
33120b30 2757 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2758 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2759 rt->dst.__use, rt->rt6i_flags,
d1918542 2760 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2761 return 0;
2762}
2763
33120b30 2764static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2765{
f3db4851 2766 struct net *net = (struct net *)m->private;
32b293a5 2767 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2768 return 0;
2769}
1da177e4 2770
33120b30
AD
2771static int ipv6_route_open(struct inode *inode, struct file *file)
2772{
de05c557 2773 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2774}
2775
33120b30
AD
2776static const struct file_operations ipv6_route_proc_fops = {
2777 .owner = THIS_MODULE,
2778 .open = ipv6_route_open,
2779 .read = seq_read,
2780 .llseek = seq_lseek,
b6fcbdb4 2781 .release = single_release_net,
33120b30
AD
2782};
2783
1da177e4
LT
2784static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2785{
69ddb805 2786 struct net *net = (struct net *)seq->private;
1da177e4 2787 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2788 net->ipv6.rt6_stats->fib_nodes,
2789 net->ipv6.rt6_stats->fib_route_nodes,
2790 net->ipv6.rt6_stats->fib_rt_alloc,
2791 net->ipv6.rt6_stats->fib_rt_entries,
2792 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2793 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2794 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2795
2796 return 0;
2797}
2798
2799static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2800{
de05c557 2801 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2802}
2803
9a32144e 2804static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2805 .owner = THIS_MODULE,
2806 .open = rt6_stats_seq_open,
2807 .read = seq_read,
2808 .llseek = seq_lseek,
b6fcbdb4 2809 .release = single_release_net,
1da177e4
LT
2810};
2811#endif /* CONFIG_PROC_FS */
2812
2813#ifdef CONFIG_SYSCTL
2814
1da177e4 2815static
8d65af78 2816int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2817 void __user *buffer, size_t *lenp, loff_t *ppos)
2818{
c486da34
LAG
2819 struct net *net;
2820 int delay;
2821 if (!write)
1da177e4 2822 return -EINVAL;
c486da34
LAG
2823
2824 net = (struct net *)ctl->extra1;
2825 delay = net->ipv6.sysctl.flush_delay;
2826 proc_dointvec(ctl, write, buffer, lenp, ppos);
2827 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2828 return 0;
1da177e4
LT
2829}
2830
760f2d01 2831ctl_table ipv6_route_table_template[] = {
1ab1457c 2832 {
1da177e4 2833 .procname = "flush",
4990509f 2834 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2835 .maxlen = sizeof(int),
89c8b3a1 2836 .mode = 0200,
6d9f239a 2837 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2838 },
2839 {
1da177e4 2840 .procname = "gc_thresh",
9a7ec3a9 2841 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2842 .maxlen = sizeof(int),
2843 .mode = 0644,
6d9f239a 2844 .proc_handler = proc_dointvec,
1da177e4
LT
2845 },
2846 {
1da177e4 2847 .procname = "max_size",
4990509f 2848 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2849 .maxlen = sizeof(int),
2850 .mode = 0644,
6d9f239a 2851 .proc_handler = proc_dointvec,
1da177e4
LT
2852 },
2853 {
1da177e4 2854 .procname = "gc_min_interval",
4990509f 2855 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2856 .maxlen = sizeof(int),
2857 .mode = 0644,
6d9f239a 2858 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2859 },
2860 {
1da177e4 2861 .procname = "gc_timeout",
4990509f 2862 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2863 .maxlen = sizeof(int),
2864 .mode = 0644,
6d9f239a 2865 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2866 },
2867 {
1da177e4 2868 .procname = "gc_interval",
4990509f 2869 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2870 .maxlen = sizeof(int),
2871 .mode = 0644,
6d9f239a 2872 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2873 },
2874 {
1da177e4 2875 .procname = "gc_elasticity",
4990509f 2876 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2877 .maxlen = sizeof(int),
2878 .mode = 0644,
f3d3f616 2879 .proc_handler = proc_dointvec,
1da177e4
LT
2880 },
2881 {
1da177e4 2882 .procname = "mtu_expires",
4990509f 2883 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2884 .maxlen = sizeof(int),
2885 .mode = 0644,
6d9f239a 2886 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2887 },
2888 {
1da177e4 2889 .procname = "min_adv_mss",
4990509f 2890 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2891 .maxlen = sizeof(int),
2892 .mode = 0644,
f3d3f616 2893 .proc_handler = proc_dointvec,
1da177e4
LT
2894 },
2895 {
1da177e4 2896 .procname = "gc_min_interval_ms",
4990509f 2897 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2898 .maxlen = sizeof(int),
2899 .mode = 0644,
6d9f239a 2900 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2901 },
f8572d8f 2902 { }
1da177e4
LT
2903};
2904
2c8c1e72 2905struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2906{
2907 struct ctl_table *table;
2908
2909 table = kmemdup(ipv6_route_table_template,
2910 sizeof(ipv6_route_table_template),
2911 GFP_KERNEL);
5ee09105
YH
2912
2913 if (table) {
2914 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2915 table[0].extra1 = net;
86393e52 2916 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2917 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2918 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2919 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2920 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2921 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2922 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2923 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2924 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2925 }
2926
760f2d01
DL
2927 return table;
2928}
1da177e4
LT
2929#endif
2930
2c8c1e72 2931static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2932{
633d424b 2933 int ret = -ENOMEM;
8ed67789 2934
86393e52
AD
2935 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2936 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2937
fc66f95c
ED
2938 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2939 goto out_ip6_dst_ops;
2940
8ed67789
DL
2941 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2942 sizeof(*net->ipv6.ip6_null_entry),
2943 GFP_KERNEL);
2944 if (!net->ipv6.ip6_null_entry)
fc66f95c 2945 goto out_ip6_dst_entries;
d8d1f30b 2946 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2947 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2948 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2949 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2950 ip6_template_metrics, true);
8ed67789
DL
2951
2952#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2953 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2954 sizeof(*net->ipv6.ip6_prohibit_entry),
2955 GFP_KERNEL);
68fffc67
PZ
2956 if (!net->ipv6.ip6_prohibit_entry)
2957 goto out_ip6_null_entry;
d8d1f30b 2958 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2959 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2960 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2961 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2962 ip6_template_metrics, true);
8ed67789
DL
2963
2964 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2965 sizeof(*net->ipv6.ip6_blk_hole_entry),
2966 GFP_KERNEL);
68fffc67
PZ
2967 if (!net->ipv6.ip6_blk_hole_entry)
2968 goto out_ip6_prohibit_entry;
d8d1f30b 2969 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2970 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2971 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2972 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2973 ip6_template_metrics, true);
8ed67789
DL
2974#endif
2975
b339a47c
PZ
2976 net->ipv6.sysctl.flush_delay = 0;
2977 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2978 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2979 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2980 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2981 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2982 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2983 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2984
6891a346
BT
2985 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2986
8ed67789
DL
2987 ret = 0;
2988out:
2989 return ret;
f2fc6a54 2990
68fffc67
PZ
2991#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2992out_ip6_prohibit_entry:
2993 kfree(net->ipv6.ip6_prohibit_entry);
2994out_ip6_null_entry:
2995 kfree(net->ipv6.ip6_null_entry);
2996#endif
fc66f95c
ED
2997out_ip6_dst_entries:
2998 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2999out_ip6_dst_ops:
f2fc6a54 3000 goto out;
cdb18761
DL
3001}
3002
2c8c1e72 3003static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3004{
8ed67789
DL
3005 kfree(net->ipv6.ip6_null_entry);
3006#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3007 kfree(net->ipv6.ip6_prohibit_entry);
3008 kfree(net->ipv6.ip6_blk_hole_entry);
3009#endif
41bb78b4 3010 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3011}
3012
d189634e
TG
3013static int __net_init ip6_route_net_init_late(struct net *net)
3014{
3015#ifdef CONFIG_PROC_FS
3016 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
3017 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
3018#endif
3019 return 0;
3020}
3021
3022static void __net_exit ip6_route_net_exit_late(struct net *net)
3023{
3024#ifdef CONFIG_PROC_FS
3025 proc_net_remove(net, "ipv6_route");
3026 proc_net_remove(net, "rt6_stats");
3027#endif
3028}
3029
cdb18761
DL
3030static struct pernet_operations ip6_route_net_ops = {
3031 .init = ip6_route_net_init,
3032 .exit = ip6_route_net_exit,
3033};
3034
c3426b47
DM
3035static int __net_init ipv6_inetpeer_init(struct net *net)
3036{
3037 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3038
3039 if (!bp)
3040 return -ENOMEM;
3041 inet_peer_base_init(bp);
3042 net->ipv6.peers = bp;
3043 return 0;
3044}
3045
3046static void __net_exit ipv6_inetpeer_exit(struct net *net)
3047{
3048 struct inet_peer_base *bp = net->ipv6.peers;
3049
3050 net->ipv6.peers = NULL;
56a6b248 3051 inetpeer_invalidate_tree(bp);
c3426b47
DM
3052 kfree(bp);
3053}
3054
2b823f72 3055static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3056 .init = ipv6_inetpeer_init,
3057 .exit = ipv6_inetpeer_exit,
3058};
3059
d189634e
TG
3060static struct pernet_operations ip6_route_net_late_ops = {
3061 .init = ip6_route_net_init_late,
3062 .exit = ip6_route_net_exit_late,
3063};
3064
8ed67789
DL
3065static struct notifier_block ip6_route_dev_notifier = {
3066 .notifier_call = ip6_route_dev_notify,
3067 .priority = 0,
3068};
3069
433d49c3 3070int __init ip6_route_init(void)
1da177e4 3071{
433d49c3
DL
3072 int ret;
3073
9a7ec3a9
DL
3074 ret = -ENOMEM;
3075 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3076 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3077 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3078 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3079 goto out;
14e50e57 3080
fc66f95c 3081 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3082 if (ret)
bdb3289f 3083 goto out_kmem_cache;
bdb3289f 3084
c3426b47
DM
3085 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3086 if (ret)
e8803b6c 3087 goto out_dst_entries;
2a0c451a 3088
7e52b33b
DM
3089 ret = register_pernet_subsys(&ip6_route_net_ops);
3090 if (ret)
3091 goto out_register_inetpeer;
c3426b47 3092
5dc121e9
AE
3093 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3094
8ed67789
DL
3095 /* Registering of the loopback is done before this portion of code,
3096 * the loopback reference in rt6_info will not be taken, do it
3097 * manually for init_net */
d8d1f30b 3098 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3099 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3100 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3101 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3102 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3103 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3104 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3105 #endif
e8803b6c 3106 ret = fib6_init();
433d49c3 3107 if (ret)
8ed67789 3108 goto out_register_subsys;
433d49c3 3109
433d49c3
DL
3110 ret = xfrm6_init();
3111 if (ret)
e8803b6c 3112 goto out_fib6_init;
c35b7e72 3113
433d49c3
DL
3114 ret = fib6_rules_init();
3115 if (ret)
3116 goto xfrm6_init;
7e5449c2 3117
d189634e
TG
3118 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3119 if (ret)
3120 goto fib6_rules_init;
3121
433d49c3 3122 ret = -ENOBUFS;
c7ac8679
GR
3123 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3124 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3125 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3126 goto out_register_late_subsys;
c127ea2c 3127
8ed67789 3128 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3129 if (ret)
d189634e 3130 goto out_register_late_subsys;
8ed67789 3131
433d49c3
DL
3132out:
3133 return ret;
3134
d189634e
TG
3135out_register_late_subsys:
3136 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3137fib6_rules_init:
433d49c3
DL
3138 fib6_rules_cleanup();
3139xfrm6_init:
433d49c3 3140 xfrm6_fini();
2a0c451a
TG
3141out_fib6_init:
3142 fib6_gc_cleanup();
8ed67789
DL
3143out_register_subsys:
3144 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3145out_register_inetpeer:
3146 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3147out_dst_entries:
3148 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3149out_kmem_cache:
f2fc6a54 3150 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3151 goto out;
1da177e4
LT
3152}
3153
3154void ip6_route_cleanup(void)
3155{
8ed67789 3156 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3157 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3158 fib6_rules_cleanup();
1da177e4 3159 xfrm6_fini();
1da177e4 3160 fib6_gc_cleanup();
c3426b47 3161 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3162 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3163 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3164 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3165}