ipv6: Prevent access to uninitialized fib_table_hash via /proc/net/ipv6_route
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
1da177e4
LT
60
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
1716a961 67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 68 const struct in6_addr *dest);
1da177e4 69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 70static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 71static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73static void ip6_dst_destroy(struct dst_entry *);
74static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
569d3645 76static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
77
78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb);
81static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82
70ceb4f5 83#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 84static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
85 const struct in6_addr *prefix, int prefixlen,
86 const struct in6_addr *gwaddr, int ifindex,
95c96174 87 unsigned int pref);
efa2cea0 88static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
91#endif
92
06582540
DM
93static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
94{
95 struct rt6_info *rt = (struct rt6_info *) dst;
96 struct inet_peer *peer;
97 u32 *p = NULL;
98
8e2ec639
YZ
99 if (!(rt->dst.flags & DST_HOST))
100 return NULL;
101
06582540
DM
102 if (!rt->rt6i_peer)
103 rt6_bind_peer(rt, 1);
104
105 peer = rt->rt6i_peer;
106 if (peer) {
107 u32 *old_p = __DST_METRICS_PTR(old);
108 unsigned long prev, new;
109
110 p = peer->metrics;
111 if (inet_metrics_new(peer))
112 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113
114 new = (unsigned long) p;
115 prev = cmpxchg(&dst->_metrics, old, new);
116
117 if (prev != old) {
118 p = __DST_METRICS_PTR(prev);
119 if (prev & DST_METRICS_READ_ONLY)
120 p = NULL;
121 }
122 }
123 return p;
124}
125
39232973
DM
126static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
127{
128 struct in6_addr *p = &rt->rt6i_gateway;
129
a7563f34 130 if (!ipv6_addr_any(p))
39232973
DM
131 return (const void *) p;
132 return daddr;
133}
134
d3aaeb38
DM
135static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
136{
39232973
DM
137 struct rt6_info *rt = (struct rt6_info *) dst;
138 struct neighbour *n;
139
140 daddr = choose_neigh_daddr(rt, daddr);
141 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
142 if (n)
143 return n;
144 return neigh_create(&nd_tbl, daddr, dst->dev);
145}
146
8ade06c6 147static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 148{
8ade06c6
DM
149 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
150 if (!n) {
151 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
152 if (IS_ERR(n))
153 return PTR_ERR(n);
154 }
f83c7790
DM
155 dst_set_neighbour(&rt->dst, n);
156
157 return 0;
d3aaeb38
DM
158}
159
9a7ec3a9 160static struct dst_ops ip6_dst_ops_template = {
1da177e4 161 .family = AF_INET6,
09640e63 162 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
163 .gc = ip6_dst_gc,
164 .gc_thresh = 1024,
165 .check = ip6_dst_check,
0dbaee3b 166 .default_advmss = ip6_default_advmss,
ebb762f2 167 .mtu = ip6_mtu,
06582540 168 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
169 .destroy = ip6_dst_destroy,
170 .ifdown = ip6_dst_ifdown,
171 .negative_advice = ip6_negative_advice,
172 .link_failure = ip6_link_failure,
173 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 174 .local_out = __ip6_local_out,
d3aaeb38 175 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
176};
177
ebb762f2 178static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 179{
618f9bc7
SK
180 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
181
182 return mtu ? : dst->dev->mtu;
ec831ea7
RD
183}
184
14e50e57
DM
185static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
186{
187}
188
0972ddb2
HB
189static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
190 unsigned long old)
191{
192 return NULL;
193}
194
14e50e57
DM
195static struct dst_ops ip6_dst_blackhole_ops = {
196 .family = AF_INET6,
09640e63 197 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
198 .destroy = ip6_dst_destroy,
199 .check = ip6_dst_check,
ebb762f2 200 .mtu = ip6_blackhole_mtu,
214f45c9 201 .default_advmss = ip6_default_advmss,
14e50e57 202 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 203 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 204 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
205};
206
62fa8a84
DM
207static const u32 ip6_template_metrics[RTAX_MAX] = {
208 [RTAX_HOPLIMIT - 1] = 255,
209};
210
bdb3289f 211static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
212 .dst = {
213 .__refcnt = ATOMIC_INIT(1),
214 .__use = 1,
215 .obsolete = -1,
216 .error = -ENETUNREACH,
d8d1f30b
CG
217 .input = ip6_pkt_discard,
218 .output = ip6_pkt_discard_out,
1da177e4
LT
219 },
220 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 221 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
222 .rt6i_metric = ~(u32) 0,
223 .rt6i_ref = ATOMIC_INIT(1),
224};
225
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route entry: a reject route that reports
 * -EACCES and sends packets to the ip6_pkt_prohibit handlers.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route entry: a reject route that reports
 * -EINVAL and hands packets to dst_discard in both directions.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
262
1da177e4 263/* allocate dst with ip6_dst_ops */
5c1e6aa3 264static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
957c665f
DM
265 struct net_device *dev,
266 int flags)
1da177e4 267{
957c665f 268 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
cf911662 269
38308473 270 if (rt)
fbe58186 271 memset(&rt->rt6i_table, 0,
38308473 272 sizeof(*rt) - sizeof(struct dst_entry));
cf911662
DM
273
274 return rt;
1da177e4
LT
275}
276
277static void ip6_dst_destroy(struct dst_entry *dst)
278{
279 struct rt6_info *rt = (struct rt6_info *)dst;
280 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 281 struct inet_peer *peer = rt->rt6i_peer;
1da177e4 282
8e2ec639
YZ
283 if (!(rt->dst.flags & DST_HOST))
284 dst_destroy_metrics_generic(dst);
285
38308473 286 if (idev) {
1da177e4
LT
287 rt->rt6i_idev = NULL;
288 in6_dev_put(idev);
1ab1457c 289 }
1716a961
G
290
291 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
292 dst_release(dst->from);
293
b3419363 294 if (peer) {
b3419363
DM
295 rt->rt6i_peer = NULL;
296 inet_putpeer(peer);
297 }
298}
299
6431cbc2
DM
300static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
301
302static u32 rt6_peer_genid(void)
303{
304 return atomic_read(&__rt6_peer_genid);
305}
306
b3419363
DM
307void rt6_bind_peer(struct rt6_info *rt, int create)
308{
309 struct inet_peer *peer;
310
b3419363
DM
311 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
312 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
313 inet_putpeer(peer);
6431cbc2
DM
314 else
315 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
316}
317
318static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
319 int how)
320{
321 struct rt6_info *rt = (struct rt6_info *)dst;
322 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 323 struct net_device *loopback_dev =
c346dca1 324 dev_net(dev)->loopback_dev;
1da177e4 325
38308473 326 if (dev != loopback_dev && idev && idev->dev == dev) {
5a3e55d6
DL
327 struct inet6_dev *loopback_idev =
328 in6_dev_get(loopback_dev);
38308473 329 if (loopback_idev) {
1da177e4
LT
330 rt->rt6i_idev = loopback_idev;
331 in6_dev_put(idev);
332 }
333 }
334}
335
a50feda5 336static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 337{
1716a961
G
338 struct rt6_info *ort = NULL;
339
340 if (rt->rt6i_flags & RTF_EXPIRES) {
341 if (time_after(jiffies, rt->dst.expires))
a50feda5 342 return true;
1716a961
G
343 } else if (rt->dst.from) {
344 ort = (struct rt6_info *) rt->dst.from;
345 return (ort->rt6i_flags & RTF_EXPIRES) &&
346 time_after(jiffies, ort->dst.expires);
347 }
a50feda5 348 return false;
1da177e4
LT
349}
350
a50feda5 351static bool rt6_need_strict(const struct in6_addr *daddr)
c71099ac 352{
a02cec21
ED
353 return ipv6_addr_type(daddr) &
354 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
355}
356
1da177e4 357/*
c71099ac 358 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
359 */
360
8ed67789
DL
361static inline struct rt6_info *rt6_device_match(struct net *net,
362 struct rt6_info *rt,
b71d1d42 363 const struct in6_addr *saddr,
1da177e4 364 int oif,
d420895e 365 int flags)
1da177e4
LT
366{
367 struct rt6_info *local = NULL;
368 struct rt6_info *sprt;
369
dd3abc4e
YH
370 if (!oif && ipv6_addr_any(saddr))
371 goto out;
372
d8d1f30b 373 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 374 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
375
376 if (oif) {
1da177e4
LT
377 if (dev->ifindex == oif)
378 return sprt;
379 if (dev->flags & IFF_LOOPBACK) {
38308473 380 if (!sprt->rt6i_idev ||
1da177e4 381 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 382 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 383 continue;
1ab1457c 384 if (local && (!oif ||
1da177e4
LT
385 local->rt6i_idev->dev->ifindex == oif))
386 continue;
387 }
388 local = sprt;
389 }
dd3abc4e
YH
390 } else {
391 if (ipv6_chk_addr(net, saddr, dev,
392 flags & RT6_LOOKUP_F_IFACE))
393 return sprt;
1da177e4 394 }
dd3abc4e 395 }
1da177e4 396
dd3abc4e 397 if (oif) {
1da177e4
LT
398 if (local)
399 return local;
400
d420895e 401 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 402 return net->ipv6.ip6_null_entry;
1da177e4 403 }
dd3abc4e 404out:
1da177e4
LT
405 return rt;
406}
407
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbor Solicitation to the
 * neighbour bound to @rt when that neighbour is not in a NUD_VALID state
 * and the last update is older than the interface's rtr_probe_interval.
 *
 * @rt may be NULL, in which case nothing happens.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute. The rate limit is enforced by stamping neigh->updated, so the
 * check and the store are done under the *write* side of neigh->lock:
 * under the read side, several CPUs could pass the time check together,
 * all store to neigh->updated, and all send a probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	/* Cheap unlocked pre-check; re-evaluated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		/* Drop the lock before transmitting. */
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
447
1da177e4 448/*
554cfb7e 449 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 450 */
b6f99a21 451static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 452{
d1918542 453 struct net_device *dev = rt->dst.dev;
161980f4 454 if (!oif || dev->ifindex == oif)
554cfb7e 455 return 2;
161980f4
DM
456 if ((dev->flags & IFF_LOOPBACK) &&
457 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
458 return 1;
459 return 0;
554cfb7e 460}
1da177e4 461
b6f99a21 462static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 463{
f2c31e32 464 struct neighbour *neigh;
398bcbeb 465 int m;
f2c31e32
ED
466
467 rcu_read_lock();
27217455 468 neigh = dst_get_neighbour_noref(&rt->dst);
4d0c5911
YH
469 if (rt->rt6i_flags & RTF_NONEXTHOP ||
470 !(rt->rt6i_flags & RTF_GATEWAY))
471 m = 1;
472 else if (neigh) {
554cfb7e
YH
473 read_lock_bh(&neigh->lock);
474 if (neigh->nud_state & NUD_VALID)
4d0c5911 475 m = 2;
398bcbeb
YH
476#ifdef CONFIG_IPV6_ROUTER_PREF
477 else if (neigh->nud_state & NUD_FAILED)
478 m = 0;
479#endif
480 else
ea73ee23 481 m = 1;
554cfb7e 482 read_unlock_bh(&neigh->lock);
398bcbeb
YH
483 } else
484 m = 0;
f2c31e32 485 rcu_read_unlock();
554cfb7e 486 return m;
1da177e4
LT
487}
488
554cfb7e
YH
489static int rt6_score_route(struct rt6_info *rt, int oif,
490 int strict)
1da177e4 491{
4d0c5911 492 int m, n;
1ab1457c 493
4d0c5911 494 m = rt6_check_dev(rt, oif);
77d16f45 495 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 496 return -1;
ebacaaa0
YH
497#ifdef CONFIG_IPV6_ROUTER_PREF
498 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
499#endif
4d0c5911 500 n = rt6_check_neigh(rt);
557e92ef 501 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
502 return -1;
503 return m;
504}
505
f11e6659
DM
506static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
507 int *mpri, struct rt6_info *match)
554cfb7e 508{
f11e6659
DM
509 int m;
510
511 if (rt6_check_expired(rt))
512 goto out;
513
514 m = rt6_score_route(rt, oif, strict);
515 if (m < 0)
516 goto out;
517
518 if (m > *mpri) {
519 if (strict & RT6_LOOKUP_F_REACHABLE)
520 rt6_probe(match);
521 *mpri = m;
522 match = rt;
523 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
524 rt6_probe(rt);
525 }
526
527out:
528 return match;
529}
530
531static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
532 struct rt6_info *rr_head,
533 u32 metric, int oif, int strict)
534{
535 struct rt6_info *rt, *match;
554cfb7e 536 int mpri = -1;
1da177e4 537
f11e6659
DM
538 match = NULL;
539 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 540 rt = rt->dst.rt6_next)
f11e6659
DM
541 match = find_match(rt, oif, strict, &mpri, match);
542 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 543 rt = rt->dst.rt6_next)
f11e6659 544 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 545
f11e6659
DM
546 return match;
547}
1da177e4 548
f11e6659
DM
549static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
550{
551 struct rt6_info *match, *rt0;
8ed67789 552 struct net *net;
1da177e4 553
f11e6659
DM
554 rt0 = fn->rr_ptr;
555 if (!rt0)
556 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 557
f11e6659 558 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 559
554cfb7e 560 if (!match &&
f11e6659 561 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 562 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 563
554cfb7e 564 /* no entries matched; do round-robin */
f11e6659
DM
565 if (!next || next->rt6i_metric != rt0->rt6i_metric)
566 next = fn->leaf;
567
568 if (next != rt0)
569 fn->rr_ptr = next;
1da177e4 570 }
1da177e4 571
d1918542 572 net = dev_net(rt0->dst.dev);
a02cec21 573 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
574}
575
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received route information option.
 *
 * @dev:    receiving device
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Depending on the advertised lifetime the matching route is deleted,
 * created, or has its preference and expiry refreshed.
 *
 * Returns 0 on success, -EINVAL for a malformed option or an invalid
 * preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr masked_prefix;
	struct in6_addr *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&masked_prefix,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &masked_prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	dst_release(&rt->dst);
	return 0;
}
#endif
648
/*
 * BACKTRACK(net, saddr) - climb the fib6 tree after a failed lookup.
 *
 * Must be expanded inside a function that has local variables `rt` and `fn`
 * and labels `out` and `restart`. When `rt` is the namespace's null entry,
 * the macro walks from `fn` towards the root: it jumps to `out` on reaching
 * a node flagged RTN_TL_ROOT, and to `restart` once `fn` is a node flagged
 * RTN_RTINFO. While climbing, a parent's subtree (other than the one just
 * left) is searched with fib6_lookup() using @saddr.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, saddr);		\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 666
8ed67789
DL
667static struct rt6_info *ip6_pol_route_lookup(struct net *net,
668 struct fib6_table *table,
4c9483b2 669 struct flowi6 *fl6, int flags)
1da177e4
LT
670{
671 struct fib6_node *fn;
672 struct rt6_info *rt;
673
c71099ac 674 read_lock_bh(&table->tb6_lock);
4c9483b2 675 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
676restart:
677 rt = fn->leaf;
4c9483b2
DM
678 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
679 BACKTRACK(net, &fl6->saddr);
c71099ac 680out:
d8d1f30b 681 dst_use(&rt->dst, jiffies);
c71099ac 682 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
683 return rt;
684
685}
686
ea6e574e
FW
687struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
688 int flags)
689{
690 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
691}
692EXPORT_SYMBOL_GPL(ip6_route_lookup);
693
9acd9f3a
YH
694struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
695 const struct in6_addr *saddr, int oif, int strict)
c71099ac 696{
4c9483b2
DM
697 struct flowi6 fl6 = {
698 .flowi6_oif = oif,
699 .daddr = *daddr,
c71099ac
TG
700 };
701 struct dst_entry *dst;
77d16f45 702 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 703
adaa70bb 704 if (saddr) {
4c9483b2 705 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
706 flags |= RT6_LOOKUP_F_HAS_SADDR;
707 }
708
4c9483b2 709 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
710 if (dst->error == 0)
711 return (struct rt6_info *) dst;
712
713 dst_release(dst);
714
1da177e4
LT
715 return NULL;
716}
717
7159039a
YH
718EXPORT_SYMBOL(rt6_lookup);
719
/* ip6_ins_rt is called with table->tb6_lock FREE (not held).
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold it, it may
   be destroyed.
 */
725
86872cb5 726static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
727{
728 int err;
c71099ac 729 struct fib6_table *table;
1da177e4 730
c71099ac
TG
731 table = rt->rt6i_table;
732 write_lock_bh(&table->tb6_lock);
86872cb5 733 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 734 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
735
736 return err;
737}
738
40e22e8f
TG
739int ip6_ins_rt(struct rt6_info *rt)
740{
4d1169c1 741 struct nl_info info = {
d1918542 742 .nl_net = dev_net(rt->dst.dev),
4d1169c1 743 };
528c4ceb 744 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
745}
746
1716a961 747static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 748 const struct in6_addr *daddr,
b71d1d42 749 const struct in6_addr *saddr)
1da177e4 750{
1da177e4
LT
751 struct rt6_info *rt;
752
753 /*
754 * Clone the route.
755 */
756
21efcfa0 757 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
758
759 if (rt) {
14deae41
DM
760 int attempts = !in_softirq();
761
38308473 762 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 763 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 764 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 765 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 766 rt->rt6i_gateway = *daddr;
58c4fb86 767 }
1da177e4 768
1da177e4 769 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
770
771#ifdef CONFIG_IPV6_SUBTREES
772 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 773 rt->rt6i_src.addr = *saddr;
1da177e4
LT
774 rt->rt6i_src.plen = 128;
775 }
776#endif
777
14deae41 778 retry:
8ade06c6 779 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 780 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
781 int saved_rt_min_interval =
782 net->ipv6.sysctl.ip6_rt_gc_min_interval;
783 int saved_rt_elasticity =
784 net->ipv6.sysctl.ip6_rt_gc_elasticity;
785
786 if (attempts-- > 0) {
787 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
788 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
789
86393e52 790 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
791
792 net->ipv6.sysctl.ip6_rt_gc_elasticity =
793 saved_rt_elasticity;
794 net->ipv6.sysctl.ip6_rt_gc_min_interval =
795 saved_rt_min_interval;
796 goto retry;
797 }
798
f3213831 799 net_warn_ratelimited("Neighbour table overflow\n");
d8d1f30b 800 dst_free(&rt->dst);
14deae41
DM
801 return NULL;
802 }
95a9a5ba 803 }
1da177e4 804
95a9a5ba
YH
805 return rt;
806}
1da177e4 807
21efcfa0
ED
808static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
809 const struct in6_addr *daddr)
299d9939 810{
21efcfa0
ED
811 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
812
299d9939 813 if (rt) {
299d9939 814 rt->rt6i_flags |= RTF_CACHE;
27217455 815 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
299d9939
YH
816 }
817 return rt;
818}
819
8ed67789 820static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 821 struct flowi6 *fl6, int flags)
1da177e4
LT
822{
823 struct fib6_node *fn;
519fbd87 824 struct rt6_info *rt, *nrt;
c71099ac 825 int strict = 0;
1da177e4 826 int attempts = 3;
519fbd87 827 int err;
53b7997f 828 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 829
77d16f45 830 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
831
832relookup:
c71099ac 833 read_lock_bh(&table->tb6_lock);
1da177e4 834
8238dd06 835restart_2:
4c9483b2 836 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
837
838restart:
4acad72d 839 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 840
4c9483b2 841 BACKTRACK(net, &fl6->saddr);
8ed67789 842 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 843 rt->rt6i_flags & RTF_CACHE)
1ddef044 844 goto out;
1da177e4 845
d8d1f30b 846 dst_hold(&rt->dst);
c71099ac 847 read_unlock_bh(&table->tb6_lock);
fb9de91e 848
27217455 849 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 850 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 851 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 852 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
853 else
854 goto out2;
e40cf353 855
d8d1f30b 856 dst_release(&rt->dst);
8ed67789 857 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 858
d8d1f30b 859 dst_hold(&rt->dst);
519fbd87 860 if (nrt) {
40e22e8f 861 err = ip6_ins_rt(nrt);
519fbd87 862 if (!err)
1da177e4 863 goto out2;
1da177e4 864 }
1da177e4 865
519fbd87
YH
866 if (--attempts <= 0)
867 goto out2;
868
869 /*
c71099ac 870 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
871 * released someone could insert this route. Relookup.
872 */
d8d1f30b 873 dst_release(&rt->dst);
519fbd87
YH
874 goto relookup;
875
876out:
8238dd06
YH
877 if (reachable) {
878 reachable = 0;
879 goto restart_2;
880 }
d8d1f30b 881 dst_hold(&rt->dst);
c71099ac 882 read_unlock_bh(&table->tb6_lock);
1da177e4 883out2:
d8d1f30b
CG
884 rt->dst.lastuse = jiffies;
885 rt->dst.__use++;
c71099ac
TG
886
887 return rt;
1da177e4
LT
888}
889
8ed67789 890static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 891 struct flowi6 *fl6, int flags)
4acad72d 892{
4c9483b2 893 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
894}
895
72331bc0
SL
896static struct dst_entry *ip6_route_input_lookup(struct net *net,
897 struct net_device *dev,
898 struct flowi6 *fl6, int flags)
899{
900 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
901 flags |= RT6_LOOKUP_F_IFACE;
902
903 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
904}
905
c71099ac
TG
906void ip6_route_input(struct sk_buff *skb)
907{
b71d1d42 908 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 909 struct net *net = dev_net(skb->dev);
adaa70bb 910 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
911 struct flowi6 fl6 = {
912 .flowi6_iif = skb->dev->ifindex,
913 .daddr = iph->daddr,
914 .saddr = iph->saddr,
38308473 915 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
916 .flowi6_mark = skb->mark,
917 .flowi6_proto = iph->nexthdr,
c71099ac 918 };
adaa70bb 919
72331bc0 920 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
921}
922
8ed67789 923static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 924 struct flowi6 *fl6, int flags)
1da177e4 925{
4c9483b2 926 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
927}
928
9c7a4f9c 929struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 930 struct flowi6 *fl6)
c71099ac
TG
931{
932 int flags = 0;
933
4c9483b2 934 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 935 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 936
4c9483b2 937 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 938 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
939 else if (sk)
940 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 941
4c9483b2 942 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
943}
944
7159039a 945EXPORT_SYMBOL(ip6_route_output);
1da177e4 946
2774c131 947struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 948{
5c1e6aa3 949 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
950 struct dst_entry *new = NULL;
951
5c1e6aa3 952 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 953 if (rt) {
cf911662
DM
954 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
955
d8d1f30b 956 new = &rt->dst;
14e50e57 957
14e50e57 958 new->__use = 1;
352e512c
HX
959 new->input = dst_discard;
960 new->output = dst_discard;
14e50e57 961
21efcfa0
ED
962 if (dst_metrics_read_only(&ort->dst))
963 new->_metrics = ort->dst._metrics;
964 else
965 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
966 rt->rt6i_idev = ort->rt6i_idev;
967 if (rt->rt6i_idev)
968 in6_dev_hold(rt->rt6i_idev);
14e50e57 969
4e3fd7a0 970 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
971 rt->rt6i_flags = ort->rt6i_flags;
972 rt6_clean_expires(rt);
14e50e57
DM
973 rt->rt6i_metric = 0;
974
975 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
976#ifdef CONFIG_IPV6_SUBTREES
977 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
978#endif
979
980 dst_free(new);
981 }
982
69ead7af
DM
983 dst_release(dst_orig);
984 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 985}
14e50e57 986
1da177e4
LT
987/*
988 * Destination cache support functions
989 */
990
991static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
992{
993 struct rt6_info *rt;
994
995 rt = (struct rt6_info *) dst;
996
6431cbc2
DM
997 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
998 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
999 if (!rt->rt6i_peer)
1000 rt6_bind_peer(rt, 0);
1001 rt->rt6i_peer_genid = rt6_peer_genid();
1002 }
1da177e4 1003 return dst;
6431cbc2 1004 }
1da177e4
LT
1005 return NULL;
1006}
1007
1008static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1009{
1010 struct rt6_info *rt = (struct rt6_info *) dst;
1011
1012 if (rt) {
54c1a859
YH
1013 if (rt->rt6i_flags & RTF_CACHE) {
1014 if (rt6_check_expired(rt)) {
1015 ip6_del_rt(rt);
1016 dst = NULL;
1017 }
1018 } else {
1da177e4 1019 dst_release(dst);
54c1a859
YH
1020 dst = NULL;
1021 }
1da177e4 1022 }
54c1a859 1023 return dst;
1da177e4
LT
1024}
1025
1026static void ip6_link_failure(struct sk_buff *skb)
1027{
1028 struct rt6_info *rt;
1029
3ffe533c 1030 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1031
adf30907 1032 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1033 if (rt) {
1716a961
G
1034 if (rt->rt6i_flags & RTF_CACHE)
1035 rt6_update_expires(rt, 0);
1036 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1037 rt->rt6i_node->fn_sernum = -1;
1038 }
1039}
1040
1041static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1042{
1043 struct rt6_info *rt6 = (struct rt6_info*)dst;
1044
1045 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1046 rt6->rt6i_flags |= RTF_MODIFIED;
1047 if (mtu < IPV6_MIN_MTU) {
defb3519 1048 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1049 mtu = IPV6_MIN_MTU;
defb3519
DM
1050 features |= RTAX_FEATURE_ALLFRAG;
1051 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1052 }
defb3519 1053 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1054 }
1055}
1056
0dbaee3b 1057static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1058{
0dbaee3b
DM
1059 struct net_device *dev = dst->dev;
1060 unsigned int mtu = dst_mtu(dst);
1061 struct net *net = dev_net(dev);
1062
1da177e4
LT
1063 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1064
5578689a
DL
1065 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1066 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1067
1068 /*
1ab1457c
YH
1069 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1070 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1071 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1072 * rely only on pmtu discovery"
1073 */
1074 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1075 mtu = IPV6_MAXPLEN;
1076 return mtu;
1077}
1078
ebb762f2 1079static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1080{
d33e4553 1081 struct inet6_dev *idev;
618f9bc7
SK
1082 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1083
1084 if (mtu)
1085 return mtu;
1086
1087 mtu = IPV6_MIN_MTU;
d33e4553
DM
1088
1089 rcu_read_lock();
1090 idev = __in6_dev_get(dst->dev);
1091 if (idev)
1092 mtu = idev->cnf.mtu6;
1093 rcu_read_unlock();
1094
1095 return mtu;
1096}
1097
3b00944c
YH
/*
 * Singly linked list (chained through dst->next) of the dst entries created
 * by icmp6_dst_alloc(); walked by icmp6_dst_gc() and icmp6_clean_all().
 */
static struct dst_entry *icmp6_dst_gc_list;
/* Protects icmp6_dst_gc_list; always taken with the _bh variants here. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1100
3b00944c 1101struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1102 struct neighbour *neigh,
87a11578 1103 struct flowi6 *fl6)
1da177e4 1104{
87a11578 1105 struct dst_entry *dst;
1da177e4
LT
1106 struct rt6_info *rt;
1107 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1108 struct net *net = dev_net(dev);
1da177e4 1109
38308473 1110 if (unlikely(!idev))
122bdf67 1111 return ERR_PTR(-ENODEV);
1da177e4 1112
957c665f 1113 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
38308473 1114 if (unlikely(!rt)) {
1da177e4 1115 in6_dev_put(idev);
87a11578 1116 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1117 goto out;
1118 }
1119
1da177e4
LT
1120 if (neigh)
1121 neigh_hold(neigh);
14deae41 1122 else {
f83c7790 1123 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
b43faac6 1124 if (IS_ERR(neigh)) {
252c3d84 1125 in6_dev_put(idev);
b43faac6
DM
1126 dst_free(&rt->dst);
1127 return ERR_CAST(neigh);
1128 }
14deae41 1129 }
1da177e4 1130
8e2ec639
YZ
1131 rt->dst.flags |= DST_HOST;
1132 rt->dst.output = ip6_output;
69cce1d1 1133 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1134 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1135 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1136 rt->rt6i_dst.plen = 128;
1137 rt->rt6i_idev = idev;
7011687f 1138 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1139
3b00944c 1140 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1141 rt->dst.next = icmp6_dst_gc_list;
1142 icmp6_dst_gc_list = &rt->dst;
3b00944c 1143 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1144
5578689a 1145 fib6_force_start_gc(net);
1da177e4 1146
87a11578
DM
1147 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1148
1da177e4 1149out:
87a11578 1150 return dst;
1da177e4
LT
1151}
1152
3d0f24a7 1153int icmp6_dst_gc(void)
1da177e4 1154{
e9476e95 1155 struct dst_entry *dst, **pprev;
3d0f24a7 1156 int more = 0;
1da177e4 1157
3b00944c
YH
1158 spin_lock_bh(&icmp6_dst_lock);
1159 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1160
1da177e4
LT
1161 while ((dst = *pprev) != NULL) {
1162 if (!atomic_read(&dst->__refcnt)) {
1163 *pprev = dst->next;
1164 dst_free(dst);
1da177e4
LT
1165 } else {
1166 pprev = &dst->next;
3d0f24a7 1167 ++more;
1da177e4
LT
1168 }
1169 }
1170
3b00944c 1171 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1172
3d0f24a7 1173 return more;
1da177e4
LT
1174}
1175
1e493d19
DM
1176static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1177 void *arg)
1178{
1179 struct dst_entry *dst, **pprev;
1180
1181 spin_lock_bh(&icmp6_dst_lock);
1182 pprev = &icmp6_dst_gc_list;
1183 while ((dst = *pprev) != NULL) {
1184 struct rt6_info *rt = (struct rt6_info *) dst;
1185 if (func(rt, arg)) {
1186 *pprev = dst->next;
1187 dst_free(dst);
1188 } else {
1189 pprev = &dst->next;
1190 }
1191 }
1192 spin_unlock_bh(&icmp6_dst_lock);
1193}
1194
569d3645 1195static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1196{
1da177e4 1197 unsigned long now = jiffies;
86393e52 1198 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1199 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1200 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1201 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1202 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1203 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1204 int entries;
7019b78e 1205
fc66f95c 1206 entries = dst_entries_get_fast(ops);
7019b78e 1207 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1208 entries <= rt_max_size)
1da177e4
LT
1209 goto out;
1210
6891a346
BT
1211 net->ipv6.ip6_rt_gc_expire++;
1212 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1213 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1214 entries = dst_entries_get_slow(ops);
1215 if (entries < ops->gc_thresh)
7019b78e 1216 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1217out:
7019b78e 1218 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1219 return entries > rt_max_size;
1da177e4
LT
1220}
1221
1222/* Clean host part of a prefix. Not necessary in radix tree,
1223 but results in cleaner routing tables.
1224
1225 Remove it only when all the things will work!
1226 */
1227
6b75d090 1228int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1229{
5170ae82 1230 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1231 if (hoplimit == 0) {
6b75d090 1232 struct net_device *dev = dst->dev;
c68f24cc
ED
1233 struct inet6_dev *idev;
1234
1235 rcu_read_lock();
1236 idev = __in6_dev_get(dev);
1237 if (idev)
6b75d090 1238 hoplimit = idev->cnf.hop_limit;
c68f24cc 1239 else
53b7997f 1240 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1241 rcu_read_unlock();
1da177e4
LT
1242 }
1243 return hoplimit;
1244}
abbf46ae 1245EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1246
1247/*
1248 *
1249 */
1250
86872cb5 1251int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1252{
1253 int err;
5578689a 1254 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1255 struct rt6_info *rt = NULL;
1256 struct net_device *dev = NULL;
1257 struct inet6_dev *idev = NULL;
c71099ac 1258 struct fib6_table *table;
1da177e4
LT
1259 int addr_type;
1260
86872cb5 1261 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1262 return -EINVAL;
1263#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1264 if (cfg->fc_src_len)
1da177e4
LT
1265 return -EINVAL;
1266#endif
86872cb5 1267 if (cfg->fc_ifindex) {
1da177e4 1268 err = -ENODEV;
5578689a 1269 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1270 if (!dev)
1271 goto out;
1272 idev = in6_dev_get(dev);
1273 if (!idev)
1274 goto out;
1275 }
1276
86872cb5
TG
1277 if (cfg->fc_metric == 0)
1278 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1279
d71314b4 1280 err = -ENOBUFS;
38308473
DM
1281 if (cfg->fc_nlinfo.nlh &&
1282 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1283 table = fib6_get_table(net, cfg->fc_table);
38308473 1284 if (!table) {
f3213831 1285 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1286 table = fib6_new_table(net, cfg->fc_table);
1287 }
1288 } else {
1289 table = fib6_new_table(net, cfg->fc_table);
1290 }
38308473
DM
1291
1292 if (!table)
c71099ac 1293 goto out;
c71099ac 1294
957c665f 1295 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1da177e4 1296
38308473 1297 if (!rt) {
1da177e4
LT
1298 err = -ENOMEM;
1299 goto out;
1300 }
1301
d8d1f30b 1302 rt->dst.obsolete = -1;
1716a961
G
1303
1304 if (cfg->fc_flags & RTF_EXPIRES)
1305 rt6_set_expires(rt, jiffies +
1306 clock_t_to_jiffies(cfg->fc_expires));
1307 else
1308 rt6_clean_expires(rt);
1da177e4 1309
86872cb5
TG
1310 if (cfg->fc_protocol == RTPROT_UNSPEC)
1311 cfg->fc_protocol = RTPROT_BOOT;
1312 rt->rt6i_protocol = cfg->fc_protocol;
1313
1314 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1315
1316 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1317 rt->dst.input = ip6_mc_input;
ab79ad14
1318 else if (cfg->fc_flags & RTF_LOCAL)
1319 rt->dst.input = ip6_input;
1da177e4 1320 else
d8d1f30b 1321 rt->dst.input = ip6_forward;
1da177e4 1322
d8d1f30b 1323 rt->dst.output = ip6_output;
1da177e4 1324
86872cb5
TG
1325 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1326 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1327 if (rt->rt6i_dst.plen == 128)
11d53b49 1328 rt->dst.flags |= DST_HOST;
1da177e4 1329
8e2ec639
YZ
1330 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1331 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1332 if (!metrics) {
1333 err = -ENOMEM;
1334 goto out;
1335 }
1336 dst_init_metrics(&rt->dst, metrics, 0);
1337 }
1da177e4 1338#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1339 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1340 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1341#endif
1342
86872cb5 1343 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1344
1345 /* We cannot add true routes via loopback here,
1346 they would result in kernel looping; promote them to reject routes
1347 */
86872cb5 1348 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1349 (dev && (dev->flags & IFF_LOOPBACK) &&
1350 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1351 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1352 /* hold loopback dev/idev if we haven't done so. */
5578689a 1353 if (dev != net->loopback_dev) {
1da177e4
LT
1354 if (dev) {
1355 dev_put(dev);
1356 in6_dev_put(idev);
1357 }
5578689a 1358 dev = net->loopback_dev;
1da177e4
LT
1359 dev_hold(dev);
1360 idev = in6_dev_get(dev);
1361 if (!idev) {
1362 err = -ENODEV;
1363 goto out;
1364 }
1365 }
d8d1f30b
CG
1366 rt->dst.output = ip6_pkt_discard_out;
1367 rt->dst.input = ip6_pkt_discard;
1368 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1369 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1370 goto install_route;
1371 }
1372
86872cb5 1373 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1374 const struct in6_addr *gw_addr;
1da177e4
LT
1375 int gwa_type;
1376
86872cb5 1377 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1378 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1379 gwa_type = ipv6_addr_type(gw_addr);
1380
1381 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1382 struct rt6_info *grt;
1383
1384 /* IPv6 strictly inhibits using not link-local
1385 addresses as nexthop address.
1386 Otherwise, router will not able to send redirects.
1387 It is very good, but in some (rare!) circumstances
1388 (SIT, PtP, NBMA NOARP links) it is handy to allow
1389 some exceptions. --ANK
1390 */
1391 err = -EINVAL;
38308473 1392 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1393 goto out;
1394
5578689a 1395 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1396
1397 err = -EHOSTUNREACH;
38308473 1398 if (!grt)
1da177e4
LT
1399 goto out;
1400 if (dev) {
d1918542 1401 if (dev != grt->dst.dev) {
d8d1f30b 1402 dst_release(&grt->dst);
1da177e4
LT
1403 goto out;
1404 }
1405 } else {
d1918542 1406 dev = grt->dst.dev;
1da177e4
LT
1407 idev = grt->rt6i_idev;
1408 dev_hold(dev);
1409 in6_dev_hold(grt->rt6i_idev);
1410 }
38308473 1411 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1412 err = 0;
d8d1f30b 1413 dst_release(&grt->dst);
1da177e4
LT
1414
1415 if (err)
1416 goto out;
1417 }
1418 err = -EINVAL;
38308473 1419 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1420 goto out;
1421 }
1422
1423 err = -ENODEV;
38308473 1424 if (!dev)
1da177e4
LT
1425 goto out;
1426
c3968a85
DW
1427 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1428 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1429 err = -EINVAL;
1430 goto out;
1431 }
4e3fd7a0 1432 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1433 rt->rt6i_prefsrc.plen = 128;
1434 } else
1435 rt->rt6i_prefsrc.plen = 0;
1436
86872cb5 1437 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1438 err = rt6_bind_neighbour(rt, dev);
f83c7790 1439 if (err)
1da177e4 1440 goto out;
1da177e4
LT
1441 }
1442
86872cb5 1443 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1444
1445install_route:
86872cb5
TG
1446 if (cfg->fc_mx) {
1447 struct nlattr *nla;
1448 int remaining;
1449
1450 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1451 int type = nla_type(nla);
86872cb5
TG
1452
1453 if (type) {
1454 if (type > RTAX_MAX) {
1da177e4
LT
1455 err = -EINVAL;
1456 goto out;
1457 }
86872cb5 1458
defb3519 1459 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1460 }
1da177e4
LT
1461 }
1462 }
1463
d8d1f30b 1464 rt->dst.dev = dev;
1da177e4 1465 rt->rt6i_idev = idev;
c71099ac 1466 rt->rt6i_table = table;
63152fc0 1467
c346dca1 1468 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1469
86872cb5 1470 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1471
1472out:
1473 if (dev)
1474 dev_put(dev);
1475 if (idev)
1476 in6_dev_put(idev);
1477 if (rt)
d8d1f30b 1478 dst_free(&rt->dst);
1da177e4
LT
1479 return err;
1480}
1481
86872cb5 1482static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1483{
1484 int err;
c71099ac 1485 struct fib6_table *table;
d1918542 1486 struct net *net = dev_net(rt->dst.dev);
1da177e4 1487
8ed67789 1488 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1489 return -ENOENT;
1490
c71099ac
TG
1491 table = rt->rt6i_table;
1492 write_lock_bh(&table->tb6_lock);
1da177e4 1493
86872cb5 1494 err = fib6_del(rt, info);
d8d1f30b 1495 dst_release(&rt->dst);
1da177e4 1496
c71099ac 1497 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1498
1499 return err;
1500}
1501
e0a1ad73
TG
1502int ip6_del_rt(struct rt6_info *rt)
1503{
4d1169c1 1504 struct nl_info info = {
d1918542 1505 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1506 };
528c4ceb 1507 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1508}
1509
86872cb5 1510static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1511{
c71099ac 1512 struct fib6_table *table;
1da177e4
LT
1513 struct fib6_node *fn;
1514 struct rt6_info *rt;
1515 int err = -ESRCH;
1516
5578689a 1517 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1518 if (!table)
c71099ac
TG
1519 return err;
1520
1521 read_lock_bh(&table->tb6_lock);
1da177e4 1522
c71099ac 1523 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1524 &cfg->fc_dst, cfg->fc_dst_len,
1525 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1526
1da177e4 1527 if (fn) {
d8d1f30b 1528 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1529 if (cfg->fc_ifindex &&
d1918542
DM
1530 (!rt->dst.dev ||
1531 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1532 continue;
86872cb5
TG
1533 if (cfg->fc_flags & RTF_GATEWAY &&
1534 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1535 continue;
86872cb5 1536 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1537 continue;
d8d1f30b 1538 dst_hold(&rt->dst);
c71099ac 1539 read_unlock_bh(&table->tb6_lock);
1da177e4 1540
86872cb5 1541 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1542 }
1543 }
c71099ac 1544 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1545
1546 return err;
1547}
1548
1549/*
1550 * Handle redirects
1551 */
a6279458 1552struct ip6rd_flowi {
4c9483b2 1553 struct flowi6 fl6;
a6279458
YH
1554 struct in6_addr gateway;
1555};
1556
8ed67789
DL
1557static struct rt6_info *__ip6_route_redirect(struct net *net,
1558 struct fib6_table *table,
4c9483b2 1559 struct flowi6 *fl6,
a6279458 1560 int flags)
1da177e4 1561{
4c9483b2 1562 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1563 struct rt6_info *rt;
e843b9e1 1564 struct fib6_node *fn;
c71099ac 1565
1da177e4 1566 /*
e843b9e1
YH
1567 * Get the "current" route for this destination and
1568 * check if the redirect has come from approriate router.
1569 *
1570 * RFC 2461 specifies that redirects should only be
1571 * accepted if they come from the nexthop to the target.
1572 * Due to the way the routes are chosen, this notion
1573 * is a bit fuzzy and one might need to check all possible
1574 * routes.
1da177e4 1575 */
1da177e4 1576
c71099ac 1577 read_lock_bh(&table->tb6_lock);
4c9483b2 1578 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1579restart:
d8d1f30b 1580 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1581 /*
1582 * Current route is on-link; redirect is always invalid.
1583 *
1584 * Seems, previous statement is not true. It could
1585 * be node, which looks for us as on-link (f.e. proxy ndisc)
1586 * But then router serving it might decide, that we should
1587 * know truth 8)8) --ANK (980726).
1588 */
1589 if (rt6_check_expired(rt))
1590 continue;
1591 if (!(rt->rt6i_flags & RTF_GATEWAY))
1592 continue;
d1918542 1593 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
e843b9e1 1594 continue;
a6279458 1595 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1596 continue;
1597 break;
1598 }
a6279458 1599
cb15d9c2 1600 if (!rt)
8ed67789 1601 rt = net->ipv6.ip6_null_entry;
4c9483b2 1602 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1603out:
d8d1f30b 1604 dst_hold(&rt->dst);
a6279458 1605
c71099ac 1606 read_unlock_bh(&table->tb6_lock);
e843b9e1 1607
a6279458
YH
1608 return rt;
1609};
1610
b71d1d42
ED
1611static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1612 const struct in6_addr *src,
1613 const struct in6_addr *gateway,
a6279458
YH
1614 struct net_device *dev)
1615{
adaa70bb 1616 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1617 struct net *net = dev_net(dev);
a6279458 1618 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1619 .fl6 = {
1620 .flowi6_oif = dev->ifindex,
1621 .daddr = *dest,
1622 .saddr = *src,
a6279458 1623 },
a6279458 1624 };
adaa70bb 1625
4e3fd7a0 1626 rdfl.gateway = *gateway;
86c36ce4 1627
adaa70bb
TG
1628 if (rt6_need_strict(dest))
1629 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1630
4c9483b2 1631 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1632 flags, __ip6_route_redirect);
a6279458
YH
1633}
1634
b71d1d42
ED
1635void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1636 const struct in6_addr *saddr,
a6279458
YH
1637 struct neighbour *neigh, u8 *lladdr, int on_link)
1638{
1639 struct rt6_info *rt, *nrt = NULL;
1640 struct netevent_redirect netevent;
c346dca1 1641 struct net *net = dev_net(neigh->dev);
a6279458
YH
1642
1643 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1644
8ed67789 1645 if (rt == net->ipv6.ip6_null_entry) {
e87cc472 1646 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
a6279458 1647 goto out;
1da177e4
LT
1648 }
1649
1da177e4
LT
1650 /*
1651 * We have finally decided to accept it.
1652 */
1653
1ab1457c 1654 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1655 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1656 NEIGH_UPDATE_F_OVERRIDE|
1657 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1658 NEIGH_UPDATE_F_ISROUTER))
1659 );
1660
1661 /*
1662 * Redirect received -> path was valid.
1663 * Look, redirects are sent only in response to data packets,
1664 * so that this nexthop apparently is reachable. --ANK
1665 */
d8d1f30b 1666 dst_confirm(&rt->dst);
1da177e4
LT
1667
1668 /* Duplicate redirect: silently ignore. */
27217455 1669 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1da177e4
LT
1670 goto out;
1671
21efcfa0 1672 nrt = ip6_rt_copy(rt, dest);
38308473 1673 if (!nrt)
1da177e4
LT
1674 goto out;
1675
1676 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1677 if (on_link)
1678 nrt->rt6i_flags &= ~RTF_GATEWAY;
1679
4e3fd7a0 1680 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
69cce1d1 1681 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1682
40e22e8f 1683 if (ip6_ins_rt(nrt))
1da177e4
LT
1684 goto out;
1685
d8d1f30b
CG
1686 netevent.old = &rt->dst;
1687 netevent.new = &nrt->dst;
8d71740c
TT
1688 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1689
38308473 1690 if (rt->rt6i_flags & RTF_CACHE) {
e0a1ad73 1691 ip6_del_rt(rt);
1da177e4
LT
1692 return;
1693 }
1694
1695out:
d8d1f30b 1696 dst_release(&rt->dst);
1da177e4
LT
1697}
1698
1699/*
1700 * Handle ICMP "packet too big" messages
1701 * i.e. Path MTU discovery
1702 */
1703
b71d1d42 1704static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1705 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1706{
1707 struct rt6_info *rt, *nrt;
1708 int allfrag = 0;
d3052b55 1709again:
ae878ae2 1710 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
38308473 1711 if (!rt)
1da177e4
LT
1712 return;
1713
d3052b55
AV
1714 if (rt6_check_expired(rt)) {
1715 ip6_del_rt(rt);
1716 goto again;
1717 }
1718
d8d1f30b 1719 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1720 goto out;
1721
1722 if (pmtu < IPV6_MIN_MTU) {
1723 /*
1ab1457c 1724 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1725 * MTU (1280) and a fragment header should always be included
1726 * after a node receiving Too Big message reporting PMTU is
1727 * less than the IPv6 Minimum Link MTU.
1728 */
1729 pmtu = IPV6_MIN_MTU;
1730 allfrag = 1;
1731 }
1732
1733 /* New mtu received -> path was valid.
1734 They are sent only in response to data packets,
1735 so that this nexthop apparently is reachable. --ANK
1736 */
d8d1f30b 1737 dst_confirm(&rt->dst);
1da177e4
LT
1738
1739 /* Host route. If it is static, it would be better
1740 not to override it, but add new one, so that
1741 when cache entry will expire old pmtu
1742 would return automatically.
1743 */
1744 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1745 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1746 if (allfrag) {
1747 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1748 features |= RTAX_FEATURE_ALLFRAG;
1749 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1750 }
1716a961
G
1751 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1752 rt->rt6i_flags |= RTF_MODIFIED;
1da177e4
LT
1753 goto out;
1754 }
1755
1756 /* Network route.
1757 Two cases are possible:
1758 1. It is connected route. Action: COW
1759 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1760 */
27217455 1761 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1762 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1763 else
1764 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1765
d5315b50 1766 if (nrt) {
defb3519
DM
1767 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1768 if (allfrag) {
1769 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1770 features |= RTAX_FEATURE_ALLFRAG;
1771 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1772 }
a1e78363
YH
1773
1774 /* According to RFC 1981, detecting PMTU increase shouldn't be
1775 * happened within 5 mins, the recommended timer is 10 mins.
1776 * Here this route expiration time is set to ip6_rt_mtu_expires
1777 * which is 10 mins. After 10 mins the decreased pmtu is expired
1778 * and detecting PMTU increase will be automatically happened.
1779 */
1716a961
G
1780 rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1781 nrt->rt6i_flags |= RTF_DYNAMIC;
40e22e8f 1782 ip6_ins_rt(nrt);
1da177e4 1783 }
1da177e4 1784out:
d8d1f30b 1785 dst_release(&rt->dst);
1da177e4
LT
1786}
1787
b71d1d42 1788void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1789 struct net_device *dev, u32 pmtu)
1790{
1791 struct net *net = dev_net(dev);
1792
1793 /*
1794 * RFC 1981 states that a node "MUST reduce the size of the packets it
1795 * is sending along the path" that caused the Packet Too Big message.
1796 * Since it's not possible in the general case to determine which
1797 * interface was used to send the original packet, we update the MTU
1798 * on the interface that will be used to send future packets. We also
1799 * update the MTU on the interface that received the Packet Too Big in
1800 * case the original packet was forced out that interface with
1801 * SO_BINDTODEVICE or similar. This is the next best thing to the
1802 * correct behaviour, which would be to update the MTU on all
1803 * interfaces.
1804 */
1805 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1806 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1807}
1808
1da177e4
LT
1809/*
1810 * Misc support functions
1811 */
1812
1716a961 1813static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1814 const struct in6_addr *dest)
1da177e4 1815{
d1918542 1816 struct net *net = dev_net(ort->dst.dev);
5c1e6aa3 1817 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 1818 ort->dst.dev, 0);
1da177e4
LT
1819
1820 if (rt) {
d8d1f30b
CG
1821 rt->dst.input = ort->dst.input;
1822 rt->dst.output = ort->dst.output;
8e2ec639 1823 rt->dst.flags |= DST_HOST;
d8d1f30b 1824
4e3fd7a0 1825 rt->rt6i_dst.addr = *dest;
8e2ec639 1826 rt->rt6i_dst.plen = 128;
defb3519 1827 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1828 rt->dst.error = ort->dst.error;
1da177e4
LT
1829 rt->rt6i_idev = ort->rt6i_idev;
1830 if (rt->rt6i_idev)
1831 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1832 rt->dst.lastuse = jiffies;
1da177e4 1833
4e3fd7a0 1834 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1835 rt->rt6i_flags = ort->rt6i_flags;
1836 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1837 (RTF_DEFAULT | RTF_ADDRCONF))
1838 rt6_set_from(rt, ort);
1839 else
1840 rt6_clean_expires(rt);
1da177e4
LT
1841 rt->rt6i_metric = 0;
1842
1da177e4
LT
1843#ifdef CONFIG_IPV6_SUBTREES
1844 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1845#endif
0f6c6392 1846 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1847 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1848 }
1849 return rt;
1850}
1851
70ceb4f5 1852#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1853static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1854 const struct in6_addr *prefix, int prefixlen,
1855 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1856{
1857 struct fib6_node *fn;
1858 struct rt6_info *rt = NULL;
c71099ac
TG
1859 struct fib6_table *table;
1860
efa2cea0 1861 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1862 if (!table)
c71099ac 1863 return NULL;
70ceb4f5 1864
c71099ac
TG
1865 write_lock_bh(&table->tb6_lock);
1866 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1867 if (!fn)
1868 goto out;
1869
d8d1f30b 1870 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1871 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1872 continue;
1873 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1874 continue;
1875 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1876 continue;
d8d1f30b 1877 dst_hold(&rt->dst);
70ceb4f5
YH
1878 break;
1879 }
1880out:
c71099ac 1881 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1882 return rt;
1883}
1884
efa2cea0 1885static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1886 const struct in6_addr *prefix, int prefixlen,
1887 const struct in6_addr *gwaddr, int ifindex,
95c96174 1888 unsigned int pref)
70ceb4f5 1889{
86872cb5
TG
1890 struct fib6_config cfg = {
1891 .fc_table = RT6_TABLE_INFO,
238fc7ea 1892 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1893 .fc_ifindex = ifindex,
1894 .fc_dst_len = prefixlen,
1895 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1896 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1897 .fc_nlinfo.pid = 0,
1898 .fc_nlinfo.nlh = NULL,
1899 .fc_nlinfo.nl_net = net,
86872cb5
TG
1900 };
1901
4e3fd7a0
AD
1902 cfg.fc_dst = *prefix;
1903 cfg.fc_gateway = *gwaddr;
70ceb4f5 1904
e317da96
YH
1905 /* We should treat it as a default route if prefix length is 0. */
1906 if (!prefixlen)
86872cb5 1907 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1908
86872cb5 1909 ip6_route_add(&cfg);
70ceb4f5 1910
efa2cea0 1911 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1912}
1913#endif
1914
b71d1d42 1915struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1916{
1da177e4 1917 struct rt6_info *rt;
c71099ac 1918 struct fib6_table *table;
1da177e4 1919
c346dca1 1920 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1921 if (!table)
c71099ac 1922 return NULL;
1da177e4 1923
c71099ac 1924 write_lock_bh(&table->tb6_lock);
d8d1f30b 1925 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1926 if (dev == rt->dst.dev &&
045927ff 1927 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1928 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1929 break;
1930 }
1931 if (rt)
d8d1f30b 1932 dst_hold(&rt->dst);
c71099ac 1933 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1934 return rt;
1935}
1936
b71d1d42 1937struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1938 struct net_device *dev,
1939 unsigned int pref)
1da177e4 1940{
86872cb5
TG
1941 struct fib6_config cfg = {
1942 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1943 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1944 .fc_ifindex = dev->ifindex,
1945 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1946 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1947 .fc_nlinfo.pid = 0,
1948 .fc_nlinfo.nlh = NULL,
c346dca1 1949 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1950 };
1da177e4 1951
4e3fd7a0 1952 cfg.fc_gateway = *gwaddr;
1da177e4 1953
86872cb5 1954 ip6_route_add(&cfg);
1da177e4 1955
1da177e4
LT
1956 return rt6_get_dflt_router(gwaddr, dev);
1957}
1958
7b4da532 1959void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1960{
1961 struct rt6_info *rt;
c71099ac
TG
1962 struct fib6_table *table;
1963
1964 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1965 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1966 if (!table)
c71099ac 1967 return;
1da177e4
LT
1968
1969restart:
c71099ac 1970 read_lock_bh(&table->tb6_lock);
d8d1f30b 1971 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1972 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1973 dst_hold(&rt->dst);
c71099ac 1974 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1975 ip6_del_rt(rt);
1da177e4
LT
1976 goto restart;
1977 }
1978 }
c71099ac 1979 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1980}
1981
5578689a
DL
1982static void rtmsg_to_fib6_config(struct net *net,
1983 struct in6_rtmsg *rtmsg,
86872cb5
TG
1984 struct fib6_config *cfg)
1985{
1986 memset(cfg, 0, sizeof(*cfg));
1987
1988 cfg->fc_table = RT6_TABLE_MAIN;
1989 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1990 cfg->fc_metric = rtmsg->rtmsg_metric;
1991 cfg->fc_expires = rtmsg->rtmsg_info;
1992 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1993 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1994 cfg->fc_flags = rtmsg->rtmsg_flags;
1995
5578689a 1996 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1997
4e3fd7a0
AD
1998 cfg->fc_dst = rtmsg->rtmsg_dst;
1999 cfg->fc_src = rtmsg->rtmsg_src;
2000 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2001}
2002
5578689a 2003int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2004{
86872cb5 2005 struct fib6_config cfg;
1da177e4
LT
2006 struct in6_rtmsg rtmsg;
2007 int err;
2008
2009 switch(cmd) {
2010 case SIOCADDRT: /* Add a route */
2011 case SIOCDELRT: /* Delete a route */
2012 if (!capable(CAP_NET_ADMIN))
2013 return -EPERM;
2014 err = copy_from_user(&rtmsg, arg,
2015 sizeof(struct in6_rtmsg));
2016 if (err)
2017 return -EFAULT;
86872cb5 2018
5578689a 2019 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2020
1da177e4
LT
2021 rtnl_lock();
2022 switch (cmd) {
2023 case SIOCADDRT:
86872cb5 2024 err = ip6_route_add(&cfg);
1da177e4
LT
2025 break;
2026 case SIOCDELRT:
86872cb5 2027 err = ip6_route_del(&cfg);
1da177e4
LT
2028 break;
2029 default:
2030 err = -EINVAL;
2031 }
2032 rtnl_unlock();
2033
2034 return err;
3ff50b79 2035 }
1da177e4
LT
2036
2037 return -EINVAL;
2038}
2039
2040/*
2041 * Drop the packet on the floor
2042 */
2043
d5fdd6ba 2044static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2045{
612f09e8 2046 int type;
adf30907 2047 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2048 switch (ipstats_mib_noroutes) {
2049 case IPSTATS_MIB_INNOROUTES:
0660e03f 2050 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2051 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2052 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2053 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2054 break;
2055 }
2056 /* FALLTHROUGH */
2057 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2058 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2059 ipstats_mib_noroutes);
612f09e8
YH
2060 break;
2061 }
3ffe533c 2062 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2063 kfree_skb(skb);
2064 return 0;
2065}
2066
9ce8ade0
TG
2067static int ip6_pkt_discard(struct sk_buff *skb)
2068{
612f09e8 2069 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2070}
2071
20380731 2072static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2073{
adf30907 2074 skb->dev = skb_dst(skb)->dev;
612f09e8 2075 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2076}
2077
6723ab54
DM
2078#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2079
9ce8ade0
TG
2080static int ip6_pkt_prohibit(struct sk_buff *skb)
2081{
612f09e8 2082 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2083}
2084
2085static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2086{
adf30907 2087 skb->dev = skb_dst(skb)->dev;
612f09e8 2088 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2089}
2090
6723ab54
DM
2091#endif
2092
1da177e4
LT
2093/*
2094 * Allocate a dst for local (unicast / anycast) address.
2095 */
2096
2097struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2098 const struct in6_addr *addr,
8f031519 2099 bool anycast)
1da177e4 2100{
c346dca1 2101 struct net *net = dev_net(idev->dev);
5c1e6aa3 2102 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 2103 net->loopback_dev, 0);
f83c7790 2104 int err;
1da177e4 2105
38308473 2106 if (!rt) {
f3213831 2107 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
1da177e4 2108 return ERR_PTR(-ENOMEM);
40385653 2109 }
1da177e4 2110
1da177e4
LT
2111 in6_dev_hold(idev);
2112
11d53b49 2113 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2114 rt->dst.input = ip6_input;
2115 rt->dst.output = ip6_output;
1da177e4 2116 rt->rt6i_idev = idev;
d8d1f30b 2117 rt->dst.obsolete = -1;
1da177e4
LT
2118
2119 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2120 if (anycast)
2121 rt->rt6i_flags |= RTF_ANYCAST;
2122 else
1da177e4 2123 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2124 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2125 if (err) {
d8d1f30b 2126 dst_free(&rt->dst);
f83c7790 2127 return ERR_PTR(err);
1da177e4
LT
2128 }
2129
4e3fd7a0 2130 rt->rt6i_dst.addr = *addr;
1da177e4 2131 rt->rt6i_dst.plen = 128;
5578689a 2132 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2133
d8d1f30b 2134 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2135
2136 return rt;
2137}
2138
c3968a85
DW
2139int ip6_route_get_saddr(struct net *net,
2140 struct rt6_info *rt,
b71d1d42 2141 const struct in6_addr *daddr,
c3968a85
DW
2142 unsigned int prefs,
2143 struct in6_addr *saddr)
2144{
2145 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2146 int err = 0;
2147 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2148 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2149 else
2150 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2151 daddr, prefs, saddr);
2152 return err;
2153}
2154
2155/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* restrict to this device; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
2161
2162static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2163{
2164 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2165 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2166 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2167
d1918542 2168 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2169 rt != net->ipv6.ip6_null_entry &&
2170 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2171 /* remove prefsrc entry */
2172 rt->rt6i_prefsrc.plen = 0;
2173 }
2174 return 0;
2175}
2176
2177void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2178{
2179 struct net *net = dev_net(ifp->idev->dev);
2180 struct arg_dev_net_ip adni = {
2181 .dev = ifp->idev->dev,
2182 .net = net,
2183 .addr = &ifp->addr,
2184 };
2185 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2186}
2187
8ed67789
DL
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace whose null entry is kept */
};
2192
1da177e4
LT
2193static int fib6_ifdown(struct rt6_info *rt, void *arg)
2194{
bc3ef660 2195 const struct arg_dev_net *adn = arg;
2196 const struct net_device *dev = adn->dev;
8ed67789 2197
d1918542 2198 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2199 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2200 return -1;
c159d30c 2201
1da177e4
LT
2202 return 0;
2203}
2204
f3db4851 2205void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2206{
8ed67789
DL
2207 struct arg_dev_net adn = {
2208 .dev = dev,
2209 .net = net,
2210 };
2211
2212 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2213 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2214}
2215
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2220
2221static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2222{
2223 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2224 struct inet6_dev *idev;
2225
2226 /* In IPv6 pmtu discovery is not optional,
2227 so that RTAX_MTU lock cannot disable it.
2228 We still use this lock to block changes
2229 caused by addrconf/ndisc.
2230 */
2231
2232 idev = __in6_dev_get(arg->dev);
38308473 2233 if (!idev)
1da177e4
LT
2234 return 0;
2235
2236 /* For administrative MTU increase, there is no way to discover
2237 IPv6 PMTU increase, so PMTU increase should be updated here.
2238 Since RFC 1981 doesn't include administrative MTU increase
2239 update PMTU increase is a MUST. (i.e. jumbo frame)
2240 */
2241 /*
2242 If new MTU is less than route PMTU, this new MTU will be the
2243 lowest MTU in the path, update the route PMTU to reflect PMTU
2244 decreases; if new MTU is greater than route PMTU, and the
2245 old MTU is the lowest MTU in the path, update the route PMTU
2246 to reflect the increase. In this case if the other nodes' MTU
2247 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2248 PMTU discouvery.
2249 */
d1918542 2250 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2251 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2252 (dst_mtu(&rt->dst) >= arg->mtu ||
2253 (dst_mtu(&rt->dst) < arg->mtu &&
2254 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2255 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2256 }
1da177e4
LT
2257 return 0;
2258}
2259
95c96174 2260void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2261{
c71099ac
TG
2262 struct rt6_mtu_change_arg arg = {
2263 .dev = dev,
2264 .mtu = mtu,
2265 };
1da177e4 2266
c346dca1 2267 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2268}
2269
ef7c79ed 2270static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2271 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2272 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2273 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2274 [RTA_PRIORITY] = { .type = NLA_U32 },
2275 [RTA_METRICS] = { .type = NLA_NESTED },
2276};
2277
2278static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2279 struct fib6_config *cfg)
1da177e4 2280{
86872cb5
TG
2281 struct rtmsg *rtm;
2282 struct nlattr *tb[RTA_MAX+1];
2283 int err;
1da177e4 2284
86872cb5
TG
2285 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2286 if (err < 0)
2287 goto errout;
1da177e4 2288
86872cb5
TG
2289 err = -EINVAL;
2290 rtm = nlmsg_data(nlh);
2291 memset(cfg, 0, sizeof(*cfg));
2292
2293 cfg->fc_table = rtm->rtm_table;
2294 cfg->fc_dst_len = rtm->rtm_dst_len;
2295 cfg->fc_src_len = rtm->rtm_src_len;
2296 cfg->fc_flags = RTF_UP;
2297 cfg->fc_protocol = rtm->rtm_protocol;
2298
2299 if (rtm->rtm_type == RTN_UNREACHABLE)
2300 cfg->fc_flags |= RTF_REJECT;
2301
ab79ad14
2302 if (rtm->rtm_type == RTN_LOCAL)
2303 cfg->fc_flags |= RTF_LOCAL;
2304
86872cb5
TG
2305 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2306 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2307 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2308
2309 if (tb[RTA_GATEWAY]) {
2310 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2311 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2312 }
86872cb5
TG
2313
2314 if (tb[RTA_DST]) {
2315 int plen = (rtm->rtm_dst_len + 7) >> 3;
2316
2317 if (nla_len(tb[RTA_DST]) < plen)
2318 goto errout;
2319
2320 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2321 }
86872cb5
TG
2322
2323 if (tb[RTA_SRC]) {
2324 int plen = (rtm->rtm_src_len + 7) >> 3;
2325
2326 if (nla_len(tb[RTA_SRC]) < plen)
2327 goto errout;
2328
2329 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2330 }
86872cb5 2331
c3968a85
DW
2332 if (tb[RTA_PREFSRC])
2333 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2334
86872cb5
TG
2335 if (tb[RTA_OIF])
2336 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2337
2338 if (tb[RTA_PRIORITY])
2339 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2340
2341 if (tb[RTA_METRICS]) {
2342 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2343 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2344 }
86872cb5
TG
2345
2346 if (tb[RTA_TABLE])
2347 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2348
2349 err = 0;
2350errout:
2351 return err;
1da177e4
LT
2352}
2353
c127ea2c 2354static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2355{
86872cb5
TG
2356 struct fib6_config cfg;
2357 int err;
1da177e4 2358
86872cb5
TG
2359 err = rtm_to_fib6_config(skb, nlh, &cfg);
2360 if (err < 0)
2361 return err;
2362
2363 return ip6_route_del(&cfg);
1da177e4
LT
2364}
2365
c127ea2c 2366static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2367{
86872cb5
TG
2368 struct fib6_config cfg;
2369 int err;
1da177e4 2370
86872cb5
TG
2371 err = rtm_to_fib6_config(skb, nlh, &cfg);
2372 if (err < 0)
2373 return err;
2374
2375 return ip6_route_add(&cfg);
1da177e4
LT
2376}
2377
339bf98f
TG
2378static inline size_t rt6_nlmsg_size(void)
2379{
2380 return NLMSG_ALIGN(sizeof(struct rtmsg))
2381 + nla_total_size(16) /* RTA_SRC */
2382 + nla_total_size(16) /* RTA_DST */
2383 + nla_total_size(16) /* RTA_GATEWAY */
2384 + nla_total_size(16) /* RTA_PREFSRC */
2385 + nla_total_size(4) /* RTA_TABLE */
2386 + nla_total_size(4) /* RTA_IIF */
2387 + nla_total_size(4) /* RTA_OIF */
2388 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2389 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2390 + nla_total_size(sizeof(struct rta_cacheinfo));
2391}
2392
191cd582
BH
2393static int rt6_fill_node(struct net *net,
2394 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2395 struct in6_addr *dst, struct in6_addr *src,
2396 int iif, int type, u32 pid, u32 seq,
7bc570c8 2397 int prefix, int nowait, unsigned int flags)
1da177e4 2398{
346f870b 2399 const struct inet_peer *peer;
1da177e4 2400 struct rtmsg *rtm;
2d7202bf 2401 struct nlmsghdr *nlh;
e3703b3d 2402 long expires;
9e762a4a 2403 u32 table;
f2c31e32 2404 struct neighbour *n;
346f870b 2405 u32 ts, tsage;
1da177e4
LT
2406
2407 if (prefix) { /* user wants prefix routes only */
2408 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2409 /* success since this is not a prefix route */
2410 return 1;
2411 }
2412 }
2413
2d7202bf 2414 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2415 if (!nlh)
26932566 2416 return -EMSGSIZE;
2d7202bf
TG
2417
2418 rtm = nlmsg_data(nlh);
1da177e4
LT
2419 rtm->rtm_family = AF_INET6;
2420 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2421 rtm->rtm_src_len = rt->rt6i_src.plen;
2422 rtm->rtm_tos = 0;
c71099ac 2423 if (rt->rt6i_table)
9e762a4a 2424 table = rt->rt6i_table->tb6_id;
c71099ac 2425 else
9e762a4a
PM
2426 table = RT6_TABLE_UNSPEC;
2427 rtm->rtm_table = table;
c78679e8
DM
2428 if (nla_put_u32(skb, RTA_TABLE, table))
2429 goto nla_put_failure;
38308473 2430 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2431 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2432 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2433 rtm->rtm_type = RTN_LOCAL;
d1918542 2434 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2435 rtm->rtm_type = RTN_LOCAL;
2436 else
2437 rtm->rtm_type = RTN_UNICAST;
2438 rtm->rtm_flags = 0;
2439 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2440 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2441 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2442 rtm->rtm_protocol = RTPROT_REDIRECT;
2443 else if (rt->rt6i_flags & RTF_ADDRCONF)
2444 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2445 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2446 rtm->rtm_protocol = RTPROT_RA;
2447
38308473 2448 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2449 rtm->rtm_flags |= RTM_F_CLONED;
2450
2451 if (dst) {
c78679e8
DM
2452 if (nla_put(skb, RTA_DST, 16, dst))
2453 goto nla_put_failure;
1ab1457c 2454 rtm->rtm_dst_len = 128;
1da177e4 2455 } else if (rtm->rtm_dst_len)
c78679e8
DM
2456 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2457 goto nla_put_failure;
1da177e4
LT
2458#ifdef CONFIG_IPV6_SUBTREES
2459 if (src) {
c78679e8
DM
2460 if (nla_put(skb, RTA_SRC, 16, src))
2461 goto nla_put_failure;
1ab1457c 2462 rtm->rtm_src_len = 128;
c78679e8
DM
2463 } else if (rtm->rtm_src_len &&
2464 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2465 goto nla_put_failure;
1da177e4 2466#endif
7bc570c8
YH
2467 if (iif) {
2468#ifdef CONFIG_IPV6_MROUTE
2469 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2470 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2471 if (err <= 0) {
2472 if (!nowait) {
2473 if (err == 0)
2474 return 0;
2475 goto nla_put_failure;
2476 } else {
2477 if (err == -EMSGSIZE)
2478 goto nla_put_failure;
2479 }
2480 }
2481 } else
2482#endif
c78679e8
DM
2483 if (nla_put_u32(skb, RTA_IIF, iif))
2484 goto nla_put_failure;
7bc570c8 2485 } else if (dst) {
1da177e4 2486 struct in6_addr saddr_buf;
c78679e8
DM
2487 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2488 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2489 goto nla_put_failure;
1da177e4 2490 }
2d7202bf 2491
c3968a85
DW
2492 if (rt->rt6i_prefsrc.plen) {
2493 struct in6_addr saddr_buf;
4e3fd7a0 2494 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2495 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2496 goto nla_put_failure;
c3968a85
DW
2497 }
2498
defb3519 2499 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2500 goto nla_put_failure;
2501
f2c31e32 2502 rcu_read_lock();
27217455 2503 n = dst_get_neighbour_noref(&rt->dst);
94f826b8
ED
2504 if (n) {
2505 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2506 rcu_read_unlock();
2507 goto nla_put_failure;
2508 }
2509 }
f2c31e32 2510 rcu_read_unlock();
2d7202bf 2511
c78679e8
DM
2512 if (rt->dst.dev &&
2513 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2514 goto nla_put_failure;
2515 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2516 goto nla_put_failure;
36e3deae
YH
2517 if (!(rt->rt6i_flags & RTF_EXPIRES))
2518 expires = 0;
d1918542
DM
2519 else if (rt->dst.expires - jiffies < INT_MAX)
2520 expires = rt->dst.expires - jiffies;
36e3deae
YH
2521 else
2522 expires = INT_MAX;
69cdf8f9 2523
346f870b
DM
2524 peer = rt->rt6i_peer;
2525 ts = tsage = 0;
2526 if (peer && peer->tcp_ts_stamp) {
2527 ts = peer->tcp_ts;
2528 tsage = get_seconds() - peer->tcp_ts_stamp;
2529 }
2530
2531 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
d8d1f30b 2532 expires, rt->dst.error) < 0)
e3703b3d 2533 goto nla_put_failure;
2d7202bf
TG
2534
2535 return nlmsg_end(skb, nlh);
2536
2537nla_put_failure:
26932566
PM
2538 nlmsg_cancel(skb, nlh);
2539 return -EMSGSIZE;
1da177e4
LT
2540}
2541
1b43af54 2542int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2543{
2544 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2545 int prefix;
2546
2d7202bf
TG
2547 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2548 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2549 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2550 } else
2551 prefix = 0;
2552
191cd582
BH
2553 return rt6_fill_node(arg->net,
2554 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2555 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2556 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2557}
2558
c127ea2c 2559static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2560{
3b1e0a65 2561 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2562 struct nlattr *tb[RTA_MAX+1];
2563 struct rt6_info *rt;
1da177e4 2564 struct sk_buff *skb;
ab364a6f 2565 struct rtmsg *rtm;
4c9483b2 2566 struct flowi6 fl6;
72331bc0 2567 int err, iif = 0, oif = 0;
1da177e4 2568
ab364a6f
TG
2569 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2570 if (err < 0)
2571 goto errout;
1da177e4 2572
ab364a6f 2573 err = -EINVAL;
4c9483b2 2574 memset(&fl6, 0, sizeof(fl6));
1da177e4 2575
ab364a6f
TG
2576 if (tb[RTA_SRC]) {
2577 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2578 goto errout;
2579
4e3fd7a0 2580 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2581 }
2582
2583 if (tb[RTA_DST]) {
2584 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2585 goto errout;
2586
4e3fd7a0 2587 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2588 }
2589
2590 if (tb[RTA_IIF])
2591 iif = nla_get_u32(tb[RTA_IIF]);
2592
2593 if (tb[RTA_OIF])
72331bc0 2594 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2595
2596 if (iif) {
2597 struct net_device *dev;
72331bc0
SL
2598 int flags = 0;
2599
5578689a 2600 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2601 if (!dev) {
2602 err = -ENODEV;
ab364a6f 2603 goto errout;
1da177e4 2604 }
72331bc0
SL
2605
2606 fl6.flowi6_iif = iif;
2607
2608 if (!ipv6_addr_any(&fl6.saddr))
2609 flags |= RT6_LOOKUP_F_HAS_SADDR;
2610
2611 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2612 flags);
2613 } else {
2614 fl6.flowi6_oif = oif;
2615
2616 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2617 }
2618
ab364a6f 2619 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2620 if (!skb) {
2173bff5 2621 dst_release(&rt->dst);
ab364a6f
TG
2622 err = -ENOBUFS;
2623 goto errout;
2624 }
1da177e4 2625
ab364a6f
TG
2626 /* Reserve room for dummy headers, this skb can pass
2627 through good chunk of routing engine.
2628 */
459a98ed 2629 skb_reset_mac_header(skb);
ab364a6f 2630 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2631
d8d1f30b 2632 skb_dst_set(skb, &rt->dst);
1da177e4 2633
4c9483b2 2634 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2635 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2636 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2637 if (err < 0) {
ab364a6f
TG
2638 kfree_skb(skb);
2639 goto errout;
1da177e4
LT
2640 }
2641
5578689a 2642 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2643errout:
1da177e4 2644 return err;
1da177e4
LT
2645}
2646
86872cb5 2647void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2648{
2649 struct sk_buff *skb;
5578689a 2650 struct net *net = info->nl_net;
528c4ceb
DL
2651 u32 seq;
2652 int err;
2653
2654 err = -ENOBUFS;
38308473 2655 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2656
339bf98f 2657 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2658 if (!skb)
21713ebc
TG
2659 goto errout;
2660
191cd582 2661 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2662 event, info->pid, seq, 0, 0, 0);
26932566
PM
2663 if (err < 0) {
2664 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2665 WARN_ON(err == -EMSGSIZE);
2666 kfree_skb(skb);
2667 goto errout;
2668 }
1ce85fe4
PNA
2669 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2670 info->nlh, gfp_any());
2671 return;
21713ebc
TG
2672errout:
2673 if (err < 0)
5578689a 2674 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2675}
2676
8ed67789
DL
2677static int ip6_route_dev_notify(struct notifier_block *this,
2678 unsigned long event, void *data)
2679{
2680 struct net_device *dev = (struct net_device *)data;
c346dca1 2681 struct net *net = dev_net(dev);
8ed67789
DL
2682
2683 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2684 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2685 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2686#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2687 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2688 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2689 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2690 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2691#endif
2692 }
2693
2694 return NOTIFY_OK;
2695}
2696
1da177e4
LT
2697/*
2698 * /proc
2699 */
2700
2701#ifdef CONFIG_PROC_FS
2702
1da177e4
LT
/*
 * Buffer-tracking state for /proc route output.
 * NOTE(review): nothing in the visible part of this file uses this
 * struct; confirm against the rest of the tree before removing it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2711
2712static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2713{
33120b30 2714 struct seq_file *m = p_arg;
69cce1d1 2715 struct neighbour *n;
1da177e4 2716
4b7a4274 2717 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2718
2719#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2720 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2721#else
33120b30 2722 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2723#endif
f2c31e32 2724 rcu_read_lock();
27217455 2725 n = dst_get_neighbour_noref(&rt->dst);
69cce1d1
DM
2726 if (n) {
2727 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2728 } else {
33120b30 2729 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2730 }
f2c31e32 2731 rcu_read_unlock();
33120b30 2732 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2733 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2734 rt->dst.__use, rt->rt6i_flags,
d1918542 2735 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2736 return 0;
2737}
2738
33120b30 2739static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2740{
f3db4851 2741 struct net *net = (struct net *)m->private;
32b293a5 2742 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2743 return 0;
2744}
1da177e4 2745
33120b30
AD
2746static int ipv6_route_open(struct inode *inode, struct file *file)
2747{
de05c557 2748 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2749}
2750
33120b30
AD
2751static const struct file_operations ipv6_route_proc_fops = {
2752 .owner = THIS_MODULE,
2753 .open = ipv6_route_open,
2754 .read = seq_read,
2755 .llseek = seq_lseek,
b6fcbdb4 2756 .release = single_release_net,
33120b30
AD
2757};
2758
1da177e4
LT
2759static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2760{
69ddb805 2761 struct net *net = (struct net *)seq->private;
1da177e4 2762 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2763 net->ipv6.rt6_stats->fib_nodes,
2764 net->ipv6.rt6_stats->fib_route_nodes,
2765 net->ipv6.rt6_stats->fib_rt_alloc,
2766 net->ipv6.rt6_stats->fib_rt_entries,
2767 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2768 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2769 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2770
2771 return 0;
2772}
2773
2774static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2775{
de05c557 2776 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2777}
2778
9a32144e 2779static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2780 .owner = THIS_MODULE,
2781 .open = rt6_stats_seq_open,
2782 .read = seq_read,
2783 .llseek = seq_lseek,
b6fcbdb4 2784 .release = single_release_net,
1da177e4
LT
2785};
2786#endif /* CONFIG_PROC_FS */
2787
2788#ifdef CONFIG_SYSCTL
2789
1da177e4 2790static
8d65af78 2791int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2792 void __user *buffer, size_t *lenp, loff_t *ppos)
2793{
c486da34
LAG
2794 struct net *net;
2795 int delay;
2796 if (!write)
1da177e4 2797 return -EINVAL;
c486da34
LAG
2798
2799 net = (struct net *)ctl->extra1;
2800 delay = net->ipv6.sysctl.flush_delay;
2801 proc_dointvec(ctl, write, buffer, lenp, ppos);
2802 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2803 return 0;
1da177e4
LT
2804}
2805
760f2d01 2806ctl_table ipv6_route_table_template[] = {
1ab1457c 2807 {
1da177e4 2808 .procname = "flush",
4990509f 2809 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2810 .maxlen = sizeof(int),
89c8b3a1 2811 .mode = 0200,
6d9f239a 2812 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2813 },
2814 {
1da177e4 2815 .procname = "gc_thresh",
9a7ec3a9 2816 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2817 .maxlen = sizeof(int),
2818 .mode = 0644,
6d9f239a 2819 .proc_handler = proc_dointvec,
1da177e4
LT
2820 },
2821 {
1da177e4 2822 .procname = "max_size",
4990509f 2823 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2824 .maxlen = sizeof(int),
2825 .mode = 0644,
6d9f239a 2826 .proc_handler = proc_dointvec,
1da177e4
LT
2827 },
2828 {
1da177e4 2829 .procname = "gc_min_interval",
4990509f 2830 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2831 .maxlen = sizeof(int),
2832 .mode = 0644,
6d9f239a 2833 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2834 },
2835 {
1da177e4 2836 .procname = "gc_timeout",
4990509f 2837 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2838 .maxlen = sizeof(int),
2839 .mode = 0644,
6d9f239a 2840 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2841 },
2842 {
1da177e4 2843 .procname = "gc_interval",
4990509f 2844 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2845 .maxlen = sizeof(int),
2846 .mode = 0644,
6d9f239a 2847 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2848 },
2849 {
1da177e4 2850 .procname = "gc_elasticity",
4990509f 2851 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2852 .maxlen = sizeof(int),
2853 .mode = 0644,
f3d3f616 2854 .proc_handler = proc_dointvec,
1da177e4
LT
2855 },
2856 {
1da177e4 2857 .procname = "mtu_expires",
4990509f 2858 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2859 .maxlen = sizeof(int),
2860 .mode = 0644,
6d9f239a 2861 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2862 },
2863 {
1da177e4 2864 .procname = "min_adv_mss",
4990509f 2865 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2866 .maxlen = sizeof(int),
2867 .mode = 0644,
f3d3f616 2868 .proc_handler = proc_dointvec,
1da177e4
LT
2869 },
2870 {
1da177e4 2871 .procname = "gc_min_interval_ms",
4990509f 2872 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2873 .maxlen = sizeof(int),
2874 .mode = 0644,
6d9f239a 2875 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2876 },
f8572d8f 2877 { }
1da177e4
LT
2878};
2879
2c8c1e72 2880struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2881{
2882 struct ctl_table *table;
2883
2884 table = kmemdup(ipv6_route_table_template,
2885 sizeof(ipv6_route_table_template),
2886 GFP_KERNEL);
5ee09105
YH
2887
2888 if (table) {
2889 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2890 table[0].extra1 = net;
86393e52 2891 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2892 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2893 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2894 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2895 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2896 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2897 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2898 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2899 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2900 }
2901
760f2d01
DL
2902 return table;
2903}
1da177e4
LT
2904#endif
2905
2c8c1e72 2906static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2907{
633d424b 2908 int ret = -ENOMEM;
8ed67789 2909
86393e52
AD
2910 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2911 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2912
fc66f95c
ED
2913 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2914 goto out_ip6_dst_ops;
2915
8ed67789
DL
2916 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2917 sizeof(*net->ipv6.ip6_null_entry),
2918 GFP_KERNEL);
2919 if (!net->ipv6.ip6_null_entry)
fc66f95c 2920 goto out_ip6_dst_entries;
d8d1f30b 2921 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2922 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2923 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2924 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2925 ip6_template_metrics, true);
8ed67789
DL
2926
2927#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2928 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2929 sizeof(*net->ipv6.ip6_prohibit_entry),
2930 GFP_KERNEL);
68fffc67
PZ
2931 if (!net->ipv6.ip6_prohibit_entry)
2932 goto out_ip6_null_entry;
d8d1f30b 2933 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2934 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2935 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2936 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2937 ip6_template_metrics, true);
8ed67789
DL
2938
2939 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2940 sizeof(*net->ipv6.ip6_blk_hole_entry),
2941 GFP_KERNEL);
68fffc67
PZ
2942 if (!net->ipv6.ip6_blk_hole_entry)
2943 goto out_ip6_prohibit_entry;
d8d1f30b 2944 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2945 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2946 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2947 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2948 ip6_template_metrics, true);
8ed67789
DL
2949#endif
2950
b339a47c
PZ
2951 net->ipv6.sysctl.flush_delay = 0;
2952 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2953 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2954 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2955 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2956 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2957 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2958 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2959
cdb18761
DL
2960#ifdef CONFIG_PROC_FS
2961 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2962 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2963#endif
6891a346
BT
2964 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2965
8ed67789
DL
2966 ret = 0;
2967out:
2968 return ret;
f2fc6a54 2969
68fffc67
PZ
2970#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2971out_ip6_prohibit_entry:
2972 kfree(net->ipv6.ip6_prohibit_entry);
2973out_ip6_null_entry:
2974 kfree(net->ipv6.ip6_null_entry);
2975#endif
fc66f95c
ED
2976out_ip6_dst_entries:
2977 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2978out_ip6_dst_ops:
f2fc6a54 2979 goto out;
cdb18761
DL
2980}
2981
2c8c1e72 2982static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2983{
2984#ifdef CONFIG_PROC_FS
2985 proc_net_remove(net, "ipv6_route");
2986 proc_net_remove(net, "rt6_stats");
2987#endif
8ed67789
DL
2988 kfree(net->ipv6.ip6_null_entry);
2989#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2990 kfree(net->ipv6.ip6_prohibit_entry);
2991 kfree(net->ipv6.ip6_blk_hole_entry);
2992#endif
41bb78b4 2993 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2994}
2995
2996static struct pernet_operations ip6_route_net_ops = {
2997 .init = ip6_route_net_init,
2998 .exit = ip6_route_net_exit,
2999};
3000
8ed67789
DL
3001static struct notifier_block ip6_route_dev_notifier = {
3002 .notifier_call = ip6_route_dev_notify,
3003 .priority = 0,
3004};
3005
433d49c3 3006int __init ip6_route_init(void)
1da177e4 3007{
433d49c3
DL
3008 int ret;
3009
9a7ec3a9
DL
3010 ret = -ENOMEM;
3011 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3012 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3013 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3014 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3015 goto out;
14e50e57 3016
fc66f95c 3017 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3018 if (ret)
bdb3289f 3019 goto out_kmem_cache;
bdb3289f 3020
2a0c451a 3021 ret = fib6_init();
fc66f95c
ED
3022 if (ret)
3023 goto out_dst_entries;
3024
2a0c451a
TG
3025 ret = register_pernet_subsys(&ip6_route_net_ops);
3026 if (ret)
3027 goto out_fib6_init;
3028
5dc121e9
AE
3029 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3030
8ed67789
DL
3031 /* Registering of the loopback is done before this portion of code,
3032 * the loopback reference in rt6_info will not be taken, do it
3033 * manually for init_net */
d8d1f30b 3034 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3035 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3036 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3037 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3038 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3039 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3040 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3041 #endif
2a0c451a 3042 ret = fib6_init_late();
433d49c3 3043 if (ret)
8ed67789 3044 goto out_register_subsys;
433d49c3 3045
433d49c3
DL
3046 ret = xfrm6_init();
3047 if (ret)
2a0c451a 3048 goto out_fib6_init_late;
c35b7e72 3049
433d49c3
DL
3050 ret = fib6_rules_init();
3051 if (ret)
3052 goto xfrm6_init;
7e5449c2 3053
433d49c3 3054 ret = -ENOBUFS;
c7ac8679
GR
3055 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3056 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3057 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 3058 goto fib6_rules_init;
c127ea2c 3059
8ed67789 3060 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
3061 if (ret)
3062 goto fib6_rules_init;
8ed67789 3063
433d49c3
DL
3064out:
3065 return ret;
3066
3067fib6_rules_init:
433d49c3
DL
3068 fib6_rules_cleanup();
3069xfrm6_init:
433d49c3 3070 xfrm6_fini();
2a0c451a
TG
3071out_fib6_init_late:
3072 fib6_cleanup_late();
8ed67789
DL
3073out_register_subsys:
3074 unregister_pernet_subsys(&ip6_route_net_ops);
2a0c451a
TG
3075out_fib6_init:
3076 fib6_gc_cleanup();
fc66f95c
ED
3077out_dst_entries:
3078 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3079out_kmem_cache:
f2fc6a54 3080 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3081 goto out;
1da177e4
LT
3082}
3083
3084void ip6_route_cleanup(void)
3085{
8ed67789 3086 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 3087 fib6_rules_cleanup();
1da177e4 3088 xfrm6_fini();
1da177e4 3089 fib6_gc_cleanup();
8ed67789 3090 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3091 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3092 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3093}