ipv6: Export ndisc option parsing from ndisc.c
[linux-2.6-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
1da177e4
LT
60
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
1716a961 67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 68 const struct in6_addr *dest);
1da177e4 69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 70static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 71static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73static void ip6_dst_destroy(struct dst_entry *);
74static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
569d3645 76static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
77
78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb);
81static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82
70ceb4f5 83#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 84static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
85 const struct in6_addr *prefix, int prefixlen,
86 const struct in6_addr *gwaddr, int ifindex,
95c96174 87 unsigned int pref);
efa2cea0 88static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
91#endif
92
06582540
DM
93static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
94{
95 struct rt6_info *rt = (struct rt6_info *) dst;
96 struct inet_peer *peer;
97 u32 *p = NULL;
98
8e2ec639
YZ
99 if (!(rt->dst.flags & DST_HOST))
100 return NULL;
101
fbfe95a4 102 peer = rt6_get_peer_create(rt);
06582540
DM
103 if (peer) {
104 u32 *old_p = __DST_METRICS_PTR(old);
105 unsigned long prev, new;
106
107 p = peer->metrics;
108 if (inet_metrics_new(peer))
109 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
110
111 new = (unsigned long) p;
112 prev = cmpxchg(&dst->_metrics, old, new);
113
114 if (prev != old) {
115 p = __DST_METRICS_PTR(prev);
116 if (prev & DST_METRICS_READ_ONLY)
117 p = NULL;
118 }
119 }
120 return p;
121}
122
f894cbf8
DM
123static inline const void *choose_neigh_daddr(struct rt6_info *rt,
124 struct sk_buff *skb,
125 const void *daddr)
39232973
DM
126{
127 struct in6_addr *p = &rt->rt6i_gateway;
128
a7563f34 129 if (!ipv6_addr_any(p))
39232973 130 return (const void *) p;
f894cbf8
DM
131 else if (skb)
132 return &ipv6_hdr(skb)->daddr;
39232973
DM
133 return daddr;
134}
135
f894cbf8
DM
136static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
137 struct sk_buff *skb,
138 const void *daddr)
d3aaeb38 139{
39232973
DM
140 struct rt6_info *rt = (struct rt6_info *) dst;
141 struct neighbour *n;
142
f894cbf8 143 daddr = choose_neigh_daddr(rt, skb, daddr);
39232973 144 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
145 if (n)
146 return n;
147 return neigh_create(&nd_tbl, daddr, dst->dev);
148}
149
8ade06c6 150static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 151{
8ade06c6
DM
152 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
153 if (!n) {
154 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
155 if (IS_ERR(n))
156 return PTR_ERR(n);
157 }
97cac082 158 rt->n = n;
f83c7790
DM
159
160 return 0;
d3aaeb38
DM
161}
162
9a7ec3a9 163static struct dst_ops ip6_dst_ops_template = {
1da177e4 164 .family = AF_INET6,
09640e63 165 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
166 .gc = ip6_dst_gc,
167 .gc_thresh = 1024,
168 .check = ip6_dst_check,
0dbaee3b 169 .default_advmss = ip6_default_advmss,
ebb762f2 170 .mtu = ip6_mtu,
06582540 171 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
172 .destroy = ip6_dst_destroy,
173 .ifdown = ip6_dst_ifdown,
174 .negative_advice = ip6_negative_advice,
175 .link_failure = ip6_link_failure,
176 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 177 .local_out = __ip6_local_out,
d3aaeb38 178 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
179};
180
ebb762f2 181static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 182{
618f9bc7
SK
183 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
184
185 return mtu ? : dst->dev->mtu;
ec831ea7
RD
186}
187
14e50e57
DM
188static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
189{
190}
191
0972ddb2
HB
192static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
193 unsigned long old)
194{
195 return NULL;
196}
197
14e50e57
DM
198static struct dst_ops ip6_dst_blackhole_ops = {
199 .family = AF_INET6,
09640e63 200 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
201 .destroy = ip6_dst_destroy,
202 .check = ip6_dst_check,
ebb762f2 203 .mtu = ip6_blackhole_mtu,
214f45c9 204 .default_advmss = ip6_default_advmss,
14e50e57 205 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 206 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 207 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
208};
209
62fa8a84
DM
210static const u32 ip6_template_metrics[RTAX_MAX] = {
211 [RTAX_HOPLIMIT - 1] = 255,
212};
213
bdb3289f 214static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
215 .dst = {
216 .__refcnt = ATOMIC_INIT(1),
217 .__use = 1,
218 .obsolete = -1,
219 .error = -ENETUNREACH,
d8d1f30b
CG
220 .input = ip6_pkt_discard,
221 .output = ip6_pkt_discard_out,
1da177e4
LT
222 },
223 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 224 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
225 .rt6i_metric = ~(u32) 0,
226 .rt6i_ref = ATOMIC_INIT(1),
227};
228
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route: a reject route whose dst error is
 * -EACCES, handled by the ip6_pkt_prohibit pair.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route: a reject route whose dst error is
 * -EINVAL and whose input and output are both dst_discard.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
265
1da177e4 266/* allocate dst with ip6_dst_ops */
97bab73f 267static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f 268 struct net_device *dev,
8b96d22d
DM
269 int flags,
270 struct fib6_table *table)
1da177e4 271{
97bab73f
DM
272 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
273 0, 0, flags);
cf911662 274
97bab73f 275 if (rt) {
a2de86f6 276 memset(&rt->n, 0,
38308473 277 sizeof(*rt) - sizeof(struct dst_entry));
8b96d22d 278 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
97bab73f 279 }
cf911662 280 return rt;
1da177e4
LT
281}
282
283static void ip6_dst_destroy(struct dst_entry *dst)
284{
285 struct rt6_info *rt = (struct rt6_info *)dst;
286 struct inet6_dev *idev = rt->rt6i_idev;
287
97cac082
DM
288 if (rt->n)
289 neigh_release(rt->n);
290
8e2ec639
YZ
291 if (!(rt->dst.flags & DST_HOST))
292 dst_destroy_metrics_generic(dst);
293
38308473 294 if (idev) {
1da177e4
LT
295 rt->rt6i_idev = NULL;
296 in6_dev_put(idev);
1ab1457c 297 }
1716a961
G
298
299 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
300 dst_release(dst->from);
301
97bab73f
DM
302 if (rt6_has_peer(rt)) {
303 struct inet_peer *peer = rt6_peer_ptr(rt);
b3419363
DM
304 inet_putpeer(peer);
305 }
306}
307
6431cbc2
DM
308static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
309
310static u32 rt6_peer_genid(void)
311{
312 return atomic_read(&__rt6_peer_genid);
313}
314
b3419363
DM
315void rt6_bind_peer(struct rt6_info *rt, int create)
316{
97bab73f 317 struct inet_peer_base *base;
b3419363
DM
318 struct inet_peer *peer;
319
97bab73f
DM
320 base = inetpeer_base_ptr(rt->_rt6i_peer);
321 if (!base)
322 return;
323
324 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
7b34ca2a
DM
325 if (peer) {
326 if (!rt6_set_peer(rt, peer))
327 inet_putpeer(peer);
328 else
329 rt->rt6i_peer_genid = rt6_peer_genid();
330 }
1da177e4
LT
331}
332
333static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
334 int how)
335{
336 struct rt6_info *rt = (struct rt6_info *)dst;
337 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 338 struct net_device *loopback_dev =
c346dca1 339 dev_net(dev)->loopback_dev;
1da177e4 340
97cac082
DM
341 if (dev != loopback_dev) {
342 if (idev && idev->dev == dev) {
343 struct inet6_dev *loopback_idev =
344 in6_dev_get(loopback_dev);
345 if (loopback_idev) {
346 rt->rt6i_idev = loopback_idev;
347 in6_dev_put(idev);
348 }
349 }
350 if (rt->n && rt->n->dev == dev) {
351 rt->n->dev = loopback_dev;
352 dev_hold(loopback_dev);
353 dev_put(dev);
1da177e4
LT
354 }
355 }
356}
357
a50feda5 358static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 359{
1716a961
G
360 struct rt6_info *ort = NULL;
361
362 if (rt->rt6i_flags & RTF_EXPIRES) {
363 if (time_after(jiffies, rt->dst.expires))
a50feda5 364 return true;
1716a961
G
365 } else if (rt->dst.from) {
366 ort = (struct rt6_info *) rt->dst.from;
367 return (ort->rt6i_flags & RTF_EXPIRES) &&
368 time_after(jiffies, ort->dst.expires);
369 }
a50feda5 370 return false;
1da177e4
LT
371}
372
a50feda5 373static bool rt6_need_strict(const struct in6_addr *daddr)
c71099ac 374{
a02cec21
ED
375 return ipv6_addr_type(daddr) &
376 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
377}
378
1da177e4 379/*
c71099ac 380 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
381 */
382
8ed67789
DL
383static inline struct rt6_info *rt6_device_match(struct net *net,
384 struct rt6_info *rt,
b71d1d42 385 const struct in6_addr *saddr,
1da177e4 386 int oif,
d420895e 387 int flags)
1da177e4
LT
388{
389 struct rt6_info *local = NULL;
390 struct rt6_info *sprt;
391
dd3abc4e
YH
392 if (!oif && ipv6_addr_any(saddr))
393 goto out;
394
d8d1f30b 395 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 396 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
397
398 if (oif) {
1da177e4
LT
399 if (dev->ifindex == oif)
400 return sprt;
401 if (dev->flags & IFF_LOOPBACK) {
38308473 402 if (!sprt->rt6i_idev ||
1da177e4 403 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 404 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 405 continue;
1ab1457c 406 if (local && (!oif ||
1da177e4
LT
407 local->rt6i_idev->dev->ifindex == oif))
408 continue;
409 }
410 local = sprt;
411 }
dd3abc4e
YH
412 } else {
413 if (ipv6_chk_addr(net, saddr, dev,
414 flags & RT6_LOOKUP_F_IFACE))
415 return sprt;
1da177e4 416 }
dd3abc4e 417 }
1da177e4 418
dd3abc4e 419 if (oif) {
1da177e4
LT
420 if (local)
421 return local;
422
d420895e 423 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 424 return net->ipv6.ip6_null_entry;
1da177e4 425 }
dd3abc4e 426out:
1da177e4
LT
427 return rt;
428}
429
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * Whether probing here is really appropriate is still an open question.
 * Router Reachability Probes MUST be rate-limited to no more than one per
 * minute; the limit applied here is the interface's rtr_probe_interval,
 * measured from neigh->updated.
 *
 * Sends a neighbour solicitation to the solicited-node multicast address
 * of the route's neighbour when that neighbour is not in a NUD_VALID
 * state and the interval has elapsed.  A NULL @rt, or a route without a
 * neighbour, is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct in6_addr mcaddr;
	struct in6_addr *target;
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = rt ? rt->n : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	read_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies,
			neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		read_unlock_bh(&neigh->lock);
		goto out;
	}
	neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
out:
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
469
1da177e4 470/*
554cfb7e 471 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 472 */
b6f99a21 473static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 474{
d1918542 475 struct net_device *dev = rt->dst.dev;
161980f4 476 if (!oif || dev->ifindex == oif)
554cfb7e 477 return 2;
161980f4
DM
478 if ((dev->flags & IFF_LOOPBACK) &&
479 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
480 return 1;
481 return 0;
554cfb7e 482}
1da177e4 483
b6f99a21 484static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 485{
f2c31e32 486 struct neighbour *neigh;
398bcbeb 487 int m;
f2c31e32
ED
488
489 rcu_read_lock();
97cac082 490 neigh = rt->n;
4d0c5911
YH
491 if (rt->rt6i_flags & RTF_NONEXTHOP ||
492 !(rt->rt6i_flags & RTF_GATEWAY))
493 m = 1;
494 else if (neigh) {
554cfb7e
YH
495 read_lock_bh(&neigh->lock);
496 if (neigh->nud_state & NUD_VALID)
4d0c5911 497 m = 2;
398bcbeb
YH
498#ifdef CONFIG_IPV6_ROUTER_PREF
499 else if (neigh->nud_state & NUD_FAILED)
500 m = 0;
501#endif
502 else
ea73ee23 503 m = 1;
554cfb7e 504 read_unlock_bh(&neigh->lock);
398bcbeb
YH
505 } else
506 m = 0;
f2c31e32 507 rcu_read_unlock();
554cfb7e 508 return m;
1da177e4
LT
509}
510
554cfb7e
YH
511static int rt6_score_route(struct rt6_info *rt, int oif,
512 int strict)
1da177e4 513{
4d0c5911 514 int m, n;
1ab1457c 515
4d0c5911 516 m = rt6_check_dev(rt, oif);
77d16f45 517 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 518 return -1;
ebacaaa0
YH
519#ifdef CONFIG_IPV6_ROUTER_PREF
520 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
521#endif
4d0c5911 522 n = rt6_check_neigh(rt);
557e92ef 523 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
524 return -1;
525 return m;
526}
527
f11e6659
DM
528static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
529 int *mpri, struct rt6_info *match)
554cfb7e 530{
f11e6659
DM
531 int m;
532
533 if (rt6_check_expired(rt))
534 goto out;
535
536 m = rt6_score_route(rt, oif, strict);
537 if (m < 0)
538 goto out;
539
540 if (m > *mpri) {
541 if (strict & RT6_LOOKUP_F_REACHABLE)
542 rt6_probe(match);
543 *mpri = m;
544 match = rt;
545 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
546 rt6_probe(rt);
547 }
548
549out:
550 return match;
551}
552
553static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
554 struct rt6_info *rr_head,
555 u32 metric, int oif, int strict)
556{
557 struct rt6_info *rt, *match;
554cfb7e 558 int mpri = -1;
1da177e4 559
f11e6659
DM
560 match = NULL;
561 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 562 rt = rt->dst.rt6_next)
f11e6659
DM
563 match = find_match(rt, oif, strict, &mpri, match);
564 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 565 rt = rt->dst.rt6_next)
f11e6659 566 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 567
f11e6659
DM
568 return match;
569}
1da177e4 570
f11e6659
DM
571static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
572{
573 struct rt6_info *match, *rt0;
8ed67789 574 struct net *net;
1da177e4 575
f11e6659
DM
576 rt0 = fn->rr_ptr;
577 if (!rt0)
578 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 579
f11e6659 580 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 581
554cfb7e 582 if (!match &&
f11e6659 583 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 584 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 585
554cfb7e 586 /* no entries matched; do round-robin */
f11e6659
DM
587 if (!next || next->rt6i_metric != rt0->rt6i_metric)
588 next = fn->leaf;
589
590 if (next != rt0)
591 fn->rr_ptr = next;
1da177e4 592 }
1da177e4 593
d1918542 594 net = dev_net(rt0->dst.dev);
a02cec21 595 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
596}
597
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option received on @dev from @gwaddr.
 *
 * @opt: start of the option, interpreted as struct route_info
 * @len: length of the option buffer
 *
 * A zero lifetime deletes a matching route.  A non-zero lifetime adds the
 * route if it is missing, or otherwise refreshes its preference flags.
 * The expiry is then set, or cleared for an infinite lifetime.
 *
 * Returns 0 on success and -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	dst_release(&rt->dst);
	return 0;
}
#endif
670
/*
 * BACKTRACK - climb the fib6 tree after a lookup ended on ip6_null_entry.
 *
 * Expands in place and relies on the caller providing:
 *   - locals "rt" (current result) and "fn" (current node),
 *   - a label "out", jumped to when the top-level root is reached,
 *   - a label "restart", jumped to once a node carrying route info
 *     (RTN_RTINFO) has been found.
 *
 * At each step the parent is taken, unless the parent has a subtree
 * other than the current node, in which case that subtree is searched
 * for @saddr.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, saddr);		\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 688
8ed67789
DL
689static struct rt6_info *ip6_pol_route_lookup(struct net *net,
690 struct fib6_table *table,
4c9483b2 691 struct flowi6 *fl6, int flags)
1da177e4
LT
692{
693 struct fib6_node *fn;
694 struct rt6_info *rt;
695
c71099ac 696 read_lock_bh(&table->tb6_lock);
4c9483b2 697 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
698restart:
699 rt = fn->leaf;
4c9483b2
DM
700 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
701 BACKTRACK(net, &fl6->saddr);
c71099ac 702out:
d8d1f30b 703 dst_use(&rt->dst, jiffies);
c71099ac 704 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
705 return rt;
706
707}
708
ea6e574e
FW
709struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
710 int flags)
711{
712 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
713}
714EXPORT_SYMBOL_GPL(ip6_route_lookup);
715
9acd9f3a
YH
716struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
717 const struct in6_addr *saddr, int oif, int strict)
c71099ac 718{
4c9483b2
DM
719 struct flowi6 fl6 = {
720 .flowi6_oif = oif,
721 .daddr = *daddr,
c71099ac
TG
722 };
723 struct dst_entry *dst;
77d16f45 724 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 725
adaa70bb 726 if (saddr) {
4c9483b2 727 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
728 flags |= RT6_LOOKUP_F_HAS_SADDR;
729 }
730
4c9483b2 731 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
732 if (dst->error == 0)
733 return (struct rt6_info *) dst;
734
735 dst_release(dst);
736
1da177e4
LT
737 return NULL;
738}
739
7159039a
YH
740EXPORT_SYMBOL(rt6_lookup);
741
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
747
86872cb5 748static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
749{
750 int err;
c71099ac 751 struct fib6_table *table;
1da177e4 752
c71099ac
TG
753 table = rt->rt6i_table;
754 write_lock_bh(&table->tb6_lock);
86872cb5 755 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 756 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
757
758 return err;
759}
760
40e22e8f
TG
761int ip6_ins_rt(struct rt6_info *rt)
762{
4d1169c1 763 struct nl_info info = {
d1918542 764 .nl_net = dev_net(rt->dst.dev),
4d1169c1 765 };
528c4ceb 766 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
767}
768
1716a961 769static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 770 const struct in6_addr *daddr,
b71d1d42 771 const struct in6_addr *saddr)
1da177e4 772{
1da177e4
LT
773 struct rt6_info *rt;
774
775 /*
776 * Clone the route.
777 */
778
21efcfa0 779 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
780
781 if (rt) {
14deae41
DM
782 int attempts = !in_softirq();
783
38308473 784 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 785 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 786 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 787 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 788 rt->rt6i_gateway = *daddr;
58c4fb86 789 }
1da177e4 790
1da177e4 791 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
792
793#ifdef CONFIG_IPV6_SUBTREES
794 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 795 rt->rt6i_src.addr = *saddr;
1da177e4
LT
796 rt->rt6i_src.plen = 128;
797 }
798#endif
799
14deae41 800 retry:
8ade06c6 801 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 802 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
803 int saved_rt_min_interval =
804 net->ipv6.sysctl.ip6_rt_gc_min_interval;
805 int saved_rt_elasticity =
806 net->ipv6.sysctl.ip6_rt_gc_elasticity;
807
808 if (attempts-- > 0) {
809 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
810 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
811
86393e52 812 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
813
814 net->ipv6.sysctl.ip6_rt_gc_elasticity =
815 saved_rt_elasticity;
816 net->ipv6.sysctl.ip6_rt_gc_min_interval =
817 saved_rt_min_interval;
818 goto retry;
819 }
820
f3213831 821 net_warn_ratelimited("Neighbour table overflow\n");
d8d1f30b 822 dst_free(&rt->dst);
14deae41
DM
823 return NULL;
824 }
95a9a5ba 825 }
1da177e4 826
95a9a5ba
YH
827 return rt;
828}
1da177e4 829
21efcfa0
ED
830static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
831 const struct in6_addr *daddr)
299d9939 832{
21efcfa0
ED
833 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
834
299d9939 835 if (rt) {
299d9939 836 rt->rt6i_flags |= RTF_CACHE;
97cac082 837 rt->n = neigh_clone(ort->n);
299d9939
YH
838 }
839 return rt;
840}
841
8ed67789 842static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 843 struct flowi6 *fl6, int flags)
1da177e4
LT
844{
845 struct fib6_node *fn;
519fbd87 846 struct rt6_info *rt, *nrt;
c71099ac 847 int strict = 0;
1da177e4 848 int attempts = 3;
519fbd87 849 int err;
53b7997f 850 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 851
77d16f45 852 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
853
854relookup:
c71099ac 855 read_lock_bh(&table->tb6_lock);
1da177e4 856
8238dd06 857restart_2:
4c9483b2 858 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
859
860restart:
4acad72d 861 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 862
4c9483b2 863 BACKTRACK(net, &fl6->saddr);
8ed67789 864 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 865 rt->rt6i_flags & RTF_CACHE)
1ddef044 866 goto out;
1da177e4 867
d8d1f30b 868 dst_hold(&rt->dst);
c71099ac 869 read_unlock_bh(&table->tb6_lock);
fb9de91e 870
97cac082 871 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 872 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 873 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 874 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
875 else
876 goto out2;
e40cf353 877
d8d1f30b 878 dst_release(&rt->dst);
8ed67789 879 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 880
d8d1f30b 881 dst_hold(&rt->dst);
519fbd87 882 if (nrt) {
40e22e8f 883 err = ip6_ins_rt(nrt);
519fbd87 884 if (!err)
1da177e4 885 goto out2;
1da177e4 886 }
1da177e4 887
519fbd87
YH
888 if (--attempts <= 0)
889 goto out2;
890
891 /*
c71099ac 892 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
893 * released someone could insert this route. Relookup.
894 */
d8d1f30b 895 dst_release(&rt->dst);
519fbd87
YH
896 goto relookup;
897
898out:
8238dd06
YH
899 if (reachable) {
900 reachable = 0;
901 goto restart_2;
902 }
d8d1f30b 903 dst_hold(&rt->dst);
c71099ac 904 read_unlock_bh(&table->tb6_lock);
1da177e4 905out2:
d8d1f30b
CG
906 rt->dst.lastuse = jiffies;
907 rt->dst.__use++;
c71099ac
TG
908
909 return rt;
1da177e4
LT
910}
911
8ed67789 912static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 913 struct flowi6 *fl6, int flags)
4acad72d 914{
4c9483b2 915 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
916}
917
72331bc0
SL
918static struct dst_entry *ip6_route_input_lookup(struct net *net,
919 struct net_device *dev,
920 struct flowi6 *fl6, int flags)
921{
922 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
923 flags |= RT6_LOOKUP_F_IFACE;
924
925 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
926}
927
c71099ac
TG
928void ip6_route_input(struct sk_buff *skb)
929{
b71d1d42 930 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 931 struct net *net = dev_net(skb->dev);
adaa70bb 932 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
933 struct flowi6 fl6 = {
934 .flowi6_iif = skb->dev->ifindex,
935 .daddr = iph->daddr,
936 .saddr = iph->saddr,
38308473 937 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
938 .flowi6_mark = skb->mark,
939 .flowi6_proto = iph->nexthdr,
c71099ac 940 };
adaa70bb 941
72331bc0 942 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
943}
944
8ed67789 945static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 946 struct flowi6 *fl6, int flags)
1da177e4 947{
4c9483b2 948 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
949}
950
9c7a4f9c 951struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 952 struct flowi6 *fl6)
c71099ac
TG
953{
954 int flags = 0;
955
4dc27d1c
DM
956 fl6->flowi6_iif = net->loopback_dev->ifindex;
957
4c9483b2 958 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 959 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 960
4c9483b2 961 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 962 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
963 else if (sk)
964 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 965
4c9483b2 966 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
967}
968
7159039a 969EXPORT_SYMBOL(ip6_route_output);
1da177e4 970
2774c131 971struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 972{
5c1e6aa3 973 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
974 struct dst_entry *new = NULL;
975
5c1e6aa3 976 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 977 if (rt) {
cf911662 978 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
97bab73f 979 rt6_init_peer(rt, net->ipv6.peers);
cf911662 980
d8d1f30b 981 new = &rt->dst;
14e50e57 982
14e50e57 983 new->__use = 1;
352e512c
HX
984 new->input = dst_discard;
985 new->output = dst_discard;
14e50e57 986
21efcfa0
ED
987 if (dst_metrics_read_only(&ort->dst))
988 new->_metrics = ort->dst._metrics;
989 else
990 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
991 rt->rt6i_idev = ort->rt6i_idev;
992 if (rt->rt6i_idev)
993 in6_dev_hold(rt->rt6i_idev);
14e50e57 994
4e3fd7a0 995 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
996 rt->rt6i_flags = ort->rt6i_flags;
997 rt6_clean_expires(rt);
14e50e57
DM
998 rt->rt6i_metric = 0;
999
1000 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1001#ifdef CONFIG_IPV6_SUBTREES
1002 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1003#endif
1004
1005 dst_free(new);
1006 }
1007
69ead7af
DM
1008 dst_release(dst_orig);
1009 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1010}
14e50e57 1011
1da177e4
LT
1012/*
1013 * Destination cache support functions
1014 */
1015
1016static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1017{
1018 struct rt6_info *rt;
1019
1020 rt = (struct rt6_info *) dst;
1021
6431cbc2
DM
1022 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1023 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
97bab73f 1024 if (!rt6_has_peer(rt))
6431cbc2
DM
1025 rt6_bind_peer(rt, 0);
1026 rt->rt6i_peer_genid = rt6_peer_genid();
1027 }
1da177e4 1028 return dst;
6431cbc2 1029 }
1da177e4
LT
1030 return NULL;
1031}
1032
1033static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1034{
1035 struct rt6_info *rt = (struct rt6_info *) dst;
1036
1037 if (rt) {
54c1a859
YH
1038 if (rt->rt6i_flags & RTF_CACHE) {
1039 if (rt6_check_expired(rt)) {
1040 ip6_del_rt(rt);
1041 dst = NULL;
1042 }
1043 } else {
1da177e4 1044 dst_release(dst);
54c1a859
YH
1045 dst = NULL;
1046 }
1da177e4 1047 }
54c1a859 1048 return dst;
1da177e4
LT
1049}
1050
1051static void ip6_link_failure(struct sk_buff *skb)
1052{
1053 struct rt6_info *rt;
1054
3ffe533c 1055 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1056
adf30907 1057 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1058 if (rt) {
1716a961
G
1059 if (rt->rt6i_flags & RTF_CACHE)
1060 rt6_update_expires(rt, 0);
1061 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1062 rt->rt6i_node->fn_sernum = -1;
1063 }
1064}
1065
1066static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1067{
1068 struct rt6_info *rt6 = (struct rt6_info*)dst;
1069
81aded24 1070 dst_confirm(dst);
1da177e4 1071 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
81aded24
DM
1072 struct net *net = dev_net(dst->dev);
1073
1da177e4
LT
1074 rt6->rt6i_flags |= RTF_MODIFIED;
1075 if (mtu < IPV6_MIN_MTU) {
defb3519 1076 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1077 mtu = IPV6_MIN_MTU;
defb3519
DM
1078 features |= RTAX_FEATURE_ALLFRAG;
1079 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1080 }
defb3519 1081 dst_metric_set(dst, RTAX_MTU, mtu);
81aded24 1082 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1083 }
1084}
1085
42ae66c8
DM
1086void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1087 int oif, u32 mark)
81aded24
DM
1088{
1089 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1090 struct dst_entry *dst;
1091 struct flowi6 fl6;
1092
1093 memset(&fl6, 0, sizeof(fl6));
1094 fl6.flowi6_oif = oif;
1095 fl6.flowi6_mark = mark;
3e12939a 1096 fl6.flowi6_flags = 0;
81aded24
DM
1097 fl6.daddr = iph->daddr;
1098 fl6.saddr = iph->saddr;
1099 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1100
1101 dst = ip6_route_output(net, NULL, &fl6);
1102 if (!dst->error)
1103 ip6_rt_update_pmtu(dst, ntohl(mtu));
1104 dst_release(dst);
1105}
1106EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1107
1108void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1109{
1110 ip6_update_pmtu(skb, sock_net(sk), mtu,
1111 sk->sk_bound_dev_if, sk->sk_mark);
1112}
1113EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1114
0dbaee3b 1115static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1116{
0dbaee3b
DM
1117 struct net_device *dev = dst->dev;
1118 unsigned int mtu = dst_mtu(dst);
1119 struct net *net = dev_net(dev);
1120
1da177e4
LT
1121 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1122
5578689a
DL
1123 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1124 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1125
1126 /*
1ab1457c
YH
1127 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1128 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1129 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1130 * rely only on pmtu discovery"
1131 */
1132 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1133 mtu = IPV6_MAXPLEN;
1134 return mtu;
1135}
1136
ebb762f2 1137static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1138{
d33e4553 1139 struct inet6_dev *idev;
618f9bc7
SK
1140 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1141
1142 if (mtu)
1143 return mtu;
1144
1145 mtu = IPV6_MIN_MTU;
d33e4553
DM
1146
1147 rcu_read_lock();
1148 idev = __in6_dev_get(dst->dev);
1149 if (idev)
1150 mtu = idev->cnf.mtu6;
1151 rcu_read_unlock();
1152
1153 return mtu;
1154}
1155
3b00944c
YH
/* Head of the list of dsts created by icmp6_dst_alloc(), chained via dst->next. */
static struct dst_entry *icmp6_dst_gc_list;
/* Taken (BH-safe) around every walk or update of icmp6_dst_gc_list. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1158
3b00944c 1159struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1160 struct neighbour *neigh,
87a11578 1161 struct flowi6 *fl6)
1da177e4 1162{
87a11578 1163 struct dst_entry *dst;
1da177e4
LT
1164 struct rt6_info *rt;
1165 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1166 struct net *net = dev_net(dev);
1da177e4 1167
38308473 1168 if (unlikely(!idev))
122bdf67 1169 return ERR_PTR(-ENODEV);
1da177e4 1170
8b96d22d 1171 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1172 if (unlikely(!rt)) {
1da177e4 1173 in6_dev_put(idev);
87a11578 1174 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1175 goto out;
1176 }
1177
1da177e4
LT
1178 if (neigh)
1179 neigh_hold(neigh);
14deae41 1180 else {
f894cbf8 1181 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
b43faac6 1182 if (IS_ERR(neigh)) {
252c3d84 1183 in6_dev_put(idev);
b43faac6
DM
1184 dst_free(&rt->dst);
1185 return ERR_CAST(neigh);
1186 }
14deae41 1187 }
1da177e4 1188
8e2ec639
YZ
1189 rt->dst.flags |= DST_HOST;
1190 rt->dst.output = ip6_output;
97cac082 1191 rt->n = neigh;
d8d1f30b 1192 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1193 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1194 rt->rt6i_dst.plen = 128;
1195 rt->rt6i_idev = idev;
7011687f 1196 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1197
3b00944c 1198 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1199 rt->dst.next = icmp6_dst_gc_list;
1200 icmp6_dst_gc_list = &rt->dst;
3b00944c 1201 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1202
5578689a 1203 fib6_force_start_gc(net);
1da177e4 1204
87a11578
DM
1205 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1206
1da177e4 1207out:
87a11578 1208 return dst;
1da177e4
LT
1209}
1210
3d0f24a7 1211int icmp6_dst_gc(void)
1da177e4 1212{
e9476e95 1213 struct dst_entry *dst, **pprev;
3d0f24a7 1214 int more = 0;
1da177e4 1215
3b00944c
YH
1216 spin_lock_bh(&icmp6_dst_lock);
1217 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1218
1da177e4
LT
1219 while ((dst = *pprev) != NULL) {
1220 if (!atomic_read(&dst->__refcnt)) {
1221 *pprev = dst->next;
1222 dst_free(dst);
1da177e4
LT
1223 } else {
1224 pprev = &dst->next;
3d0f24a7 1225 ++more;
1da177e4
LT
1226 }
1227 }
1228
3b00944c 1229 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1230
3d0f24a7 1231 return more;
1da177e4
LT
1232}
1233
1e493d19
DM
1234static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1235 void *arg)
1236{
1237 struct dst_entry *dst, **pprev;
1238
1239 spin_lock_bh(&icmp6_dst_lock);
1240 pprev = &icmp6_dst_gc_list;
1241 while ((dst = *pprev) != NULL) {
1242 struct rt6_info *rt = (struct rt6_info *) dst;
1243 if (func(rt, arg)) {
1244 *pprev = dst->next;
1245 dst_free(dst);
1246 } else {
1247 pprev = &dst->next;
1248 }
1249 }
1250 spin_unlock_bh(&icmp6_dst_lock);
1251}
1252
569d3645 1253static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1254{
1da177e4 1255 unsigned long now = jiffies;
86393e52 1256 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1257 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1258 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1259 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1260 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1261 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1262 int entries;
7019b78e 1263
fc66f95c 1264 entries = dst_entries_get_fast(ops);
7019b78e 1265 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1266 entries <= rt_max_size)
1da177e4
LT
1267 goto out;
1268
6891a346
BT
1269 net->ipv6.ip6_rt_gc_expire++;
1270 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1271 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1272 entries = dst_entries_get_slow(ops);
1273 if (entries < ops->gc_thresh)
7019b78e 1274 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1275out:
7019b78e 1276 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1277 return entries > rt_max_size;
1da177e4
LT
1278}
1279
1280/* Clean host part of a prefix. Not necessary in radix tree,
1281 but results in cleaner routing tables.
1282
1283 Remove it only when all the things will work!
1284 */
1285
6b75d090 1286int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1287{
5170ae82 1288 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1289 if (hoplimit == 0) {
6b75d090 1290 struct net_device *dev = dst->dev;
c68f24cc
ED
1291 struct inet6_dev *idev;
1292
1293 rcu_read_lock();
1294 idev = __in6_dev_get(dev);
1295 if (idev)
6b75d090 1296 hoplimit = idev->cnf.hop_limit;
c68f24cc 1297 else
53b7997f 1298 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1299 rcu_read_unlock();
1da177e4
LT
1300 }
1301 return hoplimit;
1302}
abbf46ae 1303EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1304
1305/*
1306 *
1307 */
1308
86872cb5 1309int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1310{
1311 int err;
5578689a 1312 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1313 struct rt6_info *rt = NULL;
1314 struct net_device *dev = NULL;
1315 struct inet6_dev *idev = NULL;
c71099ac 1316 struct fib6_table *table;
1da177e4
LT
1317 int addr_type;
1318
86872cb5 1319 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1320 return -EINVAL;
1321#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1322 if (cfg->fc_src_len)
1da177e4
LT
1323 return -EINVAL;
1324#endif
86872cb5 1325 if (cfg->fc_ifindex) {
1da177e4 1326 err = -ENODEV;
5578689a 1327 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1328 if (!dev)
1329 goto out;
1330 idev = in6_dev_get(dev);
1331 if (!idev)
1332 goto out;
1333 }
1334
86872cb5
TG
1335 if (cfg->fc_metric == 0)
1336 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1337
d71314b4 1338 err = -ENOBUFS;
38308473
DM
1339 if (cfg->fc_nlinfo.nlh &&
1340 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1341 table = fib6_get_table(net, cfg->fc_table);
38308473 1342 if (!table) {
f3213831 1343 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1344 table = fib6_new_table(net, cfg->fc_table);
1345 }
1346 } else {
1347 table = fib6_new_table(net, cfg->fc_table);
1348 }
38308473
DM
1349
1350 if (!table)
c71099ac 1351 goto out;
c71099ac 1352
8b96d22d 1353 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1da177e4 1354
38308473 1355 if (!rt) {
1da177e4
LT
1356 err = -ENOMEM;
1357 goto out;
1358 }
1359
d8d1f30b 1360 rt->dst.obsolete = -1;
1716a961
G
1361
1362 if (cfg->fc_flags & RTF_EXPIRES)
1363 rt6_set_expires(rt, jiffies +
1364 clock_t_to_jiffies(cfg->fc_expires));
1365 else
1366 rt6_clean_expires(rt);
1da177e4 1367
86872cb5
TG
1368 if (cfg->fc_protocol == RTPROT_UNSPEC)
1369 cfg->fc_protocol = RTPROT_BOOT;
1370 rt->rt6i_protocol = cfg->fc_protocol;
1371
1372 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1373
1374 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1375 rt->dst.input = ip6_mc_input;
ab79ad14
1376 else if (cfg->fc_flags & RTF_LOCAL)
1377 rt->dst.input = ip6_input;
1da177e4 1378 else
d8d1f30b 1379 rt->dst.input = ip6_forward;
1da177e4 1380
d8d1f30b 1381 rt->dst.output = ip6_output;
1da177e4 1382
86872cb5
TG
1383 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1384 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1385 if (rt->rt6i_dst.plen == 128)
11d53b49 1386 rt->dst.flags |= DST_HOST;
1da177e4 1387
8e2ec639
YZ
1388 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1389 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1390 if (!metrics) {
1391 err = -ENOMEM;
1392 goto out;
1393 }
1394 dst_init_metrics(&rt->dst, metrics, 0);
1395 }
1da177e4 1396#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1397 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1398 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1399#endif
1400
86872cb5 1401 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1402
1403 /* We cannot add true routes via loopback here,
1404 they would result in kernel looping; promote them to reject routes
1405 */
86872cb5 1406 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1407 (dev && (dev->flags & IFF_LOOPBACK) &&
1408 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1409 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1410 /* hold loopback dev/idev if we haven't done so. */
5578689a 1411 if (dev != net->loopback_dev) {
1da177e4
LT
1412 if (dev) {
1413 dev_put(dev);
1414 in6_dev_put(idev);
1415 }
5578689a 1416 dev = net->loopback_dev;
1da177e4
LT
1417 dev_hold(dev);
1418 idev = in6_dev_get(dev);
1419 if (!idev) {
1420 err = -ENODEV;
1421 goto out;
1422 }
1423 }
d8d1f30b
CG
1424 rt->dst.output = ip6_pkt_discard_out;
1425 rt->dst.input = ip6_pkt_discard;
1426 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1427 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1428 goto install_route;
1429 }
1430
86872cb5 1431 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1432 const struct in6_addr *gw_addr;
1da177e4
LT
1433 int gwa_type;
1434
86872cb5 1435 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1436 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1437 gwa_type = ipv6_addr_type(gw_addr);
1438
1439 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1440 struct rt6_info *grt;
1441
1442 /* IPv6 strictly inhibits using not link-local
1443 addresses as nexthop address.
1444 Otherwise, router will not able to send redirects.
1445 It is very good, but in some (rare!) circumstances
1446 (SIT, PtP, NBMA NOARP links) it is handy to allow
1447 some exceptions. --ANK
1448 */
1449 err = -EINVAL;
38308473 1450 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1451 goto out;
1452
5578689a 1453 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1454
1455 err = -EHOSTUNREACH;
38308473 1456 if (!grt)
1da177e4
LT
1457 goto out;
1458 if (dev) {
d1918542 1459 if (dev != grt->dst.dev) {
d8d1f30b 1460 dst_release(&grt->dst);
1da177e4
LT
1461 goto out;
1462 }
1463 } else {
d1918542 1464 dev = grt->dst.dev;
1da177e4
LT
1465 idev = grt->rt6i_idev;
1466 dev_hold(dev);
1467 in6_dev_hold(grt->rt6i_idev);
1468 }
38308473 1469 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1470 err = 0;
d8d1f30b 1471 dst_release(&grt->dst);
1da177e4
LT
1472
1473 if (err)
1474 goto out;
1475 }
1476 err = -EINVAL;
38308473 1477 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1478 goto out;
1479 }
1480
1481 err = -ENODEV;
38308473 1482 if (!dev)
1da177e4
LT
1483 goto out;
1484
c3968a85
DW
1485 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1486 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1487 err = -EINVAL;
1488 goto out;
1489 }
4e3fd7a0 1490 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1491 rt->rt6i_prefsrc.plen = 128;
1492 } else
1493 rt->rt6i_prefsrc.plen = 0;
1494
86872cb5 1495 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1496 err = rt6_bind_neighbour(rt, dev);
f83c7790 1497 if (err)
1da177e4 1498 goto out;
1da177e4
LT
1499 }
1500
86872cb5 1501 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1502
1503install_route:
86872cb5
TG
1504 if (cfg->fc_mx) {
1505 struct nlattr *nla;
1506 int remaining;
1507
1508 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1509 int type = nla_type(nla);
86872cb5
TG
1510
1511 if (type) {
1512 if (type > RTAX_MAX) {
1da177e4
LT
1513 err = -EINVAL;
1514 goto out;
1515 }
86872cb5 1516
defb3519 1517 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1518 }
1da177e4
LT
1519 }
1520 }
1521
d8d1f30b 1522 rt->dst.dev = dev;
1da177e4 1523 rt->rt6i_idev = idev;
c71099ac 1524 rt->rt6i_table = table;
63152fc0 1525
c346dca1 1526 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1527
86872cb5 1528 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1529
1530out:
1531 if (dev)
1532 dev_put(dev);
1533 if (idev)
1534 in6_dev_put(idev);
1535 if (rt)
d8d1f30b 1536 dst_free(&rt->dst);
1da177e4
LT
1537 return err;
1538}
1539
86872cb5 1540static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1541{
1542 int err;
c71099ac 1543 struct fib6_table *table;
d1918542 1544 struct net *net = dev_net(rt->dst.dev);
1da177e4 1545
8ed67789 1546 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1547 return -ENOENT;
1548
c71099ac
TG
1549 table = rt->rt6i_table;
1550 write_lock_bh(&table->tb6_lock);
1da177e4 1551
86872cb5 1552 err = fib6_del(rt, info);
d8d1f30b 1553 dst_release(&rt->dst);
1da177e4 1554
c71099ac 1555 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1556
1557 return err;
1558}
1559
e0a1ad73
TG
1560int ip6_del_rt(struct rt6_info *rt)
1561{
4d1169c1 1562 struct nl_info info = {
d1918542 1563 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1564 };
528c4ceb 1565 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1566}
1567
86872cb5 1568static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1569{
c71099ac 1570 struct fib6_table *table;
1da177e4
LT
1571 struct fib6_node *fn;
1572 struct rt6_info *rt;
1573 int err = -ESRCH;
1574
5578689a 1575 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1576 if (!table)
c71099ac
TG
1577 return err;
1578
1579 read_lock_bh(&table->tb6_lock);
1da177e4 1580
c71099ac 1581 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1582 &cfg->fc_dst, cfg->fc_dst_len,
1583 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1584
1da177e4 1585 if (fn) {
d8d1f30b 1586 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1587 if (cfg->fc_ifindex &&
d1918542
DM
1588 (!rt->dst.dev ||
1589 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1590 continue;
86872cb5
TG
1591 if (cfg->fc_flags & RTF_GATEWAY &&
1592 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1593 continue;
86872cb5 1594 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1595 continue;
d8d1f30b 1596 dst_hold(&rt->dst);
c71099ac 1597 read_unlock_bh(&table->tb6_lock);
1da177e4 1598
86872cb5 1599 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1600 }
1601 }
c71099ac 1602 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1603
1604 return err;
1605}
1606
1607/*
1608 * Handle redirects
1609 */
a6279458 1610struct ip6rd_flowi {
4c9483b2 1611 struct flowi6 fl6;
a6279458
YH
1612 struct in6_addr gateway;
1613};
1614
8ed67789
DL
1615static struct rt6_info *__ip6_route_redirect(struct net *net,
1616 struct fib6_table *table,
4c9483b2 1617 struct flowi6 *fl6,
a6279458 1618 int flags)
1da177e4 1619{
4c9483b2 1620 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1621 struct rt6_info *rt;
e843b9e1 1622 struct fib6_node *fn;
c71099ac 1623
1da177e4 1624 /*
e843b9e1
YH
1625 * Get the "current" route for this destination and
1626 * check if the redirect has come from approriate router.
1627 *
1628 * RFC 2461 specifies that redirects should only be
1629 * accepted if they come from the nexthop to the target.
1630 * Due to the way the routes are chosen, this notion
1631 * is a bit fuzzy and one might need to check all possible
1632 * routes.
1da177e4 1633 */
1da177e4 1634
c71099ac 1635 read_lock_bh(&table->tb6_lock);
4c9483b2 1636 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1637restart:
d8d1f30b 1638 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1639 /*
1640 * Current route is on-link; redirect is always invalid.
1641 *
1642 * Seems, previous statement is not true. It could
1643 * be node, which looks for us as on-link (f.e. proxy ndisc)
1644 * But then router serving it might decide, that we should
1645 * know truth 8)8) --ANK (980726).
1646 */
1647 if (rt6_check_expired(rt))
1648 continue;
1649 if (!(rt->rt6i_flags & RTF_GATEWAY))
1650 continue;
d1918542 1651 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
e843b9e1 1652 continue;
a6279458 1653 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1654 continue;
1655 break;
1656 }
a6279458 1657
cb15d9c2 1658 if (!rt)
8ed67789 1659 rt = net->ipv6.ip6_null_entry;
4c9483b2 1660 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1661out:
d8d1f30b 1662 dst_hold(&rt->dst);
a6279458 1663
c71099ac 1664 read_unlock_bh(&table->tb6_lock);
e843b9e1 1665
a6279458
YH
1666 return rt;
1667};
1668
b71d1d42
ED
1669static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1670 const struct in6_addr *src,
1671 const struct in6_addr *gateway,
a6279458
YH
1672 struct net_device *dev)
1673{
adaa70bb 1674 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1675 struct net *net = dev_net(dev);
a6279458 1676 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1677 .fl6 = {
1678 .flowi6_oif = dev->ifindex,
1679 .daddr = *dest,
1680 .saddr = *src,
a6279458 1681 },
a6279458 1682 };
adaa70bb 1683
4e3fd7a0 1684 rdfl.gateway = *gateway;
86c36ce4 1685
adaa70bb
TG
1686 if (rt6_need_strict(dest))
1687 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1688
4c9483b2 1689 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1690 flags, __ip6_route_redirect);
a6279458
YH
1691}
1692
b71d1d42
ED
1693void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1694 const struct in6_addr *saddr,
a6279458
YH
1695 struct neighbour *neigh, u8 *lladdr, int on_link)
1696{
1697 struct rt6_info *rt, *nrt = NULL;
1698 struct netevent_redirect netevent;
c346dca1 1699 struct net *net = dev_net(neigh->dev);
1d248b1c 1700 struct neighbour *old_neigh;
a6279458
YH
1701
1702 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1703
8ed67789 1704 if (rt == net->ipv6.ip6_null_entry) {
e87cc472 1705 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
a6279458 1706 goto out;
1da177e4
LT
1707 }
1708
1da177e4
LT
1709 /*
1710 * We have finally decided to accept it.
1711 */
1712
1ab1457c 1713 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1714 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1715 NEIGH_UPDATE_F_OVERRIDE|
1716 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1717 NEIGH_UPDATE_F_ISROUTER))
1718 );
1719
1720 /*
1721 * Redirect received -> path was valid.
1722 * Look, redirects are sent only in response to data packets,
1723 * so that this nexthop apparently is reachable. --ANK
1724 */
d8d1f30b 1725 dst_confirm(&rt->dst);
1da177e4
LT
1726
1727 /* Duplicate redirect: silently ignore. */
97cac082 1728 old_neigh = rt->n;
1d248b1c 1729 if (neigh == old_neigh)
1da177e4
LT
1730 goto out;
1731
21efcfa0 1732 nrt = ip6_rt_copy(rt, dest);
38308473 1733 if (!nrt)
1da177e4
LT
1734 goto out;
1735
1736 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1737 if (on_link)
1738 nrt->rt6i_flags &= ~RTF_GATEWAY;
1739
4e3fd7a0 1740 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
97cac082 1741 nrt->n = neigh_clone(neigh);
1da177e4 1742
40e22e8f 1743 if (ip6_ins_rt(nrt))
1da177e4
LT
1744 goto out;
1745
d8d1f30b 1746 netevent.old = &rt->dst;
1d248b1c 1747 netevent.old_neigh = old_neigh;
d8d1f30b 1748 netevent.new = &nrt->dst;
1d248b1c
DM
1749 netevent.new_neigh = neigh;
1750 netevent.daddr = dest;
8d71740c
TT
1751 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1752
38308473 1753 if (rt->rt6i_flags & RTF_CACHE) {
e0a1ad73 1754 ip6_del_rt(rt);
1da177e4
LT
1755 return;
1756 }
1757
1758out:
d8d1f30b 1759 dst_release(&rt->dst);
1da177e4
LT
1760}
1761
1da177e4
LT
1762/*
1763 * Misc support functions
1764 */
1765
1716a961 1766static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1767 const struct in6_addr *dest)
1da177e4 1768{
d1918542 1769 struct net *net = dev_net(ort->dst.dev);
8b96d22d
DM
1770 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1771 ort->rt6i_table);
1da177e4
LT
1772
1773 if (rt) {
d8d1f30b
CG
1774 rt->dst.input = ort->dst.input;
1775 rt->dst.output = ort->dst.output;
8e2ec639 1776 rt->dst.flags |= DST_HOST;
d8d1f30b 1777
4e3fd7a0 1778 rt->rt6i_dst.addr = *dest;
8e2ec639 1779 rt->rt6i_dst.plen = 128;
defb3519 1780 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1781 rt->dst.error = ort->dst.error;
1da177e4
LT
1782 rt->rt6i_idev = ort->rt6i_idev;
1783 if (rt->rt6i_idev)
1784 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1785 rt->dst.lastuse = jiffies;
1da177e4 1786
4e3fd7a0 1787 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1788 rt->rt6i_flags = ort->rt6i_flags;
1789 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1790 (RTF_DEFAULT | RTF_ADDRCONF))
1791 rt6_set_from(rt, ort);
1792 else
1793 rt6_clean_expires(rt);
1da177e4
LT
1794 rt->rt6i_metric = 0;
1795
1da177e4
LT
1796#ifdef CONFIG_IPV6_SUBTREES
1797 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1798#endif
0f6c6392 1799 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1800 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1801 }
1802 return rt;
1803}
1804
70ceb4f5 1805#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1806static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1807 const struct in6_addr *prefix, int prefixlen,
1808 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1809{
1810 struct fib6_node *fn;
1811 struct rt6_info *rt = NULL;
c71099ac
TG
1812 struct fib6_table *table;
1813
efa2cea0 1814 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1815 if (!table)
c71099ac 1816 return NULL;
70ceb4f5 1817
c71099ac
TG
1818 write_lock_bh(&table->tb6_lock);
1819 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1820 if (!fn)
1821 goto out;
1822
d8d1f30b 1823 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1824 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1825 continue;
1826 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1827 continue;
1828 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1829 continue;
d8d1f30b 1830 dst_hold(&rt->dst);
70ceb4f5
YH
1831 break;
1832 }
1833out:
c71099ac 1834 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1835 return rt;
1836}
1837
efa2cea0 1838static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1839 const struct in6_addr *prefix, int prefixlen,
1840 const struct in6_addr *gwaddr, int ifindex,
95c96174 1841 unsigned int pref)
70ceb4f5 1842{
86872cb5
TG
1843 struct fib6_config cfg = {
1844 .fc_table = RT6_TABLE_INFO,
238fc7ea 1845 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1846 .fc_ifindex = ifindex,
1847 .fc_dst_len = prefixlen,
1848 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1849 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1850 .fc_nlinfo.pid = 0,
1851 .fc_nlinfo.nlh = NULL,
1852 .fc_nlinfo.nl_net = net,
86872cb5
TG
1853 };
1854
4e3fd7a0
AD
1855 cfg.fc_dst = *prefix;
1856 cfg.fc_gateway = *gwaddr;
70ceb4f5 1857
e317da96
YH
1858 /* We should treat it as a default route if prefix length is 0. */
1859 if (!prefixlen)
86872cb5 1860 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1861
86872cb5 1862 ip6_route_add(&cfg);
70ceb4f5 1863
efa2cea0 1864 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1865}
1866#endif
1867
b71d1d42 1868struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1869{
1da177e4 1870 struct rt6_info *rt;
c71099ac 1871 struct fib6_table *table;
1da177e4 1872
c346dca1 1873 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1874 if (!table)
c71099ac 1875 return NULL;
1da177e4 1876
c71099ac 1877 write_lock_bh(&table->tb6_lock);
d8d1f30b 1878 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1879 if (dev == rt->dst.dev &&
045927ff 1880 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1881 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1882 break;
1883 }
1884 if (rt)
d8d1f30b 1885 dst_hold(&rt->dst);
c71099ac 1886 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1887 return rt;
1888}
1889
b71d1d42 1890struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1891 struct net_device *dev,
1892 unsigned int pref)
1da177e4 1893{
86872cb5
TG
1894 struct fib6_config cfg = {
1895 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1896 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1897 .fc_ifindex = dev->ifindex,
1898 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1899 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1900 .fc_nlinfo.pid = 0,
1901 .fc_nlinfo.nlh = NULL,
c346dca1 1902 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1903 };
1da177e4 1904
4e3fd7a0 1905 cfg.fc_gateway = *gwaddr;
1da177e4 1906
86872cb5 1907 ip6_route_add(&cfg);
1da177e4 1908
1da177e4
LT
1909 return rt6_get_dflt_router(gwaddr, dev);
1910}
1911
7b4da532 1912void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1913{
1914 struct rt6_info *rt;
c71099ac
TG
1915 struct fib6_table *table;
1916
1917 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1918 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1919 if (!table)
c71099ac 1920 return;
1da177e4
LT
1921
1922restart:
c71099ac 1923 read_lock_bh(&table->tb6_lock);
d8d1f30b 1924 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1925 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1926 dst_hold(&rt->dst);
c71099ac 1927 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1928 ip6_del_rt(rt);
1da177e4
LT
1929 goto restart;
1930 }
1931 }
c71099ac 1932 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1933}
1934
5578689a
DL
1935static void rtmsg_to_fib6_config(struct net *net,
1936 struct in6_rtmsg *rtmsg,
86872cb5
TG
1937 struct fib6_config *cfg)
1938{
1939 memset(cfg, 0, sizeof(*cfg));
1940
1941 cfg->fc_table = RT6_TABLE_MAIN;
1942 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1943 cfg->fc_metric = rtmsg->rtmsg_metric;
1944 cfg->fc_expires = rtmsg->rtmsg_info;
1945 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1946 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1947 cfg->fc_flags = rtmsg->rtmsg_flags;
1948
5578689a 1949 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1950
4e3fd7a0
AD
1951 cfg->fc_dst = rtmsg->rtmsg_dst;
1952 cfg->fc_src = rtmsg->rtmsg_src;
1953 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1954}
1955
5578689a 1956int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1957{
86872cb5 1958 struct fib6_config cfg;
1da177e4
LT
1959 struct in6_rtmsg rtmsg;
1960 int err;
1961
1962 switch(cmd) {
1963 case SIOCADDRT: /* Add a route */
1964 case SIOCDELRT: /* Delete a route */
1965 if (!capable(CAP_NET_ADMIN))
1966 return -EPERM;
1967 err = copy_from_user(&rtmsg, arg,
1968 sizeof(struct in6_rtmsg));
1969 if (err)
1970 return -EFAULT;
86872cb5 1971
5578689a 1972 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1973
1da177e4
LT
1974 rtnl_lock();
1975 switch (cmd) {
1976 case SIOCADDRT:
86872cb5 1977 err = ip6_route_add(&cfg);
1da177e4
LT
1978 break;
1979 case SIOCDELRT:
86872cb5 1980 err = ip6_route_del(&cfg);
1da177e4
LT
1981 break;
1982 default:
1983 err = -EINVAL;
1984 }
1985 rtnl_unlock();
1986
1987 return err;
3ff50b79 1988 }
1da177e4
LT
1989
1990 return -EINVAL;
1991}
1992
1993/*
1994 * Drop the packet on the floor
1995 */
1996
d5fdd6ba 1997static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1998{
612f09e8 1999 int type;
adf30907 2000 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2001 switch (ipstats_mib_noroutes) {
2002 case IPSTATS_MIB_INNOROUTES:
0660e03f 2003 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2004 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2005 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2006 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2007 break;
2008 }
2009 /* FALLTHROUGH */
2010 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2011 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2012 ipstats_mib_noroutes);
612f09e8
YH
2013 break;
2014 }
3ffe533c 2015 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2016 kfree_skb(skb);
2017 return 0;
2018}
2019
9ce8ade0
TG
2020static int ip6_pkt_discard(struct sk_buff *skb)
2021{
612f09e8 2022 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2023}
2024
20380731 2025static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2026{
adf30907 2027 skb->dev = skb_dst(skb)->dev;
612f09e8 2028 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2029}
2030
6723ab54
DM
2031#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2032
9ce8ade0
TG
2033static int ip6_pkt_prohibit(struct sk_buff *skb)
2034{
612f09e8 2035 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2036}
2037
2038static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2039{
adf30907 2040 skb->dev = skb_dst(skb)->dev;
612f09e8 2041 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2042}
2043
6723ab54
DM
2044#endif
2045
1da177e4
LT
2046/*
2047 * Allocate a dst for local (unicast / anycast) address.
2048 */
2049
2050struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2051 const struct in6_addr *addr,
8f031519 2052 bool anycast)
1da177e4 2053{
c346dca1 2054 struct net *net = dev_net(idev->dev);
8b96d22d 2055 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
f83c7790 2056 int err;
1da177e4 2057
38308473 2058 if (!rt) {
f3213831 2059 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
1da177e4 2060 return ERR_PTR(-ENOMEM);
40385653 2061 }
1da177e4 2062
1da177e4
LT
2063 in6_dev_hold(idev);
2064
11d53b49 2065 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2066 rt->dst.input = ip6_input;
2067 rt->dst.output = ip6_output;
1da177e4 2068 rt->rt6i_idev = idev;
d8d1f30b 2069 rt->dst.obsolete = -1;
1da177e4
LT
2070
2071 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2072 if (anycast)
2073 rt->rt6i_flags |= RTF_ANYCAST;
2074 else
1da177e4 2075 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2076 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2077 if (err) {
d8d1f30b 2078 dst_free(&rt->dst);
f83c7790 2079 return ERR_PTR(err);
1da177e4
LT
2080 }
2081
4e3fd7a0 2082 rt->rt6i_dst.addr = *addr;
1da177e4 2083 rt->rt6i_dst.plen = 128;
5578689a 2084 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2085
d8d1f30b 2086 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2087
2088 return rt;
2089}
2090
c3968a85
DW
2091int ip6_route_get_saddr(struct net *net,
2092 struct rt6_info *rt,
b71d1d42 2093 const struct in6_addr *daddr,
c3968a85
DW
2094 unsigned int prefs,
2095 struct in6_addr *saddr)
2096{
2097 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2098 int err = 0;
2099 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2100 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2101 else
2102 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2103 daddr, prefs, saddr);
2104 return err;
2105}
2106
/* Remove a deleted IP address from the prefsrc entries of routes. */
/* Argument bundle handed to fib6_remove_prefsrc() by the FIB walker. */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace being walked */
	struct in6_addr *addr;		/* address being removed */
};
2113
2114static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2115{
2116 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2117 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2118 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2119
d1918542 2120 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2121 rt != net->ipv6.ip6_null_entry &&
2122 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2123 /* remove prefsrc entry */
2124 rt->rt6i_prefsrc.plen = 0;
2125 }
2126 return 0;
2127}
2128
2129void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2130{
2131 struct net *net = dev_net(ifp->idev->dev);
2132 struct arg_dev_net_ip adni = {
2133 .dev = ifp->idev->dev,
2134 .net = net,
2135 .addr = &ifp->addr,
2136 };
2137 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2138}
2139
8ed67789
DL
/* Argument bundle handed to fib6_ifdown() by the route walkers. */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is spared */
};
2144
1da177e4
LT
2145static int fib6_ifdown(struct rt6_info *rt, void *arg)
2146{
bc3ef660 2147 const struct arg_dev_net *adn = arg;
2148 const struct net_device *dev = adn->dev;
8ed67789 2149
d1918542 2150 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2151 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2152 return -1;
c159d30c 2153
1da177e4
LT
2154 return 0;
2155}
2156
f3db4851 2157void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2158{
8ed67789
DL
2159 struct arg_dev_net adn = {
2160 .dev = dev,
2161 .net = net,
2162 };
2163
2164 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2165 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2166}
2167
/* Argument bundle handed to rt6_mtu_change_route() by the FIB walker. */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2172
2173static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2174{
2175 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2176 struct inet6_dev *idev;
2177
2178 /* In IPv6 pmtu discovery is not optional,
2179 so that RTAX_MTU lock cannot disable it.
2180 We still use this lock to block changes
2181 caused by addrconf/ndisc.
2182 */
2183
2184 idev = __in6_dev_get(arg->dev);
38308473 2185 if (!idev)
1da177e4
LT
2186 return 0;
2187
2188 /* For administrative MTU increase, there is no way to discover
2189 IPv6 PMTU increase, so PMTU increase should be updated here.
2190 Since RFC 1981 doesn't include administrative MTU increase
2191 update PMTU increase is a MUST. (i.e. jumbo frame)
2192 */
2193 /*
2194 If new MTU is less than route PMTU, this new MTU will be the
2195 lowest MTU in the path, update the route PMTU to reflect PMTU
2196 decreases; if new MTU is greater than route PMTU, and the
2197 old MTU is the lowest MTU in the path, update the route PMTU
2198 to reflect the increase. In this case if the other nodes' MTU
2199 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2200 PMTU discouvery.
2201 */
d1918542 2202 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2203 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2204 (dst_mtu(&rt->dst) >= arg->mtu ||
2205 (dst_mtu(&rt->dst) < arg->mtu &&
2206 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2207 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2208 }
1da177e4
LT
2209 return 0;
2210}
2211
95c96174 2212void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2213{
c71099ac
TG
2214 struct rt6_mtu_change_arg arg = {
2215 .dev = dev,
2216 .mtu = mtu,
2217 };
1da177e4 2218
c346dca1 2219 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2220}
2221
ef7c79ed 2222static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2223 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2224 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2225 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2226 [RTA_PRIORITY] = { .type = NLA_U32 },
2227 [RTA_METRICS] = { .type = NLA_NESTED },
2228};
2229
2230static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2231 struct fib6_config *cfg)
1da177e4 2232{
86872cb5
TG
2233 struct rtmsg *rtm;
2234 struct nlattr *tb[RTA_MAX+1];
2235 int err;
1da177e4 2236
86872cb5
TG
2237 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2238 if (err < 0)
2239 goto errout;
1da177e4 2240
86872cb5
TG
2241 err = -EINVAL;
2242 rtm = nlmsg_data(nlh);
2243 memset(cfg, 0, sizeof(*cfg));
2244
2245 cfg->fc_table = rtm->rtm_table;
2246 cfg->fc_dst_len = rtm->rtm_dst_len;
2247 cfg->fc_src_len = rtm->rtm_src_len;
2248 cfg->fc_flags = RTF_UP;
2249 cfg->fc_protocol = rtm->rtm_protocol;
2250
2251 if (rtm->rtm_type == RTN_UNREACHABLE)
2252 cfg->fc_flags |= RTF_REJECT;
2253
ab79ad14
2254 if (rtm->rtm_type == RTN_LOCAL)
2255 cfg->fc_flags |= RTF_LOCAL;
2256
86872cb5
TG
2257 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2258 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2259 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2260
2261 if (tb[RTA_GATEWAY]) {
2262 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2263 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2264 }
86872cb5
TG
2265
2266 if (tb[RTA_DST]) {
2267 int plen = (rtm->rtm_dst_len + 7) >> 3;
2268
2269 if (nla_len(tb[RTA_DST]) < plen)
2270 goto errout;
2271
2272 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2273 }
86872cb5
TG
2274
2275 if (tb[RTA_SRC]) {
2276 int plen = (rtm->rtm_src_len + 7) >> 3;
2277
2278 if (nla_len(tb[RTA_SRC]) < plen)
2279 goto errout;
2280
2281 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2282 }
86872cb5 2283
c3968a85
DW
2284 if (tb[RTA_PREFSRC])
2285 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2286
86872cb5
TG
2287 if (tb[RTA_OIF])
2288 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2289
2290 if (tb[RTA_PRIORITY])
2291 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2292
2293 if (tb[RTA_METRICS]) {
2294 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2295 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2296 }
86872cb5
TG
2297
2298 if (tb[RTA_TABLE])
2299 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2300
2301 err = 0;
2302errout:
2303 return err;
1da177e4
LT
2304}
2305
c127ea2c 2306static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2307{
86872cb5
TG
2308 struct fib6_config cfg;
2309 int err;
1da177e4 2310
86872cb5
TG
2311 err = rtm_to_fib6_config(skb, nlh, &cfg);
2312 if (err < 0)
2313 return err;
2314
2315 return ip6_route_del(&cfg);
1da177e4
LT
2316}
2317
c127ea2c 2318static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2319{
86872cb5
TG
2320 struct fib6_config cfg;
2321 int err;
1da177e4 2322
86872cb5
TG
2323 err = rtm_to_fib6_config(skb, nlh, &cfg);
2324 if (err < 0)
2325 return err;
2326
2327 return ip6_route_add(&cfg);
1da177e4
LT
2328}
2329
339bf98f
TG
2330static inline size_t rt6_nlmsg_size(void)
2331{
2332 return NLMSG_ALIGN(sizeof(struct rtmsg))
2333 + nla_total_size(16) /* RTA_SRC */
2334 + nla_total_size(16) /* RTA_DST */
2335 + nla_total_size(16) /* RTA_GATEWAY */
2336 + nla_total_size(16) /* RTA_PREFSRC */
2337 + nla_total_size(4) /* RTA_TABLE */
2338 + nla_total_size(4) /* RTA_IIF */
2339 + nla_total_size(4) /* RTA_OIF */
2340 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2341 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2342 + nla_total_size(sizeof(struct rta_cacheinfo));
2343}
2344
191cd582
BH
2345static int rt6_fill_node(struct net *net,
2346 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2347 struct in6_addr *dst, struct in6_addr *src,
2348 int iif, int type, u32 pid, u32 seq,
7bc570c8 2349 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2350{
2351 struct rtmsg *rtm;
2d7202bf 2352 struct nlmsghdr *nlh;
e3703b3d 2353 long expires;
9e762a4a 2354 u32 table;
f2c31e32 2355 struct neighbour *n;
1da177e4
LT
2356
2357 if (prefix) { /* user wants prefix routes only */
2358 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2359 /* success since this is not a prefix route */
2360 return 1;
2361 }
2362 }
2363
2d7202bf 2364 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2365 if (!nlh)
26932566 2366 return -EMSGSIZE;
2d7202bf
TG
2367
2368 rtm = nlmsg_data(nlh);
1da177e4
LT
2369 rtm->rtm_family = AF_INET6;
2370 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2371 rtm->rtm_src_len = rt->rt6i_src.plen;
2372 rtm->rtm_tos = 0;
c71099ac 2373 if (rt->rt6i_table)
9e762a4a 2374 table = rt->rt6i_table->tb6_id;
c71099ac 2375 else
9e762a4a
PM
2376 table = RT6_TABLE_UNSPEC;
2377 rtm->rtm_table = table;
c78679e8
DM
2378 if (nla_put_u32(skb, RTA_TABLE, table))
2379 goto nla_put_failure;
38308473 2380 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2381 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2382 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2383 rtm->rtm_type = RTN_LOCAL;
d1918542 2384 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2385 rtm->rtm_type = RTN_LOCAL;
2386 else
2387 rtm->rtm_type = RTN_UNICAST;
2388 rtm->rtm_flags = 0;
2389 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2390 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2391 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2392 rtm->rtm_protocol = RTPROT_REDIRECT;
2393 else if (rt->rt6i_flags & RTF_ADDRCONF)
2394 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2395 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2396 rtm->rtm_protocol = RTPROT_RA;
2397
38308473 2398 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2399 rtm->rtm_flags |= RTM_F_CLONED;
2400
2401 if (dst) {
c78679e8
DM
2402 if (nla_put(skb, RTA_DST, 16, dst))
2403 goto nla_put_failure;
1ab1457c 2404 rtm->rtm_dst_len = 128;
1da177e4 2405 } else if (rtm->rtm_dst_len)
c78679e8
DM
2406 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2407 goto nla_put_failure;
1da177e4
LT
2408#ifdef CONFIG_IPV6_SUBTREES
2409 if (src) {
c78679e8
DM
2410 if (nla_put(skb, RTA_SRC, 16, src))
2411 goto nla_put_failure;
1ab1457c 2412 rtm->rtm_src_len = 128;
c78679e8
DM
2413 } else if (rtm->rtm_src_len &&
2414 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2415 goto nla_put_failure;
1da177e4 2416#endif
7bc570c8
YH
2417 if (iif) {
2418#ifdef CONFIG_IPV6_MROUTE
2419 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2420 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2421 if (err <= 0) {
2422 if (!nowait) {
2423 if (err == 0)
2424 return 0;
2425 goto nla_put_failure;
2426 } else {
2427 if (err == -EMSGSIZE)
2428 goto nla_put_failure;
2429 }
2430 }
2431 } else
2432#endif
c78679e8
DM
2433 if (nla_put_u32(skb, RTA_IIF, iif))
2434 goto nla_put_failure;
7bc570c8 2435 } else if (dst) {
1da177e4 2436 struct in6_addr saddr_buf;
c78679e8
DM
2437 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2438 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2439 goto nla_put_failure;
1da177e4 2440 }
2d7202bf 2441
c3968a85
DW
2442 if (rt->rt6i_prefsrc.plen) {
2443 struct in6_addr saddr_buf;
4e3fd7a0 2444 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2445 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2446 goto nla_put_failure;
c3968a85
DW
2447 }
2448
defb3519 2449 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2450 goto nla_put_failure;
2451
f2c31e32 2452 rcu_read_lock();
97cac082 2453 n = rt->n;
94f826b8
ED
2454 if (n) {
2455 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2456 rcu_read_unlock();
2457 goto nla_put_failure;
2458 }
2459 }
f2c31e32 2460 rcu_read_unlock();
2d7202bf 2461
c78679e8
DM
2462 if (rt->dst.dev &&
2463 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2464 goto nla_put_failure;
2465 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2466 goto nla_put_failure;
36e3deae
YH
2467 if (!(rt->rt6i_flags & RTF_EXPIRES))
2468 expires = 0;
d1918542
DM
2469 else if (rt->dst.expires - jiffies < INT_MAX)
2470 expires = rt->dst.expires - jiffies;
36e3deae
YH
2471 else
2472 expires = INT_MAX;
69cdf8f9 2473
87a50699 2474 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 2475 goto nla_put_failure;
2d7202bf
TG
2476
2477 return nlmsg_end(skb, nlh);
2478
2479nla_put_failure:
26932566
PM
2480 nlmsg_cancel(skb, nlh);
2481 return -EMSGSIZE;
1da177e4
LT
2482}
2483
1b43af54 2484int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2485{
2486 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2487 int prefix;
2488
2d7202bf
TG
2489 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2490 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2491 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2492 } else
2493 prefix = 0;
2494
191cd582
BH
2495 return rt6_fill_node(arg->net,
2496 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2497 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2498 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2499}
2500
c127ea2c 2501static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2502{
3b1e0a65 2503 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2504 struct nlattr *tb[RTA_MAX+1];
2505 struct rt6_info *rt;
1da177e4 2506 struct sk_buff *skb;
ab364a6f 2507 struct rtmsg *rtm;
4c9483b2 2508 struct flowi6 fl6;
72331bc0 2509 int err, iif = 0, oif = 0;
1da177e4 2510
ab364a6f
TG
2511 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2512 if (err < 0)
2513 goto errout;
1da177e4 2514
ab364a6f 2515 err = -EINVAL;
4c9483b2 2516 memset(&fl6, 0, sizeof(fl6));
1da177e4 2517
ab364a6f
TG
2518 if (tb[RTA_SRC]) {
2519 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2520 goto errout;
2521
4e3fd7a0 2522 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2523 }
2524
2525 if (tb[RTA_DST]) {
2526 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2527 goto errout;
2528
4e3fd7a0 2529 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2530 }
2531
2532 if (tb[RTA_IIF])
2533 iif = nla_get_u32(tb[RTA_IIF]);
2534
2535 if (tb[RTA_OIF])
72331bc0 2536 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2537
2538 if (iif) {
2539 struct net_device *dev;
72331bc0
SL
2540 int flags = 0;
2541
5578689a 2542 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2543 if (!dev) {
2544 err = -ENODEV;
ab364a6f 2545 goto errout;
1da177e4 2546 }
72331bc0
SL
2547
2548 fl6.flowi6_iif = iif;
2549
2550 if (!ipv6_addr_any(&fl6.saddr))
2551 flags |= RT6_LOOKUP_F_HAS_SADDR;
2552
2553 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2554 flags);
2555 } else {
2556 fl6.flowi6_oif = oif;
2557
2558 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2559 }
2560
ab364a6f 2561 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2562 if (!skb) {
2173bff5 2563 dst_release(&rt->dst);
ab364a6f
TG
2564 err = -ENOBUFS;
2565 goto errout;
2566 }
1da177e4 2567
ab364a6f
TG
2568 /* Reserve room for dummy headers, this skb can pass
2569 through good chunk of routing engine.
2570 */
459a98ed 2571 skb_reset_mac_header(skb);
ab364a6f 2572 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2573
d8d1f30b 2574 skb_dst_set(skb, &rt->dst);
1da177e4 2575
4c9483b2 2576 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2577 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2578 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2579 if (err < 0) {
ab364a6f
TG
2580 kfree_skb(skb);
2581 goto errout;
1da177e4
LT
2582 }
2583
5578689a 2584 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2585errout:
1da177e4 2586 return err;
1da177e4
LT
2587}
2588
86872cb5 2589void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2590{
2591 struct sk_buff *skb;
5578689a 2592 struct net *net = info->nl_net;
528c4ceb
DL
2593 u32 seq;
2594 int err;
2595
2596 err = -ENOBUFS;
38308473 2597 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2598
339bf98f 2599 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2600 if (!skb)
21713ebc
TG
2601 goto errout;
2602
191cd582 2603 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2604 event, info->pid, seq, 0, 0, 0);
26932566
PM
2605 if (err < 0) {
2606 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2607 WARN_ON(err == -EMSGSIZE);
2608 kfree_skb(skb);
2609 goto errout;
2610 }
1ce85fe4
PNA
2611 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2612 info->nlh, gfp_any());
2613 return;
21713ebc
TG
2614errout:
2615 if (err < 0)
5578689a 2616 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2617}
2618
8ed67789
DL
2619static int ip6_route_dev_notify(struct notifier_block *this,
2620 unsigned long event, void *data)
2621{
2622 struct net_device *dev = (struct net_device *)data;
c346dca1 2623 struct net *net = dev_net(dev);
8ed67789
DL
2624
2625 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2626 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2627 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2628#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2629 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2630 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2631 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2632 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2633#endif
2634 }
2635
2636 return NOTIFY_OK;
2637}
2638
/*
 *	/proc
 */
2642
2643#ifdef CONFIG_PROC_FS
2644
1da177e4
LT
/*
 * Buffer bookkeeping for /proc route output.
 * NOTE(review): no code in this part of the file references this
 * structure; confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2653
2654static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2655{
33120b30 2656 struct seq_file *m = p_arg;
69cce1d1 2657 struct neighbour *n;
1da177e4 2658
4b7a4274 2659 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2660
2661#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2662 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2663#else
33120b30 2664 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2665#endif
f2c31e32 2666 rcu_read_lock();
97cac082 2667 n = rt->n;
69cce1d1
DM
2668 if (n) {
2669 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2670 } else {
33120b30 2671 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2672 }
f2c31e32 2673 rcu_read_unlock();
33120b30 2674 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2675 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2676 rt->dst.__use, rt->rt6i_flags,
d1918542 2677 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2678 return 0;
2679}
2680
33120b30 2681static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2682{
f3db4851 2683 struct net *net = (struct net *)m->private;
32b293a5 2684 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2685 return 0;
2686}
1da177e4 2687
33120b30
AD
2688static int ipv6_route_open(struct inode *inode, struct file *file)
2689{
de05c557 2690 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2691}
2692
33120b30
AD
2693static const struct file_operations ipv6_route_proc_fops = {
2694 .owner = THIS_MODULE,
2695 .open = ipv6_route_open,
2696 .read = seq_read,
2697 .llseek = seq_lseek,
b6fcbdb4 2698 .release = single_release_net,
33120b30
AD
2699};
2700
1da177e4
LT
2701static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2702{
69ddb805 2703 struct net *net = (struct net *)seq->private;
1da177e4 2704 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2705 net->ipv6.rt6_stats->fib_nodes,
2706 net->ipv6.rt6_stats->fib_route_nodes,
2707 net->ipv6.rt6_stats->fib_rt_alloc,
2708 net->ipv6.rt6_stats->fib_rt_entries,
2709 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2710 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2711 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2712
2713 return 0;
2714}
2715
2716static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2717{
de05c557 2718 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2719}
2720
9a32144e 2721static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2722 .owner = THIS_MODULE,
2723 .open = rt6_stats_seq_open,
2724 .read = seq_read,
2725 .llseek = seq_lseek,
b6fcbdb4 2726 .release = single_release_net,
1da177e4
LT
2727};
2728#endif /* CONFIG_PROC_FS */
2729
2730#ifdef CONFIG_SYSCTL
2731
1da177e4 2732static
8d65af78 2733int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2734 void __user *buffer, size_t *lenp, loff_t *ppos)
2735{
c486da34
LAG
2736 struct net *net;
2737 int delay;
2738 if (!write)
1da177e4 2739 return -EINVAL;
c486da34
LAG
2740
2741 net = (struct net *)ctl->extra1;
2742 delay = net->ipv6.sysctl.flush_delay;
2743 proc_dointvec(ctl, write, buffer, lenp, ppos);
2744 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2745 return 0;
1da177e4
LT
2746}
2747
760f2d01 2748ctl_table ipv6_route_table_template[] = {
1ab1457c 2749 {
1da177e4 2750 .procname = "flush",
4990509f 2751 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2752 .maxlen = sizeof(int),
89c8b3a1 2753 .mode = 0200,
6d9f239a 2754 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2755 },
2756 {
1da177e4 2757 .procname = "gc_thresh",
9a7ec3a9 2758 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2759 .maxlen = sizeof(int),
2760 .mode = 0644,
6d9f239a 2761 .proc_handler = proc_dointvec,
1da177e4
LT
2762 },
2763 {
1da177e4 2764 .procname = "max_size",
4990509f 2765 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2766 .maxlen = sizeof(int),
2767 .mode = 0644,
6d9f239a 2768 .proc_handler = proc_dointvec,
1da177e4
LT
2769 },
2770 {
1da177e4 2771 .procname = "gc_min_interval",
4990509f 2772 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2773 .maxlen = sizeof(int),
2774 .mode = 0644,
6d9f239a 2775 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2776 },
2777 {
1da177e4 2778 .procname = "gc_timeout",
4990509f 2779 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2780 .maxlen = sizeof(int),
2781 .mode = 0644,
6d9f239a 2782 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2783 },
2784 {
1da177e4 2785 .procname = "gc_interval",
4990509f 2786 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2787 .maxlen = sizeof(int),
2788 .mode = 0644,
6d9f239a 2789 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2790 },
2791 {
1da177e4 2792 .procname = "gc_elasticity",
4990509f 2793 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2794 .maxlen = sizeof(int),
2795 .mode = 0644,
f3d3f616 2796 .proc_handler = proc_dointvec,
1da177e4
LT
2797 },
2798 {
1da177e4 2799 .procname = "mtu_expires",
4990509f 2800 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2801 .maxlen = sizeof(int),
2802 .mode = 0644,
6d9f239a 2803 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2804 },
2805 {
1da177e4 2806 .procname = "min_adv_mss",
4990509f 2807 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2808 .maxlen = sizeof(int),
2809 .mode = 0644,
f3d3f616 2810 .proc_handler = proc_dointvec,
1da177e4
LT
2811 },
2812 {
1da177e4 2813 .procname = "gc_min_interval_ms",
4990509f 2814 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2815 .maxlen = sizeof(int),
2816 .mode = 0644,
6d9f239a 2817 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2818 },
f8572d8f 2819 { }
1da177e4
LT
2820};
2821
2c8c1e72 2822struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2823{
2824 struct ctl_table *table;
2825
2826 table = kmemdup(ipv6_route_table_template,
2827 sizeof(ipv6_route_table_template),
2828 GFP_KERNEL);
5ee09105
YH
2829
2830 if (table) {
2831 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2832 table[0].extra1 = net;
86393e52 2833 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2834 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2835 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2836 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2837 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2838 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2839 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2840 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2841 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2842 }
2843
760f2d01
DL
2844 return table;
2845}
1da177e4
LT
2846#endif
2847
2c8c1e72 2848static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2849{
633d424b 2850 int ret = -ENOMEM;
8ed67789 2851
86393e52
AD
2852 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2853 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2854
fc66f95c
ED
2855 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2856 goto out_ip6_dst_ops;
2857
8ed67789
DL
2858 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2859 sizeof(*net->ipv6.ip6_null_entry),
2860 GFP_KERNEL);
2861 if (!net->ipv6.ip6_null_entry)
fc66f95c 2862 goto out_ip6_dst_entries;
d8d1f30b 2863 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2864 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2865 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2866 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2867 ip6_template_metrics, true);
8ed67789
DL
2868
2869#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2870 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2871 sizeof(*net->ipv6.ip6_prohibit_entry),
2872 GFP_KERNEL);
68fffc67
PZ
2873 if (!net->ipv6.ip6_prohibit_entry)
2874 goto out_ip6_null_entry;
d8d1f30b 2875 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2876 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2877 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2878 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2879 ip6_template_metrics, true);
8ed67789
DL
2880
2881 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2882 sizeof(*net->ipv6.ip6_blk_hole_entry),
2883 GFP_KERNEL);
68fffc67
PZ
2884 if (!net->ipv6.ip6_blk_hole_entry)
2885 goto out_ip6_prohibit_entry;
d8d1f30b 2886 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2887 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2888 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2889 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2890 ip6_template_metrics, true);
8ed67789
DL
2891#endif
2892
b339a47c
PZ
2893 net->ipv6.sysctl.flush_delay = 0;
2894 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2895 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2896 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2897 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2898 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2899 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2900 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2901
6891a346
BT
2902 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2903
8ed67789
DL
2904 ret = 0;
2905out:
2906 return ret;
f2fc6a54 2907
68fffc67
PZ
2908#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2909out_ip6_prohibit_entry:
2910 kfree(net->ipv6.ip6_prohibit_entry);
2911out_ip6_null_entry:
2912 kfree(net->ipv6.ip6_null_entry);
2913#endif
fc66f95c
ED
2914out_ip6_dst_entries:
2915 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2916out_ip6_dst_ops:
f2fc6a54 2917 goto out;
cdb18761
DL
2918}
2919
2c8c1e72 2920static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 2921{
8ed67789
DL
2922 kfree(net->ipv6.ip6_null_entry);
2923#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2924 kfree(net->ipv6.ip6_prohibit_entry);
2925 kfree(net->ipv6.ip6_blk_hole_entry);
2926#endif
41bb78b4 2927 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2928}
2929
d189634e
TG
2930static int __net_init ip6_route_net_init_late(struct net *net)
2931{
2932#ifdef CONFIG_PROC_FS
2933 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2934 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2935#endif
2936 return 0;
2937}
2938
2939static void __net_exit ip6_route_net_exit_late(struct net *net)
2940{
2941#ifdef CONFIG_PROC_FS
2942 proc_net_remove(net, "ipv6_route");
2943 proc_net_remove(net, "rt6_stats");
2944#endif
2945}
2946
cdb18761
DL
2947static struct pernet_operations ip6_route_net_ops = {
2948 .init = ip6_route_net_init,
2949 .exit = ip6_route_net_exit,
2950};
2951
c3426b47
DM
2952static int __net_init ipv6_inetpeer_init(struct net *net)
2953{
2954 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2955
2956 if (!bp)
2957 return -ENOMEM;
2958 inet_peer_base_init(bp);
2959 net->ipv6.peers = bp;
2960 return 0;
2961}
2962
2963static void __net_exit ipv6_inetpeer_exit(struct net *net)
2964{
2965 struct inet_peer_base *bp = net->ipv6.peers;
2966
2967 net->ipv6.peers = NULL;
56a6b248 2968 inetpeer_invalidate_tree(bp);
c3426b47
DM
2969 kfree(bp);
2970}
2971
2b823f72 2972static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
2973 .init = ipv6_inetpeer_init,
2974 .exit = ipv6_inetpeer_exit,
2975};
2976
d189634e
TG
2977static struct pernet_operations ip6_route_net_late_ops = {
2978 .init = ip6_route_net_init_late,
2979 .exit = ip6_route_net_exit_late,
2980};
2981
8ed67789
DL
2982static struct notifier_block ip6_route_dev_notifier = {
2983 .notifier_call = ip6_route_dev_notify,
2984 .priority = 0,
2985};
2986
433d49c3 2987int __init ip6_route_init(void)
1da177e4 2988{
433d49c3
DL
2989 int ret;
2990
9a7ec3a9
DL
2991 ret = -ENOMEM;
2992 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2993 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2994 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2995 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2996 goto out;
14e50e57 2997
fc66f95c 2998 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2999 if (ret)
bdb3289f 3000 goto out_kmem_cache;
bdb3289f 3001
c3426b47
DM
3002 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3003 if (ret)
e8803b6c 3004 goto out_dst_entries;
2a0c451a 3005
7e52b33b
DM
3006 ret = register_pernet_subsys(&ip6_route_net_ops);
3007 if (ret)
3008 goto out_register_inetpeer;
c3426b47 3009
5dc121e9
AE
3010 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3011
8ed67789
DL
3012 /* Registering of the loopback is done before this portion of code,
3013 * the loopback reference in rt6_info will not be taken, do it
3014 * manually for init_net */
d8d1f30b 3015 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3016 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3017 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3018 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3019 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3020 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3021 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3022 #endif
e8803b6c 3023 ret = fib6_init();
433d49c3 3024 if (ret)
8ed67789 3025 goto out_register_subsys;
433d49c3 3026
433d49c3
DL
3027 ret = xfrm6_init();
3028 if (ret)
e8803b6c 3029 goto out_fib6_init;
c35b7e72 3030
433d49c3
DL
3031 ret = fib6_rules_init();
3032 if (ret)
3033 goto xfrm6_init;
7e5449c2 3034
d189634e
TG
3035 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3036 if (ret)
3037 goto fib6_rules_init;
3038
433d49c3 3039 ret = -ENOBUFS;
c7ac8679
GR
3040 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3041 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3042 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3043 goto out_register_late_subsys;
c127ea2c 3044
8ed67789 3045 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3046 if (ret)
d189634e 3047 goto out_register_late_subsys;
8ed67789 3048
433d49c3
DL
3049out:
3050 return ret;
3051
d189634e
TG
3052out_register_late_subsys:
3053 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3054fib6_rules_init:
433d49c3
DL
3055 fib6_rules_cleanup();
3056xfrm6_init:
433d49c3 3057 xfrm6_fini();
2a0c451a
TG
3058out_fib6_init:
3059 fib6_gc_cleanup();
8ed67789
DL
3060out_register_subsys:
3061 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3062out_register_inetpeer:
3063 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3064out_dst_entries:
3065 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3066out_kmem_cache:
f2fc6a54 3067 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3068 goto out;
1da177e4
LT
3069}
3070
3071void ip6_route_cleanup(void)
3072{
8ed67789 3073 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3074 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3075 fib6_rules_cleanup();
1da177e4 3076 xfrm6_fini();
1da177e4 3077 fib6_gc_cleanup();
c3426b47 3078 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3079 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3080 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3081 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3082}