Remove noisy printks from llcp_sock_connect
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
1da177e4
LT
60
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
1716a961 67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 68 const struct in6_addr *dest);
1da177e4 69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 70static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 71static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73static void ip6_dst_destroy(struct dst_entry *);
74static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
569d3645 76static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
77
78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
81static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
82 struct sk_buff *skb, u32 mtu);
83static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
84 struct sk_buff *skb);
1da177e4 85
70ceb4f5 86#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 87static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
88 const struct in6_addr *prefix, int prefixlen,
89 const struct in6_addr *gwaddr, int ifindex,
95c96174 90 unsigned int pref);
efa2cea0 91static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
92 const struct in6_addr *prefix, int prefixlen,
93 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
94#endif
95
06582540
DM
96static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
97{
98 struct rt6_info *rt = (struct rt6_info *) dst;
99 struct inet_peer *peer;
100 u32 *p = NULL;
101
8e2ec639
YZ
102 if (!(rt->dst.flags & DST_HOST))
103 return NULL;
104
fbfe95a4 105 peer = rt6_get_peer_create(rt);
06582540
DM
106 if (peer) {
107 u32 *old_p = __DST_METRICS_PTR(old);
108 unsigned long prev, new;
109
110 p = peer->metrics;
111 if (inet_metrics_new(peer))
112 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113
114 new = (unsigned long) p;
115 prev = cmpxchg(&dst->_metrics, old, new);
116
117 if (prev != old) {
118 p = __DST_METRICS_PTR(prev);
119 if (prev & DST_METRICS_READ_ONLY)
120 p = NULL;
121 }
122 }
123 return p;
124}
125
f894cbf8
DM
126static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127 struct sk_buff *skb,
128 const void *daddr)
39232973
DM
129{
130 struct in6_addr *p = &rt->rt6i_gateway;
131
a7563f34 132 if (!ipv6_addr_any(p))
39232973 133 return (const void *) p;
f894cbf8
DM
134 else if (skb)
135 return &ipv6_hdr(skb)->daddr;
39232973
DM
136 return daddr;
137}
138
f894cbf8
DM
139static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140 struct sk_buff *skb,
141 const void *daddr)
d3aaeb38 142{
39232973
DM
143 struct rt6_info *rt = (struct rt6_info *) dst;
144 struct neighbour *n;
145
f894cbf8 146 daddr = choose_neigh_daddr(rt, skb, daddr);
39232973 147 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
148 if (n)
149 return n;
150 return neigh_create(&nd_tbl, daddr, dst->dev);
151}
152
8ade06c6 153static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 154{
8ade06c6
DM
155 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
156 if (!n) {
157 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
158 if (IS_ERR(n))
159 return PTR_ERR(n);
160 }
97cac082 161 rt->n = n;
f83c7790
DM
162
163 return 0;
d3aaeb38
DM
164}
165
9a7ec3a9 166static struct dst_ops ip6_dst_ops_template = {
1da177e4 167 .family = AF_INET6,
09640e63 168 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
169 .gc = ip6_dst_gc,
170 .gc_thresh = 1024,
171 .check = ip6_dst_check,
0dbaee3b 172 .default_advmss = ip6_default_advmss,
ebb762f2 173 .mtu = ip6_mtu,
06582540 174 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
175 .destroy = ip6_dst_destroy,
176 .ifdown = ip6_dst_ifdown,
177 .negative_advice = ip6_negative_advice,
178 .link_failure = ip6_link_failure,
179 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 180 .redirect = rt6_do_redirect,
1ac06e03 181 .local_out = __ip6_local_out,
d3aaeb38 182 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
183};
184
ebb762f2 185static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 186{
618f9bc7
SK
187 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
188
189 return mtu ? : dst->dev->mtu;
ec831ea7
RD
190}
191
6700c270
DM
192static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
193 struct sk_buff *skb, u32 mtu)
14e50e57
DM
194{
195}
196
6700c270
DM
/* redirect callback of ip6_dst_blackhole_ops: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
201
0972ddb2
HB
202static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
203 unsigned long old)
204{
205 return NULL;
206}
207
14e50e57
DM
208static struct dst_ops ip6_dst_blackhole_ops = {
209 .family = AF_INET6,
09640e63 210 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
211 .destroy = ip6_dst_destroy,
212 .check = ip6_dst_check,
ebb762f2 213 .mtu = ip6_blackhole_mtu,
214f45c9 214 .default_advmss = ip6_default_advmss,
14e50e57 215 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 216 .redirect = ip6_rt_blackhole_redirect,
0972ddb2 217 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 218 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
219};
220
62fa8a84
DM
221static const u32 ip6_template_metrics[RTAX_MAX] = {
222 [RTAX_HOPLIMIT - 1] = 255,
223};
224
fb0af4c7 225static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
226 .dst = {
227 .__refcnt = ATOMIC_INIT(1),
228 .__use = 1,
2c20cbd7 229 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 230 .error = -ENETUNREACH,
d8d1f30b
CG
231 .input = ip6_pkt_discard,
232 .output = ip6_pkt_discard_out,
1da177e4
LT
233 },
234 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 235 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
236 .rt6i_metric = ~(u32) 0,
237 .rt6i_ref = ATOMIC_INIT(1),
238};
239
101367c2
TG
240#ifdef CONFIG_IPV6_MULTIPLE_TABLES
241
6723ab54
DM
242static int ip6_pkt_prohibit(struct sk_buff *skb);
243static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 244
fb0af4c7 245static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
246 .dst = {
247 .__refcnt = ATOMIC_INIT(1),
248 .__use = 1,
2c20cbd7 249 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 250 .error = -EACCES,
d8d1f30b
CG
251 .input = ip6_pkt_prohibit,
252 .output = ip6_pkt_prohibit_out,
101367c2
TG
253 },
254 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 255 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
256 .rt6i_metric = ~(u32) 0,
257 .rt6i_ref = ATOMIC_INIT(1),
258};
259
fb0af4c7 260static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
261 .dst = {
262 .__refcnt = ATOMIC_INIT(1),
263 .__use = 1,
2c20cbd7 264 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 265 .error = -EINVAL,
d8d1f30b
CG
266 .input = dst_discard,
267 .output = dst_discard,
101367c2
TG
268 },
269 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 270 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
271 .rt6i_metric = ~(u32) 0,
272 .rt6i_ref = ATOMIC_INIT(1),
273};
274
275#endif
276
1da177e4 277/* allocate dst with ip6_dst_ops */
97bab73f 278static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f 279 struct net_device *dev,
8b96d22d
DM
280 int flags,
281 struct fib6_table *table)
1da177e4 282{
97bab73f 283 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 284 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 285
97bab73f 286 if (rt) {
8104891b
SK
287 struct dst_entry *dst = &rt->dst;
288
289 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
8b96d22d 290 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
6f3118b5 291 rt->rt6i_genid = rt_genid(net);
97bab73f 292 }
cf911662 293 return rt;
1da177e4
LT
294}
295
296static void ip6_dst_destroy(struct dst_entry *dst)
297{
298 struct rt6_info *rt = (struct rt6_info *)dst;
299 struct inet6_dev *idev = rt->rt6i_idev;
300
97cac082
DM
301 if (rt->n)
302 neigh_release(rt->n);
303
8e2ec639
YZ
304 if (!(rt->dst.flags & DST_HOST))
305 dst_destroy_metrics_generic(dst);
306
38308473 307 if (idev) {
1da177e4
LT
308 rt->rt6i_idev = NULL;
309 in6_dev_put(idev);
1ab1457c 310 }
1716a961
G
311
312 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
313 dst_release(dst->from);
314
97bab73f
DM
315 if (rt6_has_peer(rt)) {
316 struct inet_peer *peer = rt6_peer_ptr(rt);
b3419363
DM
317 inet_putpeer(peer);
318 }
319}
320
6431cbc2
DM
321static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
322
323static u32 rt6_peer_genid(void)
324{
325 return atomic_read(&__rt6_peer_genid);
326}
327
b3419363
DM
328void rt6_bind_peer(struct rt6_info *rt, int create)
329{
97bab73f 330 struct inet_peer_base *base;
b3419363
DM
331 struct inet_peer *peer;
332
97bab73f
DM
333 base = inetpeer_base_ptr(rt->_rt6i_peer);
334 if (!base)
335 return;
336
337 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
7b34ca2a
DM
338 if (peer) {
339 if (!rt6_set_peer(rt, peer))
340 inet_putpeer(peer);
341 else
342 rt->rt6i_peer_genid = rt6_peer_genid();
343 }
1da177e4
LT
344}
345
346static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
347 int how)
348{
349 struct rt6_info *rt = (struct rt6_info *)dst;
350 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 351 struct net_device *loopback_dev =
c346dca1 352 dev_net(dev)->loopback_dev;
1da177e4 353
97cac082
DM
354 if (dev != loopback_dev) {
355 if (idev && idev->dev == dev) {
356 struct inet6_dev *loopback_idev =
357 in6_dev_get(loopback_dev);
358 if (loopback_idev) {
359 rt->rt6i_idev = loopback_idev;
360 in6_dev_put(idev);
361 }
362 }
363 if (rt->n && rt->n->dev == dev) {
364 rt->n->dev = loopback_dev;
365 dev_hold(loopback_dev);
366 dev_put(dev);
1da177e4
LT
367 }
368 }
369}
370
a50feda5 371static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 372{
1716a961
G
373 if (rt->rt6i_flags & RTF_EXPIRES) {
374 if (time_after(jiffies, rt->dst.expires))
a50feda5 375 return true;
1716a961 376 } else if (rt->dst.from) {
3fd91fb3 377 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 378 }
a50feda5 379 return false;
1da177e4
LT
380}
381
a50feda5 382static bool rt6_need_strict(const struct in6_addr *daddr)
c71099ac 383{
a02cec21
ED
384 return ipv6_addr_type(daddr) &
385 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
386}
387
1da177e4 388/*
c71099ac 389 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
390 */
391
8ed67789
DL
392static inline struct rt6_info *rt6_device_match(struct net *net,
393 struct rt6_info *rt,
b71d1d42 394 const struct in6_addr *saddr,
1da177e4 395 int oif,
d420895e 396 int flags)
1da177e4
LT
397{
398 struct rt6_info *local = NULL;
399 struct rt6_info *sprt;
400
dd3abc4e
YH
401 if (!oif && ipv6_addr_any(saddr))
402 goto out;
403
d8d1f30b 404 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 405 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
406
407 if (oif) {
1da177e4
LT
408 if (dev->ifindex == oif)
409 return sprt;
410 if (dev->flags & IFF_LOOPBACK) {
38308473 411 if (!sprt->rt6i_idev ||
1da177e4 412 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 413 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 414 continue;
1ab1457c 415 if (local && (!oif ||
1da177e4
LT
416 local->rt6i_idev->dev->ifindex == oif))
417 continue;
418 }
419 local = sprt;
420 }
dd3abc4e
YH
421 } else {
422 if (ipv6_chk_addr(net, saddr, dev,
423 flags & RT6_LOOKUP_F_IFACE))
424 return sprt;
1da177e4 425 }
dd3abc4e 426 }
1da177e4 427
dd3abc4e 428 if (oif) {
1da177e4
LT
429 if (local)
430 return local;
431
d420895e 432 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 433 return net->ipv6.ip6_null_entry;
1da177e4 434 }
dd3abc4e 435out:
1da177e4
LT
436 return rt;
437}
438
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * rt6_probe - Router Reachability Probing for @rt's neighbour.
 *
 * Okay, this does not seem to be appropriate for now, however, we need
 * to check if it is really so.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute; here the limit is the interface's rtr_probe_interval,
 * measured from neigh->updated.
 *
 * Nothing happens when @rt is NULL, has no neighbour, or the neighbour
 * is in a NUD_VALID state.  Otherwise a neighbour solicitation for the
 * neighbour's address is sent to its solicited-node multicast address.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->n : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	/* Re-check the state under the lock, then apply the rate limit. */
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies,
			neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		read_unlock_bh(&neigh->lock);
		return;
	}

	/*
	 * NOTE(review): neigh->updated is written while only the read
	 * side of neigh->lock is held - confirm this is intended.
	 */
	neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to do */
}
#endif
475
1da177e4 476/*
554cfb7e 477 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 478 */
b6f99a21 479static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 480{
d1918542 481 struct net_device *dev = rt->dst.dev;
161980f4 482 if (!oif || dev->ifindex == oif)
554cfb7e 483 return 2;
161980f4
DM
484 if ((dev->flags & IFF_LOOPBACK) &&
485 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
486 return 1;
487 return 0;
554cfb7e 488}
1da177e4 489
b6f99a21 490static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 491{
f2c31e32 492 struct neighbour *neigh;
398bcbeb 493 int m;
f2c31e32 494
97cac082 495 neigh = rt->n;
4d0c5911
YH
496 if (rt->rt6i_flags & RTF_NONEXTHOP ||
497 !(rt->rt6i_flags & RTF_GATEWAY))
498 m = 1;
499 else if (neigh) {
554cfb7e
YH
500 read_lock_bh(&neigh->lock);
501 if (neigh->nud_state & NUD_VALID)
4d0c5911 502 m = 2;
398bcbeb
YH
503#ifdef CONFIG_IPV6_ROUTER_PREF
504 else if (neigh->nud_state & NUD_FAILED)
505 m = 0;
506#endif
507 else
ea73ee23 508 m = 1;
554cfb7e 509 read_unlock_bh(&neigh->lock);
398bcbeb
YH
510 } else
511 m = 0;
554cfb7e 512 return m;
1da177e4
LT
513}
514
554cfb7e
YH
515static int rt6_score_route(struct rt6_info *rt, int oif,
516 int strict)
1da177e4 517{
4d0c5911 518 int m, n;
1ab1457c 519
4d0c5911 520 m = rt6_check_dev(rt, oif);
77d16f45 521 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 522 return -1;
ebacaaa0
YH
523#ifdef CONFIG_IPV6_ROUTER_PREF
524 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
525#endif
4d0c5911 526 n = rt6_check_neigh(rt);
557e92ef 527 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
528 return -1;
529 return m;
530}
531
f11e6659
DM
532static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
533 int *mpri, struct rt6_info *match)
554cfb7e 534{
f11e6659
DM
535 int m;
536
537 if (rt6_check_expired(rt))
538 goto out;
539
540 m = rt6_score_route(rt, oif, strict);
541 if (m < 0)
542 goto out;
543
544 if (m > *mpri) {
545 if (strict & RT6_LOOKUP_F_REACHABLE)
546 rt6_probe(match);
547 *mpri = m;
548 match = rt;
549 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
550 rt6_probe(rt);
551 }
552
553out:
554 return match;
555}
556
557static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
558 struct rt6_info *rr_head,
559 u32 metric, int oif, int strict)
560{
561 struct rt6_info *rt, *match;
554cfb7e 562 int mpri = -1;
1da177e4 563
f11e6659
DM
564 match = NULL;
565 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 566 rt = rt->dst.rt6_next)
f11e6659
DM
567 match = find_match(rt, oif, strict, &mpri, match);
568 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 569 rt = rt->dst.rt6_next)
f11e6659 570 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 571
f11e6659
DM
572 return match;
573}
1da177e4 574
f11e6659
DM
575static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
576{
577 struct rt6_info *match, *rt0;
8ed67789 578 struct net *net;
1da177e4 579
f11e6659
DM
580 rt0 = fn->rr_ptr;
581 if (!rt0)
582 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 583
f11e6659 584 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 585
554cfb7e 586 if (!match &&
f11e6659 587 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 588 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 589
554cfb7e 590 /* no entries matched; do round-robin */
f11e6659
DM
591 if (!next || next->rt6i_metric != rt0->rt6i_metric)
592 next = fn->leaf;
593
594 if (next != rt0)
595 fn->rr_ptr = next;
1da177e4 596 }
1da177e4 597
d1918542 598 net = dev_net(rt0->dst.dev);
a02cec21 599 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
600}
601
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - process a received Route Information option.
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Validates the option, then adds, refreshes or deletes the matching
 * route: a zero lifetime deletes an existing route, a non-zero
 * lifetime creates a missing one or updates the preference of an
 * existing one, and the expiry is set from the lifetime.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt) {
		if (lifetime) {
			rt->rt6i_flags = RTF_ROUTEINFO |
					 (rt->rt6i_flags & ~RTF_PREF_MASK) |
					 RTF_PREF(pref);
		} else {
			ip6_del_rt(rt);
			rt = NULL;
		}
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	}

	if (rt) {
		if (addrconf_finite_timeout(lifetime))
			rt6_set_expires(rt, jiffies + HZ * lifetime);
		else
			rt6_clean_expires(rt);

		dst_release(&rt->dst);
	}

	return 0;
}
#endif
674
/*
 * BACKTRACK - climb the FIB tree when the lookup produced the null entry.
 *
 * Relies on the expanding function providing the locals "rt" and "fn"
 * and the labels "out" and "restart".  While rt is the namespace's
 * ip6_null_entry, the macro moves fn to its parent (or into the
 * parent's subtree, looked up by @saddr, when that subtree is not the
 * one being left).  It jumps to "out" on reaching the tree root and to
 * "restart" as soon as fn carries route information.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) &&				\
			    FIB6_SUBTREE(pn) != fn)			\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, saddr);		\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 692
8ed67789
DL
693static struct rt6_info *ip6_pol_route_lookup(struct net *net,
694 struct fib6_table *table,
4c9483b2 695 struct flowi6 *fl6, int flags)
1da177e4
LT
696{
697 struct fib6_node *fn;
698 struct rt6_info *rt;
699
c71099ac 700 read_lock_bh(&table->tb6_lock);
4c9483b2 701 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
702restart:
703 rt = fn->leaf;
4c9483b2
DM
704 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
705 BACKTRACK(net, &fl6->saddr);
c71099ac 706out:
d8d1f30b 707 dst_use(&rt->dst, jiffies);
c71099ac 708 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
709 return rt;
710
711}
712
ea6e574e
FW
713struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
714 int flags)
715{
716 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
717}
718EXPORT_SYMBOL_GPL(ip6_route_lookup);
719
9acd9f3a
YH
720struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
721 const struct in6_addr *saddr, int oif, int strict)
c71099ac 722{
4c9483b2
DM
723 struct flowi6 fl6 = {
724 .flowi6_oif = oif,
725 .daddr = *daddr,
c71099ac
TG
726 };
727 struct dst_entry *dst;
77d16f45 728 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 729
adaa70bb 730 if (saddr) {
4c9483b2 731 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
732 flags |= RT6_LOOKUP_F_HAS_SADDR;
733 }
734
4c9483b2 735 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
736 if (dst->error == 0)
737 return (struct rt6_info *) dst;
738
739 dst_release(dst);
740
1da177e4
LT
741 return NULL;
742}
743
7159039a
YH
744EXPORT_SYMBOL(rt6_lookup);
745
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to it, it may be destroyed.
 */
751
86872cb5 752static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
753{
754 int err;
c71099ac 755 struct fib6_table *table;
1da177e4 756
c71099ac
TG
757 table = rt->rt6i_table;
758 write_lock_bh(&table->tb6_lock);
86872cb5 759 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 760 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
761
762 return err;
763}
764
40e22e8f
TG
765int ip6_ins_rt(struct rt6_info *rt)
766{
4d1169c1 767 struct nl_info info = {
d1918542 768 .nl_net = dev_net(rt->dst.dev),
4d1169c1 769 };
528c4ceb 770 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
771}
772
1716a961 773static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 774 const struct in6_addr *daddr,
b71d1d42 775 const struct in6_addr *saddr)
1da177e4 776{
1da177e4
LT
777 struct rt6_info *rt;
778
779 /*
780 * Clone the route.
781 */
782
21efcfa0 783 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
784
785 if (rt) {
14deae41
DM
786 int attempts = !in_softirq();
787
38308473 788 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 789 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 790 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 791 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 792 rt->rt6i_gateway = *daddr;
58c4fb86 793 }
1da177e4 794
1da177e4 795 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
796
797#ifdef CONFIG_IPV6_SUBTREES
798 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 799 rt->rt6i_src.addr = *saddr;
1da177e4
LT
800 rt->rt6i_src.plen = 128;
801 }
802#endif
803
14deae41 804 retry:
8ade06c6 805 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 806 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
807 int saved_rt_min_interval =
808 net->ipv6.sysctl.ip6_rt_gc_min_interval;
809 int saved_rt_elasticity =
810 net->ipv6.sysctl.ip6_rt_gc_elasticity;
811
812 if (attempts-- > 0) {
813 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
814 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
815
86393e52 816 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
817
818 net->ipv6.sysctl.ip6_rt_gc_elasticity =
819 saved_rt_elasticity;
820 net->ipv6.sysctl.ip6_rt_gc_min_interval =
821 saved_rt_min_interval;
822 goto retry;
823 }
824
f3213831 825 net_warn_ratelimited("Neighbour table overflow\n");
d8d1f30b 826 dst_free(&rt->dst);
14deae41
DM
827 return NULL;
828 }
95a9a5ba 829 }
1da177e4 830
95a9a5ba
YH
831 return rt;
832}
1da177e4 833
21efcfa0
ED
834static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
835 const struct in6_addr *daddr)
299d9939 836{
21efcfa0
ED
837 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
838
299d9939 839 if (rt) {
299d9939 840 rt->rt6i_flags |= RTF_CACHE;
97cac082 841 rt->n = neigh_clone(ort->n);
299d9939
YH
842 }
843 return rt;
844}
845
8ed67789 846static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 847 struct flowi6 *fl6, int flags)
1da177e4
LT
848{
849 struct fib6_node *fn;
519fbd87 850 struct rt6_info *rt, *nrt;
c71099ac 851 int strict = 0;
1da177e4 852 int attempts = 3;
519fbd87 853 int err;
53b7997f 854 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 855
77d16f45 856 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
857
858relookup:
c71099ac 859 read_lock_bh(&table->tb6_lock);
1da177e4 860
8238dd06 861restart_2:
4c9483b2 862 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
863
864restart:
4acad72d 865 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 866
4c9483b2 867 BACKTRACK(net, &fl6->saddr);
8ed67789 868 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 869 rt->rt6i_flags & RTF_CACHE)
1ddef044 870 goto out;
1da177e4 871
d8d1f30b 872 dst_hold(&rt->dst);
c71099ac 873 read_unlock_bh(&table->tb6_lock);
fb9de91e 874
97cac082 875 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 876 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 877 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 878 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
879 else
880 goto out2;
e40cf353 881
d8d1f30b 882 dst_release(&rt->dst);
8ed67789 883 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 884
d8d1f30b 885 dst_hold(&rt->dst);
519fbd87 886 if (nrt) {
40e22e8f 887 err = ip6_ins_rt(nrt);
519fbd87 888 if (!err)
1da177e4 889 goto out2;
1da177e4 890 }
1da177e4 891
519fbd87
YH
892 if (--attempts <= 0)
893 goto out2;
894
895 /*
c71099ac 896 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
897 * released someone could insert this route. Relookup.
898 */
d8d1f30b 899 dst_release(&rt->dst);
519fbd87
YH
900 goto relookup;
901
902out:
8238dd06
YH
903 if (reachable) {
904 reachable = 0;
905 goto restart_2;
906 }
d8d1f30b 907 dst_hold(&rt->dst);
c71099ac 908 read_unlock_bh(&table->tb6_lock);
1da177e4 909out2:
d8d1f30b
CG
910 rt->dst.lastuse = jiffies;
911 rt->dst.__use++;
c71099ac
TG
912
913 return rt;
1da177e4
LT
914}
915
8ed67789 916static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 917 struct flowi6 *fl6, int flags)
4acad72d 918{
4c9483b2 919 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
920}
921
72331bc0
SL
922static struct dst_entry *ip6_route_input_lookup(struct net *net,
923 struct net_device *dev,
924 struct flowi6 *fl6, int flags)
925{
926 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
927 flags |= RT6_LOOKUP_F_IFACE;
928
929 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
930}
931
c71099ac
TG
932void ip6_route_input(struct sk_buff *skb)
933{
b71d1d42 934 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 935 struct net *net = dev_net(skb->dev);
adaa70bb 936 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
937 struct flowi6 fl6 = {
938 .flowi6_iif = skb->dev->ifindex,
939 .daddr = iph->daddr,
940 .saddr = iph->saddr,
38308473 941 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
942 .flowi6_mark = skb->mark,
943 .flowi6_proto = iph->nexthdr,
c71099ac 944 };
adaa70bb 945
72331bc0 946 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
947}
948
8ed67789 949static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 950 struct flowi6 *fl6, int flags)
1da177e4 951{
4c9483b2 952 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
953}
954
9c7a4f9c 955struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 956 struct flowi6 *fl6)
c71099ac
TG
957{
958 int flags = 0;
959
1fb9489b 960 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 961
4c9483b2 962 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 963 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 964
4c9483b2 965 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 966 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
967 else if (sk)
968 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 969
4c9483b2 970 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
971}
972
7159039a 973EXPORT_SYMBOL(ip6_route_output);
1da177e4 974
2774c131 975struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 976{
5c1e6aa3 977 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
978 struct dst_entry *new = NULL;
979
f5b0a874 980 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 981 if (rt) {
d8d1f30b 982 new = &rt->dst;
14e50e57 983
8104891b
SK
984 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
985 rt6_init_peer(rt, net->ipv6.peers);
986
14e50e57 987 new->__use = 1;
352e512c
HX
988 new->input = dst_discard;
989 new->output = dst_discard;
14e50e57 990
21efcfa0
ED
991 if (dst_metrics_read_only(&ort->dst))
992 new->_metrics = ort->dst._metrics;
993 else
994 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
995 rt->rt6i_idev = ort->rt6i_idev;
996 if (rt->rt6i_idev)
997 in6_dev_hold(rt->rt6i_idev);
14e50e57 998
4e3fd7a0 999 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1000 rt->rt6i_flags = ort->rt6i_flags;
1001 rt6_clean_expires(rt);
14e50e57
DM
1002 rt->rt6i_metric = 0;
1003
1004 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1005#ifdef CONFIG_IPV6_SUBTREES
1006 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1007#endif
1008
1009 dst_free(new);
1010 }
1011
69ead7af
DM
1012 dst_release(dst_orig);
1013 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1014}
14e50e57 1015
1da177e4
LT
1016/*
1017 * Destination cache support functions
1018 */
1019
1020static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1021{
1022 struct rt6_info *rt;
1023
1024 rt = (struct rt6_info *) dst;
1025
6f3118b5
ND
1026 /* All IPV6 dsts are created with ->obsolete set to the value
1027 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1028 * into this function always.
1029 */
1030 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1031 return NULL;
1032
6431cbc2
DM
1033 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1034 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
97bab73f 1035 if (!rt6_has_peer(rt))
6431cbc2
DM
1036 rt6_bind_peer(rt, 0);
1037 rt->rt6i_peer_genid = rt6_peer_genid();
1038 }
1da177e4 1039 return dst;
6431cbc2 1040 }
1da177e4
LT
1041 return NULL;
1042}
1043
1044static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1045{
1046 struct rt6_info *rt = (struct rt6_info *) dst;
1047
1048 if (rt) {
54c1a859
YH
1049 if (rt->rt6i_flags & RTF_CACHE) {
1050 if (rt6_check_expired(rt)) {
1051 ip6_del_rt(rt);
1052 dst = NULL;
1053 }
1054 } else {
1da177e4 1055 dst_release(dst);
54c1a859
YH
1056 dst = NULL;
1057 }
1da177e4 1058 }
54c1a859 1059 return dst;
1da177e4
LT
1060}
1061
1062static void ip6_link_failure(struct sk_buff *skb)
1063{
1064 struct rt6_info *rt;
1065
3ffe533c 1066 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1067
adf30907 1068 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1069 if (rt) {
1716a961
G
1070 if (rt->rt6i_flags & RTF_CACHE)
1071 rt6_update_expires(rt, 0);
1072 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1073 rt->rt6i_node->fn_sernum = -1;
1074 }
1075}
1076
6700c270
DM
1077static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1078 struct sk_buff *skb, u32 mtu)
1da177e4
LT
1079{
1080 struct rt6_info *rt6 = (struct rt6_info*)dst;
1081
81aded24 1082 dst_confirm(dst);
1da177e4 1083 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
81aded24
DM
1084 struct net *net = dev_net(dst->dev);
1085
1da177e4
LT
1086 rt6->rt6i_flags |= RTF_MODIFIED;
1087 if (mtu < IPV6_MIN_MTU) {
defb3519 1088 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1089 mtu = IPV6_MIN_MTU;
defb3519
DM
1090 features |= RTAX_FEATURE_ALLFRAG;
1091 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1092 }
defb3519 1093 dst_metric_set(dst, RTAX_MTU, mtu);
81aded24 1094 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1095 }
1096}
1097
42ae66c8
DM
1098void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1099 int oif, u32 mark)
81aded24
DM
1100{
1101 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1102 struct dst_entry *dst;
1103 struct flowi6 fl6;
1104
1105 memset(&fl6, 0, sizeof(fl6));
1106 fl6.flowi6_oif = oif;
1107 fl6.flowi6_mark = mark;
3e12939a 1108 fl6.flowi6_flags = 0;
81aded24
DM
1109 fl6.daddr = iph->daddr;
1110 fl6.saddr = iph->saddr;
1111 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1112
1113 dst = ip6_route_output(net, NULL, &fl6);
1114 if (!dst->error)
6700c270 1115 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
81aded24
DM
1116 dst_release(dst);
1117}
1118EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1119
1120void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1121{
1122 ip6_update_pmtu(skb, sock_net(sk), mtu,
1123 sk->sk_bound_dev_if, sk->sk_mark);
1124}
1125EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1126
3a5ad2ee
DM
1127void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1128{
1129 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1130 struct dst_entry *dst;
1131 struct flowi6 fl6;
1132
1133 memset(&fl6, 0, sizeof(fl6));
1134 fl6.flowi6_oif = oif;
1135 fl6.flowi6_mark = mark;
1136 fl6.flowi6_flags = 0;
1137 fl6.daddr = iph->daddr;
1138 fl6.saddr = iph->saddr;
1139 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1140
1141 dst = ip6_route_output(net, NULL, &fl6);
1142 if (!dst->error)
6700c270 1143 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1144 dst_release(dst);
1145}
1146EXPORT_SYMBOL_GPL(ip6_redirect);
1147
1148void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1149{
1150 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1151}
1152EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1153
0dbaee3b 1154static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1155{
0dbaee3b
DM
1156 struct net_device *dev = dst->dev;
1157 unsigned int mtu = dst_mtu(dst);
1158 struct net *net = dev_net(dev);
1159
1da177e4
LT
1160 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1161
5578689a
DL
1162 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1163 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1164
1165 /*
1ab1457c
YH
1166 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1167 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1168 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1169 * rely only on pmtu discovery"
1170 */
1171 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1172 mtu = IPV6_MAXPLEN;
1173 return mtu;
1174}
1175
ebb762f2 1176static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1177{
d33e4553 1178 struct inet6_dev *idev;
618f9bc7
SK
1179 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1180
1181 if (mtu)
1182 return mtu;
1183
1184 mtu = IPV6_MIN_MTU;
d33e4553
DM
1185
1186 rcu_read_lock();
1187 idev = __in6_dev_get(dst->dev);
1188 if (idev)
1189 mtu = idev->cnf.mtu6;
1190 rcu_read_unlock();
1191
1192 return mtu;
1193}
1194
3b00944c
YH
/* Singly linked list (via dst->next) of entries from icmp6_dst_alloc(). */
static struct dst_entry *icmp6_dst_gc_list;
/* Taken (BH-safe) around every walk or update of icmp6_dst_gc_list. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1197
3b00944c 1198struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1199 struct neighbour *neigh,
87a11578 1200 struct flowi6 *fl6)
1da177e4 1201{
87a11578 1202 struct dst_entry *dst;
1da177e4
LT
1203 struct rt6_info *rt;
1204 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1205 struct net *net = dev_net(dev);
1da177e4 1206
38308473 1207 if (unlikely(!idev))
122bdf67 1208 return ERR_PTR(-ENODEV);
1da177e4 1209
8b96d22d 1210 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1211 if (unlikely(!rt)) {
1da177e4 1212 in6_dev_put(idev);
87a11578 1213 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1214 goto out;
1215 }
1216
1da177e4
LT
1217 if (neigh)
1218 neigh_hold(neigh);
14deae41 1219 else {
f894cbf8 1220 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
b43faac6 1221 if (IS_ERR(neigh)) {
252c3d84 1222 in6_dev_put(idev);
b43faac6
DM
1223 dst_free(&rt->dst);
1224 return ERR_CAST(neigh);
1225 }
14deae41 1226 }
1da177e4 1227
8e2ec639
YZ
1228 rt->dst.flags |= DST_HOST;
1229 rt->dst.output = ip6_output;
97cac082 1230 rt->n = neigh;
d8d1f30b 1231 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1232 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1233 rt->rt6i_dst.plen = 128;
1234 rt->rt6i_idev = idev;
7011687f 1235 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1236
3b00944c 1237 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1238 rt->dst.next = icmp6_dst_gc_list;
1239 icmp6_dst_gc_list = &rt->dst;
3b00944c 1240 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1241
5578689a 1242 fib6_force_start_gc(net);
1da177e4 1243
87a11578
DM
1244 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1245
1da177e4 1246out:
87a11578 1247 return dst;
1da177e4
LT
1248}
1249
3d0f24a7 1250int icmp6_dst_gc(void)
1da177e4 1251{
e9476e95 1252 struct dst_entry *dst, **pprev;
3d0f24a7 1253 int more = 0;
1da177e4 1254
3b00944c
YH
1255 spin_lock_bh(&icmp6_dst_lock);
1256 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1257
1da177e4
LT
1258 while ((dst = *pprev) != NULL) {
1259 if (!atomic_read(&dst->__refcnt)) {
1260 *pprev = dst->next;
1261 dst_free(dst);
1da177e4
LT
1262 } else {
1263 pprev = &dst->next;
3d0f24a7 1264 ++more;
1da177e4
LT
1265 }
1266 }
1267
3b00944c 1268 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1269
3d0f24a7 1270 return more;
1da177e4
LT
1271}
1272
1e493d19
DM
1273static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1274 void *arg)
1275{
1276 struct dst_entry *dst, **pprev;
1277
1278 spin_lock_bh(&icmp6_dst_lock);
1279 pprev = &icmp6_dst_gc_list;
1280 while ((dst = *pprev) != NULL) {
1281 struct rt6_info *rt = (struct rt6_info *) dst;
1282 if (func(rt, arg)) {
1283 *pprev = dst->next;
1284 dst_free(dst);
1285 } else {
1286 pprev = &dst->next;
1287 }
1288 }
1289 spin_unlock_bh(&icmp6_dst_lock);
1290}
1291
569d3645 1292static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1293{
1da177e4 1294 unsigned long now = jiffies;
86393e52 1295 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1296 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1297 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1298 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1299 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1300 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1301 int entries;
7019b78e 1302
fc66f95c 1303 entries = dst_entries_get_fast(ops);
7019b78e 1304 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1305 entries <= rt_max_size)
1da177e4
LT
1306 goto out;
1307
6891a346
BT
1308 net->ipv6.ip6_rt_gc_expire++;
1309 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1310 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1311 entries = dst_entries_get_slow(ops);
1312 if (entries < ops->gc_thresh)
7019b78e 1313 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1314out:
7019b78e 1315 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1316 return entries > rt_max_size;
1da177e4
LT
1317}
1318
1319/* Clean host part of a prefix. Not necessary in radix tree,
1320 but results in cleaner routing tables.
1321
1322 Remove it only when all the things will work!
1323 */
1324
6b75d090 1325int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1326{
5170ae82 1327 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1328 if (hoplimit == 0) {
6b75d090 1329 struct net_device *dev = dst->dev;
c68f24cc
ED
1330 struct inet6_dev *idev;
1331
1332 rcu_read_lock();
1333 idev = __in6_dev_get(dev);
1334 if (idev)
6b75d090 1335 hoplimit = idev->cnf.hop_limit;
c68f24cc 1336 else
53b7997f 1337 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1338 rcu_read_unlock();
1da177e4
LT
1339 }
1340 return hoplimit;
1341}
abbf46ae 1342EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1343
1344/*
1345 *
1346 */
1347
86872cb5 1348int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1349{
1350 int err;
5578689a 1351 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1352 struct rt6_info *rt = NULL;
1353 struct net_device *dev = NULL;
1354 struct inet6_dev *idev = NULL;
c71099ac 1355 struct fib6_table *table;
1da177e4
LT
1356 int addr_type;
1357
86872cb5 1358 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1359 return -EINVAL;
1360#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1361 if (cfg->fc_src_len)
1da177e4
LT
1362 return -EINVAL;
1363#endif
86872cb5 1364 if (cfg->fc_ifindex) {
1da177e4 1365 err = -ENODEV;
5578689a 1366 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1367 if (!dev)
1368 goto out;
1369 idev = in6_dev_get(dev);
1370 if (!idev)
1371 goto out;
1372 }
1373
86872cb5
TG
1374 if (cfg->fc_metric == 0)
1375 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1376
d71314b4 1377 err = -ENOBUFS;
38308473
DM
1378 if (cfg->fc_nlinfo.nlh &&
1379 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1380 table = fib6_get_table(net, cfg->fc_table);
38308473 1381 if (!table) {
f3213831 1382 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1383 table = fib6_new_table(net, cfg->fc_table);
1384 }
1385 } else {
1386 table = fib6_new_table(net, cfg->fc_table);
1387 }
38308473
DM
1388
1389 if (!table)
c71099ac 1390 goto out;
c71099ac 1391
8b96d22d 1392 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1da177e4 1393
38308473 1394 if (!rt) {
1da177e4
LT
1395 err = -ENOMEM;
1396 goto out;
1397 }
1398
1716a961
G
1399 if (cfg->fc_flags & RTF_EXPIRES)
1400 rt6_set_expires(rt, jiffies +
1401 clock_t_to_jiffies(cfg->fc_expires));
1402 else
1403 rt6_clean_expires(rt);
1da177e4 1404
86872cb5
TG
1405 if (cfg->fc_protocol == RTPROT_UNSPEC)
1406 cfg->fc_protocol = RTPROT_BOOT;
1407 rt->rt6i_protocol = cfg->fc_protocol;
1408
1409 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1410
1411 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1412 rt->dst.input = ip6_mc_input;
ab79ad14
1413 else if (cfg->fc_flags & RTF_LOCAL)
1414 rt->dst.input = ip6_input;
1da177e4 1415 else
d8d1f30b 1416 rt->dst.input = ip6_forward;
1da177e4 1417
d8d1f30b 1418 rt->dst.output = ip6_output;
1da177e4 1419
86872cb5
TG
1420 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1421 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1422 if (rt->rt6i_dst.plen == 128)
11d53b49 1423 rt->dst.flags |= DST_HOST;
1da177e4 1424
8e2ec639
YZ
1425 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1426 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1427 if (!metrics) {
1428 err = -ENOMEM;
1429 goto out;
1430 }
1431 dst_init_metrics(&rt->dst, metrics, 0);
1432 }
1da177e4 1433#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1434 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1435 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1436#endif
1437
86872cb5 1438 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1439
1440 /* We cannot add true routes via loopback here,
1441 they would result in kernel looping; promote them to reject routes
1442 */
86872cb5 1443 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1444 (dev && (dev->flags & IFF_LOOPBACK) &&
1445 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1446 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1447 /* hold loopback dev/idev if we haven't done so. */
5578689a 1448 if (dev != net->loopback_dev) {
1da177e4
LT
1449 if (dev) {
1450 dev_put(dev);
1451 in6_dev_put(idev);
1452 }
5578689a 1453 dev = net->loopback_dev;
1da177e4
LT
1454 dev_hold(dev);
1455 idev = in6_dev_get(dev);
1456 if (!idev) {
1457 err = -ENODEV;
1458 goto out;
1459 }
1460 }
d8d1f30b
CG
1461 rt->dst.output = ip6_pkt_discard_out;
1462 rt->dst.input = ip6_pkt_discard;
1da177e4 1463 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1464 switch (cfg->fc_type) {
1465 case RTN_BLACKHOLE:
1466 rt->dst.error = -EINVAL;
1467 break;
1468 case RTN_PROHIBIT:
1469 rt->dst.error = -EACCES;
1470 break;
b4949ab2
ND
1471 case RTN_THROW:
1472 rt->dst.error = -EAGAIN;
1473 break;
ef2c7d7b
ND
1474 default:
1475 rt->dst.error = -ENETUNREACH;
1476 break;
1477 }
1da177e4
LT
1478 goto install_route;
1479 }
1480
86872cb5 1481 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1482 const struct in6_addr *gw_addr;
1da177e4
LT
1483 int gwa_type;
1484
86872cb5 1485 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1486 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1487 gwa_type = ipv6_addr_type(gw_addr);
1488
1489 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1490 struct rt6_info *grt;
1491
1492 /* IPv6 strictly inhibits using not link-local
1493 addresses as nexthop address.
1494 Otherwise, router will not able to send redirects.
1495 It is very good, but in some (rare!) circumstances
1496 (SIT, PtP, NBMA NOARP links) it is handy to allow
1497 some exceptions. --ANK
1498 */
1499 err = -EINVAL;
38308473 1500 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1501 goto out;
1502
5578689a 1503 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1504
1505 err = -EHOSTUNREACH;
38308473 1506 if (!grt)
1da177e4
LT
1507 goto out;
1508 if (dev) {
d1918542 1509 if (dev != grt->dst.dev) {
d8d1f30b 1510 dst_release(&grt->dst);
1da177e4
LT
1511 goto out;
1512 }
1513 } else {
d1918542 1514 dev = grt->dst.dev;
1da177e4
LT
1515 idev = grt->rt6i_idev;
1516 dev_hold(dev);
1517 in6_dev_hold(grt->rt6i_idev);
1518 }
38308473 1519 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1520 err = 0;
d8d1f30b 1521 dst_release(&grt->dst);
1da177e4
LT
1522
1523 if (err)
1524 goto out;
1525 }
1526 err = -EINVAL;
38308473 1527 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1528 goto out;
1529 }
1530
1531 err = -ENODEV;
38308473 1532 if (!dev)
1da177e4
LT
1533 goto out;
1534
c3968a85
DW
1535 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1536 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1537 err = -EINVAL;
1538 goto out;
1539 }
4e3fd7a0 1540 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1541 rt->rt6i_prefsrc.plen = 128;
1542 } else
1543 rt->rt6i_prefsrc.plen = 0;
1544
86872cb5 1545 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1546 err = rt6_bind_neighbour(rt, dev);
f83c7790 1547 if (err)
1da177e4 1548 goto out;
1da177e4
LT
1549 }
1550
86872cb5 1551 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1552
1553install_route:
86872cb5
TG
1554 if (cfg->fc_mx) {
1555 struct nlattr *nla;
1556 int remaining;
1557
1558 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1559 int type = nla_type(nla);
86872cb5
TG
1560
1561 if (type) {
1562 if (type > RTAX_MAX) {
1da177e4
LT
1563 err = -EINVAL;
1564 goto out;
1565 }
86872cb5 1566
defb3519 1567 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1568 }
1da177e4
LT
1569 }
1570 }
1571
d8d1f30b 1572 rt->dst.dev = dev;
1da177e4 1573 rt->rt6i_idev = idev;
c71099ac 1574 rt->rt6i_table = table;
63152fc0 1575
c346dca1 1576 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1577
86872cb5 1578 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1579
1580out:
1581 if (dev)
1582 dev_put(dev);
1583 if (idev)
1584 in6_dev_put(idev);
1585 if (rt)
d8d1f30b 1586 dst_free(&rt->dst);
1da177e4
LT
1587 return err;
1588}
1589
86872cb5 1590static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1591{
1592 int err;
c71099ac 1593 struct fib6_table *table;
d1918542 1594 struct net *net = dev_net(rt->dst.dev);
1da177e4 1595
8ed67789 1596 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1597 return -ENOENT;
1598
c71099ac
TG
1599 table = rt->rt6i_table;
1600 write_lock_bh(&table->tb6_lock);
1da177e4 1601
86872cb5 1602 err = fib6_del(rt, info);
d8d1f30b 1603 dst_release(&rt->dst);
1da177e4 1604
c71099ac 1605 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1606
1607 return err;
1608}
1609
e0a1ad73
TG
1610int ip6_del_rt(struct rt6_info *rt)
1611{
4d1169c1 1612 struct nl_info info = {
d1918542 1613 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1614 };
528c4ceb 1615 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1616}
1617
86872cb5 1618static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1619{
c71099ac 1620 struct fib6_table *table;
1da177e4
LT
1621 struct fib6_node *fn;
1622 struct rt6_info *rt;
1623 int err = -ESRCH;
1624
5578689a 1625 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1626 if (!table)
c71099ac
TG
1627 return err;
1628
1629 read_lock_bh(&table->tb6_lock);
1da177e4 1630
c71099ac 1631 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1632 &cfg->fc_dst, cfg->fc_dst_len,
1633 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1634
1da177e4 1635 if (fn) {
d8d1f30b 1636 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1637 if (cfg->fc_ifindex &&
d1918542
DM
1638 (!rt->dst.dev ||
1639 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1640 continue;
86872cb5
TG
1641 if (cfg->fc_flags & RTF_GATEWAY &&
1642 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1643 continue;
86872cb5 1644 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1645 continue;
d8d1f30b 1646 dst_hold(&rt->dst);
c71099ac 1647 read_unlock_bh(&table->tb6_lock);
1da177e4 1648
86872cb5 1649 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1650 }
1651 }
c71099ac 1652 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1653
1654 return err;
1655}
1656
6700c270 1657static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 1658{
e8599ff4 1659 struct net *net = dev_net(skb->dev);
a6279458 1660 struct netevent_redirect netevent;
e8599ff4
DM
1661 struct rt6_info *rt, *nrt = NULL;
1662 const struct in6_addr *target;
e8599ff4 1663 struct ndisc_options ndopts;
6e157b6a
DM
1664 const struct in6_addr *dest;
1665 struct neighbour *old_neigh;
e8599ff4
DM
1666 struct inet6_dev *in6_dev;
1667 struct neighbour *neigh;
1668 struct icmp6hdr *icmph;
6e157b6a
DM
1669 int optlen, on_link;
1670 u8 *lladdr;
e8599ff4
DM
1671
1672 optlen = skb->tail - skb->transport_header;
1673 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1674
1675 if (optlen < 0) {
6e157b6a 1676 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
1677 return;
1678 }
1679
1680 icmph = icmp6_hdr(skb);
1681 target = (const struct in6_addr *) (icmph + 1);
1682 dest = target + 1;
1683
1684 if (ipv6_addr_is_multicast(dest)) {
6e157b6a 1685 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
1686 return;
1687 }
1688
6e157b6a 1689 on_link = 0;
e8599ff4
DM
1690 if (ipv6_addr_equal(dest, target)) {
1691 on_link = 1;
1692 } else if (ipv6_addr_type(target) !=
1693 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 1694 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
1695 return;
1696 }
1697
1698 in6_dev = __in6_dev_get(skb->dev);
1699 if (!in6_dev)
1700 return;
1701 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1702 return;
1703
1704 /* RFC2461 8.1:
1705 * The IP source address of the Redirect MUST be the same as the current
1706 * first-hop router for the specified ICMP Destination Address.
1707 */
1708
1709 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1710 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1711 return;
1712 }
6e157b6a
DM
1713
1714 lladdr = NULL;
e8599ff4
DM
1715 if (ndopts.nd_opts_tgt_lladdr) {
1716 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1717 skb->dev);
1718 if (!lladdr) {
1719 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1720 return;
1721 }
1722 }
1723
6e157b6a
DM
1724 rt = (struct rt6_info *) dst;
1725 if (rt == net->ipv6.ip6_null_entry) {
1726 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 1727 return;
6e157b6a 1728 }
e8599ff4 1729
6e157b6a
DM
1730 /* Redirect received -> path was valid.
1731 * Look, redirects are sent only in response to data packets,
1732 * so that this nexthop apparently is reachable. --ANK
1733 */
1734 dst_confirm(&rt->dst);
a6279458 1735
6e157b6a
DM
1736 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1737 if (!neigh)
1738 return;
a6279458 1739
6e157b6a
DM
1740 /* Duplicate redirect: silently ignore. */
1741 old_neigh = rt->n;
1742 if (neigh == old_neigh)
a6279458 1743 goto out;
1da177e4 1744
1da177e4
LT
1745 /*
1746 * We have finally decided to accept it.
1747 */
1748
1ab1457c 1749 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1750 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1751 NEIGH_UPDATE_F_OVERRIDE|
1752 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1753 NEIGH_UPDATE_F_ISROUTER))
1754 );
1755
21efcfa0 1756 nrt = ip6_rt_copy(rt, dest);
38308473 1757 if (!nrt)
1da177e4
LT
1758 goto out;
1759
1760 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1761 if (on_link)
1762 nrt->rt6i_flags &= ~RTF_GATEWAY;
1763
4e3fd7a0 1764 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
97cac082 1765 nrt->n = neigh_clone(neigh);
1da177e4 1766
40e22e8f 1767 if (ip6_ins_rt(nrt))
1da177e4
LT
1768 goto out;
1769
d8d1f30b 1770 netevent.old = &rt->dst;
1d248b1c 1771 netevent.old_neigh = old_neigh;
d8d1f30b 1772 netevent.new = &nrt->dst;
1d248b1c
DM
1773 netevent.new_neigh = neigh;
1774 netevent.daddr = dest;
8d71740c
TT
1775 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1776
38308473 1777 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 1778 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 1779 ip6_del_rt(rt);
1da177e4
LT
1780 }
1781
1782out:
e8599ff4 1783 neigh_release(neigh);
6e157b6a
DM
1784}
1785
1da177e4
LT
1786/*
1787 * Misc support functions
1788 */
1789
1716a961 1790static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1791 const struct in6_addr *dest)
1da177e4 1792{
d1918542 1793 struct net *net = dev_net(ort->dst.dev);
8b96d22d
DM
1794 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1795 ort->rt6i_table);
1da177e4
LT
1796
1797 if (rt) {
d8d1f30b
CG
1798 rt->dst.input = ort->dst.input;
1799 rt->dst.output = ort->dst.output;
8e2ec639 1800 rt->dst.flags |= DST_HOST;
d8d1f30b 1801
4e3fd7a0 1802 rt->rt6i_dst.addr = *dest;
8e2ec639 1803 rt->rt6i_dst.plen = 128;
defb3519 1804 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1805 rt->dst.error = ort->dst.error;
1da177e4
LT
1806 rt->rt6i_idev = ort->rt6i_idev;
1807 if (rt->rt6i_idev)
1808 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1809 rt->dst.lastuse = jiffies;
1da177e4 1810
4e3fd7a0 1811 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1812 rt->rt6i_flags = ort->rt6i_flags;
1813 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1814 (RTF_DEFAULT | RTF_ADDRCONF))
1815 rt6_set_from(rt, ort);
1816 else
1817 rt6_clean_expires(rt);
1da177e4
LT
1818 rt->rt6i_metric = 0;
1819
1da177e4
LT
1820#ifdef CONFIG_IPV6_SUBTREES
1821 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1822#endif
0f6c6392 1823 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1824 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1825 }
1826 return rt;
1827}
1828
70ceb4f5 1829#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1830static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1831 const struct in6_addr *prefix, int prefixlen,
1832 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1833{
1834 struct fib6_node *fn;
1835 struct rt6_info *rt = NULL;
c71099ac
TG
1836 struct fib6_table *table;
1837
efa2cea0 1838 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1839 if (!table)
c71099ac 1840 return NULL;
70ceb4f5 1841
5744dd9b 1842 read_lock_bh(&table->tb6_lock);
c71099ac 1843 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1844 if (!fn)
1845 goto out;
1846
d8d1f30b 1847 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1848 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1849 continue;
1850 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1851 continue;
1852 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1853 continue;
d8d1f30b 1854 dst_hold(&rt->dst);
70ceb4f5
YH
1855 break;
1856 }
1857out:
5744dd9b 1858 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1859 return rt;
1860}
1861
efa2cea0 1862static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1863 const struct in6_addr *prefix, int prefixlen,
1864 const struct in6_addr *gwaddr, int ifindex,
95c96174 1865 unsigned int pref)
70ceb4f5 1866{
86872cb5
TG
1867 struct fib6_config cfg = {
1868 .fc_table = RT6_TABLE_INFO,
238fc7ea 1869 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1870 .fc_ifindex = ifindex,
1871 .fc_dst_len = prefixlen,
1872 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1873 RTF_UP | RTF_PREF(pref),
15e47304 1874 .fc_nlinfo.portid = 0,
efa2cea0
DL
1875 .fc_nlinfo.nlh = NULL,
1876 .fc_nlinfo.nl_net = net,
86872cb5
TG
1877 };
1878
4e3fd7a0
AD
1879 cfg.fc_dst = *prefix;
1880 cfg.fc_gateway = *gwaddr;
70ceb4f5 1881
e317da96
YH
1882 /* We should treat it as a default route if prefix length is 0. */
1883 if (!prefixlen)
86872cb5 1884 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1885
86872cb5 1886 ip6_route_add(&cfg);
70ceb4f5 1887
efa2cea0 1888 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1889}
1890#endif
1891
b71d1d42 1892struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1893{
1da177e4 1894 struct rt6_info *rt;
c71099ac 1895 struct fib6_table *table;
1da177e4 1896
c346dca1 1897 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1898 if (!table)
c71099ac 1899 return NULL;
1da177e4 1900
5744dd9b 1901 read_lock_bh(&table->tb6_lock);
d8d1f30b 1902 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1903 if (dev == rt->dst.dev &&
045927ff 1904 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1905 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1906 break;
1907 }
1908 if (rt)
d8d1f30b 1909 dst_hold(&rt->dst);
5744dd9b 1910 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1911 return rt;
1912}
1913
b71d1d42 1914struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1915 struct net_device *dev,
1916 unsigned int pref)
1da177e4 1917{
86872cb5
TG
1918 struct fib6_config cfg = {
1919 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1920 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1921 .fc_ifindex = dev->ifindex,
1922 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1923 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 1924 .fc_nlinfo.portid = 0,
5578689a 1925 .fc_nlinfo.nlh = NULL,
c346dca1 1926 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1927 };
1da177e4 1928
4e3fd7a0 1929 cfg.fc_gateway = *gwaddr;
1da177e4 1930
86872cb5 1931 ip6_route_add(&cfg);
1da177e4 1932
1da177e4
LT
1933 return rt6_get_dflt_router(gwaddr, dev);
1934}
1935
7b4da532 1936void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1937{
1938 struct rt6_info *rt;
c71099ac
TG
1939 struct fib6_table *table;
1940
1941 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1942 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1943 if (!table)
c71099ac 1944 return;
1da177e4
LT
1945
1946restart:
c71099ac 1947 read_lock_bh(&table->tb6_lock);
d8d1f30b 1948 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1949 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1950 dst_hold(&rt->dst);
c71099ac 1951 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1952 ip6_del_rt(rt);
1da177e4
LT
1953 goto restart;
1954 }
1955 }
c71099ac 1956 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1957}
1958
5578689a
DL
1959static void rtmsg_to_fib6_config(struct net *net,
1960 struct in6_rtmsg *rtmsg,
86872cb5
TG
1961 struct fib6_config *cfg)
1962{
1963 memset(cfg, 0, sizeof(*cfg));
1964
1965 cfg->fc_table = RT6_TABLE_MAIN;
1966 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1967 cfg->fc_metric = rtmsg->rtmsg_metric;
1968 cfg->fc_expires = rtmsg->rtmsg_info;
1969 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1970 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1971 cfg->fc_flags = rtmsg->rtmsg_flags;
1972
5578689a 1973 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1974
4e3fd7a0
AD
1975 cfg->fc_dst = rtmsg->rtmsg_dst;
1976 cfg->fc_src = rtmsg->rtmsg_src;
1977 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1978}
1979
5578689a 1980int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1981{
86872cb5 1982 struct fib6_config cfg;
1da177e4
LT
1983 struct in6_rtmsg rtmsg;
1984 int err;
1985
1986 switch(cmd) {
1987 case SIOCADDRT: /* Add a route */
1988 case SIOCDELRT: /* Delete a route */
1989 if (!capable(CAP_NET_ADMIN))
1990 return -EPERM;
1991 err = copy_from_user(&rtmsg, arg,
1992 sizeof(struct in6_rtmsg));
1993 if (err)
1994 return -EFAULT;
86872cb5 1995
5578689a 1996 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1997
1da177e4
LT
1998 rtnl_lock();
1999 switch (cmd) {
2000 case SIOCADDRT:
86872cb5 2001 err = ip6_route_add(&cfg);
1da177e4
LT
2002 break;
2003 case SIOCDELRT:
86872cb5 2004 err = ip6_route_del(&cfg);
1da177e4
LT
2005 break;
2006 default:
2007 err = -EINVAL;
2008 }
2009 rtnl_unlock();
2010
2011 return err;
3ff50b79 2012 }
1da177e4
LT
2013
2014 return -EINVAL;
2015}
2016
2017/*
2018 * Drop the packet on the floor
2019 */
2020
d5fdd6ba 2021static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2022{
612f09e8 2023 int type;
adf30907 2024 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2025 switch (ipstats_mib_noroutes) {
2026 case IPSTATS_MIB_INNOROUTES:
0660e03f 2027 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2028 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2029 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2030 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2031 break;
2032 }
2033 /* FALLTHROUGH */
2034 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2035 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2036 ipstats_mib_noroutes);
612f09e8
YH
2037 break;
2038 }
3ffe533c 2039 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2040 kfree_skb(skb);
2041 return 0;
2042}
2043
9ce8ade0
TG
2044static int ip6_pkt_discard(struct sk_buff *skb)
2045{
612f09e8 2046 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2047}
2048
20380731 2049static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2050{
adf30907 2051 skb->dev = skb_dst(skb)->dev;
612f09e8 2052 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2053}
2054
6723ab54
DM
2055#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2056
9ce8ade0
TG
2057static int ip6_pkt_prohibit(struct sk_buff *skb)
2058{
612f09e8 2059 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2060}
2061
2062static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2063{
adf30907 2064 skb->dev = skb_dst(skb)->dev;
612f09e8 2065 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2066}
2067
6723ab54
DM
2068#endif
2069
1da177e4
LT
2070/*
2071 * Allocate a dst for local (unicast / anycast) address.
2072 */
2073
2074struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2075 const struct in6_addr *addr,
8f031519 2076 bool anycast)
1da177e4 2077{
c346dca1 2078 struct net *net = dev_net(idev->dev);
8b96d22d 2079 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
f83c7790 2080 int err;
1da177e4 2081
38308473 2082 if (!rt) {
f3213831 2083 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
1da177e4 2084 return ERR_PTR(-ENOMEM);
40385653 2085 }
1da177e4 2086
1da177e4
LT
2087 in6_dev_hold(idev);
2088
11d53b49 2089 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2090 rt->dst.input = ip6_input;
2091 rt->dst.output = ip6_output;
1da177e4 2092 rt->rt6i_idev = idev;
1da177e4
LT
2093
2094 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2095 if (anycast)
2096 rt->rt6i_flags |= RTF_ANYCAST;
2097 else
1da177e4 2098 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2099 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2100 if (err) {
d8d1f30b 2101 dst_free(&rt->dst);
f83c7790 2102 return ERR_PTR(err);
1da177e4
LT
2103 }
2104
4e3fd7a0 2105 rt->rt6i_dst.addr = *addr;
1da177e4 2106 rt->rt6i_dst.plen = 128;
5578689a 2107 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2108
d8d1f30b 2109 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2110
2111 return rt;
2112}
2113
c3968a85
DW
2114int ip6_route_get_saddr(struct net *net,
2115 struct rt6_info *rt,
b71d1d42 2116 const struct in6_addr *daddr,
c3968a85
DW
2117 unsigned int prefs,
2118 struct in6_addr *saddr)
2119{
2120 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2121 int err = 0;
2122 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2123 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2124 else
2125 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2126 daddr, prefs, saddr);
2127 return err;
2128}
2129
2130/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device	*dev;	/* device to match; NULL matches any */
	struct net		*net;	/* namespace whose null entry is skipped */
	struct in6_addr		*addr;	/* preferred-source address being removed */
};
2136
2137static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2138{
2139 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2140 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2141 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2142
d1918542 2143 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2144 rt != net->ipv6.ip6_null_entry &&
2145 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2146 /* remove prefsrc entry */
2147 rt->rt6i_prefsrc.plen = 0;
2148 }
2149 return 0;
2150}
2151
2152void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2153{
2154 struct net *net = dev_net(ifp->idev->dev);
2155 struct arg_dev_net_ip adni = {
2156 .dev = ifp->idev->dev,
2157 .net = net,
2158 .addr = &ifp->addr,
2159 };
2160 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2161}
2162
8ed67789
DL
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device	*dev;	/* device to match; NULL matches any */
	struct net		*net;	/* namespace whose null entry is kept */
};
2167
1da177e4
LT
2168static int fib6_ifdown(struct rt6_info *rt, void *arg)
2169{
bc3ef660 2170 const struct arg_dev_net *adn = arg;
2171 const struct net_device *dev = adn->dev;
8ed67789 2172
d1918542 2173 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2174 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2175 return -1;
c159d30c 2176
1da177e4
LT
2177 return 0;
2178}
2179
f3db4851 2180void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2181{
8ed67789
DL
2182 struct arg_dev_net adn = {
2183 .dev = dev,
2184 .net = net,
2185 };
2186
2187 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2188 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2189}
2190
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* the new MTU */
};
2195
2196static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2197{
2198 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2199 struct inet6_dev *idev;
2200
2201 /* In IPv6 pmtu discovery is not optional,
2202 so that RTAX_MTU lock cannot disable it.
2203 We still use this lock to block changes
2204 caused by addrconf/ndisc.
2205 */
2206
2207 idev = __in6_dev_get(arg->dev);
38308473 2208 if (!idev)
1da177e4
LT
2209 return 0;
2210
2211 /* For administrative MTU increase, there is no way to discover
2212 IPv6 PMTU increase, so PMTU increase should be updated here.
2213 Since RFC 1981 doesn't include administrative MTU increase
2214 update PMTU increase is a MUST. (i.e. jumbo frame)
2215 */
2216 /*
2217 If new MTU is less than route PMTU, this new MTU will be the
2218 lowest MTU in the path, update the route PMTU to reflect PMTU
2219 decreases; if new MTU is greater than route PMTU, and the
2220 old MTU is the lowest MTU in the path, update the route PMTU
2221 to reflect the increase. In this case if the other nodes' MTU
2222 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2223 PMTU discouvery.
2224 */
d1918542 2225 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2226 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2227 (dst_mtu(&rt->dst) >= arg->mtu ||
2228 (dst_mtu(&rt->dst) < arg->mtu &&
2229 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2230 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2231 }
1da177e4
LT
2232 return 0;
2233}
2234
95c96174 2235void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2236{
c71099ac
TG
2237 struct rt6_mtu_change_arg arg = {
2238 .dev = dev,
2239 .mtu = mtu,
2240 };
1da177e4 2241
c346dca1 2242 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2243}
2244
ef7c79ed 2245static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2246 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2247 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2248 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2249 [RTA_PRIORITY] = { .type = NLA_U32 },
2250 [RTA_METRICS] = { .type = NLA_NESTED },
2251};
2252
2253static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2254 struct fib6_config *cfg)
1da177e4 2255{
86872cb5
TG
2256 struct rtmsg *rtm;
2257 struct nlattr *tb[RTA_MAX+1];
2258 int err;
1da177e4 2259
86872cb5
TG
2260 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2261 if (err < 0)
2262 goto errout;
1da177e4 2263
86872cb5
TG
2264 err = -EINVAL;
2265 rtm = nlmsg_data(nlh);
2266 memset(cfg, 0, sizeof(*cfg));
2267
2268 cfg->fc_table = rtm->rtm_table;
2269 cfg->fc_dst_len = rtm->rtm_dst_len;
2270 cfg->fc_src_len = rtm->rtm_src_len;
2271 cfg->fc_flags = RTF_UP;
2272 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2273 cfg->fc_type = rtm->rtm_type;
86872cb5 2274
ef2c7d7b
ND
2275 if (rtm->rtm_type == RTN_UNREACHABLE ||
2276 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2277 rtm->rtm_type == RTN_PROHIBIT ||
2278 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2279 cfg->fc_flags |= RTF_REJECT;
2280
ab79ad14
2281 if (rtm->rtm_type == RTN_LOCAL)
2282 cfg->fc_flags |= RTF_LOCAL;
2283
15e47304 2284 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2285 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2286 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2287
2288 if (tb[RTA_GATEWAY]) {
2289 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2290 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2291 }
86872cb5
TG
2292
2293 if (tb[RTA_DST]) {
2294 int plen = (rtm->rtm_dst_len + 7) >> 3;
2295
2296 if (nla_len(tb[RTA_DST]) < plen)
2297 goto errout;
2298
2299 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2300 }
86872cb5
TG
2301
2302 if (tb[RTA_SRC]) {
2303 int plen = (rtm->rtm_src_len + 7) >> 3;
2304
2305 if (nla_len(tb[RTA_SRC]) < plen)
2306 goto errout;
2307
2308 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2309 }
86872cb5 2310
c3968a85
DW
2311 if (tb[RTA_PREFSRC])
2312 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2313
86872cb5
TG
2314 if (tb[RTA_OIF])
2315 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2316
2317 if (tb[RTA_PRIORITY])
2318 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2319
2320 if (tb[RTA_METRICS]) {
2321 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2322 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2323 }
86872cb5
TG
2324
2325 if (tb[RTA_TABLE])
2326 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2327
2328 err = 0;
2329errout:
2330 return err;
1da177e4
LT
2331}
2332
c127ea2c 2333static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2334{
86872cb5
TG
2335 struct fib6_config cfg;
2336 int err;
1da177e4 2337
86872cb5
TG
2338 err = rtm_to_fib6_config(skb, nlh, &cfg);
2339 if (err < 0)
2340 return err;
2341
2342 return ip6_route_del(&cfg);
1da177e4
LT
2343}
2344
c127ea2c 2345static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2346{
86872cb5
TG
2347 struct fib6_config cfg;
2348 int err;
1da177e4 2349
86872cb5
TG
2350 err = rtm_to_fib6_config(skb, nlh, &cfg);
2351 if (err < 0)
2352 return err;
2353
2354 return ip6_route_add(&cfg);
1da177e4
LT
2355}
2356
339bf98f
TG
2357static inline size_t rt6_nlmsg_size(void)
2358{
2359 return NLMSG_ALIGN(sizeof(struct rtmsg))
2360 + nla_total_size(16) /* RTA_SRC */
2361 + nla_total_size(16) /* RTA_DST */
2362 + nla_total_size(16) /* RTA_GATEWAY */
2363 + nla_total_size(16) /* RTA_PREFSRC */
2364 + nla_total_size(4) /* RTA_TABLE */
2365 + nla_total_size(4) /* RTA_IIF */
2366 + nla_total_size(4) /* RTA_OIF */
2367 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2368 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2369 + nla_total_size(sizeof(struct rta_cacheinfo));
2370}
2371
191cd582
BH
2372static int rt6_fill_node(struct net *net,
2373 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 2374 struct in6_addr *dst, struct in6_addr *src,
15e47304 2375 int iif, int type, u32 portid, u32 seq,
7bc570c8 2376 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2377{
2378 struct rtmsg *rtm;
2d7202bf 2379 struct nlmsghdr *nlh;
e3703b3d 2380 long expires;
9e762a4a 2381 u32 table;
f2c31e32 2382 struct neighbour *n;
1da177e4
LT
2383
2384 if (prefix) { /* user wants prefix routes only */
2385 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2386 /* success since this is not a prefix route */
2387 return 1;
2388 }
2389 }
2390
15e47304 2391 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 2392 if (!nlh)
26932566 2393 return -EMSGSIZE;
2d7202bf
TG
2394
2395 rtm = nlmsg_data(nlh);
1da177e4
LT
2396 rtm->rtm_family = AF_INET6;
2397 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2398 rtm->rtm_src_len = rt->rt6i_src.plen;
2399 rtm->rtm_tos = 0;
c71099ac 2400 if (rt->rt6i_table)
9e762a4a 2401 table = rt->rt6i_table->tb6_id;
c71099ac 2402 else
9e762a4a
PM
2403 table = RT6_TABLE_UNSPEC;
2404 rtm->rtm_table = table;
c78679e8
DM
2405 if (nla_put_u32(skb, RTA_TABLE, table))
2406 goto nla_put_failure;
ef2c7d7b
ND
2407 if (rt->rt6i_flags & RTF_REJECT) {
2408 switch (rt->dst.error) {
2409 case -EINVAL:
2410 rtm->rtm_type = RTN_BLACKHOLE;
2411 break;
2412 case -EACCES:
2413 rtm->rtm_type = RTN_PROHIBIT;
2414 break;
b4949ab2
ND
2415 case -EAGAIN:
2416 rtm->rtm_type = RTN_THROW;
2417 break;
ef2c7d7b
ND
2418 default:
2419 rtm->rtm_type = RTN_UNREACHABLE;
2420 break;
2421 }
2422 }
38308473 2423 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2424 rtm->rtm_type = RTN_LOCAL;
d1918542 2425 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2426 rtm->rtm_type = RTN_LOCAL;
2427 else
2428 rtm->rtm_type = RTN_UNICAST;
2429 rtm->rtm_flags = 0;
2430 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2431 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2432 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 2433 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
2434 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2435 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2436 rtm->rtm_protocol = RTPROT_RA;
2437 else
2438 rtm->rtm_protocol = RTPROT_KERNEL;
2439 }
1da177e4 2440
38308473 2441 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2442 rtm->rtm_flags |= RTM_F_CLONED;
2443
2444 if (dst) {
c78679e8
DM
2445 if (nla_put(skb, RTA_DST, 16, dst))
2446 goto nla_put_failure;
1ab1457c 2447 rtm->rtm_dst_len = 128;
1da177e4 2448 } else if (rtm->rtm_dst_len)
c78679e8
DM
2449 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2450 goto nla_put_failure;
1da177e4
LT
2451#ifdef CONFIG_IPV6_SUBTREES
2452 if (src) {
c78679e8
DM
2453 if (nla_put(skb, RTA_SRC, 16, src))
2454 goto nla_put_failure;
1ab1457c 2455 rtm->rtm_src_len = 128;
c78679e8
DM
2456 } else if (rtm->rtm_src_len &&
2457 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2458 goto nla_put_failure;
1da177e4 2459#endif
7bc570c8
YH
2460 if (iif) {
2461#ifdef CONFIG_IPV6_MROUTE
2462 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2463 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2464 if (err <= 0) {
2465 if (!nowait) {
2466 if (err == 0)
2467 return 0;
2468 goto nla_put_failure;
2469 } else {
2470 if (err == -EMSGSIZE)
2471 goto nla_put_failure;
2472 }
2473 }
2474 } else
2475#endif
c78679e8
DM
2476 if (nla_put_u32(skb, RTA_IIF, iif))
2477 goto nla_put_failure;
7bc570c8 2478 } else if (dst) {
1da177e4 2479 struct in6_addr saddr_buf;
c78679e8
DM
2480 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2481 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2482 goto nla_put_failure;
1da177e4 2483 }
2d7202bf 2484
c3968a85
DW
2485 if (rt->rt6i_prefsrc.plen) {
2486 struct in6_addr saddr_buf;
4e3fd7a0 2487 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2488 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2489 goto nla_put_failure;
c3968a85
DW
2490 }
2491
defb3519 2492 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2493 goto nla_put_failure;
2494
97cac082 2495 n = rt->n;
94f826b8 2496 if (n) {
fdd6681d 2497 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
94f826b8 2498 goto nla_put_failure;
94f826b8 2499 }
2d7202bf 2500
c78679e8
DM
2501 if (rt->dst.dev &&
2502 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2503 goto nla_put_failure;
2504 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2505 goto nla_put_failure;
8253947e
LW
2506
2507 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 2508
87a50699 2509 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 2510 goto nla_put_failure;
2d7202bf
TG
2511
2512 return nlmsg_end(skb, nlh);
2513
2514nla_put_failure:
26932566
PM
2515 nlmsg_cancel(skb, nlh);
2516 return -EMSGSIZE;
1da177e4
LT
2517}
2518
1b43af54 2519int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2520{
2521 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2522 int prefix;
2523
2d7202bf
TG
2524 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2525 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2526 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2527 } else
2528 prefix = 0;
2529
191cd582
BH
2530 return rt6_fill_node(arg->net,
2531 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 2532 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2533 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2534}
2535
c127ea2c 2536static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2537{
3b1e0a65 2538 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2539 struct nlattr *tb[RTA_MAX+1];
2540 struct rt6_info *rt;
1da177e4 2541 struct sk_buff *skb;
ab364a6f 2542 struct rtmsg *rtm;
4c9483b2 2543 struct flowi6 fl6;
72331bc0 2544 int err, iif = 0, oif = 0;
1da177e4 2545
ab364a6f
TG
2546 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2547 if (err < 0)
2548 goto errout;
1da177e4 2549
ab364a6f 2550 err = -EINVAL;
4c9483b2 2551 memset(&fl6, 0, sizeof(fl6));
1da177e4 2552
ab364a6f
TG
2553 if (tb[RTA_SRC]) {
2554 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2555 goto errout;
2556
4e3fd7a0 2557 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2558 }
2559
2560 if (tb[RTA_DST]) {
2561 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2562 goto errout;
2563
4e3fd7a0 2564 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2565 }
2566
2567 if (tb[RTA_IIF])
2568 iif = nla_get_u32(tb[RTA_IIF]);
2569
2570 if (tb[RTA_OIF])
72331bc0 2571 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2572
2573 if (iif) {
2574 struct net_device *dev;
72331bc0
SL
2575 int flags = 0;
2576
5578689a 2577 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2578 if (!dev) {
2579 err = -ENODEV;
ab364a6f 2580 goto errout;
1da177e4 2581 }
72331bc0
SL
2582
2583 fl6.flowi6_iif = iif;
2584
2585 if (!ipv6_addr_any(&fl6.saddr))
2586 flags |= RT6_LOOKUP_F_HAS_SADDR;
2587
2588 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2589 flags);
2590 } else {
2591 fl6.flowi6_oif = oif;
2592
2593 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2594 }
2595
ab364a6f 2596 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2597 if (!skb) {
2173bff5 2598 dst_release(&rt->dst);
ab364a6f
TG
2599 err = -ENOBUFS;
2600 goto errout;
2601 }
1da177e4 2602
ab364a6f
TG
2603 /* Reserve room for dummy headers, this skb can pass
2604 through good chunk of routing engine.
2605 */
459a98ed 2606 skb_reset_mac_header(skb);
ab364a6f 2607 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2608
d8d1f30b 2609 skb_dst_set(skb, &rt->dst);
1da177e4 2610
4c9483b2 2611 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 2612 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 2613 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2614 if (err < 0) {
ab364a6f
TG
2615 kfree_skb(skb);
2616 goto errout;
1da177e4
LT
2617 }
2618
15e47304 2619 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 2620errout:
1da177e4 2621 return err;
1da177e4
LT
2622}
2623
86872cb5 2624void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2625{
2626 struct sk_buff *skb;
5578689a 2627 struct net *net = info->nl_net;
528c4ceb
DL
2628 u32 seq;
2629 int err;
2630
2631 err = -ENOBUFS;
38308473 2632 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2633
339bf98f 2634 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2635 if (!skb)
21713ebc
TG
2636 goto errout;
2637
191cd582 2638 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
15e47304 2639 event, info->portid, seq, 0, 0, 0);
26932566
PM
2640 if (err < 0) {
2641 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2642 WARN_ON(err == -EMSGSIZE);
2643 kfree_skb(skb);
2644 goto errout;
2645 }
15e47304 2646 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
2647 info->nlh, gfp_any());
2648 return;
21713ebc
TG
2649errout:
2650 if (err < 0)
5578689a 2651 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2652}
2653
8ed67789
DL
2654static int ip6_route_dev_notify(struct notifier_block *this,
2655 unsigned long event, void *data)
2656{
2657 struct net_device *dev = (struct net_device *)data;
c346dca1 2658 struct net *net = dev_net(dev);
8ed67789
DL
2659
2660 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2661 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2662 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2663#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2664 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2665 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2666 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2667 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2668#endif
2669 }
2670
2671 return NOTIFY_OK;
2672}
2673
1da177e4
LT
2674/*
2675 * /proc
2676 */
2677
2678#ifdef CONFIG_PROC_FS
2679
1da177e4
LT
/* NOTE(review): nothing in the visible code references this struct. */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2688
2689static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2690{
33120b30 2691 struct seq_file *m = p_arg;
69cce1d1 2692 struct neighbour *n;
1da177e4 2693
4b7a4274 2694 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2695
2696#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2697 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2698#else
33120b30 2699 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2700#endif
97cac082 2701 n = rt->n;
69cce1d1
DM
2702 if (n) {
2703 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2704 } else {
33120b30 2705 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2706 }
33120b30 2707 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2708 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2709 rt->dst.__use, rt->rt6i_flags,
d1918542 2710 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2711 return 0;
2712}
2713
33120b30 2714static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2715{
f3db4851 2716 struct net *net = (struct net *)m->private;
32b293a5 2717 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2718 return 0;
2719}
1da177e4 2720
33120b30
AD
2721static int ipv6_route_open(struct inode *inode, struct file *file)
2722{
de05c557 2723 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2724}
2725
33120b30
AD
2726static const struct file_operations ipv6_route_proc_fops = {
2727 .owner = THIS_MODULE,
2728 .open = ipv6_route_open,
2729 .read = seq_read,
2730 .llseek = seq_lseek,
b6fcbdb4 2731 .release = single_release_net,
33120b30
AD
2732};
2733
1da177e4
LT
2734static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2735{
69ddb805 2736 struct net *net = (struct net *)seq->private;
1da177e4 2737 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2738 net->ipv6.rt6_stats->fib_nodes,
2739 net->ipv6.rt6_stats->fib_route_nodes,
2740 net->ipv6.rt6_stats->fib_rt_alloc,
2741 net->ipv6.rt6_stats->fib_rt_entries,
2742 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2743 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2744 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2745
2746 return 0;
2747}
2748
2749static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2750{
de05c557 2751 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2752}
2753
9a32144e 2754static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2755 .owner = THIS_MODULE,
2756 .open = rt6_stats_seq_open,
2757 .read = seq_read,
2758 .llseek = seq_lseek,
b6fcbdb4 2759 .release = single_release_net,
1da177e4
LT
2760};
2761#endif /* CONFIG_PROC_FS */
2762
2763#ifdef CONFIG_SYSCTL
2764
1da177e4 2765static
8d65af78 2766int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2767 void __user *buffer, size_t *lenp, loff_t *ppos)
2768{
c486da34
LAG
2769 struct net *net;
2770 int delay;
2771 if (!write)
1da177e4 2772 return -EINVAL;
c486da34
LAG
2773
2774 net = (struct net *)ctl->extra1;
2775 delay = net->ipv6.sysctl.flush_delay;
2776 proc_dointvec(ctl, write, buffer, lenp, ppos);
2777 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2778 return 0;
1da177e4
LT
2779}
2780
760f2d01 2781ctl_table ipv6_route_table_template[] = {
1ab1457c 2782 {
1da177e4 2783 .procname = "flush",
4990509f 2784 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2785 .maxlen = sizeof(int),
89c8b3a1 2786 .mode = 0200,
6d9f239a 2787 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2788 },
2789 {
1da177e4 2790 .procname = "gc_thresh",
9a7ec3a9 2791 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2792 .maxlen = sizeof(int),
2793 .mode = 0644,
6d9f239a 2794 .proc_handler = proc_dointvec,
1da177e4
LT
2795 },
2796 {
1da177e4 2797 .procname = "max_size",
4990509f 2798 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2799 .maxlen = sizeof(int),
2800 .mode = 0644,
6d9f239a 2801 .proc_handler = proc_dointvec,
1da177e4
LT
2802 },
2803 {
1da177e4 2804 .procname = "gc_min_interval",
4990509f 2805 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2806 .maxlen = sizeof(int),
2807 .mode = 0644,
6d9f239a 2808 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2809 },
2810 {
1da177e4 2811 .procname = "gc_timeout",
4990509f 2812 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2813 .maxlen = sizeof(int),
2814 .mode = 0644,
6d9f239a 2815 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2816 },
2817 {
1da177e4 2818 .procname = "gc_interval",
4990509f 2819 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2820 .maxlen = sizeof(int),
2821 .mode = 0644,
6d9f239a 2822 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2823 },
2824 {
1da177e4 2825 .procname = "gc_elasticity",
4990509f 2826 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2827 .maxlen = sizeof(int),
2828 .mode = 0644,
f3d3f616 2829 .proc_handler = proc_dointvec,
1da177e4
LT
2830 },
2831 {
1da177e4 2832 .procname = "mtu_expires",
4990509f 2833 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2834 .maxlen = sizeof(int),
2835 .mode = 0644,
6d9f239a 2836 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2837 },
2838 {
1da177e4 2839 .procname = "min_adv_mss",
4990509f 2840 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2841 .maxlen = sizeof(int),
2842 .mode = 0644,
f3d3f616 2843 .proc_handler = proc_dointvec,
1da177e4
LT
2844 },
2845 {
1da177e4 2846 .procname = "gc_min_interval_ms",
4990509f 2847 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2848 .maxlen = sizeof(int),
2849 .mode = 0644,
6d9f239a 2850 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2851 },
f8572d8f 2852 { }
1da177e4
LT
2853};
2854
2c8c1e72 2855struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2856{
2857 struct ctl_table *table;
2858
2859 table = kmemdup(ipv6_route_table_template,
2860 sizeof(ipv6_route_table_template),
2861 GFP_KERNEL);
5ee09105
YH
2862
2863 if (table) {
2864 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2865 table[0].extra1 = net;
86393e52 2866 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2867 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2868 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2869 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2870 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2871 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2872 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2873 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2874 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2875 }
2876
760f2d01
DL
2877 return table;
2878}
1da177e4
LT
2879#endif
2880
2c8c1e72 2881static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2882{
633d424b 2883 int ret = -ENOMEM;
8ed67789 2884
86393e52
AD
2885 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2886 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2887
fc66f95c
ED
2888 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2889 goto out_ip6_dst_ops;
2890
8ed67789
DL
2891 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2892 sizeof(*net->ipv6.ip6_null_entry),
2893 GFP_KERNEL);
2894 if (!net->ipv6.ip6_null_entry)
fc66f95c 2895 goto out_ip6_dst_entries;
d8d1f30b 2896 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2897 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2898 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2899 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2900 ip6_template_metrics, true);
8ed67789
DL
2901
2902#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2903 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2904 sizeof(*net->ipv6.ip6_prohibit_entry),
2905 GFP_KERNEL);
68fffc67
PZ
2906 if (!net->ipv6.ip6_prohibit_entry)
2907 goto out_ip6_null_entry;
d8d1f30b 2908 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2909 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2910 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2911 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2912 ip6_template_metrics, true);
8ed67789
DL
2913
2914 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2915 sizeof(*net->ipv6.ip6_blk_hole_entry),
2916 GFP_KERNEL);
68fffc67
PZ
2917 if (!net->ipv6.ip6_blk_hole_entry)
2918 goto out_ip6_prohibit_entry;
d8d1f30b 2919 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2920 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2921 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2922 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2923 ip6_template_metrics, true);
8ed67789
DL
2924#endif
2925
b339a47c
PZ
2926 net->ipv6.sysctl.flush_delay = 0;
2927 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2928 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2929 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2930 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2931 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2932 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2933 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2934
6891a346
BT
2935 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2936
8ed67789
DL
2937 ret = 0;
2938out:
2939 return ret;
f2fc6a54 2940
68fffc67
PZ
2941#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2942out_ip6_prohibit_entry:
2943 kfree(net->ipv6.ip6_prohibit_entry);
2944out_ip6_null_entry:
2945 kfree(net->ipv6.ip6_null_entry);
2946#endif
fc66f95c
ED
2947out_ip6_dst_entries:
2948 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2949out_ip6_dst_ops:
f2fc6a54 2950 goto out;
cdb18761
DL
2951}
2952
2c8c1e72 2953static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 2954{
8ed67789
DL
2955 kfree(net->ipv6.ip6_null_entry);
2956#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2957 kfree(net->ipv6.ip6_prohibit_entry);
2958 kfree(net->ipv6.ip6_blk_hole_entry);
2959#endif
41bb78b4 2960 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2961}
2962
d189634e
TG
2963static int __net_init ip6_route_net_init_late(struct net *net)
2964{
2965#ifdef CONFIG_PROC_FS
2966 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2967 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2968#endif
2969 return 0;
2970}
2971
2972static void __net_exit ip6_route_net_exit_late(struct net *net)
2973{
2974#ifdef CONFIG_PROC_FS
2975 proc_net_remove(net, "ipv6_route");
2976 proc_net_remove(net, "rt6_stats");
2977#endif
2978}
2979
cdb18761
DL
2980static struct pernet_operations ip6_route_net_ops = {
2981 .init = ip6_route_net_init,
2982 .exit = ip6_route_net_exit,
2983};
2984
c3426b47
DM
2985static int __net_init ipv6_inetpeer_init(struct net *net)
2986{
2987 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2988
2989 if (!bp)
2990 return -ENOMEM;
2991 inet_peer_base_init(bp);
2992 net->ipv6.peers = bp;
2993 return 0;
2994}
2995
2996static void __net_exit ipv6_inetpeer_exit(struct net *net)
2997{
2998 struct inet_peer_base *bp = net->ipv6.peers;
2999
3000 net->ipv6.peers = NULL;
56a6b248 3001 inetpeer_invalidate_tree(bp);
c3426b47
DM
3002 kfree(bp);
3003}
3004
2b823f72 3005static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3006 .init = ipv6_inetpeer_init,
3007 .exit = ipv6_inetpeer_exit,
3008};
3009
d189634e
TG
3010static struct pernet_operations ip6_route_net_late_ops = {
3011 .init = ip6_route_net_init_late,
3012 .exit = ip6_route_net_exit_late,
3013};
3014
8ed67789
DL
3015static struct notifier_block ip6_route_dev_notifier = {
3016 .notifier_call = ip6_route_dev_notify,
3017 .priority = 0,
3018};
3019
433d49c3 3020int __init ip6_route_init(void)
1da177e4 3021{
433d49c3
DL
3022 int ret;
3023
9a7ec3a9
DL
3024 ret = -ENOMEM;
3025 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3026 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3027 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3028 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3029 goto out;
14e50e57 3030
fc66f95c 3031 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3032 if (ret)
bdb3289f 3033 goto out_kmem_cache;
bdb3289f 3034
c3426b47
DM
3035 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3036 if (ret)
e8803b6c 3037 goto out_dst_entries;
2a0c451a 3038
7e52b33b
DM
3039 ret = register_pernet_subsys(&ip6_route_net_ops);
3040 if (ret)
3041 goto out_register_inetpeer;
c3426b47 3042
5dc121e9
AE
3043 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3044
8ed67789
DL
3045 /* Registering of the loopback is done before this portion of code,
3046 * the loopback reference in rt6_info will not be taken, do it
3047 * manually for init_net */
d8d1f30b 3048 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3049 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3050 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3051 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3052 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3053 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3054 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3055 #endif
e8803b6c 3056 ret = fib6_init();
433d49c3 3057 if (ret)
8ed67789 3058 goto out_register_subsys;
433d49c3 3059
433d49c3
DL
3060 ret = xfrm6_init();
3061 if (ret)
e8803b6c 3062 goto out_fib6_init;
c35b7e72 3063
433d49c3
DL
3064 ret = fib6_rules_init();
3065 if (ret)
3066 goto xfrm6_init;
7e5449c2 3067
d189634e
TG
3068 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3069 if (ret)
3070 goto fib6_rules_init;
3071
433d49c3 3072 ret = -ENOBUFS;
c7ac8679
GR
3073 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3074 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3075 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3076 goto out_register_late_subsys;
c127ea2c 3077
8ed67789 3078 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3079 if (ret)
d189634e 3080 goto out_register_late_subsys;
8ed67789 3081
433d49c3
DL
3082out:
3083 return ret;
3084
d189634e
TG
3085out_register_late_subsys:
3086 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3087fib6_rules_init:
433d49c3
DL
3088 fib6_rules_cleanup();
3089xfrm6_init:
433d49c3 3090 xfrm6_fini();
2a0c451a
TG
3091out_fib6_init:
3092 fib6_gc_cleanup();
8ed67789
DL
3093out_register_subsys:
3094 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3095out_register_inetpeer:
3096 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3097out_dst_entries:
3098 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3099out_kmem_cache:
f2fc6a54 3100 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3101 goto out;
1da177e4
LT
3102}
3103
3104void ip6_route_cleanup(void)
3105{
8ed67789 3106 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3107 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3108 fib6_rules_cleanup();
1da177e4 3109 xfrm6_fini();
1da177e4 3110 fib6_gc_cleanup();
c3426b47 3111 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3112 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3113 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3114 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3115}