net: track link status of ipv6 nexthops
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
51ebd318 60#include <net/nexthop.h>
19e42e45 61#include <net/lwtunnel.h>
1da177e4
LT
62
63#include <asm/uaccess.h>
64
65#ifdef CONFIG_SYSCTL
66#include <linux/sysctl.h>
67#endif
68
/*
 * Result of the neighbour reachability check done while scoring a route.
 * All failure codes are negative; rt6_score_route() and find_match()
 * depend on that sign convention.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() drops the route */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour is in NUD_FAILED state */
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() requests round-robin */
	RT6_NUD_SUCCEED		= 1,
};
75
83a09abd 76static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 78static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 79static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
80static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81static void ip6_dst_destroy(struct dst_entry *);
82static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
569d3645 84static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
85
86static int ip6_pkt_discard(struct sk_buff *skb);
aad88724 87static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
7150aede 88static int ip6_pkt_prohibit(struct sk_buff *skb);
aad88724 89static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
1da177e4 90static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
91static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 struct sk_buff *skb, u32 mtu);
93static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94 struct sk_buff *skb);
4b32b5ad 95static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 96static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
1da177e4 97
70ceb4f5 98#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 99static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
100 const struct in6_addr *prefix, int prefixlen,
101 const struct in6_addr *gwaddr, int ifindex,
95c96174 102 unsigned int pref);
efa2cea0 103static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
104 const struct in6_addr *prefix, int prefixlen,
105 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
106#endif
107
8d0b94af
MKL
108struct uncached_list {
109 spinlock_t lock;
110 struct list_head head;
111};
112
113static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
114
115static void rt6_uncached_list_add(struct rt6_info *rt)
116{
117 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
118
119 rt->dst.flags |= DST_NOCACHE;
120 rt->rt6i_uncached_list = ul;
121
122 spin_lock_bh(&ul->lock);
123 list_add_tail(&rt->rt6i_uncached, &ul->head);
124 spin_unlock_bh(&ul->lock);
125}
126
127static void rt6_uncached_list_del(struct rt6_info *rt)
128{
129 if (!list_empty(&rt->rt6i_uncached)) {
130 struct uncached_list *ul = rt->rt6i_uncached_list;
131
132 spin_lock_bh(&ul->lock);
133 list_del(&rt->rt6i_uncached);
134 spin_unlock_bh(&ul->lock);
135 }
136}
137
138static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
139{
140 struct net_device *loopback_dev = net->loopback_dev;
141 int cpu;
142
143 for_each_possible_cpu(cpu) {
144 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
145 struct rt6_info *rt;
146
147 spin_lock_bh(&ul->lock);
148 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
149 struct inet6_dev *rt_idev = rt->rt6i_idev;
150 struct net_device *rt_dev = rt->dst.dev;
151
152 if (rt_idev && (rt_idev->dev == dev || !dev) &&
153 rt_idev->dev != loopback_dev) {
154 rt->rt6i_idev = in6_dev_get(loopback_dev);
155 in6_dev_put(rt_idev);
156 }
157
158 if (rt_dev && (rt_dev == dev || !dev) &&
159 rt_dev != loopback_dev) {
160 rt->dst.dev = loopback_dev;
161 dev_hold(rt->dst.dev);
162 dev_put(rt_dev);
163 }
164 }
165 spin_unlock_bh(&ul->lock);
166 }
167}
168
d52d3997
MKL
169static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
170{
171 return dst_metrics_write_ptr(rt->dst.from);
172}
173
06582540
DM
174static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
175{
4b32b5ad 176 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 177
d52d3997
MKL
178 if (rt->rt6i_flags & RTF_PCPU)
179 return rt6_pcpu_cow_metrics(rt);
180 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
181 return NULL;
182 else
3b471175 183 return dst_cow_metrics_generic(dst, old);
06582540
DM
184}
185
f894cbf8
DM
186static inline const void *choose_neigh_daddr(struct rt6_info *rt,
187 struct sk_buff *skb,
188 const void *daddr)
39232973
DM
189{
190 struct in6_addr *p = &rt->rt6i_gateway;
191
a7563f34 192 if (!ipv6_addr_any(p))
39232973 193 return (const void *) p;
f894cbf8
DM
194 else if (skb)
195 return &ipv6_hdr(skb)->daddr;
39232973
DM
196 return daddr;
197}
198
f894cbf8
DM
199static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
200 struct sk_buff *skb,
201 const void *daddr)
d3aaeb38 202{
39232973
DM
203 struct rt6_info *rt = (struct rt6_info *) dst;
204 struct neighbour *n;
205
f894cbf8 206 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 207 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
208 if (n)
209 return n;
210 return neigh_create(&nd_tbl, daddr, dst->dev);
211}
212
9a7ec3a9 213static struct dst_ops ip6_dst_ops_template = {
1da177e4 214 .family = AF_INET6,
1da177e4
LT
215 .gc = ip6_dst_gc,
216 .gc_thresh = 1024,
217 .check = ip6_dst_check,
0dbaee3b 218 .default_advmss = ip6_default_advmss,
ebb762f2 219 .mtu = ip6_mtu,
06582540 220 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
221 .destroy = ip6_dst_destroy,
222 .ifdown = ip6_dst_ifdown,
223 .negative_advice = ip6_negative_advice,
224 .link_failure = ip6_link_failure,
225 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 226 .redirect = rt6_do_redirect,
1ac06e03 227 .local_out = __ip6_local_out,
d3aaeb38 228 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
229};
230
ebb762f2 231static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 232{
618f9bc7
SK
233 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
234
235 return mtu ? : dst->dev->mtu;
ec831ea7
RD
236}
237
6700c270
DM
238static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
239 struct sk_buff *skb, u32 mtu)
14e50e57
DM
240{
241}
242
6700c270
DM
/* .redirect callback for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* no-op */
}
247
0972ddb2
HB
248static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
249 unsigned long old)
250{
251 return NULL;
252}
253
14e50e57
DM
254static struct dst_ops ip6_dst_blackhole_ops = {
255 .family = AF_INET6,
14e50e57
DM
256 .destroy = ip6_dst_destroy,
257 .check = ip6_dst_check,
ebb762f2 258 .mtu = ip6_blackhole_mtu,
214f45c9 259 .default_advmss = ip6_default_advmss,
14e50e57 260 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 261 .redirect = ip6_rt_blackhole_redirect,
0972ddb2 262 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 263 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
264};
265
62fa8a84 266static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 267 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
268};
269
fb0af4c7 270static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
271 .dst = {
272 .__refcnt = ATOMIC_INIT(1),
273 .__use = 1,
2c20cbd7 274 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 275 .error = -ENETUNREACH,
d8d1f30b
CG
276 .input = ip6_pkt_discard,
277 .output = ip6_pkt_discard_out,
1da177e4
LT
278 },
279 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 280 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
281 .rt6i_metric = ~(u32) 0,
282 .rt6i_ref = ATOMIC_INIT(1),
283};
284
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Template for the "prohibit" reject route: dst error is -EACCES. */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* Template for the "blackhole" reject route: dst error is -EINVAL. */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_sk,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
318
1da177e4 319/* allocate dst with ip6_dst_ops */
d52d3997
MKL
320static struct rt6_info *__ip6_dst_alloc(struct net *net,
321 struct net_device *dev,
322 int flags,
323 struct fib6_table *table)
1da177e4 324{
97bab73f 325 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 326 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 327
97bab73f 328 if (rt) {
8104891b
SK
329 struct dst_entry *dst = &rt->dst;
330
331 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
51ebd318 332 INIT_LIST_HEAD(&rt->rt6i_siblings);
8d0b94af 333 INIT_LIST_HEAD(&rt->rt6i_uncached);
97bab73f 334 }
cf911662 335 return rt;
1da177e4
LT
336}
337
d52d3997
MKL
338static struct rt6_info *ip6_dst_alloc(struct net *net,
339 struct net_device *dev,
340 int flags,
341 struct fib6_table *table)
342{
343 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags, table);
344
345 if (rt) {
346 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
347 if (rt->rt6i_pcpu) {
348 int cpu;
349
350 for_each_possible_cpu(cpu) {
351 struct rt6_info **p;
352
353 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
354 /* no one shares rt */
355 *p = NULL;
356 }
357 } else {
358 dst_destroy((struct dst_entry *)rt);
359 return NULL;
360 }
361 }
362
363 return rt;
364}
365
1da177e4
LT
366static void ip6_dst_destroy(struct dst_entry *dst)
367{
368 struct rt6_info *rt = (struct rt6_info *)dst;
ecd98837 369 struct dst_entry *from = dst->from;
8d0b94af 370 struct inet6_dev *idev;
1da177e4 371
4b32b5ad 372 dst_destroy_metrics_generic(dst);
87775312 373 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
374 rt6_uncached_list_del(rt);
375
376 idev = rt->rt6i_idev;
38308473 377 if (idev) {
1da177e4
LT
378 rt->rt6i_idev = NULL;
379 in6_dev_put(idev);
1ab1457c 380 }
1716a961 381
ecd98837
YH
382 dst->from = NULL;
383 dst_release(from);
b3419363
DM
384}
385
1da177e4
LT
386static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
387 int how)
388{
389 struct rt6_info *rt = (struct rt6_info *)dst;
390 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 391 struct net_device *loopback_dev =
c346dca1 392 dev_net(dev)->loopback_dev;
1da177e4 393
97cac082
DM
394 if (dev != loopback_dev) {
395 if (idev && idev->dev == dev) {
396 struct inet6_dev *loopback_idev =
397 in6_dev_get(loopback_dev);
398 if (loopback_idev) {
399 rt->rt6i_idev = loopback_idev;
400 in6_dev_put(idev);
401 }
402 }
1da177e4
LT
403 }
404}
405
a50feda5 406static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 407{
1716a961
G
408 if (rt->rt6i_flags & RTF_EXPIRES) {
409 if (time_after(jiffies, rt->dst.expires))
a50feda5 410 return true;
1716a961 411 } else if (rt->dst.from) {
3fd91fb3 412 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 413 }
a50feda5 414 return false;
1da177e4
LT
415}
416
51ebd318
ND
417/* Multipath route selection:
418 * Hash based function using packet header and flowlabel.
419 * Adapted from fib_info_hashfn()
420 */
421static int rt6_info_hash_nhsfn(unsigned int candidate_count,
422 const struct flowi6 *fl6)
423{
424 unsigned int val = fl6->flowi6_proto;
425
c08977bb
YH
426 val ^= ipv6_addr_hash(&fl6->daddr);
427 val ^= ipv6_addr_hash(&fl6->saddr);
51ebd318
ND
428
429 /* Work only if this not encapsulated */
430 switch (fl6->flowi6_proto) {
431 case IPPROTO_UDP:
432 case IPPROTO_TCP:
433 case IPPROTO_SCTP:
b3ce5ae1
ND
434 val ^= (__force u16)fl6->fl6_sport;
435 val ^= (__force u16)fl6->fl6_dport;
51ebd318
ND
436 break;
437
438 case IPPROTO_ICMPV6:
b3ce5ae1
ND
439 val ^= (__force u16)fl6->fl6_icmp_type;
440 val ^= (__force u16)fl6->fl6_icmp_code;
51ebd318
ND
441 break;
442 }
443 /* RFC6438 recommands to use flowlabel */
b3ce5ae1 444 val ^= (__force u32)fl6->flowlabel;
51ebd318
ND
445
446 /* Perhaps, we need to tune, this function? */
447 val = val ^ (val >> 7) ^ (val >> 12);
448 return val % candidate_count;
449}
450
451static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
452 struct flowi6 *fl6, int oif,
453 int strict)
51ebd318
ND
454{
455 struct rt6_info *sibling, *next_sibling;
456 int route_choosen;
457
458 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
459 /* Don't change the route, if route_choosen == 0
460 * (siblings does not include ourself)
461 */
462 if (route_choosen)
463 list_for_each_entry_safe(sibling, next_sibling,
464 &match->rt6i_siblings, rt6i_siblings) {
465 route_choosen--;
466 if (route_choosen == 0) {
52bd4c0c
ND
467 if (rt6_score_route(sibling, oif, strict) < 0)
468 break;
51ebd318
ND
469 match = sibling;
470 break;
471 }
472 }
473 return match;
474}
475
1da177e4 476/*
c71099ac 477 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
478 */
479
8ed67789
DL
480static inline struct rt6_info *rt6_device_match(struct net *net,
481 struct rt6_info *rt,
b71d1d42 482 const struct in6_addr *saddr,
1da177e4 483 int oif,
d420895e 484 int flags)
1da177e4
LT
485{
486 struct rt6_info *local = NULL;
487 struct rt6_info *sprt;
488
dd3abc4e
YH
489 if (!oif && ipv6_addr_any(saddr))
490 goto out;
491
d8d1f30b 492 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 493 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
494
495 if (oif) {
1da177e4
LT
496 if (dev->ifindex == oif)
497 return sprt;
498 if (dev->flags & IFF_LOOPBACK) {
38308473 499 if (!sprt->rt6i_idev ||
1da177e4 500 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 501 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 502 continue;
1ab1457c 503 if (local && (!oif ||
1da177e4
LT
504 local->rt6i_idev->dev->ifindex == oif))
505 continue;
506 }
507 local = sprt;
508 }
dd3abc4e
YH
509 } else {
510 if (ipv6_chk_addr(net, saddr, dev,
511 flags & RT6_LOOKUP_F_IFACE))
512 return sprt;
1da177e4 513 }
dd3abc4e 514 }
1da177e4 515
dd3abc4e 516 if (oif) {
1da177e4
LT
517 if (local)
518 return local;
519
d420895e 520 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 521 return net->ipv6.ip6_null_entry;
1da177e4 522 }
dd3abc4e 523out:
1da177e4
LT
524 return rt;
525}
526
27097255 527#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
528struct __rt6_probe_work {
529 struct work_struct work;
530 struct in6_addr target;
531 struct net_device *dev;
532};
533
534static void rt6_probe_deferred(struct work_struct *w)
535{
536 struct in6_addr mcaddr;
537 struct __rt6_probe_work *work =
538 container_of(w, struct __rt6_probe_work, work);
539
540 addrconf_addr_solict_mult(&work->target, &mcaddr);
541 ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
542 dev_put(work->dev);
662f5533 543 kfree(work);
c2f17e82
HFS
544}
545
27097255
YH
546static void rt6_probe(struct rt6_info *rt)
547{
990edb42 548 struct __rt6_probe_work *work;
f2c31e32 549 struct neighbour *neigh;
27097255
YH
550 /*
551 * Okay, this does not seem to be appropriate
552 * for now, however, we need to check if it
553 * is really so; aka Router Reachability Probing.
554 *
555 * Router Reachability Probe MUST be rate-limited
556 * to no more than one per minute.
557 */
2152caea 558 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 559 return;
2152caea
YH
560 rcu_read_lock_bh();
561 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
562 if (neigh) {
8d6c31bf
MKL
563 if (neigh->nud_state & NUD_VALID)
564 goto out;
565
990edb42 566 work = NULL;
2152caea 567 write_lock(&neigh->lock);
990edb42
MKL
568 if (!(neigh->nud_state & NUD_VALID) &&
569 time_after(jiffies,
570 neigh->updated +
571 rt->rt6i_idev->cnf.rtr_probe_interval)) {
572 work = kmalloc(sizeof(*work), GFP_ATOMIC);
573 if (work)
574 __neigh_set_probe_once(neigh);
c2f17e82 575 }
2152caea 576 write_unlock(&neigh->lock);
990edb42
MKL
577 } else {
578 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 579 }
990edb42
MKL
580
581 if (work) {
582 INIT_WORK(&work->work, rt6_probe_deferred);
583 work->target = rt->rt6i_gateway;
584 dev_hold(rt->dst.dev);
585 work->dev = rt->dst.dev;
586 schedule_work(&work->work);
587 }
588
8d6c31bf 589out:
2152caea 590 rcu_read_unlock_bh();
27097255
YH
591}
592#else
/* Without CONFIG_IPV6_ROUTER_PREF router probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* no-op */
}
596#endif
597
1da177e4 598/*
554cfb7e 599 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 600 */
b6f99a21 601static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 602{
d1918542 603 struct net_device *dev = rt->dst.dev;
161980f4 604 if (!oif || dev->ifindex == oif)
554cfb7e 605 return 2;
161980f4
DM
606 if ((dev->flags & IFF_LOOPBACK) &&
607 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
608 return 1;
609 return 0;
554cfb7e 610}
1da177e4 611
afc154e9 612static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 613{
f2c31e32 614 struct neighbour *neigh;
afc154e9 615 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 616
4d0c5911
YH
617 if (rt->rt6i_flags & RTF_NONEXTHOP ||
618 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 619 return RT6_NUD_SUCCEED;
145a3621
YH
620
621 rcu_read_lock_bh();
622 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
623 if (neigh) {
624 read_lock(&neigh->lock);
554cfb7e 625 if (neigh->nud_state & NUD_VALID)
afc154e9 626 ret = RT6_NUD_SUCCEED;
398bcbeb 627#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 628 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 629 ret = RT6_NUD_SUCCEED;
7e980569
JB
630 else
631 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 632#endif
145a3621 633 read_unlock(&neigh->lock);
afc154e9
HFS
634 } else {
635 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 636 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 637 }
145a3621
YH
638 rcu_read_unlock_bh();
639
a5a81f0b 640 return ret;
1da177e4
LT
641}
642
554cfb7e
YH
643static int rt6_score_route(struct rt6_info *rt, int oif,
644 int strict)
1da177e4 645{
a5a81f0b 646 int m;
1ab1457c 647
4d0c5911 648 m = rt6_check_dev(rt, oif);
77d16f45 649 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 650 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
651#ifdef CONFIG_IPV6_ROUTER_PREF
652 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
653#endif
afc154e9
HFS
654 if (strict & RT6_LOOKUP_F_REACHABLE) {
655 int n = rt6_check_neigh(rt);
656 if (n < 0)
657 return n;
658 }
554cfb7e
YH
659 return m;
660}
661
f11e6659 662static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
663 int *mpri, struct rt6_info *match,
664 bool *do_rr)
554cfb7e 665{
f11e6659 666 int m;
afc154e9 667 bool match_do_rr = false;
f11e6659
DM
668
669 if (rt6_check_expired(rt))
670 goto out;
671
672 m = rt6_score_route(rt, oif, strict);
7e980569 673 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
674 match_do_rr = true;
675 m = 0; /* lowest valid score */
7e980569 676 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 677 goto out;
afc154e9
HFS
678 }
679
680 if (strict & RT6_LOOKUP_F_REACHABLE)
681 rt6_probe(rt);
f11e6659 682
7e980569 683 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 684 if (m > *mpri) {
afc154e9 685 *do_rr = match_do_rr;
f11e6659
DM
686 *mpri = m;
687 match = rt;
f11e6659 688 }
f11e6659
DM
689out:
690 return match;
691}
692
693static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
694 struct rt6_info *rr_head,
afc154e9
HFS
695 u32 metric, int oif, int strict,
696 bool *do_rr)
f11e6659 697{
9fbdcfaf 698 struct rt6_info *rt, *match, *cont;
554cfb7e 699 int mpri = -1;
1da177e4 700
f11e6659 701 match = NULL;
9fbdcfaf
SK
702 cont = NULL;
703 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
704 if (rt->rt6i_metric != metric) {
705 cont = rt;
706 break;
707 }
708
709 match = find_match(rt, oif, strict, &mpri, match, do_rr);
710 }
711
712 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
713 if (rt->rt6i_metric != metric) {
714 cont = rt;
715 break;
716 }
717
afc154e9 718 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
719 }
720
721 if (match || !cont)
722 return match;
723
724 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 725 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 726
f11e6659
DM
727 return match;
728}
1da177e4 729
f11e6659
DM
730static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
731{
732 struct rt6_info *match, *rt0;
8ed67789 733 struct net *net;
afc154e9 734 bool do_rr = false;
1da177e4 735
f11e6659
DM
736 rt0 = fn->rr_ptr;
737 if (!rt0)
738 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 739
afc154e9
HFS
740 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
741 &do_rr);
1da177e4 742
afc154e9 743 if (do_rr) {
d8d1f30b 744 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 745
554cfb7e 746 /* no entries matched; do round-robin */
f11e6659
DM
747 if (!next || next->rt6i_metric != rt0->rt6i_metric)
748 next = fn->leaf;
749
750 if (next != rt0)
751 fn->rr_ptr = next;
1da177e4 752 }
1da177e4 753
d1918542 754 net = dev_net(rt0->dst.dev);
a02cec21 755 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
756}
757
8b9df265
MKL
758static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
759{
760 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
761}
762
70ceb4f5
YH
763#ifdef CONFIG_IPV6_ROUTE_INFO
764int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 765 const struct in6_addr *gwaddr)
70ceb4f5 766{
c346dca1 767 struct net *net = dev_net(dev);
70ceb4f5
YH
768 struct route_info *rinfo = (struct route_info *) opt;
769 struct in6_addr prefix_buf, *prefix;
770 unsigned int pref;
4bed72e4 771 unsigned long lifetime;
70ceb4f5
YH
772 struct rt6_info *rt;
773
774 if (len < sizeof(struct route_info)) {
775 return -EINVAL;
776 }
777
778 /* Sanity check for prefix_len and length */
779 if (rinfo->length > 3) {
780 return -EINVAL;
781 } else if (rinfo->prefix_len > 128) {
782 return -EINVAL;
783 } else if (rinfo->prefix_len > 64) {
784 if (rinfo->length < 2) {
785 return -EINVAL;
786 }
787 } else if (rinfo->prefix_len > 0) {
788 if (rinfo->length < 1) {
789 return -EINVAL;
790 }
791 }
792
793 pref = rinfo->route_pref;
794 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 795 return -EINVAL;
70ceb4f5 796
4bed72e4 797 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
798
799 if (rinfo->length == 3)
800 prefix = (struct in6_addr *)rinfo->prefix;
801 else {
802 /* this function is safe */
803 ipv6_addr_prefix(&prefix_buf,
804 (struct in6_addr *)rinfo->prefix,
805 rinfo->prefix_len);
806 prefix = &prefix_buf;
807 }
808
f104a567
DJ
809 if (rinfo->prefix_len == 0)
810 rt = rt6_get_dflt_router(gwaddr, dev);
811 else
812 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
813 gwaddr, dev->ifindex);
70ceb4f5
YH
814
815 if (rt && !lifetime) {
e0a1ad73 816 ip6_del_rt(rt);
70ceb4f5
YH
817 rt = NULL;
818 }
819
820 if (!rt && lifetime)
efa2cea0 821 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
822 pref);
823 else if (rt)
824 rt->rt6i_flags = RTF_ROUTEINFO |
825 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
826
827 if (rt) {
1716a961
G
828 if (!addrconf_finite_timeout(lifetime))
829 rt6_clean_expires(rt);
830 else
831 rt6_set_expires(rt, jiffies + HZ * lifetime);
832
94e187c0 833 ip6_rt_put(rt);
70ceb4f5
YH
834 }
835 return 0;
836}
837#endif
838
a3c00e46
MKL
839static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
840 struct in6_addr *saddr)
841{
842 struct fib6_node *pn;
843 while (1) {
844 if (fn->fn_flags & RTN_TL_ROOT)
845 return NULL;
846 pn = fn->parent;
847 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
848 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
849 else
850 fn = pn;
851 if (fn->fn_flags & RTN_RTINFO)
852 return fn;
853 }
854}
c71099ac 855
8ed67789
DL
856static struct rt6_info *ip6_pol_route_lookup(struct net *net,
857 struct fib6_table *table,
4c9483b2 858 struct flowi6 *fl6, int flags)
1da177e4
LT
859{
860 struct fib6_node *fn;
861 struct rt6_info *rt;
862
c71099ac 863 read_lock_bh(&table->tb6_lock);
4c9483b2 864 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
865restart:
866 rt = fn->leaf;
4c9483b2 867 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 868 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 869 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
870 if (rt == net->ipv6.ip6_null_entry) {
871 fn = fib6_backtrack(fn, &fl6->saddr);
872 if (fn)
873 goto restart;
874 }
d8d1f30b 875 dst_use(&rt->dst, jiffies);
c71099ac 876 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
877 return rt;
878
879}
880
67ba4152 881struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
882 int flags)
883{
884 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
885}
886EXPORT_SYMBOL_GPL(ip6_route_lookup);
887
9acd9f3a
YH
888struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
889 const struct in6_addr *saddr, int oif, int strict)
c71099ac 890{
4c9483b2
DM
891 struct flowi6 fl6 = {
892 .flowi6_oif = oif,
893 .daddr = *daddr,
c71099ac
TG
894 };
895 struct dst_entry *dst;
77d16f45 896 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 897
adaa70bb 898 if (saddr) {
4c9483b2 899 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
900 flags |= RT6_LOOKUP_F_HAS_SADDR;
901 }
902
4c9483b2 903 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
904 if (dst->error == 0)
905 return (struct rt6_info *) dst;
906
907 dst_release(dst);
908
1da177e4
LT
909 return NULL;
910}
7159039a
YH
911EXPORT_SYMBOL(rt6_lookup);
912
c71099ac 913/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
914 It takes new route entry, the addition fails by any reason the
915 route is freed. In any case, if caller does not hold it, it may
916 be destroyed.
917 */
918
e5fd387a 919static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
e715b6d3 920 struct mx6_config *mxc)
1da177e4
LT
921{
922 int err;
c71099ac 923 struct fib6_table *table;
1da177e4 924
c71099ac
TG
925 table = rt->rt6i_table;
926 write_lock_bh(&table->tb6_lock);
e715b6d3 927 err = fib6_add(&table->tb6_root, rt, info, mxc);
c71099ac 928 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
929
930 return err;
931}
932
40e22e8f
TG
933int ip6_ins_rt(struct rt6_info *rt)
934{
e715b6d3
FW
935 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
936 struct mx6_config mxc = { .mx = NULL, };
937
938 return __ip6_ins_rt(rt, &info, &mxc);
40e22e8f
TG
939}
940
8b9df265
MKL
941static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
942 const struct in6_addr *daddr,
943 const struct in6_addr *saddr)
1da177e4 944{
1da177e4
LT
945 struct rt6_info *rt;
946
947 /*
948 * Clone the route.
949 */
950
d52d3997 951 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 952 ort = (struct rt6_info *)ort->dst.from;
1da177e4 953
d52d3997
MKL
954 rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev,
955 0, ort->rt6i_table);
83a09abd
MKL
956
957 if (!rt)
958 return NULL;
959
960 ip6_rt_copy_init(rt, ort);
961 rt->rt6i_flags |= RTF_CACHE;
962 rt->rt6i_metric = 0;
963 rt->dst.flags |= DST_HOST;
964 rt->rt6i_dst.addr = *daddr;
965 rt->rt6i_dst.plen = 128;
1da177e4 966
83a09abd
MKL
967 if (!rt6_is_gw_or_nonexthop(ort)) {
968 if (ort->rt6i_dst.plen != 128 &&
969 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
970 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 971#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
972 if (rt->rt6i_src.plen && saddr) {
973 rt->rt6i_src.addr = *saddr;
974 rt->rt6i_src.plen = 128;
8b9df265 975 }
83a09abd 976#endif
95a9a5ba 977 }
1da177e4 978
95a9a5ba
YH
979 return rt;
980}
1da177e4 981
d52d3997
MKL
982static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
983{
984 struct rt6_info *pcpu_rt;
985
986 pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
987 rt->dst.dev, rt->dst.flags,
988 rt->rt6i_table);
989
990 if (!pcpu_rt)
991 return NULL;
992 ip6_rt_copy_init(pcpu_rt, rt);
993 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
994 pcpu_rt->rt6i_flags |= RTF_PCPU;
995 return pcpu_rt;
996}
997
998/* It should be called with read_lock_bh(&tb6_lock) acquired */
999static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1000{
1001 struct rt6_info *pcpu_rt, *prev, **p;
1002
1003 p = this_cpu_ptr(rt->rt6i_pcpu);
1004 pcpu_rt = *p;
1005
1006 if (pcpu_rt)
1007 goto done;
1008
1009 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1010 if (!pcpu_rt) {
1011 struct net *net = dev_net(rt->dst.dev);
1012
1013 pcpu_rt = net->ipv6.ip6_null_entry;
1014 goto done;
1015 }
1016
1017 prev = cmpxchg(p, NULL, pcpu_rt);
1018 if (prev) {
1019 /* If someone did it before us, return prev instead */
1020 dst_destroy(&pcpu_rt->dst);
1021 pcpu_rt = prev;
1022 }
1023
1024done:
1025 dst_hold(&pcpu_rt->dst);
1026 rt6_dst_from_metrics_check(pcpu_rt);
1027 return pcpu_rt;
1028}
1029
8ed67789 1030static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 1031 struct flowi6 *fl6, int flags)
1da177e4 1032{
367efcb9 1033 struct fib6_node *fn, *saved_fn;
45e4fd26 1034 struct rt6_info *rt;
c71099ac 1035 int strict = 0;
1da177e4 1036
77d16f45 1037 strict |= flags & RT6_LOOKUP_F_IFACE;
367efcb9
MKL
1038 if (net->ipv6.devconf_all->forwarding == 0)
1039 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1040
c71099ac 1041 read_lock_bh(&table->tb6_lock);
1da177e4 1042
4c9483b2 1043 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1044 saved_fn = fn;
1da177e4 1045
a3c00e46 1046redo_rt6_select:
367efcb9 1047 rt = rt6_select(fn, oif, strict);
52bd4c0c 1048 if (rt->rt6i_nsiblings)
367efcb9 1049 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1050 if (rt == net->ipv6.ip6_null_entry) {
1051 fn = fib6_backtrack(fn, &fl6->saddr);
1052 if (fn)
1053 goto redo_rt6_select;
367efcb9
MKL
1054 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1055 /* also consider unreachable route */
1056 strict &= ~RT6_LOOKUP_F_REACHABLE;
1057 fn = saved_fn;
1058 goto redo_rt6_select;
367efcb9 1059 }
a3c00e46
MKL
1060 }
1061
fb9de91e 1062
3da59bd9 1063 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
d52d3997
MKL
1064 dst_use(&rt->dst, jiffies);
1065 read_unlock_bh(&table->tb6_lock);
1066
1067 rt6_dst_from_metrics_check(rt);
1068 return rt;
3da59bd9
MKL
1069 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1070 !(rt->rt6i_flags & RTF_GATEWAY))) {
1071 /* Create a RTF_CACHE clone which will not be
1072 * owned by the fib6 tree. It is for the special case where
1073 * the daddr in the skb during the neighbor look-up is different
1074 * from the fl6->daddr used to look-up route here.
1075 */
1076
1077 struct rt6_info *uncached_rt;
1078
d52d3997
MKL
1079 dst_use(&rt->dst, jiffies);
1080 read_unlock_bh(&table->tb6_lock);
1081
3da59bd9
MKL
1082 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1083 dst_release(&rt->dst);
c71099ac 1084
3da59bd9 1085 if (uncached_rt)
8d0b94af 1086 rt6_uncached_list_add(uncached_rt);
3da59bd9
MKL
1087 else
1088 uncached_rt = net->ipv6.ip6_null_entry;
d52d3997 1089
3da59bd9
MKL
1090 dst_hold(&uncached_rt->dst);
1091 return uncached_rt;
3da59bd9 1092
d52d3997
MKL
1093 } else {
1094 /* Get a percpu copy */
1095
1096 struct rt6_info *pcpu_rt;
1097
1098 rt->dst.lastuse = jiffies;
1099 rt->dst.__use++;
1100 pcpu_rt = rt6_get_pcpu_route(rt);
1101 read_unlock_bh(&table->tb6_lock);
1102
1103 return pcpu_rt;
1104 }
1da177e4
LT
1105}
1106
8ed67789 1107static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1108 struct flowi6 *fl6, int flags)
4acad72d 1109{
4c9483b2 1110 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1111}
1112
72331bc0
SL
1113static struct dst_entry *ip6_route_input_lookup(struct net *net,
1114 struct net_device *dev,
1115 struct flowi6 *fl6, int flags)
1116{
1117 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1118 flags |= RT6_LOOKUP_F_IFACE;
1119
1120 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1121}
1122
c71099ac
TG
1123void ip6_route_input(struct sk_buff *skb)
1124{
b71d1d42 1125 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1126 struct net *net = dev_net(skb->dev);
adaa70bb 1127 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
1128 struct flowi6 fl6 = {
1129 .flowi6_iif = skb->dev->ifindex,
1130 .daddr = iph->daddr,
1131 .saddr = iph->saddr,
6502ca52 1132 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1133 .flowi6_mark = skb->mark,
1134 .flowi6_proto = iph->nexthdr,
c71099ac 1135 };
adaa70bb 1136
72331bc0 1137 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1138}
1139
8ed67789 1140static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1141 struct flowi6 *fl6, int flags)
1da177e4 1142{
4c9483b2 1143 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1144}
1145
67ba4152 1146struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 1147 struct flowi6 *fl6)
c71099ac
TG
1148{
1149 int flags = 0;
1150
1fb9489b 1151 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1152
4c9483b2 1153 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 1154 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1155
4c9483b2 1156 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 1157 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1158 else if (sk)
1159 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1160
4c9483b2 1161 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1162}
7159039a 1163EXPORT_SYMBOL(ip6_route_output);
1da177e4 1164
2774c131 1165struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1166{
5c1e6aa3 1167 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
1168 struct dst_entry *new = NULL;
1169
f5b0a874 1170 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 1171 if (rt) {
d8d1f30b 1172 new = &rt->dst;
14e50e57 1173
8104891b 1174 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
8104891b 1175
14e50e57 1176 new->__use = 1;
352e512c 1177 new->input = dst_discard;
aad88724 1178 new->output = dst_discard_sk;
14e50e57 1179
21efcfa0
ED
1180 if (dst_metrics_read_only(&ort->dst))
1181 new->_metrics = ort->dst._metrics;
1182 else
1183 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1184 rt->rt6i_idev = ort->rt6i_idev;
1185 if (rt->rt6i_idev)
1186 in6_dev_hold(rt->rt6i_idev);
14e50e57 1187
4e3fd7a0 1188 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961 1189 rt->rt6i_flags = ort->rt6i_flags;
14e50e57
DM
1190 rt->rt6i_metric = 0;
1191
1192 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1193#ifdef CONFIG_IPV6_SUBTREES
1194 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1195#endif
1196
1197 dst_free(new);
1198 }
1199
69ead7af
DM
1200 dst_release(dst_orig);
1201 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1202}
14e50e57 1203
1da177e4
LT
1204/*
1205 * Destination cache support functions
1206 */
1207
4b32b5ad
MKL
1208static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1209{
1210 if (rt->dst.from &&
1211 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1212 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1213}
1214
3da59bd9
MKL
1215static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1216{
1217 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1218 return NULL;
1219
1220 if (rt6_check_expired(rt))
1221 return NULL;
1222
1223 return &rt->dst;
1224}
1225
1226static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1227{
1228 if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1229 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1230 return &rt->dst;
1231 else
1232 return NULL;
1233}
1234
1da177e4
LT
1235static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1236{
1237 struct rt6_info *rt;
1238
1239 rt = (struct rt6_info *) dst;
1240
6f3118b5
ND
1241 /* All IPV6 dsts are created with ->obsolete set to the value
1242 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1243 * into this function always.
1244 */
e3bc10bd 1245
4b32b5ad
MKL
1246 rt6_dst_from_metrics_check(rt);
1247
d52d3997 1248 if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
3da59bd9
MKL
1249 return rt6_dst_from_check(rt, cookie);
1250 else
1251 return rt6_check(rt, cookie);
1da177e4
LT
1252}
1253
1254static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1255{
1256 struct rt6_info *rt = (struct rt6_info *) dst;
1257
1258 if (rt) {
54c1a859
YH
1259 if (rt->rt6i_flags & RTF_CACHE) {
1260 if (rt6_check_expired(rt)) {
1261 ip6_del_rt(rt);
1262 dst = NULL;
1263 }
1264 } else {
1da177e4 1265 dst_release(dst);
54c1a859
YH
1266 dst = NULL;
1267 }
1da177e4 1268 }
54c1a859 1269 return dst;
1da177e4
LT
1270}
1271
1272static void ip6_link_failure(struct sk_buff *skb)
1273{
1274 struct rt6_info *rt;
1275
3ffe533c 1276 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1277
adf30907 1278 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1279 if (rt) {
1eb4f758
HFS
1280 if (rt->rt6i_flags & RTF_CACHE) {
1281 dst_hold(&rt->dst);
1282 if (ip6_del_rt(rt))
1283 dst_free(&rt->dst);
1284 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1da177e4 1285 rt->rt6i_node->fn_sernum = -1;
1eb4f758 1286 }
1da177e4
LT
1287 }
1288}
1289
45e4fd26
MKL
1290static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1291{
1292 struct net *net = dev_net(rt->dst.dev);
1293
1294 rt->rt6i_flags |= RTF_MODIFIED;
1295 rt->rt6i_pmtu = mtu;
1296 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1297}
1298
1299static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1300 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1301{
67ba4152 1302 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1303
45e4fd26
MKL
1304 if (rt6->rt6i_flags & RTF_LOCAL)
1305 return;
81aded24 1306
45e4fd26
MKL
1307 dst_confirm(dst);
1308 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1309 if (mtu >= dst_mtu(dst))
1310 return;
9d289715 1311
45e4fd26
MKL
1312 if (rt6->rt6i_flags & RTF_CACHE) {
1313 rt6_do_update_pmtu(rt6, mtu);
1314 } else {
1315 const struct in6_addr *daddr, *saddr;
1316 struct rt6_info *nrt6;
1317
1318 if (iph) {
1319 daddr = &iph->daddr;
1320 saddr = &iph->saddr;
1321 } else if (sk) {
1322 daddr = &sk->sk_v6_daddr;
1323 saddr = &inet6_sk(sk)->saddr;
1324 } else {
1325 return;
1326 }
1327 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1328 if (nrt6) {
1329 rt6_do_update_pmtu(nrt6, mtu);
1330
1331 /* ip6_ins_rt(nrt6) will bump the
1332 * rt6->rt6i_node->fn_sernum
1333 * which will fail the next rt6_check() and
1334 * invalidate the sk->sk_dst_cache.
1335 */
1336 ip6_ins_rt(nrt6);
1337 }
1da177e4
LT
1338 }
1339}
1340
45e4fd26
MKL
1341static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1342 struct sk_buff *skb, u32 mtu)
1343{
1344 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1345}
1346
42ae66c8
DM
1347void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1348 int oif, u32 mark)
81aded24
DM
1349{
1350 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1351 struct dst_entry *dst;
1352 struct flowi6 fl6;
1353
1354 memset(&fl6, 0, sizeof(fl6));
1355 fl6.flowi6_oif = oif;
1b3c61dc 1356 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1357 fl6.daddr = iph->daddr;
1358 fl6.saddr = iph->saddr;
6502ca52 1359 fl6.flowlabel = ip6_flowinfo(iph);
81aded24
DM
1360
1361 dst = ip6_route_output(net, NULL, &fl6);
1362 if (!dst->error)
45e4fd26 1363 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1364 dst_release(dst);
1365}
1366EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1367
1368void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1369{
1370 ip6_update_pmtu(skb, sock_net(sk), mtu,
1371 sk->sk_bound_dev_if, sk->sk_mark);
1372}
1373EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1374
b55b76b2
DJ
1375/* Handle redirects */
1376struct ip6rd_flowi {
1377 struct flowi6 fl6;
1378 struct in6_addr gateway;
1379};
1380
1381static struct rt6_info *__ip6_route_redirect(struct net *net,
1382 struct fib6_table *table,
1383 struct flowi6 *fl6,
1384 int flags)
1385{
1386 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1387 struct rt6_info *rt;
1388 struct fib6_node *fn;
1389
1390 /* Get the "current" route for this destination and
1391 * check if the redirect has come from approriate router.
1392 *
1393 * RFC 4861 specifies that redirects should only be
1394 * accepted if they come from the nexthop to the target.
1395 * Due to the way the routes are chosen, this notion
1396 * is a bit fuzzy and one might need to check all possible
1397 * routes.
1398 */
1399
1400 read_lock_bh(&table->tb6_lock);
1401 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1402restart:
1403 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1404 if (rt6_check_expired(rt))
1405 continue;
1406 if (rt->dst.error)
1407 break;
1408 if (!(rt->rt6i_flags & RTF_GATEWAY))
1409 continue;
1410 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1411 continue;
1412 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1413 continue;
1414 break;
1415 }
1416
1417 if (!rt)
1418 rt = net->ipv6.ip6_null_entry;
1419 else if (rt->dst.error) {
1420 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1421 goto out;
1422 }
1423
1424 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1425 fn = fib6_backtrack(fn, &fl6->saddr);
1426 if (fn)
1427 goto restart;
b55b76b2 1428 }
a3c00e46 1429
b0a1ba59 1430out:
b55b76b2
DJ
1431 dst_hold(&rt->dst);
1432
1433 read_unlock_bh(&table->tb6_lock);
1434
1435 return rt;
1436};
1437
1438static struct dst_entry *ip6_route_redirect(struct net *net,
1439 const struct flowi6 *fl6,
1440 const struct in6_addr *gateway)
1441{
1442 int flags = RT6_LOOKUP_F_HAS_SADDR;
1443 struct ip6rd_flowi rdfl;
1444
1445 rdfl.fl6 = *fl6;
1446 rdfl.gateway = *gateway;
1447
1448 return fib6_rule_lookup(net, &rdfl.fl6,
1449 flags, __ip6_route_redirect);
1450}
1451
3a5ad2ee
DM
1452void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1453{
1454 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1455 struct dst_entry *dst;
1456 struct flowi6 fl6;
1457
1458 memset(&fl6, 0, sizeof(fl6));
e374c618 1459 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1460 fl6.flowi6_oif = oif;
1461 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1462 fl6.daddr = iph->daddr;
1463 fl6.saddr = iph->saddr;
6502ca52 1464 fl6.flowlabel = ip6_flowinfo(iph);
3a5ad2ee 1465
b55b76b2
DJ
1466 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1467 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1468 dst_release(dst);
1469}
1470EXPORT_SYMBOL_GPL(ip6_redirect);
1471
c92a59ec
DJ
1472void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1473 u32 mark)
1474{
1475 const struct ipv6hdr *iph = ipv6_hdr(skb);
1476 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1477 struct dst_entry *dst;
1478 struct flowi6 fl6;
1479
1480 memset(&fl6, 0, sizeof(fl6));
e374c618 1481 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1482 fl6.flowi6_oif = oif;
1483 fl6.flowi6_mark = mark;
c92a59ec
DJ
1484 fl6.daddr = msg->dest;
1485 fl6.saddr = iph->daddr;
1486
b55b76b2
DJ
1487 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1488 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1489 dst_release(dst);
1490}
1491
3a5ad2ee
DM
1492void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1493{
1494 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1495}
1496EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1497
0dbaee3b 1498static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1499{
0dbaee3b
DM
1500 struct net_device *dev = dst->dev;
1501 unsigned int mtu = dst_mtu(dst);
1502 struct net *net = dev_net(dev);
1503
1da177e4
LT
1504 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1505
5578689a
DL
1506 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1507 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1508
1509 /*
1ab1457c
YH
1510 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1511 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1512 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1513 * rely only on pmtu discovery"
1514 */
1515 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1516 mtu = IPV6_MAXPLEN;
1517 return mtu;
1518}
1519
ebb762f2 1520static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1521{
4b32b5ad
MKL
1522 const struct rt6_info *rt = (const struct rt6_info *)dst;
1523 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 1524 struct inet6_dev *idev;
618f9bc7 1525
4b32b5ad
MKL
1526 if (mtu)
1527 goto out;
1528
1529 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1530 if (mtu)
30f78d8e 1531 goto out;
618f9bc7
SK
1532
1533 mtu = IPV6_MIN_MTU;
d33e4553
DM
1534
1535 rcu_read_lock();
1536 idev = __in6_dev_get(dst->dev);
1537 if (idev)
1538 mtu = idev->cnf.mtu6;
1539 rcu_read_unlock();
1540
30f78d8e
ED
1541out:
1542 return min_t(unsigned int, mtu, IP6_MAX_MTU);
d33e4553
DM
1543}
1544
3b00944c
YH
/* Head of the list of dsts handed out by icmp6_dst_alloc(), linked via dst->next. */
static struct dst_entry *icmp6_dst_gc_list;
/* Taken (with BHs disabled) around every walk or update of icmp6_dst_gc_list. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1547
3b00944c 1548struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1549 struct flowi6 *fl6)
1da177e4 1550{
87a11578 1551 struct dst_entry *dst;
1da177e4
LT
1552 struct rt6_info *rt;
1553 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1554 struct net *net = dev_net(dev);
1da177e4 1555
38308473 1556 if (unlikely(!idev))
122bdf67 1557 return ERR_PTR(-ENODEV);
1da177e4 1558
8b96d22d 1559 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1560 if (unlikely(!rt)) {
1da177e4 1561 in6_dev_put(idev);
87a11578 1562 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1563 goto out;
1564 }
1565
8e2ec639
YZ
1566 rt->dst.flags |= DST_HOST;
1567 rt->dst.output = ip6_output;
d8d1f30b 1568 atomic_set(&rt->dst.__refcnt, 1);
550bab42 1569 rt->rt6i_gateway = fl6->daddr;
87a11578 1570 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1571 rt->rt6i_dst.plen = 128;
1572 rt->rt6i_idev = idev;
14edd87d 1573 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1574
3b00944c 1575 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1576 rt->dst.next = icmp6_dst_gc_list;
1577 icmp6_dst_gc_list = &rt->dst;
3b00944c 1578 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1579
5578689a 1580 fib6_force_start_gc(net);
1da177e4 1581
87a11578
DM
1582 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1583
1da177e4 1584out:
87a11578 1585 return dst;
1da177e4
LT
1586}
1587
3d0f24a7 1588int icmp6_dst_gc(void)
1da177e4 1589{
e9476e95 1590 struct dst_entry *dst, **pprev;
3d0f24a7 1591 int more = 0;
1da177e4 1592
3b00944c
YH
1593 spin_lock_bh(&icmp6_dst_lock);
1594 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1595
1da177e4
LT
1596 while ((dst = *pprev) != NULL) {
1597 if (!atomic_read(&dst->__refcnt)) {
1598 *pprev = dst->next;
1599 dst_free(dst);
1da177e4
LT
1600 } else {
1601 pprev = &dst->next;
3d0f24a7 1602 ++more;
1da177e4
LT
1603 }
1604 }
1605
3b00944c 1606 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1607
3d0f24a7 1608 return more;
1da177e4
LT
1609}
1610
1e493d19
DM
1611static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1612 void *arg)
1613{
1614 struct dst_entry *dst, **pprev;
1615
1616 spin_lock_bh(&icmp6_dst_lock);
1617 pprev = &icmp6_dst_gc_list;
1618 while ((dst = *pprev) != NULL) {
1619 struct rt6_info *rt = (struct rt6_info *) dst;
1620 if (func(rt, arg)) {
1621 *pprev = dst->next;
1622 dst_free(dst);
1623 } else {
1624 pprev = &dst->next;
1625 }
1626 }
1627 spin_unlock_bh(&icmp6_dst_lock);
1628}
1629
569d3645 1630static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1631{
86393e52 1632 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1633 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1634 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1635 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1636 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1637 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1638 int entries;
7019b78e 1639
fc66f95c 1640 entries = dst_entries_get_fast(ops);
49a18d86 1641 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1642 entries <= rt_max_size)
1da177e4
LT
1643 goto out;
1644
6891a346 1645 net->ipv6.ip6_rt_gc_expire++;
14956643 1646 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1647 entries = dst_entries_get_slow(ops);
1648 if (entries < ops->gc_thresh)
7019b78e 1649 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1650out:
7019b78e 1651 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1652 return entries > rt_max_size;
1da177e4
LT
1653}
1654
e715b6d3
FW
1655static int ip6_convert_metrics(struct mx6_config *mxc,
1656 const struct fib6_config *cfg)
1657{
1658 struct nlattr *nla;
1659 int remaining;
1660 u32 *mp;
1661
63159f29 1662 if (!cfg->fc_mx)
e715b6d3
FW
1663 return 0;
1664
1665 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1666 if (unlikely(!mp))
1667 return -ENOMEM;
1668
1669 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1670 int type = nla_type(nla);
1671
1672 if (type) {
ea697639
DB
1673 u32 val;
1674
e715b6d3
FW
1675 if (unlikely(type > RTAX_MAX))
1676 goto err;
ea697639
DB
1677 if (type == RTAX_CC_ALGO) {
1678 char tmp[TCP_CA_NAME_MAX];
1679
1680 nla_strlcpy(tmp, nla, sizeof(tmp));
1681 val = tcp_ca_get_key_by_name(tmp);
1682 if (val == TCP_CA_UNSPEC)
1683 goto err;
1684 } else {
1685 val = nla_get_u32(nla);
1686 }
e715b6d3 1687
ea697639 1688 mp[type - 1] = val;
e715b6d3
FW
1689 __set_bit(type - 1, mxc->mx_valid);
1690 }
1691 }
1692
1693 mxc->mx = mp;
1694
1695 return 0;
1696 err:
1697 kfree(mp);
1698 return -EINVAL;
1699}
1da177e4 1700
86872cb5 1701int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1702{
1703 int err;
5578689a 1704 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1705 struct rt6_info *rt = NULL;
1706 struct net_device *dev = NULL;
1707 struct inet6_dev *idev = NULL;
c71099ac 1708 struct fib6_table *table;
e715b6d3 1709 struct mx6_config mxc = { .mx = NULL, };
1da177e4
LT
1710 int addr_type;
1711
86872cb5 1712 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1713 return -EINVAL;
1714#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1715 if (cfg->fc_src_len)
1da177e4
LT
1716 return -EINVAL;
1717#endif
86872cb5 1718 if (cfg->fc_ifindex) {
1da177e4 1719 err = -ENODEV;
5578689a 1720 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1721 if (!dev)
1722 goto out;
1723 idev = in6_dev_get(dev);
1724 if (!idev)
1725 goto out;
1726 }
1727
86872cb5
TG
1728 if (cfg->fc_metric == 0)
1729 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1730
d71314b4 1731 err = -ENOBUFS;
38308473
DM
1732 if (cfg->fc_nlinfo.nlh &&
1733 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1734 table = fib6_get_table(net, cfg->fc_table);
38308473 1735 if (!table) {
f3213831 1736 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1737 table = fib6_new_table(net, cfg->fc_table);
1738 }
1739 } else {
1740 table = fib6_new_table(net, cfg->fc_table);
1741 }
38308473
DM
1742
1743 if (!table)
c71099ac 1744 goto out;
c71099ac 1745
c88507fb 1746 rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1da177e4 1747
38308473 1748 if (!rt) {
1da177e4
LT
1749 err = -ENOMEM;
1750 goto out;
1751 }
1752
1716a961
G
1753 if (cfg->fc_flags & RTF_EXPIRES)
1754 rt6_set_expires(rt, jiffies +
1755 clock_t_to_jiffies(cfg->fc_expires));
1756 else
1757 rt6_clean_expires(rt);
1da177e4 1758
86872cb5
TG
1759 if (cfg->fc_protocol == RTPROT_UNSPEC)
1760 cfg->fc_protocol = RTPROT_BOOT;
1761 rt->rt6i_protocol = cfg->fc_protocol;
1762
1763 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1764
1765 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1766 rt->dst.input = ip6_mc_input;
ab79ad14
1767 else if (cfg->fc_flags & RTF_LOCAL)
1768 rt->dst.input = ip6_input;
1da177e4 1769 else
d8d1f30b 1770 rt->dst.input = ip6_forward;
1da177e4 1771
d8d1f30b 1772 rt->dst.output = ip6_output;
1da177e4 1773
19e42e45
RP
1774 if (cfg->fc_encap) {
1775 struct lwtunnel_state *lwtstate;
1776
1777 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1778 cfg->fc_encap, &lwtstate);
1779 if (err)
1780 goto out;
5a6228a0 1781 rt->rt6i_lwtstate = lwtstate_get(lwtstate);
6673a9f4
ND
1782 if (lwtunnel_output_redirect(rt->rt6i_lwtstate))
1783 rt->dst.output = lwtunnel_output6;
19e42e45
RP
1784 }
1785
86872cb5
TG
1786 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1787 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 1788 if (rt->rt6i_dst.plen == 128)
e5fd387a 1789 rt->dst.flags |= DST_HOST;
e5fd387a 1790
1da177e4 1791#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1792 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1793 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1794#endif
1795
86872cb5 1796 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1797
1798 /* We cannot add true routes via loopback here,
1799 they would result in kernel looping; promote them to reject routes
1800 */
86872cb5 1801 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1802 (dev && (dev->flags & IFF_LOOPBACK) &&
1803 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1804 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1805 /* hold loopback dev/idev if we haven't done so. */
5578689a 1806 if (dev != net->loopback_dev) {
1da177e4
LT
1807 if (dev) {
1808 dev_put(dev);
1809 in6_dev_put(idev);
1810 }
5578689a 1811 dev = net->loopback_dev;
1da177e4
LT
1812 dev_hold(dev);
1813 idev = in6_dev_get(dev);
1814 if (!idev) {
1815 err = -ENODEV;
1816 goto out;
1817 }
1818 }
1da177e4 1819 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1820 switch (cfg->fc_type) {
1821 case RTN_BLACKHOLE:
1822 rt->dst.error = -EINVAL;
aad88724 1823 rt->dst.output = dst_discard_sk;
7150aede 1824 rt->dst.input = dst_discard;
ef2c7d7b
ND
1825 break;
1826 case RTN_PROHIBIT:
1827 rt->dst.error = -EACCES;
7150aede
K
1828 rt->dst.output = ip6_pkt_prohibit_out;
1829 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 1830 break;
b4949ab2 1831 case RTN_THROW:
ef2c7d7b 1832 default:
7150aede
K
1833 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1834 : -ENETUNREACH;
1835 rt->dst.output = ip6_pkt_discard_out;
1836 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
1837 break;
1838 }
1da177e4
LT
1839 goto install_route;
1840 }
1841
86872cb5 1842 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1843 const struct in6_addr *gw_addr;
1da177e4
LT
1844 int gwa_type;
1845
86872cb5 1846 gw_addr = &cfg->fc_gateway;
330567b7 1847 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
1848
1849 /* if gw_addr is local we will fail to detect this in case
1850 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1851 * will return already-added prefix route via interface that
1852 * prefix route was assigned to, which might be non-loopback.
1853 */
1854 err = -EINVAL;
330567b7
FW
1855 if (ipv6_chk_addr_and_flags(net, gw_addr,
1856 gwa_type & IPV6_ADDR_LINKLOCAL ?
1857 dev : NULL, 0, 0))
48ed7b26
FW
1858 goto out;
1859
4e3fd7a0 1860 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1861
1862 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1863 struct rt6_info *grt;
1864
1865 /* IPv6 strictly inhibits using not link-local
1866 addresses as nexthop address.
1867 Otherwise, router will not able to send redirects.
1868 It is very good, but in some (rare!) circumstances
1869 (SIT, PtP, NBMA NOARP links) it is handy to allow
1870 some exceptions. --ANK
1871 */
38308473 1872 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1873 goto out;
1874
5578689a 1875 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1876
1877 err = -EHOSTUNREACH;
38308473 1878 if (!grt)
1da177e4
LT
1879 goto out;
1880 if (dev) {
d1918542 1881 if (dev != grt->dst.dev) {
94e187c0 1882 ip6_rt_put(grt);
1da177e4
LT
1883 goto out;
1884 }
1885 } else {
d1918542 1886 dev = grt->dst.dev;
1da177e4
LT
1887 idev = grt->rt6i_idev;
1888 dev_hold(dev);
1889 in6_dev_hold(grt->rt6i_idev);
1890 }
38308473 1891 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1892 err = 0;
94e187c0 1893 ip6_rt_put(grt);
1da177e4
LT
1894
1895 if (err)
1896 goto out;
1897 }
1898 err = -EINVAL;
38308473 1899 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1900 goto out;
1901 }
1902
1903 err = -ENODEV;
38308473 1904 if (!dev)
1da177e4
LT
1905 goto out;
1906
c3968a85
DW
1907 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1908 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1909 err = -EINVAL;
1910 goto out;
1911 }
4e3fd7a0 1912 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1913 rt->rt6i_prefsrc.plen = 128;
1914 } else
1915 rt->rt6i_prefsrc.plen = 0;
1916
86872cb5 1917 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1918
1919install_route:
d8d1f30b 1920 rt->dst.dev = dev;
1da177e4 1921 rt->rt6i_idev = idev;
c71099ac 1922 rt->rt6i_table = table;
63152fc0 1923
c346dca1 1924 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1925
e715b6d3
FW
1926 err = ip6_convert_metrics(&mxc, cfg);
1927 if (err)
1928 goto out;
1da177e4 1929
e715b6d3
FW
1930 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1931
1932 kfree(mxc.mx);
1933 return err;
1da177e4
LT
1934out:
1935 if (dev)
1936 dev_put(dev);
1937 if (idev)
1938 in6_dev_put(idev);
1939 if (rt)
d8d1f30b 1940 dst_free(&rt->dst);
1da177e4
LT
1941 return err;
1942}
1943
86872cb5 1944static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1945{
1946 int err;
c71099ac 1947 struct fib6_table *table;
d1918542 1948 struct net *net = dev_net(rt->dst.dev);
1da177e4 1949
6825a26c
G
1950 if (rt == net->ipv6.ip6_null_entry) {
1951 err = -ENOENT;
1952 goto out;
1953 }
6c813a72 1954
c71099ac
TG
1955 table = rt->rt6i_table;
1956 write_lock_bh(&table->tb6_lock);
86872cb5 1957 err = fib6_del(rt, info);
c71099ac 1958 write_unlock_bh(&table->tb6_lock);
1da177e4 1959
6825a26c 1960out:
94e187c0 1961 ip6_rt_put(rt);
1da177e4
LT
1962 return err;
1963}
1964
e0a1ad73
TG
1965int ip6_del_rt(struct rt6_info *rt)
1966{
4d1169c1 1967 struct nl_info info = {
d1918542 1968 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1969 };
528c4ceb 1970 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1971}
1972
86872cb5 1973static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1974{
c71099ac 1975 struct fib6_table *table;
1da177e4
LT
1976 struct fib6_node *fn;
1977 struct rt6_info *rt;
1978 int err = -ESRCH;
1979
5578689a 1980 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1981 if (!table)
c71099ac
TG
1982 return err;
1983
1984 read_lock_bh(&table->tb6_lock);
1da177e4 1985
c71099ac 1986 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1987 &cfg->fc_dst, cfg->fc_dst_len,
1988 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1989
1da177e4 1990 if (fn) {
d8d1f30b 1991 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
1992 if ((rt->rt6i_flags & RTF_CACHE) &&
1993 !(cfg->fc_flags & RTF_CACHE))
1994 continue;
86872cb5 1995 if (cfg->fc_ifindex &&
d1918542
DM
1996 (!rt->dst.dev ||
1997 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1998 continue;
86872cb5
TG
1999 if (cfg->fc_flags & RTF_GATEWAY &&
2000 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2001 continue;
86872cb5 2002 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2003 continue;
d8d1f30b 2004 dst_hold(&rt->dst);
c71099ac 2005 read_unlock_bh(&table->tb6_lock);
1da177e4 2006
86872cb5 2007 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
2008 }
2009 }
c71099ac 2010 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2011
2012 return err;
2013}
2014
6700c270 2015static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2016{
e8599ff4 2017 struct net *net = dev_net(skb->dev);
a6279458 2018 struct netevent_redirect netevent;
e8599ff4 2019 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2020 struct ndisc_options ndopts;
2021 struct inet6_dev *in6_dev;
2022 struct neighbour *neigh;
71bcdba0 2023 struct rd_msg *msg;
6e157b6a
DM
2024 int optlen, on_link;
2025 u8 *lladdr;
e8599ff4 2026
29a3cad5 2027 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2028 optlen -= sizeof(*msg);
e8599ff4
DM
2029
2030 if (optlen < 0) {
6e157b6a 2031 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2032 return;
2033 }
2034
71bcdba0 2035 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2036
71bcdba0 2037 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2038 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2039 return;
2040 }
2041
6e157b6a 2042 on_link = 0;
71bcdba0 2043 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2044 on_link = 1;
71bcdba0 2045 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2046 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2047 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2048 return;
2049 }
2050
2051 in6_dev = __in6_dev_get(skb->dev);
2052 if (!in6_dev)
2053 return;
2054 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2055 return;
2056
2057 /* RFC2461 8.1:
2058 * The IP source address of the Redirect MUST be the same as the current
2059 * first-hop router for the specified ICMP Destination Address.
2060 */
2061
71bcdba0 2062 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2063 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2064 return;
2065 }
6e157b6a
DM
2066
2067 lladdr = NULL;
e8599ff4
DM
2068 if (ndopts.nd_opts_tgt_lladdr) {
2069 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2070 skb->dev);
2071 if (!lladdr) {
2072 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2073 return;
2074 }
2075 }
2076
6e157b6a
DM
2077 rt = (struct rt6_info *) dst;
2078 if (rt == net->ipv6.ip6_null_entry) {
2079 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2080 return;
6e157b6a 2081 }
e8599ff4 2082
6e157b6a
DM
2083 /* Redirect received -> path was valid.
2084 * Look, redirects are sent only in response to data packets,
2085 * so that this nexthop apparently is reachable. --ANK
2086 */
2087 dst_confirm(&rt->dst);
a6279458 2088
71bcdba0 2089 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2090 if (!neigh)
2091 return;
a6279458 2092
1da177e4
LT
2093 /*
2094 * We have finally decided to accept it.
2095 */
2096
1ab1457c 2097 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
2098 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2099 NEIGH_UPDATE_F_OVERRIDE|
2100 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2101 NEIGH_UPDATE_F_ISROUTER))
2102 );
2103
83a09abd 2104 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2105 if (!nrt)
1da177e4
LT
2106 goto out;
2107
2108 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2109 if (on_link)
2110 nrt->rt6i_flags &= ~RTF_GATEWAY;
2111
4e3fd7a0 2112 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2113
40e22e8f 2114 if (ip6_ins_rt(nrt))
1da177e4
LT
2115 goto out;
2116
d8d1f30b
CG
2117 netevent.old = &rt->dst;
2118 netevent.new = &nrt->dst;
71bcdba0 2119 netevent.daddr = &msg->dest;
60592833 2120 netevent.neigh = neigh;
8d71740c
TT
2121 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2122
38308473 2123 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2124 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2125 ip6_del_rt(rt);
1da177e4
LT
2126 }
2127
2128out:
e8599ff4 2129 neigh_release(neigh);
6e157b6a
DM
2130}
2131
1da177e4
LT
2132/*
2133 * Misc support functions
2134 */
2135
4b32b5ad
MKL
2136static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2137{
2138 BUG_ON(from->dst.from);
2139
2140 rt->rt6i_flags &= ~RTF_EXPIRES;
2141 dst_hold(&from->dst);
2142 rt->dst.from = &from->dst;
2143 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2144}
2145
83a09abd
MKL
2146static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2147{
2148 rt->dst.input = ort->dst.input;
2149 rt->dst.output = ort->dst.output;
2150 rt->rt6i_dst = ort->rt6i_dst;
2151 rt->dst.error = ort->dst.error;
2152 rt->rt6i_idev = ort->rt6i_idev;
2153 if (rt->rt6i_idev)
2154 in6_dev_hold(rt->rt6i_idev);
2155 rt->dst.lastuse = jiffies;
2156 rt->rt6i_gateway = ort->rt6i_gateway;
2157 rt->rt6i_flags = ort->rt6i_flags;
2158 rt6_set_from(rt, ort);
2159 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2160#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2161 rt->rt6i_src = ort->rt6i_src;
1da177e4 2162#endif
83a09abd
MKL
2163 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2164 rt->rt6i_table = ort->rt6i_table;
5a6228a0 2165 rt->rt6i_lwtstate = lwtstate_get(ort->rt6i_lwtstate);
1da177e4
LT
2166}
2167
70ceb4f5 2168#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2169static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
2170 const struct in6_addr *prefix, int prefixlen,
2171 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
2172{
2173 struct fib6_node *fn;
2174 struct rt6_info *rt = NULL;
c71099ac
TG
2175 struct fib6_table *table;
2176
efa2cea0 2177 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 2178 if (!table)
c71099ac 2179 return NULL;
70ceb4f5 2180
5744dd9b 2181 read_lock_bh(&table->tb6_lock);
67ba4152 2182 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2183 if (!fn)
2184 goto out;
2185
d8d1f30b 2186 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2187 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2188 continue;
2189 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2190 continue;
2191 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2192 continue;
d8d1f30b 2193 dst_hold(&rt->dst);
70ceb4f5
YH
2194 break;
2195 }
2196out:
5744dd9b 2197 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2198 return rt;
2199}
2200
efa2cea0 2201static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
2202 const struct in6_addr *prefix, int prefixlen,
2203 const struct in6_addr *gwaddr, int ifindex,
95c96174 2204 unsigned int pref)
70ceb4f5 2205{
86872cb5
TG
2206 struct fib6_config cfg = {
2207 .fc_table = RT6_TABLE_INFO,
238fc7ea 2208 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2209 .fc_ifindex = ifindex,
2210 .fc_dst_len = prefixlen,
2211 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2212 RTF_UP | RTF_PREF(pref),
15e47304 2213 .fc_nlinfo.portid = 0,
efa2cea0
DL
2214 .fc_nlinfo.nlh = NULL,
2215 .fc_nlinfo.nl_net = net,
86872cb5
TG
2216 };
2217
4e3fd7a0
AD
2218 cfg.fc_dst = *prefix;
2219 cfg.fc_gateway = *gwaddr;
70ceb4f5 2220
e317da96
YH
2221 /* We should treat it as a default route if prefix length is 0. */
2222 if (!prefixlen)
86872cb5 2223 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2224
86872cb5 2225 ip6_route_add(&cfg);
70ceb4f5 2226
efa2cea0 2227 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
2228}
2229#endif
2230
b71d1d42 2231struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2232{
1da177e4 2233 struct rt6_info *rt;
c71099ac 2234 struct fib6_table *table;
1da177e4 2235
c346dca1 2236 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 2237 if (!table)
c71099ac 2238 return NULL;
1da177e4 2239
5744dd9b 2240 read_lock_bh(&table->tb6_lock);
67ba4152 2241 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2242 if (dev == rt->dst.dev &&
045927ff 2243 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2244 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2245 break;
2246 }
2247 if (rt)
d8d1f30b 2248 dst_hold(&rt->dst);
5744dd9b 2249 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2250 return rt;
2251}
2252
b71d1d42 2253struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2254 struct net_device *dev,
2255 unsigned int pref)
1da177e4 2256{
86872cb5
TG
2257 struct fib6_config cfg = {
2258 .fc_table = RT6_TABLE_DFLT,
238fc7ea 2259 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2260 .fc_ifindex = dev->ifindex,
2261 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2262 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 2263 .fc_nlinfo.portid = 0,
5578689a 2264 .fc_nlinfo.nlh = NULL,
c346dca1 2265 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2266 };
1da177e4 2267
4e3fd7a0 2268 cfg.fc_gateway = *gwaddr;
1da177e4 2269
86872cb5 2270 ip6_route_add(&cfg);
1da177e4 2271
1da177e4
LT
2272 return rt6_get_dflt_router(gwaddr, dev);
2273}
2274
7b4da532 2275void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
2276{
2277 struct rt6_info *rt;
c71099ac
TG
2278 struct fib6_table *table;
2279
2280 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 2281 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 2282 if (!table)
c71099ac 2283 return;
1da177e4
LT
2284
2285restart:
c71099ac 2286 read_lock_bh(&table->tb6_lock);
d8d1f30b 2287 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2288 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2289 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2290 dst_hold(&rt->dst);
c71099ac 2291 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2292 ip6_del_rt(rt);
1da177e4
LT
2293 goto restart;
2294 }
2295 }
c71099ac 2296 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2297}
2298
5578689a
DL
2299static void rtmsg_to_fib6_config(struct net *net,
2300 struct in6_rtmsg *rtmsg,
86872cb5
TG
2301 struct fib6_config *cfg)
2302{
2303 memset(cfg, 0, sizeof(*cfg));
2304
2305 cfg->fc_table = RT6_TABLE_MAIN;
2306 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2307 cfg->fc_metric = rtmsg->rtmsg_metric;
2308 cfg->fc_expires = rtmsg->rtmsg_info;
2309 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2310 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2311 cfg->fc_flags = rtmsg->rtmsg_flags;
2312
5578689a 2313 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2314
4e3fd7a0
AD
2315 cfg->fc_dst = rtmsg->rtmsg_dst;
2316 cfg->fc_src = rtmsg->rtmsg_src;
2317 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2318}
2319
5578689a 2320int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2321{
86872cb5 2322 struct fib6_config cfg;
1da177e4
LT
2323 struct in6_rtmsg rtmsg;
2324 int err;
2325
67ba4152 2326 switch (cmd) {
1da177e4
LT
2327 case SIOCADDRT: /* Add a route */
2328 case SIOCDELRT: /* Delete a route */
af31f412 2329 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2330 return -EPERM;
2331 err = copy_from_user(&rtmsg, arg,
2332 sizeof(struct in6_rtmsg));
2333 if (err)
2334 return -EFAULT;
86872cb5 2335
5578689a 2336 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2337
1da177e4
LT
2338 rtnl_lock();
2339 switch (cmd) {
2340 case SIOCADDRT:
86872cb5 2341 err = ip6_route_add(&cfg);
1da177e4
LT
2342 break;
2343 case SIOCDELRT:
86872cb5 2344 err = ip6_route_del(&cfg);
1da177e4
LT
2345 break;
2346 default:
2347 err = -EINVAL;
2348 }
2349 rtnl_unlock();
2350
2351 return err;
3ff50b79 2352 }
1da177e4
LT
2353
2354 return -EINVAL;
2355}
2356
2357/*
2358 * Drop the packet on the floor
2359 */
2360
d5fdd6ba 2361static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2362{
612f09e8 2363 int type;
adf30907 2364 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2365 switch (ipstats_mib_noroutes) {
2366 case IPSTATS_MIB_INNOROUTES:
0660e03f 2367 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2368 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2369 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2370 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2371 break;
2372 }
2373 /* FALLTHROUGH */
2374 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2375 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2376 ipstats_mib_noroutes);
612f09e8
YH
2377 break;
2378 }
3ffe533c 2379 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2380 kfree_skb(skb);
2381 return 0;
2382}
2383
9ce8ade0
TG
2384static int ip6_pkt_discard(struct sk_buff *skb)
2385{
612f09e8 2386 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2387}
2388
aad88724 2389static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
1da177e4 2390{
adf30907 2391 skb->dev = skb_dst(skb)->dev;
612f09e8 2392 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2393}
2394
9ce8ade0
TG
2395static int ip6_pkt_prohibit(struct sk_buff *skb)
2396{
612f09e8 2397 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2398}
2399
aad88724 2400static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
9ce8ade0 2401{
adf30907 2402 skb->dev = skb_dst(skb)->dev;
612f09e8 2403 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2404}
2405
1da177e4
LT
2406/*
2407 * Allocate a dst for local (unicast / anycast) address.
2408 */
2409
2410struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2411 const struct in6_addr *addr,
8f031519 2412 bool anycast)
1da177e4 2413{
c346dca1 2414 struct net *net = dev_net(idev->dev);
a3300ef4
HFS
2415 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2416 DST_NOCOUNT, NULL);
2417 if (!rt)
1da177e4
LT
2418 return ERR_PTR(-ENOMEM);
2419
1da177e4
LT
2420 in6_dev_hold(idev);
2421
11d53b49 2422 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2423 rt->dst.input = ip6_input;
2424 rt->dst.output = ip6_output;
1da177e4 2425 rt->rt6i_idev = idev;
1da177e4
LT
2426
2427 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2428 if (anycast)
2429 rt->rt6i_flags |= RTF_ANYCAST;
2430 else
1da177e4 2431 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2432
550bab42 2433 rt->rt6i_gateway = *addr;
4e3fd7a0 2434 rt->rt6i_dst.addr = *addr;
1da177e4 2435 rt->rt6i_dst.plen = 128;
5578689a 2436 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2437
d8d1f30b 2438 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2439
2440 return rt;
2441}
2442
c3968a85
DW
2443int ip6_route_get_saddr(struct net *net,
2444 struct rt6_info *rt,
b71d1d42 2445 const struct in6_addr *daddr,
c3968a85
DW
2446 unsigned int prefs,
2447 struct in6_addr *saddr)
2448{
e16e888b
MS
2449 struct inet6_dev *idev =
2450 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
c3968a85 2451 int err = 0;
e16e888b 2452 if (rt && rt->rt6i_prefsrc.plen)
4e3fd7a0 2453 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2454 else
2455 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2456 daddr, prefs, saddr);
2457 return err;
2458}
2459
2460/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;		/* restrict to this device; NULL = any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
2466
2467static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2468{
2469 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2470 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2471 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2472
d1918542 2473 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2474 rt != net->ipv6.ip6_null_entry &&
2475 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2476 /* remove prefsrc entry */
2477 rt->rt6i_prefsrc.plen = 0;
2478 }
2479 return 0;
2480}
2481
2482void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2483{
2484 struct net *net = dev_net(ifp->idev->dev);
2485 struct arg_dev_net_ip adni = {
2486 .dev = ifp->idev->dev,
2487 .net = net,
2488 .addr = &ifp->addr,
2489 };
0c3584d5 2490 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2491}
2492
be7a010d
DJ
2493#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2494#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2495
2496/* Remove routers and update dst entries when gateway turn into host. */
2497static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2498{
2499 struct in6_addr *gateway = (struct in6_addr *)arg;
2500
2501 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2502 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2503 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2504 return -1;
2505 }
2506 return 0;
2507}
2508
2509void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2510{
2511 fib6_clean_all(net, fib6_clean_tohost, gateway);
2512}
2513
8ed67789
DL
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL = any */
	struct net *net;		/* namespace whose null entry is kept */
};
2518
1da177e4
LT
2519static int fib6_ifdown(struct rt6_info *rt, void *arg)
2520{
bc3ef660 2521 const struct arg_dev_net *adn = arg;
2522 const struct net_device *dev = adn->dev;
8ed67789 2523
d1918542 2524 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2525 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2526 return -1;
c159d30c 2527
1da177e4
LT
2528 return 0;
2529}
2530
f3db4851 2531void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2532{
8ed67789
DL
2533 struct arg_dev_net adn = {
2534 .dev = dev,
2535 .net = net,
2536 };
2537
0c3584d5 2538 fib6_clean_all(net, fib6_ifdown, &adn);
1e493d19 2539 icmp6_clean_all(fib6_ifdown, &adn);
8d0b94af 2540 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
2541}
2542
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2547
2548static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2549{
2550 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2551 struct inet6_dev *idev;
2552
2553 /* In IPv6 pmtu discovery is not optional,
2554 so that RTAX_MTU lock cannot disable it.
2555 We still use this lock to block changes
2556 caused by addrconf/ndisc.
2557 */
2558
2559 idev = __in6_dev_get(arg->dev);
38308473 2560 if (!idev)
1da177e4
LT
2561 return 0;
2562
2563 /* For administrative MTU increase, there is no way to discover
2564 IPv6 PMTU increase, so PMTU increase should be updated here.
2565 Since RFC 1981 doesn't include administrative MTU increase
2566 update PMTU increase is a MUST. (i.e. jumbo frame)
2567 */
2568 /*
2569 If new MTU is less than route PMTU, this new MTU will be the
2570 lowest MTU in the path, update the route PMTU to reflect PMTU
2571 decreases; if new MTU is greater than route PMTU, and the
2572 old MTU is the lowest MTU in the path, update the route PMTU
2573 to reflect the increase. In this case if the other nodes' MTU
2574 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2575 PMTU discouvery.
2576 */
d1918542 2577 if (rt->dst.dev == arg->dev &&
4b32b5ad
MKL
2578 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2579 if (rt->rt6i_flags & RTF_CACHE) {
2580 /* For RTF_CACHE with rt6i_pmtu == 0
2581 * (i.e. a redirected route),
2582 * the metrics of its rt->dst.from has already
2583 * been updated.
2584 */
2585 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2586 rt->rt6i_pmtu = arg->mtu;
2587 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2588 (dst_mtu(&rt->dst) < arg->mtu &&
2589 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2590 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2591 }
566cfd8f 2592 }
1da177e4
LT
2593 return 0;
2594}
2595
95c96174 2596void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2597{
c71099ac
TG
2598 struct rt6_mtu_change_arg arg = {
2599 .dev = dev,
2600 .mtu = mtu,
2601 };
1da177e4 2602
0c3584d5 2603 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2604}
2605
ef7c79ed 2606static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2607 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2608 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2609 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2610 [RTA_PRIORITY] = { .type = NLA_U32 },
2611 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2612 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2613 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
2614 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2615 [RTA_ENCAP] = { .type = NLA_NESTED },
86872cb5
TG
2616};
2617
2618static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2619 struct fib6_config *cfg)
1da177e4 2620{
86872cb5
TG
2621 struct rtmsg *rtm;
2622 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2623 unsigned int pref;
86872cb5 2624 int err;
1da177e4 2625
86872cb5
TG
2626 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2627 if (err < 0)
2628 goto errout;
1da177e4 2629
86872cb5
TG
2630 err = -EINVAL;
2631 rtm = nlmsg_data(nlh);
2632 memset(cfg, 0, sizeof(*cfg));
2633
2634 cfg->fc_table = rtm->rtm_table;
2635 cfg->fc_dst_len = rtm->rtm_dst_len;
2636 cfg->fc_src_len = rtm->rtm_src_len;
2637 cfg->fc_flags = RTF_UP;
2638 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2639 cfg->fc_type = rtm->rtm_type;
86872cb5 2640
ef2c7d7b
ND
2641 if (rtm->rtm_type == RTN_UNREACHABLE ||
2642 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2643 rtm->rtm_type == RTN_PROHIBIT ||
2644 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2645 cfg->fc_flags |= RTF_REJECT;
2646
ab79ad14
2647 if (rtm->rtm_type == RTN_LOCAL)
2648 cfg->fc_flags |= RTF_LOCAL;
2649
1f56a01f
MKL
2650 if (rtm->rtm_flags & RTM_F_CLONED)
2651 cfg->fc_flags |= RTF_CACHE;
2652
15e47304 2653 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2654 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2655 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2656
2657 if (tb[RTA_GATEWAY]) {
67b61f6c 2658 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2659 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2660 }
86872cb5
TG
2661
2662 if (tb[RTA_DST]) {
2663 int plen = (rtm->rtm_dst_len + 7) >> 3;
2664
2665 if (nla_len(tb[RTA_DST]) < plen)
2666 goto errout;
2667
2668 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2669 }
86872cb5
TG
2670
2671 if (tb[RTA_SRC]) {
2672 int plen = (rtm->rtm_src_len + 7) >> 3;
2673
2674 if (nla_len(tb[RTA_SRC]) < plen)
2675 goto errout;
2676
2677 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2678 }
86872cb5 2679
c3968a85 2680 if (tb[RTA_PREFSRC])
67b61f6c 2681 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 2682
86872cb5
TG
2683 if (tb[RTA_OIF])
2684 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2685
2686 if (tb[RTA_PRIORITY])
2687 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2688
2689 if (tb[RTA_METRICS]) {
2690 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2691 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2692 }
86872cb5
TG
2693
2694 if (tb[RTA_TABLE])
2695 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2696
51ebd318
ND
2697 if (tb[RTA_MULTIPATH]) {
2698 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2699 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2700 }
2701
c78ba6d6
LR
2702 if (tb[RTA_PREF]) {
2703 pref = nla_get_u8(tb[RTA_PREF]);
2704 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2705 pref != ICMPV6_ROUTER_PREF_HIGH)
2706 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2707 cfg->fc_flags |= RTF_PREF(pref);
2708 }
2709
19e42e45
RP
2710 if (tb[RTA_ENCAP])
2711 cfg->fc_encap = tb[RTA_ENCAP];
2712
2713 if (tb[RTA_ENCAP_TYPE])
2714 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2715
86872cb5
TG
2716 err = 0;
2717errout:
2718 return err;
1da177e4
LT
2719}
2720
51ebd318
ND
2721static int ip6_route_multipath(struct fib6_config *cfg, int add)
2722{
2723 struct fib6_config r_cfg;
2724 struct rtnexthop *rtnh;
2725 int remaining;
2726 int attrlen;
2727 int err = 0, last_err = 0;
2728
35f1b4e9 2729 remaining = cfg->fc_mp_len;
51ebd318
ND
2730beginning:
2731 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318
ND
2732
2733 /* Parse a Multipath Entry */
2734 while (rtnh_ok(rtnh, remaining)) {
2735 memcpy(&r_cfg, cfg, sizeof(*cfg));
2736 if (rtnh->rtnh_ifindex)
2737 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2738
2739 attrlen = rtnh_attrlen(rtnh);
2740 if (attrlen > 0) {
2741 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2742
2743 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2744 if (nla) {
67b61f6c 2745 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
2746 r_cfg.fc_flags |= RTF_GATEWAY;
2747 }
19e42e45
RP
2748 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2749 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2750 if (nla)
2751 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318
ND
2752 }
2753 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2754 if (err) {
2755 last_err = err;
2756 /* If we are trying to remove a route, do not stop the
2757 * loop when ip6_route_del() fails (because next hop is
2758 * already gone), we should try to remove all next hops.
2759 */
2760 if (add) {
2761 /* If add fails, we should try to delete all
2762 * next hops that have been already added.
2763 */
2764 add = 0;
35f1b4e9 2765 remaining = cfg->fc_mp_len - remaining;
51ebd318
ND
2766 goto beginning;
2767 }
2768 }
1a72418b 2769 /* Because each route is added like a single route we remove
27596472
MK
2770 * these flags after the first nexthop: if there is a collision,
2771 * we have already failed to add the first nexthop:
2772 * fib6_add_rt2node() has rejected it; when replacing, old
2773 * nexthops have been replaced by first new, the rest should
2774 * be added to it.
1a72418b 2775 */
27596472
MK
2776 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2777 NLM_F_REPLACE);
51ebd318
ND
2778 rtnh = rtnh_next(rtnh, &remaining);
2779 }
2780
2781 return last_err;
2782}
2783
67ba4152 2784static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 2785{
86872cb5
TG
2786 struct fib6_config cfg;
2787 int err;
1da177e4 2788
86872cb5
TG
2789 err = rtm_to_fib6_config(skb, nlh, &cfg);
2790 if (err < 0)
2791 return err;
2792
51ebd318
ND
2793 if (cfg.fc_mp)
2794 return ip6_route_multipath(&cfg, 0);
2795 else
2796 return ip6_route_del(&cfg);
1da177e4
LT
2797}
2798
67ba4152 2799static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 2800{
86872cb5
TG
2801 struct fib6_config cfg;
2802 int err;
1da177e4 2803
86872cb5
TG
2804 err = rtm_to_fib6_config(skb, nlh, &cfg);
2805 if (err < 0)
2806 return err;
2807
51ebd318
ND
2808 if (cfg.fc_mp)
2809 return ip6_route_multipath(&cfg, 1);
2810 else
2811 return ip6_route_add(&cfg);
1da177e4
LT
2812}
2813
19e42e45 2814static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f
TG
2815{
2816 return NLMSG_ALIGN(sizeof(struct rtmsg))
2817 + nla_total_size(16) /* RTA_SRC */
2818 + nla_total_size(16) /* RTA_DST */
2819 + nla_total_size(16) /* RTA_GATEWAY */
2820 + nla_total_size(16) /* RTA_PREFSRC */
2821 + nla_total_size(4) /* RTA_TABLE */
2822 + nla_total_size(4) /* RTA_IIF */
2823 + nla_total_size(4) /* RTA_OIF */
2824 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2825 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 2826 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 2827 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45
RP
2828 + nla_total_size(1) /* RTA_PREF */
2829 + lwtunnel_get_encap_size(rt->rt6i_lwtstate);
339bf98f
TG
2830}
2831
191cd582
BH
2832static int rt6_fill_node(struct net *net,
2833 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 2834 struct in6_addr *dst, struct in6_addr *src,
15e47304 2835 int iif, int type, u32 portid, u32 seq,
7bc570c8 2836 int prefix, int nowait, unsigned int flags)
1da177e4 2837{
4b32b5ad 2838 u32 metrics[RTAX_MAX];
1da177e4 2839 struct rtmsg *rtm;
2d7202bf 2840 struct nlmsghdr *nlh;
e3703b3d 2841 long expires;
9e762a4a 2842 u32 table;
1da177e4
LT
2843
2844 if (prefix) { /* user wants prefix routes only */
2845 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2846 /* success since this is not a prefix route */
2847 return 1;
2848 }
2849 }
2850
15e47304 2851 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 2852 if (!nlh)
26932566 2853 return -EMSGSIZE;
2d7202bf
TG
2854
2855 rtm = nlmsg_data(nlh);
1da177e4
LT
2856 rtm->rtm_family = AF_INET6;
2857 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2858 rtm->rtm_src_len = rt->rt6i_src.plen;
2859 rtm->rtm_tos = 0;
c71099ac 2860 if (rt->rt6i_table)
9e762a4a 2861 table = rt->rt6i_table->tb6_id;
c71099ac 2862 else
9e762a4a
PM
2863 table = RT6_TABLE_UNSPEC;
2864 rtm->rtm_table = table;
c78679e8
DM
2865 if (nla_put_u32(skb, RTA_TABLE, table))
2866 goto nla_put_failure;
ef2c7d7b
ND
2867 if (rt->rt6i_flags & RTF_REJECT) {
2868 switch (rt->dst.error) {
2869 case -EINVAL:
2870 rtm->rtm_type = RTN_BLACKHOLE;
2871 break;
2872 case -EACCES:
2873 rtm->rtm_type = RTN_PROHIBIT;
2874 break;
b4949ab2
ND
2875 case -EAGAIN:
2876 rtm->rtm_type = RTN_THROW;
2877 break;
ef2c7d7b
ND
2878 default:
2879 rtm->rtm_type = RTN_UNREACHABLE;
2880 break;
2881 }
2882 }
38308473 2883 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2884 rtm->rtm_type = RTN_LOCAL;
d1918542 2885 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2886 rtm->rtm_type = RTN_LOCAL;
2887 else
2888 rtm->rtm_type = RTN_UNICAST;
2889 rtm->rtm_flags = 0;
cea45e20
AG
2890 if (!netif_carrier_ok(rt->dst.dev))
2891 rtm->rtm_flags |= RTNH_F_LINKDOWN;
1da177e4
LT
2892 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2893 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2894 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 2895 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
2896 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2897 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2898 rtm->rtm_protocol = RTPROT_RA;
2899 else
2900 rtm->rtm_protocol = RTPROT_KERNEL;
2901 }
1da177e4 2902
38308473 2903 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2904 rtm->rtm_flags |= RTM_F_CLONED;
2905
2906 if (dst) {
930345ea 2907 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 2908 goto nla_put_failure;
1ab1457c 2909 rtm->rtm_dst_len = 128;
1da177e4 2910 } else if (rtm->rtm_dst_len)
930345ea 2911 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 2912 goto nla_put_failure;
1da177e4
LT
2913#ifdef CONFIG_IPV6_SUBTREES
2914 if (src) {
930345ea 2915 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 2916 goto nla_put_failure;
1ab1457c 2917 rtm->rtm_src_len = 128;
c78679e8 2918 } else if (rtm->rtm_src_len &&
930345ea 2919 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 2920 goto nla_put_failure;
1da177e4 2921#endif
7bc570c8
YH
2922 if (iif) {
2923#ifdef CONFIG_IPV6_MROUTE
2924 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2925 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2926 if (err <= 0) {
2927 if (!nowait) {
2928 if (err == 0)
2929 return 0;
2930 goto nla_put_failure;
2931 } else {
2932 if (err == -EMSGSIZE)
2933 goto nla_put_failure;
2934 }
2935 }
2936 } else
2937#endif
c78679e8
DM
2938 if (nla_put_u32(skb, RTA_IIF, iif))
2939 goto nla_put_failure;
7bc570c8 2940 } else if (dst) {
1da177e4 2941 struct in6_addr saddr_buf;
c78679e8 2942 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 2943 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 2944 goto nla_put_failure;
1da177e4 2945 }
2d7202bf 2946
c3968a85
DW
2947 if (rt->rt6i_prefsrc.plen) {
2948 struct in6_addr saddr_buf;
4e3fd7a0 2949 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 2950 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 2951 goto nla_put_failure;
c3968a85
DW
2952 }
2953
4b32b5ad
MKL
2954 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2955 if (rt->rt6i_pmtu)
2956 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
2957 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
2958 goto nla_put_failure;
2959
dd0cbf29 2960 if (rt->rt6i_flags & RTF_GATEWAY) {
930345ea 2961 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
94f826b8 2962 goto nla_put_failure;
94f826b8 2963 }
2d7202bf 2964
c78679e8
DM
2965 if (rt->dst.dev &&
2966 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2967 goto nla_put_failure;
2968 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2969 goto nla_put_failure;
8253947e
LW
2970
2971 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 2972
87a50699 2973 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 2974 goto nla_put_failure;
2d7202bf 2975
c78ba6d6
LR
2976 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2977 goto nla_put_failure;
2978
19e42e45
RP
2979 lwtunnel_fill_encap(skb, rt->rt6i_lwtstate);
2980
053c095a
JB
2981 nlmsg_end(skb, nlh);
2982 return 0;
2d7202bf
TG
2983
2984nla_put_failure:
26932566
PM
2985 nlmsg_cancel(skb, nlh);
2986 return -EMSGSIZE;
1da177e4
LT
2987}
2988
1b43af54 2989int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2990{
2991 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2992 int prefix;
2993
2d7202bf
TG
2994 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2995 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2996 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2997 } else
2998 prefix = 0;
2999
191cd582
BH
3000 return rt6_fill_node(arg->net,
3001 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 3002 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 3003 prefix, 0, NLM_F_MULTI);
1da177e4
LT
3004}
3005
67ba4152 3006static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 3007{
3b1e0a65 3008 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
3009 struct nlattr *tb[RTA_MAX+1];
3010 struct rt6_info *rt;
1da177e4 3011 struct sk_buff *skb;
ab364a6f 3012 struct rtmsg *rtm;
4c9483b2 3013 struct flowi6 fl6;
72331bc0 3014 int err, iif = 0, oif = 0;
1da177e4 3015
ab364a6f
TG
3016 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3017 if (err < 0)
3018 goto errout;
1da177e4 3019
ab364a6f 3020 err = -EINVAL;
4c9483b2 3021 memset(&fl6, 0, sizeof(fl6));
1da177e4 3022
ab364a6f
TG
3023 if (tb[RTA_SRC]) {
3024 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3025 goto errout;
3026
4e3fd7a0 3027 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
3028 }
3029
3030 if (tb[RTA_DST]) {
3031 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3032 goto errout;
3033
4e3fd7a0 3034 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
3035 }
3036
3037 if (tb[RTA_IIF])
3038 iif = nla_get_u32(tb[RTA_IIF]);
3039
3040 if (tb[RTA_OIF])
72331bc0 3041 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 3042
2e47b291
LC
3043 if (tb[RTA_MARK])
3044 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3045
1da177e4
LT
3046 if (iif) {
3047 struct net_device *dev;
72331bc0
SL
3048 int flags = 0;
3049
5578689a 3050 dev = __dev_get_by_index(net, iif);
1da177e4
LT
3051 if (!dev) {
3052 err = -ENODEV;
ab364a6f 3053 goto errout;
1da177e4 3054 }
72331bc0
SL
3055
3056 fl6.flowi6_iif = iif;
3057
3058 if (!ipv6_addr_any(&fl6.saddr))
3059 flags |= RT6_LOOKUP_F_HAS_SADDR;
3060
3061 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3062 flags);
3063 } else {
3064 fl6.flowi6_oif = oif;
3065
3066 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
3067 }
3068
ab364a6f 3069 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 3070 if (!skb) {
94e187c0 3071 ip6_rt_put(rt);
ab364a6f
TG
3072 err = -ENOBUFS;
3073 goto errout;
3074 }
1da177e4 3075
ab364a6f
TG
3076 /* Reserve room for dummy headers, this skb can pass
3077 through good chunk of routing engine.
3078 */
459a98ed 3079 skb_reset_mac_header(skb);
ab364a6f 3080 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 3081
d8d1f30b 3082 skb_dst_set(skb, &rt->dst);
1da177e4 3083
4c9483b2 3084 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 3085 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 3086 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 3087 if (err < 0) {
ab364a6f
TG
3088 kfree_skb(skb);
3089 goto errout;
1da177e4
LT
3090 }
3091
15e47304 3092 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 3093errout:
1da177e4 3094 return err;
1da177e4
LT
3095}
3096
86872cb5 3097void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
3098{
3099 struct sk_buff *skb;
5578689a 3100 struct net *net = info->nl_net;
528c4ceb
DL
3101 u32 seq;
3102 int err;
3103
3104 err = -ENOBUFS;
38308473 3105 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 3106
19e42e45 3107 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 3108 if (!skb)
21713ebc
TG
3109 goto errout;
3110
191cd582 3111 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
15e47304 3112 event, info->portid, seq, 0, 0, 0);
26932566
PM
3113 if (err < 0) {
3114 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3115 WARN_ON(err == -EMSGSIZE);
3116 kfree_skb(skb);
3117 goto errout;
3118 }
15e47304 3119 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
3120 info->nlh, gfp_any());
3121 return;
21713ebc
TG
3122errout:
3123 if (err < 0)
5578689a 3124 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
3125}
3126
8ed67789 3127static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 3128 unsigned long event, void *ptr)
8ed67789 3129{
351638e7 3130 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 3131 struct net *net = dev_net(dev);
8ed67789
DL
3132
3133 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 3134 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
3135 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3136#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3137 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 3138 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 3139 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
3140 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3141#endif
3142 }
3143
3144 return NOTIFY_OK;
3145}
3146
1da177e4
LT
3147/*
3148 * /proc
3149 */
3150
3151#ifdef CONFIG_PROC_FS
3152
33120b30
AD
3153static const struct file_operations ipv6_route_proc_fops = {
3154 .owner = THIS_MODULE,
3155 .open = ipv6_route_open,
3156 .read = seq_read,
3157 .llseek = seq_lseek,
8d2ca1d7 3158 .release = seq_release_net,
33120b30
AD
3159};
3160
1da177e4
LT
3161static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3162{
69ddb805 3163 struct net *net = (struct net *)seq->private;
1da177e4 3164 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
3165 net->ipv6.rt6_stats->fib_nodes,
3166 net->ipv6.rt6_stats->fib_route_nodes,
3167 net->ipv6.rt6_stats->fib_rt_alloc,
3168 net->ipv6.rt6_stats->fib_rt_entries,
3169 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 3170 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 3171 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
3172
3173 return 0;
3174}
3175
3176static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3177{
de05c557 3178 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
3179}
3180
9a32144e 3181static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
3182 .owner = THIS_MODULE,
3183 .open = rt6_stats_seq_open,
3184 .read = seq_read,
3185 .llseek = seq_lseek,
b6fcbdb4 3186 .release = single_release_net,
1da177e4
LT
3187};
3188#endif /* CONFIG_PROC_FS */
3189
3190#ifdef CONFIG_SYSCTL
3191
1da177e4 3192static
fe2c6338 3193int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
3194 void __user *buffer, size_t *lenp, loff_t *ppos)
3195{
c486da34
LAG
3196 struct net *net;
3197 int delay;
3198 if (!write)
1da177e4 3199 return -EINVAL;
c486da34
LAG
3200
3201 net = (struct net *)ctl->extra1;
3202 delay = net->ipv6.sysctl.flush_delay;
3203 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 3204 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 3205 return 0;
1da177e4
LT
3206}
3207
fe2c6338 3208struct ctl_table ipv6_route_table_template[] = {
1ab1457c 3209 {
1da177e4 3210 .procname = "flush",
4990509f 3211 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 3212 .maxlen = sizeof(int),
89c8b3a1 3213 .mode = 0200,
6d9f239a 3214 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
3215 },
3216 {
1da177e4 3217 .procname = "gc_thresh",
9a7ec3a9 3218 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
3219 .maxlen = sizeof(int),
3220 .mode = 0644,
6d9f239a 3221 .proc_handler = proc_dointvec,
1da177e4
LT
3222 },
3223 {
1da177e4 3224 .procname = "max_size",
4990509f 3225 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
3226 .maxlen = sizeof(int),
3227 .mode = 0644,
6d9f239a 3228 .proc_handler = proc_dointvec,
1da177e4
LT
3229 },
3230 {
1da177e4 3231 .procname = "gc_min_interval",
4990509f 3232 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3233 .maxlen = sizeof(int),
3234 .mode = 0644,
6d9f239a 3235 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3236 },
3237 {
1da177e4 3238 .procname = "gc_timeout",
4990509f 3239 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3240 .maxlen = sizeof(int),
3241 .mode = 0644,
6d9f239a 3242 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3243 },
3244 {
1da177e4 3245 .procname = "gc_interval",
4990509f 3246 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3247 .maxlen = sizeof(int),
3248 .mode = 0644,
6d9f239a 3249 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3250 },
3251 {
1da177e4 3252 .procname = "gc_elasticity",
4990509f 3253 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3254 .maxlen = sizeof(int),
3255 .mode = 0644,
f3d3f616 3256 .proc_handler = proc_dointvec,
1da177e4
LT
3257 },
3258 {
1da177e4 3259 .procname = "mtu_expires",
4990509f 3260 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3261 .maxlen = sizeof(int),
3262 .mode = 0644,
6d9f239a 3263 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3264 },
3265 {
1da177e4 3266 .procname = "min_adv_mss",
4990509f 3267 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3268 .maxlen = sizeof(int),
3269 .mode = 0644,
f3d3f616 3270 .proc_handler = proc_dointvec,
1da177e4
LT
3271 },
3272 {
1da177e4 3273 .procname = "gc_min_interval_ms",
4990509f 3274 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3275 .maxlen = sizeof(int),
3276 .mode = 0644,
6d9f239a 3277 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3278 },
f8572d8f 3279 { }
1da177e4
LT
3280};
3281
2c8c1e72 3282struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3283{
3284 struct ctl_table *table;
3285
3286 table = kmemdup(ipv6_route_table_template,
3287 sizeof(ipv6_route_table_template),
3288 GFP_KERNEL);
5ee09105
YH
3289
3290 if (table) {
3291 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3292 table[0].extra1 = net;
86393e52 3293 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3294 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3295 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3296 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3297 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3298 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3299 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3300 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3301 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3302
3303 /* Don't export sysctls to unprivileged users */
3304 if (net->user_ns != &init_user_ns)
3305 table[0].procname = NULL;
5ee09105
YH
3306 }
3307
760f2d01
DL
3308 return table;
3309}
1da177e4
LT
3310#endif
3311
2c8c1e72 3312static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3313{
633d424b 3314 int ret = -ENOMEM;
8ed67789 3315
86393e52
AD
3316 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3317 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3318
fc66f95c
ED
3319 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3320 goto out_ip6_dst_ops;
3321
8ed67789
DL
3322 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3323 sizeof(*net->ipv6.ip6_null_entry),
3324 GFP_KERNEL);
3325 if (!net->ipv6.ip6_null_entry)
fc66f95c 3326 goto out_ip6_dst_entries;
d8d1f30b 3327 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3328 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3329 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3330 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3331 ip6_template_metrics, true);
8ed67789
DL
3332
3333#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3334 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3335 sizeof(*net->ipv6.ip6_prohibit_entry),
3336 GFP_KERNEL);
68fffc67
PZ
3337 if (!net->ipv6.ip6_prohibit_entry)
3338 goto out_ip6_null_entry;
d8d1f30b 3339 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3340 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3341 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3342 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3343 ip6_template_metrics, true);
8ed67789
DL
3344
3345 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3346 sizeof(*net->ipv6.ip6_blk_hole_entry),
3347 GFP_KERNEL);
68fffc67
PZ
3348 if (!net->ipv6.ip6_blk_hole_entry)
3349 goto out_ip6_prohibit_entry;
d8d1f30b 3350 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3351 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3352 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3353 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3354 ip6_template_metrics, true);
8ed67789
DL
3355#endif
3356
b339a47c
PZ
3357 net->ipv6.sysctl.flush_delay = 0;
3358 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3359 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3360 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3361 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3362 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3363 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3364 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3365
6891a346
BT
3366 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3367
8ed67789
DL
3368 ret = 0;
3369out:
3370 return ret;
f2fc6a54 3371
68fffc67
PZ
3372#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3373out_ip6_prohibit_entry:
3374 kfree(net->ipv6.ip6_prohibit_entry);
3375out_ip6_null_entry:
3376 kfree(net->ipv6.ip6_null_entry);
3377#endif
fc66f95c
ED
3378out_ip6_dst_entries:
3379 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3380out_ip6_dst_ops:
f2fc6a54 3381 goto out;
cdb18761
DL
3382}
3383
2c8c1e72 3384static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3385{
8ed67789
DL
3386 kfree(net->ipv6.ip6_null_entry);
3387#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3388 kfree(net->ipv6.ip6_prohibit_entry);
3389 kfree(net->ipv6.ip6_blk_hole_entry);
3390#endif
41bb78b4 3391 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3392}
3393
d189634e
TG
3394static int __net_init ip6_route_net_init_late(struct net *net)
3395{
3396#ifdef CONFIG_PROC_FS
d4beaa66
G
3397 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3398 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
3399#endif
3400 return 0;
3401}
3402
3403static void __net_exit ip6_route_net_exit_late(struct net *net)
3404{
3405#ifdef CONFIG_PROC_FS
ece31ffd
G
3406 remove_proc_entry("ipv6_route", net->proc_net);
3407 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
3408#endif
3409}
3410
cdb18761
DL
3411static struct pernet_operations ip6_route_net_ops = {
3412 .init = ip6_route_net_init,
3413 .exit = ip6_route_net_exit,
3414};
3415
c3426b47
DM
3416static int __net_init ipv6_inetpeer_init(struct net *net)
3417{
3418 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3419
3420 if (!bp)
3421 return -ENOMEM;
3422 inet_peer_base_init(bp);
3423 net->ipv6.peers = bp;
3424 return 0;
3425}
3426
3427static void __net_exit ipv6_inetpeer_exit(struct net *net)
3428{
3429 struct inet_peer_base *bp = net->ipv6.peers;
3430
3431 net->ipv6.peers = NULL;
56a6b248 3432 inetpeer_invalidate_tree(bp);
c3426b47
DM
3433 kfree(bp);
3434}
3435
2b823f72 3436static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3437 .init = ipv6_inetpeer_init,
3438 .exit = ipv6_inetpeer_exit,
3439};
3440
d189634e
TG
3441static struct pernet_operations ip6_route_net_late_ops = {
3442 .init = ip6_route_net_init_late,
3443 .exit = ip6_route_net_exit_late,
3444};
3445
8ed67789
DL
3446static struct notifier_block ip6_route_dev_notifier = {
3447 .notifier_call = ip6_route_dev_notify,
3448 .priority = 0,
3449};
3450
433d49c3 3451int __init ip6_route_init(void)
1da177e4 3452{
433d49c3 3453 int ret;
8d0b94af 3454 int cpu;
433d49c3 3455
9a7ec3a9
DL
3456 ret = -ENOMEM;
3457 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3458 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3459 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3460 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3461 goto out;
14e50e57 3462
fc66f95c 3463 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3464 if (ret)
bdb3289f 3465 goto out_kmem_cache;
bdb3289f 3466
c3426b47
DM
3467 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3468 if (ret)
e8803b6c 3469 goto out_dst_entries;
2a0c451a 3470
7e52b33b
DM
3471 ret = register_pernet_subsys(&ip6_route_net_ops);
3472 if (ret)
3473 goto out_register_inetpeer;
c3426b47 3474
5dc121e9
AE
3475 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3476
8ed67789
DL
3477 /* Registering of the loopback is done before this portion of code,
3478 * the loopback reference in rt6_info will not be taken, do it
3479 * manually for init_net */
d8d1f30b 3480 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3481 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3482 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3483 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3484 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3485 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3486 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3487 #endif
e8803b6c 3488 ret = fib6_init();
433d49c3 3489 if (ret)
8ed67789 3490 goto out_register_subsys;
433d49c3 3491
433d49c3
DL
3492 ret = xfrm6_init();
3493 if (ret)
e8803b6c 3494 goto out_fib6_init;
c35b7e72 3495
433d49c3
DL
3496 ret = fib6_rules_init();
3497 if (ret)
3498 goto xfrm6_init;
7e5449c2 3499
d189634e
TG
3500 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3501 if (ret)
3502 goto fib6_rules_init;
3503
433d49c3 3504 ret = -ENOBUFS;
c7ac8679
GR
3505 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3506 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3507 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3508 goto out_register_late_subsys;
c127ea2c 3509
8ed67789 3510 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3511 if (ret)
d189634e 3512 goto out_register_late_subsys;
8ed67789 3513
8d0b94af
MKL
3514 for_each_possible_cpu(cpu) {
3515 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3516
3517 INIT_LIST_HEAD(&ul->head);
3518 spin_lock_init(&ul->lock);
3519 }
3520
433d49c3
DL
3521out:
3522 return ret;
3523
d189634e
TG
3524out_register_late_subsys:
3525 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3526fib6_rules_init:
433d49c3
DL
3527 fib6_rules_cleanup();
3528xfrm6_init:
433d49c3 3529 xfrm6_fini();
2a0c451a
TG
3530out_fib6_init:
3531 fib6_gc_cleanup();
8ed67789
DL
3532out_register_subsys:
3533 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3534out_register_inetpeer:
3535 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3536out_dst_entries:
3537 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3538out_kmem_cache:
f2fc6a54 3539 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3540 goto out;
1da177e4
LT
3541}
3542
3543void ip6_route_cleanup(void)
3544{
8ed67789 3545 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3546 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3547 fib6_rules_cleanup();
1da177e4 3548 xfrm6_fini();
1da177e4 3549 fib6_gc_cleanup();
c3426b47 3550 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3551 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3552 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3553 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3554}