ipv6: Fold rt6_info_hash_nhsfn() into its only caller
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
904af04d 57#include <net/dst_metadata.h>
1da177e4 58#include <net/xfrm.h>
8d71740c 59#include <net/netevent.h>
21713ebc 60#include <net/netlink.h>
51ebd318 61#include <net/nexthop.h>
19e42e45 62#include <net/lwtunnel.h>
904af04d 63#include <net/ip_tunnels.h>
ca254490 64#include <net/l3mdev.h>
b811580d 65#include <trace/events/fib6.h>
1da177e4 66
7c0f6ba6 67#include <linux/uaccess.h>
1da177e4
LT
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
/*
 * Result of checking the neighbour state of a route's next hop.
 * All failure codes are negative, so callers can reject with "< 0".
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() skips the route */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour is NUD_FAILED (ROUTER_PREF only) */
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() scores it 0 and requests round-robin */
	RT6_NUD_SUCCEED		= 1
};
79
83a09abd 80static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 83static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
569d3645 88static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
89
90static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 91static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 92static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 94static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
4b32b5ad 99static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
16a16cd3
DA
101static size_t rt6_nlmsg_size(struct rt6_info *rt);
102static int rt6_fill_node(struct net *net,
103 struct sk_buff *skb, struct rt6_info *rt,
104 struct in6_addr *dst, struct in6_addr *src,
105 int iif, int type, u32 portid, u32 seq,
106 unsigned int flags);
1da177e4 107
70ceb4f5 108#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 109static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 110 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
111 const struct in6_addr *gwaddr,
112 struct net_device *dev,
95c96174 113 unsigned int pref);
efa2cea0 114static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 115 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
116 const struct in6_addr *gwaddr,
117 struct net_device *dev);
70ceb4f5
YH
118#endif
119
8d0b94af
MKL
120struct uncached_list {
121 spinlock_t lock;
122 struct list_head head;
123};
124
125static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
126
127static void rt6_uncached_list_add(struct rt6_info *rt)
128{
129 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
130
8d0b94af
MKL
131 rt->rt6i_uncached_list = ul;
132
133 spin_lock_bh(&ul->lock);
134 list_add_tail(&rt->rt6i_uncached, &ul->head);
135 spin_unlock_bh(&ul->lock);
136}
137
138static void rt6_uncached_list_del(struct rt6_info *rt)
139{
140 if (!list_empty(&rt->rt6i_uncached)) {
141 struct uncached_list *ul = rt->rt6i_uncached_list;
142
143 spin_lock_bh(&ul->lock);
144 list_del(&rt->rt6i_uncached);
145 spin_unlock_bh(&ul->lock);
146 }
147}
148
149static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
150{
151 struct net_device *loopback_dev = net->loopback_dev;
152 int cpu;
153
e332bc67
EB
154 if (dev == loopback_dev)
155 return;
156
8d0b94af
MKL
157 for_each_possible_cpu(cpu) {
158 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
159 struct rt6_info *rt;
160
161 spin_lock_bh(&ul->lock);
162 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
163 struct inet6_dev *rt_idev = rt->rt6i_idev;
164 struct net_device *rt_dev = rt->dst.dev;
165
e332bc67 166 if (rt_idev->dev == dev) {
8d0b94af
MKL
167 rt->rt6i_idev = in6_dev_get(loopback_dev);
168 in6_dev_put(rt_idev);
169 }
170
e332bc67 171 if (rt_dev == dev) {
8d0b94af
MKL
172 rt->dst.dev = loopback_dev;
173 dev_hold(rt->dst.dev);
174 dev_put(rt_dev);
175 }
176 }
177 spin_unlock_bh(&ul->lock);
178 }
179}
180
d52d3997
MKL
181static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
182{
183 return dst_metrics_write_ptr(rt->dst.from);
184}
185
06582540
DM
186static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
187{
4b32b5ad 188 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 189
d52d3997
MKL
190 if (rt->rt6i_flags & RTF_PCPU)
191 return rt6_pcpu_cow_metrics(rt);
192 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
193 return NULL;
194 else
3b471175 195 return dst_cow_metrics_generic(dst, old);
06582540
DM
196}
197
f894cbf8
DM
198static inline const void *choose_neigh_daddr(struct rt6_info *rt,
199 struct sk_buff *skb,
200 const void *daddr)
39232973
DM
201{
202 struct in6_addr *p = &rt->rt6i_gateway;
203
a7563f34 204 if (!ipv6_addr_any(p))
39232973 205 return (const void *) p;
f894cbf8
DM
206 else if (skb)
207 return &ipv6_hdr(skb)->daddr;
39232973
DM
208 return daddr;
209}
210
f894cbf8
DM
211static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
212 struct sk_buff *skb,
213 const void *daddr)
d3aaeb38 214{
39232973
DM
215 struct rt6_info *rt = (struct rt6_info *) dst;
216 struct neighbour *n;
217
f894cbf8 218 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 219 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
220 if (n)
221 return n;
222 return neigh_create(&nd_tbl, daddr, dst->dev);
223}
224
63fca65d
JA
225static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
226{
227 struct net_device *dev = dst->dev;
228 struct rt6_info *rt = (struct rt6_info *)dst;
229
230 daddr = choose_neigh_daddr(rt, NULL, daddr);
231 if (!daddr)
232 return;
233 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
234 return;
235 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
236 return;
237 __ipv6_confirm_neigh(dev, daddr);
238}
239
9a7ec3a9 240static struct dst_ops ip6_dst_ops_template = {
1da177e4 241 .family = AF_INET6,
1da177e4
LT
242 .gc = ip6_dst_gc,
243 .gc_thresh = 1024,
244 .check = ip6_dst_check,
0dbaee3b 245 .default_advmss = ip6_default_advmss,
ebb762f2 246 .mtu = ip6_mtu,
06582540 247 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
248 .destroy = ip6_dst_destroy,
249 .ifdown = ip6_dst_ifdown,
250 .negative_advice = ip6_negative_advice,
251 .link_failure = ip6_link_failure,
252 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 253 .redirect = rt6_do_redirect,
9f8955cc 254 .local_out = __ip6_local_out,
d3aaeb38 255 .neigh_lookup = ip6_neigh_lookup,
63fca65d 256 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
257};
258
ebb762f2 259static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 260{
618f9bc7
SK
261 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
262
263 return mtu ? : dst->dev->mtu;
ec831ea7
RD
264}
265
6700c270
DM
266static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
267 struct sk_buff *skb, u32 mtu)
14e50e57
DM
268{
269}
270
6700c270
DM
/* Blackhole dsts ignore redirects: intentionally empty. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
275
14e50e57
DM
276static struct dst_ops ip6_dst_blackhole_ops = {
277 .family = AF_INET6,
14e50e57
DM
278 .destroy = ip6_dst_destroy,
279 .check = ip6_dst_check,
ebb762f2 280 .mtu = ip6_blackhole_mtu,
214f45c9 281 .default_advmss = ip6_default_advmss,
14e50e57 282 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 283 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 284 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 285 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
286};
287
62fa8a84 288static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 289 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
290};
291
fb0af4c7 292static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
293 .dst = {
294 .__refcnt = ATOMIC_INIT(1),
295 .__use = 1,
2c20cbd7 296 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 297 .error = -ENETUNREACH,
d8d1f30b
CG
298 .input = ip6_pkt_discard,
299 .output = ip6_pkt_discard_out,
1da177e4
LT
300 },
301 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 302 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
303 .rt6i_metric = ~(u32) 0,
304 .rt6i_ref = ATOMIC_INIT(1),
305};
306
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Template for the reject route that reports -EACCES and hands packets
 * to ip6_pkt_prohibit() / ip6_pkt_prohibit_out().
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the reject route that reports -EINVAL and hands packets
 * to dst_discard() / dst_discard_out().
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
340
ebfa45f0
MKL
341static void rt6_info_init(struct rt6_info *rt)
342{
343 struct dst_entry *dst = &rt->dst;
344
345 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
346 INIT_LIST_HEAD(&rt->rt6i_siblings);
347 INIT_LIST_HEAD(&rt->rt6i_uncached);
348}
349
1da177e4 350/* allocate dst with ip6_dst_ops */
d52d3997
MKL
351static struct rt6_info *__ip6_dst_alloc(struct net *net,
352 struct net_device *dev,
ad706862 353 int flags)
1da177e4 354{
97bab73f 355 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 356 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 357
ebfa45f0
MKL
358 if (rt)
359 rt6_info_init(rt);
8104891b 360
cf911662 361 return rt;
1da177e4
LT
362}
363
9ab179d8
DA
364struct rt6_info *ip6_dst_alloc(struct net *net,
365 struct net_device *dev,
366 int flags)
d52d3997 367{
ad706862 368 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
369
370 if (rt) {
371 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
372 if (rt->rt6i_pcpu) {
373 int cpu;
374
375 for_each_possible_cpu(cpu) {
376 struct rt6_info **p;
377
378 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
379 /* no one shares rt */
380 *p = NULL;
381 }
382 } else {
587fea74 383 dst_release_immediate(&rt->dst);
d52d3997
MKL
384 return NULL;
385 }
386 }
387
388 return rt;
389}
9ab179d8 390EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 391
1da177e4
LT
392static void ip6_dst_destroy(struct dst_entry *dst)
393{
394 struct rt6_info *rt = (struct rt6_info *)dst;
ecd98837 395 struct dst_entry *from = dst->from;
8d0b94af 396 struct inet6_dev *idev;
1da177e4 397
4b32b5ad 398 dst_destroy_metrics_generic(dst);
87775312 399 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
400 rt6_uncached_list_del(rt);
401
402 idev = rt->rt6i_idev;
38308473 403 if (idev) {
1da177e4
LT
404 rt->rt6i_idev = NULL;
405 in6_dev_put(idev);
1ab1457c 406 }
1716a961 407
ecd98837
YH
408 dst->from = NULL;
409 dst_release(from);
b3419363
DM
410}
411
1da177e4
LT
412static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
413 int how)
414{
415 struct rt6_info *rt = (struct rt6_info *)dst;
416 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 417 struct net_device *loopback_dev =
c346dca1 418 dev_net(dev)->loopback_dev;
1da177e4 419
e5645f51
WW
420 if (idev && idev->dev != loopback_dev) {
421 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
422 if (loopback_idev) {
423 rt->rt6i_idev = loopback_idev;
424 in6_dev_put(idev);
97cac082 425 }
1da177e4
LT
426 }
427}
428
5973fb1e
MKL
429static bool __rt6_check_expired(const struct rt6_info *rt)
430{
431 if (rt->rt6i_flags & RTF_EXPIRES)
432 return time_after(jiffies, rt->dst.expires);
433 else
434 return false;
435}
436
a50feda5 437static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 438{
1716a961
G
439 if (rt->rt6i_flags & RTF_EXPIRES) {
440 if (time_after(jiffies, rt->dst.expires))
a50feda5 441 return true;
1716a961 442 } else if (rt->dst.from) {
3fd91fb3 443 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 444 }
a50feda5 445 return false;
1da177e4
LT
446}
447
51ebd318 448static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
449 struct flowi6 *fl6, int oif,
450 int strict)
51ebd318
ND
451{
452 struct rt6_info *sibling, *next_sibling;
453 int route_choosen;
454
956b4531 455 route_choosen = get_hash_from_flowi6(fl6) % (match->rt6i_nsiblings + 1);
51ebd318
ND
456 /* Don't change the route, if route_choosen == 0
457 * (siblings does not include ourself)
458 */
459 if (route_choosen)
460 list_for_each_entry_safe(sibling, next_sibling,
461 &match->rt6i_siblings, rt6i_siblings) {
462 route_choosen--;
463 if (route_choosen == 0) {
52bd4c0c
ND
464 if (rt6_score_route(sibling, oif, strict) < 0)
465 break;
51ebd318
ND
466 match = sibling;
467 break;
468 }
469 }
470 return match;
471}
472
1da177e4 473/*
c71099ac 474 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
475 */
476
8ed67789
DL
477static inline struct rt6_info *rt6_device_match(struct net *net,
478 struct rt6_info *rt,
b71d1d42 479 const struct in6_addr *saddr,
1da177e4 480 int oif,
d420895e 481 int flags)
1da177e4
LT
482{
483 struct rt6_info *local = NULL;
484 struct rt6_info *sprt;
485
dd3abc4e
YH
486 if (!oif && ipv6_addr_any(saddr))
487 goto out;
488
d8d1f30b 489 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 490 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
491
492 if (oif) {
1da177e4
LT
493 if (dev->ifindex == oif)
494 return sprt;
495 if (dev->flags & IFF_LOOPBACK) {
38308473 496 if (!sprt->rt6i_idev ||
1da177e4 497 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 498 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 499 continue;
17fb0b2b
DA
500 if (local &&
501 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
502 continue;
503 }
504 local = sprt;
505 }
dd3abc4e
YH
506 } else {
507 if (ipv6_chk_addr(net, saddr, dev,
508 flags & RT6_LOOKUP_F_IFACE))
509 return sprt;
1da177e4 510 }
dd3abc4e 511 }
1da177e4 512
dd3abc4e 513 if (oif) {
1da177e4
LT
514 if (local)
515 return local;
516
d420895e 517 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 518 return net->ipv6.ip6_null_entry;
1da177e4 519 }
dd3abc4e 520out:
1da177e4
LT
521 return rt;
522}
523
#ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred neighbour solicitation request queued by rt6_probe(). */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;		/* gateway address to solicit */
	struct net_device *dev;		/* held until the work has run */
};

/*
 * Work handler: send a neighbour solicitation for the target to its
 * solicited-node multicast address, then drop the device reference
 * and free the work item.
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct __rt6_probe_work *probe =
		container_of(w, struct __rt6_probe_work, work);
	struct in6_addr solicited_mcast;

	addrconf_addr_solict_mult(&probe->target, &solicited_mcast);
	ndisc_send_ns(probe->dev, &probe->target, &solicited_mcast, NULL, 0);
	dev_put(probe->dev);
	kfree(probe);
}

/*
 * Router Reachability Probing for the gateway of @rt.
 *
 * Okay, this does not seem to be appropriate for now, however, we
 * need to check if it is really so.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute; for a known neighbour the probe is only scheduled once
 * rtr_probe_interval has passed since the entry was last updated.
 * Routes without RTF_GATEWAY (or a NULL @rt) are ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work = NULL;
	struct neighbour *neigh;

	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (!neigh) {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	} else {
		/* Cheap unlocked check first; repeated under the lock. */
		if (neigh->nud_state & NUD_VALID)
			goto out;

		write_lock(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		/* Reference is dropped by rt6_probe_deferred(). */
		dev_hold(rt->dst.dev);
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
594
1da177e4 595/*
554cfb7e 596 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 597 */
b6f99a21 598static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 599{
d1918542 600 struct net_device *dev = rt->dst.dev;
161980f4 601 if (!oif || dev->ifindex == oif)
554cfb7e 602 return 2;
161980f4
DM
603 if ((dev->flags & IFF_LOOPBACK) &&
604 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
605 return 1;
606 return 0;
554cfb7e 607}
1da177e4 608
afc154e9 609static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 610{
f2c31e32 611 struct neighbour *neigh;
afc154e9 612 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 613
4d0c5911
YH
614 if (rt->rt6i_flags & RTF_NONEXTHOP ||
615 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 616 return RT6_NUD_SUCCEED;
145a3621
YH
617
618 rcu_read_lock_bh();
619 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
620 if (neigh) {
621 read_lock(&neigh->lock);
554cfb7e 622 if (neigh->nud_state & NUD_VALID)
afc154e9 623 ret = RT6_NUD_SUCCEED;
398bcbeb 624#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 625 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 626 ret = RT6_NUD_SUCCEED;
7e980569
JB
627 else
628 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 629#endif
145a3621 630 read_unlock(&neigh->lock);
afc154e9
HFS
631 } else {
632 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 633 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 634 }
145a3621
YH
635 rcu_read_unlock_bh();
636
a5a81f0b 637 return ret;
1da177e4
LT
638}
639
554cfb7e
YH
640static int rt6_score_route(struct rt6_info *rt, int oif,
641 int strict)
1da177e4 642{
a5a81f0b 643 int m;
1ab1457c 644
4d0c5911 645 m = rt6_check_dev(rt, oif);
77d16f45 646 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 647 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
648#ifdef CONFIG_IPV6_ROUTER_PREF
649 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
650#endif
afc154e9
HFS
651 if (strict & RT6_LOOKUP_F_REACHABLE) {
652 int n = rt6_check_neigh(rt);
653 if (n < 0)
654 return n;
655 }
554cfb7e
YH
656 return m;
657}
658
f11e6659 659static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
660 int *mpri, struct rt6_info *match,
661 bool *do_rr)
554cfb7e 662{
f11e6659 663 int m;
afc154e9 664 bool match_do_rr = false;
35103d11
AG
665 struct inet6_dev *idev = rt->rt6i_idev;
666 struct net_device *dev = rt->dst.dev;
667
668 if (dev && !netif_carrier_ok(dev) &&
d5d32e4b
DA
669 idev->cnf.ignore_routes_with_linkdown &&
670 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 671 goto out;
f11e6659
DM
672
673 if (rt6_check_expired(rt))
674 goto out;
675
676 m = rt6_score_route(rt, oif, strict);
7e980569 677 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
678 match_do_rr = true;
679 m = 0; /* lowest valid score */
7e980569 680 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 681 goto out;
afc154e9
HFS
682 }
683
684 if (strict & RT6_LOOKUP_F_REACHABLE)
685 rt6_probe(rt);
f11e6659 686
7e980569 687 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 688 if (m > *mpri) {
afc154e9 689 *do_rr = match_do_rr;
f11e6659
DM
690 *mpri = m;
691 match = rt;
f11e6659 692 }
f11e6659
DM
693out:
694 return match;
695}
696
697static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
698 struct rt6_info *rr_head,
afc154e9
HFS
699 u32 metric, int oif, int strict,
700 bool *do_rr)
f11e6659 701{
9fbdcfaf 702 struct rt6_info *rt, *match, *cont;
554cfb7e 703 int mpri = -1;
1da177e4 704
f11e6659 705 match = NULL;
9fbdcfaf
SK
706 cont = NULL;
707 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
708 if (rt->rt6i_metric != metric) {
709 cont = rt;
710 break;
711 }
712
713 match = find_match(rt, oif, strict, &mpri, match, do_rr);
714 }
715
716 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
717 if (rt->rt6i_metric != metric) {
718 cont = rt;
719 break;
720 }
721
afc154e9 722 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
723 }
724
725 if (match || !cont)
726 return match;
727
728 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 729 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 730
f11e6659
DM
731 return match;
732}
1da177e4 733
f11e6659
DM
734static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
735{
736 struct rt6_info *match, *rt0;
8ed67789 737 struct net *net;
afc154e9 738 bool do_rr = false;
1da177e4 739
f11e6659
DM
740 rt0 = fn->rr_ptr;
741 if (!rt0)
742 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 743
afc154e9
HFS
744 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
745 &do_rr);
1da177e4 746
afc154e9 747 if (do_rr) {
d8d1f30b 748 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 749
554cfb7e 750 /* no entries matched; do round-robin */
f11e6659
DM
751 if (!next || next->rt6i_metric != rt0->rt6i_metric)
752 next = fn->leaf;
753
754 if (next != rt0)
755 fn->rr_ptr = next;
1da177e4 756 }
1da177e4 757
d1918542 758 net = dev_net(rt0->dst.dev);
a02cec21 759 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
760}
761
8b9df265
MKL
762static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
763{
764 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
765}
766
70ceb4f5
YH
767#ifdef CONFIG_IPV6_ROUTE_INFO
768int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 769 const struct in6_addr *gwaddr)
70ceb4f5 770{
c346dca1 771 struct net *net = dev_net(dev);
70ceb4f5
YH
772 struct route_info *rinfo = (struct route_info *) opt;
773 struct in6_addr prefix_buf, *prefix;
774 unsigned int pref;
4bed72e4 775 unsigned long lifetime;
70ceb4f5
YH
776 struct rt6_info *rt;
777
778 if (len < sizeof(struct route_info)) {
779 return -EINVAL;
780 }
781
782 /* Sanity check for prefix_len and length */
783 if (rinfo->length > 3) {
784 return -EINVAL;
785 } else if (rinfo->prefix_len > 128) {
786 return -EINVAL;
787 } else if (rinfo->prefix_len > 64) {
788 if (rinfo->length < 2) {
789 return -EINVAL;
790 }
791 } else if (rinfo->prefix_len > 0) {
792 if (rinfo->length < 1) {
793 return -EINVAL;
794 }
795 }
796
797 pref = rinfo->route_pref;
798 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 799 return -EINVAL;
70ceb4f5 800
4bed72e4 801 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
802
803 if (rinfo->length == 3)
804 prefix = (struct in6_addr *)rinfo->prefix;
805 else {
806 /* this function is safe */
807 ipv6_addr_prefix(&prefix_buf,
808 (struct in6_addr *)rinfo->prefix,
809 rinfo->prefix_len);
810 prefix = &prefix_buf;
811 }
812
f104a567
DJ
813 if (rinfo->prefix_len == 0)
814 rt = rt6_get_dflt_router(gwaddr, dev);
815 else
816 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 817 gwaddr, dev);
70ceb4f5
YH
818
819 if (rt && !lifetime) {
e0a1ad73 820 ip6_del_rt(rt);
70ceb4f5
YH
821 rt = NULL;
822 }
823
824 if (!rt && lifetime)
830218c1
DA
825 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
826 dev, pref);
70ceb4f5
YH
827 else if (rt)
828 rt->rt6i_flags = RTF_ROUTEINFO |
829 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
830
831 if (rt) {
1716a961
G
832 if (!addrconf_finite_timeout(lifetime))
833 rt6_clean_expires(rt);
834 else
835 rt6_set_expires(rt, jiffies + HZ * lifetime);
836
94e187c0 837 ip6_rt_put(rt);
70ceb4f5
YH
838 }
839 return 0;
840}
841#endif
842
a3c00e46
MKL
843static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
844 struct in6_addr *saddr)
845{
846 struct fib6_node *pn;
847 while (1) {
848 if (fn->fn_flags & RTN_TL_ROOT)
849 return NULL;
850 pn = fn->parent;
851 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
852 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
853 else
854 fn = pn;
855 if (fn->fn_flags & RTN_RTINFO)
856 return fn;
857 }
858}
c71099ac 859
8ed67789
DL
860static struct rt6_info *ip6_pol_route_lookup(struct net *net,
861 struct fib6_table *table,
4c9483b2 862 struct flowi6 *fl6, int flags)
1da177e4
LT
863{
864 struct fib6_node *fn;
865 struct rt6_info *rt;
866
c71099ac 867 read_lock_bh(&table->tb6_lock);
4c9483b2 868 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
869restart:
870 rt = fn->leaf;
4c9483b2 871 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 872 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 873 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
874 if (rt == net->ipv6.ip6_null_entry) {
875 fn = fib6_backtrack(fn, &fl6->saddr);
876 if (fn)
877 goto restart;
878 }
d8d1f30b 879 dst_use(&rt->dst, jiffies);
c71099ac 880 read_unlock_bh(&table->tb6_lock);
b811580d
DA
881
882 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
883
c71099ac
TG
884 return rt;
885
886}
887
67ba4152 888struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
889 int flags)
890{
891 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
892}
893EXPORT_SYMBOL_GPL(ip6_route_lookup);
894
9acd9f3a
YH
895struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
896 const struct in6_addr *saddr, int oif, int strict)
c71099ac 897{
4c9483b2
DM
898 struct flowi6 fl6 = {
899 .flowi6_oif = oif,
900 .daddr = *daddr,
c71099ac
TG
901 };
902 struct dst_entry *dst;
77d16f45 903 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 904
adaa70bb 905 if (saddr) {
4c9483b2 906 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
907 flags |= RT6_LOOKUP_F_HAS_SADDR;
908 }
909
4c9483b2 910 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
911 if (dst->error == 0)
912 return (struct rt6_info *) dst;
913
914 dst_release(dst);
915
1da177e4
LT
916 return NULL;
917}
7159039a
YH
918EXPORT_SYMBOL(rt6_lookup);
919
/* ip6_ins_rt is called with table->tb6_lock NOT held.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is released.
 * Caller must hold dst before calling it.
 */
925
e5fd387a 926static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
333c4301
DA
927 struct mx6_config *mxc,
928 struct netlink_ext_ack *extack)
1da177e4
LT
929{
930 int err;
c71099ac 931 struct fib6_table *table;
1da177e4 932
c71099ac
TG
933 table = rt->rt6i_table;
934 write_lock_bh(&table->tb6_lock);
333c4301 935 err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
c71099ac 936 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
937
938 return err;
939}
940
40e22e8f
TG
941int ip6_ins_rt(struct rt6_info *rt)
942{
e715b6d3
FW
943 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
944 struct mx6_config mxc = { .mx = NULL, };
945
1cfb71ee
WW
946 /* Hold dst to account for the reference from the fib6 tree */
947 dst_hold(&rt->dst);
333c4301 948 return __ip6_ins_rt(rt, &info, &mxc, NULL);
40e22e8f
TG
949}
950
4832c30d
DA
951/* called with rcu_lock held */
952static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
953{
954 struct net_device *dev = rt->dst.dev;
955
956 if (rt->rt6i_flags & RTF_LOCAL) {
957 /* for copies of local routes, dst->dev needs to be the
958 * device if it is a master device, the master device if
959 * device is enslaved, and the loopback as the default
960 */
961 if (netif_is_l3_slave(dev) &&
962 !rt6_need_strict(&rt->rt6i_dst.addr))
963 dev = l3mdev_master_dev_rcu(dev);
964 else if (!netif_is_l3_master(dev))
965 dev = dev_net(dev)->loopback_dev;
966 /* last case is netif_is_l3_master(dev) is true in which
967 * case we want dev returned to be dev
968 */
969 }
970
971 return dev;
972}
973
8b9df265
MKL
974static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
975 const struct in6_addr *daddr,
976 const struct in6_addr *saddr)
1da177e4 977{
4832c30d 978 struct net_device *dev;
1da177e4
LT
979 struct rt6_info *rt;
980
981 /*
982 * Clone the route.
983 */
984
d52d3997 985 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 986 ort = (struct rt6_info *)ort->dst.from;
1da177e4 987
4832c30d
DA
988 rcu_read_lock();
989 dev = ip6_rt_get_dev_rcu(ort);
990 rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
991 rcu_read_unlock();
83a09abd
MKL
992 if (!rt)
993 return NULL;
994
995 ip6_rt_copy_init(rt, ort);
996 rt->rt6i_flags |= RTF_CACHE;
997 rt->rt6i_metric = 0;
998 rt->dst.flags |= DST_HOST;
999 rt->rt6i_dst.addr = *daddr;
1000 rt->rt6i_dst.plen = 128;
1da177e4 1001
83a09abd
MKL
1002 if (!rt6_is_gw_or_nonexthop(ort)) {
1003 if (ort->rt6i_dst.plen != 128 &&
1004 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1005 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1006#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1007 if (rt->rt6i_src.plen && saddr) {
1008 rt->rt6i_src.addr = *saddr;
1009 rt->rt6i_src.plen = 128;
8b9df265 1010 }
83a09abd 1011#endif
95a9a5ba 1012 }
1da177e4 1013
95a9a5ba
YH
1014 return rt;
1015}
1da177e4 1016
d52d3997
MKL
1017static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1018{
4832c30d 1019 struct net_device *dev;
d52d3997
MKL
1020 struct rt6_info *pcpu_rt;
1021
4832c30d
DA
1022 rcu_read_lock();
1023 dev = ip6_rt_get_dev_rcu(rt);
1024 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1025 rcu_read_unlock();
d52d3997
MKL
1026 if (!pcpu_rt)
1027 return NULL;
1028 ip6_rt_copy_init(pcpu_rt, rt);
1029 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1030 pcpu_rt->rt6i_flags |= RTF_PCPU;
1031 return pcpu_rt;
1032}
1033
1034/* It should be called with read_lock_bh(&tb6_lock) acquired */
1035static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1036{
a73e4195 1037 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1038
1039 p = this_cpu_ptr(rt->rt6i_pcpu);
1040 pcpu_rt = *p;
1041
a73e4195
MKL
1042 if (pcpu_rt) {
1043 dst_hold(&pcpu_rt->dst);
1044 rt6_dst_from_metrics_check(pcpu_rt);
1045 }
1046 return pcpu_rt;
1047}
1048
1049static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1050{
9c7370a1 1051 struct fib6_table *table = rt->rt6i_table;
a73e4195 1052 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1053
1054 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1055 if (!pcpu_rt) {
1056 struct net *net = dev_net(rt->dst.dev);
1057
9c7370a1
MKL
1058 dst_hold(&net->ipv6.ip6_null_entry->dst);
1059 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1060 }
1061
9c7370a1
MKL
1062 read_lock_bh(&table->tb6_lock);
1063 if (rt->rt6i_pcpu) {
1064 p = this_cpu_ptr(rt->rt6i_pcpu);
1065 prev = cmpxchg(p, NULL, pcpu_rt);
1066 if (prev) {
1067 /* If someone did it before us, return prev instead */
587fea74 1068 dst_release_immediate(&pcpu_rt->dst);
9c7370a1
MKL
1069 pcpu_rt = prev;
1070 }
1071 } else {
1072 /* rt has been removed from the fib6 tree
1073 * before we have a chance to acquire the read_lock.
1074 * In this case, don't brother to create a pcpu rt
1075 * since rt is going away anyway. The next
1076 * dst_check() will trigger a re-lookup.
1077 */
587fea74 1078 dst_release_immediate(&pcpu_rt->dst);
9c7370a1 1079 pcpu_rt = rt;
d52d3997 1080 }
d52d3997
MKL
1081 dst_hold(&pcpu_rt->dst);
1082 rt6_dst_from_metrics_check(pcpu_rt);
9c7370a1 1083 read_unlock_bh(&table->tb6_lock);
d52d3997
MKL
1084 return pcpu_rt;
1085}
1086
9ff74384
DA
1087struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1088 int oif, struct flowi6 *fl6, int flags)
1da177e4 1089{
367efcb9 1090 struct fib6_node *fn, *saved_fn;
45e4fd26 1091 struct rt6_info *rt;
c71099ac 1092 int strict = 0;
1da177e4 1093
77d16f45 1094 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1095 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1096 if (net->ipv6.devconf_all->forwarding == 0)
1097 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1098
c71099ac 1099 read_lock_bh(&table->tb6_lock);
1da177e4 1100
4c9483b2 1101 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1102 saved_fn = fn;
1da177e4 1103
ca254490
DA
1104 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1105 oif = 0;
1106
a3c00e46 1107redo_rt6_select:
367efcb9 1108 rt = rt6_select(fn, oif, strict);
52bd4c0c 1109 if (rt->rt6i_nsiblings)
367efcb9 1110 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1111 if (rt == net->ipv6.ip6_null_entry) {
1112 fn = fib6_backtrack(fn, &fl6->saddr);
1113 if (fn)
1114 goto redo_rt6_select;
367efcb9
MKL
1115 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1116 /* also consider unreachable route */
1117 strict &= ~RT6_LOOKUP_F_REACHABLE;
1118 fn = saved_fn;
1119 goto redo_rt6_select;
367efcb9 1120 }
a3c00e46
MKL
1121 }
1122
fb9de91e 1123
3da59bd9 1124 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
d52d3997
MKL
1125 dst_use(&rt->dst, jiffies);
1126 read_unlock_bh(&table->tb6_lock);
1127
1128 rt6_dst_from_metrics_check(rt);
b811580d
DA
1129
1130 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
d52d3997 1131 return rt;
3da59bd9
MKL
1132 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1133 !(rt->rt6i_flags & RTF_GATEWAY))) {
1134 /* Create a RTF_CACHE clone which will not be
1135 * owned by the fib6 tree. It is for the special case where
1136 * the daddr in the skb during the neighbor look-up is different
1137 * from the fl6->daddr used to look-up route here.
1138 */
1139
1140 struct rt6_info *uncached_rt;
1141
d52d3997
MKL
1142 dst_use(&rt->dst, jiffies);
1143 read_unlock_bh(&table->tb6_lock);
1144
3da59bd9
MKL
1145 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1146 dst_release(&rt->dst);
c71099ac 1147
1cfb71ee
WW
1148 if (uncached_rt) {
1149 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1150 * No need for another dst_hold()
1151 */
8d0b94af 1152 rt6_uncached_list_add(uncached_rt);
1cfb71ee 1153 } else {
3da59bd9 1154 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1155 dst_hold(&uncached_rt->dst);
1156 }
b811580d
DA
1157
1158 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
3da59bd9 1159 return uncached_rt;
3da59bd9 1160
d52d3997
MKL
1161 } else {
1162 /* Get a percpu copy */
1163
1164 struct rt6_info *pcpu_rt;
1165
1166 rt->dst.lastuse = jiffies;
1167 rt->dst.__use++;
1168 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1169
9c7370a1
MKL
1170 if (pcpu_rt) {
1171 read_unlock_bh(&table->tb6_lock);
1172 } else {
1173 /* We have to do the read_unlock first
1174 * because rt6_make_pcpu_route() may trigger
1175 * ip6_dst_gc() which will take the write_lock.
1176 */
1177 dst_hold(&rt->dst);
1178 read_unlock_bh(&table->tb6_lock);
a73e4195 1179 pcpu_rt = rt6_make_pcpu_route(rt);
9c7370a1
MKL
1180 dst_release(&rt->dst);
1181 }
d52d3997 1182
b811580d 1183 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
d52d3997 1184 return pcpu_rt;
9c7370a1 1185
d52d3997 1186 }
1da177e4 1187}
9ff74384 1188EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1189
8ed67789 1190static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1191 struct flowi6 *fl6, int flags)
4acad72d 1192{
4c9483b2 1193 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1194}
1195
d409b847
MB
1196struct dst_entry *ip6_route_input_lookup(struct net *net,
1197 struct net_device *dev,
1198 struct flowi6 *fl6, int flags)
72331bc0
SL
1199{
1200 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1201 flags |= RT6_LOOKUP_F_IFACE;
1202
1203 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1204}
d409b847 1205EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1206
23aebdac
JS
1207static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1208 struct flow_keys *keys)
1209{
1210 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1211 const struct ipv6hdr *key_iph = outer_iph;
1212 const struct ipv6hdr *inner_iph;
1213 const struct icmp6hdr *icmph;
1214 struct ipv6hdr _inner_iph;
1215
1216 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1217 goto out;
1218
1219 icmph = icmp6_hdr(skb);
1220 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1221 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1222 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1223 icmph->icmp6_type != ICMPV6_PARAMPROB)
1224 goto out;
1225
1226 inner_iph = skb_header_pointer(skb,
1227 skb_transport_offset(skb) + sizeof(*icmph),
1228 sizeof(_inner_iph), &_inner_iph);
1229 if (!inner_iph)
1230 goto out;
1231
1232 key_iph = inner_iph;
1233out:
1234 memset(keys, 0, sizeof(*keys));
1235 keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1236 keys->addrs.v6addrs.src = key_iph->saddr;
1237 keys->addrs.v6addrs.dst = key_iph->daddr;
1238 keys->tags.flow_label = ip6_flowinfo(key_iph);
1239 keys->basic.ip_proto = key_iph->nexthdr;
1240}
1241
1242/* if skb is set it will be used and fl6 can be NULL */
1243u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1244{
1245 struct flow_keys hash_keys;
1246
1247 if (skb) {
1248 ip6_multipath_l3_keys(skb, &hash_keys);
1249 return flow_hash_from_keys(&hash_keys);
1250 }
1251
1252 return get_hash_from_flowi6(fl6);
1253}
1254
c71099ac
TG
1255void ip6_route_input(struct sk_buff *skb)
1256{
b71d1d42 1257 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1258 struct net *net = dev_net(skb->dev);
adaa70bb 1259 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1260 struct ip_tunnel_info *tun_info;
4c9483b2 1261 struct flowi6 fl6 = {
e0d56fdd 1262 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
1263 .daddr = iph->daddr,
1264 .saddr = iph->saddr,
6502ca52 1265 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1266 .flowi6_mark = skb->mark,
1267 .flowi6_proto = iph->nexthdr,
c71099ac 1268 };
adaa70bb 1269
904af04d 1270 tun_info = skb_tunnel_info(skb);
46fa062a 1271 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1272 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
23aebdac
JS
1273 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
1274 fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
06e9d040 1275 skb_dst_drop(skb);
72331bc0 1276 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1277}
1278
8ed67789 1279static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1280 struct flowi6 *fl6, int flags)
1da177e4 1281{
4c9483b2 1282 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1283}
1284
6f21c96a
PA
1285struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1286 struct flowi6 *fl6, int flags)
c71099ac 1287{
d46a9d67 1288 bool any_src;
c71099ac 1289
4c1feac5
DA
1290 if (rt6_need_strict(&fl6->daddr)) {
1291 struct dst_entry *dst;
1292
1293 dst = l3mdev_link_scope_lookup(net, fl6);
1294 if (dst)
1295 return dst;
1296 }
ca254490 1297
1fb9489b 1298 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1299
d46a9d67 1300 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1301 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1302 (fl6->flowi6_oif && any_src))
77d16f45 1303 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1304
d46a9d67 1305 if (!any_src)
adaa70bb 1306 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1307 else if (sk)
1308 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1309
4c9483b2 1310 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1311}
6f21c96a 1312EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 1313
2774c131 1314struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1315{
5c1e6aa3 1316 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 1317 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
1318 struct dst_entry *new = NULL;
1319
1dbe3252 1320 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
b2a9c0ed 1321 DST_OBSOLETE_NONE, 0);
14e50e57 1322 if (rt) {
0a1f5962 1323 rt6_info_init(rt);
8104891b 1324
0a1f5962 1325 new = &rt->dst;
14e50e57 1326 new->__use = 1;
352e512c 1327 new->input = dst_discard;
ede2059d 1328 new->output = dst_discard_out;
14e50e57 1329
0a1f5962 1330 dst_copy_metrics(new, &ort->dst);
14e50e57 1331
1dbe3252 1332 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 1333 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1334 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1335 rt->rt6i_metric = 0;
1336
1337 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1338#ifdef CONFIG_IPV6_SUBTREES
1339 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1340#endif
14e50e57
DM
1341 }
1342
69ead7af
DM
1343 dst_release(dst_orig);
1344 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1345}
14e50e57 1346
1da177e4
LT
1347/*
1348 * Destination cache support functions
1349 */
1350
4b32b5ad
MKL
1351static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1352{
1353 if (rt->dst.from &&
1354 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1355 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1356}
1357
3da59bd9
MKL
1358static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1359{
1360 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1361 return NULL;
1362
1363 if (rt6_check_expired(rt))
1364 return NULL;
1365
1366 return &rt->dst;
1367}
1368
1369static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1370{
5973fb1e
MKL
1371 if (!__rt6_check_expired(rt) &&
1372 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3da59bd9
MKL
1373 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1374 return &rt->dst;
1375 else
1376 return NULL;
1377}
1378
1da177e4
LT
1379static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1380{
1381 struct rt6_info *rt;
1382
1383 rt = (struct rt6_info *) dst;
1384
6f3118b5
ND
1385 /* All IPV6 dsts are created with ->obsolete set to the value
1386 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1387 * into this function always.
1388 */
e3bc10bd 1389
4b32b5ad
MKL
1390 rt6_dst_from_metrics_check(rt);
1391
02bcf4e0 1392 if (rt->rt6i_flags & RTF_PCPU ||
a4c2fd7f 1393 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
3da59bd9
MKL
1394 return rt6_dst_from_check(rt, cookie);
1395 else
1396 return rt6_check(rt, cookie);
1da177e4
LT
1397}
1398
1399static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1400{
1401 struct rt6_info *rt = (struct rt6_info *) dst;
1402
1403 if (rt) {
54c1a859
YH
1404 if (rt->rt6i_flags & RTF_CACHE) {
1405 if (rt6_check_expired(rt)) {
1406 ip6_del_rt(rt);
1407 dst = NULL;
1408 }
1409 } else {
1da177e4 1410 dst_release(dst);
54c1a859
YH
1411 dst = NULL;
1412 }
1da177e4 1413 }
54c1a859 1414 return dst;
1da177e4
LT
1415}
1416
1417static void ip6_link_failure(struct sk_buff *skb)
1418{
1419 struct rt6_info *rt;
1420
3ffe533c 1421 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1422
adf30907 1423 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1424 if (rt) {
1eb4f758 1425 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0
WW
1426 if (dst_hold_safe(&rt->dst))
1427 ip6_del_rt(rt);
1eb4f758 1428 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1da177e4 1429 rt->rt6i_node->fn_sernum = -1;
1eb4f758 1430 }
1da177e4
LT
1431 }
1432}
1433
45e4fd26
MKL
1434static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1435{
1436 struct net *net = dev_net(rt->dst.dev);
1437
1438 rt->rt6i_flags |= RTF_MODIFIED;
1439 rt->rt6i_pmtu = mtu;
1440 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1441}
1442
0d3f6d29
MKL
1443static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1444{
1445 return !(rt->rt6i_flags & RTF_CACHE) &&
1446 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1447}
1448
45e4fd26
MKL
1449static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1450 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1451{
0dec879f 1452 const struct in6_addr *daddr, *saddr;
67ba4152 1453 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1454
45e4fd26
MKL
1455 if (rt6->rt6i_flags & RTF_LOCAL)
1456 return;
81aded24 1457
19bda36c
XL
1458 if (dst_metric_locked(dst, RTAX_MTU))
1459 return;
1460
0dec879f
JA
1461 if (iph) {
1462 daddr = &iph->daddr;
1463 saddr = &iph->saddr;
1464 } else if (sk) {
1465 daddr = &sk->sk_v6_daddr;
1466 saddr = &inet6_sk(sk)->saddr;
1467 } else {
1468 daddr = NULL;
1469 saddr = NULL;
1470 }
1471 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
1472 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1473 if (mtu >= dst_mtu(dst))
1474 return;
9d289715 1475
0d3f6d29 1476 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 1477 rt6_do_update_pmtu(rt6, mtu);
0dec879f 1478 } else if (daddr) {
45e4fd26
MKL
1479 struct rt6_info *nrt6;
1480
45e4fd26
MKL
1481 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1482 if (nrt6) {
1483 rt6_do_update_pmtu(nrt6, mtu);
1484
1485 /* ip6_ins_rt(nrt6) will bump the
1486 * rt6->rt6i_node->fn_sernum
1487 * which will fail the next rt6_check() and
1488 * invalidate the sk->sk_dst_cache.
1489 */
1490 ip6_ins_rt(nrt6);
1cfb71ee
WW
1491 /* Release the reference taken in
1492 * ip6_rt_cache_alloc()
1493 */
1494 dst_release(&nrt6->dst);
45e4fd26 1495 }
1da177e4
LT
1496 }
1497}
1498
45e4fd26
MKL
1499static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1500 struct sk_buff *skb, u32 mtu)
1501{
1502 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1503}
1504
42ae66c8 1505void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 1506 int oif, u32 mark, kuid_t uid)
81aded24
DM
1507{
1508 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1509 struct dst_entry *dst;
1510 struct flowi6 fl6;
1511
1512 memset(&fl6, 0, sizeof(fl6));
1513 fl6.flowi6_oif = oif;
1b3c61dc 1514 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1515 fl6.daddr = iph->daddr;
1516 fl6.saddr = iph->saddr;
6502ca52 1517 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 1518 fl6.flowi6_uid = uid;
81aded24
DM
1519
1520 dst = ip6_route_output(net, NULL, &fl6);
1521 if (!dst->error)
45e4fd26 1522 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1523 dst_release(dst);
1524}
1525EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1526
1527void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1528{
33c162a9
MKL
1529 struct dst_entry *dst;
1530
81aded24 1531 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 1532 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
1533
1534 dst = __sk_dst_get(sk);
1535 if (!dst || !dst->obsolete ||
1536 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1537 return;
1538
1539 bh_lock_sock(sk);
1540 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1541 ip6_datagram_dst_update(sk, false);
1542 bh_unlock_sock(sk);
81aded24
DM
1543}
1544EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1545
b55b76b2
DJ
1546/* Handle redirects */
1547struct ip6rd_flowi {
1548 struct flowi6 fl6;
1549 struct in6_addr gateway;
1550};
1551
1552static struct rt6_info *__ip6_route_redirect(struct net *net,
1553 struct fib6_table *table,
1554 struct flowi6 *fl6,
1555 int flags)
1556{
1557 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1558 struct rt6_info *rt;
1559 struct fib6_node *fn;
1560
1561 /* Get the "current" route for this destination and
67c408cf 1562 * check if the redirect has come from appropriate router.
b55b76b2
DJ
1563 *
1564 * RFC 4861 specifies that redirects should only be
1565 * accepted if they come from the nexthop to the target.
1566 * Due to the way the routes are chosen, this notion
1567 * is a bit fuzzy and one might need to check all possible
1568 * routes.
1569 */
1570
1571 read_lock_bh(&table->tb6_lock);
1572 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1573restart:
1574 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1575 if (rt6_check_expired(rt))
1576 continue;
1577 if (rt->dst.error)
1578 break;
1579 if (!(rt->rt6i_flags & RTF_GATEWAY))
1580 continue;
1581 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1582 continue;
1583 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1584 continue;
1585 break;
1586 }
1587
1588 if (!rt)
1589 rt = net->ipv6.ip6_null_entry;
1590 else if (rt->dst.error) {
1591 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1592 goto out;
1593 }
1594
1595 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1596 fn = fib6_backtrack(fn, &fl6->saddr);
1597 if (fn)
1598 goto restart;
b55b76b2 1599 }
a3c00e46 1600
b0a1ba59 1601out:
b55b76b2
DJ
1602 dst_hold(&rt->dst);
1603
1604 read_unlock_bh(&table->tb6_lock);
1605
b811580d 1606 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
b55b76b2
DJ
1607 return rt;
1608};
1609
1610static struct dst_entry *ip6_route_redirect(struct net *net,
1611 const struct flowi6 *fl6,
1612 const struct in6_addr *gateway)
1613{
1614 int flags = RT6_LOOKUP_F_HAS_SADDR;
1615 struct ip6rd_flowi rdfl;
1616
1617 rdfl.fl6 = *fl6;
1618 rdfl.gateway = *gateway;
1619
1620 return fib6_rule_lookup(net, &rdfl.fl6,
1621 flags, __ip6_route_redirect);
1622}
1623
e2d118a1
LC
1624void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1625 kuid_t uid)
3a5ad2ee
DM
1626{
1627 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1628 struct dst_entry *dst;
1629 struct flowi6 fl6;
1630
1631 memset(&fl6, 0, sizeof(fl6));
e374c618 1632 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1633 fl6.flowi6_oif = oif;
1634 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1635 fl6.daddr = iph->daddr;
1636 fl6.saddr = iph->saddr;
6502ca52 1637 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 1638 fl6.flowi6_uid = uid;
3a5ad2ee 1639
b55b76b2
DJ
1640 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1641 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1642 dst_release(dst);
1643}
1644EXPORT_SYMBOL_GPL(ip6_redirect);
1645
c92a59ec
DJ
1646void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1647 u32 mark)
1648{
1649 const struct ipv6hdr *iph = ipv6_hdr(skb);
1650 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1651 struct dst_entry *dst;
1652 struct flowi6 fl6;
1653
1654 memset(&fl6, 0, sizeof(fl6));
e374c618 1655 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1656 fl6.flowi6_oif = oif;
1657 fl6.flowi6_mark = mark;
c92a59ec
DJ
1658 fl6.daddr = msg->dest;
1659 fl6.saddr = iph->daddr;
e2d118a1 1660 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 1661
b55b76b2
DJ
1662 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1663 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1664 dst_release(dst);
1665}
1666
3a5ad2ee
DM
1667void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1668{
e2d118a1
LC
1669 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1670 sk->sk_uid);
3a5ad2ee
DM
1671}
1672EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1673
0dbaee3b 1674static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1675{
0dbaee3b
DM
1676 struct net_device *dev = dst->dev;
1677 unsigned int mtu = dst_mtu(dst);
1678 struct net *net = dev_net(dev);
1679
1da177e4
LT
1680 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1681
5578689a
DL
1682 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1683 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1684
1685 /*
1ab1457c
YH
1686 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1687 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1688 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1689 * rely only on pmtu discovery"
1690 */
1691 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1692 mtu = IPV6_MAXPLEN;
1693 return mtu;
1694}
1695
ebb762f2 1696static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1697{
4b32b5ad
MKL
1698 const struct rt6_info *rt = (const struct rt6_info *)dst;
1699 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 1700 struct inet6_dev *idev;
618f9bc7 1701
4b32b5ad
MKL
1702 if (mtu)
1703 goto out;
1704
1705 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1706 if (mtu)
30f78d8e 1707 goto out;
618f9bc7
SK
1708
1709 mtu = IPV6_MIN_MTU;
d33e4553
DM
1710
1711 rcu_read_lock();
1712 idev = __in6_dev_get(dst->dev);
1713 if (idev)
1714 mtu = idev->cnf.mtu6;
1715 rcu_read_unlock();
1716
30f78d8e 1717out:
14972cbd
RP
1718 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1719
1720 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
1721}
1722
3b00944c 1723struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1724 struct flowi6 *fl6)
1da177e4 1725{
87a11578 1726 struct dst_entry *dst;
1da177e4
LT
1727 struct rt6_info *rt;
1728 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1729 struct net *net = dev_net(dev);
1da177e4 1730
38308473 1731 if (unlikely(!idev))
122bdf67 1732 return ERR_PTR(-ENODEV);
1da177e4 1733
ad706862 1734 rt = ip6_dst_alloc(net, dev, 0);
38308473 1735 if (unlikely(!rt)) {
1da177e4 1736 in6_dev_put(idev);
87a11578 1737 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1738 goto out;
1739 }
1740
8e2ec639
YZ
1741 rt->dst.flags |= DST_HOST;
1742 rt->dst.output = ip6_output;
550bab42 1743 rt->rt6i_gateway = fl6->daddr;
87a11578 1744 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1745 rt->rt6i_dst.plen = 128;
1746 rt->rt6i_idev = idev;
14edd87d 1747 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1748
587fea74
WW
1749 /* Add this dst into uncached_list so that rt6_ifdown() can
1750 * do proper release of the net_device
1751 */
1752 rt6_uncached_list_add(rt);
1da177e4 1753
87a11578
DM
1754 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1755
1da177e4 1756out:
87a11578 1757 return dst;
1da177e4
LT
1758}
1759
569d3645 1760static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1761{
86393e52 1762 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1763 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1764 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1765 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1766 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1767 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1768 int entries;
7019b78e 1769
fc66f95c 1770 entries = dst_entries_get_fast(ops);
49a18d86 1771 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1772 entries <= rt_max_size)
1da177e4
LT
1773 goto out;
1774
6891a346 1775 net->ipv6.ip6_rt_gc_expire++;
14956643 1776 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1777 entries = dst_entries_get_slow(ops);
1778 if (entries < ops->gc_thresh)
7019b78e 1779 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1780out:
7019b78e 1781 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1782 return entries > rt_max_size;
1da177e4
LT
1783}
1784
e715b6d3
FW
1785static int ip6_convert_metrics(struct mx6_config *mxc,
1786 const struct fib6_config *cfg)
1787{
c3a8d947 1788 bool ecn_ca = false;
e715b6d3
FW
1789 struct nlattr *nla;
1790 int remaining;
1791 u32 *mp;
1792
63159f29 1793 if (!cfg->fc_mx)
e715b6d3
FW
1794 return 0;
1795
1796 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1797 if (unlikely(!mp))
1798 return -ENOMEM;
1799
1800 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1801 int type = nla_type(nla);
1bb14807 1802 u32 val;
e715b6d3 1803
1bb14807
DB
1804 if (!type)
1805 continue;
1806 if (unlikely(type > RTAX_MAX))
1807 goto err;
ea697639 1808
1bb14807
DB
1809 if (type == RTAX_CC_ALGO) {
1810 char tmp[TCP_CA_NAME_MAX];
e715b6d3 1811
1bb14807 1812 nla_strlcpy(tmp, nla, sizeof(tmp));
c3a8d947 1813 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1bb14807
DB
1814 if (val == TCP_CA_UNSPEC)
1815 goto err;
1816 } else {
1817 val = nla_get_u32(nla);
e715b6d3 1818 }
626abd59
PA
1819 if (type == RTAX_HOPLIMIT && val > 255)
1820 val = 255;
b8d3e416
DB
1821 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1822 goto err;
1bb14807
DB
1823
1824 mp[type - 1] = val;
1825 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
1826 }
1827
c3a8d947
DB
1828 if (ecn_ca) {
1829 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1830 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1831 }
e715b6d3 1832
c3a8d947 1833 mxc->mx = mp;
e715b6d3
FW
1834 return 0;
1835 err:
1836 kfree(mp);
1837 return -EINVAL;
1838}
1da177e4 1839
8c14586f
DA
1840static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1841 struct fib6_config *cfg,
1842 const struct in6_addr *gw_addr)
1843{
1844 struct flowi6 fl6 = {
1845 .flowi6_oif = cfg->fc_ifindex,
1846 .daddr = *gw_addr,
1847 .saddr = cfg->fc_prefsrc,
1848 };
1849 struct fib6_table *table;
1850 struct rt6_info *rt;
d5d32e4b 1851 int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
8c14586f
DA
1852
1853 table = fib6_get_table(net, cfg->fc_table);
1854 if (!table)
1855 return NULL;
1856
1857 if (!ipv6_addr_any(&cfg->fc_prefsrc))
1858 flags |= RT6_LOOKUP_F_HAS_SADDR;
1859
1860 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1861
1862 /* if table lookup failed, fall back to full lookup */
1863 if (rt == net->ipv6.ip6_null_entry) {
1864 ip6_rt_put(rt);
1865 rt = NULL;
1866 }
1867
1868 return rt;
1869}
1870
333c4301
DA
1871static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
1872 struct netlink_ext_ack *extack)
1da177e4 1873{
5578689a 1874 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1875 struct rt6_info *rt = NULL;
1876 struct net_device *dev = NULL;
1877 struct inet6_dev *idev = NULL;
c71099ac 1878 struct fib6_table *table;
1da177e4 1879 int addr_type;
8c5b83f0 1880 int err = -EINVAL;
1da177e4 1881
557c44be 1882 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
1883 if (cfg->fc_flags & RTF_PCPU) {
1884 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 1885 goto out;
d5d531cb 1886 }
557c44be 1887
d5d531cb
DA
1888 if (cfg->fc_dst_len > 128) {
1889 NL_SET_ERR_MSG(extack, "Invalid prefix length");
1890 goto out;
1891 }
1892 if (cfg->fc_src_len > 128) {
1893 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 1894 goto out;
d5d531cb 1895 }
1da177e4 1896#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
1897 if (cfg->fc_src_len) {
1898 NL_SET_ERR_MSG(extack,
1899 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 1900 goto out;
d5d531cb 1901 }
1da177e4 1902#endif
86872cb5 1903 if (cfg->fc_ifindex) {
1da177e4 1904 err = -ENODEV;
5578689a 1905 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1906 if (!dev)
1907 goto out;
1908 idev = in6_dev_get(dev);
1909 if (!idev)
1910 goto out;
1911 }
1912
86872cb5
TG
1913 if (cfg->fc_metric == 0)
1914 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1915
d71314b4 1916 err = -ENOBUFS;
38308473
DM
1917 if (cfg->fc_nlinfo.nlh &&
1918 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1919 table = fib6_get_table(net, cfg->fc_table);
38308473 1920 if (!table) {
f3213831 1921 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1922 table = fib6_new_table(net, cfg->fc_table);
1923 }
1924 } else {
1925 table = fib6_new_table(net, cfg->fc_table);
1926 }
38308473
DM
1927
1928 if (!table)
c71099ac 1929 goto out;
c71099ac 1930
ad706862
MKL
1931 rt = ip6_dst_alloc(net, NULL,
1932 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 1933
38308473 1934 if (!rt) {
1da177e4
LT
1935 err = -ENOMEM;
1936 goto out;
1937 }
1938
1716a961
G
1939 if (cfg->fc_flags & RTF_EXPIRES)
1940 rt6_set_expires(rt, jiffies +
1941 clock_t_to_jiffies(cfg->fc_expires));
1942 else
1943 rt6_clean_expires(rt);
1da177e4 1944
86872cb5
TG
1945 if (cfg->fc_protocol == RTPROT_UNSPEC)
1946 cfg->fc_protocol = RTPROT_BOOT;
1947 rt->rt6i_protocol = cfg->fc_protocol;
1948
1949 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1950
1951 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1952 rt->dst.input = ip6_mc_input;
ab79ad14
1953 else if (cfg->fc_flags & RTF_LOCAL)
1954 rt->dst.input = ip6_input;
1da177e4 1955 else
d8d1f30b 1956 rt->dst.input = ip6_forward;
1da177e4 1957
d8d1f30b 1958 rt->dst.output = ip6_output;
1da177e4 1959
19e42e45
RP
1960 if (cfg->fc_encap) {
1961 struct lwtunnel_state *lwtstate;
1962
30357d7d 1963 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 1964 cfg->fc_encap, AF_INET6, cfg,
9ae28727 1965 &lwtstate, extack);
19e42e45
RP
1966 if (err)
1967 goto out;
61adedf3
JB
1968 rt->dst.lwtstate = lwtstate_get(lwtstate);
1969 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1970 rt->dst.lwtstate->orig_output = rt->dst.output;
1971 rt->dst.output = lwtunnel_output;
25368623 1972 }
61adedf3
JB
1973 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1974 rt->dst.lwtstate->orig_input = rt->dst.input;
1975 rt->dst.input = lwtunnel_input;
25368623 1976 }
19e42e45
RP
1977 }
1978
86872cb5
TG
1979 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1980 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 1981 if (rt->rt6i_dst.plen == 128)
e5fd387a 1982 rt->dst.flags |= DST_HOST;
e5fd387a 1983
1da177e4 1984#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1985 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1986 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1987#endif
1988
86872cb5 1989 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1990
1991 /* We cannot add true routes via loopback here,
1992 they would result in kernel looping; promote them to reject routes
1993 */
86872cb5 1994 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1995 (dev && (dev->flags & IFF_LOOPBACK) &&
1996 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1997 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1998 /* hold loopback dev/idev if we haven't done so. */
5578689a 1999 if (dev != net->loopback_dev) {
1da177e4
LT
2000 if (dev) {
2001 dev_put(dev);
2002 in6_dev_put(idev);
2003 }
5578689a 2004 dev = net->loopback_dev;
1da177e4
LT
2005 dev_hold(dev);
2006 idev = in6_dev_get(dev);
2007 if (!idev) {
2008 err = -ENODEV;
2009 goto out;
2010 }
2011 }
1da177e4 2012 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
2013 switch (cfg->fc_type) {
2014 case RTN_BLACKHOLE:
2015 rt->dst.error = -EINVAL;
ede2059d 2016 rt->dst.output = dst_discard_out;
7150aede 2017 rt->dst.input = dst_discard;
ef2c7d7b
ND
2018 break;
2019 case RTN_PROHIBIT:
2020 rt->dst.error = -EACCES;
7150aede
K
2021 rt->dst.output = ip6_pkt_prohibit_out;
2022 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 2023 break;
b4949ab2 2024 case RTN_THROW:
0315e382 2025 case RTN_UNREACHABLE:
ef2c7d7b 2026 default:
7150aede 2027 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
2028 : (cfg->fc_type == RTN_UNREACHABLE)
2029 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
2030 rt->dst.output = ip6_pkt_discard_out;
2031 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
2032 break;
2033 }
1da177e4
LT
2034 goto install_route;
2035 }
2036
86872cb5 2037 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 2038 const struct in6_addr *gw_addr;
1da177e4
LT
2039 int gwa_type;
2040
86872cb5 2041 gw_addr = &cfg->fc_gateway;
330567b7 2042 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
2043
2044 /* if gw_addr is local we will fail to detect this in case
2045 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2046 * will return already-added prefix route via interface that
2047 * prefix route was assigned to, which might be non-loopback.
2048 */
2049 err = -EINVAL;
330567b7
FW
2050 if (ipv6_chk_addr_and_flags(net, gw_addr,
2051 gwa_type & IPV6_ADDR_LINKLOCAL ?
d5d531cb
DA
2052 dev : NULL, 0, 0)) {
2053 NL_SET_ERR_MSG(extack, "Invalid gateway address");
48ed7b26 2054 goto out;
d5d531cb 2055 }
4e3fd7a0 2056 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
2057
2058 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
8c14586f 2059 struct rt6_info *grt = NULL;
1da177e4
LT
2060
2061 /* IPv6 strictly inhibits using not link-local
2062 addresses as nexthop address.
2063 Otherwise, router will not able to send redirects.
2064 It is very good, but in some (rare!) circumstances
2065 (SIT, PtP, NBMA NOARP links) it is handy to allow
2066 some exceptions. --ANK
96d5822c
EN
2067 We allow IPv4-mapped nexthops to support RFC4798-type
2068 addressing
1da177e4 2069 */
96d5822c 2070 if (!(gwa_type & (IPV6_ADDR_UNICAST |
d5d531cb
DA
2071 IPV6_ADDR_MAPPED))) {
2072 NL_SET_ERR_MSG(extack,
2073 "Invalid gateway address");
1da177e4 2074 goto out;
d5d531cb 2075 }
1da177e4 2076
a435a07f 2077 if (cfg->fc_table) {
8c14586f
DA
2078 grt = ip6_nh_lookup_table(net, cfg, gw_addr);
2079
a435a07f
VB
2080 if (grt) {
2081 if (grt->rt6i_flags & RTF_GATEWAY ||
2082 (dev && dev != grt->dst.dev)) {
2083 ip6_rt_put(grt);
2084 grt = NULL;
2085 }
2086 }
2087 }
2088
8c14586f
DA
2089 if (!grt)
2090 grt = rt6_lookup(net, gw_addr, NULL,
2091 cfg->fc_ifindex, 1);
1da177e4
LT
2092
2093 err = -EHOSTUNREACH;
38308473 2094 if (!grt)
1da177e4
LT
2095 goto out;
2096 if (dev) {
d1918542 2097 if (dev != grt->dst.dev) {
94e187c0 2098 ip6_rt_put(grt);
1da177e4
LT
2099 goto out;
2100 }
2101 } else {
d1918542 2102 dev = grt->dst.dev;
1da177e4
LT
2103 idev = grt->rt6i_idev;
2104 dev_hold(dev);
2105 in6_dev_hold(grt->rt6i_idev);
2106 }
38308473 2107 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 2108 err = 0;
94e187c0 2109 ip6_rt_put(grt);
1da177e4
LT
2110
2111 if (err)
2112 goto out;
2113 }
2114 err = -EINVAL;
d5d531cb
DA
2115 if (!dev) {
2116 NL_SET_ERR_MSG(extack, "Egress device not specified");
2117 goto out;
2118 } else if (dev->flags & IFF_LOOPBACK) {
2119 NL_SET_ERR_MSG(extack,
2120 "Egress device can not be loopback device for this route");
1da177e4 2121 goto out;
d5d531cb 2122 }
1da177e4
LT
2123 }
2124
2125 err = -ENODEV;
38308473 2126 if (!dev)
1da177e4
LT
2127 goto out;
2128
c3968a85
DW
2129 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2130 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 2131 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
2132 err = -EINVAL;
2133 goto out;
2134 }
4e3fd7a0 2135 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
2136 rt->rt6i_prefsrc.plen = 128;
2137 } else
2138 rt->rt6i_prefsrc.plen = 0;
2139
86872cb5 2140 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
2141
2142install_route:
d8d1f30b 2143 rt->dst.dev = dev;
1da177e4 2144 rt->rt6i_idev = idev;
c71099ac 2145 rt->rt6i_table = table;
63152fc0 2146
c346dca1 2147 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 2148
8c5b83f0 2149 return rt;
6b9ea5a6
RP
2150out:
2151 if (dev)
2152 dev_put(dev);
2153 if (idev)
2154 in6_dev_put(idev);
587fea74
WW
2155 if (rt)
2156 dst_release_immediate(&rt->dst);
6b9ea5a6 2157
8c5b83f0 2158 return ERR_PTR(err);
6b9ea5a6
RP
2159}
2160
333c4301
DA
2161int ip6_route_add(struct fib6_config *cfg,
2162 struct netlink_ext_ack *extack)
6b9ea5a6
RP
2163{
2164 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2165 struct rt6_info *rt;
6b9ea5a6
RP
2166 int err;
2167
333c4301 2168 rt = ip6_route_info_create(cfg, extack);
8c5b83f0
RP
2169 if (IS_ERR(rt)) {
2170 err = PTR_ERR(rt);
2171 rt = NULL;
6b9ea5a6 2172 goto out;
8c5b83f0 2173 }
6b9ea5a6 2174
e715b6d3
FW
2175 err = ip6_convert_metrics(&mxc, cfg);
2176 if (err)
2177 goto out;
1da177e4 2178
333c4301 2179 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
e715b6d3
FW
2180
2181 kfree(mxc.mx);
6b9ea5a6 2182
e715b6d3 2183 return err;
1da177e4 2184out:
587fea74
WW
2185 if (rt)
2186 dst_release_immediate(&rt->dst);
6b9ea5a6 2187
1da177e4
LT
2188 return err;
2189}
2190
86872cb5 2191static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2192{
2193 int err;
c71099ac 2194 struct fib6_table *table;
d1918542 2195 struct net *net = dev_net(rt->dst.dev);
1da177e4 2196
a4c2fd7f 2197 if (rt == net->ipv6.ip6_null_entry) {
6825a26c
G
2198 err = -ENOENT;
2199 goto out;
2200 }
6c813a72 2201
c71099ac
TG
2202 table = rt->rt6i_table;
2203 write_lock_bh(&table->tb6_lock);
86872cb5 2204 err = fib6_del(rt, info);
c71099ac 2205 write_unlock_bh(&table->tb6_lock);
1da177e4 2206
6825a26c 2207out:
94e187c0 2208 ip6_rt_put(rt);
1da177e4
LT
2209 return err;
2210}
2211
e0a1ad73
TG
2212int ip6_del_rt(struct rt6_info *rt)
2213{
4d1169c1 2214 struct nl_info info = {
d1918542 2215 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2216 };
528c4ceb 2217 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2218}
2219
0ae81335
DA
2220static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2221{
2222 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 2223 struct net *net = info->nl_net;
16a16cd3 2224 struct sk_buff *skb = NULL;
0ae81335 2225 struct fib6_table *table;
e3330039 2226 int err = -ENOENT;
0ae81335 2227
e3330039
WC
2228 if (rt == net->ipv6.ip6_null_entry)
2229 goto out_put;
0ae81335
DA
2230 table = rt->rt6i_table;
2231 write_lock_bh(&table->tb6_lock);
2232
2233 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2234 struct rt6_info *sibling, *next_sibling;
2235
16a16cd3
DA
2236 /* prefer to send a single notification with all hops */
2237 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2238 if (skb) {
2239 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2240
e3330039 2241 if (rt6_fill_node(net, skb, rt,
16a16cd3
DA
2242 NULL, NULL, 0, RTM_DELROUTE,
2243 info->portid, seq, 0) < 0) {
2244 kfree_skb(skb);
2245 skb = NULL;
2246 } else
2247 info->skip_notify = 1;
2248 }
2249
0ae81335
DA
2250 list_for_each_entry_safe(sibling, next_sibling,
2251 &rt->rt6i_siblings,
2252 rt6i_siblings) {
2253 err = fib6_del(sibling, info);
2254 if (err)
e3330039 2255 goto out_unlock;
0ae81335
DA
2256 }
2257 }
2258
2259 err = fib6_del(rt, info);
e3330039 2260out_unlock:
0ae81335 2261 write_unlock_bh(&table->tb6_lock);
e3330039 2262out_put:
0ae81335 2263 ip6_rt_put(rt);
16a16cd3
DA
2264
2265 if (skb) {
e3330039 2266 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
2267 info->nlh, gfp_any());
2268 }
0ae81335
DA
2269 return err;
2270}
2271
333c4301
DA
2272static int ip6_route_del(struct fib6_config *cfg,
2273 struct netlink_ext_ack *extack)
1da177e4 2274{
c71099ac 2275 struct fib6_table *table;
1da177e4
LT
2276 struct fib6_node *fn;
2277 struct rt6_info *rt;
2278 int err = -ESRCH;
2279
5578689a 2280 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
2281 if (!table) {
2282 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 2283 return err;
d5d531cb 2284 }
c71099ac
TG
2285
2286 read_lock_bh(&table->tb6_lock);
1da177e4 2287
c71099ac 2288 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
2289 &cfg->fc_dst, cfg->fc_dst_len,
2290 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 2291
1da177e4 2292 if (fn) {
d8d1f30b 2293 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
2294 if ((rt->rt6i_flags & RTF_CACHE) &&
2295 !(cfg->fc_flags & RTF_CACHE))
2296 continue;
86872cb5 2297 if (cfg->fc_ifindex &&
d1918542
DM
2298 (!rt->dst.dev ||
2299 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2300 continue;
86872cb5
TG
2301 if (cfg->fc_flags & RTF_GATEWAY &&
2302 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2303 continue;
86872cb5 2304 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2305 continue;
c2ed1880
M
2306 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2307 continue;
d8d1f30b 2308 dst_hold(&rt->dst);
c71099ac 2309 read_unlock_bh(&table->tb6_lock);
1da177e4 2310
0ae81335
DA
2311 /* if gateway was specified only delete the one hop */
2312 if (cfg->fc_flags & RTF_GATEWAY)
2313 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2314
2315 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
2316 }
2317 }
c71099ac 2318 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2319
2320 return err;
2321}
2322
6700c270 2323static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2324{
a6279458 2325 struct netevent_redirect netevent;
e8599ff4 2326 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2327 struct ndisc_options ndopts;
2328 struct inet6_dev *in6_dev;
2329 struct neighbour *neigh;
71bcdba0 2330 struct rd_msg *msg;
6e157b6a
DM
2331 int optlen, on_link;
2332 u8 *lladdr;
e8599ff4 2333
29a3cad5 2334 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2335 optlen -= sizeof(*msg);
e8599ff4
DM
2336
2337 if (optlen < 0) {
6e157b6a 2338 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2339 return;
2340 }
2341
71bcdba0 2342 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2343
71bcdba0 2344 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2345 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2346 return;
2347 }
2348
6e157b6a 2349 on_link = 0;
71bcdba0 2350 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2351 on_link = 1;
71bcdba0 2352 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2353 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2354 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2355 return;
2356 }
2357
2358 in6_dev = __in6_dev_get(skb->dev);
2359 if (!in6_dev)
2360 return;
2361 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2362 return;
2363
2364 /* RFC2461 8.1:
2365 * The IP source address of the Redirect MUST be the same as the current
2366 * first-hop router for the specified ICMP Destination Address.
2367 */
2368
f997c55c 2369 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2370 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2371 return;
2372 }
6e157b6a
DM
2373
2374 lladdr = NULL;
e8599ff4
DM
2375 if (ndopts.nd_opts_tgt_lladdr) {
2376 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2377 skb->dev);
2378 if (!lladdr) {
2379 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2380 return;
2381 }
2382 }
2383
6e157b6a 2384 rt = (struct rt6_info *) dst;
ec13ad1d 2385 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 2386 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2387 return;
6e157b6a 2388 }
e8599ff4 2389
6e157b6a
DM
2390 /* Redirect received -> path was valid.
2391 * Look, redirects are sent only in response to data packets,
2392 * so that this nexthop apparently is reachable. --ANK
2393 */
0dec879f 2394 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 2395
71bcdba0 2396 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2397 if (!neigh)
2398 return;
a6279458 2399
1da177e4
LT
2400 /*
2401 * We have finally decided to accept it.
2402 */
2403
f997c55c 2404 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
2405 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2406 NEIGH_UPDATE_F_OVERRIDE|
2407 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
2408 NEIGH_UPDATE_F_ISROUTER)),
2409 NDISC_REDIRECT, &ndopts);
1da177e4 2410
83a09abd 2411 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2412 if (!nrt)
1da177e4
LT
2413 goto out;
2414
2415 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2416 if (on_link)
2417 nrt->rt6i_flags &= ~RTF_GATEWAY;
2418
b91d5329 2419 nrt->rt6i_protocol = RTPROT_REDIRECT;
4e3fd7a0 2420 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2421
40e22e8f 2422 if (ip6_ins_rt(nrt))
1cfb71ee 2423 goto out_release;
1da177e4 2424
d8d1f30b
CG
2425 netevent.old = &rt->dst;
2426 netevent.new = &nrt->dst;
71bcdba0 2427 netevent.daddr = &msg->dest;
60592833 2428 netevent.neigh = neigh;
8d71740c
TT
2429 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2430
38308473 2431 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2432 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2433 ip6_del_rt(rt);
1da177e4
LT
2434 }
2435
1cfb71ee
WW
2436out_release:
2437 /* Release the reference taken in
2438 * ip6_rt_cache_alloc()
2439 */
2440 dst_release(&nrt->dst);
2441
1da177e4 2442out:
e8599ff4 2443 neigh_release(neigh);
6e157b6a
DM
2444}
2445
1da177e4
LT
2446/*
2447 * Misc support functions
2448 */
2449
4b32b5ad
MKL
2450static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2451{
2452 BUG_ON(from->dst.from);
2453
2454 rt->rt6i_flags &= ~RTF_EXPIRES;
2455 dst_hold(&from->dst);
2456 rt->dst.from = &from->dst;
2457 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2458}
2459
83a09abd
MKL
2460static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2461{
2462 rt->dst.input = ort->dst.input;
2463 rt->dst.output = ort->dst.output;
2464 rt->rt6i_dst = ort->rt6i_dst;
2465 rt->dst.error = ort->dst.error;
2466 rt->rt6i_idev = ort->rt6i_idev;
2467 if (rt->rt6i_idev)
2468 in6_dev_hold(rt->rt6i_idev);
2469 rt->dst.lastuse = jiffies;
2470 rt->rt6i_gateway = ort->rt6i_gateway;
2471 rt->rt6i_flags = ort->rt6i_flags;
2472 rt6_set_from(rt, ort);
2473 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2474#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2475 rt->rt6i_src = ort->rt6i_src;
1da177e4 2476#endif
83a09abd
MKL
2477 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2478 rt->rt6i_table = ort->rt6i_table;
61adedf3 2479 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
2480}
2481
70ceb4f5 2482#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2483static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 2484 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
2485 const struct in6_addr *gwaddr,
2486 struct net_device *dev)
70ceb4f5 2487{
830218c1
DA
2488 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2489 int ifindex = dev->ifindex;
70ceb4f5
YH
2490 struct fib6_node *fn;
2491 struct rt6_info *rt = NULL;
c71099ac
TG
2492 struct fib6_table *table;
2493
830218c1 2494 table = fib6_get_table(net, tb_id);
38308473 2495 if (!table)
c71099ac 2496 return NULL;
70ceb4f5 2497
5744dd9b 2498 read_lock_bh(&table->tb6_lock);
67ba4152 2499 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2500 if (!fn)
2501 goto out;
2502
d8d1f30b 2503 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2504 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2505 continue;
2506 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2507 continue;
2508 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2509 continue;
d8d1f30b 2510 dst_hold(&rt->dst);
70ceb4f5
YH
2511 break;
2512 }
2513out:
5744dd9b 2514 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2515 return rt;
2516}
2517
efa2cea0 2518static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 2519 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
2520 const struct in6_addr *gwaddr,
2521 struct net_device *dev,
95c96174 2522 unsigned int pref)
70ceb4f5 2523{
86872cb5 2524 struct fib6_config cfg = {
238fc7ea 2525 .fc_metric = IP6_RT_PRIO_USER,
830218c1 2526 .fc_ifindex = dev->ifindex,
86872cb5
TG
2527 .fc_dst_len = prefixlen,
2528 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2529 RTF_UP | RTF_PREF(pref),
b91d5329 2530 .fc_protocol = RTPROT_RA,
15e47304 2531 .fc_nlinfo.portid = 0,
efa2cea0
DL
2532 .fc_nlinfo.nlh = NULL,
2533 .fc_nlinfo.nl_net = net,
86872cb5
TG
2534 };
2535
830218c1 2536 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
2537 cfg.fc_dst = *prefix;
2538 cfg.fc_gateway = *gwaddr;
70ceb4f5 2539
e317da96
YH
2540 /* We should treat it as a default route if prefix length is 0. */
2541 if (!prefixlen)
86872cb5 2542 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2543
333c4301 2544 ip6_route_add(&cfg, NULL);
70ceb4f5 2545
830218c1 2546 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
2547}
2548#endif
2549
b71d1d42 2550struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2551{
830218c1 2552 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 2553 struct rt6_info *rt;
c71099ac 2554 struct fib6_table *table;
1da177e4 2555
830218c1 2556 table = fib6_get_table(dev_net(dev), tb_id);
38308473 2557 if (!table)
c71099ac 2558 return NULL;
1da177e4 2559
5744dd9b 2560 read_lock_bh(&table->tb6_lock);
67ba4152 2561 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2562 if (dev == rt->dst.dev &&
045927ff 2563 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2564 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2565 break;
2566 }
2567 if (rt)
d8d1f30b 2568 dst_hold(&rt->dst);
5744dd9b 2569 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2570 return rt;
2571}
2572
b71d1d42 2573struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2574 struct net_device *dev,
2575 unsigned int pref)
1da177e4 2576{
86872cb5 2577 struct fib6_config cfg = {
ca254490 2578 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 2579 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2580 .fc_ifindex = dev->ifindex,
2581 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2582 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 2583 .fc_protocol = RTPROT_RA,
15e47304 2584 .fc_nlinfo.portid = 0,
5578689a 2585 .fc_nlinfo.nlh = NULL,
c346dca1 2586 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2587 };
1da177e4 2588
4e3fd7a0 2589 cfg.fc_gateway = *gwaddr;
1da177e4 2590
333c4301 2591 if (!ip6_route_add(&cfg, NULL)) {
830218c1
DA
2592 struct fib6_table *table;
2593
2594 table = fib6_get_table(dev_net(dev), cfg.fc_table);
2595 if (table)
2596 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2597 }
1da177e4 2598
1da177e4
LT
2599 return rt6_get_dflt_router(gwaddr, dev);
2600}
2601
830218c1 2602static void __rt6_purge_dflt_routers(struct fib6_table *table)
1da177e4
LT
2603{
2604 struct rt6_info *rt;
2605
2606restart:
c71099ac 2607 read_lock_bh(&table->tb6_lock);
d8d1f30b 2608 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2609 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2610 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2611 dst_hold(&rt->dst);
c71099ac 2612 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2613 ip6_del_rt(rt);
1da177e4
LT
2614 goto restart;
2615 }
2616 }
c71099ac 2617 read_unlock_bh(&table->tb6_lock);
830218c1
DA
2618
2619 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
2620}
2621
2622void rt6_purge_dflt_routers(struct net *net)
2623{
2624 struct fib6_table *table;
2625 struct hlist_head *head;
2626 unsigned int h;
2627
2628 rcu_read_lock();
2629
2630 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2631 head = &net->ipv6.fib_table_hash[h];
2632 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2633 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
2634 __rt6_purge_dflt_routers(table);
2635 }
2636 }
2637
2638 rcu_read_unlock();
1da177e4
LT
2639}
2640
5578689a
DL
2641static void rtmsg_to_fib6_config(struct net *net,
2642 struct in6_rtmsg *rtmsg,
86872cb5
TG
2643 struct fib6_config *cfg)
2644{
2645 memset(cfg, 0, sizeof(*cfg));
2646
ca254490
DA
2647 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2648 : RT6_TABLE_MAIN;
86872cb5
TG
2649 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2650 cfg->fc_metric = rtmsg->rtmsg_metric;
2651 cfg->fc_expires = rtmsg->rtmsg_info;
2652 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2653 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2654 cfg->fc_flags = rtmsg->rtmsg_flags;
2655
5578689a 2656 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2657
4e3fd7a0
AD
2658 cfg->fc_dst = rtmsg->rtmsg_dst;
2659 cfg->fc_src = rtmsg->rtmsg_src;
2660 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2661}
2662
5578689a 2663int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2664{
86872cb5 2665 struct fib6_config cfg;
1da177e4
LT
2666 struct in6_rtmsg rtmsg;
2667 int err;
2668
67ba4152 2669 switch (cmd) {
1da177e4
LT
2670 case SIOCADDRT: /* Add a route */
2671 case SIOCDELRT: /* Delete a route */
af31f412 2672 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2673 return -EPERM;
2674 err = copy_from_user(&rtmsg, arg,
2675 sizeof(struct in6_rtmsg));
2676 if (err)
2677 return -EFAULT;
86872cb5 2678
5578689a 2679 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2680
1da177e4
LT
2681 rtnl_lock();
2682 switch (cmd) {
2683 case SIOCADDRT:
333c4301 2684 err = ip6_route_add(&cfg, NULL);
1da177e4
LT
2685 break;
2686 case SIOCDELRT:
333c4301 2687 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
2688 break;
2689 default:
2690 err = -EINVAL;
2691 }
2692 rtnl_unlock();
2693
2694 return err;
3ff50b79 2695 }
1da177e4
LT
2696
2697 return -EINVAL;
2698}
2699
2700/*
2701 * Drop the packet on the floor
2702 */
2703
d5fdd6ba 2704static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2705{
612f09e8 2706 int type;
adf30907 2707 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2708 switch (ipstats_mib_noroutes) {
2709 case IPSTATS_MIB_INNOROUTES:
0660e03f 2710 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2711 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2712 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2713 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2714 break;
2715 }
2716 /* FALLTHROUGH */
2717 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2718 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2719 ipstats_mib_noroutes);
612f09e8
YH
2720 break;
2721 }
3ffe533c 2722 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2723 kfree_skb(skb);
2724 return 0;
2725}
2726
9ce8ade0
TG
2727static int ip6_pkt_discard(struct sk_buff *skb)
2728{
612f09e8 2729 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2730}
2731
ede2059d 2732static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 2733{
adf30907 2734 skb->dev = skb_dst(skb)->dev;
612f09e8 2735 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2736}
2737
9ce8ade0
TG
2738static int ip6_pkt_prohibit(struct sk_buff *skb)
2739{
612f09e8 2740 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2741}
2742
ede2059d 2743static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 2744{
adf30907 2745 skb->dev = skb_dst(skb)->dev;
612f09e8 2746 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2747}
2748
1da177e4
LT
2749/*
2750 * Allocate a dst for local (unicast / anycast) address.
2751 */
2752
2753struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2754 const struct in6_addr *addr,
8f031519 2755 bool anycast)
1da177e4 2756{
ca254490 2757 u32 tb_id;
c346dca1 2758 struct net *net = dev_net(idev->dev);
4832c30d 2759 struct net_device *dev = idev->dev;
5f02ce24
DA
2760 struct rt6_info *rt;
2761
5f02ce24 2762 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 2763 if (!rt)
1da177e4
LT
2764 return ERR_PTR(-ENOMEM);
2765
1da177e4
LT
2766 in6_dev_hold(idev);
2767
11d53b49 2768 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2769 rt->dst.input = ip6_input;
2770 rt->dst.output = ip6_output;
1da177e4 2771 rt->rt6i_idev = idev;
1da177e4 2772
94b5e0f9 2773 rt->rt6i_protocol = RTPROT_KERNEL;
1da177e4 2774 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2775 if (anycast)
2776 rt->rt6i_flags |= RTF_ANYCAST;
2777 else
1da177e4 2778 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2779
550bab42 2780 rt->rt6i_gateway = *addr;
4e3fd7a0 2781 rt->rt6i_dst.addr = *addr;
1da177e4 2782 rt->rt6i_dst.plen = 128;
ca254490
DA
2783 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2784 rt->rt6i_table = fib6_get_table(net, tb_id);
1da177e4 2785
1da177e4
LT
2786 return rt;
2787}
2788
/*
 * Argument block for the FIB walker that removes a deleted address from
 * the preferred-source field of routes.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace being walked */
	struct in6_addr *addr;		/* address being removed */
};
2795
2796static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2797{
2798 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2799 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2800 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2801
d1918542 2802 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2803 rt != net->ipv6.ip6_null_entry &&
2804 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2805 /* remove prefsrc entry */
2806 rt->rt6i_prefsrc.plen = 0;
2807 }
2808 return 0;
2809}
2810
2811void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2812{
2813 struct net *net = dev_net(ifp->idev->dev);
2814 struct arg_dev_net_ip adni = {
2815 .dev = ifp->idev->dev,
2816 .net = net,
2817 .addr = &ifp->addr,
2818 };
0c3584d5 2819 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2820}
2821
be7a010d
DJ
2822#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2823#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2824
2825/* Remove routers and update dst entries when gateway turn into host. */
2826static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2827{
2828 struct in6_addr *gateway = (struct in6_addr *)arg;
2829
2830 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2831 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2832 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2833 return -1;
2834 }
2835 return 0;
2836}
2837
2838void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2839{
2840 fib6_clean_all(net, fib6_clean_tohost, gateway);
2841}
2842
/* Argument block for FIB walkers that filter on a device and namespace. */
struct arg_dev_net {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace being walked */
};
2847
a1a22c12 2848/* called with write lock held for table with rt */
1da177e4
LT
2849static int fib6_ifdown(struct rt6_info *rt, void *arg)
2850{
bc3ef660 2851 const struct arg_dev_net *adn = arg;
2852 const struct net_device *dev = adn->dev;
8ed67789 2853
d1918542 2854 if ((rt->dst.dev == dev || !dev) &&
a1a22c12
DA
2855 rt != adn->net->ipv6.ip6_null_entry &&
2856 (rt->rt6i_nsiblings == 0 ||
8397ed36 2857 (dev && netdev_unregistering(dev)) ||
a1a22c12 2858 !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
1da177e4 2859 return -1;
c159d30c 2860
1da177e4
LT
2861 return 0;
2862}
2863
f3db4851 2864void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2865{
8ed67789
DL
2866 struct arg_dev_net adn = {
2867 .dev = dev,
2868 .net = net,
2869 };
2870
0c3584d5 2871 fib6_clean_all(net, fib6_ifdown, &adn);
e332bc67
EB
2872 if (dev)
2873 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
2874}
2875
/* Argument block for the FIB walker that applies a device MTU change. */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2880
2881static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2882{
2883 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2884 struct inet6_dev *idev;
2885
2886 /* In IPv6 pmtu discovery is not optional,
2887 so that RTAX_MTU lock cannot disable it.
2888 We still use this lock to block changes
2889 caused by addrconf/ndisc.
2890 */
2891
2892 idev = __in6_dev_get(arg->dev);
38308473 2893 if (!idev)
1da177e4
LT
2894 return 0;
2895
2896 /* For administrative MTU increase, there is no way to discover
2897 IPv6 PMTU increase, so PMTU increase should be updated here.
2898 Since RFC 1981 doesn't include administrative MTU increase
2899 update PMTU increase is a MUST. (i.e. jumbo frame)
2900 */
2901 /*
2902 If new MTU is less than route PMTU, this new MTU will be the
2903 lowest MTU in the path, update the route PMTU to reflect PMTU
2904 decreases; if new MTU is greater than route PMTU, and the
2905 old MTU is the lowest MTU in the path, update the route PMTU
2906 to reflect the increase. In this case if the other nodes' MTU
2907 also have the lowest MTU, TOO BIG MESSAGE will be lead to
67c408cf 2908 PMTU discovery.
1da177e4 2909 */
d1918542 2910 if (rt->dst.dev == arg->dev &&
fb56be83 2911 dst_metric_raw(&rt->dst, RTAX_MTU) &&
4b32b5ad
MKL
2912 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2913 if (rt->rt6i_flags & RTF_CACHE) {
2914 /* For RTF_CACHE with rt6i_pmtu == 0
2915 * (i.e. a redirected route),
2916 * the metrics of its rt->dst.from has already
2917 * been updated.
2918 */
2919 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2920 rt->rt6i_pmtu = arg->mtu;
2921 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2922 (dst_mtu(&rt->dst) < arg->mtu &&
2923 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2924 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2925 }
566cfd8f 2926 }
1da177e4
LT
2927 return 0;
2928}
2929
95c96174 2930void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2931{
c71099ac
TG
2932 struct rt6_mtu_change_arg arg = {
2933 .dev = dev,
2934 .mtu = mtu,
2935 };
1da177e4 2936
0c3584d5 2937 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2938}
2939
ef7c79ed 2940static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2941 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2942 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2943 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2944 [RTA_PRIORITY] = { .type = NLA_U32 },
2945 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2946 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2947 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
2948 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2949 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 2950 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 2951 [RTA_UID] = { .type = NLA_U32 },
3b45a410 2952 [RTA_MARK] = { .type = NLA_U32 },
86872cb5
TG
2953};
2954
2955static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
2956 struct fib6_config *cfg,
2957 struct netlink_ext_ack *extack)
1da177e4 2958{
86872cb5
TG
2959 struct rtmsg *rtm;
2960 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2961 unsigned int pref;
86872cb5 2962 int err;
1da177e4 2963
fceb6435
JB
2964 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
2965 NULL);
86872cb5
TG
2966 if (err < 0)
2967 goto errout;
1da177e4 2968
86872cb5
TG
2969 err = -EINVAL;
2970 rtm = nlmsg_data(nlh);
2971 memset(cfg, 0, sizeof(*cfg));
2972
2973 cfg->fc_table = rtm->rtm_table;
2974 cfg->fc_dst_len = rtm->rtm_dst_len;
2975 cfg->fc_src_len = rtm->rtm_src_len;
2976 cfg->fc_flags = RTF_UP;
2977 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2978 cfg->fc_type = rtm->rtm_type;
86872cb5 2979
ef2c7d7b
ND
2980 if (rtm->rtm_type == RTN_UNREACHABLE ||
2981 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2982 rtm->rtm_type == RTN_PROHIBIT ||
2983 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2984 cfg->fc_flags |= RTF_REJECT;
2985
ab79ad14
2986 if (rtm->rtm_type == RTN_LOCAL)
2987 cfg->fc_flags |= RTF_LOCAL;
2988
1f56a01f
MKL
2989 if (rtm->rtm_flags & RTM_F_CLONED)
2990 cfg->fc_flags |= RTF_CACHE;
2991
15e47304 2992 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2993 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2994 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2995
2996 if (tb[RTA_GATEWAY]) {
67b61f6c 2997 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2998 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2999 }
86872cb5
TG
3000
3001 if (tb[RTA_DST]) {
3002 int plen = (rtm->rtm_dst_len + 7) >> 3;
3003
3004 if (nla_len(tb[RTA_DST]) < plen)
3005 goto errout;
3006
3007 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 3008 }
86872cb5
TG
3009
3010 if (tb[RTA_SRC]) {
3011 int plen = (rtm->rtm_src_len + 7) >> 3;
3012
3013 if (nla_len(tb[RTA_SRC]) < plen)
3014 goto errout;
3015
3016 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 3017 }
86872cb5 3018
c3968a85 3019 if (tb[RTA_PREFSRC])
67b61f6c 3020 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 3021
86872cb5
TG
3022 if (tb[RTA_OIF])
3023 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
3024
3025 if (tb[RTA_PRIORITY])
3026 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
3027
3028 if (tb[RTA_METRICS]) {
3029 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
3030 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 3031 }
86872cb5
TG
3032
3033 if (tb[RTA_TABLE])
3034 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
3035
51ebd318
ND
3036 if (tb[RTA_MULTIPATH]) {
3037 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
3038 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
3039
3040 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 3041 cfg->fc_mp_len, extack);
9ed59592
DA
3042 if (err < 0)
3043 goto errout;
51ebd318
ND
3044 }
3045
c78ba6d6
LR
3046 if (tb[RTA_PREF]) {
3047 pref = nla_get_u8(tb[RTA_PREF]);
3048 if (pref != ICMPV6_ROUTER_PREF_LOW &&
3049 pref != ICMPV6_ROUTER_PREF_HIGH)
3050 pref = ICMPV6_ROUTER_PREF_MEDIUM;
3051 cfg->fc_flags |= RTF_PREF(pref);
3052 }
3053
19e42e45
RP
3054 if (tb[RTA_ENCAP])
3055 cfg->fc_encap = tb[RTA_ENCAP];
3056
9ed59592 3057 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
3058 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
3059
c255bd68 3060 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
3061 if (err < 0)
3062 goto errout;
3063 }
3064
32bc201e
XL
3065 if (tb[RTA_EXPIRES]) {
3066 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3067
3068 if (addrconf_finite_timeout(timeout)) {
3069 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3070 cfg->fc_flags |= RTF_EXPIRES;
3071 }
3072 }
3073
86872cb5
TG
3074 err = 0;
3075errout:
3076 return err;
1da177e4
LT
3077}
3078
6b9ea5a6
RP
3079struct rt6_nh {
3080 struct rt6_info *rt6_info;
3081 struct fib6_config r_cfg;
3082 struct mx6_config mxc;
3083 struct list_head next;
3084};
3085
3086static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3087{
3088 struct rt6_nh *nh;
3089
3090 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 3091 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
3092 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3093 nh->r_cfg.fc_ifindex);
3094 }
3095}
3096
3097static int ip6_route_info_append(struct list_head *rt6_nh_list,
3098 struct rt6_info *rt, struct fib6_config *r_cfg)
3099{
3100 struct rt6_nh *nh;
6b9ea5a6
RP
3101 int err = -EEXIST;
3102
3103 list_for_each_entry(nh, rt6_nh_list, next) {
3104 /* check if rt6_info already exists */
f06b7549 3105 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
6b9ea5a6
RP
3106 return err;
3107 }
3108
3109 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3110 if (!nh)
3111 return -ENOMEM;
3112 nh->rt6_info = rt;
3113 err = ip6_convert_metrics(&nh->mxc, r_cfg);
3114 if (err) {
3115 kfree(nh);
3116 return err;
3117 }
3118 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3119 list_add_tail(&nh->next, rt6_nh_list);
3120
3121 return 0;
3122}
3123
3b1137fe
DA
3124static void ip6_route_mpath_notify(struct rt6_info *rt,
3125 struct rt6_info *rt_last,
3126 struct nl_info *info,
3127 __u16 nlflags)
3128{
3129 /* if this is an APPEND route, then rt points to the first route
3130 * inserted and rt_last points to last route inserted. Userspace
3131 * wants a consistent dump of the route which starts at the first
3132 * nexthop. Since sibling routes are always added at the end of
3133 * the list, find the first sibling of the last route appended
3134 */
3135 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3136 rt = list_first_entry(&rt_last->rt6i_siblings,
3137 struct rt6_info,
3138 rt6i_siblings);
3139 }
3140
3141 if (rt)
3142 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3143}
3144
333c4301
DA
3145static int ip6_route_multipath_add(struct fib6_config *cfg,
3146 struct netlink_ext_ack *extack)
51ebd318 3147{
3b1137fe
DA
3148 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
3149 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
3150 struct fib6_config r_cfg;
3151 struct rtnexthop *rtnh;
6b9ea5a6
RP
3152 struct rt6_info *rt;
3153 struct rt6_nh *err_nh;
3154 struct rt6_nh *nh, *nh_safe;
3b1137fe 3155 __u16 nlflags;
51ebd318
ND
3156 int remaining;
3157 int attrlen;
6b9ea5a6
RP
3158 int err = 1;
3159 int nhn = 0;
3160 int replace = (cfg->fc_nlinfo.nlh &&
3161 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3162 LIST_HEAD(rt6_nh_list);
51ebd318 3163
3b1137fe
DA
3164 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
3165 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
3166 nlflags |= NLM_F_APPEND;
3167
35f1b4e9 3168 remaining = cfg->fc_mp_len;
51ebd318 3169 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 3170
6b9ea5a6
RP
3171 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
3172 * rt6_info structs per nexthop
3173 */
51ebd318
ND
3174 while (rtnh_ok(rtnh, remaining)) {
3175 memcpy(&r_cfg, cfg, sizeof(*cfg));
3176 if (rtnh->rtnh_ifindex)
3177 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3178
3179 attrlen = rtnh_attrlen(rtnh);
3180 if (attrlen > 0) {
3181 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3182
3183 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3184 if (nla) {
67b61f6c 3185 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
3186 r_cfg.fc_flags |= RTF_GATEWAY;
3187 }
19e42e45
RP
3188 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3189 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3190 if (nla)
3191 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 3192 }
6b9ea5a6 3193
333c4301 3194 rt = ip6_route_info_create(&r_cfg, extack);
8c5b83f0
RP
3195 if (IS_ERR(rt)) {
3196 err = PTR_ERR(rt);
3197 rt = NULL;
6b9ea5a6 3198 goto cleanup;
8c5b83f0 3199 }
6b9ea5a6
RP
3200
3201 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 3202 if (err) {
587fea74 3203 dst_release_immediate(&rt->dst);
6b9ea5a6
RP
3204 goto cleanup;
3205 }
3206
3207 rtnh = rtnh_next(rtnh, &remaining);
3208 }
3209
3b1137fe
DA
3210 /* for add and replace send one notification with all nexthops.
3211 * Skip the notification in fib6_add_rt2node and send one with
3212 * the full route when done
3213 */
3214 info->skip_notify = 1;
3215
6b9ea5a6
RP
3216 err_nh = NULL;
3217 list_for_each_entry(nh, &rt6_nh_list, next) {
3b1137fe 3218 rt_last = nh->rt6_info;
333c4301 3219 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
3b1137fe
DA
3220 /* save reference to first route for notification */
3221 if (!rt_notif && !err)
3222 rt_notif = nh->rt6_info;
3223
6b9ea5a6
RP
3224 /* nh->rt6_info is used or freed at this point, reset to NULL*/
3225 nh->rt6_info = NULL;
3226 if (err) {
3227 if (replace && nhn)
3228 ip6_print_replace_route_err(&rt6_nh_list);
3229 err_nh = nh;
3230 goto add_errout;
51ebd318 3231 }
6b9ea5a6 3232
1a72418b 3233 /* Because each route is added like a single route we remove
27596472
MK
3234 * these flags after the first nexthop: if there is a collision,
3235 * we have already failed to add the first nexthop:
3236 * fib6_add_rt2node() has rejected it; when replacing, old
3237 * nexthops have been replaced by first new, the rest should
3238 * be added to it.
1a72418b 3239 */
27596472
MK
3240 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3241 NLM_F_REPLACE);
6b9ea5a6
RP
3242 nhn++;
3243 }
3244
3b1137fe
DA
3245 /* success ... tell user about new route */
3246 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
3247 goto cleanup;
3248
3249add_errout:
3b1137fe
DA
3250 /* send notification for routes that were added so that
3251 * the delete notifications sent by ip6_route_del are
3252 * coherent
3253 */
3254 if (rt_notif)
3255 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3256
6b9ea5a6
RP
3257 /* Delete routes that were already added */
3258 list_for_each_entry(nh, &rt6_nh_list, next) {
3259 if (err_nh == nh)
3260 break;
333c4301 3261 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
3262 }
3263
3264cleanup:
3265 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
587fea74
WW
3266 if (nh->rt6_info)
3267 dst_release_immediate(&nh->rt6_info->dst);
52fe51f8 3268 kfree(nh->mxc.mx);
6b9ea5a6
RP
3269 list_del(&nh->next);
3270 kfree(nh);
3271 }
3272
3273 return err;
3274}
3275
333c4301
DA
3276static int ip6_route_multipath_del(struct fib6_config *cfg,
3277 struct netlink_ext_ack *extack)
6b9ea5a6
RP
3278{
3279 struct fib6_config r_cfg;
3280 struct rtnexthop *rtnh;
3281 int remaining;
3282 int attrlen;
3283 int err = 1, last_err = 0;
3284
3285 remaining = cfg->fc_mp_len;
3286 rtnh = (struct rtnexthop *)cfg->fc_mp;
3287
3288 /* Parse a Multipath Entry */
3289 while (rtnh_ok(rtnh, remaining)) {
3290 memcpy(&r_cfg, cfg, sizeof(*cfg));
3291 if (rtnh->rtnh_ifindex)
3292 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3293
3294 attrlen = rtnh_attrlen(rtnh);
3295 if (attrlen > 0) {
3296 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3297
3298 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3299 if (nla) {
3300 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3301 r_cfg.fc_flags |= RTF_GATEWAY;
3302 }
3303 }
333c4301 3304 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
3305 if (err)
3306 last_err = err;
3307
51ebd318
ND
3308 rtnh = rtnh_next(rtnh, &remaining);
3309 }
3310
3311 return last_err;
3312}
3313
c21ef3e3
DA
3314static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3315 struct netlink_ext_ack *extack)
1da177e4 3316{
86872cb5
TG
3317 struct fib6_config cfg;
3318 int err;
1da177e4 3319
333c4301 3320 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
3321 if (err < 0)
3322 return err;
3323
51ebd318 3324 if (cfg.fc_mp)
333c4301 3325 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
3326 else {
3327 cfg.fc_delete_all_nh = 1;
333c4301 3328 return ip6_route_del(&cfg, extack);
0ae81335 3329 }
1da177e4
LT
3330}
3331
c21ef3e3
DA
3332static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3333 struct netlink_ext_ack *extack)
1da177e4 3334{
86872cb5
TG
3335 struct fib6_config cfg;
3336 int err;
1da177e4 3337
333c4301 3338 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
3339 if (err < 0)
3340 return err;
3341
51ebd318 3342 if (cfg.fc_mp)
333c4301 3343 return ip6_route_multipath_add(&cfg, extack);
51ebd318 3344 else
333c4301 3345 return ip6_route_add(&cfg, extack);
1da177e4
LT
3346}
3347
beb1afac 3348static size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f 3349{
beb1afac
DA
3350 int nexthop_len = 0;
3351
3352 if (rt->rt6i_nsiblings) {
3353 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
3354 + NLA_ALIGN(sizeof(struct rtnexthop))
3355 + nla_total_size(16) /* RTA_GATEWAY */
beb1afac
DA
3356 + lwtunnel_get_encap_size(rt->dst.lwtstate);
3357
3358 nexthop_len *= rt->rt6i_nsiblings;
3359 }
3360
339bf98f
TG
3361 return NLMSG_ALIGN(sizeof(struct rtmsg))
3362 + nla_total_size(16) /* RTA_SRC */
3363 + nla_total_size(16) /* RTA_DST */
3364 + nla_total_size(16) /* RTA_GATEWAY */
3365 + nla_total_size(16) /* RTA_PREFSRC */
3366 + nla_total_size(4) /* RTA_TABLE */
3367 + nla_total_size(4) /* RTA_IIF */
3368 + nla_total_size(4) /* RTA_OIF */
3369 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 3370 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 3371 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 3372 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 3373 + nla_total_size(1) /* RTA_PREF */
beb1afac
DA
3374 + lwtunnel_get_encap_size(rt->dst.lwtstate)
3375 + nexthop_len;
3376}
3377
3378static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
5be083ce 3379 unsigned int *flags, bool skip_oif)
beb1afac
DA
3380{
3381 if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
3382 *flags |= RTNH_F_LINKDOWN;
3383 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3384 *flags |= RTNH_F_DEAD;
3385 }
3386
3387 if (rt->rt6i_flags & RTF_GATEWAY) {
3388 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3389 goto nla_put_failure;
3390 }
3391
fe400799 3392 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
3393 *flags |= RTNH_F_OFFLOAD;
3394
5be083ce
DA
3395 /* not needed for multipath encoding b/c it has a rtnexthop struct */
3396 if (!skip_oif && rt->dst.dev &&
beb1afac
DA
3397 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3398 goto nla_put_failure;
3399
3400 if (rt->dst.lwtstate &&
3401 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3402 goto nla_put_failure;
3403
3404 return 0;
3405
3406nla_put_failure:
3407 return -EMSGSIZE;
3408}
3409
5be083ce 3410/* add multipath next hop */
beb1afac
DA
3411static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
3412{
3413 struct rtnexthop *rtnh;
3414 unsigned int flags = 0;
3415
3416 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
3417 if (!rtnh)
3418 goto nla_put_failure;
3419
3420 rtnh->rtnh_hops = 0;
3421 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
3422
5be083ce 3423 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
3424 goto nla_put_failure;
3425
3426 rtnh->rtnh_flags = flags;
3427
3428 /* length of rtnetlink header + attributes */
3429 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
3430
3431 return 0;
3432
3433nla_put_failure:
3434 return -EMSGSIZE;
339bf98f
TG
3435}
3436
191cd582
BH
3437static int rt6_fill_node(struct net *net,
3438 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 3439 struct in6_addr *dst, struct in6_addr *src,
15e47304 3440 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 3441 unsigned int flags)
1da177e4 3442{
4b32b5ad 3443 u32 metrics[RTAX_MAX];
1da177e4 3444 struct rtmsg *rtm;
2d7202bf 3445 struct nlmsghdr *nlh;
e3703b3d 3446 long expires;
9e762a4a 3447 u32 table;
1da177e4 3448
15e47304 3449 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 3450 if (!nlh)
26932566 3451 return -EMSGSIZE;
2d7202bf
TG
3452
3453 rtm = nlmsg_data(nlh);
1da177e4
LT
3454 rtm->rtm_family = AF_INET6;
3455 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3456 rtm->rtm_src_len = rt->rt6i_src.plen;
3457 rtm->rtm_tos = 0;
c71099ac 3458 if (rt->rt6i_table)
9e762a4a 3459 table = rt->rt6i_table->tb6_id;
c71099ac 3460 else
9e762a4a
PM
3461 table = RT6_TABLE_UNSPEC;
3462 rtm->rtm_table = table;
c78679e8
DM
3463 if (nla_put_u32(skb, RTA_TABLE, table))
3464 goto nla_put_failure;
ef2c7d7b
ND
3465 if (rt->rt6i_flags & RTF_REJECT) {
3466 switch (rt->dst.error) {
3467 case -EINVAL:
3468 rtm->rtm_type = RTN_BLACKHOLE;
3469 break;
3470 case -EACCES:
3471 rtm->rtm_type = RTN_PROHIBIT;
3472 break;
b4949ab2
ND
3473 case -EAGAIN:
3474 rtm->rtm_type = RTN_THROW;
3475 break;
ef2c7d7b
ND
3476 default:
3477 rtm->rtm_type = RTN_UNREACHABLE;
3478 break;
3479 }
3480 }
38308473 3481 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 3482 rtm->rtm_type = RTN_LOCAL;
4ee39733
DA
3483 else if (rt->rt6i_flags & RTF_ANYCAST)
3484 rtm->rtm_type = RTN_ANYCAST;
d1918542 3485 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
3486 rtm->rtm_type = RTN_LOCAL;
3487 else
3488 rtm->rtm_type = RTN_UNICAST;
3489 rtm->rtm_flags = 0;
3490 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3491 rtm->rtm_protocol = rt->rt6i_protocol;
1da177e4 3492
38308473 3493 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
3494 rtm->rtm_flags |= RTM_F_CLONED;
3495
3496 if (dst) {
930345ea 3497 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 3498 goto nla_put_failure;
1ab1457c 3499 rtm->rtm_dst_len = 128;
1da177e4 3500 } else if (rtm->rtm_dst_len)
930345ea 3501 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 3502 goto nla_put_failure;
1da177e4
LT
3503#ifdef CONFIG_IPV6_SUBTREES
3504 if (src) {
930345ea 3505 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 3506 goto nla_put_failure;
1ab1457c 3507 rtm->rtm_src_len = 128;
c78679e8 3508 } else if (rtm->rtm_src_len &&
930345ea 3509 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 3510 goto nla_put_failure;
1da177e4 3511#endif
7bc570c8
YH
3512 if (iif) {
3513#ifdef CONFIG_IPV6_MROUTE
3514 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
fd61c6ba
DA
3515 int err = ip6mr_get_route(net, skb, rtm, portid);
3516
3517 if (err == 0)
3518 return 0;
3519 if (err < 0)
3520 goto nla_put_failure;
7bc570c8
YH
3521 } else
3522#endif
c78679e8
DM
3523 if (nla_put_u32(skb, RTA_IIF, iif))
3524 goto nla_put_failure;
7bc570c8 3525 } else if (dst) {
1da177e4 3526 struct in6_addr saddr_buf;
c78679e8 3527 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 3528 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3529 goto nla_put_failure;
1da177e4 3530 }
2d7202bf 3531
c3968a85
DW
3532 if (rt->rt6i_prefsrc.plen) {
3533 struct in6_addr saddr_buf;
4e3fd7a0 3534 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 3535 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3536 goto nla_put_failure;
c3968a85
DW
3537 }
3538
4b32b5ad
MKL
3539 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3540 if (rt->rt6i_pmtu)
3541 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3542 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
3543 goto nla_put_failure;
3544
c78679e8
DM
3545 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3546 goto nla_put_failure;
8253947e 3547
beb1afac
DA
3548 /* For multipath routes, walk the siblings list and add
3549 * each as a nexthop within RTA_MULTIPATH.
3550 */
3551 if (rt->rt6i_nsiblings) {
3552 struct rt6_info *sibling, *next_sibling;
3553 struct nlattr *mp;
3554
3555 mp = nla_nest_start(skb, RTA_MULTIPATH);
3556 if (!mp)
3557 goto nla_put_failure;
3558
3559 if (rt6_add_nexthop(skb, rt) < 0)
3560 goto nla_put_failure;
3561
3562 list_for_each_entry_safe(sibling, next_sibling,
3563 &rt->rt6i_siblings, rt6i_siblings) {
3564 if (rt6_add_nexthop(skb, sibling) < 0)
3565 goto nla_put_failure;
3566 }
3567
3568 nla_nest_end(skb, mp);
3569 } else {
5be083ce 3570 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
3571 goto nla_put_failure;
3572 }
3573
8253947e 3574 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 3575
87a50699 3576 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 3577 goto nla_put_failure;
2d7202bf 3578
c78ba6d6
LR
3579 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3580 goto nla_put_failure;
3581
19e42e45 3582
053c095a
JB
3583 nlmsg_end(skb, nlh);
3584 return 0;
2d7202bf
TG
3585
3586nla_put_failure:
26932566
PM
3587 nlmsg_cancel(skb, nlh);
3588 return -EMSGSIZE;
1da177e4
LT
3589}
3590
1b43af54 3591int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
3592{
3593 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
3594 struct net *net = arg->net;
3595
3596 if (rt == net->ipv6.ip6_null_entry)
3597 return 0;
1da177e4 3598
2d7202bf
TG
3599 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3600 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
3601
3602 /* user wants prefix routes only */
3603 if (rtm->rtm_flags & RTM_F_PREFIX &&
3604 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
3605 /* success since this is not a prefix route */
3606 return 1;
3607 }
3608 }
1da177e4 3609
1f17e2f2 3610 return rt6_fill_node(net,
191cd582 3611 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 3612 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
f8cfe2ce 3613 NLM_F_MULTI);
1da177e4
LT
3614}
3615
c21ef3e3
DA
3616static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3617 struct netlink_ext_ack *extack)
1da177e4 3618{
3b1e0a65 3619 struct net *net = sock_net(in_skb->sk);
ab364a6f 3620 struct nlattr *tb[RTA_MAX+1];
18c3a61c
RP
3621 int err, iif = 0, oif = 0;
3622 struct dst_entry *dst;
ab364a6f 3623 struct rt6_info *rt;
1da177e4 3624 struct sk_buff *skb;
ab364a6f 3625 struct rtmsg *rtm;
4c9483b2 3626 struct flowi6 fl6;
18c3a61c 3627 bool fibmatch;
1da177e4 3628
fceb6435 3629 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 3630 extack);
ab364a6f
TG
3631 if (err < 0)
3632 goto errout;
1da177e4 3633
ab364a6f 3634 err = -EINVAL;
4c9483b2 3635 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
3636 rtm = nlmsg_data(nlh);
3637 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 3638 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 3639
ab364a6f
TG
3640 if (tb[RTA_SRC]) {
3641 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3642 goto errout;
3643
4e3fd7a0 3644 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
3645 }
3646
3647 if (tb[RTA_DST]) {
3648 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3649 goto errout;
3650
4e3fd7a0 3651 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
3652 }
3653
3654 if (tb[RTA_IIF])
3655 iif = nla_get_u32(tb[RTA_IIF]);
3656
3657 if (tb[RTA_OIF])
72331bc0 3658 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 3659
2e47b291
LC
3660 if (tb[RTA_MARK])
3661 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3662
622ec2c9
LC
3663 if (tb[RTA_UID])
3664 fl6.flowi6_uid = make_kuid(current_user_ns(),
3665 nla_get_u32(tb[RTA_UID]));
3666 else
3667 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
3668
1da177e4
LT
3669 if (iif) {
3670 struct net_device *dev;
72331bc0
SL
3671 int flags = 0;
3672
121622db
FW
3673 rcu_read_lock();
3674
3675 dev = dev_get_by_index_rcu(net, iif);
1da177e4 3676 if (!dev) {
121622db 3677 rcu_read_unlock();
1da177e4 3678 err = -ENODEV;
ab364a6f 3679 goto errout;
1da177e4 3680 }
72331bc0
SL
3681
3682 fl6.flowi6_iif = iif;
3683
3684 if (!ipv6_addr_any(&fl6.saddr))
3685 flags |= RT6_LOOKUP_F_HAS_SADDR;
3686
18c3a61c
RP
3687 if (!fibmatch)
3688 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
401481e0
AB
3689 else
3690 dst = ip6_route_lookup(net, &fl6, 0);
121622db
FW
3691
3692 rcu_read_unlock();
72331bc0
SL
3693 } else {
3694 fl6.flowi6_oif = oif;
3695
18c3a61c
RP
3696 if (!fibmatch)
3697 dst = ip6_route_output(net, NULL, &fl6);
401481e0
AB
3698 else
3699 dst = ip6_route_lookup(net, &fl6, 0);
18c3a61c
RP
3700 }
3701
18c3a61c
RP
3702
3703 rt = container_of(dst, struct rt6_info, dst);
3704 if (rt->dst.error) {
3705 err = rt->dst.error;
3706 ip6_rt_put(rt);
3707 goto errout;
1da177e4
LT
3708 }
3709
9d6acb3b
WC
3710 if (rt == net->ipv6.ip6_null_entry) {
3711 err = rt->dst.error;
3712 ip6_rt_put(rt);
3713 goto errout;
3714 }
3715
ab364a6f 3716 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 3717 if (!skb) {
94e187c0 3718 ip6_rt_put(rt);
ab364a6f
TG
3719 err = -ENOBUFS;
3720 goto errout;
3721 }
1da177e4 3722
d8d1f30b 3723 skb_dst_set(skb, &rt->dst);
18c3a61c
RP
3724 if (fibmatch)
3725 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
3726 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3727 nlh->nlmsg_seq, 0);
3728 else
3729 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3730 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3731 nlh->nlmsg_seq, 0);
1da177e4 3732 if (err < 0) {
ab364a6f
TG
3733 kfree_skb(skb);
3734 goto errout;
1da177e4
LT
3735 }
3736
15e47304 3737 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 3738errout:
1da177e4 3739 return err;
1da177e4
LT
3740}
3741
37a1d361
RP
3742void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3743 unsigned int nlm_flags)
1da177e4
LT
3744{
3745 struct sk_buff *skb;
5578689a 3746 struct net *net = info->nl_net;
528c4ceb
DL
3747 u32 seq;
3748 int err;
3749
3750 err = -ENOBUFS;
38308473 3751 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 3752
19e42e45 3753 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 3754 if (!skb)
21713ebc
TG
3755 goto errout;
3756
191cd582 3757 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
f8cfe2ce 3758 event, info->portid, seq, nlm_flags);
26932566
PM
3759 if (err < 0) {
3760 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3761 WARN_ON(err == -EMSGSIZE);
3762 kfree_skb(skb);
3763 goto errout;
3764 }
15e47304 3765 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
3766 info->nlh, gfp_any());
3767 return;
21713ebc
TG
3768errout:
3769 if (err < 0)
5578689a 3770 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
3771}
3772
8ed67789 3773static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 3774 unsigned long event, void *ptr)
8ed67789 3775{
351638e7 3776 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 3777 struct net *net = dev_net(dev);
8ed67789 3778
242d3a49
WC
3779 if (!(dev->flags & IFF_LOOPBACK))
3780 return NOTIFY_OK;
3781
3782 if (event == NETDEV_REGISTER) {
d8d1f30b 3783 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
3784 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3785#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3786 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 3787 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 3788 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 3789 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 3790#endif
76da0704
WC
3791 } else if (event == NETDEV_UNREGISTER &&
3792 dev->reg_state != NETREG_UNREGISTERED) {
3793 /* NETDEV_UNREGISTER could be fired for multiple times by
3794 * netdev_wait_allrefs(). Make sure we only call this once.
3795 */
12d94a80 3796 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 3797#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
3798 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
3799 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
3800#endif
3801 }
3802
3803 return NOTIFY_OK;
3804}
3805
1da177e4
LT
3806/*
3807 * /proc
3808 */
3809
3810#ifdef CONFIG_PROC_FS
3811
33120b30
AD
3812static const struct file_operations ipv6_route_proc_fops = {
3813 .owner = THIS_MODULE,
3814 .open = ipv6_route_open,
3815 .read = seq_read,
3816 .llseek = seq_lseek,
8d2ca1d7 3817 .release = seq_release_net,
33120b30
AD
3818};
3819
1da177e4
LT
3820static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3821{
69ddb805 3822 struct net *net = (struct net *)seq->private;
1da177e4 3823 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
3824 net->ipv6.rt6_stats->fib_nodes,
3825 net->ipv6.rt6_stats->fib_route_nodes,
3826 net->ipv6.rt6_stats->fib_rt_alloc,
3827 net->ipv6.rt6_stats->fib_rt_entries,
3828 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 3829 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 3830 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
3831
3832 return 0;
3833}
3834
3835static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3836{
de05c557 3837 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
3838}
3839
9a32144e 3840static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
3841 .owner = THIS_MODULE,
3842 .open = rt6_stats_seq_open,
3843 .read = seq_read,
3844 .llseek = seq_lseek,
b6fcbdb4 3845 .release = single_release_net,
1da177e4
LT
3846};
3847#endif /* CONFIG_PROC_FS */
3848
3849#ifdef CONFIG_SYSCTL
3850
1da177e4 3851static
fe2c6338 3852int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
3853 void __user *buffer, size_t *lenp, loff_t *ppos)
3854{
c486da34
LAG
3855 struct net *net;
3856 int delay;
3857 if (!write)
1da177e4 3858 return -EINVAL;
c486da34
LAG
3859
3860 net = (struct net *)ctl->extra1;
3861 delay = net->ipv6.sysctl.flush_delay;
3862 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 3863 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 3864 return 0;
1da177e4
LT
3865}
3866
fe2c6338 3867struct ctl_table ipv6_route_table_template[] = {
1ab1457c 3868 {
1da177e4 3869 .procname = "flush",
4990509f 3870 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 3871 .maxlen = sizeof(int),
89c8b3a1 3872 .mode = 0200,
6d9f239a 3873 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
3874 },
3875 {
1da177e4 3876 .procname = "gc_thresh",
9a7ec3a9 3877 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
3878 .maxlen = sizeof(int),
3879 .mode = 0644,
6d9f239a 3880 .proc_handler = proc_dointvec,
1da177e4
LT
3881 },
3882 {
1da177e4 3883 .procname = "max_size",
4990509f 3884 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
3885 .maxlen = sizeof(int),
3886 .mode = 0644,
6d9f239a 3887 .proc_handler = proc_dointvec,
1da177e4
LT
3888 },
3889 {
1da177e4 3890 .procname = "gc_min_interval",
4990509f 3891 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3892 .maxlen = sizeof(int),
3893 .mode = 0644,
6d9f239a 3894 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3895 },
3896 {
1da177e4 3897 .procname = "gc_timeout",
4990509f 3898 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3899 .maxlen = sizeof(int),
3900 .mode = 0644,
6d9f239a 3901 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3902 },
3903 {
1da177e4 3904 .procname = "gc_interval",
4990509f 3905 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3906 .maxlen = sizeof(int),
3907 .mode = 0644,
6d9f239a 3908 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3909 },
3910 {
1da177e4 3911 .procname = "gc_elasticity",
4990509f 3912 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3913 .maxlen = sizeof(int),
3914 .mode = 0644,
f3d3f616 3915 .proc_handler = proc_dointvec,
1da177e4
LT
3916 },
3917 {
1da177e4 3918 .procname = "mtu_expires",
4990509f 3919 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3920 .maxlen = sizeof(int),
3921 .mode = 0644,
6d9f239a 3922 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3923 },
3924 {
1da177e4 3925 .procname = "min_adv_mss",
4990509f 3926 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3927 .maxlen = sizeof(int),
3928 .mode = 0644,
f3d3f616 3929 .proc_handler = proc_dointvec,
1da177e4
LT
3930 },
3931 {
1da177e4 3932 .procname = "gc_min_interval_ms",
4990509f 3933 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3934 .maxlen = sizeof(int),
3935 .mode = 0644,
6d9f239a 3936 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3937 },
f8572d8f 3938 { }
1da177e4
LT
3939};
3940
2c8c1e72 3941struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3942{
3943 struct ctl_table *table;
3944
3945 table = kmemdup(ipv6_route_table_template,
3946 sizeof(ipv6_route_table_template),
3947 GFP_KERNEL);
5ee09105
YH
3948
3949 if (table) {
3950 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3951 table[0].extra1 = net;
86393e52 3952 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3953 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3954 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3955 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3956 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3957 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3958 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3959 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3960 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3961
3962 /* Don't export sysctls to unprivileged users */
3963 if (net->user_ns != &init_user_ns)
3964 table[0].procname = NULL;
5ee09105
YH
3965 }
3966
760f2d01
DL
3967 return table;
3968}
1da177e4
LT
3969#endif
3970
2c8c1e72 3971static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3972{
633d424b 3973 int ret = -ENOMEM;
8ed67789 3974
86393e52
AD
3975 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3976 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3977
fc66f95c
ED
3978 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3979 goto out_ip6_dst_ops;
3980
8ed67789
DL
3981 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3982 sizeof(*net->ipv6.ip6_null_entry),
3983 GFP_KERNEL);
3984 if (!net->ipv6.ip6_null_entry)
fc66f95c 3985 goto out_ip6_dst_entries;
d8d1f30b 3986 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3987 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3988 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3989 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3990 ip6_template_metrics, true);
8ed67789
DL
3991
3992#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 3993 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
3994 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3995 sizeof(*net->ipv6.ip6_prohibit_entry),
3996 GFP_KERNEL);
68fffc67
PZ
3997 if (!net->ipv6.ip6_prohibit_entry)
3998 goto out_ip6_null_entry;
d8d1f30b 3999 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 4000 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 4001 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4002 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
4003 ip6_template_metrics, true);
8ed67789
DL
4004
4005 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
4006 sizeof(*net->ipv6.ip6_blk_hole_entry),
4007 GFP_KERNEL);
68fffc67
PZ
4008 if (!net->ipv6.ip6_blk_hole_entry)
4009 goto out_ip6_prohibit_entry;
d8d1f30b 4010 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 4011 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 4012 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4013 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
4014 ip6_template_metrics, true);
8ed67789
DL
4015#endif
4016
b339a47c
PZ
4017 net->ipv6.sysctl.flush_delay = 0;
4018 net->ipv6.sysctl.ip6_rt_max_size = 4096;
4019 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
4020 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
4021 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
4022 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
4023 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
4024 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
4025
6891a346
BT
4026 net->ipv6.ip6_rt_gc_expire = 30*HZ;
4027
8ed67789
DL
4028 ret = 0;
4029out:
4030 return ret;
f2fc6a54 4031
68fffc67
PZ
4032#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4033out_ip6_prohibit_entry:
4034 kfree(net->ipv6.ip6_prohibit_entry);
4035out_ip6_null_entry:
4036 kfree(net->ipv6.ip6_null_entry);
4037#endif
fc66f95c
ED
4038out_ip6_dst_entries:
4039 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 4040out_ip6_dst_ops:
f2fc6a54 4041 goto out;
cdb18761
DL
4042}
4043
2c8c1e72 4044static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 4045{
8ed67789
DL
4046 kfree(net->ipv6.ip6_null_entry);
4047#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4048 kfree(net->ipv6.ip6_prohibit_entry);
4049 kfree(net->ipv6.ip6_blk_hole_entry);
4050#endif
41bb78b4 4051 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
4052}
4053
d189634e
TG
4054static int __net_init ip6_route_net_init_late(struct net *net)
4055{
4056#ifdef CONFIG_PROC_FS
d4beaa66
G
4057 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
4058 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
4059#endif
4060 return 0;
4061}
4062
4063static void __net_exit ip6_route_net_exit_late(struct net *net)
4064{
4065#ifdef CONFIG_PROC_FS
ece31ffd
G
4066 remove_proc_entry("ipv6_route", net->proc_net);
4067 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
4068#endif
4069}
4070
cdb18761
DL
4071static struct pernet_operations ip6_route_net_ops = {
4072 .init = ip6_route_net_init,
4073 .exit = ip6_route_net_exit,
4074};
4075
c3426b47
DM
4076static int __net_init ipv6_inetpeer_init(struct net *net)
4077{
4078 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4079
4080 if (!bp)
4081 return -ENOMEM;
4082 inet_peer_base_init(bp);
4083 net->ipv6.peers = bp;
4084 return 0;
4085}
4086
4087static void __net_exit ipv6_inetpeer_exit(struct net *net)
4088{
4089 struct inet_peer_base *bp = net->ipv6.peers;
4090
4091 net->ipv6.peers = NULL;
56a6b248 4092 inetpeer_invalidate_tree(bp);
c3426b47
DM
4093 kfree(bp);
4094}
4095
2b823f72 4096static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
4097 .init = ipv6_inetpeer_init,
4098 .exit = ipv6_inetpeer_exit,
4099};
4100
d189634e
TG
4101static struct pernet_operations ip6_route_net_late_ops = {
4102 .init = ip6_route_net_init_late,
4103 .exit = ip6_route_net_exit_late,
4104};
4105
8ed67789
DL
4106static struct notifier_block ip6_route_dev_notifier = {
4107 .notifier_call = ip6_route_dev_notify,
242d3a49 4108 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
4109};
4110
2f460933
WC
4111void __init ip6_route_init_special_entries(void)
4112{
4113 /* Registering of the loopback is done before this portion of code,
4114 * the loopback reference in rt6_info will not be taken, do it
4115 * manually for init_net */
4116 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
4117 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4118 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4119 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
4120 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4121 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
4122 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4123 #endif
4124}
4125
433d49c3 4126int __init ip6_route_init(void)
1da177e4 4127{
433d49c3 4128 int ret;
8d0b94af 4129 int cpu;
433d49c3 4130
9a7ec3a9
DL
4131 ret = -ENOMEM;
4132 ip6_dst_ops_template.kmem_cachep =
e5d679f3 4133 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 4134 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 4135 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 4136 goto out;
14e50e57 4137
fc66f95c 4138 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 4139 if (ret)
bdb3289f 4140 goto out_kmem_cache;
bdb3289f 4141
c3426b47
DM
4142 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
4143 if (ret)
e8803b6c 4144 goto out_dst_entries;
2a0c451a 4145
7e52b33b
DM
4146 ret = register_pernet_subsys(&ip6_route_net_ops);
4147 if (ret)
4148 goto out_register_inetpeer;
c3426b47 4149
5dc121e9
AE
4150 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
4151
e8803b6c 4152 ret = fib6_init();
433d49c3 4153 if (ret)
8ed67789 4154 goto out_register_subsys;
433d49c3 4155
433d49c3
DL
4156 ret = xfrm6_init();
4157 if (ret)
e8803b6c 4158 goto out_fib6_init;
c35b7e72 4159
433d49c3
DL
4160 ret = fib6_rules_init();
4161 if (ret)
4162 goto xfrm6_init;
7e5449c2 4163
d189634e
TG
4164 ret = register_pernet_subsys(&ip6_route_net_late_ops);
4165 if (ret)
4166 goto fib6_rules_init;
4167
433d49c3 4168 ret = -ENOBUFS;
b97bac64
FW
4169 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
4170 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
e3a22b7f
FW
4171 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
4172 RTNL_FLAG_DOIT_UNLOCKED))
d189634e 4173 goto out_register_late_subsys;
c127ea2c 4174
8ed67789 4175 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 4176 if (ret)
d189634e 4177 goto out_register_late_subsys;
8ed67789 4178
8d0b94af
MKL
4179 for_each_possible_cpu(cpu) {
4180 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
4181
4182 INIT_LIST_HEAD(&ul->head);
4183 spin_lock_init(&ul->lock);
4184 }
4185
433d49c3
DL
4186out:
4187 return ret;
4188
d189634e
TG
4189out_register_late_subsys:
4190 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 4191fib6_rules_init:
433d49c3
DL
4192 fib6_rules_cleanup();
4193xfrm6_init:
433d49c3 4194 xfrm6_fini();
2a0c451a
TG
4195out_fib6_init:
4196 fib6_gc_cleanup();
8ed67789
DL
4197out_register_subsys:
4198 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
4199out_register_inetpeer:
4200 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
4201out_dst_entries:
4202 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 4203out_kmem_cache:
f2fc6a54 4204 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 4205 goto out;
1da177e4
LT
4206}
4207
4208void ip6_route_cleanup(void)
4209{
8ed67789 4210 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 4211 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 4212 fib6_rules_cleanup();
1da177e4 4213 xfrm6_fini();
1da177e4 4214 fib6_gc_cleanup();
c3426b47 4215 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 4216 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 4217 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 4218 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 4219}