ipv6: Handle all fib6_nh in a nexthop in fib6_drop_pcpu_from
[linux-block.git] / net / ipv6 / route.c
CommitLineData
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
22
f3213831
JP
23#define pr_fmt(fmt) "IPv6: " fmt
24
4fc268d2 25#include <linux/capability.h>
1da177e4 26#include <linux/errno.h>
bc3b2d7f 27#include <linux/export.h>
1da177e4
LT
28#include <linux/types.h>
29#include <linux/times.h>
30#include <linux/socket.h>
31#include <linux/sockios.h>
32#include <linux/net.h>
33#include <linux/route.h>
34#include <linux/netdevice.h>
35#include <linux/in6.h>
7bc570c8 36#include <linux/mroute6.h>
1da177e4 37#include <linux/init.h>
1da177e4 38#include <linux/if_arp.h>
1da177e4
LT
39#include <linux/proc_fs.h>
40#include <linux/seq_file.h>
5b7c931d 41#include <linux/nsproxy.h>
5a0e3ad6 42#include <linux/slab.h>
35732d01 43#include <linux/jhash.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
904af04d 54#include <net/dst_metadata.h>
1da177e4 55#include <net/xfrm.h>
8d71740c 56#include <net/netevent.h>
21713ebc 57#include <net/netlink.h>
3c618c1d 58#include <net/rtnh.h>
19e42e45 59#include <net/lwtunnel.h>
904af04d 60#include <net/ip_tunnels.h>
ca254490 61#include <net/l3mdev.h>
eacb9384 62#include <net/ip.h>
7c0f6ba6 63#include <linux/uaccess.h>
1da177e4
LT
64
65#ifdef CONFIG_SYSCTL
66#include <linux/sysctl.h>
67#endif
68
30d444d3
DA
69static int ip6_rt_type_to_error(u8 fib6_type);
70
71#define CREATE_TRACE_POINTS
72#include <trace/events/fib6.h>
73EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
74#undef CREATE_TRACE_POINTS
75
/* Result of checking a nexthop's neighbour entry during router selection.
 * All failure codes are negative; rt6_score_route() returns them unchanged
 * when rt6_check_neigh() reports one.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1,
};
82
1da177e4 83static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 84static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 85static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
86static struct dst_entry *ip6_negative_advice(struct dst_entry *);
87static void ip6_dst_destroy(struct dst_entry *);
88static void ip6_dst_ifdown(struct dst_entry *,
89 struct net_device *dev, int how);
569d3645 90static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
91
92static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 94static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 95static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 96static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
97static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb, u32 mtu);
99static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
100 struct sk_buff *skb);
702cea56
DA
101static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
102 int strict);
8d1c802b 103static size_t rt6_nlmsg_size(struct fib6_info *rt);
d4ead6b3 104static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 105 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 106 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
107 int iif, int type, u32 portid, u32 seq,
108 unsigned int flags);
7e4b5128 109static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
510e2ced
WW
110 const struct in6_addr *daddr,
111 const struct in6_addr *saddr);
1da177e4 112
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Route Information helpers; both are used by rt6_route_rcv() below. */
static struct fib6_info *rt6_add_route_info(struct net *net,
					    const struct in6_addr *prefix,
					    int prefixlen,
					    const struct in6_addr *gwaddr,
					    struct net_device *dev,
					    unsigned int pref);
static struct fib6_info *rt6_get_route_info(struct net *net,
					    const struct in6_addr *prefix,
					    int prefixlen,
					    const struct in6_addr *gwaddr,
					    struct net_device *dev);
#endif
124
8d0b94af
MKL
125struct uncached_list {
126 spinlock_t lock;
127 struct list_head head;
128};
129
130static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
131
510c321b 132void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
133{
134 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
135
8d0b94af
MKL
136 rt->rt6i_uncached_list = ul;
137
138 spin_lock_bh(&ul->lock);
139 list_add_tail(&rt->rt6i_uncached, &ul->head);
140 spin_unlock_bh(&ul->lock);
141}
142
510c321b 143void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
144{
145 if (!list_empty(&rt->rt6i_uncached)) {
146 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 147 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
148
149 spin_lock_bh(&ul->lock);
150 list_del(&rt->rt6i_uncached);
81eb8447 151 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
152 spin_unlock_bh(&ul->lock);
153 }
154}
155
156static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
157{
158 struct net_device *loopback_dev = net->loopback_dev;
159 int cpu;
160
e332bc67
EB
161 if (dev == loopback_dev)
162 return;
163
8d0b94af
MKL
164 for_each_possible_cpu(cpu) {
165 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
166 struct rt6_info *rt;
167
168 spin_lock_bh(&ul->lock);
169 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
170 struct inet6_dev *rt_idev = rt->rt6i_idev;
171 struct net_device *rt_dev = rt->dst.dev;
172
e332bc67 173 if (rt_idev->dev == dev) {
8d0b94af
MKL
174 rt->rt6i_idev = in6_dev_get(loopback_dev);
175 in6_dev_put(rt_idev);
176 }
177
e332bc67 178 if (rt_dev == dev) {
8d0b94af
MKL
179 rt->dst.dev = loopback_dev;
180 dev_hold(rt->dst.dev);
181 dev_put(rt_dev);
182 }
183 }
184 spin_unlock_bh(&ul->lock);
185 }
186}
187
f8a1b43b 188static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
189 struct sk_buff *skb,
190 const void *daddr)
39232973 191{
a7563f34 192 if (!ipv6_addr_any(p))
39232973 193 return (const void *) p;
f894cbf8
DM
194 else if (skb)
195 return &ipv6_hdr(skb)->daddr;
39232973
DM
196 return daddr;
197}
198
f8a1b43b
DA
199struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
200 struct net_device *dev,
201 struct sk_buff *skb,
202 const void *daddr)
d3aaeb38 203{
39232973
DM
204 struct neighbour *n;
205
f8a1b43b
DA
206 daddr = choose_neigh_daddr(gw, skb, daddr);
207 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
208 if (n)
209 return n;
7adf3246
SB
210
211 n = neigh_create(&nd_tbl, daddr, dev);
212 return IS_ERR(n) ? NULL : n;
f8a1b43b
DA
213}
214
215static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
216 struct sk_buff *skb,
217 const void *daddr)
218{
219 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
220
221 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
222}
223
63fca65d
JA
224static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
225{
226 struct net_device *dev = dst->dev;
227 struct rt6_info *rt = (struct rt6_info *)dst;
228
f8a1b43b 229 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
230 if (!daddr)
231 return;
232 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
233 return;
234 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
235 return;
236 __ipv6_confirm_neigh(dev, daddr);
237}
238
9a7ec3a9 239static struct dst_ops ip6_dst_ops_template = {
1da177e4 240 .family = AF_INET6,
1da177e4
LT
241 .gc = ip6_dst_gc,
242 .gc_thresh = 1024,
243 .check = ip6_dst_check,
0dbaee3b 244 .default_advmss = ip6_default_advmss,
ebb762f2 245 .mtu = ip6_mtu,
d4ead6b3 246 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
247 .destroy = ip6_dst_destroy,
248 .ifdown = ip6_dst_ifdown,
249 .negative_advice = ip6_negative_advice,
250 .link_failure = ip6_link_failure,
251 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 252 .redirect = rt6_do_redirect,
9f8955cc 253 .local_out = __ip6_local_out,
f8a1b43b 254 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 255 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
256};
257
ebb762f2 258static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 259{
618f9bc7
SK
260 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
261
262 return mtu ? : dst->dev->mtu;
ec831ea7
RD
263}
264
6700c270
DM
265static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
266 struct sk_buff *skb, u32 mtu)
14e50e57
DM
267{
268}
269
6700c270
DM
/* dst_ops.redirect hook for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* redirects are ignored for blackhole routes */
}
274
14e50e57
DM
275static struct dst_ops ip6_dst_blackhole_ops = {
276 .family = AF_INET6,
14e50e57
DM
277 .destroy = ip6_dst_destroy,
278 .check = ip6_dst_check,
ebb762f2 279 .mtu = ip6_blackhole_mtu,
214f45c9 280 .default_advmss = ip6_default_advmss,
14e50e57 281 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 282 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 283 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 284 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
285};
286
62fa8a84 287static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 288 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
289};
290
8d1c802b 291static const struct fib6_info fib6_null_entry_template = {
93c2fb25
DA
292 .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
293 .fib6_protocol = RTPROT_KERNEL,
294 .fib6_metric = ~(u32)0,
f05713e0 295 .fib6_ref = REFCOUNT_INIT(1),
421842ed
DA
296 .fib6_type = RTN_UNREACHABLE,
297 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
298};
299
fb0af4c7 300static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
301 .dst = {
302 .__refcnt = ATOMIC_INIT(1),
303 .__use = 1,
2c20cbd7 304 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 305 .error = -ENETUNREACH,
d8d1f30b
CG
306 .input = ip6_pkt_discard,
307 .output = ip6_pkt_discard_out,
1da177e4
LT
308 },
309 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
1da177e4
LT
310};
311
101367c2
TG
312#ifdef CONFIG_IPV6_MULTIPLE_TABLES
313
fb0af4c7 314static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
315 .dst = {
316 .__refcnt = ATOMIC_INIT(1),
317 .__use = 1,
2c20cbd7 318 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 319 .error = -EACCES,
d8d1f30b
CG
320 .input = ip6_pkt_prohibit,
321 .output = ip6_pkt_prohibit_out,
101367c2
TG
322 },
323 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
324};
325
fb0af4c7 326static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
327 .dst = {
328 .__refcnt = ATOMIC_INIT(1),
329 .__use = 1,
2c20cbd7 330 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 331 .error = -EINVAL,
d8d1f30b 332 .input = dst_discard,
ede2059d 333 .output = dst_discard_out,
101367c2
TG
334 },
335 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
336};
337
338#endif
339
ebfa45f0
MKL
340static void rt6_info_init(struct rt6_info *rt)
341{
342 struct dst_entry *dst = &rt->dst;
343
344 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
ebfa45f0
MKL
345 INIT_LIST_HEAD(&rt->rt6i_uncached);
346}
347
1da177e4 348/* allocate dst with ip6_dst_ops */
93531c67
DA
349struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
350 int flags)
1da177e4 351{
97bab73f 352 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 353 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 354
81eb8447 355 if (rt) {
ebfa45f0 356 rt6_info_init(rt);
81eb8447
WW
357 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
358 }
8104891b 359
cf911662 360 return rt;
1da177e4 361}
9ab179d8 362EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 363
1da177e4
LT
364static void ip6_dst_destroy(struct dst_entry *dst)
365{
366 struct rt6_info *rt = (struct rt6_info *)dst;
a68886a6 367 struct fib6_info *from;
8d0b94af 368 struct inet6_dev *idev;
1da177e4 369
1620a336 370 ip_dst_metrics_put(dst);
8d0b94af
MKL
371 rt6_uncached_list_del(rt);
372
373 idev = rt->rt6i_idev;
38308473 374 if (idev) {
1da177e4
LT
375 rt->rt6i_idev = NULL;
376 in6_dev_put(idev);
1ab1457c 377 }
1716a961 378
0e233874 379 from = xchg((__force struct fib6_info **)&rt->from, NULL);
93531c67 380 fib6_info_release(from);
b3419363
DM
381}
382
1da177e4
LT
383static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
384 int how)
385{
386 struct rt6_info *rt = (struct rt6_info *)dst;
387 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 388 struct net_device *loopback_dev =
c346dca1 389 dev_net(dev)->loopback_dev;
1da177e4 390
e5645f51
WW
391 if (idev && idev->dev != loopback_dev) {
392 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
393 if (loopback_idev) {
394 rt->rt6i_idev = loopback_idev;
395 in6_dev_put(idev);
97cac082 396 }
1da177e4
LT
397 }
398}
399
5973fb1e
MKL
400static bool __rt6_check_expired(const struct rt6_info *rt)
401{
402 if (rt->rt6i_flags & RTF_EXPIRES)
403 return time_after(jiffies, rt->dst.expires);
404 else
405 return false;
406}
407
a50feda5 408static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 409{
a68886a6
DA
410 struct fib6_info *from;
411
412 from = rcu_dereference(rt->from);
413
1716a961
G
414 if (rt->rt6i_flags & RTF_EXPIRES) {
415 if (time_after(jiffies, rt->dst.expires))
a50feda5 416 return true;
a68886a6 417 } else if (from) {
1e2ea8ad 418 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
a68886a6 419 fib6_check_expired(from);
1716a961 420 }
a50feda5 421 return false;
1da177e4
LT
422}
423
b1d40991
DA
424void fib6_select_path(const struct net *net, struct fib6_result *res,
425 struct flowi6 *fl6, int oif, bool have_oif_match,
426 const struct sk_buff *skb, int strict)
51ebd318 427{
8d1c802b 428 struct fib6_info *sibling, *next_sibling;
b1d40991
DA
429 struct fib6_info *match = res->f6i;
430
f88d8ea6 431 if ((!match->fib6_nsiblings && !match->nh) || have_oif_match)
b1d40991 432 goto out;
51ebd318 433
b673d6cc
JS
434 /* We might have already computed the hash for ICMPv6 errors. In such
435 * case it will always be non-zero. Otherwise now is the time to do it.
436 */
f88d8ea6
DA
437 if (!fl6->mp_hash &&
438 (!match->nh || nexthop_is_multipath(match->nh)))
b4bac172 439 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 440
f88d8ea6
DA
441 if (unlikely(match->nh)) {
442 nexthop_path_fib6_result(res, fl6->mp_hash);
443 return;
444 }
445
1cf844c7 446 if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
b1d40991 447 goto out;
3d709f69 448
93c2fb25
DA
449 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
450 fib6_siblings) {
1cf844c7 451 const struct fib6_nh *nh = sibling->fib6_nh;
5e670d84
DA
452 int nh_upper_bound;
453
702cea56 454 nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
5e670d84 455 if (fl6->mp_hash > nh_upper_bound)
3d709f69 456 continue;
702cea56 457 if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
3d709f69
IS
458 break;
459 match = sibling;
460 break;
461 }
462
b1d40991
DA
463out:
464 res->f6i = match;
1cf844c7 465 res->nh = match->fib6_nh;
51ebd318
ND
466}
467
1da177e4 468/*
66f5d6ce 469 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
470 */
471
0c59d006
DA
472static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
473 const struct in6_addr *saddr, int oif, int flags)
474{
475 const struct net_device *dev;
476
477 if (nh->fib_nh_flags & RTNH_F_DEAD)
478 return false;
479
480 dev = nh->fib_nh_dev;
481 if (oif) {
482 if (dev->ifindex == oif)
483 return true;
484 } else {
485 if (ipv6_chk_addr(net, saddr, dev,
486 flags & RT6_LOOKUP_F_IFACE))
487 return true;
488 }
489
490 return false;
491}
492
75ef7389
DA
493static void rt6_device_match(struct net *net, struct fib6_result *res,
494 const struct in6_addr *saddr, int oif, int flags)
1da177e4 495{
75ef7389
DA
496 struct fib6_info *f6i = res->f6i;
497 struct fib6_info *spf6i;
498 struct fib6_nh *nh;
1da177e4 499
75ef7389 500 if (!oif && ipv6_addr_any(saddr)) {
f88d8ea6
DA
501 if (unlikely(f6i->nh)) {
502 nh = nexthop_fib6_nh(f6i->nh);
503 if (nexthop_is_blackhole(f6i->nh))
504 goto out_blackhole;
505 } else {
506 nh = f6i->fib6_nh;
507 }
7d21fec9
DA
508 if (!(nh->fib_nh_flags & RTNH_F_DEAD))
509 goto out;
75ef7389 510 }
dd3abc4e 511
75ef7389 512 for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
1cf844c7 513 nh = spf6i->fib6_nh;
75ef7389
DA
514 if (__rt6_device_match(net, nh, saddr, oif, flags)) {
515 res->f6i = spf6i;
7d21fec9 516 goto out;
75ef7389 517 }
dd3abc4e 518 }
1da177e4 519
75ef7389
DA
520 if (oif && flags & RT6_LOOKUP_F_IFACE) {
521 res->f6i = net->ipv6.fib6_null_entry;
1cf844c7 522 nh = res->f6i->fib6_nh;
7d21fec9 523 goto out;
75ef7389 524 }
8067bb8c 525
f88d8ea6
DA
526 if (unlikely(f6i->nh)) {
527 nh = nexthop_fib6_nh(f6i->nh);
528 if (nexthop_is_blackhole(f6i->nh))
529 goto out_blackhole;
530 } else {
531 nh = f6i->fib6_nh;
532 }
533
7d21fec9 534 if (nh->fib_nh_flags & RTNH_F_DEAD) {
75ef7389 535 res->f6i = net->ipv6.fib6_null_entry;
1cf844c7 536 nh = res->f6i->fib6_nh;
75ef7389 537 }
7d21fec9
DA
538out:
539 res->nh = nh;
540 res->fib6_type = res->f6i->fib6_type;
541 res->fib6_flags = res->f6i->fib6_flags;
f88d8ea6
DA
542 return;
543
544out_blackhole:
545 res->fib6_flags |= RTF_REJECT;
546 res->fib6_type = RTN_BLACKHOLE;
547 res->nh = nh;
1da177e4
LT
548}
549
27097255 550#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
551struct __rt6_probe_work {
552 struct work_struct work;
553 struct in6_addr target;
554 struct net_device *dev;
555};
556
557static void rt6_probe_deferred(struct work_struct *w)
558{
559 struct in6_addr mcaddr;
560 struct __rt6_probe_work *work =
561 container_of(w, struct __rt6_probe_work, work);
562
563 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 564 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 565 dev_put(work->dev);
662f5533 566 kfree(work);
c2f17e82
HFS
567}
568
cc3a86c8 569static void rt6_probe(struct fib6_nh *fib6_nh)
27097255 570{
f547fac6 571 struct __rt6_probe_work *work = NULL;
5e670d84 572 const struct in6_addr *nh_gw;
f2c31e32 573 struct neighbour *neigh;
5e670d84 574 struct net_device *dev;
f547fac6 575 struct inet6_dev *idev;
5e670d84 576
27097255
YH
577 /*
578 * Okay, this does not seem to be appropriate
579 * for now, however, we need to check if it
580 * is really so; aka Router Reachability Probing.
581 *
582 * Router Reachability Probe MUST be rate-limited
583 * to no more than one per minute.
584 */
cc3a86c8 585 if (fib6_nh->fib_nh_gw_family)
7ff74a59 586 return;
5e670d84 587
cc3a86c8
DA
588 nh_gw = &fib6_nh->fib_nh_gw6;
589 dev = fib6_nh->fib_nh_dev;
2152caea 590 rcu_read_lock_bh();
f547fac6 591 idev = __in6_dev_get(dev);
5e670d84 592 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 593 if (neigh) {
8d6c31bf
MKL
594 if (neigh->nud_state & NUD_VALID)
595 goto out;
596
2152caea 597 write_lock(&neigh->lock);
990edb42
MKL
598 if (!(neigh->nud_state & NUD_VALID) &&
599 time_after(jiffies,
dcd1f572 600 neigh->updated + idev->cnf.rtr_probe_interval)) {
990edb42
MKL
601 work = kmalloc(sizeof(*work), GFP_ATOMIC);
602 if (work)
603 __neigh_set_probe_once(neigh);
c2f17e82 604 }
2152caea 605 write_unlock(&neigh->lock);
cc3a86c8 606 } else if (time_after(jiffies, fib6_nh->last_probe +
f547fac6 607 idev->cnf.rtr_probe_interval)) {
990edb42 608 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 609 }
990edb42
MKL
610
611 if (work) {
cc3a86c8 612 fib6_nh->last_probe = jiffies;
990edb42 613 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
614 work->target = *nh_gw;
615 dev_hold(dev);
616 work->dev = dev;
990edb42
MKL
617 schedule_work(&work->work);
618 }
619
8d6c31bf 620out:
2152caea 621 rcu_read_unlock_bh();
27097255
YH
622}
623#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_nh *fib6_nh)
{
	/* nothing to do */
}
627#endif
628
1da177e4 629/*
554cfb7e 630 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 631 */
1ba9a895 632static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
1da177e4 633{
afc154e9 634 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 635 struct neighbour *neigh;
f2c31e32 636
145a3621 637 rcu_read_lock_bh();
1ba9a895
DA
638 neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
639 &fib6_nh->fib_nh_gw6);
145a3621
YH
640 if (neigh) {
641 read_lock(&neigh->lock);
554cfb7e 642 if (neigh->nud_state & NUD_VALID)
afc154e9 643 ret = RT6_NUD_SUCCEED;
398bcbeb 644#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 645 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 646 ret = RT6_NUD_SUCCEED;
7e980569
JB
647 else
648 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 649#endif
145a3621 650 read_unlock(&neigh->lock);
afc154e9
HFS
651 } else {
652 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 653 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 654 }
145a3621
YH
655 rcu_read_unlock_bh();
656
a5a81f0b 657 return ret;
1da177e4
LT
658}
659
702cea56
DA
660static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
661 int strict)
1da177e4 662{
6e1809a5
DA
663 int m = 0;
664
665 if (!oif || nh->fib_nh_dev->ifindex == oif)
666 m = 2;
1ab1457c 667
77d16f45 668 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 669 return RT6_NUD_FAIL_HARD;
ebacaaa0 670#ifdef CONFIG_IPV6_ROUTER_PREF
702cea56 671 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
ebacaaa0 672#endif
1ba9a895 673 if ((strict & RT6_LOOKUP_F_REACHABLE) &&
702cea56 674 !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
1ba9a895 675 int n = rt6_check_neigh(nh);
afc154e9
HFS
676 if (n < 0)
677 return n;
678 }
554cfb7e
YH
679 return m;
680}
681
28679ed1
DA
682static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
683 int oif, int strict, int *mpri, bool *do_rr)
554cfb7e 684{
afc154e9 685 bool match_do_rr = false;
28679ed1
DA
686 bool rc = false;
687 int m;
35103d11 688
28679ed1 689 if (nh->fib_nh_flags & RTNH_F_DEAD)
8067bb8c
IS
690 goto out;
691
28679ed1
DA
692 if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
693 nh->fib_nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 694 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 695 goto out;
f11e6659 696
28679ed1 697 m = rt6_score_route(nh, fib6_flags, oif, strict);
7e980569 698 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
699 match_do_rr = true;
700 m = 0; /* lowest valid score */
7e980569 701 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 702 goto out;
afc154e9
HFS
703 }
704
705 if (strict & RT6_LOOKUP_F_REACHABLE)
28679ed1 706 rt6_probe(nh);
f11e6659 707
7e980569 708 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 709 if (m > *mpri) {
afc154e9 710 *do_rr = match_do_rr;
f11e6659 711 *mpri = m;
28679ed1 712 rc = true;
f11e6659 713 }
f11e6659 714out:
28679ed1 715 return rc;
f11e6659
DM
716}
717
b7bc4b6a 718static void __find_rr_leaf(struct fib6_info *f6i_start,
30c15f03 719 struct fib6_info *nomatch, u32 metric,
b7bc4b6a 720 struct fib6_result *res, struct fib6_info **cont,
30c15f03 721 int oif, int strict, bool *do_rr, int *mpri)
f11e6659 722{
b7bc4b6a 723 struct fib6_info *f6i;
1da177e4 724
b7bc4b6a
DA
725 for (f6i = f6i_start;
726 f6i && f6i != nomatch;
727 f6i = rcu_dereference(f6i->fib6_next)) {
30c15f03
DA
728 struct fib6_nh *nh;
729
b7bc4b6a
DA
730 if (cont && f6i->fib6_metric != metric) {
731 *cont = f6i;
30c15f03 732 return;
9fbdcfaf
SK
733 }
734
b7bc4b6a 735 if (fib6_check_expired(f6i))
28679ed1
DA
736 continue;
737
1cf844c7 738 nh = f6i->fib6_nh;
b7bc4b6a
DA
739 if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) {
740 res->f6i = f6i;
741 res->nh = nh;
7d21fec9
DA
742 res->fib6_flags = f6i->fib6_flags;
743 res->fib6_type = f6i->fib6_type;
b7bc4b6a 744 }
9fbdcfaf 745 }
30c15f03 746}
9fbdcfaf 747
b7bc4b6a
DA
748static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
749 struct fib6_info *rr_head, int oif, int strict,
750 bool *do_rr, struct fib6_result *res)
30c15f03 751{
b7bc4b6a
DA
752 u32 metric = rr_head->fib6_metric;
753 struct fib6_info *cont = NULL;
30c15f03 754 int mpri = -1;
9fbdcfaf 755
b7bc4b6a 756 __find_rr_leaf(rr_head, NULL, metric, res, &cont,
30c15f03 757 oif, strict, do_rr, &mpri);
28679ed1 758
b7bc4b6a 759 __find_rr_leaf(leaf, rr_head, metric, res, &cont,
30c15f03 760 oif, strict, do_rr, &mpri);
9fbdcfaf 761
b7bc4b6a
DA
762 if (res->f6i || !cont)
763 return;
9fbdcfaf 764
b7bc4b6a 765 __find_rr_leaf(cont, NULL, metric, res, NULL,
30c15f03 766 oif, strict, do_rr, &mpri);
f11e6659 767}
1da177e4 768
b7bc4b6a
DA
769static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
770 struct fib6_result *res, int strict)
f11e6659 771{
8d1c802b 772 struct fib6_info *leaf = rcu_dereference(fn->leaf);
b7bc4b6a 773 struct fib6_info *rt0;
afc154e9 774 bool do_rr = false;
17ecf590 775 int key_plen;
1da177e4 776
b7bc4b6a
DA
777 /* make sure this function or its helpers sets f6i */
778 res->f6i = NULL;
779
421842ed 780 if (!leaf || leaf == net->ipv6.fib6_null_entry)
b7bc4b6a 781 goto out;
8d1040e8 782
66f5d6ce 783 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 784 if (!rt0)
66f5d6ce 785 rt0 = leaf;
1da177e4 786
17ecf590
WW
787 /* Double check to make sure fn is not an intermediate node
788 * and fn->leaf does not points to its child's leaf
789 * (This might happen if all routes under fn are deleted from
790 * the tree and fib6_repair_tree() is called on the node.)
791 */
93c2fb25 792 key_plen = rt0->fib6_dst.plen;
17ecf590 793#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
794 if (rt0->fib6_src.plen)
795 key_plen = rt0->fib6_src.plen;
17ecf590
WW
796#endif
797 if (fn->fn_bit != key_plen)
b7bc4b6a 798 goto out;
1da177e4 799
b7bc4b6a 800 find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
afc154e9 801 if (do_rr) {
8fb11a9a 802 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
f11e6659 803
554cfb7e 804 /* no entries matched; do round-robin */
93c2fb25 805 if (!next || next->fib6_metric != rt0->fib6_metric)
8d1040e8 806 next = leaf;
f11e6659 807
66f5d6ce 808 if (next != rt0) {
93c2fb25 809 spin_lock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 810 /* make sure next is not being deleted from the tree */
93c2fb25 811 if (next->fib6_node)
66f5d6ce 812 rcu_assign_pointer(fn->rr_ptr, next);
93c2fb25 813 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 814 }
1da177e4 815 }
1da177e4 816
b7bc4b6a
DA
817out:
818 if (!res->f6i) {
819 res->f6i = net->ipv6.fib6_null_entry;
1cf844c7 820 res->nh = res->f6i->fib6_nh;
7d21fec9
DA
821 res->fib6_flags = res->f6i->fib6_flags;
822 res->fib6_type = res->f6i->fib6_type;
b7bc4b6a 823 }
1da177e4
LT
824}
825
85bd05de 826static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
8b9df265 827{
85bd05de
DA
828 return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
829 res->nh->fib_nh_gw_family;
8b9df265
MKL
830}
831
70ceb4f5
YH
832#ifdef CONFIG_IPV6_ROUTE_INFO
833int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 834 const struct in6_addr *gwaddr)
70ceb4f5 835{
c346dca1 836 struct net *net = dev_net(dev);
70ceb4f5
YH
837 struct route_info *rinfo = (struct route_info *) opt;
838 struct in6_addr prefix_buf, *prefix;
839 unsigned int pref;
4bed72e4 840 unsigned long lifetime;
8d1c802b 841 struct fib6_info *rt;
70ceb4f5
YH
842
843 if (len < sizeof(struct route_info)) {
844 return -EINVAL;
845 }
846
847 /* Sanity check for prefix_len and length */
848 if (rinfo->length > 3) {
849 return -EINVAL;
850 } else if (rinfo->prefix_len > 128) {
851 return -EINVAL;
852 } else if (rinfo->prefix_len > 64) {
853 if (rinfo->length < 2) {
854 return -EINVAL;
855 }
856 } else if (rinfo->prefix_len > 0) {
857 if (rinfo->length < 1) {
858 return -EINVAL;
859 }
860 }
861
862 pref = rinfo->route_pref;
863 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 864 return -EINVAL;
70ceb4f5 865
4bed72e4 866 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
867
868 if (rinfo->length == 3)
869 prefix = (struct in6_addr *)rinfo->prefix;
870 else {
871 /* this function is safe */
872 ipv6_addr_prefix(&prefix_buf,
873 (struct in6_addr *)rinfo->prefix,
874 rinfo->prefix_len);
875 prefix = &prefix_buf;
876 }
877
f104a567 878 if (rinfo->prefix_len == 0)
afb1d4b5 879 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
880 else
881 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 882 gwaddr, dev);
70ceb4f5
YH
883
884 if (rt && !lifetime) {
afb1d4b5 885 ip6_del_rt(net, rt);
70ceb4f5
YH
886 rt = NULL;
887 }
888
889 if (!rt && lifetime)
830218c1
DA
890 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
891 dev, pref);
70ceb4f5 892 else if (rt)
93c2fb25
DA
893 rt->fib6_flags = RTF_ROUTEINFO |
894 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
70ceb4f5
YH
895
896 if (rt) {
1716a961 897 if (!addrconf_finite_timeout(lifetime))
14895687 898 fib6_clean_expires(rt);
1716a961 899 else
14895687 900 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 901
93531c67 902 fib6_info_release(rt);
70ceb4f5
YH
903 }
904 return 0;
905}
906#endif
907
ae90d867
DA
908/*
909 * Misc support functions
910 */
911
912/* called with rcu_lock held */
0d161581 913static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
ae90d867 914{
0d161581 915 struct net_device *dev = res->nh->fib_nh_dev;
ae90d867 916
7d21fec9 917 if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
ae90d867
DA
918 /* for copies of local routes, dst->dev needs to be the
919 * device if it is a master device, the master device if
920 * device is enslaved, and the loopback as the default
921 */
922 if (netif_is_l3_slave(dev) &&
7d21fec9 923 !rt6_need_strict(&res->f6i->fib6_dst.addr))
ae90d867
DA
924 dev = l3mdev_master_dev_rcu(dev);
925 else if (!netif_is_l3_master(dev))
926 dev = dev_net(dev)->loopback_dev;
927 /* last case is netif_is_l3_master(dev) is true in which
928 * case we want dev returned to be dev
929 */
930 }
931
932 return dev;
933}
934
6edb3c96
DA
935static const int fib6_prop[RTN_MAX + 1] = {
936 [RTN_UNSPEC] = 0,
937 [RTN_UNICAST] = 0,
938 [RTN_LOCAL] = 0,
939 [RTN_BROADCAST] = 0,
940 [RTN_ANYCAST] = 0,
941 [RTN_MULTICAST] = 0,
942 [RTN_BLACKHOLE] = -EINVAL,
943 [RTN_UNREACHABLE] = -EHOSTUNREACH,
944 [RTN_PROHIBIT] = -EACCES,
945 [RTN_THROW] = -EAGAIN,
946 [RTN_NAT] = -EINVAL,
947 [RTN_XRESOLVE] = -EINVAL,
948};
949
950static int ip6_rt_type_to_error(u8 fib6_type)
951{
952 return fib6_prop[fib6_type];
953}
954
8d1c802b 955static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
3b6761d1
DA
956{
957 unsigned short flags = 0;
958
959 if (rt->dst_nocount)
960 flags |= DST_NOCOUNT;
961 if (rt->dst_nopolicy)
962 flags |= DST_NOPOLICY;
963 if (rt->dst_host)
964 flags |= DST_HOST;
965
966 return flags;
967}
968
7d21fec9 969static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
6edb3c96 970{
7d21fec9 971 rt->dst.error = ip6_rt_type_to_error(fib6_type);
6edb3c96 972
7d21fec9 973 switch (fib6_type) {
6edb3c96
DA
974 case RTN_BLACKHOLE:
975 rt->dst.output = dst_discard_out;
976 rt->dst.input = dst_discard;
977 break;
978 case RTN_PROHIBIT:
979 rt->dst.output = ip6_pkt_prohibit_out;
980 rt->dst.input = ip6_pkt_prohibit;
981 break;
982 case RTN_THROW:
983 case RTN_UNREACHABLE:
984 default:
985 rt->dst.output = ip6_pkt_discard_out;
986 rt->dst.input = ip6_pkt_discard;
987 break;
988 }
989}
990
0d161581 991static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
6edb3c96 992{
7d21fec9 993 struct fib6_info *f6i = res->f6i;
0d161581 994
7d21fec9
DA
995 if (res->fib6_flags & RTF_REJECT) {
996 ip6_rt_init_dst_reject(rt, res->fib6_type);
6edb3c96
DA
997 return;
998 }
999
1000 rt->dst.error = 0;
1001 rt->dst.output = ip6_output;
1002
7d21fec9 1003 if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
6edb3c96 1004 rt->dst.input = ip6_input;
7d21fec9 1005 } else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
6edb3c96
DA
1006 rt->dst.input = ip6_mc_input;
1007 } else {
1008 rt->dst.input = ip6_forward;
1009 }
1010
0d161581
DA
1011 if (res->nh->fib_nh_lws) {
1012 rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
6edb3c96
DA
1013 lwtunnel_set_redirect(&rt->dst);
1014 }
1015
1016 rt->dst.lastuse = jiffies;
1017}
1018
e873e4b9 1019/* Caller must already hold reference to @from */
8d1c802b 1020static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ae90d867 1021{
ae90d867 1022 rt->rt6i_flags &= ~RTF_EXPIRES;
a68886a6 1023 rcu_assign_pointer(rt->from, from);
e1255ed4 1024 ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
ae90d867
DA
1025}
1026
0d161581
DA
1027/* Caller must already hold reference to f6i in result */
1028static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
ae90d867 1029{
0d161581
DA
1030 const struct fib6_nh *nh = res->nh;
1031 const struct net_device *dev = nh->fib_nh_dev;
1032 struct fib6_info *f6i = res->f6i;
dcd1f572 1033
0d161581 1034 ip6_rt_init_dst(rt, res);
6edb3c96 1035
0d161581 1036 rt->rt6i_dst = f6i->fib6_dst;
dcd1f572 1037 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
7d21fec9 1038 rt->rt6i_flags = res->fib6_flags;
0d161581
DA
1039 if (nh->fib_nh_gw_family) {
1040 rt->rt6i_gateway = nh->fib_nh_gw6;
2b2450ca
DA
1041 rt->rt6i_flags |= RTF_GATEWAY;
1042 }
0d161581 1043 rt6_set_from(rt, f6i);
ae90d867 1044#ifdef CONFIG_IPV6_SUBTREES
0d161581 1045 rt->rt6i_src = f6i->fib6_src;
ae90d867 1046#endif
ae90d867
DA
1047}
1048
a3c00e46
MKL
1049static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1050 struct in6_addr *saddr)
1051{
66f5d6ce 1052 struct fib6_node *pn, *sn;
a3c00e46
MKL
1053 while (1) {
1054 if (fn->fn_flags & RTN_TL_ROOT)
1055 return NULL;
66f5d6ce
WW
1056 pn = rcu_dereference(fn->parent);
1057 sn = FIB6_SUBTREE(pn);
1058 if (sn && sn != fn)
6454743b 1059 fn = fib6_node_lookup(sn, NULL, saddr);
a3c00e46
MKL
1060 else
1061 fn = pn;
1062 if (fn->fn_flags & RTN_RTINFO)
1063 return fn;
1064 }
1065}
c71099ac 1066
10585b43 1067static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
d3843fe5
WW
1068{
1069 struct rt6_info *rt = *prt;
1070
1071 if (dst_hold_safe(&rt->dst))
1072 return true;
10585b43 1073 if (net) {
d3843fe5
WW
1074 rt = net->ipv6.ip6_null_entry;
1075 dst_hold(&rt->dst);
1076 } else {
1077 rt = NULL;
1078 }
1079 *prt = rt;
1080 return false;
1081}
1082
dec9b0e2 1083/* called with rcu_lock held */
9b6b35ab 1084static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
dec9b0e2 1085{
9b6b35ab
DA
1086 struct net_device *dev = res->nh->fib_nh_dev;
1087 struct fib6_info *f6i = res->f6i;
1088 unsigned short flags;
dec9b0e2
DA
1089 struct rt6_info *nrt;
1090
9b6b35ab 1091 if (!fib6_info_hold_safe(f6i))
1c87e79a 1092 goto fallback;
e873e4b9 1093
9b6b35ab 1094 flags = fib6_info_dst_flags(f6i);
93531c67 1095 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1c87e79a 1096 if (!nrt) {
9b6b35ab 1097 fib6_info_release(f6i);
1c87e79a
XL
1098 goto fallback;
1099 }
dec9b0e2 1100
0d161581 1101 ip6_rt_copy_init(nrt, res);
1c87e79a
XL
1102 return nrt;
1103
1104fallback:
1105 nrt = dev_net(dev)->ipv6.ip6_null_entry;
1106 dst_hold(&nrt->dst);
dec9b0e2
DA
1107 return nrt;
1108}
1109
8ed67789
DL
1110static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1111 struct fib6_table *table,
b75cc8f9
DA
1112 struct flowi6 *fl6,
1113 const struct sk_buff *skb,
1114 int flags)
1da177e4 1115{
b1d40991 1116 struct fib6_result res = {};
1da177e4 1117 struct fib6_node *fn;
23fb93a4 1118 struct rt6_info *rt;
1da177e4 1119
b6cdbc85
DA
1120 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1121 flags &= ~RT6_LOOKUP_F_IFACE;
1122
66f5d6ce 1123 rcu_read_lock();
6454743b 1124 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1125restart:
b1d40991
DA
1126 res.f6i = rcu_dereference(fn->leaf);
1127 if (!res.f6i)
1128 res.f6i = net->ipv6.fib6_null_entry;
af52a52c 1129 else
75ef7389
DA
1130 rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
1131 flags);
af52a52c 1132
b1d40991 1133 if (res.f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1134 fn = fib6_backtrack(fn, &fl6->saddr);
1135 if (fn)
1136 goto restart;
2b760fcf 1137
af52a52c
DA
1138 rt = net->ipv6.ip6_null_entry;
1139 dst_hold(&rt->dst);
1140 goto out;
f88d8ea6
DA
1141 } else if (res.fib6_flags & RTF_REJECT) {
1142 goto do_create;
af52a52c 1143 }
d3843fe5 1144
b1d40991
DA
1145 fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
1146 fl6->flowi6_oif != 0, skb, flags);
1147
2b760fcf 1148 /* Search through exception table */
7e4b5128 1149 rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
23fb93a4 1150 if (rt) {
10585b43 1151 if (ip6_hold_safe(net, &rt))
dec9b0e2 1152 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1153 } else {
f88d8ea6 1154do_create:
9b6b35ab 1155 rt = ip6_create_rt_rcu(&res);
dec9b0e2 1156 }
b811580d 1157
af52a52c 1158out:
8ff2e5b2 1159 trace_fib6_table_lookup(net, &res, table, fl6);
af52a52c 1160
66f5d6ce 1161 rcu_read_unlock();
b811580d 1162
c71099ac 1163 return rt;
c71099ac
TG
1164}
1165
67ba4152 1166struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1167 const struct sk_buff *skb, int flags)
ea6e574e 1168{
b75cc8f9 1169 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1170}
1171EXPORT_SYMBOL_GPL(ip6_route_lookup);
1172
9acd9f3a 1173struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1174 const struct in6_addr *saddr, int oif,
1175 const struct sk_buff *skb, int strict)
c71099ac 1176{
4c9483b2
DM
1177 struct flowi6 fl6 = {
1178 .flowi6_oif = oif,
1179 .daddr = *daddr,
c71099ac
TG
1180 };
1181 struct dst_entry *dst;
77d16f45 1182 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1183
adaa70bb 1184 if (saddr) {
4c9483b2 1185 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1186 flags |= RT6_LOOKUP_F_HAS_SADDR;
1187 }
1188
b75cc8f9 1189 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1190 if (dst->error == 0)
1191 return (struct rt6_info *) dst;
1192
1193 dst_release(dst);
1194
1da177e4
LT
1195 return NULL;
1196}
7159039a
YH
1197EXPORT_SYMBOL(rt6_lookup);
1198
c71099ac 1199/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1200 * It takes new route entry, the addition fails by any reason the
1201 * route is released.
1202 * Caller must hold dst before calling it.
1da177e4
LT
1203 */
1204
8d1c802b 1205static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
333c4301 1206 struct netlink_ext_ack *extack)
1da177e4
LT
1207{
1208 int err;
c71099ac 1209 struct fib6_table *table;
1da177e4 1210
93c2fb25 1211 table = rt->fib6_table;
66f5d6ce 1212 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1213 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1214 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1215
1216 return err;
1217}
1218
8d1c802b 1219int ip6_ins_rt(struct net *net, struct fib6_info *rt)
40e22e8f 1220{
afb1d4b5 1221 struct nl_info info = { .nl_net = net, };
e715b6d3 1222
d4ead6b3 1223 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1224}
1225
85bd05de 1226static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
8b9df265
MKL
1227 const struct in6_addr *daddr,
1228 const struct in6_addr *saddr)
1da177e4 1229{
85bd05de 1230 struct fib6_info *f6i = res->f6i;
4832c30d 1231 struct net_device *dev;
1da177e4
LT
1232 struct rt6_info *rt;
1233
1234 /*
1235 * Clone the route.
1236 */
1237
85bd05de 1238 if (!fib6_info_hold_safe(f6i))
e873e4b9
WW
1239 return NULL;
1240
0d161581 1241 dev = ip6_rt_get_dev_rcu(res);
93531c67 1242 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
e873e4b9 1243 if (!rt) {
85bd05de 1244 fib6_info_release(f6i);
83a09abd 1245 return NULL;
e873e4b9 1246 }
83a09abd 1247
0d161581 1248 ip6_rt_copy_init(rt, res);
83a09abd 1249 rt->rt6i_flags |= RTF_CACHE;
83a09abd
MKL
1250 rt->dst.flags |= DST_HOST;
1251 rt->rt6i_dst.addr = *daddr;
1252 rt->rt6i_dst.plen = 128;
1da177e4 1253
85bd05de
DA
1254 if (!rt6_is_gw_or_nonexthop(res)) {
1255 if (f6i->fib6_dst.plen != 128 &&
1256 ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
83a09abd 1257 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1258#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1259 if (rt->rt6i_src.plen && saddr) {
1260 rt->rt6i_src.addr = *saddr;
1261 rt->rt6i_src.plen = 128;
8b9df265 1262 }
83a09abd 1263#endif
95a9a5ba 1264 }
1da177e4 1265
95a9a5ba
YH
1266 return rt;
1267}
1da177e4 1268
db3fedee 1269static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
d52d3997 1270{
db3fedee
DA
1271 struct fib6_info *f6i = res->f6i;
1272 unsigned short flags = fib6_info_dst_flags(f6i);
4832c30d 1273 struct net_device *dev;
d52d3997
MKL
1274 struct rt6_info *pcpu_rt;
1275
db3fedee 1276 if (!fib6_info_hold_safe(f6i))
e873e4b9
WW
1277 return NULL;
1278
4832c30d 1279 rcu_read_lock();
0d161581 1280 dev = ip6_rt_get_dev_rcu(res);
93531c67 1281 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1282 rcu_read_unlock();
e873e4b9 1283 if (!pcpu_rt) {
db3fedee 1284 fib6_info_release(f6i);
d52d3997 1285 return NULL;
e873e4b9 1286 }
0d161581 1287 ip6_rt_copy_init(pcpu_rt, res);
d52d3997
MKL
1288 pcpu_rt->rt6i_flags |= RTF_PCPU;
1289 return pcpu_rt;
1290}
1291
66f5d6ce 1292/* It should be called with rcu_read_lock() acquired */
db3fedee 1293static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
d52d3997 1294{
c353071a 1295 struct rt6_info *pcpu_rt;
d52d3997 1296
c353071a 1297 pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
d52d3997 1298
d4ead6b3 1299 if (pcpu_rt)
10585b43 1300 ip6_hold_safe(NULL, &pcpu_rt);
d3843fe5 1301
a73e4195
MKL
1302 return pcpu_rt;
1303}
1304
afb1d4b5 1305static struct rt6_info *rt6_make_pcpu_route(struct net *net,
db3fedee 1306 const struct fib6_result *res)
a73e4195
MKL
1307{
1308 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997 1309
db3fedee 1310 pcpu_rt = ip6_rt_pcpu_alloc(res);
d52d3997 1311 if (!pcpu_rt) {
9c7370a1
MKL
1312 dst_hold(&net->ipv6.ip6_null_entry->dst);
1313 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1314 }
1315
a94b9367 1316 dst_hold(&pcpu_rt->dst);
f40b6ae2 1317 p = this_cpu_ptr(res->nh->rt6i_pcpu);
a94b9367 1318 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1319 BUG_ON(prev);
a94b9367 1320
61fb0d01
ED
1321 if (res->f6i->fib6_destroying) {
1322 struct fib6_info *from;
1323
1324 from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
1325 fib6_info_release(from);
1326 }
1327
d52d3997
MKL
1328 return pcpu_rt;
1329}
1330
/* Exception hash table implementation.
 *
 * rt6_exception_lock is taken by the functions below that insert into,
 * remove from, flush or age the exception buckets.
 */
static DEFINE_SPINLOCK(rt6_exception_lock);
1334
1335/* Remove rt6_ex from hash table and free the memory
1336 * Caller must hold rt6_exception_lock
1337 */
1338static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1339 struct rt6_exception *rt6_ex)
1340{
f5b51fe8 1341 struct fib6_info *from;
b2427e67 1342 struct net *net;
81eb8447 1343
35732d01
WW
1344 if (!bucket || !rt6_ex)
1345 return;
b2427e67
CIK
1346
1347 net = dev_net(rt6_ex->rt6i->dst.dev);
f5b51fe8
PA
1348 net->ipv6.rt6_stats->fib_rt_cache--;
1349
1350 /* purge completely the exception to allow releasing the held resources:
1351 * some [sk] cache may keep the dst around for unlimited time
1352 */
0e233874 1353 from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
f5b51fe8
PA
1354 fib6_info_release(from);
1355 dst_dev_put(&rt6_ex->rt6i->dst);
1356
35732d01 1357 hlist_del_rcu(&rt6_ex->hlist);
77634cc6 1358 dst_release(&rt6_ex->rt6i->dst);
35732d01
WW
1359 kfree_rcu(rt6_ex, rcu);
1360 WARN_ON_ONCE(!bucket->depth);
1361 bucket->depth--;
1362}
1363
1364/* Remove oldest rt6_ex in bucket and free the memory
1365 * Caller must hold rt6_exception_lock
1366 */
1367static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1368{
1369 struct rt6_exception *rt6_ex, *oldest = NULL;
1370
1371 if (!bucket)
1372 return;
1373
1374 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1375 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1376 oldest = rt6_ex;
1377 }
1378 rt6_remove_exception(bucket, oldest);
1379}
1380
1381static u32 rt6_exception_hash(const struct in6_addr *dst,
1382 const struct in6_addr *src)
1383{
1384 static u32 seed __read_mostly;
1385 u32 val;
1386
1387 net_get_random_once(&seed, sizeof(seed));
1388 val = jhash(dst, sizeof(*dst), seed);
1389
1390#ifdef CONFIG_IPV6_SUBTREES
1391 if (src)
1392 val = jhash(src, sizeof(*src), val);
1393#endif
1394 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1395}
1396
1397/* Helper function to find the cached rt in the hash table
1398 * and update bucket pointer to point to the bucket for this
1399 * (daddr, saddr) pair
1400 * Caller must hold rt6_exception_lock
1401 */
1402static struct rt6_exception *
1403__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1404 const struct in6_addr *daddr,
1405 const struct in6_addr *saddr)
1406{
1407 struct rt6_exception *rt6_ex;
1408 u32 hval;
1409
1410 if (!(*bucket) || !daddr)
1411 return NULL;
1412
1413 hval = rt6_exception_hash(daddr, saddr);
1414 *bucket += hval;
1415
1416 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1417 struct rt6_info *rt6 = rt6_ex->rt6i;
1418 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1419
1420#ifdef CONFIG_IPV6_SUBTREES
1421 if (matched && saddr)
1422 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1423#endif
1424 if (matched)
1425 return rt6_ex;
1426 }
1427 return NULL;
1428}
1429
1430/* Helper function to find the cached rt in the hash table
1431 * and update bucket pointer to point to the bucket for this
1432 * (daddr, saddr) pair
1433 * Caller must hold rcu_read_lock()
1434 */
1435static struct rt6_exception *
1436__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1437 const struct in6_addr *daddr,
1438 const struct in6_addr *saddr)
1439{
1440 struct rt6_exception *rt6_ex;
1441 u32 hval;
1442
1443 WARN_ON_ONCE(!rcu_read_lock_held());
1444
1445 if (!(*bucket) || !daddr)
1446 return NULL;
1447
1448 hval = rt6_exception_hash(daddr, saddr);
1449 *bucket += hval;
1450
1451 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1452 struct rt6_info *rt6 = rt6_ex->rt6i;
1453 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1454
1455#ifdef CONFIG_IPV6_SUBTREES
1456 if (matched && saddr)
1457 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1458#endif
1459 if (matched)
1460 return rt6_ex;
1461 }
1462 return NULL;
1463}
1464
b748f260 1465static unsigned int fib6_mtu(const struct fib6_result *res)
d4ead6b3 1466{
b748f260 1467 const struct fib6_nh *nh = res->nh;
d4ead6b3
DA
1468 unsigned int mtu;
1469
b748f260
DA
1470 if (res->f6i->fib6_pmtu) {
1471 mtu = res->f6i->fib6_pmtu;
dcd1f572 1472 } else {
b748f260 1473 struct net_device *dev = nh->fib_nh_dev;
dcd1f572
DA
1474 struct inet6_dev *idev;
1475
1476 rcu_read_lock();
1477 idev = __in6_dev_get(dev);
1478 mtu = idev->cnf.mtu6;
1479 rcu_read_unlock();
1480 }
1481
d4ead6b3
DA
1482 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1483
b748f260 1484 return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
d4ead6b3
DA
1485}
1486
cc5c073a
DA
1487#define FIB6_EXCEPTION_BUCKET_FLUSHED 0x1UL
1488
1489/* used when the flushed bit is not relevant, only access to the bucket
1490 * (ie., all bucket users except rt6_insert_exception);
1491 *
1492 * called under rcu lock; sometimes called with rt6_exception_lock held
1493 */
1494static
1495struct rt6_exception_bucket *fib6_nh_get_excptn_bucket(const struct fib6_nh *nh,
1496 spinlock_t *lock)
1497{
1498 struct rt6_exception_bucket *bucket;
1499
1500 if (lock)
1501 bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1502 lockdep_is_held(lock));
1503 else
1504 bucket = rcu_dereference(nh->rt6i_exception_bucket);
1505
1506 /* remove bucket flushed bit if set */
1507 if (bucket) {
1508 unsigned long p = (unsigned long)bucket;
1509
1510 p &= ~FIB6_EXCEPTION_BUCKET_FLUSHED;
1511 bucket = (struct rt6_exception_bucket *)p;
1512 }
1513
1514 return bucket;
1515}
1516
1517static bool fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket *bucket)
1518{
1519 unsigned long p = (unsigned long)bucket;
1520
1521 return !!(p & FIB6_EXCEPTION_BUCKET_FLUSHED);
1522}
1523
1524/* called with rt6_exception_lock held */
1525static void fib6_nh_excptn_bucket_set_flushed(struct fib6_nh *nh,
1526 spinlock_t *lock)
1527{
1528 struct rt6_exception_bucket *bucket;
1529 unsigned long p;
1530
1531 bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1532 lockdep_is_held(lock));
1533
1534 p = (unsigned long)bucket;
1535 p |= FIB6_EXCEPTION_BUCKET_FLUSHED;
1536 bucket = (struct rt6_exception_bucket *)p;
1537 rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
1538}
1539
35732d01 1540static int rt6_insert_exception(struct rt6_info *nrt,
5012f0a5 1541 const struct fib6_result *res)
35732d01 1542{
5e670d84 1543 struct net *net = dev_net(nrt->dst.dev);
35732d01 1544 struct rt6_exception_bucket *bucket;
cc5c073a 1545 struct fib6_info *f6i = res->f6i;
35732d01
WW
1546 struct in6_addr *src_key = NULL;
1547 struct rt6_exception *rt6_ex;
cc5c073a 1548 struct fib6_nh *nh = res->nh;
35732d01
WW
1549 int err = 0;
1550
35732d01
WW
1551 spin_lock_bh(&rt6_exception_lock);
1552
cc5c073a
DA
1553 bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1554 lockdep_is_held(&rt6_exception_lock));
35732d01
WW
1555 if (!bucket) {
1556 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1557 GFP_ATOMIC);
1558 if (!bucket) {
1559 err = -ENOMEM;
1560 goto out;
1561 }
cc5c073a
DA
1562 rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
1563 } else if (fib6_nh_excptn_bucket_flushed(bucket)) {
1564 err = -EINVAL;
1565 goto out;
35732d01
WW
1566 }
1567
1568#ifdef CONFIG_IPV6_SUBTREES
5012f0a5 1569 /* fib6_src.plen != 0 indicates f6i is in subtree
35732d01 1570 * and exception table is indexed by a hash of
5012f0a5 1571 * both fib6_dst and fib6_src.
35732d01 1572 * Otherwise, the exception table is indexed by
5012f0a5 1573 * a hash of only fib6_dst.
35732d01 1574 */
5012f0a5 1575 if (f6i->fib6_src.plen)
35732d01
WW
1576 src_key = &nrt->rt6i_src.addr;
1577#endif
5012f0a5 1578 /* rt6_mtu_change() might lower mtu on f6i.
f5bbe7ee 1579 * Only insert this exception route if its mtu
5012f0a5 1580 * is less than f6i's mtu value.
f5bbe7ee 1581 */
b748f260 1582 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
f5bbe7ee
WW
1583 err = -EINVAL;
1584 goto out;
1585 }
60006a48 1586
35732d01
WW
1587 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1588 src_key);
1589 if (rt6_ex)
1590 rt6_remove_exception(bucket, rt6_ex);
1591
1592 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1593 if (!rt6_ex) {
1594 err = -ENOMEM;
1595 goto out;
1596 }
1597 rt6_ex->rt6i = nrt;
1598 rt6_ex->stamp = jiffies;
35732d01
WW
1599 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1600 bucket->depth++;
81eb8447 1601 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1602
1603 if (bucket->depth > FIB6_MAX_DEPTH)
1604 rt6_exception_remove_oldest(bucket);
1605
1606out:
1607 spin_unlock_bh(&rt6_exception_lock);
1608
1609 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1610 if (!err) {
5012f0a5
DA
1611 spin_lock_bh(&f6i->fib6_table->tb6_lock);
1612 fib6_update_sernum(net, f6i);
1613 spin_unlock_bh(&f6i->fib6_table->tb6_lock);
b886d5f2
PA
1614 fib6_force_start_gc(net);
1615 }
35732d01
WW
1616
1617 return err;
1618}
1619
c0b220cf 1620static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
35732d01
WW
1621{
1622 struct rt6_exception_bucket *bucket;
1623 struct rt6_exception *rt6_ex;
1624 struct hlist_node *tmp;
1625 int i;
1626
1627 spin_lock_bh(&rt6_exception_lock);
35732d01 1628
cc5c073a 1629 bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
35732d01
WW
1630 if (!bucket)
1631 goto out;
1632
cc5c073a
DA
1633 /* Prevent rt6_insert_exception() to recreate the bucket list */
1634 if (!from)
1635 fib6_nh_excptn_bucket_set_flushed(nh, &rt6_exception_lock);
1636
35732d01 1637 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
cc5c073a
DA
1638 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
1639 if (!from ||
1640 rcu_access_pointer(rt6_ex->rt6i->from) == from)
1641 rt6_remove_exception(bucket, rt6_ex);
1642 }
1643 WARN_ON_ONCE(!from && bucket->depth);
35732d01
WW
1644 bucket++;
1645 }
35732d01
WW
1646out:
1647 spin_unlock_bh(&rt6_exception_lock);
1648}
1649
c0b220cf
DA
1650void rt6_flush_exceptions(struct fib6_info *f6i)
1651{
1cf844c7 1652 fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
c0b220cf
DA
1653}
1654
35732d01
WW
1655/* Find cached rt in the hash table inside passed in rt
1656 * Caller has to hold rcu_read_lock()
1657 */
7e4b5128 1658static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
510e2ced
WW
1659 const struct in6_addr *daddr,
1660 const struct in6_addr *saddr)
35732d01 1661{
510e2ced 1662 const struct in6_addr *src_key = NULL;
35732d01 1663 struct rt6_exception_bucket *bucket;
35732d01 1664 struct rt6_exception *rt6_ex;
7e4b5128 1665 struct rt6_info *ret = NULL;
35732d01 1666
35732d01 1667#ifdef CONFIG_IPV6_SUBTREES
7e4b5128 1668 /* fib6i_src.plen != 0 indicates f6i is in subtree
35732d01 1669 * and exception table is indexed by a hash of
7e4b5128 1670 * both fib6_dst and fib6_src.
510e2ced
WW
1671 * However, the src addr used to create the hash
1672 * might not be exactly the passed in saddr which
1673 * is a /128 addr from the flow.
1674 * So we need to use f6i->fib6_src to redo lookup
1675 * if the passed in saddr does not find anything.
1676 * (See the logic in ip6_rt_cache_alloc() on how
1677 * rt->rt6i_src is updated.)
35732d01 1678 */
7e4b5128 1679 if (res->f6i->fib6_src.plen)
35732d01 1680 src_key = saddr;
510e2ced 1681find_ex:
35732d01 1682#endif
cc5c073a 1683 bucket = fib6_nh_get_excptn_bucket(res->nh, NULL);
35732d01
WW
1684 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1685
1686 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
7e4b5128 1687 ret = rt6_ex->rt6i;
35732d01 1688
510e2ced
WW
1689#ifdef CONFIG_IPV6_SUBTREES
1690 /* Use fib6_src as src_key and redo lookup */
1691 if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) {
1692 src_key = &res->f6i->fib6_src.addr;
1693 goto find_ex;
1694 }
1695#endif
1696
7e4b5128 1697 return ret;
35732d01
WW
1698}
1699
1700/* Remove the passed in cached rt from the hash table that contains it */
cc5c073a 1701static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
c0b220cf 1702 const struct rt6_info *rt)
35732d01 1703{
c0b220cf 1704 const struct in6_addr *src_key = NULL;
35732d01 1705 struct rt6_exception_bucket *bucket;
35732d01
WW
1706 struct rt6_exception *rt6_ex;
1707 int err;
1708
cc5c073a 1709 if (!rcu_access_pointer(nh->rt6i_exception_bucket))
35732d01
WW
1710 return -ENOENT;
1711
1712 spin_lock_bh(&rt6_exception_lock);
cc5c073a
DA
1713 bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
1714
35732d01 1715#ifdef CONFIG_IPV6_SUBTREES
cc5c073a
DA
1716 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1717 * and exception table is indexed by a hash of
1718 * both rt6i_dst and rt6i_src.
35732d01
WW
1719 * Otherwise, the exception table is indexed by
1720 * a hash of only rt6i_dst.
1721 */
c0b220cf 1722 if (plen)
35732d01
WW
1723 src_key = &rt->rt6i_src.addr;
1724#endif
1725 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1726 &rt->rt6i_dst.addr,
1727 src_key);
1728 if (rt6_ex) {
1729 rt6_remove_exception(bucket, rt6_ex);
1730 err = 0;
1731 } else {
1732 err = -ENOENT;
1733 }
1734
1735 spin_unlock_bh(&rt6_exception_lock);
1736 return err;
1737}
1738
c0b220cf
DA
1739static int rt6_remove_exception_rt(struct rt6_info *rt)
1740{
1741 struct fib6_info *from;
1742
1743 from = rcu_dereference(rt->from);
cc5c073a 1744 if (!from || !(rt->rt6i_flags & RTF_CACHE))
c0b220cf
DA
1745 return -EINVAL;
1746
1cf844c7 1747 return fib6_nh_remove_exception(from->fib6_nh,
cc5c073a 1748 from->fib6_src.plen, rt);
c0b220cf
DA
1749}
1750
35732d01
WW
1751/* Find rt6_ex which contains the passed in rt cache and
1752 * refresh its stamp
1753 */
cc5c073a 1754static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
c0b220cf 1755 const struct rt6_info *rt)
35732d01 1756{
c0b220cf 1757 const struct in6_addr *src_key = NULL;
35732d01 1758 struct rt6_exception_bucket *bucket;
35732d01 1759 struct rt6_exception *rt6_ex;
193f3685 1760
cc5c073a 1761 bucket = fib6_nh_get_excptn_bucket(nh, NULL);
35732d01 1762#ifdef CONFIG_IPV6_SUBTREES
cc5c073a
DA
1763 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1764 * and exception table is indexed by a hash of
1765 * both rt6i_dst and rt6i_src.
35732d01
WW
1766 * Otherwise, the exception table is indexed by
1767 * a hash of only rt6i_dst.
1768 */
c0b220cf 1769 if (plen)
35732d01
WW
1770 src_key = &rt->rt6i_src.addr;
1771#endif
cc5c073a 1772 rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
35732d01
WW
1773 if (rt6_ex)
1774 rt6_ex->stamp = jiffies;
c0b220cf
DA
1775}
1776
1777static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1778{
1779 struct fib6_info *from;
35732d01 1780
c0b220cf
DA
1781 rcu_read_lock();
1782
1783 from = rcu_dereference(rt->from);
1784 if (!from || !(rt->rt6i_flags & RTF_CACHE))
1785 goto unlock;
1786
1cf844c7 1787 fib6_nh_update_exception(from->fib6_nh, from->fib6_src.plen, rt);
193f3685 1788unlock:
35732d01
WW
1789 rcu_read_unlock();
1790}
1791
e9fa1495
SB
1792static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1793 struct rt6_info *rt, int mtu)
1794{
1795 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1796 * lowest MTU in the path: always allow updating the route PMTU to
1797 * reflect PMTU decreases.
1798 *
1799 * If the new MTU is higher, and the route PMTU is equal to the local
1800 * MTU, this means the old MTU is the lowest in the path, so allow
1801 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1802 * handle this.
1803 */
1804
1805 if (dst_mtu(&rt->dst) >= mtu)
1806 return true;
1807
1808 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1809 return true;
1810
1811 return false;
1812}
1813
1814static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
cc5c073a 1815 const struct fib6_nh *nh, int mtu)
f5bbe7ee
WW
1816{
1817 struct rt6_exception_bucket *bucket;
1818 struct rt6_exception *rt6_ex;
1819 int i;
1820
cc5c073a 1821 bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
e9fa1495
SB
1822 if (!bucket)
1823 return;
1824
1825 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1826 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1827 struct rt6_info *entry = rt6_ex->rt6i;
1828
1829 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1830 * route), the metrics of its rt->from have already
e9fa1495
SB
1831 * been updated.
1832 */
d4ead6b3 1833 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1834 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1835 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1836 }
e9fa1495 1837 bucket++;
f5bbe7ee
WW
1838 }
1839}
1840
b16cb459
WW
1841#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1842
cc5c073a
DA
1843static void fib6_nh_exceptions_clean_tohost(const struct fib6_nh *nh,
1844 const struct in6_addr *gateway)
b16cb459
WW
1845{
1846 struct rt6_exception_bucket *bucket;
1847 struct rt6_exception *rt6_ex;
1848 struct hlist_node *tmp;
1849 int i;
1850
cc5c073a 1851 if (!rcu_access_pointer(nh->rt6i_exception_bucket))
b16cb459
WW
1852 return;
1853
1854 spin_lock_bh(&rt6_exception_lock);
cc5c073a 1855 bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
b16cb459
WW
1856 if (bucket) {
1857 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1858 hlist_for_each_entry_safe(rt6_ex, tmp,
1859 &bucket->chain, hlist) {
1860 struct rt6_info *entry = rt6_ex->rt6i;
1861
1862 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1863 RTF_CACHE_GATEWAY &&
1864 ipv6_addr_equal(gateway,
1865 &entry->rt6i_gateway)) {
1866 rt6_remove_exception(bucket, rt6_ex);
1867 }
1868 }
1869 bucket++;
1870 }
1871 }
1872
1873 spin_unlock_bh(&rt6_exception_lock);
1874}
1875
c757faa8
WW
1876static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1877 struct rt6_exception *rt6_ex,
1878 struct fib6_gc_args *gc_args,
1879 unsigned long now)
1880{
1881 struct rt6_info *rt = rt6_ex->rt6i;
1882
1859bac0
PA
1883 /* we are pruning and obsoleting aged-out and non gateway exceptions
1884 * even if others have still references to them, so that on next
1885 * dst_check() such references can be dropped.
1886 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1887 * expired, independently from their aging, as per RFC 8201 section 4
1888 */
31afeb42
WW
1889 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1890 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1891 RT6_TRACE("aging clone %p\n", rt);
1892 rt6_remove_exception(bucket, rt6_ex);
1893 return;
1894 }
1895 } else if (time_after(jiffies, rt->dst.expires)) {
1896 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1897 rt6_remove_exception(bucket, rt6_ex);
1898 return;
31afeb42
WW
1899 }
1900
1901 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1902 struct neighbour *neigh;
1903 __u8 neigh_flags = 0;
1904
1bfa26ff
ED
1905 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1906 if (neigh)
c757faa8 1907 neigh_flags = neigh->flags;
1bfa26ff 1908
c757faa8
WW
1909 if (!(neigh_flags & NTF_ROUTER)) {
1910 RT6_TRACE("purging route %p via non-router but gateway\n",
1911 rt);
1912 rt6_remove_exception(bucket, rt6_ex);
1913 return;
1914 }
1915 }
31afeb42 1916
c757faa8
WW
1917 gc_args->more++;
1918}
1919
cc5c073a 1920static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
c0b220cf
DA
1921 struct fib6_gc_args *gc_args,
1922 unsigned long now)
c757faa8
WW
1923{
1924 struct rt6_exception_bucket *bucket;
1925 struct rt6_exception *rt6_ex;
1926 struct hlist_node *tmp;
1927 int i;
1928
cc5c073a 1929 if (!rcu_access_pointer(nh->rt6i_exception_bucket))
c757faa8
WW
1930 return;
1931
1bfa26ff
ED
1932 rcu_read_lock_bh();
1933 spin_lock(&rt6_exception_lock);
cc5c073a 1934 bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
c757faa8
WW
1935 if (bucket) {
1936 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1937 hlist_for_each_entry_safe(rt6_ex, tmp,
1938 &bucket->chain, hlist) {
1939 rt6_age_examine_exception(bucket, rt6_ex,
1940 gc_args, now);
1941 }
1942 bucket++;
1943 }
1944 }
1bfa26ff
ED
1945 spin_unlock(&rt6_exception_lock);
1946 rcu_read_unlock_bh();
c757faa8
WW
1947}
1948
cc5c073a 1949void rt6_age_exceptions(struct fib6_info *f6i,
c0b220cf
DA
1950 struct fib6_gc_args *gc_args,
1951 unsigned long now)
1952{
1cf844c7 1953 fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
c0b220cf
DA
1954}
1955
1d053da9 1956/* must be called with rcu lock held */
effda4dd
DA
1957int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
1958 struct flowi6 *fl6, struct fib6_result *res, int strict)
1da177e4 1959{
367efcb9 1960 struct fib6_node *fn, *saved_fn;
1da177e4 1961
6454743b 1962 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1963 saved_fn = fn;
1da177e4 1964
ca254490
DA
1965 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1966 oif = 0;
1967
a3c00e46 1968redo_rt6_select:
effda4dd
DA
1969 rt6_select(net, fn, oif, res, strict);
1970 if (res->f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1971 fn = fib6_backtrack(fn, &fl6->saddr);
1972 if (fn)
1973 goto redo_rt6_select;
367efcb9
MKL
1974 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1975 /* also consider unreachable route */
1976 strict &= ~RT6_LOOKUP_F_REACHABLE;
1977 fn = saved_fn;
1978 goto redo_rt6_select;
367efcb9 1979 }
a3c00e46
MKL
1980 }
1981
effda4dd 1982 trace_fib6_table_lookup(net, res, table, fl6);
fb9de91e 1983
effda4dd 1984 return 0;
1d053da9
DA
1985}
1986
1987struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1988 int oif, struct flowi6 *fl6,
1989 const struct sk_buff *skb, int flags)
1990{
b1d40991 1991 struct fib6_result res = {};
1d053da9
DA
1992 struct rt6_info *rt;
1993 int strict = 0;
1994
1995 strict |= flags & RT6_LOOKUP_F_IFACE;
1996 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1997 if (net->ipv6.devconf_all->forwarding == 0)
1998 strict |= RT6_LOOKUP_F_REACHABLE;
1999
2000 rcu_read_lock();
2001
effda4dd 2002 fib6_table_lookup(net, table, oif, fl6, &res, strict);
b1d40991 2003 if (res.f6i == net->ipv6.fib6_null_entry) {
421842ed 2004 rt = net->ipv6.ip6_null_entry;
66f5d6ce 2005 rcu_read_unlock();
d3843fe5 2006 dst_hold(&rt->dst);
d3843fe5 2007 return rt;
23fb93a4
DA
2008 }
2009
b1d40991 2010 fib6_select_path(net, &res, fl6, oif, false, skb, strict);
d83009d4 2011
23fb93a4 2012 /*Search through exception table */
7e4b5128 2013 rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
23fb93a4 2014 if (rt) {
10585b43 2015 if (ip6_hold_safe(net, &rt))
d3843fe5 2016 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 2017
66f5d6ce 2018 rcu_read_unlock();
d52d3997 2019 return rt;
3da59bd9 2020 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
b1d40991 2021 !res.nh->fib_nh_gw_family)) {
3da59bd9
MKL
2022 /* Create a RTF_CACHE clone which will not be
2023 * owned by the fib6 tree. It is for the special case where
2024 * the daddr in the skb during the neighbor look-up is different
2025 * from the fl6->daddr used to look-up route here.
2026 */
3da59bd9
MKL
2027 struct rt6_info *uncached_rt;
2028
85bd05de 2029 uncached_rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
d52d3997 2030
4d85cd0c 2031 rcu_read_unlock();
c71099ac 2032
1cfb71ee
WW
2033 if (uncached_rt) {
2034 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
2035 * No need for another dst_hold()
2036 */
8d0b94af 2037 rt6_uncached_list_add(uncached_rt);
81eb8447 2038 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 2039 } else {
3da59bd9 2040 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
2041 dst_hold(&uncached_rt->dst);
2042 }
b811580d 2043
3da59bd9 2044 return uncached_rt;
d52d3997
MKL
2045 } else {
2046 /* Get a percpu copy */
2047
2048 struct rt6_info *pcpu_rt;
2049
951f788a 2050 local_bh_disable();
db3fedee 2051 pcpu_rt = rt6_get_pcpu_route(&res);
d52d3997 2052
93531c67 2053 if (!pcpu_rt)
db3fedee 2054 pcpu_rt = rt6_make_pcpu_route(net, &res);
93531c67 2055
951f788a
ED
2056 local_bh_enable();
2057 rcu_read_unlock();
d4bea421 2058
d52d3997
MKL
2059 return pcpu_rt;
2060 }
1da177e4 2061}
9ff74384 2062EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 2063
b75cc8f9
DA
2064static struct rt6_info *ip6_pol_route_input(struct net *net,
2065 struct fib6_table *table,
2066 struct flowi6 *fl6,
2067 const struct sk_buff *skb,
2068 int flags)
4acad72d 2069{
b75cc8f9 2070 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
2071}
2072
d409b847
MB
2073struct dst_entry *ip6_route_input_lookup(struct net *net,
2074 struct net_device *dev,
b75cc8f9
DA
2075 struct flowi6 *fl6,
2076 const struct sk_buff *skb,
2077 int flags)
72331bc0
SL
2078{
2079 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
2080 flags |= RT6_LOOKUP_F_IFACE;
2081
b75cc8f9 2082 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 2083}
d409b847 2084EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 2085
23aebdac 2086static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
2087 struct flow_keys *keys,
2088 struct flow_keys *flkeys)
23aebdac
JS
2089{
2090 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
2091 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 2092 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
2093 const struct ipv6hdr *inner_iph;
2094 const struct icmp6hdr *icmph;
2095 struct ipv6hdr _inner_iph;
cea67a2d 2096 struct icmp6hdr _icmph;
23aebdac
JS
2097
2098 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
2099 goto out;
2100
cea67a2d
ED
2101 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
2102 sizeof(_icmph), &_icmph);
2103 if (!icmph)
2104 goto out;
2105
23aebdac
JS
2106 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
2107 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
2108 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
2109 icmph->icmp6_type != ICMPV6_PARAMPROB)
2110 goto out;
2111
2112 inner_iph = skb_header_pointer(skb,
2113 skb_transport_offset(skb) + sizeof(*icmph),
2114 sizeof(_inner_iph), &_inner_iph);
2115 if (!inner_iph)
2116 goto out;
2117
2118 key_iph = inner_iph;
5e5d6fed 2119 _flkeys = NULL;
23aebdac 2120out:
5e5d6fed
RP
2121 if (_flkeys) {
2122 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
2123 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
2124 keys->tags.flow_label = _flkeys->tags.flow_label;
2125 keys->basic.ip_proto = _flkeys->basic.ip_proto;
2126 } else {
2127 keys->addrs.v6addrs.src = key_iph->saddr;
2128 keys->addrs.v6addrs.dst = key_iph->daddr;
fa1be7e0 2129 keys->tags.flow_label = ip6_flowlabel(key_iph);
5e5d6fed
RP
2130 keys->basic.ip_proto = key_iph->nexthdr;
2131 }
23aebdac
JS
2132}
2133
2134/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
2135u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2136 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
2137{
2138 struct flow_keys hash_keys;
9a2a537a 2139 u32 mhash;
23aebdac 2140
bbfa047a 2141 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
2142 case 0:
2143 memset(&hash_keys, 0, sizeof(hash_keys));
2144 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2145 if (skb) {
2146 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2147 } else {
2148 hash_keys.addrs.v6addrs.src = fl6->saddr;
2149 hash_keys.addrs.v6addrs.dst = fl6->daddr;
fa1be7e0 2150 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
b4bac172
DA
2151 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2152 }
2153 break;
2154 case 1:
2155 if (skb) {
2156 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2157 struct flow_keys keys;
2158
2159 /* short-circuit if we already have L4 hash present */
2160 if (skb->l4_hash)
2161 return skb_get_hash_raw(skb) >> 1;
2162
2163 memset(&hash_keys, 0, sizeof(hash_keys));
2164
2165 if (!flkeys) {
2166 skb_flow_dissect_flow_keys(skb, &keys, flag);
2167 flkeys = &keys;
2168 }
2169 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2170 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2171 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2172 hash_keys.ports.src = flkeys->ports.src;
2173 hash_keys.ports.dst = flkeys->ports.dst;
2174 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2175 } else {
2176 memset(&hash_keys, 0, sizeof(hash_keys));
2177 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2178 hash_keys.addrs.v6addrs.src = fl6->saddr;
2179 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2180 hash_keys.ports.src = fl6->fl6_sport;
2181 hash_keys.ports.dst = fl6->fl6_dport;
2182 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2183 }
2184 break;
23aebdac 2185 }
9a2a537a 2186 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2187
9a2a537a 2188 return mhash >> 1;
23aebdac
JS
2189}
2190
c71099ac
TG
2191void ip6_route_input(struct sk_buff *skb)
2192{
b71d1d42 2193 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2194 struct net *net = dev_net(skb->dev);
adaa70bb 2195 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2196 struct ip_tunnel_info *tun_info;
4c9483b2 2197 struct flowi6 fl6 = {
e0d56fdd 2198 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2199 .daddr = iph->daddr,
2200 .saddr = iph->saddr,
6502ca52 2201 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2202 .flowi6_mark = skb->mark,
2203 .flowi6_proto = iph->nexthdr,
c71099ac 2204 };
5e5d6fed 2205 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2206
904af04d 2207 tun_info = skb_tunnel_info(skb);
46fa062a 2208 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2209 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2210
2211 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2212 flkeys = &_flkeys;
2213
23aebdac 2214 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2215 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2216 skb_dst_drop(skb);
b75cc8f9
DA
2217 skb_dst_set(skb,
2218 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2219}
2220
b75cc8f9
DA
2221static struct rt6_info *ip6_pol_route_output(struct net *net,
2222 struct fib6_table *table,
2223 struct flowi6 *fl6,
2224 const struct sk_buff *skb,
2225 int flags)
1da177e4 2226{
b75cc8f9 2227 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2228}
2229
6f21c96a
PA
2230struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2231 struct flowi6 *fl6, int flags)
c71099ac 2232{
d46a9d67 2233 bool any_src;
c71099ac 2234
3ede0bbc
RS
2235 if (ipv6_addr_type(&fl6->daddr) &
2236 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
4c1feac5
DA
2237 struct dst_entry *dst;
2238
2239 dst = l3mdev_link_scope_lookup(net, fl6);
2240 if (dst)
2241 return dst;
2242 }
ca254490 2243
1fb9489b 2244 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2245
d46a9d67 2246 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2247 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2248 (fl6->flowi6_oif && any_src))
77d16f45 2249 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2250
d46a9d67 2251 if (!any_src)
adaa70bb 2252 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2253 else if (sk)
2254 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2255
b75cc8f9 2256 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2257}
6f21c96a 2258EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2259
2774c131 2260struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2261{
5c1e6aa3 2262 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2263 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2264 struct dst_entry *new = NULL;
2265
1dbe3252 2266 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2267 DST_OBSOLETE_DEAD, 0);
14e50e57 2268 if (rt) {
0a1f5962 2269 rt6_info_init(rt);
81eb8447 2270 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2271
0a1f5962 2272 new = &rt->dst;
14e50e57 2273 new->__use = 1;
352e512c 2274 new->input = dst_discard;
ede2059d 2275 new->output = dst_discard_out;
14e50e57 2276
0a1f5962 2277 dst_copy_metrics(new, &ort->dst);
14e50e57 2278
1dbe3252 2279 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2280 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2281 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2282
2283 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2284#ifdef CONFIG_IPV6_SUBTREES
2285 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2286#endif
14e50e57
DM
2287 }
2288
69ead7af
DM
2289 dst_release(dst_orig);
2290 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2291}
14e50e57 2292
1da177e4
LT
2293/*
2294 * Destination cache support functions
2295 */
2296
8d1c802b 2297static bool fib6_check(struct fib6_info *f6i, u32 cookie)
4b32b5ad 2298{
93531c67
DA
2299 u32 rt_cookie = 0;
2300
8ae86971 2301 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
93531c67
DA
2302 return false;
2303
2304 if (fib6_check_expired(f6i))
2305 return false;
2306
2307 return true;
4b32b5ad
MKL
2308}
2309
a68886a6
DA
2310static struct dst_entry *rt6_check(struct rt6_info *rt,
2311 struct fib6_info *from,
2312 u32 cookie)
3da59bd9 2313{
36143645 2314 u32 rt_cookie = 0;
c5cff856 2315
a68886a6 2316 if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
93531c67 2317 rt_cookie != cookie)
3da59bd9
MKL
2318 return NULL;
2319
2320 if (rt6_check_expired(rt))
2321 return NULL;
2322
2323 return &rt->dst;
2324}
2325
a68886a6
DA
2326static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2327 struct fib6_info *from,
2328 u32 cookie)
3da59bd9 2329{
5973fb1e
MKL
2330 if (!__rt6_check_expired(rt) &&
2331 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
a68886a6 2332 fib6_check(from, cookie))
3da59bd9
MKL
2333 return &rt->dst;
2334 else
2335 return NULL;
2336}
2337
1da177e4
LT
2338static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2339{
a87b7dc9 2340 struct dst_entry *dst_ret;
a68886a6 2341 struct fib6_info *from;
1da177e4
LT
2342 struct rt6_info *rt;
2343
a87b7dc9
DA
2344 rt = container_of(dst, struct rt6_info, dst);
2345
2346 rcu_read_lock();
1da177e4 2347
6f3118b5
ND
2348 /* All IPV6 dsts are created with ->obsolete set to the value
2349 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2350 * into this function always.
2351 */
e3bc10bd 2352
a68886a6 2353 from = rcu_dereference(rt->from);
4b32b5ad 2354
a68886a6
DA
2355 if (from && (rt->rt6i_flags & RTF_PCPU ||
2356 unlikely(!list_empty(&rt->rt6i_uncached))))
2357 dst_ret = rt6_dst_from_check(rt, from, cookie);
3da59bd9 2358 else
a68886a6 2359 dst_ret = rt6_check(rt, from, cookie);
a87b7dc9
DA
2360
2361 rcu_read_unlock();
2362
2363 return dst_ret;
1da177e4
LT
2364}
2365
2366static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2367{
2368 struct rt6_info *rt = (struct rt6_info *) dst;
2369
2370 if (rt) {
54c1a859 2371 if (rt->rt6i_flags & RTF_CACHE) {
c3c14da0 2372 rcu_read_lock();
54c1a859 2373 if (rt6_check_expired(rt)) {
93531c67 2374 rt6_remove_exception_rt(rt);
54c1a859
YH
2375 dst = NULL;
2376 }
c3c14da0 2377 rcu_read_unlock();
54c1a859 2378 } else {
1da177e4 2379 dst_release(dst);
54c1a859
YH
2380 dst = NULL;
2381 }
1da177e4 2382 }
54c1a859 2383 return dst;
1da177e4
LT
2384}
2385
2386static void ip6_link_failure(struct sk_buff *skb)
2387{
2388 struct rt6_info *rt;
2389
3ffe533c 2390 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2391
adf30907 2392 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2393 if (rt) {
8a14e46f 2394 rcu_read_lock();
1eb4f758 2395 if (rt->rt6i_flags & RTF_CACHE) {
761f6026 2396 rt6_remove_exception_rt(rt);
c5cff856 2397 } else {
a68886a6 2398 struct fib6_info *from;
c5cff856
WW
2399 struct fib6_node *fn;
2400
a68886a6
DA
2401 from = rcu_dereference(rt->from);
2402 if (from) {
2403 fn = rcu_dereference(from->fib6_node);
2404 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2405 fn->fn_sernum = -1;
2406 }
1eb4f758 2407 }
8a14e46f 2408 rcu_read_unlock();
1da177e4
LT
2409 }
2410}
2411
6a3e030f
DA
2412static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2413{
a68886a6
DA
2414 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2415 struct fib6_info *from;
2416
2417 rcu_read_lock();
2418 from = rcu_dereference(rt0->from);
2419 if (from)
2420 rt0->dst.expires = from->expires;
2421 rcu_read_unlock();
2422 }
6a3e030f
DA
2423
2424 dst_set_expires(&rt0->dst, timeout);
2425 rt0->rt6i_flags |= RTF_EXPIRES;
2426}
2427
45e4fd26
MKL
2428static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2429{
2430 struct net *net = dev_net(rt->dst.dev);
2431
d4ead6b3 2432 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2433 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2434 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2435}
2436
0d3f6d29
MKL
2437static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2438{
2439 return !(rt->rt6i_flags & RTF_CACHE) &&
1490ed2a 2440 (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
0d3f6d29
MKL
2441}
2442
45e4fd26
MKL
2443static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2444 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2445{
0dec879f 2446 const struct in6_addr *daddr, *saddr;
67ba4152 2447 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2448
19bda36c
XL
2449 if (dst_metric_locked(dst, RTAX_MTU))
2450 return;
2451
0dec879f
JA
2452 if (iph) {
2453 daddr = &iph->daddr;
2454 saddr = &iph->saddr;
2455 } else if (sk) {
2456 daddr = &sk->sk_v6_daddr;
2457 saddr = &inet6_sk(sk)->saddr;
2458 } else {
2459 daddr = NULL;
2460 saddr = NULL;
2461 }
2462 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2463 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2464 if (mtu >= dst_mtu(dst))
2465 return;
9d289715 2466
0d3f6d29 2467 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2468 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2469 /* update rt6_ex->stamp for cache */
2470 if (rt6->rt6i_flags & RTF_CACHE)
2471 rt6_update_exception_stamp_rt(rt6);
0dec879f 2472 } else if (daddr) {
85bd05de 2473 struct fib6_result res = {};
45e4fd26
MKL
2474 struct rt6_info *nrt6;
2475
4d85cd0c 2476 rcu_read_lock();
85bd05de
DA
2477 res.f6i = rcu_dereference(rt6->from);
2478 if (!res.f6i) {
9c69a132
JL
2479 rcu_read_unlock();
2480 return;
2481 }
1cf844c7 2482 res.nh = res.f6i->fib6_nh;
7d21fec9
DA
2483 res.fib6_flags = res.f6i->fib6_flags;
2484 res.fib6_type = res.f6i->fib6_type;
2485
85bd05de 2486 nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
45e4fd26
MKL
2487 if (nrt6) {
2488 rt6_do_update_pmtu(nrt6, mtu);
5012f0a5 2489 if (rt6_insert_exception(nrt6, &res))
2b760fcf 2490 dst_release_immediate(&nrt6->dst);
45e4fd26 2491 }
a68886a6 2492 rcu_read_unlock();
1da177e4
LT
2493 }
2494}
2495
45e4fd26
MKL
2496static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2497 struct sk_buff *skb, u32 mtu)
2498{
2499 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2500}
2501
42ae66c8 2502void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2503 int oif, u32 mark, kuid_t uid)
81aded24
DM
2504{
2505 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2506 struct dst_entry *dst;
dc92095d
2507 struct flowi6 fl6 = {
2508 .flowi6_oif = oif,
2509 .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2510 .daddr = iph->daddr,
2511 .saddr = iph->saddr,
2512 .flowlabel = ip6_flowinfo(iph),
2513 .flowi6_uid = uid,
2514 };
81aded24
DM
2515
2516 dst = ip6_route_output(net, NULL, &fl6);
2517 if (!dst->error)
45e4fd26 2518 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2519 dst_release(dst);
2520}
2521EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2522
2523void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2524{
7ddacfa5 2525 int oif = sk->sk_bound_dev_if;
33c162a9
MKL
2526 struct dst_entry *dst;
2527
7ddacfa5
DA
2528 if (!oif && skb->dev)
2529 oif = l3mdev_master_ifindex(skb->dev);
2530
2531 ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2532
2533 dst = __sk_dst_get(sk);
2534 if (!dst || !dst->obsolete ||
2535 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2536 return;
2537
2538 bh_lock_sock(sk);
2539 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2540 ip6_datagram_dst_update(sk, false);
2541 bh_unlock_sock(sk);
81aded24
DM
2542}
2543EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2544
7d6850f7
AK
2545void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2546 const struct flowi6 *fl6)
2547{
2548#ifdef CONFIG_IPV6_SUBTREES
2549 struct ipv6_pinfo *np = inet6_sk(sk);
2550#endif
2551
2552 ip6_dst_store(sk, dst,
2553 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2554 &sk->sk_v6_daddr : NULL,
2555#ifdef CONFIG_IPV6_SUBTREES
2556 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2557 &np->saddr :
2558#endif
2559 NULL);
2560}
2561
9b6b35ab 2562static bool ip6_redirect_nh_match(const struct fib6_result *res,
0b34eb00
DA
2563 struct flowi6 *fl6,
2564 const struct in6_addr *gw,
2565 struct rt6_info **ret)
2566{
9b6b35ab
DA
2567 const struct fib6_nh *nh = res->nh;
2568
0b34eb00
DA
2569 if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
2570 fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
2571 return false;
2572
2573 /* rt_cache's gateway might be different from its 'parent'
2574 * in the case of an ip redirect.
2575 * So we keep searching in the exception table if the gateway
2576 * is different.
2577 */
2578 if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
2579 struct rt6_info *rt_cache;
2580
9b6b35ab 2581 rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
0b34eb00
DA
2582 if (rt_cache &&
2583 ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
2584 *ret = rt_cache;
2585 return true;
2586 }
2587 return false;
2588 }
2589 return true;
2590}
2591
b55b76b2
DJ
2592/* Handle redirects */
2593struct ip6rd_flowi {
2594 struct flowi6 fl6;
2595 struct in6_addr gateway;
2596};
2597
2598static struct rt6_info *__ip6_route_redirect(struct net *net,
2599 struct fib6_table *table,
2600 struct flowi6 *fl6,
b75cc8f9 2601 const struct sk_buff *skb,
b55b76b2
DJ
2602 int flags)
2603{
2604 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
0b34eb00 2605 struct rt6_info *ret = NULL;
9b6b35ab 2606 struct fib6_result res = {};
8d1c802b 2607 struct fib6_info *rt;
b55b76b2
DJ
2608 struct fib6_node *fn;
2609
31680ac2
DA
2610 /* l3mdev_update_flow overrides oif if the device is enslaved; in
2611 * this case we must match on the real ingress device, so reset it
2612 */
2613 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
2614 fl6->flowi6_oif = skb->dev->ifindex;
2615
b55b76b2 2616 /* Get the "current" route for this destination and
67c408cf 2617 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2618 *
2619 * RFC 4861 specifies that redirects should only be
2620 * accepted if they come from the nexthop to the target.
2621 * Due to the way the routes are chosen, this notion
2622 * is a bit fuzzy and one might need to check all possible
2623 * routes.
2624 */
2625
66f5d6ce 2626 rcu_read_lock();
6454743b 2627 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
b55b76b2 2628restart:
66f5d6ce 2629 for_each_fib6_node_rt_rcu(fn) {
9b6b35ab 2630 res.f6i = rt;
1cf844c7 2631 res.nh = rt->fib6_nh;
9b6b35ab 2632
14895687 2633 if (fib6_check_expired(rt))
b55b76b2 2634 continue;
93c2fb25 2635 if (rt->fib6_flags & RTF_REJECT)
b55b76b2 2636 break;
9b6b35ab 2637 if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, &ret))
0b34eb00 2638 goto out;
b55b76b2
DJ
2639 }
2640
2641 if (!rt)
421842ed 2642 rt = net->ipv6.fib6_null_entry;
93c2fb25 2643 else if (rt->fib6_flags & RTF_REJECT) {
23fb93a4 2644 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2645 goto out;
2646 }
2647
421842ed 2648 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2649 fn = fib6_backtrack(fn, &fl6->saddr);
2650 if (fn)
2651 goto restart;
b55b76b2 2652 }
a3c00e46 2653
9b6b35ab 2654 res.f6i = rt;
1cf844c7 2655 res.nh = rt->fib6_nh;
b0a1ba59 2656out:
7d21fec9 2657 if (ret) {
10585b43 2658 ip6_hold_safe(net, &ret);
7d21fec9
DA
2659 } else {
2660 res.fib6_flags = res.f6i->fib6_flags;
2661 res.fib6_type = res.f6i->fib6_type;
9b6b35ab 2662 ret = ip6_create_rt_rcu(&res);
7d21fec9 2663 }
b55b76b2 2664
66f5d6ce 2665 rcu_read_unlock();
b55b76b2 2666
8ff2e5b2 2667 trace_fib6_table_lookup(net, &res, table, fl6);
23fb93a4 2668 return ret;
b55b76b2
DJ
2669};
2670
2671static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2672 const struct flowi6 *fl6,
2673 const struct sk_buff *skb,
2674 const struct in6_addr *gateway)
b55b76b2
DJ
2675{
2676 int flags = RT6_LOOKUP_F_HAS_SADDR;
2677 struct ip6rd_flowi rdfl;
2678
2679 rdfl.fl6 = *fl6;
2680 rdfl.gateway = *gateway;
2681
b75cc8f9 2682 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2683 flags, __ip6_route_redirect);
2684}
2685
e2d118a1
LC
2686void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2687 kuid_t uid)
3a5ad2ee
DM
2688{
2689 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2690 struct dst_entry *dst;
1f7f10ac
2691 struct flowi6 fl6 = {
2692 .flowi6_iif = LOOPBACK_IFINDEX,
2693 .flowi6_oif = oif,
2694 .flowi6_mark = mark,
2695 .daddr = iph->daddr,
2696 .saddr = iph->saddr,
2697 .flowlabel = ip6_flowinfo(iph),
2698 .flowi6_uid = uid,
2699 };
3a5ad2ee 2700
b75cc8f9 2701 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2702 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2703 dst_release(dst);
2704}
2705EXPORT_SYMBOL_GPL(ip6_redirect);
2706
d456336d 2707void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
c92a59ec
DJ
2708{
2709 const struct ipv6hdr *iph = ipv6_hdr(skb);
2710 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2711 struct dst_entry *dst;
0b26fb17
2712 struct flowi6 fl6 = {
2713 .flowi6_iif = LOOPBACK_IFINDEX,
2714 .flowi6_oif = oif,
0b26fb17
2715 .daddr = msg->dest,
2716 .saddr = iph->daddr,
2717 .flowi6_uid = sock_net_uid(net, NULL),
2718 };
c92a59ec 2719
b75cc8f9 2720 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2721 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2722 dst_release(dst);
2723}
2724
3a5ad2ee
DM
2725void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2726{
e2d118a1
LC
2727 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2728 sk->sk_uid);
3a5ad2ee
DM
2729}
2730EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2731
0dbaee3b 2732static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2733{
0dbaee3b
DM
2734 struct net_device *dev = dst->dev;
2735 unsigned int mtu = dst_mtu(dst);
2736 struct net *net = dev_net(dev);
2737
1da177e4
LT
2738 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2739
5578689a
DL
2740 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2741 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2742
2743 /*
1ab1457c
YH
2744 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2745 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2746 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2747 * rely only on pmtu discovery"
2748 */
2749 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2750 mtu = IPV6_MAXPLEN;
2751 return mtu;
2752}
2753
ebb762f2 2754static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2755{
d33e4553 2756 struct inet6_dev *idev;
d4ead6b3 2757 unsigned int mtu;
4b32b5ad
MKL
2758
2759 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2760 if (mtu)
30f78d8e 2761 goto out;
618f9bc7
SK
2762
2763 mtu = IPV6_MIN_MTU;
d33e4553
DM
2764
2765 rcu_read_lock();
2766 idev = __in6_dev_get(dst->dev);
2767 if (idev)
2768 mtu = idev->cnf.mtu6;
2769 rcu_read_unlock();
2770
30f78d8e 2771out:
14972cbd
RP
2772 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2773
2774 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2775}
2776
901731b8
DA
2777/* MTU selection:
2778 * 1. mtu on route is locked - use it
2779 * 2. mtu from nexthop exception
2780 * 3. mtu from egress device
2781 *
2782 * based on ip6_dst_mtu_forward and exception logic of
2783 * rt6_find_cached_rt; called with rcu_read_lock
2784 */
b748f260
DA
2785u32 ip6_mtu_from_fib6(const struct fib6_result *res,
2786 const struct in6_addr *daddr,
2787 const struct in6_addr *saddr)
901731b8 2788{
b748f260
DA
2789 const struct fib6_nh *nh = res->nh;
2790 struct fib6_info *f6i = res->f6i;
901731b8 2791 struct inet6_dev *idev;
510e2ced 2792 struct rt6_info *rt;
901731b8
DA
2793 u32 mtu = 0;
2794
2795 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2796 mtu = f6i->fib6_pmtu;
2797 if (mtu)
2798 goto out;
2799 }
2800
510e2ced
WW
2801 rt = rt6_find_cached_rt(res, daddr, saddr);
2802 if (unlikely(rt)) {
2803 mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
2804 } else {
b748f260 2805 struct net_device *dev = nh->fib_nh_dev;
901731b8
DA
2806
2807 mtu = IPV6_MIN_MTU;
2808 idev = __in6_dev_get(dev);
2809 if (idev && idev->cnf.mtu6 > mtu)
2810 mtu = idev->cnf.mtu6;
2811 }
2812
2813 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2814out:
b748f260 2815 return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
901731b8
DA
2816}
2817
3b00944c 2818struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2819 struct flowi6 *fl6)
1da177e4 2820{
87a11578 2821 struct dst_entry *dst;
1da177e4
LT
2822 struct rt6_info *rt;
2823 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2824 struct net *net = dev_net(dev);
1da177e4 2825
38308473 2826 if (unlikely(!idev))
122bdf67 2827 return ERR_PTR(-ENODEV);
1da177e4 2828
ad706862 2829 rt = ip6_dst_alloc(net, dev, 0);
38308473 2830 if (unlikely(!rt)) {
1da177e4 2831 in6_dev_put(idev);
87a11578 2832 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2833 goto out;
2834 }
2835
8e2ec639 2836 rt->dst.flags |= DST_HOST;
588753f1 2837 rt->dst.input = ip6_input;
8e2ec639 2838 rt->dst.output = ip6_output;
550bab42 2839 rt->rt6i_gateway = fl6->daddr;
87a11578 2840 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2841 rt->rt6i_dst.plen = 128;
2842 rt->rt6i_idev = idev;
14edd87d 2843 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2844
4c981e28 2845 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2846 * do proper release of the net_device
2847 */
2848 rt6_uncached_list_add(rt);
81eb8447 2849 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2850
87a11578
DM
2851 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2852
1da177e4 2853out:
87a11578 2854 return dst;
1da177e4
LT
2855}
2856
569d3645 2857static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2858{
86393e52 2859 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2860 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2861 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2862 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2863 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2864 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2865 int entries;
7019b78e 2866
fc66f95c 2867 entries = dst_entries_get_fast(ops);
49a18d86 2868 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2869 entries <= rt_max_size)
1da177e4
LT
2870 goto out;
2871
6891a346 2872 net->ipv6.ip6_rt_gc_expire++;
14956643 2873 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2874 entries = dst_entries_get_slow(ops);
2875 if (entries < ops->gc_thresh)
7019b78e 2876 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2877out:
7019b78e 2878 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2879 return entries > rt_max_size;
1da177e4
LT
2880}
2881
8c14586f
DA
2882static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2883 struct fib6_config *cfg,
f4797b33
DA
2884 const struct in6_addr *gw_addr,
2885 u32 tbid, int flags)
8c14586f
DA
2886{
2887 struct flowi6 fl6 = {
2888 .flowi6_oif = cfg->fc_ifindex,
2889 .daddr = *gw_addr,
2890 .saddr = cfg->fc_prefsrc,
2891 };
2892 struct fib6_table *table;
2893 struct rt6_info *rt;
8c14586f 2894
f4797b33 2895 table = fib6_get_table(net, tbid);
8c14586f
DA
2896 if (!table)
2897 return NULL;
2898
2899 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2900 flags |= RT6_LOOKUP_F_HAS_SADDR;
2901
f4797b33 2902 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2903 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2904
2905 /* if table lookup failed, fall back to full lookup */
2906 if (rt == net->ipv6.ip6_null_entry) {
2907 ip6_rt_put(rt);
2908 rt = NULL;
2909 }
2910
2911 return rt;
2912}
2913
fc1e64e1
DA
2914static int ip6_route_check_nh_onlink(struct net *net,
2915 struct fib6_config *cfg,
9fbb704c 2916 const struct net_device *dev,
fc1e64e1
DA
2917 struct netlink_ext_ack *extack)
2918{
44750f84 2919 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2920 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2921 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
bf1dc8ba 2922 struct fib6_info *from;
fc1e64e1
DA
2923 struct rt6_info *grt;
2924 int err;
2925
2926 err = 0;
2927 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2928 if (grt) {
bf1dc8ba
PA
2929 rcu_read_lock();
2930 from = rcu_dereference(grt->from);
58e354c0 2931 if (!grt->dst.error &&
4ed591c8 2932 /* ignore match if it is the default route */
bf1dc8ba 2933 from && !ipv6_addr_any(&from->fib6_dst.addr) &&
58e354c0 2934 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2935 NL_SET_ERR_MSG(extack,
2936 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2937 err = -EINVAL;
2938 }
bf1dc8ba 2939 rcu_read_unlock();
fc1e64e1
DA
2940
2941 ip6_rt_put(grt);
2942 }
2943
2944 return err;
2945}
2946
1edce99f
DA
2947static int ip6_route_check_nh(struct net *net,
2948 struct fib6_config *cfg,
2949 struct net_device **_dev,
2950 struct inet6_dev **idev)
2951{
2952 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2953 struct net_device *dev = _dev ? *_dev : NULL;
2954 struct rt6_info *grt = NULL;
2955 int err = -EHOSTUNREACH;
2956
2957 if (cfg->fc_table) {
f4797b33
DA
2958 int flags = RT6_LOOKUP_F_IFACE;
2959
2960 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2961 cfg->fc_table, flags);
1edce99f
DA
2962 if (grt) {
2963 if (grt->rt6i_flags & RTF_GATEWAY ||
2964 (dev && dev != grt->dst.dev)) {
2965 ip6_rt_put(grt);
2966 grt = NULL;
2967 }
2968 }
2969 }
2970
2971 if (!grt)
b75cc8f9 2972 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2973
2974 if (!grt)
2975 goto out;
2976
2977 if (dev) {
2978 if (dev != grt->dst.dev) {
2979 ip6_rt_put(grt);
2980 goto out;
2981 }
2982 } else {
2983 *_dev = dev = grt->dst.dev;
2984 *idev = grt->rt6i_idev;
2985 dev_hold(dev);
2986 in6_dev_hold(grt->rt6i_idev);
2987 }
2988
2989 if (!(grt->rt6i_flags & RTF_GATEWAY))
2990 err = 0;
2991
2992 ip6_rt_put(grt);
2993
2994out:
2995 return err;
2996}
2997
9fbb704c
DA
2998static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2999 struct net_device **_dev, struct inet6_dev **idev,
3000 struct netlink_ext_ack *extack)
3001{
3002 const struct in6_addr *gw_addr = &cfg->fc_gateway;
3003 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 3004 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 3005 const struct net_device *dev = *_dev;
232378e8 3006 bool need_addr_check = !dev;
9fbb704c
DA
3007 int err = -EINVAL;
3008
3009 /* if gw_addr is local we will fail to detect this in case
3010 * address is still TENTATIVE (DAD in progress). rt6_lookup()
3011 * will return already-added prefix route via interface that
3012 * prefix route was assigned to, which might be non-loopback.
3013 */
232378e8
DA
3014 if (dev &&
3015 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
3016 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
3017 goto out;
3018 }
3019
3020 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
3021 /* IPv6 strictly inhibits using not link-local
3022 * addresses as nexthop address.
3023 * Otherwise, router will not able to send redirects.
3024 * It is very good, but in some (rare!) circumstances
3025 * (SIT, PtP, NBMA NOARP links) it is handy to allow
3026 * some exceptions. --ANK
3027 * We allow IPv4-mapped nexthops to support RFC4798-type
3028 * addressing
3029 */
3030 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
3031 NL_SET_ERR_MSG(extack, "Invalid gateway address");
3032 goto out;
3033 }
3034
3035 if (cfg->fc_flags & RTNH_F_ONLINK)
3036 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
3037 else
3038 err = ip6_route_check_nh(net, cfg, _dev, idev);
3039
3040 if (err)
3041 goto out;
3042 }
3043
3044 /* reload in case device was changed */
3045 dev = *_dev;
3046
3047 err = -EINVAL;
3048 if (!dev) {
3049 NL_SET_ERR_MSG(extack, "Egress device not specified");
3050 goto out;
3051 } else if (dev->flags & IFF_LOOPBACK) {
3052 NL_SET_ERR_MSG(extack,
3053 "Egress device can not be loopback device for this route");
3054 goto out;
3055 }
232378e8
DA
3056
3057 /* if we did not check gw_addr above, do so now that the
3058 * egress device has been resolved.
3059 */
3060 if (need_addr_check &&
3061 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
3062 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
3063 goto out;
3064 }
3065
9fbb704c
DA
3066 err = 0;
3067out:
3068 return err;
3069}
3070
83c44251
DA
3071static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
3072{
3073 if ((flags & RTF_REJECT) ||
3074 (dev && (dev->flags & IFF_LOOPBACK) &&
3075 !(addr_type & IPV6_ADDR_LOOPBACK) &&
3076 !(flags & RTF_LOCAL)))
3077 return true;
3078
3079 return false;
3080}
3081
3082int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
3083 struct fib6_config *cfg, gfp_t gfp_flags,
3084 struct netlink_ext_ack *extack)
3085{
3086 struct net_device *dev = NULL;
3087 struct inet6_dev *idev = NULL;
3088 int addr_type;
3089 int err;
3090
f1741730
DA
3091 fib6_nh->fib_nh_family = AF_INET6;
3092
83c44251
DA
3093 err = -ENODEV;
3094 if (cfg->fc_ifindex) {
3095 dev = dev_get_by_index(net, cfg->fc_ifindex);
3096 if (!dev)
3097 goto out;
3098 idev = in6_dev_get(dev);
3099 if (!idev)
3100 goto out;
3101 }
3102
3103 if (cfg->fc_flags & RTNH_F_ONLINK) {
3104 if (!dev) {
3105 NL_SET_ERR_MSG(extack,
3106 "Nexthop device required for onlink");
3107 goto out;
3108 }
3109
3110 if (!(dev->flags & IFF_UP)) {
3111 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3112 err = -ENETDOWN;
3113 goto out;
3114 }
3115
ad1601ae 3116 fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
83c44251
DA
3117 }
3118
ad1601ae 3119 fib6_nh->fib_nh_weight = 1;
83c44251
DA
3120
3121 /* We cannot add true routes via loopback here,
3122 * they would result in kernel looping; promote them to reject routes
3123 */
3124 addr_type = ipv6_addr_type(&cfg->fc_dst);
3125 if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
3126 /* hold loopback dev/idev if we haven't done so. */
3127 if (dev != net->loopback_dev) {
3128 if (dev) {
3129 dev_put(dev);
3130 in6_dev_put(idev);
3131 }
3132 dev = net->loopback_dev;
3133 dev_hold(dev);
3134 idev = in6_dev_get(dev);
3135 if (!idev) {
3136 err = -ENODEV;
3137 goto out;
3138 }
3139 }
7dd73168 3140 goto pcpu_alloc;
83c44251
DA
3141 }
3142
3143 if (cfg->fc_flags & RTF_GATEWAY) {
3144 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3145 if (err)
3146 goto out;
3147
ad1601ae 3148 fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
bdf00467 3149 fib6_nh->fib_nh_gw_family = AF_INET6;
83c44251
DA
3150 }
3151
3152 err = -ENODEV;
3153 if (!dev)
3154 goto out;
3155
3156 if (idev->cnf.disable_ipv6) {
3157 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3158 err = -EACCES;
3159 goto out;
3160 }
3161
3162 if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
3163 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3164 err = -ENETDOWN;
3165 goto out;
3166 }
3167
3168 if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3169 !netif_carrier_ok(dev))
ad1601ae 3170 fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
83c44251 3171
7dd73168
DA
3172 err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
3173 cfg->fc_encap_type, cfg, gfp_flags, extack);
3174 if (err)
3175 goto out;
3176
3177pcpu_alloc:
f40b6ae2
DA
3178 fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
3179 if (!fib6_nh->rt6i_pcpu) {
3180 err = -ENOMEM;
3181 goto out;
3182 }
3183
ad1601ae 3184 fib6_nh->fib_nh_dev = dev;
f1741730 3185 fib6_nh->fib_nh_oif = dev->ifindex;
83c44251
DA
3186 err = 0;
3187out:
3188 if (idev)
3189 in6_dev_put(idev);
3190
3191 if (err) {
ad1601ae
DA
3192 lwtstate_put(fib6_nh->fib_nh_lws);
3193 fib6_nh->fib_nh_lws = NULL;
83c44251
DA
3194 if (dev)
3195 dev_put(dev);
3196 }
3197
3198 return err;
3199}
3200
dac7d0f2
DA
3201void fib6_nh_release(struct fib6_nh *fib6_nh)
3202{
cc5c073a
DA
3203 struct rt6_exception_bucket *bucket;
3204
3205 rcu_read_lock();
3206
3207 fib6_nh_flush_exceptions(fib6_nh, NULL);
3208 bucket = fib6_nh_get_excptn_bucket(fib6_nh, NULL);
3209 if (bucket) {
3210 rcu_assign_pointer(fib6_nh->rt6i_exception_bucket, NULL);
3211 kfree(bucket);
3212 }
3213
3214 rcu_read_unlock();
3215
f40b6ae2
DA
3216 if (fib6_nh->rt6i_pcpu) {
3217 int cpu;
3218
3219 for_each_possible_cpu(cpu) {
3220 struct rt6_info **ppcpu_rt;
3221 struct rt6_info *pcpu_rt;
3222
3223 ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
3224 pcpu_rt = *ppcpu_rt;
3225 if (pcpu_rt) {
3226 dst_dev_put(&pcpu_rt->dst);
3227 dst_release(&pcpu_rt->dst);
3228 *ppcpu_rt = NULL;
3229 }
3230 }
3231
3232 free_percpu(fib6_nh->rt6i_pcpu);
3233 }
3234
979e276e 3235 fib_nh_common_release(&fib6_nh->nh_common);
dac7d0f2
DA
3236}
3237
8d1c802b 3238static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 3239 gfp_t gfp_flags,
333c4301 3240 struct netlink_ext_ack *extack)
1da177e4 3241{
5578689a 3242 struct net *net = cfg->fc_nlinfo.nl_net;
8d1c802b 3243 struct fib6_info *rt = NULL;
f88d8ea6 3244 struct nexthop *nh = NULL;
c71099ac 3245 struct fib6_table *table;
f88d8ea6 3246 struct fib6_nh *fib6_nh;
8c5b83f0 3247 int err = -EINVAL;
83c44251 3248 int addr_type;
1da177e4 3249
557c44be 3250 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
3251 if (cfg->fc_flags & RTF_PCPU) {
3252 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 3253 goto out;
d5d531cb 3254 }
557c44be 3255
2ea2352e
WW
3256 /* RTF_CACHE is an internal flag; can not be set by userspace */
3257 if (cfg->fc_flags & RTF_CACHE) {
3258 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
3259 goto out;
3260 }
3261
e8478e80
DA
3262 if (cfg->fc_type > RTN_MAX) {
3263 NL_SET_ERR_MSG(extack, "Invalid route type");
3264 goto out;
3265 }
3266
d5d531cb
DA
3267 if (cfg->fc_dst_len > 128) {
3268 NL_SET_ERR_MSG(extack, "Invalid prefix length");
3269 goto out;
3270 }
3271 if (cfg->fc_src_len > 128) {
3272 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 3273 goto out;
d5d531cb 3274 }
1da177e4 3275#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
3276 if (cfg->fc_src_len) {
3277 NL_SET_ERR_MSG(extack,
3278 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 3279 goto out;
d5d531cb 3280 }
1da177e4 3281#endif
fc1e64e1 3282
d71314b4 3283 err = -ENOBUFS;
38308473
DM
3284 if (cfg->fc_nlinfo.nlh &&
3285 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 3286 table = fib6_get_table(net, cfg->fc_table);
38308473 3287 if (!table) {
f3213831 3288 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
3289 table = fib6_new_table(net, cfg->fc_table);
3290 }
3291 } else {
3292 table = fib6_new_table(net, cfg->fc_table);
3293 }
38308473
DM
3294
3295 if (!table)
c71099ac 3296 goto out;
c71099ac 3297
93531c67 3298 err = -ENOMEM;
f88d8ea6 3299 rt = fib6_info_alloc(gfp_flags, !nh);
93531c67 3300 if (!rt)
1da177e4 3301 goto out;
93531c67 3302
d7e774f3
DA
3303 rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3304 extack);
767a2217
DA
3305 if (IS_ERR(rt->fib6_metrics)) {
3306 err = PTR_ERR(rt->fib6_metrics);
fda21d46
ED
3307 /* Do not leave garbage there. */
3308 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
767a2217
DA
3309 goto out;
3310 }
3311
93531c67
DA
3312 if (cfg->fc_flags & RTF_ADDRCONF)
3313 rt->dst_nocount = true;
1da177e4 3314
1716a961 3315 if (cfg->fc_flags & RTF_EXPIRES)
14895687 3316 fib6_set_expires(rt, jiffies +
1716a961
G
3317 clock_t_to_jiffies(cfg->fc_expires));
3318 else
14895687 3319 fib6_clean_expires(rt);
1da177e4 3320
86872cb5
TG
3321 if (cfg->fc_protocol == RTPROT_UNSPEC)
3322 cfg->fc_protocol = RTPROT_BOOT;
93c2fb25 3323 rt->fib6_protocol = cfg->fc_protocol;
86872cb5 3324
83c44251
DA
3325 rt->fib6_table = table;
3326 rt->fib6_metric = cfg->fc_metric;
3327 rt->fib6_type = cfg->fc_type;
2b2450ca 3328 rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
19e42e45 3329
93c2fb25
DA
3330 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3331 rt->fib6_dst.plen = cfg->fc_dst_len;
3332 if (rt->fib6_dst.plen == 128)
3b6761d1 3333 rt->dst_host = true;
e5fd387a 3334
1da177e4 3335#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
3336 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3337 rt->fib6_src.plen = cfg->fc_src_len;
1da177e4 3338#endif
f88d8ea6
DA
3339 if (nh) {
3340 if (!nexthop_get(nh)) {
3341 NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
3342 goto out;
3343 }
3344 if (rt->fib6_src.plen) {
4daa95af 3345 NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
f88d8ea6
DA
3346 goto out;
3347 }
3348 rt->nh = nh;
3349 fib6_nh = nexthop_fib6_nh(rt->nh);
3350 } else {
3351 err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
3352 if (err)
3353 goto out;
1da177e4 3354
f88d8ea6
DA
3355 fib6_nh = rt->fib6_nh;
3356
3357 /* We cannot add true routes via loopback here, they would
3358 * result in kernel looping; promote them to reject routes
3359 */
3360 addr_type = ipv6_addr_type(&cfg->fc_dst);
3361 if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev,
3362 addr_type))
3363 rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
3364 }
955ec4cb 3365
c3968a85 3366 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
f88d8ea6 3367 struct net_device *dev = fib6_nh->fib_nh_dev;
83c44251 3368
c3968a85 3369 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3370 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3371 err = -EINVAL;
3372 goto out;
3373 }
93c2fb25
DA
3374 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3375 rt->fib6_prefsrc.plen = 128;
c3968a85 3376 } else
93c2fb25 3377 rt->fib6_prefsrc.plen = 0;
c3968a85 3378
8c5b83f0 3379 return rt;
6b9ea5a6 3380out:
93531c67 3381 fib6_info_release(rt);
8c5b83f0 3382 return ERR_PTR(err);
6b9ea5a6
RP
3383}
3384
acb54e3c 3385int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
333c4301 3386 struct netlink_ext_ack *extack)
6b9ea5a6 3387{
8d1c802b 3388 struct fib6_info *rt;
6b9ea5a6
RP
3389 int err;
3390
acb54e3c 3391 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3392 if (IS_ERR(rt))
3393 return PTR_ERR(rt);
6b9ea5a6 3394
d4ead6b3 3395 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3396 fib6_info_release(rt);
6b9ea5a6 3397
1da177e4
LT
3398 return err;
3399}
3400
8d1c802b 3401static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
1da177e4 3402{
afb1d4b5 3403 struct net *net = info->nl_net;
c71099ac 3404 struct fib6_table *table;
afb1d4b5 3405 int err;
1da177e4 3406
421842ed 3407 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3408 err = -ENOENT;
3409 goto out;
3410 }
6c813a72 3411
93c2fb25 3412 table = rt->fib6_table;
66f5d6ce 3413 spin_lock_bh(&table->tb6_lock);
86872cb5 3414 err = fib6_del(rt, info);
66f5d6ce 3415 spin_unlock_bh(&table->tb6_lock);
1da177e4 3416
6825a26c 3417out:
93531c67 3418 fib6_info_release(rt);
1da177e4
LT
3419 return err;
3420}
3421
8d1c802b 3422int ip6_del_rt(struct net *net, struct fib6_info *rt)
e0a1ad73 3423{
afb1d4b5
DA
3424 struct nl_info info = { .nl_net = net };
3425
528c4ceb 3426 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3427}
3428
8d1c802b 3429static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
0ae81335
DA
3430{
3431 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3432 struct net *net = info->nl_net;
16a16cd3 3433 struct sk_buff *skb = NULL;
0ae81335 3434 struct fib6_table *table;
e3330039 3435 int err = -ENOENT;
0ae81335 3436
421842ed 3437 if (rt == net->ipv6.fib6_null_entry)
e3330039 3438 goto out_put;
93c2fb25 3439 table = rt->fib6_table;
66f5d6ce 3440 spin_lock_bh(&table->tb6_lock);
0ae81335 3441
93c2fb25 3442 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
8d1c802b 3443 struct fib6_info *sibling, *next_sibling;
0ae81335 3444
16a16cd3
DA
3445 /* prefer to send a single notification with all hops */
3446 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3447 if (skb) {
3448 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3449
d4ead6b3 3450 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3451 NULL, NULL, 0, RTM_DELROUTE,
3452 info->portid, seq, 0) < 0) {
3453 kfree_skb(skb);
3454 skb = NULL;
3455 } else
3456 info->skip_notify = 1;
3457 }
3458
0ae81335 3459 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
3460 &rt->fib6_siblings,
3461 fib6_siblings) {
0ae81335
DA
3462 err = fib6_del(sibling, info);
3463 if (err)
e3330039 3464 goto out_unlock;
0ae81335
DA
3465 }
3466 }
3467
3468 err = fib6_del(rt, info);
e3330039 3469out_unlock:
66f5d6ce 3470 spin_unlock_bh(&table->tb6_lock);
e3330039 3471out_put:
93531c67 3472 fib6_info_release(rt);
16a16cd3
DA
3473
3474 if (skb) {
e3330039 3475 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3476 info->nlh, gfp_any());
3477 }
0ae81335
DA
3478 return err;
3479}
3480
0fa6efc5 3481static int __ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
23fb93a4
DA
3482{
3483 int rc = -ESRCH;
3484
3485 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3486 goto out;
3487
3488 if (cfg->fc_flags & RTF_GATEWAY &&
3489 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3490 goto out;
761f6026
XL
3491
3492 rc = rt6_remove_exception_rt(rt);
23fb93a4
DA
3493out:
3494 return rc;
3495}
3496
0fa6efc5
DA
3497static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt,
3498 struct fib6_nh *nh)
3499{
3500 struct fib6_result res = {
3501 .f6i = rt,
3502 .nh = nh,
3503 };
3504 struct rt6_info *rt_cache;
3505
3506 rt_cache = rt6_find_cached_rt(&res, &cfg->fc_dst, &cfg->fc_src);
3507 if (rt_cache)
3508 return __ip6_del_cached_rt(rt_cache, cfg);
3509
3510 return 0;
3511}
3512
333c4301
DA
3513static int ip6_route_del(struct fib6_config *cfg,
3514 struct netlink_ext_ack *extack)
1da177e4 3515{
c71099ac 3516 struct fib6_table *table;
8d1c802b 3517 struct fib6_info *rt;
1da177e4 3518 struct fib6_node *fn;
1da177e4
LT
3519 int err = -ESRCH;
3520
5578689a 3521 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3522 if (!table) {
3523 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3524 return err;
d5d531cb 3525 }
c71099ac 3526
66f5d6ce 3527 rcu_read_lock();
1da177e4 3528
c71099ac 3529 fn = fib6_locate(&table->tb6_root,
86872cb5 3530 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3531 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3532 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3533
1da177e4 3534 if (fn) {
66f5d6ce 3535 for_each_fib6_node_rt_rcu(fn) {
ad1601ae
DA
3536 struct fib6_nh *nh;
3537
1cf844c7 3538 nh = rt->fib6_nh;
2b760fcf 3539 if (cfg->fc_flags & RTF_CACHE) {
23fb93a4
DA
3540 int rc;
3541
0fa6efc5
DA
3542 rc = ip6_del_cached_rt(cfg, rt, nh);
3543 if (rc != -ESRCH) {
3544 rcu_read_unlock();
3545 return rc;
23fb93a4
DA
3546 }
3547 continue;
2b760fcf 3548 }
ad1601ae 3549
86872cb5 3550 if (cfg->fc_ifindex &&
ad1601ae
DA
3551 (!nh->fib_nh_dev ||
3552 nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3553 continue;
86872cb5 3554 if (cfg->fc_flags & RTF_GATEWAY &&
ad1601ae 3555 !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
1da177e4 3556 continue;
93c2fb25 3557 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
1da177e4 3558 continue;
93c2fb25 3559 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
c2ed1880 3560 continue;
e873e4b9
WW
3561 if (!fib6_info_hold_safe(rt))
3562 continue;
66f5d6ce 3563 rcu_read_unlock();
1da177e4 3564
0ae81335
DA
3565 /* if gateway was specified only delete the one hop */
3566 if (cfg->fc_flags & RTF_GATEWAY)
3567 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3568
3569 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3570 }
3571 }
66f5d6ce 3572 rcu_read_unlock();
1da177e4
LT
3573
3574 return err;
3575}
3576
6700c270 3577static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3578{
a6279458 3579 struct netevent_redirect netevent;
e8599ff4 3580 struct rt6_info *rt, *nrt = NULL;
85bd05de 3581 struct fib6_result res = {};
e8599ff4
DM
3582 struct ndisc_options ndopts;
3583 struct inet6_dev *in6_dev;
3584 struct neighbour *neigh;
71bcdba0 3585 struct rd_msg *msg;
6e157b6a
DM
3586 int optlen, on_link;
3587 u8 *lladdr;
e8599ff4 3588
29a3cad5 3589 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3590 optlen -= sizeof(*msg);
e8599ff4
DM
3591
3592 if (optlen < 0) {
6e157b6a 3593 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3594 return;
3595 }
3596
71bcdba0 3597 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3598
71bcdba0 3599 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3600 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3601 return;
3602 }
3603
6e157b6a 3604 on_link = 0;
71bcdba0 3605 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3606 on_link = 1;
71bcdba0 3607 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3608 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3609 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3610 return;
3611 }
3612
3613 in6_dev = __in6_dev_get(skb->dev);
3614 if (!in6_dev)
3615 return;
3616 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3617 return;
3618
3619 /* RFC2461 8.1:
3620 * The IP source address of the Redirect MUST be the same as the current
3621 * first-hop router for the specified ICMP Destination Address.
3622 */
3623
f997c55c 3624 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3625 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3626 return;
3627 }
6e157b6a
DM
3628
3629 lladdr = NULL;
e8599ff4
DM
3630 if (ndopts.nd_opts_tgt_lladdr) {
3631 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3632 skb->dev);
3633 if (!lladdr) {
3634 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3635 return;
3636 }
3637 }
3638
6e157b6a 3639 rt = (struct rt6_info *) dst;
ec13ad1d 3640 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3641 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3642 return;
6e157b6a 3643 }
e8599ff4 3644
6e157b6a
DM
3645 /* Redirect received -> path was valid.
3646 * Look, redirects are sent only in response to data packets,
3647 * so that this nexthop apparently is reachable. --ANK
3648 */
0dec879f 3649 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3650
71bcdba0 3651 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3652 if (!neigh)
3653 return;
a6279458 3654
1da177e4
LT
3655 /*
3656 * We have finally decided to accept it.
3657 */
3658
f997c55c 3659 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3660 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3661 NEIGH_UPDATE_F_OVERRIDE|
3662 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3663 NEIGH_UPDATE_F_ISROUTER)),
3664 NDISC_REDIRECT, &ndopts);
1da177e4 3665
4d85cd0c 3666 rcu_read_lock();
85bd05de 3667 res.f6i = rcu_dereference(rt->from);
ff24e498 3668 if (!res.f6i)
886b7a50 3669 goto out;
8a14e46f 3670
1cf844c7 3671 res.nh = res.f6i->fib6_nh;
7d21fec9
DA
3672 res.fib6_flags = res.f6i->fib6_flags;
3673 res.fib6_type = res.f6i->fib6_type;
85bd05de 3674 nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
38308473 3675 if (!nrt)
1da177e4
LT
3676 goto out;
3677
3678 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3679 if (on_link)
3680 nrt->rt6i_flags &= ~RTF_GATEWAY;
3681
4e3fd7a0 3682 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3683
886b7a50 3684 /* rt6_insert_exception() will take care of duplicated exceptions */
5012f0a5 3685 if (rt6_insert_exception(nrt, &res)) {
2b760fcf
WW
3686 dst_release_immediate(&nrt->dst);
3687 goto out;
3688 }
1da177e4 3689
d8d1f30b
CG
3690 netevent.old = &rt->dst;
3691 netevent.new = &nrt->dst;
71bcdba0 3692 netevent.daddr = &msg->dest;
60592833 3693 netevent.neigh = neigh;
8d71740c
TT
3694 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3695
1da177e4 3696out:
886b7a50 3697 rcu_read_unlock();
e8599ff4 3698 neigh_release(neigh);
6e157b6a
DM
3699}
3700
70ceb4f5 3701#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 3702static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 3703 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3704 const struct in6_addr *gwaddr,
3705 struct net_device *dev)
70ceb4f5 3706{
830218c1
DA
3707 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3708 int ifindex = dev->ifindex;
70ceb4f5 3709 struct fib6_node *fn;
8d1c802b 3710 struct fib6_info *rt = NULL;
c71099ac
TG
3711 struct fib6_table *table;
3712
830218c1 3713 table = fib6_get_table(net, tb_id);
38308473 3714 if (!table)
c71099ac 3715 return NULL;
70ceb4f5 3716
66f5d6ce 3717 rcu_read_lock();
38fbeeee 3718 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3719 if (!fn)
3720 goto out;
3721
66f5d6ce 3722 for_each_fib6_node_rt_rcu(fn) {
f88d8ea6
DA
3723 /* these routes do not use nexthops */
3724 if (rt->nh)
3725 continue;
1cf844c7 3726 if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex)
70ceb4f5 3727 continue;
2b2450ca 3728 if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
1cf844c7 3729 !rt->fib6_nh->fib_nh_gw_family)
70ceb4f5 3730 continue;
1cf844c7 3731 if (!ipv6_addr_equal(&rt->fib6_nh->fib_nh_gw6, gwaddr))
70ceb4f5 3732 continue;
e873e4b9
WW
3733 if (!fib6_info_hold_safe(rt))
3734 continue;
70ceb4f5
YH
3735 break;
3736 }
3737out:
66f5d6ce 3738 rcu_read_unlock();
70ceb4f5
YH
3739 return rt;
3740}
3741
8d1c802b 3742static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 3743 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3744 const struct in6_addr *gwaddr,
3745 struct net_device *dev,
95c96174 3746 unsigned int pref)
70ceb4f5 3747{
86872cb5 3748 struct fib6_config cfg = {
238fc7ea 3749 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3750 .fc_ifindex = dev->ifindex,
86872cb5
TG
3751 .fc_dst_len = prefixlen,
3752 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3753 RTF_UP | RTF_PREF(pref),
b91d5329 3754 .fc_protocol = RTPROT_RA,
e8478e80 3755 .fc_type = RTN_UNICAST,
15e47304 3756 .fc_nlinfo.portid = 0,
efa2cea0
DL
3757 .fc_nlinfo.nlh = NULL,
3758 .fc_nlinfo.nl_net = net,
86872cb5
TG
3759 };
3760
830218c1 3761 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3762 cfg.fc_dst = *prefix;
3763 cfg.fc_gateway = *gwaddr;
70ceb4f5 3764
e317da96
YH
3765 /* We should treat it as a default route if prefix length is 0. */
3766 if (!prefixlen)
86872cb5 3767 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3768
acb54e3c 3769 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3770
830218c1 3771 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3772}
3773#endif
3774
8d1c802b 3775struct fib6_info *rt6_get_dflt_router(struct net *net,
afb1d4b5
DA
3776 const struct in6_addr *addr,
3777 struct net_device *dev)
1ab1457c 3778{
830218c1 3779 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
8d1c802b 3780 struct fib6_info *rt;
c71099ac 3781 struct fib6_table *table;
1da177e4 3782
afb1d4b5 3783 table = fib6_get_table(net, tb_id);
38308473 3784 if (!table)
c71099ac 3785 return NULL;
1da177e4 3786
66f5d6ce
WW
3787 rcu_read_lock();
3788 for_each_fib6_node_rt_rcu(&table->tb6_root) {
f88d8ea6 3789 struct fib6_nh *nh;
ad1601ae 3790
f88d8ea6
DA
3791 /* RA routes do not use nexthops */
3792 if (rt->nh)
3793 continue;
3794
3795 nh = rt->fib6_nh;
ad1601ae 3796 if (dev == nh->fib_nh_dev &&
93c2fb25 3797 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ad1601ae 3798 ipv6_addr_equal(&nh->fib_nh_gw6, addr))
1da177e4
LT
3799 break;
3800 }
e873e4b9
WW
3801 if (rt && !fib6_info_hold_safe(rt))
3802 rt = NULL;
66f5d6ce 3803 rcu_read_unlock();
1da177e4
LT
3804 return rt;
3805}
3806
8d1c802b 3807struct fib6_info *rt6_add_dflt_router(struct net *net,
afb1d4b5 3808 const struct in6_addr *gwaddr,
ebacaaa0
YH
3809 struct net_device *dev,
3810 unsigned int pref)
1da177e4 3811{
86872cb5 3812 struct fib6_config cfg = {
ca254490 3813 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3814 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3815 .fc_ifindex = dev->ifindex,
3816 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3817 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3818 .fc_protocol = RTPROT_RA,
e8478e80 3819 .fc_type = RTN_UNICAST,
15e47304 3820 .fc_nlinfo.portid = 0,
5578689a 3821 .fc_nlinfo.nlh = NULL,
afb1d4b5 3822 .fc_nlinfo.nl_net = net,
86872cb5 3823 };
1da177e4 3824
4e3fd7a0 3825 cfg.fc_gateway = *gwaddr;
1da177e4 3826
acb54e3c 3827 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3828 struct fib6_table *table;
3829
3830 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3831 if (table)
3832 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3833 }
1da177e4 3834
afb1d4b5 3835 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3836}
3837
afb1d4b5
DA
3838static void __rt6_purge_dflt_routers(struct net *net,
3839 struct fib6_table *table)
1da177e4 3840{
8d1c802b 3841 struct fib6_info *rt;
1da177e4
LT
3842
3843restart:
66f5d6ce
WW
3844 rcu_read_lock();
3845 for_each_fib6_node_rt_rcu(&table->tb6_root) {
dcd1f572
DA
3846 struct net_device *dev = fib6_info_nh_dev(rt);
3847 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3848
93c2fb25 3849 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
e873e4b9
WW
3850 (!idev || idev->cnf.accept_ra != 2) &&
3851 fib6_info_hold_safe(rt)) {
93531c67
DA
3852 rcu_read_unlock();
3853 ip6_del_rt(net, rt);
1da177e4
LT
3854 goto restart;
3855 }
3856 }
66f5d6ce 3857 rcu_read_unlock();
830218c1
DA
3858
3859 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3860}
3861
3862void rt6_purge_dflt_routers(struct net *net)
3863{
3864 struct fib6_table *table;
3865 struct hlist_head *head;
3866 unsigned int h;
3867
3868 rcu_read_lock();
3869
3870 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3871 head = &net->ipv6.fib_table_hash[h];
3872 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3873 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3874 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3875 }
3876 }
3877
3878 rcu_read_unlock();
1da177e4
LT
3879}
3880
5578689a
DL
3881static void rtmsg_to_fib6_config(struct net *net,
3882 struct in6_rtmsg *rtmsg,
86872cb5
TG
3883 struct fib6_config *cfg)
3884{
8823a3ac
3885 *cfg = (struct fib6_config){
3886 .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3887 : RT6_TABLE_MAIN,
3888 .fc_ifindex = rtmsg->rtmsg_ifindex,
67f69513 3889 .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
8823a3ac
3890 .fc_expires = rtmsg->rtmsg_info,
3891 .fc_dst_len = rtmsg->rtmsg_dst_len,
3892 .fc_src_len = rtmsg->rtmsg_src_len,
3893 .fc_flags = rtmsg->rtmsg_flags,
3894 .fc_type = rtmsg->rtmsg_type,
3895
3896 .fc_nlinfo.nl_net = net,
3897
3898 .fc_dst = rtmsg->rtmsg_dst,
3899 .fc_src = rtmsg->rtmsg_src,
3900 .fc_gateway = rtmsg->rtmsg_gateway,
3901 };
86872cb5
TG
3902}
3903
5578689a 3904int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3905{
86872cb5 3906 struct fib6_config cfg;
1da177e4
LT
3907 struct in6_rtmsg rtmsg;
3908 int err;
3909
67ba4152 3910 switch (cmd) {
1da177e4
LT
3911 case SIOCADDRT: /* Add a route */
3912 case SIOCDELRT: /* Delete a route */
af31f412 3913 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3914 return -EPERM;
3915 err = copy_from_user(&rtmsg, arg,
3916 sizeof(struct in6_rtmsg));
3917 if (err)
3918 return -EFAULT;
86872cb5 3919
5578689a 3920 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3921
1da177e4
LT
3922 rtnl_lock();
3923 switch (cmd) {
3924 case SIOCADDRT:
acb54e3c 3925 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3926 break;
3927 case SIOCDELRT:
333c4301 3928 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3929 break;
3930 default:
3931 err = -EINVAL;
3932 }
3933 rtnl_unlock();
3934
3935 return err;
3ff50b79 3936 }
1da177e4
LT
3937
3938 return -EINVAL;
3939}
3940
3941/*
3942 * Drop the packet on the floor
3943 */
3944
d5fdd6ba 3945static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3946{
adf30907 3947 struct dst_entry *dst = skb_dst(skb);
1d3fd8a1
SS
3948 struct net *net = dev_net(dst->dev);
3949 struct inet6_dev *idev;
3950 int type;
3951
3952 if (netif_is_l3_master(skb->dev) &&
3953 dst->dev == net->loopback_dev)
3954 idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
3955 else
3956 idev = ip6_dst_idev(dst);
3957
612f09e8
YH
3958 switch (ipstats_mib_noroutes) {
3959 case IPSTATS_MIB_INNOROUTES:
0660e03f 3960 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3961 if (type == IPV6_ADDR_ANY) {
1d3fd8a1 3962 IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3963 break;
3964 }
3965 /* FALLTHROUGH */
3966 case IPSTATS_MIB_OUTNOROUTES:
1d3fd8a1 3967 IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
612f09e8
YH
3968 break;
3969 }
1d3fd8a1
SS
3970
3971 /* Start over by dropping the dst for l3mdev case */
3972 if (netif_is_l3_master(skb->dev))
3973 skb_dst_drop(skb);
3974
3ffe533c 3975 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3976 kfree_skb(skb);
3977 return 0;
3978}
3979
9ce8ade0
TG
3980static int ip6_pkt_discard(struct sk_buff *skb)
3981{
612f09e8 3982 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3983}
3984
ede2059d 3985static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3986{
adf30907 3987 skb->dev = skb_dst(skb)->dev;
612f09e8 3988 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3989}
3990
9ce8ade0
TG
3991static int ip6_pkt_prohibit(struct sk_buff *skb)
3992{
612f09e8 3993 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3994}
3995
ede2059d 3996static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3997{
adf30907 3998 skb->dev = skb_dst(skb)->dev;
612f09e8 3999 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
4000}
4001
1da177e4
LT
4002/*
4003 * Allocate a fib6_info for a local (unicast / anycast) address.
4004 */
4005
360a9887
DA
4006struct fib6_info *addrconf_f6i_alloc(struct net *net,
4007 struct inet6_dev *idev,
4008 const struct in6_addr *addr,
4009 bool anycast, gfp_t gfp_flags)
1da177e4 4010{
c7a1ce39
DA
4011 struct fib6_config cfg = {
4012 .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
4013 .fc_ifindex = idev->dev->ifindex,
4014 .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP,
4015 .fc_dst = *addr,
4016 .fc_dst_len = 128,
4017 .fc_protocol = RTPROT_KERNEL,
4018 .fc_nlinfo.nl_net = net,
4019 .fc_ignore_dev_down = true,
4020 };
1da177e4 4021
e8478e80 4022 if (anycast) {
c7a1ce39
DA
4023 cfg.fc_type = RTN_ANYCAST;
4024 cfg.fc_flags |= RTF_ANYCAST;
e8478e80 4025 } else {
c7a1ce39
DA
4026 cfg.fc_type = RTN_LOCAL;
4027 cfg.fc_flags |= RTF_LOCAL;
e8478e80 4028 }
1da177e4 4029
c7a1ce39 4030 return ip6_route_info_create(&cfg, gfp_flags, NULL);
1da177e4
LT
4031}
4032
c3968a85
DW
4033/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* prefsrc address to match */
};
4039
8d1c802b 4040static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
c3968a85
DW
4041{
4042 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
4043 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
4044 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
4045
f88d8ea6
DA
4046 if (!rt->nh &&
4047 ((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) &&
421842ed 4048 rt != net->ipv6.fib6_null_entry &&
93c2fb25 4049 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
60006a48 4050 spin_lock_bh(&rt6_exception_lock);
c3968a85 4051 /* remove prefsrc entry */
93c2fb25 4052 rt->fib6_prefsrc.plen = 0;
60006a48 4053 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
4054 }
4055 return 0;
4056}
4057
4058void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
4059{
4060 struct net *net = dev_net(ifp->idev->dev);
4061 struct arg_dev_net_ip adni = {
4062 .dev = ifp->idev->dev,
4063 .net = net,
4064 .addr = &ifp->addr,
4065 };
0c3584d5 4066 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
4067}
4068
2b2450ca 4069#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT)
be7a010d
DJ
4070
4071/* Remove routers and update dst entries when a gateway turns into a host. */
8d1c802b 4072static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
be7a010d
DJ
4073{
4074 struct in6_addr *gateway = (struct in6_addr *)arg;
f88d8ea6
DA
4075 struct fib6_nh *nh;
4076
4077 /* RA routes do not use nexthops */
4078 if (rt->nh)
4079 return 0;
be7a010d 4080
f88d8ea6 4081 nh = rt->fib6_nh;
93c2fb25 4082 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
cc5c073a 4083 nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6))
be7a010d 4084 return -1;
b16cb459
WW
4085
4086 /* Further clean up cached routes in exception table.
4087 * This is needed because cached route may have a different
4088 * gateway than its 'parent' in the case of an ip redirect.
4089 */
cc5c073a 4090 fib6_nh_exceptions_clean_tohost(nh, gateway);
b16cb459 4091
be7a010d
DJ
4092 return 0;
4093}
4094
4095void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
4096{
4097 fib6_clean_all(net, fib6_clean_tohost, gateway);
4098}
4099
2127d95a
IS
/* Walker argument shared by fib6_ifup() and fib6_ifdown(). */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the walk is about */
	union {
		unsigned char nh_flags;	/* fib6_ifup(): nexthop flags to clear */
		unsigned long event;	/* fib6_ifdown(): NETDEV_* event */
	};
};
4107
8d1c802b 4108static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
d7dedee1 4109{
8d1c802b 4110 struct fib6_info *iter;
d7dedee1
IS
4111 struct fib6_node *fn;
4112
93c2fb25
DA
4113 fn = rcu_dereference_protected(rt->fib6_node,
4114 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 4115 iter = rcu_dereference_protected(fn->leaf,
93c2fb25 4116 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 4117 while (iter) {
93c2fb25 4118 if (iter->fib6_metric == rt->fib6_metric &&
33bd5ac5 4119 rt6_qualify_for_ecmp(iter))
d7dedee1 4120 return iter;
8fb11a9a 4121 iter = rcu_dereference_protected(iter->fib6_next,
93c2fb25 4122 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1
IS
4123 }
4124
4125 return NULL;
4126}
4127
f88d8ea6 4128/* only called for fib entries with builtin fib6_nh */
8d1c802b 4129static bool rt6_is_dead(const struct fib6_info *rt)
d7dedee1 4130{
1cf844c7
DA
4131 if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD ||
4132 (rt->fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN &&
4133 ip6_ignore_linkdown(rt->fib6_nh->fib_nh_dev)))
d7dedee1
IS
4134 return true;
4135
4136 return false;
4137}
4138
8d1c802b 4139static int rt6_multipath_total_weight(const struct fib6_info *rt)
d7dedee1 4140{
8d1c802b 4141 struct fib6_info *iter;
d7dedee1
IS
4142 int total = 0;
4143
4144 if (!rt6_is_dead(rt))
1cf844c7 4145 total += rt->fib6_nh->fib_nh_weight;
d7dedee1 4146
93c2fb25 4147 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
d7dedee1 4148 if (!rt6_is_dead(iter))
1cf844c7 4149 total += iter->fib6_nh->fib_nh_weight;
d7dedee1
IS
4150 }
4151
4152 return total;
4153}
4154
8d1c802b 4155static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
d7dedee1
IS
4156{
4157 int upper_bound = -1;
4158
4159 if (!rt6_is_dead(rt)) {
1cf844c7 4160 *weight += rt->fib6_nh->fib_nh_weight;
d7dedee1
IS
4161 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
4162 total) - 1;
4163 }
1cf844c7 4164 atomic_set(&rt->fib6_nh->fib_nh_upper_bound, upper_bound);
d7dedee1
IS
4165}
4166
8d1c802b 4167static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
d7dedee1 4168{
8d1c802b 4169 struct fib6_info *iter;
d7dedee1
IS
4170 int weight = 0;
4171
4172 rt6_upper_bound_set(rt, &weight, total);
4173
93c2fb25 4174 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
d7dedee1
IS
4175 rt6_upper_bound_set(iter, &weight, total);
4176}
4177
8d1c802b 4178void rt6_multipath_rebalance(struct fib6_info *rt)
d7dedee1 4179{
8d1c802b 4180 struct fib6_info *first;
d7dedee1
IS
4181 int total;
4182
4183 /* In case the entire multipath route was marked for flushing,
4184 * then there is no need to rebalance upon the removal of every
4185 * sibling route.
4186 */
93c2fb25 4187 if (!rt->fib6_nsiblings || rt->should_flush)
d7dedee1
IS
4188 return;
4189
4190 /* During lookup routes are evaluated in order, so we need to
4191 * make sure upper bounds are assigned from the first sibling
4192 * onwards.
4193 */
4194 first = rt6_multipath_first_sibling(rt);
4195 if (WARN_ON_ONCE(!first))
4196 return;
4197
4198 total = rt6_multipath_total_weight(first);
4199 rt6_multipath_upper_bound_set(first, total);
4200}
4201
8d1c802b 4202static int fib6_ifup(struct fib6_info *rt, void *p_arg)
2127d95a
IS
4203{
4204 const struct arg_netdev_event *arg = p_arg;
7aef6859 4205 struct net *net = dev_net(arg->dev);
2127d95a 4206
f88d8ea6 4207 if (rt != net->ipv6.fib6_null_entry && !rt->nh &&
1cf844c7
DA
4208 rt->fib6_nh->fib_nh_dev == arg->dev) {
4209 rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags;
7aef6859 4210 fib6_update_sernum_upto_root(net, rt);
d7dedee1 4211 rt6_multipath_rebalance(rt);
1de178ed 4212 }
2127d95a
IS
4213
4214 return 0;
4215}
4216
ecc5663c 4217void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
2127d95a
IS
4218{
4219 struct arg_netdev_event arg = {
4220 .dev = dev,
6802f3ad
IS
4221 {
4222 .nh_flags = nh_flags,
4223 },
2127d95a
IS
4224 };
4225
4226 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
4227 arg.nh_flags |= RTNH_F_LINKDOWN;
4228
4229 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
4230}
4231
f88d8ea6 4232/* only called for fib entries with inline fib6_nh */
8d1c802b 4233static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
1de178ed
IS
4234 const struct net_device *dev)
4235{
8d1c802b 4236 struct fib6_info *iter;
1de178ed 4237
1cf844c7 4238 if (rt->fib6_nh->fib_nh_dev == dev)
1de178ed 4239 return true;
93c2fb25 4240 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1cf844c7 4241 if (iter->fib6_nh->fib_nh_dev == dev)
1de178ed
IS
4242 return true;
4243
4244 return false;
4245}
4246
8d1c802b 4247static void rt6_multipath_flush(struct fib6_info *rt)
1de178ed 4248{
8d1c802b 4249 struct fib6_info *iter;
1de178ed
IS
4250
4251 rt->should_flush = 1;
93c2fb25 4252 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1de178ed
IS
4253 iter->should_flush = 1;
4254}
4255
8d1c802b 4256static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
1de178ed
IS
4257 const struct net_device *down_dev)
4258{
8d1c802b 4259 struct fib6_info *iter;
1de178ed
IS
4260 unsigned int dead = 0;
4261
1cf844c7
DA
4262 if (rt->fib6_nh->fib_nh_dev == down_dev ||
4263 rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
1de178ed 4264 dead++;
93c2fb25 4265 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1cf844c7
DA
4266 if (iter->fib6_nh->fib_nh_dev == down_dev ||
4267 iter->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
1de178ed
IS
4268 dead++;
4269
4270 return dead;
4271}
4272
8d1c802b 4273static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
1de178ed 4274 const struct net_device *dev,
ecc5663c 4275 unsigned char nh_flags)
1de178ed 4276{
8d1c802b 4277 struct fib6_info *iter;
1de178ed 4278
1cf844c7
DA
4279 if (rt->fib6_nh->fib_nh_dev == dev)
4280 rt->fib6_nh->fib_nh_flags |= nh_flags;
93c2fb25 4281 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1cf844c7
DA
4282 if (iter->fib6_nh->fib_nh_dev == dev)
4283 iter->fib6_nh->fib_nh_flags |= nh_flags;
1de178ed
IS
4284}
4285
a1a22c12 4286/* called with write lock held for table with rt */
8d1c802b 4287static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
1da177e4 4288{
4c981e28
IS
4289 const struct arg_netdev_event *arg = p_arg;
4290 const struct net_device *dev = arg->dev;
7aef6859 4291 struct net *net = dev_net(dev);
8ed67789 4292
f88d8ea6 4293 if (rt == net->ipv6.fib6_null_entry || rt->nh)
27c6fa73
IS
4294 return 0;
4295
4296 switch (arg->event) {
4297 case NETDEV_UNREGISTER:
1cf844c7 4298 return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
27c6fa73 4299 case NETDEV_DOWN:
1de178ed 4300 if (rt->should_flush)
27c6fa73 4301 return -1;
93c2fb25 4302 if (!rt->fib6_nsiblings)
1cf844c7 4303 return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
1de178ed
IS
4304 if (rt6_multipath_uses_dev(rt, dev)) {
4305 unsigned int count;
4306
4307 count = rt6_multipath_dead_count(rt, dev);
93c2fb25 4308 if (rt->fib6_nsiblings + 1 == count) {
1de178ed
IS
4309 rt6_multipath_flush(rt);
4310 return -1;
4311 }
4312 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4313 RTNH_F_LINKDOWN);
7aef6859 4314 fib6_update_sernum(net, rt);
d7dedee1 4315 rt6_multipath_rebalance(rt);
1de178ed
IS
4316 }
4317 return -2;
27c6fa73 4318 case NETDEV_CHANGE:
1cf844c7 4319 if (rt->fib6_nh->fib_nh_dev != dev ||
93c2fb25 4320 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 4321 break;
1cf844c7 4322 rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 4323 rt6_multipath_rebalance(rt);
27c6fa73 4324 break;
2b241361 4325 }
c159d30c 4326
1da177e4
LT
4327 return 0;
4328}
4329
27c6fa73 4330void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 4331{
4c981e28 4332 struct arg_netdev_event arg = {
8ed67789 4333 .dev = dev,
6802f3ad
IS
4334 {
4335 .event = event,
4336 },
8ed67789 4337 };
7c6bb7d2 4338 struct net *net = dev_net(dev);
8ed67789 4339
7c6bb7d2
DA
4340 if (net->ipv6.sysctl.skip_notify_on_dev_down)
4341 fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4342 else
4343 fib6_clean_all(net, fib6_ifdown, &arg);
4c981e28
IS
4344}
4345
4346void rt6_disable_ip(struct net_device *dev, unsigned long event)
4347{
4348 rt6_sync_down_dev(dev, event);
4349 rt6_uncached_list_flush_dev(dev_net(dev), dev);
4350 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
4351}
4352
/* Walker argument for rt6_mtu_change_route() / fib6_nh_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
	struct fib6_info *f6i;		/* entry being visited; set per entry */
};
4358
cc5c073a 4359static int fib6_nh_mtu_change(struct fib6_nh *nh, void *_arg)
c0b220cf
DA
4360{
4361 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg;
cc5c073a 4362 struct fib6_info *f6i = arg->f6i;
c0b220cf
DA
4363
4364 /* For administrative MTU increase, there is no way to discover
4365 * IPv6 PMTU increase, so PMTU increase should be updated here.
4366 * Since RFC 1981 doesn't include administrative MTU increase
4367 * update PMTU increase is a MUST. (i.e. jumbo frame)
4368 */
4369 if (nh->fib_nh_dev == arg->dev) {
4370 struct inet6_dev *idev = __in6_dev_get(arg->dev);
4371 u32 mtu = f6i->fib6_pmtu;
4372
4373 if (mtu >= arg->mtu ||
4374 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4375 fib6_metric_set(f6i, RTAX_MTU, arg->mtu);
4376
4377 spin_lock_bh(&rt6_exception_lock);
cc5c073a 4378 rt6_exceptions_update_pmtu(idev, nh, arg->mtu);
c0b220cf
DA
4379 spin_unlock_bh(&rt6_exception_lock);
4380 }
4381
4382 return 0;
4383}
4384
4385static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
1da177e4
LT
4386{
4387 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4388 struct inet6_dev *idev;
4389
4390 /* In IPv6 pmtu discovery is not optional,
4391 so that RTAX_MTU lock cannot disable it.
4392 We still use this lock to block changes
4393 caused by addrconf/ndisc.
4394 */
4395
4396 idev = __in6_dev_get(arg->dev);
38308473 4397 if (!idev)
1da177e4
LT
4398 return 0;
4399
c0b220cf
DA
4400 if (fib6_metric_locked(f6i, RTAX_MTU))
4401 return 0;
d4ead6b3 4402
c0b220cf 4403 arg->f6i = f6i;
1cf844c7 4404 return fib6_nh_mtu_change(f6i->fib6_nh, arg);
1da177e4
LT
4405}
4406
95c96174 4407void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4408{
c71099ac
TG
4409 struct rt6_mtu_change_arg arg = {
4410 .dev = dev,
4411 .mtu = mtu,
4412 };
1da177e4 4413
0c3584d5 4414 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4415}
4416
ef7c79ed 4417static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
75425657 4418 [RTA_UNSPEC] = { .strict_start_type = RTA_DPORT + 1 },
5176f91e 4419 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
aa8f8778 4420 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
86872cb5 4421 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4422 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4423 [RTA_PRIORITY] = { .type = NLA_U32 },
4424 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4425 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4426 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4427 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4428 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4429 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4430 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4431 [RTA_MARK] = { .type = NLA_U32 },
aa8f8778 4432 [RTA_TABLE] = { .type = NLA_U32 },
eacb9384
RP
4433 [RTA_IP_PROTO] = { .type = NLA_U8 },
4434 [RTA_SPORT] = { .type = NLA_U16 },
4435 [RTA_DPORT] = { .type = NLA_U16 },
86872cb5
TG
4436};
4437
4438static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4439 struct fib6_config *cfg,
4440 struct netlink_ext_ack *extack)
1da177e4 4441{
86872cb5
TG
4442 struct rtmsg *rtm;
4443 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4444 unsigned int pref;
86872cb5 4445 int err;
1da177e4 4446
8cb08174
JB
4447 err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
4448 rtm_ipv6_policy, extack);
86872cb5
TG
4449 if (err < 0)
4450 goto errout;
1da177e4 4451
86872cb5
TG
4452 err = -EINVAL;
4453 rtm = nlmsg_data(nlh);
86872cb5 4454
84db8407
4455 *cfg = (struct fib6_config){
4456 .fc_table = rtm->rtm_table,
4457 .fc_dst_len = rtm->rtm_dst_len,
4458 .fc_src_len = rtm->rtm_src_len,
4459 .fc_flags = RTF_UP,
4460 .fc_protocol = rtm->rtm_protocol,
4461 .fc_type = rtm->rtm_type,
4462
4463 .fc_nlinfo.portid = NETLINK_CB(skb).portid,
4464 .fc_nlinfo.nlh = nlh,
4465 .fc_nlinfo.nl_net = sock_net(skb->sk),
4466 };
86872cb5 4467
ef2c7d7b
ND
4468 if (rtm->rtm_type == RTN_UNREACHABLE ||
4469 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4470 rtm->rtm_type == RTN_PROHIBIT ||
4471 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4472 cfg->fc_flags |= RTF_REJECT;
4473
ab79ad14
4474 if (rtm->rtm_type == RTN_LOCAL)
4475 cfg->fc_flags |= RTF_LOCAL;
4476
1f56a01f
MKL
4477 if (rtm->rtm_flags & RTM_F_CLONED)
4478 cfg->fc_flags |= RTF_CACHE;
4479
fc1e64e1
DA
4480 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4481
86872cb5 4482 if (tb[RTA_GATEWAY]) {
67b61f6c 4483 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4484 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4485 }
e3818541
DA
4486 if (tb[RTA_VIA]) {
4487 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4488 goto errout;
4489 }
86872cb5
TG
4490
4491 if (tb[RTA_DST]) {
4492 int plen = (rtm->rtm_dst_len + 7) >> 3;
4493
4494 if (nla_len(tb[RTA_DST]) < plen)
4495 goto errout;
4496
4497 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4498 }
86872cb5
TG
4499
4500 if (tb[RTA_SRC]) {
4501 int plen = (rtm->rtm_src_len + 7) >> 3;
4502
4503 if (nla_len(tb[RTA_SRC]) < plen)
4504 goto errout;
4505
4506 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4507 }
86872cb5 4508
c3968a85 4509 if (tb[RTA_PREFSRC])
67b61f6c 4510 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4511
86872cb5
TG
4512 if (tb[RTA_OIF])
4513 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4514
4515 if (tb[RTA_PRIORITY])
4516 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4517
4518 if (tb[RTA_METRICS]) {
4519 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4520 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4521 }
86872cb5
TG
4522
4523 if (tb[RTA_TABLE])
4524 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4525
51ebd318
ND
4526 if (tb[RTA_MULTIPATH]) {
4527 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4528 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4529
4530 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4531 cfg->fc_mp_len, extack);
9ed59592
DA
4532 if (err < 0)
4533 goto errout;
51ebd318
ND
4534 }
4535
c78ba6d6
LR
4536 if (tb[RTA_PREF]) {
4537 pref = nla_get_u8(tb[RTA_PREF]);
4538 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4539 pref != ICMPV6_ROUTER_PREF_HIGH)
4540 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4541 cfg->fc_flags |= RTF_PREF(pref);
4542 }
4543
19e42e45
RP
4544 if (tb[RTA_ENCAP])
4545 cfg->fc_encap = tb[RTA_ENCAP];
4546
9ed59592 4547 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4548 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4549
c255bd68 4550 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4551 if (err < 0)
4552 goto errout;
4553 }
4554
32bc201e
XL
4555 if (tb[RTA_EXPIRES]) {
4556 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4557
4558 if (addrconf_finite_timeout(timeout)) {
4559 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4560 cfg->fc_flags |= RTF_EXPIRES;
4561 }
4562 }
4563
86872cb5
TG
4564 err = 0;
4565errout:
4566 return err;
1da177e4
LT
4567}
4568
6b9ea5a6 4569struct rt6_nh {
8d1c802b 4570 struct fib6_info *fib6_info;
6b9ea5a6 4571 struct fib6_config r_cfg;
6b9ea5a6
RP
4572 struct list_head next;
4573};
4574
d4ead6b3
DA
4575static int ip6_route_info_append(struct net *net,
4576 struct list_head *rt6_nh_list,
8d1c802b
DA
4577 struct fib6_info *rt,
4578 struct fib6_config *r_cfg)
6b9ea5a6
RP
4579{
4580 struct rt6_nh *nh;
6b9ea5a6
RP
4581 int err = -EEXIST;
4582
4583 list_for_each_entry(nh, rt6_nh_list, next) {
8d1c802b
DA
4584 /* check if fib6_info already exists */
4585 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
6b9ea5a6
RP
4586 return err;
4587 }
4588
4589 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4590 if (!nh)
4591 return -ENOMEM;
8d1c802b 4592 nh->fib6_info = rt;
6b9ea5a6
RP
4593 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4594 list_add_tail(&nh->next, rt6_nh_list);
4595
4596 return 0;
4597}
4598
8d1c802b
DA
4599static void ip6_route_mpath_notify(struct fib6_info *rt,
4600 struct fib6_info *rt_last,
3b1137fe
DA
4601 struct nl_info *info,
4602 __u16 nlflags)
4603{
4604 /* if this is an APPEND route, then rt points to the first route
4605 * inserted and rt_last points to last route inserted. Userspace
4606 * wants a consistent dump of the route which starts at the first
4607 * nexthop. Since sibling routes are always added at the end of
4608 * the list, find the first sibling of the last route appended
4609 */
93c2fb25
DA
4610 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4611 rt = list_first_entry(&rt_last->fib6_siblings,
8d1c802b 4612 struct fib6_info,
93c2fb25 4613 fib6_siblings);
3b1137fe
DA
4614 }
4615
4616 if (rt)
4617 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4618}
4619
333c4301
DA
4620static int ip6_route_multipath_add(struct fib6_config *cfg,
4621 struct netlink_ext_ack *extack)
51ebd318 4622{
8d1c802b 4623 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
3b1137fe 4624 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4625 struct fib6_config r_cfg;
4626 struct rtnexthop *rtnh;
8d1c802b 4627 struct fib6_info *rt;
6b9ea5a6
RP
4628 struct rt6_nh *err_nh;
4629 struct rt6_nh *nh, *nh_safe;
3b1137fe 4630 __u16 nlflags;
51ebd318
ND
4631 int remaining;
4632 int attrlen;
6b9ea5a6
RP
4633 int err = 1;
4634 int nhn = 0;
4635 int replace = (cfg->fc_nlinfo.nlh &&
4636 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4637 LIST_HEAD(rt6_nh_list);
51ebd318 4638
3b1137fe
DA
4639 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4640 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4641 nlflags |= NLM_F_APPEND;
4642
35f1b4e9 4643 remaining = cfg->fc_mp_len;
51ebd318 4644 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4645
6b9ea5a6 4646 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
8d1c802b 4647 * fib6_info structs per nexthop
6b9ea5a6 4648 */
51ebd318
ND
4649 while (rtnh_ok(rtnh, remaining)) {
4650 memcpy(&r_cfg, cfg, sizeof(*cfg));
4651 if (rtnh->rtnh_ifindex)
4652 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4653
4654 attrlen = rtnh_attrlen(rtnh);
4655 if (attrlen > 0) {
4656 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4657
4658 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4659 if (nla) {
67b61f6c 4660 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4661 r_cfg.fc_flags |= RTF_GATEWAY;
4662 }
19e42e45
RP
4663 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4664 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4665 if (nla)
4666 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4667 }
6b9ea5a6 4668
68e2ffde 4669 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4670 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4671 if (IS_ERR(rt)) {
4672 err = PTR_ERR(rt);
4673 rt = NULL;
6b9ea5a6 4674 goto cleanup;
8c5b83f0 4675 }
b5d2d75e
DA
4676 if (!rt6_qualify_for_ecmp(rt)) {
4677 err = -EINVAL;
4678 NL_SET_ERR_MSG(extack,
4679 "Device only routes can not be added for IPv6 using the multipath API.");
4680 fib6_info_release(rt);
4681 goto cleanup;
4682 }
6b9ea5a6 4683
1cf844c7 4684 rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;
398958ae 4685
d4ead6b3
DA
4686 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4687 rt, &r_cfg);
51ebd318 4688 if (err) {
93531c67 4689 fib6_info_release(rt);
6b9ea5a6
RP
4690 goto cleanup;
4691 }
4692
4693 rtnh = rtnh_next(rtnh, &remaining);
4694 }
4695
3b1137fe
DA
4696 /* for add and replace send one notification with all nexthops.
4697 * Skip the notification in fib6_add_rt2node and send one with
4698 * the full route when done
4699 */
4700 info->skip_notify = 1;
4701
6b9ea5a6
RP
4702 err_nh = NULL;
4703 list_for_each_entry(nh, &rt6_nh_list, next) {
8d1c802b
DA
4704 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4705 fib6_info_release(nh->fib6_info);
93531c67 4706
f7225172
DA
4707 if (!err) {
4708 /* save reference to last route successfully inserted */
4709 rt_last = nh->fib6_info;
4710
4711 /* save reference to first route for notification */
4712 if (!rt_notif)
4713 rt_notif = nh->fib6_info;
4714 }
3b1137fe 4715
8d1c802b
DA
4716 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4717 nh->fib6_info = NULL;
6b9ea5a6
RP
4718 if (err) {
4719 if (replace && nhn)
a5a82d84
JK
4720 NL_SET_ERR_MSG_MOD(extack,
4721 "multipath route replace failed (check consistency of installed routes)");
6b9ea5a6
RP
4722 err_nh = nh;
4723 goto add_errout;
51ebd318 4724 }
6b9ea5a6 4725
1a72418b 4726 /* Because each route is added like a single route we remove
27596472
MK
4727 * these flags after the first nexthop: if there is a collision,
4728 * we have already failed to add the first nexthop:
4729 * fib6_add_rt2node() has rejected it; when replacing, old
4730 * nexthops have been replaced by first new, the rest should
4731 * be added to it.
1a72418b 4732 */
27596472
MK
4733 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4734 NLM_F_REPLACE);
6b9ea5a6
RP
4735 nhn++;
4736 }
4737
3b1137fe
DA
4738 /* success ... tell user about new route */
4739 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4740 goto cleanup;
4741
4742add_errout:
3b1137fe
DA
4743 /* send notification for routes that were added so that
4744 * the delete notifications sent by ip6_route_del are
4745 * coherent
4746 */
4747 if (rt_notif)
4748 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4749
6b9ea5a6
RP
4750 /* Delete routes that were already added */
4751 list_for_each_entry(nh, &rt6_nh_list, next) {
4752 if (err_nh == nh)
4753 break;
333c4301 4754 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4755 }
4756
4757cleanup:
4758 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
8d1c802b
DA
4759 if (nh->fib6_info)
4760 fib6_info_release(nh->fib6_info);
6b9ea5a6
RP
4761 list_del(&nh->next);
4762 kfree(nh);
4763 }
4764
4765 return err;
4766}
4767
333c4301
DA
4768static int ip6_route_multipath_del(struct fib6_config *cfg,
4769 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4770{
4771 struct fib6_config r_cfg;
4772 struct rtnexthop *rtnh;
4773 int remaining;
4774 int attrlen;
4775 int err = 1, last_err = 0;
4776
4777 remaining = cfg->fc_mp_len;
4778 rtnh = (struct rtnexthop *)cfg->fc_mp;
4779
4780 /* Parse a Multipath Entry */
4781 while (rtnh_ok(rtnh, remaining)) {
4782 memcpy(&r_cfg, cfg, sizeof(*cfg));
4783 if (rtnh->rtnh_ifindex)
4784 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4785
4786 attrlen = rtnh_attrlen(rtnh);
4787 if (attrlen > 0) {
4788 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4789
4790 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4791 if (nla) {
4792 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4793 r_cfg.fc_flags |= RTF_GATEWAY;
4794 }
4795 }
333c4301 4796 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4797 if (err)
4798 last_err = err;
4799
51ebd318
ND
4800 rtnh = rtnh_next(rtnh, &remaining);
4801 }
4802
4803 return last_err;
4804}
4805
c21ef3e3
DA
4806static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4807 struct netlink_ext_ack *extack)
1da177e4 4808{
86872cb5
TG
4809 struct fib6_config cfg;
4810 int err;
1da177e4 4811
333c4301 4812 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4813 if (err < 0)
4814 return err;
4815
51ebd318 4816 if (cfg.fc_mp)
333c4301 4817 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4818 else {
4819 cfg.fc_delete_all_nh = 1;
333c4301 4820 return ip6_route_del(&cfg, extack);
0ae81335 4821 }
1da177e4
LT
4822}
4823
c21ef3e3
DA
4824static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4825 struct netlink_ext_ack *extack)
1da177e4 4826{
86872cb5
TG
4827 struct fib6_config cfg;
4828 int err;
1da177e4 4829
333c4301 4830 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4831 if (err < 0)
4832 return err;
4833
67f69513
DA
4834 if (cfg.fc_metric == 0)
4835 cfg.fc_metric = IP6_RT_PRIO_USER;
4836
51ebd318 4837 if (cfg.fc_mp)
333c4301 4838 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4839 else
acb54e3c 4840 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4841}
4842
8d1c802b 4843static size_t rt6_nlmsg_size(struct fib6_info *rt)
339bf98f 4844{
beb1afac
DA
4845 int nexthop_len = 0;
4846
f88d8ea6
DA
4847 if (rt->nh)
4848 nexthop_len += nla_total_size(4); /* RTA_NH_ID */
4849
93c2fb25 4850 if (rt->fib6_nsiblings) {
beb1afac
DA
4851 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4852 + NLA_ALIGN(sizeof(struct rtnexthop))
4853 + nla_total_size(16) /* RTA_GATEWAY */
1cf844c7 4854 + lwtunnel_get_encap_size(rt->fib6_nh->fib_nh_lws);
beb1afac 4855
93c2fb25 4856 nexthop_len *= rt->fib6_nsiblings;
beb1afac
DA
4857 }
4858
339bf98f
TG
4859 return NLMSG_ALIGN(sizeof(struct rtmsg))
4860 + nla_total_size(16) /* RTA_SRC */
4861 + nla_total_size(16) /* RTA_DST */
4862 + nla_total_size(16) /* RTA_GATEWAY */
4863 + nla_total_size(16) /* RTA_PREFSRC */
4864 + nla_total_size(4) /* RTA_TABLE */
4865 + nla_total_size(4) /* RTA_IIF */
4866 + nla_total_size(4) /* RTA_OIF */
4867 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4868 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4869 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4870 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4871 + nla_total_size(1) /* RTA_PREF */
1cf844c7 4872 + lwtunnel_get_encap_size(rt->fib6_nh->fib_nh_lws)
beb1afac
DA
4873 + nexthop_len;
4874}
4875
f88d8ea6
DA
4876static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh,
4877 unsigned char *flags)
4878{
4879 if (nexthop_is_multipath(nh)) {
4880 struct nlattr *mp;
4881
4882 mp = nla_nest_start(skb, RTA_MULTIPATH);
4883 if (!mp)
4884 goto nla_put_failure;
4885
4886 if (nexthop_mpath_fill_node(skb, nh))
4887 goto nla_put_failure;
4888
4889 nla_nest_end(skb, mp);
4890 } else {
4891 struct fib6_nh *fib6_nh;
4892
4893 fib6_nh = nexthop_fib6_nh(nh);
4894 if (fib_nexthop_info(skb, &fib6_nh->nh_common,
4895 flags, false) < 0)
4896 goto nla_put_failure;
4897 }
4898
4899 return 0;
4900
4901nla_put_failure:
4902 return -EMSGSIZE;
4903}
4904
d4ead6b3 4905static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 4906 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 4907 struct in6_addr *dest, struct in6_addr *src,
15e47304 4908 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4909 unsigned int flags)
1da177e4 4910{
22d0bd82
XL
4911 struct rt6_info *rt6 = (struct rt6_info *)dst;
4912 struct rt6key *rt6_dst, *rt6_src;
4913 u32 *pmetrics, table, rt6_flags;
f88d8ea6 4914 unsigned char nh_flags = 0;
2d7202bf 4915 struct nlmsghdr *nlh;
22d0bd82 4916 struct rtmsg *rtm;
d4ead6b3 4917 long expires = 0;
1da177e4 4918
15e47304 4919 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4920 if (!nlh)
26932566 4921 return -EMSGSIZE;
2d7202bf 4922
22d0bd82
XL
4923 if (rt6) {
4924 rt6_dst = &rt6->rt6i_dst;
4925 rt6_src = &rt6->rt6i_src;
4926 rt6_flags = rt6->rt6i_flags;
4927 } else {
4928 rt6_dst = &rt->fib6_dst;
4929 rt6_src = &rt->fib6_src;
4930 rt6_flags = rt->fib6_flags;
4931 }
4932
2d7202bf 4933 rtm = nlmsg_data(nlh);
1da177e4 4934 rtm->rtm_family = AF_INET6;
22d0bd82
XL
4935 rtm->rtm_dst_len = rt6_dst->plen;
4936 rtm->rtm_src_len = rt6_src->plen;
1da177e4 4937 rtm->rtm_tos = 0;
93c2fb25
DA
4938 if (rt->fib6_table)
4939 table = rt->fib6_table->tb6_id;
c71099ac 4940 else
9e762a4a 4941 table = RT6_TABLE_UNSPEC;
97f0082a 4942 rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
c78679e8
DM
4943 if (nla_put_u32(skb, RTA_TABLE, table))
4944 goto nla_put_failure;
e8478e80
DA
4945
4946 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4947 rtm->rtm_flags = 0;
4948 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
93c2fb25 4949 rtm->rtm_protocol = rt->fib6_protocol;
1da177e4 4950
22d0bd82 4951 if (rt6_flags & RTF_CACHE)
1da177e4
LT
4952 rtm->rtm_flags |= RTM_F_CLONED;
4953
d4ead6b3
DA
4954 if (dest) {
4955 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4956 goto nla_put_failure;
1ab1457c 4957 rtm->rtm_dst_len = 128;
1da177e4 4958 } else if (rtm->rtm_dst_len)
22d0bd82 4959 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
c78679e8 4960 goto nla_put_failure;
1da177e4
LT
4961#ifdef CONFIG_IPV6_SUBTREES
4962 if (src) {
930345ea 4963 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4964 goto nla_put_failure;
1ab1457c 4965 rtm->rtm_src_len = 128;
c78679e8 4966 } else if (rtm->rtm_src_len &&
22d0bd82 4967 nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
c78679e8 4968 goto nla_put_failure;
1da177e4 4969#endif
7bc570c8
YH
4970 if (iif) {
4971#ifdef CONFIG_IPV6_MROUTE
22d0bd82 4972 if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
fd61c6ba
DA
4973 int err = ip6mr_get_route(net, skb, rtm, portid);
4974
4975 if (err == 0)
4976 return 0;
4977 if (err < 0)
4978 goto nla_put_failure;
7bc570c8
YH
4979 } else
4980#endif
c78679e8
DM
4981 if (nla_put_u32(skb, RTA_IIF, iif))
4982 goto nla_put_failure;
d4ead6b3 4983 } else if (dest) {
1da177e4 4984 struct in6_addr saddr_buf;
d4ead6b3 4985 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4986 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4987 goto nla_put_failure;
1da177e4 4988 }
2d7202bf 4989
93c2fb25 4990 if (rt->fib6_prefsrc.plen) {
c3968a85 4991 struct in6_addr saddr_buf;
93c2fb25 4992 saddr_buf = rt->fib6_prefsrc.addr;
930345ea 4993 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4994 goto nla_put_failure;
c3968a85
DW
4995 }
4996
d4ead6b3
DA
4997 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4998 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4999 goto nla_put_failure;
5000
93c2fb25 5001 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
c78679e8 5002 goto nla_put_failure;
8253947e 5003
beb1afac
DA
5004 /* For multipath routes, walk the siblings list and add
5005 * each as a nexthop within RTA_MULTIPATH.
5006 */
22d0bd82
XL
5007 if (rt6) {
5008 if (rt6_flags & RTF_GATEWAY &&
5009 nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
5010 goto nla_put_failure;
5011
5012 if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
5013 goto nla_put_failure;
5014 } else if (rt->fib6_nsiblings) {
8d1c802b 5015 struct fib6_info *sibling, *next_sibling;
beb1afac
DA
5016 struct nlattr *mp;
5017
ae0be8de 5018 mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
beb1afac
DA
5019 if (!mp)
5020 goto nla_put_failure;
5021
1cf844c7
DA
5022 if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
5023 rt->fib6_nh->fib_nh_weight) < 0)
beb1afac
DA
5024 goto nla_put_failure;
5025
5026 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25 5027 &rt->fib6_siblings, fib6_siblings) {
1cf844c7
DA
5028 if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
5029 sibling->fib6_nh->fib_nh_weight) < 0)
beb1afac
DA
5030 goto nla_put_failure;
5031 }
5032
5033 nla_nest_end(skb, mp);
f88d8ea6
DA
5034 } else if (rt->nh) {
5035 if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
5036 goto nla_put_failure;
ecc5663c 5037
f88d8ea6
DA
5038 if (nexthop_is_blackhole(rt->nh))
5039 rtm->rtm_type = RTN_BLACKHOLE;
5040
5041 if (rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
5042 goto nla_put_failure;
5043
5044 rtm->rtm_flags |= nh_flags;
5045 } else {
1cf844c7 5046 if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common,
ecc5663c 5047 &nh_flags, false) < 0)
beb1afac 5048 goto nla_put_failure;
ecc5663c
DA
5049
5050 rtm->rtm_flags |= nh_flags;
beb1afac
DA
5051 }
5052
22d0bd82 5053 if (rt6_flags & RTF_EXPIRES) {
14895687
DA
5054 expires = dst ? dst->expires : rt->expires;
5055 expires -= jiffies;
5056 }
69cdf8f9 5057
d4ead6b3 5058 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 5059 goto nla_put_failure;
2d7202bf 5060
22d0bd82 5061 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
c78ba6d6
LR
5062 goto nla_put_failure;
5063
19e42e45 5064
053c095a
JB
5065 nlmsg_end(skb, nlh);
5066 return 0;
2d7202bf
TG
5067
5068nla_put_failure:
26932566
PM
5069 nlmsg_cancel(skb, nlh);
5070 return -EMSGSIZE;
1da177e4
LT
5071}
5072
13e38901
DA
5073static bool fib6_info_uses_dev(const struct fib6_info *f6i,
5074 const struct net_device *dev)
5075{
1cf844c7 5076 if (f6i->fib6_nh->fib_nh_dev == dev)
13e38901
DA
5077 return true;
5078
5079 if (f6i->fib6_nsiblings) {
5080 struct fib6_info *sibling, *next_sibling;
5081
5082 list_for_each_entry_safe(sibling, next_sibling,
5083 &f6i->fib6_siblings, fib6_siblings) {
1cf844c7 5084 if (sibling->fib6_nh->fib_nh_dev == dev)
13e38901
DA
5085 return true;
5086 }
5087 }
5088
5089 return false;
5090}
5091
8d1c802b 5092int rt6_dump_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
5093{
5094 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
13e38901
DA
5095 struct fib_dump_filter *filter = &arg->filter;
5096 unsigned int flags = NLM_F_MULTI;
1f17e2f2
DA
5097 struct net *net = arg->net;
5098
421842ed 5099 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 5100 return 0;
1da177e4 5101
13e38901
DA
5102 if ((filter->flags & RTM_F_PREFIX) &&
5103 !(rt->fib6_flags & RTF_PREFIX_RT)) {
5104 /* success since this is not a prefix route */
5105 return 1;
5106 }
5107 if (filter->filter_set) {
5108 if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
5109 (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
5110 (filter->protocol && rt->fib6_protocol != filter->protocol)) {
f8cfe2ce
DA
5111 return 1;
5112 }
13e38901 5113 flags |= NLM_F_DUMP_FILTERED;
f8cfe2ce 5114 }
1da177e4 5115
d4ead6b3
DA
5116 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
5117 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
13e38901 5118 arg->cb->nlh->nlmsg_seq, flags);
1da177e4
LT
5119}
5120
0eff0a27
JK
5121static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
5122 const struct nlmsghdr *nlh,
5123 struct nlattr **tb,
5124 struct netlink_ext_ack *extack)
5125{
5126 struct rtmsg *rtm;
5127 int i, err;
5128
5129 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
5130 NL_SET_ERR_MSG_MOD(extack,
5131 "Invalid header for get route request");
5132 return -EINVAL;
5133 }
5134
5135 if (!netlink_strict_get_check(skb))
8cb08174
JB
5136 return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
5137 rtm_ipv6_policy, extack);
0eff0a27
JK
5138
5139 rtm = nlmsg_data(nlh);
5140 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
5141 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
5142 rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
5143 rtm->rtm_type) {
5144 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
5145 return -EINVAL;
5146 }
5147 if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
5148 NL_SET_ERR_MSG_MOD(extack,
5149 "Invalid flags for get route request");
5150 return -EINVAL;
5151 }
5152
8cb08174
JB
5153 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
5154 rtm_ipv6_policy, extack);
0eff0a27
JK
5155 if (err)
5156 return err;
5157
5158 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
5159 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
5160 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
5161 return -EINVAL;
5162 }
5163
5164 for (i = 0; i <= RTA_MAX; i++) {
5165 if (!tb[i])
5166 continue;
5167
5168 switch (i) {
5169 case RTA_SRC:
5170 case RTA_DST:
5171 case RTA_IIF:
5172 case RTA_OIF:
5173 case RTA_MARK:
5174 case RTA_UID:
5175 case RTA_SPORT:
5176 case RTA_DPORT:
5177 case RTA_IP_PROTO:
5178 break;
5179 default:
5180 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
5181 return -EINVAL;
5182 }
5183 }
5184
5185 return 0;
5186}
5187
c21ef3e3
DA
5188static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
5189 struct netlink_ext_ack *extack)
1da177e4 5190{
3b1e0a65 5191 struct net *net = sock_net(in_skb->sk);
ab364a6f 5192 struct nlattr *tb[RTA_MAX+1];
18c3a61c 5193 int err, iif = 0, oif = 0;
a68886a6 5194 struct fib6_info *from;
18c3a61c 5195 struct dst_entry *dst;
ab364a6f 5196 struct rt6_info *rt;
1da177e4 5197 struct sk_buff *skb;
ab364a6f 5198 struct rtmsg *rtm;
744486d4 5199 struct flowi6 fl6 = {};
18c3a61c 5200 bool fibmatch;
1da177e4 5201
0eff0a27 5202 err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
ab364a6f
TG
5203 if (err < 0)
5204 goto errout;
1da177e4 5205
ab364a6f 5206 err = -EINVAL;
38b7097b
HFS
5207 rtm = nlmsg_data(nlh);
5208 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 5209 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 5210
ab364a6f
TG
5211 if (tb[RTA_SRC]) {
5212 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
5213 goto errout;
5214
4e3fd7a0 5215 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
5216 }
5217
5218 if (tb[RTA_DST]) {
5219 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
5220 goto errout;
5221
4e3fd7a0 5222 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
5223 }
5224
5225 if (tb[RTA_IIF])
5226 iif = nla_get_u32(tb[RTA_IIF]);
5227
5228 if (tb[RTA_OIF])
72331bc0 5229 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 5230
2e47b291
LC
5231 if (tb[RTA_MARK])
5232 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
5233
622ec2c9
LC
5234 if (tb[RTA_UID])
5235 fl6.flowi6_uid = make_kuid(current_user_ns(),
5236 nla_get_u32(tb[RTA_UID]));
5237 else
5238 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
5239
eacb9384
RP
5240 if (tb[RTA_SPORT])
5241 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
5242
5243 if (tb[RTA_DPORT])
5244 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
5245
5246 if (tb[RTA_IP_PROTO]) {
5247 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
5e1a99ea
HL
5248 &fl6.flowi6_proto, AF_INET6,
5249 extack);
eacb9384
RP
5250 if (err)
5251 goto errout;
5252 }
5253
1da177e4
LT
5254 if (iif) {
5255 struct net_device *dev;
72331bc0
SL
5256 int flags = 0;
5257
121622db
FW
5258 rcu_read_lock();
5259
5260 dev = dev_get_by_index_rcu(net, iif);
1da177e4 5261 if (!dev) {
121622db 5262 rcu_read_unlock();
1da177e4 5263 err = -ENODEV;
ab364a6f 5264 goto errout;
1da177e4 5265 }
72331bc0
SL
5266
5267 fl6.flowi6_iif = iif;
5268
5269 if (!ipv6_addr_any(&fl6.saddr))
5270 flags |= RT6_LOOKUP_F_HAS_SADDR;
5271
b75cc8f9 5272 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
5273
5274 rcu_read_unlock();
72331bc0
SL
5275 } else {
5276 fl6.flowi6_oif = oif;
5277
58acfd71 5278 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
5279 }
5280
18c3a61c
RP
5281
5282 rt = container_of(dst, struct rt6_info, dst);
5283 if (rt->dst.error) {
5284 err = rt->dst.error;
5285 ip6_rt_put(rt);
5286 goto errout;
1da177e4
LT
5287 }
5288
9d6acb3b
WC
5289 if (rt == net->ipv6.ip6_null_entry) {
5290 err = rt->dst.error;
5291 ip6_rt_put(rt);
5292 goto errout;
5293 }
5294
ab364a6f 5295 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 5296 if (!skb) {
94e187c0 5297 ip6_rt_put(rt);
ab364a6f
TG
5298 err = -ENOBUFS;
5299 goto errout;
5300 }
1da177e4 5301
d8d1f30b 5302 skb_dst_set(skb, &rt->dst);
a68886a6
DA
5303
5304 rcu_read_lock();
5305 from = rcu_dereference(rt->from);
886b7a50
MKL
5306 if (from) {
5307 if (fibmatch)
5308 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
5309 iif, RTM_NEWROUTE,
5310 NETLINK_CB(in_skb).portid,
5311 nlh->nlmsg_seq, 0);
5312 else
5313 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5314 &fl6.saddr, iif, RTM_NEWROUTE,
5315 NETLINK_CB(in_skb).portid,
5316 nlh->nlmsg_seq, 0);
5317 } else {
5318 err = -ENETUNREACH;
5319 }
a68886a6
DA
5320 rcu_read_unlock();
5321
1da177e4 5322 if (err < 0) {
ab364a6f
TG
5323 kfree_skb(skb);
5324 goto errout;
1da177e4
LT
5325 }
5326
15e47304 5327 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 5328errout:
1da177e4 5329 return err;
1da177e4
LT
5330}
5331
8d1c802b 5332void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
37a1d361 5333 unsigned int nlm_flags)
1da177e4
LT
5334{
5335 struct sk_buff *skb;
5578689a 5336 struct net *net = info->nl_net;
528c4ceb
DL
5337 u32 seq;
5338 int err;
5339
5340 err = -ENOBUFS;
38308473 5341 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 5342
19e42e45 5343 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 5344 if (!skb)
21713ebc
TG
5345 goto errout;
5346
d4ead6b3
DA
5347 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5348 event, info->portid, seq, nlm_flags);
26932566
PM
5349 if (err < 0) {
5350 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
5351 WARN_ON(err == -EMSGSIZE);
5352 kfree_skb(skb);
5353 goto errout;
5354 }
15e47304 5355 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
5356 info->nlh, gfp_any());
5357 return;
21713ebc
TG
5358errout:
5359 if (err < 0)
5578689a 5360 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
5361}
5362
19a3b7ee
DA
5363void fib6_rt_update(struct net *net, struct fib6_info *rt,
5364 struct nl_info *info)
5365{
5366 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
5367 struct sk_buff *skb;
5368 int err = -ENOBUFS;
5369
5370 /* call_fib6_entry_notifiers will be removed when in-kernel notifier
5371 * is implemented and supported for nexthop objects
5372 */
5373 call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, rt, NULL);
5374
5375 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
5376 if (!skb)
5377 goto errout;
5378
5379 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5380 RTM_NEWROUTE, info->portid, seq, NLM_F_REPLACE);
5381 if (err < 0) {
5382 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
5383 WARN_ON(err == -EMSGSIZE);
5384 kfree_skb(skb);
5385 goto errout;
5386 }
5387 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
5388 info->nlh, gfp_any());
5389 return;
5390errout:
5391 if (err < 0)
5392 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
5393}
5394
8ed67789 5395static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 5396 unsigned long event, void *ptr)
8ed67789 5397{
351638e7 5398 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 5399 struct net *net = dev_net(dev);
8ed67789 5400
242d3a49
WC
5401 if (!(dev->flags & IFF_LOOPBACK))
5402 return NOTIFY_OK;
5403
5404 if (event == NETDEV_REGISTER) {
1cf844c7 5405 net->ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = dev;
d8d1f30b 5406 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
5407 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5408#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 5409 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 5410 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 5411 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 5412 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 5413#endif
76da0704
WC
5414 } else if (event == NETDEV_UNREGISTER &&
5415 dev->reg_state != NETREG_UNREGISTERED) {
5416 /* NETDEV_UNREGISTER could be fired for multiple times by
5417 * netdev_wait_allrefs(). Make sure we only call this once.
5418 */
12d94a80 5419 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 5420#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
5421 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5422 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
5423#endif
5424 }
5425
5426 return NOTIFY_OK;
5427}
5428
1da177e4
LT
5429/*
5430 * /proc
5431 */
5432
5433#ifdef CONFIG_PROC_FS
1da177e4
LT
5434static int rt6_stats_seq_show(struct seq_file *seq, void *v)
5435{
69ddb805 5436 struct net *net = (struct net *)seq->private;
1da177e4 5437 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
5438 net->ipv6.rt6_stats->fib_nodes,
5439 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 5440 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
5441 net->ipv6.rt6_stats->fib_rt_entries,
5442 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 5443 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 5444 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
5445
5446 return 0;
5447}
1da177e4
LT
5448#endif /* CONFIG_PROC_FS */
5449
5450#ifdef CONFIG_SYSCTL
5451
1da177e4 5452static
fe2c6338 5453int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
5454 void __user *buffer, size_t *lenp, loff_t *ppos)
5455{
c486da34
LAG
5456 struct net *net;
5457 int delay;
f0fb9b28 5458 int ret;
c486da34 5459 if (!write)
1da177e4 5460 return -EINVAL;
c486da34
LAG
5461
5462 net = (struct net *)ctl->extra1;
5463 delay = net->ipv6.sysctl.flush_delay;
f0fb9b28
AP
5464 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
5465 if (ret)
5466 return ret;
5467
2ac3ac8f 5468 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 5469 return 0;
1da177e4
LT
5470}
5471
7c6bb7d2
DA
5472static int zero;
5473static int one = 1;
5474
ed792e28 5475static struct ctl_table ipv6_route_table_template[] = {
1ab1457c 5476 {
1da177e4 5477 .procname = "flush",
4990509f 5478 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 5479 .maxlen = sizeof(int),
89c8b3a1 5480 .mode = 0200,
6d9f239a 5481 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
5482 },
5483 {
1da177e4 5484 .procname = "gc_thresh",
9a7ec3a9 5485 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
5486 .maxlen = sizeof(int),
5487 .mode = 0644,
6d9f239a 5488 .proc_handler = proc_dointvec,
1da177e4
LT
5489 },
5490 {
1da177e4 5491 .procname = "max_size",
4990509f 5492 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
5493 .maxlen = sizeof(int),
5494 .mode = 0644,
6d9f239a 5495 .proc_handler = proc_dointvec,
1da177e4
LT
5496 },
5497 {
1da177e4 5498 .procname = "gc_min_interval",
4990509f 5499 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5500 .maxlen = sizeof(int),
5501 .mode = 0644,
6d9f239a 5502 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5503 },
5504 {
1da177e4 5505 .procname = "gc_timeout",
4990509f 5506 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
5507 .maxlen = sizeof(int),
5508 .mode = 0644,
6d9f239a 5509 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5510 },
5511 {
1da177e4 5512 .procname = "gc_interval",
4990509f 5513 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
5514 .maxlen = sizeof(int),
5515 .mode = 0644,
6d9f239a 5516 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5517 },
5518 {
1da177e4 5519 .procname = "gc_elasticity",
4990509f 5520 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5521 .maxlen = sizeof(int),
5522 .mode = 0644,
f3d3f616 5523 .proc_handler = proc_dointvec,
1da177e4
LT
5524 },
5525 {
1da177e4 5526 .procname = "mtu_expires",
4990509f 5527 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5528 .maxlen = sizeof(int),
5529 .mode = 0644,
6d9f239a 5530 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5531 },
5532 {
1da177e4 5533 .procname = "min_adv_mss",
4990509f 5534 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5535 .maxlen = sizeof(int),
5536 .mode = 0644,
f3d3f616 5537 .proc_handler = proc_dointvec,
1da177e4
LT
5538 },
5539 {
1da177e4 5540 .procname = "gc_min_interval_ms",
4990509f 5541 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5542 .maxlen = sizeof(int),
5543 .mode = 0644,
6d9f239a 5544 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5545 },
7c6bb7d2
DA
5546 {
5547 .procname = "skip_notify_on_dev_down",
5548 .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
5549 .maxlen = sizeof(int),
5550 .mode = 0644,
5551 .proc_handler = proc_dointvec,
5552 .extra1 = &zero,
5553 .extra2 = &one,
5554 },
f8572d8f 5555 { }
1da177e4
LT
5556};
5557
2c8c1e72 5558struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5559{
5560 struct ctl_table *table;
5561
5562 table = kmemdup(ipv6_route_table_template,
5563 sizeof(ipv6_route_table_template),
5564 GFP_KERNEL);
5ee09105
YH
5565
5566 if (table) {
5567 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5568 table[0].extra1 = net;
86393e52 5569 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5570 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5571 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5572 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5573 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5574 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5575 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5576 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5577 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
7c6bb7d2 5578 table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
464dc801
EB
5579
5580 /* Don't export sysctls to unprivileged users */
5581 if (net->user_ns != &init_user_ns)
5582 table[0].procname = NULL;
5ee09105
YH
5583 }
5584
760f2d01
DL
5585 return table;
5586}
1da177e4
LT
5587#endif
5588
2c8c1e72 5589static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5590{
633d424b 5591 int ret = -ENOMEM;
8ed67789 5592
86393e52
AD
5593 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5594 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5595
fc66f95c
ED
5596 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5597 goto out_ip6_dst_ops;
5598
1cf844c7 5599 net->ipv6.fib6_null_entry = fib6_info_alloc(GFP_KERNEL, true);
421842ed
DA
5600 if (!net->ipv6.fib6_null_entry)
5601 goto out_ip6_dst_entries;
1cf844c7
DA
5602 memcpy(net->ipv6.fib6_null_entry, &fib6_null_entry_template,
5603 sizeof(*net->ipv6.fib6_null_entry));
421842ed 5604
8ed67789
DL
5605 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5606 sizeof(*net->ipv6.ip6_null_entry),
5607 GFP_KERNEL);
5608 if (!net->ipv6.ip6_null_entry)
421842ed 5609 goto out_fib6_null_entry;
d8d1f30b 5610 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5611 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5612 ip6_template_metrics, true);
8ed67789
DL
5613
5614#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5615 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5616 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5617 sizeof(*net->ipv6.ip6_prohibit_entry),
5618 GFP_KERNEL);
68fffc67
PZ
5619 if (!net->ipv6.ip6_prohibit_entry)
5620 goto out_ip6_null_entry;
d8d1f30b 5621 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5622 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5623 ip6_template_metrics, true);
8ed67789
DL
5624
5625 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5626 sizeof(*net->ipv6.ip6_blk_hole_entry),
5627 GFP_KERNEL);
68fffc67
PZ
5628 if (!net->ipv6.ip6_blk_hole_entry)
5629 goto out_ip6_prohibit_entry;
d8d1f30b 5630 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5631 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5632 ip6_template_metrics, true);
8ed67789
DL
5633#endif
5634
b339a47c
PZ
5635 net->ipv6.sysctl.flush_delay = 0;
5636 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5637 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5638 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5639 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5640 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5641 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5642 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
7c6bb7d2 5643 net->ipv6.sysctl.skip_notify_on_dev_down = 0;
b339a47c 5644
6891a346
BT
5645 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5646
8ed67789
DL
5647 ret = 0;
5648out:
5649 return ret;
f2fc6a54 5650
68fffc67
PZ
5651#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5652out_ip6_prohibit_entry:
5653 kfree(net->ipv6.ip6_prohibit_entry);
5654out_ip6_null_entry:
5655 kfree(net->ipv6.ip6_null_entry);
5656#endif
421842ed
DA
5657out_fib6_null_entry:
5658 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5659out_ip6_dst_entries:
5660 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5661out_ip6_dst_ops:
f2fc6a54 5662 goto out;
cdb18761
DL
5663}
5664
2c8c1e72 5665static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5666{
421842ed 5667 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5668 kfree(net->ipv6.ip6_null_entry);
5669#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5670 kfree(net->ipv6.ip6_prohibit_entry);
5671 kfree(net->ipv6.ip6_blk_hole_entry);
5672#endif
41bb78b4 5673 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5674}
5675
d189634e
TG
5676static int __net_init ip6_route_net_init_late(struct net *net)
5677{
5678#ifdef CONFIG_PROC_FS
c3506372
CH
5679 proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5680 sizeof(struct ipv6_route_iter));
3617d949
CH
5681 proc_create_net_single("rt6_stats", 0444, net->proc_net,
5682 rt6_stats_seq_show, NULL);
d189634e
TG
5683#endif
5684 return 0;
5685}
5686
5687static void __net_exit ip6_route_net_exit_late(struct net *net)
5688{
5689#ifdef CONFIG_PROC_FS
ece31ffd
G
5690 remove_proc_entry("ipv6_route", net->proc_net);
5691 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5692#endif
5693}
5694
cdb18761
DL
5695static struct pernet_operations ip6_route_net_ops = {
5696 .init = ip6_route_net_init,
5697 .exit = ip6_route_net_exit,
5698};
5699
c3426b47
DM
5700static int __net_init ipv6_inetpeer_init(struct net *net)
5701{
5702 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5703
5704 if (!bp)
5705 return -ENOMEM;
5706 inet_peer_base_init(bp);
5707 net->ipv6.peers = bp;
5708 return 0;
5709}
5710
5711static void __net_exit ipv6_inetpeer_exit(struct net *net)
5712{
5713 struct inet_peer_base *bp = net->ipv6.peers;
5714
5715 net->ipv6.peers = NULL;
56a6b248 5716 inetpeer_invalidate_tree(bp);
c3426b47
DM
5717 kfree(bp);
5718}
5719
2b823f72 5720static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5721 .init = ipv6_inetpeer_init,
5722 .exit = ipv6_inetpeer_exit,
5723};
5724
d189634e
TG
5725static struct pernet_operations ip6_route_net_late_ops = {
5726 .init = ip6_route_net_init_late,
5727 .exit = ip6_route_net_exit_late,
5728};
5729
8ed67789
DL
5730static struct notifier_block ip6_route_dev_notifier = {
5731 .notifier_call = ip6_route_dev_notify,
242d3a49 5732 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5733};
5734
2f460933
WC
5735void __init ip6_route_init_special_entries(void)
5736{
5737 /* Registering of the loopback is done before this portion of code,
5738 * the loopback reference in rt6_info will not be taken, do it
5739 * manually for init_net */
1cf844c7 5740 init_net.ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = init_net.loopback_dev;
2f460933
WC
5741 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5742 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5743 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5744 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5745 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5746 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5747 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5748 #endif
5749}
5750
433d49c3 5751int __init ip6_route_init(void)
1da177e4 5752{
433d49c3 5753 int ret;
8d0b94af 5754 int cpu;
433d49c3 5755
9a7ec3a9
DL
5756 ret = -ENOMEM;
5757 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5758 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5759 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5760 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5761 goto out;
14e50e57 5762
fc66f95c 5763 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5764 if (ret)
bdb3289f 5765 goto out_kmem_cache;
bdb3289f 5766
c3426b47
DM
5767 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5768 if (ret)
e8803b6c 5769 goto out_dst_entries;
2a0c451a 5770
7e52b33b
DM
5771 ret = register_pernet_subsys(&ip6_route_net_ops);
5772 if (ret)
5773 goto out_register_inetpeer;
c3426b47 5774
5dc121e9
AE
5775 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5776
e8803b6c 5777 ret = fib6_init();
433d49c3 5778 if (ret)
8ed67789 5779 goto out_register_subsys;
433d49c3 5780
433d49c3
DL
5781 ret = xfrm6_init();
5782 if (ret)
e8803b6c 5783 goto out_fib6_init;
c35b7e72 5784
433d49c3
DL
5785 ret = fib6_rules_init();
5786 if (ret)
5787 goto xfrm6_init;
7e5449c2 5788
d189634e
TG
5789 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5790 if (ret)
5791 goto fib6_rules_init;
5792
16feebcf
FW
5793 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5794 inet6_rtm_newroute, NULL, 0);
5795 if (ret < 0)
5796 goto out_register_late_subsys;
5797
5798 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5799 inet6_rtm_delroute, NULL, 0);
5800 if (ret < 0)
5801 goto out_register_late_subsys;
5802
5803 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5804 inet6_rtm_getroute, NULL,
5805 RTNL_FLAG_DOIT_UNLOCKED);
5806 if (ret < 0)
d189634e 5807 goto out_register_late_subsys;
c127ea2c 5808
8ed67789 5809 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5810 if (ret)
d189634e 5811 goto out_register_late_subsys;
8ed67789 5812
8d0b94af
MKL
5813 for_each_possible_cpu(cpu) {
5814 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5815
5816 INIT_LIST_HEAD(&ul->head);
5817 spin_lock_init(&ul->lock);
5818 }
5819
433d49c3
DL
5820out:
5821 return ret;
5822
d189634e 5823out_register_late_subsys:
16feebcf 5824 rtnl_unregister_all(PF_INET6);
d189634e 5825 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5826fib6_rules_init:
433d49c3
DL
5827 fib6_rules_cleanup();
5828xfrm6_init:
433d49c3 5829 xfrm6_fini();
2a0c451a
TG
5830out_fib6_init:
5831 fib6_gc_cleanup();
8ed67789
DL
5832out_register_subsys:
5833 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5834out_register_inetpeer:
5835 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5836out_dst_entries:
5837 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5838out_kmem_cache:
f2fc6a54 5839 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5840 goto out;
1da177e4
LT
5841}
5842
5843void ip6_route_cleanup(void)
5844{
8ed67789 5845 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5846 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5847 fib6_rules_cleanup();
1da177e4 5848 xfrm6_fini();
1da177e4 5849 fib6_gc_cleanup();
c3426b47 5850 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5851 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5852 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5853 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5854}