net: hns3: Fix inconsistent indenting
[linux-block.git] / net / ipv6 / route.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * Linux INET6 implementation
4 * FIB front-end.
5 *
6 * Authors:
1ab1457c 7 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4
LT
8 */
9
10/* Changes:
11 *
12 * YOSHIFUJI Hideaki @USAGI
13 * reworked default router selection.
14 * - respect outgoing interface
15 * - select from (probably) reachable routers (i.e.
16 * routers in REACHABLE, STALE, DELAY or PROBE states).
17 * - always select the same router if it is (probably)
18 * reachable. otherwise, round-robin the list.
c0bece9f
YH
19 * Ville Nuorvala
20 * Fixed routing subtrees.
1da177e4
LT
21 */
22
f3213831
JP
23#define pr_fmt(fmt) "IPv6: " fmt
24
4fc268d2 25#include <linux/capability.h>
1da177e4 26#include <linux/errno.h>
bc3b2d7f 27#include <linux/export.h>
1da177e4
LT
28#include <linux/types.h>
29#include <linux/times.h>
30#include <linux/socket.h>
31#include <linux/sockios.h>
32#include <linux/net.h>
33#include <linux/route.h>
34#include <linux/netdevice.h>
35#include <linux/in6.h>
7bc570c8 36#include <linux/mroute6.h>
1da177e4 37#include <linux/init.h>
1da177e4 38#include <linux/if_arp.h>
1da177e4
LT
39#include <linux/proc_fs.h>
40#include <linux/seq_file.h>
5b7c931d 41#include <linux/nsproxy.h>
5a0e3ad6 42#include <linux/slab.h>
35732d01 43#include <linux/jhash.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
904af04d 54#include <net/dst_metadata.h>
1da177e4 55#include <net/xfrm.h>
8d71740c 56#include <net/netevent.h>
21713ebc 57#include <net/netlink.h>
3c618c1d 58#include <net/rtnh.h>
19e42e45 59#include <net/lwtunnel.h>
904af04d 60#include <net/ip_tunnels.h>
ca254490 61#include <net/l3mdev.h>
eacb9384 62#include <net/ip.h>
7c0f6ba6 63#include <linux/uaccess.h>
1da177e4
LT
64
65#ifdef CONFIG_SYSCTL
66#include <linux/sysctl.h>
67#endif
68
30d444d3
DA
69static int ip6_rt_type_to_error(u8 fib6_type);
70
71#define CREATE_TRACE_POINTS
72#include <trace/events/fib6.h>
73EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
74#undef CREATE_TRACE_POINTS
75
/*
 * Outcome of scoring a nexthop's neighbour entry.  Every failure code is
 * negative so callers can test "< 0"; the single success code is positive.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1,
};
82
1da177e4 83static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 84static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 85static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
86static struct dst_entry *ip6_negative_advice(struct dst_entry *);
87static void ip6_dst_destroy(struct dst_entry *);
88static void ip6_dst_ifdown(struct dst_entry *,
89 struct net_device *dev, int how);
569d3645 90static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
91
92static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 94static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 95static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 96static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
97static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb, u32 mtu);
99static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
100 struct sk_buff *skb);
702cea56
DA
101static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
102 int strict);
8d1c802b 103static size_t rt6_nlmsg_size(struct fib6_info *rt);
d4ead6b3 104static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 105 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 106 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
107 int iif, int type, u32 portid, u32 seq,
108 unsigned int flags);
7e4b5128 109static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
510e2ced
WW
110 const struct in6_addr *daddr,
111 const struct in6_addr *saddr);
1da177e4 112
70ceb4f5 113#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 114static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 115 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
116 const struct in6_addr *gwaddr,
117 struct net_device *dev,
95c96174 118 unsigned int pref);
8d1c802b 119static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 120 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
121 const struct in6_addr *gwaddr,
122 struct net_device *dev);
70ceb4f5
YH
123#endif
124
8d0b94af
MKL
125struct uncached_list {
126 spinlock_t lock;
127 struct list_head head;
128};
129
130static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
131
510c321b 132void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
133{
134 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
135
8d0b94af
MKL
136 rt->rt6i_uncached_list = ul;
137
138 spin_lock_bh(&ul->lock);
139 list_add_tail(&rt->rt6i_uncached, &ul->head);
140 spin_unlock_bh(&ul->lock);
141}
142
510c321b 143void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
144{
145 if (!list_empty(&rt->rt6i_uncached)) {
146 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 147 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
148
149 spin_lock_bh(&ul->lock);
150 list_del(&rt->rt6i_uncached);
81eb8447 151 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
152 spin_unlock_bh(&ul->lock);
153 }
154}
155
156static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
157{
158 struct net_device *loopback_dev = net->loopback_dev;
159 int cpu;
160
e332bc67
EB
161 if (dev == loopback_dev)
162 return;
163
8d0b94af
MKL
164 for_each_possible_cpu(cpu) {
165 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
166 struct rt6_info *rt;
167
168 spin_lock_bh(&ul->lock);
169 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
170 struct inet6_dev *rt_idev = rt->rt6i_idev;
171 struct net_device *rt_dev = rt->dst.dev;
172
e332bc67 173 if (rt_idev->dev == dev) {
8d0b94af
MKL
174 rt->rt6i_idev = in6_dev_get(loopback_dev);
175 in6_dev_put(rt_idev);
176 }
177
e332bc67 178 if (rt_dev == dev) {
8d0b94af
MKL
179 rt->dst.dev = loopback_dev;
180 dev_hold(rt->dst.dev);
181 dev_put(rt_dev);
182 }
183 }
184 spin_unlock_bh(&ul->lock);
185 }
186}
187
f8a1b43b 188static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
189 struct sk_buff *skb,
190 const void *daddr)
39232973 191{
a7563f34 192 if (!ipv6_addr_any(p))
39232973 193 return (const void *) p;
f894cbf8
DM
194 else if (skb)
195 return &ipv6_hdr(skb)->daddr;
39232973
DM
196 return daddr;
197}
198
f8a1b43b
DA
199struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
200 struct net_device *dev,
201 struct sk_buff *skb,
202 const void *daddr)
d3aaeb38 203{
39232973
DM
204 struct neighbour *n;
205
f8a1b43b
DA
206 daddr = choose_neigh_daddr(gw, skb, daddr);
207 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
208 if (n)
209 return n;
7adf3246
SB
210
211 n = neigh_create(&nd_tbl, daddr, dev);
212 return IS_ERR(n) ? NULL : n;
f8a1b43b
DA
213}
214
215static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
216 struct sk_buff *skb,
217 const void *daddr)
218{
219 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
220
221 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
222}
223
63fca65d
JA
224static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
225{
226 struct net_device *dev = dst->dev;
227 struct rt6_info *rt = (struct rt6_info *)dst;
228
f8a1b43b 229 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
230 if (!daddr)
231 return;
232 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
233 return;
234 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
235 return;
236 __ipv6_confirm_neigh(dev, daddr);
237}
238
9a7ec3a9 239static struct dst_ops ip6_dst_ops_template = {
1da177e4 240 .family = AF_INET6,
1da177e4
LT
241 .gc = ip6_dst_gc,
242 .gc_thresh = 1024,
243 .check = ip6_dst_check,
0dbaee3b 244 .default_advmss = ip6_default_advmss,
ebb762f2 245 .mtu = ip6_mtu,
d4ead6b3 246 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
247 .destroy = ip6_dst_destroy,
248 .ifdown = ip6_dst_ifdown,
249 .negative_advice = ip6_negative_advice,
250 .link_failure = ip6_link_failure,
251 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 252 .redirect = rt6_do_redirect,
9f8955cc 253 .local_out = __ip6_local_out,
f8a1b43b 254 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 255 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
256};
257
ebb762f2 258static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 259{
618f9bc7
SK
260 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
261
262 return mtu ? : dst->dev->mtu;
ec831ea7
RD
263}
264
6700c270
DM
265static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
266 struct sk_buff *skb, u32 mtu)
14e50e57
DM
267{
268}
269
6700c270
DM
/* dst_ops->redirect for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* no-op */
}
274
14e50e57
DM
275static struct dst_ops ip6_dst_blackhole_ops = {
276 .family = AF_INET6,
14e50e57
DM
277 .destroy = ip6_dst_destroy,
278 .check = ip6_dst_check,
ebb762f2 279 .mtu = ip6_blackhole_mtu,
214f45c9 280 .default_advmss = ip6_default_advmss,
14e50e57 281 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 282 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 283 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 284 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
285};
286
62fa8a84 287static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 288 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
289};
290
8d1c802b 291static const struct fib6_info fib6_null_entry_template = {
93c2fb25
DA
292 .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
293 .fib6_protocol = RTPROT_KERNEL,
294 .fib6_metric = ~(u32)0,
f05713e0 295 .fib6_ref = REFCOUNT_INIT(1),
421842ed
DA
296 .fib6_type = RTN_UNREACHABLE,
297 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
298};
299
fb0af4c7 300static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
301 .dst = {
302 .__refcnt = ATOMIC_INIT(1),
303 .__use = 1,
2c20cbd7 304 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 305 .error = -ENETUNREACH,
d8d1f30b
CG
306 .input = ip6_pkt_discard,
307 .output = ip6_pkt_discard_out,
1da177e4
LT
308 },
309 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
1da177e4
LT
310};
311
101367c2
TG
312#ifdef CONFIG_IPV6_MULTIPLE_TABLES
313
fb0af4c7 314static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
315 .dst = {
316 .__refcnt = ATOMIC_INIT(1),
317 .__use = 1,
2c20cbd7 318 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 319 .error = -EACCES,
d8d1f30b
CG
320 .input = ip6_pkt_prohibit,
321 .output = ip6_pkt_prohibit_out,
101367c2
TG
322 },
323 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
324};
325
fb0af4c7 326static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
327 .dst = {
328 .__refcnt = ATOMIC_INIT(1),
329 .__use = 1,
2c20cbd7 330 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 331 .error = -EINVAL,
d8d1f30b 332 .input = dst_discard,
ede2059d 333 .output = dst_discard_out,
101367c2
TG
334 },
335 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
336};
337
338#endif
339
ebfa45f0
MKL
340static void rt6_info_init(struct rt6_info *rt)
341{
342 struct dst_entry *dst = &rt->dst;
343
344 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
ebfa45f0
MKL
345 INIT_LIST_HEAD(&rt->rt6i_uncached);
346}
347
1da177e4 348/* allocate dst with ip6_dst_ops */
93531c67
DA
349struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
350 int flags)
1da177e4 351{
97bab73f 352 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 353 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 354
81eb8447 355 if (rt) {
ebfa45f0 356 rt6_info_init(rt);
81eb8447
WW
357 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
358 }
8104891b 359
cf911662 360 return rt;
1da177e4 361}
9ab179d8 362EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 363
1da177e4
LT
364static void ip6_dst_destroy(struct dst_entry *dst)
365{
366 struct rt6_info *rt = (struct rt6_info *)dst;
a68886a6 367 struct fib6_info *from;
8d0b94af 368 struct inet6_dev *idev;
1da177e4 369
1620a336 370 ip_dst_metrics_put(dst);
8d0b94af
MKL
371 rt6_uncached_list_del(rt);
372
373 idev = rt->rt6i_idev;
38308473 374 if (idev) {
1da177e4
LT
375 rt->rt6i_idev = NULL;
376 in6_dev_put(idev);
1ab1457c 377 }
1716a961 378
0e233874 379 from = xchg((__force struct fib6_info **)&rt->from, NULL);
93531c67 380 fib6_info_release(from);
b3419363
DM
381}
382
1da177e4
LT
383static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
384 int how)
385{
386 struct rt6_info *rt = (struct rt6_info *)dst;
387 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 388 struct net_device *loopback_dev =
c346dca1 389 dev_net(dev)->loopback_dev;
1da177e4 390
e5645f51
WW
391 if (idev && idev->dev != loopback_dev) {
392 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
393 if (loopback_idev) {
394 rt->rt6i_idev = loopback_idev;
395 in6_dev_put(idev);
97cac082 396 }
1da177e4
LT
397 }
398}
399
5973fb1e
MKL
400static bool __rt6_check_expired(const struct rt6_info *rt)
401{
402 if (rt->rt6i_flags & RTF_EXPIRES)
403 return time_after(jiffies, rt->dst.expires);
404 else
405 return false;
406}
407
a50feda5 408static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 409{
a68886a6
DA
410 struct fib6_info *from;
411
412 from = rcu_dereference(rt->from);
413
1716a961
G
414 if (rt->rt6i_flags & RTF_EXPIRES) {
415 if (time_after(jiffies, rt->dst.expires))
a50feda5 416 return true;
a68886a6 417 } else if (from) {
1e2ea8ad 418 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
a68886a6 419 fib6_check_expired(from);
1716a961 420 }
a50feda5 421 return false;
1da177e4
LT
422}
423
b1d40991
DA
424void fib6_select_path(const struct net *net, struct fib6_result *res,
425 struct flowi6 *fl6, int oif, bool have_oif_match,
426 const struct sk_buff *skb, int strict)
51ebd318 427{
8d1c802b 428 struct fib6_info *sibling, *next_sibling;
b1d40991
DA
429 struct fib6_info *match = res->f6i;
430
431 if (!match->fib6_nsiblings || have_oif_match)
432 goto out;
51ebd318 433
b673d6cc
JS
434 /* We might have already computed the hash for ICMPv6 errors. In such
435 * case it will always be non-zero. Otherwise now is the time to do it.
436 */
437 if (!fl6->mp_hash)
b4bac172 438 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 439
ad1601ae 440 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
b1d40991 441 goto out;
3d709f69 442
93c2fb25
DA
443 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
444 fib6_siblings) {
702cea56 445 const struct fib6_nh *nh = &sibling->fib6_nh;
5e670d84
DA
446 int nh_upper_bound;
447
702cea56 448 nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
5e670d84 449 if (fl6->mp_hash > nh_upper_bound)
3d709f69 450 continue;
702cea56 451 if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
3d709f69
IS
452 break;
453 match = sibling;
454 break;
455 }
456
b1d40991
DA
457out:
458 res->f6i = match;
459 res->nh = &match->fib6_nh;
51ebd318
ND
460}
461
1da177e4 462/*
66f5d6ce 463 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
464 */
465
0c59d006
DA
466static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
467 const struct in6_addr *saddr, int oif, int flags)
468{
469 const struct net_device *dev;
470
471 if (nh->fib_nh_flags & RTNH_F_DEAD)
472 return false;
473
474 dev = nh->fib_nh_dev;
475 if (oif) {
476 if (dev->ifindex == oif)
477 return true;
478 } else {
479 if (ipv6_chk_addr(net, saddr, dev,
480 flags & RT6_LOOKUP_F_IFACE))
481 return true;
482 }
483
484 return false;
485}
486
75ef7389
DA
487static void rt6_device_match(struct net *net, struct fib6_result *res,
488 const struct in6_addr *saddr, int oif, int flags)
1da177e4 489{
75ef7389
DA
490 struct fib6_info *f6i = res->f6i;
491 struct fib6_info *spf6i;
492 struct fib6_nh *nh;
1da177e4 493
75ef7389
DA
494 if (!oif && ipv6_addr_any(saddr)) {
495 nh = &f6i->fib6_nh;
7d21fec9
DA
496 if (!(nh->fib_nh_flags & RTNH_F_DEAD))
497 goto out;
75ef7389 498 }
dd3abc4e 499
75ef7389
DA
500 for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
501 nh = &spf6i->fib6_nh;
502 if (__rt6_device_match(net, nh, saddr, oif, flags)) {
503 res->f6i = spf6i;
7d21fec9 504 goto out;
75ef7389 505 }
dd3abc4e 506 }
1da177e4 507
75ef7389
DA
508 if (oif && flags & RT6_LOOKUP_F_IFACE) {
509 res->f6i = net->ipv6.fib6_null_entry;
7d21fec9
DA
510 nh = &res->f6i->fib6_nh;
511 goto out;
75ef7389 512 }
8067bb8c 513
7d21fec9
DA
514 nh = &f6i->fib6_nh;
515 if (nh->fib_nh_flags & RTNH_F_DEAD) {
75ef7389 516 res->f6i = net->ipv6.fib6_null_entry;
7d21fec9 517 nh = &res->f6i->fib6_nh;
75ef7389 518 }
7d21fec9
DA
519out:
520 res->nh = nh;
521 res->fib6_type = res->f6i->fib6_type;
522 res->fib6_flags = res->f6i->fib6_flags;
1da177e4
LT
523}
524
27097255 525#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
526struct __rt6_probe_work {
527 struct work_struct work;
528 struct in6_addr target;
529 struct net_device *dev;
530};
531
532static void rt6_probe_deferred(struct work_struct *w)
533{
534 struct in6_addr mcaddr;
535 struct __rt6_probe_work *work =
536 container_of(w, struct __rt6_probe_work, work);
537
538 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 539 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 540 dev_put(work->dev);
662f5533 541 kfree(work);
c2f17e82
HFS
542}
543
cc3a86c8 544static void rt6_probe(struct fib6_nh *fib6_nh)
27097255 545{
f547fac6 546 struct __rt6_probe_work *work = NULL;
5e670d84 547 const struct in6_addr *nh_gw;
f2c31e32 548 struct neighbour *neigh;
5e670d84 549 struct net_device *dev;
f547fac6 550 struct inet6_dev *idev;
5e670d84 551
27097255
YH
552 /*
553 * Okay, this does not seem to be appropriate
554 * for now, however, we need to check if it
555 * is really so; aka Router Reachability Probing.
556 *
557 * Router Reachability Probe MUST be rate-limited
558 * to no more than one per minute.
559 */
cc3a86c8 560 if (fib6_nh->fib_nh_gw_family)
7ff74a59 561 return;
5e670d84 562
cc3a86c8
DA
563 nh_gw = &fib6_nh->fib_nh_gw6;
564 dev = fib6_nh->fib_nh_dev;
2152caea 565 rcu_read_lock_bh();
f547fac6 566 idev = __in6_dev_get(dev);
5e670d84 567 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 568 if (neigh) {
8d6c31bf
MKL
569 if (neigh->nud_state & NUD_VALID)
570 goto out;
571
2152caea 572 write_lock(&neigh->lock);
990edb42
MKL
573 if (!(neigh->nud_state & NUD_VALID) &&
574 time_after(jiffies,
dcd1f572 575 neigh->updated + idev->cnf.rtr_probe_interval)) {
990edb42
MKL
576 work = kmalloc(sizeof(*work), GFP_ATOMIC);
577 if (work)
578 __neigh_set_probe_once(neigh);
c2f17e82 579 }
2152caea 580 write_unlock(&neigh->lock);
cc3a86c8 581 } else if (time_after(jiffies, fib6_nh->last_probe +
f547fac6 582 idev->cnf.rtr_probe_interval)) {
990edb42 583 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 584 }
990edb42
MKL
585
586 if (work) {
cc3a86c8 587 fib6_nh->last_probe = jiffies;
990edb42 588 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
589 work->target = *nh_gw;
590 dev_hold(dev);
591 work->dev = dev;
990edb42
MKL
592 schedule_work(&work->work);
593 }
594
8d6c31bf 595out:
2152caea 596 rcu_read_unlock_bh();
27097255
YH
597}
598#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_nh *fib6_nh)
{
	/* nothing to do */
}
602#endif
603
1da177e4 604/*
554cfb7e 605 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 606 */
1ba9a895 607static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
1da177e4 608{
afc154e9 609 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 610 struct neighbour *neigh;
f2c31e32 611
145a3621 612 rcu_read_lock_bh();
1ba9a895
DA
613 neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
614 &fib6_nh->fib_nh_gw6);
145a3621
YH
615 if (neigh) {
616 read_lock(&neigh->lock);
554cfb7e 617 if (neigh->nud_state & NUD_VALID)
afc154e9 618 ret = RT6_NUD_SUCCEED;
398bcbeb 619#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 620 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 621 ret = RT6_NUD_SUCCEED;
7e980569
JB
622 else
623 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 624#endif
145a3621 625 read_unlock(&neigh->lock);
afc154e9
HFS
626 } else {
627 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 628 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 629 }
145a3621
YH
630 rcu_read_unlock_bh();
631
a5a81f0b 632 return ret;
1da177e4
LT
633}
634
702cea56
DA
635static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
636 int strict)
1da177e4 637{
6e1809a5
DA
638 int m = 0;
639
640 if (!oif || nh->fib_nh_dev->ifindex == oif)
641 m = 2;
1ab1457c 642
77d16f45 643 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 644 return RT6_NUD_FAIL_HARD;
ebacaaa0 645#ifdef CONFIG_IPV6_ROUTER_PREF
702cea56 646 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
ebacaaa0 647#endif
1ba9a895 648 if ((strict & RT6_LOOKUP_F_REACHABLE) &&
702cea56 649 !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
1ba9a895 650 int n = rt6_check_neigh(nh);
afc154e9
HFS
651 if (n < 0)
652 return n;
653 }
554cfb7e
YH
654 return m;
655}
656
28679ed1
DA
657static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
658 int oif, int strict, int *mpri, bool *do_rr)
554cfb7e 659{
afc154e9 660 bool match_do_rr = false;
28679ed1
DA
661 bool rc = false;
662 int m;
35103d11 663
28679ed1 664 if (nh->fib_nh_flags & RTNH_F_DEAD)
8067bb8c
IS
665 goto out;
666
28679ed1
DA
667 if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
668 nh->fib_nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 669 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 670 goto out;
f11e6659 671
28679ed1 672 m = rt6_score_route(nh, fib6_flags, oif, strict);
7e980569 673 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
674 match_do_rr = true;
675 m = 0; /* lowest valid score */
7e980569 676 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 677 goto out;
afc154e9
HFS
678 }
679
680 if (strict & RT6_LOOKUP_F_REACHABLE)
28679ed1 681 rt6_probe(nh);
f11e6659 682
7e980569 683 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 684 if (m > *mpri) {
afc154e9 685 *do_rr = match_do_rr;
f11e6659 686 *mpri = m;
28679ed1 687 rc = true;
f11e6659 688 }
f11e6659 689out:
28679ed1 690 return rc;
f11e6659
DM
691}
692
b7bc4b6a 693static void __find_rr_leaf(struct fib6_info *f6i_start,
30c15f03 694 struct fib6_info *nomatch, u32 metric,
b7bc4b6a 695 struct fib6_result *res, struct fib6_info **cont,
30c15f03 696 int oif, int strict, bool *do_rr, int *mpri)
f11e6659 697{
b7bc4b6a 698 struct fib6_info *f6i;
1da177e4 699
b7bc4b6a
DA
700 for (f6i = f6i_start;
701 f6i && f6i != nomatch;
702 f6i = rcu_dereference(f6i->fib6_next)) {
30c15f03
DA
703 struct fib6_nh *nh;
704
b7bc4b6a
DA
705 if (cont && f6i->fib6_metric != metric) {
706 *cont = f6i;
30c15f03 707 return;
9fbdcfaf
SK
708 }
709
b7bc4b6a 710 if (fib6_check_expired(f6i))
28679ed1
DA
711 continue;
712
b7bc4b6a
DA
713 nh = &f6i->fib6_nh;
714 if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) {
715 res->f6i = f6i;
716 res->nh = nh;
7d21fec9
DA
717 res->fib6_flags = f6i->fib6_flags;
718 res->fib6_type = f6i->fib6_type;
b7bc4b6a 719 }
9fbdcfaf 720 }
30c15f03 721}
9fbdcfaf 722
b7bc4b6a
DA
723static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
724 struct fib6_info *rr_head, int oif, int strict,
725 bool *do_rr, struct fib6_result *res)
30c15f03 726{
b7bc4b6a
DA
727 u32 metric = rr_head->fib6_metric;
728 struct fib6_info *cont = NULL;
30c15f03 729 int mpri = -1;
9fbdcfaf 730
b7bc4b6a 731 __find_rr_leaf(rr_head, NULL, metric, res, &cont,
30c15f03 732 oif, strict, do_rr, &mpri);
28679ed1 733
b7bc4b6a 734 __find_rr_leaf(leaf, rr_head, metric, res, &cont,
30c15f03 735 oif, strict, do_rr, &mpri);
9fbdcfaf 736
b7bc4b6a
DA
737 if (res->f6i || !cont)
738 return;
9fbdcfaf 739
b7bc4b6a 740 __find_rr_leaf(cont, NULL, metric, res, NULL,
30c15f03 741 oif, strict, do_rr, &mpri);
f11e6659 742}
1da177e4 743
b7bc4b6a
DA
744static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
745 struct fib6_result *res, int strict)
f11e6659 746{
8d1c802b 747 struct fib6_info *leaf = rcu_dereference(fn->leaf);
b7bc4b6a 748 struct fib6_info *rt0;
afc154e9 749 bool do_rr = false;
17ecf590 750 int key_plen;
1da177e4 751
b7bc4b6a
DA
752 /* make sure this function or its helpers sets f6i */
753 res->f6i = NULL;
754
421842ed 755 if (!leaf || leaf == net->ipv6.fib6_null_entry)
b7bc4b6a 756 goto out;
8d1040e8 757
66f5d6ce 758 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 759 if (!rt0)
66f5d6ce 760 rt0 = leaf;
1da177e4 761
17ecf590
WW
762 /* Double check to make sure fn is not an intermediate node
763 * and fn->leaf does not points to its child's leaf
764 * (This might happen if all routes under fn are deleted from
765 * the tree and fib6_repair_tree() is called on the node.)
766 */
93c2fb25 767 key_plen = rt0->fib6_dst.plen;
17ecf590 768#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
769 if (rt0->fib6_src.plen)
770 key_plen = rt0->fib6_src.plen;
17ecf590
WW
771#endif
772 if (fn->fn_bit != key_plen)
b7bc4b6a 773 goto out;
1da177e4 774
b7bc4b6a 775 find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
afc154e9 776 if (do_rr) {
8fb11a9a 777 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
f11e6659 778
554cfb7e 779 /* no entries matched; do round-robin */
93c2fb25 780 if (!next || next->fib6_metric != rt0->fib6_metric)
8d1040e8 781 next = leaf;
f11e6659 782
66f5d6ce 783 if (next != rt0) {
93c2fb25 784 spin_lock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 785 /* make sure next is not being deleted from the tree */
93c2fb25 786 if (next->fib6_node)
66f5d6ce 787 rcu_assign_pointer(fn->rr_ptr, next);
93c2fb25 788 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 789 }
1da177e4 790 }
1da177e4 791
b7bc4b6a
DA
792out:
793 if (!res->f6i) {
794 res->f6i = net->ipv6.fib6_null_entry;
795 res->nh = &res->f6i->fib6_nh;
7d21fec9
DA
796 res->fib6_flags = res->f6i->fib6_flags;
797 res->fib6_type = res->f6i->fib6_type;
b7bc4b6a 798 }
1da177e4
LT
799}
800
85bd05de 801static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
8b9df265 802{
85bd05de
DA
803 return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
804 res->nh->fib_nh_gw_family;
8b9df265
MKL
805}
806
70ceb4f5
YH
807#ifdef CONFIG_IPV6_ROUTE_INFO
808int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 809 const struct in6_addr *gwaddr)
70ceb4f5 810{
c346dca1 811 struct net *net = dev_net(dev);
70ceb4f5
YH
812 struct route_info *rinfo = (struct route_info *) opt;
813 struct in6_addr prefix_buf, *prefix;
814 unsigned int pref;
4bed72e4 815 unsigned long lifetime;
8d1c802b 816 struct fib6_info *rt;
70ceb4f5
YH
817
818 if (len < sizeof(struct route_info)) {
819 return -EINVAL;
820 }
821
822 /* Sanity check for prefix_len and length */
823 if (rinfo->length > 3) {
824 return -EINVAL;
825 } else if (rinfo->prefix_len > 128) {
826 return -EINVAL;
827 } else if (rinfo->prefix_len > 64) {
828 if (rinfo->length < 2) {
829 return -EINVAL;
830 }
831 } else if (rinfo->prefix_len > 0) {
832 if (rinfo->length < 1) {
833 return -EINVAL;
834 }
835 }
836
837 pref = rinfo->route_pref;
838 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 839 return -EINVAL;
70ceb4f5 840
4bed72e4 841 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
842
843 if (rinfo->length == 3)
844 prefix = (struct in6_addr *)rinfo->prefix;
845 else {
846 /* this function is safe */
847 ipv6_addr_prefix(&prefix_buf,
848 (struct in6_addr *)rinfo->prefix,
849 rinfo->prefix_len);
850 prefix = &prefix_buf;
851 }
852
f104a567 853 if (rinfo->prefix_len == 0)
afb1d4b5 854 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
855 else
856 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 857 gwaddr, dev);
70ceb4f5
YH
858
859 if (rt && !lifetime) {
afb1d4b5 860 ip6_del_rt(net, rt);
70ceb4f5
YH
861 rt = NULL;
862 }
863
864 if (!rt && lifetime)
830218c1
DA
865 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
866 dev, pref);
70ceb4f5 867 else if (rt)
93c2fb25
DA
868 rt->fib6_flags = RTF_ROUTEINFO |
869 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
70ceb4f5
YH
870
871 if (rt) {
1716a961 872 if (!addrconf_finite_timeout(lifetime))
14895687 873 fib6_clean_expires(rt);
1716a961 874 else
14895687 875 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 876
93531c67 877 fib6_info_release(rt);
70ceb4f5
YH
878 }
879 return 0;
880}
881#endif
882
ae90d867
DA
883/*
884 * Misc support functions
885 */
886
887/* called with rcu_lock held */
0d161581 888static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
ae90d867 889{
0d161581 890 struct net_device *dev = res->nh->fib_nh_dev;
ae90d867 891
7d21fec9 892 if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
ae90d867
DA
893 /* for copies of local routes, dst->dev needs to be the
894 * device if it is a master device, the master device if
895 * device is enslaved, and the loopback as the default
896 */
897 if (netif_is_l3_slave(dev) &&
7d21fec9 898 !rt6_need_strict(&res->f6i->fib6_dst.addr))
ae90d867
DA
899 dev = l3mdev_master_dev_rcu(dev);
900 else if (!netif_is_l3_master(dev))
901 dev = dev_net(dev)->loopback_dev;
902 /* last case is netif_is_l3_master(dev) is true in which
903 * case we want dev returned to be dev
904 */
905 }
906
907 return dev;
908}
909
6edb3c96
DA
910static const int fib6_prop[RTN_MAX + 1] = {
911 [RTN_UNSPEC] = 0,
912 [RTN_UNICAST] = 0,
913 [RTN_LOCAL] = 0,
914 [RTN_BROADCAST] = 0,
915 [RTN_ANYCAST] = 0,
916 [RTN_MULTICAST] = 0,
917 [RTN_BLACKHOLE] = -EINVAL,
918 [RTN_UNREACHABLE] = -EHOSTUNREACH,
919 [RTN_PROHIBIT] = -EACCES,
920 [RTN_THROW] = -EAGAIN,
921 [RTN_NAT] = -EINVAL,
922 [RTN_XRESOLVE] = -EINVAL,
923};
924
925static int ip6_rt_type_to_error(u8 fib6_type)
926{
927 return fib6_prop[fib6_type];
928}
929
8d1c802b 930static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
3b6761d1
DA
931{
932 unsigned short flags = 0;
933
934 if (rt->dst_nocount)
935 flags |= DST_NOCOUNT;
936 if (rt->dst_nopolicy)
937 flags |= DST_NOPOLICY;
938 if (rt->dst_host)
939 flags |= DST_HOST;
940
941 return flags;
942}
943
7d21fec9 944static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
6edb3c96 945{
7d21fec9 946 rt->dst.error = ip6_rt_type_to_error(fib6_type);
6edb3c96 947
7d21fec9 948 switch (fib6_type) {
6edb3c96
DA
949 case RTN_BLACKHOLE:
950 rt->dst.output = dst_discard_out;
951 rt->dst.input = dst_discard;
952 break;
953 case RTN_PROHIBIT:
954 rt->dst.output = ip6_pkt_prohibit_out;
955 rt->dst.input = ip6_pkt_prohibit;
956 break;
957 case RTN_THROW:
958 case RTN_UNREACHABLE:
959 default:
960 rt->dst.output = ip6_pkt_discard_out;
961 rt->dst.input = ip6_pkt_discard;
962 break;
963 }
964}
965
0d161581 966static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
6edb3c96 967{
7d21fec9 968 struct fib6_info *f6i = res->f6i;
0d161581 969
7d21fec9
DA
970 if (res->fib6_flags & RTF_REJECT) {
971 ip6_rt_init_dst_reject(rt, res->fib6_type);
6edb3c96
DA
972 return;
973 }
974
975 rt->dst.error = 0;
976 rt->dst.output = ip6_output;
977
7d21fec9 978 if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
6edb3c96 979 rt->dst.input = ip6_input;
7d21fec9 980 } else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
6edb3c96
DA
981 rt->dst.input = ip6_mc_input;
982 } else {
983 rt->dst.input = ip6_forward;
984 }
985
0d161581
DA
986 if (res->nh->fib_nh_lws) {
987 rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
6edb3c96
DA
988 lwtunnel_set_redirect(&rt->dst);
989 }
990
991 rt->dst.lastuse = jiffies;
992}
993
e873e4b9 994/* Caller must already hold reference to @from */
8d1c802b 995static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ae90d867 996{
ae90d867 997 rt->rt6i_flags &= ~RTF_EXPIRES;
a68886a6 998 rcu_assign_pointer(rt->from, from);
e1255ed4 999 ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
ae90d867
DA
1000}
1001
0d161581
DA
1002/* Caller must already hold reference to f6i in result */
1003static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
ae90d867 1004{
0d161581
DA
1005 const struct fib6_nh *nh = res->nh;
1006 const struct net_device *dev = nh->fib_nh_dev;
1007 struct fib6_info *f6i = res->f6i;
dcd1f572 1008
0d161581 1009 ip6_rt_init_dst(rt, res);
6edb3c96 1010
0d161581 1011 rt->rt6i_dst = f6i->fib6_dst;
dcd1f572 1012 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
7d21fec9 1013 rt->rt6i_flags = res->fib6_flags;
0d161581
DA
1014 if (nh->fib_nh_gw_family) {
1015 rt->rt6i_gateway = nh->fib_nh_gw6;
2b2450ca
DA
1016 rt->rt6i_flags |= RTF_GATEWAY;
1017 }
0d161581 1018 rt6_set_from(rt, f6i);
ae90d867 1019#ifdef CONFIG_IPV6_SUBTREES
0d161581 1020 rt->rt6i_src = f6i->fib6_src;
ae90d867 1021#endif
ae90d867
DA
1022}
1023
a3c00e46
MKL
1024static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1025 struct in6_addr *saddr)
1026{
66f5d6ce 1027 struct fib6_node *pn, *sn;
a3c00e46
MKL
1028 while (1) {
1029 if (fn->fn_flags & RTN_TL_ROOT)
1030 return NULL;
66f5d6ce
WW
1031 pn = rcu_dereference(fn->parent);
1032 sn = FIB6_SUBTREE(pn);
1033 if (sn && sn != fn)
6454743b 1034 fn = fib6_node_lookup(sn, NULL, saddr);
a3c00e46
MKL
1035 else
1036 fn = pn;
1037 if (fn->fn_flags & RTN_RTINFO)
1038 return fn;
1039 }
1040}
c71099ac 1041
10585b43 1042static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
d3843fe5
WW
1043{
1044 struct rt6_info *rt = *prt;
1045
1046 if (dst_hold_safe(&rt->dst))
1047 return true;
10585b43 1048 if (net) {
d3843fe5
WW
1049 rt = net->ipv6.ip6_null_entry;
1050 dst_hold(&rt->dst);
1051 } else {
1052 rt = NULL;
1053 }
1054 *prt = rt;
1055 return false;
1056}
1057
dec9b0e2 1058/* called with rcu_lock held */
9b6b35ab 1059static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
dec9b0e2 1060{
9b6b35ab
DA
1061 struct net_device *dev = res->nh->fib_nh_dev;
1062 struct fib6_info *f6i = res->f6i;
1063 unsigned short flags;
dec9b0e2
DA
1064 struct rt6_info *nrt;
1065
9b6b35ab 1066 if (!fib6_info_hold_safe(f6i))
1c87e79a 1067 goto fallback;
e873e4b9 1068
9b6b35ab 1069 flags = fib6_info_dst_flags(f6i);
93531c67 1070 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1c87e79a 1071 if (!nrt) {
9b6b35ab 1072 fib6_info_release(f6i);
1c87e79a
XL
1073 goto fallback;
1074 }
dec9b0e2 1075
0d161581 1076 ip6_rt_copy_init(nrt, res);
1c87e79a
XL
1077 return nrt;
1078
1079fallback:
1080 nrt = dev_net(dev)->ipv6.ip6_null_entry;
1081 dst_hold(&nrt->dst);
dec9b0e2
DA
1082 return nrt;
1083}
1084
8ed67789
DL
1085static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1086 struct fib6_table *table,
b75cc8f9
DA
1087 struct flowi6 *fl6,
1088 const struct sk_buff *skb,
1089 int flags)
1da177e4 1090{
b1d40991 1091 struct fib6_result res = {};
1da177e4 1092 struct fib6_node *fn;
23fb93a4 1093 struct rt6_info *rt;
1da177e4 1094
b6cdbc85
DA
1095 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1096 flags &= ~RT6_LOOKUP_F_IFACE;
1097
66f5d6ce 1098 rcu_read_lock();
6454743b 1099 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1100restart:
b1d40991
DA
1101 res.f6i = rcu_dereference(fn->leaf);
1102 if (!res.f6i)
1103 res.f6i = net->ipv6.fib6_null_entry;
af52a52c 1104 else
75ef7389
DA
1105 rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
1106 flags);
af52a52c 1107
b1d40991 1108 if (res.f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1109 fn = fib6_backtrack(fn, &fl6->saddr);
1110 if (fn)
1111 goto restart;
2b760fcf 1112
af52a52c
DA
1113 rt = net->ipv6.ip6_null_entry;
1114 dst_hold(&rt->dst);
1115 goto out;
1116 }
d3843fe5 1117
b1d40991
DA
1118 fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
1119 fl6->flowi6_oif != 0, skb, flags);
1120
2b760fcf 1121 /* Search through exception table */
7e4b5128 1122 rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
23fb93a4 1123 if (rt) {
10585b43 1124 if (ip6_hold_safe(net, &rt))
dec9b0e2 1125 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1126 } else {
9b6b35ab 1127 rt = ip6_create_rt_rcu(&res);
dec9b0e2 1128 }
b811580d 1129
af52a52c 1130out:
8ff2e5b2 1131 trace_fib6_table_lookup(net, &res, table, fl6);
af52a52c 1132
66f5d6ce 1133 rcu_read_unlock();
b811580d 1134
c71099ac 1135 return rt;
c71099ac
TG
1136}
1137
67ba4152 1138struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1139 const struct sk_buff *skb, int flags)
ea6e574e 1140{
b75cc8f9 1141 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1142}
1143EXPORT_SYMBOL_GPL(ip6_route_lookup);
1144
9acd9f3a 1145struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1146 const struct in6_addr *saddr, int oif,
1147 const struct sk_buff *skb, int strict)
c71099ac 1148{
4c9483b2
DM
1149 struct flowi6 fl6 = {
1150 .flowi6_oif = oif,
1151 .daddr = *daddr,
c71099ac
TG
1152 };
1153 struct dst_entry *dst;
77d16f45 1154 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1155
adaa70bb 1156 if (saddr) {
4c9483b2 1157 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1158 flags |= RT6_LOOKUP_F_HAS_SADDR;
1159 }
1160
b75cc8f9 1161 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1162 if (dst->error == 0)
1163 return (struct rt6_info *) dst;
1164
1165 dst_release(dst);
1166
1da177e4
LT
1167 return NULL;
1168}
7159039a
YH
1169EXPORT_SYMBOL(rt6_lookup);
1170
c71099ac 1171/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1172 * It takes new route entry, the addition fails by any reason the
1173 * route is released.
1174 * Caller must hold dst before calling it.
1da177e4
LT
1175 */
1176
8d1c802b 1177static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
333c4301 1178 struct netlink_ext_ack *extack)
1da177e4
LT
1179{
1180 int err;
c71099ac 1181 struct fib6_table *table;
1da177e4 1182
93c2fb25 1183 table = rt->fib6_table;
66f5d6ce 1184 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1185 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1186 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1187
1188 return err;
1189}
1190
8d1c802b 1191int ip6_ins_rt(struct net *net, struct fib6_info *rt)
40e22e8f 1192{
afb1d4b5 1193 struct nl_info info = { .nl_net = net, };
e715b6d3 1194
d4ead6b3 1195 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1196}
1197
85bd05de 1198static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
8b9df265
MKL
1199 const struct in6_addr *daddr,
1200 const struct in6_addr *saddr)
1da177e4 1201{
85bd05de 1202 struct fib6_info *f6i = res->f6i;
4832c30d 1203 struct net_device *dev;
1da177e4
LT
1204 struct rt6_info *rt;
1205
1206 /*
1207 * Clone the route.
1208 */
1209
85bd05de 1210 if (!fib6_info_hold_safe(f6i))
e873e4b9
WW
1211 return NULL;
1212
0d161581 1213 dev = ip6_rt_get_dev_rcu(res);
93531c67 1214 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
e873e4b9 1215 if (!rt) {
85bd05de 1216 fib6_info_release(f6i);
83a09abd 1217 return NULL;
e873e4b9 1218 }
83a09abd 1219
0d161581 1220 ip6_rt_copy_init(rt, res);
83a09abd 1221 rt->rt6i_flags |= RTF_CACHE;
83a09abd
MKL
1222 rt->dst.flags |= DST_HOST;
1223 rt->rt6i_dst.addr = *daddr;
1224 rt->rt6i_dst.plen = 128;
1da177e4 1225
85bd05de
DA
1226 if (!rt6_is_gw_or_nonexthop(res)) {
1227 if (f6i->fib6_dst.plen != 128 &&
1228 ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
83a09abd 1229 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1230#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1231 if (rt->rt6i_src.plen && saddr) {
1232 rt->rt6i_src.addr = *saddr;
1233 rt->rt6i_src.plen = 128;
8b9df265 1234 }
83a09abd 1235#endif
95a9a5ba 1236 }
1da177e4 1237
95a9a5ba
YH
1238 return rt;
1239}
1da177e4 1240
db3fedee 1241static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
d52d3997 1242{
db3fedee
DA
1243 struct fib6_info *f6i = res->f6i;
1244 unsigned short flags = fib6_info_dst_flags(f6i);
4832c30d 1245 struct net_device *dev;
d52d3997
MKL
1246 struct rt6_info *pcpu_rt;
1247
db3fedee 1248 if (!fib6_info_hold_safe(f6i))
e873e4b9
WW
1249 return NULL;
1250
4832c30d 1251 rcu_read_lock();
0d161581 1252 dev = ip6_rt_get_dev_rcu(res);
93531c67 1253 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1254 rcu_read_unlock();
e873e4b9 1255 if (!pcpu_rt) {
db3fedee 1256 fib6_info_release(f6i);
d52d3997 1257 return NULL;
e873e4b9 1258 }
0d161581 1259 ip6_rt_copy_init(pcpu_rt, res);
d52d3997
MKL
1260 pcpu_rt->rt6i_flags |= RTF_PCPU;
1261 return pcpu_rt;
1262}
1263
66f5d6ce 1264/* It should be called with rcu_read_lock() acquired */
db3fedee 1265static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
d52d3997 1266{
a73e4195 1267 struct rt6_info *pcpu_rt, **p;
d52d3997 1268
db3fedee 1269 p = this_cpu_ptr(res->f6i->rt6i_pcpu);
d52d3997
MKL
1270 pcpu_rt = *p;
1271
d4ead6b3 1272 if (pcpu_rt)
10585b43 1273 ip6_hold_safe(NULL, &pcpu_rt);
d3843fe5 1274
a73e4195
MKL
1275 return pcpu_rt;
1276}
1277
afb1d4b5 1278static struct rt6_info *rt6_make_pcpu_route(struct net *net,
db3fedee 1279 const struct fib6_result *res)
a73e4195
MKL
1280{
1281 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997 1282
db3fedee 1283 pcpu_rt = ip6_rt_pcpu_alloc(res);
d52d3997 1284 if (!pcpu_rt) {
9c7370a1
MKL
1285 dst_hold(&net->ipv6.ip6_null_entry->dst);
1286 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1287 }
1288
a94b9367 1289 dst_hold(&pcpu_rt->dst);
db3fedee 1290 p = this_cpu_ptr(res->f6i->rt6i_pcpu);
a94b9367 1291 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1292 BUG_ON(prev);
a94b9367 1293
61fb0d01
ED
1294 if (res->f6i->fib6_destroying) {
1295 struct fib6_info *from;
1296
1297 from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
1298 fib6_info_release(from);
1299 }
1300
d52d3997
MKL
1301 return pcpu_rt;
1302}
1303
35732d01
WW
1304/* exception hash table implementation
1305 */
1306static DEFINE_SPINLOCK(rt6_exception_lock);
1307
1308/* Remove rt6_ex from hash table and free the memory
1309 * Caller must hold rt6_exception_lock
1310 */
1311static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1312 struct rt6_exception *rt6_ex)
1313{
f5b51fe8 1314 struct fib6_info *from;
b2427e67 1315 struct net *net;
81eb8447 1316
35732d01
WW
1317 if (!bucket || !rt6_ex)
1318 return;
b2427e67
CIK
1319
1320 net = dev_net(rt6_ex->rt6i->dst.dev);
f5b51fe8
PA
1321 net->ipv6.rt6_stats->fib_rt_cache--;
1322
1323 /* purge completely the exception to allow releasing the held resources:
1324 * some [sk] cache may keep the dst around for unlimited time
1325 */
0e233874 1326 from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
f5b51fe8
PA
1327 fib6_info_release(from);
1328 dst_dev_put(&rt6_ex->rt6i->dst);
1329
35732d01 1330 hlist_del_rcu(&rt6_ex->hlist);
77634cc6 1331 dst_release(&rt6_ex->rt6i->dst);
35732d01
WW
1332 kfree_rcu(rt6_ex, rcu);
1333 WARN_ON_ONCE(!bucket->depth);
1334 bucket->depth--;
1335}
1336
1337/* Remove oldest rt6_ex in bucket and free the memory
1338 * Caller must hold rt6_exception_lock
1339 */
1340static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1341{
1342 struct rt6_exception *rt6_ex, *oldest = NULL;
1343
1344 if (!bucket)
1345 return;
1346
1347 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1348 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1349 oldest = rt6_ex;
1350 }
1351 rt6_remove_exception(bucket, oldest);
1352}
1353
1354static u32 rt6_exception_hash(const struct in6_addr *dst,
1355 const struct in6_addr *src)
1356{
1357 static u32 seed __read_mostly;
1358 u32 val;
1359
1360 net_get_random_once(&seed, sizeof(seed));
1361 val = jhash(dst, sizeof(*dst), seed);
1362
1363#ifdef CONFIG_IPV6_SUBTREES
1364 if (src)
1365 val = jhash(src, sizeof(*src), val);
1366#endif
1367 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1368}
1369
1370/* Helper function to find the cached rt in the hash table
1371 * and update bucket pointer to point to the bucket for this
1372 * (daddr, saddr) pair
1373 * Caller must hold rt6_exception_lock
1374 */
1375static struct rt6_exception *
1376__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1377 const struct in6_addr *daddr,
1378 const struct in6_addr *saddr)
1379{
1380 struct rt6_exception *rt6_ex;
1381 u32 hval;
1382
1383 if (!(*bucket) || !daddr)
1384 return NULL;
1385
1386 hval = rt6_exception_hash(daddr, saddr);
1387 *bucket += hval;
1388
1389 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1390 struct rt6_info *rt6 = rt6_ex->rt6i;
1391 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1392
1393#ifdef CONFIG_IPV6_SUBTREES
1394 if (matched && saddr)
1395 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1396#endif
1397 if (matched)
1398 return rt6_ex;
1399 }
1400 return NULL;
1401}
1402
1403/* Helper function to find the cached rt in the hash table
1404 * and update bucket pointer to point to the bucket for this
1405 * (daddr, saddr) pair
1406 * Caller must hold rcu_read_lock()
1407 */
1408static struct rt6_exception *
1409__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1410 const struct in6_addr *daddr,
1411 const struct in6_addr *saddr)
1412{
1413 struct rt6_exception *rt6_ex;
1414 u32 hval;
1415
1416 WARN_ON_ONCE(!rcu_read_lock_held());
1417
1418 if (!(*bucket) || !daddr)
1419 return NULL;
1420
1421 hval = rt6_exception_hash(daddr, saddr);
1422 *bucket += hval;
1423
1424 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1425 struct rt6_info *rt6 = rt6_ex->rt6i;
1426 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1427
1428#ifdef CONFIG_IPV6_SUBTREES
1429 if (matched && saddr)
1430 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1431#endif
1432 if (matched)
1433 return rt6_ex;
1434 }
1435 return NULL;
1436}
1437
b748f260 1438static unsigned int fib6_mtu(const struct fib6_result *res)
d4ead6b3 1439{
b748f260 1440 const struct fib6_nh *nh = res->nh;
d4ead6b3
DA
1441 unsigned int mtu;
1442
b748f260
DA
1443 if (res->f6i->fib6_pmtu) {
1444 mtu = res->f6i->fib6_pmtu;
dcd1f572 1445 } else {
b748f260 1446 struct net_device *dev = nh->fib_nh_dev;
dcd1f572
DA
1447 struct inet6_dev *idev;
1448
1449 rcu_read_lock();
1450 idev = __in6_dev_get(dev);
1451 mtu = idev->cnf.mtu6;
1452 rcu_read_unlock();
1453 }
1454
d4ead6b3
DA
1455 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1456
b748f260 1457 return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
d4ead6b3
DA
1458}
1459
35732d01 1460static int rt6_insert_exception(struct rt6_info *nrt,
5012f0a5 1461 const struct fib6_result *res)
35732d01 1462{
5e670d84 1463 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1464 struct rt6_exception_bucket *bucket;
1465 struct in6_addr *src_key = NULL;
1466 struct rt6_exception *rt6_ex;
5012f0a5 1467 struct fib6_info *f6i = res->f6i;
35732d01
WW
1468 int err = 0;
1469
35732d01
WW
1470 spin_lock_bh(&rt6_exception_lock);
1471
5012f0a5 1472 if (f6i->exception_bucket_flushed) {
35732d01
WW
1473 err = -EINVAL;
1474 goto out;
1475 }
1476
5012f0a5 1477 bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket,
35732d01
WW
1478 lockdep_is_held(&rt6_exception_lock));
1479 if (!bucket) {
1480 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1481 GFP_ATOMIC);
1482 if (!bucket) {
1483 err = -ENOMEM;
1484 goto out;
1485 }
5012f0a5 1486 rcu_assign_pointer(f6i->rt6i_exception_bucket, bucket);
35732d01
WW
1487 }
1488
1489#ifdef CONFIG_IPV6_SUBTREES
5012f0a5 1490 /* fib6_src.plen != 0 indicates f6i is in subtree
35732d01 1491 * and exception table is indexed by a hash of
5012f0a5 1492 * both fib6_dst and fib6_src.
35732d01 1493 * Otherwise, the exception table is indexed by
5012f0a5 1494 * a hash of only fib6_dst.
35732d01 1495 */
5012f0a5 1496 if (f6i->fib6_src.plen)
35732d01
WW
1497 src_key = &nrt->rt6i_src.addr;
1498#endif
5012f0a5 1499 /* rt6_mtu_change() might lower mtu on f6i.
f5bbe7ee 1500 * Only insert this exception route if its mtu
5012f0a5 1501 * is less than f6i's mtu value.
f5bbe7ee 1502 */
b748f260 1503 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
f5bbe7ee
WW
1504 err = -EINVAL;
1505 goto out;
1506 }
60006a48 1507
35732d01
WW
1508 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1509 src_key);
1510 if (rt6_ex)
1511 rt6_remove_exception(bucket, rt6_ex);
1512
1513 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1514 if (!rt6_ex) {
1515 err = -ENOMEM;
1516 goto out;
1517 }
1518 rt6_ex->rt6i = nrt;
1519 rt6_ex->stamp = jiffies;
35732d01
WW
1520 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1521 bucket->depth++;
81eb8447 1522 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1523
1524 if (bucket->depth > FIB6_MAX_DEPTH)
1525 rt6_exception_remove_oldest(bucket);
1526
1527out:
1528 spin_unlock_bh(&rt6_exception_lock);
1529
1530 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1531 if (!err) {
5012f0a5
DA
1532 spin_lock_bh(&f6i->fib6_table->tb6_lock);
1533 fib6_update_sernum(net, f6i);
1534 spin_unlock_bh(&f6i->fib6_table->tb6_lock);
b886d5f2
PA
1535 fib6_force_start_gc(net);
1536 }
35732d01
WW
1537
1538 return err;
1539}
1540
8d1c802b 1541void rt6_flush_exceptions(struct fib6_info *rt)
35732d01
WW
1542{
1543 struct rt6_exception_bucket *bucket;
1544 struct rt6_exception *rt6_ex;
1545 struct hlist_node *tmp;
1546 int i;
1547
1548 spin_lock_bh(&rt6_exception_lock);
1549 /* Prevent rt6_insert_exception() to recreate the bucket list */
1550 rt->exception_bucket_flushed = 1;
1551
1552 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1553 lockdep_is_held(&rt6_exception_lock));
1554 if (!bucket)
1555 goto out;
1556
1557 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1558 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1559 rt6_remove_exception(bucket, rt6_ex);
1560 WARN_ON_ONCE(bucket->depth);
1561 bucket++;
1562 }
1563
1564out:
1565 spin_unlock_bh(&rt6_exception_lock);
1566}
1567
1568/* Find cached rt in the hash table inside passed in rt
1569 * Caller has to hold rcu_read_lock()
1570 */
7e4b5128 1571static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
510e2ced
WW
1572 const struct in6_addr *daddr,
1573 const struct in6_addr *saddr)
35732d01 1574{
510e2ced 1575 const struct in6_addr *src_key = NULL;
35732d01 1576 struct rt6_exception_bucket *bucket;
35732d01 1577 struct rt6_exception *rt6_ex;
7e4b5128 1578 struct rt6_info *ret = NULL;
35732d01 1579
35732d01 1580#ifdef CONFIG_IPV6_SUBTREES
7e4b5128 1581 /* fib6i_src.plen != 0 indicates f6i is in subtree
35732d01 1582 * and exception table is indexed by a hash of
7e4b5128 1583 * both fib6_dst and fib6_src.
510e2ced
WW
1584 * However, the src addr used to create the hash
1585 * might not be exactly the passed in saddr which
1586 * is a /128 addr from the flow.
1587 * So we need to use f6i->fib6_src to redo lookup
1588 * if the passed in saddr does not find anything.
1589 * (See the logic in ip6_rt_cache_alloc() on how
1590 * rt->rt6i_src is updated.)
35732d01 1591 */
7e4b5128 1592 if (res->f6i->fib6_src.plen)
35732d01 1593 src_key = saddr;
510e2ced 1594find_ex:
35732d01 1595#endif
510e2ced 1596 bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
35732d01
WW
1597 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1598
1599 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
7e4b5128 1600 ret = rt6_ex->rt6i;
35732d01 1601
510e2ced
WW
1602#ifdef CONFIG_IPV6_SUBTREES
1603 /* Use fib6_src as src_key and redo lookup */
1604 if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) {
1605 src_key = &res->f6i->fib6_src.addr;
1606 goto find_ex;
1607 }
1608#endif
1609
7e4b5128 1610 return ret;
35732d01
WW
1611}
1612
1613/* Remove the passed in cached rt from the hash table that contains it */
23fb93a4 1614static int rt6_remove_exception_rt(struct rt6_info *rt)
35732d01 1615{
35732d01
WW
1616 struct rt6_exception_bucket *bucket;
1617 struct in6_addr *src_key = NULL;
1618 struct rt6_exception *rt6_ex;
8a14e46f 1619 struct fib6_info *from;
35732d01
WW
1620 int err;
1621
091311de 1622 from = rcu_dereference(rt->from);
35732d01 1623 if (!from ||
442d713b 1624 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1625 return -EINVAL;
1626
1627 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1628 return -ENOENT;
1629
1630 spin_lock_bh(&rt6_exception_lock);
1631 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1632 lockdep_is_held(&rt6_exception_lock));
1633#ifdef CONFIG_IPV6_SUBTREES
1634 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1635 * and exception table is indexed by a hash of
1636 * both rt6i_dst and rt6i_src.
1637 * Otherwise, the exception table is indexed by
1638 * a hash of only rt6i_dst.
1639 */
93c2fb25 1640 if (from->fib6_src.plen)
35732d01
WW
1641 src_key = &rt->rt6i_src.addr;
1642#endif
1643 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1644 &rt->rt6i_dst.addr,
1645 src_key);
1646 if (rt6_ex) {
1647 rt6_remove_exception(bucket, rt6_ex);
1648 err = 0;
1649 } else {
1650 err = -ENOENT;
1651 }
1652
1653 spin_unlock_bh(&rt6_exception_lock);
1654 return err;
1655}
1656
1657/* Find rt6_ex which contains the passed in rt cache and
1658 * refresh its stamp
1659 */
1660static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1661{
35732d01
WW
1662 struct rt6_exception_bucket *bucket;
1663 struct in6_addr *src_key = NULL;
1664 struct rt6_exception *rt6_ex;
193f3685 1665 struct fib6_info *from;
35732d01
WW
1666
1667 rcu_read_lock();
193f3685
PA
1668 from = rcu_dereference(rt->from);
1669 if (!from || !(rt->rt6i_flags & RTF_CACHE))
1670 goto unlock;
1671
35732d01
WW
1672 bucket = rcu_dereference(from->rt6i_exception_bucket);
1673
1674#ifdef CONFIG_IPV6_SUBTREES
1675 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1676 * and exception table is indexed by a hash of
1677 * both rt6i_dst and rt6i_src.
1678 * Otherwise, the exception table is indexed by
1679 * a hash of only rt6i_dst.
1680 */
93c2fb25 1681 if (from->fib6_src.plen)
35732d01
WW
1682 src_key = &rt->rt6i_src.addr;
1683#endif
1684 rt6_ex = __rt6_find_exception_rcu(&bucket,
1685 &rt->rt6i_dst.addr,
1686 src_key);
1687 if (rt6_ex)
1688 rt6_ex->stamp = jiffies;
1689
193f3685 1690unlock:
35732d01
WW
1691 rcu_read_unlock();
1692}
1693
e9fa1495
SB
1694static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1695 struct rt6_info *rt, int mtu)
1696{
1697 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1698 * lowest MTU in the path: always allow updating the route PMTU to
1699 * reflect PMTU decreases.
1700 *
1701 * If the new MTU is higher, and the route PMTU is equal to the local
1702 * MTU, this means the old MTU is the lowest in the path, so allow
1703 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1704 * handle this.
1705 */
1706
1707 if (dst_mtu(&rt->dst) >= mtu)
1708 return true;
1709
1710 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1711 return true;
1712
1713 return false;
1714}
1715
1716static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
8d1c802b 1717 struct fib6_info *rt, int mtu)
f5bbe7ee
WW
1718{
1719 struct rt6_exception_bucket *bucket;
1720 struct rt6_exception *rt6_ex;
1721 int i;
1722
1723 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1724 lockdep_is_held(&rt6_exception_lock));
1725
e9fa1495
SB
1726 if (!bucket)
1727 return;
1728
1729 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1730 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1731 struct rt6_info *entry = rt6_ex->rt6i;
1732
1733 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1734 * route), the metrics of its rt->from have already
e9fa1495
SB
1735 * been updated.
1736 */
d4ead6b3 1737 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1738 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1739 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1740 }
e9fa1495 1741 bucket++;
f5bbe7ee
WW
1742 }
1743}
1744
b16cb459
WW
1745#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1746
8d1c802b 1747static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
b16cb459
WW
1748 struct in6_addr *gateway)
1749{
1750 struct rt6_exception_bucket *bucket;
1751 struct rt6_exception *rt6_ex;
1752 struct hlist_node *tmp;
1753 int i;
1754
1755 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1756 return;
1757
1758 spin_lock_bh(&rt6_exception_lock);
1759 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1760 lockdep_is_held(&rt6_exception_lock));
1761
1762 if (bucket) {
1763 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1764 hlist_for_each_entry_safe(rt6_ex, tmp,
1765 &bucket->chain, hlist) {
1766 struct rt6_info *entry = rt6_ex->rt6i;
1767
1768 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1769 RTF_CACHE_GATEWAY &&
1770 ipv6_addr_equal(gateway,
1771 &entry->rt6i_gateway)) {
1772 rt6_remove_exception(bucket, rt6_ex);
1773 }
1774 }
1775 bucket++;
1776 }
1777 }
1778
1779 spin_unlock_bh(&rt6_exception_lock);
1780}
1781
c757faa8
WW
1782static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1783 struct rt6_exception *rt6_ex,
1784 struct fib6_gc_args *gc_args,
1785 unsigned long now)
1786{
1787 struct rt6_info *rt = rt6_ex->rt6i;
1788
1859bac0
PA
1789 /* we are pruning and obsoleting aged-out and non gateway exceptions
1790 * even if others have still references to them, so that on next
1791 * dst_check() such references can be dropped.
1792 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1793 * expired, independently from their aging, as per RFC 8201 section 4
1794 */
31afeb42
WW
1795 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1796 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1797 RT6_TRACE("aging clone %p\n", rt);
1798 rt6_remove_exception(bucket, rt6_ex);
1799 return;
1800 }
1801 } else if (time_after(jiffies, rt->dst.expires)) {
1802 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1803 rt6_remove_exception(bucket, rt6_ex);
1804 return;
31afeb42
WW
1805 }
1806
1807 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1808 struct neighbour *neigh;
1809 __u8 neigh_flags = 0;
1810
1bfa26ff
ED
1811 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1812 if (neigh)
c757faa8 1813 neigh_flags = neigh->flags;
1bfa26ff 1814
c757faa8
WW
1815 if (!(neigh_flags & NTF_ROUTER)) {
1816 RT6_TRACE("purging route %p via non-router but gateway\n",
1817 rt);
1818 rt6_remove_exception(bucket, rt6_ex);
1819 return;
1820 }
1821 }
31afeb42 1822
c757faa8
WW
1823 gc_args->more++;
1824}
1825
8d1c802b 1826void rt6_age_exceptions(struct fib6_info *rt,
c757faa8
WW
1827 struct fib6_gc_args *gc_args,
1828 unsigned long now)
1829{
1830 struct rt6_exception_bucket *bucket;
1831 struct rt6_exception *rt6_ex;
1832 struct hlist_node *tmp;
1833 int i;
1834
1835 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1836 return;
1837
1bfa26ff
ED
1838 rcu_read_lock_bh();
1839 spin_lock(&rt6_exception_lock);
c757faa8
WW
1840 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1841 lockdep_is_held(&rt6_exception_lock));
1842
1843 if (bucket) {
1844 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1845 hlist_for_each_entry_safe(rt6_ex, tmp,
1846 &bucket->chain, hlist) {
1847 rt6_age_examine_exception(bucket, rt6_ex,
1848 gc_args, now);
1849 }
1850 bucket++;
1851 }
1852 }
1bfa26ff
ED
1853 spin_unlock(&rt6_exception_lock);
1854 rcu_read_unlock_bh();
c757faa8
WW
1855}
1856
1d053da9 1857/* must be called with rcu lock held */
effda4dd
DA
1858int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
1859 struct flowi6 *fl6, struct fib6_result *res, int strict)
1da177e4 1860{
367efcb9 1861 struct fib6_node *fn, *saved_fn;
1da177e4 1862
6454743b 1863 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1864 saved_fn = fn;
1da177e4 1865
ca254490
DA
1866 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1867 oif = 0;
1868
a3c00e46 1869redo_rt6_select:
effda4dd
DA
1870 rt6_select(net, fn, oif, res, strict);
1871 if (res->f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1872 fn = fib6_backtrack(fn, &fl6->saddr);
1873 if (fn)
1874 goto redo_rt6_select;
367efcb9
MKL
1875 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1876 /* also consider unreachable route */
1877 strict &= ~RT6_LOOKUP_F_REACHABLE;
1878 fn = saved_fn;
1879 goto redo_rt6_select;
367efcb9 1880 }
a3c00e46
MKL
1881 }
1882
effda4dd 1883 trace_fib6_table_lookup(net, res, table, fl6);
fb9de91e 1884
effda4dd 1885 return 0;
1d053da9
DA
1886}
1887
1888struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1889 int oif, struct flowi6 *fl6,
1890 const struct sk_buff *skb, int flags)
1891{
b1d40991 1892 struct fib6_result res = {};
1d053da9
DA
1893 struct rt6_info *rt;
1894 int strict = 0;
1895
1896 strict |= flags & RT6_LOOKUP_F_IFACE;
1897 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1898 if (net->ipv6.devconf_all->forwarding == 0)
1899 strict |= RT6_LOOKUP_F_REACHABLE;
1900
1901 rcu_read_lock();
1902
effda4dd 1903 fib6_table_lookup(net, table, oif, fl6, &res, strict);
b1d40991 1904 if (res.f6i == net->ipv6.fib6_null_entry) {
421842ed 1905 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1906 rcu_read_unlock();
d3843fe5 1907 dst_hold(&rt->dst);
d3843fe5 1908 return rt;
23fb93a4
DA
1909 }
1910
b1d40991 1911 fib6_select_path(net, &res, fl6, oif, false, skb, strict);
d83009d4 1912
23fb93a4 1913 /*Search through exception table */
7e4b5128 1914 rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
23fb93a4 1915 if (rt) {
10585b43 1916 if (ip6_hold_safe(net, &rt))
d3843fe5 1917 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1918
66f5d6ce 1919 rcu_read_unlock();
d52d3997 1920 return rt;
3da59bd9 1921 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
b1d40991 1922 !res.nh->fib_nh_gw_family)) {
3da59bd9
MKL
1923 /* Create a RTF_CACHE clone which will not be
1924 * owned by the fib6 tree. It is for the special case where
1925 * the daddr in the skb during the neighbor look-up is different
1926 * from the fl6->daddr used to look-up route here.
1927 */
3da59bd9
MKL
1928 struct rt6_info *uncached_rt;
1929
85bd05de 1930 uncached_rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
d52d3997 1931
4d85cd0c 1932 rcu_read_unlock();
c71099ac 1933
1cfb71ee
WW
1934 if (uncached_rt) {
1935 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1936 * No need for another dst_hold()
1937 */
8d0b94af 1938 rt6_uncached_list_add(uncached_rt);
81eb8447 1939 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1940 } else {
3da59bd9 1941 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1942 dst_hold(&uncached_rt->dst);
1943 }
b811580d 1944
3da59bd9 1945 return uncached_rt;
d52d3997
MKL
1946 } else {
1947 /* Get a percpu copy */
1948
1949 struct rt6_info *pcpu_rt;
1950
951f788a 1951 local_bh_disable();
db3fedee 1952 pcpu_rt = rt6_get_pcpu_route(&res);
d52d3997 1953
93531c67 1954 if (!pcpu_rt)
db3fedee 1955 pcpu_rt = rt6_make_pcpu_route(net, &res);
93531c67 1956
951f788a
ED
1957 local_bh_enable();
1958 rcu_read_unlock();
d4bea421 1959
d52d3997
MKL
1960 return pcpu_rt;
1961 }
1da177e4 1962}
9ff74384 1963EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1964
b75cc8f9
DA
1965static struct rt6_info *ip6_pol_route_input(struct net *net,
1966 struct fib6_table *table,
1967 struct flowi6 *fl6,
1968 const struct sk_buff *skb,
1969 int flags)
4acad72d 1970{
b75cc8f9 1971 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1972}
1973
d409b847
MB
1974struct dst_entry *ip6_route_input_lookup(struct net *net,
1975 struct net_device *dev,
b75cc8f9
DA
1976 struct flowi6 *fl6,
1977 const struct sk_buff *skb,
1978 int flags)
72331bc0
SL
1979{
1980 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1981 flags |= RT6_LOOKUP_F_IFACE;
1982
b75cc8f9 1983 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1984}
d409b847 1985EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1986
23aebdac 1987static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1988 struct flow_keys *keys,
1989 struct flow_keys *flkeys)
23aebdac
JS
1990{
1991 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1992 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1993 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1994 const struct ipv6hdr *inner_iph;
1995 const struct icmp6hdr *icmph;
1996 struct ipv6hdr _inner_iph;
cea67a2d 1997 struct icmp6hdr _icmph;
23aebdac
JS
1998
1999 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
2000 goto out;
2001
cea67a2d
ED
2002 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
2003 sizeof(_icmph), &_icmph);
2004 if (!icmph)
2005 goto out;
2006
23aebdac
JS
2007 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
2008 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
2009 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
2010 icmph->icmp6_type != ICMPV6_PARAMPROB)
2011 goto out;
2012
2013 inner_iph = skb_header_pointer(skb,
2014 skb_transport_offset(skb) + sizeof(*icmph),
2015 sizeof(_inner_iph), &_inner_iph);
2016 if (!inner_iph)
2017 goto out;
2018
2019 key_iph = inner_iph;
5e5d6fed 2020 _flkeys = NULL;
23aebdac 2021out:
5e5d6fed
RP
2022 if (_flkeys) {
2023 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
2024 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
2025 keys->tags.flow_label = _flkeys->tags.flow_label;
2026 keys->basic.ip_proto = _flkeys->basic.ip_proto;
2027 } else {
2028 keys->addrs.v6addrs.src = key_iph->saddr;
2029 keys->addrs.v6addrs.dst = key_iph->daddr;
fa1be7e0 2030 keys->tags.flow_label = ip6_flowlabel(key_iph);
5e5d6fed
RP
2031 keys->basic.ip_proto = key_iph->nexthdr;
2032 }
23aebdac
JS
2033}
2034
2035/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
2036u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2037 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
2038{
2039 struct flow_keys hash_keys;
9a2a537a 2040 u32 mhash;
23aebdac 2041
bbfa047a 2042 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
2043 case 0:
2044 memset(&hash_keys, 0, sizeof(hash_keys));
2045 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2046 if (skb) {
2047 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2048 } else {
2049 hash_keys.addrs.v6addrs.src = fl6->saddr;
2050 hash_keys.addrs.v6addrs.dst = fl6->daddr;
fa1be7e0 2051 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
b4bac172
DA
2052 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2053 }
2054 break;
2055 case 1:
2056 if (skb) {
2057 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2058 struct flow_keys keys;
2059
2060 /* short-circuit if we already have L4 hash present */
2061 if (skb->l4_hash)
2062 return skb_get_hash_raw(skb) >> 1;
2063
2064 memset(&hash_keys, 0, sizeof(hash_keys));
2065
2066 if (!flkeys) {
2067 skb_flow_dissect_flow_keys(skb, &keys, flag);
2068 flkeys = &keys;
2069 }
2070 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2071 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2072 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2073 hash_keys.ports.src = flkeys->ports.src;
2074 hash_keys.ports.dst = flkeys->ports.dst;
2075 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2076 } else {
2077 memset(&hash_keys, 0, sizeof(hash_keys));
2078 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2079 hash_keys.addrs.v6addrs.src = fl6->saddr;
2080 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2081 hash_keys.ports.src = fl6->fl6_sport;
2082 hash_keys.ports.dst = fl6->fl6_dport;
2083 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2084 }
2085 break;
23aebdac 2086 }
9a2a537a 2087 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2088
9a2a537a 2089 return mhash >> 1;
23aebdac
JS
2090}
2091
c71099ac
TG
2092void ip6_route_input(struct sk_buff *skb)
2093{
b71d1d42 2094 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2095 struct net *net = dev_net(skb->dev);
adaa70bb 2096 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2097 struct ip_tunnel_info *tun_info;
4c9483b2 2098 struct flowi6 fl6 = {
e0d56fdd 2099 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2100 .daddr = iph->daddr,
2101 .saddr = iph->saddr,
6502ca52 2102 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2103 .flowi6_mark = skb->mark,
2104 .flowi6_proto = iph->nexthdr,
c71099ac 2105 };
5e5d6fed 2106 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2107
904af04d 2108 tun_info = skb_tunnel_info(skb);
46fa062a 2109 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2110 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2111
2112 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2113 flkeys = &_flkeys;
2114
23aebdac 2115 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2116 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2117 skb_dst_drop(skb);
b75cc8f9
DA
2118 skb_dst_set(skb,
2119 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2120}
2121
b75cc8f9
DA
2122static struct rt6_info *ip6_pol_route_output(struct net *net,
2123 struct fib6_table *table,
2124 struct flowi6 *fl6,
2125 const struct sk_buff *skb,
2126 int flags)
1da177e4 2127{
b75cc8f9 2128 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2129}
2130
6f21c96a
PA
2131struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2132 struct flowi6 *fl6, int flags)
c71099ac 2133{
d46a9d67 2134 bool any_src;
c71099ac 2135
3ede0bbc
RS
2136 if (ipv6_addr_type(&fl6->daddr) &
2137 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
4c1feac5
DA
2138 struct dst_entry *dst;
2139
2140 dst = l3mdev_link_scope_lookup(net, fl6);
2141 if (dst)
2142 return dst;
2143 }
ca254490 2144
1fb9489b 2145 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2146
d46a9d67 2147 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2148 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2149 (fl6->flowi6_oif && any_src))
77d16f45 2150 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2151
d46a9d67 2152 if (!any_src)
adaa70bb 2153 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2154 else if (sk)
2155 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2156
b75cc8f9 2157 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2158}
6f21c96a 2159EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2160
2774c131 2161struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2162{
5c1e6aa3 2163 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2164 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2165 struct dst_entry *new = NULL;
2166
1dbe3252 2167 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2168 DST_OBSOLETE_DEAD, 0);
14e50e57 2169 if (rt) {
0a1f5962 2170 rt6_info_init(rt);
81eb8447 2171 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2172
0a1f5962 2173 new = &rt->dst;
14e50e57 2174 new->__use = 1;
352e512c 2175 new->input = dst_discard;
ede2059d 2176 new->output = dst_discard_out;
14e50e57 2177
0a1f5962 2178 dst_copy_metrics(new, &ort->dst);
14e50e57 2179
1dbe3252 2180 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2181 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2182 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2183
2184 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2185#ifdef CONFIG_IPV6_SUBTREES
2186 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2187#endif
14e50e57
DM
2188 }
2189
69ead7af
DM
2190 dst_release(dst_orig);
2191 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2192}
14e50e57 2193
1da177e4
LT
2194/*
2195 * Destination cache support functions
2196 */
2197
8d1c802b 2198static bool fib6_check(struct fib6_info *f6i, u32 cookie)
4b32b5ad 2199{
93531c67
DA
2200 u32 rt_cookie = 0;
2201
8ae86971 2202 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
93531c67
DA
2203 return false;
2204
2205 if (fib6_check_expired(f6i))
2206 return false;
2207
2208 return true;
4b32b5ad
MKL
2209}
2210
a68886a6
DA
2211static struct dst_entry *rt6_check(struct rt6_info *rt,
2212 struct fib6_info *from,
2213 u32 cookie)
3da59bd9 2214{
36143645 2215 u32 rt_cookie = 0;
c5cff856 2216
a68886a6 2217 if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
93531c67 2218 rt_cookie != cookie)
3da59bd9
MKL
2219 return NULL;
2220
2221 if (rt6_check_expired(rt))
2222 return NULL;
2223
2224 return &rt->dst;
2225}
2226
a68886a6
DA
2227static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2228 struct fib6_info *from,
2229 u32 cookie)
3da59bd9 2230{
5973fb1e
MKL
2231 if (!__rt6_check_expired(rt) &&
2232 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
a68886a6 2233 fib6_check(from, cookie))
3da59bd9
MKL
2234 return &rt->dst;
2235 else
2236 return NULL;
2237}
2238
1da177e4
LT
2239static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2240{
a87b7dc9 2241 struct dst_entry *dst_ret;
a68886a6 2242 struct fib6_info *from;
1da177e4
LT
2243 struct rt6_info *rt;
2244
a87b7dc9
DA
2245 rt = container_of(dst, struct rt6_info, dst);
2246
2247 rcu_read_lock();
1da177e4 2248
6f3118b5
ND
2249 /* All IPV6 dsts are created with ->obsolete set to the value
2250 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2251 * into this function always.
2252 */
e3bc10bd 2253
a68886a6 2254 from = rcu_dereference(rt->from);
4b32b5ad 2255
a68886a6
DA
2256 if (from && (rt->rt6i_flags & RTF_PCPU ||
2257 unlikely(!list_empty(&rt->rt6i_uncached))))
2258 dst_ret = rt6_dst_from_check(rt, from, cookie);
3da59bd9 2259 else
a68886a6 2260 dst_ret = rt6_check(rt, from, cookie);
a87b7dc9
DA
2261
2262 rcu_read_unlock();
2263
2264 return dst_ret;
1da177e4
LT
2265}
2266
2267static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2268{
2269 struct rt6_info *rt = (struct rt6_info *) dst;
2270
2271 if (rt) {
54c1a859 2272 if (rt->rt6i_flags & RTF_CACHE) {
c3c14da0 2273 rcu_read_lock();
54c1a859 2274 if (rt6_check_expired(rt)) {
93531c67 2275 rt6_remove_exception_rt(rt);
54c1a859
YH
2276 dst = NULL;
2277 }
c3c14da0 2278 rcu_read_unlock();
54c1a859 2279 } else {
1da177e4 2280 dst_release(dst);
54c1a859
YH
2281 dst = NULL;
2282 }
1da177e4 2283 }
54c1a859 2284 return dst;
1da177e4
LT
2285}
2286
2287static void ip6_link_failure(struct sk_buff *skb)
2288{
2289 struct rt6_info *rt;
2290
3ffe533c 2291 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2292
adf30907 2293 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2294 if (rt) {
8a14e46f 2295 rcu_read_lock();
1eb4f758 2296 if (rt->rt6i_flags & RTF_CACHE) {
761f6026 2297 rt6_remove_exception_rt(rt);
c5cff856 2298 } else {
a68886a6 2299 struct fib6_info *from;
c5cff856
WW
2300 struct fib6_node *fn;
2301
a68886a6
DA
2302 from = rcu_dereference(rt->from);
2303 if (from) {
2304 fn = rcu_dereference(from->fib6_node);
2305 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2306 fn->fn_sernum = -1;
2307 }
1eb4f758 2308 }
8a14e46f 2309 rcu_read_unlock();
1da177e4
LT
2310 }
2311}
2312
6a3e030f
DA
2313static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2314{
a68886a6
DA
2315 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2316 struct fib6_info *from;
2317
2318 rcu_read_lock();
2319 from = rcu_dereference(rt0->from);
2320 if (from)
2321 rt0->dst.expires = from->expires;
2322 rcu_read_unlock();
2323 }
6a3e030f
DA
2324
2325 dst_set_expires(&rt0->dst, timeout);
2326 rt0->rt6i_flags |= RTF_EXPIRES;
2327}
2328
45e4fd26
MKL
2329static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2330{
2331 struct net *net = dev_net(rt->dst.dev);
2332
d4ead6b3 2333 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2334 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2335 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2336}
2337
0d3f6d29
MKL
2338static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2339{
2340 return !(rt->rt6i_flags & RTF_CACHE) &&
1490ed2a 2341 (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
0d3f6d29
MKL
2342}
2343
45e4fd26
MKL
2344static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2345 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2346{
0dec879f 2347 const struct in6_addr *daddr, *saddr;
67ba4152 2348 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2349
19bda36c
XL
2350 if (dst_metric_locked(dst, RTAX_MTU))
2351 return;
2352
0dec879f
JA
2353 if (iph) {
2354 daddr = &iph->daddr;
2355 saddr = &iph->saddr;
2356 } else if (sk) {
2357 daddr = &sk->sk_v6_daddr;
2358 saddr = &inet6_sk(sk)->saddr;
2359 } else {
2360 daddr = NULL;
2361 saddr = NULL;
2362 }
2363 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2364 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2365 if (mtu >= dst_mtu(dst))
2366 return;
9d289715 2367
0d3f6d29 2368 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2369 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2370 /* update rt6_ex->stamp for cache */
2371 if (rt6->rt6i_flags & RTF_CACHE)
2372 rt6_update_exception_stamp_rt(rt6);
0dec879f 2373 } else if (daddr) {
85bd05de 2374 struct fib6_result res = {};
45e4fd26
MKL
2375 struct rt6_info *nrt6;
2376
4d85cd0c 2377 rcu_read_lock();
85bd05de
DA
2378 res.f6i = rcu_dereference(rt6->from);
2379 if (!res.f6i) {
9c69a132
JL
2380 rcu_read_unlock();
2381 return;
2382 }
85bd05de 2383 res.nh = &res.f6i->fib6_nh;
7d21fec9
DA
2384 res.fib6_flags = res.f6i->fib6_flags;
2385 res.fib6_type = res.f6i->fib6_type;
2386
85bd05de 2387 nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
45e4fd26
MKL
2388 if (nrt6) {
2389 rt6_do_update_pmtu(nrt6, mtu);
5012f0a5 2390 if (rt6_insert_exception(nrt6, &res))
2b760fcf 2391 dst_release_immediate(&nrt6->dst);
45e4fd26 2392 }
a68886a6 2393 rcu_read_unlock();
1da177e4
LT
2394 }
2395}
2396
45e4fd26
MKL
2397static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2398 struct sk_buff *skb, u32 mtu)
2399{
2400 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2401}
2402
42ae66c8 2403void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2404 int oif, u32 mark, kuid_t uid)
81aded24
DM
2405{
2406 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2407 struct dst_entry *dst;
dc92095d
2408 struct flowi6 fl6 = {
2409 .flowi6_oif = oif,
2410 .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2411 .daddr = iph->daddr,
2412 .saddr = iph->saddr,
2413 .flowlabel = ip6_flowinfo(iph),
2414 .flowi6_uid = uid,
2415 };
81aded24
DM
2416
2417 dst = ip6_route_output(net, NULL, &fl6);
2418 if (!dst->error)
45e4fd26 2419 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2420 dst_release(dst);
2421}
2422EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2423
2424void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2425{
7ddacfa5 2426 int oif = sk->sk_bound_dev_if;
33c162a9
MKL
2427 struct dst_entry *dst;
2428
7ddacfa5
DA
2429 if (!oif && skb->dev)
2430 oif = l3mdev_master_ifindex(skb->dev);
2431
2432 ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2433
2434 dst = __sk_dst_get(sk);
2435 if (!dst || !dst->obsolete ||
2436 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2437 return;
2438
2439 bh_lock_sock(sk);
2440 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2441 ip6_datagram_dst_update(sk, false);
2442 bh_unlock_sock(sk);
81aded24
DM
2443}
2444EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2445
7d6850f7
AK
2446void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2447 const struct flowi6 *fl6)
2448{
2449#ifdef CONFIG_IPV6_SUBTREES
2450 struct ipv6_pinfo *np = inet6_sk(sk);
2451#endif
2452
2453 ip6_dst_store(sk, dst,
2454 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2455 &sk->sk_v6_daddr : NULL,
2456#ifdef CONFIG_IPV6_SUBTREES
2457 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2458 &np->saddr :
2459#endif
2460 NULL);
2461}
2462
9b6b35ab 2463static bool ip6_redirect_nh_match(const struct fib6_result *res,
0b34eb00
DA
2464 struct flowi6 *fl6,
2465 const struct in6_addr *gw,
2466 struct rt6_info **ret)
2467{
9b6b35ab
DA
2468 const struct fib6_nh *nh = res->nh;
2469
0b34eb00
DA
2470 if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
2471 fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
2472 return false;
2473
2474 /* rt_cache's gateway might be different from its 'parent'
2475 * in the case of an ip redirect.
2476 * So we keep searching in the exception table if the gateway
2477 * is different.
2478 */
2479 if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
2480 struct rt6_info *rt_cache;
2481
9b6b35ab 2482 rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
0b34eb00
DA
2483 if (rt_cache &&
2484 ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
2485 *ret = rt_cache;
2486 return true;
2487 }
2488 return false;
2489 }
2490 return true;
2491}
2492
b55b76b2
DJ
2493/* Handle redirects */
2494struct ip6rd_flowi {
2495 struct flowi6 fl6;
2496 struct in6_addr gateway;
2497};
2498
2499static struct rt6_info *__ip6_route_redirect(struct net *net,
2500 struct fib6_table *table,
2501 struct flowi6 *fl6,
b75cc8f9 2502 const struct sk_buff *skb,
b55b76b2
DJ
2503 int flags)
2504{
2505 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
0b34eb00 2506 struct rt6_info *ret = NULL;
9b6b35ab 2507 struct fib6_result res = {};
8d1c802b 2508 struct fib6_info *rt;
b55b76b2
DJ
2509 struct fib6_node *fn;
2510
31680ac2
DA
2511 /* l3mdev_update_flow overrides oif if the device is enslaved; in
2512 * this case we must match on the real ingress device, so reset it
2513 */
2514 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
2515 fl6->flowi6_oif = skb->dev->ifindex;
2516
b55b76b2 2517 /* Get the "current" route for this destination and
67c408cf 2518 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2519 *
2520 * RFC 4861 specifies that redirects should only be
2521 * accepted if they come from the nexthop to the target.
2522 * Due to the way the routes are chosen, this notion
2523 * is a bit fuzzy and one might need to check all possible
2524 * routes.
2525 */
2526
66f5d6ce 2527 rcu_read_lock();
6454743b 2528 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
b55b76b2 2529restart:
66f5d6ce 2530 for_each_fib6_node_rt_rcu(fn) {
9b6b35ab
DA
2531 res.f6i = rt;
2532 res.nh = &rt->fib6_nh;
2533
14895687 2534 if (fib6_check_expired(rt))
b55b76b2 2535 continue;
93c2fb25 2536 if (rt->fib6_flags & RTF_REJECT)
b55b76b2 2537 break;
9b6b35ab 2538 if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, &ret))
0b34eb00 2539 goto out;
b55b76b2
DJ
2540 }
2541
2542 if (!rt)
421842ed 2543 rt = net->ipv6.fib6_null_entry;
93c2fb25 2544 else if (rt->fib6_flags & RTF_REJECT) {
23fb93a4 2545 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2546 goto out;
2547 }
2548
421842ed 2549 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2550 fn = fib6_backtrack(fn, &fl6->saddr);
2551 if (fn)
2552 goto restart;
b55b76b2 2553 }
a3c00e46 2554
9b6b35ab
DA
2555 res.f6i = rt;
2556 res.nh = &rt->fib6_nh;
b0a1ba59 2557out:
7d21fec9 2558 if (ret) {
10585b43 2559 ip6_hold_safe(net, &ret);
7d21fec9
DA
2560 } else {
2561 res.fib6_flags = res.f6i->fib6_flags;
2562 res.fib6_type = res.f6i->fib6_type;
9b6b35ab 2563 ret = ip6_create_rt_rcu(&res);
7d21fec9 2564 }
b55b76b2 2565
66f5d6ce 2566 rcu_read_unlock();
b55b76b2 2567
8ff2e5b2 2568 trace_fib6_table_lookup(net, &res, table, fl6);
23fb93a4 2569 return ret;
b55b76b2
DJ
2570};
2571
2572static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2573 const struct flowi6 *fl6,
2574 const struct sk_buff *skb,
2575 const struct in6_addr *gateway)
b55b76b2
DJ
2576{
2577 int flags = RT6_LOOKUP_F_HAS_SADDR;
2578 struct ip6rd_flowi rdfl;
2579
2580 rdfl.fl6 = *fl6;
2581 rdfl.gateway = *gateway;
2582
b75cc8f9 2583 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2584 flags, __ip6_route_redirect);
2585}
2586
e2d118a1
LC
2587void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2588 kuid_t uid)
3a5ad2ee
DM
2589{
2590 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2591 struct dst_entry *dst;
1f7f10ac
2592 struct flowi6 fl6 = {
2593 .flowi6_iif = LOOPBACK_IFINDEX,
2594 .flowi6_oif = oif,
2595 .flowi6_mark = mark,
2596 .daddr = iph->daddr,
2597 .saddr = iph->saddr,
2598 .flowlabel = ip6_flowinfo(iph),
2599 .flowi6_uid = uid,
2600 };
3a5ad2ee 2601
b75cc8f9 2602 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2603 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2604 dst_release(dst);
2605}
2606EXPORT_SYMBOL_GPL(ip6_redirect);
2607
d456336d 2608void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
c92a59ec
DJ
2609{
2610 const struct ipv6hdr *iph = ipv6_hdr(skb);
2611 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2612 struct dst_entry *dst;
0b26fb17
2613 struct flowi6 fl6 = {
2614 .flowi6_iif = LOOPBACK_IFINDEX,
2615 .flowi6_oif = oif,
0b26fb17
2616 .daddr = msg->dest,
2617 .saddr = iph->daddr,
2618 .flowi6_uid = sock_net_uid(net, NULL),
2619 };
c92a59ec 2620
b75cc8f9 2621 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2622 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2623 dst_release(dst);
2624}
2625
3a5ad2ee
DM
2626void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2627{
e2d118a1
LC
2628 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2629 sk->sk_uid);
3a5ad2ee
DM
2630}
2631EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2632
0dbaee3b 2633static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2634{
0dbaee3b
DM
2635 struct net_device *dev = dst->dev;
2636 unsigned int mtu = dst_mtu(dst);
2637 struct net *net = dev_net(dev);
2638
1da177e4
LT
2639 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2640
5578689a
DL
2641 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2642 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2643
2644 /*
1ab1457c
YH
2645 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2646 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2647 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2648 * rely only on pmtu discovery"
2649 */
2650 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2651 mtu = IPV6_MAXPLEN;
2652 return mtu;
2653}
2654
ebb762f2 2655static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2656{
d33e4553 2657 struct inet6_dev *idev;
d4ead6b3 2658 unsigned int mtu;
4b32b5ad
MKL
2659
2660 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2661 if (mtu)
30f78d8e 2662 goto out;
618f9bc7
SK
2663
2664 mtu = IPV6_MIN_MTU;
d33e4553
DM
2665
2666 rcu_read_lock();
2667 idev = __in6_dev_get(dst->dev);
2668 if (idev)
2669 mtu = idev->cnf.mtu6;
2670 rcu_read_unlock();
2671
30f78d8e 2672out:
14972cbd
RP
2673 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2674
2675 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2676}
2677
901731b8
DA
2678/* MTU selection:
2679 * 1. mtu on route is locked - use it
2680 * 2. mtu from nexthop exception
2681 * 3. mtu from egress device
2682 *
2683 * based on ip6_dst_mtu_forward and exception logic of
2684 * rt6_find_cached_rt; called with rcu_read_lock
2685 */
b748f260
DA
2686u32 ip6_mtu_from_fib6(const struct fib6_result *res,
2687 const struct in6_addr *daddr,
2688 const struct in6_addr *saddr)
901731b8 2689{
b748f260
DA
2690 const struct fib6_nh *nh = res->nh;
2691 struct fib6_info *f6i = res->f6i;
901731b8 2692 struct inet6_dev *idev;
510e2ced 2693 struct rt6_info *rt;
901731b8
DA
2694 u32 mtu = 0;
2695
2696 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2697 mtu = f6i->fib6_pmtu;
2698 if (mtu)
2699 goto out;
2700 }
2701
510e2ced
WW
2702 rt = rt6_find_cached_rt(res, daddr, saddr);
2703 if (unlikely(rt)) {
2704 mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
2705 } else {
b748f260 2706 struct net_device *dev = nh->fib_nh_dev;
901731b8
DA
2707
2708 mtu = IPV6_MIN_MTU;
2709 idev = __in6_dev_get(dev);
2710 if (idev && idev->cnf.mtu6 > mtu)
2711 mtu = idev->cnf.mtu6;
2712 }
2713
2714 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2715out:
b748f260 2716 return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
901731b8
DA
2717}
2718
3b00944c 2719struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2720 struct flowi6 *fl6)
1da177e4 2721{
87a11578 2722 struct dst_entry *dst;
1da177e4
LT
2723 struct rt6_info *rt;
2724 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2725 struct net *net = dev_net(dev);
1da177e4 2726
38308473 2727 if (unlikely(!idev))
122bdf67 2728 return ERR_PTR(-ENODEV);
1da177e4 2729
ad706862 2730 rt = ip6_dst_alloc(net, dev, 0);
38308473 2731 if (unlikely(!rt)) {
1da177e4 2732 in6_dev_put(idev);
87a11578 2733 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2734 goto out;
2735 }
2736
8e2ec639 2737 rt->dst.flags |= DST_HOST;
588753f1 2738 rt->dst.input = ip6_input;
8e2ec639 2739 rt->dst.output = ip6_output;
550bab42 2740 rt->rt6i_gateway = fl6->daddr;
87a11578 2741 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2742 rt->rt6i_dst.plen = 128;
2743 rt->rt6i_idev = idev;
14edd87d 2744 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2745
4c981e28 2746 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2747 * do proper release of the net_device
2748 */
2749 rt6_uncached_list_add(rt);
81eb8447 2750 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2751
87a11578
DM
2752 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2753
1da177e4 2754out:
87a11578 2755 return dst;
1da177e4
LT
2756}
2757
569d3645 2758static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2759{
86393e52 2760 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2761 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2762 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2763 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2764 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2765 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2766 int entries;
7019b78e 2767
fc66f95c 2768 entries = dst_entries_get_fast(ops);
49a18d86 2769 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2770 entries <= rt_max_size)
1da177e4
LT
2771 goto out;
2772
6891a346 2773 net->ipv6.ip6_rt_gc_expire++;
14956643 2774 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2775 entries = dst_entries_get_slow(ops);
2776 if (entries < ops->gc_thresh)
7019b78e 2777 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2778out:
7019b78e 2779 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2780 return entries > rt_max_size;
1da177e4
LT
2781}
2782
8c14586f
DA
2783static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2784 struct fib6_config *cfg,
f4797b33
DA
2785 const struct in6_addr *gw_addr,
2786 u32 tbid, int flags)
8c14586f
DA
2787{
2788 struct flowi6 fl6 = {
2789 .flowi6_oif = cfg->fc_ifindex,
2790 .daddr = *gw_addr,
2791 .saddr = cfg->fc_prefsrc,
2792 };
2793 struct fib6_table *table;
2794 struct rt6_info *rt;
8c14586f 2795
f4797b33 2796 table = fib6_get_table(net, tbid);
8c14586f
DA
2797 if (!table)
2798 return NULL;
2799
2800 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2801 flags |= RT6_LOOKUP_F_HAS_SADDR;
2802
f4797b33 2803 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2804 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2805
2806 /* if table lookup failed, fall back to full lookup */
2807 if (rt == net->ipv6.ip6_null_entry) {
2808 ip6_rt_put(rt);
2809 rt = NULL;
2810 }
2811
2812 return rt;
2813}
2814
fc1e64e1
DA
2815static int ip6_route_check_nh_onlink(struct net *net,
2816 struct fib6_config *cfg,
9fbb704c 2817 const struct net_device *dev,
fc1e64e1
DA
2818 struct netlink_ext_ack *extack)
2819{
44750f84 2820 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2821 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2822 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
bf1dc8ba 2823 struct fib6_info *from;
fc1e64e1
DA
2824 struct rt6_info *grt;
2825 int err;
2826
2827 err = 0;
2828 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2829 if (grt) {
bf1dc8ba
PA
2830 rcu_read_lock();
2831 from = rcu_dereference(grt->from);
58e354c0 2832 if (!grt->dst.error &&
4ed591c8 2833 /* ignore match if it is the default route */
bf1dc8ba 2834 from && !ipv6_addr_any(&from->fib6_dst.addr) &&
58e354c0 2835 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2836 NL_SET_ERR_MSG(extack,
2837 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2838 err = -EINVAL;
2839 }
bf1dc8ba 2840 rcu_read_unlock();
fc1e64e1
DA
2841
2842 ip6_rt_put(grt);
2843 }
2844
2845 return err;
2846}
2847
1edce99f
DA
2848static int ip6_route_check_nh(struct net *net,
2849 struct fib6_config *cfg,
2850 struct net_device **_dev,
2851 struct inet6_dev **idev)
2852{
2853 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2854 struct net_device *dev = _dev ? *_dev : NULL;
2855 struct rt6_info *grt = NULL;
2856 int err = -EHOSTUNREACH;
2857
2858 if (cfg->fc_table) {
f4797b33
DA
2859 int flags = RT6_LOOKUP_F_IFACE;
2860
2861 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2862 cfg->fc_table, flags);
1edce99f
DA
2863 if (grt) {
2864 if (grt->rt6i_flags & RTF_GATEWAY ||
2865 (dev && dev != grt->dst.dev)) {
2866 ip6_rt_put(grt);
2867 grt = NULL;
2868 }
2869 }
2870 }
2871
2872 if (!grt)
b75cc8f9 2873 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2874
2875 if (!grt)
2876 goto out;
2877
2878 if (dev) {
2879 if (dev != grt->dst.dev) {
2880 ip6_rt_put(grt);
2881 goto out;
2882 }
2883 } else {
2884 *_dev = dev = grt->dst.dev;
2885 *idev = grt->rt6i_idev;
2886 dev_hold(dev);
2887 in6_dev_hold(grt->rt6i_idev);
2888 }
2889
2890 if (!(grt->rt6i_flags & RTF_GATEWAY))
2891 err = 0;
2892
2893 ip6_rt_put(grt);
2894
2895out:
2896 return err;
2897}
2898
9fbb704c
DA
2899static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2900 struct net_device **_dev, struct inet6_dev **idev,
2901 struct netlink_ext_ack *extack)
2902{
2903 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2904 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2905 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2906 const struct net_device *dev = *_dev;
232378e8 2907 bool need_addr_check = !dev;
9fbb704c
DA
2908 int err = -EINVAL;
2909
2910 /* if gw_addr is local we will fail to detect this in case
2911 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2912 * will return already-added prefix route via interface that
2913 * prefix route was assigned to, which might be non-loopback.
2914 */
232378e8
DA
2915 if (dev &&
2916 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2917 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2918 goto out;
2919 }
2920
2921 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2922 /* IPv6 strictly inhibits using not link-local
2923 * addresses as nexthop address.
2924 * Otherwise, router will not able to send redirects.
2925 * It is very good, but in some (rare!) circumstances
2926 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2927 * some exceptions. --ANK
2928 * We allow IPv4-mapped nexthops to support RFC4798-type
2929 * addressing
2930 */
2931 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2932 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2933 goto out;
2934 }
2935
2936 if (cfg->fc_flags & RTNH_F_ONLINK)
2937 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2938 else
2939 err = ip6_route_check_nh(net, cfg, _dev, idev);
2940
2941 if (err)
2942 goto out;
2943 }
2944
2945 /* reload in case device was changed */
2946 dev = *_dev;
2947
2948 err = -EINVAL;
2949 if (!dev) {
2950 NL_SET_ERR_MSG(extack, "Egress device not specified");
2951 goto out;
2952 } else if (dev->flags & IFF_LOOPBACK) {
2953 NL_SET_ERR_MSG(extack,
2954 "Egress device can not be loopback device for this route");
2955 goto out;
2956 }
232378e8
DA
2957
2958 /* if we did not check gw_addr above, do so now that the
2959 * egress device has been resolved.
2960 */
2961 if (need_addr_check &&
2962 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2963 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2964 goto out;
2965 }
2966
9fbb704c
DA
2967 err = 0;
2968out:
2969 return err;
2970}
2971
83c44251
DA
2972static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
2973{
2974 if ((flags & RTF_REJECT) ||
2975 (dev && (dev->flags & IFF_LOOPBACK) &&
2976 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2977 !(flags & RTF_LOCAL)))
2978 return true;
2979
2980 return false;
2981}
2982
2983int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
2984 struct fib6_config *cfg, gfp_t gfp_flags,
2985 struct netlink_ext_ack *extack)
2986{
2987 struct net_device *dev = NULL;
2988 struct inet6_dev *idev = NULL;
2989 int addr_type;
2990 int err;
2991
f1741730
DA
2992 fib6_nh->fib_nh_family = AF_INET6;
2993
83c44251
DA
2994 err = -ENODEV;
2995 if (cfg->fc_ifindex) {
2996 dev = dev_get_by_index(net, cfg->fc_ifindex);
2997 if (!dev)
2998 goto out;
2999 idev = in6_dev_get(dev);
3000 if (!idev)
3001 goto out;
3002 }
3003
3004 if (cfg->fc_flags & RTNH_F_ONLINK) {
3005 if (!dev) {
3006 NL_SET_ERR_MSG(extack,
3007 "Nexthop device required for onlink");
3008 goto out;
3009 }
3010
3011 if (!(dev->flags & IFF_UP)) {
3012 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3013 err = -ENETDOWN;
3014 goto out;
3015 }
3016
ad1601ae 3017 fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
83c44251
DA
3018 }
3019
ad1601ae 3020 fib6_nh->fib_nh_weight = 1;
83c44251
DA
3021
3022 /* We cannot add true routes via loopback here,
3023 * they would result in kernel looping; promote them to reject routes
3024 */
3025 addr_type = ipv6_addr_type(&cfg->fc_dst);
3026 if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
3027 /* hold loopback dev/idev if we haven't done so. */
3028 if (dev != net->loopback_dev) {
3029 if (dev) {
3030 dev_put(dev);
3031 in6_dev_put(idev);
3032 }
3033 dev = net->loopback_dev;
3034 dev_hold(dev);
3035 idev = in6_dev_get(dev);
3036 if (!idev) {
3037 err = -ENODEV;
3038 goto out;
3039 }
3040 }
3041 goto set_dev;
3042 }
3043
3044 if (cfg->fc_flags & RTF_GATEWAY) {
3045 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3046 if (err)
3047 goto out;
3048
ad1601ae 3049 fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
bdf00467 3050 fib6_nh->fib_nh_gw_family = AF_INET6;
83c44251
DA
3051 }
3052
3053 err = -ENODEV;
3054 if (!dev)
3055 goto out;
3056
3057 if (idev->cnf.disable_ipv6) {
3058 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3059 err = -EACCES;
3060 goto out;
3061 }
3062
3063 if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
3064 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3065 err = -ENETDOWN;
3066 goto out;
3067 }
3068
3069 if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3070 !netif_carrier_ok(dev))
ad1601ae 3071 fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
83c44251 3072
979e276e
DA
3073 err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
3074 cfg->fc_encap_type, cfg, gfp_flags, extack);
3075 if (err)
3076 goto out;
83c44251 3077set_dev:
ad1601ae 3078 fib6_nh->fib_nh_dev = dev;
f1741730 3079 fib6_nh->fib_nh_oif = dev->ifindex;
83c44251
DA
3080 err = 0;
3081out:
3082 if (idev)
3083 in6_dev_put(idev);
3084
3085 if (err) {
ad1601ae
DA
3086 lwtstate_put(fib6_nh->fib_nh_lws);
3087 fib6_nh->fib_nh_lws = NULL;
83c44251
DA
3088 if (dev)
3089 dev_put(dev);
3090 }
3091
3092 return err;
3093}
3094
dac7d0f2
DA
3095void fib6_nh_release(struct fib6_nh *fib6_nh)
3096{
979e276e 3097 fib_nh_common_release(&fib6_nh->nh_common);
dac7d0f2
DA
3098}
3099
8d1c802b 3100static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 3101 gfp_t gfp_flags,
333c4301 3102 struct netlink_ext_ack *extack)
1da177e4 3103{
5578689a 3104 struct net *net = cfg->fc_nlinfo.nl_net;
8d1c802b 3105 struct fib6_info *rt = NULL;
c71099ac 3106 struct fib6_table *table;
8c5b83f0 3107 int err = -EINVAL;
83c44251 3108 int addr_type;
1da177e4 3109
557c44be 3110 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
3111 if (cfg->fc_flags & RTF_PCPU) {
3112 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 3113 goto out;
d5d531cb 3114 }
557c44be 3115
2ea2352e
WW
3116 /* RTF_CACHE is an internal flag; can not be set by userspace */
3117 if (cfg->fc_flags & RTF_CACHE) {
3118 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
3119 goto out;
3120 }
3121
e8478e80
DA
3122 if (cfg->fc_type > RTN_MAX) {
3123 NL_SET_ERR_MSG(extack, "Invalid route type");
3124 goto out;
3125 }
3126
d5d531cb
DA
3127 if (cfg->fc_dst_len > 128) {
3128 NL_SET_ERR_MSG(extack, "Invalid prefix length");
3129 goto out;
3130 }
3131 if (cfg->fc_src_len > 128) {
3132 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 3133 goto out;
d5d531cb 3134 }
1da177e4 3135#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
3136 if (cfg->fc_src_len) {
3137 NL_SET_ERR_MSG(extack,
3138 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 3139 goto out;
d5d531cb 3140 }
1da177e4 3141#endif
fc1e64e1 3142
d71314b4 3143 err = -ENOBUFS;
38308473
DM
3144 if (cfg->fc_nlinfo.nlh &&
3145 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 3146 table = fib6_get_table(net, cfg->fc_table);
38308473 3147 if (!table) {
f3213831 3148 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
3149 table = fib6_new_table(net, cfg->fc_table);
3150 }
3151 } else {
3152 table = fib6_new_table(net, cfg->fc_table);
3153 }
38308473
DM
3154
3155 if (!table)
c71099ac 3156 goto out;
c71099ac 3157
93531c67
DA
3158 err = -ENOMEM;
3159 rt = fib6_info_alloc(gfp_flags);
3160 if (!rt)
1da177e4 3161 goto out;
93531c67 3162
d7e774f3
DA
3163 rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3164 extack);
767a2217
DA
3165 if (IS_ERR(rt->fib6_metrics)) {
3166 err = PTR_ERR(rt->fib6_metrics);
fda21d46
ED
3167 /* Do not leave garbage there. */
3168 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
767a2217
DA
3169 goto out;
3170 }
3171
93531c67
DA
3172 if (cfg->fc_flags & RTF_ADDRCONF)
3173 rt->dst_nocount = true;
1da177e4 3174
1716a961 3175 if (cfg->fc_flags & RTF_EXPIRES)
14895687 3176 fib6_set_expires(rt, jiffies +
1716a961
G
3177 clock_t_to_jiffies(cfg->fc_expires));
3178 else
14895687 3179 fib6_clean_expires(rt);
1da177e4 3180
86872cb5
TG
3181 if (cfg->fc_protocol == RTPROT_UNSPEC)
3182 cfg->fc_protocol = RTPROT_BOOT;
93c2fb25 3183 rt->fib6_protocol = cfg->fc_protocol;
86872cb5 3184
83c44251
DA
3185 rt->fib6_table = table;
3186 rt->fib6_metric = cfg->fc_metric;
3187 rt->fib6_type = cfg->fc_type;
2b2450ca 3188 rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
19e42e45 3189
93c2fb25
DA
3190 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3191 rt->fib6_dst.plen = cfg->fc_dst_len;
3192 if (rt->fib6_dst.plen == 128)
3b6761d1 3193 rt->dst_host = true;
e5fd387a 3194
1da177e4 3195#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
3196 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3197 rt->fib6_src.plen = cfg->fc_src_len;
1da177e4 3198#endif
83c44251
DA
3199 err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
3200 if (err)
3201 goto out;
1da177e4
LT
3202
3203 /* We cannot add true routes via loopback here,
83c44251 3204 * they would result in kernel looping; promote them to reject routes
1da177e4 3205 */
83c44251 3206 addr_type = ipv6_addr_type(&cfg->fc_dst);
ad1601ae 3207 if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type))
83c44251 3208 rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
955ec4cb 3209
c3968a85 3210 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
83c44251
DA
3211 struct net_device *dev = fib6_info_nh_dev(rt);
3212
c3968a85 3213 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3214 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3215 err = -EINVAL;
3216 goto out;
3217 }
93c2fb25
DA
3218 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3219 rt->fib6_prefsrc.plen = 128;
c3968a85 3220 } else
93c2fb25 3221 rt->fib6_prefsrc.plen = 0;
c3968a85 3222
8c5b83f0 3223 return rt;
6b9ea5a6 3224out:
93531c67 3225 fib6_info_release(rt);
8c5b83f0 3226 return ERR_PTR(err);
6b9ea5a6
RP
3227}
3228
acb54e3c 3229int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
333c4301 3230 struct netlink_ext_ack *extack)
6b9ea5a6 3231{
8d1c802b 3232 struct fib6_info *rt;
6b9ea5a6
RP
3233 int err;
3234
acb54e3c 3235 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3236 if (IS_ERR(rt))
3237 return PTR_ERR(rt);
6b9ea5a6 3238
d4ead6b3 3239 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3240 fib6_info_release(rt);
6b9ea5a6 3241
1da177e4
LT
3242 return err;
3243}
3244
8d1c802b 3245static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
1da177e4 3246{
afb1d4b5 3247 struct net *net = info->nl_net;
c71099ac 3248 struct fib6_table *table;
afb1d4b5 3249 int err;
1da177e4 3250
421842ed 3251 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3252 err = -ENOENT;
3253 goto out;
3254 }
6c813a72 3255
93c2fb25 3256 table = rt->fib6_table;
66f5d6ce 3257 spin_lock_bh(&table->tb6_lock);
86872cb5 3258 err = fib6_del(rt, info);
66f5d6ce 3259 spin_unlock_bh(&table->tb6_lock);
1da177e4 3260
6825a26c 3261out:
93531c67 3262 fib6_info_release(rt);
1da177e4
LT
3263 return err;
3264}
3265
8d1c802b 3266int ip6_del_rt(struct net *net, struct fib6_info *rt)
e0a1ad73 3267{
afb1d4b5
DA
3268 struct nl_info info = { .nl_net = net };
3269
528c4ceb 3270 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3271}
3272
8d1c802b 3273static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
0ae81335
DA
3274{
3275 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3276 struct net *net = info->nl_net;
16a16cd3 3277 struct sk_buff *skb = NULL;
0ae81335 3278 struct fib6_table *table;
e3330039 3279 int err = -ENOENT;
0ae81335 3280
421842ed 3281 if (rt == net->ipv6.fib6_null_entry)
e3330039 3282 goto out_put;
93c2fb25 3283 table = rt->fib6_table;
66f5d6ce 3284 spin_lock_bh(&table->tb6_lock);
0ae81335 3285
93c2fb25 3286 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
8d1c802b 3287 struct fib6_info *sibling, *next_sibling;
0ae81335 3288
16a16cd3
DA
3289 /* prefer to send a single notification with all hops */
3290 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3291 if (skb) {
3292 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3293
d4ead6b3 3294 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3295 NULL, NULL, 0, RTM_DELROUTE,
3296 info->portid, seq, 0) < 0) {
3297 kfree_skb(skb);
3298 skb = NULL;
3299 } else
3300 info->skip_notify = 1;
3301 }
3302
0ae81335 3303 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
3304 &rt->fib6_siblings,
3305 fib6_siblings) {
0ae81335
DA
3306 err = fib6_del(sibling, info);
3307 if (err)
e3330039 3308 goto out_unlock;
0ae81335
DA
3309 }
3310 }
3311
3312 err = fib6_del(rt, info);
e3330039 3313out_unlock:
66f5d6ce 3314 spin_unlock_bh(&table->tb6_lock);
e3330039 3315out_put:
93531c67 3316 fib6_info_release(rt);
16a16cd3
DA
3317
3318 if (skb) {
e3330039 3319 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3320 info->nlh, gfp_any());
3321 }
0ae81335
DA
3322 return err;
3323}
3324
23fb93a4
DA
3325static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3326{
3327 int rc = -ESRCH;
3328
3329 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3330 goto out;
3331
3332 if (cfg->fc_flags & RTF_GATEWAY &&
3333 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3334 goto out;
761f6026
XL
3335
3336 rc = rt6_remove_exception_rt(rt);
23fb93a4
DA
3337out:
3338 return rc;
3339}
3340
333c4301
DA
3341static int ip6_route_del(struct fib6_config *cfg,
3342 struct netlink_ext_ack *extack)
1da177e4 3343{
8d1c802b 3344 struct rt6_info *rt_cache;
c71099ac 3345 struct fib6_table *table;
8d1c802b 3346 struct fib6_info *rt;
1da177e4 3347 struct fib6_node *fn;
1da177e4
LT
3348 int err = -ESRCH;
3349
5578689a 3350 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3351 if (!table) {
3352 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3353 return err;
d5d531cb 3354 }
c71099ac 3355
66f5d6ce 3356 rcu_read_lock();
1da177e4 3357
c71099ac 3358 fn = fib6_locate(&table->tb6_root,
86872cb5 3359 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3360 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3361 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3362
1da177e4 3363 if (fn) {
66f5d6ce 3364 for_each_fib6_node_rt_rcu(fn) {
ad1601ae
DA
3365 struct fib6_nh *nh;
3366
2b760fcf 3367 if (cfg->fc_flags & RTF_CACHE) {
7e4b5128
DA
3368 struct fib6_result res = {
3369 .f6i = rt,
3370 };
23fb93a4
DA
3371 int rc;
3372
7e4b5128
DA
3373 rt_cache = rt6_find_cached_rt(&res,
3374 &cfg->fc_dst,
2b760fcf 3375 &cfg->fc_src);
23fb93a4
DA
3376 if (rt_cache) {
3377 rc = ip6_del_cached_rt(rt_cache, cfg);
9e575010
ED
3378 if (rc != -ESRCH) {
3379 rcu_read_unlock();
23fb93a4 3380 return rc;
9e575010 3381 }
23fb93a4
DA
3382 }
3383 continue;
2b760fcf 3384 }
ad1601ae
DA
3385
3386 nh = &rt->fib6_nh;
86872cb5 3387 if (cfg->fc_ifindex &&
ad1601ae
DA
3388 (!nh->fib_nh_dev ||
3389 nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3390 continue;
86872cb5 3391 if (cfg->fc_flags & RTF_GATEWAY &&
ad1601ae 3392 !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
1da177e4 3393 continue;
93c2fb25 3394 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
1da177e4 3395 continue;
93c2fb25 3396 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
c2ed1880 3397 continue;
e873e4b9
WW
3398 if (!fib6_info_hold_safe(rt))
3399 continue;
66f5d6ce 3400 rcu_read_unlock();
1da177e4 3401
0ae81335
DA
3402 /* if gateway was specified only delete the one hop */
3403 if (cfg->fc_flags & RTF_GATEWAY)
3404 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3405
3406 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3407 }
3408 }
66f5d6ce 3409 rcu_read_unlock();
1da177e4
LT
3410
3411 return err;
3412}
3413
6700c270 3414static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3415{
a6279458 3416 struct netevent_redirect netevent;
e8599ff4 3417 struct rt6_info *rt, *nrt = NULL;
85bd05de 3418 struct fib6_result res = {};
e8599ff4
DM
3419 struct ndisc_options ndopts;
3420 struct inet6_dev *in6_dev;
3421 struct neighbour *neigh;
71bcdba0 3422 struct rd_msg *msg;
6e157b6a
DM
3423 int optlen, on_link;
3424 u8 *lladdr;
e8599ff4 3425
29a3cad5 3426 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3427 optlen -= sizeof(*msg);
e8599ff4
DM
3428
3429 if (optlen < 0) {
6e157b6a 3430 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3431 return;
3432 }
3433
71bcdba0 3434 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3435
71bcdba0 3436 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3437 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3438 return;
3439 }
3440
6e157b6a 3441 on_link = 0;
71bcdba0 3442 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3443 on_link = 1;
71bcdba0 3444 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3445 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3446 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3447 return;
3448 }
3449
3450 in6_dev = __in6_dev_get(skb->dev);
3451 if (!in6_dev)
3452 return;
3453 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3454 return;
3455
3456 /* RFC2461 8.1:
3457 * The IP source address of the Redirect MUST be the same as the current
3458 * first-hop router for the specified ICMP Destination Address.
3459 */
3460
f997c55c 3461 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3462 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3463 return;
3464 }
6e157b6a
DM
3465
3466 lladdr = NULL;
e8599ff4
DM
3467 if (ndopts.nd_opts_tgt_lladdr) {
3468 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3469 skb->dev);
3470 if (!lladdr) {
3471 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3472 return;
3473 }
3474 }
3475
6e157b6a 3476 rt = (struct rt6_info *) dst;
ec13ad1d 3477 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3478 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3479 return;
6e157b6a 3480 }
e8599ff4 3481
6e157b6a
DM
3482 /* Redirect received -> path was valid.
3483 * Look, redirects are sent only in response to data packets,
3484 * so that this nexthop apparently is reachable. --ANK
3485 */
0dec879f 3486 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3487
71bcdba0 3488 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3489 if (!neigh)
3490 return;
a6279458 3491
1da177e4
LT
3492 /*
3493 * We have finally decided to accept it.
3494 */
3495
f997c55c 3496 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3497 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3498 NEIGH_UPDATE_F_OVERRIDE|
3499 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3500 NEIGH_UPDATE_F_ISROUTER)),
3501 NDISC_REDIRECT, &ndopts);
1da177e4 3502
4d85cd0c 3503 rcu_read_lock();
85bd05de 3504 res.f6i = rcu_dereference(rt->from);
ff24e498 3505 if (!res.f6i)
886b7a50 3506 goto out;
8a14e46f 3507
85bd05de 3508 res.nh = &res.f6i->fib6_nh;
7d21fec9
DA
3509 res.fib6_flags = res.f6i->fib6_flags;
3510 res.fib6_type = res.f6i->fib6_type;
85bd05de 3511 nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
38308473 3512 if (!nrt)
1da177e4
LT
3513 goto out;
3514
3515 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3516 if (on_link)
3517 nrt->rt6i_flags &= ~RTF_GATEWAY;
3518
4e3fd7a0 3519 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3520
886b7a50 3521 /* rt6_insert_exception() will take care of duplicated exceptions */
5012f0a5 3522 if (rt6_insert_exception(nrt, &res)) {
2b760fcf
WW
3523 dst_release_immediate(&nrt->dst);
3524 goto out;
3525 }
1da177e4 3526
d8d1f30b
CG
3527 netevent.old = &rt->dst;
3528 netevent.new = &nrt->dst;
71bcdba0 3529 netevent.daddr = &msg->dest;
60592833 3530 netevent.neigh = neigh;
8d71740c
TT
3531 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3532
1da177e4 3533out:
886b7a50 3534 rcu_read_unlock();
e8599ff4 3535 neigh_release(neigh);
6e157b6a
DM
3536}
3537
70ceb4f5 3538#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 3539static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 3540 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3541 const struct in6_addr *gwaddr,
3542 struct net_device *dev)
70ceb4f5 3543{
830218c1
DA
3544 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3545 int ifindex = dev->ifindex;
70ceb4f5 3546 struct fib6_node *fn;
8d1c802b 3547 struct fib6_info *rt = NULL;
c71099ac
TG
3548 struct fib6_table *table;
3549
830218c1 3550 table = fib6_get_table(net, tb_id);
38308473 3551 if (!table)
c71099ac 3552 return NULL;
70ceb4f5 3553
66f5d6ce 3554 rcu_read_lock();
38fbeeee 3555 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3556 if (!fn)
3557 goto out;
3558
66f5d6ce 3559 for_each_fib6_node_rt_rcu(fn) {
ad1601ae 3560 if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
70ceb4f5 3561 continue;
2b2450ca 3562 if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
bdf00467 3563 !rt->fib6_nh.fib_nh_gw_family)
70ceb4f5 3564 continue;
ad1601ae 3565 if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
70ceb4f5 3566 continue;
e873e4b9
WW
3567 if (!fib6_info_hold_safe(rt))
3568 continue;
70ceb4f5
YH
3569 break;
3570 }
3571out:
66f5d6ce 3572 rcu_read_unlock();
70ceb4f5
YH
3573 return rt;
3574}
3575
8d1c802b 3576static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 3577 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3578 const struct in6_addr *gwaddr,
3579 struct net_device *dev,
95c96174 3580 unsigned int pref)
70ceb4f5 3581{
86872cb5 3582 struct fib6_config cfg = {
238fc7ea 3583 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3584 .fc_ifindex = dev->ifindex,
86872cb5
TG
3585 .fc_dst_len = prefixlen,
3586 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3587 RTF_UP | RTF_PREF(pref),
b91d5329 3588 .fc_protocol = RTPROT_RA,
e8478e80 3589 .fc_type = RTN_UNICAST,
15e47304 3590 .fc_nlinfo.portid = 0,
efa2cea0
DL
3591 .fc_nlinfo.nlh = NULL,
3592 .fc_nlinfo.nl_net = net,
86872cb5
TG
3593 };
3594
830218c1 3595 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3596 cfg.fc_dst = *prefix;
3597 cfg.fc_gateway = *gwaddr;
70ceb4f5 3598
e317da96
YH
3599 /* We should treat it as a default route if prefix length is 0. */
3600 if (!prefixlen)
86872cb5 3601 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3602
acb54e3c 3603 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3604
830218c1 3605 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3606}
3607#endif
3608
8d1c802b 3609struct fib6_info *rt6_get_dflt_router(struct net *net,
afb1d4b5
DA
3610 const struct in6_addr *addr,
3611 struct net_device *dev)
1ab1457c 3612{
830218c1 3613 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
8d1c802b 3614 struct fib6_info *rt;
c71099ac 3615 struct fib6_table *table;
1da177e4 3616
afb1d4b5 3617 table = fib6_get_table(net, tb_id);
38308473 3618 if (!table)
c71099ac 3619 return NULL;
1da177e4 3620
66f5d6ce
WW
3621 rcu_read_lock();
3622 for_each_fib6_node_rt_rcu(&table->tb6_root) {
ad1601ae
DA
3623 struct fib6_nh *nh = &rt->fib6_nh;
3624
3625 if (dev == nh->fib_nh_dev &&
93c2fb25 3626 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ad1601ae 3627 ipv6_addr_equal(&nh->fib_nh_gw6, addr))
1da177e4
LT
3628 break;
3629 }
e873e4b9
WW
3630 if (rt && !fib6_info_hold_safe(rt))
3631 rt = NULL;
66f5d6ce 3632 rcu_read_unlock();
1da177e4
LT
3633 return rt;
3634}
3635
8d1c802b 3636struct fib6_info *rt6_add_dflt_router(struct net *net,
afb1d4b5 3637 const struct in6_addr *gwaddr,
ebacaaa0
YH
3638 struct net_device *dev,
3639 unsigned int pref)
1da177e4 3640{
86872cb5 3641 struct fib6_config cfg = {
ca254490 3642 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3643 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3644 .fc_ifindex = dev->ifindex,
3645 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3646 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3647 .fc_protocol = RTPROT_RA,
e8478e80 3648 .fc_type = RTN_UNICAST,
15e47304 3649 .fc_nlinfo.portid = 0,
5578689a 3650 .fc_nlinfo.nlh = NULL,
afb1d4b5 3651 .fc_nlinfo.nl_net = net,
86872cb5 3652 };
1da177e4 3653
4e3fd7a0 3654 cfg.fc_gateway = *gwaddr;
1da177e4 3655
acb54e3c 3656 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3657 struct fib6_table *table;
3658
3659 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3660 if (table)
3661 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3662 }
1da177e4 3663
afb1d4b5 3664 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3665}
3666
afb1d4b5
DA
3667static void __rt6_purge_dflt_routers(struct net *net,
3668 struct fib6_table *table)
1da177e4 3669{
8d1c802b 3670 struct fib6_info *rt;
1da177e4
LT
3671
3672restart:
66f5d6ce
WW
3673 rcu_read_lock();
3674 for_each_fib6_node_rt_rcu(&table->tb6_root) {
dcd1f572
DA
3675 struct net_device *dev = fib6_info_nh_dev(rt);
3676 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3677
93c2fb25 3678 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
e873e4b9
WW
3679 (!idev || idev->cnf.accept_ra != 2) &&
3680 fib6_info_hold_safe(rt)) {
93531c67
DA
3681 rcu_read_unlock();
3682 ip6_del_rt(net, rt);
1da177e4
LT
3683 goto restart;
3684 }
3685 }
66f5d6ce 3686 rcu_read_unlock();
830218c1
DA
3687
3688 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3689}
3690
3691void rt6_purge_dflt_routers(struct net *net)
3692{
3693 struct fib6_table *table;
3694 struct hlist_head *head;
3695 unsigned int h;
3696
3697 rcu_read_lock();
3698
3699 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3700 head = &net->ipv6.fib_table_hash[h];
3701 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3702 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3703 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3704 }
3705 }
3706
3707 rcu_read_unlock();
1da177e4
LT
3708}
3709
5578689a
DL
3710static void rtmsg_to_fib6_config(struct net *net,
3711 struct in6_rtmsg *rtmsg,
86872cb5
TG
3712 struct fib6_config *cfg)
3713{
8823a3ac
3714 *cfg = (struct fib6_config){
3715 .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3716 : RT6_TABLE_MAIN,
3717 .fc_ifindex = rtmsg->rtmsg_ifindex,
67f69513 3718 .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
8823a3ac
3719 .fc_expires = rtmsg->rtmsg_info,
3720 .fc_dst_len = rtmsg->rtmsg_dst_len,
3721 .fc_src_len = rtmsg->rtmsg_src_len,
3722 .fc_flags = rtmsg->rtmsg_flags,
3723 .fc_type = rtmsg->rtmsg_type,
3724
3725 .fc_nlinfo.nl_net = net,
3726
3727 .fc_dst = rtmsg->rtmsg_dst,
3728 .fc_src = rtmsg->rtmsg_src,
3729 .fc_gateway = rtmsg->rtmsg_gateway,
3730 };
86872cb5
TG
3731}
3732
5578689a 3733int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3734{
86872cb5 3735 struct fib6_config cfg;
1da177e4
LT
3736 struct in6_rtmsg rtmsg;
3737 int err;
3738
67ba4152 3739 switch (cmd) {
1da177e4
LT
3740 case SIOCADDRT: /* Add a route */
3741 case SIOCDELRT: /* Delete a route */
af31f412 3742 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3743 return -EPERM;
3744 err = copy_from_user(&rtmsg, arg,
3745 sizeof(struct in6_rtmsg));
3746 if (err)
3747 return -EFAULT;
86872cb5 3748
5578689a 3749 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3750
1da177e4
LT
3751 rtnl_lock();
3752 switch (cmd) {
3753 case SIOCADDRT:
acb54e3c 3754 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3755 break;
3756 case SIOCDELRT:
333c4301 3757 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3758 break;
3759 default:
3760 err = -EINVAL;
3761 }
3762 rtnl_unlock();
3763
3764 return err;
3ff50b79 3765 }
1da177e4
LT
3766
3767 return -EINVAL;
3768}
3769
3770/*
3771 * Drop the packet on the floor
3772 */
3773
d5fdd6ba 3774static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3775{
adf30907 3776 struct dst_entry *dst = skb_dst(skb);
1d3fd8a1
SS
3777 struct net *net = dev_net(dst->dev);
3778 struct inet6_dev *idev;
3779 int type;
3780
3781 if (netif_is_l3_master(skb->dev) &&
3782 dst->dev == net->loopback_dev)
3783 idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
3784 else
3785 idev = ip6_dst_idev(dst);
3786
612f09e8
YH
3787 switch (ipstats_mib_noroutes) {
3788 case IPSTATS_MIB_INNOROUTES:
0660e03f 3789 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3790 if (type == IPV6_ADDR_ANY) {
1d3fd8a1 3791 IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3792 break;
3793 }
3794 /* FALLTHROUGH */
3795 case IPSTATS_MIB_OUTNOROUTES:
1d3fd8a1 3796 IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
612f09e8
YH
3797 break;
3798 }
1d3fd8a1
SS
3799
3800 /* Start over by dropping the dst for l3mdev case */
3801 if (netif_is_l3_master(skb->dev))
3802 skb_dst_drop(skb);
3803
3ffe533c 3804 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3805 kfree_skb(skb);
3806 return 0;
3807}
3808
9ce8ade0
TG
3809static int ip6_pkt_discard(struct sk_buff *skb)
3810{
612f09e8 3811 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3812}
3813
ede2059d 3814static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3815{
adf30907 3816 skb->dev = skb_dst(skb)->dev;
612f09e8 3817 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3818}
3819
9ce8ade0
TG
3820static int ip6_pkt_prohibit(struct sk_buff *skb)
3821{
612f09e8 3822 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3823}
3824
ede2059d 3825static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3826{
adf30907 3827 skb->dev = skb_dst(skb)->dev;
612f09e8 3828 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3829}
3830
1da177e4
LT
3831/*
3832 * Allocate a dst for local (unicast / anycast) address.
3833 */
3834
360a9887
DA
3835struct fib6_info *addrconf_f6i_alloc(struct net *net,
3836 struct inet6_dev *idev,
3837 const struct in6_addr *addr,
3838 bool anycast, gfp_t gfp_flags)
1da177e4 3839{
c7a1ce39
DA
3840 struct fib6_config cfg = {
3841 .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
3842 .fc_ifindex = idev->dev->ifindex,
3843 .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP,
3844 .fc_dst = *addr,
3845 .fc_dst_len = 128,
3846 .fc_protocol = RTPROT_KERNEL,
3847 .fc_nlinfo.nl_net = net,
3848 .fc_ignore_dev_down = true,
3849 };
1da177e4 3850
e8478e80 3851 if (anycast) {
c7a1ce39
DA
3852 cfg.fc_type = RTN_ANYCAST;
3853 cfg.fc_flags |= RTF_ANYCAST;
e8478e80 3854 } else {
c7a1ce39
DA
3855 cfg.fc_type = RTN_LOCAL;
3856 cfg.fc_flags |= RTF_LOCAL;
e8478e80 3857 }
1da177e4 3858
c7a1ce39 3859 return ip6_route_info_create(&cfg, gfp_flags, NULL);
1da177e4
LT
3860}
3861
/* remove deleted ip from prefsrc entries
 *
 * Argument bundle handed to fib6_remove_prefsrc() by the table walker.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace, for its null entry */
	struct in6_addr *addr;		/* address being removed */
};
3868
8d1c802b 3869static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
c3968a85
DW
3870{
3871 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3872 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3873 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3874
ad1601ae 3875 if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) &&
421842ed 3876 rt != net->ipv6.fib6_null_entry &&
93c2fb25 3877 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
60006a48 3878 spin_lock_bh(&rt6_exception_lock);
c3968a85 3879 /* remove prefsrc entry */
93c2fb25 3880 rt->fib6_prefsrc.plen = 0;
60006a48 3881 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3882 }
3883 return 0;
3884}
3885
3886void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3887{
3888 struct net *net = dev_net(ifp->idev->dev);
3889 struct arg_dev_net_ip adni = {
3890 .dev = ifp->idev->dev,
3891 .net = net,
3892 .addr = &ifp->addr,
3893 };
0c3584d5 3894 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3895}
3896
2b2450ca 3897#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT)
be7a010d
DJ
3898
3899/* Remove routers and update dst entries when gateway turn into host. */
8d1c802b 3900static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
be7a010d
DJ
3901{
3902 struct in6_addr *gateway = (struct in6_addr *)arg;
3903
93c2fb25 3904 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
bdf00467 3905 rt->fib6_nh.fib_nh_gw_family &&
ad1601ae 3906 ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
be7a010d
DJ
3907 return -1;
3908 }
b16cb459
WW
3909
3910 /* Further clean up cached routes in exception table.
3911 * This is needed because cached route may have a different
3912 * gateway than its 'parent' in the case of an ip redirect.
3913 */
3914 rt6_exceptions_clean_tohost(rt, gateway);
3915
be7a010d
DJ
3916 return 0;
3917}
3918
3919void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3920{
3921 fib6_clean_all(net, fib6_clean_tohost, gateway);
3922}
3923
/* Argument bundle describing a netdevice event for table walkers.
 * The two union members share storage, so only one is meaningful per
 * use; which one is decided by the code filling the structure in (not
 * visible in this part of the file).
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event refers to */
	union {
		unsigned char nh_flags;	/* nexthop flags */
		unsigned long event;	/* event code */
	};
};
3931
8d1c802b 3932static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
d7dedee1 3933{
8d1c802b 3934 struct fib6_info *iter;
d7dedee1
IS
3935 struct fib6_node *fn;
3936
93c2fb25
DA
3937 fn = rcu_dereference_protected(rt->fib6_node,
3938 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3939 iter = rcu_dereference_protected(fn->leaf,
93c2fb25 3940 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3941 while (iter) {
93c2fb25 3942 if (iter->fib6_metric == rt->fib6_metric &&
33bd5ac5 3943 rt6_qualify_for_ecmp(iter))
d7dedee1 3944 return iter;
8fb11a9a 3945 iter = rcu_dereference_protected(iter->fib6_next,
93c2fb25 3946 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1
IS
3947 }
3948
3949 return NULL;
3950}
3951
8d1c802b 3952static bool rt6_is_dead(const struct fib6_info *rt)
d7dedee1 3953{
ad1601ae
DA
3954 if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ||
3955 (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
3956 ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev)))
d7dedee1
IS
3957 return true;
3958
3959 return false;
3960}
3961
8d1c802b 3962static int rt6_multipath_total_weight(const struct fib6_info *rt)
d7dedee1 3963{
8d1c802b 3964 struct fib6_info *iter;
d7dedee1
IS
3965 int total = 0;
3966
3967 if (!rt6_is_dead(rt))
ad1601ae 3968 total += rt->fib6_nh.fib_nh_weight;
d7dedee1 3969
93c2fb25 3970 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
d7dedee1 3971 if (!rt6_is_dead(iter))
ad1601ae 3972 total += iter->fib6_nh.fib_nh_weight;
d7dedee1
IS
3973 }
3974
3975 return total;
3976}
3977
8d1c802b 3978static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
d7dedee1
IS
3979{
3980 int upper_bound = -1;
3981
3982 if (!rt6_is_dead(rt)) {
ad1601ae 3983 *weight += rt->fib6_nh.fib_nh_weight;
d7dedee1
IS
3984 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3985 total) - 1;
3986 }
ad1601ae 3987 atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound);
d7dedee1
IS
3988}
3989
8d1c802b 3990static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
d7dedee1 3991{
8d1c802b 3992 struct fib6_info *iter;
d7dedee1
IS
3993 int weight = 0;
3994
3995 rt6_upper_bound_set(rt, &weight, total);
3996
93c2fb25 3997 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
d7dedee1
IS
3998 rt6_upper_bound_set(iter, &weight, total);
3999}
4000
8d1c802b 4001void rt6_multipath_rebalance(struct fib6_info *rt)
d7dedee1 4002{
8d1c802b 4003 struct fib6_info *first;
d7dedee1
IS
4004 int total;
4005
4006 /* In case the entire multipath route was marked for flushing,
4007 * then there is no need to rebalance upon the removal of every
4008 * sibling route.
4009 */
93c2fb25 4010 if (!rt->fib6_nsiblings || rt->should_flush)
d7dedee1
IS
4011 return;
4012
4013 /* During lookup routes are evaluated in order, so we need to
4014 * make sure upper bounds are assigned from the first sibling
4015 * onwards.
4016 */
4017 first = rt6_multipath_first_sibling(rt);
4018 if (WARN_ON_ONCE(!first))
4019 return;
4020
4021 total = rt6_multipath_total_weight(first);
4022 rt6_multipath_upper_bound_set(first, total);
4023}
4024
8d1c802b 4025static int fib6_ifup(struct fib6_info *rt, void *p_arg)
2127d95a
IS
4026{
4027 const struct arg_netdev_event *arg = p_arg;
7aef6859 4028 struct net *net = dev_net(arg->dev);
2127d95a 4029
ad1601ae
DA
4030 if (rt != net->ipv6.fib6_null_entry &&
4031 rt->fib6_nh.fib_nh_dev == arg->dev) {
4032 rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags;
7aef6859 4033 fib6_update_sernum_upto_root(net, rt);
d7dedee1 4034 rt6_multipath_rebalance(rt);
1de178ed 4035 }
2127d95a
IS
4036
4037 return 0;
4038}
4039
ecc5663c 4040void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
2127d95a
IS
4041{
4042 struct arg_netdev_event arg = {
4043 .dev = dev,
6802f3ad
IS
4044 {
4045 .nh_flags = nh_flags,
4046 },
2127d95a
IS
4047 };
4048
4049 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
4050 arg.nh_flags |= RTNH_F_LINKDOWN;
4051
4052 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
4053}
4054
8d1c802b 4055static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
1de178ed
IS
4056 const struct net_device *dev)
4057{
8d1c802b 4058 struct fib6_info *iter;
1de178ed 4059
ad1601ae 4060 if (rt->fib6_nh.fib_nh_dev == dev)
1de178ed 4061 return true;
93c2fb25 4062 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
ad1601ae 4063 if (iter->fib6_nh.fib_nh_dev == dev)
1de178ed
IS
4064 return true;
4065
4066 return false;
4067}
4068
8d1c802b 4069static void rt6_multipath_flush(struct fib6_info *rt)
1de178ed 4070{
8d1c802b 4071 struct fib6_info *iter;
1de178ed
IS
4072
4073 rt->should_flush = 1;
93c2fb25 4074 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1de178ed
IS
4075 iter->should_flush = 1;
4076}
4077
8d1c802b 4078static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
1de178ed
IS
4079 const struct net_device *down_dev)
4080{
8d1c802b 4081 struct fib6_info *iter;
1de178ed
IS
4082 unsigned int dead = 0;
4083
ad1601ae
DA
4084 if (rt->fib6_nh.fib_nh_dev == down_dev ||
4085 rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
1de178ed 4086 dead++;
93c2fb25 4087 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
ad1601ae
DA
4088 if (iter->fib6_nh.fib_nh_dev == down_dev ||
4089 iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
1de178ed
IS
4090 dead++;
4091
4092 return dead;
4093}
4094
8d1c802b 4095static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
1de178ed 4096 const struct net_device *dev,
ecc5663c 4097 unsigned char nh_flags)
1de178ed 4098{
8d1c802b 4099 struct fib6_info *iter;
1de178ed 4100
ad1601ae
DA
4101 if (rt->fib6_nh.fib_nh_dev == dev)
4102 rt->fib6_nh.fib_nh_flags |= nh_flags;
93c2fb25 4103 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
ad1601ae
DA
4104 if (iter->fib6_nh.fib_nh_dev == dev)
4105 iter->fib6_nh.fib_nh_flags |= nh_flags;
1de178ed
IS
4106}
4107
a1a22c12 4108/* called with write lock held for table with rt */
8d1c802b 4109static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
1da177e4 4110{
4c981e28
IS
4111 const struct arg_netdev_event *arg = p_arg;
4112 const struct net_device *dev = arg->dev;
7aef6859 4113 struct net *net = dev_net(dev);
8ed67789 4114
421842ed 4115 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
4116 return 0;
4117
4118 switch (arg->event) {
4119 case NETDEV_UNREGISTER:
ad1601ae 4120 return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
27c6fa73 4121 case NETDEV_DOWN:
1de178ed 4122 if (rt->should_flush)
27c6fa73 4123 return -1;
93c2fb25 4124 if (!rt->fib6_nsiblings)
ad1601ae 4125 return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
1de178ed
IS
4126 if (rt6_multipath_uses_dev(rt, dev)) {
4127 unsigned int count;
4128
4129 count = rt6_multipath_dead_count(rt, dev);
93c2fb25 4130 if (rt->fib6_nsiblings + 1 == count) {
1de178ed
IS
4131 rt6_multipath_flush(rt);
4132 return -1;
4133 }
4134 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4135 RTNH_F_LINKDOWN);
7aef6859 4136 fib6_update_sernum(net, rt);
d7dedee1 4137 rt6_multipath_rebalance(rt);
1de178ed
IS
4138 }
4139 return -2;
27c6fa73 4140 case NETDEV_CHANGE:
ad1601ae 4141 if (rt->fib6_nh.fib_nh_dev != dev ||
93c2fb25 4142 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 4143 break;
ad1601ae 4144 rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 4145 rt6_multipath_rebalance(rt);
27c6fa73 4146 break;
2b241361 4147 }
c159d30c 4148
1da177e4
LT
4149 return 0;
4150}
4151
27c6fa73 4152void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 4153{
4c981e28 4154 struct arg_netdev_event arg = {
8ed67789 4155 .dev = dev,
6802f3ad
IS
4156 {
4157 .event = event,
4158 },
8ed67789 4159 };
7c6bb7d2 4160 struct net *net = dev_net(dev);
8ed67789 4161
7c6bb7d2
DA
4162 if (net->ipv6.sysctl.skip_notify_on_dev_down)
4163 fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4164 else
4165 fib6_clean_all(net, fib6_ifdown, &arg);
4c981e28
IS
4166}
4167
4168void rt6_disable_ip(struct net_device *dev, unsigned long event)
4169{
4170 rt6_sync_down_dev(dev, event);
4171 rt6_uncached_list_flush_dev(dev_net(dev), dev);
4172 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
4173}
4174
/* Argument handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
4179
8d1c802b 4180static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4181{
4182 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4183 struct inet6_dev *idev;
4184
4185 /* In IPv6 pmtu discovery is not optional,
4186 so that RTAX_MTU lock cannot disable it.
4187 We still use this lock to block changes
4188 caused by addrconf/ndisc.
4189 */
4190
4191 idev = __in6_dev_get(arg->dev);
38308473 4192 if (!idev)
1da177e4
LT
4193 return 0;
4194
4195 /* For administrative MTU increase, there is no way to discover
4196 IPv6 PMTU increase, so PMTU increase should be updated here.
4197 Since RFC 1981 doesn't include administrative MTU increase
4198 update PMTU increase is a MUST. (i.e. jumbo frame)
4199 */
ad1601ae 4200 if (rt->fib6_nh.fib_nh_dev == arg->dev &&
d4ead6b3
DA
4201 !fib6_metric_locked(rt, RTAX_MTU)) {
4202 u32 mtu = rt->fib6_pmtu;
4203
4204 if (mtu >= arg->mtu ||
4205 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4206 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4207
f5bbe7ee 4208 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4209 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4210 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4211 }
1da177e4
LT
4212 return 0;
4213}
4214
95c96174 4215void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4216{
c71099ac
TG
4217 struct rt6_mtu_change_arg arg = {
4218 .dev = dev,
4219 .mtu = mtu,
4220 };
1da177e4 4221
0c3584d5 4222 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4223}
4224
ef7c79ed 4225static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4226 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
aa8f8778 4227 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
86872cb5 4228 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4229 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4230 [RTA_PRIORITY] = { .type = NLA_U32 },
4231 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4232 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4233 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4234 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4235 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4236 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4237 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4238 [RTA_MARK] = { .type = NLA_U32 },
aa8f8778 4239 [RTA_TABLE] = { .type = NLA_U32 },
eacb9384
RP
4240 [RTA_IP_PROTO] = { .type = NLA_U8 },
4241 [RTA_SPORT] = { .type = NLA_U16 },
4242 [RTA_DPORT] = { .type = NLA_U16 },
86872cb5
TG
4243};
4244
4245static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4246 struct fib6_config *cfg,
4247 struct netlink_ext_ack *extack)
1da177e4 4248{
86872cb5
TG
4249 struct rtmsg *rtm;
4250 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4251 unsigned int pref;
86872cb5 4252 int err;
1da177e4 4253
8cb08174
JB
4254 err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
4255 rtm_ipv6_policy, extack);
86872cb5
TG
4256 if (err < 0)
4257 goto errout;
1da177e4 4258
86872cb5
TG
4259 err = -EINVAL;
4260 rtm = nlmsg_data(nlh);
86872cb5 4261
84db8407
4262 *cfg = (struct fib6_config){
4263 .fc_table = rtm->rtm_table,
4264 .fc_dst_len = rtm->rtm_dst_len,
4265 .fc_src_len = rtm->rtm_src_len,
4266 .fc_flags = RTF_UP,
4267 .fc_protocol = rtm->rtm_protocol,
4268 .fc_type = rtm->rtm_type,
4269
4270 .fc_nlinfo.portid = NETLINK_CB(skb).portid,
4271 .fc_nlinfo.nlh = nlh,
4272 .fc_nlinfo.nl_net = sock_net(skb->sk),
4273 };
86872cb5 4274
ef2c7d7b
ND
4275 if (rtm->rtm_type == RTN_UNREACHABLE ||
4276 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4277 rtm->rtm_type == RTN_PROHIBIT ||
4278 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4279 cfg->fc_flags |= RTF_REJECT;
4280
ab79ad14
4281 if (rtm->rtm_type == RTN_LOCAL)
4282 cfg->fc_flags |= RTF_LOCAL;
4283
1f56a01f
MKL
4284 if (rtm->rtm_flags & RTM_F_CLONED)
4285 cfg->fc_flags |= RTF_CACHE;
4286
fc1e64e1
DA
4287 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4288
86872cb5 4289 if (tb[RTA_GATEWAY]) {
67b61f6c 4290 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4291 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4292 }
e3818541
DA
4293 if (tb[RTA_VIA]) {
4294 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4295 goto errout;
4296 }
86872cb5
TG
4297
4298 if (tb[RTA_DST]) {
4299 int plen = (rtm->rtm_dst_len + 7) >> 3;
4300
4301 if (nla_len(tb[RTA_DST]) < plen)
4302 goto errout;
4303
4304 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4305 }
86872cb5
TG
4306
4307 if (tb[RTA_SRC]) {
4308 int plen = (rtm->rtm_src_len + 7) >> 3;
4309
4310 if (nla_len(tb[RTA_SRC]) < plen)
4311 goto errout;
4312
4313 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4314 }
86872cb5 4315
c3968a85 4316 if (tb[RTA_PREFSRC])
67b61f6c 4317 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4318
86872cb5
TG
4319 if (tb[RTA_OIF])
4320 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4321
4322 if (tb[RTA_PRIORITY])
4323 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4324
4325 if (tb[RTA_METRICS]) {
4326 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4327 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4328 }
86872cb5
TG
4329
4330 if (tb[RTA_TABLE])
4331 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4332
51ebd318
ND
4333 if (tb[RTA_MULTIPATH]) {
4334 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4335 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4336
4337 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4338 cfg->fc_mp_len, extack);
9ed59592
DA
4339 if (err < 0)
4340 goto errout;
51ebd318
ND
4341 }
4342
c78ba6d6
LR
4343 if (tb[RTA_PREF]) {
4344 pref = nla_get_u8(tb[RTA_PREF]);
4345 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4346 pref != ICMPV6_ROUTER_PREF_HIGH)
4347 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4348 cfg->fc_flags |= RTF_PREF(pref);
4349 }
4350
19e42e45
RP
4351 if (tb[RTA_ENCAP])
4352 cfg->fc_encap = tb[RTA_ENCAP];
4353
9ed59592 4354 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4355 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4356
c255bd68 4357 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4358 if (err < 0)
4359 goto errout;
4360 }
4361
32bc201e
XL
4362 if (tb[RTA_EXPIRES]) {
4363 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4364
4365 if (addrconf_finite_timeout(timeout)) {
4366 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4367 cfg->fc_flags |= RTF_EXPIRES;
4368 }
4369 }
4370
86872cb5
TG
4371 err = 0;
4372errout:
4373 return err;
1da177e4
LT
4374}
4375
6b9ea5a6 4376struct rt6_nh {
8d1c802b 4377 struct fib6_info *fib6_info;
6b9ea5a6 4378 struct fib6_config r_cfg;
6b9ea5a6
RP
4379 struct list_head next;
4380};
4381
d4ead6b3
DA
4382static int ip6_route_info_append(struct net *net,
4383 struct list_head *rt6_nh_list,
8d1c802b
DA
4384 struct fib6_info *rt,
4385 struct fib6_config *r_cfg)
6b9ea5a6
RP
4386{
4387 struct rt6_nh *nh;
6b9ea5a6
RP
4388 int err = -EEXIST;
4389
4390 list_for_each_entry(nh, rt6_nh_list, next) {
8d1c802b
DA
4391 /* check if fib6_info already exists */
4392 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
6b9ea5a6
RP
4393 return err;
4394 }
4395
4396 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4397 if (!nh)
4398 return -ENOMEM;
8d1c802b 4399 nh->fib6_info = rt;
6b9ea5a6
RP
4400 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4401 list_add_tail(&nh->next, rt6_nh_list);
4402
4403 return 0;
4404}
4405
8d1c802b
DA
4406static void ip6_route_mpath_notify(struct fib6_info *rt,
4407 struct fib6_info *rt_last,
3b1137fe
DA
4408 struct nl_info *info,
4409 __u16 nlflags)
4410{
4411 /* if this is an APPEND route, then rt points to the first route
4412 * inserted and rt_last points to last route inserted. Userspace
4413 * wants a consistent dump of the route which starts at the first
4414 * nexthop. Since sibling routes are always added at the end of
4415 * the list, find the first sibling of the last route appended
4416 */
93c2fb25
DA
4417 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4418 rt = list_first_entry(&rt_last->fib6_siblings,
8d1c802b 4419 struct fib6_info,
93c2fb25 4420 fib6_siblings);
3b1137fe
DA
4421 }
4422
4423 if (rt)
4424 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4425}
4426
333c4301
DA
4427static int ip6_route_multipath_add(struct fib6_config *cfg,
4428 struct netlink_ext_ack *extack)
51ebd318 4429{
8d1c802b 4430 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
3b1137fe 4431 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4432 struct fib6_config r_cfg;
4433 struct rtnexthop *rtnh;
8d1c802b 4434 struct fib6_info *rt;
6b9ea5a6
RP
4435 struct rt6_nh *err_nh;
4436 struct rt6_nh *nh, *nh_safe;
3b1137fe 4437 __u16 nlflags;
51ebd318
ND
4438 int remaining;
4439 int attrlen;
6b9ea5a6
RP
4440 int err = 1;
4441 int nhn = 0;
4442 int replace = (cfg->fc_nlinfo.nlh &&
4443 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4444 LIST_HEAD(rt6_nh_list);
51ebd318 4445
3b1137fe
DA
4446 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4447 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4448 nlflags |= NLM_F_APPEND;
4449
35f1b4e9 4450 remaining = cfg->fc_mp_len;
51ebd318 4451 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4452
6b9ea5a6 4453 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
8d1c802b 4454 * fib6_info structs per nexthop
6b9ea5a6 4455 */
51ebd318
ND
4456 while (rtnh_ok(rtnh, remaining)) {
4457 memcpy(&r_cfg, cfg, sizeof(*cfg));
4458 if (rtnh->rtnh_ifindex)
4459 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4460
4461 attrlen = rtnh_attrlen(rtnh);
4462 if (attrlen > 0) {
4463 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4464
4465 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4466 if (nla) {
67b61f6c 4467 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4468 r_cfg.fc_flags |= RTF_GATEWAY;
4469 }
19e42e45
RP
4470 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4471 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4472 if (nla)
4473 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4474 }
6b9ea5a6 4475
68e2ffde 4476 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4477 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4478 if (IS_ERR(rt)) {
4479 err = PTR_ERR(rt);
4480 rt = NULL;
6b9ea5a6 4481 goto cleanup;
8c5b83f0 4482 }
b5d2d75e
DA
4483 if (!rt6_qualify_for_ecmp(rt)) {
4484 err = -EINVAL;
4485 NL_SET_ERR_MSG(extack,
4486 "Device only routes can not be added for IPv6 using the multipath API.");
4487 fib6_info_release(rt);
4488 goto cleanup;
4489 }
6b9ea5a6 4490
ad1601ae 4491 rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
398958ae 4492
d4ead6b3
DA
4493 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4494 rt, &r_cfg);
51ebd318 4495 if (err) {
93531c67 4496 fib6_info_release(rt);
6b9ea5a6
RP
4497 goto cleanup;
4498 }
4499
4500 rtnh = rtnh_next(rtnh, &remaining);
4501 }
4502
3b1137fe
DA
4503 /* for add and replace send one notification with all nexthops.
4504 * Skip the notification in fib6_add_rt2node and send one with
4505 * the full route when done
4506 */
4507 info->skip_notify = 1;
4508
6b9ea5a6
RP
4509 err_nh = NULL;
4510 list_for_each_entry(nh, &rt6_nh_list, next) {
8d1c802b
DA
4511 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4512 fib6_info_release(nh->fib6_info);
93531c67 4513
f7225172
DA
4514 if (!err) {
4515 /* save reference to last route successfully inserted */
4516 rt_last = nh->fib6_info;
4517
4518 /* save reference to first route for notification */
4519 if (!rt_notif)
4520 rt_notif = nh->fib6_info;
4521 }
3b1137fe 4522
8d1c802b
DA
4523 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4524 nh->fib6_info = NULL;
6b9ea5a6
RP
4525 if (err) {
4526 if (replace && nhn)
a5a82d84
JK
4527 NL_SET_ERR_MSG_MOD(extack,
4528 "multipath route replace failed (check consistency of installed routes)");
6b9ea5a6
RP
4529 err_nh = nh;
4530 goto add_errout;
51ebd318 4531 }
6b9ea5a6 4532
1a72418b 4533 /* Because each route is added like a single route we remove
27596472
MK
4534 * these flags after the first nexthop: if there is a collision,
4535 * we have already failed to add the first nexthop:
4536 * fib6_add_rt2node() has rejected it; when replacing, old
4537 * nexthops have been replaced by first new, the rest should
4538 * be added to it.
1a72418b 4539 */
27596472
MK
4540 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4541 NLM_F_REPLACE);
6b9ea5a6
RP
4542 nhn++;
4543 }
4544
3b1137fe
DA
4545 /* success ... tell user about new route */
4546 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4547 goto cleanup;
4548
4549add_errout:
3b1137fe
DA
4550 /* send notification for routes that were added so that
4551 * the delete notifications sent by ip6_route_del are
4552 * coherent
4553 */
4554 if (rt_notif)
4555 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4556
6b9ea5a6
RP
4557 /* Delete routes that were already added */
4558 list_for_each_entry(nh, &rt6_nh_list, next) {
4559 if (err_nh == nh)
4560 break;
333c4301 4561 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4562 }
4563
4564cleanup:
4565 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
8d1c802b
DA
4566 if (nh->fib6_info)
4567 fib6_info_release(nh->fib6_info);
6b9ea5a6
RP
4568 list_del(&nh->next);
4569 kfree(nh);
4570 }
4571
4572 return err;
4573}
4574
333c4301
DA
4575static int ip6_route_multipath_del(struct fib6_config *cfg,
4576 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4577{
4578 struct fib6_config r_cfg;
4579 struct rtnexthop *rtnh;
4580 int remaining;
4581 int attrlen;
4582 int err = 1, last_err = 0;
4583
4584 remaining = cfg->fc_mp_len;
4585 rtnh = (struct rtnexthop *)cfg->fc_mp;
4586
4587 /* Parse a Multipath Entry */
4588 while (rtnh_ok(rtnh, remaining)) {
4589 memcpy(&r_cfg, cfg, sizeof(*cfg));
4590 if (rtnh->rtnh_ifindex)
4591 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4592
4593 attrlen = rtnh_attrlen(rtnh);
4594 if (attrlen > 0) {
4595 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4596
4597 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4598 if (nla) {
4599 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4600 r_cfg.fc_flags |= RTF_GATEWAY;
4601 }
4602 }
333c4301 4603 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4604 if (err)
4605 last_err = err;
4606
51ebd318
ND
4607 rtnh = rtnh_next(rtnh, &remaining);
4608 }
4609
4610 return last_err;
4611}
4612
c21ef3e3
DA
4613static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4614 struct netlink_ext_ack *extack)
1da177e4 4615{
86872cb5
TG
4616 struct fib6_config cfg;
4617 int err;
1da177e4 4618
333c4301 4619 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4620 if (err < 0)
4621 return err;
4622
51ebd318 4623 if (cfg.fc_mp)
333c4301 4624 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4625 else {
4626 cfg.fc_delete_all_nh = 1;
333c4301 4627 return ip6_route_del(&cfg, extack);
0ae81335 4628 }
1da177e4
LT
4629}
4630
c21ef3e3
DA
4631static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4632 struct netlink_ext_ack *extack)
1da177e4 4633{
86872cb5
TG
4634 struct fib6_config cfg;
4635 int err;
1da177e4 4636
333c4301 4637 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4638 if (err < 0)
4639 return err;
4640
67f69513
DA
4641 if (cfg.fc_metric == 0)
4642 cfg.fc_metric = IP6_RT_PRIO_USER;
4643
51ebd318 4644 if (cfg.fc_mp)
333c4301 4645 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4646 else
acb54e3c 4647 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4648}
4649
8d1c802b 4650static size_t rt6_nlmsg_size(struct fib6_info *rt)
339bf98f 4651{
beb1afac
DA
4652 int nexthop_len = 0;
4653
93c2fb25 4654 if (rt->fib6_nsiblings) {
beb1afac
DA
4655 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4656 + NLA_ALIGN(sizeof(struct rtnexthop))
4657 + nla_total_size(16) /* RTA_GATEWAY */
ad1601ae 4658 + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
beb1afac 4659
93c2fb25 4660 nexthop_len *= rt->fib6_nsiblings;
beb1afac
DA
4661 }
4662
339bf98f
TG
4663 return NLMSG_ALIGN(sizeof(struct rtmsg))
4664 + nla_total_size(16) /* RTA_SRC */
4665 + nla_total_size(16) /* RTA_DST */
4666 + nla_total_size(16) /* RTA_GATEWAY */
4667 + nla_total_size(16) /* RTA_PREFSRC */
4668 + nla_total_size(4) /* RTA_TABLE */
4669 + nla_total_size(4) /* RTA_IIF */
4670 + nla_total_size(4) /* RTA_OIF */
4671 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4672 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4673 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4674 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4675 + nla_total_size(1) /* RTA_PREF */
ad1601ae 4676 + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws)
beb1afac
DA
4677 + nexthop_len;
4678}
4679
d4ead6b3 4680static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 4681 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 4682 struct in6_addr *dest, struct in6_addr *src,
15e47304 4683 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4684 unsigned int flags)
1da177e4 4685{
22d0bd82
XL
4686 struct rt6_info *rt6 = (struct rt6_info *)dst;
4687 struct rt6key *rt6_dst, *rt6_src;
4688 u32 *pmetrics, table, rt6_flags;
2d7202bf 4689 struct nlmsghdr *nlh;
22d0bd82 4690 struct rtmsg *rtm;
d4ead6b3 4691 long expires = 0;
1da177e4 4692
15e47304 4693 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4694 if (!nlh)
26932566 4695 return -EMSGSIZE;
2d7202bf 4696
22d0bd82
XL
4697 if (rt6) {
4698 rt6_dst = &rt6->rt6i_dst;
4699 rt6_src = &rt6->rt6i_src;
4700 rt6_flags = rt6->rt6i_flags;
4701 } else {
4702 rt6_dst = &rt->fib6_dst;
4703 rt6_src = &rt->fib6_src;
4704 rt6_flags = rt->fib6_flags;
4705 }
4706
2d7202bf 4707 rtm = nlmsg_data(nlh);
1da177e4 4708 rtm->rtm_family = AF_INET6;
22d0bd82
XL
4709 rtm->rtm_dst_len = rt6_dst->plen;
4710 rtm->rtm_src_len = rt6_src->plen;
1da177e4 4711 rtm->rtm_tos = 0;
93c2fb25
DA
4712 if (rt->fib6_table)
4713 table = rt->fib6_table->tb6_id;
c71099ac 4714 else
9e762a4a 4715 table = RT6_TABLE_UNSPEC;
97f0082a 4716 rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
c78679e8
DM
4717 if (nla_put_u32(skb, RTA_TABLE, table))
4718 goto nla_put_failure;
e8478e80
DA
4719
4720 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4721 rtm->rtm_flags = 0;
4722 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
93c2fb25 4723 rtm->rtm_protocol = rt->fib6_protocol;
1da177e4 4724
22d0bd82 4725 if (rt6_flags & RTF_CACHE)
1da177e4
LT
4726 rtm->rtm_flags |= RTM_F_CLONED;
4727
d4ead6b3
DA
4728 if (dest) {
4729 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4730 goto nla_put_failure;
1ab1457c 4731 rtm->rtm_dst_len = 128;
1da177e4 4732 } else if (rtm->rtm_dst_len)
22d0bd82 4733 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
c78679e8 4734 goto nla_put_failure;
1da177e4
LT
4735#ifdef CONFIG_IPV6_SUBTREES
4736 if (src) {
930345ea 4737 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4738 goto nla_put_failure;
1ab1457c 4739 rtm->rtm_src_len = 128;
c78679e8 4740 } else if (rtm->rtm_src_len &&
22d0bd82 4741 nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
c78679e8 4742 goto nla_put_failure;
1da177e4 4743#endif
7bc570c8
YH
4744 if (iif) {
4745#ifdef CONFIG_IPV6_MROUTE
22d0bd82 4746 if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
fd61c6ba
DA
4747 int err = ip6mr_get_route(net, skb, rtm, portid);
4748
4749 if (err == 0)
4750 return 0;
4751 if (err < 0)
4752 goto nla_put_failure;
7bc570c8
YH
4753 } else
4754#endif
c78679e8
DM
4755 if (nla_put_u32(skb, RTA_IIF, iif))
4756 goto nla_put_failure;
d4ead6b3 4757 } else if (dest) {
1da177e4 4758 struct in6_addr saddr_buf;
d4ead6b3 4759 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4760 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4761 goto nla_put_failure;
1da177e4 4762 }
2d7202bf 4763
93c2fb25 4764 if (rt->fib6_prefsrc.plen) {
c3968a85 4765 struct in6_addr saddr_buf;
93c2fb25 4766 saddr_buf = rt->fib6_prefsrc.addr;
930345ea 4767 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4768 goto nla_put_failure;
c3968a85
DW
4769 }
4770
d4ead6b3
DA
4771 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4772 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4773 goto nla_put_failure;
4774
93c2fb25 4775 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
c78679e8 4776 goto nla_put_failure;
8253947e 4777
beb1afac
DA
4778 /* For multipath routes, walk the siblings list and add
4779 * each as a nexthop within RTA_MULTIPATH.
4780 */
22d0bd82
XL
4781 if (rt6) {
4782 if (rt6_flags & RTF_GATEWAY &&
4783 nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
4784 goto nla_put_failure;
4785
4786 if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
4787 goto nla_put_failure;
4788 } else if (rt->fib6_nsiblings) {
8d1c802b 4789 struct fib6_info *sibling, *next_sibling;
beb1afac
DA
4790 struct nlattr *mp;
4791
ae0be8de 4792 mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
beb1afac
DA
4793 if (!mp)
4794 goto nla_put_failure;
4795
c0a72077
DA
4796 if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common,
4797 rt->fib6_nh.fib_nh_weight) < 0)
beb1afac
DA
4798 goto nla_put_failure;
4799
4800 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25 4801 &rt->fib6_siblings, fib6_siblings) {
c0a72077
DA
4802 if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common,
4803 sibling->fib6_nh.fib_nh_weight) < 0)
beb1afac
DA
4804 goto nla_put_failure;
4805 }
4806
4807 nla_nest_end(skb, mp);
4808 } else {
ecc5663c
DA
4809 unsigned char nh_flags = 0;
4810
c0a72077 4811 if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common,
ecc5663c 4812 &nh_flags, false) < 0)
beb1afac 4813 goto nla_put_failure;
ecc5663c
DA
4814
4815 rtm->rtm_flags |= nh_flags;
beb1afac
DA
4816 }
4817
22d0bd82 4818 if (rt6_flags & RTF_EXPIRES) {
14895687
DA
4819 expires = dst ? dst->expires : rt->expires;
4820 expires -= jiffies;
4821 }
69cdf8f9 4822
d4ead6b3 4823 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4824 goto nla_put_failure;
2d7202bf 4825
22d0bd82 4826 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
c78ba6d6
LR
4827 goto nla_put_failure;
4828
19e42e45 4829
053c095a
JB
4830 nlmsg_end(skb, nlh);
4831 return 0;
2d7202bf
TG
4832
4833nla_put_failure:
26932566
PM
4834 nlmsg_cancel(skb, nlh);
4835 return -EMSGSIZE;
1da177e4
LT
4836}
4837
13e38901
DA
4838static bool fib6_info_uses_dev(const struct fib6_info *f6i,
4839 const struct net_device *dev)
4840{
ad1601ae 4841 if (f6i->fib6_nh.fib_nh_dev == dev)
13e38901
DA
4842 return true;
4843
4844 if (f6i->fib6_nsiblings) {
4845 struct fib6_info *sibling, *next_sibling;
4846
4847 list_for_each_entry_safe(sibling, next_sibling,
4848 &f6i->fib6_siblings, fib6_siblings) {
ad1601ae 4849 if (sibling->fib6_nh.fib_nh_dev == dev)
13e38901
DA
4850 return true;
4851 }
4852 }
4853
4854 return false;
4855}
4856
8d1c802b 4857int rt6_dump_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4858{
4859 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
13e38901
DA
4860 struct fib_dump_filter *filter = &arg->filter;
4861 unsigned int flags = NLM_F_MULTI;
1f17e2f2
DA
4862 struct net *net = arg->net;
4863
421842ed 4864 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4865 return 0;
1da177e4 4866
13e38901
DA
4867 if ((filter->flags & RTM_F_PREFIX) &&
4868 !(rt->fib6_flags & RTF_PREFIX_RT)) {
4869 /* success since this is not a prefix route */
4870 return 1;
4871 }
4872 if (filter->filter_set) {
4873 if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
4874 (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
4875 (filter->protocol && rt->fib6_protocol != filter->protocol)) {
f8cfe2ce
DA
4876 return 1;
4877 }
13e38901 4878 flags |= NLM_F_DUMP_FILTERED;
f8cfe2ce 4879 }
1da177e4 4880
d4ead6b3
DA
4881 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4882 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
13e38901 4883 arg->cb->nlh->nlmsg_seq, flags);
1da177e4
LT
4884}
4885
0eff0a27
JK
4886static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
4887 const struct nlmsghdr *nlh,
4888 struct nlattr **tb,
4889 struct netlink_ext_ack *extack)
4890{
4891 struct rtmsg *rtm;
4892 int i, err;
4893
4894 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
4895 NL_SET_ERR_MSG_MOD(extack,
4896 "Invalid header for get route request");
4897 return -EINVAL;
4898 }
4899
4900 if (!netlink_strict_get_check(skb))
8cb08174
JB
4901 return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
4902 rtm_ipv6_policy, extack);
0eff0a27
JK
4903
4904 rtm = nlmsg_data(nlh);
4905 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
4906 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
4907 rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
4908 rtm->rtm_type) {
4909 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
4910 return -EINVAL;
4911 }
4912 if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
4913 NL_SET_ERR_MSG_MOD(extack,
4914 "Invalid flags for get route request");
4915 return -EINVAL;
4916 }
4917
8cb08174
JB
4918 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
4919 rtm_ipv6_policy, extack);
0eff0a27
JK
4920 if (err)
4921 return err;
4922
4923 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
4924 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
4925 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
4926 return -EINVAL;
4927 }
4928
4929 for (i = 0; i <= RTA_MAX; i++) {
4930 if (!tb[i])
4931 continue;
4932
4933 switch (i) {
4934 case RTA_SRC:
4935 case RTA_DST:
4936 case RTA_IIF:
4937 case RTA_OIF:
4938 case RTA_MARK:
4939 case RTA_UID:
4940 case RTA_SPORT:
4941 case RTA_DPORT:
4942 case RTA_IP_PROTO:
4943 break;
4944 default:
4945 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
4946 return -EINVAL;
4947 }
4948 }
4949
4950 return 0;
4951}
4952
c21ef3e3
DA
4953static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4954 struct netlink_ext_ack *extack)
1da177e4 4955{
3b1e0a65 4956 struct net *net = sock_net(in_skb->sk);
ab364a6f 4957 struct nlattr *tb[RTA_MAX+1];
18c3a61c 4958 int err, iif = 0, oif = 0;
a68886a6 4959 struct fib6_info *from;
18c3a61c 4960 struct dst_entry *dst;
ab364a6f 4961 struct rt6_info *rt;
1da177e4 4962 struct sk_buff *skb;
ab364a6f 4963 struct rtmsg *rtm;
744486d4 4964 struct flowi6 fl6 = {};
18c3a61c 4965 bool fibmatch;
1da177e4 4966
0eff0a27 4967 err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
ab364a6f
TG
4968 if (err < 0)
4969 goto errout;
1da177e4 4970
ab364a6f 4971 err = -EINVAL;
38b7097b
HFS
4972 rtm = nlmsg_data(nlh);
4973 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4974 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4975
ab364a6f
TG
4976 if (tb[RTA_SRC]) {
4977 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4978 goto errout;
4979
4e3fd7a0 4980 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4981 }
4982
4983 if (tb[RTA_DST]) {
4984 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4985 goto errout;
4986
4e3fd7a0 4987 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4988 }
4989
4990 if (tb[RTA_IIF])
4991 iif = nla_get_u32(tb[RTA_IIF]);
4992
4993 if (tb[RTA_OIF])
72331bc0 4994 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4995
2e47b291
LC
4996 if (tb[RTA_MARK])
4997 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4998
622ec2c9
LC
4999 if (tb[RTA_UID])
5000 fl6.flowi6_uid = make_kuid(current_user_ns(),
5001 nla_get_u32(tb[RTA_UID]));
5002 else
5003 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
5004
eacb9384
RP
5005 if (tb[RTA_SPORT])
5006 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
5007
5008 if (tb[RTA_DPORT])
5009 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
5010
5011 if (tb[RTA_IP_PROTO]) {
5012 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
5e1a99ea
HL
5013 &fl6.flowi6_proto, AF_INET6,
5014 extack);
eacb9384
RP
5015 if (err)
5016 goto errout;
5017 }
5018
1da177e4
LT
5019 if (iif) {
5020 struct net_device *dev;
72331bc0
SL
5021 int flags = 0;
5022
121622db
FW
5023 rcu_read_lock();
5024
5025 dev = dev_get_by_index_rcu(net, iif);
1da177e4 5026 if (!dev) {
121622db 5027 rcu_read_unlock();
1da177e4 5028 err = -ENODEV;
ab364a6f 5029 goto errout;
1da177e4 5030 }
72331bc0
SL
5031
5032 fl6.flowi6_iif = iif;
5033
5034 if (!ipv6_addr_any(&fl6.saddr))
5035 flags |= RT6_LOOKUP_F_HAS_SADDR;
5036
b75cc8f9 5037 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
5038
5039 rcu_read_unlock();
72331bc0
SL
5040 } else {
5041 fl6.flowi6_oif = oif;
5042
58acfd71 5043 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
5044 }
5045
18c3a61c
RP
5046
5047 rt = container_of(dst, struct rt6_info, dst);
5048 if (rt->dst.error) {
5049 err = rt->dst.error;
5050 ip6_rt_put(rt);
5051 goto errout;
1da177e4
LT
5052 }
5053
9d6acb3b
WC
5054 if (rt == net->ipv6.ip6_null_entry) {
5055 err = rt->dst.error;
5056 ip6_rt_put(rt);
5057 goto errout;
5058 }
5059
ab364a6f 5060 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 5061 if (!skb) {
94e187c0 5062 ip6_rt_put(rt);
ab364a6f
TG
5063 err = -ENOBUFS;
5064 goto errout;
5065 }
1da177e4 5066
d8d1f30b 5067 skb_dst_set(skb, &rt->dst);
a68886a6
DA
5068
5069 rcu_read_lock();
5070 from = rcu_dereference(rt->from);
886b7a50
MKL
5071 if (from) {
5072 if (fibmatch)
5073 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
5074 iif, RTM_NEWROUTE,
5075 NETLINK_CB(in_skb).portid,
5076 nlh->nlmsg_seq, 0);
5077 else
5078 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5079 &fl6.saddr, iif, RTM_NEWROUTE,
5080 NETLINK_CB(in_skb).portid,
5081 nlh->nlmsg_seq, 0);
5082 } else {
5083 err = -ENETUNREACH;
5084 }
a68886a6
DA
5085 rcu_read_unlock();
5086
1da177e4 5087 if (err < 0) {
ab364a6f
TG
5088 kfree_skb(skb);
5089 goto errout;
1da177e4
LT
5090 }
5091
15e47304 5092 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 5093errout:
1da177e4 5094 return err;
1da177e4
LT
5095}
5096
8d1c802b 5097void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
37a1d361 5098 unsigned int nlm_flags)
1da177e4
LT
5099{
5100 struct sk_buff *skb;
5578689a 5101 struct net *net = info->nl_net;
528c4ceb
DL
5102 u32 seq;
5103 int err;
5104
5105 err = -ENOBUFS;
38308473 5106 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 5107
19e42e45 5108 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 5109 if (!skb)
21713ebc
TG
5110 goto errout;
5111
d4ead6b3
DA
5112 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5113 event, info->portid, seq, nlm_flags);
26932566
PM
5114 if (err < 0) {
5115 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
5116 WARN_ON(err == -EMSGSIZE);
5117 kfree_skb(skb);
5118 goto errout;
5119 }
15e47304 5120 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
5121 info->nlh, gfp_any());
5122 return;
21713ebc
TG
5123errout:
5124 if (err < 0)
5578689a 5125 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
5126}
5127
8ed67789 5128static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 5129 unsigned long event, void *ptr)
8ed67789 5130{
351638e7 5131 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 5132 struct net *net = dev_net(dev);
8ed67789 5133
242d3a49
WC
5134 if (!(dev->flags & IFF_LOOPBACK))
5135 return NOTIFY_OK;
5136
5137 if (event == NETDEV_REGISTER) {
ad1601ae 5138 net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev;
d8d1f30b 5139 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
5140 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5141#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 5142 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 5143 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 5144 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 5145 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 5146#endif
76da0704
WC
5147 } else if (event == NETDEV_UNREGISTER &&
5148 dev->reg_state != NETREG_UNREGISTERED) {
5149 /* NETDEV_UNREGISTER could be fired for multiple times by
5150 * netdev_wait_allrefs(). Make sure we only call this once.
5151 */
12d94a80 5152 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 5153#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
5154 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5155 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
5156#endif
5157 }
5158
5159 return NOTIFY_OK;
5160}
5161
1da177e4
LT
5162/*
5163 * /proc
5164 */
5165
5166#ifdef CONFIG_PROC_FS
1da177e4
LT
5167static int rt6_stats_seq_show(struct seq_file *seq, void *v)
5168{
69ddb805 5169 struct net *net = (struct net *)seq->private;
1da177e4 5170 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
5171 net->ipv6.rt6_stats->fib_nodes,
5172 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 5173 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
5174 net->ipv6.rt6_stats->fib_rt_entries,
5175 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 5176 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 5177 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
5178
5179 return 0;
5180}
1da177e4
LT
5181#endif /* CONFIG_PROC_FS */
5182
5183#ifdef CONFIG_SYSCTL
5184
1da177e4 5185static
fe2c6338 5186int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
5187 void __user *buffer, size_t *lenp, loff_t *ppos)
5188{
c486da34
LAG
5189 struct net *net;
5190 int delay;
f0fb9b28 5191 int ret;
c486da34 5192 if (!write)
1da177e4 5193 return -EINVAL;
c486da34
LAG
5194
5195 net = (struct net *)ctl->extra1;
5196 delay = net->ipv6.sysctl.flush_delay;
f0fb9b28
AP
5197 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
5198 if (ret)
5199 return ret;
5200
2ac3ac8f 5201 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 5202 return 0;
1da177e4
LT
5203}
5204
7c6bb7d2
DA
5205static int zero;
5206static int one = 1;
5207
ed792e28 5208static struct ctl_table ipv6_route_table_template[] = {
1ab1457c 5209 {
1da177e4 5210 .procname = "flush",
4990509f 5211 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 5212 .maxlen = sizeof(int),
89c8b3a1 5213 .mode = 0200,
6d9f239a 5214 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
5215 },
5216 {
1da177e4 5217 .procname = "gc_thresh",
9a7ec3a9 5218 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
5219 .maxlen = sizeof(int),
5220 .mode = 0644,
6d9f239a 5221 .proc_handler = proc_dointvec,
1da177e4
LT
5222 },
5223 {
1da177e4 5224 .procname = "max_size",
4990509f 5225 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
5226 .maxlen = sizeof(int),
5227 .mode = 0644,
6d9f239a 5228 .proc_handler = proc_dointvec,
1da177e4
LT
5229 },
5230 {
1da177e4 5231 .procname = "gc_min_interval",
4990509f 5232 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5233 .maxlen = sizeof(int),
5234 .mode = 0644,
6d9f239a 5235 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5236 },
5237 {
1da177e4 5238 .procname = "gc_timeout",
4990509f 5239 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
5240 .maxlen = sizeof(int),
5241 .mode = 0644,
6d9f239a 5242 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5243 },
5244 {
1da177e4 5245 .procname = "gc_interval",
4990509f 5246 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
5247 .maxlen = sizeof(int),
5248 .mode = 0644,
6d9f239a 5249 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5250 },
5251 {
1da177e4 5252 .procname = "gc_elasticity",
4990509f 5253 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5254 .maxlen = sizeof(int),
5255 .mode = 0644,
f3d3f616 5256 .proc_handler = proc_dointvec,
1da177e4
LT
5257 },
5258 {
1da177e4 5259 .procname = "mtu_expires",
4990509f 5260 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5261 .maxlen = sizeof(int),
5262 .mode = 0644,
6d9f239a 5263 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5264 },
5265 {
1da177e4 5266 .procname = "min_adv_mss",
4990509f 5267 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5268 .maxlen = sizeof(int),
5269 .mode = 0644,
f3d3f616 5270 .proc_handler = proc_dointvec,
1da177e4
LT
5271 },
5272 {
1da177e4 5273 .procname = "gc_min_interval_ms",
4990509f 5274 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5275 .maxlen = sizeof(int),
5276 .mode = 0644,
6d9f239a 5277 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5278 },
7c6bb7d2
DA
5279 {
5280 .procname = "skip_notify_on_dev_down",
5281 .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
5282 .maxlen = sizeof(int),
5283 .mode = 0644,
5284 .proc_handler = proc_dointvec,
5285 .extra1 = &zero,
5286 .extra2 = &one,
5287 },
f8572d8f 5288 { }
1da177e4
LT
5289};
5290
2c8c1e72 5291struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5292{
5293 struct ctl_table *table;
5294
5295 table = kmemdup(ipv6_route_table_template,
5296 sizeof(ipv6_route_table_template),
5297 GFP_KERNEL);
5ee09105
YH
5298
5299 if (table) {
5300 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5301 table[0].extra1 = net;
86393e52 5302 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5303 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5304 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5305 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5306 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5307 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5308 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5309 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5310 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
7c6bb7d2 5311 table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
464dc801
EB
5312
5313 /* Don't export sysctls to unprivileged users */
5314 if (net->user_ns != &init_user_ns)
5315 table[0].procname = NULL;
5ee09105
YH
5316 }
5317
760f2d01
DL
5318 return table;
5319}
1da177e4
LT
5320#endif
5321
2c8c1e72 5322static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5323{
633d424b 5324 int ret = -ENOMEM;
8ed67789 5325
86393e52
AD
5326 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5327 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5328
fc66f95c
ED
5329 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5330 goto out_ip6_dst_ops;
5331
421842ed
DA
5332 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5333 sizeof(*net->ipv6.fib6_null_entry),
5334 GFP_KERNEL);
5335 if (!net->ipv6.fib6_null_entry)
5336 goto out_ip6_dst_entries;
5337
8ed67789
DL
5338 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5339 sizeof(*net->ipv6.ip6_null_entry),
5340 GFP_KERNEL);
5341 if (!net->ipv6.ip6_null_entry)
421842ed 5342 goto out_fib6_null_entry;
d8d1f30b 5343 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5344 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5345 ip6_template_metrics, true);
8ed67789
DL
5346
5347#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5348 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5349 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5350 sizeof(*net->ipv6.ip6_prohibit_entry),
5351 GFP_KERNEL);
68fffc67
PZ
5352 if (!net->ipv6.ip6_prohibit_entry)
5353 goto out_ip6_null_entry;
d8d1f30b 5354 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5355 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5356 ip6_template_metrics, true);
8ed67789
DL
5357
5358 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5359 sizeof(*net->ipv6.ip6_blk_hole_entry),
5360 GFP_KERNEL);
68fffc67
PZ
5361 if (!net->ipv6.ip6_blk_hole_entry)
5362 goto out_ip6_prohibit_entry;
d8d1f30b 5363 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5364 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5365 ip6_template_metrics, true);
8ed67789
DL
5366#endif
5367
b339a47c
PZ
5368 net->ipv6.sysctl.flush_delay = 0;
5369 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5370 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5371 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5372 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5373 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5374 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5375 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
7c6bb7d2 5376 net->ipv6.sysctl.skip_notify_on_dev_down = 0;
b339a47c 5377
6891a346
BT
5378 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5379
8ed67789
DL
5380 ret = 0;
5381out:
5382 return ret;
f2fc6a54 5383
68fffc67
PZ
5384#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5385out_ip6_prohibit_entry:
5386 kfree(net->ipv6.ip6_prohibit_entry);
5387out_ip6_null_entry:
5388 kfree(net->ipv6.ip6_null_entry);
5389#endif
421842ed
DA
5390out_fib6_null_entry:
5391 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5392out_ip6_dst_entries:
5393 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5394out_ip6_dst_ops:
f2fc6a54 5395 goto out;
cdb18761
DL
5396}
5397
2c8c1e72 5398static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5399{
421842ed 5400 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5401 kfree(net->ipv6.ip6_null_entry);
5402#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5403 kfree(net->ipv6.ip6_prohibit_entry);
5404 kfree(net->ipv6.ip6_blk_hole_entry);
5405#endif
41bb78b4 5406 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5407}
5408
d189634e
TG
5409static int __net_init ip6_route_net_init_late(struct net *net)
5410{
5411#ifdef CONFIG_PROC_FS
c3506372
CH
5412 proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5413 sizeof(struct ipv6_route_iter));
3617d949
CH
5414 proc_create_net_single("rt6_stats", 0444, net->proc_net,
5415 rt6_stats_seq_show, NULL);
d189634e
TG
5416#endif
5417 return 0;
5418}
5419
5420static void __net_exit ip6_route_net_exit_late(struct net *net)
5421{
5422#ifdef CONFIG_PROC_FS
ece31ffd
G
5423 remove_proc_entry("ipv6_route", net->proc_net);
5424 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5425#endif
5426}
5427
cdb18761
DL
5428static struct pernet_operations ip6_route_net_ops = {
5429 .init = ip6_route_net_init,
5430 .exit = ip6_route_net_exit,
5431};
5432
c3426b47
DM
5433static int __net_init ipv6_inetpeer_init(struct net *net)
5434{
5435 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5436
5437 if (!bp)
5438 return -ENOMEM;
5439 inet_peer_base_init(bp);
5440 net->ipv6.peers = bp;
5441 return 0;
5442}
5443
5444static void __net_exit ipv6_inetpeer_exit(struct net *net)
5445{
5446 struct inet_peer_base *bp = net->ipv6.peers;
5447
5448 net->ipv6.peers = NULL;
56a6b248 5449 inetpeer_invalidate_tree(bp);
c3426b47
DM
5450 kfree(bp);
5451}
5452
2b823f72 5453static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5454 .init = ipv6_inetpeer_init,
5455 .exit = ipv6_inetpeer_exit,
5456};
5457
d189634e
TG
5458static struct pernet_operations ip6_route_net_late_ops = {
5459 .init = ip6_route_net_init_late,
5460 .exit = ip6_route_net_exit_late,
5461};
5462
8ed67789
DL
5463static struct notifier_block ip6_route_dev_notifier = {
5464 .notifier_call = ip6_route_dev_notify,
242d3a49 5465 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5466};
5467
2f460933
WC
5468void __init ip6_route_init_special_entries(void)
5469{
5470 /* Registering of the loopback is done before this portion of code,
5471 * the loopback reference in rt6_info will not be taken, do it
5472 * manually for init_net */
ad1601ae 5473 init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev;
2f460933
WC
5474 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5475 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5476 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5477 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5478 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5479 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5480 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5481 #endif
5482}
5483
433d49c3 5484int __init ip6_route_init(void)
1da177e4 5485{
433d49c3 5486 int ret;
8d0b94af 5487 int cpu;
433d49c3 5488
9a7ec3a9
DL
5489 ret = -ENOMEM;
5490 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5491 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5492 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5493 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5494 goto out;
14e50e57 5495
fc66f95c 5496 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5497 if (ret)
bdb3289f 5498 goto out_kmem_cache;
bdb3289f 5499
c3426b47
DM
5500 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5501 if (ret)
e8803b6c 5502 goto out_dst_entries;
2a0c451a 5503
7e52b33b
DM
5504 ret = register_pernet_subsys(&ip6_route_net_ops);
5505 if (ret)
5506 goto out_register_inetpeer;
c3426b47 5507
5dc121e9
AE
5508 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5509
e8803b6c 5510 ret = fib6_init();
433d49c3 5511 if (ret)
8ed67789 5512 goto out_register_subsys;
433d49c3 5513
433d49c3
DL
5514 ret = xfrm6_init();
5515 if (ret)
e8803b6c 5516 goto out_fib6_init;
c35b7e72 5517
433d49c3
DL
5518 ret = fib6_rules_init();
5519 if (ret)
5520 goto xfrm6_init;
7e5449c2 5521
d189634e
TG
5522 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5523 if (ret)
5524 goto fib6_rules_init;
5525
16feebcf
FW
5526 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5527 inet6_rtm_newroute, NULL, 0);
5528 if (ret < 0)
5529 goto out_register_late_subsys;
5530
5531 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5532 inet6_rtm_delroute, NULL, 0);
5533 if (ret < 0)
5534 goto out_register_late_subsys;
5535
5536 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5537 inet6_rtm_getroute, NULL,
5538 RTNL_FLAG_DOIT_UNLOCKED);
5539 if (ret < 0)
d189634e 5540 goto out_register_late_subsys;
c127ea2c 5541
8ed67789 5542 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5543 if (ret)
d189634e 5544 goto out_register_late_subsys;
8ed67789 5545
8d0b94af
MKL
5546 for_each_possible_cpu(cpu) {
5547 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5548
5549 INIT_LIST_HEAD(&ul->head);
5550 spin_lock_init(&ul->lock);
5551 }
5552
433d49c3
DL
5553out:
5554 return ret;
5555
d189634e 5556out_register_late_subsys:
16feebcf 5557 rtnl_unregister_all(PF_INET6);
d189634e 5558 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5559fib6_rules_init:
433d49c3
DL
5560 fib6_rules_cleanup();
5561xfrm6_init:
433d49c3 5562 xfrm6_fini();
2a0c451a
TG
5563out_fib6_init:
5564 fib6_gc_cleanup();
8ed67789
DL
5565out_register_subsys:
5566 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5567out_register_inetpeer:
5568 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5569out_dst_entries:
5570 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5571out_kmem_cache:
f2fc6a54 5572 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5573 goto out;
1da177e4
LT
5574}
5575
5576void ip6_route_cleanup(void)
5577{
8ed67789 5578 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5579 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5580 fib6_rules_cleanup();
1da177e4 5581 xfrm6_fini();
1da177e4 5582 fib6_gc_cleanup();
c3426b47 5583 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5584 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5585 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5586 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5587}