net/ipv6: Refactor gateway validation on route add
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  Otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
b811580d 66#include <trace/events/fib6.h>
1da177e4 67
7c0f6ba6 68#include <linux/uaccess.h>
1da177e4
LT
69
70#ifdef CONFIG_SYSCTL
71#include <linux/sysctl.h>
72#endif
73
/* Result of checking a route's next-hop neighbour state.
 * All failure codes are negative; the single success code is positive.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1
};
80
83a09abd 81static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 82static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 83static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 84static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
85static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86static void ip6_dst_destroy(struct dst_entry *);
87static void ip6_dst_ifdown(struct dst_entry *,
88 struct net_device *dev, int how);
569d3645 89static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
90
91static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 92static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 93static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 94static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 95static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
96static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
97 struct sk_buff *skb, u32 mtu);
98static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
99 struct sk_buff *skb);
4b32b5ad 100static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 101static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
16a16cd3
DA
102static size_t rt6_nlmsg_size(struct rt6_info *rt);
103static int rt6_fill_node(struct net *net,
104 struct sk_buff *skb, struct rt6_info *rt,
105 struct in6_addr *dst, struct in6_addr *src,
106 int iif, int type, u32 portid, u32 seq,
107 unsigned int flags);
35732d01
WW
108static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
109 struct in6_addr *daddr,
110 struct in6_addr *saddr);
1da177e4 111
70ceb4f5 112#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 113static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 114 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
115 const struct in6_addr *gwaddr,
116 struct net_device *dev,
95c96174 117 unsigned int pref);
efa2cea0 118static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 119 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
120 const struct in6_addr *gwaddr,
121 struct net_device *dev);
70ceb4f5
YH
122#endif
123
8d0b94af
MKL
124struct uncached_list {
125 spinlock_t lock;
126 struct list_head head;
127};
128
129static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
130
131static void rt6_uncached_list_add(struct rt6_info *rt)
132{
133 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
134
8d0b94af
MKL
135 rt->rt6i_uncached_list = ul;
136
137 spin_lock_bh(&ul->lock);
138 list_add_tail(&rt->rt6i_uncached, &ul->head);
139 spin_unlock_bh(&ul->lock);
140}
141
142static void rt6_uncached_list_del(struct rt6_info *rt)
143{
144 if (!list_empty(&rt->rt6i_uncached)) {
145 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 146 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
147
148 spin_lock_bh(&ul->lock);
149 list_del(&rt->rt6i_uncached);
81eb8447 150 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
151 spin_unlock_bh(&ul->lock);
152 }
153}
154
155static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
156{
157 struct net_device *loopback_dev = net->loopback_dev;
158 int cpu;
159
e332bc67
EB
160 if (dev == loopback_dev)
161 return;
162
8d0b94af
MKL
163 for_each_possible_cpu(cpu) {
164 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
165 struct rt6_info *rt;
166
167 spin_lock_bh(&ul->lock);
168 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
169 struct inet6_dev *rt_idev = rt->rt6i_idev;
170 struct net_device *rt_dev = rt->dst.dev;
171
e332bc67 172 if (rt_idev->dev == dev) {
8d0b94af
MKL
173 rt->rt6i_idev = in6_dev_get(loopback_dev);
174 in6_dev_put(rt_idev);
175 }
176
e332bc67 177 if (rt_dev == dev) {
8d0b94af
MKL
178 rt->dst.dev = loopback_dev;
179 dev_hold(rt->dst.dev);
180 dev_put(rt_dev);
181 }
182 }
183 spin_unlock_bh(&ul->lock);
184 }
185}
186
d52d3997
MKL
187static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
188{
3a2232e9 189 return dst_metrics_write_ptr(&rt->from->dst);
d52d3997
MKL
190}
191
06582540
DM
192static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
193{
4b32b5ad 194 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 195
d52d3997
MKL
196 if (rt->rt6i_flags & RTF_PCPU)
197 return rt6_pcpu_cow_metrics(rt);
198 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
199 return NULL;
200 else
3b471175 201 return dst_cow_metrics_generic(dst, old);
06582540
DM
202}
203
f894cbf8
DM
204static inline const void *choose_neigh_daddr(struct rt6_info *rt,
205 struct sk_buff *skb,
206 const void *daddr)
39232973
DM
207{
208 struct in6_addr *p = &rt->rt6i_gateway;
209
a7563f34 210 if (!ipv6_addr_any(p))
39232973 211 return (const void *) p;
f894cbf8
DM
212 else if (skb)
213 return &ipv6_hdr(skb)->daddr;
39232973
DM
214 return daddr;
215}
216
f894cbf8
DM
217static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
218 struct sk_buff *skb,
219 const void *daddr)
d3aaeb38 220{
39232973
DM
221 struct rt6_info *rt = (struct rt6_info *) dst;
222 struct neighbour *n;
223
f894cbf8 224 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 225 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
226 if (n)
227 return n;
228 return neigh_create(&nd_tbl, daddr, dst->dev);
229}
230
63fca65d
JA
231static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
232{
233 struct net_device *dev = dst->dev;
234 struct rt6_info *rt = (struct rt6_info *)dst;
235
236 daddr = choose_neigh_daddr(rt, NULL, daddr);
237 if (!daddr)
238 return;
239 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
240 return;
241 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
242 return;
243 __ipv6_confirm_neigh(dev, daddr);
244}
245
9a7ec3a9 246static struct dst_ops ip6_dst_ops_template = {
1da177e4 247 .family = AF_INET6,
1da177e4
LT
248 .gc = ip6_dst_gc,
249 .gc_thresh = 1024,
250 .check = ip6_dst_check,
0dbaee3b 251 .default_advmss = ip6_default_advmss,
ebb762f2 252 .mtu = ip6_mtu,
06582540 253 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
254 .destroy = ip6_dst_destroy,
255 .ifdown = ip6_dst_ifdown,
256 .negative_advice = ip6_negative_advice,
257 .link_failure = ip6_link_failure,
258 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 259 .redirect = rt6_do_redirect,
9f8955cc 260 .local_out = __ip6_local_out,
d3aaeb38 261 .neigh_lookup = ip6_neigh_lookup,
63fca65d 262 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
263};
264
ebb762f2 265static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 266{
618f9bc7
SK
267 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
268
269 return mtu ? : dst->dev->mtu;
ec831ea7
RD
270}
271
6700c270
DM
272static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
273 struct sk_buff *skb, u32 mtu)
14e50e57
DM
274{
275}
276
6700c270
DM
/* dst_ops->redirect callback for blackhole routes: intentionally a
 * no-op.
 */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
281
14e50e57
DM
282static struct dst_ops ip6_dst_blackhole_ops = {
283 .family = AF_INET6,
14e50e57
DM
284 .destroy = ip6_dst_destroy,
285 .check = ip6_dst_check,
ebb762f2 286 .mtu = ip6_blackhole_mtu,
214f45c9 287 .default_advmss = ip6_default_advmss,
14e50e57 288 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 289 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 290 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 291 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
292};
293
62fa8a84 294static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 295 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
296};
297
fb0af4c7 298static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
299 .dst = {
300 .__refcnt = ATOMIC_INIT(1),
301 .__use = 1,
2c20cbd7 302 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 303 .error = -ENETUNREACH,
d8d1f30b
CG
304 .input = ip6_pkt_discard,
305 .output = ip6_pkt_discard_out,
1da177e4
LT
306 },
307 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 308 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
309 .rt6i_metric = ~(u32) 0,
310 .rt6i_ref = ATOMIC_INIT(1),
311};
312
101367c2
TG
313#ifdef CONFIG_IPV6_MULTIPLE_TABLES
314
fb0af4c7 315static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
316 .dst = {
317 .__refcnt = ATOMIC_INIT(1),
318 .__use = 1,
2c20cbd7 319 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 320 .error = -EACCES,
d8d1f30b
CG
321 .input = ip6_pkt_prohibit,
322 .output = ip6_pkt_prohibit_out,
101367c2
TG
323 },
324 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 325 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
326 .rt6i_metric = ~(u32) 0,
327 .rt6i_ref = ATOMIC_INIT(1),
328};
329
fb0af4c7 330static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
331 .dst = {
332 .__refcnt = ATOMIC_INIT(1),
333 .__use = 1,
2c20cbd7 334 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 335 .error = -EINVAL,
d8d1f30b 336 .input = dst_discard,
ede2059d 337 .output = dst_discard_out,
101367c2
TG
338 },
339 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 340 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
341 .rt6i_metric = ~(u32) 0,
342 .rt6i_ref = ATOMIC_INIT(1),
343};
344
345#endif
346
ebfa45f0
MKL
347static void rt6_info_init(struct rt6_info *rt)
348{
349 struct dst_entry *dst = &rt->dst;
350
351 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
352 INIT_LIST_HEAD(&rt->rt6i_siblings);
353 INIT_LIST_HEAD(&rt->rt6i_uncached);
354}
355
1da177e4 356/* allocate dst with ip6_dst_ops */
d52d3997
MKL
357static struct rt6_info *__ip6_dst_alloc(struct net *net,
358 struct net_device *dev,
ad706862 359 int flags)
1da177e4 360{
97bab73f 361 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 362 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 363
81eb8447 364 if (rt) {
ebfa45f0 365 rt6_info_init(rt);
81eb8447
WW
366 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
367 }
8104891b 368
cf911662 369 return rt;
1da177e4
LT
370}
371
9ab179d8
DA
372struct rt6_info *ip6_dst_alloc(struct net *net,
373 struct net_device *dev,
374 int flags)
d52d3997 375{
ad706862 376 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
377
378 if (rt) {
379 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
bfd8e5a4 380 if (!rt->rt6i_pcpu) {
587fea74 381 dst_release_immediate(&rt->dst);
d52d3997
MKL
382 return NULL;
383 }
384 }
385
386 return rt;
387}
9ab179d8 388EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 389
1da177e4
LT
390static void ip6_dst_destroy(struct dst_entry *dst)
391{
392 struct rt6_info *rt = (struct rt6_info *)dst;
35732d01 393 struct rt6_exception_bucket *bucket;
3a2232e9 394 struct rt6_info *from = rt->from;
8d0b94af 395 struct inet6_dev *idev;
1da177e4 396
4b32b5ad 397 dst_destroy_metrics_generic(dst);
87775312 398 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
399 rt6_uncached_list_del(rt);
400
401 idev = rt->rt6i_idev;
38308473 402 if (idev) {
1da177e4
LT
403 rt->rt6i_idev = NULL;
404 in6_dev_put(idev);
1ab1457c 405 }
35732d01
WW
406 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
407 if (bucket) {
408 rt->rt6i_exception_bucket = NULL;
409 kfree(bucket);
410 }
1716a961 411
3a2232e9
DM
412 rt->from = NULL;
413 dst_release(&from->dst);
b3419363
DM
414}
415
1da177e4
LT
416static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
417 int how)
418{
419 struct rt6_info *rt = (struct rt6_info *)dst;
420 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 421 struct net_device *loopback_dev =
c346dca1 422 dev_net(dev)->loopback_dev;
1da177e4 423
e5645f51
WW
424 if (idev && idev->dev != loopback_dev) {
425 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
426 if (loopback_idev) {
427 rt->rt6i_idev = loopback_idev;
428 in6_dev_put(idev);
97cac082 429 }
1da177e4
LT
430 }
431}
432
5973fb1e
MKL
433static bool __rt6_check_expired(const struct rt6_info *rt)
434{
435 if (rt->rt6i_flags & RTF_EXPIRES)
436 return time_after(jiffies, rt->dst.expires);
437 else
438 return false;
439}
440
a50feda5 441static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 442{
1716a961
G
443 if (rt->rt6i_flags & RTF_EXPIRES) {
444 if (time_after(jiffies, rt->dst.expires))
a50feda5 445 return true;
3a2232e9 446 } else if (rt->from) {
1e2ea8ad 447 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
3a2232e9 448 rt6_check_expired(rt->from);
1716a961 449 }
a50feda5 450 return false;
1da177e4
LT
451}
452
b4bac172
DA
453static struct rt6_info *rt6_multipath_select(const struct net *net,
454 struct rt6_info *match,
52bd4c0c 455 struct flowi6 *fl6, int oif,
b75cc8f9 456 const struct sk_buff *skb,
52bd4c0c 457 int strict)
51ebd318
ND
458{
459 struct rt6_info *sibling, *next_sibling;
51ebd318 460
b673d6cc
JS
461 /* We might have already computed the hash for ICMPv6 errors. In such
462 * case it will always be non-zero. Otherwise now is the time to do it.
463 */
464 if (!fl6->mp_hash)
b4bac172 465 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 466
3d709f69
IS
467 if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
468 return match;
469
470 list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
471 rt6i_siblings) {
472 if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound))
473 continue;
474 if (rt6_score_route(sibling, oif, strict) < 0)
475 break;
476 match = sibling;
477 break;
478 }
479
51ebd318
ND
480 return match;
481}
482
1da177e4 483/*
66f5d6ce 484 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
485 */
486
8ed67789
DL
487static inline struct rt6_info *rt6_device_match(struct net *net,
488 struct rt6_info *rt,
b71d1d42 489 const struct in6_addr *saddr,
1da177e4 490 int oif,
d420895e 491 int flags)
1da177e4
LT
492{
493 struct rt6_info *local = NULL;
494 struct rt6_info *sprt;
495
8067bb8c
IS
496 if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD))
497 return rt;
dd3abc4e 498
071fb37e 499 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
d1918542 500 struct net_device *dev = sprt->dst.dev;
dd3abc4e 501
8067bb8c
IS
502 if (sprt->rt6i_nh_flags & RTNH_F_DEAD)
503 continue;
504
dd3abc4e 505 if (oif) {
1da177e4
LT
506 if (dev->ifindex == oif)
507 return sprt;
508 if (dev->flags & IFF_LOOPBACK) {
38308473 509 if (!sprt->rt6i_idev ||
1da177e4 510 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 511 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 512 continue;
17fb0b2b
DA
513 if (local &&
514 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
515 continue;
516 }
517 local = sprt;
518 }
dd3abc4e
YH
519 } else {
520 if (ipv6_chk_addr(net, saddr, dev,
521 flags & RT6_LOOKUP_F_IFACE))
522 return sprt;
1da177e4 523 }
dd3abc4e 524 }
1da177e4 525
dd3abc4e 526 if (oif) {
1da177e4
LT
527 if (local)
528 return local;
529
d420895e 530 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 531 return net->ipv6.ip6_null_entry;
1da177e4 532 }
8067bb8c
IS
533
534 return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
1da177e4
LT
535}
536
27097255 537#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
538struct __rt6_probe_work {
539 struct work_struct work;
540 struct in6_addr target;
541 struct net_device *dev;
542};
543
544static void rt6_probe_deferred(struct work_struct *w)
545{
546 struct in6_addr mcaddr;
547 struct __rt6_probe_work *work =
548 container_of(w, struct __rt6_probe_work, work);
549
550 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 551 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 552 dev_put(work->dev);
662f5533 553 kfree(work);
c2f17e82
HFS
554}
555
27097255
YH
556static void rt6_probe(struct rt6_info *rt)
557{
990edb42 558 struct __rt6_probe_work *work;
f2c31e32 559 struct neighbour *neigh;
27097255
YH
560 /*
561 * Okay, this does not seem to be appropriate
562 * for now, however, we need to check if it
563 * is really so; aka Router Reachability Probing.
564 *
565 * Router Reachability Probe MUST be rate-limited
566 * to no more than one per minute.
567 */
2152caea 568 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 569 return;
2152caea
YH
570 rcu_read_lock_bh();
571 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
572 if (neigh) {
8d6c31bf
MKL
573 if (neigh->nud_state & NUD_VALID)
574 goto out;
575
990edb42 576 work = NULL;
2152caea 577 write_lock(&neigh->lock);
990edb42
MKL
578 if (!(neigh->nud_state & NUD_VALID) &&
579 time_after(jiffies,
580 neigh->updated +
581 rt->rt6i_idev->cnf.rtr_probe_interval)) {
582 work = kmalloc(sizeof(*work), GFP_ATOMIC);
583 if (work)
584 __neigh_set_probe_once(neigh);
c2f17e82 585 }
2152caea 586 write_unlock(&neigh->lock);
990edb42
MKL
587 } else {
588 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 589 }
990edb42
MKL
590
591 if (work) {
592 INIT_WORK(&work->work, rt6_probe_deferred);
593 work->target = rt->rt6i_gateway;
594 dev_hold(rt->dst.dev);
595 work->dev = rt->dst.dev;
596 schedule_work(&work->work);
597 }
598
8d6c31bf 599out:
2152caea 600 rcu_read_unlock_bh();
27097255
YH
601}
602#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
606#endif
607
1da177e4 608/*
554cfb7e 609 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 610 */
b6f99a21 611static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 612{
d1918542 613 struct net_device *dev = rt->dst.dev;
161980f4 614 if (!oif || dev->ifindex == oif)
554cfb7e 615 return 2;
161980f4
DM
616 if ((dev->flags & IFF_LOOPBACK) &&
617 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
618 return 1;
619 return 0;
554cfb7e 620}
1da177e4 621
afc154e9 622static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 623{
f2c31e32 624 struct neighbour *neigh;
afc154e9 625 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 626
4d0c5911
YH
627 if (rt->rt6i_flags & RTF_NONEXTHOP ||
628 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 629 return RT6_NUD_SUCCEED;
145a3621
YH
630
631 rcu_read_lock_bh();
632 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
633 if (neigh) {
634 read_lock(&neigh->lock);
554cfb7e 635 if (neigh->nud_state & NUD_VALID)
afc154e9 636 ret = RT6_NUD_SUCCEED;
398bcbeb 637#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 638 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 639 ret = RT6_NUD_SUCCEED;
7e980569
JB
640 else
641 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 642#endif
145a3621 643 read_unlock(&neigh->lock);
afc154e9
HFS
644 } else {
645 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 646 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 647 }
145a3621
YH
648 rcu_read_unlock_bh();
649
a5a81f0b 650 return ret;
1da177e4
LT
651}
652
554cfb7e
YH
653static int rt6_score_route(struct rt6_info *rt, int oif,
654 int strict)
1da177e4 655{
a5a81f0b 656 int m;
1ab1457c 657
4d0c5911 658 m = rt6_check_dev(rt, oif);
77d16f45 659 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 660 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
661#ifdef CONFIG_IPV6_ROUTER_PREF
662 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
663#endif
afc154e9
HFS
664 if (strict & RT6_LOOKUP_F_REACHABLE) {
665 int n = rt6_check_neigh(rt);
666 if (n < 0)
667 return n;
668 }
554cfb7e
YH
669 return m;
670}
671
f11e6659 672static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
673 int *mpri, struct rt6_info *match,
674 bool *do_rr)
554cfb7e 675{
f11e6659 676 int m;
afc154e9 677 bool match_do_rr = false;
35103d11 678 struct inet6_dev *idev = rt->rt6i_idev;
35103d11 679
8067bb8c
IS
680 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
681 goto out;
682
14c5206c
IS
683 if (idev->cnf.ignore_routes_with_linkdown &&
684 rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 685 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 686 goto out;
f11e6659
DM
687
688 if (rt6_check_expired(rt))
689 goto out;
690
691 m = rt6_score_route(rt, oif, strict);
7e980569 692 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
693 match_do_rr = true;
694 m = 0; /* lowest valid score */
7e980569 695 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 696 goto out;
afc154e9
HFS
697 }
698
699 if (strict & RT6_LOOKUP_F_REACHABLE)
700 rt6_probe(rt);
f11e6659 701
7e980569 702 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 703 if (m > *mpri) {
afc154e9 704 *do_rr = match_do_rr;
f11e6659
DM
705 *mpri = m;
706 match = rt;
f11e6659 707 }
f11e6659
DM
708out:
709 return match;
710}
711
712static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
8d1040e8 713 struct rt6_info *leaf,
f11e6659 714 struct rt6_info *rr_head,
afc154e9
HFS
715 u32 metric, int oif, int strict,
716 bool *do_rr)
f11e6659 717{
9fbdcfaf 718 struct rt6_info *rt, *match, *cont;
554cfb7e 719 int mpri = -1;
1da177e4 720
f11e6659 721 match = NULL;
9fbdcfaf 722 cont = NULL;
071fb37e 723 for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
724 if (rt->rt6i_metric != metric) {
725 cont = rt;
726 break;
727 }
728
729 match = find_match(rt, oif, strict, &mpri, match, do_rr);
730 }
731
66f5d6ce 732 for (rt = leaf; rt && rt != rr_head;
071fb37e 733 rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
734 if (rt->rt6i_metric != metric) {
735 cont = rt;
736 break;
737 }
738
afc154e9 739 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
740 }
741
742 if (match || !cont)
743 return match;
744
071fb37e 745 for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
afc154e9 746 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 747
f11e6659
DM
748 return match;
749}
1da177e4 750
8d1040e8
WW
751static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
752 int oif, int strict)
f11e6659 753{
66f5d6ce 754 struct rt6_info *leaf = rcu_dereference(fn->leaf);
f11e6659 755 struct rt6_info *match, *rt0;
afc154e9 756 bool do_rr = false;
17ecf590 757 int key_plen;
1da177e4 758
87b1af8d 759 if (!leaf || leaf == net->ipv6.ip6_null_entry)
8d1040e8
WW
760 return net->ipv6.ip6_null_entry;
761
66f5d6ce 762 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 763 if (!rt0)
66f5d6ce 764 rt0 = leaf;
1da177e4 765
17ecf590
WW
766 /* Double check to make sure fn is not an intermediate node
767 * and fn->leaf does not points to its child's leaf
768 * (This might happen if all routes under fn are deleted from
769 * the tree and fib6_repair_tree() is called on the node.)
770 */
771 key_plen = rt0->rt6i_dst.plen;
772#ifdef CONFIG_IPV6_SUBTREES
773 if (rt0->rt6i_src.plen)
774 key_plen = rt0->rt6i_src.plen;
775#endif
776 if (fn->fn_bit != key_plen)
777 return net->ipv6.ip6_null_entry;
778
8d1040e8 779 match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
afc154e9 780 &do_rr);
1da177e4 781
afc154e9 782 if (do_rr) {
071fb37e 783 struct rt6_info *next = rcu_dereference(rt0->rt6_next);
f11e6659 784
554cfb7e 785 /* no entries matched; do round-robin */
f11e6659 786 if (!next || next->rt6i_metric != rt0->rt6i_metric)
8d1040e8 787 next = leaf;
f11e6659 788
66f5d6ce
WW
789 if (next != rt0) {
790 spin_lock_bh(&leaf->rt6i_table->tb6_lock);
791 /* make sure next is not being deleted from the tree */
792 if (next->rt6i_node)
793 rcu_assign_pointer(fn->rr_ptr, next);
794 spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
795 }
1da177e4 796 }
1da177e4 797
a02cec21 798 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
799}
800
8b9df265
MKL
801static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
802{
803 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
804}
805
70ceb4f5
YH
806#ifdef CONFIG_IPV6_ROUTE_INFO
807int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 808 const struct in6_addr *gwaddr)
70ceb4f5 809{
c346dca1 810 struct net *net = dev_net(dev);
70ceb4f5
YH
811 struct route_info *rinfo = (struct route_info *) opt;
812 struct in6_addr prefix_buf, *prefix;
813 unsigned int pref;
4bed72e4 814 unsigned long lifetime;
70ceb4f5
YH
815 struct rt6_info *rt;
816
817 if (len < sizeof(struct route_info)) {
818 return -EINVAL;
819 }
820
821 /* Sanity check for prefix_len and length */
822 if (rinfo->length > 3) {
823 return -EINVAL;
824 } else if (rinfo->prefix_len > 128) {
825 return -EINVAL;
826 } else if (rinfo->prefix_len > 64) {
827 if (rinfo->length < 2) {
828 return -EINVAL;
829 }
830 } else if (rinfo->prefix_len > 0) {
831 if (rinfo->length < 1) {
832 return -EINVAL;
833 }
834 }
835
836 pref = rinfo->route_pref;
837 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 838 return -EINVAL;
70ceb4f5 839
4bed72e4 840 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
841
842 if (rinfo->length == 3)
843 prefix = (struct in6_addr *)rinfo->prefix;
844 else {
845 /* this function is safe */
846 ipv6_addr_prefix(&prefix_buf,
847 (struct in6_addr *)rinfo->prefix,
848 rinfo->prefix_len);
849 prefix = &prefix_buf;
850 }
851
f104a567
DJ
852 if (rinfo->prefix_len == 0)
853 rt = rt6_get_dflt_router(gwaddr, dev);
854 else
855 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 856 gwaddr, dev);
70ceb4f5
YH
857
858 if (rt && !lifetime) {
e0a1ad73 859 ip6_del_rt(rt);
70ceb4f5
YH
860 rt = NULL;
861 }
862
863 if (!rt && lifetime)
830218c1
DA
864 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
865 dev, pref);
70ceb4f5
YH
866 else if (rt)
867 rt->rt6i_flags = RTF_ROUTEINFO |
868 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
869
870 if (rt) {
1716a961
G
871 if (!addrconf_finite_timeout(lifetime))
872 rt6_clean_expires(rt);
873 else
874 rt6_set_expires(rt, jiffies + HZ * lifetime);
875
94e187c0 876 ip6_rt_put(rt);
70ceb4f5
YH
877 }
878 return 0;
879}
880#endif
881
a3c00e46
MKL
882static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
883 struct in6_addr *saddr)
884{
66f5d6ce 885 struct fib6_node *pn, *sn;
a3c00e46
MKL
886 while (1) {
887 if (fn->fn_flags & RTN_TL_ROOT)
888 return NULL;
66f5d6ce
WW
889 pn = rcu_dereference(fn->parent);
890 sn = FIB6_SUBTREE(pn);
891 if (sn && sn != fn)
892 fn = fib6_lookup(sn, NULL, saddr);
a3c00e46
MKL
893 else
894 fn = pn;
895 if (fn->fn_flags & RTN_RTINFO)
896 return fn;
897 }
898}
c71099ac 899
d3843fe5
WW
900static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
901 bool null_fallback)
902{
903 struct rt6_info *rt = *prt;
904
905 if (dst_hold_safe(&rt->dst))
906 return true;
907 if (null_fallback) {
908 rt = net->ipv6.ip6_null_entry;
909 dst_hold(&rt->dst);
910 } else {
911 rt = NULL;
912 }
913 *prt = rt;
914 return false;
915}
916
8ed67789
DL
917static struct rt6_info *ip6_pol_route_lookup(struct net *net,
918 struct fib6_table *table,
b75cc8f9
DA
919 struct flowi6 *fl6,
920 const struct sk_buff *skb,
921 int flags)
1da177e4 922{
2b760fcf 923 struct rt6_info *rt, *rt_cache;
1da177e4 924 struct fib6_node *fn;
1da177e4 925
66f5d6ce 926 rcu_read_lock();
4c9483b2 927 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 928restart:
66f5d6ce
WW
929 rt = rcu_dereference(fn->leaf);
930 if (!rt) {
931 rt = net->ipv6.ip6_null_entry;
932 } else {
933 rt = rt6_device_match(net, rt, &fl6->saddr,
934 fl6->flowi6_oif, flags);
935 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
b4bac172 936 rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
b75cc8f9 937 skb, flags);
66f5d6ce 938 }
a3c00e46
MKL
939 if (rt == net->ipv6.ip6_null_entry) {
940 fn = fib6_backtrack(fn, &fl6->saddr);
941 if (fn)
942 goto restart;
943 }
2b760fcf
WW
944 /* Search through exception table */
945 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
946 if (rt_cache)
947 rt = rt_cache;
948
d3843fe5
WW
949 if (ip6_hold_safe(net, &rt, true))
950 dst_use_noref(&rt->dst, jiffies);
951
66f5d6ce 952 rcu_read_unlock();
b811580d 953
b65f164d 954 trace_fib6_table_lookup(net, rt, table, fl6);
b811580d 955
c71099ac
TG
956 return rt;
957
958}
959
67ba4152 960struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 961 const struct sk_buff *skb, int flags)
ea6e574e 962{
b75cc8f9 963 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
964}
965EXPORT_SYMBOL_GPL(ip6_route_lookup);
966
9acd9f3a 967struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
968 const struct in6_addr *saddr, int oif,
969 const struct sk_buff *skb, int strict)
c71099ac 970{
4c9483b2
DM
971 struct flowi6 fl6 = {
972 .flowi6_oif = oif,
973 .daddr = *daddr,
c71099ac
TG
974 };
975 struct dst_entry *dst;
77d16f45 976 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 977
adaa70bb 978 if (saddr) {
4c9483b2 979 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
980 flags |= RT6_LOOKUP_F_HAS_SADDR;
981 }
982
b75cc8f9 983 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
984 if (dst->error == 0)
985 return (struct rt6_info *) dst;
986
987 dst_release(dst);
988
1da177e4
LT
989 return NULL;
990}
7159039a
YH
991EXPORT_SYMBOL(rt6_lookup);
992
c71099ac 993/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
994 * It takes new route entry, the addition fails by any reason the
995 * route is released.
996 * Caller must hold dst before calling it.
1da177e4
LT
997 */
998
e5fd387a 999static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
333c4301
DA
1000 struct mx6_config *mxc,
1001 struct netlink_ext_ack *extack)
1da177e4
LT
1002{
1003 int err;
c71099ac 1004 struct fib6_table *table;
1da177e4 1005
c71099ac 1006 table = rt->rt6i_table;
66f5d6ce 1007 spin_lock_bh(&table->tb6_lock);
333c4301 1008 err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
66f5d6ce 1009 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1010
1011 return err;
1012}
1013
40e22e8f
TG
1014int ip6_ins_rt(struct rt6_info *rt)
1015{
e715b6d3
FW
1016 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
1017 struct mx6_config mxc = { .mx = NULL, };
1018
1cfb71ee
WW
1019 /* Hold dst to account for the reference from the fib6 tree */
1020 dst_hold(&rt->dst);
333c4301 1021 return __ip6_ins_rt(rt, &info, &mxc, NULL);
40e22e8f
TG
1022}
1023
4832c30d
DA
1024/* called with rcu_lock held */
1025static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
1026{
1027 struct net_device *dev = rt->dst.dev;
1028
98d11291 1029 if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
4832c30d
DA
1030 /* for copies of local routes, dst->dev needs to be the
1031 * device if it is a master device, the master device if
1032 * device is enslaved, and the loopback as the default
1033 */
1034 if (netif_is_l3_slave(dev) &&
1035 !rt6_need_strict(&rt->rt6i_dst.addr))
1036 dev = l3mdev_master_dev_rcu(dev);
1037 else if (!netif_is_l3_master(dev))
1038 dev = dev_net(dev)->loopback_dev;
1039 /* last case is netif_is_l3_master(dev) is true in which
1040 * case we want dev returned to be dev
1041 */
1042 }
1043
1044 return dev;
1045}
1046
8b9df265
MKL
1047static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
1048 const struct in6_addr *daddr,
1049 const struct in6_addr *saddr)
1da177e4 1050{
4832c30d 1051 struct net_device *dev;
1da177e4
LT
1052 struct rt6_info *rt;
1053
1054 /*
1055 * Clone the route.
1056 */
1057
d52d3997 1058 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
3a2232e9 1059 ort = ort->from;
1da177e4 1060
4832c30d
DA
1061 rcu_read_lock();
1062 dev = ip6_rt_get_dev_rcu(ort);
1063 rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1064 rcu_read_unlock();
83a09abd
MKL
1065 if (!rt)
1066 return NULL;
1067
1068 ip6_rt_copy_init(rt, ort);
1069 rt->rt6i_flags |= RTF_CACHE;
1070 rt->rt6i_metric = 0;
1071 rt->dst.flags |= DST_HOST;
1072 rt->rt6i_dst.addr = *daddr;
1073 rt->rt6i_dst.plen = 128;
1da177e4 1074
83a09abd
MKL
1075 if (!rt6_is_gw_or_nonexthop(ort)) {
1076 if (ort->rt6i_dst.plen != 128 &&
1077 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1078 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1079#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1080 if (rt->rt6i_src.plen && saddr) {
1081 rt->rt6i_src.addr = *saddr;
1082 rt->rt6i_src.plen = 128;
8b9df265 1083 }
83a09abd 1084#endif
95a9a5ba 1085 }
1da177e4 1086
95a9a5ba
YH
1087 return rt;
1088}
1da177e4 1089
d52d3997
MKL
1090static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1091{
4832c30d 1092 struct net_device *dev;
d52d3997
MKL
1093 struct rt6_info *pcpu_rt;
1094
4832c30d
DA
1095 rcu_read_lock();
1096 dev = ip6_rt_get_dev_rcu(rt);
1097 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1098 rcu_read_unlock();
d52d3997
MKL
1099 if (!pcpu_rt)
1100 return NULL;
1101 ip6_rt_copy_init(pcpu_rt, rt);
1102 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1103 pcpu_rt->rt6i_flags |= RTF_PCPU;
1104 return pcpu_rt;
1105}
1106
66f5d6ce 1107/* It should be called with rcu_read_lock() acquired */
d52d3997
MKL
1108static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1109{
a73e4195 1110 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1111
1112 p = this_cpu_ptr(rt->rt6i_pcpu);
1113 pcpu_rt = *p;
1114
d3843fe5 1115 if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
a73e4195 1116 rt6_dst_from_metrics_check(pcpu_rt);
d3843fe5 1117
a73e4195
MKL
1118 return pcpu_rt;
1119}
1120
1121static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1122{
1123 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1124
1125 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1126 if (!pcpu_rt) {
1127 struct net *net = dev_net(rt->dst.dev);
1128
9c7370a1
MKL
1129 dst_hold(&net->ipv6.ip6_null_entry->dst);
1130 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1131 }
1132
a94b9367
WW
1133 dst_hold(&pcpu_rt->dst);
1134 p = this_cpu_ptr(rt->rt6i_pcpu);
1135 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1136 BUG_ON(prev);
a94b9367 1137
d52d3997
MKL
1138 rt6_dst_from_metrics_check(pcpu_rt);
1139 return pcpu_rt;
1140}
1141
35732d01
WW
1142/* exception hash table implementation
1143 */
1144static DEFINE_SPINLOCK(rt6_exception_lock);
1145
1146/* Remove rt6_ex from hash table and free the memory
1147 * Caller must hold rt6_exception_lock
1148 */
1149static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1150 struct rt6_exception *rt6_ex)
1151{
b2427e67 1152 struct net *net;
81eb8447 1153
35732d01
WW
1154 if (!bucket || !rt6_ex)
1155 return;
b2427e67
CIK
1156
1157 net = dev_net(rt6_ex->rt6i->dst.dev);
35732d01
WW
1158 rt6_ex->rt6i->rt6i_node = NULL;
1159 hlist_del_rcu(&rt6_ex->hlist);
1160 rt6_release(rt6_ex->rt6i);
1161 kfree_rcu(rt6_ex, rcu);
1162 WARN_ON_ONCE(!bucket->depth);
1163 bucket->depth--;
81eb8447 1164 net->ipv6.rt6_stats->fib_rt_cache--;
35732d01
WW
1165}
1166
1167/* Remove oldest rt6_ex in bucket and free the memory
1168 * Caller must hold rt6_exception_lock
1169 */
1170static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1171{
1172 struct rt6_exception *rt6_ex, *oldest = NULL;
1173
1174 if (!bucket)
1175 return;
1176
1177 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1178 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1179 oldest = rt6_ex;
1180 }
1181 rt6_remove_exception(bucket, oldest);
1182}
1183
1184static u32 rt6_exception_hash(const struct in6_addr *dst,
1185 const struct in6_addr *src)
1186{
1187 static u32 seed __read_mostly;
1188 u32 val;
1189
1190 net_get_random_once(&seed, sizeof(seed));
1191 val = jhash(dst, sizeof(*dst), seed);
1192
1193#ifdef CONFIG_IPV6_SUBTREES
1194 if (src)
1195 val = jhash(src, sizeof(*src), val);
1196#endif
1197 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1198}
1199
1200/* Helper function to find the cached rt in the hash table
1201 * and update bucket pointer to point to the bucket for this
1202 * (daddr, saddr) pair
1203 * Caller must hold rt6_exception_lock
1204 */
1205static struct rt6_exception *
1206__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1207 const struct in6_addr *daddr,
1208 const struct in6_addr *saddr)
1209{
1210 struct rt6_exception *rt6_ex;
1211 u32 hval;
1212
1213 if (!(*bucket) || !daddr)
1214 return NULL;
1215
1216 hval = rt6_exception_hash(daddr, saddr);
1217 *bucket += hval;
1218
1219 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1220 struct rt6_info *rt6 = rt6_ex->rt6i;
1221 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1222
1223#ifdef CONFIG_IPV6_SUBTREES
1224 if (matched && saddr)
1225 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1226#endif
1227 if (matched)
1228 return rt6_ex;
1229 }
1230 return NULL;
1231}
1232
1233/* Helper function to find the cached rt in the hash table
1234 * and update bucket pointer to point to the bucket for this
1235 * (daddr, saddr) pair
1236 * Caller must hold rcu_read_lock()
1237 */
1238static struct rt6_exception *
1239__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1240 const struct in6_addr *daddr,
1241 const struct in6_addr *saddr)
1242{
1243 struct rt6_exception *rt6_ex;
1244 u32 hval;
1245
1246 WARN_ON_ONCE(!rcu_read_lock_held());
1247
1248 if (!(*bucket) || !daddr)
1249 return NULL;
1250
1251 hval = rt6_exception_hash(daddr, saddr);
1252 *bucket += hval;
1253
1254 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1255 struct rt6_info *rt6 = rt6_ex->rt6i;
1256 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1257
1258#ifdef CONFIG_IPV6_SUBTREES
1259 if (matched && saddr)
1260 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1261#endif
1262 if (matched)
1263 return rt6_ex;
1264 }
1265 return NULL;
1266}
1267
1268static int rt6_insert_exception(struct rt6_info *nrt,
1269 struct rt6_info *ort)
1270{
81eb8447 1271 struct net *net = dev_net(ort->dst.dev);
35732d01
WW
1272 struct rt6_exception_bucket *bucket;
1273 struct in6_addr *src_key = NULL;
1274 struct rt6_exception *rt6_ex;
1275 int err = 0;
1276
1277 /* ort can't be a cache or pcpu route */
1278 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
3a2232e9 1279 ort = ort->from;
35732d01
WW
1280 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1281
1282 spin_lock_bh(&rt6_exception_lock);
1283
1284 if (ort->exception_bucket_flushed) {
1285 err = -EINVAL;
1286 goto out;
1287 }
1288
1289 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1290 lockdep_is_held(&rt6_exception_lock));
1291 if (!bucket) {
1292 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1293 GFP_ATOMIC);
1294 if (!bucket) {
1295 err = -ENOMEM;
1296 goto out;
1297 }
1298 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1299 }
1300
1301#ifdef CONFIG_IPV6_SUBTREES
1302 /* rt6i_src.plen != 0 indicates ort is in subtree
1303 * and exception table is indexed by a hash of
1304 * both rt6i_dst and rt6i_src.
1305 * Otherwise, the exception table is indexed by
1306 * a hash of only rt6i_dst.
1307 */
1308 if (ort->rt6i_src.plen)
1309 src_key = &nrt->rt6i_src.addr;
1310#endif
60006a48
WW
1311
1312 /* Update rt6i_prefsrc as it could be changed
1313 * in rt6_remove_prefsrc()
1314 */
1315 nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
f5bbe7ee
WW
1316 /* rt6_mtu_change() might lower mtu on ort.
1317 * Only insert this exception route if its mtu
1318 * is less than ort's mtu value.
1319 */
1320 if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
1321 err = -EINVAL;
1322 goto out;
1323 }
60006a48 1324
35732d01
WW
1325 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1326 src_key);
1327 if (rt6_ex)
1328 rt6_remove_exception(bucket, rt6_ex);
1329
1330 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1331 if (!rt6_ex) {
1332 err = -ENOMEM;
1333 goto out;
1334 }
1335 rt6_ex->rt6i = nrt;
1336 rt6_ex->stamp = jiffies;
1337 atomic_inc(&nrt->rt6i_ref);
1338 nrt->rt6i_node = ort->rt6i_node;
1339 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1340 bucket->depth++;
81eb8447 1341 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1342
1343 if (bucket->depth > FIB6_MAX_DEPTH)
1344 rt6_exception_remove_oldest(bucket);
1345
1346out:
1347 spin_unlock_bh(&rt6_exception_lock);
1348
1349 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1350 if (!err) {
922c2ac8 1351 spin_lock_bh(&ort->rt6i_table->tb6_lock);
35732d01 1352 fib6_update_sernum(ort);
922c2ac8 1353 spin_unlock_bh(&ort->rt6i_table->tb6_lock);
b886d5f2
PA
1354 fib6_force_start_gc(net);
1355 }
35732d01
WW
1356
1357 return err;
1358}
1359
1360void rt6_flush_exceptions(struct rt6_info *rt)
1361{
1362 struct rt6_exception_bucket *bucket;
1363 struct rt6_exception *rt6_ex;
1364 struct hlist_node *tmp;
1365 int i;
1366
1367 spin_lock_bh(&rt6_exception_lock);
1368 /* Prevent rt6_insert_exception() to recreate the bucket list */
1369 rt->exception_bucket_flushed = 1;
1370
1371 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1372 lockdep_is_held(&rt6_exception_lock));
1373 if (!bucket)
1374 goto out;
1375
1376 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1377 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1378 rt6_remove_exception(bucket, rt6_ex);
1379 WARN_ON_ONCE(bucket->depth);
1380 bucket++;
1381 }
1382
1383out:
1384 spin_unlock_bh(&rt6_exception_lock);
1385}
1386
1387/* Find cached rt in the hash table inside passed in rt
1388 * Caller has to hold rcu_read_lock()
1389 */
1390static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1391 struct in6_addr *daddr,
1392 struct in6_addr *saddr)
1393{
1394 struct rt6_exception_bucket *bucket;
1395 struct in6_addr *src_key = NULL;
1396 struct rt6_exception *rt6_ex;
1397 struct rt6_info *res = NULL;
1398
1399 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1400
1401#ifdef CONFIG_IPV6_SUBTREES
1402 /* rt6i_src.plen != 0 indicates rt is in subtree
1403 * and exception table is indexed by a hash of
1404 * both rt6i_dst and rt6i_src.
1405 * Otherwise, the exception table is indexed by
1406 * a hash of only rt6i_dst.
1407 */
1408 if (rt->rt6i_src.plen)
1409 src_key = saddr;
1410#endif
1411 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1412
1413 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1414 res = rt6_ex->rt6i;
1415
1416 return res;
1417}
1418
1419/* Remove the passed in cached rt from the hash table that contains it */
1420int rt6_remove_exception_rt(struct rt6_info *rt)
1421{
35732d01 1422 struct rt6_exception_bucket *bucket;
3a2232e9 1423 struct rt6_info *from = rt->from;
35732d01
WW
1424 struct in6_addr *src_key = NULL;
1425 struct rt6_exception *rt6_ex;
1426 int err;
1427
1428 if (!from ||
442d713b 1429 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1430 return -EINVAL;
1431
1432 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1433 return -ENOENT;
1434
1435 spin_lock_bh(&rt6_exception_lock);
1436 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1437 lockdep_is_held(&rt6_exception_lock));
1438#ifdef CONFIG_IPV6_SUBTREES
1439 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1440 * and exception table is indexed by a hash of
1441 * both rt6i_dst and rt6i_src.
1442 * Otherwise, the exception table is indexed by
1443 * a hash of only rt6i_dst.
1444 */
1445 if (from->rt6i_src.plen)
1446 src_key = &rt->rt6i_src.addr;
1447#endif
1448 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1449 &rt->rt6i_dst.addr,
1450 src_key);
1451 if (rt6_ex) {
1452 rt6_remove_exception(bucket, rt6_ex);
1453 err = 0;
1454 } else {
1455 err = -ENOENT;
1456 }
1457
1458 spin_unlock_bh(&rt6_exception_lock);
1459 return err;
1460}
1461
1462/* Find rt6_ex which contains the passed in rt cache and
1463 * refresh its stamp
1464 */
1465static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1466{
35732d01 1467 struct rt6_exception_bucket *bucket;
3a2232e9 1468 struct rt6_info *from = rt->from;
35732d01
WW
1469 struct in6_addr *src_key = NULL;
1470 struct rt6_exception *rt6_ex;
1471
1472 if (!from ||
442d713b 1473 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1474 return;
1475
1476 rcu_read_lock();
1477 bucket = rcu_dereference(from->rt6i_exception_bucket);
1478
1479#ifdef CONFIG_IPV6_SUBTREES
1480 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1481 * and exception table is indexed by a hash of
1482 * both rt6i_dst and rt6i_src.
1483 * Otherwise, the exception table is indexed by
1484 * a hash of only rt6i_dst.
1485 */
1486 if (from->rt6i_src.plen)
1487 src_key = &rt->rt6i_src.addr;
1488#endif
1489 rt6_ex = __rt6_find_exception_rcu(&bucket,
1490 &rt->rt6i_dst.addr,
1491 src_key);
1492 if (rt6_ex)
1493 rt6_ex->stamp = jiffies;
1494
1495 rcu_read_unlock();
1496}
1497
60006a48
WW
1498static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1499{
1500 struct rt6_exception_bucket *bucket;
1501 struct rt6_exception *rt6_ex;
1502 int i;
1503
1504 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1505 lockdep_is_held(&rt6_exception_lock));
1506
1507 if (bucket) {
1508 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1509 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1510 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1511 }
1512 bucket++;
1513 }
1514 }
1515}
1516
f5bbe7ee
WW
1517static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
1518{
1519 struct rt6_exception_bucket *bucket;
1520 struct rt6_exception *rt6_ex;
1521 int i;
1522
1523 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1524 lockdep_is_held(&rt6_exception_lock));
1525
1526 if (bucket) {
1527 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1528 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1529 struct rt6_info *entry = rt6_ex->rt6i;
1530 /* For RTF_CACHE with rt6i_pmtu == 0
1531 * (i.e. a redirected route),
1532 * the metrics of its rt->dst.from has already
1533 * been updated.
1534 */
1535 if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
1536 entry->rt6i_pmtu = mtu;
1537 }
1538 bucket++;
1539 }
1540 }
1541}
1542
b16cb459
WW
1543#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1544
1545static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1546 struct in6_addr *gateway)
1547{
1548 struct rt6_exception_bucket *bucket;
1549 struct rt6_exception *rt6_ex;
1550 struct hlist_node *tmp;
1551 int i;
1552
1553 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1554 return;
1555
1556 spin_lock_bh(&rt6_exception_lock);
1557 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1558 lockdep_is_held(&rt6_exception_lock));
1559
1560 if (bucket) {
1561 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1562 hlist_for_each_entry_safe(rt6_ex, tmp,
1563 &bucket->chain, hlist) {
1564 struct rt6_info *entry = rt6_ex->rt6i;
1565
1566 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1567 RTF_CACHE_GATEWAY &&
1568 ipv6_addr_equal(gateway,
1569 &entry->rt6i_gateway)) {
1570 rt6_remove_exception(bucket, rt6_ex);
1571 }
1572 }
1573 bucket++;
1574 }
1575 }
1576
1577 spin_unlock_bh(&rt6_exception_lock);
1578}
1579
c757faa8
WW
1580static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1581 struct rt6_exception *rt6_ex,
1582 struct fib6_gc_args *gc_args,
1583 unsigned long now)
1584{
1585 struct rt6_info *rt = rt6_ex->rt6i;
1586
1859bac0
PA
1587 /* we are pruning and obsoleting aged-out and non gateway exceptions
1588 * even if others have still references to them, so that on next
1589 * dst_check() such references can be dropped.
1590 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1591 * expired, independently from their aging, as per RFC 8201 section 4
1592 */
31afeb42
WW
1593 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1594 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1595 RT6_TRACE("aging clone %p\n", rt);
1596 rt6_remove_exception(bucket, rt6_ex);
1597 return;
1598 }
1599 } else if (time_after(jiffies, rt->dst.expires)) {
1600 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1601 rt6_remove_exception(bucket, rt6_ex);
1602 return;
31afeb42
WW
1603 }
1604
1605 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1606 struct neighbour *neigh;
1607 __u8 neigh_flags = 0;
1608
1609 neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
1610 if (neigh) {
1611 neigh_flags = neigh->flags;
1612 neigh_release(neigh);
1613 }
1614 if (!(neigh_flags & NTF_ROUTER)) {
1615 RT6_TRACE("purging route %p via non-router but gateway\n",
1616 rt);
1617 rt6_remove_exception(bucket, rt6_ex);
1618 return;
1619 }
1620 }
31afeb42 1621
c757faa8
WW
1622 gc_args->more++;
1623}
1624
1625void rt6_age_exceptions(struct rt6_info *rt,
1626 struct fib6_gc_args *gc_args,
1627 unsigned long now)
1628{
1629 struct rt6_exception_bucket *bucket;
1630 struct rt6_exception *rt6_ex;
1631 struct hlist_node *tmp;
1632 int i;
1633
1634 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1635 return;
1636
1637 spin_lock_bh(&rt6_exception_lock);
1638 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1639 lockdep_is_held(&rt6_exception_lock));
1640
1641 if (bucket) {
1642 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1643 hlist_for_each_entry_safe(rt6_ex, tmp,
1644 &bucket->chain, hlist) {
1645 rt6_age_examine_exception(bucket, rt6_ex,
1646 gc_args, now);
1647 }
1648 bucket++;
1649 }
1650 }
1651 spin_unlock_bh(&rt6_exception_lock);
1652}
1653
9ff74384 1654struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
b75cc8f9
DA
1655 int oif, struct flowi6 *fl6,
1656 const struct sk_buff *skb, int flags)
1da177e4 1657{
367efcb9 1658 struct fib6_node *fn, *saved_fn;
2b760fcf 1659 struct rt6_info *rt, *rt_cache;
c71099ac 1660 int strict = 0;
1da177e4 1661
77d16f45 1662 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1663 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1664 if (net->ipv6.devconf_all->forwarding == 0)
1665 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1666
66f5d6ce 1667 rcu_read_lock();
1da177e4 1668
4c9483b2 1669 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1670 saved_fn = fn;
1da177e4 1671
ca254490
DA
1672 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1673 oif = 0;
1674
a3c00e46 1675redo_rt6_select:
8d1040e8 1676 rt = rt6_select(net, fn, oif, strict);
52bd4c0c 1677 if (rt->rt6i_nsiblings)
b4bac172 1678 rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
a3c00e46
MKL
1679 if (rt == net->ipv6.ip6_null_entry) {
1680 fn = fib6_backtrack(fn, &fl6->saddr);
1681 if (fn)
1682 goto redo_rt6_select;
367efcb9
MKL
1683 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1684 /* also consider unreachable route */
1685 strict &= ~RT6_LOOKUP_F_REACHABLE;
1686 fn = saved_fn;
1687 goto redo_rt6_select;
367efcb9 1688 }
a3c00e46
MKL
1689 }
1690
2b760fcf
WW
1691 /*Search through exception table */
1692 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
1693 if (rt_cache)
1694 rt = rt_cache;
fb9de91e 1695
d3843fe5 1696 if (rt == net->ipv6.ip6_null_entry) {
66f5d6ce 1697 rcu_read_unlock();
d3843fe5 1698 dst_hold(&rt->dst);
b65f164d 1699 trace_fib6_table_lookup(net, rt, table, fl6);
d3843fe5
WW
1700 return rt;
1701 } else if (rt->rt6i_flags & RTF_CACHE) {
1702 if (ip6_hold_safe(net, &rt, true)) {
1703 dst_use_noref(&rt->dst, jiffies);
1704 rt6_dst_from_metrics_check(rt);
1705 }
66f5d6ce 1706 rcu_read_unlock();
b65f164d 1707 trace_fib6_table_lookup(net, rt, table, fl6);
d52d3997 1708 return rt;
3da59bd9
MKL
1709 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1710 !(rt->rt6i_flags & RTF_GATEWAY))) {
1711 /* Create a RTF_CACHE clone which will not be
1712 * owned by the fib6 tree. It is for the special case where
1713 * the daddr in the skb during the neighbor look-up is different
1714 * from the fl6->daddr used to look-up route here.
1715 */
1716
1717 struct rt6_info *uncached_rt;
1718
d3843fe5
WW
1719 if (ip6_hold_safe(net, &rt, true)) {
1720 dst_use_noref(&rt->dst, jiffies);
1721 } else {
66f5d6ce 1722 rcu_read_unlock();
d3843fe5
WW
1723 uncached_rt = rt;
1724 goto uncached_rt_out;
1725 }
66f5d6ce 1726 rcu_read_unlock();
d52d3997 1727
3da59bd9
MKL
1728 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1729 dst_release(&rt->dst);
c71099ac 1730
1cfb71ee
WW
1731 if (uncached_rt) {
1732 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1733 * No need for another dst_hold()
1734 */
8d0b94af 1735 rt6_uncached_list_add(uncached_rt);
81eb8447 1736 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1737 } else {
3da59bd9 1738 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1739 dst_hold(&uncached_rt->dst);
1740 }
b811580d 1741
d3843fe5 1742uncached_rt_out:
b65f164d 1743 trace_fib6_table_lookup(net, uncached_rt, table, fl6);
3da59bd9 1744 return uncached_rt;
3da59bd9 1745
d52d3997
MKL
1746 } else {
1747 /* Get a percpu copy */
1748
1749 struct rt6_info *pcpu_rt;
1750
d3843fe5 1751 dst_use_noref(&rt->dst, jiffies);
951f788a 1752 local_bh_disable();
d52d3997 1753 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1754
951f788a 1755 if (!pcpu_rt) {
a94b9367
WW
1756 /* atomic_inc_not_zero() is needed when using rcu */
1757 if (atomic_inc_not_zero(&rt->rt6i_ref)) {
951f788a 1758 /* No dst_hold() on rt is needed because grabbing
a94b9367
WW
1759 * rt->rt6i_ref makes sure rt can't be released.
1760 */
a94b9367
WW
1761 pcpu_rt = rt6_make_pcpu_route(rt);
1762 rt6_release(rt);
1763 } else {
1764 /* rt is already removed from tree */
a94b9367
WW
1765 pcpu_rt = net->ipv6.ip6_null_entry;
1766 dst_hold(&pcpu_rt->dst);
1767 }
9c7370a1 1768 }
951f788a
ED
1769 local_bh_enable();
1770 rcu_read_unlock();
b65f164d 1771 trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
d52d3997
MKL
1772 return pcpu_rt;
1773 }
1da177e4 1774}
9ff74384 1775EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1776
b75cc8f9
DA
1777static struct rt6_info *ip6_pol_route_input(struct net *net,
1778 struct fib6_table *table,
1779 struct flowi6 *fl6,
1780 const struct sk_buff *skb,
1781 int flags)
4acad72d 1782{
b75cc8f9 1783 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1784}
1785
d409b847
MB
1786struct dst_entry *ip6_route_input_lookup(struct net *net,
1787 struct net_device *dev,
b75cc8f9
DA
1788 struct flowi6 *fl6,
1789 const struct sk_buff *skb,
1790 int flags)
72331bc0
SL
1791{
1792 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1793 flags |= RT6_LOOKUP_F_IFACE;
1794
b75cc8f9 1795 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1796}
d409b847 1797EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1798
23aebdac 1799static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1800 struct flow_keys *keys,
1801 struct flow_keys *flkeys)
23aebdac
JS
1802{
1803 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1804 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1805 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1806 const struct ipv6hdr *inner_iph;
1807 const struct icmp6hdr *icmph;
1808 struct ipv6hdr _inner_iph;
1809
1810 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1811 goto out;
1812
1813 icmph = icmp6_hdr(skb);
1814 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1815 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1816 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1817 icmph->icmp6_type != ICMPV6_PARAMPROB)
1818 goto out;
1819
1820 inner_iph = skb_header_pointer(skb,
1821 skb_transport_offset(skb) + sizeof(*icmph),
1822 sizeof(_inner_iph), &_inner_iph);
1823 if (!inner_iph)
1824 goto out;
1825
1826 key_iph = inner_iph;
5e5d6fed 1827 _flkeys = NULL;
23aebdac 1828out:
5e5d6fed
RP
1829 if (_flkeys) {
1830 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1831 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1832 keys->tags.flow_label = _flkeys->tags.flow_label;
1833 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1834 } else {
1835 keys->addrs.v6addrs.src = key_iph->saddr;
1836 keys->addrs.v6addrs.dst = key_iph->daddr;
1837 keys->tags.flow_label = ip6_flowinfo(key_iph);
1838 keys->basic.ip_proto = key_iph->nexthdr;
1839 }
23aebdac
JS
1840}
1841
1842/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
1843u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1844 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
1845{
1846 struct flow_keys hash_keys;
9a2a537a 1847 u32 mhash;
23aebdac 1848
bbfa047a 1849 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
1850 case 0:
1851 memset(&hash_keys, 0, sizeof(hash_keys));
1852 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1853 if (skb) {
1854 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
1855 } else {
1856 hash_keys.addrs.v6addrs.src = fl6->saddr;
1857 hash_keys.addrs.v6addrs.dst = fl6->daddr;
1858 hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
1859 hash_keys.basic.ip_proto = fl6->flowi6_proto;
1860 }
1861 break;
1862 case 1:
1863 if (skb) {
1864 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1865 struct flow_keys keys;
1866
1867 /* short-circuit if we already have L4 hash present */
1868 if (skb->l4_hash)
1869 return skb_get_hash_raw(skb) >> 1;
1870
1871 memset(&hash_keys, 0, sizeof(hash_keys));
1872
1873 if (!flkeys) {
1874 skb_flow_dissect_flow_keys(skb, &keys, flag);
1875 flkeys = &keys;
1876 }
1877 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1878 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
1879 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
1880 hash_keys.ports.src = flkeys->ports.src;
1881 hash_keys.ports.dst = flkeys->ports.dst;
1882 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
1883 } else {
1884 memset(&hash_keys, 0, sizeof(hash_keys));
1885 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1886 hash_keys.addrs.v6addrs.src = fl6->saddr;
1887 hash_keys.addrs.v6addrs.dst = fl6->daddr;
1888 hash_keys.ports.src = fl6->fl6_sport;
1889 hash_keys.ports.dst = fl6->fl6_dport;
1890 hash_keys.basic.ip_proto = fl6->flowi6_proto;
1891 }
1892 break;
23aebdac 1893 }
9a2a537a 1894 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 1895
9a2a537a 1896 return mhash >> 1;
23aebdac
JS
1897}
1898
c71099ac
TG
1899void ip6_route_input(struct sk_buff *skb)
1900{
b71d1d42 1901 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1902 struct net *net = dev_net(skb->dev);
adaa70bb 1903 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1904 struct ip_tunnel_info *tun_info;
4c9483b2 1905 struct flowi6 fl6 = {
e0d56fdd 1906 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
1907 .daddr = iph->daddr,
1908 .saddr = iph->saddr,
6502ca52 1909 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1910 .flowi6_mark = skb->mark,
1911 .flowi6_proto = iph->nexthdr,
c71099ac 1912 };
5e5d6fed 1913 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 1914
904af04d 1915 tun_info = skb_tunnel_info(skb);
46fa062a 1916 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1917 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
1918
1919 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
1920 flkeys = &_flkeys;
1921
23aebdac 1922 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 1923 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 1924 skb_dst_drop(skb);
b75cc8f9
DA
1925 skb_dst_set(skb,
1926 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
1927}
1928
b75cc8f9
DA
1929static struct rt6_info *ip6_pol_route_output(struct net *net,
1930 struct fib6_table *table,
1931 struct flowi6 *fl6,
1932 const struct sk_buff *skb,
1933 int flags)
1da177e4 1934{
b75cc8f9 1935 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
1936}
1937
6f21c96a
PA
1938struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1939 struct flowi6 *fl6, int flags)
c71099ac 1940{
d46a9d67 1941 bool any_src;
c71099ac 1942
4c1feac5
DA
1943 if (rt6_need_strict(&fl6->daddr)) {
1944 struct dst_entry *dst;
1945
1946 dst = l3mdev_link_scope_lookup(net, fl6);
1947 if (dst)
1948 return dst;
1949 }
ca254490 1950
1fb9489b 1951 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1952
d46a9d67 1953 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1954 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1955 (fl6->flowi6_oif && any_src))
77d16f45 1956 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1957
d46a9d67 1958 if (!any_src)
adaa70bb 1959 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1960 else if (sk)
1961 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1962
b75cc8f9 1963 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 1964}
6f21c96a 1965EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 1966
2774c131 1967struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1968{
5c1e6aa3 1969 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 1970 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
1971 struct dst_entry *new = NULL;
1972
1dbe3252 1973 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 1974 DST_OBSOLETE_DEAD, 0);
14e50e57 1975 if (rt) {
0a1f5962 1976 rt6_info_init(rt);
81eb8447 1977 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 1978
0a1f5962 1979 new = &rt->dst;
14e50e57 1980 new->__use = 1;
352e512c 1981 new->input = dst_discard;
ede2059d 1982 new->output = dst_discard_out;
14e50e57 1983
0a1f5962 1984 dst_copy_metrics(new, &ort->dst);
14e50e57 1985
1dbe3252 1986 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 1987 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1988 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1989 rt->rt6i_metric = 0;
1990
1991 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1992#ifdef CONFIG_IPV6_SUBTREES
1993 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1994#endif
14e50e57
DM
1995 }
1996
69ead7af
DM
1997 dst_release(dst_orig);
1998 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1999}
14e50e57 2000
1da177e4
LT
2001/*
2002 * Destination cache support functions
2003 */
2004
4b32b5ad
MKL
2005static void rt6_dst_from_metrics_check(struct rt6_info *rt)
2006{
3a2232e9
DM
2007 if (rt->from &&
2008 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst))
2009 dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true);
4b32b5ad
MKL
2010}
2011
3da59bd9
MKL
2012static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
2013{
36143645 2014 u32 rt_cookie = 0;
c5cff856
WW
2015
2016 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
3da59bd9
MKL
2017 return NULL;
2018
2019 if (rt6_check_expired(rt))
2020 return NULL;
2021
2022 return &rt->dst;
2023}
2024
2025static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
2026{
5973fb1e
MKL
2027 if (!__rt6_check_expired(rt) &&
2028 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3a2232e9 2029 rt6_check(rt->from, cookie))
3da59bd9
MKL
2030 return &rt->dst;
2031 else
2032 return NULL;
2033}
2034
1da177e4
LT
2035static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2036{
2037 struct rt6_info *rt;
2038
2039 rt = (struct rt6_info *) dst;
2040
6f3118b5
ND
2041 /* All IPV6 dsts are created with ->obsolete set to the value
2042 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2043 * into this function always.
2044 */
e3bc10bd 2045
4b32b5ad
MKL
2046 rt6_dst_from_metrics_check(rt);
2047
02bcf4e0 2048 if (rt->rt6i_flags & RTF_PCPU ||
3a2232e9 2049 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
3da59bd9
MKL
2050 return rt6_dst_from_check(rt, cookie);
2051 else
2052 return rt6_check(rt, cookie);
1da177e4
LT
2053}
2054
2055static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2056{
2057 struct rt6_info *rt = (struct rt6_info *) dst;
2058
2059 if (rt) {
54c1a859
YH
2060 if (rt->rt6i_flags & RTF_CACHE) {
2061 if (rt6_check_expired(rt)) {
2062 ip6_del_rt(rt);
2063 dst = NULL;
2064 }
2065 } else {
1da177e4 2066 dst_release(dst);
54c1a859
YH
2067 dst = NULL;
2068 }
1da177e4 2069 }
54c1a859 2070 return dst;
1da177e4
LT
2071}
2072
2073static void ip6_link_failure(struct sk_buff *skb)
2074{
2075 struct rt6_info *rt;
2076
3ffe533c 2077 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2078
adf30907 2079 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2080 if (rt) {
1eb4f758 2081 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0
WW
2082 if (dst_hold_safe(&rt->dst))
2083 ip6_del_rt(rt);
c5cff856
WW
2084 } else {
2085 struct fib6_node *fn;
2086
2087 rcu_read_lock();
2088 fn = rcu_dereference(rt->rt6i_node);
2089 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2090 fn->fn_sernum = -1;
2091 rcu_read_unlock();
1eb4f758 2092 }
1da177e4
LT
2093 }
2094}
2095
45e4fd26
MKL
2096static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2097{
2098 struct net *net = dev_net(rt->dst.dev);
2099
2100 rt->rt6i_flags |= RTF_MODIFIED;
2101 rt->rt6i_pmtu = mtu;
2102 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2103}
2104
0d3f6d29
MKL
2105static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2106{
2107 return !(rt->rt6i_flags & RTF_CACHE) &&
4e587ea7
WW
2108 (rt->rt6i_flags & RTF_PCPU ||
2109 rcu_access_pointer(rt->rt6i_node));
0d3f6d29
MKL
2110}
2111
45e4fd26
MKL
2112static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2113 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2114{
0dec879f 2115 const struct in6_addr *daddr, *saddr;
67ba4152 2116 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2117
45e4fd26
MKL
2118 if (rt6->rt6i_flags & RTF_LOCAL)
2119 return;
81aded24 2120
19bda36c
XL
2121 if (dst_metric_locked(dst, RTAX_MTU))
2122 return;
2123
0dec879f
JA
2124 if (iph) {
2125 daddr = &iph->daddr;
2126 saddr = &iph->saddr;
2127 } else if (sk) {
2128 daddr = &sk->sk_v6_daddr;
2129 saddr = &inet6_sk(sk)->saddr;
2130 } else {
2131 daddr = NULL;
2132 saddr = NULL;
2133 }
2134 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2135 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2136 if (mtu >= dst_mtu(dst))
2137 return;
9d289715 2138
0d3f6d29 2139 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2140 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2141 /* update rt6_ex->stamp for cache */
2142 if (rt6->rt6i_flags & RTF_CACHE)
2143 rt6_update_exception_stamp_rt(rt6);
0dec879f 2144 } else if (daddr) {
45e4fd26
MKL
2145 struct rt6_info *nrt6;
2146
45e4fd26
MKL
2147 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
2148 if (nrt6) {
2149 rt6_do_update_pmtu(nrt6, mtu);
2b760fcf
WW
2150 if (rt6_insert_exception(nrt6, rt6))
2151 dst_release_immediate(&nrt6->dst);
45e4fd26 2152 }
1da177e4
LT
2153 }
2154}
2155
45e4fd26
MKL
2156static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2157 struct sk_buff *skb, u32 mtu)
2158{
2159 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2160}
2161
42ae66c8 2162void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2163 int oif, u32 mark, kuid_t uid)
81aded24
DM
2164{
2165 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2166 struct dst_entry *dst;
2167 struct flowi6 fl6;
2168
2169 memset(&fl6, 0, sizeof(fl6));
2170 fl6.flowi6_oif = oif;
1b3c61dc 2171 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2172 fl6.daddr = iph->daddr;
2173 fl6.saddr = iph->saddr;
6502ca52 2174 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2175 fl6.flowi6_uid = uid;
81aded24
DM
2176
2177 dst = ip6_route_output(net, NULL, &fl6);
2178 if (!dst->error)
45e4fd26 2179 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2180 dst_release(dst);
2181}
2182EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2183
2184void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2185{
33c162a9
MKL
2186 struct dst_entry *dst;
2187
81aded24 2188 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2189 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2190
2191 dst = __sk_dst_get(sk);
2192 if (!dst || !dst->obsolete ||
2193 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2194 return;
2195
2196 bh_lock_sock(sk);
2197 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2198 ip6_datagram_dst_update(sk, false);
2199 bh_unlock_sock(sk);
81aded24
DM
2200}
2201EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2202
b55b76b2
DJ
2203/* Handle redirects */
/*
 * Flow key used for redirect lookups: a regular flowi6 extended with
 * the address of the router that sent the redirect.
 *
 * ip6_route_redirect() hands &rdfl.fl6 to the rule lookup and
 * __ip6_route_redirect() casts that flowi6 pointer back to this
 * structure, so fl6 has to remain the first member.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;		/* must stay first (see above) */
	struct in6_addr gateway;	/* compared with rt6i_gateway of candidates */
};
2208
2209static struct rt6_info *__ip6_route_redirect(struct net *net,
2210 struct fib6_table *table,
2211 struct flowi6 *fl6,
b75cc8f9 2212 const struct sk_buff *skb,
b55b76b2
DJ
2213 int flags)
2214{
2215 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2b760fcf 2216 struct rt6_info *rt, *rt_cache;
b55b76b2
DJ
2217 struct fib6_node *fn;
2218
2219 /* Get the "current" route for this destination and
67c408cf 2220 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2221 *
2222 * RFC 4861 specifies that redirects should only be
2223 * accepted if they come from the nexthop to the target.
2224 * Due to the way the routes are chosen, this notion
2225 * is a bit fuzzy and one might need to check all possible
2226 * routes.
2227 */
2228
66f5d6ce 2229 rcu_read_lock();
b55b76b2
DJ
2230 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2231restart:
66f5d6ce 2232 for_each_fib6_node_rt_rcu(fn) {
8067bb8c
IS
2233 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
2234 continue;
b55b76b2
DJ
2235 if (rt6_check_expired(rt))
2236 continue;
2237 if (rt->dst.error)
2238 break;
2239 if (!(rt->rt6i_flags & RTF_GATEWAY))
2240 continue;
2241 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
2242 continue;
2b760fcf
WW
2243 /* rt_cache's gateway might be different from its 'parent'
2244 * in the case of an ip redirect.
2245 * So we keep searching in the exception table if the gateway
2246 * is different.
2247 */
2248 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
2249 rt_cache = rt6_find_cached_rt(rt,
2250 &fl6->daddr,
2251 &fl6->saddr);
2252 if (rt_cache &&
2253 ipv6_addr_equal(&rdfl->gateway,
2254 &rt_cache->rt6i_gateway)) {
2255 rt = rt_cache;
2256 break;
2257 }
b55b76b2 2258 continue;
2b760fcf 2259 }
b55b76b2
DJ
2260 break;
2261 }
2262
2263 if (!rt)
2264 rt = net->ipv6.ip6_null_entry;
2265 else if (rt->dst.error) {
2266 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2267 goto out;
2268 }
2269
2270 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
2271 fn = fib6_backtrack(fn, &fl6->saddr);
2272 if (fn)
2273 goto restart;
b55b76b2 2274 }
a3c00e46 2275
b0a1ba59 2276out:
d3843fe5 2277 ip6_hold_safe(net, &rt, true);
b55b76b2 2278
66f5d6ce 2279 rcu_read_unlock();
b55b76b2 2280
b65f164d 2281 trace_fib6_table_lookup(net, rt, table, fl6);
b55b76b2
DJ
2282 return rt;
2283};
2284
2285static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2286 const struct flowi6 *fl6,
2287 const struct sk_buff *skb,
2288 const struct in6_addr *gateway)
b55b76b2
DJ
2289{
2290 int flags = RT6_LOOKUP_F_HAS_SADDR;
2291 struct ip6rd_flowi rdfl;
2292
2293 rdfl.fl6 = *fl6;
2294 rdfl.gateway = *gateway;
2295
b75cc8f9 2296 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2297 flags, __ip6_route_redirect);
2298}
2299
e2d118a1
LC
2300void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2301 kuid_t uid)
3a5ad2ee
DM
2302{
2303 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2304 struct dst_entry *dst;
2305 struct flowi6 fl6;
2306
2307 memset(&fl6, 0, sizeof(fl6));
e374c618 2308 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2309 fl6.flowi6_oif = oif;
2310 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2311 fl6.daddr = iph->daddr;
2312 fl6.saddr = iph->saddr;
6502ca52 2313 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2314 fl6.flowi6_uid = uid;
3a5ad2ee 2315
b75cc8f9 2316 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2317 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2318 dst_release(dst);
2319}
2320EXPORT_SYMBOL_GPL(ip6_redirect);
2321
c92a59ec
DJ
2322void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2323 u32 mark)
2324{
2325 const struct ipv6hdr *iph = ipv6_hdr(skb);
2326 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2327 struct dst_entry *dst;
2328 struct flowi6 fl6;
2329
2330 memset(&fl6, 0, sizeof(fl6));
e374c618 2331 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2332 fl6.flowi6_oif = oif;
2333 fl6.flowi6_mark = mark;
c92a59ec
DJ
2334 fl6.daddr = msg->dest;
2335 fl6.saddr = iph->daddr;
e2d118a1 2336 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2337
b75cc8f9 2338 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2339 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2340 dst_release(dst);
2341}
2342
3a5ad2ee
DM
2343void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2344{
e2d118a1
LC
2345 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2346 sk->sk_uid);
3a5ad2ee
DM
2347}
2348EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2349
0dbaee3b 2350static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2351{
0dbaee3b
DM
2352 struct net_device *dev = dst->dev;
2353 unsigned int mtu = dst_mtu(dst);
2354 struct net *net = dev_net(dev);
2355
1da177e4
LT
2356 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2357
5578689a
DL
2358 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2359 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2360
2361 /*
1ab1457c
YH
2362 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2363 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2364 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2365 * rely only on pmtu discovery"
2366 */
2367 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2368 mtu = IPV6_MAXPLEN;
2369 return mtu;
2370}
2371
ebb762f2 2372static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2373{
4b32b5ad
MKL
2374 const struct rt6_info *rt = (const struct rt6_info *)dst;
2375 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 2376 struct inet6_dev *idev;
618f9bc7 2377
4b32b5ad
MKL
2378 if (mtu)
2379 goto out;
2380
2381 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2382 if (mtu)
30f78d8e 2383 goto out;
618f9bc7
SK
2384
2385 mtu = IPV6_MIN_MTU;
d33e4553
DM
2386
2387 rcu_read_lock();
2388 idev = __in6_dev_get(dst->dev);
2389 if (idev)
2390 mtu = idev->cnf.mtu6;
2391 rcu_read_unlock();
2392
30f78d8e 2393out:
14972cbd
RP
2394 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2395
2396 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2397}
2398
3b00944c 2399struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2400 struct flowi6 *fl6)
1da177e4 2401{
87a11578 2402 struct dst_entry *dst;
1da177e4
LT
2403 struct rt6_info *rt;
2404 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2405 struct net *net = dev_net(dev);
1da177e4 2406
38308473 2407 if (unlikely(!idev))
122bdf67 2408 return ERR_PTR(-ENODEV);
1da177e4 2409
ad706862 2410 rt = ip6_dst_alloc(net, dev, 0);
38308473 2411 if (unlikely(!rt)) {
1da177e4 2412 in6_dev_put(idev);
87a11578 2413 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2414 goto out;
2415 }
2416
8e2ec639 2417 rt->dst.flags |= DST_HOST;
588753f1 2418 rt->dst.input = ip6_input;
8e2ec639 2419 rt->dst.output = ip6_output;
550bab42 2420 rt->rt6i_gateway = fl6->daddr;
87a11578 2421 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2422 rt->rt6i_dst.plen = 128;
2423 rt->rt6i_idev = idev;
14edd87d 2424 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2425
4c981e28 2426 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2427 * do proper release of the net_device
2428 */
2429 rt6_uncached_list_add(rt);
81eb8447 2430 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2431
87a11578
DM
2432 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2433
1da177e4 2434out:
87a11578 2435 return dst;
1da177e4
LT
2436}
2437
569d3645 2438static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2439{
86393e52 2440 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2441 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2442 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2443 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2444 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2445 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2446 int entries;
7019b78e 2447
fc66f95c 2448 entries = dst_entries_get_fast(ops);
49a18d86 2449 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2450 entries <= rt_max_size)
1da177e4
LT
2451 goto out;
2452
6891a346 2453 net->ipv6.ip6_rt_gc_expire++;
14956643 2454 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2455 entries = dst_entries_get_slow(ops);
2456 if (entries < ops->gc_thresh)
7019b78e 2457 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2458out:
7019b78e 2459 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2460 return entries > rt_max_size;
1da177e4
LT
2461}
2462
e715b6d3
FW
2463static int ip6_convert_metrics(struct mx6_config *mxc,
2464 const struct fib6_config *cfg)
2465{
6670e152 2466 struct net *net = cfg->fc_nlinfo.nl_net;
c3a8d947 2467 bool ecn_ca = false;
e715b6d3
FW
2468 struct nlattr *nla;
2469 int remaining;
2470 u32 *mp;
2471
63159f29 2472 if (!cfg->fc_mx)
e715b6d3
FW
2473 return 0;
2474
2475 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
2476 if (unlikely(!mp))
2477 return -ENOMEM;
2478
2479 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
2480 int type = nla_type(nla);
1bb14807 2481 u32 val;
e715b6d3 2482
1bb14807
DB
2483 if (!type)
2484 continue;
2485 if (unlikely(type > RTAX_MAX))
2486 goto err;
ea697639 2487
1bb14807
DB
2488 if (type == RTAX_CC_ALGO) {
2489 char tmp[TCP_CA_NAME_MAX];
e715b6d3 2490
1bb14807 2491 nla_strlcpy(tmp, nla, sizeof(tmp));
6670e152 2492 val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
1bb14807
DB
2493 if (val == TCP_CA_UNSPEC)
2494 goto err;
2495 } else {
2496 val = nla_get_u32(nla);
e715b6d3 2497 }
626abd59
PA
2498 if (type == RTAX_HOPLIMIT && val > 255)
2499 val = 255;
b8d3e416
DB
2500 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
2501 goto err;
1bb14807
DB
2502
2503 mp[type - 1] = val;
2504 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
2505 }
2506
c3a8d947
DB
2507 if (ecn_ca) {
2508 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
2509 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
2510 }
e715b6d3 2511
c3a8d947 2512 mxc->mx = mp;
e715b6d3
FW
2513 return 0;
2514 err:
2515 kfree(mp);
2516 return -EINVAL;
2517}
1da177e4 2518
8c14586f
DA
2519static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2520 struct fib6_config *cfg,
f4797b33
DA
2521 const struct in6_addr *gw_addr,
2522 u32 tbid, int flags)
8c14586f
DA
2523{
2524 struct flowi6 fl6 = {
2525 .flowi6_oif = cfg->fc_ifindex,
2526 .daddr = *gw_addr,
2527 .saddr = cfg->fc_prefsrc,
2528 };
2529 struct fib6_table *table;
2530 struct rt6_info *rt;
8c14586f 2531
f4797b33 2532 table = fib6_get_table(net, tbid);
8c14586f
DA
2533 if (!table)
2534 return NULL;
2535
2536 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2537 flags |= RT6_LOOKUP_F_HAS_SADDR;
2538
f4797b33 2539 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2540 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2541
2542 /* if table lookup failed, fall back to full lookup */
2543 if (rt == net->ipv6.ip6_null_entry) {
2544 ip6_rt_put(rt);
2545 rt = NULL;
2546 }
2547
2548 return rt;
2549}
2550
fc1e64e1
DA
2551static int ip6_route_check_nh_onlink(struct net *net,
2552 struct fib6_config *cfg,
9fbb704c 2553 const struct net_device *dev,
fc1e64e1
DA
2554 struct netlink_ext_ack *extack)
2555{
44750f84 2556 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2557 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2558 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2559 struct rt6_info *grt;
2560 int err;
2561
2562 err = 0;
2563 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2564 if (grt) {
58e354c0
DA
2565 if (!grt->dst.error &&
2566 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2567 NL_SET_ERR_MSG(extack,
2568 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2569 err = -EINVAL;
2570 }
2571
2572 ip6_rt_put(grt);
2573 }
2574
2575 return err;
2576}
2577
1edce99f
DA
2578static int ip6_route_check_nh(struct net *net,
2579 struct fib6_config *cfg,
2580 struct net_device **_dev,
2581 struct inet6_dev **idev)
2582{
2583 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2584 struct net_device *dev = _dev ? *_dev : NULL;
2585 struct rt6_info *grt = NULL;
2586 int err = -EHOSTUNREACH;
2587
2588 if (cfg->fc_table) {
f4797b33
DA
2589 int flags = RT6_LOOKUP_F_IFACE;
2590
2591 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2592 cfg->fc_table, flags);
1edce99f
DA
2593 if (grt) {
2594 if (grt->rt6i_flags & RTF_GATEWAY ||
2595 (dev && dev != grt->dst.dev)) {
2596 ip6_rt_put(grt);
2597 grt = NULL;
2598 }
2599 }
2600 }
2601
2602 if (!grt)
b75cc8f9 2603 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2604
2605 if (!grt)
2606 goto out;
2607
2608 if (dev) {
2609 if (dev != grt->dst.dev) {
2610 ip6_rt_put(grt);
2611 goto out;
2612 }
2613 } else {
2614 *_dev = dev = grt->dst.dev;
2615 *idev = grt->rt6i_idev;
2616 dev_hold(dev);
2617 in6_dev_hold(grt->rt6i_idev);
2618 }
2619
2620 if (!(grt->rt6i_flags & RTF_GATEWAY))
2621 err = 0;
2622
2623 ip6_rt_put(grt);
2624
2625out:
2626 return err;
2627}
2628
9fbb704c
DA
2629static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2630 struct net_device **_dev, struct inet6_dev **idev,
2631 struct netlink_ext_ack *extack)
2632{
2633 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2634 int gwa_type = ipv6_addr_type(gw_addr);
2635 const struct net_device *dev = *_dev;
2636 int err = -EINVAL;
2637
2638 /* if gw_addr is local we will fail to detect this in case
2639 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2640 * will return already-added prefix route via interface that
2641 * prefix route was assigned to, which might be non-loopback.
2642 */
2643 if (ipv6_chk_addr_and_flags(net, gw_addr,
2644 gwa_type & IPV6_ADDR_LINKLOCAL ?
2645 dev : NULL, 0, 0)) {
2646 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2647 goto out;
2648 }
2649
2650 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2651 /* IPv6 strictly inhibits using not link-local
2652 * addresses as nexthop address.
2653 * Otherwise, router will not able to send redirects.
2654 * It is very good, but in some (rare!) circumstances
2655 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2656 * some exceptions. --ANK
2657 * We allow IPv4-mapped nexthops to support RFC4798-type
2658 * addressing
2659 */
2660 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2661 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2662 goto out;
2663 }
2664
2665 if (cfg->fc_flags & RTNH_F_ONLINK)
2666 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2667 else
2668 err = ip6_route_check_nh(net, cfg, _dev, idev);
2669
2670 if (err)
2671 goto out;
2672 }
2673
2674 /* reload in case device was changed */
2675 dev = *_dev;
2676
2677 err = -EINVAL;
2678 if (!dev) {
2679 NL_SET_ERR_MSG(extack, "Egress device not specified");
2680 goto out;
2681 } else if (dev->flags & IFF_LOOPBACK) {
2682 NL_SET_ERR_MSG(extack,
2683 "Egress device can not be loopback device for this route");
2684 goto out;
2685 }
2686 err = 0;
2687out:
2688 return err;
2689}
2690
333c4301
DA
/*
 * Build (but do not insert) a route from the configuration in @cfg.
 *
 * Steps, in order: sanity-check flags and prefix lengths, resolve the
 * output device from fc_ifindex, find or create the FIB table, allocate
 * the rt6_info and fill in expiry, protocol, input/output handlers,
 * optional lightweight-tunnel state, keys and metric.  Reject routes
 * (and routes through a loopback device that are neither to a loopback
 * address nor RTF_LOCAL) are bound to the netns loopback device and get
 * discard/prohibit handlers.  Otherwise the gateway and preferred
 * source are validated.
 *
 * Side effects on @cfg: fc_metric and fc_protocol are replaced by their
 * defaults when unset, and fc_nlinfo.nl_net is overwritten with the
 * netns of the final device.
 *
 * Returns the new route, with the device and inet6_dev references taken
 * here stored in rt->dst.dev and rt->rt6i_idev, or an ERR_PTR() after
 * dropping those references and the partially built route.
 */
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
					      struct netlink_ext_ack *extack)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	/* RTF_PCPU is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_PCPU) {
		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
		goto out;
	}

	/* RTF_CACHE is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_CACHE) {
		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
		goto out;
	}

	if (cfg->fc_dst_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid prefix length");
		goto out;
	}
	if (cfg->fc_src_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid source address length");
		goto out;
	}
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len) {
		NL_SET_ERR_MSG(extack,
			       "Specifying source address requires IPV6_SUBTREES to be enabled");
		goto out;
	}
#endif
	/* An explicit interface index must name an existing device that
	 * has an inet6_dev; both references are held from here on.
	 */
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	/* ONLINK nexthops need an explicit device, and it must be up */
	if (cfg->fc_flags & RTNH_F_ONLINK) {
		if (!dev) {
			NL_SET_ERR_MSG(extack,
				       "Nexthop device required for onlink");
			err = -ENODEV;
			goto out;
		}

		if (!(dev->flags & IFF_UP)) {
			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
			err = -ENETDOWN;
			goto out;
		}
	}

	/* Find the table.  A netlink request without NLM_F_CREATE first
	 * tries an existing table and only warns before creating one;
	 * every other request creates the table directly.
	 */
	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	/* routes without RTF_ADDRCONF are allocated with DST_NOCOUNT */
	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* input handler depends on the kind of destination */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate, extack);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		lwtunnel_set_redirect(&rt->dst);
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;
	rt->rt6i_nh_weight = 1;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			/* swap the references held on the requested
			 * device for references on the loopback device
			 */
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* error code and handlers follow the requested route type */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_out;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		/* reject routes skip gateway/prefsrc validation */
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		/* may fill in dev/idev when no device was given */
		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
		if (err)
			goto out;

		rt->rt6i_gateway = cfg->fc_gateway;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!(dev->flags & IFF_UP)) {
		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
		err = -ENETDOWN;
		goto out;
	}

	/* a preferred source, when given, must pass ipv6_chk_addr() on dev */
	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid source address");
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
	    !netif_carrier_ok(dev))
		rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
	rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
	/* the dev/idev references held above are handed to the route */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return rt;
out:
	/* failure: drop whatever has been acquired so far */
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_release_immediate(&rt->dst);

	return ERR_PTR(err);
}
2925
333c4301
DA
2926int ip6_route_add(struct fib6_config *cfg,
2927 struct netlink_ext_ack *extack)
6b9ea5a6
RP
2928{
2929 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2930 struct rt6_info *rt;
6b9ea5a6
RP
2931 int err;
2932
333c4301 2933 rt = ip6_route_info_create(cfg, extack);
8c5b83f0
RP
2934 if (IS_ERR(rt)) {
2935 err = PTR_ERR(rt);
2936 rt = NULL;
6b9ea5a6 2937 goto out;
8c5b83f0 2938 }
6b9ea5a6 2939
e715b6d3
FW
2940 err = ip6_convert_metrics(&mxc, cfg);
2941 if (err)
2942 goto out;
1da177e4 2943
333c4301 2944 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
e715b6d3
FW
2945
2946 kfree(mxc.mx);
6b9ea5a6 2947
e715b6d3 2948 return err;
1da177e4 2949out:
587fea74
WW
2950 if (rt)
2951 dst_release_immediate(&rt->dst);
6b9ea5a6 2952
1da177e4
LT
2953 return err;
2954}
2955
86872cb5 2956static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2957{
2958 int err;
c71099ac 2959 struct fib6_table *table;
d1918542 2960 struct net *net = dev_net(rt->dst.dev);
1da177e4 2961
a4c2fd7f 2962 if (rt == net->ipv6.ip6_null_entry) {
6825a26c
G
2963 err = -ENOENT;
2964 goto out;
2965 }
6c813a72 2966
c71099ac 2967 table = rt->rt6i_table;
66f5d6ce 2968 spin_lock_bh(&table->tb6_lock);
86872cb5 2969 err = fib6_del(rt, info);
66f5d6ce 2970 spin_unlock_bh(&table->tb6_lock);
1da177e4 2971
6825a26c 2972out:
94e187c0 2973 ip6_rt_put(rt);
1da177e4
LT
2974 return err;
2975}
2976
e0a1ad73
TG
2977int ip6_del_rt(struct rt6_info *rt)
2978{
4d1169c1 2979 struct nl_info info = {
d1918542 2980 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2981 };
528c4ceb 2982 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2983}
2984
0ae81335
DA
/*
 * Delete @rt and, when it has siblings and cfg->fc_delete_all_nh is
 * set, all of its siblings as well.
 *
 * For the delete-all case a single RTM_DELROUTE message describing the
 * route is prepared up front; if that succeeds, info->skip_notify is
 * set before the fib6_del() calls and the prepared message is sent
 * once the table lock has been dropped.
 *
 * The caller's reference on @rt is always released.  Returns 0 or a
 * negative errno (-ENOENT when @rt is the netns null entry).
 */
static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
{
	struct nl_info *info = &cfg->fc_nlinfo;
	struct net *net = info->nl_net;
	struct sk_buff *skb = NULL;
	struct fib6_table *table;
	int err = -ENOENT;

	if (rt == net->ipv6.ip6_null_entry)
		goto out_put;
	table = rt->rt6i_table;
	spin_lock_bh(&table->tb6_lock);

	if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
		struct rt6_info *sibling, *next_sibling;

		/* prefer to send a single notification with all hops */
		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
		if (skb) {
			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;

			/* if the message cannot be filled, drop it and
			 * leave skip_notify untouched
			 */
			if (rt6_fill_node(net, skb, rt,
					  NULL, NULL, 0, RTM_DELROUTE,
					  info->portid, seq, 0) < 0) {
				kfree_skb(skb);
				skb = NULL;
			} else
				info->skip_notify = 1;
		}

		/* _safe iteration: the loop body deletes the current entry */
		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->rt6i_siblings,
					 rt6i_siblings) {
			err = fib6_del(sibling, info);
			if (err)
				goto out_unlock;
		}
	}

	err = fib6_del(rt, info);
out_unlock:
	spin_unlock_bh(&table->tb6_lock);
out_put:
	ip6_rt_put(rt);

	/* the combined notification is sent even if a deletion failed */
	if (skb) {
		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
			    info->nlh, gfp_any());
	}
	return err;
}
3036
333c4301
DA
3037static int ip6_route_del(struct fib6_config *cfg,
3038 struct netlink_ext_ack *extack)
1da177e4 3039{
2b760fcf 3040 struct rt6_info *rt, *rt_cache;
c71099ac 3041 struct fib6_table *table;
1da177e4 3042 struct fib6_node *fn;
1da177e4
LT
3043 int err = -ESRCH;
3044
5578689a 3045 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3046 if (!table) {
3047 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3048 return err;
d5d531cb 3049 }
c71099ac 3050
66f5d6ce 3051 rcu_read_lock();
1da177e4 3052
c71099ac 3053 fn = fib6_locate(&table->tb6_root,
86872cb5 3054 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3055 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3056 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3057
1da177e4 3058 if (fn) {
66f5d6ce 3059 for_each_fib6_node_rt_rcu(fn) {
2b760fcf
WW
3060 if (cfg->fc_flags & RTF_CACHE) {
3061 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3062 &cfg->fc_src);
3063 if (!rt_cache)
3064 continue;
3065 rt = rt_cache;
3066 }
86872cb5 3067 if (cfg->fc_ifindex &&
d1918542
DM
3068 (!rt->dst.dev ||
3069 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 3070 continue;
86872cb5
TG
3071 if (cfg->fc_flags & RTF_GATEWAY &&
3072 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 3073 continue;
86872cb5 3074 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 3075 continue;
c2ed1880
M
3076 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
3077 continue;
d3843fe5
WW
3078 if (!dst_hold_safe(&rt->dst))
3079 break;
66f5d6ce 3080 rcu_read_unlock();
1da177e4 3081
0ae81335
DA
3082 /* if gateway was specified only delete the one hop */
3083 if (cfg->fc_flags & RTF_GATEWAY)
3084 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3085
3086 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3087 }
3088 }
66f5d6ce 3089 rcu_read_unlock();
1da177e4
LT
3090
3091 return err;
3092}
3093
/* Handle a received ICMPv6 Redirect carried in @skb for the route @dst.
 *
 * The message and its ND options are validated first.  If accepted, the
 * neighbour entry for the redirect target is updated and a cached
 * exception route (RTF_DYNAMIC | RTF_CACHE) for the redirected
 * destination is inserted under @dst, after which the NETEVENT_REDIRECT
 * notifier chain is called.  @sk is not used by this function.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	/* bytes available for ND options after the fixed redirect header */
	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* target == destination means the destination itself is on-link;
	 * otherwise the target must be a link-local unicast address
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* ignore redirects when forwarding or when they are disabled */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt->rt6i_flags & RTF_REJECT) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);

	/* the reference obtained here is released at 'out' */
	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER)),
		     NDISC_REDIRECT, &ndopts);

	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	/* an on-link destination needs no gateway hop */
	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_protocol = RTPROT_REDIRECT;
	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	/* No need to remove rt from the exception table if rt is
	 * a cached route because rt6_insert_exception() will
	 * take care of it
	 */
	if (rt6_insert_exception(nrt, rt)) {
		dst_release_immediate(&nrt->dst);
		goto out;
	}

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

out:
	neigh_release(neigh);
}
3211
1da177e4
LT
3212/*
3213 * Misc support functions
3214 */
3215
4b32b5ad
MKL
/* Record @from as the origin of @rt.
 *
 * Takes a reference on @from, clears RTF_EXPIRES on @rt and makes @rt
 * use @from's metrics array (initialised read-only).  @from must not
 * itself have an origin set; this is enforced with BUG_ON().
 */
static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
	BUG_ON(from->from);

	rt->rt6i_flags &= ~RTF_EXPIRES;
	dst_hold(&from->dst);
	rt->from = from;
	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}
3225
83a09abd
MKL
/* Initialise the freshly allocated route @rt as a copy of @ort.
 *
 * Copies handlers, destination, error, gateway, flags, metric, preferred
 * source and table from @ort, takes references on the shared inet6_dev
 * and lwtunnel state, and links @rt back to @ort via rt6_set_from().
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.input = ort->dst.input;
	rt->dst.output = ort->dst.output;
	rt->rt6i_dst = ort->rt6i_dst;
	rt->dst.error = ort->dst.error;
	rt->rt6i_idev = ort->rt6i_idev;
	/* the copy holds its own reference on the device's inet6_dev */
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);
	rt->dst.lastuse = jiffies;
	rt->rt6i_gateway = ort->rt6i_gateway;
	rt->rt6i_flags = ort->rt6i_flags;
	/* must follow the flags copy: rt6_set_from() clears RTF_EXPIRES */
	rt6_set_from(rt, ort);
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
3247
70ceb4f5 3248#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 3249static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 3250 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3251 const struct in6_addr *gwaddr,
3252 struct net_device *dev)
70ceb4f5 3253{
830218c1
DA
3254 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3255 int ifindex = dev->ifindex;
70ceb4f5
YH
3256 struct fib6_node *fn;
3257 struct rt6_info *rt = NULL;
c71099ac
TG
3258 struct fib6_table *table;
3259
830218c1 3260 table = fib6_get_table(net, tb_id);
38308473 3261 if (!table)
c71099ac 3262 return NULL;
70ceb4f5 3263
66f5d6ce 3264 rcu_read_lock();
38fbeeee 3265 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3266 if (!fn)
3267 goto out;
3268
66f5d6ce 3269 for_each_fib6_node_rt_rcu(fn) {
d1918542 3270 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
3271 continue;
3272 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3273 continue;
3274 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
3275 continue;
d3843fe5 3276 ip6_hold_safe(NULL, &rt, false);
70ceb4f5
YH
3277 break;
3278 }
3279out:
66f5d6ce 3280 rcu_read_unlock();
70ceb4f5
YH
3281 return rt;
3282}
3283
efa2cea0 3284static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 3285 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3286 const struct in6_addr *gwaddr,
3287 struct net_device *dev,
95c96174 3288 unsigned int pref)
70ceb4f5 3289{
86872cb5 3290 struct fib6_config cfg = {
238fc7ea 3291 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3292 .fc_ifindex = dev->ifindex,
86872cb5
TG
3293 .fc_dst_len = prefixlen,
3294 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3295 RTF_UP | RTF_PREF(pref),
b91d5329 3296 .fc_protocol = RTPROT_RA,
15e47304 3297 .fc_nlinfo.portid = 0,
efa2cea0
DL
3298 .fc_nlinfo.nlh = NULL,
3299 .fc_nlinfo.nl_net = net,
86872cb5
TG
3300 };
3301
830218c1 3302 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3303 cfg.fc_dst = *prefix;
3304 cfg.fc_gateway = *gwaddr;
70ceb4f5 3305
e317da96
YH
3306 /* We should treat it as a default route if prefix length is 0. */
3307 if (!prefixlen)
86872cb5 3308 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3309
333c4301 3310 ip6_route_add(&cfg, NULL);
70ceb4f5 3311
830218c1 3312 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3313}
3314#endif
3315
b71d1d42 3316struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 3317{
830218c1 3318 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 3319 struct rt6_info *rt;
c71099ac 3320 struct fib6_table *table;
1da177e4 3321
830218c1 3322 table = fib6_get_table(dev_net(dev), tb_id);
38308473 3323 if (!table)
c71099ac 3324 return NULL;
1da177e4 3325
66f5d6ce
WW
3326 rcu_read_lock();
3327 for_each_fib6_node_rt_rcu(&table->tb6_root) {
d1918542 3328 if (dev == rt->dst.dev &&
045927ff 3329 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
3330 ipv6_addr_equal(&rt->rt6i_gateway, addr))
3331 break;
3332 }
3333 if (rt)
d3843fe5 3334 ip6_hold_safe(NULL, &rt, false);
66f5d6ce 3335 rcu_read_unlock();
1da177e4
LT
3336 return rt;
3337}
3338
/* Add an autoconfigured default route via @gwaddr on @dev with router
 * preference @pref.
 *
 * The route is installed with protocol RTPROT_RA and RTF_EXPIRES into
 * the device's l3mdev table when it has one, RT6_TABLE_DFLT otherwise.
 * On success the table is flagged RT6_TABLE_HAS_DFLT_ROUTER.
 *
 * Returns whatever rt6_get_dflt_router() finds afterwards, or NULL.
 */
struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_protocol = RTPROT_RA,
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	cfg.fc_gateway = *gwaddr;

	/* ip6_route_add() returns 0 on success */
	if (!ip6_route_add(&cfg, NULL)) {
		struct fib6_table *table;

		table = fib6_get_table(dev_net(dev), cfg.fc_table);
		if (table)
			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
	}

	return rt6_get_dflt_router(gwaddr, dev);
}
3367
830218c1 3368static void __rt6_purge_dflt_routers(struct fib6_table *table)
1da177e4
LT
3369{
3370 struct rt6_info *rt;
3371
3372restart:
66f5d6ce
WW
3373 rcu_read_lock();
3374 for_each_fib6_node_rt_rcu(&table->tb6_root) {
3e8b0ac3
LC
3375 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3376 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d3843fe5 3377 if (dst_hold_safe(&rt->dst)) {
66f5d6ce 3378 rcu_read_unlock();
d3843fe5
WW
3379 ip6_del_rt(rt);
3380 } else {
66f5d6ce 3381 rcu_read_unlock();
d3843fe5 3382 }
1da177e4
LT
3383 goto restart;
3384 }
3385 }
66f5d6ce 3386 rcu_read_unlock();
830218c1
DA
3387
3388 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3389}
3390
3391void rt6_purge_dflt_routers(struct net *net)
3392{
3393 struct fib6_table *table;
3394 struct hlist_head *head;
3395 unsigned int h;
3396
3397 rcu_read_lock();
3398
3399 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3400 head = &net->ipv6.fib_table_hash[h];
3401 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3402 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3403 __rt6_purge_dflt_routers(table);
3404 }
3405 }
3406
3407 rcu_read_unlock();
1da177e4
LT
3408}
3409
5578689a
DL
/* Translate the legacy ioctl route message @rtmsg into @cfg.
 *
 * @cfg is zeroed first.  The table is the l3mdev table of the interface
 * named by rtmsg_ifindex when there is one, RT6_TABLE_MAIN otherwise.
 */
static void rtmsg_to_fib6_config(struct net *net,
				 struct in6_rtmsg *rtmsg,
				 struct fib6_config *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
			 : RT6_TABLE_MAIN;
	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
	cfg->fc_metric = rtmsg->rtmsg_metric;
	/* the ioctl interface carries the expiry value in rtmsg_info */
	cfg->fc_expires = rtmsg->rtmsg_info;
	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
	cfg->fc_src_len = rtmsg->rtmsg_src_len;
	cfg->fc_flags = rtmsg->rtmsg_flags;

	cfg->fc_nlinfo.nl_net = net;

	cfg->fc_dst = rtmsg->rtmsg_dst;
	cfg->fc_src = rtmsg->rtmsg_src;
	cfg->fc_gateway = rtmsg->rtmsg_gateway;
}
3431
5578689a 3432int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3433{
86872cb5 3434 struct fib6_config cfg;
1da177e4
LT
3435 struct in6_rtmsg rtmsg;
3436 int err;
3437
67ba4152 3438 switch (cmd) {
1da177e4
LT
3439 case SIOCADDRT: /* Add a route */
3440 case SIOCDELRT: /* Delete a route */
af31f412 3441 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3442 return -EPERM;
3443 err = copy_from_user(&rtmsg, arg,
3444 sizeof(struct in6_rtmsg));
3445 if (err)
3446 return -EFAULT;
86872cb5 3447
5578689a 3448 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3449
1da177e4
LT
3450 rtnl_lock();
3451 switch (cmd) {
3452 case SIOCADDRT:
333c4301 3453 err = ip6_route_add(&cfg, NULL);
1da177e4
LT
3454 break;
3455 case SIOCDELRT:
333c4301 3456 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3457 break;
3458 default:
3459 err = -EINVAL;
3460 }
3461 rtnl_unlock();
3462
3463 return err;
3ff50b79 3464 }
1da177e4
LT
3465
3466 return -EINVAL;
3467}
3468
3469/*
3470 * Drop the packet on the floor
3471 */
3472
d5fdd6ba 3473static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3474{
612f09e8 3475 int type;
adf30907 3476 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3477 switch (ipstats_mib_noroutes) {
3478 case IPSTATS_MIB_INNOROUTES:
0660e03f 3479 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3480 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
3481 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3482 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3483 break;
3484 }
3485 /* FALLTHROUGH */
3486 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3487 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3488 ipstats_mib_noroutes);
612f09e8
YH
3489 break;
3490 }
3ffe533c 3491 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3492 kfree_skb(skb);
3493 return 0;
3494}
3495
9ce8ade0
TG
/* Input handler: drop @skb with ICMPV6_NOROUTE, counted against
 * IPSTATS_MIB_INNOROUTES.
 */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
3500
/* Output handler: drop @skb with ICMPV6_NOROUTE, counted against
 * IPSTATS_MIB_OUTNOROUTES.  @net and @sk are not used.
 */
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	/* point skb->dev at the route's device before dropping */
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
3506
9ce8ade0
TG
/* Input handler: drop @skb with ICMPV6_ADM_PROHIBITED, counted against
 * IPSTATS_MIB_INNOROUTES.
 */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
3511
/* Output handler: drop @skb with ICMPV6_ADM_PROHIBITED, counted against
 * IPSTATS_MIB_OUTNOROUTES.  @net and @sk are not used.
 */
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	/* point skb->dev at the route's device before dropping */
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
3517
1da177e4
LT
3518/*
3519 * Allocate a dst for local (unicast / anycast) address.
3520 */
3521
3522struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
3523 const struct in6_addr *addr,
8f031519 3524 bool anycast)
1da177e4 3525{
ca254490 3526 u32 tb_id;
c346dca1 3527 struct net *net = dev_net(idev->dev);
4832c30d 3528 struct net_device *dev = idev->dev;
5f02ce24
DA
3529 struct rt6_info *rt;
3530
5f02ce24 3531 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 3532 if (!rt)
1da177e4
LT
3533 return ERR_PTR(-ENOMEM);
3534
1da177e4
LT
3535 in6_dev_hold(idev);
3536
11d53b49 3537 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
3538 rt->dst.input = ip6_input;
3539 rt->dst.output = ip6_output;
1da177e4 3540 rt->rt6i_idev = idev;
1da177e4 3541
94b5e0f9 3542 rt->rt6i_protocol = RTPROT_KERNEL;
1da177e4 3543 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
3544 if (anycast)
3545 rt->rt6i_flags |= RTF_ANYCAST;
3546 else
1da177e4 3547 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 3548
550bab42 3549 rt->rt6i_gateway = *addr;
4e3fd7a0 3550 rt->rt6i_dst.addr = *addr;
1da177e4 3551 rt->rt6i_dst.plen = 128;
ca254490
DA
3552 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3553 rt->rt6i_table = fib6_get_table(net, tb_id);
1da177e4 3554
1da177e4
LT
3555 return rt;
3556}
3557
c3968a85
DW
3558/* remove deleted ip from prefsrc entries */
/* Argument bundle for fib6_remove_prefsrc() */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to routes on this device; NULL matches any */
	struct net *net;	/* namespace whose null entry must be skipped */
	struct in6_addr *addr;	/* preferred source address being removed */
};
3564
3565static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3566{
3567 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3568 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3569 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3570
d1918542 3571 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
3572 rt != net->ipv6.ip6_null_entry &&
3573 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
60006a48 3574 spin_lock_bh(&rt6_exception_lock);
c3968a85
DW
3575 /* remove prefsrc entry */
3576 rt->rt6i_prefsrc.plen = 0;
60006a48
WW
3577 /* need to update cache as well */
3578 rt6_exceptions_remove_prefsrc(rt);
3579 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3580 }
3581 return 0;
3582}
3583
/* Drop @ifp's address as preferred source from all routes on its device,
 * across every FIB table of the device's namespace.
 */
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
3594
be7a010d 3595#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3596
3597/* Remove routers and update dst entries when gateway turn into host. */
3598static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3599{
3600 struct in6_addr *gateway = (struct in6_addr *)arg;
3601
2b760fcf
WW
3602 if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3603 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
be7a010d
DJ
3604 return -1;
3605 }
b16cb459
WW
3606
3607 /* Further clean up cached routes in exception table.
3608 * This is needed because cached route may have a different
3609 * gateway than its 'parent' in the case of an ip redirect.
3610 */
3611 rt6_exceptions_clean_tohost(rt, gateway);
3612
be7a010d
DJ
3613 return 0;
3614}
3615
/* Run fib6_clean_tohost() for @gateway over every FIB table of @net. */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
3620
2127d95a
IS
/* Argument bundle for the fib6_ifup() / fib6_ifdown() walkers */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event refers to */
	union {
		unsigned int nh_flags;	/* fib6_ifup(): nexthop flags to clear */
		unsigned long event;	/* fib6_ifdown(): NETDEV_* event code */
	};
};
3628
d7dedee1
IS
3629static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
3630{
3631 struct rt6_info *iter;
3632 struct fib6_node *fn;
3633
3634 fn = rcu_dereference_protected(rt->rt6i_node,
3635 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3636 iter = rcu_dereference_protected(fn->leaf,
3637 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3638 while (iter) {
3639 if (iter->rt6i_metric == rt->rt6i_metric &&
3640 rt6_qualify_for_ecmp(iter))
3641 return iter;
3642 iter = rcu_dereference_protected(iter->rt6_next,
3643 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3644 }
3645
3646 return NULL;
3647}
3648
3649static bool rt6_is_dead(const struct rt6_info *rt)
3650{
3651 if (rt->rt6i_nh_flags & RTNH_F_DEAD ||
3652 (rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
3653 rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
3654 return true;
3655
3656 return false;
3657}
3658
3659static int rt6_multipath_total_weight(const struct rt6_info *rt)
3660{
3661 struct rt6_info *iter;
3662 int total = 0;
3663
3664 if (!rt6_is_dead(rt))
398958ae 3665 total += rt->rt6i_nh_weight;
d7dedee1
IS
3666
3667 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
3668 if (!rt6_is_dead(iter))
398958ae 3669 total += iter->rt6i_nh_weight;
d7dedee1
IS
3670 }
3671
3672 return total;
3673}
3674
/* Assign @rt its multipath upper bound.
 *
 * For a live nexthop, @rt's weight is added to the running sum *@weight
 * and the bound becomes round(*weight * 2^31 / @total) - 1.  A dead
 * nexthop gets -1 and leaves *@weight untouched.
 */
static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
{
	int upper_bound = -1;

	if (!rt6_is_dead(rt)) {
		*weight += rt->rt6i_nh_weight;
		/* widen to 64 bits before shifting so the scaled value fits */
		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
						    total) - 1;
	}
	atomic_set(&rt->rt6i_nh_upper_bound, upper_bound);
}
3686
/* Assign upper bounds to @rt and then to each of its siblings in list
 * order, accumulating weights against @total.
 */
static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
{
	struct rt6_info *iter;
	int weight = 0;	/* running sum of live nexthop weights */

	rt6_upper_bound_set(rt, &weight, total);

	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
		rt6_upper_bound_set(iter, &weight, total);
}
3697
/* Recompute the upper bounds of the multipath route that @rt belongs to.
 *
 * Does nothing for a route without siblings or one marked should_flush.
 */
void rt6_multipath_rebalance(struct rt6_info *rt)
{
	struct rt6_info *first;
	int total;

	/* In case the entire multipath route was marked for flushing,
	 * then there is no need to rebalance upon the removal of every
	 * sibling route.
	 */
	if (!rt->rt6i_nsiblings || rt->should_flush)
		return;

	/* During lookup routes are evaluated in order, so we need to
	 * make sure upper bounds are assigned from the first sibling
	 * onwards.
	 */
	first = rt6_multipath_first_sibling(rt);
	if (WARN_ON_ONCE(!first))
		return;

	total = rt6_multipath_total_weight(first);
	rt6_multipath_upper_bound_set(first, total);
}
3721
2127d95a
IS
3722static int fib6_ifup(struct rt6_info *rt, void *p_arg)
3723{
3724 const struct arg_netdev_event *arg = p_arg;
3725 const struct net *net = dev_net(arg->dev);
3726
1de178ed 3727 if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
2127d95a 3728 rt->rt6i_nh_flags &= ~arg->nh_flags;
1de178ed 3729 fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
d7dedee1 3730 rt6_multipath_rebalance(rt);
1de178ed 3731 }
2127d95a
IS
3732
3733 return 0;
3734}
3735
/* Clear @nh_flags on every route that uses @dev, across all FIB tables
 * of the device's namespace.
 */
void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
{
	struct arg_netdev_event arg = {
		.dev = dev,
		{
			.nh_flags = nh_flags,
		},
	};

	/* when RTNH_F_DEAD is being cleared and the carrier is up, clear
	 * RTNH_F_LINKDOWN in the same pass
	 */
	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
		arg.nh_flags |= RTNH_F_LINKDOWN;

	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
}
3750
1de178ed
IS
3751static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
3752 const struct net_device *dev)
3753{
3754 struct rt6_info *iter;
3755
3756 if (rt->dst.dev == dev)
3757 return true;
3758 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3759 if (iter->dst.dev == dev)
3760 return true;
3761
3762 return false;
3763}
3764
/* Mark @rt and every one of its siblings with should_flush. */
static void rt6_multipath_flush(struct rt6_info *rt)
{
	struct rt6_info *iter;

	rt->should_flush = 1;
	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
		iter->should_flush = 1;
}
3773
3774static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
3775 const struct net_device *down_dev)
3776{
3777 struct rt6_info *iter;
3778 unsigned int dead = 0;
3779
3780 if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD)
3781 dead++;
3782 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3783 if (iter->dst.dev == down_dev ||
3784 iter->rt6i_nh_flags & RTNH_F_DEAD)
3785 dead++;
3786
3787 return dead;
3788}
3789
/* Set @nh_flags on @rt and on each of its siblings that uses @dev. */
static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
				       const struct net_device *dev,
				       unsigned int nh_flags)
{
	struct rt6_info *iter;

	if (rt->dst.dev == dev)
		rt->rt6i_nh_flags |= nh_flags;
	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
		if (iter->dst.dev == dev)
			iter->rt6i_nh_flags |= nh_flags;
}
3802
a1a22c12 3803/* called with write lock held for table with rt */
/* fib6_clean_all() callback reacting to the NETDEV_* event stored in
 * @p_arg (a struct arg_netdev_event) for the route @rt.
 *
 * Return values are interpreted by the FIB walker, which is not part of
 * this function.  NOTE(review): -1 presumably requests deletion of @rt
 * and -2 presumably makes the walker skip the rest of the multipath
 * group - confirm against fib6_clean_node().  0 leaves the route alone.
 */
static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
{
	const struct arg_netdev_event *arg = p_arg;
	const struct net_device *dev = arg->dev;
	const struct net *net = dev_net(dev);

	if (rt == net->ipv6.ip6_null_entry)
		return 0;

	switch (arg->event) {
	case NETDEV_UNREGISTER:
		return rt->dst.dev == dev ? -1 : 0;
	case NETDEV_DOWN:
		/* already marked as part of a fully dead multipath route */
		if (rt->should_flush)
			return -1;
		if (!rt->rt6i_nsiblings)
			return rt->dst.dev == dev ? -1 : 0;
		if (rt6_multipath_uses_dev(rt, dev)) {
			unsigned int count;

			count = rt6_multipath_dead_count(rt, dev);
			/* every nexthop (siblings plus rt itself) is dead */
			if (rt->rt6i_nsiblings + 1 == count) {
				rt6_multipath_flush(rt);
				return -1;
			}
			/* some nexthops survive: only mark the ones on dev */
			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
						   RTNH_F_LINKDOWN);
			fib6_update_sernum(rt);
			rt6_multipath_rebalance(rt);
		}
		return -2;
	case NETDEV_CHANGE:
		/* local and anycast routes are not marked link-down */
		if (rt->dst.dev != dev ||
		    rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
			break;
		rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
		rt6_multipath_rebalance(rt);
		break;
	}

	return 0;
}
3846
/* Apply the NETDEV_* @event for @dev to every route in the device's
 * namespace by running fib6_ifdown() over all FIB tables.
 */
void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
{
	struct arg_netdev_event arg = {
		.dev = dev,
		{
			.event = event,
		},
	};

	fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
}
3858
/* Tear down IPv6 routing state for @dev on @event: sync the FIB, flush
 * the device from the uncached route list, and take down its neighbour
 * entries in nd_tbl.
 */
void rt6_disable_ip(struct net_device *dev, unsigned long event)
{
	rt6_sync_down_dev(dev, event);
	rt6_uncached_list_flush_dev(dev_net(dev), dev);
	neigh_ifdown(&nd_tbl, dev);
}
3865
/* Argument bundle for rt6_mtu_change_route() */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
3870
/* fib6_clean_all() callback: propagate a device MTU change (described by
 * @p_arg, a struct rt6_mtu_change_arg) to the route @rt and its cached
 * exceptions.  Always returns 0.
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For an administrative MTU increase there is no way to discover
	   an IPv6 PMTU increase, so the PMTU increase has to be applied
	   here.  Since RFC 1981 doesn't cover administrative MTU increases,
	   updating the PMTU on increase is a MUST (e.g. jumbo frames).
	 */
	/*
	   If the new MTU is less than the route PMTU, the new MTU will be
	   the lowest MTU in the path; update the route PMTU to reflect the
	   decrease.  If the new MTU is greater than the route PMTU and the
	   old MTU was the lowest MTU in the path, update the route PMTU to
	   reflect the increase.  In that case, if another node on the path
	   also has that lowest MTU, a Packet Too Big message will trigger
	   PMTU discovery again.
	 */
	if (rt->dst.dev == arg->dev &&
	    dst_metric_raw(&rt->dst, RTAX_MTU) &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		spin_lock_bh(&rt6_exception_lock);
		if (dst_mtu(&rt->dst) >= arg->mtu ||
		    (dst_mtu(&rt->dst) < arg->mtu &&
		     dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
		/* cached exception routes are updated under the same lock */
		rt6_exceptions_update_pmtu(rt, arg->mtu);
		spin_unlock_bh(&rt6_exception_lock);
	}
	return 0;
}
3914
/* Propagate the new @mtu of @dev to all routes in the device's
 * namespace by running rt6_mtu_change_route() over every FIB table.
 */
void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
{
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};

	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
}
3924
/* Netlink attribute policy for IPv6 route messages, used by
 * rtm_to_fib6_config().  Attributes not listed here (e.g. RTA_DST,
 * RTA_SRC, RTA_PREFSRC, RTA_TABLE) have no policy entry.
 * NOTE(review): for entries that only set .len, the value is presumably
 * enforced by the netlink validator as a minimum payload length -
 * confirm against the nla_policy documentation.
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
	[RTA_EXPIRES]		= { .type = NLA_U32 },
	[RTA_UID]		= { .type = NLA_U32 },
	[RTA_MARK]		= { .type = NLA_U32 },
};
3939
3940static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
3941 struct fib6_config *cfg,
3942 struct netlink_ext_ack *extack)
1da177e4 3943{
86872cb5
TG
3944 struct rtmsg *rtm;
3945 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 3946 unsigned int pref;
86872cb5 3947 int err;
1da177e4 3948
fceb6435
JB
3949 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3950 NULL);
86872cb5
TG
3951 if (err < 0)
3952 goto errout;
1da177e4 3953
86872cb5
TG
3954 err = -EINVAL;
3955 rtm = nlmsg_data(nlh);
3956 memset(cfg, 0, sizeof(*cfg));
3957
3958 cfg->fc_table = rtm->rtm_table;
3959 cfg->fc_dst_len = rtm->rtm_dst_len;
3960 cfg->fc_src_len = rtm->rtm_src_len;
3961 cfg->fc_flags = RTF_UP;
3962 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 3963 cfg->fc_type = rtm->rtm_type;
86872cb5 3964
ef2c7d7b
ND
3965 if (rtm->rtm_type == RTN_UNREACHABLE ||
3966 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
3967 rtm->rtm_type == RTN_PROHIBIT ||
3968 rtm->rtm_type == RTN_THROW)
86872cb5
TG
3969 cfg->fc_flags |= RTF_REJECT;
3970
ab79ad14
3971 if (rtm->rtm_type == RTN_LOCAL)
3972 cfg->fc_flags |= RTF_LOCAL;
3973
1f56a01f
MKL
3974 if (rtm->rtm_flags & RTM_F_CLONED)
3975 cfg->fc_flags |= RTF_CACHE;
3976
fc1e64e1
DA
3977 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
3978
15e47304 3979 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 3980 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 3981 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
3982
3983 if (tb[RTA_GATEWAY]) {
67b61f6c 3984 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 3985 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 3986 }
86872cb5
TG
3987
3988 if (tb[RTA_DST]) {
3989 int plen = (rtm->rtm_dst_len + 7) >> 3;
3990
3991 if (nla_len(tb[RTA_DST]) < plen)
3992 goto errout;
3993
3994 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 3995 }
86872cb5
TG
3996
3997 if (tb[RTA_SRC]) {
3998 int plen = (rtm->rtm_src_len + 7) >> 3;
3999
4000 if (nla_len(tb[RTA_SRC]) < plen)
4001 goto errout;
4002
4003 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4004 }
86872cb5 4005
c3968a85 4006 if (tb[RTA_PREFSRC])
67b61f6c 4007 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4008
86872cb5
TG
4009 if (tb[RTA_OIF])
4010 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4011
4012 if (tb[RTA_PRIORITY])
4013 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4014
4015 if (tb[RTA_METRICS]) {
4016 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4017 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4018 }
86872cb5
TG
4019
4020 if (tb[RTA_TABLE])
4021 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4022
51ebd318
ND
4023 if (tb[RTA_MULTIPATH]) {
4024 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4025 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4026
4027 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4028 cfg->fc_mp_len, extack);
9ed59592
DA
4029 if (err < 0)
4030 goto errout;
51ebd318
ND
4031 }
4032
c78ba6d6
LR
4033 if (tb[RTA_PREF]) {
4034 pref = nla_get_u8(tb[RTA_PREF]);
4035 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4036 pref != ICMPV6_ROUTER_PREF_HIGH)
4037 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4038 cfg->fc_flags |= RTF_PREF(pref);
4039 }
4040
19e42e45
RP
4041 if (tb[RTA_ENCAP])
4042 cfg->fc_encap = tb[RTA_ENCAP];
4043
9ed59592 4044 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4045 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4046
c255bd68 4047 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4048 if (err < 0)
4049 goto errout;
4050 }
4051
32bc201e
XL
4052 if (tb[RTA_EXPIRES]) {
4053 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4054
4055 if (addrconf_finite_timeout(timeout)) {
4056 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4057 cfg->fc_flags |= RTF_EXPIRES;
4058 }
4059 }
4060
86872cb5
TG
4061 err = 0;
4062errout:
4063 return err;
1da177e4
LT
4064}
4065
6b9ea5a6
RP
4066struct rt6_nh {
4067 struct rt6_info *rt6_info;
4068 struct fib6_config r_cfg;
4069 struct mx6_config mxc;
4070 struct list_head next;
4071};
4072
4073static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4074{
4075 struct rt6_nh *nh;
4076
4077 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 4078 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
4079 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4080 nh->r_cfg.fc_ifindex);
4081 }
4082}
4083
4084static int ip6_route_info_append(struct list_head *rt6_nh_list,
4085 struct rt6_info *rt, struct fib6_config *r_cfg)
4086{
4087 struct rt6_nh *nh;
6b9ea5a6
RP
4088 int err = -EEXIST;
4089
4090 list_for_each_entry(nh, rt6_nh_list, next) {
4091 /* check if rt6_info already exists */
f06b7549 4092 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
6b9ea5a6
RP
4093 return err;
4094 }
4095
4096 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4097 if (!nh)
4098 return -ENOMEM;
4099 nh->rt6_info = rt;
4100 err = ip6_convert_metrics(&nh->mxc, r_cfg);
4101 if (err) {
4102 kfree(nh);
4103 return err;
4104 }
4105 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4106 list_add_tail(&nh->next, rt6_nh_list);
4107
4108 return 0;
4109}
4110
3b1137fe
DA
4111static void ip6_route_mpath_notify(struct rt6_info *rt,
4112 struct rt6_info *rt_last,
4113 struct nl_info *info,
4114 __u16 nlflags)
4115{
4116 /* if this is an APPEND route, then rt points to the first route
4117 * inserted and rt_last points to last route inserted. Userspace
4118 * wants a consistent dump of the route which starts at the first
4119 * nexthop. Since sibling routes are always added at the end of
4120 * the list, find the first sibling of the last route appended
4121 */
4122 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
4123 rt = list_first_entry(&rt_last->rt6i_siblings,
4124 struct rt6_info,
4125 rt6i_siblings);
4126 }
4127
4128 if (rt)
4129 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4130}
4131
333c4301
DA
4132static int ip6_route_multipath_add(struct fib6_config *cfg,
4133 struct netlink_ext_ack *extack)
51ebd318 4134{
3b1137fe
DA
4135 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
4136 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4137 struct fib6_config r_cfg;
4138 struct rtnexthop *rtnh;
6b9ea5a6
RP
4139 struct rt6_info *rt;
4140 struct rt6_nh *err_nh;
4141 struct rt6_nh *nh, *nh_safe;
3b1137fe 4142 __u16 nlflags;
51ebd318
ND
4143 int remaining;
4144 int attrlen;
6b9ea5a6
RP
4145 int err = 1;
4146 int nhn = 0;
4147 int replace = (cfg->fc_nlinfo.nlh &&
4148 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4149 LIST_HEAD(rt6_nh_list);
51ebd318 4150
3b1137fe
DA
4151 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4152 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4153 nlflags |= NLM_F_APPEND;
4154
35f1b4e9 4155 remaining = cfg->fc_mp_len;
51ebd318 4156 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4157
6b9ea5a6
RP
4158 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
4159 * rt6_info structs per nexthop
4160 */
51ebd318
ND
4161 while (rtnh_ok(rtnh, remaining)) {
4162 memcpy(&r_cfg, cfg, sizeof(*cfg));
4163 if (rtnh->rtnh_ifindex)
4164 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4165
4166 attrlen = rtnh_attrlen(rtnh);
4167 if (attrlen > 0) {
4168 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4169
4170 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4171 if (nla) {
67b61f6c 4172 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4173 r_cfg.fc_flags |= RTF_GATEWAY;
4174 }
19e42e45
RP
4175 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4176 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4177 if (nla)
4178 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4179 }
6b9ea5a6 4180
333c4301 4181 rt = ip6_route_info_create(&r_cfg, extack);
8c5b83f0
RP
4182 if (IS_ERR(rt)) {
4183 err = PTR_ERR(rt);
4184 rt = NULL;
6b9ea5a6 4185 goto cleanup;
8c5b83f0 4186 }
6b9ea5a6 4187
398958ae
IS
4188 rt->rt6i_nh_weight = rtnh->rtnh_hops + 1;
4189
6b9ea5a6 4190 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 4191 if (err) {
587fea74 4192 dst_release_immediate(&rt->dst);
6b9ea5a6
RP
4193 goto cleanup;
4194 }
4195
4196 rtnh = rtnh_next(rtnh, &remaining);
4197 }
4198
3b1137fe
DA
4199 /* for add and replace send one notification with all nexthops.
4200 * Skip the notification in fib6_add_rt2node and send one with
4201 * the full route when done
4202 */
4203 info->skip_notify = 1;
4204
6b9ea5a6
RP
4205 err_nh = NULL;
4206 list_for_each_entry(nh, &rt6_nh_list, next) {
3b1137fe 4207 rt_last = nh->rt6_info;
333c4301 4208 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
3b1137fe
DA
4209 /* save reference to first route for notification */
4210 if (!rt_notif && !err)
4211 rt_notif = nh->rt6_info;
4212
6b9ea5a6
RP
4213 /* nh->rt6_info is used or freed at this point, reset to NULL*/
4214 nh->rt6_info = NULL;
4215 if (err) {
4216 if (replace && nhn)
4217 ip6_print_replace_route_err(&rt6_nh_list);
4218 err_nh = nh;
4219 goto add_errout;
51ebd318 4220 }
6b9ea5a6 4221
1a72418b 4222 /* Because each route is added like a single route we remove
27596472
MK
4223 * these flags after the first nexthop: if there is a collision,
4224 * we have already failed to add the first nexthop:
4225 * fib6_add_rt2node() has rejected it; when replacing, old
4226 * nexthops have been replaced by first new, the rest should
4227 * be added to it.
1a72418b 4228 */
27596472
MK
4229 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4230 NLM_F_REPLACE);
6b9ea5a6
RP
4231 nhn++;
4232 }
4233
3b1137fe
DA
4234 /* success ... tell user about new route */
4235 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4236 goto cleanup;
4237
4238add_errout:
3b1137fe
DA
4239 /* send notification for routes that were added so that
4240 * the delete notifications sent by ip6_route_del are
4241 * coherent
4242 */
4243 if (rt_notif)
4244 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4245
6b9ea5a6
RP
4246 /* Delete routes that were already added */
4247 list_for_each_entry(nh, &rt6_nh_list, next) {
4248 if (err_nh == nh)
4249 break;
333c4301 4250 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4251 }
4252
4253cleanup:
4254 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
587fea74
WW
4255 if (nh->rt6_info)
4256 dst_release_immediate(&nh->rt6_info->dst);
52fe51f8 4257 kfree(nh->mxc.mx);
6b9ea5a6
RP
4258 list_del(&nh->next);
4259 kfree(nh);
4260 }
4261
4262 return err;
4263}
4264
333c4301
DA
4265static int ip6_route_multipath_del(struct fib6_config *cfg,
4266 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4267{
4268 struct fib6_config r_cfg;
4269 struct rtnexthop *rtnh;
4270 int remaining;
4271 int attrlen;
4272 int err = 1, last_err = 0;
4273
4274 remaining = cfg->fc_mp_len;
4275 rtnh = (struct rtnexthop *)cfg->fc_mp;
4276
4277 /* Parse a Multipath Entry */
4278 while (rtnh_ok(rtnh, remaining)) {
4279 memcpy(&r_cfg, cfg, sizeof(*cfg));
4280 if (rtnh->rtnh_ifindex)
4281 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4282
4283 attrlen = rtnh_attrlen(rtnh);
4284 if (attrlen > 0) {
4285 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4286
4287 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4288 if (nla) {
4289 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4290 r_cfg.fc_flags |= RTF_GATEWAY;
4291 }
4292 }
333c4301 4293 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4294 if (err)
4295 last_err = err;
4296
51ebd318
ND
4297 rtnh = rtnh_next(rtnh, &remaining);
4298 }
4299
4300 return last_err;
4301}
4302
c21ef3e3
DA
4303static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4304 struct netlink_ext_ack *extack)
1da177e4 4305{
86872cb5
TG
4306 struct fib6_config cfg;
4307 int err;
1da177e4 4308
333c4301 4309 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4310 if (err < 0)
4311 return err;
4312
51ebd318 4313 if (cfg.fc_mp)
333c4301 4314 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4315 else {
4316 cfg.fc_delete_all_nh = 1;
333c4301 4317 return ip6_route_del(&cfg, extack);
0ae81335 4318 }
1da177e4
LT
4319}
4320
c21ef3e3
DA
4321static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4322 struct netlink_ext_ack *extack)
1da177e4 4323{
86872cb5
TG
4324 struct fib6_config cfg;
4325 int err;
1da177e4 4326
333c4301 4327 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4328 if (err < 0)
4329 return err;
4330
51ebd318 4331 if (cfg.fc_mp)
333c4301 4332 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4333 else
333c4301 4334 return ip6_route_add(&cfg, extack);
1da177e4
LT
4335}
4336
beb1afac 4337static size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f 4338{
beb1afac
DA
4339 int nexthop_len = 0;
4340
4341 if (rt->rt6i_nsiblings) {
4342 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4343 + NLA_ALIGN(sizeof(struct rtnexthop))
4344 + nla_total_size(16) /* RTA_GATEWAY */
beb1afac
DA
4345 + lwtunnel_get_encap_size(rt->dst.lwtstate);
4346
4347 nexthop_len *= rt->rt6i_nsiblings;
4348 }
4349
339bf98f
TG
4350 return NLMSG_ALIGN(sizeof(struct rtmsg))
4351 + nla_total_size(16) /* RTA_SRC */
4352 + nla_total_size(16) /* RTA_DST */
4353 + nla_total_size(16) /* RTA_GATEWAY */
4354 + nla_total_size(16) /* RTA_PREFSRC */
4355 + nla_total_size(4) /* RTA_TABLE */
4356 + nla_total_size(4) /* RTA_IIF */
4357 + nla_total_size(4) /* RTA_OIF */
4358 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4359 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4360 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4361 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4362 + nla_total_size(1) /* RTA_PREF */
beb1afac
DA
4363 + lwtunnel_get_encap_size(rt->dst.lwtstate)
4364 + nexthop_len;
4365}
4366
4367static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
5be083ce 4368 unsigned int *flags, bool skip_oif)
beb1afac 4369{
f9d882ea
IS
4370 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
4371 *flags |= RTNH_F_DEAD;
4372
44c9f2f2 4373 if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) {
beb1afac
DA
4374 *flags |= RTNH_F_LINKDOWN;
4375 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
4376 *flags |= RTNH_F_DEAD;
4377 }
4378
4379 if (rt->rt6i_flags & RTF_GATEWAY) {
4380 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
4381 goto nla_put_failure;
4382 }
4383
fc1e64e1 4384 *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK);
fe400799 4385 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4386 *flags |= RTNH_F_OFFLOAD;
4387
5be083ce
DA
4388 /* not needed for multipath encoding b/c it has a rtnexthop struct */
4389 if (!skip_oif && rt->dst.dev &&
beb1afac
DA
4390 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
4391 goto nla_put_failure;
4392
4393 if (rt->dst.lwtstate &&
4394 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
4395 goto nla_put_failure;
4396
4397 return 0;
4398
4399nla_put_failure:
4400 return -EMSGSIZE;
4401}
4402
5be083ce 4403/* add multipath next hop */
beb1afac
DA
4404static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
4405{
4406 struct rtnexthop *rtnh;
4407 unsigned int flags = 0;
4408
4409 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4410 if (!rtnh)
4411 goto nla_put_failure;
4412
398958ae 4413 rtnh->rtnh_hops = rt->rt6i_nh_weight - 1;
beb1afac
DA
4414 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
4415
5be083ce 4416 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4417 goto nla_put_failure;
4418
4419 rtnh->rtnh_flags = flags;
4420
4421 /* length of rtnetlink header + attributes */
4422 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4423
4424 return 0;
4425
4426nla_put_failure:
4427 return -EMSGSIZE;
339bf98f
TG
4428}
4429
191cd582
BH
4430static int rt6_fill_node(struct net *net,
4431 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 4432 struct in6_addr *dst, struct in6_addr *src,
15e47304 4433 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4434 unsigned int flags)
1da177e4 4435{
4b32b5ad 4436 u32 metrics[RTAX_MAX];
1da177e4 4437 struct rtmsg *rtm;
2d7202bf 4438 struct nlmsghdr *nlh;
e3703b3d 4439 long expires;
9e762a4a 4440 u32 table;
1da177e4 4441
15e47304 4442 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4443 if (!nlh)
26932566 4444 return -EMSGSIZE;
2d7202bf
TG
4445
4446 rtm = nlmsg_data(nlh);
1da177e4
LT
4447 rtm->rtm_family = AF_INET6;
4448 rtm->rtm_dst_len = rt->rt6i_dst.plen;
4449 rtm->rtm_src_len = rt->rt6i_src.plen;
4450 rtm->rtm_tos = 0;
c71099ac 4451 if (rt->rt6i_table)
9e762a4a 4452 table = rt->rt6i_table->tb6_id;
c71099ac 4453 else
9e762a4a
PM
4454 table = RT6_TABLE_UNSPEC;
4455 rtm->rtm_table = table;
c78679e8
DM
4456 if (nla_put_u32(skb, RTA_TABLE, table))
4457 goto nla_put_failure;
ef2c7d7b
ND
4458 if (rt->rt6i_flags & RTF_REJECT) {
4459 switch (rt->dst.error) {
4460 case -EINVAL:
4461 rtm->rtm_type = RTN_BLACKHOLE;
4462 break;
4463 case -EACCES:
4464 rtm->rtm_type = RTN_PROHIBIT;
4465 break;
b4949ab2
ND
4466 case -EAGAIN:
4467 rtm->rtm_type = RTN_THROW;
4468 break;
ef2c7d7b
ND
4469 default:
4470 rtm->rtm_type = RTN_UNREACHABLE;
4471 break;
4472 }
4473 }
38308473 4474 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 4475 rtm->rtm_type = RTN_LOCAL;
4ee39733
DA
4476 else if (rt->rt6i_flags & RTF_ANYCAST)
4477 rtm->rtm_type = RTN_ANYCAST;
d1918542 4478 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
4479 rtm->rtm_type = RTN_LOCAL;
4480 else
4481 rtm->rtm_type = RTN_UNICAST;
4482 rtm->rtm_flags = 0;
4483 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4484 rtm->rtm_protocol = rt->rt6i_protocol;
1da177e4 4485
38308473 4486 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
4487 rtm->rtm_flags |= RTM_F_CLONED;
4488
4489 if (dst) {
930345ea 4490 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 4491 goto nla_put_failure;
1ab1457c 4492 rtm->rtm_dst_len = 128;
1da177e4 4493 } else if (rtm->rtm_dst_len)
930345ea 4494 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 4495 goto nla_put_failure;
1da177e4
LT
4496#ifdef CONFIG_IPV6_SUBTREES
4497 if (src) {
930345ea 4498 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4499 goto nla_put_failure;
1ab1457c 4500 rtm->rtm_src_len = 128;
c78679e8 4501 } else if (rtm->rtm_src_len &&
930345ea 4502 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 4503 goto nla_put_failure;
1da177e4 4504#endif
7bc570c8
YH
4505 if (iif) {
4506#ifdef CONFIG_IPV6_MROUTE
4507 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
fd61c6ba
DA
4508 int err = ip6mr_get_route(net, skb, rtm, portid);
4509
4510 if (err == 0)
4511 return 0;
4512 if (err < 0)
4513 goto nla_put_failure;
7bc570c8
YH
4514 } else
4515#endif
c78679e8
DM
4516 if (nla_put_u32(skb, RTA_IIF, iif))
4517 goto nla_put_failure;
7bc570c8 4518 } else if (dst) {
1da177e4 4519 struct in6_addr saddr_buf;
c78679e8 4520 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 4521 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4522 goto nla_put_failure;
1da177e4 4523 }
2d7202bf 4524
c3968a85
DW
4525 if (rt->rt6i_prefsrc.plen) {
4526 struct in6_addr saddr_buf;
4e3fd7a0 4527 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 4528 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4529 goto nla_put_failure;
c3968a85
DW
4530 }
4531
4b32b5ad
MKL
4532 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
4533 if (rt->rt6i_pmtu)
4534 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
4535 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
4536 goto nla_put_failure;
4537
c78679e8
DM
4538 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4539 goto nla_put_failure;
8253947e 4540
beb1afac
DA
4541 /* For multipath routes, walk the siblings list and add
4542 * each as a nexthop within RTA_MULTIPATH.
4543 */
4544 if (rt->rt6i_nsiblings) {
4545 struct rt6_info *sibling, *next_sibling;
4546 struct nlattr *mp;
4547
4548 mp = nla_nest_start(skb, RTA_MULTIPATH);
4549 if (!mp)
4550 goto nla_put_failure;
4551
4552 if (rt6_add_nexthop(skb, rt) < 0)
4553 goto nla_put_failure;
4554
4555 list_for_each_entry_safe(sibling, next_sibling,
4556 &rt->rt6i_siblings, rt6i_siblings) {
4557 if (rt6_add_nexthop(skb, sibling) < 0)
4558 goto nla_put_failure;
4559 }
4560
4561 nla_nest_end(skb, mp);
4562 } else {
5be083ce 4563 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4564 goto nla_put_failure;
4565 }
4566
8253947e 4567 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 4568
87a50699 4569 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 4570 goto nla_put_failure;
2d7202bf 4571
c78ba6d6
LR
4572 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4573 goto nla_put_failure;
4574
19e42e45 4575
053c095a
JB
4576 nlmsg_end(skb, nlh);
4577 return 0;
2d7202bf
TG
4578
4579nla_put_failure:
26932566
PM
4580 nlmsg_cancel(skb, nlh);
4581 return -EMSGSIZE;
1da177e4
LT
4582}
4583
1b43af54 4584int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
4585{
4586 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4587 struct net *net = arg->net;
4588
4589 if (rt == net->ipv6.ip6_null_entry)
4590 return 0;
1da177e4 4591
2d7202bf
TG
4592 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4593 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4594
4595 /* user wants prefix routes only */
4596 if (rtm->rtm_flags & RTM_F_PREFIX &&
4597 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4598 /* success since this is not a prefix route */
4599 return 1;
4600 }
4601 }
1da177e4 4602
1f17e2f2 4603 return rt6_fill_node(net,
191cd582 4604 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 4605 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
f8cfe2ce 4606 NLM_F_MULTI);
1da177e4
LT
4607}
4608
c21ef3e3
DA
4609static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4610 struct netlink_ext_ack *extack)
1da177e4 4611{
3b1e0a65 4612 struct net *net = sock_net(in_skb->sk);
ab364a6f 4613 struct nlattr *tb[RTA_MAX+1];
18c3a61c
RP
4614 int err, iif = 0, oif = 0;
4615 struct dst_entry *dst;
ab364a6f 4616 struct rt6_info *rt;
1da177e4 4617 struct sk_buff *skb;
ab364a6f 4618 struct rtmsg *rtm;
4c9483b2 4619 struct flowi6 fl6;
18c3a61c 4620 bool fibmatch;
1da177e4 4621
fceb6435 4622 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4623 extack);
ab364a6f
TG
4624 if (err < 0)
4625 goto errout;
1da177e4 4626
ab364a6f 4627 err = -EINVAL;
4c9483b2 4628 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4629 rtm = nlmsg_data(nlh);
4630 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4631 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4632
ab364a6f
TG
4633 if (tb[RTA_SRC]) {
4634 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4635 goto errout;
4636
4e3fd7a0 4637 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4638 }
4639
4640 if (tb[RTA_DST]) {
4641 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4642 goto errout;
4643
4e3fd7a0 4644 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4645 }
4646
4647 if (tb[RTA_IIF])
4648 iif = nla_get_u32(tb[RTA_IIF]);
4649
4650 if (tb[RTA_OIF])
72331bc0 4651 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4652
2e47b291
LC
4653 if (tb[RTA_MARK])
4654 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4655
622ec2c9
LC
4656 if (tb[RTA_UID])
4657 fl6.flowi6_uid = make_kuid(current_user_ns(),
4658 nla_get_u32(tb[RTA_UID]));
4659 else
4660 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4661
1da177e4
LT
4662 if (iif) {
4663 struct net_device *dev;
72331bc0
SL
4664 int flags = 0;
4665
121622db
FW
4666 rcu_read_lock();
4667
4668 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4669 if (!dev) {
121622db 4670 rcu_read_unlock();
1da177e4 4671 err = -ENODEV;
ab364a6f 4672 goto errout;
1da177e4 4673 }
72331bc0
SL
4674
4675 fl6.flowi6_iif = iif;
4676
4677 if (!ipv6_addr_any(&fl6.saddr))
4678 flags |= RT6_LOOKUP_F_HAS_SADDR;
4679
b75cc8f9 4680 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4681
4682 rcu_read_unlock();
72331bc0
SL
4683 } else {
4684 fl6.flowi6_oif = oif;
4685
58acfd71 4686 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4687 }
4688
18c3a61c
RP
4689
4690 rt = container_of(dst, struct rt6_info, dst);
4691 if (rt->dst.error) {
4692 err = rt->dst.error;
4693 ip6_rt_put(rt);
4694 goto errout;
1da177e4
LT
4695 }
4696
9d6acb3b
WC
4697 if (rt == net->ipv6.ip6_null_entry) {
4698 err = rt->dst.error;
4699 ip6_rt_put(rt);
4700 goto errout;
4701 }
4702
fba961ab
DM
4703 if (fibmatch && rt->from) {
4704 struct rt6_info *ort = rt->from;
58acfd71
IS
4705
4706 dst_hold(&ort->dst);
4707 ip6_rt_put(rt);
4708 rt = ort;
4709 }
4710
ab364a6f 4711 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4712 if (!skb) {
94e187c0 4713 ip6_rt_put(rt);
ab364a6f
TG
4714 err = -ENOBUFS;
4715 goto errout;
4716 }
1da177e4 4717
d8d1f30b 4718 skb_dst_set(skb, &rt->dst);
18c3a61c
RP
4719 if (fibmatch)
4720 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
4721 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4722 nlh->nlmsg_seq, 0);
4723 else
4724 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
4725 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4726 nlh->nlmsg_seq, 0);
1da177e4 4727 if (err < 0) {
ab364a6f
TG
4728 kfree_skb(skb);
4729 goto errout;
1da177e4
LT
4730 }
4731
15e47304 4732 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4733errout:
1da177e4 4734 return err;
1da177e4
LT
4735}
4736
37a1d361
RP
4737void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4738 unsigned int nlm_flags)
1da177e4
LT
4739{
4740 struct sk_buff *skb;
5578689a 4741 struct net *net = info->nl_net;
528c4ceb
DL
4742 u32 seq;
4743 int err;
4744
4745 err = -ENOBUFS;
38308473 4746 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4747
19e42e45 4748 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4749 if (!skb)
21713ebc
TG
4750 goto errout;
4751
191cd582 4752 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
f8cfe2ce 4753 event, info->portid, seq, nlm_flags);
26932566
PM
4754 if (err < 0) {
4755 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4756 WARN_ON(err == -EMSGSIZE);
4757 kfree_skb(skb);
4758 goto errout;
4759 }
15e47304 4760 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4761 info->nlh, gfp_any());
4762 return;
21713ebc
TG
4763errout:
4764 if (err < 0)
5578689a 4765 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4766}
4767
8ed67789 4768static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4769 unsigned long event, void *ptr)
8ed67789 4770{
351638e7 4771 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4772 struct net *net = dev_net(dev);
8ed67789 4773
242d3a49
WC
4774 if (!(dev->flags & IFF_LOOPBACK))
4775 return NOTIFY_OK;
4776
4777 if (event == NETDEV_REGISTER) {
d8d1f30b 4778 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
4779 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4780#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4781 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 4782 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4783 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 4784 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 4785#endif
76da0704
WC
4786 } else if (event == NETDEV_UNREGISTER &&
4787 dev->reg_state != NETREG_UNREGISTERED) {
4788 /* NETDEV_UNREGISTER could be fired for multiple times by
4789 * netdev_wait_allrefs(). Make sure we only call this once.
4790 */
12d94a80 4791 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 4792#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
4793 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4794 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
4795#endif
4796 }
4797
4798 return NOTIFY_OK;
4799}
4800
1da177e4
LT
4801/*
4802 * /proc
4803 */
4804
4805#ifdef CONFIG_PROC_FS
4806
33120b30 4807static const struct file_operations ipv6_route_proc_fops = {
33120b30
AD
4808 .open = ipv6_route_open,
4809 .read = seq_read,
4810 .llseek = seq_lseek,
8d2ca1d7 4811 .release = seq_release_net,
33120b30
AD
4812};
4813
1da177e4
LT
4814static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4815{
69ddb805 4816 struct net *net = (struct net *)seq->private;
1da177e4 4817 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
4818 net->ipv6.rt6_stats->fib_nodes,
4819 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 4820 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
4821 net->ipv6.rt6_stats->fib_rt_entries,
4822 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 4823 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 4824 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
4825
4826 return 0;
4827}
4828
4829static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4830{
de05c557 4831 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
4832}
4833
9a32144e 4834static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
4835 .open = rt6_stats_seq_open,
4836 .read = seq_read,
4837 .llseek = seq_lseek,
b6fcbdb4 4838 .release = single_release_net,
1da177e4
LT
4839};
4840#endif /* CONFIG_PROC_FS */
4841
4842#ifdef CONFIG_SYSCTL
4843
1da177e4 4844static
fe2c6338 4845int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
4846 void __user *buffer, size_t *lenp, loff_t *ppos)
4847{
c486da34
LAG
4848 struct net *net;
4849 int delay;
4850 if (!write)
1da177e4 4851 return -EINVAL;
c486da34
LAG
4852
4853 net = (struct net *)ctl->extra1;
4854 delay = net->ipv6.sysctl.flush_delay;
4855 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 4856 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 4857 return 0;
1da177e4
LT
4858}
4859
fe2c6338 4860struct ctl_table ipv6_route_table_template[] = {
1ab1457c 4861 {
1da177e4 4862 .procname = "flush",
4990509f 4863 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 4864 .maxlen = sizeof(int),
89c8b3a1 4865 .mode = 0200,
6d9f239a 4866 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
4867 },
4868 {
1da177e4 4869 .procname = "gc_thresh",
9a7ec3a9 4870 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
4871 .maxlen = sizeof(int),
4872 .mode = 0644,
6d9f239a 4873 .proc_handler = proc_dointvec,
1da177e4
LT
4874 },
4875 {
1da177e4 4876 .procname = "max_size",
4990509f 4877 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
4878 .maxlen = sizeof(int),
4879 .mode = 0644,
6d9f239a 4880 .proc_handler = proc_dointvec,
1da177e4
LT
4881 },
4882 {
1da177e4 4883 .procname = "gc_min_interval",
4990509f 4884 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4885 .maxlen = sizeof(int),
4886 .mode = 0644,
6d9f239a 4887 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4888 },
4889 {
1da177e4 4890 .procname = "gc_timeout",
4990509f 4891 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
4892 .maxlen = sizeof(int),
4893 .mode = 0644,
6d9f239a 4894 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4895 },
4896 {
1da177e4 4897 .procname = "gc_interval",
4990509f 4898 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
4899 .maxlen = sizeof(int),
4900 .mode = 0644,
6d9f239a 4901 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4902 },
4903 {
1da177e4 4904 .procname = "gc_elasticity",
4990509f 4905 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
4906 .maxlen = sizeof(int),
4907 .mode = 0644,
f3d3f616 4908 .proc_handler = proc_dointvec,
1da177e4
LT
4909 },
4910 {
1da177e4 4911 .procname = "mtu_expires",
4990509f 4912 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
4913 .maxlen = sizeof(int),
4914 .mode = 0644,
6d9f239a 4915 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4916 },
4917 {
1da177e4 4918 .procname = "min_adv_mss",
4990509f 4919 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
4920 .maxlen = sizeof(int),
4921 .mode = 0644,
f3d3f616 4922 .proc_handler = proc_dointvec,
1da177e4
LT
4923 },
4924 {
1da177e4 4925 .procname = "gc_min_interval_ms",
4990509f 4926 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4927 .maxlen = sizeof(int),
4928 .mode = 0644,
6d9f239a 4929 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 4930 },
f8572d8f 4931 { }
1da177e4
LT
4932};
4933
2c8c1e72 4934struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
4935{
4936 struct ctl_table *table;
4937
4938 table = kmemdup(ipv6_route_table_template,
4939 sizeof(ipv6_route_table_template),
4940 GFP_KERNEL);
5ee09105
YH
4941
4942 if (table) {
4943 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 4944 table[0].extra1 = net;
86393e52 4945 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
4946 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
4947 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4948 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
4949 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
4950 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
4951 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
4952 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 4953 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
4954
4955 /* Don't export sysctls to unprivileged users */
4956 if (net->user_ns != &init_user_ns)
4957 table[0].procname = NULL;
5ee09105
YH
4958 }
4959
760f2d01
DL
4960 return table;
4961}
1da177e4
LT
4962#endif
4963
2c8c1e72 4964static int __net_init ip6_route_net_init(struct net *net)
cdb18761 4965{
633d424b 4966 int ret = -ENOMEM;
8ed67789 4967
86393e52
AD
4968 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
4969 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 4970
fc66f95c
ED
4971 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
4972 goto out_ip6_dst_ops;
4973
8ed67789
DL
4974 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
4975 sizeof(*net->ipv6.ip6_null_entry),
4976 GFP_KERNEL);
4977 if (!net->ipv6.ip6_null_entry)
fc66f95c 4978 goto out_ip6_dst_entries;
d8d1f30b 4979 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4980 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
4981 ip6_template_metrics, true);
8ed67789
DL
4982
4983#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 4984 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
4985 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
4986 sizeof(*net->ipv6.ip6_prohibit_entry),
4987 GFP_KERNEL);
68fffc67
PZ
4988 if (!net->ipv6.ip6_prohibit_entry)
4989 goto out_ip6_null_entry;
d8d1f30b 4990 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4991 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
4992 ip6_template_metrics, true);
8ed67789
DL
4993
4994 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
4995 sizeof(*net->ipv6.ip6_blk_hole_entry),
4996 GFP_KERNEL);
68fffc67
PZ
4997 if (!net->ipv6.ip6_blk_hole_entry)
4998 goto out_ip6_prohibit_entry;
d8d1f30b 4999 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5000 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5001 ip6_template_metrics, true);
8ed67789
DL
5002#endif
5003
b339a47c
PZ
5004 net->ipv6.sysctl.flush_delay = 0;
5005 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5006 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5007 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5008 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5009 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5010 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5011 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5012
6891a346
BT
5013 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5014
8ed67789
DL
5015 ret = 0;
5016out:
5017 return ret;
f2fc6a54 5018
68fffc67
PZ
5019#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5020out_ip6_prohibit_entry:
5021 kfree(net->ipv6.ip6_prohibit_entry);
5022out_ip6_null_entry:
5023 kfree(net->ipv6.ip6_null_entry);
5024#endif
fc66f95c
ED
5025out_ip6_dst_entries:
5026 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5027out_ip6_dst_ops:
f2fc6a54 5028 goto out;
cdb18761
DL
5029}
5030
2c8c1e72 5031static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5032{
8ed67789
DL
5033 kfree(net->ipv6.ip6_null_entry);
5034#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5035 kfree(net->ipv6.ip6_prohibit_entry);
5036 kfree(net->ipv6.ip6_blk_hole_entry);
5037#endif
41bb78b4 5038 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5039}
5040
d189634e
TG
5041static int __net_init ip6_route_net_init_late(struct net *net)
5042{
5043#ifdef CONFIG_PROC_FS
d4beaa66
G
5044 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
5045 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
5046#endif
5047 return 0;
5048}
5049
5050static void __net_exit ip6_route_net_exit_late(struct net *net)
5051{
5052#ifdef CONFIG_PROC_FS
ece31ffd
G
5053 remove_proc_entry("ipv6_route", net->proc_net);
5054 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5055#endif
5056}
5057
cdb18761
DL
5058static struct pernet_operations ip6_route_net_ops = {
5059 .init = ip6_route_net_init,
5060 .exit = ip6_route_net_exit,
50911411 5061 .async = true,
cdb18761
DL
5062};
5063
c3426b47
DM
5064static int __net_init ipv6_inetpeer_init(struct net *net)
5065{
5066 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5067
5068 if (!bp)
5069 return -ENOMEM;
5070 inet_peer_base_init(bp);
5071 net->ipv6.peers = bp;
5072 return 0;
5073}
5074
5075static void __net_exit ipv6_inetpeer_exit(struct net *net)
5076{
5077 struct inet_peer_base *bp = net->ipv6.peers;
5078
5079 net->ipv6.peers = NULL;
56a6b248 5080 inetpeer_invalidate_tree(bp);
c3426b47
DM
5081 kfree(bp);
5082}
5083
2b823f72 5084static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5085 .init = ipv6_inetpeer_init,
5086 .exit = ipv6_inetpeer_exit,
85ca51b2 5087 .async = true,
c3426b47
DM
5088};
5089
d189634e
TG
5090static struct pernet_operations ip6_route_net_late_ops = {
5091 .init = ip6_route_net_init_late,
5092 .exit = ip6_route_net_exit_late,
50911411 5093 .async = true,
d189634e
TG
5094};
5095
8ed67789
DL
5096static struct notifier_block ip6_route_dev_notifier = {
5097 .notifier_call = ip6_route_dev_notify,
242d3a49 5098 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5099};
5100
2f460933
WC
5101void __init ip6_route_init_special_entries(void)
5102{
5103 /* Registering of the loopback is done before this portion of code,
5104 * the loopback reference in rt6_info will not be taken, do it
5105 * manually for init_net */
5106 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5107 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5108 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5109 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5110 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5111 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5112 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5113 #endif
5114}
5115
433d49c3 5116int __init ip6_route_init(void)
1da177e4 5117{
433d49c3 5118 int ret;
8d0b94af 5119 int cpu;
433d49c3 5120
9a7ec3a9
DL
5121 ret = -ENOMEM;
5122 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5123 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5124 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5125 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5126 goto out;
14e50e57 5127
fc66f95c 5128 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5129 if (ret)
bdb3289f 5130 goto out_kmem_cache;
bdb3289f 5131
c3426b47
DM
5132 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5133 if (ret)
e8803b6c 5134 goto out_dst_entries;
2a0c451a 5135
7e52b33b
DM
5136 ret = register_pernet_subsys(&ip6_route_net_ops);
5137 if (ret)
5138 goto out_register_inetpeer;
c3426b47 5139
5dc121e9
AE
5140 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5141
e8803b6c 5142 ret = fib6_init();
433d49c3 5143 if (ret)
8ed67789 5144 goto out_register_subsys;
433d49c3 5145
433d49c3
DL
5146 ret = xfrm6_init();
5147 if (ret)
e8803b6c 5148 goto out_fib6_init;
c35b7e72 5149
433d49c3
DL
5150 ret = fib6_rules_init();
5151 if (ret)
5152 goto xfrm6_init;
7e5449c2 5153
d189634e
TG
5154 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5155 if (ret)
5156 goto fib6_rules_init;
5157
16feebcf
FW
5158 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5159 inet6_rtm_newroute, NULL, 0);
5160 if (ret < 0)
5161 goto out_register_late_subsys;
5162
5163 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5164 inet6_rtm_delroute, NULL, 0);
5165 if (ret < 0)
5166 goto out_register_late_subsys;
5167
5168 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5169 inet6_rtm_getroute, NULL,
5170 RTNL_FLAG_DOIT_UNLOCKED);
5171 if (ret < 0)
d189634e 5172 goto out_register_late_subsys;
c127ea2c 5173
8ed67789 5174 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5175 if (ret)
d189634e 5176 goto out_register_late_subsys;
8ed67789 5177
8d0b94af
MKL
5178 for_each_possible_cpu(cpu) {
5179 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5180
5181 INIT_LIST_HEAD(&ul->head);
5182 spin_lock_init(&ul->lock);
5183 }
5184
433d49c3
DL
5185out:
5186 return ret;
5187
d189634e 5188out_register_late_subsys:
16feebcf 5189 rtnl_unregister_all(PF_INET6);
d189634e 5190 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5191fib6_rules_init:
433d49c3
DL
5192 fib6_rules_cleanup();
5193xfrm6_init:
433d49c3 5194 xfrm6_fini();
2a0c451a
TG
5195out_fib6_init:
5196 fib6_gc_cleanup();
8ed67789
DL
5197out_register_subsys:
5198 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5199out_register_inetpeer:
5200 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5201out_dst_entries:
5202 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5203out_kmem_cache:
f2fc6a54 5204 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5205 goto out;
1da177e4
LT
5206}
5207
5208void ip6_route_cleanup(void)
5209{
8ed67789 5210 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5211 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5212 fib6_rules_cleanup();
1da177e4 5213 xfrm6_fini();
1da177e4 5214 fib6_gc_cleanup();
c3426b47 5215 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5216 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5217 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5218 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5219}