2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
24 * Fixed routing subtrees.
27 #define pr_fmt(fmt) "IPv6: " fmt
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
55 #include <linux/rtnetlink.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
61 #include <asm/uaccess.h>
64 #include <linux/sysctl.h>
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68 const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void ip6_dst_destroy(struct dst_entry *);
74 static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
76 static int ip6_dst_gc(struct dst_ops *ops);
78 static int ip6_pkt_discard(struct sk_buff *skb);
79 static int ip6_pkt_discard_out(struct sk_buff *skb);
80 static void ip6_link_failure(struct sk_buff *skb);
81 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82 static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb);
84 #ifdef CONFIG_IPV6_ROUTE_INFO
85 static struct rt6_info *rt6_add_route_info(struct net *net,
86 const struct in6_addr *prefix, int prefixlen,
87 const struct in6_addr *gwaddr, int ifindex,
89 static struct rt6_info *rt6_get_route_info(struct net *net,
90 const struct in6_addr *prefix, int prefixlen,
91 const struct in6_addr *gwaddr, int ifindex);
/*
 * ipv6_cow_metrics - dst_ops->cow_metrics hook: copy-on-write the route
 * metrics into per-peer storage so a shared metrics block is not modified
 * in place.  Uses cmpxchg() to install the new metrics pointer atomically.
 * NOTE(review): interior lines are missing from this excerpt; comments
 * describe only what the visible statements show.
 */
94 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
96 struct rt6_info *rt = (struct rt6_info *) dst;
97 struct inet_peer *peer;
/* Only host routes (DST_HOST) get private per-peer metrics. */
100 if (!(rt->dst.flags & DST_HOST))
103 peer = rt6_get_peer_create(rt);
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
/* Freshly-created peer: seed its metrics from the old (shared) block. */
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
112 new = (unsigned long) p;
/* Racy writers resolved here: whoever wins the cmpxchg installs p. */
113 prev = cmpxchg(&dst->_metrics, old, new);
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
/*
 * choose_neigh_daddr - pick the neighbour-cache key for a route: the
 * gateway address when one is set, otherwise the packet's destination.
 */
124 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128 struct in6_addr *p = &rt->rt6i_gateway;
130 if (!ipv6_addr_any(p))
131 return (const void *) p;
/* No gateway: key on the final destination from the IPv6 header. */
133 return &ipv6_hdr(skb)->daddr;
/*
 * ip6_neigh_lookup - dst_ops->neigh_lookup hook: find (or create) the
 * ND neighbour entry for this dst on its output device.
 */
137 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141 struct rt6_info *rt = (struct rt6_info *) dst;
144 daddr = choose_neigh_daddr(rt, skb, daddr);
145 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
/* Cache miss: allocate a fresh neighbour entry (may fail under pressure). */
148 return neigh_create(&nd_tbl, daddr, dst->dev);
/*
 * rt6_bind_neighbour - attach a neighbour entry for rt's gateway on @dev.
 * NOTE(review): the error/return path lines are not visible in this excerpt.
 */
151 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
153 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
155 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
/*
 * ip6_dst_ops_template - dst_ops vtable for ordinary IPv6 routes; copied
 * per-netns into net->ipv6.ip6_dst_ops.  Each hook below is defined in
 * this file (forward-declared above).
 */
164 static struct dst_ops ip6_dst_ops_template = {
166 .protocol = cpu_to_be16(ETH_P_IPV6),
169 .check = ip6_dst_check,
170 .default_advmss = ip6_default_advmss,
172 .cow_metrics = ipv6_cow_metrics,
173 .destroy = ip6_dst_destroy,
174 .ifdown = ip6_dst_ifdown,
175 .negative_advice = ip6_negative_advice,
176 .link_failure = ip6_link_failure,
177 .update_pmtu = ip6_rt_update_pmtu,
178 .redirect = rt6_do_redirect,
179 .local_out = __ip6_local_out,
180 .neigh_lookup = ip6_neigh_lookup,
/*
 * ip6_blackhole_mtu - MTU for blackhole dsts: the raw metric if set,
 * else fall back to the device MTU.
 */
183 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
185 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
187 return mtu ? : dst->dev->mtu;
/* Blackhole routes deliberately ignore PMTU updates and redirects. */
190 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
194 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb)
/* Blackhole metrics are never COWed. */
198 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
/*
 * ip6_dst_blackhole_ops - dst_ops vtable used by ip6_blackhole_route()
 * clones; mostly no-op hooks plus the shared check/destroy/lookup.
 */
204 static struct dst_ops ip6_dst_blackhole_ops = {
206 .protocol = cpu_to_be16(ETH_P_IPV6),
207 .destroy = ip6_dst_destroy,
208 .check = ip6_dst_check,
209 .mtu = ip6_blackhole_mtu,
210 .default_advmss = ip6_default_advmss,
211 .update_pmtu = ip6_rt_blackhole_update_pmtu,
212 .redirect = ip6_rt_blackhole_redirect,
213 .cow_metrics = ip6_rt_blackhole_cow_metrics,
214 .neigh_lookup = ip6_neigh_lookup,
/* Default metrics shared by the template routes below (hop limit 255). */
217 static const u32 ip6_template_metrics[RTAX_MAX] = {
218 [RTAX_HOPLIMIT - 1] = 255,
/*
 * ip6_null_entry_template - the "no route" sentinel: drops packets and
 * reports -ENETUNREACH.  Cloned per-netns; never freed (refcnt starts at 1,
 * metric ~0 sorts it last).
 */
221 static struct rt6_info ip6_null_entry_template = {
223 .__refcnt = ATOMIC_INIT(1),
226 .error = -ENETUNREACH,
227 .input = ip6_pkt_discard,
228 .output = ip6_pkt_discard_out,
230 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
231 .rt6i_protocol = RTPROT_KERNEL,
232 .rt6i_metric = ~(u32) 0,
233 .rt6i_ref = ATOMIC_INIT(1),
236 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
238 static int ip6_pkt_prohibit(struct sk_buff *skb);
239 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
/*
 * ip6_prohibit_entry_template - policy-routing "prohibit" sentinel:
 * like the null entry but answers with administratively-prohibited.
 */
241 static struct rt6_info ip6_prohibit_entry_template = {
243 .__refcnt = ATOMIC_INIT(1),
247 .input = ip6_pkt_prohibit,
248 .output = ip6_pkt_prohibit_out,
250 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
251 .rt6i_protocol = RTPROT_KERNEL,
252 .rt6i_metric = ~(u32) 0,
253 .rt6i_ref = ATOMIC_INIT(1),
/*
 * ip6_blk_hole_entry_template - policy-routing "blackhole" sentinel:
 * silently discards in both directions (no ICMP error).
 */
256 static struct rt6_info ip6_blk_hole_entry_template = {
258 .__refcnt = ATOMIC_INIT(1),
262 .input = dst_discard,
263 .output = dst_discard,
265 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
266 .rt6i_protocol = RTPROT_KERNEL,
267 .rt6i_metric = ~(u32) 0,
268 .rt6i_ref = ATOMIC_INIT(1),
273 /* allocate dst with ip6_dst_ops */
/*
 * ip6_dst_alloc - allocate an rt6_info from the per-netns dst pool, zero
 * its rt6-specific tail, and initialize the inetpeer reference (table
 * peers when a table is given, otherwise the netns-wide pool).
 */
274 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
275 struct net_device *dev,
277 struct fib6_table *table)
279 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
/* Zero everything past the embedded dst_entry header. */
284 sizeof(*rt) - sizeof(struct dst_entry));
285 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
/*
 * ip6_dst_destroy - dst_ops->destroy hook: drop every reference the
 * rt6_info holds (neighbour, idev, metrics, parent dst, inet peer).
 * NOTE(review): some release calls are on lines missing from this excerpt.
 */
290 static void ip6_dst_destroy(struct dst_entry *dst)
292 struct rt6_info *rt = (struct rt6_info *)dst;
293 struct inet6_dev *idev = rt->rt6i_idev;
296 neigh_release(rt->n);
/* Non-host routes own their metrics block; host routes share per-peer. */
298 if (!(rt->dst.flags & DST_HOST))
299 dst_destroy_metrics_generic(dst);
302 rt->rt6i_idev = NULL;
/* dst->from aliases the expiry source when RTF_EXPIRES is not set. */
306 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
307 dst_release(dst->from);
309 if (rt6_has_peer(rt)) {
310 struct inet_peer *peer = rt6_peer_ptr(rt);
/* Global generation counter; bumped elsewhere to invalidate cached peers. */
315 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
317 static u32 rt6_peer_genid(void)
319 return atomic_read(&__rt6_peer_genid);
/*
 * rt6_bind_peer - resolve and attach the inet_peer for this route's
 * destination; @create controls whether a missing peer is allocated.
 * The genid stamp lets ip6_dst_check() detect stale bindings cheaply.
 */
322 void rt6_bind_peer(struct rt6_info *rt, int create)
324 struct inet_peer_base *base;
325 struct inet_peer *peer;
327 base = inetpeer_base_ptr(rt->_rt6i_peer);
331 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
/* Lost the race to another binder: rt6_set_peer() failed. */
333 if (!rt6_set_peer(rt, peer))
336 rt->rt6i_peer_genid = rt6_peer_genid();
/*
 * ip6_dst_ifdown - dst_ops->ifdown hook: when @dev goes away, re-home the
 * route's inet6_dev and neighbour onto the netns loopback device so the
 * dst stays safe to dereference until it is garbage-collected.
 */
340 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
343 struct rt6_info *rt = (struct rt6_info *)dst;
344 struct inet6_dev *idev = rt->rt6i_idev;
345 struct net_device *loopback_dev =
346 dev_net(dev)->loopback_dev;
348 if (dev != loopback_dev) {
349 if (idev && idev->dev == dev) {
350 struct inet6_dev *loopback_idev =
351 in6_dev_get(loopback_dev);
353 rt->rt6i_idev = loopback_idev;
/* Also migrate the cached neighbour's device reference. */
357 if (rt->n && rt->n->dev == dev) {
358 rt->n->dev = loopback_dev;
359 dev_hold(loopback_dev);
/*
 * rt6_check_expired - true if this route (or, for a clone, the route it
 * was copied from via dst.from) has passed its expiry time.
 */
365 static bool rt6_check_expired(const struct rt6_info *rt)
367 struct rt6_info *ort = NULL;
369 if (rt->rt6i_flags & RTF_EXPIRES) {
370 if (time_after(jiffies, rt->dst.expires))
/* Clones inherit expiry from their origin route. */
372 } else if (rt->dst.from) {
373 ort = (struct rt6_info *) rt->dst.from;
374 return (ort->rt6i_flags & RTF_EXPIRES) &&
375 time_after(jiffies, ort->dst.expires);
/*
 * rt6_need_strict - scoped addresses (multicast/link-local/loopback)
 * require a strict, interface-bound lookup.
 */
380 static bool rt6_need_strict(const struct in6_addr *daddr)
382 return ipv6_addr_type(daddr) &
383 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
387 * Route lookup. Any table->tb6_lock is implied.
/*
 * rt6_device_match - walk the sibling list of a fib6 leaf and pick the
 * route whose device matches @oif (or whose source address matches @saddr),
 * honoring RT6_LOOKUP_F_IFACE strictness.  Loopback devices are tracked as
 * a fallback in @local.  Returns the null entry when strict matching fails.
 */
390 static inline struct rt6_info *rt6_device_match(struct net *net,
392 const struct in6_addr *saddr,
396 struct rt6_info *local = NULL;
397 struct rt6_info *sprt;
/* Nothing to constrain on: first route wins. */
399 if (!oif && ipv6_addr_any(saddr))
402 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
403 struct net_device *dev = sprt->dst.dev;
406 if (dev->ifindex == oif)
408 if (dev->flags & IFF_LOOPBACK) {
409 if (!sprt->rt6i_idev ||
410 sprt->rt6i_idev->dev->ifindex != oif) {
411 if (flags & RT6_LOOKUP_F_IFACE && oif)
413 if (local && (!oif ||
414 local->rt6i_idev->dev->ifindex == oif))
/* Source-address match path when no oif constraint applied. */
420 if (ipv6_chk_addr(net, saddr, dev,
421 flags & RT6_LOOKUP_F_IFACE))
/* Strict lookup with no match: report unreachable via null entry. */
430 if (flags & RT6_LOOKUP_F_IFACE)
431 return net->ipv6.ip6_null_entry;
437 #ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * rt6_probe - router reachability probing (RFC 4191 support): if the
 * route's neighbour is not in a VALID state and the per-device probe
 * interval has elapsed, send a unicast-solicit NS to the router.
 * neigh->updated is refreshed first so probes stay rate-limited.
 */
438 static void rt6_probe(struct rt6_info *rt)
440 struct neighbour *neigh;
442 * Okay, this does not seem to be appropriate
443 * for now, however, we need to check if it
444 * is really so; aka Router Reachability Probing.
446 * Router Reachability Probe MUST be rate-limited
447 * to no more than one per minute.
450 neigh = rt ? rt->n : NULL;
451 if (!neigh || (neigh->nud_state & NUD_VALID))
453 read_lock_bh(&neigh->lock);
454 if (!(neigh->nud_state & NUD_VALID) &&
455 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
456 struct in6_addr mcaddr;
457 struct in6_addr *target;
/* Stamp before dropping the lock so concurrent probes back off. */
459 neigh->updated = jiffies;
460 read_unlock_bh(&neigh->lock);
462 target = (struct in6_addr *)&neigh->primary_key;
463 addrconf_addr_solict_mult(target, &mcaddr);
464 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
466 read_unlock_bh(&neigh->lock);
/* !CONFIG_IPV6_ROUTER_PREF: probing compiles out to a no-op. */
472 static inline void rt6_probe(struct rt6_info *rt)
478 * Default Router Selection (RFC 2461 6.3.6)
/*
 * rt6_check_dev - score the device match: non-zero when the route's
 * device (or, for loopback, its idev) satisfies @oif.
 */
480 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
482 struct net_device *dev = rt->dst.dev;
483 if (!oif || dev->ifindex == oif)
485 if ((dev->flags & IFF_LOOPBACK) &&
486 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
/*
 * rt6_check_neigh - score next-hop reachability from the neighbour
 * cache state (NUD_VALID good, NUD_FAILED bad under ROUTER_PREF).
 * Routes without a gateway need no neighbour check.
 */
491 static inline int rt6_check_neigh(struct rt6_info *rt)
493 struct neighbour *neigh;
498 if (rt->rt6i_flags & RTF_NONEXTHOP ||
499 !(rt->rt6i_flags & RTF_GATEWAY))
502 read_lock_bh(&neigh->lock);
503 if (neigh->nud_state & NUD_VALID)
505 #ifdef CONFIG_IPV6_ROUTER_PREF
506 else if (neigh->nud_state & NUD_FAILED)
511 read_unlock_bh(&neigh->lock);
/*
 * rt6_score_route - combine device match, router preference (when
 * configured) and neighbour reachability into a single comparable score;
 * returns a failure value when a required strictness bit is unmet.
 */
518 static int rt6_score_route(struct rt6_info *rt, int oif,
523 m = rt6_check_dev(rt, oif);
524 if (!m && (strict & RT6_LOOKUP_F_IFACE))
526 #ifdef CONFIG_IPV6_ROUTER_PREF
527 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
529 n = rt6_check_neigh(rt);
530 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
/*
 * find_match - keep the better of @match and @rt by score; skips expired
 * routes and may trigger rt6_probe() under RT6_LOOKUP_F_REACHABLE.
 * NOTE(review): the scoring comparison lines are missing from this excerpt.
 */
535 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
536 int *mpri, struct rt6_info *match)
540 if (rt6_check_expired(rt))
543 m = rt6_score_route(rt, oif, strict);
548 if (strict & RT6_LOOKUP_F_REACHABLE)
552 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
/*
 * find_rr_leaf - round-robin scan: score the siblings from rr_head to the
 * end of the equal-metric run, then wrap from the leaf head back to rr_head.
 */
560 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
561 struct rt6_info *rr_head,
562 u32 metric, int oif, int strict)
564 struct rt6_info *rt, *match;
568 for (rt = rr_head; rt && rt->rt6i_metric == metric;
569 rt = rt->dst.rt6_next)
570 match = find_match(rt, oif, strict, &mpri, match);
571 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
572 rt = rt->dst.rt6_next)
573 match = find_match(rt, oif, strict, &mpri, match);
/*
 * rt6_select - default-router selection for a fib6 node: pick the best
 * scoring route starting at the node's round-robin pointer; when nothing
 * reachable matched, advance rr_ptr so the next lookup tries the next
 * equal-metric sibling.  Falls back to the netns null entry.
 */
578 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
580 struct rt6_info *match, *rt0;
585 fn->rr_ptr = rt0 = fn->leaf;
587 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
590 (strict & RT6_LOOKUP_F_REACHABLE)) {
591 struct rt6_info *next = rt0->dst.rt6_next;
593 /* no entries matched; do round-robin */
594 if (!next || next->rt6i_metric != rt0->rt6i_metric)
601 net = dev_net(rt0->dst.dev);
602 return match ? match : net->ipv6.ip6_null_entry;
605 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - process a Route Information Option (RFC 4191) from a
 * router advertisement: validate lengths, then add/update/remove the
 * corresponding RTF_ROUTEINFO route with the advertised preference and
 * lifetime.  @opt points at the raw option, @gwaddr is the advertising
 * router.
 */
606 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
607 const struct in6_addr *gwaddr)
609 struct net *net = dev_net(dev);
610 struct route_info *rinfo = (struct route_info *) opt;
611 struct in6_addr prefix_buf, *prefix;
613 unsigned long lifetime;
616 if (len < sizeof(struct route_info)) {
620 /* Sanity check for prefix_len and length */
621 if (rinfo->length > 3) {
623 } else if (rinfo->prefix_len > 128) {
/* prefix_len > 64 needs the full 16-byte prefix (length >= 2). */
625 } else if (rinfo->prefix_len > 64) {
626 if (rinfo->length < 2) {
629 } else if (rinfo->prefix_len > 0) {
630 if (rinfo->length < 1) {
635 pref = rinfo->route_pref;
636 if (pref == ICMPV6_ROUTER_PREF_INVALID)
639 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
/* length == 3 carries a full in6_addr; otherwise build the prefix. */
641 if (rinfo->length == 3)
642 prefix = (struct in6_addr *)rinfo->prefix;
644 /* this function is safe */
645 ipv6_addr_prefix(&prefix_buf,
646 (struct in6_addr *)rinfo->prefix,
648 prefix = &prefix_buf;
651 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
/* Zero lifetime withdraws an existing route-info route. */
654 if (rt && !lifetime) {
660 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
663 rt->rt6i_flags = RTF_ROUTEINFO |
664 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
667 if (!addrconf_finite_timeout(lifetime))
668 rt6_clean_expires(rt);
670 rt6_set_expires(rt, jiffies + HZ * lifetime);
672 dst_release(&rt->dst);
/*
 * BACKTRACK - lookup helper macro: when the match was the null entry,
 * climb toward the tree root (consulting source-routed subtrees where
 * present) and retry until a node with route info is found or the root
 * is reached.  Expects `rt` and `fn` in the caller's scope.
 */
678 #define BACKTRACK(__net, saddr) \
680 if (rt == __net->ipv6.ip6_null_entry) { \
681 struct fib6_node *pn; \
683 if (fn->fn_flags & RTN_TL_ROOT) \
686 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
687 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
690 if (fn->fn_flags & RTN_RTINFO) \
/*
 * ip6_pol_route_lookup - simple (non-cloning) table lookup under the
 * table read lock: fib6_lookup, device match, backtrack, then bump the
 * dst use counter before unlocking.
 */
696 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
697 struct fib6_table *table,
698 struct flowi6 *fl6, int flags)
700 struct fib6_node *fn;
703 read_lock_bh(&table->tb6_lock);
704 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
707 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
708 BACKTRACK(net, &fl6->saddr);
710 dst_use(&rt->dst, jiffies);
711 read_unlock_bh(&table->tb6_lock);
/*
 * ip6_route_lookup - public wrapper dispatching through the policy
 * routing rules to ip6_pol_route_lookup.
 */
716 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
719 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
721 EXPORT_SYMBOL_GPL(ip6_route_lookup);
/*
 * rt6_lookup - convenience lookup by (daddr, saddr, oif): builds a flowi6,
 * sets strict-interface / has-source flags as appropriate, and returns the
 * matched rt6_info (reference held) or NULL.
 */
723 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
724 const struct in6_addr *saddr, int oif, int strict)
726 struct flowi6 fl6 = {
730 struct dst_entry *dst;
731 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
734 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
735 flags |= RT6_LOOKUP_F_HAS_SADDR;
738 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
740 return (struct rt6_info *) dst;
747 EXPORT_SYMBOL(rt6_lookup);
749 /* ip6_ins_rt is called with FREE table->tb6_lock.
750 It takes new route entry, the addition fails by any reason the
751 route is freed. In any case, if caller does not hold it, it may
/*
 * __ip6_ins_rt - insert @rt into its fib6 table under the table write
 * lock; returns fib6_add()'s result.
 */
755 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
758 struct fib6_table *table;
760 table = rt->rt6i_table;
761 write_lock_bh(&table->tb6_lock);
762 err = fib6_add(&table->tb6_root, rt, info);
763 write_unlock_bh(&table->tb6_lock);
/*
 * ip6_ins_rt - insert with a default netlink info block (no notifier
 * portid/seq; netns taken from the route's device).
 */
768 int ip6_ins_rt(struct rt6_info *rt)
770 struct nl_info info = {
771 .nl_net = dev_net(rt->dst.dev),
773 return __ip6_ins_rt(rt, &info);
/*
 * rt6_alloc_cow - clone @ort into a host (RTF_CACHE) route for @daddr,
 * binding a neighbour entry.  On neighbour-table overflow it temporarily
 * relaxes the GC sysctls, forces a GC pass, and retries once when not in
 * softirq context.
 */
776 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
777 const struct in6_addr *daddr,
778 const struct in6_addr *saddr)
786 rt = ip6_rt_copy(ort, daddr);
/* Retrying the neighbour bind is only safe outside softirq. */
789 int attempts = !in_softirq();
791 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
792 if (ort->rt6i_dst.plen != 128 &&
793 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
794 rt->rt6i_flags |= RTF_ANYCAST;
795 rt->rt6i_gateway = *daddr;
798 rt->rt6i_flags |= RTF_CACHE;
800 #ifdef CONFIG_IPV6_SUBTREES
/* Narrow the clone's source prefix to the exact source address. */
801 if (rt->rt6i_src.plen && saddr) {
802 rt->rt6i_src.addr = *saddr;
803 rt->rt6i_src.plen = 128;
808 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
809 struct net *net = dev_net(rt->dst.dev);
810 int saved_rt_min_interval =
811 net->ipv6.sysctl.ip6_rt_gc_min_interval;
812 int saved_rt_elasticity =
813 net->ipv6.sysctl.ip6_rt_gc_elasticity;
/* Force an aggressive GC pass, then restore the saved sysctls. */
815 if (attempts-- > 0) {
816 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
817 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
819 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
821 net->ipv6.sysctl.ip6_rt_gc_elasticity =
823 net->ipv6.sysctl.ip6_rt_gc_min_interval =
824 saved_rt_min_interval;
828 net_warn_ratelimited("Neighbour table overflow\n");
/*
 * rt6_alloc_clone - cheaper clone path: copy @ort for @daddr, mark it
 * RTF_CACHE and share (clone) the original's neighbour entry.
 */
837 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
838 const struct in6_addr *daddr)
840 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
843 rt->rt6i_flags |= RTF_CACHE;
844 rt->n = neigh_clone(ort->n);
/*
 * ip6_pol_route - main routing slow path: select a route (preferring
 * reachable routers unless forwarding is enabled), and when the result is
 * not already an RTF_CACHE clone, create one (cow for gateway-less hosts,
 * plain clone otherwise) and insert it — re-looking-up on insert races.
 */
849 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
850 struct flowi6 *fl6, int flags)
852 struct fib6_node *fn;
853 struct rt6_info *rt, *nrt;
/* Hosts want reachable routers; routers (forwarding=1) take any. */
857 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
859 strict |= flags & RT6_LOOKUP_F_IFACE;
862 read_lock_bh(&table->tb6_lock);
865 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
868 rt = rt6_select(fn, oif, strict | reachable);
870 BACKTRACK(net, &fl6->saddr);
871 if (rt == net->ipv6.ip6_null_entry ||
872 rt->rt6i_flags & RTF_CACHE)
/* Drop the lock before the (possibly sleeping/allocating) clone. */
876 read_unlock_bh(&table->tb6_lock);
878 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
879 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
880 else if (!(rt->dst.flags & DST_HOST))
881 nrt = rt6_alloc_clone(rt, &fl6->daddr);
885 dst_release(&rt->dst);
886 rt = nrt ? : net->ipv6.ip6_null_entry;
890 err = ip6_ins_rt(nrt);
899 * Race condition! In the gap, when table->tb6_lock was
900 * released someone could insert this route. Relookup.
902 dst_release(&rt->dst);
911 read_unlock_bh(&table->tb6_lock);
913 rt->dst.lastuse = jiffies;
/* Input-path adapter: route on the incoming interface. */
919 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
920 struct flowi6 *fl6, int flags)
922 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
/*
 * ip6_route_input_lookup - input lookup through the policy rules; scoped
 * destinations force strict interface matching (except PIM register
 * pseudo-devices, which have no real link).
 */
925 static struct dst_entry *ip6_route_input_lookup(struct net *net,
926 struct net_device *dev,
927 struct flowi6 *fl6, int flags)
929 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
930 flags |= RT6_LOOKUP_F_IFACE;
932 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
/*
 * ip6_route_input - attach a routing decision to an incoming skb: build
 * the flowi6 from the IPv6 header and set skb->dst.
 */
935 void ip6_route_input(struct sk_buff *skb)
937 const struct ipv6hdr *iph = ipv6_hdr(skb);
938 struct net *net = dev_net(skb->dev);
939 int flags = RT6_LOOKUP_F_HAS_SADDR;
940 struct flowi6 fl6 = {
941 .flowi6_iif = skb->dev->ifindex,
/* Flow label taken from the first 32 bits of the IPv6 header. */
944 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
945 .flowi6_mark = skb->mark,
946 .flowi6_proto = iph->nexthdr,
949 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
/* Output-path adapter: route on the outgoing interface. */
952 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
953 struct flowi6 *fl6, int flags)
955 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
/*
 * ip6_route_output - output-path route lookup for locally generated
 * traffic: defaults the iif to loopback, applies strict-interface and
 * has-source flags, and honors the socket's source-address preferences.
 */
958 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
963 fl6->flowi6_iif = net->loopback_dev->ifindex;
/* Bound sockets and scoped destinations pin the outgoing interface. */
965 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
966 flags |= RT6_LOOKUP_F_IFACE;
968 if (!ipv6_addr_any(&fl6->saddr))
969 flags |= RT6_LOOKUP_F_HAS_SADDR;
971 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
973 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
976 EXPORT_SYMBOL(ip6_route_output);
/*
 * ip6_blackhole_route - clone @dst_orig into a blackhole dst (used by
 * xfrm when a policy says "hold packets"): copies the routing identity
 * (idev, gateway, flags, keys, metrics) but discards all traffic.
 * Consumes the reference on @dst_orig; returns ERR_PTR(-ENOMEM) on
 * allocation failure.
 */
978 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
980 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
981 struct dst_entry *new = NULL;
983 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
985 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
986 rt6_init_peer(rt, net->ipv6.peers);
/* Blackhole: both directions silently discard. */
991 new->input = dst_discard;
992 new->output = dst_discard;
/* Read-only metrics can be shared by pointer; otherwise deep-copy. */
994 if (dst_metrics_read_only(&ort->dst))
995 new->_metrics = ort->dst._metrics;
997 dst_copy_metrics(new, &ort->dst);
998 rt->rt6i_idev = ort->rt6i_idev;
1000 in6_dev_hold(rt->rt6i_idev);
1002 rt->rt6i_gateway = ort->rt6i_gateway;
1003 rt->rt6i_flags = ort->rt6i_flags;
1004 rt6_clean_expires(rt);
1005 rt->rt6i_metric = 0;
1007 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1008 #ifdef CONFIG_IPV6_SUBTREES
1009 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1015 dst_release(dst_orig);
1016 return new ? new : ERR_PTR(-ENOMEM);
1020 * Destination cache support functions
/*
 * ip6_dst_check - dst_ops->check hook: a cached dst is still valid while
 * its fib6 node exists and the node's serial number matches the cookie;
 * refreshes a stale inet_peer binding via the genid counter.
 */
1023 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1025 struct rt6_info *rt;
1027 rt = (struct rt6_info *) dst;
1029 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1030 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1031 if (!rt6_has_peer(rt))
1032 rt6_bind_peer(rt, 0);
1033 rt->rt6i_peer_genid = rt6_peer_genid();
/*
 * ip6_negative_advice - dst_ops->negative_advice hook: drop an expired
 * RTF_CACHE clone so the next lookup re-resolves.
 * NOTE(review): the deletion/release lines are missing from this excerpt.
 */
1040 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1042 struct rt6_info *rt = (struct rt6_info *) dst;
1045 if (rt->rt6i_flags & RTF_CACHE) {
1046 if (rt6_check_expired(rt)) {
/*
 * ip6_link_failure - dst_ops->link_failure hook: send an address-
 * unreachable ICMP error and expire the cached route (or invalidate the
 * fib node serial for a default route).
 */
1058 static void ip6_link_failure(struct sk_buff *skb)
1060 struct rt6_info *rt;
1062 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1064 rt = (struct rt6_info *) skb_dst(skb);
1066 if (rt->rt6i_flags & RTF_CACHE)
1067 rt6_update_expires(rt, 0);
1068 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1069 rt->rt6i_node->fn_sernum = -1;
/*
 * ip6_rt_update_pmtu - dst_ops->update_pmtu hook: shrink a host route's
 * MTU metric, enabling ALLFRAG below IPV6_MIN_MTU, and arm the PMTU
 * expiry timer from the ip6_rt_mtu_expires sysctl.
 */
1073 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1075 struct rt6_info *rt6 = (struct rt6_info*)dst;
/* Only shrink, and only on /128 host clones. */
1078 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1079 struct net *net = dev_net(dst->dev);
1081 rt6->rt6i_flags |= RTF_MODIFIED;
1082 if (mtu < IPV6_MIN_MTU) {
1083 u32 features = dst_metric(dst, RTAX_FEATURES);
/* Below the IPv6 minimum MTU we must fragment everything. */
1085 features |= RTAX_FEATURE_ALLFRAG;
1086 dst_metric_set(dst, RTAX_FEATURES, features);
1088 dst_metric_set(dst, RTAX_MTU, mtu);
1089 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
/*
 * ip6_update_pmtu - apply a Packet Too Big report: rebuild the flow from
 * the offending packet's header, look up its route and update the PMTU.
 */
1093 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1096 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1097 struct dst_entry *dst;
1100 memset(&fl6, 0, sizeof(fl6));
1101 fl6.flowi6_oif = oif;
1102 fl6.flowi6_mark = mark;
1103 fl6.flowi6_flags = 0;
1104 fl6.daddr = iph->daddr;
1105 fl6.saddr = iph->saddr;
1106 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1108 dst = ip6_route_output(net, NULL, &fl6);
1110 ip6_rt_update_pmtu(dst, ntohl(mtu));
1113 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
/* Socket-scoped convenience wrapper around ip6_update_pmtu(). */
1115 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1117 ip6_update_pmtu(skb, sock_net(sk), mtu,
1118 sk->sk_bound_dev_if, sk->sk_mark);
1120 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
/*
 * ip6_redirect - apply an ICMPv6 Redirect: rebuild the flow from the
 * embedded packet header, look up its route and hand it to
 * rt6_do_redirect().
 */
1122 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1124 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1125 struct dst_entry *dst;
1128 memset(&fl6, 0, sizeof(fl6));
1129 fl6.flowi6_oif = oif;
1130 fl6.flowi6_mark = mark;
1131 fl6.flowi6_flags = 0;
1132 fl6.daddr = iph->daddr;
1133 fl6.saddr = iph->saddr;
1134 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1136 dst = ip6_route_output(net, NULL, &fl6);
1138 rt6_do_redirect(dst, skb);
1141 EXPORT_SYMBOL_GPL(ip6_redirect);
/* Socket-scoped convenience wrapper around ip6_redirect(). */
1143 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1145 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1147 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
/*
 * ip6_default_advmss - dst_ops->default_advmss hook: advertised MSS is
 * path MTU minus IPv6+TCP headers, clamped between the ip6_rt_min_advmss
 * sysctl and the maximal non-jumbo payload.
 */
1149 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1151 struct net_device *dev = dst->dev;
1152 unsigned int mtu = dst_mtu(dst);
1153 struct net *net = dev_net(dev);
1155 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1157 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1158 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1161 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1162 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1163 * IPV6_MAXPLEN is also valid and means: "any MSS,
1164 * rely only on pmtu discovery"
1166 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/*
 * ip6_mtu - dst_ops->mtu hook: the raw MTU metric when set, otherwise
 * the interface's configured mtu6.
 * NOTE(review): the fallback/return lines are missing from this excerpt.
 */
1171 static unsigned int ip6_mtu(const struct dst_entry *dst)
1173 struct inet6_dev *idev;
1174 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1182 idev = __in6_dev_get(dst->dev);
1184 mtu = idev->cnf.mtu6;
/* Singly-linked list of ICMPv6 dsts awaiting GC, guarded by its lock. */
1190 static struct dst_entry *icmp6_dst_gc_list;
1191 static DEFINE_SPINLOCK(icmp6_dst_lock);
/*
 * icmp6_dst_alloc - build a standalone host dst for sending an ICMPv6
 * packet (not inserted into any fib table): resolves the neighbour,
 * pins hop limit to 255, links the dst onto the icmp6 GC list and runs
 * it through xfrm.  Returns ERR_PTR on device/allocation/neigh failure.
 */
1193 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1194 struct neighbour *neigh,
1197 struct dst_entry *dst;
1198 struct rt6_info *rt;
1199 struct inet6_dev *idev = in6_dev_get(dev);
1200 struct net *net = dev_net(dev);
1202 if (unlikely(!idev))
1203 return ERR_PTR(-ENODEV);
1205 rt = ip6_dst_alloc(net, dev, 0, NULL);
1206 if (unlikely(!rt)) {
1208 dst = ERR_PTR(-ENOMEM);
1215 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1216 if (IS_ERR(neigh)) {
1219 return ERR_CAST(neigh);
1223 rt->dst.flags |= DST_HOST;
1224 rt->dst.output = ip6_output;
1226 atomic_set(&rt->dst.__refcnt, 1);
1227 rt->rt6i_dst.addr = fl6->daddr;
1228 rt->rt6i_dst.plen = 128;
1229 rt->rt6i_idev = idev;
/* ND messages are sent with hop limit 255 (RFC 4861). */
1230 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
/* Not in a fib table, so track it on the private GC list instead. */
1232 spin_lock_bh(&icmp6_dst_lock);
1233 rt->dst.next = icmp6_dst_gc_list;
1234 icmp6_dst_gc_list = &rt->dst;
1235 spin_unlock_bh(&icmp6_dst_lock);
1237 fib6_force_start_gc(net);
1239 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
/*
 * icmp6_dst_gc - walk the icmp6 dst list and free entries whose refcount
 * has dropped to zero.
 * NOTE(review): the unlink/free and "more remaining" lines are missing
 * from this excerpt.
 */
1245 int icmp6_dst_gc(void)
1247 struct dst_entry *dst, **pprev;
1250 spin_lock_bh(&icmp6_dst_lock);
1251 pprev = &icmp6_dst_gc_list;
1253 while ((dst = *pprev) != NULL) {
1254 if (!atomic_read(&dst->__refcnt)) {
1263 spin_unlock_bh(&icmp6_dst_lock);
/*
 * icmp6_clean_all - apply @func to every dst on the icmp6 list; entries
 * for which it returns non-zero are removed (removal lines not visible).
 */
1268 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1271 struct dst_entry *dst, **pprev;
1273 spin_lock_bh(&icmp6_dst_lock);
1274 pprev = &icmp6_dst_gc_list;
1275 while ((dst = *pprev) != NULL) {
1276 struct rt6_info *rt = (struct rt6_info *) dst;
1277 if (func(rt, arg)) {
1284 spin_unlock_bh(&icmp6_dst_lock);
/*
 * ip6_dst_gc - dst_ops->gc hook: rate-limited garbage collection driven
 * by the ip6_rt_gc_* sysctls.  The adaptive ip6_rt_gc_expire value grows
 * each pass and decays when the cache is back under gc_thresh.  Returns
 * non-zero when the cache is still over rt_max_size (allocation should
 * fail).
 */
1287 static int ip6_dst_gc(struct dst_ops *ops)
1289 unsigned long now = jiffies;
1290 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1291 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1292 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1293 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1294 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1295 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
/* Skip GC entirely when recently run and still under the size cap. */
1298 entries = dst_entries_get_fast(ops);
1299 if (time_after(rt_last_gc + rt_min_interval, now) &&
1300 entries <= rt_max_size)
1303 net->ipv6.ip6_rt_gc_expire++;
1304 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1305 net->ipv6.ip6_rt_last_gc = now;
1306 entries = dst_entries_get_slow(ops);
1307 if (entries < ops->gc_thresh)
1308 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
/* Decay the aggressiveness proportionally to elasticity. */
1310 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1311 return entries > rt_max_size;
1314 /* Clean host part of a prefix. Not necessary in radix tree,
1315 but results in cleaner routing tables.
1317 Remove it only when all the things will work!
/*
 * ip6_dst_hoplimit - effective hop limit for a dst: the HOPLIMIT metric
 * if set, else the egress device's per-interface setting, else the
 * netns-wide devconf default.
 */
1320 int ip6_dst_hoplimit(struct dst_entry *dst)
1322 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1323 if (hoplimit == 0) {
1324 struct net_device *dev = dst->dev;
1325 struct inet6_dev *idev;
1328 idev = __in6_dev_get(dev);
1330 hoplimit = idev->cnf.hop_limit;
1332 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1337 EXPORT_SYMBOL(ip6_dst_hoplimit);
/*
 * ip6_route_add - create and insert a route from a parsed fib6_config
 * (netlink or ioctl origin): validates prefix lengths, resolves device
 * and table, allocates the rt6_info, fills in expiry/protocol/handlers,
 * validates any gateway (must be link-local or reachable unicast),
 * applies prefsrc and metrics, then inserts via __ip6_ins_rt().
 * NOTE(review): several error-path and cleanup lines are missing from
 * this excerpt; comments cover the visible logic only.
 */
1343 int ip6_route_add(struct fib6_config *cfg)
1346 struct net *net = cfg->fc_nlinfo.nl_net;
1347 struct rt6_info *rt = NULL;
1348 struct net_device *dev = NULL;
1349 struct inet6_dev *idev = NULL;
1350 struct fib6_table *table;
1353 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
/* Source-routing config is only meaningful with IPV6_SUBTREES. */
1355 #ifndef CONFIG_IPV6_SUBTREES
1356 if (cfg->fc_src_len)
1359 if (cfg->fc_ifindex) {
1361 dev = dev_get_by_index(net, cfg->fc_ifindex);
1364 idev = in6_dev_get(dev);
1369 if (cfg->fc_metric == 0)
1370 cfg->fc_metric = IP6_RT_PRIO_USER;
/* Without NLM_F_CREATE, only an already-existing table may be used. */
1373 if (cfg->fc_nlinfo.nlh &&
1374 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1375 table = fib6_get_table(net, cfg->fc_table);
1377 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1378 table = fib6_new_table(net, cfg->fc_table);
1381 table = fib6_new_table(net, cfg->fc_table);
1387 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1394 rt->dst.obsolete = -1;
1396 if (cfg->fc_flags & RTF_EXPIRES)
1397 rt6_set_expires(rt, jiffies +
1398 clock_t_to_jiffies(cfg->fc_expires));
1400 rt6_clean_expires(rt);
1402 if (cfg->fc_protocol == RTPROT_UNSPEC)
1403 cfg->fc_protocol = RTPROT_BOOT;
1404 rt->rt6i_protocol = cfg->fc_protocol;
1406 addr_type = ipv6_addr_type(&cfg->fc_dst);
/* Pick the input handler by destination class. */
1408 if (addr_type & IPV6_ADDR_MULTICAST)
1409 rt->dst.input = ip6_mc_input;
1410 else if (cfg->fc_flags & RTF_LOCAL)
1411 rt->dst.input = ip6_input;
1413 rt->dst.input = ip6_forward;
1415 rt->dst.output = ip6_output;
1417 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1418 rt->rt6i_dst.plen = cfg->fc_dst_len;
1419 if (rt->rt6i_dst.plen == 128)
1420 rt->dst.flags |= DST_HOST;
/* Non-host routes with metrics need their own metrics block. */
1422 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1423 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1428 dst_init_metrics(&rt->dst, metrics, 0);
1430 #ifdef CONFIG_IPV6_SUBTREES
1431 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1432 rt->rt6i_src.plen = cfg->fc_src_len;
1435 rt->rt6i_metric = cfg->fc_metric;
1437 /* We cannot add true routes via loopback here,
1438 they would result in kernel looping; promote them to reject routes
1440 if ((cfg->fc_flags & RTF_REJECT) ||
1441 (dev && (dev->flags & IFF_LOOPBACK) &&
1442 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1443 !(cfg->fc_flags & RTF_LOCAL))) {
1444 /* hold loopback dev/idev if we haven't done so. */
1445 if (dev != net->loopback_dev) {
1450 dev = net->loopback_dev;
1452 idev = in6_dev_get(dev);
1458 rt->dst.output = ip6_pkt_discard_out;
1459 rt->dst.input = ip6_pkt_discard;
1460 rt->dst.error = -ENETUNREACH;
1461 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1465 if (cfg->fc_flags & RTF_GATEWAY) {
1466 const struct in6_addr *gw_addr;
1469 gw_addr = &cfg->fc_gateway;
1470 rt->rt6i_gateway = *gw_addr;
1471 gwa_type = ipv6_addr_type(gw_addr);
/* Non-link-local gateways must themselves be reachable (and unicast). */
1473 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1474 struct rt6_info *grt;
1476 /* IPv6 strictly inhibits using not link-local
1477 addresses as nexthop address.
1478 Otherwise, router will not able to send redirects.
1479 It is very good, but in some (rare!) circumstances
1480 (SIT, PtP, NBMA NOARP links) it is handy to allow
1481 some exceptions. --ANK
1484 if (!(gwa_type & IPV6_ADDR_UNICAST))
1487 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1489 err = -EHOSTUNREACH;
1493 if (dev != grt->dst.dev) {
1494 dst_release(&grt->dst);
/* Inherit device/idev from the route to the gateway. */
1499 idev = grt->rt6i_idev;
1501 in6_dev_hold(grt->rt6i_idev);
/* A gateway reachable only via another gateway is rejected. */
1503 if (!(grt->rt6i_flags & RTF_GATEWAY))
1505 dst_release(&grt->dst);
1511 if (!dev || (dev->flags & IFF_LOOPBACK))
1519 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
/* prefsrc must be an address configured on the egress device. */
1520 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1524 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1525 rt->rt6i_prefsrc.plen = 128;
1527 rt->rt6i_prefsrc.plen = 0;
1529 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1530 err = rt6_bind_neighbour(rt, dev);
1535 rt->rt6i_flags = cfg->fc_flags;
/* Copy caller-supplied metrics (RTAX_*) from the netlink attributes. */
1542 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1543 int type = nla_type(nla);
1546 if (type > RTAX_MAX) {
1551 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1557 rt->rt6i_idev = idev;
1558 rt->rt6i_table = table;
1560 cfg->fc_nlinfo.nl_net = dev_net(dev);
1562 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
/*
 * __ip6_del_rt - unlink @rt from its fib6 table under the table write
 * lock, then drop the caller's reference to the route.
 * NOTE(review): listing is elided; the body of the null-entry guard and
 * the final "return err;" are among the original lines not shown here.
 */
1574 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1577 struct fib6_table *table;
1578 struct net *net = dev_net(rt->dst.dev);
/* Never delete the per-namespace null (blackhole) entry. */
1580 if (rt == net->ipv6.ip6_null_entry)
1583 table = rt->rt6i_table;
1584 write_lock_bh(&table->tb6_lock);
1586 err = fib6_del(rt, info);
/* fib6_del() unlinked the route; release the reference we were given. */
1587 dst_release(&rt->dst);
1589 write_unlock_bh(&table->tb6_lock);
/*
 * ip6_del_rt - public route-deletion entry point: wraps __ip6_del_rt()
 * with a minimal nl_info carrying only the route's network namespace.
 */
1594 int ip6_del_rt(struct rt6_info *rt)
1596 struct nl_info info = {
1597 .nl_net = dev_net(rt->dst.dev),
1599 return __ip6_del_rt(rt, &info);
/*
 * ip6_route_del - delete the route described by @cfg.  Locates the fib6
 * node for the dst/src prefixes, then walks its leaf chain looking for a
 * route matching the optional ifindex / gateway / metric selectors.
 * NOTE(review): listing is elided; table/fn NULL checks, dst_hold() on
 * the matched route and the "continue" statements are not visible here.
 */
1602 static int ip6_route_del(struct fib6_config *cfg)
1604 struct fib6_table *table;
1605 struct fib6_node *fn;
1606 struct rt6_info *rt;
1609 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1613 read_lock_bh(&table->tb6_lock);
1615 fn = fib6_locate(&table->tb6_root,
1616 &cfg->fc_dst, cfg->fc_dst_len,
1617 &cfg->fc_src, cfg->fc_src_len);
/* Scan all routes sharing this prefix for one matching the selectors. */
1620 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1621 if (cfg->fc_ifindex &&
1623 rt->dst.dev->ifindex != cfg->fc_ifindex))
1625 if (cfg->fc_flags & RTF_GATEWAY &&
1626 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1628 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
/* Found: drop the read lock before deleting under the write lock. */
1631 read_unlock_bh(&table->tb6_lock);
1633 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1636 read_unlock_bh(&table->tb6_lock);
/*
 * rt6_do_redirect - process a received ICMPv6 Redirect for @dst.
 * Validates the message (length, non-multicast destination, link-local
 * unicast target per RFC 4861), refuses it when forwarding is enabled or
 * accept_redirects is off, updates the neighbour cache for the new
 * first hop, and installs a cloned RTF_CACHE host route via the new
 * gateway.  Listeners are told about the path change via netevent.
 * NOTE(review): listing is elided -- 'dest' initialisation, on_link
 * computation, several error gotos and the cleanup label are missing
 * from the visible lines.
 */
1641 static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
1643 struct net *net = dev_net(skb->dev);
1644 struct netevent_redirect netevent;
1645 struct rt6_info *rt, *nrt = NULL;
1646 const struct in6_addr *target;
1647 struct ndisc_options ndopts;
1648 const struct in6_addr *dest;
1649 struct neighbour *old_neigh;
1650 struct inet6_dev *in6_dev;
1651 struct neighbour *neigh;
1652 struct icmp6hdr *icmph;
1653 int optlen, on_link;
/* Remaining option bytes after the fixed header + two addresses. */
1656 optlen = skb->tail - skb->transport_header;
1657 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1660 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1664 icmph = icmp6_hdr(skb);
1665 target = (const struct in6_addr *) (icmph + 1);
/* RFC 4861: the redirected destination must not be multicast. */
1668 if (ipv6_addr_is_multicast(dest)) {
1669 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
/* dest == target means the destination itself is on-link. */
1674 if (ipv6_addr_equal(dest, target)) {
1676 } else if (ipv6_addr_type(target) !=
1677 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1678 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
/* Routers (forwarding on) and hosts with accept_redirects=0 ignore it. */
1682 in6_dev = __in6_dev_get(skb->dev);
1685 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1689 * The IP source address of the Redirect MUST be the same as the current
1690 * first-hop router for the specified ICMP Destination Address.
1693 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1694 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1699 if (ndopts.nd_opts_tgt_lladdr) {
1700 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1703 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1708 rt = (struct rt6_info *) dst;
1709 if (rt == net->ipv6.ip6_null_entry) {
1710 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1714 /* Redirect received -> path was valid.
1715 * Look, redirects are sent only in response to data packets,
1716 * so that this nexthop apparently is reachable. --ANK
1718 dst_confirm(&rt->dst);
/* Create (or find) the neighbour entry for the new first hop. */
1720 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1724 /* Duplicate redirect: silently ignore. */
1726 if (neigh == old_neigh)
1730 * We have finally decided to accept it.
/* Mark the new neighbour STALE; flag it as a router unless on-link. */
1733 neigh_update(neigh, lladdr, NUD_STALE,
1734 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1735 NEIGH_UPDATE_F_OVERRIDE|
1736 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1737 NEIGH_UPDATE_F_ISROUTER))
/* Clone a /128 cache route toward dest via the new gateway. */
1740 nrt = ip6_rt_copy(rt, dest);
1744 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1746 nrt->rt6i_flags &= ~RTF_GATEWAY;
1748 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1749 nrt->n = neigh_clone(neigh);
1751 if (ip6_ins_rt(nrt))
/* Tell interested subsystems (e.g. RDMA) that the path changed. */
1754 netevent.old = &rt->dst;
1755 netevent.old_neigh = old_neigh;
1756 netevent.new = &nrt->dst;
1757 netevent.new_neigh = neigh;
1758 netevent.daddr = dest;
1759 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
/* A superseded cache entry can be removed outright. */
1761 if (rt->rt6i_flags & RTF_CACHE) {
1762 rt = (struct rt6_info *) dst_clone(&rt->dst);
1767 neigh_release(neigh);
1771 * Misc support functions
/*
 * ip6_rt_copy - duplicate @ort as a /128 host route toward @dest.
 * Copies the handlers, metrics, idev, gateway and flags of the original;
 * the clone starts with metric 0 and no expiry.  Used when creating
 * per-destination cache entries (e.g. from redirects).
 * NOTE(review): listing is elided; the NULL check after ip6_dst_alloc()
 * and the final "return rt;" are not visible here.
 */
1774 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1775 const struct in6_addr *dest)
1777 struct net *net = dev_net(ort->dst.dev);
1778 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1782 rt->dst.input = ort->dst.input;
1783 rt->dst.output = ort->dst.output;
1784 rt->dst.flags |= DST_HOST;
/* The copy is always a host route for exactly @dest. */
1786 rt->rt6i_dst.addr = *dest;
1787 rt->rt6i_dst.plen = 128;
1788 dst_copy_metrics(&rt->dst, &ort->dst);
1789 rt->dst.error = ort->dst.error;
1790 rt->rt6i_idev = ort->rt6i_idev;
1792 in6_dev_hold(rt->rt6i_idev);
1793 rt->dst.lastuse = jiffies;
1795 rt->rt6i_gateway = ort->rt6i_gateway;
1796 rt->rt6i_flags = ort->rt6i_flags;
/* RA-learned default routes keep a backpointer to their origin. */
1797 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1798 (RTF_DEFAULT | RTF_ADDRCONF))
1799 rt6_set_from(rt, ort);
1801 rt6_clean_expires(rt);
1802 rt->rt6i_metric = 0;
1804 #ifdef CONFIG_IPV6_SUBTREES
1805 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1807 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1808 rt->rt6i_table = ort->rt6i_table;
1813 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_get_route_info - find an RA Route Information (RFC 4191) route in
 * RT6_TABLE_INFO matching @prefix/@prefixlen learnt from @gwaddr on
 * @ifindex.  Returns the route with a hold taken, or NULL.
 * NOTE(review): listing is elided; the dst_hold() on a match and the
 * loop-exit goto are among the lines not visible here.
 */
1814 static struct rt6_info *rt6_get_route_info(struct net *net,
1815 const struct in6_addr *prefix, int prefixlen,
1816 const struct in6_addr *gwaddr, int ifindex)
1818 struct fib6_node *fn;
1819 struct rt6_info *rt = NULL;
1820 struct fib6_table *table;
1822 table = fib6_get_table(net, RT6_TABLE_INFO);
1826 write_lock_bh(&table->tb6_lock);
1827 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
/* Only accept routes created from route-information options. */
1831 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1832 if (rt->dst.dev->ifindex != ifindex)
1834 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1836 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1842 write_unlock_bh(&table->tb6_lock);
/*
 * rt6_add_route_info - install a route learnt from an RA Route
 * Information option into RT6_TABLE_INFO, then return it via
 * rt6_get_route_info().  A zero prefix length is treated as a default
 * route (RTF_DEFAULT).
 */
1846 static struct rt6_info *rt6_add_route_info(struct net *net,
1847 const struct in6_addr *prefix, int prefixlen,
1848 const struct in6_addr *gwaddr, int ifindex,
1851 struct fib6_config cfg = {
1852 .fc_table = RT6_TABLE_INFO,
1853 .fc_metric = IP6_RT_PRIO_USER,
1854 .fc_ifindex = ifindex,
1855 .fc_dst_len = prefixlen,
1856 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1857 RTF_UP | RTF_PREF(pref),
1859 .fc_nlinfo.nlh = NULL,
1860 .fc_nlinfo.nl_net = net,
1863 cfg.fc_dst = *prefix;
1864 cfg.fc_gateway = *gwaddr;
1866 /* We should treat it as a default route if prefix length is 0. */
1868 cfg.fc_flags |= RTF_DEFAULT;
/* ip6_route_add() may fail; the lookup below then returns NULL. */
1870 ip6_route_add(&cfg);
1872 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
/*
 * rt6_get_dflt_router - find the addrconf-created default route via
 * gateway @addr on @dev in RT6_TABLE_DFLT.  Returns it held, or NULL.
 * NOTE(review): listing is elided; the dst_hold() on a match is not
 * visible here.
 */
1876 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1878 struct rt6_info *rt;
1879 struct fib6_table *table;
1881 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1885 write_lock_bh(&table->tb6_lock);
/* Default routes all live under the root node's leaf chain. */
1886 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1887 if (dev == rt->dst.dev &&
1888 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1889 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1894 write_unlock_bh(&table->tb6_lock);
/*
 * rt6_add_dflt_router - install a default route learnt from a Router
 * Advertisement (RTF_ADDRCONF|RTF_DEFAULT|RTF_EXPIRES) into
 * RT6_TABLE_DFLT, then return it via rt6_get_dflt_router().
 */
1898 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1899 struct net_device *dev,
1902 struct fib6_config cfg = {
1903 .fc_table = RT6_TABLE_DFLT,
1904 .fc_metric = IP6_RT_PRIO_USER,
1905 .fc_ifindex = dev->ifindex,
1906 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1907 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1909 .fc_nlinfo.nlh = NULL,
1910 .fc_nlinfo.nl_net = dev_net(dev),
1913 cfg.fc_gateway = *gwaddr;
/* Destination is left as ::/0 (fc_dst untouched, fc_dst_len 0). */
1915 ip6_route_add(&cfg);
1917 return rt6_get_dflt_router(gwaddr, dev);
/*
 * rt6_purge_dflt_routers - delete every RA-learnt default route in the
 * namespace's RT6_TABLE_DFLT.  The read lock is dropped before each
 * deletion; presumably the loop restarts afterwards (restart label is
 * among the elided lines -- TODO confirm against full source).
 */
1920 void rt6_purge_dflt_routers(struct net *net)
1922 struct rt6_info *rt;
1923 struct fib6_table *table;
1925 /* NOTE: Keep consistent with rt6_get_dflt_router */
1926 table = fib6_get_table(net, RT6_TABLE_DFLT);
1931 read_lock_bh(&table->tb6_lock);
1932 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1933 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
/* Cannot delete while holding the read lock: drop it first. */
1935 read_unlock_bh(&table->tb6_lock);
1940 read_unlock_bh(&table->tb6_lock);
/*
 * rtmsg_to_fib6_config - translate a legacy ioctl in6_rtmsg into the
 * internal fib6_config representation (always targets RT6_TABLE_MAIN).
 */
1943 static void rtmsg_to_fib6_config(struct net *net,
1944 struct in6_rtmsg *rtmsg,
1945 struct fib6_config *cfg)
1947 memset(cfg, 0, sizeof(*cfg));
1949 cfg->fc_table = RT6_TABLE_MAIN;
1950 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1951 cfg->fc_metric = rtmsg->rtmsg_metric;
/* rtmsg_info carries the expiry time for this API. */
1952 cfg->fc_expires = rtmsg->rtmsg_info;
1953 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1954 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1955 cfg->fc_flags = rtmsg->rtmsg_flags;
1957 cfg->fc_nlinfo.nl_net = net;
1959 cfg->fc_dst = rtmsg->rtmsg_dst;
1960 cfg->fc_src = rtmsg->rtmsg_src;
1961 cfg->fc_gateway = rtmsg->rtmsg_gateway;
/*
 * ipv6_route_ioctl - handle the legacy SIOCADDRT/SIOCDELRT route ioctls.
 * Requires CAP_NET_ADMIN; copies the userspace in6_rtmsg, converts it
 * and dispatches to ip6_route_add()/ip6_route_del().
 * NOTE(review): listing is elided; the rtnl locking and the default
 * switch return are among the lines not visible here.
 */
1964 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1966 struct fib6_config cfg;
1967 struct in6_rtmsg rtmsg;
1971 case SIOCADDRT: /* Add a route */
1972 case SIOCDELRT: /* Delete a route */
1973 if (!capable(CAP_NET_ADMIN))
1975 err = copy_from_user(&rtmsg, arg,
1976 sizeof(struct in6_rtmsg));
1980 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1985 err = ip6_route_add(&cfg);
1988 err = ip6_route_del(&cfg);
2002 * Drop the packet on the floor
/*
 * ip6_pkt_drop - common sink for packets with no usable route: bump the
 * relevant per-idev MIB counter and emit an ICMPv6 Destination
 * Unreachable with the given @code.
 * NOTE(review): in the full source the INNOROUTES case falls through to
 * OUTNOROUTES for the counter update -- the fallthrough is elided here.
 */
2005 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2008 struct dst_entry *dst = skb_dst(skb);
2009 switch (ipstats_mib_noroutes) {
2010 case IPSTATS_MIB_INNOROUTES:
2011 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
/* Unspecified destination: account as an address error instead. */
2012 if (type == IPV6_ADDR_ANY) {
2013 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2014 IPSTATS_MIB_INADDRERRORS);
2018 case IPSTATS_MIB_OUTNOROUTES:
2019 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2020 ipstats_mib_noroutes);
2023 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
/* dst input handler for reject routes: drop with "no route" (inbound). */
2028 static int ip6_pkt_discard(struct sk_buff *skb)
2030 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
/* dst output handler for reject routes: drop with "no route" (outbound). */
2033 static int ip6_pkt_discard_out(struct sk_buff *skb)
/* Point skb->dev at the output device before generating the ICMP error. */
2035 skb->dev = skb_dst(skb)->dev;
2036 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2039 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/* dst input handler for prohibit routes: administratively prohibited. */
2041 static int ip6_pkt_prohibit(struct sk_buff *skb)
2043 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
/* dst output handler for prohibit routes: administratively prohibited. */
2046 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2048 skb->dev = skb_dst(skb)->dev;
2049 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2055 * Allocate a dst for local (unicast / anycast) address.
/*
 * addrconf_dst_alloc - build the /128 host route (in RT6_TABLE_LOCAL,
 * via the loopback device) that delivers a local unicast or anycast
 * address to the stack.  Returns the new route or an ERR_PTR.
 * NOTE(review): listing is elided; the anycast/local flag selection
 * condition and the dst_free() on the bind-neighbour error path are not
 * visible here.
 */
2058 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2059 const struct in6_addr *addr,
2062 struct net *net = dev_net(idev->dev);
2063 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2067 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2068 return ERR_PTR(-ENOMEM);
2073 rt->dst.flags |= DST_HOST;
/* Local delivery: input path terminates in ip6_input. */
2074 rt->dst.input = ip6_input;
2075 rt->dst.output = ip6_output;
2076 rt->rt6i_idev = idev;
2077 rt->dst.obsolete = -1;
2079 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2081 rt->rt6i_flags |= RTF_ANYCAST;
2083 rt->rt6i_flags |= RTF_LOCAL;
2084 err = rt6_bind_neighbour(rt, rt->dst.dev);
2087 return ERR_PTR(err);
2090 rt->rt6i_dst.addr = *addr;
2091 rt->rt6i_dst.plen = 128;
2092 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
/* Hand the caller its own reference. */
2094 atomic_set(&rt->dst.__refcnt, 1);
/*
 * ip6_route_get_saddr - choose a source address for @daddr: prefer the
 * route's configured prefsrc when set, otherwise fall back to source
 * address selection on the route's device.
 */
2099 int ip6_route_get_saddr(struct net *net,
2100 struct rt6_info *rt,
2101 const struct in6_addr *daddr,
2103 struct in6_addr *saddr)
2105 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
/* Non-zero plen means an explicit preferred source was configured. */
2107 if (rt->rt6i_prefsrc.plen)
2108 *saddr = rt->rt6i_prefsrc.addr;
2110 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2111 daddr, prefs, saddr);
2115 /* remove deleted ip from prefsrc entries */
/* Walk-callback argument bundle for fib6_remove_prefsrc(). */
2116 struct arg_dev_net_ip {
2117 struct net_device *dev;
2119 struct in6_addr *addr;
/*
 * fib6_remove_prefsrc - fib6_clean_all() callback: clear the preferred
 * source of any route (on @dev, or on any device when @dev is NULL)
 * whose prefsrc matches the address being removed.
 */
2122 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2124 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2125 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2126 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2128 if (((void *)rt->dst.dev == dev || !dev) &&
2129 rt != net->ipv6.ip6_null_entry &&
2130 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2131 /* remove prefsrc entry */
2132 rt->rt6i_prefsrc.plen = 0;
/*
 * rt6_remove_prefsrc - on address deletion, scrub @ifp's address from
 * every route's preferred-source field via fib6_remove_prefsrc().
 */
2137 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2139 struct net *net = dev_net(ifp->idev->dev);
2140 struct arg_dev_net_ip adni = {
2141 .dev = ifp->idev->dev,
2145 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
/* Walk-callback argument bundle for fib6_ifdown(). */
2148 struct arg_dev_net {
2149 struct net_device *dev;
/*
 * fib6_ifdown - fib6_clean_all() callback selecting routes to delete on
 * device teardown: matches routes on @dev (or every route when @dev is
 * NULL), always sparing the namespace null entry.
 * NOTE(review): the return value that triggers deletion is on an elided
 * line -- confirm against the full source.
 */
2153 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2155 const struct arg_dev_net *adn = arg;
2156 const struct net_device *dev = adn->dev;
2158 if ((rt->dst.dev == dev || !dev) &&
2159 rt != adn->net->ipv6.ip6_null_entry)
/*
 * rt6_ifdown - flush all routes (fib and icmp6 cached) that reference
 * @dev, using the fib6_ifdown() selector above.
 */
2165 void rt6_ifdown(struct net *net, struct net_device *dev)
2167 struct arg_dev_net adn = {
2172 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2173 icmp6_clean_all(fib6_ifdown, &adn);
/* Walk-callback argument bundle for rt6_mtu_change_route(). */
2176 struct rt6_mtu_change_arg {
2177 struct net_device *dev;
/*
 * rt6_mtu_change_route - fib6_clean_all() callback: propagate a device
 * MTU change into the RTAX_MTU metric of routes over that device,
 * honouring the metric lock.  The long comment below explains why both
 * decreases and (administrative) increases must be reflected.
 */
2181 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2183 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2184 struct inet6_dev *idev;
2186 /* In IPv6 pmtu discovery is not optional,
2187 so that RTAX_MTU lock cannot disable it.
2188 We still use this lock to block changes
2189 caused by addrconf/ndisc.
2192 idev = __in6_dev_get(arg->dev);
2196 /* For administrative MTU increase, there is no way to discover
2197 IPv6 PMTU increase, so PMTU increase should be updated here.
2198 Since RFC 1981 doesn't include administrative MTU increase
2199 update PMTU increase is a MUST. (i.e. jumbo frame)
2202 If new MTU is less than route PMTU, this new MTU will be the
2203 lowest MTU in the path, update the route PMTU to reflect PMTU
2204 decreases; if new MTU is greater than route PMTU, and the
2205 old MTU is the lowest MTU in the path, update the route PMTU
2206 to reflect the increase. In this case if the other nodes' MTU
2207 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2210 if (rt->dst.dev == arg->dev &&
2211 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2212 (dst_mtu(&rt->dst) >= arg->mtu ||
2213 (dst_mtu(&rt->dst) < arg->mtu &&
2214 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2215 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
/* rt6_mtu_change - apply a device MTU change to all routes over @dev. */
2220 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2222 struct rt6_mtu_change_arg arg = {
2227 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
/* Netlink attribute validation policy for IPv6 RTM_* route messages. */
2230 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2231 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2232 [RTA_OIF] = { .type = NLA_U32 },
2233 [RTA_IIF] = { .type = NLA_U32 },
2234 [RTA_PRIORITY] = { .type = NLA_U32 },
2235 [RTA_METRICS] = { .type = NLA_NESTED },
/*
 * rtm_to_fib6_config - parse an RTM_NEWROUTE/RTM_DELROUTE netlink
 * message (rtmsg header + attributes) into a fib6_config.  An RTA_TABLE
 * attribute, when present, overrides the header's rtm_table.
 * NOTE(review): listing is elided; the error gotos for short RTA_DST /
 * RTA_SRC payloads and the final return are not visible here.
 */
2238 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2239 struct fib6_config *cfg)
2242 struct nlattr *tb[RTA_MAX+1];
2245 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2250 rtm = nlmsg_data(nlh);
2251 memset(cfg, 0, sizeof(*cfg));
2253 cfg->fc_table = rtm->rtm_table;
2254 cfg->fc_dst_len = rtm->rtm_dst_len;
2255 cfg->fc_src_len = rtm->rtm_src_len;
2256 cfg->fc_flags = RTF_UP;
2257 cfg->fc_protocol = rtm->rtm_protocol;
2259 if (rtm->rtm_type == RTN_UNREACHABLE)
2260 cfg->fc_flags |= RTF_REJECT;
2262 if (rtm->rtm_type == RTN_LOCAL)
2263 cfg->fc_flags |= RTF_LOCAL;
2265 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2266 cfg->fc_nlinfo.nlh = nlh;
2267 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2269 if (tb[RTA_GATEWAY]) {
2270 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2271 cfg->fc_flags |= RTF_GATEWAY;
/* Addresses may be sent truncated to the prefix length in bytes. */
2275 int plen = (rtm->rtm_dst_len + 7) >> 3;
2277 if (nla_len(tb[RTA_DST]) < plen)
2280 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2284 int plen = (rtm->rtm_src_len + 7) >> 3;
2286 if (nla_len(tb[RTA_SRC]) < plen)
2289 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2292 if (tb[RTA_PREFSRC])
2293 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2296 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2298 if (tb[RTA_PRIORITY])
2299 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
/* Metrics stay as a raw attribute blob; consumed later by the adder. */
2301 if (tb[RTA_METRICS]) {
2302 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2303 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2307 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
/* RTM_DELROUTE handler: parse the message and delete the route. */
2314 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2316 struct fib6_config cfg;
2319 err = rtm_to_fib6_config(skb, nlh, &cfg);
2323 return ip6_route_del(&cfg);
/* RTM_NEWROUTE handler: parse the message and add the route. */
2326 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2328 struct fib6_config cfg;
2331 err = rtm_to_fib6_config(skb, nlh, &cfg);
2335 return ip6_route_add(&cfg);
/*
 * rt6_nlmsg_size - worst-case payload size of one route notification,
 * used to size the skb in inet6_rt_notify().  Must stay in sync with
 * the attributes emitted by rt6_fill_node().
 */
2338 static inline size_t rt6_nlmsg_size(void)
2340 return NLMSG_ALIGN(sizeof(struct rtmsg))
2341 + nla_total_size(16) /* RTA_SRC */
2342 + nla_total_size(16) /* RTA_DST */
2343 + nla_total_size(16) /* RTA_GATEWAY */
2344 + nla_total_size(16) /* RTA_PREFSRC */
2345 + nla_total_size(4) /* RTA_TABLE */
2346 + nla_total_size(4) /* RTA_IIF */
2347 + nla_total_size(4) /* RTA_OIF */
2348 + nla_total_size(4) /* RTA_PRIORITY */
2349 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2350 + nla_total_size(sizeof(struct rta_cacheinfo));
/*
 * rt6_fill_node - serialise @rt into a netlink RTM message on @skb.
 * @dst/@src non-NULL means this answers a specific RTM_GETROUTE query
 * (cloned /128 semantics); @prefix restricts output to prefix routes;
 * @nowait is forwarded to the multicast resolver.  Returns the usual
 * nlmsg_end()/-EMSGSIZE convention.
 * NOTE(review): listing is elided; neighbour lookup setup around line
 * 2463, the expires computation branches and several closing braces are
 * not visible here.
 */
2353 static int rt6_fill_node(struct net *net,
2354 struct sk_buff *skb, struct rt6_info *rt,
2355 struct in6_addr *dst, struct in6_addr *src,
2356 int iif, int type, u32 pid, u32 seq,
2357 int prefix, int nowait, unsigned int flags)
2360 struct nlmsghdr *nlh;
2363 struct neighbour *n;
2365 if (prefix) { /* user wants prefix routes only */
2366 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2367 /* success since this is not a prefix route */
2372 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2376 rtm = nlmsg_data(nlh);
2377 rtm->rtm_family = AF_INET6;
2378 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2379 rtm->rtm_src_len = rt->rt6i_src.plen;
2382 table = rt->rt6i_table->tb6_id;
2384 table = RT6_TABLE_UNSPEC;
2385 rtm->rtm_table = table;
2386 if (nla_put_u32(skb, RTA_TABLE, table))
2387 goto nla_put_failure;
/* Map route flags onto the rtnetlink route type. */
2388 if (rt->rt6i_flags & RTF_REJECT)
2389 rtm->rtm_type = RTN_UNREACHABLE;
2390 else if (rt->rt6i_flags & RTF_LOCAL)
2391 rtm->rtm_type = RTN_LOCAL;
2392 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2393 rtm->rtm_type = RTN_LOCAL;
2395 rtm->rtm_type = RTN_UNICAST;
2397 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2398 rtm->rtm_protocol = rt->rt6i_protocol;
2399 if (rt->rt6i_flags & RTF_DYNAMIC)
2400 rtm->rtm_protocol = RTPROT_REDIRECT;
2401 else if (rt->rt6i_flags & RTF_ADDRCONF)
2402 rtm->rtm_protocol = RTPROT_KERNEL;
2403 else if (rt->rt6i_flags & RTF_DEFAULT)
2404 rtm->rtm_protocol = RTPROT_RA;
2406 if (rt->rt6i_flags & RTF_CACHE)
2407 rtm->rtm_flags |= RTM_F_CLONED;
/* Query responses report the queried address as a /128. */
2410 if (nla_put(skb, RTA_DST, 16, dst))
2411 goto nla_put_failure;
2412 rtm->rtm_dst_len = 128;
2413 } else if (rtm->rtm_dst_len)
2414 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2415 goto nla_put_failure;
2416 #ifdef CONFIG_IPV6_SUBTREES
2418 if (nla_put(skb, RTA_SRC, 16, src))
2419 goto nla_put_failure;
2420 rtm->rtm_src_len = 128;
2421 } else if (rtm->rtm_src_len &&
2422 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2423 goto nla_put_failure;
2426 #ifdef CONFIG_IPV6_MROUTE
/* Multicast destinations defer to the multicast routing tables. */
2427 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2428 int err = ip6mr_get_route(net, skb, rtm, nowait);
2433 goto nla_put_failure;
2435 if (err == -EMSGSIZE)
2436 goto nla_put_failure;
2441 if (nla_put_u32(skb, RTA_IIF, iif))
2442 goto nla_put_failure;
2444 struct in6_addr saddr_buf;
2445 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2446 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2447 goto nla_put_failure;
2450 if (rt->rt6i_prefsrc.plen) {
2451 struct in6_addr saddr_buf;
2452 saddr_buf = rt->rt6i_prefsrc.addr;
2453 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2454 goto nla_put_failure;
2457 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2458 goto nla_put_failure;
2463 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2465 goto nla_put_failure;
2471 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2472 goto nla_put_failure;
2473 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2474 goto nla_put_failure;
/* Clamp the remaining lifetime into the 32-bit cacheinfo field. */
2475 if (!(rt->rt6i_flags & RTF_EXPIRES))
2477 else if (rt->dst.expires - jiffies < INT_MAX)
2478 expires = rt->dst.expires - jiffies;
2482 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2483 goto nla_put_failure;
2485 return nlmsg_end(skb, nlh);
2488 nlmsg_cancel(skb, nlh);
/*
 * rt6_dump_route - per-route callback for RTM_GETROUTE dumps: honours
 * the RTM_F_PREFIX filter from the request and delegates serialisation
 * to rt6_fill_node().
 */
2492 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2494 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
/* Only trust the flag if the request actually carried an rtmsg header. */
2497 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2498 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2499 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2503 return rt6_fill_node(arg->net,
2504 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2505 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2506 prefix, 0, NLM_F_MULTI);
/*
 * inet6_rtm_getroute - RTM_GETROUTE handler: build a flowi6 from the
 * request attributes, perform an input-style lookup (when RTA_IIF is
 * given) or an output lookup, then serialise the result back to the
 * requester.
 * NOTE(review): listing is elided; error gotos, the iif device-not-found
 * path and the final cleanup/return are not visible here.
 */
2509 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2511 struct net *net = sock_net(in_skb->sk);
2512 struct nlattr *tb[RTA_MAX+1];
2513 struct rt6_info *rt;
2514 struct sk_buff *skb;
2517 int err, iif = 0, oif = 0;
2519 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2524 memset(&fl6, 0, sizeof(fl6));
2527 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2530 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2534 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2537 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2541 iif = nla_get_u32(tb[RTA_IIF]);
2544 oif = nla_get_u32(tb[RTA_OIF]);
/* With an input interface, emulate the forwarding-path lookup. */
2547 struct net_device *dev;
2550 dev = __dev_get_by_index(net, iif);
2556 fl6.flowi6_iif = iif;
2558 if (!ipv6_addr_any(&fl6.saddr))
2559 flags |= RT6_LOOKUP_F_HAS_SADDR;
2561 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2564 fl6.flowi6_oif = oif;
2566 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2569 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
/* Allocation failed: drop the route reference taken by the lookup. */
2571 dst_release(&rt->dst);
2576 /* Reserve room for dummy headers, this skb can pass
2577 through good chunk of routing engine.
2579 skb_reset_mac_header(skb);
2580 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
/* The skb takes over the route reference. */
2582 skb_dst_set(skb, &rt->dst);
2584 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2585 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2586 nlh->nlmsg_seq, 0, 0, 0);
2592 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
/*
 * inet6_rt_notify - multicast an RTM notification for @rt to the
 * RTNLGRP_IPV6_ROUTE group; on failure report via rtnl_set_sk_err().
 */
2597 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2599 struct sk_buff *skb;
2600 struct net *net = info->nl_net;
2605 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
/* Sized by rt6_nlmsg_size(); -EMSGSIZE below would mean it is stale. */
2607 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2611 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2612 event, info->pid, seq, 0, 0, 0);
2614 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2615 WARN_ON(err == -EMSGSIZE);
2619 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2620 info->nlh, gfp_any());
2624 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
/*
 * ip6_route_dev_notify - netdevice notifier: when the loopback device
 * registers in a namespace, bind the special null/prohibit/blackhole
 * route templates to it (they need a device and idev to be usable).
 * NOTE(review): the closing brace and return NOTIFY_OK are on elided
 * lines past this view.
 */
2627 static int ip6_route_dev_notify(struct notifier_block *this,
2628 unsigned long event, void *data)
2630 struct net_device *dev = (struct net_device *)data;
2631 struct net *net = dev_net(dev);
2633 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2634 net->ipv6.ip6_null_entry->dst.dev = dev;
2635 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2636 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2637 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2638 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2639 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2640 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2651 #ifdef CONFIG_PROC_FS
/*
 * rt6_info_route - emit one /proc/net/ipv6_route line for @rt:
 * dst/plen, src/plen, gateway (or zeros), metric, refcnt, use count,
 * flags and device name.
 */
2662 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2664 struct seq_file *m = p_arg;
2665 struct neighbour *n;
2667 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2669 #ifdef CONFIG_IPV6_SUBTREES
2670 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
/* Without subtrees the source column is a fixed all-zero placeholder. */
2672 seq_puts(m, "00000000000000000000000000000000 00 ");
2677 seq_printf(m, "%pi6", n->primary_key);
2679 seq_puts(m, "00000000000000000000000000000000");
2682 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2683 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2684 rt->dst.__use, rt->rt6i_flags,
2685 rt->dst.dev ? rt->dst.dev->name : "");
/* seq_file show: walk every route read-only, printing via rt6_info_route. */
2689 static int ipv6_route_show(struct seq_file *m, void *v)
2691 struct net *net = (struct net *)m->private;
2692 fib6_clean_all_ro(net, rt6_info_route, 0, m);
/* open handler for /proc/net/ipv6_route. */
2696 static int ipv6_route_open(struct inode *inode, struct file *file)
2698 return single_open_net(inode, file, ipv6_route_show);
/* File operations for /proc/net/ipv6_route (read elided from view). */
2701 static const struct file_operations ipv6_route_proc_fops = {
2702 .owner = THIS_MODULE,
2703 .open = ipv6_route_open,
2705 .llseek = seq_lseek,
2706 .release = single_release_net,
/* seq_file show for /proc/net/rt6_stats: one line of fib counters. */
2709 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2711 struct net *net = (struct net *)seq->private;
2712 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2713 net->ipv6.rt6_stats->fib_nodes,
2714 net->ipv6.rt6_stats->fib_route_nodes,
2715 net->ipv6.rt6_stats->fib_rt_alloc,
2716 net->ipv6.rt6_stats->fib_rt_entries,
2717 net->ipv6.rt6_stats->fib_rt_cache,
2718 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2719 net->ipv6.rt6_stats->fib_discarded_routes)
/* open handler for /proc/net/rt6_stats. */
2724 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2726 return single_open_net(inode, file, rt6_stats_seq_show);
/* File operations for /proc/net/rt6_stats (read elided from view). */
2729 static const struct file_operations rt6_stats_seq_fops = {
2730 .owner = THIS_MODULE,
2731 .open = rt6_stats_seq_open,
2733 .llseek = seq_lseek,
2734 .release = single_release_net,
2736 #endif /* CONFIG_PROC_FS */
2738 #ifdef CONFIG_SYSCTL
/*
 * ipv6_sysctl_rtcache_flush - handler for net.ipv6.route.flush: reads
 * the configured delay and kicks the fib6 garbage collector (~0UL means
 * flush everything when delay <= 0).
 */
2741 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2742 void __user *buffer, size_t *lenp, loff_t *ppos)
2749 net = (struct net *)ctl->extra1;
2750 delay = net->ipv6.sysctl.flush_delay;
2751 proc_dointvec(ctl, write, buffer, lenp, ppos);
2752 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
/*
 * Template for the per-namespace net.ipv6.route sysctl table.  Entry
 * ORDER matters: ipv6_route_sysctl_init() rebinds .data by index.
 */
2756 ctl_table ipv6_route_table_template[] = {
2758 .procname = "flush",
2759 .data = &init_net.ipv6.sysctl.flush_delay,
2760 .maxlen = sizeof(int),
2762 .proc_handler = ipv6_sysctl_rtcache_flush
2765 .procname = "gc_thresh",
2766 .data = &ip6_dst_ops_template.gc_thresh,
2767 .maxlen = sizeof(int),
2769 .proc_handler = proc_dointvec,
2772 .procname = "max_size",
2773 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2774 .maxlen = sizeof(int),
2776 .proc_handler = proc_dointvec,
2779 .procname = "gc_min_interval",
2780 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2781 .maxlen = sizeof(int),
2783 .proc_handler = proc_dointvec_jiffies,
2786 .procname = "gc_timeout",
2787 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2788 .maxlen = sizeof(int),
2790 .proc_handler = proc_dointvec_jiffies,
2793 .procname = "gc_interval",
2794 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2795 .maxlen = sizeof(int),
2797 .proc_handler = proc_dointvec_jiffies,
2800 .procname = "gc_elasticity",
2801 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2802 .maxlen = sizeof(int),
2804 .proc_handler = proc_dointvec,
2807 .procname = "mtu_expires",
2808 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2809 .maxlen = sizeof(int),
2811 .proc_handler = proc_dointvec_jiffies,
2814 .procname = "min_adv_mss",
2815 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2816 .maxlen = sizeof(int),
2818 .proc_handler = proc_dointvec,
/* Same variable as gc_min_interval, exposed in milliseconds. */
2821 .procname = "gc_min_interval_ms",
2822 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2823 .maxlen = sizeof(int),
2825 .proc_handler = proc_dointvec_ms_jiffies,
/*
 * ipv6_route_sysctl_init - clone the sysctl template for @net and point
 * each entry's .data at the namespace's own variables.  Indices must
 * match the template order above.
 * NOTE(review): listing is elided; the kmemdup failure check and final
 * return are not visible here.
 */
2830 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2832 struct ctl_table *table;
2834 table = kmemdup(ipv6_route_table_template,
2835 sizeof(ipv6_route_table_template),
2839 table[0].data = &net->ipv6.sysctl.flush_delay;
/* extra1 carries the namespace for ipv6_sysctl_rtcache_flush(). */
2840 table[0].extra1 = net;
2841 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2842 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2843 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2844 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2845 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2846 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2847 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2848 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2849 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
/*
 * ip6_route_net_init - per-namespace setup: clone the dst_ops template,
 * allocate the null (and, with multiple tables, prohibit/blackhole)
 * route templates, and seed the routing sysctl defaults.  Unwinds with
 * the goto ladder at the bottom on any allocation failure.
 */
2856 static int __net_init ip6_route_net_init(struct net *net)
2860 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2861 sizeof(net->ipv6.ip6_dst_ops));
2863 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2864 goto out_ip6_dst_ops;
2866 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2867 sizeof(*net->ipv6.ip6_null_entry),
2869 if (!net->ipv6.ip6_null_entry)
2870 goto out_ip6_dst_entries;
/* Each template route is its own dst path and uses this netns' ops. */
2871 net->ipv6.ip6_null_entry->dst.path =
2872 (struct dst_entry *)net->ipv6.ip6_null_entry;
2873 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2874 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2875 ip6_template_metrics, true);
2877 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2878 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2879 sizeof(*net->ipv6.ip6_prohibit_entry),
2881 if (!net->ipv6.ip6_prohibit_entry)
2882 goto out_ip6_null_entry;
2883 net->ipv6.ip6_prohibit_entry->dst.path =
2884 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2885 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2886 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2887 ip6_template_metrics, true);
2889 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2890 sizeof(*net->ipv6.ip6_blk_hole_entry),
2892 if (!net->ipv6.ip6_blk_hole_entry)
2893 goto out_ip6_prohibit_entry;
2894 net->ipv6.ip6_blk_hole_entry->dst.path =
2895 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2896 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2897 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2898 ip6_template_metrics, true);
/* Default routing sysctl values for this namespace. */
2901 net->ipv6.sysctl.flush_delay = 0;
2902 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2903 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2904 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2905 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2906 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2907 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2908 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2910 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2916 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2917 out_ip6_prohibit_entry:
2918 kfree(net->ipv6.ip6_prohibit_entry);
2920 kfree(net->ipv6.ip6_null_entry);
2922 out_ip6_dst_entries:
2923 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
/* Per-namespace teardown: free template routes and dst accounting. */
2928 static void __net_exit ip6_route_net_exit(struct net *net)
2930 kfree(net->ipv6.ip6_null_entry);
2931 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2932 kfree(net->ipv6.ip6_prohibit_entry);
2933 kfree(net->ipv6.ip6_blk_hole_entry);
2935 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
/* Late per-namespace init: register the two /proc/net entries. */
2938 static int __net_init ip6_route_net_init_late(struct net *net)
2940 #ifdef CONFIG_PROC_FS
2941 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2942 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
/* Late per-namespace exit: remove the /proc/net entries. */
2947 static void __net_exit ip6_route_net_exit_late(struct net *net)
2949 #ifdef CONFIG_PROC_FS
2950 proc_net_remove(net, "ipv6_route");
2951 proc_net_remove(net, "rt6_stats");
/* Main per-namespace init/exit hooks for IPv6 routing. */
2955 static struct pernet_operations ip6_route_net_ops = {
2956 .init = ip6_route_net_init,
2957 .exit = ip6_route_net_exit,
/*
 * Per-namespace inetpeer base allocation for IPv6.
 * NOTE(review): listing is elided; the kmalloc NULL check and return
 * statements are not visible here.
 */
2960 static int __net_init ipv6_inetpeer_init(struct net *net)
2962 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2966 inet_peer_base_init(bp);
2967 net->ipv6.peers = bp;
/* Per-namespace inetpeer teardown: detach, invalidate and free the base. */
2971 static void __net_exit ipv6_inetpeer_exit(struct net *net)
2973 struct inet_peer_base *bp = net->ipv6.peers;
2975 net->ipv6.peers = NULL;
2976 inetpeer_invalidate_tree(bp);
/* Per-namespace hooks for the IPv6 inetpeer cache. */
2980 static struct pernet_operations ipv6_inetpeer_ops = {
2981 .init = ipv6_inetpeer_init,
2982 .exit = ipv6_inetpeer_exit,
2985 static struct pernet_operations ip6_route_net_late_ops = {
2986 .init = ip6_route_net_init_late,
2987 .exit = ip6_route_net_exit_late,
2990 static struct notifier_block ip6_route_dev_notifier = {
2991 .notifier_call = ip6_route_dev_notify,
2995 int __init ip6_route_init(void)
3000 ip6_dst_ops_template.kmem_cachep =
3001 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3002 SLAB_HWCACHE_ALIGN, NULL);
3003 if (!ip6_dst_ops_template.kmem_cachep)
3006 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3008 goto out_kmem_cache;
3010 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3012 goto out_dst_entries;
3014 ret = register_pernet_subsys(&ip6_route_net_ops);
3016 goto out_register_inetpeer;
3018 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3020 /* Registering of the loopback is done before this portion of code,
3021 * the loopback reference in rt6_info will not be taken, do it
3022 * manually for init_net */
3023 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3024 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3025 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3026 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3027 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3028 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3029 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3033 goto out_register_subsys;
3039 ret = fib6_rules_init();
3043 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3045 goto fib6_rules_init;
3048 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3049 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3050 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3051 goto out_register_late_subsys;
3053 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3055 goto out_register_late_subsys;
3060 out_register_late_subsys:
3061 unregister_pernet_subsys(&ip6_route_net_late_ops);
3063 fib6_rules_cleanup();
3068 out_register_subsys:
3069 unregister_pernet_subsys(&ip6_route_net_ops);
3070 out_register_inetpeer:
3071 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3073 dst_entries_destroy(&ip6_dst_blackhole_ops);
3075 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3079 void ip6_route_cleanup(void)
3081 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3082 unregister_pernet_subsys(&ip6_route_net_late_ops);
3083 fib6_rules_cleanup();
3086 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3087 unregister_pernet_subsys(&ip6_route_net_ops);
3088 dst_entries_destroy(&ip6_dst_blackhole_ops);
3089 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);