net: proc: change proc_net_remove to remove_proc_entry
[linux-block.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69                                     const struct in6_addr *dest);
70 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int      ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void             ip6_dst_destroy(struct dst_entry *);
75 static void             ip6_dst_ifdown(struct dst_entry *,
76                                        struct net_device *dev, int how);
77 static int               ip6_dst_gc(struct dst_ops *ops);
78
79 static int              ip6_pkt_discard(struct sk_buff *skb);
80 static int              ip6_pkt_discard_out(struct sk_buff *skb);
81 static void             ip6_link_failure(struct sk_buff *skb);
82 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83                                            struct sk_buff *skb, u32 mtu);
84 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85                                         struct sk_buff *skb);
86
87 #ifdef CONFIG_IPV6_ROUTE_INFO
88 static struct rt6_info *rt6_add_route_info(struct net *net,
89                                            const struct in6_addr *prefix, int prefixlen,
90                                            const struct in6_addr *gwaddr, int ifindex,
91                                            unsigned int pref);
92 static struct rt6_info *rt6_get_route_info(struct net *net,
93                                            const struct in6_addr *prefix, int prefixlen,
94                                            const struct in6_addr *gwaddr, int ifindex);
95 #endif
96
97 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
98 {
99         struct rt6_info *rt = (struct rt6_info *) dst;
100         struct inet_peer *peer;
101         u32 *p = NULL;
102
103         if (!(rt->dst.flags & DST_HOST))
104                 return NULL;
105
106         peer = rt6_get_peer_create(rt);
107         if (peer) {
108                 u32 *old_p = __DST_METRICS_PTR(old);
109                 unsigned long prev, new;
110
111                 p = peer->metrics;
112                 if (inet_metrics_new(peer))
113                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
114
115                 new = (unsigned long) p;
116                 prev = cmpxchg(&dst->_metrics, old, new);
117
118                 if (prev != old) {
119                         p = __DST_METRICS_PTR(prev);
120                         if (prev & DST_METRICS_READ_ONLY)
121                                 p = NULL;
122                 }
123         }
124         return p;
125 }
126
127 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128                                              struct sk_buff *skb,
129                                              const void *daddr)
130 {
131         struct in6_addr *p = &rt->rt6i_gateway;
132
133         if (!ipv6_addr_any(p))
134                 return (const void *) p;
135         else if (skb)
136                 return &ipv6_hdr(skb)->daddr;
137         return daddr;
138 }
139
140 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141                                           struct sk_buff *skb,
142                                           const void *daddr)
143 {
144         struct rt6_info *rt = (struct rt6_info *) dst;
145         struct neighbour *n;
146
147         daddr = choose_neigh_daddr(rt, skb, daddr);
148         n = __ipv6_neigh_lookup(dst->dev, daddr);
149         if (n)
150                 return n;
151         return neigh_create(&nd_tbl, daddr, dst->dev);
152 }
153
154 static struct dst_ops ip6_dst_ops_template = {
155         .family                 =       AF_INET6,
156         .protocol               =       cpu_to_be16(ETH_P_IPV6),
157         .gc                     =       ip6_dst_gc,
158         .gc_thresh              =       1024,
159         .check                  =       ip6_dst_check,
160         .default_advmss         =       ip6_default_advmss,
161         .mtu                    =       ip6_mtu,
162         .cow_metrics            =       ipv6_cow_metrics,
163         .destroy                =       ip6_dst_destroy,
164         .ifdown                 =       ip6_dst_ifdown,
165         .negative_advice        =       ip6_negative_advice,
166         .link_failure           =       ip6_link_failure,
167         .update_pmtu            =       ip6_rt_update_pmtu,
168         .redirect               =       rt6_do_redirect,
169         .local_out              =       __ip6_local_out,
170         .neigh_lookup           =       ip6_neigh_lookup,
171 };
172
173 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
174 {
175         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
176
177         return mtu ? : dst->dev->mtu;
178 }
179
180 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
181                                          struct sk_buff *skb, u32 mtu)
182 {
183 }
184
/* dst_ops->redirect callback for blackhole dsts: deliberately a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
189
190 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
191                                          unsigned long old)
192 {
193         return NULL;
194 }
195
196 static struct dst_ops ip6_dst_blackhole_ops = {
197         .family                 =       AF_INET6,
198         .protocol               =       cpu_to_be16(ETH_P_IPV6),
199         .destroy                =       ip6_dst_destroy,
200         .check                  =       ip6_dst_check,
201         .mtu                    =       ip6_blackhole_mtu,
202         .default_advmss         =       ip6_default_advmss,
203         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
204         .redirect               =       ip6_rt_blackhole_redirect,
205         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
206         .neigh_lookup           =       ip6_neigh_lookup,
207 };
208
209 static const u32 ip6_template_metrics[RTAX_MAX] = {
210         [RTAX_HOPLIMIT - 1] = 0,
211 };
212
213 static const struct rt6_info ip6_null_entry_template = {
214         .dst = {
215                 .__refcnt       = ATOMIC_INIT(1),
216                 .__use          = 1,
217                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
218                 .error          = -ENETUNREACH,
219                 .input          = ip6_pkt_discard,
220                 .output         = ip6_pkt_discard_out,
221         },
222         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
223         .rt6i_protocol  = RTPROT_KERNEL,
224         .rt6i_metric    = ~(u32) 0,
225         .rt6i_ref       = ATOMIC_INIT(1),
226 };
227
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Packet handlers used by the prohibit template below. */
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route entry: a reject route whose dst
 * error is -EACCES, handled by ip6_pkt_prohibit{,_out}().
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32)0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route entry: a reject route whose dst
 * error is -EINVAL and whose input/output handlers are dst_discard().
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32)0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
264
265 /* allocate dst with ip6_dst_ops */
266 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
267                                              struct net_device *dev,
268                                              int flags,
269                                              struct fib6_table *table)
270 {
271         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
272                                         0, DST_OBSOLETE_FORCE_CHK, flags);
273
274         if (rt) {
275                 struct dst_entry *dst = &rt->dst;
276
277                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
278                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
279                 rt->rt6i_genid = rt_genid(net);
280                 INIT_LIST_HEAD(&rt->rt6i_siblings);
281                 rt->rt6i_nsiblings = 0;
282         }
283         return rt;
284 }
285
286 static void ip6_dst_destroy(struct dst_entry *dst)
287 {
288         struct rt6_info *rt = (struct rt6_info *)dst;
289         struct inet6_dev *idev = rt->rt6i_idev;
290
291         if (!(rt->dst.flags & DST_HOST))
292                 dst_destroy_metrics_generic(dst);
293
294         if (idev) {
295                 rt->rt6i_idev = NULL;
296                 in6_dev_put(idev);
297         }
298
299         if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
300                 dst_release(dst->from);
301
302         if (rt6_has_peer(rt)) {
303                 struct inet_peer *peer = rt6_peer_ptr(rt);
304                 inet_putpeer(peer);
305         }
306 }
307
308 void rt6_bind_peer(struct rt6_info *rt, int create)
309 {
310         struct inet_peer_base *base;
311         struct inet_peer *peer;
312
313         base = inetpeer_base_ptr(rt->_rt6i_peer);
314         if (!base)
315                 return;
316
317         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
318         if (peer) {
319                 if (!rt6_set_peer(rt, peer))
320                         inet_putpeer(peer);
321         }
322 }
323
324 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
325                            int how)
326 {
327         struct rt6_info *rt = (struct rt6_info *)dst;
328         struct inet6_dev *idev = rt->rt6i_idev;
329         struct net_device *loopback_dev =
330                 dev_net(dev)->loopback_dev;
331
332         if (dev != loopback_dev) {
333                 if (idev && idev->dev == dev) {
334                         struct inet6_dev *loopback_idev =
335                                 in6_dev_get(loopback_dev);
336                         if (loopback_idev) {
337                                 rt->rt6i_idev = loopback_idev;
338                                 in6_dev_put(idev);
339                         }
340                 }
341         }
342 }
343
344 static bool rt6_check_expired(const struct rt6_info *rt)
345 {
346         if (rt->rt6i_flags & RTF_EXPIRES) {
347                 if (time_after(jiffies, rt->dst.expires))
348                         return true;
349         } else if (rt->dst.from) {
350                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
351         }
352         return false;
353 }
354
355 static bool rt6_need_strict(const struct in6_addr *daddr)
356 {
357         return ipv6_addr_type(daddr) &
358                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
359 }
360
361 /* Multipath route selection:
362  *   Hash based function using packet header and flowlabel.
363  * Adapted from fib_info_hashfn()
364  */
365 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
366                                const struct flowi6 *fl6)
367 {
368         unsigned int val = fl6->flowi6_proto;
369
370         val ^= ipv6_addr_hash(&fl6->daddr);
371         val ^= ipv6_addr_hash(&fl6->saddr);
372
373         /* Work only if this not encapsulated */
374         switch (fl6->flowi6_proto) {
375         case IPPROTO_UDP:
376         case IPPROTO_TCP:
377         case IPPROTO_SCTP:
378                 val ^= (__force u16)fl6->fl6_sport;
379                 val ^= (__force u16)fl6->fl6_dport;
380                 break;
381
382         case IPPROTO_ICMPV6:
383                 val ^= (__force u16)fl6->fl6_icmp_type;
384                 val ^= (__force u16)fl6->fl6_icmp_code;
385                 break;
386         }
387         /* RFC6438 recommands to use flowlabel */
388         val ^= (__force u32)fl6->flowlabel;
389
390         /* Perhaps, we need to tune, this function? */
391         val = val ^ (val >> 7) ^ (val >> 12);
392         return val % candidate_count;
393 }
394
395 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
396                                              struct flowi6 *fl6)
397 {
398         struct rt6_info *sibling, *next_sibling;
399         int route_choosen;
400
401         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
402         /* Don't change the route, if route_choosen == 0
403          * (siblings does not include ourself)
404          */
405         if (route_choosen)
406                 list_for_each_entry_safe(sibling, next_sibling,
407                                 &match->rt6i_siblings, rt6i_siblings) {
408                         route_choosen--;
409                         if (route_choosen == 0) {
410                                 match = sibling;
411                                 break;
412                         }
413                 }
414         return match;
415 }
416
417 /*
418  *      Route lookup. Any table->tb6_lock is implied.
419  */
420
421 static inline struct rt6_info *rt6_device_match(struct net *net,
422                                                     struct rt6_info *rt,
423                                                     const struct in6_addr *saddr,
424                                                     int oif,
425                                                     int flags)
426 {
427         struct rt6_info *local = NULL;
428         struct rt6_info *sprt;
429
430         if (!oif && ipv6_addr_any(saddr))
431                 goto out;
432
433         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
434                 struct net_device *dev = sprt->dst.dev;
435
436                 if (oif) {
437                         if (dev->ifindex == oif)
438                                 return sprt;
439                         if (dev->flags & IFF_LOOPBACK) {
440                                 if (!sprt->rt6i_idev ||
441                                     sprt->rt6i_idev->dev->ifindex != oif) {
442                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
443                                                 continue;
444                                         if (local && (!oif ||
445                                                       local->rt6i_idev->dev->ifindex == oif))
446                                                 continue;
447                                 }
448                                 local = sprt;
449                         }
450                 } else {
451                         if (ipv6_chk_addr(net, saddr, dev,
452                                           flags & RT6_LOOKUP_F_IFACE))
453                                 return sprt;
454                 }
455         }
456
457         if (oif) {
458                 if (local)
459                         return local;
460
461                 if (flags & RT6_LOOKUP_F_IFACE)
462                         return net->ipv6.ip6_null_entry;
463         }
464 out:
465         return rt;
466 }
467
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing for the gateway of @rt.
 *
 * Nothing happens for a NULL route or one without RTF_GATEWAY.  A
 * neighbour solicitation is sent to the gateway's solicited-node
 * multicast address when either no neighbour entry exists for the
 * gateway, or the entry is not NUD_VALID and its last update is older
 * than the interface's rtr_probe_interval.  In the latter case the
 * entry's "updated" stamp is refreshed under the neighbour lock before
 * sending.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	bool send_probe;

	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		write_lock(&neigh->lock);
		send_probe = !(neigh->nud_state & NUD_VALID) &&
			     time_after(jiffies,
					neigh->updated +
					rt->rt6i_idev->cnf.rtr_probe_interval);
		if (send_probe)
			neigh->updated = jiffies;
		write_unlock(&neigh->lock);
	} else {
		send_probe = true;
	}

	if (send_probe) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		target = (struct in6_addr *)&rt->rt6i_gateway;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	}
	rcu_read_unlock_bh();
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
514
515 /*
516  * Default Router Selection (RFC 2461 6.3.6)
517  */
518 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
519 {
520         struct net_device *dev = rt->dst.dev;
521         if (!oif || dev->ifindex == oif)
522                 return 2;
523         if ((dev->flags & IFF_LOOPBACK) &&
524             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
525                 return 1;
526         return 0;
527 }
528
529 static inline bool rt6_check_neigh(struct rt6_info *rt)
530 {
531         struct neighbour *neigh;
532         bool ret = false;
533
534         if (rt->rt6i_flags & RTF_NONEXTHOP ||
535             !(rt->rt6i_flags & RTF_GATEWAY))
536                 return true;
537
538         rcu_read_lock_bh();
539         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
540         if (neigh) {
541                 read_lock(&neigh->lock);
542                 if (neigh->nud_state & NUD_VALID)
543                         ret = true;
544 #ifdef CONFIG_IPV6_ROUTER_PREF
545                 else if (!(neigh->nud_state & NUD_FAILED))
546                         ret = true;
547 #endif
548                 read_unlock(&neigh->lock);
549         }
550         rcu_read_unlock_bh();
551
552         return ret;
553 }
554
555 static int rt6_score_route(struct rt6_info *rt, int oif,
556                            int strict)
557 {
558         int m;
559
560         m = rt6_check_dev(rt, oif);
561         if (!m && (strict & RT6_LOOKUP_F_IFACE))
562                 return -1;
563 #ifdef CONFIG_IPV6_ROUTER_PREF
564         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
565 #endif
566         if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
567                 return -1;
568         return m;
569 }
570
571 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
572                                    int *mpri, struct rt6_info *match)
573 {
574         int m;
575
576         if (rt6_check_expired(rt))
577                 goto out;
578
579         m = rt6_score_route(rt, oif, strict);
580         if (m < 0)
581                 goto out;
582
583         if (m > *mpri) {
584                 if (strict & RT6_LOOKUP_F_REACHABLE)
585                         rt6_probe(match);
586                 *mpri = m;
587                 match = rt;
588         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
589                 rt6_probe(rt);
590         }
591
592 out:
593         return match;
594 }
595
596 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
597                                      struct rt6_info *rr_head,
598                                      u32 metric, int oif, int strict)
599 {
600         struct rt6_info *rt, *match;
601         int mpri = -1;
602
603         match = NULL;
604         for (rt = rr_head; rt && rt->rt6i_metric == metric;
605              rt = rt->dst.rt6_next)
606                 match = find_match(rt, oif, strict, &mpri, match);
607         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
608              rt = rt->dst.rt6_next)
609                 match = find_match(rt, oif, strict, &mpri, match);
610
611         return match;
612 }
613
614 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
615 {
616         struct rt6_info *match, *rt0;
617         struct net *net;
618
619         rt0 = fn->rr_ptr;
620         if (!rt0)
621                 fn->rr_ptr = rt0 = fn->leaf;
622
623         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
624
625         if (!match &&
626             (strict & RT6_LOOKUP_F_REACHABLE)) {
627                 struct rt6_info *next = rt0->dst.rt6_next;
628
629                 /* no entries matched; do round-robin */
630                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
631                         next = fn->leaf;
632
633                 if (next != rt0)
634                         fn->rr_ptr = next;
635         }
636
637         net = dev_net(rt0->dst.dev);
638         return match ? match : net->ipv6.ip6_null_entry;
639 }
640
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information Option (RFC 4191) received on @dev.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router, used as the gateway
 *
 * The option is validated against RFC 4191 section 2.3, where Length is
 * expressed in units of 8 octets and includes the 8-octet header:
 *   - Length must be 1, 2 or 3;
 *   - Prefix Length > 64 requires Length == 3 (a full 16-octet prefix);
 *   - Prefix Length > 0  requires Length >= 2 (at least 8 prefix octets).
 * Accepting anything shorter would let the prefix be read from beyond
 * the octets the option actually carries.
 *
 * A zero lifetime deletes a matching existing route.  A non-zero
 * lifetime adds the route if it is missing, or refreshes preference and
 * expiry of the existing one.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	/*
	 * Rule out negative lengths explicitly: comparing a negative int
	 * against sizeof() would convert it to a huge unsigned value.
	 */
	if (len < 0 || (size_t)len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length (RFC 4191, 2.3) */
	if (rinfo->length < 1 || rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* Full 16-octet prefix present: use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/*
		 * Shorter option: build the prefix in a local buffer.
		 * The checks above guarantee prefix_len <= 64 with 8
		 * prefix octets present, or prefix_len == 0.
		 * NOTE(review): assumes ipv6_addr_prefix() reads no more
		 * than prefix_len bits from its source - confirm.
		 */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif
713
/*
 * BACKTRACK - climb the fib6 tree when the current lookup hit the null
 * entry.
 *
 * Expects the enclosing function to provide local variables "rt" and
 * "fn" and the labels "restart" and "out".  If rt is the namespace null
 * entry, the macro walks towards the root: at each step it either
 * descends into the parent's source-address subtree (when one exists
 * and we did not come from it) or moves to the parent.  It jumps to
 * "restart" as soon as a node with RTN_RTINFO is reached, and to "out"
 * when the top-level root is hit.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
731
732 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
733                                              struct fib6_table *table,
734                                              struct flowi6 *fl6, int flags)
735 {
736         struct fib6_node *fn;
737         struct rt6_info *rt;
738
739         read_lock_bh(&table->tb6_lock);
740         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
741 restart:
742         rt = fn->leaf;
743         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
744         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
745                 rt = rt6_multipath_select(rt, fl6);
746         BACKTRACK(net, &fl6->saddr);
747 out:
748         dst_use(&rt->dst, jiffies);
749         read_unlock_bh(&table->tb6_lock);
750         return rt;
751
752 }
753
754 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
755                                     int flags)
756 {
757         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
758 }
759 EXPORT_SYMBOL_GPL(ip6_route_lookup);
760
761 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
762                             const struct in6_addr *saddr, int oif, int strict)
763 {
764         struct flowi6 fl6 = {
765                 .flowi6_oif = oif,
766                 .daddr = *daddr,
767         };
768         struct dst_entry *dst;
769         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
770
771         if (saddr) {
772                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
773                 flags |= RT6_LOOKUP_F_HAS_SADDR;
774         }
775
776         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
777         if (dst->error == 0)
778                 return (struct rt6_info *) dst;
779
780         dst_release(dst);
781
782         return NULL;
783 }
784
785 EXPORT_SYMBOL(rt6_lookup);
786
787 /* ip6_ins_rt is called with FREE table->tb6_lock.
788    It takes new route entry, the addition fails by any reason the
789    route is freed. In any case, if caller does not hold it, it may
790    be destroyed.
791  */
792
793 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
794 {
795         int err;
796         struct fib6_table *table;
797
798         table = rt->rt6i_table;
799         write_lock_bh(&table->tb6_lock);
800         err = fib6_add(&table->tb6_root, rt, info);
801         write_unlock_bh(&table->tb6_lock);
802
803         return err;
804 }
805
806 int ip6_ins_rt(struct rt6_info *rt)
807 {
808         struct nl_info info = {
809                 .nl_net = dev_net(rt->dst.dev),
810         };
811         return __ip6_ins_rt(rt, &info);
812 }
813
814 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
815                                       const struct in6_addr *daddr,
816                                       const struct in6_addr *saddr)
817 {
818         struct rt6_info *rt;
819
820         /*
821          *      Clone the route.
822          */
823
824         rt = ip6_rt_copy(ort, daddr);
825
826         if (rt) {
827                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
828                         if (ort->rt6i_dst.plen != 128 &&
829                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
830                                 rt->rt6i_flags |= RTF_ANYCAST;
831                         rt->rt6i_gateway = *daddr;
832                 }
833
834                 rt->rt6i_flags |= RTF_CACHE;
835
836 #ifdef CONFIG_IPV6_SUBTREES
837                 if (rt->rt6i_src.plen && saddr) {
838                         rt->rt6i_src.addr = *saddr;
839                         rt->rt6i_src.plen = 128;
840                 }
841 #endif
842         }
843
844         return rt;
845 }
846
847 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
848                                         const struct in6_addr *daddr)
849 {
850         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
851
852         if (rt)
853                 rt->rt6i_flags |= RTF_CACHE;
854         return rt;
855 }
856
857 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
858                                       struct flowi6 *fl6, int flags)
859 {
860         struct fib6_node *fn;
861         struct rt6_info *rt, *nrt;
862         int strict = 0;
863         int attempts = 3;
864         int err;
865         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
866
867         strict |= flags & RT6_LOOKUP_F_IFACE;
868
869 relookup:
870         read_lock_bh(&table->tb6_lock);
871
872 restart_2:
873         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
874
875 restart:
876         rt = rt6_select(fn, oif, strict | reachable);
877         if (rt->rt6i_nsiblings && oif == 0)
878                 rt = rt6_multipath_select(rt, fl6);
879         BACKTRACK(net, &fl6->saddr);
880         if (rt == net->ipv6.ip6_null_entry ||
881             rt->rt6i_flags & RTF_CACHE)
882                 goto out;
883
884         dst_hold(&rt->dst);
885         read_unlock_bh(&table->tb6_lock);
886
887         if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
888                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
889         else if (!(rt->dst.flags & DST_HOST))
890                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
891         else
892                 goto out2;
893
894         ip6_rt_put(rt);
895         rt = nrt ? : net->ipv6.ip6_null_entry;
896
897         dst_hold(&rt->dst);
898         if (nrt) {
899                 err = ip6_ins_rt(nrt);
900                 if (!err)
901                         goto out2;
902         }
903
904         if (--attempts <= 0)
905                 goto out2;
906
907         /*
908          * Race condition! In the gap, when table->tb6_lock was
909          * released someone could insert this route.  Relookup.
910          */
911         ip6_rt_put(rt);
912         goto relookup;
913
914 out:
915         if (reachable) {
916                 reachable = 0;
917                 goto restart_2;
918         }
919         dst_hold(&rt->dst);
920         read_unlock_bh(&table->tb6_lock);
921 out2:
922         rt->dst.lastuse = jiffies;
923         rt->dst.__use++;
924
925         return rt;
926 }
927
928 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
929                                             struct flowi6 *fl6, int flags)
930 {
931         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
932 }
933
934 static struct dst_entry *ip6_route_input_lookup(struct net *net,
935                                                 struct net_device *dev,
936                                                 struct flowi6 *fl6, int flags)
937 {
938         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
939                 flags |= RT6_LOOKUP_F_IFACE;
940
941         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
942 }
943
944 void ip6_route_input(struct sk_buff *skb)
945 {
946         const struct ipv6hdr *iph = ipv6_hdr(skb);
947         struct net *net = dev_net(skb->dev);
948         int flags = RT6_LOOKUP_F_HAS_SADDR;
949         struct flowi6 fl6 = {
950                 .flowi6_iif = skb->dev->ifindex,
951                 .daddr = iph->daddr,
952                 .saddr = iph->saddr,
953                 .flowlabel = ip6_flowinfo(iph),
954                 .flowi6_mark = skb->mark,
955                 .flowi6_proto = iph->nexthdr,
956         };
957
958         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
959 }
960
961 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
962                                              struct flowi6 *fl6, int flags)
963 {
964         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
965 }
966
967 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
968                                     struct flowi6 *fl6)
969 {
970         int flags = 0;
971
972         fl6->flowi6_iif = LOOPBACK_IFINDEX;
973
974         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
975                 flags |= RT6_LOOKUP_F_IFACE;
976
977         if (!ipv6_addr_any(&fl6->saddr))
978                 flags |= RT6_LOOKUP_F_HAS_SADDR;
979         else if (sk)
980                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
981
982         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
983 }
984
985 EXPORT_SYMBOL(ip6_route_output);
986
987 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
988 {
989         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
990         struct dst_entry *new = NULL;
991
992         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
993         if (rt) {
994                 new = &rt->dst;
995
996                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
997                 rt6_init_peer(rt, net->ipv6.peers);
998
999                 new->__use = 1;
1000                 new->input = dst_discard;
1001                 new->output = dst_discard;
1002
1003                 if (dst_metrics_read_only(&ort->dst))
1004                         new->_metrics = ort->dst._metrics;
1005                 else
1006                         dst_copy_metrics(new, &ort->dst);
1007                 rt->rt6i_idev = ort->rt6i_idev;
1008                 if (rt->rt6i_idev)
1009                         in6_dev_hold(rt->rt6i_idev);
1010
1011                 rt->rt6i_gateway = ort->rt6i_gateway;
1012                 rt->rt6i_flags = ort->rt6i_flags;
1013                 rt6_clean_expires(rt);
1014                 rt->rt6i_metric = 0;
1015
1016                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1017 #ifdef CONFIG_IPV6_SUBTREES
1018                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1019 #endif
1020
1021                 dst_free(new);
1022         }
1023
1024         dst_release(dst_orig);
1025         return new ? new : ERR_PTR(-ENOMEM);
1026 }
1027
1028 /*
1029  *      Destination cache support functions
1030  */
1031
1032 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1033 {
1034         struct rt6_info *rt;
1035
1036         rt = (struct rt6_info *) dst;
1037
1038         /* All IPV6 dsts are created with ->obsolete set to the value
1039          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1040          * into this function always.
1041          */
1042         if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1043                 return NULL;
1044
1045         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1046                 return dst;
1047
1048         return NULL;
1049 }
1050
1051 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1052 {
1053         struct rt6_info *rt = (struct rt6_info *) dst;
1054
1055         if (rt) {
1056                 if (rt->rt6i_flags & RTF_CACHE) {
1057                         if (rt6_check_expired(rt)) {
1058                                 ip6_del_rt(rt);
1059                                 dst = NULL;
1060                         }
1061                 } else {
1062                         dst_release(dst);
1063                         dst = NULL;
1064                 }
1065         }
1066         return dst;
1067 }
1068
1069 static void ip6_link_failure(struct sk_buff *skb)
1070 {
1071         struct rt6_info *rt;
1072
1073         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1074
1075         rt = (struct rt6_info *) skb_dst(skb);
1076         if (rt) {
1077                 if (rt->rt6i_flags & RTF_CACHE)
1078                         rt6_update_expires(rt, 0);
1079                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1080                         rt->rt6i_node->fn_sernum = -1;
1081         }
1082 }
1083
1084 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1085                                struct sk_buff *skb, u32 mtu)
1086 {
1087         struct rt6_info *rt6 = (struct rt6_info*)dst;
1088
1089         dst_confirm(dst);
1090         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1091                 struct net *net = dev_net(dst->dev);
1092
1093                 rt6->rt6i_flags |= RTF_MODIFIED;
1094                 if (mtu < IPV6_MIN_MTU) {
1095                         u32 features = dst_metric(dst, RTAX_FEATURES);
1096                         mtu = IPV6_MIN_MTU;
1097                         features |= RTAX_FEATURE_ALLFRAG;
1098                         dst_metric_set(dst, RTAX_FEATURES, features);
1099                 }
1100                 dst_metric_set(dst, RTAX_MTU, mtu);
1101                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1102         }
1103 }
1104
1105 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1106                      int oif, u32 mark)
1107 {
1108         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1109         struct dst_entry *dst;
1110         struct flowi6 fl6;
1111
1112         memset(&fl6, 0, sizeof(fl6));
1113         fl6.flowi6_oif = oif;
1114         fl6.flowi6_mark = mark;
1115         fl6.flowi6_flags = 0;
1116         fl6.daddr = iph->daddr;
1117         fl6.saddr = iph->saddr;
1118         fl6.flowlabel = ip6_flowinfo(iph);
1119
1120         dst = ip6_route_output(net, NULL, &fl6);
1121         if (!dst->error)
1122                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1123         dst_release(dst);
1124 }
1125 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1126
1127 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1128 {
1129         ip6_update_pmtu(skb, sock_net(sk), mtu,
1130                         sk->sk_bound_dev_if, sk->sk_mark);
1131 }
1132 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1133
1134 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1135 {
1136         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1137         struct dst_entry *dst;
1138         struct flowi6 fl6;
1139
1140         memset(&fl6, 0, sizeof(fl6));
1141         fl6.flowi6_oif = oif;
1142         fl6.flowi6_mark = mark;
1143         fl6.flowi6_flags = 0;
1144         fl6.daddr = iph->daddr;
1145         fl6.saddr = iph->saddr;
1146         fl6.flowlabel = ip6_flowinfo(iph);
1147
1148         dst = ip6_route_output(net, NULL, &fl6);
1149         if (!dst->error)
1150                 rt6_do_redirect(dst, NULL, skb);
1151         dst_release(dst);
1152 }
1153 EXPORT_SYMBOL_GPL(ip6_redirect);
1154
1155 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1156 {
1157         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1158 }
1159 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1160
1161 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1162 {
1163         struct net_device *dev = dst->dev;
1164         unsigned int mtu = dst_mtu(dst);
1165         struct net *net = dev_net(dev);
1166
1167         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1168
1169         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1170                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1171
1172         /*
1173          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1174          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1175          * IPV6_MAXPLEN is also valid and means: "any MSS,
1176          * rely only on pmtu discovery"
1177          */
1178         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1179                 mtu = IPV6_MAXPLEN;
1180         return mtu;
1181 }
1182
1183 static unsigned int ip6_mtu(const struct dst_entry *dst)
1184 {
1185         struct inet6_dev *idev;
1186         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1187
1188         if (mtu)
1189                 return mtu;
1190
1191         mtu = IPV6_MIN_MTU;
1192
1193         rcu_read_lock();
1194         idev = __in6_dev_get(dst->dev);
1195         if (idev)
1196                 mtu = idev->cnf.mtu6;
1197         rcu_read_unlock();
1198
1199         return mtu;
1200 }
1201
/*
 *	Head of the list of dsts created by icmp6_dst_alloc(), chained
 *	through dst->next.  icmp6_dst_lock is held (with bottom halves
 *	disabled) around every access to the list in this part of the file.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1204
1205 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1206                                   struct flowi6 *fl6)
1207 {
1208         struct dst_entry *dst;
1209         struct rt6_info *rt;
1210         struct inet6_dev *idev = in6_dev_get(dev);
1211         struct net *net = dev_net(dev);
1212
1213         if (unlikely(!idev))
1214                 return ERR_PTR(-ENODEV);
1215
1216         rt = ip6_dst_alloc(net, dev, 0, NULL);
1217         if (unlikely(!rt)) {
1218                 in6_dev_put(idev);
1219                 dst = ERR_PTR(-ENOMEM);
1220                 goto out;
1221         }
1222
1223         rt->dst.flags |= DST_HOST;
1224         rt->dst.output  = ip6_output;
1225         atomic_set(&rt->dst.__refcnt, 1);
1226         rt->rt6i_dst.addr = fl6->daddr;
1227         rt->rt6i_dst.plen = 128;
1228         rt->rt6i_idev     = idev;
1229         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1230
1231         spin_lock_bh(&icmp6_dst_lock);
1232         rt->dst.next = icmp6_dst_gc_list;
1233         icmp6_dst_gc_list = &rt->dst;
1234         spin_unlock_bh(&icmp6_dst_lock);
1235
1236         fib6_force_start_gc(net);
1237
1238         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1239
1240 out:
1241         return dst;
1242 }
1243
1244 int icmp6_dst_gc(void)
1245 {
1246         struct dst_entry *dst, **pprev;
1247         int more = 0;
1248
1249         spin_lock_bh(&icmp6_dst_lock);
1250         pprev = &icmp6_dst_gc_list;
1251
1252         while ((dst = *pprev) != NULL) {
1253                 if (!atomic_read(&dst->__refcnt)) {
1254                         *pprev = dst->next;
1255                         dst_free(dst);
1256                 } else {
1257                         pprev = &dst->next;
1258                         ++more;
1259                 }
1260         }
1261
1262         spin_unlock_bh(&icmp6_dst_lock);
1263
1264         return more;
1265 }
1266
1267 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1268                             void *arg)
1269 {
1270         struct dst_entry *dst, **pprev;
1271
1272         spin_lock_bh(&icmp6_dst_lock);
1273         pprev = &icmp6_dst_gc_list;
1274         while ((dst = *pprev) != NULL) {
1275                 struct rt6_info *rt = (struct rt6_info *) dst;
1276                 if (func(rt, arg)) {
1277                         *pprev = dst->next;
1278                         dst_free(dst);
1279                 } else {
1280                         pprev = &dst->next;
1281                 }
1282         }
1283         spin_unlock_bh(&icmp6_dst_lock);
1284 }
1285
1286 static int ip6_dst_gc(struct dst_ops *ops)
1287 {
1288         unsigned long now = jiffies;
1289         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1290         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1291         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1292         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1293         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1294         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1295         int entries;
1296
1297         entries = dst_entries_get_fast(ops);
1298         if (time_after(rt_last_gc + rt_min_interval, now) &&
1299             entries <= rt_max_size)
1300                 goto out;
1301
1302         net->ipv6.ip6_rt_gc_expire++;
1303         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1304         net->ipv6.ip6_rt_last_gc = now;
1305         entries = dst_entries_get_slow(ops);
1306         if (entries < ops->gc_thresh)
1307                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1308 out:
1309         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1310         return entries > rt_max_size;
1311 }
1312
1313 int ip6_dst_hoplimit(struct dst_entry *dst)
1314 {
1315         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1316         if (hoplimit == 0) {
1317                 struct net_device *dev = dst->dev;
1318                 struct inet6_dev *idev;
1319
1320                 rcu_read_lock();
1321                 idev = __in6_dev_get(dev);
1322                 if (idev)
1323                         hoplimit = idev->cnf.hop_limit;
1324                 else
1325                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1326                 rcu_read_unlock();
1327         }
1328         return hoplimit;
1329 }
1330 EXPORT_SYMBOL(ip6_dst_hoplimit);
1331
1332 /*
1333  *
1334  */
1335
1336 int ip6_route_add(struct fib6_config *cfg)
1337 {
1338         int err;
1339         struct net *net = cfg->fc_nlinfo.nl_net;
1340         struct rt6_info *rt = NULL;
1341         struct net_device *dev = NULL;
1342         struct inet6_dev *idev = NULL;
1343         struct fib6_table *table;
1344         int addr_type;
1345
1346         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1347                 return -EINVAL;
1348 #ifndef CONFIG_IPV6_SUBTREES
1349         if (cfg->fc_src_len)
1350                 return -EINVAL;
1351 #endif
1352         if (cfg->fc_ifindex) {
1353                 err = -ENODEV;
1354                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1355                 if (!dev)
1356                         goto out;
1357                 idev = in6_dev_get(dev);
1358                 if (!idev)
1359                         goto out;
1360         }
1361
1362         if (cfg->fc_metric == 0)
1363                 cfg->fc_metric = IP6_RT_PRIO_USER;
1364
1365         err = -ENOBUFS;
1366         if (cfg->fc_nlinfo.nlh &&
1367             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1368                 table = fib6_get_table(net, cfg->fc_table);
1369                 if (!table) {
1370                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1371                         table = fib6_new_table(net, cfg->fc_table);
1372                 }
1373         } else {
1374                 table = fib6_new_table(net, cfg->fc_table);
1375         }
1376
1377         if (!table)
1378                 goto out;
1379
1380         rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1381
1382         if (!rt) {
1383                 err = -ENOMEM;
1384                 goto out;
1385         }
1386
1387         if (cfg->fc_flags & RTF_EXPIRES)
1388                 rt6_set_expires(rt, jiffies +
1389                                 clock_t_to_jiffies(cfg->fc_expires));
1390         else
1391                 rt6_clean_expires(rt);
1392
1393         if (cfg->fc_protocol == RTPROT_UNSPEC)
1394                 cfg->fc_protocol = RTPROT_BOOT;
1395         rt->rt6i_protocol = cfg->fc_protocol;
1396
1397         addr_type = ipv6_addr_type(&cfg->fc_dst);
1398
1399         if (addr_type & IPV6_ADDR_MULTICAST)
1400                 rt->dst.input = ip6_mc_input;
1401         else if (cfg->fc_flags & RTF_LOCAL)
1402                 rt->dst.input = ip6_input;
1403         else
1404                 rt->dst.input = ip6_forward;
1405
1406         rt->dst.output = ip6_output;
1407
1408         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1409         rt->rt6i_dst.plen = cfg->fc_dst_len;
1410         if (rt->rt6i_dst.plen == 128)
1411                rt->dst.flags |= DST_HOST;
1412
1413         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1414                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1415                 if (!metrics) {
1416                         err = -ENOMEM;
1417                         goto out;
1418                 }
1419                 dst_init_metrics(&rt->dst, metrics, 0);
1420         }
1421 #ifdef CONFIG_IPV6_SUBTREES
1422         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1423         rt->rt6i_src.plen = cfg->fc_src_len;
1424 #endif
1425
1426         rt->rt6i_metric = cfg->fc_metric;
1427
1428         /* We cannot add true routes via loopback here,
1429            they would result in kernel looping; promote them to reject routes
1430          */
1431         if ((cfg->fc_flags & RTF_REJECT) ||
1432             (dev && (dev->flags & IFF_LOOPBACK) &&
1433              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1434              !(cfg->fc_flags & RTF_LOCAL))) {
1435                 /* hold loopback dev/idev if we haven't done so. */
1436                 if (dev != net->loopback_dev) {
1437                         if (dev) {
1438                                 dev_put(dev);
1439                                 in6_dev_put(idev);
1440                         }
1441                         dev = net->loopback_dev;
1442                         dev_hold(dev);
1443                         idev = in6_dev_get(dev);
1444                         if (!idev) {
1445                                 err = -ENODEV;
1446                                 goto out;
1447                         }
1448                 }
1449                 rt->dst.output = ip6_pkt_discard_out;
1450                 rt->dst.input = ip6_pkt_discard;
1451                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1452                 switch (cfg->fc_type) {
1453                 case RTN_BLACKHOLE:
1454                         rt->dst.error = -EINVAL;
1455                         break;
1456                 case RTN_PROHIBIT:
1457                         rt->dst.error = -EACCES;
1458                         break;
1459                 case RTN_THROW:
1460                         rt->dst.error = -EAGAIN;
1461                         break;
1462                 default:
1463                         rt->dst.error = -ENETUNREACH;
1464                         break;
1465                 }
1466                 goto install_route;
1467         }
1468
1469         if (cfg->fc_flags & RTF_GATEWAY) {
1470                 const struct in6_addr *gw_addr;
1471                 int gwa_type;
1472
1473                 gw_addr = &cfg->fc_gateway;
1474                 rt->rt6i_gateway = *gw_addr;
1475                 gwa_type = ipv6_addr_type(gw_addr);
1476
1477                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1478                         struct rt6_info *grt;
1479
1480                         /* IPv6 strictly inhibits using not link-local
1481                            addresses as nexthop address.
1482                            Otherwise, router will not able to send redirects.
1483                            It is very good, but in some (rare!) circumstances
1484                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1485                            some exceptions. --ANK
1486                          */
1487                         err = -EINVAL;
1488                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1489                                 goto out;
1490
1491                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1492
1493                         err = -EHOSTUNREACH;
1494                         if (!grt)
1495                                 goto out;
1496                         if (dev) {
1497                                 if (dev != grt->dst.dev) {
1498                                         ip6_rt_put(grt);
1499                                         goto out;
1500                                 }
1501                         } else {
1502                                 dev = grt->dst.dev;
1503                                 idev = grt->rt6i_idev;
1504                                 dev_hold(dev);
1505                                 in6_dev_hold(grt->rt6i_idev);
1506                         }
1507                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1508                                 err = 0;
1509                         ip6_rt_put(grt);
1510
1511                         if (err)
1512                                 goto out;
1513                 }
1514                 err = -EINVAL;
1515                 if (!dev || (dev->flags & IFF_LOOPBACK))
1516                         goto out;
1517         }
1518
1519         err = -ENODEV;
1520         if (!dev)
1521                 goto out;
1522
1523         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1524                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1525                         err = -EINVAL;
1526                         goto out;
1527                 }
1528                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1529                 rt->rt6i_prefsrc.plen = 128;
1530         } else
1531                 rt->rt6i_prefsrc.plen = 0;
1532
1533         rt->rt6i_flags = cfg->fc_flags;
1534
1535 install_route:
1536         if (cfg->fc_mx) {
1537                 struct nlattr *nla;
1538                 int remaining;
1539
1540                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1541                         int type = nla_type(nla);
1542
1543                         if (type) {
1544                                 if (type > RTAX_MAX) {
1545                                         err = -EINVAL;
1546                                         goto out;
1547                                 }
1548
1549                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1550                         }
1551                 }
1552         }
1553
1554         rt->dst.dev = dev;
1555         rt->rt6i_idev = idev;
1556         rt->rt6i_table = table;
1557
1558         cfg->fc_nlinfo.nl_net = dev_net(dev);
1559
1560         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1561
1562 out:
1563         if (dev)
1564                 dev_put(dev);
1565         if (idev)
1566                 in6_dev_put(idev);
1567         if (rt)
1568                 dst_free(&rt->dst);
1569         return err;
1570 }
1571
1572 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1573 {
1574         int err;
1575         struct fib6_table *table;
1576         struct net *net = dev_net(rt->dst.dev);
1577
1578         if (rt == net->ipv6.ip6_null_entry) {
1579                 err = -ENOENT;
1580                 goto out;
1581         }
1582
1583         table = rt->rt6i_table;
1584         write_lock_bh(&table->tb6_lock);
1585         err = fib6_del(rt, info);
1586         write_unlock_bh(&table->tb6_lock);
1587
1588 out:
1589         ip6_rt_put(rt);
1590         return err;
1591 }
1592
1593 int ip6_del_rt(struct rt6_info *rt)
1594 {
1595         struct nl_info info = {
1596                 .nl_net = dev_net(rt->dst.dev),
1597         };
1598         return __ip6_del_rt(rt, &info);
1599 }
1600
1601 static int ip6_route_del(struct fib6_config *cfg)
1602 {
1603         struct fib6_table *table;
1604         struct fib6_node *fn;
1605         struct rt6_info *rt;
1606         int err = -ESRCH;
1607
1608         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1609         if (!table)
1610                 return err;
1611
1612         read_lock_bh(&table->tb6_lock);
1613
1614         fn = fib6_locate(&table->tb6_root,
1615                          &cfg->fc_dst, cfg->fc_dst_len,
1616                          &cfg->fc_src, cfg->fc_src_len);
1617
1618         if (fn) {
1619                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1620                         if (cfg->fc_ifindex &&
1621                             (!rt->dst.dev ||
1622                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1623                                 continue;
1624                         if (cfg->fc_flags & RTF_GATEWAY &&
1625                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1626                                 continue;
1627                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1628                                 continue;
1629                         dst_hold(&rt->dst);
1630                         read_unlock_bh(&table->tb6_lock);
1631
1632                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1633                 }
1634         }
1635         read_unlock_bh(&table->tb6_lock);
1636
1637         return err;
1638 }
1639
1640 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1641 {
1642         struct net *net = dev_net(skb->dev);
1643         struct netevent_redirect netevent;
1644         struct rt6_info *rt, *nrt = NULL;
1645         struct ndisc_options ndopts;
1646         struct inet6_dev *in6_dev;
1647         struct neighbour *neigh;
1648         struct rd_msg *msg;
1649         int optlen, on_link;
1650         u8 *lladdr;
1651
1652         optlen = skb->tail - skb->transport_header;
1653         optlen -= sizeof(*msg);
1654
1655         if (optlen < 0) {
1656                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1657                 return;
1658         }
1659
1660         msg = (struct rd_msg *)icmp6_hdr(skb);
1661
1662         if (ipv6_addr_is_multicast(&msg->dest)) {
1663                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1664                 return;
1665         }
1666
1667         on_link = 0;
1668         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1669                 on_link = 1;
1670         } else if (ipv6_addr_type(&msg->target) !=
1671                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1672                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1673                 return;
1674         }
1675
1676         in6_dev = __in6_dev_get(skb->dev);
1677         if (!in6_dev)
1678                 return;
1679         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1680                 return;
1681
1682         /* RFC2461 8.1:
1683          *      The IP source address of the Redirect MUST be the same as the current
1684          *      first-hop router for the specified ICMP Destination Address.
1685          */
1686
1687         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1688                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1689                 return;
1690         }
1691
1692         lladdr = NULL;
1693         if (ndopts.nd_opts_tgt_lladdr) {
1694                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1695                                              skb->dev);
1696                 if (!lladdr) {
1697                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1698                         return;
1699                 }
1700         }
1701
1702         rt = (struct rt6_info *) dst;
1703         if (rt == net->ipv6.ip6_null_entry) {
1704                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1705                 return;
1706         }
1707
1708         /* Redirect received -> path was valid.
1709          * Look, redirects are sent only in response to data packets,
1710          * so that this nexthop apparently is reachable. --ANK
1711          */
1712         dst_confirm(&rt->dst);
1713
1714         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1715         if (!neigh)
1716                 return;
1717
1718         /*
1719          *      We have finally decided to accept it.
1720          */
1721
1722         neigh_update(neigh, lladdr, NUD_STALE,
1723                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1724                      NEIGH_UPDATE_F_OVERRIDE|
1725                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1726                                      NEIGH_UPDATE_F_ISROUTER))
1727                      );
1728
1729         nrt = ip6_rt_copy(rt, &msg->dest);
1730         if (!nrt)
1731                 goto out;
1732
1733         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1734         if (on_link)
1735                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1736
1737         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1738
1739         if (ip6_ins_rt(nrt))
1740                 goto out;
1741
1742         netevent.old = &rt->dst;
1743         netevent.new = &nrt->dst;
1744         netevent.daddr = &msg->dest;
1745         netevent.neigh = neigh;
1746         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1747
1748         if (rt->rt6i_flags & RTF_CACHE) {
1749                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1750                 ip6_del_rt(rt);
1751         }
1752
1753 out:
1754         neigh_release(neigh);
1755 }
1756
1757 /*
1758  *      Misc support functions
1759  */
1760
1761 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1762                                     const struct in6_addr *dest)
1763 {
1764         struct net *net = dev_net(ort->dst.dev);
1765         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1766                                             ort->rt6i_table);
1767
1768         if (rt) {
1769                 rt->dst.input = ort->dst.input;
1770                 rt->dst.output = ort->dst.output;
1771                 rt->dst.flags |= DST_HOST;
1772
1773                 rt->rt6i_dst.addr = *dest;
1774                 rt->rt6i_dst.plen = 128;
1775                 dst_copy_metrics(&rt->dst, &ort->dst);
1776                 rt->dst.error = ort->dst.error;
1777                 rt->rt6i_idev = ort->rt6i_idev;
1778                 if (rt->rt6i_idev)
1779                         in6_dev_hold(rt->rt6i_idev);
1780                 rt->dst.lastuse = jiffies;
1781
1782                 rt->rt6i_gateway = ort->rt6i_gateway;
1783                 rt->rt6i_flags = ort->rt6i_flags;
1784                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1785                     (RTF_DEFAULT | RTF_ADDRCONF))
1786                         rt6_set_from(rt, ort);
1787                 else
1788                         rt6_clean_expires(rt);
1789                 rt->rt6i_metric = 0;
1790
1791 #ifdef CONFIG_IPV6_SUBTREES
1792                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1793 #endif
1794                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1795                 rt->rt6i_table = ort->rt6i_table;
1796         }
1797         return rt;
1798 }
1799
1800 #ifdef CONFIG_IPV6_ROUTE_INFO
1801 static struct rt6_info *rt6_get_route_info(struct net *net,
1802                                            const struct in6_addr *prefix, int prefixlen,
1803                                            const struct in6_addr *gwaddr, int ifindex)
1804 {
1805         struct fib6_node *fn;
1806         struct rt6_info *rt = NULL;
1807         struct fib6_table *table;
1808
1809         table = fib6_get_table(net, RT6_TABLE_INFO);
1810         if (!table)
1811                 return NULL;
1812
1813         read_lock_bh(&table->tb6_lock);
1814         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1815         if (!fn)
1816                 goto out;
1817
1818         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1819                 if (rt->dst.dev->ifindex != ifindex)
1820                         continue;
1821                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1822                         continue;
1823                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1824                         continue;
1825                 dst_hold(&rt->dst);
1826                 break;
1827         }
1828 out:
1829         read_unlock_bh(&table->tb6_lock);
1830         return rt;
1831 }
1832
1833 static struct rt6_info *rt6_add_route_info(struct net *net,
1834                                            const struct in6_addr *prefix, int prefixlen,
1835                                            const struct in6_addr *gwaddr, int ifindex,
1836                                            unsigned int pref)
1837 {
1838         struct fib6_config cfg = {
1839                 .fc_table       = RT6_TABLE_INFO,
1840                 .fc_metric      = IP6_RT_PRIO_USER,
1841                 .fc_ifindex     = ifindex,
1842                 .fc_dst_len     = prefixlen,
1843                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1844                                   RTF_UP | RTF_PREF(pref),
1845                 .fc_nlinfo.portid = 0,
1846                 .fc_nlinfo.nlh = NULL,
1847                 .fc_nlinfo.nl_net = net,
1848         };
1849
1850         cfg.fc_dst = *prefix;
1851         cfg.fc_gateway = *gwaddr;
1852
1853         /* We should treat it as a default route if prefix length is 0. */
1854         if (!prefixlen)
1855                 cfg.fc_flags |= RTF_DEFAULT;
1856
1857         ip6_route_add(&cfg);
1858
1859         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1860 }
1861 #endif
1862
1863 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1864 {
1865         struct rt6_info *rt;
1866         struct fib6_table *table;
1867
1868         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1869         if (!table)
1870                 return NULL;
1871
1872         read_lock_bh(&table->tb6_lock);
1873         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1874                 if (dev == rt->dst.dev &&
1875                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1876                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1877                         break;
1878         }
1879         if (rt)
1880                 dst_hold(&rt->dst);
1881         read_unlock_bh(&table->tb6_lock);
1882         return rt;
1883 }
1884
1885 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1886                                      struct net_device *dev,
1887                                      unsigned int pref)
1888 {
1889         struct fib6_config cfg = {
1890                 .fc_table       = RT6_TABLE_DFLT,
1891                 .fc_metric      = IP6_RT_PRIO_USER,
1892                 .fc_ifindex     = dev->ifindex,
1893                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1894                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1895                 .fc_nlinfo.portid = 0,
1896                 .fc_nlinfo.nlh = NULL,
1897                 .fc_nlinfo.nl_net = dev_net(dev),
1898         };
1899
1900         cfg.fc_gateway = *gwaddr;
1901
1902         ip6_route_add(&cfg);
1903
1904         return rt6_get_dflt_router(gwaddr, dev);
1905 }
1906
1907 void rt6_purge_dflt_routers(struct net *net)
1908 {
1909         struct rt6_info *rt;
1910         struct fib6_table *table;
1911
1912         /* NOTE: Keep consistent with rt6_get_dflt_router */
1913         table = fib6_get_table(net, RT6_TABLE_DFLT);
1914         if (!table)
1915                 return;
1916
1917 restart:
1918         read_lock_bh(&table->tb6_lock);
1919         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1920                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1921                         dst_hold(&rt->dst);
1922                         read_unlock_bh(&table->tb6_lock);
1923                         ip6_del_rt(rt);
1924                         goto restart;
1925                 }
1926         }
1927         read_unlock_bh(&table->tb6_lock);
1928 }
1929
1930 static void rtmsg_to_fib6_config(struct net *net,
1931                                  struct in6_rtmsg *rtmsg,
1932                                  struct fib6_config *cfg)
1933 {
1934         memset(cfg, 0, sizeof(*cfg));
1935
1936         cfg->fc_table = RT6_TABLE_MAIN;
1937         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1938         cfg->fc_metric = rtmsg->rtmsg_metric;
1939         cfg->fc_expires = rtmsg->rtmsg_info;
1940         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1941         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1942         cfg->fc_flags = rtmsg->rtmsg_flags;
1943
1944         cfg->fc_nlinfo.nl_net = net;
1945
1946         cfg->fc_dst = rtmsg->rtmsg_dst;
1947         cfg->fc_src = rtmsg->rtmsg_src;
1948         cfg->fc_gateway = rtmsg->rtmsg_gateway;
1949 }
1950
1951 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1952 {
1953         struct fib6_config cfg;
1954         struct in6_rtmsg rtmsg;
1955         int err;
1956
1957         switch(cmd) {
1958         case SIOCADDRT:         /* Add a route */
1959         case SIOCDELRT:         /* Delete a route */
1960                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1961                         return -EPERM;
1962                 err = copy_from_user(&rtmsg, arg,
1963                                      sizeof(struct in6_rtmsg));
1964                 if (err)
1965                         return -EFAULT;
1966
1967                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1968
1969                 rtnl_lock();
1970                 switch (cmd) {
1971                 case SIOCADDRT:
1972                         err = ip6_route_add(&cfg);
1973                         break;
1974                 case SIOCDELRT:
1975                         err = ip6_route_del(&cfg);
1976                         break;
1977                 default:
1978                         err = -EINVAL;
1979                 }
1980                 rtnl_unlock();
1981
1982                 return err;
1983         }
1984
1985         return -EINVAL;
1986 }
1987
1988 /*
1989  *      Drop the packet on the floor
1990  */
1991
1992 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1993 {
1994         int type;
1995         struct dst_entry *dst = skb_dst(skb);
1996         switch (ipstats_mib_noroutes) {
1997         case IPSTATS_MIB_INNOROUTES:
1998                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1999                 if (type == IPV6_ADDR_ANY) {
2000                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2001                                       IPSTATS_MIB_INADDRERRORS);
2002                         break;
2003                 }
2004                 /* FALLTHROUGH */
2005         case IPSTATS_MIB_OUTNOROUTES:
2006                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2007                               ipstats_mib_noroutes);
2008                 break;
2009         }
2010         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2011         kfree_skb(skb);
2012         return 0;
2013 }
2014
2015 static int ip6_pkt_discard(struct sk_buff *skb)
2016 {
2017         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2018 }
2019
2020 static int ip6_pkt_discard_out(struct sk_buff *skb)
2021 {
2022         skb->dev = skb_dst(skb)->dev;
2023         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2024 }
2025
2026 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2027
2028 static int ip6_pkt_prohibit(struct sk_buff *skb)
2029 {
2030         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2031 }
2032
2033 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2034 {
2035         skb->dev = skb_dst(skb)->dev;
2036         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2037 }
2038
2039 #endif
2040
2041 /*
2042  *      Allocate a dst for local (unicast / anycast) address.
2043  */
2044
2045 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2046                                     const struct in6_addr *addr,
2047                                     bool anycast)
2048 {
2049         struct net *net = dev_net(idev->dev);
2050         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2051
2052         if (!rt) {
2053                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2054                 return ERR_PTR(-ENOMEM);
2055         }
2056
2057         in6_dev_hold(idev);
2058
2059         rt->dst.flags |= DST_HOST;
2060         rt->dst.input = ip6_input;
2061         rt->dst.output = ip6_output;
2062         rt->rt6i_idev = idev;
2063
2064         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2065         if (anycast)
2066                 rt->rt6i_flags |= RTF_ANYCAST;
2067         else
2068                 rt->rt6i_flags |= RTF_LOCAL;
2069
2070         rt->rt6i_dst.addr = *addr;
2071         rt->rt6i_dst.plen = 128;
2072         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2073
2074         atomic_set(&rt->dst.__refcnt, 1);
2075
2076         return rt;
2077 }
2078
2079 int ip6_route_get_saddr(struct net *net,
2080                         struct rt6_info *rt,
2081                         const struct in6_addr *daddr,
2082                         unsigned int prefs,
2083                         struct in6_addr *saddr)
2084 {
2085         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2086         int err = 0;
2087         if (rt->rt6i_prefsrc.plen)
2088                 *saddr = rt->rt6i_prefsrc.addr;
2089         else
2090                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2091                                          daddr, prefs, saddr);
2092         return err;
2093 }
2094
/*
 * Argument bundle handed to fib6_remove_prefsrc() when a deleted address
 * has to be removed from the prefsrc entries of routes.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace owning ip6_null_entry */
	struct in6_addr *addr;		/* address being removed */
};
2101
2102 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2103 {
2104         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2105         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2106         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2107
2108         if (((void *)rt->dst.dev == dev || !dev) &&
2109             rt != net->ipv6.ip6_null_entry &&
2110             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2111                 /* remove prefsrc entry */
2112                 rt->rt6i_prefsrc.plen = 0;
2113         }
2114         return 0;
2115 }
2116
2117 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2118 {
2119         struct net *net = dev_net(ifp->idev->dev);
2120         struct arg_dev_net_ip adni = {
2121                 .dev = ifp->idev->dev,
2122                 .net = net,
2123                 .addr = &ifp->addr,
2124         };
2125         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2126 }
2127
/* Argument bundle handed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace owning ip6_null_entry */
};
2132
2133 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2134 {
2135         const struct arg_dev_net *adn = arg;
2136         const struct net_device *dev = adn->dev;
2137
2138         if ((rt->dst.dev == dev || !dev) &&
2139             rt != adn->net->ipv6.ip6_null_entry)
2140                 return -1;
2141
2142         return 0;
2143 }
2144
2145 void rt6_ifdown(struct net *net, struct net_device *dev)
2146 {
2147         struct arg_dev_net adn = {
2148                 .dev = dev,
2149                 .net = net,
2150         };
2151
2152         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2153         icmp6_clean_all(fib6_ifdown, &adn);
2154 }
2155
/* Argument bundle handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2160
2161 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2162 {
2163         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2164         struct inet6_dev *idev;
2165
2166         /* In IPv6 pmtu discovery is not optional,
2167            so that RTAX_MTU lock cannot disable it.
2168            We still use this lock to block changes
2169            caused by addrconf/ndisc.
2170         */
2171
2172         idev = __in6_dev_get(arg->dev);
2173         if (!idev)
2174                 return 0;
2175
2176         /* For administrative MTU increase, there is no way to discover
2177            IPv6 PMTU increase, so PMTU increase should be updated here.
2178            Since RFC 1981 doesn't include administrative MTU increase
2179            update PMTU increase is a MUST. (i.e. jumbo frame)
2180          */
2181         /*
2182            If new MTU is less than route PMTU, this new MTU will be the
2183            lowest MTU in the path, update the route PMTU to reflect PMTU
2184            decreases; if new MTU is greater than route PMTU, and the
2185            old MTU is the lowest MTU in the path, update the route PMTU
2186            to reflect the increase. In this case if the other nodes' MTU
2187            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2188            PMTU discouvery.
2189          */
2190         if (rt->dst.dev == arg->dev &&
2191             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2192             (dst_mtu(&rt->dst) >= arg->mtu ||
2193              (dst_mtu(&rt->dst) < arg->mtu &&
2194               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2195                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2196         }
2197         return 0;
2198 }
2199
2200 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2201 {
2202         struct rt6_mtu_change_arg arg = {
2203                 .dev = dev,
2204                 .mtu = mtu,
2205         };
2206
2207         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2208 }
2209
2210 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2211         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2212         [RTA_OIF]               = { .type = NLA_U32 },
2213         [RTA_IIF]               = { .type = NLA_U32 },
2214         [RTA_PRIORITY]          = { .type = NLA_U32 },
2215         [RTA_METRICS]           = { .type = NLA_NESTED },
2216         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2217 };
2218
2219 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2220                               struct fib6_config *cfg)
2221 {
2222         struct rtmsg *rtm;
2223         struct nlattr *tb[RTA_MAX+1];
2224         int err;
2225
2226         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2227         if (err < 0)
2228                 goto errout;
2229
2230         err = -EINVAL;
2231         rtm = nlmsg_data(nlh);
2232         memset(cfg, 0, sizeof(*cfg));
2233
2234         cfg->fc_table = rtm->rtm_table;
2235         cfg->fc_dst_len = rtm->rtm_dst_len;
2236         cfg->fc_src_len = rtm->rtm_src_len;
2237         cfg->fc_flags = RTF_UP;
2238         cfg->fc_protocol = rtm->rtm_protocol;
2239         cfg->fc_type = rtm->rtm_type;
2240
2241         if (rtm->rtm_type == RTN_UNREACHABLE ||
2242             rtm->rtm_type == RTN_BLACKHOLE ||
2243             rtm->rtm_type == RTN_PROHIBIT ||
2244             rtm->rtm_type == RTN_THROW)
2245                 cfg->fc_flags |= RTF_REJECT;
2246
2247         if (rtm->rtm_type == RTN_LOCAL)
2248                 cfg->fc_flags |= RTF_LOCAL;
2249
2250         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2251         cfg->fc_nlinfo.nlh = nlh;
2252         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2253
2254         if (tb[RTA_GATEWAY]) {
2255                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2256                 cfg->fc_flags |= RTF_GATEWAY;
2257         }
2258
2259         if (tb[RTA_DST]) {
2260                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2261
2262                 if (nla_len(tb[RTA_DST]) < plen)
2263                         goto errout;
2264
2265                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2266         }
2267
2268         if (tb[RTA_SRC]) {
2269                 int plen = (rtm->rtm_src_len + 7) >> 3;
2270
2271                 if (nla_len(tb[RTA_SRC]) < plen)
2272                         goto errout;
2273
2274                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2275         }
2276
2277         if (tb[RTA_PREFSRC])
2278                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2279
2280         if (tb[RTA_OIF])
2281                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2282
2283         if (tb[RTA_PRIORITY])
2284                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2285
2286         if (tb[RTA_METRICS]) {
2287                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2288                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2289         }
2290
2291         if (tb[RTA_TABLE])
2292                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2293
2294         if (tb[RTA_MULTIPATH]) {
2295                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2296                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2297         }
2298
2299         err = 0;
2300 errout:
2301         return err;
2302 }
2303
2304 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2305 {
2306         struct fib6_config r_cfg;
2307         struct rtnexthop *rtnh;
2308         int remaining;
2309         int attrlen;
2310         int err = 0, last_err = 0;
2311
2312 beginning:
2313         rtnh = (struct rtnexthop *)cfg->fc_mp;
2314         remaining = cfg->fc_mp_len;
2315
2316         /* Parse a Multipath Entry */
2317         while (rtnh_ok(rtnh, remaining)) {
2318                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2319                 if (rtnh->rtnh_ifindex)
2320                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2321
2322                 attrlen = rtnh_attrlen(rtnh);
2323                 if (attrlen > 0) {
2324                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2325
2326                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2327                         if (nla) {
2328                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2329                                 r_cfg.fc_flags |= RTF_GATEWAY;
2330                         }
2331                 }
2332                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2333                 if (err) {
2334                         last_err = err;
2335                         /* If we are trying to remove a route, do not stop the
2336                          * loop when ip6_route_del() fails (because next hop is
2337                          * already gone), we should try to remove all next hops.
2338                          */
2339                         if (add) {
2340                                 /* If add fails, we should try to delete all
2341                                  * next hops that have been already added.
2342                                  */
2343                                 add = 0;
2344                                 goto beginning;
2345                         }
2346                 }
2347                 /* Because each route is added like a single route we remove
2348                  * this flag after the first nexthop (if there is a collision,
2349                  * we have already fail to add the first nexthop:
2350                  * fib6_add_rt2node() has reject it).
2351                  */
2352                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2353                 rtnh = rtnh_next(rtnh, &remaining);
2354         }
2355
2356         return last_err;
2357 }
2358
2359 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2360 {
2361         struct fib6_config cfg;
2362         int err;
2363
2364         err = rtm_to_fib6_config(skb, nlh, &cfg);
2365         if (err < 0)
2366                 return err;
2367
2368         if (cfg.fc_mp)
2369                 return ip6_route_multipath(&cfg, 0);
2370         else
2371                 return ip6_route_del(&cfg);
2372 }
2373
2374 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2375 {
2376         struct fib6_config cfg;
2377         int err;
2378
2379         err = rtm_to_fib6_config(skb, nlh, &cfg);
2380         if (err < 0)
2381                 return err;
2382
2383         if (cfg.fc_mp)
2384                 return ip6_route_multipath(&cfg, 1);
2385         else
2386                 return ip6_route_add(&cfg);
2387 }
2388
2389 static inline size_t rt6_nlmsg_size(void)
2390 {
2391         return NLMSG_ALIGN(sizeof(struct rtmsg))
2392                + nla_total_size(16) /* RTA_SRC */
2393                + nla_total_size(16) /* RTA_DST */
2394                + nla_total_size(16) /* RTA_GATEWAY */
2395                + nla_total_size(16) /* RTA_PREFSRC */
2396                + nla_total_size(4) /* RTA_TABLE */
2397                + nla_total_size(4) /* RTA_IIF */
2398                + nla_total_size(4) /* RTA_OIF */
2399                + nla_total_size(4) /* RTA_PRIORITY */
2400                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2401                + nla_total_size(sizeof(struct rta_cacheinfo));
2402 }
2403
2404 static int rt6_fill_node(struct net *net,
2405                          struct sk_buff *skb, struct rt6_info *rt,
2406                          struct in6_addr *dst, struct in6_addr *src,
2407                          int iif, int type, u32 portid, u32 seq,
2408                          int prefix, int nowait, unsigned int flags)
2409 {
2410         struct rtmsg *rtm;
2411         struct nlmsghdr *nlh;
2412         long expires;
2413         u32 table;
2414
2415         if (prefix) {   /* user wants prefix routes only */
2416                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2417                         /* success since this is not a prefix route */
2418                         return 1;
2419                 }
2420         }
2421
2422         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2423         if (!nlh)
2424                 return -EMSGSIZE;
2425
2426         rtm = nlmsg_data(nlh);
2427         rtm->rtm_family = AF_INET6;
2428         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2429         rtm->rtm_src_len = rt->rt6i_src.plen;
2430         rtm->rtm_tos = 0;
2431         if (rt->rt6i_table)
2432                 table = rt->rt6i_table->tb6_id;
2433         else
2434                 table = RT6_TABLE_UNSPEC;
2435         rtm->rtm_table = table;
2436         if (nla_put_u32(skb, RTA_TABLE, table))
2437                 goto nla_put_failure;
2438         if (rt->rt6i_flags & RTF_REJECT) {
2439                 switch (rt->dst.error) {
2440                 case -EINVAL:
2441                         rtm->rtm_type = RTN_BLACKHOLE;
2442                         break;
2443                 case -EACCES:
2444                         rtm->rtm_type = RTN_PROHIBIT;
2445                         break;
2446                 case -EAGAIN:
2447                         rtm->rtm_type = RTN_THROW;
2448                         break;
2449                 default:
2450                         rtm->rtm_type = RTN_UNREACHABLE;
2451                         break;
2452                 }
2453         }
2454         else if (rt->rt6i_flags & RTF_LOCAL)
2455                 rtm->rtm_type = RTN_LOCAL;
2456         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2457                 rtm->rtm_type = RTN_LOCAL;
2458         else
2459                 rtm->rtm_type = RTN_UNICAST;
2460         rtm->rtm_flags = 0;
2461         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2462         rtm->rtm_protocol = rt->rt6i_protocol;
2463         if (rt->rt6i_flags & RTF_DYNAMIC)
2464                 rtm->rtm_protocol = RTPROT_REDIRECT;
2465         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2466                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2467                         rtm->rtm_protocol = RTPROT_RA;
2468                 else
2469                         rtm->rtm_protocol = RTPROT_KERNEL;
2470         }
2471
2472         if (rt->rt6i_flags & RTF_CACHE)
2473                 rtm->rtm_flags |= RTM_F_CLONED;
2474
2475         if (dst) {
2476                 if (nla_put(skb, RTA_DST, 16, dst))
2477                         goto nla_put_failure;
2478                 rtm->rtm_dst_len = 128;
2479         } else if (rtm->rtm_dst_len)
2480                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2481                         goto nla_put_failure;
2482 #ifdef CONFIG_IPV6_SUBTREES
2483         if (src) {
2484                 if (nla_put(skb, RTA_SRC, 16, src))
2485                         goto nla_put_failure;
2486                 rtm->rtm_src_len = 128;
2487         } else if (rtm->rtm_src_len &&
2488                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2489                 goto nla_put_failure;
2490 #endif
2491         if (iif) {
2492 #ifdef CONFIG_IPV6_MROUTE
2493                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2494                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2495                         if (err <= 0) {
2496                                 if (!nowait) {
2497                                         if (err == 0)
2498                                                 return 0;
2499                                         goto nla_put_failure;
2500                                 } else {
2501                                         if (err == -EMSGSIZE)
2502                                                 goto nla_put_failure;
2503                                 }
2504                         }
2505                 } else
2506 #endif
2507                         if (nla_put_u32(skb, RTA_IIF, iif))
2508                                 goto nla_put_failure;
2509         } else if (dst) {
2510                 struct in6_addr saddr_buf;
2511                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2512                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2513                         goto nla_put_failure;
2514         }
2515
2516         if (rt->rt6i_prefsrc.plen) {
2517                 struct in6_addr saddr_buf;
2518                 saddr_buf = rt->rt6i_prefsrc.addr;
2519                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2520                         goto nla_put_failure;
2521         }
2522
2523         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2524                 goto nla_put_failure;
2525
2526         if (rt->rt6i_flags & RTF_GATEWAY) {
2527                 if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2528                         goto nla_put_failure;
2529         }
2530
2531         if (rt->dst.dev &&
2532             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2533                 goto nla_put_failure;
2534         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2535                 goto nla_put_failure;
2536
2537         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2538
2539         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2540                 goto nla_put_failure;
2541
2542         return nlmsg_end(skb, nlh);
2543
2544 nla_put_failure:
2545         nlmsg_cancel(skb, nlh);
2546         return -EMSGSIZE;
2547 }
2548
2549 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2550 {
2551         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2552         int prefix;
2553
2554         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2555                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2556                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2557         } else
2558                 prefix = 0;
2559
2560         return rt6_fill_node(arg->net,
2561                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2562                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2563                      prefix, 0, NLM_F_MULTI);
2564 }
2565
2566 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2567 {
2568         struct net *net = sock_net(in_skb->sk);
2569         struct nlattr *tb[RTA_MAX+1];
2570         struct rt6_info *rt;
2571         struct sk_buff *skb;
2572         struct rtmsg *rtm;
2573         struct flowi6 fl6;
2574         int err, iif = 0, oif = 0;
2575
2576         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2577         if (err < 0)
2578                 goto errout;
2579
2580         err = -EINVAL;
2581         memset(&fl6, 0, sizeof(fl6));
2582
2583         if (tb[RTA_SRC]) {
2584                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2585                         goto errout;
2586
2587                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2588         }
2589
2590         if (tb[RTA_DST]) {
2591                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2592                         goto errout;
2593
2594                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2595         }
2596
2597         if (tb[RTA_IIF])
2598                 iif = nla_get_u32(tb[RTA_IIF]);
2599
2600         if (tb[RTA_OIF])
2601                 oif = nla_get_u32(tb[RTA_OIF]);
2602
2603         if (iif) {
2604                 struct net_device *dev;
2605                 int flags = 0;
2606
2607                 dev = __dev_get_by_index(net, iif);
2608                 if (!dev) {
2609                         err = -ENODEV;
2610                         goto errout;
2611                 }
2612
2613                 fl6.flowi6_iif = iif;
2614
2615                 if (!ipv6_addr_any(&fl6.saddr))
2616                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2617
2618                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2619                                                                flags);
2620         } else {
2621                 fl6.flowi6_oif = oif;
2622
2623                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2624         }
2625
2626         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2627         if (!skb) {
2628                 ip6_rt_put(rt);
2629                 err = -ENOBUFS;
2630                 goto errout;
2631         }
2632
2633         /* Reserve room for dummy headers, this skb can pass
2634            through good chunk of routing engine.
2635          */
2636         skb_reset_mac_header(skb);
2637         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2638
2639         skb_dst_set(skb, &rt->dst);
2640
2641         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2642                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2643                             nlh->nlmsg_seq, 0, 0, 0);
2644         if (err < 0) {
2645                 kfree_skb(skb);
2646                 goto errout;
2647         }
2648
2649         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2650 errout:
2651         return err;
2652 }
2653
2654 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2655 {
2656         struct sk_buff *skb;
2657         struct net *net = info->nl_net;
2658         u32 seq;
2659         int err;
2660
2661         err = -ENOBUFS;
2662         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2663
2664         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2665         if (!skb)
2666                 goto errout;
2667
2668         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2669                                 event, info->portid, seq, 0, 0, 0);
2670         if (err < 0) {
2671                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2672                 WARN_ON(err == -EMSGSIZE);
2673                 kfree_skb(skb);
2674                 goto errout;
2675         }
2676         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2677                     info->nlh, gfp_any());
2678         return;
2679 errout:
2680         if (err < 0)
2681                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2682 }
2683
2684 static int ip6_route_dev_notify(struct notifier_block *this,
2685                                 unsigned long event, void *data)
2686 {
2687         struct net_device *dev = (struct net_device *)data;
2688         struct net *net = dev_net(dev);
2689
2690         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2691                 net->ipv6.ip6_null_entry->dst.dev = dev;
2692                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2693 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2694                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2695                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2696                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2697                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2698 #endif
2699         }
2700
2701         return NOTIFY_OK;
2702 }
2703
2704 /*
2705  *      /proc
2706  */
2707
2708 #ifdef CONFIG_PROC_FS
2709
/*
 * Bookkeeping for writing /proc output into a caller-supplied buffer.
 * NOTE(review): the /proc handlers in this section take a struct
 * seq_file and do not appear to use this structure - confirm whether
 * it is still referenced anywhere before relying on it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2718
2719 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2720 {
2721         struct seq_file *m = p_arg;
2722
2723         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2724
2725 #ifdef CONFIG_IPV6_SUBTREES
2726         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2727 #else
2728         seq_puts(m, "00000000000000000000000000000000 00 ");
2729 #endif
2730         if (rt->rt6i_flags & RTF_GATEWAY) {
2731                 seq_printf(m, "%pi6", &rt->rt6i_gateway);
2732         } else {
2733                 seq_puts(m, "00000000000000000000000000000000");
2734         }
2735         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2736                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2737                    rt->dst.__use, rt->rt6i_flags,
2738                    rt->dst.dev ? rt->dst.dev->name : "");
2739         return 0;
2740 }
2741
2742 static int ipv6_route_show(struct seq_file *m, void *v)
2743 {
2744         struct net *net = (struct net *)m->private;
2745         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2746         return 0;
2747 }
2748
2749 static int ipv6_route_open(struct inode *inode, struct file *file)
2750 {
2751         return single_open_net(inode, file, ipv6_route_show);
2752 }
2753
2754 static const struct file_operations ipv6_route_proc_fops = {
2755         .owner          = THIS_MODULE,
2756         .open           = ipv6_route_open,
2757         .read           = seq_read,
2758         .llseek         = seq_lseek,
2759         .release        = single_release_net,
2760 };
2761
2762 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2763 {
2764         struct net *net = (struct net *)seq->private;
2765         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2766                    net->ipv6.rt6_stats->fib_nodes,
2767                    net->ipv6.rt6_stats->fib_route_nodes,
2768                    net->ipv6.rt6_stats->fib_rt_alloc,
2769                    net->ipv6.rt6_stats->fib_rt_entries,
2770                    net->ipv6.rt6_stats->fib_rt_cache,
2771                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2772                    net->ipv6.rt6_stats->fib_discarded_routes);
2773
2774         return 0;
2775 }
2776
2777 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2778 {
2779         return single_open_net(inode, file, rt6_stats_seq_show);
2780 }
2781
2782 static const struct file_operations rt6_stats_seq_fops = {
2783         .owner   = THIS_MODULE,
2784         .open    = rt6_stats_seq_open,
2785         .read    = seq_read,
2786         .llseek  = seq_lseek,
2787         .release = single_release_net,
2788 };
2789 #endif  /* CONFIG_PROC_FS */
2790
2791 #ifdef CONFIG_SYSCTL
2792
2793 static
2794 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2795                               void __user *buffer, size_t *lenp, loff_t *ppos)
2796 {
2797         struct net *net;
2798         int delay;
2799         if (!write)
2800                 return -EINVAL;
2801
2802         net = (struct net *)ctl->extra1;
2803         delay = net->ipv6.sysctl.flush_delay;
2804         proc_dointvec(ctl, write, buffer, lenp, ppos);
2805         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2806         return 0;
2807 }
2808
2809 ctl_table ipv6_route_table_template[] = {
2810         {
2811                 .procname       =       "flush",
2812                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2813                 .maxlen         =       sizeof(int),
2814                 .mode           =       0200,
2815                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2816         },
2817         {
2818                 .procname       =       "gc_thresh",
2819                 .data           =       &ip6_dst_ops_template.gc_thresh,
2820                 .maxlen         =       sizeof(int),
2821                 .mode           =       0644,
2822                 .proc_handler   =       proc_dointvec,
2823         },
2824         {
2825                 .procname       =       "max_size",
2826                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2827                 .maxlen         =       sizeof(int),
2828                 .mode           =       0644,
2829                 .proc_handler   =       proc_dointvec,
2830         },
2831         {
2832                 .procname       =       "gc_min_interval",
2833                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2834                 .maxlen         =       sizeof(int),
2835                 .mode           =       0644,
2836                 .proc_handler   =       proc_dointvec_jiffies,
2837         },
2838         {
2839                 .procname       =       "gc_timeout",
2840                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2841                 .maxlen         =       sizeof(int),
2842                 .mode           =       0644,
2843                 .proc_handler   =       proc_dointvec_jiffies,
2844         },
2845         {
2846                 .procname       =       "gc_interval",
2847                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2848                 .maxlen         =       sizeof(int),
2849                 .mode           =       0644,
2850                 .proc_handler   =       proc_dointvec_jiffies,
2851         },
2852         {
2853                 .procname       =       "gc_elasticity",
2854                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2855                 .maxlen         =       sizeof(int),
2856                 .mode           =       0644,
2857                 .proc_handler   =       proc_dointvec,
2858         },
2859         {
2860                 .procname       =       "mtu_expires",
2861                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2862                 .maxlen         =       sizeof(int),
2863                 .mode           =       0644,
2864                 .proc_handler   =       proc_dointvec_jiffies,
2865         },
2866         {
2867                 .procname       =       "min_adv_mss",
2868                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2869                 .maxlen         =       sizeof(int),
2870                 .mode           =       0644,
2871                 .proc_handler   =       proc_dointvec,
2872         },
2873         {
2874                 .procname       =       "gc_min_interval_ms",
2875                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2876                 .maxlen         =       sizeof(int),
2877                 .mode           =       0644,
2878                 .proc_handler   =       proc_dointvec_ms_jiffies,
2879         },
2880         { }
2881 };
2882
2883 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2884 {
2885         struct ctl_table *table;
2886
2887         table = kmemdup(ipv6_route_table_template,
2888                         sizeof(ipv6_route_table_template),
2889                         GFP_KERNEL);
2890
2891         if (table) {
2892                 table[0].data = &net->ipv6.sysctl.flush_delay;
2893                 table[0].extra1 = net;
2894                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2895                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2896                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2897                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2898                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2899                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2900                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2901                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2902                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2903
2904                 /* Don't export sysctls to unprivileged users */
2905                 if (net->user_ns != &init_user_ns)
2906                         table[0].procname = NULL;
2907         }
2908
2909         return table;
2910 }
2911 #endif
2912
2913 static int __net_init ip6_route_net_init(struct net *net)
2914 {
2915         int ret = -ENOMEM;
2916
2917         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2918                sizeof(net->ipv6.ip6_dst_ops));
2919
2920         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2921                 goto out_ip6_dst_ops;
2922
2923         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2924                                            sizeof(*net->ipv6.ip6_null_entry),
2925                                            GFP_KERNEL);
2926         if (!net->ipv6.ip6_null_entry)
2927                 goto out_ip6_dst_entries;
2928         net->ipv6.ip6_null_entry->dst.path =
2929                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2930         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2931         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2932                          ip6_template_metrics, true);
2933
2934 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2935         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2936                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2937                                                GFP_KERNEL);
2938         if (!net->ipv6.ip6_prohibit_entry)
2939                 goto out_ip6_null_entry;
2940         net->ipv6.ip6_prohibit_entry->dst.path =
2941                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2942         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2943         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2944                          ip6_template_metrics, true);
2945
2946         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2947                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
2948                                                GFP_KERNEL);
2949         if (!net->ipv6.ip6_blk_hole_entry)
2950                 goto out_ip6_prohibit_entry;
2951         net->ipv6.ip6_blk_hole_entry->dst.path =
2952                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2953         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2954         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2955                          ip6_template_metrics, true);
2956 #endif
2957
2958         net->ipv6.sysctl.flush_delay = 0;
2959         net->ipv6.sysctl.ip6_rt_max_size = 4096;
2960         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2961         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2962         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2963         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2964         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2965         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2966
2967         net->ipv6.ip6_rt_gc_expire = 30*HZ;
2968
2969         ret = 0;
2970 out:
2971         return ret;
2972
2973 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2974 out_ip6_prohibit_entry:
2975         kfree(net->ipv6.ip6_prohibit_entry);
2976 out_ip6_null_entry:
2977         kfree(net->ipv6.ip6_null_entry);
2978 #endif
2979 out_ip6_dst_entries:
2980         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2981 out_ip6_dst_ops:
2982         goto out;
2983 }
2984
2985 static void __net_exit ip6_route_net_exit(struct net *net)
2986 {
2987         kfree(net->ipv6.ip6_null_entry);
2988 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2989         kfree(net->ipv6.ip6_prohibit_entry);
2990         kfree(net->ipv6.ip6_blk_hole_entry);
2991 #endif
2992         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2993 }
2994
2995 static int __net_init ip6_route_net_init_late(struct net *net)
2996 {
2997 #ifdef CONFIG_PROC_FS
2998         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
2999         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3000 #endif
3001         return 0;
3002 }
3003
3004 static void __net_exit ip6_route_net_exit_late(struct net *net)
3005 {
3006 #ifdef CONFIG_PROC_FS
3007         remove_proc_entry("ipv6_route", net->proc_net);
3008         remove_proc_entry("rt6_stats", net->proc_net);
3009 #endif
3010 }
3011
3012 static struct pernet_operations ip6_route_net_ops = {
3013         .init = ip6_route_net_init,
3014         .exit = ip6_route_net_exit,
3015 };
3016
3017 static int __net_init ipv6_inetpeer_init(struct net *net)
3018 {
3019         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3020
3021         if (!bp)
3022                 return -ENOMEM;
3023         inet_peer_base_init(bp);
3024         net->ipv6.peers = bp;
3025         return 0;
3026 }
3027
3028 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3029 {
3030         struct inet_peer_base *bp = net->ipv6.peers;
3031
3032         net->ipv6.peers = NULL;
3033         inetpeer_invalidate_tree(bp);
3034         kfree(bp);
3035 }
3036
3037 static struct pernet_operations ipv6_inetpeer_ops = {
3038         .init   =       ipv6_inetpeer_init,
3039         .exit   =       ipv6_inetpeer_exit,
3040 };
3041
3042 static struct pernet_operations ip6_route_net_late_ops = {
3043         .init = ip6_route_net_init_late,
3044         .exit = ip6_route_net_exit_late,
3045 };
3046
3047 static struct notifier_block ip6_route_dev_notifier = {
3048         .notifier_call = ip6_route_dev_notify,
3049         .priority = 0,
3050 };
3051
3052 int __init ip6_route_init(void)
3053 {
3054         int ret;
3055
3056         ret = -ENOMEM;
3057         ip6_dst_ops_template.kmem_cachep =
3058                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3059                                   SLAB_HWCACHE_ALIGN, NULL);
3060         if (!ip6_dst_ops_template.kmem_cachep)
3061                 goto out;
3062
3063         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3064         if (ret)
3065                 goto out_kmem_cache;
3066
3067         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3068         if (ret)
3069                 goto out_dst_entries;
3070
3071         ret = register_pernet_subsys(&ip6_route_net_ops);
3072         if (ret)
3073                 goto out_register_inetpeer;
3074
3075         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3076
3077         /* Registering of the loopback is done before this portion of code,
3078          * the loopback reference in rt6_info will not be taken, do it
3079          * manually for init_net */
3080         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3081         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3082   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3083         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3084         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3085         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3086         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3087   #endif
3088         ret = fib6_init();
3089         if (ret)
3090                 goto out_register_subsys;
3091
3092         ret = xfrm6_init();
3093         if (ret)
3094                 goto out_fib6_init;
3095
3096         ret = fib6_rules_init();
3097         if (ret)
3098                 goto xfrm6_init;
3099
3100         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3101         if (ret)
3102                 goto fib6_rules_init;
3103
3104         ret = -ENOBUFS;
3105         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3106             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3107             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3108                 goto out_register_late_subsys;
3109
3110         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3111         if (ret)
3112                 goto out_register_late_subsys;
3113
3114 out:
3115         return ret;
3116
3117 out_register_late_subsys:
3118         unregister_pernet_subsys(&ip6_route_net_late_ops);
3119 fib6_rules_init:
3120         fib6_rules_cleanup();
3121 xfrm6_init:
3122         xfrm6_fini();
3123 out_fib6_init:
3124         fib6_gc_cleanup();
3125 out_register_subsys:
3126         unregister_pernet_subsys(&ip6_route_net_ops);
3127 out_register_inetpeer:
3128         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3129 out_dst_entries:
3130         dst_entries_destroy(&ip6_dst_blackhole_ops);
3131 out_kmem_cache:
3132         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3133         goto out;
3134 }
3135
3136 void ip6_route_cleanup(void)
3137 {
3138         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3139         unregister_pernet_subsys(&ip6_route_net_late_ops);
3140         fib6_rules_cleanup();
3141         xfrm6_fini();
3142         fib6_gc_cleanup();
3143         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3144         unregister_pernet_subsys(&ip6_route_net_ops);
3145         dst_entries_destroy(&ip6_dst_blackhole_ops);
3146         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3147 }