/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
        return hash_32((__force u32)key ^ (__force u32)remote,
                       IP_TNL_HASH_BITS);
}

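/* Match a candidate tunnel's key configuration against the flags/key seen
 * in a packet: a keyed tunnel matches only packets carrying the same key,
 * and a keyless tunnel matches only keyless packets.
 */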
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
                                __be16 flags, __be32 key)
{
        if (p->i_flags & TUNNEL_KEY) {
                if (flags & TUNNEL_KEY)
                        return key == p->i_key;
                else
                        /* key expected, none present */
                        return false;
        } else
                return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if they do not match a configured keyless tunnel,
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for the incoming packet.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
                                   int link, __be16 flags,
                                   __be32 remote, __be32 local,
                                   __be32 key)
{
        unsigned int hash;
        struct ip_tunnel *t, *cand = NULL;
        struct hlist_head *head;

        hash = ip_tunnel_hash(key, remote);
        head = &itn->tunnels[hash];

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (local != t->parms.iph.saddr ||
                    remote != t->parms.iph.daddr ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (!ip_tunnel_key_match(&t->parms, flags, key))
                        continue;

                if (t->parms.link == link)
                        return t;
                else
                        cand = t;
        }

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (remote != t->parms.iph.daddr ||
                    t->parms.iph.saddr != 0 ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (!ip_tunnel_key_match(&t->parms, flags, key))
                        continue;

                if (t->parms.link == link)
                        return t;
                else if (!cand)
                        cand = t;
        }

        hash = ip_tunnel_hash(key, 0);
        head = &itn->tunnels[hash];

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
                    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
                        continue;

                if (!(t->dev->flags & IFF_UP))
                        continue;

                if (!ip_tunnel_key_match(&t->parms, flags, key))
                        continue;

                if (t->parms.link == link)
                        return t;
                else if (!cand)
                        cand = t;
        }

        if (flags & TUNNEL_NO_KEY)
                goto skip_key_lookup;

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (t->parms.i_key != key ||
                    t->parms.iph.saddr != 0 ||
                    t->parms.iph.daddr != 0 ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (t->parms.link == link)
                        return t;
                else if (!cand)
                        cand = t;
        }

skip_key_lookup:
        if (cand)
                return cand;

        t = rcu_dereference(itn->collect_md_tun);
        if (t)
                return t;

        if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
                return netdev_priv(itn->fb_tunnel_dev);

        return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

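/* Pick the hash chain a tunnel with the given parameters belongs to.
 * Multicast destinations hash like wildcard ones, and VTI tunnels without
 * TUNNEL_KEY ignore i_key for hashing purposes.
 */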
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
                                    struct ip_tunnel_parm *parms)
{
        unsigned int h;
        __be32 remote;
        __be32 i_key = parms->i_key;

        if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
                remote = parms->iph.daddr;
        else
                remote = 0;

        if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
                i_key = 0;

        h = ip_tunnel_hash(i_key, remote);
        return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
        struct hlist_head *head = ip_bucket(itn, &t->parms);

        if (t->collect_md)
                rcu_assign_pointer(itn->collect_md_tun, t);
        hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
        if (t->collect_md)
                rcu_assign_pointer(itn->collect_md_tun, NULL);
        hlist_del_init_rcu(&t->hash_node);
}

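/* Exact-match lookup used on the control path (ioctl/netlink): unlike
 * ip_tunnel_lookup(), this requires the configured local/remote addresses,
 * link and device type to match rather than picking a best candidate.
 */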
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
                                        struct ip_tunnel_parm *parms,
                                        int type)
{
        __be32 remote = parms->iph.daddr;
        __be32 local = parms->iph.saddr;
        __be32 key = parms->i_key;
        __be16 flags = parms->i_flags;
        int link = parms->link;
        struct ip_tunnel *t = NULL;
        struct hlist_head *head = ip_bucket(itn, parms);

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (local == t->parms.iph.saddr &&
                    remote == t->parms.iph.daddr &&
                    link == t->parms.link &&
                    type == t->dev->type &&
                    ip_tunnel_key_match(&t->parms, flags, key))
                        break;
        }

        return t;
}

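/* Allocate and register a tunnel net_device for the given parameters,
 * deriving the interface name from parms->name or from ops->kind + "%d".
 */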
static struct net_device *__ip_tunnel_create(struct net *net,
                                             const struct rtnl_link_ops *ops,
                                             struct ip_tunnel_parm *parms)
{
        int err;
        struct ip_tunnel *tunnel;
        struct net_device *dev;
        char name[IFNAMSIZ];

        if (parms->name[0])
                strlcpy(name, parms->name, IFNAMSIZ);
        else {
                if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
                        err = -E2BIG;
                        goto failed;
                }
                strlcpy(name, ops->kind, IFNAMSIZ);
                strncat(name, "%d", 2);
        }

        ASSERT_RTNL();
        dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
        if (!dev) {
                err = -ENOMEM;
                goto failed;
        }
        dev_net_set(dev, net);

        dev->rtnl_link_ops = ops;

        tunnel = netdev_priv(dev);
        tunnel->parms = *parms;
        tunnel->net = net;

        err = register_netdevice(dev);
        if (err)
                goto failed_free;

        return dev;

failed_free:
        free_netdev(dev);
failed:
        return ERR_PTR(err);
}

static inline void init_tunnel_flow(struct flowi4 *fl4,
                                    int proto,
                                    __be32 daddr, __be32 saddr,
                                    __be32 key, __u8 tos, int oif)
{
        memset(fl4, 0, sizeof(*fl4));
        fl4->flowi4_oif = oif;
        fl4->daddr = daddr;
        fl4->saddr = saddr;
        fl4->flowi4_tos = tos;
        fl4->flowi4_proto = proto;
        fl4->fl4_gre_key = key;
}

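/* Resolve the underlying output device for this tunnel (via a route lookup
 * on the configured destination, or via the configured link) and use it to
 * size needed_headroom; returns the MTU the tunnel device should use.
 */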
static int ip_tunnel_bind_dev(struct net_device *dev)
{
        struct net_device *tdev = NULL;
        struct ip_tunnel *tunnel = netdev_priv(dev);
        const struct iphdr *iph;
        int hlen = LL_MAX_HEADER;
        int mtu = ETH_DATA_LEN;
        int t_hlen = tunnel->hlen + sizeof(struct iphdr);

        iph = &tunnel->parms.iph;

        /* Guess output device to choose reasonable mtu and needed_headroom */
        if (iph->daddr) {
                struct flowi4 fl4;
                struct rtable *rt;

                init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
                                 iph->saddr, tunnel->parms.o_key,
                                 RT_TOS(iph->tos), tunnel->parms.link);
                rt = ip_route_output_key(tunnel->net, &fl4);

                if (!IS_ERR(rt)) {
                        tdev = rt->dst.dev;
                        dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
                                          fl4.saddr);
                        ip_rt_put(rt);
                }
                if (dev->type != ARPHRD_ETHER)
                        dev->flags |= IFF_POINTOPOINT;
        }

        if (!tdev && tunnel->parms.link)
                tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

        if (tdev) {
                hlen = tdev->hard_header_len + tdev->needed_headroom;
                mtu = tdev->mtu;
        }

        dev->needed_headroom = t_hlen + hlen;
        mtu -= (dev->hard_header_len + t_hlen);

        if (mtu < 68)
                mtu = 68;

        return mtu;
}

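/* Create a new tunnel device on behalf of the ioctl path, reusing the
 * fallback device's rtnl_link_ops, and hash it into the per-netns table.
 */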
static struct ip_tunnel *ip_tunnel_create(struct net *net,
                                          struct ip_tunnel_net *itn,
                                          struct ip_tunnel_parm *parms)
{
        struct ip_tunnel *nt;
        struct net_device *dev;

        BUG_ON(!itn->fb_tunnel_dev);
        dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
        if (IS_ERR(dev))
                return ERR_CAST(dev);

        dev->mtu = ip_tunnel_bind_dev(dev);

        nt = netdev_priv(dev);
        ip_tunnel_add(itn, nt);
        return nt;
}

int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
                  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
                  bool log_ecn_error)
{
        struct pcpu_sw_netstats *tstats;
        const struct iphdr *iph = ip_hdr(skb);
        int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
        if (ipv4_is_multicast(iph->daddr)) {
                tunnel->dev->stats.multicast++;
                skb->pkt_type = PACKET_BROADCAST;
        }
#endif

        if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
             ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
                tunnel->dev->stats.rx_crc_errors++;
                tunnel->dev->stats.rx_errors++;
                goto drop;
        }

        if (tunnel->parms.i_flags&TUNNEL_SEQ) {
                if (!(tpi->flags&TUNNEL_SEQ) ||
                    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
                        tunnel->dev->stats.rx_fifo_errors++;
                        tunnel->dev->stats.rx_errors++;
                        goto drop;
                }
                tunnel->i_seqno = ntohl(tpi->seq) + 1;
        }

        skb_reset_network_header(skb);

        err = IP_ECN_decapsulate(iph, skb);
        if (unlikely(err)) {
                if (log_ecn_error)
                        net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
                                             &iph->saddr, iph->tos);
                if (err > 1) {
                        ++tunnel->dev->stats.rx_frame_errors;
                        ++tunnel->dev->stats.rx_errors;
                        goto drop;
                }
        }

        tstats = this_cpu_ptr(tunnel->dev->tstats);
        u64_stats_update_begin(&tstats->syncp);
        tstats->rx_packets++;
        tstats->rx_bytes += skb->len;
        u64_stats_update_end(&tstats->syncp);

        skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

        if (tunnel->dev->type == ARPHRD_ETHER) {
                skb->protocol = eth_type_trans(skb, tunnel->dev);
                skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
        } else {
                skb->dev = tunnel->dev;
        }

        if (tun_dst)
                skb_dst_set(skb, (struct dst_entry *)tun_dst);

        gro_cells_receive(&tunnel->gro_cells, skb);
        return 0;

drop:
        kfree_skb(skb);
        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

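/* Optional outer encapsulation (e.g. FOU/GUE style UDP encap) is handled
 * through a small table of ip_tunnel_encap_ops indexed by encap type; the
 * helpers below query the registered ops for header length and for building
 * the encapsulation header on transmit.
 */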
static int ip_encap_hlen(struct ip_tunnel_encap *e)
{
        const struct ip_tunnel_encap_ops *ops;
        int hlen = -EINVAL;

        if (e->type == TUNNEL_ENCAP_NONE)
                return 0;

        if (e->type >= MAX_IPTUN_ENCAP_OPS)
                return -EINVAL;

        rcu_read_lock();
        ops = rcu_dereference(iptun_encaps[e->type]);
        if (likely(ops && ops->encap_hlen))
                hlen = ops->encap_hlen(e);
        rcu_read_unlock();

        return hlen;
}

const struct ip_tunnel_encap_ops __rcu *
                iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;

int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
                            unsigned int num)
{
        if (num >= MAX_IPTUN_ENCAP_OPS)
                return -ERANGE;

        return !cmpxchg((const struct ip_tunnel_encap_ops **)
                        &iptun_encaps[num],
                        NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
                            unsigned int num)
{
        int ret;

        if (num >= MAX_IPTUN_ENCAP_OPS)
                return -ERANGE;

        ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
                       &iptun_encaps[num],
                       ops, NULL) == ops) ? 0 : -1;

        synchronize_net();

        return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);

int ip_tunnel_encap_setup(struct ip_tunnel *t,
                          struct ip_tunnel_encap *ipencap)
{
        int hlen;

        memset(&t->encap, 0, sizeof(t->encap));

        hlen = ip_encap_hlen(ipencap);
        if (hlen < 0)
                return hlen;

        t->encap.type = ipencap->type;
        t->encap.sport = ipencap->sport;
        t->encap.dport = ipencap->dport;
        t->encap.flags = ipencap->flags;

        t->encap_hlen = hlen;
        t->hlen = t->encap_hlen + t->tun_hlen;

        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
                    u8 *protocol, struct flowi4 *fl4)
{
        const struct ip_tunnel_encap_ops *ops;
        int ret = -EINVAL;

        if (t->encap.type == TUNNEL_ENCAP_NONE)
                return 0;

        if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
                return -EINVAL;

        rcu_read_lock();
        ops = rcu_dereference(iptun_encaps[t->encap.type]);
        if (likely(ops && ops->build_header))
                ret = ops->build_header(skb, &t->encap, protocol, fl4);
        rcu_read_unlock();

        return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap);

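/* Check the inner packet against the path MTU of the outer route and, when
 * it does not fit, propagate the new MTU to the inner dst and signal the
 * sender (ICMP_FRAG_NEEDED for IPv4 with DF set, ICMPV6_PKT_TOOBIG for IPv6).
 */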
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
                           struct rtable *rt, __be16 df,
                           const struct iphdr *inner_iph)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
        int mtu;

        if (df)
                mtu = dst_mtu(&rt->dst) - dev->hard_header_len
                                        - sizeof(struct iphdr) - tunnel->hlen;
        else
                mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

        if (skb_dst(skb))
                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

        if (skb->protocol == htons(ETH_P_IP)) {
                if (!skb_is_gso(skb) &&
                    (inner_iph->frag_off & htons(IP_DF)) &&
                    mtu < pkt_size) {
                        memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                        return -E2BIG;
                }
        }
#if IS_ENABLED(CONFIG_IPV6)
        else if (skb->protocol == htons(ETH_P_IPV6)) {
                struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

                if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
                    mtu >= IPV6_MIN_MTU) {
                        if ((tunnel->parms.iph.daddr &&
                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
                            rt6->rt6i_dst.plen == 128) {
                                rt6->rt6i_flags |= RTF_MODIFIED;
                                dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
                        }
                }

                if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
                    mtu < pkt_size) {
                        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                        return -EMSGSIZE;
                }
        }
#endif
        return 0;
}

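/* Common transmit path for IPv4 tunnels: resolve the outer destination
 * (including NBMA tunnels with no fixed daddr), build the outer flow, apply
 * optional encapsulation, honour PMTU, and hand the packet to iptunnel_xmit().
 */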
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
                    const struct iphdr *tnl_params, u8 protocol)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        const struct iphdr *inner_iph;
        struct flowi4 fl4;
        u8     tos, ttl;
        __be16 df;
        struct rtable *rt;              /* Route to the other host */
        unsigned int max_headroom;      /* The extra header space needed */
        __be32 dst;
        bool connected;

        inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
        connected = (tunnel->parms.iph.daddr != 0);

        dst = tnl_params->daddr;
        if (dst == 0) {
                /* NBMA tunnel */

                if (!skb_dst(skb)) {
                        dev->stats.tx_fifo_errors++;
                        goto tx_error;
                }

                if (skb->protocol == htons(ETH_P_IP)) {
                        rt = skb_rtable(skb);
                        dst = rt_nexthop(rt, inner_iph->daddr);
                }
#if IS_ENABLED(CONFIG_IPV6)
                else if (skb->protocol == htons(ETH_P_IPV6)) {
                        const struct in6_addr *addr6;
                        struct neighbour *neigh;
                        bool do_tx_error_icmp;
                        int addr_type;

                        neigh = dst_neigh_lookup(skb_dst(skb),
                                                 &ipv6_hdr(skb)->daddr);
                        if (!neigh)
                                goto tx_error;

                        addr6 = (const struct in6_addr *)&neigh->primary_key;
                        addr_type = ipv6_addr_type(addr6);

                        if (addr_type == IPV6_ADDR_ANY) {
                                addr6 = &ipv6_hdr(skb)->daddr;
                                addr_type = ipv6_addr_type(addr6);
                        }

                        if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
                                do_tx_error_icmp = true;
                        else {
                                do_tx_error_icmp = false;
                                dst = addr6->s6_addr32[3];
                        }
                        neigh_release(neigh);
                        if (do_tx_error_icmp)
                                goto tx_error_icmp;
                }
#endif
                else
                        goto tx_error;

                connected = false;
        }

        tos = tnl_params->tos;
        if (tos & 0x1) {
                tos &= ~0x1;
                if (skb->protocol == htons(ETH_P_IP)) {
                        tos = inner_iph->tos;
                        connected = false;
                } else if (skb->protocol == htons(ETH_P_IPV6)) {
                        tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
                        connected = false;
                }
        }

        init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
                         tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

        if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
                goto tx_error;

        rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
                         NULL;

        if (!rt) {
                rt = ip_route_output_key(tunnel->net, &fl4);

                if (IS_ERR(rt)) {
                        dev->stats.tx_carrier_errors++;
                        goto tx_error;
                }
                if (connected)
                        dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
                                          fl4.saddr);
        }

        if (rt->dst.dev == dev) {
                ip_rt_put(rt);
                dev->stats.collisions++;
                goto tx_error;
        }

        if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
                ip_rt_put(rt);
                goto tx_error;
        }

        if (tunnel->err_count > 0) {
                if (time_before(jiffies,
                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
                        tunnel->err_count--;

                        memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
        ttl = tnl_params->ttl;
        if (ttl == 0) {
                if (skb->protocol == htons(ETH_P_IP))
                        ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
                else if (skb->protocol == htons(ETH_P_IPV6))
                        ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
                else
                        ttl = ip4_dst_hoplimit(&rt->dst);
        }

        df = tnl_params->frag_off;
        if (skb->protocol == htons(ETH_P_IP))
                df |= (inner_iph->frag_off&htons(IP_DF));

        max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
                        + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
        if (max_headroom > dev->needed_headroom)
                dev->needed_headroom = max_headroom;

        if (skb_cow_head(skb, dev->needed_headroom)) {
                ip_rt_put(rt);
                dev->stats.tx_dropped++;
                kfree_skb(skb);
                return;
        }

        iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
                      df, !net_eq(tunnel->net, dev_net(dev)));
        return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
        dst_link_failure(skb);
#endif
tx_error:
        dev->stats.tx_errors++;
        kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

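/* Apply new parameters to an existing tunnel: re-hash it under the new
 * addresses/keys, refresh the link-layer addresses of non-Ethernet devices,
 * and re-bind the underlying device if the link changed.
 */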
static void ip_tunnel_update(struct ip_tunnel_net *itn,
                             struct ip_tunnel *t,
                             struct net_device *dev,
                             struct ip_tunnel_parm *p,
                             bool set_mtu)
{
        ip_tunnel_del(itn, t);
        t->parms.iph.saddr = p->iph.saddr;
        t->parms.iph.daddr = p->iph.daddr;
        t->parms.i_key = p->i_key;
        t->parms.o_key = p->o_key;
        if (dev->type != ARPHRD_ETHER) {
                memcpy(dev->dev_addr, &p->iph.saddr, 4);
                memcpy(dev->broadcast, &p->iph.daddr, 4);
        }
        ip_tunnel_add(itn, t);

        t->parms.iph.ttl = p->iph.ttl;
        t->parms.iph.tos = p->iph.tos;
        t->parms.iph.frag_off = p->iph.frag_off;

        if (t->parms.link != p->link) {
                int mtu;

                t->parms.link = p->link;
                mtu = ip_tunnel_bind_dev(dev);
                if (set_mtu)
                        dev->mtu = mtu;
        }
        dst_cache_reset(&t->dst_cache);
        netdev_state_change(dev);
}

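/* Legacy SIOC{GET,ADD,CHG,DEL}TUNNEL ioctl handler shared by the IPv4 tunnel
 * drivers; add, change and delete require CAP_NET_ADMIN in the tunnel's netns.
 */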
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
        int err = 0;
        struct ip_tunnel *t = netdev_priv(dev);
        struct net *net = t->net;
        struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

        BUG_ON(!itn->fb_tunnel_dev);
        switch (cmd) {
        case SIOCGETTUNNEL:
                if (dev == itn->fb_tunnel_dev) {
                        t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
                        if (!t)
                                t = netdev_priv(dev);
                }
                memcpy(p, &t->parms, sizeof(*p));
                break;

        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
                err = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto done;
                if (p->iph.ttl)
                        p->iph.frag_off |= htons(IP_DF);
                if (!(p->i_flags & VTI_ISVTI)) {
                        if (!(p->i_flags & TUNNEL_KEY))
                                p->i_key = 0;
                        if (!(p->o_flags & TUNNEL_KEY))
                                p->o_key = 0;
                }

                t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

                if (cmd == SIOCADDTUNNEL) {
                        if (!t) {
                                t = ip_tunnel_create(net, itn, p);
                                err = PTR_ERR_OR_ZERO(t);
                                break;
                        }

                        err = -EEXIST;
                        break;
                }
                if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
                        if (t) {
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
                                }
                        } else {
                                unsigned int nflags = 0;

                                if (ipv4_is_multicast(p->iph.daddr))
                                        nflags = IFF_BROADCAST;
                                else if (p->iph.daddr)
                                        nflags = IFF_POINTOPOINT;

                                if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
                                        err = -EINVAL;
                                        break;
                                }

                                t = netdev_priv(dev);
                        }
                }

                if (t) {
                        err = 0;
                        ip_tunnel_update(itn, t, dev, p, true);
                } else {
                        err = -ENOENT;
                }
                break;

        case SIOCDELTUNNEL:
                err = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto done;

                if (dev == itn->fb_tunnel_dev) {
                        err = -ENOENT;
                        t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
                        if (!t)
                                goto done;
                        err = -EPERM;
                        if (t == netdev_priv(itn->fb_tunnel_dev))
                                goto done;
                        dev = t->dev;
                }
                unregister_netdevice(dev);
                err = 0;
                break;

        default:
                err = -EINVAL;
        }

done:
        return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        int t_hlen = tunnel->hlen + sizeof(struct iphdr);

        if (new_mtu < 68 ||
            new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
                return -EINVAL;
        dev->mtu = new_mtu;
        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        gro_cells_destroy(&tunnel->gro_cells);
        dst_cache_destroy(&tunnel->dst_cache);
        free_percpu(dev->tstats);
        free_netdev(dev);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct ip_tunnel_net *itn;

        itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

        if (itn->fb_tunnel_dev != dev) {
                ip_tunnel_del(itn, netdev_priv(dev));
                unregister_netdevice_queue(dev, head);
        }
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);

int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
                       struct rtnl_link_ops *ops, char *devname)
{
        struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
        struct ip_tunnel_parm parms;
        unsigned int i;

        for (i = 0; i < IP_TNL_HASH_SIZE; i++)
                INIT_HLIST_HEAD(&itn->tunnels[i]);

        if (!ops) {
                itn->fb_tunnel_dev = NULL;
                return 0;
        }

        memset(&parms, 0, sizeof(parms));
        if (devname)
                strlcpy(parms.name, devname, IFNAMSIZ);

        rtnl_lock();
        itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
        /* FB netdevice is special: we have one, and only one per netns.
         * Allowing it to be moved to another netns is clearly unsafe.
         */
        if (!IS_ERR(itn->fb_tunnel_dev)) {
                itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
                itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
                ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
        }
        rtnl_unlock();

        return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
                              struct rtnl_link_ops *ops)
{
        struct net *net = dev_net(itn->fb_tunnel_dev);
        struct net_device *dev, *aux;
        int h;

        for_each_netdev_safe(net, dev, aux)
                if (dev->rtnl_link_ops == ops)
                        unregister_netdevice_queue(dev, head);

        for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
                struct ip_tunnel *t;
                struct hlist_node *n;
                struct hlist_head *thead = &itn->tunnels[h];

                hlist_for_each_entry_safe(t, n, thead, hash_node)
                        /* If dev is in the same netns, it has already
                         * been added to the list by the previous loop.
                         */
                        if (!net_eq(dev_net(t->dev), net))
                                unregister_netdevice_queue(t->dev, head);
        }
}

void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
        LIST_HEAD(list);

        rtnl_lock();
        ip_tunnel_destroy(itn, &list, ops);
        unregister_netdevice_many(&list);
        rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);

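/* rtnl newlink helper shared by the tunnel drivers: reject duplicates (only
 * one collect_md tunnel per netns), register the device and hash it into the
 * per-netns table.
 */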
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
                      struct ip_tunnel_parm *p)
{
        struct ip_tunnel *nt;
        struct net *net = dev_net(dev);
        struct ip_tunnel_net *itn;
        int mtu;
        int err;

        nt = netdev_priv(dev);
        itn = net_generic(net, nt->ip_tnl_net_id);

        if (nt->collect_md) {
                if (rtnl_dereference(itn->collect_md_tun))
                        return -EEXIST;
        } else {
                if (ip_tunnel_find(itn, p, dev->type))
                        return -EEXIST;
        }

        nt->net = net;
        nt->parms = *p;
        err = register_netdevice(dev);
        if (err)
                goto out;

        if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
                eth_hw_addr_random(dev);

        mtu = ip_tunnel_bind_dev(dev);
        if (!tb[IFLA_MTU])
                dev->mtu = mtu;

        ip_tunnel_add(itn, nt);
out:
        return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

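/* rtnl changelink helper: refuse to reconfigure the fallback device or to
 * clash with another tunnel, then apply the new parameters via
 * ip_tunnel_update().
 */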
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
                         struct ip_tunnel_parm *p)
{
        struct ip_tunnel *t;
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct net *net = tunnel->net;
        struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

        if (dev == itn->fb_tunnel_dev)
                return -EINVAL;

        t = ip_tunnel_find(itn, p, dev->type);

        if (t) {
                if (t->dev != dev)
                        return -EEXIST;
        } else {
                t = tunnel;

                if (dev->type != ARPHRD_ETHER) {
                        unsigned int nflags = 0;

                        if (ipv4_is_multicast(p->iph.daddr))
                                nflags = IFF_BROADCAST;
                        else if (p->iph.daddr)
                                nflags = IFF_POINTOPOINT;

                        if ((dev->flags ^ nflags) &
                            (IFF_POINTOPOINT | IFF_BROADCAST))
                                return -EINVAL;
                }
        }

        ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct iphdr *iph = &tunnel->parms.iph;
        int err;

        dev->destructor = ip_tunnel_dev_free;
        dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
        if (!dev->tstats)
                return -ENOMEM;

        err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
        if (err) {
                free_percpu(dev->tstats);
                return err;
        }

        err = gro_cells_init(&tunnel->gro_cells, dev);
        if (err) {
                dst_cache_destroy(&tunnel->dst_cache);
                free_percpu(dev->tstats);
                return err;
        }

        tunnel->dev = dev;
        tunnel->net = dev_net(dev);
        strcpy(tunnel->parms.name, dev->name);
        iph->version = 4;
        iph->ihl = 5;

        if (tunnel->collect_md) {
                dev->features |= NETIF_F_NETNS_LOCAL;
                netif_keep_dst(dev);
        }
        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct net *net = tunnel->net;
        struct ip_tunnel_net *itn;

        itn = net_generic(net, tunnel->ip_tnl_net_id);
        /* fb_tunnel_dev will be unregistered in the net exit path. */
        if (itn->fb_tunnel_dev != dev)
                ip_tunnel_del(itn, netdev_priv(dev));

        dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization; the rest is done in the tunnel_init callback. */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");