Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[linux-2.6-block.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
44
45 #include <net/sock.h>
46 #include <net/ip.h>
47 #include <net/icmp.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
50 #include <net/arp.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
54 #include <net/xfrm.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
58
59 #if IS_ENABLED(CONFIG_IPV6)
60 #include <net/ipv6.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
63 #endif
64
65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
/* Replace the cached dst in @idst with @dst (NULL clears the slot).
 *
 * A DST_NOCACHE entry must not be cached, so it is demoted to NULL.
 * Otherwise a reference is taken before publishing.  The old pointer
 * is swapped out atomically with xchg() so concurrent RCU readers
 * never observe a half-updated slot; its reference is then dropped.
 */
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst)
{
	struct dst_entry *old_dst;

	if (dst) {
		if (dst->flags & DST_NOCACHE)
			dst = NULL;
		else
			dst_clone(dst);
	}
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
}
85
/* Cache @dst in the current CPU's slot of tunnel @t's per-cpu cache. */
static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
{
	__tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
}
90
/* Drop the cached route for the current CPU only. */
static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL);
}
95
/* Invalidate the cached route on every possible CPU, e.g. after the
 * tunnel's parameters changed and old cached routes may no longer apply.
 */
void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
	int i;

	for_each_possible_cpu(i)
		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
104
/* Fetch this CPU's cached route for tunnel @t, or NULL.
 *
 * An obsolete entry is revalidated via its dst ->check() op (with
 * @cookie); if stale, the per-cpu slot is cleared and NULL is returned
 * so the caller re-resolves the route.  On success a reference is held
 * for the caller (to be released with ip_rt_put()).
 */
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
{
	struct dst_entry *dst;

	rcu_read_lock();
	dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
	if (dst) {
		if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
			rcu_read_unlock();
			tunnel_dst_reset(t);
			return NULL;
		}
		dst_hold(dst);
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
122
123 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
124                                 __be16 flags, __be32 key)
125 {
126         if (p->i_flags & TUNNEL_KEY) {
127                 if (flags & TUNNEL_KEY)
128                         return key == p->i_key;
129                 else
130                         /* key expected, none present */
131                         return false;
132         } else
133                 return !(flags & TUNNEL_KEY);
134 }
135
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless
   tunnel, will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
147 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
148                                    int link, __be16 flags,
149                                    __be32 remote, __be32 local,
150                                    __be32 key)
151 {
152         unsigned int hash;
153         struct ip_tunnel *t, *cand = NULL;
154         struct hlist_head *head;
155
156         hash = ip_tunnel_hash(key, remote);
157         head = &itn->tunnels[hash];
158
159         hlist_for_each_entry_rcu(t, head, hash_node) {
160                 if (local != t->parms.iph.saddr ||
161                     remote != t->parms.iph.daddr ||
162                     !(t->dev->flags & IFF_UP))
163                         continue;
164
165                 if (!ip_tunnel_key_match(&t->parms, flags, key))
166                         continue;
167
168                 if (t->parms.link == link)
169                         return t;
170                 else
171                         cand = t;
172         }
173
174         hlist_for_each_entry_rcu(t, head, hash_node) {
175                 if (remote != t->parms.iph.daddr ||
176                     !(t->dev->flags & IFF_UP))
177                         continue;
178
179                 if (!ip_tunnel_key_match(&t->parms, flags, key))
180                         continue;
181
182                 if (t->parms.link == link)
183                         return t;
184                 else if (!cand)
185                         cand = t;
186         }
187
188         hash = ip_tunnel_hash(key, 0);
189         head = &itn->tunnels[hash];
190
191         hlist_for_each_entry_rcu(t, head, hash_node) {
192                 if ((local != t->parms.iph.saddr &&
193                      (local != t->parms.iph.daddr ||
194                       !ipv4_is_multicast(local))) ||
195                     !(t->dev->flags & IFF_UP))
196                         continue;
197
198                 if (!ip_tunnel_key_match(&t->parms, flags, key))
199                         continue;
200
201                 if (t->parms.link == link)
202                         return t;
203                 else if (!cand)
204                         cand = t;
205         }
206
207         if (flags & TUNNEL_NO_KEY)
208                 goto skip_key_lookup;
209
210         hlist_for_each_entry_rcu(t, head, hash_node) {
211                 if (t->parms.i_key != key ||
212                     !(t->dev->flags & IFF_UP))
213                         continue;
214
215                 if (t->parms.link == link)
216                         return t;
217                 else if (!cand)
218                         cand = t;
219         }
220
221 skip_key_lookup:
222         if (cand)
223                 return cand;
224
225         if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
226                 return netdev_priv(itn->fb_tunnel_dev);
227
228
229         return NULL;
230 }
231 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
232
233 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
234                                     struct ip_tunnel_parm *parms)
235 {
236         unsigned int h;
237         __be32 remote;
238         __be32 i_key = parms->i_key;
239
240         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
241                 remote = parms->iph.daddr;
242         else
243                 remote = 0;
244
245         if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
246                 i_key = 0;
247
248         h = ip_tunnel_hash(i_key, remote);
249         return &itn->tunnels[h];
250 }
251
/* Publish tunnel @t on its hash chain (RCU-safe for readers). */
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}
258
/* Unhash tunnel @t; concurrent RCU lookups may still observe it until
 * a grace period has elapsed.
 */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
263
264 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
265                                         struct ip_tunnel_parm *parms,
266                                         int type)
267 {
268         __be32 remote = parms->iph.daddr;
269         __be32 local = parms->iph.saddr;
270         __be32 key = parms->i_key;
271         int link = parms->link;
272         struct ip_tunnel *t = NULL;
273         struct hlist_head *head = ip_bucket(itn, parms);
274
275         hlist_for_each_entry_rcu(t, head, hash_node) {
276                 if (local == t->parms.iph.saddr &&
277                     remote == t->parms.iph.daddr &&
278                     key == t->parms.i_key &&
279                     link == t->parms.link &&
280                     type == t->dev->type)
281                         break;
282         }
283         return t;
284 }
285
/* Allocate and register a new tunnel net_device in @net.
 *
 * When @parms->name is empty, the template "<kind>%d" is used and the
 * kernel assigns the instance number; the kind must leave room for the
 * two-character "%d" suffix within IFNAMSIZ.
 *
 * Must be called under RTNL.  Returns the device or an ERR_PTR().
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		/* need room for "%d" plus the terminating NUL */
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
331
332 static inline void init_tunnel_flow(struct flowi4 *fl4,
333                                     int proto,
334                                     __be32 daddr, __be32 saddr,
335                                     __be32 key, __u8 tos, int oif)
336 {
337         memset(fl4, 0, sizeof(*fl4));
338         fl4->flowi4_oif = oif;
339         fl4->daddr = daddr;
340         fl4->saddr = saddr;
341         fl4->flowi4_tos = tos;
342         fl4->flowi4_proto = proto;
343         fl4->fl4_gre_key = key;
344 }
345
/* Bind tunnel device @dev to an underlying output device and derive a
 * suitable MTU and needed_headroom from it.
 *
 * When the tunnel has a fixed destination, the route towards it is
 * resolved (and cached in the per-cpu dst cache) to discover the
 * egress device; otherwise the explicitly configured link, if any, is
 * used.  Returns the MTU the tunnel device should use, never below 68
 * (the IPv4 minimum).
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			tunnel_dst_set(tunnel, &rt->dst);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	/* No route found: fall back to the configured underlying link */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	/* never report less than the IPv4 minimum MTU */
	if (mtu < 68)
		mtu = 68;

	return mtu;
}
393
/* Create, register and hash a new tunnel with parameters @parms, using
 * the per-netns fallback device's rtnl_link_ops as the template.
 *
 * Returns the new tunnel or an ERR_PTR() from device creation.
 */
static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}
412
/* Common receive path for decapsulated IPv4 tunnels.
 *
 * Checks the parsed tunnel header @tpi against the tunnel's checksum
 * and sequence-number configuration, performs ECN decapsulation,
 * updates rx statistics and hands the inner packet to the stack via
 * GRO cells.  Consumes @skb in all cases and returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* packet and tunnel must agree on whether a checksum is present */
	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* drop packets that arrive out of order when sequencing is on */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	/* err > 1 means the outer/inner ECN combination is invalid */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* scrub skb state when the packet crosses a netns boundary */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
481
/* Path-MTU handling on the tunnel transmit path.
 *
 * Computes the effective MTU towards the tunnel endpoint, propagates
 * it to the inner route, and when the inner packet does not fit (and
 * may not be fragmented) signals the appropriate ICMP/ICMPv6 "packet
 * too big" error back to the sender.  Returns 0 to continue
 * transmission or -E2BIG if an error was signalled.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* lower the cached IPv6 route MTU only for host routes or
		 * tunnels with a fixed unicast endpoint
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
529
/* Common transmit path: encapsulate @skb in an IPv4 header built from
 * @tnl_params and send it towards the tunnel endpoint with @protocol.
 *
 * Handles NBMA tunnels (no fixed destination — the outer daddr is
 * derived from the inner packet's route or neighbour entry), tos/ttl
 * inheritance, the per-cpu route cache, PMTU signalling and headroom
 * expansion.  Consumes @skb on both success and error.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;			/* fixed endpoint => dst cacheable */

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* only IPv4-compatible addresses yield an outer daddr */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	/* tos bit 0 set means: inherit TOS/DSFIELD from the inner packet */
	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	/* the cached route is only valid for a fixed, uninherited flow */
	rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst);
	}

	/* routing back through ourselves would loop forever */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* propagate recent ICMP errors back to local senders for a while */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		/* ttl 0 means: inherit from the inner packet */
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
690
/* Apply new parameters @p to existing tunnel @t.
 *
 * The tunnel is unhashed and rehashed because the addresses and key
 * determine its hash bucket.  If the underlying link changed, the
 * device is rebound and its MTU recomputed (applied only when
 * @set_mtu).  All per-cpu cached routes are invalidated and userspace
 * is notified of the device state change.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* non-Ethernet tunnels expose the outer addresses as the
		 * device's hardware/broadcast addresses
		 */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
723
/* Legacy ioctl-based tunnel configuration
 * (SIOCGETTUNNEL / SIOCADDTUNNEL / SIOCCHGTUNNEL / SIOCDELTUNNEL).
 *
 * A get on the fallback device looks up the tunnel described by @p;
 * add/change/delete require CAP_NET_ADMIN in the tunnel's netns, and
 * the fallback device itself can never be deleted.  Returns 0 or a
 * negative errno.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* a nonzero TTL forbids fragmentation of the outer packet */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags&TUNNEL_KEY))
			p->i_key = 0;
		if (!(p->o_flags&TUNNEL_KEY))
			p->o_key = 0;

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL)) {
			t = ip_tunnel_create(net, itn, p);
			err = PTR_ERR_OR_ZERO(t);
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* parms already belong to another device */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* changing addresses must not flip the
				 * device between p-t-p and broadcast mode
				 */
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			/* the fallback device itself may not be deleted */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
819
820 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
821 {
822         struct ip_tunnel *tunnel = netdev_priv(dev);
823         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
824
825         if (new_mtu < 68 ||
826             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
827                 return -EINVAL;
828         dev->mtu = new_mtu;
829         return 0;
830 }
831 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
832
/* Device destructor: release the GRO cells, the per-cpu dst cache and
 * stats, then free the netdev itself.
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
842
/* rtnl dellink handler: unhash @dev and queue it for unregistration.
 * The per-netns fallback device is deliberately skipped here.
 */
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
856
/* Per-netns initialisation for one tunnel type.
 *
 * Initialises the hash table and, when @ops is given, creates the
 * netns-local fallback device named @devname.  Returns 0 or a negative
 * errno from fallback-device creation.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	/* no ops: this tunnel type has no fallback device in this netns */
	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
891
892 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
893                               struct rtnl_link_ops *ops)
894 {
895         struct net *net = dev_net(itn->fb_tunnel_dev);
896         struct net_device *dev, *aux;
897         int h;
898
899         for_each_netdev_safe(net, dev, aux)
900                 if (dev->rtnl_link_ops == ops)
901                         unregister_netdevice_queue(dev, head);
902
903         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
904                 struct ip_tunnel *t;
905                 struct hlist_node *n;
906                 struct hlist_head *thead = &itn->tunnels[h];
907
908                 hlist_for_each_entry_safe(t, n, thead, hash_node)
909                         /* If dev is in the same netns, it has already
910                          * been added to the list by the previous loop.
911                          */
912                         if (!net_eq(dev_net(t->dev), net))
913                                 unregister_netdevice_queue(t->dev, head);
914         }
915 }
916
/* Tear down all tunnel devices of type @ops tracked by @itn: gather them
 * onto a local list under RTNL, then unregister them in one batch.
 */
void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
927
928 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
929                       struct ip_tunnel_parm *p)
930 {
931         struct ip_tunnel *nt;
932         struct net *net = dev_net(dev);
933         struct ip_tunnel_net *itn;
934         int mtu;
935         int err;
936
937         nt = netdev_priv(dev);
938         itn = net_generic(net, nt->ip_tnl_net_id);
939
940         if (ip_tunnel_find(itn, p, dev->type))
941                 return -EEXIST;
942
943         nt->net = net;
944         nt->parms = *p;
945         err = register_netdevice(dev);
946         if (err)
947                 goto out;
948
949         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
950                 eth_hw_addr_random(dev);
951
952         mtu = ip_tunnel_bind_dev(dev);
953         if (!tb[IFLA_MTU])
954                 dev->mtu = mtu;
955
956         ip_tunnel_add(itn, nt);
957
958 out:
959         return err;
960 }
961 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
962
963 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
964                          struct ip_tunnel_parm *p)
965 {
966         struct ip_tunnel *t;
967         struct ip_tunnel *tunnel = netdev_priv(dev);
968         struct net *net = tunnel->net;
969         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
970
971         if (dev == itn->fb_tunnel_dev)
972                 return -EINVAL;
973
974         t = ip_tunnel_find(itn, p, dev->type);
975
976         if (t) {
977                 if (t->dev != dev)
978                         return -EEXIST;
979         } else {
980                 t = tunnel;
981
982                 if (dev->type != ARPHRD_ETHER) {
983                         unsigned int nflags = 0;
984
985                         if (ipv4_is_multicast(p->iph.daddr))
986                                 nflags = IFF_BROADCAST;
987                         else if (p->iph.daddr)
988                                 nflags = IFF_POINTOPOINT;
989
990                         if ((dev->flags ^ nflags) &
991                             (IFF_POINTOPOINT | IFF_BROADCAST))
992                                 return -EINVAL;
993                 }
994         }
995
996         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
997         return 0;
998 }
999 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1000
1001 int ip_tunnel_init(struct net_device *dev)
1002 {
1003         struct ip_tunnel *tunnel = netdev_priv(dev);
1004         struct iphdr *iph = &tunnel->parms.iph;
1005         int err;
1006
1007         dev->destructor = ip_tunnel_dev_free;
1008         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1009         if (!dev->tstats)
1010                 return -ENOMEM;
1011
1012         tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1013         if (!tunnel->dst_cache) {
1014                 free_percpu(dev->tstats);
1015                 return -ENOMEM;
1016         }
1017
1018         err = gro_cells_init(&tunnel->gro_cells, dev);
1019         if (err) {
1020                 free_percpu(tunnel->dst_cache);
1021                 free_percpu(dev->tstats);
1022                 return err;
1023         }
1024
1025         tunnel->dev = dev;
1026         tunnel->net = dev_net(dev);
1027         strcpy(tunnel->parms.name, dev->name);
1028         iph->version            = 4;
1029         iph->ihl                = 5;
1030
1031         return 0;
1032 }
1033 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1034
1035 void ip_tunnel_uninit(struct net_device *dev)
1036 {
1037         struct ip_tunnel *tunnel = netdev_priv(dev);
1038         struct net *net = tunnel->net;
1039         struct ip_tunnel_net *itn;
1040
1041         itn = net_generic(net, tunnel->ip_tnl_net_id);
1042         /* fb_tunnel_dev will be unregisted in net-exit call. */
1043         if (itn->fb_tunnel_dev != dev)
1044                 ip_tunnel_del(netdev_priv(dev));
1045
1046         ip_tunnel_dst_reset_all(tunnel);
1047 }
1048 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1049
1050 /* Do least required initialization, rest of init is done in tunnel_init call */
1051 void ip_tunnel_setup(struct net_device *dev, int net_id)
1052 {
1053         struct ip_tunnel *tunnel = netdev_priv(dev);
1054         tunnel->ip_tnl_net_id = net_id;
1055 }
1056 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1057
1058 MODULE_LICENSE("GPL");