Merge branches 'acpica', 'acpi-tpm' and 'acpi-processor'
[linux-2.6-block.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
44
45 #include <net/sock.h>
46 #include <net/ip.h>
47 #include <net/icmp.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
50 #include <net/arp.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
54 #include <net/xfrm.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
58
59 #if IS_ENABLED(CONFIG_IPV6)
60 #include <net/ipv6.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
63 #endif
64
65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
71 static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
72                              struct dst_entry *dst)
73 {
74         struct dst_entry *old_dst;
75
76         if (dst) {
77                 if (dst->flags & DST_NOCACHE)
78                         dst = NULL;
79                 else
80                         dst_clone(dst);
81         }
82         old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
83         dst_release(old_dst);
84 }
85
86 static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
87 {
88         __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
89 }
90
91 static void tunnel_dst_reset(struct ip_tunnel *t)
92 {
93         tunnel_dst_set(t, NULL);
94 }
95
96 void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
97 {
98         int i;
99
100         for_each_possible_cpu(i)
101                 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
102 }
103 EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
104
105 static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
106 {
107         struct dst_entry *dst;
108
109         rcu_read_lock();
110         dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
111         if (dst) {
112                 if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
113                         rcu_read_unlock();
114                         tunnel_dst_reset(t);
115                         return NULL;
116                 }
117                 dst_hold(dst);
118         }
119         rcu_read_unlock();
120         return (struct rtable *)dst;
121 }
122
123 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
124                                 __be16 flags, __be32 key)
125 {
126         if (p->i_flags & TUNNEL_KEY) {
127                 if (flags & TUNNEL_KEY)
128                         return key == p->i_key;
129                 else
130                         /* key expected, none present */
131                         return false;
132         } else
133                 return !(flags & TUNNEL_KEY);
134 }
135
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
147 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
148                                    int link, __be16 flags,
149                                    __be32 remote, __be32 local,
150                                    __be32 key)
151 {
152         unsigned int hash;
153         struct ip_tunnel *t, *cand = NULL;
154         struct hlist_head *head;
155
156         hash = ip_tunnel_hash(key, remote);
157         head = &itn->tunnels[hash];
158
159         hlist_for_each_entry_rcu(t, head, hash_node) {
160                 if (local != t->parms.iph.saddr ||
161                     remote != t->parms.iph.daddr ||
162                     !(t->dev->flags & IFF_UP))
163                         continue;
164
165                 if (!ip_tunnel_key_match(&t->parms, flags, key))
166                         continue;
167
168                 if (t->parms.link == link)
169                         return t;
170                 else
171                         cand = t;
172         }
173
174         hlist_for_each_entry_rcu(t, head, hash_node) {
175                 if (remote != t->parms.iph.daddr ||
176                     !(t->dev->flags & IFF_UP))
177                         continue;
178
179                 if (!ip_tunnel_key_match(&t->parms, flags, key))
180                         continue;
181
182                 if (t->parms.link == link)
183                         return t;
184                 else if (!cand)
185                         cand = t;
186         }
187
188         hash = ip_tunnel_hash(key, 0);
189         head = &itn->tunnels[hash];
190
191         hlist_for_each_entry_rcu(t, head, hash_node) {
192                 if ((local != t->parms.iph.saddr &&
193                      (local != t->parms.iph.daddr ||
194                       !ipv4_is_multicast(local))) ||
195                     !(t->dev->flags & IFF_UP))
196                         continue;
197
198                 if (!ip_tunnel_key_match(&t->parms, flags, key))
199                         continue;
200
201                 if (t->parms.link == link)
202                         return t;
203                 else if (!cand)
204                         cand = t;
205         }
206
207         if (flags & TUNNEL_NO_KEY)
208                 goto skip_key_lookup;
209
210         hlist_for_each_entry_rcu(t, head, hash_node) {
211                 if (t->parms.i_key != key ||
212                     !(t->dev->flags & IFF_UP))
213                         continue;
214
215                 if (t->parms.link == link)
216                         return t;
217                 else if (!cand)
218                         cand = t;
219         }
220
221 skip_key_lookup:
222         if (cand)
223                 return cand;
224
225         if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
226                 return netdev_priv(itn->fb_tunnel_dev);
227
228
229         return NULL;
230 }
231 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
232
233 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
234                                     struct ip_tunnel_parm *parms)
235 {
236         unsigned int h;
237         __be32 remote;
238         __be32 i_key = parms->i_key;
239
240         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
241                 remote = parms->iph.daddr;
242         else
243                 remote = 0;
244
245         if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
246                 i_key = 0;
247
248         h = ip_tunnel_hash(i_key, remote);
249         return &itn->tunnels[h];
250 }
251
252 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
253 {
254         struct hlist_head *head = ip_bucket(itn, &t->parms);
255
256         hlist_add_head_rcu(&t->hash_node, head);
257 }
258
259 static void ip_tunnel_del(struct ip_tunnel *t)
260 {
261         hlist_del_init_rcu(&t->hash_node);
262 }
263
264 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
265                                         struct ip_tunnel_parm *parms,
266                                         int type)
267 {
268         __be32 remote = parms->iph.daddr;
269         __be32 local = parms->iph.saddr;
270         __be32 key = parms->i_key;
271         int link = parms->link;
272         struct ip_tunnel *t = NULL;
273         struct hlist_head *head = ip_bucket(itn, parms);
274
275         hlist_for_each_entry_rcu(t, head, hash_node) {
276                 if (local == t->parms.iph.saddr &&
277                     remote == t->parms.iph.daddr &&
278                     key == t->parms.i_key &&
279                     link == t->parms.link &&
280                     type == t->dev->type)
281                         break;
282         }
283         return t;
284 }
285
286 static struct net_device *__ip_tunnel_create(struct net *net,
287                                              const struct rtnl_link_ops *ops,
288                                              struct ip_tunnel_parm *parms)
289 {
290         int err;
291         struct ip_tunnel *tunnel;
292         struct net_device *dev;
293         char name[IFNAMSIZ];
294
295         if (parms->name[0])
296                 strlcpy(name, parms->name, IFNAMSIZ);
297         else {
298                 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
299                         err = -E2BIG;
300                         goto failed;
301                 }
302                 strlcpy(name, ops->kind, IFNAMSIZ);
303                 strncat(name, "%d", 2);
304         }
305
306         ASSERT_RTNL();
307         dev = alloc_netdev(ops->priv_size, name, ops->setup);
308         if (!dev) {
309                 err = -ENOMEM;
310                 goto failed;
311         }
312         dev_net_set(dev, net);
313
314         dev->rtnl_link_ops = ops;
315
316         tunnel = netdev_priv(dev);
317         tunnel->parms = *parms;
318         tunnel->net = net;
319
320         err = register_netdevice(dev);
321         if (err)
322                 goto failed_free;
323
324         return dev;
325
326 failed_free:
327         free_netdev(dev);
328 failed:
329         return ERR_PTR(err);
330 }
331
332 static inline void init_tunnel_flow(struct flowi4 *fl4,
333                                     int proto,
334                                     __be32 daddr, __be32 saddr,
335                                     __be32 key, __u8 tos, int oif)
336 {
337         memset(fl4, 0, sizeof(*fl4));
338         fl4->flowi4_oif = oif;
339         fl4->daddr = daddr;
340         fl4->saddr = saddr;
341         fl4->flowi4_tos = tos;
342         fl4->flowi4_proto = proto;
343         fl4->fl4_gre_key = key;
344 }
345
346 static int ip_tunnel_bind_dev(struct net_device *dev)
347 {
348         struct net_device *tdev = NULL;
349         struct ip_tunnel *tunnel = netdev_priv(dev);
350         const struct iphdr *iph;
351         int hlen = LL_MAX_HEADER;
352         int mtu = ETH_DATA_LEN;
353         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
354
355         iph = &tunnel->parms.iph;
356
357         /* Guess output device to choose reasonable mtu and needed_headroom */
358         if (iph->daddr) {
359                 struct flowi4 fl4;
360                 struct rtable *rt;
361
362                 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
363                                  iph->saddr, tunnel->parms.o_key,
364                                  RT_TOS(iph->tos), tunnel->parms.link);
365                 rt = ip_route_output_key(tunnel->net, &fl4);
366
367                 if (!IS_ERR(rt)) {
368                         tdev = rt->dst.dev;
369                         tunnel_dst_set(tunnel, &rt->dst);
370                         ip_rt_put(rt);
371                 }
372                 if (dev->type != ARPHRD_ETHER)
373                         dev->flags |= IFF_POINTOPOINT;
374         }
375
376         if (!tdev && tunnel->parms.link)
377                 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
378
379         if (tdev) {
380                 hlen = tdev->hard_header_len + tdev->needed_headroom;
381                 mtu = tdev->mtu;
382         }
383         dev->iflink = tunnel->parms.link;
384
385         dev->needed_headroom = t_hlen + hlen;
386         mtu -= (dev->hard_header_len + t_hlen);
387
388         if (mtu < 68)
389                 mtu = 68;
390
391         return mtu;
392 }
393
394 static struct ip_tunnel *ip_tunnel_create(struct net *net,
395                                           struct ip_tunnel_net *itn,
396                                           struct ip_tunnel_parm *parms)
397 {
398         struct ip_tunnel *nt, *fbt;
399         struct net_device *dev;
400
401         BUG_ON(!itn->fb_tunnel_dev);
402         fbt = netdev_priv(itn->fb_tunnel_dev);
403         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
404         if (IS_ERR(dev))
405                 return ERR_CAST(dev);
406
407         dev->mtu = ip_tunnel_bind_dev(dev);
408
409         nt = netdev_priv(dev);
410         ip_tunnel_add(itn, nt);
411         return nt;
412 }
413
414 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
415                   const struct tnl_ptk_info *tpi, bool log_ecn_error)
416 {
417         struct pcpu_sw_netstats *tstats;
418         const struct iphdr *iph = ip_hdr(skb);
419         int err;
420
421 #ifdef CONFIG_NET_IPGRE_BROADCAST
422         if (ipv4_is_multicast(iph->daddr)) {
423                 tunnel->dev->stats.multicast++;
424                 skb->pkt_type = PACKET_BROADCAST;
425         }
426 #endif
427
428         if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
429              ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
430                 tunnel->dev->stats.rx_crc_errors++;
431                 tunnel->dev->stats.rx_errors++;
432                 goto drop;
433         }
434
435         if (tunnel->parms.i_flags&TUNNEL_SEQ) {
436                 if (!(tpi->flags&TUNNEL_SEQ) ||
437                     (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
438                         tunnel->dev->stats.rx_fifo_errors++;
439                         tunnel->dev->stats.rx_errors++;
440                         goto drop;
441                 }
442                 tunnel->i_seqno = ntohl(tpi->seq) + 1;
443         }
444
445         skb_reset_network_header(skb);
446
447         err = IP_ECN_decapsulate(iph, skb);
448         if (unlikely(err)) {
449                 if (log_ecn_error)
450                         net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
451                                         &iph->saddr, iph->tos);
452                 if (err > 1) {
453                         ++tunnel->dev->stats.rx_frame_errors;
454                         ++tunnel->dev->stats.rx_errors;
455                         goto drop;
456                 }
457         }
458
459         tstats = this_cpu_ptr(tunnel->dev->tstats);
460         u64_stats_update_begin(&tstats->syncp);
461         tstats->rx_packets++;
462         tstats->rx_bytes += skb->len;
463         u64_stats_update_end(&tstats->syncp);
464
465         skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
466
467         if (tunnel->dev->type == ARPHRD_ETHER) {
468                 skb->protocol = eth_type_trans(skb, tunnel->dev);
469                 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
470         } else {
471                 skb->dev = tunnel->dev;
472         }
473
474         gro_cells_receive(&tunnel->gro_cells, skb);
475         return 0;
476
477 drop:
478         kfree_skb(skb);
479         return 0;
480 }
481 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
482
483 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
484                             struct rtable *rt, __be16 df)
485 {
486         struct ip_tunnel *tunnel = netdev_priv(dev);
487         int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
488         int mtu;
489
490         if (df)
491                 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
492                                         - sizeof(struct iphdr) - tunnel->hlen;
493         else
494                 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
495
496         if (skb_dst(skb))
497                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
498
499         if (skb->protocol == htons(ETH_P_IP)) {
500                 if (!skb_is_gso(skb) &&
501                     (df & htons(IP_DF)) && mtu < pkt_size) {
502                         memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
503                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
504                         return -E2BIG;
505                 }
506         }
507 #if IS_ENABLED(CONFIG_IPV6)
508         else if (skb->protocol == htons(ETH_P_IPV6)) {
509                 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
510
511                 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
512                            mtu >= IPV6_MIN_MTU) {
513                         if ((tunnel->parms.iph.daddr &&
514                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
515                             rt6->rt6i_dst.plen == 128) {
516                                 rt6->rt6i_flags |= RTF_MODIFIED;
517                                 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
518                         }
519                 }
520
521                 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
522                                         mtu < pkt_size) {
523                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
524                         return -E2BIG;
525                 }
526         }
527 #endif
528         return 0;
529 }
530
531 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
532                     const struct iphdr *tnl_params, const u8 protocol)
533 {
534         struct ip_tunnel *tunnel = netdev_priv(dev);
535         const struct iphdr *inner_iph;
536         struct flowi4 fl4;
537         u8     tos, ttl;
538         __be16 df;
539         struct rtable *rt;              /* Route to the other host */
540         unsigned int max_headroom;      /* The extra header space needed */
541         __be32 dst;
542         int err;
543         bool connected = true;
544
545         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
546
547         dst = tnl_params->daddr;
548         if (dst == 0) {
549                 /* NBMA tunnel */
550
551                 if (skb_dst(skb) == NULL) {
552                         dev->stats.tx_fifo_errors++;
553                         goto tx_error;
554                 }
555
556                 if (skb->protocol == htons(ETH_P_IP)) {
557                         rt = skb_rtable(skb);
558                         dst = rt_nexthop(rt, inner_iph->daddr);
559                 }
560 #if IS_ENABLED(CONFIG_IPV6)
561                 else if (skb->protocol == htons(ETH_P_IPV6)) {
562                         const struct in6_addr *addr6;
563                         struct neighbour *neigh;
564                         bool do_tx_error_icmp;
565                         int addr_type;
566
567                         neigh = dst_neigh_lookup(skb_dst(skb),
568                                                  &ipv6_hdr(skb)->daddr);
569                         if (neigh == NULL)
570                                 goto tx_error;
571
572                         addr6 = (const struct in6_addr *)&neigh->primary_key;
573                         addr_type = ipv6_addr_type(addr6);
574
575                         if (addr_type == IPV6_ADDR_ANY) {
576                                 addr6 = &ipv6_hdr(skb)->daddr;
577                                 addr_type = ipv6_addr_type(addr6);
578                         }
579
580                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
581                                 do_tx_error_icmp = true;
582                         else {
583                                 do_tx_error_icmp = false;
584                                 dst = addr6->s6_addr32[3];
585                         }
586                         neigh_release(neigh);
587                         if (do_tx_error_icmp)
588                                 goto tx_error_icmp;
589                 }
590 #endif
591                 else
592                         goto tx_error;
593
594                 connected = false;
595         }
596
597         tos = tnl_params->tos;
598         if (tos & 0x1) {
599                 tos &= ~0x1;
600                 if (skb->protocol == htons(ETH_P_IP)) {
601                         tos = inner_iph->tos;
602                         connected = false;
603                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
604                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
605                         connected = false;
606                 }
607         }
608
609         init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
610                          tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
611
612         rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;
613
614         if (!rt) {
615                 rt = ip_route_output_key(tunnel->net, &fl4);
616
617                 if (IS_ERR(rt)) {
618                         dev->stats.tx_carrier_errors++;
619                         goto tx_error;
620                 }
621                 if (connected)
622                         tunnel_dst_set(tunnel, &rt->dst);
623         }
624
625         if (rt->dst.dev == dev) {
626                 ip_rt_put(rt);
627                 dev->stats.collisions++;
628                 goto tx_error;
629         }
630
631         if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
632                 ip_rt_put(rt);
633                 goto tx_error;
634         }
635
636         if (tunnel->err_count > 0) {
637                 if (time_before(jiffies,
638                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
639                         tunnel->err_count--;
640
641                         memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
642                         dst_link_failure(skb);
643                 } else
644                         tunnel->err_count = 0;
645         }
646
647         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
648         ttl = tnl_params->ttl;
649         if (ttl == 0) {
650                 if (skb->protocol == htons(ETH_P_IP))
651                         ttl = inner_iph->ttl;
652 #if IS_ENABLED(CONFIG_IPV6)
653                 else if (skb->protocol == htons(ETH_P_IPV6))
654                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
655 #endif
656                 else
657                         ttl = ip4_dst_hoplimit(&rt->dst);
658         }
659
660         df = tnl_params->frag_off;
661         if (skb->protocol == htons(ETH_P_IP))
662                 df |= (inner_iph->frag_off&htons(IP_DF));
663
664         max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
665                         + rt->dst.header_len;
666         if (max_headroom > dev->needed_headroom)
667                 dev->needed_headroom = max_headroom;
668
669         if (skb_cow_head(skb, dev->needed_headroom)) {
670                 dev->stats.tx_dropped++;
671                 kfree_skb(skb);
672                 return;
673         }
674
675         err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
676                             tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
677         iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
678
679         return;
680
681 #if IS_ENABLED(CONFIG_IPV6)
682 tx_error_icmp:
683         dst_link_failure(skb);
684 #endif
685 tx_error:
686         dev->stats.tx_errors++;
687         kfree_skb(skb);
688 }
689 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
690
691 static void ip_tunnel_update(struct ip_tunnel_net *itn,
692                              struct ip_tunnel *t,
693                              struct net_device *dev,
694                              struct ip_tunnel_parm *p,
695                              bool set_mtu)
696 {
697         ip_tunnel_del(t);
698         t->parms.iph.saddr = p->iph.saddr;
699         t->parms.iph.daddr = p->iph.daddr;
700         t->parms.i_key = p->i_key;
701         t->parms.o_key = p->o_key;
702         if (dev->type != ARPHRD_ETHER) {
703                 memcpy(dev->dev_addr, &p->iph.saddr, 4);
704                 memcpy(dev->broadcast, &p->iph.daddr, 4);
705         }
706         ip_tunnel_add(itn, t);
707
708         t->parms.iph.ttl = p->iph.ttl;
709         t->parms.iph.tos = p->iph.tos;
710         t->parms.iph.frag_off = p->iph.frag_off;
711
712         if (t->parms.link != p->link) {
713                 int mtu;
714
715                 t->parms.link = p->link;
716                 mtu = ip_tunnel_bind_dev(dev);
717                 if (set_mtu)
718                         dev->mtu = mtu;
719         }
720         ip_tunnel_dst_reset_all(t);
721         netdev_state_change(dev);
722 }
723
724 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
725 {
726         int err = 0;
727         struct ip_tunnel *t = netdev_priv(dev);
728         struct net *net = t->net;
729         struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
730
731         BUG_ON(!itn->fb_tunnel_dev);
732         switch (cmd) {
733         case SIOCGETTUNNEL:
734                 if (dev == itn->fb_tunnel_dev) {
735                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
736                         if (t == NULL)
737                                 t = netdev_priv(dev);
738                 }
739                 memcpy(p, &t->parms, sizeof(*p));
740                 break;
741
742         case SIOCADDTUNNEL:
743         case SIOCCHGTUNNEL:
744                 err = -EPERM;
745                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
746                         goto done;
747                 if (p->iph.ttl)
748                         p->iph.frag_off |= htons(IP_DF);
749                 if (!(p->i_flags&TUNNEL_KEY))
750                         p->i_key = 0;
751                 if (!(p->o_flags&TUNNEL_KEY))
752                         p->o_key = 0;
753
754                 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
755
756                 if (!t && (cmd == SIOCADDTUNNEL)) {
757                         t = ip_tunnel_create(net, itn, p);
758                         if (IS_ERR(t)) {
759                                 err = PTR_ERR(t);
760                                 break;
761                         }
762                 }
763                 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
764                         if (t != NULL) {
765                                 if (t->dev != dev) {
766                                         err = -EEXIST;
767                                         break;
768                                 }
769                         } else {
770                                 unsigned int nflags = 0;
771
772                                 if (ipv4_is_multicast(p->iph.daddr))
773                                         nflags = IFF_BROADCAST;
774                                 else if (p->iph.daddr)
775                                         nflags = IFF_POINTOPOINT;
776
777                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
778                                         err = -EINVAL;
779                                         break;
780                                 }
781
782                                 t = netdev_priv(dev);
783                         }
784                 }
785
786                 if (t) {
787                         err = 0;
788                         ip_tunnel_update(itn, t, dev, p, true);
789                 } else {
790                         err = -ENOENT;
791                 }
792                 break;
793
794         case SIOCDELTUNNEL:
795                 err = -EPERM;
796                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
797                         goto done;
798
799                 if (dev == itn->fb_tunnel_dev) {
800                         err = -ENOENT;
801                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
802                         if (t == NULL)
803                                 goto done;
804                         err = -EPERM;
805                         if (t == netdev_priv(itn->fb_tunnel_dev))
806                                 goto done;
807                         dev = t->dev;
808                 }
809                 unregister_netdevice(dev);
810                 err = 0;
811                 break;
812
813         default:
814                 err = -EINVAL;
815         }
816
817 done:
818         return err;
819 }
820 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
821
822 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
823 {
824         struct ip_tunnel *tunnel = netdev_priv(dev);
825         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
826
827         if (new_mtu < 68 ||
828             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
829                 return -EINVAL;
830         dev->mtu = new_mtu;
831         return 0;
832 }
833 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
834
835 static void ip_tunnel_dev_free(struct net_device *dev)
836 {
837         struct ip_tunnel *tunnel = netdev_priv(dev);
838
839         gro_cells_destroy(&tunnel->gro_cells);
840         free_percpu(tunnel->dst_cache);
841         free_percpu(dev->tstats);
842         free_netdev(dev);
843 }
844
845 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
846 {
847         struct ip_tunnel *tunnel = netdev_priv(dev);
848         struct ip_tunnel_net *itn;
849
850         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
851
852         if (itn->fb_tunnel_dev != dev) {
853                 ip_tunnel_del(netdev_priv(dev));
854                 unregister_netdevice_queue(dev, head);
855         }
856 }
857 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
858
859 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
860                                   struct rtnl_link_ops *ops, char *devname)
861 {
862         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
863         struct ip_tunnel_parm parms;
864         unsigned int i;
865
866         for (i = 0; i < IP_TNL_HASH_SIZE; i++)
867                 INIT_HLIST_HEAD(&itn->tunnels[i]);
868
869         if (!ops) {
870                 itn->fb_tunnel_dev = NULL;
871                 return 0;
872         }
873
874         memset(&parms, 0, sizeof(parms));
875         if (devname)
876                 strlcpy(parms.name, devname, IFNAMSIZ);
877
878         rtnl_lock();
879         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
880         /* FB netdevice is special: we have one, and only one per netns.
881          * Allowing to move it to another netns is clearly unsafe.
882          */
883         if (!IS_ERR(itn->fb_tunnel_dev)) {
884                 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
885                 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
886         }
887         rtnl_unlock();
888
889         return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
890 }
891 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
892
893 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
894                               struct rtnl_link_ops *ops)
895 {
896         struct net *net = dev_net(itn->fb_tunnel_dev);
897         struct net_device *dev, *aux;
898         int h;
899
900         for_each_netdev_safe(net, dev, aux)
901                 if (dev->rtnl_link_ops == ops)
902                         unregister_netdevice_queue(dev, head);
903
904         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
905                 struct ip_tunnel *t;
906                 struct hlist_node *n;
907                 struct hlist_head *thead = &itn->tunnels[h];
908
909                 hlist_for_each_entry_safe(t, n, thead, hash_node)
910                         /* If dev is in the same netns, it has already
911                          * been added to the list by the previous loop.
912                          */
913                         if (!net_eq(dev_net(t->dev), net))
914                                 unregister_netdevice_queue(t->dev, head);
915         }
916 }
917
918 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
919 {
920         LIST_HEAD(list);
921
922         rtnl_lock();
923         ip_tunnel_destroy(itn, &list, ops);
924         unregister_netdevice_many(&list);
925         rtnl_unlock();
926 }
927 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
928
929 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
930                       struct ip_tunnel_parm *p)
931 {
932         struct ip_tunnel *nt;
933         struct net *net = dev_net(dev);
934         struct ip_tunnel_net *itn;
935         int mtu;
936         int err;
937
938         nt = netdev_priv(dev);
939         itn = net_generic(net, nt->ip_tnl_net_id);
940
941         if (ip_tunnel_find(itn, p, dev->type))
942                 return -EEXIST;
943
944         nt->net = net;
945         nt->parms = *p;
946         err = register_netdevice(dev);
947         if (err)
948                 goto out;
949
950         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
951                 eth_hw_addr_random(dev);
952
953         mtu = ip_tunnel_bind_dev(dev);
954         if (!tb[IFLA_MTU])
955                 dev->mtu = mtu;
956
957         ip_tunnel_add(itn, nt);
958
959 out:
960         return err;
961 }
962 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
963
964 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
965                          struct ip_tunnel_parm *p)
966 {
967         struct ip_tunnel *t;
968         struct ip_tunnel *tunnel = netdev_priv(dev);
969         struct net *net = tunnel->net;
970         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
971
972         if (dev == itn->fb_tunnel_dev)
973                 return -EINVAL;
974
975         t = ip_tunnel_find(itn, p, dev->type);
976
977         if (t) {
978                 if (t->dev != dev)
979                         return -EEXIST;
980         } else {
981                 t = tunnel;
982
983                 if (dev->type != ARPHRD_ETHER) {
984                         unsigned int nflags = 0;
985
986                         if (ipv4_is_multicast(p->iph.daddr))
987                                 nflags = IFF_BROADCAST;
988                         else if (p->iph.daddr)
989                                 nflags = IFF_POINTOPOINT;
990
991                         if ((dev->flags ^ nflags) &
992                             (IFF_POINTOPOINT | IFF_BROADCAST))
993                                 return -EINVAL;
994                 }
995         }
996
997         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
998         return 0;
999 }
1000 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1001
1002 int ip_tunnel_init(struct net_device *dev)
1003 {
1004         struct ip_tunnel *tunnel = netdev_priv(dev);
1005         struct iphdr *iph = &tunnel->parms.iph;
1006         int err;
1007
1008         dev->destructor = ip_tunnel_dev_free;
1009         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1010         if (!dev->tstats)
1011                 return -ENOMEM;
1012
1013         tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1014         if (!tunnel->dst_cache) {
1015                 free_percpu(dev->tstats);
1016                 return -ENOMEM;
1017         }
1018
1019         err = gro_cells_init(&tunnel->gro_cells, dev);
1020         if (err) {
1021                 free_percpu(tunnel->dst_cache);
1022                 free_percpu(dev->tstats);
1023                 return err;
1024         }
1025
1026         tunnel->dev = dev;
1027         tunnel->net = dev_net(dev);
1028         strcpy(tunnel->parms.name, dev->name);
1029         iph->version            = 4;
1030         iph->ihl                = 5;
1031
1032         return 0;
1033 }
1034 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1035
1036 void ip_tunnel_uninit(struct net_device *dev)
1037 {
1038         struct ip_tunnel *tunnel = netdev_priv(dev);
1039         struct net *net = tunnel->net;
1040         struct ip_tunnel_net *itn;
1041
1042         itn = net_generic(net, tunnel->ip_tnl_net_id);
1043         /* fb_tunnel_dev will be unregisted in net-exit call. */
1044         if (itn->fb_tunnel_dev != dev)
1045                 ip_tunnel_del(netdev_priv(dev));
1046
1047         ip_tunnel_dst_reset_all(tunnel);
1048 }
1049 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1050
1051 /* Do least required initialization, rest of init is done in tunnel_init call */
1052 void ip_tunnel_setup(struct net_device *dev, int net_id)
1053 {
1054         struct ip_tunnel *tunnel = netdev_priv(dev);
1055         tunnel->ip_tnl_net_id = net_id;
1056 }
1057 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1058
1059 MODULE_LICENSE("GPL");