Merge branch 'imx-drm-fixes-urgent' of git://ftp.arm.linux.org.uk/~rmk/linux-arm...
[linux-2.6-block.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
44
45 #include <net/sock.h>
46 #include <net/ip.h>
47 #include <net/icmp.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
50 #include <net/arp.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
54 #include <net/xfrm.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
58
59 #if IS_ENABLED(CONFIG_IPV6)
60 #include <net/ipv6.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
63 #endif
64
65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
71 static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
72                              struct dst_entry *dst)
73 {
74         struct dst_entry *old_dst;
75
76         if (dst) {
77                 if (dst->flags & DST_NOCACHE)
78                         dst = NULL;
79                 else
80                         dst_clone(dst);
81         }
82         old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
83         dst_release(old_dst);
84 }
85
86 static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
87 {
88         __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
89 }
90
91 static void tunnel_dst_reset(struct ip_tunnel *t)
92 {
93         tunnel_dst_set(t, NULL);
94 }
95
96 void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
97 {
98         int i;
99
100         for_each_possible_cpu(i)
101                 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
102 }
103 EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
104
105 static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
106 {
107         struct dst_entry *dst;
108
109         rcu_read_lock();
110         dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
111         if (dst) {
112                 if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
113                         rcu_read_unlock();
114                         tunnel_dst_reset(t);
115                         return NULL;
116                 }
117                 dst_hold(dst);
118         }
119         rcu_read_unlock();
120         return (struct rtable *)dst;
121 }
122
123 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
124                                 __be16 flags, __be32 key)
125 {
126         if (p->i_flags & TUNNEL_KEY) {
127                 if (flags & TUNNEL_KEY)
128                         return key == p->i_key;
129                 else
130                         /* key expected, none present */
131                         return false;
132         } else
133                 return !(flags & TUNNEL_KEY);
134 }
135
136 /* Fallback tunnel: no source, no destination, no key, no options
137
138    Tunnel hash table:
139    We require exact key match i.e. if a key is present in packet
140    it will match only tunnel with the same key; if it is not present,
141    it will match only keyless tunnel.
142
143    All keysless packets, if not matched configured keyless tunnels
144    will match fallback tunnel.
145    Given src, dst and key, find appropriate for input tunnel.
146 */
147 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
148                                    int link, __be16 flags,
149                                    __be32 remote, __be32 local,
150                                    __be32 key)
151 {
152         unsigned int hash;
153         struct ip_tunnel *t, *cand = NULL;
154         struct hlist_head *head;
155
156         hash = ip_tunnel_hash(key, remote);
157         head = &itn->tunnels[hash];
158
159         hlist_for_each_entry_rcu(t, head, hash_node) {
160                 if (local != t->parms.iph.saddr ||
161                     remote != t->parms.iph.daddr ||
162                     !(t->dev->flags & IFF_UP))
163                         continue;
164
165                 if (!ip_tunnel_key_match(&t->parms, flags, key))
166                         continue;
167
168                 if (t->parms.link == link)
169                         return t;
170                 else
171                         cand = t;
172         }
173
174         hlist_for_each_entry_rcu(t, head, hash_node) {
175                 if (remote != t->parms.iph.daddr ||
176                     !(t->dev->flags & IFF_UP))
177                         continue;
178
179                 if (!ip_tunnel_key_match(&t->parms, flags, key))
180                         continue;
181
182                 if (t->parms.link == link)
183                         return t;
184                 else if (!cand)
185                         cand = t;
186         }
187
188         hash = ip_tunnel_hash(key, 0);
189         head = &itn->tunnels[hash];
190
191         hlist_for_each_entry_rcu(t, head, hash_node) {
192                 if ((local != t->parms.iph.saddr &&
193                      (local != t->parms.iph.daddr ||
194                       !ipv4_is_multicast(local))) ||
195                     !(t->dev->flags & IFF_UP))
196                         continue;
197
198                 if (!ip_tunnel_key_match(&t->parms, flags, key))
199                         continue;
200
201                 if (t->parms.link == link)
202                         return t;
203                 else if (!cand)
204                         cand = t;
205         }
206
207         if (flags & TUNNEL_NO_KEY)
208                 goto skip_key_lookup;
209
210         hlist_for_each_entry_rcu(t, head, hash_node) {
211                 if (t->parms.i_key != key ||
212                     !(t->dev->flags & IFF_UP))
213                         continue;
214
215                 if (t->parms.link == link)
216                         return t;
217                 else if (!cand)
218                         cand = t;
219         }
220
221 skip_key_lookup:
222         if (cand)
223                 return cand;
224
225         if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
226                 return netdev_priv(itn->fb_tunnel_dev);
227
228
229         return NULL;
230 }
231 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
232
233 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
234                                     struct ip_tunnel_parm *parms)
235 {
236         unsigned int h;
237         __be32 remote;
238         __be32 i_key = parms->i_key;
239
240         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
241                 remote = parms->iph.daddr;
242         else
243                 remote = 0;
244
245         if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
246                 i_key = 0;
247
248         h = ip_tunnel_hash(i_key, remote);
249         return &itn->tunnels[h];
250 }
251
252 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
253 {
254         struct hlist_head *head = ip_bucket(itn, &t->parms);
255
256         hlist_add_head_rcu(&t->hash_node, head);
257 }
258
259 static void ip_tunnel_del(struct ip_tunnel *t)
260 {
261         hlist_del_init_rcu(&t->hash_node);
262 }
263
264 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
265                                         struct ip_tunnel_parm *parms,
266                                         int type)
267 {
268         __be32 remote = parms->iph.daddr;
269         __be32 local = parms->iph.saddr;
270         __be32 key = parms->i_key;
271         int link = parms->link;
272         struct ip_tunnel *t = NULL;
273         struct hlist_head *head = ip_bucket(itn, parms);
274
275         hlist_for_each_entry_rcu(t, head, hash_node) {
276                 if (local == t->parms.iph.saddr &&
277                     remote == t->parms.iph.daddr &&
278                     key == t->parms.i_key &&
279                     link == t->parms.link &&
280                     type == t->dev->type)
281                         break;
282         }
283         return t;
284 }
285
286 static struct net_device *__ip_tunnel_create(struct net *net,
287                                              const struct rtnl_link_ops *ops,
288                                              struct ip_tunnel_parm *parms)
289 {
290         int err;
291         struct ip_tunnel *tunnel;
292         struct net_device *dev;
293         char name[IFNAMSIZ];
294
295         if (parms->name[0])
296                 strlcpy(name, parms->name, IFNAMSIZ);
297         else {
298                 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
299                         err = -E2BIG;
300                         goto failed;
301                 }
302                 strlcpy(name, ops->kind, IFNAMSIZ);
303                 strncat(name, "%d", 2);
304         }
305
306         ASSERT_RTNL();
307         dev = alloc_netdev(ops->priv_size, name, ops->setup);
308         if (!dev) {
309                 err = -ENOMEM;
310                 goto failed;
311         }
312         dev_net_set(dev, net);
313
314         dev->rtnl_link_ops = ops;
315
316         tunnel = netdev_priv(dev);
317         tunnel->parms = *parms;
318         tunnel->net = net;
319
320         err = register_netdevice(dev);
321         if (err)
322                 goto failed_free;
323
324         return dev;
325
326 failed_free:
327         free_netdev(dev);
328 failed:
329         return ERR_PTR(err);
330 }
331
332 static inline void init_tunnel_flow(struct flowi4 *fl4,
333                                     int proto,
334                                     __be32 daddr, __be32 saddr,
335                                     __be32 key, __u8 tos, int oif)
336 {
337         memset(fl4, 0, sizeof(*fl4));
338         fl4->flowi4_oif = oif;
339         fl4->daddr = daddr;
340         fl4->saddr = saddr;
341         fl4->flowi4_tos = tos;
342         fl4->flowi4_proto = proto;
343         fl4->fl4_gre_key = key;
344 }
345
346 static int ip_tunnel_bind_dev(struct net_device *dev)
347 {
348         struct net_device *tdev = NULL;
349         struct ip_tunnel *tunnel = netdev_priv(dev);
350         const struct iphdr *iph;
351         int hlen = LL_MAX_HEADER;
352         int mtu = ETH_DATA_LEN;
353         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
354
355         iph = &tunnel->parms.iph;
356
357         /* Guess output device to choose reasonable mtu and needed_headroom */
358         if (iph->daddr) {
359                 struct flowi4 fl4;
360                 struct rtable *rt;
361
362                 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
363                                  iph->saddr, tunnel->parms.o_key,
364                                  RT_TOS(iph->tos), tunnel->parms.link);
365                 rt = ip_route_output_key(tunnel->net, &fl4);
366
367                 if (!IS_ERR(rt)) {
368                         tdev = rt->dst.dev;
369                         tunnel_dst_set(tunnel, &rt->dst);
370                         ip_rt_put(rt);
371                 }
372                 if (dev->type != ARPHRD_ETHER)
373                         dev->flags |= IFF_POINTOPOINT;
374         }
375
376         if (!tdev && tunnel->parms.link)
377                 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
378
379         if (tdev) {
380                 hlen = tdev->hard_header_len + tdev->needed_headroom;
381                 mtu = tdev->mtu;
382         }
383         dev->iflink = tunnel->parms.link;
384
385         dev->needed_headroom = t_hlen + hlen;
386         mtu -= (dev->hard_header_len + t_hlen);
387
388         if (mtu < 68)
389                 mtu = 68;
390
391         return mtu;
392 }
393
394 static struct ip_tunnel *ip_tunnel_create(struct net *net,
395                                           struct ip_tunnel_net *itn,
396                                           struct ip_tunnel_parm *parms)
397 {
398         struct ip_tunnel *nt, *fbt;
399         struct net_device *dev;
400
401         BUG_ON(!itn->fb_tunnel_dev);
402         fbt = netdev_priv(itn->fb_tunnel_dev);
403         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
404         if (IS_ERR(dev))
405                 return ERR_CAST(dev);
406
407         dev->mtu = ip_tunnel_bind_dev(dev);
408
409         nt = netdev_priv(dev);
410         ip_tunnel_add(itn, nt);
411         return nt;
412 }
413
414 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
415                   const struct tnl_ptk_info *tpi, bool log_ecn_error)
416 {
417         struct pcpu_sw_netstats *tstats;
418         const struct iphdr *iph = ip_hdr(skb);
419         int err;
420
421 #ifdef CONFIG_NET_IPGRE_BROADCAST
422         if (ipv4_is_multicast(iph->daddr)) {
423                 tunnel->dev->stats.multicast++;
424                 skb->pkt_type = PACKET_BROADCAST;
425         }
426 #endif
427
428         if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
429              ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
430                 tunnel->dev->stats.rx_crc_errors++;
431                 tunnel->dev->stats.rx_errors++;
432                 goto drop;
433         }
434
435         if (tunnel->parms.i_flags&TUNNEL_SEQ) {
436                 if (!(tpi->flags&TUNNEL_SEQ) ||
437                     (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
438                         tunnel->dev->stats.rx_fifo_errors++;
439                         tunnel->dev->stats.rx_errors++;
440                         goto drop;
441                 }
442                 tunnel->i_seqno = ntohl(tpi->seq) + 1;
443         }
444
445         err = IP_ECN_decapsulate(iph, skb);
446         if (unlikely(err)) {
447                 if (log_ecn_error)
448                         net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
449                                         &iph->saddr, iph->tos);
450                 if (err > 1) {
451                         ++tunnel->dev->stats.rx_frame_errors;
452                         ++tunnel->dev->stats.rx_errors;
453                         goto drop;
454                 }
455         }
456
457         tstats = this_cpu_ptr(tunnel->dev->tstats);
458         u64_stats_update_begin(&tstats->syncp);
459         tstats->rx_packets++;
460         tstats->rx_bytes += skb->len;
461         u64_stats_update_end(&tstats->syncp);
462
463         skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
464
465         if (tunnel->dev->type == ARPHRD_ETHER) {
466                 skb->protocol = eth_type_trans(skb, tunnel->dev);
467                 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
468         } else {
469                 skb->dev = tunnel->dev;
470         }
471
472         gro_cells_receive(&tunnel->gro_cells, skb);
473         return 0;
474
475 drop:
476         kfree_skb(skb);
477         return 0;
478 }
479 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
480
481 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
482                             struct rtable *rt, __be16 df)
483 {
484         struct ip_tunnel *tunnel = netdev_priv(dev);
485         int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
486         int mtu;
487
488         if (df)
489                 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
490                                         - sizeof(struct iphdr) - tunnel->hlen;
491         else
492                 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
493
494         if (skb_dst(skb))
495                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
496
497         if (skb->protocol == htons(ETH_P_IP)) {
498                 if (!skb_is_gso(skb) &&
499                     (df & htons(IP_DF)) && mtu < pkt_size) {
500                         memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
501                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
502                         return -E2BIG;
503                 }
504         }
505 #if IS_ENABLED(CONFIG_IPV6)
506         else if (skb->protocol == htons(ETH_P_IPV6)) {
507                 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
508
509                 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
510                            mtu >= IPV6_MIN_MTU) {
511                         if ((tunnel->parms.iph.daddr &&
512                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
513                             rt6->rt6i_dst.plen == 128) {
514                                 rt6->rt6i_flags |= RTF_MODIFIED;
515                                 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
516                         }
517                 }
518
519                 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
520                                         mtu < pkt_size) {
521                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
522                         return -E2BIG;
523                 }
524         }
525 #endif
526         return 0;
527 }
528
529 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
530                     const struct iphdr *tnl_params, const u8 protocol)
531 {
532         struct ip_tunnel *tunnel = netdev_priv(dev);
533         const struct iphdr *inner_iph;
534         struct flowi4 fl4;
535         u8     tos, ttl;
536         __be16 df;
537         struct rtable *rt;              /* Route to the other host */
538         unsigned int max_headroom;      /* The extra header space needed */
539         __be32 dst;
540         int err;
541         bool connected = true;
542
543         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
544
545         dst = tnl_params->daddr;
546         if (dst == 0) {
547                 /* NBMA tunnel */
548
549                 if (skb_dst(skb) == NULL) {
550                         dev->stats.tx_fifo_errors++;
551                         goto tx_error;
552                 }
553
554                 if (skb->protocol == htons(ETH_P_IP)) {
555                         rt = skb_rtable(skb);
556                         dst = rt_nexthop(rt, inner_iph->daddr);
557                 }
558 #if IS_ENABLED(CONFIG_IPV6)
559                 else if (skb->protocol == htons(ETH_P_IPV6)) {
560                         const struct in6_addr *addr6;
561                         struct neighbour *neigh;
562                         bool do_tx_error_icmp;
563                         int addr_type;
564
565                         neigh = dst_neigh_lookup(skb_dst(skb),
566                                                  &ipv6_hdr(skb)->daddr);
567                         if (neigh == NULL)
568                                 goto tx_error;
569
570                         addr6 = (const struct in6_addr *)&neigh->primary_key;
571                         addr_type = ipv6_addr_type(addr6);
572
573                         if (addr_type == IPV6_ADDR_ANY) {
574                                 addr6 = &ipv6_hdr(skb)->daddr;
575                                 addr_type = ipv6_addr_type(addr6);
576                         }
577
578                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
579                                 do_tx_error_icmp = true;
580                         else {
581                                 do_tx_error_icmp = false;
582                                 dst = addr6->s6_addr32[3];
583                         }
584                         neigh_release(neigh);
585                         if (do_tx_error_icmp)
586                                 goto tx_error_icmp;
587                 }
588 #endif
589                 else
590                         goto tx_error;
591
592                 connected = false;
593         }
594
595         tos = tnl_params->tos;
596         if (tos & 0x1) {
597                 tos &= ~0x1;
598                 if (skb->protocol == htons(ETH_P_IP)) {
599                         tos = inner_iph->tos;
600                         connected = false;
601                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
602                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
603                         connected = false;
604                 }
605         }
606
607         init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
608                          tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
609
610         rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;
611
612         if (!rt) {
613                 rt = ip_route_output_key(tunnel->net, &fl4);
614
615                 if (IS_ERR(rt)) {
616                         dev->stats.tx_carrier_errors++;
617                         goto tx_error;
618                 }
619                 if (connected)
620                         tunnel_dst_set(tunnel, &rt->dst);
621         }
622
623         if (rt->dst.dev == dev) {
624                 ip_rt_put(rt);
625                 dev->stats.collisions++;
626                 goto tx_error;
627         }
628
629         if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
630                 ip_rt_put(rt);
631                 goto tx_error;
632         }
633
634         if (tunnel->err_count > 0) {
635                 if (time_before(jiffies,
636                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
637                         tunnel->err_count--;
638
639                         memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
640                         dst_link_failure(skb);
641                 } else
642                         tunnel->err_count = 0;
643         }
644
645         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
646         ttl = tnl_params->ttl;
647         if (ttl == 0) {
648                 if (skb->protocol == htons(ETH_P_IP))
649                         ttl = inner_iph->ttl;
650 #if IS_ENABLED(CONFIG_IPV6)
651                 else if (skb->protocol == htons(ETH_P_IPV6))
652                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
653 #endif
654                 else
655                         ttl = ip4_dst_hoplimit(&rt->dst);
656         }
657
658         df = tnl_params->frag_off;
659         if (skb->protocol == htons(ETH_P_IP))
660                 df |= (inner_iph->frag_off&htons(IP_DF));
661
662         max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
663                         + rt->dst.header_len;
664         if (max_headroom > dev->needed_headroom)
665                 dev->needed_headroom = max_headroom;
666
667         if (skb_cow_head(skb, dev->needed_headroom)) {
668                 dev->stats.tx_dropped++;
669                 kfree_skb(skb);
670                 return;
671         }
672
673         err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
674                             tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
675         iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
676
677         return;
678
679 #if IS_ENABLED(CONFIG_IPV6)
680 tx_error_icmp:
681         dst_link_failure(skb);
682 #endif
683 tx_error:
684         dev->stats.tx_errors++;
685         kfree_skb(skb);
686 }
687 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
688
689 static void ip_tunnel_update(struct ip_tunnel_net *itn,
690                              struct ip_tunnel *t,
691                              struct net_device *dev,
692                              struct ip_tunnel_parm *p,
693                              bool set_mtu)
694 {
695         ip_tunnel_del(t);
696         t->parms.iph.saddr = p->iph.saddr;
697         t->parms.iph.daddr = p->iph.daddr;
698         t->parms.i_key = p->i_key;
699         t->parms.o_key = p->o_key;
700         if (dev->type != ARPHRD_ETHER) {
701                 memcpy(dev->dev_addr, &p->iph.saddr, 4);
702                 memcpy(dev->broadcast, &p->iph.daddr, 4);
703         }
704         ip_tunnel_add(itn, t);
705
706         t->parms.iph.ttl = p->iph.ttl;
707         t->parms.iph.tos = p->iph.tos;
708         t->parms.iph.frag_off = p->iph.frag_off;
709
710         if (t->parms.link != p->link) {
711                 int mtu;
712
713                 t->parms.link = p->link;
714                 mtu = ip_tunnel_bind_dev(dev);
715                 if (set_mtu)
716                         dev->mtu = mtu;
717         }
718         ip_tunnel_dst_reset_all(t);
719         netdev_state_change(dev);
720 }
721
722 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
723 {
724         int err = 0;
725         struct ip_tunnel *t = netdev_priv(dev);
726         struct net *net = t->net;
727         struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
728
729         BUG_ON(!itn->fb_tunnel_dev);
730         switch (cmd) {
731         case SIOCGETTUNNEL:
732                 if (dev == itn->fb_tunnel_dev) {
733                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
734                         if (t == NULL)
735                                 t = netdev_priv(dev);
736                 }
737                 memcpy(p, &t->parms, sizeof(*p));
738                 break;
739
740         case SIOCADDTUNNEL:
741         case SIOCCHGTUNNEL:
742                 err = -EPERM;
743                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
744                         goto done;
745                 if (p->iph.ttl)
746                         p->iph.frag_off |= htons(IP_DF);
747                 if (!(p->i_flags&TUNNEL_KEY))
748                         p->i_key = 0;
749                 if (!(p->o_flags&TUNNEL_KEY))
750                         p->o_key = 0;
751
752                 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
753
754                 if (!t && (cmd == SIOCADDTUNNEL)) {
755                         t = ip_tunnel_create(net, itn, p);
756                         if (IS_ERR(t)) {
757                                 err = PTR_ERR(t);
758                                 break;
759                         }
760                 }
761                 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
762                         if (t != NULL) {
763                                 if (t->dev != dev) {
764                                         err = -EEXIST;
765                                         break;
766                                 }
767                         } else {
768                                 unsigned int nflags = 0;
769
770                                 if (ipv4_is_multicast(p->iph.daddr))
771                                         nflags = IFF_BROADCAST;
772                                 else if (p->iph.daddr)
773                                         nflags = IFF_POINTOPOINT;
774
775                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
776                                         err = -EINVAL;
777                                         break;
778                                 }
779
780                                 t = netdev_priv(dev);
781                         }
782                 }
783
784                 if (t) {
785                         err = 0;
786                         ip_tunnel_update(itn, t, dev, p, true);
787                 } else {
788                         err = -ENOENT;
789                 }
790                 break;
791
792         case SIOCDELTUNNEL:
793                 err = -EPERM;
794                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
795                         goto done;
796
797                 if (dev == itn->fb_tunnel_dev) {
798                         err = -ENOENT;
799                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
800                         if (t == NULL)
801                                 goto done;
802                         err = -EPERM;
803                         if (t == netdev_priv(itn->fb_tunnel_dev))
804                                 goto done;
805                         dev = t->dev;
806                 }
807                 unregister_netdevice(dev);
808                 err = 0;
809                 break;
810
811         default:
812                 err = -EINVAL;
813         }
814
815 done:
816         return err;
817 }
818 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
819
820 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
821 {
822         struct ip_tunnel *tunnel = netdev_priv(dev);
823         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
824
825         if (new_mtu < 68 ||
826             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
827                 return -EINVAL;
828         dev->mtu = new_mtu;
829         return 0;
830 }
831 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
832
833 static void ip_tunnel_dev_free(struct net_device *dev)
834 {
835         struct ip_tunnel *tunnel = netdev_priv(dev);
836
837         gro_cells_destroy(&tunnel->gro_cells);
838         free_percpu(tunnel->dst_cache);
839         free_percpu(dev->tstats);
840         free_netdev(dev);
841 }
842
843 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
844 {
845         struct ip_tunnel *tunnel = netdev_priv(dev);
846         struct ip_tunnel_net *itn;
847
848         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
849
850         if (itn->fb_tunnel_dev != dev) {
851                 ip_tunnel_del(netdev_priv(dev));
852                 unregister_netdevice_queue(dev, head);
853         }
854 }
855 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
856
857 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
858                                   struct rtnl_link_ops *ops, char *devname)
859 {
860         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
861         struct ip_tunnel_parm parms;
862         unsigned int i;
863
864         for (i = 0; i < IP_TNL_HASH_SIZE; i++)
865                 INIT_HLIST_HEAD(&itn->tunnels[i]);
866
867         if (!ops) {
868                 itn->fb_tunnel_dev = NULL;
869                 return 0;
870         }
871
872         memset(&parms, 0, sizeof(parms));
873         if (devname)
874                 strlcpy(parms.name, devname, IFNAMSIZ);
875
876         rtnl_lock();
877         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
878         /* FB netdevice is special: we have one, and only one per netns.
879          * Allowing to move it to another netns is clearly unsafe.
880          */
881         if (!IS_ERR(itn->fb_tunnel_dev)) {
882                 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
883                 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
884         }
885         rtnl_unlock();
886
887         return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
888 }
889 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
890
891 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
892                               struct rtnl_link_ops *ops)
893 {
894         struct net *net = dev_net(itn->fb_tunnel_dev);
895         struct net_device *dev, *aux;
896         int h;
897
898         for_each_netdev_safe(net, dev, aux)
899                 if (dev->rtnl_link_ops == ops)
900                         unregister_netdevice_queue(dev, head);
901
902         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
903                 struct ip_tunnel *t;
904                 struct hlist_node *n;
905                 struct hlist_head *thead = &itn->tunnels[h];
906
907                 hlist_for_each_entry_safe(t, n, thead, hash_node)
908                         /* If dev is in the same netns, it has already
909                          * been added to the list by the previous loop.
910                          */
911                         if (!net_eq(dev_net(t->dev), net))
912                                 unregister_netdevice_queue(t->dev, head);
913         }
914 }
915
916 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
917 {
918         LIST_HEAD(list);
919
920         rtnl_lock();
921         ip_tunnel_destroy(itn, &list, ops);
922         unregister_netdevice_many(&list);
923         rtnl_unlock();
924 }
925 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
926
927 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
928                       struct ip_tunnel_parm *p)
929 {
930         struct ip_tunnel *nt;
931         struct net *net = dev_net(dev);
932         struct ip_tunnel_net *itn;
933         int mtu;
934         int err;
935
936         nt = netdev_priv(dev);
937         itn = net_generic(net, nt->ip_tnl_net_id);
938
939         if (ip_tunnel_find(itn, p, dev->type))
940                 return -EEXIST;
941
942         nt->net = net;
943         nt->parms = *p;
944         err = register_netdevice(dev);
945         if (err)
946                 goto out;
947
948         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
949                 eth_hw_addr_random(dev);
950
951         mtu = ip_tunnel_bind_dev(dev);
952         if (!tb[IFLA_MTU])
953                 dev->mtu = mtu;
954
955         ip_tunnel_add(itn, nt);
956
957 out:
958         return err;
959 }
960 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
961
962 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
963                          struct ip_tunnel_parm *p)
964 {
965         struct ip_tunnel *t;
966         struct ip_tunnel *tunnel = netdev_priv(dev);
967         struct net *net = tunnel->net;
968         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
969
970         if (dev == itn->fb_tunnel_dev)
971                 return -EINVAL;
972
973         t = ip_tunnel_find(itn, p, dev->type);
974
975         if (t) {
976                 if (t->dev != dev)
977                         return -EEXIST;
978         } else {
979                 t = tunnel;
980
981                 if (dev->type != ARPHRD_ETHER) {
982                         unsigned int nflags = 0;
983
984                         if (ipv4_is_multicast(p->iph.daddr))
985                                 nflags = IFF_BROADCAST;
986                         else if (p->iph.daddr)
987                                 nflags = IFF_POINTOPOINT;
988
989                         if ((dev->flags ^ nflags) &
990                             (IFF_POINTOPOINT | IFF_BROADCAST))
991                                 return -EINVAL;
992                 }
993         }
994
995         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
996         return 0;
997 }
998 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
999
1000 int ip_tunnel_init(struct net_device *dev)
1001 {
1002         struct ip_tunnel *tunnel = netdev_priv(dev);
1003         struct iphdr *iph = &tunnel->parms.iph;
1004         int err;
1005
1006         dev->destructor = ip_tunnel_dev_free;
1007         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1008         if (!dev->tstats)
1009                 return -ENOMEM;
1010
1011         tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1012         if (!tunnel->dst_cache) {
1013                 free_percpu(dev->tstats);
1014                 return -ENOMEM;
1015         }
1016
1017         err = gro_cells_init(&tunnel->gro_cells, dev);
1018         if (err) {
1019                 free_percpu(tunnel->dst_cache);
1020                 free_percpu(dev->tstats);
1021                 return err;
1022         }
1023
1024         tunnel->dev = dev;
1025         tunnel->net = dev_net(dev);
1026         strcpy(tunnel->parms.name, dev->name);
1027         iph->version            = 4;
1028         iph->ihl                = 5;
1029
1030         return 0;
1031 }
1032 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1033
1034 void ip_tunnel_uninit(struct net_device *dev)
1035 {
1036         struct ip_tunnel *tunnel = netdev_priv(dev);
1037         struct net *net = tunnel->net;
1038         struct ip_tunnel_net *itn;
1039
1040         itn = net_generic(net, tunnel->ip_tnl_net_id);
1041         /* fb_tunnel_dev will be unregisted in net-exit call. */
1042         if (itn->fb_tunnel_dev != dev)
1043                 ip_tunnel_del(netdev_priv(dev));
1044
1045         ip_tunnel_dst_reset_all(tunnel);
1046 }
1047 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1048
1049 /* Do least required initialization, rest of init is done in tunnel_init call */
1050 void ip_tunnel_setup(struct net_device *dev, int net_id)
1051 {
1052         struct ip_tunnel *tunnel = netdev_priv(dev);
1053         tunnel->ip_tnl_net_id = net_id;
1054 }
1055 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1056
1057 MODULE_LICENSE("GPL");