Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm...
[linux-2.6-block.git] / net / ipv4 / ip_tunnel.c
CommitLineData
c5441932
PS
1/*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
27d79f3b 43#include <linux/err.h>
c5441932
PS
44
45#include <net/sock.h>
46#include <net/ip.h>
47#include <net/icmp.h>
48#include <net/protocol.h>
49#include <net/ip_tunnels.h>
50#include <net/arp.h>
51#include <net/checksum.h>
52#include <net/dsfield.h>
53#include <net/inet_ecn.h>
54#include <net/xfrm.h>
55#include <net/net_namespace.h>
56#include <net/netns/generic.h>
57#include <net/rtnetlink.h>
56328486 58#include <net/udp.h>
63487bab 59
c5441932
PS
60#if IS_ENABLED(CONFIG_IPV6)
61#include <net/ipv6.h>
62#include <net/ip6_fib.h>
63#include <net/ip6_route.h>
64#endif
65
967680e0 66static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
c5441932
PS
67{
68 return hash_32((__force u32)key ^ (__force u32)remote,
69 IP_TNL_HASH_BITS);
70}
71
6c7e7610 72static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
95cb5745 73 struct dst_entry *dst, __be32 saddr)
7d442fab
TH
74{
75 struct dst_entry *old_dst;
76
f8864972 77 dst_clone(dst);
6c7e7610 78 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
7d442fab 79 dst_release(old_dst);
95cb5745 80 idst->saddr = saddr;
7d442fab
TH
81}
82
a35165ca 83static noinline void tunnel_dst_set(struct ip_tunnel *t,
95cb5745 84 struct dst_entry *dst, __be32 saddr)
7d442fab 85{
a35165ca 86 __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
7d442fab
TH
87}
88
6c7e7610 89static void tunnel_dst_reset(struct ip_tunnel *t)
7d442fab 90{
95cb5745 91 tunnel_dst_set(t, NULL, 0);
7d442fab
TH
92}
93
cf71d2bc 94void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
9a4aa9af
TH
95{
96 int i;
97
98 for_each_possible_cpu(i)
95cb5745 99 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
9a4aa9af 100}
cf71d2bc 101EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
9a4aa9af 102
95cb5745
DP
103static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
104 u32 cookie, __be32 *saddr)
7d442fab 105{
95cb5745 106 struct ip_tunnel_dst *idst;
7d442fab
TH
107 struct dst_entry *dst;
108
109 rcu_read_lock();
a35165ca 110 idst = raw_cpu_ptr(t->dst_cache);
95cb5745 111 dst = rcu_dereference(idst->dst);
f8864972
ED
112 if (dst && !atomic_inc_not_zero(&dst->__refcnt))
113 dst = NULL;
b045d37b 114 if (dst) {
95cb5745
DP
115 if (!dst->obsolete || dst->ops->check(dst, cookie)) {
116 *saddr = idst->saddr;
117 } else {
b045d37b 118 tunnel_dst_reset(t);
f8864972
ED
119 dst_release(dst);
120 dst = NULL;
b045d37b 121 }
7d442fab 122 }
b045d37b
ED
123 rcu_read_unlock();
124 return (struct rtable *)dst;
7d442fab
TH
125}
126
c5441932
PS
127static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
128 __be16 flags, __be32 key)
129{
130 if (p->i_flags & TUNNEL_KEY) {
131 if (flags & TUNNEL_KEY)
132 return key == p->i_key;
133 else
134 /* key expected, none present */
135 return false;
136 } else
137 return !(flags & TUNNEL_KEY);
138}
139
140/* Fallback tunnel: no source, no destination, no key, no options
141
142 Tunnel hash table:
143 We require exact key match i.e. if a key is present in packet
144 it will match only tunnel with the same key; if it is not present,
145 it will match only keyless tunnel.
146
147 All keysless packets, if not matched configured keyless tunnels
148 will match fallback tunnel.
149 Given src, dst and key, find appropriate for input tunnel.
150*/
151struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
152 int link, __be16 flags,
153 __be32 remote, __be32 local,
154 __be32 key)
155{
156 unsigned int hash;
157 struct ip_tunnel *t, *cand = NULL;
158 struct hlist_head *head;
159
967680e0 160 hash = ip_tunnel_hash(key, remote);
c5441932
PS
161 head = &itn->tunnels[hash];
162
163 hlist_for_each_entry_rcu(t, head, hash_node) {
164 if (local != t->parms.iph.saddr ||
165 remote != t->parms.iph.daddr ||
166 !(t->dev->flags & IFF_UP))
167 continue;
168
169 if (!ip_tunnel_key_match(&t->parms, flags, key))
170 continue;
171
172 if (t->parms.link == link)
173 return t;
174 else
175 cand = t;
176 }
177
178 hlist_for_each_entry_rcu(t, head, hash_node) {
179 if (remote != t->parms.iph.daddr ||
e0056593 180 t->parms.iph.saddr != 0 ||
c5441932
PS
181 !(t->dev->flags & IFF_UP))
182 continue;
183
184 if (!ip_tunnel_key_match(&t->parms, flags, key))
185 continue;
186
187 if (t->parms.link == link)
188 return t;
189 else if (!cand)
190 cand = t;
191 }
192
967680e0 193 hash = ip_tunnel_hash(key, 0);
c5441932
PS
194 head = &itn->tunnels[hash];
195
196 hlist_for_each_entry_rcu(t, head, hash_node) {
e0056593
DP
197 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
198 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
199 continue;
200
201 if (!(t->dev->flags & IFF_UP))
c5441932
PS
202 continue;
203
204 if (!ip_tunnel_key_match(&t->parms, flags, key))
205 continue;
206
207 if (t->parms.link == link)
208 return t;
209 else if (!cand)
210 cand = t;
211 }
212
213 if (flags & TUNNEL_NO_KEY)
214 goto skip_key_lookup;
215
216 hlist_for_each_entry_rcu(t, head, hash_node) {
217 if (t->parms.i_key != key ||
e0056593
DP
218 t->parms.iph.saddr != 0 ||
219 t->parms.iph.daddr != 0 ||
c5441932
PS
220 !(t->dev->flags & IFF_UP))
221 continue;
222
223 if (t->parms.link == link)
224 return t;
225 else if (!cand)
226 cand = t;
227 }
228
229skip_key_lookup:
230 if (cand)
231 return cand;
232
233 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
234 return netdev_priv(itn->fb_tunnel_dev);
235
236
237 return NULL;
238}
239EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
240
241static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
242 struct ip_tunnel_parm *parms)
243{
244 unsigned int h;
245 __be32 remote;
6d608f06 246 __be32 i_key = parms->i_key;
c5441932
PS
247
248 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
249 remote = parms->iph.daddr;
250 else
251 remote = 0;
252
6d608f06
SK
253 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
254 i_key = 0;
255
256 h = ip_tunnel_hash(i_key, remote);
c5441932
PS
257 return &itn->tunnels[h];
258}
259
260static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
261{
262 struct hlist_head *head = ip_bucket(itn, &t->parms);
263
264 hlist_add_head_rcu(&t->hash_node, head);
265}
266
267static void ip_tunnel_del(struct ip_tunnel *t)
268{
269 hlist_del_init_rcu(&t->hash_node);
270}
271
272static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
273 struct ip_tunnel_parm *parms,
274 int type)
275{
276 __be32 remote = parms->iph.daddr;
277 __be32 local = parms->iph.saddr;
278 __be32 key = parms->i_key;
5ce54af1 279 __be16 flags = parms->i_flags;
c5441932
PS
280 int link = parms->link;
281 struct ip_tunnel *t = NULL;
282 struct hlist_head *head = ip_bucket(itn, parms);
283
284 hlist_for_each_entry_rcu(t, head, hash_node) {
285 if (local == t->parms.iph.saddr &&
286 remote == t->parms.iph.daddr &&
c5441932 287 link == t->parms.link &&
5ce54af1
DP
288 type == t->dev->type &&
289 ip_tunnel_key_match(&t->parms, flags, key))
c5441932
PS
290 break;
291 }
292 return t;
293}
294
295static struct net_device *__ip_tunnel_create(struct net *net,
296 const struct rtnl_link_ops *ops,
297 struct ip_tunnel_parm *parms)
298{
299 int err;
300 struct ip_tunnel *tunnel;
301 struct net_device *dev;
302 char name[IFNAMSIZ];
303
304 if (parms->name[0])
305 strlcpy(name, parms->name, IFNAMSIZ);
306 else {
54a5d382 307 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
c5441932
PS
308 err = -E2BIG;
309 goto failed;
310 }
311 strlcpy(name, ops->kind, IFNAMSIZ);
312 strncat(name, "%d", 2);
313 }
314
315 ASSERT_RTNL();
c835a677 316 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
c5441932
PS
317 if (!dev) {
318 err = -ENOMEM;
319 goto failed;
320 }
321 dev_net_set(dev, net);
322
323 dev->rtnl_link_ops = ops;
324
325 tunnel = netdev_priv(dev);
326 tunnel->parms = *parms;
5e6700b3 327 tunnel->net = net;
c5441932
PS
328
329 err = register_netdevice(dev);
330 if (err)
331 goto failed_free;
332
333 return dev;
334
335failed_free:
336 free_netdev(dev);
337failed:
338 return ERR_PTR(err);
339}
340
7d442fab
TH
341static inline void init_tunnel_flow(struct flowi4 *fl4,
342 int proto,
343 __be32 daddr, __be32 saddr,
344 __be32 key, __u8 tos, int oif)
c5441932
PS
345{
346 memset(fl4, 0, sizeof(*fl4));
347 fl4->flowi4_oif = oif;
348 fl4->daddr = daddr;
349 fl4->saddr = saddr;
350 fl4->flowi4_tos = tos;
351 fl4->flowi4_proto = proto;
352 fl4->fl4_gre_key = key;
c5441932
PS
353}
354
355static int ip_tunnel_bind_dev(struct net_device *dev)
356{
357 struct net_device *tdev = NULL;
358 struct ip_tunnel *tunnel = netdev_priv(dev);
359 const struct iphdr *iph;
360 int hlen = LL_MAX_HEADER;
361 int mtu = ETH_DATA_LEN;
362 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
363
364 iph = &tunnel->parms.iph;
365
366 /* Guess output device to choose reasonable mtu and needed_headroom */
367 if (iph->daddr) {
368 struct flowi4 fl4;
369 struct rtable *rt;
370
7d442fab
TH
371 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
372 iph->saddr, tunnel->parms.o_key,
373 RT_TOS(iph->tos), tunnel->parms.link);
374 rt = ip_route_output_key(tunnel->net, &fl4);
375
c5441932
PS
376 if (!IS_ERR(rt)) {
377 tdev = rt->dst.dev;
95cb5745 378 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
c5441932
PS
379 ip_rt_put(rt);
380 }
381 if (dev->type != ARPHRD_ETHER)
382 dev->flags |= IFF_POINTOPOINT;
383 }
384
385 if (!tdev && tunnel->parms.link)
6c742e71 386 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
c5441932
PS
387
388 if (tdev) {
389 hlen = tdev->hard_header_len + tdev->needed_headroom;
390 mtu = tdev->mtu;
391 }
c5441932
PS
392
393 dev->needed_headroom = t_hlen + hlen;
394 mtu -= (dev->hard_header_len + t_hlen);
395
396 if (mtu < 68)
397 mtu = 68;
398
399 return mtu;
400}
401
402static struct ip_tunnel *ip_tunnel_create(struct net *net,
403 struct ip_tunnel_net *itn,
404 struct ip_tunnel_parm *parms)
405{
4929fd8c 406 struct ip_tunnel *nt;
c5441932
PS
407 struct net_device *dev;
408
409 BUG_ON(!itn->fb_tunnel_dev);
c5441932
PS
410 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
411 if (IS_ERR(dev))
6dd3c9ec 412 return ERR_CAST(dev);
c5441932
PS
413
414 dev->mtu = ip_tunnel_bind_dev(dev);
415
416 nt = netdev_priv(dev);
417 ip_tunnel_add(itn, nt);
418 return nt;
419}
420
421int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
422 const struct tnl_ptk_info *tpi, bool log_ecn_error)
423{
8f84985f 424 struct pcpu_sw_netstats *tstats;
c5441932
PS
425 const struct iphdr *iph = ip_hdr(skb);
426 int err;
427
c5441932
PS
428#ifdef CONFIG_NET_IPGRE_BROADCAST
429 if (ipv4_is_multicast(iph->daddr)) {
c5441932
PS
430 tunnel->dev->stats.multicast++;
431 skb->pkt_type = PACKET_BROADCAST;
432 }
433#endif
434
435 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
436 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
437 tunnel->dev->stats.rx_crc_errors++;
438 tunnel->dev->stats.rx_errors++;
439 goto drop;
440 }
441
442 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
443 if (!(tpi->flags&TUNNEL_SEQ) ||
444 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
445 tunnel->dev->stats.rx_fifo_errors++;
446 tunnel->dev->stats.rx_errors++;
447 goto drop;
448 }
449 tunnel->i_seqno = ntohl(tpi->seq) + 1;
450 }
451
e96f2e7c
YC
452 skb_reset_network_header(skb);
453
c5441932
PS
454 err = IP_ECN_decapsulate(iph, skb);
455 if (unlikely(err)) {
456 if (log_ecn_error)
457 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
458 &iph->saddr, iph->tos);
459 if (err > 1) {
460 ++tunnel->dev->stats.rx_frame_errors;
461 ++tunnel->dev->stats.rx_errors;
462 goto drop;
463 }
464 }
465
466 tstats = this_cpu_ptr(tunnel->dev->tstats);
467 u64_stats_update_begin(&tstats->syncp);
468 tstats->rx_packets++;
469 tstats->rx_bytes += skb->len;
470 u64_stats_update_end(&tstats->syncp);
471
81b9eab5
AS
472 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
473
3d7b46cd
PS
474 if (tunnel->dev->type == ARPHRD_ETHER) {
475 skb->protocol = eth_type_trans(skb, tunnel->dev);
476 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
477 } else {
478 skb->dev = tunnel->dev;
479 }
64261f23 480
c5441932
PS
481 gro_cells_receive(&tunnel->gro_cells, skb);
482 return 0;
483
484drop:
485 kfree_skb(skb);
486 return 0;
487}
488EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
489
56328486
TH
490static int ip_encap_hlen(struct ip_tunnel_encap *e)
491{
a8c5f90f
TH
492 const struct ip_tunnel_encap_ops *ops;
493 int hlen = -EINVAL;
494
495 if (e->type == TUNNEL_ENCAP_NONE)
56328486 496 return 0;
a8c5f90f
TH
497
498 if (e->type >= MAX_IPTUN_ENCAP_OPS)
56328486 499 return -EINVAL;
a8c5f90f
TH
500
501 rcu_read_lock();
502 ops = rcu_dereference(iptun_encaps[e->type]);
503 if (likely(ops && ops->encap_hlen))
504 hlen = ops->encap_hlen(e);
505 rcu_read_unlock();
506
507 return hlen;
508}
509
510const struct ip_tunnel_encap_ops __rcu *
511 iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
512
513int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
514 unsigned int num)
515{
bb1553c8
TG
516 if (num >= MAX_IPTUN_ENCAP_OPS)
517 return -ERANGE;
518
a8c5f90f
TH
519 return !cmpxchg((const struct ip_tunnel_encap_ops **)
520 &iptun_encaps[num],
521 NULL, ops) ? 0 : -1;
56328486 522}
a8c5f90f
TH
523EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
524
525int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
526 unsigned int num)
527{
528 int ret;
529
bb1553c8
TG
530 if (num >= MAX_IPTUN_ENCAP_OPS)
531 return -ERANGE;
532
a8c5f90f
TH
533 ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
534 &iptun_encaps[num],
535 ops, NULL) == ops) ? 0 : -1;
536
537 synchronize_net();
538
539 return ret;
540}
541EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
56328486
TH
542
543int ip_tunnel_encap_setup(struct ip_tunnel *t,
544 struct ip_tunnel_encap *ipencap)
545{
546 int hlen;
547
548 memset(&t->encap, 0, sizeof(t->encap));
549
550 hlen = ip_encap_hlen(ipencap);
551 if (hlen < 0)
552 return hlen;
553
554 t->encap.type = ipencap->type;
555 t->encap.sport = ipencap->sport;
556 t->encap.dport = ipencap->dport;
557 t->encap.flags = ipencap->flags;
558
559 t->encap_hlen = hlen;
560 t->hlen = t->encap_hlen + t->tun_hlen;
561
562 return 0;
563}
564EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
565
56328486
TH
566int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
567 u8 *protocol, struct flowi4 *fl4)
568{
a8c5f90f
TH
569 const struct ip_tunnel_encap_ops *ops;
570 int ret = -EINVAL;
571
572 if (t->encap.type == TUNNEL_ENCAP_NONE)
56328486 573 return 0;
a8c5f90f 574
f1fb521f
TG
575 if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
576 return -EINVAL;
577
a8c5f90f
TH
578 rcu_read_lock();
579 ops = rcu_dereference(iptun_encaps[t->encap.type]);
580 if (likely(ops && ops->build_header))
581 ret = ops->build_header(skb, &t->encap, protocol, fl4);
582 rcu_read_unlock();
583
584 return ret;
56328486
TH
585}
586EXPORT_SYMBOL(ip_tunnel_encap);
587
23a3647b
PS
588static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
589 struct rtable *rt, __be16 df)
590{
591 struct ip_tunnel *tunnel = netdev_priv(dev);
8c91e162 592 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
23a3647b
PS
593 int mtu;
594
595 if (df)
596 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
597 - sizeof(struct iphdr) - tunnel->hlen;
598 else
599 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
600
601 if (skb_dst(skb))
602 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
603
604 if (skb->protocol == htons(ETH_P_IP)) {
605 if (!skb_is_gso(skb) &&
606 (df & htons(IP_DF)) && mtu < pkt_size) {
607 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
608 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
609 return -E2BIG;
610 }
611 }
612#if IS_ENABLED(CONFIG_IPV6)
613 else if (skb->protocol == htons(ETH_P_IPV6)) {
614 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
615
616 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
617 mtu >= IPV6_MIN_MTU) {
618 if ((tunnel->parms.iph.daddr &&
619 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
620 rt6->rt6i_dst.plen == 128) {
621 rt6->rt6i_flags |= RTF_MODIFIED;
622 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
623 }
624 }
625
626 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
627 mtu < pkt_size) {
628 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
629 return -E2BIG;
630 }
631 }
632#endif
633 return 0;
634}
635
c5441932 636void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
56328486 637 const struct iphdr *tnl_params, u8 protocol)
c5441932
PS
638{
639 struct ip_tunnel *tunnel = netdev_priv(dev);
640 const struct iphdr *inner_iph;
c5441932
PS
641 struct flowi4 fl4;
642 u8 tos, ttl;
643 __be16 df;
b045d37b 644 struct rtable *rt; /* Route to the other host */
c5441932
PS
645 unsigned int max_headroom; /* The extra header space needed */
646 __be32 dst;
0e6fbc5b 647 int err;
22fb22ea 648 bool connected;
c5441932
PS
649
650 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
22fb22ea 651 connected = (tunnel->parms.iph.daddr != 0);
c5441932
PS
652
653 dst = tnl_params->daddr;
654 if (dst == 0) {
655 /* NBMA tunnel */
656
51456b29 657 if (!skb_dst(skb)) {
c5441932
PS
658 dev->stats.tx_fifo_errors++;
659 goto tx_error;
660 }
661
662 if (skb->protocol == htons(ETH_P_IP)) {
663 rt = skb_rtable(skb);
664 dst = rt_nexthop(rt, inner_iph->daddr);
665 }
666#if IS_ENABLED(CONFIG_IPV6)
667 else if (skb->protocol == htons(ETH_P_IPV6)) {
668 const struct in6_addr *addr6;
669 struct neighbour *neigh;
670 bool do_tx_error_icmp;
671 int addr_type;
672
673 neigh = dst_neigh_lookup(skb_dst(skb),
674 &ipv6_hdr(skb)->daddr);
51456b29 675 if (!neigh)
c5441932
PS
676 goto tx_error;
677
678 addr6 = (const struct in6_addr *)&neigh->primary_key;
679 addr_type = ipv6_addr_type(addr6);
680
681 if (addr_type == IPV6_ADDR_ANY) {
682 addr6 = &ipv6_hdr(skb)->daddr;
683 addr_type = ipv6_addr_type(addr6);
684 }
685
686 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
687 do_tx_error_icmp = true;
688 else {
689 do_tx_error_icmp = false;
690 dst = addr6->s6_addr32[3];
691 }
692 neigh_release(neigh);
693 if (do_tx_error_icmp)
694 goto tx_error_icmp;
695 }
696#endif
697 else
698 goto tx_error;
7d442fab
TH
699
700 connected = false;
c5441932
PS
701 }
702
703 tos = tnl_params->tos;
704 if (tos & 0x1) {
705 tos &= ~0x1;
7d442fab 706 if (skb->protocol == htons(ETH_P_IP)) {
c5441932 707 tos = inner_iph->tos;
7d442fab
TH
708 connected = false;
709 } else if (skb->protocol == htons(ETH_P_IPV6)) {
c5441932 710 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
7d442fab
TH
711 connected = false;
712 }
c5441932
PS
713 }
714
7d442fab
TH
715 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
716 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
717
56328486
TH
718 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
719 goto tx_error;
720
95cb5745 721 rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
7d442fab
TH
722
723 if (!rt) {
724 rt = ip_route_output_key(tunnel->net, &fl4);
725
726 if (IS_ERR(rt)) {
727 dev->stats.tx_carrier_errors++;
728 goto tx_error;
729 }
730 if (connected)
95cb5745 731 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
c5441932 732 }
7d442fab 733
0e6fbc5b 734 if (rt->dst.dev == dev) {
c5441932
PS
735 ip_rt_put(rt);
736 dev->stats.collisions++;
737 goto tx_error;
738 }
c5441932 739
23a3647b
PS
740 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
741 ip_rt_put(rt);
742 goto tx_error;
c5441932 743 }
c5441932
PS
744
745 if (tunnel->err_count > 0) {
746 if (time_before(jiffies,
747 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
748 tunnel->err_count--;
749
11c21a30 750 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
c5441932
PS
751 dst_link_failure(skb);
752 } else
753 tunnel->err_count = 0;
754 }
755
d4a71b15 756 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
c5441932
PS
757 ttl = tnl_params->ttl;
758 if (ttl == 0) {
759 if (skb->protocol == htons(ETH_P_IP))
760 ttl = inner_iph->ttl;
761#if IS_ENABLED(CONFIG_IPV6)
762 else if (skb->protocol == htons(ETH_P_IPV6))
763 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
764#endif
765 else
766 ttl = ip4_dst_hoplimit(&rt->dst);
767 }
768
23a3647b
PS
769 df = tnl_params->frag_off;
770 if (skb->protocol == htons(ETH_P_IP))
771 df |= (inner_iph->frag_off&htons(IP_DF));
772
0e6fbc5b 773 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
7371e022 774 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
3e08f4a7 775 if (max_headroom > dev->needed_headroom)
c5441932 776 dev->needed_headroom = max_headroom;
3e08f4a7
SK
777
778 if (skb_cow_head(skb, dev->needed_headroom)) {
586d5fc8 779 ip_rt_put(rt);
3e08f4a7 780 dev->stats.tx_dropped++;
3acfa1e7 781 kfree_skb(skb);
3e08f4a7 782 return;
c5441932
PS
783 }
784
79b16aad 785 err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol,
d4a71b15 786 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
0e6fbc5b 787 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
c5441932 788
c5441932
PS
789 return;
790
791#if IS_ENABLED(CONFIG_IPV6)
792tx_error_icmp:
793 dst_link_failure(skb);
794#endif
795tx_error:
796 dev->stats.tx_errors++;
3acfa1e7 797 kfree_skb(skb);
c5441932
PS
798}
799EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
800
801static void ip_tunnel_update(struct ip_tunnel_net *itn,
802 struct ip_tunnel *t,
803 struct net_device *dev,
804 struct ip_tunnel_parm *p,
805 bool set_mtu)
806{
807 ip_tunnel_del(t);
808 t->parms.iph.saddr = p->iph.saddr;
809 t->parms.iph.daddr = p->iph.daddr;
810 t->parms.i_key = p->i_key;
811 t->parms.o_key = p->o_key;
812 if (dev->type != ARPHRD_ETHER) {
813 memcpy(dev->dev_addr, &p->iph.saddr, 4);
814 memcpy(dev->broadcast, &p->iph.daddr, 4);
815 }
816 ip_tunnel_add(itn, t);
817
818 t->parms.iph.ttl = p->iph.ttl;
819 t->parms.iph.tos = p->iph.tos;
820 t->parms.iph.frag_off = p->iph.frag_off;
821
822 if (t->parms.link != p->link) {
823 int mtu;
824
825 t->parms.link = p->link;
826 mtu = ip_tunnel_bind_dev(dev);
827 if (set_mtu)
828 dev->mtu = mtu;
829 }
cf71d2bc 830 ip_tunnel_dst_reset_all(t);
c5441932
PS
831 netdev_state_change(dev);
832}
833
834int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
835{
836 int err = 0;
8c923ce2
ND
837 struct ip_tunnel *t = netdev_priv(dev);
838 struct net *net = t->net;
839 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
c5441932
PS
840
841 BUG_ON(!itn->fb_tunnel_dev);
842 switch (cmd) {
843 case SIOCGETTUNNEL:
8c923ce2 844 if (dev == itn->fb_tunnel_dev) {
c5441932 845 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
51456b29 846 if (!t)
8c923ce2
ND
847 t = netdev_priv(dev);
848 }
c5441932
PS
849 memcpy(p, &t->parms, sizeof(*p));
850 break;
851
852 case SIOCADDTUNNEL:
853 case SIOCCHGTUNNEL:
854 err = -EPERM;
855 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
856 goto done;
857 if (p->iph.ttl)
858 p->iph.frag_off |= htons(IP_DF);
7c8e6b9c
DP
859 if (!(p->i_flags & VTI_ISVTI)) {
860 if (!(p->i_flags & TUNNEL_KEY))
861 p->i_key = 0;
862 if (!(p->o_flags & TUNNEL_KEY))
863 p->o_key = 0;
864 }
c5441932
PS
865
866 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
867
d61746b2
SK
868 if (cmd == SIOCADDTUNNEL) {
869 if (!t) {
870 t = ip_tunnel_create(net, itn, p);
871 err = PTR_ERR_OR_ZERO(t);
872 break;
873 }
874
875 err = -EEXIST;
ee30ef4d 876 break;
6dd3c9ec 877 }
c5441932 878 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
00db4124 879 if (t) {
c5441932
PS
880 if (t->dev != dev) {
881 err = -EEXIST;
882 break;
883 }
884 } else {
885 unsigned int nflags = 0;
886
887 if (ipv4_is_multicast(p->iph.daddr))
888 nflags = IFF_BROADCAST;
889 else if (p->iph.daddr)
890 nflags = IFF_POINTOPOINT;
891
892 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
893 err = -EINVAL;
894 break;
895 }
896
897 t = netdev_priv(dev);
898 }
899 }
900
901 if (t) {
902 err = 0;
903 ip_tunnel_update(itn, t, dev, p, true);
6dd3c9ec
FW
904 } else {
905 err = -ENOENT;
906 }
c5441932
PS
907 break;
908
909 case SIOCDELTUNNEL:
910 err = -EPERM;
911 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
912 goto done;
913
914 if (dev == itn->fb_tunnel_dev) {
915 err = -ENOENT;
916 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
51456b29 917 if (!t)
c5441932
PS
918 goto done;
919 err = -EPERM;
920 if (t == netdev_priv(itn->fb_tunnel_dev))
921 goto done;
922 dev = t->dev;
923 }
924 unregister_netdevice(dev);
925 err = 0;
926 break;
927
928 default:
929 err = -EINVAL;
930 }
931
932done:
933 return err;
934}
935EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
936
937int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
938{
939 struct ip_tunnel *tunnel = netdev_priv(dev);
940 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
941
942 if (new_mtu < 68 ||
943 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
944 return -EINVAL;
945 dev->mtu = new_mtu;
946 return 0;
947}
948EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
949
950static void ip_tunnel_dev_free(struct net_device *dev)
951{
952 struct ip_tunnel *tunnel = netdev_priv(dev);
953
954 gro_cells_destroy(&tunnel->gro_cells);
9a4aa9af 955 free_percpu(tunnel->dst_cache);
c5441932
PS
956 free_percpu(dev->tstats);
957 free_netdev(dev);
958}
959
960void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
961{
c5441932
PS
962 struct ip_tunnel *tunnel = netdev_priv(dev);
963 struct ip_tunnel_net *itn;
964
6c742e71 965 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
c5441932
PS
966
967 if (itn->fb_tunnel_dev != dev) {
968 ip_tunnel_del(netdev_priv(dev));
969 unregister_netdevice_queue(dev, head);
970 }
971}
972EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
973
1728d4fa
ND
974struct net *ip_tunnel_get_link_net(const struct net_device *dev)
975{
976 struct ip_tunnel *tunnel = netdev_priv(dev);
977
978 return tunnel->net;
979}
980EXPORT_SYMBOL(ip_tunnel_get_link_net);
981
1e99584b
ND
982int ip_tunnel_get_iflink(const struct net_device *dev)
983{
984 struct ip_tunnel *tunnel = netdev_priv(dev);
985
986 return tunnel->parms.link;
987}
988EXPORT_SYMBOL(ip_tunnel_get_iflink);
989
d3b6f614 990int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
c5441932
PS
991 struct rtnl_link_ops *ops, char *devname)
992{
993 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
994 struct ip_tunnel_parm parms;
6261d983 995 unsigned int i;
c5441932 996
6261d983 997 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
998 INIT_HLIST_HEAD(&itn->tunnels[i]);
c5441932
PS
999
1000 if (!ops) {
1001 itn->fb_tunnel_dev = NULL;
1002 return 0;
1003 }
6261d983 1004
c5441932
PS
1005 memset(&parms, 0, sizeof(parms));
1006 if (devname)
1007 strlcpy(parms.name, devname, IFNAMSIZ);
1008
1009 rtnl_lock();
1010 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
ea857f28
DC
1011 /* FB netdevice is special: we have one, and only one per netns.
1012 * Allowing to move it to another netns is clearly unsafe.
1013 */
67013282 1014 if (!IS_ERR(itn->fb_tunnel_dev)) {
b4de77ad 1015 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
78ff4be4 1016 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
67013282
SK
1017 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1018 }
b4de77ad 1019 rtnl_unlock();
c5441932 1020
27d79f3b 1021 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
c5441932
PS
1022}
1023EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1024
6c742e71
ND
1025static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
1026 struct rtnl_link_ops *ops)
c5441932 1027{
6c742e71
ND
1028 struct net *net = dev_net(itn->fb_tunnel_dev);
1029 struct net_device *dev, *aux;
c5441932
PS
1030 int h;
1031
6c742e71
ND
1032 for_each_netdev_safe(net, dev, aux)
1033 if (dev->rtnl_link_ops == ops)
1034 unregister_netdevice_queue(dev, head);
1035
c5441932
PS
1036 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1037 struct ip_tunnel *t;
1038 struct hlist_node *n;
1039 struct hlist_head *thead = &itn->tunnels[h];
1040
1041 hlist_for_each_entry_safe(t, n, thead, hash_node)
6c742e71
ND
1042 /* If dev is in the same netns, it has already
1043 * been added to the list by the previous loop.
1044 */
1045 if (!net_eq(dev_net(t->dev), net))
1046 unregister_netdevice_queue(t->dev, head);
c5441932 1047 }
c5441932
PS
1048}
1049
6c742e71 1050void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
c5441932
PS
1051{
1052 LIST_HEAD(list);
1053
1054 rtnl_lock();
6c742e71 1055 ip_tunnel_destroy(itn, &list, ops);
c5441932
PS
1056 unregister_netdevice_many(&list);
1057 rtnl_unlock();
c5441932
PS
1058}
1059EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1060
1061int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1062 struct ip_tunnel_parm *p)
1063{
1064 struct ip_tunnel *nt;
1065 struct net *net = dev_net(dev);
1066 struct ip_tunnel_net *itn;
1067 int mtu;
1068 int err;
1069
1070 nt = netdev_priv(dev);
1071 itn = net_generic(net, nt->ip_tnl_net_id);
1072
1073 if (ip_tunnel_find(itn, p, dev->type))
1074 return -EEXIST;
1075
5e6700b3 1076 nt->net = net;
c5441932
PS
1077 nt->parms = *p;
1078 err = register_netdevice(dev);
1079 if (err)
1080 goto out;
1081
1082 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1083 eth_hw_addr_random(dev);
1084
1085 mtu = ip_tunnel_bind_dev(dev);
1086 if (!tb[IFLA_MTU])
1087 dev->mtu = mtu;
1088
1089 ip_tunnel_add(itn, nt);
1090
1091out:
1092 return err;
1093}
1094EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1095
1096int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1097 struct ip_tunnel_parm *p)
1098{
6c742e71 1099 struct ip_tunnel *t;
c5441932 1100 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1101 struct net *net = tunnel->net;
c5441932
PS
1102 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1103
1104 if (dev == itn->fb_tunnel_dev)
1105 return -EINVAL;
1106
c5441932
PS
1107 t = ip_tunnel_find(itn, p, dev->type);
1108
1109 if (t) {
1110 if (t->dev != dev)
1111 return -EEXIST;
1112 } else {
6c742e71 1113 t = tunnel;
c5441932
PS
1114
1115 if (dev->type != ARPHRD_ETHER) {
1116 unsigned int nflags = 0;
1117
1118 if (ipv4_is_multicast(p->iph.daddr))
1119 nflags = IFF_BROADCAST;
1120 else if (p->iph.daddr)
1121 nflags = IFF_POINTOPOINT;
1122
1123 if ((dev->flags ^ nflags) &
1124 (IFF_POINTOPOINT | IFF_BROADCAST))
1125 return -EINVAL;
1126 }
1127 }
1128
1129 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1130 return 0;
1131}
1132EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1133
1134int ip_tunnel_init(struct net_device *dev)
1135{
1136 struct ip_tunnel *tunnel = netdev_priv(dev);
1137 struct iphdr *iph = &tunnel->parms.iph;
1c213bd2 1138 int err;
c5441932
PS
1139
1140 dev->destructor = ip_tunnel_dev_free;
1c213bd2 1141 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
c5441932
PS
1142 if (!dev->tstats)
1143 return -ENOMEM;
1144
9a4aa9af
TH
1145 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1146 if (!tunnel->dst_cache) {
1147 free_percpu(dev->tstats);
1148 return -ENOMEM;
1149 }
1150
c5441932
PS
1151 err = gro_cells_init(&tunnel->gro_cells, dev);
1152 if (err) {
9a4aa9af 1153 free_percpu(tunnel->dst_cache);
c5441932
PS
1154 free_percpu(dev->tstats);
1155 return err;
1156 }
1157
1158 tunnel->dev = dev;
6c742e71 1159 tunnel->net = dev_net(dev);
c5441932
PS
1160 strcpy(tunnel->parms.name, dev->name);
1161 iph->version = 4;
1162 iph->ihl = 5;
1163
1164 return 0;
1165}
1166EXPORT_SYMBOL_GPL(ip_tunnel_init);
1167
1168void ip_tunnel_uninit(struct net_device *dev)
1169{
c5441932 1170 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1171 struct net *net = tunnel->net;
c5441932
PS
1172 struct ip_tunnel_net *itn;
1173
1174 itn = net_generic(net, tunnel->ip_tnl_net_id);
1175 /* fb_tunnel_dev will be unregisted in net-exit call. */
1176 if (itn->fb_tunnel_dev != dev)
1177 ip_tunnel_del(netdev_priv(dev));
7d442fab 1178
cf71d2bc 1179 ip_tunnel_dst_reset_all(tunnel);
c5441932
PS
1180}
1181EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1182
1183/* Do least required initialization, rest of init is done in tunnel_init call */
1184void ip_tunnel_setup(struct net_device *dev, int net_id)
1185{
1186 struct ip_tunnel *tunnel = netdev_priv(dev);
1187 tunnel->ip_tnl_net_id = net_id;
1188}
1189EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1190
1191MODULE_LICENSE("GPL");