tcp: fix delayed ACKs for MSS boundary condition
[linux-block.git] / net / ipv4 / ip_tunnel.c
CommitLineData
c9422999 1// SPDX-License-Identifier: GPL-2.0-only
c5441932
PS
2/*
3 * Copyright (c) 2013 Nicira, Inc.
c5441932
PS
4 */
5
6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8#include <linux/capability.h>
9#include <linux/module.h>
10#include <linux/types.h>
11#include <linux/kernel.h>
12#include <linux/slab.h>
13#include <linux/uaccess.h>
14#include <linux/skbuff.h>
15#include <linux/netdevice.h>
16#include <linux/in.h>
17#include <linux/tcp.h>
18#include <linux/udp.h>
19#include <linux/if_arp.h>
c5441932
PS
20#include <linux/init.h>
21#include <linux/in6.h>
22#include <linux/inetdevice.h>
23#include <linux/igmp.h>
24#include <linux/netfilter_ipv4.h>
25#include <linux/etherdevice.h>
26#include <linux/if_ether.h>
27#include <linux/if_vlan.h>
28#include <linux/rculist.h>
27d79f3b 29#include <linux/err.h>
c5441932
PS
30
31#include <net/sock.h>
32#include <net/ip.h>
33#include <net/icmp.h>
34#include <net/protocol.h>
35#include <net/ip_tunnels.h>
36#include <net/arp.h>
37#include <net/checksum.h>
38#include <net/dsfield.h>
39#include <net/inet_ecn.h>
40#include <net/xfrm.h>
41#include <net/net_namespace.h>
42#include <net/netns/generic.h>
43#include <net/rtnetlink.h>
56328486 44#include <net/udp.h>
cfc7381b 45#include <net/dst_metadata.h>
63487bab 46
c5441932
PS
47#if IS_ENABLED(CONFIG_IPV6)
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#endif
52
967680e0 53static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
c5441932
PS
54{
55 return hash_32((__force u32)key ^ (__force u32)remote,
56 IP_TNL_HASH_BITS);
57}
58
c5441932
PS
59static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
60 __be16 flags, __be32 key)
61{
62 if (p->i_flags & TUNNEL_KEY) {
63 if (flags & TUNNEL_KEY)
64 return key == p->i_key;
65 else
66 /* key expected, none present */
67 return false;
68 } else
69 return !(flags & TUNNEL_KEY);
70}
71
72/* Fallback tunnel: no source, no destination, no key, no options
73
74 Tunnel hash table:
75 We require exact key match i.e. if a key is present in packet
76 it will match only tunnel with the same key; if it is not present,
77 it will match only keyless tunnel.
78
79 All keyless packets, if not matched by configured keyless tunnels,
80 will match the fallback tunnel.
81 Given src, dst and key, find the appropriate tunnel for input.
82*/
83struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
84 int link, __be16 flags,
85 __be32 remote, __be32 local,
86 __be32 key)
87{
c5441932
PS
88 struct ip_tunnel *t, *cand = NULL;
89 struct hlist_head *head;
ba61539c
TY
90 struct net_device *ndev;
91 unsigned int hash;
c5441932 92
967680e0 93 hash = ip_tunnel_hash(key, remote);
c5441932
PS
94 head = &itn->tunnels[hash];
95
96 hlist_for_each_entry_rcu(t, head, hash_node) {
97 if (local != t->parms.iph.saddr ||
98 remote != t->parms.iph.daddr ||
99 !(t->dev->flags & IFF_UP))
100 continue;
101
102 if (!ip_tunnel_key_match(&t->parms, flags, key))
103 continue;
104
105 if (t->parms.link == link)
106 return t;
107 else
108 cand = t;
109 }
110
111 hlist_for_each_entry_rcu(t, head, hash_node) {
112 if (remote != t->parms.iph.daddr ||
e0056593 113 t->parms.iph.saddr != 0 ||
c5441932
PS
114 !(t->dev->flags & IFF_UP))
115 continue;
116
117 if (!ip_tunnel_key_match(&t->parms, flags, key))
118 continue;
119
120 if (t->parms.link == link)
121 return t;
122 else if (!cand)
123 cand = t;
124 }
125
967680e0 126 hash = ip_tunnel_hash(key, 0);
c5441932
PS
127 head = &itn->tunnels[hash];
128
129 hlist_for_each_entry_rcu(t, head, hash_node) {
e0056593
DP
130 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
131 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
132 continue;
133
134 if (!(t->dev->flags & IFF_UP))
c5441932
PS
135 continue;
136
137 if (!ip_tunnel_key_match(&t->parms, flags, key))
138 continue;
139
140 if (t->parms.link == link)
141 return t;
142 else if (!cand)
143 cand = t;
144 }
145
c5441932 146 hlist_for_each_entry_rcu(t, head, hash_node) {
25629fda 147 if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
e0056593
DP
148 t->parms.iph.saddr != 0 ||
149 t->parms.iph.daddr != 0 ||
c5441932
PS
150 !(t->dev->flags & IFF_UP))
151 continue;
152
153 if (t->parms.link == link)
154 return t;
155 else if (!cand)
156 cand = t;
157 }
158
c5441932
PS
159 if (cand)
160 return cand;
161
2e15ea39 162 t = rcu_dereference(itn->collect_md_tun);
833a8b40 163 if (t && t->dev->flags & IFF_UP)
2e15ea39
PS
164 return t;
165
ba61539c
TY
166 ndev = READ_ONCE(itn->fb_tunnel_dev);
167 if (ndev && ndev->flags & IFF_UP)
168 return netdev_priv(ndev);
c5441932 169
c5441932
PS
170 return NULL;
171}
172EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
173
174static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
175 struct ip_tunnel_parm *parms)
176{
177 unsigned int h;
178 __be32 remote;
6d608f06 179 __be32 i_key = parms->i_key;
c5441932
PS
180
181 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
182 remote = parms->iph.daddr;
183 else
184 remote = 0;
185
6d608f06
SK
186 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
187 i_key = 0;
188
189 h = ip_tunnel_hash(i_key, remote);
c5441932
PS
190 return &itn->tunnels[h];
191}
192
193static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
194{
195 struct hlist_head *head = ip_bucket(itn, &t->parms);
196
2e15ea39
PS
197 if (t->collect_md)
198 rcu_assign_pointer(itn->collect_md_tun, t);
c5441932
PS
199 hlist_add_head_rcu(&t->hash_node, head);
200}
201
2e15ea39 202static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
c5441932 203{
2e15ea39
PS
204 if (t->collect_md)
205 rcu_assign_pointer(itn->collect_md_tun, NULL);
c5441932
PS
206 hlist_del_init_rcu(&t->hash_node);
207}
208
209static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
210 struct ip_tunnel_parm *parms,
211 int type)
212{
213 __be32 remote = parms->iph.daddr;
214 __be32 local = parms->iph.saddr;
215 __be32 key = parms->i_key;
5ce54af1 216 __be16 flags = parms->i_flags;
c5441932
PS
217 int link = parms->link;
218 struct ip_tunnel *t = NULL;
219 struct hlist_head *head = ip_bucket(itn, parms);
220
221 hlist_for_each_entry_rcu(t, head, hash_node) {
222 if (local == t->parms.iph.saddr &&
223 remote == t->parms.iph.daddr &&
c5441932 224 link == t->parms.link &&
5ce54af1
DP
225 type == t->dev->type &&
226 ip_tunnel_key_match(&t->parms, flags, key))
c5441932
PS
227 break;
228 }
229 return t;
230}
231
232static struct net_device *__ip_tunnel_create(struct net *net,
233 const struct rtnl_link_ops *ops,
234 struct ip_tunnel_parm *parms)
235{
236 int err;
237 struct ip_tunnel *tunnel;
238 struct net_device *dev;
239 char name[IFNAMSIZ];
240
9cb726a2
ED
241 err = -E2BIG;
242 if (parms->name[0]) {
243 if (!dev_valid_name(parms->name))
244 goto failed;
512b2dc4 245 strscpy(name, parms->name, IFNAMSIZ);
9cb726a2
ED
246 } else {
247 if (strlen(ops->kind) > (IFNAMSIZ - 3))
c5441932 248 goto failed;
000ade80
SA
249 strcpy(name, ops->kind);
250 strcat(name, "%d");
c5441932
PS
251 }
252
253 ASSERT_RTNL();
c835a677 254 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
c5441932
PS
255 if (!dev) {
256 err = -ENOMEM;
257 goto failed;
258 }
259 dev_net_set(dev, net);
260
261 dev->rtnl_link_ops = ops;
262
263 tunnel = netdev_priv(dev);
264 tunnel->parms = *parms;
5e6700b3 265 tunnel->net = net;
c5441932
PS
266
267 err = register_netdevice(dev);
268 if (err)
269 goto failed_free;
270
271 return dev;
272
273failed_free:
274 free_netdev(dev);
275failed:
276 return ERR_PTR(err);
277}
278
c5441932
PS
279static int ip_tunnel_bind_dev(struct net_device *dev)
280{
281 struct net_device *tdev = NULL;
282 struct ip_tunnel *tunnel = netdev_priv(dev);
283 const struct iphdr *iph;
284 int hlen = LL_MAX_HEADER;
285 int mtu = ETH_DATA_LEN;
286 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
287
288 iph = &tunnel->parms.iph;
289
290 /* Guess output device to choose reasonable mtu and needed_headroom */
291 if (iph->daddr) {
292 struct flowi4 fl4;
293 struct rtable *rt;
294
b0066da5
PM
295 ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
296 iph->saddr, tunnel->parms.o_key,
db53cd3d 297 RT_TOS(iph->tos), dev_net(dev),
7ec9fce4 298 tunnel->parms.link, tunnel->fwmark, 0, 0);
7d442fab
TH
299 rt = ip_route_output_key(tunnel->net, &fl4);
300
c5441932
PS
301 if (!IS_ERR(rt)) {
302 tdev = rt->dst.dev;
303 ip_rt_put(rt);
304 }
305 if (dev->type != ARPHRD_ETHER)
306 dev->flags |= IFF_POINTOPOINT;
f27337e1
PA
307
308 dst_cache_reset(&tunnel->dst_cache);
c5441932
PS
309 }
310
311 if (!tdev && tunnel->parms.link)
6c742e71 312 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
c5441932
PS
313
314 if (tdev) {
315 hlen = tdev->hard_header_len + tdev->needed_headroom;
82612de1 316 mtu = min(tdev->mtu, IP_MAX_MTU);
c5441932 317 }
c5441932
PS
318
319 dev->needed_headroom = t_hlen + hlen;
9992a078 320 mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);
c5441932 321
b5476022
ED
322 if (mtu < IPV4_MIN_MTU)
323 mtu = IPV4_MIN_MTU;
c5441932
PS
324
325 return mtu;
326}
327
328static struct ip_tunnel *ip_tunnel_create(struct net *net,
329 struct ip_tunnel_net *itn,
330 struct ip_tunnel_parm *parms)
331{
4929fd8c 332 struct ip_tunnel *nt;
c5441932 333 struct net_device *dev;
b96f9afe 334 int t_hlen;
f6cc9c05
PM
335 int mtu;
336 int err;
c5441932 337
79134e6c 338 dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
c5441932 339 if (IS_ERR(dev))
6dd3c9ec 340 return ERR_CAST(dev);
c5441932 341
f6cc9c05
PM
342 mtu = ip_tunnel_bind_dev(dev);
343 err = dev_set_mtu(dev, mtu);
344 if (err)
345 goto err_dev_set_mtu;
c5441932
PS
346
347 nt = netdev_priv(dev);
b96f9afe
JW
348 t_hlen = nt->hlen + sizeof(struct iphdr);
349 dev->min_mtu = ETH_MIN_MTU;
28e104d0 350 dev->max_mtu = IP_MAX_MTU - t_hlen;
9992a078
HL
351 if (dev->type == ARPHRD_ETHER)
352 dev->max_mtu -= dev->hard_header_len;
353
c5441932
PS
354 ip_tunnel_add(itn, nt);
355 return nt;
f6cc9c05
PM
356
357err_dev_set_mtu:
358 unregister_netdevice(dev);
359 return ERR_PTR(err);
c5441932
PS
360}
361
ac931d4c
CE
362void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info)
363{
364 const struct iphdr *iph = ip_hdr(skb);
365 const struct udphdr *udph;
366
367 if (iph->protocol != IPPROTO_UDP)
368 return;
369
370 udph = (struct udphdr *)((__u8 *)iph + (iph->ihl << 2));
371 info->encap.sport = udph->source;
372 info->encap.dport = udph->dest;
373}
374EXPORT_SYMBOL(ip_tunnel_md_udp_encap);
375
c5441932 376int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
2e15ea39
PS
377 const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
378 bool log_ecn_error)
c5441932 379{
c5441932
PS
380 const struct iphdr *iph = ip_hdr(skb);
381 int err;
382
c5441932
PS
383#ifdef CONFIG_NET_IPGRE_BROADCAST
384 if (ipv4_is_multicast(iph->daddr)) {
c4794d22 385 DEV_STATS_INC(tunnel->dev, multicast);
c5441932
PS
386 skb->pkt_type = PACKET_BROADCAST;
387 }
388#endif
389
390 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
391 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
c4794d22
ED
392 DEV_STATS_INC(tunnel->dev, rx_crc_errors);
393 DEV_STATS_INC(tunnel->dev, rx_errors);
c5441932
PS
394 goto drop;
395 }
396
397 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
398 if (!(tpi->flags&TUNNEL_SEQ) ||
399 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
c4794d22
ED
400 DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
401 DEV_STATS_INC(tunnel->dev, rx_errors);
c5441932
PS
402 goto drop;
403 }
404 tunnel->i_seqno = ntohl(tpi->seq) + 1;
405 }
406
227adfb2 407 skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
e96f2e7c 408
c5441932
PS
409 err = IP_ECN_decapsulate(iph, skb);
410 if (unlikely(err)) {
411 if (log_ecn_error)
412 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
413 &iph->saddr, iph->tos);
414 if (err > 1) {
c4794d22
ED
415 DEV_STATS_INC(tunnel->dev, rx_frame_errors);
416 DEV_STATS_INC(tunnel->dev, rx_errors);
c5441932
PS
417 goto drop;
418 }
419 }
420
560b50cf 421 dev_sw_netstats_rx_add(tunnel->dev, skb->len);
81b9eab5
AS
422 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
423
3d7b46cd
PS
424 if (tunnel->dev->type == ARPHRD_ETHER) {
425 skb->protocol = eth_type_trans(skb, tunnel->dev);
426 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
427 } else {
428 skb->dev = tunnel->dev;
429 }
64261f23 430
2e15ea39
PS
431 if (tun_dst)
432 skb_dst_set(skb, (struct dst_entry *)tun_dst);
433
c5441932
PS
434 gro_cells_receive(&tunnel->gro_cells, skb);
435 return 0;
436
437drop:
469f87e1
HY
438 if (tun_dst)
439 dst_release((struct dst_entry *)tun_dst);
c5441932
PS
440 kfree_skb(skb);
441 return 0;
442}
443EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
444
a8c5f90f
TH
445int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
446 unsigned int num)
447{
bb1553c8
TG
448 if (num >= MAX_IPTUN_ENCAP_OPS)
449 return -ERANGE;
450
a8c5f90f
TH
451 return !cmpxchg((const struct ip_tunnel_encap_ops **)
452 &iptun_encaps[num],
453 NULL, ops) ? 0 : -1;
56328486 454}
a8c5f90f
TH
455EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
456
457int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
458 unsigned int num)
459{
460 int ret;
461
bb1553c8
TG
462 if (num >= MAX_IPTUN_ENCAP_OPS)
463 return -ERANGE;
464
a8c5f90f
TH
465 ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
466 &iptun_encaps[num],
467 ops, NULL) == ops) ? 0 : -1;
468
469 synchronize_net();
470
471 return ret;
472}
473EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
56328486
TH
474
475int ip_tunnel_encap_setup(struct ip_tunnel *t,
476 struct ip_tunnel_encap *ipencap)
477{
478 int hlen;
479
480 memset(&t->encap, 0, sizeof(t->encap));
481
482 hlen = ip_encap_hlen(ipencap);
483 if (hlen < 0)
484 return hlen;
485
486 t->encap.type = ipencap->type;
487 t->encap.sport = ipencap->sport;
488 t->encap.dport = ipencap->dport;
489 t->encap.flags = ipencap->flags;
490
491 t->encap_hlen = hlen;
492 t->hlen = t->encap_hlen + t->tun_hlen;
493
494 return 0;
495}
496EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
497
23a3647b 498static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
fc24f2b2 499 struct rtable *rt, __be16 df,
c8b34e68 500 const struct iphdr *inner_iph,
501 int tunnel_hlen, __be32 dst, bool md)
23a3647b
PS
502{
503 struct ip_tunnel *tunnel = netdev_priv(dev);
c8b34e68 504 int pkt_size;
23a3647b
PS
505 int mtu;
506
c8b34e68 507 tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
28e104d0 508 pkt_size = skb->len - tunnel_hlen;
9992a078 509 pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
c8b34e68 510
9992a078 511 if (df) {
28e104d0 512 mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
9992a078
HL
513 mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
514 } else {
f4b3ec4e 515 mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
9992a078 516 }
23a3647b 517
f4b3ec4e 518 if (skb_valid_dst(skb))
7a1592bc 519 skb_dst_update_pmtu_no_confirm(skb, mtu);
23a3647b
PS
520
521 if (skb->protocol == htons(ETH_P_IP)) {
522 if (!skb_is_gso(skb) &&
fc24f2b2
TT
523 (inner_iph->frag_off & htons(IP_DF)) &&
524 mtu < pkt_size) {
4372339e 525 icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
23a3647b
PS
526 return -E2BIG;
527 }
528 }
529#if IS_ENABLED(CONFIG_IPV6)
530 else if (skb->protocol == htons(ETH_P_IPV6)) {
f4b3ec4e 531 struct rt6_info *rt6;
c8b34e68 532 __be32 daddr;
533
f4b3ec4e
AM
534 rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
535 NULL;
c8b34e68 536 daddr = md ? dst : tunnel->parms.iph.daddr;
23a3647b
PS
537
538 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
539 mtu >= IPV6_MIN_MTU) {
c8b34e68 540 if ((daddr && !ipv4_is_multicast(daddr)) ||
23a3647b
PS
541 rt6->rt6i_dst.plen == 128) {
542 rt6->rt6i_flags |= RTF_MODIFIED;
543 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
544 }
545 }
546
547 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
548 mtu < pkt_size) {
4372339e 549 icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
23a3647b
PS
550 return -E2BIG;
551 }
552 }
553#endif
554 return 0;
555}
556
c8b34e68 557void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
558 u8 proto, int tunnel_hlen)
cfc7381b
AS
559{
560 struct ip_tunnel *tunnel = netdev_priv(dev);
561 u32 headroom = sizeof(struct iphdr);
562 struct ip_tunnel_info *tun_info;
563 const struct ip_tunnel_key *key;
564 const struct iphdr *inner_iph;
f46fe4f8 565 struct rtable *rt = NULL;
cfc7381b
AS
566 struct flowi4 fl4;
567 __be16 df = 0;
568 u8 tos, ttl;
f46fe4f8 569 bool use_cache;
cfc7381b
AS
570
571 tun_info = skb_tunnel_info(skb);
572 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
573 ip_tunnel_info_af(tun_info) != AF_INET))
574 goto tx_error;
575 key = &tun_info->key;
576 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
577 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
578 tos = key->tos;
579 if (tos == 1) {
580 if (skb->protocol == htons(ETH_P_IP))
581 tos = inner_iph->tos;
582 else if (skb->protocol == htons(ETH_P_IPV6))
583 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
584 }
6e6b904a 585 ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
586 tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
7ec9fce4
EB
587 dev_net(dev), 0, skb->mark, skb_get_hash(skb),
588 key->flow_flags);
ac931d4c
CE
589
590 if (!tunnel_hlen)
591 tunnel_hlen = ip_encap_hlen(&tun_info->encap);
592
593 if (ip_tunnel_encap(skb, &tun_info->encap, &proto, &fl4) < 0)
cfc7381b 594 goto tx_error;
f46fe4f8 595
596 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
597 if (use_cache)
598 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
599 if (!rt) {
600 rt = ip_route_output_key(tunnel->net, &fl4);
601 if (IS_ERR(rt)) {
c4794d22 602 DEV_STATS_INC(dev, tx_carrier_errors);
f46fe4f8 603 goto tx_error;
604 }
605 if (use_cache)
606 dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
607 fl4.saddr);
cfc7381b
AS
608 }
609 if (rt->dst.dev == dev) {
610 ip_rt_put(rt);
c4794d22 611 DEV_STATS_INC(dev, collisions);
cfc7381b
AS
612 goto tx_error;
613 }
c8b34e68 614
615 if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
616 df = htons(IP_DF);
617 if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
618 key->u.ipv4.dst, true)) {
619 ip_rt_put(rt);
620 goto tx_error;
621 }
622
cfc7381b
AS
623 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
624 ttl = key->ttl;
625 if (ttl == 0) {
626 if (skb->protocol == htons(ETH_P_IP))
627 ttl = inner_iph->ttl;
628 else if (skb->protocol == htons(ETH_P_IPV6))
629 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
630 else
631 ttl = ip4_dst_hoplimit(&rt->dst);
632 }
c8b34e68 633
cfc7381b 634 headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
4b397c06
ED
635 if (headroom > READ_ONCE(dev->needed_headroom))
636 WRITE_ONCE(dev->needed_headroom, headroom);
cfc7381b 637
4b397c06 638 if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
cfc7381b
AS
639 ip_rt_put(rt);
640 goto tx_dropped;
641 }
0f693f19
HY
642 iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
643 df, !net_eq(tunnel->net, dev_net(dev)));
cfc7381b
AS
644 return;
645tx_error:
c4794d22 646 DEV_STATS_INC(dev, tx_errors);
cfc7381b
AS
647 goto kfree;
648tx_dropped:
c4794d22 649 DEV_STATS_INC(dev, tx_dropped);
cfc7381b
AS
650kfree:
651 kfree_skb(skb);
652}
653EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
654
c5441932 655void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
56328486 656 const struct iphdr *tnl_params, u8 protocol)
c5441932
PS
657{
658 struct ip_tunnel *tunnel = netdev_priv(dev);
186d9366 659 struct ip_tunnel_info *tun_info = NULL;
c5441932 660 const struct iphdr *inner_iph;
c5441932 661 unsigned int max_headroom; /* The extra header space needed */
186d9366 662 struct rtable *rt = NULL; /* Route to the other host */
7ae29fd1 663 __be16 payload_protocol;
186d9366 664 bool use_cache = false;
665 struct flowi4 fl4;
666 bool md = false;
22fb22ea 667 bool connected;
186d9366 668 u8 tos, ttl;
669 __be32 dst;
670 __be16 df;
c5441932
PS
671
672 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
22fb22ea 673 connected = (tunnel->parms.iph.daddr != 0);
7ae29fd1 674 payload_protocol = skb_protocol(skb, true);
c5441932 675
5146d1f1
BH
676 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
677
c5441932
PS
678 dst = tnl_params->daddr;
679 if (dst == 0) {
680 /* NBMA tunnel */
681
51456b29 682 if (!skb_dst(skb)) {
c4794d22 683 DEV_STATS_INC(dev, tx_fifo_errors);
c5441932
PS
684 goto tx_error;
685 }
686
d71b5753 687 tun_info = skb_tunnel_info(skb);
688 if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
689 ip_tunnel_info_af(tun_info) == AF_INET &&
186d9366 690 tun_info->key.u.ipv4.dst) {
d71b5753 691 dst = tun_info->key.u.ipv4.dst;
186d9366 692 md = true;
693 connected = true;
7ae29fd1 694 } else if (payload_protocol == htons(ETH_P_IP)) {
c5441932
PS
695 rt = skb_rtable(skb);
696 dst = rt_nexthop(rt, inner_iph->daddr);
697 }
698#if IS_ENABLED(CONFIG_IPV6)
7ae29fd1 699 else if (payload_protocol == htons(ETH_P_IPV6)) {
c5441932
PS
700 const struct in6_addr *addr6;
701 struct neighbour *neigh;
702 bool do_tx_error_icmp;
703 int addr_type;
704
705 neigh = dst_neigh_lookup(skb_dst(skb),
706 &ipv6_hdr(skb)->daddr);
51456b29 707 if (!neigh)
c5441932
PS
708 goto tx_error;
709
710 addr6 = (const struct in6_addr *)&neigh->primary_key;
711 addr_type = ipv6_addr_type(addr6);
712
713 if (addr_type == IPV6_ADDR_ANY) {
714 addr6 = &ipv6_hdr(skb)->daddr;
715 addr_type = ipv6_addr_type(addr6);
716 }
717
718 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
719 do_tx_error_icmp = true;
720 else {
721 do_tx_error_icmp = false;
722 dst = addr6->s6_addr32[3];
723 }
724 neigh_release(neigh);
725 if (do_tx_error_icmp)
726 goto tx_error_icmp;
727 }
728#endif
729 else
730 goto tx_error;
7d442fab 731
186d9366 732 if (!md)
733 connected = false;
c5441932
PS
734 }
735
736 tos = tnl_params->tos;
737 if (tos & 0x1) {
738 tos &= ~0x1;
7ae29fd1 739 if (payload_protocol == htons(ETH_P_IP)) {
c5441932 740 tos = inner_iph->tos;
7d442fab 741 connected = false;
7ae29fd1 742 } else if (payload_protocol == htons(ETH_P_IPV6)) {
c5441932 743 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
7d442fab
TH
744 connected = false;
745 }
c5441932
PS
746 }
747
0f3e9c97 748 ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
db53cd3d
DA
749 tunnel->parms.o_key, RT_TOS(tos),
750 dev_net(dev), tunnel->parms.link,
7ec9fce4 751 tunnel->fwmark, skb_get_hash(skb), 0);
7d442fab 752
ac931d4c 753 if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
56328486
TH
754 goto tx_error;
755
186d9366 756 if (connected && md) {
757 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
758 if (use_cache)
759 rt = dst_cache_get_ip4(&tun_info->dst_cache,
760 &fl4.saddr);
761 } else {
762 rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
763 &fl4.saddr) : NULL;
764 }
7d442fab
TH
765
766 if (!rt) {
767 rt = ip_route_output_key(tunnel->net, &fl4);
768
769 if (IS_ERR(rt)) {
c4794d22 770 DEV_STATS_INC(dev, tx_carrier_errors);
7d442fab
TH
771 goto tx_error;
772 }
186d9366 773 if (use_cache)
774 dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
775 fl4.saddr);
776 else if (!md && connected)
e09acddf
PA
777 dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
778 fl4.saddr);
c5441932 779 }
7d442fab 780
0e6fbc5b 781 if (rt->dst.dev == dev) {
c5441932 782 ip_rt_put(rt);
c4794d22 783 DEV_STATS_INC(dev, collisions);
c5441932
PS
784 goto tx_error;
785 }
c5441932 786
50c66167 787 df = tnl_params->frag_off;
7ae29fd1 788 if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
50c66167
FW
789 df |= (inner_iph->frag_off & htons(IP_DF));
790
791 if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
23a3647b
PS
792 ip_rt_put(rt);
793 goto tx_error;
c5441932 794 }
c5441932
PS
795
796 if (tunnel->err_count > 0) {
797 if (time_before(jiffies,
798 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
799 tunnel->err_count--;
800
801 dst_link_failure(skb);
802 } else
803 tunnel->err_count = 0;
804 }
805
d4a71b15 806 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
c5441932
PS
807 ttl = tnl_params->ttl;
808 if (ttl == 0) {
7ae29fd1 809 if (payload_protocol == htons(ETH_P_IP))
c5441932
PS
810 ttl = inner_iph->ttl;
811#if IS_ENABLED(CONFIG_IPV6)
7ae29fd1 812 else if (payload_protocol == htons(ETH_P_IPV6))
c5441932
PS
813 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
814#endif
815 else
816 ttl = ip4_dst_hoplimit(&rt->dst);
817 }
818
0e6fbc5b 819 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
7371e022 820 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
4b397c06
ED
821 if (max_headroom > READ_ONCE(dev->needed_headroom))
822 WRITE_ONCE(dev->needed_headroom, max_headroom);
3e08f4a7 823
4b397c06 824 if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
586d5fc8 825 ip_rt_put(rt);
c4794d22 826 DEV_STATS_INC(dev, tx_dropped);
3acfa1e7 827 kfree_skb(skb);
3e08f4a7 828 return;
c5441932
PS
829 }
830
039f5062
PS
831 iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
832 df, !net_eq(tunnel->net, dev_net(dev)));
c5441932
PS
833 return;
834
835#if IS_ENABLED(CONFIG_IPV6)
836tx_error_icmp:
837 dst_link_failure(skb);
838#endif
839tx_error:
c4794d22 840 DEV_STATS_INC(dev, tx_errors);
3acfa1e7 841 kfree_skb(skb);
c5441932
PS
842}
843EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
844
845static void ip_tunnel_update(struct ip_tunnel_net *itn,
846 struct ip_tunnel *t,
847 struct net_device *dev,
848 struct ip_tunnel_parm *p,
9830ad4c
CG
849 bool set_mtu,
850 __u32 fwmark)
c5441932 851{
2e15ea39 852 ip_tunnel_del(itn, t);
c5441932
PS
853 t->parms.iph.saddr = p->iph.saddr;
854 t->parms.iph.daddr = p->iph.daddr;
855 t->parms.i_key = p->i_key;
856 t->parms.o_key = p->o_key;
857 if (dev->type != ARPHRD_ETHER) {
5a1b7e1a 858 __dev_addr_set(dev, &p->iph.saddr, 4);
c5441932
PS
859 memcpy(dev->broadcast, &p->iph.daddr, 4);
860 }
861 ip_tunnel_add(itn, t);
862
863 t->parms.iph.ttl = p->iph.ttl;
864 t->parms.iph.tos = p->iph.tos;
865 t->parms.iph.frag_off = p->iph.frag_off;
866
9830ad4c 867 if (t->parms.link != p->link || t->fwmark != fwmark) {
c5441932
PS
868 int mtu;
869
870 t->parms.link = p->link;
9830ad4c 871 t->fwmark = fwmark;
c5441932
PS
872 mtu = ip_tunnel_bind_dev(dev);
873 if (set_mtu)
874 dev->mtu = mtu;
875 }
e09acddf 876 dst_cache_reset(&t->dst_cache);
c5441932
PS
877 netdev_state_change(dev);
878}
879
/* Handle the tunnel ioctl commands for @dev.
 *
 * @dev: device the command was issued on
 * @p:   tunnel parameters; input for all commands, overwritten with the
 *       tunnel's current parameters by SIOCGETTUNNEL
 * @cmd: SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL or SIOCDELTUNNEL
 *
 * Returns 0 on success or a negative errno: -EPERM (missing CAP_NET_ADMIN,
 * or an attempt to delete the fallback tunnel), -EEXIST, -ENOENT, -EINVAL,
 * or the error from tunnel creation.
 */
607259a6 880int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
c5441932
PS
881{
882 int err = 0;
8c923ce2
ND
883 struct ip_tunnel *t = netdev_priv(dev);
884 struct net *net = t->net;
885 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
c5441932 886
c5441932
PS
887 switch (cmd) {
888 case SIOCGETTUNNEL:
/* On the fallback device, report the tunnel matching @p if there is one;
 * otherwise (and on any other device) report the device's own tunnel.
 */
8c923ce2 889 if (dev == itn->fb_tunnel_dev) {
c5441932 890 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
51456b29 891 if (!t)
8c923ce2
ND
892 t = netdev_priv(dev);
893 }
c5441932
PS
894 memcpy(p, &t->parms, sizeof(*p));
895 break;
896
897 case SIOCADDTUNNEL:
898 case SIOCCHGTUNNEL:
899 err = -EPERM;
900 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
901 goto done;
/* A fixed TTL forces DF on the outer header. */
902 if (p->iph.ttl)
903 p->iph.frag_off |= htons(IP_DF);
7c8e6b9c
DP
/* For non-VTI tunnels, a key without its TUNNEL_KEY flag is ignored. */
904 if (!(p->i_flags & VTI_ISVTI)) {
905 if (!(p->i_flags & TUNNEL_KEY))
906 p->i_key = 0;
907 if (!(p->o_flags & TUNNEL_KEY))
908 p->o_key = 0;
909 }
c5441932 910
79134e6c 911 t = ip_tunnel_find(itn, p, itn->type);
c5441932 912
d61746b2
SK
/* ADD: create the tunnel unless an identical one already exists. */
913 if (cmd == SIOCADDTUNNEL) {
914 if (!t) {
915 t = ip_tunnel_create(net, itn, p);
916 err = PTR_ERR_OR_ZERO(t);
917 break;
918 }
919
920 err = -EEXIST;
ee30ef4d 921 break;
6dd3c9ec 922 }
/* CHG on a non-fallback device: the new parameters must not collide with a
 * tunnel owned by another device, and must not change whether the device is
 * point-to-point or broadcast.
 */
c5441932 923 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
00db4124 924 if (t) {
c5441932
PS
925 if (t->dev != dev) {
926 err = -EEXIST;
927 break;
928 }
929 } else {
930 unsigned int nflags = 0;
931
932 if (ipv4_is_multicast(p->iph.daddr))
933 nflags = IFF_BROADCAST;
934 else if (p->iph.daddr)
935 nflags = IFF_POINTOPOINT;
936
937 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
938 err = -EINVAL;
939 break;
940 }
941
942 t = netdev_priv(dev);
943 }
944 }
945
/* t is NULL here only for CHG on the fallback device with no match. */
946 if (t) {
947 err = 0;
9830ad4c 948 ip_tunnel_update(itn, t, dev, p, true, 0);
6dd3c9ec
FW
949 } else {
950 err = -ENOENT;
951 }
c5441932
PS
952 break;
953
954 case SIOCDELTUNNEL:
955 err = -EPERM;
956 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
957 goto done;
958
/* On the fallback device, @p selects the tunnel to delete; the fallback
 * tunnel itself can never be deleted this way.
 */
959 if (dev == itn->fb_tunnel_dev) {
960 err = -ENOENT;
961 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
51456b29 962 if (!t)
c5441932
PS
963 goto done;
964 err = -EPERM;
965 if (t == netdev_priv(itn->fb_tunnel_dev))
966 goto done;
967 dev = t->dev;
968 }
969 unregister_netdevice(dev);
970 err = 0;
971 break;
972
973 default:
974 err = -EINVAL;
975 }
976
977done:
978 return err;
979}
607259a6
CH
980EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
981
3e7a1c7c
AB
982int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
983 void __user *data, int cmd)
607259a6
CH
984{
985 struct ip_tunnel_parm p;
986 int err;
987
3e7a1c7c 988 if (copy_from_user(&p, data, sizeof(p)))
607259a6
CH
989 return -EFAULT;
990 err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
3e7a1c7c 991 if (!err && copy_to_user(data, &p, sizeof(p)))
607259a6
CH
992 return -EFAULT;
993 return err;
994}
3e7a1c7c 995EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);
c5441932 996
7e059158 997int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
c5441932
PS
998{
999 struct ip_tunnel *tunnel = netdev_priv(dev);
1000 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
28e104d0 1001 int max_mtu = IP_MAX_MTU - t_hlen;
c5441932 1002
9992a078
HL
1003 if (dev->type == ARPHRD_ETHER)
1004 max_mtu -= dev->hard_header_len;
1005
b96f9afe 1006 if (new_mtu < ETH_MIN_MTU)
c5441932 1007 return -EINVAL;
7e059158
DW
1008
1009 if (new_mtu > max_mtu) {
1010 if (strict)
1011 return -EINVAL;
1012
1013 new_mtu = max_mtu;
1014 }
1015
c5441932
PS
1016 dev->mtu = new_mtu;
1017 return 0;
1018}
7e059158
DW
1019EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
1020
1021int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1022{
1023 return __ip_tunnel_change_mtu(dev, new_mtu, true);
1024}
c5441932
PS
1025EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
1026
1027static void ip_tunnel_dev_free(struct net_device *dev)
1028{
1029 struct ip_tunnel *tunnel = netdev_priv(dev);
1030
1031 gro_cells_destroy(&tunnel->gro_cells);
e09acddf 1032 dst_cache_destroy(&tunnel->dst_cache);
c5441932 1033 free_percpu(dev->tstats);
c5441932
PS
1034}
1035
1036void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
1037{
c5441932
PS
1038 struct ip_tunnel *tunnel = netdev_priv(dev);
1039 struct ip_tunnel_net *itn;
1040
6c742e71 1041 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
c5441932
PS
1042
1043 if (itn->fb_tunnel_dev != dev) {
2e15ea39 1044 ip_tunnel_del(itn, netdev_priv(dev));
c5441932
PS
1045 unregister_netdevice_queue(dev, head);
1046 }
1047}
1048EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
1049
1728d4fa
ND
1050struct net *ip_tunnel_get_link_net(const struct net_device *dev)
1051{
1052 struct ip_tunnel *tunnel = netdev_priv(dev);
1053
1054 return tunnel->net;
1055}
1056EXPORT_SYMBOL(ip_tunnel_get_link_net);
1057
1e99584b
ND
1058int ip_tunnel_get_iflink(const struct net_device *dev)
1059{
1060 struct ip_tunnel *tunnel = netdev_priv(dev);
1061
1062 return tunnel->parms.link;
1063}
1064EXPORT_SYMBOL(ip_tunnel_get_iflink);
1065
c7d03a00 1066int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
c5441932
PS
1067 struct rtnl_link_ops *ops, char *devname)
1068{
1069 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1070 struct ip_tunnel_parm parms;
6261d983 1071 unsigned int i;
c5441932 1072
79134e6c 1073 itn->rtnl_link_ops = ops;
6261d983 1074 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1075 INIT_HLIST_HEAD(&itn->tunnels[i]);
c5441932 1076
79134e6c
ED
1077 if (!ops || !net_has_fallback_tunnels(net)) {
1078 struct ip_tunnel_net *it_init_net;
1079
1080 it_init_net = net_generic(&init_net, ip_tnl_net_id);
1081 itn->type = it_init_net->type;
c5441932
PS
1082 itn->fb_tunnel_dev = NULL;
1083 return 0;
1084 }
6261d983 1085
c5441932
PS
1086 memset(&parms, 0, sizeof(parms));
1087 if (devname)
512b2dc4 1088 strscpy(parms.name, devname, IFNAMSIZ);
c5441932
PS
1089
1090 rtnl_lock();
1091 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
ea857f28
DC
1092 /* FB netdevice is special: we have one, and only one per netns.
1093 * Allowing to move it to another netns is clearly unsafe.
1094 */
67013282 1095 if (!IS_ERR(itn->fb_tunnel_dev)) {
b4de77ad 1096 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
78ff4be4 1097 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
67013282 1098 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
79134e6c 1099 itn->type = itn->fb_tunnel_dev->type;
67013282 1100 }
b4de77ad 1101 rtnl_unlock();
c5441932 1102
27d79f3b 1103 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
c5441932
PS
1104}
1105EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1106
79134e6c
ED
1107static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
1108 struct list_head *head,
6c742e71 1109 struct rtnl_link_ops *ops)
c5441932 1110{
6c742e71 1111 struct net_device *dev, *aux;
c5441932
PS
1112 int h;
1113
6c742e71
ND
1114 for_each_netdev_safe(net, dev, aux)
1115 if (dev->rtnl_link_ops == ops)
1116 unregister_netdevice_queue(dev, head);
1117
c5441932
PS
1118 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1119 struct ip_tunnel *t;
1120 struct hlist_node *n;
1121 struct hlist_head *thead = &itn->tunnels[h];
1122
1123 hlist_for_each_entry_safe(t, n, thead, hash_node)
6c742e71
ND
1124 /* If dev is in the same netns, it has already
1125 * been added to the list by the previous loop.
1126 */
1127 if (!net_eq(dev_net(t->dev), net))
1128 unregister_netdevice_queue(t->dev, head);
c5441932 1129 }
c5441932
PS
1130}
1131
64bc1781
ED
1132void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1133 struct rtnl_link_ops *ops)
c5441932 1134{
64bc1781
ED
1135 struct ip_tunnel_net *itn;
1136 struct net *net;
c5441932
PS
1137 LIST_HEAD(list);
1138
1139 rtnl_lock();
64bc1781
ED
1140 list_for_each_entry(net, net_list, exit_list) {
1141 itn = net_generic(net, id);
79134e6c 1142 ip_tunnel_destroy(net, itn, &list, ops);
64bc1781 1143 }
c5441932
PS
1144 unregister_netdevice_many(&list);
1145 rtnl_unlock();
c5441932 1146}
64bc1781 1147EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
c5441932
PS
1148
1149int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
9830ad4c 1150 struct ip_tunnel_parm *p, __u32 fwmark)
c5441932
PS
1151{
1152 struct ip_tunnel *nt;
1153 struct net *net = dev_net(dev);
1154 struct ip_tunnel_net *itn;
1155 int mtu;
1156 int err;
1157
1158 nt = netdev_priv(dev);
1159 itn = net_generic(net, nt->ip_tnl_net_id);
1160
2e15ea39
PS
1161 if (nt->collect_md) {
1162 if (rtnl_dereference(itn->collect_md_tun))
1163 return -EEXIST;
1164 } else {
1165 if (ip_tunnel_find(itn, p, dev->type))
1166 return -EEXIST;
1167 }
c5441932 1168
5e6700b3 1169 nt->net = net;
c5441932 1170 nt->parms = *p;
9830ad4c 1171 nt->fwmark = fwmark;
c5441932
PS
1172 err = register_netdevice(dev);
1173 if (err)
f6cc9c05 1174 goto err_register_netdevice;
c5441932
PS
1175
1176 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1177 eth_hw_addr_random(dev);
1178
1179 mtu = ip_tunnel_bind_dev(dev);
24fc7979 1180 if (tb[IFLA_MTU]) {
28e104d0 1181 unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));
24fc7979 1182
9992a078
HL
1183 if (dev->type == ARPHRD_ETHER)
1184 max -= dev->hard_header_len;
1185
28e104d0 1186 mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
f6cc9c05 1187 }
c5441932 1188
5568cdc3
DM
1189 err = dev_set_mtu(dev, mtu);
1190 if (err)
1191 goto err_dev_set_mtu;
c5441932
PS
1192
1193 ip_tunnel_add(itn, nt);
f6cc9c05
PM
1194 return 0;
1195
1196err_dev_set_mtu:
1197 unregister_netdevice(dev);
1198err_register_netdevice:
c5441932
PS
1199 return err;
1200}
1201EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1202
1203int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
9830ad4c 1204 struct ip_tunnel_parm *p, __u32 fwmark)
c5441932 1205{
6c742e71 1206 struct ip_tunnel *t;
c5441932 1207 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1208 struct net *net = tunnel->net;
c5441932
PS
1209 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1210
1211 if (dev == itn->fb_tunnel_dev)
1212 return -EINVAL;
1213
c5441932
PS
1214 t = ip_tunnel_find(itn, p, dev->type);
1215
1216 if (t) {
1217 if (t->dev != dev)
1218 return -EEXIST;
1219 } else {
6c742e71 1220 t = tunnel;
c5441932
PS
1221
1222 if (dev->type != ARPHRD_ETHER) {
1223 unsigned int nflags = 0;
1224
1225 if (ipv4_is_multicast(p->iph.daddr))
1226 nflags = IFF_BROADCAST;
1227 else if (p->iph.daddr)
1228 nflags = IFF_POINTOPOINT;
1229
1230 if ((dev->flags ^ nflags) &
1231 (IFF_POINTOPOINT | IFF_BROADCAST))
1232 return -EINVAL;
1233 }
1234 }
1235
9830ad4c 1236 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
c5441932
PS
1237 return 0;
1238}
1239EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1240
1241int ip_tunnel_init(struct net_device *dev)
1242{
1243 struct ip_tunnel *tunnel = netdev_priv(dev);
1244 struct iphdr *iph = &tunnel->parms.iph;
1c213bd2 1245 int err;
c5441932 1246
cf124db5
DM
1247 dev->needs_free_netdev = true;
1248 dev->priv_destructor = ip_tunnel_dev_free;
1c213bd2 1249 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
c5441932
PS
1250 if (!dev->tstats)
1251 return -ENOMEM;
1252
e09acddf
PA
1253 err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1254 if (err) {
9a4aa9af 1255 free_percpu(dev->tstats);
e09acddf 1256 return err;
9a4aa9af
TH
1257 }
1258
c5441932
PS
1259 err = gro_cells_init(&tunnel->gro_cells, dev);
1260 if (err) {
e09acddf 1261 dst_cache_destroy(&tunnel->dst_cache);
c5441932
PS
1262 free_percpu(dev->tstats);
1263 return err;
1264 }
1265
1266 tunnel->dev = dev;
6c742e71 1267 tunnel->net = dev_net(dev);
c5441932
PS
1268 strcpy(tunnel->parms.name, dev->name);
1269 iph->version = 4;
1270 iph->ihl = 5;
1271
d0f41851 1272 if (tunnel->collect_md)
2e15ea39 1273 netif_keep_dst(dev);
c5441932
PS
1274 return 0;
1275}
1276EXPORT_SYMBOL_GPL(ip_tunnel_init);
1277
1278void ip_tunnel_uninit(struct net_device *dev)
1279{
c5441932 1280 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1281 struct net *net = tunnel->net;
c5441932
PS
1282 struct ip_tunnel_net *itn;
1283
1284 itn = net_generic(net, tunnel->ip_tnl_net_id);
ba61539c
TY
1285 ip_tunnel_del(itn, netdev_priv(dev));
1286 if (itn->fb_tunnel_dev == dev)
1287 WRITE_ONCE(itn->fb_tunnel_dev, NULL);
7d442fab 1288
e09acddf 1289 dst_cache_reset(&tunnel->dst_cache);
c5441932
PS
1290}
1291EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1292
1293/* Do least required initialization, rest of init is done in tunnel_init call */
c7d03a00 1294void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
c5441932
PS
1295{
1296 struct ip_tunnel *tunnel = netdev_priv(dev);
1297 tunnel->ip_tnl_net_id = net_id;
1298}
1299EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1300
1301MODULE_LICENSE("GPL");