Merge tag 'pm-6.16-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
[linux-2.6-block.git] / net / ipv4 / ip_tunnel.c
CommitLineData
c9422999 1// SPDX-License-Identifier: GPL-2.0-only
c5441932
PS
2/*
3 * Copyright (c) 2013 Nicira, Inc.
c5441932
PS
4 */
5
6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8#include <linux/capability.h>
9#include <linux/module.h>
10#include <linux/types.h>
11#include <linux/kernel.h>
12#include <linux/slab.h>
13#include <linux/uaccess.h>
14#include <linux/skbuff.h>
15#include <linux/netdevice.h>
16#include <linux/in.h>
17#include <linux/tcp.h>
18#include <linux/udp.h>
19#include <linux/if_arp.h>
c5441932
PS
20#include <linux/init.h>
21#include <linux/in6.h>
22#include <linux/inetdevice.h>
23#include <linux/igmp.h>
24#include <linux/netfilter_ipv4.h>
25#include <linux/etherdevice.h>
26#include <linux/if_ether.h>
27#include <linux/if_vlan.h>
28#include <linux/rculist.h>
27d79f3b 29#include <linux/err.h>
c5441932
PS
30
31#include <net/sock.h>
32#include <net/ip.h>
33#include <net/icmp.h>
34#include <net/protocol.h>
35#include <net/ip_tunnels.h>
36#include <net/arp.h>
37#include <net/checksum.h>
38#include <net/dsfield.h>
39#include <net/inet_ecn.h>
40#include <net/xfrm.h>
41#include <net/net_namespace.h>
42#include <net/netns/generic.h>
8ef890df 43#include <net/netdev_lock.h>
c5441932 44#include <net/rtnetlink.h>
56328486 45#include <net/udp.h>
cfc7381b 46#include <net/dst_metadata.h>
c34cfe72 47#include <net/inet_dscp.h>
63487bab 48
c5441932
PS
49#if IS_ENABLED(CONFIG_IPV6)
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#endif
54
967680e0 55static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
c5441932
PS
56{
57 return hash_32((__force u32)key ^ (__force u32)remote,
58 IP_TNL_HASH_BITS);
59}
60
117aef12 61static bool ip_tunnel_key_match(const struct ip_tunnel_parm_kern *p,
5832c4a7 62 const unsigned long *flags, __be32 key)
c5441932 63{
5832c4a7
AL
64 if (!test_bit(IP_TUNNEL_KEY_BIT, flags))
65 return !test_bit(IP_TUNNEL_KEY_BIT, p->i_flags);
66
67 return test_bit(IP_TUNNEL_KEY_BIT, p->i_flags) && p->i_key == key;
c5441932
PS
68}
69
70/* Fallback tunnel: no source, no destination, no key, no options
71
72 Tunnel hash table:
73 We require exact key match i.e. if a key is present in packet
74 it will match only tunnel with the same key; if it is not present,
75 it will match only keyless tunnel.
76
77 All keyless packets that match no configured keyless tunnel
78 will match the fallback tunnel.
79 Given src, dst and key, find the appropriate tunnel for input.
80*/
81struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
5832c4a7 82 int link, const unsigned long *flags,
c5441932
PS
83 __be32 remote, __be32 local,
84 __be32 key)
85{
c5441932
PS
86 struct ip_tunnel *t, *cand = NULL;
87 struct hlist_head *head;
ba61539c
TY
88 struct net_device *ndev;
89 unsigned int hash;
c5441932 90
967680e0 91 hash = ip_tunnel_hash(key, remote);
c5441932
PS
92 head = &itn->tunnels[hash];
93
94 hlist_for_each_entry_rcu(t, head, hash_node) {
95 if (local != t->parms.iph.saddr ||
96 remote != t->parms.iph.daddr ||
97 !(t->dev->flags & IFF_UP))
98 continue;
99
100 if (!ip_tunnel_key_match(&t->parms, flags, key))
101 continue;
102
f694eee9 103 if (READ_ONCE(t->parms.link) == link)
c5441932 104 return t;
f694eee9 105 cand = t;
c5441932
PS
106 }
107
108 hlist_for_each_entry_rcu(t, head, hash_node) {
109 if (remote != t->parms.iph.daddr ||
e0056593 110 t->parms.iph.saddr != 0 ||
c5441932
PS
111 !(t->dev->flags & IFF_UP))
112 continue;
113
114 if (!ip_tunnel_key_match(&t->parms, flags, key))
115 continue;
116
f694eee9 117 if (READ_ONCE(t->parms.link) == link)
c5441932 118 return t;
f694eee9 119 if (!cand)
c5441932
PS
120 cand = t;
121 }
122
967680e0 123 hash = ip_tunnel_hash(key, 0);
c5441932
PS
124 head = &itn->tunnels[hash];
125
126 hlist_for_each_entry_rcu(t, head, hash_node) {
e0056593
DP
127 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
128 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
129 continue;
130
131 if (!(t->dev->flags & IFF_UP))
c5441932
PS
132 continue;
133
134 if (!ip_tunnel_key_match(&t->parms, flags, key))
135 continue;
136
f694eee9 137 if (READ_ONCE(t->parms.link) == link)
c5441932 138 return t;
f694eee9 139 if (!cand)
c5441932
PS
140 cand = t;
141 }
142
c5441932 143 hlist_for_each_entry_rcu(t, head, hash_node) {
5832c4a7
AL
144 if ((!test_bit(IP_TUNNEL_NO_KEY_BIT, flags) &&
145 t->parms.i_key != key) ||
e0056593
DP
146 t->parms.iph.saddr != 0 ||
147 t->parms.iph.daddr != 0 ||
c5441932
PS
148 !(t->dev->flags & IFF_UP))
149 continue;
150
f694eee9 151 if (READ_ONCE(t->parms.link) == link)
c5441932 152 return t;
f694eee9 153 if (!cand)
c5441932
PS
154 cand = t;
155 }
156
c5441932
PS
157 if (cand)
158 return cand;
159
2e15ea39 160 t = rcu_dereference(itn->collect_md_tun);
833a8b40 161 if (t && t->dev->flags & IFF_UP)
2e15ea39
PS
162 return t;
163
ba61539c
TY
164 ndev = READ_ONCE(itn->fb_tunnel_dev);
165 if (ndev && ndev->flags & IFF_UP)
166 return netdev_priv(ndev);
c5441932 167
c5441932
PS
168 return NULL;
169}
170EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
171
172static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
117aef12 173 struct ip_tunnel_parm_kern *parms)
c5441932
PS
174{
175 unsigned int h;
176 __be32 remote;
6d608f06 177 __be32 i_key = parms->i_key;
c5441932
PS
178
179 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
180 remote = parms->iph.daddr;
181 else
182 remote = 0;
183
5832c4a7
AL
184 if (!test_bit(IP_TUNNEL_KEY_BIT, parms->i_flags) &&
185 test_bit(IP_TUNNEL_VTI_BIT, parms->i_flags))
6d608f06
SK
186 i_key = 0;
187
188 h = ip_tunnel_hash(i_key, remote);
c5441932
PS
189 return &itn->tunnels[h];
190}
191
192static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
193{
194 struct hlist_head *head = ip_bucket(itn, &t->parms);
195
2e15ea39
PS
196 if (t->collect_md)
197 rcu_assign_pointer(itn->collect_md_tun, t);
c5441932
PS
198 hlist_add_head_rcu(&t->hash_node, head);
199}
200
2e15ea39 201static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
c5441932 202{
2e15ea39
PS
203 if (t->collect_md)
204 rcu_assign_pointer(itn->collect_md_tun, NULL);
c5441932
PS
205 hlist_del_init_rcu(&t->hash_node);
206}
207
208static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
117aef12 209 struct ip_tunnel_parm_kern *parms,
c5441932
PS
210 int type)
211{
212 __be32 remote = parms->iph.daddr;
213 __be32 local = parms->iph.saddr;
5832c4a7 214 IP_TUNNEL_DECLARE_FLAGS(flags);
c5441932
PS
215 __be32 key = parms->i_key;
216 int link = parms->link;
217 struct ip_tunnel *t = NULL;
218 struct hlist_head *head = ip_bucket(itn, parms);
219
5832c4a7
AL
220 ip_tunnel_flags_copy(flags, parms->i_flags);
221
90e0569d 222 hlist_for_each_entry_rcu(t, head, hash_node, lockdep_rtnl_is_held()) {
c5441932
PS
223 if (local == t->parms.iph.saddr &&
224 remote == t->parms.iph.daddr &&
f694eee9 225 link == READ_ONCE(t->parms.link) &&
5ce54af1
DP
226 type == t->dev->type &&
227 ip_tunnel_key_match(&t->parms, flags, key))
c5441932
PS
228 break;
229 }
230 return t;
231}
232
233static struct net_device *__ip_tunnel_create(struct net *net,
234 const struct rtnl_link_ops *ops,
117aef12 235 struct ip_tunnel_parm_kern *parms)
c5441932
PS
236{
237 int err;
238 struct ip_tunnel *tunnel;
239 struct net_device *dev;
240 char name[IFNAMSIZ];
241
9cb726a2
ED
242 err = -E2BIG;
243 if (parms->name[0]) {
244 if (!dev_valid_name(parms->name))
245 goto failed;
c2dbda07 246 strscpy(name, parms->name);
9cb726a2
ED
247 } else {
248 if (strlen(ops->kind) > (IFNAMSIZ - 3))
c5441932 249 goto failed;
c2dbda07 250 strscpy(name, ops->kind);
000ade80 251 strcat(name, "%d");
c5441932
PS
252 }
253
254 ASSERT_RTNL();
c835a677 255 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
c5441932
PS
256 if (!dev) {
257 err = -ENOMEM;
258 goto failed;
259 }
260 dev_net_set(dev, net);
261
262 dev->rtnl_link_ops = ops;
263
264 tunnel = netdev_priv(dev);
265 tunnel->parms = *parms;
5e6700b3 266 tunnel->net = net;
c5441932
PS
267
268 err = register_netdevice(dev);
269 if (err)
270 goto failed_free;
271
272 return dev;
273
274failed_free:
275 free_netdev(dev);
276failed:
277 return ERR_PTR(err);
278}
279
c5441932
PS
280static int ip_tunnel_bind_dev(struct net_device *dev)
281{
282 struct net_device *tdev = NULL;
283 struct ip_tunnel *tunnel = netdev_priv(dev);
284 const struct iphdr *iph;
285 int hlen = LL_MAX_HEADER;
286 int mtu = ETH_DATA_LEN;
287 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
288
289 iph = &tunnel->parms.iph;
290
291 /* Guess output device to choose reasonable mtu and needed_headroom */
292 if (iph->daddr) {
293 struct flowi4 fl4;
294 struct rtable *rt;
295
b0066da5
PM
296 ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
297 iph->saddr, tunnel->parms.o_key,
b5a7b661 298 iph->tos & INET_DSCP_MASK, tunnel->net,
7ec9fce4 299 tunnel->parms.link, tunnel->fwmark, 0, 0);
7d442fab
TH
300 rt = ip_route_output_key(tunnel->net, &fl4);
301
c5441932
PS
302 if (!IS_ERR(rt)) {
303 tdev = rt->dst.dev;
304 ip_rt_put(rt);
305 }
306 if (dev->type != ARPHRD_ETHER)
307 dev->flags |= IFF_POINTOPOINT;
f27337e1
PA
308
309 dst_cache_reset(&tunnel->dst_cache);
c5441932
PS
310 }
311
312 if (!tdev && tunnel->parms.link)
6c742e71 313 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
c5441932
PS
314
315 if (tdev) {
316 hlen = tdev->hard_header_len + tdev->needed_headroom;
82612de1 317 mtu = min(tdev->mtu, IP_MAX_MTU);
c5441932 318 }
c5441932
PS
319
320 dev->needed_headroom = t_hlen + hlen;
9992a078 321 mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);
c5441932 322
b5476022
ED
323 if (mtu < IPV4_MIN_MTU)
324 mtu = IPV4_MIN_MTU;
c5441932
PS
325
326 return mtu;
327}
328
329static struct ip_tunnel *ip_tunnel_create(struct net *net,
330 struct ip_tunnel_net *itn,
117aef12 331 struct ip_tunnel_parm_kern *parms)
c5441932 332{
4929fd8c 333 struct ip_tunnel *nt;
c5441932 334 struct net_device *dev;
b96f9afe 335 int t_hlen;
f6cc9c05
PM
336 int mtu;
337 int err;
c5441932 338
79134e6c 339 dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
c5441932 340 if (IS_ERR(dev))
6dd3c9ec 341 return ERR_CAST(dev);
c5441932 342
f6cc9c05
PM
343 mtu = ip_tunnel_bind_dev(dev);
344 err = dev_set_mtu(dev, mtu);
345 if (err)
346 goto err_dev_set_mtu;
c5441932
PS
347
348 nt = netdev_priv(dev);
b96f9afe
JW
349 t_hlen = nt->hlen + sizeof(struct iphdr);
350 dev->min_mtu = ETH_MIN_MTU;
28e104d0 351 dev->max_mtu = IP_MAX_MTU - t_hlen;
9992a078
HL
352 if (dev->type == ARPHRD_ETHER)
353 dev->max_mtu -= dev->hard_header_len;
354
c5441932
PS
355 ip_tunnel_add(itn, nt);
356 return nt;
f6cc9c05
PM
357
358err_dev_set_mtu:
359 unregister_netdevice(dev);
360 return ERR_PTR(err);
c5441932
PS
361}
362
ac931d4c
CE
363void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info)
364{
365 const struct iphdr *iph = ip_hdr(skb);
366 const struct udphdr *udph;
367
368 if (iph->protocol != IPPROTO_UDP)
369 return;
370
371 udph = (struct udphdr *)((__u8 *)iph + (iph->ihl << 2));
372 info->encap.sport = udph->source;
373 info->encap.dport = udph->dest;
374}
375EXPORT_SYMBOL(ip_tunnel_md_udp_encap);
376
c5441932 377int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
2e15ea39
PS
378 const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
379 bool log_ecn_error)
c5441932 380{
c5441932 381 const struct iphdr *iph = ip_hdr(skb);
b0ec2abf 382 int nh, err;
c5441932 383
c5441932
PS
384#ifdef CONFIG_NET_IPGRE_BROADCAST
385 if (ipv4_is_multicast(iph->daddr)) {
c4794d22 386 DEV_STATS_INC(tunnel->dev, multicast);
c5441932
PS
387 skb->pkt_type = PACKET_BROADCAST;
388 }
389#endif
390
5832c4a7
AL
391 if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.i_flags) !=
392 test_bit(IP_TUNNEL_CSUM_BIT, tpi->flags)) {
c4794d22
ED
393 DEV_STATS_INC(tunnel->dev, rx_crc_errors);
394 DEV_STATS_INC(tunnel->dev, rx_errors);
c5441932
PS
395 goto drop;
396 }
397
5832c4a7
AL
398 if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.i_flags)) {
399 if (!test_bit(IP_TUNNEL_SEQ_BIT, tpi->flags) ||
c5441932 400 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
c4794d22
ED
401 DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
402 DEV_STATS_INC(tunnel->dev, rx_errors);
c5441932
PS
403 goto drop;
404 }
405 tunnel->i_seqno = ntohl(tpi->seq) + 1;
406 }
407
b0ec2abf
ED
408 /* Save offset of outer header relative to skb->head,
409 * because we are going to reset the network header to the inner header
410 * and might change skb->head.
411 */
412 nh = skb_network_header(skb) - skb->head;
413
227adfb2 414 skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
e96f2e7c 415
b0ec2abf
ED
416 if (!pskb_inet_may_pull(skb)) {
417 DEV_STATS_INC(tunnel->dev, rx_length_errors);
418 DEV_STATS_INC(tunnel->dev, rx_errors);
419 goto drop;
420 }
421 iph = (struct iphdr *)(skb->head + nh);
422
c5441932
PS
423 err = IP_ECN_decapsulate(iph, skb);
424 if (unlikely(err)) {
425 if (log_ecn_error)
426 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
427 &iph->saddr, iph->tos);
428 if (err > 1) {
c4794d22
ED
429 DEV_STATS_INC(tunnel->dev, rx_frame_errors);
430 DEV_STATS_INC(tunnel->dev, rx_errors);
c5441932
PS
431 goto drop;
432 }
433 }
434
560b50cf 435 dev_sw_netstats_rx_add(tunnel->dev, skb->len);
81b9eab5
AS
436 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
437
3d7b46cd
PS
438 if (tunnel->dev->type == ARPHRD_ETHER) {
439 skb->protocol = eth_type_trans(skb, tunnel->dev);
440 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
441 } else {
442 skb->dev = tunnel->dev;
443 }
64261f23 444
2e15ea39
PS
445 if (tun_dst)
446 skb_dst_set(skb, (struct dst_entry *)tun_dst);
447
c5441932
PS
448 gro_cells_receive(&tunnel->gro_cells, skb);
449 return 0;
450
451drop:
469f87e1
HY
452 if (tun_dst)
453 dst_release((struct dst_entry *)tun_dst);
c5441932
PS
454 kfree_skb(skb);
455 return 0;
456}
457EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
458
a8c5f90f
TH
459int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
460 unsigned int num)
461{
bb1553c8
TG
462 if (num >= MAX_IPTUN_ENCAP_OPS)
463 return -ERANGE;
464
a8c5f90f
TH
465 return !cmpxchg((const struct ip_tunnel_encap_ops **)
466 &iptun_encaps[num],
467 NULL, ops) ? 0 : -1;
56328486 468}
a8c5f90f
TH
469EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
470
471int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
472 unsigned int num)
473{
474 int ret;
475
bb1553c8
TG
476 if (num >= MAX_IPTUN_ENCAP_OPS)
477 return -ERANGE;
478
a8c5f90f
TH
479 ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
480 &iptun_encaps[num],
481 ops, NULL) == ops) ? 0 : -1;
482
483 synchronize_net();
484
485 return ret;
486}
487EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
56328486
TH
488
489int ip_tunnel_encap_setup(struct ip_tunnel *t,
490 struct ip_tunnel_encap *ipencap)
491{
492 int hlen;
493
494 memset(&t->encap, 0, sizeof(t->encap));
495
496 hlen = ip_encap_hlen(ipencap);
497 if (hlen < 0)
498 return hlen;
499
500 t->encap.type = ipencap->type;
501 t->encap.sport = ipencap->sport;
502 t->encap.dport = ipencap->dport;
503 t->encap.flags = ipencap->flags;
504
505 t->encap_hlen = hlen;
506 t->hlen = t->encap_hlen + t->tun_hlen;
507
508 return 0;
509}
510EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
511
23a3647b 512static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
fc24f2b2 513 struct rtable *rt, __be16 df,
c8b34e68 514 const struct iphdr *inner_iph,
515 int tunnel_hlen, __be32 dst, bool md)
23a3647b
PS
516{
517 struct ip_tunnel *tunnel = netdev_priv(dev);
c8b34e68 518 int pkt_size;
23a3647b
PS
519 int mtu;
520
c8b34e68 521 tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
28e104d0 522 pkt_size = skb->len - tunnel_hlen;
9992a078 523 pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
c8b34e68 524
9992a078 525 if (df) {
28e104d0 526 mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
9992a078
HL
527 mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
528 } else {
f4b3ec4e 529 mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
9992a078 530 }
23a3647b 531
f4b3ec4e 532 if (skb_valid_dst(skb))
7a1592bc 533 skb_dst_update_pmtu_no_confirm(skb, mtu);
23a3647b
PS
534
535 if (skb->protocol == htons(ETH_P_IP)) {
536 if (!skb_is_gso(skb) &&
fc24f2b2
TT
537 (inner_iph->frag_off & htons(IP_DF)) &&
538 mtu < pkt_size) {
4372339e 539 icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
23a3647b
PS
540 return -E2BIG;
541 }
542 }
543#if IS_ENABLED(CONFIG_IPV6)
544 else if (skb->protocol == htons(ETH_P_IPV6)) {
f4b3ec4e 545 struct rt6_info *rt6;
c8b34e68 546 __be32 daddr;
547
e8dfd42c 548 rt6 = skb_valid_dst(skb) ? dst_rt6_info(skb_dst(skb)) :
f4b3ec4e 549 NULL;
c8b34e68 550 daddr = md ? dst : tunnel->parms.iph.daddr;
23a3647b
PS
551
552 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
553 mtu >= IPV6_MIN_MTU) {
c8b34e68 554 if ((daddr && !ipv4_is_multicast(daddr)) ||
23a3647b
PS
555 rt6->rt6i_dst.plen == 128) {
556 rt6->rt6i_flags |= RTF_MODIFIED;
557 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
558 }
559 }
560
561 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
562 mtu < pkt_size) {
4372339e 563 icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
23a3647b
PS
564 return -E2BIG;
565 }
566 }
567#endif
568 return 0;
569}
570
5ae1e992
FW
571static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
572{
573 /* we must cap headroom to some upperlimit, else pskb_expand_head
574 * will overflow header offsets in skb_headers_offset_update().
575 */
576 static const unsigned int max_allowed = 512;
577
578 if (headroom > max_allowed)
579 headroom = max_allowed;
580
581 if (headroom > READ_ONCE(dev->needed_headroom))
582 WRITE_ONCE(dev->needed_headroom, headroom);
583}
584
c8b34e68 585void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
586 u8 proto, int tunnel_hlen)
cfc7381b
AS
587{
588 struct ip_tunnel *tunnel = netdev_priv(dev);
589 u32 headroom = sizeof(struct iphdr);
590 struct ip_tunnel_info *tun_info;
591 const struct ip_tunnel_key *key;
592 const struct iphdr *inner_iph;
f46fe4f8 593 struct rtable *rt = NULL;
cfc7381b
AS
594 struct flowi4 fl4;
595 __be16 df = 0;
596 u8 tos, ttl;
f46fe4f8 597 bool use_cache;
cfc7381b
AS
598
599 tun_info = skb_tunnel_info(skb);
600 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
601 ip_tunnel_info_af(tun_info) != AF_INET))
602 goto tx_error;
603 key = &tun_info->key;
604 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
605 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
606 tos = key->tos;
607 if (tos == 1) {
608 if (skb->protocol == htons(ETH_P_IP))
609 tos = inner_iph->tos;
610 else if (skb->protocol == htons(ETH_P_IPV6))
611 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
612 }
6e6b904a 613 ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
c34cfe72 614 tunnel_id_to_key32(key->tun_id),
b5a7b661 615 tos & INET_DSCP_MASK, tunnel->net, 0, skb->mark,
c34cfe72 616 skb_get_hash(skb), key->flow_flags);
ac931d4c
CE
617
618 if (!tunnel_hlen)
619 tunnel_hlen = ip_encap_hlen(&tun_info->encap);
620
621 if (ip_tunnel_encap(skb, &tun_info->encap, &proto, &fl4) < 0)
cfc7381b 622 goto tx_error;
f46fe4f8 623
624 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
625 if (use_cache)
626 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
627 if (!rt) {
628 rt = ip_route_output_key(tunnel->net, &fl4);
629 if (IS_ERR(rt)) {
c4794d22 630 DEV_STATS_INC(dev, tx_carrier_errors);
f46fe4f8 631 goto tx_error;
632 }
633 if (use_cache)
634 dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
635 fl4.saddr);
cfc7381b
AS
636 }
637 if (rt->dst.dev == dev) {
638 ip_rt_put(rt);
c4794d22 639 DEV_STATS_INC(dev, collisions);
cfc7381b
AS
640 goto tx_error;
641 }
c8b34e68 642
5832c4a7 643 if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags))
c8b34e68 644 df = htons(IP_DF);
645 if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
646 key->u.ipv4.dst, true)) {
647 ip_rt_put(rt);
648 goto tx_error;
649 }
650
cfc7381b
AS
651 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
652 ttl = key->ttl;
653 if (ttl == 0) {
654 if (skb->protocol == htons(ETH_P_IP))
655 ttl = inner_iph->ttl;
656 else if (skb->protocol == htons(ETH_P_IPV6))
657 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
658 else
659 ttl = ip4_dst_hoplimit(&rt->dst);
660 }
c8b34e68 661
cfc7381b 662 headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
5ae1e992 663 if (skb_cow_head(skb, headroom)) {
cfc7381b
AS
664 ip_rt_put(rt);
665 goto tx_dropped;
666 }
5ae1e992
FW
667
668 ip_tunnel_adj_headroom(dev, headroom);
669
0f693f19
HY
670 iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
671 df, !net_eq(tunnel->net, dev_net(dev)));
cfc7381b
AS
672 return;
673tx_error:
c4794d22 674 DEV_STATS_INC(dev, tx_errors);
cfc7381b
AS
675 goto kfree;
676tx_dropped:
c4794d22 677 DEV_STATS_INC(dev, tx_dropped);
cfc7381b
AS
678kfree:
679 kfree_skb(skb);
680}
681EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
682
/* Transmit @skb through tunnel device @dev.
 *
 * @tnl_params supplies the outer IPv4 header template (saddr, daddr, tos,
 * ttl, frag_off) and @protocol the outer IP protocol number.  The function
 * picks the outer destination, resolves a route (using a dst cache when the
 * flow is "connected"), applies PMTU handling, makes headroom and hands the
 * packet to iptunnel_xmit().  On failure the skb is freed and a device tx
 * counter is incremented; nothing is returned to the caller.
 */
c5441932 683void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
56328486 684 const struct iphdr *tnl_params, u8 protocol)
c5441932
PS
685{
686 struct ip_tunnel *tunnel = netdev_priv(dev);
186d9366 687 struct ip_tunnel_info *tun_info = NULL;
c5441932 688 const struct iphdr *inner_iph;
c5441932 689 unsigned int max_headroom; /* The extra header space needed */
186d9366 690 struct rtable *rt = NULL; /* Route to the other host */
7ae29fd1 691 __be16 payload_protocol;
186d9366 692 bool use_cache = false;
693 struct flowi4 fl4;
694 bool md = false;
22fb22ea 695 bool connected;
186d9366 696 u8 tos, ttl;
697 __be32 dst;
698 __be16 df;
c5441932
PS
699
700 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
/* "connected" gates every use of a dst cache below; it starts out true
 * only when the tunnel has a fixed destination address.
 */
22fb22ea 701 connected = (tunnel->parms.iph.daddr != 0);
7ae29fd1 702 payload_protocol = skb_protocol(skb, true);
c5441932 703
5146d1f1
BH
704 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
705
c5441932
PS
706 dst = tnl_params->daddr;
/* No destination in the template: derive it per packet, in order of
 * preference from TX tunnel metadata, from the IPv4 route's next hop, or
 * from an IPv4-compatible IPv6 neighbour/destination address.
 */
707 if (dst == 0) {
708 /* NBMA tunnel */
709
51456b29 710 if (!skb_dst(skb)) {
c4794d22 711 DEV_STATS_INC(dev, tx_fifo_errors);
c5441932
PS
712 goto tx_error;
713 }
714
d71b5753 715 tun_info = skb_tunnel_info(skb);
716 if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
717 ip_tunnel_info_af(tun_info) == AF_INET &&
186d9366 718 tun_info->key.u.ipv4.dst) {
d71b5753 719 dst = tun_info->key.u.ipv4.dst;
186d9366 720 md = true;
721 connected = true;
7ae29fd1 722 } else if (payload_protocol == htons(ETH_P_IP)) {
c5441932
PS
723 rt = skb_rtable(skb);
724 dst = rt_nexthop(rt, inner_iph->daddr);
725 }
726#if IS_ENABLED(CONFIG_IPV6)
7ae29fd1 727 else if (payload_protocol == htons(ETH_P_IPV6)) {
c5441932
PS
728 const struct in6_addr *addr6;
729 struct neighbour *neigh;
730 bool do_tx_error_icmp;
731 int addr_type;
732
733 neigh = dst_neigh_lookup(skb_dst(skb),
734 &ipv6_hdr(skb)->daddr);
51456b29 735 if (!neigh)
c5441932
PS
736 goto tx_error;
737
738 addr6 = (const struct in6_addr *)&neigh->primary_key;
739 addr_type = ipv6_addr_type(addr6);
740
/* Neighbour key is the unspecified address: use the packet's own
 * IPv6 destination instead.
 */
741 if (addr_type == IPV6_ADDR_ANY) {
742 addr6 = &ipv6_hdr(skb)->daddr;
743 addr_type = ipv6_addr_type(addr6);
744 }
745
746 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
747 do_tx_error_icmp = true;
748 else {
749 do_tx_error_icmp = false;
750 dst = addr6->s6_addr32[3];
751 }
/* The neighbour reference is dropped before taking the error path. */
752 neigh_release(neigh);
753 if (do_tx_error_icmp)
754 goto tx_error_icmp;
755 }
756#endif
757 else
758 goto tx_error;
7d442fab 759
/* A destination derived from the packet (not from metadata) must not
 * be served from a dst cache.
 */
186d9366 760 if (!md)
761 connected = false;
c5441932
PS
762 }
763
764 tos = tnl_params->tos;
/* Low bit set in the template TOS: take the TOS from the inner header.
 * The flow then depends on the packet, so caching is disabled.
 */
765 if (tos & 0x1) {
766 tos &= ~0x1;
7ae29fd1 767 if (payload_protocol == htons(ETH_P_IP)) {
c5441932 768 tos = inner_iph->tos;
7d442fab 769 connected = false;
7ae29fd1 770 } else if (payload_protocol == htons(ETH_P_IPV6)) {
c5441932 771 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
7d442fab
TH
772 connected = false;
773 }
c5441932
PS
774 }
775
0f3e9c97 776 ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
c2b639f9 777 tunnel->parms.o_key, tos & INET_DSCP_MASK,
b5a7b661 778 tunnel->net, READ_ONCE(tunnel->parms.link),
7ec9fce4 779 tunnel->fwmark, skb_get_hash(skb), 0);
7d442fab 780
ac931d4c 781 if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
56328486
TH
782 goto tx_error;
783
/* Cached route: the tunnel-info cache for metadata flows, the tunnel's
 * own cache for other connected flows, none otherwise.
 */
186d9366 784 if (connected && md) {
785 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
786 if (use_cache)
787 rt = dst_cache_get_ip4(&tun_info->dst_cache,
788 &fl4.saddr);
789 } else {
790 rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
791 &fl4.saddr) : NULL;
792 }
7d442fab
TH
793
794 if (!rt) {
795 rt = ip_route_output_key(tunnel->net, &fl4);
796
797 if (IS_ERR(rt)) {
c4794d22 798 DEV_STATS_INC(dev, tx_carrier_errors);
7d442fab
TH
799 goto tx_error;
800 }
186d9366 801 if (use_cache)
802 dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
803 fl4.saddr);
804 else if (!md && connected)
e09acddf
PA
805 dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
806 fl4.saddr);
c5441932 807 }
7d442fab 808
/* A route that leads back to this tunnel device is counted as a
 * collision and the packet is dropped.
 */
0e6fbc5b 809 if (rt->dst.dev == dev) {
c5441932 810 ip_rt_put(rt);
c4794d22 811 DEV_STATS_INC(dev, collisions);
c5441932
PS
812 goto tx_error;
813 }
c5441932 814
/* Outer DF: from the template, plus the inner IPv4 DF bit unless the
 * tunnel is configured with ignore_df.
 */
50c66167 815 df = tnl_params->frag_off;
7ae29fd1 816 if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
50c66167
FW
817 df |= (inner_iph->frag_off & htons(IP_DF));
818
819 if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
23a3647b
PS
820 ip_rt_put(rt);
821 goto tx_error;
c5441932 822 }
c5441932
PS
823
/* While err_count is positive and err_time is less than
 * IPTUNNEL_ERR_TIMEO old, report a link failure for this packet and
 * consume one count; once the window has passed, reset the counter.
 */
824 if (tunnel->err_count > 0) {
825 if (time_before(jiffies,
826 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
827 tunnel->err_count--;
828
829 dst_link_failure(skb);
830 } else
831 tunnel->err_count = 0;
832 }
833
d4a71b15 834 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
c5441932
PS
835 ttl = tnl_params->ttl;
/* Template TTL of 0: inherit from the inner header, or use the route's
 * hop limit for non-IP payloads.
 */
836 if (ttl == 0) {
7ae29fd1 837 if (payload_protocol == htons(ETH_P_IP))
c5441932
PS
838 ttl = inner_iph->ttl;
839#if IS_ENABLED(CONFIG_IPV6)
7ae29fd1 840 else if (payload_protocol == htons(ETH_P_IPV6))
c5441932
PS
841 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
842#endif
843 else
844 ttl = ip4_dst_hoplimit(&rt->dst);
845 }
846
0e6fbc5b 847 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
7371e022 848 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
3e08f4a7 849
/* Headroom failure is accounted as tx_dropped, not tx_errors. */
5ae1e992 850 if (skb_cow_head(skb, max_headroom)) {
586d5fc8 851 ip_rt_put(rt);
c4794d22 852 DEV_STATS_INC(dev, tx_dropped);
3acfa1e7 853 kfree_skb(skb);
3e08f4a7 854 return;
c5441932
PS
855 }
856
5ae1e992
FW
857 ip_tunnel_adj_headroom(dev, max_headroom);
858
039f5062
PS
859 iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
860 df, !net_eq(tunnel->net, dev_net(dev)));
c5441932
PS
861 return;
862
/* tx_error_icmp falls through into tx_error after signalling link failure. */
863#if IS_ENABLED(CONFIG_IPV6)
864tx_error_icmp:
865 dst_link_failure(skb);
866#endif
867tx_error:
c4794d22 868 DEV_STATS_INC(dev, tx_errors);
3acfa1e7 869 kfree_skb(skb);
c5441932
PS
870}
871EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
872
873static void ip_tunnel_update(struct ip_tunnel_net *itn,
874 struct ip_tunnel *t,
875 struct net_device *dev,
117aef12 876 struct ip_tunnel_parm_kern *p,
9830ad4c
CG
877 bool set_mtu,
878 __u32 fwmark)
c5441932 879{
2e15ea39 880 ip_tunnel_del(itn, t);
c5441932
PS
881 t->parms.iph.saddr = p->iph.saddr;
882 t->parms.iph.daddr = p->iph.daddr;
883 t->parms.i_key = p->i_key;
884 t->parms.o_key = p->o_key;
885 if (dev->type != ARPHRD_ETHER) {
5a1b7e1a 886 __dev_addr_set(dev, &p->iph.saddr, 4);
c5441932
PS
887 memcpy(dev->broadcast, &p->iph.daddr, 4);
888 }
889 ip_tunnel_add(itn, t);
890
891 t->parms.iph.ttl = p->iph.ttl;
892 t->parms.iph.tos = p->iph.tos;
893 t->parms.iph.frag_off = p->iph.frag_off;
894
9830ad4c 895 if (t->parms.link != p->link || t->fwmark != fwmark) {
c5441932
PS
896 int mtu;
897
f694eee9 898 WRITE_ONCE(t->parms.link, p->link);
9830ad4c 899 t->fwmark = fwmark;
c5441932
PS
900 mtu = ip_tunnel_bind_dev(dev);
901 if (set_mtu)
1eb2cded 902 WRITE_ONCE(dev->mtu, mtu);
c5441932 903 }
e09acddf 904 dst_cache_reset(&t->dst_cache);
c5441932
PS
905 netdev_state_change(dev);
906}
907
117aef12
AL
908int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p,
909 int cmd)
c5441932
PS
910{
911 int err = 0;
8c923ce2
ND
912 struct ip_tunnel *t = netdev_priv(dev);
913 struct net *net = t->net;
914 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
c5441932 915
c5441932
PS
916 switch (cmd) {
917 case SIOCGETTUNNEL:
8c923ce2 918 if (dev == itn->fb_tunnel_dev) {
c5441932 919 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
51456b29 920 if (!t)
8c923ce2
ND
921 t = netdev_priv(dev);
922 }
c5441932
PS
923 memcpy(p, &t->parms, sizeof(*p));
924 break;
925
926 case SIOCADDTUNNEL:
927 case SIOCCHGTUNNEL:
928 err = -EPERM;
929 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
930 goto done;
931 if (p->iph.ttl)
932 p->iph.frag_off |= htons(IP_DF);
5832c4a7
AL
933 if (!test_bit(IP_TUNNEL_VTI_BIT, p->i_flags)) {
934 if (!test_bit(IP_TUNNEL_KEY_BIT, p->i_flags))
7c8e6b9c 935 p->i_key = 0;
5832c4a7 936 if (!test_bit(IP_TUNNEL_KEY_BIT, p->o_flags))
7c8e6b9c
DP
937 p->o_key = 0;
938 }
c5441932 939
79134e6c 940 t = ip_tunnel_find(itn, p, itn->type);
c5441932 941
d61746b2
SK
942 if (cmd == SIOCADDTUNNEL) {
943 if (!t) {
944 t = ip_tunnel_create(net, itn, p);
945 err = PTR_ERR_OR_ZERO(t);
946 break;
947 }
948
949 err = -EEXIST;
ee30ef4d 950 break;
6dd3c9ec 951 }
c5441932 952 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
00db4124 953 if (t) {
c5441932
PS
954 if (t->dev != dev) {
955 err = -EEXIST;
956 break;
957 }
958 } else {
959 unsigned int nflags = 0;
960
961 if (ipv4_is_multicast(p->iph.daddr))
962 nflags = IFF_BROADCAST;
963 else if (p->iph.daddr)
964 nflags = IFF_POINTOPOINT;
965
966 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
967 err = -EINVAL;
968 break;
969 }
970
971 t = netdev_priv(dev);
972 }
973 }
974
975 if (t) {
976 err = 0;
9830ad4c 977 ip_tunnel_update(itn, t, dev, p, true, 0);
6dd3c9ec
FW
978 } else {
979 err = -ENOENT;
980 }
c5441932
PS
981 break;
982
983 case SIOCDELTUNNEL:
984 err = -EPERM;
985 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
986 goto done;
987
988 if (dev == itn->fb_tunnel_dev) {
989 err = -ENOENT;
990 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
51456b29 991 if (!t)
c5441932
PS
992 goto done;
993 err = -EPERM;
994 if (t == netdev_priv(itn->fb_tunnel_dev))
995 goto done;
996 dev = t->dev;
997 }
998 unregister_netdevice(dev);
999 err = 0;
1000 break;
1001
1002 default:
1003 err = -EINVAL;
1004 }
1005
1006done:
1007 return err;
1008}
607259a6
CH
1009EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
1010
117aef12
AL
1011bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp,
1012 const void __user *data)
1013{
1014 struct ip_tunnel_parm p;
1015
1016 if (copy_from_user(&p, data, sizeof(p)))
1017 return false;
1018
1019 strscpy(kp->name, p.name);
1020 kp->link = p.link;
5832c4a7
AL
1021 ip_tunnel_flags_from_be16(kp->i_flags, p.i_flags);
1022 ip_tunnel_flags_from_be16(kp->o_flags, p.o_flags);
117aef12
AL
1023 kp->i_key = p.i_key;
1024 kp->o_key = p.o_key;
1025 memcpy(&kp->iph, &p.iph, min(sizeof(kp->iph), sizeof(p.iph)));
1026
1027 return true;
1028}
1029EXPORT_SYMBOL_GPL(ip_tunnel_parm_from_user);
1030
1031bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp)
1032{
1033 struct ip_tunnel_parm p;
1034
5832c4a7
AL
1035 if (!ip_tunnel_flags_is_be16_compat(kp->i_flags) ||
1036 !ip_tunnel_flags_is_be16_compat(kp->o_flags))
1037 return false;
1038
5a66cda5
AL
1039 memset(&p, 0, sizeof(p));
1040
117aef12
AL
1041 strscpy(p.name, kp->name);
1042 p.link = kp->link;
5832c4a7
AL
1043 p.i_flags = ip_tunnel_flags_to_be16(kp->i_flags);
1044 p.o_flags = ip_tunnel_flags_to_be16(kp->o_flags);
117aef12
AL
1045 p.i_key = kp->i_key;
1046 p.o_key = kp->o_key;
1047 memcpy(&p.iph, &kp->iph, min(sizeof(p.iph), sizeof(kp->iph)));
1048
1049 return !copy_to_user(data, &p, sizeof(p));
1050}
1051EXPORT_SYMBOL_GPL(ip_tunnel_parm_to_user);
1052
3e7a1c7c
AB
1053int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
1054 void __user *data, int cmd)
607259a6 1055{
117aef12 1056 struct ip_tunnel_parm_kern p;
607259a6
CH
1057 int err;
1058
117aef12 1059 if (!ip_tunnel_parm_from_user(&p, data))
607259a6
CH
1060 return -EFAULT;
1061 err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
117aef12 1062 if (!err && !ip_tunnel_parm_to_user(data, &p))
607259a6
CH
1063 return -EFAULT;
1064 return err;
1065}
3e7a1c7c 1066EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);
c5441932 1067
7e059158 1068int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
c5441932
PS
1069{
1070 struct ip_tunnel *tunnel = netdev_priv(dev);
1071 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
28e104d0 1072 int max_mtu = IP_MAX_MTU - t_hlen;
c5441932 1073
9992a078
HL
1074 if (dev->type == ARPHRD_ETHER)
1075 max_mtu -= dev->hard_header_len;
1076
b96f9afe 1077 if (new_mtu < ETH_MIN_MTU)
c5441932 1078 return -EINVAL;
7e059158
DW
1079
1080 if (new_mtu > max_mtu) {
1081 if (strict)
1082 return -EINVAL;
1083
1084 new_mtu = max_mtu;
1085 }
1086
1eb2cded 1087 WRITE_ONCE(dev->mtu, new_mtu);
c5441932
PS
1088 return 0;
1089}
7e059158
DW
1090EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
1091
1092int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1093{
1094 return __ip_tunnel_change_mtu(dev, new_mtu, true);
1095}
c5441932
PS
1096EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
1097
1098static void ip_tunnel_dev_free(struct net_device *dev)
1099{
1100 struct ip_tunnel *tunnel = netdev_priv(dev);
1101
1102 gro_cells_destroy(&tunnel->gro_cells);
e09acddf 1103 dst_cache_destroy(&tunnel->dst_cache);
c5441932
PS
1104}
1105
1106void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
1107{
c5441932
PS
1108 struct ip_tunnel *tunnel = netdev_priv(dev);
1109 struct ip_tunnel_net *itn;
1110
6c742e71 1111 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
c5441932
PS
1112
1113 if (itn->fb_tunnel_dev != dev) {
2e15ea39 1114 ip_tunnel_del(itn, netdev_priv(dev));
c5441932
PS
1115 unregister_netdevice_queue(dev, head);
1116 }
1117}
1118EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
1119
1728d4fa
ND
1120struct net *ip_tunnel_get_link_net(const struct net_device *dev)
1121{
1122 struct ip_tunnel *tunnel = netdev_priv(dev);
1123
9cf621bd 1124 return READ_ONCE(tunnel->net);
1728d4fa
ND
1125}
1126EXPORT_SYMBOL(ip_tunnel_get_link_net);
1127
1e99584b
ND
1128int ip_tunnel_get_iflink(const struct net_device *dev)
1129{
f694eee9 1130 const struct ip_tunnel *tunnel = netdev_priv(dev);
1e99584b 1131
f694eee9 1132 return READ_ONCE(tunnel->parms.link);
1e99584b
ND
1133}
1134EXPORT_SYMBOL(ip_tunnel_get_iflink);
1135
c7d03a00 1136int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
c5441932
PS
1137 struct rtnl_link_ops *ops, char *devname)
1138{
1139 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
117aef12 1140 struct ip_tunnel_parm_kern parms;
6261d983 1141 unsigned int i;
c5441932 1142
79134e6c 1143 itn->rtnl_link_ops = ops;
6261d983 1144 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1145 INIT_HLIST_HEAD(&itn->tunnels[i]);
c5441932 1146
79134e6c
ED
1147 if (!ops || !net_has_fallback_tunnels(net)) {
1148 struct ip_tunnel_net *it_init_net;
1149
1150 it_init_net = net_generic(&init_net, ip_tnl_net_id);
1151 itn->type = it_init_net->type;
c5441932
PS
1152 itn->fb_tunnel_dev = NULL;
1153 return 0;
1154 }
6261d983 1155
c5441932
PS
1156 memset(&parms, 0, sizeof(parms));
1157 if (devname)
512b2dc4 1158 strscpy(parms.name, devname, IFNAMSIZ);
c5441932
PS
1159
1160 rtnl_lock();
1161 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
ea857f28
DC
1162 /* FB netdevice is special: we have one, and only one per netns.
1163 * Allowing to move it to another netns is clearly unsafe.
1164 */
67013282 1165 if (!IS_ERR(itn->fb_tunnel_dev)) {
0c493da8 1166 itn->fb_tunnel_dev->netns_immutable = true;
78ff4be4 1167 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
67013282 1168 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
79134e6c 1169 itn->type = itn->fb_tunnel_dev->type;
67013282 1170 }
b4de77ad 1171 rtnl_unlock();
c5441932 1172
27d79f3b 1173 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
c5441932
PS
1174}
1175EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1176
a967e01e
KI
1177void ip_tunnel_delete_net(struct net *net, unsigned int id,
1178 struct rtnl_link_ops *ops,
1179 struct list_head *head)
c5441932 1180{
a967e01e 1181 struct ip_tunnel_net *itn = net_generic(net, id);
6c742e71 1182 struct net_device *dev, *aux;
c5441932
PS
1183 int h;
1184
a967e01e
KI
1185 ASSERT_RTNL_NET(net);
1186
6c742e71
ND
1187 for_each_netdev_safe(net, dev, aux)
1188 if (dev->rtnl_link_ops == ops)
1189 unregister_netdevice_queue(dev, head);
1190
c5441932
PS
1191 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1192 struct ip_tunnel *t;
1193 struct hlist_node *n;
1194 struct hlist_head *thead = &itn->tunnels[h];
1195
1196 hlist_for_each_entry_safe(t, n, thead, hash_node)
6c742e71
ND
1197 /* If dev is in the same netns, it has already
1198 * been added to the list by the previous loop.
1199 */
1200 if (!net_eq(dev_net(t->dev), net))
1201 unregister_netdevice_queue(t->dev, head);
c5441932 1202 }
c5441932 1203}
a967e01e 1204EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
c5441932 1205
eacb1160
XL
1206int ip_tunnel_newlink(struct net *net, struct net_device *dev,
1207 struct nlattr *tb[], struct ip_tunnel_parm_kern *p,
1208 __u32 fwmark)
c5441932
PS
1209{
1210 struct ip_tunnel *nt;
c5441932
PS
1211 struct ip_tunnel_net *itn;
1212 int mtu;
1213 int err;
1214
1215 nt = netdev_priv(dev);
1216 itn = net_generic(net, nt->ip_tnl_net_id);
1217
2e15ea39
PS
1218 if (nt->collect_md) {
1219 if (rtnl_dereference(itn->collect_md_tun))
1220 return -EEXIST;
1221 } else {
1222 if (ip_tunnel_find(itn, p, dev->type))
1223 return -EEXIST;
1224 }
c5441932 1225
5e6700b3 1226 nt->net = net;
c5441932 1227 nt->parms = *p;
9830ad4c 1228 nt->fwmark = fwmark;
c5441932
PS
1229 err = register_netdevice(dev);
1230 if (err)
f6cc9c05 1231 goto err_register_netdevice;
c5441932
PS
1232
1233 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1234 eth_hw_addr_random(dev);
1235
1236 mtu = ip_tunnel_bind_dev(dev);
24fc7979 1237 if (tb[IFLA_MTU]) {
28e104d0 1238 unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));
24fc7979 1239
9992a078
HL
1240 if (dev->type == ARPHRD_ETHER)
1241 max -= dev->hard_header_len;
1242
28e104d0 1243 mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
f6cc9c05 1244 }
c5441932 1245
5568cdc3
DM
1246 err = dev_set_mtu(dev, mtu);
1247 if (err)
1248 goto err_dev_set_mtu;
c5441932
PS
1249
1250 ip_tunnel_add(itn, nt);
f6cc9c05
PM
1251 return 0;
1252
1253err_dev_set_mtu:
1254 unregister_netdevice(dev);
1255err_register_netdevice:
c5441932
PS
1256 return err;
1257}
1258EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1259
1260int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
117aef12 1261 struct ip_tunnel_parm_kern *p, __u32 fwmark)
c5441932 1262{
6c742e71 1263 struct ip_tunnel *t;
c5441932 1264 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1265 struct net *net = tunnel->net;
c5441932
PS
1266 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1267
1268 if (dev == itn->fb_tunnel_dev)
1269 return -EINVAL;
1270
c5441932
PS
1271 t = ip_tunnel_find(itn, p, dev->type);
1272
1273 if (t) {
1274 if (t->dev != dev)
1275 return -EEXIST;
1276 } else {
6c742e71 1277 t = tunnel;
c5441932
PS
1278
1279 if (dev->type != ARPHRD_ETHER) {
1280 unsigned int nflags = 0;
1281
1282 if (ipv4_is_multicast(p->iph.daddr))
1283 nflags = IFF_BROADCAST;
1284 else if (p->iph.daddr)
1285 nflags = IFF_POINTOPOINT;
1286
1287 if ((dev->flags ^ nflags) &
1288 (IFF_POINTOPOINT | IFF_BROADCAST))
1289 return -EINVAL;
1290 }
1291 }
1292
9830ad4c 1293 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
c5441932
PS
1294 return 0;
1295}
1296EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1297
1298int ip_tunnel_init(struct net_device *dev)
1299{
1300 struct ip_tunnel *tunnel = netdev_priv(dev);
1301 struct iphdr *iph = &tunnel->parms.iph;
1c213bd2 1302 int err;
c5441932 1303
cf124db5
DM
1304 dev->needs_free_netdev = true;
1305 dev->priv_destructor = ip_tunnel_dev_free;
45403b12 1306 dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
c5441932 1307
e09acddf 1308 err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
45403b12 1309 if (err)
e09acddf 1310 return err;
9a4aa9af 1311
c5441932
PS
1312 err = gro_cells_init(&tunnel->gro_cells, dev);
1313 if (err) {
e09acddf 1314 dst_cache_destroy(&tunnel->dst_cache);
c5441932
PS
1315 return err;
1316 }
1317
1318 tunnel->dev = dev;
82183b03 1319 strscpy(tunnel->parms.name, dev->name);
c5441932
PS
1320 iph->version = 4;
1321 iph->ihl = 5;
1322
d0f41851 1323 if (tunnel->collect_md)
2e15ea39 1324 netif_keep_dst(dev);
0bef5120 1325 netdev_lockdep_set_classes(dev);
c5441932
PS
1326 return 0;
1327}
1328EXPORT_SYMBOL_GPL(ip_tunnel_init);
1329
1330void ip_tunnel_uninit(struct net_device *dev)
1331{
c5441932 1332 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1333 struct net *net = tunnel->net;
c5441932
PS
1334 struct ip_tunnel_net *itn;
1335
1336 itn = net_generic(net, tunnel->ip_tnl_net_id);
ba61539c
TY
1337 ip_tunnel_del(itn, netdev_priv(dev));
1338 if (itn->fb_tunnel_dev == dev)
1339 WRITE_ONCE(itn->fb_tunnel_dev, NULL);
7d442fab 1340
e09acddf 1341 dst_cache_reset(&tunnel->dst_cache);
c5441932
PS
1342}
1343EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1344
1345/* Do least required initialization, rest of init is done in tunnel_init call */
c7d03a00 1346void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
c5441932
PS
1347{
1348 struct ip_tunnel *tunnel = netdev_priv(dev);
1349 tunnel->ip_tnl_net_id = net_id;
1350}
1351EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1352
b058a5d2 1353MODULE_DESCRIPTION("IPv4 tunnel implementation library");
c5441932 1354MODULE_LICENSE("GPL");