Commit | Line | Data |
---|---|---|
c9422999 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
c5441932 PS |
2 | /* |
3 | * Copyright (c) 2013 Nicira, Inc. | |
c5441932 PS |
4 | */ |
5 | ||
6 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
7 | ||
8 | #include <linux/capability.h> | |
9 | #include <linux/module.h> | |
10 | #include <linux/types.h> | |
11 | #include <linux/kernel.h> | |
12 | #include <linux/slab.h> | |
13 | #include <linux/uaccess.h> | |
14 | #include <linux/skbuff.h> | |
15 | #include <linux/netdevice.h> | |
16 | #include <linux/in.h> | |
17 | #include <linux/tcp.h> | |
18 | #include <linux/udp.h> | |
19 | #include <linux/if_arp.h> | |
c5441932 PS |
20 | #include <linux/init.h> |
21 | #include <linux/in6.h> | |
22 | #include <linux/inetdevice.h> | |
23 | #include <linux/igmp.h> | |
24 | #include <linux/netfilter_ipv4.h> | |
25 | #include <linux/etherdevice.h> | |
26 | #include <linux/if_ether.h> | |
27 | #include <linux/if_vlan.h> | |
28 | #include <linux/rculist.h> | |
27d79f3b | 29 | #include <linux/err.h> |
c5441932 PS |
30 | |
31 | #include <net/sock.h> | |
32 | #include <net/ip.h> | |
33 | #include <net/icmp.h> | |
34 | #include <net/protocol.h> | |
35 | #include <net/ip_tunnels.h> | |
36 | #include <net/arp.h> | |
37 | #include <net/checksum.h> | |
38 | #include <net/dsfield.h> | |
39 | #include <net/inet_ecn.h> | |
40 | #include <net/xfrm.h> | |
41 | #include <net/net_namespace.h> | |
42 | #include <net/netns/generic.h> | |
43 | #include <net/rtnetlink.h> | |
56328486 | 44 | #include <net/udp.h> |
cfc7381b | 45 | #include <net/dst_metadata.h> |
63487bab | 46 | |
c5441932 PS |
47 | #if IS_ENABLED(CONFIG_IPV6) |
48 | #include <net/ipv6.h> | |
49 | #include <net/ip6_fib.h> | |
50 | #include <net/ip6_route.h> | |
51 | #endif | |
52 | ||
967680e0 | 53 | static unsigned int ip_tunnel_hash(__be32 key, __be32 remote) |
c5441932 PS |
54 | { |
55 | return hash_32((__force u32)key ^ (__force u32)remote, | |
56 | IP_TNL_HASH_BITS); | |
57 | } | |
58 | ||
c5441932 PS |
59 | static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, |
60 | __be16 flags, __be32 key) | |
61 | { | |
62 | if (p->i_flags & TUNNEL_KEY) { | |
63 | if (flags & TUNNEL_KEY) | |
64 | return key == p->i_key; | |
65 | else | |
66 | /* key expected, none present */ | |
67 | return false; | |
68 | } else | |
69 | return !(flags & TUNNEL_KEY); | |
70 | } | |
71 | ||
72 | /* Fallback tunnel: no source, no destination, no key, no options | |
73 | ||
74 | Tunnel hash table: | |
75 | We require exact key match i.e. if a key is present in packet | |
76 | it will match only tunnel with the same key; if it is not present, | |
77 | it will match only keyless tunnel. | |
78 | ||
79 | All keyless packets, if not matched by configured keyless tunnels, | |
80 | will match the fallback tunnel. | |
81 | Given src, dst and key, find the appropriate tunnel for input. | |
82 | */ | |
83 | struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, | |
84 | int link, __be16 flags, | |
85 | __be32 remote, __be32 local, | |
86 | __be32 key) | |
87 | { | |
c5441932 PS |
88 | struct ip_tunnel *t, *cand = NULL; |
89 | struct hlist_head *head; | |
ba61539c TY |
90 | struct net_device *ndev; |
91 | unsigned int hash; | |
c5441932 | 92 | |
967680e0 | 93 | hash = ip_tunnel_hash(key, remote); |
c5441932 PS |
94 | head = &itn->tunnels[hash]; |
95 | ||
96 | hlist_for_each_entry_rcu(t, head, hash_node) { | |
97 | if (local != t->parms.iph.saddr || | |
98 | remote != t->parms.iph.daddr || | |
99 | !(t->dev->flags & IFF_UP)) | |
100 | continue; | |
101 | ||
102 | if (!ip_tunnel_key_match(&t->parms, flags, key)) | |
103 | continue; | |
104 | ||
105 | if (t->parms.link == link) | |
106 | return t; | |
107 | else | |
108 | cand = t; | |
109 | } | |
110 | ||
111 | hlist_for_each_entry_rcu(t, head, hash_node) { | |
112 | if (remote != t->parms.iph.daddr || | |
e0056593 | 113 | t->parms.iph.saddr != 0 || |
c5441932 PS |
114 | !(t->dev->flags & IFF_UP)) |
115 | continue; | |
116 | ||
117 | if (!ip_tunnel_key_match(&t->parms, flags, key)) | |
118 | continue; | |
119 | ||
120 | if (t->parms.link == link) | |
121 | return t; | |
122 | else if (!cand) | |
123 | cand = t; | |
124 | } | |
125 | ||
967680e0 | 126 | hash = ip_tunnel_hash(key, 0); |
c5441932 PS |
127 | head = &itn->tunnels[hash]; |
128 | ||
129 | hlist_for_each_entry_rcu(t, head, hash_node) { | |
e0056593 DP |
130 | if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) && |
131 | (local != t->parms.iph.daddr || !ipv4_is_multicast(local))) | |
132 | continue; | |
133 | ||
134 | if (!(t->dev->flags & IFF_UP)) | |
c5441932 PS |
135 | continue; |
136 | ||
137 | if (!ip_tunnel_key_match(&t->parms, flags, key)) | |
138 | continue; | |
139 | ||
140 | if (t->parms.link == link) | |
141 | return t; | |
142 | else if (!cand) | |
143 | cand = t; | |
144 | } | |
145 | ||
c5441932 | 146 | hlist_for_each_entry_rcu(t, head, hash_node) { |
25629fda | 147 | if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) || |
e0056593 DP |
148 | t->parms.iph.saddr != 0 || |
149 | t->parms.iph.daddr != 0 || | |
c5441932 PS |
150 | !(t->dev->flags & IFF_UP)) |
151 | continue; | |
152 | ||
153 | if (t->parms.link == link) | |
154 | return t; | |
155 | else if (!cand) | |
156 | cand = t; | |
157 | } | |
158 | ||
c5441932 PS |
159 | if (cand) |
160 | return cand; | |
161 | ||
2e15ea39 | 162 | t = rcu_dereference(itn->collect_md_tun); |
833a8b40 | 163 | if (t && t->dev->flags & IFF_UP) |
2e15ea39 PS |
164 | return t; |
165 | ||
ba61539c TY |
166 | ndev = READ_ONCE(itn->fb_tunnel_dev); |
167 | if (ndev && ndev->flags & IFF_UP) | |
168 | return netdev_priv(ndev); | |
c5441932 | 169 | |
c5441932 PS |
170 | return NULL; |
171 | } | |
172 | EXPORT_SYMBOL_GPL(ip_tunnel_lookup); | |
173 | ||
174 | static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn, | |
175 | struct ip_tunnel_parm *parms) | |
176 | { | |
177 | unsigned int h; | |
178 | __be32 remote; | |
6d608f06 | 179 | __be32 i_key = parms->i_key; |
c5441932 PS |
180 | |
181 | if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr)) | |
182 | remote = parms->iph.daddr; | |
183 | else | |
184 | remote = 0; | |
185 | ||
6d608f06 SK |
186 | if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI)) |
187 | i_key = 0; | |
188 | ||
189 | h = ip_tunnel_hash(i_key, remote); | |
c5441932 PS |
190 | return &itn->tunnels[h]; |
191 | } | |
192 | ||
193 | static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t) | |
194 | { | |
195 | struct hlist_head *head = ip_bucket(itn, &t->parms); | |
196 | ||
2e15ea39 PS |
197 | if (t->collect_md) |
198 | rcu_assign_pointer(itn->collect_md_tun, t); | |
c5441932 PS |
199 | hlist_add_head_rcu(&t->hash_node, head); |
200 | } | |
201 | ||
2e15ea39 | 202 | static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t) |
c5441932 | 203 | { |
2e15ea39 PS |
204 | if (t->collect_md) |
205 | rcu_assign_pointer(itn->collect_md_tun, NULL); | |
c5441932 PS |
206 | hlist_del_init_rcu(&t->hash_node); |
207 | } | |
208 | ||
209 | static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, | |
210 | struct ip_tunnel_parm *parms, | |
211 | int type) | |
212 | { | |
213 | __be32 remote = parms->iph.daddr; | |
214 | __be32 local = parms->iph.saddr; | |
215 | __be32 key = parms->i_key; | |
5ce54af1 | 216 | __be16 flags = parms->i_flags; |
c5441932 PS |
217 | int link = parms->link; |
218 | struct ip_tunnel *t = NULL; | |
219 | struct hlist_head *head = ip_bucket(itn, parms); | |
220 | ||
221 | hlist_for_each_entry_rcu(t, head, hash_node) { | |
222 | if (local == t->parms.iph.saddr && | |
223 | remote == t->parms.iph.daddr && | |
c5441932 | 224 | link == t->parms.link && |
5ce54af1 DP |
225 | type == t->dev->type && |
226 | ip_tunnel_key_match(&t->parms, flags, key)) | |
c5441932 PS |
227 | break; |
228 | } | |
229 | return t; | |
230 | } | |
231 | ||
232 | static struct net_device *__ip_tunnel_create(struct net *net, | |
233 | const struct rtnl_link_ops *ops, | |
234 | struct ip_tunnel_parm *parms) | |
235 | { | |
236 | int err; | |
237 | struct ip_tunnel *tunnel; | |
238 | struct net_device *dev; | |
239 | char name[IFNAMSIZ]; | |
240 | ||
9cb726a2 ED |
241 | err = -E2BIG; |
242 | if (parms->name[0]) { | |
243 | if (!dev_valid_name(parms->name)) | |
244 | goto failed; | |
512b2dc4 | 245 | strscpy(name, parms->name, IFNAMSIZ); |
9cb726a2 ED |
246 | } else { |
247 | if (strlen(ops->kind) > (IFNAMSIZ - 3)) | |
c5441932 | 248 | goto failed; |
000ade80 SA |
249 | strcpy(name, ops->kind); |
250 | strcat(name, "%d"); | |
c5441932 PS |
251 | } |
252 | ||
253 | ASSERT_RTNL(); | |
c835a677 | 254 | dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup); |
c5441932 PS |
255 | if (!dev) { |
256 | err = -ENOMEM; | |
257 | goto failed; | |
258 | } | |
259 | dev_net_set(dev, net); | |
260 | ||
261 | dev->rtnl_link_ops = ops; | |
262 | ||
263 | tunnel = netdev_priv(dev); | |
264 | tunnel->parms = *parms; | |
5e6700b3 | 265 | tunnel->net = net; |
c5441932 PS |
266 | |
267 | err = register_netdevice(dev); | |
268 | if (err) | |
269 | goto failed_free; | |
270 | ||
271 | return dev; | |
272 | ||
273 | failed_free: | |
274 | free_netdev(dev); | |
275 | failed: | |
276 | return ERR_PTR(err); | |
277 | } | |
278 | ||
c5441932 PS |
279 | static int ip_tunnel_bind_dev(struct net_device *dev) |
280 | { | |
281 | struct net_device *tdev = NULL; | |
282 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
283 | const struct iphdr *iph; | |
284 | int hlen = LL_MAX_HEADER; | |
285 | int mtu = ETH_DATA_LEN; | |
286 | int t_hlen = tunnel->hlen + sizeof(struct iphdr); | |
287 | ||
288 | iph = &tunnel->parms.iph; | |
289 | ||
290 | /* Guess output device to choose reasonable mtu and needed_headroom */ | |
291 | if (iph->daddr) { | |
292 | struct flowi4 fl4; | |
293 | struct rtable *rt; | |
294 | ||
b0066da5 PM |
295 | ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr, |
296 | iph->saddr, tunnel->parms.o_key, | |
db53cd3d | 297 | RT_TOS(iph->tos), dev_net(dev), |
7ec9fce4 | 298 | tunnel->parms.link, tunnel->fwmark, 0, 0); |
7d442fab TH |
299 | rt = ip_route_output_key(tunnel->net, &fl4); |
300 | ||
c5441932 PS |
301 | if (!IS_ERR(rt)) { |
302 | tdev = rt->dst.dev; | |
303 | ip_rt_put(rt); | |
304 | } | |
305 | if (dev->type != ARPHRD_ETHER) | |
306 | dev->flags |= IFF_POINTOPOINT; | |
f27337e1 PA |
307 | |
308 | dst_cache_reset(&tunnel->dst_cache); | |
c5441932 PS |
309 | } |
310 | ||
311 | if (!tdev && tunnel->parms.link) | |
6c742e71 | 312 | tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); |
c5441932 PS |
313 | |
314 | if (tdev) { | |
315 | hlen = tdev->hard_header_len + tdev->needed_headroom; | |
82612de1 | 316 | mtu = min(tdev->mtu, IP_MAX_MTU); |
c5441932 | 317 | } |
c5441932 PS |
318 | |
319 | dev->needed_headroom = t_hlen + hlen; | |
9992a078 | 320 | mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0); |
c5441932 | 321 | |
b5476022 ED |
322 | if (mtu < IPV4_MIN_MTU) |
323 | mtu = IPV4_MIN_MTU; | |
c5441932 PS |
324 | |
325 | return mtu; | |
326 | } | |
327 | ||
328 | static struct ip_tunnel *ip_tunnel_create(struct net *net, | |
329 | struct ip_tunnel_net *itn, | |
330 | struct ip_tunnel_parm *parms) | |
331 | { | |
4929fd8c | 332 | struct ip_tunnel *nt; |
c5441932 | 333 | struct net_device *dev; |
b96f9afe | 334 | int t_hlen; |
f6cc9c05 PM |
335 | int mtu; |
336 | int err; | |
c5441932 | 337 | |
79134e6c | 338 | dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms); |
c5441932 | 339 | if (IS_ERR(dev)) |
6dd3c9ec | 340 | return ERR_CAST(dev); |
c5441932 | 341 | |
f6cc9c05 PM |
342 | mtu = ip_tunnel_bind_dev(dev); |
343 | err = dev_set_mtu(dev, mtu); | |
344 | if (err) | |
345 | goto err_dev_set_mtu; | |
c5441932 PS |
346 | |
347 | nt = netdev_priv(dev); | |
b96f9afe JW |
348 | t_hlen = nt->hlen + sizeof(struct iphdr); |
349 | dev->min_mtu = ETH_MIN_MTU; | |
28e104d0 | 350 | dev->max_mtu = IP_MAX_MTU - t_hlen; |
9992a078 HL |
351 | if (dev->type == ARPHRD_ETHER) |
352 | dev->max_mtu -= dev->hard_header_len; | |
353 | ||
c5441932 PS |
354 | ip_tunnel_add(itn, nt); |
355 | return nt; | |
f6cc9c05 PM |
356 | |
357 | err_dev_set_mtu: | |
358 | unregister_netdevice(dev); | |
359 | return ERR_PTR(err); | |
c5441932 PS |
360 | } |
361 | ||
ac931d4c CE |
362 | void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info) |
363 | { | |
364 | const struct iphdr *iph = ip_hdr(skb); | |
365 | const struct udphdr *udph; | |
366 | ||
367 | if (iph->protocol != IPPROTO_UDP) | |
368 | return; | |
369 | ||
370 | udph = (struct udphdr *)((__u8 *)iph + (iph->ihl << 2)); | |
371 | info->encap.sport = udph->source; | |
372 | info->encap.dport = udph->dest; | |
373 | } | |
374 | EXPORT_SYMBOL(ip_tunnel_md_udp_encap); | |
375 | ||
c5441932 | 376 | int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, |
2e15ea39 PS |
377 | const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, |
378 | bool log_ecn_error) | |
c5441932 | 379 | { |
c5441932 PS |
380 | const struct iphdr *iph = ip_hdr(skb); |
381 | int err; | |
382 | ||
c5441932 PS |
383 | #ifdef CONFIG_NET_IPGRE_BROADCAST |
384 | if (ipv4_is_multicast(iph->daddr)) { | |
c4794d22 | 385 | DEV_STATS_INC(tunnel->dev, multicast); |
c5441932 PS |
386 | skb->pkt_type = PACKET_BROADCAST; |
387 | } | |
388 | #endif | |
389 | ||
390 | if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) || | |
391 | ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) { | |
c4794d22 ED |
392 | DEV_STATS_INC(tunnel->dev, rx_crc_errors); |
393 | DEV_STATS_INC(tunnel->dev, rx_errors); | |
c5441932 PS |
394 | goto drop; |
395 | } | |
396 | ||
397 | if (tunnel->parms.i_flags&TUNNEL_SEQ) { | |
398 | if (!(tpi->flags&TUNNEL_SEQ) || | |
399 | (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { | |
c4794d22 ED |
400 | DEV_STATS_INC(tunnel->dev, rx_fifo_errors); |
401 | DEV_STATS_INC(tunnel->dev, rx_errors); | |
c5441932 PS |
402 | goto drop; |
403 | } | |
404 | tunnel->i_seqno = ntohl(tpi->seq) + 1; | |
405 | } | |
406 | ||
227adfb2 | 407 | skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0); |
e96f2e7c | 408 | |
c5441932 PS |
409 | err = IP_ECN_decapsulate(iph, skb); |
410 | if (unlikely(err)) { | |
411 | if (log_ecn_error) | |
412 | net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", | |
413 | &iph->saddr, iph->tos); | |
414 | if (err > 1) { | |
c4794d22 ED |
415 | DEV_STATS_INC(tunnel->dev, rx_frame_errors); |
416 | DEV_STATS_INC(tunnel->dev, rx_errors); | |
c5441932 PS |
417 | goto drop; |
418 | } | |
419 | } | |
420 | ||
560b50cf | 421 | dev_sw_netstats_rx_add(tunnel->dev, skb->len); |
81b9eab5 AS |
422 | skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); |
423 | ||
3d7b46cd PS |
424 | if (tunnel->dev->type == ARPHRD_ETHER) { |
425 | skb->protocol = eth_type_trans(skb, tunnel->dev); | |
426 | skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); | |
427 | } else { | |
428 | skb->dev = tunnel->dev; | |
429 | } | |
64261f23 | 430 | |
2e15ea39 PS |
431 | if (tun_dst) |
432 | skb_dst_set(skb, (struct dst_entry *)tun_dst); | |
433 | ||
c5441932 PS |
434 | gro_cells_receive(&tunnel->gro_cells, skb); |
435 | return 0; | |
436 | ||
437 | drop: | |
469f87e1 HY |
438 | if (tun_dst) |
439 | dst_release((struct dst_entry *)tun_dst); | |
c5441932 PS |
440 | kfree_skb(skb); |
441 | return 0; | |
442 | } | |
443 | EXPORT_SYMBOL_GPL(ip_tunnel_rcv); | |
444 | ||
a8c5f90f TH |
445 | int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops, |
446 | unsigned int num) | |
447 | { | |
bb1553c8 TG |
448 | if (num >= MAX_IPTUN_ENCAP_OPS) |
449 | return -ERANGE; | |
450 | ||
a8c5f90f TH |
451 | return !cmpxchg((const struct ip_tunnel_encap_ops **) |
452 | &iptun_encaps[num], | |
453 | NULL, ops) ? 0 : -1; | |
56328486 | 454 | } |
a8c5f90f TH |
455 | EXPORT_SYMBOL(ip_tunnel_encap_add_ops); |
456 | ||
457 | int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops, | |
458 | unsigned int num) | |
459 | { | |
460 | int ret; | |
461 | ||
bb1553c8 TG |
462 | if (num >= MAX_IPTUN_ENCAP_OPS) |
463 | return -ERANGE; | |
464 | ||
a8c5f90f TH |
465 | ret = (cmpxchg((const struct ip_tunnel_encap_ops **) |
466 | &iptun_encaps[num], | |
467 | ops, NULL) == ops) ? 0 : -1; | |
468 | ||
469 | synchronize_net(); | |
470 | ||
471 | return ret; | |
472 | } | |
473 | EXPORT_SYMBOL(ip_tunnel_encap_del_ops); | |
56328486 TH |
474 | |
475 | int ip_tunnel_encap_setup(struct ip_tunnel *t, | |
476 | struct ip_tunnel_encap *ipencap) | |
477 | { | |
478 | int hlen; | |
479 | ||
480 | memset(&t->encap, 0, sizeof(t->encap)); | |
481 | ||
482 | hlen = ip_encap_hlen(ipencap); | |
483 | if (hlen < 0) | |
484 | return hlen; | |
485 | ||
486 | t->encap.type = ipencap->type; | |
487 | t->encap.sport = ipencap->sport; | |
488 | t->encap.dport = ipencap->dport; | |
489 | t->encap.flags = ipencap->flags; | |
490 | ||
491 | t->encap_hlen = hlen; | |
492 | t->hlen = t->encap_hlen + t->tun_hlen; | |
493 | ||
494 | return 0; | |
495 | } | |
496 | EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup); | |
497 | ||
23a3647b | 498 | static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, |
fc24f2b2 | 499 | struct rtable *rt, __be16 df, |
c8b34e68 | 500 | const struct iphdr *inner_iph, |
501 | int tunnel_hlen, __be32 dst, bool md) | |
23a3647b PS |
502 | { |
503 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
c8b34e68 | 504 | int pkt_size; |
23a3647b PS |
505 | int mtu; |
506 | ||
c8b34e68 | 507 | tunnel_hlen = md ? tunnel_hlen : tunnel->hlen; |
28e104d0 | 508 | pkt_size = skb->len - tunnel_hlen; |
9992a078 | 509 | pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0; |
c8b34e68 | 510 | |
9992a078 | 511 | if (df) { |
28e104d0 | 512 | mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen); |
9992a078 HL |
513 | mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0; |
514 | } else { | |
f4b3ec4e | 515 | mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; |
9992a078 | 516 | } |
23a3647b | 517 | |
f4b3ec4e | 518 | if (skb_valid_dst(skb)) |
7a1592bc | 519 | skb_dst_update_pmtu_no_confirm(skb, mtu); |
23a3647b PS |
520 | |
521 | if (skb->protocol == htons(ETH_P_IP)) { | |
522 | if (!skb_is_gso(skb) && | |
fc24f2b2 TT |
523 | (inner_iph->frag_off & htons(IP_DF)) && |
524 | mtu < pkt_size) { | |
4372339e | 525 | icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); |
23a3647b PS |
526 | return -E2BIG; |
527 | } | |
528 | } | |
529 | #if IS_ENABLED(CONFIG_IPV6) | |
530 | else if (skb->protocol == htons(ETH_P_IPV6)) { | |
f4b3ec4e | 531 | struct rt6_info *rt6; |
c8b34e68 | 532 | __be32 daddr; |
533 | ||
f4b3ec4e AM |
534 | rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) : |
535 | NULL; | |
c8b34e68 | 536 | daddr = md ? dst : tunnel->parms.iph.daddr; |
23a3647b PS |
537 | |
538 | if (rt6 && mtu < dst_mtu(skb_dst(skb)) && | |
539 | mtu >= IPV6_MIN_MTU) { | |
c8b34e68 | 540 | if ((daddr && !ipv4_is_multicast(daddr)) || |
23a3647b PS |
541 | rt6->rt6i_dst.plen == 128) { |
542 | rt6->rt6i_flags |= RTF_MODIFIED; | |
543 | dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); | |
544 | } | |
545 | } | |
546 | ||
547 | if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && | |
548 | mtu < pkt_size) { | |
4372339e | 549 | icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
23a3647b PS |
550 | return -E2BIG; |
551 | } | |
552 | } | |
553 | #endif | |
554 | return 0; | |
555 | } | |
556 | ||
c8b34e68 | 557 | void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, |
558 | u8 proto, int tunnel_hlen) | |
cfc7381b AS |
559 | { |
560 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
561 | u32 headroom = sizeof(struct iphdr); | |
562 | struct ip_tunnel_info *tun_info; | |
563 | const struct ip_tunnel_key *key; | |
564 | const struct iphdr *inner_iph; | |
f46fe4f8 | 565 | struct rtable *rt = NULL; |
cfc7381b AS |
566 | struct flowi4 fl4; |
567 | __be16 df = 0; | |
568 | u8 tos, ttl; | |
f46fe4f8 | 569 | bool use_cache; |
cfc7381b AS |
570 | |
571 | tun_info = skb_tunnel_info(skb); | |
572 | if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || | |
573 | ip_tunnel_info_af(tun_info) != AF_INET)) | |
574 | goto tx_error; | |
575 | key = &tun_info->key; | |
576 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | |
577 | inner_iph = (const struct iphdr *)skb_inner_network_header(skb); | |
578 | tos = key->tos; | |
579 | if (tos == 1) { | |
580 | if (skb->protocol == htons(ETH_P_IP)) | |
581 | tos = inner_iph->tos; | |
582 | else if (skb->protocol == htons(ETH_P_IPV6)) | |
583 | tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); | |
584 | } | |
6e6b904a | 585 | ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, |
586 | tunnel_id_to_key32(key->tun_id), RT_TOS(tos), | |
7ec9fce4 EB |
587 | dev_net(dev), 0, skb->mark, skb_get_hash(skb), |
588 | key->flow_flags); | |
ac931d4c CE |
589 | |
590 | if (!tunnel_hlen) | |
591 | tunnel_hlen = ip_encap_hlen(&tun_info->encap); | |
592 | ||
593 | if (ip_tunnel_encap(skb, &tun_info->encap, &proto, &fl4) < 0) | |
cfc7381b | 594 | goto tx_error; |
f46fe4f8 | 595 | |
596 | use_cache = ip_tunnel_dst_cache_usable(skb, tun_info); | |
597 | if (use_cache) | |
598 | rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr); | |
599 | if (!rt) { | |
600 | rt = ip_route_output_key(tunnel->net, &fl4); | |
601 | if (IS_ERR(rt)) { | |
c4794d22 | 602 | DEV_STATS_INC(dev, tx_carrier_errors); |
f46fe4f8 | 603 | goto tx_error; |
604 | } | |
605 | if (use_cache) | |
606 | dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst, | |
607 | fl4.saddr); | |
cfc7381b AS |
608 | } |
609 | if (rt->dst.dev == dev) { | |
610 | ip_rt_put(rt); | |
c4794d22 | 611 | DEV_STATS_INC(dev, collisions); |
cfc7381b AS |
612 | goto tx_error; |
613 | } | |
c8b34e68 | 614 | |
615 | if (key->tun_flags & TUNNEL_DONT_FRAGMENT) | |
616 | df = htons(IP_DF); | |
617 | if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen, | |
618 | key->u.ipv4.dst, true)) { | |
619 | ip_rt_put(rt); | |
620 | goto tx_error; | |
621 | } | |
622 | ||
cfc7381b AS |
623 | tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); |
624 | ttl = key->ttl; | |
625 | if (ttl == 0) { | |
626 | if (skb->protocol == htons(ETH_P_IP)) | |
627 | ttl = inner_iph->ttl; | |
628 | else if (skb->protocol == htons(ETH_P_IPV6)) | |
629 | ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; | |
630 | else | |
631 | ttl = ip4_dst_hoplimit(&rt->dst); | |
632 | } | |
c8b34e68 | 633 | |
cfc7381b | 634 | headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; |
4b397c06 ED |
635 | if (headroom > READ_ONCE(dev->needed_headroom)) |
636 | WRITE_ONCE(dev->needed_headroom, headroom); | |
cfc7381b | 637 | |
4b397c06 | 638 | if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { |
cfc7381b AS |
639 | ip_rt_put(rt); |
640 | goto tx_dropped; | |
641 | } | |
0f693f19 HY |
642 | iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl, |
643 | df, !net_eq(tunnel->net, dev_net(dev))); | |
cfc7381b AS |
644 | return; |
645 | tx_error: | |
c4794d22 | 646 | DEV_STATS_INC(dev, tx_errors); |
cfc7381b AS |
647 | goto kfree; |
648 | tx_dropped: | |
c4794d22 | 649 | DEV_STATS_INC(dev, tx_dropped); |
cfc7381b AS |
650 | kfree: |
651 | kfree_skb(skb); | |
652 | } | |
653 | EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit); | |
654 | ||
c5441932 | 655 | void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, |
56328486 | 656 | const struct iphdr *tnl_params, u8 protocol) |
c5441932 PS |
657 | { |
658 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
186d9366 | 659 | struct ip_tunnel_info *tun_info = NULL; |
c5441932 | 660 | const struct iphdr *inner_iph; |
c5441932 | 661 | unsigned int max_headroom; /* The extra header space needed */ |
186d9366 | 662 | struct rtable *rt = NULL; /* Route to the other host */ |
7ae29fd1 | 663 | __be16 payload_protocol; |
186d9366 | 664 | bool use_cache = false; |
665 | struct flowi4 fl4; | |
666 | bool md = false; | |
22fb22ea | 667 | bool connected; |
186d9366 | 668 | u8 tos, ttl; |
669 | __be32 dst; | |
670 | __be16 df; | |
c5441932 PS |
671 | |
672 | inner_iph = (const struct iphdr *)skb_inner_network_header(skb); | |
22fb22ea | 673 | connected = (tunnel->parms.iph.daddr != 0); |
7ae29fd1 | 674 | payload_protocol = skb_protocol(skb, true); |
c5441932 | 675 | |
5146d1f1 BH |
676 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); |
677 | ||
c5441932 PS |
678 | dst = tnl_params->daddr; |
679 | if (dst == 0) { | |
680 | /* NBMA tunnel */ | |
681 | ||
51456b29 | 682 | if (!skb_dst(skb)) { |
c4794d22 | 683 | DEV_STATS_INC(dev, tx_fifo_errors); |
c5441932 PS |
684 | goto tx_error; |
685 | } | |
686 | ||
d71b5753 | 687 | tun_info = skb_tunnel_info(skb); |
688 | if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) && | |
689 | ip_tunnel_info_af(tun_info) == AF_INET && | |
186d9366 | 690 | tun_info->key.u.ipv4.dst) { |
d71b5753 | 691 | dst = tun_info->key.u.ipv4.dst; |
186d9366 | 692 | md = true; |
693 | connected = true; | |
7ae29fd1 | 694 | } else if (payload_protocol == htons(ETH_P_IP)) { |
c5441932 PS |
695 | rt = skb_rtable(skb); |
696 | dst = rt_nexthop(rt, inner_iph->daddr); | |
697 | } | |
698 | #if IS_ENABLED(CONFIG_IPV6) | |
7ae29fd1 | 699 | else if (payload_protocol == htons(ETH_P_IPV6)) { |
c5441932 PS |
700 | const struct in6_addr *addr6; |
701 | struct neighbour *neigh; | |
702 | bool do_tx_error_icmp; | |
703 | int addr_type; | |
704 | ||
705 | neigh = dst_neigh_lookup(skb_dst(skb), | |
706 | &ipv6_hdr(skb)->daddr); | |
51456b29 | 707 | if (!neigh) |
c5441932 PS |
708 | goto tx_error; |
709 | ||
710 | addr6 = (const struct in6_addr *)&neigh->primary_key; | |
711 | addr_type = ipv6_addr_type(addr6); | |
712 | ||
713 | if (addr_type == IPV6_ADDR_ANY) { | |
714 | addr6 = &ipv6_hdr(skb)->daddr; | |
715 | addr_type = ipv6_addr_type(addr6); | |
716 | } | |
717 | ||
718 | if ((addr_type & IPV6_ADDR_COMPATv4) == 0) | |
719 | do_tx_error_icmp = true; | |
720 | else { | |
721 | do_tx_error_icmp = false; | |
722 | dst = addr6->s6_addr32[3]; | |
723 | } | |
724 | neigh_release(neigh); | |
725 | if (do_tx_error_icmp) | |
726 | goto tx_error_icmp; | |
727 | } | |
728 | #endif | |
729 | else | |
730 | goto tx_error; | |
7d442fab | 731 | |
186d9366 | 732 | if (!md) |
733 | connected = false; | |
c5441932 PS |
734 | } |
735 | ||
736 | tos = tnl_params->tos; | |
737 | if (tos & 0x1) { | |
738 | tos &= ~0x1; | |
7ae29fd1 | 739 | if (payload_protocol == htons(ETH_P_IP)) { |
c5441932 | 740 | tos = inner_iph->tos; |
7d442fab | 741 | connected = false; |
7ae29fd1 | 742 | } else if (payload_protocol == htons(ETH_P_IPV6)) { |
c5441932 | 743 | tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); |
7d442fab TH |
744 | connected = false; |
745 | } | |
c5441932 PS |
746 | } |
747 | ||
0f3e9c97 | 748 | ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, |
db53cd3d DA |
749 | tunnel->parms.o_key, RT_TOS(tos), |
750 | dev_net(dev), tunnel->parms.link, | |
7ec9fce4 | 751 | tunnel->fwmark, skb_get_hash(skb), 0); |
7d442fab | 752 | |
ac931d4c | 753 | if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0) |
56328486 TH |
754 | goto tx_error; |
755 | ||
186d9366 | 756 | if (connected && md) { |
757 | use_cache = ip_tunnel_dst_cache_usable(skb, tun_info); | |
758 | if (use_cache) | |
759 | rt = dst_cache_get_ip4(&tun_info->dst_cache, | |
760 | &fl4.saddr); | |
761 | } else { | |
762 | rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, | |
763 | &fl4.saddr) : NULL; | |
764 | } | |
7d442fab TH |
765 | |
766 | if (!rt) { | |
767 | rt = ip_route_output_key(tunnel->net, &fl4); | |
768 | ||
769 | if (IS_ERR(rt)) { | |
c4794d22 | 770 | DEV_STATS_INC(dev, tx_carrier_errors); |
7d442fab TH |
771 | goto tx_error; |
772 | } | |
186d9366 | 773 | if (use_cache) |
774 | dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst, | |
775 | fl4.saddr); | |
776 | else if (!md && connected) | |
e09acddf PA |
777 | dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, |
778 | fl4.saddr); | |
c5441932 | 779 | } |
7d442fab | 780 | |
0e6fbc5b | 781 | if (rt->dst.dev == dev) { |
c5441932 | 782 | ip_rt_put(rt); |
c4794d22 | 783 | DEV_STATS_INC(dev, collisions); |
c5441932 PS |
784 | goto tx_error; |
785 | } | |
c5441932 | 786 | |
50c66167 | 787 | df = tnl_params->frag_off; |
7ae29fd1 | 788 | if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df) |
50c66167 FW |
789 | df |= (inner_iph->frag_off & htons(IP_DF)); |
790 | ||
791 | if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) { | |
23a3647b PS |
792 | ip_rt_put(rt); |
793 | goto tx_error; | |
c5441932 | 794 | } |
c5441932 PS |
795 | |
796 | if (tunnel->err_count > 0) { | |
797 | if (time_before(jiffies, | |
798 | tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { | |
799 | tunnel->err_count--; | |
800 | ||
801 | dst_link_failure(skb); | |
802 | } else | |
803 | tunnel->err_count = 0; | |
804 | } | |
805 | ||
d4a71b15 | 806 | tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); |
c5441932 PS |
807 | ttl = tnl_params->ttl; |
808 | if (ttl == 0) { | |
7ae29fd1 | 809 | if (payload_protocol == htons(ETH_P_IP)) |
c5441932 PS |
810 | ttl = inner_iph->ttl; |
811 | #if IS_ENABLED(CONFIG_IPV6) | |
7ae29fd1 | 812 | else if (payload_protocol == htons(ETH_P_IPV6)) |
c5441932 PS |
813 | ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; |
814 | #endif | |
815 | else | |
816 | ttl = ip4_dst_hoplimit(&rt->dst); | |
817 | } | |
818 | ||
0e6fbc5b | 819 | max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) |
7371e022 | 820 | + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); |
4b397c06 ED |
821 | if (max_headroom > READ_ONCE(dev->needed_headroom)) |
822 | WRITE_ONCE(dev->needed_headroom, max_headroom); | |
3e08f4a7 | 823 | |
4b397c06 | 824 | if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { |
586d5fc8 | 825 | ip_rt_put(rt); |
c4794d22 | 826 | DEV_STATS_INC(dev, tx_dropped); |
3acfa1e7 | 827 | kfree_skb(skb); |
3e08f4a7 | 828 | return; |
c5441932 PS |
829 | } |
830 | ||
039f5062 PS |
831 | iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, |
832 | df, !net_eq(tunnel->net, dev_net(dev))); | |
c5441932 PS |
833 | return; |
834 | ||
835 | #if IS_ENABLED(CONFIG_IPV6) | |
836 | tx_error_icmp: | |
837 | dst_link_failure(skb); | |
838 | #endif | |
839 | tx_error: | |
c4794d22 | 840 | DEV_STATS_INC(dev, tx_errors); |
3acfa1e7 | 841 | kfree_skb(skb); |
c5441932 PS |
842 | } |
843 | EXPORT_SYMBOL_GPL(ip_tunnel_xmit); | |
844 | ||
845 | static void ip_tunnel_update(struct ip_tunnel_net *itn, | |
846 | struct ip_tunnel *t, | |
847 | struct net_device *dev, | |
848 | struct ip_tunnel_parm *p, | |
9830ad4c CG |
849 | bool set_mtu, |
850 | __u32 fwmark) | |
c5441932 | 851 | { |
2e15ea39 | 852 | ip_tunnel_del(itn, t); |
c5441932 PS |
853 | t->parms.iph.saddr = p->iph.saddr; |
854 | t->parms.iph.daddr = p->iph.daddr; | |
855 | t->parms.i_key = p->i_key; | |
856 | t->parms.o_key = p->o_key; | |
857 | if (dev->type != ARPHRD_ETHER) { | |
5a1b7e1a | 858 | __dev_addr_set(dev, &p->iph.saddr, 4); |
c5441932 PS |
859 | memcpy(dev->broadcast, &p->iph.daddr, 4); |
860 | } | |
861 | ip_tunnel_add(itn, t); | |
862 | ||
863 | t->parms.iph.ttl = p->iph.ttl; | |
864 | t->parms.iph.tos = p->iph.tos; | |
865 | t->parms.iph.frag_off = p->iph.frag_off; | |
866 | ||
9830ad4c | 867 | if (t->parms.link != p->link || t->fwmark != fwmark) { |
c5441932 PS |
868 | int mtu; |
869 | ||
870 | t->parms.link = p->link; | |
9830ad4c | 871 | t->fwmark = fwmark; |
c5441932 PS |
872 | mtu = ip_tunnel_bind_dev(dev); |
873 | if (set_mtu) | |
874 | dev->mtu = mtu; | |
875 | } | |
e09acddf | 876 | dst_cache_reset(&t->dst_cache); |
c5441932 PS |
877 | netdev_state_change(dev); |
878 | } | |
879 | ||
607259a6 | 880 | int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) |
c5441932 PS |
881 | { |
882 | int err = 0; | |
8c923ce2 ND |
883 | struct ip_tunnel *t = netdev_priv(dev); |
884 | struct net *net = t->net; | |
885 | struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id); | |
c5441932 | 886 | |
c5441932 PS |
887 | switch (cmd) { |
888 | case SIOCGETTUNNEL: | |
8c923ce2 | 889 | if (dev == itn->fb_tunnel_dev) { |
c5441932 | 890 | t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); |
51456b29 | 891 | if (!t) |
8c923ce2 ND |
892 | t = netdev_priv(dev); |
893 | } | |
c5441932 PS |
894 | memcpy(p, &t->parms, sizeof(*p)); |
895 | break; | |
896 | ||
897 | case SIOCADDTUNNEL: | |
898 | case SIOCCHGTUNNEL: | |
899 | err = -EPERM; | |
900 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) | |
901 | goto done; | |
902 | if (p->iph.ttl) | |
903 | p->iph.frag_off |= htons(IP_DF); | |
7c8e6b9c DP |
904 | if (!(p->i_flags & VTI_ISVTI)) { |
905 | if (!(p->i_flags & TUNNEL_KEY)) | |
906 | p->i_key = 0; | |
907 | if (!(p->o_flags & TUNNEL_KEY)) | |
908 | p->o_key = 0; | |
909 | } | |
c5441932 | 910 | |
79134e6c | 911 | t = ip_tunnel_find(itn, p, itn->type); |
c5441932 | 912 | |
d61746b2 SK |
913 | if (cmd == SIOCADDTUNNEL) { |
914 | if (!t) { | |
915 | t = ip_tunnel_create(net, itn, p); | |
916 | err = PTR_ERR_OR_ZERO(t); | |
917 | break; | |
918 | } | |
919 | ||
920 | err = -EEXIST; | |
ee30ef4d | 921 | break; |
6dd3c9ec | 922 | } |
c5441932 | 923 | if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { |
00db4124 | 924 | if (t) { |
c5441932 PS |
925 | if (t->dev != dev) { |
926 | err = -EEXIST; | |
927 | break; | |
928 | } | |
929 | } else { | |
930 | unsigned int nflags = 0; | |
931 | ||
932 | if (ipv4_is_multicast(p->iph.daddr)) | |
933 | nflags = IFF_BROADCAST; | |
934 | else if (p->iph.daddr) | |
935 | nflags = IFF_POINTOPOINT; | |
936 | ||
937 | if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { | |
938 | err = -EINVAL; | |
939 | break; | |
940 | } | |
941 | ||
942 | t = netdev_priv(dev); | |
943 | } | |
944 | } | |
945 | ||
946 | if (t) { | |
947 | err = 0; | |
9830ad4c | 948 | ip_tunnel_update(itn, t, dev, p, true, 0); |
6dd3c9ec FW |
949 | } else { |
950 | err = -ENOENT; | |
951 | } | |
c5441932 PS |
952 | break; |
953 | ||
954 | case SIOCDELTUNNEL: | |
955 | err = -EPERM; | |
956 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) | |
957 | goto done; | |
958 | ||
959 | if (dev == itn->fb_tunnel_dev) { | |
960 | err = -ENOENT; | |
961 | t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); | |
51456b29 | 962 | if (!t) |
c5441932 PS |
963 | goto done; |
964 | err = -EPERM; | |
965 | if (t == netdev_priv(itn->fb_tunnel_dev)) | |
966 | goto done; | |
967 | dev = t->dev; | |
968 | } | |
969 | unregister_netdevice(dev); | |
970 | err = 0; | |
971 | break; | |
972 | ||
973 | default: | |
974 | err = -EINVAL; | |
975 | } | |
976 | ||
977 | done: | |
978 | return err; | |
979 | } | |
607259a6 CH |
980 | EXPORT_SYMBOL_GPL(ip_tunnel_ctl); |
981 | ||
3e7a1c7c AB |
982 | int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, |
983 | void __user *data, int cmd) | |
607259a6 CH |
984 | { |
985 | struct ip_tunnel_parm p; | |
986 | int err; | |
987 | ||
3e7a1c7c | 988 | if (copy_from_user(&p, data, sizeof(p))) |
607259a6 CH |
989 | return -EFAULT; |
990 | err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd); | |
3e7a1c7c | 991 | if (!err && copy_to_user(data, &p, sizeof(p))) |
607259a6 CH |
992 | return -EFAULT; |
993 | return err; | |
994 | } | |
3e7a1c7c | 995 | EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate); |
c5441932 | 996 | |
7e059158 | 997 | int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) |
c5441932 PS |
998 | { |
999 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
1000 | int t_hlen = tunnel->hlen + sizeof(struct iphdr); | |
28e104d0 | 1001 | int max_mtu = IP_MAX_MTU - t_hlen; |
c5441932 | 1002 | |
9992a078 HL |
1003 | if (dev->type == ARPHRD_ETHER) |
1004 | max_mtu -= dev->hard_header_len; | |
1005 | ||
b96f9afe | 1006 | if (new_mtu < ETH_MIN_MTU) |
c5441932 | 1007 | return -EINVAL; |
7e059158 DW |
1008 | |
1009 | if (new_mtu > max_mtu) { | |
1010 | if (strict) | |
1011 | return -EINVAL; | |
1012 | ||
1013 | new_mtu = max_mtu; | |
1014 | } | |
1015 | ||
c5441932 PS |
1016 | dev->mtu = new_mtu; |
1017 | return 0; | |
1018 | } | |
7e059158 DW |
1019 | EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu); |
1020 | ||
1021 | int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) | |
1022 | { | |
1023 | return __ip_tunnel_change_mtu(dev, new_mtu, true); | |
1024 | } | |
c5441932 PS |
1025 | EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); |
1026 | ||
1027 | static void ip_tunnel_dev_free(struct net_device *dev) | |
1028 | { | |
1029 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
1030 | ||
1031 | gro_cells_destroy(&tunnel->gro_cells); | |
e09acddf | 1032 | dst_cache_destroy(&tunnel->dst_cache); |
c5441932 | 1033 | free_percpu(dev->tstats); |
c5441932 PS |
1034 | } |
1035 | ||
1036 | void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) | |
1037 | { | |
c5441932 PS |
1038 | struct ip_tunnel *tunnel = netdev_priv(dev); |
1039 | struct ip_tunnel_net *itn; | |
1040 | ||
6c742e71 | 1041 | itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id); |
c5441932 PS |
1042 | |
1043 | if (itn->fb_tunnel_dev != dev) { | |
2e15ea39 | 1044 | ip_tunnel_del(itn, netdev_priv(dev)); |
c5441932 PS |
1045 | unregister_netdevice_queue(dev, head); |
1046 | } | |
1047 | } | |
1048 | EXPORT_SYMBOL_GPL(ip_tunnel_dellink); | |
1049 | ||
1728d4fa ND |
1050 | struct net *ip_tunnel_get_link_net(const struct net_device *dev) |
1051 | { | |
1052 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
1053 | ||
1054 | return tunnel->net; | |
1055 | } | |
1056 | EXPORT_SYMBOL(ip_tunnel_get_link_net); | |
1057 | ||
1e99584b ND |
1058 | int ip_tunnel_get_iflink(const struct net_device *dev) |
1059 | { | |
1060 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
1061 | ||
1062 | return tunnel->parms.link; | |
1063 | } | |
1064 | EXPORT_SYMBOL(ip_tunnel_get_iflink); | |
1065 | ||
c7d03a00 | 1066 | int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, |
c5441932 PS |
1067 | struct rtnl_link_ops *ops, char *devname) |
1068 | { | |
1069 | struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); | |
1070 | struct ip_tunnel_parm parms; | |
6261d983 | 1071 | unsigned int i; |
c5441932 | 1072 | |
79134e6c | 1073 | itn->rtnl_link_ops = ops; |
6261d983 | 1074 | for (i = 0; i < IP_TNL_HASH_SIZE; i++) |
1075 | INIT_HLIST_HEAD(&itn->tunnels[i]); | |
c5441932 | 1076 | |
79134e6c ED |
1077 | if (!ops || !net_has_fallback_tunnels(net)) { |
1078 | struct ip_tunnel_net *it_init_net; | |
1079 | ||
1080 | it_init_net = net_generic(&init_net, ip_tnl_net_id); | |
1081 | itn->type = it_init_net->type; | |
c5441932 PS |
1082 | itn->fb_tunnel_dev = NULL; |
1083 | return 0; | |
1084 | } | |
6261d983 | 1085 | |
c5441932 PS |
1086 | memset(&parms, 0, sizeof(parms)); |
1087 | if (devname) | |
512b2dc4 | 1088 | strscpy(parms.name, devname, IFNAMSIZ); |
c5441932 PS |
1089 | |
1090 | rtnl_lock(); | |
1091 | itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms); | |
ea857f28 DC |
1092 | /* FB netdevice is special: we have one, and only one per netns. |
1093 | * Allowing to move it to another netns is clearly unsafe. | |
1094 | */ | |
67013282 | 1095 | if (!IS_ERR(itn->fb_tunnel_dev)) { |
b4de77ad | 1096 | itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; |
78ff4be4 | 1097 | itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev); |
67013282 | 1098 | ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); |
79134e6c | 1099 | itn->type = itn->fb_tunnel_dev->type; |
67013282 | 1100 | } |
b4de77ad | 1101 | rtnl_unlock(); |
c5441932 | 1102 | |
27d79f3b | 1103 | return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev); |
c5441932 PS |
1104 | } |
1105 | EXPORT_SYMBOL_GPL(ip_tunnel_init_net); | |
1106 | ||
79134e6c ED |
1107 | static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn, |
1108 | struct list_head *head, | |
6c742e71 | 1109 | struct rtnl_link_ops *ops) |
c5441932 | 1110 | { |
6c742e71 | 1111 | struct net_device *dev, *aux; |
c5441932 PS |
1112 | int h; |
1113 | ||
6c742e71 ND |
1114 | for_each_netdev_safe(net, dev, aux) |
1115 | if (dev->rtnl_link_ops == ops) | |
1116 | unregister_netdevice_queue(dev, head); | |
1117 | ||
c5441932 PS |
1118 | for (h = 0; h < IP_TNL_HASH_SIZE; h++) { |
1119 | struct ip_tunnel *t; | |
1120 | struct hlist_node *n; | |
1121 | struct hlist_head *thead = &itn->tunnels[h]; | |
1122 | ||
1123 | hlist_for_each_entry_safe(t, n, thead, hash_node) | |
6c742e71 ND |
1124 | /* If dev is in the same netns, it has already |
1125 | * been added to the list by the previous loop. | |
1126 | */ | |
1127 | if (!net_eq(dev_net(t->dev), net)) | |
1128 | unregister_netdevice_queue(t->dev, head); | |
c5441932 | 1129 | } |
c5441932 PS |
1130 | } |
1131 | ||
64bc1781 ED |
1132 | void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id, |
1133 | struct rtnl_link_ops *ops) | |
c5441932 | 1134 | { |
64bc1781 ED |
1135 | struct ip_tunnel_net *itn; |
1136 | struct net *net; | |
c5441932 PS |
1137 | LIST_HEAD(list); |
1138 | ||
1139 | rtnl_lock(); | |
64bc1781 ED |
1140 | list_for_each_entry(net, net_list, exit_list) { |
1141 | itn = net_generic(net, id); | |
79134e6c | 1142 | ip_tunnel_destroy(net, itn, &list, ops); |
64bc1781 | 1143 | } |
c5441932 PS |
1144 | unregister_netdevice_many(&list); |
1145 | rtnl_unlock(); | |
c5441932 | 1146 | } |
64bc1781 | 1147 | EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets); |
c5441932 PS |
1148 | |
1149 | int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], | |
9830ad4c | 1150 | struct ip_tunnel_parm *p, __u32 fwmark) |
c5441932 PS |
1151 | { |
1152 | struct ip_tunnel *nt; | |
1153 | struct net *net = dev_net(dev); | |
1154 | struct ip_tunnel_net *itn; | |
1155 | int mtu; | |
1156 | int err; | |
1157 | ||
1158 | nt = netdev_priv(dev); | |
1159 | itn = net_generic(net, nt->ip_tnl_net_id); | |
1160 | ||
2e15ea39 PS |
1161 | if (nt->collect_md) { |
1162 | if (rtnl_dereference(itn->collect_md_tun)) | |
1163 | return -EEXIST; | |
1164 | } else { | |
1165 | if (ip_tunnel_find(itn, p, dev->type)) | |
1166 | return -EEXIST; | |
1167 | } | |
c5441932 | 1168 | |
5e6700b3 | 1169 | nt->net = net; |
c5441932 | 1170 | nt->parms = *p; |
9830ad4c | 1171 | nt->fwmark = fwmark; |
c5441932 PS |
1172 | err = register_netdevice(dev); |
1173 | if (err) | |
f6cc9c05 | 1174 | goto err_register_netdevice; |
c5441932 PS |
1175 | |
1176 | if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) | |
1177 | eth_hw_addr_random(dev); | |
1178 | ||
1179 | mtu = ip_tunnel_bind_dev(dev); | |
24fc7979 | 1180 | if (tb[IFLA_MTU]) { |
28e104d0 | 1181 | unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr)); |
24fc7979 | 1182 | |
9992a078 HL |
1183 | if (dev->type == ARPHRD_ETHER) |
1184 | max -= dev->hard_header_len; | |
1185 | ||
28e104d0 | 1186 | mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max); |
f6cc9c05 | 1187 | } |
c5441932 | 1188 | |
5568cdc3 DM |
1189 | err = dev_set_mtu(dev, mtu); |
1190 | if (err) | |
1191 | goto err_dev_set_mtu; | |
c5441932 PS |
1192 | |
1193 | ip_tunnel_add(itn, nt); | |
f6cc9c05 PM |
1194 | return 0; |
1195 | ||
1196 | err_dev_set_mtu: | |
1197 | unregister_netdevice(dev); | |
1198 | err_register_netdevice: | |
c5441932 PS |
1199 | return err; |
1200 | } | |
1201 | EXPORT_SYMBOL_GPL(ip_tunnel_newlink); | |
1202 | ||
1203 | int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], | |
9830ad4c | 1204 | struct ip_tunnel_parm *p, __u32 fwmark) |
c5441932 | 1205 | { |
6c742e71 | 1206 | struct ip_tunnel *t; |
c5441932 | 1207 | struct ip_tunnel *tunnel = netdev_priv(dev); |
6c742e71 | 1208 | struct net *net = tunnel->net; |
c5441932 PS |
1209 | struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); |
1210 | ||
1211 | if (dev == itn->fb_tunnel_dev) | |
1212 | return -EINVAL; | |
1213 | ||
c5441932 PS |
1214 | t = ip_tunnel_find(itn, p, dev->type); |
1215 | ||
1216 | if (t) { | |
1217 | if (t->dev != dev) | |
1218 | return -EEXIST; | |
1219 | } else { | |
6c742e71 | 1220 | t = tunnel; |
c5441932 PS |
1221 | |
1222 | if (dev->type != ARPHRD_ETHER) { | |
1223 | unsigned int nflags = 0; | |
1224 | ||
1225 | if (ipv4_is_multicast(p->iph.daddr)) | |
1226 | nflags = IFF_BROADCAST; | |
1227 | else if (p->iph.daddr) | |
1228 | nflags = IFF_POINTOPOINT; | |
1229 | ||
1230 | if ((dev->flags ^ nflags) & | |
1231 | (IFF_POINTOPOINT | IFF_BROADCAST)) | |
1232 | return -EINVAL; | |
1233 | } | |
1234 | } | |
1235 | ||
9830ad4c | 1236 | ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark); |
c5441932 PS |
1237 | return 0; |
1238 | } | |
1239 | EXPORT_SYMBOL_GPL(ip_tunnel_changelink); | |
1240 | ||
1241 | int ip_tunnel_init(struct net_device *dev) | |
1242 | { | |
1243 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
1244 | struct iphdr *iph = &tunnel->parms.iph; | |
1c213bd2 | 1245 | int err; |
c5441932 | 1246 | |
cf124db5 DM |
1247 | dev->needs_free_netdev = true; |
1248 | dev->priv_destructor = ip_tunnel_dev_free; | |
1c213bd2 | 1249 | dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); |
c5441932 PS |
1250 | if (!dev->tstats) |
1251 | return -ENOMEM; | |
1252 | ||
e09acddf PA |
1253 | err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); |
1254 | if (err) { | |
9a4aa9af | 1255 | free_percpu(dev->tstats); |
e09acddf | 1256 | return err; |
9a4aa9af TH |
1257 | } |
1258 | ||
c5441932 PS |
1259 | err = gro_cells_init(&tunnel->gro_cells, dev); |
1260 | if (err) { | |
e09acddf | 1261 | dst_cache_destroy(&tunnel->dst_cache); |
c5441932 PS |
1262 | free_percpu(dev->tstats); |
1263 | return err; | |
1264 | } | |
1265 | ||
1266 | tunnel->dev = dev; | |
6c742e71 | 1267 | tunnel->net = dev_net(dev); |
c5441932 PS |
1268 | strcpy(tunnel->parms.name, dev->name); |
1269 | iph->version = 4; | |
1270 | iph->ihl = 5; | |
1271 | ||
d0f41851 | 1272 | if (tunnel->collect_md) |
2e15ea39 | 1273 | netif_keep_dst(dev); |
c5441932 PS |
1274 | return 0; |
1275 | } | |
1276 | EXPORT_SYMBOL_GPL(ip_tunnel_init); | |
1277 | ||
1278 | void ip_tunnel_uninit(struct net_device *dev) | |
1279 | { | |
c5441932 | 1280 | struct ip_tunnel *tunnel = netdev_priv(dev); |
6c742e71 | 1281 | struct net *net = tunnel->net; |
c5441932 PS |
1282 | struct ip_tunnel_net *itn; |
1283 | ||
1284 | itn = net_generic(net, tunnel->ip_tnl_net_id); | |
ba61539c TY |
1285 | ip_tunnel_del(itn, netdev_priv(dev)); |
1286 | if (itn->fb_tunnel_dev == dev) | |
1287 | WRITE_ONCE(itn->fb_tunnel_dev, NULL); | |
7d442fab | 1288 | |
e09acddf | 1289 | dst_cache_reset(&tunnel->dst_cache); |
c5441932 PS |
1290 | } |
1291 | EXPORT_SYMBOL_GPL(ip_tunnel_uninit); | |
1292 | ||
1293 | /* Do least required initialization, rest of init is done in tunnel_init call */ | |
c7d03a00 | 1294 | void ip_tunnel_setup(struct net_device *dev, unsigned int net_id) |
c5441932 PS |
1295 | { |
1296 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
1297 | tunnel->ip_tnl_net_id = net_id; | |
1298 | } | |
1299 | EXPORT_SYMBOL_GPL(ip_tunnel_setup); | |
1300 | ||
1301 | MODULE_LICENSE("GPL"); |