Commit | Line | Data |
---|---|---|
c9422999 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
c5441932 PS |
2 | /* |
3 | * Copyright (c) 2013 Nicira, Inc. | |
c5441932 PS |
4 | */ |
5 | ||
6 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
7 | ||
8 | #include <linux/capability.h> | |
9 | #include <linux/module.h> | |
10 | #include <linux/types.h> | |
11 | #include <linux/kernel.h> | |
12 | #include <linux/slab.h> | |
13 | #include <linux/uaccess.h> | |
14 | #include <linux/skbuff.h> | |
15 | #include <linux/netdevice.h> | |
16 | #include <linux/in.h> | |
17 | #include <linux/tcp.h> | |
18 | #include <linux/udp.h> | |
19 | #include <linux/if_arp.h> | |
c5441932 PS |
20 | #include <linux/init.h> |
21 | #include <linux/in6.h> | |
22 | #include <linux/inetdevice.h> | |
23 | #include <linux/igmp.h> | |
24 | #include <linux/netfilter_ipv4.h> | |
25 | #include <linux/etherdevice.h> | |
26 | #include <linux/if_ether.h> | |
27 | #include <linux/if_vlan.h> | |
28 | #include <linux/rculist.h> | |
27d79f3b | 29 | #include <linux/err.h> |
c5441932 PS |
30 | |
31 | #include <net/sock.h> | |
32 | #include <net/ip.h> | |
33 | #include <net/icmp.h> | |
34 | #include <net/protocol.h> | |
35 | #include <net/ip_tunnels.h> | |
36 | #include <net/arp.h> | |
37 | #include <net/checksum.h> | |
38 | #include <net/dsfield.h> | |
39 | #include <net/inet_ecn.h> | |
40 | #include <net/xfrm.h> | |
41 | #include <net/net_namespace.h> | |
42 | #include <net/netns/generic.h> | |
8ef890df | 43 | #include <net/netdev_lock.h> |
c5441932 | 44 | #include <net/rtnetlink.h> |
56328486 | 45 | #include <net/udp.h> |
cfc7381b | 46 | #include <net/dst_metadata.h> |
c34cfe72 | 47 | #include <net/inet_dscp.h> |
63487bab | 48 | |
c5441932 PS |
49 | #if IS_ENABLED(CONFIG_IPV6) |
50 | #include <net/ipv6.h> | |
51 | #include <net/ip6_fib.h> | |
52 | #include <net/ip6_route.h> | |
53 | #endif | |
54 | ||
967680e0 | 55 | static unsigned int ip_tunnel_hash(__be32 key, __be32 remote) |
c5441932 PS |
56 | { |
57 | return hash_32((__force u32)key ^ (__force u32)remote, | |
58 | IP_TNL_HASH_BITS); | |
59 | } | |
60 | ||
117aef12 | 61 | static bool ip_tunnel_key_match(const struct ip_tunnel_parm_kern *p, |
5832c4a7 | 62 | const unsigned long *flags, __be32 key) |
c5441932 | 63 | { |
5832c4a7 AL |
64 | if (!test_bit(IP_TUNNEL_KEY_BIT, flags)) |
65 | return !test_bit(IP_TUNNEL_KEY_BIT, p->i_flags); | |
66 | ||
67 | return test_bit(IP_TUNNEL_KEY_BIT, p->i_flags) && p->i_key == key; | |
c5441932 PS |
68 | } |
69 | ||
70 | /* Fallback tunnel: no source, no destination, no key, no options | |
71 | ||
72 | Tunnel hash table: | |
73 | We require an exact key match, i.e. if a key is present in the packet | |
74 | it will match only a tunnel with the same key; if it is not present, | |
75 | it will match only a keyless tunnel. | |
76 | ||
77 | All keyless packets that do not match a configured keyless tunnel | |
78 | will match the fallback tunnel. | |
79 | Given src, dst and key, find the appropriate tunnel for input. | |
80 | */ | |
/*
 * Lookup order, as implemented below. In every pass a hit whose parms.link
 * equals @link is returned immediately; any other hit is only remembered
 * in cand.
 *   1. bucket hash(key, remote): saddr == local and daddr == remote
 *   2. same bucket:              daddr == remote and saddr == 0
 *   3. bucket hash(key, 0):      saddr == local with daddr == 0, or
 *                                daddr == local when local is multicast
 *   4. same bucket:              saddr == 0 and daddr == 0
 * Passes 1-3 require ip_tunnel_key_match(); pass 4 compares i_key with @key
 * only when IP_TUNNEL_NO_KEY_BIT is clear in @flags. Devices without IFF_UP
 * are skipped in every pass. When no pass returns, the result is cand, then
 * the collect_md tunnel (if up), then the fallback device (if up), else NULL.
 * NOTE(review): the _rcu list iterators and rcu_dereference() suggest that
 * callers run under rcu_read_lock() -- confirm against the callers.
 */
81 | struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, | |
5832c4a7 | 82 | int link, const unsigned long *flags, |
c5441932 PS |
83 | __be32 remote, __be32 local, |
84 | __be32 key) | |
85 | { | |
c5441932 PS |
86 | struct ip_tunnel *t, *cand = NULL; |
87 | struct hlist_head *head; | |
ba61539c TY |
88 | struct net_device *ndev; |
89 | unsigned int hash; | |
c5441932 | 90 | |
967680e0 | 91 | hash = ip_tunnel_hash(key, remote); |
c5441932 PS |
92 | head = &itn->tunnels[hash]; |
93 | ||
/* Pass 1: both endpoints match exactly. */
94 | hlist_for_each_entry_rcu(t, head, hash_node) { | |
95 | if (local != t->parms.iph.saddr || | |
96 | remote != t->parms.iph.daddr || | |
97 | !(t->dev->flags & IFF_UP)) | |
98 | continue; | |
99 | ||
100 | if (!ip_tunnel_key_match(&t->parms, flags, key)) | |
101 | continue; | |
102 | ||
f694eee9 | 103 | if (READ_ONCE(t->parms.link) == link) |
c5441932 | 104 | return t; |
/* Unlike the later passes, this one overwrites cand unconditionally. */
f694eee9 | 105 | cand = t; |
c5441932 PS |
106 | } |
107 | ||
/* Pass 2: remote matches, tunnel has no local address configured. */
108 | hlist_for_each_entry_rcu(t, head, hash_node) { | |
109 | if (remote != t->parms.iph.daddr || | |
e0056593 | 110 | t->parms.iph.saddr != 0 || |
c5441932 PS |
111 | !(t->dev->flags & IFF_UP)) |
112 | continue; | |
113 | ||
114 | if (!ip_tunnel_key_match(&t->parms, flags, key)) | |
115 | continue; | |
116 | ||
f694eee9 | 117 | if (READ_ONCE(t->parms.link) == link) |
c5441932 | 118 | return t; |
f694eee9 | 119 | if (!cand) |
c5441932 PS |
120 | cand = t; |
121 | } | |
122 | ||
/* The remaining passes use the bucket computed with a zero remote. */
967680e0 | 123 | hash = ip_tunnel_hash(key, 0); |
c5441932 PS |
124 | head = &itn->tunnels[hash]; |
125 | ||
/*
 * Pass 3: accept a tunnel bound to local with no remote, or a tunnel whose
 * remote equals local when local is a multicast address.
 */
126 | hlist_for_each_entry_rcu(t, head, hash_node) { | |
e0056593 DP |
127 | if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) && |
128 | (local != t->parms.iph.daddr || !ipv4_is_multicast(local))) | |
129 | continue; | |
130 | ||
131 | if (!(t->dev->flags & IFF_UP)) | |
c5441932 PS |
132 | continue; |
133 | ||
134 | if (!ip_tunnel_key_match(&t->parms, flags, key)) | |
135 | continue; | |
136 | ||
f694eee9 | 137 | if (READ_ONCE(t->parms.link) == link) |
c5441932 | 138 | return t; |
f694eee9 | 139 | if (!cand) |
c5441932 PS |
140 | cand = t; |
141 | } | |
142 | ||
/*
 * Pass 4: tunnels with neither address set. The key is compared directly
 * here, and only when the packet flags do not carry IP_TUNNEL_NO_KEY_BIT.
 */
c5441932 | 143 | hlist_for_each_entry_rcu(t, head, hash_node) { |
5832c4a7 AL |
144 | if ((!test_bit(IP_TUNNEL_NO_KEY_BIT, flags) && |
145 | t->parms.i_key != key) || | |
e0056593 DP |
146 | t->parms.iph.saddr != 0 || |
147 | t->parms.iph.daddr != 0 || | |
c5441932 PS |
148 | !(t->dev->flags & IFF_UP)) |
149 | continue; | |
150 | ||
f694eee9 | 151 | if (READ_ONCE(t->parms.link) == link) |
c5441932 | 152 | return t; |
f694eee9 | 153 | if (!cand) |
c5441932 PS |
154 | cand = t; |
155 | } | |
156 | ||
/* No hit with a matching link: settle for a hit on a different link. */
c5441932 PS |
157 | if (cand) |
158 | return cand; | |
159 | ||
/* Next choice: the collect_md tunnel of this netns state, if it is up. */
2e15ea39 | 160 | t = rcu_dereference(itn->collect_md_tun); |
833a8b40 | 161 | if (t && t->dev->flags & IFF_UP) |
2e15ea39 PS |
162 | return t; |
163 | ||
/* Last choice: the fallback tunnel device, if present and up. */
ba61539c TY |
164 | ndev = READ_ONCE(itn->fb_tunnel_dev); |
165 | if (ndev && ndev->flags & IFF_UP) | |
166 | return netdev_priv(ndev); | |
c5441932 | 167 | |
c5441932 PS |
168 | return NULL; |
169 | } | |
170 | EXPORT_SYMBOL_GPL(ip_tunnel_lookup); | |
171 | ||
172 | static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn, | |
117aef12 | 173 | struct ip_tunnel_parm_kern *parms) |
c5441932 PS |
174 | { |
175 | unsigned int h; | |
176 | __be32 remote; | |
6d608f06 | 177 | __be32 i_key = parms->i_key; |
c5441932 PS |
178 | |
179 | if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr)) | |
180 | remote = parms->iph.daddr; | |
181 | else | |
182 | remote = 0; | |
183 | ||
5832c4a7 AL |
184 | if (!test_bit(IP_TUNNEL_KEY_BIT, parms->i_flags) && |
185 | test_bit(IP_TUNNEL_VTI_BIT, parms->i_flags)) | |
6d608f06 SK |
186 | i_key = 0; |
187 | ||
188 | h = ip_tunnel_hash(i_key, remote); | |
c5441932 PS |
189 | return &itn->tunnels[h]; |
190 | } | |
191 | ||
192 | static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t) | |
193 | { | |
194 | struct hlist_head *head = ip_bucket(itn, &t->parms); | |
195 | ||
2e15ea39 PS |
196 | if (t->collect_md) |
197 | rcu_assign_pointer(itn->collect_md_tun, t); | |
c5441932 PS |
198 | hlist_add_head_rcu(&t->hash_node, head); |
199 | } | |
200 | ||
2e15ea39 | 201 | static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t) |
c5441932 | 202 | { |
2e15ea39 PS |
203 | if (t->collect_md) |
204 | rcu_assign_pointer(itn->collect_md_tun, NULL); | |
c5441932 PS |
205 | hlist_del_init_rcu(&t->hash_node); |
206 | } | |
207 | ||
208 | static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, | |
117aef12 | 209 | struct ip_tunnel_parm_kern *parms, |
c5441932 PS |
210 | int type) |
211 | { | |
212 | __be32 remote = parms->iph.daddr; | |
213 | __be32 local = parms->iph.saddr; | |
5832c4a7 | 214 | IP_TUNNEL_DECLARE_FLAGS(flags); |
c5441932 PS |
215 | __be32 key = parms->i_key; |
216 | int link = parms->link; | |
217 | struct ip_tunnel *t = NULL; | |
218 | struct hlist_head *head = ip_bucket(itn, parms); | |
219 | ||
5832c4a7 AL |
220 | ip_tunnel_flags_copy(flags, parms->i_flags); |
221 | ||
90e0569d | 222 | hlist_for_each_entry_rcu(t, head, hash_node, lockdep_rtnl_is_held()) { |
c5441932 PS |
223 | if (local == t->parms.iph.saddr && |
224 | remote == t->parms.iph.daddr && | |
f694eee9 | 225 | link == READ_ONCE(t->parms.link) && |
5ce54af1 DP |
226 | type == t->dev->type && |
227 | ip_tunnel_key_match(&t->parms, flags, key)) | |
c5441932 PS |
228 | break; |
229 | } | |
230 | return t; | |
231 | } | |
232 | ||
233 | static struct net_device *__ip_tunnel_create(struct net *net, | |
234 | const struct rtnl_link_ops *ops, | |
117aef12 | 235 | struct ip_tunnel_parm_kern *parms) |
c5441932 PS |
236 | { |
237 | int err; | |
238 | struct ip_tunnel *tunnel; | |
239 | struct net_device *dev; | |
240 | char name[IFNAMSIZ]; | |
241 | ||
9cb726a2 ED |
242 | err = -E2BIG; |
243 | if (parms->name[0]) { | |
244 | if (!dev_valid_name(parms->name)) | |
245 | goto failed; | |
c2dbda07 | 246 | strscpy(name, parms->name); |
9cb726a2 ED |
247 | } else { |
248 | if (strlen(ops->kind) > (IFNAMSIZ - 3)) | |
c5441932 | 249 | goto failed; |
c2dbda07 | 250 | strscpy(name, ops->kind); |
000ade80 | 251 | strcat(name, "%d"); |
c5441932 PS |
252 | } |
253 | ||
254 | ASSERT_RTNL(); | |
c835a677 | 255 | dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup); |
c5441932 PS |
256 | if (!dev) { |
257 | err = -ENOMEM; | |
258 | goto failed; | |
259 | } | |
260 | dev_net_set(dev, net); | |
261 | ||
262 | dev->rtnl_link_ops = ops; | |
263 | ||
264 | tunnel = netdev_priv(dev); | |
265 | tunnel->parms = *parms; | |
5e6700b3 | 266 | tunnel->net = net; |
c5441932 PS |
267 | |
268 | err = register_netdevice(dev); | |
269 | if (err) | |
270 | goto failed_free; | |
271 | ||
272 | return dev; | |
273 | ||
274 | failed_free: | |
275 | free_netdev(dev); | |
276 | failed: | |
277 | return ERR_PTR(err); | |
278 | } | |
279 | ||
c5441932 PS |
280 | static int ip_tunnel_bind_dev(struct net_device *dev) |
281 | { | |
282 | struct net_device *tdev = NULL; | |
283 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
284 | const struct iphdr *iph; | |
285 | int hlen = LL_MAX_HEADER; | |
286 | int mtu = ETH_DATA_LEN; | |
287 | int t_hlen = tunnel->hlen + sizeof(struct iphdr); | |
288 | ||
289 | iph = &tunnel->parms.iph; | |
290 | ||
291 | /* Guess output device to choose reasonable mtu and needed_headroom */ | |
292 | if (iph->daddr) { | |
293 | struct flowi4 fl4; | |
294 | struct rtable *rt; | |
295 | ||
b0066da5 PM |
296 | ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr, |
297 | iph->saddr, tunnel->parms.o_key, | |
b5a7b661 | 298 | iph->tos & INET_DSCP_MASK, tunnel->net, |
7ec9fce4 | 299 | tunnel->parms.link, tunnel->fwmark, 0, 0); |
7d442fab TH |
300 | rt = ip_route_output_key(tunnel->net, &fl4); |
301 | ||
c5441932 PS |
302 | if (!IS_ERR(rt)) { |
303 | tdev = rt->dst.dev; | |
304 | ip_rt_put(rt); | |
305 | } | |
306 | if (dev->type != ARPHRD_ETHER) | |
307 | dev->flags |= IFF_POINTOPOINT; | |
f27337e1 PA |
308 | |
309 | dst_cache_reset(&tunnel->dst_cache); | |
c5441932 PS |
310 | } |
311 | ||
312 | if (!tdev && tunnel->parms.link) | |
6c742e71 | 313 | tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); |
c5441932 PS |
314 | |
315 | if (tdev) { | |
316 | hlen = tdev->hard_header_len + tdev->needed_headroom; | |
82612de1 | 317 | mtu = min(tdev->mtu, IP_MAX_MTU); |
c5441932 | 318 | } |
c5441932 PS |
319 | |
320 | dev->needed_headroom = t_hlen + hlen; | |
9992a078 | 321 | mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0); |
c5441932 | 322 | |
b5476022 ED |
323 | if (mtu < IPV4_MIN_MTU) |
324 | mtu = IPV4_MIN_MTU; | |
c5441932 PS |
325 | |
326 | return mtu; | |
327 | } | |
328 | ||
329 | static struct ip_tunnel *ip_tunnel_create(struct net *net, | |
330 | struct ip_tunnel_net *itn, | |
117aef12 | 331 | struct ip_tunnel_parm_kern *parms) |
c5441932 | 332 | { |
4929fd8c | 333 | struct ip_tunnel *nt; |
c5441932 | 334 | struct net_device *dev; |
b96f9afe | 335 | int t_hlen; |
f6cc9c05 PM |
336 | int mtu; |
337 | int err; | |
c5441932 | 338 | |
79134e6c | 339 | dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms); |
c5441932 | 340 | if (IS_ERR(dev)) |
6dd3c9ec | 341 | return ERR_CAST(dev); |
c5441932 | 342 | |
f6cc9c05 PM |
343 | mtu = ip_tunnel_bind_dev(dev); |
344 | err = dev_set_mtu(dev, mtu); | |
345 | if (err) | |
346 | goto err_dev_set_mtu; | |
c5441932 PS |
347 | |
348 | nt = netdev_priv(dev); | |
b96f9afe JW |
349 | t_hlen = nt->hlen + sizeof(struct iphdr); |
350 | dev->min_mtu = ETH_MIN_MTU; | |
28e104d0 | 351 | dev->max_mtu = IP_MAX_MTU - t_hlen; |
9992a078 HL |
352 | if (dev->type == ARPHRD_ETHER) |
353 | dev->max_mtu -= dev->hard_header_len; | |
354 | ||
c5441932 PS |
355 | ip_tunnel_add(itn, nt); |
356 | return nt; | |
f6cc9c05 PM |
357 | |
358 | err_dev_set_mtu: | |
359 | unregister_netdevice(dev); | |
360 | return ERR_PTR(err); | |
c5441932 PS |
361 | } |
362 | ||
ac931d4c CE |
363 | void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info) |
364 | { | |
365 | const struct iphdr *iph = ip_hdr(skb); | |
366 | const struct udphdr *udph; | |
367 | ||
368 | if (iph->protocol != IPPROTO_UDP) | |
369 | return; | |
370 | ||
371 | udph = (struct udphdr *)((__u8 *)iph + (iph->ihl << 2)); | |
372 | info->encap.sport = udph->source; | |
373 | info->encap.dport = udph->dest; | |
374 | } | |
375 | EXPORT_SYMBOL(ip_tunnel_md_udp_encap); | |
376 | ||
/*
 * Receive path for a decapsulated packet that was matched to @tunnel.
 *
 * @tunnel:        tunnel the packet belongs to
 * @skb:           the packet; it is always consumed (handed to GRO cells on
 *                 success, freed on drop)
 * @tpi:           parsed tunnel header info (flags and sequence number are
 *                 read here)
 * @tun_dst:       optional metadata dst; attached to the skb on success,
 *                 released on drop
 * @log_ecn_error: emit a rate-limited message when ECN decapsulation
 *                 reports a problem
 *
 * Validates checksum-flag and sequence-number expectations, makes sure the
 * inner header can be pulled, applies ECN decapsulation, updates device
 * statistics and passes the packet on. Returns 0 on every path.
 */
c5441932 | 377 | int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, |
2e15ea39 PS |
378 | const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, |
379 | bool log_ecn_error) | |
c5441932 | 380 | { |
c5441932 | 381 | const struct iphdr *iph = ip_hdr(skb); |
b0ec2abf | 382 | int nh, err; |
c5441932 | 383 | |
c5441932 PS |
384 | #ifdef CONFIG_NET_IPGRE_BROADCAST |
/* Packets sent to a multicast outer address are counted and marked. */
385 | if (ipv4_is_multicast(iph->daddr)) { | |
c4794d22 | 386 | DEV_STATS_INC(tunnel->dev, multicast); |
c5441932 PS |
387 | skb->pkt_type = PACKET_BROADCAST; |
388 | } | |
389 | #endif | |
390 | ||
/* The packet's checksum flag must agree with the tunnel configuration. */
5832c4a7 AL |
391 | if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.i_flags) != |
392 | test_bit(IP_TUNNEL_CSUM_BIT, tpi->flags)) { | |
c4794d22 ED |
393 | DEV_STATS_INC(tunnel->dev, rx_crc_errors); |
394 | DEV_STATS_INC(tunnel->dev, rx_errors); | |
c5441932 PS |
395 | goto drop; |
396 | } | |
397 | ||
/*
 * When the tunnel expects sequence numbers, drop packets that carry none
 * or whose number lies behind the expected one (signed 32-bit difference;
 * the comparison is skipped while i_seqno is still 0). An accepted packet
 * advances the expected number.
 */
5832c4a7 AL |
398 | if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.i_flags)) { |
399 | if (!test_bit(IP_TUNNEL_SEQ_BIT, tpi->flags) || | |
c5441932 | 400 | (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { |
c4794d22 ED |
401 | DEV_STATS_INC(tunnel->dev, rx_fifo_errors); |
402 | DEV_STATS_INC(tunnel->dev, rx_errors); | |
c5441932 PS |
403 | goto drop; |
404 | } | |
405 | tunnel->i_seqno = ntohl(tpi->seq) + 1; | |
406 | } | |
407 | ||
b0ec2abf ED |
408 | /* Save offset of outer header relative to skb->head, |
409 | * because we are going to reset the network header to the inner header | |
410 | * and might change skb->head. | |
411 | */ | |
412 | nh = skb_network_header(skb) - skb->head; | |
413 | ||
/* For Ethernet-type tunnels the inner network header follows ETH_HLEN bytes. */
227adfb2 | 414 | skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0); |
e96f2e7c | 415 | |
b0ec2abf ED |
416 | if (!pskb_inet_may_pull(skb)) { |
417 | DEV_STATS_INC(tunnel->dev, rx_length_errors); | |
418 | DEV_STATS_INC(tunnel->dev, rx_errors); | |
419 | goto drop; | |
420 | } | |
/* Re-derive the outer header pointer from the saved offset. */
421 | iph = (struct iphdr *)(skb->head + nh); | |
422 | ||
/* A non-zero result is optionally logged; only a result above 1 drops. */
c5441932 PS |
423 | err = IP_ECN_decapsulate(iph, skb); |
424 | if (unlikely(err)) { | |
425 | if (log_ecn_error) | |
426 | net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", | |
427 | &iph->saddr, iph->tos); | |
428 | if (err > 1) { | |
c4794d22 ED |
429 | DEV_STATS_INC(tunnel->dev, rx_frame_errors); |
430 | DEV_STATS_INC(tunnel->dev, rx_errors); | |
c5441932 PS |
431 | goto drop; |
432 | } | |
433 | } | |
434 | ||
560b50cf | 435 | dev_sw_netstats_rx_add(tunnel->dev, skb->len); |
/* The second argument is true when the tunnel's netns differs from the device's. */
81b9eab5 AS |
436 | skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); |
437 | ||
3d7b46cd PS |
438 | if (tunnel->dev->type == ARPHRD_ETHER) { |
439 | skb->protocol = eth_type_trans(skb, tunnel->dev); | |
440 | skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); | |
441 | } else { | |
442 | skb->dev = tunnel->dev; | |
443 | } | |
64261f23 | 444 | |
2e15ea39 PS |
445 | if (tun_dst) |
446 | skb_dst_set(skb, (struct dst_entry *)tun_dst); | |
447 | ||
c5441932 PS |
448 | gro_cells_receive(&tunnel->gro_cells, skb); |
449 | return 0; | |
450 | ||
451 | drop: | |
/* Drop path: release the unused metadata dst and free the packet. */
469f87e1 HY |
452 | if (tun_dst) |
453 | dst_release((struct dst_entry *)tun_dst); | |
c5441932 PS |
454 | kfree_skb(skb); |
455 | return 0; | |
456 | } | |
457 | EXPORT_SYMBOL_GPL(ip_tunnel_rcv); | |
458 | ||
a8c5f90f TH |
459 | int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops, |
460 | unsigned int num) | |
461 | { | |
bb1553c8 TG |
462 | if (num >= MAX_IPTUN_ENCAP_OPS) |
463 | return -ERANGE; | |
464 | ||
a8c5f90f TH |
465 | return !cmpxchg((const struct ip_tunnel_encap_ops **) |
466 | &iptun_encaps[num], | |
467 | NULL, ops) ? 0 : -1; | |
56328486 | 468 | } |
a8c5f90f TH |
469 | EXPORT_SYMBOL(ip_tunnel_encap_add_ops); |
470 | ||
471 | int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops, | |
472 | unsigned int num) | |
473 | { | |
474 | int ret; | |
475 | ||
bb1553c8 TG |
476 | if (num >= MAX_IPTUN_ENCAP_OPS) |
477 | return -ERANGE; | |
478 | ||
a8c5f90f TH |
479 | ret = (cmpxchg((const struct ip_tunnel_encap_ops **) |
480 | &iptun_encaps[num], | |
481 | ops, NULL) == ops) ? 0 : -1; | |
482 | ||
483 | synchronize_net(); | |
484 | ||
485 | return ret; | |
486 | } | |
487 | EXPORT_SYMBOL(ip_tunnel_encap_del_ops); | |
56328486 TH |
488 | |
489 | int ip_tunnel_encap_setup(struct ip_tunnel *t, | |
490 | struct ip_tunnel_encap *ipencap) | |
491 | { | |
492 | int hlen; | |
493 | ||
494 | memset(&t->encap, 0, sizeof(t->encap)); | |
495 | ||
496 | hlen = ip_encap_hlen(ipencap); | |
497 | if (hlen < 0) | |
498 | return hlen; | |
499 | ||
500 | t->encap.type = ipencap->type; | |
501 | t->encap.sport = ipencap->sport; | |
502 | t->encap.dport = ipencap->dport; | |
503 | t->encap.flags = ipencap->flags; | |
504 | ||
505 | t->encap_hlen = hlen; | |
506 | t->hlen = t->encap_hlen + t->tun_hlen; | |
507 | ||
508 | return 0; | |
509 | } | |
510 | EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup); | |
511 | ||
/*
 * Work out the MTU available to the payload of @skb when sent over route
 * @rt, propagate it to the skb's dst, and signal "packet too big" to the
 * sender when the packet does not fit.
 *
 * @dev:         tunnel device
 * @skb:         packet about to be encapsulated
 * @rt:          route of the outer packet
 * @df:          outer DF setting (non-zero: do not fragment)
 * @inner_iph:   inner IPv4 header (read only for IPv4 payloads)
 * @tunnel_hlen: tunnel header length, honoured only when @md is true
 * @dst:         outer destination, used only when @md is true (IPv6 branch)
 * @md:          true for metadata-based transmit; otherwise the device's own
 *               hlen and configured daddr are used
 *
 * Returns 0 when the packet may be sent, -E2BIG after an ICMP / ICMPv6 error
 * has been generated for an oversized packet.
 */
23a3647b | 512 | static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, |
fc24f2b2 | 513 | struct rtable *rt, __be16 df, |
c8b34e68 | 514 | const struct iphdr *inner_iph, |
515 | int tunnel_hlen, __be32 dst, bool md) | |
23a3647b PS |
516 | { |
517 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
c8b34e68 | 518 | int pkt_size; |
23a3647b PS |
519 | int mtu; |
520 | ||
/* Payload size: skb length minus tunnel header (and inner Ethernet header). */
c8b34e68 | 521 | tunnel_hlen = md ? tunnel_hlen : tunnel->hlen; |
28e104d0 | 522 | pkt_size = skb->len - tunnel_hlen; |
9992a078 | 523 | pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0; |
c8b34e68 | 524 | |
/*
 * With DF set the limit is the route MTU minus all added headers;
 * otherwise it is the MTU of the skb's own dst, or the device MTU.
 */
9992a078 | 525 | if (df) { |
28e104d0 | 526 | mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen); |
9992a078 HL |
527 | mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0; |
528 | } else { | |
f4b3ec4e | 529 | mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; |
9992a078 | 530 | } |
23a3647b | 531 | |
f4b3ec4e | 532 | if (skb_valid_dst(skb)) |
7a1592bc | 533 | skb_dst_update_pmtu_no_confirm(skb, mtu); |
23a3647b PS |
534 | |
/* IPv4 payload: only non-GSO packets with inner DF set can be rejected. */
535 | if (skb->protocol == htons(ETH_P_IP)) { | |
536 | if (!skb_is_gso(skb) && | |
fc24f2b2 TT |
537 | (inner_iph->frag_off & htons(IP_DF)) && |
538 | mtu < pkt_size) { | |
4372339e | 539 | icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); |
23a3647b PS |
540 | return -E2BIG; |
541 | } | |
542 | } | |
543 | #if IS_ENABLED(CONFIG_IPV6) | |
544 | else if (skb->protocol == htons(ETH_P_IPV6)) { | |
f4b3ec4e | 545 | struct rt6_info *rt6; |
c8b34e68 | 546 | __be32 daddr; |
547 | ||
e8dfd42c | 548 | rt6 = skb_valid_dst(skb) ? dst_rt6_info(skb_dst(skb)) : |
f4b3ec4e | 549 | NULL; |
c8b34e68 | 550 | daddr = md ? dst : tunnel->parms.iph.daddr; |
23a3647b PS |
551 | |
/*
 * Lower the MTU metric of the inner IPv6 route when the new value is
 * smaller but still at least IPV6_MIN_MTU, and either the tunnel has a
 * unicast destination or the route is a /128 host route.
 */
552 | if (rt6 && mtu < dst_mtu(skb_dst(skb)) && | |
553 | mtu >= IPV6_MIN_MTU) { | |
c8b34e68 | 554 | if ((daddr && !ipv4_is_multicast(daddr)) || |
23a3647b PS |
555 | rt6->rt6i_dst.plen == 128) { |
556 | rt6->rt6i_flags |= RTF_MODIFIED; | |
557 | dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); | |
558 | } | |
559 | } | |
560 | ||
/* IPv6 payload: reject oversized non-GSO packets with a Packet Too Big. */
561 | if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && | |
562 | mtu < pkt_size) { | |
4372339e | 563 | icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
23a3647b PS |
564 | return -E2BIG; |
565 | } | |
566 | } | |
567 | #endif | |
568 | return 0; | |
569 | } | |
570 | ||
5ae1e992 FW |
571 | static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom) |
572 | { | |
573 | /* we must cap headroom to some upperlimit, else pskb_expand_head | |
574 | * will overflow header offsets in skb_headers_offset_update(). | |
575 | */ | |
576 | static const unsigned int max_allowed = 512; | |
577 | ||
578 | if (headroom > max_allowed) | |
579 | headroom = max_allowed; | |
580 | ||
581 | if (headroom > READ_ONCE(dev->needed_headroom)) | |
582 | WRITE_ONCE(dev->needed_headroom, headroom); | |
583 | } | |
584 | ||
/*
 * Transmit @skb through tunnel device @dev using the tunnel metadata
 * attached to the skb: outer addresses, TOS, TTL and the DF flag all come
 * from the skb's ip_tunnel_info key instead of the device's parms.
 *
 * @skb:         packet to send; always consumed
 * @dev:         tunnel device
 * @proto:       outer IP protocol number (may be changed by ip_tunnel_encap())
 * @tunnel_hlen: tunnel header length; when 0, the header length of the
 *               encapsulation described by the tunnel info is used
 *
 * On failure the matching device counter is incremented and the skb freed.
 */
c8b34e68 | 585 | void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, |
586 | u8 proto, int tunnel_hlen) | |
cfc7381b AS |
587 | { |
588 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
589 | u32 headroom = sizeof(struct iphdr); | |
590 | struct ip_tunnel_info *tun_info; | |
591 | const struct ip_tunnel_key *key; | |
592 | const struct iphdr *inner_iph; | |
f46fe4f8 | 593 | struct rtable *rt = NULL; |
cfc7381b AS |
594 | struct flowi4 fl4; |
595 | __be16 df = 0; | |
596 | u8 tos, ttl; | |
f46fe4f8 | 597 | bool use_cache; |
cfc7381b AS |
598 | |
/* Only IPv4 transmit-mode tunnel metadata is accepted. */
599 | tun_info = skb_tunnel_info(skb); | |
600 | if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || | |
601 | ip_tunnel_info_af(tun_info) != AF_INET)) | |
602 | goto tx_error; | |
603 | key = &tun_info->key; | |
604 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | |
605 | inner_iph = (const struct iphdr *)skb_inner_network_header(skb); | |
/* A key TOS of exactly 1 means: take the TOS from the inner header. */
606 | tos = key->tos; | |
607 | if (tos == 1) { | |
608 | if (skb->protocol == htons(ETH_P_IP)) | |
609 | tos = inner_iph->tos; | |
610 | else if (skb->protocol == htons(ETH_P_IPV6)) | |
611 | tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); | |
612 | } | |
6e6b904a | 613 | ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, |
c34cfe72 | 614 | tunnel_id_to_key32(key->tun_id), |
b5a7b661 | 615 | tos & INET_DSCP_MASK, tunnel->net, 0, skb->mark, |
c34cfe72 | 616 | skb_get_hash(skb), key->flow_flags); |
ac931d4c CE |
617 | |
618 | if (!tunnel_hlen) | |
619 | tunnel_hlen = ip_encap_hlen(&tun_info->encap); | |
620 | ||
621 | if (ip_tunnel_encap(skb, &tun_info->encap, &proto, &fl4) < 0) | |
cfc7381b | 622 | goto tx_error; |
f46fe4f8 | 623 | |
/* Try the per-metadata dst cache first; fall back to a route lookup. */
624 | use_cache = ip_tunnel_dst_cache_usable(skb, tun_info); | |
625 | if (use_cache) | |
626 | rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr); | |
627 | if (!rt) { | |
628 | rt = ip_route_output_key(tunnel->net, &fl4); | |
629 | if (IS_ERR(rt)) { | |
c4794d22 | 630 | DEV_STATS_INC(dev, tx_carrier_errors); |
f46fe4f8 | 631 | goto tx_error; |
632 | } | |
633 | if (use_cache) | |
634 | dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst, | |
635 | fl4.saddr); | |
cfc7381b AS |
636 | } |
/* The route points back at this tunnel device: refuse to send. */
637 | if (rt->dst.dev == dev) { | |
638 | ip_rt_put(rt); | |
c4794d22 | 639 | DEV_STATS_INC(dev, collisions); |
cfc7381b AS |
640 | goto tx_error; |
641 | } | |
c8b34e68 | 642 | |
5832c4a7 | 643 | if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags)) |
c8b34e68 | 644 | df = htons(IP_DF); |
645 | if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen, | |
646 | key->u.ipv4.dst, true)) { | |
647 | ip_rt_put(rt); | |
648 | goto tx_error; | |
649 | } | |
650 | ||
cfc7381b AS |
651 | tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); |
/* A key TTL of 0 means: inherit from the inner header, else use the route's hop limit. */
652 | ttl = key->ttl; | |
653 | if (ttl == 0) { | |
654 | if (skb->protocol == htons(ETH_P_IP)) | |
655 | ttl = inner_iph->ttl; | |
656 | else if (skb->protocol == htons(ETH_P_IPV6)) | |
657 | ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; | |
658 | else | |
659 | ttl = ip4_dst_hoplimit(&rt->dst); | |
660 | } | |
c8b34e68 | 661 | |
/* headroom = outer IP header + link-layer space + dst header length */
cfc7381b | 662 | headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; |
5ae1e992 | 663 | if (skb_cow_head(skb, headroom)) { |
cfc7381b AS |
664 | ip_rt_put(rt); |
665 | goto tx_dropped; | |
666 | } | |
5ae1e992 FW |
667 | |
668 | ip_tunnel_adj_headroom(dev, headroom); | |
669 | ||
0f693f19 HY |
670 | iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl, |
671 | df, !net_eq(tunnel->net, dev_net(dev))); | |
cfc7381b AS |
672 | return; |
/* Error exits: count on the device, then free the packet. */
673 | tx_error: | |
c4794d22 | 674 | DEV_STATS_INC(dev, tx_errors); |
cfc7381b AS |
675 | goto kfree; |
676 | tx_dropped: | |
c4794d22 | 677 | DEV_STATS_INC(dev, tx_dropped); |
cfc7381b AS |
678 | kfree: |
679 | kfree_skb(skb); | |
680 | } | |
681 | EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit); | |
682 | ||
/*
 * Encapsulate @skb and transmit it through tunnel device @dev.
 *
 * @skb:        packet to send; always consumed
 * @dev:        tunnel device
 * @tnl_params: outer IPv4 header template (daddr, saddr, tos, ttl and
 *              frag_off are read from it)
 * @protocol:   outer IP protocol number (may be changed by ip_tunnel_encap())
 *
 * The outer destination is tnl_params->daddr, or, when that is 0, a
 * per-packet destination taken from tunnel metadata, the IPv4 next hop or
 * an IPv4-compatible IPv6 address. On failure a device error counter is
 * incremented and the skb is freed.
 */
c5441932 | 683 | void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, |
56328486 | 684 | const struct iphdr *tnl_params, u8 protocol) |
c5441932 PS |
685 | { |
686 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
186d9366 | 687 | struct ip_tunnel_info *tun_info = NULL; |
c5441932 | 688 | const struct iphdr *inner_iph; |
c5441932 | 689 | unsigned int max_headroom; /* The extra header space needed */ |
186d9366 | 690 | struct rtable *rt = NULL; /* Route to the other host */ |
7ae29fd1 | 691 | __be16 payload_protocol; |
186d9366 | 692 | bool use_cache = false; |
693 | struct flowi4 fl4; | |
694 | bool md = false; | |
22fb22ea | 695 | bool connected; |
186d9366 | 696 | u8 tos, ttl; |
697 | __be32 dst; | |
698 | __be16 df; | |
c5441932 PS |
699 | |
700 | inner_iph = (const struct iphdr *)skb_inner_network_header(skb); | |
/* "connected" gates use of a cached route further down. */
22fb22ea | 701 | connected = (tunnel->parms.iph.daddr != 0); |
7ae29fd1 | 702 | payload_protocol = skb_protocol(skb, true); |
c5441932 | 703 | |
5146d1f1 BH |
704 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); |
705 | ||
c5441932 PS |
706 | dst = tnl_params->daddr; |
707 | if (dst == 0) { | |
708 | /* NBMA tunnel */ | |
709 | ||
51456b29 | 710 | if (!skb_dst(skb)) { |
c4794d22 | 711 | DEV_STATS_INC(dev, tx_fifo_errors); |
c5441932 PS |
712 | goto tx_error; |
713 | } | |
714 | ||
/* Preferred source of the destination: IPv4 transmit metadata on the skb. */
d71b5753 | 715 | tun_info = skb_tunnel_info(skb); |
716 | if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) && | |
717 | ip_tunnel_info_af(tun_info) == AF_INET && | |
186d9366 | 718 | tun_info->key.u.ipv4.dst) { |
d71b5753 | 719 | dst = tun_info->key.u.ipv4.dst; |
186d9366 | 720 | md = true; |
721 | connected = true; | |
7ae29fd1 | 722 | } else if (payload_protocol == htons(ETH_P_IP)) { |
/*
 * IPv4 payload: use the next hop of the skb's own route. rt is only
 * borrowed here; md is false on this path, so rt is reassigned by the
 * cache lookup below.
 */
c5441932 PS |
723 | rt = skb_rtable(skb); |
724 | dst = rt_nexthop(rt, inner_iph->daddr); | |
725 | } | |
726 | #if IS_ENABLED(CONFIG_IPV6) | |
7ae29fd1 | 727 | else if (payload_protocol == htons(ETH_P_IPV6)) { |
c5441932 PS |
728 | const struct in6_addr *addr6; |
729 | struct neighbour *neigh; | |
730 | bool do_tx_error_icmp; | |
731 | int addr_type; | |
732 | ||
733 | neigh = dst_neigh_lookup(skb_dst(skb), | |
734 | &ipv6_hdr(skb)->daddr); | |
51456b29 | 735 | if (!neigh) |
c5441932 PS |
736 | goto tx_error; |
737 | ||
738 | addr6 = (const struct in6_addr *)&neigh->primary_key; | |
739 | addr_type = ipv6_addr_type(addr6); | |
740 | ||
/* Unspecified neighbour address: fall back to the packet's destination. */
741 | if (addr_type == IPV6_ADDR_ANY) { | |
742 | addr6 = &ipv6_hdr(skb)->daddr; | |
743 | addr_type = ipv6_addr_type(addr6); | |
744 | } | |
745 | ||
/* Only an IPv4-compatible address yields a destination (its last 32 bits). */
746 | if ((addr_type & IPV6_ADDR_COMPATv4) == 0) | |
747 | do_tx_error_icmp = true; | |
748 | else { | |
749 | do_tx_error_icmp = false; | |
750 | dst = addr6->s6_addr32[3]; | |
751 | } | |
752 | neigh_release(neigh); | |
753 | if (do_tx_error_icmp) | |
754 | goto tx_error_icmp; | |
755 | } | |
756 | #endif | |
757 | else | |
758 | goto tx_error; | |
7d442fab | 759 | |
/* A per-packet destination without metadata must not use a cached route. */
186d9366 | 760 | if (!md) |
761 | connected = false; | |
c5441932 PS |
762 | } |
763 | ||
/*
 * Bit 0 of the template TOS requests inheriting the TOS from the inner
 * header; doing so also disables use of the cached route.
 */
764 | tos = tnl_params->tos; | |
765 | if (tos & 0x1) { | |
766 | tos &= ~0x1; | |
7ae29fd1 | 767 | if (payload_protocol == htons(ETH_P_IP)) { |
c5441932 | 768 | tos = inner_iph->tos; |
7d442fab | 769 | connected = false; |
7ae29fd1 | 770 | } else if (payload_protocol == htons(ETH_P_IPV6)) { |
c5441932 | 771 | tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); |
7d442fab TH |
772 | connected = false; |
773 | } | |
c5441932 PS |
774 | } |
775 | ||
0f3e9c97 | 776 | ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, |
c2b639f9 | 777 | tunnel->parms.o_key, tos & INET_DSCP_MASK, |
b5a7b661 | 778 | tunnel->net, READ_ONCE(tunnel->parms.link), |
7ec9fce4 | 779 | tunnel->fwmark, skb_get_hash(skb), 0); |
7d442fab | 780 | |
ac931d4c | 781 | if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0) |
56328486 TH |
782 | goto tx_error; |
783 | ||
/*
 * Cached route: metadata packets use the dst cache inside the tunnel info
 * (when usable), other connected tunnels use the device's dst cache, and
 * everything else starts without a route.
 */
186d9366 | 784 | if (connected && md) { |
785 | use_cache = ip_tunnel_dst_cache_usable(skb, tun_info); | |
786 | if (use_cache) | |
787 | rt = dst_cache_get_ip4(&tun_info->dst_cache, | |
788 | &fl4.saddr); | |
789 | } else { | |
790 | rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, | |
791 | &fl4.saddr) : NULL; | |
792 | } | |
7d442fab TH |
793 | |
794 | if (!rt) { | |
795 | rt = ip_route_output_key(tunnel->net, &fl4); | |
796 | ||
797 | if (IS_ERR(rt)) { | |
c4794d22 | 798 | DEV_STATS_INC(dev, tx_carrier_errors); |
7d442fab TH |
799 | goto tx_error; |
800 | } | |
/* Store the fresh route in whichever cache was consulted above. */
186d9366 | 801 | if (use_cache) |
802 | dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst, | |
803 | fl4.saddr); | |
804 | else if (!md && connected) | |
e09acddf PA |
805 | dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, |
806 | fl4.saddr); | |
c5441932 | 807 | } |
7d442fab | 808 | |
/* The route points back at this tunnel device: refuse to send. */
0e6fbc5b | 809 | if (rt->dst.dev == dev) { |
c5441932 | 810 | ip_rt_put(rt); |
c4794d22 | 811 | DEV_STATS_INC(dev, collisions); |
c5441932 PS |
812 | goto tx_error; |
813 | } | |
c5441932 | 814 | |
/* Outer DF: template value, plus the inner DF bit unless ignore_df is set. */
50c66167 | 815 | df = tnl_params->frag_off; |
7ae29fd1 | 816 | if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df) |
50c66167 FW |
817 | df |= (inner_iph->frag_off & htons(IP_DF)); |
818 | ||
819 | if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) { | |
23a3647b PS |
820 | ip_rt_put(rt); |
821 | goto tx_error; | |
c5441932 | 822 | } |
c5441932 PS |
823 | |
/*
 * While errors were recorded within the last IPTUNNEL_ERR_TIMEO, consume
 * one per packet and report link failure; older errors are forgotten.
 */
824 | if (tunnel->err_count > 0) { | |
825 | if (time_before(jiffies, | |
826 | tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { | |
827 | tunnel->err_count--; | |
828 | ||
829 | dst_link_failure(skb); | |
830 | } else | |
831 | tunnel->err_count = 0; | |
832 | } | |
833 | ||
d4a71b15 | 834 | tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); |
/* A template TTL of 0 means: inherit from the inner header, else use the route's hop limit. */
c5441932 PS |
835 | ttl = tnl_params->ttl; |
836 | if (ttl == 0) { | |
7ae29fd1 | 837 | if (payload_protocol == htons(ETH_P_IP)) |
c5441932 PS |
838 | ttl = inner_iph->ttl; |
839 | #if IS_ENABLED(CONFIG_IPV6) | |
7ae29fd1 | 840 | else if (payload_protocol == htons(ETH_P_IPV6)) |
c5441932 PS |
841 | ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; |
842 | #endif | |
843 | else | |
844 | ttl = ip4_dst_hoplimit(&rt->dst); | |
845 | } | |
846 | ||
0e6fbc5b | 847 | max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) |
7371e022 | 848 | + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); |
3e08f4a7 | 849 | |
/* Make sure the skb head is writable with enough room for the outer headers. */
5ae1e992 | 850 | if (skb_cow_head(skb, max_headroom)) { |
586d5fc8 | 851 | ip_rt_put(rt); |
c4794d22 | 852 | DEV_STATS_INC(dev, tx_dropped); |
3acfa1e7 | 853 | kfree_skb(skb); |
3e08f4a7 | 854 | return; |
c5441932 PS |
855 | } |
856 | ||
5ae1e992 FW |
857 | ip_tunnel_adj_headroom(dev, max_headroom); |
858 | ||
039f5062 PS |
859 | iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, |
860 | df, !net_eq(tunnel->net, dev_net(dev))); | |
c5441932 PS |
861 | return; |
862 | ||
863 | #if IS_ENABLED(CONFIG_IPV6) | |
864 | tx_error_icmp: | |
865 | dst_link_failure(skb); | |
866 | #endif | |
867 | tx_error: | |
c4794d22 | 868 | DEV_STATS_INC(dev, tx_errors); |
3acfa1e7 | 869 | kfree_skb(skb); |
c5441932 PS |
870 | } |
871 | EXPORT_SYMBOL_GPL(ip_tunnel_xmit); | |
872 | ||
873 | static void ip_tunnel_update(struct ip_tunnel_net *itn, | |
874 | struct ip_tunnel *t, | |
875 | struct net_device *dev, | |
117aef12 | 876 | struct ip_tunnel_parm_kern *p, |
9830ad4c CG |
877 | bool set_mtu, |
878 | __u32 fwmark) | |
c5441932 | 879 | { |
2e15ea39 | 880 | ip_tunnel_del(itn, t); |
c5441932 PS |
881 | t->parms.iph.saddr = p->iph.saddr; |
882 | t->parms.iph.daddr = p->iph.daddr; | |
883 | t->parms.i_key = p->i_key; | |
884 | t->parms.o_key = p->o_key; | |
885 | if (dev->type != ARPHRD_ETHER) { | |
5a1b7e1a | 886 | __dev_addr_set(dev, &p->iph.saddr, 4); |
c5441932 PS |
887 | memcpy(dev->broadcast, &p->iph.daddr, 4); |
888 | } | |
889 | ip_tunnel_add(itn, t); | |
890 | ||
891 | t->parms.iph.ttl = p->iph.ttl; | |
892 | t->parms.iph.tos = p->iph.tos; | |
893 | t->parms.iph.frag_off = p->iph.frag_off; | |
894 | ||
9830ad4c | 895 | if (t->parms.link != p->link || t->fwmark != fwmark) { |
c5441932 PS |
896 | int mtu; |
897 | ||
f694eee9 | 898 | WRITE_ONCE(t->parms.link, p->link); |
9830ad4c | 899 | t->fwmark = fwmark; |
c5441932 PS |
900 | mtu = ip_tunnel_bind_dev(dev); |
901 | if (set_mtu) | |
1eb2cded | 902 | WRITE_ONCE(dev->mtu, mtu); |
c5441932 | 903 | } |
e09acddf | 904 | dst_cache_reset(&t->dst_cache); |
c5441932 PS |
905 | netdev_state_change(dev); |
906 | } | |
907 | ||
117aef12 AL |
908 | int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, |
909 | int cmd) | |
c5441932 PS |
910 | { |
911 | int err = 0; | |
8c923ce2 ND |
912 | struct ip_tunnel *t = netdev_priv(dev); |
913 | struct net *net = t->net; | |
914 | struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id); | |
c5441932 | 915 | |
c5441932 PS |
916 | switch (cmd) { |
917 | case SIOCGETTUNNEL: | |
8c923ce2 | 918 | if (dev == itn->fb_tunnel_dev) { |
c5441932 | 919 | t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); |
51456b29 | 920 | if (!t) |
8c923ce2 ND |
921 | t = netdev_priv(dev); |
922 | } | |
c5441932 PS |
923 | memcpy(p, &t->parms, sizeof(*p)); |
924 | break; | |
925 | ||
926 | case SIOCADDTUNNEL: | |
927 | case SIOCCHGTUNNEL: | |
928 | err = -EPERM; | |
929 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) | |
930 | goto done; | |
931 | if (p->iph.ttl) | |
932 | p->iph.frag_off |= htons(IP_DF); | |
5832c4a7 AL |
933 | if (!test_bit(IP_TUNNEL_VTI_BIT, p->i_flags)) { |
934 | if (!test_bit(IP_TUNNEL_KEY_BIT, p->i_flags)) | |
7c8e6b9c | 935 | p->i_key = 0; |
5832c4a7 | 936 | if (!test_bit(IP_TUNNEL_KEY_BIT, p->o_flags)) |
7c8e6b9c DP |
937 | p->o_key = 0; |
938 | } | |
c5441932 | 939 | |
79134e6c | 940 | t = ip_tunnel_find(itn, p, itn->type); |
c5441932 | 941 | |
d61746b2 SK |
942 | if (cmd == SIOCADDTUNNEL) { |
943 | if (!t) { | |
944 | t = ip_tunnel_create(net, itn, p); | |
945 | err = PTR_ERR_OR_ZERO(t); | |
946 | break; | |
947 | } | |
948 | ||
949 | err = -EEXIST; | |
ee30ef4d | 950 | break; |
6dd3c9ec | 951 | } |
c5441932 | 952 | if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { |
00db4124 | 953 | if (t) { |
c5441932 PS |
954 | if (t->dev != dev) { |
955 | err = -EEXIST; | |
956 | break; | |
957 | } | |
958 | } else { | |
959 | unsigned int nflags = 0; | |
960 | ||
961 | if (ipv4_is_multicast(p->iph.daddr)) | |
962 | nflags = IFF_BROADCAST; | |
963 | else if (p->iph.daddr) | |
964 | nflags = IFF_POINTOPOINT; | |
965 | ||
966 | if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { | |
967 | err = -EINVAL; | |
968 | break; | |
969 | } | |
970 | ||
971 | t = netdev_priv(dev); | |
972 | } | |
973 | } | |
974 | ||
975 | if (t) { | |
976 | err = 0; | |
9830ad4c | 977 | ip_tunnel_update(itn, t, dev, p, true, 0); |
6dd3c9ec FW |
978 | } else { |
979 | err = -ENOENT; | |
980 | } | |
c5441932 PS |
981 | break; |
982 | ||
983 | case SIOCDELTUNNEL: | |
984 | err = -EPERM; | |
985 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) | |
986 | goto done; | |
987 | ||
988 | if (dev == itn->fb_tunnel_dev) { | |
989 | err = -ENOENT; | |
990 | t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); | |
51456b29 | 991 | if (!t) |
c5441932 PS |
992 | goto done; |
993 | err = -EPERM; | |
994 | if (t == netdev_priv(itn->fb_tunnel_dev)) | |
995 | goto done; | |
996 | dev = t->dev; | |
997 | } | |
998 | unregister_netdevice(dev); | |
999 | err = 0; | |
1000 | break; | |
1001 | ||
1002 | default: | |
1003 | err = -EINVAL; | |
1004 | } | |
1005 | ||
1006 | done: | |
1007 | return err; | |
1008 | } | |
607259a6 CH |
1009 | EXPORT_SYMBOL_GPL(ip_tunnel_ctl); |
1010 | ||
117aef12 AL |
1011 | bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp, |
1012 | const void __user *data) | |
1013 | { | |
1014 | struct ip_tunnel_parm p; | |
1015 | ||
1016 | if (copy_from_user(&p, data, sizeof(p))) | |
1017 | return false; | |
1018 | ||
1019 | strscpy(kp->name, p.name); | |
1020 | kp->link = p.link; | |
5832c4a7 AL |
1021 | ip_tunnel_flags_from_be16(kp->i_flags, p.i_flags); |
1022 | ip_tunnel_flags_from_be16(kp->o_flags, p.o_flags); | |
117aef12 AL |
1023 | kp->i_key = p.i_key; |
1024 | kp->o_key = p.o_key; | |
1025 | memcpy(&kp->iph, &p.iph, min(sizeof(kp->iph), sizeof(p.iph))); | |
1026 | ||
1027 | return true; | |
1028 | } | |
1029 | EXPORT_SYMBOL_GPL(ip_tunnel_parm_from_user); | |
1030 | ||
1031 | bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp) | |
1032 | { | |
1033 | struct ip_tunnel_parm p; | |
1034 | ||
5832c4a7 AL |
1035 | if (!ip_tunnel_flags_is_be16_compat(kp->i_flags) || |
1036 | !ip_tunnel_flags_is_be16_compat(kp->o_flags)) | |
1037 | return false; | |
1038 | ||
5a66cda5 AL |
1039 | memset(&p, 0, sizeof(p)); |
1040 | ||
117aef12 AL |
1041 | strscpy(p.name, kp->name); |
1042 | p.link = kp->link; | |
5832c4a7 AL |
1043 | p.i_flags = ip_tunnel_flags_to_be16(kp->i_flags); |
1044 | p.o_flags = ip_tunnel_flags_to_be16(kp->o_flags); | |
117aef12 AL |
1045 | p.i_key = kp->i_key; |
1046 | p.o_key = kp->o_key; | |
1047 | memcpy(&p.iph, &kp->iph, min(sizeof(p.iph), sizeof(kp->iph))); | |
1048 | ||
1049 | return !copy_to_user(data, &p, sizeof(p)); | |
1050 | } | |
1051 | EXPORT_SYMBOL_GPL(ip_tunnel_parm_to_user); | |
1052 | ||
3e7a1c7c AB |
1053 | int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, |
1054 | void __user *data, int cmd) | |
607259a6 | 1055 | { |
117aef12 | 1056 | struct ip_tunnel_parm_kern p; |
607259a6 CH |
1057 | int err; |
1058 | ||
117aef12 | 1059 | if (!ip_tunnel_parm_from_user(&p, data)) |
607259a6 CH |
1060 | return -EFAULT; |
1061 | err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd); | |
117aef12 | 1062 | if (!err && !ip_tunnel_parm_to_user(data, &p)) |
607259a6 CH |
1063 | return -EFAULT; |
1064 | return err; | |
1065 | } | |
3e7a1c7c | 1066 | EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate); |
c5441932 | 1067 | |
7e059158 | 1068 | int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) |
c5441932 PS |
1069 | { |
1070 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
1071 | int t_hlen = tunnel->hlen + sizeof(struct iphdr); | |
28e104d0 | 1072 | int max_mtu = IP_MAX_MTU - t_hlen; |
c5441932 | 1073 | |
9992a078 HL |
1074 | if (dev->type == ARPHRD_ETHER) |
1075 | max_mtu -= dev->hard_header_len; | |
1076 | ||
b96f9afe | 1077 | if (new_mtu < ETH_MIN_MTU) |
c5441932 | 1078 | return -EINVAL; |
7e059158 DW |
1079 | |
1080 | if (new_mtu > max_mtu) { | |
1081 | if (strict) | |
1082 | return -EINVAL; | |
1083 | ||
1084 | new_mtu = max_mtu; | |
1085 | } | |
1086 | ||
1eb2cded | 1087 | WRITE_ONCE(dev->mtu, new_mtu); |
c5441932 PS |
1088 | return 0; |
1089 | } | |
7e059158 DW |
1090 | EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu); |
1091 | ||
1092 | int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) | |
1093 | { | |
1094 | return __ip_tunnel_change_mtu(dev, new_mtu, true); | |
1095 | } | |
c5441932 PS |
1096 | EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); |
1097 | ||
1098 | static void ip_tunnel_dev_free(struct net_device *dev) | |
1099 | { | |
1100 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
1101 | ||
1102 | gro_cells_destroy(&tunnel->gro_cells); | |
e09acddf | 1103 | dst_cache_destroy(&tunnel->dst_cache); |
c5441932 PS |
1104 | } |
1105 | ||
1106 | void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) | |
1107 | { | |
c5441932 PS |
1108 | struct ip_tunnel *tunnel = netdev_priv(dev); |
1109 | struct ip_tunnel_net *itn; | |
1110 | ||
6c742e71 | 1111 | itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id); |
c5441932 PS |
1112 | |
1113 | if (itn->fb_tunnel_dev != dev) { | |
2e15ea39 | 1114 | ip_tunnel_del(itn, netdev_priv(dev)); |
c5441932 PS |
1115 | unregister_netdevice_queue(dev, head); |
1116 | } | |
1117 | } | |
1118 | EXPORT_SYMBOL_GPL(ip_tunnel_dellink); | |
1119 | ||
1728d4fa ND |
1120 | struct net *ip_tunnel_get_link_net(const struct net_device *dev) |
1121 | { | |
1122 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
1123 | ||
9cf621bd | 1124 | return READ_ONCE(tunnel->net); |
1728d4fa ND |
1125 | } |
1126 | EXPORT_SYMBOL(ip_tunnel_get_link_net); | |
1127 | ||
1e99584b ND |
1128 | int ip_tunnel_get_iflink(const struct net_device *dev) |
1129 | { | |
f694eee9 | 1130 | const struct ip_tunnel *tunnel = netdev_priv(dev); |
1e99584b | 1131 | |
f694eee9 | 1132 | return READ_ONCE(tunnel->parms.link); |
1e99584b ND |
1133 | } |
1134 | EXPORT_SYMBOL(ip_tunnel_get_iflink); | |
1135 | ||
c7d03a00 | 1136 | int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, |
c5441932 PS |
1137 | struct rtnl_link_ops *ops, char *devname) |
1138 | { | |
1139 | struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); | |
117aef12 | 1140 | struct ip_tunnel_parm_kern parms; |
6261d983 | 1141 | unsigned int i; |
c5441932 | 1142 | |
79134e6c | 1143 | itn->rtnl_link_ops = ops; |
6261d983 | 1144 | for (i = 0; i < IP_TNL_HASH_SIZE; i++) |
1145 | INIT_HLIST_HEAD(&itn->tunnels[i]); | |
c5441932 | 1146 | |
79134e6c ED |
1147 | if (!ops || !net_has_fallback_tunnels(net)) { |
1148 | struct ip_tunnel_net *it_init_net; | |
1149 | ||
1150 | it_init_net = net_generic(&init_net, ip_tnl_net_id); | |
1151 | itn->type = it_init_net->type; | |
c5441932 PS |
1152 | itn->fb_tunnel_dev = NULL; |
1153 | return 0; | |
1154 | } | |
6261d983 | 1155 | |
c5441932 PS |
1156 | memset(&parms, 0, sizeof(parms)); |
1157 | if (devname) | |
512b2dc4 | 1158 | strscpy(parms.name, devname, IFNAMSIZ); |
c5441932 PS |
1159 | |
1160 | rtnl_lock(); | |
1161 | itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms); | |
ea857f28 DC |
1162 | /* FB netdevice is special: we have one, and only one per netns. |
1163 | * Allowing to move it to another netns is clearly unsafe. | |
1164 | */ | |
67013282 | 1165 | if (!IS_ERR(itn->fb_tunnel_dev)) { |
0c493da8 | 1166 | itn->fb_tunnel_dev->netns_immutable = true; |
78ff4be4 | 1167 | itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev); |
67013282 | 1168 | ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); |
79134e6c | 1169 | itn->type = itn->fb_tunnel_dev->type; |
67013282 | 1170 | } |
b4de77ad | 1171 | rtnl_unlock(); |
c5441932 | 1172 | |
27d79f3b | 1173 | return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev); |
c5441932 PS |
1174 | } |
1175 | EXPORT_SYMBOL_GPL(ip_tunnel_init_net); | |
1176 | ||
a967e01e KI |
1177 | void ip_tunnel_delete_net(struct net *net, unsigned int id, |
1178 | struct rtnl_link_ops *ops, | |
1179 | struct list_head *head) | |
c5441932 | 1180 | { |
a967e01e | 1181 | struct ip_tunnel_net *itn = net_generic(net, id); |
6c742e71 | 1182 | struct net_device *dev, *aux; |
c5441932 PS |
1183 | int h; |
1184 | ||
a967e01e KI |
1185 | ASSERT_RTNL_NET(net); |
1186 | ||
6c742e71 ND |
1187 | for_each_netdev_safe(net, dev, aux) |
1188 | if (dev->rtnl_link_ops == ops) | |
1189 | unregister_netdevice_queue(dev, head); | |
1190 | ||
c5441932 PS |
1191 | for (h = 0; h < IP_TNL_HASH_SIZE; h++) { |
1192 | struct ip_tunnel *t; | |
1193 | struct hlist_node *n; | |
1194 | struct hlist_head *thead = &itn->tunnels[h]; | |
1195 | ||
1196 | hlist_for_each_entry_safe(t, n, thead, hash_node) | |
6c742e71 ND |
1197 | /* If dev is in the same netns, it has already |
1198 | * been added to the list by the previous loop. | |
1199 | */ | |
1200 | if (!net_eq(dev_net(t->dev), net)) | |
1201 | unregister_netdevice_queue(t->dev, head); | |
c5441932 | 1202 | } |
c5441932 | 1203 | } |
a967e01e | 1204 | EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); |
c5441932 | 1205 | |
eacb1160 XL |
1206 | int ip_tunnel_newlink(struct net *net, struct net_device *dev, |
1207 | struct nlattr *tb[], struct ip_tunnel_parm_kern *p, | |
1208 | __u32 fwmark) | |
c5441932 PS |
1209 | { |
1210 | struct ip_tunnel *nt; | |
c5441932 PS |
1211 | struct ip_tunnel_net *itn; |
1212 | int mtu; | |
1213 | int err; | |
1214 | ||
1215 | nt = netdev_priv(dev); | |
1216 | itn = net_generic(net, nt->ip_tnl_net_id); | |
1217 | ||
2e15ea39 PS |
1218 | if (nt->collect_md) { |
1219 | if (rtnl_dereference(itn->collect_md_tun)) | |
1220 | return -EEXIST; | |
1221 | } else { | |
1222 | if (ip_tunnel_find(itn, p, dev->type)) | |
1223 | return -EEXIST; | |
1224 | } | |
c5441932 | 1225 | |
5e6700b3 | 1226 | nt->net = net; |
c5441932 | 1227 | nt->parms = *p; |
9830ad4c | 1228 | nt->fwmark = fwmark; |
c5441932 PS |
1229 | err = register_netdevice(dev); |
1230 | if (err) | |
f6cc9c05 | 1231 | goto err_register_netdevice; |
c5441932 PS |
1232 | |
1233 | if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) | |
1234 | eth_hw_addr_random(dev); | |
1235 | ||
1236 | mtu = ip_tunnel_bind_dev(dev); | |
24fc7979 | 1237 | if (tb[IFLA_MTU]) { |
28e104d0 | 1238 | unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr)); |
24fc7979 | 1239 | |
9992a078 HL |
1240 | if (dev->type == ARPHRD_ETHER) |
1241 | max -= dev->hard_header_len; | |
1242 | ||
28e104d0 | 1243 | mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max); |
f6cc9c05 | 1244 | } |
c5441932 | 1245 | |
5568cdc3 DM |
1246 | err = dev_set_mtu(dev, mtu); |
1247 | if (err) | |
1248 | goto err_dev_set_mtu; | |
c5441932 PS |
1249 | |
1250 | ip_tunnel_add(itn, nt); | |
f6cc9c05 PM |
1251 | return 0; |
1252 | ||
1253 | err_dev_set_mtu: | |
1254 | unregister_netdevice(dev); | |
1255 | err_register_netdevice: | |
c5441932 PS |
1256 | return err; |
1257 | } | |
1258 | EXPORT_SYMBOL_GPL(ip_tunnel_newlink); | |
1259 | ||
1260 | int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], | |
117aef12 | 1261 | struct ip_tunnel_parm_kern *p, __u32 fwmark) |
c5441932 | 1262 | { |
6c742e71 | 1263 | struct ip_tunnel *t; |
c5441932 | 1264 | struct ip_tunnel *tunnel = netdev_priv(dev); |
6c742e71 | 1265 | struct net *net = tunnel->net; |
c5441932 PS |
1266 | struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); |
1267 | ||
1268 | if (dev == itn->fb_tunnel_dev) | |
1269 | return -EINVAL; | |
1270 | ||
c5441932 PS |
1271 | t = ip_tunnel_find(itn, p, dev->type); |
1272 | ||
1273 | if (t) { | |
1274 | if (t->dev != dev) | |
1275 | return -EEXIST; | |
1276 | } else { | |
6c742e71 | 1277 | t = tunnel; |
c5441932 PS |
1278 | |
1279 | if (dev->type != ARPHRD_ETHER) { | |
1280 | unsigned int nflags = 0; | |
1281 | ||
1282 | if (ipv4_is_multicast(p->iph.daddr)) | |
1283 | nflags = IFF_BROADCAST; | |
1284 | else if (p->iph.daddr) | |
1285 | nflags = IFF_POINTOPOINT; | |
1286 | ||
1287 | if ((dev->flags ^ nflags) & | |
1288 | (IFF_POINTOPOINT | IFF_BROADCAST)) | |
1289 | return -EINVAL; | |
1290 | } | |
1291 | } | |
1292 | ||
9830ad4c | 1293 | ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark); |
c5441932 PS |
1294 | return 0; |
1295 | } | |
1296 | EXPORT_SYMBOL_GPL(ip_tunnel_changelink); | |
1297 | ||
1298 | int ip_tunnel_init(struct net_device *dev) | |
1299 | { | |
1300 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
1301 | struct iphdr *iph = &tunnel->parms.iph; | |
1c213bd2 | 1302 | int err; |
c5441932 | 1303 | |
cf124db5 DM |
1304 | dev->needs_free_netdev = true; |
1305 | dev->priv_destructor = ip_tunnel_dev_free; | |
45403b12 | 1306 | dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; |
c5441932 | 1307 | |
e09acddf | 1308 | err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); |
45403b12 | 1309 | if (err) |
e09acddf | 1310 | return err; |
9a4aa9af | 1311 | |
c5441932 PS |
1312 | err = gro_cells_init(&tunnel->gro_cells, dev); |
1313 | if (err) { | |
e09acddf | 1314 | dst_cache_destroy(&tunnel->dst_cache); |
c5441932 PS |
1315 | return err; |
1316 | } | |
1317 | ||
1318 | tunnel->dev = dev; | |
82183b03 | 1319 | strscpy(tunnel->parms.name, dev->name); |
c5441932 PS |
1320 | iph->version = 4; |
1321 | iph->ihl = 5; | |
1322 | ||
d0f41851 | 1323 | if (tunnel->collect_md) |
2e15ea39 | 1324 | netif_keep_dst(dev); |
0bef5120 | 1325 | netdev_lockdep_set_classes(dev); |
c5441932 PS |
1326 | return 0; |
1327 | } | |
1328 | EXPORT_SYMBOL_GPL(ip_tunnel_init); | |
1329 | ||
1330 | void ip_tunnel_uninit(struct net_device *dev) | |
1331 | { | |
c5441932 | 1332 | struct ip_tunnel *tunnel = netdev_priv(dev); |
6c742e71 | 1333 | struct net *net = tunnel->net; |
c5441932 PS |
1334 | struct ip_tunnel_net *itn; |
1335 | ||
1336 | itn = net_generic(net, tunnel->ip_tnl_net_id); | |
ba61539c TY |
1337 | ip_tunnel_del(itn, netdev_priv(dev)); |
1338 | if (itn->fb_tunnel_dev == dev) | |
1339 | WRITE_ONCE(itn->fb_tunnel_dev, NULL); | |
7d442fab | 1340 | |
e09acddf | 1341 | dst_cache_reset(&tunnel->dst_cache); |
c5441932 PS |
1342 | } |
1343 | EXPORT_SYMBOL_GPL(ip_tunnel_uninit); | |
1344 | ||
1345 | /* Do least required initialization, rest of init is done in tunnel_init call */ | |
c7d03a00 | 1346 | void ip_tunnel_setup(struct net_device *dev, unsigned int net_id) |
c5441932 PS |
1347 | { |
1348 | struct ip_tunnel *tunnel = netdev_priv(dev); | |
1349 | tunnel->ip_tnl_net_id = net_id; | |
1350 | } | |
1351 | EXPORT_SYMBOL_GPL(ip_tunnel_setup); | |
1352 | ||
b058a5d2 | 1353 | MODULE_DESCRIPTION("IPv4 tunnel implementation library"); |
c5441932 | 1354 | MODULE_LICENSE("GPL"); |