Merge tag 'pci-v6.16-fixes-3' of git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci
[linux-2.6-block.git] / net / ipv4 / ip_gre.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4 2/*
e905a9ed 3 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
4 *
5 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
1da177e4
LT
6 */
7
afd46503
JP
8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
4fc268d2 10#include <linux/capability.h>
1da177e4
LT
11#include <linux/module.h>
12#include <linux/types.h>
1da177e4 13#include <linux/kernel.h>
5a0e3ad6 14#include <linux/slab.h>
7c0f6ba6 15#include <linux/uaccess.h>
1da177e4
LT
16#include <linux/skbuff.h>
17#include <linux/netdevice.h>
18#include <linux/in.h>
19#include <linux/tcp.h>
20#include <linux/udp.h>
21#include <linux/if_arp.h>
2e15ea39 22#include <linux/if_vlan.h>
1da177e4
LT
23#include <linux/init.h>
24#include <linux/in6.h>
25#include <linux/inetdevice.h>
26#include <linux/igmp.h>
27#include <linux/netfilter_ipv4.h>
e1a80002 28#include <linux/etherdevice.h>
46f25dff 29#include <linux/if_ether.h>
1da177e4
LT
30
31#include <net/sock.h>
32#include <net/ip.h>
33#include <net/icmp.h>
34#include <net/protocol.h>
c5441932 35#include <net/ip_tunnels.h>
1da177e4
LT
36#include <net/arp.h>
37#include <net/checksum.h>
38#include <net/dsfield.h>
39#include <net/inet_ecn.h>
40#include <net/xfrm.h>
59a4c759
PE
41#include <net/net_namespace.h>
42#include <net/netns/generic.h>
c19e654d 43#include <net/rtnetlink.h>
00959ade 44#include <net/gre.h>
2e15ea39 45#include <net/dst_metadata.h>
84e54fe0 46#include <net/erspan.h>
25376a89 47#include <net/inet_dscp.h>
1da177e4 48
1da177e4
LT
49/*
50 Problems & solutions
51 --------------------
52
   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter, since when we enter the first ndo_xmit(), cpu migration is
   forbidden. We force an exit if this counter reaches RECURSION_LIMIT.
1da177e4
LT
68
   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to the
   upper header. It is a very good solution, but it introduces two problems:

   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all routers (at least, in my neighbourhood)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something reasonable,
   we should search for another solution.

   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially
   taking fragmentation into account. To be short, ttl is not a solution at all.
1da177e4
LT
89
90 Current solution: The solution was UNEXPECTEDLY SIMPLE.
91 We force DF flag on tunnels with preconfigured hop limit,
92 that is ALL. :-) Well, it does not remove the problem completely,
93 but exponential growth of network traffic is changed to linear
94 (branches, that exceed pmtu are pruned) and tunnel mtu
bff52857 95 rapidly degrades to value <68, where looping stops.
1da177e4
LT
96 Yes, it is not good if there exists a router in the loop,
97 which does not force DF, even when encapsulating packets have DF set.
98 But it is not our problem! Nobody could accuse us, we made
99 all that we could make. Even if it is your gated who injected
100 fatal route to network, even if it were you who configured
101 fatal static route: you are innocent. :-)
102
1da177e4
LT
103 Alexey Kuznetsov.
104 */
105
eccc1bb8 106static bool log_ecn_error = true;
107module_param(log_ecn_error, bool, 0644);
108MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
109
c19e654d 110static struct rtnl_link_ops ipgre_link_ops __read_mostly;
aab1e898
GN
111static const struct header_ops ipgre_header_ops;
112
1da177e4 113static int ipgre_tunnel_init(struct net_device *dev);
1a66a836 114static void erspan_build_header(struct sk_buff *skb,
c69de58b 115 u32 id, u32 index,
a3222dc9 116 bool truncate, bool is_ipv4);
eb8ce741 117
c7d03a00
AD
118static unsigned int ipgre_net_id __read_mostly;
119static unsigned int gre_tap_net_id __read_mostly;
84e54fe0 120static unsigned int erspan_net_id __read_mostly;
1da177e4 121
32bbd879
SB
122static int ipgre_err(struct sk_buff *skb, u32 info,
123 const struct tnl_ptk_info *tpi)
1da177e4 124{
1da177e4 125
c5441932
PS
126 /* All the routers (except for Linux) return only
127 8 bytes of packet payload. It means, that precise relaying of
128 ICMP in the real Internet is absolutely infeasible.
1da177e4 129
c5441932
PS
130 Moreover, Cisco "wise men" put GRE key to the third word
131 in GRE header. It makes impossible maintaining even soft
132 state for keyed GRE tunnels with enabled checksum. Tell
133 them "thank you".
1da177e4 134
c5441932
PS
135 Well, I wonder, rfc1812 was written by Cisco employee,
136 what the hell these idiots break standards established
137 by themselves???
138 */
139 struct net *net = dev_net(skb->dev);
140 struct ip_tunnel_net *itn;
96f5a846 141 const struct iphdr *iph;
88c7664f
ACM
142 const int type = icmp_hdr(skb)->type;
143 const int code = icmp_hdr(skb)->code;
1da177e4 144 struct ip_tunnel *t;
1da177e4 145
32bbd879
SB
146 if (tpi->proto == htons(ETH_P_TEB))
147 itn = net_generic(net, gre_tap_net_id);
148 else if (tpi->proto == htons(ETH_P_ERSPAN) ||
149 tpi->proto == htons(ETH_P_ERSPAN2))
150 itn = net_generic(net, erspan_net_id);
151 else
152 itn = net_generic(net, ipgre_net_id);
153
154 iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
155 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
156 iph->daddr, iph->saddr, tpi->key);
157
158 if (!t)
159 return -ENOENT;
160
1da177e4
LT
161 switch (type) {
162 default:
163 case ICMP_PARAMETERPROB:
32bbd879 164 return 0;
1da177e4
LT
165
166 case ICMP_DEST_UNREACH:
167 switch (code) {
168 case ICMP_SR_FAILED:
169 case ICMP_PORT_UNREACH:
170 /* Impossible event. */
32bbd879 171 return 0;
1da177e4
LT
172 default:
173 /* All others are translated to HOST_UNREACH.
174 rfc2003 contains "deep thoughts" about NET_UNREACH,
175 I believe they are just ether pollution. --ANK
176 */
177 break;
178 }
179 break;
9f57c67c 180
1da177e4
LT
181 case ICMP_TIME_EXCEEDED:
182 if (code != ICMP_EXC_TTL)
32bbd879 183 return 0;
1da177e4 184 break;
55be7a9c
DM
185
186 case ICMP_REDIRECT:
187 break;
1da177e4
LT
188 }
189
9b8c6d7b 190#if IS_ENABLED(CONFIG_IPV6)
50f37fc2
GU
191 if (tpi->proto == htons(ETH_P_IPV6)) {
192 unsigned int data_len = 0;
193
194 if (type == ICMP_TIME_EXCEEDED)
195 data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
196
197 if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
198 type, data_len))
199 return 0;
200 }
9b8c6d7b
ED
201#endif
202
36393395 203 if (t->parms.iph.daddr == 0 ||
f97c1e0c 204 ipv4_is_multicast(t->parms.iph.daddr))
32bbd879 205 return 0;
1da177e4
LT
206
207 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
32bbd879 208 return 0;
1da177e4 209
da6185d8 210 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
211 t->err_count++;
212 else
213 t->err_count = 1;
214 t->err_time = jiffies;
32bbd879
SB
215
216 return 0;
9f57c67c
PS
217}
218
219static void gre_err(struct sk_buff *skb, u32 info)
220{
221 /* All the routers (except for Linux) return only
222 * 8 bytes of packet payload. It means, that precise relaying of
223 * ICMP in the real Internet is absolutely infeasible.
224 *
225 * Moreover, Cisco "wise men" put GRE key to the third word
226 * in GRE header. It makes impossible maintaining even soft
227 * state for keyed
228 * GRE tunnels with enabled checksum. Tell them "thank you".
229 *
230 * Well, I wonder, rfc1812 was written by Cisco employee,
231 * what the hell these idiots break standards established
232 * by themselves???
233 */
234
e582615a 235 const struct iphdr *iph = (struct iphdr *)skb->data;
9f57c67c
PS
236 const int type = icmp_hdr(skb)->type;
237 const int code = icmp_hdr(skb)->code;
238 struct tnl_ptk_info tpi;
9f57c67c 239
b0350d51
HY
240 if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
241 iph->ihl * 4) < 0)
242 return;
9f57c67c
PS
243
244 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
245 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
d888f396 246 skb->dev->ifindex, IPPROTO_GRE);
9f57c67c
PS
247 return;
248 }
249 if (type == ICMP_REDIRECT) {
1042caa7
MÅ»
250 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
251 IPPROTO_GRE);
9f57c67c
PS
252 return;
253 }
254
255 ipgre_err(skb, info, &tpi);
1da177e4
LT
256}
257
f989d546
WT
/*
 * Tell ERSPAN type I from type II by the GRE header length.
 *
 * Both ERSPAN type I (version 0) and type II (version 1) use protocol
 * 0x88BE, but type I has only a 4-byte GRE header while type II has
 * an 8-byte one.
 */
static bool is_erspan_type1(int gre_hdr_len)
{
	const int type1_gre_hdr_len = 4;

	return gre_hdr_len == type1_gre_hdr_len;
}
266
84e54fe0
WT
267static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
268 int gre_hdr_len)
269{
270 struct net *net = dev_net(skb->dev);
271 struct metadata_dst *tun_dst = NULL;
1d7e2ed2 272 struct erspan_base_hdr *ershdr;
5832c4a7 273 IP_TUNNEL_DECLARE_FLAGS(flags);
84e54fe0
WT
274 struct ip_tunnel_net *itn;
275 struct ip_tunnel *tunnel;
84e54fe0 276 const struct iphdr *iph;
3df19283 277 struct erspan_md2 *md2;
1d7e2ed2 278 int ver;
84e54fe0
WT
279 int len;
280
5832c4a7
AL
281 ip_tunnel_flags_copy(flags, tpi->flags);
282
84e54fe0 283 itn = net_generic(net, erspan_net_id);
84e54fe0 284 iph = ip_hdr(skb);
f989d546
WT
285 if (is_erspan_type1(gre_hdr_len)) {
286 ver = 0;
5832c4a7
AL
287 __set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
288 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
f989d546
WT
289 iph->saddr, iph->daddr, 0);
290 } else {
17af4205
ED
291 if (unlikely(!pskb_may_pull(skb,
292 gre_hdr_len + sizeof(*ershdr))))
293 return PACKET_REJECT;
294
f989d546
WT
295 ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
296 ver = ershdr->ver;
17af4205 297 iph = ip_hdr(skb);
5832c4a7
AL
298 __set_bit(IP_TUNNEL_KEY_BIT, flags);
299 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
f989d546
WT
300 iph->saddr, iph->daddr, tpi->key);
301 }
84e54fe0
WT
302
303 if (tunnel) {
f989d546
WT
304 if (is_erspan_type1(gre_hdr_len))
305 len = gre_hdr_len;
306 else
307 len = gre_hdr_len + erspan_hdr_len(ver);
308
1d7e2ed2 309 if (unlikely(!pskb_may_pull(skb, len)))
ae3e1337 310 return PACKET_REJECT;
1d7e2ed2 311
84e54fe0 312 if (__iptunnel_pull_header(skb,
1d7e2ed2 313 len,
84e54fe0
WT
314 htons(ETH_P_TEB),
315 false, false) < 0)
316 goto drop;
317
1a66a836 318 if (tunnel->collect_md) {
492b67e2 319 struct erspan_metadata *pkt_md, *md;
1a66a836 320 struct ip_tunnel_info *info;
492b67e2 321 unsigned char *gh;
1a66a836 322 __be64 tun_id;
1a66a836 323
5832c4a7
AL
324 __set_bit(IP_TUNNEL_KEY_BIT, tpi->flags);
325 ip_tunnel_flags_copy(flags, tpi->flags);
1a66a836
WT
326 tun_id = key32_to_tunnel_id(tpi->key);
327
328 tun_dst = ip_tun_rx_dst(skb, flags,
329 tun_id, sizeof(*md));
330 if (!tun_dst)
331 return PACKET_REJECT;
332
492b67e2
LB
333 /* skb can be uncloned in __iptunnel_pull_header, so
334 * old pkt_md is no longer valid and we need to reset
335 * it
336 */
337 gh = skb_network_header(skb) +
338 skb_network_header_len(skb);
339 pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
340 sizeof(*ershdr));
1a66a836 341 md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
f551c91d 342 md->version = ver;
3df19283
WT
343 md2 = &md->u.md2;
344 memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
345 ERSPAN_V2_MDSIZE);
f551c91d 346
1a66a836 347 info = &tun_dst->u.tun_info;
5832c4a7
AL
348 __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
349 info->key.tun_flags);
1a66a836 350 info->options_len = sizeof(*md);
1a66a836
WT
351 }
352
84e54fe0
WT
353 skb_reset_mac_header(skb);
354 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
355 return PACKET_RCVD;
356 }
5a64506b
HY
357 return PACKET_REJECT;
358
84e54fe0
WT
359drop:
360 kfree_skb(skb);
361 return PACKET_RCVD;
362}
363
125372fa
JB
364static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
365 struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
1da177e4 366{
2e15ea39 367 struct metadata_dst *tun_dst = NULL;
b71d1d42 368 const struct iphdr *iph;
1da177e4 369 struct ip_tunnel *tunnel;
1da177e4 370
c5441932 371 iph = ip_hdr(skb);
bda7bb46
PS
372 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
373 iph->saddr, iph->daddr, tpi->key);
e1a80002 374
d2083287 375 if (tunnel) {
c0d59da7 376 const struct iphdr *tnl_params;
377
125372fa
JB
378 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
379 raw_proto, false) < 0)
244a797b
JB
380 goto drop;
381
aab1e898
GN
382 /* Special case for ipgre_header_parse(), which expects the
383 * mac_header to point to the outer IP header.
384 */
385 if (tunnel->dev->header_ops == &ipgre_header_ops)
e271c7b4
JB
386 skb_pop_mac_header(skb);
387 else
388 skb_reset_mac_header(skb);
c0d59da7 389
390 tnl_params = &tunnel->parms.iph;
391 if (tunnel->collect_md || tnl_params->daddr == 0) {
5832c4a7 392 IP_TUNNEL_DECLARE_FLAGS(flags) = { };
c29a70d2 393 __be64 tun_id;
2e15ea39 394
5832c4a7
AL
395 __set_bit(IP_TUNNEL_CSUM_BIT, flags);
396 __set_bit(IP_TUNNEL_KEY_BIT, flags);
397 ip_tunnel_flags_and(flags, tpi->flags, flags);
398
d817f432 399 tun_id = key32_to_tunnel_id(tpi->key);
c29a70d2 400 tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
2e15ea39
PS
401 if (!tun_dst)
402 return PACKET_REJECT;
2e15ea39
PS
403 }
404
405 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
bda7bb46 406 return PACKET_RCVD;
1da177e4 407 }
125372fa 408 return PACKET_NEXT;
244a797b
JB
409
410drop:
411 kfree_skb(skb);
412 return PACKET_RCVD;
1da177e4
LT
413}
414
125372fa
JB
415static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
416 int hdr_len)
417{
418 struct net *net = dev_net(skb->dev);
419 struct ip_tunnel_net *itn;
420 int res;
421
422 if (tpi->proto == htons(ETH_P_TEB))
423 itn = net_generic(net, gre_tap_net_id);
424 else
425 itn = net_generic(net, ipgre_net_id);
426
427 res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
428 if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
429 /* ipgre tunnels in collect metadata mode should receive
430 * also ETH_P_TEB traffic.
431 */
432 itn = net_generic(net, ipgre_net_id);
433 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
434 }
435 return res;
436}
437
9f57c67c
PS
438static int gre_rcv(struct sk_buff *skb)
439{
440 struct tnl_ptk_info tpi;
441 bool csum_err = false;
95f5c64c 442 int hdr_len;
9f57c67c
PS
443
444#ifdef CONFIG_NET_IPGRE_BROADCAST
445 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
446 /* Looped back packet, drop it! */
447 if (rt_is_output_route(skb_rtable(skb)))
448 goto drop;
449 }
450#endif
451
e582615a 452 hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
f132ae7c 453 if (hdr_len < 0)
95f5c64c
TH
454 goto drop;
455
f551c91d
WT
456 if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
457 tpi.proto == htons(ETH_P_ERSPAN2))) {
84e54fe0
WT
458 if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
459 return 0;
dd8d5b8c 460 goto out;
84e54fe0
WT
461 }
462
244a797b 463 if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
9f57c67c
PS
464 return 0;
465
dd8d5b8c 466out:
9f57c67c
PS
467 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
468drop:
469 kfree_skb(skb);
470 return 0;
471}
472
c5441932
PS
473static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
474 const struct iphdr *tnl_params,
475 __be16 proto)
476{
477 struct ip_tunnel *tunnel = netdev_priv(dev);
5832c4a7
AL
478 IP_TUNNEL_DECLARE_FLAGS(flags);
479
480 ip_tunnel_flags_copy(flags, tunnel->parms.o_flags);
1da177e4 481
c5441932 482 /* Push GRE header. */
182a352d 483 gre_build_header(skb, tunnel->tun_hlen,
ff827beb 484 flags, proto, tunnel->parms.o_key,
5832c4a7
AL
485 test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
486 htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
54bc9bac 487
bf3d6a8f 488 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
c5441932 489}
1da177e4 490
aed069df 491static int gre_handle_offloads(struct sk_buff *skb, bool csum)
b2acd1dc 492{
6fa79666 493 return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
b2acd1dc
PS
494}
495
862a03c3
WT
496static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
497 __be16 proto)
498{
77a5196a 499 struct ip_tunnel *tunnel = netdev_priv(dev);
5832c4a7 500 IP_TUNNEL_DECLARE_FLAGS(flags) = { };
862a03c3
WT
501 struct ip_tunnel_info *tun_info;
502 const struct ip_tunnel_key *key;
862a03c3 503 int tunnel_hlen;
862a03c3
WT
504
505 tun_info = skb_tunnel_info(skb);
506 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
507 ip_tunnel_info_af(tun_info) != AF_INET))
508 goto err_free_skb;
509
510 key = &tun_info->key;
511 tunnel_hlen = gre_calc_hlen(key->tun_flags);
512
962924fa 513 if (skb_cow_head(skb, dev->needed_headroom))
514 goto err_free_skb;
2e15ea39
PS
515
516 /* Push Tunnel header. */
5832c4a7
AL
517 if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
518 tunnel->parms.o_flags)))
962924fa 519 goto err_free_skb;
2e15ea39 520
5832c4a7
AL
521 __set_bit(IP_TUNNEL_CSUM_BIT, flags);
522 __set_bit(IP_TUNNEL_KEY_BIT, flags);
523 __set_bit(IP_TUNNEL_SEQ_BIT, flags);
524 ip_tunnel_flags_and(flags, tun_info->key.tun_flags, flags);
525
cba65321 526 gre_build_header(skb, tunnel_hlen, flags, proto,
77a5196a 527 tunnel_id_to_key32(tun_info->key.tun_id),
5832c4a7
AL
528 test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
529 htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
2e15ea39 530
962924fa 531 ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
039f5062 532
2e15ea39
PS
533 return;
534
2e15ea39
PS
535err_free_skb:
536 kfree_skb(skb);
c4794d22 537 DEV_STATS_INC(dev, tx_dropped);
2e15ea39
PS
538}
539
20704bd1 540static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
1a66a836
WT
541{
542 struct ip_tunnel *tunnel = netdev_priv(dev);
5832c4a7 543 IP_TUNNEL_DECLARE_FLAGS(flags) = { };
1a66a836
WT
544 struct ip_tunnel_info *tun_info;
545 const struct ip_tunnel_key *key;
546 struct erspan_metadata *md;
1a66a836 547 bool truncate = false;
962924fa 548 __be16 proto;
1a66a836 549 int tunnel_hlen;
f551c91d 550 int version;
1baf5ebf 551 int nhoff;
1a66a836
WT
552
553 tun_info = skb_tunnel_info(skb);
554 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
555 ip_tunnel_info_af(tun_info) != AF_INET))
556 goto err_free_skb;
557
558 key = &tun_info->key;
5832c4a7 559 if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags))
962924fa 560 goto err_free_skb;
2eb8d6d2 561 if (tun_info->options_len < sizeof(*md))
962924fa 562 goto err_free_skb;
2eb8d6d2 563 md = ip_tunnel_info_opts(tun_info);
1a66a836
WT
564
565 /* ERSPAN has fixed 8 byte GRE header */
f551c91d
WT
566 version = md->version;
567 tunnel_hlen = 8 + erspan_hdr_len(version);
1a66a836 568
962924fa 569 if (skb_cow_head(skb, dev->needed_headroom))
570 goto err_free_skb;
1a66a836
WT
571
572 if (gre_handle_offloads(skb, false))
962924fa 573 goto err_free_skb;
1a66a836 574
f192970d 575 if (skb->len > dev->mtu + dev->hard_header_len) {
02d84f3e
YG
576 if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
577 goto err_free_skb;
1a66a836
WT
578 truncate = true;
579 }
580
8e50ed77 581 nhoff = skb_network_offset(skb);
1baf5ebf
WT
582 if (skb->protocol == htons(ETH_P_IP) &&
583 (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
584 truncate = true;
d5db21a3 585
301bd140
ED
586 if (skb->protocol == htons(ETH_P_IPV6)) {
587 int thoff;
588
589 if (skb_transport_header_was_set(skb))
8e50ed77 590 thoff = skb_transport_offset(skb);
301bd140
ED
591 else
592 thoff = nhoff + sizeof(struct ipv6hdr);
593 if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
594 truncate = true;
595 }
1baf5ebf 596
f551c91d 597 if (version == 1) {
c69de58b 598 erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
f551c91d 599 ntohl(md->u.index), truncate, true);
20704bd1 600 proto = htons(ETH_P_ERSPAN);
f551c91d 601 } else if (version == 2) {
c69de58b
WT
602 erspan_build_header_v2(skb,
603 ntohl(tunnel_id_to_key32(key->tun_id)),
604 md->u.md2.dir,
605 get_hwid(&md->u.md2),
606 truncate, true);
20704bd1 607 proto = htons(ETH_P_ERSPAN2);
f551c91d 608 } else {
962924fa 609 goto err_free_skb;
f551c91d 610 }
1a66a836 611
5832c4a7
AL
612 __set_bit(IP_TUNNEL_SEQ_BIT, flags);
613 gre_build_header(skb, 8, flags, proto, 0,
614 htonl(atomic_fetch_inc(&tunnel->o_seqno)));
1a66a836 615
962924fa 616 ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
1a66a836 617
1a66a836
WT
618 return;
619
1a66a836
WT
620err_free_skb:
621 kfree_skb(skb);
c4794d22 622 DEV_STATS_INC(dev, tx_dropped);
1a66a836
WT
623}
624
fc4099f1
PS
625static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
626{
627 struct ip_tunnel_info *info = skb_tunnel_info(skb);
962924fa 628 const struct ip_tunnel_key *key;
fc4099f1
PS
629 struct rtable *rt;
630 struct flowi4 fl4;
631
632 if (ip_tunnel_info_af(info) != AF_INET)
633 return -EINVAL;
634
962924fa 635 key = &info->key;
636 ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
f7716b31 637 tunnel_id_to_key32(key->tun_id),
db53cd3d 638 key->tos & ~INET_ECN_MASK, dev_net(dev), 0,
7ec9fce4 639 skb->mark, skb_get_hash(skb), key->flow_flags);
962924fa 640 rt = ip_route_output_key(dev_net(dev), &fl4);
fc4099f1
PS
641 if (IS_ERR(rt))
642 return PTR_ERR(rt);
643
644 ip_rt_put(rt);
645 info->key.u.ipv4.src = fl4.saddr;
646 return 0;
647}
648
c5441932
PS
649static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
650 struct net_device *dev)
651{
652 struct ip_tunnel *tunnel = netdev_priv(dev);
653 const struct iphdr *tnl_params;
1da177e4 654
cb9f1b78
WB
655 if (!pskb_inet_may_pull(skb))
656 goto free_skb;
657
2e15ea39 658 if (tunnel->collect_md) {
2090714e 659 gre_fb_xmit(skb, dev, skb->protocol);
2e15ea39
PS
660 return NETDEV_TX_OK;
661 }
662
c5441932 663 if (dev->header_ops) {
80d875cf
SY
664 int pull_len = tunnel->hlen + sizeof(struct iphdr);
665
fdafed45 666 if (skb_cow_head(skb, 0))
c5441932 667 goto free_skb;
1da177e4 668
c4a14f6d 669 if (!pskb_may_pull(skb, pull_len))
80d875cf
SY
670 goto free_skb;
671
c4a14f6d
AD
672 tnl_params = (const struct iphdr *)skb->data;
673
80d875cf
SY
674 /* ip_tunnel_xmit() needs skb->data pointing to gre header. */
675 skb_pull(skb, pull_len);
8a0033a9 676 skb_reset_mac_header(skb);
8d21e996
WB
677
678 if (skb->ip_summed == CHECKSUM_PARTIAL &&
679 skb_checksum_start(skb) < skb->data)
680 goto free_skb;
c5441932
PS
681 } else {
682 if (skb_cow_head(skb, dev->needed_headroom))
683 goto free_skb;
1da177e4 684
c5441932 685 tnl_params = &tunnel->parms.iph;
1da177e4
LT
686 }
687
5832c4a7
AL
688 if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
689 tunnel->parms.o_flags)))
aed069df 690 goto free_skb;
8a0033a9 691
c5441932 692 __gre_xmit(skb, dev, tnl_params, skb->protocol);
6ed10654 693 return NETDEV_TX_OK;
1da177e4 694
c5441932 695free_skb:
3acfa1e7 696 kfree_skb(skb);
c4794d22 697 DEV_STATS_INC(dev, tx_dropped);
6ed10654 698 return NETDEV_TX_OK;
1da177e4
LT
699}
700
84e54fe0
WT
701static netdev_tx_t erspan_xmit(struct sk_buff *skb,
702 struct net_device *dev)
703{
704 struct ip_tunnel *tunnel = netdev_priv(dev);
705 bool truncate = false;
20704bd1 706 __be16 proto;
84e54fe0 707
cb9f1b78
WB
708 if (!pskb_inet_may_pull(skb))
709 goto free_skb;
710
1a66a836 711 if (tunnel->collect_md) {
20704bd1 712 erspan_fb_xmit(skb, dev);
1a66a836
WT
713 return NETDEV_TX_OK;
714 }
715
84e54fe0
WT
716 if (gre_handle_offloads(skb, false))
717 goto free_skb;
718
719 if (skb_cow_head(skb, dev->needed_headroom))
720 goto free_skb;
721
f192970d 722 if (skb->len > dev->mtu + dev->hard_header_len) {
aa7cb378
YG
723 if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
724 goto free_skb;
84e54fe0
WT
725 truncate = true;
726 }
727
728 /* Push ERSPAN header */
f989d546
WT
729 if (tunnel->erspan_ver == 0) {
730 proto = htons(ETH_P_ERSPAN);
5832c4a7 731 __clear_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags);
f989d546 732 } else if (tunnel->erspan_ver == 1) {
c69de58b
WT
733 erspan_build_header(skb, ntohl(tunnel->parms.o_key),
734 tunnel->index,
f551c91d 735 truncate, true);
20704bd1
XL
736 proto = htons(ETH_P_ERSPAN);
737 } else if (tunnel->erspan_ver == 2) {
c69de58b 738 erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
f551c91d
WT
739 tunnel->dir, tunnel->hwid,
740 truncate, true);
20704bd1
XL
741 proto = htons(ETH_P_ERSPAN2);
742 } else {
02f99df1 743 goto free_skb;
20704bd1 744 }
f551c91d 745
5832c4a7 746 __clear_bit(IP_TUNNEL_KEY_BIT, tunnel->parms.o_flags);
20704bd1 747 __gre_xmit(skb, dev, &tunnel->parms.iph, proto);
84e54fe0
WT
748 return NETDEV_TX_OK;
749
750free_skb:
751 kfree_skb(skb);
c4794d22 752 DEV_STATS_INC(dev, tx_dropped);
84e54fe0
WT
753 return NETDEV_TX_OK;
754}
755
c5441932
PS
756static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
757 struct net_device *dev)
ee34c1eb 758{
c5441932 759 struct ip_tunnel *tunnel = netdev_priv(dev);
ee34c1eb 760
cb9f1b78
WB
761 if (!pskb_inet_may_pull(skb))
762 goto free_skb;
763
2e15ea39 764 if (tunnel->collect_md) {
2090714e 765 gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
2e15ea39
PS
766 return NETDEV_TX_OK;
767 }
768
5832c4a7
AL
769 if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
770 tunnel->parms.o_flags)))
aed069df 771 goto free_skb;
ee34c1eb 772
c5441932
PS
773 if (skb_cow_head(skb, dev->needed_headroom))
774 goto free_skb;
42aa9162 775
c5441932 776 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
c5441932 777 return NETDEV_TX_OK;
ee34c1eb 778
c5441932 779free_skb:
3acfa1e7 780 kfree_skb(skb);
c4794d22 781 DEV_STATS_INC(dev, tx_dropped);
c5441932 782 return NETDEV_TX_OK;
ee34c1eb
MS
783}
784
dd9d598c
XL
785static void ipgre_link_update(struct net_device *dev, bool set_mtu)
786{
787 struct ip_tunnel *tunnel = netdev_priv(dev);
788 int len;
789
790 len = tunnel->tun_hlen;
791 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
792 len = tunnel->tun_hlen - len;
793 tunnel->hlen = tunnel->hlen + len;
794
fdafed45
CW
795 if (dev->header_ops)
796 dev->hard_header_len += len;
797 else
798 dev->needed_headroom += len;
799
dd9d598c 800 if (set_mtu)
1eb2cded 801 WRITE_ONCE(dev->mtu, max_t(int, dev->mtu - len, 68));
dd9d598c 802
5832c4a7
AL
803 if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags) ||
804 (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
805 tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
020e8f60 806 dev->features &= ~NETIF_F_GSO_SOFTWARE;
1cc5954f 807 dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
020e8f60
PY
808 } else {
809 dev->features |= NETIF_F_GSO_SOFTWARE;
810 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
dd9d598c
XL
811 }
812}
813
117aef12
AL
814static int ipgre_tunnel_ctl(struct net_device *dev,
815 struct ip_tunnel_parm_kern *p,
607259a6 816 int cmd)
1da177e4 817{
5832c4a7 818 __be16 i_flags, o_flags;
a0efab67 819 int err;
1da177e4 820
5832c4a7
AL
821 if (!ip_tunnel_flags_is_be16_compat(p->i_flags) ||
822 !ip_tunnel_flags_is_be16_compat(p->o_flags))
823 return -EOVERFLOW;
824
825 i_flags = ip_tunnel_flags_to_be16(p->i_flags);
826 o_flags = ip_tunnel_flags_to_be16(p->o_flags);
827
6c734fb8 828 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
607259a6
CH
829 if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
830 p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
5832c4a7 831 ((i_flags | o_flags) & (GRE_VERSION | GRE_ROUTING)))
6c734fb8 832 return -EINVAL;
1da177e4 833 }
a0efab67 834
5832c4a7
AL
835 gre_flags_to_tnl_flags(p->i_flags, i_flags);
836 gre_flags_to_tnl_flags(p->o_flags, o_flags);
1da177e4 837
607259a6 838 err = ip_tunnel_ctl(dev, p, cmd);
c5441932
PS
839 if (err)
840 return err;
1da177e4 841
a0efab67
XL
842 if (cmd == SIOCCHGTUNNEL) {
843 struct ip_tunnel *t = netdev_priv(dev);
844
5832c4a7
AL
845 ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags);
846 ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags);
a0efab67
XL
847
848 if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
849 ipgre_link_update(dev, true);
850 }
851
5832c4a7
AL
852 i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
853 ip_tunnel_flags_from_be16(p->i_flags, i_flags);
854 o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
855 ip_tunnel_flags_from_be16(p->o_flags, o_flags);
856
1da177e4
LT
857 return 0;
858}
859
1da177e4
LT
860/* Nice toy. Unfortunately, useless in real life :-)
861 It allows to construct virtual multiprotocol broadcast "LAN"
862 over the Internet, provided multicast routing is tuned.
863
864
   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have an impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).
e905a9ed 869
1da177e4
LT
870 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
871 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
872
873 ping -t 255 224.66.66.66
874
875 If nobody answers, mbone does not work.
876
877 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
878 ip addr add 10.66.66.<somewhat>/24 dev Universe
879 ifconfig Universe up
880 ifconfig Universe add fe80::<Your_real_addr>/10
881 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
882 ftp 10.66.66.66
883 ...
884 ftp fec0:6666:6666::193.233.7.65
885 ...
1da177e4 886 */
3b04ddde
SH
887static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
888 unsigned short type,
1507850b 889 const void *daddr, const void *saddr, unsigned int len)
1da177e4 890{
2941a486 891 struct ip_tunnel *t = netdev_priv(dev);
c5441932
PS
892 struct iphdr *iph;
893 struct gre_base_hdr *greh;
1da177e4 894
d58ff351 895 iph = skb_push(skb, t->hlen + sizeof(*iph));
c5441932 896 greh = (struct gre_base_hdr *)(iph+1);
95f5c64c 897 greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
c5441932 898 greh->protocol = htons(type);
1da177e4 899
c5441932 900 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
e905a9ed 901
c5441932 902 /* Set the source hardware address. */
1da177e4
LT
903 if (saddr)
904 memcpy(&iph->saddr, saddr, 4);
6d55cb91 905 if (daddr)
1da177e4 906 memcpy(&iph->daddr, daddr, 4);
6d55cb91 907 if (iph->daddr)
77a482bd 908 return t->hlen + sizeof(*iph);
e905a9ed 909
c5441932 910 return -(t->hlen + sizeof(*iph));
1da177e4
LT
911}
912
6a5f44d7
TT
913static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
914{
b71d1d42 915 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
916 memcpy(haddr, &iph->saddr, 4);
917 return 4;
918}
919
3b04ddde
SH
920static const struct header_ops ipgre_header_ops = {
921 .create = ipgre_header,
6a5f44d7 922 .parse = ipgre_header_parse,
3b04ddde
SH
923};
924
6a5f44d7 925#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
926static int ipgre_open(struct net_device *dev)
927{
2941a486 928 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 929
f97c1e0c 930 if (ipv4_is_multicast(t->parms.iph.daddr)) {
29b54079
GN
931 struct flowi4 fl4 = {
932 .flowi4_oif = t->parms.link,
2c77bcb3 933 .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(&t->parms.iph)),
29b54079
GN
934 .flowi4_scope = RT_SCOPE_UNIVERSE,
935 .flowi4_proto = IPPROTO_GRE,
936 .saddr = t->parms.iph.saddr,
937 .daddr = t->parms.iph.daddr,
938 .fl4_gre_key = t->parms.o_key,
939 };
cbb1e85f
DM
940 struct rtable *rt;
941
29b54079 942 rt = ip_route_output_key(t->net, &fl4);
b23dd4fe 943 if (IS_ERR(rt))
1da177e4 944 return -EADDRNOTAVAIL;
d8d1f30b 945 dev = rt->dst.dev;
1da177e4 946 ip_rt_put(rt);
51456b29 947 if (!__in_dev_get_rtnl(dev))
1da177e4
LT
948 return -EADDRNOTAVAIL;
949 t->mlink = dev->ifindex;
e5ed6399 950 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
951 }
952 return 0;
953}
954
955static int ipgre_close(struct net_device *dev)
956{
2941a486 957 struct ip_tunnel *t = netdev_priv(dev);
b8c26a33 958
f97c1e0c 959 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 960 struct in_device *in_dev;
b57708ad 961 in_dev = inetdev_by_index(t->net, t->mlink);
8723e1b4 962 if (in_dev)
1da177e4 963 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1da177e4
LT
964 }
965 return 0;
966}
1da177e4
LT
967#endif
968
b8c26a33
SH
969static const struct net_device_ops ipgre_netdev_ops = {
970 .ndo_init = ipgre_tunnel_init,
c5441932 971 .ndo_uninit = ip_tunnel_uninit,
b8c26a33
SH
972#ifdef CONFIG_NET_IPGRE_BROADCAST
973 .ndo_open = ipgre_open,
974 .ndo_stop = ipgre_close,
975#endif
c5441932 976 .ndo_start_xmit = ipgre_xmit,
3e7a1c7c 977 .ndo_siocdevprivate = ip_tunnel_siocdevprivate,
c5441932 978 .ndo_change_mtu = ip_tunnel_change_mtu,
98d7fc46 979 .ndo_get_stats64 = dev_get_tstats64,
1e99584b 980 .ndo_get_iflink = ip_tunnel_get_iflink,
607259a6 981 .ndo_tunnel_ctl = ipgre_tunnel_ctl,
b8c26a33
SH
982};
983
/* Feature bits that every device type in this file turns on at init time. */
#define GRE_FEATURES \
	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
988
1da177e4
LT
989static void ipgre_tunnel_setup(struct net_device *dev)
990{
b8c26a33 991 dev->netdev_ops = &ipgre_netdev_ops;
5a455275 992 dev->type = ARPHRD_IPGRE;
c5441932
PS
993 ip_tunnel_setup(dev, ipgre_net_id);
994}
1da177e4 995
c5441932
PS
996static void __gre_tunnel_init(struct net_device *dev)
997{
998 struct ip_tunnel *tunnel;
999
1000 tunnel = netdev_priv(dev);
95f5c64c 1001 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
c5441932
PS
1002 tunnel->parms.iph.protocol = IPPROTO_GRE;
1003
4565e991 1004 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
fdafed45 1005 dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
4565e991 1006
00d066a4 1007 dev->features |= GRE_FEATURES;
6b78f16e 1008 dev->hw_features |= GRE_FEATURES;
c5441932 1009
020e8f60
PY
1010 /* TCP offload with GRE SEQ is not supported, nor can we support 2
1011 * levels of outer headers requiring an update.
1012 */
5832c4a7 1013 if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags))
020e8f60 1014 return;
5832c4a7
AL
1015 if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
1016 tunnel->encap.type != TUNNEL_ENCAP_NONE)
020e8f60
PY
1017 return;
1018
1019 dev->features |= NETIF_F_GSO_SOFTWARE;
1020 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
00d066a4
AL
1021
1022 dev->lltx = true;
1da177e4
LT
1023}
1024
1025static int ipgre_tunnel_init(struct net_device *dev)
1026{
c5441932
PS
1027 struct ip_tunnel *tunnel = netdev_priv(dev);
1028 struct iphdr *iph = &tunnel->parms.iph;
1da177e4 1029
c5441932 1030 __gre_tunnel_init(dev);
1da177e4 1031
5a1b7e1a 1032 __dev_addr_set(dev, &iph->saddr, 4);
c5441932 1033 memcpy(dev->broadcast, &iph->daddr, 4);
1da177e4 1034
c5441932 1035 dev->flags = IFF_NOARP;
02875878 1036 netif_keep_dst(dev);
c5441932 1037 dev->addr_len = 4;
1da177e4 1038
a64b04d8 1039 if (iph->daddr && !tunnel->collect_md) {
1da177e4 1040#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 1041 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
1042 if (!iph->saddr)
1043 return -EINVAL;
1044 dev->flags = IFF_BROADCAST;
3b04ddde 1045 dev->header_ops = &ipgre_header_ops;
fdafed45
CW
1046 dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1047 dev->needed_headroom = 0;
1da177e4
LT
1048 }
1049#endif
a64b04d8 1050 } else if (!tunnel->collect_md) {
6a5f44d7 1051 dev->header_ops = &ipgre_header_ops;
fdafed45
CW
1052 dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1053 dev->needed_headroom = 0;
a64b04d8 1054 }
1da177e4 1055
c5441932 1056 return ip_tunnel_init(dev);
1da177e4
LT
1057}
1058
9f57c67c
PS
1059static const struct gre_protocol ipgre_protocol = {
1060 .handler = gre_rcv,
1061 .err_handler = gre_err,
1da177e4
LT
1062};
1063
2c8c1e72 1064static int __net_init ipgre_init_net(struct net *net)
59a4c759 1065{
c5441932 1066 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
59a4c759
PE
1067}
1068
a967e01e
KI
1069static void __net_exit ipgre_exit_rtnl(struct net *net,
1070 struct list_head *dev_to_kill)
59a4c759 1071{
a967e01e 1072 ip_tunnel_delete_net(net, ipgre_net_id, &ipgre_link_ops, dev_to_kill);
59a4c759
PE
1073}
1074
1075static struct pernet_operations ipgre_net_ops = {
1076 .init = ipgre_init_net,
a967e01e 1077 .exit_rtnl = ipgre_exit_rtnl,
cfb8fbf2 1078 .id = &ipgre_net_id,
c5441932 1079 .size = sizeof(struct ip_tunnel_net),
59a4c759 1080};
1da177e4 1081
a8b8a889
MS
1082static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1083 struct netlink_ext_ack *extack)
c19e654d
HX
1084{
1085 __be16 flags;
1086
1087 if (!data)
1088 return 0;
1089
1090 flags = 0;
1091 if (data[IFLA_GRE_IFLAGS])
1092 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1093 if (data[IFLA_GRE_OFLAGS])
1094 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1095 if (flags & (GRE_VERSION|GRE_ROUTING))
1096 return -EINVAL;
1097
946b636f
JB
1098 if (data[IFLA_GRE_COLLECT_METADATA] &&
1099 data[IFLA_GRE_ENCAP_TYPE] &&
1100 nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
1101 return -EINVAL;
1102
c19e654d
HX
1103 return 0;
1104}
1105
a8b8a889
MS
1106static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1107 struct netlink_ext_ack *extack)
e1a80002
HX
1108{
1109 __be32 daddr;
1110
1111 if (tb[IFLA_ADDRESS]) {
1112 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1113 return -EINVAL;
1114 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1115 return -EADDRNOTAVAIL;
1116 }
1117
1118 if (!data)
1119 goto out;
1120
1121 if (data[IFLA_GRE_REMOTE]) {
1122 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1123 if (!daddr)
1124 return -EINVAL;
1125 }
1126
1127out:
a8b8a889 1128 return ipgre_tunnel_validate(tb, data, extack);
e1a80002
HX
1129}
1130
84e54fe0
WT
1131static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1132 struct netlink_ext_ack *extack)
1133{
1134 __be16 flags = 0;
1135 int ret;
1136
1137 if (!data)
1138 return 0;
1139
1140 ret = ipgre_tap_validate(tb, data, extack);
1141 if (ret)
1142 return ret;
1143
51fa960d
WT
1144 if (data[IFLA_GRE_ERSPAN_VER] &&
1145 nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
f989d546
WT
1146 return 0;
1147
1148 /* ERSPAN type II/III should only have GRE sequence and key flag */
1a66a836
WT
1149 if (data[IFLA_GRE_OFLAGS])
1150 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1151 if (data[IFLA_GRE_IFLAGS])
1152 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1153 if (!data[IFLA_GRE_COLLECT_METADATA] &&
1154 flags != (GRE_SEQ | GRE_KEY))
84e54fe0
WT
1155 return -EINVAL;
1156
1157 /* ERSPAN Session ID only has 10-bit. Since we reuse
1158 * 32-bit key field as ID, check it's range.
1159 */
1160 if (data[IFLA_GRE_IKEY] &&
1161 (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1162 return -EINVAL;
1163
1164 if (data[IFLA_GRE_OKEY] &&
1165 (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
1166 return -EINVAL;
1167
1168 return 0;
1169}
1170
22a59be8 1171static int ipgre_netlink_parms(struct net_device *dev,
2e15ea39
PS
1172 struct nlattr *data[],
1173 struct nlattr *tb[],
117aef12 1174 struct ip_tunnel_parm_kern *parms,
9830ad4c 1175 __u32 *fwmark)
c19e654d 1176{
22a59be8
PP
1177 struct ip_tunnel *t = netdev_priv(dev);
1178
7bb82d92 1179 memset(parms, 0, sizeof(*parms));
c19e654d
HX
1180
1181 parms->iph.protocol = IPPROTO_GRE;
1182
1183 if (!data)
22a59be8 1184 return 0;
c19e654d
HX
1185
1186 if (data[IFLA_GRE_LINK])
1187 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1188
1189 if (data[IFLA_GRE_IFLAGS])
5832c4a7
AL
1190 gre_flags_to_tnl_flags(parms->i_flags,
1191 nla_get_be16(data[IFLA_GRE_IFLAGS]));
c19e654d
HX
1192
1193 if (data[IFLA_GRE_OFLAGS])
5832c4a7
AL
1194 gre_flags_to_tnl_flags(parms->o_flags,
1195 nla_get_be16(data[IFLA_GRE_OFLAGS]));
c19e654d
HX
1196
1197 if (data[IFLA_GRE_IKEY])
1198 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1199
1200 if (data[IFLA_GRE_OKEY])
1201 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1202
1203 if (data[IFLA_GRE_LOCAL])
67b61f6c 1204 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
c19e654d
HX
1205
1206 if (data[IFLA_GRE_REMOTE])
67b61f6c 1207 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
c19e654d
HX
1208
1209 if (data[IFLA_GRE_TTL])
1210 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1211
1212 if (data[IFLA_GRE_TOS])
1213 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1214
22a59be8
PP
1215 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
1216 if (t->ignore_df)
1217 return -EINVAL;
c19e654d 1218 parms->iph.frag_off = htons(IP_DF);
22a59be8 1219 }
2e15ea39
PS
1220
1221 if (data[IFLA_GRE_COLLECT_METADATA]) {
2e15ea39 1222 t->collect_md = true;
e271c7b4
JB
1223 if (dev->type == ARPHRD_IPGRE)
1224 dev->type = ARPHRD_NONE;
2e15ea39 1225 }
22a59be8
PP
1226
1227 if (data[IFLA_GRE_IGNORE_DF]) {
1228 if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
1229 && (parms->iph.frag_off & htons(IP_DF)))
1230 return -EINVAL;
1231 t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
1232 }
1233
9830ad4c
CG
1234 if (data[IFLA_GRE_FWMARK])
1235 *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
1236
e1f8f78f
PM
1237 return 0;
1238}
1239
1240static int erspan_netlink_parms(struct net_device *dev,
1241 struct nlattr *data[],
1242 struct nlattr *tb[],
117aef12 1243 struct ip_tunnel_parm_kern *parms,
e1f8f78f
PM
1244 __u32 *fwmark)
1245{
1246 struct ip_tunnel *t = netdev_priv(dev);
1247 int err;
1248
1249 err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
1250 if (err)
1251 return err;
32ca98fe
PM
1252 if (!data)
1253 return 0;
e1f8f78f 1254
f551c91d
WT
1255 if (data[IFLA_GRE_ERSPAN_VER]) {
1256 t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
84e54fe0 1257
f989d546 1258 if (t->erspan_ver > 2)
84e54fe0
WT
1259 return -EINVAL;
1260 }
1261
f551c91d
WT
1262 if (t->erspan_ver == 1) {
1263 if (data[IFLA_GRE_ERSPAN_INDEX]) {
1264 t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1265 if (t->index & ~INDEX_MASK)
1266 return -EINVAL;
1267 }
1268 } else if (t->erspan_ver == 2) {
1269 if (data[IFLA_GRE_ERSPAN_DIR]) {
1270 t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1271 if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
1272 return -EINVAL;
1273 }
1274 if (data[IFLA_GRE_ERSPAN_HWID]) {
1275 t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1276 if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
1277 return -EINVAL;
1278 }
1279 }
1280
22a59be8 1281 return 0;
c19e654d
HX
1282}
1283
4565e991
TH
1284/* This function returns true when ENCAP attributes are present in the nl msg */
1285static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1286 struct ip_tunnel_encap *ipencap)
1287{
1288 bool ret = false;
1289
1290 memset(ipencap, 0, sizeof(*ipencap));
1291
1292 if (!data)
1293 return ret;
1294
1295 if (data[IFLA_GRE_ENCAP_TYPE]) {
1296 ret = true;
1297 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1298 }
1299
1300 if (data[IFLA_GRE_ENCAP_FLAGS]) {
1301 ret = true;
1302 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1303 }
1304
1305 if (data[IFLA_GRE_ENCAP_SPORT]) {
1306 ret = true;
3e97fa70 1307 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
4565e991
TH
1308 }
1309
1310 if (data[IFLA_GRE_ENCAP_DPORT]) {
1311 ret = true;
3e97fa70 1312 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
4565e991
TH
1313 }
1314
1315 return ret;
1316}
1317
c5441932 1318static int gre_tap_init(struct net_device *dev)
e1a80002 1319{
c5441932 1320 __gre_tunnel_init(dev);
bec94d43 1321 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
d51711c0 1322 netif_keep_dst(dev);
e1a80002 1323
c5441932 1324 return ip_tunnel_init(dev);
e1a80002
HX
1325}
1326
c5441932
PS
1327static const struct net_device_ops gre_tap_netdev_ops = {
1328 .ndo_init = gre_tap_init,
1329 .ndo_uninit = ip_tunnel_uninit,
1330 .ndo_start_xmit = gre_tap_xmit,
b8c26a33
SH
1331 .ndo_set_mac_address = eth_mac_addr,
1332 .ndo_validate_addr = eth_validate_addr,
c5441932 1333 .ndo_change_mtu = ip_tunnel_change_mtu,
98d7fc46 1334 .ndo_get_stats64 = dev_get_tstats64,
1e99584b 1335 .ndo_get_iflink = ip_tunnel_get_iflink,
fc4099f1 1336 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
b8c26a33
SH
1337};
1338
84e54fe0
WT
1339static int erspan_tunnel_init(struct net_device *dev)
1340{
1341 struct ip_tunnel *tunnel = netdev_priv(dev);
84e54fe0 1342
f989d546
WT
1343 if (tunnel->erspan_ver == 0)
1344 tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */
1345 else
1346 tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */
1347
84e54fe0 1348 tunnel->parms.iph.protocol = IPPROTO_GRE;
c122fda2 1349 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
f551c91d 1350 erspan_hdr_len(tunnel->erspan_ver);
84e54fe0 1351
84e54fe0
WT
1352 dev->features |= GRE_FEATURES;
1353 dev->hw_features |= GRE_FEATURES;
1354 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
c84bed44 1355 netif_keep_dst(dev);
84e54fe0
WT
1356
1357 return ip_tunnel_init(dev);
1358}
1359
1360static const struct net_device_ops erspan_netdev_ops = {
1361 .ndo_init = erspan_tunnel_init,
1362 .ndo_uninit = ip_tunnel_uninit,
1363 .ndo_start_xmit = erspan_xmit,
1364 .ndo_set_mac_address = eth_mac_addr,
1365 .ndo_validate_addr = eth_validate_addr,
1366 .ndo_change_mtu = ip_tunnel_change_mtu,
98d7fc46 1367 .ndo_get_stats64 = dev_get_tstats64,
84e54fe0
WT
1368 .ndo_get_iflink = ip_tunnel_get_iflink,
1369 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
1370};
1371
e1a80002
HX
1372static void ipgre_tap_setup(struct net_device *dev)
1373{
e1a80002 1374 ether_setup(dev);
cfddd4c3 1375 dev->max_mtu = 0;
d13b161c
JB
1376 dev->netdev_ops = &gre_tap_netdev_ops;
1377 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1378 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
c5441932 1379 ip_tunnel_setup(dev, gre_tap_net_id);
e1a80002
HX
1380}
1381
e1f8f78f
PM
1382static int
1383ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
c19e654d 1384{
4565e991
TH
1385 struct ip_tunnel_encap ipencap;
1386
1387 if (ipgre_netlink_encap_parms(data, &ipencap)) {
1388 struct ip_tunnel *t = netdev_priv(dev);
e1f8f78f 1389 int err = ip_tunnel_encap_setup(t, &ipencap);
4565e991
TH
1390
1391 if (err < 0)
1392 return err;
1393 }
c19e654d 1394
e1f8f78f
PM
1395 return 0;
1396}
1397
69c7be1b
XL
1398static int ipgre_newlink(struct net_device *dev,
1399 struct rtnl_newlink_params *params,
e1f8f78f
PM
1400 struct netlink_ext_ack *extack)
1401{
69c7be1b
XL
1402 struct nlattr **data = params->data;
1403 struct nlattr **tb = params->tb;
117aef12 1404 struct ip_tunnel_parm_kern p;
e1f8f78f
PM
1405 __u32 fwmark = 0;
1406 int err;
1407
1408 err = ipgre_newlink_encap_setup(dev, data);
1409 if (err)
1410 return err;
1411
9830ad4c 1412 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
22a59be8
PP
1413 if (err < 0)
1414 return err;
eacb1160
XL
1415 return ip_tunnel_newlink(params->link_net ? : dev_net(dev), dev, tb, &p,
1416 fwmark);
c19e654d
HX
1417}
1418
69c7be1b
XL
1419static int erspan_newlink(struct net_device *dev,
1420 struct rtnl_newlink_params *params,
e1f8f78f
PM
1421 struct netlink_ext_ack *extack)
1422{
69c7be1b
XL
1423 struct nlattr **data = params->data;
1424 struct nlattr **tb = params->tb;
117aef12 1425 struct ip_tunnel_parm_kern p;
e1f8f78f
PM
1426 __u32 fwmark = 0;
1427 int err;
1428
1429 err = ipgre_newlink_encap_setup(dev, data);
1430 if (err)
1431 return err;
1432
1433 err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1434 if (err)
1435 return err;
eacb1160
XL
1436 return ip_tunnel_newlink(params->link_net ? : dev_net(dev), dev, tb, &p,
1437 fwmark);
e1f8f78f
PM
1438}
1439
c19e654d 1440static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
ad744b22
MS
1441 struct nlattr *data[],
1442 struct netlink_ext_ack *extack)
c19e654d 1443{
9830ad4c 1444 struct ip_tunnel *t = netdev_priv(dev);
117aef12 1445 struct ip_tunnel_parm_kern p;
9830ad4c 1446 __u32 fwmark = t->fwmark;
22a59be8 1447 int err;
4565e991 1448
e1f8f78f
PM
1449 err = ipgre_newlink_encap_setup(dev, data);
1450 if (err)
1451 return err;
c19e654d 1452
9830ad4c 1453 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
22a59be8
PP
1454 if (err < 0)
1455 return err;
dd9d598c
XL
1456
1457 err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1458 if (err < 0)
1459 return err;
1460
5832c4a7
AL
1461 ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
1462 ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);
dd9d598c 1463
e1f8f78f
PM
1464 ipgre_link_update(dev, !tb[IFLA_MTU]);
1465
1466 return 0;
1467}
1468
1469static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
1470 struct nlattr *data[],
1471 struct netlink_ext_ack *extack)
1472{
1473 struct ip_tunnel *t = netdev_priv(dev);
117aef12 1474 struct ip_tunnel_parm_kern p;
e1f8f78f 1475 __u32 fwmark = t->fwmark;
e1f8f78f
PM
1476 int err;
1477
1478 err = ipgre_newlink_encap_setup(dev, data);
1479 if (err)
1480 return err;
1481
1482 err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1483 if (err < 0)
1484 return err;
1485
1486 err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1487 if (err < 0)
1488 return err;
1489
5832c4a7
AL
1490 ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
1491 ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);
dd9d598c
XL
1492
1493 return 0;
c19e654d
HX
1494}
1495
1496static size_t ipgre_get_size(const struct net_device *dev)
1497{
1498 return
1499 /* IFLA_GRE_LINK */
1500 nla_total_size(4) +
1501 /* IFLA_GRE_IFLAGS */
1502 nla_total_size(2) +
1503 /* IFLA_GRE_OFLAGS */
1504 nla_total_size(2) +
1505 /* IFLA_GRE_IKEY */
1506 nla_total_size(4) +
1507 /* IFLA_GRE_OKEY */
1508 nla_total_size(4) +
1509 /* IFLA_GRE_LOCAL */
1510 nla_total_size(4) +
1511 /* IFLA_GRE_REMOTE */
1512 nla_total_size(4) +
1513 /* IFLA_GRE_TTL */
1514 nla_total_size(1) +
1515 /* IFLA_GRE_TOS */
1516 nla_total_size(1) +
1517 /* IFLA_GRE_PMTUDISC */
1518 nla_total_size(1) +
4565e991
TH
1519 /* IFLA_GRE_ENCAP_TYPE */
1520 nla_total_size(2) +
1521 /* IFLA_GRE_ENCAP_FLAGS */
1522 nla_total_size(2) +
1523 /* IFLA_GRE_ENCAP_SPORT */
1524 nla_total_size(2) +
1525 /* IFLA_GRE_ENCAP_DPORT */
1526 nla_total_size(2) +
2e15ea39
PS
1527 /* IFLA_GRE_COLLECT_METADATA */
1528 nla_total_size(0) +
22a59be8
PP
1529 /* IFLA_GRE_IGNORE_DF */
1530 nla_total_size(1) +
9830ad4c
CG
1531 /* IFLA_GRE_FWMARK */
1532 nla_total_size(4) +
84e54fe0
WT
1533 /* IFLA_GRE_ERSPAN_INDEX */
1534 nla_total_size(4) +
f551c91d
WT
1535 /* IFLA_GRE_ERSPAN_VER */
1536 nla_total_size(1) +
1537 /* IFLA_GRE_ERSPAN_DIR */
1538 nla_total_size(1) +
1539 /* IFLA_GRE_ERSPAN_HWID */
1540 nla_total_size(2) +
c19e654d
HX
1541 0;
1542}
1543
1544static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1545{
1546 struct ip_tunnel *t = netdev_priv(dev);
117aef12 1547 struct ip_tunnel_parm_kern *p = &t->parms;
5832c4a7
AL
1548 IP_TUNNEL_DECLARE_FLAGS(o_flags);
1549
1550 ip_tunnel_flags_copy(o_flags, p->o_flags);
feaf5c79 1551
f3756b79 1552 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
95f5c64c
TH
1553 nla_put_be16(skb, IFLA_GRE_IFLAGS,
1554 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1555 nla_put_be16(skb, IFLA_GRE_OFLAGS,
feaf5c79 1556 gre_tnl_flags_to_gre_flags(o_flags)) ||
f3756b79
DM
1557 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1558 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
930345ea
JB
1559 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1560 nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
f3756b79
DM
1561 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1562 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1563 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
9830ad4c
CG
1564 !!(p->iph.frag_off & htons(IP_DF))) ||
1565 nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
f3756b79 1566 goto nla_put_failure;
4565e991
TH
1567
1568 if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1569 t->encap.type) ||
3e97fa70
SD
1570 nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1571 t->encap.sport) ||
1572 nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1573 t->encap.dport) ||
4565e991 1574 nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
e1b2cb65 1575 t->encap.flags))
4565e991
TH
1576 goto nla_put_failure;
1577
22a59be8
PP
1578 if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1579 goto nla_put_failure;
1580
2e15ea39
PS
1581 if (t->collect_md) {
1582 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1583 goto nla_put_failure;
1584 }
1585
c19e654d
HX
1586 return 0;
1587
1588nla_put_failure:
1589 return -EMSGSIZE;
1590}
1591
ee496694
HL
1592static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
1593{
1594 struct ip_tunnel *t = netdev_priv(dev);
1595
1596 if (t->erspan_ver <= 2) {
1597 if (t->erspan_ver != 0 && !t->collect_md)
5832c4a7 1598 __set_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags);
ee496694
HL
1599
1600 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
1601 goto nla_put_failure;
1602
1603 if (t->erspan_ver == 1) {
1604 if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1605 goto nla_put_failure;
1606 } else if (t->erspan_ver == 2) {
1607 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
1608 goto nla_put_failure;
1609 if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
1610 goto nla_put_failure;
1611 }
1612 }
1613
1614 return ipgre_fill_info(skb, dev);
1615
1616nla_put_failure:
1617 return -EMSGSIZE;
1618}
1619
84e54fe0
WT
1620static void erspan_setup(struct net_device *dev)
1621{
84581bda
XL
1622 struct ip_tunnel *t = netdev_priv(dev);
1623
84e54fe0 1624 ether_setup(dev);
0e141f75 1625 dev->max_mtu = 0;
84e54fe0
WT
1626 dev->netdev_ops = &erspan_netdev_ops;
1627 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1628 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1629 ip_tunnel_setup(dev, erspan_net_id);
84581bda 1630 t->erspan_ver = 1;
84e54fe0
WT
1631}
1632
c19e654d
HX
1633static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1634 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1635 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1636 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1637 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1638 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
c593642c
PB
1639 [IFLA_GRE_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) },
1640 [IFLA_GRE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) },
c19e654d
HX
1641 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1642 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1643 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
4565e991
TH
1644 [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
1645 [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
1646 [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
1647 [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
2e15ea39 1648 [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
22a59be8 1649 [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
9830ad4c 1650 [IFLA_GRE_FWMARK] = { .type = NLA_U32 },
84e54fe0 1651 [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
f551c91d
WT
1652 [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 },
1653 [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 },
1654 [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 },
c19e654d
HX
1655};
1656
1657static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1658 .kind = "gre",
1659 .maxtype = IFLA_GRE_MAX,
1660 .policy = ipgre_policy,
1661 .priv_size = sizeof(struct ip_tunnel),
1662 .setup = ipgre_tunnel_setup,
1663 .validate = ipgre_tunnel_validate,
1664 .newlink = ipgre_newlink,
1665 .changelink = ipgre_changelink,
c5441932 1666 .dellink = ip_tunnel_dellink,
c19e654d
HX
1667 .get_size = ipgre_get_size,
1668 .fill_info = ipgre_fill_info,
1728d4fa 1669 .get_link_net = ip_tunnel_get_link_net,
c19e654d
HX
1670};
1671
e1a80002
HX
1672static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1673 .kind = "gretap",
1674 .maxtype = IFLA_GRE_MAX,
1675 .policy = ipgre_policy,
1676 .priv_size = sizeof(struct ip_tunnel),
1677 .setup = ipgre_tap_setup,
1678 .validate = ipgre_tap_validate,
1679 .newlink = ipgre_newlink,
1680 .changelink = ipgre_changelink,
c5441932 1681 .dellink = ip_tunnel_dellink,
e1a80002
HX
1682 .get_size = ipgre_get_size,
1683 .fill_info = ipgre_fill_info,
1728d4fa 1684 .get_link_net = ip_tunnel_get_link_net,
e1a80002
HX
1685};
1686
84e54fe0
WT
1687static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1688 .kind = "erspan",
1689 .maxtype = IFLA_GRE_MAX,
1690 .policy = ipgre_policy,
1691 .priv_size = sizeof(struct ip_tunnel),
1692 .setup = erspan_setup,
1693 .validate = erspan_validate,
e1f8f78f
PM
1694 .newlink = erspan_newlink,
1695 .changelink = erspan_changelink,
84e54fe0
WT
1696 .dellink = ip_tunnel_dellink,
1697 .get_size = ipgre_get_size,
ee496694 1698 .fill_info = erspan_fill_info,
84e54fe0
WT
1699 .get_link_net = ip_tunnel_get_link_net,
1700};
1701
b2acd1dc
PS
1702struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1703 u8 name_assign_type)
1704{
eacb1160 1705 struct rtnl_newlink_params params = { .src_net = net };
b2acd1dc
PS
1706 struct nlattr *tb[IFLA_MAX + 1];
1707 struct net_device *dev;
106da663 1708 LIST_HEAD(list_kill);
b2acd1dc
PS
1709 struct ip_tunnel *t;
1710 int err;
1711
1712 memset(&tb, 0, sizeof(tb));
69c7be1b 1713 params.tb = tb;
b2acd1dc
PS
1714
1715 dev = rtnl_create_link(net, name, name_assign_type,
d0522f1c 1716 &ipgre_tap_ops, tb, NULL);
b2acd1dc
PS
1717 if (IS_ERR(dev))
1718 return dev;
1719
1720 /* Configure flow based GRE device. */
1721 t = netdev_priv(dev);
1722 t->collect_md = true;
1723
69c7be1b 1724 err = ipgre_newlink(dev, &params, NULL);
106da663
ND
1725 if (err < 0) {
1726 free_netdev(dev);
1727 return ERR_PTR(err);
1728 }
7e059158
DW
1729
1730 /* openvswitch users expect packet sizes to be unrestricted,
1731 * so set the largest MTU we can.
1732 */
1733 err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1734 if (err)
1735 goto out;
1736
1d997f10 1737 err = rtnl_configure_link(dev, NULL, 0, NULL);
da6f1da8
ND
1738 if (err < 0)
1739 goto out;
1740
b2acd1dc
PS
1741 return dev;
1742out:
106da663
ND
1743 ip_tunnel_dellink(dev, &list_kill);
1744 unregister_netdevice_many(&list_kill);
b2acd1dc
PS
1745 return ERR_PTR(err);
1746}
1747EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1748
c5441932
PS
1749static int __net_init ipgre_tap_init_net(struct net *net)
1750{
2e15ea39 1751 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
c5441932
PS
1752}
1753
a967e01e
KI
1754static void __net_exit ipgre_tap_exit_rtnl(struct net *net,
1755 struct list_head *dev_to_kill)
c5441932 1756{
a967e01e 1757 ip_tunnel_delete_net(net, gre_tap_net_id, &ipgre_tap_ops, dev_to_kill);
c5441932
PS
1758}
1759
1760static struct pernet_operations ipgre_tap_net_ops = {
1761 .init = ipgre_tap_init_net,
a967e01e 1762 .exit_rtnl = ipgre_tap_exit_rtnl,
c5441932
PS
1763 .id = &gre_tap_net_id,
1764 .size = sizeof(struct ip_tunnel_net),
1765};
1da177e4 1766
84e54fe0
WT
1767static int __net_init erspan_init_net(struct net *net)
1768{
1769 return ip_tunnel_init_net(net, erspan_net_id,
1770 &erspan_link_ops, "erspan0");
1771}
1772
a967e01e
KI
1773static void __net_exit erspan_exit_rtnl(struct net *net,
1774 struct list_head *dev_to_kill)
84e54fe0 1775{
a967e01e 1776 ip_tunnel_delete_net(net, erspan_net_id, &erspan_link_ops, dev_to_kill);
84e54fe0
WT
1777}
1778
1779static struct pernet_operations erspan_net_ops = {
1780 .init = erspan_init_net,
a967e01e 1781 .exit_rtnl = erspan_exit_rtnl,
84e54fe0
WT
1782 .id = &erspan_net_id,
1783 .size = sizeof(struct ip_tunnel_net),
1784};
1785
1da177e4
LT
1786static int __init ipgre_init(void)
1787{
1788 int err;
1789
058bd4d2 1790 pr_info("GRE over IPv4 tunneling driver\n");
1da177e4 1791
cfb8fbf2 1792 err = register_pernet_device(&ipgre_net_ops);
59a4c759 1793 if (err < 0)
c2892f02
AD
1794 return err;
1795
c5441932
PS
1796 err = register_pernet_device(&ipgre_tap_net_ops);
1797 if (err < 0)
e3d0328c 1798 goto pnet_tap_failed;
c5441932 1799
84e54fe0
WT
1800 err = register_pernet_device(&erspan_net_ops);
1801 if (err < 0)
1802 goto pnet_erspan_failed;
1803
9f57c67c 1804 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02 1805 if (err < 0) {
058bd4d2 1806 pr_info("%s: can't add protocol\n", __func__);
c2892f02
AD
1807 goto add_proto_failed;
1808 }
7daa0004 1809
c19e654d
HX
1810 err = rtnl_link_register(&ipgre_link_ops);
1811 if (err < 0)
1812 goto rtnl_link_failed;
1813
e1a80002
HX
1814 err = rtnl_link_register(&ipgre_tap_ops);
1815 if (err < 0)
1816 goto tap_ops_failed;
1817
84e54fe0
WT
1818 err = rtnl_link_register(&erspan_link_ops);
1819 if (err < 0)
1820 goto erspan_link_failed;
1821
c5441932 1822 return 0;
c19e654d 1823
84e54fe0
WT
1824erspan_link_failed:
1825 rtnl_link_unregister(&ipgre_tap_ops);
e1a80002
HX
1826tap_ops_failed:
1827 rtnl_link_unregister(&ipgre_link_ops);
c19e654d 1828rtnl_link_failed:
9f57c67c 1829 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02 1830add_proto_failed:
84e54fe0
WT
1831 unregister_pernet_device(&erspan_net_ops);
1832pnet_erspan_failed:
c5441932 1833 unregister_pernet_device(&ipgre_tap_net_ops);
e3d0328c 1834pnet_tap_failed:
c2892f02 1835 unregister_pernet_device(&ipgre_net_ops);
c5441932 1836 return err;
1da177e4
LT
1837}
1838
db44575f 1839static void __exit ipgre_fini(void)
1da177e4 1840{
e1a80002 1841 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d 1842 rtnl_link_unregister(&ipgre_link_ops);
84e54fe0 1843 rtnl_link_unregister(&erspan_link_ops);
9f57c67c 1844 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
c5441932 1845 unregister_pernet_device(&ipgre_tap_net_ops);
c2892f02 1846 unregister_pernet_device(&ipgre_net_ops);
84e54fe0 1847 unregister_pernet_device(&erspan_net_ops);
1da177e4
LT
1848}
1849
1850module_init(ipgre_init);
1851module_exit(ipgre_fini);
b058a5d2 1852MODULE_DESCRIPTION("IPv4 GRE tunnels over IP library");
1da177e4 1853MODULE_LICENSE("GPL");
4d74f8ba
PM
1854MODULE_ALIAS_RTNL_LINK("gre");
1855MODULE_ALIAS_RTNL_LINK("gretap");
84e54fe0 1856MODULE_ALIAS_RTNL_LINK("erspan");
8909c9ad 1857MODULE_ALIAS_NETDEV("gre0");
c5441932 1858MODULE_ALIAS_NETDEV("gretap0");
84e54fe0 1859MODULE_ALIAS_NETDEV("erspan0");