x86/kvm/hyper-v: don't announce GUEST IDLE MSR support
[linux-2.6-block.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
afd46503
JP
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
4fc268d2 15#include <linux/capability.h>
1da177e4
LT
16#include <linux/module.h>
17#include <linux/types.h>
1da177e4 18#include <linux/kernel.h>
5a0e3ad6 19#include <linux/slab.h>
7c0f6ba6 20#include <linux/uaccess.h>
1da177e4
LT
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/in.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
26#include <linux/if_arp.h>
2e15ea39 27#include <linux/if_vlan.h>
1da177e4
LT
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/igmp.h>
32#include <linux/netfilter_ipv4.h>
e1a80002 33#include <linux/etherdevice.h>
46f25dff 34#include <linux/if_ether.h>
1da177e4
LT
35
36#include <net/sock.h>
37#include <net/ip.h>
38#include <net/icmp.h>
39#include <net/protocol.h>
c5441932 40#include <net/ip_tunnels.h>
1da177e4
LT
41#include <net/arp.h>
42#include <net/checksum.h>
43#include <net/dsfield.h>
44#include <net/inet_ecn.h>
45#include <net/xfrm.h>
59a4c759
PE
46#include <net/net_namespace.h>
47#include <net/netns/generic.h>
c19e654d 48#include <net/rtnetlink.h>
00959ade 49#include <net/gre.h>
2e15ea39 50#include <net/dst_metadata.h>
84e54fe0 51#include <net/erspan.h>
1da177e4 52
1da177e4
LT
53/*
54 Problems & solutions
55 --------------------
56
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
61
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
6d0722a2 65 and silently drop packet when it expires. It is a good
bff52857 66 solution, but it supposes maintaining new variable in ALL
1da177e4
LT
67 skb, even if no tunneling is used.
68
6d0722a2
ED
69 Current solution: xmit_recursion breaks dead loops. This is a percpu
70 counter, since when we enter the first ndo_xmit(), cpu migration is
71 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
1da177e4
LT
72
73 2. Networking dead loops would not kill routers, but would really
74 kill network. IP hop limit plays role of "t->recursion" in this case,
75 if we copy it from packet being encapsulated to upper header.
76 It is very good solution, but it introduces two problems:
77
78 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
79 do not work over tunnels.
80 - traceroute does not work. I planned to relay ICMP from tunnel,
81 so that this problem would be solved and traceroute output
82 would even more informative. This idea appeared to be wrong:
83 only Linux complies to rfc1812 now (yes, guys, Linux is the only
84 true router now :-)), all routers (at least, in neighbourhood of mine)
85 return only 8 bytes of payload. It is the end.
86
87 Hence, if we want that OSPF worked or traceroute said something reasonable,
88 we should search for another solution.
89
90 One of them is to parse packet trying to detect inner encapsulation
91 made by our node. It is difficult or even impossible, especially,
bff52857 92 taking into account fragmentation. To be short, TTL is not a solution at all.
1da177e4
LT
93
94 Current solution: The solution was UNEXPECTEDLY SIMPLE.
95 We force DF flag on tunnels with preconfigured hop limit,
96 that is ALL. :-) Well, it does not remove the problem completely,
97 but exponential growth of network traffic is changed to linear
98 (branches, that exceed pmtu are pruned) and tunnel mtu
bff52857 99 rapidly degrades to value <68, where looping stops.
1da177e4
LT
100 Yes, it is not good if there exists a router in the loop,
101 which does not force DF, even when encapsulating packets have DF set.
102 But it is not our problem! Nobody could accuse us, we made
103 all that we could make. Even if it is your gated who injected
104 fatal route to network, even if it were you who configured
105 fatal static route: you are innocent. :-)
106
1da177e4
LT
107 Alexey Kuznetsov.
108 */
109
eccc1bb8 110static bool log_ecn_error = true;
111module_param(log_ecn_error, bool, 0644);
112MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
113
c19e654d 114static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4 115static int ipgre_tunnel_init(struct net_device *dev);
1a66a836 116static void erspan_build_header(struct sk_buff *skb,
c69de58b 117 u32 id, u32 index,
a3222dc9 118 bool truncate, bool is_ipv4);
eb8ce741 119
c7d03a00
AD
120static unsigned int ipgre_net_id __read_mostly;
121static unsigned int gre_tap_net_id __read_mostly;
84e54fe0 122static unsigned int erspan_net_id __read_mostly;
1da177e4 123
32bbd879
SB
124static int ipgre_err(struct sk_buff *skb, u32 info,
125 const struct tnl_ptk_info *tpi)
1da177e4 126{
1da177e4 127
c5441932
PS
128 /* All the routers (except for Linux) return only
129 8 bytes of packet payload. It means, that precise relaying of
130 ICMP in the real Internet is absolutely infeasible.
1da177e4 131
c5441932
PS
132 Moreover, Cisco "wise men" put GRE key to the third word
133 in GRE header. It makes impossible maintaining even soft
134 state for keyed GRE tunnels with enabled checksum. Tell
135 them "thank you".
1da177e4 136
c5441932
PS
137 Well, I wonder, rfc1812 was written by Cisco employee,
138 what the hell these idiots break standards established
139 by themselves???
140 */
141 struct net *net = dev_net(skb->dev);
142 struct ip_tunnel_net *itn;
96f5a846 143 const struct iphdr *iph;
88c7664f
ACM
144 const int type = icmp_hdr(skb)->type;
145 const int code = icmp_hdr(skb)->code;
20e1954f 146 unsigned int data_len = 0;
1da177e4 147 struct ip_tunnel *t;
1da177e4 148
32bbd879
SB
149 if (tpi->proto == htons(ETH_P_TEB))
150 itn = net_generic(net, gre_tap_net_id);
151 else if (tpi->proto == htons(ETH_P_ERSPAN) ||
152 tpi->proto == htons(ETH_P_ERSPAN2))
153 itn = net_generic(net, erspan_net_id);
154 else
155 itn = net_generic(net, ipgre_net_id);
156
157 iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
158 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
159 iph->daddr, iph->saddr, tpi->key);
160
161 if (!t)
162 return -ENOENT;
163
1da177e4
LT
164 switch (type) {
165 default:
166 case ICMP_PARAMETERPROB:
32bbd879 167 return 0;
1da177e4
LT
168
169 case ICMP_DEST_UNREACH:
170 switch (code) {
171 case ICMP_SR_FAILED:
172 case ICMP_PORT_UNREACH:
173 /* Impossible event. */
32bbd879 174 return 0;
1da177e4
LT
175 default:
176 /* All others are translated to HOST_UNREACH.
177 rfc2003 contains "deep thoughts" about NET_UNREACH,
178 I believe they are just ether pollution. --ANK
179 */
180 break;
181 }
182 break;
9f57c67c 183
1da177e4
LT
184 case ICMP_TIME_EXCEEDED:
185 if (code != ICMP_EXC_TTL)
32bbd879 186 return 0;
20e1954f 187 data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
1da177e4 188 break;
55be7a9c
DM
189
190 case ICMP_REDIRECT:
191 break;
1da177e4
LT
192 }
193
9b8c6d7b
ED
194#if IS_ENABLED(CONFIG_IPV6)
195 if (tpi->proto == htons(ETH_P_IPV6) &&
20e1954f
ED
196 !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
197 type, data_len))
32bbd879 198 return 0;
9b8c6d7b
ED
199#endif
200
36393395 201 if (t->parms.iph.daddr == 0 ||
f97c1e0c 202 ipv4_is_multicast(t->parms.iph.daddr))
32bbd879 203 return 0;
1da177e4
LT
204
205 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
32bbd879 206 return 0;
1da177e4 207
da6185d8 208 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
209 t->err_count++;
210 else
211 t->err_count = 1;
212 t->err_time = jiffies;
32bbd879
SB
213
214 return 0;
9f57c67c
PS
215}
216
217static void gre_err(struct sk_buff *skb, u32 info)
218{
219 /* All the routers (except for Linux) return only
220 * 8 bytes of packet payload. It means, that precise relaying of
221 * ICMP in the real Internet is absolutely infeasible.
222 *
223 * Moreover, Cisco "wise men" put GRE key to the third word
224 * in GRE header. It makes impossible maintaining even soft
225 * state for keyed
226 * GRE tunnels with enabled checksum. Tell them "thank you".
227 *
228 * Well, I wonder, rfc1812 was written by Cisco employee,
229 * what the hell these idiots break standards established
230 * by themselves???
231 */
232
e582615a 233 const struct iphdr *iph = (struct iphdr *)skb->data;
9f57c67c
PS
234 const int type = icmp_hdr(skb)->type;
235 const int code = icmp_hdr(skb)->code;
236 struct tnl_ptk_info tpi;
9f57c67c 237
b0350d51
HY
238 if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
239 iph->ihl * 4) < 0)
240 return;
9f57c67c
PS
241
242 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
243 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
d888f396 244 skb->dev->ifindex, IPPROTO_GRE);
9f57c67c
PS
245 return;
246 }
247 if (type == ICMP_REDIRECT) {
1042caa7
MÅ»
248 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
249 IPPROTO_GRE);
9f57c67c
PS
250 return;
251 }
252
253 ipgre_err(skb, info, &tpi);
1da177e4
LT
254}
255
84e54fe0
WT
256static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
257 int gre_hdr_len)
258{
259 struct net *net = dev_net(skb->dev);
260 struct metadata_dst *tun_dst = NULL;
1d7e2ed2
WT
261 struct erspan_base_hdr *ershdr;
262 struct erspan_metadata *pkt_md;
84e54fe0
WT
263 struct ip_tunnel_net *itn;
264 struct ip_tunnel *tunnel;
84e54fe0 265 const struct iphdr *iph;
3df19283 266 struct erspan_md2 *md2;
1d7e2ed2 267 int ver;
84e54fe0
WT
268 int len;
269
270 itn = net_generic(net, erspan_net_id);
84e54fe0
WT
271 len = gre_hdr_len + sizeof(*ershdr);
272
1d7e2ed2 273 /* Check based hdr len */
84e54fe0 274 if (unlikely(!pskb_may_pull(skb, len)))
c05fad57 275 return PACKET_REJECT;
84e54fe0
WT
276
277 iph = ip_hdr(skb);
1d7e2ed2 278 ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
c69de58b 279 ver = ershdr->ver;
84e54fe0
WT
280
281 /* The original GRE header does not have key field,
282 * Use ERSPAN 10-bit session ID as key.
283 */
c69de58b 284 tpi->key = cpu_to_be32(get_session_id(ershdr));
84e54fe0
WT
285 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
286 tpi->flags | TUNNEL_KEY,
287 iph->saddr, iph->daddr, tpi->key);
288
289 if (tunnel) {
1d7e2ed2
WT
290 len = gre_hdr_len + erspan_hdr_len(ver);
291 if (unlikely(!pskb_may_pull(skb, len)))
ae3e1337 292 return PACKET_REJECT;
1d7e2ed2 293
d91e8db5
WT
294 ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
295 pkt_md = (struct erspan_metadata *)(ershdr + 1);
296
84e54fe0 297 if (__iptunnel_pull_header(skb,
1d7e2ed2 298 len,
84e54fe0
WT
299 htons(ETH_P_TEB),
300 false, false) < 0)
301 goto drop;
302
1a66a836
WT
303 if (tunnel->collect_md) {
304 struct ip_tunnel_info *info;
305 struct erspan_metadata *md;
306 __be64 tun_id;
307 __be16 flags;
308
309 tpi->flags |= TUNNEL_KEY;
310 flags = tpi->flags;
311 tun_id = key32_to_tunnel_id(tpi->key);
312
313 tun_dst = ip_tun_rx_dst(skb, flags,
314 tun_id, sizeof(*md));
315 if (!tun_dst)
316 return PACKET_REJECT;
317
318 md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
f551c91d 319 md->version = ver;
3df19283
WT
320 md2 = &md->u.md2;
321 memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
322 ERSPAN_V2_MDSIZE);
f551c91d 323
1a66a836
WT
324 info = &tun_dst->u.tun_info;
325 info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
326 info->options_len = sizeof(*md);
1a66a836
WT
327 }
328
84e54fe0
WT
329 skb_reset_mac_header(skb);
330 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
331 return PACKET_RCVD;
332 }
5a64506b
HY
333 return PACKET_REJECT;
334
84e54fe0
WT
335drop:
336 kfree_skb(skb);
337 return PACKET_RCVD;
338}
339
125372fa
JB
340static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
341 struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
1da177e4 342{
2e15ea39 343 struct metadata_dst *tun_dst = NULL;
b71d1d42 344 const struct iphdr *iph;
1da177e4 345 struct ip_tunnel *tunnel;
1da177e4 346
c5441932 347 iph = ip_hdr(skb);
bda7bb46
PS
348 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
349 iph->saddr, iph->daddr, tpi->key);
e1a80002 350
d2083287 351 if (tunnel) {
125372fa
JB
352 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
353 raw_proto, false) < 0)
244a797b
JB
354 goto drop;
355
e271c7b4
JB
356 if (tunnel->dev->type != ARPHRD_NONE)
357 skb_pop_mac_header(skb);
358 else
359 skb_reset_mac_header(skb);
2e15ea39 360 if (tunnel->collect_md) {
c29a70d2
PS
361 __be16 flags;
362 __be64 tun_id;
2e15ea39 363
c29a70d2 364 flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
d817f432 365 tun_id = key32_to_tunnel_id(tpi->key);
c29a70d2 366 tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
2e15ea39
PS
367 if (!tun_dst)
368 return PACKET_REJECT;
2e15ea39
PS
369 }
370
371 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
bda7bb46 372 return PACKET_RCVD;
1da177e4 373 }
125372fa 374 return PACKET_NEXT;
244a797b
JB
375
376drop:
377 kfree_skb(skb);
378 return PACKET_RCVD;
1da177e4
LT
379}
380
125372fa
JB
381static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
382 int hdr_len)
383{
384 struct net *net = dev_net(skb->dev);
385 struct ip_tunnel_net *itn;
386 int res;
387
388 if (tpi->proto == htons(ETH_P_TEB))
389 itn = net_generic(net, gre_tap_net_id);
390 else
391 itn = net_generic(net, ipgre_net_id);
392
393 res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
394 if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
395 /* ipgre tunnels in collect metadata mode should receive
396 * also ETH_P_TEB traffic.
397 */
398 itn = net_generic(net, ipgre_net_id);
399 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
400 }
401 return res;
402}
403
9f57c67c
PS
404static int gre_rcv(struct sk_buff *skb)
405{
406 struct tnl_ptk_info tpi;
407 bool csum_err = false;
95f5c64c 408 int hdr_len;
9f57c67c
PS
409
410#ifdef CONFIG_NET_IPGRE_BROADCAST
411 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
412 /* Looped back packet, drop it! */
413 if (rt_is_output_route(skb_rtable(skb)))
414 goto drop;
415 }
416#endif
417
e582615a 418 hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
f132ae7c 419 if (hdr_len < 0)
95f5c64c
TH
420 goto drop;
421
f551c91d
WT
422 if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
423 tpi.proto == htons(ETH_P_ERSPAN2))) {
84e54fe0
WT
424 if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
425 return 0;
dd8d5b8c 426 goto out;
84e54fe0
WT
427 }
428
244a797b 429 if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
9f57c67c
PS
430 return 0;
431
dd8d5b8c 432out:
9f57c67c
PS
433 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
434drop:
435 kfree_skb(skb);
436 return 0;
437}
438
c5441932
PS
439static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
440 const struct iphdr *tnl_params,
441 __be16 proto)
442{
443 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 444
c5441932
PS
445 if (tunnel->parms.o_flags & TUNNEL_SEQ)
446 tunnel->o_seqno++;
1da177e4 447
c5441932 448 /* Push GRE header. */
182a352d
TH
449 gre_build_header(skb, tunnel->tun_hlen,
450 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
451 htonl(tunnel->o_seqno));
54bc9bac 452
bf3d6a8f 453 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
c5441932 454}
1da177e4 455
aed069df 456static int gre_handle_offloads(struct sk_buff *skb, bool csum)
b2acd1dc 457{
6fa79666 458 return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
b2acd1dc
PS
459}
460
fc4099f1
PS
461static struct rtable *gre_get_rt(struct sk_buff *skb,
462 struct net_device *dev,
463 struct flowi4 *fl,
464 const struct ip_tunnel_key *key)
465{
466 struct net *net = dev_net(dev);
467
468 memset(fl, 0, sizeof(*fl));
469 fl->daddr = key->u.ipv4.dst;
470 fl->saddr = key->u.ipv4.src;
471 fl->flowi4_tos = RT_TOS(key->tos);
472 fl->flowi4_mark = skb->mark;
473 fl->flowi4_proto = IPPROTO_GRE;
474
475 return ip_route_output_key(net, fl);
476}
477
862a03c3
WT
478static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
479 struct net_device *dev,
480 struct flowi4 *fl,
481 int tunnel_hlen)
2e15ea39
PS
482{
483 struct ip_tunnel_info *tun_info;
2e15ea39 484 const struct ip_tunnel_key *key;
db3c6139 485 struct rtable *rt = NULL;
2e15ea39 486 int min_headroom;
db3c6139 487 bool use_cache;
2e15ea39
PS
488 int err;
489
61adedf3 490 tun_info = skb_tunnel_info(skb);
2e15ea39 491 key = &tun_info->key;
db3c6139 492 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
862a03c3 493
db3c6139 494 if (use_cache)
862a03c3 495 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
3c1cb4d2 496 if (!rt) {
862a03c3 497 rt = gre_get_rt(skb, dev, fl, key);
3c1cb4d2 498 if (IS_ERR(rt))
862a03c3 499 goto err_free_skb;
db3c6139 500 if (use_cache)
3c1cb4d2 501 dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
862a03c3 502 fl->saddr);
3c1cb4d2 503 }
2e15ea39 504
2e15ea39
PS
505 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
506 + tunnel_hlen + sizeof(struct iphdr);
507 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
508 int head_delta = SKB_DATA_ALIGN(min_headroom -
509 skb_headroom(skb) +
510 16);
511 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
512 0, GFP_ATOMIC);
513 if (unlikely(err))
514 goto err_free_rt;
515 }
862a03c3
WT
516 return rt;
517
518err_free_rt:
519 ip_rt_put(rt);
520err_free_skb:
521 kfree_skb(skb);
522 dev->stats.tx_dropped++;
523 return NULL;
524}
525
526static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
527 __be16 proto)
528{
77a5196a 529 struct ip_tunnel *tunnel = netdev_priv(dev);
862a03c3
WT
530 struct ip_tunnel_info *tun_info;
531 const struct ip_tunnel_key *key;
532 struct rtable *rt = NULL;
533 struct flowi4 fl;
534 int tunnel_hlen;
535 __be16 df, flags;
536
537 tun_info = skb_tunnel_info(skb);
538 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
539 ip_tunnel_info_af(tun_info) != AF_INET))
540 goto err_free_skb;
541
542 key = &tun_info->key;
543 tunnel_hlen = gre_calc_hlen(key->tun_flags);
544
545 rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
546 if (!rt)
547 return;
2e15ea39
PS
548
549 /* Push Tunnel header. */
aed069df 550 if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
2e15ea39 551 goto err_free_rt;
2e15ea39 552
77a5196a
WT
553 flags = tun_info->key.tun_flags &
554 (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
cba65321 555 gre_build_header(skb, tunnel_hlen, flags, proto,
77a5196a 556 tunnel_id_to_key32(tun_info->key.tun_id),
15746394 557 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
2e15ea39
PS
558
559 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
039f5062
PS
560
561 iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
562 key->tos, key->ttl, df, false);
2e15ea39
PS
563 return;
564
565err_free_rt:
566 ip_rt_put(rt);
567err_free_skb:
568 kfree_skb(skb);
569 dev->stats.tx_dropped++;
570}
571
1a66a836
WT
572static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
573 __be16 proto)
574{
575 struct ip_tunnel *tunnel = netdev_priv(dev);
576 struct ip_tunnel_info *tun_info;
577 const struct ip_tunnel_key *key;
578 struct erspan_metadata *md;
579 struct rtable *rt = NULL;
580 bool truncate = false;
581 struct flowi4 fl;
582 int tunnel_hlen;
f551c91d 583 int version;
1a66a836 584 __be16 df;
1baf5ebf 585 int nhoff;
d5db21a3 586 int thoff;
1a66a836
WT
587
588 tun_info = skb_tunnel_info(skb);
589 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
590 ip_tunnel_info_af(tun_info) != AF_INET))
591 goto err_free_skb;
592
593 key = &tun_info->key;
256c87c1
PJV
594 if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
595 goto err_free_rt;
f551c91d
WT
596 md = ip_tunnel_info_opts(tun_info);
597 if (!md)
598 goto err_free_rt;
1a66a836
WT
599
600 /* ERSPAN has fixed 8 byte GRE header */
f551c91d
WT
601 version = md->version;
602 tunnel_hlen = 8 + erspan_hdr_len(version);
1a66a836
WT
603
604 rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
605 if (!rt)
606 return;
607
608 if (gre_handle_offloads(skb, false))
609 goto err_free_rt;
610
f192970d
WT
611 if (skb->len > dev->mtu + dev->hard_header_len) {
612 pskb_trim(skb, dev->mtu + dev->hard_header_len);
1a66a836
WT
613 truncate = true;
614 }
615
1baf5ebf
WT
616 nhoff = skb_network_header(skb) - skb_mac_header(skb);
617 if (skb->protocol == htons(ETH_P_IP) &&
618 (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
619 truncate = true;
d5db21a3
WT
620
621 thoff = skb_transport_header(skb) - skb_mac_header(skb);
622 if (skb->protocol == htons(ETH_P_IPV6) &&
623 (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
624 truncate = true;
1baf5ebf 625
f551c91d 626 if (version == 1) {
c69de58b 627 erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
f551c91d
WT
628 ntohl(md->u.index), truncate, true);
629 } else if (version == 2) {
c69de58b
WT
630 erspan_build_header_v2(skb,
631 ntohl(tunnel_id_to_key32(key->tun_id)),
632 md->u.md2.dir,
633 get_hwid(&md->u.md2),
634 truncate, true);
f551c91d
WT
635 } else {
636 goto err_free_rt;
637 }
1a66a836
WT
638
639 gre_build_header(skb, 8, TUNNEL_SEQ,
640 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
641
642 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
643
644 iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
645 key->tos, key->ttl, df, false);
646 return;
647
648err_free_rt:
649 ip_rt_put(rt);
650err_free_skb:
651 kfree_skb(skb);
652 dev->stats.tx_dropped++;
653}
654
fc4099f1
PS
655static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
656{
657 struct ip_tunnel_info *info = skb_tunnel_info(skb);
658 struct rtable *rt;
659 struct flowi4 fl4;
660
661 if (ip_tunnel_info_af(info) != AF_INET)
662 return -EINVAL;
663
664 rt = gre_get_rt(skb, dev, &fl4, &info->key);
665 if (IS_ERR(rt))
666 return PTR_ERR(rt);
667
668 ip_rt_put(rt);
669 info->key.u.ipv4.src = fl4.saddr;
670 return 0;
671}
672
c5441932
PS
673static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
674 struct net_device *dev)
675{
676 struct ip_tunnel *tunnel = netdev_priv(dev);
677 const struct iphdr *tnl_params;
1da177e4 678
cb9f1b78
WB
679 if (!pskb_inet_may_pull(skb))
680 goto free_skb;
681
2e15ea39 682 if (tunnel->collect_md) {
2090714e 683 gre_fb_xmit(skb, dev, skb->protocol);
2e15ea39
PS
684 return NETDEV_TX_OK;
685 }
686
c5441932
PS
687 if (dev->header_ops) {
688 /* Need space for new headers */
689 if (skb_cow_head(skb, dev->needed_headroom -
2bac7cb3 690 (tunnel->hlen + sizeof(struct iphdr))))
c5441932 691 goto free_skb;
1da177e4 692
c5441932 693 tnl_params = (const struct iphdr *)skb->data;
1da177e4 694
c5441932
PS
695 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
696 * to gre header.
697 */
698 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
8a0033a9 699 skb_reset_mac_header(skb);
c5441932
PS
700 } else {
701 if (skb_cow_head(skb, dev->needed_headroom))
702 goto free_skb;
1da177e4 703
c5441932 704 tnl_params = &tunnel->parms.iph;
1da177e4
LT
705 }
706
aed069df
AD
707 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
708 goto free_skb;
8a0033a9 709
c5441932 710 __gre_xmit(skb, dev, tnl_params, skb->protocol);
6ed10654 711 return NETDEV_TX_OK;
1da177e4 712
c5441932 713free_skb:
3acfa1e7 714 kfree_skb(skb);
c5441932 715 dev->stats.tx_dropped++;
6ed10654 716 return NETDEV_TX_OK;
1da177e4
LT
717}
718
84e54fe0
WT
719static netdev_tx_t erspan_xmit(struct sk_buff *skb,
720 struct net_device *dev)
721{
722 struct ip_tunnel *tunnel = netdev_priv(dev);
723 bool truncate = false;
724
cb9f1b78
WB
725 if (!pskb_inet_may_pull(skb))
726 goto free_skb;
727
1a66a836
WT
728 if (tunnel->collect_md) {
729 erspan_fb_xmit(skb, dev, skb->protocol);
730 return NETDEV_TX_OK;
731 }
732
84e54fe0
WT
733 if (gre_handle_offloads(skb, false))
734 goto free_skb;
735
736 if (skb_cow_head(skb, dev->needed_headroom))
737 goto free_skb;
738
f192970d
WT
739 if (skb->len > dev->mtu + dev->hard_header_len) {
740 pskb_trim(skb, dev->mtu + dev->hard_header_len);
84e54fe0
WT
741 truncate = true;
742 }
743
744 /* Push ERSPAN header */
f551c91d 745 if (tunnel->erspan_ver == 1)
c69de58b
WT
746 erspan_build_header(skb, ntohl(tunnel->parms.o_key),
747 tunnel->index,
f551c91d 748 truncate, true);
02f99df1 749 else if (tunnel->erspan_ver == 2)
c69de58b 750 erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
f551c91d
WT
751 tunnel->dir, tunnel->hwid,
752 truncate, true);
02f99df1
WT
753 else
754 goto free_skb;
f551c91d 755
84e54fe0
WT
756 tunnel->parms.o_flags &= ~TUNNEL_KEY;
757 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
758 return NETDEV_TX_OK;
759
760free_skb:
761 kfree_skb(skb);
762 dev->stats.tx_dropped++;
763 return NETDEV_TX_OK;
764}
765
c5441932
PS
766static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
767 struct net_device *dev)
ee34c1eb 768{
c5441932 769 struct ip_tunnel *tunnel = netdev_priv(dev);
ee34c1eb 770
cb9f1b78
WB
771 if (!pskb_inet_may_pull(skb))
772 goto free_skb;
773
2e15ea39 774 if (tunnel->collect_md) {
2090714e 775 gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
2e15ea39
PS
776 return NETDEV_TX_OK;
777 }
778
aed069df
AD
779 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
780 goto free_skb;
ee34c1eb 781
c5441932
PS
782 if (skb_cow_head(skb, dev->needed_headroom))
783 goto free_skb;
42aa9162 784
c5441932 785 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
c5441932 786 return NETDEV_TX_OK;
ee34c1eb 787
c5441932 788free_skb:
3acfa1e7 789 kfree_skb(skb);
c5441932
PS
790 dev->stats.tx_dropped++;
791 return NETDEV_TX_OK;
ee34c1eb
MS
792}
793
dd9d598c
XL
794static void ipgre_link_update(struct net_device *dev, bool set_mtu)
795{
796 struct ip_tunnel *tunnel = netdev_priv(dev);
797 int len;
798
799 len = tunnel->tun_hlen;
800 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
801 len = tunnel->tun_hlen - len;
802 tunnel->hlen = tunnel->hlen + len;
803
804 dev->needed_headroom = dev->needed_headroom + len;
805 if (set_mtu)
806 dev->mtu = max_t(int, dev->mtu - len, 68);
807
808 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
809 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
810 tunnel->encap.type == TUNNEL_ENCAP_NONE) {
811 dev->features |= NETIF_F_GSO_SOFTWARE;
812 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1cc5954f
SD
813 } else {
814 dev->features &= ~NETIF_F_GSO_SOFTWARE;
815 dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
dd9d598c
XL
816 }
817 dev->features |= NETIF_F_LLTX;
1cc5954f
SD
818 } else {
819 dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
820 dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
dd9d598c
XL
821 }
822}
823
c5441932
PS
824static int ipgre_tunnel_ioctl(struct net_device *dev,
825 struct ifreq *ifr, int cmd)
1da177e4 826{
1da177e4 827 struct ip_tunnel_parm p;
a0efab67 828 int err;
1da177e4 829
c5441932
PS
830 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
831 return -EFAULT;
a0efab67 832
6c734fb8
CW
833 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
834 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
a0efab67
XL
835 p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
836 ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
6c734fb8 837 return -EINVAL;
1da177e4 838 }
a0efab67 839
c5441932
PS
840 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
841 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
1da177e4 842
c5441932
PS
843 err = ip_tunnel_ioctl(dev, &p, cmd);
844 if (err)
845 return err;
1da177e4 846
a0efab67
XL
847 if (cmd == SIOCCHGTUNNEL) {
848 struct ip_tunnel *t = netdev_priv(dev);
849
850 t->parms.i_flags = p.i_flags;
851 t->parms.o_flags = p.o_flags;
852
853 if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
854 ipgre_link_update(dev, true);
855 }
856
95f5c64c
TH
857 p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
858 p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
c5441932
PS
859
860 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
861 return -EFAULT;
a0efab67 862
1da177e4
LT
863 return 0;
864}
865
1da177e4
LT
866/* Nice toy. Unfortunately, useless in real life :-)
867 It allows to construct virtual multiprotocol broadcast "LAN"
868 over the Internet, provided multicast routing is tuned.
869
870
871 I have no idea was this bicycle invented before me,
872 so that I had to set ARPHRD_IPGRE to a random value.
873 I have an impression, that Cisco could make something similar,
874 but this feature is apparently missing in IOS<=11.2(8).
e905a9ed 875
1da177e4
LT
876 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
877 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
878
879 ping -t 255 224.66.66.66
880
881 If nobody answers, mbone does not work.
882
883 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
884 ip addr add 10.66.66.<somewhat>/24 dev Universe
885 ifconfig Universe up
886 ifconfig Universe add fe80::<Your_real_addr>/10
887 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
888 ftp 10.66.66.66
889 ...
890 ftp fec0:6666:6666::193.233.7.65
891 ...
1da177e4 892 */
3b04ddde
SH
893static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
894 unsigned short type,
1507850b 895 const void *daddr, const void *saddr, unsigned int len)
1da177e4 896{
2941a486 897 struct ip_tunnel *t = netdev_priv(dev);
c5441932
PS
898 struct iphdr *iph;
899 struct gre_base_hdr *greh;
1da177e4 900
d58ff351 901 iph = skb_push(skb, t->hlen + sizeof(*iph));
c5441932 902 greh = (struct gre_base_hdr *)(iph+1);
95f5c64c 903 greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
c5441932 904 greh->protocol = htons(type);
1da177e4 905
c5441932 906 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
e905a9ed 907
c5441932 908 /* Set the source hardware address. */
1da177e4
LT
909 if (saddr)
910 memcpy(&iph->saddr, saddr, 4);
6d55cb91 911 if (daddr)
1da177e4 912 memcpy(&iph->daddr, daddr, 4);
6d55cb91 913 if (iph->daddr)
77a482bd 914 return t->hlen + sizeof(*iph);
e905a9ed 915
c5441932 916 return -(t->hlen + sizeof(*iph));
1da177e4
LT
917}
918
6a5f44d7
TT
919static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
920{
b71d1d42 921 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
922 memcpy(haddr, &iph->saddr, 4);
923 return 4;
924}
925
3b04ddde
SH
926static const struct header_ops ipgre_header_ops = {
927 .create = ipgre_header,
6a5f44d7 928 .parse = ipgre_header_parse,
3b04ddde
SH
929};
930
6a5f44d7 931#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
932static int ipgre_open(struct net_device *dev)
933{
2941a486 934 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 935
f97c1e0c 936 if (ipv4_is_multicast(t->parms.iph.daddr)) {
cbb1e85f
DM
937 struct flowi4 fl4;
938 struct rtable *rt;
939
b57708ad 940 rt = ip_route_output_gre(t->net, &fl4,
cbb1e85f
DM
941 t->parms.iph.daddr,
942 t->parms.iph.saddr,
943 t->parms.o_key,
944 RT_TOS(t->parms.iph.tos),
945 t->parms.link);
b23dd4fe 946 if (IS_ERR(rt))
1da177e4 947 return -EADDRNOTAVAIL;
d8d1f30b 948 dev = rt->dst.dev;
1da177e4 949 ip_rt_put(rt);
51456b29 950 if (!__in_dev_get_rtnl(dev))
1da177e4
LT
951 return -EADDRNOTAVAIL;
952 t->mlink = dev->ifindex;
e5ed6399 953 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
954 }
955 return 0;
956}
957
958static int ipgre_close(struct net_device *dev)
959{
2941a486 960 struct ip_tunnel *t = netdev_priv(dev);
b8c26a33 961
f97c1e0c 962 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 963 struct in_device *in_dev;
b57708ad 964 in_dev = inetdev_by_index(t->net, t->mlink);
8723e1b4 965 if (in_dev)
1da177e4 966 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1da177e4
LT
967 }
968 return 0;
969}
1da177e4
LT
970#endif
971
b8c26a33
SH
972static const struct net_device_ops ipgre_netdev_ops = {
973 .ndo_init = ipgre_tunnel_init,
c5441932 974 .ndo_uninit = ip_tunnel_uninit,
b8c26a33
SH
975#ifdef CONFIG_NET_IPGRE_BROADCAST
976 .ndo_open = ipgre_open,
977 .ndo_stop = ipgre_close,
978#endif
c5441932 979 .ndo_start_xmit = ipgre_xmit,
b8c26a33 980 .ndo_do_ioctl = ipgre_tunnel_ioctl,
c5441932
PS
981 .ndo_change_mtu = ip_tunnel_change_mtu,
982 .ndo_get_stats64 = ip_tunnel_get_stats64,
1e99584b 983 .ndo_get_iflink = ip_tunnel_get_iflink,
b8c26a33
SH
984};
985
6b78f16e
ED
/* Feature bits that every GRE-family device in this file sets on itself. */
#define GRE_FEATURES	(NETIF_F_SG | NETIF_F_FRAGLIST | \
			 NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
990
1da177e4
LT
991static void ipgre_tunnel_setup(struct net_device *dev)
992{
b8c26a33 993 dev->netdev_ops = &ipgre_netdev_ops;
5a455275 994 dev->type = ARPHRD_IPGRE;
c5441932
PS
995 ip_tunnel_setup(dev, ipgre_net_id);
996}
1da177e4 997
c5441932
PS
998static void __gre_tunnel_init(struct net_device *dev)
999{
1000 struct ip_tunnel *tunnel;
1001
1002 tunnel = netdev_priv(dev);
95f5c64c 1003 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
c5441932
PS
1004 tunnel->parms.iph.protocol = IPPROTO_GRE;
1005
4565e991
TH
1006 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
1007
b57708ad 1008 dev->features |= GRE_FEATURES;
6b78f16e 1009 dev->hw_features |= GRE_FEATURES;
c5441932
PS
1010
1011 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
a0ca153f
AD
1012 /* TCP offload with GRE SEQ is not supported, nor
1013 * can we support 2 levels of outer headers requiring
1014 * an update.
1015 */
1016 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
1017 (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
1018 dev->features |= NETIF_F_GSO_SOFTWARE;
1019 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1020 }
1021
c5441932
PS
1022 /* Can use a lockless transmit, unless we generate
1023 * output sequences
1024 */
1025 dev->features |= NETIF_F_LLTX;
1026 }
1da177e4
LT
1027}
1028
1029static int ipgre_tunnel_init(struct net_device *dev)
1030{
c5441932
PS
1031 struct ip_tunnel *tunnel = netdev_priv(dev);
1032 struct iphdr *iph = &tunnel->parms.iph;
1da177e4 1033
c5441932 1034 __gre_tunnel_init(dev);
1da177e4 1035
c5441932
PS
1036 memcpy(dev->dev_addr, &iph->saddr, 4);
1037 memcpy(dev->broadcast, &iph->daddr, 4);
1da177e4 1038
c5441932 1039 dev->flags = IFF_NOARP;
02875878 1040 netif_keep_dst(dev);
c5441932 1041 dev->addr_len = 4;
1da177e4 1042
a64b04d8 1043 if (iph->daddr && !tunnel->collect_md) {
1da177e4 1044#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 1045 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
1046 if (!iph->saddr)
1047 return -EINVAL;
1048 dev->flags = IFF_BROADCAST;
3b04ddde 1049 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
1050 }
1051#endif
a64b04d8 1052 } else if (!tunnel->collect_md) {
6a5f44d7 1053 dev->header_ops = &ipgre_header_ops;
a64b04d8 1054 }
1da177e4 1055
c5441932 1056 return ip_tunnel_init(dev);
1da177e4
LT
1057}
1058
9f57c67c
PS
1059static const struct gre_protocol ipgre_protocol = {
1060 .handler = gre_rcv,
1061 .err_handler = gre_err,
1da177e4
LT
1062};
1063
2c8c1e72 1064static int __net_init ipgre_init_net(struct net *net)
59a4c759 1065{
c5441932 1066 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
59a4c759
PE
1067}
1068
64bc1781 1069static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
59a4c759 1070{
64bc1781 1071 ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
59a4c759
PE
1072}
1073
1074static struct pernet_operations ipgre_net_ops = {
1075 .init = ipgre_init_net,
64bc1781 1076 .exit_batch = ipgre_exit_batch_net,
cfb8fbf2 1077 .id = &ipgre_net_id,
c5441932 1078 .size = sizeof(struct ip_tunnel_net),
59a4c759 1079};
1da177e4 1080
a8b8a889
MS
1081static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1082 struct netlink_ext_ack *extack)
c19e654d
HX
1083{
1084 __be16 flags;
1085
1086 if (!data)
1087 return 0;
1088
1089 flags = 0;
1090 if (data[IFLA_GRE_IFLAGS])
1091 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1092 if (data[IFLA_GRE_OFLAGS])
1093 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1094 if (flags & (GRE_VERSION|GRE_ROUTING))
1095 return -EINVAL;
1096
946b636f
JB
1097 if (data[IFLA_GRE_COLLECT_METADATA] &&
1098 data[IFLA_GRE_ENCAP_TYPE] &&
1099 nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
1100 return -EINVAL;
1101
c19e654d
HX
1102 return 0;
1103}
1104
a8b8a889
MS
1105static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1106 struct netlink_ext_ack *extack)
e1a80002
HX
1107{
1108 __be32 daddr;
1109
1110 if (tb[IFLA_ADDRESS]) {
1111 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1112 return -EINVAL;
1113 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1114 return -EADDRNOTAVAIL;
1115 }
1116
1117 if (!data)
1118 goto out;
1119
1120 if (data[IFLA_GRE_REMOTE]) {
1121 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1122 if (!daddr)
1123 return -EINVAL;
1124 }
1125
1126out:
a8b8a889 1127 return ipgre_tunnel_validate(tb, data, extack);
e1a80002
HX
1128}
1129
84e54fe0
WT
1130static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1131 struct netlink_ext_ack *extack)
1132{
1133 __be16 flags = 0;
1134 int ret;
1135
1136 if (!data)
1137 return 0;
1138
1139 ret = ipgre_tap_validate(tb, data, extack);
1140 if (ret)
1141 return ret;
1142
1143 /* ERSPAN should only have GRE sequence and key flag */
1a66a836
WT
1144 if (data[IFLA_GRE_OFLAGS])
1145 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1146 if (data[IFLA_GRE_IFLAGS])
1147 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1148 if (!data[IFLA_GRE_COLLECT_METADATA] &&
1149 flags != (GRE_SEQ | GRE_KEY))
84e54fe0
WT
1150 return -EINVAL;
1151
1152 /* ERSPAN Session ID only has 10-bit. Since we reuse
1153 * 32-bit key field as ID, check it's range.
1154 */
1155 if (data[IFLA_GRE_IKEY] &&
1156 (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1157 return -EINVAL;
1158
1159 if (data[IFLA_GRE_OKEY] &&
1160 (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
1161 return -EINVAL;
1162
1163 return 0;
1164}
1165
22a59be8 1166static int ipgre_netlink_parms(struct net_device *dev,
2e15ea39
PS
1167 struct nlattr *data[],
1168 struct nlattr *tb[],
9830ad4c
CG
1169 struct ip_tunnel_parm *parms,
1170 __u32 *fwmark)
c19e654d 1171{
22a59be8
PP
1172 struct ip_tunnel *t = netdev_priv(dev);
1173
7bb82d92 1174 memset(parms, 0, sizeof(*parms));
c19e654d
HX
1175
1176 parms->iph.protocol = IPPROTO_GRE;
1177
1178 if (!data)
22a59be8 1179 return 0;
c19e654d
HX
1180
1181 if (data[IFLA_GRE_LINK])
1182 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1183
1184 if (data[IFLA_GRE_IFLAGS])
c5441932 1185 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
c19e654d
HX
1186
1187 if (data[IFLA_GRE_OFLAGS])
c5441932 1188 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
c19e654d
HX
1189
1190 if (data[IFLA_GRE_IKEY])
1191 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1192
1193 if (data[IFLA_GRE_OKEY])
1194 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1195
1196 if (data[IFLA_GRE_LOCAL])
67b61f6c 1197 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
c19e654d
HX
1198
1199 if (data[IFLA_GRE_REMOTE])
67b61f6c 1200 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
c19e654d
HX
1201
1202 if (data[IFLA_GRE_TTL])
1203 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1204
1205 if (data[IFLA_GRE_TOS])
1206 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1207
22a59be8
PP
1208 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
1209 if (t->ignore_df)
1210 return -EINVAL;
c19e654d 1211 parms->iph.frag_off = htons(IP_DF);
22a59be8 1212 }
2e15ea39
PS
1213
1214 if (data[IFLA_GRE_COLLECT_METADATA]) {
2e15ea39 1215 t->collect_md = true;
e271c7b4
JB
1216 if (dev->type == ARPHRD_IPGRE)
1217 dev->type = ARPHRD_NONE;
2e15ea39 1218 }
22a59be8
PP
1219
1220 if (data[IFLA_GRE_IGNORE_DF]) {
1221 if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
1222 && (parms->iph.frag_off & htons(IP_DF)))
1223 return -EINVAL;
1224 t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
1225 }
1226
9830ad4c
CG
1227 if (data[IFLA_GRE_FWMARK])
1228 *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
1229
f551c91d
WT
1230 if (data[IFLA_GRE_ERSPAN_VER]) {
1231 t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
84e54fe0 1232
f551c91d 1233 if (t->erspan_ver != 1 && t->erspan_ver != 2)
84e54fe0
WT
1234 return -EINVAL;
1235 }
1236
f551c91d
WT
1237 if (t->erspan_ver == 1) {
1238 if (data[IFLA_GRE_ERSPAN_INDEX]) {
1239 t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1240 if (t->index & ~INDEX_MASK)
1241 return -EINVAL;
1242 }
1243 } else if (t->erspan_ver == 2) {
1244 if (data[IFLA_GRE_ERSPAN_DIR]) {
1245 t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1246 if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
1247 return -EINVAL;
1248 }
1249 if (data[IFLA_GRE_ERSPAN_HWID]) {
1250 t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1251 if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
1252 return -EINVAL;
1253 }
1254 }
1255
22a59be8 1256 return 0;
c19e654d
HX
1257}
1258
4565e991
TH
1259/* This function returns true when ENCAP attributes are present in the nl msg */
1260static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1261 struct ip_tunnel_encap *ipencap)
1262{
1263 bool ret = false;
1264
1265 memset(ipencap, 0, sizeof(*ipencap));
1266
1267 if (!data)
1268 return ret;
1269
1270 if (data[IFLA_GRE_ENCAP_TYPE]) {
1271 ret = true;
1272 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1273 }
1274
1275 if (data[IFLA_GRE_ENCAP_FLAGS]) {
1276 ret = true;
1277 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1278 }
1279
1280 if (data[IFLA_GRE_ENCAP_SPORT]) {
1281 ret = true;
3e97fa70 1282 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
4565e991
TH
1283 }
1284
1285 if (data[IFLA_GRE_ENCAP_DPORT]) {
1286 ret = true;
3e97fa70 1287 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
4565e991
TH
1288 }
1289
1290 return ret;
1291}
1292
c5441932 1293static int gre_tap_init(struct net_device *dev)
e1a80002 1294{
c5441932 1295 __gre_tunnel_init(dev);
bec94d43 1296 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
d51711c0 1297 netif_keep_dst(dev);
e1a80002 1298
c5441932 1299 return ip_tunnel_init(dev);
e1a80002
HX
1300}
1301
c5441932
PS
1302static const struct net_device_ops gre_tap_netdev_ops = {
1303 .ndo_init = gre_tap_init,
1304 .ndo_uninit = ip_tunnel_uninit,
1305 .ndo_start_xmit = gre_tap_xmit,
b8c26a33
SH
1306 .ndo_set_mac_address = eth_mac_addr,
1307 .ndo_validate_addr = eth_validate_addr,
c5441932
PS
1308 .ndo_change_mtu = ip_tunnel_change_mtu,
1309 .ndo_get_stats64 = ip_tunnel_get_stats64,
1e99584b 1310 .ndo_get_iflink = ip_tunnel_get_iflink,
fc4099f1 1311 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
b8c26a33
SH
1312};
1313
84e54fe0
WT
1314static int erspan_tunnel_init(struct net_device *dev)
1315{
1316 struct ip_tunnel *tunnel = netdev_priv(dev);
84e54fe0
WT
1317
1318 tunnel->tun_hlen = 8;
1319 tunnel->parms.iph.protocol = IPPROTO_GRE;
c122fda2 1320 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
f551c91d 1321 erspan_hdr_len(tunnel->erspan_ver);
84e54fe0 1322
84e54fe0
WT
1323 dev->features |= GRE_FEATURES;
1324 dev->hw_features |= GRE_FEATURES;
1325 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
c84bed44 1326 netif_keep_dst(dev);
84e54fe0
WT
1327
1328 return ip_tunnel_init(dev);
1329}
1330
1331static const struct net_device_ops erspan_netdev_ops = {
1332 .ndo_init = erspan_tunnel_init,
1333 .ndo_uninit = ip_tunnel_uninit,
1334 .ndo_start_xmit = erspan_xmit,
1335 .ndo_set_mac_address = eth_mac_addr,
1336 .ndo_validate_addr = eth_validate_addr,
1337 .ndo_change_mtu = ip_tunnel_change_mtu,
1338 .ndo_get_stats64 = ip_tunnel_get_stats64,
1339 .ndo_get_iflink = ip_tunnel_get_iflink,
1340 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
1341};
1342
e1a80002
HX
1343static void ipgre_tap_setup(struct net_device *dev)
1344{
e1a80002 1345 ether_setup(dev);
cfddd4c3 1346 dev->max_mtu = 0;
d13b161c
JB
1347 dev->netdev_ops = &gre_tap_netdev_ops;
1348 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1349 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
c5441932 1350 ip_tunnel_setup(dev, gre_tap_net_id);
e1a80002
HX
1351}
1352
c5441932 1353static int ipgre_newlink(struct net *src_net, struct net_device *dev,
7a3f4a18
MS
1354 struct nlattr *tb[], struct nlattr *data[],
1355 struct netlink_ext_ack *extack)
c19e654d 1356{
c5441932 1357 struct ip_tunnel_parm p;
4565e991 1358 struct ip_tunnel_encap ipencap;
9830ad4c 1359 __u32 fwmark = 0;
22a59be8 1360 int err;
4565e991
TH
1361
1362 if (ipgre_netlink_encap_parms(data, &ipencap)) {
1363 struct ip_tunnel *t = netdev_priv(dev);
22a59be8 1364 err = ip_tunnel_encap_setup(t, &ipencap);
4565e991
TH
1365
1366 if (err < 0)
1367 return err;
1368 }
c19e654d 1369
9830ad4c 1370 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
22a59be8
PP
1371 if (err < 0)
1372 return err;
9830ad4c 1373 return ip_tunnel_newlink(dev, tb, &p, fwmark);
c19e654d
HX
1374}
1375
1376static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
ad744b22
MS
1377 struct nlattr *data[],
1378 struct netlink_ext_ack *extack)
c19e654d 1379{
9830ad4c 1380 struct ip_tunnel *t = netdev_priv(dev);
4565e991 1381 struct ip_tunnel_encap ipencap;
9830ad4c 1382 __u32 fwmark = t->fwmark;
dd9d598c 1383 struct ip_tunnel_parm p;
22a59be8 1384 int err;
4565e991
TH
1385
1386 if (ipgre_netlink_encap_parms(data, &ipencap)) {
22a59be8 1387 err = ip_tunnel_encap_setup(t, &ipencap);
4565e991
TH
1388
1389 if (err < 0)
1390 return err;
1391 }
c19e654d 1392
9830ad4c 1393 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
22a59be8
PP
1394 if (err < 0)
1395 return err;
dd9d598c
XL
1396
1397 err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1398 if (err < 0)
1399 return err;
1400
1401 t->parms.i_flags = p.i_flags;
1402 t->parms.o_flags = p.o_flags;
1403
1404 if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
1405 ipgre_link_update(dev, !tb[IFLA_MTU]);
1406
1407 return 0;
c19e654d
HX
1408}
1409
1410static size_t ipgre_get_size(const struct net_device *dev)
1411{
1412 return
1413 /* IFLA_GRE_LINK */
1414 nla_total_size(4) +
1415 /* IFLA_GRE_IFLAGS */
1416 nla_total_size(2) +
1417 /* IFLA_GRE_OFLAGS */
1418 nla_total_size(2) +
1419 /* IFLA_GRE_IKEY */
1420 nla_total_size(4) +
1421 /* IFLA_GRE_OKEY */
1422 nla_total_size(4) +
1423 /* IFLA_GRE_LOCAL */
1424 nla_total_size(4) +
1425 /* IFLA_GRE_REMOTE */
1426 nla_total_size(4) +
1427 /* IFLA_GRE_TTL */
1428 nla_total_size(1) +
1429 /* IFLA_GRE_TOS */
1430 nla_total_size(1) +
1431 /* IFLA_GRE_PMTUDISC */
1432 nla_total_size(1) +
4565e991
TH
1433 /* IFLA_GRE_ENCAP_TYPE */
1434 nla_total_size(2) +
1435 /* IFLA_GRE_ENCAP_FLAGS */
1436 nla_total_size(2) +
1437 /* IFLA_GRE_ENCAP_SPORT */
1438 nla_total_size(2) +
1439 /* IFLA_GRE_ENCAP_DPORT */
1440 nla_total_size(2) +
2e15ea39
PS
1441 /* IFLA_GRE_COLLECT_METADATA */
1442 nla_total_size(0) +
22a59be8
PP
1443 /* IFLA_GRE_IGNORE_DF */
1444 nla_total_size(1) +
9830ad4c
CG
1445 /* IFLA_GRE_FWMARK */
1446 nla_total_size(4) +
84e54fe0
WT
1447 /* IFLA_GRE_ERSPAN_INDEX */
1448 nla_total_size(4) +
f551c91d
WT
1449 /* IFLA_GRE_ERSPAN_VER */
1450 nla_total_size(1) +
1451 /* IFLA_GRE_ERSPAN_DIR */
1452 nla_total_size(1) +
1453 /* IFLA_GRE_ERSPAN_HWID */
1454 nla_total_size(2) +
c19e654d
HX
1455 0;
1456}
1457
1458static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1459{
1460 struct ip_tunnel *t = netdev_priv(dev);
1461 struct ip_tunnel_parm *p = &t->parms;
1462
f3756b79 1463 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
95f5c64c
TH
1464 nla_put_be16(skb, IFLA_GRE_IFLAGS,
1465 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1466 nla_put_be16(skb, IFLA_GRE_OFLAGS,
1467 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
f3756b79
DM
1468 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1469 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
930345ea
JB
1470 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1471 nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
f3756b79
DM
1472 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1473 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1474 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
9830ad4c
CG
1475 !!(p->iph.frag_off & htons(IP_DF))) ||
1476 nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
f3756b79 1477 goto nla_put_failure;
4565e991
TH
1478
1479 if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1480 t->encap.type) ||
3e97fa70
SD
1481 nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1482 t->encap.sport) ||
1483 nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1484 t->encap.dport) ||
4565e991 1485 nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
e1b2cb65 1486 t->encap.flags))
4565e991
TH
1487 goto nla_put_failure;
1488
22a59be8
PP
1489 if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1490 goto nla_put_failure;
1491
2e15ea39
PS
1492 if (t->collect_md) {
1493 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1494 goto nla_put_failure;
1495 }
1496
f551c91d
WT
1497 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
1498 goto nla_put_failure;
1499
1500 if (t->erspan_ver == 1) {
84e54fe0
WT
1501 if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1502 goto nla_put_failure;
f551c91d
WT
1503 } else if (t->erspan_ver == 2) {
1504 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
1505 goto nla_put_failure;
1506 if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
1507 goto nla_put_failure;
1508 }
84e54fe0 1509
c19e654d
HX
1510 return 0;
1511
1512nla_put_failure:
1513 return -EMSGSIZE;
1514}
1515
84e54fe0
WT
1516static void erspan_setup(struct net_device *dev)
1517{
84581bda
XL
1518 struct ip_tunnel *t = netdev_priv(dev);
1519
84e54fe0
WT
1520 ether_setup(dev);
1521 dev->netdev_ops = &erspan_netdev_ops;
1522 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1523 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1524 ip_tunnel_setup(dev, erspan_net_id);
84581bda 1525 t->erspan_ver = 1;
84e54fe0
WT
1526}
1527
c19e654d
HX
1528static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1529 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1530 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1531 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1532 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1533 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
4d74f8ba
PM
1534 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1535 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
c19e654d
HX
1536 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1537 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1538 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
4565e991
TH
1539 [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
1540 [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
1541 [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
1542 [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
2e15ea39 1543 [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
22a59be8 1544 [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
9830ad4c 1545 [IFLA_GRE_FWMARK] = { .type = NLA_U32 },
84e54fe0 1546 [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
f551c91d
WT
1547 [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 },
1548 [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 },
1549 [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 },
c19e654d
HX
1550};
1551
1552static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1553 .kind = "gre",
1554 .maxtype = IFLA_GRE_MAX,
1555 .policy = ipgre_policy,
1556 .priv_size = sizeof(struct ip_tunnel),
1557 .setup = ipgre_tunnel_setup,
1558 .validate = ipgre_tunnel_validate,
1559 .newlink = ipgre_newlink,
1560 .changelink = ipgre_changelink,
c5441932 1561 .dellink = ip_tunnel_dellink,
c19e654d
HX
1562 .get_size = ipgre_get_size,
1563 .fill_info = ipgre_fill_info,
1728d4fa 1564 .get_link_net = ip_tunnel_get_link_net,
c19e654d
HX
1565};
1566
e1a80002
HX
1567static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1568 .kind = "gretap",
1569 .maxtype = IFLA_GRE_MAX,
1570 .policy = ipgre_policy,
1571 .priv_size = sizeof(struct ip_tunnel),
1572 .setup = ipgre_tap_setup,
1573 .validate = ipgre_tap_validate,
1574 .newlink = ipgre_newlink,
1575 .changelink = ipgre_changelink,
c5441932 1576 .dellink = ip_tunnel_dellink,
e1a80002
HX
1577 .get_size = ipgre_get_size,
1578 .fill_info = ipgre_fill_info,
1728d4fa 1579 .get_link_net = ip_tunnel_get_link_net,
e1a80002
HX
1580};
1581
84e54fe0
WT
1582static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1583 .kind = "erspan",
1584 .maxtype = IFLA_GRE_MAX,
1585 .policy = ipgre_policy,
1586 .priv_size = sizeof(struct ip_tunnel),
1587 .setup = erspan_setup,
1588 .validate = erspan_validate,
1589 .newlink = ipgre_newlink,
1590 .changelink = ipgre_changelink,
1591 .dellink = ip_tunnel_dellink,
1592 .get_size = ipgre_get_size,
1593 .fill_info = ipgre_fill_info,
1594 .get_link_net = ip_tunnel_get_link_net,
1595};
1596
b2acd1dc
PS
1597struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1598 u8 name_assign_type)
1599{
1600 struct nlattr *tb[IFLA_MAX + 1];
1601 struct net_device *dev;
106da663 1602 LIST_HEAD(list_kill);
b2acd1dc
PS
1603 struct ip_tunnel *t;
1604 int err;
1605
1606 memset(&tb, 0, sizeof(tb));
1607
1608 dev = rtnl_create_link(net, name, name_assign_type,
d0522f1c 1609 &ipgre_tap_ops, tb, NULL);
b2acd1dc
PS
1610 if (IS_ERR(dev))
1611 return dev;
1612
1613 /* Configure flow based GRE device. */
1614 t = netdev_priv(dev);
1615 t->collect_md = true;
1616
7a3f4a18 1617 err = ipgre_newlink(net, dev, tb, NULL, NULL);
106da663
ND
1618 if (err < 0) {
1619 free_netdev(dev);
1620 return ERR_PTR(err);
1621 }
7e059158
DW
1622
1623 /* openvswitch users expect packet sizes to be unrestricted,
1624 * so set the largest MTU we can.
1625 */
1626 err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1627 if (err)
1628 goto out;
1629
da6f1da8
ND
1630 err = rtnl_configure_link(dev, NULL);
1631 if (err < 0)
1632 goto out;
1633
b2acd1dc
PS
1634 return dev;
1635out:
106da663
ND
1636 ip_tunnel_dellink(dev, &list_kill);
1637 unregister_netdevice_many(&list_kill);
b2acd1dc
PS
1638 return ERR_PTR(err);
1639}
1640EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1641
c5441932
PS
1642static int __net_init ipgre_tap_init_net(struct net *net)
1643{
2e15ea39 1644 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
c5441932
PS
1645}
1646
64bc1781 1647static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
c5441932 1648{
64bc1781 1649 ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
c5441932
PS
1650}
1651
1652static struct pernet_operations ipgre_tap_net_ops = {
1653 .init = ipgre_tap_init_net,
64bc1781 1654 .exit_batch = ipgre_tap_exit_batch_net,
c5441932
PS
1655 .id = &gre_tap_net_id,
1656 .size = sizeof(struct ip_tunnel_net),
1657};
1da177e4 1658
84e54fe0
WT
1659static int __net_init erspan_init_net(struct net *net)
1660{
1661 return ip_tunnel_init_net(net, erspan_net_id,
1662 &erspan_link_ops, "erspan0");
1663}
1664
64bc1781 1665static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
84e54fe0 1666{
64bc1781 1667 ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
84e54fe0
WT
1668}
1669
1670static struct pernet_operations erspan_net_ops = {
1671 .init = erspan_init_net,
64bc1781 1672 .exit_batch = erspan_exit_batch_net,
84e54fe0
WT
1673 .id = &erspan_net_id,
1674 .size = sizeof(struct ip_tunnel_net),
1675};
1676
1da177e4
LT
1677static int __init ipgre_init(void)
1678{
1679 int err;
1680
058bd4d2 1681 pr_info("GRE over IPv4 tunneling driver\n");
1da177e4 1682
cfb8fbf2 1683 err = register_pernet_device(&ipgre_net_ops);
59a4c759 1684 if (err < 0)
c2892f02
AD
1685 return err;
1686
c5441932
PS
1687 err = register_pernet_device(&ipgre_tap_net_ops);
1688 if (err < 0)
e3d0328c 1689 goto pnet_tap_failed;
c5441932 1690
84e54fe0
WT
1691 err = register_pernet_device(&erspan_net_ops);
1692 if (err < 0)
1693 goto pnet_erspan_failed;
1694
9f57c67c 1695 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02 1696 if (err < 0) {
058bd4d2 1697 pr_info("%s: can't add protocol\n", __func__);
c2892f02
AD
1698 goto add_proto_failed;
1699 }
7daa0004 1700
c19e654d
HX
1701 err = rtnl_link_register(&ipgre_link_ops);
1702 if (err < 0)
1703 goto rtnl_link_failed;
1704
e1a80002
HX
1705 err = rtnl_link_register(&ipgre_tap_ops);
1706 if (err < 0)
1707 goto tap_ops_failed;
1708
84e54fe0
WT
1709 err = rtnl_link_register(&erspan_link_ops);
1710 if (err < 0)
1711 goto erspan_link_failed;
1712
c5441932 1713 return 0;
c19e654d 1714
84e54fe0
WT
1715erspan_link_failed:
1716 rtnl_link_unregister(&ipgre_tap_ops);
e1a80002
HX
1717tap_ops_failed:
1718 rtnl_link_unregister(&ipgre_link_ops);
c19e654d 1719rtnl_link_failed:
9f57c67c 1720 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02 1721add_proto_failed:
84e54fe0
WT
1722 unregister_pernet_device(&erspan_net_ops);
1723pnet_erspan_failed:
c5441932 1724 unregister_pernet_device(&ipgre_tap_net_ops);
e3d0328c 1725pnet_tap_failed:
c2892f02 1726 unregister_pernet_device(&ipgre_net_ops);
c5441932 1727 return err;
1da177e4
LT
1728}
1729
db44575f 1730static void __exit ipgre_fini(void)
1da177e4 1731{
e1a80002 1732 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d 1733 rtnl_link_unregister(&ipgre_link_ops);
84e54fe0 1734 rtnl_link_unregister(&erspan_link_ops);
9f57c67c 1735 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
c5441932 1736 unregister_pernet_device(&ipgre_tap_net_ops);
c2892f02 1737 unregister_pernet_device(&ipgre_net_ops);
84e54fe0 1738 unregister_pernet_device(&erspan_net_ops);
1da177e4
LT
1739}
1740
1741module_init(ipgre_init);
1742module_exit(ipgre_fini);
1743MODULE_LICENSE("GPL");
4d74f8ba
PM
1744MODULE_ALIAS_RTNL_LINK("gre");
1745MODULE_ALIAS_RTNL_LINK("gretap");
84e54fe0 1746MODULE_ALIAS_RTNL_LINK("erspan");
8909c9ad 1747MODULE_ALIAS_NETDEV("gre0");
c5441932 1748MODULE_ALIAS_NETDEV("gretap0");
84e54fe0 1749MODULE_ALIAS_NETDEV("erspan0");