gre: fix error handler
[linux-2.6-block.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
afd46503
JP
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
4fc268d2 15#include <linux/capability.h>
1da177e4
LT
16#include <linux/module.h>
17#include <linux/types.h>
1da177e4 18#include <linux/kernel.h>
5a0e3ad6 19#include <linux/slab.h>
1da177e4
LT
20#include <asm/uaccess.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/in.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
26#include <linux/if_arp.h>
2e15ea39 27#include <linux/if_vlan.h>
1da177e4
LT
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/igmp.h>
32#include <linux/netfilter_ipv4.h>
e1a80002 33#include <linux/etherdevice.h>
46f25dff 34#include <linux/if_ether.h>
1da177e4
LT
35
36#include <net/sock.h>
37#include <net/ip.h>
38#include <net/icmp.h>
39#include <net/protocol.h>
c5441932 40#include <net/ip_tunnels.h>
1da177e4
LT
41#include <net/arp.h>
42#include <net/checksum.h>
43#include <net/dsfield.h>
44#include <net/inet_ecn.h>
45#include <net/xfrm.h>
59a4c759
PE
46#include <net/net_namespace.h>
47#include <net/netns/generic.h>
c19e654d 48#include <net/rtnetlink.h>
00959ade 49#include <net/gre.h>
2e15ea39 50#include <net/dst_metadata.h>
1da177e4 51
1da177e4
LT
52/*
53 Problems & solutions
54 --------------------
55
56 1. The most important issue is detecting local dead loops.
57 They would cause complete host lockup in transmit, which
58 would be "resolved" by stack overflow or, if queueing is enabled,
59 with infinite looping in net_bh.
60
61 We cannot track such dead loops during route installation,
62 it is infeasible task. The most general solutions would be
63 to keep skb->encapsulation counter (sort of local ttl),
6d0722a2 64 and silently drop packet when it expires. It is a good
bff52857 65 solution, but it supposes maintaining new variable in ALL
1da177e4
LT
66 skb, even if no tunneling is used.
67
6d0722a2
ED
68 Current solution: xmit_recursion breaks dead loops. This is a percpu
69 counter, since when we enter the first ndo_xmit(), cpu migration is
70 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
1da177e4
LT
71
72 2. Networking dead loops would not kill routers, but would really
73 kill network. IP hop limit plays role of "t->recursion" in this case,
74 if we copy it from packet being encapsulated to upper header.
75 It is very good solution, but it introduces two problems:
76
77 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
78 do not work over tunnels.
79 - traceroute does not work. I planned to relay ICMP from tunnel,
80 so that this problem would be solved and traceroute output
81 would be even more informative. This idea appeared to be wrong:
82 only Linux complies to rfc1812 now (yes, guys, Linux is the only
83 true router now :-)), all routers (at least, in neighbourhood of mine)
84 return only 8 bytes of payload. It is the end.
85
86 Hence, if we want that OSPF worked or traceroute said something reasonable,
87 we should search for another solution.
88
89 One of them is to parse packet trying to detect inner encapsulation
90 made by our node. It is difficult or even impossible, especially,
bff52857 91 taking into account fragmentation. To be short, ttl is not a solution at all.
1da177e4
LT
92
93 Current solution: The solution was UNEXPECTEDLY SIMPLE.
94 We force DF flag on tunnels with preconfigured hop limit,
95 that is ALL. :-) Well, it does not remove the problem completely,
96 but exponential growth of network traffic is changed to linear
97 (branches, that exceed pmtu are pruned) and tunnel mtu
bff52857 98 rapidly degrades to value <68, where looping stops.
1da177e4
LT
99 Yes, it is not good if there exists a router in the loop,
100 which does not force DF, even when encapsulating packets have DF set.
101 But it is not our problem! Nobody could accuse us, we made
102 all that we could make. Even if it is your gated who injected
103 fatal route to network, even if it were you who configured
104 fatal static route: you are innocent. :-)
105
1da177e4
LT
106 Alexey Kuznetsov.
107 */
108
eccc1bb8 109static bool log_ecn_error = true;
110module_param(log_ecn_error, bool, 0644);
111MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
112
c19e654d 113static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4 114static int ipgre_tunnel_init(struct net_device *dev);
eb8ce741 115
f99189b1 116static int ipgre_net_id __read_mostly;
c5441932 117static int gre_tap_net_id __read_mostly;
1da177e4 118
9f57c67c
PS
119static void ipgre_err(struct sk_buff *skb, u32 info,
120 const struct tnl_ptk_info *tpi)
1da177e4 121{
1da177e4 122
c5441932
PS
123 /* All the routers (except for Linux) return only
124 8 bytes of packet payload. It means, that precise relaying of
125 ICMP in the real Internet is absolutely infeasible.
1da177e4 126
c5441932
PS
127 Moreover, Cisco "wise men" put GRE key to the third word
128 in GRE header. It makes impossible maintaining even soft
129 state for keyed GRE tunnels with enabled checksum. Tell
130 them "thank you".
1da177e4 131
c5441932
PS
132 Well, I wonder, rfc1812 was written by Cisco employee,
133 what the hell these idiots break standards established
134 by themselves???
135 */
136 struct net *net = dev_net(skb->dev);
137 struct ip_tunnel_net *itn;
96f5a846 138 const struct iphdr *iph;
88c7664f
ACM
139 const int type = icmp_hdr(skb)->type;
140 const int code = icmp_hdr(skb)->code;
1da177e4 141 struct ip_tunnel *t;
1da177e4 142
1da177e4
LT
143 switch (type) {
144 default:
145 case ICMP_PARAMETERPROB:
9f57c67c 146 return;
1da177e4
LT
147
148 case ICMP_DEST_UNREACH:
149 switch (code) {
150 case ICMP_SR_FAILED:
151 case ICMP_PORT_UNREACH:
152 /* Impossible event. */
9f57c67c 153 return;
1da177e4
LT
154 default:
155 /* All others are translated to HOST_UNREACH.
156 rfc2003 contains "deep thoughts" about NET_UNREACH,
157 I believe they are just ether pollution. --ANK
158 */
159 break;
160 }
161 break;
9f57c67c 162
1da177e4
LT
163 case ICMP_TIME_EXCEEDED:
164 if (code != ICMP_EXC_TTL)
9f57c67c 165 return;
1da177e4 166 break;
55be7a9c
DM
167
168 case ICMP_REDIRECT:
169 break;
1da177e4
LT
170 }
171
bda7bb46 172 if (tpi->proto == htons(ETH_P_TEB))
c5441932
PS
173 itn = net_generic(net, gre_tap_net_id);
174 else
175 itn = net_generic(net, ipgre_net_id);
176
c0c0c50f 177 iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
bda7bb46
PS
178 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
179 iph->daddr, iph->saddr, tpi->key);
d2083287 180
51456b29 181 if (!t)
9f57c67c 182 return;
36393395 183
36393395 184 if (t->parms.iph.daddr == 0 ||
f97c1e0c 185 ipv4_is_multicast(t->parms.iph.daddr))
9f57c67c 186 return;
1da177e4
LT
187
188 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
9f57c67c 189 return;
1da177e4 190
da6185d8 191 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
192 t->err_count++;
193 else
194 t->err_count = 1;
195 t->err_time = jiffies;
9f57c67c
PS
196}
197
198static void gre_err(struct sk_buff *skb, u32 info)
199{
200 /* All the routers (except for Linux) return only
201 * 8 bytes of packet payload. It means, that precise relaying of
202 * ICMP in the real Internet is absolutely infeasible.
203 *
204 * Moreover, Cisco "wise men" put GRE key to the third word
205 * in GRE header. It makes impossible maintaining even soft
206 * state for keyed
207 * GRE tunnels with enabled checksum. Tell them "thank you".
208 *
209 * Well, I wonder, rfc1812 was written by Cisco employee,
210 * what the hell these idiots break standards established
211 * by themselves???
212 */
213
e582615a 214 const struct iphdr *iph = (struct iphdr *)skb->data;
9f57c67c
PS
215 const int type = icmp_hdr(skb)->type;
216 const int code = icmp_hdr(skb)->code;
217 struct tnl_ptk_info tpi;
218 bool csum_err = false;
219
e582615a
ED
220 if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP),
221 iph->ihl * 4) < 0) {
9f57c67c
PS
222 if (!csum_err) /* ignore csum errors. */
223 return;
224 }
225
226 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
227 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
228 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
229 return;
230 }
231 if (type == ICMP_REDIRECT) {
232 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
233 IPPROTO_GRE, 0);
234 return;
235 }
236
237 ipgre_err(skb, info, &tpi);
1da177e4
LT
238}
239
2e15ea39
PS
240static __be64 key_to_tunnel_id(__be32 key)
241{
242#ifdef __BIG_ENDIAN
243 return (__force __be64)((__force u32)key);
244#else
245 return (__force __be64)((__force u64)key << 32);
246#endif
247}
248
249/* Returns the least-significant 32 bits of a __be64. */
250static __be32 tunnel_id_to_key(__be64 x)
251{
252#ifdef __BIG_ENDIAN
253 return (__force __be32)x;
254#else
255 return (__force __be32)((__force u64)x >> 32);
256#endif
257}
258
125372fa
JB
259static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
260 struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
1da177e4 261{
2e15ea39 262 struct metadata_dst *tun_dst = NULL;
b71d1d42 263 const struct iphdr *iph;
1da177e4 264 struct ip_tunnel *tunnel;
1da177e4 265
c5441932 266 iph = ip_hdr(skb);
bda7bb46
PS
267 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
268 iph->saddr, iph->daddr, tpi->key);
e1a80002 269
d2083287 270 if (tunnel) {
125372fa
JB
271 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
272 raw_proto, false) < 0)
244a797b
JB
273 goto drop;
274
e271c7b4
JB
275 if (tunnel->dev->type != ARPHRD_NONE)
276 skb_pop_mac_header(skb);
277 else
278 skb_reset_mac_header(skb);
2e15ea39 279 if (tunnel->collect_md) {
c29a70d2
PS
280 __be16 flags;
281 __be64 tun_id;
2e15ea39 282
c29a70d2
PS
283 flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
284 tun_id = key_to_tunnel_id(tpi->key);
285 tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
2e15ea39
PS
286 if (!tun_dst)
287 return PACKET_REJECT;
2e15ea39
PS
288 }
289
290 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
bda7bb46 291 return PACKET_RCVD;
1da177e4 292 }
125372fa 293 return PACKET_NEXT;
244a797b
JB
294
295drop:
296 kfree_skb(skb);
297 return PACKET_RCVD;
1da177e4
LT
298}
299
125372fa
JB
300static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
301 int hdr_len)
302{
303 struct net *net = dev_net(skb->dev);
304 struct ip_tunnel_net *itn;
305 int res;
306
307 if (tpi->proto == htons(ETH_P_TEB))
308 itn = net_generic(net, gre_tap_net_id);
309 else
310 itn = net_generic(net, ipgre_net_id);
311
312 res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
313 if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
314 /* ipgre tunnels in collect metadata mode should receive
315 * also ETH_P_TEB traffic.
316 */
317 itn = net_generic(net, ipgre_net_id);
318 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
319 }
320 return res;
321}
322
9f57c67c
PS
323static int gre_rcv(struct sk_buff *skb)
324{
325 struct tnl_ptk_info tpi;
326 bool csum_err = false;
95f5c64c 327 int hdr_len;
9f57c67c
PS
328
329#ifdef CONFIG_NET_IPGRE_BROADCAST
330 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
331 /* Looped back packet, drop it! */
332 if (rt_is_output_route(skb_rtable(skb)))
333 goto drop;
334 }
335#endif
336
e582615a 337 hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
f132ae7c 338 if (hdr_len < 0)
95f5c64c
TH
339 goto drop;
340
244a797b 341 if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
9f57c67c
PS
342 return 0;
343
344 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
345drop:
346 kfree_skb(skb);
347 return 0;
348}
349
c5441932
PS
350static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
351 const struct iphdr *tnl_params,
352 __be16 proto)
353{
354 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 355
c5441932
PS
356 if (tunnel->parms.o_flags & TUNNEL_SEQ)
357 tunnel->o_seqno++;
1da177e4 358
c5441932 359 /* Push GRE header. */
182a352d
TH
360 gre_build_header(skb, tunnel->tun_hlen,
361 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
362 htonl(tunnel->o_seqno));
54bc9bac 363
2e15ea39 364 skb_set_inner_protocol(skb, proto);
bf3d6a8f 365 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
c5441932 366}
1da177e4 367
aed069df 368static int gre_handle_offloads(struct sk_buff *skb, bool csum)
b2acd1dc 369{
6fa79666 370 return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
b2acd1dc
PS
371}
372
fc4099f1
PS
373static struct rtable *gre_get_rt(struct sk_buff *skb,
374 struct net_device *dev,
375 struct flowi4 *fl,
376 const struct ip_tunnel_key *key)
377{
378 struct net *net = dev_net(dev);
379
380 memset(fl, 0, sizeof(*fl));
381 fl->daddr = key->u.ipv4.dst;
382 fl->saddr = key->u.ipv4.src;
383 fl->flowi4_tos = RT_TOS(key->tos);
384 fl->flowi4_mark = skb->mark;
385 fl->flowi4_proto = IPPROTO_GRE;
386
387 return ip_route_output_key(net, fl);
388}
389
2090714e
JB
390static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
391 __be16 proto)
2e15ea39
PS
392{
393 struct ip_tunnel_info *tun_info;
2e15ea39 394 const struct ip_tunnel_key *key;
db3c6139 395 struct rtable *rt = NULL;
2e15ea39 396 struct flowi4 fl;
2e15ea39
PS
397 int min_headroom;
398 int tunnel_hlen;
399 __be16 df, flags;
db3c6139 400 bool use_cache;
2e15ea39
PS
401 int err;
402
61adedf3 403 tun_info = skb_tunnel_info(skb);
7f9562a1
JB
404 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
405 ip_tunnel_info_af(tun_info) != AF_INET))
2e15ea39
PS
406 goto err_free_skb;
407
408 key = &tun_info->key;
db3c6139
DB
409 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
410 if (use_cache)
411 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
3c1cb4d2
PA
412 if (!rt) {
413 rt = gre_get_rt(skb, dev, &fl, key);
414 if (IS_ERR(rt))
415 goto err_free_skb;
db3c6139 416 if (use_cache)
3c1cb4d2
PA
417 dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
418 fl.saddr);
419 }
2e15ea39 420
95f5c64c 421 tunnel_hlen = gre_calc_hlen(key->tun_flags);
2e15ea39
PS
422
423 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
424 + tunnel_hlen + sizeof(struct iphdr);
425 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
426 int head_delta = SKB_DATA_ALIGN(min_headroom -
427 skb_headroom(skb) +
428 16);
429 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
430 0, GFP_ATOMIC);
431 if (unlikely(err))
432 goto err_free_rt;
433 }
434
435 /* Push Tunnel header. */
aed069df 436 if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
2e15ea39 437 goto err_free_rt;
2e15ea39
PS
438
439 flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
cba65321 440 gre_build_header(skb, tunnel_hlen, flags, proto,
182a352d 441 tunnel_id_to_key(tun_info->key.tun_id), 0);
2e15ea39
PS
442
443 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
039f5062
PS
444
445 iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
446 key->tos, key->ttl, df, false);
2e15ea39
PS
447 return;
448
449err_free_rt:
450 ip_rt_put(rt);
451err_free_skb:
452 kfree_skb(skb);
453 dev->stats.tx_dropped++;
454}
455
fc4099f1
PS
456static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
457{
458 struct ip_tunnel_info *info = skb_tunnel_info(skb);
459 struct rtable *rt;
460 struct flowi4 fl4;
461
462 if (ip_tunnel_info_af(info) != AF_INET)
463 return -EINVAL;
464
465 rt = gre_get_rt(skb, dev, &fl4, &info->key);
466 if (IS_ERR(rt))
467 return PTR_ERR(rt);
468
469 ip_rt_put(rt);
470 info->key.u.ipv4.src = fl4.saddr;
471 return 0;
472}
473
c5441932
PS
474static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
475 struct net_device *dev)
476{
477 struct ip_tunnel *tunnel = netdev_priv(dev);
478 const struct iphdr *tnl_params;
1da177e4 479
2e15ea39 480 if (tunnel->collect_md) {
2090714e 481 gre_fb_xmit(skb, dev, skb->protocol);
2e15ea39
PS
482 return NETDEV_TX_OK;
483 }
484
c5441932
PS
485 if (dev->header_ops) {
486 /* Need space for new headers */
487 if (skb_cow_head(skb, dev->needed_headroom -
2bac7cb3 488 (tunnel->hlen + sizeof(struct iphdr))))
c5441932 489 goto free_skb;
1da177e4 490
c5441932 491 tnl_params = (const struct iphdr *)skb->data;
1da177e4 492
c5441932
PS
493 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
494 * to gre header.
495 */
496 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
8a0033a9 497 skb_reset_mac_header(skb);
c5441932
PS
498 } else {
499 if (skb_cow_head(skb, dev->needed_headroom))
500 goto free_skb;
1da177e4 501
c5441932 502 tnl_params = &tunnel->parms.iph;
1da177e4
LT
503 }
504
aed069df
AD
505 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
506 goto free_skb;
8a0033a9 507
c5441932 508 __gre_xmit(skb, dev, tnl_params, skb->protocol);
6ed10654 509 return NETDEV_TX_OK;
1da177e4 510
c5441932 511free_skb:
3acfa1e7 512 kfree_skb(skb);
c5441932 513 dev->stats.tx_dropped++;
6ed10654 514 return NETDEV_TX_OK;
1da177e4
LT
515}
516
c5441932
PS
517static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
518 struct net_device *dev)
ee34c1eb 519{
c5441932 520 struct ip_tunnel *tunnel = netdev_priv(dev);
ee34c1eb 521
2e15ea39 522 if (tunnel->collect_md) {
2090714e 523 gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
2e15ea39
PS
524 return NETDEV_TX_OK;
525 }
526
aed069df
AD
527 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
528 goto free_skb;
ee34c1eb 529
c5441932
PS
530 if (skb_cow_head(skb, dev->needed_headroom))
531 goto free_skb;
42aa9162 532
c5441932 533 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
c5441932 534 return NETDEV_TX_OK;
ee34c1eb 535
c5441932 536free_skb:
3acfa1e7 537 kfree_skb(skb);
c5441932
PS
538 dev->stats.tx_dropped++;
539 return NETDEV_TX_OK;
ee34c1eb
MS
540}
541
c5441932
PS
542static int ipgre_tunnel_ioctl(struct net_device *dev,
543 struct ifreq *ifr, int cmd)
1da177e4 544{
4565e991 545 int err;
1da177e4 546 struct ip_tunnel_parm p;
1da177e4 547
c5441932
PS
548 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
549 return -EFAULT;
6c734fb8
CW
550 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
551 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
552 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
553 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
554 return -EINVAL;
1da177e4 555 }
c5441932
PS
556 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
557 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
1da177e4 558
c5441932
PS
559 err = ip_tunnel_ioctl(dev, &p, cmd);
560 if (err)
561 return err;
1da177e4 562
95f5c64c
TH
563 p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
564 p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
c5441932
PS
565
566 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
567 return -EFAULT;
1da177e4
LT
568 return 0;
569}
570
1da177e4
LT
571/* Nice toy. Unfortunately, useless in real life :-)
572 It allows to construct virtual multiprotocol broadcast "LAN"
573 over the Internet, provided multicast routing is tuned.
574
575
576 I have no idea was this bicycle invented before me,
577 so that I had to set ARPHRD_IPGRE to a random value.
578 I have an impression, that Cisco could make something similar,
579 but this feature is apparently missing in IOS<=11.2(8).
e905a9ed 580
1da177e4
LT
581 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
582 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
583
584 ping -t 255 224.66.66.66
585
586 If nobody answers, mbone does not work.
587
588 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
589 ip addr add 10.66.66.<somewhat>/24 dev Universe
590 ifconfig Universe up
591 ifconfig Universe add fe80::<Your_real_addr>/10
592 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
593 ftp 10.66.66.66
594 ...
595 ftp fec0:6666:6666::193.233.7.65
596 ...
1da177e4 597 */
3b04ddde
SH
598static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
599 unsigned short type,
1507850b 600 const void *daddr, const void *saddr, unsigned int len)
1da177e4 601{
2941a486 602 struct ip_tunnel *t = netdev_priv(dev);
c5441932
PS
603 struct iphdr *iph;
604 struct gre_base_hdr *greh;
1da177e4 605
c5441932
PS
606 iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
607 greh = (struct gre_base_hdr *)(iph+1);
95f5c64c 608 greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
c5441932 609 greh->protocol = htons(type);
1da177e4 610
c5441932 611 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
e905a9ed 612
c5441932 613 /* Set the source hardware address. */
1da177e4
LT
614 if (saddr)
615 memcpy(&iph->saddr, saddr, 4);
6d55cb91 616 if (daddr)
1da177e4 617 memcpy(&iph->daddr, daddr, 4);
6d55cb91 618 if (iph->daddr)
77a482bd 619 return t->hlen + sizeof(*iph);
e905a9ed 620
c5441932 621 return -(t->hlen + sizeof(*iph));
1da177e4
LT
622}
623
6a5f44d7
TT
624static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
625{
b71d1d42 626 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
627 memcpy(haddr, &iph->saddr, 4);
628 return 4;
629}
630
3b04ddde
SH
631static const struct header_ops ipgre_header_ops = {
632 .create = ipgre_header,
6a5f44d7 633 .parse = ipgre_header_parse,
3b04ddde
SH
634};
635
6a5f44d7 636#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
637static int ipgre_open(struct net_device *dev)
638{
2941a486 639 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 640
f97c1e0c 641 if (ipv4_is_multicast(t->parms.iph.daddr)) {
cbb1e85f
DM
642 struct flowi4 fl4;
643 struct rtable *rt;
644
b57708ad 645 rt = ip_route_output_gre(t->net, &fl4,
cbb1e85f
DM
646 t->parms.iph.daddr,
647 t->parms.iph.saddr,
648 t->parms.o_key,
649 RT_TOS(t->parms.iph.tos),
650 t->parms.link);
b23dd4fe 651 if (IS_ERR(rt))
1da177e4 652 return -EADDRNOTAVAIL;
d8d1f30b 653 dev = rt->dst.dev;
1da177e4 654 ip_rt_put(rt);
51456b29 655 if (!__in_dev_get_rtnl(dev))
1da177e4
LT
656 return -EADDRNOTAVAIL;
657 t->mlink = dev->ifindex;
e5ed6399 658 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
659 }
660 return 0;
661}
662
663static int ipgre_close(struct net_device *dev)
664{
2941a486 665 struct ip_tunnel *t = netdev_priv(dev);
b8c26a33 666
f97c1e0c 667 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 668 struct in_device *in_dev;
b57708ad 669 in_dev = inetdev_by_index(t->net, t->mlink);
8723e1b4 670 if (in_dev)
1da177e4 671 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1da177e4
LT
672 }
673 return 0;
674}
1da177e4
LT
675#endif
676
b8c26a33
SH
677static const struct net_device_ops ipgre_netdev_ops = {
678 .ndo_init = ipgre_tunnel_init,
c5441932 679 .ndo_uninit = ip_tunnel_uninit,
b8c26a33
SH
680#ifdef CONFIG_NET_IPGRE_BROADCAST
681 .ndo_open = ipgre_open,
682 .ndo_stop = ipgre_close,
683#endif
c5441932 684 .ndo_start_xmit = ipgre_xmit,
b8c26a33 685 .ndo_do_ioctl = ipgre_tunnel_ioctl,
c5441932
PS
686 .ndo_change_mtu = ip_tunnel_change_mtu,
687 .ndo_get_stats64 = ip_tunnel_get_stats64,
1e99584b 688 .ndo_get_iflink = ip_tunnel_get_iflink,
b8c26a33
SH
689};
690
6b78f16e
ED
/* Feature bits set on every GRE device by __gre_tunnel_init(). */
#define GRE_FEATURES	(NETIF_F_SG | NETIF_F_FRAGLIST | \
			 NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
695
1da177e4
LT
696static void ipgre_tunnel_setup(struct net_device *dev)
697{
b8c26a33 698 dev->netdev_ops = &ipgre_netdev_ops;
5a455275 699 dev->type = ARPHRD_IPGRE;
c5441932
PS
700 ip_tunnel_setup(dev, ipgre_net_id);
701}
1da177e4 702
c5441932
PS
703static void __gre_tunnel_init(struct net_device *dev)
704{
705 struct ip_tunnel *tunnel;
4565e991 706 int t_hlen;
c5441932
PS
707
708 tunnel = netdev_priv(dev);
95f5c64c 709 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
c5441932
PS
710 tunnel->parms.iph.protocol = IPPROTO_GRE;
711
4565e991
TH
712 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
713
714 t_hlen = tunnel->hlen + sizeof(struct iphdr);
715
716 dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
717 dev->mtu = ETH_DATA_LEN - t_hlen - 4;
6b78f16e 718
b57708ad 719 dev->features |= GRE_FEATURES;
6b78f16e 720 dev->hw_features |= GRE_FEATURES;
c5441932
PS
721
722 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
a0ca153f
AD
723 /* TCP offload with GRE SEQ is not supported, nor
724 * can we support 2 levels of outer headers requiring
725 * an update.
726 */
727 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
728 (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
729 dev->features |= NETIF_F_GSO_SOFTWARE;
730 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
731 }
732
c5441932
PS
733 /* Can use a lockless transmit, unless we generate
734 * output sequences
735 */
736 dev->features |= NETIF_F_LLTX;
737 }
1da177e4
LT
738}
739
740static int ipgre_tunnel_init(struct net_device *dev)
741{
c5441932
PS
742 struct ip_tunnel *tunnel = netdev_priv(dev);
743 struct iphdr *iph = &tunnel->parms.iph;
1da177e4 744
c5441932 745 __gre_tunnel_init(dev);
1da177e4 746
c5441932
PS
747 memcpy(dev->dev_addr, &iph->saddr, 4);
748 memcpy(dev->broadcast, &iph->daddr, 4);
1da177e4 749
c5441932 750 dev->flags = IFF_NOARP;
02875878 751 netif_keep_dst(dev);
c5441932 752 dev->addr_len = 4;
1da177e4 753
a64b04d8 754 if (iph->daddr && !tunnel->collect_md) {
1da177e4 755#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 756 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
757 if (!iph->saddr)
758 return -EINVAL;
759 dev->flags = IFF_BROADCAST;
3b04ddde 760 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
761 }
762#endif
a64b04d8 763 } else if (!tunnel->collect_md) {
6a5f44d7 764 dev->header_ops = &ipgre_header_ops;
a64b04d8 765 }
1da177e4 766
c5441932 767 return ip_tunnel_init(dev);
1da177e4
LT
768}
769
9f57c67c
PS
770static const struct gre_protocol ipgre_protocol = {
771 .handler = gre_rcv,
772 .err_handler = gre_err,
1da177e4
LT
773};
774
2c8c1e72 775static int __net_init ipgre_init_net(struct net *net)
59a4c759 776{
c5441932 777 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
59a4c759
PE
778}
779
2c8c1e72 780static void __net_exit ipgre_exit_net(struct net *net)
59a4c759 781{
c5441932 782 struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
6c742e71 783 ip_tunnel_delete_net(itn, &ipgre_link_ops);
59a4c759
PE
784}
785
786static struct pernet_operations ipgre_net_ops = {
787 .init = ipgre_init_net,
788 .exit = ipgre_exit_net,
cfb8fbf2 789 .id = &ipgre_net_id,
c5441932 790 .size = sizeof(struct ip_tunnel_net),
59a4c759 791};
1da177e4 792
c19e654d
HX
793static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
794{
795 __be16 flags;
796
797 if (!data)
798 return 0;
799
800 flags = 0;
801 if (data[IFLA_GRE_IFLAGS])
802 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
803 if (data[IFLA_GRE_OFLAGS])
804 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
805 if (flags & (GRE_VERSION|GRE_ROUTING))
806 return -EINVAL;
807
946b636f
JB
808 if (data[IFLA_GRE_COLLECT_METADATA] &&
809 data[IFLA_GRE_ENCAP_TYPE] &&
810 nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
811 return -EINVAL;
812
c19e654d
HX
813 return 0;
814}
815
e1a80002
HX
816static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
817{
818 __be32 daddr;
819
820 if (tb[IFLA_ADDRESS]) {
821 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
822 return -EINVAL;
823 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
824 return -EADDRNOTAVAIL;
825 }
826
827 if (!data)
828 goto out;
829
830 if (data[IFLA_GRE_REMOTE]) {
831 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
832 if (!daddr)
833 return -EINVAL;
834 }
835
836out:
837 return ipgre_tunnel_validate(tb, data);
838}
839
2e15ea39
PS
840static void ipgre_netlink_parms(struct net_device *dev,
841 struct nlattr *data[],
842 struct nlattr *tb[],
843 struct ip_tunnel_parm *parms)
c19e654d 844{
7bb82d92 845 memset(parms, 0, sizeof(*parms));
c19e654d
HX
846
847 parms->iph.protocol = IPPROTO_GRE;
848
849 if (!data)
850 return;
851
852 if (data[IFLA_GRE_LINK])
853 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
854
855 if (data[IFLA_GRE_IFLAGS])
c5441932 856 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
c19e654d
HX
857
858 if (data[IFLA_GRE_OFLAGS])
c5441932 859 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
c19e654d
HX
860
861 if (data[IFLA_GRE_IKEY])
862 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
863
864 if (data[IFLA_GRE_OKEY])
865 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
866
867 if (data[IFLA_GRE_LOCAL])
67b61f6c 868 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
c19e654d
HX
869
870 if (data[IFLA_GRE_REMOTE])
67b61f6c 871 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
c19e654d
HX
872
873 if (data[IFLA_GRE_TTL])
874 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
875
876 if (data[IFLA_GRE_TOS])
877 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
878
879 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
880 parms->iph.frag_off = htons(IP_DF);
2e15ea39
PS
881
882 if (data[IFLA_GRE_COLLECT_METADATA]) {
883 struct ip_tunnel *t = netdev_priv(dev);
884
885 t->collect_md = true;
e271c7b4
JB
886 if (dev->type == ARPHRD_IPGRE)
887 dev->type = ARPHRD_NONE;
2e15ea39 888 }
c19e654d
HX
889}
890
4565e991
TH
891/* This function returns true when ENCAP attributes are present in the nl msg */
892static bool ipgre_netlink_encap_parms(struct nlattr *data[],
893 struct ip_tunnel_encap *ipencap)
894{
895 bool ret = false;
896
897 memset(ipencap, 0, sizeof(*ipencap));
898
899 if (!data)
900 return ret;
901
902 if (data[IFLA_GRE_ENCAP_TYPE]) {
903 ret = true;
904 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
905 }
906
907 if (data[IFLA_GRE_ENCAP_FLAGS]) {
908 ret = true;
909 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
910 }
911
912 if (data[IFLA_GRE_ENCAP_SPORT]) {
913 ret = true;
3e97fa70 914 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
4565e991
TH
915 }
916
917 if (data[IFLA_GRE_ENCAP_DPORT]) {
918 ret = true;
3e97fa70 919 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
4565e991
TH
920 }
921
922 return ret;
923}
924
c5441932 925static int gre_tap_init(struct net_device *dev)
e1a80002 926{
c5441932 927 __gre_tunnel_init(dev);
bec94d43 928 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
e1a80002 929
c5441932 930 return ip_tunnel_init(dev);
e1a80002
HX
931}
932
c5441932
PS
933static const struct net_device_ops gre_tap_netdev_ops = {
934 .ndo_init = gre_tap_init,
935 .ndo_uninit = ip_tunnel_uninit,
936 .ndo_start_xmit = gre_tap_xmit,
b8c26a33
SH
937 .ndo_set_mac_address = eth_mac_addr,
938 .ndo_validate_addr = eth_validate_addr,
c5441932
PS
939 .ndo_change_mtu = ip_tunnel_change_mtu,
940 .ndo_get_stats64 = ip_tunnel_get_stats64,
1e99584b 941 .ndo_get_iflink = ip_tunnel_get_iflink,
fc4099f1 942 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
b8c26a33
SH
943};
944
e1a80002
HX
945static void ipgre_tap_setup(struct net_device *dev)
946{
e1a80002 947 ether_setup(dev);
d13b161c
JB
948 dev->netdev_ops = &gre_tap_netdev_ops;
949 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
950 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
c5441932 951 ip_tunnel_setup(dev, gre_tap_net_id);
e1a80002
HX
952}
953
c5441932
PS
954static int ipgre_newlink(struct net *src_net, struct net_device *dev,
955 struct nlattr *tb[], struct nlattr *data[])
c19e654d 956{
c5441932 957 struct ip_tunnel_parm p;
4565e991
TH
958 struct ip_tunnel_encap ipencap;
959
960 if (ipgre_netlink_encap_parms(data, &ipencap)) {
961 struct ip_tunnel *t = netdev_priv(dev);
962 int err = ip_tunnel_encap_setup(t, &ipencap);
963
964 if (err < 0)
965 return err;
966 }
c19e654d 967
2e15ea39 968 ipgre_netlink_parms(dev, data, tb, &p);
c5441932 969 return ip_tunnel_newlink(dev, tb, &p);
c19e654d
HX
970}
971
972static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
973 struct nlattr *data[])
974{
c19e654d 975 struct ip_tunnel_parm p;
4565e991
TH
976 struct ip_tunnel_encap ipencap;
977
978 if (ipgre_netlink_encap_parms(data, &ipencap)) {
979 struct ip_tunnel *t = netdev_priv(dev);
980 int err = ip_tunnel_encap_setup(t, &ipencap);
981
982 if (err < 0)
983 return err;
984 }
c19e654d 985
2e15ea39 986 ipgre_netlink_parms(dev, data, tb, &p);
c5441932 987 return ip_tunnel_changelink(dev, tb, &p);
c19e654d
HX
988}
989
990static size_t ipgre_get_size(const struct net_device *dev)
991{
992 return
993 /* IFLA_GRE_LINK */
994 nla_total_size(4) +
995 /* IFLA_GRE_IFLAGS */
996 nla_total_size(2) +
997 /* IFLA_GRE_OFLAGS */
998 nla_total_size(2) +
999 /* IFLA_GRE_IKEY */
1000 nla_total_size(4) +
1001 /* IFLA_GRE_OKEY */
1002 nla_total_size(4) +
1003 /* IFLA_GRE_LOCAL */
1004 nla_total_size(4) +
1005 /* IFLA_GRE_REMOTE */
1006 nla_total_size(4) +
1007 /* IFLA_GRE_TTL */
1008 nla_total_size(1) +
1009 /* IFLA_GRE_TOS */
1010 nla_total_size(1) +
1011 /* IFLA_GRE_PMTUDISC */
1012 nla_total_size(1) +
4565e991
TH
1013 /* IFLA_GRE_ENCAP_TYPE */
1014 nla_total_size(2) +
1015 /* IFLA_GRE_ENCAP_FLAGS */
1016 nla_total_size(2) +
1017 /* IFLA_GRE_ENCAP_SPORT */
1018 nla_total_size(2) +
1019 /* IFLA_GRE_ENCAP_DPORT */
1020 nla_total_size(2) +
2e15ea39
PS
1021 /* IFLA_GRE_COLLECT_METADATA */
1022 nla_total_size(0) +
c19e654d
HX
1023 0;
1024}
1025
1026static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1027{
1028 struct ip_tunnel *t = netdev_priv(dev);
1029 struct ip_tunnel_parm *p = &t->parms;
1030
f3756b79 1031 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
95f5c64c
TH
1032 nla_put_be16(skb, IFLA_GRE_IFLAGS,
1033 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1034 nla_put_be16(skb, IFLA_GRE_OFLAGS,
1035 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
f3756b79
DM
1036 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1037 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
930345ea
JB
1038 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1039 nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
f3756b79
DM
1040 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1041 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1042 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1043 !!(p->iph.frag_off & htons(IP_DF))))
1044 goto nla_put_failure;
4565e991
TH
1045
1046 if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1047 t->encap.type) ||
3e97fa70
SD
1048 nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1049 t->encap.sport) ||
1050 nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1051 t->encap.dport) ||
4565e991 1052 nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
e1b2cb65 1053 t->encap.flags))
4565e991
TH
1054 goto nla_put_failure;
1055
2e15ea39
PS
1056 if (t->collect_md) {
1057 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1058 goto nla_put_failure;
1059 }
1060
c19e654d
HX
1061 return 0;
1062
1063nla_put_failure:
1064 return -EMSGSIZE;
1065}
1066
1067static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1068 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1069 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1070 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1071 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1072 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
4d74f8ba
PM
1073 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1074 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
c19e654d
HX
1075 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1076 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1077 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
4565e991
TH
1078 [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
1079 [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
1080 [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
1081 [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
2e15ea39 1082 [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
c19e654d
HX
1083};
1084
1085static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1086 .kind = "gre",
1087 .maxtype = IFLA_GRE_MAX,
1088 .policy = ipgre_policy,
1089 .priv_size = sizeof(struct ip_tunnel),
1090 .setup = ipgre_tunnel_setup,
1091 .validate = ipgre_tunnel_validate,
1092 .newlink = ipgre_newlink,
1093 .changelink = ipgre_changelink,
c5441932 1094 .dellink = ip_tunnel_dellink,
c19e654d
HX
1095 .get_size = ipgre_get_size,
1096 .fill_info = ipgre_fill_info,
1728d4fa 1097 .get_link_net = ip_tunnel_get_link_net,
c19e654d
HX
1098};
1099
e1a80002
HX
1100static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1101 .kind = "gretap",
1102 .maxtype = IFLA_GRE_MAX,
1103 .policy = ipgre_policy,
1104 .priv_size = sizeof(struct ip_tunnel),
1105 .setup = ipgre_tap_setup,
1106 .validate = ipgre_tap_validate,
1107 .newlink = ipgre_newlink,
1108 .changelink = ipgre_changelink,
c5441932 1109 .dellink = ip_tunnel_dellink,
e1a80002
HX
1110 .get_size = ipgre_get_size,
1111 .fill_info = ipgre_fill_info,
1728d4fa 1112 .get_link_net = ip_tunnel_get_link_net,
e1a80002
HX
1113};
1114
b2acd1dc
PS
1115struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1116 u8 name_assign_type)
1117{
1118 struct nlattr *tb[IFLA_MAX + 1];
1119 struct net_device *dev;
106da663 1120 LIST_HEAD(list_kill);
b2acd1dc
PS
1121 struct ip_tunnel *t;
1122 int err;
1123
1124 memset(&tb, 0, sizeof(tb));
1125
1126 dev = rtnl_create_link(net, name, name_assign_type,
1127 &ipgre_tap_ops, tb);
1128 if (IS_ERR(dev))
1129 return dev;
1130
1131 /* Configure flow based GRE device. */
1132 t = netdev_priv(dev);
1133 t->collect_md = true;
1134
1135 err = ipgre_newlink(net, dev, tb, NULL);
106da663
ND
1136 if (err < 0) {
1137 free_netdev(dev);
1138 return ERR_PTR(err);
1139 }
7e059158
DW
1140
1141 /* openvswitch users expect packet sizes to be unrestricted,
1142 * so set the largest MTU we can.
1143 */
1144 err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1145 if (err)
1146 goto out;
1147
da6f1da8
ND
1148 err = rtnl_configure_link(dev, NULL);
1149 if (err < 0)
1150 goto out;
1151
b2acd1dc
PS
1152 return dev;
1153out:
106da663
ND
1154 ip_tunnel_dellink(dev, &list_kill);
1155 unregister_netdevice_many(&list_kill);
b2acd1dc
PS
1156 return ERR_PTR(err);
1157}
1158EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1159
c5441932
PS
1160static int __net_init ipgre_tap_init_net(struct net *net)
1161{
2e15ea39 1162 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
c5441932
PS
1163}
1164
1165static void __net_exit ipgre_tap_exit_net(struct net *net)
1166{
1167 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
6c742e71 1168 ip_tunnel_delete_net(itn, &ipgre_tap_ops);
c5441932
PS
1169}
1170
1171static struct pernet_operations ipgre_tap_net_ops = {
1172 .init = ipgre_tap_init_net,
1173 .exit = ipgre_tap_exit_net,
1174 .id = &gre_tap_net_id,
1175 .size = sizeof(struct ip_tunnel_net),
1176};
1da177e4
LT
1177
1178static int __init ipgre_init(void)
1179{
1180 int err;
1181
058bd4d2 1182 pr_info("GRE over IPv4 tunneling driver\n");
1da177e4 1183
cfb8fbf2 1184 err = register_pernet_device(&ipgre_net_ops);
59a4c759 1185 if (err < 0)
c2892f02
AD
1186 return err;
1187
c5441932
PS
1188 err = register_pernet_device(&ipgre_tap_net_ops);
1189 if (err < 0)
1190 goto pnet_tap_faied;
1191
9f57c67c 1192 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02 1193 if (err < 0) {
058bd4d2 1194 pr_info("%s: can't add protocol\n", __func__);
c2892f02
AD
1195 goto add_proto_failed;
1196 }
7daa0004 1197
c19e654d
HX
1198 err = rtnl_link_register(&ipgre_link_ops);
1199 if (err < 0)
1200 goto rtnl_link_failed;
1201
e1a80002
HX
1202 err = rtnl_link_register(&ipgre_tap_ops);
1203 if (err < 0)
1204 goto tap_ops_failed;
1205
c5441932 1206 return 0;
c19e654d 1207
e1a80002
HX
1208tap_ops_failed:
1209 rtnl_link_unregister(&ipgre_link_ops);
c19e654d 1210rtnl_link_failed:
9f57c67c 1211 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02 1212add_proto_failed:
c5441932
PS
1213 unregister_pernet_device(&ipgre_tap_net_ops);
1214pnet_tap_faied:
c2892f02 1215 unregister_pernet_device(&ipgre_net_ops);
c5441932 1216 return err;
1da177e4
LT
1217}
1218
db44575f 1219static void __exit ipgre_fini(void)
1da177e4 1220{
e1a80002 1221 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d 1222 rtnl_link_unregister(&ipgre_link_ops);
9f57c67c 1223 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
c5441932 1224 unregister_pernet_device(&ipgre_tap_net_ops);
c2892f02 1225 unregister_pernet_device(&ipgre_net_ops);
1da177e4
LT
1226}
1227
1228module_init(ipgre_init);
1229module_exit(ipgre_fini);
1230MODULE_LICENSE("GPL");
4d74f8ba
PM
1231MODULE_ALIAS_RTNL_LINK("gre");
1232MODULE_ALIAS_RTNL_LINK("gretap");
8909c9ad 1233MODULE_ALIAS_NETDEV("gre0");
c5441932 1234MODULE_ALIAS_NETDEV("gretap0");