Merge tag 'iommu-updates-v4.6' of git://git.kernel.org/pub/scm/linux/kernel/git/joro...
[linux-2.6-block.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
afd46503
JP
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
4fc268d2 15#include <linux/capability.h>
1da177e4
LT
16#include <linux/module.h>
17#include <linux/types.h>
1da177e4 18#include <linux/kernel.h>
5a0e3ad6 19#include <linux/slab.h>
1da177e4
LT
20#include <asm/uaccess.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/in.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
26#include <linux/if_arp.h>
2e15ea39 27#include <linux/if_vlan.h>
1da177e4
LT
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/igmp.h>
32#include <linux/netfilter_ipv4.h>
e1a80002 33#include <linux/etherdevice.h>
46f25dff 34#include <linux/if_ether.h>
1da177e4
LT
35
36#include <net/sock.h>
37#include <net/ip.h>
38#include <net/icmp.h>
39#include <net/protocol.h>
c5441932 40#include <net/ip_tunnels.h>
1da177e4
LT
41#include <net/arp.h>
42#include <net/checksum.h>
43#include <net/dsfield.h>
44#include <net/inet_ecn.h>
45#include <net/xfrm.h>
59a4c759
PE
46#include <net/net_namespace.h>
47#include <net/netns/generic.h>
c19e654d 48#include <net/rtnetlink.h>
00959ade 49#include <net/gre.h>
2e15ea39 50#include <net/dst_metadata.h>
1da177e4 51
dfd56b8b 52#if IS_ENABLED(CONFIG_IPV6)
1da177e4
LT
53#include <net/ipv6.h>
54#include <net/ip6_fib.h>
55#include <net/ip6_route.h>
56#endif
57
58/*
59 Problems & solutions
60 --------------------
61
62 1. The most important issue is detecting local dead loops.
63 They would cause complete host lockup in transmit, which
64 would be "resolved" by stack overflow or, if queueing is enabled,
65 with infinite looping in net_bh.
66
67 We cannot track such dead loops during route installation,
68 it is infeasible task. The most general solutions would be
69 to keep skb->encapsulation counter (sort of local ttl),
6d0722a2 70 and silently drop packet when it expires. It is a good
bff52857 71 solution, but it supposes maintaining new variable in ALL
1da177e4
LT
72 skb, even if no tunneling is used.
73
6d0722a2
ED
74 Current solution: xmit_recursion breaks dead loops. This is a percpu
75 counter, since when we enter the first ndo_xmit(), cpu migration is
76 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
1da177e4
LT
77
78 2. Networking dead loops would not kill routers, but would really
79 kill network. IP hop limit plays role of "t->recursion" in this case,
80 if we copy it from packet being encapsulated to upper header.
81 It is very good solution, but it introduces two problems:
82
83 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
84 do not work over tunnels.
85 - traceroute does not work. I planned to relay ICMP from tunnel,
86 so that this problem would be solved and traceroute output
87 would be even more informative. This idea appeared to be wrong:
88 only Linux complies to rfc1812 now (yes, guys, Linux is the only
89 true router now :-)), all routers (at least, in neighbourhood of mine)
90 return only 8 bytes of payload. It is the end.
91
92 Hence, if we want that OSPF worked or traceroute said something reasonable,
93 we should search for another solution.
94
95 One of them is to parse packet trying to detect inner encapsulation
96 made by our node. It is difficult or even impossible, especially,
bff52857 97 taking into account fragmentation. To be short, ttl is not a solution at all.
1da177e4
LT
98
99 Current solution: The solution was UNEXPECTEDLY SIMPLE.
100 We force DF flag on tunnels with preconfigured hop limit,
101 that is ALL. :-) Well, it does not remove the problem completely,
102 but exponential growth of network traffic is changed to linear
103 (branches, that exceed pmtu are pruned) and tunnel mtu
bff52857 104 rapidly degrades to value <68, where looping stops.
1da177e4
LT
105 Yes, it is not good if there exists a router in the loop,
106 which does not force DF, even when encapsulating packets have DF set.
107 But it is not our problem! Nobody could accuse us, we made
108 all that we could make. Even if it is your gated who injected
109 fatal route to network, even if it were you who configured
110 fatal static route: you are innocent. :-)
111
1da177e4
LT
112 Alexey Kuznetsov.
113 */
114
eccc1bb8 115static bool log_ecn_error = true;
116module_param(log_ecn_error, bool, 0644);
117MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
118
c19e654d 119static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4 120static int ipgre_tunnel_init(struct net_device *dev);
eb8ce741 121
f99189b1 122static int ipgre_net_id __read_mostly;
c5441932 123static int gre_tap_net_id __read_mostly;
1da177e4 124
9f57c67c
PS
125static int ip_gre_calc_hlen(__be16 o_flags)
126{
127 int addend = 4;
128
129 if (o_flags & TUNNEL_CSUM)
130 addend += 4;
131 if (o_flags & TUNNEL_KEY)
132 addend += 4;
133 if (o_flags & TUNNEL_SEQ)
134 addend += 4;
135 return addend;
136}
137
138static __be16 gre_flags_to_tnl_flags(__be16 flags)
139{
140 __be16 tflags = 0;
141
142 if (flags & GRE_CSUM)
143 tflags |= TUNNEL_CSUM;
144 if (flags & GRE_ROUTING)
145 tflags |= TUNNEL_ROUTING;
146 if (flags & GRE_KEY)
147 tflags |= TUNNEL_KEY;
148 if (flags & GRE_SEQ)
149 tflags |= TUNNEL_SEQ;
150 if (flags & GRE_STRICT)
151 tflags |= TUNNEL_STRICT;
152 if (flags & GRE_REC)
153 tflags |= TUNNEL_REC;
154 if (flags & GRE_VERSION)
155 tflags |= TUNNEL_VERSION;
156
157 return tflags;
158}
159
160static __be16 tnl_flags_to_gre_flags(__be16 tflags)
161{
162 __be16 flags = 0;
163
164 if (tflags & TUNNEL_CSUM)
165 flags |= GRE_CSUM;
166 if (tflags & TUNNEL_ROUTING)
167 flags |= GRE_ROUTING;
168 if (tflags & TUNNEL_KEY)
169 flags |= GRE_KEY;
170 if (tflags & TUNNEL_SEQ)
171 flags |= GRE_SEQ;
172 if (tflags & TUNNEL_STRICT)
173 flags |= GRE_STRICT;
174 if (tflags & TUNNEL_REC)
175 flags |= GRE_REC;
176 if (tflags & TUNNEL_VERSION)
177 flags |= GRE_VERSION;
178
179 return flags;
180}
181
182static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
183 bool *csum_err)
184{
185 const struct gre_base_hdr *greh;
186 __be32 *options;
187 int hdr_len;
188
189 if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
190 return -EINVAL;
191
192 greh = (struct gre_base_hdr *)skb_transport_header(skb);
193 if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
194 return -EINVAL;
195
196 tpi->flags = gre_flags_to_tnl_flags(greh->flags);
197 hdr_len = ip_gre_calc_hlen(tpi->flags);
198
199 if (!pskb_may_pull(skb, hdr_len))
200 return -EINVAL;
201
202 greh = (struct gre_base_hdr *)skb_transport_header(skb);
203 tpi->proto = greh->protocol;
204
205 options = (__be32 *)(greh + 1);
206 if (greh->flags & GRE_CSUM) {
207 if (skb_checksum_simple_validate(skb)) {
208 *csum_err = true;
209 return -EINVAL;
210 }
211
212 skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
213 null_compute_pseudo);
214 options++;
215 }
216
217 if (greh->flags & GRE_KEY) {
218 tpi->key = *options;
219 options++;
220 } else {
221 tpi->key = 0;
222 }
223 if (unlikely(greh->flags & GRE_SEQ)) {
224 tpi->seq = *options;
225 options++;
226 } else {
227 tpi->seq = 0;
228 }
229 /* WCCP version 1 and 2 protocol decoding.
230 * - Change protocol to IP
231 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
232 */
233 if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
234 tpi->proto = htons(ETH_P_IP);
235 if ((*(u8 *)options & 0xF0) != 0x40) {
236 hdr_len += 4;
237 if (!pskb_may_pull(skb, hdr_len))
238 return -EINVAL;
239 }
240 }
7f290c94 241 return iptunnel_pull_header(skb, hdr_len, tpi->proto, false);
9f57c67c
PS
242}
243
244static void ipgre_err(struct sk_buff *skb, u32 info,
245 const struct tnl_ptk_info *tpi)
1da177e4 246{
1da177e4 247
c5441932
PS
248 /* All the routers (except for Linux) return only
249 8 bytes of packet payload. It means, that precise relaying of
250 ICMP in the real Internet is absolutely infeasible.
1da177e4 251
c5441932
PS
252 Moreover, Cisco "wise men" put GRE key to the third word
253 in GRE header. It makes impossible maintaining even soft
254 state for keyed GRE tunnels with enabled checksum. Tell
255 them "thank you".
1da177e4 256
c5441932
PS
257 Well, I wonder, rfc1812 was written by Cisco employee,
258 what the hell these idiots break standards established
259 by themselves???
260 */
261 struct net *net = dev_net(skb->dev);
262 struct ip_tunnel_net *itn;
96f5a846 263 const struct iphdr *iph;
88c7664f
ACM
264 const int type = icmp_hdr(skb)->type;
265 const int code = icmp_hdr(skb)->code;
1da177e4 266 struct ip_tunnel *t;
1da177e4 267
1da177e4
LT
268 switch (type) {
269 default:
270 case ICMP_PARAMETERPROB:
9f57c67c 271 return;
1da177e4
LT
272
273 case ICMP_DEST_UNREACH:
274 switch (code) {
275 case ICMP_SR_FAILED:
276 case ICMP_PORT_UNREACH:
277 /* Impossible event. */
9f57c67c 278 return;
1da177e4
LT
279 default:
280 /* All others are translated to HOST_UNREACH.
281 rfc2003 contains "deep thoughts" about NET_UNREACH,
282 I believe they are just ether pollution. --ANK
283 */
284 break;
285 }
286 break;
9f57c67c 287
1da177e4
LT
288 case ICMP_TIME_EXCEEDED:
289 if (code != ICMP_EXC_TTL)
9f57c67c 290 return;
1da177e4 291 break;
55be7a9c
DM
292
293 case ICMP_REDIRECT:
294 break;
1da177e4
LT
295 }
296
bda7bb46 297 if (tpi->proto == htons(ETH_P_TEB))
c5441932
PS
298 itn = net_generic(net, gre_tap_net_id);
299 else
300 itn = net_generic(net, ipgre_net_id);
301
c0c0c50f 302 iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
bda7bb46
PS
303 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
304 iph->daddr, iph->saddr, tpi->key);
d2083287 305
51456b29 306 if (!t)
9f57c67c 307 return;
36393395 308
36393395 309 if (t->parms.iph.daddr == 0 ||
f97c1e0c 310 ipv4_is_multicast(t->parms.iph.daddr))
9f57c67c 311 return;
1da177e4
LT
312
313 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
9f57c67c 314 return;
1da177e4 315
da6185d8 316 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
317 t->err_count++;
318 else
319 t->err_count = 1;
320 t->err_time = jiffies;
9f57c67c
PS
321}
322
323static void gre_err(struct sk_buff *skb, u32 info)
324{
325 /* All the routers (except for Linux) return only
326 * 8 bytes of packet payload. It means, that precise relaying of
327 * ICMP in the real Internet is absolutely infeasible.
328 *
329 * Moreover, Cisco "wise men" put GRE key to the third word
330 * in GRE header. It makes impossible maintaining even soft
331 * state for keyed
332 * GRE tunnels with enabled checksum. Tell them "thank you".
333 *
334 * Well, I wonder, rfc1812 was written by Cisco employee,
335 * what the hell these idiots break standards established
336 * by themselves???
337 */
338
339 const int type = icmp_hdr(skb)->type;
340 const int code = icmp_hdr(skb)->code;
341 struct tnl_ptk_info tpi;
342 bool csum_err = false;
343
344 if (parse_gre_header(skb, &tpi, &csum_err)) {
345 if (!csum_err) /* ignore csum errors. */
346 return;
347 }
348
349 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
350 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
351 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
352 return;
353 }
354 if (type == ICMP_REDIRECT) {
355 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
356 IPPROTO_GRE, 0);
357 return;
358 }
359
360 ipgre_err(skb, info, &tpi);
1da177e4
LT
361}
362
2e15ea39
PS
363static __be64 key_to_tunnel_id(__be32 key)
364{
365#ifdef __BIG_ENDIAN
366 return (__force __be64)((__force u32)key);
367#else
368 return (__force __be64)((__force u64)key << 32);
369#endif
370}
371
372/* Returns the least-significant 32 bits of a __be64. */
373static __be32 tunnel_id_to_key(__be64 x)
374{
375#ifdef __BIG_ENDIAN
376 return (__force __be32)x;
377#else
378 return (__force __be32)((__force u64)x >> 32);
379#endif
380}
381
bda7bb46 382static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
1da177e4 383{
c5441932 384 struct net *net = dev_net(skb->dev);
2e15ea39 385 struct metadata_dst *tun_dst = NULL;
c5441932 386 struct ip_tunnel_net *itn;
b71d1d42 387 const struct iphdr *iph;
1da177e4 388 struct ip_tunnel *tunnel;
1da177e4 389
bda7bb46 390 if (tpi->proto == htons(ETH_P_TEB))
c5441932
PS
391 itn = net_generic(net, gre_tap_net_id);
392 else
393 itn = net_generic(net, ipgre_net_id);
1da177e4 394
c5441932 395 iph = ip_hdr(skb);
bda7bb46
PS
396 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
397 iph->saddr, iph->daddr, tpi->key);
e1a80002 398
d2083287 399 if (tunnel) {
0e3da5bb 400 skb_pop_mac_header(skb);
2e15ea39 401 if (tunnel->collect_md) {
c29a70d2
PS
402 __be16 flags;
403 __be64 tun_id;
2e15ea39 404
c29a70d2
PS
405 flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
406 tun_id = key_to_tunnel_id(tpi->key);
407 tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
2e15ea39
PS
408 if (!tun_dst)
409 return PACKET_REJECT;
2e15ea39
PS
410 }
411
412 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
bda7bb46 413 return PACKET_RCVD;
1da177e4 414 }
bda7bb46 415 return PACKET_REJECT;
1da177e4
LT
416}
417
9f57c67c
PS
418static int gre_rcv(struct sk_buff *skb)
419{
420 struct tnl_ptk_info tpi;
421 bool csum_err = false;
422
423#ifdef CONFIG_NET_IPGRE_BROADCAST
424 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
425 /* Looped back packet, drop it! */
426 if (rt_is_output_route(skb_rtable(skb)))
427 goto drop;
428 }
429#endif
430
431 if (parse_gre_header(skb, &tpi, &csum_err) < 0)
432 goto drop;
433
434 if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)
435 return 0;
436
437 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
438drop:
439 kfree_skb(skb);
440 return 0;
441}
442
53936107
EC
443static __sum16 gre_checksum(struct sk_buff *skb)
444{
445 __wsum csum;
446
447 if (skb->ip_summed == CHECKSUM_PARTIAL)
448 csum = lco_csum(skb);
449 else
450 csum = skb_checksum(skb, 0, skb->len, 0);
451 return csum_fold(csum);
452}
453
2e15ea39
PS
454static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
455 __be16 proto, __be32 key, __be32 seq)
456{
457 struct gre_base_hdr *greh;
458
459 skb_push(skb, hdr_len);
460
461 skb_reset_transport_header(skb);
462 greh = (struct gre_base_hdr *)skb->data;
463 greh->flags = tnl_flags_to_gre_flags(flags);
464 greh->protocol = proto;
465
466 if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
467 __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
468
469 if (flags & TUNNEL_SEQ) {
470 *ptr = seq;
471 ptr--;
472 }
473 if (flags & TUNNEL_KEY) {
474 *ptr = key;
475 ptr--;
476 }
477 if (flags & TUNNEL_CSUM &&
478 !(skb_shinfo(skb)->gso_type &
479 (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
480 *ptr = 0;
53936107 481 *(__sum16 *)ptr = gre_checksum(skb);
2e15ea39
PS
482 }
483 }
484}
485
c5441932
PS
486static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
487 const struct iphdr *tnl_params,
488 __be16 proto)
489{
490 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 491
c5441932
PS
492 if (tunnel->parms.o_flags & TUNNEL_SEQ)
493 tunnel->o_seqno++;
1da177e4 494
c5441932 495 /* Push GRE header. */
2e15ea39
PS
496 build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
497 proto, tunnel->parms.o_key, htonl(tunnel->o_seqno));
54bc9bac 498
2e15ea39 499 skb_set_inner_protocol(skb, proto);
bf3d6a8f 500 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
c5441932 501}
1da177e4 502
b2acd1dc
PS
503static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
504 bool csum)
505{
6fa79666 506 return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
b2acd1dc
PS
507}
508
fc4099f1
PS
509static struct rtable *gre_get_rt(struct sk_buff *skb,
510 struct net_device *dev,
511 struct flowi4 *fl,
512 const struct ip_tunnel_key *key)
513{
514 struct net *net = dev_net(dev);
515
516 memset(fl, 0, sizeof(*fl));
517 fl->daddr = key->u.ipv4.dst;
518 fl->saddr = key->u.ipv4.src;
519 fl->flowi4_tos = RT_TOS(key->tos);
520 fl->flowi4_mark = skb->mark;
521 fl->flowi4_proto = IPPROTO_GRE;
522
523 return ip_route_output_key(net, fl);
524}
525
2e15ea39
PS
526static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
527{
528 struct ip_tunnel_info *tun_info;
2e15ea39 529 const struct ip_tunnel_key *key;
db3c6139 530 struct rtable *rt = NULL;
2e15ea39 531 struct flowi4 fl;
2e15ea39
PS
532 int min_headroom;
533 int tunnel_hlen;
534 __be16 df, flags;
db3c6139 535 bool use_cache;
2e15ea39
PS
536 int err;
537
61adedf3 538 tun_info = skb_tunnel_info(skb);
7f9562a1
JB
539 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
540 ip_tunnel_info_af(tun_info) != AF_INET))
2e15ea39
PS
541 goto err_free_skb;
542
543 key = &tun_info->key;
db3c6139
DB
544 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
545 if (use_cache)
546 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
3c1cb4d2
PA
547 if (!rt) {
548 rt = gre_get_rt(skb, dev, &fl, key);
549 if (IS_ERR(rt))
550 goto err_free_skb;
db3c6139 551 if (use_cache)
3c1cb4d2
PA
552 dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
553 fl.saddr);
554 }
2e15ea39
PS
555
556 tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);
557
558 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
559 + tunnel_hlen + sizeof(struct iphdr);
560 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
561 int head_delta = SKB_DATA_ALIGN(min_headroom -
562 skb_headroom(skb) +
563 16);
564 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
565 0, GFP_ATOMIC);
566 if (unlikely(err))
567 goto err_free_rt;
568 }
569
570 /* Push Tunnel header. */
571 skb = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM));
572 if (IS_ERR(skb)) {
573 skb = NULL;
574 goto err_free_rt;
575 }
576
577 flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
578 build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB),
579 tunnel_id_to_key(tun_info->key.tun_id), 0);
580
581 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
039f5062
PS
582
583 iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
584 key->tos, key->ttl, df, false);
2e15ea39
PS
585 return;
586
587err_free_rt:
588 ip_rt_put(rt);
589err_free_skb:
590 kfree_skb(skb);
591 dev->stats.tx_dropped++;
592}
593
fc4099f1
PS
594static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
595{
596 struct ip_tunnel_info *info = skb_tunnel_info(skb);
597 struct rtable *rt;
598 struct flowi4 fl4;
599
600 if (ip_tunnel_info_af(info) != AF_INET)
601 return -EINVAL;
602
603 rt = gre_get_rt(skb, dev, &fl4, &info->key);
604 if (IS_ERR(rt))
605 return PTR_ERR(rt);
606
607 ip_rt_put(rt);
608 info->key.u.ipv4.src = fl4.saddr;
609 return 0;
610}
611
c5441932
PS
612static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
613 struct net_device *dev)
614{
615 struct ip_tunnel *tunnel = netdev_priv(dev);
616 const struct iphdr *tnl_params;
1da177e4 617
2e15ea39
PS
618 if (tunnel->collect_md) {
619 gre_fb_xmit(skb, dev);
620 return NETDEV_TX_OK;
621 }
622
c5441932
PS
623 if (dev->header_ops) {
624 /* Need space for new headers */
625 if (skb_cow_head(skb, dev->needed_headroom -
2bac7cb3 626 (tunnel->hlen + sizeof(struct iphdr))))
c5441932 627 goto free_skb;
1da177e4 628
c5441932 629 tnl_params = (const struct iphdr *)skb->data;
1da177e4 630
c5441932
PS
631 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
632 * to gre header.
633 */
634 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
8a0033a9 635 skb_reset_mac_header(skb);
c5441932
PS
636 } else {
637 if (skb_cow_head(skb, dev->needed_headroom))
638 goto free_skb;
1da177e4 639
c5441932 640 tnl_params = &tunnel->parms.iph;
1da177e4
LT
641 }
642
8a0033a9
TT
643 skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
644 if (IS_ERR(skb))
645 goto out;
646
c5441932 647 __gre_xmit(skb, dev, tnl_params, skb->protocol);
6ed10654 648 return NETDEV_TX_OK;
1da177e4 649
c5441932 650free_skb:
3acfa1e7 651 kfree_skb(skb);
c5441932
PS
652out:
653 dev->stats.tx_dropped++;
6ed10654 654 return NETDEV_TX_OK;
1da177e4
LT
655}
656
c5441932
PS
657static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
658 struct net_device *dev)
ee34c1eb 659{
c5441932 660 struct ip_tunnel *tunnel = netdev_priv(dev);
ee34c1eb 661
2e15ea39
PS
662 if (tunnel->collect_md) {
663 gre_fb_xmit(skb, dev);
664 return NETDEV_TX_OK;
665 }
666
45f2e997 667 skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
c5441932
PS
668 if (IS_ERR(skb))
669 goto out;
ee34c1eb 670
c5441932
PS
671 if (skb_cow_head(skb, dev->needed_headroom))
672 goto free_skb;
42aa9162 673
c5441932 674 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
c5441932 675 return NETDEV_TX_OK;
ee34c1eb 676
c5441932 677free_skb:
3acfa1e7 678 kfree_skb(skb);
c5441932
PS
679out:
680 dev->stats.tx_dropped++;
681 return NETDEV_TX_OK;
ee34c1eb
MS
682}
683
c5441932
PS
684static int ipgre_tunnel_ioctl(struct net_device *dev,
685 struct ifreq *ifr, int cmd)
1da177e4 686{
4565e991 687 int err;
1da177e4 688 struct ip_tunnel_parm p;
1da177e4 689
c5441932
PS
690 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
691 return -EFAULT;
6c734fb8
CW
692 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
693 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
694 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
695 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
696 return -EINVAL;
1da177e4 697 }
c5441932
PS
698 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
699 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
1da177e4 700
c5441932
PS
701 err = ip_tunnel_ioctl(dev, &p, cmd);
702 if (err)
703 return err;
1da177e4 704
c5441932
PS
705 p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
706 p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
707
708 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
709 return -EFAULT;
1da177e4
LT
710 return 0;
711}
712
1da177e4
LT
713/* Nice toy. Unfortunately, useless in real life :-)
714 It allows to construct virtual multiprotocol broadcast "LAN"
715 over the Internet, provided multicast routing is tuned.
716
717
718 I have no idea was this bicycle invented before me,
719 so that I had to set ARPHRD_IPGRE to a random value.
720 I have an impression, that Cisco could make something similar,
721 but this feature is apparently missing in IOS<=11.2(8).
e905a9ed 722
1da177e4
LT
723 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
724 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
725
726 ping -t 255 224.66.66.66
727
728 If nobody answers, mbone does not work.
729
730 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
731 ip addr add 10.66.66.<somewhat>/24 dev Universe
732 ifconfig Universe up
733 ifconfig Universe add fe80::<Your_real_addr>/10
734 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
735 ftp 10.66.66.66
736 ...
737 ftp fec0:6666:6666::193.233.7.65
738 ...
1da177e4 739 */
3b04ddde
SH
740static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
741 unsigned short type,
1507850b 742 const void *daddr, const void *saddr, unsigned int len)
1da177e4 743{
2941a486 744 struct ip_tunnel *t = netdev_priv(dev);
c5441932
PS
745 struct iphdr *iph;
746 struct gre_base_hdr *greh;
1da177e4 747
c5441932
PS
748 iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
749 greh = (struct gre_base_hdr *)(iph+1);
750 greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
751 greh->protocol = htons(type);
1da177e4 752
c5441932 753 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
e905a9ed 754
c5441932 755 /* Set the source hardware address. */
1da177e4
LT
756 if (saddr)
757 memcpy(&iph->saddr, saddr, 4);
6d55cb91 758 if (daddr)
1da177e4 759 memcpy(&iph->daddr, daddr, 4);
6d55cb91 760 if (iph->daddr)
77a482bd 761 return t->hlen + sizeof(*iph);
e905a9ed 762
c5441932 763 return -(t->hlen + sizeof(*iph));
1da177e4
LT
764}
765
6a5f44d7
TT
766static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
767{
b71d1d42 768 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
769 memcpy(haddr, &iph->saddr, 4);
770 return 4;
771}
772
3b04ddde
SH
773static const struct header_ops ipgre_header_ops = {
774 .create = ipgre_header,
6a5f44d7 775 .parse = ipgre_header_parse,
3b04ddde
SH
776};
777
6a5f44d7 778#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
779static int ipgre_open(struct net_device *dev)
780{
2941a486 781 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 782
f97c1e0c 783 if (ipv4_is_multicast(t->parms.iph.daddr)) {
cbb1e85f
DM
784 struct flowi4 fl4;
785 struct rtable *rt;
786
b57708ad 787 rt = ip_route_output_gre(t->net, &fl4,
cbb1e85f
DM
788 t->parms.iph.daddr,
789 t->parms.iph.saddr,
790 t->parms.o_key,
791 RT_TOS(t->parms.iph.tos),
792 t->parms.link);
b23dd4fe 793 if (IS_ERR(rt))
1da177e4 794 return -EADDRNOTAVAIL;
d8d1f30b 795 dev = rt->dst.dev;
1da177e4 796 ip_rt_put(rt);
51456b29 797 if (!__in_dev_get_rtnl(dev))
1da177e4
LT
798 return -EADDRNOTAVAIL;
799 t->mlink = dev->ifindex;
e5ed6399 800 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
801 }
802 return 0;
803}
804
805static int ipgre_close(struct net_device *dev)
806{
2941a486 807 struct ip_tunnel *t = netdev_priv(dev);
b8c26a33 808
f97c1e0c 809 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 810 struct in_device *in_dev;
b57708ad 811 in_dev = inetdev_by_index(t->net, t->mlink);
8723e1b4 812 if (in_dev)
1da177e4 813 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1da177e4
LT
814 }
815 return 0;
816}
1da177e4
LT
817#endif
818
b8c26a33
SH
819static const struct net_device_ops ipgre_netdev_ops = {
820 .ndo_init = ipgre_tunnel_init,
c5441932 821 .ndo_uninit = ip_tunnel_uninit,
b8c26a33
SH
822#ifdef CONFIG_NET_IPGRE_BROADCAST
823 .ndo_open = ipgre_open,
824 .ndo_stop = ipgre_close,
825#endif
c5441932 826 .ndo_start_xmit = ipgre_xmit,
b8c26a33 827 .ndo_do_ioctl = ipgre_tunnel_ioctl,
c5441932
PS
828 .ndo_change_mtu = ip_tunnel_change_mtu,
829 .ndo_get_stats64 = ip_tunnel_get_stats64,
1e99584b 830 .ndo_get_iflink = ip_tunnel_get_iflink,
b8c26a33
SH
831};
832
6b78f16e
ED
/* Feature bits set on every GRE device in __gre_tunnel_init(). */
#define GRE_FEATURES	(NETIF_F_SG |		\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_HW_CSUM)
837
1da177e4
LT
838static void ipgre_tunnel_setup(struct net_device *dev)
839{
b8c26a33 840 dev->netdev_ops = &ipgre_netdev_ops;
5a455275 841 dev->type = ARPHRD_IPGRE;
c5441932
PS
842 ip_tunnel_setup(dev, ipgre_net_id);
843}
1da177e4 844
c5441932
PS
845static void __gre_tunnel_init(struct net_device *dev)
846{
847 struct ip_tunnel *tunnel;
4565e991 848 int t_hlen;
c5441932
PS
849
850 tunnel = netdev_priv(dev);
4565e991 851 tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
c5441932
PS
852 tunnel->parms.iph.protocol = IPPROTO_GRE;
853
4565e991
TH
854 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
855
856 t_hlen = tunnel->hlen + sizeof(struct iphdr);
857
858 dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
859 dev->mtu = ETH_DATA_LEN - t_hlen - 4;
6b78f16e 860
b57708ad 861 dev->features |= GRE_FEATURES;
6b78f16e 862 dev->hw_features |= GRE_FEATURES;
c5441932
PS
863
864 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
865 /* TCP offload with GRE SEQ is not supported. */
866 dev->features |= NETIF_F_GSO_SOFTWARE;
867 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
868 /* Can use a lockless transmit, unless we generate
869 * output sequences
870 */
871 dev->features |= NETIF_F_LLTX;
872 }
1da177e4
LT
873}
874
875static int ipgre_tunnel_init(struct net_device *dev)
876{
c5441932
PS
877 struct ip_tunnel *tunnel = netdev_priv(dev);
878 struct iphdr *iph = &tunnel->parms.iph;
1da177e4 879
c5441932 880 __gre_tunnel_init(dev);
1da177e4 881
c5441932
PS
882 memcpy(dev->dev_addr, &iph->saddr, 4);
883 memcpy(dev->broadcast, &iph->daddr, 4);
1da177e4 884
c5441932 885 dev->flags = IFF_NOARP;
02875878 886 netif_keep_dst(dev);
c5441932 887 dev->addr_len = 4;
1da177e4 888
1da177e4 889 if (iph->daddr) {
1da177e4 890#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 891 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
892 if (!iph->saddr)
893 return -EINVAL;
894 dev->flags = IFF_BROADCAST;
3b04ddde 895 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
896 }
897#endif
ee34c1eb 898 } else
6a5f44d7 899 dev->header_ops = &ipgre_header_ops;
1da177e4 900
c5441932 901 return ip_tunnel_init(dev);
1da177e4
LT
902}
903
9f57c67c
PS
904static const struct gre_protocol ipgre_protocol = {
905 .handler = gre_rcv,
906 .err_handler = gre_err,
1da177e4
LT
907};
908
2c8c1e72 909static int __net_init ipgre_init_net(struct net *net)
59a4c759 910{
c5441932 911 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
59a4c759
PE
912}
913
2c8c1e72 914static void __net_exit ipgre_exit_net(struct net *net)
59a4c759 915{
c5441932 916 struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
6c742e71 917 ip_tunnel_delete_net(itn, &ipgre_link_ops);
59a4c759
PE
918}
919
920static struct pernet_operations ipgre_net_ops = {
921 .init = ipgre_init_net,
922 .exit = ipgre_exit_net,
cfb8fbf2 923 .id = &ipgre_net_id,
c5441932 924 .size = sizeof(struct ip_tunnel_net),
59a4c759 925};
1da177e4 926
c19e654d
HX
927static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
928{
929 __be16 flags;
930
931 if (!data)
932 return 0;
933
934 flags = 0;
935 if (data[IFLA_GRE_IFLAGS])
936 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
937 if (data[IFLA_GRE_OFLAGS])
938 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
939 if (flags & (GRE_VERSION|GRE_ROUTING))
940 return -EINVAL;
941
942 return 0;
943}
944
e1a80002
HX
945static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
946{
947 __be32 daddr;
948
949 if (tb[IFLA_ADDRESS]) {
950 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
951 return -EINVAL;
952 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
953 return -EADDRNOTAVAIL;
954 }
955
956 if (!data)
957 goto out;
958
959 if (data[IFLA_GRE_REMOTE]) {
960 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
961 if (!daddr)
962 return -EINVAL;
963 }
964
965out:
966 return ipgre_tunnel_validate(tb, data);
967}
968
2e15ea39
PS
969static void ipgre_netlink_parms(struct net_device *dev,
970 struct nlattr *data[],
971 struct nlattr *tb[],
972 struct ip_tunnel_parm *parms)
c19e654d 973{
7bb82d92 974 memset(parms, 0, sizeof(*parms));
c19e654d
HX
975
976 parms->iph.protocol = IPPROTO_GRE;
977
978 if (!data)
979 return;
980
981 if (data[IFLA_GRE_LINK])
982 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
983
984 if (data[IFLA_GRE_IFLAGS])
c5441932 985 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
c19e654d
HX
986
987 if (data[IFLA_GRE_OFLAGS])
c5441932 988 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
c19e654d
HX
989
990 if (data[IFLA_GRE_IKEY])
991 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
992
993 if (data[IFLA_GRE_OKEY])
994 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
995
996 if (data[IFLA_GRE_LOCAL])
67b61f6c 997 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
c19e654d
HX
998
999 if (data[IFLA_GRE_REMOTE])
67b61f6c 1000 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
c19e654d
HX
1001
1002 if (data[IFLA_GRE_TTL])
1003 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1004
1005 if (data[IFLA_GRE_TOS])
1006 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1007
1008 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1009 parms->iph.frag_off = htons(IP_DF);
2e15ea39
PS
1010
1011 if (data[IFLA_GRE_COLLECT_METADATA]) {
1012 struct ip_tunnel *t = netdev_priv(dev);
1013
1014 t->collect_md = true;
1015 }
c19e654d
HX
1016}
1017
4565e991
TH
1018/* This function returns true when ENCAP attributes are present in the nl msg */
1019static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1020 struct ip_tunnel_encap *ipencap)
1021{
1022 bool ret = false;
1023
1024 memset(ipencap, 0, sizeof(*ipencap));
1025
1026 if (!data)
1027 return ret;
1028
1029 if (data[IFLA_GRE_ENCAP_TYPE]) {
1030 ret = true;
1031 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1032 }
1033
1034 if (data[IFLA_GRE_ENCAP_FLAGS]) {
1035 ret = true;
1036 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1037 }
1038
1039 if (data[IFLA_GRE_ENCAP_SPORT]) {
1040 ret = true;
3e97fa70 1041 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
4565e991
TH
1042 }
1043
1044 if (data[IFLA_GRE_ENCAP_DPORT]) {
1045 ret = true;
3e97fa70 1046 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
4565e991
TH
1047 }
1048
1049 return ret;
1050}
1051
c5441932 1052static int gre_tap_init(struct net_device *dev)
e1a80002 1053{
c5441932 1054 __gre_tunnel_init(dev);
bec94d43 1055 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
e1a80002 1056
c5441932 1057 return ip_tunnel_init(dev);
e1a80002
HX
1058}
1059
c5441932
PS
1060static const struct net_device_ops gre_tap_netdev_ops = {
1061 .ndo_init = gre_tap_init,
1062 .ndo_uninit = ip_tunnel_uninit,
1063 .ndo_start_xmit = gre_tap_xmit,
b8c26a33
SH
1064 .ndo_set_mac_address = eth_mac_addr,
1065 .ndo_validate_addr = eth_validate_addr,
c5441932
PS
1066 .ndo_change_mtu = ip_tunnel_change_mtu,
1067 .ndo_get_stats64 = ip_tunnel_get_stats64,
1e99584b 1068 .ndo_get_iflink = ip_tunnel_get_iflink,
fc4099f1 1069 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
b8c26a33
SH
1070};
1071
e1a80002
HX
1072static void ipgre_tap_setup(struct net_device *dev)
1073{
e1a80002 1074 ether_setup(dev);
d13b161c
JB
1075 dev->netdev_ops = &gre_tap_netdev_ops;
1076 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1077 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
c5441932 1078 ip_tunnel_setup(dev, gre_tap_net_id);
e1a80002
HX
1079}
1080
c5441932
PS
1081static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1082 struct nlattr *tb[], struct nlattr *data[])
c19e654d 1083{
c5441932 1084 struct ip_tunnel_parm p;
4565e991
TH
1085 struct ip_tunnel_encap ipencap;
1086
1087 if (ipgre_netlink_encap_parms(data, &ipencap)) {
1088 struct ip_tunnel *t = netdev_priv(dev);
1089 int err = ip_tunnel_encap_setup(t, &ipencap);
1090
1091 if (err < 0)
1092 return err;
1093 }
c19e654d 1094
2e15ea39 1095 ipgre_netlink_parms(dev, data, tb, &p);
c5441932 1096 return ip_tunnel_newlink(dev, tb, &p);
c19e654d
HX
1097}
1098
1099static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1100 struct nlattr *data[])
1101{
c19e654d 1102 struct ip_tunnel_parm p;
4565e991
TH
1103 struct ip_tunnel_encap ipencap;
1104
1105 if (ipgre_netlink_encap_parms(data, &ipencap)) {
1106 struct ip_tunnel *t = netdev_priv(dev);
1107 int err = ip_tunnel_encap_setup(t, &ipencap);
1108
1109 if (err < 0)
1110 return err;
1111 }
c19e654d 1112
2e15ea39 1113 ipgre_netlink_parms(dev, data, tb, &p);
c5441932 1114 return ip_tunnel_changelink(dev, tb, &p);
c19e654d
HX
1115}
1116
1117static size_t ipgre_get_size(const struct net_device *dev)
1118{
1119 return
1120 /* IFLA_GRE_LINK */
1121 nla_total_size(4) +
1122 /* IFLA_GRE_IFLAGS */
1123 nla_total_size(2) +
1124 /* IFLA_GRE_OFLAGS */
1125 nla_total_size(2) +
1126 /* IFLA_GRE_IKEY */
1127 nla_total_size(4) +
1128 /* IFLA_GRE_OKEY */
1129 nla_total_size(4) +
1130 /* IFLA_GRE_LOCAL */
1131 nla_total_size(4) +
1132 /* IFLA_GRE_REMOTE */
1133 nla_total_size(4) +
1134 /* IFLA_GRE_TTL */
1135 nla_total_size(1) +
1136 /* IFLA_GRE_TOS */
1137 nla_total_size(1) +
1138 /* IFLA_GRE_PMTUDISC */
1139 nla_total_size(1) +
4565e991
TH
1140 /* IFLA_GRE_ENCAP_TYPE */
1141 nla_total_size(2) +
1142 /* IFLA_GRE_ENCAP_FLAGS */
1143 nla_total_size(2) +
1144 /* IFLA_GRE_ENCAP_SPORT */
1145 nla_total_size(2) +
1146 /* IFLA_GRE_ENCAP_DPORT */
1147 nla_total_size(2) +
2e15ea39
PS
1148 /* IFLA_GRE_COLLECT_METADATA */
1149 nla_total_size(0) +
c19e654d
HX
1150 0;
1151}
1152
1153static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1154{
1155 struct ip_tunnel *t = netdev_priv(dev);
1156 struct ip_tunnel_parm *p = &t->parms;
1157
f3756b79 1158 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
c5441932
PS
1159 nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
1160 nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
f3756b79
DM
1161 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1162 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
930345ea
JB
1163 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1164 nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
f3756b79
DM
1165 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1166 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1167 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1168 !!(p->iph.frag_off & htons(IP_DF))))
1169 goto nla_put_failure;
4565e991
TH
1170
1171 if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1172 t->encap.type) ||
3e97fa70
SD
1173 nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1174 t->encap.sport) ||
1175 nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1176 t->encap.dport) ||
4565e991 1177 nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
e1b2cb65 1178 t->encap.flags))
4565e991
TH
1179 goto nla_put_failure;
1180
2e15ea39
PS
1181 if (t->collect_md) {
1182 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1183 goto nla_put_failure;
1184 }
1185
c19e654d
HX
1186 return 0;
1187
1188nla_put_failure:
1189 return -EMSGSIZE;
1190}
1191
1192static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1193 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1194 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1195 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1196 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1197 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
4d74f8ba
PM
1198 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1199 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
c19e654d
HX
1200 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1201 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1202 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
4565e991
TH
1203 [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
1204 [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
1205 [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
1206 [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
2e15ea39 1207 [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
c19e654d
HX
1208};
1209
1210static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1211 .kind = "gre",
1212 .maxtype = IFLA_GRE_MAX,
1213 .policy = ipgre_policy,
1214 .priv_size = sizeof(struct ip_tunnel),
1215 .setup = ipgre_tunnel_setup,
1216 .validate = ipgre_tunnel_validate,
1217 .newlink = ipgre_newlink,
1218 .changelink = ipgre_changelink,
c5441932 1219 .dellink = ip_tunnel_dellink,
c19e654d
HX
1220 .get_size = ipgre_get_size,
1221 .fill_info = ipgre_fill_info,
1728d4fa 1222 .get_link_net = ip_tunnel_get_link_net,
c19e654d
HX
1223};
1224
e1a80002
HX
1225static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1226 .kind = "gretap",
1227 .maxtype = IFLA_GRE_MAX,
1228 .policy = ipgre_policy,
1229 .priv_size = sizeof(struct ip_tunnel),
1230 .setup = ipgre_tap_setup,
1231 .validate = ipgre_tap_validate,
1232 .newlink = ipgre_newlink,
1233 .changelink = ipgre_changelink,
c5441932 1234 .dellink = ip_tunnel_dellink,
e1a80002
HX
1235 .get_size = ipgre_get_size,
1236 .fill_info = ipgre_fill_info,
1728d4fa 1237 .get_link_net = ip_tunnel_get_link_net,
e1a80002
HX
1238};
1239
b2acd1dc
PS
1240struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1241 u8 name_assign_type)
1242{
1243 struct nlattr *tb[IFLA_MAX + 1];
1244 struct net_device *dev;
1245 struct ip_tunnel *t;
1246 int err;
1247
1248 memset(&tb, 0, sizeof(tb));
1249
1250 dev = rtnl_create_link(net, name, name_assign_type,
1251 &ipgre_tap_ops, tb);
1252 if (IS_ERR(dev))
1253 return dev;
1254
1255 /* Configure flow based GRE device. */
1256 t = netdev_priv(dev);
1257 t->collect_md = true;
1258
1259 err = ipgre_newlink(net, dev, tb, NULL);
1260 if (err < 0)
1261 goto out;
7e059158
DW
1262
1263 /* openvswitch users expect packet sizes to be unrestricted,
1264 * so set the largest MTU we can.
1265 */
1266 err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1267 if (err)
1268 goto out;
1269
b2acd1dc
PS
1270 return dev;
1271out:
1272 free_netdev(dev);
1273 return ERR_PTR(err);
1274}
1275EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1276
c5441932
PS
1277static int __net_init ipgre_tap_init_net(struct net *net)
1278{
2e15ea39 1279 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
c5441932
PS
1280}
1281
1282static void __net_exit ipgre_tap_exit_net(struct net *net)
1283{
1284 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
6c742e71 1285 ip_tunnel_delete_net(itn, &ipgre_tap_ops);
c5441932
PS
1286}
1287
1288static struct pernet_operations ipgre_tap_net_ops = {
1289 .init = ipgre_tap_init_net,
1290 .exit = ipgre_tap_exit_net,
1291 .id = &gre_tap_net_id,
1292 .size = sizeof(struct ip_tunnel_net),
1293};
1da177e4
LT
1294
1295static int __init ipgre_init(void)
1296{
1297 int err;
1298
058bd4d2 1299 pr_info("GRE over IPv4 tunneling driver\n");
1da177e4 1300
cfb8fbf2 1301 err = register_pernet_device(&ipgre_net_ops);
59a4c759 1302 if (err < 0)
c2892f02
AD
1303 return err;
1304
c5441932
PS
1305 err = register_pernet_device(&ipgre_tap_net_ops);
1306 if (err < 0)
1307 goto pnet_tap_faied;
1308
9f57c67c 1309 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02 1310 if (err < 0) {
058bd4d2 1311 pr_info("%s: can't add protocol\n", __func__);
c2892f02
AD
1312 goto add_proto_failed;
1313 }
7daa0004 1314
c19e654d
HX
1315 err = rtnl_link_register(&ipgre_link_ops);
1316 if (err < 0)
1317 goto rtnl_link_failed;
1318
e1a80002
HX
1319 err = rtnl_link_register(&ipgre_tap_ops);
1320 if (err < 0)
1321 goto tap_ops_failed;
1322
c5441932 1323 return 0;
c19e654d 1324
e1a80002
HX
1325tap_ops_failed:
1326 rtnl_link_unregister(&ipgre_link_ops);
c19e654d 1327rtnl_link_failed:
9f57c67c 1328 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02 1329add_proto_failed:
c5441932
PS
1330 unregister_pernet_device(&ipgre_tap_net_ops);
1331pnet_tap_faied:
c2892f02 1332 unregister_pernet_device(&ipgre_net_ops);
c5441932 1333 return err;
1da177e4
LT
1334}
1335
db44575f 1336static void __exit ipgre_fini(void)
1da177e4 1337{
e1a80002 1338 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d 1339 rtnl_link_unregister(&ipgre_link_ops);
9f57c67c 1340 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
c5441932 1341 unregister_pernet_device(&ipgre_tap_net_ops);
c2892f02 1342 unregister_pernet_device(&ipgre_net_ops);
1da177e4
LT
1343}
1344
1345module_init(ipgre_init);
1346module_exit(ipgre_fini);
1347MODULE_LICENSE("GPL");
4d74f8ba
PM
1348MODULE_ALIAS_RTNL_LINK("gre");
1349MODULE_ALIAS_RTNL_LINK("gretap");
8909c9ad 1350MODULE_ALIAS_NETDEV("gre0");
c5441932 1351MODULE_ALIAS_NETDEV("gretap0");