Merge tag 'riscv-for-linus-5.2-mw2' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6-block.git] / drivers / net / geneve.c
CommitLineData
2d07dc79
JL
1/*
2 * GENEVE: Generic Network Virtualization Encapsulation
3 *
4 * Copyright (c) 2015 Red Hat, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
13#include <linux/kernel.h>
14#include <linux/module.h>
2d07dc79
JL
15#include <linux/etherdevice.h>
16#include <linux/hash.h>
3616d08b 17#include <net/ipv6_stubs.h>
e305ac6c 18#include <net/dst_metadata.h>
8e816df8 19#include <net/gro_cells.h>
2d07dc79
JL
20#include <net/rtnetlink.h>
21#include <net/geneve.h>
371bd106 22#include <net/protocol.h>
2d07dc79
JL
23
24#define GENEVE_NETDEV_VER "0.6"
25
2d07dc79
JL
26#define GENEVE_N_VID (1u << 24)
27#define GENEVE_VID_MASK (GENEVE_N_VID - 1)
28
29#define VNI_HASH_BITS 10
30#define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
31
32static bool log_ecn_error = true;
33module_param(log_ecn_error, bool, 0644);
34MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
35
371bd106
PS
36#define GENEVE_VER 0
37#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
5edbea69
AK
38#define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN)
39#define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN)
371bd106 40
2d07dc79
JL
41/* per-network namespace private data for this module */
42struct geneve_net {
371bd106 43 struct list_head geneve_list;
371bd106 44 struct list_head sock_list;
2d07dc79
JL
45};
46
c7d03a00 47static unsigned int geneve_net_id;
371bd106 48
4b4c21fa
JB
49struct geneve_dev_node {
50 struct hlist_node hlist;
51 struct geneve_dev *geneve;
52};
53
2d07dc79
JL
54/* Pseudo network device */
55struct geneve_dev {
4b4c21fa
JB
56 struct geneve_dev_node hlist4; /* vni hash table for IPv4 socket */
57#if IS_ENABLED(CONFIG_IPV6)
58 struct geneve_dev_node hlist6; /* vni hash table for IPv6 socket */
59#endif
2d07dc79
JL
60 struct net *net; /* netns for packet i/o */
61 struct net_device *dev; /* netdev for geneve tunnel */
9b4437a5 62 struct ip_tunnel_info info;
fceb9c3e 63 struct geneve_sock __rcu *sock4; /* IPv4 socket used for geneve tunnel */
8ed66f0e 64#if IS_ENABLED(CONFIG_IPV6)
fceb9c3e 65 struct geneve_sock __rcu *sock6; /* IPv6 socket used for geneve tunnel */
8ed66f0e 66#endif
2d07dc79 67 struct list_head next; /* geneve's per namespace list */
8e816df8 68 struct gro_cells gro_cells;
9b4437a5 69 bool collect_md;
70 bool use_udp6_rx_checksums;
52d0d404 71 bool ttl_inherit;
a025fb5f 72 enum ifla_geneve_df df;
2d07dc79
JL
73};
74
371bd106
PS
75struct geneve_sock {
76 bool collect_md;
371bd106
PS
77 struct list_head list;
78 struct socket *sock;
79 struct rcu_head rcu;
80 int refcnt;
66d47003 81 struct hlist_head vni_list[VNI_HASH_SIZE];
371bd106 82};
2d07dc79
JL
83
84static inline __u32 geneve_net_vni_hash(u8 vni[3])
85{
86 __u32 vnid;
87
88 vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2];
89 return hash_32(vnid, VNI_HASH_BITS);
90}
91
e305ac6c
PS
92static __be64 vni_to_tunnel_id(const __u8 *vni)
93{
94#ifdef __BIG_ENDIAN
95 return (vni[0] << 16) | (vni[1] << 8) | vni[2];
96#else
97 return (__force __be64)(((__force u64)vni[0] << 40) |
98 ((__force u64)vni[1] << 48) |
99 ((__force u64)vni[2] << 56));
100#endif
101}
102
9b4437a5 103/* Convert 64 bit tunnel ID to 24 bit VNI. */
104static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
105{
106#ifdef __BIG_ENDIAN
107 vni[0] = (__force __u8)(tun_id >> 16);
108 vni[1] = (__force __u8)(tun_id >> 8);
109 vni[2] = (__force __u8)tun_id;
110#else
111 vni[0] = (__force __u8)((__force u64)tun_id >> 40);
112 vni[1] = (__force __u8)((__force u64)tun_id >> 48);
113 vni[2] = (__force __u8)((__force u64)tun_id >> 56);
114#endif
115}
116
2e0b26e1 117static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni)
118{
2e0b26e1 119 return !memcmp(vni, &tun_id[5], 3);
2e0b26e1 120}
121
1e9f12ec
JB
122static sa_family_t geneve_get_sk_family(struct geneve_sock *gs)
123{
124 return gs->sock->sk->sk_family;
125}
126
66d47003 127static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
371bd106 128 __be32 addr, u8 vni[])
2d07dc79 129{
2d07dc79 130 struct hlist_head *vni_list_head;
4b4c21fa 131 struct geneve_dev_node *node;
2d07dc79
JL
132 __u32 hash;
133
2d07dc79 134 /* Find the device for this VNI */
371bd106 135 hash = geneve_net_vni_hash(vni);
66d47003 136 vni_list_head = &gs->vni_list[hash];
4b4c21fa
JB
137 hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
138 if (eq_tun_id_and_vni((u8 *)&node->geneve->info.key.tun_id, vni) &&
139 addr == node->geneve->info.key.u.ipv4.dst)
140 return node->geneve;
8ed66f0e
JL
141 }
142 return NULL;
143}
144
145#if IS_ENABLED(CONFIG_IPV6)
146static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
147 struct in6_addr addr6, u8 vni[])
148{
149 struct hlist_head *vni_list_head;
4b4c21fa 150 struct geneve_dev_node *node;
8ed66f0e
JL
151 __u32 hash;
152
153 /* Find the device for this VNI */
154 hash = geneve_net_vni_hash(vni);
155 vni_list_head = &gs->vni_list[hash];
4b4c21fa
JB
156 hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
157 if (eq_tun_id_and_vni((u8 *)&node->geneve->info.key.tun_id, vni) &&
158 ipv6_addr_equal(&addr6, &node->geneve->info.key.u.ipv6.dst))
159 return node->geneve;
2d07dc79 160 }
e305ac6c
PS
161 return NULL;
162}
8ed66f0e 163#endif
e305ac6c 164
371bd106
PS
165static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
166{
167 return (struct genevehdr *)(udp_hdr(skb) + 1);
168}
169
9fc47545
JB
170static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs,
171 struct sk_buff *skb)
e305ac6c 172{
8ed66f0e 173 static u8 zero_vni[3];
9b4437a5 174 u8 *vni;
e305ac6c 175
1e9f12ec 176 if (geneve_get_sk_family(gs) == AF_INET) {
9fc47545 177 struct iphdr *iph;
9b4437a5 178 __be32 addr;
9fc47545 179
8ed66f0e 180 iph = ip_hdr(skb); /* outer IP header... */
371bd106 181
8ed66f0e
JL
182 if (gs->collect_md) {
183 vni = zero_vni;
184 addr = 0;
185 } else {
9fc47545 186 vni = geneve_hdr(skb)->vni;
8ed66f0e
JL
187 addr = iph->saddr;
188 }
189
9fc47545 190 return geneve_lookup(gs, addr, vni);
8ed66f0e 191#if IS_ENABLED(CONFIG_IPV6)
1e9f12ec 192 } else if (geneve_get_sk_family(gs) == AF_INET6) {
9b4437a5 193 static struct in6_addr zero_addr6;
9fc47545
JB
194 struct ipv6hdr *ip6h;
195 struct in6_addr addr6;
196
8ed66f0e 197 ip6h = ipv6_hdr(skb); /* outer IPv6 header... */
371bd106 198
8ed66f0e
JL
199 if (gs->collect_md) {
200 vni = zero_vni;
201 addr6 = zero_addr6;
202 } else {
9fc47545 203 vni = geneve_hdr(skb)->vni;
8ed66f0e
JL
204 addr6 = ip6h->saddr;
205 }
206
9fc47545 207 return geneve6_lookup(gs, addr6, vni);
8ed66f0e
JL
208#endif
209 }
9fc47545
JB
210 return NULL;
211}
212
213/* geneve receive/decap routine */
214static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
215 struct sk_buff *skb)
216{
217 struct genevehdr *gnvh = geneve_hdr(skb);
218 struct metadata_dst *tun_dst = NULL;
219 struct pcpu_sw_netstats *stats;
fe741e23 220 unsigned int len;
9fc47545
JB
221 int err = 0;
222 void *oiph;
2d07dc79 223
371bd106 224 if (ip_tunnel_collect_metadata() || gs->collect_md) {
e305ac6c 225 __be16 flags;
e305ac6c
PS
226
227 flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
228 (gnvh->oam ? TUNNEL_OAM : 0) |
229 (gnvh->critical ? TUNNEL_CRIT_OPT : 0);
230
1e9f12ec 231 tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
e305ac6c
PS
232 vni_to_tunnel_id(gnvh->vni),
233 gnvh->opt_len * 4);
fe741e23
GM
234 if (!tun_dst) {
235 geneve->dev->stats.rx_dropped++;
e305ac6c 236 goto drop;
fe741e23 237 }
e305ac6c 238 /* Update tunnel dst according to Geneve options. */
4c222798 239 ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
256c87c1
PJV
240 gnvh->options, gnvh->opt_len * 4,
241 TUNNEL_GENEVE_OPT);
e305ac6c
PS
242 } else {
243 /* Drop packets w/ critical options,
244 * since we don't support any...
245 */
fe741e23
GM
246 if (gnvh->critical) {
247 geneve->dev->stats.rx_frame_errors++;
248 geneve->dev->stats.rx_errors++;
e305ac6c 249 goto drop;
fe741e23 250 }
e305ac6c 251 }
2d07dc79
JL
252
253 skb_reset_mac_header(skb);
2d07dc79
JL
254 skb->protocol = eth_type_trans(skb, geneve->dev);
255 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
256
e305ac6c
PS
257 if (tun_dst)
258 skb_dst_set(skb, &tun_dst->dst);
259
2d07dc79 260 /* Ignore packet loops (and multicast echo) */
fe741e23
GM
261 if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) {
262 geneve->dev->stats.rx_errors++;
2d07dc79 263 goto drop;
fe741e23 264 }
2d07dc79 265
9fc47545 266 oiph = skb_network_header(skb);
2d07dc79
JL
267 skb_reset_network_header(skb);
268
9fc47545
JB
269 if (geneve_get_sk_family(gs) == AF_INET)
270 err = IP_ECN_decapsulate(oiph, skb);
8ed66f0e 271#if IS_ENABLED(CONFIG_IPV6)
9fc47545
JB
272 else
273 err = IP6_ECN_decapsulate(oiph, skb);
8ed66f0e 274#endif
2d07dc79
JL
275
276 if (unlikely(err)) {
8ed66f0e 277 if (log_ecn_error) {
9fc47545 278 if (geneve_get_sk_family(gs) == AF_INET)
8ed66f0e
JL
279 net_info_ratelimited("non-ECT from %pI4 "
280 "with TOS=%#x\n",
9fc47545
JB
281 &((struct iphdr *)oiph)->saddr,
282 ((struct iphdr *)oiph)->tos);
8ed66f0e 283#if IS_ENABLED(CONFIG_IPV6)
9fc47545 284 else
8ed66f0e 285 net_info_ratelimited("non-ECT from %pI6\n",
9fc47545 286 &((struct ipv6hdr *)oiph)->saddr);
8ed66f0e
JL
287#endif
288 }
2d07dc79
JL
289 if (err > 1) {
290 ++geneve->dev->stats.rx_frame_errors;
291 ++geneve->dev->stats.rx_errors;
292 goto drop;
293 }
294 }
295
fe741e23
GM
296 len = skb->len;
297 err = gro_cells_receive(&geneve->gro_cells, skb);
298 if (likely(err == NET_RX_SUCCESS)) {
299 stats = this_cpu_ptr(geneve->dev->tstats);
300 u64_stats_update_begin(&stats->syncp);
301 stats->rx_packets++;
302 stats->rx_bytes += len;
303 u64_stats_update_end(&stats->syncp);
304 }
2d07dc79
JL
305 return;
306drop:
307 /* Consume bad packet */
308 kfree_skb(skb);
309}
310
311/* Setup stats when device is created */
312static int geneve_init(struct net_device *dev)
313{
8e816df8
JG
314 struct geneve_dev *geneve = netdev_priv(dev);
315 int err;
316
2d07dc79
JL
317 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
318 if (!dev->tstats)
319 return -ENOMEM;
8e816df8
JG
320
321 err = gro_cells_init(&geneve->gro_cells, dev);
322 if (err) {
323 free_percpu(dev->tstats);
324 return err;
325 }
326
9b4437a5 327 err = dst_cache_init(&geneve->info.dst_cache, GFP_KERNEL);
468dfffc
PA
328 if (err) {
329 free_percpu(dev->tstats);
330 gro_cells_destroy(&geneve->gro_cells);
331 return err;
332 }
2d07dc79
JL
333 return 0;
334}
335
336static void geneve_uninit(struct net_device *dev)
337{
8e816df8
JG
338 struct geneve_dev *geneve = netdev_priv(dev);
339
9b4437a5 340 dst_cache_destroy(&geneve->info.dst_cache);
8e816df8 341 gro_cells_destroy(&geneve->gro_cells);
2d07dc79
JL
342 free_percpu(dev->tstats);
343}
344
371bd106
PS
345/* Callback from net/ipv4/udp.c to receive packets */
346static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
347{
348 struct genevehdr *geneveh;
9fc47545 349 struct geneve_dev *geneve;
371bd106
PS
350 struct geneve_sock *gs;
351 int opts_len;
352
fe741e23 353 /* Need UDP and Geneve header to be present */
371bd106 354 if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
e5aed006 355 goto drop;
371bd106
PS
356
357 /* Return packets with reserved bits set */
358 geneveh = geneve_hdr(skb);
359 if (unlikely(geneveh->ver != GENEVE_VER))
e5aed006 360 goto drop;
371bd106
PS
361
362 if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
e5aed006 363 goto drop;
371bd106 364
9fc47545
JB
365 gs = rcu_dereference_sk_user_data(sk);
366 if (!gs)
367 goto drop;
368
369 geneve = geneve_lookup_skb(gs, skb);
370 if (!geneve)
371 goto drop;
372
371bd106
PS
373 opts_len = geneveh->opt_len * 4;
374 if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
7f290c94 375 htons(ETH_P_TEB),
fe741e23
GM
376 !net_eq(geneve->net, dev_net(geneve->dev)))) {
377 geneve->dev->stats.rx_dropped++;
371bd106 378 goto drop;
fe741e23 379 }
371bd106 380
9fc47545 381 geneve_rx(geneve, gs, skb);
371bd106
PS
382 return 0;
383
384drop:
385 /* Consume bad packet */
386 kfree_skb(skb);
387 return 0;
371bd106
PS
388}
389
a0796644
SB
390/* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */
391static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb)
392{
393 struct genevehdr *geneveh;
394 struct geneve_sock *gs;
395 u8 zero_vni[3] = { 0 };
396 u8 *vni = zero_vni;
397
398 if (skb->len < GENEVE_BASE_HLEN)
399 return -EINVAL;
400
401 geneveh = geneve_hdr(skb);
402 if (geneveh->ver != GENEVE_VER)
403 return -EINVAL;
404
405 if (geneveh->proto_type != htons(ETH_P_TEB))
406 return -EINVAL;
407
408 gs = rcu_dereference_sk_user_data(sk);
409 if (!gs)
410 return -ENOENT;
411
412 if (geneve_get_sk_family(gs) == AF_INET) {
413 struct iphdr *iph = ip_hdr(skb);
414 __be32 addr4 = 0;
415
416 if (!gs->collect_md) {
417 vni = geneve_hdr(skb)->vni;
418 addr4 = iph->daddr;
419 }
420
421 return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT;
422 }
423
424#if IS_ENABLED(CONFIG_IPV6)
425 if (geneve_get_sk_family(gs) == AF_INET6) {
426 struct ipv6hdr *ip6h = ipv6_hdr(skb);
8a962c4a
NC
427 struct in6_addr addr6;
428
429 memset(&addr6, 0, sizeof(struct in6_addr));
a0796644
SB
430
431 if (!gs->collect_md) {
432 vni = geneve_hdr(skb)->vni;
433 addr6 = ip6h->daddr;
434 }
435
436 return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT;
437 }
438#endif
439
440 return -EPFNOSUPPORT;
441}
442
371bd106 443static struct socket *geneve_create_sock(struct net *net, bool ipv6,
9b4437a5 444 __be16 port, bool ipv6_rx_csum)
371bd106
PS
445{
446 struct socket *sock;
447 struct udp_port_cfg udp_conf;
448 int err;
449
450 memset(&udp_conf, 0, sizeof(udp_conf));
451
452 if (ipv6) {
453 udp_conf.family = AF_INET6;
8ed66f0e 454 udp_conf.ipv6_v6only = 1;
9b4437a5 455 udp_conf.use_udp6_rx_checksums = ipv6_rx_csum;
371bd106
PS
456 } else {
457 udp_conf.family = AF_INET;
458 udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
459 }
460
461 udp_conf.local_udp_port = port;
462
463 /* Open UDP socket */
464 err = udp_sock_create(net, &udp_conf, &sock);
465 if (err < 0)
466 return ERR_PTR(err);
467
468 return sock;
469}
470
371bd106
PS
471static int geneve_hlen(struct genevehdr *gh)
472{
473 return sizeof(*gh) + gh->opt_len * 4;
474}
475
d4546c25
DM
476static struct sk_buff *geneve_gro_receive(struct sock *sk,
477 struct list_head *head,
478 struct sk_buff *skb)
371bd106 479{
d4546c25
DM
480 struct sk_buff *pp = NULL;
481 struct sk_buff *p;
371bd106
PS
482 struct genevehdr *gh, *gh2;
483 unsigned int hlen, gh_len, off_gnv;
484 const struct packet_offload *ptype;
485 __be16 type;
486 int flush = 1;
487
488 off_gnv = skb_gro_offset(skb);
489 hlen = off_gnv + sizeof(*gh);
490 gh = skb_gro_header_fast(skb, off_gnv);
491 if (skb_gro_header_hard(skb, hlen)) {
492 gh = skb_gro_header_slow(skb, hlen, off_gnv);
493 if (unlikely(!gh))
494 goto out;
495 }
496
497 if (gh->ver != GENEVE_VER || gh->oam)
498 goto out;
499 gh_len = geneve_hlen(gh);
500
501 hlen = off_gnv + gh_len;
502 if (skb_gro_header_hard(skb, hlen)) {
503 gh = skb_gro_header_slow(skb, hlen, off_gnv);
504 if (unlikely(!gh))
505 goto out;
506 }
507
d4546c25 508 list_for_each_entry(p, head, list) {
371bd106
PS
509 if (!NAPI_GRO_CB(p)->same_flow)
510 continue;
511
512 gh2 = (struct genevehdr *)(p->data + off_gnv);
513 if (gh->opt_len != gh2->opt_len ||
514 memcmp(gh, gh2, gh_len)) {
515 NAPI_GRO_CB(p)->same_flow = 0;
516 continue;
517 }
518 }
519
520 type = gh->proto_type;
521
522 rcu_read_lock();
523 ptype = gro_find_receive_by_type(type);
c194cf93 524 if (!ptype)
371bd106 525 goto out_unlock;
371bd106
PS
526
527 skb_gro_pull(skb, gh_len);
528 skb_gro_postpull_rcsum(skb, gh, gh_len);
fcd91dd4 529 pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
c194cf93 530 flush = 0;
371bd106
PS
531
532out_unlock:
533 rcu_read_unlock();
534out:
603d4cf8 535 skb_gro_flush_final(skb, pp, flush);
371bd106
PS
536
537 return pp;
538}
539
4a0090a9
TH
540static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
541 int nhoff)
371bd106
PS
542{
543 struct genevehdr *gh;
544 struct packet_offload *ptype;
545 __be16 type;
546 int gh_len;
547 int err = -ENOSYS;
548
371bd106
PS
549 gh = (struct genevehdr *)(skb->data + nhoff);
550 gh_len = geneve_hlen(gh);
551 type = gh->proto_type;
552
553 rcu_read_lock();
554 ptype = gro_find_complete_by_type(type);
555 if (ptype)
556 err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);
557
558 rcu_read_unlock();
229740c6
JR
559
560 skb_set_inner_mac_header(skb, nhoff + gh_len);
561
371bd106
PS
562 return err;
563}
564
565/* Create new listen socket if needed */
566static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
9b4437a5 567 bool ipv6, bool ipv6_rx_csum)
371bd106
PS
568{
569 struct geneve_net *gn = net_generic(net, geneve_net_id);
570 struct geneve_sock *gs;
571 struct socket *sock;
572 struct udp_tunnel_sock_cfg tunnel_cfg;
66d47003 573 int h;
371bd106
PS
574
575 gs = kzalloc(sizeof(*gs), GFP_KERNEL);
576 if (!gs)
577 return ERR_PTR(-ENOMEM);
578
9b4437a5 579 sock = geneve_create_sock(net, ipv6, port, ipv6_rx_csum);
371bd106
PS
580 if (IS_ERR(sock)) {
581 kfree(gs);
582 return ERR_CAST(sock);
583 }
584
585 gs->sock = sock;
586 gs->refcnt = 1;
66d47003
PS
587 for (h = 0; h < VNI_HASH_SIZE; ++h)
588 INIT_HLIST_HEAD(&gs->vni_list[h]);
371bd106
PS
589
590 /* Initialize the geneve udp offloads structure */
e7b3db5e 591 udp_tunnel_notify_add_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
371bd106
PS
592
593 /* Mark socket as an encapsulation socket */
4a0090a9 594 memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
371bd106
PS
595 tunnel_cfg.sk_user_data = gs;
596 tunnel_cfg.encap_type = 1;
4a0090a9
TH
597 tunnel_cfg.gro_receive = geneve_gro_receive;
598 tunnel_cfg.gro_complete = geneve_gro_complete;
371bd106 599 tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
a0796644 600 tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup;
371bd106
PS
601 tunnel_cfg.encap_destroy = NULL;
602 setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
371bd106
PS
603 list_add(&gs->list, &gn->sock_list);
604 return gs;
605}
606
8ed66f0e 607static void __geneve_sock_release(struct geneve_sock *gs)
371bd106 608{
8ed66f0e 609 if (!gs || --gs->refcnt)
371bd106
PS
610 return;
611
612 list_del(&gs->list);
e7b3db5e 613 udp_tunnel_notify_del_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
371bd106
PS
614 udp_tunnel_sock_release(gs->sock);
615 kfree_rcu(gs, rcu);
616}
617
8ed66f0e
JL
618static void geneve_sock_release(struct geneve_dev *geneve)
619{
fceb9c3e 620 struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4);
8ed66f0e 621#if IS_ENABLED(CONFIG_IPV6)
fceb9c3e 622 struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6);
623
624 rcu_assign_pointer(geneve->sock6, NULL);
625#endif
626
627 rcu_assign_pointer(geneve->sock4, NULL);
628 synchronize_net();
629
630 __geneve_sock_release(gs4);
631#if IS_ENABLED(CONFIG_IPV6)
632 __geneve_sock_release(gs6);
8ed66f0e
JL
633#endif
634}
635
371bd106 636static struct geneve_sock *geneve_find_sock(struct geneve_net *gn,
8ed66f0e 637 sa_family_t family,
371bd106
PS
638 __be16 dst_port)
639{
640 struct geneve_sock *gs;
641
642 list_for_each_entry(gs, &gn->sock_list, list) {
643 if (inet_sk(gs->sock->sk)->inet_sport == dst_port &&
1e9f12ec 644 geneve_get_sk_family(gs) == family) {
371bd106
PS
645 return gs;
646 }
647 }
648 return NULL;
649}
650
8ed66f0e 651static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
2d07dc79 652{
2d07dc79 653 struct net *net = geneve->net;
371bd106 654 struct geneve_net *gn = net_generic(net, geneve_net_id);
4b4c21fa 655 struct geneve_dev_node *node;
2d07dc79 656 struct geneve_sock *gs;
9b4437a5 657 __u8 vni[3];
66d47003 658 __u32 hash;
2d07dc79 659
9b4437a5 660 gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET, geneve->info.key.tp_dst);
371bd106
PS
661 if (gs) {
662 gs->refcnt++;
663 goto out;
664 }
665
9b4437a5 666 gs = geneve_socket_create(net, geneve->info.key.tp_dst, ipv6,
667 geneve->use_udp6_rx_checksums);
2d07dc79
JL
668 if (IS_ERR(gs))
669 return PTR_ERR(gs);
670
371bd106
PS
671out:
672 gs->collect_md = geneve->collect_md;
8ed66f0e 673#if IS_ENABLED(CONFIG_IPV6)
4b4c21fa 674 if (ipv6) {
fceb9c3e 675 rcu_assign_pointer(geneve->sock6, gs);
4b4c21fa
JB
676 node = &geneve->hlist6;
677 } else
8ed66f0e 678#endif
4b4c21fa 679 {
fceb9c3e 680 rcu_assign_pointer(geneve->sock4, gs);
4b4c21fa
JB
681 node = &geneve->hlist4;
682 }
683 node->geneve = geneve;
66d47003 684
9b4437a5 685 tunnel_id_to_vni(geneve->info.key.tun_id, vni);
686 hash = geneve_net_vni_hash(vni);
4b4c21fa 687 hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]);
2d07dc79
JL
688 return 0;
689}
690
8ed66f0e
JL
691static int geneve_open(struct net_device *dev)
692{
693 struct geneve_dev *geneve = netdev_priv(dev);
8ed66f0e 694 bool metadata = geneve->collect_md;
cf1c9ccb 695 bool ipv4, ipv6;
8ed66f0e
JL
696 int ret = 0;
697
cf1c9ccb
JB
698 ipv6 = geneve->info.mode & IP_TUNNEL_INFO_IPV6 || metadata;
699 ipv4 = !ipv6 || metadata;
8ed66f0e 700#if IS_ENABLED(CONFIG_IPV6)
cf1c9ccb 701 if (ipv6) {
8ed66f0e 702 ret = geneve_sock_add(geneve, true);
cf1c9ccb
JB
703 if (ret < 0 && ret != -EAFNOSUPPORT)
704 ipv4 = false;
705 }
8ed66f0e 706#endif
cf1c9ccb 707 if (ipv4)
8ed66f0e
JL
708 ret = geneve_sock_add(geneve, false);
709 if (ret < 0)
710 geneve_sock_release(geneve);
711
712 return ret;
713}
714
2d07dc79
JL
715static int geneve_stop(struct net_device *dev)
716{
717 struct geneve_dev *geneve = netdev_priv(dev);
2d07dc79 718
4b4c21fa
JB
719 hlist_del_init_rcu(&geneve->hlist4.hlist);
720#if IS_ENABLED(CONFIG_IPV6)
721 hlist_del_init_rcu(&geneve->hlist6.hlist);
722#endif
8ed66f0e 723 geneve_sock_release(geneve);
371bd106
PS
724 return 0;
725}
726
8ed66f0e 727static void geneve_build_header(struct genevehdr *geneveh,
c3ef5aa5 728 const struct ip_tunnel_info *info)
8ed66f0e
JL
729{
730 geneveh->ver = GENEVE_VER;
c3ef5aa5 731 geneveh->opt_len = info->options_len / 4;
732 geneveh->oam = !!(info->key.tun_flags & TUNNEL_OAM);
733 geneveh->critical = !!(info->key.tun_flags & TUNNEL_CRIT_OPT);
8ed66f0e 734 geneveh->rsvd1 = 0;
c3ef5aa5 735 tunnel_id_to_vni(info->key.tun_id, geneveh->vni);
8ed66f0e
JL
736 geneveh->proto_type = htons(ETH_P_TEB);
737 geneveh->rsvd2 = 0;
738
256c87c1
PJV
739 if (info->key.tun_flags & TUNNEL_GENEVE_OPT)
740 ip_tunnel_info_opts_get(geneveh->options, info);
8ed66f0e
JL
741}
742
c3ef5aa5 743static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
744 const struct ip_tunnel_info *info,
745 bool xnet, int ip_hdr_len)
371bd106 746{
c3ef5aa5 747 bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
8ed66f0e
JL
748 struct genevehdr *gnvh;
749 int min_headroom;
750 int err;
751
c3ef5aa5 752 skb_reset_mac_header(skb);
8ed66f0e
JL
753 skb_scrub_packet(skb, xnet);
754
c3ef5aa5 755 min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len +
756 GENEVE_BASE_HLEN + info->options_len + ip_hdr_len;
8ed66f0e 757 err = skb_cow_head(skb, min_headroom);
aed069df 758 if (unlikely(err))
8ed66f0e 759 goto free_dst;
8ed66f0e 760
aed069df 761 err = udp_tunnel_handle_offloads(skb, udp_sum);
1ba64fac 762 if (err)
8ed66f0e 763 goto free_dst;
8ed66f0e 764
d58ff351 765 gnvh = __skb_push(skb, sizeof(*gnvh) + info->options_len);
c3ef5aa5 766 geneve_build_header(gnvh, info);
8ed66f0e
JL
767 skb_set_inner_protocol(skb, htons(ETH_P_TEB));
768 return 0;
769
770free_dst:
771 dst_release(dst);
772 return err;
773}
8ed66f0e
JL
774
775static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
776 struct net_device *dev,
5b861f6b 777 struct geneve_sock *gs4,
8ed66f0e 778 struct flowi4 *fl4,
c3ef5aa5 779 const struct ip_tunnel_info *info)
e305ac6c 780{
db3c6139 781 bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
e305ac6c 782 struct geneve_dev *geneve = netdev_priv(dev);
468dfffc 783 struct dst_cache *dst_cache;
e305ac6c
PS
784 struct rtable *rt = NULL;
785 __u8 tos;
786
5b861f6b 787 if (!gs4)
fceb9c3e 788 return ERR_PTR(-EIO);
789
e305ac6c
PS
790 memset(fl4, 0, sizeof(*fl4));
791 fl4->flowi4_mark = skb->mark;
792 fl4->flowi4_proto = IPPROTO_UDP;
9b4437a5 793 fl4->daddr = info->key.u.ipv4.dst;
794 fl4->saddr = info->key.u.ipv4.src;
e305ac6c 795
9b4437a5 796 tos = info->key.tos;
797 if ((tos == 1) && !geneve->collect_md) {
798 tos = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
799 use_cache = false;
468dfffc 800 }
9b4437a5 801 fl4->flowi4_tos = RT_TOS(tos);
468dfffc 802
c3ef5aa5 803 dst_cache = (struct dst_cache *)&info->dst_cache;
468dfffc
PA
804 if (use_cache) {
805 rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
806 if (rt)
807 return rt;
e305ac6c 808 }
e305ac6c
PS
809 rt = ip_route_output_key(geneve->net, fl4);
810 if (IS_ERR(rt)) {
811 netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr);
fc4099f1 812 return ERR_PTR(-ENETUNREACH);
e305ac6c
PS
813 }
814 if (rt->dst.dev == dev) { /* is this necessary? */
815 netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr);
e305ac6c 816 ip_rt_put(rt);
fc4099f1 817 return ERR_PTR(-ELOOP);
e305ac6c 818 }
468dfffc
PA
819 if (use_cache)
820 dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr);
e305ac6c
PS
821 return rt;
822}
823
8ed66f0e
JL
824#if IS_ENABLED(CONFIG_IPV6)
825static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
826 struct net_device *dev,
5b861f6b 827 struct geneve_sock *gs6,
8ed66f0e 828 struct flowi6 *fl6,
c3ef5aa5 829 const struct ip_tunnel_info *info)
8ed66f0e 830{
db3c6139 831 bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
8ed66f0e 832 struct geneve_dev *geneve = netdev_priv(dev);
8ed66f0e 833 struct dst_entry *dst = NULL;
468dfffc 834 struct dst_cache *dst_cache;
3a56f86f 835 __u8 prio;
8ed66f0e 836
fceb9c3e 837 if (!gs6)
838 return ERR_PTR(-EIO);
839
8ed66f0e
JL
840 memset(fl6, 0, sizeof(*fl6));
841 fl6->flowi6_mark = skb->mark;
842 fl6->flowi6_proto = IPPROTO_UDP;
9b4437a5 843 fl6->daddr = info->key.u.ipv6.dst;
844 fl6->saddr = info->key.u.ipv6.src;
845 prio = info->key.tos;
846 if ((prio == 1) && !geneve->collect_md) {
847 prio = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
848 use_cache = false;
468dfffc
PA
849 }
850
9b4437a5 851 fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio),
852 info->key.label);
c3ef5aa5 853 dst_cache = (struct dst_cache *)&info->dst_cache;
468dfffc
PA
854 if (use_cache) {
855 dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
856 if (dst)
857 return dst;
8ed66f0e 858 }
8ed66f0e
JL
859 if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) {
860 netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr);
861 return ERR_PTR(-ENETUNREACH);
862 }
863 if (dst->dev == dev) { /* is this necessary? */
864 netdev_dbg(dev, "circular route to %pI6\n", &fl6->daddr);
865 dst_release(dst);
866 return ERR_PTR(-ELOOP);
867 }
868
468dfffc
PA
869 if (use_cache)
870 dst_cache_set_ip6(dst_cache, dst, &fl6->saddr);
8ed66f0e
JL
871 return dst;
872}
873#endif
874
9b4437a5 875static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
c3ef5aa5 876 struct geneve_dev *geneve,
877 const struct ip_tunnel_info *info)
2d07dc79 878{
9b4437a5 879 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
880 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
881 const struct ip_tunnel_key *key = &info->key;
882 struct rtable *rt;
2d07dc79 883 struct flowi4 fl4;
8760ce58 884 __u8 tos, ttl;
a025fb5f 885 __be16 df = 0;
e305ac6c 886 __be16 sport;
bcceeec3 887 int err;
980c394c 888
5b861f6b 889 rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info);
9b4437a5 890 if (IS_ERR(rt))
891 return PTR_ERR(rt);
371bd106 892
6b4f92af
SB
893 skb_tunnel_check_pmtu(skb, &rt->dst,
894 GENEVE_IPV4_HLEN + info->options_len);
52a589d5 895
371bd106 896 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
9b4437a5 897 if (geneve->collect_md) {
898 tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
371bd106 899 ttl = key->ttl;
a025fb5f
SB
900
901 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
e305ac6c 902 } else {
9b4437a5 903 tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, ip_hdr(skb), skb);
52d0d404
HL
904 if (geneve->ttl_inherit)
905 ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
906 else
907 ttl = key->ttl;
908 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
a025fb5f
SB
909
910 if (geneve->df == GENEVE_DF_SET) {
911 df = htons(IP_DF);
912 } else if (geneve->df == GENEVE_DF_INHERIT) {
913 struct ethhdr *eth = eth_hdr(skb);
914
915 if (ntohs(eth->h_proto) == ETH_P_IPV6) {
916 df = htons(IP_DF);
917 } else if (ntohs(eth->h_proto) == ETH_P_IP) {
918 struct iphdr *iph = ip_hdr(skb);
919
920 if (iph->frag_off & htons(IP_DF))
921 df = htons(IP_DF);
922 }
923 }
2d07dc79 924 }
2d07dc79 925
c3ef5aa5 926 err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr));
9b4437a5 927 if (unlikely(err))
928 return err;
efeb2267 929
9b4437a5 930 udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr,
931 tos, ttl, df, sport, geneve->info.key.tp_dst,
932 !net_eq(geneve->net, dev_net(geneve->dev)),
933 !(info->key.tun_flags & TUNNEL_CSUM));
934 return 0;
2d07dc79
JL
935}
936
8ed66f0e 937#if IS_ENABLED(CONFIG_IPV6)
9b4437a5 938static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
c3ef5aa5 939 struct geneve_dev *geneve,
940 const struct ip_tunnel_info *info)
8ed66f0e 941{
9b4437a5 942 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
943 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
944 const struct ip_tunnel_key *key = &info->key;
8ed66f0e 945 struct dst_entry *dst = NULL;
8ed66f0e 946 struct flowi6 fl6;
3a56f86f 947 __u8 prio, ttl;
8ed66f0e 948 __be16 sport;
bcceeec3 949 int err;
8ed66f0e 950
5b861f6b 951 dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info);
9b4437a5 952 if (IS_ERR(dst))
953 return PTR_ERR(dst);
8ed66f0e 954
6b4f92af 955 skb_tunnel_check_pmtu(skb, dst, GENEVE_IPV6_HLEN + info->options_len);
52a589d5 956
8ed66f0e 957 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
9b4437a5 958 if (geneve->collect_md) {
959 prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
960 ttl = key->ttl;
961 } else {
962 prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel),
963 ip_hdr(skb), skb);
52d0d404
HL
964 if (geneve->ttl_inherit)
965 ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
966 else
967 ttl = key->ttl;
968 ttl = ttl ? : ip6_dst_hoplimit(dst);
9b4437a5 969 }
31ac1c19 970 err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr));
9b4437a5 971 if (unlikely(err))
972 return err;
8ed66f0e 973
9b4437a5 974 udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev,
975 &fl6.saddr, &fl6.daddr, prio, ttl,
976 info->key.label, sport, geneve->info.key.tp_dst,
977 !(info->key.tun_flags & TUNNEL_CSUM));
978 return 0;
979}
980#endif
8ed66f0e 981
9b4437a5 982static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
983{
984 struct geneve_dev *geneve = netdev_priv(dev);
985 struct ip_tunnel_info *info = NULL;
986 int err;
abe492b4 987
9b4437a5 988 if (geneve->collect_md) {
989 info = skb_tunnel_info(skb);
990 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) {
991 err = -EINVAL;
992 netdev_dbg(dev, "no tunnel metadata\n");
aed069df 993 goto tx_error;
9b4437a5 994 }
8ed66f0e 995 } else {
9b4437a5 996 info = &geneve->info;
8ed66f0e 997 }
8eb3b995 998
a717e3f7 999 rcu_read_lock();
9b4437a5 1000#if IS_ENABLED(CONFIG_IPV6)
1001 if (info->mode & IP_TUNNEL_INFO_IPV6)
1002 err = geneve6_xmit_skb(skb, dev, geneve, info);
1003 else
1004#endif
1005 err = geneve_xmit_skb(skb, dev, geneve, info);
a717e3f7 1006 rcu_read_unlock();
8ed66f0e 1007
9b4437a5 1008 if (likely(!err))
1009 return NETDEV_TX_OK;
8ed66f0e
JL
1010tx_error:
1011 dev_kfree_skb(skb);
aed069df 1012
8ed66f0e
JL
1013 if (err == -ELOOP)
1014 dev->stats.collisions++;
1015 else if (err == -ENETUNREACH)
1016 dev->stats.tx_carrier_errors++;
efeb2267
HY
1017
1018 dev->stats.tx_errors++;
8ed66f0e
JL
1019 return NETDEV_TX_OK;
1020}
8ed66f0e 1021
91572088 1022static int geneve_change_mtu(struct net_device *dev, int new_mtu)
55e5bfb5 1023{
91572088
JW
1024 if (new_mtu > dev->max_mtu)
1025 new_mtu = dev->max_mtu;
321acc1c
AK
1026 else if (new_mtu < dev->min_mtu)
1027 new_mtu = dev->min_mtu;
aeee0e66 1028
55e5bfb5
DW
1029 dev->mtu = new_mtu;
1030 return 0;
1031}
1032
fc4099f1
PS
1033static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
1034{
1035 struct ip_tunnel_info *info = skb_tunnel_info(skb);
1036 struct geneve_dev *geneve = netdev_priv(dev);
fc4099f1 1037
b8812fa8 1038 if (ip_tunnel_info_af(info) == AF_INET) {
9b4437a5 1039 struct rtable *rt;
1040 struct flowi4 fl4;
5b861f6b 1041 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
9b4437a5 1042
5b861f6b 1043 rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info);
b8812fa8
JL
1044 if (IS_ERR(rt))
1045 return PTR_ERR(rt);
fc4099f1 1046
b8812fa8
JL
1047 ip_rt_put(rt);
1048 info->key.u.ipv4.src = fl4.saddr;
1049#if IS_ENABLED(CONFIG_IPV6)
1050 } else if (ip_tunnel_info_af(info) == AF_INET6) {
9b4437a5 1051 struct dst_entry *dst;
1052 struct flowi6 fl6;
5b861f6b 1053 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
9b4437a5 1054
5b861f6b 1055 dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info);
b8812fa8
JL
1056 if (IS_ERR(dst))
1057 return PTR_ERR(dst);
1058
1059 dst_release(dst);
1060 info->key.u.ipv6.src = fl6.saddr;
1061#endif
1062 } else {
1063 return -EINVAL;
1064 }
fc4099f1 1065
fc4099f1
PS
1066 info->key.tp_src = udp_flow_src_port(geneve->net, skb,
1067 1, USHRT_MAX, true);
9b4437a5 1068 info->key.tp_dst = geneve->info.key.tp_dst;
fc4099f1
PS
1069 return 0;
1070}
1071
2d07dc79
JL
1072static const struct net_device_ops geneve_netdev_ops = {
1073 .ndo_init = geneve_init,
1074 .ndo_uninit = geneve_uninit,
1075 .ndo_open = geneve_open,
1076 .ndo_stop = geneve_stop,
1077 .ndo_start_xmit = geneve_xmit,
1078 .ndo_get_stats64 = ip_tunnel_get_stats64,
55e5bfb5 1079 .ndo_change_mtu = geneve_change_mtu,
2d07dc79
JL
1080 .ndo_validate_addr = eth_validate_addr,
1081 .ndo_set_mac_address = eth_mac_addr,
fc4099f1 1082 .ndo_fill_metadata_dst = geneve_fill_metadata_dst,
2d07dc79
JL
1083};
1084
1085static void geneve_get_drvinfo(struct net_device *dev,
1086 struct ethtool_drvinfo *drvinfo)
1087{
1088 strlcpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
1089 strlcpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
1090}
1091
1092static const struct ethtool_ops geneve_ethtool_ops = {
1093 .get_drvinfo = geneve_get_drvinfo,
1094 .get_link = ethtool_op_get_link,
1095};
1096
1097/* Info for udev, that this is a virtual tunnel endpoint */
1098static struct device_type geneve_type = {
1099 .name = "geneve",
1100};
1101
e5de25dc 1102/* Calls the ndo_udp_tunnel_add of the caller in order to
05ca4029 1103 * supply the listening GENEVE udp ports. Callers are expected
e5de25dc 1104 * to implement the ndo_udp_tunnel_add.
05ca4029 1105 */
2d2b13fc 1106static void geneve_offload_rx_ports(struct net_device *dev, bool push)
05ca4029
SA
1107{
1108 struct net *net = dev_net(dev);
1109 struct geneve_net *gn = net_generic(net, geneve_net_id);
1110 struct geneve_sock *gs;
681e683f 1111
05ca4029 1112 rcu_read_lock();
2d2b13fc
SD
1113 list_for_each_entry_rcu(gs, &gn->sock_list, list) {
1114 if (push) {
1115 udp_tunnel_push_rx_port(dev, gs->sock,
1116 UDP_TUNNEL_TYPE_GENEVE);
1117 } else {
1118 udp_tunnel_drop_rx_port(dev, gs->sock,
1119 UDP_TUNNEL_TYPE_GENEVE);
1120 }
1121 }
05ca4029
SA
1122 rcu_read_unlock();
1123}
05ca4029 1124
2d07dc79
JL
1125/* Initialize the device structure. */
1126static void geneve_setup(struct net_device *dev)
1127{
1128 ether_setup(dev);
1129
1130 dev->netdev_ops = &geneve_netdev_ops;
1131 dev->ethtool_ops = &geneve_ethtool_ops;
cf124db5 1132 dev->needs_free_netdev = true;
2d07dc79
JL
1133
1134 SET_NETDEV_DEVTYPE(dev, &geneve_type);
1135
2d07dc79
JL
1136 dev->features |= NETIF_F_LLTX;
1137 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
1138 dev->features |= NETIF_F_RXCSUM;
1139 dev->features |= NETIF_F_GSO_SOFTWARE;
1140
2d07dc79
JL
1141 dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
1142 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
2d07dc79 1143
91572088
JW
1144 /* MTU range: 68 - (something less than 65535) */
1145 dev->min_mtu = ETH_MIN_MTU;
1146 /* The max_mtu calculation does not take account of GENEVE
1147 * options, to avoid excluding potentially valid
1148 * configurations. This will be further reduced by IPvX hdr size.
1149 */
1150 dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
1151
2d07dc79 1152 netif_keep_dst(dev);
fc41cdb3 1153 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
ed961ac2 1154 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
87cd3dca 1155 eth_hw_addr_random(dev);
2d07dc79
JL
1156}
1157
1158static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
1159 [IFLA_GENEVE_ID] = { .type = NLA_U32 },
1160 [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
8ed66f0e 1161 [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) },
8760ce58 1162 [IFLA_GENEVE_TTL] = { .type = NLA_U8 },
d8951125 1163 [IFLA_GENEVE_TOS] = { .type = NLA_U8 },
8eb3b995 1164 [IFLA_GENEVE_LABEL] = { .type = NLA_U32 },
cd7918b3 1165 [IFLA_GENEVE_PORT] = { .type = NLA_U16 },
e305ac6c 1166 [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG },
abe492b4
TH
1167 [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 },
1168 [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 },
1169 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
52d0d404 1170 [IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 },
a025fb5f 1171 [IFLA_GENEVE_DF] = { .type = NLA_U8 },
2d07dc79
JL
1172};
1173
a8b8a889
MS
1174static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
1175 struct netlink_ext_ack *extack)
2d07dc79
JL
1176{
1177 if (tb[IFLA_ADDRESS]) {
c5ebc440
GM
1178 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
1179 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
1180 "Provided link layer address is not Ethernet");
2d07dc79 1181 return -EINVAL;
c5ebc440 1182 }
2d07dc79 1183
c5ebc440
GM
1184 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
1185 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
1186 "Provided Ethernet address is not unicast");
2d07dc79 1187 return -EADDRNOTAVAIL;
c5ebc440 1188 }
2d07dc79
JL
1189 }
1190
c5ebc440
GM
1191 if (!data) {
1192 NL_SET_ERR_MSG(extack,
1193 "Not enough attributes provided to perform the operation");
2d07dc79 1194 return -EINVAL;
c5ebc440 1195 }
2d07dc79
JL
1196
1197 if (data[IFLA_GENEVE_ID]) {
1198 __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]);
1199
c5ebc440
GM
1200 if (vni >= GENEVE_N_VID) {
1201 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_ID],
1202 "Geneve ID must be lower than 16777216");
2d07dc79 1203 return -ERANGE;
c5ebc440 1204 }
2d07dc79
JL
1205 }
1206
a025fb5f
SB
1207 if (data[IFLA_GENEVE_DF]) {
1208 enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]);
1209
1210 if (df < 0 || df > GENEVE_DF_MAX) {
1211 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_GENEVE_DF],
1212 "Invalid DF attribute");
1213 return -EINVAL;
1214 }
1215 }
1216
2d07dc79
JL
1217 return 0;
1218}
1219
371bd106 1220static struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
9b4437a5 1221 const struct ip_tunnel_info *info,
371bd106
PS
1222 bool *tun_on_same_port,
1223 bool *tun_collect_md)
1224{
9b4437a5 1225 struct geneve_dev *geneve, *t = NULL;
371bd106
PS
1226
1227 *tun_on_same_port = false;
1228 *tun_collect_md = false;
371bd106 1229 list_for_each_entry(geneve, &gn->geneve_list, next) {
9b4437a5 1230 if (info->key.tp_dst == geneve->info.key.tp_dst) {
371bd106
PS
1231 *tun_collect_md = geneve->collect_md;
1232 *tun_on_same_port = true;
1233 }
9b4437a5 1234 if (info->key.tun_id == geneve->info.key.tun_id &&
1235 info->key.tp_dst == geneve->info.key.tp_dst &&
1236 !memcmp(&info->key.u, &geneve->info.key.u, sizeof(info->key.u)))
371bd106
PS
1237 t = geneve;
1238 }
1239 return t;
1240}
1241
9b4437a5 1242static bool is_tnl_info_zero(const struct ip_tunnel_info *info)
1243{
3fa5f11d
SB
1244 return !(info->key.tun_id || info->key.tun_flags || info->key.tos ||
1245 info->key.ttl || info->key.label || info->key.tp_src ||
1246 memchr_inv(&info->key.u, 0, sizeof(info->key.u)));
9b4437a5 1247}
1248
5b861f6b
GM
1249static bool geneve_dst_addr_equal(struct ip_tunnel_info *a,
1250 struct ip_tunnel_info *b)
1251{
1252 if (ip_tunnel_info_af(a) == AF_INET)
1253 return a->key.u.ipv4.dst == b->key.u.ipv4.dst;
1254 else
1255 return ipv6_addr_equal(&a->key.u.ipv6.dst, &b->key.u.ipv6.dst);
1256}
1257
e305ac6c 1258static int geneve_configure(struct net *net, struct net_device *dev,
c5ebc440 1259 struct netlink_ext_ack *extack,
9b4437a5 1260 const struct ip_tunnel_info *info,
52d0d404 1261 bool metadata, bool ipv6_rx_csum,
a025fb5f 1262 bool ttl_inherit, enum ifla_geneve_df df)
2d07dc79
JL
1263{
1264 struct geneve_net *gn = net_generic(net, geneve_net_id);
371bd106
PS
1265 struct geneve_dev *t, *geneve = netdev_priv(dev);
1266 bool tun_collect_md, tun_on_same_port;
184fc8b5 1267 int err, encap_len;
2d07dc79 1268
c5ebc440
GM
1269 if (metadata && !is_tnl_info_zero(info)) {
1270 NL_SET_ERR_MSG(extack,
1271 "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified");
8ed66f0e 1272 return -EINVAL;
c5ebc440 1273 }
2d07dc79
JL
1274
1275 geneve->net = net;
1276 geneve->dev = dev;
1277
9b4437a5 1278 t = geneve_find_dev(gn, info, &tun_on_same_port, &tun_collect_md);
371bd106
PS
1279 if (t)
1280 return -EBUSY;
1281
184fc8b5
PA
1282 /* make enough headroom for basic scenario */
1283 encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
9a1c44d9 1284 if (!metadata && ip_tunnel_info_af(info) == AF_INET) {
184fc8b5 1285 encap_len += sizeof(struct iphdr);
91572088
JW
1286 dev->max_mtu -= sizeof(struct iphdr);
1287 } else {
184fc8b5 1288 encap_len += sizeof(struct ipv6hdr);
91572088
JW
1289 dev->max_mtu -= sizeof(struct ipv6hdr);
1290 }
184fc8b5
PA
1291 dev->needed_headroom = encap_len + ETH_HLEN;
1292
371bd106 1293 if (metadata) {
c5ebc440
GM
1294 if (tun_on_same_port) {
1295 NL_SET_ERR_MSG(extack,
1296 "There can be only one externally controlled device on a destination port");
371bd106 1297 return -EPERM;
c5ebc440 1298 }
371bd106 1299 } else {
c5ebc440
GM
1300 if (tun_collect_md) {
1301 NL_SET_ERR_MSG(extack,
1302 "There already exists an externally controlled device on this destination port");
371bd106 1303 return -EPERM;
c5ebc440 1304 }
371bd106
PS
1305 }
1306
9b4437a5 1307 dst_cache_reset(&geneve->info.dst_cache);
1308 geneve->info = *info;
1309 geneve->collect_md = metadata;
1310 geneve->use_udp6_rx_checksums = ipv6_rx_csum;
52d0d404 1311 geneve->ttl_inherit = ttl_inherit;
a025fb5f 1312 geneve->df = df;
468dfffc 1313
2d07dc79
JL
1314 err = register_netdevice(dev);
1315 if (err)
1316 return err;
1317
e305ac6c 1318 list_add(&geneve->next, &gn->geneve_list);
e305ac6c
PS
1319 return 0;
1320}
1321
9b4437a5 1322static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port)
1323{
1324 memset(info, 0, sizeof(*info));
1325 info->key.tp_dst = htons(dst_port);
1326}
1327
c5ebc440
GM
1328static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
1329 struct netlink_ext_ack *extack,
1330 struct ip_tunnel_info *info, bool *metadata,
52d0d404 1331 bool *use_udp6_rx_checksums, bool *ttl_inherit,
a025fb5f 1332 enum ifla_geneve_df *df, bool changelink)
e305ac6c 1333{
c5ebc440
GM
1334 int attrtype;
1335
1336 if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) {
1337 NL_SET_ERR_MSG(extack,
1338 "Cannot specify both IPv4 and IPv6 Remote addresses");
8ed66f0e 1339 return -EINVAL;
c5ebc440 1340 }
8ed66f0e
JL
1341
1342 if (data[IFLA_GENEVE_REMOTE]) {
c5ebc440
GM
1343 if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) {
1344 attrtype = IFLA_GENEVE_REMOTE;
1345 goto change_notsup;
1346 }
5b861f6b
GM
1347
1348 info->key.u.ipv4.dst =
8ed66f0e 1349 nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);
9b4437a5 1350
5b861f6b 1351 if (IN_MULTICAST(ntohl(info->key.u.ipv4.dst))) {
c5ebc440
GM
1352 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE],
1353 "Remote IPv4 address cannot be Multicast");
9b4437a5 1354 return -EINVAL;
1355 }
8ed66f0e
JL
1356 }
1357
1358 if (data[IFLA_GENEVE_REMOTE6]) {
4c52a889 1359#if IS_ENABLED(CONFIG_IPV6)
c5ebc440
GM
1360 if (changelink && (ip_tunnel_info_af(info) == AF_INET)) {
1361 attrtype = IFLA_GENEVE_REMOTE6;
1362 goto change_notsup;
1363 }
5b861f6b
GM
1364
1365 info->mode = IP_TUNNEL_INFO_IPV6;
1366 info->key.u.ipv6.dst =
8ed66f0e
JL
1367 nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]);
1368
5b861f6b 1369 if (ipv6_addr_type(&info->key.u.ipv6.dst) &
8ed66f0e 1370 IPV6_ADDR_LINKLOCAL) {
c5ebc440
GM
1371 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
1372 "Remote IPv6 address cannot be link-local");
8ed66f0e
JL
1373 return -EINVAL;
1374 }
5b861f6b 1375 if (ipv6_addr_is_multicast(&info->key.u.ipv6.dst)) {
c5ebc440
GM
1376 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
1377 "Remote IPv6 address cannot be Multicast");
9b4437a5 1378 return -EINVAL;
1379 }
5b861f6b
GM
1380 info->key.tun_flags |= TUNNEL_CSUM;
1381 *use_udp6_rx_checksums = true;
9b4437a5 1382#else
c5ebc440
GM
1383 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
1384 "IPv6 support not enabled in the kernel");
9b4437a5 1385 return -EPFNOSUPPORT;
1386#endif
8ed66f0e
JL
1387 }
1388
9b4437a5 1389 if (data[IFLA_GENEVE_ID]) {
1390 __u32 vni;
1391 __u8 tvni[3];
5b861f6b 1392 __be64 tunid;
9b4437a5 1393
e277de5f 1394 vni = nla_get_u32(data[IFLA_GENEVE_ID]);
9b4437a5 1395 tvni[0] = (vni & 0x00ff0000) >> 16;
1396 tvni[1] = (vni & 0x0000ff00) >> 8;
1397 tvni[2] = vni & 0x000000ff;
e305ac6c 1398
5b861f6b 1399 tunid = vni_to_tunnel_id(tvni);
c5ebc440
GM
1400 if (changelink && (tunid != info->key.tun_id)) {
1401 attrtype = IFLA_GENEVE_ID;
1402 goto change_notsup;
1403 }
5b861f6b 1404 info->key.tun_id = tunid;
9b4437a5 1405 }
5b861f6b 1406
a97d97ba
HL
1407 if (data[IFLA_GENEVE_TTL_INHERIT]) {
1408 if (nla_get_u8(data[IFLA_GENEVE_TTL_INHERIT]))
1409 *ttl_inherit = true;
1410 else
1411 *ttl_inherit = false;
1412 } else if (data[IFLA_GENEVE_TTL]) {
5b861f6b 1413 info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
a97d97ba
HL
1414 *ttl_inherit = false;
1415 }
52d0d404 1416
d8951125 1417 if (data[IFLA_GENEVE_TOS])
5b861f6b 1418 info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
d8951125 1419
a025fb5f
SB
1420 if (data[IFLA_GENEVE_DF])
1421 *df = nla_get_u8(data[IFLA_GENEVE_DF]);
1422
9b4437a5 1423 if (data[IFLA_GENEVE_LABEL]) {
5b861f6b 1424 info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
9b4437a5 1425 IPV6_FLOWLABEL_MASK;
c5ebc440
GM
1426 if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) {
1427 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LABEL],
1428 "Label attribute only applies for IPv6 Geneve devices");
9b4437a5 1429 return -EINVAL;
c5ebc440 1430 }
9b4437a5 1431 }
8eb3b995 1432
5b861f6b 1433 if (data[IFLA_GENEVE_PORT]) {
c5ebc440
GM
1434 if (changelink) {
1435 attrtype = IFLA_GENEVE_PORT;
1436 goto change_notsup;
1437 }
5b861f6b
GM
1438 info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]);
1439 }
2d07dc79 1440
5b861f6b 1441 if (data[IFLA_GENEVE_COLLECT_METADATA]) {
c5ebc440
GM
1442 if (changelink) {
1443 attrtype = IFLA_GENEVE_COLLECT_METADATA;
1444 goto change_notsup;
1445 }
5b861f6b
GM
1446 *metadata = true;
1447 }
2d07dc79 1448
5b861f6b 1449 if (data[IFLA_GENEVE_UDP_CSUM]) {
c5ebc440
GM
1450 if (changelink) {
1451 attrtype = IFLA_GENEVE_UDP_CSUM;
1452 goto change_notsup;
1453 }
5b861f6b
GM
1454 if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
1455 info->key.tun_flags |= TUNNEL_CSUM;
1456 }
abe492b4 1457
5b861f6b 1458 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) {
f9094b76 1459#if IS_ENABLED(CONFIG_IPV6)
c5ebc440
GM
1460 if (changelink) {
1461 attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_TX;
1462 goto change_notsup;
1463 }
5b861f6b
GM
1464 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
1465 info->key.tun_flags &= ~TUNNEL_CSUM;
f9094b76
HL
1466#else
1467 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX],
1468 "IPv6 support not enabled in the kernel");
1469 return -EPFNOSUPPORT;
1470#endif
5b861f6b 1471 }
abe492b4 1472
5b861f6b 1473 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) {
f9094b76 1474#if IS_ENABLED(CONFIG_IPV6)
c5ebc440
GM
1475 if (changelink) {
1476 attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_RX;
1477 goto change_notsup;
1478 }
5b861f6b
GM
1479 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
1480 *use_udp6_rx_checksums = false;
f9094b76
HL
1481#else
1482 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX],
1483 "IPv6 support not enabled in the kernel");
1484 return -EPFNOSUPPORT;
1485#endif
5b861f6b
GM
1486 }
1487
1488 return 0;
c5ebc440
GM
1489change_notsup:
1490 NL_SET_ERR_MSG_ATTR(extack, data[attrtype],
1491 "Changing VNI, Port, endpoint IP address family, external, and UDP checksum attributes are not supported");
1492 return -EOPNOTSUPP;
5b861f6b
GM
1493}
1494
c40e89fd
AK
1495static void geneve_link_config(struct net_device *dev,
1496 struct ip_tunnel_info *info, struct nlattr *tb[])
1497{
1498 struct geneve_dev *geneve = netdev_priv(dev);
1499 int ldev_mtu = 0;
1500
1501 if (tb[IFLA_MTU]) {
1502 geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
1503 return;
1504 }
1505
1506 switch (ip_tunnel_info_af(info)) {
1507 case AF_INET: {
1508 struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst };
1509 struct rtable *rt = ip_route_output_key(geneve->net, &fl4);
1510
1511 if (!IS_ERR(rt) && rt->dst.dev) {
1512 ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN;
1513 ip_rt_put(rt);
1514 }
1515 break;
1516 }
1517#if IS_ENABLED(CONFIG_IPV6)
1518 case AF_INET6: {
c0a47e44
HL
1519 struct rt6_info *rt;
1520
1521 if (!__in6_dev_get(dev))
1522 break;
1523
1524 rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0,
1525 NULL, 0);
c40e89fd
AK
1526
1527 if (rt && rt->dst.dev)
1528 ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN;
1529 ip6_rt_put(rt);
1530 break;
1531 }
1532#endif
1533 }
1534
1535 if (ldev_mtu <= 0)
1536 return;
1537
1538 geneve_change_mtu(dev, ldev_mtu - info->options_len);
1539}
1540
5b861f6b
GM
1541static int geneve_newlink(struct net *net, struct net_device *dev,
1542 struct nlattr *tb[], struct nlattr *data[],
1543 struct netlink_ext_ack *extack)
1544{
a025fb5f 1545 enum ifla_geneve_df df = GENEVE_DF_UNSET;
5b861f6b
GM
1546 bool use_udp6_rx_checksums = false;
1547 struct ip_tunnel_info info;
52d0d404 1548 bool ttl_inherit = false;
5b861f6b
GM
1549 bool metadata = false;
1550 int err;
1551
1552 init_tnl_info(&info, GENEVE_UDP_PORT);
c5ebc440 1553 err = geneve_nl2info(tb, data, extack, &info, &metadata,
a025fb5f 1554 &use_udp6_rx_checksums, &ttl_inherit, &df, false);
5b861f6b
GM
1555 if (err)
1556 return err;
abe492b4 1557
c40e89fd 1558 err = geneve_configure(net, dev, extack, &info, metadata,
a025fb5f 1559 use_udp6_rx_checksums, ttl_inherit, df);
c40e89fd
AK
1560 if (err)
1561 return err;
1562
1563 geneve_link_config(dev, &info, tb);
1564
1565 return 0;
2d07dc79
JL
1566}
1567
5b861f6b
GM
1568/* Quiesces the geneve device data path for both TX and RX.
1569 *
1570 * On transmit geneve checks for non-NULL geneve_sock before it proceeds.
1571 * So, if we set that socket to NULL under RCU and wait for synchronize_net()
1572 * to complete for the existing set of in-flight packets to be transmitted,
1573 * then we would have quiesced the transmit data path. All the future packets
1574 * will get dropped until we unquiesce the data path.
1575 *
1576 * On receive geneve dereference the geneve_sock stashed in the socket. So,
1577 * if we set that to NULL under RCU and wait for synchronize_net() to
1578 * complete, then we would have quiesced the receive data path.
1579 */
1580static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4,
1581 struct geneve_sock **gs6)
1582{
1583 *gs4 = rtnl_dereference(geneve->sock4);
1584 rcu_assign_pointer(geneve->sock4, NULL);
1585 if (*gs4)
1586 rcu_assign_sk_user_data((*gs4)->sock->sk, NULL);
1587#if IS_ENABLED(CONFIG_IPV6)
1588 *gs6 = rtnl_dereference(geneve->sock6);
1589 rcu_assign_pointer(geneve->sock6, NULL);
1590 if (*gs6)
1591 rcu_assign_sk_user_data((*gs6)->sock->sk, NULL);
1592#else
1593 *gs6 = NULL;
1594#endif
1595 synchronize_net();
1596}
1597
1598/* Resumes the geneve device data path for both TX and RX. */
1599static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4,
1600 struct geneve_sock __maybe_unused *gs6)
1601{
1602 rcu_assign_pointer(geneve->sock4, gs4);
1603 if (gs4)
1604 rcu_assign_sk_user_data(gs4->sock->sk, gs4);
1605#if IS_ENABLED(CONFIG_IPV6)
1606 rcu_assign_pointer(geneve->sock6, gs6);
1607 if (gs6)
1608 rcu_assign_sk_user_data(gs6->sock->sk, gs6);
1609#endif
1610 synchronize_net();
1611}
1612
1613static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
1614 struct nlattr *data[],
1615 struct netlink_ext_ack *extack)
1616{
1617 struct geneve_dev *geneve = netdev_priv(dev);
1618 struct geneve_sock *gs4, *gs6;
1619 struct ip_tunnel_info info;
1620 bool metadata;
1621 bool use_udp6_rx_checksums;
a025fb5f 1622 enum ifla_geneve_df df;
52d0d404 1623 bool ttl_inherit;
5b861f6b
GM
1624 int err;
1625
1626 /* If the geneve device is configured for metadata (or externally
1627 * controlled, for example, OVS), then nothing can be changed.
1628 */
1629 if (geneve->collect_md)
1630 return -EOPNOTSUPP;
1631
1632 /* Start with the existing info. */
1633 memcpy(&info, &geneve->info, sizeof(info));
1634 metadata = geneve->collect_md;
1635 use_udp6_rx_checksums = geneve->use_udp6_rx_checksums;
52d0d404 1636 ttl_inherit = geneve->ttl_inherit;
c5ebc440 1637 err = geneve_nl2info(tb, data, extack, &info, &metadata,
a025fb5f 1638 &use_udp6_rx_checksums, &ttl_inherit, &df, true);
5b861f6b
GM
1639 if (err)
1640 return err;
1641
c40e89fd 1642 if (!geneve_dst_addr_equal(&geneve->info, &info)) {
5b861f6b 1643 dst_cache_reset(&info.dst_cache);
c40e89fd
AK
1644 geneve_link_config(dev, &info, tb);
1645 }
5b861f6b
GM
1646
1647 geneve_quiesce(geneve, &gs4, &gs6);
1648 geneve->info = info;
1649 geneve->collect_md = metadata;
1650 geneve->use_udp6_rx_checksums = use_udp6_rx_checksums;
52d0d404 1651 geneve->ttl_inherit = ttl_inherit;
5b861f6b
GM
1652 geneve_unquiesce(geneve, gs4, gs6);
1653
1654 return 0;
1655}
1656
2d07dc79
JL
1657static void geneve_dellink(struct net_device *dev, struct list_head *head)
1658{
1659 struct geneve_dev *geneve = netdev_priv(dev);
1660
2d07dc79
JL
1661 list_del(&geneve->next);
1662 unregister_netdevice_queue(dev, head);
1663}
1664
1665static size_t geneve_get_size(const struct net_device *dev)
1666{
1667 return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */
8ed66f0e 1668 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */
8760ce58 1669 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */
d8951125 1670 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */
a025fb5f 1671 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_DF */
8eb3b995 1672 nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */
7bbe33ff 1673 nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */
e305ac6c 1674 nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */
abe492b4
TH
1675 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */
1676 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */
1677 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */
52d0d404 1678 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */
2d07dc79
JL
1679 0;
1680}
1681
1682static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
1683{
1684 struct geneve_dev *geneve = netdev_priv(dev);
9b4437a5 1685 struct ip_tunnel_info *info = &geneve->info;
52d0d404 1686 bool ttl_inherit = geneve->ttl_inherit;
fd7eafd0 1687 bool metadata = geneve->collect_md;
9b4437a5 1688 __u8 tmp_vni[3];
2d07dc79
JL
1689 __u32 vni;
1690
9b4437a5 1691 tunnel_id_to_vni(info->key.tun_id, tmp_vni);
1692 vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2];
2d07dc79
JL
1693 if (nla_put_u32(skb, IFLA_GENEVE_ID, vni))
1694 goto nla_put_failure;
1695
fd7eafd0 1696 if (!metadata && ip_tunnel_info_af(info) == AF_INET) {
8ed66f0e 1697 if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
9b4437a5 1698 info->key.u.ipv4.dst))
1699 goto nla_put_failure;
9b4437a5 1700 if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM,
1701 !!(info->key.tun_flags & TUNNEL_CSUM)))
8ed66f0e 1702 goto nla_put_failure;
9b4437a5 1703
8ed66f0e 1704#if IS_ENABLED(CONFIG_IPV6)
fd7eafd0 1705 } else if (!metadata) {
8ed66f0e 1706 if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6,
9b4437a5 1707 &info->key.u.ipv6.dst))
1708 goto nla_put_failure;
9b4437a5 1709 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
1710 !(info->key.tun_flags & TUNNEL_CSUM)))
1711 goto nla_put_failure;
11387fe4 1712#endif
fd7eafd0 1713 }
2d07dc79 1714
9b4437a5 1715 if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) ||
1716 nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) ||
1717 nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label))
8760ce58
JL
1718 goto nla_put_failure;
1719
a025fb5f
SB
1720 if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->df))
1721 goto nla_put_failure;
1722
9b4437a5 1723 if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst))
cd7918b3
PS
1724 goto nla_put_failure;
1725
fd7eafd0 1726 if (metadata && nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA))
f9094b76 1727 goto nla_put_failure;
fd7eafd0 1728
f9094b76 1729#if IS_ENABLED(CONFIG_IPV6)
fd7eafd0
HL
1730 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
1731 !geneve->use_udp6_rx_checksums))
1732 goto nla_put_failure;
f9094b76 1733#endif
fd7eafd0 1734
52d0d404
HL
1735 if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit))
1736 goto nla_put_failure;
1737
2d07dc79
JL
1738 return 0;
1739
1740nla_put_failure:
1741 return -EMSGSIZE;
1742}
1743
1744static struct rtnl_link_ops geneve_link_ops __read_mostly = {
1745 .kind = "geneve",
1746 .maxtype = IFLA_GENEVE_MAX,
1747 .policy = geneve_policy,
1748 .priv_size = sizeof(struct geneve_dev),
1749 .setup = geneve_setup,
1750 .validate = geneve_validate,
1751 .newlink = geneve_newlink,
5b861f6b 1752 .changelink = geneve_changelink,
2d07dc79
JL
1753 .dellink = geneve_dellink,
1754 .get_size = geneve_get_size,
1755 .fill_info = geneve_fill_info,
1756};
1757
e305ac6c
PS
1758struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
1759 u8 name_assign_type, u16 dst_port)
1760{
1761 struct nlattr *tb[IFLA_MAX + 1];
9b4437a5 1762 struct ip_tunnel_info info;
e305ac6c 1763 struct net_device *dev;
106da663 1764 LIST_HEAD(list_kill);
e305ac6c
PS
1765 int err;
1766
1767 memset(tb, 0, sizeof(tb));
1768 dev = rtnl_create_link(net, name, name_assign_type,
d0522f1c 1769 &geneve_link_ops, tb, NULL);
e305ac6c
PS
1770 if (IS_ERR(dev))
1771 return dev;
1772
9b4437a5 1773 init_tnl_info(&info, dst_port);
a025fb5f
SB
1774 err = geneve_configure(net, dev, NULL, &info,
1775 true, true, false, GENEVE_DF_UNSET);
106da663
ND
1776 if (err) {
1777 free_netdev(dev);
1778 return ERR_PTR(err);
1779 }
7e059158
DW
1780
1781 /* openvswitch users expect packet sizes to be unrestricted,
1782 * so set the largest MTU we can.
1783 */
91572088 1784 err = geneve_change_mtu(dev, IP_MAX_MTU);
7e059158
DW
1785 if (err)
1786 goto err;
1787
41009481
ND
1788 err = rtnl_configure_link(dev, NULL);
1789 if (err < 0)
1790 goto err;
1791
e305ac6c 1792 return dev;
9b4437a5 1793err:
106da663
ND
1794 geneve_dellink(dev, &list_kill);
1795 unregister_netdevice_many(&list_kill);
7e059158 1796 return ERR_PTR(err);
e305ac6c
PS
1797}
1798EXPORT_SYMBOL_GPL(geneve_dev_create_fb);
1799
681e683f
HFS
1800static int geneve_netdevice_event(struct notifier_block *unused,
1801 unsigned long event, void *ptr)
1802{
1803 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1804
2d2b13fc 1805 if (event == NETDEV_UDP_TUNNEL_PUSH_INFO ||
04584957 1806 event == NETDEV_UDP_TUNNEL_DROP_INFO) {
2d2b13fc 1807 geneve_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO);
04584957
SD
1808 } else if (event == NETDEV_UNREGISTER) {
1809 geneve_offload_rx_ports(dev, false);
1810 } else if (event == NETDEV_REGISTER) {
1811 geneve_offload_rx_ports(dev, true);
1812 }
681e683f
HFS
1813
1814 return NOTIFY_DONE;
1815}
1816
1817static struct notifier_block geneve_notifier_block __read_mostly = {
1818 .notifier_call = geneve_netdevice_event,
1819};
1820
2d07dc79
JL
1821static __net_init int geneve_init_net(struct net *net)
1822{
1823 struct geneve_net *gn = net_generic(net, geneve_net_id);
2d07dc79
JL
1824
1825 INIT_LIST_HEAD(&gn->geneve_list);
371bd106 1826 INIT_LIST_HEAD(&gn->sock_list);
2d07dc79
JL
1827 return 0;
1828}
1829
2843a253 1830static void geneve_destroy_tunnels(struct net *net, struct list_head *head)
2d07dc79
JL
1831{
1832 struct geneve_net *gn = net_generic(net, geneve_net_id);
1833 struct geneve_dev *geneve, *next;
1834 struct net_device *dev, *aux;
2d07dc79
JL
1835
1836 /* gather any geneve devices that were moved into this ns */
1837 for_each_netdev_safe(net, dev, aux)
1838 if (dev->rtnl_link_ops == &geneve_link_ops)
2843a253 1839 unregister_netdevice_queue(dev, head);
2d07dc79
JL
1840
1841 /* now gather any other geneve devices that were created in this ns */
1842 list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) {
1843 /* If geneve->dev is in the same netns, it was already added
1844 * to the list by the previous loop.
1845 */
1846 if (!net_eq(dev_net(geneve->dev), net))
2843a253 1847 unregister_netdevice_queue(geneve->dev, head);
2d07dc79
JL
1848 }
1849
2843a253
HY
1850 WARN_ON_ONCE(!list_empty(&gn->sock_list));
1851}
1852
1853static void __net_exit geneve_exit_batch_net(struct list_head *net_list)
1854{
1855 struct net *net;
1856 LIST_HEAD(list);
1857
1858 rtnl_lock();
1859 list_for_each_entry(net, net_list, exit_list)
1860 geneve_destroy_tunnels(net, &list);
1861
2d07dc79
JL
1862 /* unregister the devices gathered above */
1863 unregister_netdevice_many(&list);
1864 rtnl_unlock();
1865}
1866
1867static struct pernet_operations geneve_net_ops = {
1868 .init = geneve_init_net,
2843a253 1869 .exit_batch = geneve_exit_batch_net,
2d07dc79
JL
1870 .id = &geneve_net_id,
1871 .size = sizeof(struct geneve_net),
1872};
1873
1874static int __init geneve_init_module(void)
1875{
1876 int rc;
1877
1878 rc = register_pernet_subsys(&geneve_net_ops);
1879 if (rc)
1880 goto out1;
1881
681e683f 1882 rc = register_netdevice_notifier(&geneve_notifier_block);
2d07dc79
JL
1883 if (rc)
1884 goto out2;
1885
681e683f
HFS
1886 rc = rtnl_link_register(&geneve_link_ops);
1887 if (rc)
1888 goto out3;
1889
2d07dc79 1890 return 0;
681e683f
HFS
1891out3:
1892 unregister_netdevice_notifier(&geneve_notifier_block);
2d07dc79
JL
1893out2:
1894 unregister_pernet_subsys(&geneve_net_ops);
1895out1:
1896 return rc;
1897}
1898late_initcall(geneve_init_module);
1899
1900static void __exit geneve_cleanup_module(void)
1901{
1902 rtnl_link_unregister(&geneve_link_ops);
681e683f 1903 unregister_netdevice_notifier(&geneve_notifier_block);
2d07dc79
JL
1904 unregister_pernet_subsys(&geneve_net_ops);
1905}
1906module_exit(geneve_cleanup_module);
1907
1908MODULE_LICENSE("GPL");
1909MODULE_VERSION(GENEVE_NETDEV_VER);
1910MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
1911MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic");
1912MODULE_ALIAS_RTNL_LINK("geneve");