Commit | Line | Data |
---|---|---|
0b5e8b8e AZ |
1 | /* |
2 | * Geneve: Generic Network Virtualization Encapsulation | |
3 | * | |
4 | * Copyright (c) 2014 Nicira, Inc. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License | |
8 | * as published by the Free Software Foundation; either version | |
9 | * 2 of the License, or (at your option) any later version. | |
10 | */ | |
11 | ||
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
13 | ||
14 | #include <linux/kernel.h> | |
15 | #include <linux/types.h> | |
16 | #include <linux/module.h> | |
17 | #include <linux/errno.h> | |
18 | #include <linux/slab.h> | |
19 | #include <linux/skbuff.h> | |
20 | #include <linux/rculist.h> | |
21 | #include <linux/netdevice.h> | |
22 | #include <linux/in.h> | |
23 | #include <linux/ip.h> | |
24 | #include <linux/udp.h> | |
25 | #include <linux/igmp.h> | |
26 | #include <linux/etherdevice.h> | |
27 | #include <linux/if_ether.h> | |
28 | #include <linux/if_vlan.h> | |
29 | #include <linux/hash.h> | |
30 | #include <linux/ethtool.h> | |
31 | #include <net/arp.h> | |
32 | #include <net/ndisc.h> | |
33 | #include <net/ip.h> | |
34 | #include <net/ip_tunnels.h> | |
35 | #include <net/icmp.h> | |
36 | #include <net/udp.h> | |
37 | #include <net/rtnetlink.h> | |
38 | #include <net/route.h> | |
39 | #include <net/dsfield.h> | |
40 | #include <net/inet_ecn.h> | |
41 | #include <net/net_namespace.h> | |
42 | #include <net/netns/generic.h> | |
43 | #include <net/geneve.h> | |
44 | #include <net/protocol.h> | |
45 | #include <net/udp_tunnel.h> | |
46 | #if IS_ENABLED(CONFIG_IPV6) | |
47 | #include <net/ipv6.h> | |
48 | #include <net/addrconf.h> | |
49 | #include <net/ip6_tunnel.h> | |
50 | #include <net/ip6_checksum.h> | |
51 | #endif | |
52 | ||
53 | #define PORT_HASH_BITS 8 | |
54 | #define PORT_HASH_SIZE (1<<PORT_HASH_BITS) | |
55 | ||
56 | /* per-network namespace private data for this module */ | |
57 | struct geneve_net { | |
58 | struct hlist_head sock_list[PORT_HASH_SIZE]; | |
59 | spinlock_t sock_lock; /* Protects sock_list */ | |
60 | }; | |
61 | ||
62 | static int geneve_net_id; | |
63 | ||
64 | static struct workqueue_struct *geneve_wq; | |
65 | ||
66 | static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) | |
67 | { | |
68 | return (struct genevehdr *)(udp_hdr(skb) + 1); | |
69 | } | |
70 | ||
71 | static struct hlist_head *gs_head(struct net *net, __be16 port) | |
72 | { | |
73 | struct geneve_net *gn = net_generic(net, geneve_net_id); | |
74 | ||
75 | return &gn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)]; | |
76 | } | |
77 | ||
78 | /* Find geneve socket based on network namespace and UDP port */ | |
79 | static struct geneve_sock *geneve_find_sock(struct net *net, __be16 port) | |
80 | { | |
81 | struct geneve_sock *gs; | |
82 | ||
83 | hlist_for_each_entry_rcu(gs, gs_head(net, port), hlist) { | |
84 | if (inet_sk(gs->sock->sk)->inet_sport == port) | |
85 | return gs; | |
86 | } | |
87 | ||
88 | return NULL; | |
89 | } | |
90 | ||
91 | static void geneve_build_header(struct genevehdr *geneveh, | |
92 | __be16 tun_flags, u8 vni[3], | |
93 | u8 options_len, u8 *options) | |
94 | { | |
95 | geneveh->ver = GENEVE_VER; | |
96 | geneveh->opt_len = options_len / 4; | |
97 | geneveh->oam = !!(tun_flags & TUNNEL_OAM); | |
98 | geneveh->critical = !!(tun_flags & TUNNEL_CRIT_OPT); | |
99 | geneveh->rsvd1 = 0; | |
100 | memcpy(geneveh->vni, vni, 3); | |
101 | geneveh->proto_type = htons(ETH_P_TEB); | |
102 | geneveh->rsvd2 = 0; | |
103 | ||
104 | memcpy(geneveh->options, options, options_len); | |
105 | } | |
106 | ||
f4e715c3 | 107 | /* Transmit a fully formatted Geneve frame. |
0b5e8b8e AZ |
108 | * |
109 | * When calling this function. The skb->data should point | |
110 | * to the geneve header which is fully formed. | |
111 | * | |
112 | * This function will add other UDP tunnel headers. | |
113 | */ | |
114 | int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, | |
115 | struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, | |
116 | __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, | |
117 | __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, | |
118 | bool xnet) | |
119 | { | |
120 | struct genevehdr *gnvh; | |
121 | int min_headroom; | |
122 | int err; | |
123 | ||
124 | skb = udp_tunnel_handle_offloads(skb, !gs->sock->sk->sk_no_check_tx); | |
125 | ||
126 | min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len | |
127 | + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr) | |
128 | + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); | |
129 | ||
130 | err = skb_cow_head(skb, min_headroom); | |
131 | if (unlikely(err)) | |
132 | return err; | |
133 | ||
5968250c JP |
134 | skb = vlan_hwaccel_push_inside(skb); |
135 | if (unlikely(!skb)) | |
136 | return -ENOMEM; | |
0b5e8b8e AZ |
137 | |
138 | gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); | |
139 | geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); | |
140 | ||
45cac46e JG |
141 | skb_set_inner_protocol(skb, htons(ETH_P_TEB)); |
142 | ||
0b5e8b8e AZ |
143 | return udp_tunnel_xmit_skb(gs->sock, rt, skb, src, dst, |
144 | tos, ttl, df, src_port, dst_port, xnet); | |
145 | } | |
146 | EXPORT_SYMBOL_GPL(geneve_xmit_skb); | |
147 | ||
148 | static void geneve_notify_add_rx_port(struct geneve_sock *gs) | |
149 | { | |
150 | struct sock *sk = gs->sock->sk; | |
151 | sa_family_t sa_family = sk->sk_family; | |
152 | int err; | |
153 | ||
154 | if (sa_family == AF_INET) { | |
155 | err = udp_add_offload(&gs->udp_offloads); | |
156 | if (err) | |
157 | pr_warn("geneve: udp_add_offload failed with status %d\n", | |
158 | err); | |
159 | } | |
160 | } | |
161 | ||
7ed767f7 JG |
162 | static void geneve_notify_del_rx_port(struct geneve_sock *gs) |
163 | { | |
164 | struct sock *sk = gs->sock->sk; | |
165 | sa_family_t sa_family = sk->sk_family; | |
166 | ||
167 | if (sa_family == AF_INET) | |
168 | udp_del_offload(&gs->udp_offloads); | |
169 | } | |
170 | ||
0b5e8b8e AZ |
171 | /* Callback from net/ipv4/udp.c to receive packets */ |
172 | static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) | |
173 | { | |
174 | struct genevehdr *geneveh; | |
175 | struct geneve_sock *gs; | |
176 | int opts_len; | |
177 | ||
178 | /* Need Geneve and inner Ethernet header to be present */ | |
179 | if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) | |
180 | goto error; | |
181 | ||
182 | /* Return packets with reserved bits set */ | |
183 | geneveh = geneve_hdr(skb); | |
184 | ||
185 | if (unlikely(geneveh->ver != GENEVE_VER)) | |
186 | goto error; | |
187 | ||
188 | if (unlikely(geneveh->proto_type != htons(ETH_P_TEB))) | |
189 | goto error; | |
190 | ||
191 | opts_len = geneveh->opt_len * 4; | |
192 | if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, | |
193 | htons(ETH_P_TEB))) | |
194 | goto drop; | |
195 | ||
196 | gs = rcu_dereference_sk_user_data(sk); | |
197 | if (!gs) | |
198 | goto drop; | |
199 | ||
200 | gs->rcv(gs, skb); | |
201 | return 0; | |
202 | ||
203 | drop: | |
204 | /* Consume bad packet */ | |
205 | kfree_skb(skb); | |
206 | return 0; | |
207 | ||
208 | error: | |
209 | /* Let the UDP layer deal with the skb */ | |
210 | return 1; | |
211 | } | |
212 | ||
213 | static void geneve_del_work(struct work_struct *work) | |
214 | { | |
215 | struct geneve_sock *gs = container_of(work, struct geneve_sock, | |
216 | del_work); | |
217 | ||
218 | udp_tunnel_sock_release(gs->sock); | |
219 | kfree_rcu(gs, rcu); | |
220 | } | |
221 | ||
222 | static struct socket *geneve_create_sock(struct net *net, bool ipv6, | |
223 | __be16 port) | |
224 | { | |
225 | struct socket *sock; | |
226 | struct udp_port_cfg udp_conf; | |
227 | int err; | |
228 | ||
229 | memset(&udp_conf, 0, sizeof(udp_conf)); | |
230 | ||
231 | if (ipv6) { | |
232 | udp_conf.family = AF_INET6; | |
233 | } else { | |
234 | udp_conf.family = AF_INET; | |
42350dca | 235 | udp_conf.local_ip.s_addr = htonl(INADDR_ANY); |
0b5e8b8e AZ |
236 | } |
237 | ||
238 | udp_conf.local_udp_port = port; | |
239 | ||
240 | /* Open UDP socket */ | |
241 | err = udp_sock_create(net, &udp_conf, &sock); | |
242 | if (err < 0) | |
243 | return ERR_PTR(err); | |
244 | ||
245 | return sock; | |
246 | } | |
247 | ||
248 | /* Create new listen socket if needed */ | |
249 | static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, | |
250 | geneve_rcv_t *rcv, void *data, | |
251 | bool ipv6) | |
252 | { | |
253 | struct geneve_net *gn = net_generic(net, geneve_net_id); | |
254 | struct geneve_sock *gs; | |
255 | struct socket *sock; | |
256 | struct udp_tunnel_sock_cfg tunnel_cfg; | |
257 | ||
258 | gs = kzalloc(sizeof(*gs), GFP_KERNEL); | |
259 | if (!gs) | |
260 | return ERR_PTR(-ENOMEM); | |
261 | ||
262 | INIT_WORK(&gs->del_work, geneve_del_work); | |
263 | ||
264 | sock = geneve_create_sock(net, ipv6, port); | |
265 | if (IS_ERR(sock)) { | |
266 | kfree(gs); | |
267 | return ERR_CAST(sock); | |
268 | } | |
269 | ||
270 | gs->sock = sock; | |
271 | atomic_set(&gs->refcnt, 1); | |
272 | gs->rcv = rcv; | |
273 | gs->rcv_data = data; | |
274 | ||
275 | /* Initialize the geneve udp offloads structure */ | |
276 | gs->udp_offloads.port = port; | |
277 | gs->udp_offloads.callbacks.gro_receive = NULL; | |
278 | gs->udp_offloads.callbacks.gro_complete = NULL; | |
279 | ||
280 | spin_lock(&gn->sock_lock); | |
281 | hlist_add_head_rcu(&gs->hlist, gs_head(net, port)); | |
282 | geneve_notify_add_rx_port(gs); | |
283 | spin_unlock(&gn->sock_lock); | |
284 | ||
285 | /* Mark socket as an encapsulation socket */ | |
286 | tunnel_cfg.sk_user_data = gs; | |
287 | tunnel_cfg.encap_type = 1; | |
288 | tunnel_cfg.encap_rcv = geneve_udp_encap_recv; | |
289 | tunnel_cfg.encap_destroy = NULL; | |
290 | setup_udp_tunnel_sock(net, sock, &tunnel_cfg); | |
291 | ||
292 | return gs; | |
293 | } | |
294 | ||
295 | struct geneve_sock *geneve_sock_add(struct net *net, __be16 port, | |
296 | geneve_rcv_t *rcv, void *data, | |
297 | bool no_share, bool ipv6) | |
298 | { | |
12069401 | 299 | struct geneve_net *gn = net_generic(net, geneve_net_id); |
0b5e8b8e AZ |
300 | struct geneve_sock *gs; |
301 | ||
302 | gs = geneve_socket_create(net, port, rcv, data, ipv6); | |
303 | if (!IS_ERR(gs)) | |
304 | return gs; | |
305 | ||
306 | if (no_share) /* Return error if sharing is not allowed. */ | |
307 | return ERR_PTR(-EINVAL); | |
308 | ||
12069401 | 309 | spin_lock(&gn->sock_lock); |
0b5e8b8e | 310 | gs = geneve_find_sock(net, port); |
12069401 JG |
311 | if (gs && ((gs->rcv != rcv) || |
312 | !atomic_add_unless(&gs->refcnt, 1, 0))) | |
0b5e8b8e | 313 | gs = ERR_PTR(-EBUSY); |
12069401 JG |
314 | spin_unlock(&gn->sock_lock); |
315 | ||
316 | if (!gs) | |
0b5e8b8e | 317 | gs = ERR_PTR(-EINVAL); |
0b5e8b8e AZ |
318 | |
319 | return gs; | |
320 | } | |
321 | EXPORT_SYMBOL_GPL(geneve_sock_add); | |
322 | ||
323 | void geneve_sock_release(struct geneve_sock *gs) | |
324 | { | |
7ed767f7 JG |
325 | struct net *net = sock_net(gs->sock->sk); |
326 | struct geneve_net *gn = net_generic(net, geneve_net_id); | |
327 | ||
0b5e8b8e AZ |
328 | if (!atomic_dec_and_test(&gs->refcnt)) |
329 | return; | |
330 | ||
7ed767f7 JG |
331 | spin_lock(&gn->sock_lock); |
332 | hlist_del_rcu(&gs->hlist); | |
333 | geneve_notify_del_rx_port(gs); | |
334 | spin_unlock(&gn->sock_lock); | |
335 | ||
0b5e8b8e AZ |
336 | queue_work(geneve_wq, &gs->del_work); |
337 | } | |
338 | EXPORT_SYMBOL_GPL(geneve_sock_release); | |
339 | ||
340 | static __net_init int geneve_init_net(struct net *net) | |
341 | { | |
342 | struct geneve_net *gn = net_generic(net, geneve_net_id); | |
343 | unsigned int h; | |
344 | ||
345 | spin_lock_init(&gn->sock_lock); | |
346 | ||
347 | for (h = 0; h < PORT_HASH_SIZE; ++h) | |
348 | INIT_HLIST_HEAD(&gn->sock_list[h]); | |
349 | ||
350 | return 0; | |
351 | } | |
352 | ||
353 | static struct pernet_operations geneve_net_ops = { | |
354 | .init = geneve_init_net, | |
355 | .exit = NULL, | |
356 | .id = &geneve_net_id, | |
357 | .size = sizeof(struct geneve_net), | |
358 | }; | |
359 | ||
360 | static int __init geneve_init_module(void) | |
361 | { | |
362 | int rc; | |
363 | ||
364 | geneve_wq = alloc_workqueue("geneve", 0, 0); | |
365 | if (!geneve_wq) | |
366 | return -ENOMEM; | |
367 | ||
368 | rc = register_pernet_subsys(&geneve_net_ops); | |
369 | if (rc) | |
370 | return rc; | |
371 | ||
372 | pr_info("Geneve driver\n"); | |
373 | ||
374 | return 0; | |
375 | } | |
376 | late_initcall(geneve_init_module); | |
377 | ||
378 | static void __exit geneve_cleanup_module(void) | |
379 | { | |
380 | destroy_workqueue(geneve_wq); | |
d3ca9eaf | 381 | unregister_pernet_subsys(&geneve_net_ops); |
0b5e8b8e AZ |
382 | } |
383 | module_exit(geneve_cleanup_module); | |
384 | ||
385 | MODULE_LICENSE("GPL"); | |
386 | MODULE_AUTHOR("Jesse Gross <jesse@nicira.com>"); | |
387 | MODULE_DESCRIPTION("Driver for GENEVE encapsulated traffic"); | |
388 | MODULE_ALIAS_RTNL_LINK("geneve"); |