Commit | Line | Data |
---|---|---|
da5bab07 DB |
1 | /* |
2 | * IPV4 GSO/GRO offload support | |
3 | * Linux INET implementation | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or | |
6 | * modify it under the terms of the GNU General Public License | |
7 | * as published by the Free Software Foundation; either version | |
8 | * 2 of the License, or (at your option) any later version. | |
9 | * | |
10 | * UDPv4 GSO support | |
11 | */ | |
12 | ||
13 | #include <linux/skbuff.h> | |
14 | #include <net/udp.h> | |
15 | #include <net/protocol.h> | |
16 | ||
b582ef09 | 17 | static DEFINE_SPINLOCK(udp_offload_lock); |
a1d0cd8e | 18 | static struct udp_offload_priv __rcu *udp_offload_base __read_mostly; |
b582ef09 | 19 | |
a664a4f7 SP |
20 | #define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock)) |
21 | ||
b582ef09 OG |
22 | struct udp_offload_priv { |
23 | struct udp_offload *offload; | |
787d7ac3 | 24 | possible_net_t net; |
b582ef09 OG |
25 | struct rcu_head rcu; |
26 | struct udp_offload_priv __rcu *next; | |
27 | }; | |
28 | ||
8bce6d7d TH |
29 | static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, |
30 | netdev_features_t features, | |
31 | struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb, | |
32 | netdev_features_t features), | |
4bcb877d | 33 | __be16 new_protocol, bool is_ipv6) |
155e010e | 34 | { |
dbef491e | 35 | int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); |
22463876 | 36 | bool remcsum, need_csum, offload_csum, ufo; |
155e010e | 37 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
dbef491e | 38 | struct udphdr *uh = udp_hdr(skb); |
155e010e | 39 | u16 mac_offset = skb->mac_header; |
155e010e | 40 | __be16 protocol = skb->protocol; |
dbef491e | 41 | u16 mac_len = skb->mac_len; |
155e010e | 42 | int udp_offset, outer_hlen; |
08334824 | 43 | __wsum partial; |
155e010e TH |
44 | |
45 | if (unlikely(!pskb_may_pull(skb, tnl_hlen))) | |
46 | goto out; | |
47 | ||
08334824 AD |
48 | /* Adjust partial header checksum to negate old length. |
49 | * We cannot rely on the value contained in uh->len as it is | |
50 | * possible that the actual value exceeds the boundaries of the | |
51 | * 16 bit length field due to the header being added outside of an | |
52 | * IP or IPv6 frame that was already limited to 64K - 1. | |
53 | */ | |
54 | partial = csum_sub(csum_unfold(uh->check), | |
55 | (__force __wsum)htonl(skb->len)); | |
dbef491e AD |
56 | |
57 | /* setup inner skb. */ | |
155e010e | 58 | skb->encapsulation = 0; |
5197f349 | 59 | SKB_GSO_CB(skb)->encap_level = 0; |
155e010e TH |
60 | __skb_pull(skb, tnl_hlen); |
61 | skb_reset_mac_header(skb); | |
62 | skb_set_network_header(skb, skb_inner_network_offset(skb)); | |
63 | skb->mac_len = skb_inner_network_offset(skb); | |
8bce6d7d | 64 | skb->protocol = new_protocol; |
fdaefd62 AD |
65 | |
66 | need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); | |
4bcb877d | 67 | skb->encap_hdr_csum = need_csum; |
fdaefd62 AD |
68 | |
69 | remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM); | |
e585f236 | 70 | skb->remcsum_offload = remcsum; |
155e010e | 71 | |
22463876 AD |
72 | ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); |
73 | ||
4bcb877d TH |
74 | /* Try to offload checksum if possible */ |
75 | offload_csum = !!(need_csum && | |
fdaefd62 AD |
76 | (skb->dev->features & |
77 | (is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) : | |
78 | (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM)))); | |
155e010e | 79 | |
bef3c6c9 AD |
80 | features &= skb->dev->hw_enc_features; |
81 | ||
7fbeffed AD |
82 | /* The only checksum offload we care about from here on out is the |
83 | * outer one so strip the existing checksum feature flags and | |
84 | * instead set the flag based on our outer checksum offload value. | |
85 | */ | |
22463876 | 86 | if (remcsum || ufo) { |
7fbeffed | 87 | features &= ~NETIF_F_CSUM_MASK; |
22463876 | 88 | if (!need_csum || offload_csum) |
7fbeffed AD |
89 | features |= NETIF_F_HW_CSUM; |
90 | } | |
91 | ||
155e010e | 92 | /* segment inner packet. */ |
bef3c6c9 | 93 | segs = gso_inner_segment(skb, features); |
27446442 | 94 | if (IS_ERR_OR_NULL(segs)) { |
155e010e TH |
95 | skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, |
96 | mac_len); | |
97 | goto out; | |
98 | } | |
99 | ||
100 | outer_hlen = skb_tnl_header_len(skb); | |
101 | udp_offset = outer_hlen - tnl_hlen; | |
102 | skb = segs; | |
103 | do { | |
dbef491e | 104 | __be16 len; |
4bcb877d | 105 | |
fdaefd62 | 106 | if (remcsum) |
4bcb877d | 107 | skb->ip_summed = CHECKSUM_NONE; |
fdaefd62 AD |
108 | |
109 | /* Set up inner headers if we are offloading inner checksum */ | |
110 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | |
4bcb877d TH |
111 | skb_reset_inner_headers(skb); |
112 | skb->encapsulation = 1; | |
113 | } | |
155e010e TH |
114 | |
115 | skb->mac_len = mac_len; | |
4bcb877d | 116 | skb->protocol = protocol; |
155e010e | 117 | |
dbef491e | 118 | __skb_push(skb, outer_hlen); |
155e010e TH |
119 | skb_reset_mac_header(skb); |
120 | skb_set_network_header(skb, mac_len); | |
121 | skb_set_transport_header(skb, udp_offset); | |
dbef491e | 122 | len = htons(skb->len - udp_offset); |
155e010e | 123 | uh = udp_hdr(skb); |
dbef491e | 124 | uh->len = len; |
155e010e | 125 | |
4bcb877d TH |
126 | if (!need_csum) |
127 | continue; | |
128 | ||
08334824 | 129 | uh->check = ~csum_fold(csum_add(partial, (__force __wsum)len)); |
155e010e | 130 | |
fdaefd62 AD |
131 | if (skb->encapsulation || !offload_csum) { |
132 | uh->check = gso_make_checksum(skb, ~uh->check); | |
155e010e TH |
133 | if (uh->check == 0) |
134 | uh->check = CSUM_MANGLED_0; | |
fdaefd62 AD |
135 | } else { |
136 | skb->ip_summed = CHECKSUM_PARTIAL; | |
137 | skb->csum_start = skb_transport_header(skb) - skb->head; | |
138 | skb->csum_offset = offsetof(struct udphdr, check); | |
155e010e | 139 | } |
155e010e TH |
140 | } while ((skb = skb->next)); |
141 | out: | |
142 | return segs; | |
143 | } | |
144 | ||
8bce6d7d TH |
145 | struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, |
146 | netdev_features_t features, | |
147 | bool is_ipv6) | |
148 | { | |
149 | __be16 protocol = skb->protocol; | |
150 | const struct net_offload **offloads; | |
151 | const struct net_offload *ops; | |
152 | struct sk_buff *segs = ERR_PTR(-EINVAL); | |
153 | struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb, | |
154 | netdev_features_t features); | |
155 | ||
156 | rcu_read_lock(); | |
157 | ||
158 | switch (skb->inner_protocol_type) { | |
159 | case ENCAP_TYPE_ETHER: | |
160 | protocol = skb->inner_protocol; | |
161 | gso_inner_segment = skb_mac_gso_segment; | |
162 | break; | |
163 | case ENCAP_TYPE_IPPROTO: | |
164 | offloads = is_ipv6 ? inet6_offloads : inet_offloads; | |
165 | ops = rcu_dereference(offloads[skb->inner_ipproto]); | |
166 | if (!ops || !ops->callbacks.gso_segment) | |
167 | goto out_unlock; | |
168 | gso_inner_segment = ops->callbacks.gso_segment; | |
169 | break; | |
170 | default: | |
171 | goto out_unlock; | |
172 | } | |
173 | ||
174 | segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment, | |
4bcb877d | 175 | protocol, is_ipv6); |
8bce6d7d TH |
176 | |
177 | out_unlock: | |
178 | rcu_read_unlock(); | |
179 | ||
180 | return segs; | |
181 | } | |
182 | ||
da5bab07 DB |
183 | static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, |
184 | netdev_features_t features) | |
185 | { | |
186 | struct sk_buff *segs = ERR_PTR(-EINVAL); | |
187 | unsigned int mss; | |
7a7ffbab | 188 | __wsum csum; |
f71470b3 TH |
189 | struct udphdr *uh; |
190 | struct iphdr *iph; | |
7a7ffbab WCC |
191 | |
192 | if (skb->encapsulation && | |
0f4f4ffa TH |
193 | (skb_shinfo(skb)->gso_type & |
194 | (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) { | |
8bce6d7d | 195 | segs = skb_udp_tunnel_segment(skb, features, false); |
7a7ffbab WCC |
196 | goto out; |
197 | } | |
da5bab07 | 198 | |
f71470b3 TH |
199 | if (!pskb_may_pull(skb, sizeof(struct udphdr))) |
200 | goto out; | |
201 | ||
da5bab07 DB |
202 | mss = skb_shinfo(skb)->gso_size; |
203 | if (unlikely(skb->len <= mss)) | |
204 | goto out; | |
205 | ||
206 | if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { | |
207 | /* Packet is from an untrusted source, reset gso_segs. */ | |
208 | int type = skb_shinfo(skb)->gso_type; | |
209 | ||
210 | if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | | |
211 | SKB_GSO_UDP_TUNNEL | | |
0f4f4ffa | 212 | SKB_GSO_UDP_TUNNEL_CSUM | |
e585f236 | 213 | SKB_GSO_TUNNEL_REMCSUM | |
cb32f511 | 214 | SKB_GSO_IPIP | |
59b93b41 | 215 | SKB_GSO_GRE | SKB_GSO_GRE_CSUM) || |
da5bab07 DB |
216 | !(type & (SKB_GSO_UDP)))) |
217 | goto out; | |
218 | ||
219 | skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); | |
220 | ||
221 | segs = NULL; | |
222 | goto out; | |
223 | } | |
224 | ||
7a7ffbab WCC |
225 | /* Do software UFO. Complete and fill in the UDP checksum as |
226 | * HW cannot do checksum of UDP packets sent as multiple | |
227 | * IP fragments. | |
228 | */ | |
f71470b3 TH |
229 | |
230 | uh = udp_hdr(skb); | |
231 | iph = ip_hdr(skb); | |
232 | ||
233 | uh->check = 0; | |
234 | csum = skb_checksum(skb, 0, skb->len, 0); | |
235 | uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum); | |
236 | if (uh->check == 0) | |
237 | uh->check = CSUM_MANGLED_0; | |
238 | ||
7a7ffbab WCC |
239 | skb->ip_summed = CHECKSUM_NONE; |
240 | ||
22463876 AD |
241 | /* If there is no outer header we can fake a checksum offload |
242 | * due to the fact that we have already done the checksum in | |
243 | * software prior to segmenting the frame. | |
244 | */ | |
245 | if (!skb->encap_hdr_csum) | |
246 | features |= NETIF_F_HW_CSUM; | |
247 | ||
da5bab07 DB |
248 | /* Fragment the skb. IP headers of the fragments are updated in |
249 | * inet_gso_segment() | |
250 | */ | |
7a7ffbab | 251 | segs = skb_segment(skb, features); |
da5bab07 DB |
252 | out: |
253 | return segs; | |
254 | } | |
255 | ||
787d7ac3 | 256 | int udp_add_offload(struct net *net, struct udp_offload *uo) |
b582ef09 | 257 | { |
b5aaab12 | 258 | struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC); |
b582ef09 OG |
259 | |
260 | if (!new_offload) | |
261 | return -ENOMEM; | |
262 | ||
787d7ac3 | 263 | write_pnet(&new_offload->net, net); |
b582ef09 OG |
264 | new_offload->offload = uo; |
265 | ||
266 | spin_lock(&udp_offload_lock); | |
a664a4f7 SP |
267 | new_offload->next = udp_offload_base; |
268 | rcu_assign_pointer(udp_offload_base, new_offload); | |
b582ef09 OG |
269 | spin_unlock(&udp_offload_lock); |
270 | ||
271 | return 0; | |
272 | } | |
273 | EXPORT_SYMBOL(udp_add_offload); | |
274 | ||
275 | static void udp_offload_free_routine(struct rcu_head *head) | |
276 | { | |
277 | struct udp_offload_priv *ou_priv = container_of(head, struct udp_offload_priv, rcu); | |
278 | kfree(ou_priv); | |
279 | } | |
280 | ||
281 | void udp_del_offload(struct udp_offload *uo) | |
282 | { | |
283 | struct udp_offload_priv __rcu **head = &udp_offload_base; | |
284 | struct udp_offload_priv *uo_priv; | |
285 | ||
286 | spin_lock(&udp_offload_lock); | |
287 | ||
a664a4f7 | 288 | uo_priv = udp_deref_protected(*head); |
b582ef09 | 289 | for (; uo_priv != NULL; |
a664a4f7 | 290 | uo_priv = udp_deref_protected(*head)) { |
b582ef09 | 291 | if (uo_priv->offload == uo) { |
a664a4f7 SP |
292 | rcu_assign_pointer(*head, |
293 | udp_deref_protected(uo_priv->next)); | |
b582ef09 OG |
294 | goto unlock; |
295 | } | |
296 | head = &uo_priv->next; | |
297 | } | |
a1d0cd8e | 298 | pr_warn("udp_del_offload: didn't find offload for port %d\n", ntohs(uo->port)); |
b582ef09 OG |
299 | unlock: |
300 | spin_unlock(&udp_offload_lock); | |
00db4124 | 301 | if (uo_priv) |
b582ef09 OG |
302 | call_rcu(&uo_priv->rcu, udp_offload_free_routine); |
303 | } | |
304 | EXPORT_SYMBOL(udp_del_offload); | |
305 | ||
57c67ff4 TH |
306 | struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, |
307 | struct udphdr *uh) | |
b582ef09 OG |
308 | { |
309 | struct udp_offload_priv *uo_priv; | |
310 | struct sk_buff *p, **pp = NULL; | |
57c67ff4 TH |
311 | struct udphdr *uh2; |
312 | unsigned int off = skb_gro_offset(skb); | |
b582ef09 OG |
313 | int flush = 1; |
314 | ||
fac8e0f5 | 315 | if (NAPI_GRO_CB(skb)->encap_mark || |
662880f4 TH |
316 | (skb->ip_summed != CHECKSUM_PARTIAL && |
317 | NAPI_GRO_CB(skb)->csum_cnt == 0 && | |
318 | !NAPI_GRO_CB(skb)->csum_valid)) | |
b582ef09 OG |
319 | goto out; |
320 | ||
fac8e0f5 JG |
321 | /* mark that this skb passed once through the tunnel gro layer */ |
322 | NAPI_GRO_CB(skb)->encap_mark = 1; | |
b582ef09 OG |
323 | |
324 | rcu_read_lock(); | |
325 | uo_priv = rcu_dereference(udp_offload_base); | |
326 | for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { | |
787d7ac3 HFS |
327 | if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) && |
328 | uo_priv->offload->port == uh->dest && | |
b582ef09 OG |
329 | uo_priv->offload->callbacks.gro_receive) |
330 | goto unflush; | |
331 | } | |
332 | goto out_unlock; | |
333 | ||
334 | unflush: | |
335 | flush = 0; | |
336 | ||
337 | for (p = *head; p; p = p->next) { | |
338 | if (!NAPI_GRO_CB(p)->same_flow) | |
339 | continue; | |
340 | ||
341 | uh2 = (struct udphdr *)(p->data + off); | |
57c67ff4 TH |
342 | |
343 | /* Match ports and either checksums are either both zero | |
344 | * or nonzero. | |
345 | */ | |
346 | if ((*(u32 *)&uh->source != *(u32 *)&uh2->source) || | |
347 | (!uh->check ^ !uh2->check)) { | |
b582ef09 OG |
348 | NAPI_GRO_CB(p)->same_flow = 0; |
349 | continue; | |
350 | } | |
351 | } | |
352 | ||
353 | skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ | |
6bae1d4c | 354 | skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); |
afe93325 | 355 | NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; |
a2b12f3c TH |
356 | pp = uo_priv->offload->callbacks.gro_receive(head, skb, |
357 | uo_priv->offload); | |
b582ef09 OG |
358 | |
359 | out_unlock: | |
360 | rcu_read_unlock(); | |
361 | out: | |
362 | NAPI_GRO_CB(skb)->flush |= flush; | |
363 | return pp; | |
364 | } | |
365 | ||
57c67ff4 TH |
366 | static struct sk_buff **udp4_gro_receive(struct sk_buff **head, |
367 | struct sk_buff *skb) | |
368 | { | |
369 | struct udphdr *uh = udp_gro_udphdr(skb); | |
370 | ||
2abb7cdc TH |
371 | if (unlikely(!uh)) |
372 | goto flush; | |
57c67ff4 | 373 | |
2abb7cdc | 374 | /* Don't bother verifying checksum if we're going to flush anyway. */ |
2d8f7e2c | 375 | if (NAPI_GRO_CB(skb)->flush) |
2abb7cdc TH |
376 | goto skip; |
377 | ||
378 | if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, | |
379 | inet_gro_compute_pseudo)) | |
380 | goto flush; | |
381 | else if (uh->check) | |
382 | skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check, | |
383 | inet_gro_compute_pseudo); | |
384 | skip: | |
efc98d08 | 385 | NAPI_GRO_CB(skb)->is_ipv6 = 0; |
57c67ff4 | 386 | return udp_gro_receive(head, skb, uh); |
2abb7cdc TH |
387 | |
388 | flush: | |
389 | NAPI_GRO_CB(skb)->flush = 1; | |
390 | return NULL; | |
57c67ff4 TH |
391 | } |
392 | ||
393 | int udp_gro_complete(struct sk_buff *skb, int nhoff) | |
b582ef09 OG |
394 | { |
395 | struct udp_offload_priv *uo_priv; | |
396 | __be16 newlen = htons(skb->len - nhoff); | |
397 | struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); | |
398 | int err = -ENOSYS; | |
399 | ||
400 | uh->len = newlen; | |
401 | ||
402 | rcu_read_lock(); | |
403 | ||
404 | uo_priv = rcu_dereference(udp_offload_base); | |
405 | for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { | |
787d7ac3 HFS |
406 | if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) && |
407 | uo_priv->offload->port == uh->dest && | |
b582ef09 OG |
408 | uo_priv->offload->callbacks.gro_complete) |
409 | break; | |
410 | } | |
411 | ||
00db4124 | 412 | if (uo_priv) { |
afe93325 | 413 | NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; |
a2b12f3c TH |
414 | err = uo_priv->offload->callbacks.gro_complete(skb, |
415 | nhoff + sizeof(struct udphdr), | |
416 | uo_priv->offload); | |
afe93325 | 417 | } |
b582ef09 OG |
418 | |
419 | rcu_read_unlock(); | |
6db93ea1 TH |
420 | |
421 | if (skb->remcsum_offload) | |
422 | skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM; | |
423 | ||
424 | skb->encapsulation = 1; | |
425 | skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr)); | |
426 | ||
b582ef09 OG |
427 | return err; |
428 | } | |
429 | ||
72bb17b3 | 430 | static int udp4_gro_complete(struct sk_buff *skb, int nhoff) |
57c67ff4 TH |
431 | { |
432 | const struct iphdr *iph = ip_hdr(skb); | |
433 | struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); | |
434 | ||
6db93ea1 TH |
435 | if (uh->check) { |
436 | skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; | |
57c67ff4 TH |
437 | uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr, |
438 | iph->daddr, 0); | |
6db93ea1 TH |
439 | } else { |
440 | skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; | |
441 | } | |
57c67ff4 TH |
442 | |
443 | return udp_gro_complete(skb, nhoff); | |
444 | } | |
445 | ||
da5bab07 DB |
446 | static const struct net_offload udpv4_offload = { |
447 | .callbacks = { | |
da5bab07 | 448 | .gso_segment = udp4_ufo_fragment, |
57c67ff4 TH |
449 | .gro_receive = udp4_gro_receive, |
450 | .gro_complete = udp4_gro_complete, | |
da5bab07 DB |
451 | }, |
452 | }; | |
453 | ||
454 | int __init udpv4_offload_init(void) | |
455 | { | |
456 | return inet_add_offload(&udpv4_offload, IPPROTO_UDP); | |
457 | } |