/* SPDX-License-Identifier: GPL-2.0 */
/* XDP user-space ring structure
 * Copyright(c) 2018 Intel Corporation.
 */

#ifndef _LINUX_XSK_QUEUE_H
#define _LINUX_XSK_QUEUE_H

#include <linux/types.h>
#include <linux/if_xdp.h>
#include <net/xdp_sock.h>
#include <net/xsk_buff_pool.h>

#include "xsk.h"

struct xdp_ring {
	u32 producer ____cacheline_aligned_in_smp;
	/* Hinder the adjacent cache prefetcher to prefetch the consumer
	 * pointer if the producer pointer is touched and vice versa.
	 */
	u32 pad1 ____cacheline_aligned_in_smp;
	u32 consumer ____cacheline_aligned_in_smp;
	u32 pad2 ____cacheline_aligned_in_smp;
	u32 flags;
	u32 pad3 ____cacheline_aligned_in_smp;
};

/* Used for the RX and TX queues for packets */
struct xdp_rxtx_ring {
	struct xdp_ring ptrs;
	struct xdp_desc desc[] ____cacheline_aligned_in_smp;
};

/* Used for the fill and completion queues for buffers */
struct xdp_umem_ring {
	struct xdp_ring ptrs;
	u64 desc[] ____cacheline_aligned_in_smp;
};

struct xsk_queue {
	u32 ring_mask;
	u32 nentries;
	u32 cached_prod;
	u32 cached_cons;
	struct xdp_ring *ring;
	u64 invalid_descs;
	u64 queue_empty_descs;
	size_t ring_vmalloc_size;
};
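
/* Illustrative note (editorial addition, not from the kernel sources): ring
 * sizes are required to be powers of two, so ring_mask is nentries - 1 and
 * "index & ring_mask" wraps a free-running u32 counter onto a slot.
 * cached_prod and cached_cons are such free-running counters; they are only
 * masked when used as an array index. For example, with nentries = 8
 * (ring_mask = 7), cached_prod = 10 selects slot 10 & 7 = 2, and the number
 * of used entries is simply cached_prod - cached_cons, which stays correct
 * across u32 wrap-around.
 */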

struct parsed_desc {
	u32 mb;
	u32 valid;
};

/* The structure of the shared state of the rings is a simple
 * circular buffer, as outlined in
 * Documentation/core-api/circular-buffers.rst. For the Rx and
 * completion ring, the kernel is the producer and user space is the
 * consumer. For the Tx and fill rings, the kernel is the consumer and
 * user space is the producer.
 *
 * producer                              consumer
 *
 * if (LOAD ->consumer) {          (A)   LOAD.acq ->producer      (C)
 *    STORE $data                        LOAD $data
 *    STORE.rel ->producer        (B)    STORE.rel ->consumer     (D)
 * }
 *
 * (A) pairs with (D), and (B) pairs with (C).
 *
 * Starting with (B), it protects the data from being written after
 * the producer pointer. If this barrier was missing, the consumer
 * could observe the producer pointer being set and thus load the data
 * before the producer has written the new data. The consumer would in
 * this case load the old data.
 *
 * (C) protects the consumer from speculatively loading the data before
 * the producer pointer actually has been read. If we do not have this
 * barrier, some architectures could load old data as speculative loads
 * are not discarded as the CPU does not know there is a dependency
 * between ->producer and data.
 *
 * (A) is a control dependency that separates the load of ->consumer
 * from the stores of $data. In case ->consumer indicates there is no
 * room in the buffer to store $data, we do not store it. The dependency
 * will order both of the stores after the loads. So no barrier is needed.
 *
 * (D) protects the load of the data so that it is observed to happen
 * before the store of the consumer pointer. If we did not have this
 * memory barrier, the producer could observe the consumer pointer being
 * set and overwrite the data with a new value before the consumer got
 * the chance to read the old value. The consumer would thus miss reading
 * the old entry and very likely read the new entry twice, once right
 * now and again after circling through the ring.
 */
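
/* A minimal sketch (editorial addition, illustrative only, not part of this
 * file's API) of how the (A)/(B)/(C)/(D) pairing above maps onto one
 * produce/consume round trip for a single-producer/single-consumer ring.
 * "data[]" stands in for the descriptor array that follows struct xdp_ring
 * in memory; "mask" is the ring mask (size - 1).
 */
#if 0
static void example_produce(struct xdp_ring *r, u64 *data, u32 mask, u64 val)
{
	u32 cons = READ_ONCE(r->consumer);			/* (A) */

	if (r->producer - cons < mask + 1) {			/* room available? */
		data[r->producer & mask] = val;			/* STORE $data */
		smp_store_release(&r->producer, r->producer + 1); /* (B) */
	}
}

static bool example_consume(struct xdp_ring *r, u64 *data, u32 mask, u64 *val)
{
	u32 prod = smp_load_acquire(&r->producer);		/* (C) */

	if (prod == r->consumer)
		return false;
	*val = data[r->consumer & mask];			/* LOAD $data */
	smp_store_release(&r->consumer, r->consumer + 1);	/* (D) */
	return true;
}
#endif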

/* The operations on the rings are the following:
 *
 * producer                              consumer
 *
 * RESERVE entries                       PEEK in the ring for entries
 * WRITE data into the ring              READ data from the ring
 * SUBMIT entries                        RELEASE entries
 *
 * The producer reserves one or more entries in the ring. It can then
 * fill in these entries and finally submit them so that they can be
 * seen and read by the consumer.
 *
 * The consumer peeks into the ring to see if the producer has written
 * any new entries. If so, the consumer can then read these entries
 * and when it is done reading them release them back to the producer
 * so that the producer can use these slots to fill in new entries.
 *
 * The function names below reflect these operations.
 */
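
/* A hedged sketch (editorial addition, illustrative fragments only) of how
 * the operation names map onto the helpers defined below, seen from the
 * kernel's side: producing completed Rx descriptors into an Rx ring and
 * consuming Tx descriptors from a Tx ring. "rx_q", "tx_q", "pool", "addr",
 * "len" and "desc" are hypothetical variables from a surrounding driver
 * context, not defined here.
 */
#if 0
/* Producer side: RESERVE + WRITE happen in xskq_prod_reserve_desc(),
 * SUBMIT in xskq_prod_submit().
 */
if (!xskq_prod_reserve_desc(rx_q, addr, len, 0))
	xskq_prod_submit(rx_q);

/* Consumer side: PEEK + READ happen in xskq_cons_peek_desc(),
 * RELEASE in xskq_cons_release().
 */
while (xskq_cons_peek_desc(tx_q, &desc, pool)) {
	/* ... transmit desc ... */
	xskq_cons_release(tx_q);
}
#endif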

/* Functions that read and validate content from consumer rings. */

static inline void __xskq_cons_read_addr_unchecked(struct xsk_queue *q, u32 cached_cons, u64 *addr)
{
	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
	u32 idx = cached_cons & q->ring_mask;

	*addr = ring->desc[idx];
}

static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
{
	if (q->cached_cons != q->cached_prod) {
		__xskq_cons_read_addr_unchecked(q, q->cached_cons, addr);
		return true;
	}

	return false;
}

static inline bool xp_unused_options_set(u32 options)
{
	return options & ~XDP_PKT_CONTD;
}

static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
					    struct xdp_desc *desc)
{
	u64 offset = desc->addr & (pool->chunk_size - 1);

	if (!desc->len)
		return false;

	if (offset + desc->len > pool->chunk_size)
		return false;

	if (desc->addr >= pool->addrs_cnt)
		return false;

	if (xp_unused_options_set(desc->options))
		return false;
	return true;
}
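
/* Worked example (editorial addition, with assumed numbers): for an aligned
 * pool with chunk_size = 2048, a descriptor with addr = 6656 has
 * offset = 6656 & 2047 = 512. A len of 1024 is accepted (512 + 1024 <= 2048),
 * whereas a len of 1600 is rejected because 512 + 1600 = 2112 would cross the
 * chunk boundary. The addr itself must also lie within pool->addrs_cnt.
 */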

static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
					      struct xdp_desc *desc)
{
	u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);

	if (!desc->len)
		return false;

	if (desc->len > pool->chunk_size)
		return false;

	if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt ||
	    xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
		return false;

	if (xp_unused_options_set(desc->options))
		return false;
	return true;
}

static inline bool xp_validate_desc(struct xsk_buff_pool *pool,
				    struct xdp_desc *desc)
{
	return pool->unaligned ? xp_unaligned_validate_desc(pool, desc) :
		xp_aligned_validate_desc(pool, desc);
}

static inline bool xskq_has_descs(struct xsk_queue *q)
{
	return q->cached_cons != q->cached_prod;
}

static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q,
					   struct xdp_desc *d,
					   struct xsk_buff_pool *pool)
{
	if (!xp_validate_desc(pool, d)) {
		q->invalid_descs++;
		return false;
	}
	return true;
}

static inline bool xskq_cons_read_desc(struct xsk_queue *q,
				       struct xdp_desc *desc,
				       struct xsk_buff_pool *pool)
{
	if (q->cached_cons != q->cached_prod) {
		struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
		u32 idx = q->cached_cons & q->ring_mask;

		*desc = ring->desc[idx];
		return xskq_cons_is_valid_desc(q, desc, pool);
	}

	q->queue_empty_descs++;
	return false;
}

static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt)
{
	q->cached_cons += cnt;
}

static inline void parse_desc(struct xsk_queue *q, struct xsk_buff_pool *pool,
			      struct xdp_desc *desc, struct parsed_desc *parsed)
{
	parsed->valid = xskq_cons_is_valid_desc(q, desc, pool);
	parsed->mb = xp_mb_desc(desc);
}

static inline
u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
			      u32 max)
{
	u32 cached_cons = q->cached_cons, nb_entries = 0;
	struct xdp_desc *descs = pool->tx_descs;
	u32 total_descs = 0, nr_frags = 0;

	/* Track the first entry; if we stumble upon *any* invalid descriptor,
	 * rewind the current packet consisting of frags and stop processing.
	 */
	while (cached_cons != q->cached_prod && nb_entries < max) {
		struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
		u32 idx = cached_cons & q->ring_mask;
		struct parsed_desc parsed;

		descs[nb_entries] = ring->desc[idx];
		cached_cons++;
		parse_desc(q, pool, &descs[nb_entries], &parsed);
		if (unlikely(!parsed.valid))
			break;

		if (likely(!parsed.mb)) {
			total_descs += (nr_frags + 1);
			nr_frags = 0;
		} else {
			nr_frags++;
			if (nr_frags == pool->netdev->xdp_zc_max_segs) {
				nr_frags = 0;
				break;
			}
		}
		nb_entries++;
	}

	cached_cons -= nr_frags;
	/* Release valid plus any invalid entries */
	xskq_cons_release_n(q, cached_cons - q->cached_cons);
	return total_descs;
}
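
/* Worked example (editorial addition, with assumed numbers): with
 * xdp_zc_max_segs = 4, a batch holding one complete 3-frag packet followed by
 * the first 2 frags of a second packet leaves nr_frags = 2 when the loop runs
 * out of entries. The local cached_cons is then rewound by 2, so only the 3
 * descriptors of the complete packet are released and counted in total_descs;
 * the partial packet is re-read in a later batch.
 */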

/* Functions for consumers */

static inline void __xskq_cons_release(struct xsk_queue *q)
{
	smp_store_release(&q->ring->consumer, q->cached_cons); /* D, matches A */
}

static inline void __xskq_cons_peek(struct xsk_queue *q)
{
	/* Refresh the local pointer */
	q->cached_prod = smp_load_acquire(&q->ring->producer); /* C, matches B */
}

static inline void xskq_cons_get_entries(struct xsk_queue *q)
{
	__xskq_cons_release(q);
	__xskq_cons_peek(q);
}

static inline u32 xskq_cons_nb_entries(struct xsk_queue *q, u32 max)
{
	u32 entries = q->cached_prod - q->cached_cons;

	if (entries >= max)
		return max;

	__xskq_cons_peek(q);
	entries = q->cached_prod - q->cached_cons;

	return entries >= max ? max : entries;
}

static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt)
{
	return xskq_cons_nb_entries(q, cnt) >= cnt;
}

static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr)
{
	if (q->cached_prod == q->cached_cons)
		xskq_cons_get_entries(q);
	return xskq_cons_read_addr_unchecked(q, addr);
}

static inline bool xskq_cons_peek_desc(struct xsk_queue *q,
				       struct xdp_desc *desc,
				       struct xsk_buff_pool *pool)
{
	if (q->cached_prod == q->cached_cons)
		xskq_cons_get_entries(q);
	return xskq_cons_read_desc(q, desc, pool);
}

/* To improve performance in the xskq_cons_release functions, only update local
 * state here. Reflect this to global state when we get new entries from the
 * ring in xskq_cons_get_entries() and whenever Rx or Tx processing is
 * completed in the NAPI loop.
 */
static inline void xskq_cons_release(struct xsk_queue *q)
{
	q->cached_cons++;
}
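
/* A hedged sketch (editorial addition, illustrative fragment only) of the
 * deferred release described above: each xskq_cons_release() only bumps the
 * local cached_cons, and the global consumer pointer is published the next
 * time __xskq_cons_release() runs, e.g. via xskq_cons_get_entries() on a
 * later peek or from the completion path. "q" and "pool" are hypothetical
 * variables from a surrounding context.
 */
#if 0
while (xskq_cons_peek_desc(q, &desc, pool)) {
	/* ... process desc ... */
	xskq_cons_release(q);		/* local state only, cheap */
}
__xskq_cons_release(q);			/* make the releases visible to the producer */
#endif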

static inline void xskq_cons_cancel_n(struct xsk_queue *q, u32 cnt)
{
	q->cached_cons -= cnt;
}

static inline u32 xskq_cons_present_entries(struct xsk_queue *q)
{
	/* No barriers needed since data is not accessed */
	return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer);
}

/* Functions for producers */

static inline u32 xskq_prod_nb_free(struct xsk_queue *q, u32 max)
{
	u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons);

	if (free_entries >= max)
		return max;

	/* Refresh the local tail pointer */
	q->cached_cons = READ_ONCE(q->ring->consumer);
	free_entries = q->nentries - (q->cached_prod - q->cached_cons);

	return free_entries >= max ? max : free_entries;
}
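
/* Worked example (editorial addition, with assumed numbers): with
 * nentries = 512, cached_prod = 700 and cached_cons = 300, free_entries =
 * 512 - (700 - 300) = 112. If that is less than the requested max, the
 * consumer pointer is re-read from the ring, which can only increase the
 * result: the consumer moves q->ring->consumer forward, never back.
 */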

static inline bool xskq_prod_is_full(struct xsk_queue *q)
{
	return xskq_prod_nb_free(q, 1) ? false : true;
}

static inline void xskq_prod_cancel_n(struct xsk_queue *q, u32 cnt)
{
	q->cached_prod -= cnt;
}

static inline int xskq_prod_reserve(struct xsk_queue *q)
{
	if (xskq_prod_is_full(q))
		return -ENOSPC;

	/* A, matches D */
	q->cached_prod++;
	return 0;
}

static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr)
{
	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;

	if (xskq_prod_is_full(q))
		return -ENOSPC;

	/* A, matches D */
	ring->desc[q->cached_prod++ & q->ring_mask] = addr;
	return 0;
}

static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_desc *descs,
					      u32 nb_entries)
{
	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
	u32 i, cached_prod;

	/* A, matches D */
	cached_prod = q->cached_prod;
	for (i = 0; i < nb_entries; i++)
		ring->desc[cached_prod++ & q->ring_mask] = descs[i].addr;
	q->cached_prod = cached_prod;
}

static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
					 u64 addr, u32 len, u32 flags)
{
	struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
	u32 idx;

	if (xskq_prod_is_full(q))
		return -ENOBUFS;

	/* A, matches D */
	idx = q->cached_prod++ & q->ring_mask;
	ring->desc[idx].addr = addr;
	ring->desc[idx].len = len;
	ring->desc[idx].options = flags;

	return 0;
}

static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx)
{
	smp_store_release(&q->ring->producer, idx); /* B, matches C */
}

static inline void xskq_prod_submit(struct xsk_queue *q)
{
	__xskq_prod_submit(q, q->cached_prod);
}

static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries)
{
	__xskq_prod_submit(q, q->ring->producer + nb_entries);
}

static inline bool xskq_prod_is_empty(struct xsk_queue *q)
{
	/* No barriers needed since data is not accessed */
	return READ_ONCE(q->ring->consumer) == READ_ONCE(q->ring->producer);
}

/* For both producers and consumers */

static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
{
	return q ? q->invalid_descs : 0;
}

static inline u64 xskq_nb_queue_empty_descs(struct xsk_queue *q)
{
	return q ? q->queue_empty_descs : 0;
}

struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
void xskq_destroy(struct xsk_queue *q_ops);

#endif /* _LINUX_XSK_QUEUE_H */