/* SPDX-License-Identifier: GPL-2.0 */
/* XDP user-space ring structure
 * Copyright(c) 2018 Intel Corporation.
 */

#ifndef _LINUX_XSK_QUEUE_H
#define _LINUX_XSK_QUEUE_H

#include <linux/types.h>
#include <linux/if_xdp.h>
#include <net/xdp_sock.h>
#include <net/xsk_buff_pool.h>

#include "xsk.h"

struct xdp_ring {
	u32 producer ____cacheline_aligned_in_smp;
	/* Hinder the adjacent cache prefetcher from prefetching the consumer
	 * pointer if the producer pointer is touched and vice versa.
	 */
	u32 pad1 ____cacheline_aligned_in_smp;
	u32 consumer ____cacheline_aligned_in_smp;
	u32 pad2 ____cacheline_aligned_in_smp;
	u32 flags;
	u32 pad3 ____cacheline_aligned_in_smp;
};
27 | ||
28 | /* Used for the RX and TX queues for packets */ | |
29 | struct xdp_rxtx_ring { | |
30 | struct xdp_ring ptrs; | |
95e486f5 | 31 | struct xdp_desc desc[] ____cacheline_aligned_in_smp; |
b3a9e0be BT |
32 | }; |
33 | ||
34 | /* Used for the fill and completion queues for buffers */ | |
35 | struct xdp_umem_ring { | |
36 | struct xdp_ring ptrs; | |
95e486f5 | 37 | u64 desc[] ____cacheline_aligned_in_smp; |
b3a9e0be BT |
38 | }; |
39 | ||
423f3832 | 40 | struct xsk_queue { |
423f3832 MK |
41 | u32 ring_mask; |
42 | u32 nentries; | |
d7012f05 | 43 | u32 cached_prod; |
c5ed924b | 44 | u32 cached_cons; |
423f3832 MK |
45 | struct xdp_ring *ring; |
46 | u64 invalid_descs; | |
8aa5a335 | 47 | u64 queue_empty_descs; |
423f3832 MK |
48 | }; |
49 | ||
/* The structure of the shared state of the rings is a simple
 * circular buffer, as outlined in
 * Documentation/core-api/circular-buffers.rst. For the Rx and
 * completion ring, the kernel is the producer and user space is the
 * consumer. For the Tx and fill rings, the kernel is the consumer and
 * user space is the producer.
 *
 * producer                         consumer
 *
 * if (LOAD ->consumer) {  (A)      LOAD.acq ->producer  (C)
 *    STORE $data                   LOAD $data
 *    STORE.rel ->producer  (B)     STORE.rel ->consumer  (D)
 * }
 *
 * (A) pairs with (D), and (B) pairs with (C).
 *
 * Starting with (B), it protects the data from being written after
 * the producer pointer. If this barrier was missing, the consumer
 * could observe the producer pointer being set and thus load the data
 * before the producer has written the new data. The consumer would in
 * this case load the old data.
 *
 * (C) protects the consumer from speculatively loading the data before
 * the producer pointer actually has been read. If we do not have this
 * barrier, some architectures could load old data as speculative loads
 * are not discarded as the CPU does not know there is a dependency
 * between ->producer and data.
 *
 * (A) is a control dependency that separates the load of ->consumer
 * from the stores of $data. In case ->consumer indicates there is no
 * room in the buffer to store $data, we do not store it. The
 * dependency will order both of the stores after the loads. So no
 * barrier is needed.
 *
 * (D) ensures that the load of the data is ordered before the store
 * of the consumer pointer. If we did not have this memory barrier,
 * the producer could observe the consumer pointer being set and
 * overwrite the data with a new value before the consumer got the
 * chance to read the old value. The consumer would thus miss reading
 * the old entry and very likely read the new entry twice, once right
 * now and again after circling through the ring.
 */

/* The operations on the rings are the following:
 *
 * producer                         consumer
 *
 * RESERVE entries                  PEEK in the ring for entries
 * WRITE data into the ring         READ data from the ring
 * SUBMIT entries                   RELEASE entries
 *
 * The producer reserves one or more entries in the ring. It can then
 * fill in these entries and finally submit them so that they can be
 * seen and read by the consumer.
 *
 * The consumer peeks into the ring to see if the producer has written
 * any new entries. If so, the consumer can then read these entries
 * and when it is done reading them release them back to the producer
 * so that the producer can use these slots to fill in new entries.
 *
 * The function names below reflect these operations.
 */

/* Functions that read and validate content from consumer rings. */

static inline void __xskq_cons_read_addr_unchecked(struct xsk_queue *q, u32 cached_cons, u64 *addr)
{
	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
	u32 idx = cached_cons & q->ring_mask;

	*addr = ring->desc[idx];
}

static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
{
	if (q->cached_cons != q->cached_prod) {
		__xskq_cons_read_addr_unchecked(q, q->cached_cons, addr);
		return true;
	}

	return false;
}

static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
					    struct xdp_desc *desc)
{
	u64 chunk, chunk_end;

	chunk = xp_aligned_extract_addr(pool, desc->addr);
	if (likely(desc->len)) {
		chunk_end = xp_aligned_extract_addr(pool, desc->addr + desc->len - 1);
		if (chunk != chunk_end)
			return false;
	}

	if (chunk >= pool->addrs_cnt)
		return false;

	if (desc->options)
		return false;
	return true;
}

static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
					      struct xdp_desc *desc)
{
	u64 addr, base_addr;

	base_addr = xp_unaligned_extract_addr(desc->addr);
	addr = xp_unaligned_add_offset_to_addr(desc->addr);

	if (desc->len > pool->chunk_size)
		return false;

	if (base_addr >= pool->addrs_cnt || addr >= pool->addrs_cnt ||
	    xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
		return false;

	if (desc->options)
		return false;
	return true;
}

static inline bool xp_validate_desc(struct xsk_buff_pool *pool,
				    struct xdp_desc *desc)
{
	return pool->unaligned ? xp_unaligned_validate_desc(pool, desc) :
		xp_aligned_validate_desc(pool, desc);
}

static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q,
					   struct xdp_desc *d,
					   struct xsk_buff_pool *pool)
{
	if (!xp_validate_desc(pool, d)) {
		q->invalid_descs++;
		return false;
	}
	return true;
}

static inline bool xskq_cons_read_desc(struct xsk_queue *q,
				       struct xdp_desc *desc,
				       struct xsk_buff_pool *pool)
{
	while (q->cached_cons != q->cached_prod) {
		struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
		u32 idx = q->cached_cons & q->ring_mask;

		*desc = ring->desc[idx];
		if (xskq_cons_is_valid_desc(q, desc, pool))
			return true;

		q->cached_cons++;
	}

	return false;
}

static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt)
{
	q->cached_cons += cnt;
}

static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
					    u32 max)
{
	u32 cached_cons = q->cached_cons, nb_entries = 0;
	struct xdp_desc *descs = pool->tx_descs;

	while (cached_cons != q->cached_prod && nb_entries < max) {
		struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
		u32 idx = cached_cons & q->ring_mask;

		descs[nb_entries] = ring->desc[idx];
		if (unlikely(!xskq_cons_is_valid_desc(q, &descs[nb_entries], pool))) {
			/* Skip the entry */
			cached_cons++;
			continue;
		}

		nb_entries++;
		cached_cons++;
	}

	/* Release valid plus any invalid entries */
	xskq_cons_release_n(q, cached_cons - q->cached_cons);
	return nb_entries;
}

/* Functions for consumers */

static inline void __xskq_cons_release(struct xsk_queue *q)
{
	smp_store_release(&q->ring->consumer, q->cached_cons); /* D, matches A */
}

static inline void __xskq_cons_peek(struct xsk_queue *q)
{
	/* Refresh the local pointer */
	q->cached_prod = smp_load_acquire(&q->ring->producer); /* C, matches B */
}

static inline void xskq_cons_get_entries(struct xsk_queue *q)
{
	__xskq_cons_release(q);
	__xskq_cons_peek(q);
}

static inline u32 xskq_cons_nb_entries(struct xsk_queue *q, u32 max)
{
	u32 entries = q->cached_prod - q->cached_cons;

	if (entries >= max)
		return max;

	__xskq_cons_peek(q);
	entries = q->cached_prod - q->cached_cons;

	return entries >= max ? max : entries;
}

static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt)
{
	return xskq_cons_nb_entries(q, cnt) >= cnt;
}

static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr)
{
	if (q->cached_prod == q->cached_cons)
		xskq_cons_get_entries(q);
	return xskq_cons_read_addr_unchecked(q, addr);
}

static inline bool xskq_cons_peek_desc(struct xsk_queue *q,
				       struct xdp_desc *desc,
				       struct xsk_buff_pool *pool)
{
	if (q->cached_prod == q->cached_cons)
		xskq_cons_get_entries(q);
	return xskq_cons_read_desc(q, desc, pool);
}

/* To improve performance in the xskq_cons_release functions, only update local state here.
 * Reflect this to global state when we get new entries from the ring in
 * xskq_cons_get_entries() and whenever Rx or Tx processing is completed in the NAPI loop.
 */
static inline void xskq_cons_release(struct xsk_queue *q)
{
	q->cached_cons++;
}

static inline u32 xskq_cons_present_entries(struct xsk_queue *q)
{
	/* No barriers needed since data is not accessed */
	return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer);
}

/* Functions for producers */

static inline u32 xskq_prod_nb_free(struct xsk_queue *q, u32 max)
{
	u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons);

	if (free_entries >= max)
		return max;

	/* Refresh the local tail pointer */
	q->cached_cons = READ_ONCE(q->ring->consumer);
	free_entries = q->nentries - (q->cached_prod - q->cached_cons);

	return free_entries >= max ? max : free_entries;
}

static inline bool xskq_prod_is_full(struct xsk_queue *q)
{
	return xskq_prod_nb_free(q, 1) ? false : true;
}

static inline void xskq_prod_cancel(struct xsk_queue *q)
{
	q->cached_prod--;
}

static inline int xskq_prod_reserve(struct xsk_queue *q)
{
	if (xskq_prod_is_full(q))
		return -ENOSPC;

	/* A, matches D */
	q->cached_prod++;
	return 0;
}

static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr)
{
	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;

	if (xskq_prod_is_full(q))
		return -ENOSPC;

	/* A, matches D */
	ring->desc[q->cached_prod++ & q->ring_mask] = addr;
	return 0;
}

static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_desc *descs,
					      u32 nb_entries)
{
	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
	u32 i, cached_prod;

	/* A, matches D */
	cached_prod = q->cached_prod;
	for (i = 0; i < nb_entries; i++)
		ring->desc[cached_prod++ & q->ring_mask] = descs[i].addr;
	q->cached_prod = cached_prod;
}

static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
					 u64 addr, u32 len)
{
	struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
	u32 idx;

	if (xskq_prod_is_full(q))
		return -ENOBUFS;

	/* A, matches D */
	idx = q->cached_prod++ & q->ring_mask;
	ring->desc[idx].addr = addr;
	ring->desc[idx].len = len;

	return 0;
}

static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx)
{
	smp_store_release(&q->ring->producer, idx); /* B, matches C */
}

static inline void xskq_prod_submit(struct xsk_queue *q)
{
	__xskq_prod_submit(q, q->cached_prod);
}

static inline void xskq_prod_submit_addr(struct xsk_queue *q, u64 addr)
{
	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
	u32 idx = q->ring->producer;

	ring->desc[idx++ & q->ring_mask] = addr;

	__xskq_prod_submit(q, idx);
}

static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries)
{
	__xskq_prod_submit(q, q->ring->producer + nb_entries);
}

static inline bool xskq_prod_is_empty(struct xsk_queue *q)
{
	/* No barriers needed since data is not accessed */
	return READ_ONCE(q->ring->consumer) == READ_ONCE(q->ring->producer);
}

/* For both producers and consumers */

static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
{
	return q ? q->invalid_descs : 0;
}

static inline u64 xskq_nb_queue_empty_descs(struct xsk_queue *q)
{
	return q ? q->queue_empty_descs : 0;
}

struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
void xskq_destroy(struct xsk_queue *q_ops);

#endif /* _LINUX_XSK_QUEUE_H */