Commit | Line | Data |
---|---|---|
dac09149 BT |
1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* XDP user-space ring structure | |
423f3832 | 3 | * Copyright(c) 2018 Intel Corporation. |
423f3832 MK |
4 | */ |
5 | ||
6 | #ifndef _LINUX_XSK_QUEUE_H | |
7 | #define _LINUX_XSK_QUEUE_H | |
8 | ||
9 | #include <linux/types.h> | |
10 | #include <linux/if_xdp.h> | |
e61e62b9 | 11 | #include <net/xdp_sock.h> |
2b43470a | 12 | #include <net/xsk_buff_pool.h> |
423f3832 | 13 | |
89e4a376 BT |
14 | #include "xsk.h" |
15 | ||
b3a9e0be BT |
16 | struct xdp_ring { |
17 | u32 producer ____cacheline_aligned_in_smp; | |
c3f01fdc MK |
18 | /* Hinder the adjacent cache prefetcher to prefetch the consumer |
19 | * pointer if the producer pointer is touched and vice versa. | |
20 | */ | |
b8c7aece | 21 | u32 pad1 ____cacheline_aligned_in_smp; |
b3a9e0be | 22 | u32 consumer ____cacheline_aligned_in_smp; |
b8c7aece | 23 | u32 pad2 ____cacheline_aligned_in_smp; |
77cd0d7b | 24 | u32 flags; |
b8c7aece | 25 | u32 pad3 ____cacheline_aligned_in_smp; |
b3a9e0be BT |
26 | }; |
27 | ||
28 | /* Used for the RX and TX queues for packets */ | |
29 | struct xdp_rxtx_ring { | |
30 | struct xdp_ring ptrs; | |
95e486f5 | 31 | struct xdp_desc desc[] ____cacheline_aligned_in_smp; |
b3a9e0be BT |
32 | }; |
33 | ||
34 | /* Used for the fill and completion queues for buffers */ | |
35 | struct xdp_umem_ring { | |
36 | struct xdp_ring ptrs; | |
95e486f5 | 37 | u64 desc[] ____cacheline_aligned_in_smp; |
b3a9e0be BT |
38 | }; |
39 | ||
423f3832 | 40 | struct xsk_queue { |
423f3832 MK |
41 | u32 ring_mask; |
42 | u32 nentries; | |
d7012f05 | 43 | u32 cached_prod; |
c5ed924b | 44 | u32 cached_cons; |
423f3832 MK |
45 | struct xdp_ring *ring; |
46 | u64 invalid_descs; | |
8aa5a335 | 47 | u64 queue_empty_descs; |
9f78bf33 | 48 | size_t ring_vmalloc_size; |
423f3832 MK |
49 | }; |
50 | ||
a23b3f56 BT |
51 | /* The structure of the shared state of the rings is a simple |
52 | * circular buffer, as outlined in | |
53 | * Documentation/core-api/circular-buffers.rst. For the Rx and | |
54 | * completion ring, the kernel is the producer and user space is the | |
55 | * consumer. For the Tx and fill rings, the kernel is the consumer and | |
56 | * user space is the producer. | |
f63666de MK |
57 | * |
58 | * producer consumer | |
59 | * | |
a23b3f56 | 60 | * if (LOAD ->consumer) { (A) LOAD.acq ->producer (C) |
f63666de | 61 | * STORE $data LOAD $data |
a23b3f56 | 62 | * STORE.rel ->producer (B) STORE.rel ->consumer (D) |
f63666de MK |
63 | * } |
64 | * | |
65 | * (A) pairs with (D), and (B) pairs with (C). | |
66 | * | |
67 | * Starting with (B), it protects the data from being written after | |
68 | * the producer pointer. If this barrier was missing, the consumer | |
69 | * could observe the producer pointer being set and thus load the data | |
70 | * before the producer has written the new data. The consumer would in | |
71 | * this case load the old data. | |
72 | * | |
73 | * (C) protects the consumer from speculatively loading the data before | |
74 | * the producer pointer actually has been read. If we do not have this | |
75 | * barrier, some architectures could load old data as speculative loads | |
76 | * are not discarded as the CPU does not know there is a dependency | |
77 | * between ->producer and data. | |
78 | * | |
79 | * (A) is a control dependency that separates the load of ->consumer | |
80 | * from the stores of $data. In case ->consumer indicates there is no | |
a23b3f56 BT |
81 | * room in the buffer to store $data we do not. The dependency will |
82 | * order both of the stores after the loads. So no barrier is needed. | |
f63666de MK |
83 | * |
84 | * (D) protects the load of the data to be observed to happen after the | |
85 | * store of the consumer pointer. If we did not have this memory | |
86 | * barrier, the producer could observe the consumer pointer being set | |
87 | * and overwrite the data with a new value before the consumer got the | |
88 | * chance to read the old value. The consumer would thus miss reading | |
89 | * the old entry and very likely read the new entry twice, once right | |
90 | * now and again after circling through the ring. | |
91 | */ | |
92 | ||
15d8c916 MK |
93 | /* The operations on the rings are the following: |
94 | * | |
95 | * producer consumer | |
96 | * | |
97 | * RESERVE entries PEEK in the ring for entries | |
98 | * WRITE data into the ring READ data from the ring | |
99 | * SUBMIT entries RELEASE entries | |
100 | * | |
101 | * The producer reserves one or more entries in the ring. It can then | |
102 | * fill in these entries and finally submit them so that they can be | |
103 | * seen and read by the consumer. | |
104 | * | |
105 | * The consumer peeks into the ring to see if the producer has written | |
f1fc8ece | 106 | * any new entries. If so, the consumer can then read these entries |
15d8c916 MK |
107 | * and when it is done reading them release them back to the producer |
108 | * so that the producer can use these slots to fill in new entries. | |
109 | * | |
110 | * The function names below reflect these operations. | |
111 | */ | |
d57d7642 | 112 | |
15d8c916 | 113 | /* Functions that read and validate content from consumer rings. */ |
c497176c | 114 | |
47e4075d | 115 | static inline void __xskq_cons_read_addr_unchecked(struct xsk_queue *q, u32 cached_cons, u64 *addr) |
2b43470a BT |
116 | { |
117 | struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; | |
47e4075d | 118 | u32 idx = cached_cons & q->ring_mask; |
2b43470a | 119 | |
47e4075d MK |
120 | *addr = ring->desc[idx]; |
121 | } | |
2b43470a | 122 | |
47e4075d MK |
123 | static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr) |
124 | { | |
125 | if (q->cached_cons != q->cached_prod) { | |
126 | __xskq_cons_read_addr_unchecked(q, q->cached_cons, addr); | |
c05cd364 KL |
127 | return true; |
128 | } | |
129 | ||
2b43470a BT |
130 | return false; |
131 | } | |
35fcde7f | 132 | |
26062b18 BT |
133 | static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, |
134 | struct xdp_desc *desc) | |
135 | { | |
f654fae4 | 136 | u64 chunk, chunk_end; |
26062b18 | 137 | |
ac31565c | 138 | chunk = xp_aligned_extract_addr(pool, desc->addr); |
f654fae4 MK |
139 | if (likely(desc->len)) { |
140 | chunk_end = xp_aligned_extract_addr(pool, desc->addr + desc->len - 1); | |
141 | if (chunk != chunk_end) | |
142 | return false; | |
143 | } | |
144 | ||
26062b18 BT |
145 | if (chunk >= pool->addrs_cnt) |
146 | return false; | |
147 | ||
148 | if (desc->options) | |
149 | return false; | |
150 | return true; | |
151 | } | |
152 | ||
153 | static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool, | |
154 | struct xdp_desc *desc) | |
155 | { | |
156 | u64 addr, base_addr; | |
157 | ||
158 | base_addr = xp_unaligned_extract_addr(desc->addr); | |
159 | addr = xp_unaligned_add_offset_to_addr(desc->addr); | |
160 | ||
161 | if (desc->len > pool->chunk_size) | |
162 | return false; | |
163 | ||
164 | if (base_addr >= pool->addrs_cnt || addr >= pool->addrs_cnt || | |
165 | xp_desc_crosses_non_contig_pg(pool, addr, desc->len)) | |
166 | return false; | |
167 | ||
168 | if (desc->options) | |
169 | return false; | |
170 | return true; | |
171 | } | |
172 | ||
173 | static inline bool xp_validate_desc(struct xsk_buff_pool *pool, | |
174 | struct xdp_desc *desc) | |
175 | { | |
176 | return pool->unaligned ? xp_unaligned_validate_desc(pool, desc) : | |
177 | xp_aligned_validate_desc(pool, desc); | |
178 | } | |
179 | ||
2b43470a BT |
180 | static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q, |
181 | struct xdp_desc *d, | |
1c1efc2a | 182 | struct xsk_buff_pool *pool) |
2b43470a | 183 | { |
1c1efc2a | 184 | if (!xp_validate_desc(pool, d)) { |
35fcde7f MK |
185 | q->invalid_descs++; |
186 | return false; | |
187 | } | |
35fcde7f MK |
188 | return true; |
189 | } | |
190 | ||
03896ef1 MK |
191 | static inline bool xskq_cons_read_desc(struct xsk_queue *q, |
192 | struct xdp_desc *desc, | |
1c1efc2a | 193 | struct xsk_buff_pool *pool) |
35fcde7f | 194 | { |
c5ed924b | 195 | while (q->cached_cons != q->cached_prod) { |
35fcde7f | 196 | struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; |
c5ed924b | 197 | u32 idx = q->cached_cons & q->ring_mask; |
35fcde7f | 198 | |
c34787fc | 199 | *desc = ring->desc[idx]; |
1c1efc2a | 200 | if (xskq_cons_is_valid_desc(q, desc, pool)) |
03896ef1 | 201 | return true; |
35fcde7f | 202 | |
c5ed924b | 203 | q->cached_cons++; |
35fcde7f MK |
204 | } |
205 | ||
03896ef1 | 206 | return false; |
35fcde7f MK |
207 | } |
208 | ||
c00c4461 MF |
209 | static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt) |
210 | { | |
211 | q->cached_cons += cnt; | |
212 | } | |
213 | ||
d1bc532e MK |
214 | static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool, |
215 | u32 max) | |
9349eb3a MK |
216 | { |
217 | u32 cached_cons = q->cached_cons, nb_entries = 0; | |
d1bc532e | 218 | struct xdp_desc *descs = pool->tx_descs; |
9349eb3a MK |
219 | |
220 | while (cached_cons != q->cached_prod && nb_entries < max) { | |
221 | struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; | |
222 | u32 idx = cached_cons & q->ring_mask; | |
223 | ||
224 | descs[nb_entries] = ring->desc[idx]; | |
225 | if (unlikely(!xskq_cons_is_valid_desc(q, &descs[nb_entries], pool))) { | |
226 | /* Skip the entry */ | |
227 | cached_cons++; | |
228 | continue; | |
229 | } | |
230 | ||
231 | nb_entries++; | |
232 | cached_cons++; | |
233 | } | |
234 | ||
c00c4461 MF |
235 | /* Release valid plus any invalid entries */ |
236 | xskq_cons_release_n(q, cached_cons - q->cached_cons); | |
9349eb3a MK |
237 | return nb_entries; |
238 | } | |
239 | ||
15d8c916 MK |
240 | /* Functions for consumers */ |
241 | ||
242 | static inline void __xskq_cons_release(struct xsk_queue *q) | |
243 | { | |
a23b3f56 | 244 | smp_store_release(&q->ring->consumer, q->cached_cons); /* D, matchees A */ |
15d8c916 MK |
245 | } |
246 | ||
247 | static inline void __xskq_cons_peek(struct xsk_queue *q) | |
248 | { | |
249 | /* Refresh the local pointer */ | |
a23b3f56 | 250 | q->cached_prod = smp_load_acquire(&q->ring->producer); /* C, matches B */ |
15d8c916 MK |
251 | } |
252 | ||
253 | static inline void xskq_cons_get_entries(struct xsk_queue *q) | |
254 | { | |
255 | __xskq_cons_release(q); | |
256 | __xskq_cons_peek(q); | |
257 | } | |
258 | ||
9349eb3a | 259 | static inline u32 xskq_cons_nb_entries(struct xsk_queue *q, u32 max) |
15d8c916 MK |
260 | { |
261 | u32 entries = q->cached_prod - q->cached_cons; | |
262 | ||
9349eb3a MK |
263 | if (entries >= max) |
264 | return max; | |
15d8c916 MK |
265 | |
266 | __xskq_cons_peek(q); | |
267 | entries = q->cached_prod - q->cached_cons; | |
268 | ||
9349eb3a MK |
269 | return entries >= max ? max : entries; |
270 | } | |
271 | ||
272 | static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt) | |
273 | { | |
0fb53aab | 274 | return xskq_cons_nb_entries(q, cnt) >= cnt; |
15d8c916 MK |
275 | } |
276 | ||
2b43470a BT |
277 | static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr) |
278 | { | |
279 | if (q->cached_prod == q->cached_cons) | |
280 | xskq_cons_get_entries(q); | |
281 | return xskq_cons_read_addr_unchecked(q, addr); | |
282 | } | |
283 | ||
03896ef1 MK |
284 | static inline bool xskq_cons_peek_desc(struct xsk_queue *q, |
285 | struct xdp_desc *desc, | |
1c1efc2a | 286 | struct xsk_buff_pool *pool) |
35fcde7f | 287 | { |
c5ed924b MK |
288 | if (q->cached_prod == q->cached_cons) |
289 | xskq_cons_get_entries(q); | |
1c1efc2a | 290 | return xskq_cons_read_desc(q, desc, pool); |
35fcde7f MK |
291 | } |
292 | ||
9349eb3a MK |
293 | /* To improve performance in the xskq_cons_release functions, only update local state here. |
294 | * Reflect this to global state when we get new entries from the ring in | |
295 | * xskq_cons_get_entries() and whenever Rx or Tx processing are completed in the NAPI loop. | |
296 | */ | |
15d8c916 MK |
297 | static inline void xskq_cons_release(struct xsk_queue *q) |
298 | { | |
15d8c916 MK |
299 | q->cached_cons++; |
300 | } | |
301 | ||
3413f041 XZ |
302 | static inline u32 xskq_cons_present_entries(struct xsk_queue *q) |
303 | { | |
304 | /* No barriers needed since data is not accessed */ | |
305 | return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer); | |
306 | } | |
307 | ||
15d8c916 MK |
308 | /* Functions for producers */ |
309 | ||
9349eb3a | 310 | static inline u32 xskq_prod_nb_free(struct xsk_queue *q, u32 max) |
15d8c916 MK |
311 | { |
312 | u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons); | |
313 | ||
9349eb3a MK |
314 | if (free_entries >= max) |
315 | return max; | |
15d8c916 MK |
316 | |
317 | /* Refresh the local tail pointer */ | |
318 | q->cached_cons = READ_ONCE(q->ring->consumer); | |
319 | free_entries = q->nentries - (q->cached_prod - q->cached_cons); | |
320 | ||
9349eb3a MK |
321 | return free_entries >= max ? max : free_entries; |
322 | } | |
323 | ||
324 | static inline bool xskq_prod_is_full(struct xsk_queue *q) | |
325 | { | |
326 | return xskq_prod_nb_free(q, 1) ? false : true; | |
15d8c916 MK |
327 | } |
328 | ||
b1b95cb5 MK |
329 | static inline void xskq_prod_cancel(struct xsk_queue *q) |
330 | { | |
331 | q->cached_prod--; | |
332 | } | |
333 | ||
15d8c916 MK |
334 | static inline int xskq_prod_reserve(struct xsk_queue *q) |
335 | { | |
336 | if (xskq_prod_is_full(q)) | |
337 | return -ENOSPC; | |
338 | ||
339 | /* A, matches D */ | |
340 | q->cached_prod++; | |
341 | return 0; | |
342 | } | |
343 | ||
344 | static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr) | |
345 | { | |
346 | struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; | |
347 | ||
348 | if (xskq_prod_is_full(q)) | |
349 | return -ENOSPC; | |
350 | ||
351 | /* A, matches D */ | |
352 | ring->desc[q->cached_prod++ & q->ring_mask] = addr; | |
353 | return 0; | |
354 | } | |
355 | ||
c00c4461 MF |
356 | static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_desc *descs, |
357 | u32 nb_entries) | |
9349eb3a MK |
358 | { |
359 | struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; | |
c00c4461 | 360 | u32 i, cached_prod; |
9349eb3a MK |
361 | |
362 | /* A, matches D */ | |
363 | cached_prod = q->cached_prod; | |
364 | for (i = 0; i < nb_entries; i++) | |
365 | ring->desc[cached_prod++ & q->ring_mask] = descs[i].addr; | |
366 | q->cached_prod = cached_prod; | |
9349eb3a MK |
367 | } |
368 | ||
59e35e55 MK |
369 | static inline int xskq_prod_reserve_desc(struct xsk_queue *q, |
370 | u64 addr, u32 len) | |
c497176c BT |
371 | { |
372 | struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; | |
59e35e55 | 373 | u32 idx; |
c497176c | 374 | |
df0ae6f7 | 375 | if (xskq_prod_is_full(q)) |
c6c1f11b | 376 | return -ENOBUFS; |
c497176c | 377 | |
f63666de | 378 | /* A, matches D */ |
d7012f05 | 379 | idx = q->cached_prod++ & q->ring_mask; |
bbff2f32 | 380 | ring->desc[idx].addr = addr; |
c497176c | 381 | ring->desc[idx].len = len; |
c497176c BT |
382 | |
383 | return 0; | |
384 | } | |
385 | ||
15d8c916 | 386 | static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx) |
35fcde7f | 387 | { |
a23b3f56 | 388 | smp_store_release(&q->ring->producer, idx); /* B, matches C */ |
15d8c916 MK |
389 | } |
390 | ||
391 | static inline void xskq_prod_submit(struct xsk_queue *q) | |
392 | { | |
393 | __xskq_prod_submit(q, q->cached_prod); | |
394 | } | |
395 | ||
396 | static inline void xskq_prod_submit_addr(struct xsk_queue *q, u64 addr) | |
397 | { | |
398 | struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; | |
399 | u32 idx = q->ring->producer; | |
400 | ||
401 | ring->desc[idx++ & q->ring_mask] = addr; | |
402 | ||
403 | __xskq_prod_submit(q, idx); | |
404 | } | |
405 | ||
406 | static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries) | |
407 | { | |
408 | __xskq_prod_submit(q, q->ring->producer + nb_entries); | |
35fcde7f MK |
409 | } |
410 | ||
59e35e55 | 411 | static inline bool xskq_prod_is_empty(struct xsk_queue *q) |
c497176c | 412 | { |
11cc2d21 MK |
413 | /* No barriers needed since data is not accessed */ |
414 | return READ_ONCE(q->ring->consumer) == READ_ONCE(q->ring->producer); | |
c497176c BT |
415 | } |
416 | ||
15d8c916 MK |
417 | /* For both producers and consumers */ |
418 | ||
419 | static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q) | |
420 | { | |
421 | return q ? q->invalid_descs : 0; | |
422 | } | |
423 | ||
8aa5a335 CL |
424 | static inline u64 xskq_nb_queue_empty_descs(struct xsk_queue *q) |
425 | { | |
426 | return q ? q->queue_empty_descs : 0; | |
427 | } | |
428 | ||
b9b6b68e | 429 | struct xsk_queue *xskq_create(u32 nentries, bool umem_queue); |
c497176c | 430 | void xskq_destroy(struct xsk_queue *q_ops); |
423f3832 MK |
431 | |
432 | #endif /* _LINUX_XSK_QUEUE_H */ |