Merge branch 'stable/for-jens-4.15' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6-block.git] / include / linux / ptr_ring.h
CommitLineData
2e0ab8ca
MT
1/*
2 * Definitions for the 'struct ptr_ring' datastructure.
3 *
4 * Author:
5 * Michael S. Tsirkin <mst@redhat.com>
6 *
7 * Copyright (C) 2016 Red Hat, Inc.
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the
11 * Free Software Foundation; either version 2 of the License, or (at your
12 * option) any later version.
13 *
14 * This is a limited-size FIFO maintaining pointers in FIFO order, with
15 * one CPU producing entries and another consuming entries from a FIFO.
16 *
17 * This implementation tries to minimize cache-contention when there is a
18 * single producer and a single consumer CPU.
19 */
20
21#ifndef _LINUX_PTR_RING_H
22#define _LINUX_PTR_RING_H 1
23
24#ifdef __KERNEL__
25#include <linux/spinlock.h>
26#include <linux/cache.h>
27#include <linux/types.h>
28#include <linux/compiler.h>
29#include <linux/cache.h>
30#include <linux/slab.h>
31#include <asm/errno.h>
32#endif
33
34struct ptr_ring {
35 int producer ____cacheline_aligned_in_smp;
36 spinlock_t producer_lock;
fb9de970
MT
37 int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */
38 int consumer_tail; /* next entry to invalidate */
2e0ab8ca
MT
39 spinlock_t consumer_lock;
40 /* Shared consumer/producer data */
41 /* Read-only by both the producer and the consumer */
42 int size ____cacheline_aligned_in_smp; /* max entries in queue */
fb9de970 43 int batch; /* number of entries to consume in a batch */
2e0ab8ca
MT
44 void **queue;
45};
46
47/* Note: callers invoking this in a loop must use a compiler barrier,
84328342
MT
48 * for example cpu_relax().
49 *
50 * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock:
51 * see e.g. ptr_ring_full.
2e0ab8ca
MT
52 */
53static inline bool __ptr_ring_full(struct ptr_ring *r)
54{
55 return r->queue[r->producer];
56}
57
58static inline bool ptr_ring_full(struct ptr_ring *r)
59{
5d49de53
MT
60 bool ret;
61
62 spin_lock(&r->producer_lock);
63 ret = __ptr_ring_full(r);
64 spin_unlock(&r->producer_lock);
65
66 return ret;
67}
68
69static inline bool ptr_ring_full_irq(struct ptr_ring *r)
70{
71 bool ret;
72
73 spin_lock_irq(&r->producer_lock);
74 ret = __ptr_ring_full(r);
75 spin_unlock_irq(&r->producer_lock);
76
77 return ret;
78}
79
80static inline bool ptr_ring_full_any(struct ptr_ring *r)
81{
82 unsigned long flags;
83 bool ret;
84
85 spin_lock_irqsave(&r->producer_lock, flags);
86 ret = __ptr_ring_full(r);
87 spin_unlock_irqrestore(&r->producer_lock, flags);
88
89 return ret;
90}
91
92static inline bool ptr_ring_full_bh(struct ptr_ring *r)
93{
94 bool ret;
95
96 spin_lock_bh(&r->producer_lock);
97 ret = __ptr_ring_full(r);
98 spin_unlock_bh(&r->producer_lock);
99
100 return ret;
2e0ab8ca
MT
101}
102
103/* Note: callers invoking this in a loop must use a compiler barrier,
5d49de53 104 * for example cpu_relax(). Callers must hold producer_lock.
a8ceb5db
MT
105 * Callers are responsible for making sure pointer that is being queued
106 * points to a valid data.
2e0ab8ca
MT
107 */
108static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
109{
982fb490 110 if (unlikely(!r->size) || r->queue[r->producer])
2e0ab8ca
MT
111 return -ENOSPC;
112
a8ceb5db
MT
113 /* Make sure the pointer we are storing points to a valid data. */
114 /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */
115 smp_wmb();
116
a07d29c6 117 WRITE_ONCE(r->queue[r->producer++], ptr);
2e0ab8ca
MT
118 if (unlikely(r->producer >= r->size))
119 r->producer = 0;
120 return 0;
121}
122
e7169530
MT
123/*
124 * Note: resize (below) nests producer lock within consumer lock, so if you
125 * consume in interrupt or BH context, you must disable interrupts/BH when
126 * calling this.
127 */
2e0ab8ca
MT
128static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr)
129{
130 int ret;
131
132 spin_lock(&r->producer_lock);
133 ret = __ptr_ring_produce(r, ptr);
134 spin_unlock(&r->producer_lock);
135
136 return ret;
137}
138
139static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr)
140{
141 int ret;
142
143 spin_lock_irq(&r->producer_lock);
144 ret = __ptr_ring_produce(r, ptr);
145 spin_unlock_irq(&r->producer_lock);
146
147 return ret;
148}
149
150static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr)
151{
152 unsigned long flags;
153 int ret;
154
155 spin_lock_irqsave(&r->producer_lock, flags);
156 ret = __ptr_ring_produce(r, ptr);
157 spin_unlock_irqrestore(&r->producer_lock, flags);
158
159 return ret;
160}
161
162static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
163{
164 int ret;
165
166 spin_lock_bh(&r->producer_lock);
167 ret = __ptr_ring_produce(r, ptr);
168 spin_unlock_bh(&r->producer_lock);
169
170 return ret;
171}
172
2e0ab8ca
MT
173static inline void *__ptr_ring_peek(struct ptr_ring *r)
174{
982fb490 175 if (likely(r->size))
a07d29c6 176 return READ_ONCE(r->queue[r->consumer_head]);
982fb490 177 return NULL;
2e0ab8ca
MT
178}
179
8619d384
MT
180/*
181 * Test ring empty status without taking any locks.
182 *
183 * NB: This is only safe to call if ring is never resized.
184 *
185 * However, if some other CPU consumes ring entries at the same time, the value
186 * returned is not guaranteed to be correct.
187 *
188 * In this case - to avoid incorrectly detecting the ring
189 * as empty - the CPU consuming the ring entries is responsible
190 * for either consuming all ring entries until the ring is empty,
191 * or synchronizing with some other CPU and causing it to
192 * re-test __ptr_ring_empty and/or consume the ring enteries
193 * after the synchronization point.
194 *
195 * Note: callers invoking this in a loop must use a compiler barrier,
196 * for example cpu_relax().
197 */
5d49de53 198static inline bool __ptr_ring_empty(struct ptr_ring *r)
2e0ab8ca 199{
a259df36
MT
200 if (likely(r->size))
201 return !r->queue[READ_ONCE(r->consumer_head)];
202 return true;
2e0ab8ca
MT
203}
204
5d49de53
MT
205static inline bool ptr_ring_empty(struct ptr_ring *r)
206{
207 bool ret;
208
209 spin_lock(&r->consumer_lock);
210 ret = __ptr_ring_empty(r);
211 spin_unlock(&r->consumer_lock);
212
213 return ret;
214}
215
216static inline bool ptr_ring_empty_irq(struct ptr_ring *r)
217{
218 bool ret;
219
220 spin_lock_irq(&r->consumer_lock);
221 ret = __ptr_ring_empty(r);
222 spin_unlock_irq(&r->consumer_lock);
223
224 return ret;
225}
226
227static inline bool ptr_ring_empty_any(struct ptr_ring *r)
228{
229 unsigned long flags;
230 bool ret;
231
232 spin_lock_irqsave(&r->consumer_lock, flags);
233 ret = __ptr_ring_empty(r);
234 spin_unlock_irqrestore(&r->consumer_lock, flags);
235
236 return ret;
237}
238
239static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
240{
241 bool ret;
242
243 spin_lock_bh(&r->consumer_lock);
244 ret = __ptr_ring_empty(r);
245 spin_unlock_bh(&r->consumer_lock);
246
247 return ret;
248}
249
2e0ab8ca
MT
250/* Must only be called after __ptr_ring_peek returned !NULL */
251static inline void __ptr_ring_discard_one(struct ptr_ring *r)
252{
fb9de970
MT
253 /* Fundamentally, what we want to do is update consumer
254 * index and zero out the entry so producer can reuse it.
255 * Doing it naively at each consume would be as simple as:
406de755
MT
256 * consumer = r->consumer;
257 * r->queue[consumer++] = NULL;
258 * if (unlikely(consumer >= r->size))
259 * consumer = 0;
260 * r->consumer = consumer;
fb9de970
MT
261 * but that is suboptimal when the ring is full as producer is writing
262 * out new entries in the same cache line. Defer these updates until a
263 * batch of entries has been consumed.
264 */
406de755
MT
265 /* Note: we must keep consumer_head valid at all times for __ptr_ring_empty
266 * to work correctly.
267 */
268 int consumer_head = r->consumer_head;
269 int head = consumer_head++;
fb9de970
MT
270
271 /* Once we have processed enough entries invalidate them in
272 * the ring all at once so producer can reuse their space in the ring.
273 * We also do this when we reach end of the ring - not mandatory
274 * but helps keep the implementation simple.
275 */
406de755
MT
276 if (unlikely(consumer_head - r->consumer_tail >= r->batch ||
277 consumer_head >= r->size)) {
fb9de970
MT
278 /* Zero out entries in the reverse order: this way we touch the
279 * cache line that producer might currently be reading the last;
280 * producer won't make progress and touch other cache lines
281 * besides the first one until we write out all entries.
282 */
283 while (likely(head >= r->consumer_tail))
284 r->queue[head--] = NULL;
406de755 285 r->consumer_tail = consumer_head;
fb9de970 286 }
406de755
MT
287 if (unlikely(consumer_head >= r->size)) {
288 consumer_head = 0;
fb9de970
MT
289 r->consumer_tail = 0;
290 }
a259df36
MT
291 /* matching READ_ONCE in __ptr_ring_empty for lockless tests */
292 WRITE_ONCE(r->consumer_head, consumer_head);
2e0ab8ca
MT
293}
294
/*
 * Remove and return the entry at consumer_head, or return NULL when
 * __ptr_ring_peek() finds nothing there.
 */
static inline void *__ptr_ring_consume(struct ptr_ring *r)
{
	void *entry = __ptr_ring_peek(r);

	if (entry)
		__ptr_ring_discard_one(r);

	/*
	 * Anyone accessing data through the pointer must see it up to date.
	 * Pairs with smp_wmb in __ptr_ring_produce.
	 */
	smp_read_barrier_depends();
	return entry;
}
308
728fc8d5
JW
/*
 * Consume up to @n entries into @array, stopping early when
 * __ptr_ring_consume() returns NULL. Returns the number of entries stored.
 */
static inline int __ptr_ring_consume_batched(struct ptr_ring *r,
					     void **array, int n)
{
	int count = 0;

	while (count < n) {
		void *entry = __ptr_ring_consume(r);

		if (!entry)
			break;
		array[count++] = entry;
	}

	return count;
}
324
e7169530
MT
325/*
326 * Note: resize (below) nests producer lock within consumer lock, so if you
327 * call this in interrupt or BH context, you must disable interrupts/BH when
328 * producing.
329 */
2e0ab8ca
MT
330static inline void *ptr_ring_consume(struct ptr_ring *r)
331{
332 void *ptr;
333
334 spin_lock(&r->consumer_lock);
335 ptr = __ptr_ring_consume(r);
336 spin_unlock(&r->consumer_lock);
337
338 return ptr;
339}
340
341static inline void *ptr_ring_consume_irq(struct ptr_ring *r)
342{
343 void *ptr;
344
345 spin_lock_irq(&r->consumer_lock);
346 ptr = __ptr_ring_consume(r);
347 spin_unlock_irq(&r->consumer_lock);
348
349 return ptr;
350}
351
352static inline void *ptr_ring_consume_any(struct ptr_ring *r)
353{
354 unsigned long flags;
355 void *ptr;
356
357 spin_lock_irqsave(&r->consumer_lock, flags);
358 ptr = __ptr_ring_consume(r);
359 spin_unlock_irqrestore(&r->consumer_lock, flags);
360
361 return ptr;
362}
363
364static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
365{
366 void *ptr;
367
368 spin_lock_bh(&r->consumer_lock);
369 ptr = __ptr_ring_consume(r);
370 spin_unlock_bh(&r->consumer_lock);
371
372 return ptr;
373}
374
728fc8d5
JW
375static inline int ptr_ring_consume_batched(struct ptr_ring *r,
376 void **array, int n)
377{
378 int ret;
379
380 spin_lock(&r->consumer_lock);
381 ret = __ptr_ring_consume_batched(r, array, n);
382 spin_unlock(&r->consumer_lock);
383
384 return ret;
385}
386
387static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r,
388 void **array, int n)
389{
390 int ret;
391
392 spin_lock_irq(&r->consumer_lock);
393 ret = __ptr_ring_consume_batched(r, array, n);
394 spin_unlock_irq(&r->consumer_lock);
395
396 return ret;
397}
398
399static inline int ptr_ring_consume_batched_any(struct ptr_ring *r,
400 void **array, int n)
401{
402 unsigned long flags;
403 int ret;
404
405 spin_lock_irqsave(&r->consumer_lock, flags);
406 ret = __ptr_ring_consume_batched(r, array, n);
407 spin_unlock_irqrestore(&r->consumer_lock, flags);
408
409 return ret;
410}
411
412static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
413 void **array, int n)
414{
415 int ret;
416
417 spin_lock_bh(&r->consumer_lock);
418 ret = __ptr_ring_consume_batched(r, array, n);
419 spin_unlock_bh(&r->consumer_lock);
420
421 return ret;
422}
423
2e0ab8ca
MT
/*
 * Call @f on the entry returned by __ptr_ring_peek(@r), leaving the entry in
 * the FIFO. @f receives the pointer cast to its own parameter type and must
 * return a value; it is also called when the peek yields NULL.
 * Callers must take consumer_lock.
 */
#define __PTR_RING_PEEK_CALL(r, f) \
	((f)(__ptr_ring_peek(r)))
429
/*
 * __PTR_RING_PEEK_CALL() wrapped in spin_lock()/spin_unlock() on
 * consumer_lock; evaluates to the value returned by @f.
 */
#define PTR_RING_PEEK_CALL(r, f) ({ \
	typeof((f)(NULL)) __ptr_ring_peek_ret; \
	\
	spin_lock(&(r)->consumer_lock); \
	__ptr_ring_peek_ret = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock(&(r)->consumer_lock); \
	__ptr_ring_peek_ret; \
})
438
/*
 * __PTR_RING_PEEK_CALL() wrapped in spin_lock_irq()/spin_unlock_irq() on
 * consumer_lock; evaluates to the value returned by @f.
 */
#define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \
	typeof((f)(NULL)) __ptr_ring_peek_ret; \
	\
	spin_lock_irq(&(r)->consumer_lock); \
	__ptr_ring_peek_ret = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_irq(&(r)->consumer_lock); \
	__ptr_ring_peek_ret; \
})
447
/*
 * __PTR_RING_PEEK_CALL() wrapped in spin_lock_bh()/spin_unlock_bh() on
 * consumer_lock; evaluates to the value returned by @f.
 */
#define PTR_RING_PEEK_CALL_BH(r, f) ({ \
	typeof((f)(NULL)) __ptr_ring_peek_ret; \
	\
	spin_lock_bh(&(r)->consumer_lock); \
	__ptr_ring_peek_ret = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_bh(&(r)->consumer_lock); \
	__ptr_ring_peek_ret; \
})
456
/*
 * __PTR_RING_PEEK_CALL() wrapped in spin_lock_irqsave()/
 * spin_unlock_irqrestore() on consumer_lock; evaluates to the value
 * returned by @f.
 */
#define PTR_RING_PEEK_CALL_ANY(r, f) ({ \
	typeof((f)(NULL)) __ptr_ring_peek_ret; \
	unsigned long __ptr_ring_peek_flags; \
	\
	spin_lock_irqsave(&(r)->consumer_lock, __ptr_ring_peek_flags); \
	__ptr_ring_peek_ret = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_irqrestore(&(r)->consumer_lock, __ptr_ring_peek_flags); \
	__ptr_ring_peek_ret; \
})
466
0bf7800f
JW
467/* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See
468 * documentation for vmalloc for which of them are legal.
469 */
81fbfe8a 470static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
5d49de53 471{
54e02162 472 if (size > KMALLOC_MAX_SIZE / sizeof(void *))
6e6e41c3 473 return NULL;
0bf7800f 474 return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO);
5d49de53
MT
475}
476
fb9de970
MT
477static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
478{
479 r->size = size;
480 r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue));
481 /* We need to set batch at least to 1 to make logic
482 * in __ptr_ring_discard_one work correctly.
483 * Batching too much (because ring is small) would cause a lot of
484 * burstiness. Needs tuning, for now disable batching.
485 */
486 if (r->batch > r->size / 2 || !r->batch)
487 r->batch = 1;
488}
489
2e0ab8ca
MT
490static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
491{
5d49de53 492 r->queue = __ptr_ring_init_queue_alloc(size, gfp);
2e0ab8ca
MT
493 if (!r->queue)
494 return -ENOMEM;
495
fb9de970
MT
496 __ptr_ring_set_size(r, size);
497 r->producer = r->consumer_head = r->consumer_tail = 0;
2e0ab8ca
MT
498 spin_lock_init(&r->producer_lock);
499 spin_lock_init(&r->consumer_lock);
500
501 return 0;
502}
503
197a5212
MT
504/*
505 * Return entries into ring. Destroy entries that don't fit.
506 *
507 * Note: this is expected to be a rare slow path operation.
508 *
509 * Note: producer lock is nested within consumer lock, so if you
510 * resize you must make sure all uses nest correctly.
511 * In particular if you consume ring in interrupt or BH context, you must
512 * disable interrupts/BH when doing so.
513 */
514static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
515 void (*destroy)(void *))
516{
517 unsigned long flags;
518 int head;
519
520 spin_lock_irqsave(&r->consumer_lock, flags);
521 spin_lock(&r->producer_lock);
522
523 if (!r->size)
524 goto done;
525
526 /*
527 * Clean out buffered entries (for simplicity). This way following code
528 * can test entries for NULL and if not assume they are valid.
529 */
530 head = r->consumer_head - 1;
531 while (likely(head >= r->consumer_tail))
532 r->queue[head--] = NULL;
533 r->consumer_tail = r->consumer_head;
534
535 /*
536 * Go over entries in batch, start moving head back and copy entries.
537 * Stop when we run into previously unconsumed entries.
538 */
539 while (n) {
540 head = r->consumer_head - 1;
541 if (head < 0)
542 head = r->size - 1;
543 if (r->queue[head]) {
544 /* This batch entry will have to be destroyed. */
545 goto done;
546 }
547 r->queue[head] = batch[--n];
a259df36
MT
548 r->consumer_tail = head;
549 /* matching READ_ONCE in __ptr_ring_empty for lockless tests */
550 WRITE_ONCE(r->consumer_head, head);
197a5212
MT
551 }
552
553done:
554 /* Destroy all entries left in the batch. */
555 while (n)
556 destroy(batch[--n]);
557 spin_unlock(&r->producer_lock);
558 spin_unlock_irqrestore(&r->consumer_lock, flags);
559}
560
59e6ae53
MT
561static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
562 int size, gfp_t gfp,
563 void (*destroy)(void *))
5d49de53 564{
5d49de53 565 int producer = 0;
5d49de53
MT
566 void **old;
567 void *ptr;
568
e7169530 569 while ((ptr = __ptr_ring_consume(r)))
5d49de53
MT
570 if (producer < size)
571 queue[producer++] = ptr;
572 else if (destroy)
573 destroy(ptr);
574
fb9de970 575 __ptr_ring_set_size(r, size);
5d49de53 576 r->producer = producer;
fb9de970
MT
577 r->consumer_head = 0;
578 r->consumer_tail = 0;
5d49de53
MT
579 old = r->queue;
580 r->queue = queue;
581
59e6ae53
MT
582 return old;
583}
584
e7169530
MT
585/*
586 * Note: producer lock is nested within consumer lock, so if you
587 * resize you must make sure all uses nest correctly.
588 * In particular if you consume ring in interrupt or BH context, you must
589 * disable interrupts/BH when doing so.
590 */
59e6ae53
MT
591static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
592 void (*destroy)(void *))
593{
594 unsigned long flags;
595 void **queue = __ptr_ring_init_queue_alloc(size, gfp);
596 void **old;
597
598 if (!queue)
599 return -ENOMEM;
600
e7169530
MT
601 spin_lock_irqsave(&(r)->consumer_lock, flags);
602 spin_lock(&(r)->producer_lock);
59e6ae53
MT
603
604 old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy);
605
e7169530
MT
606 spin_unlock(&(r)->producer_lock);
607 spin_unlock_irqrestore(&(r)->consumer_lock, flags);
5d49de53 608
0bf7800f 609 kvfree(old);
5d49de53
MT
610
611 return 0;
612}
613
e7169530
MT
614/*
615 * Note: producer lock is nested within consumer lock, so if you
616 * resize you must make sure all uses nest correctly.
617 * In particular if you consume ring in interrupt or BH context, you must
618 * disable interrupts/BH when doing so.
619 */
81fbfe8a
ED
620static inline int ptr_ring_resize_multiple(struct ptr_ring **rings,
621 unsigned int nrings,
59e6ae53
MT
622 int size,
623 gfp_t gfp, void (*destroy)(void *))
624{
625 unsigned long flags;
626 void ***queues;
627 int i;
628
81fbfe8a 629 queues = kmalloc_array(nrings, sizeof(*queues), gfp);
59e6ae53
MT
630 if (!queues)
631 goto noqueues;
632
633 for (i = 0; i < nrings; ++i) {
634 queues[i] = __ptr_ring_init_queue_alloc(size, gfp);
635 if (!queues[i])
636 goto nomem;
637 }
638
639 for (i = 0; i < nrings; ++i) {
e7169530
MT
640 spin_lock_irqsave(&(rings[i])->consumer_lock, flags);
641 spin_lock(&(rings[i])->producer_lock);
59e6ae53
MT
642 queues[i] = __ptr_ring_swap_queue(rings[i], queues[i],
643 size, gfp, destroy);
e7169530
MT
644 spin_unlock(&(rings[i])->producer_lock);
645 spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags);
59e6ae53
MT
646 }
647
648 for (i = 0; i < nrings; ++i)
0bf7800f 649 kvfree(queues[i]);
59e6ae53
MT
650
651 kfree(queues);
652
653 return 0;
654
655nomem:
656 while (--i >= 0)
0bf7800f 657 kvfree(queues[i]);
59e6ae53
MT
658
659 kfree(queues);
660
661noqueues:
662 return -ENOMEM;
663}
664
5d49de53 665static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *))
2e0ab8ca 666{
5d49de53
MT
667 void *ptr;
668
669 if (destroy)
670 while ((ptr = ptr_ring_consume(r)))
671 destroy(ptr);
0bf7800f 672 kvfree(r->queue);
2e0ab8ca
MT
673}
674
675#endif /* _LINUX_PTR_RING_H */