author		Bart Van Assche <bvanassche@acm.org>	2020-06-21 13:36:45 -0700
committer	Jens Axboe <axboe@kernel.dk>		2020-06-21 20:49:50 -0600
commit		b9c0bf79aa879727f55f4fe7333dd41bebd0acd4
tree		dfbce2a1c21a04d86314e5ccdc91331787a101c4
parent		7c0f3faf775d29371c57c6eafabea4c6d5cce7f5
src/include/liburing/barrier.h: Use C11 atomics
Instead of using a combination of open-coded atomic primitives and gcc
builtins, use C11 atomics for all CPU architectures. Note: despite their
name, atomic_*() operations do not necessarily translate into an atomic
instruction. This patch changes the order of the instructions in e.g.
io_uring_get_sqe(), but not the number of instructions generated by gcc 10
on x86_64:
Without this patch:
0x0000000000000360 <+0>: mov 0x44(%rdi),%eax
0x0000000000000363 <+3>: lea 0x1(%rax),%edx
0x0000000000000366 <+6>: mov (%rdi),%rax
0x0000000000000369 <+9>: mov (%rax),%eax
0x000000000000036b <+11>: mov 0x18(%rdi),%rcx
0x000000000000036f <+15>: mov %edx,%esi
0x0000000000000371 <+17>: sub %eax,%esi
0x0000000000000373 <+19>: xor %eax,%eax
0x0000000000000375 <+21>: cmp (%rcx),%esi
0x0000000000000377 <+23>: ja 0x38d <io_uring_get_sqe+45>
0x0000000000000379 <+25>: mov 0x10(%rdi),%rax
0x000000000000037d <+29>: mov (%rax),%eax
0x000000000000037f <+31>: and 0x44(%rdi),%eax
0x0000000000000382 <+34>: mov %edx,0x44(%rdi)
0x0000000000000385 <+37>: shl $0x6,%rax
0x0000000000000389 <+41>: add 0x38(%rdi),%rax
0x000000000000038d <+45>: retq
With this patch applied:
0x0000000000000360 <+0>: mov 0x44(%rdi),%eax
0x0000000000000363 <+3>: lea 0x1(%rax),%edx
0x0000000000000366 <+6>: mov (%rdi),%rax
0x0000000000000369 <+9>: mov %edx,%esi
0x000000000000036b <+11>: mov (%rax),%eax
0x000000000000036d <+13>: sub %eax,%esi
0x000000000000036f <+15>: xor %eax,%eax
0x0000000000000371 <+17>: mov 0x18(%rdi),%rcx
0x0000000000000375 <+21>: cmp (%rcx),%esi
0x0000000000000377 <+23>: ja 0x38d <io_uring_get_sqe+45>
0x0000000000000379 <+25>: mov 0x10(%rdi),%rax
0x000000000000037d <+29>: mov (%rax),%eax
0x000000000000037f <+31>: and 0x44(%rdi),%eax
0x0000000000000382 <+34>: mov %edx,0x44(%rdi)
0x0000000000000385 <+37>: shl $0x6,%rax
0x0000000000000389 <+41>: add 0x38(%rdi),%rax
0x000000000000038d <+45>: retq
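As a side note: the instruction count is unchanged because plain x86-64 loads
and stores already provide acquire and release ordering under the
architecture's TSO memory model, so the relaxed, acquire and release C11
memory orders only constrain compiler reordering on that target. A minimal
standalone sketch (hypothetical file name test_atomics.c, not part of this
patch) that shows the same effect with "gcc -O2 -S":

/* test_atomics.c: hypothetical example, not part of this patch.
 * Build with "gcc -O2 -S test_atomics.c"; on x86_64 both functions
 * compile to a single mov plus ret, with no fence instructions. */
#include <stdatomic.h>

unsigned load_acquire(atomic_uint *p)
{
	return atomic_load_explicit(p, memory_order_acquire);
}

void store_release(atomic_uint *p, unsigned v)
{
	atomic_store_explicit(p, v, memory_order_release);
}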
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--	src/include/liburing/barrier.h	44
1 file changed, 10 insertions(+), 34 deletions(-)
diff --git a/src/include/liburing/barrier.h b/src/include/liburing/barrier.h
index ad69506..c8aa421 100644
--- a/src/include/liburing/barrier.h
+++ b/src/include/liburing/barrier.h
@@ -2,6 +2,8 @@
 #ifndef LIBURING_BARRIER_H
 #define LIBURING_BARRIER_H
 
+#include <stdatomic.h>
+
 /*
 From the kernel documentation file refcount-vs-atomic.rst:
 
@@ -21,40 +23,14 @@
 after the acquire operation executes. This is implemented using
 :c:func:`smp_acquire__after_ctrl_dep`.
 */
 
-/* From tools/include/linux/compiler.h */
-/* Optimization barrier */
-/* The "volatile" is due to gcc bugs */
-#define io_uring_barrier()	__asm__ __volatile__("": : :"memory")
-
-/* From tools/virtio/linux/compiler.h */
-#define IO_URING_WRITE_ONCE(var, val) \
-	(*((volatile __typeof(val) *)(&(var))) = (val))
-#define IO_URING_READ_ONCE(var)	(*((volatile __typeof(var) *)(&(var))))
-
+#define IO_URING_WRITE_ONCE(var, val)				\
+	atomic_store_explicit(&(var), (val), memory_order_relaxed)
+#define IO_URING_READ_ONCE(var)					\
+	atomic_load_explicit(&(var), memory_order_relaxed)
-#if defined(__x86_64__) || defined(__i386__)
-/* Adapted from arch/x86/include/asm/barrier.h */
-#define io_uring_smp_store_release(p, v)		\
-do {							\
-	io_uring_barrier();				\
-	IO_URING_WRITE_ONCE(*(p), (v));			\
-} while (0)
-
-#define io_uring_smp_load_acquire(p)			\
-({							\
-	__typeof(*p) ___p1 = IO_URING_READ_ONCE(*(p));	\
-	io_uring_barrier();				\
-	___p1;						\
-})
-
-#else /* defined(__x86_64__) || defined(__i386__) */
-/*
- * Add arch appropriate definitions. Use built-in atomic operations for
- * archs we don't have support for.
- */
-#define io_uring_smp_store_release(p, v)		\
-	__atomic_store_n(p, v, __ATOMIC_RELEASE)
-#define io_uring_smp_load_acquire(p)	__atomic_load_n(p, __ATOMIC_ACQUIRE)
-#endif /* defined(__x86_64__) || defined(__i386__) */
 
+#define io_uring_smp_store_release(p, v)			\
+	atomic_store_explicit((p), (v), memory_order_release)
+#define io_uring_smp_load_acquire(p)				\
+	atomic_load_explicit((p), memory_order_acquire)
 #endif /* defined(LIBURING_BARRIER_H) */
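For readers unfamiliar with the acquire/release pairing these macros express,
the following is a minimal, self-contained sketch of how a producer and a
consumer might use them; the ring structure and function names are invented
for illustration and do not match liburing's actual layout:

/* Illustrative sketch only; fake_ring is not liburing's real SQ/CQ layout. */
#include <stdatomic.h>
#include <stdio.h>

struct fake_ring {
	unsigned entries[8];
	atomic_uint tail;	/* written by the producer, read by the consumer */
};

static void producer_push(struct fake_ring *r, unsigned v)
{
	unsigned t = atomic_load_explicit(&r->tail, memory_order_relaxed);

	r->entries[t & 7] = v;
	/* Release store: the entry written above becomes visible to any
	 * thread that observes the new tail with an acquire load. */
	atomic_store_explicit(&r->tail, t + 1, memory_order_release);
}

static unsigned consumer_peek(struct fake_ring *r)
{
	/* Acquire load: pairs with the release store in producer_push(). */
	unsigned t = atomic_load_explicit(&r->tail, memory_order_acquire);

	return t ? r->entries[(t - 1) & 7] : 0;
}

int main(void)
{
	struct fake_ring r = { .tail = 0 };

	producer_push(&r, 42);
	printf("entry: %u\n", consumer_peek(&r));
	return 0;
}

The relaxed load of the tail in producer_push() mirrors how
IO_URING_READ_ONCE is now defined; ordering against the entry write is
provided solely by the release store and the matching acquire load.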