From a250edd0678ac6aef34bfbd01423a7b87a1d5f8d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 Oct 2017 12:11:44 -0600 Subject: [PATCH] gettime: improve cpu clock test We're missing a synchronization before reading and comparing with the current *t->seq in the loop. Rewrite the loop to use compare-and-exchange for the increment, and ensure that we have the proper synchronization inserted. This should both be faster and more reliable. Signed-off-by: Jens Axboe --- configure | 46 ++++++++++++++++++++++++++++++++++++++++++++++ gettime.c | 21 ++++++++++++--------- 2 files changed, 58 insertions(+), 9 deletions(-) diff --git a/configure b/configure index 2b46ab83..d34c0006 100755 --- a/configure +++ b/configure @@ -343,6 +343,8 @@ CYGWIN*) # Flags below are still necessary mostly for MinGW. socklen_t="yes" sfaa="yes" + sync_sync="yes" + cmp_swap="yes" rusage_thread="yes" fdatasync="yes" clock_gettime="yes" # clock_monotonic probe has dependency on this @@ -706,6 +708,44 @@ if compile_prog "" "" "__sync_fetch_and_add()" ; then fi print_config "__sync_fetch_and_add" "$sfaa" +########################################## +# __sync_synchronize() test +if test "$sync_sync" != "yes" ; then + sync_sync="no" +fi +cat > $TMPC << EOF +#include <inttypes.h> + +int main(int argc, char **argv) +{ + __sync_synchronize(); + return 0; +} +EOF +if compile_prog "" "" "__sync_synchronize()" ; then + sync_sync="yes" +fi +print_config "__sync_synchronize" "$sync_sync" + +########################################## +# __sync_val_compare_and_swap() test +if test "$cmp_swap" != "yes" ; then + cmp_swap="no" +fi +cat > $TMPC << EOF +#include <inttypes.h> + +int main(int argc, char **argv) +{ + int x = 0; + return __sync_val_compare_and_swap(&x, 1, 2); +} +EOF +if compile_prog "" "" "__sync_val_compare_and_swap()" ; then + cmp_swap="yes" +fi +print_config "__sync_val_compare_and_swap" "$cmp_swap" + ########################################## # libverbs probe if test "$libverbs" != "yes" ; then @@ -2108,6 +2148,12 @@ fi 
if test "$sfaa" = "yes" ; then output_sym "CONFIG_SFAA" fi +if test "$sync_sync" = "yes" ; then + output_sym "CONFIG_SYNC_SYNC" +fi +if test "$cmp_swap" = "yes" ; then + output_sym "CONFIG_CMP_SWAP" +fi if test "$libverbs" = "yes" -a "$rdmacm" = "yes" ; then output_sym "CONFIG_RDMA" fi diff --git a/gettime.c b/gettime.c index 1cbef84b..c256a96c 100644 --- a/gettime.c +++ b/gettime.c @@ -548,7 +548,7 @@ uint64_t time_since_now(const struct timespec *s) } #if defined(FIO_HAVE_CPU_AFFINITY) && defined(ARCH_HAVE_CPU_CLOCK) && \ - defined(CONFIG_SFAA) + defined(CONFIG_SYNC_SYNC) && defined(CONFIG_CMP_SWAP) #define CLOCK_ENTRIES_DEBUG 100000 #define CLOCK_ENTRIES_TEST 1000 @@ -570,9 +570,10 @@ struct clock_thread { struct clock_entry *entries; }; -static inline uint32_t atomic32_inc_return(uint32_t *seq) +static inline uint32_t atomic32_compare_and_swap(uint32_t *ptr, uint32_t old, + uint32_t new) { - return 1 + __sync_fetch_and_add(seq, 1); + return __sync_val_compare_and_swap(ptr, old, new); } static void *clock_thread_fn(void *data) @@ -580,7 +581,6 @@ static void *clock_thread_fn(void *data) struct clock_thread *t = data; struct clock_entry *c; os_cpu_mask_t cpu_mask; - uint32_t last_seq; unsigned long long first; int i; @@ -604,7 +604,6 @@ static void *clock_thread_fn(void *data) pthread_mutex_unlock(&t->started); first = get_cpu_clock(); - last_seq = 0; c = &t->entries[0]; for (i = 0; i < t->nr_entries; i++, c++) { uint32_t seq; @@ -612,11 +611,15 @@ static void *clock_thread_fn(void *data) c->cpu = t->cpu; do { - seq = atomic32_inc_return(t->seq); - if (seq < last_seq) + seq = *t->seq; + if (seq == UINT_MAX) break; + __sync_synchronize(); tsc = get_cpu_clock(); - } while (seq != *t->seq); + } while (seq != atomic32_compare_and_swap(t->seq, seq, seq + 1)); + + if (seq == UINT_MAX) + break; c->seq = seq; c->tsc = tsc; @@ -634,7 +637,7 @@ static void *clock_thread_fn(void *data) * The most common platform clock breakage is returning zero * indefinitely. 
Check for that and return failure. */ - if (!t->entries[i - 1].tsc && !t->entries[0].tsc) + if (i > 1 && !t->entries[i - 1].tsc && !t->entries[0].tsc) goto err; fio_cpuset_exit(&cpu_mask); -- 2.25.1