X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=gettime.c;h=272a3e6244527c27d59a06ff99fb4871424a063b;hp=6a50ec548d0091d0f489fb45c6310254539ed7f2;hb=a819dfb6b6b1e1e1339bbd8c3a446b52b5e7575c;hpb=84e893fd54a0895b9eadd8b4c62243faf19c9305 diff --git a/gettime.c b/gettime.c index 6a50ec54..272a3e62 100644 --- a/gettime.c +++ b/gettime.c @@ -2,20 +2,14 @@ * Clock functions */ -#include #include -#include -#include #include "fio.h" -#include "smalloc.h" - -#include "hash.h" #include "os/os.h" #if defined(ARCH_HAVE_CPU_CLOCK) #ifndef ARCH_CPU_CLOCK_CYCLES_PER_USEC -static unsigned long cycles_per_msec; +static unsigned long long cycles_per_msec; static unsigned long long cycles_start; static unsigned long long clock_mult; static unsigned long long max_cycles_mask; @@ -28,7 +22,7 @@ static unsigned int max_cycles_shift; static unsigned int cycles_wrap; #endif #endif -int tsc_reliable = 0; +bool tsc_reliable = false; struct tv_valid { int warned; @@ -243,16 +237,15 @@ static unsigned long get_cycles_per_msec(void) c_s = get_cpu_clock(); do { __fio_gettime(&e); + c_e = get_cpu_clock(); - elapsed = utime_since(&s, &e); - if (elapsed >= 1280) { - c_e = get_cpu_clock(); + elapsed = ntime_since(&s, &e); + if (elapsed >= 1280000) break; - } } while (1); fio_clock_source = old_cs; - return (c_e - c_s) * 1000 / elapsed; + return (c_e - c_s) * 1000000 / elapsed; } #define NR_TIME_ITERS 50 @@ -305,15 +298,16 @@ static int calibrate_cpu_clock(void) avg /= samples; cycles_per_msec = avg; - dprint(FD_TIME, "avg: %llu\n", (unsigned long long) avg); - dprint(FD_TIME, "min=%llu, max=%llu, mean=%f, S=%f\n", + dprint(FD_TIME, "min=%llu, max=%llu, mean=%f, S=%f, N=%d\n", (unsigned long long) minc, - (unsigned long long) maxc, mean, S); + (unsigned long long) maxc, mean, S, NR_TIME_ITERS); + dprint(FD_TIME, "trimmed mean=%llu, N=%d\n", (unsigned long long) avg, samples); max_ticks = MAX_CLOCK_SEC * cycles_per_msec * 1000ULL; - max_mult = ULLONG_MAX / max_ticks; - dprint(FD_TIME, "\n\nmax_ticks=%llu, __builtin_clzll=%d, max_mult=%llu\n", - max_ticks, __builtin_clzll(max_ticks), max_mult); + max_mult = ULLONG_MAX / max_ticks; + dprint(FD_TIME, "\n\nmax_ticks=%llu, __builtin_clzll=%d, " + "max_mult=%llu\n", max_ticks, + __builtin_clzll(max_ticks), max_mult); /* * Find the largest shift count that will produce @@ -326,30 +320,41 @@ static int calibrate_cpu_clock(void) dprint(FD_TIME, "tmp=%llu, sft=%u\n", tmp, sft); } - clock_shift = sft; - clock_mult = (1ULL << sft) * 1000000 / cycles_per_msec; - dprint(FD_TIME, "clock_shift=%u, clock_mult=%llu\n", clock_shift, clock_mult); + clock_shift = sft; + clock_mult = (1ULL << sft) * 1000000 / cycles_per_msec; + dprint(FD_TIME, "clock_shift=%u, clock_mult=%llu\n", clock_shift, + clock_mult); - // Find the greatest power of 2 clock ticks that is less than the ticks in MAX_CLOCK_SEC_2STAGE + /* + * Find the greatest power of 2 clock ticks that is less than the + * ticks in MAX_CLOCK_SEC_2STAGE + */ max_cycles_shift = max_cycles_mask = 0; tmp = MAX_CLOCK_SEC * 1000ULL * cycles_per_msec; - dprint(FD_TIME, "tmp=%llu, max_cycles_shift=%u\n", tmp, max_cycles_shift); + dprint(FD_TIME, "tmp=%llu, max_cycles_shift=%u\n", tmp, + max_cycles_shift); while (tmp > 1) { tmp >>= 1; max_cycles_shift++; dprint(FD_TIME, "tmp=%llu, max_cycles_shift=%u\n", tmp, max_cycles_shift); } - // if use use (1ULL << max_cycles_shift) * 1000 / cycles_per_msec here we will - // have a discontinuity every (1ULL << max_cycles_shift) cycles - nsecs_for_max_cycles = ((1ULL << max_cycles_shift) * clock_mult) >> clock_shift; + /* + * if use use (1ULL << max_cycles_shift) * 1000 / cycles_per_msec + * here we will have a discontinuity every + * (1ULL << max_cycles_shift) cycles + */ + nsecs_for_max_cycles = ((1ULL << max_cycles_shift) * clock_mult) + >> clock_shift; - // Use a bitmask to calculate ticks % (1ULL << max_cycles_shift) + /* Use a bitmask to calculate ticks % (1ULL << max_cycles_shift) */ for (tmp = 0; tmp < max_cycles_shift; tmp++) max_cycles_mask |= 1ULL << tmp; - dprint(FD_TIME, "max_cycles_shift=%u, 2^max_cycles_shift=%llu, nsecs_for_max_cycles=%llu, max_cycles_mask=%016llx\n", - max_cycles_shift, (1ULL << max_cycles_shift), - nsecs_for_max_cycles, max_cycles_mask); + dprint(FD_TIME, "max_cycles_shift=%u, 2^max_cycles_shift=%llu, " + "nsecs_for_max_cycles=%llu, " + "max_cycles_mask=%016llx\n", + max_cycles_shift, (1ULL << max_cycles_shift), + nsecs_for_max_cycles, max_cycles_mask); cycles_start = get_cpu_clock(); dprint(FD_TIME, "cycles_start=%llu\n", cycles_start); @@ -367,7 +372,7 @@ static int calibrate_cpu_clock(void) #endif // ARCH_HAVE_CPU_CLOCK #ifndef CONFIG_TLS_THREAD -void fio_local_clock_init(int is_thread) +void fio_local_clock_init(void) { struct tv_valid *t; @@ -383,7 +388,7 @@ static void kill_tv_tls_key(void *data) free(data); } #else -void fio_local_clock_init(int is_thread) +void fio_local_clock_init(void) { } #endif @@ -401,7 +406,7 @@ void fio_clock_init(void) fio_clock_source_inited = fio_clock_source; if (calibrate_cpu_clock()) - tsc_reliable = 0; + tsc_reliable = false; /* * If the arch sets tsc_reliable != 0, then it must be good enough @@ -413,6 +418,7 @@ void fio_clock_init(void) fio_clock_source = CS_CPUCLOCK; } else if (fio_clock_source == CS_CPUCLOCK) log_info("fio: clocksource=cpu may not be reliable\n"); + dprint(FD_TIME, "gettime: clocksource=%d\n", (int) fio_clock_source); } uint64_t ntime_since(const struct timespec *s, const struct timespec *e) @@ -435,6 +441,14 @@ uint64_t ntime_since(const struct timespec *s, const struct timespec *e) return nsec + (sec * 1000000000LL); } +uint64_t ntime_since_now(const struct timespec *s) +{ + struct timespec now; + + fio_gettime(&now, NULL); + return ntime_since(s, &now); +} + uint64_t utime_since(const struct timespec *s, const struct timespec *e) { int64_t sec, usec; @@ -527,10 +541,10 @@ uint64_t time_since_now(const struct timespec *s) } #if defined(FIO_HAVE_CPU_AFFINITY) && defined(ARCH_HAVE_CPU_CLOCK) && \ - defined(CONFIG_SFAA) + defined(CONFIG_SYNC_SYNC) && defined(CONFIG_CMP_SWAP) #define CLOCK_ENTRIES_DEBUG 100000 -#define CLOCK_ENTRIES_TEST 10000 +#define CLOCK_ENTRIES_TEST 1000 struct clock_entry { uint32_t seq; @@ -542,16 +556,16 @@ struct clock_thread { pthread_t thread; int cpu; int debug; - pthread_mutex_t lock; - pthread_mutex_t started; + struct fio_sem lock; unsigned long nr_entries; uint32_t *seq; struct clock_entry *entries; }; -static inline uint32_t atomic32_inc_return(uint32_t *seq) +static inline uint32_t atomic32_compare_and_swap(uint32_t *ptr, uint32_t old, + uint32_t new) { - return 1 + __sync_fetch_and_add(seq, 1); + return __sync_val_compare_and_swap(ptr, old, new); } static void *clock_thread_fn(void *data) @@ -559,7 +573,6 @@ static void *clock_thread_fn(void *data) struct clock_thread *t = data; struct clock_entry *c; os_cpu_mask_t cpu_mask; - uint32_t last_seq; unsigned long long first; int i; @@ -579,11 +592,9 @@ static void *clock_thread_fn(void *data) goto err; } - pthread_mutex_lock(&t->lock); - pthread_mutex_unlock(&t->started); + fio_sem_down(&t->lock); first = get_cpu_clock(); - last_seq = 0; c = &t->entries[0]; for (i = 0; i < t->nr_entries; i++, c++) { uint32_t seq; @@ -591,11 +602,15 @@ static void *clock_thread_fn(void *data) c->cpu = t->cpu; do { - seq = atomic32_inc_return(t->seq); - if (seq < last_seq) + seq = *t->seq; + if (seq == UINT_MAX) break; + __sync_synchronize(); tsc = get_cpu_clock(); - } while (seq != *t->seq); + } while (seq != atomic32_compare_and_swap(t->seq, seq, seq + 1)); + + if (seq == UINT_MAX) + break; c->seq = seq; c->tsc = tsc; @@ -613,7 +628,7 @@ static void *clock_thread_fn(void *data) * The most common platform clock breakage is returning zero * indefinitely. Check for that and return failure. */ - if (!t->entries[i - 1].tsc && !t->entries[0].tsc) + if (i > 1 && !t->entries[i - 1].tsc && !t->entries[0].tsc) goto err; fio_cpuset_exit(&cpu_mask); @@ -678,9 +693,7 @@ int fio_monotonic_clocktest(int debug) t->seq = &seq; t->nr_entries = nr_entries; t->entries = &entries[i * nr_entries]; - pthread_mutex_init(&t->lock, NULL); - pthread_mutex_init(&t->started, NULL); - pthread_mutex_lock(&t->lock); + __fio_sem_init(&t->lock, FIO_SEM_LOCKED); if (pthread_create(&t->thread, NULL, clock_thread_fn, t)) { failed++; nr_cpus = i; @@ -691,13 +704,7 @@ int fio_monotonic_clocktest(int debug) for (i = 0; i < nr_cpus; i++) { struct clock_thread *t = &cthreads[i]; - pthread_mutex_lock(&t->started); - } - - for (i = 0; i < nr_cpus; i++) { - struct clock_thread *t = &cthreads[i]; - - pthread_mutex_unlock(&t->lock); + fio_sem_up(&t->lock); } for (i = 0; i < nr_cpus; i++) { @@ -707,6 +714,7 @@ int fio_monotonic_clocktest(int debug) pthread_join(t->thread, &ret); if (ret) failed++; + __fio_sem_remove(&t->lock); } free(cthreads);