X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=gettime.c;h=14462420219812266033a297d3c4d38f8fdc9006;hp=1cbef84b0d765f224309214067fa92f1048c7fed;hb=HEAD;hpb=cae9edd999e5233a1ca54d34cd18d90596f125b6 diff --git a/gettime.c b/gettime.c index 1cbef84b..5ca31206 100644 --- a/gettime.c +++ b/gettime.c @@ -2,15 +2,9 @@ * Clock functions */ -#include #include -#include -#include #include "fio.h" -#include "smalloc.h" - -#include "hash.h" #include "os/os.h" #if defined(ARCH_HAVE_CPU_CLOCK) @@ -133,18 +127,33 @@ static void fio_init gtod_init(void) #endif /* FIO_DEBUG_TIME */ -#ifdef CONFIG_CLOCK_GETTIME -static int fill_clock_gettime(struct timespec *ts) +/* + * Queries the value of the monotonic clock if a monotonic clock is available + * or the wall clock time if no monotonic clock is available. Returns 0 if + * querying the clock succeeded or -1 if querying the clock failed. + */ +int fio_get_mono_time(struct timespec *ts) { -#if defined(CONFIG_CLOCK_MONOTONIC_RAW) - return clock_gettime(CLOCK_MONOTONIC_RAW, ts); -#elif defined(CONFIG_CLOCK_MONOTONIC) - return clock_gettime(CLOCK_MONOTONIC, ts); + int ret; + +#ifdef CONFIG_CLOCK_GETTIME +#if defined(CONFIG_CLOCK_MONOTONIC) + ret = clock_gettime(CLOCK_MONOTONIC, ts); #else - return clock_gettime(CLOCK_REALTIME, ts); + ret = clock_gettime(CLOCK_REALTIME, ts); #endif -} +#else + struct timeval tv; + + ret = gettimeofday(&tv, NULL); + if (ret == 0) { + ts->tv_sec = tv.tv_sec; + ts->tv_nsec = tv.tv_usec * 1000; + } #endif + assert(ret <= 0); + return ret; +} static void __fio_gettime(struct timespec *tp) { @@ -161,8 +170,8 @@ static void __fio_gettime(struct timespec *tp) #endif #ifdef CONFIG_CLOCK_GETTIME case CS_CGETTIME: { - if (fill_clock_gettime(tp) < 0) { - log_err("fio: clock_gettime fails\n"); + if (fio_get_mono_time(tp) < 0) { + log_err("fio: fio_get_mono_time() fails\n"); assert(0); } break; @@ -230,29 +239,21 @@ static unsigned long get_cycles_per_msec(void) { struct timespec s, e; uint64_t c_s, c_e; - enum fio_cs old_cs = fio_clock_source; uint64_t elapsed; -#ifdef CONFIG_CLOCK_GETTIME - fio_clock_source = CS_CGETTIME; -#else - fio_clock_source = CS_GTOD; -#endif - __fio_gettime(&s); + fio_get_mono_time(&s); c_s = get_cpu_clock(); do { - __fio_gettime(&e); + fio_get_mono_time(&e); + c_e = get_cpu_clock(); - elapsed = utime_since(&s, &e); - if (elapsed >= 1280) { - c_e = get_cpu_clock(); + elapsed = ntime_since(&s, &e); + if (elapsed >= 1280000) break; - } } while (1); - fio_clock_source = old_cs; - return (c_e - c_s) * 1000 / elapsed; + return (c_e - c_s) * 1000000 / elapsed; } #define NR_TIME_ITERS 50 @@ -305,14 +306,14 @@ static int calibrate_cpu_clock(void) avg /= samples; cycles_per_msec = avg; - dprint(FD_TIME, "avg: %llu\n", (unsigned long long) avg); - dprint(FD_TIME, "min=%llu, max=%llu, mean=%f, S=%f\n", + dprint(FD_TIME, "min=%llu, max=%llu, mean=%f, S=%f, N=%d\n", (unsigned long long) minc, - (unsigned long long) maxc, mean, S); + (unsigned long long) maxc, mean, S, NR_TIME_ITERS); + dprint(FD_TIME, "trimmed mean=%llu, N=%d\n", (unsigned long long) avg, samples); max_ticks = MAX_CLOCK_SEC * cycles_per_msec * 1000ULL; max_mult = ULLONG_MAX / max_ticks; - dprint(FD_TIME, "\n\nmax_ticks=%llu, __builtin_clzll=%d, " + dprint(FD_TIME, "max_ticks=%llu, __builtin_clzll=%d, " "max_mult=%llu\n", max_ticks, __builtin_clzll(max_ticks), max_mult); @@ -334,7 +335,7 @@ static int calibrate_cpu_clock(void) /* * Find the greatest power of 2 clock ticks that is less than the - * ticks in MAX_CLOCK_SEC_2STAGE + * ticks in MAX_CLOCK_SEC */ max_cycles_shift = max_cycles_mask = 0; tmp = MAX_CLOCK_SEC * 1000ULL * cycles_per_msec; @@ -378,8 +379,8 @@ static int calibrate_cpu_clock(void) } #endif // ARCH_HAVE_CPU_CLOCK -#ifndef CONFIG_TLS_THREAD -void fio_local_clock_init(int is_thread) +#if defined(ARCH_HAVE_CPU_CLOCK) && !defined(CONFIG_TLS_THREAD) +void fio_local_clock_init(void) { struct tv_valid *t; @@ -395,7 +396,7 @@ static void kill_tv_tls_key(void *data) free(data); } #else -void fio_local_clock_init(int is_thread) +void fio_local_clock_init(void) { } #endif @@ -405,7 +406,7 @@ void fio_clock_init(void) if (fio_clock_source == fio_clock_source_inited) return; -#ifndef CONFIG_TLS_THREAD +#if defined(ARCH_HAVE_CPU_CLOCK) && !defined(CONFIG_TLS_THREAD) if (pthread_key_create(&tv_tls_key, kill_tv_tls_key)) log_err("fio: can't create TLS key\n"); #endif @@ -430,22 +431,22 @@ void fio_clock_init(void) uint64_t ntime_since(const struct timespec *s, const struct timespec *e) { - int64_t sec, nsec; + int64_t sec, nsec; - sec = e->tv_sec - s->tv_sec; - nsec = e->tv_nsec - s->tv_nsec; - if (sec > 0 && nsec < 0) { - sec--; - nsec += 1000000000LL; - } + sec = e->tv_sec - s->tv_sec; + nsec = e->tv_nsec - s->tv_nsec; + if (sec > 0 && nsec < 0) { + sec--; + nsec += 1000000000LL; + } /* * time warp bug on some kernels? */ - if (sec < 0 || (sec == 0 && nsec < 0)) - return 0; + if (sec < 0 || (sec == 0 && nsec < 0)) + return 0; - return nsec + (sec * 1000000000LL); + return nsec + (sec * 1000000000LL); } uint64_t ntime_since_now(const struct timespec *s) @@ -523,23 +524,33 @@ uint64_t mtime_since_now(const struct timespec *s) return mtime_since(s, &t); } -uint64_t mtime_since(const struct timespec *s, const struct timespec *e) +/* + * Returns *e - *s in milliseconds as a signed integer. Note: rounding is + * asymmetric. If the difference yields +1 ns then 0 is returned. If the + * difference yields -1 ns then -1 is returned. + */ +int64_t rel_time_since(const struct timespec *s, const struct timespec *e) { - int64_t sec, usec; + int64_t sec, nsec; sec = e->tv_sec - s->tv_sec; - usec = (e->tv_nsec - s->tv_nsec) / 1000; - if (sec > 0 && usec < 0) { + nsec = e->tv_nsec - s->tv_nsec; + if (nsec < 0) { sec--; - usec += 1000000; + nsec += 1000ULL * 1000 * 1000; } + assert(0 <= nsec && nsec < 1000ULL * 1000 * 1000); - if (sec < 0 || (sec == 0 && usec < 0)) - return 0; + return sec * 1000 + nsec / (1000 * 1000); +} - sec *= 1000; - usec /= 1000; - return sec + usec; +/* + * Returns *e - *s in milliseconds as an unsigned integer. Returns 0 if + * *e < *s. + */ +uint64_t mtime_since(const struct timespec *s, const struct timespec *e) +{ + return max(rel_time_since(s, e), (int64_t)0); } uint64_t time_since_now(const struct timespec *s) @@ -548,7 +559,7 @@ uint64_t time_since_now(const struct timespec *s) } #if defined(FIO_HAVE_CPU_AFFINITY) && defined(ARCH_HAVE_CPU_CLOCK) && \ - defined(CONFIG_SFAA) + defined(CONFIG_SYNC_SYNC) && defined(CONFIG_CMP_SWAP) #define CLOCK_ENTRIES_DEBUG 100000 #define CLOCK_ENTRIES_TEST 1000 @@ -563,16 +574,16 @@ struct clock_thread { pthread_t thread; int cpu; int debug; - pthread_mutex_t lock; - pthread_mutex_t started; + struct fio_sem lock; unsigned long nr_entries; uint32_t *seq; struct clock_entry *entries; }; -static inline uint32_t atomic32_inc_return(uint32_t *seq) +static inline uint32_t atomic32_compare_and_swap(uint32_t *ptr, uint32_t old, + uint32_t new) { - return 1 + __sync_fetch_and_add(seq, 1); + return __sync_val_compare_and_swap(ptr, old, new); } static void *clock_thread_fn(void *data) @@ -580,7 +591,6 @@ static void *clock_thread_fn(void *data) struct clock_thread *t = data; struct clock_entry *c; os_cpu_mask_t cpu_mask; - uint32_t last_seq; unsigned long long first; int i; @@ -600,11 +610,9 @@ static void *clock_thread_fn(void *data) goto err; } - pthread_mutex_lock(&t->lock); - pthread_mutex_unlock(&t->started); + fio_sem_down(&t->lock); first = get_cpu_clock(); - last_seq = 0; c = &t->entries[0]; for (i = 0; i < t->nr_entries; i++, c++) { uint32_t seq; @@ -612,11 +620,15 @@ static void *clock_thread_fn(void *data) c->cpu = t->cpu; do { - seq = atomic32_inc_return(t->seq); - if (seq < last_seq) + seq = *t->seq; + if (seq == UINT_MAX) break; + tsc_barrier(); tsc = get_cpu_clock(); - } while (seq != *t->seq); + } while (seq != atomic32_compare_and_swap(t->seq, seq, seq + 1)); + + if (seq == UINT_MAX) + break; c->seq = seq; c->tsc = tsc; @@ -634,7 +646,7 @@ static void *clock_thread_fn(void *data) * The most common platform clock breakage is returning zero * indefinitely. Check for that and return failure. */ - if (!t->entries[i - 1].tsc && !t->entries[0].tsc) + if (i > 1 && !t->entries[i - 1].tsc && !t->entries[0].tsc) goto err; fio_cpuset_exit(&cpu_mask); @@ -659,12 +671,21 @@ static int clock_cmp(const void *p1, const void *p2) int fio_monotonic_clocktest(int debug) { struct clock_thread *cthreads; - unsigned int nr_cpus = cpus_online(); + unsigned int seen_cpus, nr_cpus = cpus_configured(); struct clock_entry *entries; unsigned long nr_entries, tentries, failed = 0; struct clock_entry *prev, *this; uint32_t seq = 0; unsigned int i; + os_cpu_mask_t mask; + +#ifdef FIO_HAVE_GET_THREAD_AFFINITY + fio_get_thread_affinity(mask); +#else + memset(&mask, 0, sizeof(mask)); + for (i = 0; i < nr_cpus; i++) + fio_cpu_set(&mask, i); +#endif if (debug) { log_info("cs: reliable_tsc: %s\n", tsc_reliable ? "yes" : "no"); @@ -691,43 +712,44 @@ int fio_monotonic_clocktest(int debug) if (debug) log_info("cs: Testing %u CPUs\n", nr_cpus); + seen_cpus = 0; for (i = 0; i < nr_cpus; i++) { struct clock_thread *t = &cthreads[i]; + if (!fio_cpu_isset(&mask, i)) + continue; t->cpu = i; t->debug = debug; t->seq = &seq; t->nr_entries = nr_entries; - t->entries = &entries[i * nr_entries]; - pthread_mutex_init(&t->lock, NULL); - pthread_mutex_init(&t->started, NULL); - pthread_mutex_lock(&t->lock); + t->entries = &entries[seen_cpus * nr_entries]; + __fio_sem_init(&t->lock, FIO_SEM_LOCKED); if (pthread_create(&t->thread, NULL, clock_thread_fn, t)) { failed++; nr_cpus = i; break; } + seen_cpus++; } for (i = 0; i < nr_cpus; i++) { struct clock_thread *t = &cthreads[i]; - pthread_mutex_lock(&t->started); - } - - for (i = 0; i < nr_cpus; i++) { - struct clock_thread *t = &cthreads[i]; - - pthread_mutex_unlock(&t->lock); + if (!fio_cpu_isset(&mask, i)) + continue; + fio_sem_up(&t->lock); } for (i = 0; i < nr_cpus; i++) { struct clock_thread *t = &cthreads[i]; void *ret; + if (!fio_cpu_isset(&mask, i)) + continue; pthread_join(t->thread, &ret); if (ret) failed++; + __fio_sem_remove(&t->lock); } free(cthreads); @@ -737,6 +759,7 @@ int fio_monotonic_clocktest(int debug) goto err; } + tentries = nr_entries * seen_cpus; qsort(entries, tentries, sizeof(struct clock_entry), clock_cmp); /* silence silly gcc */