X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=gettime.c;h=964a52fa237afe50a05cd7d9081c3e29c26c70fc;hp=df329f66d3d5735b3225411315303f87b039c796;hb=be6bb2b72608d7efbec13d06c67446e229136afa;hpb=71339117638469372f703332d8b969382b12c49b diff --git a/gettime.c b/gettime.c index df329f66..964a52fa 100644 --- a/gettime.c +++ b/gettime.c @@ -13,22 +13,34 @@ #include "hash.h" #include "os/os.h" -#ifdef ARCH_HAVE_CPU_CLOCK +#if defined(ARCH_HAVE_CPU_CLOCK) +#ifndef ARCH_CPU_CLOCK_CYCLES_PER_USEC static unsigned long cycles_per_usec; static unsigned long inv_cycles_per_usec; -int tsc_reliable = 0; +static uint64_t max_cycles_for_mult; +#endif +#ifdef ARCH_CPU_CLOCK_WRAPS +static unsigned long long cycles_start, cycles_wrap; +#endif #endif +int tsc_reliable = 0; struct tv_valid { - struct timeval last_tv; + uint64_t last_cycles; int last_tv_valid; - unsigned long last_cycles; + int warned; }; +#ifdef ARCH_HAVE_CPU_CLOCK +#ifdef CONFIG_TLS_THREAD +static __thread struct tv_valid static_tv_valid; +#else static pthread_key_t tv_tls_key; +#endif +#endif enum fio_cs fio_clock_source = FIO_PREFERRED_CLOCK_SOURCE; int fio_clock_source_set = 0; -enum fio_cs fio_clock_source_inited = CS_INVAL; +static enum fio_cs fio_clock_source_inited = CS_INVAL; #ifdef FIO_DEBUG_TIME @@ -60,7 +72,7 @@ static struct gtod_log *find_hash(void *caller) return NULL; } -static struct gtod_log *find_log(void *caller) +static void inc_caller(void *caller) { struct gtod_log *log = find_hash(caller); @@ -76,16 +88,13 @@ static struct gtod_log *find_log(void *caller) flist_add_tail(&log->list, &hash[h]); } - return log; + log->calls++; } static void gtod_log_caller(void *caller) { - if (gtod_inited) { - struct gtod_log *log = find_log(caller); - - log->calls++; - } + if (gtod_inited) + inc_caller(caller); } static void fio_exit fio_dump_gtod(void) @@ -121,40 +130,28 @@ static void fio_init gtod_init(void) #endif /* FIO_DEBUG_TIME */ +#ifdef CONFIG_CLOCK_GETTIME static int fill_clock_gettime(struct timespec *ts) { -#ifdef FIO_HAVE_CLOCK_MONOTONIC +#if defined(CONFIG_CLOCK_MONOTONIC_RAW) + return clock_gettime(CLOCK_MONOTONIC_RAW, ts); +#elif defined(CONFIG_CLOCK_MONOTONIC) return clock_gettime(CLOCK_MONOTONIC, ts); #else return clock_gettime(CLOCK_REALTIME, ts); #endif } - -#ifdef FIO_DEBUG_TIME -void fio_gettime(struct timeval *tp, void *caller) -#else -void fio_gettime(struct timeval *tp, void fio_unused *caller) -#endif -{ - struct tv_valid *tv; - -#ifdef FIO_DEBUG_TIME - if (!caller) - caller = __builtin_return_address(0); - - gtod_log_caller(caller); #endif - if (fio_tv) { - memcpy(tp, fio_tv, sizeof(*tp)); - return; - } - - tv = pthread_getspecific(tv_tls_key); +static void __fio_gettime(struct timeval *tp) +{ switch (fio_clock_source) { +#ifdef CONFIG_GETTIMEOFDAY case CS_GTOD: gettimeofday(tp, NULL); break; +#endif +#ifdef CONFIG_CLOCK_GETTIME case CS_CGETTIME: { struct timespec ts; @@ -167,18 +164,39 @@ void fio_gettime(struct timeval *tp, void fio_unused *caller) tp->tv_usec = ts.tv_nsec / 1000; break; } +#endif #ifdef ARCH_HAVE_CPU_CLOCK case CS_CPUCLOCK: { - unsigned long long usecs, t; + uint64_t usecs, t; + struct tv_valid *tv; + +#ifdef CONFIG_TLS_THREAD + tv = &static_tv_valid; +#else + tv = pthread_getspecific(tv_tls_key); +#endif t = get_cpu_clock(); - if (tv && t < tv->last_cycles) { - dprint(FD_TIME, "CPU clock going back in time\n"); - t = tv->last_cycles; - } else if (tv) - tv->last_cycles = t; +#ifdef ARCH_CPU_CLOCK_WRAPS + if (t < cycles_start && !cycles_wrap) + cycles_wrap = 1; + else if (cycles_wrap && t >= cycles_start && !tv->warned) { + log_err("fio: double CPU clock wrap\n"); + tv->warned = 1; + } - usecs = (t * inv_cycles_per_usec) / 16777216UL; + t -= cycles_start; +#endif + tv->last_cycles = t; + tv->last_tv_valid = 1; +#ifdef ARCH_CPU_CLOCK_CYCLES_PER_USEC + usecs = t / ARCH_CPU_CLOCK_CYCLES_PER_USEC; +#else + if (t < max_cycles_for_mult) + usecs = (t * inv_cycles_per_usec) / 16777216UL; + else + usecs = t / cycles_per_usec; +#endif tp->tv_sec = usecs / 1000000; tp->tv_usec = usecs % 1000000; break; @@ -188,42 +206,44 @@ void fio_gettime(struct timeval *tp, void fio_unused *caller) log_err("fio: invalid clock source %d\n", fio_clock_source); break; } +} - /* - * If Linux is using the tsc clock on non-synced processors, - * sometimes time can appear to drift backwards. Fix that up. - */ - if (tv) { - if (tv->last_tv_valid) { - if (tp->tv_sec < tv->last_tv.tv_sec) - tp->tv_sec = tv->last_tv.tv_sec; - else if (tv->last_tv.tv_sec == tp->tv_sec && - tp->tv_usec < tv->last_tv.tv_usec) - tp->tv_usec = tv->last_tv.tv_usec; - } - tv->last_tv_valid = 1; - memcpy(&tv->last_tv, tp, sizeof(*tp)); - } +#ifdef FIO_DEBUG_TIME +void fio_gettime(struct timeval *tp, void *caller) +#else +void fio_gettime(struct timeval *tp, void fio_unused *caller) +#endif +{ +#ifdef FIO_DEBUG_TIME + if (!caller) + caller = __builtin_return_address(0); + + gtod_log_caller(caller); +#endif + if (fio_unlikely(fio_gettime_offload(tp))) + return; + + __fio_gettime(tp); } -#ifdef ARCH_HAVE_CPU_CLOCK +#if defined(ARCH_HAVE_CPU_CLOCK) && !defined(ARCH_CPU_CLOCK_CYCLES_PER_USEC) static unsigned long get_cycles_per_usec(void) { - struct timespec ts; struct timeval s, e; - unsigned long long c_s, c_e; + uint64_t c_s, c_e; + enum fio_cs old_cs = fio_clock_source; + uint64_t elapsed; - fill_clock_gettime(&ts); - s.tv_sec = ts.tv_sec; - s.tv_usec = ts.tv_nsec / 1000; +#ifdef CONFIG_CLOCK_GETTIME + fio_clock_source = CS_CGETTIME; +#else + fio_clock_source = CS_GTOD; +#endif + __fio_gettime(&s); c_s = get_cpu_clock(); do { - unsigned long long elapsed; - - fill_clock_gettime(&ts); - e.tv_sec = ts.tv_sec; - e.tv_usec = ts.tv_nsec / 1000; + __fio_gettime(&e); elapsed = utime_since(&s, &e); if (elapsed >= 1280) { @@ -232,15 +252,16 @@ static unsigned long get_cycles_per_usec(void) } } while (1); - return (c_e - c_s + 127) >> 7; + fio_clock_source = old_cs; + return (c_e - c_s) / elapsed; } #define NR_TIME_ITERS 50 -static void calibrate_cpu_clock(void) +static int calibrate_cpu_clock(void) { double delta, mean, S; - unsigned long avg, cycles[NR_TIME_ITERS]; + uint64_t minc, maxc, avg, cycles[NR_TIME_ITERS]; int i, samples; cycles[0] = get_cycles_per_usec(); @@ -254,12 +275,23 @@ static void calibrate_cpu_clock(void) } } + /* + * The most common platform clock breakage is returning zero + * indefinitely. Check for that and return failure. + */ + if (!cycles[0] && !cycles[NR_TIME_ITERS - 1]) + return 1; + S = sqrt(S / (NR_TIME_ITERS - 1.0)); - samples = avg = 0; + minc = -1ULL; + maxc = samples = avg = 0; for (i = 0; i < NR_TIME_ITERS; i++) { double this = cycles[i]; + minc = min(cycles[i], minc); + maxc = max(cycles[i], maxc); + if ((fmax(this, mean) - fmin(this, mean)) > S) continue; samples++; @@ -267,50 +299,73 @@ static void calibrate_cpu_clock(void) } S /= (double) NR_TIME_ITERS; - mean /= 10.0; for (i = 0; i < NR_TIME_ITERS; i++) - dprint(FD_TIME, "cycles[%d]=%lu\n", i, cycles[i] / 10); + dprint(FD_TIME, "cycles[%d]=%llu\n", i, (unsigned long long) cycles[i]); avg /= samples; - avg = (avg + 9) / 10; - dprint(FD_TIME, "avg: %lu\n", avg); - dprint(FD_TIME, "mean=%f, S=%f\n", mean, S); + dprint(FD_TIME, "avg: %llu\n", (unsigned long long) avg); + dprint(FD_TIME, "min=%llu, max=%llu, mean=%f, S=%f\n", + (unsigned long long) minc, + (unsigned long long) maxc, mean, S); cycles_per_usec = avg; inv_cycles_per_usec = 16777216UL / cycles_per_usec; + max_cycles_for_mult = ~0ULL / inv_cycles_per_usec; dprint(FD_TIME, "inv_cycles_per_usec=%lu\n", inv_cycles_per_usec); +#ifdef ARCH_CPU_CLOCK_WRAPS + cycles_start = get_cpu_clock(); + dprint(FD_TIME, "cycles_start=%llu\n", cycles_start); +#endif + return 0; } #else -static void calibrate_cpu_clock(void) +static int calibrate_cpu_clock(void) { -} +#ifdef ARCH_CPU_CLOCK_CYCLES_PER_USEC + return 0; +#else + return 1; #endif +} +#endif // ARCH_HAVE_CPU_CLOCK +#ifndef CONFIG_TLS_THREAD void fio_local_clock_init(int is_thread) { struct tv_valid *t; - t = calloc(sizeof(*t), 1); - if (pthread_setspecific(tv_tls_key, t)) + t = calloc(1, sizeof(*t)); + if (pthread_setspecific(tv_tls_key, t)) { log_err("fio: can't set TLS key\n"); + assert(0); + } } static void kill_tv_tls_key(void *data) { free(data); } +#else +void fio_local_clock_init(int is_thread) +{ +} +#endif void fio_clock_init(void) { if (fio_clock_source == fio_clock_source_inited) return; +#ifndef CONFIG_TLS_THREAD if (pthread_key_create(&tv_tls_key, kill_tv_tls_key)) log_err("fio: can't create TLS key\n"); +#endif fio_clock_source_inited = fio_clock_source; - calibrate_cpu_clock(); + + if (calibrate_cpu_clock()) + tsc_reliable = 0; /* * If the arch sets tsc_reliable != 0, then it must be good enough @@ -318,16 +373,16 @@ void fio_clock_init(void) * runs at a constant rate and is synced across CPU cores. */ if (tsc_reliable) { - if (!fio_clock_source_set) + if (!fio_clock_source_set && !fio_monotonic_clocktest(0)) fio_clock_source = CS_CPUCLOCK; } else if (fio_clock_source == CS_CPUCLOCK) log_info("fio: clocksource=cpu may not be reliable\n"); } -unsigned long long utime_since(struct timeval *s, struct timeval *e) +uint64_t utime_since(const struct timeval *s, const struct timeval *e) { long sec, usec; - unsigned long long ret; + uint64_t ret; sec = e->tv_sec - s->tv_sec; usec = e->tv_usec - s->tv_usec; @@ -347,7 +402,7 @@ unsigned long long utime_since(struct timeval *s, struct timeval *e) return ret; } -unsigned long long utime_since_now(struct timeval *s) +uint64_t utime_since_now(const struct timeval *s) { struct timeval t; @@ -355,7 +410,7 @@ unsigned long long utime_since_now(struct timeval *s) return utime_since(s, &t); } -unsigned long mtime_since(struct timeval *s, struct timeval *e) +uint64_t mtime_since(const struct timeval *s, const struct timeval *e) { long sec, usec, ret; @@ -369,14 +424,14 @@ unsigned long mtime_since(struct timeval *s, struct timeval *e) if (sec < 0 || (sec == 0 && usec < 0)) return 0; - sec *= 1000UL; - usec /= 1000UL; + sec *= 1000; + usec /= 1000; ret = sec + usec; return ret; } -unsigned long mtime_since_now(struct timeval *s) +uint64_t mtime_since_now(const struct timeval *s) { struct timeval t; void *p = __builtin_return_address(0); @@ -385,31 +440,35 @@ unsigned long mtime_since_now(struct timeval *s) return mtime_since(s, &t); } -unsigned long time_since_now(struct timeval *s) +uint64_t time_since_now(const struct timeval *s) { return mtime_since_now(s) / 1000; } -#if defined(FIO_HAVE_CPU_AFFINITY) && defined(ARCH_HAVE_CPU_CLOCK) +#if defined(FIO_HAVE_CPU_AFFINITY) && defined(ARCH_HAVE_CPU_CLOCK) && \ + defined(CONFIG_SFAA) -#define CLOCK_ENTRIES 100000 +#define CLOCK_ENTRIES_DEBUG 100000 +#define CLOCK_ENTRIES_TEST 10000 struct clock_entry { - unsigned long seq; - unsigned long tsc; - unsigned long cpu; + uint32_t seq; + uint32_t cpu; + uint64_t tsc; }; struct clock_thread { pthread_t thread; int cpu; + int debug; pthread_mutex_t lock; pthread_mutex_t started; - uint64_t *seq; + unsigned long nr_entries; + uint32_t *seq; struct clock_entry *entries; }; -static inline uint64_t atomic64_inc_return(uint64_t *seq) +static inline uint32_t atomic32_inc_return(uint32_t *seq) { return 1 + __sync_fetch_and_add(seq, 1); } @@ -419,26 +478,41 @@ static void *clock_thread_fn(void *data) struct clock_thread *t = data; struct clock_entry *c; os_cpu_mask_t cpu_mask; + uint32_t last_seq; + unsigned long long first; int i; - memset(&cpu_mask, 0, sizeof(cpu_mask)); + if (fio_cpuset_init(&cpu_mask)) { + int __err = errno; + + log_err("clock cpuset init failed: %s\n", strerror(__err)); + goto err_out; + } + fio_cpu_set(&cpu_mask, t->cpu); if (fio_setaffinity(gettid(), cpu_mask) == -1) { - log_err("clock setaffinity failed\n"); - return (void *) 1; + int __err = errno; + + log_err("clock setaffinity failed: %s\n", strerror(__err)); + goto err; } pthread_mutex_lock(&t->lock); pthread_mutex_unlock(&t->started); + first = get_cpu_clock(); + last_seq = 0; c = &t->entries[0]; - for (i = 0; i < CLOCK_ENTRIES; i++, c++) { - uint64_t seq, tsc; + for (i = 0; i < t->nr_entries; i++, c++) { + uint32_t seq; + uint64_t tsc; c->cpu = t->cpu; do { - seq = atomic64_inc_return(t->seq); + seq = atomic32_inc_return(t->seq); + if (seq < last_seq) + break; tsc = get_cpu_clock(); } while (seq != *t->seq); @@ -446,8 +520,27 @@ static void *clock_thread_fn(void *data) c->tsc = tsc; } - log_info("cs: cpu%3d: %lu clocks seen\n", t->cpu, t->entries[CLOCK_ENTRIES - 1].tsc - t->entries[0].tsc); + if (t->debug) { + unsigned long long clocks; + + clocks = t->entries[i - 1].tsc - t->entries[0].tsc; + log_info("cs: cpu%3d: %llu clocks seen, first %llu\n", t->cpu, + clocks, first); + } + + /* + * The most common platform clock breakage is returning zero + * indefinitely. Check for that and return failure. + */ + if (!t->entries[i - 1].tsc && !t->entries[0].tsc) + goto err; + + fio_cpuset_exit(&cpu_mask); return NULL; +err: + fio_cpuset_exit(&cpu_mask); +err_out: + return (void *) 1; } static int clock_cmp(const void *p1, const void *p2) @@ -461,68 +554,93 @@ static int clock_cmp(const void *p1, const void *p2) return c1->seq - c2->seq; } -int fio_monotonic_clocktest(void) +int fio_monotonic_clocktest(int debug) { - struct clock_thread *threads; + struct clock_thread *cthreads; unsigned int nr_cpus = cpus_online(); struct clock_entry *entries; - unsigned long tentries, failed; - uint64_t seq = 0; - int i; + unsigned long nr_entries, tentries, failed = 0; + struct clock_entry *prev, *this; + uint32_t seq = 0; + unsigned int i; + + if (debug) { + log_info("cs: reliable_tsc: %s\n", tsc_reliable ? "yes" : "no"); + +#ifdef FIO_INC_DEBUG + fio_debug |= 1U << FD_TIME; +#endif + nr_entries = CLOCK_ENTRIES_DEBUG; + } else + nr_entries = CLOCK_ENTRIES_TEST; - fio_debug |= 1U << FD_TIME; calibrate_cpu_clock(); - fio_debug &= ~(1U << FD_TIME); - threads = malloc(nr_cpus * sizeof(struct clock_thread)); - tentries = CLOCK_ENTRIES * nr_cpus; + if (debug) { +#ifdef FIO_INC_DEBUG + fio_debug &= ~(1U << FD_TIME); +#endif + } + + cthreads = malloc(nr_cpus * sizeof(struct clock_thread)); + tentries = nr_entries * nr_cpus; entries = malloc(tentries * sizeof(struct clock_entry)); - log_info("cs: Testing %u CPUs\n", nr_cpus); + if (debug) + log_info("cs: Testing %u CPUs\n", nr_cpus); for (i = 0; i < nr_cpus; i++) { - struct clock_thread *t = &threads[i]; + struct clock_thread *t = &cthreads[i]; t->cpu = i; + t->debug = debug; t->seq = &seq; - t->entries = &entries[i * CLOCK_ENTRIES]; + t->nr_entries = nr_entries; + t->entries = &entries[i * nr_entries]; pthread_mutex_init(&t->lock, NULL); pthread_mutex_init(&t->started, NULL); pthread_mutex_lock(&t->lock); - pthread_create(&t->thread, NULL, clock_thread_fn, t); + if (pthread_create(&t->thread, NULL, clock_thread_fn, t)) { + failed++; + nr_cpus = i; + break; + } } for (i = 0; i < nr_cpus; i++) { - struct clock_thread *t = &threads[i]; + struct clock_thread *t = &cthreads[i]; pthread_mutex_lock(&t->started); } for (i = 0; i < nr_cpus; i++) { - struct clock_thread *t = &threads[i]; + struct clock_thread *t = &cthreads[i]; pthread_mutex_unlock(&t->lock); } - for (failed = i = 0; i < nr_cpus; i++) { - struct clock_thread *t = &threads[i]; + for (i = 0; i < nr_cpus; i++) { + struct clock_thread *t = &cthreads[i]; void *ret; pthread_join(t->thread, &ret); if (ret) failed++; } - free(threads); + free(cthreads); if (failed) { - log_err("Clocksource test: %u threads failed\n", failed); + if (debug) + log_err("Clocksource test: %lu threads failed\n", failed); goto err; } qsort(entries, tentries, sizeof(struct clock_entry), clock_cmp); + /* silence silly gcc */ + prev = NULL; for (failed = i = 0; i < tentries; i++) { - struct clock_entry *prev, *this = &entries[i]; + this = &entries[i]; if (!i) { prev = this; @@ -532,20 +650,27 @@ int fio_monotonic_clocktest(void) if (prev->tsc > this->tsc) { uint64_t diff = prev->tsc - this->tsc; - log_info("cs: CPU clock mismatch (diff=%lu):\n", diff); - log_info("\t CPU%3lu: TSC=%lu, SEQ=%lu\n", prev->cpu, prev->tsc, prev->seq); - log_info("\t CPU%3lu: TSC=%lu, SEQ=%lu\n", this->cpu, this->tsc, this->seq); + if (!debug) { + failed++; + break; + } + + log_info("cs: CPU clock mismatch (diff=%llu):\n", + (unsigned long long) diff); + log_info("\t CPU%3u: TSC=%llu, SEQ=%u\n", prev->cpu, (unsigned long long) prev->tsc, prev->seq); + log_info("\t CPU%3u: TSC=%llu, SEQ=%u\n", this->cpu, (unsigned long long) this->tsc, this->seq); failed++; } prev = this; } - if (failed) - log_info("cs: Failed: %lu\n", failed); - else - log_info("cs: Pass!\n"); - + if (debug) { + if (failed) + log_info("cs: Failed: %lu\n", failed); + else + log_info("cs: Pass!\n"); + } err: free(entries); return !!failed; @@ -553,10 +678,11 @@ err: #else /* defined(FIO_HAVE_CPU_AFFINITY) && defined(ARCH_HAVE_CPU_CLOCK) */ -int fio_monotonic_clocktest(void) +int fio_monotonic_clocktest(int debug) { - log_info("cs: current platform does not support CPU clocks\n"); - return 0; + if (debug) + log_info("cs: current platform does not support CPU clocks\n"); + return 1; } #endif