From: Jens Axboe Date: Fri, 21 Dec 2012 21:54:56 +0000 (+0100) Subject: clock: turn expensive division into multiply + cheap division X-Git-Tag: fio-2.0.13~14 X-Git-Url: https://git.kernel.dk/?p=fio.git;a=commitdiff_plain;h=71339117638469372f703332d8b969382b12c49b;ds=sidebyside clock: turn expensive division into multiply + cheap division On x86-64, dividing by a variable turns into a hugely expensive divq. It's much cheaper to invert the division. Instead of dividing clocks by clocks-per-usec, multiply by a precomputed 16M/clocks-per-usec constant and then divide the result by the constant 16M (16777216), which is cheap. Signed-off-by: Jens Axboe --- diff --git a/gettime.c b/gettime.c index 035d275d..df329f66 100644 --- a/gettime.c +++ b/gettime.c @@ -15,6 +15,7 @@ #ifdef ARCH_HAVE_CPU_CLOCK static unsigned long cycles_per_usec; +static unsigned long inv_cycles_per_usec; int tsc_reliable = 0; #endif @@ -177,7 +178,7 @@ void fio_gettime(struct timeval *tp, void fio_unused *caller) } else if (tv) tv->last_cycles = t; - usecs = t / cycles_per_usec; + usecs = (t * inv_cycles_per_usec) / 16777216UL; tp->tv_sec = usecs / 1000000; tp->tv_usec = usecs % 1000000; break; @@ -277,6 +278,8 @@ static void calibrate_cpu_clock(void) dprint(FD_TIME, "mean=%f, S=%f\n", mean, S); cycles_per_usec = avg; + inv_cycles_per_usec = 16777216UL / cycles_per_usec; + dprint(FD_TIME, "inv_cycles_per_usec=%lu\n", inv_cycles_per_usec); } #else static void calibrate_cpu_clock(void)