On x86-64, dividing by a runtime-variable divisor compiles to a hugely
expensive divq instruction. It's much cheaper to invert the division:
instead of dividing the cycle count by cycles-per-usec, multiply by a
precomputed 16777216 (2^24) / cycles-per-usec constant and divide the
result by 2^24, which the compiler reduces to a shift.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
#ifdef ARCH_HAVE_CPU_CLOCK
static unsigned long cycles_per_usec;
#ifdef ARCH_HAVE_CPU_CLOCK
static unsigned long cycles_per_usec;
+static unsigned long inv_cycles_per_usec;
int tsc_reliable = 0;
#endif
int tsc_reliable = 0;
#endif
} else if (tv)
tv->last_cycles = t;
} else if (tv)
tv->last_cycles = t;
- usecs = t / cycles_per_usec;
+ usecs = (t * inv_cycles_per_usec) / 16777216UL;
tp->tv_sec = usecs / 1000000;
tp->tv_usec = usecs % 1000000;
break;
tp->tv_sec = usecs / 1000000;
tp->tv_usec = usecs % 1000000;
break;
dprint(FD_TIME, "mean=%f, S=%f\n", mean, S);
cycles_per_usec = avg;
dprint(FD_TIME, "mean=%f, S=%f\n", mean, S);
cycles_per_usec = avg;
+ inv_cycles_per_usec = 16777216UL / cycles_per_usec;
+ dprint(FD_TIME, "inv_cycles_per_usec=%lu\n", inv_cycles_per_usec);
}
#else
static void calibrate_cpu_clock(void)
}
#else
static void calibrate_cpu_clock(void)