Commit | Line | Data |
---|---|---|
dcbc7841 VF |
1 | /* |
2 | * Carry out arithmetic to explore conversion of CPU clock ticks to nsec | |
3 | * | |
4 | * When we use the CPU clock for timing, we do the following: | |
5 | * | |
6 | * 1) Calibrate the CPU clock to relate the frequency of CPU clock ticks | |
7 | * to actual time. | |
8 | * | |
9 | * Using gettimeofday() or clock_gettime(), count how many CPU clock | |
10 | * ticks occur per usec | |
11 | * | |
12 | * 2) Calculate conversion factors so that we can ultimately convert | |
13 | * from clocks ticks to nsec with | |
14 | * nsec = (ticks * clock_mult) >> clock_shift | |
15 | * | |
16 | * This is equivalent to | |
17 | * nsec = ticks * (MULTIPLIER / cycles_per_nsec) / MULTIPLIER | |
18 | * where | |
19 | * clock_mult = MULTIPLIER / cycles_per_nsec | |
20 | * MULTIPLIER = 2^clock_shift | |
21 | * | |
22 | * It would be simpler to just calculate nsec = ticks / cycles_per_nsec, | |
23 | * but all of this is necessary because of rounding when calculating | |
24 | * cycles_per_nsec. With a 3.0GHz CPU, cycles_per_nsec would simply | |
25 | * be 3. But with a 3.33GHz CPU or a 4.5GHz CPU, the fractional | |
26 | * portion is lost with integer arithmetic. | |
27 | * | |
28 | * This multiply and shift calculation also has a performance benefit | |
29 | * as multiplication and bit shift operations are faster than integer | |
30 | * division. | |
31 | * | |
32 | * 3) Dynamically determine clock_shift and clock_mult at run time based | |
33 | * on MAX_CLOCK_SEC and cycles_per_usec. MAX_CLOCK_SEC is the maximum | |
34 | * duration for which the conversion will be valid. | |
35 | * | |
36 | * The primary constraint is that (ticks * clock_mult) must not overflow | |
37 | * when ticks is at its maximum value. | |
38 | * | |
39 | * So we have | |
48e7b920 | 40 | * max_ticks = MAX_CLOCK_SEC * 1000000000 * cycles_per_nsec |
dcbc7841 VF |
41 | * max_ticks * clock_mult <= ULLONG_MAX |
42 | * max_ticks * MULTIPLIER / cycles_per_nsec <= ULLONG_MAX | |
48e7b920 | 43 | * MULTIPLIER <= ULLONG_MAX * cycles_per_nsec / max_ticks |
dcbc7841 VF |
44 | * |
45 | * Then choose the largest clock_shift that satisfies | |
48e7b920 | 46 | * 2^clock_shift <= ULLONG_MAX * cycles_per_nsec / max_ticks |
dcbc7841 VF |
47 | * |
48 | * Finally calculate the appropriate clock_mult associated with clock_shift | |
49 | * clock_mult = 2^clock_shift / cycles_per_nsec | |
50 | * | |
51 | * 4) In the code below we have cycles_per_usec and use | |
52 | * cycles_per_nsec = cycles_per_usec / 1000 | |
53 | * | |
48e7b920 VF |
54 | * |
55 | * The code below implements 4 clock tick to nsec conversion strategies | |
56 | * | |
57 | * i) 64-bit arithmetic for the (ticks * clock_mult) product with the | |
58 | * conversion valid for at most MAX_CLOCK_SEC | |
59 | * | |
60 | * ii) NOT IMPLEMENTED Use 64-bit integers to emulate 128-bit multiplication | |
61 | * for the (ticks * clock_mult) product | |
62 | * | |
63 | * iii) 64-bit arithmetic with clock ticks to nsec conversion occurring in | |
64 | * two stages. The first stage counts the number of discrete, large chunks | |
65 | * of time that have elapsed. To this is added the time represented by | |
66 | * the remaining clock ticks. The advantage of this strategy is better | |
67 | * accuracy because the (ticks * clock_mult) product used for final | |
68 | * fractional chunk | |
69 | * | |
fc002f14 | 70 | * iv) 64-bit arithmetic with the clock ticks to nsec conversion occurring in |
48e7b920 VF |
71 | * two stages. This is carried out using locks to update the number of |
72 | * large time chunks (MAX_CLOCK_SEC_2STAGE) that have elapsed. | |
73 | * | |
74 | * v) 128-bit arithmetic used for the clock ticks to nsec conversion. | |
75 | * | |
dcbc7841 VF |
76 | */ |
77 | ||
78 | #include <stdio.h> | |
79 | #include <stdlib.h> | |
80 | #include <limits.h> | |
81 | #include <assert.h> | |
82 | #include <stdlib.h> | |
48e7b920 | 83 | #include "lib/seqlock.h" |
dcbc7841 VF |
84 | |
85 | #define DEBUG 0 | |
86 | #define MAX_CLOCK_SEC 365*24*60*60ULL | |
48e7b920 | 87 | #define MAX_CLOCK_SEC_2STAGE 60*60ULL |
dcbc7841 VF |
88 | #define dprintf(...) if (DEBUG) { printf(__VA_ARGS__); } |
89 | ||
90 | enum { | |
48e7b920 VF |
91 | __CLOCK64_BIT = 1 << 0, |
92 | __CLOCK128_BIT = 1 << 1, | |
dcbc7841 VF |
93 | __CLOCK_MULT_SHIFT = 1 << 2, |
94 | __CLOCK_EMULATE_128 = 1 << 3, | |
48e7b920 VF |
95 | __CLOCK_2STAGE = 1 << 4, |
96 | __CLOCK_LOCK = 1 << 5, | |
97 | ||
98 | CLOCK64_MULT_SHIFT = __CLOCK64_BIT | __CLOCK_MULT_SHIFT, | |
99 | CLOCK64_EMULATE_128 = __CLOCK64_BIT | __CLOCK_EMULATE_128, | |
100 | CLOCK64_2STAGE = __CLOCK64_BIT | __CLOCK_2STAGE, | |
101 | CLOCK64_LOCK = __CLOCK64_BIT | __CLOCK_LOCK, | |
102 | CLOCK128_MULT_SHIFT = __CLOCK128_BIT | __CLOCK_MULT_SHIFT, | |
dcbc7841 VF |
103 | }; |
104 | ||
cf6b7fb4 JA |
105 | static struct seqlock clock_seqlock; |
106 | static unsigned long long cycles_start; | |
107 | static unsigned long long elapsed_nsec; | |
48e7b920 | 108 | |
cf6b7fb4 JA |
109 | static unsigned int max_cycles_shift; |
110 | static unsigned long long max_cycles_mask; | |
111 | static unsigned long long nsecs_for_max_cycles; | |
48e7b920 | 112 | |
cf6b7fb4 JA |
113 | static unsigned int clock_shift; |
114 | static unsigned long long clock_mult; | |
48e7b920 | 115 | |
cf6b7fb4 JA |
116 | static unsigned long long *nsecs; |
117 | static unsigned long long clock_mult64_128[2]; | |
118 | static __uint128_t clock_mult128; | |
dcbc7841 | 119 | |
dcbc7841 VF |
120 | /* |
121 | * Functions for carrying out 128-bit | |
122 | * arithmetic using 64-bit integers | |
123 | * | |
124 | * 128-bit integers are stored as | |
125 | * arrays of two 64-bit integers | |
126 | * | |
127 | * Ordering is little endian | |
128 | * | |
129 | * a[0] has the less significant bits | |
130 | * a[1] has the more significant bits | |
48e7b920 VF |
131 | * |
132 | * NOT FULLY IMPLEMENTED | |
dcbc7841 | 133 | */ |
cf6b7fb4 JA |
134 | static void do_mult(unsigned long long a[2], unsigned long long b, |
135 | unsigned long long product[2]) | |
dcbc7841 VF |
136 | { |
137 | product[0] = product[1] = 0; | |
138 | return; | |
139 | } | |
140 | ||
cf6b7fb4 JA |
141 | static void do_div(unsigned long long a[2], unsigned long long b, |
142 | unsigned long long c[2]) | |
dcbc7841 VF |
143 | { |
144 | return; | |
145 | } | |
146 | ||
cf6b7fb4 | 147 | static void do_shift64(unsigned long long a[2], unsigned int count) |
dcbc7841 VF |
148 | { |
149 | a[0] = a[1] >> (count-64); | |
150 | a[1] = 0; | |
151 | } | |
152 | ||
cf6b7fb4 | 153 | static void do_shift(unsigned long long a[2], unsigned int count) |
dcbc7841 VF |
154 | { |
155 | if (count > 64) | |
156 | do_shift64(a, count); | |
cf6b7fb4 | 157 | else { |
dcbc7841 VF |
158 | while (count--) { |
159 | a[0] >>= 1; | |
160 | a[0] |= a[1] << 63; | |
161 | a[1] >>= 1; | |
162 | } | |
cf6b7fb4 | 163 | } |
dcbc7841 VF |
164 | } |
165 | ||
cf6b7fb4 | 166 | static void update_clock(unsigned long long t) |
48e7b920 VF |
167 | { |
168 | write_seqlock_begin(&clock_seqlock); | |
169 | elapsed_nsec = (t >> max_cycles_shift) * nsecs_for_max_cycles; | |
170 | cycles_start = t & ~max_cycles_mask; | |
171 | write_seqlock_end(&clock_seqlock); | |
172 | } | |
173 | ||
cf6b7fb4 | 174 | static unsigned long long _get_nsec(int mode, unsigned long long t) |
dcbc7841 VF |
175 | { |
176 | switch(mode) { | |
cf6b7fb4 JA |
177 | case CLOCK64_MULT_SHIFT: |
178 | return (t * clock_mult) >> clock_shift; | |
179 | case CLOCK64_EMULATE_128: { | |
180 | unsigned long long product[2] = { }; | |
181 | ||
182 | do_mult(clock_mult64_128, t, product); | |
183 | do_shift(product, clock_shift); | |
184 | return product[0]; | |
dcbc7841 | 185 | } |
cf6b7fb4 JA |
186 | case CLOCK64_2STAGE: { |
187 | unsigned long long multiples, nsec; | |
188 | ||
189 | multiples = t >> max_cycles_shift; | |
190 | dprintf("multiples=%llu\n", multiples); | |
191 | nsec = multiples * nsecs_for_max_cycles; | |
192 | nsec += ((t & max_cycles_mask) * clock_mult) >> clock_shift; | |
193 | return nsec; | |
48e7b920 | 194 | } |
cf6b7fb4 JA |
195 | case CLOCK64_LOCK: { |
196 | unsigned int seq; | |
197 | unsigned long long nsec; | |
198 | ||
199 | do { | |
200 | seq = read_seqlock_begin(&clock_seqlock); | |
201 | nsec = elapsed_nsec; | |
202 | nsec += ((t - cycles_start) * clock_mult) >> clock_shift; | |
203 | } while (read_seqlock_retry(&clock_seqlock, seq)); | |
204 | return nsec; | |
dcbc7841 | 205 | } |
cf6b7fb4 JA |
206 | case CLOCK128_MULT_SHIFT: |
207 | return (unsigned long long)((t * clock_mult128) >> clock_shift); | |
208 | default: | |
dcbc7841 | 209 | assert(0); |
dcbc7841 VF |
210 | } |
211 | } | |
212 | ||
cf6b7fb4 | 213 | static unsigned long long get_nsec(int mode, unsigned long long t) |
48e7b920 VF |
214 | { |
215 | if (mode == CLOCK64_LOCK) { | |
216 | update_clock(t); | |
217 | } | |
218 | ||
219 | return _get_nsec(mode, t); | |
220 | } | |
221 | ||
cf6b7fb4 JA |
222 | static void calc_mult_shift(int mode, void *mult, unsigned int *shift, |
223 | unsigned long long max_sec, | |
224 | unsigned long long cycles_per_usec) | |
dcbc7841 VF |
225 | { |
226 | unsigned long long max_ticks; | |
227 | max_ticks = max_sec * cycles_per_usec * 1000000ULL; | |
228 | ||
229 | switch (mode) { | |
cf6b7fb4 JA |
230 | case CLOCK64_MULT_SHIFT: { |
231 | unsigned long long max_mult, tmp; | |
232 | unsigned int sft = 0; | |
233 | ||
234 | /* | |
235 | * Calculate the largest multiplier that will not | |
236 | * produce a 64-bit overflow in the multiplication | |
237 | * step of the clock ticks to nsec conversion | |
238 | */ | |
239 | max_mult = ULLONG_MAX / max_ticks; | |
240 | dprintf("max_ticks=%llu, __builtin_clzll=%d, max_mult=%llu\n", max_ticks, __builtin_clzll(max_ticks), max_mult); | |
241 | ||
242 | /* | |
243 | * Find the largest shift count that will produce | |
244 | * a multiplier less than max_mult | |
245 | */ | |
246 | tmp = max_mult * cycles_per_usec / 1000; | |
247 | while (tmp > 1) { | |
248 | tmp >>= 1; | |
249 | sft++; | |
250 | dprintf("tmp=%llu, sft=%u\n", tmp, sft); | |
251 | } | |
dcbc7841 | 252 | |
cf6b7fb4 JA |
253 | *shift = sft; |
254 | *((unsigned long long *)mult) = (unsigned long long) ((1ULL << sft) * 1000 / cycles_per_usec); | |
255 | break; | |
256 | } | |
257 | case CLOCK64_EMULATE_128: { | |
258 | unsigned long long max_mult[2], tmp[2] = { }; | |
259 | unsigned int sft = 0; | |
260 | ||
261 | /* | |
262 | * Calculate the largest multiplier that will not | |
263 | * produce a 128-bit overflow in the multiplication | |
264 | * step of the clock ticks to nsec conversion, | |
265 | * but use only 64-bit integers in the process | |
266 | */ | |
267 | max_mult[0] = max_mult[1] = ULLONG_MAX; | |
268 | do_div(max_mult, max_ticks, max_mult); | |
269 | dprintf("max_ticks=%llu, __builtin_clzll=%d, max_mult=0x%016llx%016llx\n", | |
270 | max_ticks, __builtin_clzll(max_ticks), max_mult[1], max_mult[0]); | |
271 | ||
272 | /* | |
273 | * Find the largest shift count that will produce | |
274 | * a multiplier less than max_mult | |
275 | */ | |
276 | do_div(max_mult, cycles_per_usec, tmp); | |
277 | do_div(tmp, 1000ULL, tmp); | |
278 | while (tmp[0] > 1 || tmp[1] > 1) { | |
279 | do_shift(tmp, 1); | |
280 | sft++; | |
281 | dprintf("tmp=0x%016llx%016llx, sft=%u\n", tmp[1], tmp[0], sft); | |
dcbc7841 | 282 | } |
dcbc7841 | 283 | |
cf6b7fb4 JA |
284 | *shift = sft; |
285 | // *((unsigned long long *)mult) = (__uint128_t) (((__uint128_t)1 << sft) * 1000 / cycles_per_usec); | |
286 | break; | |
dcbc7841 | 287 | } |
cf6b7fb4 JA |
288 | case CLOCK64_2STAGE: { |
289 | unsigned long long tmp; | |
dcbc7841 VF |
290 | /* |
291 | * This clock tick to nsec conversion requires two stages. | |
292 | * | |
48e7b920 | 293 | * Stage 1: Determine how many ~MAX_CLOCK_SEC_2STAGE periods worth of clock ticks |
dcbc7841 | 294 | * have elapsed and set nsecs to the appropriate value for those |
48e7b920 VF |
295 | * ~MAX_CLOCK_SEC_2STAGE periods. |
296 | * Stage 2: Subtract the ticks for the elapsed ~MAX_CLOCK_SEC_2STAGE periods from | |
dcbc7841 VF |
297 | * Stage 1. Convert remaining clock ticks to nsecs and add to previously |
298 | * set nsec value. | |
299 | * | |
300 | * To optimize the arithmetic operations, use the greatest power of 2 ticks | |
48e7b920 | 301 | * less than the number of ticks in MAX_CLOCK_SEC_2STAGE seconds. |
dcbc7841 VF |
302 | * |
303 | */ | |
cf6b7fb4 JA |
304 | // Use a period shorter than MAX_CLOCK_SEC here for better accuracy |
305 | calc_mult_shift(CLOCK64_MULT_SHIFT, mult, shift, MAX_CLOCK_SEC_2STAGE, cycles_per_usec); | |
306 | ||
307 | // Find the greatest power of 2 clock ticks that is less than the ticks in MAX_CLOCK_SEC_2STAGE | |
308 | max_cycles_shift = max_cycles_mask = 0; | |
309 | tmp = MAX_CLOCK_SEC_2STAGE * 1000000ULL * cycles_per_usec; | |
310 | dprintf("tmp=%llu, max_cycles_shift=%u\n", tmp, max_cycles_shift); | |
311 | while (tmp > 1) { | |
312 | tmp >>= 1; | |
313 | max_cycles_shift++; | |
dcbc7841 | 314 | dprintf("tmp=%llu, max_cycles_shift=%u\n", tmp, max_cycles_shift); |
cf6b7fb4 JA |
315 | } |
316 | // if use use (1ULL << max_cycles_shift) * 1000 / cycles_per_usec here we will | |
317 | // have a discontinuity every (1ULL << max_cycles_shift) cycles | |
318 | nsecs_for_max_cycles = (1ULL << max_cycles_shift) * *((unsigned long long *)mult) >> *shift; | |
dcbc7841 | 319 | |
cf6b7fb4 JA |
320 | // Use a bitmask to calculate ticks % (1ULL << max_cycles_shift) |
321 | for (tmp = 0; tmp < max_cycles_shift; tmp++) | |
322 | max_cycles_mask |= 1ULL << tmp; | |
dcbc7841 | 323 | |
cf6b7fb4 | 324 | dprintf("max_cycles_shift=%u, 2^max_cycles_shift=%llu, nsecs_for_max_cycles=%llu, max_cycles_mask=%016llx\n", |
dcbc7841 VF |
325 | max_cycles_shift, (1ULL << max_cycles_shift), |
326 | nsecs_for_max_cycles, max_cycles_mask); | |
327 | ||
328 | ||
cf6b7fb4 | 329 | break; |
dcbc7841 | 330 | } |
cf6b7fb4 | 331 | case CLOCK64_LOCK: { |
48e7b920 VF |
332 | /* |
333 | * This clock tick to nsec conversion also requires two stages. | |
334 | * | |
335 | * Stage 1: Add to nsec the current running total of elapsed long periods | |
336 | * Stage 2: Subtract from clock ticks the tick count corresponding to the | |
337 | * most recently elapsed long period. Convert the remaining ticks to | |
338 | * nsec and add to the previous nsec value. | |
339 | * | |
340 | * In practice the elapsed nsec from Stage 1 and the tick count subtracted | |
341 | * in Stage 2 will be maintained in a separate thread. | |
342 | * | |
343 | */ | |
cf6b7fb4 JA |
344 | calc_mult_shift(CLOCK64_2STAGE, mult, shift, MAX_CLOCK_SEC, cycles_per_usec); |
345 | cycles_start = 0; | |
346 | break; | |
48e7b920 | 347 | } |
cf6b7fb4 JA |
348 | case CLOCK128_MULT_SHIFT: { |
349 | __uint128_t max_mult, tmp; | |
350 | unsigned int sft = 0; | |
351 | ||
352 | /* | |
353 | * Calculate the largest multiplier that will not | |
354 | * produce a 128-bit overflow in the multiplication | |
355 | * step of the clock ticks to nsec conversion | |
356 | */ | |
357 | max_mult = ((__uint128_t) ULLONG_MAX) << 64 | ULLONG_MAX; | |
358 | max_mult /= max_ticks; | |
359 | dprintf("max_ticks=%llu, __builtin_clzll=%d, max_mult=0x%016llx%016llx\n", | |
dcbc7841 VF |
360 | max_ticks, __builtin_clzll(max_ticks), |
361 | (unsigned long long) (max_mult >> 64), | |
362 | (unsigned long long) max_mult); | |
363 | ||
cf6b7fb4 JA |
364 | /* |
365 | * Find the largest shift count that will produce | |
366 | * a multiplier less than max_mult | |
367 | */ | |
368 | tmp = max_mult * cycles_per_usec / 1000; | |
369 | while (tmp > 1) { | |
370 | tmp >>= 1; | |
371 | sft++; | |
372 | dprintf("tmp=0x%016llx%016llx, sft=%u\n", | |
dcbc7841 VF |
373 | (unsigned long long) (tmp >> 64), |
374 | (unsigned long long) tmp, sft); | |
cf6b7fb4 | 375 | } |
dcbc7841 | 376 | |
cf6b7fb4 JA |
377 | *shift = sft; |
378 | *((__uint128_t *)mult) = (__uint128_t) (((__uint128_t)1 << sft) * 1000 / cycles_per_usec); | |
379 | break; | |
48e7b920 VF |
380 | } |
381 | } | |
dcbc7841 VF |
382 | } |
383 | ||
cf6b7fb4 JA |
384 | static int discontinuity(int mode, int delta_ticks, int delta_nsec, |
385 | unsigned long long start, unsigned long len) | |
dcbc7841 VF |
386 | { |
387 | int i; | |
388 | unsigned long mismatches = 0, bad_mismatches = 0; | |
389 | unsigned long long delta, max_mismatch = 0; | |
390 | unsigned long long *ns = nsecs; | |
391 | ||
392 | for (i = 0; i < len; ns++, i++) { | |
393 | *ns = get_nsec(mode, start + i); | |
394 | if (i - delta_ticks >= 0) { | |
395 | if (*ns > *(ns - delta_ticks)) | |
396 | delta = *ns - *(ns - delta_ticks); | |
397 | else | |
398 | delta = *(ns - delta_ticks) - *ns; | |
399 | if (delta > delta_nsec) | |
400 | delta -= delta_nsec; | |
401 | else | |
402 | delta = delta_nsec - delta; | |
403 | if (delta) { | |
404 | mismatches++; | |
405 | if (delta > 1) | |
406 | bad_mismatches++; | |
407 | if (delta > max_mismatch) | |
408 | max_mismatch = delta; | |
409 | } | |
410 | } | |
411 | if (!bad_mismatches) | |
412 | assert(max_mismatch == 0 || max_mismatch == 1); | |
413 | if (!mismatches) | |
414 | assert(max_mismatch == 0); | |
415 | } | |
416 | ||
417 | printf("%lu discontinuities (%lu%%) (%lu errors > 1ns, max delta = %lluns) for ticks = %llu...%llu\n", | |
418 | mismatches, (mismatches * 100) / len, bad_mismatches, max_mismatch, start, | |
419 | start + len - 1); | |
420 | return mismatches; | |
421 | } | |
422 | ||
423 | #define MIN_TICKS 1ULL | |
424 | #define LEN 1000000000ULL | |
425 | #define NSEC_ONE_SEC 1000000000ULL | |
426 | #define TESTLEN 9 | |
cf6b7fb4 JA |
427 | |
428 | static long long test_clock(int mode, int cycles_per_usec, int fast_test, | |
429 | int quiet, int delta_ticks, int delta_nsec) | |
dcbc7841 VF |
430 | { |
431 | int i; | |
432 | long long delta; | |
433 | unsigned long long max_ticks; | |
434 | unsigned long long nsecs; | |
435 | void *mult; | |
436 | unsigned long long test_ns[TESTLEN] = | |
437 | {NSEC_ONE_SEC, NSEC_ONE_SEC, | |
438 | NSEC_ONE_SEC, NSEC_ONE_SEC*60, NSEC_ONE_SEC*60*60, | |
439 | NSEC_ONE_SEC*60*60*2, NSEC_ONE_SEC*60*60*4, | |
440 | NSEC_ONE_SEC*60*60*8, NSEC_ONE_SEC*60*60*24}; | |
441 | unsigned long long test_ticks[TESTLEN]; | |
442 | ||
443 | max_ticks = MAX_CLOCK_SEC * (unsigned long long) cycles_per_usec * 1000000ULL; | |
444 | ||
445 | switch(mode) { | |
cf6b7fb4 JA |
446 | case CLOCK64_MULT_SHIFT: |
447 | mult = &clock_mult; | |
448 | break; | |
449 | case CLOCK64_EMULATE_128: | |
450 | mult = clock_mult64_128; | |
451 | break; | |
452 | case CLOCK64_2STAGE: | |
453 | mult = &clock_mult; | |
454 | break; | |
455 | case CLOCK64_LOCK: | |
456 | mult = &clock_mult; | |
457 | break; | |
458 | case CLOCK128_MULT_SHIFT: | |
459 | mult = &clock_mult128; | |
460 | break; | |
461 | default: | |
462 | assert(0); | |
dcbc7841 VF |
463 | } |
464 | calc_mult_shift(mode, mult, &clock_shift, MAX_CLOCK_SEC, cycles_per_usec); | |
465 | nsecs = get_nsec(mode, max_ticks); | |
466 | delta = nsecs/1000000 - MAX_CLOCK_SEC*1000; | |
467 | ||
48e7b920 | 468 | if (mode == CLOCK64_2STAGE) { |
dcbc7841 VF |
469 | test_ns[0] = nsecs_for_max_cycles - 1; |
470 | test_ns[1] = nsecs_for_max_cycles; | |
471 | test_ticks[0] = (1ULL << max_cycles_shift) - 1; | |
472 | test_ticks[1] = (1ULL << max_cycles_shift); | |
473 | ||
474 | for (i = 2; i < TESTLEN; i++) | |
475 | test_ticks[i] = test_ns[i] / 1000 * cycles_per_usec; | |
476 | } | |
477 | else { | |
478 | for (i = 0; i < TESTLEN; i++) | |
479 | test_ticks[i] = test_ns[i] / 1000 * cycles_per_usec; | |
480 | } | |
481 | ||
482 | if (!quiet) { | |
483 | printf("cycles_per_usec=%d, delta_ticks=%d, delta_nsec=%d, max_ticks=%llu, shift=%u, 2^shift=%llu\n", | |
484 | cycles_per_usec, delta_ticks, delta_nsec, max_ticks, clock_shift, (1ULL << clock_shift)); | |
485 | switch(mode) { | |
48e7b920 VF |
486 | case CLOCK64_LOCK: |
487 | case CLOCK64_2STAGE: | |
488 | case CLOCK64_MULT_SHIFT: { | |
dcbc7841 VF |
489 | printf("clock_mult=%llu, clock_mult / 2^clock_shift=%f\n", |
490 | clock_mult, (double) clock_mult / (1ULL << clock_shift)); | |
491 | break; | |
492 | } | |
48e7b920 | 493 | case CLOCK64_EMULATE_128: { |
dcbc7841 VF |
494 | printf("clock_mult=0x%016llx%016llx\n", |
495 | clock_mult64_128[1], clock_mult64_128[0]); | |
496 | break; | |
497 | } | |
48e7b920 | 498 | case CLOCK128_MULT_SHIFT: { |
dcbc7841 VF |
499 | printf("clock_mult=0x%016llx%016llx\n", |
500 | (unsigned long long) (clock_mult128 >> 64), | |
501 | (unsigned long long) clock_mult128); | |
502 | break; | |
503 | } | |
504 | } | |
505 | printf("get_nsec(max_ticks) = %lluns, should be %lluns, error<=abs(%lld)ms\n", | |
506 | nsecs, MAX_CLOCK_SEC*1000000000ULL, delta); | |
507 | } | |
508 | ||
509 | for (i = 0; i < TESTLEN; i++) | |
510 | { | |
511 | nsecs = get_nsec(mode, test_ticks[i]); | |
512 | delta = nsecs > test_ns[i] ? nsecs - test_ns[i] : test_ns[i] - nsecs; | |
513 | if (!quiet || delta > 0) | |
514 | printf("get_nsec(%llu)=%llu, expected %llu, delta=%llu\n", | |
515 | test_ticks[i], nsecs, test_ns[i], delta); | |
516 | } | |
517 | ||
518 | if (!fast_test) { | |
519 | discontinuity(mode, delta_ticks, delta_nsec, max_ticks - LEN + 1, LEN); | |
520 | discontinuity(mode, delta_ticks, delta_nsec, MIN_TICKS, LEN); | |
521 | } | |
522 | ||
523 | if (!quiet) | |
524 | printf("\n\n"); | |
525 | ||
526 | return delta; | |
527 | } | |
528 | ||
529 | int main(int argc, char *argv[]) | |
530 | { | |
dcbc7841 | 531 | nsecs = malloc(LEN * sizeof(unsigned long long)); |
dcbc7841 | 532 | |
48e7b920 | 533 | test_clock(CLOCK64_LOCK, 3333, 1, 0, 0, 0); |
48e7b920 VF |
534 | test_clock(CLOCK64_LOCK, 1000, 1, 0, 1, 1); |
535 | test_clock(CLOCK64_LOCK, 1100, 1, 0, 11, 10); | |
536 | test_clock(CLOCK64_LOCK, 3000, 1, 0, 3, 1); | |
537 | test_clock(CLOCK64_LOCK, 3333, 1, 0, 3333, 1000); | |
538 | test_clock(CLOCK64_LOCK, 3392, 1, 0, 424, 125); | |
539 | test_clock(CLOCK64_LOCK, 4500, 1, 0, 9, 2); | |
540 | test_clock(CLOCK64_LOCK, 5000, 1, 0, 5, 1); | |
dcbc7841 VF |
541 | |
542 | free(nsecs); | |
543 | return 0; | |
544 | } |