Commit | Line | Data |
---|---|---|
dcbc7841 VF |
1 | /* |
2 | * Carry out arithmetic to explore conversion of CPU clock ticks to nsec | |
3 | * | |
4 | * When we use the CPU clock for timing, we do the following: | |
5 | * | |
6 | * 1) Calibrate the CPU clock to relate the frequency of CPU clock ticks | |
7 | * to actual time. | |
8 | * | |
9 | * Using gettimeofday() or clock_gettime(), count how many CPU clock | |
10 | * ticks occur per usec | |
11 | * | |
12 | * 2) Calculate conversion factors so that we can ultimately convert | |
13 | * from clock ticks to nsec with | |
14 | * nsec = (ticks * clock_mult) >> clock_shift | |
15 | * | |
16 | * This is equivalent to | |
17 | * nsec = ticks * (MULTIPLIER / cycles_per_nsec) / MULTIPLIER | |
18 | * where | |
19 | * clock_mult = MULTIPLIER / cycles_per_nsec | |
20 | * MULTIPLIER = 2^clock_shift | |
21 | * | |
22 | * It would be simpler to just calculate nsec = ticks / cycles_per_nsec, | |
23 | * but all of this is necessary because of rounding when calculating | |
24 | * cycles_per_nsec. With a 3.0GHz CPU, cycles_per_nsec would simply | |
25 | * be 3. But with a 3.33GHz CPU or a 4.5GHz CPU, the fractional | |
26 | * portion is lost with integer arithmetic. | |
27 | * | |
28 | * This multiply and shift calculation also has a performance benefit | |
29 | * as multiplication and bit shift operations are faster than integer | |
30 | * division. | |
31 | * | |
32 | * 3) Dynamically determine clock_shift and clock_mult at run time based | |
33 | * on MAX_CLOCK_SEC and cycles_per_usec. MAX_CLOCK_SEC is the maximum | |
34 | * duration for which the conversion will be valid. | |
35 | * | |
36 | * The primary constraint is that (ticks * clock_mult) must not overflow | |
37 | * when ticks is at its maximum value. | |
38 | * | |
39 | * So we have | |
48e7b920 | 40 | * max_ticks = MAX_CLOCK_SEC * 1000000000 * cycles_per_nsec |
dcbc7841 VF |
41 | * max_ticks * clock_mult <= ULLONG_MAX |
42 | * max_ticks * MULTIPLIER / cycles_per_nsec <= ULLONG_MAX | |
48e7b920 | 43 | * MULTIPLIER <= ULLONG_MAX * cycles_per_nsec / max_ticks |
dcbc7841 VF |
44 | * |
45 | * Then choose the largest clock_shift that satisfies | |
48e7b920 | 46 | * 2^clock_shift <= ULLONG_MAX * cycles_per_nsec / max_ticks |
dcbc7841 VF |
47 | * |
48 | * Finally calculate the appropriate clock_mult associated with clock_shift | |
49 | * clock_mult = 2^clock_shift / cycles_per_nsec | |
50 | * | |
51 | * 4) In the code below we have cycles_per_usec and use | |
52 | * cycles_per_nsec = cycles_per_usec / 1000 | |
53 | * | |
48e7b920 VF |
54 | * |
55 | * The code below implements 4 clock tick to nsec conversion strategies | |
56 | * | |
57 | * i) 64-bit arithmetic for the (ticks * clock_mult) product with the | |
58 | * conversion valid for at most MAX_CLOCK_SEC | |
59 | * | |
60 | * ii) NOT IMPLEMENTED Use 64-bit integers to emulate 128-bit multiplication | |
61 | * for the (ticks * clock_mult) product | |
62 | * | |
63 | * iii) 64-bit arithmetic with clock ticks to nsec conversion occurring in | |
64 | * two stages. The first stage counts the number of discrete, large chunks | |
65 | * of time that have elapsed. To this is added the time represented by | |
66 | * the remaining clock ticks. The advantage of this strategy is better | |
67 | * accuracy because the (ticks * clock_mult) product is only used for the | |
68 | * final fractional chunk, which spans a much shorter period | |
69 | * | |
70 | * iv) 64-bit arithmetic with the clock ticks to nsec conversion occurring in | |
71 | * two stages. This is carried out using locks to update the number of | |
72 | * large time chunks (MAX_CLOCK_SEC_2STAGE) that have elapsed. | |
73 | * | |
74 | * v) 128-bit arithmetic used for the clock ticks to nsec conversion. | |
75 | * | |
dcbc7841 VF |
76 | */ |
77 | ||
78 | #include <stdio.h> | |
79 | #include <stdlib.h> | |
80 | #include <limits.h> | |
81 | #include <assert.h> | |
82 | #include <stdlib.h> | |
48e7b920 | 83 | #include "lib/seqlock.h" |
dcbc7841 VF |
84 | |
/* Set to a non-zero value to enable the dprintf() trace output. */
#define DEBUG 0

/*
 * MAX_CLOCK_SEC is the maximum duration (in seconds) for which the
 * single-stage conversion must remain valid.
 *
 * MAX_CLOCK_SEC_2STAGE is the length (in seconds) of the large time chunks
 * used by the two-stage conversions.
 *
 * Both expansions are parenthesized so that they behave as a single value
 * in any expression, e.g. as the right-hand operand of a division.
 */
#define MAX_CLOCK_SEC (365*24*60*60ULL)
#define MAX_CLOCK_SEC_2STAGE (60*60ULL)

/*
 * printf() that is only active when DEBUG is non-zero. Wrapped in
 * do { } while (0) so that it expands to exactly one statement and can be
 * used safely as the body of an if/else.
 */
#define dprintf(...) do { if (DEBUG) { printf(__VA_ARGS__); } } while (0)
89 | ||
/*
 * Selectors for the clock tick to nsec conversion strategies.
 *
 * The double-underscore values are single-bit building blocks: one bit for
 * the width of the arithmetic and one bit for the algorithm. The CLOCK*
 * values combine one bit of each kind and are the values passed around as
 * the "mode" argument.
 */
enum {
	/* width of the arithmetic */
	__CLOCK64_BIT		= 0x01,
	__CLOCK128_BIT		= 0x02,

	/* algorithm */
	__CLOCK_MULT_SHIFT	= 0x04,
	__CLOCK_EMULATE_128	= 0x08,
	__CLOCK_2STAGE		= 0x10,
	__CLOCK_LOCK		= 0x20,

	/* usable modes */
	CLOCK64_MULT_SHIFT	= __CLOCK_MULT_SHIFT | __CLOCK64_BIT,
	CLOCK64_EMULATE_128	= __CLOCK_EMULATE_128 | __CLOCK64_BIT,
	CLOCK64_2STAGE		= __CLOCK_2STAGE | __CLOCK64_BIT,
	CLOCK64_LOCK		= __CLOCK_LOCK | __CLOCK64_BIT,
	CLOCK128_MULT_SHIFT	= __CLOCK_MULT_SHIFT | __CLOCK128_BIT,
};
104 | ||
48e7b920 VF |
105 | struct seqlock clock_seqlock; |
106 | unsigned long long cycles_start; | |
107 | unsigned long long elapsed_nsec; | |
108 | ||
dcbc7841 VF |
109 | unsigned int max_cycles_shift; |
110 | unsigned long long max_cycles_mask; | |
dcbc7841 | 111 | unsigned long long nsecs_for_max_cycles; |
48e7b920 VF |
112 | |
113 | unsigned int clock_shift; | |
114 | unsigned long long clock_mult; | |
115 | ||
116 | unsigned long long *nsecs; | |
dcbc7841 VF |
117 | unsigned long long clock_mult64_128[2]; |
118 | __uint128_t clock_mult128; | |
119 | ||
dcbc7841 VF |
120 | /* |
121 | * Functions for carrying out 128-bit | |
122 | * arithmetic using 64-bit integers | |
123 | * | |
124 | * 128-bit integers are stored as | |
125 | * arrays of two 64-bit integers | |
126 | * | |
127 | * Ordering is little endian | |
128 | * | |
129 | * a[0] has the less significant bits | |
130 | * a[1] has the more significant bits | |
48e7b920 VF |
131 | * |
132 | * NOT FULLY IMPLEMENTED | |
dcbc7841 VF |
133 | */ |
/*
 * Multiply two 64-bit values and return the full 128-bit result in
 * *lo (less significant half) and *hi (more significant half).
 *
 * The operands are split into 32-bit halves so that none of the partial
 * products can overflow. Assumes unsigned long long is exactly 64 bits wide.
 */
static void mult_64x64(unsigned long long x, unsigned long long y,
		       unsigned long long *lo, unsigned long long *hi)
{
	const unsigned long long low32 = 0xffffffffULL;
	unsigned long long x_lo = x & low32, x_hi = x >> 32;
	unsigned long long y_lo = y & low32, y_hi = y >> 32;
	unsigned long long ll = x_lo * y_lo;
	unsigned long long lh = x_lo * y_hi;
	unsigned long long hl = x_hi * y_lo;
	unsigned long long hh = x_hi * y_hi;
	/* sum of the three contributions to bits 32..63, at most 34 bits wide */
	unsigned long long mid = (ll >> 32) + (lh & low32) + (hl & low32);

	*lo = (mid << 32) | (ll & low32);
	*hi = hh + (lh >> 32) + (hl >> 32) + (mid >> 32);
}

/*
 * Multiply the 128-bit value a[] by the 64-bit value b and store the low
 * 128 bits of the result in product[].
 *
 * a, product: little endian pairs, [0] holds the less significant bits
 * b:          64-bit multiplier
 *
 * Bits of the result above bit 127 are discarded. product may alias a
 * because both words of a are read before product is written.
 */
void do_mult(unsigned long long a[2], unsigned long long b, unsigned long long product[2])
{
	unsigned long long a_lo = a[0], a_hi = a[1];
	unsigned long long lo, carry;

	mult_64x64(a_lo, b, &lo, &carry);

	product[0] = lo;
	/* the high word wraps on overflow, which truncates to 128 bits */
	product[1] = carry + a_hi * b;
}
139 | ||
/*
 * Divide the 128-bit value a[] by the 64-bit value b and store the 128-bit
 * quotient in c[]. The remainder is discarded.
 *
 * a, c: little endian pairs, [0] holds the less significant bits
 * b:    64-bit divisor
 *
 * c may alias a; the dividend is copied before c is written.
 * If b is zero the quotient is undefined and c is left untouched.
 *
 * Implemented as binary long division, one quotient bit per iteration.
 * Assumes unsigned long long is exactly 64 bits wide.
 */
void do_div(unsigned long long a[2], unsigned long long b, unsigned long long c[2])
{
	unsigned long long num[2];
	unsigned long long quot[2] = { 0, 0 };
	unsigned long long rem = 0;
	int bit;

	if (b == 0)
		return;

	num[0] = a[0];
	num[1] = a[1];

	for (bit = 127; bit >= 0; bit--) {
		/* bit shifted out of rem: the true remainder is 65 bits wide here */
		unsigned long long carry = rem >> 63;

		rem = (rem << 1) | ((num[bit / 64] >> (bit % 64)) & 1ULL);
		if (carry || rem >= b) {
			/* wraps to the correct value when carry is set */
			rem -= b;
			quot[bit / 64] |= 1ULL << (bit % 64);
		}
	}

	c[0] = quot[0];
	c[1] = quot[1];
}
144 | ||
void do_shift(unsigned long long a[2], unsigned int count);

/*
 * Shift the 128-bit value a[] right by count bits, for counts of 64 or more.
 *
 * a:     little endian pair, [0] holds the less significant bits
 * count: number of bit positions to shift by
 *
 * A count of 128 or more clears the value; shifting a 64-bit word by 64 or
 * more bits would be undefined, so that case is handled explicitly.
 * Counts below 64 are forwarded to do_shift().
 */
void do_shift64(unsigned long long a[2], unsigned int count)
{
	if (count < 64) {
		do_shift(a, count);
		return;
	}

	a[0] = count < 128 ? a[1] >> (count - 64) : 0;
	a[1] = 0;
}

/*
 * Shift the 128-bit value a[] right by count bits.
 *
 * a:     little endian pair, [0] holds the less significant bits
 * count: number of bit positions to shift by, any value is accepted
 *
 * Assumes unsigned long long is exactly 64 bits wide.
 */
void do_shift(unsigned long long a[2], unsigned int count)
{
	if (count >= 64) {
		do_shift64(a, count);
		return;
	}

	/* a shift by (64 - 0) below would be undefined */
	if (count == 0)
		return;

	a[0] = (a[0] >> count) | (a[1] << (64 - count));
	a[1] >>= count;
}
162 | ||
48e7b920 VF |
163 | void update_clock(unsigned long long t) |
164 | { | |
165 | write_seqlock_begin(&clock_seqlock); | |
166 | elapsed_nsec = (t >> max_cycles_shift) * nsecs_for_max_cycles; | |
167 | cycles_start = t & ~max_cycles_mask; | |
168 | write_seqlock_end(&clock_seqlock); | |
169 | } | |
170 | ||
171 | unsigned long long _get_nsec(int mode, unsigned long long t) | |
dcbc7841 VF |
172 | { |
173 | switch(mode) { | |
48e7b920 | 174 | case CLOCK64_MULT_SHIFT: { |
dcbc7841 VF |
175 | return (t * clock_mult) >> clock_shift; |
176 | } | |
48e7b920 | 177 | case CLOCK64_EMULATE_128: { |
dcbc7841 VF |
178 | unsigned long long product[2]; |
179 | do_mult(clock_mult64_128, t, product); | |
180 | do_shift(product, clock_shift); | |
181 | return product[0]; | |
182 | } | |
48e7b920 | 183 | case CLOCK64_2STAGE: { |
dcbc7841 VF |
184 | unsigned long long multiples, nsec; |
185 | multiples = t >> max_cycles_shift; | |
186 | dprintf("multiples=%llu\n", multiples); | |
187 | nsec = multiples * nsecs_for_max_cycles; | |
188 | nsec += ((t & max_cycles_mask) * clock_mult) >> clock_shift; | |
189 | return nsec; | |
190 | } | |
48e7b920 VF |
191 | case CLOCK64_LOCK: { |
192 | unsigned int seq; | |
193 | unsigned long long nsec; | |
194 | do { | |
195 | seq = read_seqlock_begin(&clock_seqlock); | |
196 | nsec = elapsed_nsec; | |
197 | nsec += ((t - cycles_start) * clock_mult) >> clock_shift; | |
198 | } while (read_seqlock_retry(&clock_seqlock, seq)); | |
199 | return nsec; | |
200 | } | |
201 | case CLOCK128_MULT_SHIFT: { | |
dcbc7841 VF |
202 | return (unsigned long long)((t * clock_mult128) >> clock_shift); |
203 | } | |
204 | default: { | |
205 | assert(0); | |
206 | } | |
207 | } | |
208 | } | |
209 | ||
48e7b920 VF |
210 | unsigned long long get_nsec(int mode, unsigned long long t) |
211 | { | |
212 | if (mode == CLOCK64_LOCK) { | |
213 | update_clock(t); | |
214 | } | |
215 | ||
216 | return _get_nsec(mode, t); | |
217 | } | |
218 | ||
dcbc7841 VF |
219 | void calc_mult_shift(int mode, void *mult, unsigned int *shift, unsigned long long max_sec, unsigned long long cycles_per_usec) |
220 | { | |
221 | unsigned long long max_ticks; | |
222 | max_ticks = max_sec * cycles_per_usec * 1000000ULL; | |
223 | ||
224 | switch (mode) { | |
48e7b920 | 225 | case CLOCK64_MULT_SHIFT: { |
dcbc7841 VF |
226 | unsigned long long max_mult, tmp; |
227 | unsigned int sft = 0; | |
228 | ||
229 | /* | |
230 | * Calculate the largest multiplier that will not | |
231 | * produce a 64-bit overflow in the multiplication | |
232 | * step of the clock ticks to nsec conversion | |
233 | */ | |
234 | max_mult = ULLONG_MAX / max_ticks; | |
235 | dprintf("max_ticks=%llu, __builtin_clzll=%d, max_mult=%llu\n", max_ticks, __builtin_clzll(max_ticks), max_mult); | |
236 | ||
237 | /* | |
238 | * Find the largest shift count that will produce | |
239 | * a multiplier less than max_mult | |
240 | */ | |
241 | tmp = max_mult * cycles_per_usec / 1000; | |
242 | while (tmp > 1) { | |
243 | tmp >>= 1; | |
244 | sft++; | |
245 | dprintf("tmp=%llu, sft=%u\n", tmp, sft); | |
246 | } | |
247 | ||
248 | *shift = sft; | |
249 | *((unsigned long long *)mult) = (unsigned long long) ((1ULL << sft) * 1000 / cycles_per_usec); | |
250 | break; | |
251 | } | |
48e7b920 | 252 | case CLOCK64_EMULATE_128: { |
dcbc7841 VF |
253 | unsigned long long max_mult[2], tmp[2]; |
254 | unsigned int sft = 0; | |
255 | ||
256 | /* | |
257 | * Calculate the largest multiplier that will not | |
258 | * produce a 128-bit overflow in the multiplication | |
259 | * step of the clock ticks to nsec conversion, | |
260 | * but use only 64-bit integers in the process | |
261 | */ | |
262 | max_mult[0] = max_mult[1] = ULLONG_MAX; | |
263 | do_div(max_mult, max_ticks, max_mult); | |
264 | dprintf("max_ticks=%llu, __builtin_clzll=%d, max_mult=0x%016llx%016llx\n", | |
265 | max_ticks, __builtin_clzll(max_ticks), max_mult[1], max_mult[0]); | |
266 | ||
267 | /* | |
268 | * Find the largest shift count that will produce | |
269 | * a multiplier less than max_mult | |
270 | */ | |
271 | do_div(max_mult, cycles_per_usec, tmp); | |
272 | do_div(tmp, 1000ULL, tmp); | |
273 | while (tmp[0] > 1 || tmp[1] > 1) { | |
274 | do_shift(tmp, 1); | |
275 | sft++; | |
276 | dprintf("tmp=0x%016llx%016llx, sft=%u\n", tmp[1], tmp[0], sft); | |
277 | } | |
278 | ||
279 | *shift = sft; | |
280 | // *((unsigned long long *)mult) = (__uint128_t) (((__uint128_t)1 << sft) * 1000 / cycles_per_usec); | |
281 | break; | |
282 | } | |
48e7b920 | 283 | case CLOCK64_2STAGE: { |
dcbc7841 VF |
284 | unsigned long long tmp; |
285 | /* | |
286 | * This clock tick to nsec conversion requires two stages. | |
287 | * | |
48e7b920 | 288 | * Stage 1: Determine how many ~MAX_CLOCK_SEC_2STAGE periods worth of clock ticks |
dcbc7841 | 289 | * have elapsed and set nsecs to the appropriate value for those |
48e7b920 VF |
290 | * ~MAX_CLOCK_SEC_2STAGE periods. |
291 | * Stage 2: Subtract the ticks for the elapsed ~MAX_CLOCK_SEC_2STAGE periods from | |
dcbc7841 VF |
292 | * Stage 1. Convert remaining clock ticks to nsecs and add to previously |
293 | * set nsec value. | |
294 | * | |
295 | * To optimize the arithmetic operations, use the greatest power of 2 ticks | |
48e7b920 | 296 | * less than the number of ticks in MAX_CLOCK_SEC_2STAGE seconds. |
dcbc7841 VF |
297 | * |
298 | */ | |
299 | // Use a period shorter than MAX_CLOCK_SEC here for better accuracy | |
48e7b920 | 300 | calc_mult_shift(CLOCK64_MULT_SHIFT, mult, shift, MAX_CLOCK_SEC_2STAGE, cycles_per_usec); |
dcbc7841 | 301 | |
48e7b920 | 302 | // Find the greatest power of 2 clock ticks that is less than the ticks in MAX_CLOCK_SEC_2STAGE |
dcbc7841 | 303 | max_cycles_shift = max_cycles_mask = 0; |
48e7b920 | 304 | tmp = MAX_CLOCK_SEC_2STAGE * 1000000ULL * cycles_per_usec; |
dcbc7841 VF |
305 | dprintf("tmp=%llu, max_cycles_shift=%u\n", tmp, max_cycles_shift); |
306 | while (tmp > 1) { | |
307 | tmp >>= 1; | |
308 | max_cycles_shift++; | |
309 | dprintf("tmp=%llu, max_cycles_shift=%u\n", tmp, max_cycles_shift); | |
310 | } | |
311 | // if use use (1ULL << max_cycles_shift) * 1000 / cycles_per_usec here we will | |
312 | // have a discontinuity every (1ULL << max_cycles_shift) cycles | |
313 | nsecs_for_max_cycles = (1ULL << max_cycles_shift) * *((unsigned long long *)mult) >> *shift; | |
314 | ||
315 | // Use a bitmask to calculate ticks % (1ULL << max_cycles_shift) | |
316 | for (tmp = 0; tmp < max_cycles_shift; tmp++) | |
317 | max_cycles_mask |= 1ULL << tmp; | |
318 | ||
319 | dprintf("max_cycles_shift=%u, 2^max_cycles_shift=%llu, nsecs_for_max_cycles=%llu, max_cycles_mask=%016llx\n", | |
320 | max_cycles_shift, (1ULL << max_cycles_shift), | |
321 | nsecs_for_max_cycles, max_cycles_mask); | |
322 | ||
323 | ||
324 | break; | |
325 | } | |
48e7b920 VF |
326 | case CLOCK64_LOCK: { |
327 | /* | |
328 | * This clock tick to nsec conversion also requires two stages. | |
329 | * | |
330 | * Stage 1: Add to nsec the current running total of elapsed long periods | |
331 | * Stage 2: Subtract from clock ticks the tick count corresponding to the | |
332 | * most recently elapsed long period. Convert the remaining ticks to | |
333 | * nsec and add to the previous nsec value. | |
334 | * | |
335 | * In practice the elapsed nsec from Stage 1 and the tick count subtracted | |
336 | * in Stage 2 will be maintained in a separate thread. | |
337 | * | |
338 | */ | |
339 | calc_mult_shift(CLOCK64_2STAGE, mult, shift, MAX_CLOCK_SEC, cycles_per_usec); | |
340 | cycles_start = 0; | |
341 | break; | |
342 | } | |
343 | case CLOCK128_MULT_SHIFT: { | |
dcbc7841 VF |
344 | __uint128_t max_mult, tmp; |
345 | unsigned int sft = 0; | |
346 | ||
347 | /* | |
348 | * Calculate the largest multiplier that will not | |
349 | * produce a 128-bit overflow in the multiplication | |
350 | * step of the clock ticks to nsec conversion | |
351 | */ | |
352 | max_mult = ((__uint128_t) ULLONG_MAX) << 64 | ULLONG_MAX; | |
353 | max_mult /= max_ticks; | |
354 | dprintf("max_ticks=%llu, __builtin_clzll=%d, max_mult=0x%016llx%016llx\n", | |
355 | max_ticks, __builtin_clzll(max_ticks), | |
356 | (unsigned long long) (max_mult >> 64), | |
357 | (unsigned long long) max_mult); | |
358 | ||
359 | /* | |
360 | * Find the largest shift count that will produce | |
361 | * a multiplier less than max_mult | |
362 | */ | |
363 | tmp = max_mult * cycles_per_usec / 1000; | |
364 | while (tmp > 1) { | |
365 | tmp >>= 1; | |
366 | sft++; | |
367 | dprintf("tmp=0x%016llx%016llx, sft=%u\n", | |
368 | (unsigned long long) (tmp >> 64), | |
369 | (unsigned long long) tmp, sft); | |
48e7b920 | 370 | } |
dcbc7841 VF |
371 | |
372 | *shift = sft; | |
373 | *((__uint128_t *)mult) = (__uint128_t) (((__uint128_t)1 << sft) * 1000 / cycles_per_usec); | |
374 | break; | |
48e7b920 VF |
375 | } |
376 | } | |
dcbc7841 VF |
377 | } |
378 | ||
379 | int discontinuity(int mode, int delta_ticks, int delta_nsec, unsigned long long start, unsigned long len) | |
380 | { | |
381 | int i; | |
382 | unsigned long mismatches = 0, bad_mismatches = 0; | |
383 | unsigned long long delta, max_mismatch = 0; | |
384 | unsigned long long *ns = nsecs; | |
385 | ||
386 | for (i = 0; i < len; ns++, i++) { | |
387 | *ns = get_nsec(mode, start + i); | |
388 | if (i - delta_ticks >= 0) { | |
389 | if (*ns > *(ns - delta_ticks)) | |
390 | delta = *ns - *(ns - delta_ticks); | |
391 | else | |
392 | delta = *(ns - delta_ticks) - *ns; | |
393 | if (delta > delta_nsec) | |
394 | delta -= delta_nsec; | |
395 | else | |
396 | delta = delta_nsec - delta; | |
397 | if (delta) { | |
398 | mismatches++; | |
399 | if (delta > 1) | |
400 | bad_mismatches++; | |
401 | if (delta > max_mismatch) | |
402 | max_mismatch = delta; | |
403 | } | |
404 | } | |
405 | if (!bad_mismatches) | |
406 | assert(max_mismatch == 0 || max_mismatch == 1); | |
407 | if (!mismatches) | |
408 | assert(max_mismatch == 0); | |
409 | } | |
410 | ||
411 | printf("%lu discontinuities (%lu%%) (%lu errors > 1ns, max delta = %lluns) for ticks = %llu...%llu\n", | |
412 | mismatches, (mismatches * 100) / len, bad_mismatches, max_mismatch, start, | |
413 | start + len - 1); | |
414 | return mismatches; | |
415 | } | |
416 | ||
/* First tick value of the low-range discontinuity scan */
#define MIN_TICKS (1ULL)
/* Number of consecutive tick values covered by one discontinuity scan */
#define LEN (1000ULL * 1000ULL * 1000ULL)
/* Nanoseconds per second */
#define NSEC_ONE_SEC (1000ULL * 1000ULL * 1000ULL)
/* Number of fixed test points checked by test_clock() */
#define TESTLEN (9)
421 | long long test_clock(int mode, int cycles_per_usec, int fast_test, int quiet, int delta_ticks, int delta_nsec) | |
422 | { | |
423 | int i; | |
424 | long long delta; | |
425 | unsigned long long max_ticks; | |
426 | unsigned long long nsecs; | |
427 | void *mult; | |
428 | unsigned long long test_ns[TESTLEN] = | |
429 | {NSEC_ONE_SEC, NSEC_ONE_SEC, | |
430 | NSEC_ONE_SEC, NSEC_ONE_SEC*60, NSEC_ONE_SEC*60*60, | |
431 | NSEC_ONE_SEC*60*60*2, NSEC_ONE_SEC*60*60*4, | |
432 | NSEC_ONE_SEC*60*60*8, NSEC_ONE_SEC*60*60*24}; | |
433 | unsigned long long test_ticks[TESTLEN]; | |
434 | ||
435 | max_ticks = MAX_CLOCK_SEC * (unsigned long long) cycles_per_usec * 1000000ULL; | |
436 | ||
437 | switch(mode) { | |
48e7b920 | 438 | case CLOCK64_MULT_SHIFT: { |
dcbc7841 VF |
439 | mult = &clock_mult; |
440 | break; | |
441 | } | |
48e7b920 | 442 | case CLOCK64_EMULATE_128: { |
dcbc7841 VF |
443 | mult = clock_mult64_128; |
444 | break; | |
445 | } | |
48e7b920 VF |
446 | case CLOCK64_2STAGE: { |
447 | mult = &clock_mult; | |
448 | break; | |
449 | } | |
450 | case CLOCK64_LOCK: { | |
dcbc7841 VF |
451 | mult = &clock_mult; |
452 | break; | |
453 | } | |
48e7b920 | 454 | case CLOCK128_MULT_SHIFT: { |
dcbc7841 VF |
455 | mult = &clock_mult128; |
456 | break; | |
457 | } | |
458 | } | |
459 | calc_mult_shift(mode, mult, &clock_shift, MAX_CLOCK_SEC, cycles_per_usec); | |
460 | nsecs = get_nsec(mode, max_ticks); | |
461 | delta = nsecs/1000000 - MAX_CLOCK_SEC*1000; | |
462 | ||
48e7b920 | 463 | if (mode == CLOCK64_2STAGE) { |
dcbc7841 VF |
464 | test_ns[0] = nsecs_for_max_cycles - 1; |
465 | test_ns[1] = nsecs_for_max_cycles; | |
466 | test_ticks[0] = (1ULL << max_cycles_shift) - 1; | |
467 | test_ticks[1] = (1ULL << max_cycles_shift); | |
468 | ||
469 | for (i = 2; i < TESTLEN; i++) | |
470 | test_ticks[i] = test_ns[i] / 1000 * cycles_per_usec; | |
471 | } | |
472 | else { | |
473 | for (i = 0; i < TESTLEN; i++) | |
474 | test_ticks[i] = test_ns[i] / 1000 * cycles_per_usec; | |
475 | } | |
476 | ||
477 | if (!quiet) { | |
478 | printf("cycles_per_usec=%d, delta_ticks=%d, delta_nsec=%d, max_ticks=%llu, shift=%u, 2^shift=%llu\n", | |
479 | cycles_per_usec, delta_ticks, delta_nsec, max_ticks, clock_shift, (1ULL << clock_shift)); | |
480 | switch(mode) { | |
48e7b920 VF |
481 | case CLOCK64_LOCK: |
482 | case CLOCK64_2STAGE: | |
483 | case CLOCK64_MULT_SHIFT: { | |
dcbc7841 VF |
484 | printf("clock_mult=%llu, clock_mult / 2^clock_shift=%f\n", |
485 | clock_mult, (double) clock_mult / (1ULL << clock_shift)); | |
486 | break; | |
487 | } | |
48e7b920 | 488 | case CLOCK64_EMULATE_128: { |
dcbc7841 VF |
489 | printf("clock_mult=0x%016llx%016llx\n", |
490 | clock_mult64_128[1], clock_mult64_128[0]); | |
491 | break; | |
492 | } | |
48e7b920 | 493 | case CLOCK128_MULT_SHIFT: { |
dcbc7841 VF |
494 | printf("clock_mult=0x%016llx%016llx\n", |
495 | (unsigned long long) (clock_mult128 >> 64), | |
496 | (unsigned long long) clock_mult128); | |
497 | break; | |
498 | } | |
499 | } | |
500 | printf("get_nsec(max_ticks) = %lluns, should be %lluns, error<=abs(%lld)ms\n", | |
501 | nsecs, MAX_CLOCK_SEC*1000000000ULL, delta); | |
502 | } | |
503 | ||
504 | for (i = 0; i < TESTLEN; i++) | |
505 | { | |
506 | nsecs = get_nsec(mode, test_ticks[i]); | |
507 | delta = nsecs > test_ns[i] ? nsecs - test_ns[i] : test_ns[i] - nsecs; | |
508 | if (!quiet || delta > 0) | |
509 | printf("get_nsec(%llu)=%llu, expected %llu, delta=%llu\n", | |
510 | test_ticks[i], nsecs, test_ns[i], delta); | |
511 | } | |
512 | ||
513 | if (!fast_test) { | |
514 | discontinuity(mode, delta_ticks, delta_nsec, max_ticks - LEN + 1, LEN); | |
515 | discontinuity(mode, delta_ticks, delta_nsec, MIN_TICKS, LEN); | |
516 | } | |
517 | ||
518 | if (!quiet) | |
519 | printf("\n\n"); | |
520 | ||
521 | return delta; | |
522 | } | |
523 | ||
524 | int main(int argc, char *argv[]) | |
525 | { | |
526 | int i, days; | |
527 | long long error; | |
528 | long long errors[10001]; | |
529 | double mean; | |
530 | ||
531 | nsecs = malloc(LEN * sizeof(unsigned long long)); | |
532 | assert(nsecs != NULL); | |
533 | days = MAX_CLOCK_SEC / 60 / 60 / 24; | |
534 | ||
48e7b920 VF |
535 | test_clock(CLOCK64_LOCK, 3333, 1, 0, 0, 0); |
536 | // test_clock(CLOCK64_MULT_SHIFT, 3333, 1, 0, 0, 0); | |
537 | // test_clock(CLOCK128_MULT_SHIFT, 3333, 1, 0, 0, 0); | |
dcbc7841 VF |
538 | |
539 | // Test 3 different clock types from 1000 to 10000 MHz | |
540 | // and calculate average error | |
541 | /* | |
542 | for (i = 1000, mean = 0.0; i <= 10000; i++) { | |
48e7b920 | 543 | error = test_clock(CLOCK64_MULT_SHIFT, i, 1, 1, 0, 0); |
dcbc7841 VF |
544 | errors[i] = error > 0 ? error : -1LL * error; |
545 | mean += (double) errors[i] / 9000; | |
546 | } | |
547 | printf(" 64-bit average error per %d days: %fms\n", days, mean); | |
548 | ||
549 | for (i = 1000, mean = 0.0; i <= 10000; i++) { | |
48e7b920 | 550 | error = test_clock(CLOCK64_2STAGE, i, 1, 1, 0, 0); |
dcbc7841 VF |
551 | errors[i] = error > 0 ? error : -1LL * error; |
552 | mean += (double) errors[i] / 9000; | |
553 | } | |
554 | printf(" 64-bit two-stage average error per %d days: %fms\n", days, mean); | |
555 | ||
556 | for (i = 1000, mean = 0.0; i <= 10000; i++) { | |
48e7b920 | 557 | error = test_clock(CLOCK128_MULT_SHIFT, i, 1, 1, 0, 0); |
dcbc7841 VF |
558 | errors[i] = error > 0 ? error : -1LL * error; |
559 | mean += (double) errors[i] / 9000; | |
560 | } | |
561 | printf(" 128-bit average error per %d days: %fms\n", days, mean); | |
562 | */ | |
48e7b920 VF |
563 | test_clock(CLOCK64_LOCK, 1000, 1, 0, 1, 1); |
564 | test_clock(CLOCK64_LOCK, 1100, 1, 0, 11, 10); | |
565 | test_clock(CLOCK64_LOCK, 3000, 1, 0, 3, 1); | |
566 | test_clock(CLOCK64_LOCK, 3333, 1, 0, 3333, 1000); | |
567 | test_clock(CLOCK64_LOCK, 3392, 1, 0, 424, 125); | |
568 | test_clock(CLOCK64_LOCK, 4500, 1, 0, 9, 2); | |
569 | test_clock(CLOCK64_LOCK, 5000, 1, 0, 5, 1); | |
dcbc7841 VF |
570 | |
571 | free(nsecs); | |
572 | return 0; | |
573 | } |