X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=lib%2Flfsr.c;h=a4f1fb13b64f7b1087970bba0e9c58afafd002db;hp=8a70029627a34ccbec53a104f879c94ec6498234;hb=3d2d14bcb844e72809192311369a642c5d415472;hpb=2a988d8bcb447eb098fc382835cc507587c6ba66 diff --git a/lib/lfsr.c b/lib/lfsr.c index 8a700296..a4f1fb13 100644 --- a/lib/lfsr.c +++ b/lib/lfsr.c @@ -1,269 +1,253 @@ #include #include "lfsr.h" +#include "../compiler/compiler.h" /* - * From table 3 of + * LFSR taps retrieved from: + * http://home1.gte.net/res0658s/electronics/LFSRtaps.html * - * http://www.xilinx.com/support/documentation/application_notes/xapp052.pdf + * The memory overhead of the following tap table should be relatively small, + * no more than 400 bytes. */ -static struct lfsr_taps lfsr_taps[] = { - { - .length = 16, - .taps = { 16, 15, 13, 4, }, - }, - { - .length = 17, - .taps = { 17, 14, }, - }, - { - .length = 18, - .taps = { 18, 11, }, - }, - { - .length = 19, - .taps = { 19, 6, 2, 1, }, - }, - { - .length = 20, - .taps = { 20, 17, }, - }, - { - .length = 21, - .taps = { 21, 19, }, - }, - { - .length = 22, - .taps = { 22, 21, }, - }, - { - .length = 23, - .taps = { 23, 18, }, - }, - { - .length = 24, - .taps = { 24, 23, 22, 17, }, - }, - { - .length = 25, - .taps = { 25, 22, }, - }, - { - .length = 26, - .taps = {26, 6, 2, 1, }, - }, - { - .length = 27, - .taps = { 27, 5, 2, 1, }, - }, - { - .length = 28, - .taps = { 28, 25, }, - }, - { - .length = 29, - .taps = {29, 27, }, - }, - { - .length = 30, - .taps = { 30, 6, 4, 1, }, - }, - { - .length = 31, - .taps = { 31, 28, }, - }, - { - .length = 32, - .taps = { 32, 22, 2, 1, }, - }, - { - .length = 33, - .taps = { 33, 20, }, - }, - { - .length = 34, - .taps = { 34, 27, 2, 1, }, - }, - { - .length = 35, - .taps = { 35, 33, }, - }, - { - .length = 36, - .taps = { 36, 25, }, - }, - { - .length = 37, - .taps = { 37, 5, 4, 3, 2, 1, }, - }, - { - .length = 38, - .taps = { 38, 6, 5, 1, }, - }, - { - .length = 39, - .taps = { 39, 35, }, - }, - { - .length = 40, - .taps = { 40, 38, 21, 19, }, - }, - { - .length = 41, - .taps = { 41, 38, }, - }, - { - .length = 42, - .taps = { 42, 41, 20, 19, }, - }, - { - .length = 43, - .taps = { 43, 42, 38, 37, }, - }, - { - .length = 44, - .taps = { 44, 43, 38, 37, }, - }, - { - .length = 45, - .taps = { 45, 44, 42, 41, }, - }, - { - .length = 46, - .taps = { 46, 45, 26, 25, }, - }, - { - .length = 47, - .taps = { 47, 42, }, - }, - { - .length = 48, - .taps = { 48, 47, 21, 20, }, - }, - { - .length = 49, - .taps = { 49, 40, }, - }, - { - .length = 50, - .taps = { 50, 49, 36, 35, }, - }, - { - .length = 51, - .taps = { 51, 50, 36, 35, }, - }, - { - .length = 52, - .taps = { 52, 49, }, - }, - { - .length = 53, - .taps = { 53, 52, 38, 37 }, - }, - { - .length = 54, - .taps = { 54, 53, 18, 17 }, - }, - { - .length = 55, - .taps = { 55, 31, }, - }, - { - .length = 56, - .taps = { 56, 55, 35, 34, }, - }, - { - .length = 57, - .taps = { 57, 50, }, - }, - { - .length = 58, - .taps = { 58, 39, }, - }, - { - .length = 59, - .taps = { 59, 58, 38, 37, }, - }, - { - .length = 60, - .taps = { 60, 59, }, - }, - { - .length = 61, - .taps = { 61, 60, 46, 45, }, - }, - { - .length = 62, - .taps = { 62, 61, 6, 5, }, - }, - { - .length = 63, - .taps = { 63, 62, }, - }, +static uint8_t lfsr_taps[64][FIO_MAX_TAPS] = +{ + {0}, {0}, {0}, //LFSRs with less that 3-bits cannot exist + {3, 2}, //Tap position for 3-bit LFSR + {4, 3}, //Tap position for 4-bit LFSR + {5, 3}, //Tap position for 5-bit LFSR + {6, 5}, //Tap position for 6-bit LFSR + {7, 6}, //Tap position for 7-bit LFSR + {8, 6, 5 ,4}, //Tap position for 8-bit LFSR + {9, 5}, //Tap position for 9-bit LFSR + {10, 7}, //Tap position for 10-bit LFSR + {11, 9}, //Tap position for 11-bit LFSR + {12, 6, 4, 1}, //Tap position for 12-bit LFSR + {13, 4, 3, 1}, //Tap position for 13-bit LFSR + {14, 5, 3, 1}, //Tap position for 14-bit LFSR + {15, 14}, //Tap position for 15-bit LFSR + {16, 15, 13, 4}, //Tap position for 16-bit LFSR + {17, 14}, //Tap position for 17-bit LFSR + {18, 11}, //Tap position for 18-bit LFSR + {19, 6, 2, 1}, //Tap position for 19-bit LFSR + {20, 17}, //Tap position for 20-bit LFSR + {21, 19}, //Tap position for 21-bit LFSR + {22, 21}, //Tap position for 22-bit LFSR + {23, 18}, //Tap position for 23-bit LFSR + {24, 23, 22, 17}, //Tap position for 24-bit LFSR + {25, 22}, //Tap position for 25-bit LFSR + {26, 6, 2, 1}, //Tap position for 26-bit LFSR + {27, 5, 2, 1}, //Tap position for 27-bit LFSR + {28, 25}, //Tap position for 28-bit LFSR + {29, 27}, //Tap position for 29-bit LFSR + {30, 6, 4, 1}, //Tap position for 30-bit LFSR + {31, 28}, //Tap position for 31-bit LFSR + {32, 31, 29, 1}, //Tap position for 32-bit LFSR + {33, 20}, //Tap position for 33-bit LFSR + {34, 27, 2, 1}, //Tap position for 34-bit LFSR + {35, 33}, //Tap position for 35-bit LFSR + {36, 25}, //Tap position for 36-bit LFSR + {37, 5, 4, 3, 2, 1}, //Tap position for 37-bit LFSR + {38, 6, 5, 1}, //Tap position for 38-bit LFSR + {39, 35}, //Tap position for 39-bit LFSR + {40, 38, 21, 19}, //Tap position for 40-bit LFSR + {41, 38}, //Tap position for 41-bit LFSR + {42, 41, 20, 19}, //Tap position for 42-bit LFSR + {43, 42, 38, 37}, //Tap position for 43-bit LFSR + {44, 43, 18, 17}, //Tap position for 44-bit LFSR + {45, 44, 42, 41}, //Tap position for 45-bit LFSR + {46, 45, 26, 25}, //Tap position for 46-bit LFSR + {47, 42}, //Tap position for 47-bit LFSR + {48, 47, 21, 20}, //Tap position for 48-bit LFSR + {49, 40}, //Tap position for 49-bit LFSR + {50, 49, 24, 23}, //Tap position for 50-bit LFSR + {51, 50, 36, 35}, //Tap position for 51-bit LFSR + {52, 49}, //Tap position for 52-bit LFSR + {53, 52, 38, 37}, //Tap position for 53-bit LFSR + {54, 53, 18, 17}, //Tap position for 54-bit LFSR + {55, 31}, //Tap position for 55-bit LFSR + {56, 55, 35, 34}, //Tap position for 56-bit LFSR + {57, 50}, //Tap position for 57-bit LFSR + {58, 39}, //Tap position for 58-bit LFSR + {59, 58, 38, 37}, //Tap position for 59-bit LFSR + {60, 59}, //Tap position for 60-bit LFSR + {61, 60, 46, 45}, //Tap position for 61-bit LFSR + {62, 61, 6, 5}, //Tap position for 62-bit LFSR + {63, 62}, //Tap position for 63-bit LFSR }; -#define FIO_LFSR_CRANKS 128 +#define __LFSR_NEXT(__fl, __v) \ + __v = ((__v >> 1) | __fl->cached_bit) ^ \ + (((__v & 1UL) - 1UL) & __fl->xormask); -static uint64_t __lfsr_next(uint64_t v, struct lfsr_taps *lt) +static inline void __lfsr_next(struct fio_lfsr *fl, unsigned int spin) { - uint64_t xor_mask = 0; - int i; - - for (i = 0; lt->taps[i]; i++) - xor_mask ^= (v << (lt->taps[i] - 1)); - - xor_mask &= ~(~0UL << 1) << (lt->length - 1); - return xor_mask | (v >> 1); + /* + * This should be O(1) since most compilers will create a jump table for + * this switch. + */ + switch (spin) { + case 15: __LFSR_NEXT(fl, fl->last_val); + case 14: __LFSR_NEXT(fl, fl->last_val); + case 13: __LFSR_NEXT(fl, fl->last_val); + case 12: __LFSR_NEXT(fl, fl->last_val); + case 11: __LFSR_NEXT(fl, fl->last_val); + case 10: __LFSR_NEXT(fl, fl->last_val); + case 9: __LFSR_NEXT(fl, fl->last_val); + case 8: __LFSR_NEXT(fl, fl->last_val); + case 7: __LFSR_NEXT(fl, fl->last_val); + case 6: __LFSR_NEXT(fl, fl->last_val); + case 5: __LFSR_NEXT(fl, fl->last_val); + case 4: __LFSR_NEXT(fl, fl->last_val); + case 3: __LFSR_NEXT(fl, fl->last_val); + case 2: __LFSR_NEXT(fl, fl->last_val); + case 1: __LFSR_NEXT(fl, fl->last_val); + case 0: __LFSR_NEXT(fl, fl->last_val); + default: break; + } } +/* + * lfsr_next does the following: + * + * a. Return if the number of max values has been exceeded. + * b. Check if we have a spin value that produces a repeating subsequence. + * This is previously calculated in `prepare_spin` and cycle_length should + * be > 0. If we do have such a spin: + * + * i. Decrement the calculated cycle. + * ii. If it reaches zero, add "+1" to the spin and reset the cycle_length + * (we have it cached in the struct fio_lfsr) + * + * In either case, continue with the calculation of the next value. + * c. Check if the calculated value exceeds the desirable range. In this case, + * go back to b, else return. + */ int lfsr_next(struct fio_lfsr *fl, uint64_t *off) { - if (fl->num_vals > fl->max_val) + if (fl->num_vals++ > fl->max_val) return 1; do { - fl->last_val = __lfsr_next(fl->last_val, &fl->taps); - if (fl->last_val - 1 <= fl->max_val) - break; - } while (1); - - *off = fl->last_val - 1; - fl->num_vals++; + if (fl->cycle_length && !--fl->cycle_length) { + __lfsr_next(fl, fl->spin + 1); + fl->cycle_length = fl->cached_cycle_length; + } else + __lfsr_next(fl, fl->spin); + } while (fio_unlikely(fl->last_val > fl->max_val)); + + *off = fl->last_val; return 0; } -static struct lfsr_taps *find_lfsr(uint64_t size) +static uint64_t lfsr_create_xormask(uint8_t *taps) { int i; + uint64_t xormask = 0; - for (i = 0; lfsr_taps[i].length; i++) - if (((1UL << lfsr_taps[i].length) + FIO_LFSR_CRANKS) >= size) - return &lfsr_taps[i]; + for(i = 0; i < FIO_MAX_TAPS && taps[i] != 0; i++) + xormask |= 1UL << (taps[i] - 1); + + return xormask; +} + +static uint8_t *find_lfsr(uint64_t size) +{ + int i; + + /* + * For an LFSR, there is always a prohibited state (all ones). + * Thus, if we need to find the proper LFSR for our size, we must + * take that into account. + */ + for (i = 3; i < 64; i++) + if ((1UL << i) > size) + return lfsr_taps[i]; return NULL; } -int lfsr_init(struct fio_lfsr *fl, uint64_t size, unsigned long seed) +/* + * It is well-known that all maximal n-bit LFSRs will start repeating + * themselves after their 2^n iteration. The introduction of spins however, is + * possible to create a repetition of a sub-sequence before we hit that mark. + * This happens if: + * + * [1]: ((2^n - 1) * i) % (spin + 1) == 0, + * where "n" is LFSR's bits and "i" any number within the range [1,spin] + * + * It is important to know beforehand if a spin can cause a repetition of a + * sub-sequence (cycle) and its length. However, calculating (2^n - 1) * i may + * produce a buffer overflow for "n" close to 64, so we expand the above to: + * + * [2]: (2^n - 1) -> (x * (spin + 1) + y), where x >= 0 and 0 <= y <= spin + * + * Thus, [1] is equivalent to (y * i) % (spin + 1) == 0; + * Also, the cycle's length will be (x * i) + (y * i) / (spin + 1) + */ +static int prepare_spin(struct fio_lfsr *fl, unsigned int spin) { - struct lfsr_taps *tap; + uint64_t max = (fl->cached_bit << 1) - 1; + uint64_t x, y; int i; - tap = find_lfsr(size); - if (!tap) + if (spin > 15) return 1; - fl->last_val = seed; - fl->max_val = size - 1; - fl->num_vals = 0; - fl->taps.length = tap->length; - for (i = 0; i < FIO_MAX_TAPS; i++) { - fl->taps.taps[i] = tap->taps[i]; - if (!fl->taps.taps[i]) + x = max / (spin + 1); + y = max % (spin + 1); + fl->cycle_length = 0; /* No cycle occurs, other than the expected */ + fl->spin = spin; + + for (i = 1; i <= spin; i++) { + if ((y * i) % (spin + 1) == 0) { + fl->cycle_length = (x * i) + (y * i) / (spin + 1); break; + } } + fl->cached_cycle_length = fl->cycle_length; + + /* + * Increment cycle length for the first time only since the stored value + * will not be printed otherwise. + */ + fl->cycle_length++; + + return 0; +} + +int lfsr_reset(struct fio_lfsr *fl, unsigned long seed) +{ + uint64_t bitmask = (fl->cached_bit << 1) - 1; + + fl->num_vals = 0; + fl->last_val = seed & bitmask; + + /* All-ones state is illegal for XNOR LFSRs */ + if (fl->last_val == bitmask) + return 1; - for (i = 0; i < FIO_LFSR_CRANKS; i++) - fl->last_val = __lfsr_next(fl->last_val, &fl->taps); + return 0; +} + +int lfsr_init(struct fio_lfsr *fl, uint64_t nums, unsigned long seed, + unsigned int spin) +{ + uint8_t *taps; + + taps = find_lfsr(nums); + if (!taps) + return 1; + + fl->max_val = nums - 1; + fl->xormask = lfsr_create_xormask(taps); + fl->cached_bit = 1UL << (taps[0] - 1); + + if (prepare_spin(fl, spin)) + return 1; + + if (lfsr_reset(fl, seed)) + return 1; return 0; }