X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=lib%2Flfsr.c;h=0c0072ccb39b15e970eeb6bd92342b67a85cd8bb;hp=758dc8beee7c0e7dc77d166c68eaf016d85a29c2;hb=ae626d4ead6416adf464cf209cdf3e8b85d58190;hpb=d474cbc9ba33448848b50cc697622a402e91e33e

diff --git a/lib/lfsr.c b/lib/lfsr.c
index 758dc8be..0c0072cc 100644
--- a/lib/lfsr.c
+++ b/lib/lfsr.c
@@ -2,6 +2,7 @@
 #include <math.h>
 
 #include "lfsr.h"
+#include "../compiler/compiler.h"
 
 /*
  * LFSR taps retrieved from:
@@ -10,70 +11,70 @@
  * The memory overhead of the following tap table should be relatively small,
  * no more than 400 bytes.
  */
-static uint8_t taps[64][FIO_MAX_TAPS] =
+static uint8_t lfsr_taps[64][FIO_MAX_TAPS] =
 {
-		{0}, {0}, {0},		//LFSRs with less that 3-bits cannot exist
-		{3, 2},				//Tap position for 3-bit LFSR
-		{4, 3},				//Tap position for 4-bit LFSR
-		{5, 3},				//Tap position for 5-bit LFSR
-		{6, 5},				//Tap position for 6-bit LFSR
-		{7, 6},				//Tap position for 7-bit LFSR
-		{8, 6, 5 ,4},		//Tap position for 8-bit LFSR
-		{9, 5},				//Tap position for 9-bit LFSR
-		{10, 7},			//Tap position for 10-bit LFSR
-		{11, 9},			//Tap position for 11-bit LFSR
-		{12, 6, 4, 1},		//Tap position for 12-bit LFSR
-		{13, 4, 3, 1},		//Tap position for 13-bit LFSR
-		{14, 5, 3, 1},		//Tap position for 14-bit LFSR
-		{15, 14},			//Tap position for 15-bit LFSR
-		{16, 15, 13, 4},	//Tap position for 16-bit LFSR
-		{17, 14},			//Tap position for 17-bit LFSR
-		{18, 11},			//Tap position for 18-bit LFSR
-		{19, 6, 2, 1},		//Tap position for 19-bit LFSR
-		{20, 17},			//Tap position for 20-bit LFSR
-		{21, 19},			//Tap position for 21-bit LFSR
-		{22, 21},			//Tap position for 22-bit LFSR
-		{23, 18},			//Tap position for 23-bit LFSR
-		{24, 23, 22, 17},	//Tap position for 24-bit LFSR
-		{25, 22},			//Tap position for 25-bit LFSR
-		{26, 6, 2, 1},		//Tap position for 26-bit LFSR
-		{27, 5, 2, 1},		//Tap position for 27-bit LFSR
-		{28, 25},			//Tap position for 28-bit LFSR
-		{29, 27},			//Tap position for 29-bit LFSR
-		{30, 6, 4, 1},		//Tap position for 30-bit LFSR
-		{31, 28},			//Tap position for 31-bit LFSR
-		{32, 31, 29, 1},	//Tap position for 32-bit LFSR
-		{33, 20},			//Tap position for 33-bit LFSR
-		{34, 27, 2, 1},		//Tap position for 34-bit LFSR
-		{35, 33},			//Tap position for 35-bit LFSR
-		{36, 25},			//Tap position for 36-bit LFSR
-		{37, 5, 4, 3, 2, 1},//Tap position for 37-bit LFSR
-		{38, 6, 5, 1},		//Tap position for 38-bit LFSR
-		{39, 35},			//Tap position for 39-bit LFSR
-		{40, 38, 21, 19},	//Tap position for 40-bit LFSR
-		{41, 38},			//Tap position for 41-bit LFSR
-		{42, 41, 20, 19},	//Tap position for 42-bit LFSR
-		{43, 42, 38, 37},	//Tap position for 43-bit LFSR
-		{44, 43, 18, 17},	//Tap position for 44-bit LFSR
-		{45, 44, 42, 41},	//Tap position for 45-bit LFSR
-		{46, 45, 26, 25},	//Tap position for 46-bit LFSR
-		{47, 42},			//Tap position for 47-bit LFSR
-		{48, 47, 21, 20},	//Tap position for 48-bit LFSR
-		{49, 40},			//Tap position for 49-bit LFSR
-		{50, 49, 24, 23},	//Tap position for 50-bit LFSR
-		{51, 50, 36, 35},	//Tap position for 51-bit LFSR
-		{52, 49},			//Tap position for 52-bit LFSR
-		{53, 52, 38, 37},	//Tap position for 53-bit LFSR
-		{54, 53, 18, 17},	//Tap position for 54-bit LFSR
-		{55, 31},			//Tap position for 55-bit LFSR
-		{56, 55, 35, 34},	//Tap position for 56-bit LFSR
-		{57, 50},			//Tap position for 57-bit LFSR
-		{58, 39},			//Tap position for 58-bit LFSR
-		{59, 58, 38, 37},	//Tap position for 59-bit LFSR
-		{60, 59},			//Tap position for 60-bit LFSR
-		{61, 60, 46, 45},	//Tap position for 61-bit LFSR
-		{62, 61, 6, 5},		//Tap position for 62-bit LFSR
-		{63, 62},			//Tap position for 63-bit LFSR
+	{0}, {0}, {0},		//LFSRs with less than 3 bits cannot exist
+	{3, 2},			//Tap position for 3-bit LFSR
+	{4, 3},			//Tap position for 4-bit LFSR
+	{5, 3},			//Tap position for 5-bit LFSR
+	{6, 5},			//Tap position for 6-bit LFSR
+	{7, 6},			//Tap position for 7-bit LFSR
+	{8, 6, 5 ,4},		//Tap position for 8-bit LFSR
+	{9, 5},			//Tap position for 9-bit LFSR
+	{10, 7},		//Tap position for 10-bit LFSR
+	{11, 9},		//Tap position for 11-bit LFSR
+	{12, 6, 4, 1},		//Tap position for 12-bit LFSR
+	{13, 4, 3, 1},		//Tap position for 13-bit LFSR
+	{14, 5, 3, 1},		//Tap position for 14-bit LFSR
+	{15, 14},		//Tap position for 15-bit LFSR
+	{16, 15, 13, 4},	//Tap position for 16-bit LFSR
+	{17, 14},		//Tap position for 17-bit LFSR
+	{18, 11},		//Tap position for 18-bit LFSR
+	{19, 6, 2, 1},		//Tap position for 19-bit LFSR
+	{20, 17},		//Tap position for 20-bit LFSR
+	{21, 19},		//Tap position for 21-bit LFSR
+	{22, 21},		//Tap position for 22-bit LFSR
+	{23, 18},		//Tap position for 23-bit LFSR
+	{24, 23, 22, 17},	//Tap position for 24-bit LFSR
+	{25, 22},		//Tap position for 25-bit LFSR
+	{26, 6, 2, 1},		//Tap position for 26-bit LFSR
+	{27, 5, 2, 1},		//Tap position for 27-bit LFSR
+	{28, 25},		//Tap position for 28-bit LFSR
+	{29, 27},		//Tap position for 29-bit LFSR
+	{30, 6, 4, 1},		//Tap position for 30-bit LFSR
+	{31, 28},		//Tap position for 31-bit LFSR
+	{32, 31, 29, 1},	//Tap position for 32-bit LFSR
+	{33, 20},		//Tap position for 33-bit LFSR
+	{34, 27, 2, 1},		//Tap position for 34-bit LFSR
+	{35, 33},		//Tap position for 35-bit LFSR
+	{36, 25},		//Tap position for 36-bit LFSR
+	{37, 5, 4, 3, 2, 1},	//Tap position for 37-bit LFSR
+	{38, 6, 5, 1},		//Tap position for 38-bit LFSR
+	{39, 35},		//Tap position for 39-bit LFSR
+	{40, 38, 21, 19},	//Tap position for 40-bit LFSR
+	{41, 38},		//Tap position for 41-bit LFSR
+	{42, 41, 20, 19},	//Tap position for 42-bit LFSR
+	{43, 42, 38, 37},	//Tap position for 43-bit LFSR
+	{44, 43, 18, 17},	//Tap position for 44-bit LFSR
+	{45, 44, 42, 41},	//Tap position for 45-bit LFSR
+	{46, 45, 26, 25},	//Tap position for 46-bit LFSR
+	{47, 42},		//Tap position for 47-bit LFSR
+	{48, 47, 21, 20},	//Tap position for 48-bit LFSR
+	{49, 40},		//Tap position for 49-bit LFSR
+	{50, 49, 24, 23},	//Tap position for 50-bit LFSR
+	{51, 50, 36, 35},	//Tap position for 51-bit LFSR
+	{52, 49},		//Tap position for 52-bit LFSR
+	{53, 52, 38, 37},	//Tap position for 53-bit LFSR
+	{54, 53, 18, 17},	//Tap position for 54-bit LFSR
+	{55, 31},		//Tap position for 55-bit LFSR
+	{56, 55, 35, 34},	//Tap position for 56-bit LFSR
+	{57, 50},		//Tap position for 57-bit LFSR
+	{58, 39},		//Tap position for 58-bit LFSR
+	{59, 58, 38, 37},	//Tap position for 59-bit LFSR
+	{60, 59},		//Tap position for 60-bit LFSR
+	{61, 60, 46, 45},	//Tap position for 61-bit LFSR
+	{62, 61, 6, 5},		//Tap position for 62-bit LFSR
+	{63, 62},		//Tap position for 63-bit LFSR
 };
 
 #define __LFSR_NEXT(__fl, __v)						\
@@ -87,7 +88,6 @@ static inline void __lfsr_next(struct fio_lfsr *fl, unsigned int spin)
 	 * this switch.
 	 */
 	switch (spin) {
-		case 16: __LFSR_NEXT(fl, fl->last_val);
 		case 15: __LFSR_NEXT(fl, fl->last_val);
 		case 14: __LFSR_NEXT(fl, fl->last_val);
 		case 13: __LFSR_NEXT(fl, fl->last_val);
@@ -108,25 +108,34 @@ static inline void __lfsr_next(struct fio_lfsr *fl, unsigned int spin)
 	}
 }
 
-int lfsr_next(struct fio_lfsr *fl, uint64_t *off, uint64_t last)
+/*
+ * lfsr_next does the following:
+ *
+ * a. Return if the maximum number of values has been exceeded.
+ * b. Check if we have a spin value that produces a repeating subsequence.
+ *    This is previously calculated in `prepare_spin` and cycle_length should
+ *    be > 0. If we do have such a spin:
+ *
+ *    i. Decrement the calculated cycle.
+ *    ii. If it reaches zero, add "+1" to the spin and reset the cycle_length
+ *        (we have it cached in the struct fio_lfsr)
+ *
+ *    In either case, continue with the calculation of the next value.
+ * c. Check if the calculated value exceeds the desirable range. In this case,
+ *    go back to b, else return.
+ */
+int lfsr_next(struct fio_lfsr *fl, uint64_t *off)
 {
-	int repeat;
-	unsigned int spin;
-
-	repeat = fl->num_vals % fl->cycle_length;
-	if (repeat == 0)
-		spin = fl->spin + 1;
-	else
-		spin = fl->spin;
-
-	if (fl->num_vals > fl->max_val)
+	if (fl->num_vals++ > fl->max_val)
 		return 1;
 
-	fl->num_vals++;
-
 	do {
-		__lfsr_next(fl, spin);
-	} while (fl->last_val > fl->max_val);
+		if (fl->cycle_length && !--fl->cycle_length) {
+			__lfsr_next(fl, fl->spin + 1);
+			fl->cycle_length = fl->cached_cycle_length;
+		} else
+			__lfsr_next(fl, fl->spin);
+	} while (fio_unlikely(fl->last_val > fl->max_val));
 
 	*off = fl->last_val;
 	return 0;
@@ -147,9 +156,14 @@ static uint8_t *find_lfsr(uint64_t size)
 {
 	int i;
 
+	/*
+	 * For an LFSR, there is always a prohibited state (all ones), so an
+	 * i-bit LFSR can only produce 2^i - 1 distinct values. Thus, the
+	 * proper LFSR for our size is the smallest one where 2^i > size.
+	 */
 	for (i = 3; i < 64; i++)
-		if ((1UL << i) > size) /* TODO: Explain why. */
-			return taps[i];
+		if ((1UL << i) > size)
+			return lfsr_taps[i];
 
 	return NULL;
 }
@@ -172,7 +186,7 @@ static uint8_t *find_lfsr(uint64_t size)
  * Thus, [1] is equivalent to (y * i) % (spin + 1) == 0;
  * Also, the cycle's length will be (x * i) + (y * i) / (spin + 1)
  */
-int prepare_spin(struct fio_lfsr *fl, unsigned int spin)
+static int prepare_spin(struct fio_lfsr *fl, unsigned int spin)
 {
 	uint64_t max = (fl->cached_bit << 1) - 1;
 	uint64_t x, y;
@@ -183,7 +197,7 @@ int prepare_spin(struct fio_lfsr *fl, unsigned int spin)
 
 	x = max / (spin + 1);
 	y = max % (spin + 1);
-	fl->cycle_length = max;	/* This is the expected cycle */
+	fl->cycle_length = 0;	/* No cycle occurs, other than the expected */
 	fl->spin = spin;
 
 	for (i = 1; i <= spin; i++) {
@@ -192,6 +206,13 @@ int prepare_spin(struct fio_lfsr *fl, unsigned int spin)
 			break;
 		}
 	}
+	fl->cached_cycle_length = fl->cycle_length;
+
+	/*
+	 * Increment cycle length for the first time only since the stored value
+	 * will not be printed otherwise.
+	 */
+	fl->cycle_length++;
 
 	return 0;
 }
@@ -213,15 +234,15 @@ int lfsr_reset(struct fio_lfsr *fl, unsigned long seed)
 int lfsr_init(struct fio_lfsr *fl, uint64_t nums, unsigned long seed,
 		unsigned int spin)
 {
-	uint8_t *lfsr_taps;
+	uint8_t *taps;
 
-	lfsr_taps = find_lfsr(nums);
-	if (!lfsr_taps)
+	taps = find_lfsr(nums);
+	if (!taps)
 		return 1;
 
 	fl->max_val = nums - 1;
-	fl->xormask = lfsr_create_xormask(lfsr_taps);
-	fl->cached_bit = 1UL << (lfsr_taps[0] - 1);
+	fl->xormask = lfsr_create_xormask(taps);
+	fl->cached_bit = 1UL << (taps[0] - 1);
 
 	if (prepare_spin(fl, spin))
 		return 1;