Commit | Line | Data |
---|---|---|
4e7ff039 MS |
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2023 Red Hat
 */

#include "radix-sort.h"

#include <linux/limits.h>
#include <linux/types.h>

#include "memory-alloc.h"
#include "string-utils.h"

/*
 * This implementation allocates one large object to do the sorting, which can be reused as many
 * times as desired. The amount of memory required is logarithmically proportional to the number of
 * keys to be sorted.
 */
enum {
	/* Piles with this many keys or fewer are handled with a simple insertion sort. */
	INSERTION_SORT_THRESHOLD = 12,
};
24 | ||
/* Sort keys are pointers to immutable fixed-length arrays of bytes. */
typedef const u8 *sort_key_t;
27 | ||
/*
 * The keys are separated into piles based on the byte in each key at the current offset, so the
 * number of keys with each byte must be counted.
 */
struct histogram {
	/* The number of non-empty bins */
	u16 used;
	/* The index (key byte) of the first non-empty bin */
	u16 first;
	/* The index (key byte) of the last non-empty bin */
	u16 last;
	/* The number of occurrences of each specific byte */
	u32 size[256];
};
42 | ||
/*
 * Sub-tasks are manually managed on a stack, both for performance and to put a logarithmic bound
 * on the stack space needed.
 */
struct task {
	/* Pointer to the first key to sort. */
	sort_key_t *first_key;
	/* Pointer to the last key to sort (inclusive). */
	sort_key_t *last_key;
	/* The offset into the key at which to continue sorting. */
	u16 offset;
	/* The number of bytes remaining in the sort keys. */
	u16 length;
};
57 | ||
/* The sorter object, allocated once by uds_make_radix_sorter() and reused for many sorts. */
struct radix_sorter {
	/* The maximum number of keys this sorter was sized to handle */
	unsigned int count;
	/* Histogram of key bytes at the current radix position */
	struct histogram bins;
	/* Working pointers to the (exclusive) end of each pile during distribution */
	sort_key_t *pile[256];
	/* One past the last usable stack entry, for overflow detection */
	struct task *end_of_stack;
	/* Pending small tasks to be finished with an insertion sort */
	struct task insertion_list[256];
	/* The task stack; flexible array sized when the sorter is allocated */
	struct task stack[];
};
66 | ||
67 | /* Compare a segment of two fixed-length keys starting at an offset. */ | |
68 | static inline int compare(sort_key_t key1, sort_key_t key2, u16 offset, u16 length) | |
69 | { | |
70 | return memcmp(&key1[offset], &key2[offset], length); | |
71 | } | |
72 | ||
73 | /* Insert the next unsorted key into an array of sorted keys. */ | |
74 | static inline void insert_key(const struct task task, sort_key_t *next) | |
75 | { | |
76 | /* Pull the unsorted key out, freeing up the array slot. */ | |
77 | sort_key_t unsorted = *next; | |
78 | ||
79 | /* Compare the key to the preceding sorted entries, shifting down ones that are larger. */ | |
80 | while ((--next >= task.first_key) && | |
81 | (compare(unsorted, next[0], task.offset, task.length) < 0)) | |
82 | next[1] = next[0]; | |
83 | ||
84 | /* Insert the key into the last slot that was cleared, sorting it. */ | |
85 | next[1] = unsorted; | |
86 | } | |
87 | ||
88 | /* | |
89 | * Sort a range of key segments using an insertion sort. This simple sort is faster than the | |
90 | * 256-way radix sort when the number of keys to sort is small. | |
91 | */ | |
92 | static inline void insertion_sort(const struct task task) | |
93 | { | |
94 | sort_key_t *next; | |
95 | ||
96 | for (next = task.first_key + 1; next <= task.last_key; next++) | |
97 | insert_key(task, next); | |
98 | } | |
99 | ||
100 | /* Push a sorting task onto a task stack. */ | |
101 | static inline void push_task(struct task **stack_pointer, sort_key_t *first_key, | |
102 | u32 count, u16 offset, u16 length) | |
103 | { | |
104 | struct task *task = (*stack_pointer)++; | |
105 | ||
106 | task->first_key = first_key; | |
107 | task->last_key = &first_key[count - 1]; | |
108 | task->offset = offset; | |
109 | task->length = length; | |
110 | } | |
111 | ||
112 | static inline void swap_keys(sort_key_t *a, sort_key_t *b) | |
113 | { | |
114 | sort_key_t c = *a; | |
115 | *a = *b; | |
116 | *b = c; | |
117 | } | |
118 | ||
/*
 * Count the number of times each byte value appears in the array of keys to sort at the current
 * offset, keeping track of the number of non-empty bins, and the indexes of the first and last
 * non-empty bins.
 */
static inline void measure_bins(const struct task task, struct histogram *bins)
{
	sort_key_t *key_ptr;

	/*
	 * Subtle invariant: bins->used and bins->size[] are zero because the sorting code clears
	 * it all out as it goes. Even though this structure is re-used, we don't need to pay to
	 * zero it before starting a new tally.
	 */
	bins->first = U8_MAX;
	bins->last = 0;

	for (key_ptr = task.first_key; key_ptr <= task.last_key; key_ptr++) {
		/* Increment the count for the byte in the key at the current offset. */
		u8 bin = (*key_ptr)[task.offset];
		u32 size = ++bins->size[bin];

		/* Track non-empty bins the first time each byte value is seen. */
		if (size == 1) {
			bins->used += 1;
			if (bin < bins->first)
				bins->first = bin;

			if (bin > bins->last)
				bins->last = bin;
		}
	}
}
152 | ||
/*
 * Convert the bin sizes to pointers to where each pile goes.
 *
 *   pile[0] = first_key + bin->size[0],
 *   pile[1] = pile[0] + bin->size[1], etc.
 *
 * After the keys are moved to the appropriate pile, we'll need to sort each of the piles by the
 * next radix position. A new task is put on the stack for each pile containing lots of keys, or a
 * new task is put on the list for each pile containing few keys.
 *
 * @stack: pointer to the top of the stack
 * @end_of_stack: the end of the stack
 * @list: pointer to the head of the list
 * @pile: array for pointers to the end of each pile
 * @bins: the histogram of the sizes of each pile
 * @first_key: the first key of the stack
 * @offset: the next radix position to sort by
 * @length: the number of bytes remaining in the sort keys
 *
 * Return: UDS_SUCCESS or an error code
 */
static inline int push_bins(struct task **stack, struct task *end_of_stack,
			    struct task **list, sort_key_t *pile[],
			    struct histogram *bins, sort_key_t *first_key,
			    u16 offset, u16 length)
{
	sort_key_t *pile_start = first_key;
	int bin;

	for (bin = bins->first; ; bin++) {
		u32 size = bins->size[bin];

		/* Skip empty piles. */
		if (size == 0)
			continue;

		/* There's no need to sort empty keys. */
		if (length > 0) {
			if (size > INSERTION_SORT_THRESHOLD) {
				/* Fail rather than overflow the fixed-size task stack. */
				if (*stack >= end_of_stack)
					return UDS_BAD_STATE;

				push_task(stack, pile_start, size, offset, length);
			} else if (size > 1) {
				/* Small piles go on the insertion-sort list instead. */
				push_task(list, pile_start, size, offset, length);
			}
		}

		pile_start += size;
		pile[bin] = pile_start;
		/* Stop once every non-empty bin has been consumed. */
		if (--bins->used == 0)
			break;
	}

	return UDS_SUCCESS;
}
209 | ||
/*
 * Allocate a radix sorter able to sort up to @count keys. The task stack is sized at
 * count / INSERTION_SORT_THRESHOLD entries; push_bins() returns UDS_BAD_STATE if a sort
 * ever needs more than that.
 *
 * Return: UDS_SUCCESS or an error code from the allocation
 */
int uds_make_radix_sorter(unsigned int count, struct radix_sorter **sorter)
{
	int result;
	unsigned int stack_size = count / INSERTION_SORT_THRESHOLD;
	struct radix_sorter *radix_sorter;

	/* Allocate the sorter and its flexible stack array in one object. */
	result = uds_allocate_extended(struct radix_sorter, stack_size, struct task,
				       __func__, &radix_sorter);
	if (result != UDS_SUCCESS)
		return result;

	radix_sorter->count = count;
	radix_sorter->end_of_stack = radix_sorter->stack + stack_size;
	*sorter = radix_sorter;
	return UDS_SUCCESS;
}
226 | ||
/* Free a radix sorter created by uds_make_radix_sorter(). */
void uds_free_radix_sorter(struct radix_sorter *sorter)
{
	uds_free(sorter);
}
231 | ||
/*
 * Sort pointers to fixed-length keys (arrays of bytes) using a radix sort. The sort implementation
 * is unstable, so the relative ordering of equal keys is not preserved.
 */
int uds_radix_sort(struct radix_sorter *sorter, const unsigned char *keys[],
		   unsigned int count, unsigned short length)
{
	struct task start;
	struct histogram *bins = &sorter->bins;
	sort_key_t **pile = sorter->pile;
	struct task *task_stack = sorter->stack;

	/* All zero-length keys are identical and therefore already sorted. */
	if ((count == 0) || (length == 0))
		return UDS_SUCCESS;

	/* The initial task is to sort the entire length of all the keys. */
	start = (struct task) {
		.first_key = keys,
		.last_key = &keys[count - 1],
		.offset = 0,
		.length = length,
	};

	if (count <= INSERTION_SORT_THRESHOLD) {
		insertion_sort(start);
		return UDS_SUCCESS;
	}

	/* The sorter's task stack was only sized for sorter->count keys. */
	if (count > sorter->count)
		return UDS_INVALID_ARGUMENT;

	/*
	 * Repeatedly consume a sorting task from the stack and process it, pushing new sub-tasks
	 * onto the stack for each radix-sorted pile. When all tasks and sub-tasks have been
	 * processed, the stack will be empty and all the keys in the starting task will be fully
	 * sorted.
	 */
	for (*task_stack = start; task_stack >= sorter->stack; task_stack--) {
		const struct task task = *task_stack;
		struct task *insertion_task_list;
		int result;
		sort_key_t *fence;
		sort_key_t *end;

		measure_bins(task, bins);

		/*
		 * Now that we know how large each bin is, generate pointers for each of the piles
		 * and push a new task to sort each pile by the next radix byte.
		 */
		insertion_task_list = sorter->insertion_list;
		result = push_bins(&task_stack, sorter->end_of_stack,
				   &insertion_task_list, pile, bins, task.first_key,
				   task.offset + 1, task.length - 1);
		if (result != UDS_SUCCESS) {
			/* Restore the all-zero histogram invariant before returning. */
			memset(bins, 0, sizeof(*bins));
			return result;
		}

		/* Now bins->used is zero again. */

		/*
		 * Don't bother processing the last pile: when piles 0..N-1 are all in place, then
		 * pile N must also be in place.
		 */
		end = task.last_key - bins->size[bins->last];
		bins->size[bins->last] = 0;

		for (fence = task.first_key; fence <= end; ) {
			u8 bin;
			sort_key_t key = *fence;

			/*
			 * The radix byte of the key tells us which pile it belongs in. Swap it for
			 * an unprocessed item just below that pile, and repeat.
			 */
			while (--pile[bin = key[task.offset]] > fence)
				swap_keys(pile[bin], &key);

			/*
			 * The pile reached the fence. Put the key at the bottom of that pile,
			 * completing it, and advance the fence to the next pile.
			 */
			*fence = key;
			fence += bins->size[bin];
			bins->size[bin] = 0;
		}

		/* Now bins->size[] is all zero again. */

		/*
		 * When the number of keys in a task gets small enough, it is faster to use an
		 * insertion sort than to keep subdividing into tiny piles.
		 */
		while (--insertion_task_list >= sorter->insertion_list)
			insertion_sort(*insertion_task_list);
	}

	return UDS_SUCCESS;
}
332 | } |