// SPDX-License-Identifier: GPL-2.0
/*
 * A fast, small, non-recursive O(n log n) sort for the Linux kernel
 *
 * Jan 23 2005 Matt Mackall <mpm@selenic.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/export.h>
#include <linux/sort.h>

/**
 * is_aligned - is this pointer & size okay for word-wide copying?
 * @base: pointer to data
 * @size: size of each element
 * @align: required alignment (typically 4 or 8)
 *
 * Returns true if elements can be copied using word loads and stores.
 * The size must be a multiple of the alignment, and the base address must
 * be as well if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
 *
 * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
 * to "if ((a | b) & mask)", so we do that by hand.
 */
__attribute_const__ __always_inline
static bool is_aligned(const void *base, size_t size, unsigned char align)
{
	unsigned char lsbits = (unsigned char)size;

	(void)base;
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
	lsbits |= (unsigned char)(uintptr_t)base;
#endif
	return (lsbits & (align - 1)) == 0;
}
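
/*
 * Illustrative example (not in the original source): for size == 12,
 * is_aligned(base, 12, 8) is always false, since 12 is not a multiple
 * of 8, while is_aligned(base, 12, 4) holds whenever base is 4-byte
 * aligned, and unconditionally when
 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS skips the base check.
 */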
38 | ||
37d0ec34 GS |
39 | /** |
40 | * swap_words_32 - swap two elements in 32-bit chunks | |
41 | * @a, @b: pointers to the elements | |
42 | * @size: element size (must be a multiple of 4) | |
43 | * | |
44 | * Exchange the two objects in memory. This exploits base+index addressing, | |
45 | * which basically all CPUs have, to minimize loop overhead computations. | |
46 | * | |
47 | * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the | |
48 | * bottom of the loop, even though the zero flag is stil valid from the | |
49 | * subtract (since the intervening mov instructions don't alter the flags). | |
50 | * Gcc 8.1.0 doesn't have that problem. | |
51 | */ | |
static void swap_words_32(void *a, void *b, int size)
{
	size_t n = (unsigned int)size;

	do {
		u32 t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;
	} while (n);
}

/**
 * swap_words_64 - swap two elements in 64-bit chunks
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @size: element size (must be a multiple of 8)
 *
 * Exchange the two objects in memory. This exploits base+index
 * addressing, which basically all CPUs have, to minimize loop overhead
 * computations.
 *
 * We'd like to use 64-bit loads if possible. If they're not available,
 * emulating one requires base+index+4 addressing which x86 has but most
 * other processors do not. If CONFIG_64BIT, we definitely have 64-bit
 * loads, but it's possible to have 64-bit loads without 64-bit pointers
 * (e.g. x32 ABI). Are there any cases the kernel needs to worry about?
 */
static void swap_words_64(void *a, void *b, int size)
{
	size_t n = (unsigned int)size;

	do {
#ifdef CONFIG_64BIT
		u64 t = *(u64 *)(a + (n -= 8));
		*(u64 *)(a + n) = *(u64 *)(b + n);
		*(u64 *)(b + n) = t;
#else
		/* Use two 32-bit transfers to avoid base+index+4 addressing */
		u32 t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;

		t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;
#endif
	} while (n);
}

/**
 * swap_bytes - swap two elements a byte at a time
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @size: element size
 *
 * This is the fallback if alignment doesn't allow using larger chunks.
 */
static void swap_bytes(void *a, void *b, int size)
{
	size_t n = (unsigned int)size;

	do {
		char t = ((char *)a)[--n];
		((char *)a)[n] = ((char *)b)[n];
		((char *)b)[n] = t;
	} while (n);
}

/**
 * sort - sort an array of elements
 * @base: pointer to data to sort
 * @num: number of elements
 * @size: size of each element
 * @cmp_func: pointer to comparison function
 * @swap_func: pointer to swap function or NULL
 *
 * This function does a heapsort on the given array. You may provide
 * a swap_func function if you need to do something more than a memory
 * copy (e.g. fix up pointers or auxiliary data), but the built-in swap
 * isn't usually a bottleneck.
 *
 * Sorting time is O(n log n) both on average and worst-case. While
 * qsort is about 20% faster on average, it suffers from exploitable
 * O(n*n) worst-case behavior and extra memory requirements that make
 * it less suitable for kernel use.
 */
void sort(void *base, size_t num, size_t size,
	  int (*cmp_func)(const void *, const void *),
	  void (*swap_func)(void *, void *, int size))
{
	/* pre-scale counters for performance */
	int i = (num/2 - 1) * size, n = num * size, c, r;

	if (!swap_func) {
		if (is_aligned(base, size, 8))
			swap_func = swap_words_64;
		else if (is_aligned(base, size, 4))
			swap_func = swap_words_32;
		else
			swap_func = swap_bytes;
	}

	/* heapify */
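	/*
	 * All loop counters below are byte offsets pre-multiplied by size,
	 * so the first child of the element at offset r lives at offset
	 * r*2 + size (the byte-scaled form of the usual 2*i + 1).
	 */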
	for ( ; i >= 0; i -= size) {
		for (r = i; r * 2 + size < n; r = c) {
			c = r * 2 + size;
			if (c < n - size &&
			    cmp_func(base + c, base + c + size) < 0)
				c += size;
			if (cmp_func(base + r, base + c) >= 0)
				break;
			swap_func(base + r, base + c, size);
		}
	}

	/* sort */
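	/*
	 * Repeatedly swap the current maximum (at base) with the last
	 * unsorted element, shrink the heap by one element, and sift the
	 * new root down to restore the heap property.
	 */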
	for (i = n - size; i > 0; i -= size) {
		swap_func(base, base + i, size);
		for (r = 0; r * 2 + size < i; r = c) {
			c = r * 2 + size;
			if (c < i - size &&
			    cmp_func(base + c, base + c + size) < 0)
				c += size;
			if (cmp_func(base + r, base + c) >= 0)
				break;
			swap_func(base + r, base + c, size);
		}
	}
}
EXPORT_SYMBOL(sort);
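
/*
 * Example usage (an illustrative sketch, not part of this file): sorting
 * an array of ints with the built-in swap. cmp_int() and sort_example()
 * are hypothetical names used only for this demonstration, and
 * ARRAY_SIZE() is assumed to be available from <linux/kernel.h>.
 */
#if 0	/* example only, not compiled */
static int cmp_int(const void *a, const void *b)
{
	int x = *(const int *)a;
	int y = *(const int *)b;

	/* Return <0, 0, >0 without risking integer overflow */
	return (x > y) - (x < y);
}

static void sort_example(void)
{
	int v[] = { 42, 7, 19, 3, 88 };

	/* A NULL swap_func lets sort() pick a word-wide built-in swap */
	sort(v, ARRAY_SIZE(v), sizeof(v[0]), cmp_int, NULL);
}
#endif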