Commit | Line | Data |
---|---|---|
1c6fdbd8 KO |
1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | #ifndef _EYTZINGER_H | |
3 | #define _EYTZINGER_H | |
4 | ||
5 | #include <linux/bitops.h> | |
6 | #include <linux/log2.h> | |
7 | ||
ca1e02f7 | 8 | #ifdef EYTZINGER_DEBUG |
d54b82ec | 9 | #include <linux/bug.h> |
ca1e02f7 KO |
10 | #define EYTZINGER_BUG_ON(cond) BUG_ON(cond) |
11 | #else | |
12 | #define EYTZINGER_BUG_ON(cond) | |
13 | #endif | |
1c6fdbd8 KO |
14 | |
15 | /* | |
16 | * Traversal for trees in eytzinger layout - a full binary tree layed out in an | |
ca1e02f7 | 17 | * array. |
1c6fdbd8 | 18 | * |
ca1e02f7 KO |
19 | * Consider using an eytzinger tree any time you would otherwise be doing binary |
20 | * search over an array. Binary search is a worst case scenario for branch | |
21 | * prediction and prefetching, but in an eytzinger tree every node's children | |
22 | * are adjacent in memory, thus we can prefetch children before knowing the | |
23 | * result of the comparison, assuming multiple nodes fit on a cacheline. | |
24 | * | |
25 | * Two variants are provided, for one based indexing and zero based indexing. | |
26 | * | |
27 | * Zero based indexing is more convenient, but one based indexing has better | |
28 | * alignment and thus better performance because each new level of the tree | |
29 | * starts at a power of two, and thus if element 0 was cacheline aligned, each | |
30 | * new level will be as well. | |
1c6fdbd8 KO |
31 | */ |
32 | ||
33 | static inline unsigned eytzinger1_child(unsigned i, unsigned child) | |
34 | { | |
ca1e02f7 | 35 | EYTZINGER_BUG_ON(child > 1); |
1c6fdbd8 KO |
36 | |
37 | return (i << 1) + child; | |
38 | } | |
39 | ||
40 | static inline unsigned eytzinger1_left_child(unsigned i) | |
41 | { | |
42 | return eytzinger1_child(i, 0); | |
43 | } | |
44 | ||
45 | static inline unsigned eytzinger1_right_child(unsigned i) | |
46 | { | |
47 | return eytzinger1_child(i, 1); | |
48 | } | |
49 | ||
50 | static inline unsigned eytzinger1_first(unsigned size) | |
51 | { | |
8ed58789 | 52 | return size ? rounddown_pow_of_two(size) : 0; |
1c6fdbd8 KO |
53 | } |
54 | ||
55 | static inline unsigned eytzinger1_last(unsigned size) | |
56 | { | |
72492d55 | 57 | return rounddown_pow_of_two(size + 1) - 1; |
1c6fdbd8 KO |
58 | } |
59 | ||
1c6fdbd8 KO |
60 | static inline unsigned eytzinger1_next(unsigned i, unsigned size) |
61 | { | |
f2761465 | 62 | EYTZINGER_BUG_ON(i == 0 || i > size); |
1c6fdbd8 | 63 | |
72492d55 | 64 | if (eytzinger1_right_child(i) <= size) { |
1c6fdbd8 KO |
65 | i = eytzinger1_right_child(i); |
66 | ||
f2761465 | 67 | i <<= __fls(size) - __fls(i); |
72492d55 | 68 | i >>= i > size; |
1c6fdbd8 KO |
69 | } else { |
70 | i >>= ffz(i) + 1; | |
71 | } | |
72 | ||
73 | return i; | |
74 | } | |
75 | ||
76 | static inline unsigned eytzinger1_prev(unsigned i, unsigned size) | |
77 | { | |
f2761465 | 78 | EYTZINGER_BUG_ON(i == 0 || i > size); |
1c6fdbd8 | 79 | |
72492d55 | 80 | if (eytzinger1_left_child(i) <= size) { |
1c6fdbd8 KO |
81 | i = eytzinger1_left_child(i) + 1; |
82 | ||
f2761465 | 83 | i <<= __fls(size) - __fls(i); |
1c6fdbd8 | 84 | i -= 1; |
72492d55 | 85 | i >>= i > size; |
1c6fdbd8 KO |
86 | } else { |
87 | i >>= __ffs(i) + 1; | |
88 | } | |
89 | ||
90 | return i; | |
91 | } | |
92 | ||
93 | static inline unsigned eytzinger1_extra(unsigned size) | |
94 | { | |
8ed58789 KO |
95 | return size |
96 | ? (size + 1 - rounddown_pow_of_two(size)) << 1 | |
97 | : 0; | |
1c6fdbd8 KO |
98 | } |
99 | ||
100 | static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size, | |
101 | unsigned extra) | |
102 | { | |
103 | unsigned b = __fls(i); | |
72492d55 | 104 | unsigned shift = __fls(size) - b; |
1c6fdbd8 KO |
105 | int s; |
106 | ||
ca1e02f7 | 107 | EYTZINGER_BUG_ON(!i || i > size); |
1c6fdbd8 KO |
108 | |
109 | i ^= 1U << b; | |
110 | i <<= 1; | |
111 | i |= 1; | |
112 | i <<= shift; | |
113 | ||
114 | /* | |
115 | * sign bit trick: | |
116 | * | |
117 | * if (i > extra) | |
118 | * i -= (i - extra) >> 1; | |
119 | */ | |
120 | s = extra - i; | |
121 | i += (s >> 1) & (s >> 31); | |
122 | ||
123 | return i; | |
124 | } | |
125 | ||
126 | static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size, | |
127 | unsigned extra) | |
128 | { | |
129 | unsigned shift; | |
130 | int s; | |
131 | ||
ca1e02f7 | 132 | EYTZINGER_BUG_ON(!i || i > size); |
1c6fdbd8 KO |
133 | |
134 | /* | |
135 | * sign bit trick: | |
136 | * | |
137 | * if (i > extra) | |
138 | * i += i - extra; | |
139 | */ | |
140 | s = extra - i; | |
141 | i -= s & (s >> 31); | |
142 | ||
143 | shift = __ffs(i); | |
144 | ||
145 | i >>= shift + 1; | |
72492d55 | 146 | i |= 1U << (__fls(size) - shift); |
1c6fdbd8 KO |
147 | |
148 | return i; | |
149 | } | |
150 | ||
151 | static inline unsigned eytzinger1_to_inorder(unsigned i, unsigned size) | |
152 | { | |
153 | return __eytzinger1_to_inorder(i, size, eytzinger1_extra(size)); | |
154 | } | |
155 | ||
156 | static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size) | |
157 | { | |
158 | return __inorder_to_eytzinger1(i, size, eytzinger1_extra(size)); | |
159 | } | |
160 | ||
161 | #define eytzinger1_for_each(_i, _size) \ | |
3fe8a186 | 162 | for (unsigned (_i) = eytzinger1_first((_size)); \ |
1c6fdbd8 KO |
163 | (_i) != 0; \ |
164 | (_i) = eytzinger1_next((_i), (_size))) | |
165 | ||
166 | /* Zero based indexing version: */ | |
167 | ||
168 | static inline unsigned eytzinger0_child(unsigned i, unsigned child) | |
169 | { | |
ca1e02f7 | 170 | EYTZINGER_BUG_ON(child > 1); |
1c6fdbd8 KO |
171 | |
172 | return (i << 1) + 1 + child; | |
173 | } | |
174 | ||
175 | static inline unsigned eytzinger0_left_child(unsigned i) | |
176 | { | |
177 | return eytzinger0_child(i, 0); | |
178 | } | |
179 | ||
180 | static inline unsigned eytzinger0_right_child(unsigned i) | |
181 | { | |
182 | return eytzinger0_child(i, 1); | |
183 | } | |
184 | ||
185 | static inline unsigned eytzinger0_first(unsigned size) | |
186 | { | |
72492d55 | 187 | return eytzinger1_first(size) - 1; |
1c6fdbd8 KO |
188 | } |
189 | ||
190 | static inline unsigned eytzinger0_last(unsigned size) | |
191 | { | |
72492d55 | 192 | return eytzinger1_last(size) - 1; |
1c6fdbd8 KO |
193 | } |
194 | ||
195 | static inline unsigned eytzinger0_next(unsigned i, unsigned size) | |
196 | { | |
72492d55 | 197 | return eytzinger1_next(i + 1, size) - 1; |
1c6fdbd8 KO |
198 | } |
199 | ||
200 | static inline unsigned eytzinger0_prev(unsigned i, unsigned size) | |
201 | { | |
72492d55 | 202 | return eytzinger1_prev(i + 1, size) - 1; |
1c6fdbd8 KO |
203 | } |
204 | ||
205 | static inline unsigned eytzinger0_extra(unsigned size) | |
206 | { | |
72492d55 | 207 | return eytzinger1_extra(size); |
1c6fdbd8 KO |
208 | } |
209 | ||
210 | static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size, | |
211 | unsigned extra) | |
212 | { | |
72492d55 | 213 | return __eytzinger1_to_inorder(i + 1, size, extra) - 1; |
1c6fdbd8 KO |
214 | } |
215 | ||
216 | static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size, | |
217 | unsigned extra) | |
218 | { | |
72492d55 | 219 | return __inorder_to_eytzinger1(i + 1, size, extra) - 1; |
1c6fdbd8 KO |
220 | } |
221 | ||
222 | static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size) | |
223 | { | |
224 | return __eytzinger0_to_inorder(i, size, eytzinger0_extra(size)); | |
225 | } | |
226 | ||
227 | static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size) | |
228 | { | |
229 | return __inorder_to_eytzinger0(i, size, eytzinger0_extra(size)); | |
230 | } | |
231 | ||
232 | #define eytzinger0_for_each(_i, _size) \ | |
3fe8a186 | 233 | for (unsigned (_i) = eytzinger0_first((_size)); \ |
1c6fdbd8 KO |
234 | (_i) != -1; \ |
235 | (_i) = eytzinger0_next((_i), (_size))) | |
236 | ||
dc5ceaaa AG |
237 | #define eytzinger0_for_each_prev(_i, _size) \ |
238 | for (unsigned (_i) = eytzinger0_last((_size)); \ | |
239 | (_i) != -1; \ | |
240 | (_i) = eytzinger0_prev((_i), (_size))) | |
241 | ||
1c6fdbd8 | 242 | /* return greatest node <= @search, or -1 if not found */ |
9c432404 KO |
243 | static inline int eytzinger0_find_le(void *base, size_t nr, size_t size, |
244 | cmp_func_t cmp, const void *search) | |
1c6fdbd8 | 245 | { |
d148d804 | 246 | void *base1 = base - size; |
d384dada AG |
247 | unsigned n = 1; |
248 | ||
249 | while (n <= nr) | |
250 | n = eytzinger1_child(n, cmp(base1 + n * size, search) <= 0); | |
251 | n >>= __ffs(n) + 1; | |
252 | return n - 1; | |
1c6fdbd8 KO |
253 | } |
254 | ||
2182f295 | 255 | /* return smallest node > @search, or -1 if not found */ |
9c432404 KO |
256 | static inline int eytzinger0_find_gt(void *base, size_t nr, size_t size, |
257 | cmp_func_t cmp, const void *search) | |
ca1e02f7 | 258 | { |
2182f295 AG |
259 | void *base1 = base - size; |
260 | unsigned n = 1; | |
9c432404 | 261 | |
2182f295 AG |
262 | while (n <= nr) |
263 | n = eytzinger1_child(n, cmp(base1 + n * size, search) <= 0); | |
264 | n >>= __ffs(n + 1) + 1; | |
265 | return n - 1; | |
ca1e02f7 KO |
266 | } |
267 | ||
11223d0e | 268 | /* return smallest node >= @search, or -1 if not found */ |
820b9efe KO |
269 | static inline int eytzinger0_find_ge(void *base, size_t nr, size_t size, |
270 | cmp_func_t cmp, const void *search) | |
271 | { | |
11223d0e AG |
272 | void *base1 = base - size; |
273 | unsigned n = 1; | |
820b9efe | 274 | |
11223d0e AG |
275 | while (n <= nr) |
276 | n = eytzinger1_child(n, cmp(base1 + n * size, search) < 0); | |
277 | n >>= __ffs(n + 1) + 1; | |
278 | return n - 1; | |
820b9efe KO |
279 | } |
280 | ||
7ef2a73a KO |
281 | #define eytzinger0_find(base, nr, size, _cmp, search) \ |
282 | ({ \ | |
3849bcab AG |
283 | size_t _size = (size); \ |
284 | void *_base1 = (void *)(base) - _size; \ | |
169de419 KO |
285 | const void *_search = (search); \ |
286 | size_t _nr = (nr); \ | |
3849bcab | 287 | size_t _i = 1; \ |
7ef2a73a KO |
288 | int _res; \ |
289 | \ | |
3849bcab AG |
290 | while (_i <= _nr && \ |
291 | (_res = _cmp(_search, _base1 + _i * _size))) \ | |
292 | _i = eytzinger1_child(_i, _res > 0); \ | |
293 | _i - 1; \ | |
7ef2a73a | 294 | }) |
1c6fdbd8 | 295 | |
ca1e02f7 KO |
296 | void eytzinger0_sort_r(void *, size_t, size_t, |
297 | cmp_r_func_t, swap_r_func_t, const void *); | |
298 | void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t); | |
1c6fdbd8 KO |
299 | |
300 | #endif /* _EYTZINGER_H */ |