Commit | Line | Data |
---|---|---|
1c6fdbd8 KO |
1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | #ifndef _EYTZINGER_H | |
3 | #define _EYTZINGER_H | |
4 | ||
5 | #include <linux/bitops.h> | |
6 | #include <linux/log2.h> | |
7 | ||
ca1e02f7 KO |
/*
 * EYTZINGER_BUG_ON() - debug-only assertion used by the eytzinger helpers.
 *
 * Unless EYTZINGER_DEBUG is defined the macro expands to nothing, so @cond is
 * not evaluated at all; with EYTZINGER_DEBUG it is forwarded to BUG_ON().
 */
#ifndef EYTZINGER_DEBUG
#define EYTZINGER_BUG_ON(cond)
#else
#define EYTZINGER_BUG_ON(cond)		BUG_ON(cond)
#endif
1c6fdbd8 KO |
13 | |
14 | /* | |
15 | * Traversal for trees in eytzinger layout - a full binary tree laid out in an | |
ca1e02f7 | 16 | * array. |
1c6fdbd8 | 17 | * |
ca1e02f7 KO |
18 | * Consider using an eytzinger tree any time you would otherwise be doing binary |
19 | * search over an array. Binary search is a worst case scenario for branch | |
20 | * prediction and prefetching, but in an eytzinger tree every node's children | |
21 | * are adjacent in memory, thus we can prefetch children before knowing the | |
22 | * result of the comparison, assuming multiple nodes fit on a cacheline. | |
23 | * | |
24 | * Two variants are provided, for one based indexing and zero based indexing. | |
25 | * | |
26 | * Zero based indexing is more convenient, but one based indexing has better | |
27 | * alignment and thus better performance because each new level of the tree | |
28 | * starts at a power of two, and thus if element 0 was cacheline aligned, each | |
29 | * new level will be as well. | |
1c6fdbd8 KO |
30 | */ |
31 | ||
/*
 * Index of a child of node @i in the one based layout: @child == 0 selects
 * the left child (2 * i), @child == 1 the right child (2 * i + 1).
 */
static inline unsigned eytzinger1_child(unsigned i, unsigned child)
{
	EYTZINGER_BUG_ON(child > 1);

	return 2 * i + child;
}
38 | ||
/* Left child of node @i, one based layout. */
static inline unsigned eytzinger1_left_child(unsigned i)
{
	const unsigned left = 0;

	return eytzinger1_child(i, left);
}
43 | ||
/* Right child of node @i, one based layout. */
static inline unsigned eytzinger1_right_child(unsigned i)
{
	const unsigned right = 1;

	return eytzinger1_child(i, right);
}
48 | ||
/*
 * Starting index for an in-order walk of a one based tree of @size nodes (see
 * eytzinger1_for_each()): the largest power of two not exceeding @size, or 0
 * when the tree is empty.
 */
static inline unsigned eytzinger1_first(unsigned size)
{
	if (!size)
		return 0;

	return rounddown_pow_of_two(size);
}
53 | ||
/*
 * Final index of an in-order walk of a one based tree of @size nodes: one less
 * than the largest power of two not exceeding @size + 1 (0 for an empty tree).
 */
static inline unsigned eytzinger1_last(unsigned size)
{
	unsigned pow2 = rounddown_pow_of_two(size + 1);

	return pow2 - 1;
}
58 | ||
59 | /* | |
60 | * eytzinger1_next() and eytzinger1_prev() have the nice properties that | |
61 | * | |
62 | * eytzinger1_next(0) == eytzinger1_first() | |
63 | * eytzinger1_prev(0) == eytzinger1_last() | |
64 | * | |
65 | * eytzinger1_prev(eytzinger1_first()) == 0 | |
66 | * eytzinger1_next(eytzinger1_last()) == 0 | |
67 | */ | |
68 | ||
/*
 * In-order successor of node @i in a one based tree of @size nodes.
 *
 * 0 is the sentinel: eytzinger1_next(0, size) yields the first node, and
 * stepping past the last node yields 0.
 */
static inline unsigned eytzinger1_next(unsigned i, unsigned size)
{
	unsigned right;

	EYTZINGER_BUG_ON(i > size);

	right = eytzinger1_right_child(i);

	/*
	 * No right subtree: strip the trailing one bits (the steps where we
	 * were a right child) plus the zero bit above them.
	 */
	if (right > size)
		return i >> (ffz(i) + 1);

	/*
	 * Otherwise take the leftmost node of the right subtree: shift down to
	 * the deepest level, and back off one level if that overshoots @size.
	 */
	right <<= __fls(size + 1) - __fls(right);
	if (right > size)
		right >>= 1;

	return right;
}
84 | ||
/*
 * In-order predecessor of node @i in a one based tree of @size nodes.
 *
 * 0 is the sentinel: eytzinger1_prev(0, size) yields the last node, and
 * stepping before the first node yields 0.
 */
static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
{
	unsigned left;

	EYTZINGER_BUG_ON(i > size);

	left = eytzinger1_left_child(i);

	/*
	 * No left subtree: strip the trailing zero bits (the steps where we
	 * were a left child) plus the one bit above them.
	 */
	if (left > size)
		return i >> (__ffs(i) + 1);

	/*
	 * Otherwise take the rightmost node of the left subtree: shift
	 * (left + 1) down to the deepest level and subtract one, then back off
	 * one level if that overshoots @size.
	 */
	left += 1;
	left <<= __fls(size + 1) - __fls(left);
	left -= 1;
	if (left > size)
		left >>= 1;

	return left;
}
101 | ||
/*
 * Threshold used by the eytzinger <-> in-order conversions: twice
 * (@size + 1 - rounddown_pow_of_two(@size)), i.e. twice the number of nodes on
 * the deepest level of the tree, or 0 for an empty tree.
 */
static inline unsigned eytzinger1_extra(unsigned size)
{
	if (!size)
		return 0;

	return (size + 1 - rounddown_pow_of_two(size)) << 1;
}
108 | ||
/*
 * Convert one based eytzinger index @i into its one based in-order position
 * in a tree of @size nodes.
 *
 * @extra is expected to be eytzinger1_extra(@size), as passed by
 * eytzinger1_to_inorder().  @i must be in the range 1..@size.
 */
static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
					       unsigned extra)
{
	unsigned b, shift;
	int s;

	EYTZINGER_BUG_ON(!i || i > size);

	/*
	 * __fls() is undefined for 0, so only evaluate it once the range check
	 * above has had a chance to catch a bad @i.
	 */
	b	= __fls(i);
	shift	= __fls(size) - b;

	/*
	 * Drop the top set bit to get the node's offset within its level, then
	 * build ((2 * offset + 1) << shift).
	 */
	i ^= 1U << b;
	i <<= 1;
	i |= 1;
	i <<= shift;

	/*
	 * sign bit trick:
	 *
	 * if (i > extra)
	 *	i -= (i - extra) >> 1;
	 */
	s = extra - i;
	i += (s >> 1) & (s >> 31);

	return i;
}
134 | ||
/*
 * Convert one based in-order position @i into the matching one based
 * eytzinger index in a tree of @size nodes.
 *
 * @extra is expected to be eytzinger1_extra(@size), as passed by
 * inorder_to_eytzinger1().  @i must be in the range 1..@size.
 */
static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
					       unsigned extra)
{
	unsigned tz, level_bit;
	int diff;

	EYTZINGER_BUG_ON(!i || i > size);

	/*
	 * sign bit trick:
	 *
	 * if (i > extra)
	 *	i += i - extra;
	 */
	diff = extra - i;
	i -= diff & (diff >> 31);

	/* The number of trailing zero bits selects the level of the node ... */
	tz = __ffs(i);
	level_bit = 1U << (__fls(size) - tz);

	/* ... and the bits above the lowest set bit give the offset in it. */
	return (i >> (tz + 1)) | level_bit;
}
159 | ||
/* One based eytzinger index @i -> one based in-order position. */
static inline unsigned eytzinger1_to_inorder(unsigned i, unsigned size)
{
	unsigned extra = eytzinger1_extra(size);

	return __eytzinger1_to_inorder(i, size, extra);
}
164 | ||
/* One based in-order position @i -> one based eytzinger index. */
static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)
{
	unsigned extra = eytzinger1_extra(size);

	return __inorder_to_eytzinger1(i, size, extra);
}
169 | ||
/*
 * Walk a one based tree of @_size nodes, starting at eytzinger1_first() and
 * advancing with eytzinger1_next() until the 0 sentinel is reached.
 *
 * @_i is declared by the macro as an unsigned scoped to the loop; @_size is
 * re-evaluated on every step.
 */
#define eytzinger1_for_each(_i, _size)					\
	for (unsigned (_i) = eytzinger1_first((_size));			\
	     (_i);							\
	     (_i) = eytzinger1_next((_i), (_size)))
174 | ||
175 | /* Zero based indexing version: */ | |
176 | ||
/*
 * Index of a child of node @i in the zero based layout: @child == 0 selects
 * the left child (2 * i + 1), @child == 1 the right child (2 * i + 2).
 */
static inline unsigned eytzinger0_child(unsigned i, unsigned child)
{
	EYTZINGER_BUG_ON(child > 1);

	return 2 * i + 1 + child;
}
183 | ||
/* Left child of node @i, zero based layout. */
static inline unsigned eytzinger0_left_child(unsigned i)
{
	const unsigned left = 0;

	return eytzinger0_child(i, left);
}
188 | ||
/* Right child of node @i, zero based layout. */
static inline unsigned eytzinger0_right_child(unsigned i)
{
	const unsigned right = 1;

	return eytzinger0_child(i, right);
}
193 | ||
/*
 * Zero based counterpart of eytzinger1_first().  For an empty tree the result
 * wraps to (unsigned) -1, the value eytzinger0_for_each() stops on.
 */
static inline unsigned eytzinger0_first(unsigned size)
{
	unsigned first1 = eytzinger1_first(size);

	return first1 - 1;
}
198 | ||
/*
 * Zero based counterpart of eytzinger1_last(); wraps to (unsigned) -1 when
 * eytzinger1_last() returns 0.
 */
static inline unsigned eytzinger0_last(unsigned size)
{
	unsigned last1 = eytzinger1_last(size);

	return last1 - 1;
}
203 | ||
/*
 * In-order successor for the zero based layout, computed by shifting to one
 * based indexing and back.  The one based sentinel 0 therefore becomes
 * (unsigned) -1 here, both as input and as result.
 */
static inline unsigned eytzinger0_next(unsigned i, unsigned size)
{
	unsigned next1 = eytzinger1_next(i + 1, size);

	return next1 - 1;
}
208 | ||
/*
 * In-order predecessor for the zero based layout, computed by shifting to one
 * based indexing and back.  The one based sentinel 0 therefore becomes
 * (unsigned) -1 here, both as input and as result.
 */
static inline unsigned eytzinger0_prev(unsigned i, unsigned size)
{
	unsigned prev1 = eytzinger1_prev(i + 1, size);

	return prev1 - 1;
}
213 | ||
/* Same value as eytzinger1_extra(): it depends only on @size. */
static inline unsigned eytzinger0_extra(unsigned size)
{
	unsigned extra = eytzinger1_extra(size);

	return extra;
}
218 | ||
/*
 * Zero based eytzinger index @i -> zero based in-order position, via the one
 * based conversion.  @extra is forwarded unchanged.
 */
static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size,
					       unsigned extra)
{
	unsigned inorder1 = __eytzinger1_to_inorder(i + 1, size, extra);

	return inorder1 - 1;
}
224 | ||
/*
 * Zero based in-order position @i -> zero based eytzinger index, via the one
 * based conversion.  @extra is forwarded unchanged.
 */
static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size,
					       unsigned extra)
{
	unsigned eytz1 = __inorder_to_eytzinger1(i + 1, size, extra);

	return eytz1 - 1;
}
230 | ||
/* Zero based eytzinger index @i -> zero based in-order position. */
static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size)
{
	unsigned extra = eytzinger0_extra(size);

	return __eytzinger0_to_inorder(i, size, extra);
}
235 | ||
/* Zero based in-order position @i -> zero based eytzinger index. */
static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
{
	unsigned extra = eytzinger0_extra(size);

	return __inorder_to_eytzinger0(i, size, extra);
}
240 | ||
/*
 * Walk a zero based tree of @_size nodes, starting at eytzinger0_first() and
 * advancing with eytzinger0_next() until the (unsigned) -1 sentinel is
 * reached.
 *
 * @_i is declared by the macro as an unsigned scoped to the loop; @_size is
 * re-evaluated on every step.
 */
#define eytzinger0_for_each(_i, _size)					\
	for (unsigned (_i) = eytzinger0_first((_size));			\
	     (_i) != -1U;						\
	     (_i) = eytzinger0_next((_i), (_size)))
245 | ||
1c6fdbd8 | 246 | /* return greatest node <= @search, or -1 if not found */ |
9c432404 KO |
247 | static inline int eytzinger0_find_le(void *base, size_t nr, size_t size, |
248 | cmp_func_t cmp, const void *search) | |
1c6fdbd8 KO |
249 | { |
250 | unsigned i, n = 0; | |
251 | ||
252 | if (!nr) | |
253 | return -1; | |
254 | ||
255 | do { | |
256 | i = n; | |
ca1e02f7 | 257 | n = eytzinger0_child(i, cmp(base + i * size, search) <= 0); |
1c6fdbd8 KO |
258 | } while (n < nr); |
259 | ||
260 | if (n & 1) { | |
9c432404 KO |
261 | /* |
262 | * @i was greater than @search, return previous node: | |
263 | * | |
264 | * if @i was leftmost/smallest element, | |
265 | * eytzinger0_prev(eytzinger0_first())) returns -1, as expected | |
266 | */ | |
1c6fdbd8 KO |
267 | return eytzinger0_prev(i, nr); |
268 | } else { | |
269 | return i; | |
270 | } | |
271 | } | |
272 | ||
9c432404 KO |
273 | static inline int eytzinger0_find_gt(void *base, size_t nr, size_t size, |
274 | cmp_func_t cmp, const void *search) | |
ca1e02f7 KO |
275 | { |
276 | ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search); | |
9c432404 KO |
277 | |
278 | /* | |
279 | * if eytitzinger0_find_le() returned -1 - no element was <= search - we | |
280 | * want to return the first element; next/prev identities mean this work | |
281 | * as expected | |
282 | * | |
283 | * similarly if find_le() returns last element, we should return -1; | |
284 | * identities mean this all works out: | |
285 | */ | |
286 | return eytzinger0_next(idx, nr); | |
ca1e02f7 KO |
287 | } |
288 | ||
820b9efe KO |
289 | static inline int eytzinger0_find_ge(void *base, size_t nr, size_t size, |
290 | cmp_func_t cmp, const void *search) | |
291 | { | |
292 | ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search); | |
293 | ||
294 | if (idx < nr && !cmp(base + idx * size, search)) | |
295 | return idx; | |
296 | ||
297 | return eytzinger0_next(idx, nr); | |
298 | } | |
299 | ||
7ef2a73a KO |
/*
 * Exact-match lookup in a zero based eytzinger array.
 *
 * Evaluates to the index of an element for which _cmp(search, element)
 * returns 0, or to a value >= @nr when the descent runs off the tree without
 * a match.  A positive comparison result descends to the right child, a
 * negative one to the left child.
 *
 * @base, @nr, @size and @search are each evaluated exactly once.
 */
#define eytzinger0_find(base, nr, size, _cmp, search)			\
({									\
	void *_base		= (base);				\
	const void *_search	= (search);				\
	size_t _nr		= (nr);					\
	size_t _size		= (size);				\
	size_t _i		= 0;					\
	int _res;							\
									\
	for (;;) {							\
		if (_i >= _nr)						\
			break;						\
		_res = _cmp(_search, _base + _i * _size);		\
		if (!_res)						\
			break;						\
		_i = eytzinger0_child(_i, _res > 0);			\
	}								\
	_i;								\
})
1c6fdbd8 | 314 | |
ca1e02f7 KO |
315 | void eytzinger0_sort_r(void *, size_t, size_t, |
316 | cmp_r_func_t, swap_r_func_t, const void *); | |
317 | void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t); | |
1c6fdbd8 KO |
318 | |
319 | #endif /* _EYTZINGER_H */ |