Commit | Line | Data |
---|---|---|
1c6fdbd8 KO |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | #include "bcachefs.h" | |
4 | #include "bkey.h" | |
5 | #include "bkey_methods.h" | |
6 | #include "bset.h" | |
7 | #include "util.h" | |
8 | ||
#undef EBUG_ON

/* Expensive assertions: only compiled in when DEBUG_BKEYS is defined. */
#ifdef DEBUG_BKEYS
#define EBUG_ON(cond) BUG_ON(cond)
#else
#define EBUG_ON(cond)
#endif
16 | ||
17 | const struct bkey_format bch2_bkey_format_current = BKEY_FORMAT_CURRENT; | |
18 | ||
19 | struct bkey __bch2_bkey_unpack_key(const struct bkey_format *, | |
20 | const struct bkey_packed *); | |
21 | ||
22 | void bch2_to_binary(char *out, const u64 *p, unsigned nr_bits) | |
23 | { | |
24 | unsigned bit = high_bit_offset, done = 0; | |
25 | ||
26 | while (1) { | |
27 | while (bit < 64) { | |
28 | if (done && !(done % 8)) | |
29 | *out++ = ' '; | |
30 | *out++ = *p & (1ULL << (63 - bit)) ? '1' : '0'; | |
31 | bit++; | |
32 | done++; | |
33 | if (done == nr_bits) { | |
34 | *out++ = '\0'; | |
35 | return; | |
36 | } | |
37 | } | |
38 | ||
39 | p = next_word(p); | |
40 | bit = 0; | |
41 | } | |
42 | } | |
43 | ||
#ifdef CONFIG_BCACHEFS_DEBUG

/*
 * Debug check that packing round trips: unpacking @packed with @format must
 * reproduce @unpacked exactly. Panics with both keys printed in text and
 * binary form on mismatch.
 */
static void bch2_bkey_pack_verify(const struct bkey_packed *packed,
				  const struct bkey *unpacked,
				  const struct bkey_format *format)
{
	struct bkey tmp;

	BUG_ON(bkeyp_val_u64s(format, packed) !=
	       bkey_val_u64s(unpacked));

	BUG_ON(packed->u64s < bkeyp_key_u64s(format, packed));

	tmp = __bch2_bkey_unpack_key(format, packed);

	if (memcmp(&tmp, unpacked, sizeof(struct bkey))) {
		char buf1[160], buf2[160];
		char buf3[160], buf4[160];

		bch2_bkey_to_text(&PBUF(buf1), unpacked);
		bch2_bkey_to_text(&PBUF(buf2), &tmp);
		bch2_to_binary(buf3, (void *) unpacked, 80);
		bch2_to_binary(buf4, high_word(format, packed), 80);

		panic("keys differ: format u64s %u fields %u %u %u %u %u\n%s\n%s\n%s\n%s\n",
		      format->key_u64s,
		      format->bits_per_field[0],
		      format->bits_per_field[1],
		      format->bits_per_field[2],
		      format->bits_per_field[3],
		      format->bits_per_field[4],
		      buf1, buf2, buf3, buf4);
	}
}

#else
static inline void bch2_bkey_pack_verify(const struct bkey_packed *packed,
					 const struct bkey *unpacked,
					 const struct bkey_format *format) {}
#endif
84 | ||
/*
 * Streaming writer state for packing bitfields into a key, filling words
 * starting from the high bits; the in-progress word is buffered in @w and
 * flushed to @p by pack_state_finish().
 */
struct pack_state {
	const struct bkey_format *format;
	unsigned	bits;	/* bits remaining in current word */
	u64		w;	/* current word */
	u64		*p;	/* pointer to next word */
};
91 | ||
92 | __always_inline | |
93 | static struct pack_state pack_state_init(const struct bkey_format *format, | |
94 | struct bkey_packed *k) | |
95 | { | |
96 | u64 *p = high_word(format, k); | |
97 | ||
98 | return (struct pack_state) { | |
99 | .format = format, | |
100 | .bits = 64 - high_bit_offset, | |
101 | .w = 0, | |
102 | .p = p, | |
103 | }; | |
104 | } | |
105 | ||
/*
 * Flush the final, partially filled word of the pack stream out to the key.
 * Must be called once after the last field has been packed.
 */
__always_inline
static void pack_state_finish(struct pack_state *state,
			      struct bkey_packed *k)
{
	/* the flushed word must land inside the packed key's data: */
	EBUG_ON(state->p < k->_data);
	EBUG_ON(state->p >= k->_data + state->format->key_u64s);

	*state->p = state->w;
}
115 | ||
/*
 * Streaming reader state for unpacking bitfields from a key; mirror image of
 * struct pack_state. @w holds the not-yet-consumed bits of the current word,
 * left justified.
 */
struct unpack_state {
	const struct bkey_format *format;
	unsigned	bits;	/* bits remaining in current word */
	u64		w;	/* current word */
	const u64	*p;	/* pointer to next word */
};
122 | ||
123 | __always_inline | |
124 | static struct unpack_state unpack_state_init(const struct bkey_format *format, | |
125 | const struct bkey_packed *k) | |
126 | { | |
127 | const u64 *p = high_word(format, k); | |
128 | ||
129 | return (struct unpack_state) { | |
130 | .format = format, | |
131 | .bits = 64 - high_bit_offset, | |
132 | .w = *p << high_bit_offset, | |
133 | .p = p, | |
134 | }; | |
135 | } | |
136 | ||
/*
 * Consume the next field (format->bits_per_field[field] bits wide) from the
 * unpack stream and return its unpacked value, i.e. packed value plus the
 * field's offset. Fields must be consumed in declaration order.
 */
__always_inline
static u64 get_inc_field(struct unpack_state *state, unsigned field)
{
	unsigned bits = state->format->bits_per_field[field];
	u64 v = 0, offset = le64_to_cpu(state->format->field_offset[field]);

	if (bits >= state->bits) {
		/*
		 * Field straddles a word boundary: take the high part of the
		 * field from the bits left in the current word, then refill
		 * from the next word:
		 */
		v = state->w >> (64 - bits);
		bits -= state->bits;

		state->p = next_word(state->p);
		state->w = *state->p;
		state->bits = 64;
	}

	/* avoid shift by 64 if bits is 0 - bits is never 64 here: */
	v |= (state->w >> 1) >> (63 - bits);
	state->w <<= bits;
	state->bits -= bits;

	return v + offset;
}
159 | ||
/*
 * Append field @field with unpacked value @v to the pack stream. Returns
 * false if @v can't be represented in this format - either below the field's
 * offset or too wide for the field - leaving the stream untouched.
 */
__always_inline
static bool set_inc_field(struct pack_state *state, unsigned field, u64 v)
{
	unsigned bits = state->format->bits_per_field[field];
	u64 offset = le64_to_cpu(state->format->field_offset[field]);

	if (v < offset)
		return false;

	v -= offset;

	if (fls64(v) > bits)
		return false;

	if (bits > state->bits) {
		/*
		 * Field straddles a word boundary: put the high part of the
		 * field into what's left of the current word, flush it, and
		 * continue in the next word:
		 */
		bits -= state->bits;
		/* avoid shift by 64 if bits is 0 - bits is never 64 here: */
		state->w |= (v >> 1) >> (bits - 1);

		*state->p = state->w;
		state->p = next_word(state->p);
		state->w = 0;
		state->bits = 64;
	}

	state->bits -= bits;
	state->w |= v << state->bits;

	return true;
}
190 | ||
191 | /* | |
192 | * Note: does NOT set out->format (we don't know what it should be here!) | |
193 | * | |
194 | * Also: doesn't work on extents - it doesn't preserve the invariant that | |
195 | * if k is packed bkey_start_pos(k) will successfully pack | |
196 | */ | |
197 | static bool bch2_bkey_transform_key(const struct bkey_format *out_f, | |
198 | struct bkey_packed *out, | |
199 | const struct bkey_format *in_f, | |
200 | const struct bkey_packed *in) | |
201 | { | |
202 | struct pack_state out_s = pack_state_init(out_f, out); | |
203 | struct unpack_state in_s = unpack_state_init(in_f, in); | |
204 | u64 *w = out->_data; | |
205 | unsigned i; | |
206 | ||
207 | *w = 0; | |
208 | ||
209 | for (i = 0; i < BKEY_NR_FIELDS; i++) | |
210 | if (!set_inc_field(&out_s, i, get_inc_field(&in_s, i))) | |
211 | return false; | |
212 | ||
213 | /* Can't happen because the val would be too big to unpack: */ | |
214 | EBUG_ON(in->u64s - in_f->key_u64s + out_f->key_u64s > U8_MAX); | |
215 | ||
216 | pack_state_finish(&out_s, out); | |
217 | out->u64s = out_f->key_u64s + in->u64s - in_f->key_u64s; | |
218 | out->needs_whiteout = in->needs_whiteout; | |
219 | out->type = in->type; | |
220 | ||
221 | return true; | |
222 | } | |
223 | ||
224 | bool bch2_bkey_transform(const struct bkey_format *out_f, | |
225 | struct bkey_packed *out, | |
226 | const struct bkey_format *in_f, | |
227 | const struct bkey_packed *in) | |
228 | { | |
229 | if (!bch2_bkey_transform_key(out_f, out, in_f, in)) | |
230 | return false; | |
231 | ||
232 | memcpy_u64s((u64 *) out + out_f->key_u64s, | |
233 | (u64 *) in + in_f->key_u64s, | |
234 | (in->u64s - in_f->key_u64s)); | |
235 | return true; | |
236 | } | |
237 | ||
/*
 * X-macro listing every bkey field in pack order, mapping each BKEY_FIELD_*
 * enum value to the corresponding struct bkey member:
 */
#define bkey_fields()							\
	x(BKEY_FIELD_INODE, p.inode)					\
	x(BKEY_FIELD_OFFSET, p.offset)					\
	x(BKEY_FIELD_SNAPSHOT, p.snapshot)				\
	x(BKEY_FIELD_SIZE, size)					\
	x(BKEY_FIELD_VERSION_HI, version.hi)				\
	x(BKEY_FIELD_VERSION_LO, version.lo)
245 | ||
/*
 * Unpack just the key part of @in (packed in @format) into a struct bkey in
 * the current in-memory format. The value is not touched.
 */
struct bkey __bch2_bkey_unpack_key(const struct bkey_format *format,
				   const struct bkey_packed *in)
{
	struct unpack_state state = unpack_state_init(format, in);
	struct bkey out;

	EBUG_ON(format->nr_fields != BKEY_NR_FIELDS);
	EBUG_ON(in->u64s < format->key_u64s);
	EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE);
	EBUG_ON(in->u64s - format->key_u64s + BKEY_U64s > U8_MAX);

	/* value size is unchanged; only the key part changes size: */
	out.u64s	= BKEY_U64s + in->u64s - format->key_u64s;
	out.format	= KEY_FORMAT_CURRENT;
	out.needs_whiteout = in->needs_whiteout;
	out.type	= in->type;
	out.pad[0]	= 0;

	/* unpack each field in declaration order: */
#define x(id, field)	out.field = get_inc_field(&state, id);
	bkey_fields()
#undef x

	return out;
}
269 | ||
#ifndef HAVE_BCACHEFS_COMPILED_UNPACK
/*
 * Unpack only the position fields (inode, offset, snapshot) of @in; fallback
 * used when there's no JIT-compiled unpack function for this architecture.
 */
struct bpos __bkey_unpack_pos(const struct bkey_format *format,
			      const struct bkey_packed *in)
{
	struct unpack_state state = unpack_state_init(format, in);
	struct bpos out;

	EBUG_ON(format->nr_fields != BKEY_NR_FIELDS);
	EBUG_ON(in->u64s < format->key_u64s);
	EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE);

	out.inode	= get_inc_field(&state, BKEY_FIELD_INODE);
	out.offset	= get_inc_field(&state, BKEY_FIELD_OFFSET);
	out.snapshot	= get_inc_field(&state, BKEY_FIELD_SNAPSHOT);

	return out;
}
#endif
288 | ||
/**
 * bch2_bkey_pack_key -- pack just the key, not the value
 *
 * Returns false if @in can't be represented in @format; on success @out is a
 * complete packed key (format, size, type and whiteout flag all set).
 */
bool bch2_bkey_pack_key(struct bkey_packed *out, const struct bkey *in,
			const struct bkey_format *format)
{
	struct pack_state state = pack_state_init(format, out);
	u64 *w = out->_data;

	EBUG_ON((void *) in == (void *) out);
	EBUG_ON(format->nr_fields != BKEY_NR_FIELDS);
	EBUG_ON(in->format != KEY_FORMAT_CURRENT);

	*w = 0;

	/* pack each field; bail out if any doesn't fit: */
#define x(id, field)	if (!set_inc_field(&state, id, in->field)) return false;
	bkey_fields()
#undef x

	/*
	 * Extents - we have to guarantee that if an extent is packed, a trimmed
	 * version will also pack:
	 */
	if (bkey_start_offset(in) <
	    le64_to_cpu(format->field_offset[BKEY_FIELD_OFFSET]))
		return false;

	pack_state_finish(&state, out);
	out->u64s	= format->key_u64s + in->u64s - BKEY_U64s;
	out->format	= KEY_FORMAT_LOCAL_BTREE;
	out->needs_whiteout = in->needs_whiteout;
	out->type	= in->type;

	bch2_bkey_pack_verify(out, in, format);
	return true;
}
325 | ||
/**
 * bch2_bkey_unpack -- unpack the key and the value
 */
void bch2_bkey_unpack(const struct btree *b, struct bkey_i *dst,
		      const struct bkey_packed *src)
{
	__bkey_unpack_key(b, &dst->k, src);

	/* the value is never packed - copy it across verbatim: */
	memcpy_u64s(&dst->v,
		    bkeyp_val(&b->format, src),
		    bkeyp_val_u64s(&b->format, src));
}
338 | ||
/**
 * bch2_bkey_pack -- pack the key and the value
 *
 * Returns false if the key can't be represented in @format.
 */
bool bch2_bkey_pack(struct bkey_packed *out, const struct bkey_i *in,
		    const struct bkey_format *format)
{
	struct bkey_packed tmp;

	/* pack the key into a temporary first, so @in isn't clobbered: */
	if (!bch2_bkey_pack_key(&tmp, &in->k, format))
		return false;

	/*
	 * NOTE(review): memmove + move-value-first ordering suggests @out may
	 * overlap @in - confirm against callers before changing this order.
	 */
	memmove_u64s((u64 *) out + format->key_u64s,
		     &in->v,
		     bkey_val_u64s(&in->k));
	memcpy_u64s(out, &tmp, format->key_u64s);

	return true;
}
357 | ||
/*
 * Like set_inc_field(), but on overflow saturates the field to its maximum
 * packed value instead of failing; returns false when it had to saturate
 * (i.e. the packed value compares less than @v). @v must not be below the
 * field's offset.
 */
__always_inline
static bool set_inc_field_lossy(struct pack_state *state, unsigned field, u64 v)
{
	unsigned bits = state->format->bits_per_field[field];
	u64 offset = le64_to_cpu(state->format->field_offset[field]);
	bool ret = true;

	EBUG_ON(v < offset);
	v -= offset;

	if (fls64(v) > bits) {
		/* saturate to all ones in @bits bits: */
		v = ~(~0ULL << bits);
		ret = false;
	}

	if (bits > state->bits) {
		/* field straddles a word boundary - flush the current word: */
		bits -= state->bits;
		state->w |= (v >> 1) >> (bits - 1);

		*state->p = state->w;
		state->p = next_word(state->p);
		state->w = 0;
		state->bits = 64;
	}

	state->bits -= bits;
	state->w |= v << state->bits;

	return ret;
}
388 | ||
#ifdef CONFIG_BCACHEFS_DEBUG
/*
 * Compute in @out the next representable packed key after @k, by adding 1 to
 * the key bits with carry propagation from the low word upward. Returns
 * false if @k is already the maximum representable key (all key bits set) or
 * the format has no key bits.
 */
static bool bkey_packed_successor(struct bkey_packed *out,
				  const struct btree *b,
				  struct bkey_packed k)
{
	const struct bkey_format *f = &b->format;
	unsigned nr_key_bits = b->nr_key_bits;
	unsigned first_bit, offset;
	u64 *p;

	EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f));

	if (!nr_key_bits)
		return false;

	*out = k;

	/* start at the word/bit holding the least significant key bit: */
	first_bit = high_bit_offset + nr_key_bits - 1;
	p = nth_word(high_word(f, out), first_bit >> 6);
	offset = 63 - (first_bit & 63);

	while (nr_key_bits) {
		unsigned bits = min(64 - offset, nr_key_bits);
		u64 mask = (~0ULL >> (64 - bits)) << offset;

		if ((*p & mask) != mask) {
			/* room to increment within this word - done: */
			*p += 1ULL << offset;
			EBUG_ON(bch2_bkey_cmp_packed(b, out, &k) <= 0);
			return true;
		}

		/* this word's key bits were all ones - clear and carry: */
		*p &= ~mask;
		p = prev_word(p);
		nr_key_bits -= bits;
		offset = 0;
	}

	return false;
}
#endif
429 | ||
/*
 * Returns a packed key that compares <= in
 *
 * This is used in bset_search_tree(), where we need a packed pos in order to be
 * able to compare against the keys in the auxiliary search tree - and it's
 * legal to use a packed pos that isn't equivalent to the original pos,
 * _provided_ it compares <= to the original pos.
 */
enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *out,
						struct bpos in,
						const struct btree *b)
{
	const struct bkey_format *f = &b->format;
	struct pack_state state = pack_state_init(f, out);
	u64 *w = out->_data;
#ifdef CONFIG_BCACHEFS_DEBUG
	struct bpos orig = in;
#endif
	bool exact = true;

	*w = 0;

	/*
	 * If a field is below its format offset it can't be represented; step
	 * @in down to the largest smaller representable pos: decrement the
	 * next-more-significant field (with borrow - `!x--` decrements and
	 * tests the old value) and saturate the lower fields. Fail only if
	 * there is no smaller pos at all.
	 */
	if (unlikely(in.snapshot <
		     le64_to_cpu(f->field_offset[BKEY_FIELD_SNAPSHOT]))) {
		if (!in.offset-- &&
		    !in.inode--)
			return BKEY_PACK_POS_FAIL;
		in.snapshot	= KEY_SNAPSHOT_MAX;
		exact = false;
	}

	if (unlikely(in.offset <
		     le64_to_cpu(f->field_offset[BKEY_FIELD_OFFSET]))) {
		if (!in.inode--)
			return BKEY_PACK_POS_FAIL;
		in.offset	= KEY_OFFSET_MAX;
		in.snapshot	= KEY_SNAPSHOT_MAX;
		exact = false;
	}

	if (unlikely(in.inode <
		     le64_to_cpu(f->field_offset[BKEY_FIELD_INODE])))
		return BKEY_PACK_POS_FAIL;

	/*
	 * Pack each field lossily; if a field had to be truncated, saturate
	 * all less significant fields so the result still compares <= @in:
	 */
	if (!set_inc_field_lossy(&state, BKEY_FIELD_INODE, in.inode)) {
		in.offset	= KEY_OFFSET_MAX;
		in.snapshot	= KEY_SNAPSHOT_MAX;
		exact = false;
	}

	if (!set_inc_field_lossy(&state, BKEY_FIELD_OFFSET, in.offset)) {
		in.snapshot	= KEY_SNAPSHOT_MAX;
		exact = false;
	}

	if (!set_inc_field_lossy(&state, BKEY_FIELD_SNAPSHOT, in.snapshot))
		exact = false;

	pack_state_finish(&state, out);
	out->u64s	= f->key_u64s;
	out->format	= KEY_FORMAT_LOCAL_BTREE;
	out->type	= KEY_TYPE_deleted;

#ifdef CONFIG_BCACHEFS_DEBUG
	/* verify the <= contract, and that the result is as close as possible: */
	if (exact) {
		BUG_ON(bkey_cmp_left_packed(b, out, &orig));
	} else {
		struct bkey_packed successor;

		BUG_ON(bkey_cmp_left_packed(b, out, &orig) >= 0);
		BUG_ON(bkey_packed_successor(&successor, b, *out) &&
		       bkey_cmp_left_packed(b, &successor, &orig) < 0);
	}
#endif

	return exact ? BKEY_PACK_POS_EXACT : BKEY_PACK_POS_SMALLER;
}
507 | ||
508 | void bch2_bkey_format_init(struct bkey_format_state *s) | |
509 | { | |
510 | unsigned i; | |
511 | ||
512 | for (i = 0; i < ARRAY_SIZE(s->field_min); i++) | |
513 | s->field_min[i] = U64_MAX; | |
514 | ||
515 | for (i = 0; i < ARRAY_SIZE(s->field_max); i++) | |
516 | s->field_max[i] = 0; | |
517 | ||
518 | /* Make sure we can store a size of 0: */ | |
519 | s->field_min[BKEY_FIELD_SIZE] = 0; | |
520 | } | |
521 | ||
/* Widen the running [min, max] range tracked for @field to include @v: */
static void __bkey_format_add(struct bkey_format_state *s,
			      unsigned field, u64 v)
{
	s->field_min[field] = min(s->field_min[field], v);
	s->field_max[field] = max(s->field_max[field], v);
}
528 | ||
/*
 * Changes @format so that @k can be successfully packed with @format
 */
void bch2_bkey_format_add_key(struct bkey_format_state *s, const struct bkey *k)
{
#define x(id, field) __bkey_format_add(s, id, k->field);
	bkey_fields()
#undef x
	/* trimmed extents must also pack - see bch2_bkey_pack_key(): */
	__bkey_format_add(s, BKEY_FIELD_OFFSET, bkey_start_offset(k));
}
539 | ||
540 | void bch2_bkey_format_add_pos(struct bkey_format_state *s, struct bpos p) | |
541 | { | |
542 | unsigned field = 0; | |
543 | ||
544 | __bkey_format_add(s, field++, p.inode); | |
545 | __bkey_format_add(s, field++, p.offset); | |
546 | __bkey_format_add(s, field++, p.snapshot); | |
547 | } | |
548 | ||
/*
 * We don't want it to be possible for the packed format to represent fields
 * bigger than a u64... that will cause confusion and issues (like with
 * bkey_packed_successor())
 */
static void set_format_field(struct bkey_format *f, enum bch_bkey_fields i,
			     unsigned bits, u64 offset)
{
	unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
	/* max value representable in @unpacked_bits (safe for 64 bits): */
	u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));

	/* never wider than the in-memory field: */
	bits = min(bits, unpacked_bits);

	/*
	 * At full width offset must be 0; otherwise clamp it so that
	 * offset + largest packed value can't exceed the unpacked maximum:
	 */
	offset = bits == unpacked_bits ? 0 : min(offset, unpacked_max - ((1ULL << bits) - 1));

	f->bits_per_field[i]	= bits;
	f->field_offset[i]	= cpu_to_le64(offset);
}
567 | ||
/*
 * Build the final packed format from the accumulated per-field [min, max]
 * ranges: each field gets just enough bits for (max - min), with min as its
 * offset, then spare bits are used to round field widths up to whole bytes.
 */
struct bkey_format bch2_bkey_format_done(struct bkey_format_state *s)
{
	unsigned i, bits = KEY_PACKED_BITS_START;
	struct bkey_format ret = {
		.nr_fields = BKEY_NR_FIELDS,
	};

	for (i = 0; i < ARRAY_SIZE(s->field_min); i++) {
		/* guard against a field that was never added to: */
		s->field_min[i] = min(s->field_min[i], s->field_max[i]);

		set_format_field(&ret, i,
				 fls64(s->field_max[i] - s->field_min[i]),
				 s->field_min[i]);

		bits += ret.bits_per_field[i];
	}

	/* allow for extent merging: */
	if (ret.bits_per_field[BKEY_FIELD_SIZE]) {
		ret.bits_per_field[BKEY_FIELD_SIZE] += 4;
		bits += 4;
	}

	ret.key_u64s = DIV_ROUND_UP(bits, 64);

	/* if we have enough spare bits, round fields up to nearest byte */
	bits = ret.key_u64s * 64 - bits;

	for (i = 0; i < ARRAY_SIZE(ret.bits_per_field); i++) {
		unsigned r = round_up(ret.bits_per_field[i], 8) -
			ret.bits_per_field[i];

		if (r <= bits) {
			set_format_field(&ret, i,
					 ret.bits_per_field[i] + r,
					 le64_to_cpu(ret.field_offset[i]));
			bits -= r;
		}
	}

	EBUG_ON(bch2_bkey_format_validate(&ret));
	return ret;
}
611 | ||
/*
 * Validate a (possibly on-disk) packed format. Returns NULL if valid, or a
 * static string describing the first problem found.
 */
const char *bch2_bkey_format_validate(struct bkey_format *f)
{
	unsigned i, bits = KEY_PACKED_BITS_START;

	if (f->nr_fields != BKEY_NR_FIELDS)
		return "incorrect number of fields";

	for (i = 0; i < f->nr_fields; i++) {
		unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
		/* max value representable in @unpacked_bits: */
		u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1));
		u64 field_offset = le64_to_cpu(f->field_offset[i]);

		if (f->bits_per_field[i] > unpacked_bits)
			return "field too large";

		/* a full-width field can't also have a nonzero offset: */
		if ((f->bits_per_field[i] == unpacked_bits) && field_offset)
			return "offset + bits overflow";

		/* offset + largest packed value must not wrap the field: */
		if (((field_offset + ((1ULL << f->bits_per_field[i]) - 1)) &
		     unpacked_mask) <
		    field_offset)
			return "offset + bits overflow";

		bits += f->bits_per_field[i];
	}

	if (f->key_u64s != DIV_ROUND_UP(bits, 64))
		return "incorrect key_u64s";

	return NULL;
}
643 | ||
/*
 * Most significant differing bit
 * Bits are indexed from 0 - return is [0, nr_key_bits)
 *
 * Returns 0 if the keys' packed bits are identical. Walks word by word from
 * the most significant key bits downward.
 */
__pure
unsigned bch2_bkey_greatest_differing_bit(const struct btree *b,
					  const struct bkey_packed *l_k,
					  const struct bkey_packed *r_k)
{
	const u64 *l = high_word(&b->format, l_k);
	const u64 *r = high_word(&b->format, r_k);
	unsigned nr_key_bits = b->nr_key_bits;
	unsigned word_bits = 64 - high_bit_offset;
	u64 l_v, r_v;

	EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format));

	/* for big endian, skip past header */
	l_v = *l & (~0ULL >> high_bit_offset);
	r_v = *r & (~0ULL >> high_bit_offset);

	while (nr_key_bits) {
		if (nr_key_bits < word_bits) {
			/* discard the non-key low bits of the last word: */
			l_v >>= word_bits - nr_key_bits;
			r_v >>= word_bits - nr_key_bits;
			nr_key_bits = 0;
		} else {
			nr_key_bits -= word_bits;
		}

		if (l_v != r_v)
			return fls64(l_v ^ r_v) - 1 + nr_key_bits;

		l = next_word(l);
		r = next_word(r);

		l_v = *l;
		r_v = *r;
		word_bits = 64;
	}

	return 0;
}
687 | ||
/*
 * First set bit
 * Bits are indexed from 0 - return is [0, nr_key_bits)
 *
 * Scans from the key's least significant bit toward the most significant;
 * returns 0 if no key bit is set.
 */
__pure
unsigned bch2_bkey_ffs(const struct btree *b, const struct bkey_packed *k)
{
	const u64 *p = high_word(&b->format, k);
	unsigned nr_key_bits = b->nr_key_bits;
	unsigned ret = 0, offset;

	EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format));

	/* advance @p to the word holding the least significant key bits: */
	offset = nr_key_bits;
	while (offset > 64) {
		p = next_word(p);
		offset -= 64;
	}

	/* bit position of the key's lowest bit within that word: */
	offset = 64 - offset;

	while (nr_key_bits) {
		unsigned bits = nr_key_bits + offset < 64
			? nr_key_bits
			: 64 - offset;

		u64 mask = (~0ULL >> (64 - bits)) << offset;

		if (*p & mask)
			return ret + __ffs64(*p & mask) - offset;

		/* no bit set in this word - move toward the high word: */
		p = prev_word(p);
		nr_key_bits -= bits;
		ret += bits;
		offset = 0;
	}

	return 0;
}
727 | ||
#ifdef HAVE_BCACHEFS_COMPILED_UNPACK

/*
 * Compare @nr_key_bits of packed key bits at @l and @r, walking words from
 * high to low; returns -1, 0 or 1. x86-64 assembly version (rdi = l,
 * rsi = r, ecx = nr_key_bits; r8/r9 hold the current words).
 */
static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
				  unsigned nr_key_bits)
{
	long d0, d1, d2, d3;
	int cmp;

	/* we shouldn't need asm for this, but gcc generates poor code for the plain C loop: */

	asm(".intel_syntax noprefix;"
	    "xor eax, eax;"
	    "xor edx, edx;"
	    "1:;"
	    "mov r8, [rdi];"
	    "mov r9, [rsi];"
	    "sub ecx, 64;"
	    "jl 2f;"

	    "cmp r8, r9;"
	    "jnz 3f;"

	    "lea rdi, [rdi - 8];"
	    "lea rsi, [rsi - 8];"
	    "jmp 1b;"

	    "2:;"
	    "not ecx;"
	    "shr r8, 1;"
	    "shr r9, 1;"
	    "shr r8, cl;"
	    "shr r9, cl;"
	    "cmp r8, r9;"

	    "3:\n"
	    "seta al;"
	    "setb dl;"
	    "sub eax, edx;"
	    ".att_syntax prefix;"
	    : "=&D" (d0), "=&S" (d1), "=&d" (d2), "=&c" (d3), "=&a" (cmp)
	    : "0" (l), "1" (r), "3" (nr_key_bits)
	    : "r8", "r9", "cc", "memory");

	return cmp;
}
773 | ||
/* Emit 1-5 bytes of machine code into the output buffer @out: */
#define I(_x)			(*(out)++ = (_x))
#define I1(i0)			I(i0)
#define I2(i0, i1)		(I1(i0), I(i1))
#define I3(i0, i1, i2)		(I2(i0, i1), I(i2))
#define I4(i0, i1, i2, i3)	(I3(i0, i1, i2), I(i3))
#define I5(i0, i1, i2, i3, i4)	(I4(i0, i1, i2, i3), I(i4))
780 | ||
/*
 * JIT-emit x86-64 code that extracts one packed field, adds its offset, and
 * stores the result into the unpacked key: loads from [rsi + ...] (packed
 * key), stores to [rdi + dst_offset] (unpacked key), using rax/rdx as
 * scratch. Returns the advanced output pointer. *eax_zeroed tracks whether
 * eax is known to be zero, so consecutive constant-zero fields can share one
 * `xor eax, eax`.
 */
static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out,
			      enum bch_bkey_fields field,
			      unsigned dst_offset, unsigned dst_size,
			      bool *eax_zeroed)
{
	unsigned bits = format->bits_per_field[field];
	u64 offset = le64_to_cpu(format->field_offset[field]);
	unsigned i, byte, bit_offset, align, shl, shr;

	if (!bits && !offset) {
		/* field is constant zero - just store zero: */
		if (!*eax_zeroed) {
			/* xor eax, eax */
			I2(0x31, 0xc0);
		}

		*eax_zeroed = true;
		goto set_field;
	}

	if (!bits) {
		/* just return offset: */

		switch (dst_size) {
		case 8:
			if (offset > S32_MAX) {
				/* mov [rdi + dst_offset], offset */
				I3(0xc7, 0x47, dst_offset);
				memcpy(out, &offset, 4);
				out += 4;

				I3(0xc7, 0x47, dst_offset + 4);
				memcpy(out, (void *) &offset + 4, 4);
				out += 4;
			} else {
				/* mov [rdi + dst_offset], offset */
				/* sign extended */
				I4(0x48, 0xc7, 0x47, dst_offset);
				memcpy(out, &offset, 4);
				out += 4;
			}
			break;
		case 4:
			/* mov [rdi + dst_offset], offset */
			I3(0xc7, 0x47, dst_offset);
			memcpy(out, &offset, 4);
			out += 4;
			break;
		default:
			BUG();
		}

		return out;
	}

	/* locate the field: fields are packed high to low, so sum the widths
	 * of this field and all preceding ones down from the top: */
	bit_offset = format->key_u64s * 64;
	for (i = 0; i <= field; i++)
		bit_offset -= format->bits_per_field[i];

	byte = bit_offset / 8;
	bit_offset -= byte * 8;

	*eax_zeroed = false;

	if (bit_offset == 0 && bits == 8) {
		/* movzx eax, BYTE PTR [rsi + imm8] */
		I4(0x0f, 0xb6, 0x46, byte);
	} else if (bit_offset == 0 && bits == 16) {
		/* movzx eax, WORD PTR [rsi + imm8] */
		I4(0x0f, 0xb7, 0x46, byte);
	} else if (bit_offset + bits <= 32) {
		/* field fits in one 32 bit load - align it as far as possible: */
		align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3);
		byte -= align;
		bit_offset += align * 8;

		BUG_ON(bit_offset + bits > 32);

		/* mov eax, [rsi + imm8] */
		I3(0x8b, 0x46, byte);

		if (bit_offset) {
			/* shr eax, imm8 */
			I3(0xc1, 0xe8, bit_offset);
		}

		if (bit_offset + bits < 32) {
			unsigned mask = ~0U >> (32 - bits);

			/* and eax, imm32 */
			I1(0x25);
			memcpy(out, &mask, 4);
			out += 4;
		}
	} else if (bit_offset + bits <= 64) {
		/* field fits in one 64 bit load: */
		align = min(8 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 7);
		byte -= align;
		bit_offset += align * 8;

		BUG_ON(bit_offset + bits > 64);

		/* mov rax, [rsi + imm8] */
		I4(0x48, 0x8b, 0x46, byte);

		/* shift left to drop high bits, then right to drop low bits
		 * and right justify: */
		shl = 64 - bit_offset - bits;
		shr = bit_offset + shl;

		if (shl) {
			/* shl rax, imm8 */
			I4(0x48, 0xc1, 0xe0, shl);
		}

		if (shr) {
			/* shr rax, imm8 */
			I4(0x48, 0xc1, 0xe8, shr);
		}
	} else {
		/* field straddles two 64 bit words: */
		align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3);
		byte -= align;
		bit_offset += align * 8;

		BUG_ON(bit_offset + bits > 96);

		/* mov rax, [rsi + byte] */
		I4(0x48, 0x8b, 0x46, byte);

		/* mov edx, [rsi + byte + 8] */
		I3(0x8b, 0x56, byte + 8);

		/* bits from next word: */
		shr = bit_offset + bits - 64;
		BUG_ON(shr > bit_offset);

		/* shr rax, bit_offset */
		I4(0x48, 0xc1, 0xe8, shr);

		/* shl rdx, imm8 */
		I4(0x48, 0xc1, 0xe2, 64 - shr);

		/* or rax, rdx */
		I3(0x48, 0x09, 0xd0);

		shr = bit_offset - shr;

		if (shr) {
			/* shr rax, imm8 */
			I4(0x48, 0xc1, 0xe8, shr);
		}
	}

	/* rax += offset: */
	if (offset > S32_MAX) {
		/* mov rdx, imm64 */
		I2(0x48, 0xba);
		memcpy(out, &offset, 8);
		out += 8;
		/* add %rdx, %rax */
		I3(0x48, 0x01, 0xd0);
	} else if (offset + (~0ULL >> (64 - bits)) > U32_MAX) {
		/* add rax, imm32 */
		I2(0x48, 0x05);
		memcpy(out, &offset, 4);
		out += 4;
	} else if (offset) {
		/* add eax, imm32 */
		I1(0x05);
		memcpy(out, &offset, 4);
		out += 4;
	}
set_field:
	switch (dst_size) {
	case 8:
		/* mov [rdi + dst_offset], rax */
		I4(0x48, 0x89, 0x47, dst_offset);
		break;
	case 4:
		/* mov [rdi + dst_offset], eax */
		I3(0x89, 0x47, dst_offset);
		break;
	default:
		BUG();
	}

	return out;
}
964 | ||
/*
 * JIT-compile an unpack function for @format into the buffer at @_out:
 * emits the key header setup, then one field extractor per bkey field, then
 * a return. Returns the number of bytes of code emitted.
 */
int bch2_compile_bkey_format(const struct bkey_format *format, void *_out)
{
	bool eax_zeroed = false;
	u8 *out = _out;

	/*
	 * rdi: dst - unpacked key
	 * rsi: src - packed key
	 */

	/* k->u64s, k->format, k->type */

	/* mov eax, [rsi] */
	I2(0x8b, 0x06);

	/* add eax, BKEY_U64s - format->key_u64s */
	I5(0x05, BKEY_U64s - format->key_u64s, KEY_FORMAT_CURRENT, 0, 0);

	/* and eax, imm32: mask out k->pad: */
	I5(0x25, 0xff, 0xff, 0xff, 0);

	/* mov [rdi], eax */
	I2(0x89, 0x07);

	/* emit an extractor for each field, in pack order: */
#define x(id, field)							\
	out = compile_bkey_field(format, out, id,			\
				 offsetof(struct bkey, field),		\
				 sizeof(((struct bkey *) NULL)->field),	\
				 &eax_zeroed);
	bkey_fields()
#undef x

	/* retq */
	I1(0xc3);

	return (void *) out - _out;
}
1002 | ||
#else
/*
 * Portable C version: compare @nr_key_bits of packed key bits at @l and @r,
 * walking words from high to low; returns -1, 0 or 1.
 */
static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
				  unsigned nr_key_bits)
{
	u64 l_v, r_v;

	if (!nr_key_bits)
		return 0;

	/* for big endian, skip past header */
	nr_key_bits += high_bit_offset;
	l_v = *l & (~0ULL >> high_bit_offset);
	r_v = *r & (~0ULL >> high_bit_offset);

	while (1) {
		if (nr_key_bits < 64) {
			/* discard the non-key low bits of the last word: */
			l_v >>= 64 - nr_key_bits;
			r_v >>= 64 - nr_key_bits;
			nr_key_bits = 0;
		} else {
			nr_key_bits -= 64;
		}

		if (!nr_key_bits || l_v != r_v)
			break;

		l = next_word(l);
		r = next_word(r);

		l_v = *l;
		r_v = *r;
	}

	return cmp_int(l_v, r_v);
}
#endif
1039 | ||
1040 | __pure | |
1041 | int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *l, | |
1042 | const struct bkey_packed *r, | |
1043 | const struct btree *b) | |
1044 | { | |
1045 | const struct bkey_format *f = &b->format; | |
1046 | int ret; | |
1047 | ||
1048 | EBUG_ON(!bkey_packed(l) || !bkey_packed(r)); | |
1049 | EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f)); | |
1050 | ||
1051 | ret = __bkey_cmp_bits(high_word(f, l), | |
1052 | high_word(f, r), | |
1053 | b->nr_key_bits); | |
1054 | ||
4cf91b02 | 1055 | EBUG_ON(ret != bpos_cmp(bkey_unpack_pos(b, l), |
1c6fdbd8 KO |
1056 | bkey_unpack_pos(b, r))); |
1057 | return ret; | |
1058 | } | |
1059 | ||
1060 | __pure __flatten | |
1061 | int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *b, | |
1062 | const struct bkey_packed *l, | |
1063 | const struct bpos *r) | |
1064 | { | |
4cf91b02 | 1065 | return bpos_cmp(bkey_unpack_pos_format_checked(b, l), *r); |
1c6fdbd8 KO |
1066 | } |
1067 | ||
1068 | __pure __flatten | |
811d2bcd KO |
1069 | int bch2_bkey_cmp_packed(const struct btree *b, |
1070 | const struct bkey_packed *l, | |
1071 | const struct bkey_packed *r) | |
1c6fdbd8 | 1072 | { |
fab4f8c6 | 1073 | struct bkey unpacked; |
1c6fdbd8 | 1074 | |
fab4f8c6 | 1075 | if (likely(bkey_packed(l) && bkey_packed(r))) |
1c6fdbd8 KO |
1076 | return __bch2_bkey_cmp_packed_format_checked(l, r, b); |
1077 | ||
fab4f8c6 KO |
1078 | if (bkey_packed(l)) { |
1079 | __bkey_unpack_key_format_checked(b, &unpacked, l); | |
1080 | l = (void*) &unpacked; | |
1081 | } else if (bkey_packed(r)) { | |
1082 | __bkey_unpack_key_format_checked(b, &unpacked, r); | |
1083 | r = (void*) &unpacked; | |
1c6fdbd8 | 1084 | } |
fab4f8c6 | 1085 | |
4cf91b02 | 1086 | return bpos_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p); |
1c6fdbd8 KO |
1087 | } |
1088 | ||
1089 | __pure __flatten | |
1090 | int __bch2_bkey_cmp_left_packed(const struct btree *b, | |
1091 | const struct bkey_packed *l, | |
1092 | const struct bpos *r) | |
1093 | { | |
1094 | const struct bkey *l_unpacked; | |
1095 | ||
1096 | return unlikely(l_unpacked = packed_to_bkey_c(l)) | |
4cf91b02 | 1097 | ? bpos_cmp(l_unpacked->p, *r) |
1c6fdbd8 KO |
1098 | : __bch2_bkey_cmp_left_packed_format_checked(b, l, r); |
1099 | } | |
1100 | ||
1101 | void bch2_bpos_swab(struct bpos *p) | |
1102 | { | |
1103 | u8 *l = (u8 *) p; | |
1104 | u8 *h = ((u8 *) &p[1]) - 1; | |
1105 | ||
1106 | while (l < h) { | |
1107 | swap(*l, *h); | |
1108 | l++; | |
1109 | --h; | |
1110 | } | |
1111 | } | |
1112 | ||
1113 | void bch2_bkey_swab_key(const struct bkey_format *_f, struct bkey_packed *k) | |
1114 | { | |
1115 | const struct bkey_format *f = bkey_packed(k) ? _f : &bch2_bkey_format_current; | |
1116 | u8 *l = k->key_start; | |
1117 | u8 *h = (u8 *) (k->_data + f->key_u64s) - 1; | |
1118 | ||
1119 | while (l < h) { | |
1120 | swap(*l, *h); | |
1121 | l++; | |
1122 | --h; | |
1123 | } | |
1124 | } | |
1125 | ||
1126 | #ifdef CONFIG_BCACHEFS_DEBUG | |
1127 | void bch2_bkey_pack_test(void) | |
1128 | { | |
1129 | struct bkey t = KEY(4134ULL, 1250629070527416633ULL, 0); | |
1130 | struct bkey_packed p; | |
1131 | ||
1132 | struct bkey_format test_format = { | |
e751c01a | 1133 | .key_u64s = 3, |
1c6fdbd8 KO |
1134 | .nr_fields = BKEY_NR_FIELDS, |
1135 | .bits_per_field = { | |
1136 | 13, | |
1137 | 64, | |
e751c01a | 1138 | 32, |
1c6fdbd8 KO |
1139 | }, |
1140 | }; | |
1141 | ||
1142 | struct unpack_state in_s = | |
1143 | unpack_state_init(&bch2_bkey_format_current, (void *) &t); | |
1144 | struct pack_state out_s = pack_state_init(&test_format, &p); | |
1145 | unsigned i; | |
1146 | ||
1147 | for (i = 0; i < out_s.format->nr_fields; i++) { | |
1148 | u64 a, v = get_inc_field(&in_s, i); | |
1149 | ||
1150 | switch (i) { | |
1151 | #define x(id, field) case id: a = t.field; break; | |
1152 | bkey_fields() | |
1153 | #undef x | |
1154 | default: | |
1155 | BUG(); | |
1156 | } | |
1157 | ||
1158 | if (a != v) | |
1159 | panic("got %llu actual %llu i %u\n", v, a, i); | |
1160 | ||
1161 | if (!set_inc_field(&out_s, i, v)) | |
1162 | panic("failed at %u\n", i); | |
1163 | } | |
1164 | ||
1165 | BUG_ON(!bch2_bkey_pack_key(&p, &t, &test_format)); | |
1166 | } | |
1167 | #endif |