bcachefs: Simplify hash table checks
[linux-block.git] / fs / bcachefs / btree_update_leaf.c
CommitLineData
1c6fdbd8
KO
1// SPDX-License-Identifier: GPL-2.0
2
3#include "bcachefs.h"
4#include "btree_update.h"
5#include "btree_update_interior.h"
36e916e1 6#include "btree_gc.h"
1c6fdbd8
KO
7#include "btree_io.h"
8#include "btree_iter.h"
2ca88e5a 9#include "btree_key_cache.h"
1c6fdbd8 10#include "btree_locking.h"
b35b1925 11#include "buckets.h"
1c6fdbd8 12#include "debug.h"
3636ed48 13#include "error.h"
08c07fea 14#include "extent_update.h"
1c6fdbd8
KO
15#include "journal.h"
16#include "journal_reclaim.h"
17#include "keylist.h"
1d25849c 18#include "replicas.h"
1c6fdbd8
KO
19#include "trace.h"
20
b7ba66c8 21#include <linux/prefetch.h>
1c6fdbd8
KO
22#include <linux/sort.h>
23
6333bd2f
KO
24static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
25 const struct btree_insert_entry *r)
26{
27 return cmp_int(l->btree_id, r->btree_id) ?:
28 -cmp_int(l->level, r->level) ?:
4cf91b02 29 bpos_cmp(l->k->k.p, r->k->k.p);
6333bd2f
KO
30}
31
36e9d698 32static inline bool same_leaf_as_prev(struct btree_trans *trans,
24326cd1 33 struct btree_insert_entry *i)
36e9d698 34{
e3e464ac 35 return i != trans->updates2 &&
e62d65f2 36 iter_l(i[0].iter)->b == iter_l(i[-1].iter)->b;
36e9d698
KO
37}
38
9623ab27
KO
39inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
40 struct btree_iter *iter)
41{
42 bch2_btree_node_lock_write(b, iter);
43
2ca88e5a
KO
44 if (btree_iter_type(iter) == BTREE_ITER_CACHED)
45 return;
46
fdfab313 47 if (unlikely(btree_node_just_written(b)) &&
9623ab27
KO
48 bch2_btree_post_write_cleanup(c, b))
49 bch2_btree_iter_reinit_node(iter, b);
50
51 /*
52 * If the last bset has been written, or if it's gotten too big - start
53 * a new bset to insert into:
54 */
55 if (want_new_bset(c, b))
56 bch2_btree_init_next(c, b, iter);
57}
58
1c6fdbd8
KO
59/* Inserting into a given leaf node (last stage of insert): */
60
61/* Handle overwrites and do insert, for non extents: */
62bool bch2_btree_bset_insert_key(struct btree_iter *iter,
63 struct btree *b,
64 struct btree_node_iter *node_iter,
65 struct bkey_i *insert)
66{
1c6fdbd8 67 struct bkey_packed *k;
fdf22400 68 unsigned clobber_u64s = 0, new_u64s = 0;
1c6fdbd8
KO
69
70 EBUG_ON(btree_node_just_written(b));
71 EBUG_ON(bset_written(b, btree_bset_last(b)));
72 EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
4cf91b02
KO
73 EBUG_ON(bpos_cmp(insert->k.p, b->data->min_key) < 0);
74 EBUG_ON(bpos_cmp(insert->k.p, b->data->max_key) > 0);
e3e464ac
KO
75 EBUG_ON(insert->k.u64s >
76 bch_btree_keys_u64s_remaining(iter->trans->c, b));
77 EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
1c6fdbd8
KO
78
79 k = bch2_btree_node_iter_peek_all(node_iter, b);
811d2bcd 80 if (k && bkey_cmp_left_packed(b, k, &insert->k.p))
ae54c453 81 k = NULL;
1c6fdbd8 82
ae54c453 83 /* @k is the key being overwritten/deleted, if any: */
c052cf82 84 EBUG_ON(k && bkey_deleted(k));
1c6fdbd8 85
fdf22400 86 /* Deleting, but not found? nothing to do: */
c052cf82 87 if (bkey_deleted(&insert->k) && !k)
fdf22400
KO
88 return false;
89
c052cf82 90 if (bkey_deleted(&insert->k)) {
ae54c453 91 /* Deleting: */
ae54c453
KO
92 btree_account_key_drop(b, k);
93 k->type = KEY_TYPE_deleted;
c9bebae6 94
fdf22400 95 if (k->needs_whiteout)
e3e464ac 96 push_whiteout(iter->trans->c, b, insert->k.p);
fdf22400 97 k->needs_whiteout = false;
1c6fdbd8 98
ae54c453
KO
99 if (k >= btree_bset_last(b)->start) {
100 clobber_u64s = k->u64s;
ae54c453 101 bch2_bset_delete(b, k, clobber_u64s);
fdf22400 102 goto fix_iter;
ae54c453
KO
103 } else {
104 bch2_btree_iter_fix_key_modified(iter, b, k);
105 }
106
107 return true;
108 }
c9bebae6 109
ae54c453
KO
110 if (k) {
111 /* Overwriting: */
ae54c453
KO
112 btree_account_key_drop(b, k);
113 k->type = KEY_TYPE_deleted;
114
f2e8c69f
KO
115 insert->k.needs_whiteout = k->needs_whiteout;
116 k->needs_whiteout = false;
117
c9bebae6
KO
118 if (k >= btree_bset_last(b)->start) {
119 clobber_u64s = k->u64s;
1c6fdbd8 120 goto overwrite;
ae54c453
KO
121 } else {
122 bch2_btree_iter_fix_key_modified(iter, b, k);
1c6fdbd8 123 }
1c6fdbd8
KO
124 }
125
216c9fac 126 k = bch2_btree_node_iter_bset_pos(node_iter, b, bset_tree_last(b));
1c6fdbd8
KO
127overwrite:
128 bch2_bset_insert(b, node_iter, k, insert, clobber_u64s);
fdf22400
KO
129 new_u64s = k->u64s;
130fix_iter:
131 if (clobber_u64s != new_u64s)
132 bch2_btree_node_iter_fix(iter, b, node_iter, k,
133 clobber_u64s, new_u64s);
1c6fdbd8
KO
134 return true;
135}
136
2940295c 137static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
1c6fdbd8
KO
138 unsigned i, u64 seq)
139{
140 struct bch_fs *c = container_of(j, struct bch_fs, journal);
141 struct btree_write *w = container_of(pin, struct btree_write, journal);
142 struct btree *b = container_of(w, struct btree, writes[i]);
143
144 btree_node_lock_type(c, b, SIX_LOCK_read);
145 bch2_btree_node_write_cond(c, b,
4077991c 146 (btree_current_write(b) == w && w->journal.seq == seq));
c43a6ef9 147 six_unlock_read(&b->c.lock);
2940295c 148 return 0;
1c6fdbd8
KO
149}
150
2940295c 151static int btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
1c6fdbd8
KO
152{
153 return __btree_node_flush(j, pin, 0, seq);
154}
155
2940295c 156static int btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
1c6fdbd8
KO
157{
158 return __btree_node_flush(j, pin, 1, seq);
159}
160
6357d607
KO
161inline void bch2_btree_add_journal_pin(struct bch_fs *c,
162 struct btree *b, u64 seq)
163{
164 struct btree_write *w = btree_current_write(b);
165
166 bch2_journal_pin_add(&c->journal, seq, &w->journal,
167 btree_node_write_idx(b) == 0
168 ? btree_node_flush0
169 : btree_node_flush1);
170}
171
1c6fdbd8
KO
172/**
173 * btree_insert_key - insert a key one key into a leaf node
174 */
4e8224ed 175static bool btree_insert_key_leaf(struct btree_trans *trans,
54e86b58
KO
176 struct btree_iter *iter,
177 struct bkey_i *insert)
1c6fdbd8
KO
178{
179 struct bch_fs *c = trans->c;
e62d65f2 180 struct btree *b = iter_l(iter)->b;
2a9101a9 181 struct bset_tree *t = bset_tree_last(b);
4e8224ed 182 struct bset *i = bset(b, t);
2a9101a9 183 int old_u64s = bset_u64s(t);
1c6fdbd8
KO
184 int old_live_u64s = b->nr.live_u64s;
185 int live_u64s_added, u64s_added;
186
5d20ba48
KO
187 EBUG_ON(!iter->level &&
188 !test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags));
189
4e8224ed
KO
190 if (unlikely(!bch2_btree_bset_insert_key(iter, b,
191 &iter_l(iter)->iter, insert)))
192 return false;
193
194 i->journal_seq = cpu_to_le64(max(trans->journal_res.seq,
195 le64_to_cpu(i->journal_seq)));
bcd6f3e0 196
4e8224ed
KO
197 bch2_btree_add_journal_pin(c, b, trans->journal_res.seq);
198
199 if (unlikely(!btree_node_dirty(b)))
6a747c46 200 set_btree_node_dirty(c, b);
1c6fdbd8
KO
201
202 live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
2a9101a9 203 u64s_added = (int) bset_u64s(t) - old_u64s;
1c6fdbd8
KO
204
205 if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0)
206 b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added);
207 if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0)
208 b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added);
209
210 if (u64s_added > live_u64s_added &&
211 bch2_maybe_compact_whiteouts(c, b))
212 bch2_btree_iter_reinit_node(iter, b);
213
54e86b58 214 trace_btree_insert_key(c, b, insert);
4e8224ed 215 return true;
1c6fdbd8
KO
216}
217
2ca88e5a
KO
218/* Cached btree updates: */
219
9623ab27 220/* Normal update interface: */
1c6fdbd8 221
1a470560 222static inline void btree_insert_entry_checks(struct btree_trans *trans,
6333bd2f 223 struct btree_insert_entry *i)
1c6fdbd8 224{
1a470560 225 struct bch_fs *c = trans->c;
a7199432 226
e751c01a
KO
227 if (bch2_debug_check_bkeys) {
228 const char *invalid = bch2_bkey_invalid(c,
229 bkey_i_to_s_c(i->k), i->bkey_type);
230 if (invalid) {
231 char buf[200];
232
233 bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k));
234 panic("invalid bkey %s on insert: %s\n", buf, invalid);
235 }
236 }
237 BUG_ON(!i->is_extent && bpos_cmp(i->k->k.p, i->iter->real_pos));
6333bd2f
KO
238 BUG_ON(i->level != i->iter->level);
239 BUG_ON(i->btree_id != i->iter->btree_id);
9623ab27 240}
1c6fdbd8 241
2a9101a9
KO
242static noinline int
243bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s)
1c6fdbd8 244{
9623ab27 245 struct bch_fs *c = trans->c;
9623ab27 246 int ret;
1c6fdbd8 247
58fbf808 248 bch2_trans_unlock(trans);
1c6fdbd8 249
9623ab27
KO
250 ret = bch2_journal_preres_get(&c->journal,
251 &trans->journal_preres, u64s, 0);
252 if (ret)
253 return ret;
1c6fdbd8 254
58fbf808 255 if (!bch2_trans_relock(trans)) {
20bceecb 256 trace_trans_restart_journal_preres_get(trans->ip);
9623ab27
KO
257 return -EINTR;
258 }
1c6fdbd8 259
9623ab27 260 return 0;
1c6fdbd8
KO
261}
262
2a9101a9
KO
263static inline int bch2_trans_journal_res_get(struct btree_trans *trans,
264 unsigned flags)
c8cc5b3e 265{
9623ab27 266 struct bch_fs *c = trans->c;
9623ab27 267 int ret;
c8cc5b3e 268
9623ab27
KO
269 if (trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
270 flags |= JOURNAL_RES_GET_RESERVED;
c8cc5b3e 271
9623ab27 272 ret = bch2_journal_res_get(&c->journal, &trans->journal_res,
87c3beb4 273 trans->journal_u64s, flags);
9623ab27
KO
274
275 return ret == -EAGAIN ? BTREE_INSERT_NEED_JOURNAL_RES : ret;
276}
1c6fdbd8 277
b0004d8d 278static enum btree_insert_ret
0dc17247 279btree_key_can_insert(struct btree_trans *trans,
54e86b58 280 struct btree_iter *iter,
64f2a880 281 unsigned u64s)
b0004d8d
KO
282{
283 struct bch_fs *c = trans->c;
e62d65f2 284 struct btree *b = iter_l(iter)->b;
b0004d8d 285
f8058242 286 if (!bch2_btree_node_insert_fits(c, b, u64s))
b0004d8d
KO
287 return BTREE_INSERT_BTREE_NODE_FULL;
288
289 return BTREE_INSERT_OK;
290}
291
2ca88e5a
KO
292static enum btree_insert_ret
293btree_key_can_insert_cached(struct btree_trans *trans,
294 struct btree_iter *iter,
64f2a880 295 unsigned u64s)
2ca88e5a
KO
296{
297 struct bkey_cached *ck = (void *) iter->l[0].b;
298 unsigned new_u64s;
299 struct bkey_i *new_k;
300
301 BUG_ON(iter->level);
302
d5425a3b 303 if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
bcdb4b97
KO
304 bch2_btree_key_cache_must_wait(trans->c) &&
305 !(trans->flags & BTREE_INSERT_JOURNAL_RECLAIM))
d5425a3b
KO
306 return BTREE_INSERT_NEED_JOURNAL_RECLAIM;
307
64f2a880 308 if (u64s <= ck->u64s)
2ca88e5a
KO
309 return BTREE_INSERT_OK;
310
64f2a880 311 new_u64s = roundup_pow_of_two(u64s);
2ca88e5a
KO
312 new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS);
313 if (!new_k)
314 return -ENOMEM;
315
316 ck->u64s = new_u64s;
317 ck->k = new_k;
318 return BTREE_INSERT_OK;
319}
320
0dc17247 321static inline void do_btree_insert_one(struct btree_trans *trans,
54e86b58
KO
322 struct btree_iter *iter,
323 struct bkey_i *insert)
3636ed48 324{
4e8224ed
KO
325 struct bch_fs *c = trans->c;
326 struct journal *j = &c->journal;
327 bool did_work;
328
329 EBUG_ON(trans->journal_res.ref !=
330 !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY));
331
332 insert->k.needs_whiteout = false;
333
2ca88e5a
KO
334 did_work = (btree_iter_type(iter) != BTREE_ITER_CACHED)
335 ? btree_insert_key_leaf(trans, iter, insert)
336 : bch2_btree_insert_key_cached(trans, iter, insert);
4e8224ed
KO
337 if (!did_work)
338 return;
339
340 if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
341 bch2_journal_add_keys(j, &trans->journal_res,
342 iter->btree_id, insert);
343
344 bch2_journal_set_has_inode(j, &trans->journal_res,
345 insert->k.p.inode);
346
347 if (trans->journal_seq)
348 *trans->journal_seq = trans->journal_res.seq;
349 }
3636ed48
KO
350}
351
2a9101a9
KO
352static noinline void bch2_btree_iter_unlock_noinline(struct btree_iter *iter)
353{
354 __bch2_btree_iter_unlock(iter);
355}
356
357static noinline void bch2_trans_mark_gc(struct btree_trans *trans)
1c6fdbd8
KO
358{
359 struct bch_fs *c = trans->c;
360 struct btree_insert_entry *i;
1c6fdbd8 361
2ca88e5a
KO
362 trans_for_each_update(trans, i) {
363 /*
364 * XXX: synchronization of cached update triggers with gc
365 */
366 BUG_ON(btree_iter_type(i->iter) == BTREE_ITER_CACHED);
367
368 if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
54e86b58 369 bch2_mark_update(trans, i->iter, i->k, NULL,
2d594dfb 370 i->trigger_flags|BTREE_TRIGGER_GC);
2ca88e5a 371 }
2a9101a9 372}
36e9d698 373
2a9101a9
KO
374static inline int
375bch2_trans_commit_write_locked(struct btree_trans *trans,
376 struct btree_insert_entry **stopped_at)
377{
378 struct bch_fs *c = trans->c;
2a9101a9 379 struct btree_insert_entry *i;
43d00243 380 struct btree_trans_commit_hook *h;
24326cd1 381 unsigned u64s = 0;
b7ba66c8 382 bool marking = false;
2a9101a9 383 int ret;
4d8100da 384
1c6fdbd8 385 if (race_fault()) {
20bceecb 386 trace_trans_restart_fault_inject(trans->ip);
2a9101a9 387 return -EINTR;
1c6fdbd8
KO
388 }
389
b0004d8d
KO
390 /*
391 * Check if the insert will fit in the leaf node with the write lock
392 * held, otherwise another thread could write the node changing the
393 * amount of space available:
394 */
c8cc5b3e 395
b7ba66c8 396 prefetch(&trans->c->journal.flags);
932aa837 397
43d00243
KO
398 h = trans->hooks;
399 while (h) {
400 ret = h->fn(trans, h);
401 if (ret)
402 return ret;
403 h = h->next;
404 }
405
e3e464ac 406 trans_for_each_update2(trans, i) {
b7ba66c8 407 /* Multiple inserts might go to same leaf: */
24326cd1 408 if (!same_leaf_as_prev(trans, i))
b7ba66c8 409 u64s = 0;
932aa837 410
b7ba66c8 411 u64s += i->k->k.u64s;
2ca88e5a 412 ret = btree_iter_type(i->iter) != BTREE_ITER_CACHED
f8058242
KO
413 ? btree_key_can_insert(trans, i->iter, u64s)
414 : btree_key_can_insert_cached(trans, i->iter, u64s);
b7ba66c8
KO
415 if (ret) {
416 *stopped_at = i;
417 return ret;
932aa837 418 }
b7ba66c8 419
6333bd2f 420 if (btree_node_type_needs_gc(i->bkey_type))
b7ba66c8
KO
421 marking = true;
422 }
423
424 if (marking) {
425 percpu_down_read(&c->mark_lock);
932aa837
KO
426 }
427
6167f7c8
KO
428 /* Must be called under mark_lock: */
429 if (marking && trans->fs_usage_deltas &&
35d5aff2 430 !bch2_replicas_delta_list_marked(c, trans->fs_usage_deltas)) {
6167f7c8
KO
431 ret = BTREE_INSERT_NEED_MARK_REPLICAS;
432 goto err;
433 }
434
9623ab27
KO
435 /*
436 * Don't get journal reservation until after we know insert will
437 * succeed:
438 */
87c3beb4 439 if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
2a9101a9
KO
440 ret = bch2_trans_journal_res_get(trans,
441 JOURNAL_RES_GET_NONBLOCK);
87c3beb4 442 if (ret)
2a9101a9 443 goto err;
4e8224ed
KO
444 } else {
445 trans->journal_res.seq = c->journal.replay_journal_seq;
87c3beb4 446 }
c8cc5b3e 447
96e2aa1b 448 if (unlikely(trans->extra_journal_entry_u64s)) {
00b8ccf7 449 memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res),
96e2aa1b
KO
450 trans->extra_journal_entries,
451 trans->extra_journal_entry_u64s);
452
453 trans->journal_res.offset += trans->extra_journal_entry_u64s;
454 trans->journal_res.u64s -= trans->extra_journal_entry_u64s;
455 }
456
2a9101a9
KO
457 /*
458 * Not allowed to fail after we've gotten our journal reservation - we
459 * have to use it:
460 */
461
1c6fdbd8 462 if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
29364f34 463 if (bch2_journal_seq_verify)
e3e464ac 464 trans_for_each_update2(trans, i)
1c6fdbd8 465 i->k->k.version.lo = trans->journal_res.seq;
29364f34 466 else if (bch2_inject_invalid_keys)
e3e464ac 467 trans_for_each_update2(trans, i)
1c6fdbd8
KO
468 i->k->k.version = MAX_VERSION;
469 }
470
a7199432 471 trans_for_each_update(trans, i)
6333bd2f 472 if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type))
54e86b58 473 bch2_mark_update(trans, i->iter, i->k,
35d5aff2 474 NULL, i->trigger_flags);
932aa837 475
35d5aff2
KO
476 if (marking && trans->fs_usage_deltas)
477 bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas);
36e916e1 478
2a9101a9
KO
479 if (unlikely(c->gc_pos.phase))
480 bch2_trans_mark_gc(trans);
932aa837 481
e3e464ac 482 trans_for_each_update2(trans, i)
54e86b58 483 do_btree_insert_one(trans, i->iter, i->k);
2a9101a9 484err:
b7ba66c8 485 if (marking) {
4d8100da
KO
486 percpu_up_read(&c->mark_lock);
487 }
488
2a9101a9
KO
489 return ret;
490}
491
b182ff60
KO
492static noinline int maybe_do_btree_merge(struct btree_trans *trans, struct btree_iter *iter)
493{
494 struct btree_insert_entry *i;
495 struct btree *b = iter_l(iter)->b;
496 struct bkey_s_c old;
497 int u64s_delta = 0;
498 int ret;
499
500 /*
501 * Inserting directly into interior nodes is an uncommon operation with
502 * various weird edge cases: also, a lot of things about
503 * BTREE_ITER_NODES iters need to be audited
504 */
505 if (unlikely(btree_iter_type(iter) != BTREE_ITER_KEYS))
506 return 0;
507
508 BUG_ON(iter->level);
509
510 trans_for_each_update2(trans, i) {
511 if (iter_l(i->iter)->b != b)
512 continue;
513
514 old = bch2_btree_iter_peek_slot(i->iter);
515 ret = bkey_err(old);
516 if (ret)
517 return ret;
518
519 u64s_delta += !bkey_deleted(&i->k->k) ? i->k->k.u64s : 0;
520 u64s_delta -= !bkey_deleted(old.k) ? old.k->u64s : 0;
521 }
522
523 return u64s_delta <= 0
524 ? (bch2_foreground_maybe_merge(trans->c, iter, iter->level,
525 trans->flags & ~BTREE_INSERT_NOUNLOCK) ?: -EINTR)
526 : 0;
527}
528
2a9101a9
KO
529/*
530 * Get journal reservation, take write locks, and attempt to do btree update(s):
531 */
532static inline int do_bch2_trans_commit(struct btree_trans *trans,
533 struct btree_insert_entry **stopped_at)
534{
b182ff60 535 struct bch_fs *c = trans->c;
2a9101a9
KO
536 struct btree_insert_entry *i;
537 struct btree_iter *iter;
2a9101a9
KO
538 int ret;
539
b182ff60
KO
540 trans_for_each_update2(trans, i) {
541 struct btree *b;
542
543 BUG_ON(!btree_node_intent_locked(i->iter, i->level));
544
545 if (btree_iter_type(i->iter) == BTREE_ITER_CACHED)
546 continue;
547
548 b = iter_l(i->iter)->b;
549 if (b->sib_u64s[0] < c->btree_foreground_merge_threshold ||
550 b->sib_u64s[1] < c->btree_foreground_merge_threshold) {
551 ret = maybe_do_btree_merge(trans, i->iter);
552 if (unlikely(ret))
553 return ret;
554 }
555 }
556
e3e464ac 557 trans_for_each_update2(trans, i)
b182ff60 558 BUG_ON(!btree_node_intent_locked(i->iter, i->level));
1c6fdbd8 559
b182ff60 560 ret = bch2_journal_preres_get(&c->journal,
8b3bbe2c 561 &trans->journal_preres, trans->journal_preres_u64s,
2ca88e5a 562 JOURNAL_RES_GET_NONBLOCK|
2940295c
KO
563 ((trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
564 ? JOURNAL_RES_GET_RESERVED : 0));
2a9101a9
KO
565 if (unlikely(ret == -EAGAIN))
566 ret = bch2_trans_journal_preres_get_cold(trans,
8b3bbe2c 567 trans->journal_preres_u64s);
2a9101a9
KO
568 if (unlikely(ret))
569 return ret;
570
571 /*
572 * Can't be holding any read locks when we go to take write locks:
aa8889c0
KO
573 * another thread could be holding an intent lock on the same node we
574 * have a read lock on, and it'll block trying to take a write lock
575 * (because we hold a read lock) and it could be blocking us by holding
576 * its own read lock (while we're trying to to take write locks).
2a9101a9
KO
577 *
578 * note - this must be done after bch2_trans_journal_preres_get_cold()
579 * or anything else that might call bch2_trans_relock(), since that
580 * would just retake the read locks:
581 */
515282ac 582 trans_for_each_iter(trans, iter) {
2a9101a9 583 if (iter->nodes_locked != iter->nodes_intent_locked) {
1f7fdc0a 584 if (btree_iter_keep(trans, iter)) {
aa8889c0
KO
585 if (!bch2_btree_iter_upgrade(iter, 1)) {
586 trace_trans_restart_upgrade(trans->ip);
587 return -EINTR;
588 }
589 } else {
590 bch2_btree_iter_unlock_noinline(iter);
591 }
2a9101a9
KO
592 }
593 }
594
595 if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
e3e464ac 596 trans_for_each_update2(trans, i)
6333bd2f 597 btree_insert_entry_checks(trans, i);
2a9101a9
KO
598 bch2_btree_trans_verify_locks(trans);
599
e3e464ac 600 trans_for_each_update2(trans, i)
24326cd1 601 if (!same_leaf_as_prev(trans, i))
b182ff60 602 bch2_btree_node_lock_for_insert(c,
e62d65f2 603 iter_l(i->iter)->b, i->iter);
b7ba66c8 604
2a9101a9 605 ret = bch2_trans_commit_write_locked(trans, stopped_at);
b7ba66c8 606
e3e464ac 607 trans_for_each_update2(trans, i)
24326cd1 608 if (!same_leaf_as_prev(trans, i))
e62d65f2 609 bch2_btree_node_unlock_write_inlined(iter_l(i->iter)->b,
b7ba66c8 610 i->iter);
2a9101a9 611
00b8ccf7 612 if (!ret && trans->journal_pin)
b182ff60 613 bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
00b8ccf7
KO
614 trans->journal_pin, NULL);
615
2a9101a9
KO
616 /*
617 * Drop journal reservation after dropping write locks, since dropping
618 * the journal reservation may kick off a journal write:
619 */
b182ff60 620 bch2_journal_res_put(&c->journal, &trans->journal_res);
2a9101a9
KO
621
622 if (unlikely(ret))
623 return ret;
624
72545b5e 625 bch2_trans_downgrade(trans);
2a9101a9
KO
626
627 return 0;
1c6fdbd8
KO
628}
629
24db24c7
KO
630static int journal_reclaim_wait_done(struct bch_fs *c)
631{
632 int ret;
633
634 ret = bch2_journal_error(&c->journal);
635 if (ret)
636 return ret;
637
638 ret = !bch2_btree_key_cache_must_wait(c);
639 if (ret)
640 return ret;
641
6ae0d16d
KO
642 journal_reclaim_kick(&c->journal);
643
24db24c7
KO
644 if (mutex_trylock(&c->journal.reclaim_lock)) {
645 ret = bch2_journal_reclaim(&c->journal);
646 mutex_unlock(&c->journal.reclaim_lock);
647 }
648
649 if (!ret)
650 ret = !bch2_btree_key_cache_must_wait(c);
651 return ret;
652}
653
11e6f19a
KO
654static noinline
655int bch2_trans_commit_error(struct btree_trans *trans,
656 struct btree_insert_entry *i,
657 int ret)
1c6fdbd8
KO
658{
659 struct bch_fs *c = trans->c;
11e6f19a 660 unsigned flags = trans->flags;
1c6fdbd8
KO
661
662 /*
663 * BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree
664 * update; if we haven't done anything yet it doesn't apply
665 */
94d290e4 666 flags &= ~BTREE_INSERT_NOUNLOCK;
1c6fdbd8 667
1d25849c
KO
668 switch (ret) {
669 case BTREE_INSERT_BTREE_NODE_FULL:
670 ret = bch2_btree_split_leaf(c, i->iter, flags);
1c6fdbd8
KO
671
672 /*
673 * if the split succeeded without dropping locks the insert will
58e2388f
KO
674 * still be atomic (what the caller peeked() and is overwriting
675 * won't have changed)
1c6fdbd8
KO
676 */
677#if 0
678 /*
679 * XXX:
680 * split -> btree node merging (of parent node) might still drop
681 * locks when we're not passing it BTREE_INSERT_NOUNLOCK
94d290e4
KO
682 *
683 * we don't want to pass BTREE_INSERT_NOUNLOCK to split as that
684 * will inhibit merging - but we don't have a reliable way yet
685 * (do we?) of checking if we dropped locks in this path
1c6fdbd8 686 */
94d290e4 687 if (!ret)
1c6fdbd8
KO
688 goto retry;
689#endif
690
691 /*
692 * don't care if we got ENOSPC because we told split it
693 * couldn't block:
694 */
ed8413fd
KO
695 if (!ret ||
696 ret == -EINTR ||
697 (flags & BTREE_INSERT_NOUNLOCK)) {
20bceecb 698 trace_trans_restart_btree_node_split(trans->ip);
1c6fdbd8 699 ret = -EINTR;
1c7a0adf 700 }
1d25849c 701 break;
1d25849c
KO
702 case BTREE_INSERT_ENOSPC:
703 ret = -ENOSPC;
704 break;
705 case BTREE_INSERT_NEED_MARK_REPLICAS:
0dc17247 706 bch2_trans_unlock(trans);
db8a5f0a 707
9c2e6242
KO
708 ret = bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas);
709 if (ret)
710 return ret;
11e6f19a 711
58fbf808 712 if (bch2_trans_relock(trans))
11e6f19a
KO
713 return 0;
714
20bceecb 715 trace_trans_restart_mark_replicas(trans->ip);
11e6f19a 716 ret = -EINTR;
1d25849c 717 break;
9623ab27 718 case BTREE_INSERT_NEED_JOURNAL_RES:
58fbf808 719 bch2_trans_unlock(trans);
9623ab27 720
2940295c
KO
721 if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
722 !(trans->flags & BTREE_INSERT_JOURNAL_RESERVED))
723 return -EAGAIN;
724
9623ab27
KO
725 ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
726 if (ret)
11e6f19a 727 return ret;
9623ab27 728
58fbf808 729 if (bch2_trans_relock(trans))
11e6f19a 730 return 0;
9623ab27 731
20bceecb 732 trace_trans_restart_journal_res_get(trans->ip);
9623ab27 733 ret = -EINTR;
d5425a3b
KO
734 break;
735 case BTREE_INSERT_NEED_JOURNAL_RECLAIM:
736 bch2_trans_unlock(trans);
737
24db24c7
KO
738 wait_event(c->journal.reclaim_wait,
739 (ret = journal_reclaim_wait_done(c)));
740 if (ret < 0)
741 return ret;
d5425a3b 742
24db24c7 743 if (bch2_trans_relock(trans))
d5425a3b
KO
744 return 0;
745
746 trace_trans_restart_journal_reclaim(trans->ip);
747 ret = -EINTR;
9623ab27 748 break;
1d25849c
KO
749 default:
750 BUG_ON(ret >= 0);
751 break;
1c6fdbd8
KO
752 }
753
11e6f19a
KO
754 return ret;
755}
756
2a9101a9
KO
757static noinline int
758bch2_trans_commit_get_rw_cold(struct btree_trans *trans)
11e6f19a
KO
759{
760 struct bch_fs *c = trans->c;
11e6f19a
KO
761 int ret;
762
2a9101a9
KO
763 if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW)))
764 return -EROFS;
11e6f19a 765
2a9101a9 766 bch2_trans_unlock(trans);
60755344 767
2a9101a9
KO
768 ret = bch2_fs_read_write_early(c);
769 if (ret)
770 return ret;
11e6f19a 771
2a9101a9
KO
772 percpu_ref_get(&c->writes);
773 return 0;
1c6fdbd8
KO
774}
775
4cfb722c
KO
776static void __bch2_trans_update2(struct btree_trans *trans,
777 struct btree_insert_entry n)
645d72aa 778{
6333bd2f 779 struct btree_insert_entry *i;
e3e464ac 780
6333bd2f 781 btree_insert_entry_checks(trans, &n);
e3e464ac 782
3eb26d01 783 EBUG_ON(trans->nr_updates2 >= BTREE_ITER_MAX);
e3e464ac 784
6333bd2f 785 n.iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
8042b5b7 786
6333bd2f
KO
787 trans_for_each_update2(trans, i)
788 if (btree_insert_entry_cmp(&n, i) <= 0)
e3e464ac 789 break;
e3e464ac 790
6333bd2f
KO
791 if (i < trans->updates2 + trans->nr_updates2 &&
792 !btree_insert_entry_cmp(&n, i))
793 *i = n;
794 else
795 array_insert_item(trans->updates2, trans->nr_updates2,
796 i - trans->updates2, n);
e3e464ac
KO
797}
798
4cfb722c
KO
799static void bch2_trans_update2(struct btree_trans *trans,
800 struct btree_iter *iter,
801 struct bkey_i *insert)
6333bd2f 802{
4cfb722c 803 __bch2_trans_update2(trans, (struct btree_insert_entry) {
6333bd2f
KO
804 .bkey_type = __btree_node_type(iter->level, iter->btree_id),
805 .btree_id = iter->btree_id,
806 .level = iter->level,
807 .iter = iter,
808 .k = insert,
809 });
810}
811
e3e464ac 812static int extent_update_to_keys(struct btree_trans *trans,
6333bd2f 813 struct btree_insert_entry n)
e3e464ac 814{
64f2a880
KO
815 int ret;
816
6333bd2f
KO
817 if (bkey_deleted(&n.k->k))
818 return 0;
819
820 ret = bch2_extent_can_insert(trans, n.iter, n.k);
64f2a880
KO
821 if (ret)
822 return ret;
e3e464ac 823
c7bb769c
KO
824 n.iter = bch2_trans_get_iter(trans, n.iter->btree_id, n.k->k.p,
825 BTREE_ITER_INTENT|
826 BTREE_ITER_NOT_EXTENTS);
6333bd2f 827 n.is_extent = false;
e3e464ac 828
4cfb722c 829 __bch2_trans_update2(trans, n);
6333bd2f 830 bch2_trans_iter_put(trans, n.iter);
4cfb722c 831 return 0;
e3e464ac
KO
832}
833
834static int extent_handle_overwrites(struct btree_trans *trans,
835 enum btree_id btree_id,
4cfb722c 836 struct bkey_i *insert)
e3e464ac 837{
dbb93db9 838 struct btree_iter *iter, *update_iter;
4cfb722c 839 struct bpos start = bkey_start_pos(&insert->k);
e3e464ac
KO
840 struct bkey_i *update;
841 struct bkey_s_c k;
842 int ret = 0;
843
4cfb722c
KO
844 iter = bch2_trans_get_iter(trans, btree_id, start,
845 BTREE_ITER_INTENT);
e3e464ac
KO
846 k = bch2_btree_iter_peek_with_updates(iter);
847
848 while (k.k && !(ret = bkey_err(k))) {
4cfb722c 849 if (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0)
e3e464ac
KO
850 break;
851
852 if (bkey_cmp(bkey_start_pos(k.k), start) < 0) {
e3e464ac
KO
853 update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
854 if ((ret = PTR_ERR_OR_ZERO(update)))
4cfb722c 855 break;
e3e464ac
KO
856
857 bkey_reassemble(update, k);
4cfb722c 858
e3e464ac
KO
859 bch2_cut_back(start, update);
860
4cfb722c
KO
861 update_iter = bch2_trans_get_iter(trans, btree_id, update->k.p,
862 BTREE_ITER_NOT_EXTENTS|
863 BTREE_ITER_INTENT);
864 bch2_trans_update2(trans, update_iter, update);
e3e464ac
KO
865 bch2_trans_iter_put(trans, update_iter);
866 }
867
4cfb722c
KO
868 if (bkey_cmp(k.k->p, insert->k.p) < 0 ||
869 (!bkey_cmp(k.k->p, insert->k.p) && bkey_deleted(&insert->k))) {
870 update = bch2_trans_kmalloc(trans, sizeof(struct bkey));
e3e464ac 871 if ((ret = PTR_ERR_OR_ZERO(update)))
4cfb722c 872 break;
e3e464ac 873
4cfb722c
KO
874 bkey_init(&update->k);
875 update->k.p = k.k->p;
e3e464ac 876
4cfb722c
KO
877 update_iter = bch2_trans_get_iter(trans, btree_id, update->k.p,
878 BTREE_ITER_NOT_EXTENTS|
879 BTREE_ITER_INTENT);
880 bch2_trans_update2(trans, update_iter, update);
e3e464ac 881 bch2_trans_iter_put(trans, update_iter);
4cfb722c
KO
882 }
883
884 if (bkey_cmp(k.k->p, insert->k.p) > 0) {
885 update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
e3e464ac 886 if ((ret = PTR_ERR_OR_ZERO(update)))
4cfb722c 887 break;
e3e464ac 888
4cfb722c
KO
889 bkey_reassemble(update, k);
890 bch2_cut_front(insert->k.p, update);
e3e464ac 891
4cfb722c
KO
892 update_iter = bch2_trans_get_iter(trans, btree_id, update->k.p,
893 BTREE_ITER_NOT_EXTENTS|
894 BTREE_ITER_INTENT);
895 bch2_trans_update2(trans, update_iter, update);
e3e464ac 896 bch2_trans_iter_put(trans, update_iter);
4cfb722c 897 break;
e3e464ac
KO
898 }
899
900 k = bch2_btree_iter_next_with_updates(iter);
901 }
dbb93db9 902 bch2_trans_iter_put(trans, iter);
4cfb722c 903
e3e464ac
KO
904 return ret;
905}
906
2a9101a9 907int __bch2_trans_commit(struct btree_trans *trans)
1c6fdbd8 908{
d9b022fe 909 struct btree_insert_entry *i = NULL;
24326cd1
KO
910 struct btree_iter *iter;
911 bool trans_trigger_run;
f793fd85 912 unsigned u64s, reset_flags = 0;
dc3b63dc 913 int ret = 0;
1c6fdbd8
KO
914
915 if (!trans->nr_updates)
b7cf4bd7 916 goto out_reset;
1c6fdbd8 917
2a9101a9
KO
918 if (trans->flags & BTREE_INSERT_GC_LOCK_HELD)
919 lockdep_assert_held(&trans->c->gc_lock);
11e6f19a 920
9623ab27 921 memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
0dc17247 922
96e2aa1b 923 trans->journal_u64s = trans->extra_journal_entry_u64s;
8b3bbe2c
KO
924 trans->journal_preres_u64s = 0;
925
2a9101a9
KO
926 if (!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
927 unlikely(!percpu_ref_tryget(&trans->c->writes))) {
928 ret = bch2_trans_commit_get_rw_cold(trans);
134915f3 929 if (ret)
b7cf4bd7 930 goto out_reset;
2a9101a9 931 }
8b3bbe2c 932
2ca88e5a
KO
933#ifdef CONFIG_BCACHEFS_DEBUG
934 trans_for_each_update(trans, i)
935 if (btree_iter_type(i->iter) != BTREE_ITER_CACHED &&
936 !(i->trigger_flags & BTREE_TRIGGER_NORUN))
937 bch2_btree_key_cache_verify_clean(trans,
6333bd2f 938 i->btree_id, i->k->k.p);
2ca88e5a
KO
939#endif
940
8b3bbe2c 941 /*
24326cd1
KO
942 * Running triggers will append more updates to the list of updates as
943 * we're walking it:
8b3bbe2c 944 */
24326cd1
KO
945 do {
946 trans_trigger_run = false;
947
948 trans_for_each_update(trans, i) {
6333bd2f 949 if ((BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)) &&
24326cd1
KO
950 !i->trans_triggers_run) {
951 i->trans_triggers_run = true;
952 trans_trigger_run = true;
953
954 ret = bch2_trans_mark_update(trans, i->iter, i->k,
955 i->trigger_flags);
956 if (unlikely(ret)) {
957 if (ret == -EINTR)
958 trace_trans_restart_mark(trans->ip);
959 goto out;
960 }
961 }
962 }
963 } while (trans_trigger_run);
964
e3e464ac
KO
965 /* Turn extents updates into keys: */
966 trans_for_each_update(trans, i)
6333bd2f 967 if (i->is_extent) {
4cfb722c
KO
968 ret = extent_handle_overwrites(trans, i->btree_id, i->k);
969 if (unlikely(ret))
e3e464ac
KO
970 goto out;
971 }
972
8b3bbe2c 973 trans_for_each_update(trans, i) {
6333bd2f
KO
974 ret = i->is_extent
975 ? extent_update_to_keys(trans, *i)
4cfb722c
KO
976 : (__bch2_trans_update2(trans, *i), 0);
977 if (unlikely(ret))
8042b5b7 978 goto out;
e3e464ac
KO
979 }
980
981 trans_for_each_update2(trans, i) {
9f631dc1
KO
982 ret = bch2_btree_iter_traverse(i->iter);
983 if (unlikely(ret)) {
984 trace_trans_restart_traverse(trans->ip);
985 goto out;
986 }
987
5c1d808a 988 if (unlikely(!bch2_btree_iter_upgrade(i->iter, i->level + 1))) {
6333bd2f
KO
989 trace_trans_restart_upgrade(trans->ip);
990 ret = -EINTR;
991 goto out;
992 }
993
5c1d808a
KO
994 BUG_ON(!btree_node_intent_locked(i->iter, i->level));
995
8b3bbe2c 996 u64s = jset_u64s(i->k->k.u64s);
2ca88e5a
KO
997 if (btree_iter_type(i->iter) == BTREE_ITER_CACHED &&
998 likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)))
8b3bbe2c
KO
999 trans->journal_preres_u64s += u64s;
1000 trans->journal_u64s += u64s;
1001 }
2a9101a9
KO
1002retry:
1003 memset(&trans->journal_res, 0, sizeof(trans->journal_res));
134915f3 1004
2a9101a9 1005 ret = do_bch2_trans_commit(trans, &i);
134915f3 1006
2a9101a9
KO
1007 /* make sure we didn't drop or screw up locks: */
1008 bch2_btree_trans_verify_locks(trans);
1009
11e6f19a
KO
1010 if (ret)
1011 goto err;
24326cd1
KO
1012
1013 trans_for_each_iter(trans, iter)
1f7fdc0a 1014 if (btree_iter_live(trans, iter) &&
792e2c4c
KO
1015 (iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT))
1016 bch2_btree_iter_set_pos(iter, iter->pos_after_commit);
11e6f19a 1017out:
2a9101a9 1018 bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres);
9623ab27 1019
2a9101a9
KO
1020 if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
1021 percpu_ref_put(&trans->c->writes);
b7cf4bd7 1022out_reset:
f793fd85
KO
1023 if (!ret)
1024 reset_flags |= TRANS_RESET_NOTRAVERSE;
1025 if (!ret && (trans->flags & BTREE_INSERT_NOUNLOCK))
1026 reset_flags |= TRANS_RESET_NOUNLOCK;
1027 bch2_trans_reset(trans, reset_flags);
8b3bbe2c 1028
0dc17247 1029 return ret;
11e6f19a
KO
1030err:
1031 ret = bch2_trans_commit_error(trans, i, ret);
2a9101a9
KO
1032 if (ret)
1033 goto out;
932aa837 1034
2a9101a9 1035 goto retry;
1c6fdbd8
KO
1036}
1037
24326cd1
KO
/*
 * bch2_trans_update - queue a key update to be applied at transaction commit
 * @trans:	transaction the update belongs to
 * @iter:	iterator positioned at the key being updated
 * @k:		new key (insert or overwrite); for extent iterators, iter->pos
 *		must equal bkey_start_pos(&k->k), otherwise k->k.p
 * @flags:	BTREE_TRIGGER_* flags, stashed for when triggers run at commit
 *
 * The update is recorded in trans->updates, which is kept sorted by
 * (btree_id, level desc, pos) — see btree_insert_entry_cmp().  For extents,
 * any previously queued updates that the new key overlaps are trimmed or
 * removed so the queue stays non-overlapping.  Always returns 0.
 */
int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
		      struct bkey_i *k, enum btree_trigger_flags flags)
{
	struct btree_insert_entry *i, n = (struct btree_insert_entry) {
		.trigger_flags = flags,
		.bkey_type	= __btree_node_type(iter->level, iter->btree_id),
		.btree_id	= iter->btree_id,
		.level		= iter->level,
		.is_extent	= (iter->flags & BTREE_ITER_IS_EXTENTS) != 0,
		.iter		= iter,
		.k		= k
	};

	/* updates array is fixed size; caller must not overfill a transaction */
	BUG_ON(trans->nr_updates >= BTREE_ITER_MAX);

#ifdef CONFIG_BCACHEFS_DEBUG
	/* iterator must be positioned at the start of the key being inserted */
	BUG_ON(bkey_cmp(iter->pos,
			n.is_extent ? bkey_start_pos(&k->k) : k->k.p));

	/* verify the pending-updates list invariants: positioned + sorted */
	trans_for_each_update(trans, i) {
		BUG_ON(bkey_cmp(i->iter->pos,
				i->is_extent ? bkey_start_pos(&i->k->k) : i->k->k.p));

		BUG_ON(i != trans->updates &&
		       btree_insert_entry_cmp(i - 1, i) >= 0);
	}
#endif

	/* the iterator must stay valid until the update is committed */
	iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;

	if (n.is_extent) {
		/*
		 * After commit, the iterator is advanced past the inserted
		 * extent (see the SET_POS_AFTER_COMMIT handling in
		 * bch2_trans_commit()):
		 */
		iter->pos_after_commit = k->k.p;
		iter->flags |= BTREE_ITER_SET_POS_AFTER_COMMIT;
	}

	/*
	 * Pending updates are kept sorted: first, find position of new update,
	 * then delete/trim any updates the new update overwrites:
	 */
	if (!n.is_extent) {
		trans_for_each_update(trans, i)
			if (btree_insert_entry_cmp(&n, i) <= 0)
				break;

		/* exact match on (btree_id, level, pos): replace in place */
		if (i < trans->updates + trans->nr_updates &&
		    !btree_insert_entry_cmp(&n, i))
			*i = n;
		else
			array_insert_item(trans->updates, trans->nr_updates,
					  i - trans->updates, n);
	} else {
		/* find insert position (strictly after equal-pos entries) */
		trans_for_each_update(trans, i)
			if (btree_insert_entry_cmp(&n, i) < 0)
				break;

		/*
		 * Drop any earlier queued extents on the same btree that the
		 * new extent completely covers (their start is >= ours):
		 */
		while (i > trans->updates &&
		       i[-1].btree_id == n.btree_id &&
		       bkey_cmp(bkey_start_pos(&n.k->k),
				bkey_start_pos(&i[-1].k->k)) <= 0) {
			--i;
			array_remove_item(trans->updates, trans->nr_updates,
					  i - trans->updates);
		}

		/* previous entry partially overlaps our start: trim its end */
		if (i > trans->updates &&
		    i[-1].btree_id == n.btree_id &&
		    bkey_cmp(bkey_start_pos(&n.k->k), i[-1].k->k.p) < 0)
			bch2_cut_back(bkey_start_pos(&n.k->k), i[-1].k);

		/* next entry overlaps our end: trim its front */
		if (i < trans->updates + trans->nr_updates &&
		    i->btree_id == n.btree_id &&
		    bkey_cmp(n.k->k.p, bkey_start_pos(&i->k->k)) > 0) {
			/* We don't handle splitting extents here: */
			BUG_ON(bkey_cmp(bkey_start_pos(&n.k->k),
					bkey_start_pos(&i->k->k)) > 0);

			/*
			 * When we have an extent that overwrites the start of another
			 * update, trimming that extent will mean the iterator's
			 * position has to change since the iterator position has to
			 * match the extent's start pos - but we don't want to change
			 * the iterator pos if some other code is using it, so we may
			 * need to clone it:
			 */
			if (btree_iter_live(trans, i->iter)) {
				i->iter = bch2_trans_copy_iter(trans, i->iter);

				i->iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
				bch2_trans_iter_put(trans, i->iter);
			}

			bch2_cut_front(n.k->k.p, i->k);
			bch2_btree_iter_set_pos(i->iter, n.k->k.p);
		}

		array_insert_item(trans->updates, trans->nr_updates,
				  i - trans->updates, n);
	}

	return 0;
}
1139
43d00243
KO
/*
 * bch2_trans_commit_hook - register a hook with this transaction
 * @trans:	transaction to attach the hook to
 * @h:		hook to register; caller retains ownership and must keep it
 *		alive at least until the transaction commits
 *
 * Pushes @h onto the head of the transaction's singly linked hook list
 * (trans->hooks).  NOTE(review): hooks are presumably invoked during
 * transaction commit — the invocation site is not visible in this chunk.
 */
void bch2_trans_commit_hook(struct btree_trans *trans,
			    struct btree_trans_commit_hook *h)
{
	h->next = trans->hooks;
	trans->hooks = h;
}
1146
163e885a
KO
1147int __bch2_btree_insert(struct btree_trans *trans,
1148 enum btree_id id, struct bkey_i *k)
b1fd23df
KO
1149{
1150 struct btree_iter *iter;
163e885a 1151 int ret;
b1fd23df
KO
1152
1153 iter = bch2_trans_get_iter(trans, id, bkey_start_pos(&k->k),
1154 BTREE_ITER_INTENT);
b1fd23df 1155
163e885a
KO
1156 ret = bch2_btree_iter_traverse(iter) ?:
1157 bch2_trans_update(trans, iter, k, 0);
1158 bch2_trans_iter_put(trans, iter);
1159 return ret;
b1fd23df
KO
1160}
1161
1c6fdbd8 1162/**
9623ab27 1163 * bch2_btree_insert - insert keys into the extent btree
1c6fdbd8
KO
1164 * @c: pointer to struct bch_fs
1165 * @id: btree to insert into
1166 * @insert_keys: list of keys to insert
1167 * @hook: insert callback
1168 */
1169int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
b1fd23df
KO
1170 struct bkey_i *k,
1171 struct disk_reservation *disk_res,
1172 u64 *journal_seq, int flags)
1c6fdbd8 1173{
b1fd23df
KO
1174 return bch2_trans_do(c, disk_res, journal_seq, flags,
1175 __bch2_btree_insert(&trans, id, k));
1c6fdbd8
KO
1176}
1177
087c2019
KO
1178int bch2_btree_delete_at(struct btree_trans *trans,
1179 struct btree_iter *iter, unsigned flags)
1180{
1181 struct bkey_i k;
1182
1183 bkey_init(&k.k);
1184 k.k.p = iter->pos;
1185
1186 bch2_trans_update(trans, iter, &k, 0);
1187 return bch2_trans_commit(trans, NULL, NULL,
3187aa8d 1188 BTREE_INSERT_NOFAIL|flags);
087c2019
KO
1189}
1190
/*
 * bch2_btree_delete_range_trans - delete all keys in [start, end) on a btree
 * @trans:	transaction to use (restarted per iteration via bch2_trans_begin())
 * @id:		btree to delete from
 * @start:	first position to delete (inclusive)
 * @end:	position to stop at (exclusive)
 * @journal_seq: passed through to each bch2_trans_commit()
 *
 * Deletes one key (or one atomically-trimmed extent) per transaction commit,
 * restarting from the iterator's position on -EINTR.  Returns 0 on success
 * or the first non-restart error.
 */
int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
				  struct bpos start, struct bpos end,
				  u64 *journal_seq)
{
	struct btree_iter *iter;
	struct bkey_s_c k;
	int ret = 0;

	iter = bch2_trans_get_iter(trans, id, start, BTREE_ITER_INTENT);
retry:
	/* one key per loop iteration; stop at end of btree, error, or @end */
	while ((k = bch2_btree_iter_peek(iter)).k &&
	       !(ret = bkey_err(k)) &&
	       bkey_cmp(iter->pos, end) < 0) {
		struct bkey_i delete;

		/* reset transaction state from the previous commit */
		bch2_trans_begin(trans);

		bkey_init(&delete.k);

		/*
		 * This could probably be more efficient for extents:
		 */

		/*
		 * For extents, iter.pos won't necessarily be the same as
		 * bkey_start_pos(k.k) (for non extents they always will be the
		 * same). It's important that we delete starting from iter.pos
		 * because the range we want to delete could start in the middle
		 * of k.
		 *
		 * (bch2_btree_iter_peek() does guarantee that iter.pos >=
		 * bkey_start_pos(k.k)).
		 */
		delete.k.p = iter->pos;

		if (btree_node_type_is_extents(iter->btree_id)) {
			/* largest extent size expressible, block-aligned */
			unsigned max_sectors =
				KEY_SIZE_MAX & (~0 << trans->c->block_bits);

			/* create the biggest key we can */
			bch2_key_resize(&delete.k, max_sectors);
			bch2_cut_back(end, &delete);

			/* trim to what can be deleted in one atomic commit */
			ret = bch2_extent_trim_atomic(&delete, iter);
			if (ret)
				break;
		}

		bch2_trans_update(trans, iter, &delete, 0);
		ret = bch2_trans_commit(trans, NULL, journal_seq,
					BTREE_INSERT_NOFAIL);
		if (ret)
			break;

		/* yield between commits; this can be a long-running loop */
		bch2_trans_cond_resched(trans);
	}

	/* -EINTR means transaction restart, not failure: resume the scan */
	if (ret == -EINTR) {
		ret = 0;
		goto retry;
	}

	bch2_trans_iter_free(trans, iter);
	return ret;
}
1256
/*
 * bch2_btree_delete_range - delete everything within a given range
 * @c:		pointer to struct bch_fs
 * @id:		btree to delete from
 * @start:	first position to delete (inclusive)
 * @end:	position to stop at (exclusive)
 * @journal_seq: passed through to each transaction commit
 *
 * Range is a half open interval - [start, end)
 *
 * Wrapper that runs bch2_btree_delete_range_trans() inside its own
 * transaction; bch2_trans_do() declares the `trans` referenced below.
 */
int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
			    struct bpos start, struct bpos end,
			    u64 *journal_seq)
{
	return bch2_trans_do(c, NULL, journal_seq, 0,
			     bch2_btree_delete_range_trans(&trans, id, start, end, journal_seq));
}
1c6fdbd8 1268}