Commit | Line | Data |
---|---|---|
8079aab0 KO |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | #include "bcachefs.h" | |
4 | #include "btree_update.h" | |
5 | #include "btree_iter.h" | |
401585fe | 6 | #include "btree_journal_iter.h" |
8079aab0 KO |
7 | #include "btree_locking.h" |
8 | #include "buckets.h" | |
9 | #include "debug.h" | |
10 | #include "errcode.h" | |
11 | #include "error.h" | |
12 | #include "extents.h" | |
13 | #include "keylist.h" | |
8e877caa | 14 | #include "snapshot.h" |
8079aab0 KO |
15 | #include "trace.h" |
16 | ||
17 | static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l, | |
18 | const struct btree_insert_entry *r) | |
19 | { | |
20 | return cmp_int(l->btree_id, r->btree_id) ?: | |
21 | cmp_int(l->cached, r->cached) ?: | |
22 | -cmp_int(l->level, r->level) ?: | |
23 | bpos_cmp(l->k->k.p, r->k->k.p); | |
24 | } | |
25 | ||
26 | static int __must_check | |
7f9821a7 | 27 | bch2_trans_update_by_path(struct btree_trans *, btree_path_idx_t, |
5dd8c60e | 28 | struct bkey_i *, enum btree_iter_update_trigger_flags, |
8079aab0 KO |
29 | unsigned long ip); |
30 | ||
8079aab0 KO |
31 | static noinline int extent_front_merge(struct btree_trans *trans, |
32 | struct btree_iter *iter, | |
33 | struct bkey_s_c k, | |
34 | struct bkey_i **insert, | |
5dd8c60e | 35 | enum btree_iter_update_trigger_flags flags) |
8079aab0 KO |
36 | { |
37 | struct bch_fs *c = trans->c; | |
38 | struct bkey_i *update; | |
39 | int ret; | |
40 | ||
57339b24 KO |
41 | if (unlikely(trans->journal_replay_not_finished)) |
42 | return 0; | |
43 | ||
8079aab0 KO |
44 | update = bch2_bkey_make_mut_noupdate(trans, k); |
45 | ret = PTR_ERR_OR_ZERO(update); | |
46 | if (ret) | |
47 | return ret; | |
48 | ||
49 | if (!bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(*insert))) | |
50 | return 0; | |
51 | ||
fa5bed37 KO |
52 | ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p) ?: |
53 | bch2_key_has_snapshot_overwrites(trans, iter->btree_id, (*insert)->k.p); | |
8079aab0 KO |
54 | if (ret < 0) |
55 | return ret; | |
56 | if (ret) | |
57 | return 0; | |
58 | ||
59 | ret = bch2_btree_delete_at(trans, iter, flags); | |
60 | if (ret) | |
61 | return ret; | |
62 | ||
63 | *insert = update; | |
64 | return 0; | |
65 | } | |
66 | ||
67 | static noinline int extent_back_merge(struct btree_trans *trans, | |
68 | struct btree_iter *iter, | |
69 | struct bkey_i *insert, | |
70 | struct bkey_s_c k) | |
71 | { | |
72 | struct bch_fs *c = trans->c; | |
73 | int ret; | |
74 | ||
57339b24 KO |
75 | if (unlikely(trans->journal_replay_not_finished)) |
76 | return 0; | |
77 | ||
fa5bed37 KO |
78 | ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, insert->k.p) ?: |
79 | bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p); | |
8079aab0 KO |
80 | if (ret < 0) |
81 | return ret; | |
82 | if (ret) | |
83 | return 0; | |
84 | ||
85 | bch2_bkey_merge(c, bkey_i_to_s(insert), k); | |
86 | return 0; | |
87 | } | |
88 | ||
89 | /* | |
90 | * When deleting, check if we need to emit a whiteout (because we're overwriting | |
91 | * something in an ancestor snapshot) | |
92 | */ | |
93 | static int need_whiteout_for_snapshot(struct btree_trans *trans, | |
94 | enum btree_id btree_id, struct bpos pos) | |
95 | { | |
96 | struct btree_iter iter; | |
97 | struct bkey_s_c k; | |
98 | u32 snapshot = pos.snapshot; | |
99 | int ret; | |
100 | ||
101 | if (!bch2_snapshot_parent(trans->c, pos.snapshot)) | |
102 | return 0; | |
103 | ||
104 | pos.snapshot++; | |
105 | ||
106 | for_each_btree_key_norestart(trans, iter, btree_id, pos, | |
5dd8c60e KO |
107 | BTREE_ITER_all_snapshots| |
108 | BTREE_ITER_nopreserve, k, ret) { | |
8079aab0 KO |
109 | if (!bkey_eq(k.k->p, pos)) |
110 | break; | |
111 | ||
112 | if (bch2_snapshot_is_ancestor(trans->c, snapshot, | |
113 | k.k->p.snapshot)) { | |
114 | ret = !bkey_whiteout(k.k); | |
115 | break; | |
116 | } | |
117 | } | |
118 | bch2_trans_iter_exit(trans, &iter); | |
119 | ||
120 | return ret; | |
121 | } | |
122 | ||
123 | int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans, | |
124 | enum btree_id id, | |
125 | struct bpos old_pos, | |
126 | struct bpos new_pos) | |
127 | { | |
128 | struct bch_fs *c = trans->c; | |
129 | struct btree_iter old_iter, new_iter = { NULL }; | |
130 | struct bkey_s_c old_k, new_k; | |
131 | snapshot_id_list s; | |
132 | struct bkey_i *update; | |
40a53b92 | 133 | int ret = 0; |
8079aab0 KO |
134 | |
135 | if (!bch2_snapshot_has_children(c, old_pos.snapshot)) | |
136 | return 0; | |
137 | ||
138 | darray_init(&s); | |
139 | ||
140 | bch2_trans_iter_init(trans, &old_iter, id, old_pos, | |
5dd8c60e KO |
141 | BTREE_ITER_not_extents| |
142 | BTREE_ITER_all_snapshots); | |
8079aab0 KO |
143 | while ((old_k = bch2_btree_iter_prev(&old_iter)).k && |
144 | !(ret = bkey_err(old_k)) && | |
145 | bkey_eq(old_pos, old_k.k->p)) { | |
146 | struct bpos whiteout_pos = | |
147 | SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);; | |
148 | ||
149 | if (!bch2_snapshot_is_ancestor(c, old_k.k->p.snapshot, old_pos.snapshot) || | |
150 | snapshot_list_has_ancestor(c, &s, old_k.k->p.snapshot)) | |
151 | continue; | |
152 | ||
153 | new_k = bch2_bkey_get_iter(trans, &new_iter, id, whiteout_pos, | |
5dd8c60e KO |
154 | BTREE_ITER_not_extents| |
155 | BTREE_ITER_intent); | |
8079aab0 KO |
156 | ret = bkey_err(new_k); |
157 | if (ret) | |
158 | break; | |
159 | ||
160 | if (new_k.k->type == KEY_TYPE_deleted) { | |
161 | update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i)); | |
162 | ret = PTR_ERR_OR_ZERO(update); | |
163 | if (ret) | |
164 | break; | |
165 | ||
166 | bkey_init(&update->k); | |
167 | update->k.p = whiteout_pos; | |
168 | update->k.type = KEY_TYPE_whiteout; | |
169 | ||
170 | ret = bch2_trans_update(trans, &new_iter, update, | |
5dd8c60e | 171 | BTREE_UPDATE_internal_snapshot_node); |
8079aab0 KO |
172 | } |
173 | bch2_trans_iter_exit(trans, &new_iter); | |
174 | ||
175 | ret = snapshot_list_add(c, &s, old_k.k->p.snapshot); | |
176 | if (ret) | |
177 | break; | |
178 | } | |
179 | bch2_trans_iter_exit(trans, &new_iter); | |
180 | bch2_trans_iter_exit(trans, &old_iter); | |
181 | darray_exit(&s); | |
182 | ||
183 | return ret; | |
184 | } | |
185 | ||
186 | int bch2_trans_update_extent_overwrite(struct btree_trans *trans, | |
187 | struct btree_iter *iter, | |
5dd8c60e | 188 | enum btree_iter_update_trigger_flags flags, |
8079aab0 KO |
189 | struct bkey_s_c old, |
190 | struct bkey_s_c new) | |
191 | { | |
192 | enum btree_id btree_id = iter->btree_id; | |
193 | struct bkey_i *update; | |
194 | struct bpos new_start = bkey_start_pos(new.k); | |
01db5e5f KO |
195 | unsigned front_split = bkey_lt(bkey_start_pos(old.k), new_start); |
196 | unsigned back_split = bkey_gt(old.k->p, new.k->p); | |
197 | unsigned middle_split = (front_split || back_split) && | |
198 | old.k->p.snapshot != new.k->p.snapshot; | |
199 | unsigned nr_splits = front_split + back_split + middle_split; | |
8079aab0 KO |
200 | int ret = 0, compressed_sectors; |
201 | ||
202 | /* | |
203 | * If we're going to be splitting a compressed extent, note it | |
204 | * so that __bch2_trans_commit() can increase our disk | |
205 | * reservation: | |
206 | */ | |
01db5e5f | 207 | if (nr_splits > 1 && |
8079aab0 | 208 | (compressed_sectors = bch2_bkey_sectors_compressed(old))) |
6474b706 | 209 | trans->extra_disk_res += compressed_sectors * (nr_splits - 1); |
8079aab0 KO |
210 | |
211 | if (front_split) { | |
212 | update = bch2_bkey_make_mut_noupdate(trans, old); | |
213 | if ((ret = PTR_ERR_OR_ZERO(update))) | |
214 | return ret; | |
215 | ||
216 | bch2_cut_back(new_start, update); | |
217 | ||
218 | ret = bch2_insert_snapshot_whiteouts(trans, btree_id, | |
219 | old.k->p, update->k.p) ?: | |
220 | bch2_btree_insert_nonextent(trans, btree_id, update, | |
5dd8c60e | 221 | BTREE_UPDATE_internal_snapshot_node|flags); |
8079aab0 KO |
222 | if (ret) |
223 | return ret; | |
224 | } | |
225 | ||
226 | /* If we're overwriting in a different snapshot - middle split: */ | |
01db5e5f | 227 | if (middle_split) { |
8079aab0 KO |
228 | update = bch2_bkey_make_mut_noupdate(trans, old); |
229 | if ((ret = PTR_ERR_OR_ZERO(update))) | |
230 | return ret; | |
231 | ||
232 | bch2_cut_front(new_start, update); | |
233 | bch2_cut_back(new.k->p, update); | |
234 | ||
235 | ret = bch2_insert_snapshot_whiteouts(trans, btree_id, | |
236 | old.k->p, update->k.p) ?: | |
237 | bch2_btree_insert_nonextent(trans, btree_id, update, | |
5dd8c60e | 238 | BTREE_UPDATE_internal_snapshot_node|flags); |
8079aab0 KO |
239 | if (ret) |
240 | return ret; | |
241 | } | |
242 | ||
243 | if (bkey_le(old.k->p, new.k->p)) { | |
244 | update = bch2_trans_kmalloc(trans, sizeof(*update)); | |
245 | if ((ret = PTR_ERR_OR_ZERO(update))) | |
246 | return ret; | |
247 | ||
248 | bkey_init(&update->k); | |
249 | update->k.p = old.k->p; | |
250 | update->k.p.snapshot = new.k->p.snapshot; | |
251 | ||
252 | if (new.k->p.snapshot != old.k->p.snapshot) { | |
253 | update->k.type = KEY_TYPE_whiteout; | |
254 | } else if (btree_type_has_snapshots(btree_id)) { | |
255 | ret = need_whiteout_for_snapshot(trans, btree_id, update->k.p); | |
256 | if (ret < 0) | |
257 | return ret; | |
258 | if (ret) | |
259 | update->k.type = KEY_TYPE_whiteout; | |
260 | } | |
261 | ||
262 | ret = bch2_btree_insert_nonextent(trans, btree_id, update, | |
5dd8c60e | 263 | BTREE_UPDATE_internal_snapshot_node|flags); |
8079aab0 KO |
264 | if (ret) |
265 | return ret; | |
266 | } | |
267 | ||
268 | if (back_split) { | |
269 | update = bch2_bkey_make_mut_noupdate(trans, old); | |
270 | if ((ret = PTR_ERR_OR_ZERO(update))) | |
271 | return ret; | |
272 | ||
273 | bch2_cut_front(new.k->p, update); | |
274 | ||
7f9821a7 | 275 | ret = bch2_trans_update_by_path(trans, iter->path, update, |
5dd8c60e | 276 | BTREE_UPDATE_internal_snapshot_node| |
8079aab0 KO |
277 | flags, _RET_IP_); |
278 | if (ret) | |
279 | return ret; | |
280 | } | |
281 | ||
282 | return 0; | |
283 | } | |
284 | ||
285 | static int bch2_trans_update_extent(struct btree_trans *trans, | |
286 | struct btree_iter *orig_iter, | |
287 | struct bkey_i *insert, | |
5dd8c60e | 288 | enum btree_iter_update_trigger_flags flags) |
8079aab0 KO |
289 | { |
290 | struct btree_iter iter; | |
291 | struct bkey_s_c k; | |
292 | enum btree_id btree_id = orig_iter->btree_id; | |
293 | int ret = 0; | |
294 | ||
295 | bch2_trans_iter_init(trans, &iter, btree_id, bkey_start_pos(&insert->k), | |
5dd8c60e KO |
296 | BTREE_ITER_intent| |
297 | BTREE_ITER_with_updates| | |
298 | BTREE_ITER_not_extents); | |
8079aab0 KO |
299 | k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX)); |
300 | if ((ret = bkey_err(k))) | |
301 | goto err; | |
302 | if (!k.k) | |
303 | goto out; | |
304 | ||
305 | if (bkey_eq(k.k->p, bkey_start_pos(&insert->k))) { | |
306 | if (bch2_bkey_maybe_mergable(k.k, &insert->k)) { | |
307 | ret = extent_front_merge(trans, &iter, k, &insert, flags); | |
308 | if (ret) | |
309 | goto err; | |
310 | } | |
311 | ||
312 | goto next; | |
313 | } | |
314 | ||
315 | while (bkey_gt(insert->k.p, bkey_start_pos(k.k))) { | |
316 | bool done = bkey_lt(insert->k.p, k.k->p); | |
317 | ||
318 | ret = bch2_trans_update_extent_overwrite(trans, &iter, flags, k, bkey_i_to_s_c(insert)); | |
319 | if (ret) | |
320 | goto err; | |
321 | ||
322 | if (done) | |
323 | goto out; | |
324 | next: | |
325 | bch2_btree_iter_advance(&iter); | |
326 | k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX)); | |
327 | if ((ret = bkey_err(k))) | |
328 | goto err; | |
329 | if (!k.k) | |
330 | goto out; | |
331 | } | |
332 | ||
333 | if (bch2_bkey_maybe_mergable(&insert->k, k.k)) { | |
334 | ret = extent_back_merge(trans, &iter, insert, k); | |
335 | if (ret) | |
336 | goto err; | |
337 | } | |
338 | out: | |
339 | if (!bkey_deleted(&insert->k)) | |
340 | ret = bch2_btree_insert_nonextent(trans, btree_id, insert, flags); | |
341 | err: | |
342 | bch2_trans_iter_exit(trans, &iter); | |
343 | ||
344 | return ret; | |
345 | } | |
346 | ||
347 | static noinline int flush_new_cached_update(struct btree_trans *trans, | |
8079aab0 | 348 | struct btree_insert_entry *i, |
5dd8c60e | 349 | enum btree_iter_update_trigger_flags flags, |
8079aab0 KO |
350 | unsigned long ip) |
351 | { | |
8079aab0 KO |
352 | struct bkey k; |
353 | int ret; | |
354 | ||
255ebbbf | 355 | btree_path_idx_t path_idx = |
7f9821a7 | 356 | bch2_path_get(trans, i->btree_id, i->old_k.p, 1, 0, |
5dd8c60e | 357 | BTREE_ITER_intent, _THIS_IP_); |
96ed47d1 | 358 | ret = bch2_btree_path_traverse(trans, path_idx, 0); |
8079aab0 KO |
359 | if (ret) |
360 | goto out; | |
361 | ||
255ebbbf KO |
362 | struct btree_path *btree_path = trans->paths + path_idx; |
363 | ||
8079aab0 KO |
364 | /* |
365 | * The old key in the insert entry might actually refer to an existing | |
366 | * key in the btree that has been deleted from cache and not yet | |
367 | * flushed. Check for this and skip the flush so we don't run triggers | |
368 | * against a stale key. | |
369 | */ | |
370 | bch2_btree_path_peek_slot_exact(btree_path, &k); | |
371 | if (!bkey_deleted(&k)) | |
372 | goto out; | |
373 | ||
374 | i->key_cache_already_flushed = true; | |
5dd8c60e | 375 | i->flags |= BTREE_TRIGGER_norun; |
8079aab0 KO |
376 | |
377 | btree_path_set_should_be_locked(btree_path); | |
7f9821a7 | 378 | ret = bch2_trans_update_by_path(trans, path_idx, i->k, flags, ip); |
8079aab0 | 379 | out: |
96ed47d1 | 380 | bch2_path_put(trans, path_idx, true); |
8079aab0 KO |
381 | return ret; |
382 | } | |
383 | ||
384 | static int __must_check | |
7f9821a7 | 385 | bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx, |
5dd8c60e | 386 | struct bkey_i *k, enum btree_iter_update_trigger_flags flags, |
8079aab0 KO |
387 | unsigned long ip) |
388 | { | |
389 | struct bch_fs *c = trans->c; | |
390 | struct btree_insert_entry *i, n; | |
8079aab0 KO |
391 | int cmp; |
392 | ||
7f9821a7 | 393 | struct btree_path *path = trans->paths + path_idx; |
8079aab0 | 394 | EBUG_ON(!path->should_be_locked); |
2c3b0fc3 | 395 | EBUG_ON(trans->nr_updates >= trans->nr_paths); |
8079aab0 KO |
396 | EBUG_ON(!bpos_eq(k->k.p, path->pos)); |
397 | ||
8079aab0 KO |
398 | n = (struct btree_insert_entry) { |
399 | .flags = flags, | |
400 | .bkey_type = __btree_node_type(path->level, path->btree_id), | |
401 | .btree_id = path->btree_id, | |
402 | .level = path->level, | |
403 | .cached = path->cached, | |
7f9821a7 | 404 | .path = path_idx, |
8079aab0 | 405 | .k = k, |
8079aab0 KO |
406 | .ip_allocated = ip, |
407 | }; | |
408 | ||
409 | #ifdef CONFIG_BCACHEFS_DEBUG | |
410 | trans_for_each_update(trans, i) | |
411 | BUG_ON(i != trans->updates && | |
412 | btree_insert_entry_cmp(i - 1, i) >= 0); | |
413 | #endif | |
414 | ||
415 | /* | |
416 | * Pending updates are kept sorted: first, find position of new update, | |
417 | * then delete/trim any updates the new update overwrites: | |
418 | */ | |
559e6c23 | 419 | for (i = trans->updates; i < trans->updates + trans->nr_updates; i++) { |
8079aab0 KO |
420 | cmp = btree_insert_entry_cmp(&n, i); |
421 | if (cmp <= 0) | |
422 | break; | |
423 | } | |
424 | ||
425 | if (!cmp && i < trans->updates + trans->nr_updates) { | |
426 | EBUG_ON(i->insert_trigger_run || i->overwrite_trigger_run); | |
427 | ||
7f9821a7 | 428 | bch2_path_put(trans, i->path, true); |
8079aab0 KO |
429 | i->flags = n.flags; |
430 | i->cached = n.cached; | |
431 | i->k = n.k; | |
432 | i->path = n.path; | |
8079aab0 KO |
433 | i->ip_allocated = n.ip_allocated; |
434 | } else { | |
435 | array_insert_item(trans->updates, trans->nr_updates, | |
436 | i - trans->updates, n); | |
437 | ||
438 | i->old_v = bch2_btree_path_peek_slot_exact(path, &i->old_k).v; | |
439 | i->old_btree_u64s = !bkey_deleted(&i->old_k) ? i->old_k.u64s : 0; | |
440 | ||
441 | if (unlikely(trans->journal_replay_not_finished)) { | |
442 | struct bkey_i *j_k = | |
443 | bch2_journal_keys_peek_slot(c, n.btree_id, n.level, k->k.p); | |
444 | ||
445 | if (j_k) { | |
446 | i->old_k = j_k->k; | |
447 | i->old_v = &j_k->v; | |
448 | } | |
449 | } | |
450 | } | |
451 | ||
7f9821a7 | 452 | __btree_path_get(trans->paths + i->path, true); |
8079aab0 KO |
453 | |
454 | /* | |
455 | * If a key is present in the key cache, it must also exist in the | |
456 | * btree - this is necessary for cache coherency. When iterating over | |
457 | * a btree that's cached in the key cache, the btree iter code checks | |
458 | * the key cache - but the key has to exist in the btree for that to | |
459 | * work: | |
460 | */ | |
b3f8e711 | 461 | if (path->cached && !i->old_btree_u64s) |
7f9821a7 | 462 | return flush_new_cached_update(trans, i, flags, ip); |
8079aab0 KO |
463 | |
464 | return 0; | |
465 | } | |
466 | ||
cbf57db5 KO |
467 | static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans, |
468 | struct btree_iter *iter, | |
469 | struct btree_path *path) | |
470 | { | |
07f383c7 KO |
471 | struct btree_path *key_cache_path = btree_iter_key_cache_path(trans, iter); |
472 | ||
473 | if (!key_cache_path || | |
474 | !key_cache_path->should_be_locked || | |
475 | !bpos_eq(key_cache_path->pos, iter->pos)) { | |
cbf57db5 KO |
476 | struct bkey_cached *ck; |
477 | int ret; | |
478 | ||
479 | if (!iter->key_cache_path) | |
07f383c7 | 480 | iter->key_cache_path = |
cbf57db5 | 481 | bch2_path_get(trans, path->btree_id, path->pos, 1, 0, |
5dd8c60e KO |
482 | BTREE_ITER_intent| |
483 | BTREE_ITER_cached, _THIS_IP_); | |
cbf57db5 | 484 | |
07f383c7 KO |
485 | iter->key_cache_path = |
486 | bch2_btree_path_set_pos(trans, iter->key_cache_path, path->pos, | |
5dd8c60e | 487 | iter->flags & BTREE_ITER_intent, |
cbf57db5 KO |
488 | _THIS_IP_); |
489 | ||
5dd8c60e | 490 | ret = bch2_btree_path_traverse(trans, iter->key_cache_path, BTREE_ITER_cached); |
cbf57db5 KO |
491 | if (unlikely(ret)) |
492 | return ret; | |
493 | ||
07f383c7 | 494 | ck = (void *) trans->paths[iter->key_cache_path].l[0].b; |
cbf57db5 KO |
495 | |
496 | if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { | |
497 | trace_and_count(trans->c, trans_restart_key_cache_raced, trans, _RET_IP_); | |
498 | return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced); | |
499 | } | |
500 | ||
07f383c7 | 501 | btree_path_set_should_be_locked(trans->paths + iter->key_cache_path); |
cbf57db5 KO |
502 | } |
503 | ||
504 | return 0; | |
505 | } | |
506 | ||
8079aab0 | 507 | int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, |
5dd8c60e | 508 | struct bkey_i *k, enum btree_iter_update_trigger_flags flags) |
8079aab0 | 509 | { |
7f9821a7 | 510 | btree_path_idx_t path_idx = iter->update_path ?: iter->path; |
8079aab0 KO |
511 | int ret; |
512 | ||
5dd8c60e | 513 | if (iter->flags & BTREE_ITER_is_extents) |
8079aab0 KO |
514 | return bch2_trans_update_extent(trans, iter, k, flags); |
515 | ||
516 | if (bkey_deleted(&k->k) && | |
5dd8c60e KO |
517 | !(flags & BTREE_UPDATE_key_cache_reclaim) && |
518 | (iter->flags & BTREE_ITER_filter_snapshots)) { | |
8079aab0 KO |
519 | ret = need_whiteout_for_snapshot(trans, iter->btree_id, k->k.p); |
520 | if (unlikely(ret < 0)) | |
521 | return ret; | |
522 | ||
523 | if (ret) | |
524 | k->k.type = KEY_TYPE_whiteout; | |
525 | } | |
526 | ||
527 | /* | |
528 | * Ensure that updates to cached btrees go to the key cache: | |
529 | */ | |
7f9821a7 | 530 | struct btree_path *path = trans->paths + path_idx; |
5dd8c60e | 531 | if (!(flags & BTREE_UPDATE_key_cache_reclaim) && |
8079aab0 KO |
532 | !path->cached && |
533 | !path->level && | |
534 | btree_id_cached(trans->c, path->btree_id)) { | |
cbf57db5 KO |
535 | ret = bch2_trans_update_get_key_cache(trans, iter, path); |
536 | if (ret) | |
537 | return ret; | |
8079aab0 | 538 | |
7f9821a7 | 539 | path_idx = iter->key_cache_path; |
8079aab0 KO |
540 | } |
541 | ||
7f9821a7 | 542 | return bch2_trans_update_by_path(trans, path_idx, k, flags, _RET_IP_); |
8079aab0 KO |
543 | } |
544 | ||
67997234 KO |
545 | int bch2_btree_insert_clone_trans(struct btree_trans *trans, |
546 | enum btree_id btree, | |
547 | struct bkey_i *k) | |
ef6fae4a KO |
548 | { |
549 | struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(&k->k)); | |
550 | int ret = PTR_ERR_OR_ZERO(n); | |
551 | if (ret) | |
552 | return ret; | |
553 | ||
554 | bkey_copy(n, k); | |
555 | return bch2_btree_insert_trans(trans, btree, n, 0); | |
556 | } | |
557 | ||
24de63da KO |
558 | struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s) |
559 | { | |
560 | unsigned new_top = trans->journal_entries_u64s + u64s; | |
561 | unsigned old_size = trans->journal_entries_size; | |
562 | ||
563 | if (new_top > trans->journal_entries_size) { | |
564 | trans->journal_entries_size = roundup_pow_of_two(new_top); | |
565 | ||
83322e8c | 566 | btree_trans_stats(trans)->journal_entries_size = trans->journal_entries_size; |
24de63da KO |
567 | } |
568 | ||
569 | struct jset_entry *n = | |
570 | bch2_trans_kmalloc_nomemzero(trans, | |
571 | trans->journal_entries_size * sizeof(u64)); | |
572 | if (IS_ERR(n)) | |
573 | return ERR_CAST(n); | |
574 | ||
575 | if (trans->journal_entries) | |
576 | memcpy(n, trans->journal_entries, old_size * sizeof(u64)); | |
577 | trans->journal_entries = n; | |
578 | ||
579 | struct jset_entry *e = btree_trans_journal_entries_top(trans); | |
580 | trans->journal_entries_u64s = new_top; | |
581 | return e; | |
582 | } | |
583 | ||
8079aab0 KO |
584 | int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter, |
585 | enum btree_id btree, struct bpos end) | |
586 | { | |
587 | struct bkey_s_c k; | |
588 | int ret = 0; | |
589 | ||
5dd8c60e | 590 | bch2_trans_iter_init(trans, iter, btree, POS_MAX, BTREE_ITER_intent); |
8079aab0 KO |
591 | k = bch2_btree_iter_prev(iter); |
592 | ret = bkey_err(k); | |
593 | if (ret) | |
594 | goto err; | |
595 | ||
596 | bch2_btree_iter_advance(iter); | |
597 | k = bch2_btree_iter_peek_slot(iter); | |
598 | ret = bkey_err(k); | |
599 | if (ret) | |
600 | goto err; | |
601 | ||
602 | BUG_ON(k.k->type != KEY_TYPE_deleted); | |
603 | ||
604 | if (bkey_gt(k.k->p, end)) { | |
605 | ret = -BCH_ERR_ENOSPC_btree_slot; | |
606 | goto err; | |
607 | } | |
608 | ||
609 | return 0; | |
610 | err: | |
611 | bch2_trans_iter_exit(trans, iter); | |
612 | return ret; | |
613 | } | |
614 | ||
615 | void bch2_trans_commit_hook(struct btree_trans *trans, | |
616 | struct btree_trans_commit_hook *h) | |
617 | { | |
618 | h->next = trans->hooks; | |
619 | trans->hooks = h; | |
620 | } | |
621 | ||
622 | int bch2_btree_insert_nonextent(struct btree_trans *trans, | |
623 | enum btree_id btree, struct bkey_i *k, | |
5dd8c60e | 624 | enum btree_iter_update_trigger_flags flags) |
8079aab0 KO |
625 | { |
626 | struct btree_iter iter; | |
627 | int ret; | |
628 | ||
629 | bch2_trans_iter_init(trans, &iter, btree, k->k.p, | |
5dd8c60e KO |
630 | BTREE_ITER_cached| |
631 | BTREE_ITER_not_extents| | |
632 | BTREE_ITER_intent); | |
8079aab0 KO |
633 | ret = bch2_btree_iter_traverse(&iter) ?: |
634 | bch2_trans_update(trans, &iter, k, flags); | |
635 | bch2_trans_iter_exit(trans, &iter); | |
636 | return ret; | |
637 | } | |
638 | ||
aef32bf7 | 639 | int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id, |
5dd8c60e | 640 | struct bkey_i *k, enum btree_iter_update_trigger_flags flags) |
8079aab0 KO |
641 | { |
642 | struct btree_iter iter; | |
8079aab0 | 643 | bch2_trans_iter_init(trans, &iter, id, bkey_start_pos(&k->k), |
65bd4423 KO |
644 | BTREE_ITER_intent|flags); |
645 | int ret = bch2_btree_iter_traverse(&iter) ?: | |
646 | bch2_trans_update(trans, &iter, k, flags); | |
8079aab0 KO |
647 | bch2_trans_iter_exit(trans, &iter); |
648 | return ret; | |
649 | } | |
650 | ||
651 | /** | |
652 | * bch2_btree_insert - insert keys into the extent btree | |
653 | * @c: pointer to struct bch_fs | |
654 | * @id: btree to insert into | |
96dea3d5 KO |
655 | * @k: key to insert |
656 | * @disk_res: must be non-NULL whenever inserting or potentially | |
657 | * splitting data extents | |
658 | * @flags: transaction commit flags | |
659 | * | |
660 | * Returns: 0 on success, error code on failure | |
8079aab0 | 661 | */ |
96dea3d5 KO |
662 | int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k, |
663 | struct disk_reservation *disk_res, int flags) | |
8079aab0 | 664 | { |
96dea3d5 | 665 | return bch2_trans_do(c, disk_res, NULL, flags, |
6bd68ec2 | 666 | bch2_btree_insert_trans(trans, id, k, 0)); |
8079aab0 KO |
667 | } |
668 | ||
669 | int bch2_btree_delete_extent_at(struct btree_trans *trans, struct btree_iter *iter, | |
670 | unsigned len, unsigned update_flags) | |
671 | { | |
672 | struct bkey_i *k; | |
673 | ||
674 | k = bch2_trans_kmalloc(trans, sizeof(*k)); | |
675 | if (IS_ERR(k)) | |
676 | return PTR_ERR(k); | |
677 | ||
678 | bkey_init(&k->k); | |
679 | k->k.p = iter->pos; | |
680 | bch2_key_resize(&k->k, len); | |
681 | return bch2_trans_update(trans, iter, k, update_flags); | |
682 | } | |
683 | ||
684 | int bch2_btree_delete_at(struct btree_trans *trans, | |
685 | struct btree_iter *iter, unsigned update_flags) | |
686 | { | |
687 | return bch2_btree_delete_extent_at(trans, iter, 0, update_flags); | |
688 | } | |
689 | ||
aaad530a KO |
690 | int bch2_btree_delete(struct btree_trans *trans, |
691 | enum btree_id btree, struct bpos pos, | |
692 | unsigned update_flags) | |
693 | { | |
694 | struct btree_iter iter; | |
695 | int ret; | |
696 | ||
697 | bch2_trans_iter_init(trans, &iter, btree, pos, | |
5dd8c60e KO |
698 | BTREE_ITER_cached| |
699 | BTREE_ITER_intent); | |
aaad530a KO |
700 | ret = bch2_btree_iter_traverse(&iter) ?: |
701 | bch2_btree_delete_at(trans, &iter, update_flags); | |
702 | bch2_trans_iter_exit(trans, &iter); | |
703 | ||
704 | return ret; | |
705 | } | |
706 | ||
8079aab0 KO |
707 | int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, |
708 | struct bpos start, struct bpos end, | |
709 | unsigned update_flags, | |
710 | u64 *journal_seq) | |
711 | { | |
712 | u32 restart_count = trans->restart_count; | |
713 | struct btree_iter iter; | |
714 | struct bkey_s_c k; | |
715 | int ret = 0; | |
716 | ||
5dd8c60e | 717 | bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_intent); |
8079aab0 KO |
718 | while ((k = bch2_btree_iter_peek_upto(&iter, end)).k) { |
719 | struct disk_reservation disk_res = | |
720 | bch2_disk_reservation_init(trans->c, 0); | |
721 | struct bkey_i delete; | |
722 | ||
723 | ret = bkey_err(k); | |
724 | if (ret) | |
725 | goto err; | |
726 | ||
727 | bkey_init(&delete.k); | |
728 | ||
729 | /* | |
730 | * This could probably be more efficient for extents: | |
731 | */ | |
732 | ||
733 | /* | |
734 | * For extents, iter.pos won't necessarily be the same as | |
735 | * bkey_start_pos(k.k) (for non extents they always will be the | |
736 | * same). It's important that we delete starting from iter.pos | |
737 | * because the range we want to delete could start in the middle | |
738 | * of k. | |
739 | * | |
740 | * (bch2_btree_iter_peek() does guarantee that iter.pos >= | |
741 | * bkey_start_pos(k.k)). | |
742 | */ | |
743 | delete.k.p = iter.pos; | |
744 | ||
5dd8c60e | 745 | if (iter.flags & BTREE_ITER_is_extents) |
8079aab0 KO |
746 | bch2_key_resize(&delete.k, |
747 | bpos_min(end, k.k->p).offset - | |
748 | iter.pos.offset); | |
749 | ||
750 | ret = bch2_trans_update(trans, &iter, &delete, update_flags) ?: | |
751 | bch2_trans_commit(trans, &disk_res, journal_seq, | |
cb52d23e | 752 | BCH_TRANS_COMMIT_no_enospc); |
8079aab0 KO |
753 | bch2_disk_reservation_put(trans->c, &disk_res); |
754 | err: | |
755 | /* | |
756 | * the bch2_trans_begin() call is in a weird place because we | |
757 | * need to call it after every transaction commit, to avoid path | |
758 | * overflow, but don't want to call it if the delete operation | |
759 | * is a no-op and we have no work to do: | |
760 | */ | |
761 | bch2_trans_begin(trans); | |
762 | ||
763 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) | |
764 | ret = 0; | |
765 | if (ret) | |
766 | break; | |
767 | } | |
768 | bch2_trans_iter_exit(trans, &iter); | |
769 | ||
c872afa2 | 770 | return ret ?: trans_was_restarted(trans, restart_count); |
8079aab0 KO |
771 | } |
772 | ||
773 | /* | |
774 | * bch_btree_delete_range - delete everything within a given range | |
775 | * | |
776 | * Range is a half open interval - [start, end) | |
777 | */ | |
778 | int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, | |
779 | struct bpos start, struct bpos end, | |
780 | unsigned update_flags, | |
781 | u64 *journal_seq) | |
782 | { | |
783 | int ret = bch2_trans_run(c, | |
6bd68ec2 | 784 | bch2_btree_delete_range_trans(trans, id, start, end, |
8079aab0 KO |
785 | update_flags, journal_seq)); |
786 | if (ret == -BCH_ERR_transaction_restart_nested) | |
787 | ret = 0; | |
788 | return ret; | |
789 | } | |
790 | ||
e07c28ab KO |
791 | int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree, |
792 | struct bpos pos, bool set) | |
793 | { | |
794 | struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k)); | |
795 | int ret = PTR_ERR_OR_ZERO(k); | |
796 | if (ret) | |
797 | return ret; | |
798 | ||
799 | bkey_init(&k->k); | |
800 | k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted; | |
801 | k->k.p = pos; | |
802 | ||
803 | struct btree_iter iter; | |
5dd8c60e | 804 | bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_intent); |
e07c28ab KO |
805 | |
806 | ret = bch2_btree_iter_traverse(&iter) ?: | |
807 | bch2_trans_update(trans, &iter, k, 0); | |
808 | bch2_trans_iter_exit(trans, &iter); | |
809 | return ret; | |
810 | } | |
811 | ||
506b1876 KO |
812 | int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree, |
813 | struct bpos pos, bool set) | |
8079aab0 | 814 | { |
c259bd95 | 815 | struct bkey_i k; |
8079aab0 | 816 | |
c259bd95 KO |
817 | bkey_init(&k.k); |
818 | k.k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted; | |
819 | k.k.p = pos; | |
8079aab0 | 820 | |
c259bd95 | 821 | return bch2_trans_update_buffered(trans, btree, &k); |
8079aab0 KO |
822 | } |
823 | ||
24de63da | 824 | static int __bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf, unsigned u64s) |
8079aab0 | 825 | { |
24de63da KO |
826 | struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(u64s)); |
827 | int ret = PTR_ERR_OR_ZERO(e); | |
8079aab0 | 828 | if (ret) |
24de63da | 829 | return ret; |
8079aab0 | 830 | |
24de63da KO |
831 | struct jset_entry_log *l = container_of(e, struct jset_entry_log, entry); |
832 | journal_entry_init(e, BCH_JSET_ENTRY_log, 0, 1, u64s); | |
833 | memcpy(l->d, buf->buf, buf->pos); | |
834 | return 0; | |
8079aab0 KO |
835 | } |
836 | ||
96dea3d5 | 837 | __printf(3, 0) |
8079aab0 KO |
838 | static int |
839 | __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, | |
840 | va_list args) | |
841 | { | |
24de63da KO |
842 | struct printbuf buf = PRINTBUF; |
843 | prt_vprintf(&buf, fmt, args); | |
844 | ||
845 | unsigned u64s = DIV_ROUND_UP(buf.pos, sizeof(u64)); | |
846 | prt_chars(&buf, '\0', u64s * sizeof(u64) - buf.pos); | |
847 | ||
848 | int ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0; | |
849 | if (ret) | |
850 | goto err; | |
8079aab0 | 851 | |
b895c703 | 852 | if (!test_bit(JOURNAL_running, &c->journal.flags)) { |
24de63da KO |
853 | ret = darray_make_room(&c->journal.early_journal_entries, jset_u64s(u64s)); |
854 | if (ret) | |
855 | goto err; | |
856 | ||
857 | struct jset_entry_log *l = (void *) &darray_top(c->journal.early_journal_entries); | |
858 | journal_entry_init(&l->entry, BCH_JSET_ENTRY_log, 0, 1, u64s); | |
859 | memcpy(l->d, buf.buf, buf.pos); | |
860 | c->journal.early_journal_entries.nr += jset_u64s(u64s); | |
8079aab0 KO |
861 | } else { |
862 | ret = bch2_trans_do(c, NULL, NULL, | |
cb52d23e | 863 | BCH_TRANS_COMMIT_lazy_rw|commit_flags, |
24de63da | 864 | __bch2_trans_log_msg(trans, &buf, u64s)); |
8079aab0 | 865 | } |
24de63da KO |
866 | err: |
867 | printbuf_exit(&buf); | |
8079aab0 KO |
868 | return ret; |
869 | } | |
870 | ||
96dea3d5 | 871 | __printf(2, 3) |
8079aab0 KO |
872 | int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...) |
873 | { | |
874 | va_list args; | |
875 | int ret; | |
876 | ||
877 | va_start(args, fmt); | |
878 | ret = __bch2_fs_log_msg(c, 0, fmt, args); | |
879 | va_end(args); | |
880 | return ret; | |
881 | } | |
882 | ||
883 | /* | |
884 | * Use for logging messages during recovery to enable reserved space and avoid | |
885 | * blocking. | |
886 | */ | |
96dea3d5 | 887 | __printf(2, 3) |
8079aab0 KO |
888 | int bch2_journal_log_msg(struct bch_fs *c, const char *fmt, ...) |
889 | { | |
890 | va_list args; | |
891 | int ret; | |
892 | ||
893 | va_start(args, fmt); | |
894 | ret = __bch2_fs_log_msg(c, BCH_WATERMARK_reclaim, fmt, args); | |
895 | va_end(args); | |
896 | return ret; | |
897 | } |