#undef x
}
-static int bch2_alloc_read_fn(struct btree_trans *trans, struct bkey_s_c k)
+int bch2_alloc_read(struct bch_fs *c)
{
- struct bch_fs *c = trans->c;
+ struct btree_trans trans;
+ struct btree_iter iter;
+ struct bkey_s_c k;
struct bch_dev *ca;
struct bucket *g;
struct bkey_alloc_unpacked u;
-
- if (!bkey_is_alloc(k.k))
- return 0;
-
- ca = bch_dev_bkey_exists(c, k.k->p.inode);
- g = bucket(ca, k.k->p.offset);
- u = bch2_alloc_unpack(k);
-
- *bucket_gen(ca, k.k->p.offset) = u.gen;
- g->_mark.gen = u.gen;
- g->_mark.data_type = u.data_type;
- g->_mark.dirty_sectors = u.dirty_sectors;
- g->_mark.cached_sectors = u.cached_sectors;
- g->_mark.stripe = u.stripe != 0;
- g->stripe = u.stripe;
- g->stripe_redundancy = u.stripe_redundancy;
- g->io_time[READ] = u.read_time;
- g->io_time[WRITE] = u.write_time;
- g->oldest_gen = u.oldest_gen;
- g->gen_valid = 1;
-
- return 0;
-}
-
-int bch2_alloc_read(struct bch_fs *c)
-{
- struct btree_trans trans;
int ret;
bch2_trans_init(&trans, c, 0, 0);
down_read(&c->gc_lock);
- ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_alloc, bch2_alloc_read_fn);
+
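+ /*
+ * Now that btree iterators return journal keys when journal replay
+ * hasn't finished, a plain btree iterator replaces
+ * bch2_btree_and_journal_walk() here:
+ */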
+ for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
+ BTREE_ITER_PREFETCH, k, ret) {
+ if (!bkey_is_alloc(k.k))
+ continue;
+
+ ca = bch_dev_bkey_exists(c, k.k->p.inode);
+ g = bucket(ca, k.k->p.offset);
+ u = bch2_alloc_unpack(k);
+
+ *bucket_gen(ca, k.k->p.offset) = u.gen;
+ g->_mark.gen = u.gen;
+ g->_mark.data_type = u.data_type;
+ g->_mark.dirty_sectors = u.dirty_sectors;
+ g->_mark.cached_sectors = u.cached_sectors;
+ g->_mark.stripe = u.stripe != 0;
+ g->stripe = u.stripe;
+ g->stripe_redundancy = u.stripe_redundancy;
+ g->io_time[READ] = u.read_time;
+ g->io_time[WRITE] = u.write_time;
+ g->oldest_gen = u.oldest_gen;
+ g->gen_valid = 1;
+ }
+ bch2_trans_iter_exit(&trans, &iter);
+
up_read(&c->gc_lock);
bch2_trans_exit(&trans);
+
if (ret) {
bch_err(c, "error reading alloc info: %i", ret);
return ret;
u64 reflink_hint;
reflink_gc_table reflink_gc_table;
size_t reflink_gc_nr;
- size_t reflink_gc_idx;
/* VFS IO PATH - fs-io.c */
struct bio_set writepage_bioset;
return 0;
}
-static int bch2_gc_reflink_done_initial_fn(struct btree_trans *trans,
- struct bkey_s_c k)
-{
- struct bch_fs *c = trans->c;
- struct reflink_gc *r;
- const __le64 *refcount = bkey_refcount_c(k);
- char buf[200];
- int ret = 0;
-
- if (!refcount)
- return 0;
-
- r = genradix_ptr(&c->reflink_gc_table, c->reflink_gc_idx++);
- if (!r)
- return -ENOMEM;
-
- if (!r ||
- r->offset != k.k->p.offset ||
- r->size != k.k->size) {
- bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
- return -EINVAL;
- }
-
- if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
- "reflink key has wrong refcount:\n"
- " %s\n"
- " should be %u",
- (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
- r->refcount)) {
- struct bkey_i *new;
-
- new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
- if (!new) {
- ret = -ENOMEM;
- goto fsck_err;
- }
-
- bkey_reassemble(new, k);
-
- if (!r->refcount) {
- new->k.type = KEY_TYPE_deleted;
- new->k.size = 0;
- } else {
- *bkey_refcount(new) = cpu_to_le64(r->refcount);
- }
-
- ret = bch2_journal_key_insert(c, BTREE_ID_reflink, 0, new);
- kfree(new);
- }
-fsck_err:
- return ret;
-}
-
static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
bool metadata_only)
{
bch2_trans_init(&trans, c, 0, 0);
- if (initial) {
- c->reflink_gc_idx = 0;
-
- ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_reflink,
- bch2_gc_reflink_done_initial_fn);
- goto out;
- }
-
for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
const __le64 *refcount = bkey_refcount_c(k);
if (!refcount)
continue;
- r = genradix_ptr(&c->reflink_gc_table, idx);
+ r = genradix_ptr(&c->reflink_gc_table, idx++);
if (!r ||
r->offset != k.k->p.offset ||
r->size != k.k->size) {
else
*bkey_refcount(new) = cpu_to_le64(r->refcount);
- ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+ ret = initial
+ ? bch2_journal_key_insert(c, BTREE_ID_reflink, 0, new)
+ : __bch2_trans_do(&trans, NULL, NULL, 0,
__bch2_btree_insert(&trans, BTREE_ID_reflink, new));
kfree(new);
}
fsck_err:
bch2_trans_iter_exit(&trans, &iter);
-out:
c->reflink_gc_nr = 0;
bch2_trans_exit(&trans);
return ret;
}
-static int bch2_gc_stripes_done_initial_fn(struct btree_trans *trans,
- struct bkey_s_c k)
+static int bch2_gc_stripes_done(struct bch_fs *c, bool initial,
+ bool metadata_only)
{
- struct bch_fs *c = trans->c;
+ struct btree_trans trans;
+ struct btree_iter iter;
+ struct bkey_s_c k;
struct gc_stripe *m;
const struct bch_stripe *s;
char buf[200];
unsigned i;
int ret = 0;
- if (k.k->type != KEY_TYPE_stripe)
+ if (metadata_only)
return 0;
- s = bkey_s_c_to_stripe(k).v;
+ bch2_trans_init(&trans, c, 0, 0);
+
+ for_each_btree_key(&trans, iter, BTREE_ID_stripes, POS_MIN,
+ BTREE_ITER_PREFETCH, k, ret) {
+ if (k.k->type != KEY_TYPE_stripe)
+ continue;
- m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
+ s = bkey_s_c_to_stripe(k).v;
+ m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
- for (i = 0; i < s->nr_blocks; i++)
- if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0))
- goto inconsistent;
- return 0;
+ for (i = 0; i < s->nr_blocks; i++)
+ if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0))
+ goto inconsistent;
+ continue;
inconsistent:
- if (fsck_err_on(true, c,
- "stripe has wrong block sector count %u:\n"
- " %s\n"
- " should be %u", i,
- (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
- m ? m->block_sectors[i] : 0)) {
- struct bkey_i_stripe *new;
+ if (fsck_err_on(true, c,
+ "stripe has wrong block sector count %u:\n"
+ " %s\n"
+ " should be %u", i,
+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
+ m ? m->block_sectors[i] : 0)) {
+ struct bkey_i_stripe *new;
- new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
- if (!new) {
- ret = -ENOMEM;
- goto fsck_err;
- }
+ new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
+ if (!new) {
+ ret = -ENOMEM;
+ break;
+ }
- bkey_reassemble(&new->k_i, k);
+ bkey_reassemble(&new->k_i, k);
- for (i = 0; i < new->v.nr_blocks; i++)
- stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);
+ for (i = 0; i < new->v.nr_blocks; i++)
+ stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);
- ret = bch2_journal_key_insert(c, BTREE_ID_stripes, 0, &new->k_i);
- kfree(new);
+ ret = initial
+ ? bch2_journal_key_insert(c, BTREE_ID_stripes, 0, &new->k_i)
+ : __bch2_trans_do(&trans, NULL, NULL, 0,
+ __bch2_btree_insert(&trans, BTREE_ID_stripes, &new->k_i));
+ kfree(new);
+ }
}
fsck_err:
- return ret;
-}
-
-static int bch2_gc_stripes_done(struct bch_fs *c, bool initial,
- bool metadata_only)
-{
- struct btree_trans trans;
- int ret = 0;
-
- if (metadata_only)
- return 0;
-
- bch2_trans_init(&trans, c, 0, 0);
-
- if (initial) {
- ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_stripes,
- bch2_gc_stripes_done_initial_fn);
- } else {
- BUG();
- }
+ bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
return ret;
}
-static int bch2_gc_reflink_start_initial_fn(struct btree_trans *trans,
- struct bkey_s_c k)
-{
-
- struct bch_fs *c = trans->c;
- struct reflink_gc *r;
- const __le64 *refcount = bkey_refcount_c(k);
-
- if (!refcount)
- return 0;
-
- r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
- GFP_KERNEL);
- if (!r)
- return -ENOMEM;
-
- r->offset = k.k->p.offset;
- r->size = k.k->size;
- r->refcount = 0;
- return 0;
-}
-
static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
bool metadata_only)
{
bch2_trans_init(&trans, c, 0, 0);
c->reflink_gc_nr = 0;
- if (initial) {
- ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_reflink,
- bch2_gc_reflink_start_initial_fn);
- goto out;
- }
-
for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
const __le64 *refcount = bkey_refcount_c(k);
r->refcount = 0;
}
bch2_trans_iter_exit(&trans, &iter);
-out:
+
bch2_trans_exit(&trans);
return ret;
}
#include "error.h"
#include "extents.h"
#include "journal.h"
+#include "recovery.h"
#include "replicas.h"
#include "subvolume.h"
#include "trace.h"
static void btree_path_verify_new_node(struct btree_trans *trans,
struct btree_path *path, struct btree *b)
{
+ struct bch_fs *c = trans->c;
struct btree_path_level *l;
unsigned plevel;
bool parent_locked;
if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
return;
+ if (trans->journal_replay_not_finished)
+ return;
+
plevel = b->c.level + 1;
if (!btree_path_node(path, plevel))
return;
char buf4[100];
struct bkey uk = bkey_unpack_key(b, k);
- bch2_dump_btree_node(trans->c, l->b);
+ bch2_dump_btree_node(c, l->b);
bch2_bpos_to_text(&PBUF(buf1), path->pos);
bch2_bkey_to_text(&PBUF(buf2), &uk);
bch2_bpos_to_text(&PBUF(buf3), b->data->min_key);
return ret;
}
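+/*
+ * Journal-aware version of btree_path_prefetch(): while journal replay
+ * hasn't finished, child node pointers may still live in the journal, so
+ * read ahead through the combined btree and journal iterator:
+ */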
+static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *path,
+ struct btree_and_journal_iter *jiter)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_s_c k;
+ struct bkey_buf tmp;
+ unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
+ ? (path->level > 1 ? 0 : 2)
+ : (path->level > 1 ? 1 : 16);
+ bool was_locked = btree_node_locked(path, path->level);
+ int ret = 0;
+
+ bch2_bkey_buf_init(&tmp);
+
+ while (nr-- && !ret) {
+ if (!bch2_btree_node_relock(trans, path, path->level))
+ break;
+
+ bch2_btree_and_journal_iter_advance(jiter);
+ k = bch2_btree_and_journal_iter_peek(jiter);
+ if (!k.k)
+ break;
+
+ bch2_bkey_buf_reassemble(&tmp, c, k);
+ ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id,
+ path->level - 1);
+ }
+
+ if (!was_locked)
+ btree_node_unlock(path, path->level);
+
+ bch2_bkey_buf_exit(&tmp, c);
+ return ret;
+}
+
static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
struct btree_path *path,
unsigned plevel, struct btree *b)
btree_node_unlock(path, plevel);
}
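+/*
+ * Slow path of btree_path_down(): when journal replay hasn't finished,
+ * look up the key for the child node through the combined btree and
+ * journal iterator instead of the node iterator alone:
+ */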
+static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned flags,
+ struct bkey_buf *out)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_path_level *l = path_l(path);
+ struct btree_and_journal_iter jiter;
+ struct bkey_s_c k;
+ int ret = 0;
+
+ __bch2_btree_and_journal_iter_init_node_iter(&jiter, c, l->b, l->iter, path->pos);
+
+ k = bch2_btree_and_journal_iter_peek(&jiter);
+
+ bch2_bkey_buf_reassemble(out, c, k);
+
+ if (flags & BTREE_ITER_PREFETCH)
+ ret = btree_path_prefetch_j(trans, path, &jiter);
+
+ bch2_btree_and_journal_iter_exit(&jiter);
+ return ret;
+}
+
static __always_inline int btree_path_down(struct btree_trans *trans,
struct btree_path *path,
unsigned flags,
EBUG_ON(!btree_node_locked(path, path->level));
bch2_bkey_buf_init(&tmp);
- bch2_bkey_buf_unpack(&tmp, c, l->b,
- bch2_btree_node_iter_peek(&l->iter, l->b));
+
+ if (unlikely(trans->journal_replay_not_finished)) {
+ ret = btree_node_iter_and_journal_peek(trans, path, flags, &tmp);
+ if (ret)
+ goto err;
+ } else {
+ bch2_bkey_buf_unpack(&tmp, c, l->b,
+ bch2_btree_node_iter_peek(&l->iter, l->b));
+
+ if (flags & BTREE_ITER_PREFETCH) {
+ ret = btree_path_prefetch(trans, path);
+ if (ret)
+ goto err;
+ }
+ }
b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip);
ret = PTR_ERR_OR_ZERO(b);
mark_btree_node_locked(path, level, lock_type);
btree_path_level_init(trans, path, b);
- if (tmp.k->k.type == KEY_TYPE_btree_ptr_v2 &&
+ if (likely(!trans->journal_replay_not_finished &&
+ tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
unlikely(b != btree_node_mem_ptr(tmp.k)))
btree_node_mem_ptr_set(trans, path, level + 1, b);
- if (flags & BTREE_ITER_PREFETCH)
- ret = btree_path_prefetch(trans, path);
-
if (btree_node_read_locked(path, level + 1))
btree_node_unlock(path, level + 1);
path->level = level;
return ret;
}
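+/*
+ * Returns the first non-overwritten journal key for this path's btree and
+ * level at or after its current position, or NULL if there are none:
+ */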
+static struct bkey_i *__btree_trans_peek_journal(struct btree_trans *trans,
+ struct btree_path *path)
+{
+ struct journal_keys *keys = &trans->c->journal_keys;
+ size_t idx = bch2_journal_key_search(keys, path->btree_id,
+ path->level, path->pos);
+
+ while (idx < keys->nr && keys->d[idx].overwritten)
+ idx++;
+
+ return (idx < keys->nr &&
+ keys->d[idx].btree_id == path->btree_id &&
+ keys->d[idx].level == path->level)
+ ? keys->d[idx].k
+ : NULL;
+}
+
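+/* For peek_slot(): only a journal key at exactly iter->pos counts: */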
+static noinline
+struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
+ struct btree_iter *iter)
+{
+ struct bkey_i *k = __btree_trans_peek_journal(trans, iter->path);
+
+ if (k && !bpos_cmp(k->k.p, iter->pos)) {
+ iter->k = k->k;
+ return bkey_i_to_s_c(k);
+ } else {
+ return bkey_s_c_null;
+ }
+}
+
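+/*
+ * For peek(): a journal key overrides the btree key when it sorts at or
+ * before the next btree key (or the end of the node, if the btree has
+ * none):
+ */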
+static noinline
+struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct bkey_i *next_journal =
+ __btree_trans_peek_journal(trans, iter->path);
+
+ if (next_journal &&
+ bpos_cmp(next_journal->k.p,
+ k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) {
+ iter->k = next_journal->k;
+ k = bkey_i_to_s_c(next_journal);
+ }
+
+ return k;
+}
+
/**
* bch2_btree_iter_peek: returns first key greater than or equal to iterator's
* current position
goto out;
}
- next_update = btree_trans_peek_updates(iter);
k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
- /*
- * In the btree, deleted keys sort before non deleted:
- */
- if (k.k && bkey_deleted(k.k) &&
- (!next_update ||
- bpos_cmp(k.k->p, next_update->k.p) <= 0)) {
- search_key = k.k->p;
- continue;
- }
+ if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
+ k = btree_trans_peek_journal(trans, iter, k);
+
+ next_update = btree_trans_peek_updates(iter);
if (next_update &&
bpos_cmp(next_update->k.p,
k = bkey_i_to_s_c(next_update);
}
+ if (k.k && bkey_deleted(k.k)) {
+ /*
+ * If we've got a whiteout, and it's after the search
+ * key, advance the search key to the whiteout instead
+ * of just after the whiteout - it might be a btree
+ * whiteout, with a real key at the same position, since
+ * in the btree deleted keys sort before non deleted.
+ */
+ search_key = bpos_cmp(search_key, k.k->p)
+ ? k.k->p
+ : bpos_successor(k.k->p);
+ continue;
+ }
+
if (likely(k.k)) {
/*
* We can never have a key in a leaf node at POS_MAX, so
EBUG_ON(iter->path->cached || iter->path->level);
EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES);
+
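+ /* peek_prev() doesn't know how to overlay journal keys yet: */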
+ if (iter->flags & BTREE_ITER_WITH_JOURNAL)
+ return bkey_s_c_err(-EIO);
+
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify_entry_exit(iter);
!(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
struct bkey_i *next_update;
- next_update = btree_trans_peek_updates(iter);
- if (next_update &&
+ if ((next_update = btree_trans_peek_updates(iter)) &&
!bpos_cmp(next_update->k.p, iter->pos)) {
iter->k = next_update->k;
k = bkey_i_to_s_c(next_update);
- } else {
- k = bch2_btree_path_peek_slot(iter->path, &iter->k);
+ goto out;
}
- if (!k.k ||
- ((iter->flags & BTREE_ITER_ALL_SNAPSHOTS)
- ? bpos_cmp(iter->pos, k.k->p)
- : bkey_cmp(iter->pos, k.k->p))) {
- bkey_init(&iter->k);
- iter->k.p = iter->pos;
- k = (struct bkey_s_c) { &iter->k, NULL };
- }
+ if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL) &&
+ (k = btree_trans_peek_slot_journal(trans, iter)).k)
+ goto out;
+
+ k = bch2_btree_path_peek_slot(iter->path, &iter->k);
} else {
struct bpos next;
k = (struct bkey_s_c) { &iter->k, NULL };
}
}
-
+out:
iter->path->should_be_locked = true;
bch2_btree_iter_verify_entry_exit(iter);
btree_type_has_snapshots(btree_id))
flags |= BTREE_ITER_FILTER_SNAPSHOTS;
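+ /*
+ * Until journal replay is finished, iterators need to also return
+ * keys from the journal:
+ */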
+ if (trans->journal_replay_not_finished)
+ flags |= BTREE_ITER_WITH_JOURNAL;
+
iter->trans = trans;
iter->path = NULL;
iter->btree_id = btree_id;
memset(trans, 0, sizeof(*trans));
trans->c = c;
trans->ip = _RET_IP_;
+ trans->journal_replay_not_finished =
+ !test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
bch2_trans_alloc_paths(trans, c);
#define BTREE_ITER_CACHED_NOFILL (1 << 8)
#define BTREE_ITER_CACHED_NOCREATE (1 << 9)
#define BTREE_ITER_WITH_UPDATES (1 << 10)
-#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 11)
-#define BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
-#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 13)
-#define BTREE_ITER_NOPRESERVE (1 << 14)
+#define BTREE_ITER_WITH_JOURNAL (1 << 11)
+#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
+#define BTREE_ITER_ALL_SNAPSHOTS (1 << 13)
+#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 14)
+#define BTREE_ITER_NOPRESERVE (1 << 15)
enum btree_path_uptodate {
BTREE_ITER_UPTODATE = 0,
bool restarted:1;
bool paths_sorted:1;
bool journal_transaction_names:1;
+ bool journal_replay_not_finished:1;
/*
* For when bch2_trans_update notices we'll be splitting a compressed
* extent:
#include "journal.h"
#include "journal_reclaim.h"
#include "keylist.h"
+#include "recovery.h"
#include "replicas.h"
#include "super-io.h"
#include "trace.h"
BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 &&
!btree_ptr_sectors_written(insert));
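+ /*
+ * While journal replay is still in flight, mark journal keys at this
+ * position overwritten so replay won't clobber this update:
+ */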
+ if (unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)))
+ bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p);
+
invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b)) ?:
bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert));
if (invalid) {
ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip);
- if (!ret && unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)))
+ if (!ret && unlikely(trans->journal_replay_not_finished))
bch2_drop_overwrites_from_journal(trans);
trans_for_each_update(trans, i)
bch2_stripes_heap_insert(c, m, iter.pos);
}
-static int bch2_stripes_read_fn(struct btree_trans *trans, struct bkey_s_c k)
+int bch2_stripes_read(struct bch_fs *c)
{
+ struct btree_trans trans;
+ struct btree_iter iter;
+ struct bkey_s_c k;
const struct bch_stripe *s;
- struct bch_fs *c = trans->c;
struct stripe *m;
unsigned i;
- int ret = 0;
+ int ret;
- if (k.k->type != KEY_TYPE_stripe)
- return 0;
+ bch2_trans_init(&trans, c, 0, 0);
- ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
- if (ret)
- return ret;
+ for_each_btree_key(&trans, iter, BTREE_ID_stripes, POS_MIN,
+ BTREE_ITER_PREFETCH, k, ret) {
+ if (k.k->type != KEY_TYPE_stripe)
+ continue;
- s = bkey_s_c_to_stripe(k).v;
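+ /* Ensure an entry for this stripe is allocated in c->stripes: */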
+ ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
+ if (ret)
+ break;
- m = genradix_ptr(&c->stripes, k.k->p.offset);
- m->alive = true;
- m->sectors = le16_to_cpu(s->sectors);
- m->algorithm = s->algorithm;
- m->nr_blocks = s->nr_blocks;
- m->nr_redundant = s->nr_redundant;
- m->blocks_nonempty = 0;
+ s = bkey_s_c_to_stripe(k).v;
- for (i = 0; i < s->nr_blocks; i++)
- m->blocks_nonempty += !!stripe_blockcount_get(s, i);
+ m = genradix_ptr(&c->stripes, k.k->p.offset);
+ m->alive = true;
+ m->sectors = le16_to_cpu(s->sectors);
+ m->algorithm = s->algorithm;
+ m->nr_blocks = s->nr_blocks;
+ m->nr_redundant = s->nr_redundant;
+ m->blocks_nonempty = 0;
- spin_lock(&c->ec_stripes_heap_lock);
- bch2_stripes_heap_update(c, m, k.k->p.offset);
- spin_unlock(&c->ec_stripes_heap_lock);
-
- return ret;
-}
+ for (i = 0; i < s->nr_blocks; i++)
+ m->blocks_nonempty += !!stripe_blockcount_get(s, i);
-int bch2_stripes_read(struct bch_fs *c)
-{
- struct btree_trans trans;
- int ret;
+ spin_lock(&c->ec_stripes_heap_lock);
+ bch2_stripes_heap_update(c, m, k.k->p.offset);
+ spin_unlock(&c->ec_stripes_heap_lock);
+ }
+ bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_init(&trans, c, 0, 0);
- ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_stripes,
- bch2_stripes_read_fn);
bch2_trans_exit(&trans);
+
if (ret)
bch_err(c, "error reading stripes: %i", ret);
static int __journal_key_cmp(enum btree_id l_btree_id,
unsigned l_level,
struct bpos l_pos,
- struct journal_key *r)
+ const struct journal_key *r)
{
return (cmp_int(l_btree_id, r->btree_id) ?:
cmp_int(l_level, r->level) ?:
bpos_cmp(l_pos, r->k->k.p));
}
-static int journal_key_cmp(struct journal_key *l, struct journal_key *r)
+static int journal_key_cmp(const struct journal_key *l, const struct journal_key *r)
{
- return (cmp_int(l->btree_id, r->btree_id) ?:
- cmp_int(l->level, r->level) ?:
- bpos_cmp(l->k->k.p, r->k->k.p));
+ return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r);
}
-static size_t journal_key_search(struct journal_keys *journal_keys,
- enum btree_id id, unsigned level,
- struct bpos pos)
+size_t bch2_journal_key_search(struct journal_keys *journal_keys,
+ enum btree_id id, unsigned level,
+ struct bpos pos)
{
size_t l = 0, r = journal_keys->nr, m;
};
struct journal_keys *keys = &c->journal_keys;
struct journal_iter *iter;
- unsigned idx = journal_key_search(keys, id, level, k->k.p);
+ size_t idx = bch2_journal_key_search(keys, id, level, k->k.p);
BUG_ON(test_bit(BCH_FS_RW, &c->flags));
return 0;
}
+/*
+ * Can only be used from the recovery thread while we're still RO - can't be
+ * used once we've gone RW, as journal_keys is at that point used by multiple
+ * threads:
+ */
int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id,
unsigned level, struct bkey_i *k)
{
unsigned level, struct bpos pos)
{
struct journal_keys *keys = &c->journal_keys;
- size_t idx = journal_key_search(keys, btree, level, pos);
+ size_t idx = bch2_journal_key_search(keys, btree, level, pos);
if (idx < keys->nr &&
keys->d[idx].btree_id == btree &&
static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter)
{
- struct journal_key *k = iter->idx - iter->keys->nr
- ? iter->keys->d + iter->idx : NULL;
+ struct journal_key *k = iter->keys->d + iter->idx;
- if (k &&
- k->btree_id == iter->btree_id &&
- k->level == iter->level)
- return k->k;
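+ /* Skip any keys that have been overwritten: */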
+ while (k < iter->keys->d + iter->keys->nr &&
+ k->btree_id == iter->btree_id &&
+ k->level == iter->level) {
+ if (!k->overwritten)
+ return k->k;
+
+ iter->idx++;
+ k = iter->keys->d + iter->idx;
+ }
- iter->idx = iter->keys->nr;
return NULL;
}
iter->btree_id = id;
iter->level = level;
iter->keys = &c->journal_keys;
- iter->idx = journal_key_search(&c->journal_keys, id, level, pos);
- list_add(&iter->list, &c->journal_iters);
+ iter->idx = bch2_journal_key_search(&c->journal_keys, id, level, pos);
}
static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
bch2_journal_iter_exit(&iter->journal);
}
-void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
- struct bch_fs *c,
- struct btree *b)
+void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
+ struct bch_fs *c,
+ struct btree *b,
+ struct btree_node_iter node_iter,
+ struct bpos pos)
{
memset(iter, 0, sizeof(*iter));
iter->b = b;
- bch2_btree_node_iter_init_from_start(&iter->node_iter, iter->b);
- bch2_journal_iter_init(c, &iter->journal,
- b->c.btree_id, b->c.level, b->data->min_key);
-}
-
-/* Walk btree, overlaying keys from the journal: */
-
-static void btree_and_journal_iter_prefetch(struct bch_fs *c, struct btree *b,
- struct btree_and_journal_iter iter)
-{
- unsigned i = 0, nr = b->c.level > 1 ? 2 : 16;
- struct bkey_s_c k;
- struct bkey_buf tmp;
-
- BUG_ON(!b->c.level);
-
- bch2_bkey_buf_init(&tmp);
-
- while (i < nr &&
- (k = bch2_btree_and_journal_iter_peek(&iter)).k) {
- bch2_bkey_buf_reassemble(&tmp, c, k);
-
- bch2_btree_node_prefetch(c, NULL, NULL, tmp.k,
- b->c.btree_id, b->c.level - 1);
-
- bch2_btree_and_journal_iter_advance(&iter);
- i++;
- }
-
- bch2_bkey_buf_exit(&tmp, c);
-}
-
-static int bch2_btree_and_journal_walk_recurse(struct btree_trans *trans, struct btree *b,
- enum btree_id btree_id,
- btree_walk_key_fn key_fn)
-{
- struct bch_fs *c = trans->c;
- struct btree_and_journal_iter iter;
- struct bkey_s_c k;
- struct bkey_buf tmp;
- struct btree *child;
- int ret = 0;
-
- bch2_bkey_buf_init(&tmp);
- bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
-
- while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
- if (b->c.level) {
- bch2_bkey_buf_reassemble(&tmp, c, k);
-
- child = bch2_btree_node_get_noiter(c, tmp.k,
- b->c.btree_id, b->c.level - 1,
- false);
-
- ret = PTR_ERR_OR_ZERO(child);
- if (ret)
- break;
-
- btree_and_journal_iter_prefetch(c, b, iter);
-
- ret = bch2_btree_and_journal_walk_recurse(trans, child,
- btree_id, key_fn);
- six_unlock_read(&child->c.lock);
- } else {
- ret = key_fn(trans, k);
- }
-
- if (ret)
- break;
-
- bch2_btree_and_journal_iter_advance(&iter);
- }
-
- bch2_btree_and_journal_iter_exit(&iter);
- bch2_bkey_buf_exit(&tmp, c);
- return ret;
+ iter->node_iter = node_iter;
+ bch2_journal_iter_init(c, &iter->journal, b->c.btree_id, b->c.level, pos);
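+ /*
+ * bch2_journal_iter_exit() does a list_del(); initialize the list head
+ * so that's safe even when the iterator is never added to
+ * c->journal_iters:
+ */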
+ INIT_LIST_HEAD(&iter->journal.list);
}
-int bch2_btree_and_journal_walk(struct btree_trans *trans, enum btree_id btree_id,
- btree_walk_key_fn key_fn)
+/*
+ * This version is used by btree_gc before the filesystem has gone RW and
+ * multithreaded, so it uses the journal_iters list:
+ */
+void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
+ struct bch_fs *c,
+ struct btree *b)
{
- struct bch_fs *c = trans->c;
- struct btree *b = c->btree_roots[btree_id].b;
- int ret = 0;
-
- if (btree_node_fake(b))
- return 0;
-
- six_lock_read(&b->c.lock, NULL, NULL);
- ret = bch2_btree_and_journal_walk_recurse(trans, b, btree_id, key_fn);
- six_unlock_read(&b->c.lock);
+ struct btree_node_iter node_iter;
- return ret;
+ bch2_btree_node_iter_init_from_start(&node_iter, b);
+ __bch2_btree_and_journal_iter_init_node_iter(iter, c, b, node_iter, b->data->min_key);
+ list_add(&iter->journal.list, &c->journal_iters);
}
/* sort and dedup all keys in the journal: */
const struct journal_key *l = _l;
const struct journal_key *r = _r;
- return cmp_int(l->btree_id, r->btree_id) ?:
- cmp_int(l->level, r->level) ?:
- bpos_cmp(l->k->k.p, r->k->k.p) ?:
+ return journal_key_cmp(l, r) ?:
cmp_int(l->journal_seq, r->journal_seq) ?:
cmp_int(l->journal_offset, r->journal_offset);
}
} last;
};
+size_t bch2_journal_key_search(struct journal_keys *, enum btree_id,
+ unsigned, struct bpos);
+
int bch2_journal_key_insert_take(struct bch_fs *, enum btree_id,
unsigned, struct bkey_i *);
int bch2_journal_key_insert(struct bch_fs *, enum btree_id,
struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *);
void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *);
+void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
+ struct bch_fs *, struct btree *,
+ struct btree_node_iter, struct bpos);
void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
struct bch_fs *,
struct btree *);
-typedef int (*btree_walk_key_fn)(struct btree_trans *, struct bkey_s_c);
-
-int bch2_btree_and_journal_walk(struct btree_trans *, enum btree_id, btree_walk_key_fn);
-
void bch2_journal_keys_free(struct journal_keys *);
void bch2_journal_entries_free(struct list_head *);