*p += bytes;
}
+struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *a)
+{
+ struct bkey_alloc_unpacked ret = { .gen = a->gen };
+ const void *d = a->data;
+ unsigned idx = 0;
+
+#define x(_name, _bits) ret._name = get_alloc_field(a, &d, idx++);
+ BCH_ALLOC_FIELDS()
+#undef x
+ return ret;
+}
+
+static void bch2_alloc_pack(struct bkey_i_alloc *dst,
+ const struct bkey_alloc_unpacked src)
+{
+ unsigned idx = 0;
+ void *d = dst->v.data;
+
+ dst->v.fields = 0;
+ dst->v.gen = src.gen;
+
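+ /* append each field in BCH_ALLOC_FIELDS() order: */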
+#define x(_name, _bits) put_alloc_field(dst, &d, idx++, src._name);
+ BCH_ALLOC_FIELDS()
+#undef x
+
+ set_bkey_val_bytes(&dst->k, (void *) d - (void *) &dst->v);
+}
+
static unsigned bch_alloc_val_u64s(const struct bch_alloc *a)
{
unsigned i, bytes = offsetof(struct bch_alloc, data);
static void __alloc_read_key(struct bucket *g, const struct bch_alloc *a)
{
const void *d = a->data;
- unsigned idx = 0;
+ unsigned idx = 0, data_type, dirty_sectors, cached_sectors;
+ struct bucket_mark m;
- g->_mark.gen = a->gen;
- g->gen_valid = 1;
g->io_time[READ] = get_alloc_field(a, &d, idx++);
g->io_time[WRITE] = get_alloc_field(a, &d, idx++);
- g->_mark.data_type = get_alloc_field(a, &d, idx++);
- g->_mark.dirty_sectors = get_alloc_field(a, &d, idx++);
- g->_mark.cached_sectors = get_alloc_field(a, &d, idx++);
+ data_type = get_alloc_field(a, &d, idx++);
+ dirty_sectors = get_alloc_field(a, &d, idx++);
+ cached_sectors = get_alloc_field(a, &d, idx++);
g->oldest_gen = get_alloc_field(a, &d, idx++);
+
+ bucket_cmpxchg(g, m, ({
+ m.gen = a->gen;
+ m.data_type = data_type;
+ m.dirty_sectors = dirty_sectors;
+ m.cached_sectors = cached_sectors;
+ }));
+
+ g->gen_valid = 1;
}
static void __alloc_write_key(struct bkey_i_alloc *a, struct bucket *g,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_USE_ALLOC_RESERVE|
+ BTREE_INSERT_NOMARK|
flags,
BTREE_INSERT_ENTRY(iter, &a->k_i));
if (ret)
? 0
: bch2_btree_insert_at(c, NULL, NULL,
BTREE_INSERT_NOFAIL|
- BTREE_INSERT_JOURNAL_REPLAY,
+ BTREE_INSERT_JOURNAL_REPLAY|
+ BTREE_INSERT_NOMARK,
BTREE_INSERT_ENTRY(&iter, k));
err:
bch2_btree_iter_unlock(&iter);
return -1;
}
+/*
+ * returns sequence number of most recent journal entry that updated this
+ * bucket:
+ */
+static u64 bucket_journal_seq(struct bch_fs *c, struct bucket_mark m)
+{
+ if (m.journal_seq_valid) {
+ u64 journal_seq = atomic64_read(&c->journal.seq);
+ u64 bucket_seq = journal_seq;
+
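+ /*
+ * the bucket mark only stores the low 16 bits of the journal seq;
+ * splice them onto the high bits of the current journal seq, and if
+ * that lands in the future the update was in the previous 16-bit
+ * window:
+ */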
+ bucket_seq &= ~((u64) U16_MAX);
+ bucket_seq |= m.journal_seq;
+
+ if (bucket_seq > journal_seq)
+ bucket_seq -= 1 << 16;
+
+ return bucket_seq;
+ } else {
+ return 0;
+ }
+}
+
+static int bch2_invalidate_one_bucket2(struct bch_fs *c, struct bch_dev *ca,
+ struct btree_iter *iter,
+ u64 *journal_seq, unsigned flags)
+{
+#if 0
+ __BKEY_PADDED(k, BKEY_ALLOC_VAL_U64s_MAX) alloc_key;
+#else
+ /* hack: */
+ __BKEY_PADDED(k, 8) alloc_key;
+#endif
+ struct bkey_i_alloc *a;
+ struct bkey_alloc_unpacked u;
+ struct bucket_mark m;
+ struct bkey_s_c k;
+ bool invalidating_cached_data;
+ size_t b;
+ int ret;
+
+ BUG_ON(!ca->alloc_heap.used ||
+ !ca->alloc_heap.data[0].nr);
+ b = ca->alloc_heap.data[0].bucket;
+
+ /* first, put on free_inc and mark as owned by allocator: */
+ percpu_down_read(&c->mark_lock);
+ spin_lock(&c->freelist_lock);
+
+ verify_not_on_freelist(c, ca, b);
+
+ BUG_ON(!fifo_push(&ca->free_inc, b));
+
+ bch2_mark_alloc_bucket(c, ca, b, true, gc_pos_alloc(c, NULL), 0);
+ m = bucket(ca, b)->mark;
+
+ spin_unlock(&c->freelist_lock);
+ percpu_up_read(&c->mark_lock);
+
+ bch2_btree_iter_cond_resched(iter);
+
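+ /* runtime guard for the padded-key hack above: */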
+ BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
+
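+ /* now rewrite the alloc key: read the existing fields, invalidate the bucket, write it back: */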
+ bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b));
+retry:
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = btree_iter_err(k);
+ if (ret)
+ return ret;
+
+ if (k.k && k.k->type == KEY_TYPE_alloc)
+ u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
+ else
+ memset(&u, 0, sizeof(u));
+
+ invalidating_cached_data = u.cached_sectors != 0;
+
+ //BUG_ON(u.dirty_sectors);
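+ /* invalidate the bucket: zero the counters and bump the gen: */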
+ u.data_type = 0;
+ u.dirty_sectors = 0;
+ u.cached_sectors = 0;
+ u.read_time = c->bucket_clock[READ].hand;
+ u.write_time = c->bucket_clock[WRITE].hand;
+ u.gen++;
+
+ a = bkey_alloc_init(&alloc_key.k);
+ a->k.p = iter->pos;
+ bch2_alloc_pack(a, u);
+
+ ret = bch2_btree_insert_at(c, NULL,
+ invalidating_cached_data ? journal_seq : NULL,
+ BTREE_INSERT_ATOMIC|
+ BTREE_INSERT_NOCHECK_RW|
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_USE_RESERVE|
+ BTREE_INSERT_USE_ALLOC_RESERVE|
+ flags,
+ BTREE_INSERT_ENTRY(iter, &a->k_i));
+ if (ret == -EINTR)
+ goto retry;
+
+ if (!ret) {
+ /* remove from alloc_heap: */
+ struct alloc_heap_entry e, *top = ca->alloc_heap.data;
+
+ top->bucket++;
+ top->nr--;
+
+ if (!top->nr)
+ heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
+
+ /*
+ * Make sure we flush the last journal entry that updated this
+ * bucket (i.e. deleting the last reference) before writing to
+ * this bucket again:
+ */
+ *journal_seq = max(*journal_seq, bucket_journal_seq(c, m));
+ } else {
+ size_t b2;
+
+ /* insert failed: release allocator ownership and take the bucket back off free_inc: */
+ percpu_down_read(&c->mark_lock);
+ spin_lock(&c->freelist_lock);
+
+ bch2_mark_alloc_bucket(c, ca, b, false,
+ gc_pos_alloc(c, NULL), 0);
+
+ BUG_ON(!fifo_pop_back(&ca->free_inc, b2));
+ BUG_ON(b != b2);
+
+ spin_unlock(&c->freelist_lock);
+ percpu_up_read(&c->mark_lock);
+ }
+
+ return ret;
+}
+
static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t bucket, u64 *flush_seq)
{
percpu_up_read(&c->mark_lock);
- if (m.journal_seq_valid) {
- u64 journal_seq = atomic64_read(&c->journal.seq);
- u64 bucket_seq = journal_seq;
-
- bucket_seq &= ~((u64) U16_MAX);
- bucket_seq |= m.journal_seq;
-
- if (bucket_seq > journal_seq)
- bucket_seq -= 1 << 16;
-
- *flush_seq = max(*flush_seq, bucket_seq);
- }
+ *flush_seq = max(*flush_seq, bucket_journal_seq(c, m));
return m.cached_sectors != 0;
}
struct btree_iter iter;
u64 journal_seq = 0;
int ret = 0;
- long b;
bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
/* Only use nowait if we've already invalidated at least one bucket: */
while (!ret &&
!fifo_full(&ca->free_inc) &&
- (b = next_alloc_bucket(ca)) >= 0) {
- bool must_flush =
- bch2_invalidate_one_bucket(c, ca, b, &journal_seq);
-
- ret = __bch2_alloc_write_key(c, ca, b, &iter,
- must_flush ? &journal_seq : NULL,
+ ca->alloc_heap.used)
+ ret = bch2_invalidate_one_bucket2(c, ca, &iter, &journal_seq,
BTREE_INSERT_GC_LOCK_HELD|
(!fifo_empty(&ca->free_inc)
? BTREE_INSERT_NOWAIT : 0));
- }
bch2_btree_iter_unlock(&iter);
ca, b, owned_by_allocator);
}
+static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
+ bool inserting,
+ struct bch_fs_usage *fs_usage,
+ unsigned journal_seq, unsigned flags,
+ bool gc)
+{
+ struct bkey_alloc_unpacked u;
+ struct bch_dev *ca;
+ struct bucket *g;
+ struct bucket_mark old, m;
+
+ if (!inserting)
+ return 0;
+
+ /*
+ * alloc btree is read in by bch2_alloc_read, not gc:
+ */
+ if (flags & BCH_BUCKET_MARK_GC)
+ return 0;
+
+ u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
+ ca = bch_dev_bkey_exists(c, k.k->p.inode);
+ g = __bucket(ca, k.k->p.offset, gc);
+
+ /*
+ * this should currently only be getting called from the bucket
+ * invalidate path:
+ */
+ BUG_ON(u.dirty_sectors);
+ BUG_ON(u.cached_sectors);
+ BUG_ON(!g->mark.owned_by_allocator);
+
+ old = bucket_data_cmpxchg(c, ca, fs_usage, g, m, ({
+ m.gen = u.gen;
+ m.data_type = u.data_type;
+ m.dirty_sectors = u.dirty_sectors;
+ m.cached_sectors = u.cached_sectors;
+ }));
+
+ g->io_time[READ] = u.read_time;
+ g->io_time[WRITE] = u.write_time;
+ g->oldest_gen = u.oldest_gen;
+ g->gen_valid = 1;
+
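+ /* account for the cached data we just invalidated: */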
+ if (old.cached_sectors) {
+ update_cached_sectors(c, fs_usage, ca->dev_idx,
+ -old.cached_sectors);
+ trace_invalidate(ca, bucket_to_sector(ca, k.k->p.offset),
+ old.cached_sectors);
+ }
+
+ return 0;
+}
+
#define checked_add(a, b) \
do { \
unsigned _res = (unsigned) (a) + (b); \
fs_usage = this_cpu_ptr(c->usage[gc]);
switch (k.k->type) {
+ case KEY_TYPE_alloc:
+ return bch2_mark_alloc(c, k, inserting,
+ fs_usage, journal_seq, flags, gc);
case KEY_TYPE_btree_ptr:
return bch2_mark_extent(c, k, inserting
- ? c->opts.btree_node_size
- : -c->opts.btree_node_size,
- BCH_DATA_BTREE,
- fs_usage, journal_seq, flags, gc);
+ ? c->opts.btree_node_size
+ : -c->opts.btree_node_size,
+ BCH_DATA_BTREE,
+ fs_usage, journal_seq, flags, gc);
case KEY_TYPE_extent:
return bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
- fs_usage, journal_seq, flags, gc);
+ fs_usage, journal_seq, flags, gc);
case KEY_TYPE_stripe:
return bch2_mark_stripe(c, k, inserting,
- fs_usage, journal_seq, flags, gc);
+ fs_usage, journal_seq, flags, gc);
case KEY_TYPE_inode:
if (inserting)
fs_usage->s.nr_inodes++;
preempt_disable();
fs_usage = bch2_fs_usage_get_scratch(c);
- if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
+ if (!(trans->flags & BTREE_INSERT_NOMARK))
bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
bpos_min(insert->k->k.p, b->key.k.p).offset -
bkey_start_offset(&insert->k->k),