diff options
author | Jens Axboe <axboe@kernel.dk> | 2022-08-01 11:21:43 -0600 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2022-08-01 11:21:43 -0600 |
commit | fb0c50f4fedf8b654c2bf33f8c03fa7469cbd4ea (patch) | |
tree | bc9c2f26a1ed339164209c6f21a79a201087b494 | |
parent | c9ca8dcc66a99d1123f0fdc2dc161436b93d194b (diff) | |
parent | ae0a80935d6a65764b0db00c8b03d3807b4110a6 (diff) |
Merge branch 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-5.20/drivers-postfor-5.20/drivers-post
Pull MD fixes from Song:
"The major change is:
"1. Fix potential deadlock with raid5_quiesce and
raid5_get_active_stripe, by Logan Gunthorpe."
* 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
drivers:md:fix a potential use-after-free bug
md/raid5: Ensure batch_last is released before sleeping for quiesce
md/raid5: Move stripe_request_ctx up
md/raid5: Drop unnecessary call to r5c_check_stripe_cache_usage()
md/raid5: Make is_inactive_blocked() helper
md/raid5: Refactor raid5_get_active_stripe()
-rw-r--r-- | drivers/md/raid5.c | 164 | ||||
-rw-r--r-- | drivers/md/raid5.h | 2 |
2 files changed, 102 insertions, 64 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 3b7887428cd0..4456ac51f7c5 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -755,9 +755,54 @@ static bool has_failed(struct r5conf *conf) return degraded > conf->max_degraded; } -struct stripe_head * -raid5_get_active_stripe(struct r5conf *conf, sector_t sector, - int previous, int noblock, int noquiesce) +enum stripe_result { + STRIPE_SUCCESS = 0, + STRIPE_RETRY, + STRIPE_SCHEDULE_AND_RETRY, + STRIPE_FAIL, +}; + +struct stripe_request_ctx { + /* a reference to the last stripe_head for batching */ + struct stripe_head *batch_last; + + /* first sector in the request */ + sector_t first_sector; + + /* last sector in the request */ + sector_t last_sector; + + /* + * bitmap to track stripe sectors that have been added to stripes + * add one to account for unaligned requests + */ + DECLARE_BITMAP(sectors_to_do, RAID5_MAX_REQ_STRIPES + 1); + + /* the request had REQ_PREFLUSH, cleared after the first stripe_head */ + bool do_flush; +}; + +/* + * Block until another thread clears R5_INACTIVE_BLOCKED or + * there are fewer than 3/4 the maximum number of active stripes + * and there is an inactive stripe available. + */ +static bool is_inactive_blocked(struct r5conf *conf, int hash) +{ + int active = atomic_read(&conf->active_stripes); + + if (list_empty(conf->inactive_list + hash)) + return false; + + if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) + return true; + + return active < (conf->max_nr_stripes * 3 / 4); +} + +static struct stripe_head *__raid5_get_active_stripe(struct r5conf *conf, + struct stripe_request_ctx *ctx, sector_t sector, + bool previous, bool noblock, bool noquiesce) { struct stripe_head *sh; int hash = stripe_hash_locks_hash(conf, sector); @@ -766,45 +811,65 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector, spin_lock_irq(conf->hash_locks + hash); - do { - wait_event_lock_irq(conf->wait_for_quiescent, - conf->quiesce == 0 || noquiesce, +retry: + if (!noquiesce && conf->quiesce) { + /* + * Must release the reference to batch_last before waiting, + * on quiesce, otherwise the batch_last will hold a reference + * to a stripe and raid5_quiesce() will deadlock waiting for + * active_stripes to go to zero. + */ + if (ctx && ctx->batch_last) { + raid5_release_stripe(ctx->batch_last); + ctx->batch_last = NULL; + } + + wait_event_lock_irq(conf->wait_for_quiescent, !conf->quiesce, *(conf->hash_locks + hash)); - sh = find_get_stripe(conf, sector, conf->generation - previous, - hash); - if (sh) - break; + } - if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) { - sh = get_free_stripe(conf, hash); - if (!sh && !test_bit(R5_DID_ALLOC, &conf->cache_state)) - set_bit(R5_ALLOC_MORE, &conf->cache_state); - } - if (noblock && !sh) - break; + sh = find_get_stripe(conf, sector, conf->generation - previous, hash); + if (sh) + goto out; + + if (test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) + goto wait_for_stripe; + sh = get_free_stripe(conf, hash); + if (sh) { r5c_check_stripe_cache_usage(conf); - if (!sh) { - set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); - r5l_wake_reclaim(conf->log, 0); - wait_event_lock_irq(conf->wait_for_stripe, - !list_empty(conf->inactive_list + hash) && - (atomic_read(&conf->active_stripes) - < (conf->max_nr_stripes * 3 / 4) - || !test_bit(R5_INACTIVE_BLOCKED, - &conf->cache_state)), - *(conf->hash_locks + hash)); - clear_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); - } else { - init_stripe(sh, sector, previous); - atomic_inc(&sh->count); - } - } while (sh == NULL); + init_stripe(sh, sector, previous); + atomic_inc(&sh->count); + goto out; + } + + if (!test_bit(R5_DID_ALLOC, &conf->cache_state)) + set_bit(R5_ALLOC_MORE, &conf->cache_state); +wait_for_stripe: + if (noblock) + goto out; + + set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); + r5l_wake_reclaim(conf->log, 0); + wait_event_lock_irq(conf->wait_for_stripe, + is_inactive_blocked(conf, hash), + *(conf->hash_locks + hash)); + clear_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); + goto retry; + +out: spin_unlock_irq(conf->hash_locks + hash); return sh; } +struct stripe_head *raid5_get_active_stripe(struct r5conf *conf, + sector_t sector, bool previous, bool noblock, bool noquiesce) +{ + return __raid5_get_active_stripe(conf, NULL, sector, previous, noblock, + noquiesce); +} + static bool is_full_stripe_write(struct stripe_head *sh) { BUG_ON(sh->overwrite_disks > (sh->disks - sh->raid_conf->max_degraded)); @@ -2887,10 +2952,10 @@ static void raid5_end_write_request(struct bio *bi) if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); - raid5_release_stripe(sh); if (sh->batch_head && sh != sh->batch_head) raid5_release_stripe(sh->batch_head); + raid5_release_stripe(sh); } static void raid5_error(struct mddev *mddev, struct md_rdev *rdev) @@ -5856,33 +5921,6 @@ static bool stripe_ahead_of_reshape(struct mddev *mddev, struct r5conf *conf, return ret; } -enum stripe_result { - STRIPE_SUCCESS = 0, - STRIPE_RETRY, - STRIPE_SCHEDULE_AND_RETRY, - STRIPE_FAIL, -}; - -struct stripe_request_ctx { - /* a reference to the last stripe_head for batching */ - struct stripe_head *batch_last; - - /* first sector in the request */ - sector_t first_sector; - - /* last sector in the request */ - sector_t last_sector; - - /* - * bitmap to track stripe sectors that have been added to stripes - * add one to account for unaligned requests - */ - DECLARE_BITMAP(sectors_to_do, RAID5_MAX_REQ_STRIPES + 1); - - /* the request had REQ_PREFLUSH, cleared after the first stripe_head */ - bool do_flush; -}; - static int add_all_stripe_bios(struct r5conf *conf, struct stripe_request_ctx *ctx, struct stripe_head *sh, struct bio *bi, int forwrite, int previous) @@ -5974,8 +6012,8 @@ static enum stripe_result make_stripe_request(struct mddev *mddev, pr_debug("raid456: %s, sector %llu logical %llu\n", __func__, new_sector, logical_sector); - sh = raid5_get_active_stripe(conf, new_sector, previous, - (bi->bi_opf & REQ_RAHEAD), 0); + sh = __raid5_get_active_stripe(conf, ctx, new_sector, previous, + (bi->bi_opf & REQ_RAHEAD), 0); if (unlikely(!sh)) { /* cannot get stripe, just give-up */ bi->bi_status = BLK_STS_IOERR; diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 638d29863503..a5082bed83c8 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -812,7 +812,7 @@ extern sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector, struct stripe_head *sh); extern struct stripe_head * raid5_get_active_stripe(struct r5conf *conf, sector_t sector, - int previous, int noblock, int noquiesce); + bool previous, bool noblock, bool noquiesce); extern int raid5_calc_degraded(struct r5conf *conf); extern int r5c_journal_mode_set(struct mddev *mddev, int journal_mode); #endif |