wake_up(&dm_global_eventq);
}
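+/*
+ * Jump labels for rarely used features (IO statistics, swap bio
+ * throttling, zoned targets).  Defined false so the common path costs
+ * only a patched-out branch; each key is enabled elsewhere once the
+ * corresponding feature is first configured.
+ */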
+DEFINE_STATIC_KEY_FALSE(stats_enabled);
+DEFINE_STATIC_KEY_FALSE(swap_bios_enabled);
+DEFINE_STATIC_KEY_FALSE(zoned_enabled);
+
/*
* One of these is allocated (on-stack) per original bio.
*/
else
bio_end_io_acct(bio, start_time);
- if (unlikely(dm_stats_used(&md->stats)))
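+ /* Skip the per-device dm_stats_used() check unless stats were ever enabled */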
+ if (static_branch_unlikely(&stats_enabled) &&
+ unlikely(dm_stats_used(&md->stats)))
dm_stats_account_io(&md->stats, bio_data_dir(bio),
bio->bi_iter.bi_sector, bio_sectors(bio),
end, start_time, stats_aux);
bio->bi_iter.bi_size = bi_size;
}
-static void __dm_start_io_acct(struct dm_io *io, struct bio *bio)
+static void __dm_start_io_acct(struct dm_io *io)
{
- dm_io_acct(false, io->md, bio, io->start_time, &io->stats_aux);
+ dm_io_acct(false, io->md, io->orig_bio, io->start_time, &io->stats_aux);
}
static void dm_start_io_acct(struct dm_io *io, struct bio *clone)
{
- /* Must account IO to DM device in terms of orig_bio */
- struct bio *bio = io->orig_bio;
-
/*
* Ensure IO accounting is only ever started once.
- * Expect no possibility for race unless DM_TIO_IS_DUPLICATE_BIO.
*/
- if (!clone ||
- likely(!dm_tio_flagged(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO))) {
- if (WARN_ON_ONCE(dm_io_flagged(io, DM_IO_ACCOUNTED)))
- return;
+ if (dm_io_flagged(io, DM_IO_ACCOUNTED))
+ return;
+
+ /* Expect no possibility for race unless DM_TIO_IS_DUPLICATE_BIO. */
+ if (!clone || likely(dm_tio_is_normal(clone_to_tio(clone)))) {
dm_io_set_flag(io, DM_IO_ACCOUNTED);
} else {
unsigned long flags;
- if (dm_io_flagged(io, DM_IO_ACCOUNTED))
- return;
/* Can afford locking given DM_TIO_IS_DUPLICATE_BIO */
spin_lock_irqsave(&io->lock, flags);
dm_io_set_flag(io, DM_IO_ACCOUNTED);
spin_unlock_irqrestore(&io->lock, flags);
}
- __dm_start_io_acct(io, bio);
+ __dm_start_io_acct(io);
}
-static void dm_end_io_acct(struct dm_io *io, struct bio *bio)
+static void dm_end_io_acct(struct dm_io *io)
{
- dm_io_acct(true, io->md, bio, io->start_time, &io->stats_aux);
+ dm_io_acct(true, io->md, io->orig_bio, io->start_time, &io->stats_aux);
}
static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
io = container_of(tio, struct dm_io, tio);
io->magic = DM_IO_MAGIC;
- io->status = 0;
+ io->status = BLK_STS_OK;
atomic_set(&io->io_count, 1);
this_cpu_inc(*md->pending_io);
io->orig_bio = NULL;
io->start_time = jiffies;
io->flags = 0;
- dm_stats_record_start(&md->stats, &io->stats_aux);
+ if (static_branch_unlikely(&stats_enabled))
+ dm_stats_record_start(&md->stats, &io->stats_aux);
return io;
}
* function to access the md->map field, and make sure they call
* dm_put_live_table() when finished.
*/
-struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx) __acquires(md->io_barrier)
+struct dm_table *dm_get_live_table(struct mapped_device *md,
+ int *srcu_idx) __acquires(md->io_barrier)
{
*srcu_idx = srcu_read_lock(&md->io_barrier);
return srcu_dereference(md->map, &md->io_barrier);
}
-void dm_put_live_table(struct mapped_device *md, int srcu_idx) __releases(md->io_barrier)
+void dm_put_live_table(struct mapped_device *md,
+ int srcu_idx) __releases(md->io_barrier)
{
srcu_read_unlock(&md->io_barrier, srcu_idx);
}
rcu_read_unlock();
}
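+/*
+ * REQ_NOWAIT bios must never block during submission, so the plain
+ * rcu_read_lock() fast path is sufficient for them; all other bios may
+ * sleep while the live table is referenced and so take the SRCU
+ * reference instead.
+ */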
+static inline struct dm_table *dm_get_live_table_bio(struct mapped_device *md,
+ int *srcu_idx, struct bio *bio)
+{
+ if (bio->bi_opf & REQ_NOWAIT)
+ return dm_get_live_table_fast(md);
+ else
+ return dm_get_live_table(md, srcu_idx);
+}
+
+static inline void dm_put_live_table_bio(struct mapped_device *md, int srcu_idx,
+ struct bio *bio)
+{
+ if (bio->bi_opf & REQ_NOWAIT)
+ dm_put_live_table_fast(md);
+ else
+ dm_put_live_table(md, srcu_idx);
+}
+
static char *_dm_claim_ptr = "I belong to device-mapper";
/*
io_error = io->status;
if (dm_io_flagged(io, DM_IO_ACCOUNTED))
- dm_end_io_acct(io, bio);
+ dm_end_io_acct(io);
else if (!io_error) {
/*
* Must handle target that DM_MAPIO_SUBMITTED only to
* then bio_endio() rather than dm_submit_bio_remap()
*/
- __dm_start_io_acct(io, bio);
- dm_end_io_acct(io, bio);
+ __dm_start_io_acct(io);
+ dm_end_io_acct(io);
}
free_io(io);
smp_wmb();
* may only reflect a subset of the pre-split original)
* so clear REQ_POLLED in case of requeue.
*/
- bio->bi_opf &= ~REQ_POLLED;
+ bio_clear_polled(bio);
if (io_error == BLK_STS_AGAIN) {
/* io_uring doesn't handle BLK_STS_AGAIN (yet) */
queue_io(md, bio);
}
}
-static inline bool dm_tio_is_normal(struct dm_target_io *tio)
-{
- return (dm_tio_flagged(tio, DM_TIO_INSIDE_DM_IO) &&
- !dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO));
-}
-
/*
* Decrements the number of outstanding ios that a bio has been
* cloned into, completing the original io if necc.
*/
-void dm_io_dec_pending(struct dm_io *io, blk_status_t error)
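+/* Drop one dm_io reference; the final put completes the original bio */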
+static inline void __dm_io_dec_pending(struct dm_io *io)
+{
+ if (atomic_dec_and_test(&io->io_count))
+ dm_io_complete(io);
+}
+
+static void dm_io_set_error(struct dm_io *io, blk_status_t error)
{
+ unsigned long flags;
+
/* Push-back supersedes any I/O errors */
- if (unlikely(error)) {
- unsigned long flags;
- spin_lock_irqsave(&io->lock, flags);
- if (!(io->status == BLK_STS_DM_REQUEUE &&
- __noflush_suspending(io->md)))
- io->status = error;
- spin_unlock_irqrestore(&io->lock, flags);
+ spin_lock_irqsave(&io->lock, flags);
+ if (!(io->status == BLK_STS_DM_REQUEUE &&
+ __noflush_suspending(io->md))) {
+ io->status = error;
}
+ spin_unlock_irqrestore(&io->lock, flags);
+}
- if (atomic_dec_and_test(&io->io_count))
- dm_io_complete(io);
+void dm_io_dec_pending(struct dm_io *io, blk_status_t error)
+{
+ if (unlikely(error))
+ dm_io_set_error(io, error);
+
+ __dm_io_dec_pending(io);
}
void disable_discard(struct mapped_device *md)
/* device doesn't really support DISCARD, disable it */
limits->max_discard_sectors = 0;
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, md->queue);
}
void disable_write_zeroes(struct mapped_device *md)
static void clone_endio(struct bio *bio)
{
blk_status_t error = bio->bi_status;
+ struct request_queue *q = bio->bi_bdev->bd_disk->queue;
struct dm_target_io *tio = clone_to_tio(bio);
+ struct dm_target *ti = tio->ti;
+ dm_endio_fn endio = ti->type->end_io;
struct dm_io *io = tio->io;
- struct mapped_device *md = tio->io->md;
- dm_endio_fn endio = tio->ti->type->end_io;
- struct request_queue *q = bio->bi_bdev->bd_disk->queue;
+ struct mapped_device *md = io->md;
if (unlikely(error == BLK_STS_TARGET)) {
if (bio_op(bio) == REQ_OP_DISCARD &&
- !q->limits.max_discard_sectors)
+ !bdev_max_discard_sectors(bio->bi_bdev))
disable_discard(md);
else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
!q->limits.max_write_zeroes_sectors)
disable_write_zeroes(md);
}
- if (blk_queue_is_zoned(q))
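+ /* Zoned-target completion handling; the static key keeps it off the fast path */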
+ if (static_branch_unlikely(&zoned_enabled) &&
+ unlikely(blk_queue_is_zoned(q)))
dm_zone_endio(io, bio);
if (endio) {
- int r = endio(tio->ti, bio, &error);
+ int r = endio(ti, bio, &error);
switch (r) {
case DM_ENDIO_REQUEUE:
- /*
- * Requeuing writes to a sequential zone of a zoned
- * target will break the sequential write pattern:
- * fail such IO.
- */
- if (WARN_ON_ONCE(dm_is_zone_write(md, bio)))
- error = BLK_STS_IOERR;
- else
+ if (static_branch_unlikely(&zoned_enabled)) {
+ /*
+ * Requeuing writes to a sequential zone of a zoned
+ * target will break the sequential write pattern:
+ * fail such IO.
+ */
+ if (WARN_ON_ONCE(dm_is_zone_write(md, bio)))
+ error = BLK_STS_IOERR;
+ else
+ error = BLK_STS_DM_REQUEUE;
+ } else
error = BLK_STS_DM_REQUEUE;
fallthrough;
case DM_ENDIO_DONE:
}
}
- if (unlikely(swap_bios_limit(tio->ti, bio))) {
- struct mapped_device *md = io->md;
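+ /* Release the swap bio throttle taken via down() in __map_bio() */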
+ if (static_branch_unlikely(&swap_bios_enabled) &&
+ unlikely(swap_bios_limit(ti, bio)))
up(&md->swap_bios_semaphore);
- }
free_tio(bio);
dm_io_dec_pending(io, error);
* +--------------------+---------------+-------+
*
* <-------------- *tio->len_ptr --------------->
- * <------- bi_size ------->
+ * <----- bio_sectors ----->
* <-- n_sectors -->
*
* Region 1 was already iterated over with bio_advance or similar function.
void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
{
struct dm_target_io *tio = clone_to_tio(bio);
- unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;
+ unsigned bio_sectors = bio_sectors(bio);
BUG_ON(dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO));
BUG_ON(op_is_zone_mgmt(bio_op(bio)));
BUG_ON(bio_op(bio) == REQ_OP_ZONE_APPEND);
- BUG_ON(bi_size > *tio->len_ptr);
- BUG_ON(n_sectors > bi_size);
+ BUG_ON(bio_sectors > *tio->len_ptr);
+ BUG_ON(n_sectors > bio_sectors);
- *tio->len_ptr -= bi_size - n_sectors;
+ *tio->len_ptr -= bio_sectors - n_sectors;
bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
}
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
static void __map_bio(struct bio *clone)
{
struct dm_target_io *tio = clone_to_tio(clone);
- int r;
- struct dm_io *io = tio->io;
struct dm_target *ti = tio->ti;
+ struct dm_io *io = tio->io;
+ struct mapped_device *md = io->md;
+ int r;
clone->bi_end_io = clone_endio;
dm_io_inc_pending(io);
tio->old_sector = clone->bi_iter.bi_sector;
- if (unlikely(swap_bios_limit(ti, clone))) {
- struct mapped_device *md = io->md;
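+ /* Throttle swap bios; the matching up() is in clone_endio() */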
+ if (static_branch_unlikely(&swap_bios_enabled) &&
+ unlikely(swap_bios_limit(ti, clone))) {
int latch = get_swap_bios();
if (unlikely(latch != md->swap_bios))
__set_swap_bios_limit(md, latch);
down(&md->swap_bios_semaphore);
}
- /*
- * Check if the IO needs a special mapping due to zone append emulation
- * on zoned target. In this case, dm_zone_map_bio() calls the target
- * map operation.
- */
- if (dm_emulate_zone_append(io->md))
- r = dm_zone_map_bio(tio);
- else
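+ /* Only zoned targets can require zone append emulation, tracked by the static key */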
+ if (static_branch_unlikely(&zoned_enabled)) {
+ /*
+ * Check if the IO needs a special mapping due to zone append
+ * emulation on zoned target. In this case, dm_zone_map_bio()
+ * calls the target map operation.
+ */
+ if (unlikely(dm_emulate_zone_append(md)))
+ r = dm_zone_map_bio(tio);
+ else
+ r = ti->type->map(ti, clone);
+ } else
r = ti->type->map(ti, clone);
switch (r) {
* the bio has been remapped so dispatch it, but defer
* dm_start_io_acct() until after possible bio_split().
*/
- __dm_submit_bio_remap(clone, disk_devt(io->md->disk),
+ __dm_submit_bio_remap(clone, disk_devt(md->disk),
tio->old_sector);
dm_io_set_flag(io, DM_IO_START_ACCT);
break;
case DM_MAPIO_KILL:
case DM_MAPIO_REQUEUE:
- if (unlikely(swap_bios_limit(ti, clone)))
- up(&io->md->swap_bios_semaphore);
+ if (static_branch_unlikely(&swap_bios_enabled) &&
+ unlikely(swap_bios_limit(ti, clone)))
+ up(&md->swap_bios_semaphore);
free_tio(clone);
if (r == DM_MAPIO_KILL)
dm_io_dec_pending(io, BLK_STS_IOERR);
}
static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti,
- int *result)
+ blk_status_t *status)
{
unsigned num_bios = 0;
* reconfiguration might also have changed that since the
* check was performed.
*/
- if (!num_bios)
- *result = -EOPNOTSUPP;
+ if (unlikely(!num_bios))
+ *status = BLK_STS_NOTSUPP;
else {
__send_changing_extent_only(ci, ti, num_bios);
- *result = 0;
+ *status = BLK_STS_OK;
}
return true;
}
/*
* Select the correct strategy for processing a non-flush bio.
*/
-static int __split_and_process_bio(struct clone_info *ci)
+static blk_status_t __split_and_process_bio(struct clone_info *ci)
{
struct bio *clone;
struct dm_target *ti;
unsigned len;
- int r;
+ blk_status_t error = BLK_STS_IOERR;
ti = dm_table_find_target(ci->map, ci->sector);
- if (!ti)
- return -EIO;
-
- if (__process_abnormal_io(ci, ti, &r))
- return r;
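+ /*
+ * A missing target fails the bio with the preset BLK_STS_IOERR;
+ * __process_abnormal_io() overwrites the status when it handles the bio.
+ */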
+ if (unlikely(!ti || __process_abnormal_io(ci, ti, &error)))
+ return error;
/*
* Only support bio polling for normal IO, and the target io is
ci->sector += len;
ci->sector_count -= len;
- return 0;
+ return BLK_STS_OK;
}
static void init_clone_info(struct clone_info *ci, struct mapped_device *md,
ci->sector_count = bio_sectors(bio);
/* Shouldn't happen but sector_count was being set to 0 so... */
- if (WARN_ON_ONCE(op_is_zone_mgmt(bio_op(bio)) && ci->sector_count))
+ if (static_branch_unlikely(&zoned_enabled) &&
+ WARN_ON_ONCE(op_is_zone_mgmt(bio_op(bio)) && ci->sector_count))
ci->sector_count = 0;
}
struct dm_table *map, struct bio *bio)
{
struct clone_info ci;
+ struct dm_io *io;
struct bio *orig_bio = NULL;
- int error = 0;
+ blk_status_t error = BLK_STS_OK;
init_clone_info(&ci, md, map, bio);
+ io = ci.io;
if (bio->bi_opf & REQ_PREFLUSH) {
__send_empty_flush(&ci);
}
error = __split_and_process_bio(&ci);
- ci.io->map_task = NULL;
+ io->map_task = NULL;
if (error || !ci.sector_count)
goto out;
/*
* Remainder must be passed to submit_bio_noacct() so it gets handled
* *after* bios already submitted have been completely processed.
- * We take a clone of the original to store in ci.io->orig_bio to be
+ * We take a clone of the original to store in io->orig_bio to be
* used by dm_end_io_acct() and for dm_io_complete() to use for
* completion handling.
*/
out:
if (!orig_bio)
orig_bio = bio;
- smp_store_release(&ci.io->orig_bio, orig_bio);
- if (dm_io_flagged(ci.io, DM_IO_START_ACCT))
- dm_start_io_acct(ci.io, NULL);
+ smp_store_release(&io->orig_bio, orig_bio);
+ if (dm_io_flagged(io, DM_IO_START_ACCT))
+ dm_start_io_acct(io, NULL);
/*
* Drop the extra reference count for non-POLLED bio, and hold one
* bio->bi_private, so that dm_poll_bio can poll them all.
*/
if (error || !ci.submit_as_polled)
- dm_io_dec_pending(ci.io, errno_to_blk_status(error));
+ dm_io_dec_pending(io, error);
else
- dm_queue_poll_io(bio, ci.io);
+ dm_queue_poll_io(bio, io);
}
static void dm_submit_bio(struct bio *bio)
int srcu_idx;
struct dm_table *map;
- map = dm_get_live_table(md, &srcu_idx);
+ map = dm_get_live_table_bio(md, &srcu_idx, bio);
/* If suspended, or map not yet available, queue this IO for later */
if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) ||
* Use blk_queue_split() for abnormal IO (e.g. discard, writesame, etc)
* otherwise associated queue_limits won't be imposed.
*/
- if (is_abnormal_io(bio))
+ if (unlikely(is_abnormal_io(bio)))
blk_queue_split(&bio);
dm_split_and_process_bio(md, map, bio);
out:
- dm_put_live_table(md, srcu_idx);
+ dm_put_live_table_bio(md, srcu_idx, bio);
}
static bool dm_poll_dm_io(struct dm_io *io, struct io_comp_batch *iob,
if (dm_poll_dm_io(io, iob, flags)) {
hlist_del_init(&io->node);
/*
- * clone_endio() has already occurred, so passing
- * error as 0 here doesn't override io->status
+ * clone_endio() has already occurred, so no
+ * error handling is needed here.
*/
- dm_io_dec_pending(io, 0);
+ __dm_io_dec_pending(io);
}
}
EXPORT_SYMBOL_GPL(dm_noflush_suspending);
struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_queue_mode type,
- unsigned integrity, unsigned per_io_data_size,
- unsigned min_pool_size)
+ unsigned per_io_data_size, unsigned min_pool_size,
+ bool integrity, bool poll)
{
struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
unsigned int pool_size = 0;
pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size);
front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + DM_TARGET_IO_BIO_OFFSET;
io_front_pad = roundup(per_io_data_size, __alignof__(struct dm_io)) + DM_IO_BIO_OFFSET;
- ret = bioset_init(&pools->io_bs, pool_size, io_front_pad, 0);
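+ /* BIOSET_PERCPU_CACHE enables the per-cpu bio cache used by polled bio allocation */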
+ ret = bioset_init(&pools->io_bs, pool_size, io_front_pad, poll ? BIOSET_PERCPU_CACHE : 0);
if (ret)
goto out;
if (integrity && bioset_integrity_create(&pools->io_bs, pool_size))