From: Jens Axboe Date: Wed, 13 Nov 2024 22:24:56 +0000 (-0700) Subject: block: support uncached IO X-Git-Url: https://git.kernel.dk/?a=commitdiff_plain;h=refs%2Fheads%2Fbuffered-uncached.8;p=linux-2.6-block.git block: support uncached IO Add a bio helper for deferring uncached writes to a workqueue, like what is done for bios that contain folios that need marking as dirty. The read side of block fops is already fine; only the write side needs special handling to avoid IRQ context for uncached write completions. Enable uncached buffered IO to block devices by setting FOP_UNCACHED. Signed-off-by: Jens Axboe --- diff --git a/block/bdev.c b/block/bdev.c index 738e3c8457e7..6344a1050ae6 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -439,6 +439,9 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) iput(inode); return NULL; } + bdev->uncached_list = NULL; + INIT_WORK(&bdev->uncached_work, bio_uncache_work); + spin_lock_init(&bdev->uncached_lock); bdev->bd_disk = disk; return bdev; } diff --git a/block/bio.c b/block/bio.c index 699a78c85c75..d93b47c86986 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1522,8 +1522,43 @@ EXPORT_SYMBOL_GPL(bio_set_pages_dirty); static void bio_dirty_fn(struct work_struct *work); static DECLARE_WORK(bio_dirty_work, bio_dirty_fn); -static DEFINE_SPINLOCK(bio_dirty_lock); static struct bio *bio_dirty_list; +static DEFINE_SPINLOCK(bio_dirty_lock); + +void bio_uncache_work(struct work_struct *work) +{ + struct block_device *bdev; + struct bio *bio, *next; + + bdev = container_of(work, struct block_device, uncached_work); + spin_lock_irq(&bdev->uncached_lock); + next = bdev->uncached_list; + bdev->uncached_list = NULL; + spin_unlock_irq(&bdev->uncached_lock); + + while ((bio = next) != NULL) { + struct buffer_head *bh = bio->bi_private; + + next = bio->bi_next; + bh->b_end_io(bh, !bio->bi_status); + bio_put(bio); + } +} + +void bio_reap_uncached_write(struct bio *bio) +{ + struct block_device *bdev = bio->bi_bdev; + unsigned long flags; 
+ bool was_empty; + + spin_lock_irqsave(&bdev->uncached_lock, flags); + bio->bi_next = bdev->uncached_list; + bdev->uncached_list = bio; + was_empty = !bio->bi_next; + spin_unlock_irqrestore(&bdev->uncached_lock, flags); + if (was_empty) + schedule_work(&bdev->uncached_work); +} /* * This runs in process context diff --git a/block/fops.c b/block/fops.c index 13a67940d040..f617c66633ac 100644 --- a/block/fops.c +++ b/block/fops.c @@ -872,7 +872,7 @@ const struct file_operations def_blk_fops = { .splice_write = iter_file_splice_write, .fallocate = blkdev_fallocate, .uring_cmd = blkdev_uring_cmd, - .fop_flags = FOP_BUFFER_RASYNC, + .fop_flags = FOP_BUFFER_RASYNC | FOP_UNCACHED, }; static __init int blkdev_init(void) diff --git a/fs/buffer.c b/fs/buffer.c index cc8452f60251..940c536529c1 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2763,8 +2763,13 @@ static void end_bio_bh_io_sync(struct bio *bio) if (unlikely(bio_flagged(bio, BIO_QUIET))) set_bit(BH_Quiet, &bh->b_state); - bh->b_end_io(bh, !bio->bi_status); - bio_put(bio); + if (op_is_write(bio_op(bio)) && + folio_test_uncached(page_folio(bh->b_page))) { + bio_reap_uncached_write(bio); + } else { + bh->b_end_io(bh, !bio->bi_status); + bio_put(bio); + } } static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, diff --git a/include/linux/bio.h b/include/linux/bio.h index 60830a6a5939..71731551a5a4 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -427,6 +427,8 @@ void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter); void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); +void bio_uncache_work(struct work_struct *work); +void bio_reap_uncached_write(struct bio *bio); extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, struct bio *src, struct bvec_iter *src_iter); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index dce7615c35e7..44a536ca98f4 
100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -74,6 +74,14 @@ struct block_device { #ifdef CONFIG_SECURITY void *bd_security; #endif + + /* + * For punting of uncached buffered writes to a workqueue context + */ + struct bio *uncached_list; + spinlock_t uncached_lock; + struct work_struct uncached_work; + /* * keep this out-of-line as it's both big and not needed in the fast * path