fs,block: yield devices early
authorChristian Brauner <brauner@kernel.org>
Tue, 26 Mar 2024 12:47:22 +0000 (13:47 +0100)
committerChristian Brauner <brauner@kernel.org>
Wed, 27 Mar 2024 12:17:15 +0000 (13:17 +0100)
Currently a device is only really released once the umount returns to
userspace due to how file closing works. That ultimately could cause
an old umount assumption to be violated that concurrent umount and mount
don't fail. So an exclusively held device with a temporary holder should
be yielded before the filesystem is gone. Add a helper that allows
callers to do that. This also allows us to remove the two holder ops
that Linus wasn't excited about.

Link: https://lore.kernel.org/r/20240326-vfs-bdev-end_holder-v1-1-20af85202918@kernel.org
Fixes: f3a608827d1f ("bdev: open block device as files") # mainline only
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
13 files changed:
block/bdev.c
drivers/mtd/devices/block2mtd.c
fs/bcachefs/super-io.c
fs/cramfs/inode.c
fs/ext4/super.c
fs/f2fs/super.c
fs/jfs/jfs_logmgr.c
fs/reiserfs/journal.c
fs/romfs/super.c
fs/super.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_super.c
include/linux/blkdev.h

index a1946a902df36ecc212c6748e8322cc76ea38e62..b8e32d933a6369aebbffe36e2ce3165cc2288bef 100644 (file)
@@ -583,9 +583,6 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder,
        mutex_unlock(&bdev->bd_holder_lock);
        bd_clear_claiming(whole, holder);
        mutex_unlock(&bdev_lock);
-
-       if (hops && hops->get_holder)
-               hops->get_holder(holder);
 }
 
 /**
@@ -608,7 +605,6 @@ EXPORT_SYMBOL(bd_abort_claiming);
 static void bd_end_claim(struct block_device *bdev, void *holder)
 {
        struct block_device *whole = bdev_whole(bdev);
-       const struct blk_holder_ops *hops = bdev->bd_holder_ops;
        bool unblock = false;
 
        /*
@@ -631,9 +627,6 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
                whole->bd_holder = NULL;
        mutex_unlock(&bdev_lock);
 
-       if (hops && hops->put_holder)
-               hops->put_holder(holder);
-
        /*
         * If this was the last claim, remove holder link and unblock evpoll if
         * it was a write holder.
@@ -813,6 +806,11 @@ static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode)
                bdev->bd_writers++;
 }
 
+static inline bool bdev_unclaimed(const struct file *bdev_file)
+{
+       return bdev_file->private_data == BDEV_I(bdev_file->f_mapping->host);
+}
+
 static void bdev_yield_write_access(struct file *bdev_file)
 {
        struct block_device *bdev;
@@ -820,6 +818,9 @@ static void bdev_yield_write_access(struct file *bdev_file)
        if (bdev_allow_write_mounted)
                return;
 
+       if (bdev_unclaimed(bdev_file))
+               return;
+
        bdev = file_bdev(bdev_file);
 
        if (bdev_file->f_mode & FMODE_WRITE_RESTRICTED)
@@ -1012,6 +1013,20 @@ struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode,
 }
 EXPORT_SYMBOL(bdev_file_open_by_path);
 
+static inline void bd_yield_claim(struct file *bdev_file)
+{
+       struct block_device *bdev = file_bdev(bdev_file);
+       void *holder = bdev_file->private_data;
+
+       lockdep_assert_held(&bdev->bd_disk->open_mutex);
+
+       if (WARN_ON_ONCE(IS_ERR_OR_NULL(holder)))
+               return;
+
+       if (!bdev_unclaimed(bdev_file))
+               bd_end_claim(bdev, holder);
+}
+
 void bdev_release(struct file *bdev_file)
 {
        struct block_device *bdev = file_bdev(bdev_file);
@@ -1036,7 +1051,7 @@ void bdev_release(struct file *bdev_file)
        bdev_yield_write_access(bdev_file);
 
        if (holder)
-               bd_end_claim(bdev, holder);
+               bd_yield_claim(bdev_file);
 
        /*
         * Trigger event checking and tell drivers to flush MEDIA_CHANGE
@@ -1056,6 +1071,39 @@ put_no_open:
        blkdev_put_no_open(bdev);
 }
 
+/**
+ * bdev_fput - yield claim to the block device and put the file
+ * @bdev_file: open block device
+ *
+ * Yield claim on the block device and put the file. Ensure that the
+ * block device can be reclaimed before the file is closed which is a
+ * deferred operation.
+ */
+void bdev_fput(struct file *bdev_file)
+{
+       if (WARN_ON_ONCE(bdev_file->f_op != &def_blk_fops))
+               return;
+
+       if (bdev_file->private_data) {
+               struct block_device *bdev = file_bdev(bdev_file);
+               struct gendisk *disk = bdev->bd_disk;
+
+               mutex_lock(&disk->open_mutex);
+               bdev_yield_write_access(bdev_file);
+               bd_yield_claim(bdev_file);
+               /*
+                * Tell release we already gave up our hold on the
+                * device and if write restrictions are available that
+                * we already gave up write access to the device.
+                */
+               bdev_file->private_data = BDEV_I(bdev_file->f_mapping->host);
+               mutex_unlock(&disk->open_mutex);
+       }
+
+       fput(bdev_file);
+}
+EXPORT_SYMBOL(bdev_fput);
+
 /**
  * lookup_bdev() - Look up a struct block_device by name.
  * @pathname: Name of the block device in the filesystem.
index 97a00ec9a4d48944a8233b49c5fa0106493abb47..caacdc0a3819458fbb47faae23432f5950fe5869 100644 (file)
@@ -209,7 +209,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
 
        if (dev->bdev_file) {
                invalidate_mapping_pages(dev->bdev_file->f_mapping, 0, -1);
-               fput(dev->bdev_file);
+               bdev_fput(dev->bdev_file);
        }
 
        kfree(dev);
index ad28e370b6404c915ee8bf8743ed535366fc6a55..cb7b4de11a49e69ecf3188b88399c3a6c23c57ea 100644 (file)
@@ -143,7 +143,7 @@ void bch2_free_super(struct bch_sb_handle *sb)
 {
        kfree(sb->bio);
        if (!IS_ERR_OR_NULL(sb->s_bdev_file))
-               fput(sb->s_bdev_file);
+               bdev_fput(sb->s_bdev_file);
        kfree(sb->holder);
        kfree(sb->sb_name);
 
index 39e75131fd5aa01d732f703cb1f421a3696bffd6..9901057a15ba79a110c8a90423bc7707102590d8 100644 (file)
@@ -495,7 +495,7 @@ static void cramfs_kill_sb(struct super_block *sb)
                sb->s_mtd = NULL;
        } else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
                sync_blockdev(sb->s_bdev);
-               fput(sb->s_bdev_file);
+               bdev_fput(sb->s_bdev_file);
        }
        kfree(sbi);
 }
index cfb8449c731f9ac53fb3add808e13493175508c4..044135796f2b6ebe86e56b69f57501e7567d761b 100644 (file)
@@ -5668,7 +5668,7 @@ failed_mount:
        brelse(sbi->s_sbh);
        if (sbi->s_journal_bdev_file) {
                invalidate_bdev(file_bdev(sbi->s_journal_bdev_file));
-               fput(sbi->s_journal_bdev_file);
+               bdev_fput(sbi->s_journal_bdev_file);
        }
 out_fail:
        invalidate_bdev(sb->s_bdev);
@@ -5913,7 +5913,7 @@ static struct file *ext4_get_journal_blkdev(struct super_block *sb,
 out_bh:
        brelse(bh);
 out_bdev:
-       fput(bdev_file);
+       bdev_fput(bdev_file);
        return ERR_PTR(errno);
 }
 
@@ -5952,7 +5952,7 @@ static journal_t *ext4_open_dev_journal(struct super_block *sb,
 out_journal:
        jbd2_journal_destroy(journal);
 out_bdev:
-       fput(bdev_file);
+       bdev_fput(bdev_file);
        return ERR_PTR(errno);
 }
 
@@ -7327,7 +7327,7 @@ static void ext4_kill_sb(struct super_block *sb)
        kill_block_super(sb);
 
        if (bdev_file)
-               fput(bdev_file);
+               bdev_fput(bdev_file);
 }
 
 static struct file_system_type ext4_fs_type = {
index a6867f26f141836dcd4a4f0136dd67a9de6c3c74..a4bc26dfdb1af5973783d2817bf2deed889f3c33 100644 (file)
@@ -1558,7 +1558,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
 
        for (i = 0; i < sbi->s_ndevs; i++) {
                if (i > 0)
-                       fput(FDEV(i).bdev_file);
+                       bdev_fput(FDEV(i).bdev_file);
 #ifdef CONFIG_BLK_DEV_ZONED
                kvfree(FDEV(i).blkz_seq);
 #endif
index 73389c68e25170c81d6f84483f09b43216ba4b52..9609349e92e5e1ba422369fa29a2f6345f7fe908 100644 (file)
@@ -1141,7 +1141,7 @@ journal_found:
        lbmLogShutdown(log);
 
       close:           /* close external log device */
-       fput(bdev_file);
+       bdev_fput(bdev_file);
 
       free:            /* free log descriptor */
        mutex_unlock(&jfs_log_mutex);
@@ -1485,7 +1485,7 @@ int lmLogClose(struct super_block *sb)
        bdev_file = log->bdev_file;
        rc = lmLogShutdown(log);
 
-       fput(bdev_file);
+       bdev_fput(bdev_file);
 
        kfree(log);
 
index 6474529c42530628fd3969573fb175283f4f51e8..e539ccd39e1ee74cd8bdfd35d29f826be6f514e1 100644 (file)
@@ -2589,7 +2589,7 @@ static void journal_list_init(struct super_block *sb)
 static void release_journal_dev(struct reiserfs_journal *journal)
 {
        if (journal->j_bdev_file) {
-               fput(journal->j_bdev_file);
+               bdev_fput(journal->j_bdev_file);
                journal->j_bdev_file = NULL;
        }
 }
index 2be227532f399788de82a03e55970d33c67dc695..2cbb924620747f68d04ac53783c3b0f21c5ea0ab 100644 (file)
@@ -594,7 +594,7 @@ static void romfs_kill_sb(struct super_block *sb)
 #ifdef CONFIG_ROMFS_ON_BLOCK
        if (sb->s_bdev) {
                sync_blockdev(sb->s_bdev);
-               fput(sb->s_bdev_file);
+               bdev_fput(sb->s_bdev_file);
        }
 #endif
 }
index 71d9779c42b10aca8bd4e0b7b667fc62386e2305..69ce6c600968479bd6832a6705352eb2d88427c1 100644 (file)
@@ -1515,29 +1515,11 @@ static int fs_bdev_thaw(struct block_device *bdev)
        return error;
 }
 
-static void fs_bdev_super_get(void *data)
-{
-       struct super_block *sb = data;
-
-       spin_lock(&sb_lock);
-       sb->s_count++;
-       spin_unlock(&sb_lock);
-}
-
-static void fs_bdev_super_put(void *data)
-{
-       struct super_block *sb = data;
-
-       put_super(sb);
-}
-
 const struct blk_holder_ops fs_holder_ops = {
        .mark_dead              = fs_bdev_mark_dead,
        .sync                   = fs_bdev_sync,
        .freeze                 = fs_bdev_freeze,
        .thaw                   = fs_bdev_thaw,
-       .get_holder             = fs_bdev_super_get,
-       .put_holder             = fs_bdev_super_put,
 };
 EXPORT_SYMBOL_GPL(fs_holder_ops);
 
@@ -1562,7 +1544,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
         * writable from userspace even for a read-only block device.
         */
        if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
-               fput(bdev_file);
+               bdev_fput(bdev_file);
                return -EACCES;
        }
 
@@ -1573,7 +1555,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
        if (atomic_read(&bdev->bd_fsfreeze_count) > 0) {
                if (fc)
                        warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
-               fput(bdev_file);
+               bdev_fput(bdev_file);
                return -EBUSY;
        }
        spin_lock(&sb_lock);
@@ -1693,7 +1675,7 @@ void kill_block_super(struct super_block *sb)
        generic_shutdown_super(sb);
        if (bdev) {
                sync_blockdev(bdev);
-               fput(sb->s_bdev_file);
+               bdev_fput(sb->s_bdev_file);
        }
 }
 
index 1a18c381127e2183169eaa8280aa620d66340a71..f0fa02264edaaeef2d23d1101d1953d6454e8832 100644 (file)
@@ -2030,7 +2030,7 @@ xfs_free_buftarg(
        fs_put_dax(btp->bt_daxdev, btp->bt_mount);
        /* the main block device is closed by kill_block_super */
        if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev)
-               fput(btp->bt_bdev_file);
+               bdev_fput(btp->bt_bdev_file);
        kfree(btp);
 }
 
index c21f10ab0f5dbef4051b6bef01eb64c77247e056..bce020374c5eba5255a98d71176d7198024603d2 100644 (file)
@@ -485,7 +485,7 @@ xfs_open_devices(
                mp->m_logdev_targp = mp->m_ddev_targp;
                /* Handle won't be used, drop it */
                if (logdev_file)
-                       fput(logdev_file);
+                       bdev_fput(logdev_file);
        }
 
        return 0;
@@ -497,10 +497,10 @@ xfs_open_devices(
        xfs_free_buftarg(mp->m_ddev_targp);
  out_close_rtdev:
         if (rtdev_file)
-               fput(rtdev_file);
+               bdev_fput(rtdev_file);
  out_close_logdev:
        if (logdev_file)
-               fput(logdev_file);
+               bdev_fput(logdev_file);
        return error;
 }
 
index c3e8f7cf96be9e1c10169d2e7afe31696082eb8f..172c918799995f8ce6ed285bc5a8ea3341ddb6ed 100644 (file)
@@ -1505,16 +1505,6 @@ struct blk_holder_ops {
         * Thaw the file system mounted on the block device.
         */
        int (*thaw)(struct block_device *bdev);
-
-       /*
-        * If needed, get a reference to the holder.
-        */
-       void (*get_holder)(void *holder);
-
-       /*
-        * Release the holder.
-        */
-       void (*put_holder)(void *holder);
 };
 
 /*
@@ -1585,6 +1575,7 @@ static inline int early_lookup_bdev(const char *pathname, dev_t *dev)
 
 int bdev_freeze(struct block_device *bdev);
 int bdev_thaw(struct block_device *bdev);
+void bdev_fput(struct file *bdev_file);
 
 struct io_comp_batch {
        struct request *req_list;