mm/migrate: fix sleep in atomic for large folios and buffer heads
author Davidlohr Bueso <dave@stgolabs.net>
Fri, 18 Apr 2025 01:59:21 +0000 (18:59 -0700)
committer Christian Brauner <brauner@kernel.org>
Tue, 22 Apr 2025 16:16:08 +0000 (18:16 +0200)
The large folio + buffer head noref migration scenarios are
being naughty and blocking while holding a spinlock.

As a consequence of the pagecache lookup path taking the
folio lock, this serializes against migration paths, so
they can wait for each other. For the private_lock
atomic case, a new BH_Migrate flag is introduced which
enables the lookup to bail.
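
Roughly, the new flag pairs up like this (condensed from
the fs/buffer.c and mm/migrate.c hunks below; this is only
an illustration of the protocol, the real code is in the
diff):

	/* noref migration: flag the buffers before the ref checks */
	migrating = test_and_set_bit_lock(BH_Migrate, &head->b_state);
	VM_WARN_ON_ONCE(migrating);
	...	/* count checks, under mapping->i_private_lock */
	clear_bit_unlock(BH_Migrate, &head->b_state);

	/* atomic __find_get_block_slow(), under the folio lock */
	if (test_bit_acquire(BH_Migrate, &head->b_state)) {
		WARN_ON(!atomic);	/* only atomic lookups can race here */
		goto out_unlock;	/* treated as not found */
	}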

This allows the critical region of the private_lock on
the migration path to be reduced to what it was before
ebdf4de5642fb6 ("mm: migrate: fix reference check race
between __find_get_block() and migration"), that is,
covering only the count checks.
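
Abridged from the mm/migrate.c hunk below, the migration
side now looks like:

	spin_lock(&mapping->i_private_lock);
	/* walk the buffers, checking reference counts only */
	spin_unlock(&mapping->i_private_lock);
	if (busy) {
		/* invalidate_bh_lrus() and the recheck run unlocked */
		...
	}
	/* the remainder of the migration (the part that could
	   block) runs without i_private_lock held */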

The scope is always noref migration.

Reported-by: kernel test robot <oliver.sang@intel.com>
Reported-by: syzbot+f3c6fda1297c748a7076@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/oe-lkp/202503101536.27099c77-lkp@intel.com
Fixes: 3c20917120ce61 ("block/bdev: enable large folio support for large logical block sizes")
Reviewed-by: Jan Kara <jack@suse.cz>
Co-developed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
Link: https://kdevops.org/ext4/v6.15-rc2.html
Link: https://lore.kernel.org/all/aAAEvcrmREWa1SKF@bombadil.infradead.org/
Link: https://lore.kernel.org/20250418015921.132400-8-dave@stgolabs.net
Tested-by: kdevops@lists.linux.dev # [0] [1]
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
fs/buffer.c
fs/ext4/ialloc.c
include/linux/buffer_head.h
mm/migrate.c

diff --git a/fs/buffer.c b/fs/buffer.c
index f8c9e5eb4685353d98ee8789c7a63c79a2733fae..7be23ff20b2733b9906bfc4cc9f82a78af47e94a 100644 (file)
@@ -207,6 +207,15 @@ __find_get_block_slow(struct block_device *bdev, sector_t block, bool atomic)
        head = folio_buffers(folio);
        if (!head)
                goto out_unlock;
+       /*
+        * Upon a noref migration, the folio lock serializes here;
+        * otherwise bail.
+        */
+       if (test_bit_acquire(BH_Migrate, &head->b_state)) {
+               WARN_ON(!atomic);
+               goto out_unlock;
+       }
+
        bh = head;
        do {
                if (!buffer_mapped(bh))
@@ -1390,7 +1399,8 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
 /*
  * Perform a pagecache lookup for the matching buffer.  If it's there, refresh
  * it in the LRU and mark it as accessed.  If it is not present then return
- * NULL
+ * NULL. Atomic context callers may also return NULL if the buffer is being
+ * migrated; similarly the page is not marked accessed either.
  */
 static struct buffer_head *
 find_get_block_common(struct block_device *bdev, sector_t block,
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 38bc8d74f4cc23b79d706c15177db62fcc794b84..e7ecc7c8a729696124e883be98690c39d3985f11 100644 (file)
@@ -691,7 +691,8 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
        if (!bh || !buffer_uptodate(bh))
                /*
                 * If the block is not in the buffer cache, then it
-                * must have been written out.
+                * must have been written out, or, most unlikely, is
+                * being migrated - false failure should be OK here.
                 */
                goto out;
 
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index c791aa9a08daa0d6a124f9aeb4954edf38f1d501..0029ff880e27483baf90bb80daf9b914b857ca0d 100644 (file)
@@ -34,6 +34,7 @@ enum bh_state_bits {
        BH_Meta,        /* Buffer contains metadata */
        BH_Prio,        /* Buffer should be submitted with REQ_PRIO */
        BH_Defer_Completion, /* Defer AIO completion to workqueue */
+       BH_Migrate,     /* Buffer is being migrated (norefs) */
 
        BH_PrivateStart,/* not a state bit, but the first bit available
                         * for private allocation by other entities
diff --git a/mm/migrate.c b/mm/migrate.c
index f3ee6d8d5e2eaab4313403aea3e68ddb1736fb63..676d9cfc7059ca02c6378eb625fdc9da9d0198ab 100644 (file)
@@ -845,9 +845,11 @@ static int __buffer_migrate_folio(struct address_space *mapping,
                return -EAGAIN;
 
        if (check_refs) {
-               bool busy;
+               bool busy, migrating;
                bool invalidated = false;
 
+               migrating = test_and_set_bit_lock(BH_Migrate, &head->b_state);
+               VM_WARN_ON_ONCE(migrating);
 recheck_buffers:
                busy = false;
                spin_lock(&mapping->i_private_lock);
@@ -859,12 +861,12 @@ recheck_buffers:
                        }
                        bh = bh->b_this_page;
                } while (bh != head);
+               spin_unlock(&mapping->i_private_lock);
                if (busy) {
                        if (invalidated) {
                                rc = -EAGAIN;
                                goto unlock_buffers;
                        }
-                       spin_unlock(&mapping->i_private_lock);
                        invalidate_bh_lrus();
                        invalidated = true;
                        goto recheck_buffers;
@@ -883,7 +885,7 @@ recheck_buffers:
 
 unlock_buffers:
        if (check_refs)
-               spin_unlock(&mapping->i_private_lock);
+               clear_bit_unlock(BH_Migrate, &head->b_state);
        bh = head;
        do {
                unlock_buffer(bh);