writeback: Fix inode->i_io_list not be protected by inode->i_lock error
authorJchao Sun <sunjunchao2870@gmail.com>
Tue, 24 May 2022 15:05:40 +0000 (08:05 -0700)
committerJan Kara <jack@suse.cz>
Mon, 6 Jun 2022 07:54:30 +0000 (09:54 +0200)
Commit b35250c0816c ("writeback: Protect inode->i_io_list with
inode->i_lock") made inode->i_io_list not only protected by
wb->list_lock but also inode->i_lock, but inode_io_list_move_locked()
was missed. Add lock there and also update comment describing
things protected by inode->i_lock. This also fixes a race where
__mark_inode_dirty() could move inode under flush worker's hands
and thus sync(2) could miss writing some inodes.

Fixes: b35250c0816c ("writeback: Protect inode->i_io_list with inode->i_lock")
Link: https://lore.kernel.org/r/20220524150540.12552-1-sunjunchao2870@gmail.com
CC: stable@vger.kernel.org
Signed-off-by: Jchao Sun <sunjunchao2870@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
fs/fs-writeback.c
fs/inode.c

index a21d8f1a56d19d3cb1e40bd9e21213aafa715a00..05221366a16dc913961f3e3f7b4a703477bc7bb9 100644 (file)
@@ -120,6 +120,7 @@ static bool inode_io_list_move_locked(struct inode *inode,
                                      struct list_head *head)
 {
        assert_spin_locked(&wb->list_lock);
+       assert_spin_locked(&inode->i_lock);
 
        list_move(&inode->i_io_list, head);
 
@@ -1365,9 +1366,9 @@ static int move_expired_inodes(struct list_head *delaying_queue,
                inode = wb_inode(delaying_queue->prev);
                if (inode_dirtied_after(inode, dirtied_before))
                        break;
+               spin_lock(&inode->i_lock);
                list_move(&inode->i_io_list, &tmp);
                moved++;
-               spin_lock(&inode->i_lock);
                inode->i_state |= I_SYNC_QUEUED;
                spin_unlock(&inode->i_lock);
                if (sb_is_blkdev_sb(inode->i_sb))
@@ -1383,7 +1384,12 @@ static int move_expired_inodes(struct list_head *delaying_queue,
                goto out;
        }
 
-       /* Move inodes from one superblock together */
+       /*
+        * Although inode's i_io_list is moved from 'tmp' to 'dispatch_queue',
+        * we don't take inode->i_lock here because it is just a pointless overhead.
+        * Inode is already marked as I_SYNC_QUEUED so writeback list handling is
+        * fully under our control.
+        */
        while (!list_empty(&tmp)) {
                sb = wb_inode(tmp.prev)->i_sb;
                list_for_each_prev_safe(pos, node, &tmp) {
@@ -1826,8 +1832,8 @@ static long writeback_sb_inodes(struct super_block *sb,
                         * We'll have another go at writing back this inode
                         * when we completed a full scan of b_io.
                         */
-                       spin_unlock(&inode->i_lock);
                        requeue_io(inode, wb);
+                       spin_unlock(&inode->i_lock);
                        trace_writeback_sb_inodes_requeue(inode);
                        continue;
                }
@@ -2358,6 +2364,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 {
        struct super_block *sb = inode->i_sb;
        int dirtytime = 0;
+       struct bdi_writeback *wb = NULL;
 
        trace_writeback_mark_inode_dirty(inode, flags);
 
@@ -2409,6 +2416,17 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                        inode->i_state &= ~I_DIRTY_TIME;
                inode->i_state |= flags;
 
+               /*
+                * Grab inode's wb early because it requires dropping i_lock and we
+                * need to make sure following checks happen atomically with dirty
+                * list handling so that we don't move inodes under flush worker's
+                * hands.
+                */
+               if (!was_dirty) {
+                       wb = locked_inode_to_wb_and_lock_list(inode);
+                       spin_lock(&inode->i_lock);
+               }
+
                /*
                 * If the inode is queued for writeback by flush worker, just
                 * update its dirty state. Once the flush worker is done with
@@ -2416,7 +2434,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                 * list, based upon its state.
                 */
                if (inode->i_state & I_SYNC_QUEUED)
-                       goto out_unlock_inode;
+                       goto out_unlock;
 
                /*
                 * Only add valid (hashed) inodes to the superblock's
@@ -2424,22 +2442,19 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                 */
                if (!S_ISBLK(inode->i_mode)) {
                        if (inode_unhashed(inode))
-                               goto out_unlock_inode;
+                               goto out_unlock;
                }
                if (inode->i_state & I_FREEING)
-                       goto out_unlock_inode;
+                       goto out_unlock;
 
                /*
                 * If the inode was already on b_dirty/b_io/b_more_io, don't
                 * reposition it (that would break b_dirty time-ordering).
                 */
                if (!was_dirty) {
-                       struct bdi_writeback *wb;
                        struct list_head *dirty_list;
                        bool wakeup_bdi = false;
 
-                       wb = locked_inode_to_wb_and_lock_list(inode);
-
                        inode->dirtied_when = jiffies;
                        if (dirtytime)
                                inode->dirtied_time_when = jiffies;
@@ -2453,6 +2468,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                                                               dirty_list);
 
                        spin_unlock(&wb->list_lock);
+                       spin_unlock(&inode->i_lock);
                        trace_writeback_dirty_inode_enqueue(inode);
 
                        /*
@@ -2467,6 +2483,9 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                        return;
                }
        }
+out_unlock:
+       if (wb)
+               spin_unlock(&wb->list_lock);
 out_unlock_inode:
        spin_unlock(&inode->i_lock);
 }
index 9d9b422504d1a883236f030befd73953d29dc774..bd4da9c5207eabb0cec8722ee3d30acd963169bf 100644 (file)
@@ -27,7 +27,7 @@
  * Inode locking rules:
  *
  * inode->i_lock protects:
- *   inode->i_state, inode->i_hash, __iget()
+ *   inode->i_state, inode->i_hash, __iget(), inode->i_io_list
  * Inode LRU list locks protect:
  *   inode->i_sb->s_inode_lru, inode->i_lru
  * inode->i_sb->s_inode_list_lock protects: