ext4: hold s_fc_lock while during fast commit
authorHarshad Shirwadkar <harshadshirwadkar@gmail.com>
Thu, 8 May 2025 17:59:08 +0000 (17:59 +0000)
committerTheodore Ts'o <tytso@mit.edu>
Fri, 9 May 2025 01:56:17 +0000 (21:56 -0400)
Leaving s_fc_lock in between during commit in ext4_fc_perform_commit()
function leaves room for subtle concurrency bugs where ext4_fc_del() may
delete an inode from the fast commit list, leaving list in an inconsistent
state.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20250508175908.1004880-10-harshadshirwadkar@gmail.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
fs/ext4/fast_commit.c

index eb888e52261f79a1070078b880bdc8847a15d554..7ac672e35f08c466b8372f250d8bdd38527766b7 100644 (file)
@@ -424,6 +424,7 @@ static int __track_dentry_update(handle_t *handle, struct inode *inode,
        node->fcd_ino = inode->i_ino;
        take_dentry_name_snapshot(&node->fcd_name, dentry);
        INIT_LIST_HEAD(&node->fcd_dilist);
+       INIT_LIST_HEAD(&node->fcd_list);
        mutex_lock(&sbi->s_fc_lock);
        if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
                sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
@@ -985,8 +986,6 @@ static int ext4_fc_flush_data(journal_t *journal)
 
 /* Commit all the directory entry updates */
 static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
-__acquires(&sbi->s_fc_lock)
-__releases(&sbi->s_fc_lock)
 {
        struct super_block *sb = journal->j_private;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -1000,26 +999,22 @@ __releases(&sbi->s_fc_lock)
        list_for_each_entry_safe(fc_dentry, fc_dentry_n,
                                 &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) {
                if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
-                       mutex_unlock(&sbi->s_fc_lock);
-                       if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
-                               ret = -ENOSPC;
-                               goto lock_and_exit;
-                       }
-                       mutex_lock(&sbi->s_fc_lock);
+                       if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry))
+                               return -ENOSPC;
                        continue;
                }
                /*
                 * With fcd_dilist we need not loop in sbi->s_fc_q to get the
-                * corresponding inode pointer
+                * corresponding inode. Also, the corresponding inode could have been
+                * deleted, in which case, we don't need to do anything.
                 */
-               WARN_ON(list_empty(&fc_dentry->fcd_dilist));
+               if (list_empty(&fc_dentry->fcd_dilist))
+                       continue;
                ei = list_first_entry(&fc_dentry->fcd_dilist,
                                struct ext4_inode_info, i_fc_dilist);
                inode = &ei->vfs_inode;
                WARN_ON(inode->i_ino != fc_dentry->fcd_ino);
 
-               mutex_unlock(&sbi->s_fc_lock);
-
                /*
                 * We first write the inode and then the create dirent. This
                 * allows the recovery code to create an unnamed inode first
@@ -1029,23 +1024,14 @@ __releases(&sbi->s_fc_lock)
                 */
                ret = ext4_fc_write_inode(inode, crc);
                if (ret)
-                       goto lock_and_exit;
-
+                       return ret;
                ret = ext4_fc_write_inode_data(inode, crc);
                if (ret)
-                       goto lock_and_exit;
-
-               if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
-                       ret = -ENOSPC;
-                       goto lock_and_exit;
-               }
-
-               mutex_lock(&sbi->s_fc_lock);
+                       return ret;
+               if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry))
+                       return -ENOSPC;
        }
        return 0;
-lock_and_exit:
-       mutex_lock(&sbi->s_fc_lock);
-       return ret;
 }
 
 static int ext4_fc_perform_commit(journal_t *journal)
@@ -1148,10 +1134,8 @@ static int ext4_fc_perform_commit(journal_t *journal)
        /* Step 6.2: Now write all the dentry updates. */
        mutex_lock(&sbi->s_fc_lock);
        ret = ext4_fc_commit_dentry_updates(journal, &crc);
-       if (ret) {
-               mutex_unlock(&sbi->s_fc_lock);
+       if (ret)
                goto out;
-       }
 
        /* Step 6.3: Now write all the changed inodes to disk. */
        list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
@@ -1159,7 +1143,6 @@ static int ext4_fc_perform_commit(journal_t *journal)
                if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
                        continue;
 
-               mutex_unlock(&sbi->s_fc_lock);
                ret = ext4_fc_write_inode_data(inode, &crc);
                if (ret)
                        goto out;
@@ -1171,6 +1154,7 @@ static int ext4_fc_perform_commit(journal_t *journal)
        ret = ext4_fc_write_tail(sb, crc);
 
 out:
+       mutex_unlock(&sbi->s_fc_lock);
        blk_finish_plug(&plug);
        return ret;
 }
@@ -1353,11 +1337,9 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
                                             fcd_list);
                list_del_init(&fc_dentry->fcd_list);
                list_del_init(&fc_dentry->fcd_dilist);
-               mutex_unlock(&sbi->s_fc_lock);
 
                release_dentry_name_snapshot(&fc_dentry->fcd_name);
                kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
-               mutex_lock(&sbi->s_fc_lock);
        }
 
        list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],