Merge tag 'ext4-for-linus-5.8-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel...
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 15 Jun 2020 16:32:10 +0000 (09:32 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 15 Jun 2020 16:32:10 +0000 (09:32 -0700)
Pull more ext4 updates from Ted Ts'o:
 "This is the second round of ext4 commits for 5.8 merge window [1].

  It includes the per-inode DAX support, which was dependent on the DAX
  infrastructure which came in via the XFS tree, and a number of
  regression and bug fixes; most notably the "BUG: using
  smp_processor_id() in preemptible code in ext4_mb_new_blocks" reported
  by syzkaller"

[1] The pull request actually came in 15 minutes after I had tagged the
    rc1 release. Tssk, tssk, late..   - Linus

* tag 'ext4-for-linus-5.8-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4, jbd2: ensure panic by fix a race between jbd2 abort and ext4 error handlers
  ext4: support xattr gnu.* namespace for the Hurd
  ext4: mballoc: Use this_cpu_read instead of this_cpu_ptr
  ext4: avoid utf8_strncasecmp() with unstable name
  ext4: stop overwrite the errcode in ext4_setup_super
  ext4: fix partial cluster initialization when splitting extent
  ext4: avoid race conditions when remounting with options that change dax
  Documentation/dax: Update DAX enablement for ext4
  fs/ext4: Introduce DAX inode flag
  fs/ext4: Remove jflag variable
  fs/ext4: Make DAX mount option a tri-state
  fs/ext4: Only change S_DAX on inode load
  fs/ext4: Update ext4_should_use_dax()
  fs/ext4: Change EXT4_MOUNT_DAX to EXT4_MOUNT_DAX_ALWAYS
  fs/ext4: Disallow verity if inode is DAX
  fs/ext4: Narrow scope of DAX check in setflags

19 files changed:
Documentation/filesystems/dax.txt
Documentation/filesystems/ext4/verity.rst
fs/ext4/Makefile
fs/ext4/dir.c
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/ext4/super.c
fs/ext4/verity.c
fs/ext4/xattr.c
fs/ext4/xattr.h
fs/ext4/xattr_hurd.c [new file with mode: 0644]
fs/jbd2/journal.c
include/linux/jbd2.h
include/uapi/linux/fs.h
include/uapi/linux/xattr.h

index 8e2670781c9b3dce552718d240a8954914b97bc7..8fdb78f3c6c99979180ac845bf492a2c81d5aec6 100644 (file)
@@ -25,7 +25,7 @@ size when creating the filesystem.
 Currently 3 filesystems support DAX: ext2, ext4 and xfs.  Enabling DAX on them
 is different.
 
-Enabling DAX on ext4 and ext2
+Enabling DAX on ext2
 -----------------------------
 
 When mounting the filesystem, use the "-o dax" option on the command line or
@@ -33,8 +33,8 @@ add 'dax' to the options in /etc/fstab.  This works to enable DAX on all files
 within the filesystem.  It is equivalent to the '-o dax=always' behavior below.
 
 
-Enabling DAX on xfs
--------------------
+Enabling DAX on xfs and ext4
+----------------------------
 
 Summary
 -------
index 3e4c0ee0e06839ab350e2b01d753ce9bf8639e76..e99ff3fd09f7e7d055d7674e099cbc2b5265c552 100644 (file)
@@ -39,3 +39,6 @@ is encrypted as well as the data itself.
 
 Verity files cannot have blocks allocated past the end of the verity
 metadata.
+
+Verity and DAX are not compatible and attempts to set both of these flags
+on a file will fail.
index 4ccb3c9189d84d884d82e6fb97dcaa5dd12cd2a6..2e42f47a7f9827343a0012c1f6d02d2b843ebc4e 100644 (file)
@@ -9,7 +9,8 @@ ext4-y  := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
                extents_status.o file.o fsmap.o fsync.o hash.o ialloc.o \
                indirect.o inline.o inode.o ioctl.o mballoc.o migrate.o \
                mmp.o move_extent.o namei.o page-io.o readpage.o resize.o \
-               super.o symlink.o sysfs.o xattr.o xattr_trusted.o xattr_user.o
+               super.o symlink.o sysfs.o xattr.o xattr_hurd.o xattr_trusted.o \
+               xattr_user.o
 
 ext4-$(CONFIG_EXT4_FS_POSIX_ACL)       += acl.o
 ext4-$(CONFIG_EXT4_FS_SECURITY)                += xattr_security.o
index c654205f648dd979abe3a087e92cc2f73f57de3a..1d82336b1cd4504bc7b5b4d9cccc3fc70dff52f6 100644 (file)
@@ -675,6 +675,7 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
        struct qstr qstr = {.name = str, .len = len };
        const struct dentry *parent = READ_ONCE(dentry->d_parent);
        const struct inode *inode = READ_ONCE(parent->d_inode);
+       char strbuf[DNAME_INLINE_LEN];
 
        if (!inode || !IS_CASEFOLDED(inode) ||
            !EXT4_SB(inode->i_sb)->s_encoding) {
@@ -683,6 +684,21 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
                return memcmp(str, name->name, len);
        }
 
+       /*
+        * If the dentry name is stored in-line, then it may be concurrently
+        * modified by a rename.  If this happens, the VFS will eventually retry
+        * the lookup, so it doesn't matter what ->d_compare() returns.
+        * However, it's unsafe to call utf8_strncasecmp() with an unstable
+        * string.  Therefore, we have to copy the name into a temporary buffer.
+        */
+       if (len <= DNAME_INLINE_LEN - 1) {
+               memcpy(strbuf, str, len);
+               strbuf[len] = 0;
+               qstr.name = strbuf;
+               /* prevent compiler from optimizing out the temporary buffer */
+               barrier();
+       }
+
        return ext4_ci_compare(inode, name, &qstr, false);
 }
 
index b08841f70b6995704a3dcf681e3f009e23c45cb0..42f5060f3cdf19748acdbf076fb08f0170cbac72 100644 (file)
@@ -426,13 +426,16 @@ struct flex_groups {
 #define EXT4_VERITY_FL                 0x00100000 /* Verity protected inode */
 #define EXT4_EA_INODE_FL               0x00200000 /* Inode used for large EA */
 /* 0x00400000 was formerly EXT4_EOFBLOCKS_FL */
+
+#define EXT4_DAX_FL                    0x02000000 /* Inode is DAX */
+
 #define EXT4_INLINE_DATA_FL            0x10000000 /* Inode has inline data. */
 #define EXT4_PROJINHERIT_FL            0x20000000 /* Create with parents projid */
 #define EXT4_CASEFOLD_FL               0x40000000 /* Casefolded directory */
 #define EXT4_RESERVED_FL               0x80000000 /* reserved for ext4 lib */
 
-#define EXT4_FL_USER_VISIBLE           0x705BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE                0x604BC0FF /* User modifiable flags */
+#define EXT4_FL_USER_VISIBLE           0x725BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE                0x624BC0FF /* User modifiable flags */
 
 /* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
 #define EXT4_FL_XFLAG_VISIBLE          (EXT4_SYNC_FL | \
@@ -440,14 +443,16 @@ struct flex_groups {
                                         EXT4_APPEND_FL | \
                                         EXT4_NODUMP_FL | \
                                         EXT4_NOATIME_FL | \
-                                        EXT4_PROJINHERIT_FL)
+                                        EXT4_PROJINHERIT_FL | \
+                                        EXT4_DAX_FL)
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
                           EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
                           EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
                           EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
-                          EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL)
+                          EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL |\
+                          EXT4_DAX_FL)
 
 /* Flags that are appropriate for regular files (all but dir-specific ones). */
 #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\
@@ -459,6 +464,10 @@ struct flex_groups {
 /* The only flags that should be swapped */
 #define EXT4_FL_SHOULD_SWAP (EXT4_HUGE_FILE_FL | EXT4_EXTENTS_FL)
 
+/* Flags which are mutually exclusive to DAX */
+#define EXT4_DAX_MUT_EXCL (EXT4_VERITY_FL | EXT4_ENCRYPT_FL |\
+                          EXT4_JOURNAL_DATA_FL)
+
 /* Mask out flags that are inappropriate for the given type of inode. */
 static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
 {
@@ -499,6 +508,7 @@ enum {
        EXT4_INODE_VERITY       = 20,   /* Verity protected inode */
        EXT4_INODE_EA_INODE     = 21,   /* Inode used for large EA */
 /* 22 was formerly EXT4_INODE_EOFBLOCKS */
+       EXT4_INODE_DAX          = 25,   /* Inode is DAX */
        EXT4_INODE_INLINE_DATA  = 28,   /* Data in inode. */
        EXT4_INODE_PROJINHERIT  = 29,   /* Create with parents projid */
        EXT4_INODE_CASEFOLD     = 30,   /* Casefolded directory */
@@ -1135,9 +1145,9 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_MINIX_DF            0x00080 /* Mimics the Minix statfs */
 #define EXT4_MOUNT_NOLOAD              0x00100 /* Don't use existing journal*/
 #ifdef CONFIG_FS_DAX
-#define EXT4_MOUNT_DAX                 0x00200 /* Direct Access */
+#define EXT4_MOUNT_DAX_ALWAYS          0x00200 /* Direct Access */
 #else
-#define EXT4_MOUNT_DAX                 0
+#define EXT4_MOUNT_DAX_ALWAYS          0
 #endif
 #define EXT4_MOUNT_DATA_FLAGS          0x00C00 /* Mode for data writes: */
 #define EXT4_MOUNT_JOURNAL_DATA                0x00400 /* Write data to journal */
@@ -1180,6 +1190,8 @@ struct ext4_inode_info {
                                                      blocks */
 #define EXT4_MOUNT2_HURD_COMPAT                0x00000004 /* Support HURD-castrated
                                                      file systems */
+#define EXT4_MOUNT2_DAX_NEVER          0x00000008 /* Do not allow Direct Access */
+#define EXT4_MOUNT2_DAX_INODE          0x00000010 /* For printing options only */
 
 #define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM  0x00000008 /* User explicitly
                                                specified journal checksum */
@@ -1992,6 +2004,7 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
  */
 #define EXT4_FLAGS_RESIZING    0
 #define EXT4_FLAGS_SHUTDOWN    1
+#define EXT4_FLAGS_BDEV_IS_DAX 2
 
 static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi)
 {
@@ -2705,7 +2718,7 @@ extern int ext4_can_truncate(struct inode *inode);
 extern int ext4_truncate(struct inode *);
 extern int ext4_break_layouts(struct inode *);
 extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
-extern void ext4_set_inode_flags(struct inode *);
+extern void ext4_set_inode_flags(struct inode *, bool init);
 extern int ext4_alloc_da_blocks(struct inode *inode);
 extern void ext4_set_aops(struct inode *inode);
 extern int ext4_writepage_trans_blocks(struct inode *);
index 7d088ff1e90285773f09c5086d1478033a1eaf7b..221f240eae60475f5dd2503ab0542f4ab9c82e5c 100644 (file)
@@ -2844,7 +2844,7 @@ again:
                         * in use to avoid freeing it when removing blocks.
                         */
                        if (sbi->s_cluster_ratio > 1) {
-                               pblk = ext4_ext_pblock(ex) + end - ee_block + 2;
+                               pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
                                partial.pclu = EXT4_B2C(sbi, pblk);
                                partial.state = nofree;
                        }
index 54d324e80fe5046d3e526c2b311c60fca11280d4..df25d38d65393ed1e88254a73104031886777dae 100644 (file)
@@ -1116,7 +1116,7 @@ got:
        ei->i_block_group = group;
        ei->i_last_alloc_group = ~0;
 
-       ext4_set_inode_flags(inode);
+       ext4_set_inode_flags(inode, true);
        if (IS_DIRSYNC(inode))
                ext4_handle_sync(handle);
        if (insert_inode_locked(inode) < 0) {
index 40ec5c7ef0d35d247b451ba387570c5459944eb0..10dd470876b30256374996319304b5c0938c7a73 100644 (file)
@@ -4403,9 +4403,11 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
                !ext4_test_inode_state(inode, EXT4_STATE_XATTR));
 }
 
-static bool ext4_should_use_dax(struct inode *inode)
+static bool ext4_should_enable_dax(struct inode *inode)
 {
-       if (!test_opt(inode->i_sb, DAX))
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+       if (test_opt2(inode->i_sb, DAX_NEVER))
                return false;
        if (!S_ISREG(inode->i_mode))
                return false;
@@ -4417,14 +4419,21 @@ static bool ext4_should_use_dax(struct inode *inode)
                return false;
        if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY))
                return false;
-       return true;
+       if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags))
+               return false;
+       if (test_opt(inode->i_sb, DAX_ALWAYS))
+               return true;
+
+       return ext4_test_inode_flag(inode, EXT4_INODE_DAX);
 }
 
-void ext4_set_inode_flags(struct inode *inode)
+void ext4_set_inode_flags(struct inode *inode, bool init)
 {
        unsigned int flags = EXT4_I(inode)->i_flags;
        unsigned int new_fl = 0;
 
+       WARN_ON_ONCE(IS_DAX(inode) && init);
+
        if (flags & EXT4_SYNC_FL)
                new_fl |= S_SYNC;
        if (flags & EXT4_APPEND_FL)
@@ -4435,8 +4444,13 @@ void ext4_set_inode_flags(struct inode *inode)
                new_fl |= S_NOATIME;
        if (flags & EXT4_DIRSYNC_FL)
                new_fl |= S_DIRSYNC;
-       if (ext4_should_use_dax(inode))
+
+       /* Because of the way inode_set_flags() works we must preserve S_DAX
+        * here if already set. */
+       new_fl |= (inode->i_flags & S_DAX);
+       if (init && ext4_should_enable_dax(inode))
                new_fl |= S_DAX;
+
        if (flags & EXT4_ENCRYPT_FL)
                new_fl |= S_ENCRYPTED;
        if (flags & EXT4_CASEFOLD_FL)
@@ -4650,7 +4664,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
                 * not initialized on a new filesystem. */
        }
        ei->i_flags = le32_to_cpu(raw_inode->i_flags);
-       ext4_set_inode_flags(inode);
+       ext4_set_inode_flags(inode, true);
        inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
        ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
        if (ext4_has_feature_64bit(sb))
index 2162db0c747d2694c7f050ef8447535ae17bab54..999cf6add39c62de85c8accfd60ef515d0fe92bf 100644 (file)
@@ -292,6 +292,38 @@ static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid,
        return 0;
 }
 
+static void ext4_dax_dontcache(struct inode *inode, unsigned int flags)
+{
+       struct ext4_inode_info *ei = EXT4_I(inode);
+
+       if (S_ISDIR(inode->i_mode))
+               return;
+
+       if (test_opt2(inode->i_sb, DAX_NEVER) ||
+           test_opt(inode->i_sb, DAX_ALWAYS))
+               return;
+
+       if ((ei->i_flags ^ flags) & EXT4_DAX_FL)
+               d_mark_dontcache(inode);
+}
+
+static bool dax_compatible(struct inode *inode, unsigned int oldflags,
+                          unsigned int flags)
+{
+       if (flags & EXT4_DAX_FL) {
+               if ((oldflags & EXT4_DAX_MUT_EXCL) ||
+                    ext4_test_inode_state(inode,
+                                         EXT4_STATE_VERITY_IN_PROGRESS)) {
+                       return false;
+               }
+       }
+
+       if ((flags & EXT4_DAX_MUT_EXCL) && (oldflags & EXT4_DAX_FL))
+                       return false;
+
+       return true;
+}
+
 static int ext4_ioctl_setflags(struct inode *inode,
                               unsigned int flags)
 {
@@ -300,7 +332,6 @@ static int ext4_ioctl_setflags(struct inode *inode,
        int err = -EPERM, migrate = 0;
        struct ext4_iloc iloc;
        unsigned int oldflags, mask, i;
-       unsigned int jflag;
        struct super_block *sb = inode->i_sb;
 
        /* Is it quota file? Do not allow user to mess with it */
@@ -309,9 +340,6 @@ static int ext4_ioctl_setflags(struct inode *inode,
 
        oldflags = ei->i_flags;
 
-       /* The JOURNAL_DATA flag is modifiable only by root */
-       jflag = flags & EXT4_JOURNAL_DATA_FL;
-
        err = vfs_ioc_setflags_prepare(inode, oldflags, flags);
        if (err)
                goto flags_out;
@@ -320,10 +348,16 @@ static int ext4_ioctl_setflags(struct inode *inode,
         * The JOURNAL_DATA flag can only be changed by
         * the relevant capability.
         */
-       if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
+       if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
                if (!capable(CAP_SYS_RESOURCE))
                        goto flags_out;
        }
+
+       if (!dax_compatible(inode, oldflags, flags)) {
+               err = -EOPNOTSUPP;
+               goto flags_out;
+       }
+
        if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
                migrate = 1;
 
@@ -369,6 +403,8 @@ static int ext4_ioctl_setflags(struct inode *inode,
        if (err)
                goto flags_err;
 
+       ext4_dax_dontcache(inode, flags);
+
        for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
                if (!(mask & EXT4_FL_USER_MODIFIABLE))
                        continue;
@@ -381,7 +417,8 @@ static int ext4_ioctl_setflags(struct inode *inode,
                        ext4_clear_inode_flag(inode, i);
        }
 
-       ext4_set_inode_flags(inode);
+       ext4_set_inode_flags(inode, false);
+
        inode->i_ctime = current_time(inode);
 
        err = ext4_mark_iloc_dirty(handle, inode, &iloc);
@@ -390,17 +427,18 @@ flags_err:
        if (err)
                goto flags_out;
 
-       if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
+       if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
                /*
                 * Changes to the journaling mode can cause unsafe changes to
-                * S_DAX if we are using the DAX mount option.
+                * S_DAX if the inode is DAX
                 */
-               if (test_opt(inode->i_sb, DAX)) {
+               if (IS_DAX(inode)) {
                        err = -EBUSY;
                        goto flags_out;
                }
 
-               err = ext4_change_inode_journal_flag(inode, jflag);
+               err = ext4_change_inode_journal_flag(inode,
+                                                    flags & EXT4_JOURNAL_DATA_FL);
                if (err)
                        goto flags_out;
        }
@@ -527,12 +565,15 @@ static inline __u32 ext4_iflags_to_xflags(unsigned long iflags)
                xflags |= FS_XFLAG_NOATIME;
        if (iflags & EXT4_PROJINHERIT_FL)
                xflags |= FS_XFLAG_PROJINHERIT;
+       if (iflags & EXT4_DAX_FL)
+               xflags |= FS_XFLAG_DAX;
        return xflags;
 }
 
 #define EXT4_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \
                                  FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \
-                                 FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT)
+                                 FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT | \
+                                 FS_XFLAG_DAX)
 
 /* Transfer xflags flags to internal */
 static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
@@ -551,6 +592,8 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
                iflags |= EXT4_NOATIME_FL;
        if (xflags & FS_XFLAG_PROJINHERIT)
                iflags |= EXT4_PROJINHERIT_FL;
+       if (xflags & FS_XFLAG_DAX)
+               iflags |= EXT4_DAX_FL;
 
        return iflags;
 }
index a9083113a8c0f47d7d4340da5d21fd4fa98073e6..c0a331e2feb02454a10f111b4d7f7f6c1cc29f09 100644 (file)
@@ -4708,7 +4708,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
        }
 
        ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
-       seq = *this_cpu_ptr(&discard_pa_seq);
+       seq = this_cpu_read(discard_pa_seq);
        if (!ext4_mb_use_preallocated(ac)) {
                ac->ac_op = EXT4_MB_HISTORY_ALLOC;
                ext4_mb_normalize_request(ac, ar);
index c668f6b42374bd728d45a90b38df5ca98140b9e1..330957ed1f05c817ee1c2d0f3d064279af75b5c1 100644 (file)
@@ -522,9 +522,6 @@ static void ext4_handle_error(struct super_block *sb)
                smp_wmb();
                sb->s_flags |= SB_RDONLY;
        } else if (test_opt(sb, ERRORS_PANIC)) {
-               if (EXT4_SB(sb)->s_journal &&
-                 !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
-                       return;
                panic("EXT4-fs (device %s): panic forced after error\n",
                        sb->s_id);
        }
@@ -725,23 +722,20 @@ void __ext4_abort(struct super_block *sb, const char *function,
        va_end(args);
 
        if (sb_rdonly(sb) == 0) {
-               ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
                EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
+               if (EXT4_SB(sb)->s_journal)
+                       jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
+
+               ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
                /*
                 * Make sure updated value of ->s_mount_flags will be visible
                 * before ->s_flags update
                 */
                smp_wmb();
                sb->s_flags |= SB_RDONLY;
-               if (EXT4_SB(sb)->s_journal)
-                       jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
        }
-       if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
-               if (EXT4_SB(sb)->s_journal &&
-                 !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
-                       return;
+       if (test_opt(sb, ERRORS_PANIC) && !system_going_down())
                panic("EXT4-fs panic from previous error\n");
-       }
 }
 
 void __ext4_msg(struct super_block *sb,
@@ -1324,6 +1318,9 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
        if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode)))
                return -EINVAL;
 
+       if (ext4_test_inode_flag(inode, EXT4_INODE_DAX))
+               return -EOPNOTSUPP;
+
        res = ext4_convert_inline_data(inode);
        if (res)
                return res;
@@ -1349,7 +1346,7 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
                         * Update inode->i_flags - S_ENCRYPTED will be enabled,
                         * S_DAX may be disabled
                         */
-                       ext4_set_inode_flags(inode);
+                       ext4_set_inode_flags(inode, false);
                }
                return res;
        }
@@ -1376,7 +1373,7 @@ retry:
                 * Update inode->i_flags - S_ENCRYPTED will be enabled,
                 * S_DAX may be disabled
                 */
-               ext4_set_inode_flags(inode);
+               ext4_set_inode_flags(inode, false);
                res = ext4_mark_inode_dirty(handle, inode);
                if (res)
                        EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
@@ -1514,7 +1511,8 @@ enum {
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
        Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
-       Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
+       Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version,
+       Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
        Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
        Opt_nowarn_on_error, Opt_mblk_io_submit,
        Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
@@ -1581,6 +1579,9 @@ static const match_table_t tokens = {
        {Opt_nobarrier, "nobarrier"},
        {Opt_i_version, "i_version"},
        {Opt_dax, "dax"},
+       {Opt_dax_always, "dax=always"},
+       {Opt_dax_inode, "dax=inode"},
+       {Opt_dax_never, "dax=never"},
        {Opt_stripe, "stripe=%u"},
        {Opt_delalloc, "delalloc"},
        {Opt_warn_on_error, "warn_on_error"},
@@ -1729,6 +1730,7 @@ static int clear_qf_name(struct super_block *sb, int qtype)
 #define MOPT_NO_EXT3   0x0200
 #define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
 #define MOPT_STRING    0x0400
+#define MOPT_SKIP      0x0800
 
 static const struct mount_opts {
        int     token;
@@ -1778,7 +1780,13 @@ static const struct mount_opts {
        {Opt_min_batch_time, 0, MOPT_GTE0},
        {Opt_inode_readahead_blks, 0, MOPT_GTE0},
        {Opt_init_itable, 0, MOPT_GTE0},
-       {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
+       {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET | MOPT_SKIP},
+       {Opt_dax_always, EXT4_MOUNT_DAX_ALWAYS,
+               MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
+       {Opt_dax_inode, EXT4_MOUNT2_DAX_INODE,
+               MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
+       {Opt_dax_never, EXT4_MOUNT2_DAX_NEVER,
+               MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
        {Opt_stripe, 0, MOPT_GTE0},
        {Opt_resuid, 0, MOPT_GTE0},
        {Opt_resgid, 0, MOPT_GTE0},
@@ -2123,13 +2131,56 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
                }
                sbi->s_jquota_fmt = m->mount_opt;
 #endif
-       } else if (token == Opt_dax) {
+       } else if (token == Opt_dax || token == Opt_dax_always ||
+                  token == Opt_dax_inode || token == Opt_dax_never) {
 #ifdef CONFIG_FS_DAX
-               ext4_msg(sb, KERN_WARNING,
-               "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
-               sbi->s_mount_opt |= m->mount_opt;
+               switch (token) {
+               case Opt_dax:
+               case Opt_dax_always:
+                       if (is_remount &&
+                           (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
+                            (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) {
+                       fail_dax_change_remount:
+                               ext4_msg(sb, KERN_ERR, "can't change "
+                                        "dax mount option while remounting");
+                               return -1;
+                       }
+                       if (is_remount &&
+                           (test_opt(sb, DATA_FLAGS) ==
+                            EXT4_MOUNT_JOURNAL_DATA)) {
+                                   ext4_msg(sb, KERN_ERR, "can't mount with "
+                                            "both data=journal and dax");
+                                   return -1;
+                       }
+                       ext4_msg(sb, KERN_WARNING,
+                               "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
+                       sbi->s_mount_opt |= EXT4_MOUNT_DAX_ALWAYS;
+                       sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
+                       break;
+               case Opt_dax_never:
+                       if (is_remount &&
+                           (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
+                            (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS)))
+                               goto fail_dax_change_remount;
+                       sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
+                       sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
+                       break;
+               case Opt_dax_inode:
+                       if (is_remount &&
+                           ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
+                            (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
+                            !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE)))
+                               goto fail_dax_change_remount;
+                       sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
+                       sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
+                       /* Strictly for printing options */
+                       sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_INODE;
+                       break;
+               }
 #else
                ext4_msg(sb, KERN_INFO, "dax option not supported");
+               sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
+               sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
                return -1;
 #endif
        } else if (token == Opt_data_err_abort) {
@@ -2293,7 +2344,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
        for (m = ext4_mount_opts; m->token != Opt_err; m++) {
                int want_set = m->flags & MOPT_SET;
                if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
-                   (m->flags & MOPT_CLEAR_ERR))
+                   (m->flags & MOPT_CLEAR_ERR) || m->flags & MOPT_SKIP)
                        continue;
                if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
                        continue; /* skip if same as the default */
@@ -2353,6 +2404,17 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
 
        fscrypt_show_test_dummy_encryption(seq, sep, sb);
 
+       if (test_opt(sb, DAX_ALWAYS)) {
+               if (IS_EXT2_SB(sb))
+                       SEQ_OPTS_PUTS("dax");
+               else
+                       SEQ_OPTS_PUTS("dax=always");
+       } else if (test_opt2(sb, DAX_NEVER)) {
+               SEQ_OPTS_PUTS("dax=never");
+       } else if (test_opt2(sb, DAX_INODE)) {
+               SEQ_OPTS_PUTS("dax=inode");
+       }
+
        ext4_show_quota_options(seq, sb);
        return 0;
 }
@@ -2383,6 +2445,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
                ext4_msg(sb, KERN_ERR, "revision level too high, "
                         "forcing read-only mode");
                err = -EROFS;
+               goto done;
        }
        if (read_only)
                goto done;
@@ -4017,7 +4080,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                                 "both data=journal and delalloc");
                        goto failed_mount;
                }
-               if (test_opt(sb, DAX)) {
+               if (test_opt(sb, DAX_ALWAYS)) {
                        ext4_msg(sb, KERN_ERR, "can't mount with "
                                 "both data=journal and dax");
                        goto failed_mount;
@@ -4127,13 +4190,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                goto failed_mount;
        }
 
-       if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
+       if (bdev_dax_supported(sb->s_bdev, blocksize))
+               set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
+
+       if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
                if (ext4_has_feature_inline_data(sb)) {
                        ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
                                        " that may contain inline data");
                        goto failed_mount;
                }
-               if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
+               if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
                        ext4_msg(sb, KERN_ERR,
                                "DAX unsupported by block device.");
                        goto failed_mount;
@@ -5447,12 +5513,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
                        err = -EINVAL;
                        goto restore_opts;
                }
-               if (test_opt(sb, DAX)) {
-                       ext4_msg(sb, KERN_ERR, "can't mount with "
-                                "both data=journal and dax");
-                       err = -EINVAL;
-                       goto restore_opts;
-               }
        } else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
                if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
                        ext4_msg(sb, KERN_ERR, "can't mount with "
@@ -5468,12 +5528,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
                goto restore_opts;
        }
 
-       if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
-               ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
-                       "dax flag with busy inodes while remounting");
-               sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
-       }
-
        if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
                ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user");
 
index dec1244dd062b02aa74f3ef1c5225d6e4e4a101c..bbd5e7e0632b4ab34883f3362148cee631cbdc37 100644 (file)
@@ -113,6 +113,9 @@ static int ext4_begin_enable_verity(struct file *filp)
        handle_t *handle;
        int err;
 
+       if (IS_DAX(inode) || ext4_test_inode_flag(inode, EXT4_INODE_DAX))
+               return -EINVAL;
+
        if (ext4_verity_in_progress(inode))
                return -EBUSY;
 
@@ -241,7 +244,7 @@ static int ext4_end_enable_verity(struct file *filp, const void *desc,
                if (err)
                        goto out_stop;
                ext4_set_inode_flag(inode, EXT4_INODE_VERITY);
-               ext4_set_inode_flags(inode);
+               ext4_set_inode_flags(inode, false);
                err = ext4_mark_iloc_dirty(handle, inode, &iloc);
        }
 out_stop:
index 9b29a40738acc03b3aad5a0c934fdfc6c84425cd..7d2f6576d954400700221f42bc74475cb48db1dc 100644 (file)
@@ -93,6 +93,7 @@ static const struct xattr_handler * const ext4_xattr_handler_map[] = {
 #ifdef CONFIG_EXT4_FS_SECURITY
        [EXT4_XATTR_INDEX_SECURITY]          = &ext4_xattr_security_handler,
 #endif
+       [EXT4_XATTR_INDEX_HURD]              = &ext4_xattr_hurd_handler,
 };
 
 const struct xattr_handler *ext4_xattr_handlers[] = {
@@ -105,6 +106,7 @@ const struct xattr_handler *ext4_xattr_handlers[] = {
 #ifdef CONFIG_EXT4_FS_SECURITY
        &ext4_xattr_security_handler,
 #endif
+       &ext4_xattr_hurd_handler,
        NULL
 };
 
index ffe21ac77f788aa6b9016b1ff7da94d0c8b58b7e..730b91fa0dd70231c1c3300650146c2e5ea8902b 100644 (file)
@@ -124,6 +124,7 @@ struct ext4_xattr_inode_array {
 extern const struct xattr_handler ext4_xattr_user_handler;
 extern const struct xattr_handler ext4_xattr_trusted_handler;
 extern const struct xattr_handler ext4_xattr_security_handler;
+extern const struct xattr_handler ext4_xattr_hurd_handler;
 
 #define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c"
 
diff --git a/fs/ext4/xattr_hurd.c b/fs/ext4/xattr_hurd.c
new file mode 100644 (file)
index 0000000..8cfa74a
--- /dev/null
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/fs/ext4/xattr_hurd.c
+ * Handler for extended gnu attributes for the Hurd.
+ *
+ * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
+ * Copyright (C) 2020 by Jan (janneke) Nieuwenhuizen, <janneke@gnu.org>
+ */
+
+#include <linux/init.h>
+#include <linux/string.h>
+#include "ext4.h"
+#include "xattr.h"
+
+static bool
+ext4_xattr_hurd_list(struct dentry *dentry)
+{
+       return test_opt(dentry->d_sb, XATTR_USER);
+}
+
+static int
+ext4_xattr_hurd_get(const struct xattr_handler *handler,
+                   struct dentry *unused, struct inode *inode,
+                   const char *name, void *buffer, size_t size)
+{
+       if (!test_opt(inode->i_sb, XATTR_USER))
+               return -EOPNOTSUPP;
+
+       return ext4_xattr_get(inode, EXT4_XATTR_INDEX_HURD,
+                             name, buffer, size);
+}
+
+static int
+ext4_xattr_hurd_set(const struct xattr_handler *handler,
+                   struct dentry *unused, struct inode *inode,
+                   const char *name, const void *value,
+                   size_t size, int flags)
+{
+       if (!test_opt(inode->i_sb, XATTR_USER))
+               return -EOPNOTSUPP;
+
+       return ext4_xattr_set(inode, EXT4_XATTR_INDEX_HURD,
+                             name, value, size, flags);
+}
+
+const struct xattr_handler ext4_xattr_hurd_handler = {
+       .prefix = XATTR_HURD_PREFIX,
+       .list   = ext4_xattr_hurd_list,
+       .get    = ext4_xattr_hurd_get,
+       .set    = ext4_xattr_hurd_set,
+};
index a49d0e670ddf82bdb98382312101481af2943c2b..e4944436e733d01611504502455d24147fa1f2ce 100644 (file)
@@ -1140,6 +1140,7 @@ static journal_t *journal_init_common(struct block_device *bdev,
        init_waitqueue_head(&journal->j_wait_commit);
        init_waitqueue_head(&journal->j_wait_updates);
        init_waitqueue_head(&journal->j_wait_reserved);
+       mutex_init(&journal->j_abort_mutex);
        mutex_init(&journal->j_barrier);
        mutex_init(&journal->j_checkpoint_mutex);
        spin_lock_init(&journal->j_revoke_lock);
@@ -1402,7 +1403,8 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
                printk(KERN_ERR "JBD2: Error %d detected when updating "
                       "journal superblock for %s.\n", ret,
                       journal->j_devname);
-               jbd2_journal_abort(journal, ret);
+               if (!is_journal_aborted(journal))
+                       jbd2_journal_abort(journal, ret);
        }
 
        return ret;
@@ -2153,6 +2155,13 @@ void jbd2_journal_abort(journal_t *journal, int errno)
 {
        transaction_t *transaction;
 
+       /*
+        * Lock the aborting procedure until everything is done. This avoids
+        * races with the filesystem's error handling flow (e.g. ext4_abort())
+        * and ensures the panic happens after the error info is written into
+        * the journal's superblock.
+        */
+       mutex_lock(&journal->j_abort_mutex);
        /*
         * ESHUTDOWN always takes precedence because a file system check
         * caused by any other journal abort error is not required after
@@ -2167,6 +2176,7 @@ void jbd2_journal_abort(journal_t *journal, int errno)
                        journal->j_errno = errno;
                        jbd2_journal_update_sb_errno(journal);
                }
+               mutex_unlock(&journal->j_abort_mutex);
                return;
        }
 
@@ -2188,10 +2198,7 @@ void jbd2_journal_abort(journal_t *journal, int errno)
         * layer could realise that a filesystem check is needed.
         */
        jbd2_journal_update_sb_errno(journal);
-
-       write_lock(&journal->j_state_lock);
-       journal->j_flags |= JBD2_REC_ERR;
-       write_unlock(&journal->j_state_lock);
+       mutex_unlock(&journal->j_abort_mutex);
 }
 
 /**
index f613d8529863f6421f8f944ed971a3f99d99236a..d56128df2aff9f6d83bc7a8b1194b36c894fc969 100644 (file)
@@ -765,6 +765,11 @@ struct journal_s
         */
        int                     j_errno;
 
+       /**
+        * @j_abort_mutex: Lock the whole aborting procedure.
+        */
+       struct mutex            j_abort_mutex;
+
        /**
         * @j_sb_buffer: The first part of the superblock buffer.
         */
@@ -1247,7 +1252,6 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3,                CSUM_V3)
 #define JBD2_ABORT_ON_SYNCDATA_ERR     0x040   /* Abort the journal on file
                                                 * data write error in ordered
                                                 * mode */
-#define JBD2_REC_ERR   0x080   /* The errno in the sb has been recorded */
 
 /*
  * Function declarations for the journaling transaction and buffer
index 379a612f8f1d9f45d1703bc55a67fa677862b8d0..f44eb0a04afdd8cea369af1395c3637a5f69122d 100644 (file)
@@ -262,6 +262,7 @@ struct fsxattr {
 #define FS_EA_INODE_FL                 0x00200000 /* Inode used for large EA */
 #define FS_EOFBLOCKS_FL                        0x00400000 /* Reserved for ext4 */
 #define FS_NOCOW_FL                    0x00800000 /* Do not cow file */
+#define FS_DAX_FL                      0x02000000 /* Inode is DAX */
 #define FS_INLINE_DATA_FL              0x10000000 /* Reserved for ext4 */
 #define FS_PROJINHERIT_FL              0x20000000 /* Create with parents projid */
 #define FS_CASEFOLD_FL                 0x40000000 /* Folder is case insensitive */
index c1395b5bd432a0e4fe9275b8d4eece00a52999c2..9463db2dfa9d412dbf13051763416b7464876c0f 100644 (file)
@@ -7,6 +7,7 @@
   Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
   Copyright (c) 2001-2002 Silicon Graphics, Inc.  All Rights Reserved.
   Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
+  Copyright (c) 2020 Jan (janneke) Nieuwenhuizen <janneke@gnu.org>
 */
 
 #include <linux/libc-compat.h>
@@ -31,6 +32,9 @@
 #define XATTR_BTRFS_PREFIX "btrfs."
 #define XATTR_BTRFS_PREFIX_LEN (sizeof(XATTR_BTRFS_PREFIX) - 1)
 
+#define XATTR_HURD_PREFIX "gnu."
+#define XATTR_HURD_PREFIX_LEN (sizeof(XATTR_HURD_PREFIX) - 1)
+
 #define XATTR_SECURITY_PREFIX  "security."
 #define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1)