Enable ext4 support for per-file/directory dax operations
author: Theodore Ts'o <tytso@mit.edu>
Thu, 11 Jun 2020 14:51:44 +0000 (10:51 -0400)
committer: Theodore Ts'o <tytso@mit.edu>
Thu, 11 Jun 2020 14:51:44 +0000 (10:51 -0400)
This adds the same per-file/per-directory DAX support for ext4 as was
done for xfs, now that we finally have consensus over what the
interface should be.

47 files changed:
Documentation/filesystems/fiemap.txt
fs/bad_inode.c
fs/btrfs/extent_io.h
fs/btrfs/inode.c
fs/cifs/inode.c
fs/cifs/smb2ops.c
fs/ext2/inode.c
fs/ext4/Kconfig
fs/ext4/acl.c
fs/ext4/balloc.c
fs/ext4/ext4.h
fs/ext4/ext4_extents.h
fs/ext4/ext4_jbd2.h
fs/ext4/extents.c
fs/ext4/extents_status.c
fs/ext4/file.c
fs/ext4/fsync.c
fs/ext4/ialloc.c
fs/ext4/indirect.c
fs/ext4/inline.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/ext4/mballoc.h
fs/ext4/migrate.c
fs/ext4/namei.c
fs/ext4/super.c
fs/ext4/xattr.c
fs/f2fs/data.c
fs/f2fs/inline.c
fs/fs-writeback.c
fs/gfs2/inode.c
fs/hpfs/file.c
fs/internal.h
fs/ioctl.c
fs/iomap/fiemap.c
fs/jbd2/transaction.c
fs/nilfs2/inode.c
fs/ocfs2/extent_map.c
fs/overlayfs/inode.c
fs/xfs/xfs_iops.c
include/linux/fiemap.h [new file with mode: 0644]
include/linux/fs.h
include/linux/iomap.h
include/linux/writeback.h
include/trace/events/ext4.h
include/uapi/linux/fiemap.h

index ac87e6fda842b4dab2898095ff6fc3a14e870e1d..35c8571eccb6e378f9c2db8aaafee2b6cf8adf80 100644 (file)
@@ -203,16 +203,18 @@ EINTR once fatal signal received.
 
 
 Flag checking should be done at the beginning of the ->fiemap callback via the
-fiemap_check_flags() helper:
+fiemap_prep() helper:
 
-int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
+int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo,
+               u64 start, u64 *len, u32 supported_flags);
 
 The struct fieinfo should be passed in as received from ioctl_fiemap(). The
 set of fiemap flags which the fs understands should be passed via fs_flags. If
-fiemap_check_flags finds invalid user flags, it will place the bad values in
+fiemap_prep finds invalid user flags, it will place the bad values in
 fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR, from
-fiemap_check_flags(), it should immediately exit, returning that error back to
-ioctl_fiemap().
+fiemap_prep(), it should immediately exit, returning that error back to
+ioctl_fiemap().  Additionally the range is validated against the supported
+maximum file size.
 
 
 For each extent in the request range, the file system should call
index 8035d2a445617905b595222e2b5bb2d4f52b80d4..54f0ce44427200ba32ba0db078faf120a9b5f172 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/time.h>
 #include <linux/namei.h>
 #include <linux/poll.h>
+#include <linux/fiemap.h>
 
 static int bad_file_open(struct inode *inode, struct file *filp)
 {
index 2ed65bd0760ea901c13164ea1871bcbfbc446c0d..817698bc0669392a5eb60cb2082893cff476fb97 100644 (file)
@@ -5,6 +5,7 @@
 
 #include <linux/rbtree.h>
 #include <linux/refcount.h>
+#include <linux/fiemap.h>
 #include "ulist.h"
 
 /*
index 320d1062068d36efa08869fd6f557255cf38b4cc..529ffa5e7b4525a6e26356a564dcdadf9bc4a191 100644 (file)
@@ -8243,14 +8243,12 @@ out:
        return ret;
 }
 
-#define BTRFS_FIEMAP_FLAGS     (FIEMAP_FLAG_SYNC)
-
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                __u64 start, __u64 len)
 {
        int     ret;
 
-       ret = fiemap_check_flags(fieinfo, BTRFS_FIEMAP_FLAGS);
+       ret = fiemap_prep(inode, fieinfo, start, &len, 0);
        if (ret)
                return ret;
 
index 390d2b15ef6ef9d7014e90069cdad3d2880806d3..3f276eb8ca68dc638f2672a3258cadb335a8097e 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/freezer.h>
 #include <linux/sched/signal.h>
 #include <linux/wait_bit.h>
+#include <linux/fiemap.h>
 
 #include <asm/div64.h>
 #include "cifsfs.h"
index f829f4165d38c1b196481b91d2d172b1c96090ec..300ade2acc41e321a6adc00f2dccfda1a10dbd5e 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/uuid.h>
 #include <linux/sort.h>
 #include <crypto/aead.h>
+#include <linux/fiemap.h>
 #include "cifsfs.h"
 #include "cifsglob.h"
 #include "smb2pdu.h"
@@ -3407,8 +3408,9 @@ static int smb3_fiemap(struct cifs_tcon *tcon,
        int i, num, rc, flags, last_blob;
        u64 next;
 
-       if (fiemap_check_flags(fei, FIEMAP_FLAG_SYNC))
-               return -EBADR;
+       rc = fiemap_prep(d_inode(cfile->dentry), fei, start, &len, 0);
+       if (rc)
+               return rc;
 
        xid = get_xid();
  again:
index c885cf7d724b4830d0e952acbc91f0db1b3b1f63..0f12a0e8a8d975c55e5af51e83e033eaabec24fa 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/iomap.h>
 #include <linux/namei.h>
 #include <linux/uio.h>
+#include <linux/fiemap.h>
 #include "ext2.h"
 #include "acl.h"
 #include "xattr.h"
index 2a592e38cdfeceafac5a98354cb5fcee63b3cfd3..cf9e430514c449ba7b940f448adc03094b205f2e 100644 (file)
@@ -99,8 +99,7 @@ config EXT4_DEBUG
          Enables run-time debugging support for the ext4 filesystem.
 
          If you select Y here, then you will be able to turn on debugging
-         with a command such as:
-               echo 1 > /sys/module/ext4/parameters/mballoc_debug
+         using dynamic debug control for mb_debug() / ext_debug() msgs.
 
 config EXT4_KUNIT_TESTS
        tristate "KUnit tests for ext4"
index 8c7bbf3e566def24a84c4d7819ade77cbcbf9da8..76f634d185f1084935bf37f01beaf7724f851e5f 100644 (file)
@@ -215,9 +215,8 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type,
                                      value, size, xattr_flags);
 
        kfree(value);
-       if (!error) {
+       if (!error)
                set_cached_acl(inode, type, acl);
-       }
 
        return error;
 }
@@ -256,7 +255,7 @@ retry:
        if (!error && update_mode) {
                inode->i_mode = mode;
                inode->i_ctime = current_time(inode);
-               ext4_mark_inode_dirty(handle, inode);
+               error = ext4_mark_inode_dirty(handle, inode);
        }
 out_stop:
        ext4_journal_stop(handle);
index a32e5f7b53853ffce8c8249c0753ff3ad554d5b1..1ba46d87cdf103a81146358b84765bfa4f6fc311 100644 (file)
@@ -903,10 +903,11 @@ ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode)
                return bg_start;
 
        if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
-               colour = (current->pid % 16) *
+               colour = (task_pid_nr(current) % 16) *
                        (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
        else
-               colour = (current->pid % 16) * ((last_block - bg_start) / 16);
+               colour = (task_pid_nr(current) % 16) *
+                       ((last_block - bg_start) / 16);
        return bg_start + colour;
 }
 
index 598e00a9453fc7d2b0d1a628cae820f3ec48b21a..5148cbb0b4b34f7c29139fafd9ac6d382098d8e9 100644 (file)
@@ -36,6 +36,7 @@
 #include <crypto/hash.h>
 #include <linux/falloc.h>
 #include <linux/percpu-rwsem.h>
+#include <linux/fiemap.h>
 #ifdef __KERNEL__
 #include <linux/compat.h>
 #endif
 #define ext4_debug(fmt, ...)   no_printk(fmt, ##__VA_ARGS__)
 #endif
 
+ /*
+  * Turn on EXT_DEBUG to enable ext4_ext_show_path/leaf/move in extents.c
+  */
+#define EXT_DEBUG__
+
 /*
- * Turn on EXT_DEBUG to get lots of info about extents operations.
+ * Dynamic printk for controlled extents debugging.
  */
-#define EXT_DEBUG__
-#ifdef EXT_DEBUG
-#define ext_debug(fmt, ...)    printk(fmt, ##__VA_ARGS__)
+#ifdef CONFIG_EXT4_DEBUG
+#define ext_debug(ino, fmt, ...)                                       \
+       pr_debug("[%s/%d] EXT4-fs (%s): ino %lu: (%s, %d): %s:" fmt,    \
+                current->comm, task_pid_nr(current),                   \
+                ino->i_sb->s_id, ino->i_ino, __FILE__, __LINE__,       \
+                __func__, ##__VA_ARGS__)
 #else
-#define ext_debug(fmt, ...)    no_printk(fmt, ##__VA_ARGS__)
+#define ext_debug(ino, fmt, ...)       no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /* data type for block offset of block group */
@@ -142,6 +151,8 @@ enum SHIFT_DIRECTION {
 #define EXT4_MB_USE_ROOT_BLOCKS                0x1000
 /* Use blocks from reserved pool */
 #define EXT4_MB_USE_RESERVED           0x2000
+/* Do strict check for free blocks while retrying block allocation */
+#define EXT4_MB_STRICT_CHECK           0x4000
 
 struct ext4_allocation_request {
        /* target inode for block we're allocating */
@@ -171,10 +182,10 @@ struct ext4_allocation_request {
  * well as to store the information returned by ext4_map_blocks().  It
  * takes less room on the stack than a struct buffer_head.
  */
-#define EXT4_MAP_NEW           (1 << BH_New)
-#define EXT4_MAP_MAPPED                (1 << BH_Mapped)
-#define EXT4_MAP_UNWRITTEN     (1 << BH_Unwritten)
-#define EXT4_MAP_BOUNDARY      (1 << BH_Boundary)
+#define EXT4_MAP_NEW           BIT(BH_New)
+#define EXT4_MAP_MAPPED                BIT(BH_Mapped)
+#define EXT4_MAP_UNWRITTEN     BIT(BH_Unwritten)
+#define EXT4_MAP_BOUNDARY      BIT(BH_Boundary)
 #define EXT4_MAP_FLAGS         (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
                                 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY)
 
@@ -420,7 +431,7 @@ struct flex_groups {
 
 #define EXT4_INLINE_DATA_FL            0x10000000 /* Inode has inline data. */
 #define EXT4_PROJINHERIT_FL            0x20000000 /* Create with parents projid */
-#define EXT4_CASEFOLD_FL               0x40000000 /* Casefolded file */
+#define EXT4_CASEFOLD_FL               0x40000000 /* Casefolded directory */
 #define EXT4_RESERVED_FL               0x80000000 /* reserved for ext4 lib */
 
 #define EXT4_FL_USER_VISIBLE           0x725BDFFF /* User visible flags */
@@ -500,6 +511,7 @@ enum {
        EXT4_INODE_DAX          = 25,   /* Inode is DAX */
        EXT4_INODE_INLINE_DATA  = 28,   /* Data in inode. */
        EXT4_INODE_PROJINHERIT  = 29,   /* Create with parents projid */
+       EXT4_INODE_CASEFOLD     = 30,   /* Casefolded directory */
        EXT4_INODE_RESERVED     = 31,   /* reserved for ext4 lib */
 };
 
@@ -545,6 +557,7 @@ static inline void ext4_check_flag_values(void)
        CHECK_FLAG_VALUE(EA_INODE);
        CHECK_FLAG_VALUE(INLINE_DATA);
        CHECK_FLAG_VALUE(PROJINHERIT);
+       CHECK_FLAG_VALUE(CASEFOLD);
        CHECK_FLAG_VALUE(RESERVED);
 }
 
@@ -619,8 +632,6 @@ enum {
 #define EXT4_GET_BLOCKS_METADATA_NOFAIL                0x0020
        /* Don't normalize allocation size (used for fallocate) */
 #define EXT4_GET_BLOCKS_NO_NORMALIZE           0x0040
-       /* Request will not result in inode size update (user for fallocate) */
-#define EXT4_GET_BLOCKS_KEEP_SIZE              0x0080
        /* Convert written extents to unwritten */
 #define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN      0x0100
        /* Write zeros to newly created written extents */
@@ -642,6 +653,7 @@ enum {
  */
 #define EXT4_EX_NOCACHE                                0x40000000
 #define EXT4_EX_FORCE_CACHE                    0x20000000
+#define EXT4_EX_NOFAIL                         0x10000000
 
 /*
  * Flags used by ext4_free_blocks
@@ -732,7 +744,7 @@ enum {
 #define EXT4_MAX_BLOCK_FILE_PHYS       0xFFFFFFFF
 
 /* Max logical block we can support */
-#define EXT4_MAX_LOGICAL_BLOCK         0xFFFFFFFF
+#define EXT4_MAX_LOGICAL_BLOCK         0xFFFFFFFE
 
 /*
  * Structure of an inode on the disk
@@ -2063,7 +2075,7 @@ struct ext4_dir_entry_2 {
        __le32  inode;                  /* Inode number */
        __le16  rec_len;                /* Directory entry length */
        __u8    name_len;               /* Name length */
-       __u8    file_type;
+       __u8    file_type;              /* See file type macros EXT4_FT_* below */
        char    name[EXT4_NAME_LEN];    /* File name */
 };
 
@@ -3367,7 +3379,7 @@ struct ext4_extent;
  */
 #define EXT_MAX_BLOCKS 0xffffffff
 
-extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
+extern void ext4_ext_tree_init(handle_t *handle, struct inode *inode);
 extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents);
 extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                               struct ext4_map_blocks *map, int flags);
index 1c216fcc202ad4629927d822645107a234bf0dad..44e59881a1f0b5d6d7f18082d36acc53f0d8233e 100644 (file)
@@ -170,10 +170,13 @@ struct partial_cluster {
        (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
 #define EXT_LAST_INDEX(__hdr__) \
        (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
-#define EXT_MAX_EXTENT(__hdr__) \
-       (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
+#define EXT_MAX_EXTENT(__hdr__)        \
+       ((le16_to_cpu((__hdr__)->eh_max)) ? \
+       ((EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)) \
+                                       : 0)
 #define EXT_MAX_INDEX(__hdr__) \
-       (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
+       ((le16_to_cpu((__hdr__)->eh_max)) ? \
+       ((EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)) : 0)
 
 static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode)
 {
index 4b9002f0e84c0efff1498bfafa1310d3fdf37282..00dc668e052b38c700dac3bcab56b442b961c8c6 100644 (file)
@@ -222,7 +222,10 @@ ext4_mark_iloc_dirty(handle_t *handle,
 int ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
                        struct ext4_iloc *iloc);
 
-int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
+#define ext4_mark_inode_dirty(__h, __i)                                        \
+               __ext4_mark_inode_dirty((__h), (__i), __func__, __LINE__)
+int __ext4_mark_inode_dirty(handle_t *handle, struct inode *inode,
+                               const char *func, unsigned int line);
 
 int ext4_expand_extra_isize(struct inode *inode,
                            unsigned int new_extra_isize,
@@ -335,12 +338,6 @@ static inline handle_t *__ext4_journal_start(struct inode *inode,
 handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
                                        int type);
 
-static inline void ext4_journal_free_reserved(handle_t *handle)
-{
-       if (ext4_handle_valid(handle))
-               jbd2_journal_free_reserved(handle);
-}
-
 static inline handle_t *ext4_journal_current_handle(void)
 {
        return journal_current_handle();
index f2b577b315a09371210b180934f3c0019134756e..7d088ff1e90285773f09c5086d1478033a1eaf7b 100644 (file)
@@ -297,11 +297,14 @@ ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
 {
        struct ext4_ext_path *path = *ppath;
        int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);
+       int flags = EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO;
+
+       if (nofail)
+               flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL | EXT4_EX_NOFAIL;
 
        return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ?
                        EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
-                       EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO |
-                       (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0));
+                       flags);
 }
 
 static int
@@ -487,8 +490,12 @@ __read_extent_tree_block(const char *function, unsigned int line,
 {
        struct buffer_head              *bh;
        int                             err;
+       gfp_t                           gfp_flags = __GFP_MOVABLE | GFP_NOFS;
+
+       if (flags & EXT4_EX_NOFAIL)
+               gfp_flags |= __GFP_NOFAIL;
 
-       bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS);
+       bh = sb_getblk_gfp(inode->i_sb, pblk, gfp_flags);
        if (unlikely(!bh))
                return ERR_PTR(-ENOMEM);
 
@@ -600,22 +607,22 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
 {
        int k, l = path->p_depth;
 
-       ext_debug("path:");
+       ext_debug(inode, "path:");
        for (k = 0; k <= l; k++, path++) {
                if (path->p_idx) {
-                       ext_debug("  %d->%llu",
+                       ext_debug(inode, "  %d->%llu",
                                  le32_to_cpu(path->p_idx->ei_block),
                                  ext4_idx_pblock(path->p_idx));
                } else if (path->p_ext) {
-                       ext_debug("  %d:[%d]%d:%llu ",
+                       ext_debug(inode, "  %d:[%d]%d:%llu ",
                                  le32_to_cpu(path->p_ext->ee_block),
                                  ext4_ext_is_unwritten(path->p_ext),
                                  ext4_ext_get_actual_len(path->p_ext),
                                  ext4_ext_pblock(path->p_ext));
                } else
-                       ext_debug("  []");
+                       ext_debug(inode, "  []");
        }
-       ext_debug("\n");
+       ext_debug(inode, "\n");
 }
 
 static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
@@ -631,14 +638,14 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
        eh = path[depth].p_hdr;
        ex = EXT_FIRST_EXTENT(eh);
 
-       ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino);
+       ext_debug(inode, "Displaying leaf extents\n");
 
        for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
-               ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
+               ext_debug(inode, "%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
                          ext4_ext_is_unwritten(ex),
                          ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
        }
-       ext_debug("\n");
+       ext_debug(inode, "\n");
 }
 
 static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
@@ -651,10 +658,9 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
                struct ext4_extent_idx *idx;
                idx = path[level].p_idx;
                while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) {
-                       ext_debug("%d: move %d:%llu in new index %llu\n", level,
-                                       le32_to_cpu(idx->ei_block),
-                                       ext4_idx_pblock(idx),
-                                       newblock);
+                       ext_debug(inode, "%d: move %d:%llu in new index %llu\n",
+                                 level, le32_to_cpu(idx->ei_block),
+                                 ext4_idx_pblock(idx), newblock);
                        idx++;
                }
 
@@ -663,7 +669,7 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
 
        ex = path[depth].p_ext;
        while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) {
-               ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
+               ext_debug(inode, "move %d:%llu:[%d]%d in new leaf %llu\n",
                                le32_to_cpu(ex->ee_block),
                                ext4_ext_pblock(ex),
                                ext4_ext_is_unwritten(ex),
@@ -707,7 +713,7 @@ ext4_ext_binsearch_idx(struct inode *inode,
        struct ext4_extent_idx *r, *l, *m;
 
 
-       ext_debug("binsearch for %u(idx):  ", block);
+       ext_debug(inode, "binsearch for %u(idx):  ", block);
 
        l = EXT_FIRST_INDEX(eh) + 1;
        r = EXT_LAST_INDEX(eh);
@@ -717,13 +723,13 @@ ext4_ext_binsearch_idx(struct inode *inode,
                        r = m - 1;
                else
                        l = m + 1;
-               ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ei_block),
-                               m, le32_to_cpu(m->ei_block),
-                               r, le32_to_cpu(r->ei_block));
+               ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
+                         le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block),
+                         r, le32_to_cpu(r->ei_block));
        }
 
        path->p_idx = l - 1;
-       ext_debug("  -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block),
+       ext_debug(inode, "  -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block),
                  ext4_idx_pblock(path->p_idx));
 
 #ifdef CHECK_BINSEARCH
@@ -774,7 +780,7 @@ ext4_ext_binsearch(struct inode *inode,
                return;
        }
 
-       ext_debug("binsearch for %u:  ", block);
+       ext_debug(inode, "binsearch for %u:  ", block);
 
        l = EXT_FIRST_EXTENT(eh) + 1;
        r = EXT_LAST_EXTENT(eh);
@@ -785,13 +791,13 @@ ext4_ext_binsearch(struct inode *inode,
                        r = m - 1;
                else
                        l = m + 1;
-               ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ee_block),
-                               m, le32_to_cpu(m->ee_block),
-                               r, le32_to_cpu(r->ee_block));
+               ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
+                         le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block),
+                         r, le32_to_cpu(r->ee_block));
        }
 
        path->p_ext = l - 1;
-       ext_debug("  -> %d:%llu:[%d]%d ",
+       ext_debug(inode, "  -> %d:%llu:[%d]%d ",
                        le32_to_cpu(path->p_ext->ee_block),
                        ext4_ext_pblock(path->p_ext),
                        ext4_ext_is_unwritten(path->p_ext),
@@ -816,7 +822,7 @@ ext4_ext_binsearch(struct inode *inode,
 
 }
 
-int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
+void ext4_ext_tree_init(handle_t *handle, struct inode *inode)
 {
        struct ext4_extent_header *eh;
 
@@ -826,7 +832,6 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
        eh->eh_magic = EXT4_EXT_MAGIC;
        eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
        ext4_mark_inode_dirty(handle, inode);
-       return 0;
 }
 
 struct ext4_ext_path *
@@ -838,6 +843,10 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block,
        struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
        short int depth, i, ppos = 0;
        int ret;
+       gfp_t gfp_flags = GFP_NOFS;
+
+       if (flags & EXT4_EX_NOFAIL)
+               gfp_flags |= __GFP_NOFAIL;
 
        eh = ext_inode_hdr(inode);
        depth = ext_depth(inode);
@@ -858,7 +867,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block,
        if (!path) {
                /* account possible depth increase */
                path = kcalloc(depth + 2, sizeof(struct ext4_ext_path),
-                               GFP_NOFS);
+                               gfp_flags);
                if (unlikely(!path))
                        return ERR_PTR(-ENOMEM);
                path[0].p_maxdepth = depth + 1;
@@ -871,7 +880,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block,
                ext4_cache_extents(inode, eh);
        /* walk through the tree */
        while (i) {
-               ext_debug("depth %d: num %d, max %d\n",
+               ext_debug(inode, "depth %d: num %d, max %d\n",
                          ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
 
                ext4_ext_binsearch_idx(inode, path + ppos, block);
@@ -948,18 +957,20 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
 
        if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
                /* insert after */
-               ext_debug("insert new index %d after: %llu\n", logical, ptr);
+               ext_debug(inode, "insert new index %d after: %llu\n",
+                         logical, ptr);
                ix = curp->p_idx + 1;
        } else {
                /* insert before */
-               ext_debug("insert new index %d before: %llu\n", logical, ptr);
+               ext_debug(inode, "insert new index %d before: %llu\n",
+                         logical, ptr);
                ix = curp->p_idx;
        }
 
        len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
        BUG_ON(len < 0);
        if (len > 0) {
-               ext_debug("insert new index %d: "
+               ext_debug(inode, "insert new index %d: "
                                "move %d indices from 0x%p to 0x%p\n",
                                logical, len, ix, ix + 1);
                memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
@@ -1008,9 +1019,13 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
        ext4_fsblk_t newblock, oldblock;
        __le32 border;
        ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
+       gfp_t gfp_flags = GFP_NOFS;
        int err = 0;
        size_t ext_size = 0;
 
+       if (flags & EXT4_EX_NOFAIL)
+               gfp_flags |= __GFP_NOFAIL;
+
        /* make decision: where to split? */
        /* FIXME: now decision is simplest: at current extent */
 
@@ -1022,12 +1037,12 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
        }
        if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
                border = path[depth].p_ext[1].ee_block;
-               ext_debug("leaf will be split."
+               ext_debug(inode, "leaf will be split."
                                " next leaf starts at %d\n",
                                  le32_to_cpu(border));
        } else {
                border = newext->ee_block;
-               ext_debug("leaf will be added."
+               ext_debug(inode, "leaf will be added."
                                " next leaf starts at %d\n",
                                le32_to_cpu(border));
        }
@@ -1044,12 +1059,12 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
         * We need this to handle errors and free blocks
         * upon them.
         */
-       ablocks = kcalloc(depth, sizeof(ext4_fsblk_t), GFP_NOFS);
+       ablocks = kcalloc(depth, sizeof(ext4_fsblk_t), gfp_flags);
        if (!ablocks)
                return -ENOMEM;
 
        /* allocate all needed blocks */
-       ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
+       ext_debug(inode, "allocate %d blocks for indexes/leaf\n", depth - at);
        for (a = 0; a < depth - at; a++) {
                newblock = ext4_ext_new_meta_block(handle, inode, path,
                                                   newext, &err, flags);
@@ -1135,7 +1150,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
                goto cleanup;
        }
        if (k)
-               ext_debug("create %d intermediate indices\n", k);
+               ext_debug(inode, "create %d intermediate indices\n", k);
        /* insert new index into current index block */
        /* current depth stored in i var */
        i = depth - 1;
@@ -1162,7 +1177,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
                fidx->ei_block = border;
                ext4_idx_store_pblock(fidx, oldblock);
 
-               ext_debug("int.index at %d (block %llu): %u -> %llu\n",
+               ext_debug(inode, "int.index at %d (block %llu): %u -> %llu\n",
                                i, newblock, le32_to_cpu(border), oldblock);
 
                /* move remainder of path[i] to the new index block */
@@ -1176,7 +1191,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
                }
                /* start copy indexes */
                m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++;
-               ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
+               ext_debug(inode, "cur 0x%p, last 0x%p\n", path[i].p_idx,
                                EXT_MAX_INDEX(path[i].p_hdr));
                ext4_ext_show_move(inode, path, newblock, i);
                if (m) {
@@ -1313,13 +1328,13 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
                EXT_FIRST_INDEX(neh)->ei_block =
                        EXT_FIRST_EXTENT(neh)->ee_block;
        }
-       ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
+       ext_debug(inode, "new root: num %d(%d), lblock %d, ptr %llu\n",
                  le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
                  le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
                  ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
 
        le16_add_cpu(&neh->eh_depth, 1);
-       ext4_mark_inode_dirty(handle, inode);
+       err = ext4_mark_inode_dirty(handle, inode);
 out:
        brelse(bh);
 
@@ -1955,7 +1970,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 
                /* Try to append newex to the ex */
                if (ext4_can_extents_be_merged(inode, ex, newext)) {
-                       ext_debug("append [%d]%d block to %u:[%d]%d"
+                       ext_debug(inode, "append [%d]%d block to %u:[%d]%d"
                                  "(from %llu)\n",
                                  ext4_ext_is_unwritten(newext),
                                  ext4_ext_get_actual_len(newext),
@@ -1980,7 +1995,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 prepend:
                /* Try to prepend newex to the ex */
                if (ext4_can_extents_be_merged(inode, newext, ex)) {
-                       ext_debug("prepend %u[%d]%d block to %u:[%d]%d"
+                       ext_debug(inode, "prepend %u[%d]%d block to %u:[%d]%d"
                                  "(from %llu)\n",
                                  le32_to_cpu(newext->ee_block),
                                  ext4_ext_is_unwritten(newext),
@@ -2018,20 +2033,20 @@ prepend:
        if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
                next = ext4_ext_next_leaf_block(path);
        if (next != EXT_MAX_BLOCKS) {
-               ext_debug("next leaf block - %u\n", next);
+               ext_debug(inode, "next leaf block - %u\n", next);
                BUG_ON(npath != NULL);
-               npath = ext4_find_extent(inode, next, NULL, 0);
+               npath = ext4_find_extent(inode, next, NULL, gb_flags);
                if (IS_ERR(npath))
                        return PTR_ERR(npath);
                BUG_ON(npath->p_depth != path->p_depth);
                eh = npath[depth].p_hdr;
                if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
-                       ext_debug("next leaf isn't full(%d)\n",
+                       ext_debug(inode, "next leaf isn't full(%d)\n",
                                  le16_to_cpu(eh->eh_entries));
                        path = npath;
                        goto has_space;
                }
-               ext_debug("next leaf has no free space(%d,%d)\n",
+               ext_debug(inode, "next leaf has no free space(%d,%d)\n",
                          le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
        }
 
@@ -2057,7 +2072,7 @@ has_space:
 
        if (!nearex) {
                /* there is no extent in this leaf, create first one */
-               ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n",
+               ext_debug(inode, "first extent in the leaf: %u:%llu:[%d]%d\n",
                                le32_to_cpu(newext->ee_block),
                                ext4_ext_pblock(newext),
                                ext4_ext_is_unwritten(newext),
@@ -2067,7 +2082,7 @@ has_space:
                if (le32_to_cpu(newext->ee_block)
                           > le32_to_cpu(nearex->ee_block)) {
                        /* Insert after */
-                       ext_debug("insert %u:%llu:[%d]%d before: "
+                       ext_debug(inode, "insert %u:%llu:[%d]%d before: "
                                        "nearest %p\n",
                                        le32_to_cpu(newext->ee_block),
                                        ext4_ext_pblock(newext),
@@ -2078,7 +2093,7 @@ has_space:
                } else {
                        /* Insert before */
                        BUG_ON(newext->ee_block == nearex->ee_block);
-                       ext_debug("insert %u:%llu:[%d]%d after: "
+                       ext_debug(inode, "insert %u:%llu:[%d]%d after: "
                                        "nearest %p\n",
                                        le32_to_cpu(newext->ee_block),
                                        ext4_ext_pblock(newext),
@@ -2088,7 +2103,7 @@ has_space:
                }
                len = EXT_LAST_EXTENT(eh) - nearex + 1;
                if (len > 0) {
-                       ext_debug("insert %u:%llu:[%d]%d: "
+                       ext_debug(inode, "insert %u:%llu:[%d]%d: "
                                        "move %d extents from 0x%p to 0x%p\n",
                                        le32_to_cpu(newext->ee_block),
                                        ext4_ext_pblock(newext),
@@ -2232,7 +2247,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
                        return;
                hole_len = min(es.es_lblk - hole_start, hole_len);
        }
-       ext_debug(" -> %u:%u\n", hole_start, hole_len);
+       ext_debug(inode, " -> %u:%u\n", hole_start, hole_len);
        ext4_es_insert_extent(inode, hole_start, hole_len, ~0,
                              EXTENT_STATUS_HOLE);
 }
@@ -2269,7 +2284,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
        err = ext4_ext_dirty(handle, inode, path);
        if (err)
                return err;
-       ext_debug("index is empty, remove it, free block %llu\n", leaf);
+       ext_debug(inode, "index is empty, remove it, free block %llu\n", leaf);
        trace_ext4_ext_rm_idx(inode, leaf);
 
        ext4_free_blocks(handle, inode, NULL, leaf, 1,
@@ -2548,7 +2563,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
        ext4_fsblk_t pblk;
 
        /* the header must be checked already in ext4_ext_remove_space() */
-       ext_debug("truncate since %u in leaf to %u\n", start, end);
+       ext_debug(inode, "truncate since %u in leaf to %u\n", start, end);
        if (!path[depth].p_hdr)
                path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
        eh = path[depth].p_hdr;
@@ -2574,7 +2589,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                else
                        unwritten = 0;
 
-               ext_debug("remove ext %u:[%d]%d\n", ex_ee_block,
+               ext_debug(inode, "remove ext %u:[%d]%d\n", ex_ee_block,
                          unwritten, ex_ee_len);
                path[depth].p_ext = ex;
 
@@ -2582,7 +2597,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                b = ex_ee_block+ex_ee_len - 1 < end ?
                        ex_ee_block+ex_ee_len - 1 : end;
 
-               ext_debug("  border %u:%u\n", a, b);
+               ext_debug(inode, "  border %u:%u\n", a, b);
 
                /* If this extent is beyond the end of the hole, skip it */
                if (end < ex_ee_block) {
@@ -2691,7 +2706,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                if (err)
                        goto out;
 
-               ext_debug("new extent: %u:%u:%llu\n", ex_ee_block, num,
+               ext_debug(inode, "new extent: %u:%u:%llu\n", ex_ee_block, num,
                                ext4_ext_pblock(ex));
                ex--;
                ex_ee_block = le32_to_cpu(ex->ee_block);
@@ -2768,7 +2783,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
        partial.lblk = 0;
        partial.state = initial;
 
-       ext_debug("truncate since %u to %u\n", start, end);
+       ext_debug(inode, "truncate since %u to %u\n", start, end);
 
        /* probably first extent we're gonna free will be last in block */
        handle = ext4_journal_start_with_revoke(inode, EXT4_HT_TRUNCATE,
@@ -2793,7 +2808,8 @@ again:
                ext4_fsblk_t pblk;
 
                /* find extent for or closest extent to this block */
-               path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
+               path = ext4_find_extent(inode, end, NULL,
+                                       EXT4_EX_NOCACHE | EXT4_EX_NOFAIL);
                if (IS_ERR(path)) {
                        ext4_journal_stop(handle);
                        return PTR_ERR(path);
@@ -2879,7 +2895,7 @@ again:
                                le16_to_cpu(path[k].p_hdr->eh_entries)+1;
        } else {
                path = kcalloc(depth + 1, sizeof(struct ext4_ext_path),
-                              GFP_NOFS);
+                              GFP_NOFS | __GFP_NOFAIL);
                if (path == NULL) {
                        ext4_journal_stop(handle);
                        return -ENOMEM;
@@ -2909,7 +2925,7 @@ again:
 
                /* this is index block */
                if (!path[i].p_hdr) {
-                       ext_debug("initialize header\n");
+                       ext_debug(inode, "initialize header\n");
                        path[i].p_hdr = ext_block_hdr(path[i].p_bh);
                }
 
@@ -2917,7 +2933,7 @@ again:
                        /* this level hasn't been touched yet */
                        path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
                        path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;
-                       ext_debug("init index ptr: hdr 0x%p, num %d\n",
+                       ext_debug(inode, "init index ptr: hdr 0x%p, num %d\n",
                                  path[i].p_hdr,
                                  le16_to_cpu(path[i].p_hdr->eh_entries));
                } else {
@@ -2925,13 +2941,13 @@ again:
                        path[i].p_idx--;
                }
 
-               ext_debug("level %d - index, first 0x%p, cur 0x%p\n",
+               ext_debug(inode, "level %d - index, first 0x%p, cur 0x%p\n",
                                i, EXT_FIRST_INDEX(path[i].p_hdr),
                                path[i].p_idx);
                if (ext4_ext_more_to_rm(path + i)) {
                        struct buffer_head *bh;
                        /* go to the next level */
-                       ext_debug("move to level %d (block %llu)\n",
+                       ext_debug(inode, "move to level %d (block %llu)\n",
                                  i + 1, ext4_idx_pblock(path[i].p_idx));
                        memset(path + i + 1, 0, sizeof(*path));
                        bh = read_extent_tree_block(inode,
@@ -2967,7 +2983,7 @@ again:
                        brelse(path[i].p_bh);
                        path[i].p_bh = NULL;
                        i--;
-                       ext_debug("return to level %d\n", i);
+                       ext_debug(inode, "return to level %d\n", i);
                }
        }
 
@@ -3135,8 +3151,7 @@ static int ext4_split_extent_at(handle_t *handle,
        BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
               (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
 
-       ext_debug("ext4_split_extents_at: inode %lu, logical"
-               "block %llu\n", inode->i_ino, (unsigned long long)split);
+       ext_debug(inode, "logical block %llu\n", (unsigned long long)split);
 
        ext4_ext_show_leaf(inode, path);
 
@@ -3244,6 +3259,10 @@ out:
 
 fix_extent_len:
        ex->ee_len = orig_ex.ee_len;
+       /*
+        * Ignore ext4_ext_dirty return value since we are already in error path
+        * and err is a non-zero error code.
+        */
        ext4_ext_dirty(handle, inode, path + path->p_depth);
        return err;
 }
@@ -3300,7 +3319,7 @@ static int ext4_split_extent(handle_t *handle,
         * Update path is required because previous ext4_split_extent_at() may
         * result in split of original leaf or extent zeroout.
         */
-       path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
+       path = ext4_find_extent(inode, map->m_lblk, ppath, flags);
        if (IS_ERR(path))
                return PTR_ERR(path);
        depth = ext_depth(inode);
@@ -3369,9 +3388,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
        int err = 0;
        int split_flag = EXT4_EXT_DATA_VALID2;
 
-       ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
-               "block %llu, max_blocks %u\n", inode->i_ino,
-               (unsigned long long)map->m_lblk, map_len);
+       ext_debug(inode, "logical block %llu, max_blocks %u\n",
+                 (unsigned long long)map->m_lblk, map_len);
 
        sbi = EXT4_SB(inode->i_sb);
        eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
@@ -3503,7 +3521,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
        }
        if (allocated) {
                /* Mark the block containing both extents as dirty */
-               ext4_ext_dirty(handle, inode, path + depth);
+               err = ext4_ext_dirty(handle, inode, path + depth);
 
                /* Update path to point to the right extent */
                path[depth].p_ext = abut_ex;
@@ -3623,8 +3641,7 @@ static int ext4_split_convert_extents(handle_t *handle,
        unsigned int ee_len;
        int split_flag = 0, depth;
 
-       ext_debug("%s: inode %lu, logical block %llu, max_blocks %u\n",
-                 __func__, inode->i_ino,
+       ext_debug(inode, "logical block %llu, max_blocks %u\n",
                  (unsigned long long)map->m_lblk, map->m_len);
 
        eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
@@ -3670,8 +3687,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
        ee_block = le32_to_cpu(ex->ee_block);
        ee_len = ext4_ext_get_actual_len(ex);
 
-       ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
-               "block %llu, max_blocks %u\n", inode->i_ino,
+       ext_debug(inode, "logical block %llu, max_blocks %u\n",
                  (unsigned long long)ee_block, ee_len);
 
        /* If extent is larger than requested it is a clear sign that we still
@@ -3741,8 +3757,7 @@ convert_initialized_extent(handle_t *handle, struct inode *inode,
        ee_block = le32_to_cpu(ex->ee_block);
        ee_len = ext4_ext_get_actual_len(ex);
 
-       ext_debug("%s: inode %lu, logical"
-               "block %llu, max_blocks %u\n", __func__, inode->i_ino,
+       ext_debug(inode, "logical block %llu, max_blocks %u\n",
                  (unsigned long long)ee_block, ee_len);
 
        if (ee_block != map->m_lblk || ee_len > map->m_len) {
@@ -3794,16 +3809,13 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
                        struct ext4_ext_path **ppath, int flags,
                        unsigned int allocated, ext4_fsblk_t newblock)
 {
-#ifdef EXT_DEBUG
-       struct ext4_ext_path *path = *ppath;
-#endif
+       struct ext4_ext_path __maybe_unused *path = *ppath;
        int ret = 0;
        int err = 0;
 
-       ext_debug("ext4_ext_handle_unwritten_extents: inode %lu, logical "
-                 "block %llu, max_blocks %u, flags %x, allocated %u\n",
-                 inode->i_ino, (unsigned long long)map->m_lblk, map->m_len,
-                 flags, allocated);
+       ext_debug(inode, "logical block %llu, max_blocks %u, flags 0x%x, allocated %u\n",
+                 (unsigned long long)map->m_lblk, map->m_len, flags,
+                 allocated);
        ext4_ext_show_leaf(inode, path);
 
        /*
@@ -3815,39 +3827,38 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
        trace_ext4_ext_handle_unwritten_extents(inode, map, flags,
                                                    allocated, newblock);
 
-       /* get_block() before submit the IO, split the extent */
+       /* get_block() before submitting IO, split the extent */
        if (flags & EXT4_GET_BLOCKS_PRE_IO) {
                ret = ext4_split_convert_extents(handle, inode, map, ppath,
                                         flags | EXT4_GET_BLOCKS_CONVERT);
-               if (ret <= 0)
-                       goto out;
+               if (ret < 0) {
+                       err = ret;
+                       goto out2;
+               }
+               /*
+                * shouldn't get a 0 return when splitting an extent unless
+                * m_len is 0 (bug) or extent has been corrupted
+                */
+               if (unlikely(ret == 0)) {
+                       EXT4_ERROR_INODE(inode,
+                                        "unexpected ret == 0, m_len = %u",
+                                        map->m_len);
+                       err = -EFSCORRUPTED;
+                       goto out2;
+               }
                map->m_flags |= EXT4_MAP_UNWRITTEN;
                goto out;
        }
        /* IO end_io complete, convert the filled extent to written */
        if (flags & EXT4_GET_BLOCKS_CONVERT) {
-               if (flags & EXT4_GET_BLOCKS_ZERO) {
-                       if (allocated > map->m_len)
-                               allocated = map->m_len;
-                       err = ext4_issue_zeroout(inode, map->m_lblk, newblock,
-                                                allocated);
-                       if (err < 0)
-                               goto out2;
-               }
-               ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
+               err = ext4_convert_unwritten_extents_endio(handle, inode, map,
                                                           ppath);
-               if (ret >= 0)
-                       ext4_update_inode_fsync_trans(handle, inode, 1);
-               else
-                       err = ret;
-               map->m_flags |= EXT4_MAP_MAPPED;
-               map->m_pblk = newblock;
-               if (allocated > map->m_len)
-                       allocated = map->m_len;
-               map->m_len = allocated;
-               goto out2;
+               if (err < 0)
+                       goto out2;
+               ext4_update_inode_fsync_trans(handle, inode, 1);
+               goto map_out;
        }
-       /* buffered IO case */
+       /* buffered IO cases */
        /*
         * repeat fallocate creation request
         * we already have an unwritten extent
@@ -3870,29 +3881,39 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
                goto out1;
        }
 
-       /* buffered write, writepage time, convert*/
+       /*
+        * Default case when EXT4_GET_BLOCKS_CREATE is set in flags.
+        * For buffered writes, at writepage time, etc.  Convert a
+        * discovered unwritten extent to written.
+        */
        ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags);
-       if (ret >= 0)
-               ext4_update_inode_fsync_trans(handle, inode, 1);
-out:
-       if (ret <= 0) {
+       if (ret < 0) {
                err = ret;
                goto out2;
-       } else
-               allocated = ret;
-       map->m_flags |= EXT4_MAP_NEW;
-       if (allocated > map->m_len)
-               allocated = map->m_len;
-       map->m_len = allocated;
+       }
+       ext4_update_inode_fsync_trans(handle, inode, 1);
+       /*
+        * shouldn't get a 0 return when converting an unwritten extent
+        * unless m_len is 0 (bug) or extent has been corrupted
+        */
+       if (unlikely(ret == 0)) {
+               EXT4_ERROR_INODE(inode, "unexpected ret == 0, m_len = %u",
+                                map->m_len);
+               err = -EFSCORRUPTED;
+               goto out2;
+       }
 
+out:
+       allocated = ret;
+       map->m_flags |= EXT4_MAP_NEW;
 map_out:
        map->m_flags |= EXT4_MAP_MAPPED;
 out1:
+       map->m_pblk = newblock;
        if (allocated > map->m_len)
                allocated = map->m_len;
-       ext4_ext_show_leaf(inode, path);
-       map->m_pblk = newblock;
        map->m_len = allocated;
+       ext4_ext_show_leaf(inode, path);
 out2:
        return err ? err : allocated;
 }
@@ -4024,15 +4045,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
        struct ext4_ext_path *path = NULL;
        struct ext4_extent newex, *ex, *ex2;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-       ext4_fsblk_t newblock = 0;
+       ext4_fsblk_t newblock = 0, pblk;
        int err = 0, depth, ret;
        unsigned int allocated = 0, offset = 0;
        unsigned int allocated_clusters = 0;
        struct ext4_allocation_request ar;
        ext4_lblk_t cluster_offset;
 
-       ext_debug("blocks %u/%u requested for inode %lu\n",
-                 map->m_lblk, map->m_len, inode->i_ino);
+       ext_debug(inode, "blocks %u/%u requested\n", map->m_lblk, map->m_len);
        trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
 
        /* find extent for this block */
@@ -4040,7 +4060,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
        if (IS_ERR(path)) {
                err = PTR_ERR(path);
                path = NULL;
-               goto out2;
+               goto out;
        }
 
        depth = ext_depth(inode);
@@ -4056,7 +4076,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                                 (unsigned long) map->m_lblk, depth,
                                 path[depth].p_block);
                err = -EFSCORRUPTED;
-               goto out2;
+               goto out;
        }
 
        ex = path[depth].p_ext;
@@ -4079,8 +4099,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                        newblock = map->m_lblk - ee_block + ee_start;
                        /* number of remaining blocks in the extent */
                        allocated = ee_len - (map->m_lblk - ee_block);
-                       ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,
-                                 ee_block, ee_len, newblock);
+                       ext_debug(inode, "%u fit into %u:%d -> %llu\n",
+                                 map->m_lblk, ee_block, ee_len, newblock);
 
                        /*
                         * If the extent is initialized check whether the
@@ -4090,8 +4110,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                            (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
                                err = convert_initialized_extent(handle,
                                        inode, map, &path, &allocated);
-                               goto out2;
+                               goto out;
                        } else if (!ext4_ext_is_unwritten(ex)) {
+                               map->m_flags |= EXT4_MAP_MAPPED;
+                               map->m_pblk = newblock;
+                               if (allocated > map->m_len)
+                                       allocated = map->m_len;
+                               map->m_len = allocated;
+                               ext4_ext_show_leaf(inode, path);
                                goto out;
                        }
 
@@ -4102,7 +4128,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                                err = ret;
                        else
                                allocated = ret;
-                       goto out2;
+                       goto out;
                }
        }
 
@@ -4127,7 +4153,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                map->m_pblk = 0;
                map->m_len = min_t(unsigned int, map->m_len, hole_len);
 
-               goto out2;
+               goto out;
        }
 
        /*
@@ -4151,12 +4177,12 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
        ar.lleft = map->m_lblk;
        err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
        if (err)
-               goto out2;
+               goto out;
        ar.lright = map->m_lblk;
        ex2 = NULL;
        err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2);
        if (err)
-               goto out2;
+               goto out;
 
        /* Check if the extent after searching to the right implies a
         * cluster we can use. */
@@ -4217,17 +4243,18 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                ar.flags |= EXT4_MB_USE_RESERVED;
        newblock = ext4_mb_new_blocks(handle, &ar, &err);
        if (!newblock)
-               goto out2;
-       ext_debug("allocate new block: goal %llu, found %llu/%u\n",
-                 ar.goal, newblock, allocated);
+               goto out;
        allocated_clusters = ar.len;
        ar.len = EXT4_C2B(sbi, ar.len) - offset;
+       ext_debug(inode, "allocate new block: goal %llu, found %llu/%u, requested %u\n",
+                 ar.goal, newblock, ar.len, allocated);
        if (ar.len > allocated)
                ar.len = allocated;
 
 got_allocated_blocks:
        /* try to insert new extent into found leaf and return */
-       ext4_ext_store_pblock(&newex, newblock + offset);
+       pblk = newblock + offset;
+       ext4_ext_store_pblock(&newex, pblk);
        newex.ee_len = cpu_to_le16(ar.len);
        /* Mark unwritten */
        if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
@@ -4252,16 +4279,9 @@ got_allocated_blocks:
                                         EXT4_C2B(sbi, allocated_clusters),
                                         fb_flags);
                }
-               goto out2;
+               goto out;
        }
 
-       /* previous routine could use block we allocated */
-       newblock = ext4_ext_pblock(&newex);
-       allocated = ext4_ext_get_actual_len(&newex);
-       if (allocated > map->m_len)
-               allocated = map->m_len;
-       map->m_flags |= EXT4_MAP_NEW;
-
        /*
         * Reduce the reserved cluster count to reflect successful deferred
         * allocation of delayed allocated clusters or direct allocation of
@@ -4307,14 +4327,14 @@ got_allocated_blocks:
                ext4_update_inode_fsync_trans(handle, inode, 1);
        else
                ext4_update_inode_fsync_trans(handle, inode, 0);
-out:
-       if (allocated > map->m_len)
-               allocated = map->m_len;
+
+       map->m_flags |= (EXT4_MAP_NEW | EXT4_MAP_MAPPED);
+       map->m_pblk = pblk;
+       map->m_len = ar.len;
+       allocated = map->m_len;
        ext4_ext_show_leaf(inode, path);
-       map->m_flags |= EXT4_MAP_MAPPED;
-       map->m_pblk = newblock;
-       map->m_len = allocated;
-out2:
+
+out:
        ext4_ext_drop_refs(path);
        kfree(path);
 
@@ -4353,7 +4373,14 @@ retry:
        }
        if (err)
                return err;
-       return ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
+retry_remove_space:
+       err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
+       if (err == -ENOMEM) {
+               cond_resched();
+               congestion_wait(BLK_RW_ASYNC, HZ/50);
+               goto retry_remove_space;
+       }
+       return err;
 }
 
 static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
@@ -4363,7 +4390,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
        struct inode *inode = file_inode(file);
        handle_t *handle;
        int ret = 0;
-       int ret2 = 0;
+       int ret2 = 0, ret3 = 0;
        int retries = 0;
        int depth = 0;
        struct ext4_map_blocks map;
@@ -4423,10 +4450,11 @@ retry:
                        if (ext4_update_inode_size(inode, epos) & 0x1)
                                inode->i_mtime = inode->i_ctime;
                }
-               ext4_mark_inode_dirty(handle, inode);
+               ret2 = ext4_mark_inode_dirty(handle, inode);
                ext4_update_inode_fsync_trans(handle, inode, 1);
-               ret2 = ext4_journal_stop(handle);
-               if (ret2)
+               ret3 = ext4_journal_stop(handle);
+               ret2 = ret3 ? ret3 : ret2;
+               if (unlikely(ret2))
                        break;
        }
        if (ret == -ENOSPC &&
@@ -4490,7 +4518,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        inode_lock(inode);
 
        /*
-        * Indirect files do not support unwritten extnets
+        * Indirect files do not support unwritten extents
         */
        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
                ret = -EOPNOTSUPP;
@@ -4507,8 +4535,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        }
 
        flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
-       if (mode & FALLOC_FL_KEEP_SIZE)
-               flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
 
        /* Wait all existing dio workers, newcomers will block on i_mutex */
        inode_dio_wait(inode);
@@ -4577,7 +4603,9 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        inode->i_mtime = inode->i_ctime = current_time(inode);
        if (new_size)
                ext4_update_inode_size(inode, new_size);
-       ext4_mark_inode_dirty(handle, inode);
+       ret = ext4_mark_inode_dirty(handle, inode);
+       if (unlikely(ret))
+               goto out_handle;
 
        /* Zero out partial block at the edges of the range */
        ret = ext4_zero_partial_blocks(handle, inode, offset, len);
@@ -4587,6 +4615,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        if (file->f_flags & O_SYNC)
                ext4_handle_sync(handle);
 
+out_handle:
        ext4_journal_stop(handle);
 out_mutex:
        inode_unlock(inode);
@@ -4647,8 +4676,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 
        max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
        flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
-       if (mode & FALLOC_FL_KEEP_SIZE)
-               flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
 
        inode_lock(inode);
 
@@ -4700,8 +4727,7 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
                                   loff_t offset, ssize_t len)
 {
        unsigned int max_blocks;
-       int ret = 0;
-       int ret2 = 0;
+       int ret = 0, ret2 = 0, ret3 = 0;
        struct ext4_map_blocks map;
        unsigned int blkbits = inode->i_blkbits;
        unsigned int credits = 0;
@@ -4734,9 +4760,13 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
                                     "ext4_ext_map_blocks returned %d",
                                     inode->i_ino, map.m_lblk,
                                     map.m_len, ret);
-               ext4_mark_inode_dirty(handle, inode);
-               if (credits)
-                       ret2 = ext4_journal_stop(handle);
+               ret2 = ext4_mark_inode_dirty(handle, inode);
+               if (credits) {
+                       ret3 = ext4_journal_stop(handle);
+                       if (unlikely(ret3))
+                               ret2 = ret3;
+               }
+
                if (ret <= 0 || ret2)
                        break;
        }
@@ -4832,11 +4862,31 @@ static const struct iomap_ops ext4_iomap_xattr_ops = {
        .iomap_begin            = ext4_iomap_xattr_begin,
 };
 
-static int _ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
-                       __u64 start, __u64 len, bool from_es_cache)
+/*
+ * Validate and clamp a fiemap request range against this inode's maximum
+ * possible file size.  Extent-mapped inodes may use the full
+ * sb->s_maxbytes; block-mapped (indirect) inodes are limited to the
+ * smaller s_bitmap_maxbytes, which is why the generic check alone is not
+ * sufficient (see caller comment in ext4_fiemap()).
+ *
+ * Returns 0 on success (with *len possibly shrunk to fit within the
+ * limit), -EINVAL for a zero-length request, or -EFBIG when @start lies
+ * beyond the inode's maximum byte offset.
+ */
+static int ext4_fiemap_check_ranges(struct inode *inode, u64 start, u64 *len)
+{
+       u64 maxbytes;
+
+       /* Pick the per-inode limit based on the inode's mapping scheme. */
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+               maxbytes = inode->i_sb->s_maxbytes;
+       else
+               maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
+
+       if (*len == 0)
+               return -EINVAL;
+       if (start > maxbytes)
+               return -EFBIG;
+
+       /*
+        * Shrink request scope to what the fs can actually handle.
+        */
+       if (*len > maxbytes || (maxbytes - *len) < start)
+               *len = maxbytes - start;
+       return 0;
+}
+
+int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+               u64 start, u64 len)
 {
-       ext4_lblk_t start_blk;
-       u32 ext4_fiemap_flags = FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR;
        int error = 0;
 
        if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
@@ -4846,48 +4896,31 @@ static int _ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
        }
 
-       if (from_es_cache)
-               ext4_fiemap_flags &= FIEMAP_FLAG_XATTR;
-
-       if (fiemap_check_flags(fieinfo, ext4_fiemap_flags))
-               return -EBADR;
+       /*
+        * For bitmap files the maximum size limit could be smaller than
+        * s_maxbytes, so check len here manually instead of just relying on the
+        * generic check.
+        */
+       error = ext4_fiemap_check_ranges(inode, start, &len);
+       if (error)
+               return error;
 
        if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
                fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR;
-               error = iomap_fiemap(inode, fieinfo, start, len,
-                                    &ext4_iomap_xattr_ops);
-       } else if (!from_es_cache) {
-               error = iomap_fiemap(inode, fieinfo, start, len,
-                                    &ext4_iomap_report_ops);
-       } else {
-               ext4_lblk_t len_blks;
-               __u64 last_blk;
-
-               start_blk = start >> inode->i_sb->s_blocksize_bits;
-               last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
-               if (last_blk >= EXT_MAX_BLOCKS)
-                       last_blk = EXT_MAX_BLOCKS-1;
-               len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
-
-               /*
-                * Walk the extent tree gathering extent information
-                * and pushing extents back to the user.
-                */
-               error = ext4_fill_es_cache_info(inode, start_blk, len_blks,
-                                               fieinfo);
+               return iomap_fiemap(inode, fieinfo, start, len,
+                                   &ext4_iomap_xattr_ops);
        }
-       return error;
-}
 
-int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
-               __u64 start, __u64 len)
-{
-       return _ext4_fiemap(inode, fieinfo, start, len, false);
+       return iomap_fiemap(inode, fieinfo, start, len, &ext4_iomap_report_ops);
 }
 
 int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo,
                      __u64 start, __u64 len)
 {
+       ext4_lblk_t start_blk, len_blks;
+       __u64 last_blk;
+       int error = 0;
+
        if (ext4_has_inline_data(inode)) {
                int has_inline;
 
@@ -4898,9 +4931,33 @@ int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        return 0;
        }
 
-       return _ext4_fiemap(inode, fieinfo, start, len, true);
-}
+       if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
+               error = ext4_ext_precache(inode);
+               if (error)
+                       return error;
+               fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
+       }
 
+       error = fiemap_prep(inode, fieinfo, start, &len, 0);
+       if (error)
+               return error;
+
+       error = ext4_fiemap_check_ranges(inode, start, &len);
+       if (error)
+               return error;
+
+       start_blk = start >> inode->i_sb->s_blocksize_bits;
+       last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
+       if (last_blk >= EXT_MAX_BLOCKS)
+               last_blk = EXT_MAX_BLOCKS-1;
+       len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
+
+       /*
+        * Walk the extent tree gathering extent information
+        * and pushing extents back to the user.
+        */
+       return ext4_fill_es_cache_info(inode, start_blk, len_blks, fieinfo);
+}
 
 /*
  * ext4_access_path:
@@ -5273,7 +5330,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
        inode->i_mtime = inode->i_ctime = current_time(inode);
-       ext4_mark_inode_dirty(handle, inode);
+       ret = ext4_mark_inode_dirty(handle, inode);
        ext4_update_inode_fsync_trans(handle, inode, 1);
 
 out_stop:
index d996b44d2265b581be5db2b297065f9a745a9dad..e751715353756d6ad2595641490bb36eca86db9d 100644 (file)
@@ -1054,7 +1054,7 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
        end = (end > ext4_es_end(es)) ? ext4_es_end(es) : end;
 
        /* record the first block of the first delonly extent seen */
-       if (rc->first_do_lblk_found == false) {
+       if (!rc->first_do_lblk_found) {
                rc->first_do_lblk = i;
                rc->first_do_lblk_found = true;
        }
index 0d624250a62bfdfaef0cf0710ef22f139e65d076..2a01e31a032c4ce393ea5e9514b92e1950d1f5fa 100644 (file)
@@ -287,6 +287,7 @@ static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
        bool truncate = false;
        u8 blkbits = inode->i_blkbits;
        ext4_lblk_t written_blk, end_blk;
+       int ret;
 
        /*
         * Note that EXT4_I(inode)->i_disksize can get extended up to
@@ -327,8 +328,14 @@ static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
                goto truncate;
        }
 
-       if (ext4_update_inode_size(inode, offset + written))
-               ext4_mark_inode_dirty(handle, inode);
+       if (ext4_update_inode_size(inode, offset + written)) {
+               ret = ext4_mark_inode_dirty(handle, inode);
+               if (unlikely(ret)) {
+                       written = ret;
+                       ext4_journal_stop(handle);
+                       goto truncate;
+               }
+       }
 
        /*
         * We may need to truncate allocated but not written blocks beyond EOF.
@@ -495,6 +502,12 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
        if (ret <= 0)
                return ret;
 
+       /* if we're going to block and IOCB_NOWAIT is set, return -EAGAIN */
+       if ((iocb->ki_flags & IOCB_NOWAIT) && (unaligned_io || extend)) {
+               ret = -EAGAIN;
+               goto out;
+       }
+
        offset = iocb->ki_pos;
        count = ret;
 
index e10206e7f4bbe7f470fd83687ddd3c48322bc892..093c359952cdbad6413d361e021555e28a318523 100644 (file)
  */
 static int ext4_sync_parent(struct inode *inode)
 {
-       struct dentry *dentry = NULL;
-       struct inode *next;
+       struct dentry *dentry, *next;
        int ret = 0;
 
        if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY))
                return 0;
-       inode = igrab(inode);
+       dentry = d_find_any_alias(inode);
+       if (!dentry)
+               return 0;
        while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
                ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
-               dentry = d_find_any_alias(inode);
-               if (!dentry)
-                       break;
-               next = igrab(d_inode(dentry->d_parent));
+
+               next = dget_parent(dentry);
                dput(dentry);
-               if (!next)
-                       break;
-               iput(inode);
-               inode = next;
+               dentry = next;
+               inode = dentry->d_inode;
+
                /*
                 * The directory inode may have gone through rmdir by now. But
                 * the inode itself and its blocks are still allocated (we hold
-                * a reference to the inode so it didn't go through
-                * ext4_evict_inode()) and so we are safe to flush metadata
-                * blocks and the inode.
+                * a reference to the inode via its dentry), so it didn't go
+                * through ext4_evict_inode() and so we are safe to flush
+                * metadata blocks and the inode.
                 */
                ret = sync_mapping_buffers(inode->i_mapping);
                if (ret)
@@ -76,7 +74,7 @@ static int ext4_sync_parent(struct inode *inode)
                if (ret)
                        break;
        }
-       iput(inode);
+       dput(dentry);
        return ret;
 }
 
index 7941c140723fa69f1636cf69ecc1367cc6836165..4da86308b8329d047708649efe2c052c6a06e8e5 100644 (file)
@@ -1246,6 +1246,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
                ext4_error_err(sb, -err,
                               "couldn't read orphan inode %lu (err %d)",
                               ino, err);
+               brelse(bitmap_bh);
                return inode;
        }
 
index 107f0043f67f1654c207ccff948fba6685890c06..be2b66eb65f7a398329e292454de667befb417f1 100644 (file)
@@ -467,7 +467,9 @@ static int ext4_splice_branch(handle_t *handle,
                /*
                 * OK, we spliced it into the inode itself on a direct block.
                 */
-               ext4_mark_inode_dirty(handle, ar->inode);
+               err = ext4_mark_inode_dirty(handle, ar->inode);
+               if (unlikely(err))
+                       goto err_out;
                jbd_debug(5, "splicing direct\n");
        }
        return err;
index f35e289e17aa3f714f1aa3b9eaf0e2e40990197e..c3a1ad2db1227bbcec24468ec50a83e4cad8e4c3 100644 (file)
@@ -1260,7 +1260,7 @@ out:
 int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
                              struct inode *dir, struct inode *inode)
 {
-       int ret, inline_size, no_expand;
+       int ret, ret2, inline_size, no_expand;
        void *inline_start;
        struct ext4_iloc iloc;
 
@@ -1314,7 +1314,9 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
 
 out:
        ext4_write_unlock_xattr(dir, &no_expand);
-       ext4_mark_inode_dirty(handle, dir);
+       ret2 = ext4_mark_inode_dirty(handle, dir);
+       if (unlikely(ret2 && !ret))
+               ret = ret2;
        brelse(iloc.bh);
        return ret;
 }
index 778b0dbe3da64752e3775c670ad7616a2e60e4eb..1556cabd3a7d13d2216f23bb3780f963f5ba365b 100644 (file)
@@ -220,6 +220,16 @@ void ext4_evict_inode(struct inode *inode)
                ext4_begin_ordered_truncate(inode, 0);
        truncate_inode_pages_final(&inode->i_data);
 
+       /*
+        * For inodes with journalled data, transaction commit could have
+        * dirtied the inode. Flush worker is ignoring it because of I_FREEING
+        * flag but we still need to remove the inode from the writeback lists.
+        */
+       if (!list_empty_careful(&inode->i_io_list)) {
+               WARN_ON_ONCE(!ext4_should_journal_data(inode));
+               inode_io_list_del(inode);
+       }
+
        /*
         * Protect us against freezing - iput() caller didn't have to have any
         * protection against it
@@ -432,11 +442,9 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
         */
        down_read(&EXT4_I(inode)->i_data_sem);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
-               retval = ext4_ext_map_blocks(handle, inode, map, flags &
-                                            EXT4_GET_BLOCKS_KEEP_SIZE);
+               retval = ext4_ext_map_blocks(handle, inode, map, 0);
        } else {
-               retval = ext4_ind_map_blocks(handle, inode, map, flags &
-                                            EXT4_GET_BLOCKS_KEEP_SIZE);
+               retval = ext4_ind_map_blocks(handle, inode, map, 0);
        }
        up_read((&EXT4_I(inode)->i_data_sem));
 
@@ -493,9 +501,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 #endif
 
        map->m_flags = 0;
-       ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
-                 "logical block %lu\n", inode->i_ino, flags, map->m_len,
-                 (unsigned long) map->m_lblk);
+       ext_debug(inode, "flag 0x%x, max_blocks %u, logical block %lu\n",
+                 flags, map->m_len, (unsigned long) map->m_lblk);
 
        /*
         * ext4_map_blocks returns an int, and m_len is an unsigned int
@@ -541,11 +548,9 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
         */
        down_read(&EXT4_I(inode)->i_data_sem);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
-               retval = ext4_ext_map_blocks(handle, inode, map, flags &
-                                            EXT4_GET_BLOCKS_KEEP_SIZE);
+               retval = ext4_ext_map_blocks(handle, inode, map, 0);
        } else {
-               retval = ext4_ind_map_blocks(handle, inode, map, flags &
-                                            EXT4_GET_BLOCKS_KEEP_SIZE);
+               retval = ext4_ind_map_blocks(handle, inode, map, 0);
        }
        if (retval > 0) {
                unsigned int status;
@@ -726,6 +731,9 @@ out_sem:
                                return ret;
                }
        }
+
+       if (retval < 0)
+               ext_debug(inode, "failed with err %d\n", retval);
        return retval;
 }
 
@@ -1296,7 +1304,7 @@ static int ext4_write_end(struct file *file,
         * filesystems.
         */
        if (i_size_changed || inline_data)
-               ext4_mark_inode_dirty(handle, inode);
+               ret = ext4_mark_inode_dirty(handle, inode);
 
        if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode))
                /* if we have allocated more blocks and copied
@@ -1526,6 +1534,7 @@ struct mpage_da_data {
        struct ext4_map_blocks map;
        struct ext4_io_submit io_submit;        /* IO submission data */
        unsigned int do_map:1;
+       unsigned int scanned_until_end:1;
 };
 
 static void mpage_release_unused_pages(struct mpage_da_data *mpd,
@@ -1541,6 +1550,7 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
        if (mpd->first_page >= mpd->next_page)
                return;
 
+       mpd->scanned_until_end = 0;
        index = mpd->first_page;
        end   = mpd->next_page - 1;
        if (invalidate) {
@@ -1681,8 +1691,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
                invalid_block = ~0;
 
        map->m_flags = 0;
-       ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
-                 "logical block %lu\n", inode->i_ino, map->m_len,
+       ext_debug(inode, "max_blocks %u, logical block %lu\n", map->m_len,
                  (unsigned long) map->m_lblk);
 
        /* Lookup extent status tree firstly */
@@ -2078,7 +2087,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
        return err;
 }
 
-#define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay))
+#define BH_FLAGS (BIT(BH_Unwritten) | BIT(BH_Delay))
 
 /*
  * mballoc gives us at most this number of blocks...
@@ -2188,7 +2197,11 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
                if (err < 0)
                        return err;
        }
-       return lblk < blocks;
+       if (lblk >= blocks) {
+               mpd->scanned_until_end = 1;
+               return 0;
+       }
+       return 1;
 }
 
 /*
@@ -2311,7 +2324,7 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
                         * mapping, or maybe the page was submitted for IO.
                         * So we return to call further extent mapping.
                         */
-                       if (err < 0 || map_bh == true)
+                       if (err < 0 || map_bh)
                                goto out;
                        /* Page fully mapped - let IO run! */
                        err = mpage_submit_page(mpd, page);
@@ -2358,7 +2371,7 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
        dioread_nolock = ext4_should_dioread_nolock(inode);
        if (dioread_nolock)
                get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
-       if (map->m_flags & (1 << BH_Delay))
+       if (map->m_flags & BIT(BH_Delay))
                get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
 
        err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
@@ -2546,7 +2559,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
                nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
                                tag);
                if (nr_pages == 0)
-                       goto out;
+                       break;
 
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
@@ -2601,6 +2614,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
                pagevec_release(&pvec);
                cond_resched();
        }
+       mpd->scanned_until_end = 1;
        return 0;
 out:
        pagevec_release(&pvec);
@@ -2619,7 +2633,6 @@ static int ext4_writepages(struct address_space *mapping,
        struct inode *inode = mapping->host;
        int needed_blocks, rsv_blocks = 0, ret = 0;
        struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
-       bool done;
        struct blk_plug plug;
        bool give_up_on_write = false;
 
@@ -2705,7 +2718,6 @@ static int ext4_writepages(struct address_space *mapping,
 retry:
        if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
                tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page);
-       done = false;
        blk_start_plug(&plug);
 
        /*
@@ -2715,6 +2727,7 @@ retry:
         * started.
         */
        mpd.do_map = 0;
+       mpd.scanned_until_end = 0;
        mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
        if (!mpd.io_submit.io_end) {
                ret = -ENOMEM;
@@ -2730,7 +2743,7 @@ retry:
        if (ret < 0)
                goto unplug;
 
-       while (!done && mpd.first_page <= mpd.last_page) {
+       while (!mpd.scanned_until_end && wbc->nr_to_write > 0) {
                /* For each extent of pages we use new io_end */
                mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
                if (!mpd.io_submit.io_end) {
@@ -2765,20 +2778,9 @@ retry:
 
                trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc);
                ret = mpage_prepare_extent_to_map(&mpd);
-               if (!ret) {
-                       if (mpd.map.m_len)
-                               ret = mpage_map_and_submit_extent(handle, &mpd,
+               if (!ret && mpd.map.m_len)
+                       ret = mpage_map_and_submit_extent(handle, &mpd,
                                        &give_up_on_write);
-                       else {
-                               /*
-                                * We scanned the whole range (or exhausted
-                                * nr_to_write), submitted what was mapped and
-                                * didn't find anything needing mapping. We are
-                                * done.
-                                */
-                               done = true;
-                       }
-               }
                /*
                 * Caution: If the handle is synchronous,
                 * ext4_journal_stop() can wait for transaction commit
@@ -3077,7 +3079,7 @@ static int ext4_da_write_end(struct file *file,
                         * new_i_size is less that inode->i_size
                         * bu greater than i_disksize.(hint delalloc)
                         */
-                       ext4_mark_inode_dirty(handle, inode);
+                       ret = ext4_mark_inode_dirty(handle, inode);
                }
        }
 
@@ -3094,7 +3096,7 @@ static int ext4_da_write_end(struct file *file,
        if (ret2 < 0)
                ret = ret2;
        ret2 = ext4_journal_stop(handle);
-       if (!ret)
+       if (unlikely(ret2 && !ret))
                ret = ret2;
 
        return ret ? ret : copied;
@@ -3886,6 +3888,8 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
                                      loff_t len)
 {
        handle_t *handle;
+       int ret;
+
        loff_t size = i_size_read(inode);
 
        WARN_ON(!inode_is_locked(inode));
@@ -3899,10 +3903,10 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
        if (IS_ERR(handle))
                return PTR_ERR(handle);
        ext4_update_i_disksize(inode, size);
-       ext4_mark_inode_dirty(handle, inode);
+       ret = ext4_mark_inode_dirty(handle, inode);
        ext4_journal_stop(handle);
 
-       return 0;
+       return ret;
 }
 
 static void ext4_wait_dax_page(struct ext4_inode_info *ei)
@@ -3954,7 +3958,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
        loff_t first_block_offset, last_block_offset;
        handle_t *handle;
        unsigned int credits;
-       int ret = 0;
+       int ret = 0, ret2 = 0;
 
        trace_ext4_punch_hole(inode, offset, length, 0);
 
@@ -4077,7 +4081,9 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
                ext4_handle_sync(handle);
 
        inode->i_mtime = inode->i_ctime = current_time(inode);
-       ext4_mark_inode_dirty(handle, inode);
+       ret2 = ext4_mark_inode_dirty(handle, inode);
+       if (unlikely(ret2))
+               ret = ret2;
        if (ret >= 0)
                ext4_update_inode_fsync_trans(handle, inode, 1);
 out_stop:
@@ -4146,7 +4152,7 @@ int ext4_truncate(struct inode *inode)
 {
        struct ext4_inode_info *ei = EXT4_I(inode);
        unsigned int credits;
-       int err = 0;
+       int err = 0, err2;
        handle_t *handle;
        struct address_space *mapping = inode->i_mapping;
 
@@ -4234,7 +4240,9 @@ out_stop:
                ext4_orphan_del(handle, inode);
 
        inode->i_mtime = inode->i_ctime = current_time(inode);
-       ext4_mark_inode_dirty(handle, inode);
+       err2 = ext4_mark_inode_dirty(handle, inode);
+       if (unlikely(err2 && !err))
+               err = err2;
        ext4_journal_stop(handle);
 
        trace_ext4_truncate_exit(inode);
@@ -5306,6 +5314,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                        inode->i_gid = attr->ia_gid;
                error = ext4_mark_inode_dirty(handle, inode);
                ext4_journal_stop(handle);
+               if (unlikely(error))
+                       return error;
        }
 
        if (attr->ia_valid & ATTR_SIZE) {
@@ -5791,7 +5801,8 @@ out_unlock:
  * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
  * we start and wait on commits.
  */
-int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
+int __ext4_mark_inode_dirty(handle_t *handle, struct inode *inode,
+                               const char *func, unsigned int line)
 {
        struct ext4_iloc iloc;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -5801,13 +5812,18 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
        trace_ext4_mark_inode_dirty(inode, _RET_IP_);
        err = ext4_reserve_inode_write(handle, inode, &iloc);
        if (err)
-               return err;
+               goto out;
 
        if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize)
                ext4_try_to_expand_extra_isize(inode, sbi->s_want_extra_isize,
                                               iloc, handle);
 
-       return ext4_mark_iloc_dirty(handle, inode, &iloc);
+       err = ext4_mark_iloc_dirty(handle, inode, &iloc);
+out:
+       if (unlikely(err))
+               ext4_error_inode_err(inode, func, line, 0, err,
+                                       "mark_inode_dirty error");
+       return err;
 }
 
 /*
index 1b520d07d37142f188cc3197d7330e89ef29a9ae..999cf6add39c62de85c8accfd60ef515d0fe92bf 100644 (file)
@@ -776,29 +776,6 @@ static void ext4_fill_fsxattr(struct inode *inode, struct fsxattr *fa)
                fa->fsx_projid = from_kprojid(&init_user_ns, ei->i_projid);
 }
 
-/* copied from fs/ioctl.c */
-static int fiemap_check_ranges(struct super_block *sb,
-                              u64 start, u64 len, u64 *new_len)
-{
-       u64 maxbytes = (u64) sb->s_maxbytes;
-
-       *new_len = len;
-
-       if (len == 0)
-               return -EINVAL;
-
-       if (start > maxbytes)
-               return -EFBIG;
-
-       /*
-        * Shrink request scope to what the fs can actually handle.
-        */
-       if (len > maxbytes || (maxbytes - len) < start)
-               *new_len = maxbytes - start;
-
-       return 0;
-}
-
 /* So that the fiemap access checks can't overflow on 32 bit machines. */
 #define FIEMAP_MAX_EXTENTS     (UINT_MAX / sizeof(struct fiemap_extent))
 
@@ -808,8 +785,6 @@ static int ext4_ioctl_get_es_cache(struct file *filp, unsigned long arg)
        struct fiemap __user *ufiemap = (struct fiemap __user *) arg;
        struct fiemap_extent_info fieinfo = { 0, };
        struct inode *inode = file_inode(filp);
-       struct super_block *sb = inode->i_sb;
-       u64 len;
        int error;
 
        if (copy_from_user(&fiemap, ufiemap, sizeof(fiemap)))
@@ -818,24 +793,12 @@ static int ext4_ioctl_get_es_cache(struct file *filp, unsigned long arg)
        if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS)
                return -EINVAL;
 
-       error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length,
-                                   &len);
-       if (error)
-               return error;
-
        fieinfo.fi_flags = fiemap.fm_flags;
        fieinfo.fi_extents_max = fiemap.fm_extent_count;
        fieinfo.fi_extents_start = ufiemap->fm_extents;
 
-       if (fiemap.fm_extent_count != 0 &&
-           !access_ok(fieinfo.fi_extents_start,
-                      fieinfo.fi_extents_max * sizeof(struct fiemap_extent)))
-               return -EFAULT;
-
-       if (fieinfo.fi_flags & FIEMAP_FLAG_SYNC)
-               filemap_write_and_wait(inode->i_mapping);
-
-       error = ext4_get_es_cache(inode, &fieinfo, fiemap.fm_start, len);
+       error = ext4_get_es_cache(inode, &fieinfo, fiemap.fm_start,
+                       fiemap.fm_length);
        fiemap.fm_flags = fieinfo.fi_flags;
        fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped;
        if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap)))
index 30d5d97548c42117decfcca690f70b9b48dc490b..a9083113a8c0f47d7d4340da5d21fd4fa98073e6 100644 (file)
 #include <linux/backing-dev.h>
 #include <trace/events/ext4.h>
 
-#ifdef CONFIG_EXT4_DEBUG
-ushort ext4_mballoc_debug __read_mostly;
-
-module_param_named(mballoc_debug, ext4_mballoc_debug, ushort, 0644);
-MODULE_PARM_DESC(mballoc_debug, "Debugging level for ext4's mballoc");
-#endif
-
 /*
  * MUSTDO:
  *   - test ext4_ext_search_left() and ext4_ext_search_right()
@@ -356,6 +349,36 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
                                        ext4_group_t group);
 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
                                                ext4_group_t group);
+static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);
+
+/*
+ * The algorithm using this percpu seq counter goes below:
+ * 1. We sample the percpu discard_pa_seq counter before trying for block
+ *    allocation in ext4_mb_new_blocks().
+ * 2. We increment this percpu discard_pa_seq counter when we either allocate
+ *    or free these blocks i.e. while marking those blocks as used/free in
+ *    mb_mark_used()/mb_free_blocks().
+ * 3. We also increment this percpu seq counter when we successfully identify
+ *    that the bb_prealloc_list is not empty and hence proceed for discarding
+ *    of those PAs inside ext4_mb_discard_group_preallocations().
+ *
+ * Now to make sure that the regular fast path of block allocation is not
+ * affected, as a small optimization we only sample the percpu seq counter
+ * on that cpu. Only when the block allocation fails and when freed blocks
+ * found were 0, that is when we sample percpu seq counter for all cpus using
+ * below function ext4_get_discard_pa_seq_sum(). This happens after making
+ * sure that all the PAs on grp->bb_prealloc_list got freed or if it's empty.
+ */
+static DEFINE_PER_CPU(u64, discard_pa_seq);
+static inline u64 ext4_get_discard_pa_seq_sum(void)
+{
+       int __cpu;
+       u64 __seq = 0;
+
+       for_each_possible_cpu(__cpu)
+               __seq += per_cpu(discard_pa_seq, __cpu);
+       return __seq;
+}
 
 static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
 {
@@ -493,6 +516,8 @@ static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
 
 static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
 {
+       if (unlikely(e4b->bd_info->bb_bitmap == NULL))
+               return;
        if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
                unsigned char *b1, *b2;
                int i;
@@ -511,6 +536,31 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
        }
 }
 
+static void mb_group_bb_bitmap_alloc(struct super_block *sb,
+                       struct ext4_group_info *grp, ext4_group_t group)
+{
+       struct buffer_head *bh;
+
+       grp->bb_bitmap = kmalloc(sb->s_blocksize, GFP_NOFS);
+       if (!grp->bb_bitmap)
+               return;
+
+       bh = ext4_read_block_bitmap(sb, group);
+       if (IS_ERR_OR_NULL(bh)) {
+               kfree(grp->bb_bitmap);
+               grp->bb_bitmap = NULL;
+               return;
+       }
+
+       memcpy(grp->bb_bitmap, bh->b_data, sb->s_blocksize);
+       put_bh(bh);
+}
+
+static void mb_group_bb_bitmap_free(struct ext4_group_info *grp)
+{
+       kfree(grp->bb_bitmap);
+}
+
 #else
 static inline void mb_free_blocks_double(struct inode *inode,
                                struct ext4_buddy *e4b, int first, int count)
@@ -526,6 +576,17 @@ static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
 {
        return;
 }
+
+static inline void mb_group_bb_bitmap_alloc(struct super_block *sb,
+                       struct ext4_group_info *grp, ext4_group_t group)
+{
+       return;
+}
+
+static inline void mb_group_bb_bitmap_free(struct ext4_group_info *grp)
+{
+       return;
+}
 #endif
 
 #ifdef AGGRESSIVE_CHECK
@@ -820,14 +881,14 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
        char *bitmap;
        struct ext4_group_info *grinfo;
 
-       mb_debug(1, "init page %lu\n", page->index);
-
        inode = page->mapping->host;
        sb = inode->i_sb;
        ngroups = ext4_get_groups_count(sb);
        blocksize = i_blocksize(inode);
        blocks_per_page = PAGE_SIZE / blocksize;
 
+       mb_debug(sb, "init page %lu\n", page->index);
+
        groups_per_page = blocks_per_page >> 1;
        if (groups_per_page == 0)
                groups_per_page = 1;
@@ -867,7 +928,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
                        bh[i] = NULL;
                        goto out;
                }
-               mb_debug(1, "read bitmap for group %u\n", group);
+               mb_debug(sb, "read bitmap for group %u\n", group);
        }
 
        /* wait for I/O completion */
@@ -912,7 +973,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
                if ((first_block + i) & 1) {
                        /* this is block of buddy */
                        BUG_ON(incore == NULL);
-                       mb_debug(1, "put buddy for group %u in page %lu/%x\n",
+                       mb_debug(sb, "put buddy for group %u in page %lu/%x\n",
                                group, page->index, i * blocksize);
                        trace_ext4_mb_buddy_bitmap_load(sb, group);
                        grinfo = ext4_get_group_info(sb, group);
@@ -932,7 +993,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
                } else {
                        /* this is block of bitmap */
                        BUG_ON(incore != NULL);
-                       mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
+                       mb_debug(sb, "put bitmap for group %u in page %lu/%x\n",
                                group, page->index, i * blocksize);
                        trace_ext4_mb_bitmap_load(sb, group);
 
@@ -1038,7 +1099,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
        int ret = 0;
 
        might_sleep();
-       mb_debug(1, "init group %u\n", group);
+       mb_debug(sb, "init group %u\n", group);
        this_grp = ext4_get_group_info(sb, group);
        /*
         * This ensures that we don't reinit the buddy cache
@@ -1110,7 +1171,7 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
        struct inode *inode = sbi->s_buddy_cache;
 
        might_sleep();
-       mb_debug(1, "load group %u\n", group);
+       mb_debug(sb, "load group %u\n", group);
 
        blocks_per_page = PAGE_SIZE / sb->s_blocksize;
        grp = ext4_get_group_info(sb, group);
@@ -1430,6 +1491,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
        mb_check_buddy(e4b);
        mb_free_blocks_double(inode, e4b, first, count);
 
+       this_cpu_inc(discard_pa_seq);
        e4b->bd_info->bb_free += count;
        if (first < e4b->bd_info->bb_first_free)
                e4b->bd_info->bb_first_free = first;
@@ -1571,6 +1633,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
        mb_check_buddy(e4b);
        mb_mark_used_double(e4b, start, len);
 
+       this_cpu_inc(discard_pa_seq);
        e4b->bd_info->bb_free -= len;
        if (e4b->bd_info->bb_first_free == start)
                e4b->bd_info->bb_first_free += len;
@@ -1670,6 +1733,14 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
                sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
                spin_unlock(&sbi->s_md_lock);
        }
+       /*
+        * As we've just preallocated more space than
+        * the user originally requested, we store the
+        * allocated space in a special descriptor.
+        */
+       if (ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
+               ext4_mb_new_preallocation(ac);
+
 }
 
 /*
@@ -1918,7 +1989,7 @@ void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
 
                ext4_mb_use_best_found(ac, e4b);
 
-               BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);
+               BUG_ON(ac->ac_f_ex.fe_len != ac->ac_g_ex.fe_len);
 
                if (EXT4_SB(sb)->s_mb_stats)
                        atomic_inc(&EXT4_SB(sb)->s_bal_2orders);
@@ -2035,15 +2106,14 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
 }
 
 /*
- * This is now called BEFORE we load the buddy bitmap.
+ * This is also called BEFORE we load the buddy bitmap.
  * Returns either 1 or 0 indicating that the group is either suitable
- * for the allocation or not. In addition it can also return negative
- * error code when something goes wrong.
+ * for the allocation or not.
  */
-static int ext4_mb_good_group(struct ext4_allocation_context *ac,
+static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
                                ext4_group_t group, int cr)
 {
-       unsigned free, fragments;
+       ext4_grpblk_t free, fragments;
        int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
        struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
 
@@ -2051,23 +2121,16 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
 
        free = grp->bb_free;
        if (free == 0)
-               return 0;
+               return false;
        if (cr <= 2 && free < ac->ac_g_ex.fe_len)
-               return 0;
+               return false;
 
        if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
-               return 0;
-
-       /* We only do this if the grp has never been initialized */
-       if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
-               int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
-               if (ret)
-                       return ret;
-       }
+               return false;
 
        fragments = grp->bb_fragments;
        if (fragments == 0)
-               return 0;
+               return false;
 
        switch (cr) {
        case 0:
@@ -2077,38 +2140,80 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
                if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
                    (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
                    ((group % flex_size) == 0))
-                       return 0;
+                       return false;
 
                if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) ||
                    (free / fragments) >= ac->ac_g_ex.fe_len)
-                       return 1;
+                       return true;
 
                if (grp->bb_largest_free_order < ac->ac_2order)
-                       return 0;
+                       return false;
 
-               return 1;
+               return true;
        case 1:
                if ((free / fragments) >= ac->ac_g_ex.fe_len)
-                       return 1;
+                       return true;
                break;
        case 2:
                if (free >= ac->ac_g_ex.fe_len)
-                       return 1;
+                       return true;
                break;
        case 3:
-               return 1;
+               return true;
        default:
                BUG();
        }
 
-       return 0;
+       return false;
+}
+
+/*
+ * This could return a negative error code if something goes wrong
+ * during ext4_mb_init_group(). This should not be called with
+ * ext4_lock_group() held.
+ */
+static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
+                                    ext4_group_t group, int cr)
+{
+       struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
+       struct super_block *sb = ac->ac_sb;
+       bool should_lock = ac->ac_flags & EXT4_MB_STRICT_CHECK;
+       ext4_grpblk_t free;
+       int ret = 0;
+
+       if (should_lock)
+               ext4_lock_group(sb, group);
+       free = grp->bb_free;
+       if (free == 0)
+               goto out;
+       if (cr <= 2 && free < ac->ac_g_ex.fe_len)
+               goto out;
+       if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
+               goto out;
+       if (should_lock)
+               ext4_unlock_group(sb, group);
+
+       /* We only do this if the grp has never been initialized */
+       if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
+               ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
+               if (ret)
+                       return ret;
+       }
+
+       if (should_lock)
+               ext4_lock_group(sb, group);
+       ret = ext4_mb_good_group(ac, group, cr);
+out:
+       if (should_lock)
+               ext4_unlock_group(sb, group);
+       return ret;
 }
 
 static noinline_for_stack int
 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
        ext4_group_t ngroups, group, i;
-       int cr;
+       int cr = -1;
        int err = 0, first_err = 0;
        struct ext4_sb_info *sbi;
        struct super_block *sb;
@@ -2189,7 +2294,7 @@ repeat:
                                group = 0;
 
                        /* This now checks without needing the buddy page */
-                       ret = ext4_mb_good_group(ac, group, cr);
+                       ret = ext4_mb_good_group_nolock(ac, group, cr);
                        if (ret <= 0) {
                                if (!first_err)
                                        first_err = ret;
@@ -2207,11 +2312,9 @@ repeat:
                         * block group
                         */
                        ret = ext4_mb_good_group(ac, group, cr);
-                       if (ret <= 0) {
+                       if (ret == 0) {
                                ext4_unlock_group(sb, group);
                                ext4_mb_unload_buddy(&e4b);
-                               if (!first_err)
-                                       first_err = ret;
                                continue;
                        }
 
@@ -2260,6 +2363,10 @@ repeat:
 out:
        if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
                err = first_err;
+
+       mb_debug(sb, "Best len %d, origin len %d, ac_status %u, ac_flags 0x%x, cr %d ret %d\n",
+                ac->ac_b_ex.fe_len, ac->ac_o_ex.fe_len, ac->ac_status,
+                ac->ac_flags, cr, err);
        return err;
 }
 
@@ -2452,20 +2559,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
        meta_group_info[i]->bb_free_root = RB_ROOT;
        meta_group_info[i]->bb_largest_free_order = -1;  /* uninit */
 
-#ifdef DOUBLE_CHECK
-       {
-               struct buffer_head *bh;
-               meta_group_info[i]->bb_bitmap =
-                       kmalloc(sb->s_blocksize, GFP_NOFS);
-               BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
-               bh = ext4_read_block_bitmap(sb, group);
-               BUG_ON(IS_ERR_OR_NULL(bh));
-               memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
-                       sb->s_blocksize);
-               put_bh(bh);
-       }
-#endif
-
+       mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group);
        return 0;
 
 exit_group_info:
@@ -2702,7 +2796,7 @@ out:
 }
 
 /* need to called with the ext4 group lock held */
-static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
+static int ext4_mb_cleanup_pa(struct ext4_group_info *grp)
 {
        struct ext4_prealloc_space *pa;
        struct list_head *cur, *tmp;
@@ -2714,9 +2808,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
                count++;
                kmem_cache_free(ext4_pspace_cachep, pa);
        }
-       if (count)
-               mb_debug(1, "mballoc: %u PAs left\n", count);
-
+       return count;
 }
 
 int ext4_mb_release(struct super_block *sb)
@@ -2727,16 +2819,18 @@ int ext4_mb_release(struct super_block *sb)
        struct ext4_group_info *grinfo, ***group_info;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
+       int count;
 
        if (sbi->s_group_info) {
                for (i = 0; i < ngroups; i++) {
                        cond_resched();
                        grinfo = ext4_get_group_info(sb, i);
-#ifdef DOUBLE_CHECK
-                       kfree(grinfo->bb_bitmap);
-#endif
+                       mb_group_bb_bitmap_free(grinfo);
                        ext4_lock_group(sb, i);
-                       ext4_mb_cleanup_pa(grinfo);
+                       count = ext4_mb_cleanup_pa(grinfo);
+                       if (count)
+                               mb_debug(sb, "mballoc: %d PAs left\n",
+                                        count);
                        ext4_unlock_group(sb, i);
                        kmem_cache_free(cachep, grinfo);
                }
@@ -2809,7 +2903,7 @@ static void ext4_free_data_in_buddy(struct super_block *sb,
        struct ext4_group_info *db;
        int err, count = 0, count2 = 0;
 
-       mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
+       mb_debug(sb, "gonna free %u blocks in group %u (0x%p):",
                 entry->efd_count, entry->efd_group, entry);
 
        err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
@@ -2849,7 +2943,8 @@ static void ext4_free_data_in_buddy(struct super_block *sb,
        kmem_cache_free(ext4_free_data_cachep, entry);
        ext4_mb_unload_buddy(&e4b);
 
-       mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
+       mb_debug(sb, "freed %d blocks in %d structures\n", count,
+                count2);
 }
 
 /*
@@ -2909,23 +3004,26 @@ int __init ext4_init_mballoc(void)
        ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
                                        SLAB_RECLAIM_ACCOUNT);
        if (ext4_pspace_cachep == NULL)
-               return -ENOMEM;
+               goto out;
 
        ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
                                    SLAB_RECLAIM_ACCOUNT);
-       if (ext4_ac_cachep == NULL) {
-               kmem_cache_destroy(ext4_pspace_cachep);
-               return -ENOMEM;
-       }
+       if (ext4_ac_cachep == NULL)
+               goto out_pa_free;
 
        ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
                                           SLAB_RECLAIM_ACCOUNT);
-       if (ext4_free_data_cachep == NULL) {
-               kmem_cache_destroy(ext4_pspace_cachep);
-               kmem_cache_destroy(ext4_ac_cachep);
-               return -ENOMEM;
-       }
+       if (ext4_free_data_cachep == NULL)
+               goto out_ac_free;
+
        return 0;
+
+out_ac_free:
+       kmem_cache_destroy(ext4_ac_cachep);
+out_pa_free:
+       kmem_cache_destroy(ext4_pspace_cachep);
+out:
+       return -ENOMEM;
 }
 
 void ext4_exit_mballoc(void)
@@ -3077,8 +3175,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
 
        BUG_ON(lg == NULL);
        ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
-       mb_debug(1, "#%u: goal %u blocks for locality group\n",
-               current->pid, ac->ac_g_ex.fe_len);
+       mb_debug(sb, "goal %u blocks for locality group\n", ac->ac_g_ex.fe_len);
 }
 
 /*
@@ -3276,8 +3373,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
                ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
        }
 
-       mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
-               (unsigned) orig_size, (unsigned) start);
+       mb_debug(ac->ac_sb, "goal: %lld(was %lld) blocks at %u\n", size,
+                orig_size, start);
 }
 
 static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
@@ -3366,7 +3463,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
        BUG_ON(pa->pa_free < len);
        pa->pa_free -= len;
 
-       mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
+       mb_debug(ac->ac_sb, "use %llu/%d from inode pa %p\n", start, len, pa);
 }
 
 /*
@@ -3390,7 +3487,8 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
         * in on-disk bitmap -- see ext4_mb_release_context()
         * Other CPUs are prevented from allocating from this pa by lg_mutex
         */
-       mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
+       mb_debug(ac->ac_sb, "use %u/%u from group pa %p\n",
+                pa->pa_lstart-len, len, pa);
 }
 
 /*
@@ -3425,7 +3523,7 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
 /*
  * search goal blocks in preallocated space
  */
-static noinline_for_stack int
+static noinline_for_stack bool
 ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 {
        struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
@@ -3437,7 +3535,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 
        /* only data can be preallocated */
        if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
-               return 0;
+               return false;
 
        /* first, try per-file preallocation */
        rcu_read_lock();
@@ -3464,7 +3562,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
                        spin_unlock(&pa->pa_lock);
                        ac->ac_criteria = 10;
                        rcu_read_unlock();
-                       return 1;
+                       return true;
                }
                spin_unlock(&pa->pa_lock);
        }
@@ -3472,12 +3570,12 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 
        /* can we use group allocation? */
        if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
-               return 0;
+               return false;
 
        /* inode may have no locality group for some reason */
        lg = ac->ac_lg;
        if (lg == NULL)
-               return 0;
+               return false;
        order  = fls(ac->ac_o_ex.fe_len) - 1;
        if (order > PREALLOC_TB_SIZE - 1)
                /* The max size of hash table is PREALLOC_TB_SIZE */
@@ -3506,9 +3604,9 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
        if (cpa) {
                ext4_mb_use_group_pa(ac, cpa);
                ac->ac_criteria = 20;
-               return 1;
+               return true;
        }
-       return 0;
+       return false;
 }
 
 /*
@@ -3573,7 +3671,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
                ext4_set_bits(bitmap, start, len);
                preallocated += len;
        }
-       mb_debug(1, "preallocated %u for group %u\n", preallocated, group);
+       mb_debug(sb, "preallocated %d for group %u\n", preallocated, group);
 }
 
 static void ext4_mb_pa_callback(struct rcu_head *head)
@@ -3649,7 +3747,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
 /*
  * creates new preallocated space for given inode
  */
-static noinline_for_stack int
+static noinline_for_stack void
 ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
 {
        struct super_block *sb = ac->ac_sb;
@@ -3662,10 +3760,9 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
        BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
        BUG_ON(ac->ac_status != AC_STATUS_FOUND);
        BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
+       BUG_ON(ac->ac_pa == NULL);
 
-       pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
-       if (pa == NULL)
-               return -ENOMEM;
+       pa = ac->ac_pa;
 
        if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
                int winl;
@@ -3709,15 +3806,14 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
        pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
        pa->pa_len = ac->ac_b_ex.fe_len;
        pa->pa_free = pa->pa_len;
-       atomic_set(&pa->pa_count, 1);
        spin_lock_init(&pa->pa_lock);
        INIT_LIST_HEAD(&pa->pa_inode_list);
        INIT_LIST_HEAD(&pa->pa_group_list);
        pa->pa_deleted = 0;
        pa->pa_type = MB_INODE_PA;
 
-       mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
-                       pa->pa_pstart, pa->pa_len, pa->pa_lstart);
+       mb_debug(sb, "new inode pa %p: %llu/%d for %u\n", pa, pa->pa_pstart,
+                pa->pa_len, pa->pa_lstart);
        trace_ext4_mb_new_inode_pa(ac, pa);
 
        ext4_mb_use_inode_pa(ac, pa);
@@ -3729,21 +3825,17 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
        pa->pa_obj_lock = &ei->i_prealloc_lock;
        pa->pa_inode = ac->ac_inode;
 
-       ext4_lock_group(sb, ac->ac_b_ex.fe_group);
        list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
-       ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
 
        spin_lock(pa->pa_obj_lock);
        list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
        spin_unlock(pa->pa_obj_lock);
-
-       return 0;
 }
 
 /*
  * creates new preallocated space for locality group inodes belongs to
  */
-static noinline_for_stack int
+static noinline_for_stack void
 ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
 {
        struct super_block *sb = ac->ac_sb;
@@ -3755,11 +3847,9 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
        BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
        BUG_ON(ac->ac_status != AC_STATUS_FOUND);
        BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
+       BUG_ON(ac->ac_pa == NULL);
 
-       BUG_ON(ext4_pspace_cachep == NULL);
-       pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
-       if (pa == NULL)
-               return -ENOMEM;
+       pa = ac->ac_pa;
 
        /* preallocation can change ac_b_ex, thus we store actually
         * allocated blocks for history */
@@ -3769,15 +3859,14 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
        pa->pa_lstart = pa->pa_pstart;
        pa->pa_len = ac->ac_b_ex.fe_len;
        pa->pa_free = pa->pa_len;
-       atomic_set(&pa->pa_count, 1);
        spin_lock_init(&pa->pa_lock);
        INIT_LIST_HEAD(&pa->pa_inode_list);
        INIT_LIST_HEAD(&pa->pa_group_list);
        pa->pa_deleted = 0;
        pa->pa_type = MB_GROUP_PA;
 
-       mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
-                       pa->pa_pstart, pa->pa_len, pa->pa_lstart);
+       mb_debug(sb, "new group pa %p: %llu/%d for %u\n", pa, pa->pa_pstart,
+                pa->pa_len, pa->pa_lstart);
        trace_ext4_mb_new_group_pa(ac, pa);
 
        ext4_mb_use_group_pa(ac, pa);
@@ -3790,26 +3879,20 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
        pa->pa_obj_lock = &lg->lg_prealloc_lock;
        pa->pa_inode = NULL;
 
-       ext4_lock_group(sb, ac->ac_b_ex.fe_group);
        list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
-       ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
 
        /*
         * We will later add the new pa to the right bucket
         * after updating the pa_free in ext4_mb_release_context
         */
-       return 0;
 }
 
-static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
+static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
 {
-       int err;
-
        if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
-               err = ext4_mb_new_group_pa(ac);
+               ext4_mb_new_group_pa(ac);
        else
-               err = ext4_mb_new_inode_pa(ac);
-       return err;
+               ext4_mb_new_inode_pa(ac);
 }
 
 /*
@@ -3844,7 +3927,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
                if (bit >= end)
                        break;
                next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
-               mb_debug(1, "    free preallocated %u/%u in group %u\n",
+               mb_debug(sb, "free preallocated %u/%u in group %u\n",
                         (unsigned) ext4_group_first_block_no(sb, group) + bit,
                         (unsigned) next - bit, (unsigned) group);
                free += next - bit;
@@ -3858,10 +3941,10 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
        }
        if (free != pa->pa_free) {
                ext4_msg(e4b->bd_sb, KERN_CRIT,
-                        "pa %p: logic %lu, phys. %lu, len %lu",
+                        "pa %p: logic %lu, phys. %lu, len %d",
                         pa, (unsigned long) pa->pa_lstart,
                         (unsigned long) pa->pa_pstart,
-                        (unsigned long) pa->pa_len);
+                        pa->pa_len);
                ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
                                        free, pa->pa_free);
                /*
@@ -3915,10 +3998,9 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
        int busy = 0;
        int free = 0;
 
-       mb_debug(1, "discard preallocation for group %u\n", group);
-
+       mb_debug(sb, "discard preallocation for group %u\n", group);
        if (list_empty(&grp->bb_prealloc_list))
-               return 0;
+               goto out_dbg;
 
        bitmap_bh = ext4_read_block_bitmap(sb, group);
        if (IS_ERR(bitmap_bh)) {
@@ -3926,7 +4008,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
                ext4_error_err(sb, -err,
                               "Error %d reading block bitmap for %u",
                               err, group);
-               return 0;
+               goto out_dbg;
        }
 
        err = ext4_mb_load_buddy(sb, group, &e4b);
@@ -3934,7 +4016,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
                ext4_warning(sb, "Error %d loading buddy information for %u",
                             err, group);
                put_bh(bitmap_bh);
-               return 0;
+               goto out_dbg;
        }
 
        if (needed == 0)
@@ -3943,6 +4025,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
        INIT_LIST_HEAD(&list);
 repeat:
        ext4_lock_group(sb, group);
+       this_cpu_inc(discard_pa_seq);
        list_for_each_entry_safe(pa, tmp,
                                &grp->bb_prealloc_list, pa_group_list) {
                spin_lock(&pa->pa_lock);
@@ -3979,6 +4062,8 @@ repeat:
        /* found anything to free? */
        if (list_empty(&list)) {
                BUG_ON(free != 0);
+               mb_debug(sb, "Someone else may have freed PA for this group %u\n",
+                        group);
                goto out;
        }
 
@@ -4003,6 +4088,9 @@ out:
        ext4_unlock_group(sb, group);
        ext4_mb_unload_buddy(&e4b);
        put_bh(bitmap_bh);
+out_dbg:
+       mb_debug(sb, "discarded (%d) blocks preallocated for group %u bb_free (%d)\n",
+                free, group, grp->bb_free);
        return free;
 }
 
@@ -4031,7 +4119,8 @@ void ext4_discard_preallocations(struct inode *inode)
                return;
        }
 
-       mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
+       mb_debug(sb, "discard preallocation for inode %lu\n",
+                inode->i_ino);
        trace_ext4_discard_preallocations(inode);
 
        INIT_LIST_HEAD(&list);
@@ -4119,22 +4208,74 @@ repeat:
        }
 }
 
+static int ext4_mb_pa_alloc(struct ext4_allocation_context *ac)
+{
+       struct ext4_prealloc_space *pa;
+
+       BUG_ON(ext4_pspace_cachep == NULL);
+       pa = kmem_cache_zalloc(ext4_pspace_cachep, GFP_NOFS);
+       if (!pa)
+               return -ENOMEM;
+       atomic_set(&pa->pa_count, 1);
+       ac->ac_pa = pa;
+       return 0;
+}
+
+static void ext4_mb_pa_free(struct ext4_allocation_context *ac)
+{
+       struct ext4_prealloc_space *pa = ac->ac_pa;
+
+       BUG_ON(!pa);
+       ac->ac_pa = NULL;
+       WARN_ON(!atomic_dec_and_test(&pa->pa_count));
+       kmem_cache_free(ext4_pspace_cachep, pa);
+}
+
 #ifdef CONFIG_EXT4_DEBUG
+static inline void ext4_mb_show_pa(struct super_block *sb)
+{
+       ext4_group_t i, ngroups;
+
+       if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+               return;
+
+       ngroups = ext4_get_groups_count(sb);
+       mb_debug(sb, "groups: ");
+       for (i = 0; i < ngroups; i++) {
+               struct ext4_group_info *grp = ext4_get_group_info(sb, i);
+               struct ext4_prealloc_space *pa;
+               ext4_grpblk_t start;
+               struct list_head *cur;
+               ext4_lock_group(sb, i);
+               list_for_each(cur, &grp->bb_prealloc_list) {
+                       pa = list_entry(cur, struct ext4_prealloc_space,
+                                       pa_group_list);
+                       spin_lock(&pa->pa_lock);
+                       ext4_get_group_no_and_offset(sb, pa->pa_pstart,
+                                                    NULL, &start);
+                       spin_unlock(&pa->pa_lock);
+                       mb_debug(sb, "PA:%u:%d:%d\n", i, start,
+                                pa->pa_len);
+               }
+               ext4_unlock_group(sb, i);
+               mb_debug(sb, "%u: %d/%d\n", i, grp->bb_free,
+                        grp->bb_fragments);
+       }
+}
+
 static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 {
        struct super_block *sb = ac->ac_sb;
-       ext4_group_t ngroups, i;
 
-       if (!ext4_mballoc_debug ||
-           (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
+       if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
                return;
 
-       ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:"
+       mb_debug(sb, "Can't allocate:"
                        " Allocation context details:");
-       ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d",
+       mb_debug(sb, "status %u flags 0x%x",
                        ac->ac_status, ac->ac_flags);
-       ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, "
-                       "goal %lu/%lu/%lu@%lu, "
+       mb_debug(sb, "orig %lu/%lu/%lu@%lu, "
+                       "goal %lu/%lu/%lu@%lu, "
                        "best %lu/%lu/%lu@%lu cr %d",
                        (unsigned long)ac->ac_o_ex.fe_group,
                        (unsigned long)ac->ac_o_ex.fe_start,
@@ -4149,37 +4290,17 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
                        (unsigned long)ac->ac_b_ex.fe_len,
                        (unsigned long)ac->ac_b_ex.fe_logical,
                        (int)ac->ac_criteria);
-       ext4_msg(ac->ac_sb, KERN_ERR, "%d found", ac->ac_found);
-       ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");
-       ngroups = ext4_get_groups_count(sb);
-       for (i = 0; i < ngroups; i++) {
-               struct ext4_group_info *grp = ext4_get_group_info(sb, i);
-               struct ext4_prealloc_space *pa;
-               ext4_grpblk_t start;
-               struct list_head *cur;
-               ext4_lock_group(sb, i);
-               list_for_each(cur, &grp->bb_prealloc_list) {
-                       pa = list_entry(cur, struct ext4_prealloc_space,
-                                       pa_group_list);
-                       spin_lock(&pa->pa_lock);
-                       ext4_get_group_no_and_offset(sb, pa->pa_pstart,
-                                                    NULL, &start);
-                       spin_unlock(&pa->pa_lock);
-                       printk(KERN_ERR "PA:%u:%d:%u \n", i,
-                              start, pa->pa_len);
-               }
-               ext4_unlock_group(sb, i);
-
-               if (grp->bb_free == 0)
-                       continue;
-               printk(KERN_ERR "%u: %d/%d \n",
-                      i, grp->bb_free, grp->bb_fragments);
-       }
-       printk(KERN_ERR "\n");
+       mb_debug(sb, "%u found", ac->ac_found);
+       ext4_mb_show_pa(sb);
 }
 #else
+static inline void ext4_mb_show_pa(struct super_block *sb)
+{
+       return;
+}
 static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 {
+       ext4_mb_show_pa(ac->ac_sb);
        return;
 }
 #endif
@@ -4282,7 +4403,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
         * locality group. this is a policy, actually */
        ext4_mb_group_or_file(ac);
 
-       mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
+       mb_debug(sb, "init ac: %u blocks @ %u, goal %u, flags 0x%x, 2^%d, "
                        "left: %u/%u, right %u/%u to %swritable\n",
                        (unsigned) ar->len, (unsigned) ar->logical,
                        (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
@@ -4303,7 +4424,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
        struct list_head discard_list;
        struct ext4_prealloc_space *pa, *tmp;
 
-       mb_debug(1, "discard locality group preallocation\n");
+       mb_debug(sb, "discard locality group preallocation\n");
 
        INIT_LIST_HEAD(&discard_list);
 
@@ -4486,6 +4607,30 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
        return freed;
 }
 
+static bool ext4_mb_discard_preallocations_should_retry(struct super_block *sb,
+                       struct ext4_allocation_context *ac, u64 *seq)
+{
+       int freed;
+       u64 seq_retry = 0;
+       bool ret = false;
+
+       freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
+       if (freed) {
+               ret = true;
+               goto out_dbg;
+       }
+       seq_retry = ext4_get_discard_pa_seq_sum();
+       if (!(ac->ac_flags & EXT4_MB_STRICT_CHECK) || seq_retry != *seq) {
+               ac->ac_flags |= EXT4_MB_STRICT_CHECK;
+               *seq = seq_retry;
+               ret = true;
+       }
+
+out_dbg:
+       mb_debug(sb, "freed %d, retry ? %s\n", freed, ret ? "yes" : "no");
+       return ret;
+}
+
 /*
  * Main entry point into mballoc to allocate blocks
  * it tries to use preallocation first, then falls back
@@ -4494,13 +4639,13 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
 ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
                                struct ext4_allocation_request *ar, int *errp)
 {
-       int freed;
        struct ext4_allocation_context *ac = NULL;
        struct ext4_sb_info *sbi;
        struct super_block *sb;
        ext4_fsblk_t block = 0;
        unsigned int inquota = 0;
        unsigned int reserv_clstrs = 0;
+       u64 seq;
 
        might_sleep();
        sb = ar->inode->i_sb;
@@ -4525,6 +4670,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
                        ar->len = ar->len >> 1;
                }
                if (!ar->len) {
+                       ext4_mb_show_pa(sb);
                        *errp = -ENOSPC;
                        return 0;
                }
@@ -4562,26 +4708,32 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
        }
 
        ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
+       seq = *this_cpu_ptr(&discard_pa_seq);
        if (!ext4_mb_use_preallocated(ac)) {
                ac->ac_op = EXT4_MB_HISTORY_ALLOC;
                ext4_mb_normalize_request(ac, ar);
+
+               *errp = ext4_mb_pa_alloc(ac);
+               if (*errp)
+                       goto errout;
 repeat:
                /* allocate space in core */
                *errp = ext4_mb_regular_allocator(ac);
-               if (*errp)
-                       goto discard_and_exit;
-
-               /* as we've just preallocated more space than
-                * user requested originally, we store allocated
-                * space in a special descriptor */
-               if (ac->ac_status == AC_STATUS_FOUND &&
-                   ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
-                       *errp = ext4_mb_new_preallocation(ac);
+       /*
+        * The pa allocated above is added to grp->bb_prealloc_list only
+        * when we were able to allocate some blocks, i.e. when
+        * ac->ac_status == AC_STATUS_FOUND.
+        * An error from above means ac->ac_status != AC_STATUS_FOUND,
+        * so we have to free this pa here itself.
+        */
                if (*errp) {
-               discard_and_exit:
+                       ext4_mb_pa_free(ac);
                        ext4_discard_allocated_blocks(ac);
                        goto errout;
                }
+               if (ac->ac_status == AC_STATUS_FOUND &&
+                       ac->ac_o_ex.fe_len >= ac->ac_f_ex.fe_len)
+                       ext4_mb_pa_free(ac);
        }
        if (likely(ac->ac_status == AC_STATUS_FOUND)) {
                *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
@@ -4593,9 +4745,13 @@ repeat:
                        ar->len = ac->ac_b_ex.fe_len;
                }
        } else {
-               freed  = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
-               if (freed)
+               if (ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
                        goto repeat;
+               /*
+                * If block allocation fails then the pa allocated above
+                * needs to be freed here itself.
+                */
+               ext4_mb_pa_free(ac);
                *errp = -ENOSPC;
        }
 
index 88c98f17e3d9eea879d25fd7e360a61297753c51..6b4d17c2935d6eca8e0cae5d69b8d283ce754175 100644 (file)
 #include "ext4.h"
 
 /*
+ * mb_debug() dynamic printk msgs can be used to debug mballoc code.
  */
 #ifdef CONFIG_EXT4_DEBUG
-extern ushort ext4_mballoc_debug;
-
-#define mb_debug(n, fmt, ...)                                          \
-do {                                                                   \
-       if ((n) <= ext4_mballoc_debug) {                                \
-               printk(KERN_DEBUG "(%s, %d): %s: " fmt,                 \
-                      __FILE__, __LINE__, __func__, ##__VA_ARGS__);    \
-       }                                                               \
-} while (0)
+#define mb_debug(sb, fmt, ...)                                         \
+       pr_debug("[%s/%d] EXT4-fs (%s): (%s, %d): %s: " fmt,            \
+               current->comm, task_pid_nr(current), sb->s_id,          \
+              __FILE__, __LINE__, __func__, ##__VA_ARGS__)
 #else
-#define mb_debug(n, fmt, ...)  no_printk(fmt, ##__VA_ARGS__)
+#define mb_debug(sb, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 #define EXT4_MB_HISTORY_ALLOC          1       /* allocation */
index fb6520f37135509791c35b782ba14dfe328cfa49..c5e3fc998211acfe9449e8be42ef2c6cd96542d7 100644 (file)
@@ -287,7 +287,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
 static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
                                                struct inode *tmp_inode)
 {
-       int retval;
+       int retval, retval2 = 0;
        __le32  i_data[3];
        struct ext4_inode_info *ei = EXT4_I(inode);
        struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);
@@ -342,7 +342,9 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
         * i_blocks when freeing the indirect meta-data blocks
         */
        retval = free_ind_block(handle, inode, i_data);
-       ext4_mark_inode_dirty(handle, inode);
+       retval2 = ext4_mark_inode_dirty(handle, inode);
+       if (unlikely(retval2 && !retval))
+               retval = retval2;
 
 err_out:
        return retval;
@@ -601,7 +603,7 @@ int ext4_ind_migrate(struct inode *inode)
        ext4_lblk_t                     start, end;
        ext4_fsblk_t                    blk;
        handle_t                        *handle;
-       int                             ret;
+       int                             ret, ret2 = 0;
 
        if (!ext4_has_feature_extents(inode->i_sb) ||
            (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
@@ -655,7 +657,9 @@ int ext4_ind_migrate(struct inode *inode)
        memset(ei->i_data, 0, sizeof(ei->i_data));
        for (i = start; i <= end; i++)
                ei->i_data[i] = cpu_to_le32(blk++);
-       ext4_mark_inode_dirty(handle, inode);
+       ret2 = ext4_mark_inode_dirty(handle, inode);
+       if (unlikely(ret2 && !ret))
+               ret = ret2;
 errout:
        ext4_journal_stop(handle);
        up_write(&EXT4_I(inode)->i_data_sem);
index a8aca4772aaa72abf08d4c4788ef0680b42835be..56738b538ddf4c066df33a59a06020352313c749 100644 (file)
@@ -1993,7 +1993,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
 {
        unsigned int    blocksize = dir->i_sb->s_blocksize;
        int             csum_size = 0;
-       int             err;
+       int             err, err2;
 
        if (ext4_has_metadata_csum(inode->i_sb))
                csum_size = sizeof(struct ext4_dir_entry_tail);
@@ -2028,12 +2028,12 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
        dir->i_mtime = dir->i_ctime = current_time(dir);
        ext4_update_dx_flag(dir);
        inode_inc_iversion(dir);
-       ext4_mark_inode_dirty(handle, dir);
+       err2 = ext4_mark_inode_dirty(handle, dir);
        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
        err = ext4_handle_dirty_dirblock(handle, dir, bh);
        if (err)
                ext4_std_error(dir->i_sb, err);
-       return 0;
+       return err ? err : err2;
 }
 
 /*
@@ -2223,7 +2223,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
                }
                ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
                dx_fallback++;
-               ext4_mark_inode_dirty(handle, dir);
+               retval = ext4_mark_inode_dirty(handle, dir);
+               if (unlikely(retval))
+                       goto out;
        }
        blocks = dir->i_size >> sb->s_blocksize_bits;
        for (block = 0; block < blocks; block++) {
@@ -2576,12 +2578,12 @@ static int ext4_add_nondir(handle_t *handle,
        struct inode *inode = *inodep;
        int err = ext4_add_entry(handle, dentry, inode);
        if (!err) {
-               ext4_mark_inode_dirty(handle, inode);
+               err = ext4_mark_inode_dirty(handle, inode);
                if (IS_DIRSYNC(dir))
                        ext4_handle_sync(handle);
                d_instantiate_new(dentry, inode);
                *inodep = NULL;
-               return 0;
+               return err;
        }
        drop_nlink(inode);
        ext4_orphan_add(handle, inode);
@@ -2775,7 +2777,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        handle_t *handle;
        struct inode *inode;
-       int err, credits, retries = 0;
+       int err, err2 = 0, credits, retries = 0;
 
        if (EXT4_DIR_LINK_MAX(dir))
                return -EMLINK;
@@ -2808,7 +2810,9 @@ out_clear_inode:
                clear_nlink(inode);
                ext4_orphan_add(handle, inode);
                unlock_new_inode(inode);
-               ext4_mark_inode_dirty(handle, inode);
+               err2 = ext4_mark_inode_dirty(handle, inode);
+               if (unlikely(err2))
+                       err = err2;
                ext4_journal_stop(handle);
                iput(inode);
                goto out_retry;
@@ -3148,10 +3152,12 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
        inode->i_size = 0;
        ext4_orphan_add(handle, inode);
        inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
-       ext4_mark_inode_dirty(handle, inode);
+       retval = ext4_mark_inode_dirty(handle, inode);
+       if (retval)
+               goto end_rmdir;
        ext4_dec_count(handle, dir);
        ext4_update_dx_flag(dir);
-       ext4_mark_inode_dirty(handle, dir);
+       retval = ext4_mark_inode_dirty(handle, dir);
 
 #ifdef CONFIG_UNICODE
        /* VFS negative dentries are incompatible with Encoding and
@@ -3221,7 +3227,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
                goto end_unlink;
        dir->i_ctime = dir->i_mtime = current_time(dir);
        ext4_update_dx_flag(dir);
-       ext4_mark_inode_dirty(handle, dir);
+       retval = ext4_mark_inode_dirty(handle, dir);
+       if (retval)
+               goto end_unlink;
        if (inode->i_nlink == 0)
                ext4_warning_inode(inode, "Deleting file '%.*s' with no links",
                                   dentry->d_name.len, dentry->d_name.name);
@@ -3230,7 +3238,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
        if (!inode->i_nlink)
                ext4_orphan_add(handle, inode);
        inode->i_ctime = current_time(inode);
-       ext4_mark_inode_dirty(handle, inode);
+       retval = ext4_mark_inode_dirty(handle, inode);
 
 #ifdef CONFIG_UNICODE
        /* VFS negative dentries are incompatible with Encoding and
@@ -3419,7 +3427,7 @@ retry:
 
        err = ext4_add_entry(handle, dentry, inode);
        if (!err) {
-               ext4_mark_inode_dirty(handle, inode);
+               err = ext4_mark_inode_dirty(handle, inode);
                /* this can happen only for tmpfile being
                 * linked the first time
                 */
@@ -3531,7 +3539,7 @@ static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent,
 static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
                       unsigned ino, unsigned file_type)
 {
-       int retval;
+       int retval, retval2;
 
        BUFFER_TRACE(ent->bh, "get write access");
        retval = ext4_journal_get_write_access(handle, ent->bh);
@@ -3543,19 +3551,19 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
        inode_inc_iversion(ent->dir);
        ent->dir->i_ctime = ent->dir->i_mtime =
                current_time(ent->dir);
-       ext4_mark_inode_dirty(handle, ent->dir);
+       retval = ext4_mark_inode_dirty(handle, ent->dir);
        BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata");
        if (!ent->inlined) {
-               retval = ext4_handle_dirty_dirblock(handle, ent->dir, ent->bh);
-               if (unlikely(retval)) {
-                       ext4_std_error(ent->dir->i_sb, retval);
-                       return retval;
+               retval2 = ext4_handle_dirty_dirblock(handle, ent->dir, ent->bh);
+               if (unlikely(retval2)) {
+                       ext4_std_error(ent->dir->i_sb, retval2);
+                       return retval2;
                }
        }
        brelse(ent->bh);
        ent->bh = NULL;
 
-       return 0;
+       return retval;
 }
 
 static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
@@ -3790,7 +3798,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
                                     EXT4_FT_CHRDEV);
                if (retval)
                        goto end_rename;
-               ext4_mark_inode_dirty(handle, whiteout);
+               retval = ext4_mark_inode_dirty(handle, whiteout);
+               if (unlikely(retval))
+                       goto end_rename;
        }
        if (!new.bh) {
                retval = ext4_add_entry(handle, new.dentry, old.inode);
@@ -3811,7 +3821,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
         * rename.
         */
        old.inode->i_ctime = current_time(old.inode);
-       ext4_mark_inode_dirty(handle, old.inode);
+       retval = ext4_mark_inode_dirty(handle, old.inode);
+       if (unlikely(retval))
+               goto end_rename;
 
        if (!whiteout) {
                /*
@@ -3840,12 +3852,18 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
                } else {
                        ext4_inc_count(handle, new.dir);
                        ext4_update_dx_flag(new.dir);
-                       ext4_mark_inode_dirty(handle, new.dir);
+                       retval = ext4_mark_inode_dirty(handle, new.dir);
+                       if (unlikely(retval))
+                               goto end_rename;
                }
        }
-       ext4_mark_inode_dirty(handle, old.dir);
+       retval = ext4_mark_inode_dirty(handle, old.dir);
+       if (unlikely(retval))
+               goto end_rename;
        if (new.inode) {
-               ext4_mark_inode_dirty(handle, new.inode);
+               retval = ext4_mark_inode_dirty(handle, new.inode);
+               if (unlikely(retval))
+                       goto end_rename;
                if (!new.inode->i_nlink)
                        ext4_orphan_add(handle, new.inode);
        }
@@ -3979,8 +3997,12 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
        ctime = current_time(old.inode);
        old.inode->i_ctime = ctime;
        new.inode->i_ctime = ctime;
-       ext4_mark_inode_dirty(handle, old.inode);
-       ext4_mark_inode_dirty(handle, new.inode);
+       retval = ext4_mark_inode_dirty(handle, old.inode);
+       if (unlikely(retval))
+               goto end_rename;
+       retval = ext4_mark_inode_dirty(handle, new.inode);
+       if (unlikely(retval))
+               goto end_rename;
 
        if (old.dir_bh) {
                retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
index 3658e301699999863e976d76f005d156191f1eda..a22d67c5bc00bedd8344bf4dd71db56b5aff0c13 100644 (file)
@@ -3723,7 +3723,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        int blocksize, clustersize;
        unsigned int db_count;
        unsigned int i;
-       int needs_recovery, has_huge_files, has_bigalloc;
+       int needs_recovery, has_huge_files;
        __u64 blocks_count;
        int err = 0;
        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
@@ -4015,17 +4015,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
                printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, and O_DIRECT support!\n");
+               /* can't mount with both data=journal and dioread_nolock. */
                clear_opt(sb, DIOREAD_NOLOCK);
                if (test_opt2(sb, EXPLICIT_DELALLOC)) {
                        ext4_msg(sb, KERN_ERR, "can't mount with "
                                 "both data=journal and delalloc");
                        goto failed_mount;
                }
-               if (test_opt(sb, DIOREAD_NOLOCK)) {
-                       ext4_msg(sb, KERN_ERR, "can't mount with "
-                                "both data=journal and dioread_nolock");
-                       goto failed_mount;
-               }
                if (test_opt(sb, DAX_ALWAYS)) {
                        ext4_msg(sb, KERN_ERR, "can't mount with "
                                 "both data=journal and dax");
@@ -4245,8 +4241,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
        /* Handle clustersize */
        clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
-       has_bigalloc = ext4_has_feature_bigalloc(sb);
-       if (has_bigalloc) {
+       if (ext4_has_feature_bigalloc(sb)) {
                if (clustersize < blocksize) {
                        ext4_msg(sb, KERN_ERR,
                                 "cluster size (%d) smaller than "
@@ -5938,7 +5933,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
                EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
                inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
                                S_NOATIME | S_IMMUTABLE);
-               ext4_mark_inode_dirty(handle, inode);
+               err = ext4_mark_inode_dirty(handle, inode);
                ext4_journal_stop(handle);
        unlock_inode:
                inode_unlock(inode);
@@ -6040,12 +6035,14 @@ static int ext4_quota_off(struct super_block *sb, int type)
         * this is not a hard failure and quotas are already disabled.
         */
        handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
-       if (IS_ERR(handle))
+       if (IS_ERR(handle)) {
+               err = PTR_ERR(handle);
                goto out_unlock;
+       }
        EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
        inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
        inode->i_mtime = inode->i_ctime = current_time(inode);
-       ext4_mark_inode_dirty(handle, inode);
+       err = ext4_mark_inode_dirty(handle, inode);
        ext4_journal_stop(handle);
 out_unlock:
        inode_unlock(inode);
@@ -6103,7 +6100,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 {
        struct inode *inode = sb_dqopt(sb)->files[type];
        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
-       int err, offset = off & (sb->s_blocksize - 1);
+       int err = 0, err2 = 0, offset = off & (sb->s_blocksize - 1);
        int retries = 0;
        struct buffer_head *bh;
        handle_t *handle = journal_current_handle();
@@ -6151,9 +6148,11 @@ out:
        if (inode->i_size < off + len) {
                i_size_write(inode, off + len);
                EXT4_I(inode)->i_disksize = inode->i_size;
-               ext4_mark_inode_dirty(handle, inode);
+               err2 = ext4_mark_inode_dirty(handle, inode);
+               if (unlikely(err2 && !err))
+                       err = err2;
        }
-       return len;
+       return err ? err : len;
 }
 #endif
 
index 21df43a25328ebf10a6a2acc8664bbc8a978e7ac..9b29a40738acc03b3aad5a0c934fdfc6c84425cd 100644 (file)
@@ -1327,7 +1327,7 @@ static int ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode,
        int blocksize = ea_inode->i_sb->s_blocksize;
        int max_blocks = (bufsize + blocksize - 1) >> ea_inode->i_blkbits;
        int csize, wsize = 0;
-       int ret = 0;
+       int ret = 0, ret2 = 0;
        int retries = 0;
 
 retry:
@@ -1385,7 +1385,9 @@ retry:
        ext4_update_i_disksize(ea_inode, wsize);
        inode_unlock(ea_inode);
 
-       ext4_mark_inode_dirty(handle, ea_inode);
+       ret2 = ext4_mark_inode_dirty(handle, ea_inode);
+       if (unlikely(ret2 && !ret))
+               ret = ret2;
 
 out:
        brelse(bh);
@@ -1800,8 +1802,11 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
        if (EXT4_I(inode)->i_file_acl) {
                /* The inode already has an extended attribute block. */
                bs->bh = ext4_sb_bread(sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
-               if (IS_ERR(bs->bh))
-                       return PTR_ERR(bs->bh);
+               if (IS_ERR(bs->bh)) {
+                       error = PTR_ERR(bs->bh);
+                       bs->bh = NULL;
+                       return error;
+               }
                ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
                        atomic_read(&(bs->bh->b_count)),
                        le32_to_cpu(BHDR(bs->bh)->h_refcount));
index cdf2f626bea7abc896a5379a1fa815b5367f0afc..9de7dc476ed16c31b39a6a2565b24dab01ae6a7e 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/uio.h>
 #include <linux/cleancache.h>
 #include <linux/sched/signal.h>
+#include <linux/fiemap.h>
 
 #include "f2fs.h"
 #include "node.h"
@@ -1824,7 +1825,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        return ret;
        }
 
-       ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR);
+       ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR);
        if (ret)
                return ret;
 
index 4167e540815185a2a467e7e3d00a598dd9adbf7a..9686ffea177e7e8d251e21f3a101bcda3ee1e21e 100644 (file)
@@ -8,6 +8,7 @@
 
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
+#include <linux/fiemap.h>
 
 #include "f2fs.h"
 #include "node.h"
index 76ac9c7d32ec7ee2e719e48d2956276676810d92..e58bd5f758d0321f006a449c9d109c55cd08933c 100644 (file)
@@ -1126,6 +1126,7 @@ void inode_io_list_del(struct inode *inode)
        inode_io_list_del_locked(inode, wb);
        spin_unlock(&wb->list_lock);
 }
+EXPORT_SYMBOL(inode_io_list_del);
 
 /*
  * mark an inode as under writeback on the sb
index 70b2d3a1e86683ae91e26109e1f88c8923f87a25..4842f313a8084292900fdb231b664e98fdb6d86d 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/crc32.h>
 #include <linux/iomap.h>
 #include <linux/security.h>
+#include <linux/fiemap.h>
 #include <linux/uaccess.h>
 
 #include "gfs2.h"
index b36abf9cb345a513bf79d8765fda2a44dae1c91a..62959a8e43ad8d66311076d2109fc8245732338e 100644 (file)
@@ -9,6 +9,7 @@
 
 #include "hpfs_fn.h"
 #include <linux/mpage.h>
+#include <linux/fiemap.h>
 
 #define BLOCKS(size) (((size) + 511) >> 9)
 
index aa5d45524e87da09d728abe6c7868bce8fb4d030..8819d0d58b0342de2a2cca352131446e21e4c0f5 100644 (file)
@@ -143,8 +143,6 @@ extern int dentry_needs_remove_privs(struct dentry *dentry);
 /*
  * fs-writeback.c
  */
-extern void inode_io_list_del(struct inode *inode);
-
 extern long get_nr_dirty_inodes(void);
 extern int invalidate_inodes(struct super_block *, bool);
 
index 5e80b40bc1b5cab63aa4472a2616aa06d6169a45..d69786d1dd911502bea4b089efe8f57c205c69a2 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/buffer_head.h>
 #include <linux/falloc.h>
 #include <linux/sched/signal.h>
+#include <linux/fiemap.h>
 
 #include "internal.h"
 
@@ -148,61 +149,55 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
 EXPORT_SYMBOL(fiemap_fill_next_extent);
 
 /**
- * fiemap_check_flags - check validity of requested flags for fiemap
+ * fiemap_prep - check validity of requested flags for fiemap
+ * @inode:     Inode to operate on
  * @fieinfo:   Fiemap context passed into ->fiemap
- * @fs_flags:  Set of fiemap flags that the file system understands
+ * @start:     Start of the mapped range
+ * @len:       Length of the mapped range; may be truncated by this function.
+ * @supported_flags:   Set of fiemap flags that the file system understands
  *
- * Called from file system ->fiemap callback. This will compute the
- * intersection of valid fiemap flags and those that the fs supports. That
- * value is then compared against the user supplied flags. In case of bad user
- * flags, the invalid values will be written into the fieinfo structure, and
- * -EBADR is returned, which tells ioctl_fiemap() to return those values to
- * userspace. For this reason, a return code of -EBADR should be preserved.
+ * This function must be called from each ->fiemap instance to validate the
+ * fiemap request against the file system parameters.
  *
- * Returns 0 on success, -EBADR on bad flags.
+ * Returns 0 on success, or a negative error on failure.
  */
-int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags)
+int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo,
+               u64 start, u64 *len, u32 supported_flags)
 {
+       u64 maxbytes = inode->i_sb->s_maxbytes;
        u32 incompat_flags;
+       int ret = 0;
 
-       incompat_flags = fieinfo->fi_flags & ~(FIEMAP_FLAGS_COMPAT & fs_flags);
-       if (incompat_flags) {
-               fieinfo->fi_flags = incompat_flags;
-               return -EBADR;
-       }
-       return 0;
-}
-EXPORT_SYMBOL(fiemap_check_flags);
-
-static int fiemap_check_ranges(struct super_block *sb,
-                              u64 start, u64 len, u64 *new_len)
-{
-       u64 maxbytes = (u64) sb->s_maxbytes;
-
-       *new_len = len;
-
-       if (len == 0)
+       if (*len == 0)
                return -EINVAL;
-
        if (start > maxbytes)
                return -EFBIG;
 
        /*
         * Shrink request scope to what the fs can actually handle.
         */
-       if (len > maxbytes || (maxbytes - len) < start)
-               *new_len = maxbytes - start;
+       if (*len > maxbytes || (maxbytes - *len) < start)
+               *len = maxbytes - start;
+
+       supported_flags |= FIEMAP_FLAG_SYNC;
+       supported_flags &= FIEMAP_FLAGS_COMPAT;
+       incompat_flags = fieinfo->fi_flags & ~supported_flags;
+       if (incompat_flags) {
+               fieinfo->fi_flags = incompat_flags;
+               return -EBADR;
+       }
 
-       return 0;
+       if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
+               ret = filemap_write_and_wait(inode->i_mapping);
+       return ret;
 }
+EXPORT_SYMBOL(fiemap_prep);
 
 static int ioctl_fiemap(struct file *filp, struct fiemap __user *ufiemap)
 {
        struct fiemap fiemap;
        struct fiemap_extent_info fieinfo = { 0, };
        struct inode *inode = file_inode(filp);
-       struct super_block *sb = inode->i_sb;
-       u64 len;
        int error;
 
        if (!inode->i_op->fiemap)
@@ -214,24 +209,13 @@ static int ioctl_fiemap(struct file *filp, struct fiemap __user *ufiemap)
        if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS)
                return -EINVAL;
 
-       error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length,
-                                   &len);
-       if (error)
-               return error;
-
        fieinfo.fi_flags = fiemap.fm_flags;
        fieinfo.fi_extents_max = fiemap.fm_extent_count;
        fieinfo.fi_extents_start = ufiemap->fm_extents;
 
-       if (fiemap.fm_extent_count != 0 &&
-           !access_ok(fieinfo.fi_extents_start,
-                      fieinfo.fi_extents_max * sizeof(struct fiemap_extent)))
-               return -EFAULT;
-
-       if (fieinfo.fi_flags & FIEMAP_FLAG_SYNC)
-               filemap_write_and_wait(inode->i_mapping);
+       error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start,
+                       fiemap.fm_length);
 
-       error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, len);
        fiemap.fm_flags = fieinfo.fi_flags;
        fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped;
        if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap)))
@@ -307,8 +291,7 @@ static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
  * If you use this function directly, you need to do your own locking. Use
  * generic_block_fiemap if you want the locking done for you.
  */
-
-int __generic_block_fiemap(struct inode *inode,
+static int __generic_block_fiemap(struct inode *inode,
                           struct fiemap_extent_info *fieinfo, loff_t start,
                           loff_t len, get_block_t *get_block)
 {
@@ -320,7 +303,7 @@ int __generic_block_fiemap(struct inode *inode,
        bool past_eof = false, whole_file = false;
        int ret = 0;
 
-       ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+       ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_SYNC);
        if (ret)
                return ret;
 
@@ -453,7 +436,6 @@ int __generic_block_fiemap(struct inode *inode,
 
        return ret;
 }
-EXPORT_SYMBOL(__generic_block_fiemap);
 
 /**
  * generic_block_fiemap - FIEMAP for block based inodes
index d55e8f491a5e51d1d45c0053db38a41af883eb4f..aab070df4a21758ae407bcefc6ac4476423d7674 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/compiler.h>
 #include <linux/fs.h>
 #include <linux/iomap.h>
+#include <linux/fiemap.h>
 
 struct fiemap_ctx {
        struct fiemap_extent_info *fi;
@@ -65,7 +66,7 @@ iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 }
 
 int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
-               loff_t start, loff_t len, const struct iomap_ops *ops)
+               u64 start, u64 len, const struct iomap_ops *ops)
 {
        struct fiemap_ctx ctx;
        loff_t ret;
@@ -74,16 +75,10 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
        ctx.fi = fi;
        ctx.prev.type = IOMAP_HOLE;
 
-       ret = fiemap_check_flags(fi, FIEMAP_FLAG_SYNC);
+       ret = fiemap_prep(inode, fi, start, &len, 0);
        if (ret)
                return ret;
 
-       if (fi->fi_flags & FIEMAP_FLAG_SYNC) {
-               ret = filemap_write_and_wait(inode->i_mapping);
-               if (ret)
-                       return ret;
-       }
-
        while (len > 0) {
                ret = iomap_apply(inode, start, len, IOMAP_REPORT, ops, &ctx,
                                iomap_fiemap_actor);
index 3dccc23cf0102337398c2c7ec99a31aa4e45b192..e91aad3637a23fb63d4421d9a441c1ac7ef5dc04 100644 (file)
@@ -541,17 +541,24 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
 }
 EXPORT_SYMBOL(jbd2_journal_start);
 
-static void __jbd2_journal_unreserve_handle(handle_t *handle)
+static void __jbd2_journal_unreserve_handle(handle_t *handle, transaction_t *t)
 {
        journal_t *journal = handle->h_journal;
 
        WARN_ON(!handle->h_reserved);
        sub_reserved_credits(journal, handle->h_total_credits);
+       if (t)
+               atomic_sub(handle->h_total_credits, &t->t_outstanding_credits);
 }
 
 void jbd2_journal_free_reserved(handle_t *handle)
 {
-       __jbd2_journal_unreserve_handle(handle);
+       journal_t *journal = handle->h_journal;
+
+       /* Get j_state_lock to pin running transaction if it exists */
+       read_lock(&journal->j_state_lock);
+       __jbd2_journal_unreserve_handle(handle, journal->j_running_transaction);
+       read_unlock(&journal->j_state_lock);
        jbd2_free_handle(handle);
 }
 EXPORT_SYMBOL(jbd2_journal_free_reserved);
@@ -722,7 +729,8 @@ static void stop_this_handle(handle_t *handle)
        atomic_sub(handle->h_total_credits,
                   &transaction->t_outstanding_credits);
        if (handle->h_rsv_handle)
-               __jbd2_journal_unreserve_handle(handle->h_rsv_handle);
+               __jbd2_journal_unreserve_handle(handle->h_rsv_handle,
+                                               transaction);
        if (atomic_dec_and_test(&transaction->t_updates))
                wake_up(&journal->j_wait_updates);
 
index 671085512e0fde9e8be3274629db21e8c0561b96..25b0d368ecdb26762373ffc1934a136c3481b898 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/writeback.h>
 #include <linux/uio.h>
+#include <linux/fiemap.h>
 #include "nilfs.h"
 #include "btnode.h"
 #include "segment.h"
@@ -1005,7 +1006,7 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        unsigned int blkbits = inode->i_blkbits;
        int ret, n;
 
-       ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+       ret = fiemap_prep(inode, fieinfo, start, &len, 0);
        if (ret)
                return ret;
 
index e3e2d1b2af51a267cb056efad36042ee3ee88bf1..a94852af5510daf7ff7fb891034ed683c930d843 100644 (file)
@@ -733,8 +733,6 @@ static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
        return 0;
 }
 
-#define OCFS2_FIEMAP_FLAGS     (FIEMAP_FLAG_SYNC)
-
 int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                 u64 map_start, u64 map_len)
 {
@@ -746,7 +744,7 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        struct buffer_head *di_bh = NULL;
        struct ocfs2_extent_rec rec;
 
-       ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS);
+       ret = fiemap_prep(inode, fieinfo, map_start, &map_len, 0);
        if (ret)
                return ret;
 
index b0d42ece4d7ccc470534576817278f8cdff0f08c..c7cb883c47b861acfe5cf6ff4ce58630d9055cc1 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
 #include <linux/ratelimit.h>
+#include <linux/fiemap.h>
 #include "overlayfs.h"
 
 
@@ -461,10 +462,6 @@ static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                return -EOPNOTSUPP;
 
        old_cred = ovl_override_creds(inode->i_sb);
-
-       if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
-               filemap_write_and_wait(realinode->i_mapping);
-
        err = realinode->i_op->fiemap(realinode, fieinfo, start, len);
        revert_creds(old_cred);
 
index f7a99b3bbcf7a37e4ce4fba965b2c53a424c9b0a..44c353998ac5c157e64d805f1a2672ab8184a6c9 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/posix_acl.h>
 #include <linux/security.h>
 #include <linux/iversion.h>
+#include <linux/fiemap.h>
 
 /*
  * Directories have different lock order w.r.t. mmap_sem compared to regular
diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h
new file mode 100644 (file)
index 0000000..4e624c4
--- /dev/null
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_FIEMAP_H
+#define _LINUX_FIEMAP_H 1
+
+#include <uapi/linux/fiemap.h>
+#include <linux/fs.h>
+
+struct fiemap_extent_info {
+       unsigned int fi_flags;          /* Flags as passed from user */
+       unsigned int fi_extents_mapped; /* Number of mapped extents */
+       unsigned int fi_extents_max;    /* Size of fiemap_extent array */
+       struct fiemap_extent __user *fi_extents_start; /* Start of
+                                                       fiemap_extent array */
+};
+
+int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo,
+               u64 start, u64 *len, u32 supported_flags);
+int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
+                           u64 phys, u64 len, u32 flags);
+
+int generic_block_fiemap(struct inode *inode,
+               struct fiemap_extent_info *fieinfo, u64 start, u64 len,
+               get_block_t *get_block);
+
+#endif /* _LINUX_FIEMAP_H */
index 7c3e8c0306e0c4c9b75793a3698a05ce173c213c..92eb6c7dab8e26ba63346387441d7294d956e55c 100644 (file)
@@ -24,7 +24,6 @@
 #include <linux/capability.h>
 #include <linux/semaphore.h>
 #include <linux/fcntl.h>
-#include <linux/fiemap.h>
 #include <linux/rculist_bl.h>
 #include <linux/atomic.h>
 #include <linux/shrinker.h>
@@ -48,6 +47,7 @@ struct backing_dev_info;
 struct bdi_writeback;
 struct bio;
 struct export_operations;
+struct fiemap_extent_info;
 struct hd_geometry;
 struct iovec;
 struct kiocb;
@@ -1745,19 +1745,6 @@ extern long compat_ptr_ioctl(struct file *file, unsigned int cmd,
 extern void inode_init_owner(struct inode *inode, const struct inode *dir,
                        umode_t mode);
 extern bool may_open_dev(const struct path *path);
-/*
- * VFS FS_IOC_FIEMAP helper definitions.
- */
-struct fiemap_extent_info {
-       unsigned int fi_flags;          /* Flags as passed from user */
-       unsigned int fi_extents_mapped; /* Number of mapped extents */
-       unsigned int fi_extents_max;    /* Size of fiemap_extent array */
-       struct fiemap_extent __user *fi_extents_start; /* Start of
-                                                       fiemap_extent array */
-};
-int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
-                           u64 phys, u64 len, u32 flags);
-int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
 
 /*
  * This is the "filldir" function type, used by readdir() to let
@@ -3304,14 +3291,6 @@ static inline int vfs_fstat(int fd, struct kstat *stat)
 extern const char *vfs_get_link(struct dentry *, struct delayed_call *);
 extern int vfs_readlink(struct dentry *, char __user *, int);
 
-extern int __generic_block_fiemap(struct inode *inode,
-                                 struct fiemap_extent_info *fieinfo,
-                                 loff_t start, loff_t len,
-                                 get_block_t *get_block);
-extern int generic_block_fiemap(struct inode *inode,
-                               struct fiemap_extent_info *fieinfo, u64 start,
-                               u64 len, get_block_t *get_block);
-
 extern struct file_system_type *get_filesystem(struct file_system_type *fs);
 extern void put_filesystem(struct file_system_type *fs);
 extern struct file_system_type *get_fs_type(const char *name);
index 8b09463dae0dba2a4a60ce2372863ba0e8fd16b9..63db02528b702cc6d1676a61ce34ded6ca99f799 100644 (file)
@@ -178,7 +178,7 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf,
                        const struct iomap_ops *ops);
 int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
-               loff_t start, loff_t len, const struct iomap_ops *ops);
+               u64 start, u64 len, const struct iomap_ops *ops);
 loff_t iomap_seek_hole(struct inode *inode, loff_t offset,
                const struct iomap_ops *ops);
 loff_t iomap_seek_data(struct inode *inode, loff_t offset,
index a19d845dd7eb96fd9414ad23d8d929fc34737cc9..902aa317621bfce0e800a730628ef334c38d9dae 100644 (file)
@@ -197,6 +197,7 @@ void wakeup_flusher_threads(enum wb_reason reason);
 void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
                                enum wb_reason reason);
 void inode_wait_for_writeback(struct inode *inode);
+void inode_io_list_del(struct inode *inode);
 
 /* writeback.h requires fs.h; it, too, is not included from here. */
 static inline void wait_on_inode(struct inode *inode)
index 19c87661eeec79f5e4707d175f4a43623d1d906c..cc41d692ae8edfa4421221da51c528d57eb8bf8f 100644 (file)
@@ -35,7 +35,8 @@ struct partial_cluster;
        { EXT4_MB_DELALLOC_RESERVED,    "DELALLOC_RESV" },      \
        { EXT4_MB_STREAM_ALLOC,         "STREAM_ALLOC" },       \
        { EXT4_MB_USE_ROOT_BLOCKS,      "USE_ROOT_BLKS" },      \
-       { EXT4_MB_USE_RESERVED,         "USE_RESV" })
+       { EXT4_MB_USE_RESERVED,         "USE_RESV" },           \
+       { EXT4_MB_STRICT_CHECK,         "STRICT_CHECK" })
 
 #define show_map_flags(flags) __print_flags(flags, "|",                        \
        { EXT4_GET_BLOCKS_CREATE,               "CREATE" },             \
@@ -45,8 +46,10 @@ struct partial_cluster;
        { EXT4_GET_BLOCKS_CONVERT,              "CONVERT" },            \
        { EXT4_GET_BLOCKS_METADATA_NOFAIL,      "METADATA_NOFAIL" },    \
        { EXT4_GET_BLOCKS_NO_NORMALIZE,         "NO_NORMALIZE" },       \
-       { EXT4_GET_BLOCKS_KEEP_SIZE,            "KEEP_SIZE" },          \
-       { EXT4_GET_BLOCKS_ZERO,                 "ZERO" })
+       { EXT4_GET_BLOCKS_CONVERT_UNWRITTEN,    "CONVERT_UNWRITTEN" },  \
+       { EXT4_GET_BLOCKS_ZERO,                 "ZERO" },               \
+       { EXT4_GET_BLOCKS_IO_SUBMIT,            "IO_SUBMIT" },          \
+       { EXT4_EX_NOCACHE,                      "EX_NOCACHE" })
 
 /*
  * __print_flags() requires that all enum values be wrapped in the
index 7a900b2377b603df04f61461888566aa125fc061..24ca0c00cae3628572a11f22c44d5357b9c327c4 100644 (file)
@@ -9,8 +9,8 @@
  *          Andreas Dilger <adilger@sun.com>
  */
 
-#ifndef _LINUX_FIEMAP_H
-#define _LINUX_FIEMAP_H
+#ifndef _UAPI_LINUX_FIEMAP_H
+#define _UAPI_LINUX_FIEMAP_H
 
 #include <linux/types.h>
 
@@ -67,4 +67,4 @@ struct fiemap {
 #define FIEMAP_EXTENT_SHARED           0x00002000 /* Space shared with other
                                                    * files. */
 
-#endif /* _LINUX_FIEMAP_H */
+#endif /* _UAPI_LINUX_FIEMAP_H */