Merge tag 'xfs-for-linus-4.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git...
authorLinus Torvalds <torvalds@linux-foundation.org>
Mon, 21 Mar 2016 18:53:05 +0000 (11:53 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 21 Mar 2016 18:53:05 +0000 (11:53 -0700)
Pull xfs updates from Dave Chinner:
 "There's quite a lot in this request, and there's some cross-over with
  ext4, dax and quota code due to the nature of the changes being made.

  As for the rest of the XFS changes, there are lots of little things
  all over the place, which add up to a lot of changes in the end.

  The major changes are that we've reduced the size of the struct
  xfs_inode by ~100 bytes (gives an inode cache footprint reduction of
  >10%), the writepage code now only does a single set of mapping tree
  lockups so uses less CPU, delayed allocation reservations won't
  overrun under random write loads anymore, and we added compile time
  verification for on-disk structure sizes so we find out when a commit
  or platform/compiler change breaks the on disk structure as early as
  possible.

  Change summary:

   - error propagation for direct IO failures fixes for both XFS and
     ext4
   - new quota interfaces and XFS implementation for iterating all the
     quota IDs in the filesystem
   - locking fixes for real-time device extent allocation
   - reduction of duplicate information in the xfs and vfs inode, saving
     roughly 100 bytes of memory per cached inode.
   - buffer flag cleanup
   - rework of the writepage code to use the generic write clustering
     mechanisms
   - several fixes for inode flag based DAX enablement
   - rework of remount option parsing
   - compile time verification of on-disk format structure sizes
   - delayed allocation reservation overrun fixes
   - lots of little error handling fixes
   - small memory leak fixes
   - enable xfsaild freezing again"

* tag 'xfs-for-linus-4.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (66 commits)
  xfs: always set rvalp in xfs_dir2_node_trim_free
  xfs: ensure committed is initialized in xfs_trans_roll
  xfs: borrow indirect blocks from freed extent when available
  xfs: refactor delalloc indlen reservation split into helper
  xfs: update freeblocks counter after extent deletion
  xfs: debug mode forced buffered write failure
  xfs: remove impossible condition
  xfs: check sizes of XFS on-disk structures at compile time
  xfs: ioends require logically contiguous file offsets
  xfs: use named array initializers for log item dumping
  xfs: fix computation of inode btree maxlevels
  xfs: reinitialise per-AG structures if geometry changes during recovery
  xfs: remove xfs_trans_get_block_res
  xfs: fix up inode32/64 (re)mount handling
  xfs: fix format specifier , should be %llx and not %llu
  xfs: sanitize remount options
  xfs: convert mount option parsing to tokens
  xfs: fix two memory leaks in xfs_attr_list.c error paths
  xfs: XFS_DIFLAG2_DAX limited by PAGE_SIZE
  xfs: dynamically switch modes when XFS_DIFLAG2_DAX is set/cleared
  ...

70 files changed:
fs/dax.c
fs/direct-io.c
fs/ext4/ext4.h
fs/ext4/inode.c
fs/ext4/page-io.c
fs/ocfs2/aops.c
fs/quota/quota.c
fs/xfs/libxfs/xfs_alloc_btree.c
fs/xfs/libxfs/xfs_attr_sf.h
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_bmap_btree.c
fs/xfs/libxfs/xfs_btree.c
fs/xfs/libxfs/xfs_da_format.h
fs/xfs/libxfs/xfs_dir2.c
fs/xfs/libxfs/xfs_dir2_node.c
fs/xfs/libxfs/xfs_ialloc.c
fs/xfs/libxfs/xfs_ialloc_btree.c
fs/xfs/libxfs/xfs_inode_buf.c
fs/xfs/libxfs/xfs_inode_buf.h
fs/xfs/libxfs/xfs_inode_fork.c
fs/xfs/libxfs/xfs_log_format.h
fs/xfs/libxfs/xfs_quota_defs.h
fs/xfs/libxfs/xfs_rtbitmap.c
fs/xfs/libxfs/xfs_sb.h
fs/xfs/libxfs/xfs_shared.h
fs/xfs/xfs_aops.c
fs/xfs/xfs_aops.h
fs/xfs/xfs_attr_list.c
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_buf.h
fs/xfs/xfs_buf_item.c
fs/xfs/xfs_dir2_readdir.c
fs/xfs/xfs_discard.c
fs/xfs/xfs_dquot.c
fs/xfs/xfs_export.c
fs/xfs/xfs_file.c
fs/xfs/xfs_filestream.c
fs/xfs/xfs_fsops.h
fs/xfs/xfs_icache.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_inode_item.c
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_iops.c
fs/xfs/xfs_itable.c
fs/xfs/xfs_log.c
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_ondisk.h [new file with mode: 0644]
fs/xfs/xfs_qm.c
fs/xfs/xfs_qm.h
fs/xfs/xfs_qm_syscalls.c
fs/xfs/xfs_quotaops.c
fs/xfs/xfs_rtalloc.c
fs/xfs/xfs_super.c
fs/xfs/xfs_super.h
fs/xfs/xfs_sysfs.c
fs/xfs/xfs_trace.h
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans.h
fs/xfs/xfs_trans_ail.c
fs/xfs/xfs_trans_buf.c
fs/xfs/xfs_trans_dquot.c
fs/xfs/xfs_trans_inode.c
include/linux/fs.h
include/linux/quota.h
include/uapi/linux/dqblk_xfs.h
include/uapi/linux/quota.h

index bbb2ad78377020ac85158fb61df067aceadaafe5..90322eb7498c13289a346ce82a64d8ab34c23851 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -286,8 +286,13 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
        if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
                inode_unlock(inode);
 
-       if ((retval > 0) && end_io)
-               end_io(iocb, pos, retval, bh.b_private);
+       if (end_io) {
+               int err;
+
+               err = end_io(iocb, pos, retval, bh.b_private);
+               if (err)
+                       retval = err;
+       }
 
        if (!(flags & DIO_SKIP_DIO_COUNT))
                inode_dio_end(inode);
index 0a8d937c6775577d64c40cdaa250cc632619f339..476f1ecbd1f0e585171b8dfa2970ada3cb5cc2c7 100644 (file)
@@ -253,8 +253,13 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret,
        if (ret == 0)
                ret = transferred;
 
-       if (dio->end_io && dio->result)
-               dio->end_io(dio->iocb, offset, transferred, dio->private);
+       if (dio->end_io) {
+               int err;
+
+               err = dio->end_io(dio->iocb, offset, ret, dio->private);
+               if (err)
+                       ret = err;
+       }
 
        if (!(dio->flags & DIO_SKIP_DIO_COUNT))
                inode_dio_end(dio->inode);
index 393689dfa1aff8f17aad517c12a0a55b808a505e..c0474351986597cda60b574cacf325490ab2b157 100644 (file)
@@ -1511,15 +1511,6 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
                 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
 }
 
-static inline void ext4_set_io_unwritten_flag(struct inode *inode,
-                                             struct ext4_io_end *io_end)
-{
-       if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
-               io_end->flag |= EXT4_IO_END_UNWRITTEN;
-               atomic_inc(&EXT4_I(inode)->i_unwritten);
-       }
-}
-
 /*
  * Inode dynamic state flags
  */
@@ -3293,6 +3284,27 @@ extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
 extern int ext4_resize_begin(struct super_block *sb);
 extern void ext4_resize_end(struct super_block *sb);
 
+static inline void ext4_set_io_unwritten_flag(struct inode *inode,
+                                             struct ext4_io_end *io_end)
+{
+       if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+               io_end->flag |= EXT4_IO_END_UNWRITTEN;
+               atomic_inc(&EXT4_I(inode)->i_unwritten);
+       }
+}
+
+static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
+{
+       struct inode *inode = io_end->inode;
+
+       if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
+               io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
+               /* Wake up anyone waiting on unwritten extent conversion */
+               if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
+                       wake_up_all(ext4_ioend_wq(inode));
+       }
+}
+
 #endif /* __KERNEL__ */
 
 #define EFSBADCRC      EBADMSG         /* Bad CRC detected */
index b2e9576450eb92c3f254a51e8b2392f5ba30b054..dab84a2530ff3e05794d9b3c602e0d246937c8a3 100644 (file)
@@ -3289,22 +3289,32 @@ out:
 }
 #endif
 
-static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
+static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
                            ssize_t size, void *private)
 {
         ext4_io_end_t *io_end = private;
 
        /* if not async direct IO just return */
        if (!io_end)
-               return;
+               return 0;
 
        ext_debug("ext4_end_io_dio(): io_end 0x%p "
                  "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
                  io_end, io_end->inode->i_ino, iocb, offset, size);
 
+       /*
+        * Error during AIO DIO. We cannot convert unwritten extents as the
+        * data was not written. Just clear the unwritten flag and drop io_end.
+        */
+       if (size <= 0) {
+               ext4_clear_io_unwritten_flag(io_end);
+               size = 0;
+       }
        io_end->offset = offset;
        io_end->size = size;
        ext4_put_io_end(io_end);
+
+       return 0;
 }
 
 /*
index 349d7aa04fe70e1938456362474baa5a421cf390..d77d15f4b674485de79f7c72d3a36049fd4aa986 100644 (file)
@@ -136,16 +136,6 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
        kmem_cache_free(io_end_cachep, io_end);
 }
 
-static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
-{
-       struct inode *inode = io_end->inode;
-
-       io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
-       /* Wake up anyone waiting on unwritten extent conversion */
-       if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
-               wake_up_all(ext4_ioend_wq(inode));
-}
-
 /*
  * Check a range of space and convert unwritten extents to written. Note that
  * we are protected from truncate touching same part of extent tree by the
index cda0361e95a403e887d912174ec296aa4cbb6e4d..043110e5212dd8c6ab2e7118ac8f17746448dac0 100644 (file)
@@ -620,7 +620,7 @@ bail:
  * particularly interested in the aio/dio case.  We use the rw_lock DLM lock
  * to protect io on one node from truncation on another.
  */
-static void ocfs2_dio_end_io(struct kiocb *iocb,
+static int ocfs2_dio_end_io(struct kiocb *iocb,
                             loff_t offset,
                             ssize_t bytes,
                             void *private)
@@ -628,6 +628,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
        struct inode *inode = file_inode(iocb->ki_filp);
        int level;
 
+       if (bytes <= 0)
+               return 0;
+
        /* this io's submitter should not have unlocked this before we could */
        BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
 
@@ -644,6 +647,8 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
                level = ocfs2_iocb_rw_locked_level(iocb);
                ocfs2_rw_unlock(inode, level);
        }
+
+       return 0;
 }
 
 static int ocfs2_releasepage(struct page *page, gfp_t wait)
index 3746367098fda369120ccf0eab4ca0249ed64cb4..0ebc90496525d13684ed96402cab612b0c2da356 100644 (file)
@@ -79,7 +79,7 @@ unsigned int qtype_enforce_flag(int type)
        return 0;
 }
 
-static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id,
+static int quota_quotaon(struct super_block *sb, int type, qid_t id,
                         struct path *path)
 {
        if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_enable)
@@ -222,6 +222,34 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id,
        return 0;
 }
 
+/*
+ * Return quota for next active quota >= this id, if any exists,
+ * otherwise return -ESRCH via ->get_nextdqblk
+ */
+static int quota_getnextquota(struct super_block *sb, int type, qid_t id,
+                         void __user *addr)
+{
+       struct kqid qid;
+       struct qc_dqblk fdq;
+       struct if_nextdqblk idq;
+       int ret;
+
+       if (!sb->s_qcop->get_nextdqblk)
+               return -ENOSYS;
+       qid = make_kqid(current_user_ns(), type, id);
+       if (!qid_valid(qid))
+               return -EINVAL;
+       ret = sb->s_qcop->get_nextdqblk(sb, &qid, &fdq);
+       if (ret)
+               return ret;
+       /* struct if_nextdqblk is a superset of struct if_dqblk */
+       copy_to_if_dqblk((struct if_dqblk *)&idq, &fdq);
+       idq.dqb_id = from_kqid(current_user_ns(), qid);
+       if (copy_to_user(addr, &idq, sizeof(idq)))
+               return -EFAULT;
+       return 0;
+}
+
 static void copy_from_if_dqblk(struct qc_dqblk *dst, struct if_dqblk *src)
 {
        dst->d_spc_hardlimit = qbtos(src->dqb_bhardlimit);
@@ -625,6 +653,34 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id,
        return ret;
 }
 
+/*
+ * Return quota for next active quota >= this id, if any exists,
+ * otherwise return -ESRCH via ->get_nextdqblk.
+ */
+static int quota_getnextxquota(struct super_block *sb, int type, qid_t id,
+                           void __user *addr)
+{
+       struct fs_disk_quota fdq;
+       struct qc_dqblk qdq;
+       struct kqid qid;
+       qid_t id_out;
+       int ret;
+
+       if (!sb->s_qcop->get_nextdqblk)
+               return -ENOSYS;
+       qid = make_kqid(current_user_ns(), type, id);
+       if (!qid_valid(qid))
+               return -EINVAL;
+       ret = sb->s_qcop->get_nextdqblk(sb, &qid, &qdq);
+       if (ret)
+               return ret;
+       id_out = from_kqid(current_user_ns(), qid);
+       copy_to_xfs_dqblk(&fdq, &qdq, type, id_out);
+       if (copy_to_user(addr, &fdq, sizeof(fdq)))
+               return -EFAULT;
+       return ret;
+}
+
 static int quota_rmxquota(struct super_block *sb, void __user *addr)
 {
        __u32 flags;
@@ -659,7 +715,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 
        switch (cmd) {
        case Q_QUOTAON:
-               return quota_quotaon(sb, type, cmd, id, path);
+               return quota_quotaon(sb, type, id, path);
        case Q_QUOTAOFF:
                return quota_quotaoff(sb, type);
        case Q_GETFMT:
@@ -670,6 +726,8 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
                return quota_setinfo(sb, type, addr);
        case Q_GETQUOTA:
                return quota_getquota(sb, type, id, addr);
+       case Q_GETNEXTQUOTA:
+               return quota_getnextquota(sb, type, id, addr);
        case Q_SETQUOTA:
                return quota_setquota(sb, type, id, addr);
        case Q_SYNC:
@@ -690,6 +748,8 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
                return quota_setxquota(sb, type, id, addr);
        case Q_XGETQUOTA:
                return quota_getxquota(sb, type, id, addr);
+       case Q_XGETNEXTQUOTA:
+               return quota_getnextxquota(sb, type, id, addr);
        case Q_XQUOTASYNC:
                if (sb->s_flags & MS_RDONLY)
                        return -EROFS;
@@ -708,10 +768,12 @@ static int quotactl_cmd_write(int cmd)
        switch (cmd) {
        case Q_GETFMT:
        case Q_GETINFO:
+       case Q_GETNEXTQUOTA:
        case Q_SYNC:
        case Q_XGETQSTAT:
        case Q_XGETQSTATV:
        case Q_XGETQUOTA:
+       case Q_XGETNEXTQUOTA:
        case Q_XQUOTASYNC:
                return 0;
        }
index 444626ddbd1b9ba2baca676b48a62d90299d515e..d9b42425291e37c6a4845c21dd0e1f61d8a76e86 100644 (file)
@@ -118,8 +118,6 @@ xfs_allocbt_free_block(
        xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
                              XFS_EXTENT_BUSY_SKIP_DISCARD);
        xfs_trans_agbtree_delta(cur->bc_tp, -1);
-
-       xfs_trans_binval(cur->bc_tp, bp);
        return 0;
 }
 
index 919756e3ba53591a9132849a6dc7c57771f1aebb..90928bbe693c03bcb5a74aecaac421ba3132bebe 100644 (file)
  * Small attribute lists are packed as tightly as possible so as
  * to fit into the literal area of the inode.
  */
-
-/*
- * Entries are packed toward the top as tight as possible.
- */
-typedef struct xfs_attr_shortform {
-       struct xfs_attr_sf_hdr {        /* constant-structure header block */
-               __be16  totsize;        /* total bytes in shortform list */
-               __u8    count;  /* count of active entries */
-       } hdr;
-       struct xfs_attr_sf_entry {
-               __uint8_t namelen;      /* actual length of name (no NULL) */
-               __uint8_t valuelen;     /* actual length of value (no NULL) */
-               __uint8_t flags;        /* flags bits (see xfs_attr_leaf.h) */
-               __uint8_t nameval[1];   /* name & value bytes concatenated */
-       } list[1];                      /* variable sized array */
-} xfs_attr_shortform_t;
 typedef struct xfs_attr_sf_hdr xfs_attr_sf_hdr_t;
 typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t;
 
index ef00156f4f9616178006df4ef6248b1abb2297ab..041b6948aeccd928f6f88d95ed9985052d915390 100644 (file)
@@ -477,10 +477,7 @@ xfs_bmap_check_leaf_extents(
                }
                block = XFS_BUF_TO_BLOCK(bp);
        }
-       if (bp_release) {
-               bp_release = 0;
-               xfs_trans_brelse(NULL, bp);
-       }
+
        return;
 
 error0:
@@ -912,7 +909,7 @@ xfs_bmap_local_to_extents(
         * We don't want to deal with the case of keeping inode data inline yet.
         * So sending the data fork of a regular inode is invalid.
         */
-       ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
+       ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
        ifp = XFS_IFORK_PTR(ip, whichfork);
        ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
 
@@ -1079,7 +1076,7 @@ xfs_bmap_add_attrfork_local(
        if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
                return 0;
 
-       if (S_ISDIR(ip->i_d.di_mode)) {
+       if (S_ISDIR(VFS_I(ip)->i_mode)) {
                memset(&dargs, 0, sizeof(dargs));
                dargs.geo = ip->i_mount->m_dir_geo;
                dargs.dp = ip;
@@ -1091,7 +1088,7 @@ xfs_bmap_add_attrfork_local(
                return xfs_dir2_sf_to_block(&dargs);
        }
 
-       if (S_ISLNK(ip->i_d.di_mode))
+       if (S_ISLNK(VFS_I(ip)->i_mode))
                return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
                                                 flags, XFS_DATA_FORK,
                                                 xfs_symlink_local_to_remote);
@@ -4720,6 +4717,66 @@ error0:
        return error;
 }
 
+/*
+ * When a delalloc extent is split (e.g., due to a hole punch), the original
+ * indlen reservation must be shared across the two new extents that are left
+ * behind.
+ *
+ * Given the original reservation and the worst case indlen for the two new
+ * extents (as calculated by xfs_bmap_worst_indlen()), split the original
+ * reservation fairly across the two new extents. If necessary, steal available
+ * blocks from a deleted extent to make up a reservation deficiency (e.g., if
+ * ores == 1). The number of stolen blocks is returned. The availability and
+ * subsequent accounting of stolen blocks is the responsibility of the caller.
+ */
+static xfs_filblks_t
+xfs_bmap_split_indlen(
+       xfs_filblks_t                   ores,           /* original res. */
+       xfs_filblks_t                   *indlen1,       /* ext1 worst indlen */
+       xfs_filblks_t                   *indlen2,       /* ext2 worst indlen */
+       xfs_filblks_t                   avail)          /* stealable blocks */
+{
+       xfs_filblks_t                   len1 = *indlen1;
+       xfs_filblks_t                   len2 = *indlen2;
+       xfs_filblks_t                   nres = len1 + len2; /* new total res. */
+       xfs_filblks_t                   stolen = 0;
+
+       /*
+        * Steal as many blocks as we can to try and satisfy the worst case
+        * indlen for both new extents.
+        */
+       while (nres > ores && avail) {
+               nres--;
+               avail--;
+               stolen++;
+       }
+
+       /*
+        * The only blocks available are those reserved for the original
+        * extent and what we can steal from the extent being removed.
+        * If this still isn't enough to satisfy the combined
+        * requirements for the two new extents, skim blocks off of each
+        * of the new reservations until they match what is available.
+        */
+       while (nres > ores) {
+               if (len1) {
+                       len1--;
+                       nres--;
+               }
+               if (nres == ores)
+                       break;
+               if (len2) {
+                       len2--;
+                       nres--;
+               }
+       }
+
+       *indlen1 = len1;
+       *indlen2 = len2;
+
+       return stolen;
+}
+
 /*
  * Called by xfs_bmapi to update file extent records and the btree
  * after removing space (or undoing a delayed allocation).
@@ -4984,28 +5041,29 @@ xfs_bmap_del_extent(
                        XFS_IFORK_NEXT_SET(ip, whichfork,
                                XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
                } else {
+                       xfs_filblks_t   stolen;
                        ASSERT(whichfork == XFS_DATA_FORK);
-                       temp = xfs_bmap_worst_indlen(ip, temp);
+
+                       /*
+                        * Distribute the original indlen reservation across the
+                        * two new extents. Steal blocks from the deleted extent
+                        * if necessary. Stealing blocks simply fudges the
+                        * fdblocks accounting in xfs_bunmapi().
+                        */
+                       temp = xfs_bmap_worst_indlen(ip, got.br_blockcount);
+                       temp2 = xfs_bmap_worst_indlen(ip, new.br_blockcount);
+                       stolen = xfs_bmap_split_indlen(da_old, &temp, &temp2,
+                                                      del->br_blockcount);
+                       da_new = temp + temp2 - stolen;
+                       del->br_blockcount -= stolen;
+
+                       /*
+                        * Set the reservation for each extent. Warn if either
+                        * is zero as this can lead to delalloc problems.
+                        */
+                       WARN_ON_ONCE(!temp || !temp2);
                        xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-                       temp2 = xfs_bmap_worst_indlen(ip, temp2);
                        new.br_startblock = nullstartblock((int)temp2);
-                       da_new = temp + temp2;
-                       while (da_new > da_old) {
-                               if (temp) {
-                                       temp--;
-                                       da_new--;
-                                       xfs_bmbt_set_startblock(ep,
-                                               nullstartblock((int)temp));
-                               }
-                               if (da_new == da_old)
-                                       break;
-                               if (temp2) {
-                                       temp2--;
-                                       da_new--;
-                                       new.br_startblock =
-                                               nullstartblock((int)temp2);
-                               }
-                       }
                }
                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
                xfs_iext_insert(ip, *idx + 1, 1, &new, state);
@@ -5210,7 +5268,7 @@ xfs_bunmapi(
                         * This is better than zeroing it.
                         */
                        ASSERT(del.br_state == XFS_EXT_NORM);
-                       ASSERT(xfs_trans_get_block_res(tp) > 0);
+                       ASSERT(tp->t_blk_res > 0);
                        /*
                         * If this spans a realtime extent boundary,
                         * chop it back to the start of the one we end at.
@@ -5241,7 +5299,7 @@ xfs_bunmapi(
                                del.br_startblock += mod;
                        } else if ((del.br_startoff == start &&
                                    (del.br_state == XFS_EXT_UNWRITTEN ||
-                                    xfs_trans_get_block_res(tp) == 0)) ||
+                                    tp->t_blk_res == 0)) ||
                                   !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
                                /*
                                 * Can't make it unwritten.  There isn't
@@ -5296,9 +5354,37 @@ xfs_bunmapi(
                                goto nodelete;
                        }
                }
+
+               /*
+                * If it's the case where the directory code is running
+                * with no block reservation, and the deleted block is in
+                * the middle of its extent, and the resulting insert
+                * of an extent would cause transformation to btree format,
+                * then reject it.  The calling code will then swap
+                * blocks around instead.
+                * We have to do this now, rather than waiting for the
+                * conversion to btree format, since the transaction
+                * will be dirty.
+                */
+               if (!wasdel && tp->t_blk_res == 0 &&
+                   XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+                   XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
+                       XFS_IFORK_MAXEXT(ip, whichfork) &&
+                   del.br_startoff > got.br_startoff &&
+                   del.br_startoff + del.br_blockcount <
+                   got.br_startoff + got.br_blockcount) {
+                       error = -ENOSPC;
+                       goto error0;
+               }
+
+               /*
+                * Unreserve quota and update realtime free space, if
+                * appropriate. If delayed allocation, update the inode delalloc
+                * counter now and wait to update the sb counters as
+                * xfs_bmap_del_extent() might need to borrow some blocks.
+                */
                if (wasdel) {
                        ASSERT(startblockval(del.br_startblock) > 0);
-                       /* Update realtime/data freespace, unreserve quota */
                        if (isrt) {
                                xfs_filblks_t rtexts;
 
@@ -5309,8 +5395,6 @@ xfs_bunmapi(
                                        ip, -((long)del.br_blockcount), 0,
                                        XFS_QMOPT_RES_RTBLKS);
                        } else {
-                               xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount,
-                                                false);
                                (void)xfs_trans_reserve_quota_nblks(NULL,
                                        ip, -((long)del.br_blockcount), 0,
                                        XFS_QMOPT_RES_REGBLKS);
@@ -5321,32 +5405,16 @@ xfs_bunmapi(
                                        XFS_BTCUR_BPRV_WASDEL;
                } else if (cur)
                        cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
-               /*
-                * If it's the case where the directory code is running
-                * with no block reservation, and the deleted block is in
-                * the middle of its extent, and the resulting insert
-                * of an extent would cause transformation to btree format,
-                * then reject it.  The calling code will then swap
-                * blocks around instead.
-                * We have to do this now, rather than waiting for the
-                * conversion to btree format, since the transaction
-                * will be dirty.
-                */
-               if (!wasdel && xfs_trans_get_block_res(tp) == 0 &&
-                   XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
-                   XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
-                       XFS_IFORK_MAXEXT(ip, whichfork) &&
-                   del.br_startoff > got.br_startoff &&
-                   del.br_startoff + del.br_blockcount <
-                   got.br_startoff + got.br_blockcount) {
-                       error = -ENOSPC;
-                       goto error0;
-               }
+
                error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del,
                                &tmp_logflags, whichfork);
                logflags |= tmp_logflags;
                if (error)
                        goto error0;
+
+               if (!isrt && wasdel)
+                       xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false);
+
                bno = del.br_startoff - 1;
 nodelete:
                /*
index 1637c37bfbaa1cb61ef69e48c52eb95716ecd649..6282f6e708afaf4ca2a15999b1864b507c6d5ed5 100644 (file)
@@ -461,7 +461,7 @@ xfs_bmbt_alloc_block(
                 * reservation amount is insufficient then we may fail a
                 * block allocation here and corrupt the filesystem.
                 */
-               args.minleft = xfs_trans_get_block_res(args.tp);
+               args.minleft = args.tp->t_blk_res;
        } else if (cur->bc_private.b.flist->xbf_low) {
                args.type = XFS_ALLOCTYPE_START_BNO;
        } else {
@@ -470,7 +470,7 @@ xfs_bmbt_alloc_block(
 
        args.minlen = args.maxlen = args.prod = 1;
        args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
-       if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
+       if (!args.wasdel && args.tp->t_blk_res == 0) {
                error = -ENOSPC;
                goto error0;
        }
@@ -531,7 +531,6 @@ xfs_bmbt_free_block(
 
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
        xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
-       xfs_trans_binval(tp, bp);
        return 0;
 }
 
index a0eb18ce3ad38f205f5f3487ea4684937b7b287c..1f88e1ce770f35442f0161466632c68fe0e46153 100644 (file)
@@ -294,6 +294,21 @@ xfs_btree_sblock_verify_crc(
        return true;
 }
 
+static int
+xfs_btree_free_block(
+       struct xfs_btree_cur    *cur,
+       struct xfs_buf          *bp)
+{
+       int                     error;
+
+       error = cur->bc_ops->free_block(cur, bp);
+       if (!error) {
+               xfs_trans_binval(cur->bc_tp, bp);
+               XFS_BTREE_STATS_INC(cur, free);
+       }
+       return error;
+}
+
 /*
  * Delete the btree cursor.
  */
@@ -3209,6 +3224,7 @@ xfs_btree_kill_iroot(
        int                     level;
        int                     index;
        int                     numrecs;
+       int                     error;
 #ifdef DEBUG
        union xfs_btree_ptr     ptr;
        int                     i;
@@ -3272,8 +3288,6 @@ xfs_btree_kill_iroot(
        cpp = xfs_btree_ptr_addr(cur, 1, cblock);
 #ifdef DEBUG
        for (i = 0; i < numrecs; i++) {
-               int             error;
-
                error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
                if (error) {
                        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
@@ -3283,8 +3297,11 @@ xfs_btree_kill_iroot(
 #endif
        xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
 
-       cur->bc_ops->free_block(cur, cbp);
-       XFS_BTREE_STATS_INC(cur, free);
+       error = xfs_btree_free_block(cur, cbp);
+       if (error) {
+               XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+               return error;
+       }
 
        cur->bc_bufs[level - 1] = NULL;
        be16_add_cpu(&block->bb_level, -1);
@@ -3317,14 +3334,12 @@ xfs_btree_kill_root(
         */
        cur->bc_ops->set_root(cur, newroot, -1);
 
-       error = cur->bc_ops->free_block(cur, bp);
+       error = xfs_btree_free_block(cur, bp);
        if (error) {
                XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
                return error;
        }
 
-       XFS_BTREE_STATS_INC(cur, free);
-
        cur->bc_bufs[level] = NULL;
        cur->bc_ra[level] = 0;
        cur->bc_nlevels--;
@@ -3830,10 +3845,9 @@ xfs_btree_delrec(
        }
 
        /* Free the deleted block. */
-       error = cur->bc_ops->free_block(cur, rbp);
+       error = xfs_btree_free_block(cur, rbp);
        if (error)
                goto error0;
-       XFS_BTREE_STATS_INC(cur, free);
 
        /*
         * If we joined with the left neighbor, set the buffer in the
index b14bbd6bb05fad090571bcada4e4867e35040b87..8d4d8bce41bf7873fec0fc8211801207a0a46494 100644 (file)
@@ -641,6 +641,22 @@ xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
  */
 #define XFS_ATTR_LEAF_MAPSIZE  3       /* how many freespace slots */
 
+/*
+ * Entries are packed toward the top as tight as possible.
+ */
+typedef struct xfs_attr_shortform {
+       struct xfs_attr_sf_hdr {        /* constant-structure header block */
+               __be16  totsize;        /* total bytes in shortform list */
+               __u8    count;  /* count of active entries */
+       } hdr;
+       struct xfs_attr_sf_entry {
+               __uint8_t namelen;      /* actual length of name (no NULL) */
+               __uint8_t valuelen;     /* actual length of value (no NULL) */
+               __uint8_t flags;        /* flags bits (see xfs_attr_leaf.h) */
+               __uint8_t nameval[1];   /* name & value bytes concatenated */
+       } list[1];                      /* variable sized array */
+} xfs_attr_shortform_t;
+
 typedef struct xfs_attr_leaf_map {     /* RLE map of free bytes */
        __be16  base;                     /* base of free region */
        __be16  size;                     /* length of free region */
index 2fb53a5c0a745259d2e18164e8505cb21d704724..af0f9d171f8a012758d778a0bd105e51448e5cf3 100644 (file)
@@ -176,7 +176,7 @@ xfs_dir_isempty(
 {
        xfs_dir2_sf_hdr_t       *sfp;
 
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
+       ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
        if (dp->i_d.di_size == 0)       /* might happen during shutdown. */
                return 1;
        if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
@@ -231,7 +231,7 @@ xfs_dir_init(
        struct xfs_da_args *args;
        int             error;
 
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
+       ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
        error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino);
        if (error)
                return error;
@@ -266,7 +266,7 @@ xfs_dir_createname(
        int                     rval;
        int                     v;              /* type-checking value */
 
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
+       ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
        if (inum) {
                rval = xfs_dir_ino_validate(tp->t_mountp, inum);
                if (rval)
@@ -364,7 +364,7 @@ xfs_dir_lookup(
        int             v;              /* type-checking value */
        int             lock_mode;
 
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
+       ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
        XFS_STATS_INC(dp->i_mount, xs_dir_lookup);
 
        /*
@@ -443,7 +443,7 @@ xfs_dir_removename(
        int             rval;
        int             v;              /* type-checking value */
 
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
+       ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
        XFS_STATS_INC(dp->i_mount, xs_dir_remove);
 
        args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
@@ -505,7 +505,7 @@ xfs_dir_replace(
        int             rval;
        int             v;              /* type-checking value */
 
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
+       ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
 
        rval = xfs_dir_ino_validate(tp->t_mountp, inum);
        if (rval)
index 63ee03db796ca9a55b9f9f10a427a6fd539c30d4..75a557432d0f87fdb4cfa866332c403023d169dc 100644 (file)
@@ -2235,6 +2235,9 @@ xfs_dir2_node_trim_free(
 
        dp = args->dp;
        tp = args->trans;
+
+       *rvalp = 0;
+
        /*
         * Read the freespace block.
         */
@@ -2255,7 +2258,6 @@ xfs_dir2_node_trim_free(
         */
        if (freehdr.nused > 0) {
                xfs_trans_brelse(tp, bp);
-               *rvalp = 0;
                return 0;
        }
        /*
index 66d702e6b9ff3f7d6cc5a0b47a216171815e9fa4..22297f9b0fd52c8a6d3cc75bb0ac699bd985b0f0 100644 (file)
@@ -2403,8 +2403,8 @@ xfs_ialloc_compute_maxlevels(
 
        maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >>
                XFS_INODES_PER_CHUNK_LOG;
-       minleafrecs = mp->m_alloc_mnr[0];
-       minnoderecs = mp->m_alloc_mnr[1];
+       minleafrecs = mp->m_inobt_mnr[0];
+       minnoderecs = mp->m_inobt_mnr[1];
        maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
        for (level = 1; maxblocks > 1; level++)
                maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
index c679f3c05b63cb535de34e7de5543a75ac57faac..89c21d771e35edbc026eb7fe7cb373280774b162 100644 (file)
@@ -125,16 +125,8 @@ xfs_inobt_free_block(
        struct xfs_btree_cur    *cur,
        struct xfs_buf          *bp)
 {
-       xfs_fsblock_t           fsbno;
-       int                     error;
-
-       fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp));
-       error = xfs_free_extent(cur->bc_tp, fsbno, 1);
-       if (error)
-               return error;
-
-       xfs_trans_binval(cur->bc_tp, bp);
-       return error;
+       return xfs_free_extent(cur->bc_tp,
+                       XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1);
 }
 
 STATIC int
index 1aabfda669b0bb7bb85dbc8d9889ffd452d17c79..9d9559eb2835a33621e568392fab2c1074022da3 100644 (file)
@@ -195,28 +195,50 @@ xfs_imap_to_bp(
 }
 
 void
-xfs_dinode_from_disk(
-       xfs_icdinode_t          *to,
-       xfs_dinode_t            *from)
+xfs_inode_from_disk(
+       struct xfs_inode        *ip,
+       struct xfs_dinode       *from)
 {
-       to->di_magic = be16_to_cpu(from->di_magic);
-       to->di_mode = be16_to_cpu(from->di_mode);
-       to->di_version = from ->di_version;
+       struct xfs_icdinode     *to = &ip->i_d;
+       struct inode            *inode = VFS_I(ip);
+
+
+       /*
+        * Convert v1 inodes immediately to v2 inode format as this is the
+        * minimum inode version format we support in the rest of the code.
+        */
+       to->di_version = from->di_version;
+       if (to->di_version == 1) {
+               set_nlink(inode, be16_to_cpu(from->di_onlink));
+               to->di_projid_lo = 0;
+               to->di_projid_hi = 0;
+               to->di_version = 2;
+       } else {
+               set_nlink(inode, be32_to_cpu(from->di_nlink));
+               to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
+               to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
+       }
+
        to->di_format = from->di_format;
-       to->di_onlink = be16_to_cpu(from->di_onlink);
        to->di_uid = be32_to_cpu(from->di_uid);
        to->di_gid = be32_to_cpu(from->di_gid);
-       to->di_nlink = be32_to_cpu(from->di_nlink);
-       to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
-       to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
-       memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
        to->di_flushiter = be16_to_cpu(from->di_flushiter);
-       to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
-       to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
-       to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
-       to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
-       to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
-       to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
+
+       /*
+        * Time is signed, so need to convert to signed 32 bit before
+        * storing in inode timestamp which may be 64 bit. Otherwise
+        * a time before epoch is converted to a time long after epoch
+        * on 64 bit systems.
+        */
+       inode->i_atime.tv_sec = (int)be32_to_cpu(from->di_atime.t_sec);
+       inode->i_atime.tv_nsec = (int)be32_to_cpu(from->di_atime.t_nsec);
+       inode->i_mtime.tv_sec = (int)be32_to_cpu(from->di_mtime.t_sec);
+       inode->i_mtime.tv_nsec = (int)be32_to_cpu(from->di_mtime.t_nsec);
+       inode->i_ctime.tv_sec = (int)be32_to_cpu(from->di_ctime.t_sec);
+       inode->i_ctime.tv_nsec = (int)be32_to_cpu(from->di_ctime.t_nsec);
+       inode->i_generation = be32_to_cpu(from->di_gen);
+       inode->i_mode = be16_to_cpu(from->di_mode);
+
        to->di_size = be64_to_cpu(from->di_size);
        to->di_nblocks = be64_to_cpu(from->di_nblocks);
        to->di_extsize = be32_to_cpu(from->di_extsize);
@@ -227,42 +249,96 @@ xfs_dinode_from_disk(
        to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
        to->di_dmstate  = be16_to_cpu(from->di_dmstate);
        to->di_flags    = be16_to_cpu(from->di_flags);
-       to->di_gen      = be32_to_cpu(from->di_gen);
 
        if (to->di_version == 3) {
-               to->di_changecount = be64_to_cpu(from->di_changecount);
+               inode->i_version = be64_to_cpu(from->di_changecount);
                to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
                to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
                to->di_flags2 = be64_to_cpu(from->di_flags2);
-               to->di_ino = be64_to_cpu(from->di_ino);
-               to->di_lsn = be64_to_cpu(from->di_lsn);
-               memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
-               uuid_copy(&to->di_uuid, &from->di_uuid);
        }
 }
 
 void
-xfs_dinode_to_disk(
-       xfs_dinode_t            *to,
-       xfs_icdinode_t          *from)
+xfs_inode_to_disk(
+       struct xfs_inode        *ip,
+       struct xfs_dinode       *to,
+       xfs_lsn_t               lsn)
+{
+       struct xfs_icdinode     *from = &ip->i_d;
+       struct inode            *inode = VFS_I(ip);
+
+       to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
+       to->di_onlink = 0;
+
+       to->di_version = from->di_version;
+       to->di_format = from->di_format;
+       to->di_uid = cpu_to_be32(from->di_uid);
+       to->di_gid = cpu_to_be32(from->di_gid);
+       to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
+       to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
+
+       memset(to->di_pad, 0, sizeof(to->di_pad));
+       to->di_atime.t_sec = cpu_to_be32(inode->i_atime.tv_sec);
+       to->di_atime.t_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
+       to->di_mtime.t_sec = cpu_to_be32(inode->i_mtime.tv_sec);
+       to->di_mtime.t_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
+       to->di_ctime.t_sec = cpu_to_be32(inode->i_ctime.tv_sec);
+       to->di_ctime.t_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
+       to->di_nlink = cpu_to_be32(inode->i_nlink);
+       to->di_gen = cpu_to_be32(inode->i_generation);
+       to->di_mode = cpu_to_be16(inode->i_mode);
+
+       to->di_size = cpu_to_be64(from->di_size);
+       to->di_nblocks = cpu_to_be64(from->di_nblocks);
+       to->di_extsize = cpu_to_be32(from->di_extsize);
+       to->di_nextents = cpu_to_be32(from->di_nextents);
+       to->di_anextents = cpu_to_be16(from->di_anextents);
+       to->di_forkoff = from->di_forkoff;
+       to->di_aformat = from->di_aformat;
+       to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
+       to->di_dmstate = cpu_to_be16(from->di_dmstate);
+       to->di_flags = cpu_to_be16(from->di_flags);
+
+       if (from->di_version == 3) {
+               to->di_changecount = cpu_to_be64(inode->i_version);
+               to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
+               to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
+               to->di_flags2 = cpu_to_be64(from->di_flags2);
+
+               to->di_ino = cpu_to_be64(ip->i_ino);
+               to->di_lsn = cpu_to_be64(lsn);
+               memset(to->di_pad2, 0, sizeof(to->di_pad2));
+               uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
+               to->di_flushiter = 0;
+       } else {
+               to->di_flushiter = cpu_to_be16(from->di_flushiter);
+       }
+}
+
+void
+xfs_log_dinode_to_disk(
+       struct xfs_log_dinode   *from,
+       struct xfs_dinode       *to)
 {
        to->di_magic = cpu_to_be16(from->di_magic);
        to->di_mode = cpu_to_be16(from->di_mode);
-       to->di_version = from ->di_version;
+       to->di_version = from->di_version;
        to->di_format = from->di_format;
-       to->di_onlink = cpu_to_be16(from->di_onlink);
+       to->di_onlink = 0;
        to->di_uid = cpu_to_be32(from->di_uid);
        to->di_gid = cpu_to_be32(from->di_gid);
        to->di_nlink = cpu_to_be32(from->di_nlink);
        to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
        to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
        memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
+
        to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
        to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
        to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
        to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
        to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
        to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
+
        to->di_size = cpu_to_be64(from->di_size);
        to->di_nblocks = cpu_to_be64(from->di_nblocks);
        to->di_extsize = cpu_to_be32(from->di_extsize);
@@ -367,13 +443,10 @@ xfs_iread(
            !(mp->m_flags & XFS_MOUNT_IKEEP)) {
                /* initialise the on-disk inode core */
                memset(&ip->i_d, 0, sizeof(ip->i_d));
-               ip->i_d.di_magic = XFS_DINODE_MAGIC;
-               ip->i_d.di_gen = prandom_u32();
-               if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               VFS_I(ip)->i_generation = prandom_u32();
+               if (xfs_sb_version_hascrc(&mp->m_sb))
                        ip->i_d.di_version = 3;
-                       ip->i_d.di_ino = ip->i_ino;
-                       uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_meta_uuid);
-               } else
+               else
                        ip->i_d.di_version = 2;
                return 0;
        }
@@ -403,7 +476,7 @@ xfs_iread(
         * Otherwise, just get the truly permanent information.
         */
        if (dip->di_mode) {
-               xfs_dinode_from_disk(&ip->i_d, dip);
+               xfs_inode_from_disk(ip, dip);
                error = xfs_iformat_fork(ip, dip);
                if (error)  {
 #ifdef DEBUG
@@ -417,16 +490,10 @@ xfs_iread(
                 * Partial initialisation of the in-core inode. Just the bits
                 * that xfs_ialloc won't overwrite or relies on being correct.
                 */
-               ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
                ip->i_d.di_version = dip->di_version;
-               ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
+               VFS_I(ip)->i_generation = be32_to_cpu(dip->di_gen);
                ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
 
-               if (dip->di_version == 3) {
-                       ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
-                       uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
-               }
-
                /*
                 * Make sure to pull in the mode here as well in
                 * case the inode is released without being used.
@@ -434,25 +501,10 @@ xfs_iread(
                 * the inode is already free and not try to mess
                 * with the uninitialized part of it.
                 */
-               ip->i_d.di_mode = 0;
-       }
-
-       /*
-        * Automatically convert version 1 inode formats in memory to version 2
-        * inode format. If the inode is modified, it will get logged and
-        * rewritten as a version 2 inode. We can do this because we set the
-        * superblock feature bit for v2 inodes unconditionally during mount
-        * and it means the reast of the code can assume the inode version is 2
-        * or higher.
-        */
-       if (ip->i_d.di_version == 1) {
-               ip->i_d.di_version = 2;
-               memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
-               ip->i_d.di_nlink = ip->i_d.di_onlink;
-               ip->i_d.di_onlink = 0;
-               xfs_set_projid(ip, 0);
+               VFS_I(ip)->i_mode = 0;
        }
 
+       ASSERT(ip->i_d.di_version >= 2);
        ip->i_delayed_blks = 0;
 
        /*
index 9308c47f2a527dc08b75b66de5d064e0b13e0cfe..7c4dd321b2152915c2d9075222b4d757a08539cf 100644 (file)
 
 struct xfs_inode;
 struct xfs_dinode;
-struct xfs_icdinode;
+
+/*
+ * In memory representation of the XFS inode. This is held in the in-core struct
+ * xfs_inode and represents the current on disk values but the structure is not
+ * in on-disk format.  That is, this structure is always translated to on-disk
+ * format specific structures at the appropriate time.
+ */
+struct xfs_icdinode {
+       __int8_t        di_version;     /* inode version */
+       __int8_t        di_format;      /* format of di_c data */
+       __uint16_t      di_flushiter;   /* incremented on flush */
+       __uint32_t      di_uid;         /* owner's user id */
+       __uint32_t      di_gid;         /* owner's group id */
+       __uint16_t      di_projid_lo;   /* lower part of owner's project id */
+       __uint16_t      di_projid_hi;   /* higher part of owner's project id */
+       xfs_fsize_t     di_size;        /* number of bytes in file */
+       xfs_rfsblock_t  di_nblocks;     /* # of direct & btree blocks used */
+       xfs_extlen_t    di_extsize;     /* basic/minimum extent size for file */
+       xfs_extnum_t    di_nextents;    /* number of extents in data fork */
+       xfs_aextnum_t   di_anextents;   /* number of extents in attribute fork*/
+       __uint8_t       di_forkoff;     /* attr fork offs, <<3 for 64b align */
+       __int8_t        di_aformat;     /* format of attr fork's data */
+       __uint32_t      di_dmevmask;    /* DMIG event mask */
+       __uint16_t      di_dmstate;     /* DMIG state info */
+       __uint16_t      di_flags;       /* random flags, XFS_DIFLAG_... */
+
+       __uint64_t      di_flags2;      /* more random flags */
+
+       xfs_ictimestamp_t di_crtime;    /* time created */
+};
 
 /*
  * Inode location information.  Stored in the inode and passed to
@@ -38,8 +67,11 @@ int  xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
 int    xfs_iread(struct xfs_mount *, struct xfs_trans *,
                  struct xfs_inode *, uint);
 void   xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
-void   xfs_dinode_to_disk(struct xfs_dinode *to, struct xfs_icdinode *from);
-void   xfs_dinode_from_disk(struct xfs_icdinode *to, struct xfs_dinode *from);
+void   xfs_inode_to_disk(struct xfs_inode *ip, struct xfs_dinode *to,
+                         xfs_lsn_t lsn);
+void   xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from);
+void   xfs_log_dinode_to_disk(struct xfs_log_dinode *from,
+                              struct xfs_dinode *to);
 
 #if defined(DEBUG)
 void   xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
index 0defbd02f62d58bb36e62f2cc4738cf862de4e1c..11faf7df14c8099e49759f51f0315dd5caec6632 100644 (file)
@@ -31,6 +31,7 @@
 #include "xfs_error.h"
 #include "xfs_trace.h"
 #include "xfs_attr_sf.h"
+#include "xfs_da_format.h"
 
 kmem_zone_t *xfs_ifork_zone;
 
@@ -120,7 +121,7 @@ xfs_iformat_fork(
                return -EFSCORRUPTED;
        }
 
-       switch (ip->i_d.di_mode & S_IFMT) {
+       switch (VFS_I(ip)->i_mode & S_IFMT) {
        case S_IFIFO:
        case S_IFCHR:
        case S_IFBLK:
index 2653146904153178d474172bcacfec55e7742907..d54a8018b079dd3f0c078e5fdf56cf48a151a545 100644 (file)
@@ -290,6 +290,7 @@ typedef struct xfs_inode_log_format_64 {
        __int32_t               ilf_boffset;    /* off of inode in buffer */
 } xfs_inode_log_format_64_t;
 
+
 /*
  * Flags for xfs_trans_log_inode flags field.
  */
@@ -360,15 +361,15 @@ typedef struct xfs_ictimestamp {
 } xfs_ictimestamp_t;
 
 /*
- * NOTE:  This structure must be kept identical to struct xfs_dinode
- *       except for the endianness annotations.
+ * Define the format of the inode core that is logged. This structure must be
+ * kept identical to struct xfs_dinode except for the endianness annotations.
  */
-typedef struct xfs_icdinode {
+struct xfs_log_dinode {
        __uint16_t      di_magic;       /* inode magic # = XFS_DINODE_MAGIC */
        __uint16_t      di_mode;        /* mode and type of file */
        __int8_t        di_version;     /* inode version */
        __int8_t        di_format;      /* format of di_c data */
-       __uint16_t      di_onlink;      /* old number of links to file */
+       __uint8_t       di_pad3[2];     /* unused in v2/3 inodes */
        __uint32_t      di_uid;         /* owner's user id */
        __uint32_t      di_gid;         /* owner's group id */
        __uint32_t      di_nlink;       /* number of links to file */
@@ -407,13 +408,13 @@ typedef struct xfs_icdinode {
        uuid_t          di_uuid;        /* UUID of the filesystem */
 
        /* structure must be padded to 64 bit alignment */
-} xfs_icdinode_t;
+};
 
-static inline uint xfs_icdinode_size(int version)
+static inline uint xfs_log_dinode_size(int version)
 {
        if (version == 3)
-               return sizeof(struct xfs_icdinode);
-       return offsetof(struct xfs_icdinode, di_next_unlinked);
+               return sizeof(struct xfs_log_dinode);
+       return offsetof(struct xfs_log_dinode, di_next_unlinked);
 }
 
 /*
@@ -495,6 +496,8 @@ enum xfs_blft {
        XFS_BLFT_ATTR_LEAF_BUF,
        XFS_BLFT_ATTR_RMT_BUF,
        XFS_BLFT_SB_BUF,
+       XFS_BLFT_RTBITMAP_BUF,
+       XFS_BLFT_RTSUMMARY_BUF,
        XFS_BLFT_MAX_BUF = (1 << XFS_BLFT_BITS),
 };
 
index f51078f1e92ad4e29b6dbe4c9e4fe8f46f9b2be6..8eed51275bb39b4466f8457b3dc7ba0aa592a583 100644 (file)
@@ -37,7 +37,7 @@ typedef __uint16_t    xfs_qwarncnt_t;
 #define XFS_DQ_PROJ            0x0002          /* project quota */
 #define XFS_DQ_GROUP           0x0004          /* a group quota */
 #define XFS_DQ_DIRTY           0x0008          /* dquot is dirty */
-#define XFS_DQ_FREEING         0x0010          /* dquot is beeing torn down */
+#define XFS_DQ_FREEING         0x0010          /* dquot is being torn down */
 
 #define XFS_DQ_ALLTYPES                (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
 
@@ -116,6 +116,7 @@ typedef __uint16_t  xfs_qwarncnt_t;
 #define XFS_QMOPT_DQREPAIR     0x0001000 /* repair dquot if damaged */
 #define XFS_QMOPT_GQUOTA       0x0002000 /* group dquot requested */
 #define XFS_QMOPT_ENOSPC       0x0004000 /* enospc instead of edquot (prj) */
+#define XFS_QMOPT_DQNEXT       0x0008000 /* return next dquot >= this ID */
 
 /*
  * flags to xfs_trans_mod_dquot to indicate which field needs to be
index 9b59ffa1fc198d4934a575af40716837bc54c11b..951c044e24e40d024e5abf48057c0f0942111acc 100644 (file)
  * Realtime allocator bitmap functions shared with userspace.
  */
 
+/*
+ * Real time buffers need verifiers to avoid runtime warnings during IO.
+ * We don't have anything to verify, however, so these are just dummy
+ * operations.
+ */
+static void
+xfs_rtbuf_verify_read(
+       struct xfs_buf  *bp)
+{
+       return;
+}
+
+static void
+xfs_rtbuf_verify_write(
+       struct xfs_buf  *bp)
+{
+       return;
+}
+
+const struct xfs_buf_ops xfs_rtbuf_ops = {
+       .name = "rtbuf",
+       .verify_read = xfs_rtbuf_verify_read,
+       .verify_write = xfs_rtbuf_verify_write,
+};
+
 /*
  * Get a buffer for the bitmap or summary file block specified.
  * The buffer is returned read and locked.
@@ -68,9 +93,12 @@ xfs_rtbuf_get(
        ASSERT(map.br_startblock != NULLFSBLOCK);
        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
                                   XFS_FSB_TO_DADDR(mp, map.br_startblock),
-                                  mp->m_bsize, 0, &bp, NULL);
+                                  mp->m_bsize, 0, &bp, &xfs_rtbuf_ops);
        if (error)
                return error;
+
+       xfs_trans_buf_set_type(tp, bp, issum ? XFS_BLFT_RTSUMMARY_BUF
+                                            : XFS_BLFT_RTBITMAP_BUF);
        *bpp = bp;
        return 0;
 }
@@ -983,7 +1011,7 @@ xfs_rtfree_extent(
            mp->m_sb.sb_rextents) {
                if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM))
                        mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
-               *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0;
+               *(__uint64_t *)&VFS_I(mp->m_rbmip)->i_atime = 0;
                xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
        }
        return 0;
index b25bb9a343f33f99ca2bf4392d696c59f80178b4..961e6475a3099bb9acf2c5df67f355f35ffbb3c7 100644 (file)
@@ -27,7 +27,6 @@ extern struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *, xfs_agnumber_t,
 extern void    xfs_perag_put(struct xfs_perag *pag);
 extern int     xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t);
 
-extern void    xfs_sb_calc_crc(struct xfs_buf *bp);
 extern void    xfs_log_sb(struct xfs_trans *tp);
 extern int     xfs_sync_sb(struct xfs_mount *mp, bool wait);
 extern void    xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp);
index 15c3ceb845b91a31353a21123450c820fef26c49..81ac870834da9e63515553e3fa291318acd1e73a 100644 (file)
@@ -53,6 +53,7 @@ extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops;
 extern const struct xfs_buf_ops xfs_sb_buf_ops;
 extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
 extern const struct xfs_buf_ops xfs_symlink_buf_ops;
+extern const struct xfs_buf_ops xfs_rtbuf_ops;
 
 /*
  * Transaction types.  Used to distinguish types of buffers. These never reach
index 5c57b7b40728904e0ad9c5641563df57400e76f9..d445a64b979e963dccbef0a721989c112351a97e 100644 (file)
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
 
+/* flags for direct write completions */
+#define XFS_DIO_FLAG_UNWRITTEN (1 << 0)
+#define XFS_DIO_FLAG_APPEND    (1 << 1)
+
+/*
+ * structure owned by writepages passed to individual writepage calls
+ */
+struct xfs_writepage_ctx {
+       struct xfs_bmbt_irec    imap;
+       bool                    imap_valid;
+       unsigned int            io_type;
+       struct xfs_ioend        *ioend;
+       sector_t                last_block;
+};
+
 void
 xfs_count_page_state(
        struct page             *page,
@@ -214,10 +229,12 @@ xfs_end_io(
        struct xfs_inode *ip = XFS_I(ioend->io_inode);
        int             error = 0;
 
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+       /*
+        * Set an error if the mount has shut down and proceed with end I/O
+        * processing so it can perform whatever cleanups are necessary.
+        */
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
                ioend->io_error = -EIO;
-               goto done;
-       }
 
        /*
         * For unwritten extents we need to issue transactions to convert a
@@ -265,7 +282,7 @@ xfs_alloc_ioend(
         */
        atomic_set(&ioend->io_remaining, 1);
        ioend->io_error = 0;
-       ioend->io_list = NULL;
+       INIT_LIST_HEAD(&ioend->io_list);
        ioend->io_type = type;
        ioend->io_inode = inode;
        ioend->io_buffer_head = NULL;
@@ -283,8 +300,7 @@ xfs_map_blocks(
        struct inode            *inode,
        loff_t                  offset,
        struct xfs_bmbt_irec    *imap,
-       int                     type,
-       int                     nonblocking)
+       int                     type)
 {
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
@@ -300,12 +316,7 @@ xfs_map_blocks(
        if (type == XFS_IO_UNWRITTEN)
                bmapi_flags |= XFS_BMAPI_IGSTATE;
 
-       if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
-               if (nonblocking)
-                       return -EAGAIN;
-               xfs_ilock(ip, XFS_ILOCK_SHARED);
-       }
-
+       xfs_ilock(ip, XFS_ILOCK_SHARED);
        ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
               (ip->i_df.if_flags & XFS_IFEXTENTS));
        ASSERT(offset <= mp->m_super->s_maxbytes);
@@ -341,7 +352,7 @@ xfs_map_blocks(
        return 0;
 }
 
-STATIC int
+STATIC bool
 xfs_imap_valid(
        struct inode            *inode,
        struct xfs_bmbt_irec    *imap,
@@ -414,8 +425,7 @@ xfs_start_buffer_writeback(
 STATIC void
 xfs_start_page_writeback(
        struct page             *page,
-       int                     clear_dirty,
-       int                     buffers)
+       int                     clear_dirty)
 {
        ASSERT(PageLocked(page));
        ASSERT(!PageWriteback(page));
@@ -434,10 +444,6 @@ xfs_start_page_writeback(
                set_page_writeback_keepwrite(page);
 
        unlock_page(page);
-
-       /* If no buffers on the page are to be written, finish it here */
-       if (!buffers)
-               end_page_writeback(page);
 }
 
 static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
@@ -446,153 +452,101 @@ static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
 }
 
 /*
- * Submit all of the bios for all of the ioends we have saved up, covering the
- * initial writepage page and also any probed pages.
- *
- * Because we may have multiple ioends spanning a page, we need to start
- * writeback on all the buffers before we submit them for I/O. If we mark the
- * buffers as we got, then we can end up with a page that only has buffers
- * marked async write and I/O complete on can occur before we mark the other
- * buffers async write.
- *
- * The end result of this is that we trip a bug in end_page_writeback() because
- * we call it twice for the one page as the code in end_buffer_async_write()
- * assumes that all buffers on the page are started at the same time.
- *
- * The fix is two passes across the ioend list - one to start writeback on the
- * buffer_heads, and then submit them for I/O on the second pass.
+ * Submit all of the bios for an ioend. We are only passed a single ioend at a
+ * time; the caller is responsible for chaining prior to submission.
  *
  * If @fail is non-zero, it means that we have a situation where some part of
  * the submission process has failed after we have marked paged for writeback
  * and unlocked them. In this situation, we need to fail the ioend chain rather
  * than submit it to IO. This typically only happens on a filesystem shutdown.
  */
-STATIC void
+STATIC int
 xfs_submit_ioend(
        struct writeback_control *wbc,
        xfs_ioend_t             *ioend,
-       int                     fail)
+       int                     status)
 {
-       xfs_ioend_t             *head = ioend;
-       xfs_ioend_t             *next;
        struct buffer_head      *bh;
        struct bio              *bio;
        sector_t                lastblock = 0;
 
-       /* Pass 1 - start writeback */
-       do {
-               next = ioend->io_list;
-               for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
-                       xfs_start_buffer_writeback(bh);
-       } while ((ioend = next) != NULL);
+       /* Reserve log space if we might write beyond the on-disk inode size. */
+       if (!status &&
+            ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
+               status = xfs_setfilesize_trans_alloc(ioend);
+       /*
+        * If we are failing the IO now, just mark the ioend with an
+        * error and finish it. This will run IO completion immediately
+        * as there is only one reference to the ioend at this point in
+        * time.
+        */
+       if (status) {
+               ioend->io_error = status;
+               xfs_finish_ioend(ioend);
+               return status;
+       }
 
-       /* Pass 2 - submit I/O */
-       ioend = head;
-       do {
-               next = ioend->io_list;
-               bio = NULL;
+       bio = NULL;
+       for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
 
-               /*
-                * If we are failing the IO now, just mark the ioend with an
-                * error and finish it. This will run IO completion immediately
-                * as there is only one reference to the ioend at this point in
-                * time.
-                */
-               if (fail) {
-                       ioend->io_error = fail;
-                       xfs_finish_ioend(ioend);
-                       continue;
+               if (!bio) {
+retry:
+                       bio = xfs_alloc_ioend_bio(bh);
+               } else if (bh->b_blocknr != lastblock + 1) {
+                       xfs_submit_ioend_bio(wbc, ioend, bio);
+                       goto retry;
                }
 
-               for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
-
-                       if (!bio) {
- retry:
-                               bio = xfs_alloc_ioend_bio(bh);
-                       } else if (bh->b_blocknr != lastblock + 1) {
-                               xfs_submit_ioend_bio(wbc, ioend, bio);
-                               goto retry;
-                       }
-
-                       if (xfs_bio_add_buffer(bio, bh) != bh->b_size) {
-                               xfs_submit_ioend_bio(wbc, ioend, bio);
-                               goto retry;
-                       }
-
-                       lastblock = bh->b_blocknr;
-               }
-               if (bio)
+               if (xfs_bio_add_buffer(bio, bh) != bh->b_size) {
                        xfs_submit_ioend_bio(wbc, ioend, bio);
-               xfs_finish_ioend(ioend);
-       } while ((ioend = next) != NULL);
-}
-
-/*
- * Cancel submission of all buffer_heads so far in this endio.
- * Toss the endio too.  Only ever called for the initial page
- * in a writepage request, so only ever one page.
- */
-STATIC void
-xfs_cancel_ioend(
-       xfs_ioend_t             *ioend)
-{
-       xfs_ioend_t             *next;
-       struct buffer_head      *bh, *next_bh;
-
-       do {
-               next = ioend->io_list;
-               bh = ioend->io_buffer_head;
-               do {
-                       next_bh = bh->b_private;
-                       clear_buffer_async_write(bh);
-                       /*
-                        * The unwritten flag is cleared when added to the
-                        * ioend. We're not submitting for I/O so mark the
-                        * buffer unwritten again for next time around.
-                        */
-                       if (ioend->io_type == XFS_IO_UNWRITTEN)
-                               set_buffer_unwritten(bh);
-                       unlock_buffer(bh);
-               } while ((bh = next_bh) != NULL);
+                       goto retry;
+               }
 
-               mempool_free(ioend, xfs_ioend_pool);
-       } while ((ioend = next) != NULL);
+               lastblock = bh->b_blocknr;
+       }
+       if (bio)
+               xfs_submit_ioend_bio(wbc, ioend, bio);
+       xfs_finish_ioend(ioend);
+       return 0;
 }
 
 /*
  * Test to see if we've been building up a completion structure for
  * earlier buffers -- if so, we try to append to this ioend if we
  * can, otherwise we finish off any current ioend and start another.
- * Return true if we've finished the given ioend.
+ * Return the ioend we finished off so that the caller can submit it
+ * once it has finished processing the dirty page.
  */
 STATIC void
 xfs_add_to_ioend(
        struct inode            *inode,
        struct buffer_head      *bh,
        xfs_off_t               offset,
-       unsigned int            type,
-       xfs_ioend_t             **result,
-       int                     need_ioend)
+       struct xfs_writepage_ctx *wpc,
+       struct list_head        *iolist)
 {
-       xfs_ioend_t             *ioend = *result;
-
-       if (!ioend || need_ioend || type != ioend->io_type) {
-               xfs_ioend_t     *previous = *result;
-
-               ioend = xfs_alloc_ioend(inode, type);
-               ioend->io_offset = offset;
-               ioend->io_buffer_head = bh;
-               ioend->io_buffer_tail = bh;
-               if (previous)
-                       previous->io_list = ioend;
-               *result = ioend;
+       if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
+           bh->b_blocknr != wpc->last_block + 1 ||
+           offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
+               struct xfs_ioend        *new;
+
+               if (wpc->ioend)
+                       list_add(&wpc->ioend->io_list, iolist);
+
+               new = xfs_alloc_ioend(inode, wpc->io_type);
+               new->io_offset = offset;
+               new->io_buffer_head = bh;
+               new->io_buffer_tail = bh;
+               wpc->ioend = new;
        } else {
-               ioend->io_buffer_tail->b_private = bh;
-               ioend->io_buffer_tail = bh;
+               wpc->ioend->io_buffer_tail->b_private = bh;
+               wpc->ioend->io_buffer_tail = bh;
        }
 
        bh->b_private = NULL;
-       ioend->io_size += bh->b_size;
+       wpc->ioend->io_size += bh->b_size;
+       wpc->last_block = bh->b_blocknr;
+       xfs_start_buffer_writeback(bh);
 }
 
 STATIC void
@@ -678,183 +632,6 @@ xfs_check_page_type(
        return false;
 }
 
-/*
- * Allocate & map buffers for page given the extent map. Write it out.
- * except for the original page of a writepage, this is called on
- * delalloc/unwritten pages only, for the original page it is possible
- * that the page has no mapping at all.
- */
-STATIC int
-xfs_convert_page(
-       struct inode            *inode,
-       struct page             *page,
-       loff_t                  tindex,
-       struct xfs_bmbt_irec    *imap,
-       xfs_ioend_t             **ioendp,
-       struct writeback_control *wbc)
-{
-       struct buffer_head      *bh, *head;
-       xfs_off_t               end_offset;
-       unsigned long           p_offset;
-       unsigned int            type;
-       int                     len, page_dirty;
-       int                     count = 0, done = 0, uptodate = 1;
-       xfs_off_t               offset = page_offset(page);
-
-       if (page->index != tindex)
-               goto fail;
-       if (!trylock_page(page))
-               goto fail;
-       if (PageWriteback(page))
-               goto fail_unlock_page;
-       if (page->mapping != inode->i_mapping)
-               goto fail_unlock_page;
-       if (!xfs_check_page_type(page, (*ioendp)->io_type, false))
-               goto fail_unlock_page;
-
-       /*
-        * page_dirty is initially a count of buffers on the page before
-        * EOF and is decremented as we move each into a cleanable state.
-        *
-        * Derivation:
-        *
-        * End offset is the highest offset that this page should represent.
-        * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
-        * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
-        * hence give us the correct page_dirty count. On any other page,
-        * it will be zero and in that case we need page_dirty to be the
-        * count of buffers on the page.
-        */
-       end_offset = min_t(unsigned long long,
-                       (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
-                       i_size_read(inode));
-
-       /*
-        * If the current map does not span the entire page we are about to try
-        * to write, then give up. The only way we can write a page that spans
-        * multiple mappings in a single writeback iteration is via the
-        * xfs_vm_writepage() function. Data integrity writeback requires the
-        * entire page to be written in a single attempt, otherwise the part of
-        * the page we don't write here doesn't get written as part of the data
-        * integrity sync.
-        *
-        * For normal writeback, we also don't attempt to write partial pages
-        * here as it simply means that write_cache_pages() will see it under
-        * writeback and ignore the page until some point in the future, at
-        * which time this will be the only page in the file that needs
-        * writeback.  Hence for more optimal IO patterns, we should always
-        * avoid partial page writeback due to multiple mappings on a page here.
-        */
-       if (!xfs_imap_valid(inode, imap, end_offset))
-               goto fail_unlock_page;
-
-       len = 1 << inode->i_blkbits;
-       p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
-                                       PAGE_CACHE_SIZE);
-       p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
-       page_dirty = p_offset / len;
-
-       /*
-        * The moment we find a buffer that doesn't match our current type
-        * specification or can't be written, abort the loop and start
-        * writeback. As per the above xfs_imap_valid() check, only
-        * xfs_vm_writepage() can handle partial page writeback fully - we are
-        * limited here to the buffers that are contiguous with the current
-        * ioend, and hence a buffer we can't write breaks that contiguity and
-        * we have to defer the rest of the IO to xfs_vm_writepage().
-        */
-       bh = head = page_buffers(page);
-       do {
-               if (offset >= end_offset)
-                       break;
-               if (!buffer_uptodate(bh))
-                       uptodate = 0;
-               if (!(PageUptodate(page) || buffer_uptodate(bh))) {
-                       done = 1;
-                       break;
-               }
-
-               if (buffer_unwritten(bh) || buffer_delay(bh) ||
-                   buffer_mapped(bh)) {
-                       if (buffer_unwritten(bh))
-                               type = XFS_IO_UNWRITTEN;
-                       else if (buffer_delay(bh))
-                               type = XFS_IO_DELALLOC;
-                       else
-                               type = XFS_IO_OVERWRITE;
-
-                       /*
-                        * imap should always be valid because of the above
-                        * partial page end_offset check on the imap.
-                        */
-                       ASSERT(xfs_imap_valid(inode, imap, offset));
-
-                       lock_buffer(bh);
-                       if (type != XFS_IO_OVERWRITE)
-                               xfs_map_at_offset(inode, bh, imap, offset);
-                       xfs_add_to_ioend(inode, bh, offset, type,
-                                        ioendp, done);
-
-                       page_dirty--;
-                       count++;
-               } else {
-                       done = 1;
-                       break;
-               }
-       } while (offset += len, (bh = bh->b_this_page) != head);
-
-       if (uptodate && bh == head)
-               SetPageUptodate(page);
-
-       if (count) {
-               if (--wbc->nr_to_write <= 0 &&
-                   wbc->sync_mode == WB_SYNC_NONE)
-                       done = 1;
-       }
-       xfs_start_page_writeback(page, !page_dirty, count);
-
-       return done;
- fail_unlock_page:
-       unlock_page(page);
- fail:
-       return 1;
-}
-
-/*
- * Convert & write out a cluster of pages in the same extent as defined
- * by mp and following the start page.
- */
-STATIC void
-xfs_cluster_write(
-       struct inode            *inode,
-       pgoff_t                 tindex,
-       struct xfs_bmbt_irec    *imap,
-       xfs_ioend_t             **ioendp,
-       struct writeback_control *wbc,
-       pgoff_t                 tlast)
-{
-       struct pagevec          pvec;
-       int                     done = 0, i;
-
-       pagevec_init(&pvec, 0);
-       while (!done && tindex <= tlast) {
-               unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
-
-               if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
-                       break;
-
-               for (i = 0; i < pagevec_count(&pvec); i++) {
-                       done = xfs_convert_page(inode, pvec.pages[i], tindex++,
-                                       imap, ioendp, wbc);
-                       if (done)
-                               break;
-               }
-
-               pagevec_release(&pvec);
-               cond_resched();
-       }
-}
-
 STATIC void
 xfs_vm_invalidatepage(
        struct page             *page,
@@ -931,6 +708,164 @@ out_invalidate:
        return;
 }
 
+/*
+ * We implement an immediate ioend submission policy here to avoid needing to
+ * chain multiple ioends and hence nest mempool allocations which can violate
+ * forward progress guarantees we need to provide. The current ioend we are
+ * adding buffers to is cached on the writepage context, and if the new buffer
+ * does not append to the cached ioend it will create a new ioend and cache that
+ * instead.
+ *
+ * If a new ioend is created and cached, the old ioend is returned and queued
+ * locally for submission once the entire page is processed or an error has been
+ * detected.  While ioends are submitted immediately after they are completed,
+ * batching optimisations are provided by higher level block plugging.
+ *
+ * At the end of a writeback pass, there will be a cached ioend remaining on the
+ * writepage context that the caller will need to submit.
+ */
+static int
+xfs_writepage_map(
+       struct xfs_writepage_ctx *wpc,
+       struct writeback_control *wbc,
+       struct inode            *inode,
+       struct page             *page,
+       loff_t                  offset,
+       __uint64_t              end_offset)
+{
+       LIST_HEAD(submit_list);
+       struct xfs_ioend        *ioend, *next;
+       struct buffer_head      *bh, *head;
+       ssize_t                 len = 1 << inode->i_blkbits;
+       int                     error = 0;
+       int                     count = 0;
+       int                     uptodate = 1;
+
+       bh = head = page_buffers(page);
+       offset = page_offset(page);
+       do {
+               if (offset >= end_offset)
+                       break;
+               if (!buffer_uptodate(bh))
+                       uptodate = 0;
+
+               /*
+                * set_page_dirty dirties all buffers in a page, independent
+                * of their state.  The dirty state however is entirely
+                * meaningless for holes (!mapped && uptodate), so skip
+                * buffers covering holes here.
+                */
+               if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
+                       wpc->imap_valid = false;
+                       continue;
+               }
+
+               if (buffer_unwritten(bh)) {
+                       if (wpc->io_type != XFS_IO_UNWRITTEN) {
+                               wpc->io_type = XFS_IO_UNWRITTEN;
+                               wpc->imap_valid = false;
+                       }
+               } else if (buffer_delay(bh)) {
+                       if (wpc->io_type != XFS_IO_DELALLOC) {
+                               wpc->io_type = XFS_IO_DELALLOC;
+                               wpc->imap_valid = false;
+                       }
+               } else if (buffer_uptodate(bh)) {
+                       if (wpc->io_type != XFS_IO_OVERWRITE) {
+                               wpc->io_type = XFS_IO_OVERWRITE;
+                               wpc->imap_valid = false;
+                       }
+               } else {
+                       if (PageUptodate(page))
+                               ASSERT(buffer_mapped(bh));
+                       /*
+                        * This buffer is not uptodate and will not be
+                        * written to disk.  Ensure that we will put any
+                        * subsequent writeable buffers into a new
+                        * ioend.
+                        */
+                       wpc->imap_valid = false;
+                       continue;
+               }
+
+               if (wpc->imap_valid)
+                       wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
+                                                        offset);
+               if (!wpc->imap_valid) {
+                       error = xfs_map_blocks(inode, offset, &wpc->imap,
+                                            wpc->io_type);
+                       if (error)
+                               goto out;
+                       wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
+                                                        offset);
+               }
+               if (wpc->imap_valid) {
+                       lock_buffer(bh);
+                       if (wpc->io_type != XFS_IO_OVERWRITE)
+                               xfs_map_at_offset(inode, bh, &wpc->imap, offset);
+                       xfs_add_to_ioend(inode, bh, offset, wpc, &submit_list);
+                       count++;
+               }
+
+       } while (offset += len, ((bh = bh->b_this_page) != head));
+
+       if (uptodate && bh == head)
+               SetPageUptodate(page);
+
+       ASSERT(wpc->ioend || list_empty(&submit_list));
+
+out:
+       /*
+        * On error, we have to fail the ioend here because we have locked
+        * buffers in the ioend. If we don't do this, we'll deadlock
+        * invalidating the page as that tries to lock the buffers on the page.
+        * Also, because we may have set pages under writeback, we have to make
+        * sure we run IO completion to mark the error state of the IO
+        * appropriately, so we can't cancel the ioend directly here. That means
+        * we have to mark this page as under writeback if we included any
+        * buffers from it in the ioend chain so that completion treats it
+        * correctly.
+        *
+        * If we didn't include the page in the ioend, the on error we can
+        * simply discard and unlock it as there are no other users of the page
+        * or it's buffers right now. The caller will still need to trigger
+        * submission of outstanding ioends on the writepage context so they are
+        * treated correctly on error.
+        */
+       if (count) {
+               xfs_start_page_writeback(page, !error);
+
+               /*
+                * Preserve the original error if there was one, otherwise catch
+                * submission errors here and propagate into subsequent ioend
+                * submissions.
+                */
+               list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
+                       int error2;
+
+                       list_del_init(&ioend->io_list);
+                       error2 = xfs_submit_ioend(wbc, ioend, error);
+                       if (error2 && !error)
+                               error = error2;
+               }
+       } else if (error) {
+               xfs_aops_discard_page(page);
+               ClearPageUptodate(page);
+               unlock_page(page);
+       } else {
+               /*
+                * We can end up here with no error and nothing to write if we
+                * race with a partial page truncate on a sub-page block sized
+                * filesystem. In that case we need to mark the page clean.
+                */
+               xfs_start_page_writeback(page, 1);
+               end_page_writeback(page);
+       }
+
+       mapping_set_error(page->mapping, error);
+       return error;
+}
+
 /*
  * Write out a dirty page.
  *
@@ -940,22 +875,16 @@ out_invalidate:
  * For any other dirty buffer heads on the page we should flush them.
  */
 STATIC int
-xfs_vm_writepage(
+xfs_do_writepage(
        struct page             *page,
-       struct writeback_control *wbc)
+       struct writeback_control *wbc,
+       void                    *data)
 {
+       struct xfs_writepage_ctx *wpc = data;
        struct inode            *inode = page->mapping->host;
-       struct buffer_head      *bh, *head;
-       struct xfs_bmbt_irec    imap;
-       xfs_ioend_t             *ioend = NULL, *iohead = NULL;
        loff_t                  offset;
-       unsigned int            type;
        __uint64_t              end_offset;
-       pgoff_t                 end_index, last_index;
-       ssize_t                 len;
-       int                     err, imap_valid = 0, uptodate = 1;
-       int                     count = 0;
-       int                     nonblocking = 0;
+       pgoff_t                 end_index;
 
        trace_xfs_writepage(inode, page, 0, 0);
 
@@ -982,12 +911,9 @@ xfs_vm_writepage(
        if (WARN_ON_ONCE(current->flags & PF_FSTRANS))
                goto redirty;
 
-       /* Is this page beyond the end of the file? */
-       offset = i_size_read(inode);
-       end_index = offset >> PAGE_CACHE_SHIFT;
-       last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
-
        /*
+        * Is this page beyond the end of the file?
+        *
         * The page index is less than the end_index, adjust the end_offset
         * to the highest offset that this page should represent.
         * -----------------------------------------------------
@@ -998,6 +924,8 @@ xfs_vm_writepage(
         * |     desired writeback range    |      see else    |
         * ---------------------------------^------------------|
         */
+       offset = i_size_read(inode);
+       end_index = offset >> PAGE_CACHE_SHIFT;
        if (page->index < end_index)
                end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT;
        else {
@@ -1049,152 +977,7 @@ xfs_vm_writepage(
                end_offset = offset;
        }
 
-       len = 1 << inode->i_blkbits;
-
-       bh = head = page_buffers(page);
-       offset = page_offset(page);
-       type = XFS_IO_OVERWRITE;
-
-       if (wbc->sync_mode == WB_SYNC_NONE)
-               nonblocking = 1;
-
-       do {
-               int new_ioend = 0;
-
-               if (offset >= end_offset)
-                       break;
-               if (!buffer_uptodate(bh))
-                       uptodate = 0;
-
-               /*
-                * set_page_dirty dirties all buffers in a page, independent
-                * of their state.  The dirty state however is entirely
-                * meaningless for holes (!mapped && uptodate), so skip
-                * buffers covering holes here.
-                */
-               if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
-                       imap_valid = 0;
-                       continue;
-               }
-
-               if (buffer_unwritten(bh)) {
-                       if (type != XFS_IO_UNWRITTEN) {
-                               type = XFS_IO_UNWRITTEN;
-                               imap_valid = 0;
-                       }
-               } else if (buffer_delay(bh)) {
-                       if (type != XFS_IO_DELALLOC) {
-                               type = XFS_IO_DELALLOC;
-                               imap_valid = 0;
-                       }
-               } else if (buffer_uptodate(bh)) {
-                       if (type != XFS_IO_OVERWRITE) {
-                               type = XFS_IO_OVERWRITE;
-                               imap_valid = 0;
-                       }
-               } else {
-                       if (PageUptodate(page))
-                               ASSERT(buffer_mapped(bh));
-                       /*
-                        * This buffer is not uptodate and will not be
-                        * written to disk.  Ensure that we will put any
-                        * subsequent writeable buffers into a new
-                        * ioend.
-                        */
-                       imap_valid = 0;
-                       continue;
-               }
-
-               if (imap_valid)
-                       imap_valid = xfs_imap_valid(inode, &imap, offset);
-               if (!imap_valid) {
-                       /*
-                        * If we didn't have a valid mapping then we need to
-                        * put the new mapping into a separate ioend structure.
-                        * This ensures non-contiguous extents always have
-                        * separate ioends, which is particularly important
-                        * for unwritten extent conversion at I/O completion
-                        * time.
-                        */
-                       new_ioend = 1;
-                       err = xfs_map_blocks(inode, offset, &imap, type,
-                                            nonblocking);
-                       if (err)
-                               goto error;
-                       imap_valid = xfs_imap_valid(inode, &imap, offset);
-               }
-               if (imap_valid) {
-                       lock_buffer(bh);
-                       if (type != XFS_IO_OVERWRITE)
-                               xfs_map_at_offset(inode, bh, &imap, offset);
-                       xfs_add_to_ioend(inode, bh, offset, type, &ioend,
-                                        new_ioend);
-                       count++;
-               }
-
-               if (!iohead)
-                       iohead = ioend;
-
-       } while (offset += len, ((bh = bh->b_this_page) != head));
-
-       if (uptodate && bh == head)
-               SetPageUptodate(page);
-
-       xfs_start_page_writeback(page, 1, count);
-
-       /* if there is no IO to be submitted for this page, we are done */
-       if (!ioend)
-               return 0;
-
-       ASSERT(iohead);
-
-       /*
-        * Any errors from this point onwards need tobe reported through the IO
-        * completion path as we have marked the initial page as under writeback
-        * and unlocked it.
-        */
-       if (imap_valid) {
-               xfs_off_t               end_index;
-
-               end_index = imap.br_startoff + imap.br_blockcount;
-
-               /* to bytes */
-               end_index <<= inode->i_blkbits;
-
-               /* to pages */
-               end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
-
-               /* check against file size */
-               if (end_index > last_index)
-                       end_index = last_index;
-
-               xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
-                                 wbc, end_index);
-       }
-
-
-       /*
-        * Reserve log space if we might write beyond the on-disk inode size.
-        */
-       err = 0;
-       if (ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
-               err = xfs_setfilesize_trans_alloc(ioend);
-
-       xfs_submit_ioend(wbc, iohead, err);
-
-       return 0;
-
-error:
-       if (iohead)
-               xfs_cancel_ioend(iohead);
-
-       if (err == -EAGAIN)
-               goto redirty;
-
-       xfs_aops_discard_page(page);
-       ClearPageUptodate(page);
-       unlock_page(page);
-       return err;
+       return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset);
 
 redirty:
        redirty_page_for_writepage(wbc, page);
@@ -1202,17 +985,41 @@ redirty:
        return 0;
 }
 
+STATIC int
+xfs_vm_writepage(
+       struct page             *page,
+       struct writeback_control *wbc)
+{
+       struct xfs_writepage_ctx wpc = {
+               .io_type = XFS_IO_INVALID,
+       };
+       int                     ret;
+
+       ret = xfs_do_writepage(page, wbc, &wpc);
+       if (wpc.ioend)
+               ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
+       return ret;
+}
+
 STATIC int
 xfs_vm_writepages(
        struct address_space    *mapping,
        struct writeback_control *wbc)
 {
+       struct xfs_writepage_ctx wpc = {
+               .io_type = XFS_IO_INVALID,
+       };
+       int                     ret;
+
        xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
        if (dax_mapping(mapping))
                return dax_writeback_mapping_range(mapping,
                                xfs_find_bdev_for_inode(mapping->host), wbc);
 
-       return generic_writepages(mapping, wbc);
+       ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
+       if (wpc.ioend)
+               ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
+       return ret;
 }
 
 /*
@@ -1242,27 +1049,8 @@ xfs_vm_releasepage(
 }
 
 /*
- * When we map a DIO buffer, we may need to attach an ioend that describes the
- * type of write IO we are doing. This passes to the completion function the
- * operations it needs to perform. If the mapping is for an overwrite wholly
- * within the EOF then we don't need an ioend and so we don't allocate one.
- * This avoids the unnecessary overhead of allocating and freeing ioends for
- * workloads that don't require transactions on IO completion.
- *
- * If we get multiple mappings in a single IO, we might be mapping different
- * types. But because the direct IO can only have a single private pointer, we
- * need to ensure that:
- *
- * a) i) the ioend spans the entire region of unwritten mappings; or
- *    ii) the ioend spans all the mappings that cross or are beyond EOF; and
- * b) if it contains unwritten extents, it is *permanently* marked as such
- *
- * We could do this by chaining ioends like buffered IO does, but we only
- * actually get one IO completion callback from the direct IO, and that spans
- * the entire IO regardless of how many mappings and IOs are needed to complete
- * the DIO. There is only going to be one reference to the ioend and its life
- * cycle is constrained by the DIO completion code. hence we don't need
- * reference counting here.
+ * When we map a DIO buffer, we may need to pass flags to
+ * xfs_end_io_direct_write to tell it what kind of write IO we are doing.
  *
  * Note that for DIO, an IO to the highest supported file block offset (i.e.
  * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
@@ -1270,68 +1058,26 @@ xfs_vm_releasepage(
  * extending the file size. We won't know for sure until IO completion is run
  * and the actual max write offset is communicated to the IO completion
  * routine.
- *
- * For DAX page faults, we are preparing to never see unwritten extents here,
- * nor should we ever extend the inode size. Hence we will soon have nothing to
- * do here for this case, ensuring we don't have to provide an IO completion
- * callback to free an ioend that we don't actually need for a fault into the
- * page at offset (2^63 - 1FSB) bytes.
  */
-
 static void
 xfs_map_direct(
        struct inode            *inode,
        struct buffer_head      *bh_result,
        struct xfs_bmbt_irec    *imap,
-       xfs_off_t               offset,
-       bool                    dax_fault)
+       xfs_off_t               offset)
 {
-       struct xfs_ioend        *ioend;
+       uintptr_t               *flags = (uintptr_t *)&bh_result->b_private;
        xfs_off_t               size = bh_result->b_size;
-       int                     type;
-
-       if (ISUNWRITTEN(imap))
-               type = XFS_IO_UNWRITTEN;
-       else
-               type = XFS_IO_OVERWRITE;
 
-       trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap);
-
-       if (dax_fault) {
-               ASSERT(type == XFS_IO_OVERWRITE);
-               trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
-                                           imap);
-               return;
-       }
+       trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size,
+               ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, imap);
 
-       if (bh_result->b_private) {
-               ioend = bh_result->b_private;
-               ASSERT(ioend->io_size > 0);
-               ASSERT(offset >= ioend->io_offset);
-               if (offset + size > ioend->io_offset + ioend->io_size)
-                       ioend->io_size = offset - ioend->io_offset + size;
-
-               if (type == XFS_IO_UNWRITTEN && type != ioend->io_type)
-                       ioend->io_type = XFS_IO_UNWRITTEN;
-
-               trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset,
-                                             ioend->io_size, ioend->io_type,
-                                             imap);
-       } else if (type == XFS_IO_UNWRITTEN ||
-                  offset + size > i_size_read(inode) ||
-                  offset + size < 0) {
-               ioend = xfs_alloc_ioend(inode, type);
-               ioend->io_offset = offset;
-               ioend->io_size = size;
-
-               bh_result->b_private = ioend;
+       if (ISUNWRITTEN(imap)) {
+               *flags |= XFS_DIO_FLAG_UNWRITTEN;
+               set_buffer_defer_completion(bh_result);
+       } else if (offset + size > i_size_read(inode) || offset + size < 0) {
+               *flags |= XFS_DIO_FLAG_APPEND;
                set_buffer_defer_completion(bh_result);
-
-               trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type,
-                                          imap);
-       } else {
-               trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
-                                           imap);
        }
 }
 
@@ -1502,9 +1248,12 @@ __xfs_get_blocks(
                if (ISUNWRITTEN(&imap))
                        set_buffer_unwritten(bh_result);
                /* direct IO needs special help */
-               if (create && direct)
-                       xfs_map_direct(inode, bh_result, &imap, offset,
-                                      dax_fault);
+               if (create && direct) {
+                       if (dax_fault)
+                               ASSERT(!ISUNWRITTEN(&imap));
+                       else
+                               xfs_map_direct(inode, bh_result, &imap, offset);
+               }
        }
 
        /*
@@ -1574,42 +1323,50 @@ xfs_get_blocks_dax_fault(
        return __xfs_get_blocks(inode, iblock, bh_result, create, true, true);
 }
 
-static void
-__xfs_end_io_direct_write(
-       struct inode            *inode,
-       struct xfs_ioend        *ioend,
+/*
+ * Complete a direct I/O write request.
+ *
+ * xfs_map_direct passes us some flags in the private data to tell us what to
+ * do.  If no flags are set, then the write IO is an overwrite wholly within
+ * the existing allocated file size and so there is nothing for us to do.
+ *
+ * Note that in this case the completion can be called in interrupt context,
+ * whereas if we have flags set we will always be called in task context
+ * (i.e. from a workqueue).
+ */
+STATIC int
+xfs_end_io_direct_write(
+       struct kiocb            *iocb,
        loff_t                  offset,
-       ssize_t                 size)
+       ssize_t                 size,
+       void                    *private)
 {
-       struct xfs_mount        *mp = XFS_I(inode)->i_mount;
+       struct inode            *inode = file_inode(iocb->ki_filp);
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       uintptr_t               flags = (uintptr_t)private;
+       int                     error = 0;
 
-       if (XFS_FORCED_SHUTDOWN(mp) || ioend->io_error)
-               goto out_end_io;
+       trace_xfs_end_io_direct_write(ip, offset, size);
 
-       /*
-        * dio completion end_io functions are only called on writes if more
-        * than 0 bytes was written.
-        */
-       ASSERT(size > 0);
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
 
-       /*
-        * The ioend only maps whole blocks, while the IO may be sector aligned.
-        * Hence the ioend offset/size may not match the IO offset/size exactly.
-        * Because we don't map overwrites within EOF into the ioend, the offset
-        * may not match, but only if the endio spans EOF.  Either way, write
-        * the IO sizes into the ioend so that completion processing does the
-        * right thing.
-        */
-       ASSERT(offset + size <= ioend->io_offset + ioend->io_size);
-       ioend->io_size = size;
-       ioend->io_offset = offset;
+       if (size <= 0)
+               return size;
 
        /*
-        * The ioend tells us whether we are doing unwritten extent conversion
+        * The flags tell us whether we are doing unwritten extent conversions
         * or an append transaction that updates the on-disk file size. These
         * cases are the only cases where we should *potentially* be needing
         * to update the VFS inode size.
-        *
+        */
+       if (flags == 0) {
+               ASSERT(offset + size <= i_size_read(inode));
+               return 0;
+       }
+
+       /*
         * We need to update the in-core inode size here so that we don't end up
         * with the on-disk inode size being outside the in-core inode size. We
         * have no other method of updating EOF for AIO, so always do it here
@@ -1620,91 +1377,56 @@ __xfs_end_io_direct_write(
         * here can result in EOF moving backwards and Bad Things Happen when
         * that occurs.
         */
-       spin_lock(&XFS_I(inode)->i_flags_lock);
+       spin_lock(&ip->i_flags_lock);
        if (offset + size > i_size_read(inode))
                i_size_write(inode, offset + size);
-       spin_unlock(&XFS_I(inode)->i_flags_lock);
+       spin_unlock(&ip->i_flags_lock);
 
-       /*
-        * If we are doing an append IO that needs to update the EOF on disk,
-        * do the transaction reserve now so we can use common end io
-        * processing. Stashing the error (if there is one) in the ioend will
-        * result in the ioend processing passing on the error if it is
-        * possible as we can't return it from here.
-        */
-       if (ioend->io_type == XFS_IO_OVERWRITE)
-               ioend->io_error = xfs_setfilesize_trans_alloc(ioend);
+       if (flags & XFS_DIO_FLAG_UNWRITTEN) {
+               trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
 
-out_end_io:
-       xfs_end_io(&ioend->io_work);
-       return;
-}
+               error = xfs_iomap_write_unwritten(ip, offset, size);
+       } else if (flags & XFS_DIO_FLAG_APPEND) {
+               struct xfs_trans *tp;
 
-/*
- * Complete a direct I/O write request.
- *
- * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
- * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
- * wholly within the EOF and so there is nothing for us to do. Note that in this
- * case the completion can be called in interrupt context, whereas if we have an
- * ioend we will always be called in task context (i.e. from a workqueue).
- */
-STATIC void
-xfs_end_io_direct_write(
-       struct kiocb            *iocb,
-       loff_t                  offset,
-       ssize_t                 size,
-       void                    *private)
-{
-       struct inode            *inode = file_inode(iocb->ki_filp);
-       struct xfs_ioend        *ioend = private;
-
-       trace_xfs_gbmap_direct_endio(XFS_I(inode), offset, size,
-                                    ioend ? ioend->io_type : 0, NULL);
+               trace_xfs_end_io_direct_write_append(ip, offset, size);
 
-       if (!ioend) {
-               ASSERT(offset + size <= i_size_read(inode));
-               return;
+               tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
+               if (error) {
+                       xfs_trans_cancel(tp);
+                       return error;
+               }
+               error = xfs_setfilesize(ip, tp, offset, size);
        }
 
-       __xfs_end_io_direct_write(inode, ioend, offset, size);
+       return error;
 }
 
-static inline ssize_t
-xfs_vm_do_dio(
-       struct inode            *inode,
+STATIC ssize_t
+xfs_vm_direct_IO(
        struct kiocb            *iocb,
        struct iov_iter         *iter,
-       loff_t                  offset,
-       void                    (*endio)(struct kiocb   *iocb,
-                                        loff_t         offset,
-                                        ssize_t        size,
-                                        void           *private),
-       int                     flags)
+       loff_t                  offset)
 {
+       struct inode            *inode = iocb->ki_filp->f_mapping->host;
+       dio_iodone_t            *endio = NULL;
+       int                     flags = 0;
        struct block_device     *bdev;
 
-       if (IS_DAX(inode))
+       if (iov_iter_rw(iter) == WRITE) {
+               endio = xfs_end_io_direct_write;
+               flags = DIO_ASYNC_EXTEND;
+       }
+
+       if (IS_DAX(inode)) {
                return dax_do_io(iocb, inode, iter, offset,
                                 xfs_get_blocks_direct, endio, 0);
+       }
 
        bdev = xfs_find_bdev_for_inode(inode);
        return  __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
-                                    xfs_get_blocks_direct, endio, NULL, flags);
-}
-
-STATIC ssize_t
-xfs_vm_direct_IO(
-       struct kiocb            *iocb,
-       struct iov_iter         *iter,
-       loff_t                  offset)
-{
-       struct inode            *inode = iocb->ki_filp->f_mapping->host;
-
-       if (iov_iter_rw(iter) == WRITE)
-               return xfs_vm_do_dio(inode, iocb, iter, offset,
-                                    xfs_end_io_direct_write, DIO_ASYNC_EXTEND);
-       return xfs_vm_do_dio(inode, iocb, iter, offset, NULL, 0);
+                       xfs_get_blocks_direct, endio, NULL, flags);
 }
 
 /*
@@ -1756,6 +1478,7 @@ xfs_vm_write_failed(
        loff_t                  from = pos & (PAGE_CACHE_SIZE - 1);
        loff_t                  to = from + len;
        struct buffer_head      *bh, *head;
+       struct xfs_mount        *mp = XFS_I(inode)->i_mount;
 
        /*
         * The request pos offset might be 32 or 64 bit, this is all fine
@@ -1787,14 +1510,23 @@ xfs_vm_write_failed(
                if (block_start >= to)
                        break;
 
-               if (!buffer_delay(bh))
+               /*
+                * Process delalloc and unwritten buffers beyond EOF. We can
+                * encounter unwritten buffers in the event that a file has
+                * post-EOF unwritten extents and an extending write happens to
+                * fail (e.g., an unaligned write that also involves a delalloc
+                * to the same page).
+                */
+               if (!buffer_delay(bh) && !buffer_unwritten(bh))
                        continue;
 
-               if (!buffer_new(bh) && block_offset < i_size_read(inode))
+               if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) &&
+                   block_offset < i_size_read(inode))
                        continue;
 
-               xfs_vm_kill_delalloc_range(inode, block_offset,
-                                          block_offset + bh->b_size);
+               if (buffer_delay(bh))
+                       xfs_vm_kill_delalloc_range(inode, block_offset,
+                                                  block_offset + bh->b_size);
 
                /*
                 * This buffer does not contain data anymore. make sure anyone
@@ -1805,6 +1537,7 @@ xfs_vm_write_failed(
                clear_buffer_mapped(bh);
                clear_buffer_new(bh);
                clear_buffer_dirty(bh);
+               clear_buffer_unwritten(bh);
        }
 
 }
@@ -1828,6 +1561,7 @@ xfs_vm_write_begin(
        pgoff_t                 index = pos >> PAGE_CACHE_SHIFT;
        struct page             *page;
        int                     status;
+       struct xfs_mount        *mp = XFS_I(mapping->host)->i_mount;
 
        ASSERT(len <= PAGE_CACHE_SIZE);
 
@@ -1836,6 +1570,8 @@ xfs_vm_write_begin(
                return -ENOMEM;
 
        status = __block_write_begin(page, pos, len, xfs_get_blocks);
+       if (xfs_mp_fail_writes(mp))
+               status = -EIO;
        if (unlikely(status)) {
                struct inode    *inode = mapping->host;
                size_t          isize = i_size_read(inode);
@@ -1848,6 +1584,8 @@ xfs_vm_write_begin(
                 * allocated in this write, not blocks that were previously
                 * written successfully.
                 */
+               if (xfs_mp_fail_writes(mp))
+                       isize = 0;
                if (pos + len > isize) {
                        ssize_t start = max_t(ssize_t, pos, isize);
 
index a4343c63fb38c60336abbaf8741a75c2a9298a4d..b4421177b68dc1ba619625876c05ee8d22caf570 100644 (file)
@@ -24,12 +24,14 @@ extern mempool_t *xfs_ioend_pool;
  * Types of I/O for bmap clustering and I/O completion tracking.
  */
 enum {
+       XFS_IO_INVALID,         /* initial state */
        XFS_IO_DELALLOC,        /* covers delalloc region */
        XFS_IO_UNWRITTEN,       /* covers allocated but uninitialized data */
        XFS_IO_OVERWRITE,       /* covers already allocated extent */
 };
 
 #define XFS_IO_TYPES \
+       { XFS_IO_INVALID,               "invalid" }, \
        { XFS_IO_DELALLOC,              "delalloc" }, \
        { XFS_IO_UNWRITTEN,             "unwritten" }, \
        { XFS_IO_OVERWRITE,             "overwrite" }
@@ -39,7 +41,7 @@ enum {
  * It can manage several multi-page bio's at once.
  */
 typedef struct xfs_ioend {
-       struct xfs_ioend        *io_list;       /* next ioend in chain */
+       struct list_head        io_list;        /* next ioend in chain */
        unsigned int            io_type;        /* delalloc / unwritten */
        int                     io_error;       /* I/O error code */
        atomic_t                io_remaining;   /* hold count */
index 0ef7c2ed3f8a8e30e260485ee130d1a3b008ea07..4fa14820e2e22b687ef852b81e1d6b9f9028caf3 100644 (file)
@@ -202,8 +202,10 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
                                        sbp->namelen,
                                        sbp->valuelen,
                                        &sbp->name[sbp->namelen]);
-               if (error)
+               if (error) {
+                       kmem_free(sbuf);
                        return error;
+               }
                if (context->seen_enough)
                        break;
                cursor->offset++;
@@ -454,14 +456,13 @@ xfs_attr3_leaf_list_int(
                                args.rmtblkcnt = xfs_attr3_rmt_blocks(
                                                        args.dp->i_mount, valuelen);
                                retval = xfs_attr_rmtval_get(&args);
-                               if (retval)
-                                       return retval;
-                               retval = context->put_listent(context,
-                                               entry->flags,
-                                               name_rmt->name,
-                                               (int)name_rmt->namelen,
-                                               valuelen,
-                                               args.value);
+                               if (!retval)
+                                       retval = context->put_listent(context,
+                                                       entry->flags,
+                                                       name_rmt->name,
+                                                       (int)name_rmt->namelen,
+                                                       valuelen,
+                                                       args.value);
                                kmem_free(args.value);
                        } else {
                                retval = context->put_listent(context,
index 6c876012b2e53246bf38ca3af8dd2d23861151fe..a32c1dcae2ff37b3ee1542fd3fbc5dd74a1e0a86 100644 (file)
@@ -203,10 +203,12 @@ xfs_bmap_rtalloc(
                ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
 
        /*
-        * Lock out other modifications to the RT bitmap inode.
+        * Lock out modifications to both the RT bitmap and summary inodes
         */
        xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
        xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
+       xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);
 
        /*
         * If it's an allocation to an empty file at offset 0,
@@ -822,7 +824,7 @@ bool
 xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
 {
        /* prealloc/delalloc exists only on regular files */
-       if (!S_ISREG(ip->i_d.di_mode))
+       if (!S_ISREG(VFS_I(ip)->i_mode))
                return false;
 
        /*
@@ -1727,7 +1729,7 @@ xfs_swap_extents(
        xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
 
        /* Verify that both files have the same format */
-       if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
+       if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) {
                error = -EINVAL;
                goto out_unlock;
        }
index 435c7de42e5f322a82845382ad7e1fa54dfe3d0b..9a2191b911377f94e38d81d57d5d037a7e19ae8b 100644 (file)
@@ -650,7 +650,7 @@ xfs_buf_read_map(
        if (bp) {
                trace_xfs_buf_read(bp, flags, _RET_IP_);
 
-               if (!XFS_BUF_ISDONE(bp)) {
+               if (!(bp->b_flags & XBF_DONE)) {
                        XFS_STATS_INC(target->bt_mount, xb_get_read);
                        bp->b_ops = ops;
                        _xfs_buf_read(bp, flags);
index c75721acd8679687ae403d9eb5ee463a3cb0dc17..4eb89bd4ee73b6f4265eb63b8238e9571150bf26 100644 (file)
@@ -302,6 +302,7 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
 
 /* Buffer Utility Routines */
 extern void *xfs_buf_offset(struct xfs_buf *, size_t);
+extern void xfs_buf_stale(struct xfs_buf *bp);
 
 /* Delayed Write Buffer Routines */
 extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
@@ -312,31 +313,6 @@ extern int xfs_buf_delwri_submit_nowait(struct list_head *);
 extern int xfs_buf_init(void);
 extern void xfs_buf_terminate(void);
 
-#define XFS_BUF_ZEROFLAGS(bp) \
-       ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \
-                           XBF_SYNCIO|XBF_FUA|XBF_FLUSH| \
-                           XBF_WRITE_FAIL))
-
-void xfs_buf_stale(struct xfs_buf *bp);
-#define XFS_BUF_UNSTALE(bp)    ((bp)->b_flags &= ~XBF_STALE)
-#define XFS_BUF_ISSTALE(bp)    ((bp)->b_flags & XBF_STALE)
-
-#define XFS_BUF_DONE(bp)       ((bp)->b_flags |= XBF_DONE)
-#define XFS_BUF_UNDONE(bp)     ((bp)->b_flags &= ~XBF_DONE)
-#define XFS_BUF_ISDONE(bp)     ((bp)->b_flags & XBF_DONE)
-
-#define XFS_BUF_ASYNC(bp)      ((bp)->b_flags |= XBF_ASYNC)
-#define XFS_BUF_UNASYNC(bp)    ((bp)->b_flags &= ~XBF_ASYNC)
-#define XFS_BUF_ISASYNC(bp)    ((bp)->b_flags & XBF_ASYNC)
-
-#define XFS_BUF_READ(bp)       ((bp)->b_flags |= XBF_READ)
-#define XFS_BUF_UNREAD(bp)     ((bp)->b_flags &= ~XBF_READ)
-#define XFS_BUF_ISREAD(bp)     ((bp)->b_flags & XBF_READ)
-
-#define XFS_BUF_WRITE(bp)      ((bp)->b_flags |= XBF_WRITE)
-#define XFS_BUF_UNWRITE(bp)    ((bp)->b_flags &= ~XBF_WRITE)
-#define XFS_BUF_ISWRITE(bp)    ((bp)->b_flags & XBF_WRITE)
-
 /*
  * These macros use the IO block map rather than b_bn. b_bn is now really
  * just for the buffer cache index for cached buffers. As IO does not use b_bn
index 7e986da34f6cb40ad3aca9e9845f81a070dd2d4d..99e91a0e554ea6512ce5eb43cb8a338804f550ae 100644 (file)
@@ -431,7 +431,7 @@ xfs_buf_item_unpin(
        if (freed && stale) {
                ASSERT(bip->bli_flags & XFS_BLI_STALE);
                ASSERT(xfs_buf_islocked(bp));
-               ASSERT(XFS_BUF_ISSTALE(bp));
+               ASSERT(bp->b_flags & XBF_STALE);
                ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
 
                trace_xfs_buf_item_unpin_stale(bip);
@@ -493,7 +493,7 @@ xfs_buf_item_unpin(
                xfs_buf_hold(bp);
                bp->b_flags |= XBF_ASYNC;
                xfs_buf_ioerror(bp, -EIO);
-               XFS_BUF_UNDONE(bp);
+               bp->b_flags &= ~XBF_DONE;
                xfs_buf_stale(bp);
                xfs_buf_ioend(bp);
        }
@@ -1067,7 +1067,7 @@ xfs_buf_iodone_callbacks(
         */
        if (XFS_FORCED_SHUTDOWN(mp)) {
                xfs_buf_stale(bp);
-               XFS_BUF_DONE(bp);
+               bp->b_flags |= XBF_DONE;
                trace_xfs_buf_item_iodone(bp, _RET_IP_);
                goto do_callbacks;
        }
@@ -1090,7 +1090,7 @@ xfs_buf_iodone_callbacks(
         * errors tend to affect the whole device and a failing log write
         * will make us give up.  But we really ought to do better here.
         */
-       if (XFS_BUF_ISASYNC(bp)) {
+       if (bp->b_flags & XBF_ASYNC) {
                ASSERT(bp->b_iodone != NULL);
 
                trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
@@ -1113,7 +1113,7 @@ xfs_buf_iodone_callbacks(
         * sure to return the error to the caller of xfs_bwrite().
         */
        xfs_buf_stale(bp);
-       XFS_BUF_DONE(bp);
+       bp->b_flags |= XBF_DONE;
 
        trace_xfs_buf_error_relse(bp, _RET_IP_);
 
index 642d55d100758b10fb3b9deec90de526707c3d98..93b3ab0c54350fbdd6e977e787d6b7c3911b792d 100644 (file)
@@ -665,7 +665,7 @@ xfs_readdir(
        if (XFS_FORCED_SHUTDOWN(dp->i_mount))
                return -EIO;
 
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
+       ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
        XFS_STATS_INC(dp->i_mount, xs_dir_getdents);
 
        args.dp = dp;
index e85a9519a5aee71c23b7eeb44f282675d427c4b4..272c3f8b6f7d0f11a0564e40b00b0568a28683dd 100644 (file)
@@ -227,7 +227,7 @@ xfs_discard_extents(
                                GFP_NOFS, 0);
                if (error && error != -EOPNOTSUPP) {
                        xfs_info(mp,
-        "discard failed for extent [0x%llu,%u], error %d",
+        "discard failed for extent [0x%llx,%u], error %d",
                                 (unsigned long long)busyp->bno,
                                 busyp->length,
                                 error);
index 9c44d38dcd1f8ac9a11525e1a4e651a8e298b404..316b2a1bdba5f6da82f1bad0dcbc0708151a59d2 100644 (file)
@@ -92,26 +92,28 @@ xfs_qm_adjust_dqlimits(
 {
        struct xfs_quotainfo    *q = mp->m_quotainfo;
        struct xfs_disk_dquot   *d = &dq->q_core;
+       struct xfs_def_quota    *defq;
        int                     prealloc = 0;
 
        ASSERT(d->d_id);
+       defq = xfs_get_defquota(dq, q);
 
-       if (q->qi_bsoftlimit && !d->d_blk_softlimit) {
-               d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
+       if (defq->bsoftlimit && !d->d_blk_softlimit) {
+               d->d_blk_softlimit = cpu_to_be64(defq->bsoftlimit);
                prealloc = 1;
        }
-       if (q->qi_bhardlimit && !d->d_blk_hardlimit) {
-               d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
+       if (defq->bhardlimit && !d->d_blk_hardlimit) {
+               d->d_blk_hardlimit = cpu_to_be64(defq->bhardlimit);
                prealloc = 1;
        }
-       if (q->qi_isoftlimit && !d->d_ino_softlimit)
-               d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
-       if (q->qi_ihardlimit && !d->d_ino_hardlimit)
-               d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
-       if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
-               d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
-       if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
-               d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
+       if (defq->isoftlimit && !d->d_ino_softlimit)
+               d->d_ino_softlimit = cpu_to_be64(defq->isoftlimit);
+       if (defq->ihardlimit && !d->d_ino_hardlimit)
+               d->d_ino_hardlimit = cpu_to_be64(defq->ihardlimit);
+       if (defq->rtbsoftlimit && !d->d_rtb_softlimit)
+               d->d_rtb_softlimit = cpu_to_be64(defq->rtbsoftlimit);
+       if (defq->rtbhardlimit && !d->d_rtb_hardlimit)
+               d->d_rtb_hardlimit = cpu_to_be64(defq->rtbhardlimit);
 
        if (prealloc)
                xfs_dquot_set_prealloc_limits(dq);
@@ -232,7 +234,8 @@ xfs_qm_init_dquot_blk(
 {
        struct xfs_quotainfo    *q = mp->m_quotainfo;
        xfs_dqblk_t     *d;
-       int             curid, i;
+       xfs_dqid_t      curid;
+       int             i;
 
        ASSERT(tp);
        ASSERT(xfs_buf_islocked(bp));
@@ -243,7 +246,6 @@ xfs_qm_init_dquot_blk(
         * ID of the first dquot in the block - id's are zero based.
         */
        curid = id - (id % q->qi_dqperchunk);
-       ASSERT(curid >= 0);
        memset(d, 0, BBTOB(q->qi_dqchunklen));
        for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
                d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
@@ -464,12 +466,13 @@ xfs_qm_dqtobp(
        struct xfs_bmbt_irec    map;
        int                     nmaps = 1, error;
        struct xfs_buf          *bp;
-       struct xfs_inode        *quotip = xfs_dq_to_quota_inode(dqp);
+       struct xfs_inode        *quotip;
        struct xfs_mount        *mp = dqp->q_mount;
        xfs_dqid_t              id = be32_to_cpu(dqp->q_core.d_id);
        struct xfs_trans        *tp = (tpp ? *tpp : NULL);
        uint                    lock_mode;
 
+       quotip = xfs_quota_inode(dqp->q_mount, dqp->dq_flags);
        dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
 
        lock_mode = xfs_ilock_data_map_shared(quotip);
@@ -684,6 +687,56 @@ error0:
        return error;
 }
 
+/*
+ * Advance to the next id in the current chunk, or if at the
+ * end of the chunk, skip ahead to first id in next allocated chunk
+ * using the SEEK_DATA interface.
+ */
+int
+xfs_dq_get_next_id(
+       xfs_mount_t             *mp,
+       uint                    type,
+       xfs_dqid_t              *id,
+       loff_t                  eof)
+{
+       struct xfs_inode        *quotip;
+       xfs_fsblock_t           start;
+       loff_t                  offset;
+       uint                    lock;
+       xfs_dqid_t              next_id;
+       int                     error = 0;
+
+       /* Simple advance */
+       next_id = *id + 1;
+
+       /* If new ID is within the current chunk, advancing it sufficed */
+       if (next_id % mp->m_quotainfo->qi_dqperchunk) {
+               *id = next_id;
+               return 0;
+       }
+
+       /* Nope, next_id is now past the current chunk, so find the next one */
+       start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk;
+
+       quotip = xfs_quota_inode(mp, type);
+       lock = xfs_ilock_data_map_shared(quotip);
+
+       offset = __xfs_seek_hole_data(VFS_I(quotip), XFS_FSB_TO_B(mp, start),
+                                     eof, SEEK_DATA);
+       if (offset < 0)
+               error = offset;
+
+       xfs_iunlock(quotip, lock);
+
+       /* -ENXIO is essentially "no more data" */
+       if (error)
+               return (error == -ENXIO ? -ENOENT: error);
+
+       /* Convert next data offset back to a quota id */
+       *id = XFS_B_TO_FSB(mp, offset) * mp->m_quotainfo->qi_dqperchunk;
+       return 0;
+}
+
 /*
  * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
  * a locked dquot, doing an allocation (if requested) as needed.
@@ -704,6 +757,7 @@ xfs_qm_dqget(
        struct xfs_quotainfo    *qi = mp->m_quotainfo;
        struct radix_tree_root *tree = xfs_dquot_tree(qi, type);
        struct xfs_dquot        *dqp;
+       loff_t                  eof = 0;
        int                     error;
 
        ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -731,6 +785,21 @@ xfs_qm_dqget(
        }
 #endif
 
+       /* Get the end of the quota file if we need it */
+       if (flags & XFS_QMOPT_DQNEXT) {
+               struct xfs_inode        *quotip;
+               xfs_fileoff_t           last;
+               uint                    lock_mode;
+
+               quotip = xfs_quota_inode(mp, type);
+               lock_mode = xfs_ilock_data_map_shared(quotip);
+               error = xfs_bmap_last_offset(quotip, &last, XFS_DATA_FORK);
+               xfs_iunlock(quotip, lock_mode);
+               if (error)
+                       return error;
+               eof = XFS_FSB_TO_B(mp, last);
+       }
+
 restart:
        mutex_lock(&qi->qi_tree_lock);
        dqp = radix_tree_lookup(tree, id);
@@ -744,6 +813,18 @@ restart:
                        goto restart;
                }
 
+               /* uninit / unused quota found in radix tree, keep looking  */
+               if (flags & XFS_QMOPT_DQNEXT) {
+                       if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
+                               xfs_dqunlock(dqp);
+                               mutex_unlock(&qi->qi_tree_lock);
+                               error = xfs_dq_get_next_id(mp, type, &id, eof);
+                               if (error)
+                                       return error;
+                               goto restart;
+                       }
+               }
+
                dqp->q_nrefs++;
                mutex_unlock(&qi->qi_tree_lock);
 
@@ -770,6 +851,13 @@ restart:
        if (ip)
                xfs_ilock(ip, XFS_ILOCK_EXCL);
 
+       /* If we are asked to find next active id, keep looking */
+       if (error == -ENOENT && (flags & XFS_QMOPT_DQNEXT)) {
+               error = xfs_dq_get_next_id(mp, type, &id, eof);
+               if (!error)
+                       goto restart;
+       }
+
        if (error)
                return error;
 
@@ -820,6 +908,17 @@ restart:
        qi->qi_dquots++;
        mutex_unlock(&qi->qi_tree_lock);
 
+       /* If we are asked to find next active id, keep looking */
+       if (flags & XFS_QMOPT_DQNEXT) {
+               if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
+                       xfs_qm_dqput(dqp);
+                       error = xfs_dq_get_next_id(mp, type, &id, eof);
+                       if (error)
+                               return error;
+                       goto restart;
+               }
+       }
+
  dqret:
        ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
        trace_xfs_dqget_miss(dqp);
index 652cd3c5b58c1cac1562239c9c644a19dbe588b7..2816d42507bc8ab7cf00fecb775b11e19dfb6088 100644 (file)
@@ -152,7 +152,7 @@ xfs_nfs_get_inode(
                return ERR_PTR(error);
        }
 
-       if (ip->i_d.di_gen != generation) {
+       if (VFS_I(ip)->i_generation != generation) {
                IRELE(ip);
                return ERR_PTR(-ESTALE);
        }
index 52883ac3cf84c06761afcf0792bbafcf781d509b..ac0fd32de31e4e5455e43da208cdef4861710a21 100644 (file)
@@ -156,9 +156,9 @@ xfs_update_prealloc_flags(
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
        if (!(flags & XFS_PREALLOC_INVISIBLE)) {
-               ip->i_d.di_mode &= ~S_ISUID;
-               if (ip->i_d.di_mode & S_IXGRP)
-                       ip->i_d.di_mode &= ~S_ISGID;
+               VFS_I(ip)->i_mode &= ~S_ISUID;
+               if (VFS_I(ip)->i_mode & S_IXGRP)
+                       VFS_I(ip)->i_mode &= ~S_ISGID;
                xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        }
 
@@ -1337,31 +1337,31 @@ out:
        return found;
 }
 
-STATIC loff_t
-xfs_seek_hole_data(
-       struct file             *file,
+/*
+ * caller must lock inode with xfs_ilock_data_map_shared,
+ * can we craft an appropriate ASSERT?
+ *
+ * end is because the VFS-level lseek interface is defined such that any
+ * offset past i_size shall return -ENXIO, but we use this for quota code
+ * which does not maintain i_size, and we want to SEEK_DATA past i_size.
+ */
+loff_t
+__xfs_seek_hole_data(
+       struct inode            *inode,
        loff_t                  start,
+       loff_t                  end,
        int                     whence)
 {
-       struct inode            *inode = file->f_mapping->host;
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
        loff_t                  uninitialized_var(offset);
-       xfs_fsize_t             isize;
        xfs_fileoff_t           fsbno;
-       xfs_filblks_t           end;
-       uint                    lock;
+       xfs_filblks_t           lastbno;
        int                     error;
 
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -EIO;
-
-       lock = xfs_ilock_data_map_shared(ip);
-
-       isize = i_size_read(inode);
-       if (start >= isize) {
+       if (start >= end) {
                error = -ENXIO;
-               goto out_unlock;
+               goto out_error;
        }
 
        /*
@@ -1369,22 +1369,22 @@ xfs_seek_hole_data(
         * by fsbno to the end block of the file.
         */
        fsbno = XFS_B_TO_FSBT(mp, start);
-       end = XFS_B_TO_FSB(mp, isize);
+       lastbno = XFS_B_TO_FSB(mp, end);
 
        for (;;) {
                struct xfs_bmbt_irec    map[2];
                int                     nmap = 2;
                unsigned int            i;
 
-               error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
+               error = xfs_bmapi_read(ip, fsbno, lastbno - fsbno, map, &nmap,
                                       XFS_BMAPI_ENTIRE);
                if (error)
-                       goto out_unlock;
+                       goto out_error;
 
                /* No extents at given offset, must be beyond EOF */
                if (nmap == 0) {
                        error = -ENXIO;
-                       goto out_unlock;
+                       goto out_error;
                }
 
                for (i = 0; i < nmap; i++) {
@@ -1426,7 +1426,7 @@ xfs_seek_hole_data(
                         * hole at the end of any file).
                         */
                        if (whence == SEEK_HOLE) {
-                               offset = isize;
+                               offset = end;
                                break;
                        }
                        /*
@@ -1434,7 +1434,7 @@ xfs_seek_hole_data(
                         */
                        ASSERT(whence == SEEK_DATA);
                        error = -ENXIO;
-                       goto out_unlock;
+                       goto out_error;
                }
 
                ASSERT(i > 1);
@@ -1445,14 +1445,14 @@ xfs_seek_hole_data(
                 */
                fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
                start = XFS_FSB_TO_B(mp, fsbno);
-               if (start >= isize) {
+               if (start >= end) {
                        if (whence == SEEK_HOLE) {
-                               offset = isize;
+                               offset = end;
                                break;
                        }
                        ASSERT(whence == SEEK_DATA);
                        error = -ENXIO;
-                       goto out_unlock;
+                       goto out_error;
                }
        }
 
@@ -1464,7 +1464,39 @@ out:
         * situation in particular.
         */
        if (whence == SEEK_HOLE)
-               offset = min_t(loff_t, offset, isize);
+               offset = min_t(loff_t, offset, end);
+
+       return offset;
+
+out_error:
+       return error;
+}
+
+STATIC loff_t
+xfs_seek_hole_data(
+       struct file             *file,
+       loff_t                  start,
+       int                     whence)
+{
+       struct inode            *inode = file->f_mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       uint                    lock;
+       loff_t                  offset, end;
+       int                     error = 0;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
+       lock = xfs_ilock_data_map_shared(ip);
+
+       end = i_size_read(inode);
+       offset = __xfs_seek_hole_data(inode, start, end, whence);
+       if (offset < 0) {
+               error = offset;
+               goto out_unlock;
+       }
+
        offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
 
 out_unlock:
index c4c130f9bfb64fec1d7d5dccb27963a236477ced..a51353a1f87f1a5e78064c0598f42397ece8f767 100644 (file)
@@ -151,7 +151,7 @@ xfs_filestream_pick_ag(
        xfs_agnumber_t          ag, max_ag = NULLAGNUMBER;
        int                     err, trylock, nscan;
 
-       ASSERT(S_ISDIR(ip->i_d.di_mode));
+       ASSERT(S_ISDIR(VFS_I(ip)->i_mode));
 
        /* 2% of an AG's blocks must be free for it to be chosen. */
        minfree = mp->m_sb.sb_agblocks / 50;
@@ -319,7 +319,7 @@ xfs_filestream_lookup_ag(
        xfs_agnumber_t          startag, ag = NULLAGNUMBER;
        struct xfs_mru_cache_elem *mru;
 
-       ASSERT(S_ISREG(ip->i_d.di_mode));
+       ASSERT(S_ISREG(VFS_I(ip)->i_mode));
 
        pip = xfs_filestream_get_parent(ip);
        if (!pip)
index 1b6a98b66886c76d1fab032673ec88f4cb11afa0..f32713f14f9a21c1b752e2e8eb889dea72411f8e 100644 (file)
@@ -25,6 +25,5 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
 extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
                                xfs_fsop_resblks_t *outval);
 extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
-extern int xfs_fs_log_dummy(struct xfs_mount *mp);
 
 #endif /* __XFS_FSOPS_H__ */
index d7a490f24ead08e3abf5019654ee5ee6e2e1eb7b..bf2d60749278602b5b4afcda09ede7d3dd89fd1e 100644 (file)
@@ -63,6 +63,9 @@ xfs_inode_alloc(
                return NULL;
        }
 
+       /* VFS doesn't initialise i_mode! */
+       VFS_I(ip)->i_mode = 0;
+
        XFS_STATS_INC(mp, vn_active);
        ASSERT(atomic_read(&ip->i_pincount) == 0);
        ASSERT(!spin_is_locked(&ip->i_flags_lock));
@@ -79,7 +82,7 @@ xfs_inode_alloc(
        memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
        ip->i_flags = 0;
        ip->i_delayed_blks = 0;
-       memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
+       memset(&ip->i_d, 0, sizeof(ip->i_d));
 
        return ip;
 }
@@ -98,7 +101,7 @@ void
 xfs_inode_free(
        struct xfs_inode        *ip)
 {
-       switch (ip->i_d.di_mode & S_IFMT) {
+       switch (VFS_I(ip)->i_mode & S_IFMT) {
        case S_IFREG:
        case S_IFDIR:
        case S_IFLNK:
@@ -134,6 +137,34 @@ xfs_inode_free(
        call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
 }
 
+/*
+ * When we recycle a reclaimable inode, we need to re-initialise the VFS inode
+ * part of the structure. This is made more complex by the fact we store
+ * information about the on-disk values in the VFS inode and so we can't just
+ * overwrite the values unconditionally. Hence we save the parameters we
+ * need to retain across reinitialisation, and rewrite them into the VFS inode
+ * after reinitialisation even if it fails.
+ */
+static int
+xfs_reinit_inode(
+       struct xfs_mount        *mp,
+       struct inode            *inode)
+{
+       int             error;
+       uint32_t        nlink = inode->i_nlink;
+       uint32_t        generation = inode->i_generation;
+       uint64_t        version = inode->i_version;
+       umode_t         mode = inode->i_mode;
+
+       error = inode_init_always(mp->m_super, inode);
+
+       set_nlink(inode, nlink);
+       inode->i_generation = generation;
+       inode->i_version = version;
+       inode->i_mode = mode;
+       return error;
+}
+
 /*
  * Check the validity of the inode we just found it the cache
  */
@@ -185,7 +216,7 @@ xfs_iget_cache_hit(
        /*
         * If lookup is racing with unlink return an error immediately.
         */
-       if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
+       if (VFS_I(ip)->i_mode == 0 && !(flags & XFS_IGET_CREATE)) {
                error = -ENOENT;
                goto out_error;
        }
@@ -208,7 +239,7 @@ xfs_iget_cache_hit(
                spin_unlock(&ip->i_flags_lock);
                rcu_read_unlock();
 
-               error = inode_init_always(mp->m_super, inode);
+               error = xfs_reinit_inode(mp, inode);
                if (error) {
                        /*
                         * Re-initializing the inode failed, and we are in deep
@@ -295,7 +326,7 @@ xfs_iget_cache_miss(
 
        trace_xfs_iget_miss(ip);
 
-       if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
+       if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) {
                error = -ENOENT;
                goto out_destroy;
        }
@@ -444,7 +475,7 @@ again:
         * If we have a real type for an on-disk inode, we can setup the inode
         * now.  If it's a new inode being created, xfs_ialloc will handle it.
         */
-       if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
+       if (xfs_iflags_test(ip, XFS_INEW) && VFS_I(ip)->i_mode != 0)
                xfs_setup_existing_inode(ip);
        return 0;
 
index ceba1a83cacccd649caf473ebcf2dfae984bb243..96f606deee313aed506b7e7ee229fc801ba5de80 100644 (file)
@@ -57,9 +57,9 @@ kmem_zone_t *xfs_inode_zone;
  */
 #define        XFS_ITRUNC_MAX_EXTENTS  2
 
-STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
-
-STATIC int xfs_iunlink_remove(xfs_trans_t *, xfs_inode_t *);
+STATIC int xfs_iflush_int(struct xfs_inode *, struct xfs_buf *);
+STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *);
+STATIC int xfs_iunlink_remove(struct xfs_trans *, struct xfs_inode *);
 
 /*
  * helper function to extract extent size hint from inode
@@ -766,6 +766,7 @@ xfs_ialloc(
        uint            flags;
        int             error;
        struct timespec tv;
+       struct inode    *inode;
 
        /*
         * Call the space management code to pick
@@ -791,6 +792,7 @@ xfs_ialloc(
        if (error)
                return error;
        ASSERT(ip != NULL);
+       inode = VFS_I(ip);
 
        /*
         * We always convert v1 inodes to v2 now - we only support filesystems
@@ -800,20 +802,16 @@ xfs_ialloc(
        if (ip->i_d.di_version == 1)
                ip->i_d.di_version = 2;
 
-       ip->i_d.di_mode = mode;
-       ip->i_d.di_onlink = 0;
-       ip->i_d.di_nlink = nlink;
-       ASSERT(ip->i_d.di_nlink == nlink);
+       inode->i_mode = mode;
+       set_nlink(inode, nlink);
        ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid());
        ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid());
        xfs_set_projid(ip, prid);
-       memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
 
        if (pip && XFS_INHERIT_GID(pip)) {
                ip->i_d.di_gid = pip->i_d.di_gid;
-               if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {
-                       ip->i_d.di_mode |= S_ISGID;
-               }
+               if ((VFS_I(pip)->i_mode & S_ISGID) && S_ISDIR(mode))
+                       inode->i_mode |= S_ISGID;
        }
 
        /*
@@ -822,38 +820,29 @@ xfs_ialloc(
         * (and only if the irix_sgid_inherit compatibility variable is set).
         */
        if ((irix_sgid_inherit) &&
-           (ip->i_d.di_mode & S_ISGID) &&
-           (!in_group_p(xfs_gid_to_kgid(ip->i_d.di_gid)))) {
-               ip->i_d.di_mode &= ~S_ISGID;
-       }
+           (inode->i_mode & S_ISGID) &&
+           (!in_group_p(xfs_gid_to_kgid(ip->i_d.di_gid))))
+               inode->i_mode &= ~S_ISGID;
 
        ip->i_d.di_size = 0;
        ip->i_d.di_nextents = 0;
        ASSERT(ip->i_d.di_nblocks == 0);
 
        tv = current_fs_time(mp->m_super);
-       ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
-       ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
-       ip->i_d.di_atime = ip->i_d.di_mtime;
-       ip->i_d.di_ctime = ip->i_d.di_mtime;
+       inode->i_mtime = tv;
+       inode->i_atime = tv;
+       inode->i_ctime = tv;
 
-       /*
-        * di_gen will have been taken care of in xfs_iread.
-        */
        ip->i_d.di_extsize = 0;
        ip->i_d.di_dmevmask = 0;
        ip->i_d.di_dmstate = 0;
        ip->i_d.di_flags = 0;
 
        if (ip->i_d.di_version == 3) {
-               ASSERT(ip->i_d.di_ino == ino);
-               ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_meta_uuid));
-               ip->i_d.di_crc = 0;
-               ip->i_d.di_changecount = 1;
-               ip->i_d.di_lsn = 0;
+               inode->i_version = 1;
                ip->i_d.di_flags2 = 0;
-               memset(&(ip->i_d.di_pad2[0]), 0, sizeof(ip->i_d.di_pad2));
-               ip->i_d.di_crtime = ip->i_d.di_mtime;
+               ip->i_d.di_crtime.t_sec = (__int32_t)tv.tv_sec;
+               ip->i_d.di_crtime.t_nsec = (__int32_t)tv.tv_nsec;
        }
 
 
@@ -1092,35 +1081,24 @@ xfs_dir_ialloc(
 }
 
 /*
- * Decrement the link count on an inode & log the change.
- * If this causes the link count to go to zero, initiate the
- * logging activity required to truncate a file.
+ * Decrement the link count on an inode & log the change.  If this causes the
+ * link count to go to zero, move the inode to AGI unlinked list so that it can
+ * be freed when the last active reference goes away via xfs_inactive().
  */
 int                            /* error */
 xfs_droplink(
        xfs_trans_t *tp,
        xfs_inode_t *ip)
 {
-       int     error;
-
        xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
 
-       ASSERT (ip->i_d.di_nlink > 0);
-       ip->i_d.di_nlink--;
        drop_nlink(VFS_I(ip));
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
-       error = 0;
-       if (ip->i_d.di_nlink == 0) {
-               /*
-                * We're dropping the last link to this file.
-                * Move the on-disk inode to the AGI unlinked list.
-                * From xfs_inactive() we will pull the inode from
-                * the list and free it.
-                */
-               error = xfs_iunlink(tp, ip);
-       }
-       return error;
+       if (VFS_I(ip)->i_nlink)
+               return 0;
+
+       return xfs_iunlink(tp, ip);
 }
 
 /*
@@ -1134,8 +1112,6 @@ xfs_bumplink(
        xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
 
        ASSERT(ip->i_d.di_version > 1);
-       ASSERT(ip->i_d.di_nlink > 0 || (VFS_I(ip)->i_state & I_LINKABLE));
-       ip->i_d.di_nlink++;
        inc_nlink(VFS_I(ip));
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
        return 0;
@@ -1393,7 +1369,6 @@ xfs_create_tmpfile(
         */
        xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
 
-       ip->i_d.di_nlink--;
        error = xfs_iunlink(tp, ip);
        if (error)
                goto out_trans_cancel;
@@ -1444,7 +1419,7 @@ xfs_link(
 
        trace_xfs_link(tdp, target_name);
 
-       ASSERT(!S_ISDIR(sip->i_d.di_mode));
+       ASSERT(!S_ISDIR(VFS_I(sip)->i_mode));
 
        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;
@@ -1492,7 +1467,10 @@ xfs_link(
 
        xfs_bmap_init(&free_list, &first_block);
 
-       if (sip->i_d.di_nlink == 0) {
+       /*
+        * Handle initial link state of O_TMPFILE inode
+        */
+       if (VFS_I(sip)->i_nlink == 0) {
                error = xfs_iunlink_remove(tp, sip);
                if (error)
                        goto error_return;
@@ -1648,7 +1626,7 @@ xfs_release(
        xfs_mount_t     *mp = ip->i_mount;
        int             error;
 
-       if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
+       if (!S_ISREG(VFS_I(ip)->i_mode) || (VFS_I(ip)->i_mode == 0))
                return 0;
 
        /* If this is a read-only mount, don't do this (would generate I/O) */
@@ -1679,7 +1657,7 @@ xfs_release(
                }
        }
 
-       if (ip->i_d.di_nlink == 0)
+       if (VFS_I(ip)->i_nlink == 0)
                return 0;
 
        if (xfs_can_free_eofblocks(ip, false)) {
@@ -1883,7 +1861,7 @@ xfs_inactive(
         * If the inode is already free, then there can be nothing
         * to clean up here.
         */
-       if (ip->i_d.di_mode == 0) {
+       if (VFS_I(ip)->i_mode == 0) {
                ASSERT(ip->i_df.if_real_bytes == 0);
                ASSERT(ip->i_df.if_broot_bytes == 0);
                return;
@@ -1895,7 +1873,7 @@ xfs_inactive(
        if (mp->m_flags & XFS_MOUNT_RDONLY)
                return;
 
-       if (ip->i_d.di_nlink != 0) {
+       if (VFS_I(ip)->i_nlink != 0) {
                /*
                 * force is true because we are evicting an inode from the
                 * cache. Post-eof blocks must be freed, lest we end up with
@@ -1907,7 +1885,7 @@ xfs_inactive(
                return;
        }
 
-       if (S_ISREG(ip->i_d.di_mode) &&
+       if (S_ISREG(VFS_I(ip)->i_mode) &&
            (ip->i_d.di_size != 0 || XFS_ISIZE(ip) != 0 ||
             ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0))
                truncate = 1;
@@ -1916,7 +1894,7 @@ xfs_inactive(
        if (error)
                return;
 
-       if (S_ISLNK(ip->i_d.di_mode))
+       if (S_ISLNK(VFS_I(ip)->i_mode))
                error = xfs_inactive_symlink(ip);
        else if (truncate)
                error = xfs_inactive_truncate(ip);
@@ -1952,16 +1930,21 @@ xfs_inactive(
 }
 
 /*
- * This is called when the inode's link count goes to 0.
- * We place the on-disk inode on a list in the AGI.  It
- * will be pulled from this list when the inode is freed.
+ * This is called when the inode's link count goes to 0 or we are creating a
+ * tmpfile via O_TMPFILE. In the case of a tmpfile, @ignore_linkcount will be
+ * set to true as the link count is dropped to zero by the VFS after we've
+ * created the file successfully, so we have to add it to the unlinked list
+ * while the link count is non-zero.
+ *
+ * We place the on-disk inode on a list in the AGI.  It will be pulled from this
+ * list when the inode is freed.
  */
-int
+STATIC int
 xfs_iunlink(
-       xfs_trans_t     *tp,
-       xfs_inode_t     *ip)
+       struct xfs_trans *tp,
+       struct xfs_inode *ip)
 {
-       xfs_mount_t     *mp;
+       xfs_mount_t     *mp = tp->t_mountp;
        xfs_agi_t       *agi;
        xfs_dinode_t    *dip;
        xfs_buf_t       *agibp;
@@ -1971,10 +1954,7 @@ xfs_iunlink(
        int             offset;
        int             error;
 
-       ASSERT(ip->i_d.di_nlink == 0);
-       ASSERT(ip->i_d.di_mode != 0);
-
-       mp = tp->t_mountp;
+       ASSERT(VFS_I(ip)->i_mode != 0);
 
        /*
         * Get the agi buffer first.  It ensures lock ordering
@@ -2412,10 +2392,10 @@ xfs_ifree(
        struct xfs_icluster     xic = { 0 };
 
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       ASSERT(ip->i_d.di_nlink == 0);
+       ASSERT(VFS_I(ip)->i_nlink == 0);
        ASSERT(ip->i_d.di_nextents == 0);
        ASSERT(ip->i_d.di_anextents == 0);
-       ASSERT(ip->i_d.di_size == 0 || !S_ISREG(ip->i_d.di_mode));
+       ASSERT(ip->i_d.di_size == 0 || !S_ISREG(VFS_I(ip)->i_mode));
        ASSERT(ip->i_d.di_nblocks == 0);
 
        /*
@@ -2429,7 +2409,7 @@ xfs_ifree(
        if (error)
                return error;
 
-       ip->i_d.di_mode = 0;            /* mark incore inode as free */
+       VFS_I(ip)->i_mode = 0;          /* mark incore inode as free */
        ip->i_d.di_flags = 0;
        ip->i_d.di_dmevmask = 0;
        ip->i_d.di_forkoff = 0;         /* mark the attr fork not in use */
@@ -2439,7 +2419,7 @@ xfs_ifree(
         * Bump the generation count so no one will be confused
         * by reincarnations of this inode.
         */
-       ip->i_d.di_gen++;
+       VFS_I(ip)->i_generation++;
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
        if (xic.deleted)
@@ -2526,7 +2506,7 @@ xfs_remove(
 {
        xfs_mount_t             *mp = dp->i_mount;
        xfs_trans_t             *tp = NULL;
-       int                     is_dir = S_ISDIR(ip->i_d.di_mode);
+       int                     is_dir = S_ISDIR(VFS_I(ip)->i_mode);
        int                     error = 0;
        xfs_bmap_free_t         free_list;
        xfs_fsblock_t           first_block;
@@ -2580,8 +2560,8 @@ xfs_remove(
         * If we're removing a directory perform some additional validation.
         */
        if (is_dir) {
-               ASSERT(ip->i_d.di_nlink >= 2);
-               if (ip->i_d.di_nlink != 2) {
+               ASSERT(VFS_I(ip)->i_nlink >= 2);
+               if (VFS_I(ip)->i_nlink != 2) {
                        error = -ENOTEMPTY;
                        goto out_trans_cancel;
                }
@@ -2771,7 +2751,7 @@ xfs_cross_rename(
        if (dp1 != dp2) {
                dp2_flags = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
 
-               if (S_ISDIR(ip2->i_d.di_mode)) {
+               if (S_ISDIR(VFS_I(ip2)->i_mode)) {
                        error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
                                                dp1->i_ino, first_block,
                                                free_list, spaceres);
@@ -2779,7 +2759,7 @@ xfs_cross_rename(
                                goto out_trans_abort;
 
                        /* transfer ip2 ".." reference to dp1 */
-                       if (!S_ISDIR(ip1->i_d.di_mode)) {
+                       if (!S_ISDIR(VFS_I(ip1)->i_mode)) {
                                error = xfs_droplink(tp, dp2);
                                if (error)
                                        goto out_trans_abort;
@@ -2798,7 +2778,7 @@ xfs_cross_rename(
                        ip2_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
                }
 
-               if (S_ISDIR(ip1->i_d.di_mode)) {
+               if (S_ISDIR(VFS_I(ip1)->i_mode)) {
                        error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
                                                dp2->i_ino, first_block,
                                                free_list, spaceres);
@@ -2806,7 +2786,7 @@ xfs_cross_rename(
                                goto out_trans_abort;
 
                        /* transfer ip1 ".." reference to dp2 */
-                       if (!S_ISDIR(ip2->i_d.di_mode)) {
+                       if (!S_ISDIR(VFS_I(ip2)->i_mode)) {
                                error = xfs_droplink(tp, dp1);
                                if (error)
                                        goto out_trans_abort;
@@ -2903,7 +2883,7 @@ xfs_rename(
        struct xfs_inode        *inodes[__XFS_SORT_INODES];
        int                     num_inodes = __XFS_SORT_INODES;
        bool                    new_parent = (src_dp != target_dp);
-       bool                    src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
+       bool                    src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode);
        int                     spaceres;
        int                     error;
 
@@ -3032,12 +3012,12 @@ xfs_rename(
                 * target and source are directories and that target can be
                 * destroyed, or that neither is a directory.
                 */
-               if (S_ISDIR(target_ip->i_d.di_mode)) {
+               if (S_ISDIR(VFS_I(target_ip)->i_mode)) {
                        /*
                         * Make sure target dir is empty.
                         */
                        if (!(xfs_dir_isempty(target_ip)) ||
-                           (target_ip->i_d.di_nlink > 2)) {
+                           (VFS_I(target_ip)->i_nlink > 2)) {
                                error = -EEXIST;
                                goto out_trans_cancel;
                        }
@@ -3144,7 +3124,7 @@ xfs_rename(
         * intermediate state on disk.
         */
        if (wip) {
-               ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0);
+               ASSERT(VFS_I(wip)->i_nlink == 0);
                error = xfs_bumplink(tp, wip);
                if (error)
                        goto out_bmap_cancel;
@@ -3313,7 +3293,7 @@ cluster_corrupt_out:
                 * mark it as stale and brelse.
                 */
                if (bp->b_iodone) {
-                       XFS_BUF_UNDONE(bp);
+                       bp->b_flags &= ~XBF_DONE;
                        xfs_buf_stale(bp);
                        xfs_buf_ioerror(bp, -EIO);
                        xfs_buf_ioend(bp);
@@ -3462,14 +3442,7 @@ xfs_iflush_int(
                        __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
                goto corrupt_out;
        }
-       if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC,
-                               mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) {
-               xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
-                       "%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x",
-                       __func__, ip->i_ino, ip, ip->i_d.di_magic);
-               goto corrupt_out;
-       }
-       if (S_ISREG(ip->i_d.di_mode)) {
+       if (S_ISREG(VFS_I(ip)->i_mode)) {
                if (XFS_TEST_ERROR(
                    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
                    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
@@ -3479,7 +3452,7 @@ xfs_iflush_int(
                                __func__, ip->i_ino, ip);
                        goto corrupt_out;
                }
-       } else if (S_ISDIR(ip->i_d.di_mode)) {
+       } else if (S_ISDIR(VFS_I(ip)->i_mode)) {
                if (XFS_TEST_ERROR(
                    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
                    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
@@ -3523,12 +3496,11 @@ xfs_iflush_int(
                ip->i_d.di_flushiter++;
 
        /*
-        * Copy the dirty parts of the inode into the on-disk
-        * inode.  We always copy out the core of the inode,
-        * because if the inode is dirty at all the core must
-        * be.
+        * Copy the dirty parts of the inode into the on-disk inode.  We always
+        * copy out the core of the inode, because if the inode is dirty at all
+        * the core must be.
         */
-       xfs_dinode_to_disk(dip, &ip->i_d);
+       xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn);
 
        /* Wrap, we never let the log put out DI_MAX_FLUSH */
        if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
@@ -3580,10 +3552,6 @@ xfs_iflush_int(
         */
        xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
 
-       /* update the lsn in the on disk inode if required */
-       if (ip->i_d.di_version == 3)
-               dip->di_lsn = cpu_to_be64(iip->ili_item.li_lsn);
-
        /* generate the checksum. */
        xfs_dinode_calc_crc(mp, dip);
 
index ca9e11989cbd4f330c6cb0d1a1bede113fd9c8b2..43e1d51b15eb84ca34e978166025b74d30e5b573 100644 (file)
@@ -63,7 +63,7 @@ typedef struct xfs_inode {
        unsigned long           i_flags;        /* see defined flags below */
        unsigned int            i_delayed_blks; /* count of delay alloc blks */
 
-       xfs_icdinode_t          i_d;            /* most of ondisk inode */
+       struct xfs_icdinode     i_d;            /* most of ondisk inode */
 
        /* VFS inode */
        struct inode            i_vnode;        /* embedded VFS inode */
@@ -88,7 +88,7 @@ static inline struct inode *VFS_I(struct xfs_inode *ip)
  */
 static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip)
 {
-       if (S_ISREG(ip->i_d.di_mode))
+       if (S_ISREG(VFS_I(ip)->i_mode))
                return i_size_read(VFS_I(ip));
        return ip->i_d.di_size;
 }
@@ -369,7 +369,7 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
  */
 #define XFS_INHERIT_GID(pip)   \
        (((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \
-        ((pip)->i_d.di_mode & S_ISGID))
+        (VFS_I(pip)->i_mode & S_ISGID))
 
 int            xfs_release(struct xfs_inode *ip);
 void           xfs_inactive(struct xfs_inode *ip);
@@ -405,8 +405,6 @@ int         xfs_ifree(struct xfs_trans *, xfs_inode_t *,
                           struct xfs_bmap_free *);
 int            xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
                                      int, xfs_fsize_t);
-int            xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
-
 void           xfs_iext_realloc(xfs_inode_t *, int, int);
 
 void           xfs_iunpin_wait(xfs_inode_t *);
@@ -437,6 +435,8 @@ int xfs_update_prealloc_flags(struct xfs_inode *ip,
 int    xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
                     xfs_fsize_t isize, bool *did_zeroing);
 int    xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
+loff_t __xfs_seek_hole_data(struct inode *inode, loff_t start,
+                            loff_t eof, int whence);
 
 
 /* from xfs_iops.c */
index d14b12b8cfefb90f8fe4c92a0033a41cbde2e552..c48b5b18d771fab685e23c03613a1c6e762efcb4 100644 (file)
@@ -135,7 +135,7 @@ xfs_inode_item_size(
 
        *nvecs += 2;
        *nbytes += sizeof(struct xfs_inode_log_format) +
-                  xfs_icdinode_size(ip->i_d.di_version);
+                  xfs_log_dinode_size(ip->i_d.di_version);
 
        xfs_inode_item_data_fork_size(iip, nvecs, nbytes);
        if (XFS_IFORK_Q(ip))
@@ -322,6 +322,81 @@ xfs_inode_item_format_attr_fork(
        }
 }
 
+static void
+xfs_inode_to_log_dinode(
+       struct xfs_inode        *ip,
+       struct xfs_log_dinode   *to,
+       xfs_lsn_t               lsn)
+{
+       struct xfs_icdinode     *from = &ip->i_d;
+       struct inode            *inode = VFS_I(ip);
+
+       to->di_magic = XFS_DINODE_MAGIC;
+
+       to->di_version = from->di_version;
+       to->di_format = from->di_format;
+       to->di_uid = from->di_uid;
+       to->di_gid = from->di_gid;
+       to->di_projid_lo = from->di_projid_lo;
+       to->di_projid_hi = from->di_projid_hi;
+
+       memset(to->di_pad, 0, sizeof(to->di_pad));
+       memset(to->di_pad3, 0, sizeof(to->di_pad3));
+       to->di_atime.t_sec = inode->i_atime.tv_sec;
+       to->di_atime.t_nsec = inode->i_atime.tv_nsec;
+       to->di_mtime.t_sec = inode->i_mtime.tv_sec;
+       to->di_mtime.t_nsec = inode->i_mtime.tv_nsec;
+       to->di_ctime.t_sec = inode->i_ctime.tv_sec;
+       to->di_ctime.t_nsec = inode->i_ctime.tv_nsec;
+       to->di_nlink = inode->i_nlink;
+       to->di_gen = inode->i_generation;
+       to->di_mode = inode->i_mode;
+
+       to->di_size = from->di_size;
+       to->di_nblocks = from->di_nblocks;
+       to->di_extsize = from->di_extsize;
+       to->di_nextents = from->di_nextents;
+       to->di_anextents = from->di_anextents;
+       to->di_forkoff = from->di_forkoff;
+       to->di_aformat = from->di_aformat;
+       to->di_dmevmask = from->di_dmevmask;
+       to->di_dmstate = from->di_dmstate;
+       to->di_flags = from->di_flags;
+
+       if (from->di_version == 3) {
+               to->di_changecount = inode->i_version;
+               to->di_crtime.t_sec = from->di_crtime.t_sec;
+               to->di_crtime.t_nsec = from->di_crtime.t_nsec;
+               to->di_flags2 = from->di_flags2;
+
+               to->di_ino = ip->i_ino;
+               to->di_lsn = lsn;
+               memset(to->di_pad2, 0, sizeof(to->di_pad2));
+               uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
+               to->di_flushiter = 0;
+       } else {
+               to->di_flushiter = from->di_flushiter;
+       }
+}
+
+/*
+ * Format the inode core. Current timestamp data is only in the VFS inode
+ * fields, so we need to grab them from there. Hence rather than just copying
+ * the XFS inode core structure, format the fields directly into the iovec.
+ */
+static void
+xfs_inode_item_format_core(
+       struct xfs_inode        *ip,
+       struct xfs_log_vec      *lv,
+       struct xfs_log_iovec    **vecp)
+{
+       struct xfs_log_dinode   *dic;
+
+       dic = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_ICORE);
+       xfs_inode_to_log_dinode(ip, dic, ip->i_itemp->ili_item.li_lsn);
+       xlog_finish_iovec(lv, *vecp, xfs_log_dinode_size(ip->i_d.di_version));
+}
+
 /*
  * This is called to fill in the vector of log iovecs for the given inode
  * log item.  It fills the first item with an inode log format structure,
@@ -351,10 +426,7 @@ xfs_inode_item_format(
        ilf->ilf_size = 2; /* format + core */
        xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format));
 
-       xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICORE,
-                       &ip->i_d,
-                       xfs_icdinode_size(ip->i_d.di_version));
-
+       xfs_inode_item_format_core(ip, lv, &vecp);
        xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp);
        if (XFS_IFORK_Q(ip)) {
                xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp);
index 478d04e07f9500d6ceed20231deb6b474161c708..bcb6c19ce3ea4fea69c536343c07ea222de3155d 100644 (file)
@@ -114,7 +114,7 @@ xfs_find_handle(
                handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
                                        sizeof(handle.ha_fid.fid_len);
                handle.ha_fid.fid_pad = 0;
-               handle.ha_fid.fid_gen = ip->i_d.di_gen;
+               handle.ha_fid.fid_gen = inode->i_generation;
                handle.ha_fid.fid_ino = ip->i_ino;
 
                hsize = XFS_HSIZE(handle);
@@ -963,7 +963,7 @@ xfs_set_diflags(
                di_flags |= XFS_DIFLAG_NODEFRAG;
        if (xflags & FS_XFLAG_FILESTREAM)
                di_flags |= XFS_DIFLAG_FILESTREAM;
-       if (S_ISDIR(ip->i_d.di_mode)) {
+       if (S_ISDIR(VFS_I(ip)->i_mode)) {
                if (xflags & FS_XFLAG_RTINHERIT)
                        di_flags |= XFS_DIFLAG_RTINHERIT;
                if (xflags & FS_XFLAG_NOSYMLINKS)
@@ -972,7 +972,7 @@ xfs_set_diflags(
                        di_flags |= XFS_DIFLAG_EXTSZINHERIT;
                if (xflags & FS_XFLAG_PROJINHERIT)
                        di_flags |= XFS_DIFLAG_PROJINHERIT;
-       } else if (S_ISREG(ip->i_d.di_mode)) {
+       } else if (S_ISREG(VFS_I(ip)->i_mode)) {
                if (xflags & FS_XFLAG_REALTIME)
                        di_flags |= XFS_DIFLAG_REALTIME;
                if (xflags & FS_XFLAG_EXTSIZE)
@@ -1059,24 +1059,87 @@ xfs_ioctl_setattr_xflags(
        return 0;
 }
 
+/*
+ * If we are changing DAX flags, we have to ensure the file is clean and any
+ * cached objects in the address space are invalidated and removed. This
+ * requires us to lock out other IO and page faults similar to a truncate
+ * operation. The locks need to be held until the transaction has been committed
+ * so that the cache invalidation is atomic with respect to the DAX flag
+ * manipulation.
+ */
+static int
+xfs_ioctl_setattr_dax_invalidate(
+       struct xfs_inode        *ip,
+       struct fsxattr          *fa,
+       int                     *join_flags)
+{
+       struct inode            *inode = VFS_I(ip);
+       int                     error;
+
+       *join_flags = 0;
+
+       /*
+        * It is only valid to set the DAX flag on regular files and
+        * directories on filesystems where the block size is equal to the page
+        * size. On directories it serves as an inherit hint.
+        */
+       if (fa->fsx_xflags & FS_XFLAG_DAX) {
+               if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
+                       return -EINVAL;
+               if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE)
+                       return -EINVAL;
+       }
+
+       /* If the DAX state is not changing, we have nothing to do here. */
+       if ((fa->fsx_xflags & FS_XFLAG_DAX) && IS_DAX(inode))
+               return 0;
+       if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode))
+               return 0;
+
+       /* lock, flush and invalidate mapping in preparation for flag change */
+       xfs_ilock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
+       error = filemap_write_and_wait(inode->i_mapping);
+       if (error)
+               goto out_unlock;
+       error = invalidate_inode_pages2(inode->i_mapping);
+       if (error)
+               goto out_unlock;
+
+       *join_flags = XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL;
+       return 0;
+
+out_unlock:
+       xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
+       return error;
+
+}
+
 /*
  * Set up the transaction structure for the setattr operation, checking that we
  * have permission to do so. On success, return a clean transaction and the
  * inode locked exclusively ready for further operation specific checks. On
  * failure, return an error without modifying or locking the inode.
+ *
+ * The inode might already be IO locked on call. If this is the case, it is
+ * indicated in @join_flags and we take full responsibility for ensuring they
+ * are unlocked from now on. Hence if we have an error here, we still have to
+ * unlock them. Otherwise, once they are joined to the transaction, they will
+ * be unlocked on commit/cancel.
  */
 static struct xfs_trans *
 xfs_ioctl_setattr_get_trans(
-       struct xfs_inode        *ip)
+       struct xfs_inode        *ip,
+       int                     join_flags)
 {
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_trans        *tp;
-       int                     error;
+       int                     error = -EROFS;
 
        if (mp->m_flags & XFS_MOUNT_RDONLY)
-               return ERR_PTR(-EROFS);
+               goto out_unlock;
+       error = -EIO;
        if (XFS_FORCED_SHUTDOWN(mp))
-               return ERR_PTR(-EIO);
+               goto out_unlock;
 
        tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
@@ -1084,7 +1147,8 @@ xfs_ioctl_setattr_get_trans(
                goto out_cancel;
 
        xfs_ilock(ip, XFS_ILOCK_EXCL);
-       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags);
+       join_flags = 0;
 
        /*
         * CAP_FOWNER overrides the following restrictions:
@@ -1104,6 +1168,9 @@ xfs_ioctl_setattr_get_trans(
 
 out_cancel:
        xfs_trans_cancel(tp);
+out_unlock:
+       if (join_flags)
+               xfs_iunlock(ip, join_flags);
        return ERR_PTR(error);
 }
 
@@ -1128,14 +1195,14 @@ xfs_ioctl_setattr_check_extsize(
 {
        struct xfs_mount        *mp = ip->i_mount;
 
-       if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode))
+       if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(VFS_I(ip)->i_mode))
                return -EINVAL;
 
        if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
-           !S_ISDIR(ip->i_d.di_mode))
+           !S_ISDIR(VFS_I(ip)->i_mode))
                return -EINVAL;
 
-       if (S_ISREG(ip->i_d.di_mode) && ip->i_d.di_nextents &&
+       if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_d.di_nextents &&
            ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize))
                return -EINVAL;
 
@@ -1202,6 +1269,7 @@ xfs_ioctl_setattr(
        struct xfs_dquot        *pdqp = NULL;
        struct xfs_dquot        *olddquot = NULL;
        int                     code;
+       int                     join_flags = 0;
 
        trace_xfs_ioctl_setattr(ip);
 
@@ -1225,7 +1293,18 @@ xfs_ioctl_setattr(
                        return code;
        }
 
-       tp = xfs_ioctl_setattr_get_trans(ip);
+       /*
+        * Changing DAX config may require inode locking for mapping
+        * invalidation. These need to be held all the way to transaction commit
+        * or cancel time, so need to be passed through to
+        * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
+        * appropriately.
+        */
+       code = xfs_ioctl_setattr_dax_invalidate(ip, fa, &join_flags);
+       if (code)
+               goto error_free_dquots;
+
+       tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
        if (IS_ERR(tp)) {
                code = PTR_ERR(tp);
                goto error_free_dquots;
@@ -1256,9 +1335,9 @@ xfs_ioctl_setattr(
         * successful return from chown()
         */
 
-       if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+       if ((VFS_I(ip)->i_mode & (S_ISUID|S_ISGID)) &&
            !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID))
-               ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+               VFS_I(ip)->i_mode &= ~(S_ISUID|S_ISGID);
 
        /* Change the ownerships and register project quota modifications */
        if (xfs_get_projid(ip) != fa->fsx_projid) {
@@ -1341,6 +1420,7 @@ xfs_ioc_setxflags(
        struct xfs_trans        *tp;
        struct fsxattr          fa;
        unsigned int            flags;
+       int                     join_flags = 0;
        int                     error;
 
        if (copy_from_user(&flags, arg, sizeof(flags)))
@@ -1357,7 +1437,18 @@ xfs_ioc_setxflags(
        if (error)
                return error;
 
-       tp = xfs_ioctl_setattr_get_trans(ip);
+       /*
+        * Changing DAX config may require inode locking for mapping
+        * invalidation. These need to be held all the way to transaction commit
+        * or cancel time, so need to be passed through to
+        * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
+        * appropriately.
+        */
+       error = xfs_ioctl_setattr_dax_invalidate(ip, &fa, &join_flags);
+       if (error)
+               goto out_drop_write;
+
+       tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
        if (IS_ERR(tp)) {
                error = PTR_ERR(tp);
                goto out_drop_write;
index 76b71a1c6c323e2043aeab1e93fb5a66db9f39d0..fb7dc61f4a29d7cee3d4683c675c4551e7669e52 100644 (file)
@@ -459,8 +459,8 @@ xfs_vn_getattr(
 
        stat->size = XFS_ISIZE(ip);
        stat->dev = inode->i_sb->s_dev;
-       stat->mode = ip->i_d.di_mode;
-       stat->nlink = ip->i_d.di_nlink;
+       stat->mode = inode->i_mode;
+       stat->nlink = inode->i_nlink;
        stat->uid = inode->i_uid;
        stat->gid = inode->i_gid;
        stat->ino = ip->i_ino;
@@ -506,9 +506,6 @@ xfs_setattr_mode(
 
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
-       ip->i_d.di_mode &= S_IFMT;
-       ip->i_d.di_mode |= mode & ~S_IFMT;
-
        inode->i_mode &= S_IFMT;
        inode->i_mode |= mode & ~S_IFMT;
 }
@@ -522,21 +519,12 @@ xfs_setattr_time(
 
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
-       if (iattr->ia_valid & ATTR_ATIME) {
+       if (iattr->ia_valid & ATTR_ATIME)
                inode->i_atime = iattr->ia_atime;
-               ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
-               ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
-       }
-       if (iattr->ia_valid & ATTR_CTIME) {
+       if (iattr->ia_valid & ATTR_CTIME)
                inode->i_ctime = iattr->ia_ctime;
-               ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
-               ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
-       }
-       if (iattr->ia_valid & ATTR_MTIME) {
+       if (iattr->ia_valid & ATTR_MTIME)
                inode->i_mtime = iattr->ia_mtime;
-               ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
-               ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
-       }
 }
 
 int
@@ -661,9 +649,9 @@ xfs_setattr_nonsize(
                 * The set-user-ID and set-group-ID bits of a file will be
                 * cleared upon successful return from chown()
                 */
-               if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+               if ((inode->i_mode & (S_ISUID|S_ISGID)) &&
                    !capable(CAP_FSETID))
-                       ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+                       inode->i_mode &= ~(S_ISUID|S_ISGID);
 
                /*
                 * Change the ownerships and register quota modifications
@@ -773,7 +761,7 @@ xfs_setattr_size(
 
        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
        ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
-       ASSERT(S_ISREG(ip->i_d.di_mode));
+       ASSERT(S_ISREG(inode->i_mode));
        ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
                ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
 
@@ -991,21 +979,13 @@ xfs_vn_update_time(
        }
 
        xfs_ilock(ip, XFS_ILOCK_EXCL);
-       if (flags & S_CTIME) {
+       if (flags & S_CTIME)
                inode->i_ctime = *now;
-               ip->i_d.di_ctime.t_sec = (__int32_t)now->tv_sec;
-               ip->i_d.di_ctime.t_nsec = (__int32_t)now->tv_nsec;
-       }
-       if (flags & S_MTIME) {
+       if (flags & S_MTIME)
                inode->i_mtime = *now;
-               ip->i_d.di_mtime.t_sec = (__int32_t)now->tv_sec;
-               ip->i_d.di_mtime.t_nsec = (__int32_t)now->tv_nsec;
-       }
-       if (flags & S_ATIME) {
+       if (flags & S_ATIME)
                inode->i_atime = *now;
-               ip->i_d.di_atime.t_sec = (__int32_t)now->tv_sec;
-               ip->i_d.di_atime.t_nsec = (__int32_t)now->tv_nsec;
-       }
+
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
        xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
        return xfs_trans_commit(tp);
@@ -1205,8 +1185,10 @@ xfs_diflags_to_iflags(
                inode->i_flags |= S_SYNC;
        if (flags & XFS_DIFLAG_NOATIME)
                inode->i_flags |= S_NOATIME;
-       if (ip->i_mount->m_flags & XFS_MOUNT_DAX ||
-           ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
+       if (S_ISREG(inode->i_mode) &&
+           ip->i_mount->m_sb.sb_blocksize == PAGE_SIZE &&
+           (ip->i_mount->m_flags & XFS_MOUNT_DAX ||
+            ip->i_d.di_flags2 & XFS_DIFLAG2_DAX))
                inode->i_flags |= S_DAX;
 }
 
@@ -1232,8 +1214,6 @@ xfs_setup_inode(
        /* make the inode look hashed for the writeback code */
        hlist_add_fake(&inode->i_hash);
 
-       inode->i_mode   = ip->i_d.di_mode;
-       set_nlink(inode, ip->i_d.di_nlink);
        inode->i_uid    = xfs_uid_to_kuid(ip->i_d.di_uid);
        inode->i_gid    = xfs_gid_to_kgid(ip->i_d.di_gid);
 
@@ -1249,14 +1229,7 @@ xfs_setup_inode(
                break;
        }
 
-       inode->i_generation = ip->i_d.di_gen;
        i_size_write(inode, ip->i_d.di_size);
-       inode->i_atime.tv_sec   = ip->i_d.di_atime.t_sec;
-       inode->i_atime.tv_nsec  = ip->i_d.di_atime.t_nsec;
-       inode->i_mtime.tv_sec   = ip->i_d.di_mtime.t_sec;
-       inode->i_mtime.tv_nsec  = ip->i_d.di_mtime.t_nsec;
-       inode->i_ctime.tv_sec   = ip->i_d.di_ctime.t_sec;
-       inode->i_ctime.tv_nsec  = ip->i_d.di_ctime.t_nsec;
        xfs_diflags_to_iflags(inode, ip);
 
        ip->d_ops = ip->i_mount->m_nondir_inode_ops;
index 930ebd86bebac3a300faf44fabe77aa28258cf60..ce73eb34620dbbf06570650a582163a98a0d8f92 100644 (file)
@@ -57,6 +57,7 @@ xfs_bulkstat_one_int(
 {
        struct xfs_icdinode     *dic;           /* dinode core info pointer */
        struct xfs_inode        *ip;            /* incore inode pointer */
+       struct inode            *inode;
        struct xfs_bstat        *buf;           /* return buffer */
        int                     error = 0;      /* error value */
 
@@ -77,30 +78,33 @@ xfs_bulkstat_one_int(
 
        ASSERT(ip != NULL);
        ASSERT(ip->i_imap.im_blkno != 0);
+       inode = VFS_I(ip);
 
        dic = &ip->i_d;
 
        /* xfs_iget returns the following without needing
         * further change.
         */
-       buf->bs_nlink = dic->di_nlink;
        buf->bs_projid_lo = dic->di_projid_lo;
        buf->bs_projid_hi = dic->di_projid_hi;
        buf->bs_ino = ino;
-       buf->bs_mode = dic->di_mode;
        buf->bs_uid = dic->di_uid;
        buf->bs_gid = dic->di_gid;
        buf->bs_size = dic->di_size;
-       buf->bs_atime.tv_sec = dic->di_atime.t_sec;
-       buf->bs_atime.tv_nsec = dic->di_atime.t_nsec;
-       buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
-       buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
-       buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
-       buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec;
+
+       buf->bs_nlink = inode->i_nlink;
+       buf->bs_atime.tv_sec = inode->i_atime.tv_sec;
+       buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec;
+       buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec;
+       buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec;
+       buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec;
+       buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec;
+       buf->bs_gen = inode->i_generation;
+       buf->bs_mode = inode->i_mode;
+
        buf->bs_xflags = xfs_ip2xflags(ip);
        buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
        buf->bs_extents = dic->di_nextents;
-       buf->bs_gen = dic->di_gen;
        memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
        buf->bs_dmevmask = dic->di_dmevmask;
        buf->bs_dmstate = dic->di_dmstate;
index 9c9a1c9bcc7f0bf0090fa4ffdffdfa35d6068122..b49ccf5c1d7564402c39671c4e67b7cf92ab8082 100644 (file)
@@ -1212,7 +1212,7 @@ xlog_iodone(xfs_buf_t *bp)
        }
 
        /* log I/O is always issued ASYNC */
-       ASSERT(XFS_BUF_ISASYNC(bp));
+       ASSERT(bp->b_flags & XBF_ASYNC);
        xlog_state_done_syncing(iclog, aborted);
 
        /*
@@ -1864,9 +1864,8 @@ xlog_sync(
 
        bp->b_io_length = BTOBB(count);
        bp->b_fspriv = iclog;
-       XFS_BUF_ZEROFLAGS(bp);
-       XFS_BUF_ASYNC(bp);
-       bp->b_flags |= XBF_SYNCIO;
+       bp->b_flags &= ~(XBF_FUA | XBF_FLUSH);
+       bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE);
 
        if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
                bp->b_flags |= XBF_FUA;
@@ -1893,12 +1892,11 @@ xlog_sync(
 
        /* account for log which doesn't start at block #0 */
        XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
+
        /*
         * Don't call xfs_bwrite here. We do log-syncs even when the filesystem
         * is shutting down.
         */
-       XFS_BUF_WRITE(bp);
-
        error = xlog_bdstrat(bp);
        if (error) {
                xfs_buf_ioerror_alert(bp, "xlog_sync");
@@ -1910,9 +1908,8 @@ xlog_sync(
                xfs_buf_associate_memory(bp,
                                (char *)&iclog->ic_header + count, split);
                bp->b_fspriv = iclog;
-               XFS_BUF_ZEROFLAGS(bp);
-               XFS_BUF_ASYNC(bp);
-               bp->b_flags |= XBF_SYNCIO;
+               bp->b_flags &= ~(XBF_FUA | XBF_FLUSH);
+               bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE);
                if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
                        bp->b_flags |= XBF_FUA;
 
@@ -1921,7 +1918,6 @@ xlog_sync(
 
                /* account for internal log which doesn't start at block #0 */
                XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
-               XFS_BUF_WRITE(bp);
                error = xlog_bdstrat(bp);
                if (error) {
                        xfs_buf_ioerror_alert(bp, "xlog_sync (split)");
@@ -2012,77 +2008,81 @@ xlog_print_tic_res(
        uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t);
 
        /* match with XLOG_REG_TYPE_* in xfs_log.h */
-       static char *res_type_str[XLOG_REG_TYPE_MAX] = {
-           "bformat",
-           "bchunk",
-           "efi_format",
-           "efd_format",
-           "iformat",
-           "icore",
-           "iext",
-           "ibroot",
-           "ilocal",
-           "iattr_ext",
-           "iattr_broot",
-           "iattr_local",
-           "qformat",
-           "dquot",
-           "quotaoff",
-           "LR header",
-           "unmount",
-           "commit",
-           "trans header"
+#define REG_TYPE_STR(type, str)        [XLOG_REG_TYPE_##type] = str
+       static char *res_type_str[XLOG_REG_TYPE_MAX + 1] = {
+           REG_TYPE_STR(BFORMAT, "bformat"),
+           REG_TYPE_STR(BCHUNK, "bchunk"),
+           REG_TYPE_STR(EFI_FORMAT, "efi_format"),
+           REG_TYPE_STR(EFD_FORMAT, "efd_format"),
+           REG_TYPE_STR(IFORMAT, "iformat"),
+           REG_TYPE_STR(ICORE, "icore"),
+           REG_TYPE_STR(IEXT, "iext"),
+           REG_TYPE_STR(IBROOT, "ibroot"),
+           REG_TYPE_STR(ILOCAL, "ilocal"),
+           REG_TYPE_STR(IATTR_EXT, "iattr_ext"),
+           REG_TYPE_STR(IATTR_BROOT, "iattr_broot"),
+           REG_TYPE_STR(IATTR_LOCAL, "iattr_local"),
+           REG_TYPE_STR(QFORMAT, "qformat"),
+           REG_TYPE_STR(DQUOT, "dquot"),
+           REG_TYPE_STR(QUOTAOFF, "quotaoff"),
+           REG_TYPE_STR(LRHEADER, "LR header"),
+           REG_TYPE_STR(UNMOUNT, "unmount"),
+           REG_TYPE_STR(COMMIT, "commit"),
+           REG_TYPE_STR(TRANSHDR, "trans header"),
+           REG_TYPE_STR(ICREATE, "inode create")
        };
+#undef REG_TYPE_STR
+#define TRANS_TYPE_STR(type)   [XFS_TRANS_##type] = #type
        static char *trans_type_str[XFS_TRANS_TYPE_MAX] = {
-           "SETATTR_NOT_SIZE",
-           "SETATTR_SIZE",
-           "INACTIVE",
-           "CREATE",
-           "CREATE_TRUNC",
-           "TRUNCATE_FILE",
-           "REMOVE",
-           "LINK",
-           "RENAME",
-           "MKDIR",
-           "RMDIR",
-           "SYMLINK",
-           "SET_DMATTRS",
-           "GROWFS",
-           "STRAT_WRITE",
-           "DIOSTRAT",
-           "WRITE_SYNC",
-           "WRITEID",
-           "ADDAFORK",
-           "ATTRINVAL",
-           "ATRUNCATE",
-           "ATTR_SET",
-           "ATTR_RM",
-           "ATTR_FLAG",
-           "CLEAR_AGI_BUCKET",
-           "QM_SBCHANGE",
-           "DUMMY1",
-           "DUMMY2",
-           "QM_QUOTAOFF",
-           "QM_DQALLOC",
-           "QM_SETQLIM",
-           "QM_DQCLUSTER",
-           "QM_QINOCREATE",
-           "QM_QUOTAOFF_END",
-           "FSYNC_TS",
-           "GROWFSRT_ALLOC",
-           "GROWFSRT_ZERO",
-           "GROWFSRT_FREE",
-           "SWAPEXT",
-           "CHECKPOINT",
-           "ICREATE",
-           "CREATE_TMPFILE"
+           TRANS_TYPE_STR(SETATTR_NOT_SIZE),
+           TRANS_TYPE_STR(SETATTR_SIZE),
+           TRANS_TYPE_STR(INACTIVE),
+           TRANS_TYPE_STR(CREATE),
+           TRANS_TYPE_STR(CREATE_TRUNC),
+           TRANS_TYPE_STR(TRUNCATE_FILE),
+           TRANS_TYPE_STR(REMOVE),
+           TRANS_TYPE_STR(LINK),
+           TRANS_TYPE_STR(RENAME),
+           TRANS_TYPE_STR(MKDIR),
+           TRANS_TYPE_STR(RMDIR),
+           TRANS_TYPE_STR(SYMLINK),
+           TRANS_TYPE_STR(SET_DMATTRS),
+           TRANS_TYPE_STR(GROWFS),
+           TRANS_TYPE_STR(STRAT_WRITE),
+           TRANS_TYPE_STR(DIOSTRAT),
+           TRANS_TYPE_STR(WRITEID),
+           TRANS_TYPE_STR(ADDAFORK),
+           TRANS_TYPE_STR(ATTRINVAL),
+           TRANS_TYPE_STR(ATRUNCATE),
+           TRANS_TYPE_STR(ATTR_SET),
+           TRANS_TYPE_STR(ATTR_RM),
+           TRANS_TYPE_STR(ATTR_FLAG),
+           TRANS_TYPE_STR(CLEAR_AGI_BUCKET),
+           TRANS_TYPE_STR(SB_CHANGE),
+           TRANS_TYPE_STR(DUMMY1),
+           TRANS_TYPE_STR(DUMMY2),
+           TRANS_TYPE_STR(QM_QUOTAOFF),
+           TRANS_TYPE_STR(QM_DQALLOC),
+           TRANS_TYPE_STR(QM_SETQLIM),
+           TRANS_TYPE_STR(QM_DQCLUSTER),
+           TRANS_TYPE_STR(QM_QINOCREATE),
+           TRANS_TYPE_STR(QM_QUOTAOFF_END),
+           TRANS_TYPE_STR(FSYNC_TS),
+           TRANS_TYPE_STR(GROWFSRT_ALLOC),
+           TRANS_TYPE_STR(GROWFSRT_ZERO),
+           TRANS_TYPE_STR(GROWFSRT_FREE),
+           TRANS_TYPE_STR(SWAPEXT),
+           TRANS_TYPE_STR(CHECKPOINT),
+           TRANS_TYPE_STR(ICREATE),
+           TRANS_TYPE_STR(CREATE_TMPFILE)
        };
+#undef TRANS_TYPE_STR
 
        xfs_warn(mp, "xlog_write: reservation summary:");
        xfs_warn(mp, "  trans type  = %s (%u)",
                 ((ticket->t_trans_type <= 0 ||
                   ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ?
-                 "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]),
+                 "bad-trans-type" : trans_type_str[ticket->t_trans_type]),
                 ticket->t_trans_type);
        xfs_warn(mp, "  unit res    = %d bytes",
                 ticket->t_unit_res);
@@ -2101,7 +2101,7 @@ xlog_print_tic_res(
                uint r_type = ticket->t_res_arr[i].r_type;
                xfs_warn(mp, "region[%u]: %s - %u bytes", i,
                            ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ?
-                           "bad-rtype" : res_type_str[r_type-1]),
+                           "bad-rtype" : res_type_str[r_type]),
                            ticket->t_res_arr[i].r_len);
        }
 
@@ -3979,7 +3979,7 @@ xfs_log_force_umount(
            log->l_flags & XLOG_ACTIVE_RECOVERY) {
                mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
                if (mp->m_sb_bp)
-                       XFS_BUF_DONE(mp->m_sb_bp);
+                       mp->m_sb_bp->b_flags |= XBF_DONE;
                return 0;
        }
 
@@ -4009,7 +4009,7 @@ xfs_log_force_umount(
        spin_lock(&log->l_icloglock);
        mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
        if (mp->m_sb_bp)
-               XFS_BUF_DONE(mp->m_sb_bp);
+               mp->m_sb_bp->b_flags |= XBF_DONE;
 
        /*
         * Mark the log and the iclogs with IO error flags to prevent any
index be5568839442d1ab50bf5cae293b7b2b133b525e..396565f4324764058b979cf5e4c5bd96744f8ef8 100644 (file)
@@ -190,7 +190,7 @@ xlog_bread_noalign(
        ASSERT(nbblks <= bp->b_length);
 
        XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
-       XFS_BUF_READ(bp);
+       bp->b_flags |= XBF_READ;
        bp->b_io_length = nbblks;
        bp->b_error = 0;
 
@@ -275,7 +275,6 @@ xlog_bwrite(
        ASSERT(nbblks <= bp->b_length);
 
        XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
-       XFS_BUF_ZEROFLAGS(bp);
        xfs_buf_hold(bp);
        xfs_buf_lock(bp);
        bp->b_io_length = nbblks;
@@ -2538,6 +2537,13 @@ xlog_recover_validate_buf_type(
                }
                bp->b_ops = &xfs_sb_buf_ops;
                break;
+#ifdef CONFIG_XFS_RT
+       case XFS_BLFT_RTBITMAP_BUF:
+       case XFS_BLFT_RTSUMMARY_BUF:
+               /* no magic numbers for verification of RT buffers */
+               bp->b_ops = &xfs_rtbuf_ops;
+               break;
+#endif /* CONFIG_XFS_RT */
        default:
                xfs_warn(mp, "Unknown buffer type %d!",
                         xfs_blft_from_flags(buf_f));
@@ -2858,7 +2864,7 @@ xfs_recover_inode_owner_change(
                return -ENOMEM;
 
        /* instantiate the inode */
-       xfs_dinode_from_disk(&ip->i_d, dip);
+       xfs_inode_from_disk(ip, dip);
        ASSERT(ip->i_d.di_version >= 3);
 
        error = xfs_iformat_fork(ip, dip);
@@ -2904,7 +2910,7 @@ xlog_recover_inode_pass2(
        int                     error;
        int                     attr_index;
        uint                    fields;
-       xfs_icdinode_t          *dicp;
+       struct xfs_log_dinode   *ldip;
        uint                    isize;
        int                     need_free = 0;
 
@@ -2957,8 +2963,8 @@ xlog_recover_inode_pass2(
                error = -EFSCORRUPTED;
                goto out_release;
        }
-       dicp = item->ri_buf[1].i_addr;
-       if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
+       ldip = item->ri_buf[1].i_addr;
+       if (unlikely(ldip->di_magic != XFS_DINODE_MAGIC)) {
                xfs_alert(mp,
                        "%s: Bad inode log record, rec ptr 0x%p, ino %Ld",
                        __func__, item, in_f->ilf_ino);
@@ -2994,13 +3000,13 @@ xlog_recover_inode_pass2(
         * to skip replay when the on disk inode is newer than the log one
         */
        if (!xfs_sb_version_hascrc(&mp->m_sb) &&
-           dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
+           ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
                /*
                 * Deal with the wrap case, DI_MAX_FLUSH is less
                 * than smaller numbers
                 */
                if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
-                   dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) {
+                   ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
                        /* do nothing */
                } else {
                        trace_xfs_log_recover_inode_skip(log, in_f);
@@ -3010,13 +3016,13 @@ xlog_recover_inode_pass2(
        }
 
        /* Take the opportunity to reset the flush iteration count */
-       dicp->di_flushiter = 0;
+       ldip->di_flushiter = 0;
 
-       if (unlikely(S_ISREG(dicp->di_mode))) {
-               if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
-                   (dicp->di_format != XFS_DINODE_FMT_BTREE)) {
+       if (unlikely(S_ISREG(ldip->di_mode))) {
+               if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
+                   (ldip->di_format != XFS_DINODE_FMT_BTREE)) {
                        XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
-                                        XFS_ERRLEVEL_LOW, mp, dicp);
+                                        XFS_ERRLEVEL_LOW, mp, ldip);
                        xfs_alert(mp,
                "%s: Bad regular inode log record, rec ptr 0x%p, "
                "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
@@ -3024,12 +3030,12 @@ xlog_recover_inode_pass2(
                        error = -EFSCORRUPTED;
                        goto out_release;
                }
-       } else if (unlikely(S_ISDIR(dicp->di_mode))) {
-               if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
-                   (dicp->di_format != XFS_DINODE_FMT_BTREE) &&
-                   (dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
+       } else if (unlikely(S_ISDIR(ldip->di_mode))) {
+               if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
+                   (ldip->di_format != XFS_DINODE_FMT_BTREE) &&
+                   (ldip->di_format != XFS_DINODE_FMT_LOCAL)) {
                        XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
-                                            XFS_ERRLEVEL_LOW, mp, dicp);
+                                            XFS_ERRLEVEL_LOW, mp, ldip);
                        xfs_alert(mp,
                "%s: Bad dir inode log record, rec ptr 0x%p, "
                "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
@@ -3038,32 +3044,32 @@ xlog_recover_inode_pass2(
                        goto out_release;
                }
        }
-       if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){
+       if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){
                XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
-                                    XFS_ERRLEVEL_LOW, mp, dicp);
+                                    XFS_ERRLEVEL_LOW, mp, ldip);
                xfs_alert(mp,
        "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
        "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
                        __func__, item, dip, bp, in_f->ilf_ino,
-                       dicp->di_nextents + dicp->di_anextents,
-                       dicp->di_nblocks);
+                       ldip->di_nextents + ldip->di_anextents,
+                       ldip->di_nblocks);
                error = -EFSCORRUPTED;
                goto out_release;
        }
-       if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
+       if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) {
                XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
-                                    XFS_ERRLEVEL_LOW, mp, dicp);
+                                    XFS_ERRLEVEL_LOW, mp, ldip);
                xfs_alert(mp,
        "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
        "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__,
-                       item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
+                       item, dip, bp, in_f->ilf_ino, ldip->di_forkoff);
                error = -EFSCORRUPTED;
                goto out_release;
        }
-       isize = xfs_icdinode_size(dicp->di_version);
+       isize = xfs_log_dinode_size(ldip->di_version);
        if (unlikely(item->ri_buf[1].i_len > isize)) {
                XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
-                                    XFS_ERRLEVEL_LOW, mp, dicp);
+                                    XFS_ERRLEVEL_LOW, mp, ldip);
                xfs_alert(mp,
                        "%s: Bad inode log record length %d, rec ptr 0x%p",
                        __func__, item->ri_buf[1].i_len, item);
@@ -3071,8 +3077,8 @@ xlog_recover_inode_pass2(
                goto out_release;
        }
 
-       /* The core is in in-core format */
-       xfs_dinode_to_disk(dip, dicp);
+       /* recover the log dinode inode into the on disk inode */
+       xfs_log_dinode_to_disk(ldip, dip);
 
        /* the rest is in on-disk format */
        if (item->ri_buf[1].i_len > isize) {
@@ -4402,8 +4408,8 @@ xlog_recover_process_one_iunlink(
        if (error)
                goto fail_iput;
 
-       ASSERT(ip->i_d.di_nlink == 0);
-       ASSERT(ip->i_d.di_mode != 0);
+       ASSERT(VFS_I(ip)->i_nlink == 0);
+       ASSERT(VFS_I(ip)->i_mode != 0);
 
        /* setup for the next pass */
        agino = be32_to_cpu(dip->di_next_unlinked);
@@ -4957,6 +4963,7 @@ xlog_do_recover(
        xfs_daddr_t     head_blk,
        xfs_daddr_t     tail_blk)
 {
+       struct xfs_mount *mp = log->l_mp;
        int             error;
        xfs_buf_t       *bp;
        xfs_sb_t        *sbp;
@@ -4971,7 +4978,7 @@ xlog_do_recover(
        /*
         * If IO errors happened during recovery, bail out.
         */
-       if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
+       if (XFS_FORCED_SHUTDOWN(mp)) {
                return -EIO;
        }
 
@@ -4984,22 +4991,21 @@ xlog_do_recover(
         * or iunlinks they will have some entries in the AIL; so we look at
         * the AIL to determine how to set the tail_lsn.
         */
-       xlog_assign_tail_lsn(log->l_mp);
+       xlog_assign_tail_lsn(mp);
 
        /*
         * Now that we've finished replaying all buffer and inode
         * updates, re-read in the superblock and reverify it.
         */
-       bp = xfs_getsb(log->l_mp, 0);
-       XFS_BUF_UNDONE(bp);
-       ASSERT(!(XFS_BUF_ISWRITE(bp)));
-       XFS_BUF_READ(bp);
-       XFS_BUF_UNASYNC(bp);
+       bp = xfs_getsb(mp, 0);
+       bp->b_flags &= ~(XBF_DONE | XBF_ASYNC);
+       ASSERT(!(bp->b_flags & XBF_WRITE));
+       bp->b_flags |= XBF_READ;
        bp->b_ops = &xfs_sb_buf_ops;
 
        error = xfs_buf_submit_wait(bp);
        if (error) {
-               if (!XFS_FORCED_SHUTDOWN(log->l_mp)) {
+               if (!XFS_FORCED_SHUTDOWN(mp)) {
                        xfs_buf_ioerror_alert(bp, __func__);
                        ASSERT(0);
                }
@@ -5008,14 +5014,17 @@ xlog_do_recover(
        }
 
        /* Convert superblock from on-disk format */
-       sbp = &log->l_mp->m_sb;
+       sbp = &mp->m_sb;
        xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
-       ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
-       ASSERT(xfs_sb_good_version(sbp));
-       xfs_reinit_percpu_counters(log->l_mp);
-
        xfs_buf_relse(bp);
 
+       /* re-initialise in-core superblock and geometry structures */
+       xfs_reinit_percpu_counters(mp);
+       error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
+       if (error) {
+               xfs_warn(mp, "Failed post-recovery per-ag init: %d", error);
+               return error;
+       }
 
        xlog_recover_check_summary(log);
 
index bb753b359bee188b13023caf4597582b402cd31d..536a0ee9cd5af1fbbc89960a597eb7158b676771 100644 (file)
@@ -185,9 +185,6 @@ xfs_initialize_perag(
        xfs_agnumber_t  index;
        xfs_agnumber_t  first_initialised = 0;
        xfs_perag_t     *pag;
-       xfs_agino_t     agino;
-       xfs_ino_t       ino;
-       xfs_sb_t        *sbp = &mp->m_sb;
        int             error = -ENOMEM;
 
        /*
@@ -230,22 +227,7 @@ xfs_initialize_perag(
                radix_tree_preload_end();
        }
 
-       /*
-        * If we mount with the inode64 option, or no inode overflows
-        * the legacy 32-bit address space clear the inode32 option.
-        */
-       agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
-       ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
-
-       if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
-               mp->m_flags |= XFS_MOUNT_32BITINODES;
-       else
-               mp->m_flags &= ~XFS_MOUNT_32BITINODES;
-
-       if (mp->m_flags & XFS_MOUNT_32BITINODES)
-               index = xfs_set_inode32(mp, agcount);
-       else
-               index = xfs_set_inode64(mp, agcount);
+       index = xfs_set_inode_alloc(mp, agcount);
 
        if (maxagi)
                *maxagi = index;
@@ -865,7 +847,7 @@ xfs_mountfs(
 
        ASSERT(rip != NULL);
 
-       if (unlikely(!S_ISDIR(rip->i_d.di_mode))) {
+       if (unlikely(!S_ISDIR(VFS_I(rip)->i_mode))) {
                xfs_warn(mp, "corrupted root inode %llu: not a directory",
                        (unsigned long long)rip->i_ino);
                xfs_iunlock(rip, XFS_ILOCK_EXCL);
@@ -1284,7 +1266,7 @@ xfs_getsb(
        }
 
        xfs_buf_hold(bp);
-       ASSERT(XFS_BUF_ISDONE(bp));
+       ASSERT(bp->b_flags & XBF_DONE);
        return bp;
 }
 
index b57098481c10a2a55a05bf6e75e6e43f7e224401..bac6b3435591b6725ea2b4a6115fd7bf89db8c15 100644 (file)
@@ -147,6 +147,17 @@ typedef struct xfs_mount {
         * to various other kinds of pain inflicted on the pNFS server.
         */
        __uint32_t              m_generation;
+
+#ifdef DEBUG
+       /*
+        * DEBUG mode instrumentation to test and/or trigger delayed allocation
+        * block killing in the event of failed writes. When enabled, all
+        * buffered writes are forced to fail. All delalloc blocks in the range
+        * of the write (including pre-existing delalloc blocks!) are tossed as
+        * part of the write failure error handling sequence.
+        */
+       bool                    m_fail_writes;
+#endif
 } xfs_mount_t;
 
 /*
@@ -166,9 +177,8 @@ typedef struct xfs_mount {
 #define XFS_MOUNT_GRPID                (1ULL << 9)     /* group-ID assigned from directory */
 #define XFS_MOUNT_NORECOVERY   (1ULL << 10)    /* no recovery - dirty fs */
 #define XFS_MOUNT_DFLT_IOSIZE  (1ULL << 12)    /* set default i/o size */
-#define XFS_MOUNT_32BITINODES  (1ULL << 14)    /* do not create inodes above
-                                                * 32 bits in size */
-#define XFS_MOUNT_SMALL_INUMS  (1ULL << 15)    /* users wants 32bit inodes */
+#define XFS_MOUNT_SMALL_INUMS  (1ULL << 14)    /* user wants 32bit inodes */
+#define XFS_MOUNT_32BITINODES  (1ULL << 15)    /* inode32 allocator active */
 #define XFS_MOUNT_NOUUID       (1ULL << 16)    /* ignore uuid during mount */
 #define XFS_MOUNT_BARRIER      (1ULL << 17)
 #define XFS_MOUNT_IKEEP                (1ULL << 18)    /* keep empty inode clusters*/
@@ -264,6 +274,20 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
        return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
 }
 
+#ifdef DEBUG
+static inline bool
+xfs_mp_fail_writes(struct xfs_mount *mp)
+{
+       return mp->m_fail_writes;
+}
+#else
+static inline bool
+xfs_mp_fail_writes(struct xfs_mount *mp)
+{
+       return 0;
+}
+#endif
+
 /*
  * Per-ag incore structure, copies of information in agf and agi, to improve the
  * performance of allocation group selection.
@@ -327,7 +351,6 @@ extern int  xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
                                 bool reserved);
 extern int     xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
 
-extern int     xfs_mount_log_sb(xfs_mount_t *);
 extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
 extern int     xfs_readsb(xfs_mount_t *, int);
 extern void    xfs_freesb(xfs_mount_t *);
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
new file mode 100644 (file)
index 0000000..184c44e
--- /dev/null
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2016 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_ONDISK_H
+#define __XFS_ONDISK_H
+
+#define XFS_CHECK_STRUCT_SIZE(structname, size) \
+       BUILD_BUG_ON_MSG(sizeof(structname) != (size), "XFS: sizeof(" \
+               #structname ") is wrong, expected " #size)
+
+static inline void __init
+xfs_check_ondisk_structs(void)
+{
+       /* ag/file structures */
+       XFS_CHECK_STRUCT_SIZE(struct xfs_acl,                   4);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_acl_entry,             12);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_agf,                   224);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_agfl,                  36);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_agi,                   336);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_bmbt_key,              8);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_bmbt_rec,              16);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_bmdr_block,            4);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block,           72);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dinode,                176);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_disk_dquot,            104);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dqblk,                 136);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dsb,                   264);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr,          56);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key,             4);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec,             16);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_timestamp,             8);
+       XFS_CHECK_STRUCT_SIZE(xfs_alloc_key_t,                  8);
+       XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t,                  4);
+       XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t,                  8);
+       XFS_CHECK_STRUCT_SIZE(xfs_inobt_ptr_t,                  4);
+
+       /* dir/attr trees */
+       XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leaf_hdr,        80);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leafblock,       88);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_rmt_hdr,         56);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_da3_blkinfo,           56);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_da3_intnode,           64);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_da3_node_hdr,          64);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_blk_hdr,          48);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_data_hdr,         64);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_free,             64);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_free_hdr,         64);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf,             64);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf_hdr,         64);
+       XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_entry_t,            8);
+       XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_hdr_t,              32);
+       XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_map_t,              4);
+       XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_local_t,       4);
+
+       /*
+        * m68k has problems with xfs_attr_leaf_name_remote_t, but we pad it to
+        * 4 bytes anyway so it's not obviously a problem.  Hence for the moment
+        * we don't check this structure. This can be re-instated when the attr
+        * definitions are updated to use c99 VLA definitions.
+        *
+       XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_remote_t,      12);
+        */
+
+       XFS_CHECK_STRUCT_SIZE(xfs_attr_leafblock_t,             40);
+       XFS_CHECK_STRUCT_SIZE(xfs_attr_shortform_t,             8);
+       XFS_CHECK_STRUCT_SIZE(xfs_da_blkinfo_t,                 12);
+       XFS_CHECK_STRUCT_SIZE(xfs_da_intnode_t,                 16);
+       XFS_CHECK_STRUCT_SIZE(xfs_da_node_entry_t,              8);
+       XFS_CHECK_STRUCT_SIZE(xfs_da_node_hdr_t,                16);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_data_free_t,             4);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_data_hdr_t,              16);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_data_unused_t,           6);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_free_hdr_t,              16);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_free_t,                  16);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_ino4_t,                  4);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_ino8_t,                  8);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_inou_t,                  8);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_entry_t,            8);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_hdr_t,              16);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_t,                  16);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_tail_t,             4);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_entry_t,              3);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_hdr_t,                10);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_off_t,                2);
+
+       /* log structures */
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dq_logformat,          24);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32,     28);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64,     32);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32,     28);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64,     32);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_extent_32,             12);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_extent_64,             16);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_log_dinode,            176);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_icreate_log,           28);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_ictimestamp,           8);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_32,   52);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_64,   56);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat,        20);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header,          16);
+}
+
+#endif /* __XFS_ONDISK_H */
index 532ab79d38fe376c14a5463a97195b59a61d8f84..be125e1758c1a5e4df36cfb8ec6e3e3643adc534 100644 (file)
@@ -560,6 +560,37 @@ xfs_qm_shrink_count(
        return list_lru_shrink_count(&qi->qi_lru, sc);
 }
 
+STATIC void
+xfs_qm_set_defquota(
+       xfs_mount_t     *mp,
+       uint            type,
+       xfs_quotainfo_t *qinf)
+{
+       xfs_dquot_t             *dqp;
+       struct xfs_def_quota    *defq;
+       int                     error;
+
+       error = xfs_qm_dqread(mp, 0, type, XFS_QMOPT_DOWARN, &dqp);
+
+       if (!error) {
+               xfs_disk_dquot_t        *ddqp = &dqp->q_core;
+
+               defq = xfs_get_defquota(dqp, qinf);
+
+               /*
+                * Timers and warnings have been already set, let's just set the
+                * default limits for this quota type
+                */
+               defq->bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
+               defq->bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
+               defq->ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
+               defq->isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
+               defq->rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
+               defq->rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
+               xfs_qm_dqdestroy(dqp);
+       }
+}
+
 /*
  * This initializes all the quota information that's kept in the
  * mount structure
@@ -606,19 +637,19 @@ xfs_qm_init_quotainfo(
         * We try to get the limits from the superuser's limits fields.
         * This is quite hacky, but it is standard quota practice.
         *
-        * We look at the USR dquot with id == 0 first, but if user quotas
-        * are not enabled we goto the GRP dquot with id == 0.
-        * We don't really care to keep separate default limits for user
-        * and group quotas, at least not at this point.
-        *
         * Since we may not have done a quotacheck by this point, just read
         * the dquot without attaching it to any hashtables or lists.
+        *
+        * Timers and warnings are globally set by the first timer found in
+        * user/group/proj quota types, otherwise a default value is used.
+        * This should be split into different fields per quota type.
         */
        error = xfs_qm_dqread(mp, 0,
                        XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
                         (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
                          XFS_DQ_PROJ),
                        XFS_QMOPT_DOWARN, &dqp);
+
        if (!error) {
                xfs_disk_dquot_t        *ddqp = &dqp->q_core;
 
@@ -639,13 +670,6 @@ xfs_qm_init_quotainfo(
                        be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
                qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
                        be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
-               qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
-               qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
-               qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
-               qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
-               qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
-               qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
-
                xfs_qm_dqdestroy(dqp);
        } else {
                qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
@@ -656,6 +680,13 @@ xfs_qm_init_quotainfo(
                qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
        }
 
+       if (XFS_IS_UQUOTA_RUNNING(mp))
+               xfs_qm_set_defquota(mp, XFS_DQ_USER, qinf);
+       if (XFS_IS_GQUOTA_RUNNING(mp))
+               xfs_qm_set_defquota(mp, XFS_DQ_GROUP, qinf);
+       if (XFS_IS_PQUOTA_RUNNING(mp))
+               xfs_qm_set_defquota(mp, XFS_DQ_PROJ, qinf);
+
        qinf->qi_shrinker.count_objects = xfs_qm_shrink_count;
        qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
        qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
index 996a04064894cf4c07ffa28d058150243ffd86a4..2975a822e9f044cbb1ff66ec217d29ab29bf9f22 100644 (file)
@@ -53,6 +53,15 @@ extern struct kmem_zone      *xfs_qm_dqtrxzone;
  */
 #define XFS_DQUOT_CLUSTER_SIZE_FSB     (xfs_filblks_t)1
 
+struct xfs_def_quota {
+       xfs_qcnt_t       bhardlimit;     /* default data blk hard limit */
+       xfs_qcnt_t       bsoftlimit;     /* default data blk soft limit */
+       xfs_qcnt_t       ihardlimit;     /* default inode count hard limit */
+       xfs_qcnt_t       isoftlimit;     /* default inode count soft limit */
+       xfs_qcnt_t       rtbhardlimit;   /* default realtime blk hard limit */
+       xfs_qcnt_t       rtbsoftlimit;   /* default realtime blk soft limit */
+};
+
 /*
  * Various quota information for individual filesystems.
  * The mount structure keeps a pointer to this.
@@ -76,12 +85,9 @@ typedef struct xfs_quotainfo {
        struct mutex     qi_quotaofflock;/* to serialize quotaoff */
        xfs_filblks_t    qi_dqchunklen;  /* # BBs in a chunk of dqs */
        uint             qi_dqperchunk;  /* # ondisk dqs in above chunk */
-       xfs_qcnt_t       qi_bhardlimit;  /* default data blk hard limit */
-       xfs_qcnt_t       qi_bsoftlimit;  /* default data blk soft limit */
-       xfs_qcnt_t       qi_ihardlimit;  /* default inode count hard limit */
-       xfs_qcnt_t       qi_isoftlimit;  /* default inode count soft limit */
-       xfs_qcnt_t       qi_rtbhardlimit;/* default realtime blk hard limit */
-       xfs_qcnt_t       qi_rtbsoftlimit;/* default realtime blk soft limit */
+       struct xfs_def_quota    qi_usr_default;
+       struct xfs_def_quota    qi_grp_default;
+       struct xfs_def_quota    qi_prj_default;
        struct shrinker  qi_shrinker;
 } xfs_quotainfo_t;
 
@@ -104,15 +110,15 @@ xfs_dquot_tree(
 }
 
 static inline struct xfs_inode *
-xfs_dq_to_quota_inode(struct xfs_dquot *dqp)
+xfs_quota_inode(xfs_mount_t *mp, uint dq_flags)
 {
-       switch (dqp->dq_flags & XFS_DQ_ALLTYPES) {
+       switch (dq_flags & XFS_DQ_ALLTYPES) {
        case XFS_DQ_USER:
-               return dqp->q_mount->m_quotainfo->qi_uquotaip;
+               return mp->m_quotainfo->qi_uquotaip;
        case XFS_DQ_GROUP:
-               return dqp->q_mount->m_quotainfo->qi_gquotaip;
+               return mp->m_quotainfo->qi_gquotaip;
        case XFS_DQ_PROJ:
-               return dqp->q_mount->m_quotainfo->qi_pquotaip;
+               return mp->m_quotainfo->qi_pquotaip;
        default:
                ASSERT(0);
        }
@@ -164,11 +170,27 @@ extern void               xfs_qm_dqrele_all_inodes(struct xfs_mount *, uint);
 
 /* quota ops */
 extern int             xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint);
-extern int             xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t,
-                                       uint, struct qc_dqblk *);
+extern int             xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t *,
+                                       uint, struct qc_dqblk *, uint);
 extern int             xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint,
                                        struct qc_dqblk *);
 extern int             xfs_qm_scall_quotaon(struct xfs_mount *, uint);
 extern int             xfs_qm_scall_quotaoff(struct xfs_mount *, uint);
 
+static inline struct xfs_def_quota *
+xfs_get_defquota(struct xfs_dquot *dqp, struct xfs_quotainfo *qi)
+{
+       struct xfs_def_quota *defq;
+
+       if (XFS_QM_ISUDQ(dqp))
+               defq = &qi->qi_usr_default;
+       else if (XFS_QM_ISGDQ(dqp))
+               defq = &qi->qi_grp_default;
+       else {
+               ASSERT(XFS_QM_ISPDQ(dqp));
+               defq = &qi->qi_prj_default;
+       }
+       return defq;
+}
+
 #endif /* __XFS_QM_H__ */
index 3640c6e896af70eb2e910a31786cb7ac2298f847..f4d0e0a8f517c65913b8d45f383450384576b39e 100644 (file)
@@ -404,6 +404,7 @@ xfs_qm_scall_setqlim(
        struct xfs_disk_dquot   *ddq;
        struct xfs_dquot        *dqp;
        struct xfs_trans        *tp;
+       struct xfs_def_quota    *defq;
        int                     error;
        xfs_qcnt_t              hard, soft;
 
@@ -431,6 +432,8 @@ xfs_qm_scall_setqlim(
                ASSERT(error != -ENOENT);
                goto out_unlock;
        }
+
+       defq = xfs_get_defquota(dqp, q);
        xfs_dqunlock(dqp);
 
        tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
@@ -458,8 +461,8 @@ xfs_qm_scall_setqlim(
                ddq->d_blk_softlimit = cpu_to_be64(soft);
                xfs_dquot_set_prealloc_limits(dqp);
                if (id == 0) {
-                       q->qi_bhardlimit = hard;
-                       q->qi_bsoftlimit = soft;
+                       defq->bhardlimit = hard;
+                       defq->bsoftlimit = soft;
                }
        } else {
                xfs_debug(mp, "blkhard %Ld < blksoft %Ld", hard, soft);
@@ -474,8 +477,8 @@ xfs_qm_scall_setqlim(
                ddq->d_rtb_hardlimit = cpu_to_be64(hard);
                ddq->d_rtb_softlimit = cpu_to_be64(soft);
                if (id == 0) {
-                       q->qi_rtbhardlimit = hard;
-                       q->qi_rtbsoftlimit = soft;
+                       defq->rtbhardlimit = hard;
+                       defq->rtbsoftlimit = soft;
                }
        } else {
                xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld", hard, soft);
@@ -491,8 +494,8 @@ xfs_qm_scall_setqlim(
                ddq->d_ino_hardlimit = cpu_to_be64(hard);
                ddq->d_ino_softlimit = cpu_to_be64(soft);
                if (id == 0) {
-                       q->qi_ihardlimit = hard;
-                       q->qi_isoftlimit = soft;
+                       defq->ihardlimit = hard;
+                       defq->isoftlimit = soft;
                }
        } else {
                xfs_debug(mp, "ihard %Ld < isoft %Ld", hard, soft);
@@ -635,9 +638,10 @@ out:
 int
 xfs_qm_scall_getquota(
        struct xfs_mount        *mp,
-       xfs_dqid_t              id,
+       xfs_dqid_t              *id,
        uint                    type,
-       struct qc_dqblk         *dst)
+       struct qc_dqblk         *dst,
+       uint                    dqget_flags)
 {
        struct xfs_dquot        *dqp;
        int                     error;
@@ -647,7 +651,7 @@ xfs_qm_scall_getquota(
         * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
         * exist, we'll get ENOENT back.
         */
-       error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp);
+       error = xfs_qm_dqget(mp, NULL, *id, type, dqget_flags, &dqp);
        if (error)
                return error;
 
@@ -660,6 +664,9 @@ xfs_qm_scall_getquota(
                goto out_put;
        }
 
+       /* Fill in the ID we actually read from disk */
+       *id = be32_to_cpu(dqp->q_core.d_id);
+
        memset(dst, 0, sizeof(*dst));
        dst->d_spc_hardlimit =
                XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit));
@@ -701,7 +708,7 @@ xfs_qm_scall_getquota(
        if (((XFS_IS_UQUOTA_ENFORCED(mp) && type == XFS_DQ_USER) ||
             (XFS_IS_GQUOTA_ENFORCED(mp) && type == XFS_DQ_GROUP) ||
             (XFS_IS_PQUOTA_ENFORCED(mp) && type == XFS_DQ_PROJ)) &&
-           id != 0) {
+           *id != 0) {
                if ((dst->d_space > dst->d_spc_softlimit) &&
                    (dst->d_spc_softlimit > 0)) {
                        ASSERT(dst->d_spc_timer != 0);
index 7795e0d01382a60798b4e83f35ba6db725fe8779..f82d79a8c694a8f32427b8d0e2923dbde1670d88 100644 (file)
@@ -231,14 +231,45 @@ xfs_fs_get_dqblk(
        struct qc_dqblk         *qdq)
 {
        struct xfs_mount        *mp = XFS_M(sb);
+       xfs_dqid_t              id;
 
        if (!XFS_IS_QUOTA_RUNNING(mp))
                return -ENOSYS;
        if (!XFS_IS_QUOTA_ON(mp))
                return -ESRCH;
 
-       return xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid),
-                                     xfs_quota_type(qid.type), qdq);
+       id = from_kqid(&init_user_ns, qid);
+       return xfs_qm_scall_getquota(mp, &id,
+                                     xfs_quota_type(qid.type), qdq, 0);
+}
+
+/* Return quota info for active quota >= this qid */
+STATIC int
+xfs_fs_get_nextdqblk(
+       struct super_block      *sb,
+       struct kqid             *qid,
+       struct qc_dqblk         *qdq)
+{
+       int                     ret;
+       struct xfs_mount        *mp = XFS_M(sb);
+       xfs_dqid_t              id;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp))
+               return -ENOSYS;
+       if (!XFS_IS_QUOTA_ON(mp))
+               return -ESRCH;
+
+       id = from_kqid(&init_user_ns, *qid);
+       ret = xfs_qm_scall_getquota(mp, &id,
+                                   xfs_quota_type(qid->type), qdq,
+                                   XFS_QMOPT_DQNEXT);
+       if (ret)
+               return ret;
+
+       /* ID may be different, so convert back what we got */
+       *qid = make_kqid(current_user_ns(), qid->type, id);
+       return 0;
+       
 }
 
 STATIC int
@@ -267,5 +298,6 @@ const struct quotactl_ops xfs_quotactl_operations = {
        .quota_disable          = xfs_quota_disable,
        .rm_xquota              = xfs_fs_rm_xquota,
        .get_dqblk              = xfs_fs_get_dqblk,
+       .get_nextdqblk          = xfs_fs_get_nextdqblk,
        .set_dqblk              = xfs_fs_set_dqblk,
 };
index be02a68b2fe292e077c84862f93271dd049c3359..abf44435d04a3f4b898e21a00e45ee8ae607738a 100644 (file)
@@ -1272,7 +1272,7 @@ xfs_rtpick_extent(
 
        ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
 
-       seqp = (__uint64_t *)&mp->m_rbmip->i_d.di_atime;
+       seqp = (__uint64_t *)&VFS_I(mp->m_rbmip)->i_atime;
        if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) {
                mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
                *seqp = 0;
index 59c9b7bd958d6a034d792d3c777042173b3b817e..d760934109b5d628891ea3e91fa1a9e3f68ec36b 100644 (file)
@@ -45,6 +45,7 @@
 #include "xfs_filestream.h"
 #include "xfs_quota.h"
 #include "xfs_sysfs.h"
+#include "xfs_ondisk.h"
 
 #include <linux/namei.h>
 #include <linux/init.h>
@@ -65,83 +66,85 @@ static struct kset *xfs_kset;               /* top-level xfs sysfs dir */
 static struct xfs_kobj xfs_dbg_kobj;   /* global debug sysfs attrs */
 #endif
 
-#define MNTOPT_LOGBUFS "logbufs"       /* number of XFS log buffers */
-#define MNTOPT_LOGBSIZE        "logbsize"      /* size of XFS log buffers */
-#define MNTOPT_LOGDEV  "logdev"        /* log device */
-#define MNTOPT_RTDEV   "rtdev"         /* realtime I/O device */
-#define MNTOPT_BIOSIZE "biosize"       /* log2 of preferred buffered io size */
-#define MNTOPT_WSYNC   "wsync"         /* safe-mode nfs compatible mount */
-#define MNTOPT_NOALIGN "noalign"       /* turn off stripe alignment */
-#define MNTOPT_SWALLOC "swalloc"       /* turn on stripe width allocation */
-#define MNTOPT_SUNIT   "sunit"         /* data volume stripe unit */
-#define MNTOPT_SWIDTH  "swidth"        /* data volume stripe width */
-#define MNTOPT_NOUUID  "nouuid"        /* ignore filesystem UUID */
-#define MNTOPT_MTPT    "mtpt"          /* filesystem mount point */
-#define MNTOPT_GRPID   "grpid"         /* group-ID from parent directory */
-#define MNTOPT_NOGRPID "nogrpid"       /* group-ID from current process */
-#define MNTOPT_BSDGROUPS    "bsdgroups"    /* group-ID from parent directory */
-#define MNTOPT_SYSVGROUPS   "sysvgroups"   /* group-ID from current process */
-#define MNTOPT_ALLOCSIZE    "allocsize"    /* preferred allocation size */
-#define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
-#define MNTOPT_BARRIER "barrier"       /* use writer barriers for log write and
-                                        * unwritten extent conversion */
-#define MNTOPT_NOBARRIER "nobarrier"   /* .. disable */
-#define MNTOPT_64BITINODE   "inode64"  /* inodes can be allocated anywhere */
-#define MNTOPT_32BITINODE   "inode32"  /* inode allocation limited to
-                                        * XFS_MAXINUMBER_32 */
-#define MNTOPT_IKEEP   "ikeep"         /* do not free empty inode clusters */
-#define MNTOPT_NOIKEEP "noikeep"       /* free empty inode clusters */
-#define MNTOPT_LARGEIO    "largeio"    /* report large I/O sizes in stat() */
-#define MNTOPT_NOLARGEIO   "nolargeio" /* do not report large I/O sizes
-                                        * in stat(). */
-#define MNTOPT_ATTR2   "attr2"         /* do use attr2 attribute format */
-#define MNTOPT_NOATTR2 "noattr2"       /* do not use attr2 attribute format */
-#define MNTOPT_FILESTREAM  "filestreams" /* use filestreams allocator */
-#define MNTOPT_QUOTA   "quota"         /* disk quotas (user) */
-#define MNTOPT_NOQUOTA "noquota"       /* no quotas */
-#define MNTOPT_USRQUOTA        "usrquota"      /* user quota enabled */
-#define MNTOPT_GRPQUOTA        "grpquota"      /* group quota enabled */
-#define MNTOPT_PRJQUOTA        "prjquota"      /* project quota enabled */
-#define MNTOPT_UQUOTA  "uquota"        /* user quota (IRIX variant) */
-#define MNTOPT_GQUOTA  "gquota"        /* group quota (IRIX variant) */
-#define MNTOPT_PQUOTA  "pquota"        /* project quota (IRIX variant) */
-#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */
-#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
-#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
-#define MNTOPT_QUOTANOENF  "qnoenforce"        /* same as uqnoenforce */
-#define MNTOPT_DISCARD    "discard"    /* Discard unused blocks */
-#define MNTOPT_NODISCARD   "nodiscard" /* Do not discard unused blocks */
-
-#define MNTOPT_DAX     "dax"           /* Enable direct access to bdev pages */
-
 /*
  * Table driven mount option parser.
- *
- * Currently only used for remount, but it will be used for mount
- * in the future, too.
  */
 enum {
-       Opt_barrier,
-       Opt_nobarrier,
-       Opt_inode64,
-       Opt_inode32,
-       Opt_err
+       Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_biosize,
+       Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
+       Opt_mtpt, Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
+       Opt_allocsize, Opt_norecovery, Opt_barrier, Opt_nobarrier,
+       Opt_inode64, Opt_inode32, Opt_ikeep, Opt_noikeep,
+       Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2, Opt_filestreams,
+       Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota, Opt_prjquota,
+       Opt_uquota, Opt_gquota, Opt_pquota,
+       Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
+       Opt_discard, Opt_nodiscard, Opt_dax, Opt_err,
 };
 
 static const match_table_t tokens = {
-       {Opt_barrier, "barrier"},
-       {Opt_nobarrier, "nobarrier"},
-       {Opt_inode64, "inode64"},
-       {Opt_inode32, "inode32"},
-       {Opt_err, NULL}
+       {Opt_logbufs,   "logbufs=%u"},  /* number of XFS log buffers */
+       {Opt_logbsize,  "logbsize=%s"}, /* size of XFS log buffers */
+       {Opt_logdev,    "logdev=%s"},   /* log device */
+       {Opt_rtdev,     "rtdev=%s"},    /* realtime I/O device */
+       {Opt_biosize,   "biosize=%u"},  /* log2 of preferred buffered io size */
+       {Opt_wsync,     "wsync"},       /* safe-mode nfs compatible mount */
+       {Opt_noalign,   "noalign"},     /* turn off stripe alignment */
+       {Opt_swalloc,   "swalloc"},     /* turn on stripe width allocation */
+       {Opt_sunit,     "sunit=%u"},    /* data volume stripe unit */
+       {Opt_swidth,    "swidth=%u"},   /* data volume stripe width */
+       {Opt_nouuid,    "nouuid"},      /* ignore filesystem UUID */
+       {Opt_mtpt,      "mtpt"},        /* filesystem mount point */
+       {Opt_grpid,     "grpid"},       /* group-ID from parent directory */
+       {Opt_nogrpid,   "nogrpid"},     /* group-ID from current process */
+       {Opt_bsdgroups, "bsdgroups"},   /* group-ID from parent directory */
+       {Opt_sysvgroups,"sysvgroups"},  /* group-ID from current process */
+       {Opt_allocsize, "allocsize=%s"},/* preferred allocation size */
+       {Opt_norecovery,"norecovery"},  /* don't run XFS recovery */
+       {Opt_barrier,   "barrier"},     /* use writer barriers for log write and
+                                        * unwritten extent conversion */
+       {Opt_nobarrier, "nobarrier"},   /* .. disable */
+       {Opt_inode64,   "inode64"},     /* inodes can be allocated anywhere */
+       {Opt_inode32,   "inode32"},     /* inode allocation limited to
+                                        * XFS_MAXINUMBER_32 */
+       {Opt_ikeep,     "ikeep"},       /* do not free empty inode clusters */
+       {Opt_noikeep,   "noikeep"},     /* free empty inode clusters */
+       {Opt_largeio,   "largeio"},     /* report large I/O sizes in stat() */
+       {Opt_nolargeio, "nolargeio"},   /* do not report large I/O sizes
+                                        * in stat(). */
+       {Opt_attr2,     "attr2"},       /* do use attr2 attribute format */
+       {Opt_noattr2,   "noattr2"},     /* do not use attr2 attribute format */
+       {Opt_filestreams,"filestreams"},/* use filestreams allocator */
+       {Opt_quota,     "quota"},       /* disk quotas (user) */
+       {Opt_noquota,   "noquota"},     /* no quotas */
+       {Opt_usrquota,  "usrquota"},    /* user quota enabled */
+       {Opt_grpquota,  "grpquota"},    /* group quota enabled */
+       {Opt_prjquota,  "prjquota"},    /* project quota enabled */
+       {Opt_uquota,    "uquota"},      /* user quota (IRIX variant) */
+       {Opt_gquota,    "gquota"},      /* group quota (IRIX variant) */
+       {Opt_pquota,    "pquota"},      /* project quota (IRIX variant) */
+       {Opt_uqnoenforce,"uqnoenforce"},/* user quota limit enforcement */
+       {Opt_gqnoenforce,"gqnoenforce"},/* group quota limit enforcement */
+       {Opt_pqnoenforce,"pqnoenforce"},/* project quota limit enforcement */
+       {Opt_qnoenforce, "qnoenforce"}, /* same as uqnoenforce */
+       {Opt_discard,   "discard"},     /* Discard unused blocks */
+       {Opt_nodiscard, "nodiscard"},   /* Do not discard unused blocks */
+
+       {Opt_dax,       "dax"},         /* Enable direct access to bdev pages */
+       {Opt_err,       NULL},
 };
 
 
 STATIC int
-suffix_kstrtoint(char *s, unsigned int base, int *res)
+suffix_kstrtoint(const substring_t *s, unsigned int base, int *res)
 {
        int     last, shift_left_factor = 0, _res;
-       char    *value = s;
+       char    *value;
+       int     ret = 0;
+
+       value = match_strdup(s);
+       if (!value)
+               return -ENOMEM;
 
        last = strlen(value) - 1;
        if (value[last] == 'K' || value[last] == 'k') {
@@ -157,10 +160,11 @@ suffix_kstrtoint(char *s, unsigned int base, int *res)
                value[last] = '\0';
        }
 
-       if (kstrtoint(s, base, &_res))
-               return -EINVAL;
+       if (kstrtoint(value, base, &_res))
+               ret = -EINVAL;
+       kfree(value);
        *res = _res << shift_left_factor;
-       return 0;
+       return ret;
 }
 
 /*
@@ -169,14 +173,19 @@ suffix_kstrtoint(char *s, unsigned int base, int *res)
  *
  * Note that this function leaks the various device name allocations on
  * failure.  The caller takes care of them.
+ *
+ * *sb is const because this is also used to test options on the remount
+ * path, and we don't want this to have any side effects at remount time.
+ * Today this function does not change *sb, but just to future-proof...
  */
 STATIC int
 xfs_parseargs(
        struct xfs_mount        *mp,
        char                    *options)
 {
-       struct super_block      *sb = mp->m_super;
-       char                    *this_char, *value;
+       const struct super_block *sb = mp->m_super;
+       char                    *p;
+       substring_t             args[MAX_OPT_ARGS];
        int                     dsunit = 0;
        int                     dswidth = 0;
        int                     iosize = 0;
@@ -217,152 +226,152 @@ xfs_parseargs(
        if (!options)
                goto done;
 
-       while ((this_char = strsep(&options, ",")) != NULL) {
-               if (!*this_char)
+       while ((p = strsep(&options, ",")) != NULL) {
+               int             token;
+
+               if (!*p)
                        continue;
-               if ((value = strchr(this_char, '=')) != NULL)
-                       *value++ = 0;
 
-               if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return -EINVAL;
-                       }
-                       if (kstrtoint(value, 10, &mp->m_logbufs))
-                               return -EINVAL;
-               } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return -EINVAL;
-                       }
-                       if (suffix_kstrtoint(value, 10, &mp->m_logbsize))
+               token = match_token(p, tokens, args);
+               switch (token) {
+               case Opt_logbufs:
+                       if (match_int(args, &mp->m_logbufs))
                                return -EINVAL;
-               } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
+                       break;
+               case Opt_logbsize:
+                       if (suffix_kstrtoint(args, 10, &mp->m_logbsize))
                                return -EINVAL;
-                       }
-                       mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+                       break;
+               case Opt_logdev:
+                       mp->m_logname = match_strdup(args);
                        if (!mp->m_logname)
                                return -ENOMEM;
-               } else if (!strcmp(this_char, MNTOPT_MTPT)) {
-                       xfs_warn(mp, "%s option not allowed on this system",
-                               this_char);
+                       break;
+               case Opt_mtpt:
+                       xfs_warn(mp, "%s option not allowed on this system", p);
                        return -EINVAL;
-               } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return -EINVAL;
-                       }
-                       mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+               case Opt_rtdev:
+                       mp->m_rtname = match_strdup(args);
                        if (!mp->m_rtname)
                                return -ENOMEM;
-               } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE) ||
-                          !strcmp(this_char, MNTOPT_BIOSIZE)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return -EINVAL;
-                       }
-                       if (suffix_kstrtoint(value, 10, &iosize))
+                       break;
+               case Opt_allocsize:
+               case Opt_biosize:
+                       if (suffix_kstrtoint(args, 10, &iosize))
                                return -EINVAL;
                        iosizelog = ffs(iosize) - 1;
-               } else if (!strcmp(this_char, MNTOPT_GRPID) ||
-                          !strcmp(this_char, MNTOPT_BSDGROUPS)) {
+                       break;
+               case Opt_grpid:
+               case Opt_bsdgroups:
                        mp->m_flags |= XFS_MOUNT_GRPID;
-               } else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
-                          !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
+                       break;
+               case Opt_nogrpid:
+               case Opt_sysvgroups:
                        mp->m_flags &= ~XFS_MOUNT_GRPID;
-               } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
+                       break;
+               case Opt_wsync:
                        mp->m_flags |= XFS_MOUNT_WSYNC;
-               } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
+                       break;
+               case Opt_norecovery:
                        mp->m_flags |= XFS_MOUNT_NORECOVERY;
-               } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
+                       break;
+               case Opt_noalign:
                        mp->m_flags |= XFS_MOUNT_NOALIGN;
-               } else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
+                       break;
+               case Opt_swalloc:
                        mp->m_flags |= XFS_MOUNT_SWALLOC;
-               } else if (!strcmp(this_char, MNTOPT_SUNIT)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return -EINVAL;
-                       }
-                       if (kstrtoint(value, 10, &dsunit))
-                               return -EINVAL;
-               } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
+                       break;
+               case Opt_sunit:
+                       if (match_int(args, &dsunit))
                                return -EINVAL;
-                       }
-                       if (kstrtoint(value, 10, &dswidth))
+                       break;
+               case Opt_swidth:
+                       if (match_int(args, &dswidth))
                                return -EINVAL;
-               } else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
+                       break;
+               case Opt_inode32:
                        mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
-               } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
+                       break;
+               case Opt_inode64:
                        mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
-               } else if (!strcmp(this_char, MNTOPT_NOUUID)) {
+                       break;
+               case Opt_nouuid:
                        mp->m_flags |= XFS_MOUNT_NOUUID;
-               } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
+                       break;
+               case Opt_barrier:
                        mp->m_flags |= XFS_MOUNT_BARRIER;
-               } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
+                       break;
+               case Opt_nobarrier:
                        mp->m_flags &= ~XFS_MOUNT_BARRIER;
-               } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
+                       break;
+               case Opt_ikeep:
                        mp->m_flags |= XFS_MOUNT_IKEEP;
-               } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
+                       break;
+               case Opt_noikeep:
                        mp->m_flags &= ~XFS_MOUNT_IKEEP;
-               } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
+                       break;
+               case Opt_largeio:
                        mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
-               } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
+                       break;
+               case Opt_nolargeio:
                        mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-               } else if (!strcmp(this_char, MNTOPT_ATTR2)) {
+                       break;
+               case Opt_attr2:
                        mp->m_flags |= XFS_MOUNT_ATTR2;
-               } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
+                       break;
+               case Opt_noattr2:
                        mp->m_flags &= ~XFS_MOUNT_ATTR2;
                        mp->m_flags |= XFS_MOUNT_NOATTR2;
-               } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
+                       break;
+               case Opt_filestreams:
                        mp->m_flags |= XFS_MOUNT_FILESTREAMS;
-               } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
+                       break;
+               case Opt_noquota:
                        mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
                        mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
                        mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
-               } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
-                          !strcmp(this_char, MNTOPT_UQUOTA) ||
-                          !strcmp(this_char, MNTOPT_USRQUOTA)) {
+                       break;
+               case Opt_quota:
+               case Opt_uquota:
+               case Opt_usrquota:
                        mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
                                         XFS_UQUOTA_ENFD);
-               } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
-                          !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
+                       break;
+               case Opt_qnoenforce:
+               case Opt_uqnoenforce:
                        mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
                        mp->m_qflags &= ~XFS_UQUOTA_ENFD;
-               } else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
-                          !strcmp(this_char, MNTOPT_PRJQUOTA)) {
+                       break;
+               case Opt_pquota:
+               case Opt_prjquota:
                        mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
                                         XFS_PQUOTA_ENFD);
-               } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
+                       break;
+               case Opt_pqnoenforce:
                        mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
                        mp->m_qflags &= ~XFS_PQUOTA_ENFD;
-               } else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
-                          !strcmp(this_char, MNTOPT_GRPQUOTA)) {
+               case Opt_gquota:
+               case Opt_grpquota:
                        mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
                                         XFS_GQUOTA_ENFD);
-               } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
+                       break;
+               case Opt_gqnoenforce:
                        mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
                        mp->m_qflags &= ~XFS_GQUOTA_ENFD;
-               } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
+                       break;
+               case Opt_discard:
                        mp->m_flags |= XFS_MOUNT_DISCARD;
-               } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
+                       break;
+               case Opt_nodiscard:
                        mp->m_flags &= ~XFS_MOUNT_DISCARD;
+                       break;
 #ifdef CONFIG_FS_DAX
-               } else if (!strcmp(this_char, MNTOPT_DAX)) {
+               case Opt_dax:
                        mp->m_flags |= XFS_MOUNT_DAX;
+                       break;
 #endif
-               } else {
-                       xfs_warn(mp, "unknown mount option [%s].", this_char);
+               default:
+                       xfs_warn(mp, "unknown mount option [%s].", p);
                        return -EINVAL;
                }
        }
@@ -461,25 +470,25 @@ xfs_showargs(
 {
        static struct proc_xfs_info xfs_info_set[] = {
                /* the few simple ones we can get from the mount struct */
-               { XFS_MOUNT_IKEEP,              "," MNTOPT_IKEEP },
-               { XFS_MOUNT_WSYNC,              "," MNTOPT_WSYNC },
-               { XFS_MOUNT_NOALIGN,            "," MNTOPT_NOALIGN },
-               { XFS_MOUNT_SWALLOC,            "," MNTOPT_SWALLOC },
-               { XFS_MOUNT_NOUUID,             "," MNTOPT_NOUUID },
-               { XFS_MOUNT_NORECOVERY,         "," MNTOPT_NORECOVERY },
-               { XFS_MOUNT_ATTR2,              "," MNTOPT_ATTR2 },
-               { XFS_MOUNT_FILESTREAMS,        "," MNTOPT_FILESTREAM },
-               { XFS_MOUNT_GRPID,              "," MNTOPT_GRPID },
-               { XFS_MOUNT_DISCARD,            "," MNTOPT_DISCARD },
-               { XFS_MOUNT_SMALL_INUMS,        "," MNTOPT_32BITINODE },
-               { XFS_MOUNT_DAX,                "," MNTOPT_DAX },
+               { XFS_MOUNT_IKEEP,              ",ikeep" },
+               { XFS_MOUNT_WSYNC,              ",wsync" },
+               { XFS_MOUNT_NOALIGN,            ",noalign" },
+               { XFS_MOUNT_SWALLOC,            ",swalloc" },
+               { XFS_MOUNT_NOUUID,             ",nouuid" },
+               { XFS_MOUNT_NORECOVERY,         ",norecovery" },
+               { XFS_MOUNT_ATTR2,              ",attr2" },
+               { XFS_MOUNT_FILESTREAMS,        ",filestreams" },
+               { XFS_MOUNT_GRPID,              ",grpid" },
+               { XFS_MOUNT_DISCARD,            ",discard" },
+               { XFS_MOUNT_SMALL_INUMS,        ",inode32" },
+               { XFS_MOUNT_DAX,                ",dax" },
                { 0, NULL }
        };
        static struct proc_xfs_info xfs_info_unset[] = {
                /* the few simple ones we can get from the mount struct */
-               { XFS_MOUNT_COMPAT_IOSIZE,      "," MNTOPT_LARGEIO },
-               { XFS_MOUNT_BARRIER,            "," MNTOPT_NOBARRIER },
-               { XFS_MOUNT_SMALL_INUMS,        "," MNTOPT_64BITINODE },
+               { XFS_MOUNT_COMPAT_IOSIZE,      ",largeio" },
+               { XFS_MOUNT_BARRIER,            ",nobarrier" },
+               { XFS_MOUNT_SMALL_INUMS,        ",inode64" },
                { 0, NULL }
        };
        struct proc_xfs_info    *xfs_infop;
@@ -494,46 +503,46 @@ xfs_showargs(
        }
 
        if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
-               seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
+               seq_printf(m, ",allocsize=%dk",
                                (int)(1 << mp->m_writeio_log) >> 10);
 
        if (mp->m_logbufs > 0)
-               seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
+               seq_printf(m, ",logbufs=%d", mp->m_logbufs);
        if (mp->m_logbsize > 0)
-               seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
+               seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);
 
        if (mp->m_logname)
-               seq_show_option(m, MNTOPT_LOGDEV, mp->m_logname);
+               seq_show_option(m, "logdev", mp->m_logname);
        if (mp->m_rtname)
-               seq_show_option(m, MNTOPT_RTDEV, mp->m_rtname);
+               seq_show_option(m, "rtdev", mp->m_rtname);
 
        if (mp->m_dalign > 0)
-               seq_printf(m, "," MNTOPT_SUNIT "=%d",
+               seq_printf(m, ",sunit=%d",
                                (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
        if (mp->m_swidth > 0)
-               seq_printf(m, "," MNTOPT_SWIDTH "=%d",
+               seq_printf(m, ",swidth=%d",
                                (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
 
        if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
-               seq_puts(m, "," MNTOPT_USRQUOTA);
+               seq_puts(m, ",usrquota");
        else if (mp->m_qflags & XFS_UQUOTA_ACCT)
-               seq_puts(m, "," MNTOPT_UQUOTANOENF);
+               seq_puts(m, ",uqnoenforce");
 
        if (mp->m_qflags & XFS_PQUOTA_ACCT) {
                if (mp->m_qflags & XFS_PQUOTA_ENFD)
-                       seq_puts(m, "," MNTOPT_PRJQUOTA);
+                       seq_puts(m, ",prjquota");
                else
-                       seq_puts(m, "," MNTOPT_PQUOTANOENF);
+                       seq_puts(m, ",pqnoenforce");
        }
        if (mp->m_qflags & XFS_GQUOTA_ACCT) {
                if (mp->m_qflags & XFS_GQUOTA_ENFD)
-                       seq_puts(m, "," MNTOPT_GRPQUOTA);
+                       seq_puts(m, ",grpquota");
                else
-                       seq_puts(m, "," MNTOPT_GQUOTANOENF);
+                       seq_puts(m, ",gqnoenforce");
        }
 
        if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
-               seq_puts(m, "," MNTOPT_NOQUOTA);
+               seq_puts(m, ",noquota");
 
        return 0;
 }
@@ -572,23 +581,35 @@ xfs_max_file_offset(
 }
 
 /*
- * xfs_set_inode32() and xfs_set_inode64() are passed an agcount
- * because in the growfs case, mp->m_sb.sb_agcount is not updated
- * yet to the potentially higher ag count.
+ * Set parameters for inode allocation heuristics, taking into account
+ * filesystem size and inode32/inode64 mount options; i.e. specifically
+ * whether or not XFS_MOUNT_SMALL_INUMS is set.
+ *
+ * Inode allocation patterns are altered only if inode32 is requested
+ * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large.
+ * If altered, XFS_MOUNT_32BITINODES is set as well.
+ *
+ * An agcount independent of that in the mount structure is provided
+ * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
+ * to the potentially higher ag count.
+ *
+ * Returns the maximum AG index which may contain inodes.
  */
 xfs_agnumber_t
-xfs_set_inode32(struct xfs_mount *mp, xfs_agnumber_t agcount)
+xfs_set_inode_alloc(
+       struct xfs_mount *mp,
+       xfs_agnumber_t  agcount)
 {
-       xfs_agnumber_t  index = 0;
+       xfs_agnumber_t  index;
        xfs_agnumber_t  maxagi = 0;
        xfs_sb_t        *sbp = &mp->m_sb;
        xfs_agnumber_t  max_metadata;
        xfs_agino_t     agino;
        xfs_ino_t       ino;
-       xfs_perag_t     *pag;
 
-       /* Calculate how much should be reserved for inodes to meet
-        * the max inode percentage.
+       /*
+        * Calculate how much should be reserved for inodes to meet
+        * the max inode percentage.  Used only for inode32.
         */
        if (mp->m_maxicount) {
                __uint64_t      icount;
@@ -602,54 +623,48 @@ xfs_set_inode32(struct xfs_mount *mp, xfs_agnumber_t agcount)
                max_metadata = agcount;
        }
 
+       /* Get the last possible inode in the filesystem */
        agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
+       ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
+
+       /*
+        * If user asked for no more than 32-bit inodes, and the fs is
+        * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter
+        * the allocator to accommodate the request.
+        */
+       if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
+               mp->m_flags |= XFS_MOUNT_32BITINODES;
+       else
+               mp->m_flags &= ~XFS_MOUNT_32BITINODES;
 
        for (index = 0; index < agcount; index++) {
-               ino = XFS_AGINO_TO_INO(mp, index, agino);
+               struct xfs_perag        *pag;
 
-               if (ino > XFS_MAXINUMBER_32) {
-                       pag = xfs_perag_get(mp, index);
-                       pag->pagi_inodeok = 0;
-                       pag->pagf_metadata = 0;
-                       xfs_perag_put(pag);
-                       continue;
-               }
+               ino = XFS_AGINO_TO_INO(mp, index, agino);
 
                pag = xfs_perag_get(mp, index);
-               pag->pagi_inodeok = 1;
-               maxagi++;
-               if (index < max_metadata)
-                       pag->pagf_metadata = 1;
-               xfs_perag_put(pag);
-       }
-       mp->m_flags |= (XFS_MOUNT_32BITINODES |
-                       XFS_MOUNT_SMALL_INUMS);
 
-       return maxagi;
-}
-
-xfs_agnumber_t
-xfs_set_inode64(struct xfs_mount *mp, xfs_agnumber_t agcount)
-{
-       xfs_agnumber_t index = 0;
-
-       for (index = 0; index < agcount; index++) {
-               struct xfs_perag        *pag;
+               if (mp->m_flags & XFS_MOUNT_32BITINODES) {
+                       if (ino > XFS_MAXINUMBER_32) {
+                               pag->pagi_inodeok = 0;
+                               pag->pagf_metadata = 0;
+                       } else {
+                               pag->pagi_inodeok = 1;
+                               maxagi++;
+                               if (index < max_metadata)
+                                       pag->pagf_metadata = 1;
+                               else
+                                       pag->pagf_metadata = 0;
+                       }
+               } else {
+                       pag->pagi_inodeok = 1;
+                       pag->pagf_metadata = 0;
+               }
 
-               pag = xfs_perag_get(mp, index);
-               pag->pagi_inodeok = 1;
-               pag->pagf_metadata = 0;
                xfs_perag_put(pag);
        }
 
-       /* There is no need for lock protection on m_flags,
-        * the rw_semaphore of the VFS superblock is locked
-        * during mount/umount/remount operations, so this is
-        * enough to avoid concurency on the m_flags field
-        */
-       mp->m_flags &= ~(XFS_MOUNT_32BITINODES |
-                        XFS_MOUNT_SMALL_INUMS);
-       return index;
+       return (mp->m_flags & XFS_MOUNT_32BITINODES) ? maxagi : agcount;
 }
 
 STATIC int
@@ -1165,6 +1180,27 @@ xfs_quiesce_attr(
        xfs_log_quiesce(mp);
 }
 
+STATIC int
+xfs_test_remount_options(
+       struct super_block      *sb,
+       struct xfs_mount        *mp,
+       char                    *options)
+{
+       int                     error = 0;
+       struct xfs_mount        *tmp_mp;
+
+       tmp_mp = kmem_zalloc(sizeof(*tmp_mp), KM_MAYFAIL);
+       if (!tmp_mp)
+               return -ENOMEM;
+
+       tmp_mp->m_super = sb;
+       error = xfs_parseargs(tmp_mp, options);
+       xfs_free_fsname(tmp_mp);
+       kfree(tmp_mp);
+
+       return error;
+}
+
 STATIC int
 xfs_fs_remount(
        struct super_block      *sb,
@@ -1177,6 +1213,11 @@ xfs_fs_remount(
        char                    *p;
        int                     error;
 
+       /* First, check for complete junk; i.e. invalid options */
+       error = xfs_test_remount_options(sb, mp, options);
+       if (error)
+               return error;
+
        sync_filesystem(sb);
        while ((p = strsep(&options, ",")) != NULL) {
                int token;
@@ -1193,10 +1234,12 @@ xfs_fs_remount(
                        mp->m_flags &= ~XFS_MOUNT_BARRIER;
                        break;
                case Opt_inode64:
-                       mp->m_maxagi = xfs_set_inode64(mp, sbp->sb_agcount);
+                       mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
+                       mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
                        break;
                case Opt_inode32:
-                       mp->m_maxagi = xfs_set_inode32(mp, sbp->sb_agcount);
+                       mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+                       mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
                        break;
                default:
                        /*
@@ -1344,9 +1387,8 @@ xfs_finish_flags(
         */
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
            (mp->m_flags & XFS_MOUNT_NOATTR2)) {
-               xfs_warn(mp,
-"Cannot mount a V5 filesystem as %s. %s is always enabled for V5 filesystems.",
-                       MNTOPT_NOATTR2, MNTOPT_ATTR2);
+               xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
+                            "attr2 is always enabled for V5 filesystems.");
                return -EINVAL;
        }
 
@@ -1817,6 +1859,8 @@ init_xfs_fs(void)
 {
        int                     error;
 
+       xfs_check_ondisk_structs();
+
        printk(KERN_INFO XFS_VERSION_STRING " with "
                         XFS_BUILD_OPTIONS " enabled\n");
 
index 499058fea303a81fa3d77f15123a83a2bf6f28b8..2dfb1ce4585f2feee7546483021f3d7593fc29fa 100644 (file)
@@ -65,8 +65,8 @@ extern __uint64_t xfs_max_file_offset(unsigned int);
 
 extern void xfs_flush_inodes(struct xfs_mount *mp);
 extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
-extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *, xfs_agnumber_t agcount);
-extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *, xfs_agnumber_t agcount);
+extern xfs_agnumber_t xfs_set_inode_alloc(struct xfs_mount *,
+                                          xfs_agnumber_t agcount);
 
 extern const struct export_operations xfs_export_operations;
 extern const struct xattr_handler *xfs_xattr_handlers[];
index 641d625eb334c3230191dce12ecacad4e41d6e4b..6ced4f1434948d3757077c732a93d6fac88ea0cc 100644 (file)
 
 #include "xfs.h"
 #include "xfs_sysfs.h"
+#include "xfs_format.h"
 #include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
 #include "xfs_log.h"
 #include "xfs_log_priv.h"
 #include "xfs_stats.h"
+#include "xfs_mount.h"
 
 struct xfs_sysfs_attr {
        struct attribute attr;
@@ -45,16 +48,6 @@ to_attr(struct attribute *attr)
 
 #define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr
 
-/*
- * xfs_mount kobject. This currently has no attributes and thus no need for show
- * and store helpers. The mp kobject serves as the per-mount parent object that
- * is identified by the fsname under sysfs.
- */
-
-struct kobj_type xfs_mp_ktype = {
-       .release = xfs_sysfs_release,
-};
-
 STATIC ssize_t
 xfs_sysfs_object_show(
        struct kobject          *kobject,
@@ -83,6 +76,71 @@ static const struct sysfs_ops xfs_sysfs_ops = {
        .store = xfs_sysfs_object_store,
 };
 
+/*
+ * xfs_mount kobject. The mp kobject also serves as the per-mount parent object
+ * that is identified by the fsname under sysfs.
+ */
+
+static inline struct xfs_mount *
+to_mp(struct kobject *kobject)
+{
+       struct xfs_kobj *kobj = to_kobj(kobject);
+
+       return container_of(kobj, struct xfs_mount, m_kobj);
+}
+
+#ifdef DEBUG
+
+STATIC ssize_t
+fail_writes_store(
+       struct kobject          *kobject,
+       const char              *buf,
+       size_t                  count)
+{
+       struct xfs_mount        *mp = to_mp(kobject);
+       int                     ret;
+       int                     val;
+
+       ret = kstrtoint(buf, 0, &val);
+       if (ret)
+               return ret;
+
+       if (val == 1)
+               mp->m_fail_writes = true;
+       else if (val == 0)
+               mp->m_fail_writes = false;
+       else
+               return -EINVAL;
+
+       return count;
+}
+
+STATIC ssize_t
+fail_writes_show(
+       struct kobject          *kobject,
+       char                    *buf)
+{
+       struct xfs_mount        *mp = to_mp(kobject);
+
+       return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_writes ? 1 : 0);
+}
+XFS_SYSFS_ATTR_RW(fail_writes);
+
+#endif /* DEBUG */
+
+static struct attribute *xfs_mp_attrs[] = {
+#ifdef DEBUG
+       ATTR_LIST(fail_writes),
+#endif
+       NULL,
+};
+
+struct kobj_type xfs_mp_ktype = {
+       .release = xfs_sysfs_release,
+       .sysfs_ops = &xfs_sysfs_ops,
+       .default_attrs = xfs_mp_attrs,
+};
+
 #ifdef DEBUG
 /* debug */
 
index 391d797cb53fee0a9196d3e392dfd8fa95d5d33a..c8d58426008ed7ef49096097904ed13653a8cfe9 100644 (file)
@@ -1296,11 +1296,7 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
 DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
-DEFINE_IOMAP_EVENT(xfs_gbmap_direct);
-DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new);
-DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update);
-DEFINE_IOMAP_EVENT(xfs_gbmap_direct_none);
-DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio);
+DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
 
 DECLARE_EVENT_CLASS(xfs_simple_io_class,
        TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
@@ -1340,6 +1336,9 @@ DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
 DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
 DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
 DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof);
+DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write);
+DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_unwritten);
+DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_append);
 
 DECLARE_EVENT_CLASS(xfs_itrunc_class,
        TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
index 748b16aff45a1cf959603b24131a69e289d99a95..20c53666cb4b3272400bc7111d285f104e555d75 100644 (file)
@@ -1028,6 +1028,8 @@ __xfs_trans_roll(
        struct xfs_trans_res    tres;
        int                     error;
 
+       *committed = 0;
+
        /*
         * Ensure that the inode is always logged.
         */
@@ -1082,6 +1084,6 @@ xfs_trans_roll(
        struct xfs_trans        **tpp,
        struct xfs_inode        *dp)
 {
-       int                     committed = 0;
+       int                     committed;
        return __xfs_trans_roll(tpp, dp, &committed);
 }
index 4643070d7cae4b814a36b101ba0a88fcd0c6287e..e7c49cf43fbc85c183e1728966d4f4506b94eaf6 100644 (file)
@@ -133,7 +133,6 @@ typedef struct xfs_trans {
  * XFS transaction mechanism exported interfaces that are
  * actually macros.
  */
-#define        xfs_trans_get_block_res(tp)     ((tp)->t_blk_res)
 #define        xfs_trans_set_sync(tp)          ((tp)->t_flags |= XFS_TRANS_SYNC)
 
 #if defined(DEBUG) || defined(XFS_WARN)
index 4f18fd92ca13b21d8fd68e955e082d9db9a61195..d6c9c3e9e02b2c45f2cd57074cbbcdebde1e804a 100644 (file)
@@ -497,6 +497,7 @@ xfsaild(
        long            tout = 0;       /* milliseconds */
 
        current->flags |= PF_MEMALLOC;
+       set_freezable();
 
        while (!kthread_should_stop()) {
                if (tout && tout <= 20)
@@ -519,14 +520,14 @@ xfsaild(
                if (!xfs_ail_min(ailp) &&
                    ailp->xa_target == ailp->xa_target_prev) {
                        spin_unlock(&ailp->xa_lock);
-                       schedule();
+                       freezable_schedule();
                        tout = 0;
                        continue;
                }
                spin_unlock(&ailp->xa_lock);
 
                if (tout)
-                       schedule_timeout(msecs_to_jiffies(tout));
+                       freezable_schedule_timeout(msecs_to_jiffies(tout));
 
                __set_current_state(TASK_RUNNING);
 
index 75798412859a7ba2f47b01945c54b7ee82ff4e7e..8ee29ca132dc13c0f302fa470cfaffc788cb9938 100644 (file)
@@ -155,7 +155,7 @@ xfs_trans_get_buf_map(
                ASSERT(xfs_buf_islocked(bp));
                if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) {
                        xfs_buf_stale(bp);
-                       XFS_BUF_DONE(bp);
+                       bp->b_flags |= XBF_DONE;
                }
 
                ASSERT(bp->b_transp == tp);
@@ -518,7 +518,7 @@ xfs_trans_log_buf(xfs_trans_t       *tp,
         * inside the b_bdstrat callback so that this won't get written to
         * disk.
         */
-       XFS_BUF_DONE(bp);
+       bp->b_flags |= XBF_DONE;
 
        ASSERT(atomic_read(&bip->bli_refcount) > 0);
        bp->b_iodone = xfs_buf_iodone_callbacks;
@@ -534,8 +534,8 @@ xfs_trans_log_buf(xfs_trans_t       *tp,
         */
        if (bip->bli_flags & XFS_BLI_STALE) {
                bip->bli_flags &= ~XFS_BLI_STALE;
-               ASSERT(XFS_BUF_ISSTALE(bp));
-               XFS_BUF_UNSTALE(bp);
+               ASSERT(bp->b_flags & XBF_STALE);
+               bp->b_flags &= ~XBF_STALE;
                bip->__bli_format.blf_flags &= ~XFS_BLF_CANCEL;
        }
 
@@ -600,7 +600,7 @@ xfs_trans_binval(
                 * If the buffer is already invalidated, then
                 * just return.
                 */
-               ASSERT(XFS_BUF_ISSTALE(bp));
+               ASSERT(bp->b_flags & XBF_STALE);
                ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
                ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF));
                ASSERT(!(bip->__bli_format.blf_flags & XFS_BLFT_MASK));
index 995170194df040b5b3e02cab80b5942ee92cb2ad..c3d547211d16001ad686c543fd960996b2321c75 100644 (file)
@@ -609,17 +609,20 @@ xfs_trans_dqresv(
        xfs_qcnt_t      total_count;
        xfs_qcnt_t      *resbcountp;
        xfs_quotainfo_t *q = mp->m_quotainfo;
+       struct xfs_def_quota    *defq;
 
 
        xfs_dqlock(dqp);
 
+       defq = xfs_get_defquota(dqp, q);
+
        if (flags & XFS_TRANS_DQ_RES_BLKS) {
                hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
                if (!hardlimit)
-                       hardlimit = q->qi_bhardlimit;
+                       hardlimit = defq->bhardlimit;
                softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit);
                if (!softlimit)
-                       softlimit = q->qi_bsoftlimit;
+                       softlimit = defq->bsoftlimit;
                timer = be32_to_cpu(dqp->q_core.d_btimer);
                warns = be16_to_cpu(dqp->q_core.d_bwarns);
                warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
@@ -628,10 +631,10 @@ xfs_trans_dqresv(
                ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
                hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit);
                if (!hardlimit)
-                       hardlimit = q->qi_rtbhardlimit;
+                       hardlimit = defq->rtbhardlimit;
                softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit);
                if (!softlimit)
-                       softlimit = q->qi_rtbsoftlimit;
+                       softlimit = defq->rtbsoftlimit;
                timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
                warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
                warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
@@ -672,10 +675,10 @@ xfs_trans_dqresv(
                        warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
                        hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
                        if (!hardlimit)
-                               hardlimit = q->qi_ihardlimit;
+                               hardlimit = defq->ihardlimit;
                        softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
                        if (!softlimit)
-                               softlimit = q->qi_isoftlimit;
+                               softlimit = defq->isoftlimit;
 
                        if (hardlimit && total_count > hardlimit) {
                                xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
index b97f1df910abb0bd60ac5850fbe549237806d078..11a3af08b5c7ea1e40dcd348606414e147e6a0cd 100644 (file)
@@ -75,18 +75,10 @@ xfs_trans_ichgtime(
 
        tv = current_fs_time(inode->i_sb);
 
-       if ((flags & XFS_ICHGTIME_MOD) &&
-           !timespec_equal(&inode->i_mtime, &tv)) {
+       if (flags & XFS_ICHGTIME_MOD)
                inode->i_mtime = tv;
-               ip->i_d.di_mtime.t_sec = tv.tv_sec;
-               ip->i_d.di_mtime.t_nsec = tv.tv_nsec;
-       }
-       if ((flags & XFS_ICHGTIME_CHG) &&
-           !timespec_equal(&inode->i_ctime, &tv)) {
+       if (flags & XFS_ICHGTIME_CHG)
                inode->i_ctime = tv;
-               ip->i_d.di_ctime.t_sec = tv.tv_sec;
-               ip->i_d.di_ctime.t_nsec = tv.tv_nsec;
-       }
 }
 
 /*
@@ -125,7 +117,7 @@ xfs_trans_log_inode(
         */
        if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) &&
            IS_I_VERSION(VFS_I(ip))) {
-               ip->i_d.di_changecount = ++VFS_I(ip)->i_version;
+               VFS_I(ip)->i_version++;
                flags |= XFS_ILOG_CORE;
        }
 
index cc08198358d4ee4cd03d68d8f681a76bb96f4521..35d99266ca9a392b695f39dc5fc1f7c03cc7bf80 100644 (file)
@@ -72,7 +72,7 @@ extern int sysctl_protected_hardlinks;
 struct buffer_head;
 typedef int (get_block_t)(struct inode *inode, sector_t iblock,
                        struct buffer_head *bh_result, int create);
-typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
+typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
                        ssize_t bytes, void *private);
 typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate);
 
index b2505acfd3c078c70e733f7d9e826cfc4b6c9524..fba92f5c1a631a7c43552a357a51d8d3eb896a01 100644 (file)
@@ -425,6 +425,8 @@ struct quotactl_ops {
        int (*quota_sync)(struct super_block *, int);
        int (*set_info)(struct super_block *, int, struct qc_info *);
        int (*get_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *);
+       int (*get_nextdqblk)(struct super_block *, struct kqid *,
+                            struct qc_dqblk *);
        int (*set_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *);
        int (*get_state)(struct super_block *, struct qc_state *);
        int (*rm_xquota)(struct super_block *, unsigned int);
index dcd75cc261962f65c909a6efa0defe3f1dcdc281..11b3b31faf1483a46122a67e93b823182b768cb9 100644 (file)
@@ -39,6 +39,7 @@
 #define Q_XQUOTARM     XQM_CMD(6)      /* free disk space used by dquots */
 #define Q_XQUOTASYNC   XQM_CMD(7)      /* delalloc flush, updates dquots */
 #define Q_XGETQSTATV   XQM_CMD(8)      /* newer version of get quota */
+#define Q_XGETNEXTQUOTA        XQM_CMD(9)      /* get disk limits and usage >= ID */
 
 /*
  * fs_disk_quota structure:
index 9c95b2c1c88a6ef0a6bb4207cd5122011d3007f9..38baddb807f503f5f526d93377df677ab80c5ad1 100644 (file)
@@ -71,6 +71,7 @@
 #define Q_SETINFO  0x800006    /* set information about quota files */
 #define Q_GETQUOTA 0x800007    /* get user quota structure */
 #define Q_SETQUOTA 0x800008    /* set user quota structure */
+#define Q_GETNEXTQUOTA 0x800009        /* get disk limits and usage >= ID */
 
 /* Quota format type IDs */
 #define        QFMT_VFS_OLD 1
@@ -119,6 +120,19 @@ struct if_dqblk {
        __u32 dqb_valid;
 };
 
+struct if_nextdqblk {
+       __u64 dqb_bhardlimit;
+       __u64 dqb_bsoftlimit;
+       __u64 dqb_curspace;
+       __u64 dqb_ihardlimit;
+       __u64 dqb_isoftlimit;
+       __u64 dqb_curinodes;
+       __u64 dqb_btime;
+       __u64 dqb_itime;
+       __u32 dqb_valid;
+       __u32 dqb_id;
+};
+
 /*
  * Structure used for setting quota information about file via quotactl
  * Following flags are used to specify which fields are valid