Merge tag 'xfs-for-linus-4.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Linus Torvalds <torvalds@linux-foundation.org>

Mon, 21 Mar 2016 18:53:05 +0000 (11:53 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 21 Mar 2016 18:53:05 +0000 (11:53 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 21 Mar 2016 18:53:05 +0000 (11:53 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 21 Mar 2016 18:53:05 +0000 (11:53 -0700)
diff --git a/fs/dax.c b/fs/dax.c

index bbb2ad78377020ac85158fb61df067aceadaafe5..90322eb7498c13289a346ce82a64d8ab34c23851 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -286,8 +286,13 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
         if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
                 inode_unlock(inode);
  
-       if ((retval > 0) && end_io)
-               end_io(iocb, pos, retval, bh.b_private);
+       if (end_io) {
+               int err;
+
+               err = end_io(iocb, pos, retval, bh.b_private);
+               if (err)
+                       retval = err;
+       }
  
         if (!(flags & DIO_SKIP_DIO_COUNT))
                 inode_dio_end(inode);
diff --git a/fs/direct-io.c b/fs/direct-io.c

index 0a8d937c6775577d64c40cdaa250cc632619f339..476f1ecbd1f0e585171b8dfa2970ada3cb5cc2c7 100644 (file)
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -253,8 +253,13 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret,
         if (ret == 0)
                 ret = transferred;
  
-       if (dio->end_io && dio->result)
-               dio->end_io(dio->iocb, offset, transferred, dio->private);
+       if (dio->end_io) {
+               int err;
+
+               err = dio->end_io(dio->iocb, offset, ret, dio->private);
+               if (err)
+                       ret = err;
+       }
  
         if (!(dio->flags & DIO_SKIP_DIO_COUNT))
                 inode_dio_end(dio->inode);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h

index 393689dfa1aff8f17aad517c12a0a55b808a505e..c0474351986597cda60b574cacf325490ab2b157 100644 (file)
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1511,15 +1511,6 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
                  ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
  }
  
-static inline void ext4_set_io_unwritten_flag(struct inode *inode,
-                                             struct ext4_io_end *io_end)
-{
-       if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
-               io_end->flag |= EXT4_IO_END_UNWRITTEN;
-               atomic_inc(&EXT4_I(inode)->i_unwritten);
-       }
-}
-
  /*
   * Inode dynamic state flags
   */
@@ -3293,6 +3284,27 @@ extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
  extern int ext4_resize_begin(struct super_block *sb);
  extern void ext4_resize_end(struct super_block *sb);
  
+static inline void ext4_set_io_unwritten_flag(struct inode *inode,
+                                             struct ext4_io_end *io_end)
+{
+       if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+               io_end->flag |= EXT4_IO_END_UNWRITTEN;
+               atomic_inc(&EXT4_I(inode)->i_unwritten);
+       }
+}
+
+static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
+{
+       struct inode *inode = io_end->inode;
+
+       if (io_end->flag & EXT4_IO_END_UNWRITTEN) { /* no-op when not set */
+               io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
+               /* Wake unwritten-conversion waiters once i_unwritten hits 0 */
+               if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
+                       wake_up_all(ext4_ioend_wq(inode));
+       }
+}
+
  #endif /* __KERNEL__ */
  
  #define EFSBADCRC      EBADMSG         /* Bad CRC detected */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index b2e9576450eb92c3f254a51e8b2392f5ba30b054..dab84a2530ff3e05794d9b3c602e0d246937c8a3 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3289,22 +3289,32 @@ out:
  }
  #endif
  
-static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
+static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
                             ssize_t size, void *private)
  {
          ext4_io_end_t *io_end = private;
  
         /* if not async direct IO just return */
         if (!io_end)
-               return;
+               return 0;
  
         ext_debug("ext4_end_io_dio(): io_end 0x%p "
                   "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
                   io_end, io_end->inode->i_ino, iocb, offset, size);
  
+       /*
+        * Error during AIO DIO. We cannot convert unwritten extents as the
+        * data was not written. Just clear the unwritten flag and drop io_end.
+        */
+       if (size <= 0) {
+               ext4_clear_io_unwritten_flag(io_end);
+               size = 0;
+       }
         io_end->offset = offset;
         io_end->size = size;
         ext4_put_io_end(io_end);
+
+       return 0;
  }
  
  /*
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c

index 349d7aa04fe70e1938456362474baa5a421cf390..d77d15f4b674485de79f7c72d3a36049fd4aa986 100644 (file)
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -136,16 +136,6 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
         kmem_cache_free(io_end_cachep, io_end);
  }
  
-static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
-{
-       struct inode *inode = io_end->inode;
-
-       io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
-       /* Wake up anyone waiting on unwritten extent conversion */
-       if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
-               wake_up_all(ext4_ioend_wq(inode));
-}
-
  /*
   * Check a range of space and convert unwritten extents to written. Note that
   * we are protected from truncate touching same part of extent tree by the
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c

index cda0361e95a403e887d912174ec296aa4cbb6e4d..043110e5212dd8c6ab2e7118ac8f17746448dac0 100644 (file)
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -620,7 +620,7 @@ bail:
   * particularly interested in the aio/dio case.  We use the rw_lock DLM lock
   * to protect io on one node from truncation on another.
   */
-static void ocfs2_dio_end_io(struct kiocb *iocb,
+static int ocfs2_dio_end_io(struct kiocb *iocb,
                              loff_t offset,
                              ssize_t bytes,
                              void *private)
@@ -628,6 +628,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
         struct inode *inode = file_inode(iocb->ki_filp);
         int level;
  
+       if (bytes <= 0)
+               return 0;
+
         /* this io's submitter should not have unlocked this before we could */
         BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
  
@@ -644,6 +647,8 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
                 level = ocfs2_iocb_rw_locked_level(iocb);
                 ocfs2_rw_unlock(inode, level);
         }
+
+       return 0;
  }
  
  static int ocfs2_releasepage(struct page *page, gfp_t wait)
diff --git a/fs/quota/quota.c b/fs/quota/quota.c

index 3746367098fda369120ccf0eab4ca0249ed64cb4..0ebc90496525d13684ed96402cab612b0c2da356 100644 (file)
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -79,7 +79,7 @@ unsigned int qtype_enforce_flag(int type)
         return 0;
  }
  
-static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id,
+static int quota_quotaon(struct super_block *sb, int type, qid_t id,
                          struct path *path)
  {
         if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_enable)
@@ -222,6 +222,34 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id,
         return 0;
  }
  
+/*
+ * Q_GETNEXTQUOTA: copy to @addr the first active quota with id >= @id, or
+ * return ->get_nextdqblk's error (-ESRCH if none); -ENOSYS if op is missing.
+ */
+static int quota_getnextquota(struct super_block *sb, int type, qid_t id,
+                         void __user *addr)
+{
+       struct kqid qid;
+       struct qc_dqblk fdq;
+       struct if_nextdqblk idq;
+       int ret;
+
+       if (!sb->s_qcop->get_nextdqblk)
+               return -ENOSYS;
+       qid = make_kqid(current_user_ns(), type, id);
+       if (!qid_valid(qid))
+               return -EINVAL; /* id has no valid kqid in caller's user ns */
+       ret = sb->s_qcop->get_nextdqblk(sb, &qid, &fdq);
+       if (ret)
+               return ret;
+       /* struct if_nextdqblk is a superset of struct if_dqblk */
+       copy_to_if_dqblk((struct if_dqblk *)&idq, &fdq);
+       idq.dqb_id = from_kqid(current_user_ns(), qid); /* id the op found */
+       if (copy_to_user(addr, &idq, sizeof(idq)))
+               return -EFAULT;
+       return 0;
+}
+
  static void copy_from_if_dqblk(struct qc_dqblk *dst, struct if_dqblk *src)
  {
         dst->d_spc_hardlimit = qbtos(src->dqb_bhardlimit);
@@ -625,6 +653,34 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id,
         return ret;
  }
  
+/*
+ * Q_XGETNEXTQUOTA: copy to @addr, as struct fs_disk_quota, the first active
+ * quota with id >= @id; else ->get_nextdqblk's error (-ESRCH if none exists).
+ */
+static int quota_getnextxquota(struct super_block *sb, int type, qid_t id,
+                           void __user *addr)
+{
+       struct fs_disk_quota fdq;
+       struct qc_dqblk qdq;
+       struct kqid qid;
+       qid_t id_out;
+       int ret;
+
+       if (!sb->s_qcop->get_nextdqblk)
+               return -ENOSYS;
+       qid = make_kqid(current_user_ns(), type, id);
+       if (!qid_valid(qid))
+               return -EINVAL; /* id has no valid kqid in caller's user ns */
+       ret = sb->s_qcop->get_nextdqblk(sb, &qid, &qdq);
+       if (ret)
+               return ret;
+       id_out = from_kqid(current_user_ns(), qid); /* id the op found */
+       copy_to_xfs_dqblk(&fdq, &qdq, type, id_out);
+       if (copy_to_user(addr, &fdq, sizeof(fdq)))
+               return -EFAULT;
+       return ret; /* ret is 0 on this path */
+}
+
  static int quota_rmxquota(struct super_block *sb, void __user *addr)
  {
         __u32 flags;
@@ -659,7 +715,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
  
         switch (cmd) {
         case Q_QUOTAON:
-               return quota_quotaon(sb, type, cmd, id, path);
+               return quota_quotaon(sb, type, id, path);
         case Q_QUOTAOFF:
                 return quota_quotaoff(sb, type);
         case Q_GETFMT:
@@ -670,6 +726,8 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
                 return quota_setinfo(sb, type, addr);
         case Q_GETQUOTA:
                 return quota_getquota(sb, type, id, addr);
+       case Q_GETNEXTQUOTA:
+               return quota_getnextquota(sb, type, id, addr);
         case Q_SETQUOTA:
                 return quota_setquota(sb, type, id, addr);
         case Q_SYNC:
@@ -690,6 +748,8 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
                 return quota_setxquota(sb, type, id, addr);
         case Q_XGETQUOTA:
                 return quota_getxquota(sb, type, id, addr);
+       case Q_XGETNEXTQUOTA:
+               return quota_getnextxquota(sb, type, id, addr);
         case Q_XQUOTASYNC:
                 if (sb->s_flags & MS_RDONLY)
                         return -EROFS;
@@ -708,10 +768,12 @@ static int quotactl_cmd_write(int cmd)
         switch (cmd) {
         case Q_GETFMT:
         case Q_GETINFO:
+       case Q_GETNEXTQUOTA:
         case Q_SYNC:
         case Q_XGETQSTAT:
         case Q_XGETQSTATV:
         case Q_XGETQUOTA:
+       case Q_XGETNEXTQUOTA:
         case Q_XQUOTASYNC:
                 return 0;
         }
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c

index 444626ddbd1b9ba2baca676b48a62d90299d515e..d9b42425291e37c6a4845c21dd0e1f61d8a76e86 100644 (file)
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -118,8 +118,6 @@ xfs_allocbt_free_block(
         xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
                               XFS_EXTENT_BUSY_SKIP_DISCARD);
         xfs_trans_agbtree_delta(cur->bc_tp, -1);
-
-       xfs_trans_binval(cur->bc_tp, bp);
         return 0;
  }
  
diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h

index 919756e3ba53591a9132849a6dc7c57771f1aebb..90928bbe693c03bcb5a74aecaac421ba3132bebe 100644 (file)
--- a/fs/xfs/libxfs/xfs_attr_sf.h
+++ b/fs/xfs/libxfs/xfs_attr_sf.h
@@ -24,22 +24,6 @@
   * Small attribute lists are packed as tightly as possible so as
   * to fit into the literal area of the inode.
   */
-
-/*
- * Entries are packed toward the top as tight as possible.
- */
-typedef struct xfs_attr_shortform {
-       struct xfs_attr_sf_hdr {        /* constant-structure header block */
-               __be16  totsize;        /* total bytes in shortform list */
-               __u8    count;  /* count of active entries */
-       } hdr;
-       struct xfs_attr_sf_entry {
-               __uint8_t namelen;      /* actual length of name (no NULL) */
-               __uint8_t valuelen;     /* actual length of value (no NULL) */
-               __uint8_t flags;        /* flags bits (see xfs_attr_leaf.h) */
-               __uint8_t nameval[1];   /* name & value bytes concatenated */
-       } list[1];                      /* variable sized array */
-} xfs_attr_shortform_t;
  typedef struct xfs_attr_sf_hdr xfs_attr_sf_hdr_t;
  typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t;
  
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c

index ef00156f4f9616178006df4ef6248b1abb2297ab..041b6948aeccd928f6f88d95ed9985052d915390 100644 (file)
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -477,10 +477,7 @@ xfs_bmap_check_leaf_extents(
                 }
                 block = XFS_BUF_TO_BLOCK(bp);
         }
-       if (bp_release) {
-               bp_release = 0;
-               xfs_trans_brelse(NULL, bp);
-       }
+
         return;
  
  error0:
@@ -912,7 +909,7 @@ xfs_bmap_local_to_extents(
          * We don't want to deal with the case of keeping inode data inline yet.
          * So sending the data fork of a regular inode is invalid.
          */
-       ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
+       ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
         ifp = XFS_IFORK_PTR(ip, whichfork);
         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
  
@@ -1079,7 +1076,7 @@ xfs_bmap_add_attrfork_local(
         if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
                 return 0;
  
-       if (S_ISDIR(ip->i_d.di_mode)) {
+       if (S_ISDIR(VFS_I(ip)->i_mode)) {
                 memset(&dargs, 0, sizeof(dargs));
                 dargs.geo = ip->i_mount->m_dir_geo;
                 dargs.dp = ip;
@@ -1091,7 +1088,7 @@ xfs_bmap_add_attrfork_local(
                 return xfs_dir2_sf_to_block(&dargs);
         }
  
-       if (S_ISLNK(ip->i_d.di_mode))
+       if (S_ISLNK(VFS_I(ip)->i_mode))
                 return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
                                                  flags, XFS_DATA_FORK,
                                                  xfs_symlink_local_to_remote);
@@ -4720,6 +4717,66 @@ error0:
         return error;
  }
  
+/*
+ * When a delalloc extent is split (e.g., due to a hole punch), the original
+ * indlen reservation must be shared across the two new extents left behind.
+ *
+ * Given the original reservation and the worst case indlen for the two new
+ * extents (as calculated by xfs_bmap_worst_indlen()), split the original
+ * reservation fairly across the two new extents. If necessary, steal available
+ * blocks from a deleted extent to make up a reservation deficiency (e.g., if
+ * ores == 1). The number of stolen blocks is returned. The availability and
+ * subsequent accounting of stolen blocks is the responsibility of the caller.
+ * On return, *indlen1 + *indlen2 never exceeds ores plus the stolen blocks.
+ */
+static xfs_filblks_t
+xfs_bmap_split_indlen(
+       xfs_filblks_t                   ores,           /* original res. */
+       xfs_filblks_t                   *indlen1,       /* ext1 worst indlen */
+       xfs_filblks_t                   *indlen2,       /* ext2 worst indlen */
+       xfs_filblks_t                   avail)          /* stealable blocks */
+{
+       xfs_filblks_t                   len1 = *indlen1;
+       xfs_filblks_t                   len2 = *indlen2;
+       xfs_filblks_t                   nres = len1 + len2; /* new total res. */
+       xfs_filblks_t                   stolen = 0;
+
+       /*
+        * Steal as many blocks as we can to try and satisfy the worst case
+        * indlen for both new extents.
+        */
+       while (nres > ores && avail) {
+               nres--; /* a stolen block covers one block of need */
+               avail--;
+               stolen++;
+       }
+
+       /*
+        * The only blocks available are those reserved for the original
+        * extent and what we can steal from the extent being removed.
+        * If this still isn't enough to satisfy the combined
+        * requirements for the two new extents, skim blocks off of each
+        * of the new reservations until they match what is available.
+        */
+       while (nres > ores) { /* trim one block from each, alternately */
+               if (len1) {
+                       len1--;
+                       nres--;
+               }
+               if (nres == ores) /* don't over-trim: stop mid-round */
+                       break;
+               if (len2) {
+                       len2--;
+                       nres--;
+               }
+       }
+
+       *indlen1 = len1;
+       *indlen2 = len2;
+
+       return stolen; /* caller must account for these blocks */
+}
+
  /*
   * Called by xfs_bmapi to update file extent records and the btree
   * after removing space (or undoing a delayed allocation).
@@ -4984,28 +5041,29 @@ xfs_bmap_del_extent(
                         XFS_IFORK_NEXT_SET(ip, whichfork,
                                 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
                 } else {
+                       xfs_filblks_t   stolen;
                         ASSERT(whichfork == XFS_DATA_FORK);
-                       temp = xfs_bmap_worst_indlen(ip, temp);
+
+                       /*
+                        * Distribute the original indlen reservation across the
+                        * two new extents. Steal blocks from the deleted extent
+                        * if necessary. Stealing blocks simply fudges the
+                        * fdblocks accounting in xfs_bunmapi().
+                        */
+                       temp = xfs_bmap_worst_indlen(ip, got.br_blockcount);
+                       temp2 = xfs_bmap_worst_indlen(ip, new.br_blockcount);
+                       stolen = xfs_bmap_split_indlen(da_old, &temp, &temp2,
+                                                      del->br_blockcount);
+                       da_new = temp + temp2 - stolen;
+                       del->br_blockcount -= stolen;
+
+                       /*
+                        * Set the reservation for each extent. Warn if either
+                        * is zero as this can lead to delalloc problems.
+                        */
+                       WARN_ON_ONCE(!temp || !temp2);
                         xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-                       temp2 = xfs_bmap_worst_indlen(ip, temp2);
                         new.br_startblock = nullstartblock((int)temp2);
-                       da_new = temp + temp2;
-                       while (da_new > da_old) {
-                               if (temp) {
-                                       temp--;
-                                       da_new--;
-                                       xfs_bmbt_set_startblock(ep,
-                                               nullstartblock((int)temp));
-                               }
-                               if (da_new == da_old)
-                                       break;
-                               if (temp2) {
-                                       temp2--;
-                                       da_new--;
-                                       new.br_startblock =
-                                               nullstartblock((int)temp2);
-                               }
-                       }
                 }
                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
                 xfs_iext_insert(ip, *idx + 1, 1, &new, state);
@@ -5210,7 +5268,7 @@ xfs_bunmapi(
                          * This is better than zeroing it.
                          */
                         ASSERT(del.br_state == XFS_EXT_NORM);
-                       ASSERT(xfs_trans_get_block_res(tp) > 0);
+                       ASSERT(tp->t_blk_res > 0);
                         /*
                          * If this spans a realtime extent boundary,
                          * chop it back to the start of the one we end at.
@@ -5241,7 +5299,7 @@ xfs_bunmapi(
                                 del.br_startblock += mod;
                         } else if ((del.br_startoff == start &&
                                     (del.br_state == XFS_EXT_UNWRITTEN ||
-                                    xfs_trans_get_block_res(tp) == 0)) ||
+                                    tp->t_blk_res == 0)) ||
                                    !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
                                 /*
                                  * Can't make it unwritten.  There isn't
@@ -5296,9 +5354,37 @@ xfs_bunmapi(
                                 goto nodelete;
                         }
                 }
+
+               /*
+                * If it's the case where the directory code is running
+                * with no block reservation, and the deleted block is in
+                * the middle of its extent, and the resulting insert
+                * of an extent would cause transformation to btree format,
+                * then reject it.  The calling code will then swap
+                * blocks around instead.
+                * We have to do this now, rather than waiting for the
+                * conversion to btree format, since the transaction
+                * will be dirty.
+                */
+               if (!wasdel && tp->t_blk_res == 0 &&
+                   XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+                   XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
+                       XFS_IFORK_MAXEXT(ip, whichfork) &&
+                   del.br_startoff > got.br_startoff &&
+                   del.br_startoff + del.br_blockcount <
+                   got.br_startoff + got.br_blockcount) {
+                       error = -ENOSPC;
+                       goto error0;
+               }
+
+               /*
+                * Unreserve quota and update realtime free space, if
+                * appropriate. If delayed allocation, update the inode delalloc
+                * counter now and wait to update the sb counters as
+                * xfs_bmap_del_extent() might need to borrow some blocks.
+                */
                 if (wasdel) {
                         ASSERT(startblockval(del.br_startblock) > 0);
-                       /* Update realtime/data freespace, unreserve quota */
                         if (isrt) {
                                 xfs_filblks_t rtexts;
  
@@ -5309,8 +5395,6 @@ xfs_bunmapi(
                                         ip, -((long)del.br_blockcount), 0,
                                         XFS_QMOPT_RES_RTBLKS);
                         } else {
-                               xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount,
-                                                false);
                                 (void)xfs_trans_reserve_quota_nblks(NULL,
                                         ip, -((long)del.br_blockcount), 0,
                                         XFS_QMOPT_RES_REGBLKS);
@@ -5321,32 +5405,16 @@ xfs_bunmapi(
                                         XFS_BTCUR_BPRV_WASDEL;
                 } else if (cur)
                         cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
-               /*
-                * If it's the case where the directory code is running
-                * with no block reservation, and the deleted block is in
-                * the middle of its extent, and the resulting insert
-                * of an extent would cause transformation to btree format,
-                * then reject it.  The calling code will then swap
-                * blocks around instead.
-                * We have to do this now, rather than waiting for the
-                * conversion to btree format, since the transaction
-                * will be dirty.
-                */
-               if (!wasdel && xfs_trans_get_block_res(tp) == 0 &&
-                   XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
-                   XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
-                       XFS_IFORK_MAXEXT(ip, whichfork) &&
-                   del.br_startoff > got.br_startoff &&
-                   del.br_startoff + del.br_blockcount <
-                   got.br_startoff + got.br_blockcount) {
-                       error = -ENOSPC;
-                       goto error0;
-               }
+
                 error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del,
                                 &tmp_logflags, whichfork);
                 logflags |= tmp_logflags;
                 if (error)
                         goto error0;
+
+               if (!isrt && wasdel)
+                       xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false);
+
                 bno = del.br_startoff - 1;
  nodelete:
                 /*
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c

index 1637c37bfbaa1cb61ef69e48c52eb95716ecd649..6282f6e708afaf4ca2a15999b1864b507c6d5ed5 100644 (file)
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -461,7 +461,7 @@ xfs_bmbt_alloc_block(
                  * reservation amount is insufficient then we may fail a
                  * block allocation here and corrupt the filesystem.
                  */
-               args.minleft = xfs_trans_get_block_res(args.tp);
+               args.minleft = args.tp->t_blk_res;
         } else if (cur->bc_private.b.flist->xbf_low) {
                 args.type = XFS_ALLOCTYPE_START_BNO;
         } else {
@@ -470,7 +470,7 @@ xfs_bmbt_alloc_block(
  
         args.minlen = args.maxlen = args.prod = 1;
         args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
-       if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
+       if (!args.wasdel && args.tp->t_blk_res == 0) {
                 error = -ENOSPC;
                 goto error0;
         }
@@ -531,7 +531,6 @@ xfs_bmbt_free_block(
  
         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
         xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
-       xfs_trans_binval(tp, bp);
         return 0;
  }
  
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c

index a0eb18ce3ad38f205f5f3487ea4684937b7b287c..1f88e1ce770f35442f0161466632c68fe0e46153 100644 (file)
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -294,6 +294,21 @@ xfs_btree_sblock_verify_crc(
         return true;
  }
  
+static int
+xfs_btree_free_block(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       struct xfs_buf          *bp)    /* buffer of block to free */
+{
+       int                     error;
+
+       error = cur->bc_ops->free_block(cur, bp); /* btree-specific free */
+       if (!error) { /* on success only: binval bp and bump the free stat */
+               xfs_trans_binval(cur->bc_tp, bp);
+               XFS_BTREE_STATS_INC(cur, free);
+       }
+       return error;
+}
+
  /*
   * Delete the btree cursor.
   */
@@ -3209,6 +3224,7 @@ xfs_btree_kill_iroot(
         int                     level;
         int                     index;
         int                     numrecs;
+       int                     error;
  #ifdef DEBUG
         union xfs_btree_ptr     ptr;
         int                     i;
@@ -3272,8 +3288,6 @@ xfs_btree_kill_iroot(
         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
  #ifdef DEBUG
         for (i = 0; i < numrecs; i++) {
-               int             error;
-
                 error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
                 if (error) {
                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
@@ -3283,8 +3297,11 @@ xfs_btree_kill_iroot(
  #endif
         xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
  
-       cur->bc_ops->free_block(cur, cbp);
-       XFS_BTREE_STATS_INC(cur, free);
+       error = xfs_btree_free_block(cur, cbp);
+       if (error) {
+               XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+               return error;
+       }
  
         cur->bc_bufs[level - 1] = NULL;
         be16_add_cpu(&block->bb_level, -1);
@@ -3317,14 +3334,12 @@ xfs_btree_kill_root(
          */
         cur->bc_ops->set_root(cur, newroot, -1);
  
-       error = cur->bc_ops->free_block(cur, bp);
+       error = xfs_btree_free_block(cur, bp);
         if (error) {
                 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
                 return error;
         }
  
-       XFS_BTREE_STATS_INC(cur, free);
-
         cur->bc_bufs[level] = NULL;
         cur->bc_ra[level] = 0;
         cur->bc_nlevels--;
@@ -3830,10 +3845,9 @@ xfs_btree_delrec(
         }
  
         /* Free the deleted block. */
-       error = cur->bc_ops->free_block(cur, rbp);
+       error = xfs_btree_free_block(cur, rbp);
         if (error)
                 goto error0;
-       XFS_BTREE_STATS_INC(cur, free);
  
         /*
          * If we joined with the left neighbor, set the buffer in the
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h

index b14bbd6bb05fad090571bcada4e4867e35040b87..8d4d8bce41bf7873fec0fc8211801207a0a46494 100644 (file)
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -641,6 +641,22 @@ xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
   */
  #define XFS_ATTR_LEAF_MAPSIZE  3       /* how many freespace slots */
  
+/*
+ * Entries are packed toward the top as tight as possible.
+ */
+typedef struct xfs_attr_shortform {
+       struct xfs_attr_sf_hdr {        /* constant-structure header block */
+               __be16  totsize;        /* total bytes in shortform list */
+               __u8    count;  /* count of active entries */
+       } hdr;
+       struct xfs_attr_sf_entry {
+               __uint8_t namelen;      /* actual length of name (no NULL) */
+               __uint8_t valuelen;     /* actual length of value (no NULL) */
+               __uint8_t flags;        /* flags bits (see xfs_attr_leaf.h) */
+               __uint8_t nameval[1];   /* name & value bytes concatenated */
+       } list[1];                      /* variable sized array */
+} xfs_attr_shortform_t;
+
  typedef struct xfs_attr_leaf_map {     /* RLE map of free bytes */
         __be16  base;                     /* base of free region */
         __be16  size;                     /* length of free region */
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c

index 2fb53a5c0a745259d2e18164e8505cb21d704724..af0f9d171f8a012758d778a0bd105e51448e5cf3 100644 (file)
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -176,7 +176,7 @@ xfs_dir_isempty(
  {
         xfs_dir2_sf_hdr_t       *sfp;
  
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
+       ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
         if (dp->i_d.di_size == 0)       /* might happen during shutdown. */
                 return 1;
         if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
@@ -231,7 +231,7 @@ xfs_dir_init(
         struct xfs_da_args *args;
         int             error;
  
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
+       ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
         error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino);
         if (error)
                 return error;
@@ -266,7 +266,7 @@ xfs_dir_createname(
         int                     rval;
         int                     v;              /* type-checking value */
  
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
+       ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
         if (inum) {
                 rval = xfs_dir_ino_validate(tp->t_mountp, inum);
                 if (rval)
@@ -364,7 +364,7 @@ xfs_dir_lookup(
         int             v;              /* type-checking value */
         int             lock_mode;
  
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
+       ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
         XFS_STATS_INC(dp->i_mount, xs_dir_lookup);
  
         /*
@@ -443,7 +443,7 @@ xfs_dir_removename(
         int             rval;
         int             v;              /* type-checking value */
  
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
+       ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
         XFS_STATS_INC(dp->i_mount, xs_dir_remove);
  
         args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
@@ -505,7 +505,7 @@ xfs_dir_replace(
         int             rval;
         int             v;              /* type-checking value */
  
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
+       ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
  
         rval = xfs_dir_ino_validate(tp->t_mountp, inum);
         if (rval)
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c

index 63ee03db796ca9a55b9f9f10a427a6fd539c30d4..75a557432d0f87fdb4cfa866332c403023d169dc 100644 (file)
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -2235,6 +2235,9 @@ xfs_dir2_node_trim_free(
  
         dp = args->dp;
         tp = args->trans;
+
+       *rvalp = 0;
+
         /*
          * Read the freespace block.
          */
@@ -2255,7 +2258,6 @@ xfs_dir2_node_trim_free(
          */
         if (freehdr.nused > 0) {
                 xfs_trans_brelse(tp, bp);
-               *rvalp = 0;
                 return 0;
         }
         /*
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c

index 66d702e6b9ff3f7d6cc5a0b47a216171815e9fa4..22297f9b0fd52c8a6d3cc75bb0ac699bd985b0f0 100644 (file)
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2403,8 +2403,8 @@ xfs_ialloc_compute_maxlevels(
  
         maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >>
                 XFS_INODES_PER_CHUNK_LOG;
-       minleafrecs = mp->m_alloc_mnr[0];
-       minnoderecs = mp->m_alloc_mnr[1];
+       minleafrecs = mp->m_inobt_mnr[0];
+       minnoderecs = mp->m_inobt_mnr[1];
         maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
         for (level = 1; maxblocks > 1; level++)
                 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c

index c679f3c05b63cb535de34e7de5543a75ac57faac..89c21d771e35edbc026eb7fe7cb373280774b162 100644 (file)
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -125,16 +125,8 @@ xfs_inobt_free_block(
         struct xfs_btree_cur    *cur,
         struct xfs_buf          *bp)
  {
-       xfs_fsblock_t           fsbno;
-       int                     error;
-
-       fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp));
-       error = xfs_free_extent(cur->bc_tp, fsbno, 1);
-       if (error)
-               return error;
-
-       xfs_trans_binval(cur->bc_tp, bp);
-       return error;
+       return xfs_free_extent(cur->bc_tp,
+                       XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1);
  }
  
  STATIC int
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c

index 1aabfda669b0bb7bb85dbc8d9889ffd452d17c79..9d9559eb2835a33621e568392fab2c1074022da3 100644 (file)
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -195,28 +195,50 @@ xfs_imap_to_bp(
  }
  
  void
-xfs_dinode_from_disk(
-       xfs_icdinode_t          *to,
-       xfs_dinode_t            *from)
+xfs_inode_from_disk(
+       struct xfs_inode        *ip,
+       struct xfs_dinode       *from)
  {
-       to->di_magic = be16_to_cpu(from->di_magic);
-       to->di_mode = be16_to_cpu(from->di_mode);
-       to->di_version = from ->di_version;
+       struct xfs_icdinode     *to = &ip->i_d;
+       struct inode            *inode = VFS_I(ip);
+
+
+       /*
+        * Convert v1 inodes immediately to v2 inode format as this is the
+        * minimum inode version format we support in the rest of the code.
+        */
+       to->di_version = from->di_version;
+       if (to->di_version == 1) {
+               set_nlink(inode, be16_to_cpu(from->di_onlink));
+               to->di_projid_lo = 0;
+               to->di_projid_hi = 0;
+               to->di_version = 2;
+       } else {
+               set_nlink(inode, be32_to_cpu(from->di_nlink));
+               to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
+               to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
+       }
+
         to->di_format = from->di_format;
-       to->di_onlink = be16_to_cpu(from->di_onlink);
         to->di_uid = be32_to_cpu(from->di_uid);
         to->di_gid = be32_to_cpu(from->di_gid);
-       to->di_nlink = be32_to_cpu(from->di_nlink);
-       to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
-       to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
-       memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
         to->di_flushiter = be16_to_cpu(from->di_flushiter);
-       to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
-       to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
-       to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
-       to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
-       to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
-       to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
+
+       /*
+        * Time is signed, so we need to convert it to a signed 32 bit
+        * value before storing it in the inode timestamp, which may be
+        * 64 bit. Otherwise a time before the epoch is converted to a
+        * time long after the epoch on 64 bit systems.
+        */
+       inode->i_atime.tv_sec = (int)be32_to_cpu(from->di_atime.t_sec);
+       inode->i_atime.tv_nsec = (int)be32_to_cpu(from->di_atime.t_nsec);
+       inode->i_mtime.tv_sec = (int)be32_to_cpu(from->di_mtime.t_sec);
+       inode->i_mtime.tv_nsec = (int)be32_to_cpu(from->di_mtime.t_nsec);
+       inode->i_ctime.tv_sec = (int)be32_to_cpu(from->di_ctime.t_sec);
+       inode->i_ctime.tv_nsec = (int)be32_to_cpu(from->di_ctime.t_nsec);
+       inode->i_generation = be32_to_cpu(from->di_gen);
+       inode->i_mode = be16_to_cpu(from->di_mode);
+
         to->di_size = be64_to_cpu(from->di_size);
         to->di_nblocks = be64_to_cpu(from->di_nblocks);
         to->di_extsize = be32_to_cpu(from->di_extsize);
@@ -227,42 +249,96 @@ xfs_dinode_from_disk(
         to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
         to->di_dmstate  = be16_to_cpu(from->di_dmstate);
         to->di_flags    = be16_to_cpu(from->di_flags);
-       to->di_gen      = be32_to_cpu(from->di_gen);
  
         if (to->di_version == 3) {
-               to->di_changecount = be64_to_cpu(from->di_changecount);
+               inode->i_version = be64_to_cpu(from->di_changecount);
                 to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
                 to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
                 to->di_flags2 = be64_to_cpu(from->di_flags2);
-               to->di_ino = be64_to_cpu(from->di_ino);
-               to->di_lsn = be64_to_cpu(from->di_lsn);
-               memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
-               uuid_copy(&to->di_uuid, &from->di_uuid);
         }
  }
  
  void
-xfs_dinode_to_disk(
-       xfs_dinode_t            *to,
-       xfs_icdinode_t          *from)
+xfs_inode_to_disk(
+       struct xfs_inode        *ip,
+       struct xfs_dinode       *to,
+       xfs_lsn_t               lsn)
+{
+       struct xfs_icdinode     *from = &ip->i_d;
+       struct inode            *inode = VFS_I(ip);
+
+       to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
+       to->di_onlink = 0;
+
+       to->di_version = from->di_version;
+       to->di_format = from->di_format;
+       to->di_uid = cpu_to_be32(from->di_uid);
+       to->di_gid = cpu_to_be32(from->di_gid);
+       to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
+       to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
+
+       memset(to->di_pad, 0, sizeof(to->di_pad));
+       to->di_atime.t_sec = cpu_to_be32(inode->i_atime.tv_sec);
+       to->di_atime.t_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
+       to->di_mtime.t_sec = cpu_to_be32(inode->i_mtime.tv_sec);
+       to->di_mtime.t_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
+       to->di_ctime.t_sec = cpu_to_be32(inode->i_ctime.tv_sec);
+       to->di_ctime.t_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
+       to->di_nlink = cpu_to_be32(inode->i_nlink);
+       to->di_gen = cpu_to_be32(inode->i_generation);
+       to->di_mode = cpu_to_be16(inode->i_mode);
+
+       to->di_size = cpu_to_be64(from->di_size);
+       to->di_nblocks = cpu_to_be64(from->di_nblocks);
+       to->di_extsize = cpu_to_be32(from->di_extsize);
+       to->di_nextents = cpu_to_be32(from->di_nextents);
+       to->di_anextents = cpu_to_be16(from->di_anextents);
+       to->di_forkoff = from->di_forkoff;
+       to->di_aformat = from->di_aformat;
+       to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
+       to->di_dmstate = cpu_to_be16(from->di_dmstate);
+       to->di_flags = cpu_to_be16(from->di_flags);
+
+       if (from->di_version == 3) {
+               to->di_changecount = cpu_to_be64(inode->i_version);
+               to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
+               to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
+               to->di_flags2 = cpu_to_be64(from->di_flags2);
+
+               to->di_ino = cpu_to_be64(ip->i_ino);
+               to->di_lsn = cpu_to_be64(lsn);
+               memset(to->di_pad2, 0, sizeof(to->di_pad2));
+               uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
+               to->di_flushiter = 0;
+       } else {
+               to->di_flushiter = cpu_to_be16(from->di_flushiter);
+       }
+}
+
+void
+xfs_log_dinode_to_disk(
+       struct xfs_log_dinode   *from,
+       struct xfs_dinode       *to)
  {
         to->di_magic = cpu_to_be16(from->di_magic);
         to->di_mode = cpu_to_be16(from->di_mode);
-       to->di_version = from ->di_version;
+       to->di_version = from->di_version;
         to->di_format = from->di_format;
-       to->di_onlink = cpu_to_be16(from->di_onlink);
+       to->di_onlink = 0;
         to->di_uid = cpu_to_be32(from->di_uid);
         to->di_gid = cpu_to_be32(from->di_gid);
         to->di_nlink = cpu_to_be32(from->di_nlink);
         to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
         to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
         memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
+
         to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
         to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
         to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
         to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
         to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
         to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
+
         to->di_size = cpu_to_be64(from->di_size);
         to->di_nblocks = cpu_to_be64(from->di_nblocks);
         to->di_extsize = cpu_to_be32(from->di_extsize);
@@ -367,13 +443,10 @@ xfs_iread(
             !(mp->m_flags & XFS_MOUNT_IKEEP)) {
                 /* initialise the on-disk inode core */
                 memset(&ip->i_d, 0, sizeof(ip->i_d));
-               ip->i_d.di_magic = XFS_DINODE_MAGIC;
-               ip->i_d.di_gen = prandom_u32();
-               if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               VFS_I(ip)->i_generation = prandom_u32();
+               if (xfs_sb_version_hascrc(&mp->m_sb))
                         ip->i_d.di_version = 3;
-                       ip->i_d.di_ino = ip->i_ino;
-                       uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_meta_uuid);
-               } else
+               else
                         ip->i_d.di_version = 2;
                 return 0;
         }
@@ -403,7 +476,7 @@ xfs_iread(
          * Otherwise, just get the truly permanent information.
          */
         if (dip->di_mode) {
-               xfs_dinode_from_disk(&ip->i_d, dip);
+               xfs_inode_from_disk(ip, dip);
                 error = xfs_iformat_fork(ip, dip);
                 if (error)  {
  #ifdef DEBUG
@@ -417,16 +490,10 @@ xfs_iread(
                  * Partial initialisation of the in-core inode. Just the bits
                  * that xfs_ialloc won't overwrite or relies on being correct.
                  */
-               ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
                 ip->i_d.di_version = dip->di_version;
-               ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
+               VFS_I(ip)->i_generation = be32_to_cpu(dip->di_gen);
                 ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
  
-               if (dip->di_version == 3) {
-                       ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
-                       uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
-               }
-
                 /*
                  * Make sure to pull in the mode here as well in
                  * case the inode is released without being used.
@@ -434,25 +501,10 @@ xfs_iread(
                  * the inode is already free and not try to mess
                  * with the uninitialized part of it.
                  */
-               ip->i_d.di_mode = 0;
-       }
-
-       /*
-        * Automatically convert version 1 inode formats in memory to version 2
-        * inode format. If the inode is modified, it will get logged and
-        * rewritten as a version 2 inode. We can do this because we set the
-        * superblock feature bit for v2 inodes unconditionally during mount
-        * and it means the reast of the code can assume the inode version is 2
-        * or higher.
-        */
-       if (ip->i_d.di_version == 1) {
-               ip->i_d.di_version = 2;
-               memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
-               ip->i_d.di_nlink = ip->i_d.di_onlink;
-               ip->i_d.di_onlink = 0;
-               xfs_set_projid(ip, 0);
+               VFS_I(ip)->i_mode = 0;
         }
  
+       ASSERT(ip->i_d.di_version >= 2);
         ip->i_delayed_blks = 0;
  
         /*
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h

index 9308c47f2a527dc08b75b66de5d064e0b13e0cfe..7c4dd321b2152915c2d9075222b4d757a08539cf 100644 (file)
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -20,7 +20,36 @@
  
  struct xfs_inode;
  struct xfs_dinode;
-struct xfs_icdinode;
+
+/*
+ * In-memory representation of the XFS inode. It is held in the in-core struct
+ * xfs_inode and represents the current on-disk values, but it is not itself in
+ * on-disk format; xfs_inode_from_disk() and xfs_inode_to_disk() translate
+ * between it and the on-disk structures.
+ */
+struct xfs_icdinode {
+       __int8_t        di_version;     /* inode version */
+       __int8_t        di_format;      /* format of di_c data */
+       __uint16_t      di_flushiter;   /* incremented on flush */
+       __uint32_t      di_uid;         /* owner's user id */
+       __uint32_t      di_gid;         /* owner's group id */
+       __uint16_t      di_projid_lo;   /* lower part of owner's project id */
+       __uint16_t      di_projid_hi;   /* higher part of owner's project id */
+       xfs_fsize_t     di_size;        /* number of bytes in file */
+       xfs_rfsblock_t  di_nblocks;     /* # of direct & btree blocks used */
+       xfs_extlen_t    di_extsize;     /* basic/minimum extent size for file */
+       xfs_extnum_t    di_nextents;    /* number of extents in data fork */
+       xfs_aextnum_t   di_anextents;   /* number of extents in attribute fork*/
+       __uint8_t       di_forkoff;     /* attr fork offs, <<3 for 64b align */
+       __int8_t        di_aformat;     /* format of attr fork's data */
+       __uint32_t      di_dmevmask;    /* DMIG event mask */
+       __uint16_t      di_dmstate;     /* DMIG state info */
+       __uint16_t      di_flags;       /* random flags, XFS_DIFLAG_... */
+
+       __uint64_t      di_flags2;      /* more random flags */
+
+       xfs_ictimestamp_t di_crtime;    /* time created */
+};
  
  /*
   * Inode location information.  Stored in the inode and passed to
@@ -38,8 +67,11 @@ int  xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
  int    xfs_iread(struct xfs_mount *, struct xfs_trans *,
                   struct xfs_inode *, uint);
  void   xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
-void   xfs_dinode_to_disk(struct xfs_dinode *to, struct xfs_icdinode *from);
-void   xfs_dinode_from_disk(struct xfs_icdinode *to, struct xfs_dinode *from);
+void   xfs_inode_to_disk(struct xfs_inode *ip, struct xfs_dinode *to,
+                         xfs_lsn_t lsn);
+void   xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from);
+void   xfs_log_dinode_to_disk(struct xfs_log_dinode *from,
+                              struct xfs_dinode *to);
  
  #if defined(DEBUG)
  void   xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c

index 0defbd02f62d58bb36e62f2cc4738cf862de4e1c..11faf7df14c8099e49759f51f0315dd5caec6632 100644 (file)
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -31,6 +31,7 @@
  #include "xfs_error.h"
  #include "xfs_trace.h"
  #include "xfs_attr_sf.h"
+#include "xfs_da_format.h"
  
  kmem_zone_t *xfs_ifork_zone;
  
@@ -120,7 +121,7 @@ xfs_iformat_fork(
                 return -EFSCORRUPTED;
         }
  
-       switch (ip->i_d.di_mode & S_IFMT) {
+       switch (VFS_I(ip)->i_mode & S_IFMT) {
         case S_IFIFO:
         case S_IFCHR:
         case S_IFBLK:
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h

index 2653146904153178d474172bcacfec55e7742907..d54a8018b079dd3f0c078e5fdf56cf48a151a545 100644 (file)
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -290,6 +290,7 @@ typedef struct xfs_inode_log_format_64 {
         __int32_t               ilf_boffset;    /* off of inode in buffer */
  } xfs_inode_log_format_64_t;
  
+
  /*
   * Flags for xfs_trans_log_inode flags field.
   */
@@ -360,15 +361,15 @@ typedef struct xfs_ictimestamp {
  } xfs_ictimestamp_t;
  
  /*
- * NOTE:  This structure must be kept identical to struct xfs_dinode
- *       except for the endianness annotations.
+ * Define the format of the inode core that is logged. This structure must be
+ * kept identical to struct xfs_dinode except for the endianness annotations.
   */
-typedef struct xfs_icdinode {
+struct xfs_log_dinode {
         __uint16_t      di_magic;       /* inode magic # = XFS_DINODE_MAGIC */
         __uint16_t      di_mode;        /* mode and type of file */
         __int8_t        di_version;     /* inode version */
         __int8_t        di_format;      /* format of di_c data */
-       __uint16_t      di_onlink;      /* old number of links to file */
+       __uint8_t       di_pad3[2];     /* unused in v2/3 inodes */
         __uint32_t      di_uid;         /* owner's user id */
         __uint32_t      di_gid;         /* owner's group id */
         __uint32_t      di_nlink;       /* number of links to file */
@@ -407,13 +408,13 @@ typedef struct xfs_icdinode {
         uuid_t          di_uuid;        /* UUID of the filesystem */
  
         /* structure must be padded to 64 bit alignment */
-} xfs_icdinode_t;
+};
  
-static inline uint xfs_icdinode_size(int version)
+static inline uint xfs_log_dinode_size(int version)
  {
         if (version == 3)
-               return sizeof(struct xfs_icdinode);
-       return offsetof(struct xfs_icdinode, di_next_unlinked);
+               return sizeof(struct xfs_log_dinode);
+       return offsetof(struct xfs_log_dinode, di_next_unlinked);
  }
  
  /*
@@ -495,6 +496,8 @@ enum xfs_blft {
         XFS_BLFT_ATTR_LEAF_BUF,
         XFS_BLFT_ATTR_RMT_BUF,
         XFS_BLFT_SB_BUF,
+       XFS_BLFT_RTBITMAP_BUF,
+       XFS_BLFT_RTSUMMARY_BUF,
         XFS_BLFT_MAX_BUF = (1 << XFS_BLFT_BITS),
  };
  
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h

index f51078f1e92ad4e29b6dbe4c9e4fe8f46f9b2be6..8eed51275bb39b4466f8457b3dc7ba0aa592a583 100644 (file)
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -37,7 +37,7 @@ typedef __uint16_t    xfs_qwarncnt_t;
  #define XFS_DQ_PROJ            0x0002          /* project quota */
  #define XFS_DQ_GROUP           0x0004          /* a group quota */
  #define XFS_DQ_DIRTY           0x0008          /* dquot is dirty */
-#define XFS_DQ_FREEING         0x0010          /* dquot is beeing torn down */
+#define XFS_DQ_FREEING         0x0010          /* dquot is being torn down */
  
  #define XFS_DQ_ALLTYPES                (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
  
@@ -116,6 +116,7 @@ typedef __uint16_t  xfs_qwarncnt_t;
  #define XFS_QMOPT_DQREPAIR     0x0001000 /* repair dquot if damaged */
  #define XFS_QMOPT_GQUOTA       0x0002000 /* group dquot requested */
  #define XFS_QMOPT_ENOSPC       0x0004000 /* enospc instead of edquot (prj) */
+#define XFS_QMOPT_DQNEXT       0x0008000 /* return next dquot >= this ID */
  
  /*
   * flags to xfs_trans_mod_dquot to indicate which field needs to be
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c

index 9b59ffa1fc198d4934a575af40716837bc54c11b..951c044e24e40d024e5abf48057c0f0942111acc 100644 (file)
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -41,6 +41,31 @@
   * Realtime allocator bitmap functions shared with userspace.
   */
  
+/*
+ * Real time buffers need verifiers to avoid runtime warnings during IO.
+ * We don't have anything to verify, however, so these are just dummy
+ * operations.
+ */
+static void
+xfs_rtbuf_verify_read(
+       struct xfs_buf  *bp)
+{
+       return;
+}
+
+static void
+xfs_rtbuf_verify_write(
+       struct xfs_buf  *bp)
+{
+       return;
+}
+
+const struct xfs_buf_ops xfs_rtbuf_ops = {
+       .name = "rtbuf",
+       .verify_read = xfs_rtbuf_verify_read,
+       .verify_write = xfs_rtbuf_verify_write,
+};
+
  /*
   * Get a buffer for the bitmap or summary file block specified.
   * The buffer is returned read and locked.
@@ -68,9 +93,12 @@ xfs_rtbuf_get(
         ASSERT(map.br_startblock != NULLFSBLOCK);
         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
                                    XFS_FSB_TO_DADDR(mp, map.br_startblock),
-                                  mp->m_bsize, 0, &bp, NULL);
+                                  mp->m_bsize, 0, &bp, &xfs_rtbuf_ops);
         if (error)
                 return error;
+
+       xfs_trans_buf_set_type(tp, bp, issum ? XFS_BLFT_RTSUMMARY_BUF
+                                            : XFS_BLFT_RTBITMAP_BUF);
         *bpp = bp;
         return 0;
  }
@@ -983,7 +1011,7 @@ xfs_rtfree_extent(
             mp->m_sb.sb_rextents) {
                 if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM))
                         mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
-               *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0;
+               *(__uint64_t *)&VFS_I(mp->m_rbmip)->i_atime = 0;
                 xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
         }
         return 0;
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h

index b25bb9a343f33f99ca2bf4392d696c59f80178b4..961e6475a3099bb9acf2c5df67f355f35ffbb3c7 100644 (file)
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -27,7 +27,6 @@ extern struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *, xfs_agnumber_t,
  extern void    xfs_perag_put(struct xfs_perag *pag);
  extern int     xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t);
  
-extern void    xfs_sb_calc_crc(struct xfs_buf *bp);
  extern void    xfs_log_sb(struct xfs_trans *tp);
  extern int     xfs_sync_sb(struct xfs_mount *mp, bool wait);
  extern void    xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp);
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h

index 15c3ceb845b91a31353a21123450c820fef26c49..81ac870834da9e63515553e3fa291318acd1e73a 100644 (file)
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -53,6 +53,7 @@ extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops;
  extern const struct xfs_buf_ops xfs_sb_buf_ops;
  extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
  extern const struct xfs_buf_ops xfs_symlink_buf_ops;
+extern const struct xfs_buf_ops xfs_rtbuf_ops;
  
  /*
   * Transaction types.  Used to distinguish types of buffers. These never reach
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c

index 5c57b7b40728904e0ad9c5641563df57400e76f9..d445a64b979e963dccbef0a721989c112351a97e 100644 (file)
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -36,6 +36,21 @@
  #include <linux/pagevec.h>
  #include <linux/writeback.h>
  
+/* flags for direct write completions */
+#define XFS_DIO_FLAG_UNWRITTEN (1 << 0)
+#define XFS_DIO_FLAG_APPEND    (1 << 1)
+
+/*
+ * structure owned by writepages passed to individual writepage calls
+ */
+struct xfs_writepage_ctx {
+       struct xfs_bmbt_irec    imap;
+       bool                    imap_valid;
+       unsigned int            io_type;
+       struct xfs_ioend        *ioend;
+       sector_t                last_block;
+};
+
  void
  xfs_count_page_state(
         struct page             *page,
@@ -214,10 +229,12 @@ xfs_end_io(
         struct xfs_inode *ip = XFS_I(ioend->io_inode);
         int             error = 0;
  
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+       /*
+        * Set an error if the mount has shut down and proceed with end I/O
+        * processing so it can perform whatever cleanups are necessary.
+        */
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
                 ioend->io_error = -EIO;
-               goto done;
-       }
  
         /*
          * For unwritten extents we need to issue transactions to convert a
@@ -265,7 +282,7 @@ xfs_alloc_ioend(
          */
         atomic_set(&ioend->io_remaining, 1);
         ioend->io_error = 0;
-       ioend->io_list = NULL;
+       INIT_LIST_HEAD(&ioend->io_list);
         ioend->io_type = type;
         ioend->io_inode = inode;
         ioend->io_buffer_head = NULL;
@@ -283,8 +300,7 @@ xfs_map_blocks(
         struct inode            *inode,
         loff_t                  offset,
         struct xfs_bmbt_irec    *imap,
-       int                     type,
-       int                     nonblocking)
+       int                     type)
  {
         struct xfs_inode        *ip = XFS_I(inode);
         struct xfs_mount        *mp = ip->i_mount;
@@ -300,12 +316,7 @@ xfs_map_blocks(
         if (type == XFS_IO_UNWRITTEN)
                 bmapi_flags |= XFS_BMAPI_IGSTATE;
  
-       if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
-               if (nonblocking)
-                       return -EAGAIN;
-               xfs_ilock(ip, XFS_ILOCK_SHARED);
-       }
-
+       xfs_ilock(ip, XFS_ILOCK_SHARED);
         ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
                (ip->i_df.if_flags & XFS_IFEXTENTS));
         ASSERT(offset <= mp->m_super->s_maxbytes);
@@ -341,7 +352,7 @@ xfs_map_blocks(
         return 0;
  }
  
-STATIC int
+STATIC bool
  xfs_imap_valid(
         struct inode            *inode,
         struct xfs_bmbt_irec    *imap,
@@ -414,8 +425,7 @@ xfs_start_buffer_writeback(
  STATIC void
  xfs_start_page_writeback(
         struct page             *page,
-       int                     clear_dirty,
-       int                     buffers)
+       int                     clear_dirty)
  {
         ASSERT(PageLocked(page));
         ASSERT(!PageWriteback(page));
@@ -434,10 +444,6 @@ xfs_start_page_writeback(
                 set_page_writeback_keepwrite(page);
  
         unlock_page(page);
-
-       /* If no buffers on the page are to be written, finish it here */
-       if (!buffers)
-               end_page_writeback(page);
  }
  
  static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
@@ -446,153 +452,101 @@ static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
  }
  
  /*
- * Submit all of the bios for all of the ioends we have saved up, covering the
- * initial writepage page and also any probed pages.
- *
- * Because we may have multiple ioends spanning a page, we need to start
- * writeback on all the buffers before we submit them for I/O. If we mark the
- * buffers as we got, then we can end up with a page that only has buffers
- * marked async write and I/O complete on can occur before we mark the other
- * buffers async write.
- *
- * The end result of this is that we trip a bug in end_page_writeback() because
- * we call it twice for the one page as the code in end_buffer_async_write()
- * assumes that all buffers on the page are started at the same time.
- *
- * The fix is two passes across the ioend list - one to start writeback on the
- * buffer_heads, and then submit them for I/O on the second pass.
+ * Submit all of the bios for an ioend. We are only passed a single ioend at a
+ * time; the caller is responsible for chaining prior to submission.
   *
   * If @fail is non-zero, it means that we have a situation where some part of
   * the submission process has failed after we have marked paged for writeback
   * and unlocked them. In this situation, we need to fail the ioend chain rather
   * than submit it to IO. This typically only happens on a filesystem shutdown.
   */
-STATIC void
+STATIC int
  xfs_submit_ioend(
         struct writeback_control *wbc,
         xfs_ioend_t             *ioend,
-       int                     fail)
+       int                     status)
  {
-       xfs_ioend_t             *head = ioend;
-       xfs_ioend_t             *next;
         struct buffer_head      *bh;
         struct bio              *bio;
         sector_t                lastblock = 0;
  
-       /* Pass 1 - start writeback */
-       do {
-               next = ioend->io_list;
-               for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
-                       xfs_start_buffer_writeback(bh);
-       } while ((ioend = next) != NULL);
+       /* Reserve log space if we might write beyond the on-disk inode size. */
+       if (!status &&
+            ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
+               status = xfs_setfilesize_trans_alloc(ioend);
+       /*
+        * If we are failing the IO now, just mark the ioend with an
+        * error and finish it. This will run IO completion immediately
+        * as there is only one reference to the ioend at this point in
+        * time.
+        */
+       if (status) {
+               ioend->io_error = status;
+               xfs_finish_ioend(ioend);
+               return status;
+       }
  
-       /* Pass 2 - submit I/O */
-       ioend = head;
-       do {
-               next = ioend->io_list;
-               bio = NULL;
+       bio = NULL;
+       for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
  
-               /*
-                * If we are failing the IO now, just mark the ioend with an
-                * error and finish it. This will run IO completion immediately
-                * as there is only one reference to the ioend at this point in
-                * time.
-                */
-               if (fail) {
-                       ioend->io_error = fail;
-                       xfs_finish_ioend(ioend);
-                       continue;
+               if (!bio) {
+retry:
+                       bio = xfs_alloc_ioend_bio(bh);
+               } else if (bh->b_blocknr != lastblock + 1) {
+                       xfs_submit_ioend_bio(wbc, ioend, bio);
+                       goto retry;
                 }
  
-               for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
-
-                       if (!bio) {
- retry:
-                               bio = xfs_alloc_ioend_bio(bh);
-                       } else if (bh->b_blocknr != lastblock + 1) {
-                               xfs_submit_ioend_bio(wbc, ioend, bio);
-                               goto retry;
-                       }
-
-                       if (xfs_bio_add_buffer(bio, bh) != bh->b_size) {
-                               xfs_submit_ioend_bio(wbc, ioend, bio);
-                               goto retry;
-                       }
-
-                       lastblock = bh->b_blocknr;
-               }
-               if (bio)
+               if (xfs_bio_add_buffer(bio, bh) != bh->b_size) {
                         xfs_submit_ioend_bio(wbc, ioend, bio);
-               xfs_finish_ioend(ioend);
-       } while ((ioend = next) != NULL);
-}
-
-/*
- * Cancel submission of all buffer_heads so far in this endio.
- * Toss the endio too.  Only ever called for the initial page
- * in a writepage request, so only ever one page.
- */
-STATIC void
-xfs_cancel_ioend(
-       xfs_ioend_t             *ioend)
-{
-       xfs_ioend_t             *next;
-       struct buffer_head      *bh, *next_bh;
-
-       do {
-               next = ioend->io_list;
-               bh = ioend->io_buffer_head;
-               do {
-                       next_bh = bh->b_private;
-                       clear_buffer_async_write(bh);
-                       /*
-                        * The unwritten flag is cleared when added to the
-                        * ioend. We're not submitting for I/O so mark the
-                        * buffer unwritten again for next time around.
-                        */
-                       if (ioend->io_type == XFS_IO_UNWRITTEN)
-                               set_buffer_unwritten(bh);
-                       unlock_buffer(bh);
-               } while ((bh = next_bh) != NULL);
+                       goto retry;
+               }
  
-               mempool_free(ioend, xfs_ioend_pool);
-       } while ((ioend = next) != NULL);
+               lastblock = bh->b_blocknr;
+       }
+       if (bio)
+               xfs_submit_ioend_bio(wbc, ioend, bio);
+       xfs_finish_ioend(ioend);
+       return 0;
  }
  
  /*
   * Test to see if we've been building up a completion structure for
   * earlier buffers -- if so, we try to append to this ioend if we
   * can, otherwise we finish off any current ioend and start another.
- * Return true if we've finished the given ioend.
+ * Queue the ioend we finished off on @iolist so that the caller can submit it
+ * once it has finished processing the dirty page.
   */
  STATIC void
  xfs_add_to_ioend(
         struct inode            *inode,
         struct buffer_head      *bh,
         xfs_off_t               offset,
-       unsigned int            type,
-       xfs_ioend_t             **result,
-       int                     need_ioend)
+       struct xfs_writepage_ctx *wpc,
+       struct list_head        *iolist)
  {
-       xfs_ioend_t             *ioend = *result;
-
-       if (!ioend || need_ioend || type != ioend->io_type) {
-               xfs_ioend_t     *previous = *result;
-
-               ioend = xfs_alloc_ioend(inode, type);
-               ioend->io_offset = offset;
-               ioend->io_buffer_head = bh;
-               ioend->io_buffer_tail = bh;
-               if (previous)
-                       previous->io_list = ioend;
-               *result = ioend;
+       if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
+           bh->b_blocknr != wpc->last_block + 1 ||
+           offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
+               struct xfs_ioend        *new;
+
+               if (wpc->ioend)
+                       list_add(&wpc->ioend->io_list, iolist);
+
+               new = xfs_alloc_ioend(inode, wpc->io_type);
+               new->io_offset = offset;
+               new->io_buffer_head = bh;
+               new->io_buffer_tail = bh;
+               wpc->ioend = new;
         } else {
-               ioend->io_buffer_tail->b_private = bh;
-               ioend->io_buffer_tail = bh;
+               wpc->ioend->io_buffer_tail->b_private = bh;
+               wpc->ioend->io_buffer_tail = bh;
         }
  
         bh->b_private = NULL;
-       ioend->io_size += bh->b_size;
+       wpc->ioend->io_size += bh->b_size;
+       wpc->last_block = bh->b_blocknr;
+       xfs_start_buffer_writeback(bh);
  }
  
  STATIC void
@@ -678,183 +632,6 @@ xfs_check_page_type(
         return false;
  }
  
-/*
- * Allocate & map buffers for page given the extent map. Write it out.
- * except for the original page of a writepage, this is called on
- * delalloc/unwritten pages only, for the original page it is possible
- * that the page has no mapping at all.
- */
-STATIC int
-xfs_convert_page(
-       struct inode            *inode,
-       struct page             *page,
-       loff_t                  tindex,
-       struct xfs_bmbt_irec    *imap,
-       xfs_ioend_t             **ioendp,
-       struct writeback_control *wbc)
-{
-       struct buffer_head      *bh, *head;
-       xfs_off_t               end_offset;
-       unsigned long           p_offset;
-       unsigned int            type;
-       int                     len, page_dirty;
-       int                     count = 0, done = 0, uptodate = 1;
-       xfs_off_t               offset = page_offset(page);
-
-       if (page->index != tindex)
-               goto fail;
-       if (!trylock_page(page))
-               goto fail;
-       if (PageWriteback(page))
-               goto fail_unlock_page;
-       if (page->mapping != inode->i_mapping)
-               goto fail_unlock_page;
-       if (!xfs_check_page_type(page, (*ioendp)->io_type, false))
-               goto fail_unlock_page;
-
-       /*
-        * page_dirty is initially a count of buffers on the page before
-        * EOF and is decremented as we move each into a cleanable state.
-        *
-        * Derivation:
-        *
-        * End offset is the highest offset that this page should represent.
-        * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
-        * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
-        * hence give us the correct page_dirty count. On any other page,
-        * it will be zero and in that case we need page_dirty to be the
-        * count of buffers on the page.
-        */
-       end_offset = min_t(unsigned long long,
-                       (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
-                       i_size_read(inode));
-
-       /*
-        * If the current map does not span the entire page we are about to try
-        * to write, then give up. The only way we can write a page that spans
-        * multiple mappings in a single writeback iteration is via the
-        * xfs_vm_writepage() function. Data integrity writeback requires the
-        * entire page to be written in a single attempt, otherwise the part of
-        * the page we don't write here doesn't get written as part of the data
-        * integrity sync.
-        *
-        * For normal writeback, we also don't attempt to write partial pages
-        * here as it simply means that write_cache_pages() will see it under
-        * writeback and ignore the page until some point in the future, at
-        * which time this will be the only page in the file that needs
-        * writeback.  Hence for more optimal IO patterns, we should always
-        * avoid partial page writeback due to multiple mappings on a page here.
-        */
-       if (!xfs_imap_valid(inode, imap, end_offset))
-               goto fail_unlock_page;
-
-       len = 1 << inode->i_blkbits;
-       p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
-                                       PAGE_CACHE_SIZE);
-       p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
-       page_dirty = p_offset / len;
-
-       /*
-        * The moment we find a buffer that doesn't match our current type
-        * specification or can't be written, abort the loop and start
-        * writeback. As per the above xfs_imap_valid() check, only
-        * xfs_vm_writepage() can handle partial page writeback fully - we are
-        * limited here to the buffers that are contiguous with the current
-        * ioend, and hence a buffer we can't write breaks that contiguity and
-        * we have to defer the rest of the IO to xfs_vm_writepage().
-        */
-       bh = head = page_buffers(page);
-       do {
-               if (offset >= end_offset)
-                       break;
-               if (!buffer_uptodate(bh))
-                       uptodate = 0;
-               if (!(PageUptodate(page) || buffer_uptodate(bh))) {
-                       done = 1;
-                       break;
-               }
-
-               if (buffer_unwritten(bh) || buffer_delay(bh) ||
-                   buffer_mapped(bh)) {
-                       if (buffer_unwritten(bh))
-                               type = XFS_IO_UNWRITTEN;
-                       else if (buffer_delay(bh))
-                               type = XFS_IO_DELALLOC;
-                       else
-                               type = XFS_IO_OVERWRITE;
-
-                       /*
-                        * imap should always be valid because of the above
-                        * partial page end_offset check on the imap.
-                        */
-                       ASSERT(xfs_imap_valid(inode, imap, offset));
-
-                       lock_buffer(bh);
-                       if (type != XFS_IO_OVERWRITE)
-                               xfs_map_at_offset(inode, bh, imap, offset);
-                       xfs_add_to_ioend(inode, bh, offset, type,
-                                        ioendp, done);
-
-                       page_dirty--;
-                       count++;
-               } else {
-                       done = 1;
-                       break;
-               }
-       } while (offset += len, (bh = bh->b_this_page) != head);
-
-       if (uptodate && bh == head)
-               SetPageUptodate(page);
-
-       if (count) {
-               if (--wbc->nr_to_write <= 0 &&
-                   wbc->sync_mode == WB_SYNC_NONE)
-                       done = 1;
-       }
-       xfs_start_page_writeback(page, !page_dirty, count);
-
-       return done;
- fail_unlock_page:
-       unlock_page(page);
- fail:
-       return 1;
-}
-
-/*
- * Convert & write out a cluster of pages in the same extent as defined
- * by mp and following the start page.
- */
-STATIC void
-xfs_cluster_write(
-       struct inode            *inode,
-       pgoff_t                 tindex,
-       struct xfs_bmbt_irec    *imap,
-       xfs_ioend_t             **ioendp,
-       struct writeback_control *wbc,
-       pgoff_t                 tlast)
-{
-       struct pagevec          pvec;
-       int                     done = 0, i;
-
-       pagevec_init(&pvec, 0);
-       while (!done && tindex <= tlast) {
-               unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
-
-               if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
-                       break;
-
-               for (i = 0; i < pagevec_count(&pvec); i++) {
-                       done = xfs_convert_page(inode, pvec.pages[i], tindex++,
-                                       imap, ioendp, wbc);
-                       if (done)
-                               break;
-               }
-
-               pagevec_release(&pvec);
-               cond_resched();
-       }
-}
-
  STATIC void
  xfs_vm_invalidatepage(
         struct page             *page,
@@ -931,6 +708,164 @@ out_invalidate:
         return;
  }
  
+/*
+ * We implement an immediate ioend submission policy here to avoid needing to
+ * chain multiple ioends and hence nest mempool allocations which can violate
+ * forward progress guarantees we need to provide. The current ioend we are
+ * adding buffers to is cached on the writepage context, and if the new buffer
+ * does not append to the cached ioend it will create a new ioend and cache that
+ * instead.
+ *
+ * If a new ioend is created and cached, the old ioend is returned and queued
+ * locally for submission once the entire page is processed or an error has been
+ * detected.  While ioends are submitted immediately after they are completed,
+ * batching optimisations are provided by higher level block plugging.
+ *
+ * At the end of a writeback pass, there will be a cached ioend remaining on the
+ * writepage context that the caller will need to submit.
+ */
+static int
+xfs_writepage_map(
+       struct xfs_writepage_ctx *wpc,
+       struct writeback_control *wbc,
+       struct inode            *inode,
+       struct page             *page,
+       loff_t                  offset,
+       __uint64_t              end_offset)
+{
+       LIST_HEAD(submit_list);
+       struct xfs_ioend        *ioend, *next;
+       struct buffer_head      *bh, *head;
+       ssize_t                 len = 1 << inode->i_blkbits;
+       int                     error = 0;
+       int                     count = 0;
+       int                     uptodate = 1;
+
+       bh = head = page_buffers(page);
+       offset = page_offset(page);
+       do {
+               if (offset >= end_offset)
+                       break;
+               if (!buffer_uptodate(bh))
+                       uptodate = 0;
+
+               /*
+                * set_page_dirty dirties all buffers in a page, independent
+                * of their state.  The dirty state however is entirely
+                * meaningless for holes (!mapped && uptodate), so skip
+                * buffers covering holes here.
+                */
+               if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
+                       wpc->imap_valid = false;
+                       continue;
+               }
+
+               if (buffer_unwritten(bh)) {
+                       if (wpc->io_type != XFS_IO_UNWRITTEN) {
+                               wpc->io_type = XFS_IO_UNWRITTEN;
+                               wpc->imap_valid = false;
+                       }
+               } else if (buffer_delay(bh)) {
+                       if (wpc->io_type != XFS_IO_DELALLOC) {
+                               wpc->io_type = XFS_IO_DELALLOC;
+                               wpc->imap_valid = false;
+                       }
+               } else if (buffer_uptodate(bh)) {
+                       if (wpc->io_type != XFS_IO_OVERWRITE) {
+                               wpc->io_type = XFS_IO_OVERWRITE;
+                               wpc->imap_valid = false;
+                       }
+               } else {
+                       if (PageUptodate(page))
+                               ASSERT(buffer_mapped(bh));
+                       /*
+                        * This buffer is not uptodate and will not be
+                        * written to disk.  Ensure that we will put any
+                        * subsequent writeable buffers into a new
+                        * ioend.
+                        */
+                       wpc->imap_valid = false;
+                       continue;
+               }
+
+               if (wpc->imap_valid)
+                       wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
+                                                        offset);
+               if (!wpc->imap_valid) {
+                       error = xfs_map_blocks(inode, offset, &wpc->imap,
+                                            wpc->io_type);
+                       if (error)
+                               goto out;
+                       wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
+                                                        offset);
+               }
+               if (wpc->imap_valid) {
+                       lock_buffer(bh);
+                       if (wpc->io_type != XFS_IO_OVERWRITE)
+                               xfs_map_at_offset(inode, bh, &wpc->imap, offset);
+                       xfs_add_to_ioend(inode, bh, offset, wpc, &submit_list);
+                       count++;
+               }
+
+       } while (offset += len, ((bh = bh->b_this_page) != head));
+
+       if (uptodate && bh == head)
+               SetPageUptodate(page);
+
+       ASSERT(wpc->ioend || list_empty(&submit_list));
+
+out:
+       /*
+        * On error, we have to fail the ioend here because we have locked
+        * buffers in the ioend. If we don't do this, we'll deadlock
+        * invalidating the page as that tries to lock the buffers on the page.
+        * Also, because we may have set pages under writeback, we have to make
+        * sure we run IO completion to mark the error state of the IO
+        * appropriately, so we can't cancel the ioend directly here. That means
+        * we have to mark this page as under writeback if we included any
+        * buffers from it in the ioend chain so that completion treats it
+        * correctly.
+        *
+        * If we didn't include the page in the ioend, then on error we can
+        * simply discard and unlock it as there are no other users of the page
+        * or its buffers right now. The caller will still need to trigger
+        * submission of outstanding ioends on the writepage context so they are
+        * treated correctly on error.
+        */
+       if (count) {
+               xfs_start_page_writeback(page, !error);
+
+               /*
+                * Preserve the original error if there was one, otherwise catch
+                * submission errors here and propagate into subsequent ioend
+                * submissions.
+                */
+               list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
+                       int error2;
+
+                       list_del_init(&ioend->io_list);
+                       error2 = xfs_submit_ioend(wbc, ioend, error);
+                       if (error2 && !error)
+                               error = error2;
+               }
+       } else if (error) {
+               xfs_aops_discard_page(page);
+               ClearPageUptodate(page);
+               unlock_page(page);
+       } else {
+               /*
+                * We can end up here with no error and nothing to write if we
+                * race with a partial page truncate on a sub-page block sized
+                * filesystem. In that case we need to mark the page clean.
+                */
+               xfs_start_page_writeback(page, 1);
+               end_page_writeback(page);
+       }
+
+       mapping_set_error(page->mapping, error);
+       return error;
+}
+
  /*
   * Write out a dirty page.
   *
@@ -940,22 +875,16 @@ out_invalidate:
   * For any other dirty buffer heads on the page we should flush them.
   */
  STATIC int
-xfs_vm_writepage(
+xfs_do_writepage(
         struct page             *page,
-       struct writeback_control *wbc)
+       struct writeback_control *wbc,
+       void                    *data)
  {
+       struct xfs_writepage_ctx *wpc = data;
         struct inode            *inode = page->mapping->host;
-       struct buffer_head      *bh, *head;
-       struct xfs_bmbt_irec    imap;
-       xfs_ioend_t             *ioend = NULL, *iohead = NULL;
         loff_t                  offset;
-       unsigned int            type;
         __uint64_t              end_offset;
-       pgoff_t                 end_index, last_index;
-       ssize_t                 len;
-       int                     err, imap_valid = 0, uptodate = 1;
-       int                     count = 0;
-       int                     nonblocking = 0;
+       pgoff_t                 end_index;
  
         trace_xfs_writepage(inode, page, 0, 0);
  
@@ -982,12 +911,9 @@ xfs_vm_writepage(
         if (WARN_ON_ONCE(current->flags & PF_FSTRANS))
                 goto redirty;
  
-       /* Is this page beyond the end of the file? */
-       offset = i_size_read(inode);
-       end_index = offset >> PAGE_CACHE_SHIFT;
-       last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
-
         /*
+        * Is this page beyond the end of the file?
+        *
          * The page index is less than the end_index, adjust the end_offset
          * to the highest offset that this page should represent.
          * -----------------------------------------------------
@@ -998,6 +924,8 @@ xfs_vm_writepage(
          * |     desired writeback range    |      see else    |
          * ---------------------------------^------------------|
          */
+       offset = i_size_read(inode);
+       end_index = offset >> PAGE_CACHE_SHIFT;
         if (page->index < end_index)
                 end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT;
         else {
@@ -1049,152 +977,7 @@ xfs_vm_writepage(
                 end_offset = offset;
         }
  
-       len = 1 << inode->i_blkbits;
-
-       bh = head = page_buffers(page);
-       offset = page_offset(page);
-       type = XFS_IO_OVERWRITE;
-
-       if (wbc->sync_mode == WB_SYNC_NONE)
-               nonblocking = 1;
-
-       do {
-               int new_ioend = 0;
-
-               if (offset >= end_offset)
-                       break;
-               if (!buffer_uptodate(bh))
-                       uptodate = 0;
-
-               /*
-                * set_page_dirty dirties all buffers in a page, independent
-                * of their state.  The dirty state however is entirely
-                * meaningless for holes (!mapped && uptodate), so skip
-                * buffers covering holes here.
-                */
-               if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
-                       imap_valid = 0;
-                       continue;
-               }
-
-               if (buffer_unwritten(bh)) {
-                       if (type != XFS_IO_UNWRITTEN) {
-                               type = XFS_IO_UNWRITTEN;
-                               imap_valid = 0;
-                       }
-               } else if (buffer_delay(bh)) {
-                       if (type != XFS_IO_DELALLOC) {
-                               type = XFS_IO_DELALLOC;
-                               imap_valid = 0;
-                       }
-               } else if (buffer_uptodate(bh)) {
-                       if (type != XFS_IO_OVERWRITE) {
-                               type = XFS_IO_OVERWRITE;
-                               imap_valid = 0;
-                       }
-               } else {
-                       if (PageUptodate(page))
-                               ASSERT(buffer_mapped(bh));
-                       /*
-                        * This buffer is not uptodate and will not be
-                        * written to disk.  Ensure that we will put any
-                        * subsequent writeable buffers into a new
-                        * ioend.
-                        */
-                       imap_valid = 0;
-                       continue;
-               }
-
-               if (imap_valid)
-                       imap_valid = xfs_imap_valid(inode, &imap, offset);
-               if (!imap_valid) {
-                       /*
-                        * If we didn't have a valid mapping then we need to
-                        * put the new mapping into a separate ioend structure.
-                        * This ensures non-contiguous extents always have
-                        * separate ioends, which is particularly important
-                        * for unwritten extent conversion at I/O completion
-                        * time.
-                        */
-                       new_ioend = 1;
-                       err = xfs_map_blocks(inode, offset, &imap, type,
-                                            nonblocking);
-                       if (err)
-                               goto error;
-                       imap_valid = xfs_imap_valid(inode, &imap, offset);
-               }
-               if (imap_valid) {
-                       lock_buffer(bh);
-                       if (type != XFS_IO_OVERWRITE)
-                               xfs_map_at_offset(inode, bh, &imap, offset);
-                       xfs_add_to_ioend(inode, bh, offset, type, &ioend,
-                                        new_ioend);
-                       count++;
-               }
-
-               if (!iohead)
-                       iohead = ioend;
-
-       } while (offset += len, ((bh = bh->b_this_page) != head));
-
-       if (uptodate && bh == head)
-               SetPageUptodate(page);
-
-       xfs_start_page_writeback(page, 1, count);
-
-       /* if there is no IO to be submitted for this page, we are done */
-       if (!ioend)
-               return 0;
-
-       ASSERT(iohead);
-
-       /*
-        * Any errors from this point onwards need tobe reported through the IO
-        * completion path as we have marked the initial page as under writeback
-        * and unlocked it.
-        */
-       if (imap_valid) {
-               xfs_off_t               end_index;
-
-               end_index = imap.br_startoff + imap.br_blockcount;
-
-               /* to bytes */
-               end_index <<= inode->i_blkbits;
-
-               /* to pages */
-               end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
-
-               /* check against file size */
-               if (end_index > last_index)
-                       end_index = last_index;
-
-               xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
-                                 wbc, end_index);
-       }
-
-
-       /*
-        * Reserve log space if we might write beyond the on-disk inode size.
-        */
-       err = 0;
-       if (ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
-               err = xfs_setfilesize_trans_alloc(ioend);
-
-       xfs_submit_ioend(wbc, iohead, err);
-
-       return 0;
-
-error:
-       if (iohead)
-               xfs_cancel_ioend(iohead);
-
-       if (err == -EAGAIN)
-               goto redirty;
-
-       xfs_aops_discard_page(page);
-       ClearPageUptodate(page);
-       unlock_page(page);
-       return err;
+       return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset);
  
  redirty:
         redirty_page_for_writepage(wbc, page);
@@ -1202,17 +985,41 @@ redirty:
         return 0;
  }
  
+STATIC int
+xfs_vm_writepage(
+       struct page             *page,
+       struct writeback_control *wbc)
+{
+       struct xfs_writepage_ctx wpc = {
+               .io_type = XFS_IO_INVALID,
+       };
+       int                     ret;
+
+       ret = xfs_do_writepage(page, wbc, &wpc);
+       if (wpc.ioend)
+               ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
+       return ret;
+}
+
  STATIC int
  xfs_vm_writepages(
         struct address_space    *mapping,
         struct writeback_control *wbc)
  {
+       struct xfs_writepage_ctx wpc = {
+               .io_type = XFS_IO_INVALID,
+       };
+       int                     ret;
+
         xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
         if (dax_mapping(mapping))
                 return dax_writeback_mapping_range(mapping,
                                 xfs_find_bdev_for_inode(mapping->host), wbc);
  
-       return generic_writepages(mapping, wbc);
+       ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
+       if (wpc.ioend)
+               ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
+       return ret;
  }
  
  /*
@@ -1242,27 +1049,8 @@ xfs_vm_releasepage(
  }
  
  /*
- * When we map a DIO buffer, we may need to attach an ioend that describes the
- * type of write IO we are doing. This passes to the completion function the
- * operations it needs to perform. If the mapping is for an overwrite wholly
- * within the EOF then we don't need an ioend and so we don't allocate one.
- * This avoids the unnecessary overhead of allocating and freeing ioends for
- * workloads that don't require transactions on IO completion.
- *
- * If we get multiple mappings in a single IO, we might be mapping different
- * types. But because the direct IO can only have a single private pointer, we
- * need to ensure that:
- *
- * a) i) the ioend spans the entire region of unwritten mappings; or
- *    ii) the ioend spans all the mappings that cross or are beyond EOF; and
- * b) if it contains unwritten extents, it is *permanently* marked as such
- *
- * We could do this by chaining ioends like buffered IO does, but we only
- * actually get one IO completion callback from the direct IO, and that spans
- * the entire IO regardless of how many mappings and IOs are needed to complete
- * the DIO. There is only going to be one reference to the ioend and its life
- * cycle is constrained by the DIO completion code. hence we don't need
- * reference counting here.
+ * When we map a DIO buffer, we may need to pass flags to
+ * xfs_end_io_direct_write to tell it what kind of write IO we are doing.
   *
   * Note that for DIO, an IO to the highest supported file block offset (i.e.
   * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
@@ -1270,68 +1058,26 @@ xfs_vm_releasepage(
   * extending the file size. We won't know for sure until IO completion is run
   * and the actual max write offset is communicated to the IO completion
   * routine.
- *
- * For DAX page faults, we are preparing to never see unwritten extents here,
- * nor should we ever extend the inode size. Hence we will soon have nothing to
- * do here for this case, ensuring we don't have to provide an IO completion
- * callback to free an ioend that we don't actually need for a fault into the
- * page at offset (2^63 - 1FSB) bytes.
   */
-
  static void
  xfs_map_direct(
         struct inode            *inode,
         struct buffer_head      *bh_result,
         struct xfs_bmbt_irec    *imap,
-       xfs_off_t               offset,
-       bool                    dax_fault)
+       xfs_off_t               offset)
  {
-       struct xfs_ioend        *ioend;
+       uintptr_t               *flags = (uintptr_t *)&bh_result->b_private;
         xfs_off_t               size = bh_result->b_size;
-       int                     type;
-
-       if (ISUNWRITTEN(imap))
-               type = XFS_IO_UNWRITTEN;
-       else
-               type = XFS_IO_OVERWRITE;
  
-       trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap);
-
-       if (dax_fault) {
-               ASSERT(type == XFS_IO_OVERWRITE);
-               trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
-                                           imap);
-               return;
-       }
+       trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size,
+               ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, imap);
  
-       if (bh_result->b_private) {
-               ioend = bh_result->b_private;
-               ASSERT(ioend->io_size > 0);
-               ASSERT(offset >= ioend->io_offset);
-               if (offset + size > ioend->io_offset + ioend->io_size)
-                       ioend->io_size = offset - ioend->io_offset + size;
-
-               if (type == XFS_IO_UNWRITTEN && type != ioend->io_type)
-                       ioend->io_type = XFS_IO_UNWRITTEN;
-
-               trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset,
-                                             ioend->io_size, ioend->io_type,
-                                             imap);
-       } else if (type == XFS_IO_UNWRITTEN ||
-                  offset + size > i_size_read(inode) ||
-                  offset + size < 0) {
-               ioend = xfs_alloc_ioend(inode, type);
-               ioend->io_offset = offset;
-               ioend->io_size = size;
-
-               bh_result->b_private = ioend;
+       if (ISUNWRITTEN(imap)) {
+               *flags |= XFS_DIO_FLAG_UNWRITTEN;
+               set_buffer_defer_completion(bh_result);
+       } else if (offset + size > i_size_read(inode) || offset + size < 0) {
+               *flags |= XFS_DIO_FLAG_APPEND;
                 set_buffer_defer_completion(bh_result);
-
-               trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type,
-                                          imap);
-       } else {
-               trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
-                                           imap);
         }
  }
  
@@ -1502,9 +1248,12 @@ __xfs_get_blocks(
                 if (ISUNWRITTEN(&imap))
                         set_buffer_unwritten(bh_result);
                 /* direct IO needs special help */
-               if (create && direct)
-                       xfs_map_direct(inode, bh_result, &imap, offset,
-                                      dax_fault);
+               if (create && direct) {
+                       if (dax_fault)
+                               ASSERT(!ISUNWRITTEN(&imap));
+                       else
+                               xfs_map_direct(inode, bh_result, &imap, offset);
+               }
         }
  
         /*
@@ -1574,42 +1323,50 @@ xfs_get_blocks_dax_fault(
         return __xfs_get_blocks(inode, iblock, bh_result, create, true, true);
  }
  
-static void
-__xfs_end_io_direct_write(
-       struct inode            *inode,
-       struct xfs_ioend        *ioend,
+/*
+ * Complete a direct I/O write request.
+ *
+ * xfs_map_direct passes us some flags in the private data to tell us what to
+ * do.  If no flags are set, then the write IO is an overwrite wholly within
+ * the existing allocated file size and so there is nothing for us to do.
+ *
+ * Note that in this case the completion can be called in interrupt context,
+ * whereas if we have flags set we will always be called in task context
+ * (i.e. from a workqueue).
+ */
+STATIC int
+xfs_end_io_direct_write(
+       struct kiocb            *iocb,
         loff_t                  offset,
-       ssize_t                 size)
+       ssize_t                 size,
+       void                    *private)
  {
-       struct xfs_mount        *mp = XFS_I(inode)->i_mount;
+       struct inode            *inode = file_inode(iocb->ki_filp);
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       uintptr_t               flags = (uintptr_t)private;
+       int                     error = 0;
  
-       if (XFS_FORCED_SHUTDOWN(mp) || ioend->io_error)
-               goto out_end_io;
+       trace_xfs_end_io_direct_write(ip, offset, size);
  
-       /*
-        * dio completion end_io functions are only called on writes if more
-        * than 0 bytes was written.
-        */
-       ASSERT(size > 0);
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
  
-       /*
-        * The ioend only maps whole blocks, while the IO may be sector aligned.
-        * Hence the ioend offset/size may not match the IO offset/size exactly.
-        * Because we don't map overwrites within EOF into the ioend, the offset
-        * may not match, but only if the endio spans EOF.  Either way, write
-        * the IO sizes into the ioend so that completion processing does the
-        * right thing.
-        */
-       ASSERT(offset + size <= ioend->io_offset + ioend->io_size);
-       ioend->io_size = size;
-       ioend->io_offset = offset;
+       if (size <= 0)
+               return size;
  
         /*
-        * The ioend tells us whether we are doing unwritten extent conversion
+        * The flags tell us whether we are doing unwritten extent conversions
          * or an append transaction that updates the on-disk file size. These
          * cases are the only cases where we should *potentially* be needing
          * to update the VFS inode size.
-        *
+        */
+       if (flags == 0) {
+               ASSERT(offset + size <= i_size_read(inode));
+               return 0;
+       }
+
+       /*
          * We need to update the in-core inode size here so that we don't end up
          * with the on-disk inode size being outside the in-core inode size. We
          * have no other method of updating EOF for AIO, so always do it here
@@ -1620,91 +1377,56 @@ __xfs_end_io_direct_write(
          * here can result in EOF moving backwards and Bad Things Happen when
          * that occurs.
          */
-       spin_lock(&XFS_I(inode)->i_flags_lock);
+       spin_lock(&ip->i_flags_lock);
         if (offset + size > i_size_read(inode))
                 i_size_write(inode, offset + size);
-       spin_unlock(&XFS_I(inode)->i_flags_lock);
+       spin_unlock(&ip->i_flags_lock);
  
-       /*
-        * If we are doing an append IO that needs to update the EOF on disk,
-        * do the transaction reserve now so we can use common end io
-        * processing. Stashing the error (if there is one) in the ioend will
-        * result in the ioend processing passing on the error if it is
-        * possible as we can't return it from here.
-        */
-       if (ioend->io_type == XFS_IO_OVERWRITE)
-               ioend->io_error = xfs_setfilesize_trans_alloc(ioend);
+       if (flags & XFS_DIO_FLAG_UNWRITTEN) {
+               trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
  
-out_end_io:
-       xfs_end_io(&ioend->io_work);
-       return;
-}
+               error = xfs_iomap_write_unwritten(ip, offset, size);
+       } else if (flags & XFS_DIO_FLAG_APPEND) {
+               struct xfs_trans *tp;
  
-/*
- * Complete a direct I/O write request.
- *
- * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
- * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
- * wholly within the EOF and so there is nothing for us to do. Note that in this
- * case the completion can be called in interrupt context, whereas if we have an
- * ioend we will always be called in task context (i.e. from a workqueue).
- */
-STATIC void
-xfs_end_io_direct_write(
-       struct kiocb            *iocb,
-       loff_t                  offset,
-       ssize_t                 size,
-       void                    *private)
-{
-       struct inode            *inode = file_inode(iocb->ki_filp);
-       struct xfs_ioend        *ioend = private;
-
-       trace_xfs_gbmap_direct_endio(XFS_I(inode), offset, size,
-                                    ioend ? ioend->io_type : 0, NULL);
+               trace_xfs_end_io_direct_write_append(ip, offset, size);
  
-       if (!ioend) {
-               ASSERT(offset + size <= i_size_read(inode));
-               return;
+               tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
+               if (error) {
+                       xfs_trans_cancel(tp);
+                       return error;
+               }
+               error = xfs_setfilesize(ip, tp, offset, size);
         }
  
-       __xfs_end_io_direct_write(inode, ioend, offset, size);
+       return error;
  }
  
-static inline ssize_t
-xfs_vm_do_dio(
-       struct inode            *inode,
+STATIC ssize_t
+xfs_vm_direct_IO(
         struct kiocb            *iocb,
         struct iov_iter         *iter,
-       loff_t                  offset,
-       void                    (*endio)(struct kiocb   *iocb,
-                                        loff_t         offset,
-                                        ssize_t        size,
-                                        void           *private),
-       int                     flags)
+       loff_t                  offset)
  {
+       struct inode            *inode = iocb->ki_filp->f_mapping->host;
+       dio_iodone_t            *endio = NULL;
+       int                     flags = 0;
         struct block_device     *bdev;
  
-       if (IS_DAX(inode))
+       if (iov_iter_rw(iter) == WRITE) {
+               endio = xfs_end_io_direct_write;
+               flags = DIO_ASYNC_EXTEND;
+       }
+
+       if (IS_DAX(inode)) {
                 return dax_do_io(iocb, inode, iter, offset,
                                  xfs_get_blocks_direct, endio, 0);
+       }
  
         bdev = xfs_find_bdev_for_inode(inode);
         return  __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
-                                    xfs_get_blocks_direct, endio, NULL, flags);
-}
-
-STATIC ssize_t
-xfs_vm_direct_IO(
-       struct kiocb            *iocb,
-       struct iov_iter         *iter,
-       loff_t                  offset)
-{
-       struct inode            *inode = iocb->ki_filp->f_mapping->host;
-
-       if (iov_iter_rw(iter) == WRITE)
-               return xfs_vm_do_dio(inode, iocb, iter, offset,
-                                    xfs_end_io_direct_write, DIO_ASYNC_EXTEND);
-       return xfs_vm_do_dio(inode, iocb, iter, offset, NULL, 0);
+                       xfs_get_blocks_direct, endio, NULL, flags);
  }
  
  /*
@@ -1756,6 +1478,7 @@ xfs_vm_write_failed(
         loff_t                  from = pos & (PAGE_CACHE_SIZE - 1);
         loff_t                  to = from + len;
         struct buffer_head      *bh, *head;
+       struct xfs_mount        *mp = XFS_I(inode)->i_mount;
  
         /*
          * The request pos offset might be 32 or 64 bit, this is all fine
@@ -1787,14 +1510,23 @@ xfs_vm_write_failed(
                 if (block_start >= to)
                         break;
  
-               if (!buffer_delay(bh))
+               /*
+                * Process delalloc and unwritten buffers beyond EOF. We can
+                * encounter unwritten buffers in the event that a file has
+                * post-EOF unwritten extents and an extending write happens to
+                * fail (e.g., an unaligned write that also involves a delalloc
+                * to the same page).
+                */
+               if (!buffer_delay(bh) && !buffer_unwritten(bh))
                         continue;
  
-               if (!buffer_new(bh) && block_offset < i_size_read(inode))
+               if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) &&
+                   block_offset < i_size_read(inode))
                         continue;
  
-               xfs_vm_kill_delalloc_range(inode, block_offset,
-                                          block_offset + bh->b_size);
+               if (buffer_delay(bh))
+                       xfs_vm_kill_delalloc_range(inode, block_offset,
+                                                  block_offset + bh->b_size);
  
                 /*
                  * This buffer does not contain data anymore. make sure anyone
@@ -1805,6 +1537,7 @@ xfs_vm_write_failed(
                 clear_buffer_mapped(bh);
                 clear_buffer_new(bh);
                 clear_buffer_dirty(bh);
+               clear_buffer_unwritten(bh);
         }
  
  }
@@ -1828,6 +1561,7 @@ xfs_vm_write_begin(
         pgoff_t                 index = pos >> PAGE_CACHE_SHIFT;
         struct page             *page;
         int                     status;
+       struct xfs_mount        *mp = XFS_I(mapping->host)->i_mount;
  
         ASSERT(len <= PAGE_CACHE_SIZE);
  
@@ -1836,6 +1570,8 @@ xfs_vm_write_begin(
                 return -ENOMEM;
  
         status = __block_write_begin(page, pos, len, xfs_get_blocks);
+       if (xfs_mp_fail_writes(mp))
+               status = -EIO;
         if (unlikely(status)) {
                 struct inode    *inode = mapping->host;
                 size_t          isize = i_size_read(inode);
@@ -1848,6 +1584,8 @@ xfs_vm_write_begin(
                  * allocated in this write, not blocks that were previously
                  * written successfully.
                  */
+               if (xfs_mp_fail_writes(mp))
+                       isize = 0;
                 if (pos + len > isize) {
                         ssize_t start = max_t(ssize_t, pos, isize);
  
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h

index a4343c63fb38c60336abbaf8741a75c2a9298a4d..b4421177b68dc1ba619625876c05ee8d22caf570 100644 (file)
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -24,12 +24,14 @@ extern mempool_t *xfs_ioend_pool;
   * Types of I/O for bmap clustering and I/O completion tracking.
   */
  enum {
+       XFS_IO_INVALID,         /* initial state */
         XFS_IO_DELALLOC,        /* covers delalloc region */
         XFS_IO_UNWRITTEN,       /* covers allocated but uninitialized data */
         XFS_IO_OVERWRITE,       /* covers already allocated extent */
  };
  
  #define XFS_IO_TYPES \
+       { XFS_IO_INVALID,               "invalid" }, \
         { XFS_IO_DELALLOC,              "delalloc" }, \
         { XFS_IO_UNWRITTEN,             "unwritten" }, \
         { XFS_IO_OVERWRITE,             "overwrite" }
@@ -39,7 +41,7 @@ enum {
   * It can manage several multi-page bio's at once.
   */
  typedef struct xfs_ioend {
-       struct xfs_ioend        *io_list;       /* next ioend in chain */
+       struct list_head        io_list;        /* next ioend in chain */
         unsigned int            io_type;        /* delalloc / unwritten */
         int                     io_error;       /* I/O error code */
         atomic_t                io_remaining;   /* hold count */
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c

index 0ef7c2ed3f8a8e30e260485ee130d1a3b008ea07..4fa14820e2e22b687ef852b81e1d6b9f9028caf3 100644 (file)
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -202,8 +202,10 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
                                         sbp->namelen,
                                         sbp->valuelen,
                                         &sbp->name[sbp->namelen]);
-               if (error)
+               if (error) {
+                       kmem_free(sbuf);
                         return error;
+               }
                 if (context->seen_enough)
                         break;
                 cursor->offset++;
@@ -454,14 +456,13 @@ xfs_attr3_leaf_list_int(
                                 args.rmtblkcnt = xfs_attr3_rmt_blocks(
                                                         args.dp->i_mount, valuelen);
                                 retval = xfs_attr_rmtval_get(&args);
-                               if (retval)
-                                       return retval;
-                               retval = context->put_listent(context,
-                                               entry->flags,
-                                               name_rmt->name,
-                                               (int)name_rmt->namelen,
-                                               valuelen,
-                                               args.value);
+                               if (!retval)
+                                       retval = context->put_listent(context,
+                                                       entry->flags,
+                                                       name_rmt->name,
+                                                       (int)name_rmt->namelen,
+                                                       valuelen,
+                                                       args.value);
                                 kmem_free(args.value);
                         } else {
                                 retval = context->put_listent(context,
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c

index 6c876012b2e53246bf38ca3af8dd2d23861151fe..a32c1dcae2ff37b3ee1542fd3fbc5dd74a1e0a86 100644 (file)
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -203,10 +203,12 @@ xfs_bmap_rtalloc(
                 ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
  
         /*
-        * Lock out other modifications to the RT bitmap inode.
+        * Lock out modifications to both the RT bitmap and summary inodes
          */
         xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
         xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
+       xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);
  
         /*
          * If it's an allocation to an empty file at offset 0,
@@ -822,7 +824,7 @@ bool
  xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
  {
         /* prealloc/delalloc exists only on regular files */
-       if (!S_ISREG(ip->i_d.di_mode))
+       if (!S_ISREG(VFS_I(ip)->i_mode))
                 return false;
  
         /*
@@ -1727,7 +1729,7 @@ xfs_swap_extents(
         xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
  
         /* Verify that both files have the same format */
-       if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
+       if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) {
                 error = -EINVAL;
                 goto out_unlock;
         }
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c

index 435c7de42e5f322a82845382ad7e1fa54dfe3d0b..9a2191b911377f94e38d81d57d5d037a7e19ae8b 100644 (file)
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -650,7 +650,7 @@ xfs_buf_read_map(
         if (bp) {
                 trace_xfs_buf_read(bp, flags, _RET_IP_);
  
-               if (!XFS_BUF_ISDONE(bp)) {
+               if (!(bp->b_flags & XBF_DONE)) {
                         XFS_STATS_INC(target->bt_mount, xb_get_read);
                         bp->b_ops = ops;
                         _xfs_buf_read(bp, flags);
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h

index c75721acd8679687ae403d9eb5ee463a3cb0dc17..4eb89bd4ee73b6f4265eb63b8238e9571150bf26 100644 (file)
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -302,6 +302,7 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
  
  /* Buffer Utility Routines */
  extern void *xfs_buf_offset(struct xfs_buf *, size_t);
+extern void xfs_buf_stale(struct xfs_buf *bp);
  
  /* Delayed Write Buffer Routines */
  extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
@@ -312,31 +313,6 @@ extern int xfs_buf_delwri_submit_nowait(struct list_head *);
  extern int xfs_buf_init(void);
  extern void xfs_buf_terminate(void);
  
-#define XFS_BUF_ZEROFLAGS(bp) \
-       ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \
-                           XBF_SYNCIO|XBF_FUA|XBF_FLUSH| \
-                           XBF_WRITE_FAIL))
-
-void xfs_buf_stale(struct xfs_buf *bp);
-#define XFS_BUF_UNSTALE(bp)    ((bp)->b_flags &= ~XBF_STALE)
-#define XFS_BUF_ISSTALE(bp)    ((bp)->b_flags & XBF_STALE)
-
-#define XFS_BUF_DONE(bp)       ((bp)->b_flags |= XBF_DONE)
-#define XFS_BUF_UNDONE(bp)     ((bp)->b_flags &= ~XBF_DONE)
-#define XFS_BUF_ISDONE(bp)     ((bp)->b_flags & XBF_DONE)
-
-#define XFS_BUF_ASYNC(bp)      ((bp)->b_flags |= XBF_ASYNC)
-#define XFS_BUF_UNASYNC(bp)    ((bp)->b_flags &= ~XBF_ASYNC)
-#define XFS_BUF_ISASYNC(bp)    ((bp)->b_flags & XBF_ASYNC)
-
-#define XFS_BUF_READ(bp)       ((bp)->b_flags |= XBF_READ)
-#define XFS_BUF_UNREAD(bp)     ((bp)->b_flags &= ~XBF_READ)
-#define XFS_BUF_ISREAD(bp)     ((bp)->b_flags & XBF_READ)
-
-#define XFS_BUF_WRITE(bp)      ((bp)->b_flags |= XBF_WRITE)
-#define XFS_BUF_UNWRITE(bp)    ((bp)->b_flags &= ~XBF_WRITE)
-#define XFS_BUF_ISWRITE(bp)    ((bp)->b_flags & XBF_WRITE)
-
  /*
   * These macros use the IO block map rather than b_bn. b_bn is now really
   * just for the buffer cache index for cached buffers. As IO does not use b_bn
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c

index 7e986da34f6cb40ad3aca9e9845f81a070dd2d4d..99e91a0e554ea6512ce5eb43cb8a338804f550ae 100644 (file)
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -431,7 +431,7 @@ xfs_buf_item_unpin(
         if (freed && stale) {
                 ASSERT(bip->bli_flags & XFS_BLI_STALE);
                 ASSERT(xfs_buf_islocked(bp));
-               ASSERT(XFS_BUF_ISSTALE(bp));
+               ASSERT(bp->b_flags & XBF_STALE);
                 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
  
                 trace_xfs_buf_item_unpin_stale(bip);
@@ -493,7 +493,7 @@ xfs_buf_item_unpin(
                 xfs_buf_hold(bp);
                 bp->b_flags |= XBF_ASYNC;
                 xfs_buf_ioerror(bp, -EIO);
-               XFS_BUF_UNDONE(bp);
+               bp->b_flags &= ~XBF_DONE;
                 xfs_buf_stale(bp);
                 xfs_buf_ioend(bp);
         }
@@ -1067,7 +1067,7 @@ xfs_buf_iodone_callbacks(
          */
         if (XFS_FORCED_SHUTDOWN(mp)) {
                 xfs_buf_stale(bp);
-               XFS_BUF_DONE(bp);
+               bp->b_flags |= XBF_DONE;
                 trace_xfs_buf_item_iodone(bp, _RET_IP_);
                 goto do_callbacks;
         }
@@ -1090,7 +1090,7 @@ xfs_buf_iodone_callbacks(
          * errors tend to affect the whole device and a failing log write
          * will make us give up.  But we really ought to do better here.
          */
-       if (XFS_BUF_ISASYNC(bp)) {
+       if (bp->b_flags & XBF_ASYNC) {
                 ASSERT(bp->b_iodone != NULL);
  
                 trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
@@ -1113,7 +1113,7 @@ xfs_buf_iodone_callbacks(
          * sure to return the error to the caller of xfs_bwrite().
          */
         xfs_buf_stale(bp);
-       XFS_BUF_DONE(bp);
+       bp->b_flags |= XBF_DONE;
  
         trace_xfs_buf_error_relse(bp, _RET_IP_);
  
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c

index 642d55d100758b10fb3b9deec90de526707c3d98..93b3ab0c54350fbdd6e977e787d6b7c3911b792d 100644 (file)
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -665,7 +665,7 @@ xfs_readdir(
         if (XFS_FORCED_SHUTDOWN(dp->i_mount))
                 return -EIO;
  
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
+       ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
         XFS_STATS_INC(dp->i_mount, xs_dir_getdents);
  
         args.dp = dp;
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c

index e85a9519a5aee71c23b7eeb44f282675d427c4b4..272c3f8b6f7d0f11a0564e40b00b0568a28683dd 100644 (file)
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -227,7 +227,7 @@ xfs_discard_extents(
                                 GFP_NOFS, 0);
                 if (error && error != -EOPNOTSUPP) {
                         xfs_info(mp,
-        "discard failed for extent [0x%llu,%u], error %d",
+        "discard failed for extent [0x%llx,%u], error %d",
                                  (unsigned long long)busyp->bno,
                                  busyp->length,
                                  error);
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c

index 9c44d38dcd1f8ac9a11525e1a4e651a8e298b404..316b2a1bdba5f6da82f1bad0dcbc0708151a59d2 100644 (file)
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -92,26 +92,28 @@ xfs_qm_adjust_dqlimits(
  {
         struct xfs_quotainfo    *q = mp->m_quotainfo;
         struct xfs_disk_dquot   *d = &dq->q_core;
+       struct xfs_def_quota    *defq;
         int                     prealloc = 0;
  
         ASSERT(d->d_id);
+       defq = xfs_get_defquota(dq, q);
  
-       if (q->qi_bsoftlimit && !d->d_blk_softlimit) {
-               d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
+       if (defq->bsoftlimit && !d->d_blk_softlimit) {
+               d->d_blk_softlimit = cpu_to_be64(defq->bsoftlimit);
                 prealloc = 1;
         }
-       if (q->qi_bhardlimit && !d->d_blk_hardlimit) {
-               d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
+       if (defq->bhardlimit && !d->d_blk_hardlimit) {
+               d->d_blk_hardlimit = cpu_to_be64(defq->bhardlimit);
                 prealloc = 1;
         }
-       if (q->qi_isoftlimit && !d->d_ino_softlimit)
-               d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
-       if (q->qi_ihardlimit && !d->d_ino_hardlimit)
-               d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
-       if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
-               d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
-       if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
-               d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
+       if (defq->isoftlimit && !d->d_ino_softlimit)
+               d->d_ino_softlimit = cpu_to_be64(defq->isoftlimit);
+       if (defq->ihardlimit && !d->d_ino_hardlimit)
+               d->d_ino_hardlimit = cpu_to_be64(defq->ihardlimit);
+       if (defq->rtbsoftlimit && !d->d_rtb_softlimit)
+               d->d_rtb_softlimit = cpu_to_be64(defq->rtbsoftlimit);
+       if (defq->rtbhardlimit && !d->d_rtb_hardlimit)
+               d->d_rtb_hardlimit = cpu_to_be64(defq->rtbhardlimit);
  
         if (prealloc)
                 xfs_dquot_set_prealloc_limits(dq);
@@ -232,7 +234,8 @@ xfs_qm_init_dquot_blk(
  {
         struct xfs_quotainfo    *q = mp->m_quotainfo;
         xfs_dqblk_t     *d;
-       int             curid, i;
+       xfs_dqid_t      curid;
+       int             i;
  
         ASSERT(tp);
         ASSERT(xfs_buf_islocked(bp));
@@ -243,7 +246,6 @@ xfs_qm_init_dquot_blk(
          * ID of the first dquot in the block - id's are zero based.
          */
         curid = id - (id % q->qi_dqperchunk);
-       ASSERT(curid >= 0);
         memset(d, 0, BBTOB(q->qi_dqchunklen));
         for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
                 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
@@ -464,12 +466,13 @@ xfs_qm_dqtobp(
         struct xfs_bmbt_irec    map;
         int                     nmaps = 1, error;
         struct xfs_buf          *bp;
-       struct xfs_inode        *quotip = xfs_dq_to_quota_inode(dqp);
+       struct xfs_inode        *quotip;
         struct xfs_mount        *mp = dqp->q_mount;
         xfs_dqid_t              id = be32_to_cpu(dqp->q_core.d_id);
         struct xfs_trans        *tp = (tpp ? *tpp : NULL);
         uint                    lock_mode;
  
+       quotip = xfs_quota_inode(dqp->q_mount, dqp->dq_flags);
         dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
  
         lock_mode = xfs_ilock_data_map_shared(quotip);
@@ -684,6 +687,56 @@ error0:
         return error;
  }
  
+/*
+ * Advance to the next id in the current chunk, or if at the
+ * end of the chunk, skip ahead to first id in next allocated chunk
+ * using the SEEK_DATA interface.
+ */
+int
+xfs_dq_get_next_id(
+       xfs_mount_t             *mp,
+       uint                    type,
+       xfs_dqid_t              *id,
+       loff_t                  eof)
+{
+       struct xfs_inode        *quotip;
+       xfs_fsblock_t           start;
+       loff_t                  offset;
+       uint                    lock;
+       xfs_dqid_t              next_id;
+       int                     error = 0;
+
+       /* Simple advance */
+       next_id = *id + 1;
+
+       /* If new ID is within the current chunk, advancing it sufficed */
+       if (next_id % mp->m_quotainfo->qi_dqperchunk) {
+               *id = next_id;
+               return 0;
+       }
+
+       /* Nope, next_id is now past the current chunk, so find the next one */
+       start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk;
+
+       quotip = xfs_quota_inode(mp, type);
+       lock = xfs_ilock_data_map_shared(quotip);
+
+       offset = __xfs_seek_hole_data(VFS_I(quotip), XFS_FSB_TO_B(mp, start),
+                                     eof, SEEK_DATA);
+       if (offset < 0)
+               error = offset;
+
+       xfs_iunlock(quotip, lock);
+
+       /* -ENXIO is essentially "no more data" */
+       if (error)
+               return (error == -ENXIO ? -ENOENT: error);
+
+       /* Convert next data offset back to a quota id */
+       *id = XFS_B_TO_FSB(mp, offset) * mp->m_quotainfo->qi_dqperchunk;
+       return 0;
+}
+
  /*
   * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
   * a locked dquot, doing an allocation (if requested) as needed.
@@ -704,6 +757,7 @@ xfs_qm_dqget(
         struct xfs_quotainfo    *qi = mp->m_quotainfo;
         struct radix_tree_root *tree = xfs_dquot_tree(qi, type);
         struct xfs_dquot        *dqp;
+       loff_t                  eof = 0;
         int                     error;
  
         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -731,6 +785,21 @@ xfs_qm_dqget(
         }
  #endif
  
+       /* Get the end of the quota file if we need it */
+       if (flags & XFS_QMOPT_DQNEXT) {
+               struct xfs_inode        *quotip;
+               xfs_fileoff_t           last;
+               uint                    lock_mode;
+
+               quotip = xfs_quota_inode(mp, type);
+               lock_mode = xfs_ilock_data_map_shared(quotip);
+               error = xfs_bmap_last_offset(quotip, &last, XFS_DATA_FORK);
+               xfs_iunlock(quotip, lock_mode);
+               if (error)
+                       return error;
+               eof = XFS_FSB_TO_B(mp, last);
+       }
+
  restart:
         mutex_lock(&qi->qi_tree_lock);
         dqp = radix_tree_lookup(tree, id);
@@ -744,6 +813,18 @@ restart:
                         goto restart;
                 }
  
+               /* uninit / unused quota found in radix tree, keep looking  */
+               if (flags & XFS_QMOPT_DQNEXT) {
+                       if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
+                               xfs_dqunlock(dqp);
+                               mutex_unlock(&qi->qi_tree_lock);
+                               error = xfs_dq_get_next_id(mp, type, &id, eof);
+                               if (error)
+                                       return error;
+                               goto restart;
+                       }
+               }
+
                 dqp->q_nrefs++;
                 mutex_unlock(&qi->qi_tree_lock);
  
@@ -770,6 +851,13 @@ restart:
         if (ip)
                 xfs_ilock(ip, XFS_ILOCK_EXCL);
  
+       /* If we are asked to find next active id, keep looking */
+       if (error == -ENOENT && (flags & XFS_QMOPT_DQNEXT)) {
+               error = xfs_dq_get_next_id(mp, type, &id, eof);
+               if (!error)
+                       goto restart;
+       }
+
         if (error)
                 return error;
  
@@ -820,6 +908,17 @@ restart:
         qi->qi_dquots++;
         mutex_unlock(&qi->qi_tree_lock);
  
+       /* If we are asked to find next active id, keep looking */
+       if (flags & XFS_QMOPT_DQNEXT) {
+               if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
+                       xfs_qm_dqput(dqp);
+                       error = xfs_dq_get_next_id(mp, type, &id, eof);
+                       if (error)
+                               return error;
+                       goto restart;
+               }
+       }
+
   dqret:
         ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
         trace_xfs_dqget_miss(dqp);
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c

index 652cd3c5b58c1cac1562239c9c644a19dbe588b7..2816d42507bc8ab7cf00fecb775b11e19dfb6088 100644 (file)
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -152,7 +152,7 @@ xfs_nfs_get_inode(
                 return ERR_PTR(error);
         }
  
-       if (ip->i_d.di_gen != generation) {
+       if (VFS_I(ip)->i_generation != generation) {
                 IRELE(ip);
                 return ERR_PTR(-ESTALE);
         }
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c

index 52883ac3cf84c06761afcf0792bbafcf781d509b..ac0fd32de31e4e5455e43da208cdef4861710a21 100644 (file)
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -156,9 +156,9 @@ xfs_update_prealloc_flags(
         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
  
         if (!(flags & XFS_PREALLOC_INVISIBLE)) {
-               ip->i_d.di_mode &= ~S_ISUID;
-               if (ip->i_d.di_mode & S_IXGRP)
-                       ip->i_d.di_mode &= ~S_ISGID;
+               VFS_I(ip)->i_mode &= ~S_ISUID;
+               if (VFS_I(ip)->i_mode & S_IXGRP)
+                       VFS_I(ip)->i_mode &= ~S_ISGID;
                 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
         }
  
@@ -1337,31 +1337,31 @@ out:
         return found;
  }
  
-STATIC loff_t
-xfs_seek_hole_data(
-       struct file             *file,
+/*
+ * The caller must hold the inode locked via xfs_ilock_data_map_shared();
+ * can we craft an appropriate ASSERT?
+ *
+ * @end is passed in because the VFS-level lseek interface is defined such that
+ * any offset past i_size shall return -ENXIO, but we also use this for quota
+ * code which does not maintain i_size, and we want to SEEK_DATA past i_size.
+ */
+loff_t
+__xfs_seek_hole_data(
+       struct inode            *inode,
         loff_t                  start,
+       loff_t                  end,
         int                     whence)
  {
-       struct inode            *inode = file->f_mapping->host;
         struct xfs_inode        *ip = XFS_I(inode);
         struct xfs_mount        *mp = ip->i_mount;
         loff_t                  uninitialized_var(offset);
-       xfs_fsize_t             isize;
         xfs_fileoff_t           fsbno;
-       xfs_filblks_t           end;
-       uint                    lock;
+       xfs_filblks_t           lastbno;
         int                     error;
  
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -EIO;
-
-       lock = xfs_ilock_data_map_shared(ip);
-
-       isize = i_size_read(inode);
-       if (start >= isize) {
+       if (start >= end) {
                 error = -ENXIO;
-               goto out_unlock;
+               goto out_error;
         }
  
         /*
@@ -1369,22 +1369,22 @@ xfs_seek_hole_data(
          * by fsbno to the end block of the file.
          */
         fsbno = XFS_B_TO_FSBT(mp, start);
-       end = XFS_B_TO_FSB(mp, isize);
+       lastbno = XFS_B_TO_FSB(mp, end);
  
         for (;;) {
                 struct xfs_bmbt_irec    map[2];
                 int                     nmap = 2;
                 unsigned int            i;
  
-               error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
+               error = xfs_bmapi_read(ip, fsbno, lastbno - fsbno, map, &nmap,
                                        XFS_BMAPI_ENTIRE);
                 if (error)
-                       goto out_unlock;
+                       goto out_error;
  
                 /* No extents at given offset, must be beyond EOF */
                 if (nmap == 0) {
                         error = -ENXIO;
-                       goto out_unlock;
+                       goto out_error;
                 }
  
                 for (i = 0; i < nmap; i++) {
@@ -1426,7 +1426,7 @@ xfs_seek_hole_data(
                          * hole at the end of any file).
                          */
                         if (whence == SEEK_HOLE) {
-                               offset = isize;
+                               offset = end;
                                 break;
                         }
                         /*
@@ -1434,7 +1434,7 @@ xfs_seek_hole_data(
                          */
                         ASSERT(whence == SEEK_DATA);
                         error = -ENXIO;
-                       goto out_unlock;
+                       goto out_error;
                 }
  
                 ASSERT(i > 1);
@@ -1445,14 +1445,14 @@ xfs_seek_hole_data(
                  */
                 fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
                 start = XFS_FSB_TO_B(mp, fsbno);
-               if (start >= isize) {
+               if (start >= end) {
                         if (whence == SEEK_HOLE) {
-                               offset = isize;
+                               offset = end;
                                 break;
                         }
                         ASSERT(whence == SEEK_DATA);
                         error = -ENXIO;
-                       goto out_unlock;
+                       goto out_error;
                 }
         }
  
@@ -1464,7 +1464,39 @@ out:
          * situation in particular.
          */
         if (whence == SEEK_HOLE)
-               offset = min_t(loff_t, offset, isize);
+               offset = min_t(loff_t, offset, end);
+
+       return offset;
+
+out_error:
+       return error;
+}
+
+STATIC loff_t
+xfs_seek_hole_data(
+       struct file             *file,
+       loff_t                  start,
+       int                     whence)
+{
+       struct inode            *inode = file->f_mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       uint                    lock;
+       loff_t                  offset, end;
+       int                     error = 0;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
+       lock = xfs_ilock_data_map_shared(ip);
+
+       end = i_size_read(inode);
+       offset = __xfs_seek_hole_data(inode, start, end, whence);
+       if (offset < 0) {
+               error = offset;
+               goto out_unlock;
+       }
+
         offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
  
  out_unlock:
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c

index c4c130f9bfb64fec1d7d5dccb27963a236477ced..a51353a1f87f1a5e78064c0598f42397ece8f767 100644 (file)
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -151,7 +151,7 @@ xfs_filestream_pick_ag(
         xfs_agnumber_t          ag, max_ag = NULLAGNUMBER;
         int                     err, trylock, nscan;
  
-       ASSERT(S_ISDIR(ip->i_d.di_mode));
+       ASSERT(S_ISDIR(VFS_I(ip)->i_mode));
  
         /* 2% of an AG's blocks must be free for it to be chosen. */
         minfree = mp->m_sb.sb_agblocks / 50;
@@ -319,7 +319,7 @@ xfs_filestream_lookup_ag(
         xfs_agnumber_t          startag, ag = NULLAGNUMBER;
         struct xfs_mru_cache_elem *mru;
  
-       ASSERT(S_ISREG(ip->i_d.di_mode));
+       ASSERT(S_ISREG(VFS_I(ip)->i_mode));
  
         pip = xfs_filestream_get_parent(ip);
         if (!pip)
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h

index 1b6a98b66886c76d1fab032673ec88f4cb11afa0..f32713f14f9a21c1b752e2e8eb889dea72411f8e 100644 (file)
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -25,6 +25,5 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
  extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
                                 xfs_fsop_resblks_t *outval);
  extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
-extern int xfs_fs_log_dummy(struct xfs_mount *mp);
  
  #endif /* __XFS_FSOPS_H__ */
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c

index d7a490f24ead08e3abf5019654ee5ee6e2e1eb7b..bf2d60749278602b5b4afcda09ede7d3dd89fd1e 100644 (file)
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -63,6 +63,9 @@ xfs_inode_alloc(
                 return NULL;
         }
  
+       /* VFS doesn't initialise i_mode! */
+       VFS_I(ip)->i_mode = 0;
+
         XFS_STATS_INC(mp, vn_active);
         ASSERT(atomic_read(&ip->i_pincount) == 0);
         ASSERT(!spin_is_locked(&ip->i_flags_lock));
@@ -79,7 +82,7 @@ xfs_inode_alloc(
         memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
         ip->i_flags = 0;
         ip->i_delayed_blks = 0;
-       memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
+       memset(&ip->i_d, 0, sizeof(ip->i_d));
  
         return ip;
  }
@@ -98,7 +101,7 @@ void
  xfs_inode_free(
         struct xfs_inode        *ip)
  {
-       switch (ip->i_d.di_mode & S_IFMT) {
+       switch (VFS_I(ip)->i_mode & S_IFMT) {
         case S_IFREG:
         case S_IFDIR:
         case S_IFLNK:
@@ -134,6 +137,34 @@ xfs_inode_free(
         call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
  }
  
+/*
+ * Re-initialise the VFS inode embedded in a reclaimable XFS inode that is
+ * being recycled.  Some fields of the VFS inode carry information about the
+ * on-disk inode, so running inode_init_always() alone would lose it.  Sample
+ * the link count, generation, version and mode first, reinitialise, and then
+ * put the sampled values back unconditionally - they are restored even when
+ * the reinitialisation reports an error, which is passed back to the caller.
+ */
+static int
+xfs_reinit_inode(
+       struct xfs_mount        *mp,
+       struct inode            *inode)
+{
+       const umode_t   saved_mode = inode->i_mode;
+       const uint64_t  saved_version = inode->i_version;
+       const uint32_t  saved_gen = inode->i_generation;
+       const uint32_t  saved_nlink = inode->i_nlink;
+       int             ret;
+
+       ret = inode_init_always(mp->m_super, inode);
+
+       inode->i_mode = saved_mode;
+       inode->i_version = saved_version;
+       inode->i_generation = saved_gen;
+       set_nlink(inode, saved_nlink);
+       return ret;
+}
+
  /*
   * Check the validity of the inode we just found it the cache
   */
@@ -185,7 +216,7 @@ xfs_iget_cache_hit(
         /*
          * If lookup is racing with unlink return an error immediately.
          */
-       if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
+       if (VFS_I(ip)->i_mode == 0 && !(flags & XFS_IGET_CREATE)) {
                 error = -ENOENT;
                 goto out_error;
         }
@@ -208,7 +239,7 @@ xfs_iget_cache_hit(
                 spin_unlock(&ip->i_flags_lock);
                 rcu_read_unlock();
  
-               error = inode_init_always(mp->m_super, inode);
+               error = xfs_reinit_inode(mp, inode);
                 if (error) {
                         /*
                          * Re-initializing the inode failed, and we are in deep
@@ -295,7 +326,7 @@ xfs_iget_cache_miss(
  
         trace_xfs_iget_miss(ip);
  
-       if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
+       if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) {
                 error = -ENOENT;
                 goto out_destroy;
         }
@@ -444,7 +475,7 @@ again:
          * If we have a real type for an on-disk inode, we can setup the inode
          * now.  If it's a new inode being created, xfs_ialloc will handle it.
          */
-       if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
+       if (xfs_iflags_test(ip, XFS_INEW) && VFS_I(ip)->i_mode != 0)
                 xfs_setup_existing_inode(ip);
         return 0;
  
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index ceba1a83cacccd649caf473ebcf2dfae984bb243..96f606deee313aed506b7e7ee229fc801ba5de80 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -57,9 +57,9 @@ kmem_zone_t *xfs_inode_zone;
   */
  #define        XFS_ITRUNC_MAX_EXTENTS  2
  
-STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
-
-STATIC int xfs_iunlink_remove(xfs_trans_t *, xfs_inode_t *);
+STATIC int xfs_iflush_int(struct xfs_inode *, struct xfs_buf *);
+STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *);
+STATIC int xfs_iunlink_remove(struct xfs_trans *, struct xfs_inode *);
  
  /*
   * helper function to extract extent size hint from inode
@@ -766,6 +766,7 @@ xfs_ialloc(
         uint            flags;
         int             error;
         struct timespec tv;
+       struct inode    *inode;
  
         /*
          * Call the space management code to pick
@@ -791,6 +792,7 @@ xfs_ialloc(
         if (error)
                 return error;
         ASSERT(ip != NULL);
+       inode = VFS_I(ip);
  
         /*
          * We always convert v1 inodes to v2 now - we only support filesystems
@@ -800,20 +802,16 @@ xfs_ialloc(
         if (ip->i_d.di_version == 1)
                 ip->i_d.di_version = 2;
  
-       ip->i_d.di_mode = mode;
-       ip->i_d.di_onlink = 0;
-       ip->i_d.di_nlink = nlink;
-       ASSERT(ip->i_d.di_nlink == nlink);
+       inode->i_mode = mode;
+       set_nlink(inode, nlink);
         ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid());
         ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid());
         xfs_set_projid(ip, prid);
-       memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
  
         if (pip && XFS_INHERIT_GID(pip)) {
                 ip->i_d.di_gid = pip->i_d.di_gid;
-               if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {
-                       ip->i_d.di_mode |= S_ISGID;
-               }
+               if ((VFS_I(pip)->i_mode & S_ISGID) && S_ISDIR(mode))
+                       inode->i_mode |= S_ISGID;
         }
  
         /*
@@ -822,38 +820,29 @@ xfs_ialloc(
          * (and only if the irix_sgid_inherit compatibility variable is set).
          */
         if ((irix_sgid_inherit) &&
-           (ip->i_d.di_mode & S_ISGID) &&
-           (!in_group_p(xfs_gid_to_kgid(ip->i_d.di_gid)))) {
-               ip->i_d.di_mode &= ~S_ISGID;
-       }
+           (inode->i_mode & S_ISGID) &&
+           (!in_group_p(xfs_gid_to_kgid(ip->i_d.di_gid))))
+               inode->i_mode &= ~S_ISGID;
  
         ip->i_d.di_size = 0;
         ip->i_d.di_nextents = 0;
         ASSERT(ip->i_d.di_nblocks == 0);
  
         tv = current_fs_time(mp->m_super);
-       ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
-       ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
-       ip->i_d.di_atime = ip->i_d.di_mtime;
-       ip->i_d.di_ctime = ip->i_d.di_mtime;
+       inode->i_mtime = tv;
+       inode->i_atime = tv;
+       inode->i_ctime = tv;
  
-       /*
-        * di_gen will have been taken care of in xfs_iread.
-        */
         ip->i_d.di_extsize = 0;
         ip->i_d.di_dmevmask = 0;
         ip->i_d.di_dmstate = 0;
         ip->i_d.di_flags = 0;
  
         if (ip->i_d.di_version == 3) {
-               ASSERT(ip->i_d.di_ino == ino);
-               ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_meta_uuid));
-               ip->i_d.di_crc = 0;
-               ip->i_d.di_changecount = 1;
-               ip->i_d.di_lsn = 0;
+               inode->i_version = 1;
                 ip->i_d.di_flags2 = 0;
-               memset(&(ip->i_d.di_pad2[0]), 0, sizeof(ip->i_d.di_pad2));
-               ip->i_d.di_crtime = ip->i_d.di_mtime;
+               ip->i_d.di_crtime.t_sec = (__int32_t)tv.tv_sec;
+               ip->i_d.di_crtime.t_nsec = (__int32_t)tv.tv_nsec;
         }
  
  
@@ -1092,35 +1081,24 @@ xfs_dir_ialloc(
  }
  
  /*
- * Decrement the link count on an inode & log the change.
- * If this causes the link count to go to zero, initiate the
- * logging activity required to truncate a file.
+ * Decrement the link count on an inode & log the change.  If this causes the
+ * link count to go to zero, move the inode to AGI unlinked list so that it can
+ * be freed when the last active reference goes away via xfs_inactive().
   */
  int                            /* error */
  xfs_droplink(
         xfs_trans_t *tp,
         xfs_inode_t *ip)
  {
-       int     error;
-
         xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
  
-       ASSERT (ip->i_d.di_nlink > 0);
-       ip->i_d.di_nlink--;
         drop_nlink(VFS_I(ip));
         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
  
-       error = 0;
-       if (ip->i_d.di_nlink == 0) {
-               /*
-                * We're dropping the last link to this file.
-                * Move the on-disk inode to the AGI unlinked list.
-                * From xfs_inactive() we will pull the inode from
-                * the list and free it.
-                */
-               error = xfs_iunlink(tp, ip);
-       }
-       return error;
+       if (VFS_I(ip)->i_nlink)
+               return 0;
+
+       return xfs_iunlink(tp, ip);
  }
  
  /*
@@ -1134,8 +1112,6 @@ xfs_bumplink(
         xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
  
         ASSERT(ip->i_d.di_version > 1);
-       ASSERT(ip->i_d.di_nlink > 0 || (VFS_I(ip)->i_state & I_LINKABLE));
-       ip->i_d.di_nlink++;
         inc_nlink(VFS_I(ip));
         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
         return 0;
@@ -1393,7 +1369,6 @@ xfs_create_tmpfile(
          */
         xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
  
-       ip->i_d.di_nlink--;
         error = xfs_iunlink(tp, ip);
         if (error)
                 goto out_trans_cancel;
@@ -1444,7 +1419,7 @@ xfs_link(
  
         trace_xfs_link(tdp, target_name);
  
-       ASSERT(!S_ISDIR(sip->i_d.di_mode));
+       ASSERT(!S_ISDIR(VFS_I(sip)->i_mode));
  
         if (XFS_FORCED_SHUTDOWN(mp))
                 return -EIO;
@@ -1492,7 +1467,10 @@ xfs_link(
  
         xfs_bmap_init(&free_list, &first_block);
  
-       if (sip->i_d.di_nlink == 0) {
+       /*
+        * Handle initial link state of O_TMPFILE inode
+        */
+       if (VFS_I(sip)->i_nlink == 0) {
                 error = xfs_iunlink_remove(tp, sip);
                 if (error)
                         goto error_return;
@@ -1648,7 +1626,7 @@ xfs_release(
         xfs_mount_t     *mp = ip->i_mount;
         int             error;
  
-       if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
+       if (!S_ISREG(VFS_I(ip)->i_mode) || (VFS_I(ip)->i_mode == 0))
                 return 0;
  
         /* If this is a read-only mount, don't do this (would generate I/O) */
@@ -1679,7 +1657,7 @@ xfs_release(
                 }
         }
  
-       if (ip->i_d.di_nlink == 0)
+       if (VFS_I(ip)->i_nlink == 0)
                 return 0;
  
         if (xfs_can_free_eofblocks(ip, false)) {
@@ -1883,7 +1861,7 @@ xfs_inactive(
          * If the inode is already free, then there can be nothing
          * to clean up here.
          */
-       if (ip->i_d.di_mode == 0) {
+       if (VFS_I(ip)->i_mode == 0) {
                 ASSERT(ip->i_df.if_real_bytes == 0);
                 ASSERT(ip->i_df.if_broot_bytes == 0);
                 return;
@@ -1895,7 +1873,7 @@ xfs_inactive(
         if (mp->m_flags & XFS_MOUNT_RDONLY)
                 return;
  
-       if (ip->i_d.di_nlink != 0) {
+       if (VFS_I(ip)->i_nlink != 0) {
                 /*
                  * force is true because we are evicting an inode from the
                  * cache. Post-eof blocks must be freed, lest we end up with
@@ -1907,7 +1885,7 @@ xfs_inactive(
                 return;
         }
  
-       if (S_ISREG(ip->i_d.di_mode) &&
+       if (S_ISREG(VFS_I(ip)->i_mode) &&
             (ip->i_d.di_size != 0 || XFS_ISIZE(ip) != 0 ||
              ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0))
                 truncate = 1;
@@ -1916,7 +1894,7 @@ xfs_inactive(
         if (error)
                 return;
  
-       if (S_ISLNK(ip->i_d.di_mode))
+       if (S_ISLNK(VFS_I(ip)->i_mode))
                 error = xfs_inactive_symlink(ip);
         else if (truncate)
                 error = xfs_inactive_truncate(ip);
@@ -1952,16 +1930,21 @@ xfs_inactive(
  }
  
  /*
- * This is called when the inode's link count goes to 0.
- * We place the on-disk inode on a list in the AGI.  It
- * will be pulled from this list when the inode is freed.
+ * This is called when the inode's link count goes to 0 or we are creating a
+ * tmpfile via O_TMPFILE. In the case of a tmpfile, the link count is dropped
+ * to zero by the VFS only after we've created the file successfully, so we
+ * have to add it to the unlinked list while the link count is non-zero. Hence
+ * there is no assertion here that the link count is already zero.
+ *
+ * We place the on-disk inode on a list in the AGI.  It will be pulled from this
+ * list when the inode is freed.
   */
-int
+STATIC int
  xfs_iunlink(
-       xfs_trans_t     *tp,
-       xfs_inode_t     *ip)
+       struct xfs_trans *tp,
+       struct xfs_inode *ip)
  {
-       xfs_mount_t     *mp;
+       xfs_mount_t     *mp = tp->t_mountp;
         xfs_agi_t       *agi;
         xfs_dinode_t    *dip;
         xfs_buf_t       *agibp;
@@ -1971,10 +1954,7 @@ xfs_iunlink(
         int             offset;
         int             error;
  
-       ASSERT(ip->i_d.di_nlink == 0);
-       ASSERT(ip->i_d.di_mode != 0);
-
-       mp = tp->t_mountp;
+       ASSERT(VFS_I(ip)->i_mode != 0);
  
         /*
          * Get the agi buffer first.  It ensures lock ordering
@@ -2412,10 +2392,10 @@ xfs_ifree(
         struct xfs_icluster     xic = { 0 };
  
         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       ASSERT(ip->i_d.di_nlink == 0);
+       ASSERT(VFS_I(ip)->i_nlink == 0);
         ASSERT(ip->i_d.di_nextents == 0);
         ASSERT(ip->i_d.di_anextents == 0);
-       ASSERT(ip->i_d.di_size == 0 || !S_ISREG(ip->i_d.di_mode));
+       ASSERT(ip->i_d.di_size == 0 || !S_ISREG(VFS_I(ip)->i_mode));
         ASSERT(ip->i_d.di_nblocks == 0);
  
         /*
@@ -2429,7 +2409,7 @@ xfs_ifree(
         if (error)
                 return error;
  
-       ip->i_d.di_mode = 0;            /* mark incore inode as free */
+       VFS_I(ip)->i_mode = 0;          /* mark incore inode as free */
         ip->i_d.di_flags = 0;
         ip->i_d.di_dmevmask = 0;
         ip->i_d.di_forkoff = 0;         /* mark the attr fork not in use */
@@ -2439,7 +2419,7 @@ xfs_ifree(
          * Bump the generation count so no one will be confused
          * by reincarnations of this inode.
          */
-       ip->i_d.di_gen++;
+       VFS_I(ip)->i_generation++;
         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
  
         if (xic.deleted)
@@ -2526,7 +2506,7 @@ xfs_remove(
  {
         xfs_mount_t             *mp = dp->i_mount;
         xfs_trans_t             *tp = NULL;
-       int                     is_dir = S_ISDIR(ip->i_d.di_mode);
+       int                     is_dir = S_ISDIR(VFS_I(ip)->i_mode);
         int                     error = 0;
         xfs_bmap_free_t         free_list;
         xfs_fsblock_t           first_block;
@@ -2580,8 +2560,8 @@ xfs_remove(
          * If we're removing a directory perform some additional validation.
          */
         if (is_dir) {
-               ASSERT(ip->i_d.di_nlink >= 2);
-               if (ip->i_d.di_nlink != 2) {
+               ASSERT(VFS_I(ip)->i_nlink >= 2);
+               if (VFS_I(ip)->i_nlink != 2) {
                         error = -ENOTEMPTY;
                         goto out_trans_cancel;
                 }
@@ -2771,7 +2751,7 @@ xfs_cross_rename(
         if (dp1 != dp2) {
                 dp2_flags = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
  
-               if (S_ISDIR(ip2->i_d.di_mode)) {
+               if (S_ISDIR(VFS_I(ip2)->i_mode)) {
                         error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
                                                 dp1->i_ino, first_block,
                                                 free_list, spaceres);
@@ -2779,7 +2759,7 @@ xfs_cross_rename(
                                 goto out_trans_abort;
  
                         /* transfer ip2 ".." reference to dp1 */
-                       if (!S_ISDIR(ip1->i_d.di_mode)) {
+                       if (!S_ISDIR(VFS_I(ip1)->i_mode)) {
                                 error = xfs_droplink(tp, dp2);
                                 if (error)
                                         goto out_trans_abort;
@@ -2798,7 +2778,7 @@ xfs_cross_rename(
                         ip2_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
                 }
  
-               if (S_ISDIR(ip1->i_d.di_mode)) {
+               if (S_ISDIR(VFS_I(ip1)->i_mode)) {
                         error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
                                                 dp2->i_ino, first_block,
                                                 free_list, spaceres);
@@ -2806,7 +2786,7 @@ xfs_cross_rename(
                                 goto out_trans_abort;
  
                         /* transfer ip1 ".." reference to dp2 */
-                       if (!S_ISDIR(ip2->i_d.di_mode)) {
+                       if (!S_ISDIR(VFS_I(ip2)->i_mode)) {
                                 error = xfs_droplink(tp, dp1);
                                 if (error)
                                         goto out_trans_abort;
@@ -2903,7 +2883,7 @@ xfs_rename(
         struct xfs_inode        *inodes[__XFS_SORT_INODES];
         int                     num_inodes = __XFS_SORT_INODES;
         bool                    new_parent = (src_dp != target_dp);
-       bool                    src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
+       bool                    src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode);
         int                     spaceres;
         int                     error;
  
@@ -3032,12 +3012,12 @@ xfs_rename(
                  * target and source are directories and that target can be
                  * destroyed, or that neither is a directory.
                  */
-               if (S_ISDIR(target_ip->i_d.di_mode)) {
+               if (S_ISDIR(VFS_I(target_ip)->i_mode)) {
                         /*
                          * Make sure target dir is empty.
                          */
                         if (!(xfs_dir_isempty(target_ip)) ||
-                           (target_ip->i_d.di_nlink > 2)) {
+                           (VFS_I(target_ip)->i_nlink > 2)) {
                                 error = -EEXIST;
                                 goto out_trans_cancel;
                         }
@@ -3144,7 +3124,7 @@ xfs_rename(
          * intermediate state on disk.
          */
         if (wip) {
-               ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0);
+               ASSERT(VFS_I(wip)->i_nlink == 0);
                 error = xfs_bumplink(tp, wip);
                 if (error)
                         goto out_bmap_cancel;
@@ -3313,7 +3293,7 @@ cluster_corrupt_out:
                  * mark it as stale and brelse.
                  */
                 if (bp->b_iodone) {
-                       XFS_BUF_UNDONE(bp);
+                       bp->b_flags &= ~XBF_DONE;
                         xfs_buf_stale(bp);
                         xfs_buf_ioerror(bp, -EIO);
                         xfs_buf_ioend(bp);
@@ -3462,14 +3442,7 @@ xfs_iflush_int(
                         __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
                 goto corrupt_out;
         }
-       if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC,
-                               mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) {
-               xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
-                       "%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x",
-                       __func__, ip->i_ino, ip, ip->i_d.di_magic);
-               goto corrupt_out;
-       }
-       if (S_ISREG(ip->i_d.di_mode)) {
+       if (S_ISREG(VFS_I(ip)->i_mode)) {
                 if (XFS_TEST_ERROR(
                     (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
                     (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
@@ -3479,7 +3452,7 @@ xfs_iflush_int(
                                 __func__, ip->i_ino, ip);
                         goto corrupt_out;
                 }
-       } else if (S_ISDIR(ip->i_d.di_mode)) {
+       } else if (S_ISDIR(VFS_I(ip)->i_mode)) {
                 if (XFS_TEST_ERROR(
                     (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
                     (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
@@ -3523,12 +3496,11 @@ xfs_iflush_int(
                 ip->i_d.di_flushiter++;
  
         /*
-        * Copy the dirty parts of the inode into the on-disk
-        * inode.  We always copy out the core of the inode,
-        * because if the inode is dirty at all the core must
-        * be.
+        * Copy the dirty parts of the inode into the on-disk inode.  We always
+        * copy out the core of the inode, because if the inode is dirty at all
+        * the core must be.
          */
-       xfs_dinode_to_disk(dip, &ip->i_d);
+       xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn);
  
         /* Wrap, we never let the log put out DI_MAX_FLUSH */
         if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
@@ -3580,10 +3552,6 @@ xfs_iflush_int(
          */
         xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
  
-       /* update the lsn in the on disk inode if required */
-       if (ip->i_d.di_version == 3)
-               dip->di_lsn = cpu_to_be64(iip->ili_item.li_lsn);
-
         /* generate the checksum. */
         xfs_dinode_calc_crc(mp, dip);
  
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h

index ca9e11989cbd4f330c6cb0d1a1bede113fd9c8b2..43e1d51b15eb84ca34e978166025b74d30e5b573 100644 (file)
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -63,7 +63,7 @@ typedef struct xfs_inode {
         unsigned long           i_flags;        /* see defined flags below */
         unsigned int            i_delayed_blks; /* count of delay alloc blks */
  
-       xfs_icdinode_t          i_d;            /* most of ondisk inode */
+       struct xfs_icdinode     i_d;            /* most of ondisk inode */
  
         /* VFS inode */
         struct inode            i_vnode;        /* embedded VFS inode */
@@ -88,7 +88,7 @@ static inline struct inode *VFS_I(struct xfs_inode *ip)
   */
  static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip)
  {
-       if (S_ISREG(ip->i_d.di_mode))
+       if (S_ISREG(VFS_I(ip)->i_mode))
                 return i_size_read(VFS_I(ip));
         return ip->i_d.di_size;
  }
@@ -369,7 +369,7 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
   */
  #define XFS_INHERIT_GID(pip)   \
         (((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \
-        ((pip)->i_d.di_mode & S_ISGID))
+        (VFS_I(pip)->i_mode & S_ISGID))
  
  int            xfs_release(struct xfs_inode *ip);
  void           xfs_inactive(struct xfs_inode *ip);
@@ -405,8 +405,6 @@ int         xfs_ifree(struct xfs_trans *, xfs_inode_t *,
                            struct xfs_bmap_free *);
  int            xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
                                       int, xfs_fsize_t);
-int            xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
-
  void           xfs_iext_realloc(xfs_inode_t *, int, int);
  
  void           xfs_iunpin_wait(xfs_inode_t *);
@@ -437,6 +435,8 @@ int xfs_update_prealloc_flags(struct xfs_inode *ip,
  int    xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
                      xfs_fsize_t isize, bool *did_zeroing);
  int    xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
+loff_t __xfs_seek_hole_data(struct inode *inode, loff_t start,
+                            loff_t eof, int whence);
  
  
  /* from xfs_iops.c */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c

index d14b12b8cfefb90f8fe4c92a0033a41cbde2e552..c48b5b18d771fab685e23c03613a1c6e762efcb4 100644 (file)
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -135,7 +135,7 @@ xfs_inode_item_size(
  
         *nvecs += 2;
         *nbytes += sizeof(struct xfs_inode_log_format) +
-                  xfs_icdinode_size(ip->i_d.di_version);
+                  xfs_log_dinode_size(ip->i_d.di_version);
  
         xfs_inode_item_data_fork_size(iip, nvecs, nbytes);
         if (XFS_IFORK_Q(ip))
@@ -322,6 +322,81 @@ xfs_inode_item_format_attr_fork(
         }
  }
  
+static void
+xfs_inode_to_log_dinode(
+       struct xfs_inode        *ip,
+       struct xfs_log_dinode   *to,
+       xfs_lsn_t               lsn)
+{
+       struct xfs_icdinode     *from = &ip->i_d;
+       struct inode            *inode = VFS_I(ip);
+
+       to->di_magic = XFS_DINODE_MAGIC;
+
+       to->di_version = from->di_version;
+       to->di_format = from->di_format;
+       to->di_uid = from->di_uid;
+       to->di_gid = from->di_gid;
+       to->di_projid_lo = from->di_projid_lo;
+       to->di_projid_hi = from->di_projid_hi;
+
+       memset(to->di_pad, 0, sizeof(to->di_pad));
+       memset(to->di_pad3, 0, sizeof(to->di_pad3));
+       to->di_atime.t_sec = inode->i_atime.tv_sec;
+       to->di_atime.t_nsec = inode->i_atime.tv_nsec;
+       to->di_mtime.t_sec = inode->i_mtime.tv_sec;
+       to->di_mtime.t_nsec = inode->i_mtime.tv_nsec;
+       to->di_ctime.t_sec = inode->i_ctime.tv_sec;
+       to->di_ctime.t_nsec = inode->i_ctime.tv_nsec;
+       to->di_nlink = inode->i_nlink;
+       to->di_gen = inode->i_generation;
+       to->di_mode = inode->i_mode;
+
+       to->di_size = from->di_size;
+       to->di_nblocks = from->di_nblocks;
+       to->di_extsize = from->di_extsize;
+       to->di_nextents = from->di_nextents;
+       to->di_anextents = from->di_anextents;
+       to->di_forkoff = from->di_forkoff;
+       to->di_aformat = from->di_aformat;
+       to->di_dmevmask = from->di_dmevmask;
+       to->di_dmstate = from->di_dmstate;
+       to->di_flags = from->di_flags;
+
+       if (from->di_version == 3) {
+               to->di_changecount = inode->i_version;
+               to->di_crtime.t_sec = from->di_crtime.t_sec;
+               to->di_crtime.t_nsec = from->di_crtime.t_nsec;
+               to->di_flags2 = from->di_flags2;
+
+               to->di_ino = ip->i_ino;
+               to->di_lsn = lsn;
+               memset(to->di_pad2, 0, sizeof(to->di_pad2));
+               uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
+               to->di_flushiter = 0;
+       } else {
+               to->di_flushiter = from->di_flushiter;
+       }
+}
+
+/*
+ * Format the inode core. Current timestamp data is only in the VFS inode
+ * fields, so we need to grab them from there. Hence rather than just copying
+ * the XFS inode core structure, format the fields directly into the iovec.
+ */
+static void
+xfs_inode_item_format_core(
+       struct xfs_inode        *ip,
+       struct xfs_log_vec      *lv,
+       struct xfs_log_iovec    **vecp)
+{
+       struct xfs_log_dinode   *dic;
+
+       dic = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_ICORE);
+       xfs_inode_to_log_dinode(ip, dic, ip->i_itemp->ili_item.li_lsn);
+       xlog_finish_iovec(lv, *vecp, xfs_log_dinode_size(ip->i_d.di_version));
+}
+
  /*
   * This is called to fill in the vector of log iovecs for the given inode
   * log item.  It fills the first item with an inode log format structure,
@@ -351,10 +426,7 @@ xfs_inode_item_format(
         ilf->ilf_size = 2; /* format + core */
         xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format));
  
-       xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICORE,
-                       &ip->i_d,
-                       xfs_icdinode_size(ip->i_d.di_version));
-
+       xfs_inode_item_format_core(ip, lv, &vecp);
         xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp);
         if (XFS_IFORK_Q(ip)) {
                 xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c

index 478d04e07f9500d6ceed20231deb6b474161c708..bcb6c19ce3ea4fea69c536343c07ea222de3155d 100644 (file)
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -114,7 +114,7 @@ xfs_find_handle(
                 handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
                                         sizeof(handle.ha_fid.fid_len);
                 handle.ha_fid.fid_pad = 0;
-               handle.ha_fid.fid_gen = ip->i_d.di_gen;
+               handle.ha_fid.fid_gen = inode->i_generation;
                 handle.ha_fid.fid_ino = ip->i_ino;
  
                 hsize = XFS_HSIZE(handle);
@@ -963,7 +963,7 @@ xfs_set_diflags(
                 di_flags |= XFS_DIFLAG_NODEFRAG;
         if (xflags & FS_XFLAG_FILESTREAM)
                 di_flags |= XFS_DIFLAG_FILESTREAM;
-       if (S_ISDIR(ip->i_d.di_mode)) {
+       if (S_ISDIR(VFS_I(ip)->i_mode)) {
                 if (xflags & FS_XFLAG_RTINHERIT)
                         di_flags |= XFS_DIFLAG_RTINHERIT;
                 if (xflags & FS_XFLAG_NOSYMLINKS)
@@ -972,7 +972,7 @@ xfs_set_diflags(
                         di_flags |= XFS_DIFLAG_EXTSZINHERIT;
                 if (xflags & FS_XFLAG_PROJINHERIT)
                         di_flags |= XFS_DIFLAG_PROJINHERIT;
-       } else if (S_ISREG(ip->i_d.di_mode)) {
+       } else if (S_ISREG(VFS_I(ip)->i_mode)) {
                 if (xflags & FS_XFLAG_REALTIME)
                         di_flags |= XFS_DIFLAG_REALTIME;
                 if (xflags & FS_XFLAG_EXTSIZE)
@@ -1059,24 +1059,87 @@ xfs_ioctl_setattr_xflags(
         return 0;
  }
  
+/*
+ * If we are changing DAX flags, we have to ensure the file is clean and any
+ * cached objects in the address space are invalidated and removed. This
+ * requires us to lock out other IO and page faults similar to a truncate
+ * operation. The locks need to be held until the transaction has been committed
+ * so that the cache invalidation is atomic with respect to the DAX flag
+ * manipulation.
+ */
+static int
+xfs_ioctl_setattr_dax_invalidate(
+       struct xfs_inode        *ip,
+       struct fsxattr          *fa,
+       int                     *join_flags)
+{
+       struct inode            *inode = VFS_I(ip);
+       int                     error;
+
+       *join_flags = 0;
+
+       /*
+        * It is only valid to set the DAX flag on regular files and
+        * directories on filesystems where the block size is equal to the page
+        * size. On directories it serves as an inherit hint.
+        */
+       if (fa->fsx_xflags & FS_XFLAG_DAX) {
+               if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
+                       return -EINVAL;
+               if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE)
+                       return -EINVAL;
+       }
+
+       /* If the DAX state is not changing, we have nothing to do here. */
+       if ((fa->fsx_xflags & FS_XFLAG_DAX) && IS_DAX(inode))
+               return 0;
+       if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode))
+               return 0;
+
+       /* lock, flush and invalidate mapping in preparation for flag change */
+       xfs_ilock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
+       error = filemap_write_and_wait(inode->i_mapping);
+       if (error)
+               goto out_unlock;
+       error = invalidate_inode_pages2(inode->i_mapping);
+       if (error)
+               goto out_unlock;
+
+       *join_flags = XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL;
+       return 0;
+
+out_unlock:
+       xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
+       return error;
+
+}
+
  /*
   * Set up the transaction structure for the setattr operation, checking that we
   * have permission to do so. On success, return a clean transaction and the
   * inode locked exclusively ready for further operation specific checks. On
   * failure, return an error without modifying or locking the inode.
+ *
+ * The inode might already be IO locked on call. If this is the case, it is
+ * indicated in @join_flags and we take full responsibility for ensuring they
+ * are unlocked from now on. Hence if we have an error here, we still have to
+ * unlock them. Otherwise, once they are joined to the transaction, they will
+ * be unlocked on commit/cancel.
   */
  static struct xfs_trans *
  xfs_ioctl_setattr_get_trans(
-       struct xfs_inode        *ip)
+       struct xfs_inode        *ip,
+       int                     join_flags)
  {
         struct xfs_mount        *mp = ip->i_mount;
         struct xfs_trans        *tp;
-       int                     error;
+       int                     error = -EROFS;
  
         if (mp->m_flags & XFS_MOUNT_RDONLY)
-               return ERR_PTR(-EROFS);
+               goto out_unlock;
+       error = -EIO;
         if (XFS_FORCED_SHUTDOWN(mp))
-               return ERR_PTR(-EIO);
+               goto out_unlock;
  
         tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
         error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
@@ -1084,7 +1147,8 @@ xfs_ioctl_setattr_get_trans(
                 goto out_cancel;
  
         xfs_ilock(ip, XFS_ILOCK_EXCL);
-       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags);
+       join_flags = 0;
  
         /*
          * CAP_FOWNER overrides the following restrictions:
@@ -1104,6 +1168,9 @@ xfs_ioctl_setattr_get_trans(
  
  out_cancel:
         xfs_trans_cancel(tp);
+out_unlock:
+       if (join_flags)
+               xfs_iunlock(ip, join_flags);
         return ERR_PTR(error);
  }
  
@@ -1128,14 +1195,14 @@ xfs_ioctl_setattr_check_extsize(
  {
         struct xfs_mount        *mp = ip->i_mount;
  
-       if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode))
+       if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(VFS_I(ip)->i_mode))
                 return -EINVAL;
  
         if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
-           !S_ISDIR(ip->i_d.di_mode))
+           !S_ISDIR(VFS_I(ip)->i_mode))
                 return -EINVAL;
  
-       if (S_ISREG(ip->i_d.di_mode) && ip->i_d.di_nextents &&
+       if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_d.di_nextents &&
             ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize))
                 return -EINVAL;
  
@@ -1202,6 +1269,7 @@ xfs_ioctl_setattr(
         struct xfs_dquot        *pdqp = NULL;
         struct xfs_dquot        *olddquot = NULL;
         int                     code;
+       int                     join_flags = 0;
  
         trace_xfs_ioctl_setattr(ip);
  
@@ -1225,7 +1293,18 @@ xfs_ioctl_setattr(
                         return code;
         }
  
-       tp = xfs_ioctl_setattr_get_trans(ip);
+       /*
+        * Changing DAX config may require inode locking for mapping
+        * invalidation. These need to be held all the way to transaction commit
+        * or cancel time, so need to be passed through to
+        * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
+        * appropriately.
+        */
+       code = xfs_ioctl_setattr_dax_invalidate(ip, fa, &join_flags);
+       if (code)
+               goto error_free_dquots;
+
+       tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
         if (IS_ERR(tp)) {
                 code = PTR_ERR(tp);
                 goto error_free_dquots;
@@ -1256,9 +1335,9 @@ xfs_ioctl_setattr(
          * successful return from chown()
          */
  
-       if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+       if ((VFS_I(ip)->i_mode & (S_ISUID|S_ISGID)) &&
             !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID))
-               ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+               VFS_I(ip)->i_mode &= ~(S_ISUID|S_ISGID);
  
         /* Change the ownerships and register project quota modifications */
         if (xfs_get_projid(ip) != fa->fsx_projid) {
@@ -1341,6 +1420,7 @@ xfs_ioc_setxflags(
         struct xfs_trans        *tp;
         struct fsxattr          fa;
         unsigned int            flags;
+       int                     join_flags = 0;
         int                     error;
  
         if (copy_from_user(&flags, arg, sizeof(flags)))
@@ -1357,7 +1437,18 @@ xfs_ioc_setxflags(
         if (error)
                 return error;
  
-       tp = xfs_ioctl_setattr_get_trans(ip);
+       /*
+        * Changing DAX config may require inode locking for mapping
+        * invalidation. These need to be held all the way to transaction commit
+        * or cancel time, so need to be passed through to
+        * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
+        * appropriately.
+        */
+       error = xfs_ioctl_setattr_dax_invalidate(ip, &fa, &join_flags);
+       if (error)
+               goto out_drop_write;
+
+       tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
         if (IS_ERR(tp)) {
                 error = PTR_ERR(tp);
                 goto out_drop_write;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c

index 76b71a1c6c323e2043aeab1e93fb5a66db9f39d0..fb7dc61f4a29d7cee3d4683c675c4551e7669e52 100644 (file)
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -459,8 +459,8 @@ xfs_vn_getattr(
  
         stat->size = XFS_ISIZE(ip);
         stat->dev = inode->i_sb->s_dev;
-       stat->mode = ip->i_d.di_mode;
-       stat->nlink = ip->i_d.di_nlink;
+       stat->mode = inode->i_mode;
+       stat->nlink = inode->i_nlink;
         stat->uid = inode->i_uid;
         stat->gid = inode->i_gid;
         stat->ino = ip->i_ino;
@@ -506,9 +506,6 @@ xfs_setattr_mode(
  
         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
  
-       ip->i_d.di_mode &= S_IFMT;
-       ip->i_d.di_mode |= mode & ~S_IFMT;
-
         inode->i_mode &= S_IFMT;
         inode->i_mode |= mode & ~S_IFMT;
  }
@@ -522,21 +519,12 @@ xfs_setattr_time(
  
         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
  
-       if (iattr->ia_valid & ATTR_ATIME) {
+       if (iattr->ia_valid & ATTR_ATIME)
                 inode->i_atime = iattr->ia_atime;
-               ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
-               ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
-       }
-       if (iattr->ia_valid & ATTR_CTIME) {
+       if (iattr->ia_valid & ATTR_CTIME)
                 inode->i_ctime = iattr->ia_ctime;
-               ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
-               ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
-       }
-       if (iattr->ia_valid & ATTR_MTIME) {
+       if (iattr->ia_valid & ATTR_MTIME)
                 inode->i_mtime = iattr->ia_mtime;
-               ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
-               ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
-       }
  }
  
  int
@@ -661,9 +649,9 @@ xfs_setattr_nonsize(
                  * The set-user-ID and set-group-ID bits of a file will be
                  * cleared upon successful return from chown()
                  */
-               if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+               if ((inode->i_mode & (S_ISUID|S_ISGID)) &&
                     !capable(CAP_FSETID))
-                       ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+                       inode->i_mode &= ~(S_ISUID|S_ISGID);
  
                 /*
                  * Change the ownerships and register quota modifications
@@ -773,7 +761,7 @@ xfs_setattr_size(
  
         ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
         ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
-       ASSERT(S_ISREG(ip->i_d.di_mode));
+       ASSERT(S_ISREG(inode->i_mode));
         ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
                 ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
  
@@ -991,21 +979,13 @@ xfs_vn_update_time(
         }
  
         xfs_ilock(ip, XFS_ILOCK_EXCL);
-       if (flags & S_CTIME) {
+       if (flags & S_CTIME)
                 inode->i_ctime = *now;
-               ip->i_d.di_ctime.t_sec = (__int32_t)now->tv_sec;
-               ip->i_d.di_ctime.t_nsec = (__int32_t)now->tv_nsec;
-       }
-       if (flags & S_MTIME) {
+       if (flags & S_MTIME)
                 inode->i_mtime = *now;
-               ip->i_d.di_mtime.t_sec = (__int32_t)now->tv_sec;
-               ip->i_d.di_mtime.t_nsec = (__int32_t)now->tv_nsec;
-       }
-       if (flags & S_ATIME) {
+       if (flags & S_ATIME)
                 inode->i_atime = *now;
-               ip->i_d.di_atime.t_sec = (__int32_t)now->tv_sec;
-               ip->i_d.di_atime.t_nsec = (__int32_t)now->tv_nsec;
-       }
+
         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
         xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
         return xfs_trans_commit(tp);
@@ -1205,8 +1185,10 @@ xfs_diflags_to_iflags(
                 inode->i_flags |= S_SYNC;
         if (flags & XFS_DIFLAG_NOATIME)
                 inode->i_flags |= S_NOATIME;
-       if (ip->i_mount->m_flags & XFS_MOUNT_DAX ||
-           ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
+       if (S_ISREG(inode->i_mode) &&
+           ip->i_mount->m_sb.sb_blocksize == PAGE_SIZE &&
+           (ip->i_mount->m_flags & XFS_MOUNT_DAX ||
+            ip->i_d.di_flags2 & XFS_DIFLAG2_DAX))
                 inode->i_flags |= S_DAX;
  }
  
@@ -1232,8 +1214,6 @@ xfs_setup_inode(
         /* make the inode look hashed for the writeback code */
         hlist_add_fake(&inode->i_hash);
  
-       inode->i_mode   = ip->i_d.di_mode;
-       set_nlink(inode, ip->i_d.di_nlink);
         inode->i_uid    = xfs_uid_to_kuid(ip->i_d.di_uid);
         inode->i_gid    = xfs_gid_to_kgid(ip->i_d.di_gid);
  
@@ -1249,14 +1229,7 @@ xfs_setup_inode(
                 break;
         }
  
-       inode->i_generation = ip->i_d.di_gen;
         i_size_write(inode, ip->i_d.di_size);
-       inode->i_atime.tv_sec   = ip->i_d.di_atime.t_sec;
-       inode->i_atime.tv_nsec  = ip->i_d.di_atime.t_nsec;
-       inode->i_mtime.tv_sec   = ip->i_d.di_mtime.t_sec;
-       inode->i_mtime.tv_nsec  = ip->i_d.di_mtime.t_nsec;
-       inode->i_ctime.tv_sec   = ip->i_d.di_ctime.t_sec;
-       inode->i_ctime.tv_nsec  = ip->i_d.di_ctime.t_nsec;
         xfs_diflags_to_iflags(inode, ip);
  
         ip->d_ops = ip->i_mount->m_nondir_inode_ops;
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c

index 930ebd86bebac3a300faf44fabe77aa28258cf60..ce73eb34620dbbf06570650a582163a98a0d8f92 100644 (file)
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -57,6 +57,7 @@ xfs_bulkstat_one_int(
  {
         struct xfs_icdinode     *dic;           /* dinode core info pointer */
         struct xfs_inode        *ip;            /* incore inode pointer */
+       struct inode            *inode;
         struct xfs_bstat        *buf;           /* return buffer */
         int                     error = 0;      /* error value */
  
@@ -77,30 +78,33 @@ xfs_bulkstat_one_int(
  
         ASSERT(ip != NULL);
         ASSERT(ip->i_imap.im_blkno != 0);
+       inode = VFS_I(ip);
  
         dic = &ip->i_d;
  
         /* xfs_iget returns the following without needing
          * further change.
          */
-       buf->bs_nlink = dic->di_nlink;
         buf->bs_projid_lo = dic->di_projid_lo;
         buf->bs_projid_hi = dic->di_projid_hi;
         buf->bs_ino = ino;
-       buf->bs_mode = dic->di_mode;
         buf->bs_uid = dic->di_uid;
         buf->bs_gid = dic->di_gid;
         buf->bs_size = dic->di_size;
-       buf->bs_atime.tv_sec = dic->di_atime.t_sec;
-       buf->bs_atime.tv_nsec = dic->di_atime.t_nsec;
-       buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
-       buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
-       buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
-       buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec;
+
+       buf->bs_nlink = inode->i_nlink;
+       buf->bs_atime.tv_sec = inode->i_atime.tv_sec;
+       buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec;
+       buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec;
+       buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec;
+       buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec;
+       buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec;
+       buf->bs_gen = inode->i_generation;
+       buf->bs_mode = inode->i_mode;
+
         buf->bs_xflags = xfs_ip2xflags(ip);
         buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
         buf->bs_extents = dic->di_nextents;
-       buf->bs_gen = dic->di_gen;
         memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
         buf->bs_dmevmask = dic->di_dmevmask;
         buf->bs_dmstate = dic->di_dmstate;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c

index 9c9a1c9bcc7f0bf0090fa4ffdffdfa35d6068122..b49ccf5c1d7564402c39671c4e67b7cf92ab8082 100644 (file)
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1212,7 +1212,7 @@ xlog_iodone(xfs_buf_t *bp)
         }
  
         /* log I/O is always issued ASYNC */
-       ASSERT(XFS_BUF_ISASYNC(bp));
+       ASSERT(bp->b_flags & XBF_ASYNC);
         xlog_state_done_syncing(iclog, aborted);
  
         /*
@@ -1864,9 +1864,8 @@ xlog_sync(
  
         bp->b_io_length = BTOBB(count);
         bp->b_fspriv = iclog;
-       XFS_BUF_ZEROFLAGS(bp);
-       XFS_BUF_ASYNC(bp);
-       bp->b_flags |= XBF_SYNCIO;
+       bp->b_flags &= ~(XBF_FUA | XBF_FLUSH);
+       bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE);
  
         if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
                 bp->b_flags |= XBF_FUA;
@@ -1893,12 +1892,11 @@ xlog_sync(
  
         /* account for log which doesn't start at block #0 */
         XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
+
         /*
          * Don't call xfs_bwrite here. We do log-syncs even when the filesystem
          * is shutting down.
          */
-       XFS_BUF_WRITE(bp);
-
         error = xlog_bdstrat(bp);
         if (error) {
                 xfs_buf_ioerror_alert(bp, "xlog_sync");
@@ -1910,9 +1908,8 @@ xlog_sync(
                 xfs_buf_associate_memory(bp,
                                 (char *)&iclog->ic_header + count, split);
                 bp->b_fspriv = iclog;
-               XFS_BUF_ZEROFLAGS(bp);
-               XFS_BUF_ASYNC(bp);
-               bp->b_flags |= XBF_SYNCIO;
+               bp->b_flags &= ~(XBF_FUA | XBF_FLUSH);
+               bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE);
                 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
                         bp->b_flags |= XBF_FUA;
  
@@ -1921,7 +1918,6 @@ xlog_sync(
  
                 /* account for internal log which doesn't start at block #0 */
                 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
-               XFS_BUF_WRITE(bp);
                 error = xlog_bdstrat(bp);
                 if (error) {
                         xfs_buf_ioerror_alert(bp, "xlog_sync (split)");
@@ -2012,77 +2008,81 @@ xlog_print_tic_res(
         uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t);
  
         /* match with XLOG_REG_TYPE_* in xfs_log.h */
-       static char *res_type_str[XLOG_REG_TYPE_MAX] = {
-           "bformat",
-           "bchunk",
-           "efi_format",
-           "efd_format",
-           "iformat",
-           "icore",
-           "iext",
-           "ibroot",
-           "ilocal",
-           "iattr_ext",
-           "iattr_broot",
-           "iattr_local",
-           "qformat",
-           "dquot",
-           "quotaoff",
-           "LR header",
-           "unmount",
-           "commit",
-           "trans header"
+#define REG_TYPE_STR(type, str)        [XLOG_REG_TYPE_##type] = str
+       static char *res_type_str[XLOG_REG_TYPE_MAX + 1] = {
+           REG_TYPE_STR(BFORMAT, "bformat"),
+           REG_TYPE_STR(BCHUNK, "bchunk"),
+           REG_TYPE_STR(EFI_FORMAT, "efi_format"),
+           REG_TYPE_STR(EFD_FORMAT, "efd_format"),
+           REG_TYPE_STR(IFORMAT, "iformat"),
+           REG_TYPE_STR(ICORE, "icore"),
+           REG_TYPE_STR(IEXT, "iext"),
+           REG_TYPE_STR(IBROOT, "ibroot"),
+           REG_TYPE_STR(ILOCAL, "ilocal"),
+           REG_TYPE_STR(IATTR_EXT, "iattr_ext"),
+           REG_TYPE_STR(IATTR_BROOT, "iattr_broot"),
+           REG_TYPE_STR(IATTR_LOCAL, "iattr_local"),
+           REG_TYPE_STR(QFORMAT, "qformat"),
+           REG_TYPE_STR(DQUOT, "dquot"),
+           REG_TYPE_STR(QUOTAOFF, "quotaoff"),
+           REG_TYPE_STR(LRHEADER, "LR header"),
+           REG_TYPE_STR(UNMOUNT, "unmount"),
+           REG_TYPE_STR(COMMIT, "commit"),
+           REG_TYPE_STR(TRANSHDR, "trans header"),
+           REG_TYPE_STR(ICREATE, "inode create")
         };
+#undef REG_TYPE_STR
+#define TRANS_TYPE_STR(type)   [XFS_TRANS_##type] = #type
         static char *trans_type_str[XFS_TRANS_TYPE_MAX] = {
-           "SETATTR_NOT_SIZE",
-           "SETATTR_SIZE",
-           "INACTIVE",
-           "CREATE",
-           "CREATE_TRUNC",
-           "TRUNCATE_FILE",
-           "REMOVE",
-           "LINK",
-           "RENAME",
-           "MKDIR",
-           "RMDIR",
-           "SYMLINK",
-           "SET_DMATTRS",
-           "GROWFS",
-           "STRAT_WRITE",
-           "DIOSTRAT",
-           "WRITE_SYNC",
-           "WRITEID",
-           "ADDAFORK",
-           "ATTRINVAL",
-           "ATRUNCATE",
-           "ATTR_SET",
-           "ATTR_RM",
-           "ATTR_FLAG",
-           "CLEAR_AGI_BUCKET",
-           "QM_SBCHANGE",
-           "DUMMY1",
-           "DUMMY2",
-           "QM_QUOTAOFF",
-           "QM_DQALLOC",
-           "QM_SETQLIM",
-           "QM_DQCLUSTER",
-           "QM_QINOCREATE",
-           "QM_QUOTAOFF_END",
-           "FSYNC_TS",
-           "GROWFSRT_ALLOC",
-           "GROWFSRT_ZERO",
-           "GROWFSRT_FREE",
-           "SWAPEXT",
-           "CHECKPOINT",
-           "ICREATE",
-           "CREATE_TMPFILE"
+           TRANS_TYPE_STR(SETATTR_NOT_SIZE),
+           TRANS_TYPE_STR(SETATTR_SIZE),
+           TRANS_TYPE_STR(INACTIVE),
+           TRANS_TYPE_STR(CREATE),
+           TRANS_TYPE_STR(CREATE_TRUNC),
+           TRANS_TYPE_STR(TRUNCATE_FILE),
+           TRANS_TYPE_STR(REMOVE),
+           TRANS_TYPE_STR(LINK),
+           TRANS_TYPE_STR(RENAME),
+           TRANS_TYPE_STR(MKDIR),
+           TRANS_TYPE_STR(RMDIR),
+           TRANS_TYPE_STR(SYMLINK),
+           TRANS_TYPE_STR(SET_DMATTRS),
+           TRANS_TYPE_STR(GROWFS),
+           TRANS_TYPE_STR(STRAT_WRITE),
+           TRANS_TYPE_STR(DIOSTRAT),
+           TRANS_TYPE_STR(WRITEID),
+           TRANS_TYPE_STR(ADDAFORK),
+           TRANS_TYPE_STR(ATTRINVAL),
+           TRANS_TYPE_STR(ATRUNCATE),
+           TRANS_TYPE_STR(ATTR_SET),
+           TRANS_TYPE_STR(ATTR_RM),
+           TRANS_TYPE_STR(ATTR_FLAG),
+           TRANS_TYPE_STR(CLEAR_AGI_BUCKET),
+           TRANS_TYPE_STR(SB_CHANGE),
+           TRANS_TYPE_STR(DUMMY1),
+           TRANS_TYPE_STR(DUMMY2),
+           TRANS_TYPE_STR(QM_QUOTAOFF),
+           TRANS_TYPE_STR(QM_DQALLOC),
+           TRANS_TYPE_STR(QM_SETQLIM),
+           TRANS_TYPE_STR(QM_DQCLUSTER),
+           TRANS_TYPE_STR(QM_QINOCREATE),
+           TRANS_TYPE_STR(QM_QUOTAOFF_END),
+           TRANS_TYPE_STR(FSYNC_TS),
+           TRANS_TYPE_STR(GROWFSRT_ALLOC),
+           TRANS_TYPE_STR(GROWFSRT_ZERO),
+           TRANS_TYPE_STR(GROWFSRT_FREE),
+           TRANS_TYPE_STR(SWAPEXT),
+           TRANS_TYPE_STR(CHECKPOINT),
+           TRANS_TYPE_STR(ICREATE),
+           TRANS_TYPE_STR(CREATE_TMPFILE)
         };
+#undef TRANS_TYPE_STR
  
         xfs_warn(mp, "xlog_write: reservation summary:");
         xfs_warn(mp, "  trans type  = %s (%u)",
                  ((ticket->t_trans_type <= 0 ||
                    ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ?
-                 "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]),
+                 "bad-trans-type" : trans_type_str[ticket->t_trans_type]),
                  ticket->t_trans_type);
         xfs_warn(mp, "  unit res    = %d bytes",
                  ticket->t_unit_res);
@@ -2101,7 +2101,7 @@ xlog_print_tic_res(
                 uint r_type = ticket->t_res_arr[i].r_type;
                 xfs_warn(mp, "region[%u]: %s - %u bytes", i,
                             ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ?
-                           "bad-rtype" : res_type_str[r_type-1]),
+                           "bad-rtype" : res_type_str[r_type]),
                             ticket->t_res_arr[i].r_len);
         }
  
@@ -3979,7 +3979,7 @@ xfs_log_force_umount(
             log->l_flags & XLOG_ACTIVE_RECOVERY) {
                 mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
                 if (mp->m_sb_bp)
-                       XFS_BUF_DONE(mp->m_sb_bp);
+                       mp->m_sb_bp->b_flags |= XBF_DONE;
                 return 0;
         }
  
@@ -4009,7 +4009,7 @@ xfs_log_force_umount(
         spin_lock(&log->l_icloglock);
         mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
         if (mp->m_sb_bp)
-               XFS_BUF_DONE(mp->m_sb_bp);
+               mp->m_sb_bp->b_flags |= XBF_DONE;
  
         /*
          * Mark the log and the iclogs with IO error flags to prevent any
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c

index be5568839442d1ab50bf5cae293b7b2b133b525e..396565f4324764058b979cf5e4c5bd96744f8ef8 100644 (file)
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -190,7 +190,7 @@ xlog_bread_noalign(
         ASSERT(nbblks <= bp->b_length);
  
         XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
-       XFS_BUF_READ(bp);
+       bp->b_flags |= XBF_READ;
         bp->b_io_length = nbblks;
         bp->b_error = 0;
  
@@ -275,7 +275,6 @@ xlog_bwrite(
         ASSERT(nbblks <= bp->b_length);
  
         XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
-       XFS_BUF_ZEROFLAGS(bp);
         xfs_buf_hold(bp);
         xfs_buf_lock(bp);
         bp->b_io_length = nbblks;
@@ -2538,6 +2537,13 @@ xlog_recover_validate_buf_type(
                 }
                 bp->b_ops = &xfs_sb_buf_ops;
                 break;
+#ifdef CONFIG_XFS_RT
+       case XFS_BLFT_RTBITMAP_BUF:
+       case XFS_BLFT_RTSUMMARY_BUF:
+               /* no magic numbers for verification of RT buffers */
+               bp->b_ops = &xfs_rtbuf_ops;
+               break;
+#endif /* CONFIG_XFS_RT */
         default:
                 xfs_warn(mp, "Unknown buffer type %d!",
                          xfs_blft_from_flags(buf_f));
@@ -2858,7 +2864,7 @@ xfs_recover_inode_owner_change(
                 return -ENOMEM;
  
         /* instantiate the inode */
-       xfs_dinode_from_disk(&ip->i_d, dip);
+       xfs_inode_from_disk(ip, dip);
         ASSERT(ip->i_d.di_version >= 3);
  
         error = xfs_iformat_fork(ip, dip);
@@ -2904,7 +2910,7 @@ xlog_recover_inode_pass2(
         int                     error;
         int                     attr_index;
         uint                    fields;
-       xfs_icdinode_t          *dicp;
+       struct xfs_log_dinode   *ldip;
         uint                    isize;
         int                     need_free = 0;
  
@@ -2957,8 +2963,8 @@ xlog_recover_inode_pass2(
                 error = -EFSCORRUPTED;
                 goto out_release;
         }
-       dicp = item->ri_buf[1].i_addr;
-       if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
+       ldip = item->ri_buf[1].i_addr;
+       if (unlikely(ldip->di_magic != XFS_DINODE_MAGIC)) {
                 xfs_alert(mp,
                         "%s: Bad inode log record, rec ptr 0x%p, ino %Ld",
                         __func__, item, in_f->ilf_ino);
@@ -2994,13 +3000,13 @@ xlog_recover_inode_pass2(
          * to skip replay when the on disk inode is newer than the log one
          */
         if (!xfs_sb_version_hascrc(&mp->m_sb) &&
-           dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
+           ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
                 /*
                  * Deal with the wrap case, DI_MAX_FLUSH is less
                  * than smaller numbers
                  */
                 if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
-                   dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) {
+                   ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
                         /* do nothing */
                 } else {
                         trace_xfs_log_recover_inode_skip(log, in_f);
@@ -3010,13 +3016,13 @@ xlog_recover_inode_pass2(
         }
  
         /* Take the opportunity to reset the flush iteration count */
-       dicp->di_flushiter = 0;
+       ldip->di_flushiter = 0;
  
-       if (unlikely(S_ISREG(dicp->di_mode))) {
-               if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
-                   (dicp->di_format != XFS_DINODE_FMT_BTREE)) {
+       if (unlikely(S_ISREG(ldip->di_mode))) {
+               if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
+                   (ldip->di_format != XFS_DINODE_FMT_BTREE)) {
                         XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
-                                        XFS_ERRLEVEL_LOW, mp, dicp);
+                                        XFS_ERRLEVEL_LOW, mp, ldip);
                         xfs_alert(mp,
                 "%s: Bad regular inode log record, rec ptr 0x%p, "
                 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
@@ -3024,12 +3030,12 @@ xlog_recover_inode_pass2(
                         error = -EFSCORRUPTED;
                         goto out_release;
                 }
-       } else if (unlikely(S_ISDIR(dicp->di_mode))) {
-               if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
-                   (dicp->di_format != XFS_DINODE_FMT_BTREE) &&
-                   (dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
+       } else if (unlikely(S_ISDIR(ldip->di_mode))) {
+               if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
+                   (ldip->di_format != XFS_DINODE_FMT_BTREE) &&
+                   (ldip->di_format != XFS_DINODE_FMT_LOCAL)) {
                         XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
-                                            XFS_ERRLEVEL_LOW, mp, dicp);
+                                            XFS_ERRLEVEL_LOW, mp, ldip);
                         xfs_alert(mp,
                 "%s: Bad dir inode log record, rec ptr 0x%p, "
                 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
@@ -3038,32 +3044,32 @@ xlog_recover_inode_pass2(
                         goto out_release;
                 }
         }
-       if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){
+       if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){
                 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
-                                    XFS_ERRLEVEL_LOW, mp, dicp);
+                                    XFS_ERRLEVEL_LOW, mp, ldip);
                 xfs_alert(mp,
         "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
         "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
                         __func__, item, dip, bp, in_f->ilf_ino,
-                       dicp->di_nextents + dicp->di_anextents,
-                       dicp->di_nblocks);
+                       ldip->di_nextents + ldip->di_anextents,
+                       ldip->di_nblocks);
                 error = -EFSCORRUPTED;
                 goto out_release;
         }
-       if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
+       if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) {
                 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
-                                    XFS_ERRLEVEL_LOW, mp, dicp);
+                                    XFS_ERRLEVEL_LOW, mp, ldip);
                 xfs_alert(mp,
         "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
         "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__,
-                       item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
+                       item, dip, bp, in_f->ilf_ino, ldip->di_forkoff);
                 error = -EFSCORRUPTED;
                 goto out_release;
         }
-       isize = xfs_icdinode_size(dicp->di_version);
+       isize = xfs_log_dinode_size(ldip->di_version);
         if (unlikely(item->ri_buf[1].i_len > isize)) {
                 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
-                                    XFS_ERRLEVEL_LOW, mp, dicp);
+                                    XFS_ERRLEVEL_LOW, mp, ldip);
                 xfs_alert(mp,
                         "%s: Bad inode log record length %d, rec ptr 0x%p",
                         __func__, item->ri_buf[1].i_len, item);
@@ -3071,8 +3077,8 @@ xlog_recover_inode_pass2(
                 goto out_release;
         }
  
-       /* The core is in in-core format */
-       xfs_dinode_to_disk(dip, dicp);
+       /* recover the log dinode into the on-disk inode */
+       xfs_log_dinode_to_disk(ldip, dip);
  
         /* the rest is in on-disk format */
         if (item->ri_buf[1].i_len > isize) {
@@ -4402,8 +4408,8 @@ xlog_recover_process_one_iunlink(
         if (error)
                 goto fail_iput;
  
-       ASSERT(ip->i_d.di_nlink == 0);
-       ASSERT(ip->i_d.di_mode != 0);
+       ASSERT(VFS_I(ip)->i_nlink == 0);
+       ASSERT(VFS_I(ip)->i_mode != 0);
  
         /* setup for the next pass */
         agino = be32_to_cpu(dip->di_next_unlinked);
@@ -4957,6 +4963,7 @@ xlog_do_recover(
         xfs_daddr_t     head_blk,
         xfs_daddr_t     tail_blk)
  {
+       struct xfs_mount *mp = log->l_mp;
         int             error;
         xfs_buf_t       *bp;
         xfs_sb_t        *sbp;
@@ -4971,7 +4978,7 @@ xlog_do_recover(
         /*
          * If IO errors happened during recovery, bail out.
          */
-       if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
+       if (XFS_FORCED_SHUTDOWN(mp)) {
                 return -EIO;
         }
  
@@ -4984,22 +4991,21 @@ xlog_do_recover(
          * or iunlinks they will have some entries in the AIL; so we look at
          * the AIL to determine how to set the tail_lsn.
          */
-       xlog_assign_tail_lsn(log->l_mp);
+       xlog_assign_tail_lsn(mp);
  
         /*
          * Now that we've finished replaying all buffer and inode
          * updates, re-read in the superblock and reverify it.
          */
-       bp = xfs_getsb(log->l_mp, 0);
-       XFS_BUF_UNDONE(bp);
-       ASSERT(!(XFS_BUF_ISWRITE(bp)));
-       XFS_BUF_READ(bp);
-       XFS_BUF_UNASYNC(bp);
+       bp = xfs_getsb(mp, 0);
+       bp->b_flags &= ~(XBF_DONE | XBF_ASYNC);
+       ASSERT(!(bp->b_flags & XBF_WRITE));
+       bp->b_flags |= XBF_READ;
         bp->b_ops = &xfs_sb_buf_ops;
  
         error = xfs_buf_submit_wait(bp);
         if (error) {
-               if (!XFS_FORCED_SHUTDOWN(log->l_mp)) {
+               if (!XFS_FORCED_SHUTDOWN(mp)) {
                         xfs_buf_ioerror_alert(bp, __func__);
                         ASSERT(0);
                 }
@@ -5008,14 +5014,17 @@ xlog_do_recover(
         }
  
         /* Convert superblock from on-disk format */
-       sbp = &log->l_mp->m_sb;
+       sbp = &mp->m_sb;
         xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
-       ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
-       ASSERT(xfs_sb_good_version(sbp));
-       xfs_reinit_percpu_counters(log->l_mp);
-
         xfs_buf_relse(bp);
  
+       /* re-initialise in-core superblock and geometry structures */
+       xfs_reinit_percpu_counters(mp);
+       error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
+       if (error) {
+               xfs_warn(mp, "Failed post-recovery per-ag init: %d", error);
+               return error;
+       }
  
         xlog_recover_check_summary(log);
  
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c

index bb753b359bee188b13023caf4597582b402cd31d..536a0ee9cd5af1fbbc89960a597eb7158b676771 100644 (file)
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -185,9 +185,6 @@ xfs_initialize_perag(
         xfs_agnumber_t  index;
         xfs_agnumber_t  first_initialised = 0;
         xfs_perag_t     *pag;
-       xfs_agino_t     agino;
-       xfs_ino_t       ino;
-       xfs_sb_t        *sbp = &mp->m_sb;
         int             error = -ENOMEM;
  
         /*
@@ -230,22 +227,7 @@ xfs_initialize_perag(
                 radix_tree_preload_end();
         }
  
-       /*
-        * If we mount with the inode64 option, or no inode overflows
-        * the legacy 32-bit address space clear the inode32 option.
-        */
-       agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
-       ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
-
-       if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
-               mp->m_flags |= XFS_MOUNT_32BITINODES;
-       else
-               mp->m_flags &= ~XFS_MOUNT_32BITINODES;
-
-       if (mp->m_flags & XFS_MOUNT_32BITINODES)
-               index = xfs_set_inode32(mp, agcount);
-       else
-               index = xfs_set_inode64(mp, agcount);
+       index = xfs_set_inode_alloc(mp, agcount);
  
         if (maxagi)
                 *maxagi = index;
@@ -865,7 +847,7 @@ xfs_mountfs(
  
         ASSERT(rip != NULL);
  
-       if (unlikely(!S_ISDIR(rip->i_d.di_mode))) {
+       if (unlikely(!S_ISDIR(VFS_I(rip)->i_mode))) {
                 xfs_warn(mp, "corrupted root inode %llu: not a directory",
                         (unsigned long long)rip->i_ino);
                 xfs_iunlock(rip, XFS_ILOCK_EXCL);
@@ -1284,7 +1266,7 @@ xfs_getsb(
         }
  
         xfs_buf_hold(bp);
-       ASSERT(XFS_BUF_ISDONE(bp));
+       ASSERT(bp->b_flags & XBF_DONE);
         return bp;
  }
  
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h

index b57098481c10a2a55a05bf6e75e6e43f7e224401..bac6b3435591b6725ea2b4a6115fd7bf89db8c15 100644 (file)
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -147,6 +147,17 @@ typedef struct xfs_mount {
          * to various other kinds of pain inflicted on the pNFS server.
          */
         __uint32_t              m_generation;
+
+#ifdef DEBUG
+       /*
+        * DEBUG mode instrumentation to test and/or trigger delayed allocation
+        * block killing in the event of failed writes. When enabled, all
+        * buffered writes are forced to fail. All delalloc blocks in the range
+        * of the write (including pre-existing delalloc blocks!) are tossed as
+        * part of the write failure error handling sequence.
+        */
+       bool                    m_fail_writes;
+#endif
  } xfs_mount_t;
  
  /*
@@ -166,9 +177,8 @@ typedef struct xfs_mount {
  #define XFS_MOUNT_GRPID                (1ULL << 9)     /* group-ID assigned from directory */
  #define XFS_MOUNT_NORECOVERY   (1ULL << 10)    /* no recovery - dirty fs */
  #define XFS_MOUNT_DFLT_IOSIZE  (1ULL << 12)    /* set default i/o size */
-#define XFS_MOUNT_32BITINODES  (1ULL << 14)    /* do not create inodes above
-                                                * 32 bits in size */
-#define XFS_MOUNT_SMALL_INUMS  (1ULL << 15)    /* users wants 32bit inodes */
+#define XFS_MOUNT_SMALL_INUMS  (1ULL << 14)    /* user wants 32bit inodes */
+#define XFS_MOUNT_32BITINODES  (1ULL << 15)    /* inode32 allocator active */
  #define XFS_MOUNT_NOUUID       (1ULL << 16)    /* ignore uuid during mount */
  #define XFS_MOUNT_BARRIER      (1ULL << 17)
  #define XFS_MOUNT_IKEEP                (1ULL << 18)    /* keep empty inode clusters*/
@@ -264,6 +274,20 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
         return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
  }
  
+#ifdef DEBUG   /* m_fail_writes is only declared in DEBUG builds */
+static inline bool
+xfs_mp_fail_writes(struct xfs_mount *mp)
+{
+       return mp->m_fail_writes;       /* true when buffered writes are forced to fail */
+}
+#else  /* !DEBUG */
+static inline bool
+xfs_mp_fail_writes(struct xfs_mount *mp)
+{
+       return 0;       /* no write-failure knob without DEBUG: always false */
+}
+#endif /* DEBUG */
+
  /*
   * Per-ag incore structure, copies of information in agf and agi, to improve the
   * performance of allocation group selection.
@@ -327,7 +351,6 @@ extern int  xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
                                  bool reserved);
  extern int     xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
  
-extern int     xfs_mount_log_sb(xfs_mount_t *);
  extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
  extern int     xfs_readsb(xfs_mount_t *, int);
  extern void    xfs_freesb(xfs_mount_t *);
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h

new file mode 100644 (file)

index 0000000..184c44e
--- /dev/null
+++ b/fs/xfs/xfs_ondisk.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2016 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_ONDISK_H
+#define __XFS_ONDISK_H
+/* Check via BUILD_BUG_ON_MSG() that sizeof(structname) equals size. */
+#define XFS_CHECK_STRUCT_SIZE(structname, size) \
+       BUILD_BUG_ON_MSG(sizeof(structname) != (size), "XFS: sizeof(" \
+               #structname ") is wrong, expected " #size)
+/* Checks the expected byte size of each structure listed below. */
+static inline void __init
+xfs_check_ondisk_structs(void)
+{
+       /* ag/file structures */
+       XFS_CHECK_STRUCT_SIZE(struct xfs_acl,                   4);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_acl_entry,             12);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_agf,                   224);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_agfl,                  36);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_agi,                   336);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_bmbt_key,              8);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_bmbt_rec,              16);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_bmdr_block,            4);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block,           72);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dinode,                176);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_disk_dquot,            104);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dqblk,                 136);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dsb,                   264);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr,          56);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key,             4);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec,             16);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_timestamp,             8);
+       XFS_CHECK_STRUCT_SIZE(xfs_alloc_key_t,                  8);
+       XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t,                  4);
+       XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t,                  8);
+       XFS_CHECK_STRUCT_SIZE(xfs_inobt_ptr_t,                  4);
+
+       /* dir/attr trees */
+       XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leaf_hdr,        80);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leafblock,       88);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_rmt_hdr,         56);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_da3_blkinfo,           56);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_da3_intnode,           64);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_da3_node_hdr,          64);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_blk_hdr,          48);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_data_hdr,         64);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_free,             64);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_free_hdr,         64);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf,             64);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf_hdr,         64);
+       XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_entry_t,            8);
+       XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_hdr_t,              32);
+       XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_map_t,              4);
+       XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_local_t,       4);
+
+       /*
+        * m68k has problems with xfs_attr_leaf_name_remote_t, but we pad it to
+        * 4 bytes anyway so it's not obviously a problem.  Hence, for the moment,
+        * we don't check this structure.  The check below can be reinstated when
+        * the attr definitions are updated to use C99 VLA definitions.
+        *
+       XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_remote_t,      12);
+        */
+
+       XFS_CHECK_STRUCT_SIZE(xfs_attr_leafblock_t,             40);
+       XFS_CHECK_STRUCT_SIZE(xfs_attr_shortform_t,             8);
+       XFS_CHECK_STRUCT_SIZE(xfs_da_blkinfo_t,                 12);
+       XFS_CHECK_STRUCT_SIZE(xfs_da_intnode_t,                 16);
+       XFS_CHECK_STRUCT_SIZE(xfs_da_node_entry_t,              8);
+       XFS_CHECK_STRUCT_SIZE(xfs_da_node_hdr_t,                16);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_data_free_t,             4);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_data_hdr_t,              16);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_data_unused_t,           6);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_free_hdr_t,              16);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_free_t,                  16);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_ino4_t,                  4);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_ino8_t,                  8);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_inou_t,                  8);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_entry_t,            8);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_hdr_t,              16);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_t,                  16);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_tail_t,             4);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_entry_t,              3);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_hdr_t,                10);
+       XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_off_t,                2);
+
+       /* log structures */
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dq_logformat,          24);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32,     28);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64,     32);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32,     28);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64,     32);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_extent_32,             12);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_extent_64,             16);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_log_dinode,            176);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_icreate_log,           28);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_ictimestamp,           8);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_32,   52);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_64,   56);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat,        20);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header,          16);
+}
+
+#endif /* __XFS_ONDISK_H */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c

index 532ab79d38fe376c14a5463a97195b59a61d8f84..be125e1758c1a5e4df36cfb8ec6e3e3643adc534 100644 (file)
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -560,6 +560,37 @@ xfs_qm_shrink_count(
         return list_lru_shrink_count(&qi->qi_lru, sc);
  }
  
+STATIC void
+xfs_qm_set_defquota(
+       xfs_mount_t     *mp,
+       uint            type,   /* XFS_DQ_USER, XFS_DQ_GROUP or XFS_DQ_PROJ at the visible call sites */
+       xfs_quotainfo_t *qinf)  /* holds the per-type default limits filled in here */
+{
+       xfs_dquot_t             *dqp;
+       struct xfs_def_quota    *defq;
+       int                     error;
+       /* Read dquot 0 of the requested type; its limits become the defaults. */
+       error = xfs_qm_dqread(mp, 0, type, XFS_QMOPT_DOWARN, &dqp);
+       /* A failed read is not reported: the defaults are simply left untouched. */
+       if (!error) {
+               xfs_disk_dquot_t        *ddqp = &dqp->q_core;
+               /* Pick the user/group/project default slot matching this dquot. */
+               defq = xfs_get_defquota(dqp, qinf);
+
+               /*
+                * Timers and warnings have already been set, so only the
+                * default limits for this quota type are recorded here.
+                */
+               defq->bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
+               defq->bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
+               defq->ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
+               defq->isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
+               defq->rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
+               defq->rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
+               xfs_qm_dqdestroy(dqp);
+       }
+}
+
  /*
   * This initializes all the quota information that's kept in the
   * mount structure
@@ -606,19 +637,19 @@ xfs_qm_init_quotainfo(
          * We try to get the limits from the superuser's limits fields.
          * This is quite hacky, but it is standard quota practice.
          *
-        * We look at the USR dquot with id == 0 first, but if user quotas
-        * are not enabled we goto the GRP dquot with id == 0.
-        * We don't really care to keep separate default limits for user
-        * and group quotas, at least not at this point.
-        *
          * Since we may not have done a quotacheck by this point, just read
          * the dquot without attaching it to any hashtables or lists.
+        *
+        * Timers and warnings are globally set by the first timer found in
+        * user/group/proj quota types, otherwise a default value is used.
+        * This should be split into different fields per quota type.
          */
         error = xfs_qm_dqread(mp, 0,
                         XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
                          (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
                           XFS_DQ_PROJ),
                         XFS_QMOPT_DOWARN, &dqp);
+
         if (!error) {
                 xfs_disk_dquot_t        *ddqp = &dqp->q_core;
  
@@ -639,13 +670,6 @@ xfs_qm_init_quotainfo(
                         be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
                 qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
                         be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
-               qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
-               qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
-               qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
-               qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
-               qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
-               qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
-
                 xfs_qm_dqdestroy(dqp);
         } else {
                 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
@@ -656,6 +680,13 @@ xfs_qm_init_quotainfo(
                 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
         }
  
+       if (XFS_IS_UQUOTA_RUNNING(mp))
+               xfs_qm_set_defquota(mp, XFS_DQ_USER, qinf);
+       if (XFS_IS_GQUOTA_RUNNING(mp))
+               xfs_qm_set_defquota(mp, XFS_DQ_GROUP, qinf);
+       if (XFS_IS_PQUOTA_RUNNING(mp))
+               xfs_qm_set_defquota(mp, XFS_DQ_PROJ, qinf);
+
         qinf->qi_shrinker.count_objects = xfs_qm_shrink_count;
         qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
         qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h

index 996a04064894cf4c07ffa28d058150243ffd86a4..2975a822e9f044cbb1ff66ec217d29ab29bf9f22 100644 (file)
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -53,6 +53,15 @@ extern struct kmem_zone      *xfs_qm_dqtrxzone;
   */
  #define XFS_DQUOT_CLUSTER_SIZE_FSB     (xfs_filblks_t)1
  
+struct xfs_def_quota { /* default limits kept for one quota type */
+       xfs_qcnt_t       bhardlimit;     /* default data blk hard limit */
+       xfs_qcnt_t       bsoftlimit;     /* default data blk soft limit */
+       xfs_qcnt_t       ihardlimit;     /* default inode count hard limit */
+       xfs_qcnt_t       isoftlimit;     /* default inode count soft limit */
+       xfs_qcnt_t       rtbhardlimit;   /* default realtime blk hard limit */
+       xfs_qcnt_t       rtbsoftlimit;   /* default realtime blk soft limit */
+};
+
  /*
   * Various quota information for individual filesystems.
   * The mount structure keeps a pointer to this.
@@ -76,12 +85,9 @@ typedef struct xfs_quotainfo {
         struct mutex     qi_quotaofflock;/* to serialize quotaoff */
         xfs_filblks_t    qi_dqchunklen;  /* # BBs in a chunk of dqs */
         uint             qi_dqperchunk;  /* # ondisk dqs in above chunk */
-       xfs_qcnt_t       qi_bhardlimit;  /* default data blk hard limit */
-       xfs_qcnt_t       qi_bsoftlimit;  /* default data blk soft limit */
-       xfs_qcnt_t       qi_ihardlimit;  /* default inode count hard limit */
-       xfs_qcnt_t       qi_isoftlimit;  /* default inode count soft limit */
-       xfs_qcnt_t       qi_rtbhardlimit;/* default realtime blk hard limit */
-       xfs_qcnt_t       qi_rtbsoftlimit;/* default realtime blk soft limit */
+       struct xfs_def_quota    qi_usr_default;
+       struct xfs_def_quota    qi_grp_default;
+       struct xfs_def_quota    qi_prj_default;
         struct shrinker  qi_shrinker;
  } xfs_quotainfo_t;
  
@@ -104,15 +110,15 @@ xfs_dquot_tree(
  }
  
  static inline struct xfs_inode *
-xfs_dq_to_quota_inode(struct xfs_dquot *dqp)
+xfs_quota_inode(xfs_mount_t *mp, uint dq_flags)
  {
-       switch (dqp->dq_flags & XFS_DQ_ALLTYPES) {
+       switch (dq_flags & XFS_DQ_ALLTYPES) {
         case XFS_DQ_USER:
-               return dqp->q_mount->m_quotainfo->qi_uquotaip;
+               return mp->m_quotainfo->qi_uquotaip;
         case XFS_DQ_GROUP:
-               return dqp->q_mount->m_quotainfo->qi_gquotaip;
+               return mp->m_quotainfo->qi_gquotaip;
         case XFS_DQ_PROJ:
-               return dqp->q_mount->m_quotainfo->qi_pquotaip;
+               return mp->m_quotainfo->qi_pquotaip;
         default:
                 ASSERT(0);
         }
@@ -164,11 +170,27 @@ extern void               xfs_qm_dqrele_all_inodes(struct xfs_mount *, uint);
  
  /* quota ops */
  extern int             xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint);
-extern int             xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t,
-                                       uint, struct qc_dqblk *);
+extern int             xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t *,
+                                       uint, struct qc_dqblk *, uint);
  extern int             xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint,
                                         struct qc_dqblk *);
  extern int             xfs_qm_scall_quotaon(struct xfs_mount *, uint);
  extern int             xfs_qm_scall_quotaoff(struct xfs_mount *, uint);
  
+static inline struct xfs_def_quota *
+xfs_get_defquota(struct xfs_dquot *dqp, struct xfs_quotainfo *qi)
+{
+       struct xfs_def_quota *defq;     /* default-limit slot in qi matching dqp's type */
+       /* Select by dquot type; anything that is not user or group is taken as project. */
+       if (XFS_QM_ISUDQ(dqp))
+               defq = &qi->qi_usr_default;
+       else if (XFS_QM_ISGDQ(dqp))
+               defq = &qi->qi_grp_default;
+       else {
+               ASSERT(XFS_QM_ISPDQ(dqp));
+               defq = &qi->qi_prj_default;
+       }
+       return defq;
+}
+
  #endif /* __XFS_QM_H__ */
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c

index 3640c6e896af70eb2e910a31786cb7ac2298f847..f4d0e0a8f517c65913b8d45f383450384576b39e 100644 (file)
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -404,6 +404,7 @@ xfs_qm_scall_setqlim(
         struct xfs_disk_dquot   *ddq;
         struct xfs_dquot        *dqp;
         struct xfs_trans        *tp;
+       struct xfs_def_quota    *defq;
         int                     error;
         xfs_qcnt_t              hard, soft;
  
@@ -431,6 +432,8 @@ xfs_qm_scall_setqlim(
                 ASSERT(error != -ENOENT);
                 goto out_unlock;
         }
+
+       defq = xfs_get_defquota(dqp, q);
         xfs_dqunlock(dqp);
  
         tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
@@ -458,8 +461,8 @@ xfs_qm_scall_setqlim(
                 ddq->d_blk_softlimit = cpu_to_be64(soft);
                 xfs_dquot_set_prealloc_limits(dqp);
                 if (id == 0) {
-                       q->qi_bhardlimit = hard;
-                       q->qi_bsoftlimit = soft;
+                       defq->bhardlimit = hard;
+                       defq->bsoftlimit = soft;
                 }
         } else {
                 xfs_debug(mp, "blkhard %Ld < blksoft %Ld", hard, soft);
@@ -474,8 +477,8 @@ xfs_qm_scall_setqlim(
                 ddq->d_rtb_hardlimit = cpu_to_be64(hard);
                 ddq->d_rtb_softlimit = cpu_to_be64(soft);
                 if (id == 0) {
-                       q->qi_rtbhardlimit = hard;
-                       q->qi_rtbsoftlimit = soft;
+                       defq->rtbhardlimit = hard;
+                       defq->rtbsoftlimit = soft;
                 }
         } else {
                 xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld", hard, soft);
@@ -491,8 +494,8 @@ xfs_qm_scall_setqlim(
                 ddq->d_ino_hardlimit = cpu_to_be64(hard);
                 ddq->d_ino_softlimit = cpu_to_be64(soft);
                 if (id == 0) {
-                       q->qi_ihardlimit = hard;
-                       q->qi_isoftlimit = soft;
+                       defq->ihardlimit = hard;
+                       defq->isoftlimit = soft;
                 }
         } else {
                 xfs_debug(mp, "ihard %Ld < isoft %Ld", hard, soft);
@@ -635,9 +638,10 @@ out:
  int
  xfs_qm_scall_getquota(
         struct xfs_mount        *mp,
-       xfs_dqid_t              id,
+       xfs_dqid_t              *id,
         uint                    type,
-       struct qc_dqblk         *dst)
+       struct qc_dqblk         *dst,
+       uint                    dqget_flags)
  {
         struct xfs_dquot        *dqp;
         int                     error;
@@ -647,7 +651,7 @@ xfs_qm_scall_getquota(
          * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
          * exist, we'll get ENOENT back.
          */
-       error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp);
+       error = xfs_qm_dqget(mp, NULL, *id, type, dqget_flags, &dqp);
         if (error)
                 return error;
  
@@ -660,6 +664,9 @@ xfs_qm_scall_getquota(
                 goto out_put;
         }
  
+       /* Fill in the ID we actually read from disk */
+       *id = be32_to_cpu(dqp->q_core.d_id);
+
         memset(dst, 0, sizeof(*dst));
         dst->d_spc_hardlimit =
                 XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit));
@@ -701,7 +708,7 @@ xfs_qm_scall_getquota(
         if (((XFS_IS_UQUOTA_ENFORCED(mp) && type == XFS_DQ_USER) ||
              (XFS_IS_GQUOTA_ENFORCED(mp) && type == XFS_DQ_GROUP) ||
              (XFS_IS_PQUOTA_ENFORCED(mp) && type == XFS_DQ_PROJ)) &&
-           id != 0) {
+           *id != 0) {
                 if ((dst->d_space > dst->d_spc_softlimit) &&
                     (dst->d_spc_softlimit > 0)) {
                         ASSERT(dst->d_spc_timer != 0);
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c

index 7795e0d01382a60798b4e83f35ba6db725fe8779..f82d79a8c694a8f32427b8d0e2923dbde1670d88 100644 (file)
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -231,14 +231,45 @@ xfs_fs_get_dqblk(
         struct qc_dqblk         *qdq)
  {
         struct xfs_mount        *mp = XFS_M(sb);
+       xfs_dqid_t              id;
  
         if (!XFS_IS_QUOTA_RUNNING(mp))
                 return -ENOSYS;
         if (!XFS_IS_QUOTA_ON(mp))
                 return -ESRCH;
  
-       return xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid),
-                                     xfs_quota_type(qid.type), qdq);
+       id = from_kqid(&init_user_ns, qid);
+       return xfs_qm_scall_getquota(mp, &id,
+                                     xfs_quota_type(qid.type), qdq, 0);
+}
+
+/*
+ * Return quota info for the first active quota whose ID is >= *qid.
+ *
+ * On success *qid is rewritten with the ID that was actually read, which
+ * may differ from the one requested.  Returns -ENOSYS if quotas are not
+ * running, -ESRCH if quotas are not on, or the error reported by
+ * xfs_qm_scall_getquota().
+ */
+STATIC int
+xfs_fs_get_nextdqblk(
+       struct super_block      *sb,
+       struct kqid             *qid,
+       struct qc_dqblk         *qdq)
+{
+       int                     ret;
+       struct xfs_mount        *mp = XFS_M(sb);
+       xfs_dqid_t              id;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp))
+               return -ENOSYS;
+       if (!XFS_IS_QUOTA_ON(mp))
+               return -ESRCH;
+
+       id = from_kqid(&init_user_ns, *qid);
+       ret = xfs_qm_scall_getquota(mp, &id,
+                                   xfs_quota_type(qid->type), qdq,
+                                   XFS_QMOPT_DQNEXT);
+       if (ret)
+               return ret;
+
+       /*
+        * ID may be different, so convert back what we got.  Repack it in
+        * the same namespace it was unpacked from above so that the
+        * from_kqid()/make_kqid() round trip is symmetric.
+        */
+       *qid = make_kqid(&init_user_ns, qid->type, id);
+       return 0;
  }
  
  STATIC int
@@ -267,5 +298,6 @@ const struct quotactl_ops xfs_quotactl_operations = {
         .quota_disable          = xfs_quota_disable,
         .rm_xquota              = xfs_fs_rm_xquota,
         .get_dqblk              = xfs_fs_get_dqblk,
+       .get_nextdqblk          = xfs_fs_get_nextdqblk,
         .set_dqblk              = xfs_fs_set_dqblk,
  };
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c

index be02a68b2fe292e077c84862f93271dd049c3359..abf44435d04a3f4b898e21a00e45ee8ae607738a 100644 (file)
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1272,7 +1272,7 @@ xfs_rtpick_extent(
  
         ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
  
-       seqp = (__uint64_t *)&mp->m_rbmip->i_d.di_atime;
+       seqp = (__uint64_t *)&VFS_I(mp->m_rbmip)->i_atime;
         if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) {
                 mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
                 *seqp = 0;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c

index 59c9b7bd958d6a034d792d3c777042173b3b817e..d760934109b5d628891ea3e91fa1a9e3f68ec36b 100644 (file)
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -45,6 +45,7 @@
  #include "xfs_filestream.h"
  #include "xfs_quota.h"
  #include "xfs_sysfs.h"
+#include "xfs_ondisk.h"
  
  #include <linux/namei.h>
  #include <linux/init.h>
@@ -65,83 +66,85 @@ static struct kset *xfs_kset;               /* top-level xfs sysfs dir */
  static struct xfs_kobj xfs_dbg_kobj;   /* global debug sysfs attrs */
  #endif
  
-#define MNTOPT_LOGBUFS "logbufs"       /* number of XFS log buffers */
-#define MNTOPT_LOGBSIZE        "logbsize"      /* size of XFS log buffers */
-#define MNTOPT_LOGDEV  "logdev"        /* log device */
-#define MNTOPT_RTDEV   "rtdev"         /* realtime I/O device */
-#define MNTOPT_BIOSIZE "biosize"       /* log2 of preferred buffered io size */
-#define MNTOPT_WSYNC   "wsync"         /* safe-mode nfs compatible mount */
-#define MNTOPT_NOALIGN "noalign"       /* turn off stripe alignment */
-#define MNTOPT_SWALLOC "swalloc"       /* turn on stripe width allocation */
-#define MNTOPT_SUNIT   "sunit"         /* data volume stripe unit */
-#define MNTOPT_SWIDTH  "swidth"        /* data volume stripe width */
-#define MNTOPT_NOUUID  "nouuid"        /* ignore filesystem UUID */
-#define MNTOPT_MTPT    "mtpt"          /* filesystem mount point */
-#define MNTOPT_GRPID   "grpid"         /* group-ID from parent directory */
-#define MNTOPT_NOGRPID "nogrpid"       /* group-ID from current process */
-#define MNTOPT_BSDGROUPS    "bsdgroups"    /* group-ID from parent directory */
-#define MNTOPT_SYSVGROUPS   "sysvgroups"   /* group-ID from current process */
-#define MNTOPT_ALLOCSIZE    "allocsize"    /* preferred allocation size */
-#define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
-#define MNTOPT_BARRIER "barrier"       /* use writer barriers for log write and
-                                        * unwritten extent conversion */
-#define MNTOPT_NOBARRIER "nobarrier"   /* .. disable */
-#define MNTOPT_64BITINODE   "inode64"  /* inodes can be allocated anywhere */
-#define MNTOPT_32BITINODE   "inode32"  /* inode allocation limited to
-                                        * XFS_MAXINUMBER_32 */
-#define MNTOPT_IKEEP   "ikeep"         /* do not free empty inode clusters */
-#define MNTOPT_NOIKEEP "noikeep"       /* free empty inode clusters */
-#define MNTOPT_LARGEIO    "largeio"    /* report large I/O sizes in stat() */
-#define MNTOPT_NOLARGEIO   "nolargeio" /* do not report large I/O sizes
-                                        * in stat(). */
-#define MNTOPT_ATTR2   "attr2"         /* do use attr2 attribute format */
-#define MNTOPT_NOATTR2 "noattr2"       /* do not use attr2 attribute format */
-#define MNTOPT_FILESTREAM  "filestreams" /* use filestreams allocator */
-#define MNTOPT_QUOTA   "quota"         /* disk quotas (user) */
-#define MNTOPT_NOQUOTA "noquota"       /* no quotas */
-#define MNTOPT_USRQUOTA        "usrquota"      /* user quota enabled */
-#define MNTOPT_GRPQUOTA        "grpquota"      /* group quota enabled */
-#define MNTOPT_PRJQUOTA        "prjquota"      /* project quota enabled */
-#define MNTOPT_UQUOTA  "uquota"        /* user quota (IRIX variant) */
-#define MNTOPT_GQUOTA  "gquota"        /* group quota (IRIX variant) */
-#define MNTOPT_PQUOTA  "pquota"        /* project quota (IRIX variant) */
-#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */
-#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
-#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
-#define MNTOPT_QUOTANOENF  "qnoenforce"        /* same as uqnoenforce */
-#define MNTOPT_DISCARD    "discard"    /* Discard unused blocks */
-#define MNTOPT_NODISCARD   "nodiscard" /* Do not discard unused blocks */
-
-#define MNTOPT_DAX     "dax"           /* Enable direct access to bdev pages */
-
  /*
   * Table driven mount option parser.
- *
- * Currently only used for remount, but it will be used for mount
- * in the future, too.
   */
  enum {
-       Opt_barrier,
-       Opt_nobarrier,
-       Opt_inode64,
-       Opt_inode32,
-       Opt_err
+       Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_biosize,
+       Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
+       Opt_mtpt, Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
+       Opt_allocsize, Opt_norecovery, Opt_barrier, Opt_nobarrier,
+       Opt_inode64, Opt_inode32, Opt_ikeep, Opt_noikeep,
+       Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2, Opt_filestreams,
+       Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota, Opt_prjquota,
+       Opt_uquota, Opt_gquota, Opt_pquota,
+       Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
+       Opt_discard, Opt_nodiscard, Opt_dax, Opt_err,  /* Opt_err pairs with the NULL pattern that ends tokens[] */
  };
  
  static const match_table_t tokens = {
-       {Opt_barrier, "barrier"},
-       {Opt_nobarrier, "nobarrier"},
-       {Opt_inode64, "inode64"},
-       {Opt_inode32, "inode32"},
-       {Opt_err, NULL}
+       {Opt_logbufs,   "logbufs=%u"},  /* number of XFS log buffers */
+       {Opt_logbsize,  "logbsize=%s"}, /* size of XFS log buffers */
+       {Opt_logdev,    "logdev=%s"},   /* log device */
+       {Opt_rtdev,     "rtdev=%s"},    /* realtime I/O device */
+       {Opt_biosize,   "biosize=%u"},  /* log2 of preferred buffered io size */
+       {Opt_wsync,     "wsync"},       /* safe-mode nfs compatible mount */
+       {Opt_noalign,   "noalign"},     /* turn off stripe alignment */
+       {Opt_swalloc,   "swalloc"},     /* turn on stripe width allocation */
+       {Opt_sunit,     "sunit=%u"},    /* data volume stripe unit */
+       {Opt_swidth,    "swidth=%u"},   /* data volume stripe width */
+       {Opt_nouuid,    "nouuid"},      /* ignore filesystem UUID */
+       {Opt_mtpt,      "mtpt"},        /* filesystem mount point */
+       {Opt_grpid,     "grpid"},       /* group-ID from parent directory */
+       {Opt_nogrpid,   "nogrpid"},     /* group-ID from current process */
+       {Opt_bsdgroups, "bsdgroups"},   /* group-ID from parent directory */
+       {Opt_sysvgroups,"sysvgroups"},  /* group-ID from current process */
+       {Opt_allocsize, "allocsize=%s"},/* preferred allocation size */
+       {Opt_norecovery,"norecovery"},  /* don't run XFS recovery */
+       {Opt_barrier,   "barrier"},     /* use write barriers for log write and
+                                        * unwritten extent conversion */
+       {Opt_nobarrier, "nobarrier"},   /* .. disable */
+       {Opt_inode64,   "inode64"},     /* inodes can be allocated anywhere */
+       {Opt_inode32,   "inode32"},     /* inode allocation limited to
+                                        * XFS_MAXINUMBER_32 */
+       {Opt_ikeep,     "ikeep"},       /* do not free empty inode clusters */
+       {Opt_noikeep,   "noikeep"},     /* free empty inode clusters */
+       {Opt_largeio,   "largeio"},     /* report large I/O sizes in stat() */
+       {Opt_nolargeio, "nolargeio"},   /* do not report large I/O sizes
+                                        * in stat(). */
+       {Opt_attr2,     "attr2"},       /* do use attr2 attribute format */
+       {Opt_noattr2,   "noattr2"},     /* do not use attr2 attribute format */
+       {Opt_filestreams,"filestreams"},/* use filestreams allocator */
+       {Opt_quota,     "quota"},       /* disk quotas (user) */
+       {Opt_noquota,   "noquota"},     /* no quotas */
+       {Opt_usrquota,  "usrquota"},    /* user quota enabled */
+       {Opt_grpquota,  "grpquota"},    /* group quota enabled */
+       {Opt_prjquota,  "prjquota"},    /* project quota enabled */
+       {Opt_uquota,    "uquota"},      /* user quota (IRIX variant) */
+       {Opt_gquota,    "gquota"},      /* group quota (IRIX variant) */
+       {Opt_pquota,    "pquota"},      /* project quota (IRIX variant) */
+       {Opt_uqnoenforce,"uqnoenforce"},/* user quota accounting, limits not enforced */
+       {Opt_gqnoenforce,"gqnoenforce"},/* group quota accounting, limits not enforced */
+       {Opt_pqnoenforce,"pqnoenforce"},/* project quota accounting, limits not enforced */
+       {Opt_qnoenforce, "qnoenforce"}, /* same as uqnoenforce */
+       {Opt_discard,   "discard"},     /* Discard unused blocks */
+       {Opt_nodiscard, "nodiscard"},   /* Do not discard unused blocks */
+
+       {Opt_dax,       "dax"},         /* Enable direct access to bdev pages */
+       {Opt_err,       NULL},          /* NULL pattern: end of table */
  };
  
  
  STATIC int
-suffix_kstrtoint(char *s, unsigned int base, int *res)
+suffix_kstrtoint(const substring_t *s, unsigned int base, int *res)
  {
         int     last, shift_left_factor = 0, _res;
-       char    *value = s;
+       char    *value;
+       int     ret = 0;
+
+       value = match_strdup(s);
+       if (!value)
+               return -ENOMEM;
  
         last = strlen(value) - 1;
         if (value[last] == 'K' || value[last] == 'k') {
@@ -157,10 +160,11 @@ suffix_kstrtoint(char *s, unsigned int base, int *res)
                 value[last] = '\0';
         }
  
-       if (kstrtoint(s, base, &_res))
-               return -EINVAL;
+       if (kstrtoint(value, base, &_res))
+               ret = -EINVAL;
+       kfree(value);
         *res = _res << shift_left_factor;
-       return 0;
+       return ret;
  }
  
  /*
@@ -169,14 +173,19 @@ suffix_kstrtoint(char *s, unsigned int base, int *res)
   *
   * Note that this function leaks the various device name allocations on
   * failure.  The caller takes care of them.
+ *
+ * *sb is const because this is also used to test options on the remount
+ * path, and we don't want this to have any side effects at remount time.
+ * Today this function does not change *sb, but just to future-proof...
   */
  STATIC int
  xfs_parseargs(
         struct xfs_mount        *mp,
         char                    *options)
  {
-       struct super_block      *sb = mp->m_super;
-       char                    *this_char, *value;
+       const struct super_block *sb = mp->m_super;
+       char                    *p;
+       substring_t             args[MAX_OPT_ARGS];
         int                     dsunit = 0;
         int                     dswidth = 0;
         int                     iosize = 0;
@@ -217,152 +226,152 @@ xfs_parseargs(
         if (!options)
                 goto done;
  
-       while ((this_char = strsep(&options, ",")) != NULL) {
-               if (!*this_char)
+       while ((p = strsep(&options, ",")) != NULL) {
+               int             token;
+
+               if (!*p)
                         continue;
-               if ((value = strchr(this_char, '=')) != NULL)
-                       *value++ = 0;
  
-               if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return -EINVAL;
-                       }
-                       if (kstrtoint(value, 10, &mp->m_logbufs))
-                               return -EINVAL;
-               } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return -EINVAL;
-                       }
-                       if (suffix_kstrtoint(value, 10, &mp->m_logbsize))
+               token = match_token(p, tokens, args);
+               switch (token) {
+               case Opt_logbufs:
+                       if (match_int(args, &mp->m_logbufs))
                                 return -EINVAL;
-               } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
+                       break;
+               case Opt_logbsize:
+                       if (suffix_kstrtoint(args, 10, &mp->m_logbsize))
                                 return -EINVAL;
-                       }
-                       mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+                       break;
+               case Opt_logdev:
+                       mp->m_logname = match_strdup(args);
                         if (!mp->m_logname)
                                 return -ENOMEM;
-               } else if (!strcmp(this_char, MNTOPT_MTPT)) {
-                       xfs_warn(mp, "%s option not allowed on this system",
-                               this_char);
+                       break;
+               case Opt_mtpt:
+                       xfs_warn(mp, "%s option not allowed on this system", p);
                         return -EINVAL;
-               } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return -EINVAL;
-                       }
-                       mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+               case Opt_rtdev:
+                       mp->m_rtname = match_strdup(args);
                         if (!mp->m_rtname)
                                 return -ENOMEM;
-               } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE) ||
-                          !strcmp(this_char, MNTOPT_BIOSIZE)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return -EINVAL;
-                       }
-                       if (suffix_kstrtoint(value, 10, &iosize))
+                       break;
+               case Opt_allocsize:
+               case Opt_biosize:
+                       if (suffix_kstrtoint(args, 10, &iosize))
                                 return -EINVAL;
                         iosizelog = ffs(iosize) - 1;
-               } else if (!strcmp(this_char, MNTOPT_GRPID) ||
-                          !strcmp(this_char, MNTOPT_BSDGROUPS)) {
+                       break;
+               case Opt_grpid:
+               case Opt_bsdgroups:
                         mp->m_flags |= XFS_MOUNT_GRPID;
-               } else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
-                          !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
+                       break;
+               case Opt_nogrpid:
+               case Opt_sysvgroups:
                         mp->m_flags &= ~XFS_MOUNT_GRPID;
-               } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
+                       break;
+               case Opt_wsync:
                         mp->m_flags |= XFS_MOUNT_WSYNC;
-               } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
+                       break;
+               case Opt_norecovery:
                         mp->m_flags |= XFS_MOUNT_NORECOVERY;
-               } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
+                       break;
+               case Opt_noalign:
                         mp->m_flags |= XFS_MOUNT_NOALIGN;
-               } else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
+                       break;
+               case Opt_swalloc:
                         mp->m_flags |= XFS_MOUNT_SWALLOC;
-               } else if (!strcmp(this_char, MNTOPT_SUNIT)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return -EINVAL;
-                       }
-                       if (kstrtoint(value, 10, &dsunit))
-                               return -EINVAL;
-               } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
+                       break;
+               case Opt_sunit:
+                       if (match_int(args, &dsunit))
                                 return -EINVAL;
-                       }
-                       if (kstrtoint(value, 10, &dswidth))
+                       break;
+               case Opt_swidth:
+                       if (match_int(args, &dswidth))
                                 return -EINVAL;
-               } else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
+                       break;
+               case Opt_inode32:
                         mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
-               } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
+                       break;
+               case Opt_inode64:
                         mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
-               } else if (!strcmp(this_char, MNTOPT_NOUUID)) {
+                       break;
+               case Opt_nouuid:
                         mp->m_flags |= XFS_MOUNT_NOUUID;
-               } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
+                       break;
+               case Opt_barrier:
                         mp->m_flags |= XFS_MOUNT_BARRIER;
-               } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
+                       break;
+               case Opt_nobarrier:
                         mp->m_flags &= ~XFS_MOUNT_BARRIER;
-               } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
+                       break;
+               case Opt_ikeep:
                         mp->m_flags |= XFS_MOUNT_IKEEP;
-               } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
+                       break;
+               case Opt_noikeep:
                         mp->m_flags &= ~XFS_MOUNT_IKEEP;
-               } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
+                       break;
+               case Opt_largeio:
                         mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
-               } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
+                       break;
+               case Opt_nolargeio:
                         mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-               } else if (!strcmp(this_char, MNTOPT_ATTR2)) {
+                       break;
+               case Opt_attr2:
                         mp->m_flags |= XFS_MOUNT_ATTR2;
-               } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
+                       break;
+               case Opt_noattr2:
                         mp->m_flags &= ~XFS_MOUNT_ATTR2;
                         mp->m_flags |= XFS_MOUNT_NOATTR2;
-               } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
+                       break;
+               case Opt_filestreams:
                         mp->m_flags |= XFS_MOUNT_FILESTREAMS;
-               } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
+                       break;
+               case Opt_noquota:
                         mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
                         mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
                         mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
-               } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
-                          !strcmp(this_char, MNTOPT_UQUOTA) ||
-                          !strcmp(this_char, MNTOPT_USRQUOTA)) {
+                       break;
+               case Opt_quota:
+               case Opt_uquota:
+               case Opt_usrquota:
                         mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
                                          XFS_UQUOTA_ENFD);
-               } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
-                          !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
+                       break;
+               case Opt_qnoenforce:
+               case Opt_uqnoenforce:
                         mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
                         mp->m_qflags &= ~XFS_UQUOTA_ENFD;
-               } else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
-                          !strcmp(this_char, MNTOPT_PRJQUOTA)) {
+                       break;
+               case Opt_pquota:
+               case Opt_prjquota:
                         mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
                                          XFS_PQUOTA_ENFD);
-               } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
+                       break;
+               case Opt_pqnoenforce:
                         mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
                         mp->m_qflags &= ~XFS_PQUOTA_ENFD;
+                       break;
-               } else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
-                          !strcmp(this_char, MNTOPT_GRPQUOTA)) {
+               case Opt_gquota:
+               case Opt_grpquota:
                         mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
                                          XFS_GQUOTA_ENFD);
-               } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
+                       break;
+               case Opt_gqnoenforce:
                         mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
                         mp->m_qflags &= ~XFS_GQUOTA_ENFD;
-               } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
+                       break;
+               case Opt_discard:
                         mp->m_flags |= XFS_MOUNT_DISCARD;
-               } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
+                       break;
+               case Opt_nodiscard:
                         mp->m_flags &= ~XFS_MOUNT_DISCARD;
+                       break;
  #ifdef CONFIG_FS_DAX
-               } else if (!strcmp(this_char, MNTOPT_DAX)) {
+               case Opt_dax:
                         mp->m_flags |= XFS_MOUNT_DAX;
+                       break;
  #endif
-               } else {
-                       xfs_warn(mp, "unknown mount option [%s].", this_char);
+               default:
+                       xfs_warn(mp, "unknown mount option [%s].", p);
                         return -EINVAL;
                 }
         }
@@ -461,25 +470,25 @@ xfs_showargs(
  {
         static struct proc_xfs_info xfs_info_set[] = {
                 /* the few simple ones we can get from the mount struct */
-               { XFS_MOUNT_IKEEP,              "," MNTOPT_IKEEP },
-               { XFS_MOUNT_WSYNC,              "," MNTOPT_WSYNC },
-               { XFS_MOUNT_NOALIGN,            "," MNTOPT_NOALIGN },
-               { XFS_MOUNT_SWALLOC,            "," MNTOPT_SWALLOC },
-               { XFS_MOUNT_NOUUID,             "," MNTOPT_NOUUID },
-               { XFS_MOUNT_NORECOVERY,         "," MNTOPT_NORECOVERY },
-               { XFS_MOUNT_ATTR2,              "," MNTOPT_ATTR2 },
-               { XFS_MOUNT_FILESTREAMS,        "," MNTOPT_FILESTREAM },
-               { XFS_MOUNT_GRPID,              "," MNTOPT_GRPID },
-               { XFS_MOUNT_DISCARD,            "," MNTOPT_DISCARD },
-               { XFS_MOUNT_SMALL_INUMS,        "," MNTOPT_32BITINODE },
-               { XFS_MOUNT_DAX,                "," MNTOPT_DAX },
+               { XFS_MOUNT_IKEEP,              ",ikeep" },
+               { XFS_MOUNT_WSYNC,              ",wsync" },
+               { XFS_MOUNT_NOALIGN,            ",noalign" },
+               { XFS_MOUNT_SWALLOC,            ",swalloc" },
+               { XFS_MOUNT_NOUUID,             ",nouuid" },
+               { XFS_MOUNT_NORECOVERY,         ",norecovery" },
+               { XFS_MOUNT_ATTR2,              ",attr2" },
+               { XFS_MOUNT_FILESTREAMS,        ",filestreams" },
+               { XFS_MOUNT_GRPID,              ",grpid" },
+               { XFS_MOUNT_DISCARD,            ",discard" },
+               { XFS_MOUNT_SMALL_INUMS,        ",inode32" },
+               { XFS_MOUNT_DAX,                ",dax" },
                 { 0, NULL }
         };
         static struct proc_xfs_info xfs_info_unset[] = {
                 /* the few simple ones we can get from the mount struct */
-               { XFS_MOUNT_COMPAT_IOSIZE,      "," MNTOPT_LARGEIO },
-               { XFS_MOUNT_BARRIER,            "," MNTOPT_NOBARRIER },
-               { XFS_MOUNT_SMALL_INUMS,        "," MNTOPT_64BITINODE },
+               { XFS_MOUNT_COMPAT_IOSIZE,      ",largeio" },
+               { XFS_MOUNT_BARRIER,            ",nobarrier" },
+               { XFS_MOUNT_SMALL_INUMS,        ",inode64" },
                 { 0, NULL }
         };
         struct proc_xfs_info    *xfs_infop;
@@ -494,46 +503,46 @@ xfs_showargs(
         }
  
         if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
-               seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
+               seq_printf(m, ",allocsize=%dk",
                                 (int)(1 << mp->m_writeio_log) >> 10);
  
         if (mp->m_logbufs > 0)
-               seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
+               seq_printf(m, ",logbufs=%d", mp->m_logbufs);
         if (mp->m_logbsize > 0)
-               seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
+               seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);
  
         if (mp->m_logname)
-               seq_show_option(m, MNTOPT_LOGDEV, mp->m_logname);
+               seq_show_option(m, "logdev", mp->m_logname);
         if (mp->m_rtname)
-               seq_show_option(m, MNTOPT_RTDEV, mp->m_rtname);
+               seq_show_option(m, "rtdev", mp->m_rtname);
  
         if (mp->m_dalign > 0)
-               seq_printf(m, "," MNTOPT_SUNIT "=%d",
+               seq_printf(m, ",sunit=%d",
                                 (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
         if (mp->m_swidth > 0)
-               seq_printf(m, "," MNTOPT_SWIDTH "=%d",
+               seq_printf(m, ",swidth=%d",
                                 (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
  
         if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
-               seq_puts(m, "," MNTOPT_USRQUOTA);
+               seq_puts(m, ",usrquota");
         else if (mp->m_qflags & XFS_UQUOTA_ACCT)
-               seq_puts(m, "," MNTOPT_UQUOTANOENF);
+               seq_puts(m, ",uqnoenforce");
  
         if (mp->m_qflags & XFS_PQUOTA_ACCT) {
                 if (mp->m_qflags & XFS_PQUOTA_ENFD)
-                       seq_puts(m, "," MNTOPT_PRJQUOTA);
+                       seq_puts(m, ",prjquota");
                 else
-                       seq_puts(m, "," MNTOPT_PQUOTANOENF);
+                       seq_puts(m, ",pqnoenforce");
         }
         if (mp->m_qflags & XFS_GQUOTA_ACCT) {
                 if (mp->m_qflags & XFS_GQUOTA_ENFD)
-                       seq_puts(m, "," MNTOPT_GRPQUOTA);
+                       seq_puts(m, ",grpquota");
                 else
-                       seq_puts(m, "," MNTOPT_GQUOTANOENF);
+                       seq_puts(m, ",gqnoenforce");
         }
  
         if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
-               seq_puts(m, "," MNTOPT_NOQUOTA);
+               seq_puts(m, ",noquota");
  
         return 0;
  }
@@ -572,23 +581,35 @@ xfs_max_file_offset(
  }
  
  /*
- * xfs_set_inode32() and xfs_set_inode64() are passed an agcount
- * because in the growfs case, mp->m_sb.sb_agcount is not updated
- * yet to the potentially higher ag count.
+ * Set parameters for inode allocation heuristics, taking into account
+ * filesystem size and inode32/inode64 mount options; i.e. specifically
+ * whether or not XFS_MOUNT_SMALL_INUMS is set.
+ *
+ * Inode allocation patterns are altered only if inode32 is requested
+ * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large.
+ * If altered, XFS_MOUNT_32BITINODES is set as well.
+ *
+ * An agcount independent of that in the mount structure is provided
+ * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
+ * to the potentially higher ag count.
+ *
+ * Returns the maximum AG index which may contain inodes.
   */
  xfs_agnumber_t
-xfs_set_inode32(struct xfs_mount *mp, xfs_agnumber_t agcount)
+xfs_set_inode_alloc(
+       struct xfs_mount *mp,
+       xfs_agnumber_t  agcount)
  {
-       xfs_agnumber_t  index = 0;
+       xfs_agnumber_t  index;
         xfs_agnumber_t  maxagi = 0;
         xfs_sb_t        *sbp = &mp->m_sb;
         xfs_agnumber_t  max_metadata;
         xfs_agino_t     agino;
         xfs_ino_t       ino;
-       xfs_perag_t     *pag;
  
-       /* Calculate how much should be reserved for inodes to meet
-        * the max inode percentage.
+       /*
+        * Calculate how much should be reserved for inodes to meet
+        * the max inode percentage.  Used only for inode32.
          */
         if (mp->m_maxicount) {
                 __uint64_t      icount;
@@ -602,54 +623,48 @@ xfs_set_inode32(struct xfs_mount *mp, xfs_agnumber_t agcount)
                 max_metadata = agcount;
         }
  
+       /* Get the last possible inode in the filesystem */
         agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
+       ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
+
+       /*
+        * If user asked for no more than 32-bit inodes, and the fs is
+        * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter
+        * the allocator to accommodate the request.
+        */
+       if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
+               mp->m_flags |= XFS_MOUNT_32BITINODES;
+       else
+               mp->m_flags &= ~XFS_MOUNT_32BITINODES;
  
         for (index = 0; index < agcount; index++) {
-               ino = XFS_AGINO_TO_INO(mp, index, agino);
+               struct xfs_perag        *pag;
  
-               if (ino > XFS_MAXINUMBER_32) {
-                       pag = xfs_perag_get(mp, index);
-                       pag->pagi_inodeok = 0;
-                       pag->pagf_metadata = 0;
-                       xfs_perag_put(pag);
-                       continue;
-               }
+               ino = XFS_AGINO_TO_INO(mp, index, agino);
  
                 pag = xfs_perag_get(mp, index);
-               pag->pagi_inodeok = 1;
-               maxagi++;
-               if (index < max_metadata)
-                       pag->pagf_metadata = 1;
-               xfs_perag_put(pag);
-       }
-       mp->m_flags |= (XFS_MOUNT_32BITINODES |
-                       XFS_MOUNT_SMALL_INUMS);
  
-       return maxagi;
-}
-
-xfs_agnumber_t
-xfs_set_inode64(struct xfs_mount *mp, xfs_agnumber_t agcount)
-{
-       xfs_agnumber_t index = 0;
-
-       for (index = 0; index < agcount; index++) {
-               struct xfs_perag        *pag;
+               if (mp->m_flags & XFS_MOUNT_32BITINODES) {
+                       if (ino > XFS_MAXINUMBER_32) {
+                               pag->pagi_inodeok = 0;
+                               pag->pagf_metadata = 0;
+                       } else {
+                               pag->pagi_inodeok = 1;
+                               maxagi++;
+                               if (index < max_metadata)
+                                       pag->pagf_metadata = 1;
+                               else
+                                       pag->pagf_metadata = 0;
+                       }
+               } else {
+                       pag->pagi_inodeok = 1;
+                       pag->pagf_metadata = 0;
+               }
  
-               pag = xfs_perag_get(mp, index);
-               pag->pagi_inodeok = 1;
-               pag->pagf_metadata = 0;
                 xfs_perag_put(pag);
         }
  
-       /* There is no need for lock protection on m_flags,
-        * the rw_semaphore of the VFS superblock is locked
-        * during mount/umount/remount operations, so this is
-        * enough to avoid concurency on the m_flags field
-        */
-       mp->m_flags &= ~(XFS_MOUNT_32BITINODES |
-                        XFS_MOUNT_SMALL_INUMS);
-       return index;
+       return (mp->m_flags & XFS_MOUNT_32BITINODES) ? maxagi : agcount;
  }
  
  STATIC int
@@ -1165,6 +1180,27 @@ xfs_quiesce_attr(
         xfs_log_quiesce(mp);
  }
  
+STATIC int
+xfs_test_remount_options(
+       struct super_block      *sb,
+       struct xfs_mount        *mp,
+       char                    *options)
+{
+       int                     error = 0;
+       struct xfs_mount        *tmp_mp;
+
+       tmp_mp = kmem_zalloc(sizeof(*tmp_mp), KM_MAYFAIL);
+       if (!tmp_mp)
+               return -ENOMEM;
+
+       tmp_mp->m_super = sb;
+       error = xfs_parseargs(tmp_mp, options);
+       xfs_free_fsname(tmp_mp);
+       kfree(tmp_mp);
+
+       return error;
+}
+
  STATIC int
  xfs_fs_remount(
         struct super_block      *sb,
@@ -1177,6 +1213,11 @@ xfs_fs_remount(
         char                    *p;
         int                     error;
  
+       /* First, check for complete junk; i.e. invalid options */
+       error = xfs_test_remount_options(sb, mp, options);
+       if (error)
+               return error;
+
         sync_filesystem(sb);
         while ((p = strsep(&options, ",")) != NULL) {
                 int token;
@@ -1193,10 +1234,12 @@ xfs_fs_remount(
                         mp->m_flags &= ~XFS_MOUNT_BARRIER;
                         break;
                 case Opt_inode64:
-                       mp->m_maxagi = xfs_set_inode64(mp, sbp->sb_agcount);
+                       mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
+                       mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
                         break;
                 case Opt_inode32:
-                       mp->m_maxagi = xfs_set_inode32(mp, sbp->sb_agcount);
+                       mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+                       mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
                         break;
                 default:
                         /*
@@ -1344,9 +1387,8 @@ xfs_finish_flags(
          */
         if (xfs_sb_version_hascrc(&mp->m_sb) &&
             (mp->m_flags & XFS_MOUNT_NOATTR2)) {
-               xfs_warn(mp,
-"Cannot mount a V5 filesystem as %s. %s is always enabled for V5 filesystems.",
-                       MNTOPT_NOATTR2, MNTOPT_ATTR2);
+               xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
+                            "attr2 is always enabled for V5 filesystems.");
                 return -EINVAL;
         }
  
@@ -1817,6 +1859,8 @@ init_xfs_fs(void)
  {
         int                     error;
  
+       xfs_check_ondisk_structs();
+
         printk(KERN_INFO XFS_VERSION_STRING " with "
                          XFS_BUILD_OPTIONS " enabled\n");
  
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h

index 499058fea303a81fa3d77f15123a83a2bf6f28b8..2dfb1ce4585f2feee7546483021f3d7593fc29fa 100644 (file)
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -65,8 +65,8 @@ extern __uint64_t xfs_max_file_offset(unsigned int);
  
  extern void xfs_flush_inodes(struct xfs_mount *mp);
  extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
-extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *, xfs_agnumber_t agcount);
-extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *, xfs_agnumber_t agcount);
+extern xfs_agnumber_t xfs_set_inode_alloc(struct xfs_mount *,
+                                          xfs_agnumber_t agcount);
  
  extern const struct export_operations xfs_export_operations;
  extern const struct xattr_handler *xfs_xattr_handlers[];
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c

index 641d625eb334c3230191dce12ecacad4e41d6e4b..6ced4f1434948d3757077c732a93d6fac88ea0cc 100644 (file)
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -18,10 +18,13 @@
  
  #include "xfs.h"
  #include "xfs_sysfs.h"
+#include "xfs_format.h"
  #include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
  #include "xfs_log.h"
  #include "xfs_log_priv.h"
  #include "xfs_stats.h"
+#include "xfs_mount.h"
  
  struct xfs_sysfs_attr {
         struct attribute attr;
@@ -45,16 +48,6 @@ to_attr(struct attribute *attr)
  
  #define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr
  
-/*
- * xfs_mount kobject. This currently has no attributes and thus no need for show
- * and store helpers. The mp kobject serves as the per-mount parent object that
- * is identified by the fsname under sysfs.
- */
-
-struct kobj_type xfs_mp_ktype = {
-       .release = xfs_sysfs_release,
-};
-
  STATIC ssize_t
  xfs_sysfs_object_show(
         struct kobject          *kobject,
@@ -83,6 +76,71 @@ static const struct sysfs_ops xfs_sysfs_ops = {
         .store = xfs_sysfs_object_store,
  };
  
+/*
+ * xfs_mount kobject. The mp kobject also serves as the per-mount parent object
+ * that is identified by the fsname under sysfs.
+ */
+
+static inline struct xfs_mount *
+to_mp(struct kobject *kobject)
+{
+       struct xfs_kobj *kobj = to_kobj(kobject);
+
+       return container_of(kobj, struct xfs_mount, m_kobj);
+}
+
+#ifdef DEBUG
+
+STATIC ssize_t
+fail_writes_store(
+       struct kobject          *kobject,
+       const char              *buf,
+       size_t                  count)
+{
+       struct xfs_mount        *mp = to_mp(kobject);
+       int                     ret;
+       int                     val;
+
+       ret = kstrtoint(buf, 0, &val);
+       if (ret)
+               return ret;
+
+       if (val == 1)
+               mp->m_fail_writes = true;
+       else if (val == 0)
+               mp->m_fail_writes = false;
+       else
+               return -EINVAL;
+
+       return count;
+}
+
+STATIC ssize_t
+fail_writes_show(
+       struct kobject          *kobject,
+       char                    *buf)
+{
+       struct xfs_mount        *mp = to_mp(kobject);
+
+       return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_writes ? 1 : 0);
+}
+XFS_SYSFS_ATTR_RW(fail_writes);
+
+#endif /* DEBUG */
+
+static struct attribute *xfs_mp_attrs[] = {
+#ifdef DEBUG
+       ATTR_LIST(fail_writes),
+#endif
+       NULL,
+};
+
+struct kobj_type xfs_mp_ktype = {
+       .release = xfs_sysfs_release,
+       .sysfs_ops = &xfs_sysfs_ops,
+       .default_attrs = xfs_mp_attrs,
+};
+
  #ifdef DEBUG
  /* debug */
  
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h

index 391d797cb53fee0a9196d3e392dfd8fa95d5d33a..c8d58426008ed7ef49096097904ed13653a8cfe9 100644 (file)
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1296,11 +1296,7 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
  DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
  DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
  DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
-DEFINE_IOMAP_EVENT(xfs_gbmap_direct);
-DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new);
-DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update);
-DEFINE_IOMAP_EVENT(xfs_gbmap_direct_none);
-DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio);
+DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
  
  DECLARE_EVENT_CLASS(xfs_simple_io_class,
         TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
@@ -1340,6 +1336,9 @@ DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
  DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
  DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
  DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof);
+DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write);
+DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_unwritten);
+DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_append);
  
  DECLARE_EVENT_CLASS(xfs_itrunc_class,
         TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c

index 748b16aff45a1cf959603b24131a69e289d99a95..20c53666cb4b3272400bc7111d285f104e555d75 100644 (file)
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1028,6 +1028,8 @@ __xfs_trans_roll(
         struct xfs_trans_res    tres;
         int                     error;
  
+       *committed = 0;
+
         /*
          * Ensure that the inode is always logged.
          */
@@ -1082,6 +1084,6 @@ xfs_trans_roll(
         struct xfs_trans        **tpp,
         struct xfs_inode        *dp)
  {
-       int                     committed = 0;
+       int                     committed;
         return __xfs_trans_roll(tpp, dp, &committed);
  }
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h

index 4643070d7cae4b814a36b101ba0a88fcd0c6287e..e7c49cf43fbc85c183e1728966d4f4506b94eaf6 100644 (file)
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -133,7 +133,6 @@ typedef struct xfs_trans {
   * XFS transaction mechanism exported interfaces that are
   * actually macros.
   */
-#define        xfs_trans_get_block_res(tp)     ((tp)->t_blk_res)
  #define        xfs_trans_set_sync(tp)          ((tp)->t_flags |= XFS_TRANS_SYNC)
  
  #if defined(DEBUG) || defined(XFS_WARN)
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c

index 4f18fd92ca13b21d8fd68e955e082d9db9a61195..d6c9c3e9e02b2c45f2cd57074cbbcdebde1e804a 100644 (file)
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -497,6 +497,7 @@ xfsaild(
         long            tout = 0;       /* milliseconds */
  
         current->flags |= PF_MEMALLOC;
+       set_freezable();
  
         while (!kthread_should_stop()) {
                 if (tout && tout <= 20)
@@ -519,14 +520,14 @@ xfsaild(
                 if (!xfs_ail_min(ailp) &&
                     ailp->xa_target == ailp->xa_target_prev) {
                         spin_unlock(&ailp->xa_lock);
-                       schedule();
+                       freezable_schedule();
                         tout = 0;
                         continue;
                 }
                 spin_unlock(&ailp->xa_lock);
  
                 if (tout)
-                       schedule_timeout(msecs_to_jiffies(tout));
+                       freezable_schedule_timeout(msecs_to_jiffies(tout));
  
                 __set_current_state(TASK_RUNNING);
  
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c

index 75798412859a7ba2f47b01945c54b7ee82ff4e7e..8ee29ca132dc13c0f302fa470cfaffc788cb9938 100644 (file)
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -155,7 +155,7 @@ xfs_trans_get_buf_map(
                 ASSERT(xfs_buf_islocked(bp));
                 if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) {
                         xfs_buf_stale(bp);
-                       XFS_BUF_DONE(bp);
+                       bp->b_flags |= XBF_DONE;
                 }
  
                 ASSERT(bp->b_transp == tp);
@@ -518,7 +518,7 @@ xfs_trans_log_buf(xfs_trans_t       *tp,
          * inside the b_bdstrat callback so that this won't get written to
          * disk.
          */
-       XFS_BUF_DONE(bp);
+       bp->b_flags |= XBF_DONE;
  
         ASSERT(atomic_read(&bip->bli_refcount) > 0);
         bp->b_iodone = xfs_buf_iodone_callbacks;
@@ -534,8 +534,8 @@ xfs_trans_log_buf(xfs_trans_t       *tp,
          */
         if (bip->bli_flags & XFS_BLI_STALE) {
                 bip->bli_flags &= ~XFS_BLI_STALE;
-               ASSERT(XFS_BUF_ISSTALE(bp));
-               XFS_BUF_UNSTALE(bp);
+               ASSERT(bp->b_flags & XBF_STALE);
+               bp->b_flags &= ~XBF_STALE;
                 bip->__bli_format.blf_flags &= ~XFS_BLF_CANCEL;
         }
  
@@ -600,7 +600,7 @@ xfs_trans_binval(
                  * If the buffer is already invalidated, then
                  * just return.
                  */
-               ASSERT(XFS_BUF_ISSTALE(bp));
+               ASSERT(bp->b_flags & XBF_STALE);
                 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
                 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF));
                 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLFT_MASK));
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c

index 995170194df040b5b3e02cab80b5942ee92cb2ad..c3d547211d16001ad686c543fd960996b2321c75 100644 (file)
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -609,17 +609,20 @@ xfs_trans_dqresv(
         xfs_qcnt_t      total_count;
         xfs_qcnt_t      *resbcountp;
         xfs_quotainfo_t *q = mp->m_quotainfo;
+       struct xfs_def_quota    *defq;
  
  
         xfs_dqlock(dqp);
  
+       defq = xfs_get_defquota(dqp, q);
+
         if (flags & XFS_TRANS_DQ_RES_BLKS) {
                 hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
                 if (!hardlimit)
-                       hardlimit = q->qi_bhardlimit;
+                       hardlimit = defq->bhardlimit;
                 softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit);
                 if (!softlimit)
-                       softlimit = q->qi_bsoftlimit;
+                       softlimit = defq->bsoftlimit;
                 timer = be32_to_cpu(dqp->q_core.d_btimer);
                 warns = be16_to_cpu(dqp->q_core.d_bwarns);
                 warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
@@ -628,10 +631,10 @@ xfs_trans_dqresv(
                 ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
                 hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit);
                 if (!hardlimit)
-                       hardlimit = q->qi_rtbhardlimit;
+                       hardlimit = defq->rtbhardlimit;
                 softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit);
                 if (!softlimit)
-                       softlimit = q->qi_rtbsoftlimit;
+                       softlimit = defq->rtbsoftlimit;
                 timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
                 warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
                 warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
@@ -672,10 +675,10 @@ xfs_trans_dqresv(
                         warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
                         hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
                         if (!hardlimit)
-                               hardlimit = q->qi_ihardlimit;
+                               hardlimit = defq->ihardlimit;
                         softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
                         if (!softlimit)
-                               softlimit = q->qi_isoftlimit;
+                               softlimit = defq->isoftlimit;
  
                         if (hardlimit && total_count > hardlimit) {
                                 xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c

index b97f1df910abb0bd60ac5850fbe549237806d078..11a3af08b5c7ea1e40dcd348606414e147e6a0cd 100644 (file)
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -75,18 +75,10 @@ xfs_trans_ichgtime(
  
         tv = current_fs_time(inode->i_sb);
  
-       if ((flags & XFS_ICHGTIME_MOD) &&
-           !timespec_equal(&inode->i_mtime, &tv)) {
+       if (flags & XFS_ICHGTIME_MOD)
                 inode->i_mtime = tv;
-               ip->i_d.di_mtime.t_sec = tv.tv_sec;
-               ip->i_d.di_mtime.t_nsec = tv.tv_nsec;
-       }
-       if ((flags & XFS_ICHGTIME_CHG) &&
-           !timespec_equal(&inode->i_ctime, &tv)) {
+       if (flags & XFS_ICHGTIME_CHG)
                 inode->i_ctime = tv;
-               ip->i_d.di_ctime.t_sec = tv.tv_sec;
-               ip->i_d.di_ctime.t_nsec = tv.tv_nsec;
-       }
  }
  
  /*
@@ -125,7 +117,7 @@ xfs_trans_log_inode(
          */
         if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) &&
             IS_I_VERSION(VFS_I(ip))) {
-               ip->i_d.di_changecount = ++VFS_I(ip)->i_version;
+               VFS_I(ip)->i_version++;
                 flags |= XFS_ILOG_CORE;
         }
  
diff --git a/include/linux/fs.h b/include/linux/fs.h

index cc08198358d4ee4cd03d68d8f681a76bb96f4521..35d99266ca9a392b695f39dc5fc1f7c03cc7bf80 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -72,7 +72,7 @@ extern int sysctl_protected_hardlinks;
  struct buffer_head;
  typedef int (get_block_t)(struct inode *inode, sector_t iblock,
                         struct buffer_head *bh_result, int create);
-typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
+typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
                         ssize_t bytes, void *private);
  typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate);
  
diff --git a/include/linux/quota.h b/include/linux/quota.h

index b2505acfd3c078c70e733f7d9e826cfc4b6c9524..fba92f5c1a631a7c43552a357a51d8d3eb896a01 100644 (file)
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -425,6 +425,8 @@ struct quotactl_ops {
         int (*quota_sync)(struct super_block *, int);
         int (*set_info)(struct super_block *, int, struct qc_info *);
         int (*get_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *);
+       int (*get_nextdqblk)(struct super_block *, struct kqid *,
+                            struct qc_dqblk *);
         int (*set_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *);
         int (*get_state)(struct super_block *, struct qc_state *);
         int (*rm_xquota)(struct super_block *, unsigned int);
diff --git a/include/uapi/linux/dqblk_xfs.h b/include/uapi/linux/dqblk_xfs.h

index dcd75cc261962f65c909a6efa0defe3f1dcdc281..11b3b31faf1483a46122a67e93b823182b768cb9 100644 (file)
--- a/include/uapi/linux/dqblk_xfs.h
+++ b/include/uapi/linux/dqblk_xfs.h
@@ -39,6 +39,7 @@
  #define Q_XQUOTARM     XQM_CMD(6)      /* free disk space used by dquots */
  #define Q_XQUOTASYNC   XQM_CMD(7)      /* delalloc flush, updates dquots */
  #define Q_XGETQSTATV   XQM_CMD(8)      /* newer version of get quota */
+#define Q_XGETNEXTQUOTA        XQM_CMD(9)      /* get disk limits and usage >= ID */
  
  /*
   * fs_disk_quota structure:
diff --git a/include/uapi/linux/quota.h b/include/uapi/linux/quota.h

index 9c95b2c1c88a6ef0a6bb4207cd5122011d3007f9..38baddb807f503f5f526d93377df677ab80c5ad1 100644 (file)
--- a/include/uapi/linux/quota.h
+++ b/include/uapi/linux/quota.h
@@ -71,6 +71,7 @@
  #define Q_SETINFO  0x800006    /* set information about quota files */
  #define Q_GETQUOTA 0x800007    /* get user quota structure */
  #define Q_SETQUOTA 0x800008    /* set user quota structure */
+#define Q_GETNEXTQUOTA 0x800009        /* get disk limits and usage >= ID */
  
  /* Quota format type IDs */
  #define        QFMT_VFS_OLD 1
@@ -119,6 +120,19 @@ struct if_dqblk {
         __u32 dqb_valid;
  };
  
+struct if_nextdqblk {
+       __u64 dqb_bhardlimit;
+       __u64 dqb_bsoftlimit;
+       __u64 dqb_curspace;
+       __u64 dqb_ihardlimit;
+       __u64 dqb_isoftlimit;
+       __u64 dqb_curinodes;
+       __u64 dqb_btime;
+       __u64 dqb_itime;
+       __u32 dqb_valid;
+       __u32 dqb_id;
+};
+
  /*
   * Structure used for setting quota information about file via quotactl
   * Following flags are used to specify which fields are valid
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 21 Mar 2016 18:53:05 +0000 (11:53 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 21 Mar 2016 18:53:05 +0000 (11:53 -0700)
fs/dax.c		patch \| blob \| blame \| history
fs/direct-io.c		patch \| blob \| blame \| history
fs/ext4/ext4.h		patch \| blob \| blame \| history
fs/ext4/inode.c		patch \| blob \| blame \| history
fs/ext4/page-io.c		patch \| blob \| blame \| history
fs/ocfs2/aops.c		patch \| blob \| blame \| history
fs/quota/quota.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_alloc_btree.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_attr_sf.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_bmap.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_bmap_btree.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_btree.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_da_format.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_dir2.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_dir2_node.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_ialloc.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_ialloc_btree.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_inode_buf.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_inode_buf.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_inode_fork.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_log_format.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_quota_defs.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_rtbitmap.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_sb.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_shared.h		patch \| blob \| blame \| history
fs/xfs/xfs_aops.c		patch \| blob \| blame \| history
fs/xfs/xfs_aops.h		patch \| blob \| blame \| history
fs/xfs/xfs_attr_list.c		patch \| blob \| blame \| history
fs/xfs/xfs_bmap_util.c		patch \| blob \| blame \| history
fs/xfs/xfs_buf.c		patch \| blob \| blame \| history
fs/xfs/xfs_buf.h		patch \| blob \| blame \| history
fs/xfs/xfs_buf_item.c		patch \| blob \| blame \| history
fs/xfs/xfs_dir2_readdir.c		patch \| blob \| blame \| history
fs/xfs/xfs_discard.c		patch \| blob \| blame \| history
fs/xfs/xfs_dquot.c		patch \| blob \| blame \| history
fs/xfs/xfs_export.c		patch \| blob \| blame \| history
fs/xfs/xfs_file.c		patch \| blob \| blame \| history
fs/xfs/xfs_filestream.c		patch \| blob \| blame \| history
fs/xfs/xfs_fsops.h		patch \| blob \| blame \| history
fs/xfs/xfs_icache.c		patch \| blob \| blame \| history
fs/xfs/xfs_inode.c		patch \| blob \| blame \| history
fs/xfs/xfs_inode.h		patch \| blob \| blame \| history
fs/xfs/xfs_inode_item.c		patch \| blob \| blame \| history
fs/xfs/xfs_ioctl.c		patch \| blob \| blame \| history
fs/xfs/xfs_iops.c		patch \| blob \| blame \| history
fs/xfs/xfs_itable.c		patch \| blob \| blame \| history
fs/xfs/xfs_log.c		patch \| blob \| blame \| history
fs/xfs/xfs_log_recover.c		patch \| blob \| blame \| history
fs/xfs/xfs_mount.c		patch \| blob \| blame \| history
fs/xfs/xfs_mount.h		patch \| blob \| blame \| history
fs/xfs/xfs_ondisk.h	[new file with mode: 0644]	patch \| blob
fs/xfs/xfs_qm.c		patch \| blob \| blame \| history
fs/xfs/xfs_qm.h		patch \| blob \| blame \| history
fs/xfs/xfs_qm_syscalls.c		patch \| blob \| blame \| history
fs/xfs/xfs_quotaops.c		patch \| blob \| blame \| history
fs/xfs/xfs_rtalloc.c		patch \| blob \| blame \| history
fs/xfs/xfs_super.c		patch \| blob \| blame \| history
fs/xfs/xfs_super.h		patch \| blob \| blame \| history
fs/xfs/xfs_sysfs.c		patch \| blob \| blame \| history
fs/xfs/xfs_trace.h		patch \| blob \| blame \| history
fs/xfs/xfs_trans.c		patch \| blob \| blame \| history
fs/xfs/xfs_trans.h		patch \| blob \| blame \| history
fs/xfs/xfs_trans_ail.c		patch \| blob \| blame \| history
fs/xfs/xfs_trans_buf.c		patch \| blob \| blame \| history
fs/xfs/xfs_trans_dquot.c		patch \| blob \| blame \| history
fs/xfs/xfs_trans_inode.c		patch \| blob \| blame \| history
include/linux/fs.h		patch \| blob \| blame \| history
include/linux/quota.h		patch \| blob \| blame \| history
include/uapi/linux/dqblk_xfs.h		patch \| blob \| blame \| history
include/uapi/linux/quota.h		patch \| blob \| blame \| history