Merge tag 'xfs-4.20-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 24 Oct 2018 16:36:12 +0000 (17:36 +0100)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 24 Oct 2018 16:36:12 +0000 (17:36 +0100)
Pull xfs updates from Dave Chinner:
 "There's not a huge amount of change in this cycle - Darrick has been
  out of action for a couple of months (hence me sending the last few
  pull requests), so we decided a quiet cycle mainly focussed on bug
  fixes was a good idea. Darrick will take the helm again at the end of
  this merge window.

  FYI, I may be sending another update later in the cycle - there's a
  pending rework of the clone/dedupe_file_range code that fixes
  numerous bugs spread amongst the VFS, XFS and ocfs2 code. It has
  been reviewed and tested; Al and I just need to work out the details
  of the merge, so it may come from him rather than me.

  Summary:

   - only support filesystems with unwritten extents

   - add definition for statfs XFS magic number (see the userspace
     sketch after this summary)

   - remove unused parameters around reflink code

   - more debug for dangling delalloc extents

   - cancel COW extents on extent swap targets

   - fix quota stats output and clean up the code

   - refactor some of the attribute code in preparation for parent
     pointers

   - fix several buffer handling bugs"
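
A quick userspace illustration of the statfs magic number item above: with the
define exported through include/uapi/linux/magic.h by this series, programs no
longer need to hardcode the "XFSB" value to detect XFS via statfs(2). This is a
minimal sketch, assuming a kernel headers package that already carries the new
define (XFS_SUPER_MAGIC, value 0x58465342):

	#include <stdio.h>
	#include <sys/vfs.h>
	#include <linux/magic.h>	/* XFS_SUPER_MAGIC, after this series */

	/* Return 1 if @path is on XFS, 0 if not, -1 on statfs() failure. */
	static int is_xfs(const char *path)
	{
		struct statfs	sfs;

		if (statfs(path, &sfs) < 0)
			return -1;
		return sfs.f_type == XFS_SUPER_MAGIC;
	}

	int main(void)
	{
		printf("/ is on XFS: %d\n", is_xfs("/"));
		return 0;
	}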

* tag 'xfs-4.20-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (21 commits)
  xfs: cancel COW blocks before swapext
  xfs: clear ail delwri queued bufs on unmount of shutdown fs
  xfs: use offsetof() in place of offset macros for __xfsstats
  xfs: Fix xqmstats offsets in /proc/fs/xfs/xqmstat
  xfs: fix use-after-free race in xfs_buf_rele
  xfs: Add attribute remove and helper functions
  xfs: Add attribute set and helper functions
  xfs: Add helper function xfs_attr_try_sf_addname
  xfs: Move fs/xfs/xfs_attr.h to fs/xfs/libxfs/xfs_attr.h
  xfs: issue log message on user force shutdown
  xfs: fix buffer state management in xrep_findroot_block
  xfs: always assign buffer verifiers when one is provided
  xfs: xrep_findroot_block should reject root blocks with siblings
  xfs: add a define for statfs magic to uapi
  xfs: print dangling delalloc extents
  xfs: fix fork selection in xfs_find_trim_cow_extent
  xfs: remove the unused trimmed argument from xfs_reflink_trim_around_shared
  xfs: remove the unused shared argument to xfs_reflink_reserve_cow
  xfs: handle zeroing in xfs_file_iomap_begin_delay
  xfs: remove support for filesystems without unwritten extent flag
  ...
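
The "use offsetof() in place of offset macros for __xfsstats" and "Fix
xqmstats offsets" entries above address the same class of problem:
hand-maintained offset constants silently go stale when fields are added to a
stats structure. A generic sketch of the technique, with illustrative struct
and field names only (not the real __xfsstats layout):

	#include <stddef.h>	/* offsetof() */
	#include <stdint.h>

	struct demo_stats {
		uint32_t	xs_allocx;
		uint32_t	xs_attr[4];
		uint32_t	xs_qm[8];
	};

	/*
	 * Derive the per-section offsets (counted in uint32_t slots, not
	 * bytes) from the layout itself so they can never drift from the
	 * structure definition.
	 */
	enum {
		DEMO_ATTR_OFF	= offsetof(struct demo_stats, xs_attr) / sizeof(uint32_t),
		DEMO_QM_OFF	= offsetof(struct demo_stats, xs_qm) / sizeof(uint32_t),
	};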

26 files changed:
fs/xfs/libxfs/xfs_attr.c
fs/xfs/libxfs/xfs_attr.h [new file with mode: 0644]
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_bmap.h
fs/xfs/libxfs/xfs_format.h
fs/xfs/libxfs/xfs_sb.c
fs/xfs/scrub/repair.c
fs/xfs/scrub/scrub.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_aops.h
fs/xfs/xfs_attr.h [deleted file]
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_buf.h
fs/xfs/xfs_fsops.c
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_reflink.c
fs/xfs/xfs_reflink.h
fs/xfs/xfs_stats.c
fs/xfs/xfs_stats.h
fs/xfs/xfs_super.c
fs/xfs/xfs_trans.h
fs/xfs/xfs_trans_ail.c
fs/xfs/xfs_trans_buf.c
include/uapi/linux/magic.h

index c6299f82a6e496ac00b1ef27953a5ca5313cb9f8..844ed87b190077115c760204659179bca1da8c43 100644 (file)
@@ -191,6 +191,128 @@ xfs_attr_calc_size(
        return nblks;
 }
 
+STATIC int
+xfs_attr_try_sf_addname(
+       struct xfs_inode        *dp,
+       struct xfs_da_args      *args)
+{
+
+       struct xfs_mount        *mp = dp->i_mount;
+       int                     error, error2;
+
+       error = xfs_attr_shortform_addname(args);
+       if (error == -ENOSPC)
+               return error;
+
+       /*
+        * Commit the shortform mods, and we're done.
+        * NOTE: this is also the error path (EEXIST, etc).
+        */
+       if (!error && (args->flags & ATTR_KERNOTIME) == 0)
+               xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG);
+
+       if (mp->m_flags & XFS_MOUNT_WSYNC)
+               xfs_trans_set_sync(args->trans);
+
+       error2 = xfs_trans_commit(args->trans);
+       args->trans = NULL;
+       return error ? error : error2;
+}
+
+/*
+ * Set the attribute specified in @args.
+ */
+int
+xfs_attr_set_args(
+       struct xfs_da_args      *args,
+       struct xfs_buf          **leaf_bp)
+{
+       struct xfs_inode        *dp = args->dp;
+       int                     error;
+
+       /*
+        * If the attribute list is non-existent or a shortform list,
+        * upgrade it to a single-leaf-block attribute list.
+        */
+       if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
+           (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
+            dp->i_d.di_anextents == 0)) {
+
+               /*
+                * Build initial attribute list (if required).
+                */
+               if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
+                       xfs_attr_shortform_create(args);
+
+               /*
+                * Try to add the attr to the attribute list in the inode.
+                */
+               error = xfs_attr_try_sf_addname(dp, args);
+               if (error != -ENOSPC)
+                       return error;
+
+               /*
+                * It won't fit in the shortform, transform to a leaf block.
+                * GROT: another possible req'mt for a double-split btree op.
+                */
+               error = xfs_attr_shortform_to_leaf(args, leaf_bp);
+               if (error)
+                       return error;
+
+               /*
+                * Prevent the leaf buffer from being unlocked so that a
+                * concurrent AIL push cannot grab the half-baked leaf
+                * buffer and run into problems with the write verifier.
+                */
+               xfs_trans_bhold(args->trans, *leaf_bp);
+
+               error = xfs_defer_finish(&args->trans);
+               if (error)
+                       return error;
+
+               /*
+                * Commit the leaf transformation.  We'll need another
+                * (linked) transaction to add the new attribute to the
+                * leaf.
+                */
+               error = xfs_trans_roll_inode(&args->trans, dp);
+               if (error)
+                       return error;
+               xfs_trans_bjoin(args->trans, *leaf_bp);
+               *leaf_bp = NULL;
+       }
+
+       if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
+               error = xfs_attr_leaf_addname(args);
+       else
+               error = xfs_attr_node_addname(args);
+       return error;
+}
+
+/*
+ * Remove the attribute specified in @args.
+ */
+int
+xfs_attr_remove_args(
+       struct xfs_da_args      *args)
+{
+       struct xfs_inode        *dp = args->dp;
+       int                     error;
+
+       if (!xfs_inode_hasattr(dp)) {
+               error = -ENOATTR;
+       } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
+               ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
+               error = xfs_attr_shortform_remove(args);
+       } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
+               error = xfs_attr_leaf_removename(args);
+       } else {
+               error = xfs_attr_node_removename(args);
+       }
+
+       return error;
+}
+
 int
 xfs_attr_set(
        struct xfs_inode        *dp,
@@ -204,7 +326,7 @@ xfs_attr_set(
        struct xfs_da_args      args;
        struct xfs_trans_res    tres;
        int                     rsvd = (flags & ATTR_ROOT) != 0;
-       int                     error, err2, local;
+       int                     error, local;
 
        XFS_STATS_INC(mp, xs_attr_set);
 
@@ -255,93 +377,17 @@ xfs_attr_set(
        error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
                                rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
                                       XFS_QMOPT_RES_REGBLKS);
-       if (error) {
-               xfs_iunlock(dp, XFS_ILOCK_EXCL);
-               xfs_trans_cancel(args.trans);
-               return error;
-       }
+       if (error)
+               goto out_trans_cancel;
 
        xfs_trans_ijoin(args.trans, dp, 0);
-
-       /*
-        * If the attribute list is non-existent or a shortform list,
-        * upgrade it to a single-leaf-block attribute list.
-        */
-       if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
-           (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
-            dp->i_d.di_anextents == 0)) {
-
-               /*
-                * Build initial attribute list (if required).
-                */
-               if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
-                       xfs_attr_shortform_create(&args);
-
-               /*
-                * Try to add the attr to the attribute list in
-                * the inode.
-                */
-               error = xfs_attr_shortform_addname(&args);
-               if (error != -ENOSPC) {
-                       /*
-                        * Commit the shortform mods, and we're done.
-                        * NOTE: this is also the error path (EEXIST, etc).
-                        */
-                       ASSERT(args.trans != NULL);
-
-                       /*
-                        * If this is a synchronous mount, make sure that
-                        * the transaction goes to disk before returning
-                        * to the user.
-                        */
-                       if (mp->m_flags & XFS_MOUNT_WSYNC)
-                               xfs_trans_set_sync(args.trans);
-
-                       if (!error && (flags & ATTR_KERNOTIME) == 0) {
-                               xfs_trans_ichgtime(args.trans, dp,
-                                                       XFS_ICHGTIME_CHG);
-                       }
-                       err2 = xfs_trans_commit(args.trans);
-                       xfs_iunlock(dp, XFS_ILOCK_EXCL);
-
-                       return error ? error : err2;
-               }
-
-               /*
-                * It won't fit in the shortform, transform to a leaf block.
-                * GROT: another possible req'mt for a double-split btree op.
-                */
-               error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
-               if (error)
-                       goto out;
-               /*
-                * Prevent the leaf buffer from being unlocked so that a
-                * concurrent AIL push cannot grab the half-baked leaf
-                * buffer and run into problems with the write verifier.
-                */
-               xfs_trans_bhold(args.trans, leaf_bp);
-               error = xfs_defer_finish(&args.trans);
-               if (error)
-                       goto out;
-
-               /*
-                * Commit the leaf transformation.  We'll need another (linked)
-                * transaction to add the new attribute to the leaf, which
-                * means that we have to hold & join the leaf buffer here too.
-                */
-               error = xfs_trans_roll_inode(&args.trans, dp);
-               if (error)
-                       goto out;
-               xfs_trans_bjoin(args.trans, leaf_bp);
-               leaf_bp = NULL;
-       }
-
-       if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
-               error = xfs_attr_leaf_addname(&args);
-       else
-               error = xfs_attr_node_addname(&args);
+       error = xfs_attr_set_args(&args, &leaf_bp);
        if (error)
-               goto out;
+               goto out_release_leaf;
+       if (!args.trans) {
+               /* shortform attribute has already been committed */
+               goto out_unlock;
+       }
 
        /*
         * If this is a synchronous mount, make sure that the
@@ -358,17 +404,17 @@ xfs_attr_set(
         */
        xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
        error = xfs_trans_commit(args.trans);
+out_unlock:
        xfs_iunlock(dp, XFS_ILOCK_EXCL);
-
        return error;
 
-out:
+out_release_leaf:
        if (leaf_bp)
                xfs_trans_brelse(args.trans, leaf_bp);
+out_trans_cancel:
        if (args.trans)
                xfs_trans_cancel(args.trans);
-       xfs_iunlock(dp, XFS_ILOCK_EXCL);
-       return error;
+       goto out_unlock;
 }
 
 /*
@@ -423,17 +469,7 @@ xfs_attr_remove(
         */
        xfs_trans_ijoin(args.trans, dp, 0);
 
-       if (!xfs_inode_hasattr(dp)) {
-               error = -ENOATTR;
-       } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-               ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
-               error = xfs_attr_shortform_remove(&args);
-       } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
-               error = xfs_attr_leaf_removename(&args);
-       } else {
-               error = xfs_attr_node_removename(&args);
-       }
-
+       error = xfs_attr_remove_args(&args);
        if (error)
                goto out;
 
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
new file mode 100644 (file)
index 0000000..bdf52a3
--- /dev/null
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000,2002-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#ifndef __XFS_ATTR_H__
+#define        __XFS_ATTR_H__
+
+struct xfs_inode;
+struct xfs_da_args;
+struct xfs_attr_list_context;
+
+/*
+ * Large attribute lists are structured around Btrees where all the data
+ * elements are in the leaf nodes.  Attribute names are hashed into an int,
+ * then that int is used as the index into the Btree.  Since the hashval
+ * of an attribute name may not be unique, we may have duplicate keys.
+ * The internal links in the Btree are logical block offsets into the file.
+ *
+ * Small attribute lists use a different format and are packed as tightly
+ * as possible so as to fit into the literal area of the inode.
+ */
+
+/*========================================================================
+ * External interfaces
+ *========================================================================*/
+
+
+#define ATTR_DONTFOLLOW        0x0001  /* -- unused, from IRIX -- */
+#define ATTR_ROOT      0x0002  /* use attrs in root (trusted) namespace */
+#define ATTR_TRUST     0x0004  /* -- unused, from IRIX -- */
+#define ATTR_SECURE    0x0008  /* use attrs in security namespace */
+#define ATTR_CREATE    0x0010  /* pure create: fail if attr already exists */
+#define ATTR_REPLACE   0x0020  /* pure set: fail if attr does not exist */
+
+#define ATTR_KERNOTIME 0x1000  /* [kernel] don't update inode timestamps */
+#define ATTR_KERNOVAL  0x2000  /* [kernel] get attr size only, not value */
+
+#define ATTR_INCOMPLETE        0x4000  /* [kernel] return INCOMPLETE attr keys */
+
+#define XFS_ATTR_FLAGS \
+       { ATTR_DONTFOLLOW,      "DONTFOLLOW" }, \
+       { ATTR_ROOT,            "ROOT" }, \
+       { ATTR_TRUST,           "TRUST" }, \
+       { ATTR_SECURE,          "SECURE" }, \
+       { ATTR_CREATE,          "CREATE" }, \
+       { ATTR_REPLACE,         "REPLACE" }, \
+       { ATTR_KERNOTIME,       "KERNOTIME" }, \
+       { ATTR_KERNOVAL,        "KERNOVAL" }, \
+       { ATTR_INCOMPLETE,      "INCOMPLETE" }
+
+/*
+ * The maximum size (into the kernel or returned from the kernel) of an
+ * attribute value or the buffer used for an attr_list() call.  Larger
+ * sizes will result in an ERANGE return code.
+ */
+#define        ATTR_MAX_VALUELEN       (64*1024)       /* max length of a value */
+
+/*
+ * Define how lists of attribute names are returned to the user from
+ * the attr_list() call.  A large, 32bit aligned, buffer is passed in
+ * along with its size.  We put an array of offsets at the top that each
+ * reference an attrlist_ent_t and pack the attrlist_ent_t's at the bottom.
+ */
+typedef struct attrlist {
+       __s32   al_count;       /* number of entries in attrlist */
+       __s32   al_more;        /* T/F: more attrs (do call again) */
+       __s32   al_offset[1];   /* byte offsets of attrs [var-sized] */
+} attrlist_t;
+
+/*
+ * Show the interesting info about one attribute.  This is what the
+ * al_offset[i] entry points to.
+ */
+typedef struct attrlist_ent {  /* data from attr_list() */
+       __u32   a_valuelen;     /* number bytes in value of attr */
+       char    a_name[1];      /* attr name (NULL terminated) */
+} attrlist_ent_t;
+
+/*
+ * Given a pointer to the (char*) buffer containing the attr_list() result,
+ * and an index, return a pointer to the indicated attribute in the buffer.
+ */
+#define        ATTR_ENTRY(buffer, index)               \
+       ((attrlist_ent_t *)                     \
+        &((char *)buffer)[ ((attrlist_t *)(buffer))->al_offset[index] ])
+
+/*
+ * Kernel-internal version of the attrlist cursor.
+ */
+typedef struct attrlist_cursor_kern {
+       __u32   hashval;        /* hash value of next entry to add */
+       __u32   blkno;          /* block containing entry (suggestion) */
+       __u32   offset;         /* offset in list of equal-hashvals */
+       __u16   pad1;           /* padding to match user-level */
+       __u8    pad2;           /* padding to match user-level */
+       __u8    initted;        /* T/F: cursor has been initialized */
+} attrlist_cursor_kern_t;
+
+
+/*========================================================================
+ * Structure used to pass context around among the routines.
+ *========================================================================*/
+
+
+/* void; state communicated via *context */
+typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int,
+                             unsigned char *, int, int);
+
+typedef struct xfs_attr_list_context {
+       struct xfs_trans                *tp;
+       struct xfs_inode                *dp;            /* inode */
+       struct attrlist_cursor_kern     *cursor;        /* position in list */
+       char                            *alist;         /* output buffer */
+       int                             seen_enough;    /* T/F: seen enough of list? */
+       ssize_t                         count;          /* num used entries */
+       int                             dupcnt;         /* count dup hashvals seen */
+       int                             bufsize;        /* total buffer size */
+       int                             firstu;         /* first used byte in buffer */
+       int                             flags;          /* from VOP call */
+       int                             resynch;        /* T/F: resynch with cursor */
+       put_listent_func_t              put_listent;    /* list output fmt function */
+       int                             index;          /* index into output buffer */
+} xfs_attr_list_context_t;
+
+
+/*========================================================================
+ * Function prototypes for the kernel.
+ *========================================================================*/
+
+/*
+ * Overall external interface routines.
+ */
+int xfs_attr_inactive(struct xfs_inode *dp);
+int xfs_attr_list_int_ilocked(struct xfs_attr_list_context *);
+int xfs_attr_list_int(struct xfs_attr_list_context *);
+int xfs_inode_hasattr(struct xfs_inode *ip);
+int xfs_attr_get_ilocked(struct xfs_inode *ip, struct xfs_da_args *args);
+int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name,
+                unsigned char *value, int *valuelenp, int flags);
+int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
+                unsigned char *value, int valuelen, int flags);
+int xfs_attr_set_args(struct xfs_da_args *args, struct xfs_buf **leaf_bp);
+int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags);
+int xfs_attr_remove_args(struct xfs_da_args *args);
+int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
+                 int flags, struct attrlist_cursor_kern *cursor);
+
+
+#endif /* __XFS_ATTR_H__ */
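
The comments in this new header describe the attr_list() buffer layout: an
attrlist header, an array of offsets, and packed attrlist_ent entries reached
through ATTR_ENTRY(). Below is a minimal sketch of walking a buffer that has
already been filled, assuming the userspace attr/attributes.h header from
libattr, which mirrors these attrlist definitions; the fill step itself is
outside the scope of this header:

	#include <stdio.h>
	#include <attr/attributes.h>	/* attrlist_t, attrlist_ent_t, ATTR_ENTRY */

	/* Print every attribute name and value length in one result buffer. */
	static void print_attr_names(char *buffer)
	{
		attrlist_t	*al = (attrlist_t *)buffer;
		int		i;

		for (i = 0; i < al->al_count; i++) {
			attrlist_ent_t	*ent = ATTR_ENTRY(buffer, i);

			printf("%s (%u bytes)\n", ent->a_name,
					(unsigned int)ent->a_valuelen);
		}
		if (al->al_more)
			printf("more attrs remain; call attr_list() again\n");
	}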
index a47670332326449cb97f73850887cea799ea684c..74d7228e755b3ade097457c0ae68e07e252e67a4 100644 (file)
@@ -1019,6 +1019,34 @@ xfs_bmap_add_attrfork_local(
        return -EFSCORRUPTED;
 }
 
+/* Set an inode attr fork off based on the format */
+int
+xfs_bmap_set_attrforkoff(
+       struct xfs_inode        *ip,
+       int                     size,
+       int                     *version)
+{
+       switch (ip->i_d.di_format) {
+       case XFS_DINODE_FMT_DEV:
+               ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
+               break;
+       case XFS_DINODE_FMT_LOCAL:
+       case XFS_DINODE_FMT_EXTENTS:
+       case XFS_DINODE_FMT_BTREE:
+               ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
+               if (!ip->i_d.di_forkoff)
+                       ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
+               else if ((ip->i_mount->m_flags & XFS_MOUNT_ATTR2) && version)
+                       *version = 2;
+               break;
+       default:
+               ASSERT(0);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 /*
  * Convert inode from non-attributed to attributed.
  * Must not be in a transaction, ip must not be locked.
@@ -1070,26 +1098,9 @@ xfs_bmap_add_attrfork(
 
        xfs_trans_ijoin(tp, ip, 0);
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-       switch (ip->i_d.di_format) {
-       case XFS_DINODE_FMT_DEV:
-               ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
-               break;
-       case XFS_DINODE_FMT_LOCAL:
-       case XFS_DINODE_FMT_EXTENTS:
-       case XFS_DINODE_FMT_BTREE:
-               ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
-               if (!ip->i_d.di_forkoff)
-                       ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
-               else if (mp->m_flags & XFS_MOUNT_ATTR2)
-                       version = 2;
-               break;
-       default:
-               ASSERT(0);
-               error = -EINVAL;
+       error = xfs_bmap_set_attrforkoff(ip, size, &version);
+       if (error)
                goto trans_cancel;
-       }
-
        ASSERT(ip->i_afp == NULL);
        ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
        ip->i_afp->if_flags = XFS_IFEXTENTS;
@@ -4081,8 +4092,7 @@ xfs_bmapi_allocate(
         * extents to real extents when we're about to write the data.
         */
        if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) &&
-           (bma->flags & XFS_BMAPI_PREALLOC) &&
-           xfs_sb_version_hasextflgbit(&mp->m_sb))
+           (bma->flags & XFS_BMAPI_PREALLOC))
                bma->got.br_state = XFS_EXT_UNWRITTEN;
 
        if (bma->wasdel)
@@ -5245,8 +5255,7 @@ __xfs_bunmapi(
                         * unmapping part of it.  But we can't really
                         * get rid of part of a realtime extent.
                         */
-                       if (del.br_state == XFS_EXT_UNWRITTEN ||
-                           !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
+                       if (del.br_state == XFS_EXT_UNWRITTEN) {
                                /*
                                 * This piece is unwritten, or we're not
                                 * using unwritten extents.  Skip over it.
@@ -5296,10 +5305,9 @@ __xfs_bunmapi(
                                del.br_blockcount -= mod;
                                del.br_startoff += mod;
                                del.br_startblock += mod;
-                       } else if ((del.br_startoff == start &&
-                                   (del.br_state == XFS_EXT_UNWRITTEN ||
-                                    tp->t_blk_res == 0)) ||
-                                  !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
+                       } else if (del.br_startoff == start &&
+                                  (del.br_state == XFS_EXT_UNWRITTEN ||
+                                   tp->t_blk_res == 0)) {
                                /*
                                 * Can't make it unwritten.  There isn't
                                 * a full extent here so just skip it.
@@ -6114,11 +6122,7 @@ xfs_bmap_validate_extent(
                    XFS_FSB_TO_AGNO(mp, endfsb))
                        return __this_address;
        }
-       if (irec->br_state != XFS_EXT_NORM) {
-               if (whichfork != XFS_DATA_FORK)
-                       return __this_address;
-               if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
-                       return __this_address;
-       }
+       if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
+               return __this_address;
        return NULL;
 }
index b6e9b639e731a1fafd1116b9decb22872dc765af..488dc8860fd7c551b02e21eeae3462b96e268eb8 100644 (file)
@@ -183,6 +183,7 @@ void        xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
                xfs_filblks_t len);
 void   xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *);
 int    xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
+int    xfs_bmap_set_attrforkoff(struct xfs_inode *ip, int size, int *version);
 void   xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
 void   __xfs_bmap_add_free(struct xfs_trans *tp, xfs_fsblock_t bno,
                xfs_filblks_t len, struct xfs_owner_info *oinfo,
index afbe336600e165e2fe475ebcf6683e3f677c1cc4..9995d5ae380b9cb0df10274a7824f12b069ddb95 100644 (file)
@@ -287,6 +287,8 @@ static inline bool xfs_sb_good_v4_features(struct xfs_sb *sbp)
 {
        if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT))
                return false;
+       if (!(sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT))
+               return false;
 
        /* check for unknown features in the fs */
        if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) ||
@@ -357,12 +359,6 @@ static inline bool xfs_sb_version_haslogv2(struct xfs_sb *sbp)
               (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT);
 }
 
-static inline bool xfs_sb_version_hasextflgbit(struct xfs_sb *sbp)
-{
-       return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 ||
-              (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT);
-}
-
 static inline bool xfs_sb_version_hassector(struct xfs_sb *sbp)
 {
        return (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
index 081f46e30556637326ef480ccb976b8cb8d0926c..b5a82acd7dfe01d9225c345bbd15740fb4995e83 100644 (file)
@@ -1115,7 +1115,8 @@ xfs_fs_geometry(
 
        geo->version = XFS_FSOP_GEOM_VERSION;
        geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK |
-                    XFS_FSOP_GEOM_FLAGS_DIRV2;
+                    XFS_FSOP_GEOM_FLAGS_DIRV2 |
+                    XFS_FSOP_GEOM_FLAGS_EXTFLG;
        if (xfs_sb_version_hasattr(sbp))
                geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR;
        if (xfs_sb_version_hasquota(sbp))
@@ -1124,8 +1125,6 @@ xfs_fs_geometry(
                geo->flags |= XFS_FSOP_GEOM_FLAGS_IALIGN;
        if (xfs_sb_version_hasdalign(sbp))
                geo->flags |= XFS_FSOP_GEOM_FLAGS_DALIGN;
-       if (xfs_sb_version_hasextflgbit(sbp))
-               geo->flags |= XFS_FSOP_GEOM_FLAGS_EXTFLG;
        if (xfs_sb_version_hassector(sbp))
                geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR;
        if (xfs_sb_version_hasasciici(sbp))
index 9f08dd9bf1d553934bf2aa2f8a108ab8435bc19c..4fc0a5ea76733df273a4c3dcd4e184a40f9768f5 100644 (file)
@@ -29,6 +29,8 @@
 #include "xfs_ag_resv.h"
 #include "xfs_trans_space.h"
 #include "xfs_quota.h"
+#include "xfs_attr.h"
+#include "xfs_reflink.h"
 #include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
@@ -692,13 +694,14 @@ xrep_findroot_block(
        struct xrep_find_ag_btree       *fab,
        uint64_t                        owner,
        xfs_agblock_t                   agbno,
-       bool                            *found_it)
+       bool                            *done_with_block)
 {
        struct xfs_mount                *mp = ri->sc->mp;
        struct xfs_buf                  *bp;
        struct xfs_btree_block          *btblock;
        xfs_daddr_t                     daddr;
-       int                             error;
+       int                             block_level;
+       int                             error = 0;
 
        daddr = XFS_AGB_TO_DADDR(mp, ri->sc->sa.agno, agbno);
 
@@ -717,36 +720,111 @@ xrep_findroot_block(
                        return error;
        }
 
+       /*
+        * Read the buffer into memory so that we can see if it's a match for
+        * our btree type.  We have no clue if it is beforehand, and we want to
+        * avoid xfs_trans_read_buf's behavior of dumping the DONE state (which
+        * will cause needless disk reads in subsequent calls to this function)
+        * and logging metadata verifier failures.
+        *
+        * Therefore, pass in NULL buffer ops.  If the buffer was already in
+        * memory from some other caller it will already have b_ops assigned.
+        * If it was in memory from a previous unsuccessful findroot_block
+        * call, the buffer won't have b_ops but it should be clean and ready
+        * for us to try to verify if the read call succeeds.  The same applies
+        * if the buffer wasn't in memory at all.
+        *
+        * Note: If we never match a btree type with this buffer, it will be
+        * left in memory with NULL b_ops.  This shouldn't be a problem unless
+        * the buffer gets written.
+        */
        error = xfs_trans_read_buf(mp, ri->sc->tp, mp->m_ddev_targp, daddr,
                        mp->m_bsize, 0, &bp, NULL);
        if (error)
                return error;
 
-       /*
-        * Does this look like a block matching our fs and higher than any
-        * other block we've found so far?  If so, reattach buffer verifiers
-        * so the AIL won't complain if the buffer is also dirty.
-        */
+       /* Ensure the block magic matches the btree type we're looking for. */
        btblock = XFS_BUF_TO_BLOCK(bp);
        if (be32_to_cpu(btblock->bb_magic) != fab->magic)
                goto out;
-       if (xfs_sb_version_hascrc(&mp->m_sb) &&
-           !uuid_equal(&btblock->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
-               goto out;
-       bp->b_ops = fab->buf_ops;
 
-       /* Ignore this block if it's lower in the tree than we've seen. */
-       if (fab->root != NULLAGBLOCK &&
-           xfs_btree_get_level(btblock) < fab->height)
-               goto out;
+       /*
+        * If the buffer already has ops applied and they're not the ones for
+        * this btree type, we know this block doesn't match the btree and we
+        * can bail out.
+        *
+        * If the buffer ops match ours, someone else has already validated
+        * the block for us, so we can move on to checking if this is a root
+        * block candidate.
+        *
+        * If the buffer does not have ops, nobody has successfully validated
+        * the contents and the buffer cannot be dirty.  If the magic, uuid,
+        * and structure match this btree type then we'll move on to checking
+        * if it's a root block candidate.  If there is no match, bail out.
+        */
+       if (bp->b_ops) {
+               if (bp->b_ops != fab->buf_ops)
+                       goto out;
+       } else {
+               ASSERT(!xfs_trans_buf_is_dirty(bp));
+               if (!uuid_equal(&btblock->bb_u.s.bb_uuid,
+                               &mp->m_sb.sb_meta_uuid))
+                       goto out;
+               fab->buf_ops->verify_read(bp);
+               if (bp->b_error) {
+                       bp->b_error = 0;
+                       goto out;
+               }
 
-       /* Make sure we pass the verifiers. */
-       bp->b_ops->verify_read(bp);
-       if (bp->b_error)
+               /*
+                * Some read verifiers will (re)set b_ops, so we must be
+                * careful not to blow away any such assignment.
+                */
+               if (!bp->b_ops)
+                       bp->b_ops = fab->buf_ops;
+       }
+
+       /*
+        * This block passes the magic/uuid and verifier tests for this btree
+        * type.  We don't need the caller to try the other tree types.
+        */
+       *done_with_block = true;
+
+       /*
+        * Compare this btree block's level to the height of the current
+        * candidate root block.
+        *
+        * If the level matches the root we found previously, throw away both
+        * blocks because there can't be two candidate roots.
+        *
+        * If level is lower in the tree than the root we found previously,
+        * ignore this block.
+        */
+       block_level = xfs_btree_get_level(btblock);
+       if (block_level + 1 == fab->height) {
+               fab->root = NULLAGBLOCK;
                goto out;
-       fab->root = agbno;
-       fab->height = xfs_btree_get_level(btblock) + 1;
-       *found_it = true;
+       } else if (block_level < fab->height) {
+               goto out;
+       }
+
+       /*
+        * This is the highest block in the tree that we've found so far.
+        * Update the btree height to reflect what we've learned from this
+        * block.
+        */
+       fab->height = block_level + 1;
+
+       /*
+        * If this block doesn't have sibling pointers, then it's the new root
+        * block candidate.  Otherwise, the root will be found farther up the
+        * tree.
+        */
+       if (btblock->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) &&
+           btblock->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
+               fab->root = agbno;
+       else
+               fab->root = NULLAGBLOCK;
 
        trace_xrep_findroot_block(mp, ri->sc->sa.agno, agbno,
                        be32_to_cpu(btblock->bb_magic), fab->height - 1);
@@ -768,7 +846,7 @@ xrep_findroot_rmap(
        struct xrep_findroot            *ri = priv;
        struct xrep_find_ag_btree       *fab;
        xfs_agblock_t                   b;
-       bool                            found_it;
+       bool                            done;
        int                             error = 0;
 
        /* Ignore anything that isn't AG metadata. */
@@ -777,16 +855,16 @@ xrep_findroot_rmap(
 
        /* Otherwise scan each block + btree type. */
        for (b = 0; b < rec->rm_blockcount; b++) {
-               found_it = false;
+               done = false;
                for (fab = ri->btree_info; fab->buf_ops; fab++) {
                        if (rec->rm_owner != fab->rmap_owner)
                                continue;
                        error = xrep_findroot_block(ri, fab,
                                        rec->rm_owner, rec->rm_startblock + b,
-                                       &found_it);
+                                       &done);
                        if (error)
                                return error;
-                       if (found_it)
+                       if (done)
                                break;
                }
        }
index 4bfae1e61d30ee60b956ec7d3495fbbc8e0856b8..1b2344d0052549d588c61192fc332330f1d42350 100644 (file)
@@ -412,19 +412,6 @@ xchk_validate_inputs(
                goto out;
        }
 
-       error = -EOPNOTSUPP;
-       /*
-        * We won't scrub any filesystem that doesn't have the ability
-        * to record unwritten extents.  The option was made default in
-        * 2003, removed from mkfs in 2007, and cannot be disabled in
-        * v5, so if we find a filesystem without this flag it's either
-        * really old or totally unsupported.  Avoid it either way.
-        * We also don't support v1-v3 filesystems, which aren't
-        * mountable.
-        */
-       if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
-               goto out;
-
        /*
         * We only want to repair read-write v5+ filesystems.  Defer the check
         * for ops->repair until after our scrub confirms that we need to
index 49f5f5896a43532d76768f5df6a03a693a25e6fb..338b9d9984e04a62d790ae72638017c9ac3329d5 100644 (file)
@@ -917,7 +917,7 @@ xfs_vm_writepage(
        struct writeback_control *wbc)
 {
        struct xfs_writepage_ctx wpc = {
-               .io_type = XFS_IO_INVALID,
+               .io_type = XFS_IO_HOLE,
        };
        int                     ret;
 
@@ -933,7 +933,7 @@ xfs_vm_writepages(
        struct writeback_control *wbc)
 {
        struct xfs_writepage_ctx wpc = {
-               .io_type = XFS_IO_INVALID,
+               .io_type = XFS_IO_HOLE,
        };
        int                     ret;
 
index 9af867951a1077a2fdab44badb8bd743ef75b2b7..494b4338446ef2ccc519b5cc0bc1a13eeb6dbd4d 100644 (file)
@@ -12,21 +12,19 @@ extern struct bio_set xfs_ioend_bioset;
  * Types of I/O for bmap clustering and I/O completion tracking.
  */
 enum {
-       XFS_IO_INVALID,         /* initial state */
+       XFS_IO_HOLE,            /* covers region without any block allocation */
        XFS_IO_DELALLOC,        /* covers delalloc region */
        XFS_IO_UNWRITTEN,       /* covers allocated but uninitialized data */
        XFS_IO_OVERWRITE,       /* covers already allocated extent */
        XFS_IO_COW,             /* covers copy-on-write extent */
-       XFS_IO_HOLE,            /* covers region without any block allocation */
 };
 
 #define XFS_IO_TYPES \
-       { XFS_IO_INVALID,               "invalid" }, \
-       { XFS_IO_DELALLOC,              "delalloc" }, \
-       { XFS_IO_UNWRITTEN,             "unwritten" }, \
-       { XFS_IO_OVERWRITE,             "overwrite" }, \
-       { XFS_IO_COW,                   "CoW" }, \
-       { XFS_IO_HOLE,                  "hole" }
+       { XFS_IO_HOLE,                  "hole" },       \
+       { XFS_IO_DELALLOC,              "delalloc" },   \
+       { XFS_IO_UNWRITTEN,             "unwritten" },  \
+       { XFS_IO_OVERWRITE,             "overwrite" },  \
+       { XFS_IO_COW,                   "CoW" }
 
 /*
  * Structure for buffered I/O completions.
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
deleted file mode 100644 (file)
index 033ff8c..0000000
+++ /dev/null
@@ -1,148 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2000,2002-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- */
-#ifndef __XFS_ATTR_H__
-#define        __XFS_ATTR_H__
-
-struct xfs_inode;
-struct xfs_da_args;
-struct xfs_attr_list_context;
-
-/*
- * Large attribute lists are structured around Btrees where all the data
- * elements are in the leaf nodes.  Attribute names are hashed into an int,
- * then that int is used as the index into the Btree.  Since the hashval
- * of an attribute name may not be unique, we may have duplicate keys.
- * The internal links in the Btree are logical block offsets into the file.
- *
- * Small attribute lists use a different format and are packed as tightly
- * as possible so as to fit into the literal area of the inode.
- */
-
-/*========================================================================
- * External interfaces
- *========================================================================*/
-
-
-#define ATTR_DONTFOLLOW        0x0001  /* -- unused, from IRIX -- */
-#define ATTR_ROOT      0x0002  /* use attrs in root (trusted) namespace */
-#define ATTR_TRUST     0x0004  /* -- unused, from IRIX -- */
-#define ATTR_SECURE    0x0008  /* use attrs in security namespace */
-#define ATTR_CREATE    0x0010  /* pure create: fail if attr already exists */
-#define ATTR_REPLACE   0x0020  /* pure set: fail if attr does not exist */
-
-#define ATTR_KERNOTIME 0x1000  /* [kernel] don't update inode timestamps */
-#define ATTR_KERNOVAL  0x2000  /* [kernel] get attr size only, not value */
-
-#define ATTR_INCOMPLETE        0x4000  /* [kernel] return INCOMPLETE attr keys */
-
-#define XFS_ATTR_FLAGS \
-       { ATTR_DONTFOLLOW,      "DONTFOLLOW" }, \
-       { ATTR_ROOT,            "ROOT" }, \
-       { ATTR_TRUST,           "TRUST" }, \
-       { ATTR_SECURE,          "SECURE" }, \
-       { ATTR_CREATE,          "CREATE" }, \
-       { ATTR_REPLACE,         "REPLACE" }, \
-       { ATTR_KERNOTIME,       "KERNOTIME" }, \
-       { ATTR_KERNOVAL,        "KERNOVAL" }, \
-       { ATTR_INCOMPLETE,      "INCOMPLETE" }
-
-/*
- * The maximum size (into the kernel or returned from the kernel) of an
- * attribute value or the buffer used for an attr_list() call.  Larger
- * sizes will result in an ERANGE return code.
- */
-#define        ATTR_MAX_VALUELEN       (64*1024)       /* max length of a value */
-
-/*
- * Define how lists of attribute names are returned to the user from
- * the attr_list() call.  A large, 32bit aligned, buffer is passed in
- * along with its size.  We put an array of offsets at the top that each
- * reference an attrlist_ent_t and pack the attrlist_ent_t's at the bottom.
- */
-typedef struct attrlist {
-       __s32   al_count;       /* number of entries in attrlist */
-       __s32   al_more;        /* T/F: more attrs (do call again) */
-       __s32   al_offset[1];   /* byte offsets of attrs [var-sized] */
-} attrlist_t;
-
-/*
- * Show the interesting info about one attribute.  This is what the
- * al_offset[i] entry points to.
- */
-typedef struct attrlist_ent {  /* data from attr_list() */
-       __u32   a_valuelen;     /* number bytes in value of attr */
-       char    a_name[1];      /* attr name (NULL terminated) */
-} attrlist_ent_t;
-
-/*
- * Given a pointer to the (char*) buffer containing the attr_list() result,
- * and an index, return a pointer to the indicated attribute in the buffer.
- */
-#define        ATTR_ENTRY(buffer, index)               \
-       ((attrlist_ent_t *)                     \
-        &((char *)buffer)[ ((attrlist_t *)(buffer))->al_offset[index] ])
-
-/*
- * Kernel-internal version of the attrlist cursor.
- */
-typedef struct attrlist_cursor_kern {
-       __u32   hashval;        /* hash value of next entry to add */
-       __u32   blkno;          /* block containing entry (suggestion) */
-       __u32   offset;         /* offset in list of equal-hashvals */
-       __u16   pad1;           /* padding to match user-level */
-       __u8    pad2;           /* padding to match user-level */
-       __u8    initted;        /* T/F: cursor has been initialized */
-} attrlist_cursor_kern_t;
-
-
-/*========================================================================
- * Structure used to pass context around among the routines.
- *========================================================================*/
-
-
-/* void; state communicated via *context */
-typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int,
-                             unsigned char *, int, int);
-
-typedef struct xfs_attr_list_context {
-       struct xfs_trans                *tp;
-       struct xfs_inode                *dp;            /* inode */
-       struct attrlist_cursor_kern     *cursor;        /* position in list */
-       char                            *alist;         /* output buffer */
-       int                             seen_enough;    /* T/F: seen enough of list? */
-       ssize_t                         count;          /* num used entries */
-       int                             dupcnt;         /* count dup hashvals seen */
-       int                             bufsize;        /* total buffer size */
-       int                             firstu;         /* first used byte in buffer */
-       int                             flags;          /* from VOP call */
-       int                             resynch;        /* T/F: resynch with cursor */
-       put_listent_func_t              put_listent;    /* list output fmt function */
-       int                             index;          /* index into output buffer */
-} xfs_attr_list_context_t;
-
-
-/*========================================================================
- * Function prototypes for the kernel.
- *========================================================================*/
-
-/*
- * Overall external interface routines.
- */
-int xfs_attr_inactive(struct xfs_inode *dp);
-int xfs_attr_list_int_ilocked(struct xfs_attr_list_context *);
-int xfs_attr_list_int(struct xfs_attr_list_context *);
-int xfs_inode_hasattr(struct xfs_inode *ip);
-int xfs_attr_get_ilocked(struct xfs_inode *ip, struct xfs_da_args *args);
-int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name,
-                unsigned char *value, int *valuelenp, int flags);
-int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
-                unsigned char *value, int valuelen, int flags);
-int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags);
-int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
-                 int flags, struct attrlist_cursor_kern *cursor);
-
-
-#endif /* __XFS_ATTR_H__ */
index 6de8d90041ff0e676e85e559c1b2eef65dd74946..5d263dfdb3bcc60ca30708622397e42de9fbaeac 100644 (file)
@@ -406,10 +406,10 @@ xfs_getbmap_report_one(
        struct xfs_bmbt_irec    *got)
 {
        struct kgetbmap         *p = out + bmv->bmv_entries;
-       bool                    shared = false, trimmed = false;
+       bool                    shared = false;
        int                     error;
 
-       error = xfs_reflink_trim_around_shared(ip, got, &shared, &trimmed);
+       error = xfs_reflink_trim_around_shared(ip, got, &shared);
        if (error)
                return error;
 
@@ -1042,44 +1042,6 @@ out_trans_cancel:
        goto out_unlock;
 }
 
-static int
-xfs_adjust_extent_unmap_boundaries(
-       struct xfs_inode        *ip,
-       xfs_fileoff_t           *startoffset_fsb,
-       xfs_fileoff_t           *endoffset_fsb)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_bmbt_irec    imap;
-       int                     nimap, error;
-       xfs_extlen_t            mod = 0;
-
-       nimap = 1;
-       error = xfs_bmapi_read(ip, *startoffset_fsb, 1, &imap, &nimap, 0);
-       if (error)
-               return error;
-
-       if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
-               ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
-               div_u64_rem(imap.br_startblock, mp->m_sb.sb_rextsize, &mod);
-               if (mod)
-                       *startoffset_fsb += mp->m_sb.sb_rextsize - mod;
-       }
-
-       nimap = 1;
-       error = xfs_bmapi_read(ip, *endoffset_fsb - 1, 1, &imap, &nimap, 0);
-       if (error)
-               return error;
-
-       if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
-               ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
-               mod++;
-               if (mod && mod != mp->m_sb.sb_rextsize)
-                       *endoffset_fsb -= mod;
-       }
-
-       return 0;
-}
-
 static int
 xfs_flush_unmap_range(
        struct xfs_inode        *ip,
@@ -1133,19 +1095,8 @@ xfs_free_file_space(
        endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
 
        /*
-        * Need to zero the stuff we're not freeing, on disk.  If it's a RT file
-        * and we can't use unwritten extents then we actually need to ensure
-        * to zero the whole extent, otherwise we just need to take of block
-        * boundaries, and xfs_bunmapi will handle the rest.
+        * Need to zero the stuff we're not freeing, on disk.
         */
-       if (XFS_IS_REALTIME_INODE(ip) &&
-           !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
-               error = xfs_adjust_extent_unmap_boundaries(ip, &startoffset_fsb,
-                               &endoffset_fsb);
-               if (error)
-                       return error;
-       }
-
        if (endoffset_fsb > startoffset_fsb) {
                while (!done) {
                        error = xfs_unmap_extent(ip, startoffset_fsb,
@@ -1824,6 +1775,12 @@ xfs_swap_extents(
        if (error)
                goto out_unlock;
 
+       if (xfs_inode_has_cow_data(tip)) {
+               error = xfs_reflink_cancel_cow_range(tip, 0, NULLFILEOFF, true);
+               if (error)
+                       return error;
+       }
+
        /*
         * Extent "swapping" with rmap requires a permanent reservation and
         * a block reservation because it's really just a remap operation
index e839907e8492f940b431a7f28621d4148ad9a4a1..b21ea2ba768d624329d76078cfe8bab228d40cdd 100644 (file)
@@ -37,6 +37,32 @@ static kmem_zone_t *xfs_buf_zone;
 #define xb_to_gfp(flags) \
        ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN)
 
+/*
+ * Locking orders
+ *
+ * xfs_buf_ioacct_inc:
+ * xfs_buf_ioacct_dec:
+ *     b_sema (caller holds)
+ *       b_lock
+ *
+ * xfs_buf_stale:
+ *     b_sema (caller holds)
+ *       b_lock
+ *         lru_lock
+ *
+ * xfs_buf_rele:
+ *     b_lock
+ *       pag_buf_lock
+ *         lru_lock
+ *
+ * xfs_buftarg_wait_rele
+ *     lru_lock
+ *       b_lock (trylock due to inversion)
+ *
+ * xfs_buftarg_isolate
+ *     lru_lock
+ *       b_lock (trylock due to inversion)
+ */
 
 static inline int
 xfs_buf_is_vmapped(
@@ -749,6 +775,30 @@ _xfs_buf_read(
        return xfs_buf_submit(bp);
 }
 
+/*
+ * If the caller passed in an ops structure and the buffer doesn't have ops
+ * assigned, set the ops and use them to verify the contents.  If the contents
+ * cannot be verified, we'll clear XBF_DONE.  We assume the buffer has no
+ * recorded errors and is already in XBF_DONE state.
+ */
+int
+xfs_buf_ensure_ops(
+       struct xfs_buf          *bp,
+       const struct xfs_buf_ops *ops)
+{
+       ASSERT(bp->b_flags & XBF_DONE);
+       ASSERT(bp->b_error == 0);
+
+       if (!ops || bp->b_ops)
+               return 0;
+
+       bp->b_ops = ops;
+       bp->b_ops->verify_read(bp);
+       if (bp->b_error)
+               bp->b_flags &= ~XBF_DONE;
+       return bp->b_error;
+}
+
 xfs_buf_t *
 xfs_buf_read_map(
        struct xfs_buftarg      *target,
@@ -762,26 +812,32 @@ xfs_buf_read_map(
        flags |= XBF_READ;
 
        bp = xfs_buf_get_map(target, map, nmaps, flags);
-       if (bp) {
-               trace_xfs_buf_read(bp, flags, _RET_IP_);
+       if (!bp)
+               return NULL;
 
-               if (!(bp->b_flags & XBF_DONE)) {
-                       XFS_STATS_INC(target->bt_mount, xb_get_read);
-                       bp->b_ops = ops;
-                       _xfs_buf_read(bp, flags);
-               } else if (flags & XBF_ASYNC) {
-                       /*
-                        * Read ahead call which is already satisfied,
-                        * drop the buffer
-                        */
-                       xfs_buf_relse(bp);
-                       return NULL;
-               } else {
-                       /* We do not want read in the flags */
-                       bp->b_flags &= ~XBF_READ;
-               }
+       trace_xfs_buf_read(bp, flags, _RET_IP_);
+
+       if (!(bp->b_flags & XBF_DONE)) {
+               XFS_STATS_INC(target->bt_mount, xb_get_read);
+               bp->b_ops = ops;
+               _xfs_buf_read(bp, flags);
+               return bp;
        }
 
+       xfs_buf_ensure_ops(bp, ops);
+
+       if (flags & XBF_ASYNC) {
+               /*
+                * Read ahead call which is already satisfied,
+                * drop the buffer
+                */
+               xfs_buf_relse(bp);
+               return NULL;
+       }
+
+       /* We do not want read in the flags */
+       bp->b_flags &= ~XBF_READ;
+       ASSERT(bp->b_ops != NULL || ops == NULL);
        return bp;
 }
 
@@ -1006,8 +1062,18 @@ xfs_buf_rele(
 
        ASSERT(atomic_read(&bp->b_hold) > 0);
 
-       release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
+       /*
+        * We grab the b_lock here first to serialise racing xfs_buf_rele()
+        * calls. The pag_buf_lock being taken on the last reference only
+        * serialises against racing lookups in xfs_buf_find(). IOWs, the second
+        * to last reference we drop here is not serialised against the last
+        * reference until we take bp->b_lock. Hence if we don't grab b_lock
+        * first, the last "release" reference can win the race to the lock and
+        * free the buffer before the second-to-last reference is processed,
+        * leading to a use-after-free scenario.
+        */
        spin_lock(&bp->b_lock);
+       release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
        if (!release) {
                /*
                 * Drop the in-flight state if the buffer is already on the LRU
@@ -1989,6 +2055,13 @@ xfs_buf_delwri_submit_buffers(
  * is only safely useable for callers that can track I/O completion by higher
  * level means, e.g. AIL pushing as the @buffer_list is consumed in this
  * function.
+ *
+ * Note: this function will skip buffers it would block on, and in doing so
+ * leaves them on @buffer_list so they can be retried on a later pass. As such,
+ * it is up to the caller to ensure that the buffer list is fully submitted or
+ * cancelled appropriately when they are finished with the list. Failure to
+ * cancel or resubmit the list until it is empty will result in leaked buffers
+ * at unmount time.
  */
 int
 xfs_buf_delwri_submit_nowait(
index 4e3171acd0f82bb3e2ad962b68119458ab99b64b..b9f5511ea998a22927f141ecf446f63e3c99f60c 100644 (file)
@@ -385,4 +385,6 @@ extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int);
 #define xfs_getsize_buftarg(buftarg)   block_size((buftarg)->bt_bdev)
 #define xfs_readonly_buftarg(buftarg)  bdev_read_only((buftarg)->bt_bdev)
 
+int xfs_buf_ensure_ops(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
+
 #endif /* __XFS_BUF_H__ */
index 7c00b8bedfe358ae508f8e10ff25771618529a1d..093c2b8d7e20bc7a3408843ed6cf06604d87b0c1 100644 (file)
@@ -470,20 +470,13 @@ xfs_fs_goingdown(
  */
 void
 xfs_do_force_shutdown(
-       xfs_mount_t     *mp,
+       struct xfs_mount *mp,
        int             flags,
        char            *fname,
        int             lnnum)
 {
-       int             logerror;
-
-       logerror = flags & SHUTDOWN_LOG_IO_ERROR;
+       bool            logerror = flags & SHUTDOWN_LOG_IO_ERROR;
 
-       if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
-               xfs_notice(mp,
-       "%s(0x%x) called from line %d of file %s.  Return address = "PTR_FMT,
-                       __func__, flags, lnnum, fname, __return_address);
-       }
        /*
         * No need to duplicate efforts.
         */
@@ -499,27 +492,34 @@ xfs_do_force_shutdown(
        if (xfs_log_force_umount(mp, logerror))
                return;
 
+       if (flags & SHUTDOWN_FORCE_UMOUNT) {
+               xfs_alert(mp,
+"User initiated shutdown received. Shutting down filesystem");
+               return;
+       }
+
+       xfs_notice(mp,
+"%s(0x%x) called from line %d of file %s. Return address = "PTR_FMT,
+               __func__, flags, lnnum, fname, __return_address);
+
        if (flags & SHUTDOWN_CORRUPT_INCORE) {
                xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT,
-    "Corruption of in-memory data detected.  Shutting down filesystem");
+"Corruption of in-memory data detected.  Shutting down filesystem");
                if (XFS_ERRLEVEL_HIGH <= xfs_error_level)
                        xfs_stack_trace();
-       } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
-               if (logerror) {
-                       xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR,
-               "Log I/O Error Detected.  Shutting down filesystem");
-               } else if (flags & SHUTDOWN_DEVICE_REQ) {
-                       xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
-               "All device paths lost.  Shutting down filesystem");
-               } else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
-                       xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
-               "I/O Error Detected. Shutting down filesystem");
-               }
-       }
-       if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
-               xfs_alert(mp,
-       "Please umount the filesystem and rectify the problem(s)");
+       } else if (logerror) {
+               xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR,
+                       "Log I/O Error Detected. Shutting down filesystem");
+       } else if (flags & SHUTDOWN_DEVICE_REQ) {
+               xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
+                       "All device paths lost. Shutting down filesystem");
+       } else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
+               xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
+                       "I/O Error Detected. Shutting down filesystem");
        }
+
+       xfs_alert(mp,
+               "Please unmount the filesystem and rectify the problem(s)");
 }
 
 /*
index 0ef5ece5634c11099d04112359daadb4fdbcab03..6e2c08f30f602deb360e737003cc3ae1abf4bfc7 100644 (file)
@@ -604,14 +604,6 @@ xfs_ioc_space(
        uint                    iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
        int                     error;
 
-       /*
-        * Only allow the sys admin to reserve space unless
-        * unwritten extents are enabled.
-        */
-       if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
-           !capable(CAP_SYS_ADMIN))
-               return -EPERM;
-
        if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
                return -EPERM;
 
index 6320aca39f39415257f3bbb9b0313dbc26284861..27c93b5f029df92b17c22456c0bfe7a5a3fa085c 100644 (file)
@@ -62,6 +62,21 @@ xfs_bmbt_to_iomap(
        iomap->dax_dev = xfs_find_daxdev_for_inode(VFS_I(ip));
 }
 
+static void
+xfs_hole_to_iomap(
+       struct xfs_inode        *ip,
+       struct iomap            *iomap,
+       xfs_fileoff_t           offset_fsb,
+       xfs_fileoff_t           end_fsb)
+{
+       iomap->addr = IOMAP_NULL_ADDR;
+       iomap->type = IOMAP_HOLE;
+       iomap->offset = XFS_FSB_TO_B(ip->i_mount, offset_fsb);
+       iomap->length = XFS_FSB_TO_B(ip->i_mount, end_fsb - offset_fsb);
+       iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
+       iomap->dax_dev = xfs_find_daxdev_for_inode(VFS_I(ip));
+}
+
 xfs_extlen_t
 xfs_eof_alignment(
        struct xfs_inode        *ip,
@@ -502,6 +517,7 @@ xfs_file_iomap_begin_delay(
        struct inode            *inode,
        loff_t                  offset,
        loff_t                  count,
+       unsigned                flags,
        struct iomap            *iomap)
 {
        struct xfs_inode        *ip = XFS_I(inode);
@@ -538,15 +554,23 @@ xfs_file_iomap_begin_delay(
                        goto out_unlock;
        }
 
+       end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
+
        eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got);
-       if (!eof && got.br_startoff <= offset_fsb) {
-               if (xfs_is_reflink_inode(ip)) {
-                       bool            shared;
+       if (eof)
+               got.br_startoff = end_fsb; /* fake hole until the end */
 
-                       end_fsb = min(XFS_B_TO_FSB(mp, offset + count),
-                                       maxbytes_fsb);
+       if (got.br_startoff <= offset_fsb) {
+               /*
+                * For reflink files we may need a delalloc reservation when
+                * overwriting shared extents.   This includes zeroing of
+                * existing extents that contain data.
+                */
+               if (xfs_is_reflink_inode(ip) &&
+                   ((flags & IOMAP_WRITE) ||
+                    got.br_state != XFS_EXT_UNWRITTEN)) {
                        xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb);
-                       error = xfs_reflink_reserve_cow(ip, &got, &shared);
+                       error = xfs_reflink_reserve_cow(ip, &got);
                        if (error)
                                goto out_unlock;
                }
@@ -555,6 +579,11 @@ xfs_file_iomap_begin_delay(
                goto done;
        }
 
+       if (flags & IOMAP_ZERO) {
+               xfs_hole_to_iomap(ip, iomap, offset_fsb, got.br_startoff);
+               goto out_unlock;
+       }
+
        error = xfs_qm_dqattach_locked(ip, false);
        if (error)
                goto out_unlock;
@@ -1003,16 +1032,17 @@ xfs_file_iomap_begin(
        struct xfs_bmbt_irec    imap;
        xfs_fileoff_t           offset_fsb, end_fsb;
        int                     nimaps = 1, error = 0;
-       bool                    shared = false, trimmed = false;
+       bool                    shared = false;
        unsigned                lockmode;
 
        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;
 
-       if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
+       if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && !(flags & IOMAP_DIRECT) &&
                        !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
                /* Reserve delalloc blocks for regular writeback. */
-               return xfs_file_iomap_begin_delay(inode, offset, length, iomap);
+               return xfs_file_iomap_begin_delay(inode, offset, length, flags,
+                               iomap);
        }
 
        /*
@@ -1038,8 +1068,7 @@ xfs_file_iomap_begin(
 
        if (flags & IOMAP_REPORT) {
                /* Trim the mapping to the nearest shared extent boundary. */
-               error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
-                               &trimmed);
+               error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
                if (error)
                        goto out_unlock;
        }
@@ -1065,7 +1094,7 @@ xfs_file_iomap_begin(
                        if (error)
                                goto out_unlock;
                } else {
-                       error = xfs_reflink_reserve_cow(ip, &imap, &shared);
+                       error = xfs_reflink_reserve_cow(ip, &imap);
                        if (error)
                                goto out_unlock;
                }
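The IOMAP_ZERO handling added to xfs_file_iomap_begin_delay() reports holes directly and skips the delalloc reservation when zeroing. As a rough illustration (a sketch, not code from this series; example_zero_range is a hypothetical helper), zeroing reaches this path through the generic iomap layer:

	/*
	 * Illustrative only: iomap_zero_range() calls back into
	 * xfs_file_iomap_begin() with IOMAP_ZERO set, which now routes
	 * buffered, non-extsize inodes through xfs_file_iomap_begin_delay().
	 */
	static int example_zero_range(struct xfs_inode *ip, loff_t pos, loff_t len)
	{
		bool	did_zero = false;

		return iomap_zero_range(VFS_I(ip), pos, len, &did_zero,
					&xfs_iomap_ops);
	}
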
index 42ea7bab9144cc026f50d802acc31c42ab7f0858..8eaeec9d58ed6799898753f49f0ad895b5db4cb5 100644 (file)
@@ -182,8 +182,7 @@ int
 xfs_reflink_trim_around_shared(
        struct xfs_inode        *ip,
        struct xfs_bmbt_irec    *irec,
-       bool                    *shared,
-       bool                    *trimmed)
+       bool                    *shared)
 {
        xfs_agnumber_t          agno;
        xfs_agblock_t           agbno;
@@ -209,7 +208,7 @@ xfs_reflink_trim_around_shared(
        if (error)
                return error;
 
-       *shared = *trimmed = false;
+       *shared = false;
        if (fbno == NULLAGBLOCK) {
                /* No shared blocks at all. */
                return 0;
@@ -222,8 +221,6 @@ xfs_reflink_trim_around_shared(
                 */
                irec->br_blockcount = flen;
                *shared = true;
-               if (flen != aglen)
-                       *trimmed = true;
                return 0;
        } else {
                /*
@@ -233,7 +230,6 @@ xfs_reflink_trim_around_shared(
                 * start of the shared region.
                 */
                irec->br_blockcount = fbno - agbno;
-               *trimmed = true;
                return 0;
        }
 }
@@ -241,7 +237,7 @@ xfs_reflink_trim_around_shared(
 /*
  * Trim the passed in imap to the next shared/unshared extent boundary, and
  * if imap->br_startoff points to a shared extent reserve space for it in the
- * COW fork.  In this case *shared is set to true, else to false.
+ * COW fork.
  *
  * Note that imap will always contain the block numbers for the existing blocks
  * in the data fork, as the upper layers need them for read-modify-write
@@ -250,14 +246,14 @@ xfs_reflink_trim_around_shared(
 int
 xfs_reflink_reserve_cow(
        struct xfs_inode        *ip,
-       struct xfs_bmbt_irec    *imap,
-       bool                    *shared)
+       struct xfs_bmbt_irec    *imap)
 {
        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
        struct xfs_bmbt_irec    got;
        int                     error = 0;
-       bool                    eof = false, trimmed;
+       bool                    eof = false;
        struct xfs_iext_cursor  icur;
+       bool                    shared;
 
        /*
         * Search the COW fork extent list first.  This serves two purposes:
@@ -273,18 +269,16 @@ xfs_reflink_reserve_cow(
        if (!eof && got.br_startoff <= imap->br_startoff) {
                trace_xfs_reflink_cow_found(ip, imap);
                xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
-
-               *shared = true;
                return 0;
        }
 
        /* Trim the mapping to the nearest shared extent boundary. */
-       error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
+       error = xfs_reflink_trim_around_shared(ip, imap, &shared);
        if (error)
                return error;
 
        /* Not shared?  Just report the (potentially capped) extent. */
-       if (!*shared)
+       if (!shared)
                return 0;
 
        /*
@@ -368,7 +362,6 @@ xfs_find_trim_cow_extent(
        xfs_filblks_t           count_fsb = imap->br_blockcount;
        struct xfs_iext_cursor  icur;
        struct xfs_bmbt_irec    got;
-       bool                    trimmed;
 
        *found = false;
 
@@ -376,9 +369,13 @@ xfs_find_trim_cow_extent(
         * If we don't find an overlapping extent, trim the range we need to
         * allocate to fit the hole we found.
         */
-       if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) ||
-           got.br_startoff > offset_fsb)
-               return xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
+       if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got))
+               got.br_startoff = offset_fsb + count_fsb;
+       if (got.br_startoff > offset_fsb) {
+               xfs_trim_extent(imap, imap->br_startoff,
+                               got.br_startoff - imap->br_startoff);
+               return xfs_reflink_trim_around_shared(ip, imap, shared);
+       }
 
        *shared = true;
        if (isnullstartblock(got.br_startblock)) {
index c585ad9552b23ff375fd40c21a6aadab694b8e14..7f47202b5639142054420b2fb2384e9c44cfbb0d 100644 (file)
@@ -10,10 +10,10 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, struct xfs_trans *tp,
                xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t aglen,
                xfs_agblock_t *fbno, xfs_extlen_t *flen, bool find_maximal);
 extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
-               struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed);
+               struct xfs_bmbt_irec *irec, bool *shared);
 
 extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
-               struct xfs_bmbt_irec *imap, bool *shared);
+               struct xfs_bmbt_irec *imap);
 extern int xfs_reflink_allocate_cow(struct xfs_inode *ip,
                struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode);
 extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset,
index 4e4423153071c788bb278d3856e3ff346cb915f8..cc509743facd8ddfedc6c8946446f515a79dfb69 100644 (file)
@@ -29,30 +29,30 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
                char    *desc;
                int     endpoint;
        } xstats[] = {
-               { "extent_alloc",       XFSSTAT_END_EXTENT_ALLOC        },
-               { "abt",                XFSSTAT_END_ALLOC_BTREE         },
-               { "blk_map",            XFSSTAT_END_BLOCK_MAPPING       },
-               { "bmbt",               XFSSTAT_END_BLOCK_MAP_BTREE     },
-               { "dir",                XFSSTAT_END_DIRECTORY_OPS       },
-               { "trans",              XFSSTAT_END_TRANSACTIONS        },
-               { "ig",                 XFSSTAT_END_INODE_OPS           },
-               { "log",                XFSSTAT_END_LOG_OPS             },
-               { "push_ail",           XFSSTAT_END_TAIL_PUSHING        },
-               { "xstrat",             XFSSTAT_END_WRITE_CONVERT       },
-               { "rw",                 XFSSTAT_END_READ_WRITE_OPS      },
-               { "attr",               XFSSTAT_END_ATTRIBUTE_OPS       },
-               { "icluster",           XFSSTAT_END_INODE_CLUSTER       },
-               { "vnodes",             XFSSTAT_END_VNODE_OPS           },
-               { "buf",                XFSSTAT_END_BUF                 },
-               { "abtb2",              XFSSTAT_END_ABTB_V2             },
-               { "abtc2",              XFSSTAT_END_ABTC_V2             },
-               { "bmbt2",              XFSSTAT_END_BMBT_V2             },
-               { "ibt2",               XFSSTAT_END_IBT_V2              },
-               { "fibt2",              XFSSTAT_END_FIBT_V2             },
-               { "rmapbt",             XFSSTAT_END_RMAP_V2             },
-               { "refcntbt",           XFSSTAT_END_REFCOUNT            },
+               { "extent_alloc",       xfsstats_offset(xs_abt_lookup)  },
+               { "abt",                xfsstats_offset(xs_blk_mapr)    },
+               { "blk_map",            xfsstats_offset(xs_bmbt_lookup) },
+               { "bmbt",               xfsstats_offset(xs_dir_lookup)  },
+               { "dir",                xfsstats_offset(xs_trans_sync)  },
+               { "trans",              xfsstats_offset(xs_ig_attempts) },
+               { "ig",                 xfsstats_offset(xs_log_writes)  },
+               { "log",                xfsstats_offset(xs_try_logspace)},
+               { "push_ail",           xfsstats_offset(xs_xstrat_quick)},
+               { "xstrat",             xfsstats_offset(xs_write_calls) },
+               { "rw",                 xfsstats_offset(xs_attr_get)    },
+               { "attr",               xfsstats_offset(xs_iflush_count)},
+               { "icluster",           xfsstats_offset(vn_active)      },
+               { "vnodes",             xfsstats_offset(xb_get)         },
+               { "buf",                xfsstats_offset(xs_abtb_2)      },
+               { "abtb2",              xfsstats_offset(xs_abtc_2)      },
+               { "abtc2",              xfsstats_offset(xs_bmbt_2)      },
+               { "bmbt2",              xfsstats_offset(xs_ibt_2)       },
+               { "ibt2",               xfsstats_offset(xs_fibt_2)      },
+               { "fibt2",              xfsstats_offset(xs_rmap_2)      },
+               { "rmapbt",             xfsstats_offset(xs_refcbt_2)    },
+               { "refcntbt",           xfsstats_offset(xs_qm_dqreclaims)},
                /* we print both series of quota information together */
-               { "qm",                 XFSSTAT_END_QM                  },
+               { "qm",                 xfsstats_offset(xs_xstrat_bytes)},
        };
 
        /* Loop over all stats groups */
@@ -104,6 +104,10 @@ void xfs_stats_clearall(struct xfsstats __percpu *stats)
 #ifdef CONFIG_PROC_FS
 /* legacy quota interfaces */
 #ifdef CONFIG_XFS_QUOTA
+
+#define XFSSTAT_START_XQMSTAT xfsstats_offset(xs_qm_dqreclaims)
+#define XFSSTAT_END_XQMSTAT xfsstats_offset(xs_qm_dquot)
+
 static int xqm_proc_show(struct seq_file *m, void *v)
 {
        /* maximum; incore; ratio free to inuse; freelist */
@@ -119,7 +123,7 @@ static int xqmstat_proc_show(struct seq_file *m, void *v)
        int j;
 
        seq_printf(m, "qm");
-       for (j = XFSSTAT_END_IBT_V2; j < XFSSTAT_END_XQMSTAT; j++)
+       for (j = XFSSTAT_START_XQMSTAT; j < XFSSTAT_END_XQMSTAT; j++)
                seq_printf(m, " %u", counter_val(xfsstats.xs_stats, j));
        seq_putc(m, '\n');
        return 0;
index 130db070e4d8a9ae0ad22b89d85cdd3eb3255f7e..34d704f703d25e07ab8af65f9ed0205a9ab6e655 100644 (file)
@@ -41,17 +41,14 @@ enum {
  * XFS global statistics
  */
 struct __xfsstats {
-# define XFSSTAT_END_EXTENT_ALLOC      4
        uint32_t                xs_allocx;
        uint32_t                xs_allocb;
        uint32_t                xs_freex;
        uint32_t                xs_freeb;
-# define XFSSTAT_END_ALLOC_BTREE       (XFSSTAT_END_EXTENT_ALLOC+4)
        uint32_t                xs_abt_lookup;
        uint32_t                xs_abt_compare;
        uint32_t                xs_abt_insrec;
        uint32_t                xs_abt_delrec;
-# define XFSSTAT_END_BLOCK_MAPPING     (XFSSTAT_END_ALLOC_BTREE+7)
        uint32_t                xs_blk_mapr;
        uint32_t                xs_blk_mapw;
        uint32_t                xs_blk_unmap;
@@ -59,21 +56,17 @@ struct __xfsstats {
        uint32_t                xs_del_exlist;
        uint32_t                xs_look_exlist;
        uint32_t                xs_cmp_exlist;
-# define XFSSTAT_END_BLOCK_MAP_BTREE   (XFSSTAT_END_BLOCK_MAPPING+4)
        uint32_t                xs_bmbt_lookup;
        uint32_t                xs_bmbt_compare;
        uint32_t                xs_bmbt_insrec;
        uint32_t                xs_bmbt_delrec;
-# define XFSSTAT_END_DIRECTORY_OPS     (XFSSTAT_END_BLOCK_MAP_BTREE+4)
        uint32_t                xs_dir_lookup;
        uint32_t                xs_dir_create;
        uint32_t                xs_dir_remove;
        uint32_t                xs_dir_getdents;
-# define XFSSTAT_END_TRANSACTIONS      (XFSSTAT_END_DIRECTORY_OPS+3)
        uint32_t                xs_trans_sync;
        uint32_t                xs_trans_async;
        uint32_t                xs_trans_empty;
-# define XFSSTAT_END_INODE_OPS         (XFSSTAT_END_TRANSACTIONS+7)
        uint32_t                xs_ig_attempts;
        uint32_t                xs_ig_found;
        uint32_t                xs_ig_frecycle;
@@ -81,13 +74,11 @@ struct __xfsstats {
        uint32_t                xs_ig_dup;
        uint32_t                xs_ig_reclaims;
        uint32_t                xs_ig_attrchg;
-# define XFSSTAT_END_LOG_OPS           (XFSSTAT_END_INODE_OPS+5)
        uint32_t                xs_log_writes;
        uint32_t                xs_log_blocks;
        uint32_t                xs_log_noiclogs;
        uint32_t                xs_log_force;
        uint32_t                xs_log_force_sleep;
-# define XFSSTAT_END_TAIL_PUSHING      (XFSSTAT_END_LOG_OPS+10)
        uint32_t                xs_try_logspace;
        uint32_t                xs_sleep_logspace;
        uint32_t                xs_push_ail;
@@ -98,22 +89,17 @@ struct __xfsstats {
        uint32_t                xs_push_ail_flushing;
        uint32_t                xs_push_ail_restarts;
        uint32_t                xs_push_ail_flush;
-# define XFSSTAT_END_WRITE_CONVERT     (XFSSTAT_END_TAIL_PUSHING+2)
        uint32_t                xs_xstrat_quick;
        uint32_t                xs_xstrat_split;
-# define XFSSTAT_END_READ_WRITE_OPS    (XFSSTAT_END_WRITE_CONVERT+2)
        uint32_t                xs_write_calls;
        uint32_t                xs_read_calls;
-# define XFSSTAT_END_ATTRIBUTE_OPS     (XFSSTAT_END_READ_WRITE_OPS+4)
        uint32_t                xs_attr_get;
        uint32_t                xs_attr_set;
        uint32_t                xs_attr_remove;
        uint32_t                xs_attr_list;
-# define XFSSTAT_END_INODE_CLUSTER     (XFSSTAT_END_ATTRIBUTE_OPS+3)
        uint32_t                xs_iflush_count;
        uint32_t                xs_icluster_flushcnt;
        uint32_t                xs_icluster_flushinode;
-# define XFSSTAT_END_VNODE_OPS         (XFSSTAT_END_INODE_CLUSTER+8)
        uint32_t                vn_active;      /* # vnodes not on free lists */
        uint32_t                vn_alloc;       /* # times vn_alloc called */
        uint32_t                vn_get;         /* # times vn_get called */
@@ -122,7 +108,6 @@ struct __xfsstats {
        uint32_t                vn_reclaim;     /* # times vn_reclaim called */
        uint32_t                vn_remove;      /* # times vn_remove called */
        uint32_t                vn_free;        /* # times vn_free called */
-#define XFSSTAT_END_BUF                        (XFSSTAT_END_VNODE_OPS+9)
        uint32_t                xb_get;
        uint32_t                xb_create;
        uint32_t                xb_get_locked;
@@ -133,28 +118,19 @@ struct __xfsstats {
        uint32_t                xb_page_found;
        uint32_t                xb_get_read;
 /* Version 2 btree counters */
-#define XFSSTAT_END_ABTB_V2            (XFSSTAT_END_BUF + __XBTS_MAX)
        uint32_t                xs_abtb_2[__XBTS_MAX];
-#define XFSSTAT_END_ABTC_V2            (XFSSTAT_END_ABTB_V2 + __XBTS_MAX)
        uint32_t                xs_abtc_2[__XBTS_MAX];
-#define XFSSTAT_END_BMBT_V2            (XFSSTAT_END_ABTC_V2 + __XBTS_MAX)
        uint32_t                xs_bmbt_2[__XBTS_MAX];
-#define XFSSTAT_END_IBT_V2             (XFSSTAT_END_BMBT_V2 + __XBTS_MAX)
        uint32_t                xs_ibt_2[__XBTS_MAX];
-#define XFSSTAT_END_FIBT_V2            (XFSSTAT_END_IBT_V2 + __XBTS_MAX)
        uint32_t                xs_fibt_2[__XBTS_MAX];
-#define XFSSTAT_END_RMAP_V2            (XFSSTAT_END_FIBT_V2 + __XBTS_MAX)
        uint32_t                xs_rmap_2[__XBTS_MAX];
-#define XFSSTAT_END_REFCOUNT           (XFSSTAT_END_RMAP_V2 + __XBTS_MAX)
        uint32_t                xs_refcbt_2[__XBTS_MAX];
-#define XFSSTAT_END_XQMSTAT            (XFSSTAT_END_REFCOUNT + 6)
        uint32_t                xs_qm_dqreclaims;
        uint32_t                xs_qm_dqreclaim_misses;
        uint32_t                xs_qm_dquot_dups;
        uint32_t                xs_qm_dqcachemisses;
        uint32_t                xs_qm_dqcachehits;
        uint32_t                xs_qm_dqwants;
-#define XFSSTAT_END_QM                 (XFSSTAT_END_XQMSTAT+2)
        uint32_t                xs_qm_dquot;
        uint32_t                xs_qm_dquot_unused;
 /* Extra precision counters */
@@ -163,10 +139,12 @@ struct __xfsstats {
        uint64_t                xs_read_bytes;
 };
 
+#define        xfsstats_offset(f)      (offsetof(struct __xfsstats, f)/sizeof(uint32_t))
+
 struct xfsstats {
        union {
                struct __xfsstats       s;
-               uint32_t                a[XFSSTAT_END_XQMSTAT];
+               uint32_t                a[xfsstats_offset(xs_qm_dquot)];
        };
 };
 
index 207ee302b1bb9f4039b8963b7ae1a68ce8ac8487..d3e6cd063688406ae69b0625db862f09cdbcd260 100644 (file)
@@ -43,6 +43,7 @@
 #include <linux/dax.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/magic.h>
 #include <linux/mount.h>
 #include <linux/mempool.h>
 #include <linux/writeback.h>
@@ -933,6 +934,32 @@ xfs_fs_alloc_inode(
        return NULL;
 }
 
+#ifdef DEBUG
+static void
+xfs_check_delalloc(
+       struct xfs_inode        *ip,
+       int                     whichfork)
+{
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
+       struct xfs_bmbt_irec    got;
+       struct xfs_iext_cursor  icur;
+
+       if (!ifp || !xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got))
+               return;
+       do {
+               if (isnullstartblock(got.br_startblock)) {
+                       xfs_warn(ip->i_mount,
+       "ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]",
+                               ip->i_ino,
+                               whichfork == XFS_DATA_FORK ? "data" : "cow",
+                               got.br_startoff, got.br_blockcount);
+               }
+       } while (xfs_iext_next_extent(ifp, &icur, &got));
+}
+#else
+#define xfs_check_delalloc(ip, whichfork)      do { } while (0)
+#endif
+
 /*
  * Now that the generic code is guaranteed not to be accessing
  * the linux inode, we can inactivate and reclaim the inode.
@@ -951,7 +978,12 @@ xfs_fs_destroy_inode(
 
        xfs_inactive(ip);
 
-       ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
+       if (!XFS_FORCED_SHUTDOWN(ip->i_mount) && ip->i_delayed_blks) {
+               xfs_check_delalloc(ip, XFS_DATA_FORK);
+               xfs_check_delalloc(ip, XFS_COW_FORK);
+               ASSERT(0);
+       }
+
        XFS_STATS_INC(ip->i_mount, vn_reclaim);
 
        /*
@@ -1097,7 +1129,7 @@ xfs_fs_statfs(
        xfs_extlen_t            lsize;
        int64_t                 ffree;
 
-       statp->f_type = XFS_SB_MAGIC;
+       statp->f_type = XFS_SUPER_MAGIC;
        statp->f_namelen = MAXNAMELEN - 1;
 
        id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
@@ -1650,7 +1682,7 @@ xfs_fs_fill_super(
         * we must configure the block size in the superblock before we run the
         * full mount process as the mount process can lookup and cache inodes.
         */
-       sb->s_magic = XFS_SB_MAGIC;
+       sb->s_magic = XFS_SUPER_MAGIC;
        sb->s_blocksize = mp->m_sb.sb_blocksize;
        sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
        sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
index c3d278e96ad1a73462808c1867ed7615974011e0..a0c5dbda18aabfa84c4f0ca94543b80174e5ba19 100644 (file)
@@ -220,6 +220,7 @@ void                xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint);
 void           xfs_trans_log_buf(struct xfs_trans *, struct xfs_buf *, uint,
                                  uint);
 void           xfs_trans_dirty_buf(struct xfs_trans *, struct xfs_buf *);
+bool           xfs_trans_buf_is_dirty(struct xfs_buf *bp);
 void           xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
 
 void           xfs_extent_free_init_defer_op(void);
index 55326f971cb36bb87e35552f461a81a1c206ef3f..d3a4e89bf4a0ddb916ed4f5d395285e2e2188869 100644 (file)
@@ -531,17 +531,33 @@ xfsaild(
                        set_current_state(TASK_INTERRUPTIBLE);
 
                /*
-                * Check kthread_should_stop() after we set the task state
-                * to guarantee that we either see the stop bit and exit or
-                * the task state is reset to runnable such that it's not
-                * scheduled out indefinitely and detects the stop bit at
-                * next iteration.
-                *
+                * Check kthread_should_stop() after we set the task state to
+                * guarantee that we either see the stop bit and exit or the
+                * task state is reset to runnable such that it's not scheduled
+                * out indefinitely and detects the stop bit at next iteration.
                 * A memory barrier is included in the above task state set to
                 * serialize against kthread_stop().
                 */
                if (kthread_should_stop()) {
                        __set_current_state(TASK_RUNNING);
+
+                       /*
+                        * The caller forces out the AIL before stopping the
+                        * thread in the common case, which means the delwri
+                        * queue is drained. In the shutdown case, the queue may
+                        * still hold relogged buffers that haven't been
+                        * submitted because they were pinned since added to the
+                        * queue.
+                        *
+                        * Log I/O error processing stales the underlying buffer
+                        * and clears the delwri state, expecting the buf to be
+                        * removed on the next submission attempt. That won't
+                        * happen if we're shutting down, so this is the last
+                        * opportunity to release such buffers from the queue.
+                        */
+                       ASSERT(list_empty(&ailp->ail_buf_list) ||
+                              XFS_FORCED_SHUTDOWN(ailp->ail_mount));
+                       xfs_buf_delwri_cancel(&ailp->ail_buf_list);
                        break;
                }
 
index 286a287ac57acc5c5abcbe51a881af09026ecf0e..629f1479c9d234492d3a7d431dd21bb30f4db393 100644 (file)
@@ -264,11 +264,39 @@ xfs_trans_read_buf_map(
                        return -EIO;
                }
 
+               /*
+                * Check if the caller is trying to read a buffer that is
+                * already attached to the transaction yet has no buffer ops
+                * assigned.  Ops are usually attached when the buffer is
+                * attached to the transaction, or by the read caller in
+                * special circumstances.  That didn't happen, which is not
+                * how this is supposed to go.
+                *
+                * If the buffer passes verification we'll let this go, but if
+                * not we have to shut down.  Let the transaction cleanup code
+                * release this buffer when it kills the transaction.
+                */
+               ASSERT(bp->b_ops != NULL);
+               error = xfs_buf_ensure_ops(bp, ops);
+               if (error) {
+                       xfs_buf_ioerror_alert(bp, __func__);
+
+                       if (tp->t_flags & XFS_TRANS_DIRTY)
+                               xfs_force_shutdown(tp->t_mountp,
+                                               SHUTDOWN_META_IO_ERROR);
+
+                       /* bad CRC means corrupted metadata */
+                       if (error == -EFSBADCRC)
+                               error = -EFSCORRUPTED;
+                       return error;
+               }
+
                bip = bp->b_log_item;
                bip->bli_recur++;
 
                ASSERT(atomic_read(&bip->bli_refcount) > 0);
                trace_xfs_trans_read_buf_recur(bip);
+               ASSERT(bp->b_ops != NULL || ops == NULL);
                *bpp = bp;
                return 0;
        }
@@ -316,11 +344,25 @@ xfs_trans_read_buf_map(
                _xfs_trans_bjoin(tp, bp, 1);
                trace_xfs_trans_read_buf(bp->b_log_item);
        }
+       ASSERT(bp->b_ops != NULL || ops == NULL);
        *bpp = bp;
        return 0;
 
 }
 
+/* Has this buffer been dirtied by anyone? */
+bool
+xfs_trans_buf_is_dirty(
+       struct xfs_buf          *bp)
+{
+       struct xfs_buf_log_item *bip = bp->b_log_item;
+
+       if (!bip)
+               return false;
+       ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
+       return test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags);
+}
+
 /*
  * Release a buffer previously joined to the transaction. If the buffer is
  * modified within this transaction, decrement the recursion count but do not
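xfs_trans_buf_is_dirty() lets code outside the transaction layer ask whether a buffer has already been logged; per the series summary it is used by the xrep_findroot_block() buffer-state fix. A hedged usage sketch (hypothetical caller, not the actual repair code):

	/*
	 * Illustrative only: skip a buffer some transaction has already
	 * dirtied, since rewriting it here would clobber logged changes.
	 */
	if (xfs_trans_buf_is_dirty(bp)) {
		/* Another transaction logged this buffer; leave it alone. */
		return 0;
	}
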
index 1a6fee974116a406deb077bd8be5585e5c4591cb..96c24478d8cedb50de46c7b27662f9b81008ce53 100644 (file)
@@ -29,6 +29,7 @@
 #define HPFS_SUPER_MAGIC       0xf995e849
 #define ISOFS_SUPER_MAGIC      0x9660
 #define JFFS2_SUPER_MAGIC      0x72b6
+#define XFS_SUPER_MAGIC                0x58465342      /* "XFSB" */
 #define PSTOREFS_MAGIC         0x6165676C
 #define EFIVARFS_MAGIC         0xde5e81e4
 #define HOSTFS_SUPER_MAGIC     0x00c0ffee
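
With XFS_SUPER_MAGIC exported through the uapi header and reported by xfs_fs_statfs(), userspace can match the value directly. A minimal sketch (illustrative, not part of the patch; requires a kernel with this header change):

	#include <stdio.h>
	#include <sys/vfs.h>
	#include <linux/magic.h>

	/* Illustrative only: detect an XFS mount via statfs(2) and f_type. */
	int main(int argc, char **argv)
	{
		struct statfs	sfs;
		const char	*path = argc > 1 ? argv[1] : ".";

		if (statfs(path, &sfs) != 0) {
			perror("statfs");
			return 1;
		}
		printf("%s: %s\n", path,
		       sfs.f_type == XFS_SUPER_MAGIC ? "XFS" : "not XFS");
		return 0;
	}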