Merge tag 'xfs-for-linus-3.15-rc1' of git://oss.sgi.com/xfs/xfs
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 4 Apr 2014 22:50:08 +0000 (15:50 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 4 Apr 2014 22:50:08 +0000 (15:50 -0700)
Pull xfs update from Dave Chinner:
 "There are a couple of new fallocate features in this request - it was
  decided that it was easiest to push them through the XFS tree using
  topic branches and have the ext4 support be based on those branches.
  Hence you may see some overlap with the ext4 tree merge depending on
  how they include those topic branches into their tree.  Other than
  that, there is O_TMPFILE support, some cleanups and bug fixes.

  The main changes in the XFS tree for 3.15-rc1 are:

   - O_TMPFILE support
   - allowing AIO+DIO writes beyond EOF
   - FALLOC_FL_COLLAPSE_RANGE support for fallocate syscall and XFS
     implementation
   - FALLOC_FL_ZERO_RANGE support for fallocate syscall and XFS
     implementation
   - IO verifier cleanup and rework
   - stack usage reduction changes
   - vm_map_ram NOIO context fixes to remove lockdep warnings
   - various bug fixes and cleanups"

* tag 'xfs-for-linus-3.15-rc1' of git://oss.sgi.com/xfs/xfs: (34 commits)
  xfs: fix directory hash ordering bug
  xfs: extra semi-colon breaks a condition
  xfs: Add support for FALLOC_FL_ZERO_RANGE
  fs: Introduce FALLOC_FL_ZERO_RANGE flag for fallocate
  xfs: inode log reservations are still too small
  xfs: xfs_check_page_type buffer checks need help
  xfs: avoid AGI/AGF deadlock scenario for inode chunk allocation
  xfs: use NOIO contexts for vm_map_ram
  xfs: don't leak EFSBADCRC to userspace
  xfs: fix directory inode iolock lockdep false positive
  xfs: allocate xfs_da_args to reduce stack footprint
  xfs: always do log forces via the workqueue
  xfs: modify verifiers to differentiate CRC from other errors
  xfs: print useful caller information in xfs_error_report
  xfs: add xfs_verifier_error()
  xfs: add helper for updating checksums on xfs_bufs
  xfs: add helper for verifying checksums on xfs_bufs
  xfs: Use defines for CRC offsets in all cases
  xfs: skip pointless CRC updates after verifier failures
  xfs: Add support FALLOC_FL_COLLAPSE_RANGE for fallocate
  ...

56 files changed:
fs/direct-io.c
fs/open.c
fs/xfs/kmem.c
fs/xfs/xfs_acl.c
fs/xfs/xfs_ag.h
fs/xfs/xfs_alloc.c
fs/xfs/xfs_alloc_btree.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_attr_leaf.c
fs/xfs/xfs_attr_remote.c
fs/xfs/xfs_bmap.c
fs/xfs/xfs_bmap.h
fs/xfs/xfs_bmap_btree.c
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_bmap_util.h
fs/xfs/xfs_btree.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_buf.h
fs/xfs/xfs_buf_item.c
fs/xfs/xfs_da_btree.c
fs/xfs/xfs_dinode.h
fs/xfs/xfs_dir2.c
fs/xfs/xfs_dir2_block.c
fs/xfs/xfs_dir2_data.c
fs/xfs/xfs_dir2_leaf.c
fs/xfs/xfs_dir2_node.c
fs/xfs/xfs_dquot.c
fs/xfs/xfs_dquot_buf.c
fs/xfs/xfs_error.c
fs/xfs/xfs_error.h
fs/xfs/xfs_file.c
fs/xfs/xfs_format.h
fs/xfs/xfs_ialloc.c
fs/xfs/xfs_ialloc_btree.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_inode_buf.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_iops.c
fs/xfs/xfs_linux.h
fs/xfs/xfs_log.h
fs/xfs/xfs_log_cil.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_rtalloc.c
fs/xfs/xfs_sb.c
fs/xfs/xfs_sb.h
fs/xfs/xfs_shared.h
fs/xfs/xfs_symlink.c
fs/xfs/xfs_symlink_remote.c
fs/xfs/xfs_trace.h
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans_buf.c
fs/xfs/xfs_trans_resv.c
fs/xfs/xfs_trans_resv.h
include/linux/fs.h
include/uapi/linux/falloc.h

index 6e6bff3752446036990fb4a5d0eca4cdaad3fac5..31ba0935e32ed2f271253a1d828778a91193b211 100644 (file)
@@ -1193,13 +1193,19 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
        }
 
        /*
-        * For file extending writes updating i_size before data
-        * writeouts complete can expose uninitialized blocks. So
-        * even for AIO, we need to wait for i/o to complete before
-        * returning in this case.
+        * For file extending writes updating i_size before data writeouts
+        * complete can expose uninitialized blocks in dumb filesystems.
+        * In that case we need to wait for I/O completion even if asked
+        * for an asynchronous write.
         */
-       dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) &&
-               (end > i_size_read(inode)));
+       if (is_sync_kiocb(iocb))
+               dio->is_async = false;
+       else if (!(dio->flags & DIO_ASYNC_EXTEND) &&
+            (rw & WRITE) && end > i_size_read(inode))
+               dio->is_async = false;
+       else
+               dio->is_async = true;
+
        dio->inode = inode;
        dio->rw = rw;
 
index b9ed8b25c108c69d68889b5b6eadac9878eb7bd9..631aea815def32946433b8aebed9a312d0fc872c 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -231,7 +231,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                return -EINVAL;
 
        /* Return error if mode is not supported */
-       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+                    FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
+               return -EOPNOTSUPP;
+
+       /* Punch hole and zero range are mutually exclusive */
+       if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
+           (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
                return -EOPNOTSUPP;
 
        /* Punch hole must have keep size set */
@@ -239,11 +245,20 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
            !(mode & FALLOC_FL_KEEP_SIZE))
                return -EOPNOTSUPP;
 
+       /* Collapse range should only be used exclusively. */
+       if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
+           (mode & ~FALLOC_FL_COLLAPSE_RANGE))
+               return -EINVAL;
+
        if (!(file->f_mode & FMODE_WRITE))
                return -EBADF;
 
-       /* It's not possible punch hole on append only file */
-       if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
+       /*
+        * It's not possible to punch hole or perform collapse range
+        * on append only file
+        */
+       if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)
+           && IS_APPEND(inode))
                return -EPERM;
 
        if (IS_IMMUTABLE(inode))
@@ -271,6 +286,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
                return -EFBIG;
 
+       /*
+        * There is no need to overlap collapse range with EOF, in which case
+        * it is effectively a truncate operation
+        */
+       if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
+           (offset + len >= i_size_read(inode)))
+               return -EINVAL;
+
        if (!file->f_op->fallocate)
                return -EOPNOTSUPP;
 
index 66a36befc5c0723d217bdd5aa1a61b1b610e42a4..844e288b9576d1202d88e6628ca5731149fb9b92 100644 (file)
@@ -65,12 +65,31 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
 void *
 kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
 {
+       unsigned noio_flag = 0;
        void    *ptr;
+       gfp_t   lflags;
 
        ptr = kmem_zalloc(size, flags | KM_MAYFAIL);
        if (ptr)
                return ptr;
-       return vzalloc(size);
+
+       /*
+        * __vmalloc() will allocate data pages and auxiliary structures (e.g.
+        * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context
+        * here. Hence we need to tell memory reclaim that we are in such a
+        * context via PF_MEMALLOC_NOIO to prevent memory reclaim re-entering
+        * the filesystem here and potentially deadlocking.
+        */
+       if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
+               noio_flag = memalloc_noio_save();
+
+       lflags = kmem_flags_convert(flags);
+       ptr = __vmalloc(size, lflags | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
+
+       if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
+               memalloc_noio_restore(noio_flag);
+
+       return ptr;
 }
 
 void
index 0ecec1896f25439f198c53c93bc058ff3bff9c89..6888ad886ff6205cc0baf0aa36352fa92f9d93ea 100644 (file)
@@ -281,7 +281,7 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
        if (!acl)
                goto set_acl;
 
-       error = -EINVAL;
+       error = -E2BIG;
        if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb)))
                return error;
 
index 3fc109819c34eb7a42973d7640525fefe66cc08e..0fdd4109c62439b7471b46135a068148b1006d87 100644 (file)
@@ -89,6 +89,8 @@ typedef struct xfs_agf {
        /* structure must be padded to 64 bit alignment */
 } xfs_agf_t;
 
+#define XFS_AGF_CRC_OFF                offsetof(struct xfs_agf, agf_crc)
+
 #define        XFS_AGF_MAGICNUM        0x00000001
 #define        XFS_AGF_VERSIONNUM      0x00000002
 #define        XFS_AGF_SEQNO           0x00000004
@@ -167,6 +169,8 @@ typedef struct xfs_agi {
        /* structure must be padded to 64 bit alignment */
 } xfs_agi_t;
 
+#define XFS_AGI_CRC_OFF                offsetof(struct xfs_agi, agi_crc)
+
 #define        XFS_AGI_MAGICNUM        0x00000001
 #define        XFS_AGI_VERSIONNUM      0x00000002
 #define        XFS_AGI_SEQNO           0x00000004
@@ -222,6 +226,8 @@ typedef struct xfs_agfl {
        __be32          agfl_bno[];     /* actually XFS_AGFL_SIZE(mp) */
 } xfs_agfl_t;
 
+#define XFS_AGFL_CRC_OFF       offsetof(struct xfs_agfl, agfl_crc)
+
 /*
  * tags for inode radix tree
  */
index 9eab2dfdcbb54cc1337f4d6f248c808fcb2cc426..c1cf6a336a72fb178602ad10908825caeecc20b7 100644 (file)
@@ -474,7 +474,6 @@ xfs_agfl_read_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
-       int             agfl_ok = 1;
 
        /*
         * There is no verification of non-crc AGFLs because mkfs does not
@@ -485,15 +484,13 @@ xfs_agfl_read_verify(
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                return;
 
-       agfl_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                  offsetof(struct xfs_agfl, agfl_crc));
-
-       agfl_ok = agfl_ok && xfs_agfl_verify(bp);
-
-       if (!agfl_ok) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_agfl_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 static void
@@ -508,16 +505,15 @@ xfs_agfl_write_verify(
                return;
 
        if (!xfs_agfl_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
        if (bip)
                XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
-                        offsetof(struct xfs_agfl, agfl_crc));
+       xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
 }
 
 const struct xfs_buf_ops xfs_agfl_buf_ops = {
@@ -2238,19 +2234,17 @@ xfs_agf_read_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
-       int             agf_ok = 1;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb))
-               agf_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                         offsetof(struct xfs_agf, agf_crc));
 
-       agf_ok = agf_ok && xfs_agf_verify(mp, bp);
-
-       if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
-                       XFS_RANDOM_ALLOC_READ_AGF))) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+           !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
+                               XFS_ERRTAG_ALLOC_READ_AGF,
+                               XFS_RANDOM_ALLOC_READ_AGF))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 static void
@@ -2261,8 +2255,8 @@ xfs_agf_write_verify(
        struct xfs_buf_log_item *bip = bp->b_fspriv;
 
        if (!xfs_agf_verify(mp, bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -2272,8 +2266,7 @@ xfs_agf_write_verify(
        if (bip)
                XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
-                        offsetof(struct xfs_agf, agf_crc));
+       xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
 }
 
 const struct xfs_buf_ops xfs_agf_buf_ops = {
index 13085429e5234783c93e40c3a31492ec7d560428..cc1eadcbb0497ea6163b045dacb740481db27fe6 100644 (file)
@@ -355,12 +355,14 @@ static void
 xfs_allocbt_read_verify(
        struct xfs_buf  *bp)
 {
-       if (!(xfs_btree_sblock_verify_crc(bp) &&
-             xfs_allocbt_verify(bp))) {
-               trace_xfs_btree_corrupt(bp, _RET_IP_);
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
-                                    bp->b_target->bt_mount, bp->b_addr);
+       if (!xfs_btree_sblock_verify_crc(bp))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_allocbt_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+       if (bp->b_error) {
+               trace_xfs_btree_corrupt(bp, _RET_IP_);
+               xfs_verifier_error(bp);
        }
 }
 
@@ -370,9 +372,9 @@ xfs_allocbt_write_verify(
 {
        if (!xfs_allocbt_verify(bp)) {
                trace_xfs_btree_corrupt(bp, _RET_IP_);
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
-                                    bp->b_target->bt_mount, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               return;
        }
        xfs_btree_sblock_calc_crc(bp);
 
index db2cfb067d0b1ea88f8b64875ceb174d3ae582d2..75df77d09f757d4786c889679c35c5cac2ca00b3 100644 (file)
@@ -632,38 +632,46 @@ xfs_map_at_offset(
 }
 
 /*
- * Test if a given page is suitable for writing as part of an unwritten
- * or delayed allocate extent.
+ * Test if a given page contains at least one buffer of a given @type.
+ * If @check_all_buffers is true, then we walk all the buffers in the page to
+ * try to find one of the type passed in. If it is not set, then the caller only
+ * needs to check the first buffer on the page for a match.
  */
-STATIC int
+STATIC bool
 xfs_check_page_type(
        struct page             *page,
-       unsigned int            type)
+       unsigned int            type,
+       bool                    check_all_buffers)
 {
-       if (PageWriteback(page))
-               return 0;
+       struct buffer_head      *bh;
+       struct buffer_head      *head;
 
-       if (page->mapping && page_has_buffers(page)) {
-               struct buffer_head      *bh, *head;
-               int                     acceptable = 0;
+       if (PageWriteback(page))
+               return false;
+       if (!page->mapping)
+               return false;
+       if (!page_has_buffers(page))
+               return false;
 
-               bh = head = page_buffers(page);
-               do {
-                       if (buffer_unwritten(bh))
-                               acceptable += (type == XFS_IO_UNWRITTEN);
-                       else if (buffer_delay(bh))
-                               acceptable += (type == XFS_IO_DELALLOC);
-                       else if (buffer_dirty(bh) && buffer_mapped(bh))
-                               acceptable += (type == XFS_IO_OVERWRITE);
-                       else
-                               break;
-               } while ((bh = bh->b_this_page) != head);
+       bh = head = page_buffers(page);
+       do {
+               if (buffer_unwritten(bh)) {
+                       if (type == XFS_IO_UNWRITTEN)
+                               return true;
+               } else if (buffer_delay(bh)) {
+                       if (type == XFS_IO_DELALLOC)
+                               return true;
+               } else if (buffer_dirty(bh) && buffer_mapped(bh)) {
+                       if (type == XFS_IO_OVERWRITE)
+                               return true;
+               }
 
-               if (acceptable)
-                       return 1;
-       }
+               /* If we are only checking the first buffer, we are done now. */
+               if (!check_all_buffers)
+                       break;
+       } while ((bh = bh->b_this_page) != head);
 
-       return 0;
+       return false;
 }
 
 /*
@@ -697,7 +705,7 @@ xfs_convert_page(
                goto fail_unlock_page;
        if (page->mapping != inode->i_mapping)
                goto fail_unlock_page;
-       if (!xfs_check_page_type(page, (*ioendp)->io_type))
+       if (!xfs_check_page_type(page, (*ioendp)->io_type, false))
                goto fail_unlock_page;
 
        /*
@@ -742,6 +750,15 @@ xfs_convert_page(
        p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
        page_dirty = p_offset / len;
 
+       /*
+        * The moment we find a buffer that doesn't match our current type
+        * specification or can't be written, abort the loop and start
+        * writeback. As per the above xfs_imap_valid() check, only
+        * xfs_vm_writepage() can handle partial page writeback fully - we are
+        * limited here to the buffers that are contiguous with the current
+        * ioend, and hence a buffer we can't write breaks that contiguity and
+        * we have to defer the rest of the IO to xfs_vm_writepage().
+        */
        bh = head = page_buffers(page);
        do {
                if (offset >= end_offset)
@@ -750,7 +767,7 @@ xfs_convert_page(
                        uptodate = 0;
                if (!(PageUptodate(page) || buffer_uptodate(bh))) {
                        done = 1;
-                       continue;
+                       break;
                }
 
                if (buffer_unwritten(bh) || buffer_delay(bh) ||
@@ -762,10 +779,11 @@ xfs_convert_page(
                        else
                                type = XFS_IO_OVERWRITE;
 
-                       if (!xfs_imap_valid(inode, imap, offset)) {
-                               done = 1;
-                               continue;
-                       }
+                       /*
+                        * imap should always be valid because of the above
+                        * partial page end_offset check on the imap.
+                        */
+                       ASSERT(xfs_imap_valid(inode, imap, offset));
 
                        lock_buffer(bh);
                        if (type != XFS_IO_OVERWRITE)
@@ -777,6 +795,7 @@ xfs_convert_page(
                        count++;
                } else {
                        done = 1;
+                       break;
                }
        } while (offset += len, (bh = bh->b_this_page) != head);
 
@@ -868,7 +887,7 @@ xfs_aops_discard_page(
        struct buffer_head      *bh, *head;
        loff_t                  offset = page_offset(page);
 
-       if (!xfs_check_page_type(page, XFS_IO_DELALLOC))
+       if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true))
                goto out_invalidate;
 
        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -1441,7 +1460,8 @@ xfs_vm_direct_IO(
                ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
                                            offset, nr_segs,
                                            xfs_get_blocks_direct,
-                                           xfs_end_io_direct_write, NULL, 0);
+                                           xfs_end_io_direct_write, NULL,
+                                           DIO_ASYNC_EXTEND);
                if (ret != -EIOCBQUEUED && iocb->private)
                        goto out_destroy_ioend;
        } else {
index 7b126f46a2f99689cf0bee0efba6c026c0e23186..fe9587fab17a6822d9bec1b5862a6a133adf9dc7 100644 (file)
@@ -213,8 +213,8 @@ xfs_attr3_leaf_write_verify(
        struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
 
        if (!xfs_attr3_leaf_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -224,7 +224,7 @@ xfs_attr3_leaf_write_verify(
        if (bip)
                hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
 
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_ATTR3_LEAF_CRC_OFF);
+       xfs_buf_update_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF);
 }
 
 /*
@@ -239,13 +239,14 @@ xfs_attr3_leaf_read_verify(
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
 
-       if ((xfs_sb_version_hascrc(&mp->m_sb) &&
-            !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                         XFS_ATTR3_LEAF_CRC_OFF)) ||
-           !xfs_attr3_leaf_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+            !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_attr3_leaf_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
index 5549d69ddb45a2038ece8e24085e0f9ef6ce9984..6e37823e2932aeb45d112b55010ab627adb5bde2 100644 (file)
@@ -125,7 +125,6 @@ xfs_attr3_rmt_read_verify(
        struct xfs_mount *mp = bp->b_target->bt_mount;
        char            *ptr;
        int             len;
-       bool            corrupt = false;
        xfs_daddr_t     bno;
 
        /* no verification of non-crc buffers */
@@ -140,11 +139,11 @@ xfs_attr3_rmt_read_verify(
        while (len > 0) {
                if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp),
                                      XFS_ATTR3_RMT_CRC_OFF)) {
-                       corrupt = true;
+                       xfs_buf_ioerror(bp, EFSBADCRC);
                        break;
                }
                if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
-                       corrupt = true;
+                       xfs_buf_ioerror(bp, EFSCORRUPTED);
                        break;
                }
                len -= XFS_LBSIZE(mp);
@@ -152,10 +151,9 @@ xfs_attr3_rmt_read_verify(
                bno += mp->m_bsize;
        }
 
-       if (corrupt) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-       } else
+       if (bp->b_error)
+               xfs_verifier_error(bp);
+       else
                ASSERT(len == 0);
 }
 
@@ -180,9 +178,8 @@ xfs_attr3_rmt_write_verify(
 
        while (len > 0) {
                if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
-                       XFS_CORRUPTION_ERROR(__func__,
-                                           XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                        xfs_buf_ioerror(bp, EFSCORRUPTED);
+                       xfs_verifier_error(bp);
                        return;
                }
                if (bip) {
index 152543c4ca7031e718bc921b82e9e0a72955de11..5b6092ef51efa9eb6e02c980980c6aa99e486170 100644 (file)
@@ -5378,3 +5378,196 @@ error0:
        }
        return error;
 }
+
+/*
+ * Shift extent records to the left to cover a hole.
+ *
+ * The maximum number of extents to be shifted in a single operation
+ * is @num_exts, and @current_ext keeps track of the current extent
+ * index we have shifted. @offset_shift_fsb is the length by which each
+ * extent is shifted. If there is no hole to shift the extents
+ * into, this will be considered invalid operation and we abort immediately.
+ */
+int
+xfs_bmap_shift_extents(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       int                     *done,
+       xfs_fileoff_t           start_fsb,
+       xfs_fileoff_t           offset_shift_fsb,
+       xfs_extnum_t            *current_ext,
+       xfs_fsblock_t           *firstblock,
+       struct xfs_bmap_free    *flist,
+       int                     num_exts)
+{
+       struct xfs_btree_cur            *cur;
+       struct xfs_bmbt_rec_host        *gotp;
+       struct xfs_bmbt_irec            got;
+       struct xfs_bmbt_irec            left;
+       struct xfs_mount                *mp = ip->i_mount;
+       struct xfs_ifork                *ifp;
+       xfs_extnum_t                    nexts = 0;
+       xfs_fileoff_t                   startoff;
+       int                             error = 0;
+       int                             i;
+       int                             whichfork = XFS_DATA_FORK;
+       int                             logflags;
+       xfs_filblks_t                   blockcount = 0;
+
+       if (unlikely(XFS_TEST_ERROR(
+           (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
+            mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+               XFS_ERROR_REPORT("xfs_bmap_shift_extents",
+                                XFS_ERRLEVEL_LOW, mp);
+               return XFS_ERROR(EFSCORRUPTED);
+       }
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       ASSERT(current_ext != NULL);
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               /* Read in all the extents */
+               error = xfs_iread_extents(tp, ip, whichfork);
+               if (error)
+                       return error;
+       }
+
+       /*
+        * If *current_ext is 0, we would need to lookup the extent
+        * from where we would start shifting and store it in gotp.
+        */
+       if (!*current_ext) {
+               gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext);
+               /*
+                * gotp can be null in 2 cases: 1) if there are no extents
+                * or 2) start_fsb lies in a hole beyond which there are
+                * no extents. Either way, we are done.
+                */
+               if (!gotp) {
+                       *done = 1;
+                       return 0;
+               }
+       }
+
+       /* We are going to change core inode */
+       logflags = XFS_ILOG_CORE;
+
+       if (ifp->if_flags & XFS_IFBROOT) {
+               cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
+               cur->bc_private.b.firstblock = *firstblock;
+               cur->bc_private.b.flist = flist;
+               cur->bc_private.b.flags = 0;
+       } else {
+               cur = NULL;
+               logflags |= XFS_ILOG_DEXT;
+       }
+
+       while (nexts++ < num_exts &&
+              *current_ext <  XFS_IFORK_NEXTENTS(ip, whichfork)) {
+
+               gotp = xfs_iext_get_ext(ifp, *current_ext);
+               xfs_bmbt_get_all(gotp, &got);
+               startoff = got.br_startoff - offset_shift_fsb;
+
+               /*
+                * Before shifting extent into hole, make sure that the hole
+                * is large enough to accommodate the shift.
+                */
+               if (*current_ext) {
+                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
+                                               *current_ext - 1), &left);
+
+                       if (startoff < left.br_startoff + left.br_blockcount)
+                               error = XFS_ERROR(EINVAL);
+               } else if (offset_shift_fsb > got.br_startoff) {
+                       /*
+                        * When first extent is shifted, offset_shift_fsb
+                        * should not exceed the starting offset of
+                        * the first extent.
+                        */
+                       error = XFS_ERROR(EINVAL);
+               }
+
+               if (error)
+                       goto del_cursor;
+
+               if (cur) {
+                       error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
+                                                  got.br_startblock,
+                                                  got.br_blockcount,
+                                                  &i);
+                       if (error)
+                               goto del_cursor;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+               }
+
+               /* Check if we can merge 2 adjacent extents */
+               if (*current_ext &&
+                   left.br_startoff + left.br_blockcount == startoff &&
+                   left.br_startblock + left.br_blockcount ==
+                               got.br_startblock &&
+                   left.br_state == got.br_state &&
+                   left.br_blockcount + got.br_blockcount <= MAXEXTLEN) {
+                       blockcount = left.br_blockcount +
+                               got.br_blockcount;
+                       xfs_iext_remove(ip, *current_ext, 1, 0);
+                       if (cur) {
+                               error = xfs_btree_delete(cur, &i);
+                               if (error)
+                                       goto del_cursor;
+                               XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+                       }
+                       XFS_IFORK_NEXT_SET(ip, whichfork,
+                               XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+                       gotp = xfs_iext_get_ext(ifp, --*current_ext);
+                       xfs_bmbt_get_all(gotp, &got);
+
+                       /* Make cursor point to the extent we will update */
+                       if (cur) {
+                               error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
+                                                          got.br_startblock,
+                                                          got.br_blockcount,
+                                                          &i);
+                               if (error)
+                                       goto del_cursor;
+                               XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+                       }
+
+                       xfs_bmbt_set_blockcount(gotp, blockcount);
+                       got.br_blockcount = blockcount;
+               } else {
+                       /* We have to update the startoff */
+                       xfs_bmbt_set_startoff(gotp, startoff);
+                       got.br_startoff = startoff;
+               }
+
+               if (cur) {
+                       error = xfs_bmbt_update(cur, got.br_startoff,
+                                               got.br_startblock,
+                                               got.br_blockcount,
+                                               got.br_state);
+                       if (error)
+                               goto del_cursor;
+               }
+
+               (*current_ext)++;
+       }
+
+       /* Check if we are done */
+       if (*current_ext ==  XFS_IFORK_NEXTENTS(ip, whichfork))
+               *done = 1;
+
+del_cursor:
+       if (cur)
+               xfs_btree_del_cursor(cur,
+                       error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+
+       xfs_trans_log_inode(tp, ip, logflags);
+
+       return error;
+}
index 33b41f35122574e0b1cf7ad7a2a9ae23ecfadddb..f84bd7af43bec38bd4493c473f95d32e7f590337 100644 (file)
@@ -127,6 +127,16 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
        { BMAP_RIGHT_FILLING,   "RF" }, \
        { BMAP_ATTRFORK,        "ATTR" }
 
+
+/*
+ * This macro is used to determine how many extents will be shifted
+ * in one write transaction. We could require two splits,
+ * an extent move on the first and an extent merge on the second,
+ * so it is proper that only one extent is shifted inside a write
+ * transaction at a time.
+ */
+#define XFS_BMAP_MAX_SHIFT_EXTENTS     1
+
 #ifdef DEBUG
 void   xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
                int whichfork, unsigned long caller_ip);
@@ -169,5 +179,10 @@ int        xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
 int    xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
                xfs_extnum_t num);
 uint   xfs_default_attroffset(struct xfs_inode *ip);
+int    xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
+               int *done, xfs_fileoff_t start_fsb,
+               xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext,
+               xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist,
+               int num_exts);
 
 #endif /* __XFS_BMAP_H__ */
index 706bc3f777cb390cacd78988b8b8ebe1a63799a6..818d546664e7575d4affe27b3e90c178ccd85315 100644 (file)
@@ -780,12 +780,14 @@ static void
 xfs_bmbt_read_verify(
        struct xfs_buf  *bp)
 {
-       if (!(xfs_btree_lblock_verify_crc(bp) &&
-             xfs_bmbt_verify(bp))) {
-               trace_xfs_btree_corrupt(bp, _RET_IP_);
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
-                                    bp->b_target->bt_mount, bp->b_addr);
+       if (!xfs_btree_lblock_verify_crc(bp))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_bmbt_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+       if (bp->b_error) {
+               trace_xfs_btree_corrupt(bp, _RET_IP_);
+               xfs_verifier_error(bp);
        }
 }
 
@@ -794,11 +796,9 @@ xfs_bmbt_write_verify(
        struct xfs_buf  *bp)
 {
        if (!xfs_bmbt_verify(bp)) {
-               xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn);
                trace_xfs_btree_corrupt(bp, _RET_IP_);
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
-                                    bp->b_target->bt_mount, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
        xfs_btree_lblock_calc_crc(bp);
index f264616080cac0c6bdfdc084c188b15505aa92e7..01f6a646caa121895265cfe33a7d97860f786a18 100644 (file)
@@ -1349,7 +1349,6 @@ xfs_free_file_space(
                 * the freeing of the space succeeds at ENOSPC.
                 */
                tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-               tp->t_flags |= XFS_TRANS_RESERVE;
                error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
 
                /*
@@ -1467,6 +1466,102 @@ out:
 
 }
 
+/*
+ * xfs_collapse_file_space()
+ *     This routine frees disk space and shifts extents for the given file.
+ *     The first thing we do is to free data blocks in the specified range
+ *     by calling xfs_free_file_space(). It also syncs dirty data
+ *     and invalidates the page cache over the region on which collapse range
+ *     is working. Then we shift extent records to the left to cover the hole.
+ * RETURNS:
+ *     0 on success
+ *     errno on error
+ *
+ */
+int
+xfs_collapse_file_space(
+       struct xfs_inode        *ip,
+       xfs_off_t               offset,
+       xfs_off_t               len)
+{
+       int                     done = 0;
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp;
+       int                     error;
+       xfs_extnum_t            current_ext = 0;
+       struct xfs_bmap_free    free_list;
+       xfs_fsblock_t           first_block;
+       int                     committed;
+       xfs_fileoff_t           start_fsb;
+       xfs_fileoff_t           shift_fsb;
+
+       ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+
+       trace_xfs_collapse_file_space(ip);
+
+       start_fsb = XFS_B_TO_FSB(mp, offset + len);
+       shift_fsb = XFS_B_TO_FSB(mp, len);
+
+       error = xfs_free_file_space(ip, offset, len);
+       if (error)
+               return error;
+
+       while (!error && !done) {
+               tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
+               tp->t_flags |= XFS_TRANS_RESERVE;
+               /*
+                * We need to reserve a permanent block for the transaction.
+                * It is needed when, after shifting an extent into the
+                * hole, we find that adjacent extents can be merged, which
+                * may lead to the freeing of a block during record update.
+                */
+               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
+                               XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
+               if (error) {
+                       ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
+                       xfs_trans_cancel(tp, 0);
+                       break;
+               }
+
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+               error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
+                               ip->i_gdquot, ip->i_pdquot,
+                               XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
+                               XFS_QMOPT_RES_REGBLKS);
+               if (error)
+                       goto out;
+
+               xfs_trans_ijoin(tp, ip, 0);
+
+               xfs_bmap_init(&free_list, &first_block);
+
+               /*
+                * We are using the write transaction in which max 2 bmbt
+                * updates are allowed
+                */
+               error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb,
+                                              shift_fsb, &current_ext,
+                                              &first_block, &free_list,
+                                              XFS_BMAP_MAX_SHIFT_EXTENTS);
+               if (error)
+                       goto out;
+
+               error = xfs_bmap_finish(&tp, &free_list, &committed);
+               if (error)
+                       goto out;
+
+               error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       }
+
+       return error;
+
+out:
+       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return error;
+}
+
 /*
  * We need to check that the format of the data fork in the temporary inode is
  * valid for the target inode before doing the swap. This is not a problem with
index 900747b25772c2b1a41821fba8e99c3cde2b3ffc..935ed2b24edfb05b4d5893dccf7cebdb09a374ed 100644 (file)
@@ -99,6 +99,8 @@ int   xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
                            xfs_off_t len);
 int    xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
                            xfs_off_t len);
+int    xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
+                               xfs_off_t len);
 
 /* EOF block manipulation functions */
 bool   xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
index 9adaae4f3e2fd21c647c9e7fa023a120e5012272..e80d59fdf89a8b222f05fd69e7bc970c7586c5b9 100644 (file)
@@ -234,8 +234,7 @@ xfs_btree_lblock_calc_crc(
                return;
        if (bip)
                block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
-                        XFS_BTREE_LBLOCK_CRC_OFF);
+       xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
 }
 
 bool
@@ -243,8 +242,8 @@ xfs_btree_lblock_verify_crc(
        struct xfs_buf          *bp)
 {
        if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
-               return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                       XFS_BTREE_LBLOCK_CRC_OFF);
+               return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
+
        return true;
 }
 
@@ -267,8 +266,7 @@ xfs_btree_sblock_calc_crc(
                return;
        if (bip)
                block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
-                        XFS_BTREE_SBLOCK_CRC_OFF);
+       xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
 }
 
 bool
@@ -276,8 +274,8 @@ xfs_btree_sblock_verify_crc(
        struct xfs_buf          *bp)
 {
        if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
-               return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                       XFS_BTREE_SBLOCK_CRC_OFF);
+               return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
+
        return true;
 }
 
index 9c061ef2b0d973c913a1baaee4a43bc27523b244..107f2fdfe41fb9ef0e6941bd60542ee12a0510e3 100644 (file)
@@ -396,7 +396,17 @@ _xfs_buf_map_pages(
                bp->b_addr = NULL;
        } else {
                int retried = 0;
+               unsigned noio_flag;
 
+               /*
+                * vm_map_ram() will allocate auxiliary structures (e.g.
+                * pagetables) with GFP_KERNEL, yet we are likely to be under
+                * GFP_NOFS context here. Hence we need to tell memory reclaim
+                * that we are in such a context via PF_MEMALLOC_NOIO to prevent
+                * memory reclaim re-entering the filesystem here and
+                * potentially deadlocking.
+                */
+               noio_flag = memalloc_noio_save();
                do {
                        bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
                                                -1, PAGE_KERNEL);
@@ -404,6 +414,7 @@ _xfs_buf_map_pages(
                                break;
                        vm_unmap_aliases();
                } while (retried++ <= 1);
+               memalloc_noio_restore(noio_flag);
 
                if (!bp->b_addr)
                        return -ENOMEM;
index 995339534db6a4b65c6ec332055734f7415ca621..b8a3abf6cf475ee86a5d45f95d70a19a031d889f 100644 (file)
@@ -369,6 +369,20 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
        xfs_buf_rele(bp);
 }
 
+static inline int
+xfs_buf_verify_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
+{
+       return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
+                               cksum_offset);
+}
+
+static inline void
+xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
+{
+       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
+                        cksum_offset);
+}
+
 /*
  *     Handling of buftargs.
  */
index 33149113e333bd8b536d71c80b234b5d39fcb53e..8752821443bee039bd7d1f0691afaa9e4515da84 100644 (file)
@@ -796,20 +796,6 @@ xfs_buf_item_init(
                bip->bli_formats[i].blf_map_size = map_size;
        }
 
-#ifdef XFS_TRANS_DEBUG
-       /*
-        * Allocate the arrays for tracking what needs to be logged
-        * and what our callers request to be logged.  bli_orig
-        * holds a copy of the original, clean buffer for comparison
-        * against, and bli_logged keeps a 1 bit flag per byte in
-        * the buffer to indicate which bytes the callers have asked
-        * to have logged.
-        */
-       bip->bli_orig = kmem_alloc(BBTOB(bp->b_length), KM_SLEEP);
-       memcpy(bip->bli_orig, bp->b_addr, BBTOB(bp->b_length));
-       bip->bli_logged = kmem_zalloc(BBTOB(bp->b_length) / NBBY, KM_SLEEP);
-#endif
-
        /*
         * Put the buf item into the list of items attached to the
         * buffer at the front.
@@ -957,11 +943,6 @@ STATIC void
 xfs_buf_item_free(
        xfs_buf_log_item_t      *bip)
 {
-#ifdef XFS_TRANS_DEBUG
-       kmem_free(bip->bli_orig);
-       kmem_free(bip->bli_logged);
-#endif /* XFS_TRANS_DEBUG */
-
        xfs_buf_item_free_format(bip);
        kmem_zone_free(xfs_buf_item_zone, bip);
 }
index 796272a2e1298fca3a5cc08ef4c6b226d6eec15c..6cc5f6785a774045aa5290a530cba441a71d460f 100644 (file)
@@ -185,8 +185,8 @@ xfs_da3_node_write_verify(
        struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
 
        if (!xfs_da3_node_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -196,7 +196,7 @@ xfs_da3_node_write_verify(
        if (bip)
                hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
 
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DA3_NODE_CRC_OFF);
+       xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
 }
 
 /*
@@ -209,18 +209,20 @@ static void
 xfs_da3_node_read_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
        struct xfs_da_blkinfo   *info = bp->b_addr;
 
        switch (be16_to_cpu(info->magic)) {
                case XFS_DA3_NODE_MAGIC:
-                       if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                             XFS_DA3_NODE_CRC_OFF))
+                       if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
+                               xfs_buf_ioerror(bp, EFSBADCRC);
                                break;
+                       }
                        /* fall through */
                case XFS_DA_NODE_MAGIC:
-                       if (!xfs_da3_node_verify(bp))
+                       if (!xfs_da3_node_verify(bp)) {
+                               xfs_buf_ioerror(bp, EFSCORRUPTED);
                                break;
+                       }
                        return;
                case XFS_ATTR_LEAF_MAGIC:
                case XFS_ATTR3_LEAF_MAGIC:
@@ -237,8 +239,7 @@ xfs_da3_node_read_verify(
        }
 
        /* corrupt block */
-       XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
-       xfs_buf_ioerror(bp, EFSCORRUPTED);
+       xfs_verifier_error(bp);
 }
 
 const struct xfs_buf_ops xfs_da3_node_buf_ops = {
@@ -1295,7 +1296,7 @@ xfs_da3_fixhashpath(
                node = blk->bp->b_addr;
                dp->d_ops->node_hdr_from_disk(&nodehdr, node);
                btree = dp->d_ops->node_tree_p(node);
-               if (be32_to_cpu(btree->hashval) == lasthash)
+               if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
                        break;
                blk->hashval = lasthash;
                btree[blk->index].hashval = cpu_to_be32(lasthash);
index e5869b50dc41acd1788123e0cd49ea2670101e3f..623bbe8fd921d1fa8e3b994dabbf2014be5e7c6c 100644 (file)
@@ -89,6 +89,8 @@ typedef struct xfs_dinode {
        /* structure must be padded to 64 bit alignment */
 } xfs_dinode_t;
 
+#define XFS_DINODE_CRC_OFF     offsetof(struct xfs_dinode, di_crc)
+
 #define DI_MAX_FLUSH 0xffff
 
 /*
index ce16ef02997a9b27e77293033f84366919fe0e48..fda46253966a5cac252e593e14181da56f252700 100644 (file)
@@ -180,16 +180,23 @@ xfs_dir_init(
        xfs_inode_t     *dp,
        xfs_inode_t     *pdp)
 {
-       xfs_da_args_t   args;
+       struct xfs_da_args *args;
        int             error;
 
-       memset((char *)&args, 0, sizeof(args));
-       args.dp = dp;
-       args.trans = tp;
        ASSERT(S_ISDIR(dp->i_d.di_mode));
-       if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino)))
+       error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino);
+       if (error)
                return error;
-       return xfs_dir2_sf_create(&args, pdp->i_ino);
+
+       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+       if (!args)
+               return ENOMEM;
+
+       args->dp = dp;
+       args->trans = tp;
+       error = xfs_dir2_sf_create(args, pdp->i_ino);
+       kmem_free(args);
+       return error;
 }
 
 /*
@@ -205,41 +212,56 @@ xfs_dir_createname(
        xfs_bmap_free_t         *flist,         /* bmap's freeblock list */
        xfs_extlen_t            total)          /* bmap's total block count */
 {
-       xfs_da_args_t           args;
+       struct xfs_da_args      *args;
        int                     rval;
        int                     v;              /* type-checking value */
 
        ASSERT(S_ISDIR(dp->i_d.di_mode));
-       if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
+       rval = xfs_dir_ino_validate(tp->t_mountp, inum);
+       if (rval)
                return rval;
        XFS_STATS_INC(xs_dir_create);
 
-       memset(&args, 0, sizeof(xfs_da_args_t));
-       args.name = name->name;
-       args.namelen = name->len;
-       args.filetype = name->type;
-       args.hashval = dp->i_mount->m_dirnameops->hashname(name);
-       args.inumber = inum;
-       args.dp = dp;
-       args.firstblock = first;
-       args.flist = flist;
-       args.total = total;
-       args.whichfork = XFS_DATA_FORK;
-       args.trans = tp;
-       args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
-
-       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
-               rval = xfs_dir2_sf_addname(&args);
-       else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_block_addname(&args);
-       else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_leaf_addname(&args);
+       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+       if (!args)
+               return ENOMEM;
+
+       args->name = name->name;
+       args->namelen = name->len;
+       args->filetype = name->type;
+       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+       args->inumber = inum;
+       args->dp = dp;
+       args->firstblock = first;
+       args->flist = flist;
+       args->total = total;
+       args->whichfork = XFS_DATA_FORK;
+       args->trans = tp;
+       args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
+
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+               rval = xfs_dir2_sf_addname(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isblock(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v) {
+               rval = xfs_dir2_block_addname(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isleaf(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v)
+               rval = xfs_dir2_leaf_addname(args);
        else
-               rval = xfs_dir2_node_addname(&args);
+               rval = xfs_dir2_node_addname(args);
+
+out_free:
+       kmem_free(args);
        return rval;
 }
 
@@ -282,46 +304,66 @@ xfs_dir_lookup(
        xfs_ino_t       *inum,          /* out: inode number */
        struct xfs_name *ci_name)       /* out: actual name if CI match */
 {
-       xfs_da_args_t   args;
+       struct xfs_da_args *args;
        int             rval;
        int             v;              /* type-checking value */
 
        ASSERT(S_ISDIR(dp->i_d.di_mode));
        XFS_STATS_INC(xs_dir_lookup);
 
-       memset(&args, 0, sizeof(xfs_da_args_t));
-       args.name = name->name;
-       args.namelen = name->len;
-       args.filetype = name->type;
-       args.hashval = dp->i_mount->m_dirnameops->hashname(name);
-       args.dp = dp;
-       args.whichfork = XFS_DATA_FORK;
-       args.trans = tp;
-       args.op_flags = XFS_DA_OP_OKNOENT;
+       /*
+        * We need to use KM_NOFS here so that lockdep will not throw false
+        * positive deadlock warnings on a non-transactional lookup path. It is
+        * safe to recurse into inode reclaim in that case, but lockdep can't
+        * easily be taught about it. Hence using KM_NOFS here avoids having
+        * to add a bunch of lockdep class annotations into the reclaim path
+        * for the ilock.
+        */
+       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+       args->name = name->name;
+       args->namelen = name->len;
+       args->filetype = name->type;
+       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+       args->dp = dp;
+       args->whichfork = XFS_DATA_FORK;
+       args->trans = tp;
+       args->op_flags = XFS_DA_OP_OKNOENT;
        if (ci_name)
-               args.op_flags |= XFS_DA_OP_CILOOKUP;
+               args->op_flags |= XFS_DA_OP_CILOOKUP;
 
-       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
-               rval = xfs_dir2_sf_lookup(&args);
-       else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_block_lookup(&args);
-       else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_leaf_lookup(&args);
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+               rval = xfs_dir2_sf_lookup(args);
+               goto out_check_rval;
+       }
+
+       rval = xfs_dir2_isblock(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v) {
+               rval = xfs_dir2_block_lookup(args);
+               goto out_check_rval;
+       }
+
+       rval = xfs_dir2_isleaf(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v)
+               rval = xfs_dir2_leaf_lookup(args);
        else
-               rval = xfs_dir2_node_lookup(&args);
+               rval = xfs_dir2_node_lookup(args);
+
+out_check_rval:
        if (rval == EEXIST)
                rval = 0;
        if (!rval) {
-               *inum = args.inumber;
+               *inum = args->inumber;
                if (ci_name) {
-                       ci_name->name = args.value;
-                       ci_name->len = args.valuelen;
+                       ci_name->name = args->value;
+                       ci_name->len = args->valuelen;
                }
        }
+out_free:
+       kmem_free(args);
        return rval;
 }
 
@@ -338,38 +380,51 @@ xfs_dir_removename(
        xfs_bmap_free_t *flist,         /* bmap's freeblock list */
        xfs_extlen_t    total)          /* bmap's total block count */
 {
-       xfs_da_args_t   args;
+       struct xfs_da_args *args;
        int             rval;
        int             v;              /* type-checking value */
 
        ASSERT(S_ISDIR(dp->i_d.di_mode));
        XFS_STATS_INC(xs_dir_remove);
 
-       memset(&args, 0, sizeof(xfs_da_args_t));
-       args.name = name->name;
-       args.namelen = name->len;
-       args.filetype = name->type;
-       args.hashval = dp->i_mount->m_dirnameops->hashname(name);
-       args.inumber = ino;
-       args.dp = dp;
-       args.firstblock = first;
-       args.flist = flist;
-       args.total = total;
-       args.whichfork = XFS_DATA_FORK;
-       args.trans = tp;
-
-       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
-               rval = xfs_dir2_sf_removename(&args);
-       else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_block_removename(&args);
-       else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_leaf_removename(&args);
+       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+       if (!args)
+               return ENOMEM;
+
+       args->name = name->name;
+       args->namelen = name->len;
+       args->filetype = name->type;
+       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+       args->inumber = ino;
+       args->dp = dp;
+       args->firstblock = first;
+       args->flist = flist;
+       args->total = total;
+       args->whichfork = XFS_DATA_FORK;
+       args->trans = tp;
+
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+               rval = xfs_dir2_sf_removename(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isblock(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v) {
+               rval = xfs_dir2_block_removename(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isleaf(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v)
+               rval = xfs_dir2_leaf_removename(args);
        else
-               rval = xfs_dir2_node_removename(&args);
+               rval = xfs_dir2_node_removename(args);
+out_free:
+       kmem_free(args);
        return rval;
 }
 
@@ -386,40 +441,54 @@ xfs_dir_replace(
        xfs_bmap_free_t *flist,         /* bmap's freeblock list */
        xfs_extlen_t    total)          /* bmap's total block count */
 {
-       xfs_da_args_t   args;
+       struct xfs_da_args *args;
        int             rval;
        int             v;              /* type-checking value */
 
        ASSERT(S_ISDIR(dp->i_d.di_mode));
 
-       if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
+       rval = xfs_dir_ino_validate(tp->t_mountp, inum);
+       if (rval)
                return rval;
 
-       memset(&args, 0, sizeof(xfs_da_args_t));
-       args.name = name->name;
-       args.namelen = name->len;
-       args.filetype = name->type;
-       args.hashval = dp->i_mount->m_dirnameops->hashname(name);
-       args.inumber = inum;
-       args.dp = dp;
-       args.firstblock = first;
-       args.flist = flist;
-       args.total = total;
-       args.whichfork = XFS_DATA_FORK;
-       args.trans = tp;
-
-       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
-               rval = xfs_dir2_sf_replace(&args);
-       else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_block_replace(&args);
-       else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_leaf_replace(&args);
+       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+       if (!args)
+               return ENOMEM;
+
+       args->name = name->name;
+       args->namelen = name->len;
+       args->filetype = name->type;
+       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+       args->inumber = inum;
+       args->dp = dp;
+       args->firstblock = first;
+       args->flist = flist;
+       args->total = total;
+       args->whichfork = XFS_DATA_FORK;
+       args->trans = tp;
+
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+               rval = xfs_dir2_sf_replace(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isblock(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v) {
+               rval = xfs_dir2_block_replace(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isleaf(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v)
+               rval = xfs_dir2_leaf_replace(args);
        else
-               rval = xfs_dir2_node_replace(&args);
+               rval = xfs_dir2_node_replace(args);
+out_free:
+       kmem_free(args);
        return rval;
 }
 
@@ -434,7 +503,7 @@ xfs_dir_canenter(
        struct xfs_name *name,          /* name of entry to add */
        uint            resblks)
 {
-       xfs_da_args_t   args;
+       struct xfs_da_args *args;
        int             rval;
        int             v;              /* type-checking value */
 
@@ -443,29 +512,42 @@ xfs_dir_canenter(
 
        ASSERT(S_ISDIR(dp->i_d.di_mode));
 
-       memset(&args, 0, sizeof(xfs_da_args_t));
-       args.name = name->name;
-       args.namelen = name->len;
-       args.filetype = name->type;
-       args.hashval = dp->i_mount->m_dirnameops->hashname(name);
-       args.dp = dp;
-       args.whichfork = XFS_DATA_FORK;
-       args.trans = tp;
-       args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
+       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+       if (!args)
+               return ENOMEM;
+
+       args->name = name->name;
+       args->namelen = name->len;
+       args->filetype = name->type;
+       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+       args->dp = dp;
+       args->whichfork = XFS_DATA_FORK;
+       args->trans = tp;
+       args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
                                                        XFS_DA_OP_OKNOENT;
 
-       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
-               rval = xfs_dir2_sf_addname(&args);
-       else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_block_addname(&args);
-       else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_leaf_addname(&args);
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+               rval = xfs_dir2_sf_addname(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isblock(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v) {
+               rval = xfs_dir2_block_addname(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isleaf(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v)
+               rval = xfs_dir2_leaf_addname(args);
        else
-               rval = xfs_dir2_node_addname(&args);
+               rval = xfs_dir2_node_addname(args);
+out_free:
+       kmem_free(args);
        return rval;
 }
 
index 90cdbf4b5f1902f983f504b6b704cd2a433ddfbb..4f6a38cb83a4ee9f467896deabeefb059cbe8d10 100644 (file)
@@ -89,13 +89,14 @@ xfs_dir3_block_read_verify(
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
 
-       if ((xfs_sb_version_hascrc(&mp->m_sb) &&
-            !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                         XFS_DIR3_DATA_CRC_OFF)) ||
-           !xfs_dir3_block_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+            !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_dir3_block_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 static void
@@ -107,8 +108,8 @@ xfs_dir3_block_write_verify(
        struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
 
        if (!xfs_dir3_block_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -118,7 +119,7 @@ xfs_dir3_block_write_verify(
        if (bip)
                hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
 
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF);
+       xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
 }
 
 const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
index 70acff4ee1739860a4ed8a311aa8ff2f2b33aafe..afa4ad523f3f0278cf7de73a93a25c3b8b191e1d 100644 (file)
@@ -241,7 +241,6 @@ static void
 xfs_dir3_data_reada_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
        struct xfs_dir2_data_hdr *hdr = bp->b_addr;
 
        switch (hdr->magic) {
@@ -255,8 +254,8 @@ xfs_dir3_data_reada_verify(
                xfs_dir3_data_verify(bp);
                return;
        default:
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                break;
        }
 }
@@ -267,13 +266,14 @@ xfs_dir3_data_read_verify(
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
 
-       if ((xfs_sb_version_hascrc(&mp->m_sb) &&
-            !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                         XFS_DIR3_DATA_CRC_OFF)) ||
-           !xfs_dir3_data_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+            !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
+                xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_dir3_data_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 static void
@@ -285,8 +285,8 @@ xfs_dir3_data_write_verify(
        struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
 
        if (!xfs_dir3_data_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -296,7 +296,7 @@ xfs_dir3_data_write_verify(
        if (bip)
                hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
 
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF);
+       xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
 }
 
 const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
index ae47ec6e16c4031e50ba9ef63884ce216757d41e..d36e97df1187ebc866e9c3885d6f848ca3373c5a 100644 (file)
@@ -179,13 +179,14 @@ __read_verify(
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
 
-       if ((xfs_sb_version_hascrc(&mp->m_sb) &&
-            !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                         XFS_DIR3_LEAF_CRC_OFF)) ||
-           !xfs_dir3_leaf_verify(bp, magic)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+            !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_dir3_leaf_verify(bp, magic))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 static void
@@ -198,8 +199,8 @@ __write_verify(
        struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
 
        if (!xfs_dir3_leaf_verify(bp, magic)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -209,7 +210,7 @@ __write_verify(
        if (bip)
                hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
 
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_LEAF_CRC_OFF);
+       xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF);
 }
 
 static void
index 48c7d18f68c3fb23a89a31a955fc9250fbd63108..cb434d732681a33e9e798278670b0b2ba99bdfbc 100644 (file)
@@ -115,13 +115,14 @@ xfs_dir3_free_read_verify(
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
 
-       if ((xfs_sb_version_hascrc(&mp->m_sb) &&
-            !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                         XFS_DIR3_FREE_CRC_OFF)) ||
-           !xfs_dir3_free_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+           !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_dir3_free_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 static void
@@ -133,8 +134,8 @@ xfs_dir3_free_write_verify(
        struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
 
        if (!xfs_dir3_free_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -144,7 +145,7 @@ xfs_dir3_free_write_verify(
        if (bip)
                hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
 
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_FREE_CRC_OFF);
+       xfs_buf_update_cksum(bp, XFS_DIR3_FREE_CRC_OFF);
 }
 
 const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
index 7aeb4c895b3294e84731b44f6f75487d6a655c3e..868b19f096bfa412223b72ad6d75fd72c909647d 100644 (file)
@@ -615,7 +615,7 @@ xfs_qm_dqread(
 
        if (flags & XFS_QMOPT_DQALLOC) {
                tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_attrsetm,
+               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_dqalloc,
                                          XFS_QM_DQALLOC_SPACE_RES(mp), 0);
                if (error)
                        goto error1;
index d401457d2f258ac409dfbc4cbfb1af45c6bb07a2..610da81777374587e75c1333435590fe3651c92f 100644 (file)
@@ -257,10 +257,13 @@ xfs_dquot_buf_read_verify(
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
 
-       if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (!xfs_dquot_buf_verify_crc(mp, bp))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_dquot_buf_verify(mp, bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 /*
@@ -275,8 +278,8 @@ xfs_dquot_buf_write_verify(
        struct xfs_mount        *mp = bp->b_target->bt_mount;
 
        if (!xfs_dquot_buf_verify(mp, bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 }
index 9995b807d627eb564da0747124359caa6141ee57..edac5b057d28790b5f0c6f767b12b1360a1488b7 100644 (file)
@@ -156,7 +156,7 @@ xfs_error_report(
 {
        if (level <= xfs_error_level) {
                xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
-               "Internal error %s at line %d of file %s.  Caller 0x%p",
+               "Internal error %s at line %d of file %s.  Caller %pF",
                            tag, linenum, filename, ra);
 
                xfs_stack_trace();
@@ -178,3 +178,28 @@ xfs_corruption_error(
        xfs_error_report(tag, level, mp, filename, linenum, ra);
        xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair");
 }
+
+/*
+ * Report a failed metadata verifier for @bp.
+ *
+ * The alert names the failure as a "CRC error" when bp->b_error is EFSBADCRC
+ * and as "corruption" for any other value, together with the return address
+ * of this function and the buffer's block number.  The first 64 bytes of the
+ * buffer are dumped when the error level is at least XFS_ERRLEVEL_LOW, and a
+ * stack trace is only emitted when it is at least XFS_ERRLEVEL_HIGH.
+ */
+void
+xfs_verifier_error(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       const char              *what = "corruption";
+       bool                    dump_buf = xfs_error_level >= XFS_ERRLEVEL_LOW;
+       bool                    dump_stack = xfs_error_level >= XFS_ERRLEVEL_HIGH;
+
+       if (bp->b_error == EFSBADCRC)
+               what = "CRC error";
+
+       xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx",
+                 what, __return_address, bp->b_bn);
+       xfs_alert(mp, "Unmount and run xfs_repair");
+
+       if (dump_buf) {
+               xfs_alert(mp, "First 64 bytes of corrupted metadata buffer:");
+               xfs_hex_dump(xfs_buf_offset(bp, 0), 64);
+       }
+
+       if (dump_stack)
+               xfs_stack_trace();
+}
index 079a367f44eeb6f67c16666c695a5fd3a246637d..c1c57d4a4b5db6f37a626d7e78a48f32facf8dcd 100644 (file)
@@ -34,6 +34,7 @@ extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
 extern void xfs_corruption_error(const char *tag, int level,
                        struct xfs_mount *mp, void *p, const char *filename,
                        int linenum, inst_t *ra);
+extern void xfs_verifier_error(struct xfs_buf *bp);
 
 #define        XFS_ERROR_REPORT(e, lvl, mp)    \
        xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address)
index 64b48eade91d14c79408b6863f199e9181350f81..f7abff8c16ca7361d3e32f1842ae1daed15e651e 100644 (file)
@@ -823,7 +823,8 @@ xfs_file_fallocate(
 
        if (!S_ISREG(inode->i_mode))
                return -EINVAL;
-       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+                    FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
                return -EOPNOTSUPP;
 
        xfs_ilock(ip, XFS_IOLOCK_EXCL);
@@ -831,6 +832,20 @@ xfs_file_fallocate(
                error = xfs_free_file_space(ip, offset, len);
                if (error)
                        goto out_unlock;
+       } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
+               unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+
+               if (offset & blksize_mask || len & blksize_mask) {
+                       error = -EINVAL;
+                       goto out_unlock;
+               }
+
+               ASSERT(offset + len < i_size_read(inode));
+               new_size = i_size_read(inode) - len;
+
+               error = xfs_collapse_file_space(ip, offset, len);
+               if (error)
+                       goto out_unlock;
        } else {
                if (!(mode & FALLOC_FL_KEEP_SIZE) &&
                    offset + len > i_size_read(inode)) {
@@ -840,8 +855,11 @@ xfs_file_fallocate(
                                goto out_unlock;
                }
 
-               error = xfs_alloc_file_space(ip, offset, len,
-                                            XFS_BMAPI_PREALLOC);
+               if (mode & FALLOC_FL_ZERO_RANGE)
+                       error = xfs_zero_file_space(ip, offset, len);
+               else
+                       error = xfs_alloc_file_space(ip, offset, len,
+                                                    XFS_BMAPI_PREALLOC);
                if (error)
                        goto out_unlock;
        }
@@ -859,7 +877,7 @@ xfs_file_fallocate(
        if (ip->i_d.di_mode & S_IXGRP)
                ip->i_d.di_mode &= ~S_ISGID;
 
-       if (!(mode & FALLOC_FL_PUNCH_HOLE))
+       if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)))
                ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
 
        xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
index b6ab5a3cfa125d2204d19760dccac917d0ad957f..9898f31d05d8c2f7096647b8750ede07808b5a54 100644 (file)
@@ -145,6 +145,8 @@ struct xfs_dsymlink_hdr {
        __be64  sl_lsn;
 };
 
+#define XFS_SYMLINK_CRC_OFF    offsetof(struct xfs_dsymlink_hdr, sl_crc)
+
 /*
  * The maximum pathlen is 1024 bytes. Since the minimum file system
  * blocksize is 512 bytes, we can get a max of 3 extents back from
index 5d7f105a1c82e932175ce61b3dfce66b2282966e..8f711db61a0c2148ecfd98c22bed1509fa437e11 100644 (file)
@@ -363,6 +363,18 @@ xfs_ialloc_ag_alloc(
                args.minleft = args.mp->m_in_maxlevels - 1;
                if ((error = xfs_alloc_vextent(&args)))
                        return error;
+
+               /*
+                * This request might have dirtied the transaction if the AG can
+                * satisfy the request, but the exact block was not available.
+                * If the allocation did fail, subsequent requests will relax
+                * the exact agbno requirement and increase the alignment
+                * instead. It is critical that the total size of the request
+                * (len + alignment + slop) does not increase from this point
+                * on, so reset minalignslop to ensure it is not included in
+                * subsequent requests.
+                */
+               args.minalignslop = 0;
        } else
                args.fsbno = NULLFSBLOCK;
 
@@ -1568,18 +1580,17 @@ xfs_agi_read_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
-       int             agi_ok = 1;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb))
-               agi_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                         offsetof(struct xfs_agi, agi_crc));
-       agi_ok = agi_ok && xfs_agi_verify(bp);
 
-       if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
-                       XFS_RANDOM_IALLOC_READ_AGI))) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+           !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp,
+                               XFS_ERRTAG_IALLOC_READ_AGI,
+                               XFS_RANDOM_IALLOC_READ_AGI))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 static void
@@ -1590,8 +1601,8 @@ xfs_agi_write_verify(
        struct xfs_buf_log_item *bip = bp->b_fspriv;
 
        if (!xfs_agi_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -1600,8 +1611,7 @@ xfs_agi_write_verify(
 
        if (bip)
                XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn);
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
-                        offsetof(struct xfs_agi, agi_crc));
+       xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF);
 }
 
 const struct xfs_buf_ops xfs_agi_buf_ops = {
index c8fa5bbb36de3163b9fc798fc80025815f46182e..7e309b11e87d75240cfdb41428d1f13dc7d470b8 100644 (file)
@@ -243,12 +243,14 @@ static void
 xfs_inobt_read_verify(
        struct xfs_buf  *bp)
 {
-       if (!(xfs_btree_sblock_verify_crc(bp) &&
-             xfs_inobt_verify(bp))) {
-               trace_xfs_btree_corrupt(bp, _RET_IP_);
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
-                                    bp->b_target->bt_mount, bp->b_addr);
+       if (!xfs_btree_sblock_verify_crc(bp))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_inobt_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+       if (bp->b_error) {
+               trace_xfs_btree_corrupt(bp, _RET_IP_);
+               xfs_verifier_error(bp);
        }
 }
 
@@ -258,9 +260,9 @@ xfs_inobt_write_verify(
 {
        if (!xfs_inobt_verify(bp)) {
                trace_xfs_btree_corrupt(bp, _RET_IP_);
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
-                                    bp->b_target->bt_mount, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               return;
        }
        xfs_btree_sblock_calc_crc(bp);
 
index 3a137e9f9a7dac3fb72f1687bf086b1273d83890..5e7a38fa6ee6bd82e43c05f66cdc6d4b8404225e 100644 (file)
@@ -42,7 +42,6 @@
 #include "xfs_bmap_util.h"
 #include "xfs_error.h"
 #include "xfs_quota.h"
-#include "xfs_dinode.h"
 #include "xfs_filestream.h"
 #include "xfs_cksum.h"
 #include "xfs_trace.h"
@@ -62,6 +61,8 @@ kmem_zone_t *xfs_inode_zone;
 
 STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
 
+STATIC int xfs_iunlink_remove(xfs_trans_t *, xfs_inode_t *);
+
 /*
  * helper function to extract extent size hint from inode
  */
@@ -1115,7 +1116,7 @@ xfs_bumplink(
 {
        xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
 
-       ASSERT(ip->i_d.di_nlink > 0);
+       ASSERT(ip->i_d.di_nlink > 0 || (VFS_I(ip)->i_state & I_LINKABLE));
        ip->i_d.di_nlink++;
        inc_nlink(VFS_I(ip));
        if ((ip->i_d.di_version == 1) &&
@@ -1165,10 +1166,7 @@ xfs_create(
        if (XFS_FORCED_SHUTDOWN(mp))
                return XFS_ERROR(EIO);
 
-       if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
-               prid = xfs_get_projid(dp);
-       else
-               prid = XFS_PROJID_DEFAULT;
+       prid = xfs_get_initial_prid(dp);
 
        /*
         * Make sure that we have allocated dquot(s) on disk.
@@ -1332,6 +1330,113 @@ xfs_create(
        return error;
 }
 
+int
+xfs_create_tmpfile(
+       struct xfs_inode        *dp,
+       struct dentry           *dentry,
+       umode_t                 mode)
+{
+       struct xfs_mount        *mp = dp->i_mount;
+       struct xfs_inode        *ip = NULL;
+       struct xfs_trans        *tp = NULL;
+       int                     error;
+       uint                    cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
+       prid_t                  prid;
+       struct xfs_dquot        *udqp = NULL;
+       struct xfs_dquot        *gdqp = NULL;
+       struct xfs_dquot        *pdqp = NULL;
+       struct xfs_trans_res    *tres;
+       uint                    resblks;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       prid = xfs_get_initial_prid(dp);
+
+       /*
+        * Make sure that we have allocated dquot(s) on disk.
+        *
+        * The dquots handed back in udqp/gdqp/pdqp are dropped again with
+        * xfs_qm_dqrele() on the success path and on the error labels at
+        * the bottom of this function.
+        */
+       error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()),
+                               xfs_kgid_to_gid(current_fsgid()), prid,
+                               XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
+                               &udqp, &gdqp, &pdqp);
+       if (error)
+               return error;
+
+       resblks = XFS_IALLOC_SPACE_RES(mp);
+       tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE_TMPFILE);
+
+       tres = &M_RES(mp)->tr_create_tmpfile;
+       error = xfs_trans_reserve(tp, tres, resblks, 0);
+       if (error == ENOSPC) {
+               /* No space at all so try a "no-allocation" reservation */
+               resblks = 0;
+               error = xfs_trans_reserve(tp, tres, 0, 0);
+       }
+       if (error) {
+               cancel_flags = 0;       /* cancel without XFS_TRANS_RELEASE_LOG_RES */
+               goto out_trans_cancel;
+       }
+
+       error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
+                                               pdqp, resblks, 1, 0);
+       if (error)
+               goto out_trans_cancel;
+
+       error = xfs_dir_ialloc(&tp, dp, mode, 1, 0,
+                               prid, resblks > 0, &ip, NULL);
+       if (error) {
+               if (error == ENOSPC)
+                       goto out_trans_cancel;  /* cancel without XFS_TRANS_ABORT */
+               goto out_trans_abort;
+       }
+
+       if (mp->m_flags & XFS_MOUNT_WSYNC)
+               xfs_trans_set_sync(tp);
+
+       /*
+        * Attach the dquot(s) to the inodes and modify them incore.
+        * These ids of the inode couldn't have changed since the new
+        * inode has been locked ever since it was created.
+        *
+        * Once the dquots are attached, the on-disk link count is
+        * decremented, the inode is instantiated into @dentry with
+        * d_tmpfile() and it is passed to xfs_iunlink(), all before the
+        * transaction is committed.
+        */
+       xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
+
+       ip->i_d.di_nlink--;
+       d_tmpfile(dentry, VFS_I(ip));
+       error = xfs_iunlink(tp, ip);
+       if (error)
+               goto out_trans_abort;
+
+       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       if (error)
+               goto out_release_inode;
+
+       xfs_qm_dqrele(udqp);
+       xfs_qm_dqrele(gdqp);
+       xfs_qm_dqrele(pdqp);
+
+       return 0;
+
+ out_trans_abort:
+       cancel_flags |= XFS_TRANS_ABORT;
+ out_trans_cancel:
+       xfs_trans_cancel(tp, cancel_flags);
+ out_release_inode:
+       /*
+        * Wait until after the current transaction is aborted to
+        * release the inode.  This prevents recursive transactions
+        * and deadlocks from xfs_inactive.
+        *
+        * This label is also reached directly when xfs_trans_commit()
+        * fails, in which case xfs_trans_cancel() above is not called.
+        * ip is still NULL if the failure happened before
+        * xfs_dir_ialloc() returned an inode.
+        */
+       if (ip)
+               IRELE(ip);
+
+       xfs_qm_dqrele(udqp);
+       xfs_qm_dqrele(gdqp);
+       xfs_qm_dqrele(pdqp);
+
+       return error;
+}
+
 int
 xfs_link(
        xfs_inode_t             *tdp,
@@ -1397,6 +1502,12 @@ xfs_link(
 
        xfs_bmap_init(&free_list, &first_block);
 
+       if (sip->i_d.di_nlink == 0) {
+               error = xfs_iunlink_remove(tp, sip);
+               if (error)
+                       goto abort_return;
+       }
+
        error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
                                        &first_block, &free_list, resblks);
        if (error)
index 65e2350f449c9c4c086b8e6cd0de1f07f82c7fc8..396cc1fafd0d5e358c5ea8ccc21d3525d2396bfa 100644 (file)
@@ -20,6 +20,7 @@
 
 #include "xfs_inode_buf.h"
 #include "xfs_inode_fork.h"
+#include "xfs_dinode.h"
 
 /*
  * Kernel only inode definitions
@@ -192,6 +193,15 @@ xfs_set_projid(struct xfs_inode *ip,
        ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
 }
 
+/*
+ * Select the project ID passed to inode allocation by the create paths:
+ * the project ID of @dp when @dp has XFS_DIFLAG_PROJINHERIT set, and
+ * XFS_PROJID_DEFAULT otherwise.
+ */
+static inline prid_t
+xfs_get_initial_prid(struct xfs_inode *dp)
+{
+       return (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ?
+                       xfs_get_projid(dp) : XFS_PROJID_DEFAULT;
+}
+
 /*
  * In-core inode flags.
  */
@@ -323,6 +333,8 @@ int         xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
                           struct xfs_inode **ipp, struct xfs_name *ci_name);
 int            xfs_create(struct xfs_inode *dp, struct xfs_name *name,
                           umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp);
+int            xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry,
+                          umode_t mode);
 int            xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
                           struct xfs_inode *ip);
 int            xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
index 4fc9f39dd89e7b8ed64e271ca6ced6bc43a191f6..24e993996bdcf90f9f955ed139d2783840082927 100644 (file)
@@ -102,8 +102,7 @@ xfs_inode_buf_verify(
                        }
 
                        xfs_buf_ioerror(bp, EFSCORRUPTED);
-                       XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
-                                            mp, dip);
+                       xfs_verifier_error(bp);
 #ifdef DEBUG
                        xfs_alert(mp,
                                "bad inode magic/vsn daddr %lld #%d (magic=%x)",
@@ -306,7 +305,7 @@ xfs_dinode_verify(
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                return false;
        if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
-                             offsetof(struct xfs_dinode, di_crc)))
+                             XFS_DINODE_CRC_OFF))
                return false;
        if (be64_to_cpu(dip->di_ino) != ip->i_ino)
                return false;
@@ -327,7 +326,7 @@ xfs_dinode_calc_crc(
 
        ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
        crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
-                             offsetof(struct xfs_dinode, di_crc));
+                             XFS_DINODE_CRC_OFF);
        dip->di_crc = xfs_end_cksum(crc);
 }
 
index 22d1cbea283d4734515218ef65b23ec78bfdeff6..3b80ebae05f52eb7d3593b0df92ec834f319948c 100644 (file)
@@ -128,7 +128,6 @@ xfs_iomap_write_direct(
        xfs_fsblock_t   firstfsb;
        xfs_extlen_t    extsz, temp;
        int             nimaps;
-       int             bmapi_flag;
        int             quota_flag;
        int             rt;
        xfs_trans_t     *tp;
@@ -200,18 +199,15 @@ xfs_iomap_write_direct(
 
        xfs_trans_ijoin(tp, ip, 0);
 
-       bmapi_flag = 0;
-       if (offset < XFS_ISIZE(ip) || extsz)
-               bmapi_flag |= XFS_BMAPI_PREALLOC;
-
        /*
         * From this point onwards we overwrite the imap pointer that the
         * caller gave to us.
         */
        xfs_bmap_init(&free_list, &firstfsb);
        nimaps = 1;
-       error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag,
-                               &firstfsb, 0, imap, &nimaps, &free_list);
+       error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
+                               XFS_BMAPI_PREALLOC, &firstfsb, 0,
+                               imap, &nimaps, &free_list);
        if (error)
                goto out_bmap_cancel;
 
index 9ddfb8190ca1cd56b5f1cd2247e9f54925fce68f..89b07e43ca28811349db39aa1ab2534de220fd87 100644 (file)
@@ -39,6 +39,7 @@
 #include "xfs_da_btree.h"
 #include "xfs_dir2_priv.h"
 #include "xfs_dinode.h"
+#include "xfs_trans_space.h"
 
 #include <linux/capability.h>
 #include <linux/xattr.h>
 #include <linux/fiemap.h>
 #include <linux/slab.h>
 
+/*
+ * Directories have different lock order w.r.t. mmap_sem compared to regular
+ * files. This is due to readdir potentially triggering page faults on a user
+ * buffer inside filldir(), and this happens with the ilock on the directory
+ * held. For regular files, the lock order is the other way around - the
+ * mmap_sem is taken during the page fault, and then we lock the ilock to do
+ * block mapping. Hence we need a different class for the directory ilock so
+ * that lockdep can tell them apart.
+ */
+static struct lock_class_key xfs_nondir_ilock_class;
+static struct lock_class_key xfs_dir_ilock_class;
+
 static int
 xfs_initxattrs(
        struct inode            *inode,
@@ -1034,6 +1047,19 @@ xfs_vn_fiemap(
        return 0;
 }
 
+/*
+ * ->tmpfile inode operation for XFS directories.
+ *
+ * xfs_create_tmpfile() reports failure as a positive errno value, so the
+ * result is negated before it is handed back to the caller.
+ */
+STATIC int
+xfs_vn_tmpfile(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       umode_t         mode)
+{
+       return -xfs_create_tmpfile(XFS_I(dir), dentry, mode);
+}
+
 static const struct inode_operations xfs_inode_operations = {
        .get_acl                = xfs_get_acl,
        .set_acl                = xfs_set_acl,
@@ -1072,6 +1098,7 @@ static const struct inode_operations xfs_dir_inode_operations = {
        .removexattr            = generic_removexattr,
        .listxattr              = xfs_vn_listxattr,
        .update_time            = xfs_vn_update_time,
+       .tmpfile                = xfs_vn_tmpfile,
 };
 
 static const struct inode_operations xfs_dir_ci_inode_operations = {
@@ -1099,6 +1126,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
        .removexattr            = generic_removexattr,
        .listxattr              = xfs_vn_listxattr,
        .update_time            = xfs_vn_update_time,
+       .tmpfile                = xfs_vn_tmpfile,
 };
 
 static const struct inode_operations xfs_symlink_inode_operations = {
@@ -1191,6 +1219,7 @@ xfs_setup_inode(
        xfs_diflags_to_iflags(inode, ip);
 
        ip->d_ops = ip->i_mount->m_nondir_inode_ops;
+       lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
        switch (inode->i_mode & S_IFMT) {
        case S_IFREG:
                inode->i_op = &xfs_inode_operations;
@@ -1198,6 +1227,7 @@ xfs_setup_inode(
                inode->i_mapping->a_ops = &xfs_address_space_operations;
                break;
        case S_IFDIR:
+               lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
                if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
                        inode->i_op = &xfs_dir_ci_inode_operations;
                else
index f9bb590acc0ebfd38a4aaaee3baaa5b0bf6ec505..825249d2dfc1a740b6c5523ac1808af53776efb0 100644 (file)
@@ -119,6 +119,7 @@ typedef __uint64_t __psunsigned_t;
 #include "xfs_iops.h"
 #include "xfs_aops.h"
 #include "xfs_super.h"
+#include "xfs_cksum.h"
 #include "xfs_buf.h"
 #include "xfs_message.h"
 
@@ -178,6 +179,7 @@ typedef __uint64_t __psunsigned_t;
 #define ENOATTR                ENODATA         /* Attribute not found */
 #define EWRONGFS       EINVAL          /* Mount with wrong filesystem type */
 #define EFSCORRUPTED   EUCLEAN         /* Filesystem is corrupted */
+#define EFSBADCRC      EBADMSG         /* Bad CRC detected */
 
 #define SYNCHRONIZE()  barrier()
 #define __return_address __builtin_return_address(0)
index b0f4ef77fa70bd698749e2509e64d79f782fa8aa..2c4004475e71af25dbfa4a6e853af5cfd09bac75 100644 (file)
@@ -175,7 +175,7 @@ void          xlog_iodone(struct xfs_buf *);
 struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
 void     xfs_log_ticket_put(struct xlog_ticket *ticket);
 
-int    xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
+void   xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
                                xfs_lsn_t *commit_lsn, int flags);
 bool   xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
 
index 4ef6fdbced78b7e99680f9e4e8afbb502a7bce92..7e54553911764e556637855bea0d4dcfdc3e50fd 100644 (file)
@@ -498,13 +498,6 @@ xlog_cil_push(
        new_ctx->cil = cil;
        cil->xc_ctx = new_ctx;
 
-       /*
-        * mirror the new sequence into the cil structure so that we can do
-        * unlocked checks against the current sequence in log forces without
-        * risking deferencing a freed context pointer.
-        */
-       cil->xc_current_sequence = new_ctx->sequence;
-
        /*
         * The switch is now done, so we can drop the context lock and move out
         * of a shared context. We can't just go straight to the commit record,
@@ -523,8 +516,15 @@ xlog_cil_push(
         * Hence we need to add this context to the committing context list so
         * that higher sequences will wait for us to write out a commit record
         * before they do.
+        *
+        * xfs_log_force_lsn requires us to mirror the new sequence into the cil
+        * structure atomically with the addition of this sequence to the
+        * committing list. This also ensures that we can do unlocked checks
+        * against the current sequence in log forces without risking
+        * dereferencing a freed context pointer.
         */
        spin_lock(&cil->xc_push_lock);
+       cil->xc_current_sequence = new_ctx->sequence;
        list_add(&ctx->committing, &cil->xc_committing);
        spin_unlock(&cil->xc_push_lock);
        up_write(&cil->xc_ctx_lock);
@@ -662,8 +662,14 @@ xlog_cil_push_background(
 
 }
 
+/*
+ * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence
+ * number that is passed. When it returns, the work will be queued for
+ * @push_seq, but it won't be completed. The caller is expected to do any
+ * waiting for push_seq to complete if it is required.
+ */
 static void
-xlog_cil_push_foreground(
+xlog_cil_push_now(
        struct xlog     *log,
        xfs_lsn_t       push_seq)
 {
@@ -688,10 +694,8 @@ xlog_cil_push_foreground(
        }
 
        cil->xc_push_seq = push_seq;
+       queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
        spin_unlock(&cil->xc_push_lock);
-
-       /* do the push now */
-       xlog_cil_push(log);
 }
 
 bool
@@ -721,7 +725,7 @@ xlog_cil_empty(
  * background commit, returns without it held once background commits are
  * allowed again.
  */
-int
+void
 xfs_log_commit_cil(
        struct xfs_mount        *mp,
        struct xfs_trans        *tp,
@@ -767,7 +771,6 @@ xfs_log_commit_cil(
        xlog_cil_push_background(log);
 
        up_read(&cil->xc_ctx_lock);
-       return 0;
 }
 
 /*
@@ -796,7 +799,8 @@ xlog_cil_force_lsn(
         * xlog_cil_push() handles racing pushes for the same sequence,
         * so no need to deal with it here.
         */
-       xlog_cil_push_foreground(log, sequence);
+restart:
+       xlog_cil_push_now(log, sequence);
 
        /*
         * See if we can find a previous sequence still committing.
@@ -804,7 +808,6 @@ xlog_cil_force_lsn(
         * before allowing the force of push_seq to go ahead. Hence block
         * on commits for those as well.
         */
-restart:
        spin_lock(&cil->xc_push_lock);
        list_for_each_entry(ctx, &cil->xc_committing, committing) {
                if (ctx->sequence > sequence)
@@ -822,6 +825,28 @@ restart:
                /* found it! */
                commit_lsn = ctx->commit_lsn;
        }
+
+       /*
+        * The call to xlog_cil_push_now() executes the push in the background.
+        * Hence by the time we have got here our sequence may not have been
+        * pushed yet. This is true if the current sequence still matches the
+        * push sequence after the above wait loop and the CIL still contains
+        * dirty objects.
+        *
+        * When the push occurs, it will empty the CIL and
+        * atomically increment the current sequence past the push sequence and
+        * move it into the committing list. Of course, if the CIL is clean at
+        * the time of the push, it won't have pushed the CIL at all, so in that
+        * case we should try the push for this sequence again from the start
+        * just in case.
+        */
+
+       if (sequence == cil->xc_current_sequence &&
+           !list_empty(&cil->xc_cil)) {
+               spin_unlock(&cil->xc_push_lock);
+               goto restart;
+       }
+
        spin_unlock(&cil->xc_push_lock);
        return commit_lsn;
 }
index f96c05669a9e06298980bd14e377e34536d0119b..993cb19e7d390e03220f265e9ba97e0f0c8dc7fa 100644 (file)
@@ -314,6 +314,9 @@ reread:
                error = bp->b_error;
                if (loud)
                        xfs_warn(mp, "SB validate failed with error %d.", error);
+               /* bad CRC means corrupted metadata */
+               if (error == EFSBADCRC)
+                       error = EFSCORRUPTED;
                goto release_buf;
        }
 
index a6a76b2b6a85db9ece8acb0565e82e310319ec9d..ec5ca65c62116e62d509b987c92c983e79d3cbb6 100644 (file)
@@ -842,7 +842,7 @@ xfs_growfs_rt_alloc(
                /*
                 * Reserve space & log for one extent added to the file.
                 */
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata,
+               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtalloc,
                                          resblks, 0);
                if (error)
                        goto error_cancel;
index 1e116794bb6622d686487f36a461f8f644fd27c2..0c0e41bbe4e369d7bf5267ed906e59159a4e2ff7 100644 (file)
@@ -288,6 +288,7 @@ xfs_mount_validate_sb(
            sbp->sb_inodelog < XFS_DINODE_MIN_LOG                       ||
            sbp->sb_inodelog > XFS_DINODE_MAX_LOG                       ||
            sbp->sb_inodesize != (1 << sbp->sb_inodelog)                ||
+           sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||
            (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)   ||
            (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)  ||
            (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)  ||
@@ -610,12 +611,11 @@ xfs_sb_read_verify(
                                                XFS_SB_VERSION_5) ||
             dsb->sb_crc != 0)) {
 
-               if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                     offsetof(struct xfs_sb, sb_crc))) {
+               if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) {
                        /* Only fail bad secondaries on a known V5 filesystem */
                        if (bp->b_bn == XFS_SB_DADDR ||
                            xfs_sb_version_hascrc(&mp->m_sb)) {
-                               error = EFSCORRUPTED;
+                               error = EFSBADCRC;
                                goto out_error;
                        }
                }
@@ -624,10 +624,9 @@ xfs_sb_read_verify(
 
 out_error:
        if (error) {
-               if (error == EFSCORRUPTED)
-                       XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
-                                            mp, bp->b_addr);
                xfs_buf_ioerror(bp, error);
+               if (error == EFSCORRUPTED || error == EFSBADCRC)
+                       xfs_verifier_error(bp);
        }
 }
 
@@ -662,9 +661,8 @@ xfs_sb_write_verify(
 
        error = xfs_sb_verify(bp, false);
        if (error) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
-                                    mp, bp->b_addr);
                xfs_buf_ioerror(bp, error);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -674,8 +672,7 @@ xfs_sb_write_verify(
        if (bip)
                XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
-                        offsetof(struct xfs_sb, sb_crc));
+       xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF);
 }
 
 const struct xfs_buf_ops xfs_sb_buf_ops = {
index 35061d4b614c7ab9fabb80e1b93ffb6bc8b586d2..f7b2fe77c5a5bf095a8b01e793fda242c2968cc4 100644 (file)
@@ -182,6 +182,8 @@ typedef struct xfs_sb {
        /* must be padded to 64 bit alignment */
 } xfs_sb_t;
 
+#define XFS_SB_CRC_OFF         offsetof(struct xfs_sb, sb_crc)
+
 /*
  * Superblock - on disk version.  Must match the in core version above.
  * Must be padded to 64 bit alignment.
index 8c5035a13df1a0fd6033a8f222703a9ff50e20c5..4484e515139507fe9e69912da6eb9df6c50e7402 100644 (file)
@@ -104,7 +104,8 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops;
 #define        XFS_TRANS_SB_COUNT              41
 #define        XFS_TRANS_CHECKPOINT            42
 #define        XFS_TRANS_ICREATE               43
-#define        XFS_TRANS_TYPE_MAX              43
+#define        XFS_TRANS_CREATE_TMPFILE        44
+#define        XFS_TRANS_TYPE_MAX              44
 /* new transaction types need to be reflected in xfs_logprint(8) */
 
 #define XFS_TRANS_TYPES \
@@ -112,6 +113,7 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops;
        { XFS_TRANS_SETATTR_SIZE,       "SETATTR_SIZE" }, \
        { XFS_TRANS_INACTIVE,           "INACTIVE" }, \
        { XFS_TRANS_CREATE,             "CREATE" }, \
+       { XFS_TRANS_CREATE_TMPFILE,     "CREATE_TMPFILE" }, \
        { XFS_TRANS_CREATE_TRUNC,       "CREATE_TRUNC" }, \
        { XFS_TRANS_TRUNCATE_FILE,      "TRUNCATE_FILE" }, \
        { XFS_TRANS_REMOVE,             "REMOVE" }, \
index 14e58f2c96bd71708f1c608536b835b25f05e795..52979aa90986ca3a1a6caada967a460567245265 100644 (file)
@@ -80,6 +80,10 @@ xfs_readlink_bmap(
                if (error) {
                        xfs_buf_ioerror_alert(bp, __func__);
                        xfs_buf_relse(bp);
+
+                       /* bad CRC means corrupted metadata */
+                       if (error == EFSBADCRC)
+                               error = EFSCORRUPTED;
                        goto out;
                }
                byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
@@ -208,10 +212,7 @@ xfs_symlink(
                return XFS_ERROR(ENAMETOOLONG);
 
        udqp = gdqp = NULL;
-       if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
-               prid = xfs_get_projid(dp);
-       else
-               prid = XFS_PROJID_DEFAULT;
+       prid = xfs_get_initial_prid(dp);
 
        /*
         * Make sure that we have allocated dquot(s) on disk.
index bf59a2b45f8c40c431de3e8f52f3131d80d68a1c..9b32052ff65e771c55814afa8dcb26a8d2f0e42c 100644 (file)
@@ -133,12 +133,13 @@ xfs_symlink_read_verify(
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                return;
 
-       if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                 offsetof(struct xfs_dsymlink_hdr, sl_crc)) ||
-           !xfs_symlink_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_symlink_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 static void
@@ -153,8 +154,8 @@ xfs_symlink_write_verify(
                return;
 
        if (!xfs_symlink_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -162,8 +163,7 @@ xfs_symlink_write_verify(
                struct xfs_dsymlink_hdr *dsl = bp->b_addr;
                dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
        }
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
-                        offsetof(struct xfs_dsymlink_hdr, sl_crc));
+       xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF);
 }
 
 const struct xfs_buf_ops xfs_symlink_buf_ops = {
index 425dfa45b9a087472676cf4f832138316d4b0fa4..a4ae41c179a8a66a5772914a61642b8a53be1c4b 100644 (file)
@@ -603,6 +603,7 @@ DEFINE_INODE_EVENT(xfs_readlink);
 DEFINE_INODE_EVENT(xfs_inactive_symlink);
 DEFINE_INODE_EVENT(xfs_alloc_file_space);
 DEFINE_INODE_EVENT(xfs_free_file_space);
+DEFINE_INODE_EVENT(xfs_collapse_file_space);
 DEFINE_INODE_EVENT(xfs_readdir);
 #ifdef CONFIG_XFS_POSIX_ACL
 DEFINE_INODE_EVENT(xfs_get_acl);
index c812c5c060de1caa7532f1cdcc63766a4a227207..54a57326d85b1e7b2fc84b5761773453a6c02eaf 100644 (file)
@@ -887,12 +887,7 @@ xfs_trans_commit(
                xfs_trans_apply_sb_deltas(tp);
        xfs_trans_apply_dquot_deltas(tp);
 
-       error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
-       if (error == ENOMEM) {
-               xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
-               error = XFS_ERROR(EIO);
-               goto out_unreserve;
-       }
+       xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
 
        current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
        xfs_trans_free(tp);
@@ -902,10 +897,7 @@ xfs_trans_commit(
         * log out now and wait for it.
         */
        if (sync) {
-               if (!error) {
-                       error = _xfs_log_force_lsn(mp, commit_lsn,
-                                     XFS_LOG_SYNC, NULL);
-               }
+               error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
                XFS_STATS_INC(xs_trans_sync);
        } else {
                XFS_STATS_INC(xs_trans_async);
index 647b6f1d8923fee484ada5eea3e2e68d47df8286..b8eef0549f3f9a39cc68d06cf7a9cbcb04af469f 100644 (file)
@@ -275,6 +275,10 @@ xfs_trans_read_buf_map(
                        XFS_BUF_UNDONE(bp);
                        xfs_buf_stale(bp);
                        xfs_buf_relse(bp);
+
+                       /* bad CRC means corrupted metadata */
+                       if (error == EFSBADCRC)
+                               error = EFSCORRUPTED;
                        return error;
                }
 #ifdef DEBUG
@@ -338,6 +342,9 @@ xfs_trans_read_buf_map(
                                if (tp->t_flags & XFS_TRANS_DIRTY)
                                        xfs_force_shutdown(tp->t_mountp,
                                                        SHUTDOWN_META_IO_ERROR);
+                               /* bad CRC means corrupted metadata */
+                               if (error == EFSBADCRC)
+                                       error = EFSCORRUPTED;
                                return error;
                        }
                }
@@ -375,6 +382,10 @@ xfs_trans_read_buf_map(
                if (tp->t_flags & XFS_TRANS_DIRTY)
                        xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR);
                xfs_buf_relse(bp);
+
+               /* bad CRC means corrupted metadata */
+               if (error == EFSBADCRC)
+                       error = EFSCORRUPTED;
                return error;
        }
 #ifdef DEBUG
index 2ffd3e331b496ae3f366b6669e3207f15becbd4f..ae368165244d49458bd4db2cc4688d196f2e2098 100644 (file)
@@ -81,20 +81,28 @@ xfs_calc_buf_res(
  * on disk. Hence we need an inode reservation function that calculates all this
  * correctly. So, we log:
  *
- * - log op headers for object
+ * - 4 log op headers for object
+ *     - for the ilf, the inode core and 2 forks
  * - inode log format object
- * - the entire inode contents (core + 2 forks)
- * - two bmap btree block headers
+ * - the inode core
+ * - two inode forks containing bmap btree root blocks.
+ *     - the btree data contained by both forks will fit into the inode size,
+ *       hence when combined with the inode core above, we have a total of the
+ *       actual inode size.
+ *     - the BMBT headers need to be accounted separately, as they are
+ *       additional to the records and pointers that fit inside the inode
+ *       forks.
  */
 STATIC uint
 xfs_calc_inode_res(
        struct xfs_mount        *mp,
        uint                    ninodes)
 {
-       return ninodes * (sizeof(struct xlog_op_header) +
-                         sizeof(struct xfs_inode_log_format) +
-                         mp->m_sb.sb_inodesize +
-                         2 * XFS_BMBT_BLOCK_LEN(mp));
+       return ninodes *
+               (4 * sizeof(struct xlog_op_header) +
+                sizeof(struct xfs_inode_log_format) +
+                mp->m_sb.sb_inodesize +
+                2 * XFS_BMBT_BLOCK_LEN(mp));
 }
 
 /*
@@ -203,6 +211,19 @@ xfs_calc_rename_reservation(
                                      XFS_FSB_TO_B(mp, 1))));
 }
 
+/*
+ * For removing an inode from unlinked list at first, we can modify:
+ *    the agi hash list and counters: sector size
+ *    the on disk inode before ours in the agi hash list: inode cluster size
+ */
+STATIC uint
+xfs_calc_iunlink_remove_reservation(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+              max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+}
+
 /*
  * For creating a link to an inode:
  *    the parent directory inode: inode size
@@ -220,6 +241,7 @@ xfs_calc_link_reservation(
        struct xfs_mount        *mp)
 {
        return XFS_DQUOT_LOGRES(mp) +
+               xfs_calc_iunlink_remove_reservation(mp) +
                MAX((xfs_calc_inode_res(mp, 2) +
                     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
                                      XFS_FSB_TO_B(mp, 1))),
@@ -228,6 +250,18 @@ xfs_calc_link_reservation(
                                      XFS_FSB_TO_B(mp, 1))));
 }
 
+/*
+ * For adding an inode to unlinked list we can modify:
+ *    the agi hash list: sector size
+ *    the unlinked inode: inode size
+ */
+STATIC uint
+xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
+{
+       return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+               xfs_calc_inode_res(mp, 1);
+}
+
 /*
  * For removing a directory entry we can modify:
  *    the parent directory inode: inode size
@@ -245,10 +279,11 @@ xfs_calc_remove_reservation(
        struct xfs_mount        *mp)
 {
        return XFS_DQUOT_LOGRES(mp) +
-               MAX((xfs_calc_inode_res(mp, 2) +
+               xfs_calc_iunlink_add_reservation(mp) +
+               MAX((xfs_calc_inode_res(mp, 1) +
                     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
                                      XFS_FSB_TO_B(mp, 1))),
-                   (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+                   (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
                     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
                                      XFS_FSB_TO_B(mp, 1))));
 }
@@ -343,6 +378,20 @@ xfs_calc_create_reservation(
 
 }
 
+STATIC uint
+xfs_calc_create_tmpfile_reservation(
+       struct xfs_mount        *mp)
+{
+       uint    res = XFS_DQUOT_LOGRES(mp);
+
+       if (xfs_sb_version_hascrc(&mp->m_sb))
+               res += xfs_calc_icreate_resv_alloc(mp);
+       else
+               res += xfs_calc_create_resv_alloc(mp);
+
+       return res + xfs_calc_iunlink_add_reservation(mp);
+}
+
 /*
  * Making a new directory is the same as creating a new file.
  */
@@ -383,9 +432,9 @@ xfs_calc_ifree_reservation(
 {
        return XFS_DQUOT_LOGRES(mp) +
                xfs_calc_inode_res(mp, 1) +
-               xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+               xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
                xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
-               max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size) +
+               xfs_calc_iunlink_remove_reservation(mp) +
                xfs_calc_buf_res(1, 0) +
                xfs_calc_buf_res(2 + mp->m_ialloc_blks +
                                 mp->m_in_maxlevels, 0) +
@@ -644,15 +693,14 @@ xfs_calc_qm_setqlim_reservation(
 
 /*
  * Allocating quota on disk if needed.
- *     the write transaction log space: M_RES(mp)->tr_write.tr_logres
+ *     the write transaction log space for quota file extent allocation
  *     the unit of quota allocation: one system block size
  */
 STATIC uint
 xfs_calc_qm_dqalloc_reservation(
        struct xfs_mount        *mp)
 {
-       ASSERT(M_RES(mp)->tr_write.tr_logres);
-       return M_RES(mp)->tr_write.tr_logres +
+       return xfs_calc_write_reservation(mp) +
                xfs_calc_buf_res(1,
                        XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
 }
@@ -729,6 +777,11 @@ xfs_trans_resv_calc(
        resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
        resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 
+       resp->tr_create_tmpfile.tr_logres =
+                       xfs_calc_create_tmpfile_reservation(mp);
+       resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
+       resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
        resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
        resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT;
        resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
@@ -784,7 +837,6 @@ xfs_trans_resv_calc(
        /* The following transaction are logged in logical format */
        resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
        resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
-       resp->tr_swrite.tr_logres = xfs_calc_swrite_reservation(mp);
        resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
        resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
        resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
index de7de9aaad8a7bef18a18ca54ecc2a909d0ef466..1097d14cd583f974559b33c170467220c8cae2c7 100644 (file)
@@ -38,11 +38,11 @@ struct xfs_trans_resv {
        struct xfs_trans_res    tr_remove;      /* unlink trans */
        struct xfs_trans_res    tr_symlink;     /* symlink trans */
        struct xfs_trans_res    tr_create;      /* create trans */
+       struct xfs_trans_res    tr_create_tmpfile; /* create O_TMPFILE trans */
        struct xfs_trans_res    tr_mkdir;       /* mkdir trans */
        struct xfs_trans_res    tr_ifree;       /* inode free trans */
        struct xfs_trans_res    tr_ichange;     /* inode update trans */
        struct xfs_trans_res    tr_growdata;    /* fs data section grow trans */
-       struct xfs_trans_res    tr_swrite;      /* sync write inode trans */
        struct xfs_trans_res    tr_addafork;    /* add inode attr fork trans */
        struct xfs_trans_res    tr_writeid;     /* write setuid/setgid file */
        struct xfs_trans_res    tr_attrinval;   /* attr fork buffer
@@ -100,6 +100,7 @@ struct xfs_trans_resv {
 #define        XFS_ITRUNCATE_LOG_COUNT         2
 #define XFS_INACTIVE_LOG_COUNT         2
 #define        XFS_CREATE_LOG_COUNT            2
+#define        XFS_CREATE_TMPFILE_LOG_COUNT    2
 #define        XFS_MKDIR_LOG_COUNT             3
 #define        XFS_SYMLINK_LOG_COUNT           3
 #define        XFS_REMOVE_LOG_COUNT            2
index ea80f1cdff064b7c3bf330d186b9954a06985f9b..81048f9bc7837e3ce32fb12dddf158a09fbaf302 100644 (file)
@@ -2550,6 +2550,9 @@ enum {
 
        /* filesystem does not support filling holes */
        DIO_SKIP_HOLES  = 0x02,
+
+       /* filesystem can handle aio writes beyond i_size */
+       DIO_ASYNC_EXTEND = 0x04,
 };
 
 void dio_end_io(struct bio *bio, int error);
index 990c4ccf8b61d575a6db2a787896b027e4789698..d1197ae3723ce5193b0526ea8dfa0511d39c8407 100644 (file)
@@ -5,5 +5,40 @@
 #define FALLOC_FL_PUNCH_HOLE   0x02 /* de-allocates range */
 #define FALLOC_FL_NO_HIDE_STALE        0x04 /* reserved codepoint */
 
+/*
+ * FALLOC_FL_COLLAPSE_RANGE is used to remove a range of a file
+ * without leaving a hole in the file. The contents of the file beyond
+ * the range being removed are appended to the start offset of the range
+ * being removed (i.e. the hole that was punched is "collapsed"),
+ * resulting in a file layout that looks like the range that was
+ * removed never existed. As such collapsing a range of a file changes
+ * the size of the file, reducing it by the same length of the range
+ * that has been removed by the operation.
+ *
+ * Different filesystems may implement different limitations on the
+ * granularity of the operation. Most will limit operations to
+ * filesystem block size boundaries, but this boundary may be larger or
+ * smaller depending on the filesystem and/or the configuration of the
+ * filesystem or file.
+ *
+ * Attempting to collapse a range that crosses the end of the file is
+ * considered an illegal operation - just use ftruncate(2) if you need
+ * to collapse a range that crosses EOF.
+ */
+#define FALLOC_FL_COLLAPSE_RANGE       0x08
+
+/*
+ * FALLOC_FL_ZERO_RANGE is used to convert a range of file to zeros preferably
+ * without issuing data IO. Blocks should be preallocated for the regions that
+ * span holes in the file, and the entire range is preferably converted to
+ * unwritten extents - even though file system may choose to zero out the
+ * extent or do whatever which will result in reading zeros from the range
+ * while the range remains allocated for the file.
+ *
+ * This can also be used to preallocate blocks past EOF in the same way as
+ * with fallocate. Flag FALLOC_FL_KEEP_SIZE should cause the inode
+ * size to remain the same.
+ */
+#define FALLOC_FL_ZERO_RANGE           0x10
 
 #endif /* _UAPI_FALLOC_H_ */