Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

author Linus Torvalds <torvalds@linux-foundation.org>

Wed, 2 Jan 2013 17:57:34 +0000 (09:57 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 2 Jan 2013 17:57:34 +0000 (09:57 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 2 Jan 2013 17:57:34 +0000 (09:57 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 2 Jan 2013 17:57:34 +0000 (09:57 -0800)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c

index 26af22832a846d43d94c20e67e92a2ab825d835c..5ae1674ec12f16b7eea6635244f9b68f252d9ba2 100644 (file)
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2226,13 +2226,14 @@ errout:
   * removes index from the index block.
   */
  static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
-                       struct ext4_ext_path *path)
+                       struct ext4_ext_path *path, int depth)
  {
         int err;
         ext4_fsblk_t leaf;
  
         /* free index block */
-       path--;
+       depth--;
+       path = path + depth;
         leaf = ext4_idx_pblock(path->p_idx);
         if (unlikely(path->p_hdr->eh_entries == 0)) {
                 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
@@ -2257,6 +2258,19 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
  
         ext4_free_blocks(handle, inode, NULL, leaf, 1,
                          EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
+
+       while (--depth >= 0) {
+               if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
+                       break;
+               path--;
+               err = ext4_ext_get_access(handle, inode, path);
+               if (err)
+                       break;
+               path->p_idx->ei_block = (path+1)->p_idx->ei_block;
+               err = ext4_ext_dirty(handle, inode, path);
+               if (err)
+                       break;
+       }
         return err;
  }
  
@@ -2599,7 +2613,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
         /* if this leaf is free, then we should
          * remove it from index block above */
         if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
-               err = ext4_ext_rm_idx(handle, inode, path + depth);
+               err = ext4_ext_rm_idx(handle, inode, path, depth);
  
  out:
         return err;
@@ -2802,7 +2816,7 @@ again:
                                 /* index is empty, remove it;
                                  * handle must be already prepared by the
                                  * truncatei_leaf() */
-                               err = ext4_ext_rm_idx(handle, inode, path + i);
+                               err = ext4_ext_rm_idx(handle, inode, path, i);
                         }
                         /* root level has p_bh == NULL, brelse() eats this */
                         brelse(path[i].p_bh);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c

index d07c27ca594a4578887ed898e71c9b60684730df..405565a62277c5bbbf9d5ea91efbeea4118531b2 100644 (file)
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -108,14 +108,6 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
  
         /* Unaligned direct AIO must be serialized; see comment above */
         if (unaligned_aio) {
-               static unsigned long unaligned_warn_time;
-
-               /* Warn about this once per day */
-               if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
-                       ext4_msg(inode->i_sb, KERN_WARNING,
-                                "Unaligned AIO/DIO on inode %ld by %s; "
-                                "performance will be poor.",
-                                inode->i_ino, current->comm);
                 mutex_lock(ext4_aio_mutex(inode));
                 ext4_unwritten_wait(inode);
         }
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c

index dfbc1fe9667487518d965ecb361d1724f822f983..3278e64e57b61ac51a41db3ceebeecd21003985a 100644 (file)
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -109,8 +109,6 @@ static int __sync_inode(struct inode *inode, int datasync)
   *
   * What we do is just kick off a commit and wait on it.  This will snapshot the
   * inode to disk.
- *
- * i_mutex lock is held when entering and exiting this function
   */
  
  int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index cb1c1ab2720bd6c08c641879adb5ac77c11ca104..cbfe13bf5b2aa3f39b4845fe4fce4f45a02b5f43 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2880,8 +2880,6 @@ static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offs
  
  static void ext4_invalidatepage(struct page *page, unsigned long offset)
  {
-       journal_t *journal = EXT4_JOURNAL(page->mapping->host);
-
         trace_ext4_invalidatepage(page, offset);
  
         /*
@@ -2889,16 +2887,34 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
          */
         if (ext4_should_dioread_nolock(page->mapping->host))
                 ext4_invalidatepage_free_endio(page, offset);
+
+       /* No journalling happens on data buffers when this function is used */
+       WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page)));
+
+       block_invalidatepage(page, offset);
+}
+
+static int __ext4_journalled_invalidatepage(struct page *page,
+                                           unsigned long offset)
+{
+       journal_t *journal = EXT4_JOURNAL(page->mapping->host);
+
+       trace_ext4_journalled_invalidatepage(page, offset);
+
         /*
          * If it's a full truncate we just forget about the pending dirtying
          */
         if (offset == 0)
                 ClearPageChecked(page);
  
-       if (journal)
-               jbd2_journal_invalidatepage(journal, page, offset);
-       else
-               block_invalidatepage(page, offset);
+       return jbd2_journal_invalidatepage(journal, page, offset);
+}
+
+/* Wrapper for aops... */
+static void ext4_journalled_invalidatepage(struct page *page,
+                                          unsigned long offset)
+{
+       WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0);
  }
  
  static int ext4_releasepage(struct page *page, gfp_t wait)
@@ -3264,7 +3280,7 @@ static const struct address_space_operations ext4_journalled_aops = {
         .write_end              = ext4_journalled_write_end,
         .set_page_dirty         = ext4_journalled_set_page_dirty,
         .bmap                   = ext4_bmap,
-       .invalidatepage         = ext4_invalidatepage,
+       .invalidatepage         = ext4_journalled_invalidatepage,
         .releasepage            = ext4_releasepage,
         .direct_IO              = ext4_direct_IO,
         .is_partially_uptodate  = block_is_partially_uptodate,
@@ -4304,6 +4320,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
         return err;
  }
  
+/*
+ * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate
+ * buffers that are attached to a page straddling i_size and are undergoing
+ * commit. In that case we have to wait for commit to finish and try again.
+ */
+static void ext4_wait_for_tail_page_commit(struct inode *inode)
+{
+       struct page *page;
+       unsigned offset;
+       journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+       tid_t commit_tid = 0;
+       int ret;
+
+       offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
+       /*
+        * All buffers in the last page remain valid? Then there's nothing to
+        * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE ==
+        * blocksize case
+        */
+       if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits))
+               return;
+       while (1) {
+               page = find_lock_page(inode->i_mapping,
+                                     inode->i_size >> PAGE_CACHE_SHIFT);
+               if (!page)
+                       return;
+               ret = __ext4_journalled_invalidatepage(page, offset);
+               unlock_page(page);
+               page_cache_release(page);
+               if (ret != -EBUSY)
+                       return;
+               commit_tid = 0;
+               read_lock(&journal->j_state_lock);
+               if (journal->j_committing_transaction)
+                       commit_tid = journal->j_committing_transaction->t_tid;
+               read_unlock(&journal->j_state_lock);
+               if (commit_tid)
+                       jbd2_log_wait_commit(journal, commit_tid);
+       }
+}
+
  /*
   * ext4_setattr()
   *
@@ -4417,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
         }
  
         if (attr->ia_valid & ATTR_SIZE) {
-               if (attr->ia_size != i_size_read(inode)) {
-                       truncate_setsize(inode, attr->ia_size);
-                       /* Inode size will be reduced, wait for dio in flight.
-                        * Temporarily disable dioread_nolock to prevent
-                        * livelock. */
+               if (attr->ia_size != inode->i_size) {
+                       loff_t oldsize = inode->i_size;
+
+                       i_size_write(inode, attr->ia_size);
+                       /*
+                        * Blocks are going to be removed from the inode. Wait
+                        * for dio in flight.  Temporarily disable
+                        * dioread_nolock to prevent livelock.
+                        */
                         if (orphan) {
-                               ext4_inode_block_unlocked_dio(inode);
-                               inode_dio_wait(inode);
-                               ext4_inode_resume_unlocked_dio(inode);
+                               if (!ext4_should_journal_data(inode)) {
+                                       ext4_inode_block_unlocked_dio(inode);
+                                       inode_dio_wait(inode);
+                                       ext4_inode_resume_unlocked_dio(inode);
+                               } else
+                                       ext4_wait_for_tail_page_commit(inode);
                         }
+                       /*
+                        * Truncate pagecache after we've waited for commit
+                        * in data=journal mode to make pages freeable.
+                        */
+                       truncate_pagecache(inode, oldsize, inode->i_size);
                 }
                 ext4_truncate(inode);
         }
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c

index cac44828233159bfeb1d93c502943fe82b813b2a..8990165346ee6aa01a7f9a3264a092fc9669e85b 100644 (file)
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2648,7 +2648,8 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
         struct ext4_iloc iloc;
         int err = 0;
  
-       if (!EXT4_SB(inode->i_sb)->s_journal)
+       if ((!EXT4_SB(inode->i_sb)->s_journal) &&
+           !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS))
                 return 0;
  
         mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c

index 3cdb0a2fc64856b041ec4b5f184fb41a5a346771..3d4fb81bacd540ca7b81fd4005f392aa7a516b53 100644 (file)
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1645,9 +1645,7 @@ static int parse_options(char *options, struct super_block *sb,
                          unsigned int *journal_ioprio,
                          int is_remount)
  {
-#ifdef CONFIG_QUOTA
         struct ext4_sb_info *sbi = EXT4_SB(sb);
-#endif
         char *p;
         substring_t args[MAX_OPT_ARGS];
         int token;
@@ -1696,6 +1694,16 @@ static int parse_options(char *options, struct super_block *sb,
                 }
         }
  #endif
+       if (test_opt(sb, DIOREAD_NOLOCK)) {
+               int blocksize =
+                       BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
+
+               if (blocksize < PAGE_CACHE_SIZE) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                                "dioread_nolock if block size != PAGE_SIZE");
+                       return 0;
+               }
+       }
         return 1;
  }
  
@@ -2212,7 +2220,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,
                                 __func__, inode->i_ino, inode->i_size);
                         jbd_debug(2, "truncating inode %lu to %lld bytes\n",
                                   inode->i_ino, inode->i_size);
+                       mutex_lock(&inode->i_mutex);
                         ext4_truncate(inode);
+                       mutex_unlock(&inode->i_mutex);
                         nr_truncates++;
                 } else {
                         ext4_msg(sb, KERN_DEBUG,
@@ -3223,6 +3233,10 @@ int ext4_calculate_overhead(struct super_block *sb)
                         memset(buf, 0, PAGE_SIZE);
                 cond_resched();
         }
+       /* Add the journal blocks as well */
+       if (sbi->s_journal)
+               overhead += EXT4_B2C(sbi, sbi->s_journal->j_maxlen);
+
         sbi->s_overhead = overhead;
         smp_wmb();
         free_page((unsigned long) buf);
@@ -3436,15 +3450,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                         clear_opt(sb, DELALLOC);
         }
  
-       blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
-       if (test_opt(sb, DIOREAD_NOLOCK)) {
-               if (blocksize < PAGE_SIZE) {
-                       ext4_msg(sb, KERN_ERR, "can't mount with "
-                                "dioread_nolock if block size != PAGE_SIZE");
-                       goto failed_mount;
-               }
-       }
-
         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
                 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
  
@@ -3486,6 +3491,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
         if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
                 goto failed_mount;
  
+       blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
         if (blocksize < EXT4_MIN_BLOCK_SIZE ||
             blocksize > EXT4_MAX_BLOCK_SIZE) {
                 ext4_msg(sb, KERN_ERR,
@@ -4725,7 +4731,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
         }
  
         ext4_setup_system_zone(sb);
-       if (sbi->s_journal == NULL)
+       if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
                 ext4_commit_super(sb, 1);
  
  #ifdef CONFIG_QUOTA
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c

index 42f6615af0ac47d984fc5084623570d231832cfb..df9f29760efa99931bef9fdd9609e7580d8852cd 100644 (file)
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -209,7 +209,8 @@ repeat:
                 if (!new_transaction)
                         goto alloc_transaction;
                 write_lock(&journal->j_state_lock);
-               if (!journal->j_running_transaction) {
+               if (!journal->j_running_transaction &&
+                   !journal->j_barrier_count) {
                         jbd2_get_transaction(journal, new_transaction);
                         new_transaction = NULL;
                 }
@@ -1839,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
  
         BUFFER_TRACE(bh, "entry");
  
-retry:
         /*
          * It is safe to proceed here without the j_list_lock because the
          * buffers cannot be stolen by try_to_free_buffers as long as we are
@@ -1934,14 +1934,11 @@ retry:
                  * for commit and try again.
                  */
                 if (partial_page) {
-                       tid_t tid = journal->j_committing_transaction->t_tid;
-
                         jbd2_journal_put_journal_head(jh);
                         spin_unlock(&journal->j_list_lock);
                         jbd_unlock_bh_state(bh);
                         write_unlock(&journal->j_state_lock);
-                       jbd2_log_wait_commit(journal, tid);
-                       goto retry;
+                       return -EBUSY;
                 }
                 /*
                  * OK, buffer won't be reachable after truncate. We just set
@@ -2002,21 +1999,23 @@ zap_buffer_unlocked:
   * @page:    page to flush
   * @offset:  length of page to invalidate.
   *
- * Reap page buffers containing data after offset in page.
- *
+ * Reap page buffers containing data after offset in page. Can return -EBUSY
+ * if buffers are part of the committing transaction and the page is straddling
+ * i_size. Caller then has to wait for current commit and try again.
   */
-void jbd2_journal_invalidatepage(journal_t *journal,
-                     struct page *page,
-                     unsigned long offset)
+int jbd2_journal_invalidatepage(journal_t *journal,
+                               struct page *page,
+                               unsigned long offset)
  {
         struct buffer_head *head, *bh, *next;
         unsigned int curr_off = 0;
         int may_free = 1;
+       int ret = 0;
  
         if (!PageLocked(page))
                 BUG();
         if (!page_has_buffers(page))
-               return;
+               return 0;
  
         /* We will potentially be playing with lists other than just the
          * data lists (especially for journaled data mode), so be
@@ -2030,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal,
                 if (offset <= curr_off) {
                         /* This block is wholly outside the truncation point */
                         lock_buffer(bh);
-                       may_free &= journal_unmap_buffer(journal, bh,
-                                                        offset > 0);
+                       ret = journal_unmap_buffer(journal, bh, offset > 0);
                         unlock_buffer(bh);
+                       if (ret < 0)
+                               return ret;
+                       may_free &= ret;
                 }
                 curr_off = next_off;
                 bh = next;
@@ -2043,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal,
                 if (may_free && try_to_free_buffers(page))
                         J_ASSERT(!page_has_buffers(page));
         }
+       return 0;
  }
  
  /*
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h

index 1be23d9fdacb5151a6af6f8b30b2086e50c50bb1..e30b66346942a90a4c79cdc5a0362b3899db0521 100644 (file)
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1098,7 +1098,7 @@ void               jbd2_journal_set_triggers(struct buffer_head *,
  extern int      jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
  extern int      jbd2_journal_forget (handle_t *, struct buffer_head *);
  extern void     journal_sync_buffer (struct buffer_head *);
-extern void     jbd2_journal_invalidatepage(journal_t *,
+extern int      jbd2_journal_invalidatepage(journal_t *,
                                 struct page *, unsigned long);
  extern int      jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
  extern int      jbd2_journal_stop(handle_t *);
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h

index f6372b01136657cd5a0b9213090733c87347250e..7e8c36bc708225c0f3eb79c0b33977c1dcb17519 100644 (file)
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -451,7 +451,7 @@ DEFINE_EVENT(ext4__page_op, ext4_releasepage,
         TP_ARGS(page)
  );
  
-TRACE_EVENT(ext4_invalidatepage,
+DECLARE_EVENT_CLASS(ext4_invalidatepage_op,
         TP_PROTO(struct page *page, unsigned long offset),
  
         TP_ARGS(page, offset),
@@ -477,6 +477,18 @@ TRACE_EVENT(ext4_invalidatepage,
                   (unsigned long) __entry->index, __entry->offset)
  );
  
+DEFINE_EVENT(ext4_invalidatepage_op, ext4_invalidatepage,
+       TP_PROTO(struct page *page, unsigned long offset),
+
+       TP_ARGS(page, offset)
+);
+
+DEFINE_EVENT(ext4_invalidatepage_op, ext4_journalled_invalidatepage,
+       TP_PROTO(struct page *page, unsigned long offset),
+
+       TP_ARGS(page, offset)
+);
+
  TRACE_EVENT(ext4_discard_blocks,
         TP_PROTO(struct super_block *sb, unsigned long long blk,
                         unsigned long long count),
author	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 2 Jan 2013 17:57:34 +0000 (09:57 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 2 Jan 2013 17:57:34 +0000 (09:57 -0800)
fs/ext4/extents.c		patch \| blob \| blame \| history
fs/ext4/file.c		patch \| blob \| blame \| history
fs/ext4/fsync.c		patch \| blob \| blame \| history
fs/ext4/inode.c		patch \| blob \| blame \| history
fs/ext4/namei.c		patch \| blob \| blame \| history
fs/ext4/super.c		patch \| blob \| blame \| history
fs/jbd2/transaction.c		patch \| blob \| blame \| history
include/linux/jbd2.h		patch \| blob \| blame \| history
include/trace/events/ext4.h		patch \| blob \| blame \| history