Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux...
authorLinus Torvalds <torvalds@linux-foundation.org>
Mon, 27 Jul 2009 19:12:10 +0000 (12:12 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 27 Jul 2009 19:12:10 +0000 (12:12 -0700)
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6:
  jbd: fix race between write_metadata_buffer and get_write_access
  ext3: Get rid of extenddisksize parameter of ext3_get_blocks_handle()
  jbd: Fix a race between checkpointing code and journal_get_write_access()
  ext3: Fix truncation of symlinks after failed write
  jbd: Fail to load a journal if it is too short

fs/ext3/dir.c
fs/ext3/inode.c
fs/jbd/journal.c
fs/jbd/transaction.c
include/linux/ext3_fs.h

index 3d724a95882f618672a45ee8f945d9164f80b89f..373fa90c796a0821c23cd441c0a238a1a52cfb69 100644 (file)
@@ -130,8 +130,7 @@ static int ext3_readdir(struct file * filp,
                struct buffer_head *bh = NULL;
 
                map_bh.b_state = 0;
-               err = ext3_get_blocks_handle(NULL, inode, blk, 1,
-                                               &map_bh, 0, 0);
+               err = ext3_get_blocks_handle(NULL, inode, blk, 1, &map_bh, 0);
                if (err > 0) {
                        pgoff_t index = map_bh.b_blocknr >>
                                        (PAGE_CACHE_SHIFT - inode->i_blkbits);
index 5f51fed5c750870b8c575fdf7dd8e877fe986bfb..b49908a167ae09d366f76ddcef57432f338b5735 100644 (file)
@@ -788,7 +788,7 @@ err_out:
 int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
                sector_t iblock, unsigned long maxblocks,
                struct buffer_head *bh_result,
-               int create, int extend_disksize)
+               int create)
 {
        int err = -EIO;
        int offsets[4];
@@ -911,13 +911,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
        if (!err)
                err = ext3_splice_branch(handle, inode, iblock,
                                        partial, indirect_blks, count);
-       /*
-        * i_disksize growing is protected by truncate_mutex.  Don't forget to
-        * protect it if you're about to implement concurrent
-        * ext3_get_block() -bzzz
-       */
-       if (!err && extend_disksize && inode->i_size > ei->i_disksize)
-               ei->i_disksize = inode->i_size;
        mutex_unlock(&ei->truncate_mutex);
        if (err)
                goto cleanup;
@@ -972,7 +965,7 @@ static int ext3_get_block(struct inode *inode, sector_t iblock,
        }
 
        ret = ext3_get_blocks_handle(handle, inode, iblock,
-                                       max_blocks, bh_result, create, 0);
+                                       max_blocks, bh_result, create);
        if (ret > 0) {
                bh_result->b_size = (ret << inode->i_blkbits);
                ret = 0;
@@ -1005,7 +998,7 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
        dummy.b_blocknr = -1000;
        buffer_trace_init(&dummy.b_history);
        err = ext3_get_blocks_handle(handle, inode, block, 1,
-                                       &dummy, create, 1);
+                                       &dummy, create);
        /*
         * ext3_get_blocks_handle() returns number of blocks
         * mapped. 0 in case of a HOLE.
@@ -1193,15 +1186,16 @@ write_begin_failed:
                 * i_size_read because we hold i_mutex.
                 *
                 * Add inode to orphan list in case we crash before truncate
-                * finishes.
+                * finishes. Do this only if ext3_can_truncate() agrees so
+                * that orphan processing code is happy.
                 */
-               if (pos + len > inode->i_size)
+               if (pos + len > inode->i_size && ext3_can_truncate(inode))
                        ext3_orphan_add(handle, inode);
                ext3_journal_stop(handle);
                unlock_page(page);
                page_cache_release(page);
                if (pos + len > inode->i_size)
-                       vmtruncate(inode, inode->i_size);
+                       ext3_truncate(inode);
        }
        if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
                goto retry;
@@ -1287,7 +1281,7 @@ static int ext3_ordered_write_end(struct file *file,
         * There may be allocated blocks outside of i_size because
         * we failed to copy some data. Prepare for truncate.
         */
-       if (pos + len > inode->i_size)
+       if (pos + len > inode->i_size && ext3_can_truncate(inode))
                ext3_orphan_add(handle, inode);
        ret2 = ext3_journal_stop(handle);
        if (!ret)
@@ -1296,7 +1290,7 @@ static int ext3_ordered_write_end(struct file *file,
        page_cache_release(page);
 
        if (pos + len > inode->i_size)
-               vmtruncate(inode, inode->i_size);
+               ext3_truncate(inode);
        return ret ? ret : copied;
 }
 
@@ -1315,14 +1309,14 @@ static int ext3_writeback_write_end(struct file *file,
         * There may be allocated blocks outside of i_size because
         * we failed to copy some data. Prepare for truncate.
         */
-       if (pos + len > inode->i_size)
+       if (pos + len > inode->i_size && ext3_can_truncate(inode))
                ext3_orphan_add(handle, inode);
        ret = ext3_journal_stop(handle);
        unlock_page(page);
        page_cache_release(page);
 
        if (pos + len > inode->i_size)
-               vmtruncate(inode, inode->i_size);
+               ext3_truncate(inode);
        return ret ? ret : copied;
 }
 
@@ -1358,7 +1352,7 @@ static int ext3_journalled_write_end(struct file *file,
         * There may be allocated blocks outside of i_size because
         * we failed to copy some data. Prepare for truncate.
         */
-       if (pos + len > inode->i_size)
+       if (pos + len > inode->i_size && ext3_can_truncate(inode))
                ext3_orphan_add(handle, inode);
        EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
        if (inode->i_size > EXT3_I(inode)->i_disksize) {
@@ -1375,7 +1369,7 @@ static int ext3_journalled_write_end(struct file *file,
        page_cache_release(page);
 
        if (pos + len > inode->i_size)
-               vmtruncate(inode, inode->i_size);
+               ext3_truncate(inode);
        return ret ? ret : copied;
 }
 
index 737f7246a4b5ddc750b4a5198928f4b5afacc193..f96f85092d1cf2bffb671d147470666118b4bc7d 100644 (file)
@@ -287,6 +287,7 @@ int journal_write_metadata_buffer(transaction_t *transaction,
        struct page *new_page;
        unsigned int new_offset;
        struct buffer_head *bh_in = jh2bh(jh_in);
+       journal_t *journal = transaction->t_journal;
 
        /*
         * The buffer really shouldn't be locked: only the current committing
@@ -300,6 +301,11 @@ int journal_write_metadata_buffer(transaction_t *transaction,
        J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
 
        new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
+       /* keep subsequent assertions sane */
+       new_bh->b_state = 0;
+       init_buffer(new_bh, NULL, NULL);
+       atomic_set(&new_bh->b_count, 1);
+       new_jh = journal_add_journal_head(new_bh);      /* This sleeps */
 
        /*
         * If a new transaction has already done a buffer copy-out, then
@@ -361,14 +367,6 @@ repeat:
                kunmap_atomic(mapped_data, KM_USER0);
        }
 
-       /* keep subsequent assertions sane */
-       new_bh->b_state = 0;
-       init_buffer(new_bh, NULL, NULL);
-       atomic_set(&new_bh->b_count, 1);
-       jbd_unlock_bh_state(bh_in);
-
-       new_jh = journal_add_journal_head(new_bh);      /* This sleeps */
-
        set_bh_page(new_bh, new_page, new_offset);
        new_jh->b_transaction = NULL;
        new_bh->b_size = jh2bh(jh_in)->b_size;
@@ -385,7 +383,11 @@ repeat:
         * copying is moved to the transaction's shadow queue.
         */
        JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
-       journal_file_buffer(jh_in, transaction, BJ_Shadow);
+       spin_lock(&journal->j_list_lock);
+       __journal_file_buffer(jh_in, transaction, BJ_Shadow);
+       spin_unlock(&journal->j_list_lock);
+       jbd_unlock_bh_state(bh_in);
+
        JBUFFER_TRACE(new_jh, "file as BJ_IO");
        journal_file_buffer(new_jh, transaction, BJ_IO);
 
@@ -848,6 +850,12 @@ static int journal_reset(journal_t *journal)
 
        first = be32_to_cpu(sb->s_first);
        last = be32_to_cpu(sb->s_maxlen);
+       if (first + JFS_MIN_JOURNAL_BLOCKS > last + 1) {
+               printk(KERN_ERR "JBD: Journal too short (blocks %lu-%lu).\n",
+                      first, last);
+               journal_fail_superblock(journal);
+               return -EINVAL;
+       }
 
        journal->j_first = first;
        journal->j_last = last;
index 73242ba7c7b1315d9cea08de1f85f1189293dd22..c03ac11f74be1313b8f7d09a24eba88f1d27ceed 100644 (file)
@@ -489,34 +489,15 @@ void journal_unlock_updates (journal_t *journal)
        wake_up(&journal->j_wait_transaction_locked);
 }
 
-/*
- * Report any unexpected dirty buffers which turn up.  Normally those
- * indicate an error, but they can occur if the user is running (say)
- * tune2fs to modify the live filesystem, so we need the option of
- * continuing as gracefully as possible.  #
- *
- * The caller should already hold the journal lock and
- * j_list_lock spinlock: most callers will need those anyway
- * in order to probe the buffer's journaling state safely.
- */
-static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
+static void warn_dirty_buffer(struct buffer_head *bh)
 {
-       int jlist;
-
-       /* If this buffer is one which might reasonably be dirty
-        * --- ie. data, or not part of this journal --- then
-        * we're OK to leave it alone, but otherwise we need to
-        * move the dirty bit to the journal's own internal
-        * JBDDirty bit. */
-       jlist = jh->b_jlist;
+       char b[BDEVNAME_SIZE];
 
-       if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
-           jlist == BJ_Shadow || jlist == BJ_Forget) {
-               struct buffer_head *bh = jh2bh(jh);
-
-               if (test_clear_buffer_dirty(bh))
-                       set_buffer_jbddirty(bh);
-       }
+       printk(KERN_WARNING
+              "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
+              "There's a risk of filesystem corruption in case of system "
+              "crash.\n",
+              bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
 }
 
 /*
@@ -583,14 +564,16 @@ repeat:
                        if (jh->b_next_transaction)
                                J_ASSERT_JH(jh, jh->b_next_transaction ==
                                                        transaction);
+                       warn_dirty_buffer(bh);
                }
                /*
                 * In any case we need to clean the dirty flag and we must
                 * do it under the buffer lock to be sure we don't race
                 * with running write-out.
                 */
-               JBUFFER_TRACE(jh, "Unexpected dirty buffer");
-               jbd_unexpected_dirty_buffer(jh);
+               JBUFFER_TRACE(jh, "Journalling dirty buffer");
+               clear_buffer_dirty(bh);
+               set_buffer_jbddirty(bh);
        }
 
        unlock_buffer(bh);
@@ -826,6 +809,15 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
        J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
 
        if (jh->b_transaction == NULL) {
+               /*
+                * Previous journal_forget() could have left the buffer
+                * with jbddirty bit set because it was being committed. When
+                * the commit finished, we've filed the buffer for
+                * checkpointing and marked it dirty. Now we are reallocating
+                * the buffer so the transaction freeing it must have
+                * committed and so it's safe to clear the dirty bit.
+                */
+               clear_buffer_dirty(jh2bh(jh));
                jh->b_transaction = transaction;
 
                /* first access by this transaction */
@@ -1782,8 +1774,13 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
 
        if (jh->b_cp_transaction) {
                JBUFFER_TRACE(jh, "on running+cp transaction");
+               /*
+                * We don't want to write the buffer anymore, clear the
+                * bit so that we don't confuse checks in
+                * __journal_file_buffer
+                */
+               clear_buffer_dirty(bh);
                __journal_file_buffer(jh, transaction, BJ_Forget);
-               clear_buffer_jbddirty(bh);
                may_free = 0;
        } else {
                JBUFFER_TRACE(jh, "on running transaction");
@@ -2041,12 +2038,17 @@ void __journal_file_buffer(struct journal_head *jh,
        if (jh->b_transaction && jh->b_jlist == jlist)
                return;
 
-       /* The following list of buffer states needs to be consistent
-        * with __jbd_unexpected_dirty_buffer()'s handling of dirty
-        * state. */
-
        if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
            jlist == BJ_Shadow || jlist == BJ_Forget) {
+               /*
+                * For metadata buffers, we track dirty bit in buffer_jbddirty
+                * instead of buffer_dirty. We should not see a dirty bit set
+                * here because we clear it in do_get_write_access but e.g.
+                * tune2fs can modify the sb and set the dirty bit at any time
+                * so we try to gracefully handle that.
+                */
+               if (buffer_dirty(bh))
+                       warn_dirty_buffer(bh);
                if (test_clear_buffer_dirty(bh) ||
                    test_clear_buffer_jbddirty(bh))
                        was_dirty = 1;
index 634a5e5aba3e219844fbffadce72b0361c794203..7499b36677985240f73743c9993c0ceadd16bc5b 100644 (file)
@@ -874,7 +874,7 @@ struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
 struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
 int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
        sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result,
-       int create, int extend_disksize);
+       int create);
 
 extern struct inode *ext3_iget(struct super_block *, unsigned long);
 extern int  ext3_write_inode (struct inode *, int);