writeback: turn writeback lists into a flush tree writeback
authorJens Axboe <jens.axboe@oracle.com>
Fri, 18 Dec 2009 11:22:19 +0000 (12:22 +0100)
committerJens Axboe <jens.axboe@oracle.com>
Fri, 18 Dec 2009 11:22:19 +0000 (12:22 +0100)
Basically a port of Google's patch as a starting point, updated
to a current kernel. Not tested yet...

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
fs/Makefile
fs/flushtree.c [new file with mode: 0644]
fs/flushtree.h [new file with mode: 0644]
fs/fs-writeback.c
fs/inode.c
fs/pipe.c
include/linux/backing-dev.h
include/linux/fs.h
include/linux/writeback.h
mm/backing-dev.c

index af6d04700d9c1ff74ff855521d282d6c72fa2bf2..5cb443223d459c744d4e32b21ec771081fd36cdf 100644 (file)
@@ -9,7 +9,7 @@ obj-y :=        open.o read_write.o file_table.o super.o \
                char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
                ioctl.o readdir.o select.o fifo.o dcache.o inode.o \
                attr.o bad_inode.o file.o filesystems.o namespace.o \
-               seq_file.o xattr.o libfs.o fs-writeback.o \
+               seq_file.o xattr.o libfs.o fs-writeback.o flushtree.o \
                pnode.o drop_caches.o splice.o sync.o utimes.o \
                stack.o fs_struct.o
 
diff --git a/fs/flushtree.c b/fs/flushtree.c
new file mode 100644 (file)
index 0000000..4df68b2
--- /dev/null
@@ -0,0 +1,168 @@
+#include <linux/fs.h>
+#include <linux/writeback.h>
+#include <linux/backing-dev.h>
+#include <linux/rbtree.h>
+
+#include "flushtree.h"
+
+#define rb_to_inode(node) rb_entry((node), struct inode, i_flush_node)
+
+/*
+ * When inodes are parked for writeback they are parked in the
+ * flush_tree. The flush tree is a data structure based on an rb tree.
+ *
+ * Duplicate keys are handled by making a list in the tree for each key
+ * value. The order of how we choose the next inode to flush is decided
+ * by two fields. First the earliest dirtied_when value. If there are
+ * duplicate dirtied_when values then the earliest i_flushed_when value
+ * determines who gets flushed next.
+ *
+ * The flush tree organizes the dirtied_when keys with the rb_tree. Any
+ * inodes with a duplicate dirtied_when value are linked together in a list.
+ * This linked list is sorted by the inode's i_flushed_when. When both the
+ * dirtied_when and the i_flushed_when are identical, the order in the
+ * linked list determines the order in which we flush the inodes.
+ */
+
+/*
+ * Find a rb_node matching the key in the flush tree. There are no duplicate
+ * rb_nodes in the tree. Instead they are chained off the first node.
+ */
+static struct inode *flush_tree_search(struct bdi_writeback *wb,
+                                      unsigned long ts)
+{
+       struct rb_node *cur;
+
+       /* Binary search of the rb tree, keyed on dirtied_when */
+       for (cur = wb->flush_tree.rb_node; cur; ) {
+               struct inode *candidate = rb_to_inode(cur);
+
+               if (!time_before(ts, candidate->dirtied_when) &&
+                   !time_after(ts, candidate->dirtied_when))
+                       return candidate;
+
+               cur = time_before(ts, candidate->dirtied_when) ?
+                       cur->rb_left : cur->rb_right;
+       }
+       return NULL;
+}
+
+/*
+ * Inserting an inode into the flush tree. The tree is keyed by the
+ * dirtied_when member.
+ *
+ * If there is a duplicate key in the tree already the new inode is put
+ * on the tail of a list of the rb_node.
+ * All inserted inodes must have one of the I_DIRTY flags set.
+ */
+void flush_tree_insert(struct inode *inode)
+{
+       struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
+       struct rb_node **link = &wb->flush_tree.rb_node;
+       struct rb_node *parent = NULL;
+
+       BUG_ON((inode->i_state & I_DIRTY) == 0);
+       BUG_ON(inode->i_state & (I_FREEING|I_CLEAR));
+       BUG_ON(!RB_EMPTY_NODE(&inode->i_flush_node));
+
+       list_del_init(&inode->i_list);
+       while (*link) {
+               struct inode *cur = rb_to_inode(*link);
+
+               parent = *link;
+               if (!time_before(inode->dirtied_when, cur->dirtied_when) &&
+                   !time_after(inode->dirtied_when, cur->dirtied_when)) {
+                       /* Same key: chain behind the tree-resident inode */
+                       list_add_tail(&inode->i_list, &cur->i_list);
+                       return;
+               }
+               link = time_before(inode->dirtied_when, cur->dirtied_when) ?
+                       &parent->rb_left : &parent->rb_right;
+       }
+
+       /* Key not present yet: link a new leaf, then rebalance */
+       rb_link_node(&inode->i_flush_node, parent, link);
+       rb_insert_color(&inode->i_flush_node, &wb->flush_tree);
+}
+
+/*
+ * Return the inode with the smallest dirtied_when after "prev_time", or NULL.
+ * NOTE(review): seeding prev_time with 0 ignores jiffies wrap; confirm.
+ */
+static struct inode *flush_tree_min_greater(struct bdi_writeback *wb,
+                                           unsigned long prev_time)
+{
+       struct rb_node *node = wb->flush_tree.rb_node;
+       struct inode *best = NULL;
+
+       while (node) {
+               struct inode *data = rb_to_inode(node);
+
+               /* Just trying to get lucky */
+               if ((prev_time + 1) == data->dirtied_when)
+                       return data;
+
+               /*
+                * Keys are jiffies: use the wrap-safe ordering the tree is
+                * built with. A key after prev_time is a candidate, and its
+                * left subtree is earlier, so the last candidate seen wins.
+                */
+               if (time_after(data->dirtied_when, prev_time)) {
+                       best = data;
+                       node = node->rb_left;
+               } else {
+                       node = node->rb_right;
+               }
+       }
+
+       return best;
+}
+
+/*
+ * Pick the next inode to process: first any inode keyed on "prev_time" (the
+ * tree node or one chained to it) whose i_flushed_when is before
+ * "start_time"; failing that, the next highest dirtied_when in the tree.
+ */
+struct inode *flush_tree_next(struct bdi_writeback *wb,
+                             unsigned long start_time,
+                             unsigned long prev_time)
+{
+       struct inode *head = flush_tree_search(wb, prev_time), *pos = head;
+
+       /* Duplicates form a ring through i_list; stop once back at head */
+       while (pos && !time_before(pos->i_flushed_when, start_time)) {
+               pos = list_entry(pos->i_list.next, struct inode, i_list);
+               if (pos == head)
+                       pos = NULL;
+       }
+       return pos ? pos : flush_tree_min_greater(wb, prev_time);
+}
+
+/* Removing a node from the flushtree. */
+void flush_tree_remove(struct inode *inode)
+{
+       struct rb_root *root = &inode_to_bdi(inode)->wb.flush_tree;
+       struct rb_node *victim = &inode->i_flush_node;
+
+       BUG_ON((inode->i_state & I_DIRTY) == 0);
+
+       if (list_empty(&inode->i_list)) {
+               /* No duplicates chained to it, so it must sit in the tree */
+               BUG_ON(RB_EMPTY_NODE(victim));
+               rb_erase(victim, root);
+               RB_CLEAR_NODE(victim);
+               return;
+       }
+
+       if (!RB_EMPTY_NODE(victim)) {
+               struct inode *heir = list_entry(inode->i_list.next,
+                                               struct inode, i_list);
+
+               /* Tree node with duplicates: the next one takes its slot */
+               rb_replace_node(victim, &heir->i_flush_node, root);
+               RB_CLEAR_NODE(victim);
+       }
+
+       /* Chained inodes, tree-resident or not, leave the duplicate list */
+       list_del_init(&inode->i_list);
+}
diff --git a/fs/flushtree.h b/fs/flushtree.h
new file mode 100644 (file)
index 0000000..489f246
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef WB_FLUSHTREE_H
+#define WB_FLUSHTREE_H
+/* Flush tree: dirty inodes of a bdi_writeback, keyed by dirtied_when. */
+void flush_tree_insert(struct inode *inode);
+void flush_tree_remove(struct inode *inode);
+struct inode *flush_tree_next(struct bdi_writeback *wb, unsigned long start,
+                               unsigned long prev);
+
+#endif /* WB_FLUSHTREE_H */
index b2da065cfe3bb4543a4e929e60afb4462b35e568..06a17e79fa7892dcfb6c0ac172bd781c4dd84f26 100644 (file)
@@ -29,7 +29,7 @@
 #include "internal.h"
 #include <trace/events/writeback.h>
 
-#define inode_to_bdi(inode)    ((inode)->i_mapping->backing_dev_info)
+#include "flushtree.h"
 
 /*
  * We don't actually have pdflush, but this one is exported though /proc...
@@ -255,39 +255,6 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
        bdi_alloc_queue_work(bdi, &args);
 }
 
-/*
- * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
- * furthest end of its superblock's dirty-inode list.
- *
- * Before stamping the inode's ->dirtied_when, we check to see whether it is
- * already the most-recently-dirtied inode on the b_dirty list.  If that is
- * the case then the inode must have been redirtied while it was being written
- * out and we don't reset its dirtied_when.
- */
-static void redirty_tail(struct inode *inode)
-{
-       struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
-
-       if (!list_empty(&wb->b_dirty)) {
-               struct inode *tail;
-
-               tail = list_entry(wb->b_dirty.next, struct inode, i_list);
-               if (time_before(inode->dirtied_when, tail->dirtied_when))
-                       inode->dirtied_when = jiffies;
-       }
-       list_move(&inode->i_list, &wb->b_dirty);
-}
-
-/*
- * requeue inode for re-scanning after bdi->b_io list is exhausted.
- */
-static void requeue_io(struct inode *inode)
-{
-       struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
-
-       list_move(&inode->i_list, &wb->b_more_io);
-}
-
 static void inode_sync_complete(struct inode *inode)
 {
        /*
@@ -312,57 +279,6 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
        return ret;
 }
 
-/*
- * Move expired dirty inodes from @delaying_queue to @dispatch_queue.
- */
-static void move_expired_inodes(struct list_head *delaying_queue,
-                              struct list_head *dispatch_queue,
-                               unsigned long *older_than_this)
-{
-       LIST_HEAD(tmp);
-       struct list_head *pos, *node;
-       struct super_block *sb = NULL;
-       struct inode *inode;
-       int do_sb_sort = 0;
-
-       while (!list_empty(delaying_queue)) {
-               inode = list_entry(delaying_queue->prev, struct inode, i_list);
-               if (older_than_this &&
-                   inode_dirtied_after(inode, *older_than_this))
-                       break;
-               if (sb && sb != inode->i_sb)
-                       do_sb_sort = 1;
-               sb = inode->i_sb;
-               list_move(&inode->i_list, &tmp);
-       }
-
-       /* just one sb in list, splice to dispatch_queue and we're done */
-       if (!do_sb_sort) {
-               list_splice(&tmp, dispatch_queue);
-               return;
-       }
-
-       /* Move inodes from one superblock together */
-       while (!list_empty(&tmp)) {
-               inode = list_entry(tmp.prev, struct inode, i_list);
-               sb = inode->i_sb;
-               list_for_each_prev_safe(pos, node, &tmp) {
-                       inode = list_entry(pos, struct inode, i_list);
-                       if (inode->i_sb == sb)
-                               list_move(&inode->i_list, dispatch_queue);
-               }
-       }
-}
-
-/*
- * Queue all expired dirty inodes for io, eldest first.
- */
-static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
-{
-       list_splice_init(&wb->b_more_io, wb->b_io.prev);
-       move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
-}
-
 static int write_inode(struct inode *inode, int sync)
 {
        if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
@@ -421,10 +337,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                 * We'll have another go at writing back this inode when we
                 * completed a full scan of b_io.
                 */
-               if (!wait) {
-                       requeue_io(inode);
+               if (!wait)
                        return 0;
-               }
 
                /*
                 * It's a data-integrity sync.  We must wait.
@@ -435,6 +349,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
        BUG_ON(inode->i_state & I_SYNC);
 
        /* Set I_SYNC, reset I_DIRTY */
+       flush_tree_remove(inode);
        dirty = inode->i_state & I_DIRTY;
        inode->i_state |= I_SYNC;
        inode->i_state &= ~I_DIRTY;
@@ -463,13 +378,14 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                        /*
                         * More pages get dirtied by a fast dirtier.
                         */
-                       goto select_queue;
+                       flush_tree_insert(inode);
                } else if (inode->i_state & I_DIRTY) {
                        /*
                         * At least XFS will redirty the inode during the
                         * writeback (delalloc) and on io completion (isize).
                         */
-                       redirty_tail(inode);
+                       inode->dirtied_when = jiffies;
+                       flush_tree_insert(inode);
                } else if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
                        /*
                         * We didn't write back all the pages.  nfs_writepages()
@@ -487,23 +403,14 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                         */
                        if (wbc->for_kupdate) {
                                /*
-                                * For the kupdate function we move the inode
-                                * to b_more_io so it will get more writeout as
-                                * soon as the queue becomes uncongested.
+                                * For the kupdate function we leave
+                                * dirtied_when field untouched and return
+                                * it to the flush_tree. The next iteration
+                                * of kupdate will flush more pages when
+                                * the queue is no longer congested.
                                 */
                                inode->i_state |= I_DIRTY_PAGES;
-select_queue:
-                               if (wbc->nr_to_write <= 0) {
-                                       /*
-                                        * slice used up: queue for next turn
-                                        */
-                                       requeue_io(inode);
-                               } else {
-                                       /*
-                                        * somehow blocked: retry later
-                                        */
-                                       redirty_tail(inode);
-                               }
+                               flush_tree_insert(inode);
                        } else {
                                /*
                                 * Otherwise fully redirty the inode so that
@@ -513,7 +420,8 @@ select_queue:
                                 * all the other files.
                                 */
                                inode->i_state |= I_DIRTY_PAGES;
-                               redirty_tail(inode);
+                               inode->dirtied_when = jiffies;
+                               flush_tree_insert(inode);
                        }
                } else if (atomic_read(&inode->i_count)) {
                        /*
@@ -598,29 +506,25 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
 {
        struct super_block *sb = wbc->sb, *pin_sb = NULL;
        const unsigned long start = jiffies;    /* livelock avoidance */
+       struct inode *inode = NULL;
+       unsigned long prev_time = 0;
 
        spin_lock(&inode_lock);
 
-       if (!wbc->for_kupdate || list_empty(&wb->b_io))
-               queue_io(wb, wbc->older_than_this);
-
-       while (!list_empty(&wb->b_io)) {
-               struct inode *inode = list_entry(wb->b_io.prev,
-                                               struct inode, i_list);
+       while ((inode = flush_tree_next(wb, start, prev_time)) != NULL) {
                long pages_skipped;
 
+               prev_time = inode->dirtied_when;
+               inode->i_flushed_when = start;
+
                /*
                 * super block given and doesn't match, skip this inode
                 */
-               if (sb && sb != inode->i_sb) {
-                       redirty_tail(inode);
+               if (sb && sb != inode->i_sb)
                        continue;
-               }
 
-               if (inode->i_state & (I_NEW | I_WILL_FREE)) {
-                       requeue_io(inode);
+               if (inode->i_state & (I_NEW | I_WILL_FREE))
                        continue;
-               }
 
                /*
                 * Was this inode dirtied after sync_sb_inodes was called?
@@ -629,8 +533,13 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
                if (inode_dirtied_after(inode, start))
                        break;
 
+               /* Was this inode dirtied too recently? */
+               if (wbc->older_than_this &&
+                   time_after(inode->dirtied_when, *wbc->older_than_this))
+                       break;
+
                if (pin_sb_for_writeback(wbc, inode, &pin_sb)) {
-                       requeue_io(inode);
+                       wbc->more_io = 1;
                        continue;
                }
 
@@ -638,13 +547,6 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
                __iget(inode);
                pages_skipped = wbc->pages_skipped;
                writeback_single_inode(inode, wbc);
-               if (wbc->pages_skipped != pages_skipped) {
-                       /*
-                        * writeback is not making progress due to locked
-                        * buffers.  Skip this inode for now.
-                        */
-                       redirty_tail(inode);
-               }
                spin_unlock(&inode_lock);
                iput(inode);
                cond_resched();
@@ -653,14 +555,11 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
                        wbc->more_io = 1;
                        break;
                }
-               if (!list_empty(&wb->b_more_io))
-                       wbc->more_io = 1;
        }
 
        unpin_sb_for_writeback(&pin_sb);
 
        spin_unlock(&inode_lock);
-       /* Leave any unwritten inodes on b_io */
 }
 
 void writeback_inodes_wbc(struct writeback_control *wbc)
@@ -718,7 +617,6 @@ static long wb_writeback(struct bdi_writeback *wb,
        };
        unsigned long oldest_jif;
        long wrote = 0;
-       struct inode *inode;
 
        if (wbc.for_kupdate) {
                wbc.older_than_this = &oldest_jif;
@@ -766,6 +664,7 @@ static long wb_writeback(struct bdi_writeback *wb,
                 */
                if (wbc.nr_to_write < MAX_WRITEBACK_PAGES)
                        continue;
+#if 0
                /*
                 * Nothing written. Wait for some inode to
                 * become available for writeback. Otherwise
@@ -780,6 +679,7 @@ static long wb_writeback(struct bdi_writeback *wb,
                        trace_writeback_inode_wait(0);
                }
                spin_unlock(&inode_lock);
+#endif
        }
 
        return wrote;
@@ -1042,6 +942,15 @@ void __mark_inode_dirty(struct inode *inode, int flags)
        if ((inode->i_state & flags) == flags)
                return;
 
+#if 0
+       /* anonymous file systems do not write data back */
+       if (inode->i_sb->s_type->fs_flags & FS_ANONYMOUS)
+               return;
+#endif
+
+       if (inode->i_state & I_DIRTY_NEVER)
+               return;
+
        if (unlikely(block_dump))
                block_dump___mark_inode_dirty(inode);
 
@@ -1049,14 +958,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
        if ((inode->i_state & flags) != flags) {
                const int was_dirty = inode->i_state & I_DIRTY;
 
-               inode->i_state |= flags;
-
-               /*
-                * If the inode is being synced, just update its dirty state.
-                * The unlocker will place the inode on the appropriate
-                * superblock list, based upon its state.
-                */
-               if (inode->i_state & I_SYNC)
+               if (inode->i_state & (I_FREEING|I_CLEAR))
                        goto out;
 
                /*
@@ -1067,7 +969,15 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                        if (hlist_unhashed(&inode->i_hash))
                                goto out;
                }
-               if (inode->i_state & (I_FREEING|I_CLEAR))
+
+               inode->i_state |= flags;
+
+               /*
+                * If the inode is being synced, just update its dirty state.
+                * The unlocker will place the inode on the appropriate
+                * superblock list, based upon its state.
+                */
+               if (inode->i_state & I_SYNC)
                        goto out;
 
                /*
@@ -1086,7 +996,8 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                        }
 
                        inode->dirtied_when = jiffies;
-                       list_move(&inode->i_list, &wb->b_dirty);
+                       inode->i_flushed_when = inode->dirtied_when;
+                       flush_tree_insert(inode);
                }
        }
 out:
index 03dfeb2e39287a75b2fccbe6c71d1451ebade3a0..16f83c446787e2eae6d9a3265b876aa402f054d9 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/mount.h>
 #include <linux/async.h>
 #include <linux/posix_acl.h>
+#include "flushtree.h"
 
 /*
  * This is needed for the following functions:
@@ -153,6 +154,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
        inode->i_cdev = NULL;
        inode->i_rdev = 0;
        inode->dirtied_when = 0;
+       RB_CLEAR_NODE(&inode->i_flush_node);
 
        if (security_inode_alloc(inode))
                goto out;
@@ -1200,6 +1202,11 @@ void generic_delete_inode(struct inode *inode)
 {
        const struct super_operations *op = inode->i_sb->s_op;
 
+       if ((inode->i_state & I_DIRTY)) {
+               flush_tree_remove(inode);
+               inode->i_state &= ~I_DIRTY;
+       }
+
        list_del_init(&inode->i_list);
        list_del_init(&inode->i_sb_list);
        WARN_ON(inode->i_state & I_NEW);
index 37ba29ff3158aca16ff0febd65fdb3cce24152d6..93d9d184e3c0bee41eab1f88c6343b94e6743613 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -937,12 +937,10 @@ static struct inode * get_pipe_inode(void)
        inode->i_fop = &rdwr_pipefifo_fops;
 
        /*
-        * Mark the inode dirty from the very beginning,
-        * that way it will never be moved to the dirty
-        * list because "mark_inode_dirty()" will think
-        * that it already _is_ on the dirty list.
+        * Mark the inode "never dirty" from the very beginning,
+        * that way it will never be written back.
         */
-       inode->i_state = I_DIRTY;
+       inode->i_state = I_DIRTY_NEVER;
        inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
        inode->i_uid = current_fsuid();
        inode->i_gid = current_fsgid();
index fcbc26af00e479675db47d8e79d1e31b3e4368bc..7575ab6f160bac50f2df29acfac7beb042beaaf7 100644 (file)
@@ -52,9 +52,8 @@ struct bdi_writeback {
        unsigned long last_old_flush;           /* last old data flush */
 
        struct task_struct      *task;          /* writeback task */
-       struct list_head        b_dirty;        /* dirty inodes */
-       struct list_head        b_io;           /* parked for writeback */
-       struct list_head        b_more_io;      /* parked for more writeback */
+
+       struct rb_root          flush_tree;
 };
 
 struct backing_dev_info {
@@ -104,16 +103,14 @@ void bdi_unregister(struct backing_dev_info *bdi);
 void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
                                long nr_pages);
 int bdi_writeback_task(struct bdi_writeback *wb);
-int bdi_has_dirty_io(struct backing_dev_info *bdi);
+bool bdi_has_dirty_io(struct backing_dev_info *bdi);
 
 extern spinlock_t bdi_lock;
 extern struct list_head bdi_list;
 
-static inline int wb_has_dirty_io(struct bdi_writeback *wb)
+static inline bool wb_has_dirty_io(struct bdi_writeback *wb)
 {
-       return !list_empty(&wb->b_dirty) ||
-              !list_empty(&wb->b_io) ||
-              !list_empty(&wb->b_more_io);
+       return !RB_EMPTY_ROOT(&wb->flush_tree);
 }
 
 static inline void __add_bdi_stat(struct backing_dev_info *bdi,
index cca191933ff6ec929d2784fc438049d1c60e0c7f..aa2f05a8f86a496a07c010682fef6e46095a2fa7 100644 (file)
@@ -8,6 +8,7 @@
 
 #include <linux/limits.h>
 #include <linux/ioctl.h>
+#include <linux/rbtree.h>
 
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -722,6 +723,8 @@ struct inode {
        struct hlist_node       i_hash;
        struct list_head        i_list;         /* backing dev IO list */
        struct list_head        i_sb_list;
+       struct rb_node          i_flush_node;
+       unsigned long           i_flushed_when;
        struct list_head        i_dentry;
        unsigned long           i_ino;
        atomic_t                i_count;
@@ -1637,6 +1640,7 @@ struct super_operations {
 #define I_CLEAR                        64
 #define __I_SYNC               7
 #define I_SYNC                 (1 << __I_SYNC)
+#define I_DIRTY_NEVER          (1 << 9)
 
 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
 
index 271079904dbe94a00cb73d4709cf36e06c75ed6f..f8caeee616d9d66c8045f347dbda599b8cdbe41d 100644 (file)
@@ -102,6 +102,8 @@ void writeback_inodes_wbc(struct writeback_control *wbc);
 long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
 void wakeup_flusher_threads(long nr_pages);
 
+#define inode_to_bdi(inode)    ((inode)->i_mapping->backing_dev_info)
+
 /* writeback.h requires fs.h; it, too, is not included from here. */
 static inline void wait_on_inode(struct inode *inode)
 {
index 941e6f0ce4b6f8d569f0f5cd1804d601edf1ae4d..a8e51e7d51b77cb6b4f81bf88723f639a95100b7 100644 (file)
@@ -66,23 +66,23 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
        unsigned long background_thresh;
        unsigned long dirty_thresh;
        unsigned long bdi_thresh;
-       unsigned long nr_dirty, nr_io, nr_more_io, nr_wb;
-       struct inode *inode;
+       unsigned long nr_dirty, nr_wb;
 
        /*
         * inode lock is enough here, the bdi->wb_list is protected by
         * RCU on the reader side
         */
-       nr_wb = nr_dirty = nr_io = nr_more_io = 0;
+       nr_wb = nr_dirty = 0;
        spin_lock(&inode_lock);
        list_for_each_entry(wb, &bdi->wb_list, list) {
+               struct rb_node *n;
+
                nr_wb++;
-               list_for_each_entry(inode, &wb->b_dirty, i_list)
+               n = rb_first(&wb->flush_tree);
+               while (n) {
                        nr_dirty++;
-               list_for_each_entry(inode, &wb->b_io, i_list)
-                       nr_io++;
-               list_for_each_entry(inode, &wb->b_more_io, i_list)
-                       nr_more_io++;
+                       n = rb_next(n);
+               }
        }
        spin_unlock(&inode_lock);
 
@@ -97,8 +97,6 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
                   "BackgroundThresh: %8lu kB\n"
                   "WritebackThreads: %8lu\n"
                   "b_dirty:          %8lu\n"
-                  "b_io:             %8lu\n"
-                  "b_more_io:        %8lu\n"
                   "bdi_list:         %8u\n"
                   "state:            %8lx\n"
                   "wb_mask:          %8lx\n"
@@ -107,7 +105,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
                   (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
                   (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
                   K(bdi_thresh), K(dirty_thresh),
-                  K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io,
+                  K(background_thresh), nr_wb, nr_dirty,
                   !list_empty(&bdi->bdi_list), bdi->state, bdi->wb_mask,
                   !list_empty(&bdi->wb_list), bdi->wb_cnt);
 #undef K
@@ -261,9 +259,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
 
        wb->bdi = bdi;
        wb->last_old_flush = jiffies;
-       INIT_LIST_HEAD(&wb->b_dirty);
-       INIT_LIST_HEAD(&wb->b_io);
-       INIT_LIST_HEAD(&wb->b_more_io);
+       wb->flush_tree = RB_ROOT;
 }
 
 static void bdi_task_init(struct backing_dev_info *bdi,
@@ -326,7 +322,7 @@ static int bdi_start_fn(void *ptr)
        return ret;
 }
 
-int bdi_has_dirty_io(struct backing_dev_info *bdi)
+bool bdi_has_dirty_io(struct backing_dev_info *bdi)
 {
        return wb_has_dirty_io(&bdi->wb);
 }
@@ -693,19 +689,7 @@ void bdi_destroy(struct backing_dev_info *bdi)
 {
        int i;
 
-       /*
-        * Splice our entries to the default_backing_dev_info, if this
-        * bdi disappears
-        */
-       if (bdi_has_dirty_io(bdi)) {
-               struct bdi_writeback *dst = &default_backing_dev_info.wb;
-
-               spin_lock(&inode_lock);
-               list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
-               list_splice(&bdi->wb.b_io, &dst->b_io);
-               list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
-               spin_unlock(&inode_lock);
-       }
+       BUG_ON(bdi_has_dirty_io(bdi));
 
        bdi_unregister(bdi);