writeback: make wb_writeback() take an argument structure

[linux-block.git] / fs / fs-writeback.c
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c

index da86ef58e4278654b9ae721e73c0ce9815b19b40..c5e91225501dcbe749be8450ad848a6c85a799a1 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -34,6 +34,17 @@
   */
  int nr_pdflush_threads;
  
+/*
+ * Passed into wb_writeback(), essentially a subset of writeback_control
+ */
+struct wb_writeback_args {
+       long nr_pages;
+       struct super_block *sb;
+       enum writeback_sync_modes sync_mode;
+       int for_kupdate;
+       int range_cyclic;
+};
+
  /*
   * Work items for the bdi_writeback threads
   */
@@ -45,9 +56,7 @@ struct bdi_work {
         unsigned long seen;
         atomic_t pending;
  
-       struct super_block *sb;
-       unsigned long nr_pages;
-       enum writeback_sync_modes sync_mode;
+       struct wb_writeback_args args;
  
         unsigned long state;
  };
@@ -69,19 +78,14 @@ static inline void bdi_work_init(struct bdi_work *work,
                                  struct writeback_control *wbc)
  {
         INIT_RCU_HEAD(&work->rcu_head);
-       work->sb = wbc->sb;
-       work->nr_pages = wbc->nr_to_write;
-       work->sync_mode = wbc->sync_mode;
+       work->args.sb = wbc->sb;
+       work->args.nr_pages = wbc->nr_to_write;
+       work->args.sync_mode = wbc->sync_mode;
+       work->args.range_cyclic = wbc->range_cyclic;
+       work->args.for_kupdate = 0;
         work->state = WS_USED;
  }
  
-static inline void bdi_work_init_on_stack(struct bdi_work *work,
-                                         struct writeback_control *wbc)
-{
-       bdi_work_init(work, wbc);
-       work->state |= WS_ONSTACK;
-}
-
  /**
   * writeback_in_progress - determine whether there is writeback in progress
   * @bdi: the device's backing_dev_info structure.
@@ -113,7 +117,7 @@ static void bdi_work_free(struct rcu_head *head)
  
  static void wb_work_complete(struct bdi_work *work)
  {
-       const enum writeback_sync_modes sync_mode = work->sync_mode;
+       const enum writeback_sync_modes sync_mode = work->args.sync_mode;
  
         /*
          * For allocated work, we can clear the done/seen bit right here.
@@ -207,34 +211,23 @@ static struct bdi_work *bdi_alloc_work(struct writeback_control *wbc)
  
  void bdi_start_writeback(struct writeback_control *wbc)
  {
-       const bool must_wait = wbc->sync_mode == WB_SYNC_ALL;
-       struct bdi_work work_stack, *work = NULL;
-
-       if (!must_wait)
-               work = bdi_alloc_work(wbc);
+       /*
+        * WB_SYNC_NONE is opportunistic writeback. If this allocation fails,
+        * bdi_queue_work() will wake up the thread and flush old data. This
+        * should ensure some amount of progress in freeing memory.
+        */
+       if (wbc->sync_mode != WB_SYNC_ALL) {
+               struct bdi_work *w = bdi_alloc_work(wbc);
  
-       if (!work) {
-               work = &work_stack;
-               bdi_work_init_on_stack(work, wbc);
-       }
+               bdi_queue_work(wbc->bdi, w);
+       } else {
+               struct bdi_work work;
  
-       bdi_queue_work(wbc->bdi, work);
+               bdi_work_init(&work, wbc);
+               work.state |= WS_ONSTACK;
  
-       /*
-        * If the sync mode is WB_SYNC_ALL, block waiting for the work to
-        * complete. If not, we only need to wait for the work to be started,
-        * if we allocated it on-stack. We use the same mechanism, if the
-        * wait bit is set in the bdi_work struct, then threads will not
-        * clear pending until after they are done.
-        *
-        * Note that work == &work_stack if must_wait is true, so we don't
-        * need to do call_rcu() here ever, since the completion path will
-        * have done that for us.
-        */
-       if (must_wait || work == &work_stack) {
-               bdi_wait_on_work_clear(work);
-               if (work != &work_stack)
-                       call_rcu(&work->rcu_head, bdi_work_free);
+               bdi_queue_work(wbc->bdi, &work);
+               bdi_wait_on_work_clear(&work);
         }
  }
  
@@ -671,17 +664,16 @@ static inline bool over_bground_thresh(void)
   * older_than_this takes precedence over nr_to_write.  So we'll only write back
   * all dirty pages if they are all attached to "old" mappings.
   */
-static long wb_writeback(struct bdi_writeback *wb, long nr_pages,
-                        struct super_block *sb,
-                        enum writeback_sync_modes sync_mode, int for_kupdate)
+static long wb_writeback(struct bdi_writeback *wb,
+                        struct wb_writeback_args *args)
  {
         struct writeback_control wbc = {
                 .bdi                    = wb->bdi,
-               .sb                     = sb,
-               .sync_mode              = sync_mode,
+               .sb                     = args->sb,
+               .sync_mode              = args->sync_mode,
                 .older_than_this        = NULL,
-               .for_kupdate            = for_kupdate,
-               .range_cyclic           = 1,
+               .for_kupdate            = args->for_kupdate,
+               .range_cyclic           = args->range_cyclic,
         };
         unsigned long oldest_jif;
         long wrote = 0;
@@ -691,13 +683,18 @@ static long wb_writeback(struct bdi_writeback *wb, long nr_pages,
                 oldest_jif = jiffies -
                                 msecs_to_jiffies(dirty_expire_interval * 10);
         }
+       if (!wbc.range_cyclic) {
+               wbc.range_start = 0;
+               wbc.range_end = LLONG_MAX;
+       }
  
         for (;;) {
                 /*
                  * Don't flush anything for non-integrity writeback where
                  * no nr_pages was given
                  */
-               if (!for_kupdate && nr_pages <= 0 && sync_mode == WB_SYNC_NONE)
+               if (!args->for_kupdate && args->nr_pages <= 0 &&
+                    args->sync_mode == WB_SYNC_NONE)
                         break;
  
                 /*
@@ -705,7 +702,8 @@ static long wb_writeback(struct bdi_writeback *wb, long nr_pages,
                  * periodic background writeout and we are below the
                  * background dirty threshold, don't do anything
                  */
-               if (for_kupdate && nr_pages <= 0 && !over_bground_thresh())
+               if (args->for_kupdate && args->nr_pages <= 0 &&
+                   !over_bground_thresh())
                         break;
  
                 wbc.more_io = 0;
@@ -713,7 +711,7 @@ static long wb_writeback(struct bdi_writeback *wb, long nr_pages,
                 wbc.nr_to_write = MAX_WRITEBACK_PAGES;
                 wbc.pages_skipped = 0;
                 writeback_inodes_wb(wb, &wbc);
-               nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+               args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
                 wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
  
                 /*
@@ -767,8 +765,16 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
                         global_page_state(NR_UNSTABLE_NFS) +
                         (inodes_stat.nr_inodes - inodes_stat.nr_unused);
  
-       if (nr_pages)
-               return wb_writeback(wb, nr_pages, NULL, WB_SYNC_NONE, 1);
+       if (nr_pages) {
+               struct wb_writeback_args args = {
+                       .nr_pages       = nr_pages,
+                       .sync_mode      = WB_SYNC_NONE,
+                       .for_kupdate    = 1,
+                       .range_cyclic   = 1,
+               };
+
+               return wb_writeback(wb, &args);
+       }
  
         return 0;
  }
@@ -780,35 +786,31 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
  {
         struct backing_dev_info *bdi = wb->bdi;
         struct bdi_work *work;
-       long nr_pages, wrote = 0;
+       long wrote = 0;
  
         while ((work = get_next_work_item(bdi, wb)) != NULL) {
-               enum writeback_sync_modes sync_mode;
-
-               nr_pages = work->nr_pages;
+               struct wb_writeback_args args = work->args;
  
                 /*
                  * Override sync mode, in case we must wait for completion
                  */
                 if (force_wait)
-                       work->sync_mode = sync_mode = WB_SYNC_ALL;
-               else
-                       sync_mode = work->sync_mode;
+                       work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
  
                 /*
                  * If this isn't a data integrity operation, just notify
                  * that we have seen this work and we are now starting it.
                  */
-               if (sync_mode == WB_SYNC_NONE)
+               if (args.sync_mode == WB_SYNC_NONE)
                         wb_clear_pending(wb, work);
  
-               wrote += wb_writeback(wb, nr_pages, work->sb, sync_mode, 0);
+               wrote += wb_writeback(wb, &args);
  
                 /*
                  * This is a data integrity writeback, so only do the
                  * notification when we have completed the work.
                  */
-               if (sync_mode == WB_SYNC_ALL)
+               if (args.sync_mode == WB_SYNC_ALL)
                         wb_clear_pending(wb, work);
         }
  
@@ -1242,57 +1244,3 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
         return ret;
  }
  EXPORT_SYMBOL(sync_inode);
-
-/**
- * generic_osync_inode - flush all dirty data for a given inode to disk
- * @inode: inode to write
- * @mapping: the address_space that should be flushed
- * @what:  what to write and wait upon
- *
- * This can be called by file_write functions for files which have the
- * O_SYNC flag set, to flush dirty writes to disk.
- *
- * @what is a bitmask, specifying which part of the inode's data should be
- * written and waited upon.
- *
- *    OSYNC_DATA:     i_mapping's dirty data
- *    OSYNC_METADATA: the buffers at i_mapping->private_list
- *    OSYNC_INODE:    the inode itself
- */
-
-int generic_osync_inode(struct inode *inode, struct address_space *mapping, int what)
-{
-       int err = 0;
-       int need_write_inode_now = 0;
-       int err2;
-
-       if (what & OSYNC_DATA)
-               err = filemap_fdatawrite(mapping);
-       if (what & (OSYNC_METADATA|OSYNC_DATA)) {
-               err2 = sync_mapping_buffers(mapping);
-               if (!err)
-                       err = err2;
-       }
-       if (what & OSYNC_DATA) {
-               err2 = filemap_fdatawait(mapping);
-               if (!err)
-                       err = err2;
-       }
-
-       spin_lock(&inode_lock);
-       if ((inode->i_state & I_DIRTY) &&
-           ((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC)))
-               need_write_inode_now = 1;
-       spin_unlock(&inode_lock);
-
-       if (need_write_inode_now) {
-               err2 = write_inode_now(inode, 1);
-               if (!err)
-                       err = err2;
-       }
-       else
-               inode_sync_wait(inode);
-
-       return err;
-}
-EXPORT_SYMBOL(generic_osync_inode);