Btrfs: Wait for IO on the block device inodes of newly added devices

[linux-2.6-block.git] / fs / btrfs / volumes.c
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c

index 18db4cbe27941ac3d7779c2c2a4d2504975c2940..f63cf7621a01a3b46665fa87100dd31939e6c459 100644 (file)
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -64,8 +64,8 @@ static void lock_chunks(struct btrfs_root *root)
  
  static void unlock_chunks(struct btrfs_root *root)
  {
-       mutex_unlock(&root->fs_info->alloc_mutex);
         mutex_unlock(&root->fs_info->chunk_mutex);
+       mutex_unlock(&root->fs_info->alloc_mutex);
  }
  
  int btrfs_cleanup_fs_uuids(void)
@@ -94,8 +94,8 @@ int btrfs_cleanup_fs_uuids(void)
         return 0;
  }
  
-static struct btrfs_device *__find_device(struct list_head *head, u64 devid,
-                                         u8 *uuid)
+static noinline struct btrfs_device *__find_device(struct list_head *head,
+                                                  u64 devid, u8 *uuid)
  {
         struct btrfs_device *dev;
         struct list_head *cur;
@@ -110,7 +110,7 @@ static struct btrfs_device *__find_device(struct list_head *head, u64 devid,
         return NULL;
  }
  
-static struct btrfs_fs_devices *find_fsid(u8 *fsid)
+static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
  {
         struct list_head *cur;
         struct btrfs_fs_devices *fs_devices;
@@ -134,16 +134,22 @@ static struct btrfs_fs_devices *find_fsid(u8 *fsid)
   * the list if the block device is congested.  This way, multiple devices
   * can make progress from a single worker thread.
   */
-int run_scheduled_bios(struct btrfs_device *device)
+static int noinline run_scheduled_bios(struct btrfs_device *device)
  {
         struct bio *pending;
         struct backing_dev_info *bdi;
+       struct btrfs_fs_info *fs_info;
         struct bio *tail;
         struct bio *cur;
         int again = 0;
         unsigned long num_run = 0;
+       unsigned long limit;
  
         bdi = device->bdev->bd_inode->i_mapping->backing_dev_info;
+       fs_info = device->dev_root->fs_info;
+       limit = btrfs_async_submit_limit(fs_info);
+       limit = limit * 2 / 3;
+
  loop:
         spin_lock(&device->io_lock);
  
@@ -179,8 +185,16 @@ loop:
                 cur = pending;
                 pending = pending->bi_next;
                 cur->bi_next = NULL;
-               atomic_dec(&device->dev_root->fs_info->nr_async_submits);
+               atomic_dec(&fs_info->nr_async_bios);
+
+               if (atomic_read(&fs_info->nr_async_bios) < limit &&
+                   waitqueue_active(&fs_info->async_submit_wait))
+                       wake_up(&fs_info->async_submit_wait);
+
+               BUG_ON(atomic_read(&cur->bi_cnt) == 0);
+               bio_get(cur);
                 submit_bio(cur->bi_rw, cur);
+               bio_put(cur);
                 num_run++;
  
                 /*
@@ -188,10 +202,11 @@ loop:
                  * is now congested.  Back off and let other work structs
                  * run instead
                  */
-               if (pending && num_run && bdi_write_congested(bdi)) {
+               if (pending && bdi_write_congested(bdi)) {
                         struct bio *old_head;
  
                         spin_lock(&device->io_lock);
+
                         old_head = device->pending_bios;
                         device->pending_bios = pending;
                         if (device->pending_bio_tail)
@@ -218,7 +233,7 @@ void pending_bios_fn(struct btrfs_work *work)
         run_scheduled_bios(device);
  }
  
-static int device_list_add(const char *path,
+static noinline int device_list_add(const char *path,
                            struct btrfs_super_block *disk_super,
                            u64 devid, struct btrfs_fs_devices **fs_devices_ret)
  {
@@ -465,10 +480,10 @@ error:
   * called very infrequently and that a given device has a small number
   * of extents
   */
-static int find_free_dev_extent(struct btrfs_trans_handle *trans,
-                               struct btrfs_device *device,
-                               struct btrfs_path *path,
-                               u64 num_bytes, u64 *start)
+static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
+                                        struct btrfs_device *device,
+                                        struct btrfs_path *path,
+                                        u64 num_bytes, u64 *start)
  {
         struct btrfs_key key;
         struct btrfs_root *root = device->dev_root;
@@ -630,7 +645,7 @@ int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
         return ret;
  }
  
-int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
+int noinline btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
                            struct btrfs_device *device,
                            u64 chunk_tree, u64 chunk_objectid,
                            u64 chunk_offset,
@@ -678,7 +693,8 @@ err:
         return ret;
  }
  
-static int find_next_chunk(struct btrfs_root *root, u64 objectid, u64 *offset)
+static noinline int find_next_chunk(struct btrfs_root *root,
+                                   u64 objectid, u64 *offset)
  {
         struct btrfs_path *path;
         int ret;
@@ -720,8 +736,8 @@ error:
         return ret;
  }
  
-static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path,
-                          u64 *objectid)
+static noinline int find_next_devid(struct btrfs_root *root,
+                                   struct btrfs_path *path, u64 *objectid)
  {
         int ret;
         struct btrfs_key key;
@@ -1022,6 +1038,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
                 return -EIO;
         }
  
+       filemap_write_and_wait(bdev->bd_inode->i_mapping);
         mutex_lock(&root->fs_info->volume_mutex);
  
         trans = btrfs_start_transaction(root, 1);
@@ -1063,6 +1080,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
         if (ret)
                 goto out_close_bdev;
  
+       set_blocksize(device->bdev, 4096);
+
         total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
         btrfs_set_super_total_bytes(&root->fs_info->super_copy,
                                     total_bytes + device->total_bytes);
@@ -1088,8 +1107,8 @@ out_close_bdev:
         goto out;
  }
  
-int btrfs_update_device(struct btrfs_trans_handle *trans,
-                       struct btrfs_device *device)
+int noinline btrfs_update_device(struct btrfs_trans_handle *trans,
+                                struct btrfs_device *device)
  {
         int ret;
         struct btrfs_path *path;
@@ -1250,7 +1269,7 @@ int btrfs_relocate_chunk(struct btrfs_root *root,
         em_tree = &root->fs_info->mapping_tree.map_tree;
  
         /* step one, relocate all the extents inside this chunk */
-       ret = btrfs_shrink_extent_tree(extent_root, chunk_offset);
+       ret = btrfs_relocate_block_group(extent_root, chunk_offset);
         BUG_ON(ret);
  
         trans = btrfs_start_transaction(root, 1);
@@ -1290,15 +1309,18 @@ int btrfs_relocate_chunk(struct btrfs_root *root,
                 BUG_ON(ret);
         }
  
+       ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
+       BUG_ON(ret);
+
         spin_lock(&em_tree->lock);
         remove_extent_mapping(em_tree, em);
+       spin_unlock(&em_tree->lock);
+
         kfree(map);
         em->bdev = NULL;
  
         /* once for the tree */
         free_extent_map(em);
-       spin_unlock(&em_tree->lock);
-
         /* once for us */
         free_extent_map(em);
  
@@ -1529,8 +1551,8 @@ int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
         return 0;
  }
  
-static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes,
-                              int sub_stripes)
+static u64 noinline chunk_bytes_by_type(u64 type, u64 calc_size,
+                                       int num_stripes, int sub_stripes)
  {
         if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP))
                 return calc_size;
@@ -1650,8 +1672,13 @@ again:
         else
                 min_free = calc_size;
  
-       /* we add 1MB because we never use the first 1MB of the device */
-       min_free += 1024 * 1024;
+       /*
+        * we add 1MB because we never use the first 1MB of the device, unless
+        * we've looped, then we are likely allocating the maximum amount of
+        * space left already
+        */
+       if (!looped)
+               min_free += 1024 * 1024;
  
         /* build a private list of devices we will allocate from */
         while(index < num_stripes) {
@@ -2057,23 +2084,22 @@ int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
  }
  
  
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
  static void end_bio_multi_stripe(struct bio *bio, int err)
-#else
-static int end_bio_multi_stripe(struct bio *bio,
-                                  unsigned int bytes_done, int err)
-#endif
  {
         struct btrfs_multi_bio *multi = bio->bi_private;
+       int is_orig_bio = 0;
  
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-       if (bio->bi_size)
-               return 1;
-#endif
         if (err)
                 atomic_inc(&multi->error);
  
+       if (bio == multi->orig_bio)
+               is_orig_bio = 1;
+
         if (atomic_dec_and_test(&multi->stripes_pending)) {
+               if (!is_orig_bio) {
+                       bio_put(bio);
+                       bio = multi->orig_bio;
+               }
                 bio->bi_private = multi->private;
                 bio->bi_end_io = multi->end_io;
                 /* only send an error to the higher layers if it is
@@ -2091,17 +2117,10 @@ static int end_bio_multi_stripe(struct bio *bio,
                 }
                 kfree(multi);
  
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-               bio_endio(bio, bio->bi_size, err);
-#else
                 bio_endio(bio, err);
-#endif
-       } else {
+       } else if (!is_orig_bio) {
                 bio_put(bio);
         }
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-       return 0;
-#endif
  }
  
  struct async_sched {
@@ -2118,24 +2137,28 @@ struct async_sched {
   * This will add one bio to the pending list for a device and make sure
   * the work struct is scheduled.
   */
-int schedule_bio(struct btrfs_root *root, struct btrfs_device *device,
-                int rw, struct bio *bio)
+static int noinline schedule_bio(struct btrfs_root *root,
+                                struct btrfs_device *device,
+                                int rw, struct bio *bio)
  {
         int should_queue = 1;
  
         /* don't bother with additional async steps for reads, right now */
         if (!(rw & (1 << BIO_RW))) {
+               bio_get(bio);
                 submit_bio(rw, bio);
+               bio_put(bio);
                 return 0;
         }
  
         /*
-        * nr_async_sumbits allows us to reliably return congestion to the
+        * nr_async_bios allows us to reliably return congestion to the
          * higher layers.  Otherwise, the async bio makes it appear we have
          * made progress against dirty pages when we've really just put it
          * on a queue for later
          */
-       atomic_inc(&root->fs_info->nr_async_submits);
+       atomic_inc(&root->fs_info->nr_async_bios);
+       WARN_ON(bio->bi_next);
         bio->bi_next = NULL;
         bio->bi_rw |= rw;
  
@@ -2188,6 +2211,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
         }
         multi->end_io = first_bio->bi_end_io;
         multi->private = first_bio->bi_private;
+       multi->orig_bio = first_bio;
         atomic_set(&multi->stripes_pending, multi->num_stripes);
  
         while(dev_nr < total_devs) {
@@ -2212,11 +2236,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                 } else {
                         bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
                         bio->bi_sector = logical >> 9;
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-                       bio_endio(bio, bio->bi_size, -EIO);
-#else
                         bio_endio(bio, -EIO);
-#endif
                 }
                 dev_nr++;
         }