Btrfs: Wait for IO on the block device inodes of newly added devices
[linux-2.6-block.git] / fs / btrfs / volumes.c
index ba396857102453a60d93b2c43f036e953457ecf1..f63cf7621a01a3b46665fa87100dd31939e6c459 100644 (file)
@@ -56,6 +56,18 @@ void btrfs_unlock_volumes(void)
        mutex_unlock(&uuid_mutex);
 }
 
+static void lock_chunks(struct btrfs_root *root)
+{      /* Take both mutexes of root->fs_info: alloc_mutex, then chunk_mutex. */
+       mutex_lock(&root->fs_info->alloc_mutex);   /* acquired first */
+       mutex_lock(&root->fs_info->chunk_mutex);   /* acquired second */
+}
+
+static void unlock_chunks(struct btrfs_root *root)
+{      /* Drop both mutexes of root->fs_info: chunk_mutex, then alloc_mutex. */
+       mutex_unlock(&root->fs_info->chunk_mutex); /* released first */
+       mutex_unlock(&root->fs_info->alloc_mutex); /* released last */
+}
+
 int btrfs_cleanup_fs_uuids(void)
 {
        struct btrfs_fs_devices *fs_devices;
@@ -82,8 +94,8 @@ int btrfs_cleanup_fs_uuids(void)
        return 0;
 }
 
-static struct btrfs_device *__find_device(struct list_head *head, u64 devid,
-                                         u8 *uuid)
+static noinline struct btrfs_device *__find_device(struct list_head *head,
+                                                  u64 devid, u8 *uuid)
 {
        struct btrfs_device *dev;
        struct list_head *cur;
@@ -98,7 +110,7 @@ static struct btrfs_device *__find_device(struct list_head *head, u64 devid,
        return NULL;
 }
 
-static struct btrfs_fs_devices *find_fsid(u8 *fsid)
+static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
 {
        struct list_head *cur;
        struct btrfs_fs_devices *fs_devices;
@@ -122,16 +134,22 @@ static struct btrfs_fs_devices *find_fsid(u8 *fsid)
  * the list if the block device is congested.  This way, multiple devices
  * can make progress from a single worker thread.
  */
-int run_scheduled_bios(struct btrfs_device *device)
+static int noinline run_scheduled_bios(struct btrfs_device *device)
 {
        struct bio *pending;
        struct backing_dev_info *bdi;
+       struct btrfs_fs_info *fs_info;
        struct bio *tail;
        struct bio *cur;
        int again = 0;
        unsigned long num_run = 0;
+       unsigned long limit;
 
        bdi = device->bdev->bd_inode->i_mapping->backing_dev_info;
+       fs_info = device->dev_root->fs_info;
+       limit = btrfs_async_submit_limit(fs_info);
+       limit = limit * 2 / 3;
+
 loop:
        spin_lock(&device->io_lock);
 
@@ -167,8 +185,16 @@ loop:
                cur = pending;
                pending = pending->bi_next;
                cur->bi_next = NULL;
-               atomic_dec(&device->dev_root->fs_info->nr_async_submits);
+               atomic_dec(&fs_info->nr_async_bios);
+
+               if (atomic_read(&fs_info->nr_async_bios) < limit &&
+                   waitqueue_active(&fs_info->async_submit_wait))
+                       wake_up(&fs_info->async_submit_wait);
+
+               BUG_ON(atomic_read(&cur->bi_cnt) == 0);
+               bio_get(cur);
                submit_bio(cur->bi_rw, cur);
+               bio_put(cur);
                num_run++;
 
                /*
@@ -176,10 +202,11 @@ loop:
                 * is now congested.  Back off and let other work structs
                 * run instead
                 */
-               if (pending && num_run && bdi_write_congested(bdi)) {
+               if (pending && bdi_write_congested(bdi)) {
                        struct bio *old_head;
 
                        spin_lock(&device->io_lock);
+
                        old_head = device->pending_bios;
                        device->pending_bios = pending;
                        if (device->pending_bio_tail)
@@ -206,7 +233,7 @@ void pending_bios_fn(struct btrfs_work *work)
        run_scheduled_bios(device);
 }
 
-static int device_list_add(const char *path,
+static noinline int device_list_add(const char *path,
                           struct btrfs_super_block *disk_super,
                           u64 devid, struct btrfs_fs_devices **fs_devices_ret)
 {
@@ -271,13 +298,17 @@ again:
        list_for_each(cur, head) {
                device = list_entry(cur, struct btrfs_device, dev_list);
                if (!device->in_fs_metadata) {
-                       if (device->bdev) {
-                               close_bdev_excl(device->bdev);
-                               fs_devices->open_devices--;
-                       }
+                       struct block_device *bdev;
                        list_del(&device->dev_list);
                        list_del(&device->dev_alloc_list);
                        fs_devices->num_devices--;
+                       if (device->bdev) {
+                               bdev = device->bdev;
+                               fs_devices->open_devices--;
+                               mutex_unlock(&uuid_mutex);
+                               close_bdev_excl(bdev);
+                               mutex_lock(&uuid_mutex);
+                       }
                        kfree(device->name);
                        kfree(device);
                        goto again;
@@ -449,10 +480,10 @@ error:
  * called very infrequently and that a given device has a small number
  * of extents
  */
-static int find_free_dev_extent(struct btrfs_trans_handle *trans,
-                               struct btrfs_device *device,
-                               struct btrfs_path *path,
-                               u64 num_bytes, u64 *start)
+static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
+                                        struct btrfs_device *device,
+                                        struct btrfs_path *path,
+                                        u64 num_bytes, u64 *start)
 {
        struct btrfs_key key;
        struct btrfs_root *root = device->dev_root;
@@ -614,7 +645,7 @@ int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
        return ret;
 }
 
-int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
+int noinline btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
                           struct btrfs_device *device,
                           u64 chunk_tree, u64 chunk_objectid,
                           u64 chunk_offset,
@@ -662,7 +693,8 @@ err:
        return ret;
 }
 
-static int find_next_chunk(struct btrfs_root *root, u64 objectid, u64 *offset)
+static noinline int find_next_chunk(struct btrfs_root *root,
+                                   u64 objectid, u64 *offset)
 {
        struct btrfs_path *path;
        int ret;
@@ -704,8 +736,8 @@ error:
        return ret;
 }
 
-static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path,
-                          u64 *objectid)
+static noinline int find_next_devid(struct btrfs_root *root,
+                                   struct btrfs_path *path, u64 *objectid)
 {
        int ret;
        struct btrfs_key key;
@@ -818,6 +850,7 @@ static int btrfs_rm_dev_item(struct btrfs_root *root,
        key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
        key.type = BTRFS_DEV_ITEM_KEY;
        key.offset = device->devid;
+       lock_chunks(root);
 
        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret < 0)
@@ -852,6 +885,7 @@ static int btrfs_rm_dev_item(struct btrfs_root *root,
                                    total_bytes - 1);
 out:
        btrfs_free_path(path);
+       unlock_chunks(root);
        btrfs_commit_transaction(trans, root);
        return ret;
 }
@@ -866,8 +900,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
        u64 devid;
        int ret = 0;
 
-       mutex_lock(&root->fs_info->fs_mutex);
        mutex_lock(&uuid_mutex);
+       mutex_lock(&root->fs_info->volume_mutex);
 
        all_avail = root->fs_info->avail_data_alloc_bits |
                root->fs_info->avail_system_alloc_bits |
@@ -983,8 +1017,8 @@ error_close:
        if (bdev)
                close_bdev_excl(bdev);
 out:
+       mutex_unlock(&root->fs_info->volume_mutex);
        mutex_unlock(&uuid_mutex);
-       mutex_unlock(&root->fs_info->fs_mutex);
        return ret;
 }
 
@@ -1003,8 +1037,12 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        if (!bdev) {
                return -EIO;
        }
-       mutex_lock(&root->fs_info->fs_mutex);
+
+       filemap_write_and_wait(bdev->bd_inode->i_mapping);
+       mutex_lock(&root->fs_info->volume_mutex);
+
        trans = btrfs_start_transaction(root, 1);
+       lock_chunks(root);
        devices = &root->fs_info->fs_devices->devices;
        list_for_each(cur, devices) {
                device = list_entry(cur, struct btrfs_device, dev_list);
@@ -1042,6 +1080,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        if (ret)
                goto out_close_bdev;
 
+       set_blocksize(device->bdev, 4096);
+
        total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
        btrfs_set_super_total_bytes(&root->fs_info->super_copy,
                                    total_bytes + device->total_bytes);
@@ -1056,8 +1096,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        root->fs_info->fs_devices->num_devices++;
        root->fs_info->fs_devices->open_devices++;
 out:
+       unlock_chunks(root);
        btrfs_end_transaction(trans, root);
-       mutex_unlock(&root->fs_info->fs_mutex);
+       mutex_unlock(&root->fs_info->volume_mutex);
+
        return ret;
 
 out_close_bdev:
@@ -1065,8 +1107,8 @@ out_close_bdev:
        goto out;
 }
 
-int btrfs_update_device(struct btrfs_trans_handle *trans,
-                       struct btrfs_device *device)
+int noinline btrfs_update_device(struct btrfs_trans_handle *trans,
+                                struct btrfs_device *device)
 {
        int ret;
        struct btrfs_path *path;
@@ -1111,7 +1153,7 @@ out:
        return ret;
 }
 
-int btrfs_grow_device(struct btrfs_trans_handle *trans,
+static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
                      struct btrfs_device *device, u64 new_size)
 {
        struct btrfs_super_block *super_copy =
@@ -1123,6 +1165,16 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
        return btrfs_update_device(trans, device);
 }
 
+int btrfs_grow_device(struct btrfs_trans_handle *trans,
+                     struct btrfs_device *device, u64 new_size)
+{      /* Locked wrapper: runs __btrfs_grow_device() under lock_chunks(). */
+       int ret;
+       lock_chunks(device->dev_root);  /* locks are reached via the device's dev_root */
+       ret = __btrfs_grow_device(trans, device, new_size);
+       unlock_chunks(device->dev_root);
+       return ret;     /* result of __btrfs_grow_device(), passed through unchanged */
+}
+
 static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root,
                            u64 chunk_tree, u64 chunk_objectid,
@@ -1217,12 +1269,14 @@ int btrfs_relocate_chunk(struct btrfs_root *root,
        em_tree = &root->fs_info->mapping_tree.map_tree;
 
        /* step one, relocate all the extents inside this chunk */
-       ret = btrfs_shrink_extent_tree(extent_root, chunk_offset);
+       ret = btrfs_relocate_block_group(extent_root, chunk_offset);
        BUG_ON(ret);
 
        trans = btrfs_start_transaction(root, 1);
        BUG_ON(!trans);
 
+       lock_chunks(root);
+
        /*
         * step two, delete the device extents and the
         * chunk tree entries
@@ -1255,18 +1309,22 @@ int btrfs_relocate_chunk(struct btrfs_root *root,
                BUG_ON(ret);
        }
 
+       ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
+       BUG_ON(ret);
+
        spin_lock(&em_tree->lock);
        remove_extent_mapping(em_tree, em);
+       spin_unlock(&em_tree->lock);
+
        kfree(map);
        em->bdev = NULL;
 
        /* once for the tree */
        free_extent_map(em);
-       spin_unlock(&em_tree->lock);
-
        /* once for us */
        free_extent_map(em);
 
+       unlock_chunks(root);
        btrfs_end_transaction(trans, root);
        return 0;
 }
@@ -1297,9 +1355,9 @@ int btrfs_balance(struct btrfs_root *dev_root)
        struct btrfs_key found_key;
 
 
+       mutex_lock(&dev_root->fs_info->volume_mutex);
        dev_root = dev_root->fs_info->dev_root;
 
-       mutex_lock(&dev_root->fs_info->fs_mutex);
        /* step one make some room on all the devices */
        list_for_each(cur, devices) {
                device = list_entry(cur, struct btrfs_device, dev_list);
@@ -1343,13 +1401,14 @@ int btrfs_balance(struct btrfs_root *dev_root)
 
                ret = btrfs_previous_item(chunk_root, path, 0,
                                          BTRFS_CHUNK_ITEM_KEY);
-               if (ret) {
+               if (ret)
                        break;
-               }
+
                btrfs_item_key_to_cpu(path->nodes[0], &found_key,
                                      path->slots[0]);
                if (found_key.objectid != key.objectid)
                        break;
+
                chunk = btrfs_item_ptr(path->nodes[0],
                                       path->slots[0],
                                       struct btrfs_chunk);
@@ -1358,17 +1417,17 @@ int btrfs_balance(struct btrfs_root *dev_root)
                if (key.offset == 0)
                        break;
 
+               btrfs_release_path(chunk_root, path);
                ret = btrfs_relocate_chunk(chunk_root,
                                           chunk_root->root_key.objectid,
                                           found_key.objectid,
                                           found_key.offset);
                BUG_ON(ret);
-               btrfs_release_path(chunk_root, path);
        }
        ret = 0;
 error:
        btrfs_free_path(path);
-       mutex_unlock(&dev_root->fs_info->fs_mutex);
+       mutex_unlock(&dev_root->fs_info->volume_mutex);
        return ret;
 }
 
@@ -1408,14 +1467,18 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
 
        path->reada = 2;
 
+       lock_chunks(root);
+
        device->total_bytes = new_size;
        ret = btrfs_update_device(trans, device);
        if (ret) {
+               unlock_chunks(root);
                btrfs_end_transaction(trans, root);
                goto done;
        }
        WARN_ON(diff > old_total);
        btrfs_set_super_total_bytes(super_copy, old_total - diff);
+       unlock_chunks(root);
        btrfs_end_transaction(trans, root);
 
        key.objectid = device->devid;
@@ -1488,8 +1551,8 @@ int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
        return 0;
 }
 
-static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes,
-                              int sub_stripes)
+static u64 noinline chunk_bytes_by_type(u64 type, u64 calc_size,
+                                       int num_stripes, int sub_stripes)
 {
        if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP))
                return calc_size;
@@ -1609,8 +1672,13 @@ again:
        else
                min_free = calc_size;
 
-       /* we add 1MB because we never use the first 1MB of the device */
-       min_free += 1024 * 1024;
+       /*
+        * we add 1MB because we never use the first 1MB of the device.
+        * Skip that once we've looped: by then we are likely already
+        * allocating the maximum amount of space left
+        */
+       if (!looped)
+               min_free += 1024 * 1024;
 
        /* build a private list of devices we will allocate from */
        while(index < num_stripes) {
@@ -2016,23 +2084,22 @@ int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
 }
 
 
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
 static void end_bio_multi_stripe(struct bio *bio, int err)
-#else
-static int end_bio_multi_stripe(struct bio *bio,
-                                  unsigned int bytes_done, int err)
-#endif
 {
        struct btrfs_multi_bio *multi = bio->bi_private;
+       int is_orig_bio = 0;
 
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-       if (bio->bi_size)
-               return 1;
-#endif
        if (err)
                atomic_inc(&multi->error);
 
+       if (bio == multi->orig_bio)
+               is_orig_bio = 1;
+
        if (atomic_dec_and_test(&multi->stripes_pending)) {
+               if (!is_orig_bio) {
+                       bio_put(bio);
+                       bio = multi->orig_bio;
+               }
                bio->bi_private = multi->private;
                bio->bi_end_io = multi->end_io;
                /* only send an error to the higher layers if it is
@@ -2050,17 +2117,10 @@ static int end_bio_multi_stripe(struct bio *bio,
                }
                kfree(multi);
 
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-               bio_endio(bio, bio->bi_size, err);
-#else
                bio_endio(bio, err);
-#endif
-       } else {
+       } else if (!is_orig_bio) {
                bio_put(bio);
        }
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-       return 0;
-#endif
 }
 
 struct async_sched {
@@ -2077,24 +2137,28 @@ struct async_sched {
  * This will add one bio to the pending list for a device and make sure
  * the work struct is scheduled.
  */
-int schedule_bio(struct btrfs_root *root, struct btrfs_device *device,
-                int rw, struct bio *bio)
+static int noinline schedule_bio(struct btrfs_root *root,
+                                struct btrfs_device *device,
+                                int rw, struct bio *bio)
 {
        int should_queue = 1;
 
        /* don't bother with additional async steps for reads, right now */
        if (!(rw & (1 << BIO_RW))) {
+               bio_get(bio);
                submit_bio(rw, bio);
+               bio_put(bio);
                return 0;
        }
 
        /*
-        * nr_async_sumbits allows us to reliably return congestion to the
+        * nr_async_bios allows us to reliably return congestion to the
         * higher layers.  Otherwise, the async bio makes it appear we have
         * made progress against dirty pages when we've really just put it
         * on a queue for later
         */
-       atomic_inc(&root->fs_info->nr_async_submits);
+       atomic_inc(&root->fs_info->nr_async_bios);
+       WARN_ON(bio->bi_next);
        bio->bi_next = NULL;
        bio->bi_rw |= rw;
 
@@ -2147,6 +2211,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
        }
        multi->end_io = first_bio->bi_end_io;
        multi->private = first_bio->bi_private;
+       multi->orig_bio = first_bio;
        atomic_set(&multi->stripes_pending, multi->num_stripes);
 
        while(dev_nr < total_devs) {
@@ -2171,11 +2236,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                } else {
                        bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
                        bio->bi_sector = logical >> 9;
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-                       bio_endio(bio, bio->bi_size, -EIO);
-#else
                        bio_endio(bio, -EIO);
-#endif
                }
                dev_nr++;
        }
@@ -2486,4 +2547,3 @@ again:
 error:
        return ret;
 }
-