Btrfs: Clone file data ioctl
[linux-2.6-block.git] / fs / btrfs / inode.c
index abfe86df02d250675d46e76ca7bdaf86430a3c33..c6fae29c0b9e9d8a248595f5a90fc147ff9f517e 100644 (file)
  * Boston, MA 021110-1307, USA.
  */
 
+#include <linux/kernel.h>
 #include <linux/bio.h>
 #include <linux/buffer_head.h>
+#include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
@@ -80,6 +82,7 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
        u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy);
        u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy);
        u64 thresh;
+       unsigned long flags;
        int ret = 0;
 
        if (for_del)
@@ -89,10 +92,10 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
 
        do_div(thresh, 100);
 
-       spin_lock(&root->fs_info->delalloc_lock);
+       spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
        if (used + root->fs_info->delalloc_bytes + num_required > thresh)
                ret = -ENOSPC;
-       spin_unlock(&root->fs_info->delalloc_lock);
+       spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
        return ret;
 }
 
@@ -139,7 +142,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
                cur_alloc_size = ins.offset;
                ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
                                               start, ins.objectid, ins.offset,
-                                              ins.offset);
+                                              ins.offset, 0);
                inode->i_blocks += ins.offset >> 9;
                btrfs_check_file(root, inode);
                if (num_bytes < cur_alloc_size) {
@@ -275,12 +278,13 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
 int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
                       unsigned long old, unsigned long bits)
 {
+       unsigned long flags;
        if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
-               spin_lock(&root->fs_info->delalloc_lock);
+               spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
                BTRFS_I(inode)->delalloc_bytes += end - start + 1;
                root->fs_info->delalloc_bytes += end - start + 1;
-               spin_unlock(&root->fs_info->delalloc_lock);
+               spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
        }
        return 0;
 }
@@ -290,7 +294,9 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
 {
        if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
-               spin_lock(&root->fs_info->delalloc_lock);
+               unsigned long flags;
+
+               spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
                if (end - start + 1 > root->fs_info->delalloc_bytes) {
                        printk("warning: delalloc account %Lu %Lu\n",
                               end - start + 1, root->fs_info->delalloc_bytes);
@@ -300,7 +306,7 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
                        root->fs_info->delalloc_bytes -= end - start + 1;
                        BTRFS_I(inode)->delalloc_bytes -= end - start + 1;
                }
-               spin_unlock(&root->fs_info->delalloc_lock);
+               spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
        }
        return 0;
 }
@@ -1222,7 +1228,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
                        err = btrfs_insert_file_extent(trans, root,
                                                       inode->i_ino,
                                                       hole_start, 0, 0,
-                                                      hole_size);
+                                                      hole_size, 0);
                        btrfs_drop_extent_cache(inode, hole_start,
                                                (u64)-1);
                        btrfs_check_file(root, inode);
@@ -1415,6 +1421,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
                             inode->i_mapping, GFP_NOFS);
        extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
                             inode->i_mapping, GFP_NOFS);
+       atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
        return 0;
 }
 
@@ -1724,6 +1731,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                             inode->i_mapping, GFP_NOFS);
        extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
                             inode->i_mapping, GFP_NOFS);
+       atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
        BTRFS_I(inode)->delalloc_bytes = 0;
        BTRFS_I(inode)->root = root;
 
@@ -1952,6 +1960,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
                extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
                                     inode->i_mapping, GFP_NOFS);
                BTRFS_I(inode)->delalloc_bytes = 0;
+               atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
                BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
        }
        dir->i_sb->s_dirt = 1;
@@ -2806,14 +2815,12 @@ unsigned long btrfs_force_ra(struct address_space *mapping,
                              struct file_ra_state *ra, struct file *file,
                              pgoff_t offset, pgoff_t last_index)
 {
-       pgoff_t req_size;
+       pgoff_t req_size = last_index - offset + 1;
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
-       req_size = last_index - offset + 1;
        offset = page_cache_readahead(mapping, ra, file, offset, req_size);
        return offset;
 #else
-       req_size = min(last_index - offset + 1, (pgoff_t)128);
        page_cache_sync_readahead(mapping, ra, file, offset, req_size);
        return offset + req_size;
 #endif
@@ -2825,7 +2832,8 @@ int btrfs_defrag_file(struct file *file) {
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct page *page;
        unsigned long last_index;
-       unsigned long ra_index = 0;
+       unsigned long ra_pages = root->fs_info->bdi.ra_pages;
+       unsigned long total_read = 0;
        u64 page_start;
        u64 page_end;
        unsigned long i;
@@ -2840,11 +2848,11 @@ int btrfs_defrag_file(struct file *file) {
        mutex_lock(&inode->i_mutex);
        last_index = inode->i_size >> PAGE_CACHE_SHIFT;
        for (i = 0; i <= last_index; i++) {
-               if (i == ra_index) {
-                       ra_index = btrfs_force_ra(inode->i_mapping,
-                                                 &file->f_ra,
-                                                 file, ra_index, last_index);
+               if (total_read % ra_pages == 0) {
+                       btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i,
+                                      min(last_index, i + ra_pages - 1));
                }
+               total_read++;
                page = grab_cache_page(inode->i_mapping, i);
                if (!page)
                        goto out_unlock;
@@ -2857,6 +2865,15 @@ int btrfs_defrag_file(struct file *file) {
                                goto out_unlock;
                        }
                }
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+               ClearPageDirty(page);
+#else
+               cancel_dirty_page(page, PAGE_CACHE_SIZE);
+#endif
+               wait_on_page_writeback(page);
+               set_page_extent_mapped(page);
+
                page_start = (u64)page->index << PAGE_CACHE_SHIFT;
                page_end = page_start + PAGE_CACHE_SIZE - 1;
 
@@ -2880,9 +2897,12 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
 {
        u64 new_size;
        u64 old_size;
+       u64 devid = 1;
        struct btrfs_ioctl_vol_args *vol_args;
        struct btrfs_trans_handle *trans;
+       struct btrfs_device *device = NULL;
        char *sizestr;
+       char *devstr = NULL;
        int ret = 0;
        int namelen;
        int mod = 0;
@@ -2902,9 +2922,25 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
                goto out;
        }
 
+       mutex_lock(&root->fs_info->fs_mutex);
        sizestr = vol_args->name;
+       devstr = strchr(sizestr, ':');
+       if (devstr) {
+               char *end;
+               sizestr = devstr + 1;
+               *devstr = '\0';
+               devstr = vol_args->name;
+               devid = simple_strtoull(devstr, &end, 10);
+printk("resizing devid %Lu\n", devid);
+       }
+       device = btrfs_find_device(root, devid, NULL);
+       if (!device) {
+               printk("resizer unable to find device %Lu\n", devid);
+               ret = -EINVAL;
+               goto out_unlock;
+       }
        if (!strcmp(sizestr, "max"))
-               new_size = root->fs_info->sb->s_bdev->bd_inode->i_size;
+               new_size = device->bdev->bd_inode->i_size;
        else {
                if (sizestr[0] == '-') {
                        mod = -1;
@@ -2916,12 +2952,11 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
                new_size = btrfs_parse_size(sizestr);
                if (new_size == 0) {
                        ret = -EINVAL;
-                       goto out;
+                       goto out_unlock;
                }
        }
 
-       mutex_lock(&root->fs_info->fs_mutex);
-       old_size = btrfs_super_total_bytes(&root->fs_info->super_copy);
+       old_size = device->total_bytes;
 
        if (mod < 0) {
                if (new_size > old_size) {
@@ -2937,7 +2972,7 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
                ret = -EINVAL;
                goto out_unlock;
        }
-       if (new_size > root->fs_info->sb->s_bdev->bd_inode->i_size) {
+       if (new_size > device->bdev->bd_inode->i_size) {
                ret = -EFBIG;
                goto out_unlock;
        }
@@ -2945,13 +2980,14 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
        do_div(new_size, root->sectorsize);
        new_size *= root->sectorsize;
 
-printk("new size is %Lu\n", new_size);
+printk("new size for %s is %llu\n", device->name, (unsigned long long)new_size);
+
        if (new_size > old_size) {
                trans = btrfs_start_transaction(root, 1);
-               ret = btrfs_grow_extent_tree(trans, root, new_size);
+               ret = btrfs_grow_device(trans, device, new_size);
                btrfs_commit_transaction(trans, root);
        } else {
-               ret = btrfs_shrink_extent_tree(root, new_size);
+               ret = btrfs_shrink_device(device, new_size);
        }
 
 out_unlock:
@@ -3044,6 +3080,191 @@ static int btrfs_ioctl_defrag(struct file *file)
        return 0;
 }
 
+/*
+ * Handle BTRFS_IOC_ADD_DEV: copy a struct btrfs_ioctl_vol_args in from
+ * userspace and pass its name field to btrfs_init_new_device().
+ *
+ * Returns -ENOMEM if the argument buffer cannot be allocated, -EFAULT if
+ * the copy from userspace fails, and otherwise whatever
+ * btrfs_init_new_device() returns.
+ */
+long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
+{
+       struct btrfs_ioctl_vol_args *vol_args;
+       int ret;
+
+       vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
+       if (!vol_args)
+               return -ENOMEM;
+
+       if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
+               ret = -EFAULT;
+               goto out;
+       }
+
+       /*
+        * The bytes come straight from userspace, so nothing guarantees
+        * they are NUL-terminated.  Force termination before the buffer
+        * is used as a string.  Assumes name is a char array embedded in
+        * the struct (sizeof yields its capacity) -- TODO confirm against
+        * the ioctl header.
+        */
+       vol_args->name[sizeof(vol_args->name) - 1] = '\0';
+
+       ret = btrfs_init_new_device(root, vol_args->name);
+
+out:
+       kfree(vol_args);
+       return ret;
+}
+
+/*
+ * Duplicate one tree item under a different objectid.
+ *
+ * A new empty item is inserted in root with the same size as the item at
+ * (leaf, slot) and with a key equal to *key except that objectid is
+ * replaced by destino; the source item's bytes are then copied into it.
+ *
+ * The path parameter is not used.  The function returns void, so failures
+ * cannot be reported to the caller: they are logged and the item is
+ * skipped.
+ */
+void dup_item_to_inode(struct btrfs_trans_handle *trans,
+                      struct btrfs_root *root,
+                      struct btrfs_path *path,
+                      struct extent_buffer *leaf,
+                      int slot,
+                      struct btrfs_key *key,
+                      u64 destino)
+{
+       struct btrfs_path *cpath;
+       struct btrfs_key ckey = *key;
+       int len = btrfs_item_size_nr(leaf, slot);
+       int dstoff;
+       int ret;
+
+       cpath = btrfs_alloc_path();
+       if (!cpath) {
+               printk("btrfs: clone unable to allocate path\n");
+               return;
+       }
+
+       ckey.objectid = destino;
+       ret = btrfs_insert_empty_item(trans, root, cpath, &ckey, len);
+       if (ret) {
+               /* cpath does not point at a usable item; do not copy */
+               printk("btrfs: clone unable to insert item, err %d\n", ret);
+               goto out;
+       }
+
+       dstoff = btrfs_item_ptr_offset(cpath->nodes[0], cpath->slots[0]);
+       copy_extent_buffer(cpath->nodes[0], leaf, dstoff,
+                          btrfs_item_ptr_offset(leaf, slot),
+                          len);
+out:
+       btrfs_release_path(root, cpath);
+       /* the path itself was allocated above and must be freed as well */
+       btrfs_free_path(cpath);
+}
+
+/*
+ * Handle BTRFS_IOC_CLONE: populate the empty file behind 'file' with the
+ * file extent and checksum items of the file open on descriptor src_fd.
+ *
+ * Regular extents are re-inserted under the destination inode and, when
+ * they reference disk space (disk bytenr != 0), an extra extent reference
+ * is taken.  Inline extents and checksum items are copied with
+ * dup_item_to_inode().  Finally i_blocks and i_size are copied from the
+ * source and the destination inode is updated.
+ *
+ * Returns 0 on success, or:
+ *   -EBADF     src_fd is not an open descriptor, 'file' is not open for
+ *              writing, or the source is not open for reading
+ *   -EINVAL    source and destination are the same inode
+ *   -EXDEV     the files are on different superblocks or btrfs roots
+ *   -ENOTEMPTY the destination already has a non-zero size
+ *   -ENOMEM    no path could be allocated
+ *   or the error of the first tree operation that failed.
+ *
+ * NOTE(review): items are inserted into the same tree that 'path' is
+ * walking; whether 'leaf'/'nritems' can go stale across those inserts
+ * should be confirmed.
+ */
+long btrfs_ioctl_clone(struct file *file, unsigned long src_fd)
+{
+       struct inode *inode = fdentry(file)->d_inode;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct file *src_file;
+       struct inode *src;
+       struct btrfs_trans_handle *trans;
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       struct extent_buffer *leaf;
+       u32 nritems;
+       int slot;
+       int ret;
+       int err;
+
+       /* the destination gets modified, so it must be open for writing */
+       if (!(file->f_mode & FMODE_WRITE))
+               return -EBADF;
+
+       src_file = fget(src_fd);
+       if (!src_file)
+               return -EBADF;
+       src = src_file->f_dentry->d_inode;
+
+       /* the source contents become readable through the destination */
+       ret = -EBADF;
+       if (!(src_file->f_mode & FMODE_READ))
+               goto out_fput;
+
+       /* cloning a file onto itself would lock the same i_mutex twice */
+       ret = -EINVAL;
+       if (src == inode)
+               goto out_fput;
+
+       /*
+        * Source items are looked up by src->i_ino in the destination's
+        * root below, so both inodes have to live in that root.
+        */
+       ret = -EXDEV;
+       if (src->i_sb != inode->i_sb || BTRFS_I(src)->root != root)
+               goto out_fput;
+
+       /* always lock the lower-addressed inode first */
+       if (inode < src) {
+               mutex_lock(&inode->i_mutex);
+               mutex_lock(&src->i_mutex);
+       } else {
+               mutex_lock(&src->i_mutex);
+               mutex_lock(&inode->i_mutex);
+       }
+
+       ret = -ENOTEMPTY;
+       if (inode->i_size)
+               goto out_unlock;
+
+       ret = -ENOMEM;
+       path = btrfs_alloc_path();
+       if (!path)
+               goto out_unlock;
+
+       /* do any pending delalloc/csum calc on src, one way or
+          another, and lock file content */
+       while (1) {
+               filemap_write_and_wait(src->i_mapping);
+               lock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS);
+               if (BTRFS_I(src)->delalloc_bytes == 0)
+                       break;
+               unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS);
+       }
+
+       mutex_lock(&root->fs_info->fs_mutex);
+       trans = btrfs_start_transaction(root, 0);
+
+       /* position the path at (or just before) src's first file extent */
+       ret = btrfs_lookup_file_extent(trans, root, path, src->i_ino, 0, 0);
+       if (ret < 0)
+               goto out;
+       if (ret > 0) {
+               /* no exact match: step back one slot if there is one */
+               ret = 0;
+               if (path->slots[0] == 0)
+                       goto out;
+               path->slots[0]--;
+       }
+
+       /* walk forward item by item until we leave src's data items */
+       while (1) {
+               leaf = path->nodes[0];
+               slot = path->slots[0];
+               btrfs_item_key_to_cpu(leaf, &key, slot);
+               nritems = btrfs_header_nritems(leaf);
+
+               if (btrfs_key_type(&key) > BTRFS_CSUM_ITEM_KEY ||
+                   key.objectid != src->i_ino)
+                       break;
+
+               if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
+                       struct btrfs_file_extent_item *extent;
+                       int found_type;
+
+                       extent = btrfs_item_ptr(leaf, slot,
+                                               struct btrfs_file_extent_item);
+                       found_type = btrfs_file_extent_type(leaf, extent);
+                       if (found_type == BTRFS_FILE_EXTENT_REG) {
+                               u64 len = btrfs_file_extent_num_bytes(leaf,
+                                                                     extent);
+                               u64 ds = btrfs_file_extent_disk_bytenr(leaf,
+                                                                      extent);
+                               u64 dl = btrfs_file_extent_disk_num_bytes(leaf,
+                                                                extent);
+                               u64 off = btrfs_file_extent_offset(leaf,
+                                                                  extent);
+
+                               ret = btrfs_insert_file_extent(trans, root,
+                                                       inode->i_ino,
+                                                       key.offset,
+                                                       ds, dl, len, off);
+                               if (ret)
+                                       break;
+                               /* ds == 0 means there's a hole */
+                               if (ds != 0) {
+                                       ret = btrfs_inc_extent_ref(trans, root,
+                                                    ds, dl,
+                                                    root->root_key.objectid,
+                                                    trans->transid,
+                                                    inode->i_ino,
+                                                    key.offset);
+                                       if (ret)
+                                               break;
+                               }
+                       } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
+                               dup_item_to_inode(trans, root, path, leaf, slot,
+                                                 &key, inode->i_ino);
+                       }
+               } else if (btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) {
+                       dup_item_to_inode(trans, root, path, leaf, slot, &key,
+                                         inode->i_ino);
+               }
+
+               if (slot >= nritems - 1) {
+                       err = btrfs_next_leaf(root, path);
+                       if (err) {
+                               /* > 0: no more leaves; < 0: real error */
+                               if (err < 0)
+                                       ret = err;
+                               break;
+                       }
+               } else {
+                       path->slots[0]++;
+               }
+       }
+
+out:
+       btrfs_free_path(path);
+
+       /*
+        * The inode is updated even after a failure, as before, so that
+        * any items already inserted stay within i_size; the first error
+        * is still what gets returned.
+        */
+       inode->i_blocks = src->i_blocks;
+       i_size_write(inode, src->i_size);
+       err = btrfs_update_inode(trans, root, inode);
+       if (!ret)
+               ret = err;
+
+       unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS);
+
+       btrfs_end_transaction(trans, root);
+       mutex_unlock(&root->fs_info->fs_mutex);
+
+out_unlock:
+       mutex_unlock(&src->i_mutex);
+       mutex_unlock(&inode->i_mutex);
+out_fput:
+       fput(src_file);
+       return ret;
+}
+
 long btrfs_ioctl(struct file *file, unsigned int
                cmd, unsigned long arg)
 {
@@ -3056,6 +3277,12 @@ long btrfs_ioctl(struct file *file, unsigned int
                return btrfs_ioctl_defrag(file);
        case BTRFS_IOC_RESIZE:
                return btrfs_ioctl_resize(root, (void __user *)arg);
+       case BTRFS_IOC_ADD_DEV:
+               return btrfs_ioctl_add_dev(root, (void __user *)arg);
+       case BTRFS_IOC_BALANCE:
+               return btrfs_balance(root->fs_info->dev_root);
+       case BTRFS_IOC_CLONE:
+               return btrfs_ioctl_clone(file, arg);
        }
 
        return -ENOTTY;
@@ -3288,6 +3515,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
                extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
                                     inode->i_mapping, GFP_NOFS);
                BTRFS_I(inode)->delalloc_bytes = 0;
+               atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
                BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
        }
        dir->i_sb->s_dirt = 1;