Btrfs: Seed device support
[linux-2.6-block.git] / fs / btrfs / ioctl.c
CommitLineData
f46b5a66
CH
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19#include <linux/kernel.h>
20#include <linux/bio.h>
21#include <linux/buffer_head.h>
22#include <linux/file.h>
23#include <linux/fs.h>
cb8e7090 24#include <linux/fsnotify.h>
f46b5a66
CH
25#include <linux/pagemap.h>
26#include <linux/highmem.h>
27#include <linux/time.h>
28#include <linux/init.h>
29#include <linux/string.h>
30#include <linux/smp_lock.h>
31#include <linux/backing-dev.h>
cb8e7090 32#include <linux/mount.h>
f46b5a66 33#include <linux/mpage.h>
cb8e7090 34#include <linux/namei.h>
f46b5a66
CH
35#include <linux/swap.h>
36#include <linux/writeback.h>
37#include <linux/statfs.h>
38#include <linux/compat.h>
39#include <linux/bit_spinlock.h>
cb8e7090 40#include <linux/security.h>
f46b5a66
CH
41#include <linux/version.h>
42#include <linux/xattr.h>
7ea394f1 43#include <linux/vmalloc.h>
f46b5a66
CH
44#include "ctree.h"
45#include "disk-io.h"
46#include "transaction.h"
47#include "btrfs_inode.h"
48#include "ioctl.h"
49#include "print-tree.h"
50#include "volumes.h"
925baedd 51#include "locking.h"
f46b5a66
CH
52
53
54
cb8e7090
CH
55static noinline int create_subvol(struct btrfs_root *root,
56 struct dentry *dentry,
57 char *name, int namelen)
f46b5a66
CH
58{
59 struct btrfs_trans_handle *trans;
60 struct btrfs_key key;
61 struct btrfs_root_item root_item;
62 struct btrfs_inode_item *inode_item;
63 struct extent_buffer *leaf;
64 struct btrfs_root *new_root = root;
65 struct inode *dir;
66 int ret;
67 int err;
68 u64 objectid;
69 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
70 unsigned long nr = 1;
71
f46b5a66
CH
72 ret = btrfs_check_free_space(root, 1, 0);
73 if (ret)
74 goto fail_commit;
75
76 trans = btrfs_start_transaction(root, 1);
77 BUG_ON(!trans);
78
79 ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
80 0, &objectid);
81 if (ret)
82 goto fail;
83
31840ae1
ZY
84 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
85 objectid, trans->transid, 0, 0, 0);
8e8a1e31
JB
86 if (IS_ERR(leaf)) {
87 ret = PTR_ERR(leaf);
88 goto fail;
89 }
f46b5a66
CH
90
91 btrfs_set_header_nritems(leaf, 0);
92 btrfs_set_header_level(leaf, 0);
93 btrfs_set_header_bytenr(leaf, leaf->start);
94 btrfs_set_header_generation(leaf, trans->transid);
95 btrfs_set_header_owner(leaf, objectid);
96
97 write_extent_buffer(leaf, root->fs_info->fsid,
98 (unsigned long)btrfs_header_fsid(leaf),
99 BTRFS_FSID_SIZE);
100 btrfs_mark_buffer_dirty(leaf);
101
102 inode_item = &root_item.inode;
103 memset(inode_item, 0, sizeof(*inode_item));
104 inode_item->generation = cpu_to_le64(1);
105 inode_item->size = cpu_to_le64(3);
106 inode_item->nlink = cpu_to_le32(1);
a76a3cd4 107 inode_item->nbytes = cpu_to_le64(root->leafsize);
f46b5a66
CH
108 inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
109
110 btrfs_set_root_bytenr(&root_item, leaf->start);
84234f3a 111 btrfs_set_root_generation(&root_item, trans->transid);
f46b5a66
CH
112 btrfs_set_root_level(&root_item, 0);
113 btrfs_set_root_refs(&root_item, 1);
114 btrfs_set_root_used(&root_item, 0);
80ff3856 115 btrfs_set_root_last_snapshot(&root_item, 0);
f46b5a66
CH
116
117 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
118 root_item.drop_level = 0;
119
925baedd 120 btrfs_tree_unlock(leaf);
f46b5a66
CH
121 free_extent_buffer(leaf);
122 leaf = NULL;
123
124 btrfs_set_root_dirid(&root_item, new_dirid);
125
126 key.objectid = objectid;
127 key.offset = 1;
128 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
129 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
130 &root_item);
131 if (ret)
132 goto fail;
133
134 /*
135 * insert the directory item
136 */
137 key.offset = (u64)-1;
138 dir = root->fs_info->sb->s_root->d_inode;
139 ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
140 name, namelen, dir->i_ino, &key,
aec7477b 141 BTRFS_FT_DIR, 0);
f46b5a66
CH
142 if (ret)
143 goto fail;
144
145 ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
146 name, namelen, objectid,
aec7477b 147 root->fs_info->sb->s_root->d_inode->i_ino, 0);
f46b5a66
CH
148 if (ret)
149 goto fail;
150
151 ret = btrfs_commit_transaction(trans, root);
152 if (ret)
153 goto fail_commit;
154
155 new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen);
156 BUG_ON(!new_root);
157
158 trans = btrfs_start_transaction(new_root, 1);
159 BUG_ON(!trans);
160
cb8e7090 161 ret = btrfs_create_subvol_root(new_root, dentry, trans, new_dirid,
f46b5a66
CH
162 BTRFS_I(dir)->block_group);
163 if (ret)
164 goto fail;
165
f46b5a66
CH
166fail:
167 nr = trans->blocks_used;
168 err = btrfs_commit_transaction(trans, new_root);
169 if (err && !ret)
170 ret = err;
171fail_commit:
f46b5a66 172 btrfs_btree_balance_dirty(root, nr);
f46b5a66
CH
173 return ret;
174}
175
176static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
177{
178 struct btrfs_pending_snapshot *pending_snapshot;
179 struct btrfs_trans_handle *trans;
180 int ret;
181 int err;
182 unsigned long nr = 0;
183
184 if (!root->ref_cows)
185 return -EINVAL;
186
f46b5a66
CH
187 ret = btrfs_check_free_space(root, 1, 0);
188 if (ret)
189 goto fail_unlock;
190
191 pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS);
192 if (!pending_snapshot) {
193 ret = -ENOMEM;
194 goto fail_unlock;
195 }
196 pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
197 if (!pending_snapshot->name) {
198 ret = -ENOMEM;
199 kfree(pending_snapshot);
200 goto fail_unlock;
201 }
202 memcpy(pending_snapshot->name, name, namelen);
203 pending_snapshot->name[namelen] = '\0';
204 trans = btrfs_start_transaction(root, 1);
205 BUG_ON(!trans);
206 pending_snapshot->root = root;
207 list_add(&pending_snapshot->list,
208 &trans->transaction->pending_snapshots);
209 ret = btrfs_update_inode(trans, root, root->inode);
210 err = btrfs_commit_transaction(trans, root);
211
212fail_unlock:
f46b5a66 213 btrfs_btree_balance_dirty(root, nr);
f46b5a66
CH
214 return ret;
215}
216
cb8e7090
CH
217/* copy of may_create in fs/namei.c() */
218static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
219{
220 if (child->d_inode)
221 return -EEXIST;
222 if (IS_DEADDIR(dir))
223 return -ENOENT;
224 return inode_permission(dir, MAY_WRITE | MAY_EXEC);
225}
226
227/*
228 * Create a new subvolume below @parent. This is largely modeled after
229 * sys_mkdirat and vfs_mkdir, but we only do a single component lookup
230 * inside this filesystem so it's quite a bit simpler.
231 */
232static noinline int btrfs_mksubvol(struct path *parent, char *name,
233 int mode, int namelen)
234{
235 struct dentry *dentry;
236 int error;
237
238 mutex_lock_nested(&parent->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
239
240 dentry = lookup_one_len(name, parent->dentry, namelen);
241 error = PTR_ERR(dentry);
242 if (IS_ERR(dentry))
243 goto out_unlock;
244
245 error = -EEXIST;
246 if (dentry->d_inode)
247 goto out_dput;
248
249 if (!IS_POSIXACL(parent->dentry->d_inode))
250 mode &= ~current->fs->umask;
251 error = mnt_want_write(parent->mnt);
252 if (error)
253 goto out_dput;
254
255 error = btrfs_may_create(parent->dentry->d_inode, dentry);
256 if (error)
257 goto out_drop_write;
258
cb8e7090
CH
259 /*
260 * Actually perform the low-level subvolume creation after all
261 * this VFS fuzz.
262 *
263 * Eventually we want to pass in an inode under which we create this
264 * subvolume, but for now all are under the filesystem root.
265 *
266 * Also we should pass on the mode eventually to allow creating new
267 * subvolume with specific mode bits.
268 */
269 error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root, dentry,
270 name, namelen);
271 if (error)
272 goto out_drop_write;
273
274 fsnotify_mkdir(parent->dentry->d_inode, dentry);
275out_drop_write:
276 mnt_drop_write(parent->mnt);
277out_dput:
278 dput(dentry);
279out_unlock:
280 mutex_unlock(&parent->dentry->d_inode->i_mutex);
281 return error;
282}
283
284
f46b5a66
CH
285int btrfs_defrag_file(struct file *file)
286{
287 struct inode *inode = fdentry(file)->d_inode;
288 struct btrfs_root *root = BTRFS_I(inode)->root;
289 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3eaa2885 290 struct btrfs_ordered_extent *ordered;
f46b5a66
CH
291 struct page *page;
292 unsigned long last_index;
293 unsigned long ra_pages = root->fs_info->bdi.ra_pages;
294 unsigned long total_read = 0;
295 u64 page_start;
296 u64 page_end;
297 unsigned long i;
298 int ret;
299
f46b5a66 300 ret = btrfs_check_free_space(root, inode->i_size, 0);
f46b5a66
CH
301 if (ret)
302 return -ENOSPC;
303
304 mutex_lock(&inode->i_mutex);
305 last_index = inode->i_size >> PAGE_CACHE_SHIFT;
306 for (i = 0; i <= last_index; i++) {
307 if (total_read % ra_pages == 0) {
308 btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i,
309 min(last_index, i + ra_pages - 1));
310 }
311 total_read++;
3eaa2885 312again:
f46b5a66
CH
313 page = grab_cache_page(inode->i_mapping, i);
314 if (!page)
315 goto out_unlock;
316 if (!PageUptodate(page)) {
317 btrfs_readpage(NULL, page);
318 lock_page(page);
319 if (!PageUptodate(page)) {
320 unlock_page(page);
321 page_cache_release(page);
322 goto out_unlock;
323 }
324 }
325
f46b5a66 326 wait_on_page_writeback(page);
f46b5a66
CH
327
328 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
329 page_end = page_start + PAGE_CACHE_SIZE - 1;
f46b5a66 330 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
3eaa2885
CM
331
332 ordered = btrfs_lookup_ordered_extent(inode, page_start);
333 if (ordered) {
334 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
335 unlock_page(page);
336 page_cache_release(page);
337 btrfs_start_ordered_extent(inode, ordered, 1);
338 btrfs_put_ordered_extent(ordered);
339 goto again;
340 }
341 set_page_extent_mapped(page);
342
f87f057b
CM
343 /*
344 * this makes sure page_mkwrite is called on the
345 * page if it is dirtied again later
346 */
347 clear_page_dirty_for_io(page);
348
ea8c2819 349 btrfs_set_extent_delalloc(inode, page_start, page_end);
f46b5a66
CH
350
351 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
352 set_page_dirty(page);
353 unlock_page(page);
354 page_cache_release(page);
355 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
356 }
357
358out_unlock:
359 mutex_unlock(&inode->i_mutex);
360 return 0;
361}
362
363/*
364 * Called inside transaction, so use GFP_NOFS
365 */
366
367static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
368{
369 u64 new_size;
370 u64 old_size;
371 u64 devid = 1;
372 struct btrfs_ioctl_vol_args *vol_args;
373 struct btrfs_trans_handle *trans;
374 struct btrfs_device *device = NULL;
375 char *sizestr;
376 char *devstr = NULL;
377 int ret = 0;
378 int namelen;
379 int mod = 0;
380
c146afad
YZ
381 if (root->fs_info->sb->s_flags & MS_RDONLY)
382 return -EROFS;
383
f46b5a66
CH
384 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
385
386 if (!vol_args)
387 return -ENOMEM;
388
389 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
390 ret = -EFAULT;
391 goto out;
392 }
5516e595
MF
393
394 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
f46b5a66 395 namelen = strlen(vol_args->name);
f46b5a66 396
7d9eb12c 397 mutex_lock(&root->fs_info->volume_mutex);
f46b5a66
CH
398 sizestr = vol_args->name;
399 devstr = strchr(sizestr, ':');
400 if (devstr) {
401 char *end;
402 sizestr = devstr + 1;
403 *devstr = '\0';
404 devstr = vol_args->name;
405 devid = simple_strtoull(devstr, &end, 10);
406 printk(KERN_INFO "resizing devid %llu\n", devid);
407 }
2b82032c 408 device = btrfs_find_device(root, devid, NULL, NULL);
f46b5a66
CH
409 if (!device) {
410 printk(KERN_INFO "resizer unable to find device %llu\n", devid);
411 ret = -EINVAL;
412 goto out_unlock;
413 }
414 if (!strcmp(sizestr, "max"))
415 new_size = device->bdev->bd_inode->i_size;
416 else {
417 if (sizestr[0] == '-') {
418 mod = -1;
419 sizestr++;
420 } else if (sizestr[0] == '+') {
421 mod = 1;
422 sizestr++;
423 }
424 new_size = btrfs_parse_size(sizestr);
425 if (new_size == 0) {
426 ret = -EINVAL;
427 goto out_unlock;
428 }
429 }
430
431 old_size = device->total_bytes;
432
433 if (mod < 0) {
434 if (new_size > old_size) {
435 ret = -EINVAL;
436 goto out_unlock;
437 }
438 new_size = old_size - new_size;
439 } else if (mod > 0) {
440 new_size = old_size + new_size;
441 }
442
443 if (new_size < 256 * 1024 * 1024) {
444 ret = -EINVAL;
445 goto out_unlock;
446 }
447 if (new_size > device->bdev->bd_inode->i_size) {
448 ret = -EFBIG;
449 goto out_unlock;
450 }
451
452 do_div(new_size, root->sectorsize);
453 new_size *= root->sectorsize;
454
455 printk(KERN_INFO "new size for %s is %llu\n",
456 device->name, (unsigned long long)new_size);
457
458 if (new_size > old_size) {
459 trans = btrfs_start_transaction(root, 1);
460 ret = btrfs_grow_device(trans, device, new_size);
461 btrfs_commit_transaction(trans, root);
462 } else {
463 ret = btrfs_shrink_device(device, new_size);
464 }
465
466out_unlock:
7d9eb12c 467 mutex_unlock(&root->fs_info->volume_mutex);
f46b5a66
CH
468out:
469 kfree(vol_args);
470 return ret;
471}
472
cb8e7090 473static noinline int btrfs_ioctl_snap_create(struct file *file,
f46b5a66
CH
474 void __user *arg)
475{
cb8e7090 476 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
f46b5a66
CH
477 struct btrfs_ioctl_vol_args *vol_args;
478 struct btrfs_dir_item *di;
479 struct btrfs_path *path;
480 u64 root_dirid;
481 int namelen;
482 int ret;
483
c146afad
YZ
484 if (root->fs_info->sb->s_flags & MS_RDONLY)
485 return -EROFS;
486
f46b5a66
CH
487 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
488
489 if (!vol_args)
490 return -ENOMEM;
491
492 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
493 ret = -EFAULT;
494 goto out;
495 }
496
5516e595 497 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
f46b5a66 498 namelen = strlen(vol_args->name);
f46b5a66
CH
499 if (strchr(vol_args->name, '/')) {
500 ret = -EINVAL;
501 goto out;
502 }
503
504 path = btrfs_alloc_path();
505 if (!path) {
506 ret = -ENOMEM;
507 goto out;
508 }
509
510 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
f46b5a66
CH
511 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
512 path, root_dirid,
513 vol_args->name, namelen, 0);
f46b5a66
CH
514 btrfs_free_path(path);
515
516 if (di && !IS_ERR(di)) {
517 ret = -EEXIST;
518 goto out;
519 }
520
521 if (IS_ERR(di)) {
522 ret = PTR_ERR(di);
523 goto out;
524 }
525
cb8e7090
CH
526 if (root == root->fs_info->tree_root) {
527 ret = btrfs_mksubvol(&file->f_path, vol_args->name,
528 file->f_path.dentry->d_inode->i_mode,
529 namelen);
530 } else {
f46b5a66 531 ret = create_snapshot(root, vol_args->name, namelen);
cb8e7090
CH
532 }
533
f46b5a66
CH
534out:
535 kfree(vol_args);
536 return ret;
537}
538
539static int btrfs_ioctl_defrag(struct file *file)
540{
541 struct inode *inode = fdentry(file)->d_inode;
542 struct btrfs_root *root = BTRFS_I(inode)->root;
c146afad
YZ
543 int ret;
544
545 ret = mnt_want_write(file->f_path.mnt);
546 if (ret)
547 return ret;
f46b5a66
CH
548
549 switch (inode->i_mode & S_IFMT) {
550 case S_IFDIR:
f46b5a66
CH
551 btrfs_defrag_root(root, 0);
552 btrfs_defrag_root(root->fs_info->extent_root, 0);
f46b5a66
CH
553 break;
554 case S_IFREG:
555 btrfs_defrag_file(file);
556 break;
557 }
558
559 return 0;
560}
561
562long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
563{
564 struct btrfs_ioctl_vol_args *vol_args;
565 int ret;
566
567 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
568
569 if (!vol_args)
570 return -ENOMEM;
571
572 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
573 ret = -EFAULT;
574 goto out;
575 }
5516e595 576 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
f46b5a66
CH
577 ret = btrfs_init_new_device(root, vol_args->name);
578
579out:
580 kfree(vol_args);
581 return ret;
582}
583
584long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
585{
586 struct btrfs_ioctl_vol_args *vol_args;
587 int ret;
588
c146afad
YZ
589 if (root->fs_info->sb->s_flags & MS_RDONLY)
590 return -EROFS;
591
f46b5a66
CH
592 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
593
594 if (!vol_args)
595 return -ENOMEM;
596
597 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
598 ret = -EFAULT;
599 goto out;
600 }
5516e595 601 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
f46b5a66
CH
602 ret = btrfs_rm_device(root, vol_args->name);
603
604out:
605 kfree(vol_args);
606 return ret;
607}
608
c5c9cd4d
SW
609long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, u64 off,
610 u64 olen, u64 destoff)
f46b5a66
CH
611{
612 struct inode *inode = fdentry(file)->d_inode;
613 struct btrfs_root *root = BTRFS_I(inode)->root;
614 struct file *src_file;
615 struct inode *src;
616 struct btrfs_trans_handle *trans;
f46b5a66 617 struct btrfs_path *path;
f46b5a66 618 struct extent_buffer *leaf;
ae01a0ab
YZ
619 char *buf;
620 struct btrfs_key key;
f46b5a66
CH
621 u32 nritems;
622 int slot;
ae01a0ab 623 int ret;
c5c9cd4d
SW
624 u64 len = olen;
625 u64 bs = root->fs_info->sb->s_blocksize;
626 u64 hint_byte;
f46b5a66 627
c5c9cd4d
SW
628 /*
629 * TODO:
630 * - split compressed inline extents. annoying: we need to
631 * decompress into destination's address_space (the file offset
632 * may change, so source mapping won't do), then recompress (or
633 * otherwise reinsert) a subrange.
634 * - allow ranges within the same file to be cloned (provided
635 * they don't overlap)?
636 */
637
c146afad
YZ
638 ret = mnt_want_write(file->f_path.mnt);
639 if (ret)
640 return ret;
641
c5c9cd4d 642 src_file = fget(srcfd);
f46b5a66
CH
643 if (!src_file)
644 return -EBADF;
645 src = src_file->f_dentry->d_inode;
646
c5c9cd4d
SW
647 ret = -EINVAL;
648 if (src == inode)
649 goto out_fput;
650
ae01a0ab
YZ
651 ret = -EISDIR;
652 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
653 goto out_fput;
654
f46b5a66 655 ret = -EXDEV;
ae01a0ab
YZ
656 if (src->i_sb != inode->i_sb || BTRFS_I(src)->root != root)
657 goto out_fput;
658
659 ret = -ENOMEM;
660 buf = vmalloc(btrfs_level_size(root, 0));
661 if (!buf)
662 goto out_fput;
663
664 path = btrfs_alloc_path();
665 if (!path) {
666 vfree(buf);
f46b5a66 667 goto out_fput;
ae01a0ab
YZ
668 }
669 path->reada = 2;
f46b5a66
CH
670
671 if (inode < src) {
672 mutex_lock(&inode->i_mutex);
673 mutex_lock(&src->i_mutex);
674 } else {
675 mutex_lock(&src->i_mutex);
676 mutex_lock(&inode->i_mutex);
677 }
678
c5c9cd4d
SW
679 /* determine range to clone */
680 ret = -EINVAL;
681 if (off >= src->i_size || off + len > src->i_size)
f46b5a66 682 goto out_unlock;
c5c9cd4d
SW
683 if (len == 0)
684 olen = len = src->i_size - off;
685 /* if we extend to eof, continue to block boundary */
686 if (off + len == src->i_size)
687 len = ((src->i_size + bs-1) & ~(bs-1))
688 - off;
689
690 /* verify the end result is block aligned */
691 if ((off & (bs-1)) ||
692 ((off + len) & (bs-1)))
693 goto out_unlock;
694
695 printk("final src extent is %llu~%llu\n", off, len);
696 printk("final dst extent is %llu~%llu\n", destoff, len);
f46b5a66
CH
697
698 /* do any pending delalloc/csum calc on src, one way or
699 another, and lock file content */
700 while (1) {
31840ae1 701 struct btrfs_ordered_extent *ordered;
c5c9cd4d
SW
702 lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
703 ordered = btrfs_lookup_first_ordered_extent(inode, off+len);
ae01a0ab 704 if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered)
f46b5a66 705 break;
c5c9cd4d 706 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
ae01a0ab
YZ
707 if (ordered)
708 btrfs_put_ordered_extent(ordered);
c5c9cd4d 709 btrfs_wait_ordered_range(src, off, off+len);
f46b5a66
CH
710 }
711
ae01a0ab
YZ
712 trans = btrfs_start_transaction(root, 1);
713 BUG_ON(!trans);
714
c5c9cd4d
SW
715 /* punch hole in destination first */
716 btrfs_drop_extents(trans, root, inode, off, off+len, 0, &hint_byte);
717
718 /* clone data */
f46b5a66 719 key.objectid = src->i_ino;
ae01a0ab
YZ
720 key.type = BTRFS_EXTENT_DATA_KEY;
721 key.offset = 0;
f46b5a66
CH
722
723 while (1) {
724 /*
725 * note the key will change type as we walk through the
726 * tree.
727 */
728 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
729 if (ret < 0)
730 goto out;
731
ae01a0ab
YZ
732 nritems = btrfs_header_nritems(path->nodes[0]);
733 if (path->slots[0] >= nritems) {
f46b5a66
CH
734 ret = btrfs_next_leaf(root, path);
735 if (ret < 0)
736 goto out;
737 if (ret > 0)
738 break;
ae01a0ab 739 nritems = btrfs_header_nritems(path->nodes[0]);
f46b5a66
CH
740 }
741 leaf = path->nodes[0];
742 slot = path->slots[0];
f46b5a66 743
ae01a0ab 744 btrfs_item_key_to_cpu(leaf, &key, slot);
f46b5a66
CH
745 if (btrfs_key_type(&key) > BTRFS_CSUM_ITEM_KEY ||
746 key.objectid != src->i_ino)
747 break;
748
c5c9cd4d
SW
749 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
750 struct btrfs_file_extent_item *extent;
751 int type;
31840ae1
ZY
752 u32 size;
753 struct btrfs_key new_key;
c5c9cd4d
SW
754 u64 disko = 0, diskl = 0;
755 u64 datao = 0, datal = 0;
756 u8 comp;
31840ae1
ZY
757
758 size = btrfs_item_size_nr(leaf, slot);
759 read_extent_buffer(leaf, buf,
760 btrfs_item_ptr_offset(leaf, slot),
761 size);
c5c9cd4d
SW
762
763 extent = btrfs_item_ptr(leaf, slot,
764 struct btrfs_file_extent_item);
765 comp = btrfs_file_extent_compression(leaf, extent);
766 type = btrfs_file_extent_type(leaf, extent);
767 if (type == BTRFS_FILE_EXTENT_REG) {
768 disko = btrfs_file_extent_disk_bytenr(leaf, extent);
769 diskl = btrfs_file_extent_disk_num_bytes(leaf, extent);
770 datao = btrfs_file_extent_offset(leaf, extent);
771 datal = btrfs_file_extent_num_bytes(leaf, extent);
772 } else if (type == BTRFS_FILE_EXTENT_INLINE) {
773 /* take upper bound, may be compressed */
774 datal = btrfs_file_extent_ram_bytes(leaf,
775 extent);
776 }
31840ae1
ZY
777 btrfs_release_path(root, path);
778
c5c9cd4d
SW
779 if (key.offset + datal < off ||
780 key.offset >= off+len)
781 goto next;
782
31840ae1
ZY
783 memcpy(&new_key, &key, sizeof(new_key));
784 new_key.objectid = inode->i_ino;
c5c9cd4d 785 new_key.offset = key.offset + destoff - off;
31840ae1 786
c5c9cd4d
SW
787 if (type == BTRFS_FILE_EXTENT_REG) {
788 ret = btrfs_insert_empty_item(trans, root, path,
789 &new_key, size);
790 if (ret)
791 goto out;
792
793 leaf = path->nodes[0];
794 slot = path->slots[0];
795 write_extent_buffer(leaf, buf,
31840ae1
ZY
796 btrfs_item_ptr_offset(leaf, slot),
797 size);
ae01a0ab 798
c5c9cd4d 799 extent = btrfs_item_ptr(leaf, slot,
f46b5a66 800 struct btrfs_file_extent_item);
c5c9cd4d
SW
801 printk(" orig disk %llu~%llu data %llu~%llu\n",
802 disko, diskl, datao, datal);
803
804 if (off > key.offset) {
805 datao += off - key.offset;
806 datal -= off - key.offset;
807 }
808 if (key.offset + datao + datal + key.offset >
809 off + len)
810 datal = off + len - key.offset - datao;
811 /* disko == 0 means it's a hole */
812 if (!disko)
813 datao = 0;
814 printk(" final disk %llu~%llu data %llu~%llu\n",
815 disko, diskl, datao, datal);
816
817 btrfs_set_file_extent_offset(leaf, extent,
818 datao);
819 btrfs_set_file_extent_num_bytes(leaf, extent,
820 datal);
821 if (disko) {
822 inode_add_bytes(inode, datal);
ae01a0ab 823 ret = btrfs_inc_extent_ref(trans, root,
c5c9cd4d
SW
824 disko, diskl, leaf->start,
825 root->root_key.objectid,
826 trans->transid,
827 inode->i_ino);
31840ae1 828 BUG_ON(ret);
f46b5a66 829 }
c5c9cd4d
SW
830 } else if (type == BTRFS_FILE_EXTENT_INLINE) {
831 u64 skip = 0;
832 u64 trim = 0;
833 if (off > key.offset) {
834 skip = off - key.offset;
835 new_key.offset += skip;
836 }
837 if (key.offset + datal > off+len)
838 trim = key.offset + datal - (off+len);
839 printk("len %lld skip %lld trim %lld\n",
840 datal, skip, trim);
841 if (comp && (skip || trim)) {
842 printk("btrfs clone_range can't split compressed inline extents yet\n");
843 ret = -EINVAL;
844 goto out;
845 }
846 size -= skip + trim;
847 datal -= skip + trim;
848 ret = btrfs_insert_empty_item(trans, root, path,
849 &new_key, size);
850 if (ret)
851 goto out;
852
853 if (skip) {
854 u32 start = btrfs_file_extent_calc_inline_size(0);
855 memmove(buf+start, buf+start+skip,
856 datal);
857 }
858
859 leaf = path->nodes[0];
860 slot = path->slots[0];
861 write_extent_buffer(leaf, buf,
862 btrfs_item_ptr_offset(leaf, slot),
863 size);
864 inode_add_bytes(inode, datal);
f46b5a66 865 }
c5c9cd4d
SW
866
867 btrfs_mark_buffer_dirty(leaf);
ae01a0ab 868 }
c5c9cd4d
SW
869
870 if (btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) {
871 u32 size;
872 struct btrfs_key new_key;
873 u64 coverslen;
874 int coff, clen;
875
876 size = btrfs_item_size_nr(leaf, slot);
877 coverslen = (size / BTRFS_CRC32_SIZE) <<
878 root->fs_info->sb->s_blocksize_bits;
879 printk("csums for %llu~%llu\n",
880 key.offset, coverslen);
881 if (key.offset + coverslen < off ||
882 key.offset >= off+len)
883 goto next;
884
885 read_extent_buffer(leaf, buf,
886 btrfs_item_ptr_offset(leaf, slot),
887 size);
888 btrfs_release_path(root, path);
889
890 coff = 0;
891 if (off > key.offset)
892 coff = ((off - key.offset) >>
893 root->fs_info->sb->s_blocksize_bits) *
894 BTRFS_CRC32_SIZE;
895 clen = size - coff;
896 if (key.offset + coverslen > off+len)
897 clen -= ((key.offset+coverslen-off-len) >>
898 root->fs_info->sb->s_blocksize_bits) *
899 BTRFS_CRC32_SIZE;
900 printk(" will dup %d~%d of %d\n",
901 coff, clen, size);
902
903 memcpy(&new_key, &key, sizeof(new_key));
904 new_key.objectid = inode->i_ino;
905 new_key.offset = key.offset + destoff - off;
906
907 ret = btrfs_insert_empty_item(trans, root, path,
908 &new_key, clen);
909 if (ret)
910 goto out;
911
912 leaf = path->nodes[0];
913 slot = path->slots[0];
914 write_extent_buffer(leaf, buf + coff,
915 btrfs_item_ptr_offset(leaf, slot),
916 clen);
917 btrfs_mark_buffer_dirty(leaf);
918 }
919
920 next:
31840ae1 921 btrfs_release_path(root, path);
f46b5a66 922 key.offset++;
f46b5a66 923 }
f46b5a66
CH
924 ret = 0;
925out:
ae01a0ab
YZ
926 btrfs_release_path(root, path);
927 if (ret == 0) {
928 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
c5c9cd4d
SW
929 if (destoff + olen > inode->i_size)
930 btrfs_i_size_write(inode, destoff + olen);
ae01a0ab
YZ
931 BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
932 ret = btrfs_update_inode(trans, root, inode);
933 }
f46b5a66 934 btrfs_end_transaction(trans, root);
c5c9cd4d 935 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
ae01a0ab
YZ
936 if (ret)
937 vmtruncate(inode, 0);
f46b5a66
CH
938out_unlock:
939 mutex_unlock(&src->i_mutex);
940 mutex_unlock(&inode->i_mutex);
ae01a0ab
YZ
941 vfree(buf);
942 btrfs_free_path(path);
f46b5a66
CH
943out_fput:
944 fput(src_file);
945 return ret;
946}
947
c5c9cd4d
SW
948long btrfs_ioctl_clone_range(struct file *file, unsigned long argptr)
949{
950 struct btrfs_ioctl_clone_range_args args;
951
952 if (copy_from_user(&args, (void *)argptr, sizeof(args)))
953 return -EFAULT;
954 return btrfs_ioctl_clone(file, args.src_fd, args.src_offset,
955 args.src_length, args.dest_offset);
956}
957
f46b5a66
CH
958/*
959 * there are many ways the trans_start and trans_end ioctls can lead
960 * to deadlocks. They should only be used by applications that
961 * basically own the machine, and have a very in depth understanding
962 * of all the possible deadlocks and enospc problems.
963 */
964long btrfs_ioctl_trans_start(struct file *file)
965{
966 struct inode *inode = fdentry(file)->d_inode;
967 struct btrfs_root *root = BTRFS_I(inode)->root;
968 struct btrfs_trans_handle *trans;
969 int ret = 0;
970
df5b5520
CH
971 if (!capable(CAP_SYS_ADMIN))
972 return -EPERM;
973
f46b5a66
CH
974 if (file->private_data) {
975 ret = -EINPROGRESS;
976 goto out;
977 }
9ca9ee09 978
c146afad
YZ
979 ret = mnt_want_write(file->f_path.mnt);
980 if (ret)
981 goto out;
982
9ca9ee09
SW
983 mutex_lock(&root->fs_info->trans_mutex);
984 root->fs_info->open_ioctl_trans++;
985 mutex_unlock(&root->fs_info->trans_mutex);
986
987 trans = btrfs_start_ioctl_transaction(root, 0);
f46b5a66
CH
988 if (trans)
989 file->private_data = trans;
990 else
991 ret = -ENOMEM;
992 /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/
993out:
f46b5a66
CH
994 return ret;
995}
996
997/*
998 * there are many ways the trans_start and trans_end ioctls can lead
999 * to deadlocks. They should only be used by applications that
1000 * basically own the machine, and have a very in depth understanding
1001 * of all the possible deadlocks and enospc problems.
1002 */
1003long btrfs_ioctl_trans_end(struct file *file)
1004{
1005 struct inode *inode = fdentry(file)->d_inode;
1006 struct btrfs_root *root = BTRFS_I(inode)->root;
1007 struct btrfs_trans_handle *trans;
1008 int ret = 0;
1009
f46b5a66
CH
1010 trans = file->private_data;
1011 if (!trans) {
1012 ret = -EINVAL;
1013 goto out;
1014 }
1015 btrfs_end_transaction(trans, root);
b214107e 1016 file->private_data = NULL;
9ca9ee09
SW
1017
1018 mutex_lock(&root->fs_info->trans_mutex);
1019 root->fs_info->open_ioctl_trans--;
1020 mutex_unlock(&root->fs_info->trans_mutex);
1021
f46b5a66 1022out:
f46b5a66
CH
1023 return ret;
1024}
1025
1026long btrfs_ioctl(struct file *file, unsigned int
1027 cmd, unsigned long arg)
1028{
1029 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
1030
1031 switch (cmd) {
1032 case BTRFS_IOC_SNAP_CREATE:
cb8e7090 1033 return btrfs_ioctl_snap_create(file, (void __user *)arg);
f46b5a66
CH
1034 case BTRFS_IOC_DEFRAG:
1035 return btrfs_ioctl_defrag(file);
1036 case BTRFS_IOC_RESIZE:
1037 return btrfs_ioctl_resize(root, (void __user *)arg);
1038 case BTRFS_IOC_ADD_DEV:
1039 return btrfs_ioctl_add_dev(root, (void __user *)arg);
1040 case BTRFS_IOC_RM_DEV:
1041 return btrfs_ioctl_rm_dev(root, (void __user *)arg);
1042 case BTRFS_IOC_BALANCE:
1043 return btrfs_balance(root->fs_info->dev_root);
1044 case BTRFS_IOC_CLONE:
c5c9cd4d
SW
1045 return btrfs_ioctl_clone(file, arg, 0, 0, 0);
1046 case BTRFS_IOC_CLONE_RANGE:
1047 return btrfs_ioctl_clone_range(file, arg);
f46b5a66
CH
1048 case BTRFS_IOC_TRANS_START:
1049 return btrfs_ioctl_trans_start(file);
1050 case BTRFS_IOC_TRANS_END:
1051 return btrfs_ioctl_trans_end(file);
1052 case BTRFS_IOC_SYNC:
ea8c2819 1053 btrfs_start_delalloc_inodes(root);
f46b5a66
CH
1054 btrfs_sync_fs(file->f_dentry->d_sb, 1);
1055 return 0;
1056 }
1057
1058 return -ENOTTY;
1059}