1 #include <linux/module.h>
2 #include <linux/buffer_head.h>
4 #include <linux/pagemap.h>
5 #include <linux/highmem.h>
6 #include <linux/time.h>
7 #include <linux/init.h>
8 #include <linux/string.h>
9 #include <linux/smp_lock.h>
10 #include <linux/backing-dev.h>
11 #include <linux/mpage.h>
12 #include <linux/swap.h>
13 #include <linux/writeback.h>
16 #include "transaction.h"
17 #include "btrfs_inode.h"
/*
 * Release hook installed in btrfs_fsinfo_ktype.  Recovers the enclosing
 * struct btrfs_fs_info from @obj through its embedded kobj member.
 */
20 void btrfs_fsinfo_release(struct kobject *obj)
22 struct btrfs_fs_info *fsinfo = container_of(obj,
23 struct btrfs_fs_info, kobj);
/* kobj_type whose release hook is btrfs_fsinfo_release(). */
27 struct kobj_type btrfs_fsinfo_ktype = {
28 .release = btrfs_fsinfo_release,
/*
 * Opaque argument handed to the iget5_locked() callbacks.
 * btrfs_find_actor() and btrfs_init_locked_inode() read its ino and root
 * members to identify an inode.
 */
31 struct btrfs_iget_args {
33 struct btrfs_root *root;
/* Declares the btrfs subsystem with btrfs_fsinfo_ktype as its ktype. */
36 decl_subsys(btrfs, &btrfs_fsinfo_ktype, NULL);
/* Value stored in sb->s_magic by btrfs_fill_super(). */
38 #define BTRFS_SUPER_MAGIC 0x9123682E
/*
 * Forward declarations of the operation tables that the functions below
 * install on inodes, address spaces and the super block.
 */
40 static struct inode_operations btrfs_dir_inode_operations;
41 static struct inode_operations btrfs_dir_ro_inode_operations;
42 static struct super_operations btrfs_super_ops;
43 static struct file_operations btrfs_dir_file_operations;
44 static struct inode_operations btrfs_file_inode_operations;
45 static struct address_space_operations btrfs_aops;
46 static struct file_operations btrfs_file_operations;
/*
 * Populate a VFS inode from its on-disk inode item.
 *
 * Called by btrfs_lookup() and btrfs_fill_super() for inodes still flagged
 * I_NEW.  The item is looked up at BTRFS_I(inode)->location with fs_mutex
 * held; its mode, link count, owner, size, timestamps, block count and
 * generation are copied into @inode, and the operation tables are then
 * chosen from the S_IFMT bits of i_mode.  The trailing statements release
 * the path, drop fs_mutex and mark the inode bad.
 */
48 static void btrfs_read_locked_inode(struct inode *inode)
50 struct btrfs_path *path;
51 struct btrfs_inode_item *inode_item;
52 struct btrfs_root *root = BTRFS_I(inode)->root;
53 struct btrfs_key location;
56 path = btrfs_alloc_path();
58 btrfs_init_path(path);
59 mutex_lock(&root->fs_info->fs_mutex);
/* Work on a local copy of the key identifying this inode's item. */
61 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
62 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
/*
 * NOTE(review): path is freed here and freed again by the cleanup at the
 * end of the function; confirm a failed lookup cannot reach both.
 */
64 btrfs_free_path(path);
67 inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
69 struct btrfs_inode_item);
/* Copy the on-disk fields into the in-memory inode. */
71 inode->i_mode = btrfs_inode_mode(inode_item);
72 inode->i_nlink = btrfs_inode_nlink(inode_item);
73 inode->i_uid = btrfs_inode_uid(inode_item);
74 inode->i_gid = btrfs_inode_gid(inode_item);
75 inode->i_size = btrfs_inode_size(inode_item);
76 inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
77 inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
78 inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
79 inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
80 inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
81 inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
82 inode->i_blocks = btrfs_inode_nblocks(inode_item);
83 inode->i_generation = btrfs_inode_generation(inode_item);
85 btrfs_free_path(path);
88 mutex_unlock(&root->fs_info->fs_mutex);
/* Pick the operation tables from the file type bits. */
90 switch (inode->i_mode & S_IFMT) {
/*
 * NOTE(review): inode_item was obtained from the leaf reached through path,
 * which has already been freed above; confirm it is still valid here.
 */
93 init_special_inode(inode, inode->i_mode,
94 btrfs_inode_rdev(inode_item));
98 inode->i_mapping->a_ops = &btrfs_aops;
99 inode->i_fop = &btrfs_file_operations;
100 inode->i_op = &btrfs_file_inode_operations;
103 inode->i_fop = &btrfs_dir_file_operations;
/* Directories that live in the tree root get the read-only table. */
104 if (root == root->fs_info->tree_root)
105 inode->i_op = &btrfs_dir_ro_inode_operations;
107 inode->i_op = &btrfs_dir_inode_operations;
110 // inode->i_op = &page_symlink_inode_operations;
/* Cleanup that ends in make_bad_inode(). */
116 btrfs_release_path(root, path);
117 btrfs_free_path(path);
118 mutex_unlock(&root->fs_info->fs_mutex);
119 make_bad_inode(inode);
/*
 * Remove the directory entry described by @dentry from dir inside the
 * transaction @trans.
 *
 * The dir item is looked up by name and deleted, then the dir index item
 * is looked up by the objectid recorded in the first item and deleted as
 * well.  Afterwards the target inode takes dir's ctime and loses one link,
 * and dir->i_size shrinks by twice the name length (btrfs_add_link() grows
 * it by the same amount).
 */
122 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
123 struct btrfs_root *root,
125 struct dentry *dentry)
127 struct btrfs_path *path;
128 const char *name = dentry->d_name.name;
129 int name_len = dentry->d_name.len;
132 struct btrfs_dir_item *di;
134 path = btrfs_alloc_path();
136 btrfs_init_path(path);
137 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
/* Remember which object the entry points at before deleting it. */
147 objectid = btrfs_disk_key_objectid(&di->location);
148 ret = btrfs_delete_one_dir_name(trans, root, path, di);
150 btrfs_release_path(root, path);
/* Second pass: remove the index item for the same name and objectid. */
152 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
153 objectid, name, name_len, -1);
162 ret = btrfs_delete_one_dir_name(trans, root, path, di);
165 dentry->d_inode->i_ctime = dir->i_ctime;
167 btrfs_free_path(path);
169 inode_dec_link_count(dentry->d_inode);
170 dir->i_size -= name_len * 2;
171 mark_inode_dirty(dir);
/*
 * Unlink @dentry from @dir: takes fs_mutex, runs btrfs_unlink_trans() inside
 * a freshly started transaction, ends the transaction and drops the mutex.
 */
176 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
178 struct btrfs_root *root;
179 struct btrfs_trans_handle *trans;
182 root = BTRFS_I(dir)->root;
183 mutex_lock(&root->fs_info->fs_mutex);
184 trans = btrfs_start_transaction(root, 1);
185 ret = btrfs_unlink_trans(trans, root, dir, dentry);
186 btrfs_end_transaction(trans, root);
187 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Remove the directory @dentry from @dir.
 *
 * Searches the tree from the highest key of the victim inode, examining the
 * items that belong to it.  Only dir items / dir index items whose name
 * matches the first one or two bytes of goodnames are accepted and deleted;
 * once those are gone the entry in the parent is removed through
 * btrfs_unlink_trans().
 */
191 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
193 struct inode *inode = dentry->d_inode;
196 struct btrfs_root *root = BTRFS_I(dir)->root;
197 struct btrfs_path *path;
198 struct btrfs_key key;
199 struct btrfs_trans_handle *trans;
200 struct btrfs_key found_key;
202 struct btrfs_leaf *leaf;
/* Matched with length 2 and with length 1 below. */
203 char *goodnames = "..";
205 path = btrfs_alloc_path();
207 btrfs_init_path(path);
208 mutex_lock(&root->fs_info->fs_mutex);
209 trans = btrfs_start_transaction(root, 1);
/* Start from the largest possible offset for this inode's keys. */
210 key.objectid = inode->i_ino;
211 key.offset = (u64)-1;
214 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
220 if (path->slots[0] == 0) {
225 leaf = btrfs_buffer_leaf(path->nodes[0]);
226 btrfs_disk_key_to_cpu(&found_key,
227 &leaf->items[path->slots[0]].key);
228 found_type = btrfs_key_type(&found_key);
229 if (found_key.objectid != inode->i_ino) {
/* Anything other than an entry named by goodnames fails this test. */
233 if ((found_type != BTRFS_DIR_ITEM_KEY &&
234 found_type != BTRFS_DIR_INDEX_KEY) ||
235 (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
236 !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
240 ret = btrfs_del_item(trans, root, path);
243 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
245 btrfs_release_path(root, path);
248 btrfs_release_path(root, path);
250 /* now the directory is empty */
251 err = btrfs_unlink_trans(trans, root, dir, dentry);
256 btrfs_release_path(root, path);
257 btrfs_free_path(path);
/*
 * NOTE(review): fs_mutex is dropped before btrfs_end_transaction() here,
 * whereas btrfs_unlink() and btrfs_create() end the transaction first;
 * confirm the ordering is intended.
 */
258 mutex_unlock(&root->fs_info->fs_mutex);
259 ret = btrfs_end_transaction(trans, root);
/*
 * Delete the inode item of an inode: looks it up at
 * BTRFS_I(inode)->location inside @trans and removes the item found.
 */
265 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
266 struct btrfs_root *root,
269 struct btrfs_path *path;
274 path = btrfs_alloc_path();
276 btrfs_init_path(path);
277 ret = btrfs_lookup_inode(trans, root, path,
278 &BTRFS_I(inode)->location, -1);
280 ret = btrfs_del_item(trans, root, path);
282 btrfs_free_path(path);
/*
 * Drop the file items of an inode that lie at or beyond inode->i_size.
 *
 * Searches from the largest key of the inode and inspects csum, inline data
 * and extent data items.  An item whose key offset is below i_size ends the
 * walk.  For a non-inline extent data item the disk block number and block
 * count are read before the item is deleted, and the extent is then handed
 * to btrfs_free_extent().
 */
286 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
287 struct btrfs_root *root,
291 struct btrfs_path *path;
292 struct btrfs_key key;
293 struct btrfs_disk_key *found_key;
294 struct btrfs_leaf *leaf;
295 struct btrfs_file_extent_item *fi = NULL;
296 u64 extent_start = 0;
297 u64 extent_num_blocks = 0;
300 path = btrfs_alloc_path();
302 /* FIXME, add redo link to tree so we don't leak on crash */
303 key.objectid = inode->i_ino;
304 key.offset = (u64)-1;
307 * use BTRFS_CSUM_ITEM_KEY because it is larger than inline keys
310 btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY);
312 btrfs_init_path(path);
313 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
318 BUG_ON(path->slots[0] == 0);
321 leaf = btrfs_buffer_leaf(path->nodes[0]);
322 found_key = &leaf->items[path->slots[0]].key;
/* Only items of this inode, and only the three file item types, qualify. */
323 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
325 if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY &&
326 btrfs_disk_key_type(found_key) != BTRFS_INLINE_DATA_KEY &&
327 btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY)
329 if (btrfs_disk_key_offset(found_key) < inode->i_size)
332 if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) {
333 fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
335 struct btrfs_file_extent_item);
/* Inline extents carry no separate disk extent to free. */
336 if (btrfs_file_extent_type(fi) !=
337 BTRFS_FILE_EXTENT_INLINE) {
339 btrfs_file_extent_disk_blocknr(fi);
341 btrfs_file_extent_disk_num_blocks(fi);
342 /* FIXME blocksize != 4096 */
344 btrfs_file_extent_num_blocks(fi) << 3;
348 ret = btrfs_del_item(trans, root, path);
350 btrfs_release_path(root, path);
352 ret = btrfs_free_extent(trans, root, extent_start,
353 extent_num_blocks, 0);
359 btrfs_release_path(root, path);
360 btrfs_free_path(path);
/*
 * Delete an inode: drops its page cache, checks for a bad inode, and then,
 * under fs_mutex and inside one transaction, truncates regular files through
 * btrfs_truncate_in_trans() and removes the inode item through
 * btrfs_free_inode().
 */
364 static void btrfs_delete_inode(struct inode *inode)
366 struct btrfs_trans_handle *trans;
367 struct btrfs_root *root = BTRFS_I(inode)->root;
370 truncate_inode_pages(&inode->i_data, 0);
371 if (is_bad_inode(inode)) {
375 mutex_lock(&root->fs_info->fs_mutex);
376 trans = btrfs_start_transaction(root, 1);
377 if (S_ISREG(inode->i_mode)) {
378 ret = btrfs_truncate_in_trans(trans, root, inode);
381 btrfs_free_inode(trans, root, inode);
382 btrfs_end_transaction(trans, root);
383 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Resolve the name in @dentry inside directory @dir to a key.
 *
 * On a successful dir item lookup the item's location is converted to CPU
 * byte order and stored in @location.  When the lookup yields NULL or an
 * error pointer, location->objectid is set to 0, which btrfs_lookup() tests
 * for.
 */
389 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
390 struct btrfs_key *location)
392 const char *name = dentry->d_name.name;
393 int namelen = dentry->d_name.len;
394 struct btrfs_dir_item *di;
395 struct btrfs_path *path;
396 struct btrfs_root *root = BTRFS_I(dir)->root;
399 path = btrfs_alloc_path();
401 btrfs_init_path(path);
402 di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
404 if (!di || IS_ERR(di)) {
405 location->objectid = 0;
409 btrfs_disk_key_to_cpu(location, &di->location);
411 btrfs_release_path(root, path);
412 btrfs_free_path(path);
/*
 * Redirect a lookup result that names a root item to that root's directory.
 *
 * Keys that are not of type BTRFS_ROOT_ITEM_KEY, or that name
 * BTRFS_ROOT_TREE_OBJECTID, are tested for first.  Otherwise the root is
 * read with btrfs_read_fs_root() and stored in *sub_root, and @location is
 * rewritten to the inode item key (offset 0) of that root's dirid.
 */
416 int fixup_tree_root_location(struct btrfs_root *root,
417 struct btrfs_key *location,
418 struct btrfs_root **sub_root)
420 struct btrfs_path *path;
421 struct btrfs_root_item *ri;
423 if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
425 if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
428 path = btrfs_alloc_path();
430 mutex_lock(&root->fs_info->fs_mutex);
432 *sub_root = btrfs_read_fs_root(root->fs_info, location);
/*
 * NOTE(review): this early return appears to leave fs_mutex held and path
 * unfreed; the unlock and free only happen at the end of the function.
 */
433 if (IS_ERR(*sub_root))
434 return PTR_ERR(*sub_root);
436 ri = &(*sub_root)->root_item;
437 location->objectid = btrfs_root_dirid(ri);
439 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
440 location->offset = 0;
442 btrfs_free_path(path);
443 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Callback passed to iget5_locked() by btrfs_iget_locked(): seeds i_ino and
 * the btrfs root of @inode from the struct btrfs_iget_args in @p.
 */
447 int btrfs_init_locked_inode(struct inode *inode, void *p)
449 struct btrfs_iget_args *args = p;
450 inode->i_ino = args->ino;
451 BTRFS_I(inode)->root = args->root;
/*
 * Callback passed to iget5_locked() by btrfs_iget_locked(): an inode matches
 * when both its inode number and its btrfs root equal the ones carried in
 * the struct btrfs_iget_args behind @opaque.
 */
455 int btrfs_find_actor(struct inode *inode, void *opaque)
457 struct btrfs_iget_args *args = opaque;
458 return (args->ino == inode->i_ino &&
459 args->root == BTRFS_I(inode)->root);
/*
 * Obtain the inode for @objectid on super block @s via iget5_locked(), using
 * btrfs_find_actor() to match and btrfs_init_locked_inode() to initialise.
 */
462 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
463 struct btrfs_root *root)
466 struct btrfs_iget_args args;
470 inode = iget5_locked(s, objectid, btrfs_find_actor,
471 btrfs_init_locked_inode,
/*
 * Look up @dentry in directory @dir.
 *
 * Names longer than BTRFS_NAME_LEN yield -ENAMETOOLONG.  The name is
 * resolved to a key with btrfs_inode_by_name() under fs_mutex; a non-zero
 * objectid is passed through fixup_tree_root_location(), which may switch
 * sub_root to another root.  A new (I_NEW) inode gets its root and location
 * recorded and is filled in by btrfs_read_locked_inode() before being
 * unlocked.  The result is attached with d_splice_alias().
 */
476 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
477 struct nameidata *nd)
479 struct inode * inode;
480 struct btrfs_inode *bi = BTRFS_I(dir);
481 struct btrfs_root *root = bi->root;
482 struct btrfs_root *sub_root = root;
483 struct btrfs_key location;
486 if (dentry->d_name.len > BTRFS_NAME_LEN)
487 return ERR_PTR(-ENAMETOOLONG);
488 mutex_lock(&root->fs_info->fs_mutex);
489 ret = btrfs_inode_by_name(dir, dentry, &location);
490 mutex_unlock(&root->fs_info->fs_mutex);
/* objectid 0 is what btrfs_inode_by_name() stores when nothing was found. */
494 if (location.objectid) {
495 ret = fixup_tree_root_location(root, &location, &sub_root);
499 return ERR_PTR(-ENOENT);
500 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
503 return ERR_PTR(-EACCES);
504 if (inode->i_state & I_NEW) {
/* The name resolved into a different root: remember its inode. */
505 if (sub_root != root) {
506 printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root);
508 sub_root->inode = inode;
510 BTRFS_I(inode)->root = sub_root;
511 memcpy(&BTRFS_I(inode)->location, &location,
513 btrfs_read_locked_inode(inode);
514 unlock_new_inode(inode);
517 return d_splice_alias(inode, dentry);
/*
 * Emit directory entries for @filp through @filldir.
 *
 * Entries are read from BTRFS_DIR_INDEX_KEY items, or from
 * BTRFS_DIR_ITEM_KEY items when the directory lives in the tree root.  The
 * search starts at key offset filp->f_pos and walks items (moving to the
 * next leaf when the current one is exhausted) for as long as objectid and
 * key type match.  f_pos is updated to the key offset of each item visited,
 * and every btrfs_dir_item stored in the item is reported; the name bytes
 * follow the structure directly.
 */
520 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
522 struct inode *inode = filp->f_path.dentry->d_inode;
523 struct btrfs_root *root = BTRFS_I(inode)->root;
524 struct btrfs_item *item;
525 struct btrfs_dir_item *di;
526 struct btrfs_key key;
527 struct btrfs_path *path;
530 struct btrfs_leaf *leaf;
533 unsigned char d_type = DT_UNKNOWN;
538 int key_type = BTRFS_DIR_INDEX_KEY;
540 /* FIXME, use a real flag for deciding about the key type */
541 if (root->fs_info->tree_root == root)
542 key_type = BTRFS_DIR_ITEM_KEY;
543 mutex_lock(&root->fs_info->fs_mutex);
544 key.objectid = inode->i_ino;
546 btrfs_set_key_type(&key, key_type);
547 key.offset = filp->f_pos;
548 path = btrfs_alloc_path();
549 btrfs_init_path(path);
550 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
555 leaf = btrfs_buffer_leaf(path->nodes[0]);
556 nritems = btrfs_header_nritems(&leaf->header);
557 slot = path->slots[0];
558 if (advance || slot >= nritems) {
/* Last slot of this leaf reached: continue in the next leaf. */
559 if (slot >= nritems -1) {
560 ret = btrfs_next_leaf(root, path);
563 leaf = btrfs_buffer_leaf(path->nodes[0]);
564 nritems = btrfs_header_nritems(&leaf->header);
565 slot = path->slots[0];
572 item = leaf->items + slot;
573 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
575 if (btrfs_disk_key_type(&item->key) != key_type)
577 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
579 filp->f_pos = btrfs_disk_key_offset(&item->key);
581 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
583 di_total = btrfs_item_size(leaf->items + slot);
/* One item may hold several dir entries laid out back to back. */
584 while(di_cur < di_total) {
585 over = filldir(dirent, (const char *)(di + 1),
586 btrfs_dir_name_len(di),
587 btrfs_disk_key_offset(&item->key),
588 btrfs_disk_key_objectid(&di->location),
/* Step over this entry: header plus its name bytes. */
592 di_len = btrfs_dir_name_len(di) + sizeof(*di);
594 di = (struct btrfs_dir_item *)((char *)di + di_len);
601 btrfs_release_path(root, path);
602 btrfs_free_path(path);
603 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Tear down the super block: closes the tree root stored behind @sb with
 * close_ctree(), logs its return value and clears sb->s_fs_info.
 */
607 static void btrfs_put_super (struct super_block * sb)
609 struct btrfs_root *root = btrfs_sb(sb);
612 ret = close_ctree(root);
614 printk("close ctree returns %d\n", ret);
616 sb->s_fs_info = NULL;
/*
 * Fill in a super block at mount time.
 *
 * Sets the size limit, magic and super operations, opens the tree root with
 * open_ctree() and stores it in sb->s_fs_info, then obtains the inode whose
 * number is the super block's root_dir, gives it an inode item key in the
 * tree root, reads it if it is new, and installs it as sb->s_root.
 */
619 static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
621 struct inode * inode;
622 struct dentry * root_dentry;
623 struct btrfs_super_block *disk_super;
624 struct btrfs_root *tree_root;
625 struct btrfs_inode *bi;
627 sb->s_maxbytes = MAX_LFS_FILESIZE;
628 sb->s_magic = BTRFS_SUPER_MAGIC;
629 sb->s_op = &btrfs_super_ops;
632 tree_root = open_ctree(sb);
635 printk("btrfs: open_ctree failed\n");
638 sb->s_fs_info = tree_root;
639 disk_super = tree_root->fs_info->disk_super;
640 printk("read in super total blocks %Lu root %Lu\n",
641 btrfs_super_total_blocks(disk_super),
642 btrfs_super_root_dir(disk_super));
644 inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super),
/* Key of the root directory's inode item, rooted in the tree root. */
647 bi->location.objectid = inode->i_ino;
648 bi->location.offset = 0;
649 bi->location.flags = 0;
650 bi->root = tree_root;
651 btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
655 if (inode->i_state & I_NEW) {
656 btrfs_read_locked_inode(inode);
657 unlock_new_inode(inode);
660 root_dentry = d_alloc_root(inode);
665 sb->s_root = root_dentry;
/*
 * Copy the fields of a VFS inode into an inode item: owner, size, mode,
 * link count, the three timestamps, block count and generation.  This is
 * the inverse of the field copies done in btrfs_read_locked_inode().
 */
670 static void fill_inode_item(struct btrfs_inode_item *item,
673 btrfs_set_inode_uid(item, inode->i_uid);
674 btrfs_set_inode_gid(item, inode->i_gid);
675 btrfs_set_inode_size(item, inode->i_size);
676 btrfs_set_inode_mode(item, inode->i_mode);
677 btrfs_set_inode_nlink(item, inode->i_nlink);
678 btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
679 btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
680 btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
681 btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
682 btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
683 btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
684 btrfs_set_inode_nblocks(item, inode->i_blocks);
685 btrfs_set_inode_generation(item, inode->i_generation);
/*
 * Write the current state of an inode into its inode item inside @trans:
 * looks the item up at BTRFS_I(inode)->location, refills it with
 * fill_inode_item() and marks the leaf buffer dirty.
 */
688 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
689 struct btrfs_root *root,
692 struct btrfs_inode_item *inode_item;
693 struct btrfs_path *path;
696 path = btrfs_alloc_path();
698 btrfs_init_path(path);
699 ret = btrfs_lookup_inode(trans, root, path,
700 &BTRFS_I(inode)->location, 1);
707 inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
709 struct btrfs_inode_item);
711 fill_inode_item(inode_item, inode);
712 btrfs_mark_buffer_dirty(path->nodes[0]);
715 btrfs_release_path(root, path);
716 btrfs_free_path(path);
/*
 * Write back an inode: under fs_mutex, starts a transaction and updates the
 * inode item with btrfs_update_inode().  The transaction is then committed
 * or ended (presumably depending on @wait -- the test is not shown).
 */
720 static int btrfs_write_inode(struct inode *inode, int wait)
722 struct btrfs_root *root = BTRFS_I(inode)->root;
723 struct btrfs_trans_handle *trans;
726 mutex_lock(&root->fs_info->fs_mutex);
727 trans = btrfs_start_transaction(root, 1);
728 ret = btrfs_update_inode(trans, root, inode);
730 btrfs_commit_transaction(trans, root);
732 btrfs_end_transaction(trans, root);
733 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Allocate and insert a new inode numbered @objectid with mode @mode.
 *
 * The VFS inode is owned by the current fsuid/fsgid and stamped with
 * CURRENT_TIME.  An inode item built from it on the stack is inserted into
 * @root with btrfs_insert_inode(), the inode's location key is set to the
 * inode item key of @objectid, and the inode is hashed.  Returns
 * ERR_PTR(-ENOMEM) when no inode could be allocated.
 */
737 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
738 struct btrfs_root *root,
739 u64 objectid, int mode)
742 struct btrfs_inode_item inode_item;
743 struct btrfs_key *location;
746 inode = new_inode(root->fs_info->sb);
748 return ERR_PTR(-ENOMEM);
750 BTRFS_I(inode)->root = root;
752 inode->i_uid = current->fsuid;
753 inode->i_gid = current->fsgid;
754 inode->i_mode = mode;
755 inode->i_ino = objectid;
757 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
758 fill_inode_item(&inode_item, inode);
759 location = &BTRFS_I(inode)->location;
760 location->objectid = objectid;
762 location->offset = 0;
763 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
765 ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
768 insert_inode_hash(inode);
/*
 * Add a directory entry for @inode under the name of @dentry in the
 * dentry's parent directory.  The parent's i_size grows by twice the name
 * length (btrfs_unlink_trans() subtracts the same amount) and the parent's
 * inode item is rewritten with btrfs_update_inode().
 */
772 static int btrfs_add_link(struct btrfs_trans_handle *trans,
773 struct dentry *dentry, struct inode *inode)
776 struct btrfs_key key;
777 struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
/* Key of the inode item the new entry refers to. */
778 key.objectid = inode->i_ino;
780 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
783 ret = btrfs_insert_dir_item(trans, root,
784 dentry->d_name.name, dentry->d_name.len,
785 dentry->d_parent->d_inode->i_ino,
788 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
789 ret = btrfs_update_inode(trans, root,
790 dentry->d_parent->d_inode);
/*
 * Link @inode into the parent of @dentry with btrfs_add_link() and attach it
 * to the dentry with d_instantiate().
 */
795 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
796 struct dentry *dentry, struct inode *inode)
798 int err = btrfs_add_link(trans, dentry, inode);
800 d_instantiate(dentry, inode);
/*
 * Create a regular file named by @dentry in @dir.
 *
 * Under fs_mutex and inside one transaction: picks a free objectid, creates
 * the inode with btrfs_new_inode(), links it with btrfs_add_nondir() and
 * installs the file address-space, file and inode operation tables.  The
 * super block is flagged dirty before the transaction ends.
 */
808 static int btrfs_create(struct inode *dir, struct dentry *dentry,
809 int mode, struct nameidata *nd)
811 struct btrfs_trans_handle *trans;
812 struct btrfs_root *root = BTRFS_I(dir)->root;
818 mutex_lock(&root->fs_info->fs_mutex);
819 trans = btrfs_start_transaction(root, 1);
821 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
827 inode = btrfs_new_inode(trans, root, objectid, mode);
828 err = PTR_ERR(inode);
831 // FIXME mark the inode dirty
832 err = btrfs_add_nondir(trans, dentry, inode);
836 inode->i_mapping->a_ops = &btrfs_aops;
837 inode->i_fop = &btrfs_file_operations;
838 inode->i_op = &btrfs_file_inode_operations;
840 dir->i_sb->s_dirt = 1;
842 btrfs_end_transaction(trans, root);
843 mutex_unlock(&root->fs_info->fs_mutex);
/* NOTE(review): presumably only reached on failure -- confirm. */
846 inode_dec_link_count(inode);
/*
 * Insert the two initial entries of a new directory @objectid.  The names
 * are the first 1 and the first 2 bytes of buf; the key is set up with
 * @objectid before the first insert and with @dirid before the second
 * (presumably the entries for the directory itself and for its parent).
 */
852 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
853 struct btrfs_root *root,
854 u64 objectid, u64 dirid)
858 struct btrfs_key key;
863 key.objectid = objectid;
866 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
868 ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
872 key.objectid = dirid;
873 ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
/*
 * Create a directory named by @dentry in @dir.
 *
 * Under fs_mutex and inside one transaction: picks a free objectid, creates
 * an S_IFDIR inode, installs the directory operation tables, inserts the
 * initial entries with btrfs_make_empty_dir(), writes the inode item, links
 * the new directory into its parent and instantiates the dentry.
 */
881 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
884 struct btrfs_trans_handle *trans;
885 struct btrfs_root *root = BTRFS_I(dir)->root;
890 mutex_lock(&root->fs_info->fs_mutex);
891 trans = btrfs_start_transaction(root, 1);
893 err = PTR_ERR(trans);
897 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
903 inode = btrfs_new_inode(trans, root, objectid, S_IFDIR | mode);
905 err = PTR_ERR(inode);
909 inode->i_op = &btrfs_dir_inode_operations;
910 inode->i_fop = &btrfs_dir_file_operations;
912 err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
917 err = btrfs_update_inode(trans, root, inode);
920 err = btrfs_add_link(trans, dentry, inode);
923 d_instantiate(dentry, inode);
927 btrfs_end_transaction(trans, root);
929 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Sync the filesystem: pushes out the btree inode's page cache (a
 * filemap_flush() call and a filemap_write_and_wait() call are both
 * present; presumably selected by @wait -- the test is not shown) and then
 * commits a transaction under fs_mutex.
 */
935 static int btrfs_sync_fs(struct super_block *sb, int wait)
937 struct btrfs_trans_handle *trans;
938 struct btrfs_root *root;
944 filemap_flush(root->fs_info->btree_inode->i_mapping);
947 filemap_write_and_wait(root->fs_info->btree_inode->i_mapping);
948 mutex_lock(&root->fs_info->fs_mutex);
949 trans = btrfs_start_transaction(root, 1);
950 ret = btrfs_commit_transaction(trans, root);
953 printk("btrfs sync_fs\n");
954 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Map file block @iblock of @inode onto @result.  btrfs_get_block() calls
 * this with fs_mutex held.
 *
 * The file extent item covering byte offset iblock << i_blkbits is looked
 * up and checked to belong to this inode and to be of type
 * BTRFS_EXTENT_DATA_KEY.  For a regular extent that contains @iblock the
 * buffer is mapped to the matching disk block.  For an inline extent the
 * inline bytes are copied into the buffer's page, the rest of the page is
 * zeroed, the buffer is marked uptodate and mapped to logical block 0.
 */
958 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
959 struct buffer_head *result, int create)
964 u64 extent_start = 0;
966 u64 objectid = inode->i_ino;
968 struct btrfs_path *path;
969 struct btrfs_root *root = BTRFS_I(inode)->root;
970 struct btrfs_file_extent_item *item;
971 struct btrfs_leaf *leaf;
972 struct btrfs_disk_key *found_key;
974 path = btrfs_alloc_path();
976 btrfs_init_path(path);
981 ret = btrfs_lookup_file_extent(NULL, root, path,
983 iblock << inode->i_blkbits, 0);
990 if (path->slots[0] == 0) {
991 btrfs_release_path(root, path);
997 item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
998 struct btrfs_file_extent_item);
999 leaf = btrfs_buffer_leaf(path->nodes[0]);
/* Disk block where the part of the extent used by this file begins. */
1000 blocknr = btrfs_file_extent_disk_blocknr(item);
1001 blocknr += btrfs_file_extent_offset(item);
1003 /* are we inside the extent that was found? */
1004 found_key = &leaf->items[path->slots[0]].key;
1005 found_type = btrfs_disk_key_type(found_key);
1006 if (btrfs_disk_key_objectid(found_key) != objectid ||
1007 found_type != BTRFS_EXTENT_DATA_KEY) {
1010 btrfs_release_path(root, path);
/* From here on found_type holds the extent type, not the key type. */
1013 found_type = btrfs_file_extent_type(item);
1014 extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1015 if (found_type == BTRFS_FILE_EXTENT_REG) {
/* Convert the byte offset of the extent into a file block number. */
1016 extent_start = extent_start >> inode->i_blkbits;
1017 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1018 if (iblock >= extent_start && iblock < extent_end) {
1020 btrfs_map_bh_to_logical(root, result, blocknr +
1021 iblock - extent_start);
1024 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1028 size = btrfs_file_extent_inline_len(leaf->items +
1030 extent_end = (extent_start + size) >> inode->i_blkbits;
1031 extent_start >>= inode->i_blkbits;
1032 if (iblock < extent_start || iblock > extent_end) {
/* Copy the inline data into the page and zero-fill the remainder. */
1035 ptr = btrfs_file_extent_inline_start(item);
1036 map = kmap(result->b_page);
1037 memcpy(map, ptr, size);
1038 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1039 flush_dcache_page(result->b_page);
1040 kunmap(result->b_page);
1041 set_buffer_uptodate(result);
1042 SetPageChecked(result->b_page);
1043 btrfs_map_bh_to_logical(root, result, 0);
1046 btrfs_release_path(root, path);
1047 btrfs_free_path(path);
/*
 * get_block callback used by the read and write paths in this file: runs
 * btrfs_get_block_lock() with fs_mutex held.
 */
1051 static int btrfs_get_block(struct inode *inode, sector_t iblock,
1052 struct buffer_head *result, int create)
1055 struct btrfs_root *root = BTRFS_I(inode)->root;
1056 mutex_lock(&root->fs_info->fs_mutex);
1057 err = btrfs_get_block_lock(inode, iblock, result, create);
1058 mutex_unlock(&root->fs_info->fs_mutex);
/* Prepare a page for writing via nobh_prepare_write() and btrfs_get_block(). */
1062 static int btrfs_prepare_write(struct file *file, struct page *page,
1063 unsigned from, unsigned to)
1065 return nobh_prepare_write(page, from, to, btrfs_get_block);
/* Write the super block by running btrfs_sync_fs() with wait set to 1. */
1068 static void btrfs_write_super(struct super_block *sb)
1070 btrfs_sync_fs(sb, 1);
/* Read one page via mpage_readpage(), mapping blocks with btrfs_get_block(). */
1073 static int btrfs_readpage(struct file *file, struct page *page)
1075 return mpage_readpage(page, btrfs_get_block);
1079 * While block_write_full_page is writing back the dirty buffers under
1080 * the page lock, whoever dirtied the buffers may decide to clean them
1081 * again at any time. We handle that by only looking at the buffer
1082 * state inside lock_buffer().
1084 * If block_write_full_page() is called for regular writeback
1085 * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1086 * locked buffer. This only can happen if someone has written the buffer
1087 * directly, with submit_bh(). At the address_space level PageWriteback
1088 * prevents this contention from occurring.
/*
 * Write out the dirty buffers of a locked page.
 *
 * Buffers are created if the page has none.  A first walk over the page's
 * buffers cleans those beyond the last block inside i_size and maps dirty,
 * unmapped ones with btrfs_get_block() (create == 0).  A second walk marks
 * dirty buffers for async write, but only when b_blocknr is non-zero.  The
 * page is then put under writeback and the marked buffers are submitted.
 * When nothing was submitted, writeback is ended again and
 * wbc->pages_skipped is incremented.  The final part is the recovery path
 * for a mapping error, which still submits the buffers that are mapped and
 * dirty.
 */
1090 static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1091 struct writeback_control *wbc)
1095 sector_t last_block;
1096 struct buffer_head *bh, *head;
1097 const unsigned blocksize = 1 << inode->i_blkbits;
1098 int nr_underway = 0;
1100 BUG_ON(!PageLocked(page));
1102 last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1104 if (!page_has_buffers(page)) {
1105 create_empty_buffers(page, blocksize,
1106 (1 << BH_Dirty)|(1 << BH_Uptodate));
1110 * Be very careful. We have no exclusion from __set_page_dirty_buffers
1111 * here, and the (potentially unmapped) buffers may become dirty at
1112 * any time. If a buffer becomes dirty here after we've inspected it
1113 * then we just miss that fact, and the page stays dirty.
1115 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1116 * handle that here by just cleaning them.
1119 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1120 head = page_buffers(page);
1124 * Get all the dirty buffers mapped to disk addresses and
1125 * handle any aliases from the underlying blockdev's mapping.
1128 if (block > last_block) {
1130 * mapped buffers outside i_size will occur, because
1131 * this page can be outside i_size when there is a
1132 * truncate in progress.
1135 * The buffer was zeroed by block_write_full_page()
1137 clear_buffer_dirty(bh);
1138 set_buffer_uptodate(bh);
1139 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1140 WARN_ON(bh->b_size != blocksize);
1141 err = btrfs_get_block(inode, block, bh, 0);
1144 if (buffer_new(bh)) {
1145 /* blockdev mappings never come here */
1146 clear_buffer_new(bh);
1147 unmap_underlying_metadata(bh->b_bdev,
1151 bh = bh->b_this_page;
1153 } while (bh != head);
1156 if (!buffer_mapped(bh))
1159 * If it's a fully non-blocking write attempt and we cannot
1160 * lock the buffer then redirty the page. Note that this can
1161 * potentially cause a busy-wait loop from pdflush and kswapd
1162 * activity, but those code paths have their own higher-level
1165 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1167 } else if (test_set_buffer_locked(bh)) {
1168 redirty_page_for_writepage(wbc, page);
1171 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1172 mark_buffer_async_write(bh);
1176 } while ((bh = bh->b_this_page) != head);
1179 * The page and its buffers are protected by PageWriteback(), so we can
1180 * drop the bh refcounts early.
1182 BUG_ON(PageWriteback(page));
1183 set_page_writeback(page);
1186 struct buffer_head *next = bh->b_this_page;
1187 if (buffer_async_write(bh)) {
1188 submit_bh(WRITE, bh);
1192 } while (bh != head);
1197 if (nr_underway == 0) {
1199 * The page was marked dirty, but the buffers were
1200 * clean. Someone wrote them back by hand with
1201 * ll_rw_block/submit_bh. A rare case.
1205 if (!buffer_uptodate(bh)) {
1209 bh = bh->b_this_page;
1210 } while (bh != head);
1212 SetPageUptodate(page);
1213 end_page_writeback(page);
1215 * The page and buffer_heads can be released at any time from
1218 wbc->pages_skipped++; /* We didn't write this page */
1224 * ENOSPC, or some other error. We may already have added some
1225 * blocks to the file, so we need to write these out to avoid
1226 * exposing stale data.
1227 * The page is currently locked and not marked for writeback
1230 /* Recovery: lock and submit the mapped buffers */
1232 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1234 mark_buffer_async_write(bh);
1237 * The buffer may have been set dirty during
1238 * attachment to a dirty page.
1240 clear_buffer_dirty(bh);
1242 } while ((bh = bh->b_this_page) != head);
1244 BUG_ON(PageWriteback(page));
1245 set_page_writeback(page);
1247 struct buffer_head *next = bh->b_this_page;
1248 if (buffer_async_write(bh)) {
1249 clear_buffer_dirty(bh);
1250 submit_bh(WRITE, bh);
1254 } while (bh != head);
1260 * The generic ->writepage function for buffer-backed address_spaces
/*
 * Write back one page.  Three cases are distinguished by comparing the page
 * index with i_size: a page fully inside i_size is written as is; a page
 * fully outside i_size is invalidated and 0 is returned; a page straddling
 * i_size has its tail beyond i_size zeroed before being written.
 */
1262 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1264 struct inode * const inode = page->mapping->host;
1265 loff_t i_size = i_size_read(inode);
1266 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1270 /* Is the page fully inside i_size? */
1271 if (page->index < end_index)
1272 return __btrfs_write_full_page(inode, page, wbc);
1274 /* Is the page fully outside i_size? (truncate in progress) */
1275 offset = i_size & (PAGE_CACHE_SIZE-1);
1276 if (page->index >= end_index+1 || !offset) {
1278 * The page may have dirty, unmapped buffers. For example,
1279 * they may have been added in ext3_writepage(). Make them
1280 * freeable here, so the page does not leak.
1282 block_invalidatepage(page, 0);
1284 return 0; /* don't care */
1288 * The page straddles i_size. It must be zeroed out on each and every
1289 * writepage invokation because it may be mmapped. "A file is mapped
1290 * in multiples of the page size. For a file that is not a multiple of
1291 * the page size, the remaining memory is zeroed when mapped, and
1292 * writes to that region are not written out to the file."
1294 kaddr = kmap_atomic(page, KM_USER0);
1295 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1296 flush_dcache_page(page);
1297 kunmap_atomic(kaddr, KM_USER0);
1298 return __btrfs_write_full_page(inode, page, wbc);
/*
 * Truncate a file to inode->i_size.  Tests first whether the inode is a
 * regular file and whether it is append-only or immutable.  The partial
 * page at the new size is handled by nobh_truncate_page(); the file items
 * beyond i_size are then dropped with btrfs_truncate_in_trans() inside a
 * transaction under fs_mutex, and the inode is marked dirty.
 */
1301 static void btrfs_truncate(struct inode *inode)
1303 struct btrfs_root *root = BTRFS_I(inode)->root;
1305 struct btrfs_trans_handle *trans;
1307 if (!S_ISREG(inode->i_mode))
1309 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1312 nobh_truncate_page(inode->i_mapping, inode->i_size);
1314 /* FIXME, add redo link to tree so we don't leak on crash */
1315 mutex_lock(&root->fs_info->fs_mutex);
1316 trans = btrfs_start_transaction(root, 1);
1317 ret = btrfs_truncate_in_trans(trans, root, inode);
1319 ret = btrfs_end_transaction(trans, root);
1321 mutex_unlock(&root->fs_info->fs_mutex);
1322 mark_inode_dirty(inode);
1326 * Make sure any changes to nobh_commit_write() are reflected in
1327 * nobh_truncate_page(), since it doesn't call commit_write().
/*
 * Finish a write to @page covering bytes [@from, @to): marks the page
 * uptodate, dirties it when its first buffer is mapped to a non-zero block,
 * and extends i_size when the write ends beyond the current size.
 */
1329 static int btrfs_commit_write(struct file *file, struct page *page,
1330 unsigned from, unsigned to)
1332 struct inode *inode = page->mapping->host;
1333 struct buffer_head *bh;
/* File offset just past the last byte written. */
1334 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1336 SetPageUptodate(page);
1337 bh = page_buffers(page);
1338 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1339 set_page_dirty(page);
1341 if (pos > inode->i_size) {
1342 i_size_write(inode, pos);
1343 mark_inode_dirty(inode);
/*
 * Copy @write_bytes bytes from the user buffer @buf into @prepared_pages,
 * starting at the in-page offset of @pos in the first page and at offset 0
 * in every following page.  Returns -EFAULT if page_fault is non-zero at
 * the end, otherwise 0.
 */
1348 static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
1349 struct page **prepared_pages,
1350 const char __user * buf)
1352 long page_fault = 0;
1354 int offset = pos & (PAGE_CACHE_SIZE - 1);
1356 for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
/* Never copy past the end of the current page. */
1357 size_t count = min_t(size_t,
1358 PAGE_CACHE_SIZE - offset, write_bytes);
1359 struct page *page = prepared_pages[i];
1360 fault_in_pages_readable(buf, count);
1362 /* Copy data from userspace to the current page */
1364 page_fault = __copy_from_user(page_address(page) + offset,
1366 /* Flush processor's dcache for this page */
1367 flush_dcache_page(page);
1370 write_bytes -= count;
1375 return page_fault ? -EFAULT : 0;
/*
 * Release @num_pages pages from @pages: each one is unlocked, marked
 * accessed and has its page cache reference dropped.
 */
1378 static void btrfs_drop_pages(struct page **pages, size_t num_pages)
1381 for (i = 0; i < num_pages; i++) {
1384 unlock_page(pages[i]);
1385 mark_page_accessed(pages[i]);
1386 page_cache_release(pages[i]);
/*
 * Record freshly written pages in the tree and commit them.
 *
 * Each page is handled in its own transaction under fs_mutex.  When the
 * page's first buffer is mapped with b_blocknr == 0, an inline file extent
 * item keyed at the page's file offset is inserted and filled from the
 * buffer data.  btrfs_csum_file_block() is called on the whole page
 * (NOTE(review): presumably as the alternative to the inline case --
 * confirm).  The page is then flagged checked and handed to
 * btrfs_commit_write().
 */
1389 static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
1390 struct btrfs_root *root,
1392 struct page **pages,
1402 struct inode *inode = file->f_path.dentry->d_inode;
1403 struct buffer_head *bh;
1404 struct btrfs_file_extent_item *ei;
1406 for (i = 0; i < num_pages; i++) {
/* Portion of the remaining write that falls into this page. */
1407 offset = pos & (PAGE_CACHE_SIZE -1);
1408 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1409 /* FIXME, one block at a time */
1411 mutex_lock(&root->fs_info->fs_mutex);
1412 trans = btrfs_start_transaction(root, 1);
1414 bh = page_buffers(pages[i]);
1415 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
1416 struct btrfs_key key;
1417 struct btrfs_path *path;
1421 path = btrfs_alloc_path();
1423 key.objectid = inode->i_ino;
1424 key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
1426 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
/* Inline data must be smaller than one page. */
1427 BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
1429 btrfs_file_extent_calc_inline_size(write_bytes);
1430 ret = btrfs_insert_empty_item(trans, root, path, &key,
1433 ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1434 path->slots[0], struct btrfs_file_extent_item);
1435 btrfs_set_file_extent_generation(ei, trans->transid);
1436 btrfs_set_file_extent_type(ei,
1437 BTRFS_FILE_EXTENT_INLINE);
1438 ptr = btrfs_file_extent_inline_start(ei);
1439 memcpy(ptr, bh->b_data, offset + write_bytes);
1440 mark_buffer_dirty(path->nodes[0]);
1441 btrfs_free_path(path);
1443 btrfs_csum_file_block(trans, root, inode->i_ino,
1444 pages[i]->index << PAGE_CACHE_SHIFT,
1445 kmap(pages[i]), PAGE_CACHE_SIZE);
1448 SetPageChecked(pages[i]);
1449 ret = btrfs_end_transaction(trans, root);
1451 mutex_unlock(&root->fs_info->fs_mutex);
1453 ret = btrfs_commit_write(file, pages[i], offset,
1454 offset + this_write);
1460 WARN_ON(this_write > write_bytes);
1461 write_bytes -= this_write;
/*
 * drop_extents - remove the file extent items of an inode that overlap
 * the range between start and end.
 *
 * The loop looks up the extent item at search_start, then:
 *   - stops at the first item at or past end, or belonging to another inode;
 *   - shortens an extent that begins before start;
 *   - deletes an extent that begins at or after start and frees its blocks;
 *   - when end falls inside an extent, re-inserts the part past end as a
 *     new regular extent item (the bookend) that shares the disk extent.
 * inode->i_blocks is adjusted along the way.
 */
1467 static int drop_extents(struct btrfs_trans_handle *trans,
1468 struct btrfs_root *root,
1469 struct inode *inode,
1473 struct btrfs_key key;
1474 struct btrfs_leaf *leaf;
1476 struct btrfs_file_extent_item *extent;
/* copy of the original item, kept for building the bookend */
1479 struct btrfs_file_extent_item old;
1480 struct btrfs_path *path;
1481 u64 search_start = start;
1487 path = btrfs_alloc_path();
1491 btrfs_release_path(root, path);
1492 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
1497 if (path->slots[0] == 0) {
1508 leaf = btrfs_buffer_leaf(path->nodes[0]);
1509 slot = path->slots[0];
1510 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
/* past the requested range, or no longer inside this inode */
1511 if (key.offset >= end || key.objectid != inode->i_ino) {
1515 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
1519 extent = btrfs_item_ptr(leaf, slot,
1520 struct btrfs_file_extent_item);
/* extent_end is computed per extent type: block count for REG, inline length for INLINE */
1521 found_type = btrfs_file_extent_type(extent);
1522 if (found_type == BTRFS_FILE_EXTENT_REG) {
1523 extent_end = key.offset +
1524 (btrfs_file_extent_num_blocks(extent) <<
1527 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1529 extent_end = key.offset +
1530 btrfs_file_extent_inline_len(leaf->items + slot);
1533 if (!found_extent && !found_inline) {
1538 if (search_start >= extent_end) {
/* the next lookup continues right after this extent */
1543 search_start = extent_end;
/* end lands inside this extent: keep a copy and take another reference on its disk blocks */
1545 if (end < extent_end && end >= key.offset) {
1547 memcpy(&old, extent, sizeof(old));
1548 ret = btrfs_inc_extent_ref(trans, root,
1549 btrfs_file_extent_disk_blocknr(&old),
1550 btrfs_file_extent_disk_num_blocks(&old));
1553 WARN_ON(found_inline);
1557 if (start > key.offset) {
1560 /* truncate existing extent */
1562 WARN_ON(start & (root->blocksize - 1));
1564 new_num = (start - key.offset) >>
1566 old_num = btrfs_file_extent_num_blocks(extent);
1567 inode->i_blocks -= (old_num - new_num) << 3;
1568 btrfs_set_file_extent_num_blocks(extent,
1570 mark_buffer_dirty(path->nodes[0]);
1574 ret = btrfs_truncate_item(trans, root, path,
1575 start - key.offset);
/* values read from the item before it is deleted */
1581 u64 disk_blocknr = 0;
1582 u64 disk_num_blocks = 0;
1583 u64 extent_num_blocks = 0;
1586 btrfs_file_extent_disk_blocknr(extent);
1588 btrfs_file_extent_disk_num_blocks(extent);
1590 btrfs_file_extent_num_blocks(extent);
1592 ret = btrfs_del_item(trans, root, path);
1594 btrfs_release_path(root, path);
/*
 * NOTE(review): extent is read below, after btrfs_del_item() and
 * btrfs_release_path(); extent_num_blocks declared above looks like the
 * value meant to be used - confirm extent is still valid at this point.
 */
1597 btrfs_file_extent_num_blocks(extent) << 3;
1598 ret = btrfs_free_extent(trans, root,
1600 disk_num_blocks, 0);
1604 if (!bookend && search_start >= end) {
1611 if (bookend && found_extent) {
1612 /* create bookend */
1613 struct btrfs_key ins;
1614 ins.objectid = inode->i_ino;
1617 btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
1619 btrfs_release_path(root, path);
1620 ret = btrfs_insert_empty_item(trans, root, path, &ins,
1623 extent = btrfs_item_ptr(
1624 btrfs_buffer_leaf(path->nodes[0]),
1626 struct btrfs_file_extent_item);
/* the bookend points at the same disk extent as the original item */
1627 btrfs_set_file_extent_disk_blocknr(extent,
1628 btrfs_file_extent_disk_blocknr(&old));
1629 btrfs_set_file_extent_disk_num_blocks(extent,
1630 btrfs_file_extent_disk_num_blocks(&old));
/* skip the blocks in front of end inside the shared disk extent */
1632 btrfs_set_file_extent_offset(extent,
1633 btrfs_file_extent_offset(&old) +
1634 ((end - key.offset) >> inode->i_blkbits));
1635 WARN_ON(btrfs_file_extent_num_blocks(&old) <
1636 (end - key.offset) >> inode->i_blkbits);
1637 btrfs_set_file_extent_num_blocks(extent,
1638 btrfs_file_extent_num_blocks(&old) -
1639 ((end - key.offset) >> inode->i_blkbits));
1641 btrfs_set_file_extent_type(extent,
1642 BTRFS_FILE_EXTENT_REG);
1643 btrfs_set_file_extent_generation(extent,
1644 btrfs_file_extent_generation(&old));
/* NOTE(review): mark_buffer_dirty() is used earlier in this function, btrfs_mark_buffer_dirty() here - confirm which is intended */
1645 btrfs_mark_buffer_dirty(path->nodes[0]);
1647 btrfs_file_extent_num_blocks(extent) << 3;
1653 btrfs_free_path(path);
/*
 * prepare_pages - grab the page-cache pages for a write and map their
 * buffers.
 *
 * For each of the num_pages pages starting at the page that contains pos,
 * the page is taken from the page cache, given empty buffers created with
 * the Uptodate bit set, and every buffer is mapped through
 * btrfs_map_bh_to_logical().  A non-zero @alloc_extent_start is advanced by
 * one for each buffer mapped.
 *
 * Both failure labels drop the pages; the failed_truncate path also calls
 * vmtruncate() with the file size sampled on entry.
 */
1657 static int prepare_pages(struct btrfs_root *root,
1659 struct page **pages,
1662 unsigned long first_index,
1663 unsigned long last_index,
1665 u64 alloc_extent_start)
1668 unsigned long index = pos >> PAGE_CACHE_SHIFT;
1669 struct inode *inode = file->f_path.dentry->d_inode;
1673 struct buffer_head *bh;
1674 struct buffer_head *head;
/* file size on entry, used by the vmtruncate() call on the failure path */
1675 loff_t isize = i_size_read(inode);
/* start from an all-NULL array */
1677 memset(pages, 0, num_pages * sizeof(struct page *));
1679 for (i = 0; i < num_pages; i++) {
1680 pages[i] = grab_cache_page(inode->i_mapping, index + i);
1683 goto failed_release;
1685 offset = pos & (PAGE_CACHE_SIZE -1);
1686 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1687 create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize,
1688 (1 << BH_Uptodate));
/* walk the circular list of buffers attached to the page */
1689 head = page_buffers(pages[i]);
1692 err = btrfs_map_bh_to_logical(root, bh,
1693 alloc_extent_start);
1696 goto failed_truncate;
1697 bh = bh->b_this_page;
/* zero is left untouched, any other value moves to the next block */
1698 if (alloc_extent_start)
1699 alloc_extent_start++;
1700 } while (bh != head);
1702 WARN_ON(this_write > write_bytes);
1703 write_bytes -= this_write;
1708 btrfs_drop_pages(pages, num_pages);
1712 btrfs_drop_pages(pages, num_pages);
1714 vmtruncate(inode, isize);
/*
 * btrfs_file_write - buffered write entry point for regular files.
 *
 * Outline of the statements below:
 *   1. run the generic write checks, clear suid and update the file times;
 *   2. read in and pin the first and last page when the write only partly
 *      covers them and they lie inside i_size;
 *   3. in one transaction under fs_mutex, drop the extents being
 *      overwritten and allocate a new extent when the size conditions hold;
 *   4. loop: prepare pages, copy the user data, then dirty and release
 *      the pages;
 *   5. release the pinned pages and mark the inode dirty.
 *
 * Returns num_written when it is non-zero, otherwise err.
 */
1718 static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1719 size_t count, loff_t *ppos)
1722 size_t num_written = 0;
1725 struct inode *inode = file->f_path.dentry->d_inode;
1726 struct btrfs_root *root = BTRFS_I(inode)->root;
1727 struct page *pages[8];
/* first and last page of the write when they need their old contents */
1728 struct page *pinned[2] = { NULL, NULL };
1729 unsigned long first_index;
1730 unsigned long last_index;
1733 u64 alloc_extent_start;
1734 struct btrfs_trans_handle *trans;
1735 struct btrfs_key ins;
1737 if (file->f_flags & O_DIRECT)
1740 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
/* set for the duration of the write and reset to NULL before returning */
1741 current->backing_dev_info = inode->i_mapping->backing_dev_info;
1742 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1747 err = remove_suid(file->f_path.dentry);
1750 file_update_time(file);
/* start_pos is pos rounded down to a page boundary */
1752 start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
1753 num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
1756 mutex_lock(&inode->i_mutex);
1757 first_index = pos >> PAGE_CACHE_SHIFT;
1758 last_index = (pos + count) >> PAGE_CACHE_SHIFT;
/* the write starts mid-page inside the file: the first page is read in and kept pinned */
1760 if ((first_index << PAGE_CACHE_SHIFT) < inode->i_size &&
1761 (pos & (PAGE_CACHE_SIZE - 1))) {
/*
 * NOTE(review): the grab_cache_page() result is passed straight to
 * PageUptodate() on the next line with no NULL check - confirm a failed
 * grab cannot happen here or add a check.
 */
1762 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
1763 if (!PageUptodate(pinned[0])) {
1764 ret = mpage_readpage(pinned[0], btrfs_get_block);
1767 unlock_page(pinned[0]);
/* same treatment for a different last page that lies inside the file */
1770 if (first_index != last_index &&
1771 (last_index << PAGE_CACHE_SHIFT) < inode->i_size &&
1772 (count & (PAGE_CACHE_SIZE - 1))) {
/* NOTE(review): same missing NULL check as for pinned[0] above */
1773 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
1774 if (!PageUptodate(pinned[1])) {
1775 ret = mpage_readpage(pinned[1], btrfs_get_block);
1778 unlock_page(pinned[1]);
1782 mutex_lock(&root->fs_info->fs_mutex);
1783 trans = btrfs_start_transaction(root, 1);
1786 mutex_unlock(&root->fs_info->fs_mutex);
1789 /* FIXME blocksize != 4096 */
1790 inode->i_blocks += num_blocks << 3;
/* the write begins inside the file: existing extents in the range are dropped first */
1791 if (start_pos < inode->i_size) {
1792 /* FIXME blocksize != pagesize */
1793 ret = drop_extents(trans, root, inode,
1795 (pos + count + root->blocksize -1) &
1796 ~((u64)root->blocksize - 1));
/*
 * A real extent is allocated when the file already spans a page or more,
 * when the write ends before i_size, or when the data exceeds the inline
 * size limit.
 */
1799 if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
1800 pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
1801 ret = btrfs_alloc_extent(trans, root, num_blocks, 1,
1804 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
1805 start_pos, ins.objectid, ins.offset);
/* first block of the allocation; prepare_pages() maps buffers starting from it */
1812 alloc_extent_start = ins.objectid;
1813 ret = btrfs_end_transaction(trans, root);
1814 mutex_unlock(&root->fs_info->fs_mutex);
/* each pass handles at most the bytes up to the end of the current page */
1817 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1818 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
1819 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
1822 memset(pages, 0, sizeof(pages));
1823 ret = prepare_pages(root, file, pages, num_pages,
1824 pos, first_index, last_index,
1825 write_bytes, alloc_extent_start);
1828 /* FIXME blocks != pagesize */
1829 if (alloc_extent_start)
1830 alloc_extent_start += num_pages;
1831 ret = btrfs_copy_from_user(pos, num_pages,
1832 write_bytes, pages, buf);
/* NULL is passed as the transaction; the callee starts its own */
1835 ret = dirty_and_release_pages(NULL, root, file, pages,
1836 num_pages, pos, write_bytes);
1838 btrfs_drop_pages(pages, num_pages);
1841 count -= write_bytes;
1843 num_written += write_bytes;
1845 balance_dirty_pages_ratelimited(inode->i_mapping);
1849 mutex_unlock(&inode->i_mutex);
/* drop the references held on the pinned first and last pages */
1852 page_cache_release(pinned[0]);
1854 page_cache_release(pinned[1]);
1856 current->backing_dev_info = NULL;
1857 mark_inode_dirty(inode);
1858 return num_written ? num_written : err;
/*
 * btrfs_read_actor - copy one page worth of file data to user space.
 *
 * A page that is not yet flagged Checked first has its checksum verified
 * over the whole page and is then flagged Checked.  The data is copied
 * with an atomic kmap when the destination can be faulted in, and with the
 * slower __copy_to_user() otherwise.  The read descriptor is advanced by
 * the number of bytes handled.
 */
1861 static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
1862 unsigned long offset, unsigned long size)
1865 unsigned long left, count = desc->count;
1866 struct inode *inode = page->mapping->host;
/* verification runs only for pages that are not flagged Checked yet */
1871 if (!PageChecked(page)) {
1872 /* FIXME, do it per block */
1873 struct btrfs_root *root = BTRFS_I(inode)->root;
1875 int ret = btrfs_csum_verify_file_block(root,
1876 page->mapping->host->i_ino,
1877 page->index << PAGE_CACHE_SHIFT,
1878 kmap(page), PAGE_CACHE_SIZE);
/* presumably reached when verification fails: report it and hand back zeroes */
1880 printk("failed to verify ino %lu page %lu\n",
1881 page->mapping->host->i_ino,
1883 memset(page_address(page), 0, PAGE_CACHE_SIZE);
1885 SetPageChecked(page);
/* fast path: pre-fault the destination, then copy under an atomic kmap */
1889 * Faults on the destination of a read are common, so do it before
1892 if (!fault_in_pages_writeable(desc->arg.buf, size)) {
1893 kaddr = kmap_atomic(page, KM_USER0);
1894 left = __copy_to_user_inatomic(desc->arg.buf,
1895 kaddr + offset, size);
1896 kunmap_atomic(kaddr, KM_USER0);
1901 /* Do it the slow way */
1903 left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
1908 desc->error = -EFAULT;
/* account for the bytes handled and advance the user buffer */
1911 desc->count = count - size;
1912 desc->written += size;
1913 desc->arg.buf += size;
/**
 * btrfs_file_aio_read - filesystem read routine
 * @iocb:	kernel I/O control block
 * @iov:	io vector request
 * @nr_segs:	number of segments in the iovec
 * @pos:	current file position
 */
/*
 * Validates every iovec segment, then reads each non-empty segment
 * through do_generic_file_read().  The file position is advanced through
 * &iocb->ki_pos.
 */
1924 static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1925 unsigned long nr_segs, loff_t pos)
1927 struct file *filp = iocb->ki_filp;
1931 loff_t *ppos = &iocb->ki_pos;
/* pass 1: add up the segment lengths and check that each buffer is writable */
1934 for (seg = 0; seg < nr_segs; seg++) {
1935 const struct iovec *iv = &iov[seg];
1938 * If any segment has a negative length, or the cumulative
1939 * length ever wraps negative then return -EINVAL.
1941 count += iv->iov_len;
1942 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
1944 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
1949 count -= iv->iov_len; /* This segment is no good */
/* pass 2: one read descriptor per segment */
1954 for (seg = 0; seg < nr_segs; seg++) {
1955 read_descriptor_t desc;
1958 desc.arg.buf = iov[seg].iov_base;
1959 desc.count = iov[seg].iov_len;
1960 if (desc.count == 0)
1963 do_generic_file_read(filp, ppos, &desc,
1965 retval += desc.written;
/* keep the byte count when anything was read, otherwise report the error */
1967 retval = retval ?: desc.error;
/*
 * create_subvol - create a new, empty subvolume called @name.
 *
 * Under fs_mutex, the first transaction allocates a block and sets it up
 * as an empty level-0 leaf, inserts a root item pointing at it into the
 * tree root, adds a directory item for @name, and commits.  The new root
 * is then read back and a second transaction on it creates the
 * directory inode and an empty directory before committing again.
 */
1975 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
1977 struct btrfs_trans_handle *trans;
1978 struct btrfs_key key;
1979 struct btrfs_root_item root_item;
1980 struct btrfs_inode_item *inode_item;
1981 struct buffer_head *subvol;
1982 struct btrfs_leaf *leaf;
1983 struct btrfs_root *new_root;
1984 struct inode *inode;
/* object id used for the directory inode of the new subvolume */
1987 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
1989 mutex_lock(&root->fs_info->fs_mutex);
1990 trans = btrfs_start_transaction(root, 1);
/* the new tree starts as a single leaf with no items */
1993 subvol = btrfs_alloc_free_block(trans, root);
1994 leaf = btrfs_buffer_leaf(subvol);
1995 btrfs_set_header_nritems(&leaf->header, 0);
1996 btrfs_set_header_level(&leaf->header, 0);
1997 btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
1998 btrfs_set_header_generation(&leaf->header, trans->transid);
1999 memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
2000 sizeof(leaf->header.fsid));
/* inode item embedded in the root item, filled in as a directory */
2002 inode_item = &root_item.inode;
2003 memset(inode_item, 0, sizeof(*inode_item));
2004 btrfs_set_inode_generation(inode_item, 1);
2005 btrfs_set_inode_size(inode_item, 3);
2006 btrfs_set_inode_nlink(inode_item, 1);
2007 btrfs_set_inode_nblocks(inode_item, 1);
2008 btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
2010 btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
2011 btrfs_set_root_refs(&root_item, 1);
2013 mark_buffer_dirty(subvol);
/* a free object id from the tree root becomes the key of the new root item */
2017 ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2021 btrfs_set_root_dirid(&root_item, new_dirid);
2023 key.objectid = objectid;
2026 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2027 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2032 * insert the directory item
2034 key.offset = (u64)-1;
2035 ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2037 root->fs_info->sb->s_root->d_inode->i_ino,
2041 ret = btrfs_commit_transaction(trans, root);
/* second phase: populate the freshly created root */
2044 new_root = btrfs_read_fs_root(root->fs_info, &key);
2047 trans = btrfs_start_transaction(new_root, 1);
2050 inode = btrfs_new_inode(trans, new_root, new_dirid, S_IFDIR | 0700);
2051 inode->i_op = &btrfs_dir_inode_operations;
2052 inode->i_fop = &btrfs_dir_file_operations;
2054 ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
2059 ret = btrfs_update_inode(trans, new_root, inode);
2062 ret = btrfs_commit_transaction(trans, new_root);
2067 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * create_snapshot - create a snapshot of @root called @name.
 *
 * Under fs_mutex and inside one transaction: the root inode is written
 * back, the current root item is copied and pointed at the block of
 * root->node, the copy is inserted into the tree root under a fresh
 * object id, a directory item for @name is added, and
 * btrfs_inc_root_ref() is called on the root before the commit.
 */
2071 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2073 struct btrfs_trans_handle *trans;
2074 struct btrfs_key key;
2075 struct btrfs_root_item new_root_item;
/* roots without ref_cows set are special-cased up front */
2079 if (!root->ref_cows)
2082 mutex_lock(&root->fs_info->fs_mutex);
2083 trans = btrfs_start_transaction(root, 1);
2086 ret = btrfs_update_inode(trans, root, root->inode);
2089 ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
/* the snapshot starts out as a copy of the source root item */
2093 memcpy(&new_root_item, &root->root_item,
2094 sizeof(new_root_item));
2096 key.objectid = objectid;
2099 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
/* point the copy at the block currently used as the root node */
2100 btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
2102 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2107 * insert the directory item
2109 key.offset = (u64)-1;
2110 ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2112 root->fs_info->sb->s_root->d_inode->i_ino,
2117 ret = btrfs_inc_root_ref(trans, root);
2120 ret = btrfs_commit_transaction(trans, root);
2122 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * add_disk - add the block device called @name to the filesystem.
 *
 * Opens the device exclusively, inserts a device item followed by the
 * device path into the device tree, assigns the next device id, registers
 * the device through btrfs_insert_dev_radix(), and grows both the total
 * block count in the super block and the size of the btree inode by the
 * number of blocks on the new device.
 */
2126 static int add_disk(struct btrfs_root *root, char *name, int namelen)
2128 struct block_device *bdev;
2129 struct btrfs_path *path;
2130 struct super_block *sb = root->fs_info->sb;
2131 struct btrfs_root *dev_root = root->fs_info->dev_root;
2132 struct btrfs_trans_handle *trans;
2133 struct btrfs_device_item *dev_item;
2134 struct btrfs_key key;
2141 printk("adding disk %s\n", name);
2142 path = btrfs_alloc_path();
/* block count of the filesystem before the new device is added */
2145 num_blocks = btrfs_super_total_blocks(root->fs_info->disk_super);
2146 bdev = open_bdev_excl(name, O_RDWR, sb);
2148 ret = PTR_ERR(bdev);
2149 printk("open bdev excl failed ret %d\n", ret);
2152 set_blocksize(bdev, sb->s_blocksize);
/* size of the new device expressed in filesystem blocks */
2153 new_blocks = bdev->bd_inode->i_size >> sb->s_blocksize_bits;
/* the device item is keyed by the old total block count and the new block count */
2154 key.objectid = num_blocks;
2155 key.offset = new_blocks;
2157 btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY);
2159 mutex_lock(&dev_root->fs_info->fs_mutex);
2160 trans = btrfs_start_transaction(dev_root, 1);
/* room for the item itself plus the device path stored behind it */
2161 item_size = sizeof(*dev_item) + namelen;
2162 printk("insert empty on %Lu %Lu %u size %d\n", num_blocks, new_blocks, key.flags, item_size);
2163 ret = btrfs_insert_empty_item(trans, dev_root, path, &key, item_size);
2165 printk("insert failed %d\n", ret);
2166 close_bdev_excl(bdev);
2171 dev_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2172 path->slots[0], struct btrfs_device_item);
2173 btrfs_set_device_pathlen(dev_item, namelen);
/* the path bytes go directly after the fixed-size item */
2174 memcpy(dev_item + 1, name, namelen);
/* take the next device id and record it in the super block */
2176 device_id = btrfs_super_last_device_id(root->fs_info->disk_super) + 1;
2177 btrfs_set_super_last_device_id(root->fs_info->disk_super, device_id);
2178 btrfs_set_device_id(dev_item, device_id);
2179 mark_buffer_dirty(path->nodes[0]);
2181 ret = btrfs_insert_dev_radix(root, bdev, device_id, num_blocks,
2185 btrfs_set_super_total_blocks(root->fs_info->disk_super,
2186 num_blocks + new_blocks);
2187 i_size_write(root->fs_info->btree_inode,
2188 (num_blocks + new_blocks) <<
2189 root->fs_info->btree_inode->i_blkbits);
2193 ret = btrfs_commit_transaction(trans, dev_root);
/*
 * NOTE(review): the mutex was taken through dev_root->fs_info and is
 * released through root->fs_info - presumably the same fs_info, confirm.
 */
2195 mutex_unlock(&root->fs_info->fs_mutex);
2197 btrfs_free_path(path);
/*
 * btrfs_ioctl - ioctl handler shared by btrfs files and directories.
 *
 * BTRFS_IOC_SNAP_CREATE: copies the arguments from user space, looks the
 *   name up in the tree root, then calls create_subvol() when @inode
 *   belongs to the tree root and create_snapshot() otherwise.
 * BTRFS_IOC_ADD_DISK: copies the arguments from user space and calls
 *   add_disk() with the given name.
 */
2202 static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2203 cmd, unsigned long arg)
2205 struct btrfs_root *root = BTRFS_I(inode)->root;
2206 struct btrfs_ioctl_vol_args vol_args;
2208 struct btrfs_dir_item *di;
2210 struct btrfs_path *path;
2214 case BTRFS_IOC_SNAP_CREATE:
2215 if (copy_from_user(&vol_args,
2216 (struct btrfs_ioctl_vol_args __user *)arg,
/*
 * NOTE(review): strlen() relies on the user-supplied name being
 * NUL-terminated inside vol_args - confirm, or bound the scan.
 */
2219 namelen = strlen(vol_args.name);
2220 if (namelen > BTRFS_VOL_NAME_MAX)
2222 path = btrfs_alloc_path();
/*
 * NOTE(review): the next statement ends in a comma, so the comma operator
 * joins it to the mutex_lock() call that follows; a semicolon was
 * probably intended.
 */
2225 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2226 mutex_lock(&root->fs_info->fs_mutex);
/* look for an existing directory entry with this name */
2227 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2229 vol_args.name, namelen, 0);
2230 mutex_unlock(&root->fs_info->fs_mutex);
2231 btrfs_free_path(path);
2232 if (di && !IS_ERR(di))
/* issued on the tree root: new subvolume; on any other root: snapshot */
2235 if (root == root->fs_info->tree_root)
2236 ret = create_subvol(root, vol_args.name, namelen);
2238 ret = create_snapshot(root, vol_args.name, namelen);
2241 case BTRFS_IOC_ADD_DISK:
2242 if (copy_from_user(&vol_args,
2243 (struct btrfs_ioctl_vol_args __user *)arg,
/* NOTE(review): same NUL-termination assumption as in the snapshot case */
2246 namelen = strlen(vol_args.name);
2247 if (namelen > BTRFS_VOL_NAME_MAX)
/* namelen comes from strlen(), so this byte is already NUL */
2249 vol_args.name[namelen] = '\0';
2250 ret = add_disk(root, vol_args.name, namelen);
/*
 * Slab caches used by btrfs.  They are created in init_inodecache() and
 * destroyed in destroy_inodecache().
 */

/* cache for struct btrfs_inode objects, private to this file */
static struct kmem_cache *btrfs_inode_cachep;

/* caches with external linkage */
struct kmem_cache *btrfs_trans_handle_cachep;	/* "btrfs_trans_handle_cache" */
struct kmem_cache *btrfs_transaction_cachep;	/* "btrfs_transaction_cache" */
struct kmem_cache *btrfs_bit_radix_cachep;	/* "btrfs_radix" */
struct kmem_cache *btrfs_path_cachep;		/* "btrfs_path_cache" */
2265 * Called inside transaction, so use GFP_NOFS
/*
 * btrfs_alloc_inode - allocate the in-memory inode for @sb.
 *
 * A struct btrfs_inode is taken from btrfs_inode_cachep with GFP_NOFS and
 * the VFS inode embedded in it is returned.
 */
2267 static struct inode *btrfs_alloc_inode(struct super_block *sb)
2269 struct btrfs_inode *ei;
2271 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
/* callers only ever see the embedded VFS inode */
2274 return &ei->vfs_inode;
/*
 * btrfs_destroy_inode - give an inode back to btrfs_inode_cachep.
 *
 * Warns when the inode still has dentries attached or still has pages in
 * its mapping, then frees the containing struct btrfs_inode.
 */
2277 static void btrfs_destroy_inode(struct inode *inode)
2279 WARN_ON(!list_empty(&inode->i_dentry));
2280 WARN_ON(inode->i_data.nrpages);
/* BTRFS_I() yields the btrfs_inode that the cache handed out */
2282 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2285 static void init_once(void * foo, struct kmem_cache * cachep,
2286 unsigned long flags)
2288 struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2290 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
2291 SLAB_CTOR_CONSTRUCTOR) {
2292 inode_init_once(&ei->vfs_inode);
/*
 * init_inodecache - create the slab caches used by btrfs.
 *
 * Five caches are created: inodes, transaction handles, transactions,
 * paths and the bit radix.  A NULL test over the results follows.
 */
2296 static int init_inodecache(void)
2298 btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
2299 sizeof(struct btrfs_inode),
2300 0, (SLAB_RECLAIM_ACCOUNT|
2303 btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
2304 sizeof(struct btrfs_trans_handle),
2305 0, (SLAB_RECLAIM_ACCOUNT|
2308 btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
2309 sizeof(struct btrfs_transaction),
2310 0, (SLAB_RECLAIM_ACCOUNT|
/*
 * NOTE(review): the path cache below is sized with
 * sizeof(struct btrfs_transaction); sizeof(struct btrfs_path) looks like
 * the intended object size - confirm and fix.
 */
2313 btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
2314 sizeof(struct btrfs_transaction),
2315 0, (SLAB_RECLAIM_ACCOUNT|
2318 btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
2320 0, (SLAB_RECLAIM_ACCOUNT|
2322 SLAB_DESTROY_BY_RCU),
/*
 * NOTE(review): btrfs_path_cachep is not part of the NULL test below, so
 * a failed path cache creation goes unnoticed - confirm and add it.
 */
2324 if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL ||
2325 btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL)
2330 static void destroy_inodecache(void)
2332 kmem_cache_destroy(btrfs_inode_cachep);
2333 kmem_cache_destroy(btrfs_trans_handle_cachep);
2334 kmem_cache_destroy(btrfs_transaction_cachep);
2335 kmem_cache_destroy(btrfs_bit_radix_cachep);
2336 kmem_cache_destroy(btrfs_path_cachep);
2339 static int btrfs_get_sb(struct file_system_type *fs_type,
2340 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
2342 return get_sb_bdev(fs_type, flags, dev_name, data,
2343 btrfs_fill_super, mnt);
/*
 * btrfs_getattr - fill in stat information for @dentry.
 *
 * Uses the generic attribute fill for the inode and then overrides the
 * reported block size.
 */
2347 static int btrfs_getattr(struct vfsmount *mnt,
2348 struct dentry *dentry, struct kstat *stat)
2350 struct inode *inode = dentry->d_inode;
2351 generic_fillattr(inode, stat);
/* report 256 KiB as the block size */
2352 stat->blksize = 256 * 1024;
/*
 * File system type registered by init_btrfs_fs() and unregistered by
 * exit_btrfs_fs().
 */
2356 static struct file_system_type btrfs_fs_type = {
2357 .owner = THIS_MODULE,
/* mounts go through btrfs_get_sb(); teardown uses the generic block helper */
2359 .get_sb = btrfs_get_sb,
2360 .kill_sb = kill_block_super,
/* the file system needs a backing device */
2361 .fs_flags = FS_REQUIRES_DEV,
2364 static struct super_operations btrfs_super_ops = {
2365 .statfs = simple_statfs,
2366 .delete_inode = btrfs_delete_inode,
2367 .put_super = btrfs_put_super,
2368 .read_inode = btrfs_read_locked_inode,
2369 .write_super = btrfs_write_super,
2370 .sync_fs = btrfs_sync_fs,
2371 .write_inode = btrfs_write_inode,
2372 .alloc_inode = btrfs_alloc_inode,
2373 .destroy_inode = btrfs_destroy_inode,
2376 static struct inode_operations btrfs_dir_inode_operations = {
2377 .lookup = btrfs_lookup,
2378 .create = btrfs_create,
2379 .unlink = btrfs_unlink,
2380 .mkdir = btrfs_mkdir,
2381 .rmdir = btrfs_rmdir,
2384 static struct inode_operations btrfs_dir_ro_inode_operations = {
2385 .lookup = btrfs_lookup,
2388 static struct file_operations btrfs_dir_file_operations = {
2389 .llseek = generic_file_llseek,
2390 .read = generic_read_dir,
2391 .readdir = btrfs_readdir,
2392 .ioctl = btrfs_ioctl,
2395 static struct address_space_operations btrfs_aops = {
2396 .readpage = btrfs_readpage,
2397 .writepage = btrfs_writepage,
2398 .sync_page = block_sync_page,
2399 .prepare_write = btrfs_prepare_write,
2400 .commit_write = btrfs_commit_write,
2403 static struct inode_operations btrfs_file_inode_operations = {
2404 .truncate = btrfs_truncate,
2405 .getattr = btrfs_getattr,
2408 static struct file_operations btrfs_file_operations = {
2409 .llseek = generic_file_llseek,
2410 .read = do_sync_read,
2411 .aio_read = btrfs_file_aio_read,
2412 .write = btrfs_file_write,
2413 .mmap = generic_file_mmap,
2414 .open = generic_file_open,
2415 .ioctl = btrfs_ioctl,
/*
 * init_btrfs_fs - module initialisation.
 *
 * Creates the slab caches, registers the btrfs subsystem under fs_subsys
 * and finally registers the file system type.
 */
2418 static int __init init_btrfs_fs(void)
2421 printk("btrfs loaded!\n");
2422 err = init_inodecache();
2425 kset_set_kset_s(&btrfs_subsys, fs_subsys);
2426 err = subsystem_register(&btrfs_subsys);
/*
 * NOTE(review): the register_filesystem() result is returned directly, so
 * a failure here does not reach the destroy_inodecache() call below -
 * confirm whether the caches and the subsystem need cleaning up.
 */
2429 return register_filesystem(&btrfs_fs_type);
2431 destroy_inodecache();
/*
 * exit_btrfs_fs - module teardown.
 *
 * Destroys the slab caches, unregisters the file system type and then
 * unregisters the btrfs subsystem, in that order.
 */
2435 static void __exit exit_btrfs_fs(void)
2437 destroy_inodecache();
2438 unregister_filesystem(&btrfs_fs_type);
2439 subsystem_unregister(&btrfs_subsys);
2440 printk("btrfs unloaded\n");
/* Module entry and exit points, and the module license declaration. */
2443 module_init(init_btrfs_fs)
2444 module_exit(exit_btrfs_fs)
2446 MODULE_LICENSE("GPL");