1 #include <linux/module.h>
2 #include <linux/buffer_head.h>
4 #include <linux/pagemap.h>
5 #include <linux/highmem.h>
6 #include <linux/time.h>
7 #include <linux/init.h>
8 #include <linux/string.h>
9 #include <linux/smp_lock.h>
10 #include <linux/backing-dev.h>
11 #include <linux/mpage.h>
12 #include <linux/swap.h>
13 #include <linux/writeback.h>
14 #include <linux/statfs.h>
17 #include "transaction.h"
18 #include "btrfs_inode.h"
/*
 * Release hook installed in btrfs_fsinfo_ktype.  It recovers the enclosing
 * struct btrfs_fs_info from the embedded kobj member with container_of().
 * NOTE(review): what is done with fsinfo afterwards is not visible in this
 * listing.
 */
21 void btrfs_fsinfo_release(struct kobject *obj)
23 struct btrfs_fs_info *fsinfo = container_of(obj,
24 struct btrfs_fs_info, kobj);
/*
 * kobj_type used for the fs_info kobject; its release hook is
 * btrfs_fsinfo_release().
 */
28 struct kobj_type btrfs_fsinfo_ktype = {
29 .release = btrfs_fsinfo_release,
/*
 * Argument bundle that btrfs_iget_locked() hands to iget5_locked().  It is
 * read back by btrfs_find_actor() and btrfs_init_locked_inode(), which use
 * its ino and root members to identify an inode.
 */
32 struct btrfs_iget_args {
34 struct btrfs_root *root;
/* subsystem declaration for btrfs; btrfs_fsinfo_ktype is passed as its ktype */
37 decl_subsys(btrfs, &btrfs_fsinfo_ktype, NULL);
/* value stored in sb->s_magic by btrfs_fill_super() */
39 #define BTRFS_SUPER_MAGIC 0x9123682E
/*
 * Forward declarations of the operation tables that the functions below
 * install on inodes and on the super block.  Their definitions are not
 * visible in this part of the file.
 */
41 static struct inode_operations btrfs_dir_inode_operations;
42 static struct inode_operations btrfs_dir_ro_inode_operations;
43 static struct super_operations btrfs_super_ops;
44 static struct file_operations btrfs_dir_file_operations;
45 static struct inode_operations btrfs_file_inode_operations;
46 static struct address_space_operations btrfs_aops;
47 static struct file_operations btrfs_file_operations;
/*
 * Fill a VFS inode from its on-disk btrfs inode item.
 *
 * Callers in this file (btrfs_lookup() and btrfs_fill_super()) invoke it
 * for inodes that still have I_NEW set, right before unlock_new_inode().
 * The item is located through BTRFS_I(inode)->location, its attributes are
 * copied into the VFS inode, the block group named by the item is looked up
 * in fs_info->block_group_radix, and the operation tables are then chosen
 * from the file type bits of i_mode.  The tree access runs under
 * fs_info->fs_mutex.  The last four statements form the failure path, which
 * releases the path, drops the mutex and marks the inode bad.
 *
 * NOTE(review): several short lines (error checks, case labels, gotos) are
 * not visible in this listing, so the exact control flow must be confirmed
 * against the complete file.
 */
49 static void btrfs_read_locked_inode(struct inode *inode)
51 struct btrfs_path *path;
52 struct btrfs_inode_item *inode_item;
53 struct btrfs_root *root = BTRFS_I(inode)->root;
54 struct btrfs_key location;
55 struct btrfs_block_group_cache *alloc_group;
56 u64 alloc_group_block;
59 path = btrfs_alloc_path();
61 btrfs_init_path(path);
62 mutex_lock(&root->fs_info->fs_mutex);
/* work on a local copy of the key that identifies this inode item */
64 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
65 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
/*
 * NOTE(review): if this free is followed by a jump to the failure path at
 * the bottom of the function, the path would be freed a second time there;
 * confirm against the full source.
 */
67 btrfs_free_path(path);
70 inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
72 struct btrfs_inode_item);
/* copy the on-disk attributes into the VFS inode */
74 inode->i_mode = btrfs_inode_mode(inode_item);
75 inode->i_nlink = btrfs_inode_nlink(inode_item);
76 inode->i_uid = btrfs_inode_uid(inode_item);
77 inode->i_gid = btrfs_inode_gid(inode_item);
78 inode->i_size = btrfs_inode_size(inode_item);
79 inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
80 inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
81 inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
82 inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
83 inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
84 inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
85 inode->i_blocks = btrfs_inode_nblocks(inode_item);
86 inode->i_generation = btrfs_inode_generation(inode_item);
/* resolve the block group recorded in the item to its in-memory cache entry */
87 alloc_group_block = btrfs_inode_block_group(inode_item);
88 ret = radix_tree_gang_lookup(&root->fs_info->block_group_radix,
89 (void **)&alloc_group,
90 alloc_group_block, 1);
92 BTRFS_I(inode)->block_group = alloc_group;
/*
 * NOTE(review): inode_item points into the leaf referenced by path, yet it
 * is read again by btrfs_inode_rdev() below, after this free; confirm the
 * buffer is still valid at that point.
 */
94 btrfs_free_path(path);
97 mutex_unlock(&root->fs_info->fs_mutex);
/* pick the operation tables that match the file type */
99 switch (inode->i_mode & S_IFMT) {
102 init_special_inode(inode, inode->i_mode,
103 btrfs_inode_rdev(inode_item));
107 inode->i_mapping->a_ops = &btrfs_aops;
108 inode->i_fop = &btrfs_file_operations;
109 inode->i_op = &btrfs_file_inode_operations;
112 inode->i_fop = &btrfs_dir_file_operations;
/* directories that live in the tree of tree roots get the ro operations */
113 if (root == root->fs_info->tree_root)
114 inode->i_op = &btrfs_dir_ro_inode_operations;
116 inode->i_op = &btrfs_dir_inode_operations;
119 // inode->i_op = &page_symlink_inode_operations;
/* failure path: release everything and flag the inode as bad */
125 btrfs_release_path(root, path);
126 btrfs_free_path(path);
127 mutex_unlock(&root->fs_info->fs_mutex);
128 make_bad_inode(inode);
/*
 * Store the attributes of a VFS inode into a btrfs inode item through the
 * btrfs_set_* accessors.  This is the reverse of the copy performed in
 * btrfs_read_locked_inode().  The block group is recorded as the objectid
 * of the key of BTRFS_I(inode)->block_group, so that pointer must be set
 * before this is called.
 */
131 static void fill_inode_item(struct btrfs_inode_item *item,
134 btrfs_set_inode_uid(item, inode->i_uid);
135 btrfs_set_inode_gid(item, inode->i_gid);
136 btrfs_set_inode_size(item, inode->i_size);
137 btrfs_set_inode_mode(item, inode->i_mode);
138 btrfs_set_inode_nlink(item, inode->i_nlink);
139 btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
140 btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
141 btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
142 btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
143 btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
144 btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
145 btrfs_set_inode_nblocks(item, inode->i_blocks);
146 btrfs_set_inode_generation(item, inode->i_generation);
147 btrfs_set_inode_block_group(item,
148 BTRFS_I(inode)->block_group->key.objectid);
/*
 * Write the in-memory attributes of inode back to its item in the tree.
 *
 * The item is looked up at BTRFS_I(inode)->location inside the given
 * transaction, refilled with fill_inode_item(), and the leaf that holds it
 * is marked dirty.  The path is released and freed before returning.
 * NOTE(review): the handling of a failed lookup and the return statement
 * are not visible in this listing.
 */
152 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
153 struct btrfs_root *root,
156 struct btrfs_inode_item *inode_item;
157 struct btrfs_path *path;
160 path = btrfs_alloc_path();
162 btrfs_init_path(path);
163 ret = btrfs_lookup_inode(trans, root, path,
164 &BTRFS_I(inode)->location, 1);
171 inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
173 struct btrfs_inode_item);
175 fill_inode_item(inode_item, inode);
176 btrfs_mark_buffer_dirty(path->nodes[0]);
179 btrfs_release_path(root, path);
180 btrfs_free_path(path);
/*
 * Remove the directory entry described by dentry from dir, inside an
 * already running transaction.
 *
 * Two items are deleted: the dir item found by name and, using the
 * objectid taken from that item, the matching dir index item.  Afterwards
 * the ctime of the victim inode is set from the directory, the directory
 * size is reduced, the link count of the victim is dropped and both inodes
 * are written back with btrfs_update_inode().
 * NOTE(review): the error checks after each lookup are not visible in this
 * listing, and the results of the two btrfs_update_inode() calls are not
 * stored.
 */
185 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
186 struct btrfs_root *root,
188 struct dentry *dentry)
190 struct btrfs_path *path;
191 const char *name = dentry->d_name.name;
192 int name_len = dentry->d_name.len;
195 struct btrfs_dir_item *di;
197 path = btrfs_alloc_path();
199 btrfs_init_path(path);
200 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
/* remember which inode the entry pointed at before deleting it */
210 objectid = btrfs_disk_key_objectid(&di->location);
211 ret = btrfs_delete_one_dir_name(trans, root, path, di);
213 btrfs_release_path(root, path);
/* second pass: remove the index item that refers to the same inode */
215 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
216 objectid, name, name_len, -1);
225 ret = btrfs_delete_one_dir_name(trans, root, path, di);
228 dentry->d_inode->i_ctime = dir->i_ctime;
230 btrfs_free_path(path);
/* btrfs_add_link() grows the directory by name length * 2; undo that here */
232 dir->i_size -= name_len * 2;
233 btrfs_update_inode(trans, root, dir);
234 drop_nlink(dentry->d_inode);
235 btrfs_update_inode(trans, root, dentry->d_inode);
236 dir->i_sb->s_dirt = 1;
/*
 * Unlink entry point taking the parent directory and the dentry to remove.
 * It takes fs_mutex, starts a transaction on the root of dir, delegates the
 * removal to btrfs_unlink_trans(), then ends the transaction and drops the
 * mutex.
 */
241 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
243 struct btrfs_root *root;
244 struct btrfs_trans_handle *trans;
247 root = BTRFS_I(dir)->root;
248 mutex_lock(&root->fs_info->fs_mutex);
249 trans = btrfs_start_transaction(root, 1);
250 btrfs_set_trans_block_group(trans, dir);
251 ret = btrfs_unlink_trans(trans, root, dir, dentry);
252 btrfs_end_transaction(trans, root);
253 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Remove the directory behind dentry from its parent dir.
 *
 * Under fs_mutex and inside one transaction, the items keyed by the inode
 * number of the victim are searched starting from the largest possible
 * offset.  An item is deleted only when it is a dir item or dir index item
 * whose name matches the first one or two characters of goodnames; the
 * reaction to any other item is in lines not visible in this listing.
 * Once the directory is empty the entry in the parent is removed with
 * btrfs_unlink_trans().
 *
 * The transaction is ended before fs_mutex is dropped, which is the order
 * used by every other transaction user in this file.
 */
257 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
259 struct inode *inode = dentry->d_inode;
262 struct btrfs_root *root = BTRFS_I(dir)->root;
263 struct btrfs_path *path;
264 struct btrfs_key key;
265 struct btrfs_trans_handle *trans;
266 struct btrfs_key found_key;
268 struct btrfs_leaf *leaf;
269 char *goodnames = "..";
271 path = btrfs_alloc_path();
273 btrfs_init_path(path);
274 mutex_lock(&root->fs_info->fs_mutex);
275 trans = btrfs_start_transaction(root, 1);
276 btrfs_set_trans_block_group(trans, dir);
/* start from the highest key that can belong to this directory */
277 key.objectid = inode->i_ino;
278 key.offset = (u64)-1;
281 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
287 if (path->slots[0] == 0) {
292 leaf = btrfs_buffer_leaf(path->nodes[0]);
293 btrfs_disk_key_to_cpu(&found_key,
294 &leaf->items[path->slots[0]].key);
295 found_type = btrfs_key_type(&found_key);
296 if (found_key.objectid != inode->i_ino) {
/* anything other than the one and two character names in goodnames is a real entry */
300 if ((found_type != BTRFS_DIR_ITEM_KEY &&
301 found_type != BTRFS_DIR_INDEX_KEY) ||
302 (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
303 !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
307 ret = btrfs_del_item(trans, root, path);
310 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
312 btrfs_release_path(root, path);
315 btrfs_release_path(root, path);
317 /* now the directory is empty */
318 err = btrfs_unlink_trans(trans, root, dir, dentry);
323 btrfs_release_path(root, path);
324 btrfs_free_path(path);
/* finish the transaction while fs_mutex is still held */
325 ret = btrfs_end_transaction(trans, root);
326 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Delete the inode item of inode from the tree within the given
 * transaction: the item is looked up at BTRFS_I(inode)->location and then
 * removed with btrfs_del_item().  The path is freed before returning.
 * NOTE(review): the check of the lookup result is not visible in this
 * listing.
 */
332 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
333 struct btrfs_root *root,
336 struct btrfs_path *path;
341 path = btrfs_alloc_path();
343 btrfs_init_path(path);
344 ret = btrfs_lookup_inode(trans, root, path,
345 &BTRFS_I(inode)->location, -1);
347 ret = btrfs_del_item(trans, root, path);
349 btrfs_free_path(path);
/*
 * Drop the csum and file extent items of inode that start at or beyond
 * inode->i_size, inside an already running transaction.
 *
 * Each search starts from the largest possible key of the inode (offset -1,
 * type BTRFS_CSUM_ITEM_KEY).  The item found is examined: its objectid must
 * match the inode, its type must be a csum or extent data item, and its
 * offset is compared against i_size.  For extent data items that are not
 * inline, the disk block number and block count are remembered so that the
 * extent can be handed to btrfs_free_extent() after the item is deleted.
 * NOTE(review): the loop construct, the statements taken when one of the
 * checks fails, and the target of the num_blocks << 3 adjustment are not
 * visible in this listing.
 */
353 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
354 struct btrfs_root *root,
358 struct btrfs_path *path;
359 struct btrfs_key key;
360 struct btrfs_disk_key *found_key;
361 struct btrfs_leaf *leaf;
362 struct btrfs_file_extent_item *fi = NULL;
363 u64 extent_start = 0;
364 u64 extent_num_blocks = 0;
367 path = btrfs_alloc_path();
369 /* FIXME, add redo link to tree so we don't leak on crash */
370 key.objectid = inode->i_ino;
371 key.offset = (u64)-1;
374 * use BTRFS_CSUM_ITEM_KEY because it is larger than inline keys
377 btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY);
379 btrfs_init_path(path);
380 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
385 BUG_ON(path->slots[0] == 0);
388 leaf = btrfs_buffer_leaf(path->nodes[0]);
389 found_key = &leaf->items[path->slots[0]].key;
390 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
392 if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY &&
393 btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY)
395 if (btrfs_disk_key_offset(found_key) < inode->i_size)
398 if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) {
399 fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
401 struct btrfs_file_extent_item);
/* inline extents own no separate disk blocks, so only other types are recorded */
402 if (btrfs_file_extent_type(fi) !=
403 BTRFS_FILE_EXTENT_INLINE) {
405 btrfs_file_extent_disk_blocknr(fi);
407 btrfs_file_extent_disk_num_blocks(fi);
408 /* FIXME blocksize != 4096 */
410 btrfs_file_extent_num_blocks(fi) << 3;
414 ret = btrfs_del_item(trans, root, path);
416 btrfs_release_path(root, path);
418 ret = btrfs_free_extent(trans, root, extent_start,
419 extent_num_blocks, 0);
425 btrfs_release_path(root, path);
426 btrfs_free_path(path);
427 inode->i_sb->s_dirt = 1;
/*
 * Tear down an inode that is being deleted.
 *
 * The page cache of the inode is truncated first.  Then, under fs_mutex and
 * inside a transaction, regular files have their items removed through
 * btrfs_truncate_in_trans() and the inode item itself is deleted with
 * btrfs_free_inode().
 * NOTE(review): the body of the is_bad_inode() branch and the final
 * clean-up of the VFS inode are not visible in this listing.
 */
431 static void btrfs_delete_inode(struct inode *inode)
433 struct btrfs_trans_handle *trans;
434 struct btrfs_root *root = BTRFS_I(inode)->root;
437 truncate_inode_pages(&inode->i_data, 0);
438 if (is_bad_inode(inode)) {
442 mutex_lock(&root->fs_info->fs_mutex);
443 trans = btrfs_start_transaction(root, 1);
444 btrfs_set_trans_block_group(trans, inode);
445 if (S_ISREG(inode->i_mode)) {
446 ret = btrfs_truncate_in_trans(trans, root, inode);
449 btrfs_free_inode(trans, root, inode);
450 btrfs_end_transaction(trans, root);
451 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Look up the name of dentry in directory dir and report the key of the
 * item it refers to through *location.
 *
 * When the dir item is missing or the lookup returned an error pointer,
 * location->objectid is set to 0, which btrfs_lookup() treats as no entry.
 * Otherwise the on-disk key stored in the dir item is converted to CPU
 * format into *location.  No locking is done here; btrfs_lookup() calls
 * this with fs_mutex held.
 */
457 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
458 struct btrfs_key *location)
460 const char *name = dentry->d_name.name;
461 int namelen = dentry->d_name.len;
462 struct btrfs_dir_item *di;
463 struct btrfs_path *path;
464 struct btrfs_root *root = BTRFS_I(dir)->root;
467 path = btrfs_alloc_path();
469 btrfs_init_path(path);
470 di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
472 if (!di || IS_ERR(di)) {
473 location->objectid = 0;
477 btrfs_disk_key_to_cpu(location, &di->location);
479 btrfs_release_path(root, path);
480 btrfs_free_path(path);
/*
 * Redirect a lookup result that names a tree root to the top directory of
 * that root.
 *
 * Keys that are not of type BTRFS_ROOT_ITEM_KEY, and the key of the root
 * tree itself, are filtered by the two checks at the top.  For any other
 * root item key the root is read with btrfs_read_fs_root() under fs_mutex,
 * *sub_root is set to it, and *location is rewritten to the inode item key
 * (offset 0) of the directory recorded in the root item.
 *
 * Returns the PTR_ERR() value when the root cannot be read.  On that error
 * path the allocated path is freed and fs_mutex is released before
 * returning, so the caller never inherits a held lock.
 */
484 int fixup_tree_root_location(struct btrfs_root *root,
485 struct btrfs_key *location,
486 struct btrfs_root **sub_root)
488 struct btrfs_path *path;
489 struct btrfs_root_item *ri;
491 if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
493 if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
496 path = btrfs_alloc_path();
498 mutex_lock(&root->fs_info->fs_mutex);
500 *sub_root = btrfs_read_fs_root(root->fs_info, location);
501 if (IS_ERR(*sub_root)) {
/* error path: undo the allocation and the locking done above */
btrfs_free_path(path);
mutex_unlock(&root->fs_info->fs_mutex);
502 return PTR_ERR(*sub_root);
}
/* point the key at the top directory inode of the sub root */
504 ri = &(*sub_root)->root_item;
505 location->objectid = btrfs_root_dirid(ri);
507 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
508 location->offset = 0;
510 btrfs_free_path(path);
511 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Set callback passed to iget5_locked() by btrfs_iget_locked(): stamps a
 * new inode with the inode number and root carried in the
 * struct btrfs_iget_args that p points to.
 */
515 int btrfs_init_locked_inode(struct inode *inode, void *p)
517 struct btrfs_iget_args *args = p;
518 inode->i_ino = args->ino;
519 BTRFS_I(inode)->root = args->root;
/*
 * Test callback passed to iget5_locked() by btrfs_iget_locked().  Returns
 * non-zero only when both the inode number and the root of the candidate
 * inode match the struct btrfs_iget_args behind opaque, so equal inode
 * numbers in different roots are kept apart.
 */
523 int btrfs_find_actor(struct inode *inode, void *opaque)
525 struct btrfs_iget_args *args = opaque;
526 return (args->ino == inode->i_ino &&
527 args->root == BTRFS_I(inode)->root);
/*
 * Obtain the inode identified by the pair (objectid, root) on super block
 * s through iget5_locked(), using btrfs_find_actor() as the comparison
 * callback and btrfs_init_locked_inode() as the initialiser.
 * NOTE(review): the filling of args and the return statement are not
 * visible in this listing.
 */
530 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
531 struct btrfs_root *root)
534 struct btrfs_iget_args args;
538 inode = iget5_locked(s, objectid, btrfs_find_actor,
539 btrfs_init_locked_inode,
/*
 * Directory lookup: resolve dentry inside dir to an inode.
 *
 * Names longer than BTRFS_NAME_LEN are rejected with -ENAMETOOLONG.  The
 * name is resolved to a key with btrfs_inode_by_name() under fs_mutex.  A
 * non-zero objectid is passed through fixup_tree_root_location(), which may
 * switch sub_root to a different root, and the inode is then obtained with
 * btrfs_iget_locked().  An inode that is still I_NEW gets its root and
 * location filled in and is read from disk before being unlocked.  The
 * result is attached to the dentry with d_splice_alias().
 * NOTE(review): the conditions guarding the -ENOENT and -EACCES returns
 * are not visible in this listing.
 */
544 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
545 struct nameidata *nd)
547 struct inode * inode;
548 struct btrfs_inode *bi = BTRFS_I(dir);
549 struct btrfs_root *root = bi->root;
550 struct btrfs_root *sub_root = root;
551 struct btrfs_key location;
554 if (dentry->d_name.len > BTRFS_NAME_LEN)
555 return ERR_PTR(-ENAMETOOLONG);
556 mutex_lock(&root->fs_info->fs_mutex);
557 ret = btrfs_inode_by_name(dir, dentry, &location);
558 mutex_unlock(&root->fs_info->fs_mutex);
/* objectid 0 means that btrfs_inode_by_name() found no entry */
562 if (location.objectid) {
563 ret = fixup_tree_root_location(root, &location, &sub_root);
567 return ERR_PTR(-ENOENT);
568 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
571 return ERR_PTR(-EACCES);
572 if (inode->i_state & I_NEW) {
/* the entry crossed into another root: remember its top inode there */
573 if (sub_root != root) {
574 printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root);
576 sub_root->inode = inode;
578 BTRFS_I(inode)->root = sub_root;
579 memcpy(&BTRFS_I(inode)->location, &location,
581 btrfs_read_locked_inode(inode);
582 unlock_new_inode(inode);
585 return d_splice_alias(inode, dentry);
/*
 * Start readahead for leaves below the level 1 node of path.
 *
 * Beginning at the current slot of that node, each block pointer whose key
 * carries the same objectid as the key at the current slot is handed to
 * readahead_tree_block().
 * NOTE(review): the statement executed when the objectid differs is not
 * visible here; presumably the loop stops at that point.
 */
588 static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path)
590 struct btrfs_node *node;
600 node = btrfs_buffer_node(path->nodes[1]);
601 slot = path->slots[1];
602 objectid = btrfs_disk_key_objectid(&node->ptrs[slot].key);
603 nritems = btrfs_header_nritems(&node->header);
604 for (i = slot; i < nritems; i++) {
605 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
606 if (item_objectid != objectid)
608 blocknr = btrfs_node_blockptr(node, i);
609 readahead_tree_block(root, blocknr);
/*
 * readdir implementation: feed directory entries to filldir, starting at
 * filp->f_pos.
 *
 * Entries are read from items of type BTRFS_DIR_INDEX_KEY, except for
 * directories in the tree of tree roots, where BTRFS_DIR_ITEM_KEY is used.
 * The whole walk runs under fs_mutex.  Starting from the slot found by
 * btrfs_search_slot(), items are visited in key order and
 * btrfs_next_leaf() is used to cross leaf boundaries.  Items are compared
 * against the directory objectid, the key type and filp->f_pos.  For an
 * item that is emitted, f_pos is set to its key offset and every
 * btrfs_dir_item packed inside it is passed to filldir; the name bytes
 * follow each dir item header directly.
 * NOTE(review): the loop construct, the actions taken when the item checks
 * fail and the handling of the filldir result are not visible in this
 * listing, and no check of the btrfs_alloc_path() result is visible.
 */
613 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
615 struct inode *inode = filp->f_path.dentry->d_inode;
616 struct btrfs_root *root = BTRFS_I(inode)->root;
617 struct btrfs_item *item;
618 struct btrfs_dir_item *di;
619 struct btrfs_key key;
620 struct btrfs_path *path;
623 struct btrfs_leaf *leaf;
626 unsigned char d_type = DT_UNKNOWN;
631 int key_type = BTRFS_DIR_INDEX_KEY;
633 /* FIXME, use a real flag for deciding about the key type */
634 if (root->fs_info->tree_root == root)
635 key_type = BTRFS_DIR_ITEM_KEY;
636 mutex_lock(&root->fs_info->fs_mutex);
637 key.objectid = inode->i_ino;
639 btrfs_set_key_type(&key, key_type);
640 key.offset = filp->f_pos;
641 path = btrfs_alloc_path();
642 btrfs_init_path(path);
643 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
647 reada_leaves(root, path);
649 leaf = btrfs_buffer_leaf(path->nodes[0]);
650 nritems = btrfs_header_nritems(&leaf->header);
651 slot = path->slots[0];
652 if (advance || slot >= nritems) {
/* last slot of this leaf: continue in the next leaf */
653 if (slot >= nritems -1) {
654 ret = btrfs_next_leaf(root, path);
657 leaf = btrfs_buffer_leaf(path->nodes[0]);
658 nritems = btrfs_header_nritems(&leaf->header);
659 slot = path->slots[0];
/* a new level 1 node was entered: kick off readahead for its leaves */
660 if (path->slots[1] == 0)
661 reada_leaves(root, path);
668 item = leaf->items + slot;
669 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
671 if (btrfs_disk_key_type(&item->key) != key_type)
673 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
675 filp->f_pos = btrfs_disk_key_offset(&item->key);
677 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
679 di_total = btrfs_item_size(leaf->items + slot);
/* one item may hold several dir entries stored back to back */
680 while(di_cur < di_total) {
681 over = filldir(dirent, (const char *)(di + 1),
682 btrfs_dir_name_len(di),
683 btrfs_disk_key_offset(&item->key),
684 btrfs_disk_key_objectid(&di->location),
688 di_len = btrfs_dir_name_len(di) + sizeof(*di);
690 di = (struct btrfs_dir_item *)((char *)di + di_len);
697 btrfs_release_path(root, path);
698 btrfs_free_path(path);
699 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Super block teardown: closes the tree through close_ctree(), reports its
 * return value with printk and clears sb->s_fs_info.
 * NOTE(review): whether the printk is conditional on ret is not visible in
 * this listing.
 */
703 static void btrfs_put_super (struct super_block * sb)
705 struct btrfs_root *root = btrfs_sb(sb);
708 ret = close_ctree(root);
710 printk("close ctree returns %d\n", ret);
712 sb->s_fs_info = NULL;
/*
 * Fill in a super block at mount time.
 *
 * Sets the generic super block fields, opens the tree with open_ctree()
 * and stores the resulting tree root in sb->s_fs_info.  The root directory
 * inode named by the on-disk super block is obtained with
 * btrfs_iget_locked(), its location key and root are initialised, and it
 * is read from disk if it is new.  Finally a root dentry is allocated for
 * it and installed as sb->s_root.
 * NOTE(review): the error checks after open_ctree(), btrfs_iget_locked()
 * and d_alloc_root(), and the assignment of bi, are not visible in this
 * listing.
 */
715 static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
717 struct inode * inode;
718 struct dentry * root_dentry;
719 struct btrfs_super_block *disk_super;
720 struct btrfs_root *tree_root;
721 struct btrfs_inode *bi;
723 sb->s_maxbytes = MAX_LFS_FILESIZE;
724 sb->s_magic = BTRFS_SUPER_MAGIC;
725 sb->s_op = &btrfs_super_ops;
728 tree_root = open_ctree(sb);
731 printk("btrfs: open_ctree failed\n");
734 sb->s_fs_info = tree_root;
735 disk_super = tree_root->fs_info->disk_super;
736 printk("read in super total blocks %Lu root %Lu\n",
737 btrfs_super_total_blocks(disk_super),
738 btrfs_super_root_dir(disk_super));
740 inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super),
/* build the inode item key that btrfs_read_locked_inode() will look up */
743 bi->location.objectid = inode->i_ino;
744 bi->location.offset = 0;
745 bi->location.flags = 0;
746 bi->root = tree_root;
747 btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
751 if (inode->i_state & I_NEW) {
752 btrfs_read_locked_inode(inode);
753 unlock_new_inode(inode);
756 root_dentry = d_alloc_root(inode);
761 sb->s_root = root_dentry;
/*
 * write_inode hook: under fs_mutex, starts a transaction on the root of the
 * inode and commits it with btrfs_commit_transaction().
 * NOTE(review): how the wait argument influences this (for example a
 * surrounding conditional) is not visible in this listing.
 */
766 static int btrfs_write_inode(struct inode *inode, int wait)
768 struct btrfs_root *root = BTRFS_I(inode)->root;
769 struct btrfs_trans_handle *trans;
773 mutex_lock(&root->fs_info->fs_mutex);
774 trans = btrfs_start_transaction(root, 1);
775 btrfs_set_trans_block_group(trans, inode);
776 ret = btrfs_commit_transaction(trans, root);
777 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * dirty_inode hook: writes the in-memory inode attributes to the tree
 * immediately.  Under fs_mutex it opens a transaction, calls
 * btrfs_update_inode() and ends the transaction.  The result of
 * btrfs_update_inode() is not checked.
 */
782 static void btrfs_dirty_inode(struct inode *inode)
784 struct btrfs_root *root = BTRFS_I(inode)->root;
785 struct btrfs_trans_handle *trans;
787 mutex_lock(&root->fs_info->fs_mutex);
788 trans = btrfs_start_transaction(root, 1);
789 btrfs_set_trans_block_group(trans, inode);
790 btrfs_update_inode(trans, root, inode);
791 btrfs_end_transaction(trans, root);
792 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Allocate and initialise a new inode with number objectid in root.
 *
 * A VFS inode is obtained from new_inode(); ERR_PTR(-ENOMEM) is returned
 * on the failure path visible here.  The block group is chosen with
 * btrfs_find_block_group() using the group argument as a hint.  Owner and
 * group come from the fsuid and fsgid of the current task, all three
 * timestamps are set to CURRENT_TIME, and the resulting attributes are
 * packed into an on-stack inode item that is inserted into the tree with
 * btrfs_insert_inode().  The location key of the inode is set to the inode
 * item key (objectid, offset 0) and the inode is hashed.
 * NOTE(review): the handling of a failed btrfs_insert_inode() and the
 * return statement are not visible in this listing.
 */
795 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
796 struct btrfs_root *root,
798 struct btrfs_block_group_cache *group,
802 struct btrfs_inode_item inode_item;
803 struct btrfs_key *location;
806 inode = new_inode(root->fs_info->sb);
808 return ERR_PTR(-ENOMEM);
810 BTRFS_I(inode)->root = root;
811 group = btrfs_find_block_group(root, group, 0);
812 BTRFS_I(inode)->block_group = group;
814 inode->i_uid = current->fsuid;
815 inode->i_gid = current->fsgid;
816 inode->i_mode = mode;
817 inode->i_ino = objectid;
819 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
/* block_group must already be set: fill_inode_item() dereferences it */
820 fill_inode_item(&inode_item, inode);
821 location = &BTRFS_I(inode)->location;
822 location->objectid = objectid;
824 location->offset = 0;
825 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
827 ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
830 insert_inode_hash(inode);
/*
 * Add a directory entry for inode under the parent of dentry.
 *
 * A dir item carrying the name of dentry is inserted into the parent
 * directory; its target key is the inode item key of inode.  The parent
 * directory size is then grown by twice the name length (the counterpart
 * of the subtraction in btrfs_unlink_trans()) and the parent inode is
 * written back with btrfs_update_inode().
 * NOTE(review): whether the size update is skipped when the insertion
 * fails is not visible in this listing.
 */
834 static int btrfs_add_link(struct btrfs_trans_handle *trans,
835 struct dentry *dentry, struct inode *inode)
838 struct btrfs_key key;
839 struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
840 key.objectid = inode->i_ino;
842 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
845 ret = btrfs_insert_dir_item(trans, root,
846 dentry->d_name.name, dentry->d_name.len,
847 dentry->d_parent->d_inode->i_ino,
850 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
851 ret = btrfs_update_inode(trans, root,
852 dentry->d_parent->d_inode);
/*
 * Link inode into the parent directory of dentry with btrfs_add_link() and
 * bind the dentry to the inode with d_instantiate().
 * NOTE(review): the condition on err around d_instantiate() and the return
 * statement are not visible in this listing.
 */
857 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
858 struct dentry *dentry, struct inode *inode)
860 int err = btrfs_add_link(trans, dentry, inode);
862 d_instantiate(dentry, inode);
/*
 * create hook: make a new regular file named by dentry inside dir.
 *
 * Under fs_mutex and within one transaction, a free objectid is found, a
 * new inode is built with btrfs_new_inode() using the block group of dir
 * as a hint, and the inode is linked into dir with btrfs_add_nondir().
 * The regular file operation tables are then installed and the block group
 * hints of both inodes are refreshed from the transaction.
 * NOTE(review): the error checks, including the IS_ERR() test that should
 * guard the PTR_ERR(inode) assignment, and the condition under which
 * inode_dec_link_count() runs are not visible in this listing.
 */
870 static int btrfs_create(struct inode *dir, struct dentry *dentry,
871 int mode, struct nameidata *nd)
873 struct btrfs_trans_handle *trans;
874 struct btrfs_root *root = BTRFS_I(dir)->root;
880 mutex_lock(&root->fs_info->fs_mutex);
881 trans = btrfs_start_transaction(root, 1);
882 btrfs_set_trans_block_group(trans, dir);
884 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
890 inode = btrfs_new_inode(trans, root, objectid,
891 BTRFS_I(dir)->block_group, mode);
892 err = PTR_ERR(inode);
896 btrfs_set_trans_block_group(trans, inode);
897 err = btrfs_add_nondir(trans, dentry, inode);
901 inode->i_mapping->a_ops = &btrfs_aops;
902 inode->i_fop = &btrfs_file_operations;
903 inode->i_op = &btrfs_file_inode_operations;
905 dir->i_sb->s_dirt = 1;
906 btrfs_update_inode_block_group(trans, inode);
907 btrfs_update_inode_block_group(trans, dir);
909 btrfs_end_transaction(trans, root);
910 mutex_unlock(&root->fs_info->fs_mutex);
913 inode_dec_link_count(inode);
/*
 * Insert the two initial entries of a new directory objectid whose parent
 * is dirid.
 *
 * The first dir item uses a name of length 1 and targets the inode item of
 * the new directory itself; the second uses a name of length 2 and targets
 * the inode item of dirid.
 * NOTE(review): the contents of buf are set in lines not visible here;
 * presumably they are the dot and dot-dot names.
 */
919 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
920 struct btrfs_root *root,
921 u64 objectid, u64 dirid)
925 struct btrfs_key key;
930 key.objectid = objectid;
933 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
935 ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
/* same key, now pointing at the parent directory */
939 key.objectid = dirid;
940 ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
/*
 * mkdir hook: create a new directory named by dentry inside dir.
 *
 * Under fs_mutex and within one transaction, a free objectid is found, the
 * inode is created with S_IFDIR or-ed into mode, the directory operation
 * tables are installed, the two initial entries are inserted with
 * btrfs_make_empty_dir(), the inode is written back, and the new directory
 * is linked into dir with btrfs_add_link() before the dentry is
 * instantiated.
 * NOTE(review): the error checks and unwind labels are not visible in this
 * listing.  As shown, btrfs_set_trans_block_group() uses trans before the
 * PTR_ERR(trans) assignment, so an error pointer may be used; confirm.
 */
948 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
951 struct btrfs_trans_handle *trans;
952 struct btrfs_root *root = BTRFS_I(dir)->root;
957 mutex_lock(&root->fs_info->fs_mutex);
958 trans = btrfs_start_transaction(root, 1);
959 btrfs_set_trans_block_group(trans, dir);
961 err = PTR_ERR(trans);
965 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
971 inode = btrfs_new_inode(trans, root, objectid,
972 BTRFS_I(dir)->block_group, S_IFDIR | mode);
974 err = PTR_ERR(inode);
978 inode->i_op = &btrfs_dir_inode_operations;
979 inode->i_fop = &btrfs_dir_file_operations;
980 btrfs_set_trans_block_group(trans, inode);
982 err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
987 err = btrfs_update_inode(trans, root, inode);
990 err = btrfs_add_link(trans, dentry, inode);
993 d_instantiate(dentry, inode);
995 dir->i_sb->s_dirt = 1;
996 btrfs_update_inode_block_group(trans, inode);
997 btrfs_update_inode_block_group(trans, dir);
1000 btrfs_end_transaction(trans, root);
1002 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * fsync hook: force the data of the file to disk by committing a
 * transaction on the root that owns the inode, under fs_mutex.
 *
 * Returns the result of the commit, except that any positive value is
 * reported as -EIO so that callers always see 0 or a negative errno.
 * NOTE(review): the handling of a failed btrfs_start_transaction() is not
 * visible in this listing; confirm that fs_mutex is released on that path.
 */
1008 static int btrfs_sync_file(struct file *file,
1009 struct dentry *dentry, int datasync)
1011 struct inode *inode = dentry->d_inode;
1012 struct btrfs_root *root = BTRFS_I(inode)->root;
1014 struct btrfs_trans_handle *trans;
1016 mutex_lock(&root->fs_info->fs_mutex);
1017 trans = btrfs_start_transaction(root, 1);
1022 ret = btrfs_commit_transaction(trans, root);
1023 mutex_unlock(&root->fs_info->fs_mutex);
/* errors leave the kernel as negative errno values */
1025 return ret > 0 ? -EIO : ret;
/*
 * sync_fs hook: flush the filesystem to disk.
 *
 * Writeback is started on the mapping of the btree inode with
 * filemap_flush(); then, under fs_mutex, a transaction is started and
 * committed.  A debug message is printed before the mutex is dropped.
 * NOTE(review): how the wait argument is used is not visible in this
 * listing.
 */
1028 static int btrfs_sync_fs(struct super_block *sb, int wait)
1030 struct btrfs_trans_handle *trans;
1031 struct btrfs_root *root;
1033 root = btrfs_sb(sb);
1037 filemap_flush(root->fs_info->btree_inode->i_mapping);
1040 mutex_lock(&root->fs_info->fs_mutex);
1041 trans = btrfs_start_transaction(root, 1);
1042 ret = btrfs_commit_transaction(trans, root);
1045 printk("btrfs sync_fs\n");
1046 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * get_block worker, called by btrfs_get_block() with fs_mutex held: map
 * file block iblock of inode onto result.
 *
 * The file extent item covering the byte offset of iblock is looked up
 * without a transaction handle.  The item found must belong to this inode
 * and be of type BTRFS_EXTENT_DATA_KEY.  Two extent types are handled:
 *  - BTRFS_FILE_EXTENT_REG: if iblock lies inside the extent, result is
 *    mapped to the disk block at the same distance from the extent start.
 *  - BTRFS_FILE_EXTENT_INLINE: the inline bytes are copied into the page
 *    of result, the rest of the page is zeroed, and the buffer is marked
 *    uptodate and mapped to logical block 0.
 * NOTE(review): the error handling, the statements taken on a miss and any
 * use of the create argument are not visible in this listing.
 */
1050 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1051 struct buffer_head *result, int create)
1056 u64 extent_start = 0;
1058 u64 objectid = inode->i_ino;
1060 struct btrfs_path *path;
1061 struct btrfs_root *root = BTRFS_I(inode)->root;
1062 struct btrfs_file_extent_item *item;
1063 struct btrfs_leaf *leaf;
1064 struct btrfs_disk_key *found_key;
1066 path = btrfs_alloc_path();
1068 btrfs_init_path(path);
1073 ret = btrfs_lookup_file_extent(NULL, root, path,
1075 iblock << inode->i_blkbits, 0);
1082 if (path->slots[0] == 0) {
1083 btrfs_release_path(root, path);
1089 item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
1090 struct btrfs_file_extent_item);
1091 leaf = btrfs_buffer_leaf(path->nodes[0]);
/* first disk block of the data: extent start plus the offset into it */
1092 blocknr = btrfs_file_extent_disk_blocknr(item);
1093 blocknr += btrfs_file_extent_offset(item);
1095 /* are we inside the extent that was found? */
1096 found_key = &leaf->items[path->slots[0]].key;
1097 found_type = btrfs_disk_key_type(found_key);
1098 if (btrfs_disk_key_objectid(found_key) != objectid ||
1099 found_type != BTRFS_EXTENT_DATA_KEY) {
1102 btrfs_release_path(root, path);
/* found_type is reused: from here on it holds the extent type */
1105 found_type = btrfs_file_extent_type(item);
1106 extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1107 if (found_type == BTRFS_FILE_EXTENT_REG) {
/* convert the byte offset of the key into a block index */
1108 extent_start = extent_start >> inode->i_blkbits;
1109 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1110 if (iblock >= extent_start && iblock < extent_end) {
1112 btrfs_map_bh_to_logical(root, result, blocknr +
1113 iblock - extent_start);
1116 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1120 size = btrfs_file_extent_inline_len(leaf->items +
1122 extent_end = (extent_start + size) >> inode->i_blkbits;
1123 extent_start >>= inode->i_blkbits;
1124 if (iblock < extent_start || iblock > extent_end) {
/* copy the inline data into the page and zero the remainder */
1127 ptr = btrfs_file_extent_inline_start(item);
1128 map = kmap(result->b_page);
1129 memcpy(map, ptr, size);
1130 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1131 flush_dcache_page(result->b_page);
1132 kunmap(result->b_page);
1133 set_buffer_uptodate(result);
1134 SetPageChecked(result->b_page);
1135 btrfs_map_bh_to_logical(root, result, 0);
1138 btrfs_release_path(root, path);
1139 btrfs_free_path(path);
/*
 * Locking wrapper around btrfs_get_block_lock(): takes fs_mutex of the
 * root that owns the inode for the duration of the mapping.  This is the
 * get_block callback handed to nobh_prepare_write() and mpage_readpage()
 * below.
 */
1143 static int btrfs_get_block(struct inode *inode, sector_t iblock,
1144 struct buffer_head *result, int create)
1147 struct btrfs_root *root = BTRFS_I(inode)->root;
1148 mutex_lock(&root->fs_info->fs_mutex);
1149 err = btrfs_get_block_lock(inode, iblock, result, create);
1150 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * prepare_write hook: delegates to nobh_prepare_write() with
 * btrfs_get_block() as the block mapping callback.  The file argument is
 * unused.
 */
1154 static int btrfs_prepare_write(struct file *file, struct page *page,
1155 unsigned from, unsigned to)
1157 return nobh_prepare_write(page, from, to, btrfs_get_block);
/*
 * write_super hook: performs a full btrfs_sync_fs() with wait set to 1.
 * The return value of the sync is discarded.
 */
1160 static void btrfs_write_super(struct super_block *sb)
1162 btrfs_sync_fs(sb, 1);
/*
 * readpage hook: reads one page through mpage_readpage() using
 * btrfs_get_block() to map blocks.  The file argument is unused.
 */
1165 static int btrfs_readpage(struct file *file, struct page *page)
1167 return mpage_readpage(page, btrfs_get_block);
1171 * While block_write_full_page is writing back the dirty buffers under
1172 * the page lock, whoever dirtied the buffers may decide to clean them
1173 * again at any time. We handle that by only looking at the buffer
1174 * state inside lock_buffer().
1176 * If block_write_full_page() is called for regular writeback
1177 * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1178 * locked buffer. This only can happen if someone has written the buffer
1179 * directly, with submit_bh(). At the address_space level PageWriteback
1180 * prevents this contention from occurring.
/*
 * Write out all dirty buffers of a locked page.
 *
 * The comments inside refer to block_write_full_page(), whose structure
 * this function follows.  Two details visible in the code are specific to
 * this file: dirty unmapped buffers are mapped with btrfs_get_block()
 * called with create set to 0, and a buffer is only queued for writing when
 * its b_blocknr is non-zero.
 *
 * The function proceeds in phases: buffers are attached to the page if it
 * has none, buffers past the last block of the file are cleaned, the
 * remaining dirty buffers are mapped, locked and marked for async write,
 * the page is switched to writeback, and the marked buffers are submitted.
 * If nothing was submitted the page writeback is ended at once.  The
 * statements near the end form the recovery path taken after a mapping
 * error, which still submits the buffers that were already mapped.
 * NOTE(review): labels, gotos and several short statements are not visible
 * in this listing; confirm the control flow against the full file.
 */
1182 static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1183 struct writeback_control *wbc)
1187 sector_t last_block;
1188 struct buffer_head *bh, *head;
1189 const unsigned blocksize = 1 << inode->i_blkbits;
1190 int nr_underway = 0;
1192 BUG_ON(!PageLocked(page));
1194 last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1196 if (!page_has_buffers(page)) {
1197 create_empty_buffers(page, blocksize,
1198 (1 << BH_Dirty)|(1 << BH_Uptodate));
1202 * Be very careful. We have no exclusion from __set_page_dirty_buffers
1203 * here, and the (potentially unmapped) buffers may become dirty at
1204 * any time. If a buffer becomes dirty here after we've inspected it
1205 * then we just miss that fact, and the page stays dirty.
1207 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1208 * handle that here by just cleaning them.
1211 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1212 head = page_buffers(page);
1216 * Get all the dirty buffers mapped to disk addresses and
1217 * handle any aliases from the underlying blockdev's mapping.
1220 if (block > last_block) {
1222 * mapped buffers outside i_size will occur, because
1223 * this page can be outside i_size when there is a
1224 * truncate in progress.
1227 * The buffer was zeroed by block_write_full_page()
1229 clear_buffer_dirty(bh);
1230 set_buffer_uptodate(bh);
1231 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1232 WARN_ON(bh->b_size != blocksize);
1233 err = btrfs_get_block(inode, block, bh, 0);
1236 if (buffer_new(bh)) {
1237 /* blockdev mappings never come here */
1238 clear_buffer_new(bh);
1239 unmap_underlying_metadata(bh->b_bdev,
1243 bh = bh->b_this_page;
1245 } while (bh != head);
1248 if (!buffer_mapped(bh))
1251 * If it's a fully non-blocking write attempt and we cannot
1252 * lock the buffer then redirty the page. Note that this can
1253 * potentially cause a busy-wait loop from pdflush and kswapd
1254 * activity, but those code paths have their own higher-level
1257 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1259 } else if (test_set_buffer_locked(bh)) {
1260 redirty_page_for_writepage(wbc, page);
1263 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1264 mark_buffer_async_write(bh);
1268 } while ((bh = bh->b_this_page) != head);
1271 * The page and its buffers are protected by PageWriteback(), so we can
1272 * drop the bh refcounts early.
1274 BUG_ON(PageWriteback(page));
1275 set_page_writeback(page);
1278 struct buffer_head *next = bh->b_this_page;
1279 if (buffer_async_write(bh)) {
1280 submit_bh(WRITE, bh);
1284 } while (bh != head);
1289 if (nr_underway == 0) {
1291 * The page was marked dirty, but the buffers were
1292 * clean. Someone wrote them back by hand with
1293 * ll_rw_block/submit_bh. A rare case.
1297 if (!buffer_uptodate(bh)) {
1301 bh = bh->b_this_page;
1302 } while (bh != head);
1304 SetPageUptodate(page);
1305 end_page_writeback(page);
1307 * The page and buffer_heads can be released at any time from
1310 wbc->pages_skipped++; /* We didn't write this page */
1316 * ENOSPC, or some other error. We may already have added some
1317 * blocks to the file, so we need to write these out to avoid
1318 * exposing stale data.
1319 * The page is currently locked and not marked for writeback
1322 /* Recovery: lock and submit the mapped buffers */
1324 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1326 mark_buffer_async_write(bh);
1329 * The buffer may have been set dirty during
1330 * attachment to a dirty page.
1332 clear_buffer_dirty(bh);
1334 } while ((bh = bh->b_this_page) != head);
1336 BUG_ON(PageWriteback(page));
1337 set_page_writeback(page);
1339 struct buffer_head *next = bh->b_this_page;
1340 if (buffer_async_write(bh)) {
1341 clear_buffer_dirty(bh);
1342 submit_bh(WRITE, bh);
1346 } while (bh != head);
1352 * The generic ->writepage function for buffer-backed address_spaces
/*
 * writepage hook.  Three cases are distinguished by comparing the page
 * index with i_size:
 *  - page entirely below i_size: written as is;
 *  - page entirely beyond i_size (or i_size page aligned and the page at
 *    or past it): its buffers are invalidated and 0 is returned;
 *  - page straddling i_size: the tail beyond i_size is zeroed before the
 *    page is written.
 * The actual writing is done by __btrfs_write_full_page().
 * NOTE(review): any statement between block_invalidatepage() and the
 * return (for example unlocking the page) is not visible in this listing.
 */
1354 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1356 struct inode * const inode = page->mapping->host;
1357 loff_t i_size = i_size_read(inode);
1358 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1362 /* Is the page fully inside i_size? */
1363 if (page->index < end_index)
1364 return __btrfs_write_full_page(inode, page, wbc);
1366 /* Is the page fully outside i_size? (truncate in progress) */
1367 offset = i_size & (PAGE_CACHE_SIZE-1);
1368 if (page->index >= end_index+1 || !offset) {
1370 * The page may have dirty, unmapped buffers. For example,
1371 * they may have been added in ext3_writepage(). Make them
1372 * freeable here, so the page does not leak.
1374 block_invalidatepage(page, 0);
1376 return 0; /* don't care */
1380 * The page straddles i_size. It must be zeroed out on each and every
1381 * writepage invokation because it may be mmapped. "A file is mapped
1382 * in multiples of the page size. For a file that is not a multiple of
1383 * the page size, the remaining memory is zeroed when mapped, and
1384 * writes to that region are not written out to the file."
1386 kaddr = kmap_atomic(page, KM_USER0);
1387 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1388 flush_dcache_page(page);
1389 kunmap_atomic(kaddr, KM_USER0);
1390 return __btrfs_write_full_page(inode, page, wbc);
/*
 * btrfs_truncate: the ->truncate hook installed in
 * btrfs_file_inode_operations.
 *
 * Only acts on regular files that are neither append-only nor immutable
 * (the statements taken when those checks fail are not visible in this
 * listing; presumably plain returns).  It then calls nobh_truncate_page()
 * for the page at the current i_size, runs btrfs_truncate_in_trans() inside
 * a transaction while holding fs_info->fs_mutex, and marks the inode dirty.
 *
 * NOTE(review): `ret` from btrfs_truncate_in_trans() is overwritten by the
 * result of btrfs_end_transaction(); whether it is checked in between is not
 * visible here.
 */
1393 static void btrfs_truncate(struct inode *inode)
1395 struct btrfs_root *root = BTRFS_I(inode)->root;
1397 struct btrfs_trans_handle *trans;
1399 if (!S_ISREG(inode->i_mode))
1401 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1404 nobh_truncate_page(inode->i_mapping, inode->i_size);
1406 /* FIXME, add redo link to tree so we don't leak on crash */
/* All tree modifications below happen under the filesystem-wide fs_mutex */
1407 mutex_lock(&root->fs_info->fs_mutex);
1408 trans = btrfs_start_transaction(root, 1);
1409 btrfs_set_trans_block_group(trans, inode);
1410 ret = btrfs_truncate_in_trans(trans, root, inode);
1412 ret = btrfs_end_transaction(trans, root);
1414 mutex_unlock(&root->fs_info->fs_mutex);
1415 mark_inode_dirty(inode);
/*
 * btrfs_commit_write: the ->commit_write hook installed in btrfs_aops; also
 * called directly from dirty_and_release_pages().
 *
 * @file: file being written
 * @page: page that received the data
 * @from: start offset of the write within the page (not used in the lines
 *        visible here)
 * @to:   end offset of the write within the page
 *
 * Marks the page up to date, dirties it when its first buffer is mapped to a
 * non-zero block number, and grows i_size when the write ends past it.
 * The return statement is not visible in this listing.
 */
1419 * Make sure any changes to nobh_commit_write() are reflected in
1420 * nobh_truncate_page(), since it doesn't call commit_write().
1422 static int btrfs_commit_write(struct file *file, struct page *page,
1423 unsigned from, unsigned to)
1425 struct inode *inode = page->mapping->host;
1426 struct buffer_head *bh;
/* pos: file offset one past the last byte written into this page */
1427 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1429 SetPageUptodate(page);
1430 bh = page_buffers(page);
/*
 * A mapped buffer with b_blocknr == 0 is the case that
 * dirty_and_release_pages() stores as an inline extent; such a page is not
 * dirtied here.
 */
1431 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1432 set_page_dirty(page);
/* Extend the file if this write went past the current end */
1434 if (pos > inode->i_size) {
1435 i_size_write(inode, pos);
1436 mark_inode_dirty(inode);
/*
 * btrfs_copy_from_user: copy up to @write_bytes bytes from the user buffer
 * @buf into the pages in @prepared_pages.
 *
 * @pos:            file position of the write; only its offset within a page
 *                  is used, and only for the first page
 * @num_pages:      number of entries in @prepared_pages
 * @write_bytes:    total number of bytes to copy
 * @prepared_pages: destination pages
 * @buf:            user-space source buffer
 *
 * Returns -EFAULT when the last __copy_from_user() call reported uncopied
 * bytes, otherwise 0.
 *
 * NOTE(review): the remaining __copy_from_user() arguments and the end of the
 * loop body (advancing @buf, any early exit on a fault) are not visible in
 * this listing.
 * NOTE(review): page_address() is used without kmap(); confirm highmem pages
 * cannot reach this path.
 */
1441 static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
1442 struct page **prepared_pages,
1443 const char __user * buf)
1445 long page_fault = 0;
1447 int offset = pos & (PAGE_CACHE_SIZE - 1);
/* After the first page every copy starts at offset 0 of the page */
1449 for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
1450 size_t count = min_t(size_t,
1451 PAGE_CACHE_SIZE - offset, write_bytes);
1452 struct page *page = prepared_pages[i];
/* Touch the user memory up front so the copy below is less likely to fault */
1453 fault_in_pages_readable(buf, count);
1455 /* Copy data from userspace to the current page */
1457 page_fault = __copy_from_user(page_address(page) + offset,
1459 /* Flush processor's dcache for this page */
1460 flush_dcache_page(page);
1463 write_bytes -= count;
1468 return page_fault ? -EFAULT : 0;
/*
 * btrfs_drop_pages: release pages obtained for a write.
 *
 * For each of the @num_pages entries in @pages: unlock the page, mark it
 * accessed and drop the page cache reference.
 *
 * NOTE(review): two lines at the top of the loop body are missing from this
 * listing (possibly a check for unset entries); confirm before relying on
 * NULL entries being tolerated.
 */
1471 static void btrfs_drop_pages(struct page **pages, size_t num_pages)
1474 for (i = 0; i < num_pages; i++) {
1477 unlock_page(pages[i]);
1478 mark_page_accessed(pages[i]);
1479 page_cache_release(pages[i]);
/*
 * dirty_and_release_pages: finish a buffered write for a set of pages.
 *
 * For each page a transaction is started under fs_info->fs_mutex.  When the
 * page's first buffer is mapped with block number 0, the data is stored as an
 * inline file extent item (type BTRFS_FILE_EXTENT_INLINE) copied from
 * bh->b_data.  The page is also checksummed with btrfs_csum_file_block();
 * whether that happens only in the non-inline case is not visible in this
 * listing (presumably an else branch).  The page is then flagged Checked, the
 * transaction is ended, and btrfs_commit_write() is called for the written
 * range.
 *
 * The incoming @trans argument is overwritten by btrfs_start_transaction();
 * the visible caller (btrfs_file_write) passes NULL.
 *
 * NOTE(review): several parameters, local declarations, error checks and the
 * tail of the function are missing from this listing.
 * NOTE(review): kmap(pages[i]) has no kunmap() visible here; confirm.
 */
1482 static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
1483 struct btrfs_root *root,
1485 struct page **pages,
1495 struct inode *inode = file->f_path.dentry->d_inode;
1496 struct buffer_head *bh;
1497 struct btrfs_file_extent_item *ei;
1499 for (i = 0; i < num_pages; i++) {
/* offset / this_write: start and length of the written range in this page */
1500 offset = pos & (PAGE_CACHE_SIZE -1);
1501 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1502 /* FIXME, one block at a time */
1504 mutex_lock(&root->fs_info->fs_mutex);
1505 trans = btrfs_start_transaction(root, 1);
1506 btrfs_set_trans_block_group(trans, inode);
1508 bh = page_buffers(pages[i]);
/* Mapped buffer with block number 0: store the data inline in the tree */
1509 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
1510 struct btrfs_key key;
1511 struct btrfs_path *path;
1515 path = btrfs_alloc_path();
/* Key of the extent item: (inode number, EXTENT_DATA, file offset of page) */
1517 key.objectid = inode->i_ino;
1518 key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
1520 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
/* Inline data must be smaller than one page */
1521 BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
1523 btrfs_file_extent_calc_inline_size(write_bytes);
1524 ret = btrfs_insert_empty_item(trans, root, path, &key,
1527 ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1528 path->slots[0], struct btrfs_file_extent_item);
1529 btrfs_set_file_extent_generation(ei, trans->transid);
1530 btrfs_set_file_extent_type(ei,
1531 BTRFS_FILE_EXTENT_INLINE);
1532 ptr = btrfs_file_extent_inline_start(ei);
1533 memcpy(ptr, bh->b_data, offset + write_bytes);
1534 mark_buffer_dirty(path->nodes[0]);
1535 btrfs_free_path(path);
/* Checksum covers the whole page, keyed by inode number and file offset */
1537 btrfs_csum_file_block(trans, root, inode->i_ino,
1538 pages[i]->index << PAGE_CACHE_SHIFT,
1539 kmap(pages[i]), PAGE_CACHE_SIZE);
/* Checked flag lets btrfs_read_actor() skip checksum verification */
1542 SetPageChecked(pages[i]);
1543 btrfs_update_inode_block_group(trans, inode);
1544 ret = btrfs_end_transaction(trans, root);
1546 mutex_unlock(&root->fs_info->fs_mutex);
1548 ret = btrfs_commit_write(file, pages[i], offset,
1549 offset + this_write);
1555 WARN_ON(this_write > write_bytes);
1556 write_bytes -= this_write;
/*
 * drop_extents: remove or trim the file extent items of @inode that overlap
 * a byte range, so new extents can be inserted in their place.
 *
 * The range is [start, end) as used by the comparisons below (the parameter
 * declarations for start/end are not visible in this listing).  The loop
 * looks up the file extent at search_start and, for each item that belongs
 * to the inode and is of type EXTENT_DATA:
 *  - computes extent_end from the item (regular extent or inline extent)
 *  - if the item extends past `end`, saves a copy in `old` and takes an
 *    extra reference on its disk extent ("bookend" case)
 *  - if the item starts before `start`, shortens it to end at `start`
 *  - otherwise deletes the item and frees its disk extent
 *  - in the bookend case, inserts a new regular extent item that points at
 *    the same disk extent and covers the part of the old item after `end`
 *
 * inode->i_blocks is adjusted in units of (blocks << 3); this assumes
 * 4096-byte blocks (see the "FIXME blocksize != 4096" in btrfs_file_write).
 *
 * NOTE(review): many lines (declarations, loop header, error handling, goto
 * labels, return) are missing from this listing; the summary above only
 * reflects the visible statements.
 */
1562 static int drop_extents(struct btrfs_trans_handle *trans,
1563 struct btrfs_root *root,
1564 struct inode *inode,
1568 struct btrfs_key key;
1569 struct btrfs_leaf *leaf;
1571 struct btrfs_file_extent_item *extent;
1574 struct btrfs_file_extent_item old;
1575 struct btrfs_path *path;
1576 u64 search_start = start;
1582 path = btrfs_alloc_path();
1586 btrfs_release_path(root, path);
1587 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
1592 if (path->slots[0] == 0) {
1603 leaf = btrfs_buffer_leaf(path->nodes[0]);
1604 slot = path->slots[0];
1605 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
/* Item is past the range or belongs to another inode */
1606 if (key.offset >= end || key.objectid != inode->i_ino) {
1610 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
1614 extent = btrfs_item_ptr(leaf, slot,
1615 struct btrfs_file_extent_item);
1616 found_type = btrfs_file_extent_type(extent);
1617 if (found_type == BTRFS_FILE_EXTENT_REG) {
1618 extent_end = key.offset +
1619 (btrfs_file_extent_num_blocks(extent) <<
1622 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1624 extent_end = key.offset +
1625 btrfs_file_extent_inline_len(leaf->items + slot);
1628 if (!found_extent && !found_inline) {
1633 if (search_start >= extent_end) {
/* Next lookup continues right after this extent */
1638 search_start = extent_end;
/* Extent reaches past `end`: keep a copy and an extra ref for the bookend */
1640 if (end < extent_end && end >= key.offset) {
1642 memcpy(&old, extent, sizeof(old));
1643 ret = btrfs_inc_extent_ref(trans, root,
1644 btrfs_file_extent_disk_blocknr(&old),
1645 btrfs_file_extent_disk_num_blocks(&old));
1648 WARN_ON(found_inline);
/* Extent begins before `start`: keep its head, drop the rest */
1652 if (start > key.offset) {
1655 /* truncate existing extent */
1657 WARN_ON(start & (root->blocksize - 1));
1659 new_num = (start - key.offset) >>
1661 old_num = btrfs_file_extent_num_blocks(extent);
1662 inode->i_blocks -= (old_num - new_num) << 3;
1663 btrfs_set_file_extent_num_blocks(extent,
1665 mark_buffer_dirty(path->nodes[0]);
1669 ret = btrfs_truncate_item(trans, root, path,
1670 start - key.offset);
/* Otherwise the whole item is removed and its disk extent freed */
1676 u64 disk_blocknr = 0;
1677 u64 disk_num_blocks = 0;
1678 u64 extent_num_blocks = 0;
1681 btrfs_file_extent_disk_blocknr(extent);
1683 btrfs_file_extent_disk_num_blocks(extent);
1685 btrfs_file_extent_num_blocks(extent);
1687 ret = btrfs_del_item(trans, root, path);
1689 btrfs_release_path(root, path);
1692 btrfs_file_extent_num_blocks(extent) << 3;
1693 ret = btrfs_free_extent(trans, root,
1695 disk_num_blocks, 0);
1699 if (!bookend && search_start >= end) {
1706 if (bookend && found_extent) {
1707 /* create bookend */
1708 struct btrfs_key ins;
1709 ins.objectid = inode->i_ino;
1712 btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
1714 btrfs_release_path(root, path);
1715 ret = btrfs_insert_empty_item(trans, root, path, &ins,
1718 extent = btrfs_item_ptr(
1719 btrfs_buffer_leaf(path->nodes[0]),
1721 struct btrfs_file_extent_item);
/* New item shares the old disk extent ... */
1722 btrfs_set_file_extent_disk_blocknr(extent,
1723 btrfs_file_extent_disk_blocknr(&old));
1724 btrfs_set_file_extent_disk_num_blocks(extent,
1725 btrfs_file_extent_disk_num_blocks(&old));
/* ... but starts (end - key.offset) blocks further into it */
1727 btrfs_set_file_extent_offset(extent,
1728 btrfs_file_extent_offset(&old) +
1729 ((end - key.offset) >> inode->i_blkbits));
1730 WARN_ON(btrfs_file_extent_num_blocks(&old) <
1731 (end - key.offset) >> inode->i_blkbits);
1732 btrfs_set_file_extent_num_blocks(extent,
1733 btrfs_file_extent_num_blocks(&old) -
1734 ((end - key.offset) >> inode->i_blkbits));
1736 btrfs_set_file_extent_type(extent,
1737 BTRFS_FILE_EXTENT_REG);
1738 btrfs_set_file_extent_generation(extent,
1739 btrfs_file_extent_generation(&old));
1740 btrfs_mark_buffer_dirty(path->nodes[0]);
1742 btrfs_file_extent_num_blocks(extent) << 3;
1748 btrfs_free_path(path);
/*
 * prepare_pages: get the page cache pages for a write ready.
 *
 * For each of the pages starting at the page index of `pos`, the page is
 * obtained with grab_cache_page(), given empty buffers (created with the
 * Uptodate bit set) and every buffer is mapped with
 * btrfs_map_bh_to_logical().  A non-zero @alloc_extent_start is advanced by
 * one for each buffer mapped; zero is passed through unchanged.
 *
 * On failure the pages are dropped via btrfs_drop_pages(); the
 * failed_truncate path additionally calls vmtruncate() back to the i_size
 * sampled on entry.
 *
 * NOTE(review): several parameters, declarations, the goto labels and the
 * return statements are missing from this listing.
 */
1752 static int prepare_pages(struct btrfs_root *root,
1754 struct page **pages,
1757 unsigned long first_index,
1758 unsigned long last_index,
1760 u64 alloc_extent_start)
1763 unsigned long index = pos >> PAGE_CACHE_SHIFT;
1764 struct inode *inode = file->f_path.dentry->d_inode;
1768 struct buffer_head *bh;
1769 struct buffer_head *head;
/* Remember the size on entry so a failed write can be truncated back */
1770 loff_t isize = i_size_read(inode);
1772 memset(pages, 0, num_pages * sizeof(struct page *));
1774 for (i = 0; i < num_pages; i++) {
1775 pages[i] = grab_cache_page(inode->i_mapping, index + i);
1778 goto failed_release;
1780 offset = pos & (PAGE_CACHE_SIZE -1);
1781 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1782 create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize,
1783 (1 << BH_Uptodate));
1784 head = page_buffers(pages[i]);
/* Walk the page's circular buffer list, mapping each buffer */
1787 err = btrfs_map_bh_to_logical(root, bh,
1788 alloc_extent_start);
1791 goto failed_truncate;
1792 bh = bh->b_this_page;
1793 if (alloc_extent_start)
1794 alloc_extent_start++;
1795 } while (bh != head);
1797 WARN_ON(this_write > write_bytes);
1798 write_bytes -= this_write;
1803 btrfs_drop_pages(pages, num_pages);
1807 btrfs_drop_pages(pages, num_pages);
1809 vmtruncate(inode, isize);
/*
 * btrfs_file_write: the ->write hook installed in btrfs_file_operations.
 *
 * Visible steps:
 *  1. generic write checks, suid removal and timestamp update
 *  2. under i_mutex, pin (and read in if needed) the first and last pages of
 *     the range when the write only partially covers them and they lie below
 *     i_size
 *  3. in one transaction under fs_mutex: account the new blocks in i_blocks,
 *     drop existing extents overlapping the write, and either allocate and
 *     insert a new file extent or leave the data to be stored inline
 *  4. loop over the data one page at a time: prepare_pages(),
 *     btrfs_copy_from_user(), dirty_and_release_pages(), btrfs_drop_pages()
 *  5. release the pinned pages and return the number of bytes written, or
 *     `err` when nothing was written
 *
 * NOTE(review): pinned[0] and pinned[1] are passed to PageUptodate() directly
 * after grab_cache_page() with no NULL check in between; grab_cache_page()
 * can fail under memory pressure.
 * NOTE(review): i_mutex is taken after generic_write_checks(), remove_suid()
 * and file_update_time(); the error exits for those calls are not visible in
 * this listing, so confirm they do not reach the mutex_unlock().
 * NOTE(review): many declarations, error checks and labels are missing from
 * this listing.
 */
1813 static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1814 size_t count, loff_t *ppos)
1817 size_t num_written = 0;
1820 struct inode *inode = file->f_path.dentry->d_inode;
1821 struct btrfs_root *root = BTRFS_I(inode)->root;
1822 struct page *pages[8];
1823 struct page *pinned[2] = { NULL, NULL };
1824 unsigned long first_index;
1825 unsigned long last_index;
1828 u64 alloc_extent_start;
1829 struct btrfs_trans_handle *trans;
1830 struct btrfs_key ins;
1832 if (file->f_flags & O_DIRECT)
1835 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
1836 current->backing_dev_info = inode->i_mapping->backing_dev_info;
1837 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1842 err = remove_suid(file->f_path.dentry);
1845 file_update_time(file);
/* start_pos: write position rounded down to a page boundary */
1847 start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
1848 num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
1851 mutex_lock(&inode->i_mutex);
1852 first_index = pos >> PAGE_CACHE_SHIFT;
1853 last_index = (pos + count) >> PAGE_CACHE_SHIFT;
/* Write starts mid-page inside existing data: make sure that page is read in */
1855 if ((first_index << PAGE_CACHE_SHIFT) < inode->i_size &&
1856 (pos & (PAGE_CACHE_SIZE - 1))) {
1857 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
1858 if (!PageUptodate(pinned[0])) {
1859 ret = mpage_readpage(pinned[0], btrfs_get_block);
1862 unlock_page(pinned[0]);
/* Same for the last page of the range */
1865 if (first_index != last_index &&
1866 (last_index << PAGE_CACHE_SHIFT) < inode->i_size &&
1867 (count & (PAGE_CACHE_SIZE - 1))) {
1868 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
1869 if (!PageUptodate(pinned[1])) {
1870 ret = mpage_readpage(pinned[1], btrfs_get_block);
1873 unlock_page(pinned[1]);
1877 mutex_lock(&root->fs_info->fs_mutex);
1878 trans = btrfs_start_transaction(root, 1);
1881 mutex_unlock(&root->fs_info->fs_mutex);
1884 btrfs_set_trans_block_group(trans, inode);
1885 /* FIXME blocksize != 4096 */
1886 inode->i_blocks += num_blocks << 3;
/* Overwriting existing data: remove the extents the write will replace */
1887 if (start_pos < inode->i_size) {
1888 /* FIXME blocksize != pagesize */
1889 ret = drop_extents(trans, root, inode,
1891 (pos + count + root->blocksize -1) &
1892 ~((u64)root->blocksize - 1));
/*
 * A real extent is allocated unless the file is smaller than a page, the
 * write reaches at least to i_size, and the data fits the inline limit.
 */
1895 if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
1896 pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
1897 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
1898 num_blocks, 1, (u64)-1, &ins);
1900 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
1901 start_pos, ins.objectid, ins.offset);
1908 alloc_extent_start = ins.objectid;
1909 btrfs_update_inode_block_group(trans, inode);
1910 ret = btrfs_end_transaction(trans, root);
1911 mutex_unlock(&root->fs_info->fs_mutex);
/* Each pass handles at most the remainder of one page */
1914 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1915 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
1916 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
1919 memset(pages, 0, sizeof(pages));
1920 ret = prepare_pages(root, file, pages, num_pages,
1921 pos, first_index, last_index,
1922 write_bytes, alloc_extent_start);
1925 /* FIXME blocks != pagesize */
1926 if (alloc_extent_start)
1927 alloc_extent_start += num_pages;
1928 ret = btrfs_copy_from_user(pos, num_pages,
1929 write_bytes, pages, buf);
1932 ret = dirty_and_release_pages(NULL, root, file, pages,
1933 num_pages, pos, write_bytes);
1935 btrfs_drop_pages(pages, num_pages);
1938 count -= write_bytes;
1940 num_written += write_bytes;
1942 balance_dirty_pages_ratelimited(inode->i_mapping);
1946 mutex_unlock(&inode->i_mutex);
1949 page_cache_release(pinned[0]);
1951 page_cache_release(pinned[1]);
1953 current->backing_dev_info = NULL;
1954 mark_inode_dirty(inode);
1955 return num_written ? num_written : err;
/*
 * btrfs_read_actor: copy part of a page cache page to the user buffer
 * described by @desc, verifying the page checksum first.
 *
 * @desc:   read descriptor; arg.buf, count, written and error are updated
 * @page:   source page
 * @offset: start offset within the page
 * @size:   number of bytes requested from this page
 *
 * A page without the Checked flag is verified once with
 * btrfs_csum_verify_file_block(); on failure a message is printed and the
 * page contents are zeroed.  The flag is then set so the page is not
 * verified again.  The copy is first attempted through an atomic mapping
 * and falls back to a sleeping copy.
 *
 * NOTE(review): kmap(page) in the checksum call has no kunmap() visible in
 * this listing, and several lines (declarations, size clamping, return) are
 * missing.
 */
1958 static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
1959 unsigned long offset, unsigned long size)
1962 unsigned long left, count = desc->count;
1963 struct inode *inode = page->mapping->host;
1968 if (!PageChecked(page)) {
1969 /* FIXME, do it per block */
1970 struct btrfs_root *root = BTRFS_I(inode)->root;
1972 int ret = btrfs_csum_verify_file_block(root,
1973 page->mapping->host->i_ino,
1974 page->index << PAGE_CACHE_SHIFT,
1975 kmap(page), PAGE_CACHE_SIZE);
1977 printk("failed to verify ino %lu page %lu\n",
1978 page->mapping->host->i_ino,
/* Do not hand out data that failed verification */
1980 memset(page_address(page), 0, PAGE_CACHE_SIZE);
1982 SetPageChecked(page);
1986 * Faults on the destination of a read are common, so do it before
/* Fast path: destination was faulted in, copy under an atomic mapping */
1989 if (!fault_in_pages_writeable(desc->arg.buf, size)) {
1990 kaddr = kmap_atomic(page, KM_USER0);
1991 left = __copy_to_user_inatomic(desc->arg.buf,
1992 kaddr + offset, size);
1993 kunmap_atomic(kaddr, KM_USER0);
1998 /* Do it the slow way */
2000 left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
2005 desc->error = -EFAULT;
/* Advance the descriptor past the bytes handed to the user */
2008 desc->count = count - size;
2009 desc->written += size;
2010 desc->arg.buf += size;
/*
 * The ->aio_read hook installed in btrfs_file_operations.
 *
 * First pass: sums the segment lengths into `count`, rejects negative or
 * wrapping lengths, and checks each segment with access_ok(); a segment that
 * fails the check has its length subtracted from `count` again.
 * Second pass: for each non-empty segment a read descriptor is filled in and
 * do_generic_file_read() is called; the bytes written are accumulated in
 * `retval`, which falls back to desc.error when nothing was read.
 *
 * NOTE(review): declarations, the handling after a failed access_ok(), the
 * actor argument passed to do_generic_file_read() and the return statement
 * are missing from this listing.
 */
2015 * btrfs_file_aio_read - filesystem read routine
2016 * @iocb: kernel I/O control block
2017 * @iov: io vector request
2018 * @nr_segs: number of segments in the iovec
2019 * @pos: current file position
2021 static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
2022 unsigned long nr_segs, loff_t pos)
2024 struct file *filp = iocb->ki_filp;
2028 loff_t *ppos = &iocb->ki_pos;
2031 for (seg = 0; seg < nr_segs; seg++) {
2032 const struct iovec *iv = &iov[seg];
2035 * If any segment has a negative length, or the cumulative
2036 * length ever wraps negative then return -EINVAL.
2038 count += iv->iov_len;
2039 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
2041 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
2046 count -= iv->iov_len; /* This segment is no good */
2051 for (seg = 0; seg < nr_segs; seg++) {
2052 read_descriptor_t desc;
2055 desc.arg.buf = iov[seg].iov_base;
2056 desc.count = iov[seg].iov_len;
2057 if (desc.count == 0)
2060 do_generic_file_read(filp, ppos, &desc,
2062 retval += desc.written;
/* Report the error only when no bytes at all were transferred */
2064 retval = retval ?: desc.error;
/*
 * create_subvol: create a new, empty subvolume named @name.
 *
 * Called from btrfs_ioctl() when the ioctl is issued on the tree root.
 * Under fs_info->fs_mutex, in a first transaction:
 *  - allocates a block and initialises it as an empty level-0 leaf
 *  - fills in a root item whose embedded inode describes a directory
 *  - inserts the root item into the tree of tree roots under a newly found
 *    free objectid
 *  - inserts a directory item for @name into the tree root's top directory
 * After committing, the new root is read back and a second transaction
 * creates its top-level directory inode (new_dirid) as an empty directory.
 *
 * NOTE(review): declarations, error checks and the return statement are
 * missing from this listing.
 */
2072 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
2074 struct btrfs_trans_handle *trans;
2075 struct btrfs_key key;
2076 struct btrfs_root_item root_item;
2077 struct btrfs_inode_item *inode_item;
2078 struct buffer_head *subvol;
2079 struct btrfs_leaf *leaf;
2080 struct btrfs_root *new_root;
2081 struct inode *inode;
2085 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2087 mutex_lock(&root->fs_info->fs_mutex);
2088 trans = btrfs_start_transaction(root, 1);
/* The new subvolume's tree starts out as a single empty leaf */
2091 subvol = btrfs_alloc_free_block(trans, root, 0);
2094 leaf = btrfs_buffer_leaf(subvol);
2095 btrfs_set_header_nritems(&leaf->header, 0);
2096 btrfs_set_header_level(&leaf->header, 0);
2097 btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
2098 btrfs_set_header_generation(&leaf->header, trans->transid);
2099 btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
2100 memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
2101 sizeof(leaf->header.fsid));
2102 mark_buffer_dirty(subvol);
/* Inode embedded in the root item: a directory with mode 0755 */
2104 inode_item = &root_item.inode;
2105 memset(inode_item, 0, sizeof(*inode_item));
2106 btrfs_set_inode_generation(inode_item, 1);
2107 btrfs_set_inode_size(inode_item, 3);
2108 btrfs_set_inode_nlink(inode_item, 1);
2109 btrfs_set_inode_nblocks(inode_item, 1);
2110 btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
2112 btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
2113 btrfs_set_root_refs(&root_item, 1);
2117 ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2121 btrfs_set_root_dirid(&root_item, new_dirid);
2123 key.objectid = objectid;
2126 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2127 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2132 * insert the directory item
/* The directory entry's key uses offset (u64)-1 */
2134 key.offset = (u64)-1;
2135 dir = root->fs_info->sb->s_root->d_inode;
2136 ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2137 name, namelen, dir->i_ino, &key, 0);
2140 ret = btrfs_commit_transaction(trans, root);
/* Second phase: populate the new root with its top-level directory */
2143 new_root = btrfs_read_fs_root(root->fs_info, &key);
2146 trans = btrfs_start_transaction(new_root, 1);
2149 inode = btrfs_new_inode(trans, new_root, new_dirid,
2150 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2151 inode->i_op = &btrfs_dir_inode_operations;
2152 inode->i_fop = &btrfs_dir_file_operations;
2154 ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
2159 ret = btrfs_update_inode(trans, new_root, inode);
2162 ret = btrfs_commit_transaction(trans, new_root);
2167 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * create_snapshot: create a snapshot of @root named @name.
 *
 * Called from btrfs_ioctl() when the ioctl is issued on a root other than
 * the tree root.  Roots without ref_cows set are rejected (the return value
 * for that case is not visible in this listing).  Under fs_info->fs_mutex,
 * in one transaction:
 *  - copies the current root item and points the copy at the root's current
 *    node, inserting it into the tree of tree roots under a new objectid
 *  - inserts a directory item for @name into the tree root's top directory
 *  - calls btrfs_inc_root_ref() on @root
 *  - commits the transaction
 *
 * NOTE(review): declarations, error checks and the return statement are
 * missing from this listing.
 */
2171 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2173 struct btrfs_trans_handle *trans;
2174 struct btrfs_key key;
2175 struct btrfs_root_item new_root_item;
2179 if (!root->ref_cows)
2182 mutex_lock(&root->fs_info->fs_mutex);
2183 trans = btrfs_start_transaction(root, 1);
2186 ret = btrfs_update_inode(trans, root, root->inode);
2189 ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
/* The snapshot's root item starts as a copy of the source root's item */
2193 memcpy(&new_root_item, &root->root_item,
2194 sizeof(new_root_item));
2196 key.objectid = objectid;
2199 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2200 btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
2202 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2207 * insert the directory item
2209 key.offset = (u64)-1;
2210 ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2212 root->fs_info->sb->s_root->d_inode->i_ino,
2217 ret = btrfs_inc_root_ref(trans, root);
2220 ret = btrfs_commit_transaction(trans, root);
2222 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * add_disk: add the block device at path @name to the filesystem.
 *
 * Opens the device exclusively, sets its block size to the superblock's, and
 * inserts a device item into the device tree keyed by
 * (current total blocks, number of blocks on the new device).  The item
 * stores the path and a new device id (last device id in the super + 1).
 * The device is then added to the device radix, the super's total block
 * count and the btree inode's i_size are grown, and the transaction is
 * committed.
 *
 * NOTE(review): the mutex is locked through dev_root->fs_info and unlocked
 * through root->fs_info; dev_root is root->fs_info->dev_root, so these are
 * presumably the same fs_info.  Confirm.
 * NOTE(review): declarations, error checks, goto labels and the return
 * statement are missing from this listing; in particular the cleanup taken
 * after "insert failed" cannot be verified from here.
 */
2226 static int add_disk(struct btrfs_root *root, char *name, int namelen)
2228 struct block_device *bdev;
2229 struct btrfs_path *path;
2230 struct super_block *sb = root->fs_info->sb;
2231 struct btrfs_root *dev_root = root->fs_info->dev_root;
2232 struct btrfs_trans_handle *trans;
2233 struct btrfs_device_item *dev_item;
2234 struct btrfs_key key;
2241 printk("adding disk %s\n", name);
2242 path = btrfs_alloc_path();
2245 num_blocks = btrfs_super_total_blocks(root->fs_info->disk_super);
2246 bdev = open_bdev_excl(name, O_RDWR, sb);
2248 ret = PTR_ERR(bdev);
2249 printk("open bdev excl failed ret %d\n", ret);
2252 set_blocksize(bdev, sb->s_blocksize);
/* new_blocks: size of the new device in filesystem blocks */
2253 new_blocks = bdev->bd_inode->i_size >> sb->s_blocksize_bits;
/* The new device's blocks are appended after the existing block range */
2254 key.objectid = num_blocks;
2255 key.offset = new_blocks;
2257 btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY);
2259 mutex_lock(&dev_root->fs_info->fs_mutex);
2260 trans = btrfs_start_transaction(dev_root, 1);
/* The device path is stored directly after the fixed-size item */
2261 item_size = sizeof(*dev_item) + namelen;
2262 printk("insert empty on %Lu %Lu %u size %d\n", num_blocks, new_blocks, key.flags, item_size);
2263 ret = btrfs_insert_empty_item(trans, dev_root, path, &key, item_size);
2265 printk("insert failed %d\n", ret);
2266 close_bdev_excl(bdev);
2271 dev_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2272 path->slots[0], struct btrfs_device_item);
2273 btrfs_set_device_pathlen(dev_item, namelen);
2274 memcpy(dev_item + 1, name, namelen);
2276 device_id = btrfs_super_last_device_id(root->fs_info->disk_super) + 1;
2277 btrfs_set_super_last_device_id(root->fs_info->disk_super, device_id);
2278 btrfs_set_device_id(dev_item, device_id);
2279 mark_buffer_dirty(path->nodes[0]);
2281 ret = btrfs_insert_dev_radix(root, bdev, device_id, num_blocks,
2285 btrfs_set_super_total_blocks(root->fs_info->disk_super,
2286 num_blocks + new_blocks);
/* Grow the btree inode so the new block range is addressable through it */
2287 i_size_write(root->fs_info->btree_inode,
2288 (num_blocks + new_blocks) <<
2289 root->fs_info->btree_inode->i_blkbits);
2293 ret = btrfs_commit_transaction(trans, dev_root);
2295 mutex_unlock(&root->fs_info->fs_mutex);
2297 btrfs_free_path(path);
2302 static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2303 cmd, unsigned long arg)
2305 struct btrfs_root *root = BTRFS_I(inode)->root;
2306 struct btrfs_ioctl_vol_args vol_args;
2308 struct btrfs_dir_item *di;
2310 struct btrfs_path *path;
2314 case BTRFS_IOC_SNAP_CREATE:
2315 if (copy_from_user(&vol_args,
2316 (struct btrfs_ioctl_vol_args __user *)arg,
2319 namelen = strlen(vol_args.name);
2320 if (namelen > BTRFS_VOL_NAME_MAX)
2322 path = btrfs_alloc_path();
2325 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2326 mutex_lock(&root->fs_info->fs_mutex);
2327 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2329 vol_args.name, namelen, 0);
2330 mutex_unlock(&root->fs_info->fs_mutex);
2331 btrfs_free_path(path);
2332 if (di && !IS_ERR(di))
2335 if (root == root->fs_info->tree_root)
2336 ret = create_subvol(root, vol_args.name, namelen);
2338 ret = create_snapshot(root, vol_args.name, namelen);
2341 case BTRFS_IOC_ADD_DISK:
2342 if (copy_from_user(&vol_args,
2343 (struct btrfs_ioctl_vol_args __user *)arg,
2346 namelen = strlen(vol_args.name);
2347 if (namelen > BTRFS_VOL_NAME_MAX)
2349 vol_args.name[namelen] = '\0';
2350 ret = add_disk(root, vol_args.name, namelen);
/*
 * Slab caches created by init_inodecache() and torn down by
 * destroy_inodecache().  Only the inode cache is file-local; the others are
 * non-static, so they are visible to the rest of the module.
 */
2358 static struct kmem_cache *btrfs_inode_cachep;
2359 struct kmem_cache *btrfs_trans_handle_cachep;
2360 struct kmem_cache *btrfs_transaction_cachep;
2361 struct kmem_cache *btrfs_bit_radix_cachep;
2362 struct kmem_cache *btrfs_path_cachep;
/*
 * btrfs_alloc_inode: the ->alloc_inode hook installed in btrfs_super_ops.
 * Allocates a struct btrfs_inode from btrfs_inode_cachep and returns a
 * pointer to the VFS inode embedded in it.
 * NOTE(review): the handling of a failed allocation is not visible in this
 * listing (two lines are missing before the return).
 */
2365 * Called inside transaction, so use GFP_NOFS
2367 static struct inode *btrfs_alloc_inode(struct super_block *sb)
2369 struct btrfs_inode *ei;
2371 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2374 return &ei->vfs_inode;
2377 static void btrfs_destroy_inode(struct inode *inode)
2379 WARN_ON(!list_empty(&inode->i_dentry));
2380 WARN_ON(inode->i_data.nrpages);
2382 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2385 static void init_once(void * foo, struct kmem_cache * cachep,
2386 unsigned long flags)
2388 struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2390 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
2391 SLAB_CTOR_CONSTRUCTOR) {
2392 inode_init_once(&ei->vfs_inode);
2396 static int init_inodecache(void)
2398 btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
2399 sizeof(struct btrfs_inode),
2400 0, (SLAB_RECLAIM_ACCOUNT|
2403 btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
2404 sizeof(struct btrfs_trans_handle),
2405 0, (SLAB_RECLAIM_ACCOUNT|
2408 btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
2409 sizeof(struct btrfs_transaction),
2410 0, (SLAB_RECLAIM_ACCOUNT|
2413 btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
2414 sizeof(struct btrfs_transaction),
2415 0, (SLAB_RECLAIM_ACCOUNT|
2418 btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
2420 0, (SLAB_RECLAIM_ACCOUNT|
2422 SLAB_DESTROY_BY_RCU),
2424 if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL ||
2425 btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL)
2430 static void destroy_inodecache(void)
2432 kmem_cache_destroy(btrfs_inode_cachep);
2433 kmem_cache_destroy(btrfs_trans_handle_cachep);
2434 kmem_cache_destroy(btrfs_transaction_cachep);
2435 kmem_cache_destroy(btrfs_bit_radix_cachep);
2436 kmem_cache_destroy(btrfs_path_cachep);
2439 static int btrfs_get_sb(struct file_system_type *fs_type,
2440 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
2442 return get_sb_bdev(fs_type, flags, dev_name, data,
2443 btrfs_fill_super, mnt);
/*
 * btrfs_getattr: the ->getattr hook installed in
 * btrfs_file_inode_operations.  Fills @stat with the generic inode
 * attributes and then overrides the reported block size with 256KB.
 * The return statement is not visible in this listing.
 */
2447 static int btrfs_getattr(struct vfsmount *mnt,
2448 struct dentry *dentry, struct kstat *stat)
2450 struct inode *inode = dentry->d_inode;
2451 generic_fillattr(inode, stat);
2452 stat->blksize = 256 * 1024;
/*
 * btrfs_statfs: the ->statfs hook installed in btrfs_super_ops.
 * Fills @buf from the in-memory copy of the super block: total blocks, free
 * blocks (total minus used), block size, maximum name length and the btrfs
 * magic number.  Available blocks are reported equal to free blocks.
 * The return statement is not visible in this listing.
 */
2456 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
2458 struct btrfs_root *root = btrfs_sb(dentry->d_sb);
2459 struct btrfs_super_block *disk_super = root->fs_info->disk_super;
2461 buf->f_namelen = BTRFS_NAME_LEN;
2462 buf->f_blocks = btrfs_super_total_blocks(disk_super);
2463 buf->f_bfree = buf->f_blocks - btrfs_super_blocks_used(disk_super);
2464 buf->f_bavail = buf->f_bfree;
2465 buf->f_bsize = dentry->d_sb->s_blocksize;
2466 buf->f_type = BTRFS_SUPER_MAGIC;
/*
 * Filesystem type descriptor passed to register_filesystem() in
 * init_btrfs_fs() and to unregister_filesystem() in exit_btrfs_fs().
 * Mounts go through btrfs_get_sb(); FS_REQUIRES_DEV is set.
 * (At least one initializer line is missing from this listing.)
 */
2470 static struct file_system_type btrfs_fs_type = {
2471 .owner = THIS_MODULE,
2473 .get_sb = btrfs_get_sb,
2474 .kill_sb = kill_block_super,
2475 .fs_flags = FS_REQUIRES_DEV,
/*
 * Superblock operations: inode allocation and teardown, inode read and
 * write-back, super block write-out and sync, and statfs.
 */
2478 static struct super_operations btrfs_super_ops = {
2479 .delete_inode = btrfs_delete_inode,
2480 .put_super = btrfs_put_super,
2481 .read_inode = btrfs_read_locked_inode,
2482 .write_super = btrfs_write_super,
2483 .sync_fs = btrfs_sync_fs,
2484 .write_inode = btrfs_write_inode,
2485 .dirty_inode = btrfs_dirty_inode,
2486 .alloc_inode = btrfs_alloc_inode,
2487 .destroy_inode = btrfs_destroy_inode,
2488 .statfs = btrfs_statfs,
/*
 * Inode operations for directories that can be modified: lookup plus
 * create, unlink, mkdir and rmdir.  create_subvol() assigns this table to
 * the top-level directory of a new subvolume.
 */
2491 static struct inode_operations btrfs_dir_inode_operations = {
2492 .lookup = btrfs_lookup,
2493 .create = btrfs_create,
2494 .unlink = btrfs_unlink,
2495 .mkdir = btrfs_mkdir,
2496 .rmdir = btrfs_rmdir,
/*
 * Lookup-only variant of the directory inode operations: no create, unlink,
 * mkdir or rmdir handlers are provided.
 */
2499 static struct inode_operations btrfs_dir_ro_inode_operations = {
2500 .lookup = btrfs_lookup,
/*
 * File operations for open directories: generic seek, generic_read_dir for
 * read(), btrfs_readdir for directory listing, and the btrfs ioctl handler.
 */
2503 static struct file_operations btrfs_dir_file_operations = {
2504 .llseek = generic_file_llseek,
2505 .read = generic_read_dir,
2506 .readdir = btrfs_readdir,
2507 .ioctl = btrfs_ioctl,
/*
 * Address space (page cache) operations for btrfs file data: page read and
 * write-back, plus the prepare_write/commit_write pair.
 */
2510 static struct address_space_operations btrfs_aops = {
2511 .readpage = btrfs_readpage,
2512 .writepage = btrfs_writepage,
2513 .sync_page = block_sync_page,
2514 .prepare_write = btrfs_prepare_write,
2515 .commit_write = btrfs_commit_write,
/*
 * Inode operations for regular files: truncate and getattr.
 */
2518 static struct inode_operations btrfs_file_inode_operations = {
2519 .truncate = btrfs_truncate,
2520 .getattr = btrfs_getattr,
/*
 * File operations for regular files.  Reads go through do_sync_read() and
 * btrfs_file_aio_read(); writes go through btrfs_file_write().  mmap, open
 * and llseek use the generic helpers.
 */
2523 static struct file_operations btrfs_file_operations = {
2524 .llseek = generic_file_llseek,
2525 .read = do_sync_read,
2526 .aio_read = btrfs_file_aio_read,
2527 .write = btrfs_file_write,
2528 .mmap = generic_file_mmap,
2529 .open = generic_file_open,
2530 .ioctl = btrfs_ioctl,
2531 .fsync = btrfs_sync_file,
2534 static int __init init_btrfs_fs(void)
2537 printk("btrfs loaded!\n");
2538 err = init_inodecache();
2541 kset_set_kset_s(&btrfs_subsys, fs_subsys);
2542 err = subsystem_register(&btrfs_subsys);
2545 return register_filesystem(&btrfs_fs_type);
2547 destroy_inodecache();
2551 static void __exit exit_btrfs_fs(void)
2553 destroy_inodecache();
2554 unregister_filesystem(&btrfs_fs_type);
2555 subsystem_unregister(&btrfs_subsys);
2556 printk("btrfs unloaded\n");
/* Module entry/exit points and license declaration */
2559 module_init(init_btrfs_fs)
2560 module_exit(exit_btrfs_fs)
2562 MODULE_LICENSE("GPL");