1 #include <linux/module.h>
2 #include <linux/buffer_head.h>
4 #include <linux/pagemap.h>
5 #include <linux/highmem.h>
6 #include <linux/time.h>
7 #include <linux/init.h>
8 #include <linux/string.h>
9 #include <linux/smp_lock.h>
10 #include <linux/backing-dev.h>
11 #include <linux/mpage.h>
12 #include <linux/swap.h>
13 #include <linux/writeback.h>
14 #include <linux/statfs.h>
17 #include "transaction.h"
18 #include "btrfs_inode.h"
/*
 * Release hook installed in btrfs_fsinfo_ktype: recovers the enclosing
 * struct btrfs_fs_info from the kobject embedded in it.
 * NOTE(review): the remainder of the body is not visible in this listing.
 */
21 void btrfs_fsinfo_release(struct kobject *obj)
23 struct btrfs_fs_info *fsinfo = container_of(obj,
24 struct btrfs_fs_info, kobj);
/* kobj_type whose release hook is btrfs_fsinfo_release(). */
28 struct kobj_type btrfs_fsinfo_ktype = {
29 .release = btrfs_fsinfo_release,
/*
 * Lookup arguments for the iget callbacks: btrfs_find_actor() and
 * btrfs_init_locked_inode() cast their opaque pointer to this type and read
 * ->ino and ->root from it.
 */
32 struct btrfs_iget_args {
34 struct btrfs_root *root;
/* decl_subsys() instantiation named btrfs, passing btrfs_fsinfo_ktype. */
37 decl_subsys(btrfs, &btrfs_fsinfo_ktype, NULL);
/* Magic number that btrfs_fill_super() stores in sb->s_magic. */
39 #define BTRFS_SUPER_MAGIC 0x9123682E
/*
 * Forward declarations of the operation tables that the functions below
 * install on inodes, address spaces and the super block.
 */
41 static struct inode_operations btrfs_dir_inode_operations;
42 static struct inode_operations btrfs_dir_ro_inode_operations;
43 static struct super_operations btrfs_super_ops;
44 static struct file_operations btrfs_dir_file_operations;
45 static struct inode_operations btrfs_file_inode_operations;
46 static struct address_space_operations btrfs_aops;
47 static struct file_operations btrfs_file_operations;
/*
 * Fill in a VFS inode from its btrfs inode item.
 *
 * Takes fs_mutex, looks the item up with btrfs_lookup_inode() using a copy
 * of BTRFS_I(inode)->location, and copies mode, link count, owner, size,
 * timestamps, block count and generation into the inode.  The block group
 * number stored in the item is resolved through the block_group_radix tree
 * and remembered in BTRFS_I(inode)->block_group.  After the mutex is
 * dropped, the inode, file and address-space operations are selected from
 * the file type bits of i_mode.
 *
 * The last four statements release and free the path, drop the mutex and
 * call make_bad_inode().
 * NOTE(review): braces, error checks, case labels and jumps are not visible
 * in this listing.  The path is freed right after the lookup and the
 * trailing statements release and free it again -- confirm those two spots
 * can never run in sequence.
 */
49 static void btrfs_read_locked_inode(struct inode *inode)
51 struct btrfs_path *path;
52 struct btrfs_inode_item *inode_item;
53 struct btrfs_root *root = BTRFS_I(inode)->root;
54 struct btrfs_key location;
55 struct btrfs_block_group_cache *alloc_group;
56 u64 alloc_group_block;
59 path = btrfs_alloc_path();
61 btrfs_init_path(path);
62 mutex_lock(&root->fs_info->fs_mutex);
64 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
65 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
67 btrfs_free_path(path);
70 inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
72 struct btrfs_inode_item);
/* copy the item fields into the in-memory inode */
74 inode->i_mode = btrfs_inode_mode(inode_item);
75 inode->i_nlink = btrfs_inode_nlink(inode_item);
76 inode->i_uid = btrfs_inode_uid(inode_item);
77 inode->i_gid = btrfs_inode_gid(inode_item);
78 inode->i_size = btrfs_inode_size(inode_item);
79 inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
80 inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
81 inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
82 inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
83 inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
84 inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
85 inode->i_blocks = btrfs_inode_nblocks(inode_item);
86 inode->i_generation = btrfs_inode_generation(inode_item);
/* resolve the block group recorded in the item */
87 alloc_group_block = btrfs_inode_block_group(inode_item);
88 ret = radix_tree_gang_lookup(&root->fs_info->block_group_radix,
89 (void **)&alloc_group,
90 alloc_group_block, 1);
92 BTRFS_I(inode)->block_group = alloc_group;
94 btrfs_free_path(path);
97 mutex_unlock(&root->fs_info->fs_mutex);
/* pick the operation tables from the file type */
99 switch (inode->i_mode & S_IFMT) {
102 init_special_inode(inode, inode->i_mode,
103 btrfs_inode_rdev(inode_item));
107 inode->i_mapping->a_ops = &btrfs_aops;
108 inode->i_fop = &btrfs_file_operations;
109 inode->i_op = &btrfs_file_inode_operations;
112 inode->i_fop = &btrfs_dir_file_operations;
/* directories of the tree root get the read-only directory operations */
113 if (root == root->fs_info->tree_root)
114 inode->i_op = &btrfs_dir_ro_inode_operations;
116 inode->i_op = &btrfs_dir_inode_operations;
119 // inode->i_op = &page_symlink_inode_operations;
125 btrfs_release_path(root, path);
126 btrfs_free_path(path);
127 mutex_unlock(&root->fs_info->fs_mutex);
128 make_bad_inode(inode);
/*
 * Copy the fields of a VFS inode into a struct btrfs_inode_item through the
 * btrfs_set_* accessors: owner, size, mode, link count, the three
 * timestamps, block count, generation, and the object id of the block group
 * attached to the inode.
 */
131 static void fill_inode_item(struct btrfs_inode_item *item,
134 btrfs_set_inode_uid(item, inode->i_uid);
135 btrfs_set_inode_gid(item, inode->i_gid);
136 btrfs_set_inode_size(item, inode->i_size);
137 btrfs_set_inode_mode(item, inode->i_mode);
138 btrfs_set_inode_nlink(item, inode->i_nlink);
139 btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
140 btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
141 btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
142 btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
143 btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
144 btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
145 btrfs_set_inode_nblocks(item, inode->i_blocks);
146 btrfs_set_inode_generation(item, inode->i_generation);
147 btrfs_set_inode_block_group(item,
148 BTRFS_I(inode)->block_group->key.objectid);
/*
 * Write the current state of a VFS inode back into its inode item.
 *
 * Looks the item up through BTRFS_I(inode)->location (last lookup argument
 * 1), refills it with fill_inode_item(), marks the leaf buffer dirty, then
 * releases and frees the path.
 * NOTE(review): the handling of a failed lookup and the return statement
 * are not visible in this listing.
 */
152 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
153 struct btrfs_root *root,
156 struct btrfs_inode_item *inode_item;
157 struct btrfs_path *path;
160 path = btrfs_alloc_path();
162 btrfs_init_path(path);
163 ret = btrfs_lookup_inode(trans, root, path,
164 &BTRFS_I(inode)->location, 1);
171 inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
173 struct btrfs_inode_item);
175 fill_inode_item(inode_item, inode);
176 btrfs_mark_buffer_dirty(path->nodes[0]);
179 btrfs_release_path(root, path);
180 btrfs_free_path(path);
/*
 * Remove the directory entry named by @dentry from @dir inside a running
 * transaction.
 *
 * Deletes the name found by btrfs_lookup_dir_item(), then the entry found by
 * btrfs_lookup_dir_index_item() using the object id read from the first
 * item.  The target inode takes over the ctime of the directory.  Afterwards
 * the directory size shrinks by twice the name length, the link count of the
 * target inode is dropped, both inodes are written back with
 * btrfs_update_inode() and the super block is flagged dirty.
 * NOTE(review): the error checks between the lookups are not visible here,
 * and the results of the two btrfs_update_inode() calls are not stored.
 */
185 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
186 struct btrfs_root *root,
188 struct dentry *dentry)
190 struct btrfs_path *path;
191 const char *name = dentry->d_name.name;
192 int name_len = dentry->d_name.len;
195 struct btrfs_dir_item *di;
197 path = btrfs_alloc_path();
199 btrfs_init_path(path);
200 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
210 objectid = btrfs_disk_key_objectid(&di->location);
211 ret = btrfs_delete_one_dir_name(trans, root, path, di);
213 btrfs_release_path(root, path);
215 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
216 objectid, name, name_len, -1);
225 ret = btrfs_delete_one_dir_name(trans, root, path, di);
228 dentry->d_inode->i_ctime = dir->i_ctime;
230 btrfs_free_path(path);
/* mirrors btrfs_add_link(), which adds twice the name length to i_size */
232 dir->i_size -= name_len * 2;
233 btrfs_update_inode(trans, root, dir);
234 drop_nlink(dentry->d_inode);
235 btrfs_update_inode(trans, root, dentry->d_inode);
236 dir->i_sb->s_dirt = 1;
/*
 * Unlink @dentry from @dir: runs btrfs_unlink_trans() inside a transaction
 * that is started and ended while fs_mutex is held.
 */
241 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
243 struct btrfs_root *root;
244 struct btrfs_trans_handle *trans;
247 root = BTRFS_I(dir)->root;
248 mutex_lock(&root->fs_info->fs_mutex);
249 trans = btrfs_start_transaction(root, 1);
250 btrfs_set_trans_block_group(trans, dir);
251 ret = btrfs_unlink_trans(trans, root, dir, dentry);
252 btrfs_end_transaction(trans, root);
253 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Remove the directory @dentry from @dir.
 *
 * Under fs_mutex and inside a transaction, searches for the keys of the
 * victim inode starting from offset (u64)-1.  An item is deleted only when
 * it is a directory item or directory index item whose name matches the
 * first one or two characters of goodnames ("." or "..").  Once the
 * directory is empty it is unlinked from @dir with btrfs_unlink_trans().
 * NOTE(review): the loop structure and the branch taken for a non-matching
 * item are not visible in this listing.
 * NOTE(review): fs_mutex is released before btrfs_end_transaction() here,
 * whereas btrfs_unlink(), btrfs_create() and btrfs_mkdir() end the
 * transaction first -- confirm the ordering is intentional.
 */
257 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
259 struct inode *inode = dentry->d_inode;
262 struct btrfs_root *root = BTRFS_I(dir)->root;
263 struct btrfs_path *path;
264 struct btrfs_key key;
265 struct btrfs_trans_handle *trans;
266 struct btrfs_key found_key;
268 struct btrfs_leaf *leaf;
269 char *goodnames = "..";
271 path = btrfs_alloc_path();
273 btrfs_init_path(path);
274 mutex_lock(&root->fs_info->fs_mutex);
275 trans = btrfs_start_transaction(root, 1);
276 btrfs_set_trans_block_group(trans, dir);
277 key.objectid = inode->i_ino;
278 key.offset = (u64)-1;
281 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
287 if (path->slots[0] == 0) {
292 leaf = btrfs_buffer_leaf(path->nodes[0]);
293 btrfs_disk_key_to_cpu(&found_key,
294 &leaf->items[path->slots[0]].key);
295 found_type = btrfs_key_type(&found_key);
296 if (found_key.objectid != inode->i_ino) {
/* true when the item is not one of the two entries every directory has */
300 if ((found_type != BTRFS_DIR_ITEM_KEY &&
301 found_type != BTRFS_DIR_INDEX_KEY) ||
302 (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
303 !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
307 ret = btrfs_del_item(trans, root, path);
310 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
312 btrfs_release_path(root, path);
315 btrfs_release_path(root, path);
317 /* now the directory is empty */
318 err = btrfs_unlink_trans(trans, root, dir, dentry);
323 btrfs_release_path(root, path);
324 btrfs_free_path(path);
325 mutex_unlock(&root->fs_info->fs_mutex);
326 ret = btrfs_end_transaction(trans, root);
/*
 * Delete the inode item of @inode: looks it up through
 * BTRFS_I(inode)->location with btrfs_lookup_inode() (last argument -1),
 * removes it with btrfs_del_item() and frees the path.
 * NOTE(review): the checks on the two return values are not visible here.
 */
332 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
333 struct btrfs_root *root,
336 struct btrfs_path *path;
341 path = btrfs_alloc_path();
343 btrfs_init_path(path);
344 ret = btrfs_lookup_inode(trans, root, path,
345 &BTRFS_I(inode)->location, -1);
347 ret = btrfs_del_item(trans, root, path);
349 btrfs_free_path(path);
/*
 * Remove the checksum and extent-data items of @inode whose key offset is
 * not below inode->i_size.
 *
 * The search starts from the key (i_ino, BTRFS_CSUM_ITEM_KEY, (u64)-1).  A
 * found item is tested for object id, key type and key offset; for an
 * extent-data item that is not inline, the disk block number and disk block
 * count are read from it.  The item is then deleted, the path released and
 * btrfs_free_extent() called with extent_start and extent_num_blocks.  The
 * super block is flagged dirty at the end.
 * NOTE(review): the enclosing loop, the bodies of the three key checks
 * (presumably they end the scan) and the assignment targets of the extent
 * reads are not visible in this listing.
 */
353 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
354 struct btrfs_root *root,
358 struct btrfs_path *path;
359 struct btrfs_key key;
360 struct btrfs_disk_key *found_key;
361 struct btrfs_leaf *leaf;
362 struct btrfs_file_extent_item *fi = NULL;
363 u64 extent_start = 0;
364 u64 extent_num_blocks = 0;
367 path = btrfs_alloc_path();
369 /* FIXME, add redo link to tree so we don't leak on crash */
370 key.objectid = inode->i_ino;
371 key.offset = (u64)-1;
374 * use BTRFS_CSUM_ITEM_KEY because it is larger than inline keys
377 btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY);
379 btrfs_init_path(path);
380 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
385 BUG_ON(path->slots[0] == 0);
388 leaf = btrfs_buffer_leaf(path->nodes[0]);
389 found_key = &leaf->items[path->slots[0]].key;
390 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
392 if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY &&
393 btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY)
395 if (btrfs_disk_key_offset(found_key) < inode->i_size)
398 if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) {
399 fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
401 struct btrfs_file_extent_item);
402 if (btrfs_file_extent_type(fi) !=
403 BTRFS_FILE_EXTENT_INLINE) {
405 btrfs_file_extent_disk_blocknr(fi);
407 btrfs_file_extent_disk_num_blocks(fi);
408 /* FIXME blocksize != 4096 */
410 btrfs_file_extent_num_blocks(fi) << 3;
414 ret = btrfs_del_item(trans, root, path);
416 btrfs_release_path(root, path);
418 ret = btrfs_free_extent(trans, root, extent_start,
419 extent_num_blocks, 0);
425 btrfs_release_path(root, path);
426 btrfs_free_path(path);
427 inode->i_sb->s_dirt = 1;
/*
 * Tear down an inode: truncates its page cache, then, under fs_mutex and
 * inside a transaction, drops the file items of a regular file with
 * btrfs_truncate_in_trans() and deletes the inode item with
 * btrfs_free_inode().
 * NOTE(review): the branch taken for a bad inode and the handling of ret
 * are not visible in this listing.
 */
431 static void btrfs_delete_inode(struct inode *inode)
433 struct btrfs_trans_handle *trans;
434 struct btrfs_root *root = BTRFS_I(inode)->root;
437 truncate_inode_pages(&inode->i_data, 0);
438 if (is_bad_inode(inode)) {
442 mutex_lock(&root->fs_info->fs_mutex);
443 trans = btrfs_start_transaction(root, 1);
444 btrfs_set_trans_block_group(trans, inode);
445 if (S_ISREG(inode->i_mode)) {
446 ret = btrfs_truncate_in_trans(trans, root, inode);
449 btrfs_free_inode(trans, root, inode);
450 btrfs_end_transaction(trans, root);
451 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Look up the name of @dentry in directory @dir and store the key recorded
 * in the matching directory item in @location.  When
 * btrfs_lookup_dir_item() returns NULL or an error pointer,
 * location->objectid is set to 0 instead.
 * NOTE(review): the return value is produced on lines not visible here.
 */
457 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
458 struct btrfs_key *location)
460 const char *name = dentry->d_name.name;
461 int namelen = dentry->d_name.len;
462 struct btrfs_dir_item *di;
463 struct btrfs_path *path;
464 struct btrfs_root *root = BTRFS_I(dir)->root;
467 path = btrfs_alloc_path();
469 btrfs_init_path(path);
470 di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
472 if (!di || IS_ERR(di)) {
473 location->objectid = 0;
477 btrfs_disk_key_to_cpu(location, &di->location);
479 btrfs_release_path(root, path);
480 btrfs_free_path(path);
/*
 * Redirect a directory entry that points at a root item.
 *
 * Only keys of type BTRFS_ROOT_ITEM_KEY whose object id is not
 * BTRFS_ROOT_TREE_OBJECTID are processed.  For those, the root is read with
 * btrfs_read_fs_root() under fs_mutex and stored in @sub_root, and @location
 * is rewritten to the inode item key (offset 0) of the directory id recorded
 * in the root item.
 *
 * Returns PTR_ERR() of the root when btrfs_read_fs_root() fails.  That
 * failure path now frees the path and drops fs_mutex before returning;
 * previously it returned with the mutex still held and the path leaked.
 * NOTE(review): the statements under the two early checks and the final
 * return are not visible in this listing.
 */
484 int fixup_tree_root_location(struct btrfs_root *root,
485 struct btrfs_key *location,
486 struct btrfs_root **sub_root)
488 struct btrfs_path *path;
489 struct btrfs_root_item *ri;
491 if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
493 if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
496 path = btrfs_alloc_path();
498 mutex_lock(&root->fs_info->fs_mutex);
500 *sub_root = btrfs_read_fs_root(root->fs_info, location);
501 if (IS_ERR(*sub_root)) {
/* undo the allocation and the lock taken above before bailing out */
btrfs_free_path(path);
mutex_unlock(&root->fs_info->fs_mutex);
502 return PTR_ERR(*sub_root);
}
504 ri = &(*sub_root)->root_item;
505 location->objectid = btrfs_root_dirid(ri);
507 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
508 location->offset = 0;
510 btrfs_free_path(path);
511 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Init callback handed to iget5_locked() by btrfs_iget_locked(): stores the
 * inode number and root carried in the struct btrfs_iget_args behind @p
 * into @inode.
 */
515 int btrfs_init_locked_inode(struct inode *inode, void *p)
517 struct btrfs_iget_args *args = p;
518 inode->i_ino = args->ino;
519 BTRFS_I(inode)->root = args->root;
/*
 * Match callback handed to iget5_locked() by btrfs_iget_locked(): non-zero
 * when @inode has both the inode number and the root requested in the
 * struct btrfs_iget_args behind @opaque.
 */
523 int btrfs_find_actor(struct inode *inode, void *opaque)
525 struct btrfs_iget_args *args = opaque;
526 return (args->ino == inode->i_ino &&
527 args->root == BTRFS_I(inode)->root);
/*
 * Obtain an inode on super block @s through iget5_locked(), hashed by
 * @objectid and matched or initialised by btrfs_find_actor() and
 * btrfs_init_locked_inode().
 * NOTE(review): the setup of args (presumably from @objectid and @root) and
 * the return statement are not visible in this listing.
 */
530 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
531 struct btrfs_root *root)
534 struct btrfs_iget_args args;
538 inode = iget5_locked(s, objectid, btrfs_find_actor,
539 btrfs_init_locked_inode,
/*
 * Resolve @dentry in directory @dir.
 *
 * Names longer than BTRFS_NAME_LEN yield -ENAMETOOLONG.  The entry is looked
 * up with btrfs_inode_by_name() under fs_mutex.  A non-zero object id is
 * passed through fixup_tree_root_location(), which may replace sub_root, and
 * the inode is fetched with btrfs_iget_locked().  An inode flagged I_NEW
 * gets its root and location stored, is filled in by
 * btrfs_read_locked_inode() and is unlocked.  The result is attached to the
 * dentry with d_splice_alias().
 * NOTE(review): the conditions guarding the -ENOENT and -EACCES returns are
 * not visible in this listing.
 */
544 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
545 struct nameidata *nd)
547 struct inode * inode;
548 struct btrfs_inode *bi = BTRFS_I(dir);
549 struct btrfs_root *root = bi->root;
550 struct btrfs_root *sub_root = root;
551 struct btrfs_key location;
554 if (dentry->d_name.len > BTRFS_NAME_LEN)
555 return ERR_PTR(-ENAMETOOLONG);
556 mutex_lock(&root->fs_info->fs_mutex);
557 ret = btrfs_inode_by_name(dir, dentry, &location);
558 mutex_unlock(&root->fs_info->fs_mutex);
562 if (location.objectid) {
563 ret = fixup_tree_root_location(root, &location, &sub_root);
567 return ERR_PTR(-ENOENT);
568 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
571 return ERR_PTR(-EACCES);
572 if (inode->i_state & I_NEW) {
/* the entry led into a different root: remember its top inode */
573 if (sub_root != root) {
574 printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root);
576 sub_root->inode = inode;
578 BTRFS_I(inode)->root = sub_root;
579 memcpy(&BTRFS_I(inode)->location, &location,
581 btrfs_read_locked_inode(inode);
582 unlock_new_inode(inode);
585 return d_splice_alias(inode, dentry);
/*
 * Feed the entries of the directory behind @filp to @filldir.
 *
 * Under fs_mutex, searches from the key (i_ino, key_type, f_pos), where
 * key_type is BTRFS_DIR_INDEX_KEY, or BTRFS_DIR_ITEM_KEY when the root is
 * the tree root.  The walk moves to the next leaf with btrfs_next_leaf()
 * when the slot runs off the current one.  Each visited item is compared
 * against the object id, the key type and f_pos; f_pos is then set to the
 * key offset of the item and every btrfs_dir_item stored in the item is
 * passed to filldir().
 * NOTE(review): the loop headers, the bodies of the three key checks and
 * the handling of a non-zero filldir() result are not visible here.
 */
588 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
590 struct inode *inode = filp->f_path.dentry->d_inode;
591 struct btrfs_root *root = BTRFS_I(inode)->root;
592 struct btrfs_item *item;
593 struct btrfs_dir_item *di;
594 struct btrfs_key key;
595 struct btrfs_path *path;
598 struct btrfs_leaf *leaf;
601 unsigned char d_type = DT_UNKNOWN;
606 int key_type = BTRFS_DIR_INDEX_KEY;
608 /* FIXME, use a real flag for deciding about the key type */
609 if (root->fs_info->tree_root == root)
610 key_type = BTRFS_DIR_ITEM_KEY;
611 mutex_lock(&root->fs_info->fs_mutex);
612 key.objectid = inode->i_ino;
614 btrfs_set_key_type(&key, key_type);
615 key.offset = filp->f_pos;
616 path = btrfs_alloc_path();
617 btrfs_init_path(path);
618 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
623 leaf = btrfs_buffer_leaf(path->nodes[0]);
624 nritems = btrfs_header_nritems(&leaf->header);
625 slot = path->slots[0];
626 if (advance || slot >= nritems) {
627 if (slot >= nritems -1) {
628 ret = btrfs_next_leaf(root, path);
631 leaf = btrfs_buffer_leaf(path->nodes[0]);
632 nritems = btrfs_header_nritems(&leaf->header);
633 slot = path->slots[0];
640 item = leaf->items + slot;
641 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
643 if (btrfs_disk_key_type(&item->key) != key_type)
645 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
647 filp->f_pos = btrfs_disk_key_offset(&item->key);
649 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
651 di_total = btrfs_item_size(leaf->items + slot);
/* one item can hold several entries: each is a header followed by its name */
652 while(di_cur < di_total) {
653 over = filldir(dirent, (const char *)(di + 1),
654 btrfs_dir_name_len(di),
655 btrfs_disk_key_offset(&item->key),
656 btrfs_disk_key_objectid(&di->location),
660 di_len = btrfs_dir_name_len(di) + sizeof(*di);
662 di = (struct btrfs_dir_item *)((char *)di + di_len);
669 btrfs_release_path(root, path);
670 btrfs_free_path(path);
671 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Shut down the filesystem behind @sb: closes the tree with close_ctree(),
 * prints its return value and clears sb->s_fs_info.
 * NOTE(review): whether the printk is conditional is not visible here.
 */
675 static void btrfs_put_super (struct super_block * sb)
677 struct btrfs_root *root = btrfs_sb(sb);
680 ret = close_ctree(root);
682 printk("close ctree returns %d\n", ret);
684 sb->s_fs_info = NULL;
/*
 * Populate @sb: sets the maximum file size, magic number and super
 * operations, opens the tree with open_ctree() and stores the tree root in
 * sb->s_fs_info.  The root directory inode named by disk_super is fetched
 * with btrfs_iget_locked(), its location key is initialised as an inode item
 * key, a new inode is filled in by btrfs_read_locked_inode(), and the dentry
 * from d_alloc_root() becomes sb->s_root.
 * NOTE(review): the error handling after open_ctree(), the inode fetch and
 * d_alloc_root() is not visible in this listing.
 */
687 static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
689 struct inode * inode;
690 struct dentry * root_dentry;
691 struct btrfs_super_block *disk_super;
692 struct btrfs_root *tree_root;
693 struct btrfs_inode *bi;
695 sb->s_maxbytes = MAX_LFS_FILESIZE;
696 sb->s_magic = BTRFS_SUPER_MAGIC;
697 sb->s_op = &btrfs_super_ops;
700 tree_root = open_ctree(sb);
703 printk("btrfs: open_ctree failed\n");
706 sb->s_fs_info = tree_root;
707 disk_super = tree_root->fs_info->disk_super;
708 printk("read in super total blocks %Lu root %Lu\n",
709 btrfs_super_total_blocks(disk_super),
710 btrfs_super_root_dir(disk_super));
712 inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super),
/* build the inode item key that btrfs_read_locked_inode() looks up */
715 bi->location.objectid = inode->i_ino;
716 bi->location.offset = 0;
717 bi->location.flags = 0;
718 bi->root = tree_root;
719 btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
723 if (inode->i_state & I_NEW) {
724 btrfs_read_locked_inode(inode);
725 unlock_new_inode(inode);
728 root_dentry = d_alloc_root(inode);
733 sb->s_root = root_dentry;
/*
 * Write-inode handler: starts a transaction under fs_mutex and commits it
 * with btrfs_commit_transaction().
 * NOTE(review): any use of @wait and the return statement are not visible
 * in this listing.
 */
738 static int btrfs_write_inode(struct inode *inode, int wait)
740 struct btrfs_root *root = BTRFS_I(inode)->root;
741 struct btrfs_trans_handle *trans;
745 mutex_lock(&root->fs_info->fs_mutex);
746 trans = btrfs_start_transaction(root, 1);
747 btrfs_set_trans_block_group(trans, inode);
748 ret = btrfs_commit_transaction(trans, root);
749 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Dirty-inode handler: copies the in-memory inode into its inode item with
 * btrfs_update_inode() inside a transaction that is started and ended while
 * fs_mutex is held.  The result of btrfs_update_inode() is not stored.
 */
754 static void btrfs_dirty_inode(struct inode *inode)
756 struct btrfs_root *root = BTRFS_I(inode)->root;
757 struct btrfs_trans_handle *trans;
759 mutex_lock(&root->fs_info->fs_mutex);
760 trans = btrfs_start_transaction(root, 1);
761 btrfs_set_trans_block_group(trans, inode);
762 btrfs_update_inode(trans, root, inode);
763 btrfs_end_transaction(trans, root);
764 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Allocate and initialise a new inode with number objectid in @root.
 *
 * Chooses a block group with btrfs_find_block_group() starting from @group,
 * takes owner and group from current->fsuid and current->fsgid, applies
 * mode, stamps all three timestamps with CURRENT_TIME, fills a stack copy of
 * the inode item and inserts it with btrfs_insert_inode().  The location key
 * of the inode is set to its inode item key and the inode is hashed.
 * Returns ERR_PTR(-ENOMEM) on the path that follows new_inode().
 * NOTE(review): part of the parameter list, the checks on new_inode() and
 * on the insert result, and the final return are not visible here.
 */
767 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
768 struct btrfs_root *root,
770 struct btrfs_block_group_cache *group,
774 struct btrfs_inode_item inode_item;
775 struct btrfs_key *location;
778 inode = new_inode(root->fs_info->sb);
780 return ERR_PTR(-ENOMEM);
782 BTRFS_I(inode)->root = root;
783 group = btrfs_find_block_group(root, group, 0);
784 BTRFS_I(inode)->block_group = group;
786 inode->i_uid = current->fsuid;
787 inode->i_gid = current->fsgid;
788 inode->i_mode = mode;
789 inode->i_ino = objectid;
791 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
792 fill_inode_item(&inode_item, inode);
793 location = &BTRFS_I(inode)->location;
794 location->objectid = objectid;
796 location->offset = 0;
797 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
799 ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
802 insert_inode_hash(inode);
/*
 * Add a directory entry for @inode in the parent directory of @dentry.
 *
 * Builds an inode item key for @inode, inserts the name of @dentry with
 * btrfs_insert_dir_item() under the inode number of the parent, then grows
 * the parent i_size by twice the name length and writes the parent inode
 * back with btrfs_update_inode().
 * NOTE(review): the remaining arguments of the insert call (presumably the
 * key built above) and the check of its result are not visible here.
 */
806 static int btrfs_add_link(struct btrfs_trans_handle *trans,
807 struct dentry *dentry, struct inode *inode)
810 struct btrfs_key key;
811 struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
812 key.objectid = inode->i_ino;
814 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
817 ret = btrfs_insert_dir_item(trans, root,
818 dentry->d_name.name, dentry->d_name.len,
819 dentry->d_parent->d_inode->i_ino,
/* btrfs_unlink_trans() subtracts the same amount when the name goes away */
822 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
823 ret = btrfs_update_inode(trans, root,
824 dentry->d_parent->d_inode);
/*
 * Link @inode under @dentry with btrfs_add_link() and attach it to the
 * dentry with d_instantiate().
 * NOTE(review): the condition on err and the return are not visible here.
 */
829 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
830 struct dentry *dentry, struct inode *inode)
832 int err = btrfs_add_link(trans, dentry, inode);
834 d_instantiate(dentry, inode);
/*
 * Create a file named by @dentry in @dir.
 *
 * Inside a transaction under fs_mutex: finds a free object id, builds the
 * inode with btrfs_new_inode() in the block group of the directory, links
 * it with btrfs_add_nondir() and installs the file address-space, file and
 * inode operations.  The block groups of both inodes are updated before the
 * transaction is ended and the mutex released.
 * NOTE(review): error checks and jumps are not visible in this listing; the
 * trailing inode_dec_link_count() presumably belongs to a failure path.
 */
842 static int btrfs_create(struct inode *dir, struct dentry *dentry,
843 int mode, struct nameidata *nd)
845 struct btrfs_trans_handle *trans;
846 struct btrfs_root *root = BTRFS_I(dir)->root;
852 mutex_lock(&root->fs_info->fs_mutex);
853 trans = btrfs_start_transaction(root, 1);
854 btrfs_set_trans_block_group(trans, dir);
856 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
862 inode = btrfs_new_inode(trans, root, objectid,
863 BTRFS_I(dir)->block_group, mode);
864 err = PTR_ERR(inode);
868 btrfs_set_trans_block_group(trans, inode);
869 err = btrfs_add_nondir(trans, dentry, inode);
873 inode->i_mapping->a_ops = &btrfs_aops;
874 inode->i_fop = &btrfs_file_operations;
875 inode->i_op = &btrfs_file_inode_operations;
877 dir->i_sb->s_dirt = 1;
878 btrfs_update_inode_block_group(trans, inode);
879 btrfs_update_inode_block_group(trans, dir);
881 btrfs_end_transaction(trans, root);
882 mutex_unlock(&root->fs_info->fs_mutex);
885 inode_dec_link_count(inode);
/*
 * Insert the two initial entries of directory @objectid: a name of length 1
 * inserted while key.objectid is @objectid, and a name of length 2 inserted
 * while key.objectid is @dirid.
 * NOTE(review): buf is set up on lines not visible here; judging by
 * btrfs_rmdir() the two names are presumably "." and "..".
 */
891 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
892 struct btrfs_root *root,
893 u64 objectid, u64 dirid)
897 struct btrfs_key key;
902 key.objectid = objectid;
905 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
907 ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
911 key.objectid = dirid;
912 ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
/*
 * Create the directory named by @dentry in @dir.
 *
 * Inside a transaction under fs_mutex: finds a free object id, builds the
 * inode with btrfs_new_inode() using S_IFDIR | mode, installs the directory
 * inode and file operations, inserts the initial entries with
 * btrfs_make_empty_dir(), writes the inode back, links it into @dir with
 * btrfs_add_link() and instantiates the dentry.
 * NOTE(review): error checks and jumps are not visible in this listing.
 * NOTE(review): trans is passed to btrfs_set_trans_block_group() before the
 * statement that takes PTR_ERR(trans) -- confirm the order of the check.
 */
920 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
923 struct btrfs_trans_handle *trans;
924 struct btrfs_root *root = BTRFS_I(dir)->root;
929 mutex_lock(&root->fs_info->fs_mutex);
930 trans = btrfs_start_transaction(root, 1);
931 btrfs_set_trans_block_group(trans, dir);
933 err = PTR_ERR(trans);
937 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
943 inode = btrfs_new_inode(trans, root, objectid,
944 BTRFS_I(dir)->block_group, S_IFDIR | mode);
946 err = PTR_ERR(inode);
950 inode->i_op = &btrfs_dir_inode_operations;
951 inode->i_fop = &btrfs_dir_file_operations;
952 btrfs_set_trans_block_group(trans, inode);
954 err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
959 err = btrfs_update_inode(trans, root, inode);
962 err = btrfs_add_link(trans, dentry, inode);
965 d_instantiate(dentry, inode);
967 dir->i_sb->s_dirt = 1;
968 btrfs_update_inode_block_group(trans, inode);
969 btrfs_update_inode_block_group(trans, dir);
972 btrfs_end_transaction(trans, root);
974 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Sync handler for a file: starts a transaction under fs_mutex and commits
 * it with btrfs_commit_transaction().
 *
 * Returns the commit result when it is zero or negative.  A positive result
 * is reported as -EIO, so a failure always reaches the caller as a negative
 * errno; the previous code returned the positive constant EIO.
 * NOTE(review): the lines between starting and committing the transaction
 * are not visible in this listing.
 */
980 static int btrfs_sync_file(struct file *file,
981 struct dentry *dentry, int datasync)
983 struct inode *inode = dentry->d_inode;
984 struct btrfs_root *root = BTRFS_I(inode)->root;
986 struct btrfs_trans_handle *trans;
988 mutex_lock(&root->fs_info->fs_mutex);
989 trans = btrfs_start_transaction(root, 1);
994 ret = btrfs_commit_transaction(trans, root);
995 mutex_unlock(&root->fs_info->fs_mutex);
997 return ret > 0 ? -EIO : ret;
/*
 * Sync the filesystem behind @sb: flushes the mapping of the btree inode
 * with filemap_flush(), then starts and commits a transaction under
 * fs_mutex and prints a trace message.
 * NOTE(review): any use of @wait, the condition around the flush and the
 * return statement are not visible in this listing.
 */
1000 static int btrfs_sync_fs(struct super_block *sb, int wait)
1002 struct btrfs_trans_handle *trans;
1003 struct btrfs_root *root;
1005 root = btrfs_sb(sb);
1009 filemap_flush(root->fs_info->btree_inode->i_mapping);
1012 mutex_lock(&root->fs_info->fs_mutex);
1013 trans = btrfs_start_transaction(root, 1);
1014 ret = btrfs_commit_transaction(trans, root);
1017 printk("btrfs sync_fs\n");
1018 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Map file block @iblock of @inode into the buffer head @result.
 * btrfs_get_block() calls this with fs_mutex held.
 *
 * Looks up the file extent item for byte offset iblock << i_blkbits.  The
 * item found must belong to this inode and be of type
 * BTRFS_EXTENT_DATA_KEY.  For a regular extent that contains @iblock, the
 * buffer is mapped to the extent disk block plus the extent offset plus the
 * distance of @iblock from the extent start.  For an inline extent, size
 * bytes of inline data are copied to the start of the page of @result, the
 * rest of the page is zeroed, the buffer is marked up to date and mapped to
 * logical block 0.
 * NOTE(review): the use of @create, the early-exit branches and the return
 * statement are not visible in this listing.
 */
1022 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1023 struct buffer_head *result, int create)
1028 u64 extent_start = 0;
1030 u64 objectid = inode->i_ino;
1032 struct btrfs_path *path;
1033 struct btrfs_root *root = BTRFS_I(inode)->root;
1034 struct btrfs_file_extent_item *item;
1035 struct btrfs_leaf *leaf;
1036 struct btrfs_disk_key *found_key;
1038 path = btrfs_alloc_path();
1040 btrfs_init_path(path);
1045 ret = btrfs_lookup_file_extent(NULL, root, path,
1047 iblock << inode->i_blkbits, 0);
1054 if (path->slots[0] == 0) {
1055 btrfs_release_path(root, path);
1061 item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
1062 struct btrfs_file_extent_item);
1063 leaf = btrfs_buffer_leaf(path->nodes[0]);
1064 blocknr = btrfs_file_extent_disk_blocknr(item);
1065 blocknr += btrfs_file_extent_offset(item);
1067 /* are we inside the extent that was found? */
1068 found_key = &leaf->items[path->slots[0]].key;
1069 found_type = btrfs_disk_key_type(found_key);
1070 if (btrfs_disk_key_objectid(found_key) != objectid ||
1071 found_type != BTRFS_EXTENT_DATA_KEY) {
1074 btrfs_release_path(root, path);
/* found_type is reused: from here on it holds the extent type */
1077 found_type = btrfs_file_extent_type(item);
1078 extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1079 if (found_type == BTRFS_FILE_EXTENT_REG) {
1080 extent_start = extent_start >> inode->i_blkbits;
1081 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1082 if (iblock >= extent_start && iblock < extent_end) {
1084 btrfs_map_bh_to_logical(root, result, blocknr +
1085 iblock - extent_start);
1088 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1092 size = btrfs_file_extent_inline_len(leaf->items +
1094 extent_end = (extent_start + size) >> inode->i_blkbits;
1095 extent_start >>= inode->i_blkbits;
1096 if (iblock < extent_start || iblock > extent_end) {
/* copy the inline data into the page and zero the remainder */
1099 ptr = btrfs_file_extent_inline_start(item);
1100 map = kmap(result->b_page);
1101 memcpy(map, ptr, size);
1102 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1103 flush_dcache_page(result->b_page);
1104 kunmap(result->b_page);
1105 set_buffer_uptodate(result);
1106 SetPageChecked(result->b_page);
1107 btrfs_map_bh_to_logical(root, result, 0);
1110 btrfs_release_path(root, path);
1111 btrfs_free_path(path);
/*
 * Locked wrapper around btrfs_get_block_lock(): takes fs_mutex for the
 * duration of the call and passes all arguments through unchanged.
 */
1115 static int btrfs_get_block(struct inode *inode, sector_t iblock,
1116 struct buffer_head *result, int create)
1119 struct btrfs_root *root = BTRFS_I(inode)->root;
1120 mutex_lock(&root->fs_info->fs_mutex);
1121 err = btrfs_get_block_lock(inode, iblock, result, create);
1122 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * Prepare-write handler: delegates to nobh_prepare_write() with
 * btrfs_get_block as the block mapping callback.  @file is unused.
 */
1126 static int btrfs_prepare_write(struct file *file, struct page *page,
1127 unsigned from, unsigned to)
1129 return nobh_prepare_write(page, from, to, btrfs_get_block);
/* Write-super handler: calls btrfs_sync_fs() with wait set to 1. */
1132 static void btrfs_write_super(struct super_block *sb)
1134 btrfs_sync_fs(sb, 1);
/*
 * Read-page handler: delegates to mpage_readpage() with btrfs_get_block as
 * the block mapping callback.  @file is unused.
 */
1137 static int btrfs_readpage(struct file *file, struct page *page)
1139 return mpage_readpage(page, btrfs_get_block);
1143 * While block_write_full_page is writing back the dirty buffers under
1144 * the page lock, whoever dirtied the buffers may decide to clean them
1145 * again at any time. We handle that by only looking at the buffer
1146 * state inside lock_buffer().
1148 * If block_write_full_page() is called for regular writeback
1149 * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1150 * locked buffer. This only can happen if someone has written the buffer
1151 * directly, with submit_bh(). At the address_space level PageWriteback
1152 * prevents this contention from occurring.
/*
 * Write out the dirty buffers of a locked page (BUG if it is not locked).
 *
 * Buffers are created on the page if it has none.  The first walk over the
 * buffer ring cleans buffers that lie past the last block inside i_size and
 * maps dirty unmapped buffers with btrfs_get_block().  The second walk
 * skips unmapped buffers, locks the rest (redirtying the page instead when
 * a non-blocking attempt cannot take the lock) and marks buffers that were
 * dirty and have a non-zero b_blocknr for async write.  The page is then
 * put under writeback and the marked buffers are submitted.  When nothing
 * was submitted, writeback is ended on the page and wbc->pages_skipped is
 * incremented.  The tail of the function is the recovery sequence used
 * after a mapping failure.
 * NOTE(review): loop headers, labels, jumps and several statements are not
 * visible in this listing.
 */
1154 static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1155 struct writeback_control *wbc)
1159 sector_t last_block;
1160 struct buffer_head *bh, *head;
1161 const unsigned blocksize = 1 << inode->i_blkbits;
1162 int nr_underway = 0;
1164 BUG_ON(!PageLocked(page));
1166 last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1168 if (!page_has_buffers(page)) {
1169 create_empty_buffers(page, blocksize,
1170 (1 << BH_Dirty)|(1 << BH_Uptodate));
1174 * Be very careful. We have no exclusion from __set_page_dirty_buffers
1175 * here, and the (potentially unmapped) buffers may become dirty at
1176 * any time. If a buffer becomes dirty here after we've inspected it
1177 * then we just miss that fact, and the page stays dirty.
1179 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1180 * handle that here by just cleaning them.
1183 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1184 head = page_buffers(page);
1188 * Get all the dirty buffers mapped to disk addresses and
1189 * handle any aliases from the underlying blockdev's mapping.
1192 if (block > last_block) {
1194 * mapped buffers outside i_size will occur, because
1195 * this page can be outside i_size when there is a
1196 * truncate in progress.
1199 * The buffer was zeroed by block_write_full_page()
1201 clear_buffer_dirty(bh);
1202 set_buffer_uptodate(bh);
1203 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1204 WARN_ON(bh->b_size != blocksize);
1205 err = btrfs_get_block(inode, block, bh, 0);
1208 if (buffer_new(bh)) {
1209 /* blockdev mappings never come here */
1210 clear_buffer_new(bh);
1211 unmap_underlying_metadata(bh->b_bdev,
1215 bh = bh->b_this_page;
1217 } while (bh != head);
1220 if (!buffer_mapped(bh))
1223 * If it's a fully non-blocking write attempt and we cannot
1224 * lock the buffer then redirty the page. Note that this can
1225 * potentially cause a busy-wait loop from pdflush and kswapd
1226 * activity, but those code paths have their own higher-level
1229 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1231 } else if (test_set_buffer_locked(bh)) {
1232 redirty_page_for_writepage(wbc, page);
1235 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1236 mark_buffer_async_write(bh);
1240 } while ((bh = bh->b_this_page) != head);
1243 * The page and its buffers are protected by PageWriteback(), so we can
1244 * drop the bh refcounts early.
1246 BUG_ON(PageWriteback(page));
1247 set_page_writeback(page);
1250 struct buffer_head *next = bh->b_this_page;
1251 if (buffer_async_write(bh)) {
1252 submit_bh(WRITE, bh);
1256 } while (bh != head);
1261 if (nr_underway == 0) {
1263 * The page was marked dirty, but the buffers were
1264 * clean. Someone wrote them back by hand with
1265 * ll_rw_block/submit_bh. A rare case.
1269 if (!buffer_uptodate(bh)) {
1273 bh = bh->b_this_page;
1274 } while (bh != head);
1276 SetPageUptodate(page);
1277 end_page_writeback(page);
1279 * The page and buffer_heads can be released at any time from
1282 wbc->pages_skipped++; /* We didn't write this page */
1288 * ENOSPC, or some other error. We may already have added some
1289 * blocks to the file, so we need to write these out to avoid
1290 * exposing stale data.
1291 * The page is currently locked and not marked for writeback
1294 /* Recovery: lock and submit the mapped buffers */
1296 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1298 mark_buffer_async_write(bh);
1301 * The buffer may have been set dirty during
1302 * attachment to a dirty page.
1304 clear_buffer_dirty(bh);
1306 } while ((bh = bh->b_this_page) != head);
1308 BUG_ON(PageWriteback(page));
1309 set_page_writeback(page);
1311 struct buffer_head *next = bh->b_this_page;
1312 if (buffer_async_write(bh)) {
1313 clear_buffer_dirty(bh);
1314 submit_bh(WRITE, bh);
1318 } while (bh != head);
1324 * The generic ->writepage function for buffer-backed address_spaces
/*
 * Write back one page of the inode that owns page->mapping.
 *
 * A page whose index is below i_size >> PAGE_CACHE_SHIFT goes straight to
 * __btrfs_write_full_page().  A page entirely past i_size is invalidated
 * with block_invalidatepage() and 0 is returned.  A page that straddles
 * i_size has the bytes from the in-page offset of i_size to the end of the
 * page zeroed before it is handed to __btrfs_write_full_page().
 * NOTE(review): a statement between the invalidate call and the return is
 * not visible in this listing.
 */
1326 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1328 struct inode * const inode = page->mapping->host;
1329 loff_t i_size = i_size_read(inode);
1330 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1334 /* Is the page fully inside i_size? */
1335 if (page->index < end_index)
1336 return __btrfs_write_full_page(inode, page, wbc);
1338 /* Is the page fully outside i_size? (truncate in progress) */
1339 offset = i_size & (PAGE_CACHE_SIZE-1);
1340 if (page->index >= end_index+1 || !offset) {
1342 * The page may have dirty, unmapped buffers. For example,
1343 * they may have been added in ext3_writepage(). Make them
1344 * freeable here, so the page does not leak.
1346 block_invalidatepage(page, 0);
1348 return 0; /* don't care */
1352 * The page straddles i_size. It must be zeroed out on each and every
1353 * writepage invokation because it may be mmapped. "A file is mapped
1354 * in multiples of the page size. For a file that is not a multiple of
1355 * the page size, the remaining memory is zeroed when mapped, and
1356 * writes to that region are not written out to the file."
1358 kaddr = kmap_atomic(page, KM_USER0);
1359 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1360 flush_dcache_page(page);
1361 kunmap_atomic(kaddr, KM_USER0);
1362 return __btrfs_write_full_page(inode, page, wbc);
/*
 * Truncate handler.
 *
 * Tests that the inode is a regular file and that it is neither append-only
 * nor immutable, calls nobh_truncate_page() for the current i_size, then
 * drops the file items past i_size with btrfs_truncate_in_trans() inside a
 * transaction under fs_mutex and marks the inode dirty.
 * NOTE(review): the statements under the two initial checks (presumably
 * early returns) and the handling of ret are not visible in this listing.
 */
1365 static void btrfs_truncate(struct inode *inode)
1367 struct btrfs_root *root = BTRFS_I(inode)->root;
1369 struct btrfs_trans_handle *trans;
1371 if (!S_ISREG(inode->i_mode))
1373 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1376 nobh_truncate_page(inode->i_mapping, inode->i_size);
1378 /* FIXME, add redo link to tree so we don't leak on crash */
1379 mutex_lock(&root->fs_info->fs_mutex);
1380 trans = btrfs_start_transaction(root, 1);
1381 btrfs_set_trans_block_group(trans, inode);
1382 ret = btrfs_truncate_in_trans(trans, root, inode);
1384 ret = btrfs_end_transaction(trans, root);
1386 mutex_unlock(&root->fs_info->fs_mutex);
1387 mark_inode_dirty(inode);
1391 * Make sure any changes to nobh_commit_write() are reflected in
1392 * nobh_truncate_page(), since it doesn't call commit_write().
/*
 * btrfs_commit_write - finish a buffered write to one page (used as the
 * .commit_write hook of btrfs_aops and called from dirty_and_release_pages).
 *
 * @file: file being written (not referenced by the visible lines)
 * @page: page that received the data
 * @from: start offset of the write inside the page (not referenced by the
 *        visible lines)
 * @to:   end offset of the write inside the page
 *
 * Marks the page uptodate, dirties it only when its first buffer is mapped
 * to a non-zero block number, and grows i_size when the write ended past
 * the current end of file.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
1394 static int btrfs_commit_write(struct file *file, struct page *page,
1395 unsigned from, unsigned to)
1397 struct inode *inode = page->mapping->host;
1398 struct buffer_head *bh;
/* pos is the file offset of the first byte after the written range. */
1399 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1401 SetPageUptodate(page);
1402 bh = page_buffers(page);
/*
 * A mapped buffer with block number 0 is handled as inline data by
 * dirty_and_release_pages, so such pages are not dirtied here.
 */
1403 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1404 set_page_dirty(page);
1406 if (pos > inode->i_size) {
1407 i_size_write(inode, pos);
1408 mark_inode_dirty(inode);
/*
 * btrfs_copy_from_user - copy user data into an array of prepared pages.
 *
 * @pos:            file position of the write; only its offset within the
 *                  first page is used
 * @num_pages:      number of entries in @prepared_pages
 * @write_bytes:    total number of bytes to copy
 * @prepared_pages: destination pages
 * @buf:            user space source buffer
 *
 * Returns 0, or -EFAULT when page_fault is non-zero after the loop.
 *
 * NOTE(review): several lines of the loop body are missing from this
 * listing (the remaining arguments of __copy_from_user, any advance of buf,
 * any early exit on a fault), so the exact fault handling cannot be
 * confirmed from here.
 */
1413 static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
1414 struct page **prepared_pages,
1415 const char __user * buf)
1417 long page_fault = 0;
1419 int offset = pos & (PAGE_CACHE_SIZE - 1);
/* Only the first page starts at a non-zero offset; it is reset to 0 after. */
1421 for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
/* Bytes for this page: the rest of the page or the rest of the request. */
1422 size_t count = min_t(size_t,
1423 PAGE_CACHE_SIZE - offset, write_bytes);
1424 struct page *page = prepared_pages[i];
1425 fault_in_pages_readable(buf, count);
1427 /* Copy data from userspace to the current page */
1429 page_fault = __copy_from_user(page_address(page) + offset,
1431 /* Flush processor's dcache for this page */
1432 flush_dcache_page(page);
1435 write_bytes -= count;
1440 return page_fault ? -EFAULT : 0;
/*
 * btrfs_drop_pages - unlock, mark accessed and release each page in @pages.
 *
 * @pages:     array of pages
 * @num_pages: number of entries to walk
 *
 * NOTE(review): callers zero the array before filling it, so entries may be
 * NULL; a NULL check is not visible in this listing (lines are missing
 * between the for statement and unlock_page) - confirm.
 */
1443 static void btrfs_drop_pages(struct page **pages, size_t num_pages)
1446 for (i = 0; i < num_pages; i++) {
1449 unlock_page(pages[i]);
1450 mark_page_accessed(pages[i]);
1451 page_cache_release(pages[i]);
/*
 * dirty_and_release_pages - per-page bookkeeping after user data has been
 * copied into the page cache.
 *
 * For every page a separate transaction is started under fs_mutex.  When the
 * first buffer of the page is mapped with block number 0, the data is stored
 * as an inline file extent item keyed by (inode number, EXTENT_DATA, page
 * offset in the file).  The listing also shows a checksum call over the
 * whole page; presumably that is the non-inline branch, but the line that
 * separates the two branches is missing here - confirm.  Afterwards the page
 * is flagged Checked, the transaction is ended and btrfs_commit_write() is
 * called for the written range.
 *
 * NOTE(review): part of the parameter list and several locals are missing
 * from this listing.  The trans parameter is overwritten inside the loop
 * (btrfs_file_write passes NULL for it).
 */
1454 static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
1455 struct btrfs_root *root,
1457 struct page **pages,
1467 struct inode *inode = file->f_path.dentry->d_inode;
1468 struct buffer_head *bh;
1469 struct btrfs_file_extent_item *ei;
1471 for (i = 0; i < num_pages; i++) {
/* offset/this_write describe the slice of the write that lands in page i. */
1472 offset = pos & (PAGE_CACHE_SIZE -1);
1473 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1474 /* FIXME, one block at a time */
1476 mutex_lock(&root->fs_info->fs_mutex);
1477 trans = btrfs_start_transaction(root, 1);
1478 btrfs_set_trans_block_group(trans, inode);
1480 bh = page_buffers(pages[i]);
/* Mapped buffer with block number 0: store the data inline in the tree. */
1481 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
1482 struct btrfs_key key;
1483 struct btrfs_path *path;
1487 path = btrfs_alloc_path();
1489 key.objectid = inode->i_ino;
1490 key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
1492 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
/* Inline data must be smaller than one page. */
1493 BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
1495 btrfs_file_extent_calc_inline_size(write_bytes);
1496 ret = btrfs_insert_empty_item(trans, root, path, &key,
1499 ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1500 path->slots[0], struct btrfs_file_extent_item);
1501 btrfs_set_file_extent_generation(ei, trans->transid);
1502 btrfs_set_file_extent_type(ei,
1503 BTRFS_FILE_EXTENT_INLINE);
1504 ptr = btrfs_file_extent_inline_start(ei);
/*
 * NOTE(review): the copy length is offset + write_bytes while the size
 * computed above is based on write_bytes only; the full item size
 * expression is not visible here - confirm they agree when offset != 0.
 */
1505 memcpy(ptr, bh->b_data, offset + write_bytes);
1506 mark_buffer_dirty(path->nodes[0]);
1507 btrfs_free_path(path);
/*
 * NOTE(review): kmap() is used for the checksum input; a matching kunmap()
 * is not visible in this listing (it may be on an omitted line).
 */
1509 btrfs_csum_file_block(trans, root, inode->i_ino,
1510 pages[i]->index << PAGE_CACHE_SHIFT,
1511 kmap(pages[i]), PAGE_CACHE_SIZE);
1514 SetPageChecked(pages[i]);
1515 btrfs_update_inode_block_group(trans, inode);
1516 ret = btrfs_end_transaction(trans, root);
1518 mutex_unlock(&root->fs_info->fs_mutex);
1520 ret = btrfs_commit_write(file, pages[i], offset,
1521 offset + this_write);
1527 WARN_ON(this_write > write_bytes);
1528 write_bytes -= this_write;
/*
 * drop_extents - remove or trim the file extent items of @inode that overlap
 * a byte range, so that new extents can be inserted for that range.
 *
 * The range is described by start and end, which are used throughout the
 * body; their parameter declarations are on lines missing from this listing.
 *
 * Visible outline of one pass:
 *   1. look up the file extent item at search_start;
 *   2. stop when the item belongs to another inode, starts at or after end,
 *      or is not an EXTENT_DATA item (the exact exit statements are not
 *      visible here);
 *   3. compute extent_end from the item (block count for regular extents,
 *      inline length for inline ones);
 *   4. if the extent reaches past end, remember a copy in old and take an
 *      extra reference on its disk blocks so a trailing "bookend" item can
 *      be created later;
 *   5. if the extent begins before start, shrink it to end at start;
 *      otherwise delete the item and free its disk extent;
 *   6. if a bookend is needed, insert a new regular extent item that points
 *      at the same disk extent with its offset advanced past end.
 *
 * NOTE(review): many lines (loop construct, gotos, several assignments and
 * conditions) are missing from this listing; treat this outline as a guide
 * only and verify against the full source.
 */
1534 static int drop_extents(struct btrfs_trans_handle *trans,
1535 struct btrfs_root *root,
1536 struct inode *inode,
1540 struct btrfs_key key;
1541 struct btrfs_leaf *leaf;
1543 struct btrfs_file_extent_item *extent;
1546 struct btrfs_file_extent_item old;
1547 struct btrfs_path *path;
1548 u64 search_start = start;
1554 path = btrfs_alloc_path();
1558 btrfs_release_path(root, path);
1559 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
1564 if (path->slots[0] == 0) {
1575 leaf = btrfs_buffer_leaf(path->nodes[0]);
1576 slot = path->slots[0];
1577 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
/* Past the range, or an item of a different inode. */
1578 if (key.offset >= end || key.objectid != inode->i_ino) {
1582 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
1586 extent = btrfs_item_ptr(leaf, slot,
1587 struct btrfs_file_extent_item);
1588 found_type = btrfs_file_extent_type(extent);
1589 if (found_type == BTRFS_FILE_EXTENT_REG) {
1590 extent_end = key.offset +
1591 (btrfs_file_extent_num_blocks(extent) <<
1594 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1596 extent_end = key.offset +
1597 btrfs_file_extent_inline_len(leaf->items + slot);
1600 if (!found_extent && !found_inline) {
1605 if (search_start >= extent_end) {
1610 search_start = extent_end;
/* The extent sticks out past end: keep a copy for the bookend item. */
1612 if (end < extent_end && end >= key.offset) {
1614 memcpy(&old, extent, sizeof(old));
1615 ret = btrfs_inc_extent_ref(trans, root,
1616 btrfs_file_extent_disk_blocknr(&old),
1617 btrfs_file_extent_disk_num_blocks(&old));
1620 WARN_ON(found_inline);
/* The extent begins before start: keep its head, cut it at start. */
1624 if (start > key.offset) {
1627 /* truncate existing extent */
1629 WARN_ON(start & (root->blocksize - 1));
1631 new_num = (start - key.offset) >>
1633 old_num = btrfs_file_extent_num_blocks(extent);
/*
 * i_blocks is adjusted with << 3; presumably this converts 4096-byte blocks
 * to 512-byte units (see the "blocksize != 4096" FIXME in btrfs_file_write).
 */
1634 inode->i_blocks -= (old_num - new_num) << 3;
1635 btrfs_set_file_extent_num_blocks(extent,
1637 mark_buffer_dirty(path->nodes[0]);
1641 ret = btrfs_truncate_item(trans, root, path,
1642 start - key.offset);
1648 u64 disk_blocknr = 0;
1649 u64 disk_num_blocks = 0;
1650 u64 extent_num_blocks = 0;
1653 btrfs_file_extent_disk_blocknr(extent);
1655 btrfs_file_extent_disk_num_blocks(extent);
1657 btrfs_file_extent_num_blocks(extent);
1659 ret = btrfs_del_item(trans, root, path);
1661 btrfs_release_path(root, path);
1664 btrfs_file_extent_num_blocks(extent) << 3;
1665 ret = btrfs_free_extent(trans, root,
1667 disk_num_blocks, 0);
1671 if (!bookend && search_start >= end) {
1678 if (bookend && found_extent) {
1679 /* create bookend */
1680 struct btrfs_key ins;
1681 ins.objectid = inode->i_ino;
1684 btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
1686 btrfs_release_path(root, path);
1687 ret = btrfs_insert_empty_item(trans, root, path, &ins,
1690 extent = btrfs_item_ptr(
1691 btrfs_buffer_leaf(path->nodes[0]),
1693 struct btrfs_file_extent_item);
/* The bookend refers to the same disk extent as the item saved in old. */
1694 btrfs_set_file_extent_disk_blocknr(extent,
1695 btrfs_file_extent_disk_blocknr(&old));
1696 btrfs_set_file_extent_disk_num_blocks(extent,
1697 btrfs_file_extent_disk_num_blocks(&old));
/* Skip the blocks of the old extent that lie before end. */
1699 btrfs_set_file_extent_offset(extent,
1700 btrfs_file_extent_offset(&old) +
1701 ((end - key.offset) >> inode->i_blkbits));
1702 WARN_ON(btrfs_file_extent_num_blocks(&old) <
1703 (end - key.offset) >> inode->i_blkbits);
1704 btrfs_set_file_extent_num_blocks(extent,
1705 btrfs_file_extent_num_blocks(&old) -
1706 ((end - key.offset) >> inode->i_blkbits));
1708 btrfs_set_file_extent_type(extent,
1709 BTRFS_FILE_EXTENT_REG);
1710 btrfs_set_file_extent_generation(extent,
1711 btrfs_file_extent_generation(&old));
1712 btrfs_mark_buffer_dirty(path->nodes[0]);
1714 btrfs_file_extent_num_blocks(extent) << 3;
1720 btrfs_free_path(path);
/*
 * prepare_pages - grab and set up the page cache pages for one write chunk.
 *
 * For each of the num_pages pages starting at page index pos >> shift, the
 * page is obtained with grab_cache_page(), given empty buffers created with
 * the BH_Uptodate bit set, and every buffer is mapped through
 * btrfs_map_bh_to_logical().  When alloc_extent_start is non-zero it is
 * advanced by one per buffer, so consecutive buffers map to consecutive
 * block numbers.
 *
 * Two error exits are visible: failed_release drops the pages; the path
 * after failed_truncate additionally calls vmtruncate() back to the i_size
 * sampled on entry.
 *
 * NOTE(review): parts of the parameter list, the labels themselves and the
 * return statements are missing from this listing.  first_index and
 * last_index are not referenced by any visible line.
 */
1724 static int prepare_pages(struct btrfs_root *root,
1726 struct page **pages,
1729 unsigned long first_index,
1730 unsigned long last_index,
1732 u64 alloc_extent_start)
1735 unsigned long index = pos >> PAGE_CACHE_SHIFT;
1736 struct inode *inode = file->f_path.dentry->d_inode;
1740 struct buffer_head *bh;
1741 struct buffer_head *head;
/* Remembered so a failed mapping can truncate back to the old size. */
1742 loff_t isize = i_size_read(inode);
/* Zeroed up front so the error paths can tell which pages were grabbed. */
1744 memset(pages, 0, num_pages * sizeof(struct page *));
1746 for (i = 0; i < num_pages; i++) {
1747 pages[i] = grab_cache_page(inode->i_mapping, index + i);
1750 goto failed_release;
1752 offset = pos & (PAGE_CACHE_SIZE -1);
1753 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1754 create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize,
1755 (1 << BH_Uptodate));
1756 head = page_buffers(pages[i]);
1759 err = btrfs_map_bh_to_logical(root, bh,
1760 alloc_extent_start);
1763 goto failed_truncate;
/* Walk the circular list of buffers attached to the page. */
1764 bh = bh->b_this_page;
1765 if (alloc_extent_start)
1766 alloc_extent_start++;
1767 } while (bh != head);
1769 WARN_ON(this_write > write_bytes);
1770 write_bytes -= this_write;
1775 btrfs_drop_pages(pages, num_pages);
1779 btrfs_drop_pages(pages, num_pages);
1781 vmtruncate(inode, isize);
/*
 * btrfs_file_write - .write hook of btrfs_file_operations (buffered write).
 *
 * @file:  file being written
 * @buf:   user space source buffer
 * @count: number of bytes requested
 * @ppos:  in/out file position
 *
 * Visible outline:
 *   1. reject or special-case O_DIRECT (the guarded statement is missing
 *      from this listing), run the generic write checks, drop suid and
 *      update the file times;
 *   2. under i_mutex, pin the first and last page of the range when they
 *      lie inside i_size and the write starts or ends mid-page, reading
 *      them in if they are not uptodate;
 *   3. in one transaction under fs_mutex: account the new blocks in
 *      i_blocks, drop existing extents that overlap the range, and allocate
 *      plus insert a new file extent unless the write qualifies for inline
 *      storage;
 *   4. loop over the data in chunks: prepare_pages(), btrfs_copy_from_user()
 *      and dirty_and_release_pages(), then btrfs_drop_pages();
 *   5. release the pinned pages and return the number of bytes written, or
 *      err when nothing was written.
 *
 * NOTE(review): many short lines (error checks, gotos, labels, the loop
 * construct, position updates) are missing from this listing, so error
 * handling cannot be confirmed from here.
 */
1785 static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1786 size_t count, loff_t *ppos)
1789 size_t num_written = 0;
1792 struct inode *inode = file->f_path.dentry->d_inode;
1793 struct btrfs_root *root = BTRFS_I(inode)->root;
1794 struct page *pages[8];
1795 struct page *pinned[2] = { NULL, NULL };
1796 unsigned long first_index;
1797 unsigned long last_index;
1800 u64 alloc_extent_start;
1801 struct btrfs_trans_handle *trans;
1802 struct btrfs_key ins;
1804 if (file->f_flags & O_DIRECT)
1807 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
1808 current->backing_dev_info = inode->i_mapping->backing_dev_info;
1809 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1814 err = remove_suid(file->f_path.dentry);
1817 file_update_time(file);
/* start_pos is pos rounded down to a page boundary. */
1819 start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
1820 num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
1823 mutex_lock(&inode->i_mutex);
1824 first_index = pos >> PAGE_CACHE_SHIFT;
/* last_index is the page holding byte pos + count, i.e. one past the end. */
1825 last_index = (pos + count) >> PAGE_CACHE_SHIFT;
/*
 * Write starts mid-page inside the file: pin and read the first page.
 * NOTE(review): the result of grab_cache_page() is passed to PageUptodate()
 * on the next visible line; a NULL check is not visible here - confirm.
 */
1827 if ((first_index << PAGE_CACHE_SHIFT) < inode->i_size &&
1828 (pos & (PAGE_CACHE_SIZE - 1))) {
1829 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
1830 if (!PageUptodate(pinned[0])) {
1831 ret = mpage_readpage(pinned[0], btrfs_get_block);
1834 unlock_page(pinned[0]);
/*
 * Same for the last page.  Note the partial-page test uses count, not
 * pos + count.
 */
1837 if (first_index != last_index &&
1838 (last_index << PAGE_CACHE_SHIFT) < inode->i_size &&
1839 (count & (PAGE_CACHE_SIZE - 1))) {
1840 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
1841 if (!PageUptodate(pinned[1])) {
1842 ret = mpage_readpage(pinned[1], btrfs_get_block);
1845 unlock_page(pinned[1]);
1849 mutex_lock(&root->fs_info->fs_mutex);
1850 trans = btrfs_start_transaction(root, 1);
1853 mutex_unlock(&root->fs_info->fs_mutex);
1856 btrfs_set_trans_block_group(trans, inode);
1857 /* FIXME blocksize != 4096 */
1858 inode->i_blocks += num_blocks << 3;
/* Overwriting existing data: remove the extents that cover the range. */
1859 if (start_pos < inode->i_size) {
1860 /* FIXME blocksize != pagesize */
1861 ret = drop_extents(trans, root, inode,
1863 (pos + count + root->blocksize -1) &
1864 ~((u64)root->blocksize - 1));
/*
 * A real extent is allocated when the file is already at least one page,
 * when the write ends before i_size, or when the data is too large for an
 * inline item.  The alternative branch is not visible in this listing.
 */
1867 if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
1868 pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
1869 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
1870 num_blocks, 1, (u64)-1, &ins);
1872 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
1873 start_pos, ins.objectid, ins.offset);
1880 alloc_extent_start = ins.objectid;
1881 btrfs_update_inode_block_group(trans, inode);
1882 ret = btrfs_end_transaction(trans, root);
1883 mutex_unlock(&root->fs_info->fs_mutex);
/* Each chunk is limited to the remainder of the current page. */
1886 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1887 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
1888 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
1891 memset(pages, 0, sizeof(pages));
1892 ret = prepare_pages(root, file, pages, num_pages,
1893 pos, first_index, last_index,
1894 write_bytes, alloc_extent_start);
1897 /* FIXME blocks != pagesize */
1898 if (alloc_extent_start)
1899 alloc_extent_start += num_pages;
1900 ret = btrfs_copy_from_user(pos, num_pages,
1901 write_bytes, pages, buf);
/* dirty_and_release_pages starts its own transaction; NULL is passed here. */
1904 ret = dirty_and_release_pages(NULL, root, file, pages,
1905 num_pages, pos, write_bytes);
1907 btrfs_drop_pages(pages, num_pages);
1910 count -= write_bytes;
1912 num_written += write_bytes;
1914 balance_dirty_pages_ratelimited(inode->i_mapping);
1918 mutex_unlock(&inode->i_mutex);
1921 page_cache_release(pinned[0]);
1923 page_cache_release(pinned[1]);
1925 current->backing_dev_info = NULL;
1926 mark_inode_dirty(inode);
1927 return num_written ? num_written : err;
/*
 * btrfs_read_actor - copy part of a page cache page to the user buffer in
 * @desc, verifying the page checksum on first use.
 *
 * @desc:   read descriptor (remaining count, bytes written, user buffer)
 * @page:   source page
 * @offset: start offset inside the page
 * @size:   number of bytes to copy
 *
 * A page without the Checked flag is verified over the full page with
 * btrfs_csum_verify_file_block().  The listing shows a printk and a memset
 * that zeroes the page; presumably both run only when verification fails
 * (the condition line is missing here).  The page is then flagged Checked.
 *
 * The copy first tries an atomic mapping after pre-faulting the destination,
 * and otherwise falls back to __copy_to_user().  desc->error is set to
 * -EFAULT on the failure path visible below; desc->count, desc->written and
 * desc->arg.buf are advanced by size.
 *
 * NOTE(review): kmap(page) is used for the checksum input and a matching
 * kunmap() is not visible in this listing.  The return statement and the
 * handling of a short copy are also on missing lines.
 */
1930 static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
1931 unsigned long offset, unsigned long size)
1934 unsigned long left, count = desc->count;
1935 struct inode *inode = page->mapping->host;
1940 if (!PageChecked(page)) {
1941 /* FIXME, do it per block */
1942 struct btrfs_root *root = BTRFS_I(inode)->root;
1944 int ret = btrfs_csum_verify_file_block(root,
1945 page->mapping->host->i_ino,
1946 page->index << PAGE_CACHE_SHIFT,
1947 kmap(page), PAGE_CACHE_SIZE);
1949 printk("failed to verify ino %lu page %lu\n",
1950 page->mapping->host->i_ino,
1952 memset(page_address(page), 0, PAGE_CACHE_SIZE);
1954 SetPageChecked(page);
1958 * Faults on the destination of a read are common, so do it before
/* Fast path: destination pre-faulted, copy under an atomic mapping. */
1961 if (!fault_in_pages_writeable(desc->arg.buf, size)) {
1962 kaddr = kmap_atomic(page, KM_USER0);
1963 left = __copy_to_user_inatomic(desc->arg.buf,
1964 kaddr + offset, size);
1965 kunmap_atomic(kaddr, KM_USER0);
1970 /* Do it the slow way */
1972 left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
1977 desc->error = -EFAULT;
1980 desc->count = count - size;
1981 desc->written += size;
1982 desc->arg.buf += size;
1987 * btrfs_file_aio_read - filesystem read routine
1988 * @iocb: kernel I/O control block
1989 * @iov: io vector request
1990 * @nr_segs: number of segments in the iovec
1991 * @pos: current file position
/*
 * btrfs_file_aio_read - .aio_read hook of btrfs_file_operations.
 *
 * First pass: validate the iovec array.  The running total and each segment
 * length must not be negative when viewed as ssize_t, and each segment must
 * pass access_ok(VERIFY_WRITE).  A segment that fails access_ok is removed
 * from the total again.
 *
 * Second pass: for each non-empty segment build a read_descriptor_t and call
 * do_generic_file_read(); the bytes written are accumulated in retval, and
 * desc.error is used only when nothing has been read so far.
 *
 * NOTE(review): the actor argument passed to do_generic_file_read and the
 * early-exit statements of both loops are on lines missing from this
 * listing.  The pos parameter is not referenced by any visible line; the
 * position used is iocb->ki_pos.
 */
1993 static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1994 unsigned long nr_segs, loff_t pos)
1996 struct file *filp = iocb->ki_filp;
2000 loff_t *ppos = &iocb->ki_pos;
2003 for (seg = 0; seg < nr_segs; seg++) {
2004 const struct iovec *iv = &iov[seg];
2007 * If any segment has a negative length, or the cumulative
2008 * length ever wraps negative then return -EINVAL.
2010 count += iv->iov_len;
2011 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
2013 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
2018 count -= iv->iov_len; /* This segment is no good */
2023 for (seg = 0; seg < nr_segs; seg++) {
2024 read_descriptor_t desc;
2027 desc.arg.buf = iov[seg].iov_base;
2028 desc.count = iov[seg].iov_len;
2029 if (desc.count == 0)
2032 do_generic_file_read(filp, ppos, &desc,
2034 retval += desc.written;
/* GNU "?:" extension: keep retval when non-zero, else take desc.error. */
2036 retval = retval ?: desc.error;
/*
 * create_subvol - create a new, empty subvolume named @name.
 *
 * @root:    root the ioctl was issued on (btrfs_ioctl calls this when it is
 *           the tree root)
 * @name:    name of the new directory entry
 * @namelen: length of @name
 *
 * Visible steps, all under fs_mutex:
 *   1. allocate a free block and initialise it as an empty level-0 leaf;
 *   2. build a root item whose embedded inode describes a directory, point
 *      it at the new leaf and give it one reference;
 *   3. pick a free objectid in the tree root, insert the root item under a
 *      ROOT_ITEM key and add a directory item for @name in the tree root;
 *   4. commit, read the new root back, and in a second transaction create
 *      the root directory inode of the subvolume and commit again.
 *
 * NOTE(review): every BUG_ON or error check is on a line missing from this
 * listing.  The root item inode uses mode S_IFDIR | 0755 while the inode
 * created with btrfs_new_inode uses S_IFDIR | 0700 - confirm this difference
 * is intended.
 */
2044 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
2046 struct btrfs_trans_handle *trans;
2047 struct btrfs_key key;
2048 struct btrfs_root_item root_item;
2049 struct btrfs_inode_item *inode_item;
2050 struct buffer_head *subvol;
2051 struct btrfs_leaf *leaf;
2052 struct btrfs_root *new_root;
2053 struct inode *inode;
2057 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2059 mutex_lock(&root->fs_info->fs_mutex);
2060 trans = btrfs_start_transaction(root, 1);
/* Step 1: a fresh leaf block becomes the root node of the new tree. */
2063 subvol = btrfs_alloc_free_block(trans, root, 0);
2066 leaf = btrfs_buffer_leaf(subvol);
2067 btrfs_set_header_nritems(&leaf->header, 0);
2068 btrfs_set_header_level(&leaf->header, 0);
2069 btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
2070 btrfs_set_header_generation(&leaf->header, trans->transid);
2071 btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
2072 memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
2073 sizeof(leaf->header.fsid));
2074 mark_buffer_dirty(subvol);
/* Step 2: fill in the root item and its embedded directory inode. */
2076 inode_item = &root_item.inode;
2077 memset(inode_item, 0, sizeof(*inode_item));
2078 btrfs_set_inode_generation(inode_item, 1);
2079 btrfs_set_inode_size(inode_item, 3);
2080 btrfs_set_inode_nlink(inode_item, 1);
2081 btrfs_set_inode_nblocks(inode_item, 1);
2082 btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
2084 btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
2085 btrfs_set_root_refs(&root_item, 1);
/* Step 3: register the new root and its name in the tree of tree roots. */
2089 ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2093 btrfs_set_root_dirid(&root_item, new_dirid);
2095 key.objectid = objectid;
2098 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2099 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2104 * insert the directory item
2106 key.offset = (u64)-1;
2107 dir = root->fs_info->sb->s_root->d_inode;
2108 ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2109 name, namelen, dir->i_ino, &key, 0);
2112 ret = btrfs_commit_transaction(trans, root);
/* Step 4: populate the new root with its top-level directory. */
2115 new_root = btrfs_read_fs_root(root->fs_info, &key);
2118 trans = btrfs_start_transaction(new_root, 1);
2121 inode = btrfs_new_inode(trans, new_root, new_dirid,
2122 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2123 inode->i_op = &btrfs_dir_inode_operations;
2124 inode->i_fop = &btrfs_dir_file_operations;
2126 ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
2131 ret = btrfs_update_inode(trans, new_root, inode);
2134 ret = btrfs_commit_transaction(trans, new_root);
2139 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * create_snapshot - create a snapshot of @root named @name.
 *
 * @root:    root to snapshot
 * @name:    name of the new directory entry in the tree root
 * @namelen: length of @name
 *
 * Visible steps, all under fs_mutex and inside one transaction: write back
 * the root inode, pick a free objectid in the tree root, copy the current
 * root item, point the copy at the block of the current root node, insert it
 * under a ROOT_ITEM key, add a directory item for @name, take an extra
 * reference with btrfs_inc_root_ref() and commit.
 *
 * NOTE(review): the statement guarded by the ref_cows test (presumably an
 * early error return) and all error checks are on lines missing from this
 * listing.
 */
2143 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2145 struct btrfs_trans_handle *trans;
2146 struct btrfs_key key;
2147 struct btrfs_root_item new_root_item;
2151 if (!root->ref_cows)
2154 mutex_lock(&root->fs_info->fs_mutex);
2155 trans = btrfs_start_transaction(root, 1);
2158 ret = btrfs_update_inode(trans, root, root->inode);
2161 ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
/* The snapshot starts as a copy of the source root item. */
2165 memcpy(&new_root_item, &root->root_item,
2166 sizeof(new_root_item));
2168 key.objectid = objectid;
2171 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
/* Both roots now point at the same tree block. */
2172 btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
2174 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2179 * insert the directory item
2181 key.offset = (u64)-1;
2182 ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2184 root->fs_info->sb->s_root->d_inode->i_ino,
2189 ret = btrfs_inc_root_ref(trans, root);
2192 ret = btrfs_commit_transaction(trans, root);
2194 mutex_unlock(&root->fs_info->fs_mutex);
/*
 * add_disk - add the block device @name to the filesystem.
 *
 * @root:    any root of the filesystem (used to reach fs_info)
 * @name:    path of the block device
 * @namelen: length of @name
 *
 * Visible steps: open the device exclusively and set its block size to the
 * superblock block size, compute its size in blocks, then inside a
 * transaction on the device root insert a DEV_ITEM whose key objectid is the
 * current total block count and whose key offset is the number of new
 * blocks.  The device path is stored directly after the fixed-size item, a
 * new device id is assigned, the device is entered into the device radix via
 * btrfs_insert_dev_radix(), and the superblock total and the btree inode
 * size are grown by the new blocks before the commit.
 *
 * NOTE(review): fs_mutex is locked through dev_root->fs_info and unlocked
 * through root->fs_info; this is the same mutex only if both point at the
 * same fs_info - confirm.  Error checks after btrfs_alloc_path() and
 * open_bdev_excl(), and the labels of the unwind path, are on lines missing
 * from this listing.
 */
2198 static int add_disk(struct btrfs_root *root, char *name, int namelen)
2200 struct block_device *bdev;
2201 struct btrfs_path *path;
2202 struct super_block *sb = root->fs_info->sb;
2203 struct btrfs_root *dev_root = root->fs_info->dev_root;
2204 struct btrfs_trans_handle *trans;
2205 struct btrfs_device_item *dev_item;
2206 struct btrfs_key key;
2213 printk("adding disk %s\n", name);
2214 path = btrfs_alloc_path();
2217 num_blocks = btrfs_super_total_blocks(root->fs_info->disk_super);
2218 bdev = open_bdev_excl(name, O_RDWR, sb);
2220 ret = PTR_ERR(bdev);
2221 printk("open bdev excl failed ret %d\n", ret);
2224 set_blocksize(bdev, sb->s_blocksize);
2225 new_blocks = bdev->bd_inode->i_size >> sb->s_blocksize_bits;
/* Key: (first block number of the new device, its length in blocks). */
2226 key.objectid = num_blocks;
2227 key.offset = new_blocks;
2229 btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY);
2231 mutex_lock(&dev_root->fs_info->fs_mutex);
2232 trans = btrfs_start_transaction(dev_root, 1);
/* The item is the fixed struct followed by the device path. */
2233 item_size = sizeof(*dev_item) + namelen;
2234 printk("insert empty on %Lu %Lu %u size %d\n", num_blocks, new_blocks, key.flags, item_size);
2235 ret = btrfs_insert_empty_item(trans, dev_root, path, &key, item_size);
2237 printk("insert failed %d\n", ret);
2238 close_bdev_excl(bdev);
2243 dev_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2244 path->slots[0], struct btrfs_device_item);
2245 btrfs_set_device_pathlen(dev_item, namelen);
/* dev_item + 1 is the first byte after the fixed-size structure. */
2246 memcpy(dev_item + 1, name, namelen);
2248 device_id = btrfs_super_last_device_id(root->fs_info->disk_super) + 1;
2249 btrfs_set_super_last_device_id(root->fs_info->disk_super, device_id);
2250 btrfs_set_device_id(dev_item, device_id);
2251 mark_buffer_dirty(path->nodes[0]);
2253 ret = btrfs_insert_dev_radix(root, bdev, device_id, num_blocks,
2257 btrfs_set_super_total_blocks(root->fs_info->disk_super,
2258 num_blocks + new_blocks);
2259 i_size_write(root->fs_info->btree_inode,
2260 (num_blocks + new_blocks) <<
2261 root->fs_info->btree_inode->i_blkbits);
2265 ret = btrfs_commit_transaction(trans, dev_root);
2267 mutex_unlock(&root->fs_info->fs_mutex);
2269 btrfs_free_path(path);
/*
 * btrfs_ioctl - .ioctl hook for btrfs files and directories.
 *
 * BTRFS_IOC_SNAP_CREATE: copies a struct btrfs_ioctl_vol_args from user
 * space, looks the name up in the tree root directory under fs_mutex, and
 * then calls create_subvol() when issued on the tree root or
 * create_snapshot() otherwise.
 *
 * BTRFS_IOC_ADD_DISK: copies the same structure and calls add_disk() with
 * the name it contains.
 *
 * NOTE(review): the switch statement, returns and break lines are missing
 * from this listing.
 *
 * NOTE(review): strlen() is applied to vol_args.name right after
 * copy_from_user(); no visible line forces a NUL terminator into the buffer
 * first, so a name without a terminator would be read past the end of the
 * array.  The assignment vol_args.name[namelen] = 0 in the ADD_DISK case
 * comes after strlen() and therefore cannot bound it.  Consider terminating
 * the last byte of the buffer, or using strnlen(), before measuring.
 */
2274 static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2275 cmd, unsigned long arg)
2277 struct btrfs_root *root = BTRFS_I(inode)->root;
2278 struct btrfs_ioctl_vol_args vol_args;
2280 struct btrfs_dir_item *di;
2282 struct btrfs_path *path;
2286 case BTRFS_IOC_SNAP_CREATE:
2287 if (copy_from_user(&vol_args,
2288 (struct btrfs_ioctl_vol_args __user *)arg,
2291 namelen = strlen(vol_args.name);
2292 if (namelen > BTRFS_VOL_NAME_MAX)
2294 path = btrfs_alloc_path();
/*
 * NOTE(review): the next line ends in a comma, not a semicolon, so it forms
 * one comma expression with the mutex_lock() call; behaviour is the same
 * but it looks like a typo.
 */
2297 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2298 mutex_lock(&root->fs_info->fs_mutex);
2299 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2301 vol_args.name, namelen, 0);
2302 mutex_unlock(&root->fs_info->fs_mutex);
2303 btrfs_free_path(path);
/* A valid, non-error result means the name is already present. */
2304 if (di && !IS_ERR(di))
2307 if (root == root->fs_info->tree_root)
2308 ret = create_subvol(root, vol_args.name, namelen);
2310 ret = create_snapshot(root, vol_args.name, namelen);
2313 case BTRFS_IOC_ADD_DISK:
2314 if (copy_from_user(&vol_args,
2315 (struct btrfs_ioctl_vol_args __user *)arg,
2318 namelen = strlen(vol_args.name);
2319 if (namelen > BTRFS_VOL_NAME_MAX)
2321 vol_args.name[namelen] = '\0';
2322 ret = add_disk(root, vol_args.name, namelen);
/*
 * Slab caches created in init_inodecache() and torn down in
 * destroy_inodecache().  Only the inode cache is file-local; the others have
 * external linkage.
 */
2330 static struct kmem_cache *btrfs_inode_cachep;
2331 struct kmem_cache *btrfs_trans_handle_cachep;
2332 struct kmem_cache *btrfs_transaction_cachep;
2333 struct kmem_cache *btrfs_bit_radix_cachep;
2334 struct kmem_cache *btrfs_path_cachep;
2337 * Called inside transaction, so use GFP_NOFS
/*
 * btrfs_alloc_inode - .alloc_inode hook of btrfs_super_ops.
 *
 * Allocates a struct btrfs_inode from btrfs_inode_cachep with GFP_NOFS and
 * returns a pointer to the VFS inode embedded in it.
 *
 * NOTE(review): lines are missing between the allocation and the return in
 * this listing; whether a failed allocation is checked there cannot be
 * confirmed from here.
 */
2339 static struct inode *btrfs_alloc_inode(struct super_block *sb)
2341 struct btrfs_inode *ei;
2343 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2346 return &ei->vfs_inode;
/*
 * btrfs_destroy_inode - .destroy_inode hook of btrfs_super_ops.
 *
 * Warns if the inode still has dentries attached or pages in its mapping,
 * then returns the containing struct btrfs_inode to btrfs_inode_cachep.
 */
2349 static void btrfs_destroy_inode(struct inode *inode)
2351 WARN_ON(!list_empty(&inode->i_dentry));
2352 WARN_ON(inode->i_data.nrpages);
2354 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
/*
 * init_once - callback for objects of the btrfs inode cache.
 *
 * @foo:    the object, a struct btrfs_inode
 * @cachep: the cache the object belongs to (unused in the visible lines)
 * @flags:  slab constructor flags
 *
 * Calls inode_init_once() on the embedded VFS inode, but only when the flags
 * have SLAB_CTOR_CONSTRUCTOR set and SLAB_CTOR_VERIFY clear.
 */
2357 static void init_once(void * foo, struct kmem_cache * cachep,
2358 unsigned long flags)
2360 struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2362 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
2363 SLAB_CTOR_CONSTRUCTOR) {
2364 inode_init_once(&ei->vfs_inode);
/*
 * init_inodecache - create the five slab caches used by btrfs: inodes,
 * transaction handles, transactions, paths and the bit radix.
 *
 * NOTE(review): the trailing arguments of each kmem_cache_create() call and
 * the return statements are on lines missing from this listing.
 */
2368 static int init_inodecache(void)
2370 btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
2371 sizeof(struct btrfs_inode),
2372 0, (SLAB_RECLAIM_ACCOUNT|
2375 btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
2376 sizeof(struct btrfs_trans_handle),
2377 0, (SLAB_RECLAIM_ACCOUNT|
2380 btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
2381 sizeof(struct btrfs_transaction),
2382 0, (SLAB_RECLAIM_ACCOUNT|
/*
 * NOTE(review): the path cache is sized with sizeof(struct
 * btrfs_transaction), identical to the cache above; this looks like a
 * copy/paste slip for sizeof(struct btrfs_path).  If struct btrfs_path is
 * the larger of the two, objects from this cache would be too small -
 * confirm and fix.
 */
2385 btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
2386 sizeof(struct btrfs_transaction),
2387 0, (SLAB_RECLAIM_ACCOUNT|
2390 btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
2392 0, (SLAB_RECLAIM_ACCOUNT|
2394 SLAB_DESTROY_BY_RCU),
/*
 * NOTE(review): btrfs_path_cachep is not part of this failure check, so a
 * failed path cache creation would go unnoticed here.
 */
2396 if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL ||
2397 btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL)
/*
 * destroy_inodecache - destroy the five slab caches created by
 * init_inodecache().
 *
 * NOTE(review): each pointer is passed to kmem_cache_destroy() without a
 * visible NULL check, and this function is also used on the error path of
 * init_btrfs_fs(), where some caches may not have been created - confirm
 * that is safe on the target kernel.
 */
2402 static void destroy_inodecache(void)
2404 kmem_cache_destroy(btrfs_inode_cachep);
2405 kmem_cache_destroy(btrfs_trans_handle_cachep);
2406 kmem_cache_destroy(btrfs_transaction_cachep);
2407 kmem_cache_destroy(btrfs_bit_radix_cachep);
2408 kmem_cache_destroy(btrfs_path_cachep);
/*
 * btrfs_get_sb - .get_sb hook of btrfs_fs_type.
 *
 * Forwards all arguments to get_sb_bdev() with btrfs_fill_super as the
 * superblock fill callback, and returns its result.
 */
2411 static int btrfs_get_sb(struct file_system_type *fs_type,
2412 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
2414 return get_sb_bdev(fs_type, flags, dev_name, data,
2415 btrfs_fill_super, mnt);
/*
 * btrfs_getattr - .getattr hook of btrfs_file_inode_operations.
 *
 * Fills @stat from the inode with generic_fillattr() and then overrides the
 * reported block size with 256 KiB.
 *
 * NOTE(review): the return statement is on a line missing from this listing.
 */
2419 static int btrfs_getattr(struct vfsmount *mnt,
2420 struct dentry *dentry, struct kstat *stat)
2422 struct inode *inode = dentry->d_inode;
2423 generic_fillattr(inode, stat);
2424 stat->blksize = 256 * 1024;
/*
 * btrfs_statfs - .statfs hook of btrfs_super_ops.
 *
 * Fills @buf from the in-memory superblock: total blocks, free blocks
 * (total minus used), available blocks (same as free), block size, maximum
 * name length and the filesystem magic number.
 *
 * NOTE(review): the return statement is on a line missing from this listing.
 */
2428 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
2430 struct btrfs_root *root = btrfs_sb(dentry->d_sb);
2431 struct btrfs_super_block *disk_super = root->fs_info->disk_super;
2433 buf->f_namelen = BTRFS_NAME_LEN;
2434 buf->f_blocks = btrfs_super_total_blocks(disk_super);
2435 buf->f_bfree = buf->f_blocks - btrfs_super_blocks_used(disk_super);
2436 buf->f_bavail = buf->f_bfree;
2437 buf->f_bsize = dentry->d_sb->s_blocksize;
2438 buf->f_type = BTRFS_SUPER_MAGIC;
/*
 * Filesystem type descriptor handed to register_filesystem() and
 * unregister_filesystem().  FS_REQUIRES_DEV is set and mounts go through
 * btrfs_get_sb().
 */
2442 static struct file_system_type btrfs_fs_type = {
2443 .owner = THIS_MODULE,
2445 .get_sb = btrfs_get_sb,
2446 .kill_sb = kill_block_super,
2447 .fs_flags = FS_REQUIRES_DEV,
/* Superblock operations: inode lifecycle, write-out, sync and statfs hooks. */
2450 static struct super_operations btrfs_super_ops = {
2451 .delete_inode = btrfs_delete_inode,
2452 .put_super = btrfs_put_super,
2453 .read_inode = btrfs_read_locked_inode,
2454 .write_super = btrfs_write_super,
2455 .sync_fs = btrfs_sync_fs,
2456 .write_inode = btrfs_write_inode,
2457 .dirty_inode = btrfs_dirty_inode,
2458 .alloc_inode = btrfs_alloc_inode,
2459 .destroy_inode = btrfs_destroy_inode,
2460 .statfs = btrfs_statfs,
/* Inode operations for writable directories. */
2463 static struct inode_operations btrfs_dir_inode_operations = {
2464 .lookup = btrfs_lookup,
2465 .create = btrfs_create,
2466 .unlink = btrfs_unlink,
2467 .mkdir = btrfs_mkdir,
2468 .rmdir = btrfs_rmdir,
/* Inode operations for read-only directories: lookup is the only hook. */
2471 static struct inode_operations btrfs_dir_ro_inode_operations = {
2472 .lookup = btrfs_lookup,
/* File operations for open directories, including the btrfs ioctl entry. */
2475 static struct file_operations btrfs_dir_file_operations = {
2476 .llseek = generic_file_llseek,
2477 .read = generic_read_dir,
2478 .readdir = btrfs_readdir,
2479 .ioctl = btrfs_ioctl,
/* Address space (page cache) operations for regular file data. */
2482 static struct address_space_operations btrfs_aops = {
2483 .readpage = btrfs_readpage,
2484 .writepage = btrfs_writepage,
2485 .sync_page = block_sync_page,
2486 .prepare_write = btrfs_prepare_write,
2487 .commit_write = btrfs_commit_write,
/* Inode operations for regular files. */
2490 static struct inode_operations btrfs_file_inode_operations = {
2491 .truncate = btrfs_truncate,
2492 .getattr = btrfs_getattr,
/*
 * File operations for regular files.  Reads go through do_sync_read and
 * btrfs_file_aio_read; writes go through btrfs_file_write.
 */
2495 static struct file_operations btrfs_file_operations = {
2496 .llseek = generic_file_llseek,
2497 .read = do_sync_read,
2498 .aio_read = btrfs_file_aio_read,
2499 .write = btrfs_file_write,
2500 .mmap = generic_file_mmap,
2501 .open = generic_file_open,
2502 .ioctl = btrfs_ioctl,
2503 .fsync = btrfs_sync_file,
/*
 * init_btrfs_fs - module entry point.
 *
 * Creates the slab caches, registers the btrfs subsystem and then registers
 * the filesystem type.
 *
 * NOTE(review): the result of register_filesystem() is returned directly, so
 * if it fails the subsystem stays registered and the caches stay allocated.
 * The error checks and the label in front of the destroy_inodecache() call
 * are on lines missing from this listing.
 */
2506 static int __init init_btrfs_fs(void)
2509 printk("btrfs loaded!\n");
2510 err = init_inodecache();
2513 kset_set_kset_s(&btrfs_subsys, fs_subsys);
2514 err = subsystem_register(&btrfs_subsys);
2517 return register_filesystem(&btrfs_fs_type);
2519 destroy_inodecache();
/*
 * exit_btrfs_fs - module exit point.
 *
 * NOTE(review): the slab caches are destroyed before the filesystem type is
 * unregistered, which is the reverse of the order used in init_btrfs_fs().
 * Unregistering first and destroying the caches last would mirror the setup
 * order - confirm and consider reordering.
 */
2523 static void __exit exit_btrfs_fs(void)
2525 destroy_inodecache();
2526 unregister_filesystem(&btrfs_fs_type);
2527 subsystem_unregister(&btrfs_subsys);
2528 printk("btrfs unloaded\n");
/* Module boilerplate: entry and exit points, and the GPL license tag. */
2531 module_init(init_btrfs_fs)
2532 module_exit(exit_btrfs_fs)
2534 MODULE_LICENSE("GPL");