66d9fb2288c302511fbbfdf6529611a2fd53ea09
[linux-2.6-block.git] / fs / btrfs / super.c
1 #include <linux/module.h>
2 #include <linux/buffer_head.h>
3 #include <linux/fs.h>
4 #include <linux/pagemap.h>
5 #include <linux/highmem.h>
6 #include <linux/time.h>
7 #include <linux/init.h>
8 #include <linux/string.h>
9 #include <linux/smp_lock.h>
10 #include <linux/backing-dev.h>
11 #include <linux/mpage.h>
12 #include <linux/swap.h>
13 #include <linux/writeback.h>
14 #include "ctree.h"
15 #include "disk-io.h"
16 #include "transaction.h"
17 #include "btrfs_inode.h"
18
19 void btrfs_fsinfo_release(struct kobject *obj)
20 {
21         struct btrfs_fs_info *fsinfo = container_of(obj,
22                                             struct btrfs_fs_info, kobj);
23         kfree(fsinfo);
24 }
25
26 struct kobj_type btrfs_fsinfo_ktype = {
27         .release = btrfs_fsinfo_release,
28 };
29
30 decl_subsys(btrfs, &btrfs_fsinfo_ktype, NULL);
31
32 #define BTRFS_SUPER_MAGIC 0x9123682E
33
34 static struct inode_operations btrfs_dir_inode_operations;
35 static struct inode_operations btrfs_dir_ro_inode_operations;
36 static struct super_operations btrfs_super_ops;
37 static struct file_operations btrfs_dir_file_operations;
38 static struct inode_operations btrfs_file_inode_operations;
39 static struct address_space_operations btrfs_aops;
40 static struct file_operations btrfs_file_operations;
41
42 static int check_inode(struct inode *inode)
43 {
44         struct btrfs_inode *ei = BTRFS_I(inode);
45         WARN_ON(ei->magic != 0xDEADBEEF);
46         WARN_ON(ei->magic2 != 0xDEADBEAF);
47         return 0;
48 }
49
50 static void btrfs_read_locked_inode(struct inode *inode)
51 {
52         struct btrfs_path *path;
53         struct btrfs_inode_item *inode_item;
54         struct btrfs_root *root = BTRFS_I(inode)->root;
55         struct btrfs_key location;
56         int ret;
57
58         path = btrfs_alloc_path();
59         BUG_ON(!path);
60         btrfs_init_path(path);
61         mutex_lock(&root->fs_info->fs_mutex);
62
63         check_inode(inode);
64
65         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
66         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
67         if (ret) {
68                 btrfs_free_path(path);
69                 goto make_bad;
70         }
71         check_inode(inode);
72         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
73                                   path->slots[0],
74                                   struct btrfs_inode_item);
75
76         inode->i_mode = btrfs_inode_mode(inode_item);
77         inode->i_nlink = btrfs_inode_nlink(inode_item);
78         inode->i_uid = btrfs_inode_uid(inode_item);
79         inode->i_gid = btrfs_inode_gid(inode_item);
80         inode->i_size = btrfs_inode_size(inode_item);
81         inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
82         inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
83         inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
84         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
85         inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
86         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
87         inode->i_blocks = btrfs_inode_nblocks(inode_item);
88         inode->i_generation = btrfs_inode_generation(inode_item);
89
90         btrfs_free_path(path);
91         inode_item = NULL;
92
93         mutex_unlock(&root->fs_info->fs_mutex);
94         check_inode(inode);
95         switch (inode->i_mode & S_IFMT) {
96 #if 0
97         default:
98                 init_special_inode(inode, inode->i_mode,
99                                    btrfs_inode_rdev(inode_item));
100                 break;
101 #endif
102         case S_IFREG:
103                 inode->i_mapping->a_ops = &btrfs_aops;
104                 inode->i_fop = &btrfs_file_operations;
105                 inode->i_op = &btrfs_file_inode_operations;
106                 break;
107         case S_IFDIR:
108                 inode->i_fop = &btrfs_dir_file_operations;
109                 if (root == root->fs_info->tree_root)
110                         inode->i_op = &btrfs_dir_ro_inode_operations;
111                 else
112                         inode->i_op = &btrfs_dir_inode_operations;
113                 break;
114         case S_IFLNK:
115                 // inode->i_op = &page_symlink_inode_operations;
116                 break;
117         }
118         check_inode(inode);
119         return;
120
121 make_bad:
122         btrfs_release_path(root, path);
123         btrfs_free_path(path);
124         mutex_unlock(&root->fs_info->fs_mutex);
125         make_bad_inode(inode);
126 }
127
128 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
129                               struct btrfs_root *root,
130                               struct inode *dir,
131                               struct dentry *dentry)
132 {
133         struct btrfs_path *path;
134         const char *name = dentry->d_name.name;
135         int name_len = dentry->d_name.len;
136         int ret;
137         u64 objectid;
138         struct btrfs_dir_item *di;
139
140         path = btrfs_alloc_path();
141         BUG_ON(!path);
142         btrfs_init_path(path);
143         ret = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
144                                     name, name_len, -1);
145         if (ret < 0)
146                 goto err;
147         if (ret > 0) {
148                 ret = -ENOENT;
149                 goto err;
150         }
151         di = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
152                             struct btrfs_dir_item);
153         objectid = btrfs_disk_key_objectid(&di->location);
154
155         ret = btrfs_del_item(trans, root, path);
156         BUG_ON(ret);
157
158         btrfs_release_path(root, path);
159         ret = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
160                                           objectid, -1);
161         BUG_ON(ret);
162         ret = btrfs_del_item(trans, root, path);
163         BUG_ON(ret);
164         dentry->d_inode->i_ctime = dir->i_ctime;
165 err:
166         btrfs_release_path(root, path);
167         btrfs_free_path(path);
168         if (ret == 0) {
169                 inode_dec_link_count(dentry->d_inode);
170                 dir->i_size -= name_len * 2;
171                 mark_inode_dirty(dir);
172         }
173         return ret;
174 }
175
176 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
177 {
178         struct btrfs_root *root;
179         struct btrfs_trans_handle *trans;
180         int ret;
181
182         root = BTRFS_I(dir)->root;
183         mutex_lock(&root->fs_info->fs_mutex);
184         trans = btrfs_start_transaction(root, 1);
185         ret = btrfs_unlink_trans(trans, root, dir, dentry);
186         btrfs_end_transaction(trans, root);
187         mutex_unlock(&root->fs_info->fs_mutex);
188         return ret;
189 }
190
191 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
192 {
193         struct inode *inode = dentry->d_inode;
194         int err;
195         int ret;
196         struct btrfs_root *root = BTRFS_I(dir)->root;
197         struct btrfs_path *path;
198         struct btrfs_key key;
199         struct btrfs_trans_handle *trans;
200         struct btrfs_key found_key;
201         int found_type;
202         struct btrfs_leaf *leaf;
203         char *goodnames = "..";
204
205         path = btrfs_alloc_path();
206         BUG_ON(!path);
207         btrfs_init_path(path);
208         mutex_lock(&root->fs_info->fs_mutex);
209         trans = btrfs_start_transaction(root, 1);
210         key.objectid = inode->i_ino;
211         key.offset = (u64)-1;
212         key.flags = (u32)-1;
213         while(1) {
214                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
215                 if (ret < 0) {
216                         err = ret;
217                         goto out;
218                 }
219                 BUG_ON(ret == 0);
220                 if (path->slots[0] == 0) {
221                         err = -ENOENT;
222                         goto out;
223                 }
224                 path->slots[0]--;
225                 leaf = btrfs_buffer_leaf(path->nodes[0]);
226                 btrfs_disk_key_to_cpu(&found_key,
227                                       &leaf->items[path->slots[0]].key);
228                 found_type = btrfs_key_type(&found_key);
229                 if (found_key.objectid != inode->i_ino) {
230                         err = -ENOENT;
231                         goto out;
232                 }
233                 if ((found_type != BTRFS_DIR_ITEM_KEY &&
234                      found_type != BTRFS_DIR_INDEX_KEY) ||
235                     (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
236                     !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
237                         err = -ENOTEMPTY;
238                         goto out;
239                 }
240                 ret = btrfs_del_item(trans, root, path);
241                 BUG_ON(ret);
242
243                 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
244                         break;
245                 btrfs_release_path(root, path);
246         }
247         ret = 0;
248         btrfs_release_path(root, path);
249
250         /* now the directory is empty */
251         err = btrfs_unlink_trans(trans, root, dir, dentry);
252         if (!err) {
253                 inode->i_size = 0;
254         }
255 out:
256         btrfs_release_path(root, path);
257         btrfs_free_path(path);
258         mutex_unlock(&root->fs_info->fs_mutex);
259         ret = btrfs_end_transaction(trans, root);
260         if (ret && !err)
261                 err = ret;
262         return err;
263 }
264
265 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
266                             struct btrfs_root *root,
267                             struct inode *inode)
268 {
269         u64 objectid = inode->i_ino;
270         struct btrfs_path *path;
271         struct btrfs_inode_map_item *map;
272         struct btrfs_key stat_data_key;
273         int ret;
274
275         clear_inode(inode);
276
277         path = btrfs_alloc_path();
278         BUG_ON(!path);
279         btrfs_init_path(path);
280         ret = btrfs_lookup_inode_map(trans, root, path, objectid, -1);
281         if (ret) {
282                 if (ret > 0)
283                         ret = -ENOENT;
284                 goto error;
285         }
286         map = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
287                             struct btrfs_inode_map_item);
288         btrfs_disk_key_to_cpu(&stat_data_key, &map->key);
289         ret = btrfs_del_item(trans, root->fs_info->inode_root, path);
290         BUG_ON(ret);
291         btrfs_release_path(root, path);
292
293         ret = btrfs_lookup_inode(trans, root, path,
294                                  &BTRFS_I(inode)->location, -1);
295         BUG_ON(ret);
296         ret = btrfs_del_item(trans, root, path);
297         BUG_ON(ret);
298 error:
299         btrfs_release_path(root, path);
300         btrfs_free_path(path);
301         return ret;
302 }
303
304 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
305                                    struct btrfs_root *root,
306                                    struct inode *inode)
307 {
308         int ret;
309         struct btrfs_path *path;
310         struct btrfs_key key;
311         struct btrfs_disk_key *found_key;
312         struct btrfs_leaf *leaf;
313         struct btrfs_file_extent_item *fi = NULL;
314         u64 extent_start = 0;
315         u64 extent_num_blocks = 0;
316         int found_extent;
317
318         path = btrfs_alloc_path();
319         BUG_ON(!path);
320         /* FIXME, add redo link to tree so we don't leak on crash */
321         key.objectid = inode->i_ino;
322         key.offset = (u64)-1;
323         key.flags = 0;
324         /*
325          * use BTRFS_CSUM_ITEM_KEY because it is larger than inline keys
326          * or extent data
327          */
328         btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY);
329         while(1) {
330                 btrfs_init_path(path);
331                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
332                 if (ret < 0) {
333                         goto error;
334                 }
335                 if (ret > 0) {
336                         BUG_ON(path->slots[0] == 0);
337                         path->slots[0]--;
338                 }
339                 leaf = btrfs_buffer_leaf(path->nodes[0]);
340                 found_key = &leaf->items[path->slots[0]].key;
341                 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
342                         break;
343                 if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY &&
344                     btrfs_disk_key_type(found_key) != BTRFS_INLINE_DATA_KEY &&
345                     btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY)
346                         break;
347                 if (btrfs_disk_key_offset(found_key) < inode->i_size)
348                         break;
349                 if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) {
350                         fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
351                                             path->slots[0],
352                                             struct btrfs_file_extent_item);
353                         extent_start = btrfs_file_extent_disk_blocknr(fi);
354                         extent_num_blocks =
355                                 btrfs_file_extent_disk_num_blocks(fi);
356                         inode->i_blocks -=
357                                 btrfs_file_extent_num_blocks(fi) >> 9;
358                         found_extent = 1;
359                 } else {
360                         found_extent = 0;
361                 }
362                 ret = btrfs_del_item(trans, root, path);
363                 BUG_ON(ret);
364                 btrfs_release_path(root, path);
365                 if (found_extent) {
366                         ret = btrfs_free_extent(trans, root, extent_start,
367                                                 extent_num_blocks, 0);
368                         BUG_ON(ret);
369                 }
370         }
371         ret = 0;
372 error:
373         btrfs_release_path(root, path);
374         btrfs_free_path(path);
375         return ret;
376 }
377
378 static void btrfs_delete_inode(struct inode *inode)
379 {
380         struct btrfs_trans_handle *trans;
381         struct btrfs_root *root = BTRFS_I(inode)->root;
382         int ret;
383
384         truncate_inode_pages(&inode->i_data, 0);
385         if (is_bad_inode(inode)) {
386                 goto no_delete;
387         }
388         inode->i_size = 0;
389         mutex_lock(&root->fs_info->fs_mutex);
390         trans = btrfs_start_transaction(root, 1);
391         if (S_ISREG(inode->i_mode)) {
392                 ret = btrfs_truncate_in_trans(trans, root, inode);
393                 BUG_ON(ret);
394         }
395         btrfs_free_inode(trans, root, inode);
396         btrfs_end_transaction(trans, root);
397         mutex_unlock(&root->fs_info->fs_mutex);
398         return;
399 no_delete:
400         clear_inode(inode);
401 }
402
403 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
404                                struct btrfs_key *location)
405 {
406         const char *name = dentry->d_name.name;
407         int namelen = dentry->d_name.len;
408         struct btrfs_dir_item *di;
409         struct btrfs_path *path;
410         struct btrfs_root *root = BTRFS_I(dir)->root;
411         int ret;
412
413         path = btrfs_alloc_path();
414         BUG_ON(!path);
415         btrfs_init_path(path);
416         ret = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
417                                     namelen, 0);
418         if (ret || !btrfs_match_dir_item_name(root, path, name, namelen)) {
419                 location->objectid = 0;
420                 ret = 0;
421                 goto out;
422         }
423         di = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
424                             struct btrfs_dir_item);
425         btrfs_disk_key_to_cpu(location, &di->location);
426 out:
427         btrfs_release_path(root, path);
428         btrfs_free_path(path);
429         check_inode(dir);
430         return ret;
431 }
432
433 int fixup_tree_root_location(struct btrfs_root *root,
434                              struct btrfs_key *location,
435                              struct btrfs_root **sub_root)
436 {
437         struct btrfs_path *path;
438         struct btrfs_root_item *ri;
439         int ret;
440
441         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
442                 return 0;
443         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
444                 return 0;
445
446         path = btrfs_alloc_path();
447         BUG_ON(!path);
448         mutex_lock(&root->fs_info->fs_mutex);
449
450         ret = btrfs_lookup_inode(NULL, root, path, location, 0);
451         if (ret)
452                 goto out;
453         ri = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
454                           path->slots[0],
455                           struct btrfs_root_item);
456         location->objectid = btrfs_root_dirid(ri);
457         location->flags = 0;
458         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
459         location->offset = 0;
460         /* FIXME properly select the root */
461         *sub_root = root->fs_info->fs_root;
462 out:
463         btrfs_free_path(path);
464         mutex_unlock(&root->fs_info->fs_mutex);
465         return ret;
466 }
467
468
469 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
470                                    struct nameidata *nd)
471 {
472         struct inode * inode;
473         struct btrfs_inode *bi = BTRFS_I(dir);
474         struct btrfs_root *root = bi->root;
475         struct btrfs_root *sub_root = root;
476         struct btrfs_key location;
477         int ret;
478
479         if (dentry->d_name.len > BTRFS_NAME_LEN)
480                 return ERR_PTR(-ENAMETOOLONG);
481         mutex_lock(&root->fs_info->fs_mutex);
482         ret = btrfs_inode_by_name(dir, dentry, &location);
483         mutex_unlock(&root->fs_info->fs_mutex);
484         if (ret < 0)
485                 return ERR_PTR(ret);
486         inode = NULL;
487         if (location.objectid) {
488                 ret = fixup_tree_root_location(root, &location, &sub_root);
489                 if (ret < 0)
490                         return ERR_PTR(ret);
491                 if (ret > 0)
492                         return ERR_PTR(-ENOENT);
493                 inode = iget_locked(dir->i_sb, location.objectid);
494                 if (!inode)
495                         return ERR_PTR(-EACCES);
496                 if (inode->i_state & I_NEW) {
497                         BTRFS_I(inode)->root = sub_root;
498                         memcpy(&BTRFS_I(inode)->location, &location,
499                                sizeof(location));
500                         btrfs_read_locked_inode(inode);
501                         unlock_new_inode(inode);
502                 }
503                 check_inode(inode);
504         }
505         check_inode(dir);
506         return d_splice_alias(inode, dentry);
507 }
508
509 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
510 {
511         struct inode *inode = filp->f_path.dentry->d_inode;
512         struct btrfs_root *root = BTRFS_I(inode)->root;
513         struct btrfs_item *item;
514         struct btrfs_dir_item *di;
515         struct btrfs_key key;
516         struct btrfs_path *path;
517         int ret;
518         u32 nritems;
519         struct btrfs_leaf *leaf;
520         int slot;
521         int advance;
522         unsigned char d_type = DT_UNKNOWN;
523         int over = 0;
524         int key_type = BTRFS_DIR_INDEX_KEY;
525
526         /* FIXME, use a real flag for deciding about the key type */
527         if (root->fs_info->tree_root == root)
528                 key_type = BTRFS_DIR_ITEM_KEY;
529
530         mutex_lock(&root->fs_info->fs_mutex);
531         key.objectid = inode->i_ino;
532         key.flags = 0;
533         btrfs_set_key_type(&key, key_type);
534         key.offset = filp->f_pos;
535         path = btrfs_alloc_path();
536         btrfs_init_path(path);
537         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
538         if (ret < 0) {
539                 goto err;
540         }
541         advance = 0;
542         while(1) {
543                 leaf = btrfs_buffer_leaf(path->nodes[0]);
544                 nritems = btrfs_header_nritems(&leaf->header);
545                 slot = path->slots[0];
546                 if (advance || slot >= nritems) {
547                         if (slot >= nritems -1) {
548                                 ret = btrfs_next_leaf(root, path);
549                                 if (ret)
550                                         break;
551                                 leaf = btrfs_buffer_leaf(path->nodes[0]);
552                                 nritems = btrfs_header_nritems(&leaf->header);
553                                 slot = path->slots[0];
554                         } else {
555                                 slot++;
556                                 path->slots[0]++;
557                         }
558                 }
559                 advance = 1;
560                 item = leaf->items + slot;
561                 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
562                         break;
563                 if (key_type == BTRFS_DIR_INDEX_KEY &&
564                     btrfs_disk_key_offset(&item->key) >
565                     root->fs_info->highest_inode)
566                         break;
567                 if (btrfs_disk_key_type(&item->key) != key_type)
568                         continue;
569                 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
570                         continue;
571                 filp->f_pos = btrfs_disk_key_offset(&item->key);
572                 advance = 1;
573                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
574                 over = filldir(dirent, (const char *)(di + 1),
575                                btrfs_dir_name_len(di),
576                                btrfs_disk_key_offset(&item->key),
577                                btrfs_disk_key_objectid(&di->location), d_type);
578                 if (over)
579                         goto nopos;
580         }
581         filp->f_pos++;
582 nopos:
583         ret = 0;
584 err:
585         btrfs_release_path(root, path);
586         btrfs_free_path(path);
587         mutex_unlock(&root->fs_info->fs_mutex);
588         return ret;
589 }
590
591 static void btrfs_put_super (struct super_block * sb)
592 {
593         struct btrfs_root *root = btrfs_sb(sb);
594         int ret;
595
596         ret = close_ctree(root);
597         if (ret) {
598                 printk("close ctree returns %d\n", ret);
599         }
600         sb->s_fs_info = NULL;
601 }
602
603 static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
604 {
605         struct inode * inode;
606         struct dentry * root_dentry;
607         struct btrfs_super_block *disk_super;
608         struct btrfs_root *root;
609         struct btrfs_inode *bi;
610
611         sb->s_maxbytes = MAX_LFS_FILESIZE;
612         sb->s_magic = BTRFS_SUPER_MAGIC;
613         sb->s_op = &btrfs_super_ops;
614         sb->s_time_gran = 1;
615
616         root = open_ctree(sb);
617
618         if (!root) {
619                 printk("btrfs: open_ctree failed\n");
620                 return -EIO;
621         }
622         sb->s_fs_info = root;
623         disk_super = root->fs_info->disk_super;
624         printk("read in super total blocks %Lu root %Lu\n",
625                btrfs_super_total_blocks(disk_super),
626                btrfs_super_root_dir(disk_super));
627
628         inode = iget_locked(sb, btrfs_super_root_dir(disk_super));
629         bi = BTRFS_I(inode);
630         bi->location.objectid = inode->i_ino;
631         bi->location.offset = 0;
632         bi->location.flags = 0;
633         bi->root = root->fs_info->tree_root;
634         btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
635
636         if (!inode)
637                 return -ENOMEM;
638         if (inode->i_state & I_NEW) {
639                 btrfs_read_locked_inode(inode);
640                 unlock_new_inode(inode);
641         }
642
643         root_dentry = d_alloc_root(inode);
644         if (!root_dentry) {
645                 iput(inode);
646                 return -ENOMEM;
647         }
648         sb->s_root = root_dentry;
649
650         return 0;
651 }
652
653 static void fill_inode_item(struct btrfs_inode_item *item,
654                             struct inode *inode)
655 {
656         btrfs_set_inode_uid(item, inode->i_uid);
657         btrfs_set_inode_gid(item, inode->i_gid);
658         btrfs_set_inode_size(item, inode->i_size);
659         btrfs_set_inode_mode(item, inode->i_mode);
660         btrfs_set_inode_nlink(item, inode->i_nlink);
661         btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
662         btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
663         btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
664         btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
665         btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
666         btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
667         btrfs_set_inode_nblocks(item, inode->i_blocks);
668         btrfs_set_inode_generation(item, inode->i_generation);
669         check_inode(inode);
670 }
671
672 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
673                               struct btrfs_root *root,
674                               struct inode *inode)
675 {
676         struct btrfs_inode_item *inode_item;
677         struct btrfs_path *path;
678         int ret;
679
680         path = btrfs_alloc_path();
681         BUG_ON(!path);
682         btrfs_init_path(path);
683
684         ret = btrfs_lookup_inode(trans, root, path,
685                                  &BTRFS_I(inode)->location, 1);
686         if (ret) {
687                 if (ret > 0)
688                         ret = -ENOENT;
689                 goto failed;
690         }
691
692         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
693                                   path->slots[0],
694                                   struct btrfs_inode_item);
695
696         fill_inode_item(inode_item, inode);
697         btrfs_mark_buffer_dirty(path->nodes[0]);
698 failed:
699         btrfs_release_path(root, path);
700         btrfs_free_path(path);
701         check_inode(inode);
702         return 0;
703 }
704
705 static int btrfs_write_inode(struct inode *inode, int wait)
706 {
707         struct btrfs_root *root = BTRFS_I(inode)->root;
708         struct btrfs_trans_handle *trans;
709         int ret;
710
711         mutex_lock(&root->fs_info->fs_mutex);
712         trans = btrfs_start_transaction(root, 1);
713         ret = btrfs_update_inode(trans, root, inode);
714         if (wait)
715                 btrfs_commit_transaction(trans, root);
716         else
717                 btrfs_end_transaction(trans, root);
718         mutex_unlock(&root->fs_info->fs_mutex);
719         check_inode(inode);
720         return ret;
721 }
722
723 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
724                                      struct inode *dir, int mode)
725 {
726         struct inode *inode;
727         struct btrfs_inode_item inode_item;
728         struct btrfs_root *root = BTRFS_I(dir)->root;
729         struct btrfs_key *key;
730         int ret;
731         u64 objectid;
732
733         inode = new_inode(dir->i_sb);
734         if (!inode)
735                 return ERR_PTR(-ENOMEM);
736
737         BTRFS_I(inode)->root = BTRFS_I(dir)->root;
738         key = &BTRFS_I(inode)->location;
739         check_inode(inode);
740         ret = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
741         BUG_ON(ret);
742
743         inode->i_uid = current->fsuid;
744         inode->i_gid = current->fsgid;
745         inode->i_mode = mode;
746         inode->i_ino = objectid;
747         inode->i_blocks = 0;
748         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
749         fill_inode_item(&inode_item, inode);
750
751         key->objectid = objectid;
752         key->flags = 0;
753         key->offset = 0;
754         btrfs_set_key_type(key, BTRFS_INODE_ITEM_KEY);
755         ret = btrfs_insert_inode_map(trans, root, objectid, key);
756         BUG_ON(ret);
757
758         ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
759         BUG_ON(ret);
760
761         insert_inode_hash(inode);
762         check_inode(inode);
763         check_inode(dir);
764         return inode;
765 }
766
767 static int btrfs_add_link(struct btrfs_trans_handle *trans,
768                             struct dentry *dentry, struct inode *inode)
769 {
770         int ret;
771         struct btrfs_key key;
772         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
773         key.objectid = inode->i_ino;
774         key.flags = 0;
775         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
776         key.offset = 0;
777
778         ret = btrfs_insert_dir_item(trans, root,
779                                     dentry->d_name.name, dentry->d_name.len,
780                                     dentry->d_parent->d_inode->i_ino,
781                                     &key, 0);
782         if (ret == 0) {
783                 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
784                 ret = btrfs_update_inode(trans, root,
785                                          dentry->d_parent->d_inode);
786         }
787         check_inode(inode);
788         check_inode(dentry->d_parent->d_inode);
789         return ret;
790 }
791
792 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
793                             struct dentry *dentry, struct inode *inode)
794 {
795         int err = btrfs_add_link(trans, dentry, inode);
796         if (!err) {
797                 d_instantiate(dentry, inode);
798                 return 0;
799         }
800         if (err > 0)
801                 err = -EEXIST;
802         check_inode(inode);
803         return err;
804 }
805
806 static int btrfs_create(struct inode *dir, struct dentry *dentry,
807                         int mode, struct nameidata *nd)
808 {
809         struct btrfs_trans_handle *trans;
810         struct btrfs_root *root = BTRFS_I(dir)->root;
811         struct inode *inode;
812         int err;
813         int drop_inode = 0;
814
815         mutex_lock(&root->fs_info->fs_mutex);
816         trans = btrfs_start_transaction(root, 1);
817         inode = btrfs_new_inode(trans, dir, mode);
818         err = PTR_ERR(inode);
819         if (IS_ERR(inode))
820                 goto out_unlock;
821         // FIXME mark the inode dirty
822         err = btrfs_add_nondir(trans, dentry, inode);
823         if (err)
824                 drop_inode = 1;
825         else {
826                 inode->i_mapping->a_ops = &btrfs_aops;
827                 inode->i_fop = &btrfs_file_operations;
828                 inode->i_op = &btrfs_file_inode_operations;
829         }
830         dir->i_sb->s_dirt = 1;
831 out_unlock:
832         btrfs_end_transaction(trans, root);
833         mutex_unlock(&root->fs_info->fs_mutex);
834         check_inode(inode);
835         check_inode(dir);
836
837         if (drop_inode) {
838                 inode_dec_link_count(inode);
839                 iput(inode);
840         }
841         return err;
842 }
843
844 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
845                                 struct inode *inode, struct inode *dir)
846 {
847         struct btrfs_root *root = BTRFS_I(dir)->root;
848         int ret;
849         char buf[2];
850         struct btrfs_key key;
851
852         buf[0] = '.';
853         buf[1] = '.';
854
855         key.objectid = inode->i_ino;
856         key.offset = 0;
857         key.flags = 0;
858         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
859
860         ret = btrfs_insert_dir_item(trans, root, buf, 1, inode->i_ino,
861                                     &key, 1);
862         if (ret)
863                 goto error;
864         key.objectid = dir->i_ino;
865         ret = btrfs_insert_dir_item(trans, root, buf, 2, inode->i_ino,
866                                     &key, 1);
867         if (ret)
868                 goto error;
869         inode->i_size = 6;
870         ret = btrfs_update_inode(trans, root, inode);
871 error:
872         return ret;
873 }
874
875 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
876 {
877         struct inode *inode;
878         struct btrfs_trans_handle *trans;
879         struct btrfs_root *root = BTRFS_I(dir)->root;
880         int err = 0;
881         int drop_on_err = 0;
882
883         mutex_lock(&root->fs_info->fs_mutex);
884         trans = btrfs_start_transaction(root, 1);
885         if (IS_ERR(trans)) {
886                 err = PTR_ERR(trans);
887                 goto out_unlock;
888         }
889         inode = btrfs_new_inode(trans, dir, S_IFDIR | mode);
890         if (IS_ERR(inode)) {
891                 err = PTR_ERR(inode);
892                 goto out_fail;
893         }
894         drop_on_err = 1;
895         inode->i_op = &btrfs_dir_inode_operations;
896         inode->i_fop = &btrfs_dir_file_operations;
897
898         err = btrfs_make_empty_dir(trans, inode, dir);
899         if (err)
900                 goto out_fail;
901         err = btrfs_add_link(trans, dentry, inode);
902         if (err)
903                 goto out_fail;
904         d_instantiate(dentry, inode);
905         drop_on_err = 0;
906
907 out_fail:
908         btrfs_end_transaction(trans, root);
909 out_unlock:
910         mutex_unlock(&root->fs_info->fs_mutex);
911         if (drop_on_err)
912                 iput(inode);
913         return err;
914 }
915
916 static int btrfs_sync_fs(struct super_block *sb, int wait)
917 {
918         struct btrfs_trans_handle *trans;
919         struct btrfs_root *root;
920         int ret;
921         root = btrfs_sb(sb);
922
923         sb->s_dirt = 0;
924         if (!wait) {
925                 filemap_flush(root->fs_info->btree_inode->i_mapping);
926                 return 0;
927         }
928         filemap_write_and_wait(root->fs_info->btree_inode->i_mapping);
929         mutex_lock(&root->fs_info->fs_mutex);
930         trans = btrfs_start_transaction(root, 1);
931         ret = btrfs_commit_transaction(trans, root);
932         sb->s_dirt = 0;
933         BUG_ON(ret);
934 printk("btrfs sync_fs\n");
935         mutex_unlock(&root->fs_info->fs_mutex);
936         return 0;
937 }
938
939 #if 0
940 static int btrfs_get_block_inline(struct inode *inode, sector_t iblock,
941                            struct buffer_head *result, int create)
942 {
943         struct btrfs_root *root = btrfs_sb(inode->i_sb);
944         struct btrfs_path *path;
945         struct btrfs_key key;
946         struct btrfs_leaf *leaf;
947         int num_bytes = result->b_size;
948         int item_size;
949         int ret;
950         u64 pos;
951         char *ptr;
952         int copy_size;
953         int err = 0;
954         char *safe_ptr;
955         char *data_ptr;
956
957         path = btrfs_alloc_path();
958         BUG_ON(!path);
959
960         WARN_ON(create);
961         if (create) {
962                 return 0;
963         }
964         pos = iblock << inode->i_blkbits;
965         key.objectid = inode->i_ino;
966         key.flags = 0;
967         btrfs_set_key_type(&key, BTRFS_INLINE_DATA_KEY);
968         ptr = kmap(result->b_page);
969         safe_ptr = ptr;
970         ptr += (pos & (PAGE_CACHE_SIZE -1));
971 again:
972         key.offset = pos;
973         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
974         if (ret) {
975                 if (ret < 0)
976                         err = ret;
977                 else
978                         err = 0;
979                 goto out;
980         }
981         leaf = btrfs_buffer_leaf(path->nodes[0]);
982         item_size = btrfs_item_size(leaf->items + path->slots[0]);
983         copy_size = min(num_bytes, item_size);
984         data_ptr = btrfs_item_ptr(leaf, path->slots[0], char);
985         WARN_ON(safe_ptr + PAGE_CACHE_SIZE < ptr + copy_size);
986         memcpy(ptr, data_ptr, copy_size);
987         pos += copy_size;
988         num_bytes -= copy_size;
989         WARN_ON(num_bytes < 0);
990         ptr += copy_size;
991         btrfs_release_path(root, path);
992         if (num_bytes != 0) {
993                 if (pos >= i_size_read(inode))
994                         memset(ptr, 0, num_bytes);
995                 else
996                         goto again;
997         }
998         set_buffer_uptodate(result);
999         map_bh(result, inode->i_sb, 0);
1000         err = 0;
1001 out:
1002         btrfs_free_path(path);
1003         kunmap(result->b_page);
1004         return err;
1005 }
1006 #endif
1007
1008 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1009                            struct buffer_head *result, int create)
1010 {
1011         int ret;
1012         int err = 0;
1013         u64 blocknr;
1014         u64 extent_start = 0;
1015         u64 extent_end = 0;
1016         u64 objectid = inode->i_ino;
1017         struct btrfs_path *path;
1018         struct btrfs_root *root = BTRFS_I(inode)->root;
1019         struct btrfs_trans_handle *trans = NULL;
1020         struct btrfs_file_extent_item *item;
1021         struct btrfs_leaf *leaf;
1022         struct btrfs_disk_key *found_key;
1023
1024         path = btrfs_alloc_path();
1025         BUG_ON(!path);
1026         btrfs_init_path(path);
1027         if (create)
1028                 trans = btrfs_start_transaction(root, 1);
1029
1030         ret = btrfs_lookup_file_extent(trans, root, path,
1031                                        inode->i_ino,
1032                                        iblock << inode->i_blkbits, 0);
1033         if (ret < 0) {
1034                 err = ret;
1035                 goto out;
1036         }
1037
1038         if (ret != 0) {
1039                 if (path->slots[0] == 0) {
1040                         btrfs_release_path(root, path);
1041                         goto allocate;
1042                 }
1043                 path->slots[0]--;
1044         }
1045
1046         item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
1047                               struct btrfs_file_extent_item);
1048         leaf = btrfs_buffer_leaf(path->nodes[0]);
1049         blocknr = btrfs_file_extent_disk_blocknr(item);
1050         blocknr += btrfs_file_extent_offset(item);
1051
1052         /* exact match found, use it */
1053         if (ret == 0) {
1054                 err = 0;
1055                 map_bh(result, inode->i_sb, blocknr);
1056                 goto out;
1057         }
1058
1059         /* are we inside the extent that was found? */
1060         found_key = &leaf->items[path->slots[0]].key;
1061         if (btrfs_disk_key_objectid(found_key) != objectid ||
1062             btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) {
1063                 extent_end = 0;
1064                 extent_start = 0;
1065                 btrfs_release_path(root, path);
1066                 goto allocate;
1067         }
1068
1069         extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1070         extent_start = extent_start >> inode->i_blkbits;
1071         extent_start += btrfs_file_extent_offset(item);
1072         extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1073         if (iblock >= extent_start && iblock < extent_end) {
1074                 err = 0;
1075                 map_bh(result, inode->i_sb, blocknr + iblock - extent_start);
1076                 goto out;
1077         }
1078 allocate:
1079         /* ok, create a new extent */
1080         if (!create) {
1081                 err = 0;
1082                 goto out;
1083         }
1084         ret = btrfs_alloc_file_extent(trans, root, objectid,
1085                                       iblock << inode->i_blkbits,
1086                                       1, extent_end, &blocknr);
1087         if (ret) {
1088                 err = ret;
1089                 goto out;
1090         }
1091         inode->i_blocks += inode->i_sb->s_blocksize >> 9;
1092         set_buffer_new(result);
1093         map_bh(result, inode->i_sb, blocknr);
1094
1095 out:
1096         btrfs_release_path(root, path);
1097         btrfs_free_path(path);
1098         if (trans)
1099                 btrfs_end_transaction(trans, root);
1100         return err;
1101 }
1102
1103 static int btrfs_get_block(struct inode *inode, sector_t iblock,
1104                            struct buffer_head *result, int create)
1105 {
1106         int err;
1107         struct btrfs_root *root = BTRFS_I(inode)->root;
1108         mutex_lock(&root->fs_info->fs_mutex);
1109         err = btrfs_get_block_lock(inode, iblock, result, create);
1110         // err = btrfs_get_block_inline(inode, iblock, result, create);
1111         mutex_unlock(&root->fs_info->fs_mutex);
1112         return err;
1113 }
1114
1115 static int btrfs_prepare_write(struct file *file, struct page *page,
1116                                unsigned from, unsigned to)
1117 {
1118         return nobh_prepare_write(page, from, to, btrfs_get_block);
1119 }
/*
 * Commit bytes [@from, @to) of @page; a straight pass-through to
 * nobh_commit_write().
 */
static int btrfs_commit_write(struct file *file, struct page *page,
                               unsigned from, unsigned to)
{
        int ret;

        ret = nobh_commit_write(file, page, from, to);
        return ret;
}
1125
/*
 * Write out the superblock by running a waiting btrfs_sync_fs(); its
 * return value is ignored.
 */
static void btrfs_write_super(struct super_block *sb)
{
        const int wait = 1;

        (void)btrfs_sync_fs(sb, wait);
}
1130
1131 static int btrfs_readpage(struct file *file, struct page *page)
1132 {
1133         return mpage_readpage(page, btrfs_get_block);
1134 }
1135
1136 static int btrfs_readpages(struct file *file, struct address_space *mapping,
1137                            struct list_head *pages, unsigned nr_pages)
1138 {
1139         return mpage_readpages(mapping, pages, nr_pages, btrfs_get_block);
1140 }
1141
1142 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1143 {
1144         return nobh_writepage(page, btrfs_get_block, wbc);
1145 }
1146
1147 static void btrfs_truncate(struct inode *inode)
1148 {
1149         struct btrfs_root *root = BTRFS_I(inode)->root;
1150         int ret;
1151         struct btrfs_trans_handle *trans;
1152
1153         if (!S_ISREG(inode->i_mode))
1154                 return;
1155         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1156                 return;
1157
1158         nobh_truncate_page(inode->i_mapping, inode->i_size);
1159
1160         /* FIXME, add redo link to tree so we don't leak on crash */
1161         mutex_lock(&root->fs_info->fs_mutex);
1162         trans = btrfs_start_transaction(root, 1);
1163         ret = btrfs_truncate_in_trans(trans, root, inode);
1164         BUG_ON(ret);
1165         ret = btrfs_end_transaction(trans, root);
1166         BUG_ON(ret);
1167         mutex_unlock(&root->fs_info->fs_mutex);
1168         mark_inode_dirty(inode);
1169 }
1170
1171 static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
1172                                 struct page **prepared_pages,
1173                                 const char __user * buf)
1174 {
1175         long page_fault = 0;
1176         int i;
1177         int offset = pos & (PAGE_CACHE_SIZE - 1);
1178
1179         for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
1180                 size_t count = min_t(size_t,
1181                                      PAGE_CACHE_SIZE - offset, write_bytes);
1182                 struct page *page = prepared_pages[i];
1183                 fault_in_pages_readable(buf, count);
1184
1185                 /* Copy data from userspace to the current page */
1186                 kmap(page);
1187                 page_fault = __copy_from_user(page_address(page) + offset,
1188                                               buf, count);
1189                 /* Flush processor's dcache for this page */
1190                 flush_dcache_page(page);
1191                 kunmap(page);
1192                 buf += count;
1193                 write_bytes -= count;
1194
1195                 if (page_fault)
1196                         break;
1197         }
1198         return page_fault ? -EFAULT : 0;
1199 }
1200
1201 static void btrfs_drop_pages(struct page **pages, size_t num_pages)
1202 {
1203         size_t i;
1204         for (i = 0; i < num_pages; i++) {
1205                 if (!pages[i])
1206                         break;
1207                 unlock_page(pages[i]);
1208                 mark_page_accessed(pages[i]);
1209                 page_cache_release(pages[i]);
1210         }
1211 }
1212 static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
1213                                    struct btrfs_root *root,
1214                                    struct file *file,
1215                                    struct page **pages,
1216                                    size_t num_pages,
1217                                    loff_t pos,
1218                                    size_t write_bytes)
1219 {
1220         int i;
1221         int offset;
1222         int err = 0;
1223         int ret;
1224         int this_write;
1225         struct inode *inode = file->f_path.dentry->d_inode;
1226
1227         for (i = 0; i < num_pages; i++) {
1228                 offset = pos & (PAGE_CACHE_SIZE -1);
1229                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1230                 /* FIXME, one block at a time */
1231
1232                 mutex_lock(&root->fs_info->fs_mutex);
1233                 trans = btrfs_start_transaction(root, 1);
1234                 btrfs_csum_file_block(trans, root, inode->i_ino,
1235                                       pages[i]->index << PAGE_CACHE_SHIFT,
1236                                       kmap(pages[i]), PAGE_CACHE_SIZE);
1237                 kunmap(pages[i]);
1238                 SetPageChecked(pages[i]);
1239                 ret = btrfs_end_transaction(trans, root);
1240                 BUG_ON(ret);
1241                 mutex_unlock(&root->fs_info->fs_mutex);
1242
1243                 ret = nobh_commit_write(file, pages[i], offset,
1244                                          offset + this_write);
1245                 pos += this_write;
1246                 if (ret) {
1247                         err = ret;
1248                         goto failed;
1249                 }
1250                 WARN_ON(this_write > write_bytes);
1251                 write_bytes -= this_write;
1252         }
1253 failed:
1254         return err;
1255 }
1256
1257 static int prepare_pages(struct btrfs_trans_handle *trans,
1258                          struct btrfs_root *root,
1259                          struct file *file,
1260                          struct page **pages,
1261                          size_t num_pages,
1262                          loff_t pos,
1263                          size_t write_bytes)
1264 {
1265         int i;
1266         unsigned long index = pos >> PAGE_CACHE_SHIFT;
1267         struct inode *inode = file->f_path.dentry->d_inode;
1268         int offset;
1269         int err = 0;
1270         int ret;
1271         int this_write;
1272         loff_t isize = i_size_read(inode);
1273
1274         memset(pages, 0, num_pages * sizeof(struct page *));
1275
1276         for (i = 0; i < num_pages; i++) {
1277                 pages[i] = grab_cache_page(inode->i_mapping, index + i);
1278                 if (!pages[i]) {
1279                         err = -ENOMEM;
1280                         goto failed_release;
1281                 }
1282                 offset = pos & (PAGE_CACHE_SIZE -1);
1283                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1284                 ret = nobh_prepare_write(pages[i], offset,
1285                                          offset + this_write,
1286                                          btrfs_get_block);
1287                 pos += this_write;
1288                 if (ret) {
1289                         err = ret;
1290                         goto failed_truncate;
1291                 }
1292                 WARN_ON(this_write > write_bytes);
1293                 write_bytes -= this_write;
1294         }
1295         return 0;
1296
1297 failed_release:
1298         btrfs_drop_pages(pages, num_pages);
1299         return err;
1300
1301 failed_truncate:
1302         btrfs_drop_pages(pages, num_pages);
1303         if (pos > isize)
1304                 vmtruncate(inode, isize);
1305         return err;
1306 }
1307
1308 static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1309                                 size_t count, loff_t *ppos)
1310 {
1311         loff_t pos;
1312         size_t num_written = 0;
1313         int err = 0;
1314         int ret = 0;
1315         struct inode *inode = file->f_path.dentry->d_inode;
1316         struct btrfs_root *root = BTRFS_I(inode)->root;
1317         struct page *pages[1];
1318
1319         if (file->f_flags & O_DIRECT)
1320                 return -EINVAL;
1321         pos = *ppos;
1322
1323         vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
1324         current->backing_dev_info = inode->i_mapping->backing_dev_info;
1325         err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1326         if (err)
1327                 goto out;
1328         if (count == 0)
1329                 goto out;
1330         err = remove_suid(file->f_path.dentry);
1331         if (err)
1332                 goto out;
1333         file_update_time(file);
1334         mutex_lock(&inode->i_mutex);
1335         while(count > 0) {
1336                 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1337                 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
1338                 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
1339                                         PAGE_CACHE_SHIFT;
1340                 ret = prepare_pages(NULL, root, file, pages, num_pages,
1341                                     pos, write_bytes);
1342                 BUG_ON(ret);
1343                 ret = btrfs_copy_from_user(pos, num_pages,
1344                                            write_bytes, pages, buf);
1345                 BUG_ON(ret);
1346
1347                 ret = dirty_and_release_pages(NULL, root, file, pages,
1348                                               num_pages, pos, write_bytes);
1349                 BUG_ON(ret);
1350                 btrfs_drop_pages(pages, num_pages);
1351
1352                 buf += write_bytes;
1353                 count -= write_bytes;
1354                 pos += write_bytes;
1355                 num_written += write_bytes;
1356
1357                 balance_dirty_pages_ratelimited(inode->i_mapping);
1358                 cond_resched();
1359         }
1360         mutex_unlock(&inode->i_mutex);
1361 out:
1362         *ppos = pos;
1363         current->backing_dev_info = NULL;
1364         return num_written ? num_written : err;
1365 }
1366
1367 #if 0
1368 static ssize_t inline_one_page(struct btrfs_root *root, struct inode *inode,
1369                            struct page *page, loff_t pos,
1370                            size_t offset, size_t write_bytes)
1371 {
1372         struct btrfs_path *path;
1373         struct btrfs_trans_handle *trans;
1374         struct btrfs_key key;
1375         struct btrfs_leaf *leaf;
1376         struct btrfs_key found_key;
1377         int ret;
1378         size_t copy_size = 0;
1379         char *dst = NULL;
1380         int err = 0;
1381         size_t num_written = 0;
1382
1383         path = btrfs_alloc_path();
1384         BUG_ON(!path);
1385         mutex_lock(&root->fs_info->fs_mutex);
1386         trans = btrfs_start_transaction(root, 1);
1387         key.objectid = inode->i_ino;
1388         key.flags = 0;
1389         btrfs_set_key_type(&key, BTRFS_INLINE_DATA_KEY);
1390
1391 again:
1392         key.offset = pos;
1393         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1394         if (ret < 0) {
1395                 err = ret;
1396                 goto out;
1397         }
1398         if (ret == 0) {
1399                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1400                 btrfs_disk_key_to_cpu(&found_key,
1401                                       &leaf->items[path->slots[0]].key);
1402                 copy_size = btrfs_item_size(leaf->items + path->slots[0]);
1403                 dst = btrfs_item_ptr(leaf, path->slots[0], char);
1404                 copy_size = min(write_bytes, copy_size);
1405                 goto copyit;
1406         } else {
1407                 int slot = path->slots[0];
1408                 if (slot > 0) {
1409                         slot--;
1410                 }
1411                 // FIXME find max key
1412                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1413                 btrfs_disk_key_to_cpu(&found_key,
1414                                       &leaf->items[slot].key);
1415                 if (found_key.objectid != inode->i_ino)
1416                         goto insert;
1417                 if (btrfs_key_type(&found_key) != BTRFS_INLINE_DATA_KEY)
1418                         goto insert;
1419                 copy_size = btrfs_item_size(leaf->items + slot);
1420                 if (found_key.offset + copy_size <= pos)
1421                         goto insert;
1422                 dst = btrfs_item_ptr(leaf, path->slots[0], char);
1423                 dst += pos - found_key.offset;
1424                 copy_size = copy_size - (pos - found_key.offset);
1425                 BUG_ON(copy_size < 0);
1426                 copy_size = min(write_bytes, copy_size);
1427                 WARN_ON(copy_size == 0);
1428                 goto copyit;
1429         }
1430 insert:
1431         btrfs_release_path(root, path);
1432         copy_size = min(write_bytes,
1433                         (size_t)BTRFS_LEAF_DATA_SIZE(root) -
1434                         sizeof(struct btrfs_item) * 4);
1435         ret = btrfs_insert_empty_item(trans, root, path, &key, copy_size);
1436         BUG_ON(ret);
1437         dst = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1438                              path->slots[0], char);
1439 copyit:
1440         WARN_ON(copy_size == 0);
1441         WARN_ON(dst + copy_size >
1442                 btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1443                                                  path->slots[0], char) +
1444                 btrfs_item_size(btrfs_buffer_leaf(path->nodes[0])->items +
1445                                                   path->slots[0]));
1446         btrfs_memcpy(root, path->nodes[0]->b_data, dst,
1447                      page_address(page) + offset, copy_size);
1448         mark_buffer_dirty(path->nodes[0]);
1449         btrfs_release_path(root, path);
1450         pos += copy_size;
1451         offset += copy_size;
1452         num_written += copy_size;
1453         write_bytes -= copy_size;
1454         if (write_bytes)
1455                 goto again;
1456 out:
1457         btrfs_free_path(path);
1458         ret = btrfs_end_transaction(trans, root);
1459         BUG_ON(ret);
1460         mutex_unlock(&root->fs_info->fs_mutex);
1461         return num_written ? num_written : err;
1462 }
1463
1464 static ssize_t btrfs_file_inline_write(struct file *file,
1465                                        const char __user *buf,
1466                                        size_t count, loff_t *ppos)
1467 {
1468         loff_t pos;
1469         size_t num_written = 0;
1470         int err = 0;
1471         int ret = 0;
1472         struct inode *inode = file->f_path.dentry->d_inode;
1473         struct btrfs_root *root = BTRFS_I(inode)->root;
1474         unsigned long page_index;
1475
1476         if (file->f_flags & O_DIRECT)
1477                 return -EINVAL;
1478         pos = *ppos;
1479
1480         vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
1481         current->backing_dev_info = inode->i_mapping->backing_dev_info;
1482         err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1483         if (err)
1484                 goto out;
1485         if (count == 0)
1486                 goto out;
1487         err = remove_suid(file->f_path.dentry);
1488         if (err)
1489                 goto out;
1490         file_update_time(file);
1491         mutex_lock(&inode->i_mutex);
1492         while(count > 0) {
1493                 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1494                 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
1495                 struct page *page;
1496
1497                 page_index = pos >> PAGE_CACHE_SHIFT;
1498                 page = grab_cache_page(inode->i_mapping, page_index);
1499                 if (!PageUptodate(page)) {
1500                         ret = mpage_readpage(page, btrfs_get_block);
1501                         BUG_ON(ret);
1502                         lock_page(page);
1503                 }
1504                 ret = btrfs_copy_from_user(pos, 1,
1505                                            write_bytes, &page, buf);
1506                 BUG_ON(ret);
1507                 write_bytes = inline_one_page(root, inode, page, pos,
1508                                       offset, write_bytes);
1509                 SetPageUptodate(page);
1510                 if (write_bytes > 0 && pos + write_bytes > inode->i_size) {
1511                         i_size_write(inode, pos + write_bytes);
1512                         mark_inode_dirty(inode);
1513                 }
1514                 page_cache_release(page);
1515                 unlock_page(page);
1516                 if (write_bytes < 0)
1517                         goto out_unlock;
1518                 buf += write_bytes;
1519                 count -= write_bytes;
1520                 pos += write_bytes;
1521                 num_written += write_bytes;
1522
1523                 balance_dirty_pages_ratelimited(inode->i_mapping);
1524                 cond_resched();
1525         }
1526 out_unlock:
1527         mutex_unlock(&inode->i_mutex);
1528 out:
1529         *ppos = pos;
1530         current->backing_dev_info = NULL;
1531         return num_written ? num_written : err;
1532 }
1533 #endif
1534
1535 static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
1536                         unsigned long offset, unsigned long size)
1537 {
1538         char *kaddr;
1539         unsigned long left, count = desc->count;
1540         struct inode *inode = page->mapping->host;
1541
1542         if (size > count)
1543                 size = count;
1544
1545         if (!PageChecked(page)) {
1546                 /* FIXME, do it per block */
1547                 struct btrfs_root *root = BTRFS_I(inode)->root;
1548                 int ret = btrfs_csum_verify_file_block(root,
1549                                           page->mapping->host->i_ino,
1550                                           page->index << PAGE_CACHE_SHIFT,
1551                                           kmap(page), PAGE_CACHE_SIZE);
1552                 if (ret) {
1553                         printk("failed to verify ino %lu page %lu\n",
1554                                page->mapping->host->i_ino,
1555                                page->index);
1556                         memset(page_address(page), 0, PAGE_CACHE_SIZE);
1557                 }
1558                 SetPageChecked(page);
1559                 kunmap(page);
1560         }
1561         /*
1562          * Faults on the destination of a read are common, so do it before
1563          * taking the kmap.
1564          */
1565         if (!fault_in_pages_writeable(desc->arg.buf, size)) {
1566                 kaddr = kmap_atomic(page, KM_USER0);
1567                 left = __copy_to_user_inatomic(desc->arg.buf,
1568                                                 kaddr + offset, size);
1569                 kunmap_atomic(kaddr, KM_USER0);
1570                 if (left == 0)
1571                         goto success;
1572         }
1573
1574         /* Do it the slow way */
1575         kaddr = kmap(page);
1576         left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
1577         kunmap(page);
1578
1579         if (left) {
1580                 size -= left;
1581                 desc->error = -EFAULT;
1582         }
1583 success:
1584         desc->count = count - size;
1585         desc->written += size;
1586         desc->arg.buf += size;
1587         return size;
1588 }
1589
1590 /**
1591  * btrfs_file_aio_read - filesystem read routine
1592  * @iocb:       kernel I/O control block
1593  * @iov:        io vector request
1594  * @nr_segs:    number of segments in the iovec
1595  * @pos:        current file position
1596  */
1597 static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1598                                    unsigned long nr_segs, loff_t pos)
1599 {
1600         struct file *filp = iocb->ki_filp;
1601         ssize_t retval;
1602         unsigned long seg;
1603         size_t count;
1604         loff_t *ppos = &iocb->ki_pos;
1605
1606         count = 0;
1607         for (seg = 0; seg < nr_segs; seg++) {
1608                 const struct iovec *iv = &iov[seg];
1609
1610                 /*
1611                  * If any segment has a negative length, or the cumulative
1612                  * length ever wraps negative then return -EINVAL.
1613                  */
1614                 count += iv->iov_len;
1615                 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
1616                         return -EINVAL;
1617                 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
1618                         continue;
1619                 if (seg == 0)
1620                         return -EFAULT;
1621                 nr_segs = seg;
1622                 count -= iv->iov_len;   /* This segment is no good */
1623                 break;
1624         }
1625         retval = 0;
1626         if (count) {
1627                 for (seg = 0; seg < nr_segs; seg++) {
1628                         read_descriptor_t desc;
1629
1630                         desc.written = 0;
1631                         desc.arg.buf = iov[seg].iov_base;
1632                         desc.count = iov[seg].iov_len;
1633                         if (desc.count == 0)
1634                                 continue;
1635                         desc.error = 0;
1636                         do_generic_file_read(filp, ppos, &desc,
1637                                              btrfs_read_actor);
1638                         retval += desc.written;
1639                         if (desc.error) {
1640                                 retval = retval ?: desc.error;
1641                                 break;
1642                         }
1643                 }
1644         }
1645         return retval;
1646 }
1647
/*
 * Slab caches used by btrfs.  All of them are created by init_inodecache()
 * and released by destroy_inodecache().
 */

/* cache for struct btrfs_inode, only referenced from this file */
static struct kmem_cache *btrfs_inode_cachep;

/* caches with external linkage */
struct kmem_cache *btrfs_path_cachep;
struct kmem_cache *btrfs_bit_radix_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
1653
1654 /*
1655  * Called inside transaction, so use GFP_NOFS
1656  */
1657 static struct inode *btrfs_alloc_inode(struct super_block *sb)
1658 {
1659         struct btrfs_inode *ei;
1660
1661         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
1662         if (!ei)
1663                 return NULL;
1664         ei->magic = 0xDEADBEEF;
1665         ei->magic2 = 0xDEADBEAF;
1666         return &ei->vfs_inode;
1667 }
1668
1669 static void btrfs_destroy_inode(struct inode *inode)
1670 {
1671         struct btrfs_inode *ei = BTRFS_I(inode);
1672         WARN_ON(ei->magic != 0xDEADBEEF);
1673         WARN_ON(ei->magic2 != 0xDEADBEAF);
1674         WARN_ON(!list_empty(&inode->i_dentry));
1675         WARN_ON(inode->i_data.nrpages);
1676
1677         ei->magic = 0;
1678         ei->magic2 = 0;
1679         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
1680 }
1681
1682 static void init_once(void * foo, struct kmem_cache * cachep,
1683                       unsigned long flags)
1684 {
1685         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
1686
1687         if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
1688             SLAB_CTOR_CONSTRUCTOR) {
1689                 inode_init_once(&ei->vfs_inode);
1690         }
1691 }
1692
1693 static int init_inodecache(void)
1694 {
1695         btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
1696                                              sizeof(struct btrfs_inode),
1697                                              0, (SLAB_RECLAIM_ACCOUNT|
1698                                                 SLAB_MEM_SPREAD),
1699                                              init_once, NULL);
1700         btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
1701                                              sizeof(struct btrfs_trans_handle),
1702                                              0, (SLAB_RECLAIM_ACCOUNT|
1703                                                 SLAB_MEM_SPREAD),
1704                                              NULL, NULL);
1705         btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
1706                                              sizeof(struct btrfs_transaction),
1707                                              0, (SLAB_RECLAIM_ACCOUNT|
1708                                                 SLAB_MEM_SPREAD),
1709                                              NULL, NULL);
1710         btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
1711                                              sizeof(struct btrfs_transaction),
1712                                              0, (SLAB_RECLAIM_ACCOUNT|
1713                                                 SLAB_MEM_SPREAD),
1714                                              NULL, NULL);
1715         btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
1716                                              256,
1717                                              0, (SLAB_RECLAIM_ACCOUNT|
1718                                                 SLAB_MEM_SPREAD |
1719                                                 SLAB_DESTROY_BY_RCU),
1720                                              NULL, NULL);
1721         if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL ||
1722             btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL)
1723                 return -ENOMEM;
1724         return 0;
1725 }
1726
1727 static void destroy_inodecache(void)
1728 {
1729         kmem_cache_destroy(btrfs_inode_cachep);
1730         kmem_cache_destroy(btrfs_trans_handle_cachep);
1731         kmem_cache_destroy(btrfs_transaction_cachep);
1732         kmem_cache_destroy(btrfs_bit_radix_cachep);
1733         kmem_cache_destroy(btrfs_path_cachep);
1734 }
1735
1736 static int btrfs_get_sb(struct file_system_type *fs_type,
1737         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
1738 {
1739         return get_sb_bdev(fs_type, flags, dev_name, data,
1740                            btrfs_fill_super, mnt);
1741 }
1742
1743 static struct file_system_type btrfs_fs_type = {
1744         .owner          = THIS_MODULE,
1745         .name           = "btrfs",
1746         .get_sb         = btrfs_get_sb,
1747         .kill_sb        = kill_block_super,
1748         .fs_flags       = FS_REQUIRES_DEV,
1749 };
1750
1751 static struct super_operations btrfs_super_ops = {
1752         .statfs         = simple_statfs,
1753         .delete_inode   = btrfs_delete_inode,
1754         .put_super      = btrfs_put_super,
1755         .read_inode     = btrfs_read_locked_inode,
1756         .write_super    = btrfs_write_super,
1757         .sync_fs        = btrfs_sync_fs,
1758         .write_inode    = btrfs_write_inode,
1759         .alloc_inode    = btrfs_alloc_inode,
1760         .destroy_inode  = btrfs_destroy_inode,
1761 };
1762
1763 static struct inode_operations btrfs_dir_inode_operations = {
1764         .lookup         = btrfs_lookup,
1765         .create         = btrfs_create,
1766         .unlink         = btrfs_unlink,
1767         .mkdir          = btrfs_mkdir,
1768         .rmdir          = btrfs_rmdir,
1769 };
1770
1771 static struct inode_operations btrfs_dir_ro_inode_operations = {
1772         .lookup         = btrfs_lookup,
1773 };
1774
1775 static struct file_operations btrfs_dir_file_operations = {
1776         .llseek         = generic_file_llseek,
1777         .read           = generic_read_dir,
1778         .readdir        = btrfs_readdir,
1779 };
1780
1781 static struct address_space_operations btrfs_aops = {
1782         .readpage       = btrfs_readpage,
1783         .readpages      = btrfs_readpages,
1784         .writepage      = btrfs_writepage,
1785         .sync_page      = block_sync_page,
1786         .prepare_write  = btrfs_prepare_write,
1787         .commit_write   = btrfs_commit_write,
1788 };
1789
1790 static struct inode_operations btrfs_file_inode_operations = {
1791         .truncate       = btrfs_truncate,
1792 };
1793
1794 static struct file_operations btrfs_file_operations = {
1795         .llseek         = generic_file_llseek,
1796         .read           = do_sync_read,
1797         .aio_read       = btrfs_file_aio_read,
1798         .write          = btrfs_file_write,
1799         .mmap           = generic_file_mmap,
1800         .open           = generic_file_open,
1801 };
1802
1803 static int __init init_btrfs_fs(void)
1804 {
1805         int err;
1806         printk("btrfs loaded!\n");
1807         err = init_inodecache();
1808         if (err)
1809                 return err;
1810         kset_set_kset_s(&btrfs_subsys, fs_subsys);
1811         err = subsystem_register(&btrfs_subsys);
1812         if (err)
1813                 goto out;
1814         return register_filesystem(&btrfs_fs_type);
1815 out:
1816         destroy_inodecache();
1817         return err;
1818 }
1819
1820 static void __exit exit_btrfs_fs(void)
1821 {
1822         destroy_inodecache();
1823         unregister_filesystem(&btrfs_fs_type);
1824         subsystem_unregister(&btrfs_subsys);
1825         printk("btrfs unloaded\n");
1826 }
1827
1828 module_init(init_btrfs_fs)
1829 module_exit(exit_btrfs_fs)
1830
1831 MODULE_LICENSE("GPL");