Btrfs: verify csums on read
[linux-block.git] / fs / btrfs / super.c
1 #include <linux/module.h>
2 #include <linux/buffer_head.h>
3 #include <linux/fs.h>
4 #include <linux/pagemap.h>
5 #include <linux/highmem.h>
6 #include <linux/time.h>
7 #include <linux/init.h>
8 #include <linux/string.h>
9 #include <linux/smp_lock.h>
10 #include <linux/backing-dev.h>
11 #include <linux/mpage.h>
12 #include <linux/swap.h>
13 #include <linux/writeback.h>
14 #include "ctree.h"
15 #include "disk-io.h"
16 #include "transaction.h"
17
18 #define BTRFS_SUPER_MAGIC 0x9123682E
19
20 static struct inode_operations btrfs_dir_inode_operations;
21 static struct super_operations btrfs_super_ops;
22 static struct file_operations btrfs_dir_file_operations;
23 static struct inode_operations btrfs_file_inode_operations;
24 static struct address_space_operations btrfs_aops;
25 static struct file_operations btrfs_file_operations;
26
27 static void btrfs_read_locked_inode(struct inode *inode)
28 {
29         struct btrfs_path path;
30         struct btrfs_inode_item *inode_item;
31         struct btrfs_root *root = btrfs_sb(inode->i_sb);
32         int ret;
33
34         btrfs_init_path(&path);
35         mutex_lock(&root->fs_info->fs_mutex);
36
37         ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0);
38         if (ret) {
39                 btrfs_release_path(root, &path);
40                 mutex_unlock(&root->fs_info->fs_mutex);
41                 make_bad_inode(inode);
42                 return;
43         }
44         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]),
45                                   path.slots[0],
46                                   struct btrfs_inode_item);
47
48         inode->i_mode = btrfs_inode_mode(inode_item);
49         inode->i_nlink = btrfs_inode_nlink(inode_item);
50         inode->i_uid = btrfs_inode_uid(inode_item);
51         inode->i_gid = btrfs_inode_gid(inode_item);
52         inode->i_size = btrfs_inode_size(inode_item);
53         inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
54         inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
55         inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
56         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
57         inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
58         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
59         inode->i_blocks = btrfs_inode_nblocks(inode_item);
60         inode->i_generation = btrfs_inode_generation(inode_item);
61         btrfs_release_path(root, &path);
62         mutex_unlock(&root->fs_info->fs_mutex);
63         switch (inode->i_mode & S_IFMT) {
64 #if 0
65         default:
66                 init_special_inode(inode, inode->i_mode,
67                                    btrfs_inode_rdev(inode_item));
68                 break;
69 #endif
70         case S_IFREG:
71                 inode->i_mapping->a_ops = &btrfs_aops;
72                 inode->i_fop = &btrfs_file_operations;
73                 inode->i_op = &btrfs_file_inode_operations;
74                 break;
75         case S_IFDIR:
76                 inode->i_op = &btrfs_dir_inode_operations;
77                 inode->i_fop = &btrfs_dir_file_operations;
78                 break;
79         case S_IFLNK:
80                 // inode->i_op = &page_symlink_inode_operations;
81                 break;
82         }
83         return;
84 }
85
86 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
87                               struct btrfs_root *root,
88                               struct inode *dir,
89                               struct dentry *dentry)
90 {
91         struct btrfs_path path;
92         const char *name = dentry->d_name.name;
93         int name_len = dentry->d_name.len;
94         int ret;
95         u64 objectid;
96         struct btrfs_dir_item *di;
97
98         btrfs_init_path(&path);
99         ret = btrfs_lookup_dir_item(trans, root, &path, dir->i_ino,
100                                     name, name_len, -1);
101         if (ret < 0)
102                 goto err;
103         if (ret > 0) {
104                 ret = -ENOENT;
105                 goto err;
106         }
107         di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0],
108                             struct btrfs_dir_item);
109         objectid = btrfs_dir_objectid(di);
110
111         ret = btrfs_del_item(trans, root, &path);
112         BUG_ON(ret);
113         dentry->d_inode->i_ctime = dir->i_ctime;
114 err:
115         btrfs_release_path(root, &path);
116         if (ret == 0)
117                 inode_dec_link_count(dentry->d_inode);
118         return ret;
119 }
120
121 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
122 {
123         struct btrfs_root *root;
124         struct btrfs_trans_handle *trans;
125         int ret;
126
127         root = btrfs_sb(dir->i_sb);
128         mutex_lock(&root->fs_info->fs_mutex);
129         trans = btrfs_start_transaction(root, 1);
130         ret = btrfs_unlink_trans(trans, root, dir, dentry);
131         btrfs_end_transaction(trans, root);
132         mutex_unlock(&root->fs_info->fs_mutex);
133         return ret;
134 }
135
136 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
137 {
138         struct inode *inode = dentry->d_inode;
139         int err;
140         int ret;
141         struct btrfs_root *root = btrfs_sb(dir->i_sb);
142         struct btrfs_path path;
143         struct btrfs_key key;
144         struct btrfs_trans_handle *trans;
145         struct btrfs_disk_key *found_key;
146         struct btrfs_leaf *leaf;
147
148         btrfs_init_path(&path);
149         mutex_lock(&root->fs_info->fs_mutex);
150         trans = btrfs_start_transaction(root, 1);
151         key.objectid = inode->i_ino;
152         key.offset = (u64)-1;
153         key.flags = 0;
154         btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
155         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
156         if (ret < 0) {
157                 err = ret;
158                 goto out;
159         }
160
161         BUG_ON(ret == 0);
162         BUG_ON(path.slots[0] == 0);
163         path.slots[0]--;
164         leaf = btrfs_buffer_leaf(path.nodes[0]);
165         found_key = &leaf->items[path.slots[0]].key;
166         if (btrfs_disk_key_objectid(found_key) != inode->i_ino) {
167                 err = -ENOENT;
168                 goto out;
169         }
170         if (btrfs_disk_key_type(found_key) != BTRFS_DIR_ITEM_KEY ||
171             btrfs_disk_key_offset(found_key) != 2) {
172                 err = -ENOTEMPTY;
173                 goto out;
174         }
175         ret = btrfs_del_item(trans, root, &path);
176         BUG_ON(ret);
177         btrfs_release_path(root, &path);
178         key.offset = 1;
179         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
180         if (ret < 0) {
181                 err = ret;
182                 goto out;
183         }
184         if (ret > 0) {
185                 err = -ENOTEMPTY;
186                 goto out;
187         }
188         ret = btrfs_del_item(trans, root, &path);
189         if (ret) {
190                 err = ret;
191                 goto out;
192         }
193         btrfs_release_path(root, &path);
194
195         /* now the directory is empty */
196         err = btrfs_unlink_trans(trans, root, dir, dentry);
197         if (!err) {
198                 inode->i_size = 0;
199         }
200 out:
201         mutex_unlock(&root->fs_info->fs_mutex);
202         ret = btrfs_end_transaction(trans, root);
203         if (ret && !err)
204                 err = ret;
205         return err;
206 }
207
208 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
209                             struct btrfs_root *root,
210                             struct inode *inode)
211 {
212         u64 objectid = inode->i_ino;
213         struct btrfs_path path;
214         struct btrfs_inode_map_item *map;
215         struct btrfs_key stat_data_key;
216         int ret;
217         clear_inode(inode);
218         btrfs_init_path(&path);
219         ret = btrfs_lookup_inode_map(trans, root, &path, objectid, -1);
220         if (ret) {
221                 if (ret > 0)
222                         ret = -ENOENT;
223                 btrfs_release_path(root, &path);
224                 goto error;
225         }
226         map = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0],
227                             struct btrfs_inode_map_item);
228         btrfs_disk_key_to_cpu(&stat_data_key, &map->key);
229         ret = btrfs_del_item(trans, root->fs_info->inode_root, &path);
230         BUG_ON(ret);
231         btrfs_release_path(root, &path);
232         btrfs_init_path(&path);
233
234         ret = btrfs_lookup_inode(trans, root, &path, objectid, -1);
235         BUG_ON(ret);
236         ret = btrfs_del_item(trans, root, &path);
237         BUG_ON(ret);
238         btrfs_release_path(root, &path);
239 error:
240         return ret;
241 }
242
243 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
244                                    struct btrfs_root *root,
245                                    struct inode *inode)
246 {
247         int ret;
248         struct btrfs_path path;
249         struct btrfs_key key;
250         struct btrfs_disk_key *found_key;
251         struct btrfs_leaf *leaf;
252         struct btrfs_file_extent_item *fi = NULL;
253         u64 extent_start = 0;
254         u64 extent_num_blocks = 0;
255         int found_extent;
256
257         /* FIXME, add redo link to tree so we don't leak on crash */
258         key.objectid = inode->i_ino;
259         key.offset = (u64)-1;
260         key.flags = 0;
261         btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY);
262         while(1) {
263                 btrfs_init_path(&path);
264                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
265                 if (ret < 0) {
266                         btrfs_release_path(root, &path);
267                         goto error;
268                 }
269                 if (ret > 0) {
270                         BUG_ON(path.slots[0] == 0);
271                         path.slots[0]--;
272                 }
273                 leaf = btrfs_buffer_leaf(path.nodes[0]);
274                 found_key = &leaf->items[path.slots[0]].key;
275                 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
276                         break;
277                 if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY &&
278                     btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY)
279                         break;
280                 if (btrfs_disk_key_offset(found_key) < inode->i_size)
281                         break;
282                 if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) {
283                         fi = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]),
284                                             path.slots[0],
285                                             struct btrfs_file_extent_item);
286                         extent_start = btrfs_file_extent_disk_blocknr(fi);
287                         extent_num_blocks =
288                                 btrfs_file_extent_disk_num_blocks(fi);
289                         inode->i_blocks -=
290                                 btrfs_file_extent_num_blocks(fi) >> 9;
291                         found_extent = 1;
292                 } else {
293                         found_extent = 0;
294                 }
295                 ret = btrfs_del_item(trans, root, &path);
296                 BUG_ON(ret);
297                 btrfs_release_path(root, &path);
298                 if (found_extent) {
299                         ret = btrfs_free_extent(trans, root, extent_start,
300                                                 extent_num_blocks, 0);
301                         BUG_ON(ret);
302                 }
303         }
304         btrfs_release_path(root, &path);
305         ret = 0;
306 error:
307         return ret;
308 }
309
310 static void btrfs_delete_inode(struct inode *inode)
311 {
312         struct btrfs_trans_handle *trans;
313         struct btrfs_root *root = btrfs_sb(inode->i_sb);
314         int ret;
315
316         truncate_inode_pages(&inode->i_data, 0);
317         if (is_bad_inode(inode)) {
318                 goto no_delete;
319         }
320         inode->i_size = 0;
321         mutex_lock(&root->fs_info->fs_mutex);
322         trans = btrfs_start_transaction(root, 1);
323         if (S_ISREG(inode->i_mode)) {
324                 ret = btrfs_truncate_in_trans(trans, root, inode);
325                 BUG_ON(ret);
326         }
327         btrfs_free_inode(trans, root, inode);
328         btrfs_end_transaction(trans, root);
329         mutex_unlock(&root->fs_info->fs_mutex);
330         return;
331 no_delete:
332         clear_inode(inode);
333 }
334
335 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
336                               ino_t *ino)
337 {
338         const char *name = dentry->d_name.name;
339         int namelen = dentry->d_name.len;
340         struct btrfs_dir_item *di;
341         struct btrfs_path path;
342         struct btrfs_root *root = btrfs_sb(dir->i_sb);
343         int ret;
344
345         btrfs_init_path(&path);
346         ret = btrfs_lookup_dir_item(NULL, root, &path, dir->i_ino, name,
347                                     namelen, 0);
348         if (ret || !btrfs_match_dir_item_name(root, &path, name, namelen)) {
349                 *ino = 0;
350                 goto out;
351         }
352         di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0],
353                             struct btrfs_dir_item);
354         *ino = btrfs_dir_objectid(di);
355 out:
356         btrfs_release_path(root, &path);
357         return ret;
358 }
359
360 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
361                                    struct nameidata *nd)
362 {
363         struct inode * inode;
364         ino_t ino;
365         int ret;
366
367         if (dentry->d_name.len > BTRFS_NAME_LEN)
368                 return ERR_PTR(-ENAMETOOLONG);
369
370         ret = btrfs_inode_by_name(dir, dentry, &ino);
371         if (ret < 0)
372                 return ERR_PTR(ret);
373         inode = NULL;
374         if (ino) {
375                 inode = iget(dir->i_sb, ino);
376                 if (!inode)
377                         return ERR_PTR(-EACCES);
378         }
379         return d_splice_alias(inode, dentry);
380 }
381
382 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
383 {
384         struct inode *inode = filp->f_path.dentry->d_inode;
385         struct btrfs_root *root = btrfs_sb(inode->i_sb);
386         struct btrfs_item *item;
387         struct btrfs_dir_item *di;
388         struct btrfs_key key;
389         struct btrfs_path path;
390         int ret;
391         u32 nritems;
392         struct btrfs_leaf *leaf;
393         int slot;
394         int advance;
395         unsigned char d_type = DT_UNKNOWN;
396         int over = 0;
397
398         key.objectid = inode->i_ino;
399         key.flags = 0;
400         btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
401         key.offset = filp->f_pos;
402         btrfs_init_path(&path);
403         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
404         if (ret < 0) {
405                 goto err;
406         }
407         advance = 0;
408         while(1) {
409                 leaf = btrfs_buffer_leaf(path.nodes[0]);
410                 nritems = btrfs_header_nritems(&leaf->header);
411                 slot = path.slots[0];
412                 if (advance || slot >= nritems) {
413                         if (slot >= nritems -1) {
414                                 ret = btrfs_next_leaf(root, &path);
415                                 if (ret)
416                                         break;
417                                 leaf = btrfs_buffer_leaf(path.nodes[0]);
418                                 nritems = btrfs_header_nritems(&leaf->header);
419                                 slot = path.slots[0];
420                         } else {
421                                 slot++;
422                                 path.slots[0]++;
423                         }
424                 }
425                 advance = 1;
426                 item = leaf->items + slot;
427                 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
428                         break;
429                 if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_ITEM_KEY)
430                         continue;
431                 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
432                         continue;
433
434                 advance = 1;
435                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
436                 over = filldir(dirent, (const char *)(di + 1),
437                                btrfs_dir_name_len(di),
438                                btrfs_disk_key_offset(&item->key),
439                                btrfs_dir_objectid(di), d_type);
440                 if (over) {
441                         filp->f_pos = btrfs_disk_key_offset(&item->key);
442                         break;
443                 }
444                 filp->f_pos = btrfs_disk_key_offset(&item->key) + 1;
445         }
446         ret = 0;
447 err:
448         btrfs_release_path(root, &path);
449         return ret;
450 }
451
452 static void btrfs_put_super (struct super_block * sb)
453 {
454         struct btrfs_root *root = btrfs_sb(sb);
455         int ret;
456
457         ret = close_ctree(root);
458         if (ret) {
459                 printk("close ctree returns %d\n", ret);
460         }
461         sb->s_fs_info = NULL;
462 }
463
464 static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
465 {
466         struct inode * inode;
467         struct dentry * root_dentry;
468         struct btrfs_super_block *disk_super;
469         struct buffer_head *bh;
470         struct btrfs_root *root;
471
472         sb->s_maxbytes = MAX_LFS_FILESIZE;
473         sb->s_blocksize = PAGE_CACHE_SIZE;
474         sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
475         sb->s_magic = BTRFS_SUPER_MAGIC;
476         sb->s_op = &btrfs_super_ops;
477         sb->s_time_gran = 1;
478
479         bh = sb_bread(sb, BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize);
480         if (!bh) {
481                 printk("btrfs: unable to read on disk super\n");
482                 return -EIO;
483         }
484         disk_super = (struct btrfs_super_block *)bh->b_data;
485         root = open_ctree(sb, bh, disk_super);
486
487         if (!root) {
488                 printk("btrfs: open_ctree failed\n");
489                 return -EIO;
490         }
491         sb->s_fs_info = root;
492         disk_super = root->fs_info->disk_super;
493         printk("read in super total blocks %Lu root %Lu\n",
494                btrfs_super_total_blocks(disk_super),
495                btrfs_super_root_dir(disk_super));
496
497         inode = iget_locked(sb, btrfs_super_root_dir(disk_super));
498         if (!inode)
499                 return -ENOMEM;
500         if (inode->i_state & I_NEW) {
501                 btrfs_read_locked_inode(inode);
502                 unlock_new_inode(inode);
503         }
504
505         root_dentry = d_alloc_root(inode);
506         if (!root_dentry) {
507                 iput(inode);
508                 return -ENOMEM;
509         }
510         sb->s_root = root_dentry;
511
512         return 0;
513 }
514
515 static void fill_inode_item(struct btrfs_inode_item *item,
516                             struct inode *inode)
517 {
518         btrfs_set_inode_uid(item, inode->i_uid);
519         btrfs_set_inode_gid(item, inode->i_gid);
520         btrfs_set_inode_size(item, inode->i_size);
521         btrfs_set_inode_mode(item, inode->i_mode);
522         btrfs_set_inode_nlink(item, inode->i_nlink);
523         btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
524         btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
525         btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
526         btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
527         btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
528         btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
529         btrfs_set_inode_nblocks(item, inode->i_blocks);
530         btrfs_set_inode_generation(item, inode->i_generation);
531 }
532
533 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
534                               struct btrfs_root *root,
535                               struct inode *inode)
536 {
537         struct btrfs_inode_item *inode_item;
538         struct btrfs_path path;
539         int ret;
540
541         btrfs_init_path(&path);
542
543         ret = btrfs_lookup_inode(trans, root, &path, inode->i_ino, 1);
544         if (ret) {
545                 if (ret > 0)
546                         ret = -ENOENT;
547                 goto failed;
548         }
549
550         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]),
551                                   path.slots[0],
552                                   struct btrfs_inode_item);
553
554         fill_inode_item(inode_item, inode);
555         mark_buffer_dirty(path.nodes[0]);
556 failed:
557         btrfs_release_path(root, &path);
558         return 0;
559 }
560
561 static int btrfs_write_inode(struct inode *inode, int wait)
562 {
563         struct btrfs_root *root = btrfs_sb(inode->i_sb);
564         struct btrfs_trans_handle *trans;
565         int ret;
566
567         mutex_lock(&root->fs_info->fs_mutex);
568         trans = btrfs_start_transaction(root, 1);
569         ret = btrfs_update_inode(trans, root, inode);
570         if (wait)
571                 btrfs_commit_transaction(trans, root);
572         else
573                 btrfs_end_transaction(trans, root);
574         mutex_unlock(&root->fs_info->fs_mutex);
575         return ret;
576 }
577
578 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
579                                      struct inode *dir, int mode)
580 {
581         struct inode *inode;
582         struct btrfs_inode_item inode_item;
583         struct btrfs_root *root = btrfs_sb(dir->i_sb);
584         struct btrfs_key key;
585         int ret;
586         u64 objectid;
587
588         inode = new_inode(dir->i_sb);
589         if (!inode)
590                 return ERR_PTR(-ENOMEM);
591
592         ret = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
593         BUG_ON(ret);
594
595         inode->i_uid = current->fsuid;
596         inode->i_gid = current->fsgid;
597         inode->i_mode = mode;
598         inode->i_ino = objectid;
599         inode->i_blocks = 0;
600         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
601         fill_inode_item(&inode_item, inode);
602
603         key.objectid = objectid;
604         key.flags = 0;
605         key.offset = 0;
606         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
607         ret = btrfs_insert_inode_map(trans, root, objectid, &key);
608         BUG_ON(ret);
609
610         ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
611         BUG_ON(ret);
612
613         insert_inode_hash(inode);
614         return inode;
615 }
616
617 static int btrfs_add_link(struct btrfs_trans_handle *trans,
618                             struct dentry *dentry, struct inode *inode)
619 {
620         int ret;
621         ret = btrfs_insert_dir_item(trans, btrfs_sb(inode->i_sb),
622                                     dentry->d_name.name, dentry->d_name.len,
623                                     dentry->d_parent->d_inode->i_ino,
624                                     inode->i_ino, 0);
625         if (ret == 0) {
626                 dentry->d_parent->d_inode->i_size += dentry->d_name.len;
627                 ret = btrfs_update_inode(trans, btrfs_sb(inode->i_sb),
628                                          dentry->d_parent->d_inode);
629         }
630
631         return ret;
632 }
633
/*
 * Link inode into the parent directory of dentry and, when that worked,
 * attach the inode to the dentry.  Returns 0 or the error from
 * btrfs_add_link().
 */
static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
			    struct dentry *dentry, struct inode *inode)
{
	int err = btrfs_add_link(trans, dentry, inode);

	if (err)
		return err;

	d_instantiate(dentry, inode);
	return 0;
}
644
645 static int btrfs_create(struct inode *dir, struct dentry *dentry,
646                         int mode, struct nameidata *nd)
647 {
648         struct btrfs_trans_handle *trans;
649         struct btrfs_root *root = btrfs_sb(dir->i_sb);
650         struct inode *inode;
651         int err;
652         int drop_inode = 0;
653
654         mutex_lock(&root->fs_info->fs_mutex);
655         trans = btrfs_start_transaction(root, 1);
656         inode = btrfs_new_inode(trans, dir, mode);
657         err = PTR_ERR(inode);
658         if (IS_ERR(inode))
659                 goto out_unlock;
660         // FIXME mark the inode dirty
661         err = btrfs_add_nondir(trans, dentry, inode);
662         if (err)
663                 drop_inode = 1;
664         else {
665                 inode->i_mapping->a_ops = &btrfs_aops;
666                 inode->i_fop = &btrfs_file_operations;
667                 inode->i_op = &btrfs_file_inode_operations;
668         }
669         dir->i_sb->s_dirt = 1;
670         btrfs_end_transaction(trans, root);
671 out_unlock:
672         mutex_unlock(&root->fs_info->fs_mutex);
673         if (drop_inode) {
674                 inode_dec_link_count(inode);
675                 iput(inode);
676         }
677         return err;
678 }
679
680 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
681                                 struct inode *inode, struct inode *dir)
682 {
683         struct btrfs_root *root = btrfs_sb(inode->i_sb);
684         int ret;
685         char buf[2];
686         buf[0] = '.';
687         buf[1] = '.';
688
689         ret = btrfs_insert_dir_item(trans, root, buf, 1, inode->i_ino,
690                                     inode->i_ino, 1);
691         if (ret)
692                 goto error;
693         ret = btrfs_insert_dir_item(trans, root, buf, 2, inode->i_ino,
694                                     dir->i_ino, 1);
695         if (ret)
696                 goto error;
697         inode->i_size = 3;
698         ret = btrfs_update_inode(trans, root, inode);
699 error:
700         return ret;
701 }
702
703 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
704 {
705         struct inode *inode;
706         struct btrfs_trans_handle *trans;
707         struct btrfs_root *root = btrfs_sb(dir->i_sb);
708         int err = 0;
709         int drop_on_err = 0;
710
711         mutex_lock(&root->fs_info->fs_mutex);
712         trans = btrfs_start_transaction(root, 1);
713         if (IS_ERR(trans)) {
714                 err = PTR_ERR(trans);
715                 goto out_unlock;
716         }
717         inode = btrfs_new_inode(trans, dir, S_IFDIR | mode);
718         if (IS_ERR(inode)) {
719                 err = PTR_ERR(inode);
720                 goto out_fail;
721         }
722         drop_on_err = 1;
723         inode->i_op = &btrfs_dir_inode_operations;
724         inode->i_fop = &btrfs_dir_file_operations;
725
726         err = btrfs_make_empty_dir(trans, inode, dir);
727         if (err)
728                 goto out_fail;
729         err = btrfs_add_link(trans, dentry, inode);
730         if (err)
731                 goto out_fail;
732         d_instantiate(dentry, inode);
733         drop_on_err = 0;
734
735 out_fail:
736         btrfs_end_transaction(trans, root);
737 out_unlock:
738         mutex_unlock(&root->fs_info->fs_mutex);
739         if (drop_on_err)
740                 iput(inode);
741         return err;
742 }
743
744 static int btrfs_sync_fs(struct super_block *sb, int wait)
745 {
746         struct btrfs_trans_handle *trans;
747         struct btrfs_root *root;
748         int ret;
749         root = btrfs_sb(sb);
750
751         sb->s_dirt = 0;
752         if (!wait) {
753                 filemap_flush(root->fs_info->btree_inode->i_mapping);
754                 return 0;
755         }
756         filemap_write_and_wait(root->fs_info->btree_inode->i_mapping);
757
758         mutex_lock(&root->fs_info->fs_mutex);
759         trans = btrfs_start_transaction(root, 1);
760         ret = btrfs_commit_transaction(trans, root);
761         sb->s_dirt = 0;
762         BUG_ON(ret);
763 printk("btrfs sync_fs\n");
764         mutex_unlock(&root->fs_info->fs_mutex);
765         return 0;
766 }
767
768 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
769                            struct buffer_head *result, int create)
770 {
771         int ret;
772         int err = 0;
773         u64 blocknr;
774         u64 extent_start = 0;
775         u64 extent_end = 0;
776         u64 objectid = inode->i_ino;
777         struct btrfs_path path;
778         struct btrfs_root *root = btrfs_sb(inode->i_sb);
779         struct btrfs_trans_handle *trans = NULL;
780         struct btrfs_file_extent_item *item;
781         struct btrfs_leaf *leaf;
782         struct btrfs_disk_key *found_key;
783
784         btrfs_init_path(&path);
785         if (create)
786                 trans = btrfs_start_transaction(root, 1);
787
788
789         ret = btrfs_lookup_file_extent(trans, root, &path,
790                                        inode->i_ino,
791                                        iblock << inode->i_blkbits, 0);
792         if (ret < 0) {
793                 btrfs_release_path(root, &path);
794                 err = ret;
795                 goto out;
796         }
797
798         if (ret != 0) {
799                 if (path.slots[0] == 0) {
800                         btrfs_release_path(root, &path);
801                         goto allocate;
802                 }
803                 path.slots[0]--;
804         }
805
806         item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0],
807                               struct btrfs_file_extent_item);
808         leaf = btrfs_buffer_leaf(path.nodes[0]);
809         blocknr = btrfs_file_extent_disk_blocknr(item);
810         blocknr += btrfs_file_extent_offset(item);
811
812         /* exact match found, use it */
813         if (ret == 0) {
814                 err = 0;
815                 map_bh(result, inode->i_sb, blocknr);
816                 btrfs_release_path(root, &path);
817                 goto out;
818         }
819
820         /* are we inside the extent that was found? */
821         found_key = &leaf->items[path.slots[0]].key;
822         if (btrfs_disk_key_objectid(found_key) != objectid ||
823             btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) {
824                 extent_end = 0;
825                 extent_start = 0;
826                 btrfs_release_path(root, &path);
827                 goto allocate;
828         }
829
830         extent_start = btrfs_disk_key_offset(&leaf->items[path.slots[0]].key);
831         extent_start = extent_start >> inode->i_blkbits;
832         extent_start += btrfs_file_extent_offset(item);
833         extent_end = extent_start + btrfs_file_extent_num_blocks(item);
834         btrfs_release_path(root, &path);
835         if (iblock >= extent_start && iblock < extent_end) {
836                 err = 0;
837                 map_bh(result, inode->i_sb, blocknr + iblock - extent_start);
838                 goto out;
839         }
840 allocate:
841         /* ok, create a new extent */
842         if (!create) {
843                 err = 0;
844                 goto out;
845         }
846         ret = btrfs_alloc_file_extent(trans, root, objectid,
847                                       iblock << inode->i_blkbits,
848                                       1, extent_end, &blocknr);
849         if (ret) {
850                 err = ret;
851                 goto out;
852         }
853         inode->i_blocks += inode->i_sb->s_blocksize >> 9;
854         set_buffer_new(result);
855         map_bh(result, inode->i_sb, blocknr);
856
857 out:
858         if (trans)
859                 btrfs_end_transaction(trans, root);
860         return err;
861 }
862
863 static int btrfs_get_block(struct inode *inode, sector_t iblock,
864                            struct buffer_head *result, int create)
865 {
866         int err;
867         struct btrfs_root *root = btrfs_sb(inode->i_sb);
868         mutex_lock(&root->fs_info->fs_mutex);
869         err = btrfs_get_block_lock(inode, iblock, result, create);
870         mutex_unlock(&root->fs_info->fs_mutex);
871         return err;
872 }
873
874 static int btrfs_prepare_write(struct file *file, struct page *page,
875                                unsigned from, unsigned to)
876 {
877         WARN_ON(1);
878         return nobh_prepare_write(page, from, to, btrfs_get_block);
879 }
/*
 * ->commit_write hook installed in btrfs_aops.  Emits an unconditional
 * warning, then delegates to nobh_commit_write().
 */
static int btrfs_commit_write(struct file *file, struct page *page,
			      unsigned from, unsigned to)
{
	int ret;

	WARN_ON(1);
	ret = nobh_commit_write(file, page, from, to);

	return ret;
}
886
/*
 * ->write_super hook installed in btrfs_super_ops: forwards to
 * btrfs_sync_fs() with a second argument of 1.  Any result of that call
 * is discarded.
 */
static void btrfs_write_super(struct super_block *sb)
{
	btrfs_sync_fs(sb, 1);
}
891
892 static int btrfs_readpage(struct file *file, struct page *page)
893 {
894         return mpage_readpage(page, btrfs_get_block);
895 }
896
897 static int btrfs_readpages(struct file *file, struct address_space *mapping,
898                            struct list_head *pages, unsigned nr_pages)
899 {
900         return mpage_readpages(mapping, pages, nr_pages, btrfs_get_block);
901 }
902
903 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
904 {
905         return nobh_writepage(page, btrfs_get_block, wbc);
906 }
907
908 static void btrfs_truncate(struct inode *inode)
909 {
910         struct btrfs_root *root = btrfs_sb(inode->i_sb);
911         int ret;
912         struct btrfs_trans_handle *trans;
913
914         if (!S_ISREG(inode->i_mode))
915                 return;
916         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
917                 return;
918
919         nobh_truncate_page(inode->i_mapping, inode->i_size);
920
921         /* FIXME, add redo link to tree so we don't leak on crash */
922         mutex_lock(&root->fs_info->fs_mutex);
923         trans = btrfs_start_transaction(root, 1);
924         ret = btrfs_truncate_in_trans(trans, root, inode);
925         BUG_ON(ret);
926         ret = btrfs_end_transaction(trans, root);
927         BUG_ON(ret);
928         mutex_unlock(&root->fs_info->fs_mutex);
929         mark_inode_dirty(inode);
930 }
931
932 static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
933                                 struct page **prepared_pages,
934                                 const char __user * buf)
935 {
936         long page_fault = 0;
937         int i;
938         int offset = pos & (PAGE_CACHE_SIZE - 1);
939
940         for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
941                 size_t count = min_t(size_t,
942                                      PAGE_CACHE_SIZE - offset, write_bytes);
943                 struct page *page = prepared_pages[i];
944                 fault_in_pages_readable(buf, count);
945
946                 /* Copy data from userspace to the current page */
947                 kmap(page);
948                 page_fault = __copy_from_user(page_address(page) + offset,
949                                               buf, count);
950                 /* Flush processor's dcache for this page */
951                 flush_dcache_page(page);
952                 kunmap(page);
953                 buf += count;
954                 write_bytes -= count;
955
956                 if (page_fault)
957                         break;
958         }
959         return page_fault ? -EFAULT : 0;
960 }
961
962 static void btrfs_drop_pages(struct page **pages, size_t num_pages)
963 {
964         size_t i;
965         for (i = 0; i < num_pages; i++) {
966                 if (!pages[i])
967                         break;
968                 unlock_page(pages[i]);
969                 mark_page_accessed(pages[i]);
970                 page_cache_release(pages[i]);
971         }
972 }
973 static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
974                                    struct btrfs_root *root,
975                                    struct file *file,
976                                    struct page **pages,
977                                    size_t num_pages,
978                                    loff_t pos,
979                                    size_t write_bytes)
980 {
981         int i;
982         int offset;
983         int err = 0;
984         int ret;
985         int this_write;
986         struct inode *inode = file->f_path.dentry->d_inode;
987
988         for (i = 0; i < num_pages; i++) {
989                 offset = pos & (PAGE_CACHE_SIZE -1);
990                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
991                 /* FIXME, one block at a time */
992
993                 mutex_lock(&root->fs_info->fs_mutex);
994                 trans = btrfs_start_transaction(root, 1);
995                 btrfs_csum_file_block(trans, root, inode->i_ino,
996                                       pages[i]->index << PAGE_CACHE_SHIFT,
997                                       kmap(pages[i]), PAGE_CACHE_SIZE);
998                 kunmap(pages[i]);
999                 SetPageChecked(pages[i]);
1000                 ret = btrfs_end_transaction(trans, root);
1001                 BUG_ON(ret);
1002                 mutex_unlock(&root->fs_info->fs_mutex);
1003
1004                 ret = nobh_commit_write(file, pages[i], offset,
1005                                          offset + this_write);
1006                 pos += this_write;
1007                 if (ret) {
1008                         err = ret;
1009                         goto failed;
1010                 }
1011                 WARN_ON(this_write > write_bytes);
1012                 write_bytes -= this_write;
1013         }
1014 failed:
1015         return err;
1016 }
1017
1018 static int prepare_pages(struct btrfs_trans_handle *trans,
1019                          struct btrfs_root *root,
1020                          struct file *file,
1021                          struct page **pages,
1022                          size_t num_pages,
1023                          loff_t pos,
1024                          size_t write_bytes)
1025 {
1026         int i;
1027         unsigned long index = pos >> PAGE_CACHE_SHIFT;
1028         struct inode *inode = file->f_path.dentry->d_inode;
1029         int offset;
1030         int err = 0;
1031         int ret;
1032         int this_write;
1033         loff_t isize = i_size_read(inode);
1034
1035         memset(pages, 0, num_pages * sizeof(struct page *));
1036
1037         for (i = 0; i < num_pages; i++) {
1038                 pages[i] = grab_cache_page(inode->i_mapping, index + i);
1039                 if (!pages[i]) {
1040                         err = -ENOMEM;
1041                         goto failed_release;
1042                 }
1043                 offset = pos & (PAGE_CACHE_SIZE -1);
1044                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1045                 ret = nobh_prepare_write(pages[i], offset,
1046                                          offset + this_write,
1047                                          btrfs_get_block);
1048                 pos += this_write;
1049                 if (ret) {
1050                         err = ret;
1051                         goto failed_truncate;
1052                 }
1053                 WARN_ON(this_write > write_bytes);
1054                 write_bytes -= this_write;
1055         }
1056         return 0;
1057
1058 failed_release:
1059         btrfs_drop_pages(pages, num_pages);
1060         return err;
1061
1062 failed_truncate:
1063         btrfs_drop_pages(pages, num_pages);
1064         if (pos > isize)
1065                 vmtruncate(inode, isize);
1066         return err;
1067 }
1068
1069 static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1070                                 size_t count, loff_t *ppos)
1071 {
1072         loff_t pos;
1073         size_t num_written = 0;
1074         int err = 0;
1075         int ret = 0;
1076         struct inode *inode = file->f_path.dentry->d_inode;
1077         struct btrfs_root *root = btrfs_sb(inode->i_sb);
1078         struct page *pages[1];
1079
1080         if (file->f_flags & O_DIRECT)
1081                 return -EINVAL;
1082         pos = *ppos;
1083
1084         vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
1085         current->backing_dev_info = inode->i_mapping->backing_dev_info;
1086         err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1087         if (err)
1088                 goto out;
1089         if (count == 0)
1090                 goto out;
1091         err = remove_suid(file->f_path.dentry);
1092         if (err)
1093                 goto out;
1094         file_update_time(file);
1095         mutex_lock(&inode->i_mutex);
1096         while(count > 0) {
1097                 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1098                 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
1099                 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
1100                                         PAGE_CACHE_SHIFT;
1101                 ret = prepare_pages(NULL, root, file, pages, num_pages,
1102                                     pos, write_bytes);
1103                 BUG_ON(ret);
1104                 ret = btrfs_copy_from_user(pos, num_pages,
1105                                            write_bytes, pages, buf);
1106                 BUG_ON(ret);
1107
1108                 ret = dirty_and_release_pages(NULL, root, file, pages,
1109                                               num_pages, pos, write_bytes);
1110                 BUG_ON(ret);
1111                 btrfs_drop_pages(pages, num_pages);
1112
1113                 buf += write_bytes;
1114                 count -= write_bytes;
1115                 pos += write_bytes;
1116                 num_written += write_bytes;
1117
1118                 balance_dirty_pages_ratelimited(inode->i_mapping);
1119                 cond_resched();
1120         }
1121         mutex_unlock(&inode->i_mutex);
1122 out:
1123         *ppos = pos;
1124         current->backing_dev_info = NULL;
1125         return num_written ? num_written : err;
1126 }
1127
1128 static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
1129                         unsigned long offset, unsigned long size)
1130 {
1131         char *kaddr;
1132         unsigned long left, count = desc->count;
1133
1134         if (size > count)
1135                 size = count;
1136
1137         if (!PageChecked(page)) {
1138                 /* FIXME, do it per block */
1139                 struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb);
1140                 int ret = btrfs_csum_verify_file_block(root,
1141                                           page->mapping->host->i_ino,
1142                                           page->index << PAGE_CACHE_SHIFT,
1143                                           kmap(page), PAGE_CACHE_SIZE);
1144                 if (ret) {
1145                         printk("failed to verify ino %lu page %lu\n",
1146                                page->mapping->host->i_ino,
1147                                page->index);
1148                         memset(page_address(page), 0, PAGE_CACHE_SIZE);
1149                 }
1150                 SetPageChecked(page);
1151                 kunmap(page);
1152         }
1153         /*
1154          * Faults on the destination of a read are common, so do it before
1155          * taking the kmap.
1156          */
1157         if (!fault_in_pages_writeable(desc->arg.buf, size)) {
1158                 kaddr = kmap_atomic(page, KM_USER0);
1159                 left = __copy_to_user_inatomic(desc->arg.buf,
1160                                                 kaddr + offset, size);
1161                 kunmap_atomic(kaddr, KM_USER0);
1162                 if (left == 0)
1163                         goto success;
1164         }
1165
1166         /* Do it the slow way */
1167         kaddr = kmap(page);
1168         left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
1169         kunmap(page);
1170
1171         if (left) {
1172                 size -= left;
1173                 desc->error = -EFAULT;
1174         }
1175 success:
1176         desc->count = count - size;
1177         desc->written += size;
1178         desc->arg.buf += size;
1179         return size;
1180 }
1181
1182 /**
1183  * btrfs_file_aio_read - filesystem read routine
1184  * @iocb:       kernel I/O control block
1185  * @iov:        io vector request
1186  * @nr_segs:    number of segments in the iovec
1187  * @pos:        current file position
1188  */
1189 static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1190                                    unsigned long nr_segs, loff_t pos)
1191 {
1192         struct file *filp = iocb->ki_filp;
1193         ssize_t retval;
1194         unsigned long seg;
1195         size_t count;
1196         loff_t *ppos = &iocb->ki_pos;
1197
1198         count = 0;
1199         for (seg = 0; seg < nr_segs; seg++) {
1200                 const struct iovec *iv = &iov[seg];
1201
1202                 /*
1203                  * If any segment has a negative length, or the cumulative
1204                  * length ever wraps negative then return -EINVAL.
1205                  */
1206                 count += iv->iov_len;
1207                 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
1208                         return -EINVAL;
1209                 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
1210                         continue;
1211                 if (seg == 0)
1212                         return -EFAULT;
1213                 nr_segs = seg;
1214                 count -= iv->iov_len;   /* This segment is no good */
1215                 break;
1216         }
1217         retval = 0;
1218         if (count) {
1219                 for (seg = 0; seg < nr_segs; seg++) {
1220                         read_descriptor_t desc;
1221
1222                         desc.written = 0;
1223                         desc.arg.buf = iov[seg].iov_base;
1224                         desc.count = iov[seg].iov_len;
1225                         if (desc.count == 0)
1226                                 continue;
1227                         desc.error = 0;
1228                         do_generic_file_read(filp, ppos, &desc,
1229                                              btrfs_read_actor);
1230                         retval += desc.written;
1231                         if (desc.error) {
1232                                 retval = retval ?: desc.error;
1233                                 break;
1234                         }
1235                 }
1236         }
1237         return retval;
1238 }
1239
1240 static int btrfs_get_sb(struct file_system_type *fs_type,
1241         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
1242 {
1243         return get_sb_bdev(fs_type, flags, dev_name, data,
1244                            btrfs_fill_super, mnt);
1245 }
1246
1247 static struct file_system_type btrfs_fs_type = {
1248         .owner          = THIS_MODULE,
1249         .name           = "btrfs",
1250         .get_sb         = btrfs_get_sb,
1251         .kill_sb        = kill_block_super,
1252         .fs_flags       = FS_REQUIRES_DEV,
1253 };
1254
1255 static struct super_operations btrfs_super_ops = {
1256         .statfs         = simple_statfs,
1257         .delete_inode   = btrfs_delete_inode,
1258         .put_super      = btrfs_put_super,
1259         .read_inode     = btrfs_read_locked_inode,
1260         .write_super    = btrfs_write_super,
1261         .sync_fs        = btrfs_sync_fs,
1262         .write_inode    = btrfs_write_inode,
1263 };
1264
1265 static struct inode_operations btrfs_dir_inode_operations = {
1266         .lookup         = btrfs_lookup,
1267         .create         = btrfs_create,
1268         .unlink         = btrfs_unlink,
1269         .mkdir          = btrfs_mkdir,
1270         .rmdir          = btrfs_rmdir,
1271 };
1272
1273 static struct file_operations btrfs_dir_file_operations = {
1274         .llseek         = generic_file_llseek,
1275         .read           = generic_read_dir,
1276         .readdir        = btrfs_readdir,
1277 };
1278
1279 static struct address_space_operations btrfs_aops = {
1280         .readpage       = btrfs_readpage,
1281         .readpages      = btrfs_readpages,
1282         .writepage      = btrfs_writepage,
1283         .sync_page      = block_sync_page,
1284         .prepare_write  = btrfs_prepare_write,
1285         .commit_write   = btrfs_commit_write,
1286 };
1287
1288 static struct inode_operations btrfs_file_inode_operations = {
1289         .truncate       = btrfs_truncate,
1290 };
1291
1292 static struct file_operations btrfs_file_operations = {
1293         .llseek         = generic_file_llseek,
1294         .read           = do_sync_read,
1295         .aio_read       = btrfs_file_aio_read,
1296         .write          = btrfs_file_write,
1297         .mmap           = generic_file_mmap,
1298         .open           = generic_file_open,
1299 };
1300
1301 static int __init init_btrfs_fs(void)
1302 {
1303         printk("btrfs loaded!\n");
1304         return register_filesystem(&btrfs_fs_type);
1305 }
1306
1307 static void __exit exit_btrfs_fs(void)
1308 {
1309         unregister_filesystem(&btrfs_fs_type);
1310         printk("btrfs unloaded\n");
1311 }
1312
1313 module_init(init_btrfs_fs)
1314 module_exit(exit_btrfs_fs)
1315
1316 MODULE_LICENSE("GPL");