Btrfs: hunting slab corruption
[linux-2.6-block.git] / fs / btrfs / super.c
1 #include <linux/module.h>
2 #include <linux/buffer_head.h>
3 #include <linux/fs.h>
4 #include <linux/pagemap.h>
5 #include <linux/highmem.h>
6 #include <linux/time.h>
7 #include <linux/init.h>
8 #include <linux/string.h>
9 #include <linux/smp_lock.h>
10 #include <linux/backing-dev.h>
11 #include <linux/mpage.h>
12 #include <linux/swap.h>
13 #include <linux/writeback.h>
14 #include "ctree.h"
15 #include "disk-io.h"
16 #include "transaction.h"
17
18 #define BTRFS_SUPER_MAGIC 0x9123682E
19
20 static struct inode_operations btrfs_dir_inode_operations;
21 static struct super_operations btrfs_super_ops;
22 static struct file_operations btrfs_dir_file_operations;
23 static struct inode_operations btrfs_file_inode_operations;
24 static struct address_space_operations btrfs_aops;
25 static struct file_operations btrfs_file_operations;
26
27 static void btrfs_read_locked_inode(struct inode *inode)
28 {
29         struct btrfs_path path;
30         struct btrfs_inode_item *inode_item;
31         struct btrfs_root *root = btrfs_sb(inode->i_sb);
32         int ret;
33
34         btrfs_init_path(&path);
35         mutex_lock(&root->fs_info->fs_mutex);
36
37         ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0);
38         if (ret) {
39                 btrfs_release_path(root, &path);
40                 mutex_unlock(&root->fs_info->fs_mutex);
41                 make_bad_inode(inode);
42                 return;
43         }
44         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]),
45                                   path.slots[0],
46                                   struct btrfs_inode_item);
47
48         inode->i_mode = btrfs_inode_mode(inode_item);
49         inode->i_nlink = btrfs_inode_nlink(inode_item);
50         inode->i_uid = btrfs_inode_uid(inode_item);
51         inode->i_gid = btrfs_inode_gid(inode_item);
52         inode->i_size = btrfs_inode_size(inode_item);
53         inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
54         inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
55         inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
56         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
57         inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
58         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
59         inode->i_blocks = btrfs_inode_nblocks(inode_item);
60         inode->i_generation = btrfs_inode_generation(inode_item);
61         btrfs_release_path(root, &path);
62         mutex_unlock(&root->fs_info->fs_mutex);
63         switch (inode->i_mode & S_IFMT) {
64 #if 0
65         default:
66                 init_special_inode(inode, inode->i_mode,
67                                    btrfs_inode_rdev(inode_item));
68                 break;
69 #endif
70         case S_IFREG:
71                 inode->i_mapping->a_ops = &btrfs_aops;
72                 inode->i_fop = &btrfs_file_operations;
73                 inode->i_op = &btrfs_file_inode_operations;
74                 break;
75         case S_IFDIR:
76                 inode->i_op = &btrfs_dir_inode_operations;
77                 inode->i_fop = &btrfs_dir_file_operations;
78                 break;
79         case S_IFLNK:
80                 // inode->i_op = &page_symlink_inode_operations;
81                 break;
82         }
83         return;
84 }
85
86 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
87                               struct btrfs_root *root,
88                               struct inode *dir,
89                               struct dentry *dentry)
90 {
91         struct btrfs_path path;
92         const char *name = dentry->d_name.name;
93         int name_len = dentry->d_name.len;
94         int ret;
95         u64 objectid;
96         struct btrfs_dir_item *di;
97
98         btrfs_init_path(&path);
99         ret = btrfs_lookup_dir_item(trans, root, &path, dir->i_ino,
100                                     name, name_len, -1);
101         if (ret < 0)
102                 goto err;
103         if (ret > 0) {
104                 ret = -ENOENT;
105                 goto err;
106         }
107         di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0],
108                             struct btrfs_dir_item);
109         objectid = btrfs_dir_objectid(di);
110
111         ret = btrfs_del_item(trans, root, &path);
112         BUG_ON(ret);
113         dentry->d_inode->i_ctime = dir->i_ctime;
114 err:
115         btrfs_release_path(root, &path);
116         if (ret == 0)
117                 inode_dec_link_count(dentry->d_inode);
118         return ret;
119 }
120
121 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
122 {
123         struct btrfs_root *root;
124         struct btrfs_trans_handle *trans;
125         int ret;
126
127         root = btrfs_sb(dir->i_sb);
128         mutex_lock(&root->fs_info->fs_mutex);
129         trans = btrfs_start_transaction(root, 1);
130         ret = btrfs_unlink_trans(trans, root, dir, dentry);
131         btrfs_end_transaction(trans, root);
132         mutex_unlock(&root->fs_info->fs_mutex);
133         return ret;
134 }
135
136 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
137 {
138         struct inode *inode = dentry->d_inode;
139         int err;
140         int ret;
141         struct btrfs_root *root = btrfs_sb(dir->i_sb);
142         struct btrfs_path path;
143         struct btrfs_key key;
144         struct btrfs_trans_handle *trans;
145         struct btrfs_disk_key *found_key;
146         struct btrfs_leaf *leaf;
147
148         btrfs_init_path(&path);
149         mutex_lock(&root->fs_info->fs_mutex);
150         trans = btrfs_start_transaction(root, 1);
151         key.objectid = inode->i_ino;
152         key.offset = (u64)-1;
153         key.flags = 0;
154         btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
155         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
156         if (ret < 0) {
157                 err = ret;
158                 goto out;
159         }
160
161         BUG_ON(ret == 0);
162         BUG_ON(path.slots[0] == 0);
163         path.slots[0]--;
164         leaf = btrfs_buffer_leaf(path.nodes[0]);
165         found_key = &leaf->items[path.slots[0]].key;
166         if (btrfs_disk_key_objectid(found_key) != inode->i_ino) {
167                 err = -ENOENT;
168                 goto out;
169         }
170         if (btrfs_disk_key_type(found_key) != BTRFS_DIR_ITEM_KEY ||
171             btrfs_disk_key_offset(found_key) != 2) {
172                 err = -ENOTEMPTY;
173                 goto out;
174         }
175         ret = btrfs_del_item(trans, root, &path);
176         BUG_ON(ret);
177         btrfs_release_path(root, &path);
178         key.offset = 1;
179         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
180         if (ret < 0) {
181                 err = ret;
182                 goto out;
183         }
184         if (ret > 0) {
185                 err = -ENOTEMPTY;
186                 goto out;
187         }
188         ret = btrfs_del_item(trans, root, &path);
189         if (ret) {
190                 err = ret;
191                 goto out;
192         }
193         btrfs_release_path(root, &path);
194
195         /* now the directory is empty */
196         err = btrfs_unlink_trans(trans, root, dir, dentry);
197         if (!err) {
198                 inode->i_size = 0;
199         }
200 out:
201         mutex_unlock(&root->fs_info->fs_mutex);
202         ret = btrfs_end_transaction(trans, root);
203         if (ret && !err)
204                 err = ret;
205         return err;
206 }
207
208 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
209                             struct btrfs_root *root,
210                             struct inode *inode)
211 {
212         u64 objectid = inode->i_ino;
213         struct btrfs_path path;
214         struct btrfs_inode_map_item *map;
215         struct btrfs_key stat_data_key;
216         int ret;
217         clear_inode(inode);
218         btrfs_init_path(&path);
219         ret = btrfs_lookup_inode_map(trans, root, &path, objectid, -1);
220         if (ret) {
221                 if (ret > 0)
222                         ret = -ENOENT;
223                 btrfs_release_path(root, &path);
224                 goto error;
225         }
226         map = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0],
227                             struct btrfs_inode_map_item);
228         btrfs_disk_key_to_cpu(&stat_data_key, &map->key);
229         ret = btrfs_del_item(trans, root->fs_info->inode_root, &path);
230         BUG_ON(ret);
231         btrfs_release_path(root, &path);
232         btrfs_init_path(&path);
233
234         ret = btrfs_lookup_inode(trans, root, &path, objectid, -1);
235         BUG_ON(ret);
236         ret = btrfs_del_item(trans, root, &path);
237         BUG_ON(ret);
238         btrfs_release_path(root, &path);
239 error:
240         return ret;
241 }
242
243 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
244                                    struct btrfs_root *root,
245                                    struct inode *inode)
246 {
247         int ret;
248         struct btrfs_path path;
249         struct btrfs_key key;
250         struct btrfs_disk_key *found_key;
251         struct btrfs_leaf *leaf;
252         struct btrfs_file_extent_item *fi = NULL;
253         u64 extent_start = 0;
254         u64 extent_num_blocks = 0;
255         int found_extent;
256
257         /* FIXME, add redo link to tree so we don't leak on crash */
258         key.objectid = inode->i_ino;
259         key.offset = (u64)-1;
260         key.flags = 0;
261         btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY);
262         while(1) {
263                 btrfs_init_path(&path);
264                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
265                 if (ret < 0) {
266                         btrfs_release_path(root, &path);
267                         goto error;
268                 }
269                 if (ret > 0) {
270                         BUG_ON(path.slots[0] == 0);
271                         path.slots[0]--;
272                 }
273                 leaf = btrfs_buffer_leaf(path.nodes[0]);
274                 found_key = &leaf->items[path.slots[0]].key;
275                 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
276                         break;
277                 if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY &&
278                     btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY)
279                         break;
280                 if (btrfs_disk_key_offset(found_key) < inode->i_size)
281                         break;
282                 if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) {
283                         fi = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]),
284                                             path.slots[0],
285                                             struct btrfs_file_extent_item);
286                         extent_start = btrfs_file_extent_disk_blocknr(fi);
287                         extent_num_blocks =
288                                 btrfs_file_extent_disk_num_blocks(fi);
289                         inode->i_blocks -=
290                                 btrfs_file_extent_num_blocks(fi) >> 9;
291                         found_extent = 1;
292                 } else {
293                         found_extent = 0;
294                 }
295                 ret = btrfs_del_item(trans, root, &path);
296                 BUG_ON(ret);
297                 btrfs_release_path(root, &path);
298                 if (found_extent) {
299                         ret = btrfs_free_extent(trans, root, extent_start,
300                                                 extent_num_blocks, 0);
301                         BUG_ON(ret);
302                 }
303         }
304         btrfs_release_path(root, &path);
305         ret = 0;
306 error:
307         return ret;
308 }
309
310 static void btrfs_delete_inode(struct inode *inode)
311 {
312         struct btrfs_trans_handle *trans;
313         struct btrfs_root *root = btrfs_sb(inode->i_sb);
314         int ret;
315
316         truncate_inode_pages(&inode->i_data, 0);
317         if (is_bad_inode(inode)) {
318                 goto no_delete;
319         }
320         inode->i_size = 0;
321         mutex_lock(&root->fs_info->fs_mutex);
322         trans = btrfs_start_transaction(root, 1);
323         if (S_ISREG(inode->i_mode)) {
324                 ret = btrfs_truncate_in_trans(trans, root, inode);
325                 BUG_ON(ret);
326         }
327         btrfs_free_inode(trans, root, inode);
328         btrfs_end_transaction(trans, root);
329         mutex_unlock(&root->fs_info->fs_mutex);
330         return;
331 no_delete:
332         clear_inode(inode);
333 }
334
335 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
336                               ino_t *ino)
337 {
338         const char *name = dentry->d_name.name;
339         int namelen = dentry->d_name.len;
340         struct btrfs_dir_item *di;
341         struct btrfs_path path;
342         struct btrfs_root *root = btrfs_sb(dir->i_sb);
343         int ret;
344
345         btrfs_init_path(&path);
346         ret = btrfs_lookup_dir_item(NULL, root, &path, dir->i_ino, name,
347                                     namelen, 0);
348         if (ret || !btrfs_match_dir_item_name(root, &path, name, namelen)) {
349                 *ino = 0;
350                 goto out;
351         }
352         di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0],
353                             struct btrfs_dir_item);
354         *ino = btrfs_dir_objectid(di);
355 out:
356         btrfs_release_path(root, &path);
357         return ret;
358 }
359
360 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
361                                    struct nameidata *nd)
362 {
363         struct inode * inode;
364         struct btrfs_root *root = btrfs_sb(dir->i_sb);
365         ino_t ino;
366         int ret;
367
368         if (dentry->d_name.len > BTRFS_NAME_LEN)
369                 return ERR_PTR(-ENAMETOOLONG);
370
371         mutex_lock(&root->fs_info->fs_mutex);
372         ret = btrfs_inode_by_name(dir, dentry, &ino);
373         mutex_unlock(&root->fs_info->fs_mutex);
374         if (ret < 0)
375                 return ERR_PTR(ret);
376         inode = NULL;
377         if (ino) {
378                 inode = iget(dir->i_sb, ino);
379                 if (!inode)
380                         return ERR_PTR(-EACCES);
381         }
382         return d_splice_alias(inode, dentry);
383 }
384
385 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
386 {
387         struct inode *inode = filp->f_path.dentry->d_inode;
388         struct btrfs_root *root = btrfs_sb(inode->i_sb);
389         struct btrfs_item *item;
390         struct btrfs_dir_item *di;
391         struct btrfs_key key;
392         struct btrfs_path path;
393         int ret;
394         u32 nritems;
395         struct btrfs_leaf *leaf;
396         int slot;
397         int advance;
398         unsigned char d_type = DT_UNKNOWN;
399         int over = 0;
400
401         mutex_lock(&root->fs_info->fs_mutex);
402         key.objectid = inode->i_ino;
403         key.flags = 0;
404         btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
405         key.offset = filp->f_pos;
406         btrfs_init_path(&path);
407         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
408         if (ret < 0) {
409                 goto err;
410         }
411         advance = 0;
412         while(1) {
413                 leaf = btrfs_buffer_leaf(path.nodes[0]);
414                 nritems = btrfs_header_nritems(&leaf->header);
415                 slot = path.slots[0];
416                 if (advance || slot >= nritems) {
417                         if (slot >= nritems -1) {
418                                 ret = btrfs_next_leaf(root, &path);
419                                 if (ret)
420                                         break;
421                                 leaf = btrfs_buffer_leaf(path.nodes[0]);
422                                 nritems = btrfs_header_nritems(&leaf->header);
423                                 slot = path.slots[0];
424                         } else {
425                                 slot++;
426                                 path.slots[0]++;
427                         }
428                 }
429                 advance = 1;
430                 item = leaf->items + slot;
431                 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
432                         break;
433                 if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_ITEM_KEY)
434                         continue;
435                 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
436                         continue;
437
438                 advance = 1;
439                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
440                 over = filldir(dirent, (const char *)(di + 1),
441                                btrfs_dir_name_len(di),
442                                btrfs_disk_key_offset(&item->key),
443                                btrfs_dir_objectid(di), d_type);
444                 if (over) {
445                         filp->f_pos = btrfs_disk_key_offset(&item->key);
446                         break;
447                 }
448                 filp->f_pos = btrfs_disk_key_offset(&item->key) + 1;
449         }
450         ret = 0;
451 err:
452         btrfs_release_path(root, &path);
453         mutex_unlock(&root->fs_info->fs_mutex);
454         return ret;
455 }
456
457 static void btrfs_put_super (struct super_block * sb)
458 {
459         struct btrfs_root *root = btrfs_sb(sb);
460         int ret;
461
462         ret = close_ctree(root);
463         if (ret) {
464                 printk("close ctree returns %d\n", ret);
465         }
466         sb->s_fs_info = NULL;
467 }
468
469 static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
470 {
471         struct inode * inode;
472         struct dentry * root_dentry;
473         struct btrfs_super_block *disk_super;
474         struct buffer_head *bh;
475         struct btrfs_root *root;
476
477         sb->s_maxbytes = MAX_LFS_FILESIZE;
478         sb->s_blocksize = PAGE_CACHE_SIZE;
479         sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
480         sb->s_magic = BTRFS_SUPER_MAGIC;
481         sb->s_op = &btrfs_super_ops;
482         sb->s_time_gran = 1;
483
484         bh = sb_bread(sb, BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize);
485         if (!bh) {
486                 printk("btrfs: unable to read on disk super\n");
487                 return -EIO;
488         }
489         disk_super = (struct btrfs_super_block *)bh->b_data;
490         root = open_ctree(sb, bh, disk_super);
491
492         if (!root) {
493                 printk("btrfs: open_ctree failed\n");
494                 return -EIO;
495         }
496         sb->s_fs_info = root;
497         disk_super = root->fs_info->disk_super;
498         printk("read in super total blocks %Lu root %Lu\n",
499                btrfs_super_total_blocks(disk_super),
500                btrfs_super_root_dir(disk_super));
501
502         inode = iget_locked(sb, btrfs_super_root_dir(disk_super));
503         if (!inode)
504                 return -ENOMEM;
505         if (inode->i_state & I_NEW) {
506                 btrfs_read_locked_inode(inode);
507                 unlock_new_inode(inode);
508         }
509
510         root_dentry = d_alloc_root(inode);
511         if (!root_dentry) {
512                 iput(inode);
513                 return -ENOMEM;
514         }
515         sb->s_root = root_dentry;
516
517         return 0;
518 }
519
520 static void fill_inode_item(struct btrfs_inode_item *item,
521                             struct inode *inode)
522 {
523         btrfs_set_inode_uid(item, inode->i_uid);
524         btrfs_set_inode_gid(item, inode->i_gid);
525         btrfs_set_inode_size(item, inode->i_size);
526         btrfs_set_inode_mode(item, inode->i_mode);
527         btrfs_set_inode_nlink(item, inode->i_nlink);
528         btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
529         btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
530         btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
531         btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
532         btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
533         btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
534         btrfs_set_inode_nblocks(item, inode->i_blocks);
535         btrfs_set_inode_generation(item, inode->i_generation);
536 }
537
538 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
539                               struct btrfs_root *root,
540                               struct inode *inode)
541 {
542         struct btrfs_inode_item *inode_item;
543         struct btrfs_path path;
544         int ret;
545
546         btrfs_init_path(&path);
547
548         ret = btrfs_lookup_inode(trans, root, &path, inode->i_ino, 1);
549         if (ret) {
550                 if (ret > 0)
551                         ret = -ENOENT;
552                 goto failed;
553         }
554
555         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]),
556                                   path.slots[0],
557                                   struct btrfs_inode_item);
558
559         fill_inode_item(inode_item, inode);
560         mark_buffer_dirty(path.nodes[0]);
561 failed:
562         btrfs_release_path(root, &path);
563         return 0;
564 }
565
566 static int btrfs_write_inode(struct inode *inode, int wait)
567 {
568         struct btrfs_root *root = btrfs_sb(inode->i_sb);
569         struct btrfs_trans_handle *trans;
570         int ret;
571
572         mutex_lock(&root->fs_info->fs_mutex);
573         trans = btrfs_start_transaction(root, 1);
574         ret = btrfs_update_inode(trans, root, inode);
575         if (wait)
576                 btrfs_commit_transaction(trans, root);
577         else
578                 btrfs_end_transaction(trans, root);
579         mutex_unlock(&root->fs_info->fs_mutex);
580         return ret;
581 }
582
583 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
584                                      struct inode *dir, int mode)
585 {
586         struct inode *inode;
587         struct btrfs_inode_item inode_item;
588         struct btrfs_root *root = btrfs_sb(dir->i_sb);
589         struct btrfs_key key;
590         int ret;
591         u64 objectid;
592
593         inode = new_inode(dir->i_sb);
594         if (!inode)
595                 return ERR_PTR(-ENOMEM);
596
597         ret = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
598         BUG_ON(ret);
599
600         inode->i_uid = current->fsuid;
601         inode->i_gid = current->fsgid;
602         inode->i_mode = mode;
603         inode->i_ino = objectid;
604         inode->i_blocks = 0;
605         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
606         fill_inode_item(&inode_item, inode);
607
608         key.objectid = objectid;
609         key.flags = 0;
610         key.offset = 0;
611         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
612         ret = btrfs_insert_inode_map(trans, root, objectid, &key);
613         BUG_ON(ret);
614
615         ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
616         BUG_ON(ret);
617
618         insert_inode_hash(inode);
619         return inode;
620 }
621
622 static int btrfs_add_link(struct btrfs_trans_handle *trans,
623                             struct dentry *dentry, struct inode *inode)
624 {
625         int ret;
626         ret = btrfs_insert_dir_item(trans, btrfs_sb(inode->i_sb),
627                                     dentry->d_name.name, dentry->d_name.len,
628                                     dentry->d_parent->d_inode->i_ino,
629                                     inode->i_ino, 0);
630         if (ret == 0) {
631                 dentry->d_parent->d_inode->i_size += dentry->d_name.len;
632                 ret = btrfs_update_inode(trans, btrfs_sb(inode->i_sb),
633                                          dentry->d_parent->d_inode);
634         }
635
636         return ret;
637 }
638
/*
 * Link @inode into the parent of @dentry and, if that worked, attach the
 * inode to the dentry.  Returns 0 on success or the error from
 * btrfs_add_link().
 */
static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
                            struct dentry *dentry, struct inode *inode)
{
        int err;

        err = btrfs_add_link(trans, dentry, inode);
        if (err)
                return err;

        d_instantiate(dentry, inode);
        return 0;
}
649
650 static int btrfs_create(struct inode *dir, struct dentry *dentry,
651                         int mode, struct nameidata *nd)
652 {
653         struct btrfs_trans_handle *trans;
654         struct btrfs_root *root = btrfs_sb(dir->i_sb);
655         struct inode *inode;
656         int err;
657         int drop_inode = 0;
658
659         mutex_lock(&root->fs_info->fs_mutex);
660         trans = btrfs_start_transaction(root, 1);
661         inode = btrfs_new_inode(trans, dir, mode);
662         err = PTR_ERR(inode);
663         if (IS_ERR(inode))
664                 goto out_unlock;
665         // FIXME mark the inode dirty
666         err = btrfs_add_nondir(trans, dentry, inode);
667         if (err)
668                 drop_inode = 1;
669         else {
670                 inode->i_mapping->a_ops = &btrfs_aops;
671                 inode->i_fop = &btrfs_file_operations;
672                 inode->i_op = &btrfs_file_inode_operations;
673         }
674         dir->i_sb->s_dirt = 1;
675 out_unlock:
676         btrfs_end_transaction(trans, root);
677         mutex_unlock(&root->fs_info->fs_mutex);
678         if (drop_inode) {
679                 inode_dec_link_count(inode);
680                 iput(inode);
681         }
682         return err;
683 }
684
685 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
686                                 struct inode *inode, struct inode *dir)
687 {
688         struct btrfs_root *root = btrfs_sb(inode->i_sb);
689         int ret;
690         char buf[2];
691         buf[0] = '.';
692         buf[1] = '.';
693
694         ret = btrfs_insert_dir_item(trans, root, buf, 1, inode->i_ino,
695                                     inode->i_ino, 1);
696         if (ret)
697                 goto error;
698         ret = btrfs_insert_dir_item(trans, root, buf, 2, inode->i_ino,
699                                     dir->i_ino, 1);
700         if (ret)
701                 goto error;
702         inode->i_size = 3;
703         ret = btrfs_update_inode(trans, root, inode);
704 error:
705         return ret;
706 }
707
708 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
709 {
710         struct inode *inode;
711         struct btrfs_trans_handle *trans;
712         struct btrfs_root *root = btrfs_sb(dir->i_sb);
713         int err = 0;
714         int drop_on_err = 0;
715
716         mutex_lock(&root->fs_info->fs_mutex);
717         trans = btrfs_start_transaction(root, 1);
718         if (IS_ERR(trans)) {
719                 err = PTR_ERR(trans);
720                 goto out_unlock;
721         }
722         inode = btrfs_new_inode(trans, dir, S_IFDIR | mode);
723         if (IS_ERR(inode)) {
724                 err = PTR_ERR(inode);
725                 goto out_fail;
726         }
727         drop_on_err = 1;
728         inode->i_op = &btrfs_dir_inode_operations;
729         inode->i_fop = &btrfs_dir_file_operations;
730
731         err = btrfs_make_empty_dir(trans, inode, dir);
732         if (err)
733                 goto out_fail;
734         err = btrfs_add_link(trans, dentry, inode);
735         if (err)
736                 goto out_fail;
737         d_instantiate(dentry, inode);
738         drop_on_err = 0;
739
740 out_fail:
741         btrfs_end_transaction(trans, root);
742 out_unlock:
743         mutex_unlock(&root->fs_info->fs_mutex);
744         if (drop_on_err)
745                 iput(inode);
746         return err;
747 }
748
749 static int btrfs_sync_fs(struct super_block *sb, int wait)
750 {
751         struct btrfs_trans_handle *trans;
752         struct btrfs_root *root;
753         int ret;
754         root = btrfs_sb(sb);
755
756         sb->s_dirt = 0;
757         if (!wait) {
758                 filemap_flush(root->fs_info->btree_inode->i_mapping);
759                 return 0;
760         }
761         filemap_write_and_wait(root->fs_info->btree_inode->i_mapping);
762
763         mutex_lock(&root->fs_info->fs_mutex);
764         trans = btrfs_start_transaction(root, 1);
765         ret = btrfs_commit_transaction(trans, root);
766         sb->s_dirt = 0;
767         BUG_ON(ret);
768 printk("btrfs sync_fs\n");
769         mutex_unlock(&root->fs_info->fs_mutex);
770         return 0;
771 }
772
773 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
774                            struct buffer_head *result, int create)
775 {
776         int ret;
777         int err = 0;
778         u64 blocknr;
779         u64 extent_start = 0;
780         u64 extent_end = 0;
781         u64 objectid = inode->i_ino;
782         struct btrfs_path path;
783         struct btrfs_root *root = btrfs_sb(inode->i_sb);
784         struct btrfs_trans_handle *trans = NULL;
785         struct btrfs_file_extent_item *item;
786         struct btrfs_leaf *leaf;
787         struct btrfs_disk_key *found_key;
788
789         btrfs_init_path(&path);
790         if (create)
791                 trans = btrfs_start_transaction(root, 1);
792
793
794         ret = btrfs_lookup_file_extent(trans, root, &path,
795                                        inode->i_ino,
796                                        iblock << inode->i_blkbits, 0);
797         if (ret < 0) {
798                 btrfs_release_path(root, &path);
799                 err = ret;
800                 goto out;
801         }
802
803         if (ret != 0) {
804                 if (path.slots[0] == 0) {
805                         btrfs_release_path(root, &path);
806                         goto allocate;
807                 }
808                 path.slots[0]--;
809         }
810
811         item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0],
812                               struct btrfs_file_extent_item);
813         leaf = btrfs_buffer_leaf(path.nodes[0]);
814         blocknr = btrfs_file_extent_disk_blocknr(item);
815         blocknr += btrfs_file_extent_offset(item);
816
817         /* exact match found, use it */
818         if (ret == 0) {
819                 err = 0;
820                 map_bh(result, inode->i_sb, blocknr);
821                 btrfs_release_path(root, &path);
822                 goto out;
823         }
824
825         /* are we inside the extent that was found? */
826         found_key = &leaf->items[path.slots[0]].key;
827         if (btrfs_disk_key_objectid(found_key) != objectid ||
828             btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) {
829                 extent_end = 0;
830                 extent_start = 0;
831                 btrfs_release_path(root, &path);
832                 goto allocate;
833         }
834
835         extent_start = btrfs_disk_key_offset(&leaf->items[path.slots[0]].key);
836         extent_start = extent_start >> inode->i_blkbits;
837         extent_start += btrfs_file_extent_offset(item);
838         extent_end = extent_start + btrfs_file_extent_num_blocks(item);
839         btrfs_release_path(root, &path);
840         if (iblock >= extent_start && iblock < extent_end) {
841                 err = 0;
842                 map_bh(result, inode->i_sb, blocknr + iblock - extent_start);
843                 goto out;
844         }
845 allocate:
846         /* ok, create a new extent */
847         if (!create) {
848                 err = 0;
849                 goto out;
850         }
851         ret = btrfs_alloc_file_extent(trans, root, objectid,
852                                       iblock << inode->i_blkbits,
853                                       1, extent_end, &blocknr);
854         if (ret) {
855                 err = ret;
856                 goto out;
857         }
858         inode->i_blocks += inode->i_sb->s_blocksize >> 9;
859         set_buffer_new(result);
860         map_bh(result, inode->i_sb, blocknr);
861
862 out:
863         if (trans)
864                 btrfs_end_transaction(trans, root);
865         return err;
866 }
867
868 static int btrfs_get_block(struct inode *inode, sector_t iblock,
869                            struct buffer_head *result, int create)
870 {
871         int err;
872         struct btrfs_root *root = btrfs_sb(inode->i_sb);
873         mutex_lock(&root->fs_info->fs_mutex);
874         err = btrfs_get_block_lock(inode, iblock, result, create);
875         mutex_unlock(&root->fs_info->fs_mutex);
876         return err;
877 }
878
879 static int btrfs_prepare_write(struct file *file, struct page *page,
880                                unsigned from, unsigned to)
881 {
882         WARN_ON(1);
883         return nobh_prepare_write(page, from, to, btrfs_get_block);
884 }
/*
 * ->commit_write address space hook.
 *
 * Emits a WARN_ON(1) on every call, then defers to nobh_commit_write()
 * for the byte range [from, to) of @page.
 */
static int btrfs_commit_write(struct file *file, struct page *page,
                              unsigned from, unsigned to)
{
        int status;

        WARN_ON(1);
        status = nobh_commit_write(file, page, from, to);

        return status;
}
891
/*
 * ->write_super hook: forwards to btrfs_sync_fs() with its second
 * argument set to 1.  The return value of btrfs_sync_fs() is discarded.
 */
static void btrfs_write_super(struct super_block *sb)
{
        const int wait = 1;

        btrfs_sync_fs(sb, wait);
}
896
897 static int btrfs_readpage(struct file *file, struct page *page)
898 {
899         return mpage_readpage(page, btrfs_get_block);
900 }
901
902 static int btrfs_readpages(struct file *file, struct address_space *mapping,
903                            struct list_head *pages, unsigned nr_pages)
904 {
905         return mpage_readpages(mapping, pages, nr_pages, btrfs_get_block);
906 }
907
908 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
909 {
910         return nobh_writepage(page, btrfs_get_block, wbc);
911 }
912
913 static void btrfs_truncate(struct inode *inode)
914 {
915         struct btrfs_root *root = btrfs_sb(inode->i_sb);
916         int ret;
917         struct btrfs_trans_handle *trans;
918
919         if (!S_ISREG(inode->i_mode))
920                 return;
921         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
922                 return;
923
924         nobh_truncate_page(inode->i_mapping, inode->i_size);
925
926         /* FIXME, add redo link to tree so we don't leak on crash */
927         mutex_lock(&root->fs_info->fs_mutex);
928         trans = btrfs_start_transaction(root, 1);
929         ret = btrfs_truncate_in_trans(trans, root, inode);
930         BUG_ON(ret);
931         ret = btrfs_end_transaction(trans, root);
932         BUG_ON(ret);
933         mutex_unlock(&root->fs_info->fs_mutex);
934         mark_inode_dirty(inode);
935 }
936
937 static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
938                                 struct page **prepared_pages,
939                                 const char __user * buf)
940 {
941         long page_fault = 0;
942         int i;
943         int offset = pos & (PAGE_CACHE_SIZE - 1);
944
945         for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
946                 size_t count = min_t(size_t,
947                                      PAGE_CACHE_SIZE - offset, write_bytes);
948                 struct page *page = prepared_pages[i];
949                 fault_in_pages_readable(buf, count);
950
951                 /* Copy data from userspace to the current page */
952                 kmap(page);
953                 page_fault = __copy_from_user(page_address(page) + offset,
954                                               buf, count);
955                 /* Flush processor's dcache for this page */
956                 flush_dcache_page(page);
957                 kunmap(page);
958                 buf += count;
959                 write_bytes -= count;
960
961                 if (page_fault)
962                         break;
963         }
964         return page_fault ? -EFAULT : 0;
965 }
966
967 static void btrfs_drop_pages(struct page **pages, size_t num_pages)
968 {
969         size_t i;
970         for (i = 0; i < num_pages; i++) {
971                 if (!pages[i])
972                         break;
973                 unlock_page(pages[i]);
974                 mark_page_accessed(pages[i]);
975                 page_cache_release(pages[i]);
976         }
977 }
978 static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
979                                    struct btrfs_root *root,
980                                    struct file *file,
981                                    struct page **pages,
982                                    size_t num_pages,
983                                    loff_t pos,
984                                    size_t write_bytes)
985 {
986         int i;
987         int offset;
988         int err = 0;
989         int ret;
990         int this_write;
991         struct inode *inode = file->f_path.dentry->d_inode;
992
993         for (i = 0; i < num_pages; i++) {
994                 offset = pos & (PAGE_CACHE_SIZE -1);
995                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
996                 /* FIXME, one block at a time */
997
998                 mutex_lock(&root->fs_info->fs_mutex);
999                 trans = btrfs_start_transaction(root, 1);
1000                 btrfs_csum_file_block(trans, root, inode->i_ino,
1001                                       pages[i]->index << PAGE_CACHE_SHIFT,
1002                                       kmap(pages[i]), PAGE_CACHE_SIZE);
1003                 kunmap(pages[i]);
1004                 SetPageChecked(pages[i]);
1005                 ret = btrfs_end_transaction(trans, root);
1006                 BUG_ON(ret);
1007                 mutex_unlock(&root->fs_info->fs_mutex);
1008
1009                 ret = nobh_commit_write(file, pages[i], offset,
1010                                          offset + this_write);
1011                 pos += this_write;
1012                 if (ret) {
1013                         err = ret;
1014                         goto failed;
1015                 }
1016                 WARN_ON(this_write > write_bytes);
1017                 write_bytes -= this_write;
1018         }
1019 failed:
1020         return err;
1021 }
1022
1023 static int prepare_pages(struct btrfs_trans_handle *trans,
1024                          struct btrfs_root *root,
1025                          struct file *file,
1026                          struct page **pages,
1027                          size_t num_pages,
1028                          loff_t pos,
1029                          size_t write_bytes)
1030 {
1031         int i;
1032         unsigned long index = pos >> PAGE_CACHE_SHIFT;
1033         struct inode *inode = file->f_path.dentry->d_inode;
1034         int offset;
1035         int err = 0;
1036         int ret;
1037         int this_write;
1038         loff_t isize = i_size_read(inode);
1039
1040         memset(pages, 0, num_pages * sizeof(struct page *));
1041
1042         for (i = 0; i < num_pages; i++) {
1043                 pages[i] = grab_cache_page(inode->i_mapping, index + i);
1044                 if (!pages[i]) {
1045                         err = -ENOMEM;
1046                         goto failed_release;
1047                 }
1048                 offset = pos & (PAGE_CACHE_SIZE -1);
1049                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1050                 ret = nobh_prepare_write(pages[i], offset,
1051                                          offset + this_write,
1052                                          btrfs_get_block);
1053                 pos += this_write;
1054                 if (ret) {
1055                         err = ret;
1056                         goto failed_truncate;
1057                 }
1058                 WARN_ON(this_write > write_bytes);
1059                 write_bytes -= this_write;
1060         }
1061         return 0;
1062
1063 failed_release:
1064         btrfs_drop_pages(pages, num_pages);
1065         return err;
1066
1067 failed_truncate:
1068         btrfs_drop_pages(pages, num_pages);
1069         if (pos > isize)
1070                 vmtruncate(inode, isize);
1071         return err;
1072 }
1073
1074 static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1075                                 size_t count, loff_t *ppos)
1076 {
1077         loff_t pos;
1078         size_t num_written = 0;
1079         int err = 0;
1080         int ret = 0;
1081         struct inode *inode = file->f_path.dentry->d_inode;
1082         struct btrfs_root *root = btrfs_sb(inode->i_sb);
1083         struct page *pages[1];
1084
1085         if (file->f_flags & O_DIRECT)
1086                 return -EINVAL;
1087         pos = *ppos;
1088
1089         vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
1090         current->backing_dev_info = inode->i_mapping->backing_dev_info;
1091         err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1092         if (err)
1093                 goto out;
1094         if (count == 0)
1095                 goto out;
1096         err = remove_suid(file->f_path.dentry);
1097         if (err)
1098                 goto out;
1099         file_update_time(file);
1100         mutex_lock(&inode->i_mutex);
1101         while(count > 0) {
1102                 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1103                 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
1104                 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
1105                                         PAGE_CACHE_SHIFT;
1106                 ret = prepare_pages(NULL, root, file, pages, num_pages,
1107                                     pos, write_bytes);
1108                 BUG_ON(ret);
1109                 ret = btrfs_copy_from_user(pos, num_pages,
1110                                            write_bytes, pages, buf);
1111                 BUG_ON(ret);
1112
1113                 ret = dirty_and_release_pages(NULL, root, file, pages,
1114                                               num_pages, pos, write_bytes);
1115                 BUG_ON(ret);
1116                 btrfs_drop_pages(pages, num_pages);
1117
1118                 buf += write_bytes;
1119                 count -= write_bytes;
1120                 pos += write_bytes;
1121                 num_written += write_bytes;
1122
1123                 balance_dirty_pages_ratelimited(inode->i_mapping);
1124                 cond_resched();
1125         }
1126         mutex_unlock(&inode->i_mutex);
1127 out:
1128         *ppos = pos;
1129         current->backing_dev_info = NULL;
1130         return num_written ? num_written : err;
1131 }
1132
1133 static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
1134                         unsigned long offset, unsigned long size)
1135 {
1136         char *kaddr;
1137         unsigned long left, count = desc->count;
1138
1139         if (size > count)
1140                 size = count;
1141
1142         if (!PageChecked(page)) {
1143                 /* FIXME, do it per block */
1144                 struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb);
1145                 int ret = btrfs_csum_verify_file_block(root,
1146                                           page->mapping->host->i_ino,
1147                                           page->index << PAGE_CACHE_SHIFT,
1148                                           kmap(page), PAGE_CACHE_SIZE);
1149                 if (ret) {
1150                         printk("failed to verify ino %lu page %lu\n",
1151                                page->mapping->host->i_ino,
1152                                page->index);
1153                         memset(page_address(page), 0, PAGE_CACHE_SIZE);
1154                 }
1155                 SetPageChecked(page);
1156                 kunmap(page);
1157         }
1158         /*
1159          * Faults on the destination of a read are common, so do it before
1160          * taking the kmap.
1161          */
1162         if (!fault_in_pages_writeable(desc->arg.buf, size)) {
1163                 kaddr = kmap_atomic(page, KM_USER0);
1164                 left = __copy_to_user_inatomic(desc->arg.buf,
1165                                                 kaddr + offset, size);
1166                 kunmap_atomic(kaddr, KM_USER0);
1167                 if (left == 0)
1168                         goto success;
1169         }
1170
1171         /* Do it the slow way */
1172         kaddr = kmap(page);
1173         left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
1174         kunmap(page);
1175
1176         if (left) {
1177                 size -= left;
1178                 desc->error = -EFAULT;
1179         }
1180 success:
1181         desc->count = count - size;
1182         desc->written += size;
1183         desc->arg.buf += size;
1184         return size;
1185 }
1186
1187 /**
1188  * btrfs_file_aio_read - filesystem read routine
1189  * @iocb:       kernel I/O control block
1190  * @iov:        io vector request
1191  * @nr_segs:    number of segments in the iovec
1192  * @pos:        current file position
1193  */
1194 static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1195                                    unsigned long nr_segs, loff_t pos)
1196 {
1197         struct file *filp = iocb->ki_filp;
1198         ssize_t retval;
1199         unsigned long seg;
1200         size_t count;
1201         loff_t *ppos = &iocb->ki_pos;
1202
1203         count = 0;
1204         for (seg = 0; seg < nr_segs; seg++) {
1205                 const struct iovec *iv = &iov[seg];
1206
1207                 /*
1208                  * If any segment has a negative length, or the cumulative
1209                  * length ever wraps negative then return -EINVAL.
1210                  */
1211                 count += iv->iov_len;
1212                 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
1213                         return -EINVAL;
1214                 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
1215                         continue;
1216                 if (seg == 0)
1217                         return -EFAULT;
1218                 nr_segs = seg;
1219                 count -= iv->iov_len;   /* This segment is no good */
1220                 break;
1221         }
1222         retval = 0;
1223         if (count) {
1224                 for (seg = 0; seg < nr_segs; seg++) {
1225                         read_descriptor_t desc;
1226
1227                         desc.written = 0;
1228                         desc.arg.buf = iov[seg].iov_base;
1229                         desc.count = iov[seg].iov_len;
1230                         if (desc.count == 0)
1231                                 continue;
1232                         desc.error = 0;
1233                         do_generic_file_read(filp, ppos, &desc,
1234                                              btrfs_read_actor);
1235                         retval += desc.written;
1236                         if (desc.error) {
1237                                 retval = retval ?: desc.error;
1238                                 break;
1239                         }
1240                 }
1241         }
1242         return retval;
1243 }
1244
1245 static int btrfs_get_sb(struct file_system_type *fs_type,
1246         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
1247 {
1248         return get_sb_bdev(fs_type, flags, dev_name, data,
1249                            btrfs_fill_super, mnt);
1250 }
1251
1252 static struct file_system_type btrfs_fs_type = {
1253         .owner          = THIS_MODULE,
1254         .name           = "btrfs",
1255         .get_sb         = btrfs_get_sb,
1256         .kill_sb        = kill_block_super,
1257         .fs_flags       = FS_REQUIRES_DEV,
1258 };
1259
1260 static struct super_operations btrfs_super_ops = {
1261         .statfs         = simple_statfs,
1262         .delete_inode   = btrfs_delete_inode,
1263         .put_super      = btrfs_put_super,
1264         .read_inode     = btrfs_read_locked_inode,
1265         .write_super    = btrfs_write_super,
1266         .sync_fs        = btrfs_sync_fs,
1267         .write_inode    = btrfs_write_inode,
1268 };
1269
1270 static struct inode_operations btrfs_dir_inode_operations = {
1271         .lookup         = btrfs_lookup,
1272         .create         = btrfs_create,
1273         .unlink         = btrfs_unlink,
1274         .mkdir          = btrfs_mkdir,
1275         .rmdir          = btrfs_rmdir,
1276 };
1277
1278 static struct file_operations btrfs_dir_file_operations = {
1279         .llseek         = generic_file_llseek,
1280         .read           = generic_read_dir,
1281         .readdir        = btrfs_readdir,
1282 };
1283
1284 static struct address_space_operations btrfs_aops = {
1285         .readpage       = btrfs_readpage,
1286         .readpages      = btrfs_readpages,
1287         .writepage      = btrfs_writepage,
1288         .sync_page      = block_sync_page,
1289         .prepare_write  = btrfs_prepare_write,
1290         .commit_write   = btrfs_commit_write,
1291 };
1292
1293 static struct inode_operations btrfs_file_inode_operations = {
1294         .truncate       = btrfs_truncate,
1295 };
1296
1297 static struct file_operations btrfs_file_operations = {
1298         .llseek         = generic_file_llseek,
1299         .read           = do_sync_read,
1300         .aio_read       = btrfs_file_aio_read,
1301         .write          = btrfs_file_write,
1302         .mmap           = generic_file_mmap,
1303         .open           = generic_file_open,
1304 };
1305
1306 static int __init init_btrfs_fs(void)
1307 {
1308         printk("btrfs loaded!\n");
1309         return register_filesystem(&btrfs_fs_type);
1310 }
1311
1312 static void __exit exit_btrfs_fs(void)
1313 {
1314         unregister_filesystem(&btrfs_fs_type);
1315         printk("btrfs unloaded\n");
1316 }
1317
1318 module_init(init_btrfs_fs)
1319 module_exit(exit_btrfs_fs)
1320
1321 MODULE_LICENSE("GPL");