46f869d6d0fb9d4a5312ddd6d1d88569c0493286
[linux-2.6-block.git] / fs / btrfs / inode.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/module.h>
20 #include <linux/buffer_head.h>
21 #include <linux/fs.h>
22 #include <linux/pagemap.h>
23 #include <linux/highmem.h>
24 #include <linux/time.h>
25 #include <linux/init.h>
26 #include <linux/string.h>
27 #include <linux/smp_lock.h>
28 #include <linux/backing-dev.h>
29 #include <linux/mpage.h>
30 #include <linux/swap.h>
31 #include <linux/writeback.h>
32 #include <linux/statfs.h>
33 #include <linux/compat.h>
34 #include "ctree.h"
35 #include "disk-io.h"
36 #include "transaction.h"
37 #include "btrfs_inode.h"
38 #include "ioctl.h"
39 #include "print-tree.h"
40
/*
 * Arguments passed through iget5_locked() to btrfs_find_actor() and
 * btrfs_init_locked_inode(): an inode is matched on its inode number
 * together with the root it belongs to.
 */
struct btrfs_iget_args {
        u64 ino;                        /* inode number to match / assign */
        struct btrfs_root *root;        /* root the inode belongs to */
};
45
/*
 * Operation tables referenced when inodes are set up below.  They are only
 * declared here; presumably the initialised definitions follow later in the
 * file (not visible from this section).
 */
static struct inode_operations btrfs_dir_inode_operations;
static struct inode_operations btrfs_symlink_inode_operations;
static struct inode_operations btrfs_dir_ro_inode_operations;
static struct inode_operations btrfs_file_inode_operations;
static struct address_space_operations btrfs_aops;
static struct address_space_operations btrfs_symlink_aops;
static struct file_operations btrfs_dir_file_operations;
53
/*
 * Slab cache pointers.  Only btrfs_inode_cachep is private to this file; the
 * others have external linkage.  NOTE(review): judging by the names they back
 * inodes, transaction handles, transactions, bit radix nodes and paths --
 * confirm against the code that creates them.
 */
static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_bit_radix_cachep;
struct kmem_cache *btrfs_path_cachep;
59
/*
 * Translation from the file-type bits of an inode mode to the btrfs
 * directory entry type.  The table is indexed by (mode & S_IFMT) >> S_SHIFT;
 * slots without an explicit initialiser are zero.
 */
#define S_SHIFT 12
static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
        [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
        [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
        [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
        [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
        [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
        [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
        [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
};
70
71 void btrfs_read_locked_inode(struct inode *inode)
72 {
73         struct btrfs_path *path;
74         struct btrfs_inode_item *inode_item;
75         struct btrfs_root *root = BTRFS_I(inode)->root;
76         struct btrfs_key location;
77         u64 alloc_group_block;
78         int ret;
79
80         path = btrfs_alloc_path();
81         BUG_ON(!path);
82         btrfs_init_path(path);
83         mutex_lock(&root->fs_info->fs_mutex);
84
85         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
86         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
87         if (ret) {
88                 btrfs_free_path(path);
89                 goto make_bad;
90         }
91         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
92                                   path->slots[0],
93                                   struct btrfs_inode_item);
94
95         inode->i_mode = btrfs_inode_mode(inode_item);
96         inode->i_nlink = btrfs_inode_nlink(inode_item);
97         inode->i_uid = btrfs_inode_uid(inode_item);
98         inode->i_gid = btrfs_inode_gid(inode_item);
99         inode->i_size = btrfs_inode_size(inode_item);
100         inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
101         inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
102         inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
103         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
104         inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
105         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
106         inode->i_blocks = btrfs_inode_nblocks(inode_item);
107         inode->i_generation = btrfs_inode_generation(inode_item);
108         alloc_group_block = btrfs_inode_block_group(inode_item);
109         BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
110                                                        alloc_group_block);
111
112         btrfs_free_path(path);
113         inode_item = NULL;
114
115         mutex_unlock(&root->fs_info->fs_mutex);
116
117         switch (inode->i_mode & S_IFMT) {
118 #if 0
119         default:
120                 init_special_inode(inode, inode->i_mode,
121                                    btrfs_inode_rdev(inode_item));
122                 break;
123 #endif
124         case S_IFREG:
125                 inode->i_mapping->a_ops = &btrfs_aops;
126                 inode->i_fop = &btrfs_file_operations;
127                 inode->i_op = &btrfs_file_inode_operations;
128                 break;
129         case S_IFDIR:
130                 inode->i_fop = &btrfs_dir_file_operations;
131                 if (root == root->fs_info->tree_root)
132                         inode->i_op = &btrfs_dir_ro_inode_operations;
133                 else
134                         inode->i_op = &btrfs_dir_inode_operations;
135                 break;
136         case S_IFLNK:
137                 inode->i_op = &btrfs_symlink_inode_operations;
138                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
139                 break;
140         }
141         return;
142
143 make_bad:
144         btrfs_release_path(root, path);
145         btrfs_free_path(path);
146         mutex_unlock(&root->fs_info->fs_mutex);
147         make_bad_inode(inode);
148 }
149
150 static void fill_inode_item(struct btrfs_inode_item *item,
151                             struct inode *inode)
152 {
153         btrfs_set_inode_uid(item, inode->i_uid);
154         btrfs_set_inode_gid(item, inode->i_gid);
155         btrfs_set_inode_size(item, inode->i_size);
156         btrfs_set_inode_mode(item, inode->i_mode);
157         btrfs_set_inode_nlink(item, inode->i_nlink);
158         btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
159         btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
160         btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
161         btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
162         btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
163         btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
164         btrfs_set_inode_nblocks(item, inode->i_blocks);
165         btrfs_set_inode_generation(item, inode->i_generation);
166         btrfs_set_inode_block_group(item,
167                                     BTRFS_I(inode)->block_group->key.objectid);
168 }
169
170 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
171                               struct btrfs_root *root,
172                               struct inode *inode)
173 {
174         struct btrfs_inode_item *inode_item;
175         struct btrfs_path *path;
176         int ret;
177
178         path = btrfs_alloc_path();
179         BUG_ON(!path);
180         btrfs_init_path(path);
181         ret = btrfs_lookup_inode(trans, root, path,
182                                  &BTRFS_I(inode)->location, 1);
183         if (ret) {
184                 if (ret > 0)
185                         ret = -ENOENT;
186                 goto failed;
187         }
188
189         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
190                                   path->slots[0],
191                                   struct btrfs_inode_item);
192
193         fill_inode_item(inode_item, inode);
194         btrfs_mark_buffer_dirty(path->nodes[0]);
195         ret = 0;
196 failed:
197         btrfs_release_path(root, path);
198         btrfs_free_path(path);
199         return ret;
200 }
201
202
203 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
204                               struct btrfs_root *root,
205                               struct inode *dir,
206                               struct dentry *dentry)
207 {
208         struct btrfs_path *path;
209         const char *name = dentry->d_name.name;
210         int name_len = dentry->d_name.len;
211         int ret = 0;
212         u64 objectid;
213         struct btrfs_dir_item *di;
214
215         path = btrfs_alloc_path();
216         BUG_ON(!path);
217         btrfs_init_path(path);
218         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
219                                     name, name_len, -1);
220         if (IS_ERR(di)) {
221                 ret = PTR_ERR(di);
222                 goto err;
223         }
224         if (!di) {
225                 ret = -ENOENT;
226                 goto err;
227         }
228         objectid = btrfs_disk_key_objectid(&di->location);
229         ret = btrfs_delete_one_dir_name(trans, root, path, di);
230         BUG_ON(ret);
231         btrfs_release_path(root, path);
232
233         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
234                                          objectid, name, name_len, -1);
235         if (IS_ERR(di)) {
236                 ret = PTR_ERR(di);
237                 goto err;
238         }
239         if (!di) {
240                 ret = -ENOENT;
241                 goto err;
242         }
243         ret = btrfs_delete_one_dir_name(trans, root, path, di);
244         BUG_ON(ret);
245
246         dentry->d_inode->i_ctime = dir->i_ctime;
247 err:
248         btrfs_free_path(path);
249         if (!ret) {
250                 dir->i_size -= name_len * 2;
251                 btrfs_update_inode(trans, root, dir);
252                 drop_nlink(dentry->d_inode);
253                 btrfs_update_inode(trans, root, dentry->d_inode);
254                 dir->i_sb->s_dirt = 1;
255         }
256         return ret;
257 }
258
259 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
260 {
261         struct btrfs_root *root;
262         struct btrfs_trans_handle *trans;
263         int ret;
264
265         root = BTRFS_I(dir)->root;
266         mutex_lock(&root->fs_info->fs_mutex);
267         trans = btrfs_start_transaction(root, 1);
268         btrfs_set_trans_block_group(trans, dir);
269         ret = btrfs_unlink_trans(trans, root, dir, dentry);
270         btrfs_end_transaction(trans, root);
271         mutex_unlock(&root->fs_info->fs_mutex);
272         btrfs_btree_balance_dirty(root);
273         return ret;
274 }
275
276 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
277 {
278         struct inode *inode = dentry->d_inode;
279         int err;
280         int ret;
281         struct btrfs_root *root = BTRFS_I(dir)->root;
282         struct btrfs_path *path;
283         struct btrfs_key key;
284         struct btrfs_trans_handle *trans;
285         struct btrfs_key found_key;
286         int found_type;
287         struct btrfs_leaf *leaf;
288         char *goodnames = "..";
289
290         path = btrfs_alloc_path();
291         BUG_ON(!path);
292         btrfs_init_path(path);
293         mutex_lock(&root->fs_info->fs_mutex);
294         trans = btrfs_start_transaction(root, 1);
295         btrfs_set_trans_block_group(trans, dir);
296         key.objectid = inode->i_ino;
297         key.offset = (u64)-1;
298         key.flags = (u32)-1;
299         while(1) {
300                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
301                 if (ret < 0) {
302                         err = ret;
303                         goto out;
304                 }
305                 BUG_ON(ret == 0);
306                 if (path->slots[0] == 0) {
307                         err = -ENOENT;
308                         goto out;
309                 }
310                 path->slots[0]--;
311                 leaf = btrfs_buffer_leaf(path->nodes[0]);
312                 btrfs_disk_key_to_cpu(&found_key,
313                                       &leaf->items[path->slots[0]].key);
314                 found_type = btrfs_key_type(&found_key);
315                 if (found_key.objectid != inode->i_ino) {
316                         err = -ENOENT;
317                         goto out;
318                 }
319                 if ((found_type != BTRFS_DIR_ITEM_KEY &&
320                      found_type != BTRFS_DIR_INDEX_KEY) ||
321                     (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
322                     !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
323                         err = -ENOTEMPTY;
324                         goto out;
325                 }
326                 ret = btrfs_del_item(trans, root, path);
327                 BUG_ON(ret);
328
329                 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
330                         break;
331                 btrfs_release_path(root, path);
332         }
333         ret = 0;
334         btrfs_release_path(root, path);
335
336         /* now the directory is empty */
337         err = btrfs_unlink_trans(trans, root, dir, dentry);
338         if (!err) {
339                 inode->i_size = 0;
340         }
341 out:
342         btrfs_release_path(root, path);
343         btrfs_free_path(path);
344         mutex_unlock(&root->fs_info->fs_mutex);
345         ret = btrfs_end_transaction(trans, root);
346         btrfs_btree_balance_dirty(root);
347         if (ret && !err)
348                 err = ret;
349         return err;
350 }
351
352 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
353                             struct btrfs_root *root,
354                             struct inode *inode)
355 {
356         struct btrfs_path *path;
357         int ret;
358
359         clear_inode(inode);
360
361         path = btrfs_alloc_path();
362         BUG_ON(!path);
363         btrfs_init_path(path);
364         ret = btrfs_lookup_inode(trans, root, path,
365                                  &BTRFS_I(inode)->location, -1);
366         BUG_ON(ret);
367         ret = btrfs_del_item(trans, root, path);
368         BUG_ON(ret);
369         btrfs_free_path(path);
370         return ret;
371 }
372
373 /*
374  * truncates go from a high offset to a low offset.  So, walk
375  * from hi to lo in the node and issue readas.  Stop when you find
376  * keys from a different objectid
377  */
378 static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path,
379                            u64 objectid)
380 {
381         struct btrfs_node *node;
382         int i;
383         int nritems;
384         u64 item_objectid;
385         u64 blocknr;
386         int slot;
387         int ret;
388
389         if (!path->nodes[1])
390                 return;
391         node = btrfs_buffer_node(path->nodes[1]);
392         slot = path->slots[1];
393         if (slot == 0)
394                 return;
395         nritems = btrfs_header_nritems(&node->header);
396         for (i = slot - 1; i >= 0; i--) {
397                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
398                 if (item_objectid != objectid)
399                         break;
400                 blocknr = btrfs_node_blockptr(node, i);
401                 ret = readahead_tree_block(root, blocknr);
402                 if (ret)
403                         break;
404         }
405 }
406
407 /*
408  * this can truncate away extent items, csum items and directory items.
409  * It starts at a high offset and removes keys until it can't find
410  * any higher than i_size.
411  *
412  * csum items that cross the new i_size are truncated to the new size
413  * as well.
414  */
415 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
416                                    struct btrfs_root *root,
417                                    struct inode *inode)
418 {
419         int ret;
420         struct btrfs_path *path;
421         struct btrfs_key key;
422         struct btrfs_disk_key *found_key;
423         u32 found_type;
424         struct btrfs_leaf *leaf;
425         struct btrfs_file_extent_item *fi;
426         u64 extent_start = 0;
427         u64 extent_num_blocks = 0;
428         u64 item_end = 0;
429         int found_extent;
430         int del_item;
431
432         path = btrfs_alloc_path();
433         BUG_ON(!path);
434         /* FIXME, add redo link to tree so we don't leak on crash */
435         key.objectid = inode->i_ino;
436         key.offset = (u64)-1;
437         key.flags = (u32)-1;
438         while(1) {
439                 btrfs_init_path(path);
440                 fi = NULL;
441                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
442                 if (ret < 0) {
443                         goto error;
444                 }
445                 if (ret > 0) {
446                         BUG_ON(path->slots[0] == 0);
447                         path->slots[0]--;
448                 }
449                 reada_truncate(root, path, inode->i_ino);
450                 leaf = btrfs_buffer_leaf(path->nodes[0]);
451                 found_key = &leaf->items[path->slots[0]].key;
452                 found_type = btrfs_disk_key_type(found_key);
453
454                 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
455                         break;
456                 if (found_type != BTRFS_CSUM_ITEM_KEY &&
457                     found_type != BTRFS_DIR_ITEM_KEY &&
458                     found_type != BTRFS_DIR_INDEX_KEY &&
459                     found_type != BTRFS_EXTENT_DATA_KEY)
460                         break;
461
462                 item_end = btrfs_disk_key_offset(found_key);
463                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
464                         fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
465                                             path->slots[0],
466                                             struct btrfs_file_extent_item);
467                         if (btrfs_file_extent_type(fi) !=
468                             BTRFS_FILE_EXTENT_INLINE) {
469                                 item_end += btrfs_file_extent_num_blocks(fi) <<
470                                                 inode->i_blkbits;
471                         }
472                 }
473                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
474                         ret = btrfs_csum_truncate(trans, root, path,
475                                                   inode->i_size);
476                         BUG_ON(ret);
477                 }
478                 if (item_end < inode->i_size) {
479                         if (found_type) {
480                                 btrfs_set_key_type(&key, found_type - 1);
481                                 continue;
482                         }
483                         break;
484                 }
485                 if (btrfs_disk_key_offset(found_key) >= inode->i_size)
486                         del_item = 1;
487                 else
488                         del_item = 0;
489                 found_extent = 0;
490
491                 /* FIXME, shrink the extent if the ref count is only 1 */
492                 if (found_type == BTRFS_EXTENT_DATA_KEY &&
493                            btrfs_file_extent_type(fi) !=
494                            BTRFS_FILE_EXTENT_INLINE) {
495                         u64 num_dec;
496                         if (!del_item) {
497                                 u64 orig_num_blocks =
498                                         btrfs_file_extent_num_blocks(fi);
499                                 extent_num_blocks = inode->i_size -
500                                         btrfs_disk_key_offset(found_key) +
501                                         root->blocksize - 1;
502                                 extent_num_blocks >>= inode->i_blkbits;
503                                 btrfs_set_file_extent_num_blocks(fi,
504                                                          extent_num_blocks);
505                                 inode->i_blocks -= (orig_num_blocks -
506                                         extent_num_blocks) << 3;
507                                 mark_buffer_dirty(path->nodes[0]);
508                         } else {
509                                 extent_start =
510                                         btrfs_file_extent_disk_blocknr(fi);
511                                 extent_num_blocks =
512                                         btrfs_file_extent_disk_num_blocks(fi);
513                                 /* FIXME blocksize != 4096 */
514                                 num_dec = btrfs_file_extent_num_blocks(fi) << 3;
515                                 if (extent_start != 0) {
516                                         found_extent = 1;
517                                         inode->i_blocks -= num_dec;
518                                 }
519                         }
520                 }
521                 if (del_item) {
522                         ret = btrfs_del_item(trans, root, path);
523                         BUG_ON(ret);
524                 } else {
525                         break;
526                 }
527                 btrfs_release_path(root, path);
528                 if (found_extent) {
529                         ret = btrfs_free_extent(trans, root, extent_start,
530                                                 extent_num_blocks, 0);
531                         BUG_ON(ret);
532                 }
533         }
534         ret = 0;
535 error:
536         btrfs_release_path(root, path);
537         btrfs_free_path(path);
538         inode->i_sb->s_dirt = 1;
539         return ret;
540 }
541
542 /*
543  * taken from block_truncate_page, but does cow as it zeros out
544  * any bytes left in the last page in the file.
545  */
546 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
547 {
548         struct inode *inode = mapping->host;
549         unsigned blocksize = 1 << inode->i_blkbits;
550         pgoff_t index = from >> PAGE_CACHE_SHIFT;
551         unsigned offset = from & (PAGE_CACHE_SIZE-1);
552         struct page *page;
553         char *kaddr;
554         int ret = 0;
555         struct btrfs_root *root = BTRFS_I(inode)->root;
556         u64 alloc_hint = 0;
557         struct btrfs_key ins;
558         struct btrfs_trans_handle *trans;
559
560         if ((offset & (blocksize - 1)) == 0)
561                 goto out;
562
563         ret = -ENOMEM;
564         page = grab_cache_page(mapping, index);
565         if (!page)
566                 goto out;
567
568         if (!PageUptodate(page)) {
569                 ret = mpage_readpage(page, btrfs_get_block);
570                 lock_page(page);
571                 if (!PageUptodate(page)) {
572                         ret = -EIO;
573                         goto out;
574                 }
575         }
576         mutex_lock(&root->fs_info->fs_mutex);
577         trans = btrfs_start_transaction(root, 1);
578         btrfs_set_trans_block_group(trans, inode);
579
580         ret = btrfs_drop_extents(trans, root, inode,
581                                  page->index << PAGE_CACHE_SHIFT,
582                                  (page->index + 1) << PAGE_CACHE_SHIFT,
583                                  &alloc_hint);
584         BUG_ON(ret);
585         ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1,
586                                  alloc_hint, (u64)-1, &ins, 1);
587         BUG_ON(ret);
588         ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
589                                        page->index << PAGE_CACHE_SHIFT,
590                                        ins.objectid, 1, 1);
591         BUG_ON(ret);
592         SetPageChecked(page);
593         kaddr = kmap(page);
594         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
595         flush_dcache_page(page);
596         btrfs_csum_file_block(trans, root, inode->i_ino,
597                               page->index << PAGE_CACHE_SHIFT,
598                               kaddr, PAGE_CACHE_SIZE);
599         kunmap(page);
600         btrfs_end_transaction(trans, root);
601         mutex_unlock(&root->fs_info->fs_mutex);
602
603         set_page_dirty(page);
604         unlock_page(page);
605         page_cache_release(page);
606 out:
607         return ret;
608 }
609
610 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
611 {
612         struct inode *inode = dentry->d_inode;
613         int err;
614
615         err = inode_change_ok(inode, attr);
616         if (err)
617                 return err;
618
619         if (S_ISREG(inode->i_mode) &&
620             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
621                 struct btrfs_trans_handle *trans;
622                 struct btrfs_root *root = BTRFS_I(inode)->root;
623                 u64 mask = root->blocksize - 1;
624                 u64 pos = (inode->i_size + mask) & ~mask;
625                 u64 hole_size;
626
627                 if (attr->ia_size <= pos)
628                         goto out;
629
630                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
631
632                 hole_size = (attr->ia_size - pos + mask) & ~mask;
633                 hole_size >>= inode->i_blkbits;
634
635                 mutex_lock(&root->fs_info->fs_mutex);
636                 trans = btrfs_start_transaction(root, 1);
637                 btrfs_set_trans_block_group(trans, inode);
638                 err = btrfs_insert_file_extent(trans, root, inode->i_ino,
639                                                pos, 0, 0, hole_size);
640                 BUG_ON(err);
641                 btrfs_end_transaction(trans, root);
642                 mutex_unlock(&root->fs_info->fs_mutex);
643         }
644 out:
645         err = inode_setattr(inode, attr);
646
647         return err;
648 }
649 void btrfs_delete_inode(struct inode *inode)
650 {
651         struct btrfs_trans_handle *trans;
652         struct btrfs_root *root = BTRFS_I(inode)->root;
653         int ret;
654
655         truncate_inode_pages(&inode->i_data, 0);
656         if (is_bad_inode(inode)) {
657                 goto no_delete;
658         }
659         inode->i_size = 0;
660         mutex_lock(&root->fs_info->fs_mutex);
661         trans = btrfs_start_transaction(root, 1);
662         btrfs_set_trans_block_group(trans, inode);
663         ret = btrfs_truncate_in_trans(trans, root, inode);
664         BUG_ON(ret);
665         btrfs_free_inode(trans, root, inode);
666         btrfs_end_transaction(trans, root);
667         mutex_unlock(&root->fs_info->fs_mutex);
668         btrfs_btree_balance_dirty(root);
669         return;
670 no_delete:
671         clear_inode(inode);
672 }
673
674 /*
675  * this returns the key found in the dir entry in the location pointer.
676  * If no dir entries were found, location->objectid is 0.
677  */
678 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
679                                struct btrfs_key *location)
680 {
681         const char *name = dentry->d_name.name;
682         int namelen = dentry->d_name.len;
683         struct btrfs_dir_item *di;
684         struct btrfs_path *path;
685         struct btrfs_root *root = BTRFS_I(dir)->root;
686         int ret;
687
688         path = btrfs_alloc_path();
689         BUG_ON(!path);
690         btrfs_init_path(path);
691         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
692                                     namelen, 0);
693         if (!di || IS_ERR(di)) {
694                 location->objectid = 0;
695                 ret = 0;
696                 goto out;
697         }
698         btrfs_disk_key_to_cpu(location, &di->location);
699 out:
700         btrfs_release_path(root, path);
701         btrfs_free_path(path);
702         return ret;
703 }
704
705 /*
706  * when we hit a tree root in a directory, the btrfs part of the inode
707  * needs to be changed to reflect the root directory of the tree root.  This
708  * is kind of like crossing a mount point.
709  */
710 static int fixup_tree_root_location(struct btrfs_root *root,
711                              struct btrfs_key *location,
712                              struct btrfs_root **sub_root)
713 {
714         struct btrfs_path *path;
715         struct btrfs_root_item *ri;
716
717         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
718                 return 0;
719         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
720                 return 0;
721
722         path = btrfs_alloc_path();
723         BUG_ON(!path);
724         mutex_lock(&root->fs_info->fs_mutex);
725
726         *sub_root = btrfs_read_fs_root(root->fs_info, location);
727         if (IS_ERR(*sub_root))
728                 return PTR_ERR(*sub_root);
729
730         ri = &(*sub_root)->root_item;
731         location->objectid = btrfs_root_dirid(ri);
732         location->flags = 0;
733         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
734         location->offset = 0;
735
736         btrfs_free_path(path);
737         mutex_unlock(&root->fs_info->fs_mutex);
738         return 0;
739 }
740
741 static int btrfs_init_locked_inode(struct inode *inode, void *p)
742 {
743         struct btrfs_iget_args *args = p;
744         inode->i_ino = args->ino;
745         BTRFS_I(inode)->root = args->root;
746         return 0;
747 }
748
749 static int btrfs_find_actor(struct inode *inode, void *opaque)
750 {
751         struct btrfs_iget_args *args = opaque;
752         return (args->ino == inode->i_ino &&
753                 args->root == BTRFS_I(inode)->root);
754 }
755
756 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
757                                 struct btrfs_root *root)
758 {
759         struct inode *inode;
760         struct btrfs_iget_args args;
761         args.ino = objectid;
762         args.root = root;
763
764         inode = iget5_locked(s, objectid, btrfs_find_actor,
765                              btrfs_init_locked_inode,
766                              (void *)&args);
767         return inode;
768 }
769
770 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
771                                    struct nameidata *nd)
772 {
773         struct inode * inode;
774         struct btrfs_inode *bi = BTRFS_I(dir);
775         struct btrfs_root *root = bi->root;
776         struct btrfs_root *sub_root = root;
777         struct btrfs_key location;
778         int ret;
779
780         if (dentry->d_name.len > BTRFS_NAME_LEN)
781                 return ERR_PTR(-ENAMETOOLONG);
782         mutex_lock(&root->fs_info->fs_mutex);
783         ret = btrfs_inode_by_name(dir, dentry, &location);
784         mutex_unlock(&root->fs_info->fs_mutex);
785         if (ret < 0)
786                 return ERR_PTR(ret);
787         inode = NULL;
788         if (location.objectid) {
789                 ret = fixup_tree_root_location(root, &location, &sub_root);
790                 if (ret < 0)
791                         return ERR_PTR(ret);
792                 if (ret > 0)
793                         return ERR_PTR(-ENOENT);
794                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
795                                           sub_root);
796                 if (!inode)
797                         return ERR_PTR(-EACCES);
798                 if (inode->i_state & I_NEW) {
799                         /* the inode and parent dir are two different roots */
800                         if (sub_root != root) {
801                                 igrab(inode);
802                                 sub_root->inode = inode;
803                         }
804                         BTRFS_I(inode)->root = sub_root;
805                         memcpy(&BTRFS_I(inode)->location, &location,
806                                sizeof(location));
807                         btrfs_read_locked_inode(inode);
808                         unlock_new_inode(inode);
809                 }
810         }
811         return d_splice_alias(inode, dentry);
812 }
813
814 /*
815  * readahead one full node of leaves as long as their keys include
816  * the objectid supplied
817  */
818 static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path,
819                          u64 objectid)
820 {
821         struct btrfs_node *node;
822         int i;
823         u32 nritems;
824         u64 item_objectid;
825         u64 blocknr;
826         int slot;
827         int ret;
828
829         if (!path->nodes[1])
830                 return;
831         node = btrfs_buffer_node(path->nodes[1]);
832         slot = path->slots[1];
833         nritems = btrfs_header_nritems(&node->header);
834         for (i = slot + 1; i < nritems; i++) {
835                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
836                 if (item_objectid != objectid)
837                         break;
838                 blocknr = btrfs_node_blockptr(node, i);
839                 ret = readahead_tree_block(root, blocknr);
840                 if (ret)
841                         break;
842         }
843 }
844 static unsigned char btrfs_filetype_table[] = {
845         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
846 };
847
848 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
849 {
850         struct inode *inode = filp->f_path.dentry->d_inode;
851         struct btrfs_root *root = BTRFS_I(inode)->root;
852         struct btrfs_item *item;
853         struct btrfs_dir_item *di;
854         struct btrfs_key key;
855         struct btrfs_path *path;
856         int ret;
857         u32 nritems;
858         struct btrfs_leaf *leaf;
859         int slot;
860         int advance;
861         unsigned char d_type;
862         int over = 0;
863         u32 di_cur;
864         u32 di_total;
865         u32 di_len;
866         int key_type = BTRFS_DIR_INDEX_KEY;
867
868         /* FIXME, use a real flag for deciding about the key type */
869         if (root->fs_info->tree_root == root)
870                 key_type = BTRFS_DIR_ITEM_KEY;
871         mutex_lock(&root->fs_info->fs_mutex);
872         key.objectid = inode->i_ino;
873         key.flags = 0;
874         btrfs_set_key_type(&key, key_type);
875         key.offset = filp->f_pos;
876         path = btrfs_alloc_path();
877         btrfs_init_path(path);
878         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
879         if (ret < 0)
880                 goto err;
881         advance = 0;
882         reada_leaves(root, path, inode->i_ino);
883         while(1) {
884                 leaf = btrfs_buffer_leaf(path->nodes[0]);
885                 nritems = btrfs_header_nritems(&leaf->header);
886                 slot = path->slots[0];
887                 if (advance || slot >= nritems) {
888                         if (slot >= nritems -1) {
889                                 reada_leaves(root, path, inode->i_ino);
890                                 ret = btrfs_next_leaf(root, path);
891                                 if (ret)
892                                         break;
893                                 leaf = btrfs_buffer_leaf(path->nodes[0]);
894                                 nritems = btrfs_header_nritems(&leaf->header);
895                                 slot = path->slots[0];
896                         } else {
897                                 slot++;
898                                 path->slots[0]++;
899                         }
900                 }
901                 advance = 1;
902                 item = leaf->items + slot;
903                 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
904                         break;
905                 if (btrfs_disk_key_type(&item->key) != key_type)
906                         break;
907                 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
908                         continue;
909                 filp->f_pos = btrfs_disk_key_offset(&item->key);
910                 advance = 1;
911                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
912                 di_cur = 0;
913                 di_total = btrfs_item_size(leaf->items + slot);
914                 while(di_cur < di_total) {
915                         d_type = btrfs_filetype_table[btrfs_dir_type(di)];
916                         over = filldir(dirent, (const char *)(di + 1),
917                                        btrfs_dir_name_len(di),
918                                        btrfs_disk_key_offset(&item->key),
919                                        btrfs_disk_key_objectid(&di->location),
920                                        d_type);
921                         if (over)
922                                 goto nopos;
923                         di_len = btrfs_dir_name_len(di) + sizeof(*di);
924                         di_cur += di_len;
925                         di = (struct btrfs_dir_item *)((char *)di + di_len);
926                 }
927         }
928         filp->f_pos++;
929 nopos:
930         ret = 0;
931 err:
932         btrfs_release_path(root, path);
933         btrfs_free_path(path);
934         mutex_unlock(&root->fs_info->fs_mutex);
935         return ret;
936 }
937
938 int btrfs_write_inode(struct inode *inode, int wait)
939 {
940         struct btrfs_root *root = BTRFS_I(inode)->root;
941         struct btrfs_trans_handle *trans;
942         int ret = 0;
943
944         if (wait) {
945                 mutex_lock(&root->fs_info->fs_mutex);
946                 trans = btrfs_start_transaction(root, 1);
947                 btrfs_set_trans_block_group(trans, inode);
948                 ret = btrfs_commit_transaction(trans, root);
949                 mutex_unlock(&root->fs_info->fs_mutex);
950         }
951         return ret;
952 }
953
954 /*
955  * This is somewhat expense, updating the tree every time the
956  * inode changes.  But, it is most likely to find the inode in cache.
957  * FIXME, needs more benchmarking...there are no reasons other than performance
958  * to keep or drop this code.
959  */
960 void btrfs_dirty_inode(struct inode *inode)
961 {
962         struct btrfs_root *root = BTRFS_I(inode)->root;
963         struct btrfs_trans_handle *trans;
964
965         mutex_lock(&root->fs_info->fs_mutex);
966         trans = btrfs_start_transaction(root, 1);
967         btrfs_set_trans_block_group(trans, inode);
968         btrfs_update_inode(trans, root, inode);
969         btrfs_end_transaction(trans, root);
970         mutex_unlock(&root->fs_info->fs_mutex);
971         btrfs_btree_balance_dirty(root);
972 }
973
974 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
975                                      struct btrfs_root *root,
976                                      u64 objectid,
977                                      struct btrfs_block_group_cache *group,
978                                      int mode)
979 {
980         struct inode *inode;
981         struct btrfs_inode_item inode_item;
982         struct btrfs_key *location;
983         int ret;
984         int owner;
985
986         inode = new_inode(root->fs_info->sb);
987         if (!inode)
988                 return ERR_PTR(-ENOMEM);
989
990         BTRFS_I(inode)->root = root;
991         if (mode & S_IFDIR)
992                 owner = 0;
993         else
994                 owner = 1;
995         group = btrfs_find_block_group(root, group, 0, 0, owner);
996         BTRFS_I(inode)->block_group = group;
997
998         inode->i_uid = current->fsuid;
999         inode->i_gid = current->fsgid;
1000         inode->i_mode = mode;
1001         inode->i_ino = objectid;
1002         inode->i_blocks = 0;
1003         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1004         fill_inode_item(&inode_item, inode);
1005         location = &BTRFS_I(inode)->location;
1006         location->objectid = objectid;
1007         location->flags = 0;
1008         location->offset = 0;
1009         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1010
1011         ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
1012         BUG_ON(ret);
1013
1014         insert_inode_hash(inode);
1015         return inode;
1016 }
1017
1018 static inline u8 btrfs_inode_type(struct inode *inode)
1019 {
1020         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1021 }
1022
1023 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1024                             struct dentry *dentry, struct inode *inode)
1025 {
1026         int ret;
1027         struct btrfs_key key;
1028         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1029         key.objectid = inode->i_ino;
1030         key.flags = 0;
1031         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1032         key.offset = 0;
1033
1034         ret = btrfs_insert_dir_item(trans, root,
1035                                     dentry->d_name.name, dentry->d_name.len,
1036                                     dentry->d_parent->d_inode->i_ino,
1037                                     &key, btrfs_inode_type(inode));
1038         if (ret == 0) {
1039                 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
1040                 ret = btrfs_update_inode(trans, root,
1041                                          dentry->d_parent->d_inode);
1042         }
1043         return ret;
1044 }
1045
1046 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1047                             struct dentry *dentry, struct inode *inode)
1048 {
1049         int err = btrfs_add_link(trans, dentry, inode);
1050         if (!err) {
1051                 d_instantiate(dentry, inode);
1052                 return 0;
1053         }
1054         if (err > 0)
1055                 err = -EEXIST;
1056         return err;
1057 }
1058
1059 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1060                         int mode, struct nameidata *nd)
1061 {
1062         struct btrfs_trans_handle *trans;
1063         struct btrfs_root *root = BTRFS_I(dir)->root;
1064         struct inode *inode;
1065         int err;
1066         int drop_inode = 0;
1067         u64 objectid;
1068
1069         mutex_lock(&root->fs_info->fs_mutex);
1070         trans = btrfs_start_transaction(root, 1);
1071         btrfs_set_trans_block_group(trans, dir);
1072
1073         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1074         if (err) {
1075                 err = -ENOSPC;
1076                 goto out_unlock;
1077         }
1078
1079         inode = btrfs_new_inode(trans, root, objectid,
1080                                 BTRFS_I(dir)->block_group, mode);
1081         err = PTR_ERR(inode);
1082         if (IS_ERR(inode))
1083                 goto out_unlock;
1084
1085         btrfs_set_trans_block_group(trans, inode);
1086         err = btrfs_add_nondir(trans, dentry, inode);
1087         if (err)
1088                 drop_inode = 1;
1089         else {
1090                 inode->i_mapping->a_ops = &btrfs_aops;
1091                 inode->i_fop = &btrfs_file_operations;
1092                 inode->i_op = &btrfs_file_inode_operations;
1093         }
1094         dir->i_sb->s_dirt = 1;
1095         btrfs_update_inode_block_group(trans, inode);
1096         btrfs_update_inode_block_group(trans, dir);
1097 out_unlock:
1098         btrfs_end_transaction(trans, root);
1099         mutex_unlock(&root->fs_info->fs_mutex);
1100
1101         if (drop_inode) {
1102                 inode_dec_link_count(inode);
1103                 iput(inode);
1104         }
1105         btrfs_btree_balance_dirty(root);
1106         return err;
1107 }
1108
1109 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1110                       struct dentry *dentry)
1111 {
1112         struct btrfs_trans_handle *trans;
1113         struct btrfs_root *root = BTRFS_I(dir)->root;
1114         struct inode *inode = old_dentry->d_inode;
1115         int err;
1116         int drop_inode = 0;
1117
1118         if (inode->i_nlink == 0)
1119                 return -ENOENT;
1120
1121         inc_nlink(inode);
1122         mutex_lock(&root->fs_info->fs_mutex);
1123         trans = btrfs_start_transaction(root, 1);
1124         btrfs_set_trans_block_group(trans, dir);
1125         atomic_inc(&inode->i_count);
1126         err = btrfs_add_nondir(trans, dentry, inode);
1127         if (err)
1128                 drop_inode = 1;
1129         dir->i_sb->s_dirt = 1;
1130         btrfs_update_inode_block_group(trans, dir);
1131         btrfs_update_inode(trans, root, inode);
1132
1133         btrfs_end_transaction(trans, root);
1134         mutex_unlock(&root->fs_info->fs_mutex);
1135
1136         if (drop_inode) {
1137                 inode_dec_link_count(inode);
1138                 iput(inode);
1139         }
1140         btrfs_btree_balance_dirty(root);
1141         return err;
1142 }
1143
1144 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
1145                                 struct btrfs_root *root,
1146                                 u64 objectid, u64 dirid)
1147 {
1148         int ret;
1149         char buf[2];
1150         struct btrfs_key key;
1151
1152         buf[0] = '.';
1153         buf[1] = '.';
1154
1155         key.objectid = objectid;
1156         key.offset = 0;
1157         key.flags = 0;
1158         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1159
1160         ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
1161                                     &key, BTRFS_FT_DIR);
1162         if (ret)
1163                 goto error;
1164         key.objectid = dirid;
1165         ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
1166                                     &key, BTRFS_FT_DIR);
1167         if (ret)
1168                 goto error;
1169 error:
1170         return ret;
1171 }
1172
1173 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1174 {
1175         struct inode *inode;
1176         struct btrfs_trans_handle *trans;
1177         struct btrfs_root *root = BTRFS_I(dir)->root;
1178         int err = 0;
1179         int drop_on_err = 0;
1180         u64 objectid;
1181
1182         mutex_lock(&root->fs_info->fs_mutex);
1183         trans = btrfs_start_transaction(root, 1);
1184         btrfs_set_trans_block_group(trans, dir);
1185         if (IS_ERR(trans)) {
1186                 err = PTR_ERR(trans);
1187                 goto out_unlock;
1188         }
1189
1190         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1191         if (err) {
1192                 err = -ENOSPC;
1193                 goto out_unlock;
1194         }
1195
1196         inode = btrfs_new_inode(trans, root, objectid,
1197                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
1198         if (IS_ERR(inode)) {
1199                 err = PTR_ERR(inode);
1200                 goto out_fail;
1201         }
1202         drop_on_err = 1;
1203         inode->i_op = &btrfs_dir_inode_operations;
1204         inode->i_fop = &btrfs_dir_file_operations;
1205         btrfs_set_trans_block_group(trans, inode);
1206
1207         err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
1208         if (err)
1209                 goto out_fail;
1210
1211         inode->i_size = 6;
1212         err = btrfs_update_inode(trans, root, inode);
1213         if (err)
1214                 goto out_fail;
1215         err = btrfs_add_link(trans, dentry, inode);
1216         if (err)
1217                 goto out_fail;
1218         d_instantiate(dentry, inode);
1219         drop_on_err = 0;
1220         dir->i_sb->s_dirt = 1;
1221         btrfs_update_inode_block_group(trans, inode);
1222         btrfs_update_inode_block_group(trans, dir);
1223
1224 out_fail:
1225         btrfs_end_transaction(trans, root);
1226 out_unlock:
1227         mutex_unlock(&root->fs_info->fs_mutex);
1228         if (drop_on_err)
1229                 iput(inode);
1230         btrfs_btree_balance_dirty(root);
1231         return err;
1232 }
1233
1234 /*
1235  * FIBMAP and others want to pass in a fake buffer head.  They need to
1236  * use BTRFS_GET_BLOCK_NO_DIRECT to make sure we don't try to memcpy
1237  * any packed file data into the fake bh
1238  */
/*
 * Values for the 'create' argument of btrfs_get_block_lock(); CREATE and
 * NO_DIRECT are tested there as bit flags with '&'.
 */
/* no flag set: only look up an existing mapping */
1239 #define BTRFS_GET_BLOCK_NO_CREATE 0
/* drop any extent covering the block and allocate a new one */
1240 #define BTRFS_GET_BLOCK_CREATE 1
/* fail with -EINVAL on an inline extent instead of copying its data */
1241 #define BTRFS_GET_BLOCK_NO_DIRECT 2
1242
1243 /*
1244  * FIXME create==1 doe not work.
1245  */
1246 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1247                                 struct buffer_head *result, int create)
1248 {
1249         int ret;
1250         int err = 0;
1251         u64 blocknr;
1252         u64 extent_start = 0;
1253         u64 extent_end = 0;
1254         u64 objectid = inode->i_ino;
1255         u32 found_type;
1256         u64 alloc_hint = 0;
1257         struct btrfs_path *path;
1258         struct btrfs_root *root = BTRFS_I(inode)->root;
1259         struct btrfs_file_extent_item *item;
1260         struct btrfs_leaf *leaf;
1261         struct btrfs_disk_key *found_key;
1262         struct btrfs_trans_handle *trans = NULL;
1263
1264         path = btrfs_alloc_path();
1265         BUG_ON(!path);
1266         btrfs_init_path(path);
1267         if (create & BTRFS_GET_BLOCK_CREATE) {
1268                 WARN_ON(1);
1269                 /* this almost but not quite works */
1270                 trans = btrfs_start_transaction(root, 1);
1271                 if (!trans) {
1272                         err = -ENOMEM;
1273                         goto out;
1274                 }
1275                 ret = btrfs_drop_extents(trans, root, inode,
1276                                          iblock << inode->i_blkbits,
1277                                          (iblock + 1) << inode->i_blkbits,
1278                                          &alloc_hint);
1279                 BUG_ON(ret);
1280         }
1281
1282         ret = btrfs_lookup_file_extent(NULL, root, path,
1283                                        inode->i_ino,
1284                                        iblock << inode->i_blkbits, 0);
1285         if (ret < 0) {
1286                 err = ret;
1287                 goto out;
1288         }
1289
1290         if (ret != 0) {
1291                 if (path->slots[0] == 0) {
1292                         btrfs_release_path(root, path);
1293                         goto not_found;
1294                 }
1295                 path->slots[0]--;
1296         }
1297
1298         item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
1299                               struct btrfs_file_extent_item);
1300         leaf = btrfs_buffer_leaf(path->nodes[0]);
1301         blocknr = btrfs_file_extent_disk_blocknr(item);
1302         blocknr += btrfs_file_extent_offset(item);
1303
1304         /* are we inside the extent that was found? */
1305         found_key = &leaf->items[path->slots[0]].key;
1306         found_type = btrfs_disk_key_type(found_key);
1307         if (btrfs_disk_key_objectid(found_key) != objectid ||
1308             found_type != BTRFS_EXTENT_DATA_KEY) {
1309                 extent_end = 0;
1310                 extent_start = 0;
1311                 goto not_found;
1312         }
1313         found_type = btrfs_file_extent_type(item);
1314         extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1315         if (found_type == BTRFS_FILE_EXTENT_REG) {
1316                 extent_start = extent_start >> inode->i_blkbits;
1317                 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1318                 err = 0;
1319                 if (btrfs_file_extent_disk_blocknr(item) == 0)
1320                         goto out;
1321                 if (iblock >= extent_start && iblock < extent_end) {
1322                         btrfs_map_bh_to_logical(root, result, blocknr +
1323                                                 iblock - extent_start);
1324                         goto out;
1325                 }
1326         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1327                 char *ptr;
1328                 char *map;
1329                 u32 size;
1330
1331                 if (create & BTRFS_GET_BLOCK_NO_DIRECT) {
1332                         err = -EINVAL;
1333                         goto out;
1334                 }
1335                 size = btrfs_file_extent_inline_len(leaf->items +
1336                                                     path->slots[0]);
1337                 extent_end = (extent_start + size) >> inode->i_blkbits;
1338                 extent_start >>= inode->i_blkbits;
1339                 if (iblock < extent_start || iblock > extent_end) {
1340                         goto not_found;
1341                 }
1342                 ptr = btrfs_file_extent_inline_start(item);
1343                 map = kmap(result->b_page);
1344                 memcpy(map, ptr, size);
1345                 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1346                 flush_dcache_page(result->b_page);
1347                 kunmap(result->b_page);
1348                 set_buffer_uptodate(result);
1349                 SetPageChecked(result->b_page);
1350                 btrfs_map_bh_to_logical(root, result, 0);
1351         }
1352 not_found:
1353         if (create & BTRFS_GET_BLOCK_CREATE) {
1354                 struct btrfs_key ins;
1355                 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
1356                                          1, alloc_hint, (u64)-1,
1357                                          &ins, 1);
1358                 BUG_ON(ret);
1359                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
1360                                                iblock << inode->i_blkbits,
1361                                                ins.objectid, ins.offset,
1362                                                ins.offset);
1363                 BUG_ON(ret);
1364                 SetPageChecked(result->b_page);
1365                 btrfs_map_bh_to_logical(root, result, ins.objectid);
1366         }
1367 out:
1368         if (trans)
1369                 err = btrfs_end_transaction(trans, root);
1370         btrfs_free_path(path);
1371         return err;
1372 }
1373
1374 int btrfs_get_block(struct inode *inode, sector_t iblock,
1375                     struct buffer_head *result, int create)
1376 {
1377         int err;
1378         struct btrfs_root *root = BTRFS_I(inode)->root;
1379         mutex_lock(&root->fs_info->fs_mutex);
1380         err = btrfs_get_block_lock(inode, iblock, result, create);
1381         mutex_unlock(&root->fs_info->fs_mutex);
1382         return err;
1383 }
1384
1385 static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock,
1386                            struct buffer_head *result, int create)
1387 {
1388         struct btrfs_root *root = BTRFS_I(inode)->root;
1389         mutex_lock(&root->fs_info->fs_mutex);
1390         btrfs_get_block_lock(inode, iblock, result, BTRFS_GET_BLOCK_NO_DIRECT);
1391         mutex_unlock(&root->fs_info->fs_mutex);
1392         return 0;
1393 }
1394
1395 static sector_t btrfs_bmap(struct address_space *as, sector_t block)
1396 {
1397         return generic_block_bmap(as, block, btrfs_get_block_bmap);
1398 }
1399
1400 static int btrfs_prepare_write(struct file *file, struct page *page,
1401                                unsigned from, unsigned to)
1402 {
1403         return block_prepare_write(page, from, to, btrfs_get_block);
1404 }
1405
1406 static int btrfs_readpage(struct file *file, struct page *page)
1407 {
1408         return mpage_readpage(page, btrfs_get_block);
1409 }
1410
1411 /*
1412  * Aside from a tiny bit of packed file data handling, this is the
1413  * same as the generic code.
1414  *
1415  * While block_write_full_page is writing back the dirty buffers under
1416  * the page lock, whoever dirtied the buffers may decide to clean them
1417  * again at any time.  We handle that by only looking at the buffer
1418  * state inside lock_buffer().
1419  *
1420  * If block_write_full_page() is called for regular writeback
1421  * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1422  * locked buffer.   This only can happen if someone has written the buffer
1423  * directly, with submit_bh().  At the address_space level PageWriteback
1424  * prevents this contention from occurring.
1425  */
/*
 * inode: owner of the page, supplies i_size and the block size
 * page:  locked page to write; it is unlocked before returning
 * wbc:   writeback control; sync_mode and nonblocking decide whether we
 *        may wait for a buffer lock
 *
 * Works in three passes over the page's buffers: map the dirty unmapped
 * ones with btrfs_get_block(), lock and mark for async write those that
 * are dirty, then submit them.  Returns 0, or the error from
 * btrfs_get_block() after the recovery path has submitted whatever was
 * already mapped.
 */
1426 static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1427                                    struct writeback_control *wbc)
1428 {
1429         int err;
1430         sector_t block;
1431         sector_t last_block;
1432         struct buffer_head *bh, *head;
1433         const unsigned blocksize = 1 << inode->i_blkbits;
1434         int nr_underway = 0;
1435
1436         BUG_ON(!PageLocked(page));
1437
        /* index of the last block that still holds file data */
1438         last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1439
1440         if (!page_has_buffers(page)) {
1441                 create_empty_buffers(page, blocksize,
1442                                         (1 << BH_Dirty)|(1 << BH_Uptodate));
1443         }
1444
1445         /*
1446          * Be very careful.  We have no exclusion from __set_page_dirty_buffers
1447          * here, and the (potentially unmapped) buffers may become dirty at
1448          * any time.  If a buffer becomes dirty here after we've inspected it
1449          * then we just miss that fact, and the page stays dirty.
1450          *
1451          * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1452          * handle that here by just cleaning them.
1453          */
1454
        /* file block number of the first buffer on this page */
1455         block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1456         head = page_buffers(page);
1457         bh = head;
1458
1459         /*
1460          * Get all the dirty buffers mapped to disk addresses and
1461          * handle any aliases from the underlying blockdev's mapping.
1462          */
1463         do {
1464                 if (block > last_block) {
1465                         /*
1466                          * mapped buffers outside i_size will occur, because
1467                          * this page can be outside i_size when there is a
1468                          * truncate in progress.
1469                          */
1470                         /*
1471                          * The buffer was zeroed by block_write_full_page()
1472                          */
1473                         clear_buffer_dirty(bh);
1474                         set_buffer_uptodate(bh);
1475                 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1476                         WARN_ON(bh->b_size != blocksize);
                        /* create == 0: look the block up, never allocate */
1477                         err = btrfs_get_block(inode, block, bh, 0);
1478                         if (err) {
1479                                 goto recover;
1480                         }
1481                         if (buffer_new(bh)) {
1482                                 /* blockdev mappings never come here */
1483                                 clear_buffer_new(bh);
1484                         }
1485                 }
1486                 bh = bh->b_this_page;
1487                 block++;
1488         } while (bh != head);
1489
        /*
         * bh is back at head here.  Note that 'continue' in this do/while
         * jumps to the loop condition, which is what advances bh.
         */
1490         do {
1491                 if (!buffer_mapped(bh))
1492                         continue;
1493                 /*
1494                  * If it's a fully non-blocking write attempt and we cannot
1495                  * lock the buffer then redirty the page.  Note that this can
1496                  * potentially cause a busy-wait loop from pdflush and kswapd
1497                  * activity, but those code paths have their own higher-level
1498                  * throttling.
1499                  */
1500                 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1501                         lock_buffer(bh);
1502                 } else if (test_set_buffer_locked(bh)) {
1503                         redirty_page_for_writepage(wbc, page);
1504                         continue;
1505                 }
                /*
                 * Buffers whose b_blocknr is 0 are cleaned but never
                 * submitted.  NOTE(review): presumably these are the
                 * inline (packed) extents that btrfs_get_block_lock() maps
                 * with btrfs_map_bh_to_logical(root, result, 0) -- confirm.
                 */
1506                 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1507                         mark_buffer_async_write(bh);
1508                 } else {
1509                         unlock_buffer(bh);
1510                 }
1511         } while ((bh = bh->b_this_page) != head);
1512
1513         /*
1514          * The page and its buffers are protected by PageWriteback(), so we can
1515          * drop the bh refcounts early.
1516          */
1517         BUG_ON(PageWriteback(page));
1518         set_page_writeback(page);
1519
1520         do {
                /* read the link first, before the buffer is submitted */
1521                 struct buffer_head *next = bh->b_this_page;
1522                 if (buffer_async_write(bh)) {
1523                         submit_bh(WRITE, bh);
1524                         nr_underway++;
1525                 }
1526                 bh = next;
1527         } while (bh != head);
1528         unlock_page(page);
1529
1530         err = 0;
        /* reached with bh == head from both the normal and recover paths */
1531 done:
1532         if (nr_underway == 0) {
1533                 /*
1534                  * The page was marked dirty, but the buffers were
1535                  * clean.  Someone wrote them back by hand with
1536                  * ll_rw_block/submit_bh.  A rare case.
1537                  */
1538                 int uptodate = 1;
1539                 do {
1540                         if (!buffer_uptodate(bh)) {
1541                                 uptodate = 0;
1542                                 break;
1543                         }
1544                         bh = bh->b_this_page;
1545                 } while (bh != head);
1546                 if (uptodate)
1547                         SetPageUptodate(page);
                /* nothing was submitted, so end writeback ourselves */
1548                 end_page_writeback(page);
1549         }
1550         return err;
1551
1552 recover:
1553         /*
1554          * ENOSPC, or some other error.  We may already have added some
1555          * blocks to the file, so we need to write these out to avoid
1556          * exposing stale data.
1557          * The page is currently locked and not marked for writeback
1558          */
1559         bh = head;
1560         /* Recovery: lock and submit the mapped buffers */
1561         do {
1562                 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1563                         lock_buffer(bh);
1564                         mark_buffer_async_write(bh);
1565                 } else {
1566                         /*
1567                          * The buffer may have been set dirty during
1568                          * attachment to a dirty page.
1569                          */
1570                         clear_buffer_dirty(bh);
1571                 }
1572         } while ((bh = bh->b_this_page) != head);
1573         SetPageError(page);
1574         BUG_ON(PageWriteback(page));
1575         set_page_writeback(page);
1576         do {
1577                 struct buffer_head *next = bh->b_this_page;
1578                 if (buffer_async_write(bh)) {
1579                         clear_buffer_dirty(bh);
1580                         submit_bh(WRITE, bh);
1581                         nr_underway++;
1582                 }
1583                 bh = next;
1584         } while (bh != head);
1585         unlock_page(page);
        /* err still holds the btrfs_get_block() failure */
1586         goto done;
1587 }
1588
1589 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1590 {
1591         struct inode * const inode = page->mapping->host;
1592         loff_t i_size = i_size_read(inode);
1593         const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1594         unsigned offset;
1595         void *kaddr;
1596
1597         /* Is the page fully inside i_size? */
1598         if (page->index < end_index)
1599                 return __btrfs_write_full_page(inode, page, wbc);
1600
1601         /* Is the page fully outside i_size? (truncate in progress) */
1602         offset = i_size & (PAGE_CACHE_SIZE-1);
1603         if (page->index >= end_index+1 || !offset) {
1604                 /*
1605                  * The page may have dirty, unmapped buffers.  For example,
1606                  * they may have been added in ext3_writepage().  Make them
1607                  * freeable here, so the page does not leak.
1608                  */
1609                 block_invalidatepage(page, 0);
1610                 unlock_page(page);
1611                 return 0; /* don't care */
1612         }
1613
1614         /*
1615          * The page straddles i_size.  It must be zeroed out on each and every
1616          * writepage invokation because it may be mmapped.  "A file is mapped
1617          * in multiples of the page size.  For a file that is not a multiple of
1618          * the  page size, the remaining memory is zeroed when mapped, and
1619          * writes to that region are not written out to the file."
1620          */
1621         kaddr = kmap_atomic(page, KM_USER0);
1622         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1623         flush_dcache_page(page);
1624         kunmap_atomic(kaddr, KM_USER0);
1625         return __btrfs_write_full_page(inode, page, wbc);
1626 }
1627
1628 static void btrfs_truncate(struct inode *inode)
1629 {
1630         struct btrfs_root *root = BTRFS_I(inode)->root;
1631         int ret;
1632         struct btrfs_trans_handle *trans;
1633
1634         if (!S_ISREG(inode->i_mode))
1635                 return;
1636         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1637                 return;
1638
1639         btrfs_truncate_page(inode->i_mapping, inode->i_size);
1640
1641         mutex_lock(&root->fs_info->fs_mutex);
1642         trans = btrfs_start_transaction(root, 1);
1643         btrfs_set_trans_block_group(trans, inode);
1644
1645         /* FIXME, add redo link to tree so we don't leak on crash */
1646         ret = btrfs_truncate_in_trans(trans, root, inode);
1647         BUG_ON(ret);
1648         btrfs_update_inode(trans, root, inode);
1649         ret = btrfs_end_transaction(trans, root);
1650         BUG_ON(ret);
1651         mutex_unlock(&root->fs_info->fs_mutex);
1652         btrfs_btree_balance_dirty(root);
1653 }
1654
1655 int btrfs_commit_write(struct file *file, struct page *page,
1656                        unsigned from, unsigned to)
1657 {
1658         struct inode *inode = page->mapping->host;
1659         struct buffer_head *bh;
1660         loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1661
1662         SetPageUptodate(page);
1663         bh = page_buffers(page);
1664         set_buffer_uptodate(bh);
1665         if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1666                 set_page_dirty(page);
1667         }
1668         if (pos > inode->i_size) {
1669                 i_size_write(inode, pos);
1670                 mark_inode_dirty(inode);
1671         }
1672         return 0;
1673 }
1674
1675 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
1676 {
1677         struct btrfs_trans_handle *trans;
1678         struct btrfs_key key;
1679         struct btrfs_root_item root_item;
1680         struct btrfs_inode_item *inode_item;
1681         struct buffer_head *subvol;
1682         struct btrfs_leaf *leaf;
1683         struct btrfs_root *new_root;
1684         struct inode *inode;
1685         struct inode *dir;
1686         int ret;
1687         u64 objectid;
1688         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
1689
1690         mutex_lock(&root->fs_info->fs_mutex);
1691         trans = btrfs_start_transaction(root, 1);
1692         BUG_ON(!trans);
1693
1694         subvol = btrfs_alloc_free_block(trans, root, 0);
1695         if (subvol == NULL)
1696                 return -ENOSPC;
1697         leaf = btrfs_buffer_leaf(subvol);
1698         btrfs_set_header_nritems(&leaf->header, 0);
1699         btrfs_set_header_level(&leaf->header, 0);
1700         btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
1701         btrfs_set_header_generation(&leaf->header, trans->transid);
1702         btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
1703         memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
1704                sizeof(leaf->header.fsid));
1705         mark_buffer_dirty(subvol);
1706
1707         inode_item = &root_item.inode;
1708         memset(inode_item, 0, sizeof(*inode_item));
1709         btrfs_set_inode_generation(inode_item, 1);
1710         btrfs_set_inode_size(inode_item, 3);
1711         btrfs_set_inode_nlink(inode_item, 1);
1712         btrfs_set_inode_nblocks(inode_item, 1);
1713         btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
1714
1715         btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
1716         btrfs_set_root_refs(&root_item, 1);
1717         brelse(subvol);
1718         subvol = NULL;
1719
1720         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
1721                                        0, &objectid);
1722         BUG_ON(ret);
1723
1724         btrfs_set_root_dirid(&root_item, new_dirid);
1725
1726         key.objectid = objectid;
1727         key.offset = 1;
1728         key.flags = 0;
1729         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
1730         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
1731                                 &root_item);
1732         BUG_ON(ret);
1733
1734         /*
1735          * insert the directory item
1736          */
1737         key.offset = (u64)-1;
1738         dir = root->fs_info->sb->s_root->d_inode;
1739         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
1740                                     name, namelen, dir->i_ino, &key,
1741                                     BTRFS_FT_DIR);
1742         BUG_ON(ret);
1743
1744         ret = btrfs_commit_transaction(trans, root);
1745         BUG_ON(ret);
1746
1747         new_root = btrfs_read_fs_root(root->fs_info, &key);
1748         BUG_ON(!new_root);
1749
1750         trans = btrfs_start_transaction(new_root, 1);
1751         BUG_ON(!trans);
1752
1753         inode = btrfs_new_inode(trans, new_root, new_dirid,
1754                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
1755         inode->i_op = &btrfs_dir_inode_operations;
1756         inode->i_fop = &btrfs_dir_file_operations;
1757
1758         ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
1759         BUG_ON(ret);
1760
1761         inode->i_nlink = 1;
1762         inode->i_size = 6;
1763         ret = btrfs_update_inode(trans, new_root, inode);
1764         BUG_ON(ret);
1765
1766         ret = btrfs_commit_transaction(trans, new_root);
1767         BUG_ON(ret);
1768
1769         iput(inode);
1770
1771         mutex_unlock(&root->fs_info->fs_mutex);
1772         btrfs_btree_balance_dirty(root);
1773         return 0;
1774 }
1775
1776 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
1777 {
1778         struct btrfs_trans_handle *trans;
1779         struct btrfs_key key;
1780         struct btrfs_root_item new_root_item;
1781         int ret;
1782         u64 objectid;
1783
1784         if (!root->ref_cows)
1785                 return -EINVAL;
1786
1787         mutex_lock(&root->fs_info->fs_mutex);
1788         trans = btrfs_start_transaction(root, 1);
1789         BUG_ON(!trans);
1790
1791         ret = btrfs_update_inode(trans, root, root->inode);
1792         BUG_ON(ret);
1793
1794         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
1795                                        0, &objectid);
1796         BUG_ON(ret);
1797
1798         memcpy(&new_root_item, &root->root_item,
1799                sizeof(new_root_item));
1800
1801         key.objectid = objectid;
1802         key.offset = 1;
1803         key.flags = 0;
1804         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
1805         btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
1806
1807         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
1808                                 &new_root_item);
1809         BUG_ON(ret);
1810
1811         /*
1812          * insert the directory item
1813          */
1814         key.offset = (u64)-1;
1815         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
1816                                     name, namelen,
1817                                     root->fs_info->sb->s_root->d_inode->i_ino,
1818                                     &key, BTRFS_FT_DIR);
1819
1820         BUG_ON(ret);
1821
1822         ret = btrfs_inc_root_ref(trans, root);
1823         BUG_ON(ret);
1824
1825         ret = btrfs_commit_transaction(trans, root);
1826         BUG_ON(ret);
1827         mutex_unlock(&root->fs_info->fs_mutex);
1828         btrfs_btree_balance_dirty(root);
1829         return 0;
1830 }
1831
1832 int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
1833                 cmd, unsigned long arg)
1834 {
1835         struct btrfs_root *root = BTRFS_I(inode)->root;
1836         struct btrfs_ioctl_vol_args vol_args;
1837         int ret = 0;
1838         struct btrfs_dir_item *di;
1839         int namelen;
1840         struct btrfs_path *path;
1841         u64 root_dirid;
1842
1843         switch (cmd) {
1844         case BTRFS_IOC_SNAP_CREATE:
1845                 if (copy_from_user(&vol_args,
1846                                    (struct btrfs_ioctl_vol_args __user *)arg,
1847                                    sizeof(vol_args)))
1848                         return -EFAULT;
1849                 namelen = strlen(vol_args.name);
1850                 if (namelen > BTRFS_VOL_NAME_MAX)
1851                         return -EINVAL;
1852                 if (strchr(vol_args.name, '/'))
1853                         return -EINVAL;
1854                 path = btrfs_alloc_path();
1855                 if (!path)
1856                         return -ENOMEM;
1857                 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
1858                 mutex_lock(&root->fs_info->fs_mutex);
1859                 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
1860                                     path, root_dirid,
1861                                     vol_args.name, namelen, 0);
1862                 mutex_unlock(&root->fs_info->fs_mutex);
1863                 btrfs_free_path(path);
1864                 if (di && !IS_ERR(di))
1865                         return -EEXIST;
1866
1867                 if (root == root->fs_info->tree_root)
1868                         ret = create_subvol(root, vol_args.name, namelen);
1869                 else
1870                         ret = create_snapshot(root, vol_args.name, namelen);
1871                 WARN_ON(ret);
1872                 break;
1873         default:
1874                 return -ENOTTY;
1875         }
1876         return ret;
1877 }
1878
#ifdef CONFIG_COMPAT
/*
 * 32-bit compat ioctl entry point: converts @arg with compat_ptr() and
 * forwards to btrfs_ioctl() while holding the big kernel lock.
 */
long btrfs_compat_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	unsigned long native_arg;
	int rc;

	lock_kernel();
	native_arg = (unsigned long) compat_ptr(arg);
	rc = btrfs_ioctl(inode, file, cmd, native_arg);
	unlock_kernel();

	return rc;
}
#endif
1892
1893 /*
1894  * Called inside transaction, so use GFP_NOFS
1895  */
1896 struct inode *btrfs_alloc_inode(struct super_block *sb)
1897 {
1898         struct btrfs_inode *ei;
1899
1900         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
1901         if (!ei)
1902                 return NULL;
1903         return &ei->vfs_inode;
1904 }
1905
1906 void btrfs_destroy_inode(struct inode *inode)
1907 {
1908         WARN_ON(!list_empty(&inode->i_dentry));
1909         WARN_ON(inode->i_data.nrpages);
1910
1911         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
1912 }
1913
1914 static void init_once(void * foo, struct kmem_cache * cachep,
1915                       unsigned long flags)
1916 {
1917         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
1918
1919         inode_init_once(&ei->vfs_inode);
1920 }
1921
1922 void btrfs_destroy_cachep(void)
1923 {
1924         if (btrfs_inode_cachep)
1925                 kmem_cache_destroy(btrfs_inode_cachep);
1926         if (btrfs_trans_handle_cachep)
1927                 kmem_cache_destroy(btrfs_trans_handle_cachep);
1928         if (btrfs_transaction_cachep)
1929                 kmem_cache_destroy(btrfs_transaction_cachep);
1930         if (btrfs_bit_radix_cachep)
1931                 kmem_cache_destroy(btrfs_bit_radix_cachep);
1932         if (btrfs_path_cachep)
1933                 kmem_cache_destroy(btrfs_path_cachep);
1934 }
1935
1936 int btrfs_init_cachep(void)
1937 {
1938         btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
1939                                              sizeof(struct btrfs_inode),
1940                                              0, (SLAB_RECLAIM_ACCOUNT|
1941                                                 SLAB_MEM_SPREAD),
1942                                              init_once, NULL);
1943         if (!btrfs_inode_cachep)
1944                 goto fail;
1945         btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
1946                                              sizeof(struct btrfs_trans_handle),
1947                                              0, (SLAB_RECLAIM_ACCOUNT|
1948                                                 SLAB_MEM_SPREAD),
1949                                              NULL, NULL);
1950         if (!btrfs_trans_handle_cachep)
1951                 goto fail;
1952         btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
1953                                              sizeof(struct btrfs_transaction),
1954                                              0, (SLAB_RECLAIM_ACCOUNT|
1955                                                 SLAB_MEM_SPREAD),
1956                                              NULL, NULL);
1957         if (!btrfs_transaction_cachep)
1958                 goto fail;
1959         btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
1960                                              sizeof(struct btrfs_transaction),
1961                                              0, (SLAB_RECLAIM_ACCOUNT|
1962                                                 SLAB_MEM_SPREAD),
1963                                              NULL, NULL);
1964         if (!btrfs_path_cachep)
1965                 goto fail;
1966         btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
1967                                              256,
1968                                              0, (SLAB_RECLAIM_ACCOUNT|
1969                                                 SLAB_MEM_SPREAD |
1970                                                 SLAB_DESTROY_BY_RCU),
1971                                              NULL, NULL);
1972         if (!btrfs_bit_radix_cachep)
1973                 goto fail;
1974         return 0;
1975 fail:
1976         btrfs_destroy_cachep();
1977         return -ENOMEM;
1978 }
1979
1980 static int btrfs_getattr(struct vfsmount *mnt,
1981                          struct dentry *dentry, struct kstat *stat)
1982 {
1983         struct inode *inode = dentry->d_inode;
1984         generic_fillattr(inode, stat);
1985         stat->blksize = 256 * 1024;
1986         return 0;
1987 }
1988
1989 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
1990                            struct inode * new_dir,struct dentry *new_dentry)
1991 {
1992         struct btrfs_trans_handle *trans;
1993         struct btrfs_root *root = BTRFS_I(old_dir)->root;
1994         struct inode *new_inode = new_dentry->d_inode;
1995         struct inode *old_inode = old_dentry->d_inode;
1996         struct timespec ctime = CURRENT_TIME;
1997         struct btrfs_path *path;
1998         struct btrfs_dir_item *di;
1999         int ret;
2000
2001         if (S_ISDIR(old_inode->i_mode) && new_inode &&
2002             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
2003                 return -ENOTEMPTY;
2004         }
2005         mutex_lock(&root->fs_info->fs_mutex);
2006         trans = btrfs_start_transaction(root, 1);
2007         btrfs_set_trans_block_group(trans, new_dir);
2008         path = btrfs_alloc_path();
2009         if (!path) {
2010                 ret = -ENOMEM;
2011                 goto out_fail;
2012         }
2013
2014         old_dentry->d_inode->i_nlink++;
2015         old_dir->i_ctime = old_dir->i_mtime = ctime;
2016         new_dir->i_ctime = new_dir->i_mtime = ctime;
2017         old_inode->i_ctime = ctime;
2018         if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) {
2019                 struct btrfs_key *location = &BTRFS_I(new_dir)->location;
2020                 u64 old_parent_oid;
2021                 di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino,
2022                                            "..", 2, -1);
2023                 if (IS_ERR(di)) {
2024                         ret = PTR_ERR(di);
2025                         goto out_fail;
2026                 }
2027                 if (!di) {
2028                         ret = -ENOENT;
2029                         goto out_fail;
2030                 }
2031                 old_parent_oid = btrfs_disk_key_objectid(&di->location);
2032                 ret = btrfs_del_item(trans, root, path);
2033                 if (ret) {
2034                         ret = -EIO;
2035                         goto out_fail;
2036                 }
2037                 btrfs_release_path(root, path);
2038
2039                 di = btrfs_lookup_dir_index_item(trans, root, path,
2040                                                  old_inode->i_ino,
2041                                                  old_parent_oid,
2042                                                  "..", 2, -1);
2043                 if (IS_ERR(di)) {
2044                         ret = PTR_ERR(di);
2045                         goto out_fail;
2046                 }
2047                 if (!di) {
2048                         ret = -ENOENT;
2049                         goto out_fail;
2050                 }
2051                 ret = btrfs_del_item(trans, root, path);
2052                 if (ret) {
2053                         ret = -EIO;
2054                         goto out_fail;
2055                 }
2056                 btrfs_release_path(root, path);
2057
2058                 ret = btrfs_insert_dir_item(trans, root, "..", 2,
2059                                             old_inode->i_ino, location,
2060                                             BTRFS_FT_DIR);
2061                 if (ret)
2062                         goto out_fail;
2063         }
2064
2065
2066         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2067         if (ret)
2068                 goto out_fail;
2069
2070         if (new_inode) {
2071                 new_inode->i_ctime = CURRENT_TIME;
2072                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2073                 if (ret)
2074                         goto out_fail;
2075                 if (S_ISDIR(new_inode->i_mode))
2076                         clear_nlink(new_inode);
2077                 else
2078                         drop_nlink(new_inode);
2079                 btrfs_update_inode(trans, root, new_inode);
2080         }
2081         ret = btrfs_add_link(trans, new_dentry, old_inode);
2082         if (ret)
2083                 goto out_fail;
2084
2085 out_fail:
2086         btrfs_free_path(path);
2087         btrfs_end_transaction(trans, root);
2088         mutex_unlock(&root->fs_info->fs_mutex);
2089         return ret;
2090 }
2091
2092 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2093                          const char *symname)
2094 {
2095         struct btrfs_trans_handle *trans;
2096         struct btrfs_root *root = BTRFS_I(dir)->root;
2097         struct btrfs_path *path;
2098         struct btrfs_key key;
2099         struct inode *inode;
2100         int err;
2101         int drop_inode = 0;
2102         u64 objectid;
2103         int name_len;
2104         int datasize;
2105         char *ptr;
2106         struct btrfs_file_extent_item *ei;
2107
2108         name_len = strlen(symname) + 1;
2109         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2110                 return -ENAMETOOLONG;
2111         mutex_lock(&root->fs_info->fs_mutex);
2112         trans = btrfs_start_transaction(root, 1);
2113         btrfs_set_trans_block_group(trans, dir);
2114
2115         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2116         if (err) {
2117                 err = -ENOSPC;
2118                 goto out_unlock;
2119         }
2120
2121         inode = btrfs_new_inode(trans, root, objectid,
2122                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2123         err = PTR_ERR(inode);
2124         if (IS_ERR(inode))
2125                 goto out_unlock;
2126
2127         btrfs_set_trans_block_group(trans, inode);
2128         err = btrfs_add_nondir(trans, dentry, inode);
2129         if (err)
2130                 drop_inode = 1;
2131         else {
2132                 inode->i_mapping->a_ops = &btrfs_aops;
2133                 inode->i_fop = &btrfs_file_operations;
2134                 inode->i_op = &btrfs_file_inode_operations;
2135         }
2136         dir->i_sb->s_dirt = 1;
2137         btrfs_update_inode_block_group(trans, inode);
2138         btrfs_update_inode_block_group(trans, dir);
2139         if (drop_inode)
2140                 goto out_unlock;
2141
2142         path = btrfs_alloc_path();
2143         BUG_ON(!path);
2144         key.objectid = inode->i_ino;
2145         key.offset = 0;
2146         key.flags = 0;
2147         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2148         datasize = btrfs_file_extent_calc_inline_size(name_len);
2149         err = btrfs_insert_empty_item(trans, root, path, &key,
2150                                       datasize);
2151         BUG_ON(err);
2152         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2153                path->slots[0], struct btrfs_file_extent_item);
2154         btrfs_set_file_extent_generation(ei, trans->transid);
2155         btrfs_set_file_extent_type(ei,
2156                                    BTRFS_FILE_EXTENT_INLINE);
2157         ptr = btrfs_file_extent_inline_start(ei);
2158         btrfs_memcpy(root, path->nodes[0]->b_data,
2159                      ptr, symname, name_len);
2160         mark_buffer_dirty(path->nodes[0]);
2161         btrfs_free_path(path);
2162         inode->i_op = &btrfs_symlink_inode_operations;
2163         inode->i_mapping->a_ops = &btrfs_symlink_aops;
2164         inode->i_size = name_len - 1;
2165         btrfs_update_inode(trans, root, inode);
2166         err = 0;
2167
2168 out_unlock:
2169         btrfs_end_transaction(trans, root);
2170         mutex_unlock(&root->fs_info->fs_mutex);
2171
2172         if (drop_inode) {
2173                 inode_dec_link_count(inode);
2174                 iput(inode);
2175         }
2176         btrfs_btree_balance_dirty(root);
2177         return err;
2178 }
2179
2180 static struct inode_operations btrfs_dir_inode_operations = {
2181         .lookup         = btrfs_lookup,
2182         .create         = btrfs_create,
2183         .unlink         = btrfs_unlink,
2184         .link           = btrfs_link,
2185         .mkdir          = btrfs_mkdir,
2186         .rmdir          = btrfs_rmdir,
2187         .rename         = btrfs_rename,
2188         .symlink        = btrfs_symlink,
2189         .setattr        = btrfs_setattr,
2190 };
2191
2192 static struct inode_operations btrfs_dir_ro_inode_operations = {
2193         .lookup         = btrfs_lookup,
2194 };
2195
2196 static struct file_operations btrfs_dir_file_operations = {
2197         .llseek         = generic_file_llseek,
2198         .read           = generic_read_dir,
2199         .readdir        = btrfs_readdir,
2200         .ioctl          = btrfs_ioctl,
2201 #ifdef CONFIG_COMPAT
2202         .compat_ioctl   = btrfs_compat_ioctl,
2203 #endif
2204 };
2205
2206 static struct address_space_operations btrfs_aops = {
2207         .readpage       = btrfs_readpage,
2208         .writepage      = btrfs_writepage,
2209         .sync_page      = block_sync_page,
2210         .prepare_write  = btrfs_prepare_write,
2211         .commit_write   = btrfs_commit_write,
2212         .bmap           = btrfs_bmap,
2213 };
2214
2215 static struct address_space_operations btrfs_symlink_aops = {
2216         .readpage       = btrfs_readpage,
2217         .writepage      = btrfs_writepage,
2218 };
2219
2220 static struct inode_operations btrfs_file_inode_operations = {
2221         .truncate       = btrfs_truncate,
2222         .getattr        = btrfs_getattr,
2223         .setattr        = btrfs_setattr,
2224 };
2225
2226 static struct inode_operations btrfs_symlink_inode_operations = {
2227         .readlink       = generic_readlink,
2228         .follow_link    = page_follow_link_light,
2229         .put_link       = page_put_link,
2230 };