Btrfs: Add balance ioctl to restripe the chunks
[linux-block.git] / fs / btrfs / inode.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/kernel.h>
20 #include <linux/bio.h>
21 #include <linux/buffer_head.h>
22 #include <linux/fs.h>
23 #include <linux/pagemap.h>
24 #include <linux/highmem.h>
25 #include <linux/time.h>
26 #include <linux/init.h>
27 #include <linux/string.h>
28 #include <linux/smp_lock.h>
29 #include <linux/backing-dev.h>
30 #include <linux/mpage.h>
31 #include <linux/swap.h>
32 #include <linux/writeback.h>
33 #include <linux/statfs.h>
34 #include <linux/compat.h>
35 #include <linux/bit_spinlock.h>
36 #include <linux/version.h>
37 #include <linux/xattr.h>
38 #include "ctree.h"
39 #include "disk-io.h"
40 #include "transaction.h"
41 #include "btrfs_inode.h"
42 #include "ioctl.h"
43 #include "print-tree.h"
44 #include "volumes.h"
45
46 struct btrfs_iget_args {
47         u64 ino;
48         struct btrfs_root *root;
49 };
50
51 static struct inode_operations btrfs_dir_inode_operations;
52 static struct inode_operations btrfs_symlink_inode_operations;
53 static struct inode_operations btrfs_dir_ro_inode_operations;
54 static struct inode_operations btrfs_special_inode_operations;
55 static struct inode_operations btrfs_file_inode_operations;
56 static struct address_space_operations btrfs_aops;
57 static struct address_space_operations btrfs_symlink_aops;
58 static struct file_operations btrfs_dir_file_operations;
59 static struct extent_io_ops btrfs_extent_io_ops;
60
61 static struct kmem_cache *btrfs_inode_cachep;
62 struct kmem_cache *btrfs_trans_handle_cachep;
63 struct kmem_cache *btrfs_transaction_cachep;
64 struct kmem_cache *btrfs_bit_radix_cachep;
65 struct kmem_cache *btrfs_path_cachep;
66
67 #define S_SHIFT 12
68 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
69         [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
70         [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
71         [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
72         [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
73         [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
74         [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
75         [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
76 };
77
78 int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
79                            int for_del)
80 {
81         u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy);
82         u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy);
83         u64 thresh;
84         unsigned long flags;
85         int ret = 0;
86
87         if (for_del)
88                 thresh = total * 90;
89         else
90                 thresh = total * 85;
91
92         do_div(thresh, 100);
93
94         spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
95         if (used + root->fs_info->delalloc_bytes + num_required > thresh)
96                 ret = -ENOSPC;
97         spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
98         return ret;
99 }
100
101 static int cow_file_range(struct inode *inode, u64 start, u64 end)
102 {
103         struct btrfs_root *root = BTRFS_I(inode)->root;
104         struct btrfs_trans_handle *trans;
105         u64 alloc_hint = 0;
106         u64 num_bytes;
107         u64 cur_alloc_size;
108         u64 blocksize = root->sectorsize;
109         u64 orig_start = start;
110         u64 orig_num_bytes;
111         struct btrfs_key ins;
112         int ret;
113
114         trans = btrfs_start_transaction(root, 1);
115         BUG_ON(!trans);
116         btrfs_set_trans_block_group(trans, inode);
117
118         num_bytes = (end - start + blocksize) & ~(blocksize - 1);
119         num_bytes = max(blocksize,  num_bytes);
120         ret = btrfs_drop_extents(trans, root, inode,
121                                  start, start + num_bytes, start, &alloc_hint);
122         orig_num_bytes = num_bytes;
123
124         if (alloc_hint == EXTENT_MAP_INLINE)
125                 goto out;
126
127         BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy));
128
129         while(num_bytes > 0) {
130                 cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
131                 ret = btrfs_alloc_extent(trans, root, cur_alloc_size,
132                                          root->sectorsize,
133                                          root->root_key.objectid,
134                                          trans->transid,
135                                          inode->i_ino, start, 0,
136                                          alloc_hint, (u64)-1, &ins, 1);
137                 if (ret) {
138                         WARN_ON(1);
139                         goto out;
140                 }
141                 cur_alloc_size = ins.offset;
142                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
143                                                start, ins.objectid, ins.offset,
144                                                ins.offset);
145                 inode->i_blocks += ins.offset >> 9;
146                 btrfs_check_file(root, inode);
147                 if (num_bytes < cur_alloc_size) {
148                         printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes,
149                                cur_alloc_size);
150                         break;
151                 }
152                 num_bytes -= cur_alloc_size;
153                 alloc_hint = ins.objectid + ins.offset;
154                 start += cur_alloc_size;
155         }
156         btrfs_drop_extent_cache(inode, orig_start,
157                                 orig_start + orig_num_bytes - 1);
158         btrfs_add_ordered_inode(inode);
159         btrfs_update_inode(trans, root, inode);
160 out:
161         btrfs_end_transaction(trans, root);
162         return ret;
163 }
164
165 static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
166 {
167         u64 extent_start;
168         u64 extent_end;
169         u64 bytenr;
170         u64 cow_end;
171         u64 loops = 0;
172         u64 total_fs_bytes;
173         struct btrfs_root *root = BTRFS_I(inode)->root;
174         struct extent_buffer *leaf;
175         int found_type;
176         struct btrfs_path *path;
177         struct btrfs_file_extent_item *item;
178         int ret;
179         int err;
180         struct btrfs_key found_key;
181
182         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
183         path = btrfs_alloc_path();
184         BUG_ON(!path);
185 again:
186         ret = btrfs_lookup_file_extent(NULL, root, path,
187                                        inode->i_ino, start, 0);
188         if (ret < 0) {
189                 btrfs_free_path(path);
190                 return ret;
191         }
192
193         cow_end = end;
194         if (ret != 0) {
195                 if (path->slots[0] == 0)
196                         goto not_found;
197                 path->slots[0]--;
198         }
199
200         leaf = path->nodes[0];
201         item = btrfs_item_ptr(leaf, path->slots[0],
202                               struct btrfs_file_extent_item);
203
204         /* are we inside the extent that was found? */
205         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
206         found_type = btrfs_key_type(&found_key);
207         if (found_key.objectid != inode->i_ino ||
208             found_type != BTRFS_EXTENT_DATA_KEY) {
209                 goto not_found;
210         }
211
212         found_type = btrfs_file_extent_type(leaf, item);
213         extent_start = found_key.offset;
214         if (found_type == BTRFS_FILE_EXTENT_REG) {
215                 u64 extent_num_bytes;
216
217                 extent_num_bytes = btrfs_file_extent_num_bytes(leaf, item);
218                 extent_end = extent_start + extent_num_bytes;
219                 err = 0;
220
221                 if (loops && start != extent_start)
222                         goto not_found;
223
224                 if (start < extent_start || start >= extent_end)
225                         goto not_found;
226
227                 cow_end = min(end, extent_end - 1);
228                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
229                 if (bytenr == 0)
230                         goto not_found;
231
232                 /*
233                  * we may be called by the resizer, make sure we're inside
234                  * the limits of the FS
235                  */
236                 if (bytenr + extent_num_bytes > total_fs_bytes)
237                         goto not_found;
238
239                 if (btrfs_count_snapshots_in_path(root, path, bytenr) != 1) {
240                         goto not_found;
241                 }
242
243                 start = extent_end;
244         } else {
245                 goto not_found;
246         }
247 loop:
248         if (start > end) {
249                 btrfs_free_path(path);
250                 return 0;
251         }
252         btrfs_release_path(root, path);
253         loops++;
254         goto again;
255
256 not_found:
257         cow_file_range(inode, start, cow_end);
258         start = cow_end + 1;
259         goto loop;
260 }
261
262 static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
263 {
264         struct btrfs_root *root = BTRFS_I(inode)->root;
265         int ret;
266         mutex_lock(&root->fs_info->fs_mutex);
267         if (btrfs_test_opt(root, NODATACOW) ||
268             btrfs_test_flag(inode, NODATACOW))
269                 ret = run_delalloc_nocow(inode, start, end);
270         else
271                 ret = cow_file_range(inode, start, end);
272
273         mutex_unlock(&root->fs_info->fs_mutex);
274         return ret;
275 }
276
277 int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
278                        unsigned long old, unsigned long bits)
279 {
280         unsigned long flags;
281         if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
282                 struct btrfs_root *root = BTRFS_I(inode)->root;
283                 spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
284                 BTRFS_I(inode)->delalloc_bytes += end - start + 1;
285                 root->fs_info->delalloc_bytes += end - start + 1;
286                 spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
287         }
288         return 0;
289 }
290
291 int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
292                          unsigned long old, unsigned long bits)
293 {
294         if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
295                 struct btrfs_root *root = BTRFS_I(inode)->root;
296                 unsigned long flags;
297
298                 spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
299                 if (end - start + 1 > root->fs_info->delalloc_bytes) {
300                         printk("warning: delalloc account %Lu %Lu\n",
301                                end - start + 1, root->fs_info->delalloc_bytes);
302                         root->fs_info->delalloc_bytes = 0;
303                         BTRFS_I(inode)->delalloc_bytes = 0;
304                 } else {
305                         root->fs_info->delalloc_bytes -= end - start + 1;
306                         BTRFS_I(inode)->delalloc_bytes -= end - start + 1;
307                 }
308                 spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
309         }
310         return 0;
311 }
312
313 int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
314                          size_t size, struct bio *bio)
315 {
316         struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
317         struct btrfs_mapping_tree *map_tree;
318         u64 logical = bio->bi_sector << 9;
319         u64 length = 0;
320         u64 map_length;
321         int ret;
322
323         length = bio->bi_size;
324         map_tree = &root->fs_info->mapping_tree;
325         map_length = length;
326         ret = btrfs_map_block(map_tree, READ, logical,
327                               &map_length, NULL, 0);
328
329         if (map_length < length + size) {
330                 return 1;
331         }
332         return 0;
333 }
334
335 int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
336                           int mirror_num)
337 {
338         struct btrfs_root *root = BTRFS_I(inode)->root;
339         struct btrfs_trans_handle *trans;
340         int ret = 0;
341         char *sums = NULL;
342
343         ret = btrfs_csum_one_bio(root, bio, &sums);
344         BUG_ON(ret);
345
346         mutex_lock(&root->fs_info->fs_mutex);
347         trans = btrfs_start_transaction(root, 1);
348
349         btrfs_set_trans_block_group(trans, inode);
350         btrfs_csum_file_blocks(trans, root, inode, bio, sums);
351
352         ret = btrfs_end_transaction(trans, root);
353         BUG_ON(ret);
354         mutex_unlock(&root->fs_info->fs_mutex);
355
356         kfree(sums);
357
358         return btrfs_map_bio(root, rw, bio, mirror_num);
359 }
360
361 int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
362                           int mirror_num)
363 {
364         struct btrfs_root *root = BTRFS_I(inode)->root;
365         int ret = 0;
366
367         if (!(rw & (1 << BIO_RW))) {
368                 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
369                 BUG_ON(ret);
370                 goto mapit;
371         }
372
373         if (btrfs_test_opt(root, NODATASUM) ||
374             btrfs_test_flag(inode, NODATASUM)) {
375                 goto mapit;
376         }
377
378         return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
379                                    inode, rw, bio, mirror_num,
380                                    __btrfs_submit_bio_hook);
381 mapit:
382         return btrfs_map_bio(root, rw, bio, mirror_num);
383 }
384
385 int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
386 {
387         int ret = 0;
388         struct inode *inode = page->mapping->host;
389         struct btrfs_root *root = BTRFS_I(inode)->root;
390         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
391         struct btrfs_csum_item *item;
392         struct btrfs_path *path = NULL;
393         u32 csum;
394
395         if (btrfs_test_opt(root, NODATASUM) ||
396             btrfs_test_flag(inode, NODATASUM))
397                 return 0;
398
399         mutex_lock(&root->fs_info->fs_mutex);
400         path = btrfs_alloc_path();
401         item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
402         if (IS_ERR(item)) {
403                 ret = PTR_ERR(item);
404                 /* a csum that isn't present is a preallocated region. */
405                 if (ret == -ENOENT || ret == -EFBIG)
406                         ret = 0;
407                 csum = 0;
408                 printk("no csum found for inode %lu start %Lu\n", inode->i_ino, start);
409                 goto out;
410         }
411         read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
412                            BTRFS_CRC32_SIZE);
413         set_state_private(io_tree, start, csum);
414 out:
415         if (path)
416                 btrfs_free_path(path);
417         mutex_unlock(&root->fs_info->fs_mutex);
418         return ret;
419 }
420
421 struct io_failure_record {
422         struct page *page;
423         u64 start;
424         u64 len;
425         u64 logical;
426         int last_mirror;
427 };
428
429 int btrfs_readpage_io_failed_hook(struct bio *failed_bio,
430                                   struct page *page, u64 start, u64 end,
431                                   struct extent_state *state)
432 {
433         struct io_failure_record *failrec = NULL;
434         u64 private;
435         struct extent_map *em;
436         struct inode *inode = page->mapping->host;
437         struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
438         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
439         struct bio *bio;
440         int num_copies;
441         int ret;
442         u64 logical;
443
444         ret = get_state_private(failure_tree, start, &private);
445         if (ret) {
446                 failrec = kmalloc(sizeof(*failrec), GFP_NOFS);
447                 if (!failrec)
448                         return -ENOMEM;
449                 failrec->start = start;
450                 failrec->len = end - start + 1;
451                 failrec->last_mirror = 0;
452
453                 spin_lock(&em_tree->lock);
454                 em = lookup_extent_mapping(em_tree, start, failrec->len);
455                 if (em->start > start || em->start + em->len < start) {
456                         free_extent_map(em);
457                         em = NULL;
458                 }
459                 spin_unlock(&em_tree->lock);
460
461                 if (!em || IS_ERR(em)) {
462                         kfree(failrec);
463                         return -EIO;
464                 }
465                 logical = start - em->start;
466                 logical = em->block_start + logical;
467                 failrec->logical = logical;
468                 free_extent_map(em);
469                 set_extent_bits(failure_tree, start, end, EXTENT_LOCKED |
470                                 EXTENT_DIRTY, GFP_NOFS);
471                 set_state_private(failure_tree, start,
472                                  (u64)(unsigned long)failrec);
473         } else {
474                 failrec = (struct io_failure_record *)(unsigned long)private;
475         }
476         num_copies = btrfs_num_copies(
477                               &BTRFS_I(inode)->root->fs_info->mapping_tree,
478                               failrec->logical, failrec->len);
479         failrec->last_mirror++;
480         if (!state) {
481                 spin_lock_irq(&BTRFS_I(inode)->io_tree.lock);
482                 state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
483                                                     failrec->start,
484                                                     EXTENT_LOCKED);
485                 if (state && state->start != failrec->start)
486                         state = NULL;
487                 spin_unlock_irq(&BTRFS_I(inode)->io_tree.lock);
488         }
489         if (!state || failrec->last_mirror > num_copies) {
490                 set_state_private(failure_tree, failrec->start, 0);
491                 clear_extent_bits(failure_tree, failrec->start,
492                                   failrec->start + failrec->len - 1,
493                                   EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
494                 kfree(failrec);
495                 return -EIO;
496         }
497         bio = bio_alloc(GFP_NOFS, 1);
498         bio->bi_private = state;
499         bio->bi_end_io = failed_bio->bi_end_io;
500         bio->bi_sector = failrec->logical >> 9;
501         bio->bi_bdev = failed_bio->bi_bdev;
502         bio->bi_size = 0;
503         bio_add_page(bio, page, failrec->len, start - page_offset(page));
504         btrfs_submit_bio_hook(inode, READ, bio, failrec->last_mirror);
505         return 0;
506 }
507
508 int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
509                                struct extent_state *state)
510 {
511         size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
512         struct inode *inode = page->mapping->host;
513         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
514         char *kaddr;
515         u64 private = ~(u32)0;
516         int ret;
517         struct btrfs_root *root = BTRFS_I(inode)->root;
518         u32 csum = ~(u32)0;
519         unsigned long flags;
520
521         if (btrfs_test_opt(root, NODATASUM) ||
522             btrfs_test_flag(inode, NODATASUM))
523                 return 0;
524         if (state && state->start == start) {
525                 private = state->private;
526                 ret = 0;
527         } else {
528                 ret = get_state_private(io_tree, start, &private);
529         }
530         local_irq_save(flags);
531         kaddr = kmap_atomic(page, KM_IRQ0);
532         if (ret) {
533                 goto zeroit;
534         }
535         csum = btrfs_csum_data(root, kaddr + offset, csum,  end - start + 1);
536         btrfs_csum_final(csum, (char *)&csum);
537         if (csum != private) {
538                 goto zeroit;
539         }
540         kunmap_atomic(kaddr, KM_IRQ0);
541         local_irq_restore(flags);
542
543         /* if the io failure tree for this inode is non-empty,
544          * check to see if we've recovered from a failed IO
545          */
546         private = 0;
547         if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
548                              (u64)-1, 1, EXTENT_DIRTY)) {
549                 u64 private_failure;
550                 struct io_failure_record *failure;
551                 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
552                                         start, &private_failure);
553                 if (ret == 0) {
554                         failure = (struct io_failure_record *)(unsigned long)
555                                    private_failure;
556                         set_state_private(&BTRFS_I(inode)->io_failure_tree,
557                                           failure->start, 0);
558                         clear_extent_bits(&BTRFS_I(inode)->io_failure_tree,
559                                           failure->start,
560                                           failure->start + failure->len - 1,
561                                           EXTENT_DIRTY | EXTENT_LOCKED,
562                                           GFP_NOFS);
563                         kfree(failure);
564                 }
565         }
566         return 0;
567
568 zeroit:
569         printk("btrfs csum failed ino %lu off %llu csum %u private %Lu\n",
570                page->mapping->host->i_ino, (unsigned long long)start, csum,
571                private);
572         memset(kaddr + offset, 1, end - start + 1);
573         flush_dcache_page(page);
574         kunmap_atomic(kaddr, KM_IRQ0);
575         local_irq_restore(flags);
576         if (private == 0)
577                 return 0;
578         return -EIO;
579 }
580
581 void btrfs_read_locked_inode(struct inode *inode)
582 {
583         struct btrfs_path *path;
584         struct extent_buffer *leaf;
585         struct btrfs_inode_item *inode_item;
586         struct btrfs_timespec *tspec;
587         struct btrfs_root *root = BTRFS_I(inode)->root;
588         struct btrfs_key location;
589         u64 alloc_group_block;
590         u32 rdev;
591         int ret;
592
593         path = btrfs_alloc_path();
594         BUG_ON(!path);
595         mutex_lock(&root->fs_info->fs_mutex);
596         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
597
598         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
599         if (ret)
600                 goto make_bad;
601
602         leaf = path->nodes[0];
603         inode_item = btrfs_item_ptr(leaf, path->slots[0],
604                                     struct btrfs_inode_item);
605
606         inode->i_mode = btrfs_inode_mode(leaf, inode_item);
607         inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
608         inode->i_uid = btrfs_inode_uid(leaf, inode_item);
609         inode->i_gid = btrfs_inode_gid(leaf, inode_item);
610         inode->i_size = btrfs_inode_size(leaf, inode_item);
611
612         tspec = btrfs_inode_atime(inode_item);
613         inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
614         inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
615
616         tspec = btrfs_inode_mtime(inode_item);
617         inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
618         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
619
620         tspec = btrfs_inode_ctime(inode_item);
621         inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
622         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
623
624         inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item);
625         inode->i_generation = btrfs_inode_generation(leaf, inode_item);
626         inode->i_rdev = 0;
627         rdev = btrfs_inode_rdev(leaf, inode_item);
628
629         alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
630         BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
631                                                        alloc_group_block);
632         BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
633         if (!BTRFS_I(inode)->block_group) {
634                 BTRFS_I(inode)->block_group = btrfs_find_block_group(root,
635                                                  NULL, 0,
636                                                  BTRFS_BLOCK_GROUP_METADATA, 0);
637         }
638         btrfs_free_path(path);
639         inode_item = NULL;
640
641         mutex_unlock(&root->fs_info->fs_mutex);
642
643         switch (inode->i_mode & S_IFMT) {
644         case S_IFREG:
645                 inode->i_mapping->a_ops = &btrfs_aops;
646                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
647                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
648                 inode->i_fop = &btrfs_file_operations;
649                 inode->i_op = &btrfs_file_inode_operations;
650                 break;
651         case S_IFDIR:
652                 inode->i_fop = &btrfs_dir_file_operations;
653                 if (root == root->fs_info->tree_root)
654                         inode->i_op = &btrfs_dir_ro_inode_operations;
655                 else
656                         inode->i_op = &btrfs_dir_inode_operations;
657                 break;
658         case S_IFLNK:
659                 inode->i_op = &btrfs_symlink_inode_operations;
660                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
661                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
662                 break;
663         default:
664                 init_special_inode(inode, inode->i_mode, rdev);
665                 break;
666         }
667         return;
668
669 make_bad:
670         btrfs_release_path(root, path);
671         btrfs_free_path(path);
672         mutex_unlock(&root->fs_info->fs_mutex);
673         make_bad_inode(inode);
674 }
675
676 static void fill_inode_item(struct extent_buffer *leaf,
677                             struct btrfs_inode_item *item,
678                             struct inode *inode)
679 {
680         btrfs_set_inode_uid(leaf, item, inode->i_uid);
681         btrfs_set_inode_gid(leaf, item, inode->i_gid);
682         btrfs_set_inode_size(leaf, item, inode->i_size);
683         btrfs_set_inode_mode(leaf, item, inode->i_mode);
684         btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
685
686         btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
687                                inode->i_atime.tv_sec);
688         btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
689                                 inode->i_atime.tv_nsec);
690
691         btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
692                                inode->i_mtime.tv_sec);
693         btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
694                                 inode->i_mtime.tv_nsec);
695
696         btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
697                                inode->i_ctime.tv_sec);
698         btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
699                                 inode->i_ctime.tv_nsec);
700
701         btrfs_set_inode_nblocks(leaf, item, inode->i_blocks);
702         btrfs_set_inode_generation(leaf, item, inode->i_generation);
703         btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
704         btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
705         btrfs_set_inode_block_group(leaf, item,
706                                     BTRFS_I(inode)->block_group->key.objectid);
707 }
708
709 int btrfs_update_inode(struct btrfs_trans_handle *trans,
710                               struct btrfs_root *root,
711                               struct inode *inode)
712 {
713         struct btrfs_inode_item *inode_item;
714         struct btrfs_path *path;
715         struct extent_buffer *leaf;
716         int ret;
717
718         path = btrfs_alloc_path();
719         BUG_ON(!path);
720         ret = btrfs_lookup_inode(trans, root, path,
721                                  &BTRFS_I(inode)->location, 1);
722         if (ret) {
723                 if (ret > 0)
724                         ret = -ENOENT;
725                 goto failed;
726         }
727
728         leaf = path->nodes[0];
729         inode_item = btrfs_item_ptr(leaf, path->slots[0],
730                                   struct btrfs_inode_item);
731
732         fill_inode_item(leaf, inode_item, inode);
733         btrfs_mark_buffer_dirty(leaf);
734         btrfs_set_inode_last_trans(trans, inode);
735         ret = 0;
736 failed:
737         btrfs_release_path(root, path);
738         btrfs_free_path(path);
739         return ret;
740 }
741
742
743 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
744                               struct btrfs_root *root,
745                               struct inode *dir,
746                               struct dentry *dentry)
747 {
748         struct btrfs_path *path;
749         const char *name = dentry->d_name.name;
750         int name_len = dentry->d_name.len;
751         int ret = 0;
752         struct extent_buffer *leaf;
753         struct btrfs_dir_item *di;
754         struct btrfs_key key;
755
756         path = btrfs_alloc_path();
757         if (!path) {
758                 ret = -ENOMEM;
759                 goto err;
760         }
761
762         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
763                                     name, name_len, -1);
764         if (IS_ERR(di)) {
765                 ret = PTR_ERR(di);
766                 goto err;
767         }
768         if (!di) {
769                 ret = -ENOENT;
770                 goto err;
771         }
772         leaf = path->nodes[0];
773         btrfs_dir_item_key_to_cpu(leaf, di, &key);
774         ret = btrfs_delete_one_dir_name(trans, root, path, di);
775         if (ret)
776                 goto err;
777         btrfs_release_path(root, path);
778
779         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
780                                          key.objectid, name, name_len, -1);
781         if (IS_ERR(di)) {
782                 ret = PTR_ERR(di);
783                 goto err;
784         }
785         if (!di) {
786                 ret = -ENOENT;
787                 goto err;
788         }
789         ret = btrfs_delete_one_dir_name(trans, root, path, di);
790
791         dentry->d_inode->i_ctime = dir->i_ctime;
792         ret = btrfs_del_inode_ref(trans, root, name, name_len,
793                                   dentry->d_inode->i_ino,
794                                   dentry->d_parent->d_inode->i_ino);
795         if (ret) {
796                 printk("failed to delete reference to %.*s, "
797                        "inode %lu parent %lu\n", name_len, name,
798                        dentry->d_inode->i_ino,
799                        dentry->d_parent->d_inode->i_ino);
800         }
801 err:
802         btrfs_free_path(path);
803         if (!ret) {
804                 dir->i_size -= name_len * 2;
805                 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
806                 btrfs_update_inode(trans, root, dir);
807 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
808                 dentry->d_inode->i_nlink--;
809 #else
810                 drop_nlink(dentry->d_inode);
811 #endif
812                 ret = btrfs_update_inode(trans, root, dentry->d_inode);
813                 dir->i_sb->s_dirt = 1;
814         }
815         return ret;
816 }
817
818 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
819 {
820         struct btrfs_root *root;
821         struct btrfs_trans_handle *trans;
822         struct inode *inode = dentry->d_inode;
823         int ret;
824         unsigned long nr = 0;
825
826         root = BTRFS_I(dir)->root;
827         mutex_lock(&root->fs_info->fs_mutex);
828
829         ret = btrfs_check_free_space(root, 1, 1);
830         if (ret)
831                 goto fail;
832
833         trans = btrfs_start_transaction(root, 1);
834
835         btrfs_set_trans_block_group(trans, dir);
836         ret = btrfs_unlink_trans(trans, root, dir, dentry);
837         nr = trans->blocks_used;
838
839         if (inode->i_nlink == 0) {
840                 int found;
841                 /* if the inode isn't linked anywhere,
842                  * we don't need to worry about
843                  * data=ordered
844                  */
845                 found = btrfs_del_ordered_inode(inode);
846                 if (found == 1) {
847                         atomic_dec(&inode->i_count);
848                 }
849         }
850
851         btrfs_end_transaction(trans, root);
852 fail:
853         mutex_unlock(&root->fs_info->fs_mutex);
854         btrfs_btree_balance_dirty(root, nr);
855         btrfs_throttle(root);
856         return ret;
857 }
858
859 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
860 {
861         struct inode *inode = dentry->d_inode;
862         int err = 0;
863         int ret;
864         struct btrfs_root *root = BTRFS_I(dir)->root;
865         struct btrfs_trans_handle *trans;
866         unsigned long nr = 0;
867
868         if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
869                 return -ENOTEMPTY;
870
871         mutex_lock(&root->fs_info->fs_mutex);
872         ret = btrfs_check_free_space(root, 1, 1);
873         if (ret)
874                 goto fail;
875
876         trans = btrfs_start_transaction(root, 1);
877         btrfs_set_trans_block_group(trans, dir);
878
879         /* now the directory is empty */
880         err = btrfs_unlink_trans(trans, root, dir, dentry);
881         if (!err) {
882                 inode->i_size = 0;
883         }
884
885         nr = trans->blocks_used;
886         ret = btrfs_end_transaction(trans, root);
887 fail:
888         mutex_unlock(&root->fs_info->fs_mutex);
889         btrfs_btree_balance_dirty(root, nr);
890         btrfs_throttle(root);
891
892         if (ret && !err)
893                 err = ret;
894         return err;
895 }
896
897 /*
898  * this can truncate away extent items, csum items and directory items.
899  * It starts at a high offset and removes keys until it can't find
900  * any higher than i_size.
901  *
902  * csum items that cross the new i_size are truncated to the new size
903  * as well.
904  */
905 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
906                                    struct btrfs_root *root,
907                                    struct inode *inode,
908                                    u32 min_type)
909 {
910         int ret;
911         struct btrfs_path *path;
912         struct btrfs_key key;
913         struct btrfs_key found_key;
914         u32 found_type;
915         struct extent_buffer *leaf;
916         struct btrfs_file_extent_item *fi;
917         u64 extent_start = 0;
918         u64 extent_num_bytes = 0;
919         u64 item_end = 0;
920         u64 root_gen = 0;
921         u64 root_owner = 0;
922         int found_extent;
923         int del_item;
924         int pending_del_nr = 0;
925         int pending_del_slot = 0;
926         int extent_type = -1;
927         u64 mask = root->sectorsize - 1;
928
929         btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1);
930         path = btrfs_alloc_path();
931         path->reada = -1;
932         BUG_ON(!path);
933
934         /* FIXME, add redo link to tree so we don't leak on crash */
935         key.objectid = inode->i_ino;
936         key.offset = (u64)-1;
937         key.type = (u8)-1;
938
939         btrfs_init_path(path);
940 search_again:
941         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
942         if (ret < 0) {
943                 goto error;
944         }
945         if (ret > 0) {
946                 BUG_ON(path->slots[0] == 0);
947                 path->slots[0]--;
948         }
949
950         while(1) {
951                 fi = NULL;
952                 leaf = path->nodes[0];
953                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
954                 found_type = btrfs_key_type(&found_key);
955
956                 if (found_key.objectid != inode->i_ino)
957                         break;
958
959                 if (found_type < min_type)
960                         break;
961
962                 item_end = found_key.offset;
963                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
964                         fi = btrfs_item_ptr(leaf, path->slots[0],
965                                             struct btrfs_file_extent_item);
966                         extent_type = btrfs_file_extent_type(leaf, fi);
967                         if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
968                                 item_end +=
969                                     btrfs_file_extent_num_bytes(leaf, fi);
970                         } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
971                                 struct btrfs_item *item = btrfs_item_nr(leaf,
972                                                                 path->slots[0]);
973                                 item_end += btrfs_file_extent_inline_len(leaf,
974                                                                          item);
975                         }
976                         item_end--;
977                 }
978                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
979                         ret = btrfs_csum_truncate(trans, root, path,
980                                                   inode->i_size);
981                         BUG_ON(ret);
982                 }
983                 if (item_end < inode->i_size) {
984                         if (found_type == BTRFS_DIR_ITEM_KEY) {
985                                 found_type = BTRFS_INODE_ITEM_KEY;
986                         } else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
987                                 found_type = BTRFS_CSUM_ITEM_KEY;
988                         } else if (found_type == BTRFS_EXTENT_DATA_KEY) {
989                                 found_type = BTRFS_XATTR_ITEM_KEY;
990                         } else if (found_type == BTRFS_XATTR_ITEM_KEY) {
991                                 found_type = BTRFS_INODE_REF_KEY;
992                         } else if (found_type) {
993                                 found_type--;
994                         } else {
995                                 break;
996                         }
997                         btrfs_set_key_type(&key, found_type);
998                         goto next;
999                 }
1000                 if (found_key.offset >= inode->i_size)
1001                         del_item = 1;
1002                 else
1003                         del_item = 0;
1004                 found_extent = 0;
1005
1006                 /* FIXME, shrink the extent if the ref count is only 1 */
1007                 if (found_type != BTRFS_EXTENT_DATA_KEY)
1008                         goto delete;
1009
1010                 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
1011                         u64 num_dec;
1012                         extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
1013                         if (!del_item) {
1014                                 u64 orig_num_bytes =
1015                                         btrfs_file_extent_num_bytes(leaf, fi);
1016                                 extent_num_bytes = inode->i_size -
1017                                         found_key.offset + root->sectorsize - 1;
1018                                 extent_num_bytes = extent_num_bytes &
1019                                         ~((u64)root->sectorsize - 1);
1020                                 btrfs_set_file_extent_num_bytes(leaf, fi,
1021                                                          extent_num_bytes);
1022                                 num_dec = (orig_num_bytes -
1023                                            extent_num_bytes);
1024                                 if (extent_start != 0)
1025                                         dec_i_blocks(inode, num_dec);
1026                                 btrfs_mark_buffer_dirty(leaf);
1027                         } else {
1028                                 extent_num_bytes =
1029                                         btrfs_file_extent_disk_num_bytes(leaf,
1030                                                                          fi);
1031                                 /* FIXME blocksize != 4096 */
1032                                 num_dec = btrfs_file_extent_num_bytes(leaf, fi);
1033                                 if (extent_start != 0) {
1034                                         found_extent = 1;
1035                                         dec_i_blocks(inode, num_dec);
1036                                 }
1037                                 root_gen = btrfs_header_generation(leaf);
1038                                 root_owner = btrfs_header_owner(leaf);
1039                         }
1040                 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1041                         if (!del_item) {
1042                                 u32 newsize = inode->i_size - found_key.offset;
1043                                 dec_i_blocks(inode, item_end + 1 -
1044                                             found_key.offset - newsize);
1045                                 newsize =
1046                                     btrfs_file_extent_calc_inline_size(newsize);
1047                                 ret = btrfs_truncate_item(trans, root, path,
1048                                                           newsize, 1);
1049                                 BUG_ON(ret);
1050                         } else {
1051                                 dec_i_blocks(inode, item_end + 1 -
1052                                              found_key.offset);
1053                         }
1054                 }
1055 delete:
1056                 if (del_item) {
1057                         if (!pending_del_nr) {
1058                                 /* no pending yet, add ourselves */
1059                                 pending_del_slot = path->slots[0];
1060                                 pending_del_nr = 1;
1061                         } else if (pending_del_nr &&
1062                                    path->slots[0] + 1 == pending_del_slot) {
1063                                 /* hop on the pending chunk */
1064                                 pending_del_nr++;
1065                                 pending_del_slot = path->slots[0];
1066                         } else {
1067                                 printk("bad pending slot %d pending_del_nr %d pending_del_slot %d\n", path->slots[0], pending_del_nr, pending_del_slot);
1068                         }
1069                 } else {
1070                         break;
1071                 }
1072                 if (found_extent) {
1073                         ret = btrfs_free_extent(trans, root, extent_start,
1074                                                 extent_num_bytes,
1075                                                 root_owner,
1076                                                 root_gen, inode->i_ino,
1077                                                 found_key.offset, 0);
1078                         BUG_ON(ret);
1079                 }
1080 next:
1081                 if (path->slots[0] == 0) {
1082                         if (pending_del_nr)
1083                                 goto del_pending;
1084                         btrfs_release_path(root, path);
1085                         goto search_again;
1086                 }
1087
1088                 path->slots[0]--;
1089                 if (pending_del_nr &&
1090                     path->slots[0] + 1 != pending_del_slot) {
1091                         struct btrfs_key debug;
1092 del_pending:
1093                         btrfs_item_key_to_cpu(path->nodes[0], &debug,
1094                                               pending_del_slot);
1095                         ret = btrfs_del_items(trans, root, path,
1096                                               pending_del_slot,
1097                                               pending_del_nr);
1098                         BUG_ON(ret);
1099                         pending_del_nr = 0;
1100                         btrfs_release_path(root, path);
1101                         goto search_again;
1102                 }
1103         }
1104         ret = 0;
1105 error:
1106         if (pending_del_nr) {
1107                 ret = btrfs_del_items(trans, root, path, pending_del_slot,
1108                                       pending_del_nr);
1109         }
1110         btrfs_release_path(root, path);
1111         btrfs_free_path(path);
1112         inode->i_sb->s_dirt = 1;
1113         return ret;
1114 }
1115
1116 static int btrfs_cow_one_page(struct inode *inode, struct page *page,
1117                               size_t zero_start)
1118 {
1119         char *kaddr;
1120         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1121         u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1122         u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
1123         int ret = 0;
1124
1125         WARN_ON(!PageLocked(page));
1126         set_page_extent_mapped(page);
1127
1128         lock_extent(io_tree, page_start, page_end, GFP_NOFS);
1129         set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
1130                             page_end, GFP_NOFS);
1131
1132         if (zero_start != PAGE_CACHE_SIZE) {
1133                 kaddr = kmap(page);
1134                 memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
1135                 flush_dcache_page(page);
1136                 kunmap(page);
1137         }
1138         set_page_dirty(page);
1139         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
1140
1141         return ret;
1142 }
1143
1144 /*
1145  * taken from block_truncate_page, but does cow as it zeros out
1146  * any bytes left in the last page in the file.
1147  */
1148 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
1149 {
1150         struct inode *inode = mapping->host;
1151         struct btrfs_root *root = BTRFS_I(inode)->root;
1152         u32 blocksize = root->sectorsize;
1153         pgoff_t index = from >> PAGE_CACHE_SHIFT;
1154         unsigned offset = from & (PAGE_CACHE_SIZE-1);
1155         struct page *page;
1156         int ret = 0;
1157         u64 page_start;
1158
1159         if ((offset & (blocksize - 1)) == 0)
1160                 goto out;
1161
1162         ret = -ENOMEM;
1163         page = grab_cache_page(mapping, index);
1164         if (!page)
1165                 goto out;
1166         if (!PageUptodate(page)) {
1167                 ret = btrfs_readpage(NULL, page);
1168                 lock_page(page);
1169                 if (!PageUptodate(page)) {
1170                         ret = -EIO;
1171                         goto out;
1172                 }
1173         }
1174         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1175
1176         ret = btrfs_cow_one_page(inode, page, offset);
1177
1178         unlock_page(page);
1179         page_cache_release(page);
1180 out:
1181         return ret;
1182 }
1183
1184 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
1185 {
1186         struct inode *inode = dentry->d_inode;
1187         int err;
1188
1189         err = inode_change_ok(inode, attr);
1190         if (err)
1191                 return err;
1192
1193         if (S_ISREG(inode->i_mode) &&
1194             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
1195                 struct btrfs_trans_handle *trans;
1196                 struct btrfs_root *root = BTRFS_I(inode)->root;
1197                 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1198
1199                 u64 mask = root->sectorsize - 1;
1200                 u64 hole_start = (inode->i_size + mask) & ~mask;
1201                 u64 block_end = (attr->ia_size + mask) & ~mask;
1202                 u64 hole_size;
1203                 u64 alloc_hint = 0;
1204
1205                 if (attr->ia_size <= hole_start)
1206                         goto out;
1207
1208                 mutex_lock(&root->fs_info->fs_mutex);
1209                 err = btrfs_check_free_space(root, 1, 0);
1210                 mutex_unlock(&root->fs_info->fs_mutex);
1211                 if (err)
1212                         goto fail;
1213
1214                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
1215
1216                 lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1217                 hole_size = block_end - hole_start;
1218
1219                 mutex_lock(&root->fs_info->fs_mutex);
1220                 trans = btrfs_start_transaction(root, 1);
1221                 btrfs_set_trans_block_group(trans, inode);
1222                 err = btrfs_drop_extents(trans, root, inode,
1223                                          hole_start, block_end, hole_start,
1224                                          &alloc_hint);
1225
1226                 if (alloc_hint != EXTENT_MAP_INLINE) {
1227                         err = btrfs_insert_file_extent(trans, root,
1228                                                        inode->i_ino,
1229                                                        hole_start, 0, 0,
1230                                                        hole_size);
1231                         btrfs_drop_extent_cache(inode, hole_start,
1232                                                 (u64)-1);
1233                         btrfs_check_file(root, inode);
1234                 }
1235                 btrfs_end_transaction(trans, root);
1236                 mutex_unlock(&root->fs_info->fs_mutex);
1237                 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1238                 if (err)
1239                         return err;
1240         }
1241 out:
1242         err = inode_setattr(inode, attr);
1243 fail:
1244         return err;
1245 }
1246
1247 void btrfs_put_inode(struct inode *inode)
1248 {
1249         int ret;
1250
1251         if (!BTRFS_I(inode)->ordered_trans) {
1252                 return;
1253         }
1254
1255         if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) ||
1256             mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
1257                 return;
1258
1259         ret = btrfs_del_ordered_inode(inode);
1260         if (ret == 1) {
1261                 atomic_dec(&inode->i_count);
1262         }
1263 }
1264
1265 void btrfs_delete_inode(struct inode *inode)
1266 {
1267         struct btrfs_trans_handle *trans;
1268         struct btrfs_root *root = BTRFS_I(inode)->root;
1269         unsigned long nr;
1270         int ret;
1271
1272         truncate_inode_pages(&inode->i_data, 0);
1273         if (is_bad_inode(inode)) {
1274                 goto no_delete;
1275         }
1276
1277         inode->i_size = 0;
1278         mutex_lock(&root->fs_info->fs_mutex);
1279         trans = btrfs_start_transaction(root, 1);
1280
1281         btrfs_set_trans_block_group(trans, inode);
1282         ret = btrfs_truncate_in_trans(trans, root, inode, 0);
1283         if (ret)
1284                 goto no_delete_lock;
1285
1286         nr = trans->blocks_used;
1287         clear_inode(inode);
1288
1289         btrfs_end_transaction(trans, root);
1290         mutex_unlock(&root->fs_info->fs_mutex);
1291         btrfs_btree_balance_dirty(root, nr);
1292         btrfs_throttle(root);
1293         return;
1294
1295 no_delete_lock:
1296         nr = trans->blocks_used;
1297         btrfs_end_transaction(trans, root);
1298         mutex_unlock(&root->fs_info->fs_mutex);
1299         btrfs_btree_balance_dirty(root, nr);
1300         btrfs_throttle(root);
1301 no_delete:
1302         clear_inode(inode);
1303 }
1304
1305 /*
1306  * this returns the key found in the dir entry in the location pointer.
1307  * If no dir entries were found, location->objectid is 0.
1308  */
1309 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
1310                                struct btrfs_key *location)
1311 {
1312         const char *name = dentry->d_name.name;
1313         int namelen = dentry->d_name.len;
1314         struct btrfs_dir_item *di;
1315         struct btrfs_path *path;
1316         struct btrfs_root *root = BTRFS_I(dir)->root;
1317         int ret = 0;
1318
1319         if (namelen == 1 && strcmp(name, ".") == 0) {
1320                 location->objectid = dir->i_ino;
1321                 location->type = BTRFS_INODE_ITEM_KEY;
1322                 location->offset = 0;
1323                 return 0;
1324         }
1325         path = btrfs_alloc_path();
1326         BUG_ON(!path);
1327
1328         if (namelen == 2 && strcmp(name, "..") == 0) {
1329                 struct btrfs_key key;
1330                 struct extent_buffer *leaf;
1331                 u32 nritems;
1332                 int slot;
1333
1334                 key.objectid = dir->i_ino;
1335                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1336                 key.offset = 0;
1337                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1338                 BUG_ON(ret == 0);
1339                 ret = 0;
1340
1341                 leaf = path->nodes[0];
1342                 slot = path->slots[0];
1343                 nritems = btrfs_header_nritems(leaf);
1344                 if (slot >= nritems)
1345                         goto out_err;
1346
1347                 btrfs_item_key_to_cpu(leaf, &key, slot);
1348                 if (key.objectid != dir->i_ino ||
1349                     key.type != BTRFS_INODE_REF_KEY) {
1350                         goto out_err;
1351                 }
1352                 location->objectid = key.offset;
1353                 location->type = BTRFS_INODE_ITEM_KEY;
1354                 location->offset = 0;
1355                 goto out;
1356         }
1357
1358         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
1359                                     namelen, 0);
1360         if (IS_ERR(di))
1361                 ret = PTR_ERR(di);
1362         if (!di || IS_ERR(di)) {
1363                 goto out_err;
1364         }
1365         btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
1366 out:
1367         btrfs_free_path(path);
1368         return ret;
1369 out_err:
1370         location->objectid = 0;
1371         goto out;
1372 }
1373
1374 /*
1375  * when we hit a tree root in a directory, the btrfs part of the inode
1376  * needs to be changed to reflect the root directory of the tree root.  This
1377  * is kind of like crossing a mount point.
1378  */
1379 static int fixup_tree_root_location(struct btrfs_root *root,
1380                              struct btrfs_key *location,
1381                              struct btrfs_root **sub_root,
1382                              struct dentry *dentry)
1383 {
1384         struct btrfs_path *path;
1385         struct btrfs_root_item *ri;
1386
1387         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
1388                 return 0;
1389         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
1390                 return 0;
1391
1392         path = btrfs_alloc_path();
1393         BUG_ON(!path);
1394         mutex_lock(&root->fs_info->fs_mutex);
1395
1396         *sub_root = btrfs_read_fs_root(root->fs_info, location,
1397                                         dentry->d_name.name,
1398                                         dentry->d_name.len);
1399         if (IS_ERR(*sub_root))
1400                 return PTR_ERR(*sub_root);
1401
1402         ri = &(*sub_root)->root_item;
1403         location->objectid = btrfs_root_dirid(ri);
1404         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1405         location->offset = 0;
1406
1407         btrfs_free_path(path);
1408         mutex_unlock(&root->fs_info->fs_mutex);
1409         return 0;
1410 }
1411
1412 static int btrfs_init_locked_inode(struct inode *inode, void *p)
1413 {
1414         struct btrfs_iget_args *args = p;
1415         inode->i_ino = args->ino;
1416         BTRFS_I(inode)->root = args->root;
1417         BTRFS_I(inode)->delalloc_bytes = 0;
1418         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1419         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1420                              inode->i_mapping, GFP_NOFS);
1421         extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1422                              inode->i_mapping, GFP_NOFS);
1423         atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
1424         return 0;
1425 }
1426
1427 static int btrfs_find_actor(struct inode *inode, void *opaque)
1428 {
1429         struct btrfs_iget_args *args = opaque;
1430         return (args->ino == inode->i_ino &&
1431                 args->root == BTRFS_I(inode)->root);
1432 }
1433
1434 struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
1435                             u64 root_objectid)
1436 {
1437         struct btrfs_iget_args args;
1438         args.ino = objectid;
1439         args.root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid);
1440
1441         if (!args.root)
1442                 return NULL;
1443
1444         return ilookup5(s, objectid, btrfs_find_actor, (void *)&args);
1445 }
1446
1447 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
1448                                 struct btrfs_root *root)
1449 {
1450         struct inode *inode;
1451         struct btrfs_iget_args args;
1452         args.ino = objectid;
1453         args.root = root;
1454
1455         inode = iget5_locked(s, objectid, btrfs_find_actor,
1456                              btrfs_init_locked_inode,
1457                              (void *)&args);
1458         return inode;
1459 }
1460
1461 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
1462                                    struct nameidata *nd)
1463 {
1464         struct inode * inode;
1465         struct btrfs_inode *bi = BTRFS_I(dir);
1466         struct btrfs_root *root = bi->root;
1467         struct btrfs_root *sub_root = root;
1468         struct btrfs_key location;
1469         int ret;
1470
1471         if (dentry->d_name.len > BTRFS_NAME_LEN)
1472                 return ERR_PTR(-ENAMETOOLONG);
1473
1474         mutex_lock(&root->fs_info->fs_mutex);
1475         ret = btrfs_inode_by_name(dir, dentry, &location);
1476         mutex_unlock(&root->fs_info->fs_mutex);
1477
1478         if (ret < 0)
1479                 return ERR_PTR(ret);
1480
1481         inode = NULL;
1482         if (location.objectid) {
1483                 ret = fixup_tree_root_location(root, &location, &sub_root,
1484                                                 dentry);
1485                 if (ret < 0)
1486                         return ERR_PTR(ret);
1487                 if (ret > 0)
1488                         return ERR_PTR(-ENOENT);
1489                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
1490                                           sub_root);
1491                 if (!inode)
1492                         return ERR_PTR(-EACCES);
1493                 if (inode->i_state & I_NEW) {
1494                         /* the inode and parent dir are two different roots */
1495                         if (sub_root != root) {
1496                                 igrab(inode);
1497                                 sub_root->inode = inode;
1498                         }
1499                         BTRFS_I(inode)->root = sub_root;
1500                         memcpy(&BTRFS_I(inode)->location, &location,
1501                                sizeof(location));
1502                         btrfs_read_locked_inode(inode);
1503                         unlock_new_inode(inode);
1504                 }
1505         }
1506         return d_splice_alias(inode, dentry);
1507 }
1508
1509 static unsigned char btrfs_filetype_table[] = {
1510         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
1511 };
1512
1513 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1514 {
1515         struct inode *inode = filp->f_dentry->d_inode;
1516         struct btrfs_root *root = BTRFS_I(inode)->root;
1517         struct btrfs_item *item;
1518         struct btrfs_dir_item *di;
1519         struct btrfs_key key;
1520         struct btrfs_key found_key;
1521         struct btrfs_path *path;
1522         int ret;
1523         u32 nritems;
1524         struct extent_buffer *leaf;
1525         int slot;
1526         int advance;
1527         unsigned char d_type;
1528         int over = 0;
1529         u32 di_cur;
1530         u32 di_total;
1531         u32 di_len;
1532         int key_type = BTRFS_DIR_INDEX_KEY;
1533         char tmp_name[32];
1534         char *name_ptr;
1535         int name_len;
1536
1537         /* FIXME, use a real flag for deciding about the key type */
1538         if (root->fs_info->tree_root == root)
1539                 key_type = BTRFS_DIR_ITEM_KEY;
1540
1541         /* special case for "." */
1542         if (filp->f_pos == 0) {
1543                 over = filldir(dirent, ".", 1,
1544                                1, inode->i_ino,
1545                                DT_DIR);
1546                 if (over)
1547                         return 0;
1548                 filp->f_pos = 1;
1549         }
1550
1551         mutex_lock(&root->fs_info->fs_mutex);
1552         key.objectid = inode->i_ino;
1553         path = btrfs_alloc_path();
1554         path->reada = 2;
1555
1556         /* special case for .., just use the back ref */
1557         if (filp->f_pos == 1) {
1558                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1559                 key.offset = 0;
1560                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1561                 BUG_ON(ret == 0);
1562                 leaf = path->nodes[0];
1563                 slot = path->slots[0];
1564                 nritems = btrfs_header_nritems(leaf);
1565                 if (slot >= nritems) {
1566                         btrfs_release_path(root, path);
1567                         goto read_dir_items;
1568                 }
1569                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1570                 btrfs_release_path(root, path);
1571                 if (found_key.objectid != key.objectid ||
1572                     found_key.type != BTRFS_INODE_REF_KEY)
1573                         goto read_dir_items;
1574                 over = filldir(dirent, "..", 2,
1575                                2, found_key.offset, DT_DIR);
1576                 if (over)
1577                         goto nopos;
1578                 filp->f_pos = 2;
1579         }
1580
1581 read_dir_items:
1582         btrfs_set_key_type(&key, key_type);
1583         key.offset = filp->f_pos;
1584
1585         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1586         if (ret < 0)
1587                 goto err;
1588         advance = 0;
1589         while(1) {
1590                 leaf = path->nodes[0];
1591                 nritems = btrfs_header_nritems(leaf);
1592                 slot = path->slots[0];
1593                 if (advance || slot >= nritems) {
1594                         if (slot >= nritems -1) {
1595                                 ret = btrfs_next_leaf(root, path);
1596                                 if (ret)
1597                                         break;
1598                                 leaf = path->nodes[0];
1599                                 nritems = btrfs_header_nritems(leaf);
1600                                 slot = path->slots[0];
1601                         } else {
1602                                 slot++;
1603                                 path->slots[0]++;
1604                         }
1605                 }
1606                 advance = 1;
1607                 item = btrfs_item_nr(leaf, slot);
1608                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1609
1610                 if (found_key.objectid != key.objectid)
1611                         break;
1612                 if (btrfs_key_type(&found_key) != key_type)
1613                         break;
1614                 if (found_key.offset < filp->f_pos)
1615                         continue;
1616
1617                 filp->f_pos = found_key.offset;
1618                 advance = 1;
1619                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
1620                 di_cur = 0;
1621                 di_total = btrfs_item_size(leaf, item);
1622                 while(di_cur < di_total) {
1623                         struct btrfs_key location;
1624
1625                         name_len = btrfs_dir_name_len(leaf, di);
1626                         if (name_len < 32) {
1627                                 name_ptr = tmp_name;
1628                         } else {
1629                                 name_ptr = kmalloc(name_len, GFP_NOFS);
1630                                 BUG_ON(!name_ptr);
1631                         }
1632                         read_extent_buffer(leaf, name_ptr,
1633                                            (unsigned long)(di + 1), name_len);
1634
1635                         d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
1636                         btrfs_dir_item_key_to_cpu(leaf, di, &location);
1637                         over = filldir(dirent, name_ptr, name_len,
1638                                        found_key.offset,
1639                                        location.objectid,
1640                                        d_type);
1641
1642                         if (name_ptr != tmp_name)
1643                                 kfree(name_ptr);
1644
1645                         if (over)
1646                                 goto nopos;
1647                         di_len = btrfs_dir_name_len(leaf, di) +
1648                                 btrfs_dir_data_len(leaf, di) +sizeof(*di);
1649                         di_cur += di_len;
1650                         di = (struct btrfs_dir_item *)((char *)di + di_len);
1651                 }
1652         }
1653         if (key_type == BTRFS_DIR_INDEX_KEY)
1654                 filp->f_pos = INT_LIMIT(typeof(filp->f_pos));
1655         else
1656                 filp->f_pos++;
1657 nopos:
1658         ret = 0;
1659 err:
1660         btrfs_release_path(root, path);
1661         btrfs_free_path(path);
1662         mutex_unlock(&root->fs_info->fs_mutex);
1663         return ret;
1664 }
1665
1666 int btrfs_write_inode(struct inode *inode, int wait)
1667 {
1668         struct btrfs_root *root = BTRFS_I(inode)->root;
1669         struct btrfs_trans_handle *trans;
1670         int ret = 0;
1671
1672         if (wait) {
1673                 mutex_lock(&root->fs_info->fs_mutex);
1674                 trans = btrfs_start_transaction(root, 1);
1675                 btrfs_set_trans_block_group(trans, inode);
1676                 ret = btrfs_commit_transaction(trans, root);
1677                 mutex_unlock(&root->fs_info->fs_mutex);
1678         }
1679         return ret;
1680 }
1681
1682 /*
1683  * This is somewhat expensive, updating the tree every time the
1684  * inode changes.  But, it is most likely to find the inode in cache.
1685  * FIXME, needs more benchmarking...there are no reasons other than performance
1686  * to keep or drop this code.
1687  */
1688 void btrfs_dirty_inode(struct inode *inode)
1689 {
1690         struct btrfs_root *root = BTRFS_I(inode)->root;
1691         struct btrfs_trans_handle *trans;
1692
1693         mutex_lock(&root->fs_info->fs_mutex);
1694         trans = btrfs_start_transaction(root, 1);
1695         btrfs_set_trans_block_group(trans, inode);
1696         btrfs_update_inode(trans, root, inode);
1697         btrfs_end_transaction(trans, root);
1698         mutex_unlock(&root->fs_info->fs_mutex);
1699 }
1700
1701 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
1702                                      struct btrfs_root *root,
1703                                      const char *name, int name_len,
1704                                      u64 ref_objectid,
1705                                      u64 objectid,
1706                                      struct btrfs_block_group_cache *group,
1707                                      int mode)
1708 {
1709         struct inode *inode;
1710         struct btrfs_inode_item *inode_item;
1711         struct btrfs_block_group_cache *new_inode_group;
1712         struct btrfs_key *location;
1713         struct btrfs_path *path;
1714         struct btrfs_inode_ref *ref;
1715         struct btrfs_key key[2];
1716         u32 sizes[2];
1717         unsigned long ptr;
1718         int ret;
1719         int owner;
1720
1721         path = btrfs_alloc_path();
1722         BUG_ON(!path);
1723
1724         inode = new_inode(root->fs_info->sb);
1725         if (!inode)
1726                 return ERR_PTR(-ENOMEM);
1727
1728         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1729         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1730                              inode->i_mapping, GFP_NOFS);
1731         extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1732                              inode->i_mapping, GFP_NOFS);
1733         atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
1734         BTRFS_I(inode)->delalloc_bytes = 0;
1735         BTRFS_I(inode)->root = root;
1736
1737         if (mode & S_IFDIR)
1738                 owner = 0;
1739         else
1740                 owner = 1;
1741         new_inode_group = btrfs_find_block_group(root, group, 0,
1742                                        BTRFS_BLOCK_GROUP_METADATA, owner);
1743         if (!new_inode_group) {
1744                 printk("find_block group failed\n");
1745                 new_inode_group = group;
1746         }
1747         BTRFS_I(inode)->block_group = new_inode_group;
1748         BTRFS_I(inode)->flags = 0;
1749
1750         key[0].objectid = objectid;
1751         btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
1752         key[0].offset = 0;
1753
1754         key[1].objectid = objectid;
1755         btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
1756         key[1].offset = ref_objectid;
1757
1758         sizes[0] = sizeof(struct btrfs_inode_item);
1759         sizes[1] = name_len + sizeof(*ref);
1760
1761         ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
1762         if (ret != 0)
1763                 goto fail;
1764
1765         if (objectid > root->highest_inode)
1766                 root->highest_inode = objectid;
1767
1768         inode->i_uid = current->fsuid;
1769         inode->i_gid = current->fsgid;
1770         inode->i_mode = mode;
1771         inode->i_ino = objectid;
1772         inode->i_blocks = 0;
1773         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1774         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1775                                   struct btrfs_inode_item);
1776         fill_inode_item(path->nodes[0], inode_item, inode);
1777
1778         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1779                              struct btrfs_inode_ref);
1780         btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
1781         ptr = (unsigned long)(ref + 1);
1782         write_extent_buffer(path->nodes[0], name, ptr, name_len);
1783
1784         btrfs_mark_buffer_dirty(path->nodes[0]);
1785         btrfs_free_path(path);
1786
1787         location = &BTRFS_I(inode)->location;
1788         location->objectid = objectid;
1789         location->offset = 0;
1790         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1791
1792         insert_inode_hash(inode);
1793         return inode;
1794 fail:
1795         btrfs_free_path(path);
1796         return ERR_PTR(ret);
1797 }
1798
1799 static inline u8 btrfs_inode_type(struct inode *inode)
1800 {
1801         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1802 }
1803
1804 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1805                             struct dentry *dentry, struct inode *inode,
1806                             int add_backref)
1807 {
1808         int ret;
1809         struct btrfs_key key;
1810         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1811         struct inode *parent_inode;
1812
1813         key.objectid = inode->i_ino;
1814         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1815         key.offset = 0;
1816
1817         ret = btrfs_insert_dir_item(trans, root,
1818                                     dentry->d_name.name, dentry->d_name.len,
1819                                     dentry->d_parent->d_inode->i_ino,
1820                                     &key, btrfs_inode_type(inode));
1821         if (ret == 0) {
1822                 if (add_backref) {
1823                         ret = btrfs_insert_inode_ref(trans, root,
1824                                              dentry->d_name.name,
1825                                              dentry->d_name.len,
1826                                              inode->i_ino,
1827                                              dentry->d_parent->d_inode->i_ino);
1828                 }
1829                 parent_inode = dentry->d_parent->d_inode;
1830                 parent_inode->i_size += dentry->d_name.len * 2;
1831                 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
1832                 ret = btrfs_update_inode(trans, root,
1833                                          dentry->d_parent->d_inode);
1834         }
1835         return ret;
1836 }
1837
1838 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1839                             struct dentry *dentry, struct inode *inode,
1840                             int backref)
1841 {
1842         int err = btrfs_add_link(trans, dentry, inode, backref);
1843         if (!err) {
1844                 d_instantiate(dentry, inode);
1845                 return 0;
1846         }
1847         if (err > 0)
1848                 err = -EEXIST;
1849         return err;
1850 }
1851
1852 static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
1853                         int mode, dev_t rdev)
1854 {
1855         struct btrfs_trans_handle *trans;
1856         struct btrfs_root *root = BTRFS_I(dir)->root;
1857         struct inode *inode = NULL;
1858         int err;
1859         int drop_inode = 0;
1860         u64 objectid;
1861         unsigned long nr = 0;
1862
1863         if (!new_valid_dev(rdev))
1864                 return -EINVAL;
1865
1866         mutex_lock(&root->fs_info->fs_mutex);
1867         err = btrfs_check_free_space(root, 1, 0);
1868         if (err)
1869                 goto fail;
1870
1871         trans = btrfs_start_transaction(root, 1);
1872         btrfs_set_trans_block_group(trans, dir);
1873
1874         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1875         if (err) {
1876                 err = -ENOSPC;
1877                 goto out_unlock;
1878         }
1879
1880         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1881                                 dentry->d_name.len,
1882                                 dentry->d_parent->d_inode->i_ino, objectid,
1883                                 BTRFS_I(dir)->block_group, mode);
1884         err = PTR_ERR(inode);
1885         if (IS_ERR(inode))
1886                 goto out_unlock;
1887
1888         btrfs_set_trans_block_group(trans, inode);
1889         err = btrfs_add_nondir(trans, dentry, inode, 0);
1890         if (err)
1891                 drop_inode = 1;
1892         else {
1893                 inode->i_op = &btrfs_special_inode_operations;
1894                 init_special_inode(inode, inode->i_mode, rdev);
1895                 btrfs_update_inode(trans, root, inode);
1896         }
1897         dir->i_sb->s_dirt = 1;
1898         btrfs_update_inode_block_group(trans, inode);
1899         btrfs_update_inode_block_group(trans, dir);
1900 out_unlock:
1901         nr = trans->blocks_used;
1902         btrfs_end_transaction(trans, root);
1903 fail:
1904         mutex_unlock(&root->fs_info->fs_mutex);
1905
1906         if (drop_inode) {
1907                 inode_dec_link_count(inode);
1908                 iput(inode);
1909         }
1910         btrfs_btree_balance_dirty(root, nr);
1911         btrfs_throttle(root);
1912         return err;
1913 }
1914
1915 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1916                         int mode, struct nameidata *nd)
1917 {
1918         struct btrfs_trans_handle *trans;
1919         struct btrfs_root *root = BTRFS_I(dir)->root;
1920         struct inode *inode = NULL;
1921         int err;
1922         int drop_inode = 0;
1923         unsigned long nr = 0;
1924         u64 objectid;
1925
1926         mutex_lock(&root->fs_info->fs_mutex);
1927         err = btrfs_check_free_space(root, 1, 0);
1928         if (err)
1929                 goto fail;
1930         trans = btrfs_start_transaction(root, 1);
1931         btrfs_set_trans_block_group(trans, dir);
1932
1933         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1934         if (err) {
1935                 err = -ENOSPC;
1936                 goto out_unlock;
1937         }
1938
1939         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1940                                 dentry->d_name.len,
1941                                 dentry->d_parent->d_inode->i_ino,
1942                                 objectid, BTRFS_I(dir)->block_group, mode);
1943         err = PTR_ERR(inode);
1944         if (IS_ERR(inode))
1945                 goto out_unlock;
1946
1947         btrfs_set_trans_block_group(trans, inode);
1948         err = btrfs_add_nondir(trans, dentry, inode, 0);
1949         if (err)
1950                 drop_inode = 1;
1951         else {
1952                 inode->i_mapping->a_ops = &btrfs_aops;
1953                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
1954                 inode->i_fop = &btrfs_file_operations;
1955                 inode->i_op = &btrfs_file_inode_operations;
1956                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1957                 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1958                                      inode->i_mapping, GFP_NOFS);
1959                 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1960                                      inode->i_mapping, GFP_NOFS);
1961                 BTRFS_I(inode)->delalloc_bytes = 0;
1962                 atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
1963                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
1964         }
1965         dir->i_sb->s_dirt = 1;
1966         btrfs_update_inode_block_group(trans, inode);
1967         btrfs_update_inode_block_group(trans, dir);
1968 out_unlock:
1969         nr = trans->blocks_used;
1970         btrfs_end_transaction(trans, root);
1971 fail:
1972         mutex_unlock(&root->fs_info->fs_mutex);
1973
1974         if (drop_inode) {
1975                 inode_dec_link_count(inode);
1976                 iput(inode);
1977         }
1978         btrfs_btree_balance_dirty(root, nr);
1979         btrfs_throttle(root);
1980         return err;
1981 }
1982
1983 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1984                       struct dentry *dentry)
1985 {
1986         struct btrfs_trans_handle *trans;
1987         struct btrfs_root *root = BTRFS_I(dir)->root;
1988         struct inode *inode = old_dentry->d_inode;
1989         unsigned long nr = 0;
1990         int err;
1991         int drop_inode = 0;
1992
1993         if (inode->i_nlink == 0)
1994                 return -ENOENT;
1995
1996 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
1997         inode->i_nlink++;
1998 #else
1999         inc_nlink(inode);
2000 #endif
2001         mutex_lock(&root->fs_info->fs_mutex);
2002         err = btrfs_check_free_space(root, 1, 0);
2003         if (err)
2004                 goto fail;
2005         trans = btrfs_start_transaction(root, 1);
2006
2007         btrfs_set_trans_block_group(trans, dir);
2008         atomic_inc(&inode->i_count);
2009         err = btrfs_add_nondir(trans, dentry, inode, 1);
2010
2011         if (err)
2012                 drop_inode = 1;
2013
2014         dir->i_sb->s_dirt = 1;
2015         btrfs_update_inode_block_group(trans, dir);
2016         err = btrfs_update_inode(trans, root, inode);
2017
2018         if (err)
2019                 drop_inode = 1;
2020
2021         nr = trans->blocks_used;
2022         btrfs_end_transaction(trans, root);
2023 fail:
2024         mutex_unlock(&root->fs_info->fs_mutex);
2025
2026         if (drop_inode) {
2027                 inode_dec_link_count(inode);
2028                 iput(inode);
2029         }
2030         btrfs_btree_balance_dirty(root, nr);
2031         btrfs_throttle(root);
2032         return err;
2033 }
2034
2035 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2036 {
2037         struct inode *inode;
2038         struct btrfs_trans_handle *trans;
2039         struct btrfs_root *root = BTRFS_I(dir)->root;
2040         int err = 0;
2041         int drop_on_err = 0;
2042         u64 objectid;
2043         unsigned long nr = 1;
2044
2045         mutex_lock(&root->fs_info->fs_mutex);
2046         err = btrfs_check_free_space(root, 1, 0);
2047         if (err)
2048                 goto out_unlock;
2049
2050         trans = btrfs_start_transaction(root, 1);
2051         btrfs_set_trans_block_group(trans, dir);
2052
2053         if (IS_ERR(trans)) {
2054                 err = PTR_ERR(trans);
2055                 goto out_unlock;
2056         }
2057
2058         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2059         if (err) {
2060                 err = -ENOSPC;
2061                 goto out_unlock;
2062         }
2063
2064         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
2065                                 dentry->d_name.len,
2066                                 dentry->d_parent->d_inode->i_ino, objectid,
2067                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
2068         if (IS_ERR(inode)) {
2069                 err = PTR_ERR(inode);
2070                 goto out_fail;
2071         }
2072
2073         drop_on_err = 1;
2074         inode->i_op = &btrfs_dir_inode_operations;
2075         inode->i_fop = &btrfs_dir_file_operations;
2076         btrfs_set_trans_block_group(trans, inode);
2077
2078         inode->i_size = 0;
2079         err = btrfs_update_inode(trans, root, inode);
2080         if (err)
2081                 goto out_fail;
2082
2083         err = btrfs_add_link(trans, dentry, inode, 0);
2084         if (err)
2085                 goto out_fail;
2086
2087         d_instantiate(dentry, inode);
2088         drop_on_err = 0;
2089         dir->i_sb->s_dirt = 1;
2090         btrfs_update_inode_block_group(trans, inode);
2091         btrfs_update_inode_block_group(trans, dir);
2092
2093 out_fail:
2094         nr = trans->blocks_used;
2095         btrfs_end_transaction(trans, root);
2096
2097 out_unlock:
2098         mutex_unlock(&root->fs_info->fs_mutex);
2099         if (drop_on_err)
2100                 iput(inode);
2101         btrfs_btree_balance_dirty(root, nr);
2102         btrfs_throttle(root);
2103         return err;
2104 }
2105
2106 static int merge_extent_mapping(struct extent_map_tree *em_tree,
2107                                 struct extent_map *existing,
2108                                 struct extent_map *em)
2109 {
2110         u64 start_diff;
2111         u64 new_end;
2112         int ret = 0;
2113         int real_blocks = existing->block_start < EXTENT_MAP_LAST_BYTE;
2114
2115         if (real_blocks && em->block_start >= EXTENT_MAP_LAST_BYTE)
2116                 goto invalid;
2117
2118         if (!real_blocks && em->block_start != existing->block_start)
2119                 goto invalid;
2120
2121         new_end = max(existing->start + existing->len, em->start + em->len);
2122
2123         if (existing->start >= em->start) {
2124                 if (em->start + em->len < existing->start)
2125                         goto invalid;
2126
2127                 start_diff = existing->start - em->start;
2128                 if (real_blocks && em->block_start + start_diff !=
2129                     existing->block_start)
2130                         goto invalid;
2131
2132                 em->len = new_end - em->start;
2133
2134                 remove_extent_mapping(em_tree, existing);
2135                 /* free for the tree */
2136                 free_extent_map(existing);
2137                 ret = add_extent_mapping(em_tree, em);
2138
2139         } else if (em->start > existing->start) {
2140
2141                 if (existing->start + existing->len < em->start)
2142                         goto invalid;
2143
2144                 start_diff = em->start - existing->start;
2145                 if (real_blocks && existing->block_start + start_diff !=
2146                     em->block_start)
2147                         goto invalid;
2148
2149                 remove_extent_mapping(em_tree, existing);
2150                 em->block_start = existing->block_start;
2151                 em->start = existing->start;
2152                 em->len = new_end - existing->start;
2153                 free_extent_map(existing);
2154
2155                 ret = add_extent_mapping(em_tree, em);
2156         } else {
2157                 goto invalid;
2158         }
2159         return ret;
2160
2161 invalid:
2162         printk("invalid extent map merge [%Lu %Lu %Lu] [%Lu %Lu %Lu]\n",
2163                existing->start, existing->len, existing->block_start,
2164                em->start, em->len, em->block_start);
2165         return -EIO;
2166 }
2167
2168 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
2169                                     size_t pg_offset, u64 start, u64 len,
2170                                     int create)
2171 {
2172         int ret;
2173         int err = 0;
2174         u64 bytenr;
2175         u64 extent_start = 0;
2176         u64 extent_end = 0;
2177         u64 objectid = inode->i_ino;
2178         u32 found_type;
2179         struct btrfs_path *path;
2180         struct btrfs_root *root = BTRFS_I(inode)->root;
2181         struct btrfs_file_extent_item *item;
2182         struct extent_buffer *leaf;
2183         struct btrfs_key found_key;
2184         struct extent_map *em = NULL;
2185         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2186         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2187         struct btrfs_trans_handle *trans = NULL;
2188
2189         path = btrfs_alloc_path();
2190         BUG_ON(!path);
2191         mutex_lock(&root->fs_info->fs_mutex);
2192
2193 again:
2194         spin_lock(&em_tree->lock);
2195         em = lookup_extent_mapping(em_tree, start, len);
2196         spin_unlock(&em_tree->lock);
2197
2198         if (em) {
2199                 if (em->start > start || em->start + em->len <= start)
2200                         free_extent_map(em);
2201                 else if (em->block_start == EXTENT_MAP_INLINE && page)
2202                         free_extent_map(em);
2203                 else
2204                         goto out;
2205         }
2206         em = alloc_extent_map(GFP_NOFS);
2207         if (!em) {
2208                 err = -ENOMEM;
2209                 goto out;
2210         }
2211
2212         em->start = EXTENT_MAP_HOLE;
2213         em->len = (u64)-1;
2214         em->bdev = inode->i_sb->s_bdev;
2215         ret = btrfs_lookup_file_extent(trans, root, path,
2216                                        objectid, start, trans != NULL);
2217         if (ret < 0) {
2218                 err = ret;
2219                 goto out;
2220         }
2221
2222         if (ret != 0) {
2223                 if (path->slots[0] == 0)
2224                         goto not_found;
2225                 path->slots[0]--;
2226         }
2227
2228         leaf = path->nodes[0];
2229         item = btrfs_item_ptr(leaf, path->slots[0],
2230                               struct btrfs_file_extent_item);
2231         /* are we inside the extent that was found? */
2232         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2233         found_type = btrfs_key_type(&found_key);
2234         if (found_key.objectid != objectid ||
2235             found_type != BTRFS_EXTENT_DATA_KEY) {
2236                 goto not_found;
2237         }
2238
2239         found_type = btrfs_file_extent_type(leaf, item);
2240         extent_start = found_key.offset;
2241         if (found_type == BTRFS_FILE_EXTENT_REG) {
2242                 extent_end = extent_start +
2243                        btrfs_file_extent_num_bytes(leaf, item);
2244                 err = 0;
2245                 if (start < extent_start || start >= extent_end) {
2246                         em->start = start;
2247                         if (start < extent_start) {
2248                                 if (start + len <= extent_start)
2249                                         goto not_found;
2250                                 em->len = extent_end - extent_start;
2251                         } else {
2252                                 em->len = len;
2253                         }
2254                         goto not_found_em;
2255                 }
2256                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
2257                 if (bytenr == 0) {
2258                         em->start = extent_start;
2259                         em->len = extent_end - extent_start;
2260                         em->block_start = EXTENT_MAP_HOLE;
2261                         goto insert;
2262                 }
2263                 bytenr += btrfs_file_extent_offset(leaf, item);
2264                 em->block_start = bytenr;
2265                 em->start = extent_start;
2266                 em->len = extent_end - extent_start;
2267                 goto insert;
2268         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
2269                 u64 page_start;
2270                 unsigned long ptr;
2271                 char *map;
2272                 size_t size;
2273                 size_t extent_offset;
2274                 size_t copy_size;
2275
2276                 size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf,
2277                                                     path->slots[0]));
2278                 extent_end = (extent_start + size + root->sectorsize - 1) &
2279                         ~((u64)root->sectorsize - 1);
2280                 if (start < extent_start || start >= extent_end) {
2281                         em->start = start;
2282                         if (start < extent_start) {
2283                                 if (start + len <= extent_start)
2284                                         goto not_found;
2285                                 em->len = extent_end - extent_start;
2286                         } else {
2287                                 em->len = len;
2288                         }
2289                         goto not_found_em;
2290                 }
2291                 em->block_start = EXTENT_MAP_INLINE;
2292
2293                 if (!page) {
2294                         em->start = extent_start;
2295                         em->len = size;
2296                         goto out;
2297                 }
2298
2299                 page_start = page_offset(page) + pg_offset;
2300                 extent_offset = page_start - extent_start;
2301                 copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
2302                                 size - extent_offset);
2303                 em->start = extent_start + extent_offset;
2304                 em->len = (copy_size + root->sectorsize - 1) &
2305                         ~((u64)root->sectorsize - 1);
2306                 map = kmap(page);
2307                 ptr = btrfs_file_extent_inline_start(item) + extent_offset;
2308                 if (create == 0 && !PageUptodate(page)) {
2309                         read_extent_buffer(leaf, map + pg_offset, ptr,
2310                                            copy_size);
2311                         flush_dcache_page(page);
2312                 } else if (create && PageUptodate(page)) {
2313                         if (!trans) {
2314                                 kunmap(page);
2315                                 free_extent_map(em);
2316                                 em = NULL;
2317                                 btrfs_release_path(root, path);
2318                                 trans = btrfs_start_transaction(root, 1);
2319                                 goto again;
2320                         }
2321                         write_extent_buffer(leaf, map + pg_offset, ptr,
2322                                             copy_size);
2323                         btrfs_mark_buffer_dirty(leaf);
2324                 }
2325                 kunmap(page);
2326                 set_extent_uptodate(io_tree, em->start,
2327                                     extent_map_end(em) - 1, GFP_NOFS);
2328                 goto insert;
2329         } else {
2330                 printk("unkknown found_type %d\n", found_type);
2331                 WARN_ON(1);
2332         }
2333 not_found:
2334         em->start = start;
2335         em->len = len;
2336 not_found_em:
2337         em->block_start = EXTENT_MAP_HOLE;
2338 insert:
2339         btrfs_release_path(root, path);
2340         if (em->start > start || extent_map_end(em) <= start) {
2341                 printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len);
2342                 err = -EIO;
2343                 goto out;
2344         }
2345
2346         err = 0;
2347         spin_lock(&em_tree->lock);
2348         ret = add_extent_mapping(em_tree, em);
2349         /* it is possible that someone inserted the extent into the tree
2350          * while we had the lock dropped.  It is also possible that
2351          * an overlapping map exists in the tree
2352          */
2353         if (ret == -EEXIST) {
2354                 struct extent_map *existing;
2355                 existing = lookup_extent_mapping(em_tree, start, len);
2356                 if (existing && (existing->start > start ||
2357                     existing->start + existing->len <= start)) {
2358                         free_extent_map(existing);
2359                         existing = NULL;
2360                 }
2361                 if (!existing) {
2362                         existing = lookup_extent_mapping(em_tree, em->start,
2363                                                          em->len);
2364                         if (existing) {
2365                                 err = merge_extent_mapping(em_tree, existing,
2366                                                            em);
2367                                 free_extent_map(existing);
2368                                 if (err) {
2369                                         free_extent_map(em);
2370                                         em = NULL;
2371                                 }
2372                         } else {
2373                                 err = -EIO;
2374                                 printk("failing to insert %Lu %Lu\n",
2375                                        start, len);
2376                                 free_extent_map(em);
2377                                 em = NULL;
2378                         }
2379                 } else {
2380                         free_extent_map(em);
2381                         em = existing;
2382                 }
2383         }
2384         spin_unlock(&em_tree->lock);
2385 out:
2386         btrfs_free_path(path);
2387         if (trans) {
2388                 ret = btrfs_end_transaction(trans, root);
2389                 if (!err)
2390                         err = ret;
2391         }
2392         mutex_unlock(&root->fs_info->fs_mutex);
2393         if (err) {
2394                 free_extent_map(em);
2395                 WARN_ON(1);
2396                 return ERR_PTR(err);
2397         }
2398         return em;
2399 }
2400
#if 0 /* waiting for O_DIRECT reads */
/*
 * get_block-style callback kept around for the (currently disabled)
 * O_DIRECT read path.
 *
 * Looks up the extent covering file block @iblock and fills @bh_result:
 *  - holes and delalloc extents only get b_size set and stay unmapped;
 *  - inline extents are rejected with -EINVAL;
 *  - everything else is translated with btrfs_map_block() and mapped onto
 *    the first stripe that comes back.
 *
 * Returns 0 or a negative errno.  @create is not used.
 */
static int btrfs_get_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	struct extent_map *em;
	u64 start = (u64)iblock << inode->i_blkbits;
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 len;
	u64 logical;
	u64 map_length;
	int ret = 0;

	em = btrfs_get_extent(inode, NULL, 0, start, bh_result->b_size, 0);

	/*
	 * btrfs_get_extent() reports failure through ERR_PTR().  Such a value
	 * must not be handed to free_extent_map(), and the caller needs to
	 * see the error instead of a silent 0.
	 */
	if (IS_ERR(em))
		return PTR_ERR(em);
	if (!em)
		goto out;

	/* the returned extent does not cover the block we asked for */
	if (em->start > start || em->start + em->len <= start)
		goto out;

	if (em->block_start == EXTENT_MAP_INLINE) {
		ret = -EINVAL;
		goto out;
	}

	/* bytes left in this extent, clamped to what b_size can hold */
	len = em->start + em->len - start;
	len = min_t(u64, len, INT_LIMIT(typeof(bh_result->b_size)));

	if (em->block_start == EXTENT_MAP_HOLE ||
	    em->block_start == EXTENT_MAP_DELALLOC) {
		bh_result->b_size = len;
		goto out;
	}

	logical = start - em->start;
	logical = em->block_start + logical;

	map_length = len;
	ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
			      logical, &map_length, &multi, 0);
	BUG_ON(ret);
	bh_result->b_blocknr = multi->stripes[0].physical >> inode->i_blkbits;
	bh_result->b_size = min(map_length, len);

	bh_result->b_bdev = multi->stripes[0].dev->bdev;
	set_buffer_mapped(bh_result);
	kfree(multi);
out:
	free_extent_map(em);
	return ret;
}
#endif
2455
2456 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
2457                         const struct iovec *iov, loff_t offset,
2458                         unsigned long nr_segs)
2459 {
2460         return -EINVAL;
2461 #if 0
2462         struct file *file = iocb->ki_filp;
2463         struct inode *inode = file->f_mapping->host;
2464
2465         if (rw == WRITE)
2466                 return -EINVAL;
2467
2468         return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
2469                                   offset, nr_segs, btrfs_get_block, NULL);
2470 #endif
2471 }
2472
2473 static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
2474 {
2475         return extent_bmap(mapping, iblock, btrfs_get_extent);
2476 }
2477
2478 int btrfs_readpage(struct file *file, struct page *page)
2479 {
2480         struct extent_io_tree *tree;
2481         tree = &BTRFS_I(page->mapping->host)->io_tree;
2482         return extent_read_full_page(tree, page, btrfs_get_extent);
2483 }
2484
2485 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
2486 {
2487         struct extent_io_tree *tree;
2488
2489
2490         if (current->flags & PF_MEMALLOC) {
2491                 redirty_page_for_writepage(wbc, page);
2492                 unlock_page(page);
2493                 return 0;
2494         }
2495         tree = &BTRFS_I(page->mapping->host)->io_tree;
2496         return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
2497 }
2498
2499 static int btrfs_writepages(struct address_space *mapping,
2500                             struct writeback_control *wbc)
2501 {
2502         struct extent_io_tree *tree;
2503         tree = &BTRFS_I(mapping->host)->io_tree;
2504         return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
2505 }
2506
2507 static int
2508 btrfs_readpages(struct file *file, struct address_space *mapping,
2509                 struct list_head *pages, unsigned nr_pages)
2510 {
2511         struct extent_io_tree *tree;
2512         tree = &BTRFS_I(mapping->host)->io_tree;
2513         return extent_readpages(tree, mapping, pages, nr_pages,
2514                                 btrfs_get_extent);
2515 }
2516
2517 static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
2518 {
2519         struct extent_io_tree *tree;
2520         struct extent_map_tree *map;
2521         int ret;
2522
2523         tree = &BTRFS_I(page->mapping->host)->io_tree;
2524         map = &BTRFS_I(page->mapping->host)->extent_tree;
2525         ret = try_release_extent_mapping(map, tree, page, gfp_flags);
2526         if (ret == 1) {
2527                 invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE);
2528                 ClearPagePrivate(page);
2529                 set_page_private(page, 0);
2530                 page_cache_release(page);
2531         }
2532         return ret;
2533 }
2534
2535 static void btrfs_invalidatepage(struct page *page, unsigned long offset)
2536 {
2537         struct extent_io_tree *tree;
2538
2539         tree = &BTRFS_I(page->mapping->host)->io_tree;
2540         extent_invalidatepage(tree, page, offset);
2541         btrfs_releasepage(page, GFP_NOFS);
2542         if (PagePrivate(page)) {
2543                 invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE);
2544                 ClearPagePrivate(page);
2545                 set_page_private(page, 0);
2546                 page_cache_release(page);
2547         }
2548 }
2549
2550 /*
2551  * btrfs_page_mkwrite() is not allowed to change the file size as it gets
2552  * called from a page fault handler when a page is first dirtied. Hence we must
2553  * be careful to check for EOF conditions here. We set the page up correctly
2554  * for a written page which means we get ENOSPC checking when writing into
2555  * holes and correct delalloc and unwritten extent mapping on filesystems that
2556  * support these features.
2557  *
2558  * We are not allowed to take the i_mutex here so we have to play games to
2559  * protect against truncate races as the page could now be beyond EOF.  Because
2560  * vmtruncate() writes the inode size before removing pages, once we have the
2561  * page lock we can determine safely if the page is beyond EOF. If it is not
2562  * beyond EOF, then the page is guaranteed safe against truncation until we
2563  * unlock the page.
2564  */
2565 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
2566 {
2567         struct inode *inode = fdentry(vma->vm_file)->d_inode;
2568         struct btrfs_root *root = BTRFS_I(inode)->root;
2569         unsigned long end;
2570         loff_t size;
2571         int ret;
2572         u64 page_start;
2573
2574         mutex_lock(&root->fs_info->fs_mutex);
2575         ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
2576         mutex_unlock(&root->fs_info->fs_mutex);
2577         if (ret)
2578                 goto out;
2579
2580         ret = -EINVAL;
2581
2582         lock_page(page);
2583         wait_on_page_writeback(page);
2584         size = i_size_read(inode);
2585         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2586
2587         if ((page->mapping != inode->i_mapping) ||
2588             (page_start > size)) {
2589                 /* page got truncated out from underneath us */
2590                 goto out_unlock;
2591         }
2592
2593         /* page is wholly or partially inside EOF */
2594         if (page_start + PAGE_CACHE_SIZE > size)
2595                 end = size & ~PAGE_CACHE_MASK;
2596         else
2597                 end = PAGE_CACHE_SIZE;
2598
2599         ret = btrfs_cow_one_page(inode, page, end);
2600
2601 out_unlock:
2602         unlock_page(page);
2603 out:
2604         return ret;
2605 }
2606
2607 static void btrfs_truncate(struct inode *inode)
2608 {
2609         struct btrfs_root *root = BTRFS_I(inode)->root;
2610         int ret;
2611         struct btrfs_trans_handle *trans;
2612         unsigned long nr;
2613
2614         if (!S_ISREG(inode->i_mode))
2615                 return;
2616         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2617                 return;
2618
2619         btrfs_truncate_page(inode->i_mapping, inode->i_size);
2620
2621         mutex_lock(&root->fs_info->fs_mutex);
2622         trans = btrfs_start_transaction(root, 1);
2623         btrfs_set_trans_block_group(trans, inode);
2624
2625         /* FIXME, add redo link to tree so we don't leak on crash */
2626         ret = btrfs_truncate_in_trans(trans, root, inode,
2627                                       BTRFS_EXTENT_DATA_KEY);
2628         btrfs_update_inode(trans, root, inode);
2629         nr = trans->blocks_used;
2630
2631         ret = btrfs_end_transaction(trans, root);
2632         BUG_ON(ret);
2633         mutex_unlock(&root->fs_info->fs_mutex);
2634         btrfs_btree_balance_dirty(root, nr);
2635         btrfs_throttle(root);
2636 }
2637
2638 static int noinline create_subvol(struct btrfs_root *root, char *name,
2639                                   int namelen)
2640 {
2641         struct btrfs_trans_handle *trans;
2642         struct btrfs_key key;
2643         struct btrfs_root_item root_item;
2644         struct btrfs_inode_item *inode_item;
2645         struct extent_buffer *leaf;
2646         struct btrfs_root *new_root = root;
2647         struct inode *inode;
2648         struct inode *dir;
2649         int ret;
2650         int err;
2651         u64 objectid;
2652         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2653         unsigned long nr = 1;
2654
2655         mutex_lock(&root->fs_info->fs_mutex);
2656         ret = btrfs_check_free_space(root, 1, 0);
2657         if (ret)
2658                 goto fail_commit;
2659
2660         trans = btrfs_start_transaction(root, 1);
2661         BUG_ON(!trans);
2662
2663         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2664                                        0, &objectid);
2665         if (ret)
2666                 goto fail;
2667
2668         leaf = __btrfs_alloc_free_block(trans, root, root->leafsize,
2669                                         objectid, trans->transid, 0, 0,
2670                                         0, 0);
2671         if (IS_ERR(leaf))
2672                 return PTR_ERR(leaf);
2673
2674         btrfs_set_header_nritems(leaf, 0);
2675         btrfs_set_header_level(leaf, 0);
2676         btrfs_set_header_bytenr(leaf, leaf->start);
2677         btrfs_set_header_generation(leaf, trans->transid);
2678         btrfs_set_header_owner(leaf, objectid);
2679
2680         write_extent_buffer(leaf, root->fs_info->fsid,
2681                             (unsigned long)btrfs_header_fsid(leaf),
2682                             BTRFS_FSID_SIZE);
2683         btrfs_mark_buffer_dirty(leaf);
2684
2685         inode_item = &root_item.inode;
2686         memset(inode_item, 0, sizeof(*inode_item));
2687         inode_item->generation = cpu_to_le64(1);
2688         inode_item->size = cpu_to_le64(3);
2689         inode_item->nlink = cpu_to_le32(1);
2690         inode_item->nblocks = cpu_to_le64(1);
2691         inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
2692
2693         btrfs_set_root_bytenr(&root_item, leaf->start);
2694         btrfs_set_root_level(&root_item, 0);
2695         btrfs_set_root_refs(&root_item, 1);
2696         btrfs_set_root_used(&root_item, 0);
2697
2698         memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
2699         root_item.drop_level = 0;
2700
2701         free_extent_buffer(leaf);
2702         leaf = NULL;
2703
2704         btrfs_set_root_dirid(&root_item, new_dirid);
2705
2706         key.objectid = objectid;
2707         key.offset = 1;
2708         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2709         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2710                                 &root_item);
2711         if (ret)
2712                 goto fail;
2713
2714         /*
2715          * insert the directory item
2716          */
2717         key.offset = (u64)-1;
2718         dir = root->fs_info->sb->s_root->d_inode;
2719         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2720                                     name, namelen, dir->i_ino, &key,
2721                                     BTRFS_FT_DIR);
2722         if (ret)
2723                 goto fail;
2724
2725         ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
2726                              name, namelen, objectid,
2727                              root->fs_info->sb->s_root->d_inode->i_ino);
2728         if (ret)
2729                 goto fail;
2730
2731         ret = btrfs_commit_transaction(trans, root);
2732         if (ret)
2733                 goto fail_commit;
2734
2735         new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen);
2736         BUG_ON(!new_root);
2737
2738         trans = btrfs_start_transaction(new_root, 1);
2739         BUG_ON(!trans);
2740
2741         inode = btrfs_new_inode(trans, new_root, "..", 2, new_dirid,
2742                                 new_dirid,
2743                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2744         if (IS_ERR(inode))
2745                 goto fail;
2746         inode->i_op = &btrfs_dir_inode_operations;
2747         inode->i_fop = &btrfs_dir_file_operations;
2748         new_root->inode = inode;
2749
2750         ret = btrfs_insert_inode_ref(trans, new_root, "..", 2, new_dirid,
2751                                      new_dirid);
2752         inode->i_nlink = 1;
2753         inode->i_size = 0;
2754         ret = btrfs_update_inode(trans, new_root, inode);
2755         if (ret)
2756                 goto fail;
2757 fail:
2758         nr = trans->blocks_used;
2759         err = btrfs_commit_transaction(trans, new_root);
2760         if (err && !ret)
2761                 ret = err;
2762 fail_commit:
2763         mutex_unlock(&root->fs_info->fs_mutex);
2764         btrfs_btree_balance_dirty(root, nr);
2765         btrfs_throttle(root);
2766         return ret;
2767 }
2768
2769 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2770 {
2771         struct btrfs_pending_snapshot *pending_snapshot;
2772         struct btrfs_trans_handle *trans;
2773         int ret;
2774         int err;
2775         unsigned long nr = 0;
2776
2777         if (!root->ref_cows)
2778                 return -EINVAL;
2779
2780         mutex_lock(&root->fs_info->fs_mutex);
2781         ret = btrfs_check_free_space(root, 1, 0);
2782         if (ret)
2783                 goto fail_unlock;
2784
2785         pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS);
2786         if (!pending_snapshot) {
2787                 ret = -ENOMEM;
2788                 goto fail_unlock;
2789         }
2790         pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
2791         if (!pending_snapshot->name) {
2792                 ret = -ENOMEM;
2793                 kfree(pending_snapshot);
2794                 goto fail_unlock;
2795         }
2796         memcpy(pending_snapshot->name, name, namelen);
2797         pending_snapshot->name[namelen] = '\0';
2798         trans = btrfs_start_transaction(root, 1);
2799         BUG_ON(!trans);
2800         pending_snapshot->root = root;
2801         list_add(&pending_snapshot->list,
2802                  &trans->transaction->pending_snapshots);
2803         ret = btrfs_update_inode(trans, root, root->inode);
2804         err = btrfs_commit_transaction(trans, root);
2805
2806 fail_unlock:
2807         mutex_unlock(&root->fs_info->fs_mutex);
2808         btrfs_btree_balance_dirty(root, nr);
2809         btrfs_throttle(root);
2810         return ret;
2811 }
2812
2813 unsigned long btrfs_force_ra(struct address_space *mapping,
2814                               struct file_ra_state *ra, struct file *file,
2815                               pgoff_t offset, pgoff_t last_index)
2816 {
2817         pgoff_t req_size = last_index - offset + 1;
2818
2819 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2820         offset = page_cache_readahead(mapping, ra, file, offset, req_size);
2821         return offset;
2822 #else
2823         page_cache_sync_readahead(mapping, ra, file, offset, req_size);
2824         return offset + req_size;
2825 #endif
2826 }
2827
2828 int btrfs_defrag_file(struct file *file) {
2829         struct inode *inode = fdentry(file)->d_inode;
2830         struct btrfs_root *root = BTRFS_I(inode)->root;
2831         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2832         struct page *page;
2833         unsigned long last_index;
2834         unsigned long ra_pages = root->fs_info->bdi.ra_pages;
2835         unsigned long total_read = 0;
2836         u64 page_start;
2837         u64 page_end;
2838         unsigned long i;
2839         int ret;
2840
2841         mutex_lock(&root->fs_info->fs_mutex);
2842         ret = btrfs_check_free_space(root, inode->i_size, 0);
2843         mutex_unlock(&root->fs_info->fs_mutex);
2844         if (ret)
2845                 return -ENOSPC;
2846
2847         mutex_lock(&inode->i_mutex);
2848         last_index = inode->i_size >> PAGE_CACHE_SHIFT;
2849         for (i = 0; i <= last_index; i++) {
2850                 if (total_read % ra_pages == 0) {
2851                         btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i,
2852                                        min(last_index, i + ra_pages - 1));
2853                 }
2854                 total_read++;
2855                 page = grab_cache_page(inode->i_mapping, i);
2856                 if (!page)
2857                         goto out_unlock;
2858                 if (!PageUptodate(page)) {
2859                         btrfs_readpage(NULL, page);
2860                         lock_page(page);
2861                         if (!PageUptodate(page)) {
2862                                 unlock_page(page);
2863                                 page_cache_release(page);
2864                                 goto out_unlock;
2865                         }
2866                 }
2867
2868 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
2869                 ClearPageDirty(page);
2870 #else
2871                 cancel_dirty_page(page, PAGE_CACHE_SIZE);
2872 #endif
2873                 wait_on_page_writeback(page);
2874                 set_page_extent_mapped(page);
2875
2876                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2877                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2878
2879                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2880                 set_extent_delalloc(io_tree, page_start,
2881                                     page_end, GFP_NOFS);
2882
2883                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2884                 set_page_dirty(page);
2885                 unlock_page(page);
2886                 page_cache_release(page);
2887                 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
2888         }
2889
2890 out_unlock:
2891         mutex_unlock(&inode->i_mutex);
2892         return 0;
2893 }
2894
2895 static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
2896 {
2897         u64 new_size;
2898         u64 old_size;
2899         u64 devid = 1;
2900         struct btrfs_ioctl_vol_args *vol_args;
2901         struct btrfs_trans_handle *trans;
2902         struct btrfs_device *device = NULL;
2903         char *sizestr;
2904         char *devstr = NULL;
2905         int ret = 0;
2906         int namelen;
2907         int mod = 0;
2908
2909         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2910
2911         if (!vol_args)
2912                 return -ENOMEM;
2913
2914         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2915                 ret = -EFAULT;
2916                 goto out;
2917         }
2918         namelen = strlen(vol_args->name);
2919         if (namelen > BTRFS_VOL_NAME_MAX) {
2920                 ret = -EINVAL;
2921                 goto out;
2922         }
2923
2924         mutex_lock(&root->fs_info->fs_mutex);
2925         sizestr = vol_args->name;
2926         devstr = strchr(sizestr, ':');
2927         if (devstr) {
2928                 char *end;
2929                 sizestr = devstr + 1;
2930                 *devstr = '\0';
2931                 devstr = vol_args->name;
2932                 devid = simple_strtoull(devstr, &end, 10);
2933 printk("resizing devid %Lu\n", devid);
2934         }
2935         device = btrfs_find_device(root, devid, NULL);
2936         if (!device) {
2937                 printk("resizer unable to find device %Lu\n", devid);
2938                 ret = -EINVAL;
2939                 goto out_unlock;
2940         }
2941         if (!strcmp(sizestr, "max"))
2942                 new_size = device->bdev->bd_inode->i_size;
2943         else {
2944                 if (sizestr[0] == '-') {
2945                         mod = -1;
2946                         sizestr++;
2947                 } else if (sizestr[0] == '+') {
2948                         mod = 1;
2949                         sizestr++;
2950                 }
2951                 new_size = btrfs_parse_size(sizestr);
2952                 if (new_size == 0) {
2953                         ret = -EINVAL;
2954                         goto out_unlock;
2955                 }
2956         }
2957
2958         old_size = device->total_bytes;
2959
2960         if (mod < 0) {
2961                 if (new_size > old_size) {
2962                         ret = -EINVAL;
2963                         goto out_unlock;
2964                 }
2965                 new_size = old_size - new_size;
2966         } else if (mod > 0) {
2967                 new_size = old_size + new_size;
2968         }
2969
2970         if (new_size < 256 * 1024 * 1024) {
2971                 ret = -EINVAL;
2972                 goto out_unlock;
2973         }
2974         if (new_size > device->bdev->bd_inode->i_size) {
2975                 ret = -EFBIG;
2976                 goto out_unlock;
2977         }
2978
2979         do_div(new_size, root->sectorsize);
2980         new_size *= root->sectorsize;
2981
2982 printk("new size for %s is %llu\n", device->name, (unsigned long long)new_size);
2983
2984         if (new_size > old_size) {
2985                 trans = btrfs_start_transaction(root, 1);
2986                 ret = btrfs_grow_device(trans, device, new_size);
2987                 btrfs_commit_transaction(trans, root);
2988         } else {
2989                 ret = btrfs_shrink_device(device, new_size);
2990         }
2991
2992 out_unlock:
2993         mutex_unlock(&root->fs_info->fs_mutex);
2994 out:
2995         kfree(vol_args);
2996         return ret;
2997 }
2998
2999 static int noinline btrfs_ioctl_snap_create(struct btrfs_root *root,
3000                                             void __user *arg)
3001 {
3002         struct btrfs_ioctl_vol_args *vol_args;
3003         struct btrfs_dir_item *di;
3004         struct btrfs_path *path;
3005         u64 root_dirid;
3006         int namelen;
3007         int ret;
3008
3009         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
3010
3011         if (!vol_args)
3012                 return -ENOMEM;
3013
3014         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
3015                 ret = -EFAULT;
3016                 goto out;
3017         }
3018
3019         namelen = strlen(vol_args->name);
3020         if (namelen > BTRFS_VOL_NAME_MAX) {
3021                 ret = -EINVAL;
3022                 goto out;
3023         }
3024         if (strchr(vol_args->name, '/')) {
3025                 ret = -EINVAL;
3026                 goto out;
3027         }
3028
3029         path = btrfs_alloc_path();
3030         if (!path) {
3031                 ret = -ENOMEM;
3032                 goto out;
3033         }
3034
3035         root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
3036         mutex_lock(&root->fs_info->fs_mutex);
3037         di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
3038                             path, root_dirid,
3039                             vol_args->name, namelen, 0);
3040         mutex_unlock(&root->fs_info->fs_mutex);
3041         btrfs_free_path(path);
3042
3043         if (di && !IS_ERR(di)) {
3044                 ret = -EEXIST;
3045                 goto out;
3046         }
3047
3048         if (IS_ERR(di)) {
3049                 ret = PTR_ERR(di);
3050                 goto out;
3051         }
3052
3053         if (root == root->fs_info->tree_root)
3054                 ret = create_subvol(root, vol_args->name, namelen);
3055         else
3056                 ret = create_snapshot(root, vol_args->name, namelen);
3057 out:
3058         kfree(vol_args);
3059         return ret;
3060 }
3061
3062 static int btrfs_ioctl_defrag(struct file *file)
3063 {
3064         struct inode *inode = fdentry(file)->d_inode;
3065         struct btrfs_root *root = BTRFS_I(inode)->root;
3066
3067         switch (inode->i_mode & S_IFMT) {
3068         case S_IFDIR:
3069                 mutex_lock(&root->fs_info->fs_mutex);
3070                 btrfs_defrag_root(root, 0);
3071                 btrfs_defrag_root(root->fs_info->extent_root, 0);
3072                 mutex_unlock(&root->fs_info->fs_mutex);
3073                 break;
3074         case S_IFREG:
3075                 btrfs_defrag_file(file);
3076                 break;
3077         }
3078
3079         return 0;
3080 }
3081
3082 long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
3083 {
3084         struct btrfs_ioctl_vol_args *vol_args;
3085         int ret;
3086
3087         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
3088
3089         if (!vol_args)
3090                 return -ENOMEM;
3091
3092         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
3093                 ret = -EFAULT;
3094                 goto out;
3095         }
3096         ret = btrfs_init_new_device(root, vol_args->name);
3097
3098 out:
3099         kfree(vol_args);
3100         return ret;
3101 }
3102
3103 long btrfs_ioctl(struct file *file, unsigned int
3104                 cmd, unsigned long arg)
3105 {
3106         struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
3107
3108         switch (cmd) {
3109         case BTRFS_IOC_SNAP_CREATE:
3110                 return btrfs_ioctl_snap_create(root, (void __user *)arg);
3111         case BTRFS_IOC_DEFRAG:
3112                 return btrfs_ioctl_defrag(file);
3113         case BTRFS_IOC_RESIZE:
3114                 return btrfs_ioctl_resize(root, (void __user *)arg);
3115         case BTRFS_IOC_ADD_DEV:
3116                 return btrfs_ioctl_add_dev(root, (void __user *)arg);
3117         case BTRFS_IOC_BALANCE:
3118                 return btrfs_balance(root->fs_info->dev_root);
3119         }
3120
3121         return -ENOTTY;
3122 }
3123
3124 /*
3125  * Called inside transaction, so use GFP_NOFS
3126  */
3127 struct inode *btrfs_alloc_inode(struct super_block *sb)
3128 {
3129         struct btrfs_inode *ei;
3130
3131         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
3132         if (!ei)
3133                 return NULL;
3134         ei->last_trans = 0;
3135         ei->ordered_trans = 0;
3136         return &ei->vfs_inode;
3137 }
3138
3139 void btrfs_destroy_inode(struct inode *inode)
3140 {
3141         WARN_ON(!list_empty(&inode->i_dentry));
3142         WARN_ON(inode->i_data.nrpages);
3143
3144         btrfs_drop_extent_cache(inode, 0, (u64)-1);
3145         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
3146 }
3147
3148 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
3149 static void init_once(struct kmem_cache * cachep, void *foo)
3150 #else
3151 static void init_once(void * foo, struct kmem_cache * cachep,
3152                       unsigned long flags)
3153 #endif
3154 {
3155         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
3156
3157         inode_init_once(&ei->vfs_inode);
3158 }
3159
3160 void btrfs_destroy_cachep(void)
3161 {
3162         if (btrfs_inode_cachep)
3163                 kmem_cache_destroy(btrfs_inode_cachep);
3164         if (btrfs_trans_handle_cachep)
3165                 kmem_cache_destroy(btrfs_trans_handle_cachep);
3166         if (btrfs_transaction_cachep)
3167                 kmem_cache_destroy(btrfs_transaction_cachep);
3168         if (btrfs_bit_radix_cachep)
3169                 kmem_cache_destroy(btrfs_bit_radix_cachep);
3170         if (btrfs_path_cachep)
3171                 kmem_cache_destroy(btrfs_path_cachep);
3172 }
3173
3174 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
3175                                        unsigned long extra_flags,
3176 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
3177                                        void (*ctor)(struct kmem_cache *, void *)
3178 #else
3179                                        void (*ctor)(void *, struct kmem_cache *,
3180                                                     unsigned long)
3181 #endif
3182                                      )
3183 {
3184         return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
3185                                  SLAB_MEM_SPREAD | extra_flags), ctor
3186 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
3187                                  ,NULL
3188 #endif
3189                                 );
3190 }
3191
3192 int btrfs_init_cachep(void)
3193 {
3194         btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache",
3195                                           sizeof(struct btrfs_inode),
3196                                           0, init_once);
3197         if (!btrfs_inode_cachep)
3198                 goto fail;
3199         btrfs_trans_handle_cachep =
3200                         btrfs_cache_create("btrfs_trans_handle_cache",
3201                                            sizeof(struct btrfs_trans_handle),
3202                                            0, NULL);
3203         if (!btrfs_trans_handle_cachep)
3204                 goto fail;
3205         btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache",
3206                                              sizeof(struct btrfs_transaction),
3207                                              0, NULL);
3208         if (!btrfs_transaction_cachep)
3209                 goto fail;
3210         btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache",
3211                                          sizeof(struct btrfs_path),
3212                                          0, NULL);
3213         if (!btrfs_path_cachep)
3214                 goto fail;
3215         btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256,
3216                                               SLAB_DESTROY_BY_RCU, NULL);
3217         if (!btrfs_bit_radix_cachep)
3218                 goto fail;
3219         return 0;
3220 fail:
3221         btrfs_destroy_cachep();
3222         return -ENOMEM;
3223 }
3224
3225 static int btrfs_getattr(struct vfsmount *mnt,
3226                          struct dentry *dentry, struct kstat *stat)
3227 {
3228         struct inode *inode = dentry->d_inode;
3229         generic_fillattr(inode, stat);
3230         stat->blksize = PAGE_CACHE_SIZE;
3231         stat->blocks = inode->i_blocks + (BTRFS_I(inode)->delalloc_bytes >> 9);
3232         return 0;
3233 }
3234
3235 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
3236                            struct inode * new_dir,struct dentry *new_dentry)
3237 {
3238         struct btrfs_trans_handle *trans;
3239         struct btrfs_root *root = BTRFS_I(old_dir)->root;
3240         struct inode *new_inode = new_dentry->d_inode;
3241         struct inode *old_inode = old_dentry->d_inode;
3242         struct timespec ctime = CURRENT_TIME;
3243         struct btrfs_path *path;
3244         int ret;
3245
3246         if (S_ISDIR(old_inode->i_mode) && new_inode &&
3247             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
3248                 return -ENOTEMPTY;
3249         }
3250
3251         mutex_lock(&root->fs_info->fs_mutex);
3252         ret = btrfs_check_free_space(root, 1, 0);
3253         if (ret)
3254                 goto out_unlock;
3255
3256         trans = btrfs_start_transaction(root, 1);
3257
3258         btrfs_set_trans_block_group(trans, new_dir);
3259         path = btrfs_alloc_path();
3260         if (!path) {
3261                 ret = -ENOMEM;
3262                 goto out_fail;
3263         }
3264
3265         old_dentry->d_inode->i_nlink++;
3266         old_dir->i_ctime = old_dir->i_mtime = ctime;
3267         new_dir->i_ctime = new_dir->i_mtime = ctime;
3268         old_inode->i_ctime = ctime;
3269
3270         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
3271         if (ret)
3272                 goto out_fail;
3273
3274         if (new_inode) {
3275                 new_inode->i_ctime = CURRENT_TIME;
3276                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
3277                 if (ret)
3278                         goto out_fail;
3279         }
3280         ret = btrfs_add_link(trans, new_dentry, old_inode, 1);
3281         if (ret)
3282                 goto out_fail;
3283
3284 out_fail:
3285         btrfs_free_path(path);
3286         btrfs_end_transaction(trans, root);
3287 out_unlock:
3288         mutex_unlock(&root->fs_info->fs_mutex);
3289         return ret;
3290 }
3291
3292 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
3293                          const char *symname)
3294 {
3295         struct btrfs_trans_handle *trans;
3296         struct btrfs_root *root = BTRFS_I(dir)->root;
3297         struct btrfs_path *path;
3298         struct btrfs_key key;
3299         struct inode *inode = NULL;
3300         int err;
3301         int drop_inode = 0;
3302         u64 objectid;
3303         int name_len;
3304         int datasize;
3305         unsigned long ptr;
3306         struct btrfs_file_extent_item *ei;
3307         struct extent_buffer *leaf;
3308         unsigned long nr = 0;
3309
3310         name_len = strlen(symname) + 1;
3311         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
3312                 return -ENAMETOOLONG;
3313
3314         mutex_lock(&root->fs_info->fs_mutex);
3315         err = btrfs_check_free_space(root, 1, 0);
3316         if (err)
3317                 goto out_fail;
3318
3319         trans = btrfs_start_transaction(root, 1);
3320         btrfs_set_trans_block_group(trans, dir);
3321
3322         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
3323         if (err) {
3324                 err = -ENOSPC;
3325                 goto out_unlock;
3326         }
3327
3328         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
3329                                 dentry->d_name.len,
3330                                 dentry->d_parent->d_inode->i_ino, objectid,
3331                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
3332         err = PTR_ERR(inode);
3333         if (IS_ERR(inode))
3334                 goto out_unlock;
3335
3336         btrfs_set_trans_block_group(trans, inode);
3337         err = btrfs_add_nondir(trans, dentry, inode, 0);
3338         if (err)
3339                 drop_inode = 1;
3340         else {
3341                 inode->i_mapping->a_ops = &btrfs_aops;
3342                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3343                 inode->i_fop = &btrfs_file_operations;
3344                 inode->i_op = &btrfs_file_inode_operations;
3345                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
3346                 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
3347                                      inode->i_mapping, GFP_NOFS);
3348                 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
3349                                      inode->i_mapping, GFP_NOFS);
3350                 BTRFS_I(inode)->delalloc_bytes = 0;
3351                 atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
3352                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
3353         }
3354         dir->i_sb->s_dirt = 1;
3355         btrfs_update_inode_block_group(trans, inode);
3356         btrfs_update_inode_block_group(trans, dir);
3357         if (drop_inode)
3358                 goto out_unlock;
3359
3360         path = btrfs_alloc_path();
3361         BUG_ON(!path);
3362         key.objectid = inode->i_ino;
3363         key.offset = 0;
3364         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
3365         datasize = btrfs_file_extent_calc_inline_size(name_len);
3366         err = btrfs_insert_empty_item(trans, root, path, &key,
3367                                       datasize);
3368         if (err) {
3369                 drop_inode = 1;
3370                 goto out_unlock;
3371         }
3372         leaf = path->nodes[0];
3373         ei = btrfs_item_ptr(leaf, path->slots[0],
3374                             struct btrfs_file_extent_item);
3375         btrfs_set_file_extent_generation(leaf, ei, trans->transid);
3376         btrfs_set_file_extent_type(leaf, ei,
3377                                    BTRFS_FILE_EXTENT_INLINE);
3378         ptr = btrfs_file_extent_inline_start(ei);
3379         write_extent_buffer(leaf, symname, ptr, name_len);
3380         btrfs_mark_buffer_dirty(leaf);
3381         btrfs_free_path(path);
3382
3383         inode->i_op = &btrfs_symlink_inode_operations;
3384         inode->i_mapping->a_ops = &btrfs_symlink_aops;
3385         inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3386         inode->i_size = name_len - 1;
3387         err = btrfs_update_inode(trans, root, inode);
3388         if (err)
3389                 drop_inode = 1;
3390
3391 out_unlock:
3392         nr = trans->blocks_used;
3393         btrfs_end_transaction(trans, root);
3394 out_fail:
3395         mutex_unlock(&root->fs_info->fs_mutex);
3396         if (drop_inode) {
3397                 inode_dec_link_count(inode);
3398                 iput(inode);
3399         }
3400         btrfs_btree_balance_dirty(root, nr);
3401         btrfs_throttle(root);
3402         return err;
3403 }
3404
3405 static int btrfs_permission(struct inode *inode, int mask,
3406                             struct nameidata *nd)
3407 {
3408         if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE))
3409                 return -EACCES;
3410         return generic_permission(inode, mask, NULL);
3411 }
3412
3413 static struct inode_operations btrfs_dir_inode_operations = {
3414         .lookup         = btrfs_lookup,
3415         .create         = btrfs_create,
3416         .unlink         = btrfs_unlink,
3417         .link           = btrfs_link,
3418         .mkdir          = btrfs_mkdir,
3419         .rmdir          = btrfs_rmdir,
3420         .rename         = btrfs_rename,
3421         .symlink        = btrfs_symlink,
3422         .setattr        = btrfs_setattr,
3423         .mknod          = btrfs_mknod,
3424         .setxattr       = generic_setxattr,
3425         .getxattr       = generic_getxattr,
3426         .listxattr      = btrfs_listxattr,
3427         .removexattr    = generic_removexattr,
3428         .permission     = btrfs_permission,
3429 };
3430 static struct inode_operations btrfs_dir_ro_inode_operations = {
3431         .lookup         = btrfs_lookup,
3432         .permission     = btrfs_permission,
3433 };
3434 static struct file_operations btrfs_dir_file_operations = {
3435         .llseek         = generic_file_llseek,
3436         .read           = generic_read_dir,
3437         .readdir        = btrfs_readdir,
3438         .unlocked_ioctl = btrfs_ioctl,
3439 #ifdef CONFIG_COMPAT
3440         .compat_ioctl   = btrfs_ioctl,
3441 #endif
3442 };
3443
3444 static struct extent_io_ops btrfs_extent_io_ops = {
3445         .fill_delalloc = run_delalloc_range,
3446         .submit_bio_hook = btrfs_submit_bio_hook,
3447         .merge_bio_hook = btrfs_merge_bio_hook,
3448         .readpage_io_hook = btrfs_readpage_io_hook,
3449         .readpage_end_io_hook = btrfs_readpage_end_io_hook,
3450         .readpage_io_failed_hook = btrfs_readpage_io_failed_hook,
3451         .set_bit_hook = btrfs_set_bit_hook,
3452         .clear_bit_hook = btrfs_clear_bit_hook,
3453 };
3454
3455 static struct address_space_operations btrfs_aops = {
3456         .readpage       = btrfs_readpage,
3457         .writepage      = btrfs_writepage,
3458         .writepages     = btrfs_writepages,
3459         .readpages      = btrfs_readpages,
3460         .sync_page      = block_sync_page,
3461         .bmap           = btrfs_bmap,
3462         .direct_IO      = btrfs_direct_IO,
3463         .invalidatepage = btrfs_invalidatepage,
3464         .releasepage    = btrfs_releasepage,
3465         .set_page_dirty = __set_page_dirty_nobuffers,
3466 };
3467
3468 static struct address_space_operations btrfs_symlink_aops = {
3469         .readpage       = btrfs_readpage,
3470         .writepage      = btrfs_writepage,
3471         .invalidatepage = btrfs_invalidatepage,
3472         .releasepage    = btrfs_releasepage,
3473 };
3474
3475 static struct inode_operations btrfs_file_inode_operations = {
3476         .truncate       = btrfs_truncate,
3477         .getattr        = btrfs_getattr,
3478         .setattr        = btrfs_setattr,
3479         .setxattr       = generic_setxattr,
3480         .getxattr       = generic_getxattr,
3481         .listxattr      = btrfs_listxattr,
3482         .removexattr    = generic_removexattr,
3483         .permission     = btrfs_permission,
3484 };
3485 static struct inode_operations btrfs_special_inode_operations = {
3486         .getattr        = btrfs_getattr,
3487         .setattr        = btrfs_setattr,
3488         .permission     = btrfs_permission,
3489 };
3490 static struct inode_operations btrfs_symlink_inode_operations = {
3491         .readlink       = generic_readlink,
3492         .follow_link    = page_follow_link_light,
3493         .put_link       = page_put_link,
3494         .permission     = btrfs_permission,
3495 };