Btrfs: Make an unplug function that doesn't unplug every spindle
[linux-2.6-block.git] / fs / btrfs / inode.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/bio.h>
20 #include <linux/buffer_head.h>
21 #include <linux/fs.h>
22 #include <linux/pagemap.h>
23 #include <linux/highmem.h>
24 #include <linux/time.h>
25 #include <linux/init.h>
26 #include <linux/string.h>
27 #include <linux/smp_lock.h>
28 #include <linux/backing-dev.h>
29 #include <linux/mpage.h>
30 #include <linux/swap.h>
31 #include <linux/writeback.h>
32 #include <linux/statfs.h>
33 #include <linux/compat.h>
34 #include <linux/bit_spinlock.h>
35 #include <linux/version.h>
36 #include <linux/xattr.h>
37 #include "ctree.h"
38 #include "disk-io.h"
39 #include "transaction.h"
40 #include "btrfs_inode.h"
41 #include "ioctl.h"
42 #include "print-tree.h"
43 #include "volumes.h"
44
/*
 * Pairs an inode number with the btrfs root it belongs to.
 *
 * NOTE(review): no user of this struct is visible in this part of the file;
 * presumably it is the argument handed to the inode-cache lookup helpers --
 * confirm against the callers.
 */
45 struct btrfs_iget_args {
46         u64 ino;
47         struct btrfs_root *root;
48 };
49
/*
 * Operation tables for the inode types handled in this file.  Only the
 * forward declarations are visible here; the initializers are expected
 * further down in the file.
 */
50 static struct inode_operations btrfs_dir_inode_operations;
51 static struct inode_operations btrfs_symlink_inode_operations;
52 static struct inode_operations btrfs_dir_ro_inode_operations;
53 static struct inode_operations btrfs_special_inode_operations;
54 static struct inode_operations btrfs_file_inode_operations;
55 static struct address_space_operations btrfs_aops;
56 static struct address_space_operations btrfs_symlink_aops;
57 static struct file_operations btrfs_dir_file_operations;
58 static struct extent_io_ops btrfs_extent_io_ops;
59
/*
 * kmem caches.  btrfs_inode_cachep is private to this file; the remaining
 * ones have external linkage.
 */
60 static struct kmem_cache *btrfs_inode_cachep;
61 struct kmem_cache *btrfs_trans_handle_cachep;
62 struct kmem_cache *btrfs_transaction_cachep;
63 struct kmem_cache *btrfs_bit_radix_cachep;
64 struct kmem_cache *btrfs_path_cachep;
65
/*
 * Lookup table from the file-type bits of an inode mode to the matching
 * BTRFS_FT_* directory entry type.  The index is the S_IF* value shifted
 * right by S_SHIFT; slots without an initializer stay zero.
 */
66 #define S_SHIFT 12
67 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
68         [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
69         [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
70         [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
71         [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
72         [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
73         [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
74         [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
75 };
76
77 int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
78                            int for_del)
79 {
80         u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy);
81         u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy);
82         u64 thresh;
83         int ret = 0;
84
85         if (for_del)
86                 thresh = total * 90;
87         else
88                 thresh = total * 85;
89
90         do_div(thresh, 100);
91
92         spin_lock(&root->fs_info->delalloc_lock);
93         if (used + root->fs_info->delalloc_bytes + num_required > thresh)
94                 ret = -ENOSPC;
95         spin_unlock(&root->fs_info->delalloc_lock);
96         return ret;
97 }
98
/*
 * Allocate new on-disk extents for the byte range [start, end] of @inode and
 * insert a file extent item for each of them, all inside one transaction.
 *
 * The length is rounded up to a multiple of root->sectorsize (at least one
 * block) and is allocated in pieces of at most root->fs_info->max_extent
 * bytes.
 *
 * Returns whatever the last helper that assigned ret left there
 * (btrfs_drop_extents, btrfs_alloc_extent or btrfs_insert_file_extent).
 *
 * NOTE(review): the results of btrfs_drop_extents and
 * btrfs_insert_file_extent are never tested; on a range that needs several
 * extents an earlier failure is overwritten by the next iteration.
 */
99 static int cow_file_range(struct inode *inode, u64 start, u64 end)
100 {
101         struct btrfs_root *root = BTRFS_I(inode)->root;
102         struct btrfs_trans_handle *trans;
103         u64 alloc_hint = 0;
104         u64 num_bytes;
105         u64 cur_alloc_size;
106         u64 blocksize = root->sectorsize;
107         u64 orig_start = start;
108         u64 orig_num_bytes;
109         struct btrfs_key ins;
110         int ret;
111
112         trans = btrfs_start_transaction(root, 1);
113         BUG_ON(!trans);
114         btrfs_set_trans_block_group(trans, inode);
115
/* round the byte count up to whole blocks, never less than one block */
116         num_bytes = (end - start + blocksize) & ~(blocksize - 1);
117         num_bytes = max(blocksize,  num_bytes);
118         ret = btrfs_drop_extents(trans, root, inode,
119                                  start, start + num_bytes, start, &alloc_hint);
120         orig_num_bytes = num_bytes;
121
/*
 * btrfs_drop_extents handed back EXTENT_MAP_INLINE through alloc_hint:
 * nothing is allocated (presumably the range lives in an inline extent --
 * confirm).
 */
122         if (alloc_hint == EXTENT_MAP_INLINE)
123                 goto out;
124
125         BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy));
126
127         while(num_bytes > 0) {
128                 cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
129                 ret = btrfs_alloc_extent(trans, root, cur_alloc_size,
130                                          root->sectorsize,
131                                          root->root_key.objectid,
132                                          trans->transid,
133                                          inode->i_ino, start, 0,
134                                          alloc_hint, (u64)-1, &ins, 1);
135                 if (ret) {
136                         WARN_ON(1);
137                         goto out;
138                 }
/* from here on work with the size the allocator actually returned */
139                 cur_alloc_size = ins.offset;
140                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
141                                                start, ins.objectid, ins.offset,
142                                                ins.offset);
/* i_blocks is kept in 512-byte units */
143                 inode->i_blocks += ins.offset >> 9;
144                 btrfs_check_file(root, inode);
/* the allocator returned more than was still outstanding: log it and stop */
145                 if (num_bytes < cur_alloc_size) {
146                         printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes,
147                                cur_alloc_size);
148                         break;
149                 }
150                 num_bytes -= cur_alloc_size;
/* hint the next allocation to start right behind this extent */
151                 alloc_hint = ins.objectid + ins.offset;
152                 start += cur_alloc_size;
153         }
154         btrfs_drop_extent_cache(inode, orig_start,
155                                 orig_start + orig_num_bytes - 1);
156         btrfs_add_ordered_inode(inode);
157         btrfs_update_inode(trans, root, inode);
158 out:
159         btrfs_end_transaction(trans, root);
160         return ret;
161 }
162
/*
 * Delalloc handler used when NODATACOW is in effect (see run_delalloc_range).
 *
 * Walks [start, end] one file extent at a time.  A piece is skipped without
 * allocating anything when the extent item found for it is a regular extent
 * (BTRFS_FILE_EXTENT_REG) that contains start, has a non-zero disk bytenr,
 * whose disk range ends within the super block's total_bytes and for which
 * btrfs_count_snapshots_in_path() returns 1.  Every other piece is handed to
 * cow_file_range().
 *
 * Returns 0 once the whole range has been consumed, or the negative value
 * returned by btrfs_lookup_file_extent().
 *
 * NOTE(review): the return value of cow_file_range() is dropped, and err is
 * assigned but never read.
 */
163 static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
164 {
165         u64 extent_start;
166         u64 extent_end;
167         u64 bytenr;
168         u64 cow_end;
169         u64 loops = 0;
170         u64 total_fs_bytes;
171         struct btrfs_root *root = BTRFS_I(inode)->root;
172         struct extent_buffer *leaf;
173         int found_type;
174         struct btrfs_path *path;
175         struct btrfs_file_extent_item *item;
176         int ret;
177         int err;
178         struct btrfs_key found_key;
179
180         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
181         path = btrfs_alloc_path();
182         BUG_ON(!path);
183 again:
184         ret = btrfs_lookup_file_extent(NULL, root, path,
185                                        inode->i_ino, start, 0);
186         if (ret < 0) {
187                 btrfs_free_path(path);
188                 return ret;
189         }
190
/* by default a fallback allocation covers everything up to end */
191         cow_end = end;
/* no exact match: step back to the preceding item, if there is one */
192         if (ret != 0) {
193                 if (path->slots[0] == 0)
194                         goto not_found;
195                 path->slots[0]--;
196         }
197
198         leaf = path->nodes[0];
199         item = btrfs_item_ptr(leaf, path->slots[0],
200                               struct btrfs_file_extent_item);
201
202         /* are we inside the extent that was found? */
203         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
204         found_type = btrfs_key_type(&found_key);
205         if (found_key.objectid != inode->i_ino ||
206             found_type != BTRFS_EXTENT_DATA_KEY) {
207                 goto not_found;
208         }
209
/* found_type is reused: key type above, file extent type from here on */
210         found_type = btrfs_file_extent_type(leaf, item);
211         extent_start = found_key.offset;
212         if (found_type == BTRFS_FILE_EXTENT_REG) {
213                 u64 extent_num_bytes;
214
215                 extent_num_bytes = btrfs_file_extent_num_bytes(leaf, item);
216                 extent_end = extent_start + extent_num_bytes;
217                 err = 0;
218
/* after the first pass only an extent that begins exactly at start is accepted */
219                 if (loops && start != extent_start)
220                         goto not_found;
221
222                 if (start < extent_start || start >= extent_end)
223                         goto not_found;
224
/* a fallback allocation from here on stops at the end of this extent */
225                 cow_end = min(end, extent_end - 1);
226                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
227                 if (bytenr == 0)
228                         goto not_found;
229
230                 /*
231                  * we may be called by the resizer, make sure we're inside
232                  * the limits of the FS
233                  */
234                 if (bytenr + extent_num_bytes > total_fs_bytes)
235                         goto not_found;
236
237                 if (btrfs_count_snapshots_in_path(root, path, bytenr) != 1) {
238                         goto not_found;
239                 }
240
/* the existing extent is kept as is; continue behind it */
241                 start = extent_end;
242         } else {
243                 goto not_found;
244         }
245 loop:
246         if (start > end) {
247                 btrfs_free_path(path);
248                 return 0;
249         }
250         btrfs_release_path(root, path);
251         loops++;
252         goto again;
253
254 not_found:
/* allocate fresh extents for [start, cow_end] and move past them */
255         cow_file_range(inode, start, cow_end);
256         start = cow_end + 1;
257         goto loop;
258 }
259
260 static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
261 {
262         struct btrfs_root *root = BTRFS_I(inode)->root;
263         int ret;
264         mutex_lock(&root->fs_info->fs_mutex);
265         if (btrfs_test_opt(root, NODATACOW) ||
266             btrfs_test_flag(inode, NODATACOW))
267                 ret = run_delalloc_nocow(inode, start, end);
268         else
269                 ret = cow_file_range(inode, start, end);
270
271         mutex_unlock(&root->fs_info->fs_mutex);
272         return ret;
273 }
274
275 int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
276                        unsigned long old, unsigned long bits)
277 {
278         if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
279                 struct btrfs_root *root = BTRFS_I(inode)->root;
280                 spin_lock(&root->fs_info->delalloc_lock);
281                 BTRFS_I(inode)->delalloc_bytes += end - start + 1;
282                 root->fs_info->delalloc_bytes += end - start + 1;
283                 spin_unlock(&root->fs_info->delalloc_lock);
284         }
285         return 0;
286 }
287
288 int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
289                          unsigned long old, unsigned long bits)
290 {
291         if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
292                 struct btrfs_root *root = BTRFS_I(inode)->root;
293                 spin_lock(&root->fs_info->delalloc_lock);
294                 if (end - start + 1 > root->fs_info->delalloc_bytes) {
295                         printk("warning: delalloc account %Lu %Lu\n",
296                                end - start + 1, root->fs_info->delalloc_bytes);
297                         root->fs_info->delalloc_bytes = 0;
298                         BTRFS_I(inode)->delalloc_bytes = 0;
299                 } else {
300                         root->fs_info->delalloc_bytes -= end - start + 1;
301                         BTRFS_I(inode)->delalloc_bytes -= end - start + 1;
302                 }
303                 spin_unlock(&root->fs_info->delalloc_lock);
304         }
305         return 0;
306 }
307
308 int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
309                          size_t size, struct bio *bio)
310 {
311         struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
312         struct btrfs_mapping_tree *map_tree;
313         u64 logical = bio->bi_sector << 9;
314         u64 length = 0;
315         u64 map_length;
316         int ret;
317
318         length = bio->bi_size;
319         map_tree = &root->fs_info->mapping_tree;
320         map_length = length;
321         ret = btrfs_map_block(map_tree, READ, logical,
322                               &map_length, NULL, 0);
323
324         if (map_length < length + size) {
325                 return 1;
326         }
327         return 0;
328 }
329
330 int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
331                           int mirror_num)
332 {
333         struct btrfs_root *root = BTRFS_I(inode)->root;
334         struct btrfs_trans_handle *trans;
335         int ret = 0;
336         char *sums = NULL;
337
338         ret = btrfs_csum_one_bio(root, bio, &sums);
339         BUG_ON(ret);
340
341         mutex_lock(&root->fs_info->fs_mutex);
342         trans = btrfs_start_transaction(root, 1);
343
344         btrfs_set_trans_block_group(trans, inode);
345         btrfs_csum_file_blocks(trans, root, inode, bio, sums);
346
347         ret = btrfs_end_transaction(trans, root);
348         BUG_ON(ret);
349         mutex_unlock(&root->fs_info->fs_mutex);
350
351         kfree(sums);
352
353         return btrfs_map_bio(root, rw, bio, mirror_num);
354 }
355
356 int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
357                           int mirror_num)
358 {
359         struct btrfs_root *root = BTRFS_I(inode)->root;
360         int ret = 0;
361
362         if (!(rw & (1 << BIO_RW))) {
363                 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
364                 BUG_ON(ret);
365                 goto mapit;
366         }
367
368         if (btrfs_test_opt(root, NODATASUM) ||
369             btrfs_test_flag(inode, NODATASUM)) {
370                 goto mapit;
371         }
372
373         return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
374                                    inode, rw, bio, mirror_num,
375                                    __btrfs_submit_bio_hook);
376 mapit:
377         return btrfs_map_bio(root, rw, bio, mirror_num);
378 }
379
380 int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
381 {
382         int ret = 0;
383         struct inode *inode = page->mapping->host;
384         struct btrfs_root *root = BTRFS_I(inode)->root;
385         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
386         struct btrfs_csum_item *item;
387         struct btrfs_path *path = NULL;
388         u32 csum;
389
390         if (btrfs_test_opt(root, NODATASUM) ||
391             btrfs_test_flag(inode, NODATASUM))
392                 return 0;
393
394         mutex_lock(&root->fs_info->fs_mutex);
395         path = btrfs_alloc_path();
396         item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
397         if (IS_ERR(item)) {
398                 ret = PTR_ERR(item);
399                 /* a csum that isn't present is a preallocated region. */
400                 if (ret == -ENOENT || ret == -EFBIG)
401                         ret = 0;
402                 csum = 0;
403                 printk("no csum found for inode %lu start %Lu\n", inode->i_ino, start);
404                 goto out;
405         }
406         read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
407                            BTRFS_CRC32_SIZE);
408         set_state_private(io_tree, start, csum);
409 out:
410         if (path)
411                 btrfs_free_path(path);
412         mutex_unlock(&root->fs_info->fs_mutex);
413         return ret;
414 }
415
/*
 * Bookkeeping for one failed read, stored as the private value of the
 * inode's io_failure_tree by btrfs_readpage_io_failed_hook().
 */
416 struct io_failure_record {
/* NOTE(review): never assigned in the visible code */
417         struct page *page;
/* file offset of the failed range and its length in bytes */
418         u64 start;
419         u64 len;
/* em->block_start plus the offset of start inside the extent mapping */
420         u64 logical;
/* bumped on every retry and passed on as the mirror number */
421         int last_mirror;
422 };
423
424 int btrfs_readpage_io_failed_hook(struct bio *failed_bio,
425                                   struct page *page, u64 start, u64 end,
426                                   struct extent_state *state)
427 {
428         struct io_failure_record *failrec = NULL;
429         u64 private;
430         struct extent_map *em;
431         struct inode *inode = page->mapping->host;
432         struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
433         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
434         struct bio *bio;
435         int num_copies;
436         int ret;
437         u64 logical;
438
439         ret = get_state_private(failure_tree, start, &private);
440         if (ret) {
441                 failrec = kmalloc(sizeof(*failrec), GFP_NOFS);
442                 if (!failrec)
443                         return -ENOMEM;
444                 failrec->start = start;
445                 failrec->len = end - start + 1;
446                 failrec->last_mirror = 0;
447
448                 spin_lock(&em_tree->lock);
449                 em = lookup_extent_mapping(em_tree, start, failrec->len);
450                 if (em->start > start || em->start + em->len < start) {
451                         free_extent_map(em);
452                         em = NULL;
453                 }
454                 spin_unlock(&em_tree->lock);
455
456                 if (!em || IS_ERR(em)) {
457                         kfree(failrec);
458                         return -EIO;
459                 }
460                 logical = start - em->start;
461                 logical = em->block_start + logical;
462                 failrec->logical = logical;
463                 free_extent_map(em);
464                 set_extent_bits(failure_tree, start, end, EXTENT_LOCKED |
465                                 EXTENT_DIRTY, GFP_NOFS);
466                 set_state_private(failure_tree, start,
467                                  (u64)(unsigned long)failrec);
468         } else {
469                 failrec = (struct io_failure_record *)(unsigned long)private;
470         }
471         num_copies = btrfs_num_copies(
472                               &BTRFS_I(inode)->root->fs_info->mapping_tree,
473                               failrec->logical, failrec->len);
474         failrec->last_mirror++;
475         if (!state) {
476                 spin_lock_irq(&BTRFS_I(inode)->io_tree.lock);
477                 state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
478                                                     failrec->start,
479                                                     EXTENT_LOCKED);
480                 if (state && state->start != failrec->start)
481                         state = NULL;
482                 spin_unlock_irq(&BTRFS_I(inode)->io_tree.lock);
483         }
484         if (!state || failrec->last_mirror > num_copies) {
485                 set_state_private(failure_tree, failrec->start, 0);
486                 clear_extent_bits(failure_tree, failrec->start,
487                                   failrec->start + failrec->len - 1,
488                                   EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
489                 kfree(failrec);
490                 return -EIO;
491         }
492         bio = bio_alloc(GFP_NOFS, 1);
493         bio->bi_private = state;
494         bio->bi_end_io = failed_bio->bi_end_io;
495         bio->bi_sector = failrec->logical >> 9;
496         bio->bi_bdev = failed_bio->bi_bdev;
497         bio_add_page(bio, page, failrec->len, start - page_offset(page));
498         btrfs_submit_bio_hook(inode, READ, bio, failrec->last_mirror);
499         return 0;
500 }
501
/*
 * Read-completion hook: verify the checksum of the bytes [start, end] that
 * were read into @page.
 *
 * The expected checksum is taken from @state when that state begins exactly
 * at start, otherwise from get_state_private() on the inode's io_tree.  On a
 * match, an io_failure_record stored for start in the io_failure_tree (if
 * any) is torn down and freed.
 *
 * Returns 0 when checksumming is off (NODATASUM mount option or inode flag),
 * when the checksum matches, or when verification fails but the expected
 * value is 0; returns -EIO otherwise.
 */
502 int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
503                                struct extent_state *state)
504 {
505         size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
506         struct inode *inode = page->mapping->host;
507         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
508         char *kaddr;
509         u64 private = ~(u32)0;
510         int ret;
511         struct btrfs_root *root = BTRFS_I(inode)->root;
512         u32 csum = ~(u32)0;
513         unsigned long flags;
514
515         if (btrfs_test_opt(root, NODATASUM) ||
516             btrfs_test_flag(inode, NODATASUM))
517                 return 0;
518         if (state && state->start == start) {
519                 private = state->private;
520                 ret = 0;
521         } else {
522                 ret = get_state_private(io_tree, start, &private);
523         }
/* interrupts stay disabled for as long as the KM_IRQ0 atomic mapping is held */
524         local_irq_save(flags);
525         kaddr = kmap_atomic(page, KM_IRQ0);
/* no expected checksum could be obtained */
526         if (ret) {
527                 goto zeroit;
528         }
529         csum = btrfs_csum_data(root, kaddr + offset, csum,  end - start + 1);
530         btrfs_csum_final(csum, (char *)&csum);
531         if (csum != private) {
532                 goto zeroit;
533         }
534         kunmap_atomic(kaddr, KM_IRQ0);
535         local_irq_restore(flags);
536
537         /* if the io failure tree for this inode is non-empty,
538          * check to see if we've recovered from a failed IO
539          */
/* private is reused as the in/out start argument of count_range_bits */
540         private = 0;
541         if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
542                              (u64)-1, 1, EXTENT_DIRTY)) {
543                 u64 private_failure;
544                 struct io_failure_record *failure;
545                 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
546                                         start, &private_failure);
547                 if (ret == 0) {
548                         failure = (struct io_failure_record *)(unsigned long)
549                                    private_failure;
550                         set_state_private(&BTRFS_I(inode)->io_failure_tree,
551                                           failure->start, 0);
552                         clear_extent_bits(&BTRFS_I(inode)->io_failure_tree,
553                                           failure->start,
554                                           failure->start + failure->len - 1,
555                                           EXTENT_DIRTY | EXTENT_LOCKED,
556                                           GFP_NOFS);
557                         kfree(failure);
558                 }
559         }
560         return 0;
561
562 zeroit:
563         printk("btrfs csum failed ino %lu off %llu csum %u private %Lu\n",
564                page->mapping->host->i_ino, (unsigned long long)start, csum,
565                private);
/* despite the label name the range is filled with 0x01 bytes, not zeroes */
566         memset(kaddr + offset, 1, end - start + 1);
567         flush_dcache_page(page);
568         kunmap_atomic(kaddr, KM_IRQ0);
569         local_irq_restore(flags);
570         if (private == 0)
571                 return 0;
572         return -EIO;
573 }
574
575 void btrfs_read_locked_inode(struct inode *inode)
576 {
577         struct btrfs_path *path;
578         struct extent_buffer *leaf;
579         struct btrfs_inode_item *inode_item;
580         struct btrfs_timespec *tspec;
581         struct btrfs_root *root = BTRFS_I(inode)->root;
582         struct btrfs_key location;
583         u64 alloc_group_block;
584         u32 rdev;
585         int ret;
586
587         path = btrfs_alloc_path();
588         BUG_ON(!path);
589         mutex_lock(&root->fs_info->fs_mutex);
590         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
591
592         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
593         if (ret)
594                 goto make_bad;
595
596         leaf = path->nodes[0];
597         inode_item = btrfs_item_ptr(leaf, path->slots[0],
598                                     struct btrfs_inode_item);
599
600         inode->i_mode = btrfs_inode_mode(leaf, inode_item);
601         inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
602         inode->i_uid = btrfs_inode_uid(leaf, inode_item);
603         inode->i_gid = btrfs_inode_gid(leaf, inode_item);
604         inode->i_size = btrfs_inode_size(leaf, inode_item);
605
606         tspec = btrfs_inode_atime(inode_item);
607         inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
608         inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
609
610         tspec = btrfs_inode_mtime(inode_item);
611         inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
612         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
613
614         tspec = btrfs_inode_ctime(inode_item);
615         inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
616         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
617
618         inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item);
619         inode->i_generation = btrfs_inode_generation(leaf, inode_item);
620         inode->i_rdev = 0;
621         rdev = btrfs_inode_rdev(leaf, inode_item);
622
623         alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
624         BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
625                                                        alloc_group_block);
626         BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
627         if (!BTRFS_I(inode)->block_group) {
628                 BTRFS_I(inode)->block_group = btrfs_find_block_group(root,
629                                                  NULL, 0,
630                                                  BTRFS_BLOCK_GROUP_METADATA, 0);
631         }
632         btrfs_free_path(path);
633         inode_item = NULL;
634
635         mutex_unlock(&root->fs_info->fs_mutex);
636
637         switch (inode->i_mode & S_IFMT) {
638         case S_IFREG:
639                 inode->i_mapping->a_ops = &btrfs_aops;
640                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
641                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
642                 inode->i_fop = &btrfs_file_operations;
643                 inode->i_op = &btrfs_file_inode_operations;
644                 break;
645         case S_IFDIR:
646                 inode->i_fop = &btrfs_dir_file_operations;
647                 if (root == root->fs_info->tree_root)
648                         inode->i_op = &btrfs_dir_ro_inode_operations;
649                 else
650                         inode->i_op = &btrfs_dir_inode_operations;
651                 break;
652         case S_IFLNK:
653                 inode->i_op = &btrfs_symlink_inode_operations;
654                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
655                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
656                 break;
657         default:
658                 init_special_inode(inode, inode->i_mode, rdev);
659                 break;
660         }
661         return;
662
663 make_bad:
664         btrfs_release_path(root, path);
665         btrfs_free_path(path);
666         mutex_unlock(&root->fs_info->fs_mutex);
667         make_bad_inode(inode);
668 }
669
670 static void fill_inode_item(struct extent_buffer *leaf,
671                             struct btrfs_inode_item *item,
672                             struct inode *inode)
673 {
674         btrfs_set_inode_uid(leaf, item, inode->i_uid);
675         btrfs_set_inode_gid(leaf, item, inode->i_gid);
676         btrfs_set_inode_size(leaf, item, inode->i_size);
677         btrfs_set_inode_mode(leaf, item, inode->i_mode);
678         btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
679
680         btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
681                                inode->i_atime.tv_sec);
682         btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
683                                 inode->i_atime.tv_nsec);
684
685         btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
686                                inode->i_mtime.tv_sec);
687         btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
688                                 inode->i_mtime.tv_nsec);
689
690         btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
691                                inode->i_ctime.tv_sec);
692         btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
693                                 inode->i_ctime.tv_nsec);
694
695         btrfs_set_inode_nblocks(leaf, item, inode->i_blocks);
696         btrfs_set_inode_generation(leaf, item, inode->i_generation);
697         btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
698         btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
699         btrfs_set_inode_block_group(leaf, item,
700                                     BTRFS_I(inode)->block_group->key.objectid);
701 }
702
703 int btrfs_update_inode(struct btrfs_trans_handle *trans,
704                               struct btrfs_root *root,
705                               struct inode *inode)
706 {
707         struct btrfs_inode_item *inode_item;
708         struct btrfs_path *path;
709         struct extent_buffer *leaf;
710         int ret;
711
712         path = btrfs_alloc_path();
713         BUG_ON(!path);
714         ret = btrfs_lookup_inode(trans, root, path,
715                                  &BTRFS_I(inode)->location, 1);
716         if (ret) {
717                 if (ret > 0)
718                         ret = -ENOENT;
719                 goto failed;
720         }
721
722         leaf = path->nodes[0];
723         inode_item = btrfs_item_ptr(leaf, path->slots[0],
724                                   struct btrfs_inode_item);
725
726         fill_inode_item(leaf, inode_item, inode);
727         btrfs_mark_buffer_dirty(leaf);
728         btrfs_set_inode_last_trans(trans, inode);
729         ret = 0;
730 failed:
731         btrfs_release_path(root, path);
732         btrfs_free_path(path);
733         return ret;
734 }
735
736
737 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
738                               struct btrfs_root *root,
739                               struct inode *dir,
740                               struct dentry *dentry)
741 {
742         struct btrfs_path *path;
743         const char *name = dentry->d_name.name;
744         int name_len = dentry->d_name.len;
745         int ret = 0;
746         struct extent_buffer *leaf;
747         struct btrfs_dir_item *di;
748         struct btrfs_key key;
749
750         path = btrfs_alloc_path();
751         if (!path) {
752                 ret = -ENOMEM;
753                 goto err;
754         }
755
756         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
757                                     name, name_len, -1);
758         if (IS_ERR(di)) {
759                 ret = PTR_ERR(di);
760                 goto err;
761         }
762         if (!di) {
763                 ret = -ENOENT;
764                 goto err;
765         }
766         leaf = path->nodes[0];
767         btrfs_dir_item_key_to_cpu(leaf, di, &key);
768         ret = btrfs_delete_one_dir_name(trans, root, path, di);
769         if (ret)
770                 goto err;
771         btrfs_release_path(root, path);
772
773         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
774                                          key.objectid, name, name_len, -1);
775         if (IS_ERR(di)) {
776                 ret = PTR_ERR(di);
777                 goto err;
778         }
779         if (!di) {
780                 ret = -ENOENT;
781                 goto err;
782         }
783         ret = btrfs_delete_one_dir_name(trans, root, path, di);
784
785         dentry->d_inode->i_ctime = dir->i_ctime;
786         ret = btrfs_del_inode_ref(trans, root, name, name_len,
787                                   dentry->d_inode->i_ino,
788                                   dentry->d_parent->d_inode->i_ino);
789         if (ret) {
790                 printk("failed to delete reference to %.*s, "
791                        "inode %lu parent %lu\n", name_len, name,
792                        dentry->d_inode->i_ino,
793                        dentry->d_parent->d_inode->i_ino);
794         }
795 err:
796         btrfs_free_path(path);
797         if (!ret) {
798                 dir->i_size -= name_len * 2;
799                 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
800                 btrfs_update_inode(trans, root, dir);
801 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
802                 dentry->d_inode->i_nlink--;
803 #else
804                 drop_nlink(dentry->d_inode);
805 #endif
806                 ret = btrfs_update_inode(trans, root, dentry->d_inode);
807                 dir->i_sb->s_dirt = 1;
808         }
809         return ret;
810 }
811
812 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
813 {
814         struct btrfs_root *root;
815         struct btrfs_trans_handle *trans;
816         struct inode *inode = dentry->d_inode;
817         int ret;
818         unsigned long nr = 0;
819
820         root = BTRFS_I(dir)->root;
821         mutex_lock(&root->fs_info->fs_mutex);
822
823         ret = btrfs_check_free_space(root, 1, 1);
824         if (ret)
825                 goto fail;
826
827         trans = btrfs_start_transaction(root, 1);
828
829         btrfs_set_trans_block_group(trans, dir);
830         ret = btrfs_unlink_trans(trans, root, dir, dentry);
831         nr = trans->blocks_used;
832
833         if (inode->i_nlink == 0) {
834                 int found;
835                 /* if the inode isn't linked anywhere,
836                  * we don't need to worry about
837                  * data=ordered
838                  */
839                 found = btrfs_del_ordered_inode(inode);
840                 if (found == 1) {
841                         atomic_dec(&inode->i_count);
842                 }
843         }
844
845         btrfs_end_transaction(trans, root);
846 fail:
847         mutex_unlock(&root->fs_info->fs_mutex);
848         btrfs_btree_balance_dirty(root, nr);
849         btrfs_throttle(root);
850         return ret;
851 }
852
853 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
854 {
855         struct inode *inode = dentry->d_inode;
856         int err = 0;
857         int ret;
858         struct btrfs_root *root = BTRFS_I(dir)->root;
859         struct btrfs_trans_handle *trans;
860         unsigned long nr = 0;
861
862         if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
863                 return -ENOTEMPTY;
864
865         mutex_lock(&root->fs_info->fs_mutex);
866         ret = btrfs_check_free_space(root, 1, 1);
867         if (ret)
868                 goto fail;
869
870         trans = btrfs_start_transaction(root, 1);
871         btrfs_set_trans_block_group(trans, dir);
872
873         /* now the directory is empty */
874         err = btrfs_unlink_trans(trans, root, dir, dentry);
875         if (!err) {
876                 inode->i_size = 0;
877         }
878
879         nr = trans->blocks_used;
880         ret = btrfs_end_transaction(trans, root);
881 fail:
882         mutex_unlock(&root->fs_info->fs_mutex);
883         btrfs_btree_balance_dirty(root, nr);
884         btrfs_throttle(root);
885
886         if (ret && !err)
887                 err = ret;
888         return err;
889 }
890
891 /*
892  * this can truncate away extent items, csum items and directory items.
893  * It starts at a high offset and removes keys until it can't find
894  * any higher than i_size.
895  *
896  * csum items that cross the new i_size are truncated to the new size
897  * as well.
898  */
899 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
900                                    struct btrfs_root *root,
901                                    struct inode *inode,
902                                    u32 min_type)
903 {
904         int ret;
905         struct btrfs_path *path;
906         struct btrfs_key key;
907         struct btrfs_key found_key;
908         u32 found_type;
909         struct extent_buffer *leaf;
910         struct btrfs_file_extent_item *fi;
911         u64 extent_start = 0;
912         u64 extent_num_bytes = 0;
913         u64 item_end = 0;
914         u64 root_gen = 0;
915         u64 root_owner = 0;
916         int found_extent;
917         int del_item;
918         int pending_del_nr = 0;
919         int pending_del_slot = 0;
920         int extent_type = -1;
921         u64 mask = root->sectorsize - 1;
922
923         btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1);
924         path = btrfs_alloc_path();
925         path->reada = -1;
926         BUG_ON(!path);
927
928         /* FIXME, add redo link to tree so we don't leak on crash */
929         key.objectid = inode->i_ino;
930         key.offset = (u64)-1;
931         key.type = (u8)-1;
932
933         btrfs_init_path(path);
934 search_again:
935         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
936         if (ret < 0) {
937                 goto error;
938         }
939         if (ret > 0) {
940                 BUG_ON(path->slots[0] == 0);
941                 path->slots[0]--;
942         }
943
944         while(1) {
945                 fi = NULL;
946                 leaf = path->nodes[0];
947                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
948                 found_type = btrfs_key_type(&found_key);
949
950                 if (found_key.objectid != inode->i_ino)
951                         break;
952
953                 if (found_type < min_type)
954                         break;
955
956                 item_end = found_key.offset;
957                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
958                         fi = btrfs_item_ptr(leaf, path->slots[0],
959                                             struct btrfs_file_extent_item);
960                         extent_type = btrfs_file_extent_type(leaf, fi);
961                         if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
962                                 item_end +=
963                                     btrfs_file_extent_num_bytes(leaf, fi);
964                         } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
965                                 struct btrfs_item *item = btrfs_item_nr(leaf,
966                                                                 path->slots[0]);
967                                 item_end += btrfs_file_extent_inline_len(leaf,
968                                                                          item);
969                         }
970                         item_end--;
971                 }
972                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
973                         ret = btrfs_csum_truncate(trans, root, path,
974                                                   inode->i_size);
975                         BUG_ON(ret);
976                 }
977                 if (item_end < inode->i_size) {
978                         if (found_type == BTRFS_DIR_ITEM_KEY) {
979                                 found_type = BTRFS_INODE_ITEM_KEY;
980                         } else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
981                                 found_type = BTRFS_CSUM_ITEM_KEY;
982                         } else if (found_type == BTRFS_EXTENT_DATA_KEY) {
983                                 found_type = BTRFS_XATTR_ITEM_KEY;
984                         } else if (found_type == BTRFS_XATTR_ITEM_KEY) {
985                                 found_type = BTRFS_INODE_REF_KEY;
986                         } else if (found_type) {
987                                 found_type--;
988                         } else {
989                                 break;
990                         }
991                         btrfs_set_key_type(&key, found_type);
992                         goto next;
993                 }
994                 if (found_key.offset >= inode->i_size)
995                         del_item = 1;
996                 else
997                         del_item = 0;
998                 found_extent = 0;
999
1000                 /* FIXME, shrink the extent if the ref count is only 1 */
1001                 if (found_type != BTRFS_EXTENT_DATA_KEY)
1002                         goto delete;
1003
1004                 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
1005                         u64 num_dec;
1006                         extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
1007                         if (!del_item) {
1008                                 u64 orig_num_bytes =
1009                                         btrfs_file_extent_num_bytes(leaf, fi);
1010                                 extent_num_bytes = inode->i_size -
1011                                         found_key.offset + root->sectorsize - 1;
1012                                 extent_num_bytes = extent_num_bytes &
1013                                         ~((u64)root->sectorsize - 1);
1014                                 btrfs_set_file_extent_num_bytes(leaf, fi,
1015                                                          extent_num_bytes);
1016                                 num_dec = (orig_num_bytes -
1017                                            extent_num_bytes);
1018                                 if (extent_start != 0)
1019                                         dec_i_blocks(inode, num_dec);
1020                                 btrfs_mark_buffer_dirty(leaf);
1021                         } else {
1022                                 extent_num_bytes =
1023                                         btrfs_file_extent_disk_num_bytes(leaf,
1024                                                                          fi);
1025                                 /* FIXME blocksize != 4096 */
1026                                 num_dec = btrfs_file_extent_num_bytes(leaf, fi);
1027                                 if (extent_start != 0) {
1028                                         found_extent = 1;
1029                                         dec_i_blocks(inode, num_dec);
1030                                 }
1031                                 root_gen = btrfs_header_generation(leaf);
1032                                 root_owner = btrfs_header_owner(leaf);
1033                         }
1034                 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1035                         if (!del_item) {
1036                                 u32 newsize = inode->i_size - found_key.offset;
1037                                 dec_i_blocks(inode, item_end + 1 -
1038                                             found_key.offset - newsize);
1039                                 newsize =
1040                                     btrfs_file_extent_calc_inline_size(newsize);
1041                                 ret = btrfs_truncate_item(trans, root, path,
1042                                                           newsize, 1);
1043                                 BUG_ON(ret);
1044                         } else {
1045                                 dec_i_blocks(inode, item_end + 1 -
1046                                              found_key.offset);
1047                         }
1048                 }
1049 delete:
1050                 if (del_item) {
1051                         if (!pending_del_nr) {
1052                                 /* no pending yet, add ourselves */
1053                                 pending_del_slot = path->slots[0];
1054                                 pending_del_nr = 1;
1055                         } else if (pending_del_nr &&
1056                                    path->slots[0] + 1 == pending_del_slot) {
1057                                 /* hop on the pending chunk */
1058                                 pending_del_nr++;
1059                                 pending_del_slot = path->slots[0];
1060                         } else {
1061                                 printk("bad pending slot %d pending_del_nr %d pending_del_slot %d\n", path->slots[0], pending_del_nr, pending_del_slot);
1062                         }
1063                 } else {
1064                         break;
1065                 }
1066                 if (found_extent) {
1067                         ret = btrfs_free_extent(trans, root, extent_start,
1068                                                 extent_num_bytes,
1069                                                 root_owner,
1070                                                 root_gen, inode->i_ino,
1071                                                 found_key.offset, 0);
1072                         BUG_ON(ret);
1073                 }
1074 next:
1075                 if (path->slots[0] == 0) {
1076                         if (pending_del_nr)
1077                                 goto del_pending;
1078                         btrfs_release_path(root, path);
1079                         goto search_again;
1080                 }
1081
1082                 path->slots[0]--;
1083                 if (pending_del_nr &&
1084                     path->slots[0] + 1 != pending_del_slot) {
1085                         struct btrfs_key debug;
1086 del_pending:
1087                         btrfs_item_key_to_cpu(path->nodes[0], &debug,
1088                                               pending_del_slot);
1089                         ret = btrfs_del_items(trans, root, path,
1090                                               pending_del_slot,
1091                                               pending_del_nr);
1092                         BUG_ON(ret);
1093                         pending_del_nr = 0;
1094                         btrfs_release_path(root, path);
1095                         goto search_again;
1096                 }
1097         }
1098         ret = 0;
1099 error:
1100         if (pending_del_nr) {
1101                 ret = btrfs_del_items(trans, root, path, pending_del_slot,
1102                                       pending_del_nr);
1103         }
1104         btrfs_release_path(root, path);
1105         btrfs_free_path(path);
1106         inode->i_sb->s_dirt = 1;
1107         return ret;
1108 }
1109
1110 static int btrfs_cow_one_page(struct inode *inode, struct page *page,
1111                               size_t zero_start)
1112 {
1113         char *kaddr;
1114         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1115         u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1116         u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
1117         int ret = 0;
1118
1119         WARN_ON(!PageLocked(page));
1120         set_page_extent_mapped(page);
1121
1122         lock_extent(io_tree, page_start, page_end, GFP_NOFS);
1123         set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
1124                             page_end, GFP_NOFS);
1125
1126         if (zero_start != PAGE_CACHE_SIZE) {
1127                 kaddr = kmap(page);
1128                 memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
1129                 flush_dcache_page(page);
1130                 kunmap(page);
1131         }
1132         set_page_dirty(page);
1133         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
1134
1135         return ret;
1136 }
1137
1138 /*
1139  * taken from block_truncate_page, but does cow as it zeros out
1140  * any bytes left in the last page in the file.
1141  */
1142 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
1143 {
1144         struct inode *inode = mapping->host;
1145         struct btrfs_root *root = BTRFS_I(inode)->root;
1146         u32 blocksize = root->sectorsize;
1147         pgoff_t index = from >> PAGE_CACHE_SHIFT;
1148         unsigned offset = from & (PAGE_CACHE_SIZE-1);
1149         struct page *page;
1150         int ret = 0;
1151         u64 page_start;
1152
1153         if ((offset & (blocksize - 1)) == 0)
1154                 goto out;
1155
1156         ret = -ENOMEM;
1157         page = grab_cache_page(mapping, index);
1158         if (!page)
1159                 goto out;
1160         if (!PageUptodate(page)) {
1161                 ret = btrfs_readpage(NULL, page);
1162                 lock_page(page);
1163                 if (!PageUptodate(page)) {
1164                         ret = -EIO;
1165                         goto out;
1166                 }
1167         }
1168         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1169
1170         ret = btrfs_cow_one_page(inode, page, offset);
1171
1172         unlock_page(page);
1173         page_cache_release(page);
1174 out:
1175         return ret;
1176 }
1177
1178 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
1179 {
1180         struct inode *inode = dentry->d_inode;
1181         int err;
1182
1183         err = inode_change_ok(inode, attr);
1184         if (err)
1185                 return err;
1186
1187         if (S_ISREG(inode->i_mode) &&
1188             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
1189                 struct btrfs_trans_handle *trans;
1190                 struct btrfs_root *root = BTRFS_I(inode)->root;
1191                 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1192
1193                 u64 mask = root->sectorsize - 1;
1194                 u64 hole_start = (inode->i_size + mask) & ~mask;
1195                 u64 block_end = (attr->ia_size + mask) & ~mask;
1196                 u64 hole_size;
1197                 u64 alloc_hint = 0;
1198
1199                 if (attr->ia_size <= hole_start)
1200                         goto out;
1201
1202                 mutex_lock(&root->fs_info->fs_mutex);
1203                 err = btrfs_check_free_space(root, 1, 0);
1204                 mutex_unlock(&root->fs_info->fs_mutex);
1205                 if (err)
1206                         goto fail;
1207
1208                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
1209
1210                 lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1211                 hole_size = block_end - hole_start;
1212
1213                 mutex_lock(&root->fs_info->fs_mutex);
1214                 trans = btrfs_start_transaction(root, 1);
1215                 btrfs_set_trans_block_group(trans, inode);
1216                 err = btrfs_drop_extents(trans, root, inode,
1217                                          hole_start, block_end, hole_start,
1218                                          &alloc_hint);
1219
1220                 if (alloc_hint != EXTENT_MAP_INLINE) {
1221                         err = btrfs_insert_file_extent(trans, root,
1222                                                        inode->i_ino,
1223                                                        hole_start, 0, 0,
1224                                                        hole_size);
1225                         btrfs_drop_extent_cache(inode, hole_start,
1226                                                 (u64)-1);
1227                         btrfs_check_file(root, inode);
1228                 }
1229                 btrfs_end_transaction(trans, root);
1230                 mutex_unlock(&root->fs_info->fs_mutex);
1231                 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1232                 if (err)
1233                         return err;
1234         }
1235 out:
1236         err = inode_setattr(inode, attr);
1237 fail:
1238         return err;
1239 }
1240
1241 void btrfs_put_inode(struct inode *inode)
1242 {
1243         int ret;
1244
1245         if (!BTRFS_I(inode)->ordered_trans) {
1246                 return;
1247         }
1248
1249         if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) ||
1250             mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
1251                 return;
1252
1253         ret = btrfs_del_ordered_inode(inode);
1254         if (ret == 1) {
1255                 atomic_dec(&inode->i_count);
1256         }
1257 }
1258
1259 void btrfs_delete_inode(struct inode *inode)
1260 {
1261         struct btrfs_trans_handle *trans;
1262         struct btrfs_root *root = BTRFS_I(inode)->root;
1263         unsigned long nr;
1264         int ret;
1265
1266         truncate_inode_pages(&inode->i_data, 0);
1267         if (is_bad_inode(inode)) {
1268                 goto no_delete;
1269         }
1270
1271         inode->i_size = 0;
1272         mutex_lock(&root->fs_info->fs_mutex);
1273         trans = btrfs_start_transaction(root, 1);
1274
1275         btrfs_set_trans_block_group(trans, inode);
1276         ret = btrfs_truncate_in_trans(trans, root, inode, 0);
1277         if (ret)
1278                 goto no_delete_lock;
1279
1280         nr = trans->blocks_used;
1281         clear_inode(inode);
1282
1283         btrfs_end_transaction(trans, root);
1284         mutex_unlock(&root->fs_info->fs_mutex);
1285         btrfs_btree_balance_dirty(root, nr);
1286         btrfs_throttle(root);
1287         return;
1288
1289 no_delete_lock:
1290         nr = trans->blocks_used;
1291         btrfs_end_transaction(trans, root);
1292         mutex_unlock(&root->fs_info->fs_mutex);
1293         btrfs_btree_balance_dirty(root, nr);
1294         btrfs_throttle(root);
1295 no_delete:
1296         clear_inode(inode);
1297 }
1298
1299 /*
1300  * this returns the key found in the dir entry in the location pointer.
1301  * If no dir entries were found, location->objectid is 0.
1302  */
1303 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
1304                                struct btrfs_key *location)
1305 {
1306         const char *name = dentry->d_name.name;
1307         int namelen = dentry->d_name.len;
1308         struct btrfs_dir_item *di;
1309         struct btrfs_path *path;
1310         struct btrfs_root *root = BTRFS_I(dir)->root;
1311         int ret = 0;
1312
1313         if (namelen == 1 && strcmp(name, ".") == 0) {
1314                 location->objectid = dir->i_ino;
1315                 location->type = BTRFS_INODE_ITEM_KEY;
1316                 location->offset = 0;
1317                 return 0;
1318         }
1319         path = btrfs_alloc_path();
1320         BUG_ON(!path);
1321
1322         if (namelen == 2 && strcmp(name, "..") == 0) {
1323                 struct btrfs_key key;
1324                 struct extent_buffer *leaf;
1325                 u32 nritems;
1326                 int slot;
1327
1328                 key.objectid = dir->i_ino;
1329                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1330                 key.offset = 0;
1331                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1332                 BUG_ON(ret == 0);
1333                 ret = 0;
1334
1335                 leaf = path->nodes[0];
1336                 slot = path->slots[0];
1337                 nritems = btrfs_header_nritems(leaf);
1338                 if (slot >= nritems)
1339                         goto out_err;
1340
1341                 btrfs_item_key_to_cpu(leaf, &key, slot);
1342                 if (key.objectid != dir->i_ino ||
1343                     key.type != BTRFS_INODE_REF_KEY) {
1344                         goto out_err;
1345                 }
1346                 location->objectid = key.offset;
1347                 location->type = BTRFS_INODE_ITEM_KEY;
1348                 location->offset = 0;
1349                 goto out;
1350         }
1351
1352         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
1353                                     namelen, 0);
1354         if (IS_ERR(di))
1355                 ret = PTR_ERR(di);
1356         if (!di || IS_ERR(di)) {
1357                 goto out_err;
1358         }
1359         btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
1360 out:
1361         btrfs_free_path(path);
1362         return ret;
1363 out_err:
1364         location->objectid = 0;
1365         goto out;
1366 }
1367
1368 /*
1369  * when we hit a tree root in a directory, the btrfs part of the inode
1370  * needs to be changed to reflect the root directory of the tree root.  This
1371  * is kind of like crossing a mount point.
1372  */
1373 static int fixup_tree_root_location(struct btrfs_root *root,
1374                              struct btrfs_key *location,
1375                              struct btrfs_root **sub_root,
1376                              struct dentry *dentry)
1377 {
1378         struct btrfs_path *path;
1379         struct btrfs_root_item *ri;
1380
1381         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
1382                 return 0;
1383         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
1384                 return 0;
1385
1386         path = btrfs_alloc_path();
1387         BUG_ON(!path);
1388         mutex_lock(&root->fs_info->fs_mutex);
1389
1390         *sub_root = btrfs_read_fs_root(root->fs_info, location,
1391                                         dentry->d_name.name,
1392                                         dentry->d_name.len);
1393         if (IS_ERR(*sub_root))
1394                 return PTR_ERR(*sub_root);
1395
1396         ri = &(*sub_root)->root_item;
1397         location->objectid = btrfs_root_dirid(ri);
1398         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1399         location->offset = 0;
1400
1401         btrfs_free_path(path);
1402         mutex_unlock(&root->fs_info->fs_mutex);
1403         return 0;
1404 }
1405
1406 static int btrfs_init_locked_inode(struct inode *inode, void *p)
1407 {
1408         struct btrfs_iget_args *args = p;
1409         inode->i_ino = args->ino;
1410         BTRFS_I(inode)->root = args->root;
1411         BTRFS_I(inode)->delalloc_bytes = 0;
1412         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1413         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1414                              inode->i_mapping, GFP_NOFS);
1415         extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1416                              inode->i_mapping, GFP_NOFS);
1417         return 0;
1418 }
1419
1420 static int btrfs_find_actor(struct inode *inode, void *opaque)
1421 {
1422         struct btrfs_iget_args *args = opaque;
1423         return (args->ino == inode->i_ino &&
1424                 args->root == BTRFS_I(inode)->root);
1425 }
1426
1427 struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
1428                             u64 root_objectid)
1429 {
1430         struct btrfs_iget_args args;
1431         args.ino = objectid;
1432         args.root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid);
1433
1434         if (!args.root)
1435                 return NULL;
1436
1437         return ilookup5(s, objectid, btrfs_find_actor, (void *)&args);
1438 }
1439
1440 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
1441                                 struct btrfs_root *root)
1442 {
1443         struct inode *inode;
1444         struct btrfs_iget_args args;
1445         args.ino = objectid;
1446         args.root = root;
1447
1448         inode = iget5_locked(s, objectid, btrfs_find_actor,
1449                              btrfs_init_locked_inode,
1450                              (void *)&args);
1451         return inode;
1452 }
1453
1454 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
1455                                    struct nameidata *nd)
1456 {
1457         struct inode * inode;
1458         struct btrfs_inode *bi = BTRFS_I(dir);
1459         struct btrfs_root *root = bi->root;
1460         struct btrfs_root *sub_root = root;
1461         struct btrfs_key location;
1462         int ret;
1463
1464         if (dentry->d_name.len > BTRFS_NAME_LEN)
1465                 return ERR_PTR(-ENAMETOOLONG);
1466
1467         mutex_lock(&root->fs_info->fs_mutex);
1468         ret = btrfs_inode_by_name(dir, dentry, &location);
1469         mutex_unlock(&root->fs_info->fs_mutex);
1470
1471         if (ret < 0)
1472                 return ERR_PTR(ret);
1473
1474         inode = NULL;
1475         if (location.objectid) {
1476                 ret = fixup_tree_root_location(root, &location, &sub_root,
1477                                                 dentry);
1478                 if (ret < 0)
1479                         return ERR_PTR(ret);
1480                 if (ret > 0)
1481                         return ERR_PTR(-ENOENT);
1482                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
1483                                           sub_root);
1484                 if (!inode)
1485                         return ERR_PTR(-EACCES);
1486                 if (inode->i_state & I_NEW) {
1487                         /* the inode and parent dir are two different roots */
1488                         if (sub_root != root) {
1489                                 igrab(inode);
1490                                 sub_root->inode = inode;
1491                         }
1492                         BTRFS_I(inode)->root = sub_root;
1493                         memcpy(&BTRFS_I(inode)->location, &location,
1494                                sizeof(location));
1495                         btrfs_read_locked_inode(inode);
1496                         unlock_new_inode(inode);
1497                 }
1498         }
1499         return d_splice_alias(inode, dentry);
1500 }
1501
1502 static unsigned char btrfs_filetype_table[] = {
1503         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
1504 };
1505
1506 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1507 {
1508         struct inode *inode = filp->f_dentry->d_inode;
1509         struct btrfs_root *root = BTRFS_I(inode)->root;
1510         struct btrfs_item *item;
1511         struct btrfs_dir_item *di;
1512         struct btrfs_key key;
1513         struct btrfs_key found_key;
1514         struct btrfs_path *path;
1515         int ret;
1516         u32 nritems;
1517         struct extent_buffer *leaf;
1518         int slot;
1519         int advance;
1520         unsigned char d_type;
1521         int over = 0;
1522         u32 di_cur;
1523         u32 di_total;
1524         u32 di_len;
1525         int key_type = BTRFS_DIR_INDEX_KEY;
1526         char tmp_name[32];
1527         char *name_ptr;
1528         int name_len;
1529
1530         /* FIXME, use a real flag for deciding about the key type */
1531         if (root->fs_info->tree_root == root)
1532                 key_type = BTRFS_DIR_ITEM_KEY;
1533
1534         /* special case for "." */
1535         if (filp->f_pos == 0) {
1536                 over = filldir(dirent, ".", 1,
1537                                1, inode->i_ino,
1538                                DT_DIR);
1539                 if (over)
1540                         return 0;
1541                 filp->f_pos = 1;
1542         }
1543
1544         mutex_lock(&root->fs_info->fs_mutex);
1545         key.objectid = inode->i_ino;
1546         path = btrfs_alloc_path();
1547         path->reada = 2;
1548
1549         /* special case for .., just use the back ref */
1550         if (filp->f_pos == 1) {
1551                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1552                 key.offset = 0;
1553                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1554                 BUG_ON(ret == 0);
1555                 leaf = path->nodes[0];
1556                 slot = path->slots[0];
1557                 nritems = btrfs_header_nritems(leaf);
1558                 if (slot >= nritems) {
1559                         btrfs_release_path(root, path);
1560                         goto read_dir_items;
1561                 }
1562                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1563                 btrfs_release_path(root, path);
1564                 if (found_key.objectid != key.objectid ||
1565                     found_key.type != BTRFS_INODE_REF_KEY)
1566                         goto read_dir_items;
1567                 over = filldir(dirent, "..", 2,
1568                                2, found_key.offset, DT_DIR);
1569                 if (over)
1570                         goto nopos;
1571                 filp->f_pos = 2;
1572         }
1573
1574 read_dir_items:
1575         btrfs_set_key_type(&key, key_type);
1576         key.offset = filp->f_pos;
1577
1578         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1579         if (ret < 0)
1580                 goto err;
1581         advance = 0;
1582         while(1) {
1583                 leaf = path->nodes[0];
1584                 nritems = btrfs_header_nritems(leaf);
1585                 slot = path->slots[0];
1586                 if (advance || slot >= nritems) {
1587                         if (slot >= nritems -1) {
1588                                 ret = btrfs_next_leaf(root, path);
1589                                 if (ret)
1590                                         break;
1591                                 leaf = path->nodes[0];
1592                                 nritems = btrfs_header_nritems(leaf);
1593                                 slot = path->slots[0];
1594                         } else {
1595                                 slot++;
1596                                 path->slots[0]++;
1597                         }
1598                 }
1599                 advance = 1;
1600                 item = btrfs_item_nr(leaf, slot);
1601                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1602
1603                 if (found_key.objectid != key.objectid)
1604                         break;
1605                 if (btrfs_key_type(&found_key) != key_type)
1606                         break;
1607                 if (found_key.offset < filp->f_pos)
1608                         continue;
1609
1610                 filp->f_pos = found_key.offset;
1611                 advance = 1;
1612                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
1613                 di_cur = 0;
1614                 di_total = btrfs_item_size(leaf, item);
1615                 while(di_cur < di_total) {
1616                         struct btrfs_key location;
1617
1618                         name_len = btrfs_dir_name_len(leaf, di);
1619                         if (name_len < 32) {
1620                                 name_ptr = tmp_name;
1621                         } else {
1622                                 name_ptr = kmalloc(name_len, GFP_NOFS);
1623                                 BUG_ON(!name_ptr);
1624                         }
1625                         read_extent_buffer(leaf, name_ptr,
1626                                            (unsigned long)(di + 1), name_len);
1627
1628                         d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
1629                         btrfs_dir_item_key_to_cpu(leaf, di, &location);
1630                         over = filldir(dirent, name_ptr, name_len,
1631                                        found_key.offset,
1632                                        location.objectid,
1633                                        d_type);
1634
1635                         if (name_ptr != tmp_name)
1636                                 kfree(name_ptr);
1637
1638                         if (over)
1639                                 goto nopos;
1640                         di_len = btrfs_dir_name_len(leaf, di) +
1641                                 btrfs_dir_data_len(leaf, di) +sizeof(*di);
1642                         di_cur += di_len;
1643                         di = (struct btrfs_dir_item *)((char *)di + di_len);
1644                 }
1645         }
1646         if (key_type == BTRFS_DIR_INDEX_KEY)
1647                 filp->f_pos = INT_LIMIT(typeof(filp->f_pos));
1648         else
1649                 filp->f_pos++;
1650 nopos:
1651         ret = 0;
1652 err:
1653         btrfs_release_path(root, path);
1654         btrfs_free_path(path);
1655         mutex_unlock(&root->fs_info->fs_mutex);
1656         return ret;
1657 }
1658
1659 int btrfs_write_inode(struct inode *inode, int wait)
1660 {
1661         struct btrfs_root *root = BTRFS_I(inode)->root;
1662         struct btrfs_trans_handle *trans;
1663         int ret = 0;
1664
1665         if (wait) {
1666                 mutex_lock(&root->fs_info->fs_mutex);
1667                 trans = btrfs_start_transaction(root, 1);
1668                 btrfs_set_trans_block_group(trans, inode);
1669                 ret = btrfs_commit_transaction(trans, root);
1670                 mutex_unlock(&root->fs_info->fs_mutex);
1671         }
1672         return ret;
1673 }
1674
1675 /*
1676  * This is somewhat expensive, updating the tree every time the
1677  * inode changes.  But, it is most likely to find the inode in cache.
1678  * FIXME, needs more benchmarking...there are no reasons other than performance
1679  * to keep or drop this code.
1680  */
1681 void btrfs_dirty_inode(struct inode *inode)
1682 {
1683         struct btrfs_root *root = BTRFS_I(inode)->root;
1684         struct btrfs_trans_handle *trans;
1685
1686         mutex_lock(&root->fs_info->fs_mutex);
1687         trans = btrfs_start_transaction(root, 1);
1688         btrfs_set_trans_block_group(trans, inode);
1689         btrfs_update_inode(trans, root, inode);
1690         btrfs_end_transaction(trans, root);
1691         mutex_unlock(&root->fs_info->fs_mutex);
1692 }
1693
1694 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
1695                                      struct btrfs_root *root,
1696                                      const char *name, int name_len,
1697                                      u64 ref_objectid,
1698                                      u64 objectid,
1699                                      struct btrfs_block_group_cache *group,
1700                                      int mode)
1701 {
1702         struct inode *inode;
1703         struct btrfs_inode_item *inode_item;
1704         struct btrfs_block_group_cache *new_inode_group;
1705         struct btrfs_key *location;
1706         struct btrfs_path *path;
1707         struct btrfs_inode_ref *ref;
1708         struct btrfs_key key[2];
1709         u32 sizes[2];
1710         unsigned long ptr;
1711         int ret;
1712         int owner;
1713
1714         path = btrfs_alloc_path();
1715         BUG_ON(!path);
1716
1717         inode = new_inode(root->fs_info->sb);
1718         if (!inode)
1719                 return ERR_PTR(-ENOMEM);
1720
1721         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1722         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1723                              inode->i_mapping, GFP_NOFS);
1724         extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1725                              inode->i_mapping, GFP_NOFS);
1726         BTRFS_I(inode)->delalloc_bytes = 0;
1727         BTRFS_I(inode)->root = root;
1728
1729         if (mode & S_IFDIR)
1730                 owner = 0;
1731         else
1732                 owner = 1;
1733         new_inode_group = btrfs_find_block_group(root, group, 0,
1734                                        BTRFS_BLOCK_GROUP_METADATA, owner);
1735         if (!new_inode_group) {
1736                 printk("find_block group failed\n");
1737                 new_inode_group = group;
1738         }
1739         BTRFS_I(inode)->block_group = new_inode_group;
1740         BTRFS_I(inode)->flags = 0;
1741
1742         key[0].objectid = objectid;
1743         btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
1744         key[0].offset = 0;
1745
1746         key[1].objectid = objectid;
1747         btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
1748         key[1].offset = ref_objectid;
1749
1750         sizes[0] = sizeof(struct btrfs_inode_item);
1751         sizes[1] = name_len + sizeof(*ref);
1752
1753         ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
1754         if (ret != 0)
1755                 goto fail;
1756
1757         if (objectid > root->highest_inode)
1758                 root->highest_inode = objectid;
1759
1760         inode->i_uid = current->fsuid;
1761         inode->i_gid = current->fsgid;
1762         inode->i_mode = mode;
1763         inode->i_ino = objectid;
1764         inode->i_blocks = 0;
1765         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1766         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1767                                   struct btrfs_inode_item);
1768         fill_inode_item(path->nodes[0], inode_item, inode);
1769
1770         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1771                              struct btrfs_inode_ref);
1772         btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
1773         ptr = (unsigned long)(ref + 1);
1774         write_extent_buffer(path->nodes[0], name, ptr, name_len);
1775
1776         btrfs_mark_buffer_dirty(path->nodes[0]);
1777         btrfs_free_path(path);
1778
1779         location = &BTRFS_I(inode)->location;
1780         location->objectid = objectid;
1781         location->offset = 0;
1782         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1783
1784         insert_inode_hash(inode);
1785         return inode;
1786 fail:
1787         btrfs_free_path(path);
1788         return ERR_PTR(ret);
1789 }
1790
1791 static inline u8 btrfs_inode_type(struct inode *inode)
1792 {
1793         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1794 }
1795
1796 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1797                             struct dentry *dentry, struct inode *inode,
1798                             int add_backref)
1799 {
1800         int ret;
1801         struct btrfs_key key;
1802         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1803         struct inode *parent_inode;
1804
1805         key.objectid = inode->i_ino;
1806         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1807         key.offset = 0;
1808
1809         ret = btrfs_insert_dir_item(trans, root,
1810                                     dentry->d_name.name, dentry->d_name.len,
1811                                     dentry->d_parent->d_inode->i_ino,
1812                                     &key, btrfs_inode_type(inode));
1813         if (ret == 0) {
1814                 if (add_backref) {
1815                         ret = btrfs_insert_inode_ref(trans, root,
1816                                              dentry->d_name.name,
1817                                              dentry->d_name.len,
1818                                              inode->i_ino,
1819                                              dentry->d_parent->d_inode->i_ino);
1820                 }
1821                 parent_inode = dentry->d_parent->d_inode;
1822                 parent_inode->i_size += dentry->d_name.len * 2;
1823                 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
1824                 ret = btrfs_update_inode(trans, root,
1825                                          dentry->d_parent->d_inode);
1826         }
1827         return ret;
1828 }
1829
1830 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1831                             struct dentry *dentry, struct inode *inode,
1832                             int backref)
1833 {
1834         int err = btrfs_add_link(trans, dentry, inode, backref);
1835         if (!err) {
1836                 d_instantiate(dentry, inode);
1837                 return 0;
1838         }
1839         if (err > 0)
1840                 err = -EEXIST;
1841         return err;
1842 }
1843
1844 static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
1845                         int mode, dev_t rdev)
1846 {
1847         struct btrfs_trans_handle *trans;
1848         struct btrfs_root *root = BTRFS_I(dir)->root;
1849         struct inode *inode = NULL;
1850         int err;
1851         int drop_inode = 0;
1852         u64 objectid;
1853         unsigned long nr = 0;
1854
1855         if (!new_valid_dev(rdev))
1856                 return -EINVAL;
1857
1858         mutex_lock(&root->fs_info->fs_mutex);
1859         err = btrfs_check_free_space(root, 1, 0);
1860         if (err)
1861                 goto fail;
1862
1863         trans = btrfs_start_transaction(root, 1);
1864         btrfs_set_trans_block_group(trans, dir);
1865
1866         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1867         if (err) {
1868                 err = -ENOSPC;
1869                 goto out_unlock;
1870         }
1871
1872         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1873                                 dentry->d_name.len,
1874                                 dentry->d_parent->d_inode->i_ino, objectid,
1875                                 BTRFS_I(dir)->block_group, mode);
1876         err = PTR_ERR(inode);
1877         if (IS_ERR(inode))
1878                 goto out_unlock;
1879
1880         btrfs_set_trans_block_group(trans, inode);
1881         err = btrfs_add_nondir(trans, dentry, inode, 0);
1882         if (err)
1883                 drop_inode = 1;
1884         else {
1885                 inode->i_op = &btrfs_special_inode_operations;
1886                 init_special_inode(inode, inode->i_mode, rdev);
1887                 btrfs_update_inode(trans, root, inode);
1888         }
1889         dir->i_sb->s_dirt = 1;
1890         btrfs_update_inode_block_group(trans, inode);
1891         btrfs_update_inode_block_group(trans, dir);
1892 out_unlock:
1893         nr = trans->blocks_used;
1894         btrfs_end_transaction(trans, root);
1895 fail:
1896         mutex_unlock(&root->fs_info->fs_mutex);
1897
1898         if (drop_inode) {
1899                 inode_dec_link_count(inode);
1900                 iput(inode);
1901         }
1902         btrfs_btree_balance_dirty(root, nr);
1903         btrfs_throttle(root);
1904         return err;
1905 }
1906
1907 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1908                         int mode, struct nameidata *nd)
1909 {
1910         struct btrfs_trans_handle *trans;
1911         struct btrfs_root *root = BTRFS_I(dir)->root;
1912         struct inode *inode = NULL;
1913         int err;
1914         int drop_inode = 0;
1915         unsigned long nr = 0;
1916         u64 objectid;
1917
1918         mutex_lock(&root->fs_info->fs_mutex);
1919         err = btrfs_check_free_space(root, 1, 0);
1920         if (err)
1921                 goto fail;
1922         trans = btrfs_start_transaction(root, 1);
1923         btrfs_set_trans_block_group(trans, dir);
1924
1925         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1926         if (err) {
1927                 err = -ENOSPC;
1928                 goto out_unlock;
1929         }
1930
1931         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1932                                 dentry->d_name.len,
1933                                 dentry->d_parent->d_inode->i_ino,
1934                                 objectid, BTRFS_I(dir)->block_group, mode);
1935         err = PTR_ERR(inode);
1936         if (IS_ERR(inode))
1937                 goto out_unlock;
1938
1939         btrfs_set_trans_block_group(trans, inode);
1940         err = btrfs_add_nondir(trans, dentry, inode, 0);
1941         if (err)
1942                 drop_inode = 1;
1943         else {
1944                 inode->i_mapping->a_ops = &btrfs_aops;
1945                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
1946                 inode->i_fop = &btrfs_file_operations;
1947                 inode->i_op = &btrfs_file_inode_operations;
1948                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1949                 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1950                                      inode->i_mapping, GFP_NOFS);
1951                 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1952                                      inode->i_mapping, GFP_NOFS);
1953                 BTRFS_I(inode)->delalloc_bytes = 0;
1954                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
1955         }
1956         dir->i_sb->s_dirt = 1;
1957         btrfs_update_inode_block_group(trans, inode);
1958         btrfs_update_inode_block_group(trans, dir);
1959 out_unlock:
1960         nr = trans->blocks_used;
1961         btrfs_end_transaction(trans, root);
1962 fail:
1963         mutex_unlock(&root->fs_info->fs_mutex);
1964
1965         if (drop_inode) {
1966                 inode_dec_link_count(inode);
1967                 iput(inode);
1968         }
1969         btrfs_btree_balance_dirty(root, nr);
1970         btrfs_throttle(root);
1971         return err;
1972 }
1973
1974 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1975                       struct dentry *dentry)
1976 {
1977         struct btrfs_trans_handle *trans;
1978         struct btrfs_root *root = BTRFS_I(dir)->root;
1979         struct inode *inode = old_dentry->d_inode;
1980         unsigned long nr = 0;
1981         int err;
1982         int drop_inode = 0;
1983
1984         if (inode->i_nlink == 0)
1985                 return -ENOENT;
1986
1987 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
1988         inode->i_nlink++;
1989 #else
1990         inc_nlink(inode);
1991 #endif
1992         mutex_lock(&root->fs_info->fs_mutex);
1993         err = btrfs_check_free_space(root, 1, 0);
1994         if (err)
1995                 goto fail;
1996         trans = btrfs_start_transaction(root, 1);
1997
1998         btrfs_set_trans_block_group(trans, dir);
1999         atomic_inc(&inode->i_count);
2000         err = btrfs_add_nondir(trans, dentry, inode, 1);
2001
2002         if (err)
2003                 drop_inode = 1;
2004
2005         dir->i_sb->s_dirt = 1;
2006         btrfs_update_inode_block_group(trans, dir);
2007         err = btrfs_update_inode(trans, root, inode);
2008
2009         if (err)
2010                 drop_inode = 1;
2011
2012         nr = trans->blocks_used;
2013         btrfs_end_transaction(trans, root);
2014 fail:
2015         mutex_unlock(&root->fs_info->fs_mutex);
2016
2017         if (drop_inode) {
2018                 inode_dec_link_count(inode);
2019                 iput(inode);
2020         }
2021         btrfs_btree_balance_dirty(root, nr);
2022         btrfs_throttle(root);
2023         return err;
2024 }
2025
2026 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2027 {
2028         struct inode *inode;
2029         struct btrfs_trans_handle *trans;
2030         struct btrfs_root *root = BTRFS_I(dir)->root;
2031         int err = 0;
2032         int drop_on_err = 0;
2033         u64 objectid;
2034         unsigned long nr = 1;
2035
2036         mutex_lock(&root->fs_info->fs_mutex);
2037         err = btrfs_check_free_space(root, 1, 0);
2038         if (err)
2039                 goto out_unlock;
2040
2041         trans = btrfs_start_transaction(root, 1);
2042         btrfs_set_trans_block_group(trans, dir);
2043
2044         if (IS_ERR(trans)) {
2045                 err = PTR_ERR(trans);
2046                 goto out_unlock;
2047         }
2048
2049         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2050         if (err) {
2051                 err = -ENOSPC;
2052                 goto out_unlock;
2053         }
2054
2055         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
2056                                 dentry->d_name.len,
2057                                 dentry->d_parent->d_inode->i_ino, objectid,
2058                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
2059         if (IS_ERR(inode)) {
2060                 err = PTR_ERR(inode);
2061                 goto out_fail;
2062         }
2063
2064         drop_on_err = 1;
2065         inode->i_op = &btrfs_dir_inode_operations;
2066         inode->i_fop = &btrfs_dir_file_operations;
2067         btrfs_set_trans_block_group(trans, inode);
2068
2069         inode->i_size = 0;
2070         err = btrfs_update_inode(trans, root, inode);
2071         if (err)
2072                 goto out_fail;
2073
2074         err = btrfs_add_link(trans, dentry, inode, 0);
2075         if (err)
2076                 goto out_fail;
2077
2078         d_instantiate(dentry, inode);
2079         drop_on_err = 0;
2080         dir->i_sb->s_dirt = 1;
2081         btrfs_update_inode_block_group(trans, inode);
2082         btrfs_update_inode_block_group(trans, dir);
2083
2084 out_fail:
2085         nr = trans->blocks_used;
2086         btrfs_end_transaction(trans, root);
2087
2088 out_unlock:
2089         mutex_unlock(&root->fs_info->fs_mutex);
2090         if (drop_on_err)
2091                 iput(inode);
2092         btrfs_btree_balance_dirty(root, nr);
2093         btrfs_throttle(root);
2094         return err;
2095 }
2096
2097 static int merge_extent_mapping(struct extent_map_tree *em_tree,
2098                                 struct extent_map *existing,
2099                                 struct extent_map *em)
2100 {
2101         u64 start_diff;
2102         u64 new_end;
2103         int ret = 0;
2104         int real_blocks = existing->block_start < EXTENT_MAP_LAST_BYTE;
2105
2106         if (real_blocks && em->block_start >= EXTENT_MAP_LAST_BYTE)
2107                 goto invalid;
2108
2109         if (!real_blocks && em->block_start != existing->block_start)
2110                 goto invalid;
2111
2112         new_end = max(existing->start + existing->len, em->start + em->len);
2113
2114         if (existing->start >= em->start) {
2115                 if (em->start + em->len < existing->start)
2116                         goto invalid;
2117
2118                 start_diff = existing->start - em->start;
2119                 if (real_blocks && em->block_start + start_diff !=
2120                     existing->block_start)
2121                         goto invalid;
2122
2123                 em->len = new_end - em->start;
2124
2125                 remove_extent_mapping(em_tree, existing);
2126                 /* free for the tree */
2127                 free_extent_map(existing);
2128                 ret = add_extent_mapping(em_tree, em);
2129
2130         } else if (em->start > existing->start) {
2131
2132                 if (existing->start + existing->len < em->start)
2133                         goto invalid;
2134
2135                 start_diff = em->start - existing->start;
2136                 if (real_blocks && existing->block_start + start_diff !=
2137                     em->block_start)
2138                         goto invalid;
2139
2140                 remove_extent_mapping(em_tree, existing);
2141                 em->block_start = existing->block_start;
2142                 em->start = existing->start;
2143                 em->len = new_end - existing->start;
2144                 free_extent_map(existing);
2145
2146                 ret = add_extent_mapping(em_tree, em);
2147         } else {
2148                 goto invalid;
2149         }
2150         return ret;
2151
2152 invalid:
2153         printk("invalid extent map merge [%Lu %Lu %Lu] [%Lu %Lu %Lu]\n",
2154                existing->start, existing->len, existing->block_start,
2155                em->start, em->len, em->block_start);
2156         return -EIO;
2157 }
2158
2159 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
2160                                     size_t pg_offset, u64 start, u64 len,
2161                                     int create)
2162 {
2163         int ret;
2164         int err = 0;
2165         u64 bytenr;
2166         u64 extent_start = 0;
2167         u64 extent_end = 0;
2168         u64 objectid = inode->i_ino;
2169         u32 found_type;
2170         struct btrfs_path *path;
2171         struct btrfs_root *root = BTRFS_I(inode)->root;
2172         struct btrfs_file_extent_item *item;
2173         struct extent_buffer *leaf;
2174         struct btrfs_key found_key;
2175         struct extent_map *em = NULL;
2176         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2177         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2178         struct btrfs_trans_handle *trans = NULL;
2179
2180         path = btrfs_alloc_path();
2181         BUG_ON(!path);
2182         mutex_lock(&root->fs_info->fs_mutex);
2183
2184 again:
2185         spin_lock(&em_tree->lock);
2186         em = lookup_extent_mapping(em_tree, start, len);
2187         spin_unlock(&em_tree->lock);
2188
2189         if (em) {
2190                 if (em->start > start) {
2191                         printk("get_extent lookup [%Lu %Lu] em [%Lu %Lu]\n",
2192                                start, len, em->start, em->len);
2193                         WARN_ON(1);
2194                 }
2195                 if (em->block_start == EXTENT_MAP_INLINE && page)
2196                         free_extent_map(em);
2197                 else
2198                         goto out;
2199         }
2200         em = alloc_extent_map(GFP_NOFS);
2201         if (!em) {
2202                 err = -ENOMEM;
2203                 goto out;
2204         }
2205
2206         em->start = EXTENT_MAP_HOLE;
2207         em->len = (u64)-1;
2208         em->bdev = inode->i_sb->s_bdev;
2209         ret = btrfs_lookup_file_extent(trans, root, path,
2210                                        objectid, start, trans != NULL);
2211         if (ret < 0) {
2212                 err = ret;
2213                 goto out;
2214         }
2215
2216         if (ret != 0) {
2217                 if (path->slots[0] == 0)
2218                         goto not_found;
2219                 path->slots[0]--;
2220         }
2221
2222         leaf = path->nodes[0];
2223         item = btrfs_item_ptr(leaf, path->slots[0],
2224                               struct btrfs_file_extent_item);
2225         /* are we inside the extent that was found? */
2226         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2227         found_type = btrfs_key_type(&found_key);
2228         if (found_key.objectid != objectid ||
2229             found_type != BTRFS_EXTENT_DATA_KEY) {
2230                 goto not_found;
2231         }
2232
2233         found_type = btrfs_file_extent_type(leaf, item);
2234         extent_start = found_key.offset;
2235         if (found_type == BTRFS_FILE_EXTENT_REG) {
2236                 extent_end = extent_start +
2237                        btrfs_file_extent_num_bytes(leaf, item);
2238                 err = 0;
2239                 if (start < extent_start || start >= extent_end) {
2240                         em->start = start;
2241                         if (start < extent_start) {
2242                                 if (start + len <= extent_start)
2243                                         goto not_found;
2244                                 em->len = extent_end - extent_start;
2245                         } else {
2246                                 em->len = len;
2247                         }
2248                         goto not_found_em;
2249                 }
2250                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
2251                 if (bytenr == 0) {
2252                         em->start = extent_start;
2253                         em->len = extent_end - extent_start;
2254                         em->block_start = EXTENT_MAP_HOLE;
2255                         goto insert;
2256                 }
2257                 bytenr += btrfs_file_extent_offset(leaf, item);
2258                 em->block_start = bytenr;
2259                 em->start = extent_start;
2260                 em->len = extent_end - extent_start;
2261                 goto insert;
2262         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
2263                 u64 page_start;
2264                 unsigned long ptr;
2265                 char *map;
2266                 size_t size;
2267                 size_t extent_offset;
2268                 size_t copy_size;
2269
2270                 size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf,
2271                                                     path->slots[0]));
2272                 extent_end = (extent_start + size + root->sectorsize - 1) &
2273                         ~((u64)root->sectorsize - 1);
2274                 if (start < extent_start || start >= extent_end) {
2275                         em->start = start;
2276                         if (start < extent_start) {
2277                                 if (start + len <= extent_start)
2278                                         goto not_found;
2279                                 em->len = extent_end - extent_start;
2280                         } else {
2281                                 em->len = len;
2282                         }
2283                         goto not_found_em;
2284                 }
2285                 em->block_start = EXTENT_MAP_INLINE;
2286
2287                 if (!page) {
2288                         em->start = extent_start;
2289                         em->len = size;
2290                         goto out;
2291                 }
2292
2293                 page_start = page_offset(page) + pg_offset;
2294                 extent_offset = page_start - extent_start;
2295                 copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
2296                                 size - extent_offset);
2297                 em->start = extent_start + extent_offset;
2298                 em->len = (copy_size + root->sectorsize - 1) &
2299                         ~((u64)root->sectorsize - 1);
2300                 map = kmap(page);
2301                 ptr = btrfs_file_extent_inline_start(item) + extent_offset;
2302                 if (create == 0 && !PageUptodate(page)) {
2303                         read_extent_buffer(leaf, map + pg_offset, ptr,
2304                                            copy_size);
2305                         flush_dcache_page(page);
2306                 } else if (create && PageUptodate(page)) {
2307                         if (!trans) {
2308                                 kunmap(page);
2309                                 free_extent_map(em);
2310                                 em = NULL;
2311                                 btrfs_release_path(root, path);
2312                                 trans = btrfs_start_transaction(root, 1);
2313                                 goto again;
2314                         }
2315                         write_extent_buffer(leaf, map + pg_offset, ptr,
2316                                             copy_size);
2317                         btrfs_mark_buffer_dirty(leaf);
2318                 }
2319                 kunmap(page);
2320                 set_extent_uptodate(io_tree, em->start,
2321                                     extent_map_end(em) - 1, GFP_NOFS);
2322                 goto insert;
2323         } else {
2324                 printk("unkknown found_type %d\n", found_type);
2325                 WARN_ON(1);
2326         }
2327 not_found:
2328         em->start = start;
2329         em->len = len;
2330 not_found_em:
2331         em->block_start = EXTENT_MAP_HOLE;
2332 insert:
2333         btrfs_release_path(root, path);
2334         if (em->start > start || extent_map_end(em) <= start) {
2335                 printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len);
2336                 err = -EIO;
2337                 goto out;
2338         }
2339
2340         err = 0;
2341         spin_lock(&em_tree->lock);
2342         ret = add_extent_mapping(em_tree, em);
2343
2344         /* it is possible that someone inserted the extent into the tree
2345          * while we had the lock dropped.  It is also possible that
2346          * an overlapping map exists in the tree
2347          */
2348         if (ret == -EEXIST) {
2349                 struct extent_map *existing;
2350                 existing = lookup_extent_mapping(em_tree, start, len);
2351                 if (!existing) {
2352                         existing = lookup_extent_mapping(em_tree, em->start,
2353                                                          em->len);
2354                         if (existing) {
2355                                 err = merge_extent_mapping(em_tree, existing,
2356                                                            em);
2357                                 free_extent_map(existing);
2358                                 if (err) {
2359                                         free_extent_map(em);
2360                                         em = NULL;
2361                                 }
2362                         } else {
2363                                 err = -EIO;
2364                                 printk("failing to insert %Lu %Lu\n",
2365                                        start, len);
2366                                 free_extent_map(em);
2367                                 em = NULL;
2368                         }
2369                 } else {
2370                         free_extent_map(em);
2371                         em = existing;
2372                 }
2373         }
2374         spin_unlock(&em_tree->lock);
2375 out:
2376         btrfs_free_path(path);
2377         if (trans) {
2378                 ret = btrfs_end_transaction(trans, root);
2379                 if (!err)
2380                         err = ret;
2381         }
2382         mutex_unlock(&root->fs_info->fs_mutex);
2383         if (err) {
2384                 free_extent_map(em);
2385                 WARN_ON(1);
2386                 return ERR_PTR(err);
2387         }
2388         return em;
2389 }
2390
2391 static int btrfs_get_block(struct inode *inode, sector_t iblock,
2392                         struct buffer_head *bh_result, int create)
2393 {
2394         struct extent_map *em;
2395         u64 start = (u64)iblock << inode->i_blkbits;
2396         struct btrfs_multi_bio *multi = NULL;
2397         struct btrfs_root *root = BTRFS_I(inode)->root;
2398         u64 len;
2399         u64 logical;
2400         u64 map_length;
2401         int ret = 0;
2402
2403         em = btrfs_get_extent(inode, NULL, 0, start, bh_result->b_size, 0);
2404
2405         if (!em || IS_ERR(em))
2406                 goto out;
2407
2408         if (em->start > start || em->start + em->len <= start)
2409             goto out;
2410
2411         if (em->block_start == EXTENT_MAP_INLINE) {
2412                 ret = -EINVAL;
2413                 goto out;
2414         }
2415
2416         if (em->block_start == EXTENT_MAP_HOLE ||
2417             em->block_start == EXTENT_MAP_DELALLOC) {
2418                 goto out;
2419         }
2420
2421         len = em->start + em->len - start;
2422         len = min_t(u64, len, INT_LIMIT(typeof(bh_result->b_size)));
2423
2424         logical = start - em->start;
2425         logical = em->block_start + logical;
2426
2427         map_length = len;
2428         ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
2429                               logical, &map_length, &multi, 0);
2430         BUG_ON(ret);
2431         bh_result->b_blocknr = multi->stripes[0].physical >> inode->i_blkbits;
2432         bh_result->b_size = min(map_length, len);
2433         bh_result->b_bdev = multi->stripes[0].dev->bdev;
2434         set_buffer_mapped(bh_result);
2435         kfree(multi);
2436 out:
2437         free_extent_map(em);
2438         return ret;
2439 }
2440
2441 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
2442                         const struct iovec *iov, loff_t offset,
2443                         unsigned long nr_segs)
2444 {
2445         struct file *file = iocb->ki_filp;
2446         struct inode *inode = file->f_mapping->host;
2447
2448         if (rw == WRITE)
2449                 return -EINVAL;
2450
2451         return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
2452                                   offset, nr_segs, btrfs_get_block, NULL);
2453 }
2454
2455 static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
2456 {
2457         return extent_bmap(mapping, iblock, btrfs_get_extent);
2458 }
2459
2460 int btrfs_readpage(struct file *file, struct page *page)
2461 {
2462         struct extent_io_tree *tree;
2463         tree = &BTRFS_I(page->mapping->host)->io_tree;
2464         return extent_read_full_page(tree, page, btrfs_get_extent);
2465 }
2466
2467 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
2468 {
2469         struct extent_io_tree *tree;
2470
2471
2472         if (current->flags & PF_MEMALLOC) {
2473                 redirty_page_for_writepage(wbc, page);
2474                 unlock_page(page);
2475                 return 0;
2476         }
2477         tree = &BTRFS_I(page->mapping->host)->io_tree;
2478         return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
2479 }
2480
2481 static int btrfs_writepages(struct address_space *mapping,
2482                             struct writeback_control *wbc)
2483 {
2484         struct extent_io_tree *tree;
2485         tree = &BTRFS_I(mapping->host)->io_tree;
2486         return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
2487 }
2488
2489 static int
2490 btrfs_readpages(struct file *file, struct address_space *mapping,
2491                 struct list_head *pages, unsigned nr_pages)
2492 {
2493         struct extent_io_tree *tree;
2494         tree = &BTRFS_I(mapping->host)->io_tree;
2495         return extent_readpages(tree, mapping, pages, nr_pages,
2496                                 btrfs_get_extent);
2497 }
2498
2499 static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
2500 {
2501         struct extent_io_tree *tree;
2502         struct extent_map_tree *map;
2503         int ret;
2504
2505         tree = &BTRFS_I(page->mapping->host)->io_tree;
2506         map = &BTRFS_I(page->mapping->host)->extent_tree;
2507         ret = try_release_extent_mapping(map, tree, page, gfp_flags);
2508         if (ret == 1) {
2509                 invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE);
2510                 ClearPagePrivate(page);
2511                 set_page_private(page, 0);
2512                 page_cache_release(page);
2513         }
2514         return ret;
2515 }
2516
2517 static void btrfs_invalidatepage(struct page *page, unsigned long offset)
2518 {
2519         struct extent_io_tree *tree;
2520
2521         tree = &BTRFS_I(page->mapping->host)->io_tree;
2522         extent_invalidatepage(tree, page, offset);
2523         btrfs_releasepage(page, GFP_NOFS);
2524         if (PagePrivate(page)) {
2525                 invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE);
2526                 ClearPagePrivate(page);
2527                 set_page_private(page, 0);
2528                 page_cache_release(page);
2529         }
2530 }
2531
2532 /*
2533  * btrfs_page_mkwrite() is not allowed to change the file size as it gets
2534  * called from a page fault handler when a page is first dirtied. Hence we must
2535  * be careful to check for EOF conditions here. We set the page up correctly
2536  * for a written page which means we get ENOSPC checking when writing into
2537  * holes and correct delalloc and unwritten extent mapping on filesystems that
2538  * support these features.
2539  *
2540  * We are not allowed to take the i_mutex here so we have to play games to
2541  * protect against truncate races as the page could now be beyond EOF.  Because
2542  * vmtruncate() writes the inode size before removing pages, once we have the
2543  * page lock we can determine safely if the page is beyond EOF. If it is not
2544  * beyond EOF, then the page is guaranteed safe against truncation until we
2545  * unlock the page.
2546  */
2547 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
2548 {
2549         struct inode *inode = fdentry(vma->vm_file)->d_inode;
2550         struct btrfs_root *root = BTRFS_I(inode)->root;
2551         unsigned long end;
2552         loff_t size;
2553         int ret;
2554         u64 page_start;
2555
2556         mutex_lock(&root->fs_info->fs_mutex);
2557         ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
2558         mutex_unlock(&root->fs_info->fs_mutex);
2559         if (ret)
2560                 goto out;
2561
2562         ret = -EINVAL;
2563
2564         lock_page(page);
2565         wait_on_page_writeback(page);
2566         size = i_size_read(inode);
2567         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2568
2569         if ((page->mapping != inode->i_mapping) ||
2570             (page_start > size)) {
2571                 /* page got truncated out from underneath us */
2572                 goto out_unlock;
2573         }
2574
2575         /* page is wholly or partially inside EOF */
2576         if (page_start + PAGE_CACHE_SIZE > size)
2577                 end = size & ~PAGE_CACHE_MASK;
2578         else
2579                 end = PAGE_CACHE_SIZE;
2580
2581         ret = btrfs_cow_one_page(inode, page, end);
2582
2583 out_unlock:
2584         unlock_page(page);
2585 out:
2586         return ret;
2587 }
2588
2589 static void btrfs_truncate(struct inode *inode)
2590 {
2591         struct btrfs_root *root = BTRFS_I(inode)->root;
2592         int ret;
2593         struct btrfs_trans_handle *trans;
2594         unsigned long nr;
2595
2596         if (!S_ISREG(inode->i_mode))
2597                 return;
2598         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2599                 return;
2600
2601         btrfs_truncate_page(inode->i_mapping, inode->i_size);
2602
2603         mutex_lock(&root->fs_info->fs_mutex);
2604         trans = btrfs_start_transaction(root, 1);
2605         btrfs_set_trans_block_group(trans, inode);
2606
2607         /* FIXME, add redo link to tree so we don't leak on crash */
2608         ret = btrfs_truncate_in_trans(trans, root, inode,
2609                                       BTRFS_EXTENT_DATA_KEY);
2610         btrfs_update_inode(trans, root, inode);
2611         nr = trans->blocks_used;
2612
2613         ret = btrfs_end_transaction(trans, root);
2614         BUG_ON(ret);
2615         mutex_unlock(&root->fs_info->fs_mutex);
2616         btrfs_btree_balance_dirty(root, nr);
2617         btrfs_throttle(root);
2618 }
2619
2620 static int noinline create_subvol(struct btrfs_root *root, char *name,
2621                                   int namelen)
2622 {
2623         struct btrfs_trans_handle *trans;
2624         struct btrfs_key key;
2625         struct btrfs_root_item root_item;
2626         struct btrfs_inode_item *inode_item;
2627         struct extent_buffer *leaf;
2628         struct btrfs_root *new_root = root;
2629         struct inode *inode;
2630         struct inode *dir;
2631         int ret;
2632         int err;
2633         u64 objectid;
2634         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2635         unsigned long nr = 1;
2636
2637         mutex_lock(&root->fs_info->fs_mutex);
2638         ret = btrfs_check_free_space(root, 1, 0);
2639         if (ret)
2640                 goto fail_commit;
2641
2642         trans = btrfs_start_transaction(root, 1);
2643         BUG_ON(!trans);
2644
2645         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2646                                        0, &objectid);
2647         if (ret)
2648                 goto fail;
2649
2650         leaf = __btrfs_alloc_free_block(trans, root, root->leafsize,
2651                                         objectid, trans->transid, 0, 0,
2652                                         0, 0);
2653         if (IS_ERR(leaf))
2654                 return PTR_ERR(leaf);
2655
2656         btrfs_set_header_nritems(leaf, 0);
2657         btrfs_set_header_level(leaf, 0);
2658         btrfs_set_header_bytenr(leaf, leaf->start);
2659         btrfs_set_header_generation(leaf, trans->transid);
2660         btrfs_set_header_owner(leaf, objectid);
2661
2662         write_extent_buffer(leaf, root->fs_info->fsid,
2663                             (unsigned long)btrfs_header_fsid(leaf),
2664                             BTRFS_FSID_SIZE);
2665         btrfs_mark_buffer_dirty(leaf);
2666
2667         inode_item = &root_item.inode;
2668         memset(inode_item, 0, sizeof(*inode_item));
2669         inode_item->generation = cpu_to_le64(1);
2670         inode_item->size = cpu_to_le64(3);
2671         inode_item->nlink = cpu_to_le32(1);
2672         inode_item->nblocks = cpu_to_le64(1);
2673         inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
2674
2675         btrfs_set_root_bytenr(&root_item, leaf->start);
2676         btrfs_set_root_level(&root_item, 0);
2677         btrfs_set_root_refs(&root_item, 1);
2678         btrfs_set_root_used(&root_item, 0);
2679
2680         memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
2681         root_item.drop_level = 0;
2682
2683         free_extent_buffer(leaf);
2684         leaf = NULL;
2685
2686         btrfs_set_root_dirid(&root_item, new_dirid);
2687
2688         key.objectid = objectid;
2689         key.offset = 1;
2690         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2691         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2692                                 &root_item);
2693         if (ret)
2694                 goto fail;
2695
2696         /*
2697          * insert the directory item
2698          */
2699         key.offset = (u64)-1;
2700         dir = root->fs_info->sb->s_root->d_inode;
2701         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2702                                     name, namelen, dir->i_ino, &key,
2703                                     BTRFS_FT_DIR);
2704         if (ret)
2705                 goto fail;
2706
2707         ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
2708                              name, namelen, objectid,
2709                              root->fs_info->sb->s_root->d_inode->i_ino);
2710         if (ret)
2711                 goto fail;
2712
2713         ret = btrfs_commit_transaction(trans, root);
2714         if (ret)
2715                 goto fail_commit;
2716
2717         new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen);
2718         BUG_ON(!new_root);
2719
2720         trans = btrfs_start_transaction(new_root, 1);
2721         BUG_ON(!trans);
2722
2723         inode = btrfs_new_inode(trans, new_root, "..", 2, new_dirid,
2724                                 new_dirid,
2725                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2726         if (IS_ERR(inode))
2727                 goto fail;
2728         inode->i_op = &btrfs_dir_inode_operations;
2729         inode->i_fop = &btrfs_dir_file_operations;
2730         new_root->inode = inode;
2731
2732         ret = btrfs_insert_inode_ref(trans, new_root, "..", 2, new_dirid,
2733                                      new_dirid);
2734         inode->i_nlink = 1;
2735         inode->i_size = 0;
2736         ret = btrfs_update_inode(trans, new_root, inode);
2737         if (ret)
2738                 goto fail;
2739 fail:
2740         nr = trans->blocks_used;
2741         err = btrfs_commit_transaction(trans, new_root);
2742         if (err && !ret)
2743                 ret = err;
2744 fail_commit:
2745         mutex_unlock(&root->fs_info->fs_mutex);
2746         btrfs_btree_balance_dirty(root, nr);
2747         btrfs_throttle(root);
2748         return ret;
2749 }
2750
2751 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2752 {
2753         struct btrfs_pending_snapshot *pending_snapshot;
2754         struct btrfs_trans_handle *trans;
2755         int ret;
2756         int err;
2757         unsigned long nr = 0;
2758
2759         if (!root->ref_cows)
2760                 return -EINVAL;
2761
2762         mutex_lock(&root->fs_info->fs_mutex);
2763         ret = btrfs_check_free_space(root, 1, 0);
2764         if (ret)
2765                 goto fail_unlock;
2766
2767         pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS);
2768         if (!pending_snapshot) {
2769                 ret = -ENOMEM;
2770                 goto fail_unlock;
2771         }
2772         pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
2773         if (!pending_snapshot->name) {
2774                 ret = -ENOMEM;
2775                 kfree(pending_snapshot);
2776                 goto fail_unlock;
2777         }
2778         memcpy(pending_snapshot->name, name, namelen);
2779         pending_snapshot->name[namelen] = '\0';
2780         trans = btrfs_start_transaction(root, 1);
2781         BUG_ON(!trans);
2782         pending_snapshot->root = root;
2783         list_add(&pending_snapshot->list,
2784                  &trans->transaction->pending_snapshots);
2785         ret = btrfs_update_inode(trans, root, root->inode);
2786         err = btrfs_commit_transaction(trans, root);
2787
2788 fail_unlock:
2789         mutex_unlock(&root->fs_info->fs_mutex);
2790         btrfs_btree_balance_dirty(root, nr);
2791         btrfs_throttle(root);
2792         return ret;
2793 }
2794
2795 unsigned long btrfs_force_ra(struct address_space *mapping,
2796                               struct file_ra_state *ra, struct file *file,
2797                               pgoff_t offset, pgoff_t last_index)
2798 {
2799         pgoff_t req_size;
2800
2801 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2802         req_size = last_index - offset + 1;
2803         offset = page_cache_readahead(mapping, ra, file, offset, req_size);
2804         return offset;
2805 #else
2806         req_size = min(last_index - offset + 1, (pgoff_t)128);
2807         page_cache_sync_readahead(mapping, ra, file, offset, req_size);
2808         return offset + req_size;
2809 #endif
2810 }
2811
2812 int btrfs_defrag_file(struct file *file) {
2813         struct inode *inode = fdentry(file)->d_inode;
2814         struct btrfs_root *root = BTRFS_I(inode)->root;
2815         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2816         struct page *page;
2817         unsigned long last_index;
2818         unsigned long ra_index = 0;
2819         u64 page_start;
2820         u64 page_end;
2821         unsigned long i;
2822         int ret;
2823
2824         mutex_lock(&root->fs_info->fs_mutex);
2825         ret = btrfs_check_free_space(root, inode->i_size, 0);
2826         mutex_unlock(&root->fs_info->fs_mutex);
2827         if (ret)
2828                 return -ENOSPC;
2829
2830         mutex_lock(&inode->i_mutex);
2831         last_index = inode->i_size >> PAGE_CACHE_SHIFT;
2832         for (i = 0; i <= last_index; i++) {
2833                 if (i == ra_index) {
2834                         ra_index = btrfs_force_ra(inode->i_mapping,
2835                                                   &file->f_ra,
2836                                                   file, ra_index, last_index);
2837                 }
2838                 page = grab_cache_page(inode->i_mapping, i);
2839                 if (!page)
2840                         goto out_unlock;
2841                 if (!PageUptodate(page)) {
2842                         btrfs_readpage(NULL, page);
2843                         lock_page(page);
2844                         if (!PageUptodate(page)) {
2845                                 unlock_page(page);
2846                                 page_cache_release(page);
2847                                 goto out_unlock;
2848                         }
2849                 }
2850                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2851                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2852
2853                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2854                 set_extent_delalloc(io_tree, page_start,
2855                                     page_end, GFP_NOFS);
2856
2857                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2858                 set_page_dirty(page);
2859                 unlock_page(page);
2860                 page_cache_release(page);
2861                 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
2862         }
2863
2864 out_unlock:
2865         mutex_unlock(&inode->i_mutex);
2866         return 0;
2867 }
2868
2869 static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
2870 {
2871         u64 new_size;
2872         u64 old_size;
2873         struct btrfs_ioctl_vol_args *vol_args;
2874         struct btrfs_trans_handle *trans;
2875         char *sizestr;
2876         int ret = 0;
2877         int namelen;
2878         int mod = 0;
2879
2880         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2881
2882         if (!vol_args)
2883                 return -ENOMEM;
2884
2885         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2886                 ret = -EFAULT;
2887                 goto out;
2888         }
2889         namelen = strlen(vol_args->name);
2890         if (namelen > BTRFS_VOL_NAME_MAX) {
2891                 ret = -EINVAL;
2892                 goto out;
2893         }
2894
2895         sizestr = vol_args->name;
2896         if (!strcmp(sizestr, "max"))
2897                 new_size = root->fs_info->sb->s_bdev->bd_inode->i_size;
2898         else {
2899                 if (sizestr[0] == '-') {
2900                         mod = -1;
2901                         sizestr++;
2902                 } else if (sizestr[0] == '+') {
2903                         mod = 1;
2904                         sizestr++;
2905                 }
2906                 new_size = btrfs_parse_size(sizestr);
2907                 if (new_size == 0) {
2908                         ret = -EINVAL;
2909                         goto out;
2910                 }
2911         }
2912
2913         mutex_lock(&root->fs_info->fs_mutex);
2914         old_size = btrfs_super_total_bytes(&root->fs_info->super_copy);
2915
2916         if (mod < 0) {
2917                 if (new_size > old_size) {
2918                         ret = -EINVAL;
2919                         goto out_unlock;
2920                 }
2921                 new_size = old_size - new_size;
2922         } else if (mod > 0) {
2923                 new_size = old_size + new_size;
2924         }
2925
2926         if (new_size < 256 * 1024 * 1024) {
2927                 ret = -EINVAL;
2928                 goto out_unlock;
2929         }
2930         if (new_size > root->fs_info->sb->s_bdev->bd_inode->i_size) {
2931                 ret = -EFBIG;
2932                 goto out_unlock;
2933         }
2934
2935         do_div(new_size, root->sectorsize);
2936         new_size *= root->sectorsize;
2937
2938 printk("new size is %Lu\n", new_size);
2939         if (new_size > old_size) {
2940                 trans = btrfs_start_transaction(root, 1);
2941                 ret = btrfs_grow_extent_tree(trans, root, new_size);
2942                 btrfs_commit_transaction(trans, root);
2943         } else {
2944                 ret = btrfs_shrink_extent_tree(root, new_size);
2945         }
2946
2947 out_unlock:
2948         mutex_unlock(&root->fs_info->fs_mutex);
2949 out:
2950         kfree(vol_args);
2951         return ret;
2952 }
2953
2954 static int noinline btrfs_ioctl_snap_create(struct btrfs_root *root,
2955                                             void __user *arg)
2956 {
2957         struct btrfs_ioctl_vol_args *vol_args;
2958         struct btrfs_dir_item *di;
2959         struct btrfs_path *path;
2960         u64 root_dirid;
2961         int namelen;
2962         int ret;
2963
2964         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2965
2966         if (!vol_args)
2967                 return -ENOMEM;
2968
2969         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2970                 ret = -EFAULT;
2971                 goto out;
2972         }
2973
2974         namelen = strlen(vol_args->name);
2975         if (namelen > BTRFS_VOL_NAME_MAX) {
2976                 ret = -EINVAL;
2977                 goto out;
2978         }
2979         if (strchr(vol_args->name, '/')) {
2980                 ret = -EINVAL;
2981                 goto out;
2982         }
2983
2984         path = btrfs_alloc_path();
2985         if (!path) {
2986                 ret = -ENOMEM;
2987                 goto out;
2988         }
2989
2990         root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2991         mutex_lock(&root->fs_info->fs_mutex);
2992         di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2993                             path, root_dirid,
2994                             vol_args->name, namelen, 0);
2995         mutex_unlock(&root->fs_info->fs_mutex);
2996         btrfs_free_path(path);
2997
2998         if (di && !IS_ERR(di)) {
2999                 ret = -EEXIST;
3000                 goto out;
3001         }
3002
3003         if (IS_ERR(di)) {
3004                 ret = PTR_ERR(di);
3005                 goto out;
3006         }
3007
3008         if (root == root->fs_info->tree_root)
3009                 ret = create_subvol(root, vol_args->name, namelen);
3010         else
3011                 ret = create_snapshot(root, vol_args->name, namelen);
3012 out:
3013         kfree(vol_args);
3014         return ret;
3015 }
3016
3017 static int btrfs_ioctl_defrag(struct file *file)
3018 {
3019         struct inode *inode = fdentry(file)->d_inode;
3020         struct btrfs_root *root = BTRFS_I(inode)->root;
3021
3022         switch (inode->i_mode & S_IFMT) {
3023         case S_IFDIR:
3024                 mutex_lock(&root->fs_info->fs_mutex);
3025                 btrfs_defrag_root(root, 0);
3026                 btrfs_defrag_root(root->fs_info->extent_root, 0);
3027                 mutex_unlock(&root->fs_info->fs_mutex);
3028                 break;
3029         case S_IFREG:
3030                 btrfs_defrag_file(file);
3031                 break;
3032         }
3033
3034         return 0;
3035 }
3036
3037 long btrfs_ioctl(struct file *file, unsigned int
3038                 cmd, unsigned long arg)
3039 {
3040         struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
3041
3042         switch (cmd) {
3043         case BTRFS_IOC_SNAP_CREATE:
3044                 return btrfs_ioctl_snap_create(root, (void __user *)arg);
3045         case BTRFS_IOC_DEFRAG:
3046                 return btrfs_ioctl_defrag(file);
3047         case BTRFS_IOC_RESIZE:
3048                 return btrfs_ioctl_resize(root, (void __user *)arg);
3049         }
3050
3051         return -ENOTTY;
3052 }
3053
3054 /*
3055  * Called inside transaction, so use GFP_NOFS
3056  */
3057 struct inode *btrfs_alloc_inode(struct super_block *sb)
3058 {
3059         struct btrfs_inode *ei;
3060
3061         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
3062         if (!ei)
3063                 return NULL;
3064         ei->last_trans = 0;
3065         ei->ordered_trans = 0;
3066         return &ei->vfs_inode;
3067 }
3068
3069 void btrfs_destroy_inode(struct inode *inode)
3070 {
3071         WARN_ON(!list_empty(&inode->i_dentry));
3072         WARN_ON(inode->i_data.nrpages);
3073
3074         btrfs_drop_extent_cache(inode, 0, (u64)-1);
3075         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
3076 }
3077
3078 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
3079 static void init_once(struct kmem_cache * cachep, void *foo)
3080 #else
3081 static void init_once(void * foo, struct kmem_cache * cachep,
3082                       unsigned long flags)
3083 #endif
3084 {
3085         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
3086
3087         inode_init_once(&ei->vfs_inode);
3088 }
3089
3090 void btrfs_destroy_cachep(void)
3091 {
3092         if (btrfs_inode_cachep)
3093                 kmem_cache_destroy(btrfs_inode_cachep);
3094         if (btrfs_trans_handle_cachep)
3095                 kmem_cache_destroy(btrfs_trans_handle_cachep);
3096         if (btrfs_transaction_cachep)
3097                 kmem_cache_destroy(btrfs_transaction_cachep);
3098         if (btrfs_bit_radix_cachep)
3099                 kmem_cache_destroy(btrfs_bit_radix_cachep);
3100         if (btrfs_path_cachep)
3101                 kmem_cache_destroy(btrfs_path_cachep);
3102 }
3103
3104 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
3105                                        unsigned long extra_flags,
3106 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
3107                                        void (*ctor)(struct kmem_cache *, void *)
3108 #else
3109                                        void (*ctor)(void *, struct kmem_cache *,
3110                                                     unsigned long)
3111 #endif
3112                                      )
3113 {
3114         return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
3115                                  SLAB_MEM_SPREAD | extra_flags), ctor
3116 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
3117                                  ,NULL
3118 #endif
3119                                 );
3120 }
3121
3122 int btrfs_init_cachep(void)
3123 {
3124         btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache",
3125                                           sizeof(struct btrfs_inode),
3126                                           0, init_once);
3127         if (!btrfs_inode_cachep)
3128                 goto fail;
3129         btrfs_trans_handle_cachep =
3130                         btrfs_cache_create("btrfs_trans_handle_cache",
3131                                            sizeof(struct btrfs_trans_handle),
3132                                            0, NULL);
3133         if (!btrfs_trans_handle_cachep)
3134                 goto fail;
3135         btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache",
3136                                              sizeof(struct btrfs_transaction),
3137                                              0, NULL);
3138         if (!btrfs_transaction_cachep)
3139                 goto fail;
3140         btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache",
3141                                          sizeof(struct btrfs_path),
3142                                          0, NULL);
3143         if (!btrfs_path_cachep)
3144                 goto fail;
3145         btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256,
3146                                               SLAB_DESTROY_BY_RCU, NULL);
3147         if (!btrfs_bit_radix_cachep)
3148                 goto fail;
3149         return 0;
3150 fail:
3151         btrfs_destroy_cachep();
3152         return -ENOMEM;
3153 }
3154
3155 static int btrfs_getattr(struct vfsmount *mnt,
3156                          struct dentry *dentry, struct kstat *stat)
3157 {
3158         struct inode *inode = dentry->d_inode;
3159         generic_fillattr(inode, stat);
3160         stat->blksize = PAGE_CACHE_SIZE;
3161         stat->blocks = inode->i_blocks + (BTRFS_I(inode)->delalloc_bytes >> 9);
3162         return 0;
3163 }
3164
3165 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
3166                            struct inode * new_dir,struct dentry *new_dentry)
3167 {
3168         struct btrfs_trans_handle *trans;
3169         struct btrfs_root *root = BTRFS_I(old_dir)->root;
3170         struct inode *new_inode = new_dentry->d_inode;
3171         struct inode *old_inode = old_dentry->d_inode;
3172         struct timespec ctime = CURRENT_TIME;
3173         struct btrfs_path *path;
3174         int ret;
3175
3176         if (S_ISDIR(old_inode->i_mode) && new_inode &&
3177             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
3178                 return -ENOTEMPTY;
3179         }
3180
3181         mutex_lock(&root->fs_info->fs_mutex);
3182         ret = btrfs_check_free_space(root, 1, 0);
3183         if (ret)
3184                 goto out_unlock;
3185
3186         trans = btrfs_start_transaction(root, 1);
3187
3188         btrfs_set_trans_block_group(trans, new_dir);
3189         path = btrfs_alloc_path();
3190         if (!path) {
3191                 ret = -ENOMEM;
3192                 goto out_fail;
3193         }
3194
3195         old_dentry->d_inode->i_nlink++;
3196         old_dir->i_ctime = old_dir->i_mtime = ctime;
3197         new_dir->i_ctime = new_dir->i_mtime = ctime;
3198         old_inode->i_ctime = ctime;
3199
3200         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
3201         if (ret)
3202                 goto out_fail;
3203
3204         if (new_inode) {
3205                 new_inode->i_ctime = CURRENT_TIME;
3206                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
3207                 if (ret)
3208                         goto out_fail;
3209         }
3210         ret = btrfs_add_link(trans, new_dentry, old_inode, 1);
3211         if (ret)
3212                 goto out_fail;
3213
3214 out_fail:
3215         btrfs_free_path(path);
3216         btrfs_end_transaction(trans, root);
3217 out_unlock:
3218         mutex_unlock(&root->fs_info->fs_mutex);
3219         return ret;
3220 }
3221
3222 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
3223                          const char *symname)
3224 {
3225         struct btrfs_trans_handle *trans;
3226         struct btrfs_root *root = BTRFS_I(dir)->root;
3227         struct btrfs_path *path;
3228         struct btrfs_key key;
3229         struct inode *inode = NULL;
3230         int err;
3231         int drop_inode = 0;
3232         u64 objectid;
3233         int name_len;
3234         int datasize;
3235         unsigned long ptr;
3236         struct btrfs_file_extent_item *ei;
3237         struct extent_buffer *leaf;
3238         unsigned long nr = 0;
3239
3240         name_len = strlen(symname) + 1;
3241         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
3242                 return -ENAMETOOLONG;
3243
3244         mutex_lock(&root->fs_info->fs_mutex);
3245         err = btrfs_check_free_space(root, 1, 0);
3246         if (err)
3247                 goto out_fail;
3248
3249         trans = btrfs_start_transaction(root, 1);
3250         btrfs_set_trans_block_group(trans, dir);
3251
3252         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
3253         if (err) {
3254                 err = -ENOSPC;
3255                 goto out_unlock;
3256         }
3257
3258         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
3259                                 dentry->d_name.len,
3260                                 dentry->d_parent->d_inode->i_ino, objectid,
3261                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
3262         err = PTR_ERR(inode);
3263         if (IS_ERR(inode))
3264                 goto out_unlock;
3265
3266         btrfs_set_trans_block_group(trans, inode);
3267         err = btrfs_add_nondir(trans, dentry, inode, 0);
3268         if (err)
3269                 drop_inode = 1;
3270         else {
3271                 inode->i_mapping->a_ops = &btrfs_aops;
3272                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3273                 inode->i_fop = &btrfs_file_operations;
3274                 inode->i_op = &btrfs_file_inode_operations;
3275                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
3276                 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
3277                                      inode->i_mapping, GFP_NOFS);
3278                 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
3279                                      inode->i_mapping, GFP_NOFS);
3280                 BTRFS_I(inode)->delalloc_bytes = 0;
3281                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
3282         }
3283         dir->i_sb->s_dirt = 1;
3284         btrfs_update_inode_block_group(trans, inode);
3285         btrfs_update_inode_block_group(trans, dir);
3286         if (drop_inode)
3287                 goto out_unlock;
3288
3289         path = btrfs_alloc_path();
3290         BUG_ON(!path);
3291         key.objectid = inode->i_ino;
3292         key.offset = 0;
3293         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
3294         datasize = btrfs_file_extent_calc_inline_size(name_len);
3295         err = btrfs_insert_empty_item(trans, root, path, &key,
3296                                       datasize);
3297         if (err) {
3298                 drop_inode = 1;
3299                 goto out_unlock;
3300         }
3301         leaf = path->nodes[0];
3302         ei = btrfs_item_ptr(leaf, path->slots[0],
3303                             struct btrfs_file_extent_item);
3304         btrfs_set_file_extent_generation(leaf, ei, trans->transid);
3305         btrfs_set_file_extent_type(leaf, ei,
3306                                    BTRFS_FILE_EXTENT_INLINE);
3307         ptr = btrfs_file_extent_inline_start(ei);
3308         write_extent_buffer(leaf, symname, ptr, name_len);
3309         btrfs_mark_buffer_dirty(leaf);
3310         btrfs_free_path(path);
3311
3312         inode->i_op = &btrfs_symlink_inode_operations;
3313         inode->i_mapping->a_ops = &btrfs_symlink_aops;
3314         inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3315         inode->i_size = name_len - 1;
3316         err = btrfs_update_inode(trans, root, inode);
3317         if (err)
3318                 drop_inode = 1;
3319
3320 out_unlock:
3321         nr = trans->blocks_used;
3322         btrfs_end_transaction(trans, root);
3323 out_fail:
3324         mutex_unlock(&root->fs_info->fs_mutex);
3325         if (drop_inode) {
3326                 inode_dec_link_count(inode);
3327                 iput(inode);
3328         }
3329         btrfs_btree_balance_dirty(root, nr);
3330         btrfs_throttle(root);
3331         return err;
3332 }
3333
3334 static int btrfs_permission(struct inode *inode, int mask,
3335                             struct nameidata *nd)
3336 {
3337         if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE))
3338                 return -EACCES;
3339         return generic_permission(inode, mask, NULL);
3340 }
3341
3342 static struct inode_operations btrfs_dir_inode_operations = {
3343         .lookup         = btrfs_lookup,
3344         .create         = btrfs_create,
3345         .unlink         = btrfs_unlink,
3346         .link           = btrfs_link,
3347         .mkdir          = btrfs_mkdir,
3348         .rmdir          = btrfs_rmdir,
3349         .rename         = btrfs_rename,
3350         .symlink        = btrfs_symlink,
3351         .setattr        = btrfs_setattr,
3352         .mknod          = btrfs_mknod,
3353         .setxattr       = generic_setxattr,
3354         .getxattr       = generic_getxattr,
3355         .listxattr      = btrfs_listxattr,
3356         .removexattr    = generic_removexattr,
3357         .permission     = btrfs_permission,
3358 };
3359 static struct inode_operations btrfs_dir_ro_inode_operations = {
3360         .lookup         = btrfs_lookup,
3361         .permission     = btrfs_permission,
3362 };
3363 static struct file_operations btrfs_dir_file_operations = {
3364         .llseek         = generic_file_llseek,
3365         .read           = generic_read_dir,
3366         .readdir        = btrfs_readdir,
3367         .unlocked_ioctl = btrfs_ioctl,
3368 #ifdef CONFIG_COMPAT
3369         .compat_ioctl   = btrfs_ioctl,
3370 #endif
3371 };
3372
3373 static struct extent_io_ops btrfs_extent_io_ops = {
3374         .fill_delalloc = run_delalloc_range,
3375         .submit_bio_hook = btrfs_submit_bio_hook,
3376         .merge_bio_hook = btrfs_merge_bio_hook,
3377         .readpage_io_hook = btrfs_readpage_io_hook,
3378         .readpage_end_io_hook = btrfs_readpage_end_io_hook,
3379         .readpage_io_failed_hook = btrfs_readpage_io_failed_hook,
3380         .set_bit_hook = btrfs_set_bit_hook,
3381         .clear_bit_hook = btrfs_clear_bit_hook,
3382 };
3383
3384 static struct address_space_operations btrfs_aops = {
3385         .readpage       = btrfs_readpage,
3386         .writepage      = btrfs_writepage,
3387         .writepages     = btrfs_writepages,
3388         .readpages      = btrfs_readpages,
3389         .sync_page      = block_sync_page,
3390         .bmap           = btrfs_bmap,
3391         .direct_IO      = btrfs_direct_IO,
3392         .invalidatepage = btrfs_invalidatepage,
3393         .releasepage    = btrfs_releasepage,
3394         .set_page_dirty = __set_page_dirty_nobuffers,
3395 };
3396
3397 static struct address_space_operations btrfs_symlink_aops = {
3398         .readpage       = btrfs_readpage,
3399         .writepage      = btrfs_writepage,
3400         .invalidatepage = btrfs_invalidatepage,
3401         .releasepage    = btrfs_releasepage,
3402 };
3403
3404 static struct inode_operations btrfs_file_inode_operations = {
3405         .truncate       = btrfs_truncate,
3406         .getattr        = btrfs_getattr,
3407         .setattr        = btrfs_setattr,
3408         .setxattr       = generic_setxattr,
3409         .getxattr       = generic_getxattr,
3410         .listxattr      = btrfs_listxattr,
3411         .removexattr    = generic_removexattr,
3412         .permission     = btrfs_permission,
3413 };
3414 static struct inode_operations btrfs_special_inode_operations = {
3415         .getattr        = btrfs_getattr,
3416         .setattr        = btrfs_setattr,
3417         .permission     = btrfs_permission,
3418 };
3419 static struct inode_operations btrfs_symlink_inode_operations = {
3420         .readlink       = generic_readlink,
3421         .follow_link    = page_follow_link_light,
3422         .put_link       = page_put_link,
3423         .permission     = btrfs_permission,
3424 };