fs/btrfs/inode.c

   1 /*
   2  * Copyright (C) 2007 Oracle.  All rights reserved.
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public
   6  * License v2 as published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope that it will be useful,
   9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11  * General Public License for more details.
  12  *
  13  * You should have received a copy of the GNU General Public
  14  * License along with this program; if not, write to the
  15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16  * Boston, MA 02111-1307, USA.
  17  */
  18
  19 #include <linux/kernel.h>
  20 #include <linux/bio.h>
  21 #include <linux/buffer_head.h>
  22 #include <linux/file.h>
  23 #include <linux/fs.h>
  24 #include <linux/pagemap.h>
  25 #include <linux/highmem.h>
  26 #include <linux/time.h>
  27 #include <linux/init.h>
  28 #include <linux/string.h>
  29 #include <linux/smp_lock.h>
  30 #include <linux/backing-dev.h>
  31 #include <linux/mpage.h>
  32 #include <linux/swap.h>
  33 #include <linux/writeback.h>
  34 #include <linux/statfs.h>
  35 #include <linux/compat.h>
  36 #include <linux/bit_spinlock.h>
  37 #include <linux/version.h>
  38 #include <linux/xattr.h>
  39 #include <linux/posix_acl.h>
  40 #include "ctree.h"
  41 #include "disk-io.h"
  42 #include "transaction.h"
  43 #include "btrfs_inode.h"
  44 #include "ioctl.h"
  45 #include "print-tree.h"
  46 #include "volumes.h"
  47 #include "ordered-data.h"
  48 #include "xattr.h"
  49 #include "compat.h"
  50 #include "tree-log.h"
  51
  52 struct btrfs_iget_args {
  53         u64 ino;
  54         struct btrfs_root *root;
  55 };
  56
  57 static struct inode_operations btrfs_dir_inode_operations;
  58 static struct inode_operations btrfs_symlink_inode_operations;
  59 static struct inode_operations btrfs_dir_ro_inode_operations;
  60 static struct inode_operations btrfs_special_inode_operations;
  61 static struct inode_operations btrfs_file_inode_operations;
  62 static struct address_space_operations btrfs_aops;
  63 static struct address_space_operations btrfs_symlink_aops;
  64 static struct file_operations btrfs_dir_file_operations;
  65 static struct extent_io_ops btrfs_extent_io_ops;
  66
  67 static struct kmem_cache *btrfs_inode_cachep;
  68 struct kmem_cache *btrfs_trans_handle_cachep;
  69 struct kmem_cache *btrfs_transaction_cachep;
  70 struct kmem_cache *btrfs_bit_radix_cachep;
  71 struct kmem_cache *btrfs_path_cachep;
  72
  73 #define S_SHIFT 12
  74 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
  75         [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
  76         [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
  77         [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
  78         [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
  79         [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
  80         [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
  81         [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
  82 };
  83
  84 static void btrfs_truncate(struct inode *inode);
  85
  86 /*
  87  * a very lame attempt at stopping writes when the FS is 85% full.  There
  88  * are countless ways this is incorrect, but it is better than nothing.
  89  */
  90 int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
  91                            int for_del)
  92 {
  93         u64 total;
  94         u64 used;
  95         u64 thresh;
  96         unsigned long flags;
  97         int ret = 0;
  98
  99         spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
 100         total = btrfs_super_total_bytes(&root->fs_info->super_copy);
 101         used = btrfs_super_bytes_used(&root->fs_info->super_copy);
 102         if (for_del)
 103                 thresh = total * 90;
 104         else
 105                 thresh = total * 85;
 106
 107         do_div(thresh, 100);
 108
 109         if (used + root->fs_info->delalloc_bytes + num_required > thresh)
 110                 ret = -ENOSPC;
 111         spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
 112         return ret;
 113 }
 114
 115 /*
 116  * when extent_io.c finds a delayed allocation range in the file,
 117  * the call backs end up in this code.  The basic idea is to
 118  * allocate extents on disk for the range, and create ordered data structs
 119  * in ram to track those extents.
 120  */
 121 static int cow_file_range(struct inode *inode, u64 start, u64 end)
 122 {
 123         struct btrfs_root *root = BTRFS_I(inode)->root;
 124         struct btrfs_trans_handle *trans;
 125         u64 alloc_hint = 0;
 126         u64 num_bytes;
 127         u64 cur_alloc_size;
 128         u64 blocksize = root->sectorsize;
 129         u64 orig_num_bytes;
 130         struct btrfs_key ins;
 131         struct extent_map *em;
 132         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 133         int ret = 0;
 134
 135         trans = btrfs_join_transaction(root, 1);
 136         BUG_ON(!trans);
 137         btrfs_set_trans_block_group(trans, inode);
 138
 139         num_bytes = (end - start + blocksize) & ~(blocksize - 1);
 140         num_bytes = max(blocksize,  num_bytes);
 141         orig_num_bytes = num_bytes;
 142
 143         if (alloc_hint == EXTENT_MAP_INLINE)
 144                 goto out;
 145
 146         BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy));
 147         mutex_lock(&BTRFS_I(inode)->extent_mutex);
 148         btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
 149         mutex_unlock(&BTRFS_I(inode)->extent_mutex);
 150
 151         while(num_bytes > 0) {
 152                 cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
 153                 ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
 154                                            root->sectorsize, 0, alloc_hint,
 155                                            (u64)-1, &ins, 1);
 156                 if (ret) {
 157                         WARN_ON(1);
 158                         goto out;
 159                 }
 160                 em = alloc_extent_map(GFP_NOFS);
 161                 em->start = start;
 162                 em->len = ins.offset;
 163                 em->block_start = ins.objectid;
 164                 em->bdev = root->fs_info->fs_devices->latest_bdev;
 165                 mutex_lock(&BTRFS_I(inode)->extent_mutex);
 166                 set_bit(EXTENT_FLAG_PINNED, &em->flags);
 167                 while(1) {
 168                         spin_lock(&em_tree->lock);
 169                         ret = add_extent_mapping(em_tree, em);
 170                         spin_unlock(&em_tree->lock);
 171                         if (ret != -EEXIST) {
 172                                 free_extent_map(em);
 173                                 break;
 174                         }
 175                         btrfs_drop_extent_cache(inode, start,
 176                                                 start + ins.offset - 1, 0);
 177                 }
 178                 mutex_unlock(&BTRFS_I(inode)->extent_mutex);
 179
 180                 cur_alloc_size = ins.offset;
 181                 ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
 182                                                ins.offset, 0);
 183                 BUG_ON(ret);
 184                 if (num_bytes < cur_alloc_size) {
 185                         printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes,
 186                                cur_alloc_size);
 187                         break;
 188                 }
 189                 num_bytes -= cur_alloc_size;
 190                 alloc_hint = ins.objectid + ins.offset;
 191                 start += cur_alloc_size;
 192         }
 193 out:
 194         btrfs_end_transaction(trans, root);
 195         return ret;
 196 }
 197
 198 /*
 199  * when nowcow writeback call back.  This checks for snapshots or COW copies
 200  * of the extents that exist in the file, and COWs the file as required.
 201  *
 202  * If no cow copies or snapshots exist, we write directly to the existing
 203  * blocks on disk
 204  */
 205 static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
 206 {
 207         u64 extent_start;
 208         u64 extent_end;
 209         u64 bytenr;
 210         u64 loops = 0;
 211         u64 total_fs_bytes;
 212         struct btrfs_root *root = BTRFS_I(inode)->root;
 213         struct btrfs_block_group_cache *block_group;
 214         struct btrfs_trans_handle *trans;
 215         struct extent_buffer *leaf;
 216         int found_type;
 217         struct btrfs_path *path;
 218         struct btrfs_file_extent_item *item;
 219         int ret;
 220         int err = 0;
 221         struct btrfs_key found_key;
 222
 223         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
 224         path = btrfs_alloc_path();
 225         BUG_ON(!path);
 226         trans = btrfs_join_transaction(root, 1);
 227         BUG_ON(!trans);
 228 again:
 229         ret = btrfs_lookup_file_extent(NULL, root, path,
 230                                        inode->i_ino, start, 0);
 231         if (ret < 0) {
 232                 err = ret;
 233                 goto out;
 234         }
 235
 236         if (ret != 0) {
 237                 if (path->slots[0] == 0)
 238                         goto not_found;
 239                 path->slots[0]--;
 240         }
 241
 242         leaf = path->nodes[0];
 243         item = btrfs_item_ptr(leaf, path->slots[0],
 244                               struct btrfs_file_extent_item);
 245
 246         /* are we inside the extent that was found? */
 247         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 248         found_type = btrfs_key_type(&found_key);
 249         if (found_key.objectid != inode->i_ino ||
 250             found_type != BTRFS_EXTENT_DATA_KEY)
 251                 goto not_found;
 252
 253         found_type = btrfs_file_extent_type(leaf, item);
 254         extent_start = found_key.offset;
 255         if (found_type == BTRFS_FILE_EXTENT_REG) {
 256                 u64 extent_num_bytes;
 257
 258                 extent_num_bytes = btrfs_file_extent_num_bytes(leaf, item);
 259                 extent_end = extent_start + extent_num_bytes;
 260                 err = 0;
 261
 262                 if (loops && start != extent_start)
 263                         goto not_found;
 264
 265                 if (start < extent_start || start >= extent_end)
 266                         goto not_found;
 267
 268                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
 269                 if (bytenr == 0)
 270                         goto not_found;
 271
 272                 if (btrfs_cross_ref_exists(trans, root, &found_key, bytenr))
 273                         goto not_found;
 274                 /*
 275                  * we may be called by the resizer, make sure we're inside
 276                  * the limits of the FS
 277                  */
 278                 block_group = btrfs_lookup_block_group(root->fs_info,
 279                                                        bytenr);
 280                 if (!block_group || block_group->ro)
 281                         goto not_found;
 282
 283                 bytenr += btrfs_file_extent_offset(leaf, item);
 284                 extent_num_bytes = min(end + 1, extent_end) - start;
 285                 ret = btrfs_add_ordered_extent(inode, start, bytenr,
 286                                                 extent_num_bytes, 1);
 287                 if (ret) {
 288                         err = ret;
 289                         goto out;
 290                 }
 291
 292                 btrfs_release_path(root, path);
 293                 start = extent_end;
 294                 if (start <= end) {
 295                         loops++;
 296                         goto again;
 297                 }
 298         } else {
 299 not_found:
 300                 btrfs_end_transaction(trans, root);
 301                 btrfs_free_path(path);
 302                 return cow_file_range(inode, start, end);
 303         }
 304 out:
 305         WARN_ON(err);
 306         btrfs_end_transaction(trans, root);
 307         btrfs_free_path(path);
 308         return err;
 309 }
 310
 311 /*
 312  * extent_io.c call back to do delayed allocation processing
 313  */
 314 static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
 315 {
 316         struct btrfs_root *root = BTRFS_I(inode)->root;
 317         int ret;
 318
 319         if (btrfs_test_opt(root, NODATACOW) ||
 320             btrfs_test_flag(inode, NODATACOW))
 321                 ret = run_delalloc_nocow(inode, start, end);
 322         else
 323                 ret = cow_file_range(inode, start, end);
 324
 325         return ret;
 326 }
 327
 328 /*
 329  * extent_io.c set_bit_hook, used to track delayed allocation
 330  * bytes in this file, and to maintain the list of inodes that
 331  * have pending delalloc work to be done.
 332  */
 333 int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
 334                        unsigned long old, unsigned long bits)
 335 {
 336         unsigned long flags;
 337         if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
 338                 struct btrfs_root *root = BTRFS_I(inode)->root;
 339                 spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
 340                 BTRFS_I(inode)->delalloc_bytes += end - start + 1;
 341                 root->fs_info->delalloc_bytes += end - start + 1;
 342                 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
 343                         list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
 344                                       &root->fs_info->delalloc_inodes);
 345                 }
 346                 spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
 347         }
 348         return 0;
 349 }
 350
 351 /*
 352  * extent_io.c clear_bit_hook, see set_bit_hook for why
 353  */
 354 int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
 355                          unsigned long old, unsigned long bits)
 356 {
 357         if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
 358                 struct btrfs_root *root = BTRFS_I(inode)->root;
 359                 unsigned long flags;
 360
 361                 spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
 362                 if (end - start + 1 > root->fs_info->delalloc_bytes) {
 363                         printk("warning: delalloc account %Lu %Lu\n",
 364                                end - start + 1, root->fs_info->delalloc_bytes);
 365                         root->fs_info->delalloc_bytes = 0;
 366                         BTRFS_I(inode)->delalloc_bytes = 0;
 367                 } else {
 368                         root->fs_info->delalloc_bytes -= end - start + 1;
 369                         BTRFS_I(inode)->delalloc_bytes -= end - start + 1;
 370                 }
 371                 if (BTRFS_I(inode)->delalloc_bytes == 0 &&
 372                     !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
 373                         list_del_init(&BTRFS_I(inode)->delalloc_inodes);
 374                 }
 375                 spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
 376         }
 377         return 0;
 378 }
 379
 380 /*
 381  * extent_io.c merge_bio_hook, this must check the chunk tree to make sure
 382  * we don't create bios that span stripes or chunks
 383  */
 384 int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
 385                          size_t size, struct bio *bio)
 386 {
 387         struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
 388         struct btrfs_mapping_tree *map_tree;
 389         u64 logical = bio->bi_sector << 9;
 390         u64 length = 0;
 391         u64 map_length;
 392         int ret;
 393
 394         length = bio->bi_size;
 395         map_tree = &root->fs_info->mapping_tree;
 396         map_length = length;
 397         ret = btrfs_map_block(map_tree, READ, logical,
 398                               &map_length, NULL, 0);
 399
 400         if (map_length < length + size) {
 401                 return 1;
 402         }
 403         return 0;
 404 }
 405
 406 /*
 407  * in order to insert checksums into the metadata in large chunks,
 408  * we wait until bio submission time.   All the pages in the bio are
 409  * checksummed and sums are attached onto the ordered extent record.
 410  *
 411  * At IO completion time the cums attached on the ordered extent record
 412  * are inserted into the btree
 413  */
 414 int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 415                           int mirror_num)
 416 {
 417         struct btrfs_root *root = BTRFS_I(inode)->root;
 418         int ret = 0;
 419
 420         ret = btrfs_csum_one_bio(root, inode, bio);
 421         BUG_ON(ret);
 422
 423         return btrfs_map_bio(root, rw, bio, mirror_num, 1);
 424 }
 425
 426 /*
 427  * extent_io.c submission hook. This does the right thing for csum calculation on write,
 428  * or reading the csums from the tree before a read
 429  */
 430 int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 431                           int mirror_num)
 432 {
 433         struct btrfs_root *root = BTRFS_I(inode)->root;
 434         int ret = 0;
 435
 436         ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
 437         BUG_ON(ret);
 438
 439         if (btrfs_test_opt(root, NODATASUM) ||
 440             btrfs_test_flag(inode, NODATASUM)) {
 441                 goto mapit;
 442         }
 443
 444         if (!(rw & (1 << BIO_RW))) {
 445                 btrfs_lookup_bio_sums(root, inode, bio);
 446                 goto mapit;
 447         }
 448         return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
 449                                    inode, rw, bio, mirror_num,
 450                                    __btrfs_submit_bio_hook);
 451 mapit:
 452         return btrfs_map_bio(root, rw, bio, mirror_num, 0);
 453 }
 454
 455 /*
 456  * given a list of ordered sums record them in the inode.  This happens
 457  * at IO completion time based on sums calculated at bio submission time.
 458  */
 459 static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
 460                              struct inode *inode, u64 file_offset,
 461                              struct list_head *list)
 462 {
 463         struct list_head *cur;
 464         struct btrfs_ordered_sum *sum;
 465
 466         btrfs_set_trans_block_group(trans, inode);
 467         list_for_each(cur, list) {
 468                 sum = list_entry(cur, struct btrfs_ordered_sum, list);
 469                 btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root,
 470                                        inode, sum);
 471         }
 472         return 0;
 473 }
 474
 475 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end)
 476 {
 477         return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
 478                                    GFP_NOFS);
 479 }
 480
 481 /* see btrfs_writepage_start_hook for details on why this is required */
 482 struct btrfs_writepage_fixup {
 483         struct page *page;
 484         struct btrfs_work work;
 485 };
 486
 487 void btrfs_writepage_fixup_worker(struct btrfs_work *work)
 488 {
 489         struct btrfs_writepage_fixup *fixup;
 490         struct btrfs_ordered_extent *ordered;
 491         struct page *page;
 492         struct inode *inode;
 493         u64 page_start;
 494         u64 page_end;
 495
 496         fixup = container_of(work, struct btrfs_writepage_fixup, work);
 497         page = fixup->page;
 498 again:
 499         lock_page(page);
 500         if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
 501                 ClearPageChecked(page);
 502                 goto out_page;
 503         }
 504
 505         inode = page->mapping->host;
 506         page_start = page_offset(page);
 507         page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
 508
 509         lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
 510
 511         /* already ordered? We're done */
 512         if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
 513                              EXTENT_ORDERED, 0)) {
 514                 goto out;
 515         }
 516
 517         ordered = btrfs_lookup_ordered_extent(inode, page_start);
 518         if (ordered) {
 519                 unlock_extent(&BTRFS_I(inode)->io_tree, page_start,
 520                               page_end, GFP_NOFS);
 521                 unlock_page(page);
 522                 btrfs_start_ordered_extent(inode, ordered, 1);
 523                 goto again;
 524         }
 525
 526         btrfs_set_extent_delalloc(inode, page_start, page_end);
 527         ClearPageChecked(page);
 528 out:
 529         unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
 530 out_page:
 531         unlock_page(page);
 532         page_cache_release(page);
 533 }
 534
 535 /*
 536  * There are a few paths in the higher layers of the kernel that directly
 537  * set the page dirty bit without asking the filesystem if it is a
 538  * good idea.  This causes problems because we want to make sure COW
 539  * properly happens and the data=ordered rules are followed.
 540  *
 541  * In our case any range that doesn't have the EXTENT_ORDERED bit set
 542  * hasn't been properly setup for IO.  We kick off an async process
 543  * to fix it up.  The async helper will wait for ordered extents, set
 544  * the delalloc bit and make it safe to write the page.
 545  */
 546 int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
 547 {
 548         struct inode *inode = page->mapping->host;
 549         struct btrfs_writepage_fixup *fixup;
 550         struct btrfs_root *root = BTRFS_I(inode)->root;
 551         int ret;
 552
 553         ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
 554                              EXTENT_ORDERED, 0);
 555         if (ret)
 556                 return 0;
 557
 558         if (PageChecked(page))
 559                 return -EAGAIN;
 560
 561         fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
 562         if (!fixup)
 563                 return -EAGAIN;
 564
 565         SetPageChecked(page);
 566         page_cache_get(page);
 567         fixup->work.func = btrfs_writepage_fixup_worker;
 568         fixup->page = page;
 569         btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work);
 570         return -EAGAIN;
 571 }
 572
 573 /* as ordered data IO finishes, this gets called so we can finish
 574  * an ordered extent if the range of bytes in the file it covers are
 575  * fully written.
 576  */
 577 static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 578 {
 579         struct btrfs_root *root = BTRFS_I(inode)->root;
 580         struct btrfs_trans_handle *trans;
 581         struct btrfs_ordered_extent *ordered_extent;
 582         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 583         struct btrfs_file_extent_item *extent_item;
 584         struct btrfs_path *path = NULL;
 585         struct extent_buffer *leaf;
 586         u64 alloc_hint = 0;
 587         struct list_head list;
 588         struct btrfs_key ins;
 589         int ret;
 590
 591         ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1);
 592         if (!ret)
 593                 return 0;
 594
 595         trans = btrfs_join_transaction(root, 1);
 596
 597         ordered_extent = btrfs_lookup_ordered_extent(inode, start);
 598         BUG_ON(!ordered_extent);
 599         if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
 600                 goto nocow;
 601
 602         path = btrfs_alloc_path();
 603         BUG_ON(!path);
 604
 605         lock_extent(io_tree, ordered_extent->file_offset,
 606                     ordered_extent->file_offset + ordered_extent->len - 1,
 607                     GFP_NOFS);
 608
 609         INIT_LIST_HEAD(&list);
 610
 611         mutex_lock(&BTRFS_I(inode)->extent_mutex);
 612
 613         ret = btrfs_drop_extents(trans, root, inode,
 614                                  ordered_extent->file_offset,
 615                                  ordered_extent->file_offset +
 616                                  ordered_extent->len,
 617                                  ordered_extent->file_offset, &alloc_hint);
 618         BUG_ON(ret);
 619
 620         ins.objectid = inode->i_ino;
 621         ins.offset = ordered_extent->file_offset;
 622         ins.type = BTRFS_EXTENT_DATA_KEY;
 623         ret = btrfs_insert_empty_item(trans, root, path, &ins,
 624                                       sizeof(*extent_item));
 625         BUG_ON(ret);
 626         leaf = path->nodes[0];
 627         extent_item = btrfs_item_ptr(leaf, path->slots[0],
 628                                      struct btrfs_file_extent_item);
 629         btrfs_set_file_extent_generation(leaf, extent_item, trans->transid);
 630         btrfs_set_file_extent_type(leaf, extent_item, BTRFS_FILE_EXTENT_REG);
 631         btrfs_set_file_extent_disk_bytenr(leaf, extent_item,
 632                                           ordered_extent->start);
 633         btrfs_set_file_extent_disk_num_bytes(leaf, extent_item,
 634                                              ordered_extent->len);
 635         btrfs_set_file_extent_offset(leaf, extent_item, 0);
 636         btrfs_set_file_extent_num_bytes(leaf, extent_item,
 637                                         ordered_extent->len);
 638         btrfs_mark_buffer_dirty(leaf);
 639
 640         btrfs_drop_extent_cache(inode, ordered_extent->file_offset,
 641                                 ordered_extent->file_offset +
 642                                 ordered_extent->len - 1, 0);
 643         mutex_unlock(&BTRFS_I(inode)->extent_mutex);
 644
 645         ins.objectid = ordered_extent->start;
 646         ins.offset = ordered_extent->len;
 647         ins.type = BTRFS_EXTENT_ITEM_KEY;
 648         ret = btrfs_alloc_reserved_extent(trans, root, leaf->start,
 649                                           root->root_key.objectid,
 650                                           trans->transid, inode->i_ino,
 651                                           ordered_extent->file_offset, &ins);
 652         BUG_ON(ret);
 653         btrfs_release_path(root, path);
 654
 655         inode->i_blocks += ordered_extent->len >> 9;
 656         unlock_extent(io_tree, ordered_extent->file_offset,
 657                     ordered_extent->file_offset + ordered_extent->len - 1,
 658                     GFP_NOFS);
 659 nocow:
 660         add_pending_csums(trans, inode, ordered_extent->file_offset,
 661                           &ordered_extent->list);
 662
 663         mutex_lock(&BTRFS_I(inode)->extent_mutex);
 664         btrfs_ordered_update_i_size(inode, ordered_extent);
 665         btrfs_update_inode(trans, root, inode);
 666         btrfs_remove_ordered_extent(inode, ordered_extent);
 667         mutex_unlock(&BTRFS_I(inode)->extent_mutex);
 668
 669         /* once for us */
 670         btrfs_put_ordered_extent(ordered_extent);
 671         /* once for the tree */
 672         btrfs_put_ordered_extent(ordered_extent);
 673
 674         btrfs_end_transaction(trans, root);
 675         if (path)
 676                 btrfs_free_path(path);
 677         return 0;
 678 }
 679
 680 int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
 681                                 struct extent_state *state, int uptodate)
 682 {
 683         return btrfs_finish_ordered_io(page->mapping->host, start, end);
 684 }
 685
 686 /*
 687  * When IO fails, either with EIO or csum verification fails, we
 688  * try other mirrors that might have a good copy of the data.  This
 689  * io_failure_record is used to record state as we go through all the
 690  * mirrors.  If another mirror has good data, the page is set up to date
 691  * and things continue.  If a good mirror can't be found, the original
 692  * bio end_io callback is called to indicate things have failed.
 693  */
 694 struct io_failure_record {
 695         struct page *page;
 696         u64 start;
 697         u64 len;
 698         u64 logical;
 699         int last_mirror;
 700 };
 701
 702 int btrfs_io_failed_hook(struct bio *failed_bio,
 703                          struct page *page, u64 start, u64 end,
 704                          struct extent_state *state)
 705 {
 706         struct io_failure_record *failrec = NULL;
 707         u64 private;
 708         struct extent_map *em;
 709         struct inode *inode = page->mapping->host;
 710         struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
 711         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 712         struct bio *bio;
 713         int num_copies;
 714         int ret;
 715         int rw;
 716         u64 logical;
 717
 718         ret = get_state_private(failure_tree, start, &private);
 719         if (ret) {
 720                 failrec = kmalloc(sizeof(*failrec), GFP_NOFS);
 721                 if (!failrec)
 722                         return -ENOMEM;
 723                 failrec->start = start;
 724                 failrec->len = end - start + 1;
 725                 failrec->last_mirror = 0;
 726
 727                 spin_lock(&em_tree->lock);
 728                 em = lookup_extent_mapping(em_tree, start, failrec->len);
 729                 if (em->start > start || em->start + em->len < start) {
 730                         free_extent_map(em);
 731                         em = NULL;
 732                 }
 733                 spin_unlock(&em_tree->lock);
 734
 735                 if (!em || IS_ERR(em)) {
 736                         kfree(failrec);
 737                         return -EIO;
 738                 }
 739                 logical = start - em->start;
 740                 logical = em->block_start + logical;
 741                 failrec->logical = logical;
 742                 free_extent_map(em);
 743                 set_extent_bits(failure_tree, start, end, EXTENT_LOCKED |
 744                                 EXTENT_DIRTY, GFP_NOFS);
 745                 set_state_private(failure_tree, start,
 746                                  (u64)(unsigned long)failrec);
 747         } else {
 748                 failrec = (struct io_failure_record *)(unsigned long)private;
 749         }
 750         num_copies = btrfs_num_copies(
 751                               &BTRFS_I(inode)->root->fs_info->mapping_tree,
 752                               failrec->logical, failrec->len);
 753         failrec->last_mirror++;
 754         if (!state) {
 755                 spin_lock_irq(&BTRFS_I(inode)->io_tree.lock);
 756                 state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
 757                                                     failrec->start,
 758                                                     EXTENT_LOCKED);
 759                 if (state && state->start != failrec->start)
 760                         state = NULL;
 761                 spin_unlock_irq(&BTRFS_I(inode)->io_tree.lock);
 762         }
 763         if (!state || failrec->last_mirror > num_copies) {
 764                 set_state_private(failure_tree, failrec->start, 0);
 765                 clear_extent_bits(failure_tree, failrec->start,
 766                                   failrec->start + failrec->len - 1,
 767                                   EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
 768                 kfree(failrec);
 769                 return -EIO;
 770         }
 771         bio = bio_alloc(GFP_NOFS, 1);
 772         bio->bi_private = state;
 773         bio->bi_end_io = failed_bio->bi_end_io;
 774         bio->bi_sector = failrec->logical >> 9;
 775         bio->bi_bdev = failed_bio->bi_bdev;
 776         bio->bi_size = 0;
 777         bio_add_page(bio, page, failrec->len, start - page_offset(page));
 778         if (failed_bio->bi_rw & (1 << BIO_RW))
 779                 rw = WRITE;
 780         else
 781                 rw = READ;
 782
 783         BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
 784                                                       failrec->last_mirror);
 785         return 0;
 786 }
 787
 788 /*
 789  * each time an IO finishes, we do a fast check in the IO failure tree
 790  * to see if we need to process or clean up an io_failure_record
 791  */
 792 int btrfs_clean_io_failures(struct inode *inode, u64 start)
 793 {
 794         u64 private;
 795         u64 private_failure;
 796         struct io_failure_record *failure;
 797         int ret;
 798
 799         private = 0;
 800         if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
 801                              (u64)-1, 1, EXTENT_DIRTY)) {
 802                 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
 803                                         start, &private_failure);
 804                 if (ret == 0) {
 805                         failure = (struct io_failure_record *)(unsigned long)
 806                                    private_failure;
 807                         set_state_private(&BTRFS_I(inode)->io_failure_tree,
 808                                           failure->start, 0);
 809                         clear_extent_bits(&BTRFS_I(inode)->io_failure_tree,
 810                                           failure->start,
 811                                           failure->start + failure->len - 1,
 812                                           EXTENT_DIRTY | EXTENT_LOCKED,
 813                                           GFP_NOFS);
 814                         kfree(failure);
 815                 }
 816         }
 817         return 0;
 818 }
 819
 820 /*
 821  * when reads are done, we need to check csums to verify the data is correct
 822  * if there's a match, we allow the bio to finish.  If not, we go through
 823  * the io_failure_record routines to find good copies
 824  */
 825 int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 826                                struct extent_state *state)
 827 {
 828         size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
 829         struct inode *inode = page->mapping->host;
 830         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 831         char *kaddr;
 832         u64 private = ~(u32)0;
 833         int ret;
 834         struct btrfs_root *root = BTRFS_I(inode)->root;
 835         u32 csum = ~(u32)0;
 836         unsigned long flags;
 837
 838         if (btrfs_test_opt(root, NODATASUM) ||
 839             btrfs_test_flag(inode, NODATASUM))
 840                 return 0;
 841         if (state && state->start == start) {
 842                 private = state->private;
 843                 ret = 0;
 844         } else {
 845                 ret = get_state_private(io_tree, start, &private);
 846         }
 847         local_irq_save(flags);
 848         kaddr = kmap_atomic(page, KM_IRQ0);
 849         if (ret) {
 850                 goto zeroit;
 851         }
 852         csum = btrfs_csum_data(root, kaddr + offset, csum,  end - start + 1);
 853         btrfs_csum_final(csum, (char *)&csum);
 854         if (csum != private) {
 855                 goto zeroit;
 856         }
 857         kunmap_atomic(kaddr, KM_IRQ0);
 858         local_irq_restore(flags);
 859
 860         /* if the io failure tree for this inode is non-empty,
 861          * check to see if we've recovered from a failed IO
 862          */
 863         btrfs_clean_io_failures(inode, start);
 864         return 0;
 865
 866 zeroit:
 867         printk("btrfs csum failed ino %lu off %llu csum %u private %Lu\n",
 868                page->mapping->host->i_ino, (unsigned long long)start, csum,
 869                private);
 870         memset(kaddr + offset, 1, end - start + 1);
 871         flush_dcache_page(page);
 872         kunmap_atomic(kaddr, KM_IRQ0);
 873         local_irq_restore(flags);
 874         if (private == 0)
 875                 return 0;
 876         return -EIO;
 877 }
 878
 879 /*
 880  * This creates an orphan entry for the given inode in case something goes
 881  * wrong in the middle of an unlink/truncate.
 882  */
 883 int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 884 {
 885         struct btrfs_root *root = BTRFS_I(inode)->root;
 886         int ret = 0;
 887
 888         spin_lock(&root->list_lock);
 889
 890         /* already on the orphan list, we're good */
 891         if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
 892                 spin_unlock(&root->list_lock);
 893                 return 0;
 894         }
 895
 896         list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
 897
 898         spin_unlock(&root->list_lock);
 899
 900         /*
 901          * insert an orphan item to track this unlinked/truncated file
 902          */
 903         ret = btrfs_insert_orphan_item(trans, root, inode->i_ino);
 904
 905         return ret;
 906 }
 907
 908 /*
 909  * We have done the truncate/delete so we can go ahead and remove the orphan
 910  * item for this particular inode.
 911  */
 912 int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
 913 {
 914         struct btrfs_root *root = BTRFS_I(inode)->root;
 915         int ret = 0;
 916
 917         spin_lock(&root->list_lock);
 918
 919         if (list_empty(&BTRFS_I(inode)->i_orphan)) {
 920                 spin_unlock(&root->list_lock);
 921                 return 0;
 922         }
 923
 924         list_del_init(&BTRFS_I(inode)->i_orphan);
 925         if (!trans) {
 926                 spin_unlock(&root->list_lock);
 927                 return 0;
 928         }
 929
 930         spin_unlock(&root->list_lock);
 931
 932         ret = btrfs_del_orphan_item(trans, root, inode->i_ino);
 933
 934         return ret;
 935 }
 936
 937 /*
 938  * this cleans up any orphans that may be left on the list from the last use
 939  * of this root.
 940  */
 941 void btrfs_orphan_cleanup(struct btrfs_root *root)
 942 {
 943         struct btrfs_path *path;
 944         struct extent_buffer *leaf;
 945         struct btrfs_item *item;
 946         struct btrfs_key key, found_key;
 947         struct btrfs_trans_handle *trans;
 948         struct inode *inode;
 949         int ret = 0, nr_unlink = 0, nr_truncate = 0;
 950
 951         /* don't do orphan cleanup if the fs is readonly. */
 952         if (root->fs_info->sb->s_flags & MS_RDONLY)
 953                 return;
 954
 955         path = btrfs_alloc_path();
 956         if (!path)
 957                 return;
 958         path->reada = -1;
 959
 960         key.objectid = BTRFS_ORPHAN_OBJECTID;
 961         btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
 962         key.offset = (u64)-1;
 963
 964
 965         while (1) {
 966                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 967                 if (ret < 0) {
 968                         printk(KERN_ERR "Error searching slot for orphan: %d"
 969                                "\n", ret);
 970                         break;
 971                 }
 972
 973                 /*
 974                  * if ret == 0 means we found what we were searching for, which
 975                  * is weird, but possible, so only screw with path if we didnt
 976                  * find the key and see if we have stuff that matches
 977                  */
 978                 if (ret > 0) {
 979                         if (path->slots[0] == 0)
 980                                 break;
 981                         path->slots[0]--;
 982                 }
 983
 984                 /* pull out the item */
 985                 leaf = path->nodes[0];
 986                 item = btrfs_item_nr(leaf, path->slots[0]);
 987                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 988
 989                 /* make sure the item matches what we want */
 990                 if (found_key.objectid != BTRFS_ORPHAN_OBJECTID)
 991                         break;
 992                 if (btrfs_key_type(&found_key) != BTRFS_ORPHAN_ITEM_KEY)
 993                         break;
 994
 995                 /* release the path since we're done with it */
 996                 btrfs_release_path(root, path);
 997
 998                 /*
 999                  * this is where we are basically btrfs_lookup, without the
1000                  * crossing root thing.  we store the inode number in the
1001                  * offset of the orphan item.
1002                  */
1003                 inode = btrfs_iget_locked(root->fs_info->sb,
1004                                           found_key.offset, root);
1005                 if (!inode)
1006                         break;
1007
1008                 if (inode->i_state & I_NEW) {
1009                         BTRFS_I(inode)->root = root;
1010
1011                         /* have to set the location manually */
1012                         BTRFS_I(inode)->location.objectid = inode->i_ino;
1013                         BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
1014                         BTRFS_I(inode)->location.offset = 0;
1015
1016                         btrfs_read_locked_inode(inode);
1017                         unlock_new_inode(inode);
1018                 }
1019
1020                 /*
1021                  * add this inode to the orphan list so btrfs_orphan_del does
1022                  * the proper thing when we hit it
1023                  */
1024                 spin_lock(&root->list_lock);
1025                 list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
1026                 spin_unlock(&root->list_lock);
1027
1028                 /*
1029                  * if this is a bad inode, means we actually succeeded in
1030                  * removing the inode, but not the orphan record, which means
1031                  * we need to manually delete the orphan since iput will just
1032                  * do a destroy_inode
1033                  */
1034                 if (is_bad_inode(inode)) {
1035                         trans = btrfs_start_transaction(root, 1);
1036                         btrfs_orphan_del(trans, inode);
1037                         btrfs_end_transaction(trans, root);
1038                         iput(inode);
1039                         continue;
1040                 }
1041
1042                 /* if we have links, this was a truncate, lets do that */
1043                 if (inode->i_nlink) {
1044                         nr_truncate++;
1045                         btrfs_truncate(inode);
1046                 } else {
1047                         nr_unlink++;
1048                 }
1049
1050                 /* this will do delete_inode and everything for us */
1051                 iput(inode);
1052         }
1053
1054         if (nr_unlink)
1055                 printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
1056         if (nr_truncate)
1057                 printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
1058
1059         btrfs_free_path(path);
1060 }
1061
1062 /*
1063  * read an inode from the btree into the in-memory inode
1064  */
1065 void btrfs_read_locked_inode(struct inode *inode)
1066 {
1067         struct btrfs_path *path;
1068         struct extent_buffer *leaf;
1069         struct btrfs_inode_item *inode_item;
1070         struct btrfs_timespec *tspec;
1071         struct btrfs_root *root = BTRFS_I(inode)->root;
1072         struct btrfs_key location;
1073         u64 alloc_group_block;
1074         u32 rdev;
1075         int ret;
1076
1077         path = btrfs_alloc_path();
1078         BUG_ON(!path);
1079         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
1080
1081         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
1082         if (ret)
1083                 goto make_bad;
1084
1085         leaf = path->nodes[0];
1086         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1087                                     struct btrfs_inode_item);
1088
1089         inode->i_mode = btrfs_inode_mode(leaf, inode_item);
1090         inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
1091         inode->i_uid = btrfs_inode_uid(leaf, inode_item);
1092         inode->i_gid = btrfs_inode_gid(leaf, inode_item);
1093         btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
1094
1095         tspec = btrfs_inode_atime(inode_item);
1096         inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
1097         inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
1098
1099         tspec = btrfs_inode_mtime(inode_item);
1100         inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
1101         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
1102
1103         tspec = btrfs_inode_ctime(inode_item);
1104         inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
1105         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
1106
1107         inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item);
1108         BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
1109         inode->i_generation = BTRFS_I(inode)->generation;
1110         inode->i_rdev = 0;
1111         rdev = btrfs_inode_rdev(leaf, inode_item);
1112
1113         BTRFS_I(inode)->index_cnt = (u64)-1;
1114
1115         alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
1116         BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
1117                                                        alloc_group_block);
1118         BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
1119         if (!BTRFS_I(inode)->block_group) {
1120                 BTRFS_I(inode)->block_group = btrfs_find_block_group(root,
1121                                                  NULL, 0,
1122                                                  BTRFS_BLOCK_GROUP_METADATA, 0);
1123         }
1124         btrfs_free_path(path);
1125         inode_item = NULL;
1126
1127         switch (inode->i_mode & S_IFMT) {
1128         case S_IFREG:
1129                 inode->i_mapping->a_ops = &btrfs_aops;
1130                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
1131                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
1132                 inode->i_fop = &btrfs_file_operations;
1133                 inode->i_op = &btrfs_file_inode_operations;
1134                 break;
1135         case S_IFDIR:
1136                 inode->i_fop = &btrfs_dir_file_operations;
1137                 if (root == root->fs_info->tree_root)
1138                         inode->i_op = &btrfs_dir_ro_inode_operations;
1139                 else
1140                         inode->i_op = &btrfs_dir_inode_operations;
1141                 break;
1142         case S_IFLNK:
1143                 inode->i_op = &btrfs_symlink_inode_operations;
1144                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
1145                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
1146                 break;
1147         default:
1148                 init_special_inode(inode, inode->i_mode, rdev);
1149                 break;
1150         }
1151         return;
1152
1153 make_bad:
1154         btrfs_free_path(path);
1155         make_bad_inode(inode);
1156 }
1157
1158 /*
1159  * given a leaf and an inode, copy the inode fields into the leaf
1160  */
1161 static void fill_inode_item(struct btrfs_trans_handle *trans,
1162                             struct extent_buffer *leaf,
1163                             struct btrfs_inode_item *item,
1164                             struct inode *inode)
1165 {
1166         btrfs_set_inode_uid(leaf, item, inode->i_uid);
1167         btrfs_set_inode_gid(leaf, item, inode->i_gid);
1168         btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
1169         btrfs_set_inode_mode(leaf, item, inode->i_mode);
1170         btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
1171
1172         btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
1173                                inode->i_atime.tv_sec);
1174         btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
1175                                 inode->i_atime.tv_nsec);
1176
1177         btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
1178                                inode->i_mtime.tv_sec);
1179         btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
1180                                 inode->i_mtime.tv_nsec);
1181
1182         btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
1183                                inode->i_ctime.tv_sec);
1184         btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
1185                                 inode->i_ctime.tv_nsec);
1186
1187         btrfs_set_inode_nblocks(leaf, item, inode->i_blocks);
1188         btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation);
1189         btrfs_set_inode_transid(leaf, item, trans->transid);
1190         btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
1191         btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
1192         btrfs_set_inode_block_group(leaf, item,
1193                                     BTRFS_I(inode)->block_group->key.objectid);
1194 }
1195
1196 /*
1197  * copy everything in the in-memory inode into the btree.
1198  */
1199 int noinline btrfs_update_inode(struct btrfs_trans_handle *trans,
1200                               struct btrfs_root *root,
1201                               struct inode *inode)
1202 {
1203         struct btrfs_inode_item *inode_item;
1204         struct btrfs_path *path;
1205         struct extent_buffer *leaf;
1206         int ret;
1207
1208         path = btrfs_alloc_path();
1209         BUG_ON(!path);
1210         ret = btrfs_lookup_inode(trans, root, path,
1211                                  &BTRFS_I(inode)->location, 1);
1212         if (ret) {
1213                 if (ret > 0)
1214                         ret = -ENOENT;
1215                 goto failed;
1216         }
1217
1218         leaf = path->nodes[0];
1219         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1220                                   struct btrfs_inode_item);
1221
1222         fill_inode_item(trans, leaf, inode_item, inode);
1223         btrfs_mark_buffer_dirty(leaf);
1224         btrfs_set_inode_last_trans(trans, inode);
1225         ret = 0;
1226 failed:
1227         btrfs_free_path(path);
1228         return ret;
1229 }
1230
1231
1232 /*
1233  * unlink helper that gets used here in inode.c and in the tree logging
1234  * recovery code.  It remove a link in a directory with a given name, and
1235  * also drops the back refs in the inode to the directory
1236  */
1237 int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
1238                        struct btrfs_root *root,
1239                        struct inode *dir, struct inode *inode,
1240                        const char *name, int name_len)
1241 {
1242         struct btrfs_path *path;
1243         int ret = 0;
1244         struct extent_buffer *leaf;
1245         struct btrfs_dir_item *di;
1246         struct btrfs_key key;
1247         u64 index;
1248
1249         path = btrfs_alloc_path();
1250         if (!path) {
1251                 ret = -ENOMEM;
1252                 goto err;
1253         }
1254
1255         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
1256                                     name, name_len, -1);
1257         if (IS_ERR(di)) {
1258                 ret = PTR_ERR(di);
1259                 goto err;
1260         }
1261         if (!di) {
1262                 ret = -ENOENT;
1263                 goto err;
1264         }
1265         leaf = path->nodes[0];
1266         btrfs_dir_item_key_to_cpu(leaf, di, &key);
1267         ret = btrfs_delete_one_dir_name(trans, root, path, di);
1268         if (ret)
1269                 goto err;
1270         btrfs_release_path(root, path);
1271
1272         ret = btrfs_del_inode_ref(trans, root, name, name_len,
1273                                   inode->i_ino,
1274                                   dir->i_ino, &index);
1275         if (ret) {
1276                 printk("failed to delete reference to %.*s, "
1277                        "inode %lu parent %lu\n", name_len, name,
1278                        inode->i_ino, dir->i_ino);
1279                 goto err;
1280         }
1281
1282         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
1283                                          index, name, name_len, -1);
1284         if (IS_ERR(di)) {
1285                 ret = PTR_ERR(di);
1286                 goto err;
1287         }
1288         if (!di) {
1289                 ret = -ENOENT;
1290                 goto err;
1291         }
1292         ret = btrfs_delete_one_dir_name(trans, root, path, di);
1293         btrfs_release_path(root, path);
1294
1295         ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
1296                                          inode, dir->i_ino);
1297         BUG_ON(ret != 0 && ret != -ENOENT);
1298         if (ret != -ENOENT)
1299                 BTRFS_I(dir)->log_dirty_trans = trans->transid;
1300
1301         ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
1302                                            dir, index);
1303         BUG_ON(ret);
1304 err:
1305         btrfs_free_path(path);
1306         if (ret)
1307                 goto out;
1308
1309         btrfs_i_size_write(dir, dir->i_size - name_len * 2);
1310         inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
1311         btrfs_update_inode(trans, root, dir);
1312         btrfs_drop_nlink(inode);
1313         ret = btrfs_update_inode(trans, root, inode);
1314         dir->i_sb->s_dirt = 1;
1315 out:
1316         return ret;
1317 }
1318
1319 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
1320 {
1321         struct btrfs_root *root;
1322         struct btrfs_trans_handle *trans;
1323         struct inode *inode = dentry->d_inode;
1324         int ret;
1325         unsigned long nr = 0;
1326
1327         root = BTRFS_I(dir)->root;
1328
1329         ret = btrfs_check_free_space(root, 1, 1);
1330         if (ret)
1331                 goto fail;
1332
1333         trans = btrfs_start_transaction(root, 1);
1334
1335         btrfs_set_trans_block_group(trans, dir);
1336         ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
1337                                  dentry->d_name.name, dentry->d_name.len);
1338
1339         if (inode->i_nlink == 0)
1340                 ret = btrfs_orphan_add(trans, inode);
1341
1342         nr = trans->blocks_used;
1343
1344         btrfs_end_transaction_throttle(trans, root);
1345 fail:
1346         btrfs_btree_balance_dirty(root, nr);
1347         return ret;
1348 }
1349
1350 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
1351 {
1352         struct inode *inode = dentry->d_inode;
1353         int err = 0;
1354         int ret;
1355         struct btrfs_root *root = BTRFS_I(dir)->root;
1356         struct btrfs_trans_handle *trans;
1357         unsigned long nr = 0;
1358
1359         if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
1360                 return -ENOTEMPTY;
1361         }
1362
1363         ret = btrfs_check_free_space(root, 1, 1);
1364         if (ret)
1365                 goto fail;
1366
1367         trans = btrfs_start_transaction(root, 1);
1368         btrfs_set_trans_block_group(trans, dir);
1369
1370         err = btrfs_orphan_add(trans, inode);
1371         if (err)
1372                 goto fail_trans;
1373
1374         /* now the directory is empty */
1375         err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
1376                                  dentry->d_name.name, dentry->d_name.len);
1377         if (!err) {
1378                 btrfs_i_size_write(inode, 0);
1379         }
1380
1381 fail_trans:
1382         nr = trans->blocks_used;
1383         ret = btrfs_end_transaction_throttle(trans, root);
1384 fail:
1385         btrfs_btree_balance_dirty(root, nr);
1386
1387         if (ret && !err)
1388                 err = ret;
1389         return err;
1390 }
1391
1392 /*
1393  * when truncating bytes in a file, it is possible to avoid reading
1394  * the leaves that contain only checksum items.  This can be the
1395  * majority of the IO required to delete a large file, but it must
1396  * be done carefully.
1397  *
1398  * The keys in the level just above the leaves are checked to make sure
1399  * the lowest key in a given leaf is a csum key, and starts at an offset
1400  * after the new  size.
1401  *
1402  * Then the key for the next leaf is checked to make sure it also has
1403  * a checksum item for the same file.  If it does, we know our target leaf
1404  * contains only checksum items, and it can be safely freed without reading
1405  * it.
1406  *
1407  * This is just an optimization targeted at large files.  It may do
1408  * nothing.  It will return 0 unless things went badly.
1409  */
1410 static noinline int drop_csum_leaves(struct btrfs_trans_handle *trans,
1411                                      struct btrfs_root *root,
1412                                      struct btrfs_path *path,
1413                                      struct inode *inode, u64 new_size)
1414 {
1415         struct btrfs_key key;
1416         int ret;
1417         int nritems;
1418         struct btrfs_key found_key;
1419         struct btrfs_key other_key;
1420
1421         path->lowest_level = 1;
1422         key.objectid = inode->i_ino;
1423         key.type = BTRFS_CSUM_ITEM_KEY;
1424         key.offset = new_size;
1425 again:
1426         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1427         if (ret < 0)
1428                 goto out;
1429
1430         if (path->nodes[1] == NULL) {
1431                 ret = 0;
1432                 goto out;
1433         }
1434         ret = 0;
1435         btrfs_node_key_to_cpu(path->nodes[1], &found_key, path->slots[1]);
1436         nritems = btrfs_header_nritems(path->nodes[1]);
1437
1438         if (!nritems)
1439                 goto out;
1440
1441         if (path->slots[1] >= nritems)
1442                 goto next_node;
1443
1444         /* did we find a key greater than anything we want to delete? */
1445         if (found_key.objectid > inode->i_ino ||
1446            (found_key.objectid == inode->i_ino && found_key.type > key.type))
1447                 goto out;
1448
1449         /* we check the next key in the node to make sure the leave contains
1450          * only checksum items.  This comparison doesn't work if our
1451          * leaf is the last one in the node
1452          */
1453         if (path->slots[1] + 1 >= nritems) {
1454 next_node:
1455                 /* search forward from the last key in the node, this
1456                  * will bring us into the next node in the tree
1457                  */
1458                 btrfs_node_key_to_cpu(path->nodes[1], &found_key, nritems - 1);
1459
1460                 /* unlikely, but we inc below, so check to be safe */
1461                 if (found_key.offset == (u64)-1)
1462                         goto out;
1463
1464                 /* search_forward needs a path with locks held, do the
1465                  * search again for the original key.  It is possible
1466                  * this will race with a balance and return a path that
1467                  * we could modify, but this drop is just an optimization
1468                  * and is allowed to miss some leaves.
1469                  */
1470                 btrfs_release_path(root, path);
1471                 found_key.offset++;
1472
1473                 /* setup a max key for search_forward */
1474                 other_key.offset = (u64)-1;
1475                 other_key.type = key.type;
1476                 other_key.objectid = key.objectid;
1477
1478                 path->keep_locks = 1;
1479                 ret = btrfs_search_forward(root, &found_key, &other_key,
1480                                            path, 0, 0);
1481                 path->keep_locks = 0;
1482                 if (ret || found_key.objectid != key.objectid ||
1483                     found_key.type != key.type) {
1484                         ret = 0;
1485                         goto out;
1486                 }
1487
1488                 key.offset = found_key.offset;
1489                 btrfs_release_path(root, path);
1490                 cond_resched();
1491                 goto again;
1492         }
1493
1494         /* we know there's one more slot after us in the tree,
1495          * read that key so we can verify it is also a checksum item
1496          */
1497         btrfs_node_key_to_cpu(path->nodes[1], &other_key, path->slots[1] + 1);
1498
1499         if (found_key.objectid < inode->i_ino)
1500                 goto next_key;
1501
1502         if (found_key.type != key.type || found_key.offset < new_size)
1503                 goto next_key;
1504
1505         /*
1506          * if the key for the next leaf isn't a csum key from this objectid,
1507          * we can't be sure there aren't good items inside this leaf.
1508          * Bail out
1509          */
1510         if (other_key.objectid != inode->i_ino || other_key.type != key.type)
1511                 goto out;
1512
1513         /*
1514          * it is safe to delete this leaf, it contains only
1515          * csum items from this inode at an offset >= new_size
1516          */
1517         ret = btrfs_del_leaf(trans, root, path,
1518                              btrfs_node_blockptr(path->nodes[1],
1519                                                  path->slots[1]));
1520         BUG_ON(ret);
1521
1522 next_key:
1523         btrfs_release_path(root, path);
1524
1525         if (other_key.objectid == inode->i_ino &&
1526             other_key.type == key.type && other_key.offset > key.offset) {
1527                 key.offset = other_key.offset;
1528                 cond_resched();
1529                 goto again;
1530         }
1531         ret = 0;
1532 out:
1533         /* fixup any changes we've made to the path */
1534         path->lowest_level = 0;
1535         path->keep_locks = 0;
1536         btrfs_release_path(root, path);
1537         return ret;
1538 }
1539
1540 /*
1541  * this can truncate away extent items, csum items and directory items.
1542  * It starts at a high offset and removes keys until it can't find
1543  * any higher than new_size
1544  *
1545  * csum items that cross the new i_size are truncated to the new size
1546  * as well.
1547  *
1548  * min_type is the minimum key type to truncate down to.  If set to 0, this
1549  * will kill all the items on this inode, including the INODE_ITEM_KEY.
1550  */
1551 noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
1552                                         struct btrfs_root *root,
1553                                         struct inode *inode,
1554                                         u64 new_size, u32 min_type)
1555 {
1556         int ret;
1557         struct btrfs_path *path;
1558         struct btrfs_key key;
1559         struct btrfs_key found_key;
1560         u32 found_type;
1561         struct extent_buffer *leaf;
1562         struct btrfs_file_extent_item *fi;
1563         u64 extent_start = 0;
1564         u64 extent_num_bytes = 0;
1565         u64 item_end = 0;
1566         u64 root_gen = 0;
1567         u64 root_owner = 0;
1568         int found_extent;
1569         int del_item;
1570         int pending_del_nr = 0;
1571         int pending_del_slot = 0;
1572         int extent_type = -1;
1573         u64 mask = root->sectorsize - 1;
1574
1575         if (root->ref_cows)
1576                 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
1577         path = btrfs_alloc_path();
1578         path->reada = -1;
1579         BUG_ON(!path);
1580
1581         /* FIXME, add redo link to tree so we don't leak on crash */
1582         key.objectid = inode->i_ino;
1583         key.offset = (u64)-1;
1584         key.type = (u8)-1;
1585
1586         btrfs_init_path(path);
1587
1588         ret = drop_csum_leaves(trans, root, path, inode, new_size);
1589         BUG_ON(ret);
1590
1591 search_again:
1592         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1593         if (ret < 0) {
1594                 goto error;
1595         }
1596         if (ret > 0) {
1597                 /* there are no items in the tree for us to truncate, we're
1598                  * done
1599                  */
1600                 if (path->slots[0] == 0) {
1601                         ret = 0;
1602                         goto error;
1603                 }
1604                 path->slots[0]--;
1605         }
1606
1607         while(1) {
1608                 fi = NULL;
1609                 leaf = path->nodes[0];
1610                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1611                 found_type = btrfs_key_type(&found_key);
1612
1613                 if (found_key.objectid != inode->i_ino)
1614                         break;
1615
1616                 if (found_type < min_type)
1617                         break;
1618
1619                 item_end = found_key.offset;
1620                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
1621                         fi = btrfs_item_ptr(leaf, path->slots[0],
1622                                             struct btrfs_file_extent_item);
1623                         extent_type = btrfs_file_extent_type(leaf, fi);
1624                         if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
1625                                 item_end +=
1626                                     btrfs_file_extent_num_bytes(leaf, fi);
1627                         } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1628                                 struct btrfs_item *item = btrfs_item_nr(leaf,
1629                                                                 path->slots[0]);
1630                                 item_end += btrfs_file_extent_inline_len(leaf,
1631                                                                          item);
1632                         }
1633                         item_end--;
1634                 }
1635                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
1636                         ret = btrfs_csum_truncate(trans, root, path,
1637                                                   new_size);
1638                         BUG_ON(ret);
1639                 }
1640                 if (item_end < new_size) {
1641                         if (found_type == BTRFS_DIR_ITEM_KEY) {
1642                                 found_type = BTRFS_INODE_ITEM_KEY;
1643                         } else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
1644                                 found_type = BTRFS_CSUM_ITEM_KEY;
1645                         } else if (found_type == BTRFS_EXTENT_DATA_KEY) {
1646                                 found_type = BTRFS_XATTR_ITEM_KEY;
1647                         } else if (found_type == BTRFS_XATTR_ITEM_KEY) {
1648                                 found_type = BTRFS_INODE_REF_KEY;
1649                         } else if (found_type) {
1650                                 found_type--;
1651                         } else {
1652                                 break;
1653                         }
1654                         btrfs_set_key_type(&key, found_type);
1655                         goto next;
1656                 }
1657                 if (found_key.offset >= new_size)
1658                         del_item = 1;
1659                 else
1660                         del_item = 0;
1661                 found_extent = 0;
1662
1663                 /* FIXME, shrink the extent if the ref count is only 1 */
1664                 if (found_type != BTRFS_EXTENT_DATA_KEY)
1665                         goto delete;
1666
1667                 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
1668                         u64 num_dec;
1669                         extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
1670                         if (!del_item) {
1671                                 u64 orig_num_bytes =
1672                                         btrfs_file_extent_num_bytes(leaf, fi);
1673                                 extent_num_bytes = new_size -
1674                                         found_key.offset + root->sectorsize - 1;
1675                                 extent_num_bytes = extent_num_bytes &
1676                                         ~((u64)root->sectorsize - 1);
1677                                 btrfs_set_file_extent_num_bytes(leaf, fi,
1678                                                          extent_num_bytes);
1679                                 num_dec = (orig_num_bytes -
1680                                            extent_num_bytes);
1681                                 if (root->ref_cows && extent_start != 0)
1682                                         dec_i_blocks(inode, num_dec);
1683                                 btrfs_mark_buffer_dirty(leaf);
1684                         } else {
1685                                 extent_num_bytes =
1686                                         btrfs_file_extent_disk_num_bytes(leaf,
1687                                                                          fi);
1688                                 /* FIXME blocksize != 4096 */
1689                                 num_dec = btrfs_file_extent_num_bytes(leaf, fi);
1690                                 if (extent_start != 0) {
1691                                         found_extent = 1;
1692                                         if (root->ref_cows)
1693                                                 dec_i_blocks(inode, num_dec);
1694                                 }
1695                                 root_gen = btrfs_header_generation(leaf);
1696                                 root_owner = btrfs_header_owner(leaf);
1697                         }
1698                 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1699                         if (!del_item) {
1700                                 u32 size = new_size - found_key.offset;
1701
1702                                 if (root->ref_cows) {
1703                                         dec_i_blocks(inode, item_end + 1 -
1704                                                     found_key.offset - size);
1705                                 }
1706                                 size =
1707                                     btrfs_file_extent_calc_inline_size(size);
1708                                 ret = btrfs_truncate_item(trans, root, path,
1709                                                           size, 1);
1710                                 BUG_ON(ret);
1711                         } else if (root->ref_cows) {
1712                                 dec_i_blocks(inode, item_end + 1 -
1713                                              found_key.offset);
1714                         }
1715                 }
1716 delete:
1717                 if (del_item) {
1718                         if (!pending_del_nr) {
1719                                 /* no pending yet, add ourselves */
1720                                 pending_del_slot = path->slots[0];
1721                                 pending_del_nr = 1;
1722                         } else if (pending_del_nr &&
1723                                    path->slots[0] + 1 == pending_del_slot) {
1724                                 /* hop on the pending chunk */
1725                                 pending_del_nr++;
1726                                 pending_del_slot = path->slots[0];
1727                         } else {
1728                                 printk("bad pending slot %d pending_del_nr %d pending_del_slot %d\n", path->slots[0], pending_del_nr, pending_del_slot);
1729                         }
1730                 } else {
1731                         break;
1732                 }
1733                 if (found_extent) {
1734                         ret = btrfs_free_extent(trans, root, extent_start,
1735                                                 extent_num_bytes,
1736                                                 leaf->start, root_owner,
1737                                                 root_gen, inode->i_ino,
1738                                                 found_key.offset, 0);
1739                         BUG_ON(ret);
1740                 }
1741 next:
1742                 if (path->slots[0] == 0) {
1743                         if (pending_del_nr)
1744                                 goto del_pending;
1745                         btrfs_release_path(root, path);
1746                         goto search_again;
1747                 }
1748
1749                 path->slots[0]--;
1750                 if (pending_del_nr &&
1751                     path->slots[0] + 1 != pending_del_slot) {
1752                         struct btrfs_key debug;
1753 del_pending:
1754                         btrfs_item_key_to_cpu(path->nodes[0], &debug,
1755                                               pending_del_slot);
1756                         ret = btrfs_del_items(trans, root, path,
1757                                               pending_del_slot,
1758                                               pending_del_nr);
1759                         BUG_ON(ret);
1760                         pending_del_nr = 0;
1761                         btrfs_release_path(root, path);
1762                         goto search_again;
1763                 }
1764         }
1765         ret = 0;
1766 error:
1767         if (pending_del_nr) {
1768                 ret = btrfs_del_items(trans, root, path, pending_del_slot,
1769                                       pending_del_nr);
1770         }
1771         btrfs_free_path(path);
1772         inode->i_sb->s_dirt = 1;
1773         return ret;
1774 }
1775
1776 /*
1777  * taken from block_truncate_page, but does cow as it zeros out
1778  * any bytes left in the last page in the file.
1779  */
1780 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
1781 {
1782         struct inode *inode = mapping->host;
1783         struct btrfs_root *root = BTRFS_I(inode)->root;
1784         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1785         struct btrfs_ordered_extent *ordered;
1786         char *kaddr;
1787         u32 blocksize = root->sectorsize;
1788         pgoff_t index = from >> PAGE_CACHE_SHIFT;
1789         unsigned offset = from & (PAGE_CACHE_SIZE-1);
1790         struct page *page;
1791         int ret = 0;
1792         u64 page_start;
1793         u64 page_end;
1794
1795         if ((offset & (blocksize - 1)) == 0)
1796                 goto out;
1797
1798         ret = -ENOMEM;
1799 again:
1800         page = grab_cache_page(mapping, index);
1801         if (!page)
1802                 goto out;
1803
1804         page_start = page_offset(page);
1805         page_end = page_start + PAGE_CACHE_SIZE - 1;
1806
1807         if (!PageUptodate(page)) {
1808                 ret = btrfs_readpage(NULL, page);
1809                 lock_page(page);
1810                 if (page->mapping != mapping) {
1811                         unlock_page(page);
1812                         page_cache_release(page);
1813                         goto again;
1814                 }
1815                 if (!PageUptodate(page)) {
1816                         ret = -EIO;
1817                         goto out_unlock;
1818                 }
1819         }
1820         wait_on_page_writeback(page);
1821
1822         lock_extent(io_tree, page_start, page_end, GFP_NOFS);
1823         set_page_extent_mapped(page);
1824
1825         ordered = btrfs_lookup_ordered_extent(inode, page_start);
1826         if (ordered) {
1827                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
1828                 unlock_page(page);
1829                 page_cache_release(page);
1830                 btrfs_start_ordered_extent(inode, ordered, 1);
1831                 btrfs_put_ordered_extent(ordered);
1832                 goto again;
1833         }
1834
1835         btrfs_set_extent_delalloc(inode, page_start, page_end);
1836         ret = 0;
1837         if (offset != PAGE_CACHE_SIZE) {
1838                 kaddr = kmap(page);
1839                 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1840                 flush_dcache_page(page);
1841                 kunmap(page);
1842         }
1843         ClearPageChecked(page);
1844         set_page_dirty(page);
1845         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
1846
1847 out_unlock:
1848         unlock_page(page);
1849         page_cache_release(page);
1850 out:
1851         return ret;
1852 }
1853
1854 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
1855 {
1856         struct inode *inode = dentry->d_inode;
1857         int err;
1858
1859         err = inode_change_ok(inode, attr);
1860         if (err)
1861                 return err;
1862
1863         if (S_ISREG(inode->i_mode) &&
1864             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
1865                 struct btrfs_trans_handle *trans;
1866                 struct btrfs_root *root = BTRFS_I(inode)->root;
1867                 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1868
1869                 u64 mask = root->sectorsize - 1;
1870                 u64 hole_start = (inode->i_size + mask) & ~mask;
1871                 u64 block_end = (attr->ia_size + mask) & ~mask;
1872                 u64 hole_size;
1873                 u64 alloc_hint = 0;
1874
1875                 if (attr->ia_size <= hole_start)
1876                         goto out;
1877
1878                 err = btrfs_check_free_space(root, 1, 0);
1879                 if (err)
1880                         goto fail;
1881
1882                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
1883
1884                 hole_size = block_end - hole_start;
1885                 while(1) {
1886                         struct btrfs_ordered_extent *ordered;
1887                         btrfs_wait_ordered_range(inode, hole_start, hole_size);
1888
1889                         lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1890                         ordered = btrfs_lookup_ordered_extent(inode, hole_start);
1891                         if (ordered) {
1892                                 unlock_extent(io_tree, hole_start,
1893                                               block_end - 1, GFP_NOFS);
1894                                 btrfs_put_ordered_extent(ordered);
1895                         } else {
1896                                 break;
1897                         }
1898                 }
1899
1900                 trans = btrfs_start_transaction(root, 1);
1901                 btrfs_set_trans_block_group(trans, inode);
1902                 mutex_lock(&BTRFS_I(inode)->extent_mutex);
1903                 err = btrfs_drop_extents(trans, root, inode,
1904                                          hole_start, block_end, hole_start,
1905                                          &alloc_hint);
1906
1907                 if (alloc_hint != EXTENT_MAP_INLINE) {
1908                         err = btrfs_insert_file_extent(trans, root,
1909                                                        inode->i_ino,
1910                                                        hole_start, 0, 0,
1911                                                        hole_size, 0);
1912                         btrfs_drop_extent_cache(inode, hole_start,
1913                                                 (u64)-1, 0);
1914                         btrfs_check_file(root, inode);
1915                 }
1916                 mutex_unlock(&BTRFS_I(inode)->extent_mutex);
1917                 btrfs_end_transaction(trans, root);
1918                 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1919                 if (err)
1920                         return err;
1921         }
1922 out:
1923         err = inode_setattr(inode, attr);
1924
1925         if (!err && ((attr->ia_valid & ATTR_MODE)))
1926                 err = btrfs_acl_chmod(inode);
1927 fail:
1928         return err;
1929 }
1930
1931 void btrfs_delete_inode(struct inode *inode)
1932 {
1933         struct btrfs_trans_handle *trans;
1934         struct btrfs_root *root = BTRFS_I(inode)->root;
1935         unsigned long nr;
1936         int ret;
1937
1938         truncate_inode_pages(&inode->i_data, 0);
1939         if (is_bad_inode(inode)) {
1940                 btrfs_orphan_del(NULL, inode);
1941                 goto no_delete;
1942         }
1943         btrfs_wait_ordered_range(inode, 0, (u64)-1);
1944
1945         btrfs_i_size_write(inode, 0);
1946         trans = btrfs_start_transaction(root, 1);
1947
1948         btrfs_set_trans_block_group(trans, inode);
1949         ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0);
1950         if (ret) {
1951                 btrfs_orphan_del(NULL, inode);
1952                 goto no_delete_lock;
1953         }
1954
1955         btrfs_orphan_del(trans, inode);
1956
1957         nr = trans->blocks_used;
1958         clear_inode(inode);
1959
1960         btrfs_end_transaction(trans, root);
1961         btrfs_btree_balance_dirty(root, nr);
1962         return;
1963
1964 no_delete_lock:
1965         nr = trans->blocks_used;
1966         btrfs_end_transaction(trans, root);
1967         btrfs_btree_balance_dirty(root, nr);
1968 no_delete:
1969         clear_inode(inode);
1970 }
1971
1972 /*
1973  * this returns the key found in the dir entry in the location pointer.
1974  * If no dir entries were found, location->objectid is 0.
1975  */
1976 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
1977                                struct btrfs_key *location)
1978 {
1979         const char *name = dentry->d_name.name;
1980         int namelen = dentry->d_name.len;
1981         struct btrfs_dir_item *di;
1982         struct btrfs_path *path;
1983         struct btrfs_root *root = BTRFS_I(dir)->root;
1984         int ret = 0;
1985
1986         path = btrfs_alloc_path();
1987         BUG_ON(!path);
1988
1989         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
1990                                     namelen, 0);
1991         if (IS_ERR(di))
1992                 ret = PTR_ERR(di);
1993         if (!di || IS_ERR(di)) {
1994                 goto out_err;
1995         }
1996         btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
1997 out:
1998         btrfs_free_path(path);
1999         return ret;
2000 out_err:
2001         location->objectid = 0;
2002         goto out;
2003 }
2004
2005 /*
2006  * when we hit a tree root in a directory, the btrfs part of the inode
2007  * needs to be changed to reflect the root directory of the tree root.  This
2008  * is kind of like crossing a mount point.
2009  */
2010 static int fixup_tree_root_location(struct btrfs_root *root,
2011                              struct btrfs_key *location,
2012                              struct btrfs_root **sub_root,
2013                              struct dentry *dentry)
2014 {
2015         struct btrfs_root_item *ri;
2016
2017         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
2018                 return 0;
2019         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
2020                 return 0;
2021
2022         *sub_root = btrfs_read_fs_root(root->fs_info, location,
2023                                         dentry->d_name.name,
2024                                         dentry->d_name.len);
2025         if (IS_ERR(*sub_root))
2026                 return PTR_ERR(*sub_root);
2027
2028         ri = &(*sub_root)->root_item;
2029         location->objectid = btrfs_root_dirid(ri);
2030         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
2031         location->offset = 0;
2032
2033         return 0;
2034 }
2035
2036 static noinline void init_btrfs_i(struct inode *inode)
2037 {
2038         struct btrfs_inode *bi = BTRFS_I(inode);
2039
2040         bi->i_acl = NULL;
2041         bi->i_default_acl = NULL;
2042
2043         bi->generation = 0;
2044         bi->last_trans = 0;
2045         bi->logged_trans = 0;
2046         bi->delalloc_bytes = 0;
2047         bi->disk_i_size = 0;
2048         bi->flags = 0;
2049         bi->index_cnt = (u64)-1;
2050         bi->log_dirty_trans = 0;
2051         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
2052         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
2053                              inode->i_mapping, GFP_NOFS);
2054         extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
2055                              inode->i_mapping, GFP_NOFS);
2056         INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
2057         btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
2058         mutex_init(&BTRFS_I(inode)->csum_mutex);
2059         mutex_init(&BTRFS_I(inode)->extent_mutex);
2060         mutex_init(&BTRFS_I(inode)->log_mutex);
2061 }
2062
2063 static int btrfs_init_locked_inode(struct inode *inode, void *p)
2064 {
2065         struct btrfs_iget_args *args = p;
2066         inode->i_ino = args->ino;
2067         init_btrfs_i(inode);
2068         BTRFS_I(inode)->root = args->root;
2069         return 0;
2070 }
2071
2072 static int btrfs_find_actor(struct inode *inode, void *opaque)
2073 {
2074         struct btrfs_iget_args *args = opaque;
2075         return (args->ino == inode->i_ino &&
2076                 args->root == BTRFS_I(inode)->root);
2077 }
2078
2079 struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
2080                             struct btrfs_root *root, int wait)
2081 {
2082         struct inode *inode;
2083         struct btrfs_iget_args args;
2084         args.ino = objectid;
2085         args.root = root;
2086
2087         if (wait) {
2088                 inode = ilookup5(s, objectid, btrfs_find_actor,
2089                                  (void *)&args);
2090         } else {
2091                 inode = ilookup5_nowait(s, objectid, btrfs_find_actor,
2092                                         (void *)&args);
2093         }
2094         return inode;
2095 }
2096
2097 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
2098                                 struct btrfs_root *root)
2099 {
2100         struct inode *inode;
2101         struct btrfs_iget_args args;
2102         args.ino = objectid;
2103         args.root = root;
2104
2105         inode = iget5_locked(s, objectid, btrfs_find_actor,
2106                              btrfs_init_locked_inode,
2107                              (void *)&args);
2108         return inode;
2109 }
2110
2111 /* Get an inode object given its location and corresponding root.
2112  * Returns in *is_new if the inode was read from disk
2113  */
2114 struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
2115                          struct btrfs_root *root, int *is_new)
2116 {
2117         struct inode *inode;
2118
2119         inode = btrfs_iget_locked(s, location->objectid, root);
2120         if (!inode)
2121                 return ERR_PTR(-EACCES);
2122
2123         if (inode->i_state & I_NEW) {
2124                 BTRFS_I(inode)->root = root;
2125                 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
2126                 btrfs_read_locked_inode(inode);
2127                 unlock_new_inode(inode);
2128                 if (is_new)
2129                         *is_new = 1;
2130         } else {
2131                 if (is_new)
2132                         *is_new = 0;
2133         }
2134
2135         return inode;
2136 }
2137
2138 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
2139                                    struct nameidata *nd)
2140 {
2141         struct inode * inode;
2142         struct btrfs_inode *bi = BTRFS_I(dir);
2143         struct btrfs_root *root = bi->root;
2144         struct btrfs_root *sub_root = root;
2145         struct btrfs_key location;
2146         int ret, new, do_orphan = 0;
2147
2148         if (dentry->d_name.len > BTRFS_NAME_LEN)
2149                 return ERR_PTR(-ENAMETOOLONG);
2150
2151         ret = btrfs_inode_by_name(dir, dentry, &location);
2152
2153         if (ret < 0)
2154                 return ERR_PTR(ret);
2155
2156         inode = NULL;
2157         if (location.objectid) {
2158                 ret = fixup_tree_root_location(root, &location, &sub_root,
2159                                                 dentry);
2160                 if (ret < 0)
2161                         return ERR_PTR(ret);
2162                 if (ret > 0)
2163                         return ERR_PTR(-ENOENT);
2164                 inode = btrfs_iget(dir->i_sb, &location, sub_root, &new);
2165                 if (IS_ERR(inode))
2166                         return ERR_CAST(inode);
2167
2168                 /* the inode and parent dir are two different roots */
2169                 if (new && root != sub_root) {
2170                         igrab(inode);
2171                         sub_root->inode = inode;
2172                         do_orphan = 1;
2173                 }
2174         }
2175
2176         if (unlikely(do_orphan))
2177                 btrfs_orphan_cleanup(sub_root);
2178
2179         return d_splice_alias(inode, dentry);
2180 }
2181
2182 static unsigned char btrfs_filetype_table[] = {
2183         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
2184 };
2185
2186 static int btrfs_real_readdir(struct file *filp, void *dirent,
2187                               filldir_t filldir)
2188 {
2189         struct inode *inode = filp->f_dentry->d_inode;
2190         struct btrfs_root *root = BTRFS_I(inode)->root;
2191         struct btrfs_item *item;
2192         struct btrfs_dir_item *di;
2193         struct btrfs_key key;
2194         struct btrfs_key found_key;
2195         struct btrfs_path *path;
2196         int ret;
2197         u32 nritems;
2198         struct extent_buffer *leaf;
2199         int slot;
2200         int advance;
2201         unsigned char d_type;
2202         int over = 0;
2203         u32 di_cur;
2204         u32 di_total;
2205         u32 di_len;
2206         int key_type = BTRFS_DIR_INDEX_KEY;
2207         char tmp_name[32];
2208         char *name_ptr;
2209         int name_len;
2210
2211         /* FIXME, use a real flag for deciding about the key type */
2212         if (root->fs_info->tree_root == root)
2213                 key_type = BTRFS_DIR_ITEM_KEY;
2214
2215         /* special case for "." */
2216         if (filp->f_pos == 0) {
2217                 over = filldir(dirent, ".", 1,
2218                                1, inode->i_ino,
2219                                DT_DIR);
2220                 if (over)
2221                         return 0;
2222                 filp->f_pos = 1;
2223         }
2224         /* special case for .., just use the back ref */
2225         if (filp->f_pos == 1) {
2226                 u64 pino = parent_ino(filp->f_path.dentry);
2227                 over = filldir(dirent, "..", 2,
2228                                2, pino, DT_DIR);
2229                 if (over)
2230                         return 0;
2231                 filp->f_pos = 2;
2232         }
2233
2234         path = btrfs_alloc_path();
2235         path->reada = 2;
2236
2237         btrfs_set_key_type(&key, key_type);
2238         key.offset = filp->f_pos;
2239         key.objectid = inode->i_ino;
2240
2241         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2242         if (ret < 0)
2243                 goto err;
2244         advance = 0;
2245
2246         while (1) {
2247                 leaf = path->nodes[0];
2248                 nritems = btrfs_header_nritems(leaf);
2249                 slot = path->slots[0];
2250                 if (advance || slot >= nritems) {
2251                         if (slot >= nritems - 1) {
2252                                 ret = btrfs_next_leaf(root, path);
2253                                 if (ret)
2254                                         break;
2255                                 leaf = path->nodes[0];
2256                                 nritems = btrfs_header_nritems(leaf);
2257                                 slot = path->slots[0];
2258                         } else {
2259                                 slot++;
2260                                 path->slots[0]++;
2261                         }
2262                 }
2263                 advance = 1;
2264                 item = btrfs_item_nr(leaf, slot);
2265                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2266
2267                 if (found_key.objectid != key.objectid)
2268                         break;
2269                 if (btrfs_key_type(&found_key) != key_type)
2270                         break;
2271                 if (found_key.offset < filp->f_pos)
2272                         continue;
2273
2274                 filp->f_pos = found_key.offset;
2275
2276                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
2277                 di_cur = 0;
2278                 di_total = btrfs_item_size(leaf, item);
2279
2280                 while (di_cur < di_total) {
2281                         struct btrfs_key location;
2282
2283                         name_len = btrfs_dir_name_len(leaf, di);
2284                         if (name_len <= sizeof(tmp_name)) {
2285                                 name_ptr = tmp_name;
2286                         } else {
2287                                 name_ptr = kmalloc(name_len, GFP_NOFS);
2288                                 if (!name_ptr) {
2289                                         ret = -ENOMEM;
2290                                         goto err;
2291                                 }
2292                         }
2293                         read_extent_buffer(leaf, name_ptr,
2294                                            (unsigned long)(di + 1), name_len);
2295
2296                         d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
2297                         btrfs_dir_item_key_to_cpu(leaf, di, &location);
2298                         over = filldir(dirent, name_ptr, name_len,
2299                                        found_key.offset, location.objectid,
2300                                        d_type);
2301
2302                         if (name_ptr != tmp_name)
2303                                 kfree(name_ptr);
2304
2305                         if (over)
2306                                 goto nopos;
2307
2308                         di_len = btrfs_dir_name_len(leaf, di) +
2309                                  btrfs_dir_data_len(leaf, di) + sizeof(*di);
2310                         di_cur += di_len;
2311                         di = (struct btrfs_dir_item *)((char *)di + di_len);
2312                 }
2313         }
2314
2315         /* Reached end of directory/root. Bump pos past the last item. */
2316         if (key_type == BTRFS_DIR_INDEX_KEY)
2317                 filp->f_pos = INT_LIMIT(typeof(filp->f_pos));
2318         else
2319                 filp->f_pos++;
2320 nopos:
2321         ret = 0;
2322 err:
2323         btrfs_free_path(path);
2324         return ret;
2325 }
2326
2327 int btrfs_write_inode(struct inode *inode, int wait)
2328 {
2329         struct btrfs_root *root = BTRFS_I(inode)->root;
2330         struct btrfs_trans_handle *trans;
2331         int ret = 0;
2332
2333         if (root->fs_info->closing > 1)
2334                 return 0;
2335
2336         if (wait) {
2337                 trans = btrfs_join_transaction(root, 1);
2338                 btrfs_set_trans_block_group(trans, inode);
2339                 ret = btrfs_commit_transaction(trans, root);
2340         }
2341         return ret;
2342 }
2343
2344 /*
2345  * This is somewhat expensive, updating the tree every time the
2346  * inode changes.  But, it is most likely to find the inode in cache.
2347  * FIXME, needs more benchmarking...there are no reasons other than performance
2348  * to keep or drop this code.
2349  */
2350 void btrfs_dirty_inode(struct inode *inode)
2351 {
2352         struct btrfs_root *root = BTRFS_I(inode)->root;
2353         struct btrfs_trans_handle *trans;
2354
2355         trans = btrfs_join_transaction(root, 1);
2356         btrfs_set_trans_block_group(trans, inode);
2357         btrfs_update_inode(trans, root, inode);
2358         btrfs_end_transaction(trans, root);
2359 }
2360
2361 /*
2362  * find the highest existing sequence number in a directory
2363  * and then set the in-memory index_cnt variable to reflect
2364  * free sequence numbers
2365  */
2366 static int btrfs_set_inode_index_count(struct inode *inode)
2367 {
2368         struct btrfs_root *root = BTRFS_I(inode)->root;
2369         struct btrfs_key key, found_key;
2370         struct btrfs_path *path;
2371         struct extent_buffer *leaf;
2372         int ret;
2373
2374         key.objectid = inode->i_ino;
2375         btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
2376         key.offset = (u64)-1;
2377
2378         path = btrfs_alloc_path();
2379         if (!path)
2380                 return -ENOMEM;
2381
2382         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2383         if (ret < 0)
2384                 goto out;
2385         /* FIXME: we should be able to handle this */
2386         if (ret == 0)
2387                 goto out;
2388         ret = 0;
2389
2390         /*
2391          * MAGIC NUMBER EXPLANATION:
2392          * since we search a directory based on f_pos we have to start at 2
2393          * since '.' and '..' have f_pos of 0 and 1 respectively, so everybody
2394          * else has to start at 2
2395          */
2396         if (path->slots[0] == 0) {
2397                 BTRFS_I(inode)->index_cnt = 2;
2398                 goto out;
2399         }
2400
2401         path->slots[0]--;
2402
2403         leaf = path->nodes[0];
2404         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2405
2406         if (found_key.objectid != inode->i_ino ||
2407             btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) {
2408                 BTRFS_I(inode)->index_cnt = 2;
2409                 goto out;
2410         }
2411
2412         BTRFS_I(inode)->index_cnt = found_key.offset + 1;
2413 out:
2414         btrfs_free_path(path);
2415         return ret;
2416 }
2417
2418 /*
2419  * helper to find a free sequence number in a given directory.  This current
2420  * code is very simple, later versions will do smarter things in the btree
2421  */
2422 static int btrfs_set_inode_index(struct inode *dir, struct inode *inode,
2423                                  u64 *index)
2424 {
2425         int ret = 0;
2426
2427         if (BTRFS_I(dir)->index_cnt == (u64)-1) {
2428                 ret = btrfs_set_inode_index_count(dir);
2429                 if (ret) {
2430                         return ret;
2431                 }
2432         }
2433
2434         *index = BTRFS_I(dir)->index_cnt;
2435         BTRFS_I(dir)->index_cnt++;
2436
2437         return ret;
2438 }
2439
2440 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
2441                                      struct btrfs_root *root,
2442                                      struct inode *dir,
2443                                      const char *name, int name_len,
2444                                      u64 ref_objectid,
2445                                      u64 objectid,
2446                                      struct btrfs_block_group_cache *group,
2447                                      int mode, u64 *index)
2448 {
2449         struct inode *inode;
2450         struct btrfs_inode_item *inode_item;
2451         struct btrfs_block_group_cache *new_inode_group;
2452         struct btrfs_key *location;
2453         struct btrfs_path *path;
2454         struct btrfs_inode_ref *ref;
2455         struct btrfs_key key[2];
2456         u32 sizes[2];
2457         unsigned long ptr;
2458         int ret;
2459         int owner;
2460
2461         path = btrfs_alloc_path();
2462         BUG_ON(!path);
2463
2464         inode = new_inode(root->fs_info->sb);
2465         if (!inode)
2466                 return ERR_PTR(-ENOMEM);
2467
2468         if (dir) {
2469                 ret = btrfs_set_inode_index(dir, inode, index);
2470                 if (ret)
2471                         return ERR_PTR(ret);
2472         }
2473         /*
2474          * index_cnt is ignored for everything but a dir,
2475          * btrfs_get_inode_index_count has an explanation for the magic
2476          * number
2477          */
2478         init_btrfs_i(inode);
2479         BTRFS_I(inode)->index_cnt = 2;
2480         BTRFS_I(inode)->root = root;
2481         BTRFS_I(inode)->generation = trans->transid;
2482
2483         if (mode & S_IFDIR)
2484                 owner = 0;
2485         else
2486                 owner = 1;
2487         new_inode_group = btrfs_find_block_group(root, group, 0,
2488                                        BTRFS_BLOCK_GROUP_METADATA, owner);
2489         if (!new_inode_group) {
2490                 printk("find_block group failed\n");
2491                 new_inode_group = group;
2492         }
2493         BTRFS_I(inode)->block_group = new_inode_group;
2494
2495         key[0].objectid = objectid;
2496         btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
2497         key[0].offset = 0;
2498
2499         key[1].objectid = objectid;
2500         btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
2501         key[1].offset = ref_objectid;
2502
2503         sizes[0] = sizeof(struct btrfs_inode_item);
2504         sizes[1] = name_len + sizeof(*ref);
2505
2506         ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
2507         if (ret != 0)
2508                 goto fail;
2509
2510         if (objectid > root->highest_inode)
2511                 root->highest_inode = objectid;
2512
2513         inode->i_uid = current->fsuid;
2514         inode->i_gid = current->fsgid;
2515         inode->i_mode = mode;
2516         inode->i_ino = objectid;
2517         inode->i_blocks = 0;
2518         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2519         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2520                                   struct btrfs_inode_item);
2521         fill_inode_item(trans, path->nodes[0], inode_item, inode);
2522
2523         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
2524                              struct btrfs_inode_ref);
2525         btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
2526         btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
2527         ptr = (unsigned long)(ref + 1);
2528         write_extent_buffer(path->nodes[0], name, ptr, name_len);
2529
2530         btrfs_mark_buffer_dirty(path->nodes[0]);
2531         btrfs_free_path(path);
2532
2533         location = &BTRFS_I(inode)->location;
2534         location->objectid = objectid;
2535         location->offset = 0;
2536         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
2537
2538         insert_inode_hash(inode);
2539         return inode;
2540 fail:
2541         if (dir)
2542                 BTRFS_I(dir)->index_cnt--;
2543         btrfs_free_path(path);
2544         return ERR_PTR(ret);
2545 }
2546
2547 static inline u8 btrfs_inode_type(struct inode *inode)
2548 {
2549         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
2550 }
2551
2552 /*
2553  * utility function to add 'inode' into 'parent_inode' with
2554  * a give name and a given sequence number.
2555  * if 'add_backref' is true, also insert a backref from the
2556  * inode to the parent directory.
2557  */
2558 int btrfs_add_link(struct btrfs_trans_handle *trans,
2559                    struct inode *parent_inode, struct inode *inode,
2560                    const char *name, int name_len, int add_backref, u64 index)
2561 {
2562         int ret;
2563         struct btrfs_key key;
2564         struct btrfs_root *root = BTRFS_I(parent_inode)->root;
2565
2566         key.objectid = inode->i_ino;
2567         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
2568         key.offset = 0;
2569
2570         ret = btrfs_insert_dir_item(trans, root, name, name_len,
2571                                     parent_inode->i_ino,
2572                                     &key, btrfs_inode_type(inode),
2573                                     index);
2574         if (ret == 0) {
2575                 if (add_backref) {
2576                         ret = btrfs_insert_inode_ref(trans, root,
2577                                                      name, name_len,
2578                                                      inode->i_ino,
2579                                                      parent_inode->i_ino,
2580                                                      index);
2581                 }
2582                 btrfs_i_size_write(parent_inode, parent_inode->i_size +
2583                                    name_len * 2);
2584                 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
2585                 ret = btrfs_update_inode(trans, root, parent_inode);
2586         }
2587         return ret;
2588 }
2589
2590 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
2591                             struct dentry *dentry, struct inode *inode,
2592                             int backref, u64 index)
2593 {
2594         int err = btrfs_add_link(trans, dentry->d_parent->d_inode,
2595                                  inode, dentry->d_name.name,
2596                                  dentry->d_name.len, backref, index);
2597         if (!err) {
2598                 d_instantiate(dentry, inode);
2599                 return 0;
2600         }
2601         if (err > 0)
2602                 err = -EEXIST;
2603         return err;
2604 }
2605
2606 static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
2607                         int mode, dev_t rdev)
2608 {
2609         struct btrfs_trans_handle *trans;
2610         struct btrfs_root *root = BTRFS_I(dir)->root;
2611         struct inode *inode = NULL;
2612         int err;
2613         int drop_inode = 0;
2614         u64 objectid;
2615         unsigned long nr = 0;
2616         u64 index = 0;
2617
2618         if (!new_valid_dev(rdev))
2619                 return -EINVAL;
2620
2621         err = btrfs_check_free_space(root, 1, 0);
2622         if (err)
2623                 goto fail;
2624
2625         trans = btrfs_start_transaction(root, 1);
2626         btrfs_set_trans_block_group(trans, dir);
2627
2628         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2629         if (err) {
2630                 err = -ENOSPC;
2631                 goto out_unlock;
2632         }
2633
2634         inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
2635                                 dentry->d_name.len,
2636                                 dentry->d_parent->d_inode->i_ino, objectid,
2637                                 BTRFS_I(dir)->block_group, mode, &index);
2638         err = PTR_ERR(inode);
2639         if (IS_ERR(inode))
2640                 goto out_unlock;
2641
2642         err = btrfs_init_acl(inode, dir);
2643         if (err) {
2644                 drop_inode = 1;
2645                 goto out_unlock;
2646         }
2647
2648         btrfs_set_trans_block_group(trans, inode);
2649         err = btrfs_add_nondir(trans, dentry, inode, 0, index);
2650         if (err)
2651                 drop_inode = 1;
2652         else {
2653                 inode->i_op = &btrfs_special_inode_operations;
2654                 init_special_inode(inode, inode->i_mode, rdev);
2655                 btrfs_update_inode(trans, root, inode);
2656         }
2657         dir->i_sb->s_dirt = 1;
2658         btrfs_update_inode_block_group(trans, inode);
2659         btrfs_update_inode_block_group(trans, dir);
2660 out_unlock:
2661         nr = trans->blocks_used;
2662         btrfs_end_transaction_throttle(trans, root);
2663 fail:
2664         if (drop_inode) {
2665                 inode_dec_link_count(inode);
2666                 iput(inode);
2667         }
2668         btrfs_btree_balance_dirty(root, nr);
2669         return err;
2670 }
2671
2672 static int btrfs_create(struct inode *dir, struct dentry *dentry,
2673                         int mode, struct nameidata *nd)
2674 {
2675         struct btrfs_trans_handle *trans;
2676         struct btrfs_root *root = BTRFS_I(dir)->root;
2677         struct inode *inode = NULL;
2678         int err;
2679         int drop_inode = 0;
2680         unsigned long nr = 0;
2681         u64 objectid;
2682         u64 index = 0;
2683
2684         err = btrfs_check_free_space(root, 1, 0);
2685         if (err)
2686                 goto fail;
2687         trans = btrfs_start_transaction(root, 1);
2688         btrfs_set_trans_block_group(trans, dir);
2689
2690         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2691         if (err) {
2692                 err = -ENOSPC;
2693                 goto out_unlock;
2694         }
2695
2696         inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
2697                                 dentry->d_name.len,
2698                                 dentry->d_parent->d_inode->i_ino,
2699                                 objectid, BTRFS_I(dir)->block_group, mode,
2700                                 &index);
2701         err = PTR_ERR(inode);
2702         if (IS_ERR(inode))
2703                 goto out_unlock;
2704
2705         err = btrfs_init_acl(inode, dir);
2706         if (err) {
2707                 drop_inode = 1;
2708                 goto out_unlock;
2709         }
2710
2711         btrfs_set_trans_block_group(trans, inode);
2712         err = btrfs_add_nondir(trans, dentry, inode, 0, index);
2713         if (err)
2714                 drop_inode = 1;
2715         else {
2716                 inode->i_mapping->a_ops = &btrfs_aops;
2717                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
2718                 inode->i_fop = &btrfs_file_operations;
2719                 inode->i_op = &btrfs_file_inode_operations;
2720                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
2721         }
2722         dir->i_sb->s_dirt = 1;
2723         btrfs_update_inode_block_group(trans, inode);
2724         btrfs_update_inode_block_group(trans, dir);
2725 out_unlock:
2726         nr = trans->blocks_used;
2727         btrfs_end_transaction_throttle(trans, root);
2728 fail:
2729         if (drop_inode) {
2730                 inode_dec_link_count(inode);
2731                 iput(inode);
2732         }
2733         btrfs_btree_balance_dirty(root, nr);
2734         return err;
2735 }
2736
2737 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
2738                       struct dentry *dentry)
2739 {
2740         struct btrfs_trans_handle *trans;
2741         struct btrfs_root *root = BTRFS_I(dir)->root;
2742         struct inode *inode = old_dentry->d_inode;
2743         u64 index;
2744         unsigned long nr = 0;
2745         int err;
2746         int drop_inode = 0;
2747
2748         if (inode->i_nlink == 0)
2749                 return -ENOENT;
2750
2751         btrfs_inc_nlink(inode);
2752         err = btrfs_check_free_space(root, 1, 0);
2753         if (err)
2754                 goto fail;
2755         err = btrfs_set_inode_index(dir, inode, &index);
2756         if (err)
2757                 goto fail;
2758
2759         trans = btrfs_start_transaction(root, 1);
2760
2761         btrfs_set_trans_block_group(trans, dir);
2762         atomic_inc(&inode->i_count);
2763
2764         err = btrfs_add_nondir(trans, dentry, inode, 1, index);
2765
2766         if (err)
2767                 drop_inode = 1;
2768
2769         dir->i_sb->s_dirt = 1;
2770         btrfs_update_inode_block_group(trans, dir);
2771         err = btrfs_update_inode(trans, root, inode);
2772
2773         if (err)
2774                 drop_inode = 1;
2775
2776         nr = trans->blocks_used;
2777         btrfs_end_transaction_throttle(trans, root);
2778 fail:
2779         if (drop_inode) {
2780                 inode_dec_link_count(inode);
2781                 iput(inode);
2782         }
2783         btrfs_btree_balance_dirty(root, nr);
2784         return err;
2785 }
2786
2787 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2788 {
2789         struct inode *inode = NULL;
2790         struct btrfs_trans_handle *trans;
2791         struct btrfs_root *root = BTRFS_I(dir)->root;
2792         int err = 0;
2793         int drop_on_err = 0;
2794         u64 objectid = 0;
2795         u64 index = 0;
2796         unsigned long nr = 1;
2797
2798         err = btrfs_check_free_space(root, 1, 0);
2799         if (err)
2800                 goto out_unlock;
2801
2802         trans = btrfs_start_transaction(root, 1);
2803         btrfs_set_trans_block_group(trans, dir);
2804
2805         if (IS_ERR(trans)) {
2806                 err = PTR_ERR(trans);
2807                 goto out_unlock;
2808         }
2809
2810         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2811         if (err) {
2812                 err = -ENOSPC;
2813                 goto out_unlock;
2814         }
2815
2816         inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
2817                                 dentry->d_name.len,
2818                                 dentry->d_parent->d_inode->i_ino, objectid,
2819                                 BTRFS_I(dir)->block_group, S_IFDIR | mode,
2820                                 &index);
2821         if (IS_ERR(inode)) {
2822                 err = PTR_ERR(inode);
2823                 goto out_fail;
2824         }
2825
2826         drop_on_err = 1;
2827
2828         err = btrfs_init_acl(inode, dir);
2829         if (err)
2830                 goto out_fail;
2831
2832         inode->i_op = &btrfs_dir_inode_operations;
2833         inode->i_fop = &btrfs_dir_file_operations;
2834         btrfs_set_trans_block_group(trans, inode);
2835
2836         btrfs_i_size_write(inode, 0);
2837         err = btrfs_update_inode(trans, root, inode);
2838         if (err)
2839                 goto out_fail;
2840
2841         err = btrfs_add_link(trans, dentry->d_parent->d_inode,
2842                                  inode, dentry->d_name.name,
2843                                  dentry->d_name.len, 0, index);
2844         if (err)
2845                 goto out_fail;
2846
2847         d_instantiate(dentry, inode);
2848         drop_on_err = 0;
2849         dir->i_sb->s_dirt = 1;
2850         btrfs_update_inode_block_group(trans, inode);
2851         btrfs_update_inode_block_group(trans, dir);
2852
2853 out_fail:
2854         nr = trans->blocks_used;
2855         btrfs_end_transaction_throttle(trans, root);
2856
2857 out_unlock:
2858         if (drop_on_err)
2859                 iput(inode);
2860         btrfs_btree_balance_dirty(root, nr);
2861         return err;
2862 }
2863
2864 /* helper for btfs_get_extent.  Given an existing extent in the tree,
2865  * and an extent that you want to insert, deal with overlap and insert
2866  * the new extent into the tree.
2867  */
2868 static int merge_extent_mapping(struct extent_map_tree *em_tree,
2869                                 struct extent_map *existing,
2870                                 struct extent_map *em,
2871                                 u64 map_start, u64 map_len)
2872 {
2873         u64 start_diff;
2874
2875         BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
2876         start_diff = map_start - em->start;
2877         em->start = map_start;
2878         em->len = map_len;
2879         if (em->block_start < EXTENT_MAP_LAST_BYTE)
2880                 em->block_start += start_diff;
2881         return add_extent_mapping(em_tree, em);
2882 }
2883
/*
 * a bit scary, this does extent mapping from logical file offset to the disk.
 * the ugly parts come from merging extents from the disk with the
 * in-ram representation.  This gets more complex because of the data=ordered code,
 * where the in-ram extents might be locked pending data=ordered completion.
 *
 * This also copies inline extents directly into the page.
 *
 * inode:     file being mapped
 * page:      optional page; when set, inline extent data is copied between
 *            the leaf and this page
 * pg_offset: offset inside 'page' used for the inline copy
 * start/len: file range being looked up
 * create:    only matters for inline extents: when zero and the page is not
 *            uptodate, data is read from the leaf into the page; when
 *            non-zero and the page is uptodate, the page data is written
 *            back into the leaf inside a transaction
 *
 * Returns an extent map covering 'start', or an ERR_PTR() on failure.
 */
struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
                                    size_t pg_offset, u64 start, u64 len,
                                    int create)
{
        int ret;
        int err = 0;
        u64 bytenr;
        u64 extent_start = 0;
        u64 extent_end = 0;
        u64 objectid = inode->i_ino;
        u32 found_type;
        struct btrfs_path *path = NULL;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_file_extent_item *item;
        struct extent_buffer *leaf;
        struct btrfs_key found_key;
        struct extent_map *em = NULL;
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct btrfs_trans_handle *trans = NULL;

        /*
         * Re-entered once with 'trans' set when an inline extent has to be
         * written back (see the create branch below).
         */
again:
        /* first try the in-memory extent map tree */
        spin_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, start, len);
        if (em)
                em->bdev = root->fs_info->fs_devices->latest_bdev;
        spin_unlock(&em_tree->lock);

        if (em) {
                /*
                 * A cached map is only used if it covers 'start', and an
                 * inline map is not used when a page was passed in; in both
                 * rejected cases the btree is consulted instead.
                 */
                if (em->start > start || em->start + em->len <= start)
                        free_extent_map(em);
                else if (em->block_start == EXTENT_MAP_INLINE && page)
                        free_extent_map(em);
                else
                        goto out;
        }
        em = alloc_extent_map(GFP_NOFS);
        if (!em) {
                err = -ENOMEM;
                goto out;
        }
        em->bdev = root->fs_info->fs_devices->latest_bdev;
        em->start = EXTENT_MAP_HOLE;
        em->len = (u64)-1;

        /* the path is kept across the 'again' retry */
        if (!path) {
                path = btrfs_alloc_path();
                BUG_ON(!path);
        }

        ret = btrfs_lookup_file_extent(trans, root, path,
                                       objectid, start, trans != NULL);
        if (ret < 0) {
                err = ret;
                goto out;
        }

        /* no exact hit: look at the item in the previous slot, if any */
        if (ret != 0) {
                if (path->slots[0] == 0)
                        goto not_found;
                path->slots[0]--;
        }

        leaf = path->nodes[0];
        item = btrfs_item_ptr(leaf, path->slots[0],
                              struct btrfs_file_extent_item);
        /* are we inside the extent that was found? */
        btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
        found_type = btrfs_key_type(&found_key);
        if (found_key.objectid != objectid ||
            found_type != BTRFS_EXTENT_DATA_KEY) {
                goto not_found;
        }

        /* from here on found_type holds the file extent type, not the key type */
        found_type = btrfs_file_extent_type(leaf, item);
        extent_start = found_key.offset;
        if (found_type == BTRFS_FILE_EXTENT_REG) {
                extent_end = extent_start +
                       btrfs_file_extent_num_bytes(leaf, item);
                err = 0;
                /* 'start' is outside the extent found: map it as a hole */
                if (start < extent_start || start >= extent_end) {
                        em->start = start;
                        if (start < extent_start) {
                                if (start + len <= extent_start)
                                        goto not_found;
                                em->len = extent_end - extent_start;
                        } else {
                                em->len = len;
                        }
                        goto not_found_em;
                }
                bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
                /* a disk bytenr of zero is mapped as a hole */
                if (bytenr == 0) {
                        em->start = extent_start;
                        em->len = extent_end - extent_start;
                        em->block_start = EXTENT_MAP_HOLE;
                        goto insert;
                }
                bytenr += btrfs_file_extent_offset(leaf, item);
                em->block_start = bytenr;
                em->start = extent_start;
                em->len = extent_end - extent_start;
                goto insert;
        } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
                u64 page_start;
                unsigned long ptr;
                char *map;
                size_t size;
                size_t extent_offset;
                size_t copy_size;

                size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf,
                                                    path->slots[0]));
                /* round the end of the inline data up to a sector boundary */
                extent_end = (extent_start + size + root->sectorsize - 1) &
                        ~((u64)root->sectorsize - 1);
                /* 'start' is outside the extent found: map it as a hole */
                if (start < extent_start || start >= extent_end) {
                        em->start = start;
                        if (start < extent_start) {
                                if (start + len <= extent_start)
                                        goto not_found;
                                em->len = extent_end - extent_start;
                        } else {
                                em->len = len;
                        }
                        goto not_found_em;
                }
                em->block_start = EXTENT_MAP_INLINE;

                /*
                 * Without a page there is nothing to copy; the map is
                 * returned as is and is not added to the tree.
                 */
                if (!page) {
                        em->start = extent_start;
                        em->len = size;
                        goto out;
                }

                page_start = page_offset(page) + pg_offset;
                extent_offset = page_start - extent_start;
                /* copy at most to the end of the page or of the inline data */
                copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
                                size - extent_offset);
                em->start = extent_start + extent_offset;
                em->len = (copy_size + root->sectorsize - 1) &
                        ~((u64)root->sectorsize - 1);
                map = kmap(page);
                ptr = btrfs_file_extent_inline_start(item) + extent_offset;
                if (create == 0 && !PageUptodate(page)) {
                        /* fill the page from the inline data in the leaf */
                        read_extent_buffer(leaf, map + pg_offset, ptr,
                                           copy_size);
                        flush_dcache_page(page);
                } else if (create && PageUptodate(page)) {
                        /*
                         * Modifying the leaf needs a transaction: drop
                         * everything acquired so far, join one and redo the
                         * lookup from the top.
                         */
                        if (!trans) {
                                kunmap(page);
                                free_extent_map(em);
                                em = NULL;
                                btrfs_release_path(root, path);
                                trans = btrfs_join_transaction(root, 1);
                                goto again;
                        }
                        /* store the page contents into the inline extent */
                        write_extent_buffer(leaf, map + pg_offset, ptr,
                                            copy_size);
                        btrfs_mark_buffer_dirty(leaf);
                }
                kunmap(page);
                set_extent_uptodate(io_tree, em->start,
                                    extent_map_end(em) - 1, GFP_NOFS);
                goto insert;
        } else {
                /* unknown extent type: warn and fall through to a hole */
                printk("unkknown found_type %d\n", found_type);
                WARN_ON(1);
        }
not_found:
        /* nothing on disk for this range: map exactly what was asked for */
        em->start = start;
        em->len = len;
not_found_em:
        em->block_start = EXTENT_MAP_HOLE;
insert:
        btrfs_release_path(root, path);
        /* sanity check: the map built above has to cover 'start' */
        if (em->start > start || extent_map_end(em) <= start) {
                printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len);
                err = -EIO;
                goto out;
        }

        err = 0;
        spin_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
        /* it is possible that someone inserted the extent into the tree
         * while we had the lock dropped.  It is also possible that
         * an overlapping map exists in the tree
         */
        if (ret == -EEXIST) {
                struct extent_map *existing;

                ret = 0;

                existing = lookup_extent_mapping(em_tree, start, len);
                /* an existing map that does not cover 'start' is of no use */
                if (existing && (existing->start > start ||
                    existing->start + existing->len <= start)) {
                        free_extent_map(existing);
                        existing = NULL;
                }
                if (!existing) {
                        /*
                         * Look for a map overlapping the new one and, if
                         * there is one, retry with 'em' trimmed to a single
                         * sector starting at 'start'.
                         */
                        existing = lookup_extent_mapping(em_tree, em->start,
                                                         em->len);
                        if (existing) {
                                err = merge_extent_mapping(em_tree, existing,
                                                           em, start,
                                                           root->sectorsize);
                                free_extent_map(existing);
                                if (err) {
                                        free_extent_map(em);
                                        em = NULL;
                                }
                        } else {
                                err = -EIO;
                                printk("failing to insert %Lu %Lu\n",
                                       start, len);
                                free_extent_map(em);
                                em = NULL;
                        }
                } else {
                        /* use the map that is already in the tree */
                        free_extent_map(em);
                        em = existing;
                        err = 0;
                }
        }
        spin_unlock(&em_tree->lock);
out:
        if (path)
                btrfs_free_path(path);
        if (trans) {
                /* an earlier error takes precedence over the end result */
                ret = btrfs_end_transaction(trans, root);
                if (!err) {
                        err = ret;
                }
        }
        if (err) {
                free_extent_map(em);
                WARN_ON(1);
                return ERR_PTR(err);
        }
        return em;
}
3133
3134 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
3135                         const struct iovec *iov, loff_t offset,
3136                         unsigned long nr_segs)
3137 {
3138         return -EINVAL;
3139 }
3140
3141 static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
3142 {
3143         return extent_bmap(mapping, iblock, btrfs_get_extent);
3144 }
3145
3146 int btrfs_readpage(struct file *file, struct page *page)
3147 {
3148         struct extent_io_tree *tree;
3149         tree = &BTRFS_I(page->mapping->host)->io_tree;
3150         return extent_read_full_page(tree, page, btrfs_get_extent);
3151 }
3152
3153 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
3154 {
3155         struct extent_io_tree *tree;
3156
3157
3158         if (current->flags & PF_MEMALLOC) {
3159                 redirty_page_for_writepage(wbc, page);
3160                 unlock_page(page);
3161                 return 0;
3162         }
3163         tree = &BTRFS_I(page->mapping->host)->io_tree;
3164         return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
3165 }
3166
3167 int btrfs_writepages(struct address_space *mapping,
3168                      struct writeback_control *wbc)
3169 {
3170         struct extent_io_tree *tree;
3171         tree = &BTRFS_I(mapping->host)->io_tree;
3172         return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
3173 }
3174
3175 static int
3176 btrfs_readpages(struct file *file, struct address_space *mapping,
3177                 struct list_head *pages, unsigned nr_pages)
3178 {
3179         struct extent_io_tree *tree;
3180         tree = &BTRFS_I(mapping->host)->io_tree;
3181         return extent_readpages(tree, mapping, pages, nr_pages,
3182                                 btrfs_get_extent);
3183 }
3184 static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
3185 {
3186         struct extent_io_tree *tree;
3187         struct extent_map_tree *map;
3188         int ret;
3189
3190         tree = &BTRFS_I(page->mapping->host)->io_tree;
3191         map = &BTRFS_I(page->mapping->host)->extent_tree;
3192         ret = try_release_extent_mapping(map, tree, page, gfp_flags);
3193         if (ret == 1) {
3194                 ClearPagePrivate(page);
3195                 set_page_private(page, 0);
3196                 page_cache_release(page);
3197         }
3198         return ret;
3199 }
3200
3201 static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
3202 {
3203         if (PageWriteback(page) || PageDirty(page))
3204                 return 0;
3205         return __btrfs_releasepage(page, gfp_flags);
3206 }
3207
/*
 * ->invalidatepage hook.
 *
 * A non-zero @offset only attempts a normal btrfs_releasepage().  For
 * offset 0 the extent state bits covering the page are cleared, any
 * ordered extent found at the page's file offset is finished, and the
 * page's private state is torn down.
 */
static void btrfs_invalidatepage(struct page *page, unsigned long offset)
{
        struct extent_io_tree *tree;
        struct btrfs_ordered_extent *ordered;
        u64 page_start = page_offset(page);
        u64 page_end = page_start + PAGE_CACHE_SIZE - 1;

        /* let any writeback already in flight on this page finish first */
        wait_on_page_writeback(page);
        tree = &BTRFS_I(page->mapping->host)->io_tree;
        if (offset) {
                /* non-zero offset: just try to release, nothing is cleared */
                btrfs_releasepage(page, GFP_NOFS);
                return;
        }

        lock_extent(tree, page_start, page_end, GFP_NOFS);
        ordered = btrfs_lookup_ordered_extent(page->mapping->host,
                                           page_offset(page));
        if (ordered) {
                /*
                 * IO on this page will never be started, so we need
                 * to account for any ordered extents now
                 */
                /*
                 * EXTENT_LOCKED is among the bits cleared here, so the
                 * range is not locked while btrfs_finish_ordered_io()
                 * runs; it is locked again right after the put.
                 */
                clear_extent_bit(tree, page_start, page_end,
                                 EXTENT_DIRTY | EXTENT_DELALLOC |
                                 EXTENT_LOCKED, 1, 0, GFP_NOFS);
                btrfs_finish_ordered_io(page->mapping->host,
                                        page_start, page_end);
                /* drop the reference taken by the lookup above */
                btrfs_put_ordered_extent(ordered);
                lock_extent(tree, page_start, page_end, GFP_NOFS);
        }
        /* final clear; this also drops EXTENT_LOCKED and EXTENT_ORDERED */
        clear_extent_bit(tree, page_start, page_end,
                 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
                 EXTENT_ORDERED,
                 1, 1, GFP_NOFS);
        __btrfs_releasepage(page, GFP_NOFS);

        ClearPageChecked(page);
        /*
         * If __btrfs_releasepage() left the private flag set, clear it
         * here and drop the page reference ourselves.
         */
        if (PagePrivate(page)) {
                ClearPagePrivate(page);
                set_page_private(page, 0);
                page_cache_release(page);
        }
}
3251
3252 /*
3253  * btrfs_page_mkwrite() is not allowed to change the file size as it gets
3254  * called from a page fault handler when a page is first dirtied. Hence we must
3255  * be careful to check for EOF conditions here. We set the page up correctly
3256  * for a written page which means we get ENOSPC checking when writing into
3257  * holes and correct delalloc and unwritten extent mapping on filesystems that
3258  * support these features.
3259  *
3260  * We are not allowed to take the i_mutex here so we have to play games to
3261  * protect against truncate races as the page could now be beyond EOF.  Because
3262  * vmtruncate() writes the inode size before removing pages, once we have the
3263  * page lock we can determine safely if the page is beyond EOF. If it is not
3264  * beyond EOF, then the page is guaranteed safe against truncation until we
3265  * unlock the page.
3266  */
int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
{
        struct inode *inode = fdentry(vma->vm_file)->d_inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct btrfs_ordered_extent *ordered;
        char *kaddr;
        unsigned long zero_start;
        loff_t size;
        int ret;
        u64 page_start;
        u64 page_end;

        /* bail out before taking any lock if the free space check fails */
        ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
        if (ret)
                goto out;

        /* result reported when the page turns out to be truncated away */
        ret = -EINVAL;
again:
        lock_page(page);
        /* i_size is sampled under the page lock; see the comment above */
        size = i_size_read(inode);
        page_start = page_offset(page);
        page_end = page_start + PAGE_CACHE_SIZE - 1;

        if ((page->mapping != inode->i_mapping) ||
            (page_start >= size)) {
                /* page got truncated out from underneath us */
                goto out_unlock;
        }
        wait_on_page_writeback(page);

        lock_extent(io_tree, page_start, page_end, GFP_NOFS);
        set_page_extent_mapped(page);

        /*
         * we can't set the delalloc bits if there are pending ordered
         * extents.  Drop our locks and wait for them to finish
         */
        ordered = btrfs_lookup_ordered_extent(inode, page_start);
        if (ordered) {
                /* release in reverse order of acquisition, then retry */
                unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
                unlock_page(page);
                btrfs_start_ordered_extent(inode, ordered, 1);
                btrfs_put_ordered_extent(ordered);
                goto again;
        }

        btrfs_set_extent_delalloc(inode, page_start, page_end);
        ret = 0;

        /* page is wholly or partially inside EOF */
        if (page_start + PAGE_CACHE_SIZE > size)
                /* EOF falls inside this page: offset of EOF within it */
                zero_start = size & ~PAGE_CACHE_MASK;
        else
                zero_start = PAGE_CACHE_SIZE;

        /* zero the part of the page that lies beyond EOF */
        if (zero_start != PAGE_CACHE_SIZE) {
                kaddr = kmap(page);
                memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
                flush_dcache_page(page);
                kunmap(page);
        }
        ClearPageChecked(page);
        set_page_dirty(page);
        unlock_extent(io_tree, page_start, page_end, GFP_NOFS);

out_unlock:
        unlock_page(page);
out:
        return ret;
}
3338
/*
 * ->truncate hook for regular files.
 *
 * inode->i_size already holds the new size when this runs.  The function
 * handles the page straddling the new EOF, waits for ordered IO from the
 * sector containing the new EOF onwards, and then removes the file
 * extent items past i_size inside one transaction.  The inode is on the
 * orphan list while the items are being removed.
 */
static void btrfs_truncate(struct inode *inode)
{
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret;
        struct btrfs_trans_handle *trans;
        unsigned long nr;
        u64 mask = root->sectorsize - 1;

        /* only regular files that are neither append-only nor immutable */
        if (!S_ISREG(inode->i_mode))
                return;
        if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
                return;

        btrfs_truncate_page(inode->i_mapping, inode->i_size);
        /* wait from the sector-aligned new size to the end of the file */
        btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);

        trans = btrfs_start_transaction(root, 1);
        btrfs_set_trans_block_group(trans, inode);
        btrfs_i_size_write(inode, inode->i_size);

        ret = btrfs_orphan_add(trans, inode);
        if (ret)
                goto out;
        /* FIXME, add redo link to tree so we don't leak on crash */
        /*
         * NOTE(review): the value stored in ret here is overwritten by
         * btrfs_orphan_del() below without being examined, and the return
         * value of btrfs_update_inode() is discarded.
         */
        ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size,
                                      BTRFS_EXTENT_DATA_KEY);
        btrfs_update_inode(trans, root, inode);

        ret = btrfs_orphan_del(trans, inode);
        BUG_ON(ret);

out:
        /* read blocks_used before the handle is ended */
        nr = trans->blocks_used;
        ret = btrfs_end_transaction_throttle(trans, root);
        BUG_ON(ret);
        btrfs_btree_balance_dirty(root, nr);
}
3376
3377 /*
3378  * Invalidate a single dcache entry at the root of the filesystem.
3379  * Needed after creation of snapshot or subvolume.
3380  */
3381 void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name,
3382                                   int namelen)
3383 {
3384         struct dentry *alias, *entry;
3385         struct qstr qstr;
3386
3387         alias = d_find_alias(root->fs_info->sb->s_root->d_inode);
3388         if (alias) {
3389                 qstr.name = name;
3390                 qstr.len = namelen;
3391                 /* change me if btrfs ever gets a d_hash operation */
3392                 qstr.hash = full_name_hash(qstr.name, qstr.len);
3393                 entry = d_lookup(alias, &qstr);
3394                 dput(alias);
3395                 if (entry) {
3396                         d_invalidate(entry);
3397                         dput(entry);
3398                 }
3399         }
3400 }
3401
3402 /*
3403  * create a new subvolume directory/inode (helper for the ioctl).
3404  */
3405 int btrfs_create_subvol_root(struct btrfs_root *new_root,
3406                 struct btrfs_trans_handle *trans, u64 new_dirid,
3407                 struct btrfs_block_group_cache *block_group)
3408 {
3409         struct inode *inode;
3410         u64 index = 0;
3411
3412         inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
3413                                 new_dirid, block_group, S_IFDIR | 0700, &index);
3414         if (IS_ERR(inode))
3415                 return PTR_ERR(inode);
3416         inode->i_op = &btrfs_dir_inode_operations;
3417         inode->i_fop = &btrfs_dir_file_operations;
3418         new_root->inode = inode;
3419
3420         inode->i_nlink = 1;
3421         btrfs_i_size_write(inode, 0);
3422
3423         return btrfs_update_inode(trans, new_root, inode);
3424 }
3425
3426 /* helper function for file defrag and space balancing.  This
3427  * forces readahead on a given range of bytes in an inode
3428  */
3429 unsigned long btrfs_force_ra(struct address_space *mapping,
3430                               struct file_ra_state *ra, struct file *file,
3431                               pgoff_t offset, pgoff_t last_index)
3432 {
3433         pgoff_t req_size = last_index - offset + 1;
3434
3435         page_cache_sync_readahead(mapping, ra, file, offset, req_size);
3436         return offset + req_size;
3437 }
3438
3439 struct inode *btrfs_alloc_inode(struct super_block *sb)
3440 {
3441         struct btrfs_inode *ei;
3442
3443         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
3444         if (!ei)
3445                 return NULL;
3446         ei->last_trans = 0;
3447         ei->logged_trans = 0;
3448         btrfs_ordered_inode_tree_init(&ei->ordered_tree);
3449         ei->i_acl = BTRFS_ACL_NOT_CACHED;
3450         ei->i_default_acl = BTRFS_ACL_NOT_CACHED;
3451         INIT_LIST_HEAD(&ei->i_orphan);
3452         return &ei->vfs_inode;
3453 }
3454
3455 void btrfs_destroy_inode(struct inode *inode)
3456 {
3457         struct btrfs_ordered_extent *ordered;
3458         WARN_ON(!list_empty(&inode->i_dentry));
3459         WARN_ON(inode->i_data.nrpages);
3460
3461         if (BTRFS_I(inode)->i_acl &&
3462             BTRFS_I(inode)->i_acl != BTRFS_ACL_NOT_CACHED)
3463                 posix_acl_release(BTRFS_I(inode)->i_acl);
3464         if (BTRFS_I(inode)->i_default_acl &&
3465             BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED)
3466                 posix_acl_release(BTRFS_I(inode)->i_default_acl);
3467
3468         spin_lock(&BTRFS_I(inode)->root->list_lock);
3469         if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
3470                 printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan"
3471                        " list\n", inode->i_ino);
3472                 dump_stack();
3473         }
3474         spin_unlock(&BTRFS_I(inode)->root->list_lock);
3475
3476         while(1) {
3477                 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
3478                 if (!ordered)
3479                         break;
3480                 else {
3481                         printk("found ordered extent %Lu %Lu\n",
3482                                ordered->file_offset, ordered->len);
3483                         btrfs_remove_ordered_extent(inode, ordered);
3484                         btrfs_put_ordered_extent(ordered);
3485                         btrfs_put_ordered_extent(ordered);
3486                 }
3487         }
3488         btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
3489         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
3490 }
3491
3492 static void init_once(void *foo)
3493 {
3494         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
3495
3496         inode_init_once(&ei->vfs_inode);
3497 }
3498
3499 void btrfs_destroy_cachep(void)
3500 {
3501         if (btrfs_inode_cachep)
3502                 kmem_cache_destroy(btrfs_inode_cachep);
3503         if (btrfs_trans_handle_cachep)
3504                 kmem_cache_destroy(btrfs_trans_handle_cachep);
3505         if (btrfs_transaction_cachep)
3506                 kmem_cache_destroy(btrfs_transaction_cachep);
3507         if (btrfs_bit_radix_cachep)
3508                 kmem_cache_destroy(btrfs_bit_radix_cachep);
3509         if (btrfs_path_cachep)
3510                 kmem_cache_destroy(btrfs_path_cachep);
3511 }
3512
3513 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
3514                                        unsigned long extra_flags,
3515                                        void (*ctor)(void *))
3516 {
3517         return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
3518                                  SLAB_MEM_SPREAD | extra_flags), ctor);
3519 }
3520
3521 int btrfs_init_cachep(void)
3522 {
3523         btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache",
3524                                           sizeof(struct btrfs_inode),
3525                                           0, init_once);
3526         if (!btrfs_inode_cachep)
3527                 goto fail;
3528         btrfs_trans_handle_cachep =
3529                         btrfs_cache_create("btrfs_trans_handle_cache",
3530                                            sizeof(struct btrfs_trans_handle),
3531                                            0, NULL);
3532         if (!btrfs_trans_handle_cachep)
3533                 goto fail;
3534         btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache",
3535                                              sizeof(struct btrfs_transaction),
3536                                              0, NULL);
3537         if (!btrfs_transaction_cachep)
3538                 goto fail;
3539         btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache",
3540                                          sizeof(struct btrfs_path),
3541                                          0, NULL);
3542         if (!btrfs_path_cachep)
3543                 goto fail;
3544         btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256,
3545                                               SLAB_DESTROY_BY_RCU, NULL);
3546         if (!btrfs_bit_radix_cachep)
3547                 goto fail;
3548         return 0;
3549 fail:
3550         btrfs_destroy_cachep();
3551         return -ENOMEM;
3552 }
3553
3554 static int btrfs_getattr(struct vfsmount *mnt,
3555                          struct dentry *dentry, struct kstat *stat)
3556 {
3557         struct inode *inode = dentry->d_inode;
3558         generic_fillattr(inode, stat);
3559         stat->blksize = PAGE_CACHE_SIZE;
3560         stat->blocks = inode->i_blocks + (BTRFS_I(inode)->delalloc_bytes >> 9);
3561         return 0;
3562 }
3563
/*
 * ->rename hook: move @old_dentry in @old_dir to the name @new_dentry in
 * @new_dir, replacing an existing target if there is one.
 *
 * Returns 0 on success, -ENOTEMPTY when a directory would replace a
 * target whose size exceeds BTRFS_EMPTY_DIR_SIZE, or the error of the
 * first failing step.
 *
 * NOTE(review): nothing here undoes the link count bump, timestamp
 * updates or completed unlinks when a later step jumps to out_fail.
 */
static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
                           struct inode * new_dir,struct dentry *new_dentry)
{
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(old_dir)->root;
        struct inode *new_inode = new_dentry->d_inode;
        struct inode *old_inode = old_dentry->d_inode;
        struct timespec ctime = CURRENT_TIME;
        u64 index = 0;
        int ret;

        /* a directory may only replace a target that counts as empty */
        if (S_ISDIR(old_inode->i_mode) && new_inode &&
            new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
                return -ENOTEMPTY;
        }

        ret = btrfs_check_free_space(root, 1, 0);
        if (ret)
                goto out_unlock;

        trans = btrfs_start_transaction(root, 1);

        btrfs_set_trans_block_group(trans, new_dir);

        /*
         * Bump the link count before the old name is unlinked below.
         * NOTE(review): presumably this offsets the decrement done by
         * btrfs_unlink_inode() -- confirm.
         */
        btrfs_inc_nlink(old_dentry->d_inode);
        old_dir->i_ctime = old_dir->i_mtime = ctime;
        new_dir->i_ctime = new_dir->i_mtime = ctime;
        old_inode->i_ctime = ctime;

        /* remove the old name */
        ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode,
                                 old_dentry->d_name.name,
                                 old_dentry->d_name.len);
        if (ret)
                goto out_fail;

        /* an existing target is unlinked, and orphaned if now unlinked */
        if (new_inode) {
                new_inode->i_ctime = CURRENT_TIME;
                ret = btrfs_unlink_inode(trans, root, new_dir,
                                         new_dentry->d_inode,
                                         new_dentry->d_name.name,
                                         new_dentry->d_name.len);
                if (ret)
                        goto out_fail;
                if (new_inode->i_nlink == 0) {
                        ret = btrfs_orphan_add(trans, new_dentry->d_inode);
                        if (ret)
                                goto out_fail;
                }

        }
        /* obtain a directory index in new_dir, then add the new name */
        ret = btrfs_set_inode_index(new_dir, old_inode, &index);
        if (ret)
                goto out_fail;

        ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode,
                             old_inode, new_dentry->d_name.name,
                             new_dentry->d_name.len, 1, index);
        /* this check only falls through to the label that follows it */
        if (ret)
                goto out_fail;

out_fail:
        /* the return value of ending the transaction is not examined */
        btrfs_end_transaction_throttle(trans, root);
out_unlock:
        return ret;
}
3629
/*
 * some fairly slow code that needs optimization. This walks the list
 * of all the inodes with pending delalloc and forces them to disk.
 *
 * The loop runs until fs_info->delalloc_inodes is empty, then waits for
 * fs_info->nr_async_submits to reach zero.  Always returns 0.
 */
int btrfs_start_delalloc_inodes(struct btrfs_root *root)
{
        struct list_head *head = &root->fs_info->delalloc_inodes;
        struct btrfs_inode *binode;
        struct inode *inode;
        unsigned long flags;

        spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
        while(!list_empty(head)) {
                binode = list_entry(head->next, struct btrfs_inode,
                                    delalloc_inodes);
                /*
                 * Only an inode whose igrab() fails is unlinked from the
                 * list here.  NOTE(review): a grabbed inode stays at the
                 * head, so the loop relies on the flush below eventually
                 * taking it off the list -- confirm where that happens.
                 */
                inode = igrab(&binode->vfs_inode);
                if (!inode)
                        list_del_init(&binode->delalloc_inodes);
                /* the flush is done with delalloc_lock dropped */
                spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
                if (inode) {
                        filemap_flush(inode->i_mapping);
                        iput(inode);
                }
                cond_resched();
                spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
        }
        spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);

        /* the filemap_flush will queue IO into the worker threads, but
         * we have to make sure the IO is actually started and that
         * ordered extents get created before we return
         */
        /* async_submit_draining is raised for the duration of the wait */
        atomic_inc(&root->fs_info->async_submit_draining);
        while(atomic_read(&root->fs_info->nr_async_submits)) {
                wait_event(root->fs_info->async_submit_wait,
                   (atomic_read(&root->fs_info->nr_async_submits) == 0));
        }
        atomic_dec(&root->fs_info->async_submit_draining);
        return 0;
}
3670
3671 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
3672                          const char *symname)
3673 {
3674         struct btrfs_trans_handle *trans;
3675         struct btrfs_root *root = BTRFS_I(dir)->root;
3676         struct btrfs_path *path;
3677         struct btrfs_key key;
3678         struct inode *inode = NULL;
3679         int err;
3680         int drop_inode = 0;
3681         u64 objectid;
3682         u64 index = 0 ;
3683         int name_len;
3684         int datasize;
3685         unsigned long ptr;
3686         struct btrfs_file_extent_item *ei;
3687         struct extent_buffer *leaf;
3688         unsigned long nr = 0;
3689
3690         name_len = strlen(symname) + 1;
3691         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
3692                 return -ENAMETOOLONG;
3693
3694         err = btrfs_check_free_space(root, 1, 0);
3695         if (err)
3696                 goto out_fail;
3697
3698         trans = btrfs_start_transaction(root, 1);
3699         btrfs_set_trans_block_group(trans, dir);
3700
3701         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
3702         if (err) {
3703                 err = -ENOSPC;
3704                 goto out_unlock;
3705         }
3706
3707         inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
3708                                 dentry->d_name.len,
3709                                 dentry->d_parent->d_inode->i_ino, objectid,
3710                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
3711                                 &index);
3712         err = PTR_ERR(inode);
3713         if (IS_ERR(inode))
3714                 goto out_unlock;
3715
3716         err = btrfs_init_acl(inode, dir);
3717         if (err) {
3718                 drop_inode = 1;
3719                 goto out_unlock;
3720         }
3721
3722         btrfs_set_trans_block_group(trans, inode);
3723         err = btrfs_add_nondir(trans, dentry, inode, 0, index);
3724         if (err)
3725                 drop_inode = 1;
3726         else {
3727                 inode->i_mapping->a_ops = &btrfs_aops;
3728                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3729                 inode->i_fop = &btrfs_file_operations;
3730                 inode->i_op = &btrfs_file_inode_operations;
3731                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
3732         }
3733         dir->i_sb->s_dirt = 1;
3734         btrfs_update_inode_block_group(trans, inode);
3735         btrfs_update_inode_block_group(trans, dir);
3736         if (drop_inode)
3737                 goto out_unlock;
3738
3739         path = btrfs_alloc_path();
3740         BUG_ON(!path);
3741         key.objectid = inode->i_ino;
3742         key.offset = 0;
3743         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
3744         datasize = btrfs_file_extent_calc_inline_size(name_len);
3745         err = btrfs_insert_empty_item(trans, root, path, &key,
3746                                       datasize);
3747         if (err) {
3748                 drop_inode = 1;
3749                 goto out_unlock;
3750         }
3751         leaf = path->nodes[0];
3752         ei = btrfs_item_ptr(leaf, path->slots[0],
3753                             struct btrfs_file_extent_item);
3754         btrfs_set_file_extent_generation(leaf, ei, trans->transid);
3755         btrfs_set_file_extent_type(leaf, ei,
3756                                    BTRFS_FILE_EXTENT_INLINE);
3757         ptr = btrfs_file_extent_inline_start(ei);
3758         write_extent_buffer(leaf, symname, ptr, name_len);
3759         btrfs_mark_buffer_dirty(leaf);
3760         btrfs_free_path(path);
3761
3762         inode->i_op = &btrfs_symlink_inode_operations;
3763         inode->i_mapping->a_ops = &btrfs_symlink_aops;
3764         inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3765         btrfs_i_size_write(inode, name_len - 1);
3766         err = btrfs_update_inode(trans, root, inode);
3767         if (err)
3768                 drop_inode = 1;
3769
3770 out_unlock:
3771         nr = trans->blocks_used;
3772         btrfs_end_transaction_throttle(trans, root);
3773 out_fail:
3774         if (drop_inode) {
3775                 inode_dec_link_count(inode);
3776                 iput(inode);
3777         }
3778         btrfs_btree_balance_dirty(root, nr);
3779         return err;
3780 }
3781
/*
 * ->set_page_dirty hook: delegates to __set_page_dirty_nobuffers() and
 * returns its result.
 */
static int btrfs_set_page_dirty(struct page *page)
{
        int result;

        result = __set_page_dirty_nobuffers(page);
        return result;
}
3786
3787 static int btrfs_permission(struct inode *inode, int mask)
3788 {
3789         if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE))
3790                 return -EACCES;
3791         return generic_permission(inode, mask, btrfs_check_acl);
3792 }
3793
3794 static struct inode_operations btrfs_dir_inode_operations = {
3795         .lookup         = btrfs_lookup,
3796         .create         = btrfs_create,
3797         .unlink         = btrfs_unlink,
3798         .link           = btrfs_link,
3799         .mkdir          = btrfs_mkdir,
3800         .rmdir          = btrfs_rmdir,
3801         .rename         = btrfs_rename,
3802         .symlink        = btrfs_symlink,
3803         .setattr        = btrfs_setattr,
3804         .mknod          = btrfs_mknod,
3805         .setxattr       = btrfs_setxattr,
3806         .getxattr       = btrfs_getxattr,
3807         .listxattr      = btrfs_listxattr,
3808         .removexattr    = btrfs_removexattr,
3809         .permission     = btrfs_permission,
3810 };
3811 static struct inode_operations btrfs_dir_ro_inode_operations = {
3812         .lookup         = btrfs_lookup,
3813         .permission     = btrfs_permission,
3814 };
3815 static struct file_operations btrfs_dir_file_operations = {
3816         .llseek         = generic_file_llseek,
3817         .read           = generic_read_dir,
3818         .readdir        = btrfs_real_readdir,
3819         .unlocked_ioctl = btrfs_ioctl,
3820 #ifdef CONFIG_COMPAT
3821         .compat_ioctl   = btrfs_ioctl,
3822 #endif
3823         .release        = btrfs_release_file,
3824         .fsync          = btrfs_sync_file,
3825 };
3826
3827 static struct extent_io_ops btrfs_extent_io_ops = {
3828         .fill_delalloc = run_delalloc_range,
3829         .submit_bio_hook = btrfs_submit_bio_hook,
3830         .merge_bio_hook = btrfs_merge_bio_hook,
3831         .readpage_end_io_hook = btrfs_readpage_end_io_hook,
3832         .writepage_end_io_hook = btrfs_writepage_end_io_hook,
3833         .writepage_start_hook = btrfs_writepage_start_hook,
3834         .readpage_io_failed_hook = btrfs_io_failed_hook,
3835         .set_bit_hook = btrfs_set_bit_hook,
3836         .clear_bit_hook = btrfs_clear_bit_hook,
3837 };
3838
3839 static struct address_space_operations btrfs_aops = {
3840         .readpage       = btrfs_readpage,
3841         .writepage      = btrfs_writepage,
3842         .writepages     = btrfs_writepages,
3843         .readpages      = btrfs_readpages,
3844         .sync_page      = block_sync_page,
3845         .bmap           = btrfs_bmap,
3846         .direct_IO      = btrfs_direct_IO,
3847         .invalidatepage = btrfs_invalidatepage,
3848         .releasepage    = btrfs_releasepage,
3849         .set_page_dirty = btrfs_set_page_dirty,
3850 };
3851
3852 static struct address_space_operations btrfs_symlink_aops = {
3853         .readpage       = btrfs_readpage,
3854         .writepage      = btrfs_writepage,
3855         .invalidatepage = btrfs_invalidatepage,
3856         .releasepage    = btrfs_releasepage,
3857 };
3858
3859 static struct inode_operations btrfs_file_inode_operations = {
3860         .truncate       = btrfs_truncate,
3861         .getattr        = btrfs_getattr,
3862         .setattr        = btrfs_setattr,
3863         .setxattr       = btrfs_setxattr,
3864         .getxattr       = btrfs_getxattr,
3865         .listxattr      = btrfs_listxattr,
3866         .removexattr    = btrfs_removexattr,
3867         .permission     = btrfs_permission,
3868 };
3869 static struct inode_operations btrfs_special_inode_operations = {
3870         .getattr        = btrfs_getattr,
3871         .setattr        = btrfs_setattr,
3872         .permission     = btrfs_permission,
3873         .setxattr       = btrfs_setxattr,
3874         .getxattr       = btrfs_getxattr,
3875         .listxattr      = btrfs_listxattr,
3876         .removexattr    = btrfs_removexattr,
3877 };
3878 static struct inode_operations btrfs_symlink_inode_operations = {
3879         .readlink       = generic_readlink,
3880         .follow_link    = page_follow_link_light,
3881         .put_link       = page_put_link,
3882         .permission     = btrfs_permission,
3883 };