Btrfs: Split the extent_map code into two parts
author Chris Mason <chris.mason@oracle.com>
Thu, 24 Jan 2008 21:13:08 +0000 (16:13 -0500)
committer Chris Mason <chris.mason@oracle.com>
Thu, 25 Sep 2008 15:03:59 +0000 (11:03 -0400)
There is now extent_map for mapping offsets in the file to disk and
extent_io for state tracking, IO submission and extent_buffers.

The new extent_map code shifts from [start,end] pairs to [start,len], and
pushes the locking out into the caller.  This allows a few performance
optimizations and is easier to use.

A number of extent_map usage bugs were fixed, mostly failures
to remove extent_map entries when changing the file.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
14 files changed:
fs/btrfs/Makefile
fs/btrfs/btrfs_inode.h
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c [new file with mode: 0644]
fs/btrfs/extent_io.h [new file with mode: 0644]
fs/btrfs/extent_map.c
fs/btrfs/extent_map.h
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h

index 72cc3e16741f2435bdd64dad669aa3d0bcd9f9dd..9c61609f371495d612fd0d94f36027512023cc84 100644 (file)
@@ -5,7 +5,8 @@ obj-m  := btrfs.o
 btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
           hash.o file-item.o inode-item.o inode-map.o disk-io.o \
           transaction.o bit-radix.o inode.o file.o tree-defrag.o \
-          extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o
+          extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
+          extent_io.o
 
 ifeq ($(CONFIG_FS_POSIX_ACL),y)
 btrfs-y += acl.o
index 9fc99883c70a86f2802715c54c548c4ef56cb50f..9a0647dc5a0b0ace76f89e68b1728b7a4ccc8d62 100644 (file)
@@ -20,6 +20,7 @@
 #define __BTRFS_I__
 
 #include "extent_map.h"
+#include "extent_io.h"
 
 /* in memory btrfs inode */
 struct btrfs_inode {
@@ -27,6 +28,7 @@ struct btrfs_inode {
        struct btrfs_block_group_cache *block_group;
        struct btrfs_key location;
        struct extent_map_tree extent_tree;
+       struct extent_io_tree io_tree;
        struct inode vfs_inode;
 
        u64 ordered_trans;
index b9f2975b55c9b854607e86bd6426b58dca29c72a..6c65473e0fe3266f563a910eb6b0187bf247ead7 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/completion.h>
 #include <asm/kmap_types.h>
 #include "bit-radix.h"
+#include "extent_io.h"
 #include "extent_map.h"
 
 struct btrfs_trans_handle;
@@ -314,11 +315,11 @@ struct btrfs_fs_info {
        struct btrfs_root *tree_root;
        struct radix_tree_root fs_roots_radix;
 
-       struct extent_map_tree free_space_cache;
-       struct extent_map_tree block_group_cache;
-       struct extent_map_tree pinned_extents;
-       struct extent_map_tree pending_del;
-       struct extent_map_tree extent_ins;
+       struct extent_io_tree free_space_cache;
+       struct extent_io_tree block_group_cache;
+       struct extent_io_tree pinned_extents;
+       struct extent_io_tree pending_del;
+       struct extent_io_tree extent_ins;
 
        u64 generation;
        u64 last_trans_committed;
@@ -956,7 +957,7 @@ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
                                  u64 first_extent);
 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root);
-int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy);
+int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
                                                         btrfs_fs_info *info,
                                                         u64 bytenr);
@@ -1001,7 +1002,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
                      u64 owner_objectid, u64 owner_offset, int pin);
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root,
-                              struct extent_map_tree *unpin);
+                              struct extent_io_tree *unpin);
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                u64 bytenr, u64 num_bytes,
index 5d1f9bca271232d95e36b2e81b4e57f8cdc6d934..4c4ebea0b2a94974799b3c9193ff1b922718c3be 100644 (file)
@@ -43,14 +43,14 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
 }
 #endif
 
-static struct extent_map_ops btree_extent_map_ops;
+static struct extent_io_ops btree_extent_io_ops;
 
 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
                                            u64 bytenr, u32 blocksize)
 {
        struct inode *btree_inode = root->fs_info->btree_inode;
        struct extent_buffer *eb;
-       eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
+       eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
                                bytenr, blocksize, GFP_NOFS);
        return eb;
 }
@@ -61,13 +61,13 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
        struct inode *btree_inode = root->fs_info->btree_inode;
        struct extent_buffer *eb;
 
-       eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
+       eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
                                 bytenr, blocksize, NULL, GFP_NOFS);
        return eb;
 }
 
 struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
-                                   size_t page_offset, u64 start, u64 end,
+                                   size_t page_offset, u64 start, u64 len,
                                    int create)
 {
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
@@ -75,7 +75,9 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
        int ret;
 
 again:
-       em = lookup_extent_mapping(em_tree, start, end);
+       spin_lock(&em_tree->lock);
+       em = lookup_extent_mapping(em_tree, start, len);
+       spin_unlock(&em_tree->lock);
        if (em) {
                goto out;
        }
@@ -85,11 +87,14 @@ again:
                goto out;
        }
        em->start = 0;
-       em->end = (i_size_read(inode) & ~((u64)PAGE_CACHE_SIZE -1)) - 1;
+       em->len = i_size_read(inode);
        em->block_start = 0;
-       em->block_end = em->end;
        em->bdev = inode->i_sb->s_bdev;
+
+       spin_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
+       spin_unlock(&em_tree->lock);
+
        if (ret == -EEXIST) {
                free_extent_map(em);
                em = NULL;
@@ -175,13 +180,13 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
 
 int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
 {
-       struct extent_map_tree *tree;
+       struct extent_io_tree *tree;
        u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 found_start;
        int found_level;
        unsigned long len;
        struct extent_buffer *eb;
-       tree = &BTRFS_I(page->mapping->host)->extent_tree;
+       tree = &BTRFS_I(page->mapping->host)->io_tree;
 
        if (page->private == EXTENT_PAGE_PRIVATE)
                goto out;
@@ -230,16 +235,16 @@ static int btree_writepage_io_hook(struct page *page, u64 start, u64 end)
 
 static int btree_writepage(struct page *page, struct writeback_control *wbc)
 {
-       struct extent_map_tree *tree;
-       tree = &BTRFS_I(page->mapping->host)->extent_tree;
+       struct extent_io_tree *tree;
+       tree = &BTRFS_I(page->mapping->host)->io_tree;
        return extent_write_full_page(tree, page, btree_get_extent, wbc);
 }
 
 static int btree_writepages(struct address_space *mapping,
                            struct writeback_control *wbc)
 {
-       struct extent_map_tree *tree;
-       tree = &BTRFS_I(mapping->host)->extent_tree;
+       struct extent_io_tree *tree;
+       tree = &BTRFS_I(mapping->host)->io_tree;
        if (wbc->sync_mode == WB_SYNC_NONE) {
                u64 num_dirty;
                u64 start = 0;
@@ -264,18 +269,20 @@ static int btree_writepages(struct address_space *mapping,
 
 int btree_readpage(struct file *file, struct page *page)
 {
-       struct extent_map_tree *tree;
-       tree = &BTRFS_I(page->mapping->host)->extent_tree;
+       struct extent_io_tree *tree;
+       tree = &BTRFS_I(page->mapping->host)->io_tree;
        return extent_read_full_page(tree, page, btree_get_extent);
 }
 
 static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags)
 {
-       struct extent_map_tree *tree;
+       struct extent_io_tree *tree;
+       struct extent_map_tree *map;
        int ret;
 
-       tree = &BTRFS_I(page->mapping->host)->extent_tree;
-       ret = try_release_extent_mapping(tree, page);
+       tree = &BTRFS_I(page->mapping->host)->io_tree;
+       map = &BTRFS_I(page->mapping->host)->extent_tree;
+       ret = try_release_extent_mapping(map, tree, page);
        if (ret == 1) {
                ClearPagePrivate(page);
                set_page_private(page, 0);
@@ -286,8 +293,8 @@ static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags)
 
 static void btree_invalidatepage(struct page *page, unsigned long offset)
 {
-       struct extent_map_tree *tree;
-       tree = &BTRFS_I(page->mapping->host)->extent_tree;
+       struct extent_io_tree *tree;
+       tree = &BTRFS_I(page->mapping->host)->io_tree;
        extent_invalidatepage(tree, page, offset);
        btree_releasepage(page, GFP_NOFS);
 }
@@ -331,7 +338,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
        buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
        if (!buf)
                return 0;
-       read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
+       read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
                                 buf, 0, 0);
        free_extent_buffer(buf);
        return ret;
@@ -342,40 +349,39 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 {
        struct extent_buffer *buf = NULL;
        struct inode *btree_inode = root->fs_info->btree_inode;
-       struct extent_map_tree *extent_tree;
+       struct extent_io_tree *io_tree;
        u64 end;
        int ret;
 
-       extent_tree = &BTRFS_I(btree_inode)->extent_tree;
+       io_tree = &BTRFS_I(btree_inode)->io_tree;
 
        buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
        if (!buf)
                return NULL;
-       read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
-                                buf, 0, 1);
+       read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1);
 
        if (buf->flags & EXTENT_CSUM)
                return buf;
 
        end = buf->start + PAGE_CACHE_SIZE - 1;
-       if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) {
+       if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) {
                buf->flags |= EXTENT_CSUM;
                return buf;
        }
 
-       lock_extent(extent_tree, buf->start, end, GFP_NOFS);
+       lock_extent(io_tree, buf->start, end, GFP_NOFS);
 
-       if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) {
+       if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) {
                buf->flags |= EXTENT_CSUM;
                goto out_unlock;
        }
 
        ret = csum_tree_block(root, buf, 1);
-       set_extent_bits(extent_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS);
+       set_extent_bits(io_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS);
        buf->flags |= EXTENT_CSUM;
 
 out_unlock:
-       unlock_extent(extent_tree, buf->start, end, GFP_NOFS);
+       unlock_extent(io_tree, buf->start, end, GFP_NOFS);
        return buf;
 }
 
@@ -385,7 +391,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
        struct inode *btree_inode = root->fs_info->btree_inode;
        if (btrfs_header_generation(buf) ==
            root->fs_info->running_transaction->transid)
-               clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree,
+               clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
                                          buf);
        return 0;
 }
@@ -394,7 +400,7 @@ int wait_on_tree_block_writeback(struct btrfs_root *root,
                                 struct extent_buffer *buf)
 {
        struct inode *btree_inode = root->fs_info->btree_inode;
-       wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->extent_tree,
+       wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->io_tree,
                                        buf);
        return 0;
 }
@@ -659,20 +665,23 @@ struct btrfs_root *open_ctree(struct super_block *sb)
        fs_info->btree_inode->i_nlink = 1;
        fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size;
        fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
-       extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree,
+       extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
                             fs_info->btree_inode->i_mapping,
                             GFP_NOFS);
-       BTRFS_I(fs_info->btree_inode)->extent_tree.ops = &btree_extent_map_ops;
+       extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree,
+                            GFP_NOFS);
+
+       BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
 
-       extent_map_tree_init(&fs_info->free_space_cache,
+       extent_io_tree_init(&fs_info->free_space_cache,
                             fs_info->btree_inode->i_mapping, GFP_NOFS);
-       extent_map_tree_init(&fs_info->block_group_cache,
+       extent_io_tree_init(&fs_info->block_group_cache,
                             fs_info->btree_inode->i_mapping, GFP_NOFS);
-       extent_map_tree_init(&fs_info->pinned_extents,
+       extent_io_tree_init(&fs_info->pinned_extents,
                             fs_info->btree_inode->i_mapping, GFP_NOFS);
-       extent_map_tree_init(&fs_info->pending_del,
+       extent_io_tree_init(&fs_info->pending_del,
                             fs_info->btree_inode->i_mapping, GFP_NOFS);
-       extent_map_tree_init(&fs_info->extent_ins,
+       extent_io_tree_init(&fs_info->extent_ins,
                             fs_info->btree_inode->i_mapping, GFP_NOFS);
        fs_info->do_barriers = 1;
        fs_info->closing = 0;
@@ -787,7 +796,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
 
        if (!btrfs_test_opt(root, NOBARRIER))
                blkdev_issue_flush(sb->s_bdev, NULL);
-       set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super);
+       set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, super);
        ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping,
                                     super->start, super->len);
        if (!btrfs_test_opt(root, NOBARRIER))
@@ -864,12 +873,12 @@ int close_ctree(struct btrfs_root *root)
 
        filemap_write_and_wait(fs_info->btree_inode->i_mapping);
 
-       extent_map_tree_empty_lru(&fs_info->free_space_cache);
-       extent_map_tree_empty_lru(&fs_info->block_group_cache);
-       extent_map_tree_empty_lru(&fs_info->pinned_extents);
-       extent_map_tree_empty_lru(&fs_info->pending_del);
-       extent_map_tree_empty_lru(&fs_info->extent_ins);
-       extent_map_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->extent_tree);
+       extent_io_tree_empty_lru(&fs_info->free_space_cache);
+       extent_io_tree_empty_lru(&fs_info->block_group_cache);
+       extent_io_tree_empty_lru(&fs_info->pinned_extents);
+       extent_io_tree_empty_lru(&fs_info->pending_del);
+       extent_io_tree_empty_lru(&fs_info->extent_ins);
+       extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
 
        truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
 
@@ -892,13 +901,13 @@ int close_ctree(struct btrfs_root *root)
 int btrfs_buffer_uptodate(struct extent_buffer *buf)
 {
        struct inode *btree_inode = buf->first_page->mapping->host;
-       return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf);
+       return extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf);
 }
 
 int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
 {
        struct inode *btree_inode = buf->first_page->mapping->host;
-       return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree,
+       return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree,
                                          buf);
 }
 
@@ -914,7 +923,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
                        transid, root->fs_info->generation);
                WARN_ON(1);
        }
-       set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf);
+       set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf);
 }
 
 void btrfs_throttle(struct btrfs_root *root)
@@ -941,7 +950,7 @@ void btrfs_set_buffer_defrag(struct extent_buffer *buf)
 {
        struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
        struct inode *btree_inode = root->fs_info->btree_inode;
-       set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start,
+       set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start,
                        buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS);
 }
 
@@ -949,7 +958,7 @@ void btrfs_set_buffer_defrag_done(struct extent_buffer *buf)
 {
        struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
        struct inode *btree_inode = root->fs_info->btree_inode;
-       set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start,
+       set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start,
                        buf->start + buf->len - 1, EXTENT_DEFRAG_DONE,
                        GFP_NOFS);
 }
@@ -958,7 +967,7 @@ int btrfs_buffer_defrag(struct extent_buffer *buf)
 {
        struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
        struct inode *btree_inode = root->fs_info->btree_inode;
-       return test_range_bit(&BTRFS_I(btree_inode)->extent_tree,
+       return test_range_bit(&BTRFS_I(btree_inode)->io_tree,
                     buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0);
 }
 
@@ -966,7 +975,7 @@ int btrfs_buffer_defrag_done(struct extent_buffer *buf)
 {
        struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
        struct inode *btree_inode = root->fs_info->btree_inode;
-       return test_range_bit(&BTRFS_I(btree_inode)->extent_tree,
+       return test_range_bit(&BTRFS_I(btree_inode)->io_tree,
                     buf->start, buf->start + buf->len - 1,
                     EXTENT_DEFRAG_DONE, 0);
 }
@@ -975,7 +984,7 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf)
 {
        struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
        struct inode *btree_inode = root->fs_info->btree_inode;
-       return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree,
+       return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree,
                     buf->start, buf->start + buf->len - 1,
                     EXTENT_DEFRAG_DONE, GFP_NOFS);
 }
@@ -984,7 +993,7 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf)
 {
        struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
        struct inode *btree_inode = root->fs_info->btree_inode;
-       return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree,
+       return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree,
                     buf->start, buf->start + buf->len - 1,
                     EXTENT_DEFRAG, GFP_NOFS);
 }
@@ -993,10 +1002,10 @@ int btrfs_read_buffer(struct extent_buffer *buf)
 {
        struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
        struct inode *btree_inode = root->fs_info->btree_inode;
-       return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
+       return read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
                                        buf, 0, 1);
 }
 
-static struct extent_map_ops btree_extent_map_ops = {
+static struct extent_io_ops btree_extent_io_ops = {
        .writepage_io_hook = btree_writepage_io_hook,
 };
index b69a46691a96ccea19017017edfc3da63a9e62a3..1cf125ab7822c8d3169d97d04c080c860dc52636 100644 (file)
@@ -63,7 +63,7 @@ static int cache_block_group(struct btrfs_root *root,
        int ret;
        struct btrfs_key key;
        struct extent_buffer *leaf;
-       struct extent_map_tree *free_space_cache;
+       struct extent_io_tree *free_space_cache;
        int slot;
        u64 last = 0;
        u64 hole_size;
@@ -158,7 +158,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
                                                         btrfs_fs_info *info,
                                                         u64 bytenr)
 {
-       struct extent_map_tree *block_group_cache;
+       struct extent_io_tree *block_group_cache;
        struct btrfs_block_group_cache *block_group = NULL;
        u64 ptr;
        u64 start;
@@ -281,7 +281,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
                                                 int data, int owner)
 {
        struct btrfs_block_group_cache *cache;
-       struct extent_map_tree *block_group_cache;
+       struct extent_io_tree *block_group_cache;
        struct btrfs_block_group_cache *found_group = NULL;
        struct btrfs_fs_info *info = root->fs_info;
        u64 used;
@@ -951,7 +951,7 @@ fail:
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root)
 {
-       struct extent_map_tree *block_group_cache;
+       struct extent_io_tree *block_group_cache;
        struct btrfs_block_group_cache *cache;
        int ret;
        int err = 0;
@@ -1107,12 +1107,12 @@ static int update_pinned_extents(struct btrfs_root *root,
        return 0;
 }
 
-int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
+int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
 {
        u64 last = 0;
        u64 start;
        u64 end;
-       struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
+       struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
        int ret;
 
        while(1) {
@@ -1128,12 +1128,12 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
 
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root,
-                              struct extent_map_tree *unpin)
+                              struct extent_io_tree *unpin)
 {
        u64 start;
        u64 end;
        int ret;
-       struct extent_map_tree *free_space_cache;
+       struct extent_io_tree *free_space_cache;
        free_space_cache = &root->fs_info->free_space_cache;
 
        while(1) {
@@ -1329,8 +1329,8 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct
        int err = 0;
        u64 start;
        u64 end;
-       struct extent_map_tree *pending_del;
-       struct extent_map_tree *pinned_extents;
+       struct extent_io_tree *pending_del;
+       struct extent_io_tree *pinned_extents;
 
        pending_del = &extent_root->fs_info->pending_del;
        pinned_extents = &extent_root->fs_info->pinned_extents;
@@ -1802,7 +1802,7 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
 
        set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
                         buf->start + buf->len - 1, GFP_NOFS);
-       set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
+       set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->io_tree,
                        buf->start, buf->start + buf->len - 1,
                        EXTENT_CSUM, GFP_NOFS);
        buf->flags |= EXTENT_CSUM;
@@ -2166,7 +2166,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
        unsigned long i;
        struct page *page;
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct file_ra_state *ra;
 
        ra = kzalloc(sizeof(*ra), GFP_NOFS);
@@ -2195,15 +2195,14 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
                page_start = (u64)page->index << PAGE_CACHE_SHIFT;
                page_end = page_start + PAGE_CACHE_SIZE - 1;
 
-               lock_extent(em_tree, page_start, page_end, GFP_NOFS);
+               lock_extent(io_tree, page_start, page_end, GFP_NOFS);
 
                delalloc_start = page_start;
-               existing_delalloc =
-                       count_range_bits(&BTRFS_I(inode)->extent_tree,
-                                        &delalloc_start, page_end,
-                                        PAGE_CACHE_SIZE, EXTENT_DELALLOC);
+               existing_delalloc = count_range_bits(io_tree,
+                                            &delalloc_start, page_end,
+                                            PAGE_CACHE_SIZE, EXTENT_DELALLOC);
 
-               set_extent_delalloc(em_tree, page_start,
+               set_extent_delalloc(io_tree, page_start,
                                    page_end, GFP_NOFS);
 
                spin_lock(&root->fs_info->delalloc_lock);
@@ -2211,7 +2210,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
                                                 existing_delalloc;
                spin_unlock(&root->fs_info->delalloc_lock);
 
-               unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
+               unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
                set_page_dirty(page);
                unlock_page(page);
                page_cache_release(page);
@@ -2379,7 +2378,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
        u64 cur_byte;
        u64 total_found;
        struct btrfs_fs_info *info = root->fs_info;
-       struct extent_map_tree *block_group_cache;
+       struct extent_io_tree *block_group_cache;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct extent_buffer *leaf;
@@ -2561,7 +2560,7 @@ int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
        struct btrfs_block_group_cache *cache;
        struct btrfs_block_group_item *item;
        struct btrfs_fs_info *info = root->fs_info;
-       struct extent_map_tree *block_group_cache;
+       struct extent_io_tree *block_group_cache;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        int ret;
@@ -2645,7 +2644,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
        int bit;
        struct btrfs_block_group_cache *cache;
        struct btrfs_fs_info *info = root->fs_info;
-       struct extent_map_tree *block_group_cache;
+       struct extent_io_tree *block_group_cache;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct extent_buffer *leaf;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
new file mode 100644 (file)
index 0000000..15cc158
--- /dev/null
@@ -0,0 +1,3089 @@
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/bio.h>
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/pagemap.h>
+#include <linux/page-flags.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/blkdev.h>
+#include <linux/swap.h>
+#include <linux/version.h>
+#include <linux/writeback.h>
+#include <linux/pagevec.h>
+#include "extent_io.h"
+#include "extent_map.h"
+
+/* temporary define until extent_map moves out of btrfs */
+struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
+                                      unsigned long extra_flags,
+                                      void (*ctor)(void *, struct kmem_cache *,
+                                                   unsigned long));
+
+/* slab caches backing struct extent_state and struct extent_buffer */
+static struct kmem_cache *extent_state_cache;
+static struct kmem_cache *extent_buffer_cache;
+
+/*
+ * 'states' links every allocated extent_state (added in
+ * alloc_extent_state, removed in free_extent_state) so that anything
+ * still on it can be reported as a leak by extent_io_exit.
+ */
+static LIST_HEAD(buffers);
+static LIST_HEAD(states);
+
+/*
+ * protects the 'states' list.  DEFINE_SPINLOCK is used instead of the
+ * deprecated SPIN_LOCK_UNLOCKED initializer.
+ */
+static DEFINE_SPINLOCK(state_lock);
+/* NOTE(review): presumably the cap for tree->lru_size; used outside this view */
+#define BUFFER_LRU_MAX 64
+
+/*
+ * minimal view of an rb-tree element: tree_insert and __tree_search use
+ * rb_entry() with this type to read the inclusive [start, end] range of
+ * a node and to flag it as inserted.
+ * NOTE(review): the nodes handed in are embedded in struct extent_state,
+ * so these fields presumably have to mirror its layout around rb_node --
+ * confirm against extent_io.h.
+ */
+struct tree_entry {
+       u64 start;
+       u64 end;
+       int in_tree;
+       struct rb_node rb_node;
+};
+
+/*
+ * bundle of a bio, the io tree and a get_extent callback.
+ * NOTE(review): presumably passed to the per-page read/write helpers
+ * further down in this file; its users are outside this view -- confirm.
+ */
+struct extent_page_data {
+       struct bio *bio;
+       struct extent_io_tree *tree;
+       get_extent_t *get_extent;
+};
+
+/*
+ * create the two slab caches used by this file.  Returns 0 on success or
+ * -ENOMEM if either cache could not be created; in the latter case no
+ * cache is left allocated.
+ */
+int __init extent_io_init(void)
+{
+       extent_state_cache = btrfs_cache_create("extent_state",
+                                               sizeof(struct extent_state),
+                                               0, NULL);
+       if (!extent_state_cache)
+               return -ENOMEM;
+
+       extent_buffer_cache = btrfs_cache_create("extent_buffers",
+                                                sizeof(struct extent_buffer),
+                                                0, NULL);
+       if (extent_buffer_cache)
+               return 0;
+
+       /* the second cache failed, undo the first one */
+       kmem_cache_destroy(extent_state_cache);
+       return -ENOMEM;
+}
+
+/*
+ * tear down the slab caches.  Any extent_state still linked on the
+ * global 'states' list is reported as a leak and freed first.
+ */
+void extent_io_exit(void)
+{
+       struct extent_state *leaked;
+       struct list_head *pos;
+
+       /* always take the first element; the list shrinks on every pass */
+       for (pos = states.next; pos != &states; pos = states.next) {
+               leaked = list_entry(pos, struct extent_state, list);
+               printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", leaked->start, leaked->end, leaked->state, leaked->in_tree, atomic_read(&leaked->refs));
+               list_del(&leaked->list);
+               kmem_cache_free(extent_state_cache, leaked);
+       }
+
+       if (extent_state_cache)
+               kmem_cache_destroy(extent_state_cache);
+       if (extent_buffer_cache)
+               kmem_cache_destroy(extent_buffer_cache);
+}
+
+/*
+ * initialize an empty io tree for the given address space: no states,
+ * no ops, no dirty bytes and an empty buffer lru.  'mask' is currently
+ * unused.
+ */
+void extent_io_tree_init(struct extent_io_tree *tree,
+                         struct address_space *mapping, gfp_t mask)
+{
+       tree->mapping = mapping;
+       tree->ops = NULL;
+
+       /* empty state tree and accounting */
+       tree->state.rb_node = NULL;
+       tree->dirty_bytes = 0;
+       rwlock_init(&tree->lock);
+
+       /* empty extent_buffer lru */
+       INIT_LIST_HEAD(&tree->buffer_lru);
+       tree->lru_size = 0;
+       spin_lock_init(&tree->lru_lock);
+}
+EXPORT_SYMBOL(extent_io_tree_init);
+
+/*
+ * unlink every extent_buffer from the tree's lru list and hand each one
+ * to free_extent_buffer.  No lock is taken here.
+ */
+void extent_io_tree_empty_lru(struct extent_io_tree *tree)
+{
+       struct list_head *head = &tree->buffer_lru;
+
+       while (!list_empty(head)) {
+               struct extent_buffer *victim;
+
+               victim = list_entry(head->next, struct extent_buffer, lru);
+               list_del_init(&victim->lru);
+               free_extent_buffer(victim);
+       }
+}
+EXPORT_SYMBOL(extent_io_tree_empty_lru);
+
+/*
+ * allocate an extent_state from the slab cache using 'mask'.  On success
+ * the state has no bits set, is not in any tree, holds one reference and
+ * is linked on the global 'states' list.  A failed allocation is handed
+ * back to the caller unchanged.
+ */
+struct extent_state *alloc_extent_state(gfp_t mask)
+{
+       struct extent_state *fresh;
+       unsigned long irq_flags;
+
+       fresh = kmem_cache_alloc(extent_state_cache, mask);
+       if (fresh && !IS_ERR(fresh)) {
+               fresh->state = 0;
+               fresh->in_tree = 0;
+               fresh->private = 0;
+
+               /* track the state for leak reporting at exit time */
+               spin_lock_irqsave(&state_lock, irq_flags);
+               list_add(&fresh->list, &states);
+               spin_unlock_irqrestore(&state_lock, irq_flags);
+
+               atomic_set(&fresh->refs, 1);
+               init_waitqueue_head(&fresh->wq);
+       }
+       return fresh;
+}
+EXPORT_SYMBOL(alloc_extent_state);
+
+/*
+ * drop one reference on 'state' (NULL is accepted and ignored).  When the
+ * last reference goes away the state is unlinked from the global 'states'
+ * list and returned to the slab cache; it must not be in a tree anymore.
+ */
+void free_extent_state(struct extent_state *state)
+{
+       unsigned long irq_flags;
+
+       if (!state || !atomic_dec_and_test(&state->refs))
+               return;
+
+       WARN_ON(state->in_tree);
+
+       spin_lock_irqsave(&state_lock, irq_flags);
+       list_del(&state->list);
+       spin_unlock_irqrestore(&state_lock, irq_flags);
+
+       kmem_cache_free(extent_state_cache, state);
+}
+EXPORT_SYMBOL(free_extent_state);
+
+/*
+ * link 'node' into the rb-tree at the position selected by 'offset'.
+ * If an existing entry already covers 'offset' (start <= offset <= end)
+ * nothing is inserted and that entry's node is returned; otherwise the
+ * node is inserted, flagged in_tree and NULL is returned.
+ */
+static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
+                                  struct rb_node *node)
+{
+       struct rb_node **link = &root->rb_node;
+       struct rb_node *parent = NULL;
+       struct tree_entry *cur;
+
+       while (*link) {
+               parent = *link;
+               cur = rb_entry(parent, struct tree_entry, rb_node);
+
+               if (offset < cur->start) {
+                       link = &parent->rb_left;
+                       continue;
+               }
+               if (offset > cur->end) {
+                       link = &parent->rb_right;
+                       continue;
+               }
+               /* 'offset' falls inside an existing entry */
+               return parent;
+       }
+
+       cur = rb_entry(node, struct tree_entry, rb_node);
+       cur->in_tree = 1;
+       rb_link_node(node, parent, link);
+       rb_insert_color(node, root);
+       return NULL;
+}
+
+/*
+ * look for the entry whose inclusive range [start, end] contains 'offset'.
+ *
+ * If one is found it is returned and neither prev_ret nor next_ret is
+ * written.  Otherwise NULL is returned and, starting from the last node
+ * visited on the way down:
+ *
+ * - if prev_ret is non-NULL, the walk moves forward with rb_next() while
+ *   'offset' is past the entry's end, so *prev_ret is the first entry that
+ *   ends at or after 'offset' (NULL if there is none).  Note that despite
+ *   its name this is the entry following 'offset'.
+ * - if next_ret is non-NULL, the walk moves backward with rb_prev() while
+ *   'offset' is before the entry's start, so *next_ret is the closest
+ *   entry starting at or before 'offset' (NULL if there is none).
+ */
+static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
+                                    struct rb_node **prev_ret,
+                                    struct rb_node **next_ret)
+{
+       struct rb_node * n = root->rb_node;
+       struct rb_node *prev = NULL;
+       struct rb_node *orig_prev = NULL;
+       struct tree_entry *entry;
+       struct tree_entry *prev_entry = NULL;
+
+       /* plain binary search, remembering the last node we looked at */
+       while(n) {
+               entry = rb_entry(n, struct tree_entry, rb_node);
+               prev = n;
+               prev_entry = entry;
+
+               if (offset < entry->start)
+                       n = n->rb_left;
+               else if (offset > entry->end)
+                       n = n->rb_right;
+               else
+                       return n;
+       }
+
+       if (prev_ret) {
+               orig_prev = prev;
+               /*
+                * prev_entry is recomputed from a possibly NULL 'prev', but
+                * it is only dereferenced after the 'prev &&' test
+                */
+               while(prev && offset > prev_entry->end) {
+                       prev = rb_next(prev);
+                       prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+               }
+               *prev_ret = prev;
+               /* restart the backward walk from the same node */
+               prev = orig_prev;
+       }
+
+       if (next_ret) {
+               prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+               while(prev && offset < prev_entry->start) {
+                       prev = rb_prev(prev);
+                       prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+               }
+               *next_ret = prev;
+       }
+       return NULL;
+}
+
+/*
+ * return the entry containing 'offset' or, when there is none, the first
+ * entry that ends at or after 'offset'.  NULL means no such entry exists.
+ */
+static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
+{
+       struct rb_node *after;
+       struct rb_node *hit = __tree_search(root, offset, &after, NULL);
+
+       /* 'after' is only filled in when there was no exact hit */
+       return hit ? hit : after;
+}
+
+/*
+ * try to merge 'state' with its direct neighbours in the tree.  A
+ * neighbour is merged when it is byte-adjacent to 'state' and carries
+ * exactly the same bits.  States with any of EXTENT_IOBITS set are never
+ * merged because the end_io handlers need to be able to do operations on
+ * them without sleeping (or doing allocations/splits).
+ *
+ * When 'state' is merged into the following extent, the tree's reference
+ * on 'state' is dropped and it may be freed, so callers must not touch
+ * 'state' after this returns.
+ *
+ * This should be called with the tree lock held.  Always returns 0.
+ */
+static int merge_state(struct extent_io_tree *tree,
+                      struct extent_state *state)
+{
+       struct rb_node *neighbour;
+       struct extent_state *adj;
+
+       if (state->state & EXTENT_IOBITS)
+               return 0;
+
+       /* absorb the previous extent into 'state' */
+       neighbour = rb_prev(&state->rb_node);
+       if (neighbour) {
+               adj = rb_entry(neighbour, struct extent_state, rb_node);
+               if (adj->end == state->start - 1 &&
+                   adj->state == state->state) {
+                       state->start = adj->start;
+                       adj->in_tree = 0;
+                       rb_erase(&adj->rb_node, &tree->state);
+                       free_extent_state(adj);
+               }
+       }
+
+       /* let the following extent absorb 'state' */
+       neighbour = rb_next(&state->rb_node);
+       if (neighbour) {
+               adj = rb_entry(neighbour, struct extent_state, rb_node);
+               if (adj->start == state->end + 1 &&
+                   adj->state == state->state) {
+                       adj->start = state->start;
+                       state->in_tree = 0;
+                       rb_erase(&state->rb_node, &tree->state);
+                       free_extent_state(state);
+               }
+       }
+       return 0;
+}
+
+/*
+ * insert an extent_state struct into the tree.  'bits' are set on the
+ * struct before it is inserted, and [start, end] (inclusive) becomes its
+ * range.
+ *
+ * Returns 0 on success, after which the state may already have been
+ * merged with a neighbour.  Returns -EEXIST if an existing extent covers
+ * 'end', in which case the state struct is freed and the tree, including
+ * its dirty byte accounting, is left untouched.
+ *
+ * The tree lock is not taken internally.  This is a utility function and
+ * probably isn't what you want to call (see set/clear_extent_bit).
+ */
+static int insert_state(struct extent_io_tree *tree,
+                       struct extent_state *state, u64 start, u64 end,
+                       int bits)
+{
+       struct rb_node *node;
+
+       if (end < start) {
+               printk("end < start %Lu %Lu\n", end, start);
+               WARN_ON(1);
+       }
+       state->state |= bits;
+       state->start = start;
+       state->end = end;
+       node = tree_insert(&tree->state, end, &state->rb_node);
+       if (node) {
+               struct extent_state *found;
+               found = rb_entry(node, struct extent_state, rb_node);
+               printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end);
+               free_extent_state(state);
+               return -EEXIST;
+       }
+       /*
+        * only account the dirty bytes once the state really is in the
+        * tree, so a failed insert does not inflate the counter
+        */
+       if (bits & EXTENT_DIRTY)
+               tree->dirty_bytes += end - start + 1;
+       merge_state(tree, state);
+       return 0;
+}
+
+/*
+ * cut the extent state 'orig' in two at offset 'split', which must lie
+ * inside 'orig'.  The preallocated struct 'prealloc' is inserted into the
+ * tree as the front part and inherits all of orig's bits:
+ *
+ * before: orig     [orig->start, orig->end]
+ * after:  prealloc [orig->start, split - 1]
+ *         orig     [split, orig->end]
+ *
+ * Returns 0 on success.  If the insert collides with an existing entry,
+ * 'prealloc' is freed and -EEXIST is returned.
+ *
+ * The tree locks are not taken by this function. They need to be held
+ * by the caller.
+ */
+static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
+                      struct extent_state *prealloc, u64 split)
+{
+       struct rb_node *clash;
+       struct extent_state *found;
+
+       /* the new front part */
+       prealloc->state = orig->state;
+       prealloc->start = orig->start;
+       prealloc->end = split - 1;
+
+       /*
+        * shrink orig before the insert: tree_insert reports a collision
+        * for any entry whose range still covers prealloc->end
+        */
+       orig->start = split;
+
+       clash = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
+       if (!clash)
+               return 0;
+
+       found = rb_entry(clash, struct extent_state, rb_node);
+       printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end);
+       free_extent_state(prealloc);
+       return -EEXIST;
+}
+
+/*
+ * clear 'bits' on a single extent state struct and return the subset of
+ * 'bits' that was set beforehand.
+ *
+ * wake == 1 wakes up anyone sleeping on the state's wait queue.
+ * delete == 1 removes the state from the tree no matter which bits remain.
+ *
+ * A state that ends up with no bits at all is removed from the tree and
+ * its reference dropped; otherwise a merge with its neighbours is tried.
+ * Clearing EXTENT_DIRTY on a dirty state also updates tree->dirty_bytes.
+ */
+static int clear_state_bit(struct extent_io_tree *tree,
+                           struct extent_state *state, int bits, int wake,
+                           int delete)
+{
+       int was_set = state->state & bits;
+
+       if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
+               u64 len = state->end - state->start + 1;
+
+               WARN_ON(len > tree->dirty_bytes);
+               tree->dirty_bytes -= len;
+       }
+       state->state &= ~bits;
+       if (wake)
+               wake_up(&state->wq);
+
+       /* something is left and nobody asked for removal: keep it */
+       if (!delete && state->state != 0) {
+               merge_state(tree, state);
+               return was_set;
+       }
+
+       if (!state->in_tree) {
+               WARN_ON(1);
+               return was_set;
+       }
+
+       rb_erase(&state->rb_node, &tree->state);
+       state->in_tree = 0;
+       free_extent_state(state);
+       return was_set;
+}
+
+/*
+ * clear some bits on a range in the tree.  This may require splitting
+ * or inserting elements in the tree, so the gfp mask is used to
+ * indicate which allocations or sleeping are allowed.
+ *
+ * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
+ * the given range from the tree regardless of state (ie for truncate).
+ *
+ * the range [start, end] is inclusive.
+ *
+ * When 'mask' allows sleeping (__GFP_WAIT) a spare extent_state is
+ * allocated before the tree lock is taken.  Otherwise nothing is
+ * allocated up front; if a split then turns out to be necessary, the
+ * spare is allocated with GFP_ATOMIC under the lock instead of handing a
+ * NULL pointer to split_state.
+ *
+ * This takes the tree lock, and returns < 0 on error (-ENOMEM when no
+ * spare extent_state could be allocated), > 0 if any of the bits were
+ * already set, or zero if none of the bits were already set.
+ */
+int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+                    int bits, int wake, int delete, gfp_t mask)
+{
+       struct extent_state *state;
+       struct extent_state *prealloc = NULL;
+       struct rb_node *node;
+       unsigned long flags;
+       int err = 0;
+       int set = 0;
+
+again:
+       if (!prealloc && (mask & __GFP_WAIT)) {
+               prealloc = alloc_extent_state(mask);
+               if (!prealloc)
+                       return -ENOMEM;
+       }
+
+       write_lock_irqsave(&tree->lock, flags);
+       /*
+        * this search will find the extents that end after
+        * our range starts
+        */
+       node = tree_search(&tree->state, start);
+       if (!node)
+               goto out;
+       state = rb_entry(node, struct extent_state, rb_node);
+       if (state->start > end)
+               goto out;
+       WARN_ON(state->end < start);
+
+       /*
+        *     | ---- desired range ---- |
+        *  | state | or
+        *  | ------------- state -------------- |
+        *
+        * We need to split the extent we found, and may flip
+        * bits on second half.
+        *
+        * If the extent we found extends past our range, we
+        * just split and search again.  It'll get split again
+        * the next time though.
+        *
+        * If the extent we found is inside our range, we clear
+        * the desired bit on it.
+        */
+
+       if (state->start < start) {
+               /* callers that cannot sleep get their spare state here */
+               if (!prealloc)
+                       prealloc = alloc_extent_state(GFP_ATOMIC);
+               if (!prealloc) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+               err = split_state(tree, state, prealloc, start);
+               BUG_ON(err == -EEXIST);
+               prealloc = NULL;
+               if (err)
+                       goto out;
+               if (state->end <= end) {
+                       start = state->end + 1;
+                       set |= clear_state_bit(tree, state, bits,
+                                       wake, delete);
+               } else {
+                       start = state->start;
+               }
+               goto search_again;
+       }
+       /*
+        * | ---- desired range ---- |
+        *                        | state |
+        * We need to split the extent, and clear the bit
+        * on the first half
+        */
+       if (state->start <= end && state->end > end) {
+               /* callers that cannot sleep get their spare state here */
+               if (!prealloc)
+                       prealloc = alloc_extent_state(GFP_ATOMIC);
+               if (!prealloc) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+               err = split_state(tree, state, prealloc, end + 1);
+               BUG_ON(err == -EEXIST);
+
+               if (wake)
+                       wake_up(&state->wq);
+               /* prealloc is now the front half, the part inside our range */
+               set |= clear_state_bit(tree, prealloc, bits,
+                                      wake, delete);
+               prealloc = NULL;
+               goto out;
+       }
+
+       start = state->end + 1;
+       set |= clear_state_bit(tree, state, bits, wake, delete);
+       goto search_again;
+
+out:
+       write_unlock_irqrestore(&tree->lock, flags);
+       if (prealloc)
+               free_extent_state(prealloc);
+
+       if (err)
+               return err;
+       return set;
+
+search_again:
+       if (start > end)
+               goto out;
+       write_unlock_irqrestore(&tree->lock, flags);
+       if (mask & __GFP_WAIT)
+               cond_resched();
+       goto again;
+}
+EXPORT_SYMBOL(clear_extent_bit);
+
+/*
+ * sleep until somebody calls wake_up() on state->wq.
+ *
+ * Must be entered with tree->lock held for reading with interrupts
+ * disabled; the lock is dropped while sleeping and held again on return.
+ * The caller has to hold its own reference on 'state' across the call
+ * (see wait_extent_bit).  Always returns 0.
+ */
+static int wait_on_state(struct extent_io_tree *tree,
+                        struct extent_state *state)
+{
+       DEFINE_WAIT(wait);
+       /* queue ourselves before dropping the lock so no wakeup is missed */
+       prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
+       read_unlock_irq(&tree->lock);
+       schedule();
+       read_lock_irq(&tree->lock);
+       finish_wait(&state->wq, &wait);
+       return 0;
+}
+
+/*
+ * waits for one or more bits to clear on a range in the state tree.
+ * The range [start, end] is inclusive.
+ * The tree lock is taken (for reading) by this function and dropped
+ * whenever it sleeps or reschedules.  Always returns 0.
+ */
+int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
+{
+       struct extent_state *state;
+       struct rb_node *node;
+
+       read_lock_irq(&tree->lock);
+again:
+       while (1) {
+               /*
+                * this search will find all the extents that end after
+                * our range starts
+                */
+               node = tree_search(&tree->state, start);
+               if (!node)
+                       break;
+
+               state = rb_entry(node, struct extent_state, rb_node);
+
+               if (state->start > end)
+                       goto out;
+
+               if (state->state & bits) {
+                       start = state->start;
+                       /*
+                        * pin the state while we sleep on it without the
+                        * lock, then search again from its start
+                        */
+                       atomic_inc(&state->refs);
+                       wait_on_state(tree, state);
+                       free_extent_state(state);
+                       goto again;
+               }
+               /* none of the bits set here, move on to the next extent */
+               start = state->end + 1;
+
+               if (start > end)
+                       break;
+
+               if (need_resched()) {
+                       read_unlock_irq(&tree->lock);
+                       cond_resched();
+                       read_lock_irq(&tree->lock);
+               }
+       }
+out:
+       read_unlock_irq(&tree->lock);
+       return 0;
+}
+EXPORT_SYMBOL(wait_extent_bit);
+
+/*
+ * set 'bits' on one extent state.  If this makes a previously clean
+ * state dirty, its length is added to the tree's dirty byte count.
+ */
+static void set_state_bits(struct extent_io_tree *tree,
+                          struct extent_state *state,
+                          int bits)
+{
+       int newly_dirty = (bits & EXTENT_DIRTY) &&
+                         !(state->state & EXTENT_DIRTY);
+
+       if (newly_dirty)
+               tree->dirty_bytes += state->end - state->start + 1;
+       state->state |= bits;
+}
+
+/*
+ * set some bits on a range in the tree.  This may require allocations
+ * or sleeping, so the gfp mask is used to indicate what is allowed.
+ *
+ * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
+ * range already has the desired bits set.  The start of the existing
+ * range is returned in failed_start in this case.
+ *
+ * When 'mask' allows sleeping (__GFP_WAIT) a spare extent_state is
+ * allocated before the tree lock is taken.  Otherwise nothing is
+ * allocated up front; if an insert or split then turns out to be
+ * necessary, the spare is allocated with GFP_ATOMIC under the lock
+ * instead of handing a NULL pointer to insert_state/split_state.
+ *
+ * [start, end] is inclusive
+ * This takes the tree lock.
+ *
+ * Returns 0 on success, -EEXIST as described above, or -ENOMEM when no
+ * spare extent_state could be allocated.
+ */
+int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
+                  int exclusive, u64 *failed_start, gfp_t mask)
+{
+       struct extent_state *state;
+       struct extent_state *prealloc = NULL;
+       struct rb_node *node;
+       unsigned long flags;
+       int err = 0;
+       int set;
+       u64 last_start;
+again:
+       if (!prealloc && (mask & __GFP_WAIT)) {
+               prealloc = alloc_extent_state(mask);
+               if (!prealloc)
+                       return -ENOMEM;
+       }
+
+       write_lock_irqsave(&tree->lock, flags);
+       /*
+        * this search will find all the extents that end after
+        * our range starts.
+        */
+       node = tree_search(&tree->state, start);
+       if (!node) {
+               /* callers that cannot sleep get their spare state here */
+               if (!prealloc)
+                       prealloc = alloc_extent_state(GFP_ATOMIC);
+               if (!prealloc) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+               err = insert_state(tree, prealloc, start, end, bits);
+               prealloc = NULL;
+               BUG_ON(err == -EEXIST);
+               goto out;
+       }
+
+       state = rb_entry(node, struct extent_state, rb_node);
+       last_start = state->start;
+
+       /*
+        * | ---- desired range ---- |
+        * | state |
+        *
+        * Just lock what we found and keep going
+        */
+       if (state->start == start && state->end <= end) {
+               set = state->state & bits;
+               if (set && exclusive) {
+                       *failed_start = state->start;
+                       err = -EEXIST;
+                       goto out;
+               }
+               set_state_bits(tree, state, bits);
+               /* read the end before merging, the merge may free 'state' */
+               start = state->end + 1;
+               merge_state(tree, state);
+               goto search_again;
+       }
+
+       /*
+        *     | ---- desired range ---- |
+        * | state |
+        *   or
+        * | ------------- state -------------- |
+        *
+        * We need to split the extent we found, and may flip bits on
+        * second half.
+        *
+        * If the extent we found extends past our
+        * range, we just split and search again.  It'll get split
+        * again the next time though.
+        *
+        * If the extent we found is inside our range, we set the
+        * desired bit on it.
+        */
+       if (state->start < start) {
+               set = state->state & bits;
+               if (exclusive && set) {
+                       *failed_start = start;
+                       err = -EEXIST;
+                       goto out;
+               }
+               /* callers that cannot sleep get their spare state here */
+               if (!prealloc)
+                       prealloc = alloc_extent_state(GFP_ATOMIC);
+               if (!prealloc) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+               err = split_state(tree, state, prealloc, start);
+               BUG_ON(err == -EEXIST);
+               prealloc = NULL;
+               if (err)
+                       goto out;
+               if (state->end <= end) {
+                       set_state_bits(tree, state, bits);
+                       start = state->end + 1;
+                       merge_state(tree, state);
+               } else {
+                       start = state->start;
+               }
+               goto search_again;
+       }
+       /*
+        * | ---- desired range ---- |
+        *     | state | or               | state |
+        *
+        * There's a hole, we need to insert something in it and
+        * ignore the extent we found.
+        */
+       if (state->start > start) {
+               u64 this_end;
+               if (end < last_start)
+                       this_end = end;
+               else
+                       this_end = last_start -1;
+               /* callers that cannot sleep get their spare state here */
+               if (!prealloc)
+                       prealloc = alloc_extent_state(GFP_ATOMIC);
+               if (!prealloc) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+               err = insert_state(tree, prealloc, start, this_end,
+                                  bits);
+               prealloc = NULL;
+               BUG_ON(err == -EEXIST);
+               if (err)
+                       goto out;
+               start = this_end + 1;
+               goto search_again;
+       }
+       /*
+        * | ---- desired range ---- |
+        *                        | state |
+        * We need to split the extent, and set the bit
+        * on the first half
+        */
+       if (state->start <= end && state->end > end) {
+               set = state->state & bits;
+               if (exclusive && set) {
+                       *failed_start = start;
+                       err = -EEXIST;
+                       goto out;
+               }
+               /* callers that cannot sleep get their spare state here */
+               if (!prealloc)
+                       prealloc = alloc_extent_state(GFP_ATOMIC);
+               if (!prealloc) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+               err = split_state(tree, state, prealloc, end + 1);
+               BUG_ON(err == -EEXIST);
+
+               /* prealloc is now the front half, the part inside our range */
+               set_state_bits(tree, prealloc, bits);
+               merge_state(tree, prealloc);
+               prealloc = NULL;
+               goto out;
+       }
+
+       goto search_again;
+
+out:
+       write_unlock_irqrestore(&tree->lock, flags);
+       if (prealloc)
+               free_extent_state(prealloc);
+
+       return err;
+
+search_again:
+       if (start > end)
+               goto out;
+       write_unlock_irqrestore(&tree->lock, flags);
+       if (mask & __GFP_WAIT)
+               cond_resched();
+       goto again;
+}
+EXPORT_SYMBOL(set_extent_bit);
+
+/* wrappers around set/clear extent bit */
+/* set EXTENT_DIRTY on the inclusive range [start, end], non-exclusively */
+int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
+                    gfp_t mask)
+{
+       const int exclusive = 0;
+
+       return set_extent_bit(tree, start, end, EXTENT_DIRTY, exclusive,
+                             NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_dirty);
+
+/* set arbitrary 'bits' on the inclusive range [start, end], non-exclusively */
+int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+                   int bits, gfp_t mask)
+{
+       const int exclusive = 0;
+
+       return set_extent_bit(tree, start, end, bits, exclusive, NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_bits);
+
+/*
+ * clear arbitrary 'bits' on the inclusive range [start, end] without
+ * waking sleepers and without forcing removal of the states
+ */
+int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+                     int bits, gfp_t mask)
+{
+       const int wake = 0;
+       const int delete = 0;
+
+       return clear_extent_bit(tree, start, end, bits, wake, delete, mask);
+}
+EXPORT_SYMBOL(clear_extent_bits);
+
+/*
+ * set both EXTENT_DELALLOC and EXTENT_DIRTY on the inclusive range
+ * [start, end], non-exclusively
+ */
+int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
+                    gfp_t mask)
+{
+       const int bits = EXTENT_DELALLOC | EXTENT_DIRTY;
+       const int exclusive = 0;
+
+       return set_extent_bit(tree, start, end, bits, exclusive, NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_delalloc);
+
+/*
+ * clear both EXTENT_DIRTY and EXTENT_DELALLOC on the inclusive range
+ * [start, end]; nobody is woken and states are not forcibly removed
+ */
+int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
+                      gfp_t mask)
+{
+       const int bits = EXTENT_DIRTY | EXTENT_DELALLOC;
+       const int wake = 0;
+       const int delete = 0;
+
+       return clear_extent_bit(tree, start, end, bits, wake, delete, mask);
+}
+EXPORT_SYMBOL(clear_extent_dirty);
+
+/* set EXTENT_NEW on the inclusive range [start, end], non-exclusively */
+int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
+                    gfp_t mask)
+{
+       const int exclusive = 0;
+
+       return set_extent_bit(tree, start, end, EXTENT_NEW, exclusive,
+                             NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_new);
+
+/*
+ * clear EXTENT_NEW on the inclusive range [start, end]; nobody is woken
+ * and states are not forcibly removed
+ */
+int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
+                      gfp_t mask)
+{
+       const int wake = 0;
+       const int delete = 0;
+
+       return clear_extent_bit(tree, start, end, EXTENT_NEW, wake, delete,
+                               mask);
+}
+EXPORT_SYMBOL(clear_extent_new);
+
+/* set EXTENT_UPTODATE on the inclusive range [start, end], non-exclusively */
+int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
+                       gfp_t mask)
+{
+       const int exclusive = 0;
+
+       return set_extent_bit(tree, start, end, EXTENT_UPTODATE, exclusive,
+                             NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_uptodate);
+
+/*
+ * clear EXTENT_UPTODATE on the inclusive range [start, end]; nobody is
+ * woken and states are not forcibly removed
+ */
+int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
+                         gfp_t mask)
+{
+       const int wake = 0;
+       const int delete = 0;
+
+       return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, wake,
+                               delete, mask);
+}
+EXPORT_SYMBOL(clear_extent_uptodate);
+
+/* set EXTENT_WRITEBACK on the inclusive range [start, end], non-exclusively */
+int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
+                        gfp_t mask)
+{
+       const int exclusive = 0;
+
+       return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, exclusive,
+                             NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_writeback);
+
+/*
+ * clear EXTENT_WRITEBACK on the inclusive range [start, end] and wake up
+ * anyone sleeping on the affected states (see wait_on_extent_writeback)
+ */
+int clear_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
+                          gfp_t mask)
+{
+       const int wake = 1;
+       const int delete = 0;
+
+       return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, wake,
+                               delete, mask);
+}
+EXPORT_SYMBOL(clear_extent_writeback);
+
+/*
+ * sleep until EXTENT_WRITEBACK is clear on the whole inclusive range
+ * [start, end]
+ */
+int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
+{
+       const int bits = EXTENT_WRITEBACK;
+
+       return wait_extent_bit(tree, start, end, bits);
+}
+EXPORT_SYMBOL(wait_on_extent_writeback);
+
+/*
+ * lock the inclusive range [start, end] by setting EXTENT_LOCKED on it
+ * exclusively.  The range is taken in ascending order: whenever an
+ * already locked region is hit and 'mask' allows sleeping, we wait for it
+ * and retry from the point of failure.
+ *
+ * Returns the result of the last set_extent_bit call, which is -EEXIST
+ * when a locked region is hit and 'mask' does not allow sleeping.
+ */
+int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
+{
+       u64 failed_start;
+       int err;
+
+       for (;;) {
+               err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
+                                    &failed_start, mask);
+               if (err != -EEXIST || !(mask & __GFP_WAIT))
+                       break;
+
+               /* somebody else holds part of the range, wait for them */
+               wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
+               start = failed_start;
+               WARN_ON(start > end);
+       }
+       return err;
+}
+EXPORT_SYMBOL(lock_extent);
+
+/*
+ * unlock the inclusive range [start, end]: clear EXTENT_LOCKED and wake
+ * up anyone waiting on the affected states
+ */
+int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
+                 gfp_t mask)
+{
+       const int wake = 1;
+       const int delete = 0;
+
+       return clear_extent_bit(tree, start, end, EXTENT_LOCKED, wake,
+                               delete, mask);
+}
+EXPORT_SYMBOL(unlock_extent);
+
+/*
+ * dirty every page cache page covering [start, end] and then mark the
+ * same range dirty in the tree.  All pages must already be present in
+ * the mapping (BUG otherwise).  Always returns 0.
+ */
+int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
+{
+       unsigned long last_index = end >> PAGE_CACHE_SHIFT;
+       unsigned long i;
+
+       for (i = start >> PAGE_CACHE_SHIFT; i <= last_index; i++) {
+               struct page *page = find_get_page(tree->mapping, i);
+
+               BUG_ON(!page);
+               __set_page_dirty_nobuffers(page);
+               /* drop the reference taken by find_get_page */
+               page_cache_release(page);
+       }
+       set_extent_dirty(tree, start, end, GFP_NOFS);
+       return 0;
+}
+EXPORT_SYMBOL(set_range_dirty);
+
+/*
+ * flag every page cache page covering [start, end] as under writeback
+ * and then set EXTENT_WRITEBACK on the same range in the tree.  All pages
+ * must already be present in the mapping (BUG otherwise).  Always
+ * returns 0.
+ */
+int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
+{
+       unsigned long last_index = end >> PAGE_CACHE_SHIFT;
+       unsigned long i;
+
+       for (i = start >> PAGE_CACHE_SHIFT; i <= last_index; i++) {
+               struct page *page = find_get_page(tree->mapping, i);
+
+               BUG_ON(!page);
+               set_page_writeback(page);
+               /* drop the reference taken by find_get_page */
+               page_cache_release(page);
+       }
+       set_extent_writeback(tree, start, end, GFP_NOFS);
+       return 0;
+}
+EXPORT_SYMBOL(set_range_writeback);
+
+/*
+ * find the first extent state that ends at or after 'start' and has at
+ * least one of 'bits' set.  On a hit its range is stored in *start_ret
+ * and *end_ret and 0 is returned; otherwise 1 is returned and the output
+ * parameters are left untouched.
+ *
+ * The tree lock is taken for reading.
+ */
+int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+                         u64 *start_ret, u64 *end_ret, int bits)
+{
+       struct extent_state *state;
+       struct rb_node *node;
+       int ret = 1;
+
+       read_lock_irq(&tree->lock);
+       /*
+        * start at the first extent that ends after our range starts
+        * and walk forward from there
+        */
+       node = tree_search(&tree->state, start);
+       if (node && !IS_ERR(node)) {
+               do {
+                       state = rb_entry(node, struct extent_state, rb_node);
+                       if (state->end >= start && (state->state & bits)) {
+                               *start_ret = state->start;
+                               *end_ret = state->end;
+                               ret = 0;
+                               break;
+                       }
+                       node = rb_next(node);
+               } while (node);
+       }
+       read_unlock_irq(&tree->lock);
+       return ret;
+}
+EXPORT_SYMBOL(find_first_extent_bit);
+
+/*
+ * find a run of EXTENT_DELALLOC states around *start and set
+ * EXTENT_LOCKED on each of them.
+ *
+ * On return *start holds the start of the first state that was locked
+ * and *end the end of the last one.  The return value is the number of
+ * states locked, 0 if nothing was found.  When nothing is found, *end is
+ * set to (u64)-1 if there is no extent ending at or after *start, or to
+ * the end of the first non-delalloc extent that was hit.
+ *
+ * The walk stops at the first state that is not delalloc, that does not
+ * start right after the previous one, or once max_bytes have been
+ * collected.  If a state in the run is already locked, this sleeps until
+ * it is woken and starts the search over.
+ *
+ * The tree lock is taken for writing and dropped while sleeping.
+ */
+u64 find_lock_delalloc_range(struct extent_io_tree *tree,
+                            u64 *start, u64 *end, u64 max_bytes)
+{
+       struct rb_node *node;
+       struct extent_state *state;
+       u64 cur_start = *start;
+       u64 found = 0;
+       u64 total_bytes = 0;
+
+       write_lock_irq(&tree->lock);
+       /*
+        * this search will find all the extents that end after
+        * our range starts.
+        */
+search_again:
+       node = tree_search(&tree->state, cur_start);
+       if (!node || IS_ERR(node)) {
+               *end = (u64)-1;
+               goto out;
+       }
+
+       while(1) {
+               state = rb_entry(node, struct extent_state, rb_node);
+               /* once something is locked, only accept contiguous states */
+               if (found && state->start != cur_start) {
+                       goto out;
+               }
+               if (!(state->state & EXTENT_DELALLOC)) {
+                       if (!found)
+                               *end = state->end;
+                       goto out;
+               }
+               if (!found) {
+                       /*
+                        * first hit: back up over all directly preceding
+                        * delalloc states so the run starts as early as
+                        * possible.  Only the bit is tested here, not
+                        * whether the states are byte-adjacent.
+                        */
+                       struct extent_state *prev_state;
+                       struct rb_node *prev_node = node;
+                       while(1) {
+                               prev_node = rb_prev(prev_node);
+                               if (!prev_node)
+                                       break;
+                               prev_state = rb_entry(prev_node,
+                                                     struct extent_state,
+                                                     rb_node);
+                               if (!(prev_state->state & EXTENT_DELALLOC))
+                                       break;
+                               state = prev_state;
+                               node = prev_node;
+                       }
+               }
+               if (state->state & EXTENT_LOCKED) {
+                       /*
+                        * somebody else holds this state: pin it, sleep
+                        * without the tree lock and redo the search from
+                        * cur_start afterwards
+                        */
+                       DEFINE_WAIT(wait);
+                       atomic_inc(&state->refs);
+                       prepare_to_wait(&state->wq, &wait,
+                                       TASK_UNINTERRUPTIBLE);
+                       write_unlock_irq(&tree->lock);
+                       schedule();
+                       write_lock_irq(&tree->lock);
+                       finish_wait(&state->wq, &wait);
+                       free_extent_state(state);
+                       goto search_again;
+               }
+               state->state |= EXTENT_LOCKED;
+               if (!found)
+                       *start = state->start;
+               found++;
+               *end = state->end;
+               /* the next state has to begin right after this one */
+               cur_start = state->end + 1;
+               node = rb_next(node);
+               if (!node)
+                       break;
+               total_bytes += state->end - state->start + 1;
+               if (total_bytes >= max_bytes)
+                       break;
+       }
+out:
+       write_unlock_irq(&tree->lock);
+       return found;
+}
+
+/*
+ * count the bytes in [*start, search_end] that are covered by extent
+ * states with any of 'bits' set.  Counting stops once the total reaches
+ * max_bytes.  *start is moved to the start of the first matching state,
+ * unless that first match already pushed the total to max_bytes.
+ *
+ * When *start is 0 and bits is exactly EXTENT_DIRTY the tree's
+ * dirty_bytes counter is returned without walking the tree.
+ *
+ * Returns the number of bytes counted, or 0 (after a warning) when
+ * search_end <= *start.
+ */
+u64 count_range_bits(struct extent_io_tree *tree,
+                    u64 *start, u64 search_end, u64 max_bytes,
+                    unsigned long bits)
+{
+       struct rb_node *node;
+       struct extent_state *state;
+       u64 cur_start = *start;
+       u64 total_bytes = 0;
+       int found = 0;
+
+       if (search_end <= cur_start) {
+               printk("search_end %Lu start %Lu\n", search_end, cur_start);
+               WARN_ON(1);
+               return 0;
+       }
+
+       write_lock_irq(&tree->lock);
+       /* shortcut: the tree keeps a running total of dirty bytes */
+       if (cur_start == 0 && bits == EXTENT_DIRTY) {
+               total_bytes = tree->dirty_bytes;
+               goto out;
+       }
+       /*
+        * this search will find all the extents that end after
+        * our range starts.
+        */
+       node = tree_search(&tree->state, cur_start);
+       if (!node || IS_ERR(node)) {
+               goto out;
+       }
+
+       while(1) {
+               state = rb_entry(node, struct extent_state, rb_node);
+               if (state->start > search_end)
+                       break;
+               if (state->end >= cur_start && (state->state & bits)) {
+                       /* only count the part that overlaps the search range */
+                       total_bytes += min(search_end, state->end) + 1 -
+                                      max(cur_start, state->start);
+                       if (total_bytes >= max_bytes)
+                               break;
+                       if (!found) {
+                               *start = state->start;
+                               found = 1;
+                       }
+               }
+               node = rb_next(node);
+               if (!node)
+                       break;
+       }
+out:
+       write_unlock_irq(&tree->lock);
+       return total_bytes;
+}
+/*
+ * helper function to lock both pages and extents in the tree.
+ * pages must be locked first.
+ *
+ * Every page cache page that covers [start, end] is taken with
+ * grab_cache_page() and then the byte range is locked in the extent
+ * tree.  Returns 0 on success.  If a page cannot be taken, the pages
+ * obtained so far are unlocked again and a negative errno is returned;
+ * the extent range is not locked in that case.
+ */
+int lock_range(struct extent_io_tree *tree, u64 start, u64 end)
+{
+       unsigned long index = start >> PAGE_CACHE_SHIFT;
+       unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+       struct page *page;
+       int err;
+
+       while (index <= end_index) {
+               page = grab_cache_page(tree->mapping, index);
+               if (!page) {
+                       err = -ENOMEM;
+                       goto failed;
+               }
+               /* NOTE(review): confirm grab_cache_page can return ERR_PTR */
+               if (IS_ERR(page)) {
+                       err = PTR_ERR(page);
+                       goto failed;
+               }
+               index++;
+       }
+       lock_extent(tree, start, end, GFP_NOFS);
+       return 0;
+
+failed:
+       /*
+        * we failed above in getting the page at 'index', so we undo here
+        * up to but not including the page at 'index'
+        */
+       end_index = index;
+       index = start >> PAGE_CACHE_SHIFT;
+       while (index < end_index) {
+               /*
+                * NOTE(review): the result of find_get_page is used without
+                * a NULL check, and only the reference it takes is released
+                * here -- confirm who drops the one from grab_cache_page
+                */
+               page = find_get_page(tree->mapping, index);
+               unlock_page(page);
+               page_cache_release(page);
+               index++;
+       }
+       return err;
+}
+EXPORT_SYMBOL(lock_range);
+
+/*
+ * counterpart of lock_range: unlock every page cache page that covers
+ * [start, end] and then release the lock on the extent range itself.
+ * Always returns 0.
+ */
+int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
+{
+       unsigned long last = end >> PAGE_CACHE_SHIFT;
+       unsigned long i;
+
+       for (i = start >> PAGE_CACHE_SHIFT; i <= last; i++) {
+               struct page *p = find_get_page(tree->mapping, i);
+
+               unlock_page(p);
+               page_cache_release(p);
+       }
+       unlock_extent(tree, start, end, GFP_NOFS);
+       return 0;
+}
+EXPORT_SYMBOL(unlock_range);
+
+/*
+ * store 'private' in the extent state that begins exactly at 'start'.
+ * Returns 0 on success or -ENOENT when no state starts at that offset.
+ */
+int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
+{
+       struct extent_state *state;
+       struct rb_node *node;
+       int ret = -ENOENT;
+
+       write_lock_irq(&tree->lock);
+       /* the search returns the first state that ends at or after start */
+       node = tree_search(&tree->state, start);
+       if (node && !IS_ERR(node)) {
+               state = rb_entry(node, struct extent_state, rb_node);
+               if (state->start == start) {
+                       state->private = private;
+                       ret = 0;
+               }
+       }
+       write_unlock_irq(&tree->lock);
+       return ret;
+}
+
+/*
+ * read back the private value of the extent state that begins exactly
+ * at 'start' into *private.  Returns 0 on success or -ENOENT when no
+ * state starts at that offset, in which case *private is left alone.
+ */
+int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
+{
+       struct extent_state *state;
+       struct rb_node *node;
+       int ret = -ENOENT;
+
+       read_lock_irq(&tree->lock);
+       /* the search returns the first state that ends at or after start */
+       node = tree_search(&tree->state, start);
+       if (node && !IS_ERR(node)) {
+               state = rb_entry(node, struct extent_state, rb_node);
+               if (state->start == start) {
+                       *private = state->private;
+                       ret = 0;
+               }
+       }
+       read_unlock_irq(&tree->lock);
+       return ret;
+}
+
+/*
+ * searches a range in the state tree for a given mask.
+ * If 'filled' == 1, this returns 1 only if every extent state in the
+ * range has at least one of the bits set and the states cover the range
+ * without a gap.  Otherwise, 1 is returned if any bit in the range is
+ * found set.  Returns 0 in all other cases.
+ */
+int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
+                  int bits, int filled)
+{
+       struct extent_state *state = NULL;
+       struct rb_node *node;
+       int bitset = 0;
+       unsigned long flags;
+
+       read_lock_irqsave(&tree->lock, flags);
+       node = tree_search(&tree->state, start);
+       while (node && start <= end) {
+               state = rb_entry(node, struct extent_state, rb_node);
+
+               /* a hole before this state means the range is not filled */
+               if (filled && state->start > start) {
+                       bitset = 0;
+                       break;
+               }
+
+               if (state->start > end)
+                       break;
+
+               if (state->state & bits) {
+                       bitset = 1;
+                       if (!filled)
+                               break;
+               } else if (filled) {
+                       bitset = 0;
+                       break;
+               }
+               /* continue right after the state we just checked */
+               start = state->end + 1;
+               if (start > end)
+                       break;
+               node = rb_next(node);
+               if (!node) {
+                       /* ran out of states before reaching 'end' */
+                       if (filled)
+                               bitset = 0;
+                       break;
+               }
+       }
+       read_unlock_irqrestore(&tree->lock, flags);
+       return bitset;
+}
+EXPORT_SYMBOL(test_range_bit);
+
+/*
+ * mark the page up to date, but only when the whole byte range it
+ * covers is EXTENT_UPTODATE in the tree.  Always returns 0.
+ */
+static int check_page_uptodate(struct extent_io_tree *tree,
+                              struct page *page)
+{
+       u64 first_byte = (u64)page->index << PAGE_CACHE_SHIFT;
+       u64 last_byte = first_byte + PAGE_CACHE_SIZE - 1;
+       int all_uptodate;
+
+       all_uptodate = test_range_bit(tree, first_byte, last_byte,
+                                     EXTENT_UPTODATE, 1);
+       if (all_uptodate)
+               SetPageUptodate(page);
+       return 0;
+}
+
+/*
+ * unlock the page, but only when no part of the byte range it covers
+ * is still EXTENT_LOCKED in the tree.  Always returns 0.
+ */
+static int check_page_locked(struct extent_io_tree *tree,
+                            struct page *page)
+{
+       u64 first_byte = (u64)page->index << PAGE_CACHE_SHIFT;
+       u64 last_byte = first_byte + PAGE_CACHE_SIZE - 1;
+       int still_locked;
+
+       still_locked = test_range_bit(tree, first_byte, last_byte,
+                                     EXTENT_LOCKED, 0);
+       if (!still_locked)
+               unlock_page(page);
+       return 0;
+}
+
+/*
+ * end writeback on the page, but only when no part of the byte range
+ * it covers is still EXTENT_WRITEBACK in the tree.  Always returns 0.
+ */
+static int check_page_writeback(struct extent_io_tree *tree,
+                            struct page *page)
+{
+       u64 first_byte = (u64)page->index << PAGE_CACHE_SHIFT;
+       u64 last_byte = first_byte + PAGE_CACHE_SIZE - 1;
+       int still_writing;
+
+       still_writing = test_range_bit(tree, first_byte, last_byte,
+                                      EXTENT_WRITEBACK, 0);
+       if (!still_writing)
+               end_page_writeback(page);
+       return 0;
+}
+
+/* lots and lots of room for performance fixes in the end_bio funcs */
+
+/*
+ * after a writepage IO is done, we need to:
+ * clear the uptodate bits on error
+ * clear the writeback bits in the extent tree for this IO
+ * end_page_writeback if the page has no more pending IO
+ *
+ * Scheduling is not allowed, so the extent state tree is expected
+ * to have one and only one object corresponding to this IO.
+ *
+ * The bio_vecs are processed from the last one back to the first, the
+ * optional writepage_end_io_hook is called for every vec, and the bio
+ * reference is dropped at the end.  On kernels up to 2.6.23 the handler
+ * returns 1 while bio->bi_size is still non-zero and 0 otherwise.
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
+static void end_bio_extent_writepage(struct bio *bio, int err)
+#else
+static int end_bio_extent_writepage(struct bio *bio,
+                                  unsigned int bytes_done, int err)
+#endif
+{
+       const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+       struct extent_io_tree *tree = bio->bi_private;
+       u64 start;
+       u64 end;
+       int whole_page;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+       /* presumably a non-zero bi_size means the bio is not finished yet */
+       if (bio->bi_size)
+               return 1;
+#endif
+
+       do {
+               struct page *page = bvec->bv_page;
+               /* byte range in the file that this vec covers */
+               start = ((u64)page->index << PAGE_CACHE_SHIFT) +
+                        bvec->bv_offset;
+               end = start + bvec->bv_len - 1;
+
+               if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
+                       whole_page = 1;
+               else
+                       whole_page = 0;
+
+               /* step to the previous vec and warm up its page flags */
+               if (--bvec >= bio->bi_io_vec)
+                       prefetchw(&bvec->bv_page->flags);
+
+               if (!uptodate) {
+                       clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
+                       ClearPageUptodate(page);
+                       SetPageError(page);
+               }
+               clear_extent_writeback(tree, start, end, GFP_ATOMIC);
+
+               /*
+                * a partial page may still have other ranges under
+                * writeback, so ask the tree before ending page writeback
+                */
+               if (whole_page)
+                       end_page_writeback(page);
+               else
+                       check_page_writeback(tree, page);
+               if (tree->ops && tree->ops->writepage_end_io_hook)
+                       tree->ops->writepage_end_io_hook(page, start, end);
+       } while (bvec >= bio->bi_io_vec);
+
+       bio_put(bio);
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+       return 0;
+#endif
+}
+
+/*
+ * after a readpage IO is done, we need to:
+ * clear the uptodate bits on error
+ * set the uptodate bits if things worked
+ * set the page up to date if all extents in the tree are uptodate
+ * clear the lock bit in the extent tree
+ * unlock the page if there are no other extents locked for it
+ *
+ * Scheduling is not allowed, so the extent state tree is expected
+ * to have one and only one object corresponding to this IO.
+ *
+ * The bio_vecs are processed from the last one back to the first.  Once
+ * the optional readpage_end_io_hook returns non-zero, that vec and all
+ * the vecs processed after it are treated as failed.  On kernels up to
+ * 2.6.23 the handler returns 1 while bio->bi_size is still non-zero and
+ * 0 otherwise.
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
+static void end_bio_extent_readpage(struct bio *bio, int err)
+#else
+static int end_bio_extent_readpage(struct bio *bio,
+                                  unsigned int bytes_done, int err)
+#endif
+{
+       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+       struct extent_io_tree *tree = bio->bi_private;
+       u64 start;
+       u64 end;
+       int whole_page;
+       int ret;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+       /* presumably a non-zero bi_size means the bio is not finished yet */
+       if (bio->bi_size)
+               return 1;
+#endif
+
+       do {
+               struct page *page = bvec->bv_page;
+               /* byte range in the file that this vec covers */
+               start = ((u64)page->index << PAGE_CACHE_SHIFT) +
+                       bvec->bv_offset;
+               end = start + bvec->bv_len - 1;
+
+               if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
+                       whole_page = 1;
+               else
+                       whole_page = 0;
+
+               /* step to the previous vec and warm up its page flags */
+               if (--bvec >= bio->bi_io_vec)
+                       prefetchw(&bvec->bv_page->flags);
+
+               /* the hook can veto the data; uptodate then stays cleared */
+               if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
+                       ret = tree->ops->readpage_end_io_hook(page, start, end);
+                       if (ret)
+                               uptodate = 0;
+               }
+               if (uptodate) {
+                       set_extent_uptodate(tree, start, end, GFP_ATOMIC);
+                       if (whole_page)
+                               SetPageUptodate(page);
+                       else
+                               check_page_uptodate(tree, page);
+               } else {
+                       ClearPageUptodate(page);
+                       SetPageError(page);
+               }
+
+               unlock_extent(tree, start, end, GFP_ATOMIC);
+
+               /*
+                * a partial page may still have other locked ranges, so
+                * ask the tree before unlocking the page
+                */
+               if (whole_page)
+                       unlock_page(page);
+               else
+                       check_page_locked(tree, page);
+       } while (bvec >= bio->bi_io_vec);
+
+       bio_put(bio);
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+       return 0;
+#endif
+}
+
+/*
+ * IO done from prepare_write is pretty simple, we just unlock
+ * the structs in the extent tree when done, and set the uptodate bits
+ * as appropriate.
+ *
+ * Unlike the readpage handler this one never unlocks the page and never
+ * sets the page up to date; it only touches the extent tree, plus the
+ * page error/uptodate flags on failure.  On kernels up to 2.6.23 the
+ * handler returns 1 while bio->bi_size is still non-zero and 0 otherwise.
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
+static void end_bio_extent_preparewrite(struct bio *bio, int err)
+#else
+static int end_bio_extent_preparewrite(struct bio *bio,
+                                      unsigned int bytes_done, int err)
+#endif
+{
+       const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+       struct extent_io_tree *tree = bio->bi_private;
+       u64 start;
+       u64 end;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+       /* presumably a non-zero bi_size means the bio is not finished yet */
+       if (bio->bi_size)
+               return 1;
+#endif
+
+       do {
+               struct page *page = bvec->bv_page;
+               /* byte range in the file that this vec covers */
+               start = ((u64)page->index << PAGE_CACHE_SHIFT) +
+                       bvec->bv_offset;
+               end = start + bvec->bv_len - 1;
+
+               /* step to the previous vec and warm up its page flags */
+               if (--bvec >= bio->bi_io_vec)
+                       prefetchw(&bvec->bv_page->flags);
+
+               if (uptodate) {
+                       set_extent_uptodate(tree, start, end, GFP_ATOMIC);
+               } else {
+                       ClearPageUptodate(page);
+                       SetPageError(page);
+               }
+
+               unlock_extent(tree, start, end, GFP_ATOMIC);
+
+       } while (bvec >= bio->bi_io_vec);
+
+       bio_put(bio);
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+       return 0;
+#endif
+}
+
+/*
+ * allocate a bio with room for nr_vecs pages, aimed at first_sector on
+ * bdev.  When the allocation fails and the caller runs with PF_MEMALLOC,
+ * retry with half as many vecs each time until an allocation succeeds
+ * or the count reaches zero.  Returns NULL if no bio could be allocated.
+ */
+static struct bio *
+extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
+                gfp_t gfp_flags)
+{
+       struct bio *bio = bio_alloc(gfp_flags, nr_vecs);
+
+       if (!bio && (current->flags & PF_MEMALLOC)) {
+               for (nr_vecs /= 2; nr_vecs; nr_vecs /= 2) {
+                       bio = bio_alloc(gfp_flags, nr_vecs);
+                       if (bio)
+                               break;
+               }
+       }
+       if (!bio)
+               return NULL;
+
+       bio->bi_bdev = bdev;
+       bio->bi_sector = first_sector;
+       return bio;
+}
+
+/*
+ * hand a bio to the block layer.  Warns when the bio starts beyond the
+ * last sector of the device.  Returns -EOPNOTSUPP if the bio came back
+ * flagged BIO_EOPNOTSUPP, 0 otherwise.
+ */
+static int submit_one_bio(int rw, struct bio *bio)
+{
+       int unsupported;
+       u64 maxsector;
+
+       /* extra reference: the bio flags are read after submit_bio */
+       bio_get(bio);
+
+       maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
+       if (bio->bi_sector > maxsector) {
+               printk("sector too large max %Lu got %llu\n", maxsector,
+                       (unsigned long long)bio->bi_sector);
+               WARN_ON(1);
+       }
+
+       submit_bio(rw, bio);
+       unsupported = bio_flagged(bio, BIO_EOPNOTSUPP);
+       bio_put(bio);
+       return unsupported ? -EOPNOTSUPP : 0;
+}
+
+/*
+ * queue one page range for IO.
+ *
+ * When the caller passes a bio through bio_ret and the new range starts
+ * at the sector right after that bio and still fits into it, the page
+ * is simply added and 0 is returned.  Otherwise the old bio (if any) is
+ * submitted and a fresh bio of up to max_pages vecs is allocated for
+ * this page, with end_io_func as completion handler and the tree as
+ * bi_private.
+ *
+ * With a non-NULL bio_ret the new bio is handed back in *bio_ret and
+ * the caller has to submit it; with a NULL bio_ret it is submitted here.
+ *
+ * Returns 0 on success, -ENOMEM when no bio could be allocated, or the
+ * result of submit_one_bio.
+ */
+static int submit_extent_page(int rw, struct extent_io_tree *tree,
+                             struct page *page, sector_t sector,
+                             size_t size, unsigned long offset,
+                             struct block_device *bdev,
+                             struct bio **bio_ret,
+                             unsigned long max_pages,
+                             bio_end_io_t end_io_func)
+{
+       int ret = 0;
+       struct bio *bio;
+       int nr;
+
+       if (bio_ret && *bio_ret) {
+               bio = *bio_ret;
+               if (bio->bi_sector + (bio->bi_size >> 9) != sector ||
+                   bio_add_page(bio, page, size, offset) < size) {
+                       ret = submit_one_bio(rw, bio);
+                       /*
+                        * the old bio now belongs to the block layer, make
+                        * sure the caller cannot reuse it if we bail out
+                        * below
+                        */
+                       *bio_ret = NULL;
+                       bio = NULL;
+               } else {
+                       return 0;
+               }
+       }
+       nr = min_t(int, max_pages, bio_get_nr_vecs(bdev));
+       bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+       if (!bio) {
+               /* do not fall through and dereference a NULL bio */
+               printk("failed to allocate bio nr %d\n", nr);
+               return -ENOMEM;
+       }
+       bio_add_page(bio, page, size, offset);
+       bio->bi_end_io = end_io_func;
+       bio->bi_private = tree;
+       if (bio_ret) {
+               *bio_ret = bio;
+       } else {
+               ret = submit_one_bio(rw, bio);
+       }
+
+       return ret;
+}
+
+/*
+ * tag a page as belonging to an extent tree: set PagePrivate, store
+ * EXTENT_PAGE_PRIVATE as the private value and take a page reference.
+ * Pages that already have PagePrivate set are left untouched.
+ */
+void set_page_extent_mapped(struct page *page)
+{
+       if (PagePrivate(page))
+               return;
+
+       SetPagePrivate(page);
+       WARN_ON(!page->mapping->a_ops->invalidatepage);
+       set_page_private(page, EXTENT_PAGE_PRIVATE);
+       page_cache_get(page);
+}
+
+/*
+ * store the first-page marker in the page's private value, with 'len'
+ * shifted left by two bits and or'ed on top of it.
+ */
+void set_page_extent_head(struct page *page, unsigned long len)
+{
+       unsigned long private = EXTENT_PAGE_PRIVATE_FIRST_PAGE | (len << 2);
+
+       set_page_private(page, private);
+}
+
+/*
+ * basic readpage implementation.  Locked extent state structs are inserted
+ * into the tree that are removed when the IO is done (by the end_io
+ * handlers)
+ *
+ * The page's byte range is locked in the tree and then walked one
+ * extent map at a time:
+ *  - bytes at or past i_size are zeroed, marked uptodate and unlocked
+ *  - holes are zeroed, marked uptodate and unlocked
+ *  - ranges that are already uptodate in the tree are just unlocked
+ *  - everything else goes through the optional readpage_io_hook and is
+ *    queued with submit_extent_page; end_bio_extent_readpage finishes it
+ *
+ * 'bio' is passed on to submit_extent_page so that reads can be merged
+ * into one bio; the caller has to submit whatever is left in it.
+ *
+ * If no chunk reached the IO path the page is marked up to date (unless
+ * it has the error flag) and unlocked here.  Always returns 0; failures
+ * are reported through SetPageError.
+ */
+static int __extent_read_full_page(struct extent_io_tree *tree,
+                                  struct page *page,
+                                  get_extent_t *get_extent,
+                                  struct bio **bio)
+{
+       struct inode *inode = page->mapping->host;
+       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+       u64 page_end = start + PAGE_CACHE_SIZE - 1;
+       u64 end;
+       u64 cur = start;
+       u64 extent_offset;
+       u64 last_byte = i_size_read(inode);
+       u64 block_start;
+       u64 cur_end;
+       sector_t sector;
+       struct extent_map *em;
+       struct block_device *bdev;
+       int ret;
+       int nr = 0;
+       size_t page_offset = 0;
+       size_t iosize;
+       size_t blocksize = inode->i_sb->s_blocksize;
+
+       set_page_extent_mapped(page);
+
+       end = page_end;
+       lock_extent(tree, start, end, GFP_NOFS);
+
+       while (cur <= end) {
+               /* nothing on disk past i_size: zero the rest of the page */
+               if (cur >= last_byte) {
+                       char *userpage;
+                       iosize = PAGE_CACHE_SIZE - page_offset;
+                       userpage = kmap_atomic(page, KM_USER0);
+                       memset(userpage + page_offset, 0, iosize);
+                       flush_dcache_page(page);
+                       kunmap_atomic(userpage, KM_USER0);
+                       set_extent_uptodate(tree, cur, cur + iosize - 1,
+                                           GFP_NOFS);
+                       unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+                       break;
+               }
+               em = get_extent(inode, page, page_offset, cur,
+                               end - cur + 1, 0);
+               if (IS_ERR(em) || !em) {
+                       SetPageError(page);
+                       unlock_extent(tree, cur, end, GFP_NOFS);
+                       break;
+               }
+
+               extent_offset = cur - em->start;
+               BUG_ON(extent_map_end(em) <= cur);
+               BUG_ON(end < cur);
+
+               /* this chunk ends with the extent or the page, rounded up */
+               iosize = min(extent_map_end(em) - cur, end - cur + 1);
+               cur_end = min(extent_map_end(em) - 1, end);
+               iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
+               sector = (em->block_start + extent_offset) >> 9;
+               bdev = em->bdev;
+               block_start = em->block_start;
+               free_extent_map(em);
+               em = NULL;
+
+               /* we've found a hole, just zero and go on */
+               if (block_start == EXTENT_MAP_HOLE) {
+                       char *userpage;
+                       userpage = kmap_atomic(page, KM_USER0);
+                       memset(userpage + page_offset, 0, iosize);
+                       flush_dcache_page(page);
+                       kunmap_atomic(userpage, KM_USER0);
+
+                       set_extent_uptodate(tree, cur, cur + iosize - 1,
+                                           GFP_NOFS);
+                       unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+                       cur = cur + iosize;
+                       page_offset += iosize;
+                       continue;
+               }
+               /* the get_extent function already copied into the page */
+               if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
+                       unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+                       cur = cur + iosize;
+                       page_offset += iosize;
+                       continue;
+               }
+
+               ret = 0;
+               if (tree->ops && tree->ops->readpage_io_hook) {
+                       ret = tree->ops->readpage_io_hook(page, cur,
+                                                         cur + iosize - 1);
+               }
+               if (!ret) {
+                       /*
+                        * upper bound for the bio size: pages from this
+                        * one up to the page that holds i_size.  This is
+                        * kept apart from 'nr', the chunk counter.
+                        */
+                       unsigned long max_pages;
+
+                       max_pages = (last_byte >> PAGE_CACHE_SHIFT) + 1;
+                       max_pages -= page->index;
+                       ret = submit_extent_page(READ, tree, page,
+                                        sector, iosize, page_offset,
+                                        bdev, bio, max_pages,
+                                        end_bio_extent_readpage);
+               }
+               if (ret)
+                       SetPageError(page);
+               cur = cur + iosize;
+               page_offset += iosize;
+               /*
+                * NOTE(review): counted even when the hook or the submit
+                * failed, so the page is not unlocked below in that case
+                */
+               nr++;
+       }
+       if (!nr) {
+               if (!PageError(page))
+                       SetPageUptodate(page);
+               unlock_page(page);
+       }
+       return 0;
+}
+
+/*
+ * read one page through the extent tree and submit whatever bio the
+ * read left pending.  Returns the result of __extent_read_full_page.
+ */
+int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
+                           get_extent_t *get_extent)
+{
+       struct bio *pending = NULL;
+       int err = __extent_read_full_page(tree, page, get_extent, &pending);
+
+       if (pending != NULL)
+               submit_one_bio(READ, pending);
+       return err;
+}
+EXPORT_SYMBOL(extent_read_full_page);
+
+/*
+ * the writepage semantics are similar to regular writepage.  extent
+ * records are inserted to lock ranges in the tree, and as dirty areas
+ * are found, they are marked writeback.  Then the lock bits are removed
+ * and the end_io handler clears the writeback ranges
+ *
+ * 'data' is a struct extent_page_data that supplies the tree, the
+ * get_extent callback and the bio that writes are collected in.
+ *
+ * Steps:
+ *  - a page completely past i_size only gets its dirty bits cleared
+ *  - the part of the page past i_size is zeroed
+ *  - delalloc ranges that start in or before this page are locked and
+ *    passed to the fill_delalloc hook
+ *  - the page range is locked and walked one extent map at a time;
+ *    holes and inline extents just lose their dirty bits, the rest is
+ *    marked writeback and queued with submit_extent_page
+ *
+ * The page is unlocked before returning.  Always returns 0; failures
+ * are reported through SetPageError.
+ */
+static int __extent_writepage(struct page *page, struct writeback_control *wbc,
+                             void *data)
+{
+       struct inode *inode = page->mapping->host;
+       struct extent_page_data *epd = data;
+       struct extent_io_tree *tree = epd->tree;
+       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+       u64 delalloc_start;
+       u64 page_end = start + PAGE_CACHE_SIZE - 1;
+       u64 end;
+       u64 cur = start;
+       u64 extent_offset;
+       u64 last_byte = i_size_read(inode);
+       u64 block_start;
+       u64 iosize;
+       sector_t sector;
+       struct extent_map *em;
+       struct block_device *bdev;
+       int ret;
+       int nr = 0;
+       size_t page_offset = 0;
+       size_t blocksize;
+       loff_t i_size = i_size_read(inode);
+       unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
+       u64 nr_delalloc;
+       u64 delalloc_end;
+
+       WARN_ON(!PageLocked(page));
+       /* the whole page is past i_size, there is nothing to write */
+       if (page->index > end_index) {
+               clear_extent_dirty(tree, start, page_end, GFP_NOFS);
+               unlock_page(page);
+               return 0;
+       }
+
+       /* the page holds i_size: zero everything behind it */
+       if (page->index == end_index) {
+               char *userpage;
+
+               size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
+
+               userpage = kmap_atomic(page, KM_USER0);
+               memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset);
+               flush_dcache_page(page);
+               kunmap_atomic(userpage, KM_USER0);
+       }
+
+       set_page_extent_mapped(page);
+
+       /*
+        * process delalloc ranges until we are past the end of this page.
+        * When nothing was locked, delalloc_end says how far to skip.
+        * NOTE(review): tree->ops is used here without the NULL check the
+        * other hooks get -- confirm it is always set for writepage users
+        */
+       delalloc_start = start;
+       delalloc_end = 0;
+       while(delalloc_end < page_end) {
+               nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start,
+                                                      &delalloc_end,
+                                                      128 * 1024 * 1024);
+               if (nr_delalloc == 0) {
+                       delalloc_start = delalloc_end + 1;
+                       continue;
+               }
+               tree->ops->fill_delalloc(inode, delalloc_start,
+                                        delalloc_end);
+               clear_extent_bit(tree, delalloc_start,
+                                delalloc_end,
+                                EXTENT_LOCKED | EXTENT_DELALLOC,
+                                1, 0, GFP_NOFS);
+               delalloc_start = delalloc_end + 1;
+       }
+       lock_extent(tree, start, page_end, GFP_NOFS);
+
+       end = page_end;
+       if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
+               printk("found delalloc bits after lock_extent\n");
+       }
+
+       if (last_byte <= start) {
+               clear_extent_dirty(tree, start, page_end, GFP_NOFS);
+               goto done;
+       }
+
+       set_extent_uptodate(tree, start, page_end, GFP_NOFS);
+       blocksize = inode->i_sb->s_blocksize;
+
+       while (cur <= end) {
+               /* nothing to write past i_size, just drop the dirty bits */
+               if (cur >= last_byte) {
+                       clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
+                       break;
+               }
+               em = epd->get_extent(inode, page, page_offset, cur,
+                                    end - cur + 1, 1);
+               if (IS_ERR(em) || !em) {
+                       SetPageError(page);
+                       break;
+               }
+
+               extent_offset = cur - em->start;
+               BUG_ON(extent_map_end(em) <= cur);
+               BUG_ON(end < cur);
+               /* this chunk ends with the extent or the page, rounded up */
+               iosize = min(extent_map_end(em) - cur, end - cur + 1);
+               iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
+               sector = (em->block_start + extent_offset) >> 9;
+               bdev = em->bdev;
+               block_start = em->block_start;
+               free_extent_map(em);
+               em = NULL;
+
+               /* holes and inline extents have no blocks to write to */
+               if (block_start == EXTENT_MAP_HOLE ||
+                   block_start == EXTENT_MAP_INLINE) {
+                       clear_extent_dirty(tree, cur,
+                                          cur + iosize - 1, GFP_NOFS);
+                       cur = cur + iosize;
+                       page_offset += iosize;
+                       continue;
+               }
+
+               /* leave this out until we have a page_mkwrite call */
+               if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
+                                  EXTENT_DIRTY, 0)) {
+                       cur = cur + iosize;
+                       page_offset += iosize;
+                       continue;
+               }
+               clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
+               if (tree->ops && tree->ops->writepage_io_hook) {
+                       ret = tree->ops->writepage_io_hook(page, cur,
+                                               cur + iosize - 1);
+               } else {
+                       ret = 0;
+               }
+               if (ret)
+                       SetPageError(page);
+               else {
+                       /* upper bound for the number of pages in one bio */
+                       unsigned long max_nr = end_index + 1;
+                       set_range_writeback(tree, cur, cur + iosize - 1);
+                       if (!PageWriteback(page)) {
+                               printk("warning page %lu not writeback, "
+                                      "cur %llu end %llu\n", page->index,
+                                      (unsigned long long)cur,
+                                      (unsigned long long)end);
+                       }
+
+                       ret = submit_extent_page(WRITE, tree, page, sector,
+                                                iosize, page_offset, bdev,
+                                                &epd->bio, max_nr,
+                                                end_bio_extent_writepage);
+                       if (ret)
+                               SetPageError(page);
+               }
+               cur = cur + iosize;
+               page_offset += iosize;
+               nr++;
+       }
+done:
+       if (nr == 0) {
+               /* make sure the mapping tag for page dirty gets cleared */
+               set_page_writeback(page);
+               end_page_writeback(page);
+       }
+       unlock_extent(tree, start, page_end, GFP_NOFS);
+       unlock_page(page);
+       return 0;
+}
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+
+/*
+ * Taken directly from 2.6.23 for 2.6.18 back port
+ *
+ * callback type for write_cache_pages: it is called for each page with
+ * the writeback control and the caller's opaque data pointer.
+ */
+typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
+                                void *data);
+
+/**
+ * write_cache_pages - walk the list of dirty pages of the given address space
+ * and write all of them.
+ * @mapping: address space structure to write
+ * @wbc: subtract the number of written pages from *@wbc->nr_to_write
+ * @writepage: function called for each page
+ * @data: data passed to writepage function
+ *
+ * If a page is already under I/O, write_cache_pages() skips it, even
+ * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
+ * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
+ * and msync() need to guarantee that all the data which was dirty at the time
+ * the call was made get new I/O started against them.  If wbc->sync_mode is
+ * WB_SYNC_ALL then we were called for data integrity and we must wait for
+ * existing IO to complete.
+ */
+static int write_cache_pages(struct address_space *mapping,
+                     struct writeback_control *wbc, writepage_t writepage,
+                     void *data)
+{
+       struct backing_dev_info *bdi = mapping->backing_dev_info;
+       int ret = 0;
+       int done = 0;
+       struct pagevec pvec;
+       int nr_pages;
+       pgoff_t index;
+       pgoff_t end;            /* Inclusive */
+       int scanned = 0;
+       int range_whole = 0;
+
+       if (wbc->nonblocking && bdi_write_congested(bdi)) {
+               wbc->encountered_congestion = 1;
+               return 0;
+       }
+
+       pagevec_init(&pvec, 0);
+       if (wbc->range_cyclic) {
+               index = mapping->writeback_index; /* Start from prev offset */
+               end = -1;
+       } else {
+               index = wbc->range_start >> PAGE_CACHE_SHIFT;
+               end = wbc->range_end >> PAGE_CACHE_SHIFT;
+               if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+                       range_whole = 1;
+               scanned = 1;
+       }
+retry:
+       while (!done && (index <= end) &&
+              (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+                                             PAGECACHE_TAG_DIRTY,
+                                             min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+               unsigned i;
+
+               scanned = 1;
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+
+                       /*
+                        * At this point we hold neither mapping->tree_lock nor
+                        * lock on the page itself: the page may be truncated or
+                        * invalidated (changing page->mapping to NULL), or even
+                        * swizzled back from swapper_space to tmpfs file
+                        * mapping
+                        */
+                       lock_page(page);
+
+                       if (unlikely(page->mapping != mapping)) {
+                               unlock_page(page);
+                               continue;
+                       }
+
+                       if (!wbc->range_cyclic && page->index > end) {
+                               done = 1;
+                               unlock_page(page);
+                               continue;
+                       }
+
+                       if (wbc->sync_mode != WB_SYNC_NONE)
+                               wait_on_page_writeback(page);
+
+                       if (PageWriteback(page) ||
+                           !clear_page_dirty_for_io(page)) {
+                               unlock_page(page);
+                               continue;
+                       }
+
+                       ret = (*writepage)(page, wbc, data);
+
+                       if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
+                               unlock_page(page);
+                               ret = 0;
+                       }
+                       if (ret || (--(wbc->nr_to_write) <= 0))
+                               done = 1;
+                       if (wbc->nonblocking && bdi_write_congested(bdi)) {
+                               wbc->encountered_congestion = 1;
+                               done = 1;
+                       }
+               }
+               pagevec_release(&pvec);
+               cond_resched();
+       }
+       if (!scanned && !done) {
+               /*
+                * We hit the last page and there is more work to be done: wrap
+                * back to the start of the file
+                */
+               scanned = 1;
+               index = 0;
+               goto retry;
+       }
+       if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+               mapping->writeback_index = index;
+       return ret;
+}
+#endif
+
+/*
+ * Write out @page through __extent_writepage() and then opportunistically
+ * push up to 64 of the dirty pages that follow it in the same mapping, so
+ * they can share the bio being built in the extent_page_data.
+ *
+ * Returns the result of writing @page itself; the result of the follow-on
+ * pass over the later pages is not reported.
+ */
+int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
+                         get_extent_t *get_extent,
+                         struct writeback_control *wbc)
+{
+       /* sampled up front, before __extent_writepage() unlocks the page */
+       struct address_space *mapping = page->mapping;
+       struct extent_page_data io_ctx = {
+               .bio = NULL,
+               .tree = tree,
+               .get_extent = get_extent,
+       };
+       /* private control block for the follow-on pass over later pages */
+       struct writeback_control following_wbc = {
+               .bdi            = wbc->bdi,
+               .sync_mode      = WB_SYNC_NONE,
+               .older_than_this = NULL,
+               .nr_to_write    = 64,
+               .range_start    = page_offset(page) + PAGE_CACHE_SIZE,
+               .range_end      = (loff_t)-1,
+       };
+       int page_ret;
+
+       page_ret = __extent_writepage(page, wbc, &io_ctx);
+
+       write_cache_pages(mapping, &following_wbc, __extent_writepage,
+                         &io_ctx);
+
+       /* send whatever bio is still pending in the context */
+       if (io_ctx.bio)
+               submit_one_bio(WRITE, io_ctx.bio);
+
+       return page_ret;
+}
+EXPORT_SYMBOL(extent_write_full_page);
+
+
+/*
+ * writepages helper: run __extent_writepage() over the dirty pages of
+ * @mapping selected by @wbc, then submit the bio left pending in the
+ * shared extent_page_data.
+ *
+ * Returns whatever write_cache_pages() returned.
+ */
+int extent_writepages(struct extent_io_tree *tree,
+                     struct address_space *mapping,
+                     get_extent_t *get_extent,
+                     struct writeback_control *wbc)
+{
+       struct extent_page_data io_ctx = {
+               .bio = NULL,
+               .tree = tree,
+               .get_extent = get_extent,
+       };
+       int status;
+
+       status = write_cache_pages(mapping, wbc, __extent_writepage, &io_ctx);
+
+       if (io_ctx.bio)
+               submit_one_bio(WRITE, io_ctx.bio);
+
+       return status;
+}
+EXPORT_SYMBOL(extent_writepages);
+
+/*
+ * readpages helper: take @nr_pages pages off the tail of @pages, insert
+ * each into the page cache of @mapping and start a read for it, batching
+ * the reads into a single bio where possible.
+ *
+ * Pages that cannot be inserted into the page cache are simply released.
+ * Always returns 0.
+ */
+int extent_readpages(struct extent_io_tree *tree,
+                    struct address_space *mapping,
+                    struct list_head *pages, unsigned nr_pages,
+                    get_extent_t get_extent)
+{
+       struct pagevec lru_pvec;
+       struct bio *bio = NULL;
+       unsigned remaining;
+
+       pagevec_init(&lru_pvec, 0);
+
+       for (remaining = nr_pages; remaining > 0; remaining--) {
+               struct page *page = list_entry(pages->prev, struct page, lru);
+               int inserted;
+
+               prefetchw(&page->flags);
+               list_del(&page->lru);
+
+               /*
+                * add_to_page_cache_lru() is what we really want, but it is
+                * not exported, so it is reproduced by hand
+                */
+               inserted = !add_to_page_cache(page, mapping,
+                                             page->index, GFP_KERNEL);
+               if (inserted) {
+                       /* hand-rolled lru_cache_add(), also not exported */
+                       page_cache_get(page);
+                       if (!pagevec_add(&lru_pvec, page))
+                               __pagevec_lru_add(&lru_pvec);
+                       __extent_read_full_page(tree, page, get_extent, &bio);
+               }
+               page_cache_release(page);
+       }
+
+       /* flush the pages still queued for the LRU */
+       if (pagevec_count(&lru_pvec))
+               __pagevec_lru_add(&lru_pvec);
+
+       BUG_ON(!list_empty(pages));
+
+       if (bio)
+               submit_one_bio(READ, bio);
+       return 0;
+}
+EXPORT_SYMBOL(extent_readpages);
+
+/*
+ * Basic invalidatepage implementation.  The part of the page starting at
+ * @offset, rounded up to the next block boundary, is locked in the extent
+ * tree, any writeback on it is waited for, and its LOCKED, DIRTY and
+ * DELALLOC state bits are then cleared.
+ *
+ * Nothing is done when the rounded-up start lies beyond the page.
+ * Always returns 0.
+ */
+int extent_invalidatepage(struct extent_io_tree *tree,
+                         struct page *page, unsigned long offset)
+{
+       size_t blocksize = page->mapping->host->i_sb->s_blocksize;
+       u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
+       u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+       u64 inval_start;
+
+       /* first block boundary at or after @offset */
+       inval_start = page_start +
+                     ((offset + blocksize - 1) & ~(blocksize - 1));
+
+       if (inval_start <= page_end) {
+               lock_extent(tree, inval_start, page_end, GFP_NOFS);
+               wait_on_extent_writeback(tree, inval_start, page_end);
+               clear_extent_bit(tree, inval_start, page_end,
+                                EXTENT_LOCKED | EXTENT_DIRTY |
+                                EXTENT_DELALLOC,
+                                1, 1, GFP_NOFS);
+       }
+       return 0;
+}
+EXPORT_SYMBOL(extent_invalidatepage);
+
+/*
+ * Simple commit_write implementation: tag the page as extent mapped, mark
+ * it dirty, and grow i_size when the write ended past the current end of
+ * file.  @tree and @from are not used.  Always returns 0.
+ */
+int extent_commit_write(struct extent_io_tree *tree,
+                       struct inode *inode, struct page *page,
+                       unsigned from, unsigned to)
+{
+       /* file offset one past the last byte written */
+       loff_t write_end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+
+       set_page_extent_mapped(page);
+       set_page_dirty(page);
+
+       if (write_end <= inode->i_size)
+               return 0;
+
+       i_size_write(inode, write_end);
+       mark_inode_dirty(inode);
+       return 0;
+}
+EXPORT_SYMBOL(extent_commit_write);
+
+/*
+ * Prepare the blocks of @page that cover bytes [@from, @to) for a write.
+ *
+ * @tree:       extent io tree holding the state bits for the file
+ * @inode:      inode being written
+ * @page:       page about to be written
+ * @from:       first byte inside the page the caller will overwrite
+ * @to:         one past the last byte the caller will overwrite
+ * @get_extent: callback returning the extent map for a file range
+ *
+ * Blocks flagged new that are only partially overwritten get the bytes
+ * outside [@from, @to) zeroed.  Existing, mapped blocks that are only
+ * partially overwritten and not yet uptodate are read from disk, and the
+ * reads are waited for before returning.  Everything else is simply marked
+ * uptodate and unlocked.
+ *
+ * Returns 0 on success.  If @get_extent fails, the blocks that were not
+ * processed yet are unlocked again and a negative errno is returned
+ * (the PTR_ERR of the callback's result, or -EIO if it returned NULL).
+ */
+int extent_prepare_write(struct extent_io_tree *tree,
+                        struct inode *inode, struct page *page,
+                        unsigned from, unsigned to, get_extent_t *get_extent)
+{
+       u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
+       u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+       u64 block_start;
+       u64 orig_block_start;
+       u64 block_end;
+       u64 cur_end;
+       struct extent_map *em;
+       unsigned blocksize = 1 << inode->i_blkbits;
+       size_t page_offset = 0;
+       size_t block_off_start;
+       size_t block_off_end;
+       int err = 0;
+       int iocount = 0;
+       int ret = 0;
+       int isnew;
+
+       set_page_extent_mapped(page);
+
+       /* widen [from, to) to whole blocks */
+       block_start = (page_start + from) & ~((u64)blocksize - 1);
+       block_end = (page_start + to - 1) | (blocksize - 1);
+       orig_block_start = block_start;
+
+       lock_extent(tree, page_start, page_end, GFP_NOFS);
+       while (block_start <= block_end) {
+               em = get_extent(inode, page, page_offset, block_start,
+                               block_end - block_start + 1, 1);
+               if (IS_ERR(em) || !em) {
+                       /*
+                        * Report the failure instead of returning 0, and
+                        * release the blocks this loop never got to; no
+                        * one else is going to unlock them for us.
+                        */
+                       err = em ? PTR_ERR(em) : -EIO;
+                       unlock_extent(tree, block_start, block_end, GFP_NOFS);
+                       goto err;
+               }
+               cur_end = min(block_end, extent_map_end(em) - 1);
+               block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
+               block_off_end = block_off_start + blocksize;
+               isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
+
+               /* new block, partially overwritten: zero the untouched part */
+               if (!PageUptodate(page) && isnew &&
+                   (block_off_end > to || block_off_start < from)) {
+                       void *kaddr;
+
+                       kaddr = kmap_atomic(page, KM_USER0);
+                       if (block_off_end > to)
+                               memset(kaddr + to, 0, block_off_end - to);
+                       if (block_off_start < from)
+                               memset(kaddr + block_off_start, 0,
+                                      from - block_off_start);
+                       flush_dcache_page(page);
+                       kunmap_atomic(kaddr, KM_USER0);
+               }
+               /*
+                * existing on-disk block, partially overwritten and not
+                * uptodate yet: it has to be read in first
+                */
+               if ((em->block_start != EXTENT_MAP_HOLE &&
+                    em->block_start != EXTENT_MAP_INLINE) &&
+                   !isnew && !PageUptodate(page) &&
+                   (block_off_end > to || block_off_start < from) &&
+                   !test_range_bit(tree, block_start, cur_end,
+                                   EXTENT_UPTODATE, 1)) {
+                       u64 sector;
+                       u64 extent_offset = block_start - em->start;
+                       size_t iosize;
+                       sector = (em->block_start + extent_offset) >> 9;
+                       iosize = (cur_end - block_start + blocksize) &
+                               ~((u64)blocksize - 1);
+                       /*
+                        * we've already got the extent locked, but we
+                        * need to split the state such that our end_bio
+                        * handler can clear the lock.
+                        */
+                       set_extent_bit(tree, block_start,
+                                      block_start + iosize - 1,
+                                      EXTENT_LOCKED, 0, NULL, GFP_NOFS);
+                       /*
+                        * NOTE(review): the submit result is not checked; a
+                        * failed submit presumably leaves this range locked
+                        * and the wait below stuck - confirm and handle.
+                        */
+                       ret = submit_extent_page(READ, tree, page,
+                                        sector, iosize, page_offset, em->bdev,
+                                        NULL, 1,
+                                        end_bio_extent_preparewrite);
+                       iocount++;
+                       block_start = block_start + iosize;
+               } else {
+                       set_extent_uptodate(tree, block_start, cur_end,
+                                           GFP_NOFS);
+                       unlock_extent(tree, block_start, cur_end, GFP_NOFS);
+                       block_start = cur_end + 1;
+               }
+               page_offset = block_start & (PAGE_CACHE_SIZE - 1);
+               free_extent_map(em);
+       }
+       /* wait until the submitted reads have dropped their extent locks */
+       if (iocount) {
+               wait_extent_bit(tree, orig_block_start,
+                               block_end, EXTENT_LOCKED);
+       }
+       check_page_uptodate(tree, page);
+err:
+       /* FIXME, zero out newly allocated blocks on error */
+       return err;
+}
+EXPORT_SYMBOL(extent_prepare_write);
+
+/*
+ * a helper for releasepage.  As long as there are no locked extents
+ * in the range corresponding to the page, both state records and extent
+ * map records are removed
+ *
+ * @map:  extent map tree to drop cached mappings from
+ * @tree: extent io tree holding the state bits
+ * @page: page being released
+ *
+ * Each extent map overlapping the page is removed unless part of it is
+ * locked.  Returns 0 if any part of the page range is still locked in
+ * @tree; otherwise the EXTENT_UPTODATE bits of the range are cleared and
+ * 1 is returned.
+ */
+int try_release_extent_mapping(struct extent_map_tree *map,
+                              struct extent_io_tree *tree, struct page *page)
+{
+       struct extent_map *em;
+       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+       u64 end = start + PAGE_CACHE_SIZE - 1;
+       u64 orig_start = start;
+       int ret = 1;
+
+       while (start <= end) {
+               spin_lock(&map->lock);
+               /*
+                * extent map lookups take a [start, len] pair, so pass the
+                * number of bytes left in the page, not the end offset;
+                * otherwise mappings far beyond this page could be matched
+                * and dropped.
+                */
+               em = lookup_extent_mapping(map, start, end - start + 1);
+               if (!em || IS_ERR(em)) {
+                       spin_unlock(&map->lock);
+                       break;
+               }
+               if (!test_range_bit(tree, em->start, extent_map_end(em) - 1,
+                                   EXTENT_LOCKED, 0)) {
+                       remove_extent_mapping(map, em);
+                       /* once for the rb tree */
+                       free_extent_map(em);
+               }
+               /* continue right after this mapping */
+               start = extent_map_end(em);
+               spin_unlock(&map->lock);
+
+               /* once for us */
+               free_extent_map(em);
+       }
+       if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0))
+               ret = 0;
+       else
+               clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
+                                1, 1, GFP_NOFS);
+       return ret;
+}
+EXPORT_SYMBOL(try_release_extent_mapping);
+
+/*
+ * bmap helper: translate file block @iblock of @mapping into a block
+ * number on disk using the extent map returned by @get_extent.
+ *
+ * Returns 0 when the lookup fails or when the block lives in a hole or
+ * an inline extent; otherwise the on-disk byte address of the block
+ * shifted down by the inode's block size bits.
+ */
+sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
+               get_extent_t *get_extent)
+{
+       struct inode *inode = mapping->host;
+       /* widen before shifting: sector_t may be narrower than 64 bits */
+       u64 start = (u64)iblock << inode->i_blkbits;
+       sector_t sector = 0;
+       struct extent_map *em;
+
+       em = get_extent(inode, NULL, 0, start, (1 << inode->i_blkbits), 0);
+       if (!em || IS_ERR(em))
+               return 0;
+
+       /* holes and inline extents have no block address to report */
+       if (em->block_start == EXTENT_MAP_INLINE ||
+           em->block_start == EXTENT_MAP_HOLE)
+               goto out;
+
+       sector = (em->block_start + start - em->start) >> inode->i_blkbits;
+out:
+       free_extent_map(em);
+       return sector;
+}
+
+/*
+ * Put @eb at the head of the tree's buffer LRU.
+ *
+ * A buffer already on the list is just moved to the front.  A buffer that
+ * is not yet on the list gains a reference for the list; if that brings
+ * the list to BUFFER_LRU_MAX entries, the entry at the tail is taken off
+ * and its reference dropped.  Callers in this file hold tree->lru_lock
+ * around the call.  Always returns 0.
+ */
+static int add_lru(struct extent_io_tree *tree, struct extent_buffer *eb)
+{
+       struct extent_buffer *victim;
+
+       if (!list_empty(&eb->lru)) {
+               list_move(&eb->lru, &tree->buffer_lru);
+               return 0;
+       }
+
+       extent_buffer_get(eb);
+       list_add(&eb->lru, &tree->buffer_lru);
+       tree->lru_size++;
+
+       if (tree->lru_size < BUFFER_LRU_MAX)
+               return 0;
+
+       /* list is full: evict the entry at the tail */
+       victim = list_entry(tree->buffer_lru.prev, struct extent_buffer, lru);
+       tree->lru_size--;
+       list_del_init(&victim->lru);
+       free_extent_buffer(victim);
+       return 0;
+}
+/*
+ * Search the tree's buffer LRU, front to back, for a buffer with exactly
+ * the given @start and @len.  On a hit the buffer is returned with an
+ * extra reference taken; NULL is returned when nothing matches.
+ */
+static struct extent_buffer *find_lru(struct extent_io_tree *tree,
+                                     u64 start, unsigned long len)
+{
+       struct list_head *head = &tree->buffer_lru;
+       struct list_head *pos;
+
+       for (pos = head->next; pos != head; pos = pos->next) {
+               struct extent_buffer *candidate;
+
+               candidate = list_entry(pos, struct extent_buffer, lru);
+               if (candidate->start != start || candidate->len != len)
+                       continue;
+
+               extent_buffer_get(candidate);
+               return candidate;
+       }
+       return NULL;
+}
+
+/*
+ * Number of page cache pages touched by the byte range
+ * [@start, @start + @len).
+ */
+static inline unsigned long num_extent_pages(u64 start, u64 len)
+{
+       /* index of the first page, and one past the index of the last */
+       u64 first_index = start >> PAGE_CACHE_SHIFT;
+       u64 end_index = (start + len + PAGE_CACHE_SIZE - 1) >>
+                       PAGE_CACHE_SHIFT;
+
+       return end_index - first_index;
+}
+
+/*
+ * Return page number @i of the extent buffer (0 is the first page).
+ *
+ * The first page is cached in eb->first_page; every other page is looked
+ * up in the page cache radix tree of the first page's mapping, under the
+ * mapping's tree_lock.  No extra page reference is taken here.
+ */
+static inline struct page *extent_buffer_page(struct extent_buffer *eb,
+                                             unsigned long i)
+{
+       struct address_space *mapping;
+       struct page *found;
+       unsigned long cache_index;
+
+       if (!i)
+               return eb->first_page;
+
+       /* turn the buffer-relative page number into a page cache index */
+       cache_index = i + (eb->start >> PAGE_CACHE_SHIFT);
+       mapping = eb->first_page->mapping;
+
+       read_lock_irq(&mapping->tree_lock);
+       found = radix_tree_lookup(&mapping->page_tree, cache_index);
+       read_unlock_irq(&mapping->tree_lock);
+
+       return found;
+}
+
+/*
+ * Get an extent_buffer for [@start, @start + @len).
+ *
+ * The tree's LRU is searched first; a hit is returned with an extra
+ * reference.  Otherwise a zeroed buffer is allocated from the
+ * extent_buffer slab with @mask and returned with a reference count of 1
+ * and an empty lru list head.
+ *
+ * Returns NULL if the allocation fails.
+ */
+static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
+                                                  u64 start,
+                                                  unsigned long len,
+                                                  gfp_t mask)
+{
+       struct extent_buffer *eb = NULL;
+
+       spin_lock(&tree->lru_lock);
+       eb = find_lru(tree, start, len);
+       spin_unlock(&tree->lru_lock);
+       if (eb) {
+               return eb;
+       }
+
+       eb = kmem_cache_zalloc(extent_buffer_cache, mask);
+       /* the slab allocation can fail; callers already handle NULL */
+       if (!eb)
+               return NULL;
+
+       INIT_LIST_HEAD(&eb->lru);
+       eb->start = start;
+       eb->len = len;
+       atomic_set(&eb->refs, 1);
+
+       return eb;
+}
+
+/*
+ * Give the extent_buffer structure itself back to the slab cache.  Page
+ * references are not touched here; callers release them beforehand.
+ */
+static void __free_extent_buffer(struct extent_buffer *eb)
+{
+       kmem_cache_free(extent_buffer_cache, eb);
+}
+
+/*
+ * Find or create the extent buffer covering [@start, @start + @len) and
+ * make sure all of its pages exist in the page cache of tree->mapping.
+ *
+ * @tree:  extent io tree the buffer belongs to
+ * @start: byte offset of the buffer
+ * @len:   length of the buffer in bytes
+ * @page0: optional first page supplied by the caller; it is expected to be
+ *         uptodate (a warning is issued otherwise)
+ * @mask:  allocation mask for the buffer and for page cache pages
+ *
+ * Returns the buffer, placed on the tree's LRU, or NULL on failure.
+ */
+struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
+                                         u64 start, unsigned long len,
+                                         struct page *page0,
+                                         gfp_t mask)
+{
+       unsigned long num_pages = num_extent_pages(start, len);
+       unsigned long i;
+       unsigned long index = start >> PAGE_CACHE_SHIFT;
+       struct extent_buffer *eb;
+       struct page *p;
+       struct address_space *mapping = tree->mapping;
+       int uptodate = 1;
+
+       eb = __alloc_extent_buffer(tree, start, len, mask);
+       if (!eb || IS_ERR(eb))
+               return NULL;
+
+       /* a buffer whose pages were already set up needs no more work */
+       if (eb->flags & EXTENT_BUFFER_FILLED)
+               goto lru_add;
+
+       if (page0) {
+               /* use the caller's page as page 0 and take our own reference */
+               eb->first_page = page0;
+               i = 1;
+               index++;
+               page_cache_get(page0);
+               mark_page_accessed(page0);
+               set_page_extent_mapped(page0);
+               WARN_ON(!PageUptodate(page0));
+               set_page_extent_head(page0, len);
+       } else {
+               i = 0;
+       }
+       /* find or create the remaining pages in the page cache */
+       for (; i < num_pages; i++, index++) {
+               p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
+               if (!p) {
+                       WARN_ON(1);
+                       goto fail;
+               }
+               set_page_extent_mapped(p);
+               mark_page_accessed(p);
+               /* the first page is tagged as head, the others as plain */
+               if (i == 0) {
+                       eb->first_page = p;
+                       set_page_extent_head(p, len);
+               } else {
+                       set_page_private(p, EXTENT_PAGE_PRIVATE);
+               }
+               if (!PageUptodate(p))
+                       uptodate = 0;
+               unlock_page(p);
+       }
+       /* cache the uptodate state only when every looked-up page had it */
+       if (uptodate)
+               eb->flags |= EXTENT_UPTODATE;
+       eb->flags |= EXTENT_BUFFER_FILLED;
+
+lru_add:
+       spin_lock(&tree->lru_lock);
+       add_lru(tree, eb);
+       spin_unlock(&tree->lru_lock);
+       return eb;
+
+fail:
+       /* make sure the buffer is off the LRU, then drop our reference */
+       spin_lock(&tree->lru_lock);
+       list_del_init(&eb->lru);
+       spin_unlock(&tree->lru_lock);
+       /* other references remain: the buffer is not torn down here */
+       if (!atomic_dec_and_test(&eb->refs))
+               return NULL;
+       /*
+        * release the pages collected so far; 'index' is reused as a page
+        * number relative to the buffer, and page 0 goes last because the
+        * lookups of the other pages go through eb->first_page
+        */
+       for (index = 1; index < i; index++) {
+               page_cache_release(extent_buffer_page(eb, index));
+       }
+       if (i > 0)
+               page_cache_release(extent_buffer_page(eb, 0));
+       __free_extent_buffer(eb);
+       return NULL;
+}
+EXPORT_SYMBOL(alloc_extent_buffer);
+
+/*
+ * Like alloc_extent_buffer(), but never creates page cache pages: the
+ * buffer is only returned if every page of [@start, @start + @len) is
+ * already present in the page cache of tree->mapping.
+ *
+ * @tree:  extent io tree the buffer belongs to
+ * @start: byte offset of the buffer
+ * @len:   length of the buffer in bytes
+ * @mask:  allocation mask for the extent_buffer structure
+ *
+ * Returns the buffer, placed on the tree's LRU, or NULL if a page is
+ * missing or the buffer cannot be allocated.
+ */
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+                                        u64 start, unsigned long len,
+                                         gfp_t mask)
+{
+       unsigned long num_pages = num_extent_pages(start, len);
+       unsigned long i;
+       unsigned long index = start >> PAGE_CACHE_SHIFT;
+       struct extent_buffer *eb;
+       struct page *p;
+       struct address_space *mapping = tree->mapping;
+       int uptodate = 1;
+
+       eb = __alloc_extent_buffer(tree, start, len, mask);
+       if (!eb || IS_ERR(eb))
+               return NULL;
+
+       /* a buffer whose pages were already set up needs no more work */
+       if (eb->flags & EXTENT_BUFFER_FILLED)
+               goto lru_add;
+
+       for (i = 0; i < num_pages; i++, index++) {
+               /* lookup only: a page missing from the cache is a failure */
+               p = find_lock_page(mapping, index);
+               if (!p) {
+                       goto fail;
+               }
+               set_page_extent_mapped(p);
+               mark_page_accessed(p);
+
+               /* the first page is tagged as head, the others as plain */
+               if (i == 0) {
+                       eb->first_page = p;
+                       set_page_extent_head(p, len);
+               } else {
+                       set_page_private(p, EXTENT_PAGE_PRIVATE);
+               }
+
+               if (!PageUptodate(p))
+                       uptodate = 0;
+               unlock_page(p);
+       }
+       /* cache the uptodate state only when every page had it */
+       if (uptodate)
+               eb->flags |= EXTENT_UPTODATE;
+       eb->flags |= EXTENT_BUFFER_FILLED;
+
+lru_add:
+       spin_lock(&tree->lru_lock);
+       add_lru(tree, eb);
+       spin_unlock(&tree->lru_lock);
+       return eb;
+fail:
+       /* make sure the buffer is off the LRU, then drop our reference */
+       spin_lock(&tree->lru_lock);
+       list_del_init(&eb->lru);
+       spin_unlock(&tree->lru_lock);
+       /* other references remain: the buffer is not torn down here */
+       if (!atomic_dec_and_test(&eb->refs))
+               return NULL;
+       /*
+        * release the pages collected so far; 'index' is reused as a page
+        * number relative to the buffer, and page 0 goes last because the
+        * lookups of the other pages go through eb->first_page
+        */
+       for (index = 1; index < i; index++) {
+               page_cache_release(extent_buffer_page(eb, index));
+       }
+       if (i > 0)
+               page_cache_release(extent_buffer_page(eb, 0));
+       __free_extent_buffer(eb);
+       return NULL;
+}
+EXPORT_SYMBOL(find_extent_buffer);
+
+/*
+ * Drop one reference on @eb (NULL is accepted and ignored).  When the last
+ * reference goes away, the page references held by the buffer are released
+ * and the structure is freed.  A buffer still linked on an LRU at that
+ * point triggers a warning.
+ */
+void free_extent_buffer(struct extent_buffer *eb)
+{
+       unsigned long page_count;
+       unsigned long nr;
+
+       if (!eb || !atomic_dec_and_test(&eb->refs))
+               return;
+
+       WARN_ON(!list_empty(&eb->lru));
+       page_count = num_extent_pages(eb->start, eb->len);
+
+       /*
+        * page 0 is released last: looking up any other page goes through
+        * eb->first_page
+        */
+       nr = 1;
+       while (nr < page_count) {
+               page_cache_release(extent_buffer_page(eb, nr));
+               nr++;
+       }
+       page_cache_release(extent_buffer_page(eb, 0));
+
+       __free_extent_buffer(eb);
+}
+EXPORT_SYMBOL(free_extent_buffer);
+
+/*
+ * Clear the dirty state of an extent buffer: first the dirty bits of its
+ * byte range in @tree, then the dirty flag and the page cache dirty tag of
+ * each of its pages.
+ *
+ * A first or last page that the buffer only partially covers is left dirty
+ * if any part of that page is still marked dirty in @tree.
+ *
+ * Always returns 0; the result of clear_extent_dirty() is stored in 'set'
+ * but not used.
+ */
+int clear_extent_buffer_dirty(struct extent_io_tree *tree,
+                             struct extent_buffer *eb)
+{
+       int set;
+       unsigned long i;
+       unsigned long num_pages;
+       struct page *page;
+
+       u64 start = eb->start;
+       u64 end = start + eb->len - 1;
+
+       set = clear_extent_dirty(tree, start, end, GFP_NOFS);
+       num_pages = num_extent_pages(eb->start, eb->len);
+
+       for (i = 0; i < num_pages; i++) {
+               page = extent_buffer_page(eb, i);
+               lock_page(page);
+               /* refresh page->private while the page is locked */
+               if (i == 0)
+                       set_page_extent_head(page, eb->len);
+               else
+                       set_page_private(page, EXTENT_PAGE_PRIVATE);
+
+               /*
+                * if we're on the last page or the first page and the
+                * block isn't aligned on a page boundary, do extra checks
+                * to make sure we don't clean page that is partially dirty
+                */
+               if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
+                   ((i == num_pages - 1) &&
+                    ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
+                       /* start/end now describe the whole page, not the eb */
+                       start = (u64)page->index << PAGE_CACHE_SHIFT;
+                       end  = start + PAGE_CACHE_SIZE - 1;
+                       if (test_range_bit(tree, start, end,
+                                          EXTENT_DIRTY, 0)) {
+                               unlock_page(page);
+                               continue;
+                       }
+               }
+               clear_page_dirty_for_io(page);
+               /* drop the dirty tag too, unless the page got redirtied */
+               write_lock_irq(&page->mapping->tree_lock);
+               if (!PageDirty(page)) {
+                       radix_tree_tag_clear(&page->mapping->page_tree,
+                                               page_index(page),
+                                               PAGECACHE_TAG_DIRTY);
+               }
+               write_unlock_irq(&page->mapping->tree_lock);
+               unlock_page(page);
+       }
+       return 0;
+}
+EXPORT_SYMBOL(clear_extent_buffer_dirty);
+
+/*
+ * Wait for writeback on the byte range covered by @eb; returns whatever
+ * wait_on_extent_writeback() returns for that range.
+ */
+int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
+                                   struct extent_buffer *eb)
+{
+       /* the range end passed down is inclusive */
+       return wait_on_extent_writeback(tree, eb->start,
+                                       eb->start + eb->len - 1);
+}
+EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
+
+/*
+ * Mark every page of @eb dirty and then mark the buffer's byte range dirty
+ * in @tree.  Returns the result of set_extent_dirty().
+ */
+int set_extent_buffer_dirty(struct extent_io_tree *tree,
+                            struct extent_buffer *eb)
+{
+       unsigned long i;
+       unsigned long num_pages;
+
+       num_pages = num_extent_pages(eb->start, eb->len);
+       for (i = 0; i < num_pages; i++) {
+               struct page *page = extent_buffer_page(eb, i);
+               /* writepage may need to do something special for the
+                * first page, we have to make sure page->private is
+                * properly set.  releasepage may drop page->private
+                * on us if the page isn't already dirty.
+                */
+               if (i == 0) {
+                       /* stays locked until the page has been dirtied below */
+                       lock_page(page);
+                       set_page_extent_head(page, eb->len);
+               } else if (PagePrivate(page) &&
+                          page->private != EXTENT_PAGE_PRIVATE) {
+                       /* page->private holds something else: reset it */
+                       lock_page(page);
+                       set_page_extent_mapped(page);
+                       unlock_page(page);
+               }
+               __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
+               if (i == 0)
+                       unlock_page(page);
+       }
+       return set_extent_dirty(tree, eb->start,
+                               eb->start + eb->len - 1, GFP_NOFS);
+}
+EXPORT_SYMBOL(set_extent_buffer_dirty);
+
+/*
+ * Mark the byte range of @eb uptodate in @tree and propagate that to its
+ * pages.  Pages fully covered by the buffer are flagged uptodate directly;
+ * a first or last page that the buffer only partially covers goes through
+ * check_page_uptodate() instead.  Always returns 0.
+ */
+int set_extent_buffer_uptodate(struct extent_io_tree *tree,
+                               struct extent_buffer *eb)
+{
+       unsigned long page_count = num_extent_pages(eb->start, eb->len);
+       unsigned long nr;
+
+       set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+                           GFP_NOFS);
+
+       for (nr = 0; nr < page_count; nr++) {
+               struct page *page = extent_buffer_page(eb, nr);
+               /* buffer starts in the middle of its first page */
+               int partial_head = nr == 0 &&
+                       (eb->start & (PAGE_CACHE_SIZE - 1));
+               /* buffer ends in the middle of its last page */
+               int partial_tail = nr == page_count - 1 &&
+                       ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1));
+
+               if (partial_head || partial_tail)
+                       check_page_uptodate(tree, page);
+               else
+                       SetPageUptodate(page);
+       }
+       return 0;
+}
+EXPORT_SYMBOL(set_extent_buffer_uptodate);
+
+/*
+ * Report whether @eb is uptodate: 1 right away if the flag cached in the
+ * buffer says so, otherwise the result of testing EXTENT_UPTODATE over the
+ * buffer's whole byte range in @tree.
+ */
+int extent_buffer_uptodate(struct extent_io_tree *tree,
+                            struct extent_buffer *eb)
+{
+       u64 last_byte = eb->start + eb->len - 1;
+
+       if (!(eb->flags & EXTENT_UPTODATE))
+               return test_range_bit(tree, eb->start, last_byte,
+                                     EXTENT_UPTODATE, 1);
+       return 1;
+}
+EXPORT_SYMBOL(extent_buffer_uptodate);
+
+/*
+ * Start reads for the pages of @eb that are not uptodate.
+ *
+ * @tree:  extent io tree of the buffer
+ * @eb:    buffer to read
+ * @start: if non-zero, byte offset of the first page to consider (expected
+ *         to be >= eb->start); 0 means start with the first page
+ * @wait:  if zero, pages that cannot be locked immediately are skipped and
+ *         the function returns once the reads have been started; otherwise
+ *         every page is locked and the reads are waited for
+ *
+ * Returns 0 on success, the last error reported by ->readpage, or -EIO if
+ * a page is still not uptodate after waiting.  The buffer is flagged
+ * EXTENT_UPTODATE only after a successful waiting read.
+ */
+int read_extent_buffer_pages(struct extent_io_tree *tree,
+                            struct extent_buffer *eb,
+                            u64 start,
+                            int wait)
+{
+       unsigned long i;
+       unsigned long start_i;
+       struct page *page;
+       int err;
+       int ret = 0;
+       unsigned long num_pages;
+
+       if (eb->flags & EXTENT_UPTODATE)
+               return 0;
+
+       /* the leading '0 &&' disables this extent state shortcut */
+       if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+                          EXTENT_UPTODATE, 1)) {
+               return 0;
+       }
+
+       /* translate @start into a page number relative to the buffer */
+       if (start) {
+               WARN_ON(start < eb->start);
+               start_i = (start >> PAGE_CACHE_SHIFT) -
+                       (eb->start >> PAGE_CACHE_SHIFT);
+       } else {
+               start_i = 0;
+       }
+
+       num_pages = num_extent_pages(eb->start, eb->len);
+       for (i = start_i; i < num_pages; i++) {
+               page = extent_buffer_page(eb, i);
+               if (PageUptodate(page)) {
+                       continue;
+               }
+               if (!wait) {
+                       /* non-blocking: skip pages someone else has locked */
+                       if (TestSetPageLocked(page)) {
+                               continue;
+                       }
+               } else {
+                       lock_page(page);
+               }
+               /*
+                * recheck under the page lock; when a read is issued the
+                * page is not unlocked here (presumably the read path does
+                * that - the second loop below waits for it)
+                */
+               if (!PageUptodate(page)) {
+                       err = page->mapping->a_ops->readpage(NULL, page);
+                       if (err) {
+                               ret = err;
+                       }
+               } else {
+                       unlock_page(page);
+               }
+       }
+
+       if (ret || !wait) {
+               return ret;
+       }
+
+       /* wait for every page to be unlocked and check the outcome */
+       for (i = start_i; i < num_pages; i++) {
+               page = extent_buffer_page(eb, i);
+               wait_on_page_locked(page);
+               if (!PageUptodate(page)) {
+                       ret = -EIO;
+               }
+       }
+       if (!ret)
+               eb->flags |= EXTENT_UPTODATE;
+       return ret;
+}
+EXPORT_SYMBOL(read_extent_buffer_pages);
+
+/*
+ * Copy @len bytes out of an extent buffer into @dstv.
+ *
+ * @eb:    buffer to read from
+ * @dstv:  destination memory
+ * @start: byte offset relative to the start of the buffer
+ * @len:   number of bytes to copy
+ *
+ * The copy walks the buffer's pages one at a time through atomic kmaps.
+ * A warning is issued if the request reaches past the end of the buffer
+ * or if a page being read is not uptodate; the copy is attempted anyway.
+ */
+void read_extent_buffer(struct extent_buffer *eb, void *dstv,
+                       unsigned long start,
+                       unsigned long len)
+{
+       size_t cur;
+       size_t offset;
+       struct page *page;
+       char *kaddr;
+       char *dst = (char *)dstv;
+       /* offset of the buffer's first byte inside its first page */
+       size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+       unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+       unsigned long num_pages = num_extent_pages(eb->start, eb->len);
+
+       /*
+        * @start is relative to the buffer, so the request has to be
+        * checked against eb->len alone, not against the absolute end
+        * offset eb->start + eb->len.
+        */
+       WARN_ON(start > eb->len);
+       WARN_ON(start + len > eb->len);
+
+       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+       while (len > 0) {
+               page = extent_buffer_page(eb, i);
+               /* one diagnostic plus one stack dump per stale page */
+               if (!PageUptodate(page)) {
+                       printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len);
+                       WARN_ON(1);
+               }
+
+               /* copy at most up to the end of the current page */
+               cur = min(len, (PAGE_CACHE_SIZE - offset));
+               kaddr = kmap_atomic(page, KM_USER1);
+               memcpy(dst, kaddr + offset, cur);
+               kunmap_atomic(kaddr, KM_USER1);
+
+               dst += cur;
+               len -= cur;
+               /* every page after the first is read from its beginning */
+               offset = 0;
+               i++;
+       }
+}
+EXPORT_SYMBOL(read_extent_buffer);
+
+/*
+ * Atomically map the page of @eb that holds bytes
+ * [@start, @start + @min_len) of the buffer.
+ *
+ * @token:     receives the kmap address, to be handed to the unmap call
+ * @map:       receives the address corresponding to *@map_start
+ * @map_start: receives the buffer-relative offset of the first mapped byte
+ * @map_len:   receives the number of bytes from *@map to the end of the page
+ * @km:        kmap_atomic slot to use
+ *
+ * Returns 0 on success or -EINVAL when the requested bytes do not all live
+ * in one page.  A request reaching past the end of the buffer is reported
+ * with a message and a warning but still mapped.
+ */
+int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
+                              unsigned long min_len, char **token, char **map,
+                              unsigned long *map_start,
+                              unsigned long *map_len, int km)
+{
+       /* offset of the buffer's first byte inside its first page */
+       size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+       unsigned long first_i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+       unsigned long last_i = (start_offset + start + min_len - 1) >>
+               PAGE_CACHE_SHIFT;
+       size_t offset;
+       struct page *p;
+       char *kaddr;
+
+       if (first_i != last_i)
+               return -EINVAL;
+
+       if (first_i != 0) {
+               /* later pages belong to the buffer from their first byte */
+               offset = 0;
+               *map_start = ((u64)first_i << PAGE_CACHE_SHIFT) - start_offset;
+       } else {
+               /* on the first page the buffer begins at start_offset */
+               offset = start_offset;
+               *map_start = 0;
+       }
+
+       if (start + min_len > eb->len) {
+               printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len);
+               WARN_ON(1);
+       }
+
+       p = extent_buffer_page(eb, first_i);
+       WARN_ON(!PageUptodate(p));
+
+       kaddr = kmap_atomic(p, km);
+       *token = kaddr;
+       *map = kaddr + offset;
+       *map_len = PAGE_CACHE_SIZE - offset;
+       return 0;
+}
+EXPORT_SYMBOL(map_private_extent_buffer);
+
+/*
+ * Same as map_private_extent_buffer(), but aware of the mapping cached in
+ * @eb.
+ *
+ * When eb->map_token is set on entry, that mapping is released first (using
+ * the caller's @km slot) and, if the new mapping succeeds, the new token,
+ * address, start and length are stored in @eb in its place.  When no mapping
+ * was cached, @eb is left alone and only the caller's output pointers are
+ * filled in.
+ *
+ * Returns whatever map_private_extent_buffer() returns.
+ */
+int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
+                     unsigned long min_len,
+                     char **token, char **map,
+                     unsigned long *map_start,
+                     unsigned long *map_len, int km)
+{
+       int had_cached_map = (eb->map_token != NULL);
+       int ret;
+
+       if (had_cached_map) {
+               unmap_extent_buffer(eb, eb->map_token, km);
+               eb->map_token = NULL;
+       }
+
+       ret = map_private_extent_buffer(eb, start, min_len, token, map,
+                                       map_start, map_len, km);
+       if (ret || !had_cached_map)
+               return ret;
+
+       /* replace the mapping that was dropped above */
+       eb->map_token = *token;
+       eb->kaddr = *map;
+       eb->map_start = *map_start;
+       eb->map_len = *map_len;
+       return ret;
+}
+EXPORT_SYMBOL(map_extent_buffer);
+
+void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
+{      /*
+        * Release a mapping made by map_extent_buffer() or
+        * map_private_extent_buffer(): @token is the value those stored in
+        * *token and @km is the kmap_atomic() slot.  @eb itself is not used
+        * here; clearing eb->map_token is left to the caller.
+        */
+       kunmap_atomic(token, km);
+}
+EXPORT_SYMBOL(unmap_extent_buffer);
+
+/*
+ * Compare @len bytes of the memory at @ptrv with the contents of the extent
+ * buffer @eb starting @start bytes into the buffer.
+ *
+ * The comparison is done page by page with memcmp(ptr, buffer, ...), and
+ * stops at the first page chunk that differs.  Returns 0 when all bytes match
+ * (or @len is 0), otherwise the non-zero memcmp() result of the first
+ * differing chunk.
+ *
+ * @start and @len are relative to the beginning of the buffer.  A range that
+ * does not fit inside the buffer is only reported with a warning.
+ */
+int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
+                         unsigned long start,
+                         unsigned long len)
+{
+       size_t cur;
+       size_t offset;
+       struct page *page;
+       char *kaddr;
+       char *ptr = (char *)ptrv;
+       /* eb->start need not be page aligned: offset of byte 0 in page 0 */
+       size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+       unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+       int ret = 0;
+
+       /*
+        * Both checks are against the buffer length.  The second one is
+        * written as a subtraction so that start + len cannot wrap.
+        */
+       WARN_ON(start > eb->len);
+       WARN_ON(start <= eb->len && len > eb->len - start);
+
+       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+       while (len > 0) {
+               page = extent_buffer_page(eb, i);
+               WARN_ON(!PageUptodate(page));
+
+               /* never run past the end of the current page */
+               cur = min(len, (PAGE_CACHE_SIZE - offset));
+
+               kaddr = kmap_atomic(page, KM_USER0);
+               ret = memcmp(ptr, kaddr + offset, cur);
+               kunmap_atomic(kaddr, KM_USER0);
+               if (ret)
+                       break;
+
+               ptr += cur;
+               len -= cur;
+               offset = 0;
+               i++;
+       }
+       return ret;
+}
+EXPORT_SYMBOL(memcmp_extent_buffer);
+
+/*
+ * Copy @len bytes from the memory at @srcv into the extent buffer @eb,
+ * starting @start bytes into the buffer.
+ *
+ * @start and @len are relative to the beginning of the buffer; they are not
+ * added to eb->start.  The data is copied one page at a time through a
+ * short-lived kmap_atomic() mapping in the KM_USER1 slot.
+ *
+ * A range that does not fit inside the buffer, or a page that is not up to
+ * date, is only reported with a warning; the copy is still attempted.
+ */
+void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
+                        unsigned long start, unsigned long len)
+{
+       size_t cur;
+       size_t offset;
+       struct page *page;
+       char *kaddr;
+       char *src = (char *)srcv;
+       /* eb->start need not be page aligned: offset of byte 0 in page 0 */
+       size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+       unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+
+       /*
+        * Both checks are against the buffer length.  The second one is
+        * written as a subtraction so that start + len cannot wrap.
+        */
+       WARN_ON(start > eb->len);
+       WARN_ON(start <= eb->len && len > eb->len - start);
+
+       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+       while (len > 0) {
+               page = extent_buffer_page(eb, i);
+               WARN_ON(!PageUptodate(page));
+
+               /* never run past the end of the current page */
+               cur = min(len, PAGE_CACHE_SIZE - offset);
+               kaddr = kmap_atomic(page, KM_USER1);
+               memcpy(kaddr + offset, src, cur);
+               kunmap_atomic(kaddr, KM_USER1);
+
+               src += cur;
+               len -= cur;
+               /* every page after the first is written from its beginning */
+               offset = 0;
+               i++;
+       }
+}
+EXPORT_SYMBOL(write_extent_buffer);
+
+/*
+ * Set @len bytes of the extent buffer @eb to the value @c, starting @start
+ * bytes into the buffer.
+ *
+ * @start and @len are relative to the beginning of the buffer; they are not
+ * added to eb->start.  The fill is done one page at a time through a
+ * short-lived kmap_atomic() mapping in the KM_USER0 slot.
+ *
+ * A range that does not fit inside the buffer, or a page that is not up to
+ * date, is only reported with a warning; the fill is still attempted.
+ */
+void memset_extent_buffer(struct extent_buffer *eb, char c,
+                         unsigned long start, unsigned long len)
+{
+       size_t cur;
+       size_t offset;
+       struct page *page;
+       char *kaddr;
+       /* eb->start need not be page aligned: offset of byte 0 in page 0 */
+       size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+       unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+
+       /*
+        * Both checks are against the buffer length.  The second one is
+        * written as a subtraction so that start + len cannot wrap.
+        */
+       WARN_ON(start > eb->len);
+       WARN_ON(start <= eb->len && len > eb->len - start);
+
+       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+       while (len > 0) {
+               page = extent_buffer_page(eb, i);
+               WARN_ON(!PageUptodate(page));
+
+               /* never run past the end of the current page */
+               cur = min(len, PAGE_CACHE_SIZE - offset);
+               kaddr = kmap_atomic(page, KM_USER0);
+               memset(kaddr + offset, c, cur);
+               kunmap_atomic(kaddr, KM_USER0);
+
+               len -= cur;
+               offset = 0;
+               i++;
+       }
+}
+EXPORT_SYMBOL(memset_extent_buffer);
+
+/*
+ * Copy @len bytes from extent buffer @src (starting at @src_offset) into
+ * extent buffer @dst (starting at @dst_offset).  Both offsets are relative
+ * to the beginning of their buffer.
+ *
+ * Each destination page is mapped in the KM_USER0 slot and filled through
+ * read_extent_buffer(), which does its own mapping of the source pages in
+ * KM_USER1.  A warning is raised when the two buffers differ in length or a
+ * destination page is not up to date.
+ */
+void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+                       unsigned long dst_offset, unsigned long src_offset,
+                       unsigned long len)
+{
+       /* position of the first destination byte, counted from page 0 of dst */
+       unsigned long pos = (dst->start & ((u64)PAGE_CACHE_SIZE - 1)) +
+               dst_offset;
+       unsigned long page_nr = pos >> PAGE_CACHE_SHIFT;
+       size_t in_page = pos & ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+       WARN_ON(src->len != dst->len);
+
+       /* after the first page every destination page is filled from byte 0 */
+       for (; len > 0; page_nr++, in_page = 0) {
+               struct page *dst_page = extent_buffer_page(dst, page_nr);
+               size_t chunk;
+               char *mapped;
+
+               WARN_ON(!PageUptodate(dst_page));
+
+               chunk = min(len, (unsigned long)(PAGE_CACHE_SIZE - in_page));
+
+               mapped = kmap_atomic(dst_page, KM_USER0);
+               read_extent_buffer(src, mapped + in_page, src_offset, chunk);
+               kunmap_atomic(mapped, KM_USER0);
+
+               src_offset += chunk;
+               len -= chunk;
+       }
+}
+EXPORT_SYMBOL(copy_extent_buffer);
+
+/*
+ * Copy @len bytes from @src_page + @src_off to @dst_page + @dst_off.
+ *
+ * Within a single page the ranges may overlap, so memmove() is used.  For
+ * two different pages the bytes are copied one at a time from the last byte
+ * down to the first.  The destination is mapped in KM_USER0 and, when it is
+ * a different page, the source in KM_USER1.
+ */
+static void move_pages(struct page *dst_page, struct page *src_page,
+                      unsigned long dst_off, unsigned long src_off,
+                      unsigned long len)
+{
+       char *to = kmap_atomic(dst_page, KM_USER0);
+       char *from;
+       unsigned long n;
+
+       if (dst_page != src_page) {
+               from = kmap_atomic(src_page, KM_USER1);
+
+               /* n is one past the index of the byte being copied */
+               for (n = len; n > 0; n--)
+                       to[dst_off + n - 1] = from[src_off + n - 1];
+
+               kunmap_atomic(from, KM_USER1);
+       } else {
+               memmove(to + dst_off, to + src_off, len);
+       }
+       kunmap_atomic(to, KM_USER0);
+}
+
+/*
+ * Copy @len bytes from @src_page + @src_off to @dst_page + @dst_off.
+ *
+ * The destination is mapped in KM_USER0 and, when it is a different page,
+ * the source in KM_USER1.
+ *
+ * When both ranges are in the same page they may overlap:
+ * memmove_extent_buffer() hands downward moves (dst_offset < src_offset) to
+ * memcpy_extent_buffer(), which ends up here.  memcpy() must not be used on
+ * overlapping ranges, so the same-page case goes through memmove().  Ranges
+ * in two different pages cannot overlap and keep using memcpy().
+ */
+static void copy_pages(struct page *dst_page, struct page *src_page,
+                      unsigned long dst_off, unsigned long src_off,
+                      unsigned long len)
+{
+       char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+       char *src_kaddr;
+
+       if (dst_page != src_page) {
+               src_kaddr = kmap_atomic(src_page, KM_USER1);
+               memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
+               kunmap_atomic(src_kaddr, KM_USER1);
+       } else {
+               memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
+       }
+       kunmap_atomic(dst_kaddr, KM_USER0);
+}
+
+/*
+ * Copy @len bytes inside the extent buffer @dst, from @src_offset to
+ * @dst_offset (both relative to the beginning of the buffer).
+ *
+ * The copy walks forward and is split so that no single copy_pages() call
+ * crosses a page boundary on either the source or the destination side.
+ * A source or destination range reaching past dst->len is fatal (BUG).
+ */
+void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+                          unsigned long src_offset, unsigned long len)
+{
+       const unsigned long page_mask = (unsigned long)PAGE_CACHE_SIZE - 1;
+       /* dst->start need not be page aligned: offset of byte 0 in page 0 */
+       size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+
+       if (src_offset + len > dst->len) {
+               printk("memmove bogus src_offset %lu move len %lu len %lu\n",
+                      src_offset, len, dst->len);
+               BUG_ON(1);
+       }
+       if (dst_offset + len > dst->len) {
+               printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
+                      dst_offset, len, dst->len);
+               BUG_ON(1);
+       }
+
+       while (len > 0) {
+               /* positions counted from the start of the first page */
+               unsigned long dst_pos = start_offset + dst_offset;
+               unsigned long src_pos = start_offset + src_offset;
+               size_t dst_in_page = dst_pos & page_mask;
+               size_t src_in_page = src_pos & page_mask;
+               size_t chunk = len;
+
+               /* stop at whichever page boundary comes first */
+               if (chunk > PAGE_CACHE_SIZE - src_in_page)
+                       chunk = PAGE_CACHE_SIZE - src_in_page;
+               if (chunk > PAGE_CACHE_SIZE - dst_in_page)
+                       chunk = PAGE_CACHE_SIZE - dst_in_page;
+
+               copy_pages(extent_buffer_page(dst, dst_pos >> PAGE_CACHE_SHIFT),
+                          extent_buffer_page(dst, src_pos >> PAGE_CACHE_SHIFT),
+                          dst_in_page, src_in_page, chunk);
+
+               src_offset += chunk;
+               dst_offset += chunk;
+               len -= chunk;
+       }
+}
+EXPORT_SYMBOL(memcpy_extent_buffer);
+
+/*
+ * Move @len bytes inside the extent buffer @dst, from @src_offset to
+ * @dst_offset (both relative to the beginning of the buffer).
+ *
+ * When the destination lies below the source the work is handed to
+ * memcpy_extent_buffer(), which copies forward.  Otherwise the data is moved
+ * starting from the last byte and working towards the first, in pieces that
+ * never cross a page boundary on either side.
+ * A source or destination range reaching past dst->len is fatal (BUG).
+ */
+void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+                          unsigned long src_offset, unsigned long len)
+{
+       const unsigned long page_mask = (unsigned long)PAGE_CACHE_SIZE - 1;
+       /* dst->start need not be page aligned: offset of byte 0 in page 0 */
+       size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+       unsigned long dst_last;
+       unsigned long src_last;
+
+       if (src_offset + len > dst->len) {
+               printk("memmove bogus src_offset %lu move len %lu len %lu\n",
+                      src_offset, len, dst->len);
+               BUG_ON(1);
+       }
+       if (dst_offset + len > dst->len) {
+               printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
+                      dst_offset, len, dst->len);
+               BUG_ON(1);
+       }
+       if (dst_offset < src_offset) {
+               memcpy_extent_buffer(dst, dst_offset, src_offset, len);
+               return;
+       }
+
+       /*
+        * Position of the last byte still to be moved on each side, counted
+        * from the start of the first page.  Only used while len > 0.
+        */
+       dst_last = start_offset + dst_offset + len - 1;
+       src_last = start_offset + src_offset + len - 1;
+
+       while (len > 0) {
+               size_t dst_in_page = dst_last & page_mask;
+               size_t src_in_page = src_last & page_mask;
+               size_t chunk;
+
+               /* bytes available going backwards to the start of each page */
+               chunk = min_t(unsigned long, len, src_in_page + 1);
+               chunk = min_t(unsigned long, chunk, dst_in_page + 1);
+
+               move_pages(extent_buffer_page(dst, dst_last >> PAGE_CACHE_SHIFT),
+                          extent_buffer_page(dst, src_last >> PAGE_CACHE_SHIFT),
+                          dst_in_page - chunk + 1,
+                          src_in_page - chunk + 1, chunk);
+
+               dst_last -= chunk;
+               src_last -= chunk;
+               len -= chunk;
+       }
+}
+EXPORT_SYMBOL(memmove_extent_buffer);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
new file mode 100644 (file)
index 0000000..06be1fe
--- /dev/null
@@ -0,0 +1,193 @@
+#ifndef __EXTENTIO__
+#define __EXTENTIO__
+
+#include <linux/rbtree.h>
+
+/* bits for the extent state */
+#define EXTENT_DIRTY 1
+#define EXTENT_WRITEBACK (1 << 1)
+#define EXTENT_UPTODATE (1 << 2)
+#define EXTENT_LOCKED (1 << 3)
+#define EXTENT_NEW (1 << 4)
+#define EXTENT_DELALLOC (1 << 5)
+#define EXTENT_DEFRAG (1 << 6)
+#define EXTENT_DEFRAG_DONE (1 << 7)
+#define EXTENT_BUFFER_FILLED (1 << 8)
+#define EXTENT_CSUM (1 << 9)
+#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
+
+/*
+ * page->private values.  Every page that is controlled by the extent
+ * map has page->private set to one.
+ */
+#define EXTENT_PAGE_PRIVATE 1
+#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3
+
+struct extent_io_ops { /*
+        * Table of callbacks referenced from extent_io_tree->ops.  Every
+        * hook that takes a range gets it as (start, end).
+        * NOTE(review): the call sites live in extent_io.c and are not
+        * visible from this header; confirm when each hook runs and
+        * whether @end is inclusive.
+        */
+       int (*fill_delalloc)(struct inode *inode, u64 start, u64 end);
+       int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
+       int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
+       int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end);
+       void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end);
+};
+
+struct extent_io_tree {        /* initialised by extent_io_tree_init() */
+       struct rb_root state;   /* presumably the rb-tree of struct extent_state; confirm in extent_io.c */
+       struct address_space *mapping;  /* the mapping handed to extent_io_tree_init() - TODO confirm */
+       u64 dirty_bytes;
+       rwlock_t lock;
+       struct extent_io_ops *ops;      /* callback table, see struct extent_io_ops */
+       spinlock_t lru_lock;
+       struct list_head buffer_lru;    /* presumably a list of struct extent_buffer linked through ->lru; confirm */
+       int lru_size;
+};
+
+struct extent_state {  /* state bits recorded for the byte range [start, end] */
+       u64 start;
+       u64 end; /* inclusive */
+       int in_tree;
+       struct rb_node rb_node;
+       wait_queue_head_t wq;
+       atomic_t refs;
+       unsigned long state;    /* presumably a mask of the EXTENT_* bits defined above; confirm in extent_io.c */
+
+       /* for use by the FS */
+       u64 private;    /* see the set_state_private()/get_state_private() prototypes below */
+
+       struct list_head list;
+};
+
+struct extent_buffer { /*
+        * A range of @len bytes that is read and written through the
+        * *_extent_buffer() helpers; offsets passed to those helpers are
+        * relative to the start of the buffer.
+        */
+       u64 start;      /* starting byte offset; its low bits give the offset of the buffer inside its first page */
+       unsigned long len;      /* length in bytes */
+       char *map_token;        /* token of the mapping cached by map_extent_buffer(); NULL when none is cached */
+       char *kaddr;    /* cached mapping: address of the first mapped byte of the buffer */
+       unsigned long map_start;        /* cached mapping: buffer-relative offset of the byte at kaddr */
+       unsigned long map_len;  /* cached mapping: number of bytes mapped from kaddr */
+       struct page *first_page;
+       struct list_head lru;   /* presumably the link in extent_io_tree->buffer_lru; confirm in extent_io.c */
+       atomic_t refs;  /* reference count, raised by extent_buffer_get() */
+       int flags;
+};
+
+struct extent_map_tree;
+
+typedef struct extent_map *(get_extent_t)(struct inode *inode,
+                                         struct page *page,
+                                         size_t page_offset,
+                                         u64 start, u64 len,
+                                         int create);
+
+void extent_io_tree_init(struct extent_io_tree *tree,
+                         struct address_space *mapping, gfp_t mask);
+void extent_io_tree_empty_lru(struct extent_io_tree *tree);
+int try_release_extent_mapping(struct extent_map_tree *map,
+                              struct extent_io_tree *tree, struct page *page);
+int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
+int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
+int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
+                         get_extent_t *get_extent);
+int __init extent_io_init(void);
+void extent_io_exit(void);
+
+u64 count_range_bits(struct extent_io_tree *tree,
+                    u64 *start, u64 search_end,
+                    u64 max_bytes, unsigned long bits);
+
+int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
+                  int bits, int filled);
+int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+                     int bits, gfp_t mask);
+int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+                   int bits, gfp_t mask);
+int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
+                       gfp_t mask);
+int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
+                  gfp_t mask);
+int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
+                    gfp_t mask);
+int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
+                      gfp_t mask);
+int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
+                    gfp_t mask);
+int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+                         u64 *start_ret, u64 *end_ret, int bits);
+int extent_invalidatepage(struct extent_io_tree *tree,
+                         struct page *page, unsigned long offset);
+int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
+                         get_extent_t *get_extent,
+                         struct writeback_control *wbc);
+int extent_writepages(struct extent_io_tree *tree,
+                     struct address_space *mapping,
+                     get_extent_t *get_extent,
+                     struct writeback_control *wbc);
+int extent_readpages(struct extent_io_tree *tree,
+                    struct address_space *mapping,
+                    struct list_head *pages, unsigned nr_pages,
+                    get_extent_t get_extent);
+int extent_prepare_write(struct extent_io_tree *tree,
+                        struct inode *inode, struct page *page,
+                        unsigned from, unsigned to, get_extent_t *get_extent);
+int extent_commit_write(struct extent_io_tree *tree,
+                       struct inode *inode, struct page *page,
+                       unsigned from, unsigned to);
+sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
+               get_extent_t *get_extent);
+int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end);
+int set_state_private(struct extent_io_tree *tree, u64 start, u64 private);
+int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
+void set_page_extent_mapped(struct page *page);
+
+struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
+                                         u64 start, unsigned long len,
+                                         struct page *page0,
+                                         gfp_t mask);
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+                                        u64 start, unsigned long len,
+                                         gfp_t mask);
+void free_extent_buffer(struct extent_buffer *eb);
+int read_extent_buffer_pages(struct extent_io_tree *tree,
+                            struct extent_buffer *eb, u64 start, int wait);
+
+static inline void extent_buffer_get(struct extent_buffer *eb)
+{
+       atomic_inc(&eb->refs);  /* take one more reference on @eb */
+}
+
+int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
+                         unsigned long start,
+                         unsigned long len);
+void read_extent_buffer(struct extent_buffer *eb, void *dst,
+                       unsigned long start,
+                       unsigned long len);
+void write_extent_buffer(struct extent_buffer *eb, const void *src,
+                        unsigned long start, unsigned long len);
+void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+                       unsigned long dst_offset, unsigned long src_offset,
+                       unsigned long len);
+void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+                          unsigned long src_offset, unsigned long len);
+void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+                          unsigned long src_offset, unsigned long len);
+void memset_extent_buffer(struct extent_buffer *eb, char c,
+                         unsigned long start, unsigned long len);
+int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
+                                   struct extent_buffer *eb);
+int clear_extent_buffer_dirty(struct extent_io_tree *tree,
+                             struct extent_buffer *eb);
+int set_extent_buffer_dirty(struct extent_io_tree *tree,
+                            struct extent_buffer *eb);
+int set_extent_buffer_uptodate(struct extent_io_tree *tree,
+                              struct extent_buffer *eb);
+int extent_buffer_uptodate(struct extent_io_tree *tree,
+                          struct extent_buffer *eb);
+int map_extent_buffer(struct extent_buffer *eb, unsigned long offset,
+                     unsigned long min_len, char **token, char **map,
+                     unsigned long *map_start,
+                     unsigned long *map_len, int km);
+int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
+                     unsigned long min_len, char **token, char **map,
+                     unsigned long *map_start,
+                     unsigned long *map_len, int km);
+void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
+#endif
index 010a287fbd7132784cd4bb7e02cbef3e1f797f11..268ad8facf6e442d62521fc580e22b2e35801dcc 100644 (file)
@@ -1,17 +1,10 @@
-#include <linux/bitops.h>
-#include <linux/slab.h>
-#include <linux/bio.h>
-#include <linux/mm.h>
+#include <linux/err.h>
 #include <linux/gfp.h>
-#include <linux/pagemap.h>
-#include <linux/page-flags.h>
+#include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
-#include <linux/blkdev.h>
-#include <linux/swap.h>
 #include <linux/version.h>
-#include <linux/writeback.h>
-#include <linux/pagevec.h>
+#include <linux/hardirq.h>
 #include "extent_map.h"
 
 /* temporary define until extent_map moves out of btrfs */
@@ -21,27 +14,6 @@ struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
                                                    unsigned long));
 
 static struct kmem_cache *extent_map_cache;
-static struct kmem_cache *extent_state_cache;
-static struct kmem_cache *extent_buffer_cache;
-
-static LIST_HEAD(buffers);
-static LIST_HEAD(states);
-
-static spinlock_t state_lock = SPIN_LOCK_UNLOCKED;
-#define BUFFER_LRU_MAX 64
-
-struct tree_entry {
-       u64 start;
-       u64 end;
-       int in_tree;
-       struct rb_node rb_node;
-};
-
-struct extent_page_data {
-       struct bio *bio;
-       struct extent_map_tree *tree;
-       get_extent_t *get_extent;
-};
 
 int __init extent_map_init(void)
 {
@@ -50,72 +22,23 @@ int __init extent_map_init(void)
                                            NULL);
        if (!extent_map_cache)
                return -ENOMEM;
-       extent_state_cache = btrfs_cache_create("extent_state",
-                                           sizeof(struct extent_state), 0,
-                                           NULL);
-       if (!extent_state_cache)
-               goto free_map_cache;
-       extent_buffer_cache = btrfs_cache_create("extent_buffers",
-                                           sizeof(struct extent_buffer), 0,
-                                           NULL);
-       if (!extent_buffer_cache)
-               goto free_state_cache;
        return 0;
-
-free_state_cache:
-       kmem_cache_destroy(extent_state_cache);
-free_map_cache:
-       kmem_cache_destroy(extent_map_cache);
-       return -ENOMEM;
 }
 
 void extent_map_exit(void)
 {
-       struct extent_state *state;
-
-       while (!list_empty(&states)) {
-               state = list_entry(states.next, struct extent_state, list);
-               printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs));
-               list_del(&state->list);
-               kmem_cache_free(extent_state_cache, state);
-
-       }
-
        if (extent_map_cache)
                kmem_cache_destroy(extent_map_cache);
-       if (extent_state_cache)
-               kmem_cache_destroy(extent_state_cache);
-       if (extent_buffer_cache)
-               kmem_cache_destroy(extent_buffer_cache);
 }
 
-void extent_map_tree_init(struct extent_map_tree *tree,
-                         struct address_space *mapping, gfp_t mask)
+void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
 {
        tree->map.rb_node = NULL;
-       tree->state.rb_node = NULL;
-       tree->ops = NULL;
-       tree->dirty_bytes = 0;
-       rwlock_init(&tree->lock);
-       spin_lock_init(&tree->lru_lock);
-       tree->mapping = mapping;
-       INIT_LIST_HEAD(&tree->buffer_lru);
-       tree->lru_size = 0;
+       tree->last = NULL;
+       spin_lock_init(&tree->lock);
 }
 EXPORT_SYMBOL(extent_map_tree_init);
 
-void extent_map_tree_empty_lru(struct extent_map_tree *tree)
-{
-       struct extent_buffer *eb;
-       while(!list_empty(&tree->buffer_lru)) {
-               eb = list_entry(tree->buffer_lru.next, struct extent_buffer,
-                               lru);
-               list_del_init(&eb->lru);
-               free_extent_buffer(eb);
-       }
-}
-EXPORT_SYMBOL(extent_map_tree_empty_lru);
-
 struct extent_map *alloc_extent_map(gfp_t mask)
 {
        struct extent_map *em;
@@ -123,6 +46,7 @@ struct extent_map *alloc_extent_map(gfp_t mask)
        if (!em || IS_ERR(em))
                return em;
        em->in_tree = 0;
+       em->flags = 0;
        atomic_set(&em->refs, 1);
        return em;
 }
@@ -132,6 +56,7 @@ void free_extent_map(struct extent_map *em)
 {
        if (!em)
                return;
+       WARN_ON(atomic_read(&em->refs) == 0);
        if (atomic_dec_and_test(&em->refs)) {
                WARN_ON(em->in_tree);
                kmem_cache_free(extent_map_cache, em);
@@ -139,64 +64,28 @@ void free_extent_map(struct extent_map *em)
 }
 EXPORT_SYMBOL(free_extent_map);
 
-
-struct extent_state *alloc_extent_state(gfp_t mask)
-{
-       struct extent_state *state;
-       unsigned long flags;
-
-       state = kmem_cache_alloc(extent_state_cache, mask);
-       if (!state || IS_ERR(state))
-               return state;
-       state->state = 0;
-       state->in_tree = 0;
-       state->private = 0;
-
-       spin_lock_irqsave(&state_lock, flags);
-       list_add(&state->list, &states);
-       spin_unlock_irqrestore(&state_lock, flags);
-
-       atomic_set(&state->refs, 1);
-       init_waitqueue_head(&state->wq);
-       return state;
-}
-EXPORT_SYMBOL(alloc_extent_state);
-
-void free_extent_state(struct extent_state *state)
-{
-       unsigned long flags;
-       if (!state)
-               return;
-       if (atomic_dec_and_test(&state->refs)) {
-               WARN_ON(state->in_tree);
-               spin_lock_irqsave(&state_lock, flags);
-               list_del(&state->list);
-               spin_unlock_irqrestore(&state_lock, flags);
-               kmem_cache_free(extent_state_cache, state);
-       }
-}
-EXPORT_SYMBOL(free_extent_state);
-
 static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
                                   struct rb_node *node)
 {
        struct rb_node ** p = &root->rb_node;
        struct rb_node * parent = NULL;
-       struct tree_entry *entry;
+       struct extent_map *entry;
 
        while(*p) {
                parent = *p;
-               entry = rb_entry(parent, struct tree_entry, rb_node);
+               entry = rb_entry(parent, struct extent_map, rb_node);
+
+               WARN_ON(!entry->in_tree);
 
                if (offset < entry->start)
                        p = &(*p)->rb_left;
-               else if (offset > entry->end)
+               else if (offset >= extent_map_end(entry))
                        p = &(*p)->rb_right;
                else
                        return parent;
        }
 
-       entry = rb_entry(node, struct tree_entry, rb_node);
+       entry = rb_entry(node, struct extent_map, rb_node);
        entry->in_tree = 1;
        rb_link_node(node, parent, p);
        rb_insert_color(node, root);
@@ -210,17 +99,19 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
        struct rb_node * n = root->rb_node;
        struct rb_node *prev = NULL;
        struct rb_node *orig_prev = NULL;
-       struct tree_entry *entry;
-       struct tree_entry *prev_entry = NULL;
+       struct extent_map *entry;
+       struct extent_map *prev_entry = NULL;
 
        while(n) {
-               entry = rb_entry(n, struct tree_entry, rb_node);
+               entry = rb_entry(n, struct extent_map, rb_node);
                prev = n;
                prev_entry = entry;
 
+               WARN_ON(!entry->in_tree);
+
                if (offset < entry->start)
                        n = n->rb_left;
-               else if (offset > entry->end)
+               else if (offset >= extent_map_end(entry))
                        n = n->rb_right;
                else
                        return n;
@@ -228,19 +119,19 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
 
        if (prev_ret) {
                orig_prev = prev;
-               while(prev && offset > prev_entry->end) {
+               while(prev && offset >= extent_map_end(prev_entry)) {
                        prev = rb_next(prev);
-                       prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+                       prev_entry = rb_entry(prev, struct extent_map, rb_node);
                }
                *prev_ret = prev;
                prev = orig_prev;
        }
 
        if (next_ret) {
-               prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+               prev_entry = rb_entry(prev, struct extent_map, rb_node);
                while(prev && offset < prev_entry->start) {
                        prev = rb_prev(prev);
-                       prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+                       prev_entry = rb_entry(prev, struct extent_map, rb_node);
                }
                *next_ret = prev;
        }
@@ -257,22 +148,26 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
        return ret;
 }
 
-static int tree_delete(struct rb_root *root, u64 offset)
+static int mergable_maps(struct extent_map *prev, struct extent_map *next)
 {
-       struct rb_node *node;
-       struct tree_entry *entry;
-
-       node = __tree_search(root, offset, NULL, NULL);
-       if (!node)
-               return -ENOENT;
-       entry = rb_entry(node, struct tree_entry, rb_node);
-       entry->in_tree = 0;
-       rb_erase(node, root);
+       if (extent_map_end(prev) == next->start &&      /* next begins exactly where prev ends */
+           prev->flags == next->flags &&
+           prev->bdev == next->bdev &&
+           ((next->block_start == EXTENT_MAP_HOLE &&   /* both maps carry the same special marker ... */
+             prev->block_start == EXTENT_MAP_HOLE) ||
+            (next->block_start == EXTENT_MAP_INLINE &&
+             prev->block_start == EXTENT_MAP_INLINE) ||
+            (next->block_start == EXTENT_MAP_DELALLOC &&
+             prev->block_start == EXTENT_MAP_DELALLOC) ||
+            (next->block_start < EXTENT_MAP_LAST_BYTE - 1 &&   /* ... or next's blocks directly follow prev's */
+             next->block_start == extent_map_block_end(prev)))) {
+               return 1;       /* the two maps can be merged into one */
+       }
        return 0;
 }
 
 /*
- * add_extent_mapping tries a simple backward merge with existing
+ * add_extent_mapping tries a simple forward/backward merge with existing
  * mappings.  The extent_map struct passed in will be inserted into
  * the tree directly (no copies made, just a reference taken).
  */
@@ -280,13 +175,12 @@ int add_extent_mapping(struct extent_map_tree *tree,
                       struct extent_map *em)
 {
        int ret = 0;
-       struct extent_map *prev = NULL;
+       struct extent_map *merge = NULL;
        struct rb_node *rb;
 
-       write_lock_irq(&tree->lock);
-       rb = tree_insert(&tree->map, em->end, &em->rb_node);
+       rb = tree_insert(&tree->map, em->start, &em->rb_node);
        if (rb) {
-               prev = rb_entry(rb, struct extent_map, rb_node);
+               merge = rb_entry(rb, struct extent_map, rb_node);
                ret = -EEXIST;
                goto out;
        }
@@ -294,53 +188,60 @@ int add_extent_mapping(struct extent_map_tree *tree,
        if (em->start != 0) {
                rb = rb_prev(&em->rb_node);
                if (rb)
-                       prev = rb_entry(rb, struct extent_map, rb_node);
-               if (prev && prev->end + 1 == em->start &&
-                   ((em->block_start == EXTENT_MAP_HOLE &&
-                     prev->block_start == EXTENT_MAP_HOLE) ||
-                    (em->block_start == EXTENT_MAP_INLINE &&
-                     prev->block_start == EXTENT_MAP_INLINE) ||
-                    (em->block_start == EXTENT_MAP_DELALLOC &&
-                     prev->block_start == EXTENT_MAP_DELALLOC) ||
-                    (em->block_start < EXTENT_MAP_DELALLOC - 1 &&
-                     em->block_start == prev->block_end + 1))) {
-                       em->start = prev->start;
-                       em->block_start = prev->block_start;
-                       rb_erase(&prev->rb_node, &tree->map);
-                       prev->in_tree = 0;
-                       free_extent_map(prev);
+                       merge = rb_entry(rb, struct extent_map, rb_node);
+               if (rb && mergable_maps(merge, em)) {
+                       em->start = merge->start;
+                       em->len += merge->len;
+                       em->block_start = merge->block_start;
+                       merge->in_tree = 0;
+                       rb_erase(&merge->rb_node, &tree->map);
+                       free_extent_map(merge);
                }
         }
+       rb = rb_next(&em->rb_node);
+       if (rb)
+               merge = rb_entry(rb, struct extent_map, rb_node);
+       if (rb && mergable_maps(em, merge)) {
+               em->len += merge->len;
+               rb_erase(&merge->rb_node, &tree->map);
+               merge->in_tree = 0;
+               free_extent_map(merge);
+       }
+       tree->last = em;
 out:
-       write_unlock_irq(&tree->lock);
        return ret;
 }
 EXPORT_SYMBOL(add_extent_mapping);
 
+static u64 range_end(u64 start, u64 len)
+{
+       if (start + len < start)
+               return (u64)-1;
+       return start + len;
+}
+
 /*
  * lookup_extent_mapping returns the first extent_map struct in the
- * tree that intersects the [start, end] (inclusive) range.  There may
+ * tree that intersects the [start, start + len) range.  There may
  * be additional objects in the tree that intersect, so check the object
  * returned carefully to make sure you don't need additional lookups.
  */
 struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
-                                        u64 start, u64 end)
+                                        u64 start, u64 len)
 {
        struct extent_map *em;
        struct rb_node *rb_node;
-       struct rb_node *prev = NULL;
-       struct rb_node *next = NULL;
+       struct rb_node *prev = NULL; struct rb_node *next = NULL; u64 end = range_end(start, len); em = tree->last; if (em && end > em->start && start < extent_map_end(em)) goto found;
 
-       read_lock_irq(&tree->lock);
        rb_node = __tree_search(&tree->map, start, &prev, &next);
        if (!rb_node && prev) {
                em = rb_entry(prev, struct extent_map, rb_node);
-               if (em->start <= end && em->end >= start)
+               if (end > em->start && start < extent_map_end(em))
                        goto found;
        }
        if (!rb_node && next) {
                em = rb_entry(next, struct extent_map, rb_node);
-               if (em->start <= end && em->end >= start)
+               if (end > em->start && start < extent_map_end(em))
                        goto found;
        }
        if (!rb_node) {
@@ -352,14 +253,16 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
                goto out;
        }
        em = rb_entry(rb_node, struct extent_map, rb_node);
-       if (em->end < start || em->start > end) {
-               em = NULL;
-               goto out;
-       }
+       if (end > em->start && start < extent_map_end(em))
+               goto found;
+
+       em = NULL;
+       goto out;
+
 found:
        atomic_inc(&em->refs);
+       tree->last = em;
 out:
-       read_unlock_irq(&tree->lock);
        return em;
 }
 EXPORT_SYMBOL(lookup_extent_mapping);
@@ -370,2866 +273,12 @@ EXPORT_SYMBOL(lookup_extent_mapping);
  */
 int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
 {
-       int ret;
+       int ret = 0;
 
-       write_lock_irq(&tree->lock);
-       ret = tree_delete(&tree->map, em->end);
-       write_unlock_irq(&tree->lock);
+       rb_erase(&em->rb_node, &tree->map);
+       em->in_tree = 0;
+       if (tree->last == em)
+               tree->last = NULL;
        return ret;
 }
 EXPORT_SYMBOL(remove_extent_mapping);
-
-/*
- * utility function to look for merge candidates inside a given range.
- * Any extents with matching state are merged together into a single
- * extent in the tree.  Extents with EXTENT_IO in their state field
- * are not merged because the end_io handlers need to be able to do
- * operations on them without sleeping (or doing allocations/splits).
- *
- * This should be called with the tree lock held.
- */
-static int merge_state(struct extent_map_tree *tree,
-                      struct extent_state *state)
-{
-       struct extent_state *other;
-       struct rb_node *other_node;
-
-       if (state->state & EXTENT_IOBITS)
-               return 0;
-
-       other_node = rb_prev(&state->rb_node);
-       if (other_node) {
-               other = rb_entry(other_node, struct extent_state, rb_node);
-               if (other->end == state->start - 1 &&
-                   other->state == state->state) {
-                       state->start = other->start;
-                       other->in_tree = 0;
-                       rb_erase(&other->rb_node, &tree->state);
-                       free_extent_state(other);
-               }
-       }
-       other_node = rb_next(&state->rb_node);
-       if (other_node) {
-               other = rb_entry(other_node, struct extent_state, rb_node);
-               if (other->start == state->end + 1 &&
-                   other->state == state->state) {
-                       other->start = state->start;
-                       state->in_tree = 0;
-                       rb_erase(&state->rb_node, &tree->state);
-                       free_extent_state(state);
-               }
-       }
-       return 0;
-}
-
-/*
- * insert an extent_state struct into the tree.  'bits' are set on the
- * struct before it is inserted.
- *
- * This may return -EEXIST if the extent is already there, in which case the
- * state struct is freed.
- *
- * The tree lock is not taken internally.  This is a utility function and
- * probably isn't what you want to call (see set/clear_extent_bit).
- */
-static int insert_state(struct extent_map_tree *tree,
-                       struct extent_state *state, u64 start, u64 end,
-                       int bits)
-{
-       struct rb_node *node;
-
-       if (end < start) {
-               printk("end < start %Lu %Lu\n", end, start);
-               WARN_ON(1);
-       }
-       if (bits & EXTENT_DIRTY)
-               tree->dirty_bytes += end - start + 1;
-       state->state |= bits;
-       state->start = start;
-       state->end = end;
-       node = tree_insert(&tree->state, end, &state->rb_node);
-       if (node) {
-               struct extent_state *found;
-               found = rb_entry(node, struct extent_state, rb_node);
-               printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end);
-               free_extent_state(state);
-               return -EEXIST;
-       }
-       merge_state(tree, state);
-       return 0;
-}
-
-/*
- * split a given extent state struct in two, inserting the preallocated
- * struct 'prealloc' as the newly created second half.  'split' indicates an
- * offset inside 'orig' where it should be split.
- *
- * Before calling,
- * the tree has 'orig' at [orig->start, orig->end].  After calling, there
- * are two extent state structs in the tree:
- * prealloc: [orig->start, split - 1]
- * orig: [ split, orig->end ]
- *
- * The tree locks are not taken by this function. They need to be held
- * by the caller.
- */
-static int split_state(struct extent_map_tree *tree, struct extent_state *orig,
-                      struct extent_state *prealloc, u64 split)
-{
-       struct rb_node *node;
-       prealloc->start = orig->start;
-       prealloc->end = split - 1;
-       prealloc->state = orig->state;
-       orig->start = split;
-
-       node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
-       if (node) {
-               struct extent_state *found;
-               found = rb_entry(node, struct extent_state, rb_node);
-               printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end);
-               free_extent_state(prealloc);
-               return -EEXIST;
-       }
-       return 0;
-}
-
-/*
- * utility function to clear some bits in an extent state struct.
- * it will optionally wake up any one waiting on this state (wake == 1), or
- * forcibly remove the state from the tree (delete == 1).
- *
- * If no bits are set on the state struct after clearing things, the
- * struct is freed and removed from the tree
- */
-static int clear_state_bit(struct extent_map_tree *tree,
-                           struct extent_state *state, int bits, int wake,
-                           int delete)
-{
-       int ret = state->state & bits;
-
-       if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
-               u64 range = state->end - state->start + 1;
-               WARN_ON(range > tree->dirty_bytes);
-               tree->dirty_bytes -= range;
-       }
-       state->state &= ~bits;
-       if (wake)
-               wake_up(&state->wq);
-       if (delete || state->state == 0) {
-               if (state->in_tree) {
-                       rb_erase(&state->rb_node, &tree->state);
-                       state->in_tree = 0;
-                       free_extent_state(state);
-               } else {
-                       WARN_ON(1);
-               }
-       } else {
-               merge_state(tree, state);
-       }
-       return ret;
-}
-
-/*
- * clear some bits on a range in the tree.  This may require splitting
- * or inserting elements in the tree, so the gfp mask is used to
- * indicate which allocations or sleeping are allowed.
- *
- * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
- * the given range from the tree regardless of state (ie for truncate).
- *
- * the range [start, end] is inclusive.
- *
- * This takes the tree lock, and returns < 0 on error, > 0 if any of the
- * bits were already set, or zero if none of the bits were already set.
- */
-int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end,
-                    int bits, int wake, int delete, gfp_t mask)
-{
-       struct extent_state *state;
-       struct extent_state *prealloc = NULL;
-       struct rb_node *node;
-       unsigned long flags;
-       int err;
-       int set = 0;
-
-again:
-       if (!prealloc && (mask & __GFP_WAIT)) {
-               prealloc = alloc_extent_state(mask);
-               if (!prealloc)
-                       return -ENOMEM;
-       }
-
-       write_lock_irqsave(&tree->lock, flags);
-       /*
-        * this search will find the extents that end after
-        * our range starts
-        */
-       node = tree_search(&tree->state, start);
-       if (!node)
-               goto out;
-       state = rb_entry(node, struct extent_state, rb_node);
-       if (state->start > end)
-               goto out;
-       WARN_ON(state->end < start);
-
-       /*
-        *     | ---- desired range ---- |
-        *  | state | or
-        *  | ------------- state -------------- |
-        *
-        * We need to split the extent we found, and may flip
-        * bits on second half.
-        *
-        * If the extent we found extends past our range, we
-        * just split and search again.  It'll get split again
-        * the next time though.
-        *
-        * If the extent we found is inside our range, we clear
-        * the desired bit on it.
-        */
-
-       if (state->start < start) {
-               err = split_state(tree, state, prealloc, start);
-               BUG_ON(err == -EEXIST);
-               prealloc = NULL;
-               if (err)
-                       goto out;
-               if (state->end <= end) {
-                       start = state->end + 1;
-                       set |= clear_state_bit(tree, state, bits,
-                                       wake, delete);
-               } else {
-                       start = state->start;
-               }
-               goto search_again;
-       }
-       /*
-        * | ---- desired range ---- |
-        *                        | state |
-        * We need to split the extent, and clear the bit
-        * on the first half
-        */
-       if (state->start <= end && state->end > end) {
-               err = split_state(tree, state, prealloc, end + 1);
-               BUG_ON(err == -EEXIST);
-
-               if (wake)
-                       wake_up(&state->wq);
-               set |= clear_state_bit(tree, prealloc, bits,
-                                      wake, delete);
-               prealloc = NULL;
-               goto out;
-       }
-
-       start = state->end + 1;
-       set |= clear_state_bit(tree, state, bits, wake, delete);
-       goto search_again;
-
-out:
-       write_unlock_irqrestore(&tree->lock, flags);
-       if (prealloc)
-               free_extent_state(prealloc);
-
-       return set;
-
-search_again:
-       if (start > end)
-               goto out;
-       write_unlock_irqrestore(&tree->lock, flags);
-       if (mask & __GFP_WAIT)
-               cond_resched();
-       goto again;
-}
-EXPORT_SYMBOL(clear_extent_bit);
-
-static int wait_on_state(struct extent_map_tree *tree,
-                        struct extent_state *state)
-{
-       DEFINE_WAIT(wait);
-       prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
-       read_unlock_irq(&tree->lock);
-       schedule();
-       read_lock_irq(&tree->lock);
-       finish_wait(&state->wq, &wait);
-       return 0;
-}
-
-/*
- * waits for one or more bits to clear on a range in the state tree.
- * The range [start, end] is inclusive.
- * The tree lock is taken by this function
- */
-int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits)
-{
-       struct extent_state *state;
-       struct rb_node *node;
-
-       read_lock_irq(&tree->lock);
-again:
-       while (1) {
-               /*
-                * this search will find all the extents that end after
-                * our range starts
-                */
-               node = tree_search(&tree->state, start);
-               if (!node)
-                       break;
-
-               state = rb_entry(node, struct extent_state, rb_node);
-
-               if (state->start > end)
-                       goto out;
-
-               if (state->state & bits) {
-                       start = state->start;
-                       atomic_inc(&state->refs);
-                       wait_on_state(tree, state);
-                       free_extent_state(state);
-                       goto again;
-               }
-               start = state->end + 1;
-
-               if (start > end)
-                       break;
-
-               if (need_resched()) {
-                       read_unlock_irq(&tree->lock);
-                       cond_resched();
-                       read_lock_irq(&tree->lock);
-               }
-       }
-out:
-       read_unlock_irq(&tree->lock);
-       return 0;
-}
-EXPORT_SYMBOL(wait_extent_bit);
-
-static void set_state_bits(struct extent_map_tree *tree,
-                          struct extent_state *state,
-                          int bits)
-{
-       if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
-               u64 range = state->end - state->start + 1;
-               tree->dirty_bytes += range;
-       }
-       state->state |= bits;
-}
-
-/*
- * set some bits on a range in the tree.  This may require allocations
- * or sleeping, so the gfp mask is used to indicate what is allowed.
- *
- * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
- * range already has the desired bits set.  The start of the existing
- * range is returned in failed_start in this case.
- *
- * [start, end] is inclusive
- * This takes the tree lock.
- */
-int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits,
-                  int exclusive, u64 *failed_start, gfp_t mask)
-{
-       struct extent_state *state;
-       struct extent_state *prealloc = NULL;
-       struct rb_node *node;
-       unsigned long flags;
-       int err = 0;
-       int set;
-       u64 last_start;
-       u64 last_end;
-again:
-       if (!prealloc && (mask & __GFP_WAIT)) {
-               prealloc = alloc_extent_state(mask);
-               if (!prealloc)
-                       return -ENOMEM;
-       }
-
-       write_lock_irqsave(&tree->lock, flags);
-       /*
-        * this search will find all the extents that end after
-        * our range starts.
-        */
-       node = tree_search(&tree->state, start);
-       if (!node) {
-               err = insert_state(tree, prealloc, start, end, bits);
-               prealloc = NULL;
-               BUG_ON(err == -EEXIST);
-               goto out;
-       }
-
-       state = rb_entry(node, struct extent_state, rb_node);
-       last_start = state->start;
-       last_end = state->end;
-
-       /*
-        * | ---- desired range ---- |
-        * | state |
-        *
-        * Just lock what we found and keep going
-        */
-       if (state->start == start && state->end <= end) {
-               set = state->state & bits;
-               if (set && exclusive) {
-                       *failed_start = state->start;
-                       err = -EEXIST;
-                       goto out;
-               }
-               set_state_bits(tree, state, bits);
-               start = state->end + 1;
-               merge_state(tree, state);
-               goto search_again;
-       }
-
-       /*
-        *     | ---- desired range ---- |
-        * | state |
-        *   or
-        * | ------------- state -------------- |
-        *
-        * We need to split the extent we found, and may flip bits on
-        * second half.
-        *
-        * If the extent we found extends past our
-        * range, we just split and search again.  It'll get split
-        * again the next time though.
-        *
-        * If the extent we found is inside our range, we set the
-        * desired bit on it.
-        */
-       if (state->start < start) {
-               set = state->state & bits;
-               if (exclusive && set) {
-                       *failed_start = start;
-                       err = -EEXIST;
-                       goto out;
-               }
-               err = split_state(tree, state, prealloc, start);
-               BUG_ON(err == -EEXIST);
-               prealloc = NULL;
-               if (err)
-                       goto out;
-               if (state->end <= end) {
-                       set_state_bits(tree, state, bits);
-                       start = state->end + 1;
-                       merge_state(tree, state);
-               } else {
-                       start = state->start;
-               }
-               goto search_again;
-       }
-       /*
-        * | ---- desired range ---- |
-        *     | state | or               | state |
-        *
-        * There's a hole, we need to insert something in it and
-        * ignore the extent we found.
-        */
-       if (state->start > start) {
-               u64 this_end;
-               if (end < last_start)
-                       this_end = end;
-               else
-                       this_end = last_start -1;
-               err = insert_state(tree, prealloc, start, this_end,
-                                  bits);
-               prealloc = NULL;
-               BUG_ON(err == -EEXIST);
-               if (err)
-                       goto out;
-               start = this_end + 1;
-               goto search_again;
-       }
-       /*
-        * | ---- desired range ---- |
-        *                        | state |
-        * We need to split the extent, and set the bit
-        * on the first half
-        */
-       if (state->start <= end && state->end > end) {
-               set = state->state & bits;
-               if (exclusive && set) {
-                       *failed_start = start;
-                       err = -EEXIST;
-                       goto out;
-               }
-               err = split_state(tree, state, prealloc, end + 1);
-               BUG_ON(err == -EEXIST);
-
-               set_state_bits(tree, prealloc, bits);
-               merge_state(tree, prealloc);
-               prealloc = NULL;
-               goto out;
-       }
-
-       goto search_again;
-
-out:
-       write_unlock_irqrestore(&tree->lock, flags);
-       if (prealloc)
-               free_extent_state(prealloc);
-
-       return err;
-
-search_again:
-       if (start > end)
-               goto out;
-       write_unlock_irqrestore(&tree->lock, flags);
-       if (mask & __GFP_WAIT)
-               cond_resched();
-       goto again;
-}
-EXPORT_SYMBOL(set_extent_bit);
-
-/* wrappers around set/clear extent bit */
-int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
-                    gfp_t mask)
-{
-       return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
-                             mask);
-}
-EXPORT_SYMBOL(set_extent_dirty);
-
-int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
-                   int bits, gfp_t mask)
-{
-       return set_extent_bit(tree, start, end, bits, 0, NULL,
-                             mask);
-}
-EXPORT_SYMBOL(set_extent_bits);
-
-int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
-                     int bits, gfp_t mask)
-{
-       return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
-}
-EXPORT_SYMBOL(clear_extent_bits);
-
-int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end,
-                    gfp_t mask)
-{
-       return set_extent_bit(tree, start, end,
-                             EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
-                             mask);
-}
-EXPORT_SYMBOL(set_extent_delalloc);
-
-int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
-                      gfp_t mask)
-{
-       return clear_extent_bit(tree, start, end,
-                               EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
-}
-EXPORT_SYMBOL(clear_extent_dirty);
-
-int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
-                    gfp_t mask)
-{
-       return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
-                             mask);
-}
-EXPORT_SYMBOL(set_extent_new);
-
-int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
-                      gfp_t mask)
-{
-       return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
-}
-EXPORT_SYMBOL(clear_extent_new);
-
-int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
-                       gfp_t mask)
-{
-       return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
-                             mask);
-}
-EXPORT_SYMBOL(set_extent_uptodate);
-
-int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
-                         gfp_t mask)
-{
-       return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
-}
-EXPORT_SYMBOL(clear_extent_uptodate);
-
-int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
-                        gfp_t mask)
-{
-       return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
-                             0, NULL, mask);
-}
-EXPORT_SYMBOL(set_extent_writeback);
-
-int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
-                          gfp_t mask)
-{
-       return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
-}
-EXPORT_SYMBOL(clear_extent_writeback);
-
-int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end)
-{
-       return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
-}
-EXPORT_SYMBOL(wait_on_extent_writeback);
-
-/*
- * locks a range in ascending order, waiting for any locked regions
- * it hits on the way.  [start,end] are inclusive, and this will sleep.
- */
-int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask)
-{
-       int err;
-       u64 failed_start;
-       while (1) {
-               err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
-                                    &failed_start, mask);
-               if (err == -EEXIST && (mask & __GFP_WAIT)) {
-                       wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
-                       start = failed_start;
-               } else {
-                       break;
-               }
-               WARN_ON(start > end);
-       }
-       return err;
-}
-EXPORT_SYMBOL(lock_extent);
-
-int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end,
-                 gfp_t mask)
-{
-       return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
-}
-EXPORT_SYMBOL(unlock_extent);
-
-/*
- * helper function to set pages and extents in the tree dirty
- */
-int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end)
-{
-       unsigned long index = start >> PAGE_CACHE_SHIFT;
-       unsigned long end_index = end >> PAGE_CACHE_SHIFT;
-       struct page *page;
-
-       while (index <= end_index) {
-               page = find_get_page(tree->mapping, index);
-               BUG_ON(!page);
-               __set_page_dirty_nobuffers(page);
-               page_cache_release(page);
-               index++;
-       }
-       set_extent_dirty(tree, start, end, GFP_NOFS);
-       return 0;
-}
-EXPORT_SYMBOL(set_range_dirty);
-
-/*
- * helper function to set both pages and extents in the tree writeback
- */
-int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end)
-{
-       unsigned long index = start >> PAGE_CACHE_SHIFT;
-       unsigned long end_index = end >> PAGE_CACHE_SHIFT;
-       struct page *page;
-
-       while (index <= end_index) {
-               page = find_get_page(tree->mapping, index);
-               BUG_ON(!page);
-               set_page_writeback(page);
-               page_cache_release(page);
-               index++;
-       }
-       set_extent_writeback(tree, start, end, GFP_NOFS);
-       return 0;
-}
-EXPORT_SYMBOL(set_range_writeback);
-
-int find_first_extent_bit(struct extent_map_tree *tree, u64 start,
-                         u64 *start_ret, u64 *end_ret, int bits)
-{
-       struct rb_node *node;
-       struct extent_state *state;
-       int ret = 1;
-
-       read_lock_irq(&tree->lock);
-       /*
-        * this search will find all the extents that end after
-        * our range starts.
-        */
-       node = tree_search(&tree->state, start);
-       if (!node || IS_ERR(node)) {
-               goto out;
-       }
-
-       while(1) {
-               state = rb_entry(node, struct extent_state, rb_node);
-               if (state->end >= start && (state->state & bits)) {
-                       *start_ret = state->start;
-                       *end_ret = state->end;
-                       ret = 0;
-                       break;
-               }
-               node = rb_next(node);
-               if (!node)
-                       break;
-       }
-out:
-       read_unlock_irq(&tree->lock);
-       return ret;
-}
-EXPORT_SYMBOL(find_first_extent_bit);
-
-u64 find_lock_delalloc_range(struct extent_map_tree *tree,
-                            u64 *start, u64 *end, u64 max_bytes)
-{
-       struct rb_node *node;
-       struct extent_state *state;
-       u64 cur_start = *start;
-       u64 found = 0;
-       u64 total_bytes = 0;
-
-       write_lock_irq(&tree->lock);
-       /*
-        * this search will find all the extents that end after
-        * our range starts.
-        */
-search_again:
-       node = tree_search(&tree->state, cur_start);
-       if (!node || IS_ERR(node)) {
-               *end = (u64)-1;
-               goto out;
-       }
-
-       while(1) {
-               state = rb_entry(node, struct extent_state, rb_node);
-               if (found && state->start != cur_start) {
-                       goto out;
-               }
-               if (!(state->state & EXTENT_DELALLOC)) {
-                       if (!found)
-                               *end = state->end;
-                       goto out;
-               }
-               if (!found) {
-                       struct extent_state *prev_state;
-                       struct rb_node *prev_node = node;
-                       while(1) {
-                               prev_node = rb_prev(prev_node);
-                               if (!prev_node)
-                                       break;
-                               prev_state = rb_entry(prev_node,
-                                                     struct extent_state,
-                                                     rb_node);
-                               if (!(prev_state->state & EXTENT_DELALLOC))
-                                       break;
-                               state = prev_state;
-                               node = prev_node;
-                       }
-               }
-               if (state->state & EXTENT_LOCKED) {
-                       DEFINE_WAIT(wait);
-                       atomic_inc(&state->refs);
-                       prepare_to_wait(&state->wq, &wait,
-                                       TASK_UNINTERRUPTIBLE);
-                       write_unlock_irq(&tree->lock);
-                       schedule();
-                       write_lock_irq(&tree->lock);
-                       finish_wait(&state->wq, &wait);
-                       free_extent_state(state);
-                       goto search_again;
-               }
-               state->state |= EXTENT_LOCKED;
-               if (!found)
-                       *start = state->start;
-               found++;
-               *end = state->end;
-               cur_start = state->end + 1;
-               node = rb_next(node);
-               if (!node)
-                       break;
-               total_bytes += state->end - state->start + 1;
-               if (total_bytes >= max_bytes)
-                       break;
-       }
-out:
-       write_unlock_irq(&tree->lock);
-       return found;
-}
-
-/*
- * Add up how many bytes between *start and search_end are covered by
- * extent states that have at least one bit of 'bits' set.
- *
- * Counting stops once the running total reaches max_bytes.  *start is
- * moved to the start of the first matching state that was counted
- * without hitting that limit.  When *start is 0 and bits is exactly
- * EXTENT_DIRTY, tree->dirty_bytes is returned instead of walking the
- * tree.  A search_end at or before *start triggers a warning and
- * returns 0.
- */
-u64 count_range_bits(struct extent_map_tree *tree,
-                    u64 *start, u64 search_end, u64 max_bytes,
-                    unsigned long bits)
-{
-       struct extent_state *es;
-       struct rb_node *n;
-       u64 range_start = *start;
-       u64 bytes = 0;
-       int have_first = 0;
-
-       if (search_end <= range_start) {
-               printk("search_end %Lu start %Lu\n", search_end, range_start);
-               WARN_ON(1);
-               return 0;
-       }
-
-       write_lock_irq(&tree->lock);
-       if (range_start == 0 && bits == EXTENT_DIRTY) {
-               bytes = tree->dirty_bytes;
-               goto unlock;
-       }
-
-       /* first state that ends after the beginning of our range */
-       n = tree_search(&tree->state, range_start);
-       if (!n || IS_ERR(n))
-               goto unlock;
-
-       for (; n; n = rb_next(n)) {
-               es = rb_entry(n, struct extent_state, rb_node);
-               if (es->start > search_end)
-                       break;
-               if (es->end < range_start || !(es->state & bits))
-                       continue;
-
-               bytes += min(search_end, es->end) + 1 -
-                        max(range_start, es->start);
-               if (bytes >= max_bytes)
-                       break;
-               if (!have_first) {
-                       *start = es->start;
-                       have_first = 1;
-               }
-       }
-unlock:
-       write_unlock_irq(&tree->lock);
-       return bytes;
-}
-/*
- * helper function to lock both pages and extents in the tree.
- * pages must be locked first.
- *
- * Every page cache index covering [start, end] is obtained with
- * grab_cache_page(), and only once all of them have been obtained is
- * lock_extent() called on the byte range.  Returns 0 on success.  If a
- * page cannot be obtained, the pages from earlier iterations are
- * unlocked again and an error is returned (-ENOMEM for a NULL page,
- * PTR_ERR() for an error pointer).
- *
- * NOTE(review): the undo loop looks each page up again with
- * find_get_page() and does a single page_cache_release(); it is not
- * visible here where the reference from grab_cache_page() is dropped,
- * and the find_get_page() result is not checked for NULL -- confirm.
- */
-int lock_range(struct extent_map_tree *tree, u64 start, u64 end)
-{
-       unsigned long index = start >> PAGE_CACHE_SHIFT;
-       unsigned long end_index = end >> PAGE_CACHE_SHIFT;
-       struct page *page;
-       int err;
-
-       while (index <= end_index) {
-               page = grab_cache_page(tree->mapping, index);
-               if (!page) {
-                       err = -ENOMEM;
-                       goto failed;
-               }
-               if (IS_ERR(page)) {
-                       err = PTR_ERR(page);
-                       goto failed;
-               }
-               index++;
-       }
-       lock_extent(tree, start, end, GFP_NOFS);
-       return 0;
-
-failed:
-       /*
-        * we failed above in getting the page at 'index', so we undo here
-        * up to but not including the page at 'index'
-        */
-       end_index = index;
-       index = start >> PAGE_CACHE_SHIFT;
-       while (index < end_index) {
-               page = find_get_page(tree->mapping, index);
-               unlock_page(page);
-               page_cache_release(page);
-               index++;
-       }
-       return err;
-}
-EXPORT_SYMBOL(lock_range);
-
-/*
- * helper function to unlock both pages and extents in the tree.
- *
- * Each page cache index covering [start, end] is looked up with
- * find_get_page(), unlocked and released; afterwards the extent range
- * itself is unlocked.  Always returns 0.
- *
- * NOTE(review): the find_get_page() result is used without a NULL
- * check, so this presumably relies on every page in the range still
- * being in the page cache -- confirm against callers.
- */
-int unlock_range(struct extent_map_tree *tree, u64 start, u64 end)
-{
-       unsigned long index = start >> PAGE_CACHE_SHIFT;
-       unsigned long end_index = end >> PAGE_CACHE_SHIFT;
-       struct page *page;
-
-       while (index <= end_index) {
-               page = find_get_page(tree->mapping, index);
-               unlock_page(page);
-               page_cache_release(page);
-               index++;
-       }
-       unlock_extent(tree, start, end, GFP_NOFS);
-       return 0;
-}
-EXPORT_SYMBOL(unlock_range);
-
-/*
- * Store 'private' in the extent state that begins exactly at 'start'.
- *
- * Returns 0 when the value was stored, or -ENOENT when the tree search
- * comes back empty or the state it finds does not begin at 'start'.
- * The lookup and the store both happen under the write side of
- * tree->lock.
- */
-int set_state_private(struct extent_map_tree *tree, u64 start, u64 private)
-{
-       struct extent_state *es;
-       struct rb_node *n;
-       int err = -ENOENT;
-
-       write_lock_irq(&tree->lock);
-       /* the search hands back the first state ending after 'start' */
-       n = tree_search(&tree->state, start);
-       if (n && !IS_ERR(n)) {
-               es = rb_entry(n, struct extent_state, rb_node);
-               if (es->start == start) {
-                       es->private = private;
-                       err = 0;
-               }
-       }
-       write_unlock_irq(&tree->lock);
-       return err;
-}
-
-/*
- * Fetch the private value of the extent state that begins exactly at
- * 'start' into *private.
- *
- * Returns 0 when a value was copied out, or -ENOENT when the tree
- * search comes back empty or the state it finds does not begin at
- * 'start'; *private is left untouched in that case.  The lookup runs
- * under the read side of tree->lock.
- */
-int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private)
-{
-       struct extent_state *es;
-       struct rb_node *n;
-       int err = -ENOENT;
-
-       read_lock_irq(&tree->lock);
-       /* the search hands back the first state ending after 'start' */
-       n = tree_search(&tree->state, start);
-       if (n && !IS_ERR(n)) {
-               es = rb_entry(n, struct extent_state, rb_node);
-               if (es->start == start) {
-                       *private = es->private;
-                       err = 0;
-               }
-       }
-       read_unlock_irq(&tree->lock);
-       return err;
-}
-
-/*
- * searches a range in the state tree for a given mask.
- * If 'filled' == 1, this returns 1 only if every extent in the range
- * has the bits set.  Otherwise, 1 is returned if any bit in the
- * range is found set.
- *
- * With 'filled' set, the walk also gives up with 0 when an extent
- * state begins after the current position (a gap in the range) or
- * when the tree runs out of states before 'end' is reached.  A state
- * counts as having the bits set when at least one bit of 'bits' is
- * set in it.  The walk runs under the read side of tree->lock.
- */
-int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
-                  int bits, int filled)
-{
-       struct extent_state *state = NULL;
-       struct rb_node *node;
-       int bitset = 0;
-
-       read_lock_irq(&tree->lock);
-       node = tree_search(&tree->state, start);
-       while (node && start <= end) {
-               state = rb_entry(node, struct extent_state, rb_node);
-
-               if (filled && state->start > start) {
-                       bitset = 0;
-                       break;
-               }
-
-               if (state->start > end)
-                       break;
-
-               if (state->state & bits) {
-                       bitset = 1;
-                       if (!filled)
-                               break;
-               } else if (filled) {
-                       bitset = 0;
-                       break;
-               }
-               start = state->end + 1;
-               if (start > end)
-                       break;
-               node = rb_next(node);
-               if (!node) {
-                       if (filled)
-                               bitset = 0;
-                       break;
-               }
-       }
-       read_unlock_irq(&tree->lock);
-       return bitset;
-}
-EXPORT_SYMBOL(test_range_bit);
-
-/*
- * Mark the page uptodate once the whole byte range it covers carries
- * EXTENT_UPTODATE in the tree.  Always returns 0.
- */
-static int check_page_uptodate(struct extent_map_tree *tree,
-                              struct page *page)
-{
-       u64 first_byte = (u64)page->index << PAGE_CACHE_SHIFT;
-       u64 last_byte = first_byte + PAGE_CACHE_SIZE - 1;
-       int all_uptodate;
-
-       all_uptodate = test_range_bit(tree, first_byte, last_byte,
-                                     EXTENT_UPTODATE, 1);
-       if (all_uptodate)
-               SetPageUptodate(page);
-       return 0;
-}
-
-/*
- * Unlock the page when no part of the byte range it covers still
- * carries EXTENT_LOCKED in the tree.  Always returns 0.
- */
-static int check_page_locked(struct extent_map_tree *tree,
-                            struct page *page)
-{
-       u64 first_byte = (u64)page->index << PAGE_CACHE_SHIFT;
-       u64 last_byte = first_byte + PAGE_CACHE_SIZE - 1;
-       int still_locked;
-
-       still_locked = test_range_bit(tree, first_byte, last_byte,
-                                     EXTENT_LOCKED, 0);
-       if (!still_locked)
-               unlock_page(page);
-       return 0;
-}
-
-/*
- * End writeback on the page when no part of the byte range it covers
- * still carries EXTENT_WRITEBACK in the tree.  Always returns 0.
- */
-static int check_page_writeback(struct extent_map_tree *tree,
-                            struct page *page)
-{
-       u64 first_byte = (u64)page->index << PAGE_CACHE_SHIFT;
-       u64 last_byte = first_byte + PAGE_CACHE_SIZE - 1;
-       int still_writeback;
-
-       still_writeback = test_range_bit(tree, first_byte, last_byte,
-                                        EXTENT_WRITEBACK, 0);
-       if (!still_writeback)
-               end_page_writeback(page);
-       return 0;
-}
-
-/* lots and lots of room for performance fixes in the end_bio funcs */
-
-/*
- * after a writepage IO is done, we need to:
- * clear the uptodate bits on error
- * clear the writeback bits in the extent tree for this IO
- * end_page_writeback if the page has no more pending IO
- *
- * Scheduling is not allowed, so the extent state tree is expected
- * to have one and only one object corresponding to this IO.
- *
- * The bio_vecs are walked from the last one back to the first, and the
- * tree is taken from bio->bi_private.  A segment covering a whole page
- * ends writeback on the page directly; a partial one goes through
- * check_page_writeback().  The optional writepage_end_io_hook is called
- * for every segment, and the bio reference is dropped at the end.
- *
- * On kernels up to 2.6.23 the older end_io prototype is used: the
- * function returns 1 without doing anything while bio->bi_size is
- * non-zero, and 0 once the bio has been processed.
- */
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
-static void end_bio_extent_writepage(struct bio *bio, int err)
-#else
-static int end_bio_extent_writepage(struct bio *bio,
-                                  unsigned int bytes_done, int err)
-#endif
-{
-       const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-       struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-       struct extent_map_tree *tree = bio->bi_private;
-       u64 start;
-       u64 end;
-       int whole_page;
-
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-       if (bio->bi_size)
-               return 1;
-#endif
-
-       do {
-               struct page *page = bvec->bv_page;
-               start = ((u64)page->index << PAGE_CACHE_SHIFT) +
-                        bvec->bv_offset;
-               end = start + bvec->bv_len - 1;
-
-               if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
-                       whole_page = 1;
-               else
-                       whole_page = 0;
-
-               if (--bvec >= bio->bi_io_vec)
-                       prefetchw(&bvec->bv_page->flags);
-
-               if (!uptodate) {
-                       clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
-                       ClearPageUptodate(page);
-                       SetPageError(page);
-               }
-               clear_extent_writeback(tree, start, end, GFP_ATOMIC);
-
-               if (whole_page)
-                       end_page_writeback(page);
-               else
-                       check_page_writeback(tree, page);
-               if (tree->ops && tree->ops->writepage_end_io_hook)
-                       tree->ops->writepage_end_io_hook(page, start, end);
-       } while (bvec >= bio->bi_io_vec);
-
-       bio_put(bio);
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-       return 0;
-#endif
-}
-
-/*
- * after a readpage IO is done, we need to:
- * clear the uptodate bits on error
- * set the uptodate bits if things worked
- * set the page up to date if all extents in the tree are uptodate
- * clear the lock bit in the extent tree
- * unlock the page if there are no other extents locked for it
- *
- * Scheduling is not allowed, so the extent state tree is expected
- * to have one and only one object corresponding to this IO.
- *
- * The bio_vecs are walked from the last one back to the first, and the
- * tree is taken from bio->bi_private.  While the bio still counts as
- * uptodate, the optional readpage_end_io_hook is called per segment; a
- * non-zero return clears the local 'uptodate' flag, which then also
- * applies to every segment handled after it.  On failure the page gets
- * ClearPageUptodate() and SetPageError().  The extent range is unlocked
- * in both cases, and the bio reference is dropped at the end.
- *
- * On kernels up to 2.6.23 the older end_io prototype is used: the
- * function returns 1 without doing anything while bio->bi_size is
- * non-zero, and 0 once the bio has been processed.
- */
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
-static void end_bio_extent_readpage(struct bio *bio, int err)
-#else
-static int end_bio_extent_readpage(struct bio *bio,
-                                  unsigned int bytes_done, int err)
-#endif
-{
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-       struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-       struct extent_map_tree *tree = bio->bi_private;
-       u64 start;
-       u64 end;
-       int whole_page;
-       int ret;
-
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-       if (bio->bi_size)
-               return 1;
-#endif
-
-       do {
-               struct page *page = bvec->bv_page;
-               start = ((u64)page->index << PAGE_CACHE_SHIFT) +
-                       bvec->bv_offset;
-               end = start + bvec->bv_len - 1;
-
-               if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
-                       whole_page = 1;
-               else
-                       whole_page = 0;
-
-               if (--bvec >= bio->bi_io_vec)
-                       prefetchw(&bvec->bv_page->flags);
-
-               if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
-                       ret = tree->ops->readpage_end_io_hook(page, start, end);
-                       if (ret)
-                               uptodate = 0;
-               }
-               if (uptodate) {
-                       set_extent_uptodate(tree, start, end, GFP_ATOMIC);
-                       if (whole_page)
-                               SetPageUptodate(page);
-                       else
-                               check_page_uptodate(tree, page);
-               } else {
-                       ClearPageUptodate(page);
-                       SetPageError(page);
-               }
-
-               unlock_extent(tree, start, end, GFP_ATOMIC);
-
-               if (whole_page)
-                       unlock_page(page);
-               else
-                       check_page_locked(tree, page);
-       } while (bvec >= bio->bi_io_vec);
-
-       bio_put(bio);
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-       return 0;
-#endif
-}
-
-/*
- * IO done from prepare_write is pretty simple, we just unlock
- * the structs in the extent tree when done, and set the uptodate bits
- * as appropriate.
- *
- * The bio_vecs are walked from the last one back to the first, and the
- * tree is taken from bio->bi_private.  On success only the extent range
- * is marked uptodate; on failure the page gets ClearPageUptodate() and
- * SetPageError().  Unlike the readpage handler, the page itself is
- * neither marked uptodate nor unlocked here.  The bio reference is
- * dropped at the end.
- *
- * On kernels up to 2.6.23 the older end_io prototype is used: the
- * function returns 1 without doing anything while bio->bi_size is
- * non-zero, and 0 once the bio has been processed.
- */
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
-static void end_bio_extent_preparewrite(struct bio *bio, int err)
-#else
-static int end_bio_extent_preparewrite(struct bio *bio,
-                                      unsigned int bytes_done, int err)
-#endif
-{
-       const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-       struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-       struct extent_map_tree *tree = bio->bi_private;
-       u64 start;
-       u64 end;
-
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-       if (bio->bi_size)
-               return 1;
-#endif
-
-       do {
-               struct page *page = bvec->bv_page;
-               start = ((u64)page->index << PAGE_CACHE_SHIFT) +
-                       bvec->bv_offset;
-               end = start + bvec->bv_len - 1;
-
-               if (--bvec >= bio->bi_io_vec)
-                       prefetchw(&bvec->bv_page->flags);
-
-               if (uptodate) {
-                       set_extent_uptodate(tree, start, end, GFP_ATOMIC);
-               } else {
-                       ClearPageUptodate(page);
-                       SetPageError(page);
-               }
-
-               unlock_extent(tree, start, end, GFP_ATOMIC);
-
-       } while (bvec >= bio->bi_io_vec);
-
-       bio_put(bio);
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-       return 0;
-#endif
-}
-
-/*
- * Allocate a bio with room for nr_vecs segments and point it at
- * first_sector on bdev.
- *
- * If the allocation fails while the current task has PF_MEMALLOC set,
- * it is retried with the segment count halved each time until one
- * succeeds or the count reaches zero.  Returns the bio, or NULL when
- * nothing could be allocated.
- */
-static struct bio *
-extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
-                gfp_t gfp_flags)
-{
-       struct bio *new_bio = bio_alloc(gfp_flags, nr_vecs);
-
-       if (!new_bio && (current->flags & PF_MEMALLOC)) {
-               do {
-                       nr_vecs /= 2;
-                       if (!nr_vecs)
-                               break;
-                       new_bio = bio_alloc(gfp_flags, nr_vecs);
-               } while (!new_bio);
-       }
-
-       if (!new_bio)
-               return NULL;
-
-       new_bio->bi_bdev = bdev;
-       new_bio->bi_sector = first_sector;
-       return new_bio;
-}
-
-/*
- * Hand a bio to the block layer.
- *
- * An extra reference is held across submit_bio() so the bio's flags
- * can still be read afterwards.  A warning is printed when the bio
- * starts beyond the device size (i_size of the bdev inode, in 512 byte
- * sectors), but the bio is submitted regardless.  Returns -EOPNOTSUPP
- * if the bio is flagged BIO_EOPNOTSUPP after submission, 0 otherwise.
- */
-static int submit_one_bio(int rw, struct bio *bio)
-{
-       u64 dev_sectors;
-       int err;
-
-       bio_get(bio);
-
-       dev_sectors = bio->bi_bdev->bd_inode->i_size >> 9;
-       if (bio->bi_sector > dev_sectors) {
-               printk("sector too large max %Lu got %llu\n", dev_sectors,
-                       (unsigned long long)bio->bi_sector);
-               WARN_ON(1);
-       }
-
-       submit_bio(rw, bio);
-       err = bio_flagged(bio, BIO_EOPNOTSUPP) ? -EOPNOTSUPP : 0;
-       bio_put(bio);
-       return err;
-}
-
-/*
- * Queue 'size' bytes at 'offset' of 'page' for IO at 'sector' on 'bdev'.
- *
- * When bio_ret points at an existing bio, the page is appended to it
- * if it continues at the sector where that bio ends and bio_add_page()
- * takes the whole segment; in that case 0 is returned right away.
- * Otherwise the existing bio is submitted and a new one is started.
- *
- * A new bio is sized for min(max_pages, bio_get_nr_vecs(bdev))
- * segments, gets end_io_func as its completion handler and the tree as
- * its private data.  It is stored in *bio_ret for later merging when
- * bio_ret is given, or submitted immediately when it is not.
- *
- * Returns 0 on success, -ENOMEM if no bio could be allocated, or the
- * error from submit_one_bio().
- */
-static int submit_extent_page(int rw, struct extent_map_tree *tree,
-                             struct page *page, sector_t sector,
-                             size_t size, unsigned long offset,
-                             struct block_device *bdev,
-                             struct bio **bio_ret,
-                             unsigned long max_pages,
-                             bio_end_io_t end_io_func)
-{
-       int ret = 0;
-       struct bio *bio;
-       int nr;
-
-       if (bio_ret && *bio_ret) {
-               bio = *bio_ret;
-               if (bio->bi_sector + (bio->bi_size >> 9) != sector ||
-                   bio_add_page(bio, page, size, offset) < size) {
-                       ret = submit_one_bio(rw, bio);
-                       bio = NULL;
-               } else {
-                       return 0;
-               }
-       }
-       nr = min_t(int, max_pages, bio_get_nr_vecs(bdev));
-       bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
-       if (!bio) {
-               printk("failed to allocate bio nr %d\n", nr);
-               /*
-                * Do not fall through and dereference the NULL bio.  Any
-                * bio the caller handed in has already been submitted
-                * above, so make sure it is not left behind in *bio_ret
-                * for the caller to submit a second time.
-                */
-               if (bio_ret)
-                       *bio_ret = NULL;
-               return -ENOMEM;
-       }
-       bio_add_page(bio, page, size, offset);
-       bio->bi_end_io = end_io_func;
-       bio->bi_private = tree;
-       if (bio_ret) {
-               *bio_ret = bio;
-       } else {
-               ret = submit_one_bio(rw, bio);
-       }
-
-       return ret;
-}
-
-/*
- * Tag a page as tracked by the extent code: set PagePrivate, store
- * EXTENT_PAGE_PRIVATE as the private value and take a page reference.
- * Pages that already have PagePrivate set are left alone.  Warns when
- * the mapping has no invalidatepage operation.
- */
-void set_page_extent_mapped(struct page *page)
-{
-       if (PagePrivate(page))
-               return;
-
-       SetPagePrivate(page);
-       WARN_ON(!page->mapping->a_ops->invalidatepage);
-       set_page_private(page, EXTENT_PAGE_PRIVATE);
-       page_cache_get(page);
-}
-
-/*
- * Store the first-page marker in the page's private value, with 'len'
- * packed in above the two low bits.
- */
-void set_page_extent_head(struct page *page, unsigned long len)
-{
-       unsigned long packed_len = len << 2;
-
-       set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | packed_len);
-}
-
-/*
- * basic readpage implementation.  Locked extent state structs are inserted
- * into the tree that are removed when the IO is done (by the end_io
- * handlers)
- *
- * The page's byte range is locked in the tree and then walked one
- * extent at a time, using get_extent() to map each position:
- *  - anything at or past i_size is zeroed, marked uptodate and unlocked
- *  - holes (EXTENT_MAP_HOLE) are zeroed, marked uptodate and unlocked
- *  - ranges already marked EXTENT_UPTODATE are just unlocked
- *  - everything else is handed to submit_extent_page() with
- *    end_bio_extent_readpage as the completion handler, unless the
- *    optional readpage_io_hook returns non-zero
- *
- * If get_extent() fails, the page is flagged with SetPageError(), the
- * rest of the range is unlocked and the walk stops.
- *
- * If no range got as far as the IO hook/submit step, the page is marked
- * uptodate (unless an error was flagged) and unlocked here.  Always
- * returns 0; failures are reported through SetPageError().
- *
- * NOTE(review): the 'unsigned long nr' declared inside the submit branch
- * shadows the outer 'int nr' counter; the outer one is what gets
- * incremented and tested after the loop.
- */
-static int __extent_read_full_page(struct extent_map_tree *tree,
-                                  struct page *page,
-                                  get_extent_t *get_extent,
-                                  struct bio **bio)
-{
-       struct inode *inode = page->mapping->host;
-       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
-       u64 page_end = start + PAGE_CACHE_SIZE - 1;
-       u64 end;
-       u64 cur = start;
-       u64 extent_offset;
-       u64 last_byte = i_size_read(inode);
-       u64 block_start;
-       u64 cur_end;
-       sector_t sector;
-       struct extent_map *em;
-       struct block_device *bdev;
-       int ret;
-       int nr = 0;
-       size_t page_offset = 0;
-       size_t iosize;
-       size_t blocksize = inode->i_sb->s_blocksize;
-
-       set_page_extent_mapped(page);
-
-       end = page_end;
-       lock_extent(tree, start, end, GFP_NOFS);
-
-       while (cur <= end) {
-               if (cur >= last_byte) {
-                       char *userpage;
-                       iosize = PAGE_CACHE_SIZE - page_offset;
-                       userpage = kmap_atomic(page, KM_USER0);
-                       memset(userpage + page_offset, 0, iosize);
-                       flush_dcache_page(page);
-                       kunmap_atomic(userpage, KM_USER0);
-                       set_extent_uptodate(tree, cur, cur + iosize - 1,
-                                           GFP_NOFS);
-                       unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
-                       break;
-               }
-               em = get_extent(inode, page, page_offset, cur, end, 0);
-               if (IS_ERR(em) || !em) {
-                       SetPageError(page);
-                       unlock_extent(tree, cur, end, GFP_NOFS);
-                       break;
-               }
-
-               extent_offset = cur - em->start;
-               BUG_ON(em->end < cur);
-               BUG_ON(end < cur);
-
-               iosize = min(em->end - cur, end - cur) + 1;
-               cur_end = min(em->end, end);
-               iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
-               sector = (em->block_start + extent_offset) >> 9;
-               bdev = em->bdev;
-               block_start = em->block_start;
-               free_extent_map(em);
-               em = NULL;
-
-               /* we've found a hole, just zero and go on */
-               if (block_start == EXTENT_MAP_HOLE) {
-                       char *userpage;
-                       userpage = kmap_atomic(page, KM_USER0);
-                       memset(userpage + page_offset, 0, iosize);
-                       flush_dcache_page(page);
-                       kunmap_atomic(userpage, KM_USER0);
-
-                       set_extent_uptodate(tree, cur, cur + iosize - 1,
-                                           GFP_NOFS);
-                       unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
-                       cur = cur + iosize;
-                       page_offset += iosize;
-                       continue;
-               }
-               /* the get_extent function already copied into the page */
-               if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
-                       unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
-                       cur = cur + iosize;
-                       page_offset += iosize;
-                       continue;
-               }
-
-               ret = 0;
-               if (tree->ops && tree->ops->readpage_io_hook) {
-                       ret = tree->ops->readpage_io_hook(page, cur,
-                                                         cur + iosize - 1);
-               }
-               if (!ret) {
-                       unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
-                       nr -= page->index;
-                       ret = submit_extent_page(READ, tree, page,
-                                        sector, iosize, page_offset,
-                                        bdev, bio, nr,
-                                        end_bio_extent_readpage);
-               }
-               if (ret)
-                       SetPageError(page);
-               cur = cur + iosize;
-               page_offset += iosize;
-               nr++;
-       }
-       if (!nr) {
-               if (!PageError(page))
-                       SetPageUptodate(page);
-               unlock_page(page);
-       }
-       return 0;
-}
-
-/*
- * Read one page through the extent tree.  The work is done by
- * __extent_read_full_page(); any bio it leaves pending is submitted
- * here before returning its result.
- */
-int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
-                           get_extent_t *get_extent)
-{
-       struct bio *pending = NULL;
-       int ret = __extent_read_full_page(tree, page, get_extent, &pending);
-
-       if (pending)
-               submit_one_bio(READ, pending);
-       return ret;
-}
-EXPORT_SYMBOL(extent_read_full_page);
-
-/*
- * the writepage semantics are similar to regular writepage.  extent
- * records are inserted to lock ranges in the tree, and as dirty areas
- * are found, they are marked writeback.  Then the lock bits are removed
- * and the end_io handler clears the writeback ranges
- *
- * 'data' is a struct extent_page_data supplying the tree, the
- * get_extent callback and the bio being built up across calls.
- *
- * Steps, in order:
- *  - a page whose index is past the i_size page just has its dirty
- *    bits cleared and is unlocked
- *  - the tail of the page that contains i_size is zeroed
- *  - delalloc ranges found from the start of the page are passed to
- *    tree->ops->fill_delalloc() and their LOCKED/DELALLOC bits cleared
- *  - the page range is locked and walked extent by extent; holes and
- *    inline extents only get their dirty bits cleared, everything else
- *    is marked writeback and queued with submit_extent_page()
- *
- * The page is always unlocked before returning, and the return value
- * is always 0; errors are flagged with SetPageError().
- *
- * NOTE(review): tree->ops is dereferenced unconditionally for
- * fill_delalloc but NULL-checked before writepage_io_hook -- confirm
- * that ops is always set when delalloc ranges can exist.
- */
-static int __extent_writepage(struct page *page, struct writeback_control *wbc,
-                             void *data)
-{
-       struct inode *inode = page->mapping->host;
-       struct extent_page_data *epd = data;
-       struct extent_map_tree *tree = epd->tree;
-       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
-       u64 delalloc_start;
-       u64 page_end = start + PAGE_CACHE_SIZE - 1;
-       u64 end;
-       u64 cur = start;
-       u64 extent_offset;
-       u64 last_byte = i_size_read(inode);
-       u64 block_start;
-       u64 iosize;
-       sector_t sector;
-       struct extent_map *em;
-       struct block_device *bdev;
-       int ret;
-       int nr = 0;
-       size_t page_offset = 0;
-       size_t blocksize;
-       loff_t i_size = i_size_read(inode);
-       unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
-       u64 nr_delalloc;
-       u64 delalloc_end;
-
-       WARN_ON(!PageLocked(page));
-       if (page->index > end_index) {
-               clear_extent_dirty(tree, start, page_end, GFP_NOFS);
-               unlock_page(page);
-               return 0;
-       }
-
-       if (page->index == end_index) {
-               char *userpage;
-
-               size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
-
-               userpage = kmap_atomic(page, KM_USER0);
-               memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset);
-               flush_dcache_page(page);
-               kunmap_atomic(userpage, KM_USER0);
-       }
-
-       set_page_extent_mapped(page);
-
-       delalloc_start = start;
-       delalloc_end = 0;
-       while(delalloc_end < page_end) {
-               nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start,
-                                                      &delalloc_end,
-                                                      128 * 1024 * 1024);
-               if (nr_delalloc == 0) {
-                       delalloc_start = delalloc_end + 1;
-                       continue;
-               }
-               tree->ops->fill_delalloc(inode, delalloc_start,
-                                        delalloc_end);
-               clear_extent_bit(tree, delalloc_start,
-                                delalloc_end,
-                                EXTENT_LOCKED | EXTENT_DELALLOC,
-                                1, 0, GFP_NOFS);
-               delalloc_start = delalloc_end + 1;
-       }
-       lock_extent(tree, start, page_end, GFP_NOFS);
-
-       end = page_end;
-       if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
-               printk("found delalloc bits after lock_extent\n");
-       }
-
-       if (last_byte <= start) {
-               clear_extent_dirty(tree, start, page_end, GFP_NOFS);
-               goto done;
-       }
-
-       set_extent_uptodate(tree, start, page_end, GFP_NOFS);
-       blocksize = inode->i_sb->s_blocksize;
-
-       while (cur <= end) {
-               if (cur >= last_byte) {
-                       clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
-                       break;
-               }
-               em = epd->get_extent(inode, page, page_offset, cur, end, 1);
-               if (IS_ERR(em) || !em) {
-                       SetPageError(page);
-                       break;
-               }
-
-               extent_offset = cur - em->start;
-               BUG_ON(em->end < cur);
-               BUG_ON(end < cur);
-               iosize = min(em->end - cur, end - cur) + 1;
-               iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
-               sector = (em->block_start + extent_offset) >> 9;
-               bdev = em->bdev;
-               block_start = em->block_start;
-               free_extent_map(em);
-               em = NULL;
-
-               if (block_start == EXTENT_MAP_HOLE ||
-                   block_start == EXTENT_MAP_INLINE) {
-                       clear_extent_dirty(tree, cur,
-                                          cur + iosize - 1, GFP_NOFS);
-                       cur = cur + iosize;
-                       page_offset += iosize;
-                       continue;
-               }
-
-               /* leave this out until we have a page_mkwrite call */
-               if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
-                                  EXTENT_DIRTY, 0)) {
-                       cur = cur + iosize;
-                       page_offset += iosize;
-                       continue;
-               }
-               clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
-               if (tree->ops && tree->ops->writepage_io_hook) {
-                       ret = tree->ops->writepage_io_hook(page, cur,
-                                               cur + iosize - 1);
-               } else {
-                       ret = 0;
-               }
-               if (ret)
-                       SetPageError(page);
-               else {
-                       unsigned long max_nr = end_index + 1;
-                       set_range_writeback(tree, cur, cur + iosize - 1);
-                       if (!PageWriteback(page)) {
-                               printk("warning page %lu not writeback, "
-                                      "cur %llu end %llu\n", page->index,
-                                      (unsigned long long)cur,
-                                      (unsigned long long)end);
-                       }
-
-                       ret = submit_extent_page(WRITE, tree, page, sector,
-                                                iosize, page_offset, bdev,
-                                                &epd->bio, max_nr,
-                                                end_bio_extent_writepage);
-                       if (ret)
-                               SetPageError(page);
-               }
-               cur = cur + iosize;
-               page_offset += iosize;
-               nr++;
-       }
-done:
-       if (nr == 0) {
-               /* make sure the mapping tag for page dirty gets cleared */
-               set_page_writeback(page);
-               end_page_writeback(page);
-       }
-       unlock_extent(tree, start, page_end, GFP_NOFS);
-       unlock_page(page);
-       return 0;
-}
-
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-
-/* Taken directly from 2.6.23 for 2.6.18 back port */
-typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
-                                void *data);
-
-/**
- * write_cache_pages - walk the list of dirty pages of the given address space
- * and write all of them.
- * @mapping: address space structure to write
- * @wbc: subtract the number of written pages from *@wbc->nr_to_write
- * @writepage: function called for each page
- * @data: data passed to writepage function
- *
- * If a page is already under I/O, write_cache_pages() skips it, even
- * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
- * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
- * and msync() need to guarantee that all the data which was dirty at the time
- * the call was made get new I/O started against them.  If wbc->sync_mode is
- * WB_SYNC_ALL then we were called for data integrity and we must wait for
- * existing IO to complete.
- *
- * Returns the value from the last @writepage call that was made, with
- * AOP_WRITEPAGE_ACTIVATE translated to 0 (the page is unlocked here in
- * that case).  Returns 0 if @writepage was never called, including when
- * the backing device is congested on entry in nonblocking mode.
- *
- * Setting 'done' only stops the outer lookup loop; the pages remaining
- * in the current pagevec are still processed.
- */
-static int write_cache_pages(struct address_space *mapping,
-                     struct writeback_control *wbc, writepage_t writepage,
-                     void *data)
-{
-       struct backing_dev_info *bdi = mapping->backing_dev_info;
-       int ret = 0;
-       int done = 0;
-       struct pagevec pvec;
-       int nr_pages;
-       pgoff_t index;
-       pgoff_t end;            /* Inclusive */
-       int scanned = 0;
-       int range_whole = 0;
-
-       if (wbc->nonblocking && bdi_write_congested(bdi)) {
-               wbc->encountered_congestion = 1;
-               return 0;
-       }
-
-       pagevec_init(&pvec, 0);
-       if (wbc->range_cyclic) {
-               index = mapping->writeback_index; /* Start from prev offset */
-               end = -1;
-       } else {
-               index = wbc->range_start >> PAGE_CACHE_SHIFT;
-               end = wbc->range_end >> PAGE_CACHE_SHIFT;
-               if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
-                       range_whole = 1;
-               scanned = 1;
-       }
-retry:
-       while (!done && (index <= end) &&
-              (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-                                             PAGECACHE_TAG_DIRTY,
-                                             min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
-               unsigned i;
-
-               scanned = 1;
-               for (i = 0; i < nr_pages; i++) {
-                       struct page *page = pvec.pages[i];
-
-                       /*
-                        * At this point we hold neither mapping->tree_lock nor
-                        * lock on the page itself: the page may be truncated or
-                        * invalidated (changing page->mapping to NULL), or even
-                        * swizzled back from swapper_space to tmpfs file
-                        * mapping
-                        */
-                       lock_page(page);
-
-                       if (unlikely(page->mapping != mapping)) {
-                               unlock_page(page);
-                               continue;
-                       }
-
-                       if (!wbc->range_cyclic && page->index > end) {
-                               done = 1;
-                               unlock_page(page);
-                               continue;
-                       }
-
-                       if (wbc->sync_mode != WB_SYNC_NONE)
-                               wait_on_page_writeback(page);
-
-                       if (PageWriteback(page) ||
-                           !clear_page_dirty_for_io(page)) {
-                               unlock_page(page);
-                               continue;
-                       }
-
-                       ret = (*writepage)(page, wbc, data);
-
-                       if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
-                               unlock_page(page);
-                               ret = 0;
-                       }
-                       if (ret || (--(wbc->nr_to_write) <= 0))
-                               done = 1;
-                       if (wbc->nonblocking && bdi_write_congested(bdi)) {
-                               wbc->encountered_congestion = 1;
-                               done = 1;
-                       }
-               }
-               pagevec_release(&pvec);
-               cond_resched();
-       }
-       if (!scanned && !done) {
-               /*
-                * We hit the last page and there is more work to be done: wrap
-                * back to the start of the file
-                */
-               scanned = 1;
-               index = 0;
-               goto retry;
-       }
-       if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
-               mapping->writeback_index = index;
-       return ret;
-}
-#endif
-
-int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
-                         get_extent_t *get_extent,
-                         struct writeback_control *wbc)
-{
-       int ret;
-       struct address_space *mapping = page->mapping;
-       struct extent_page_data epd = {
-               .bio = NULL,
-               .tree = tree,
-               .get_extent = get_extent,
-       };
-       struct writeback_control wbc_writepages = {
-               .bdi            = wbc->bdi,
-               .sync_mode      = WB_SYNC_NONE,
-               .older_than_this = NULL,
-               .nr_to_write    = 64,
-               .range_start    = page_offset(page) + PAGE_CACHE_SIZE,
-               .range_end      = (loff_t)-1,
-       };
-
-
-       ret = __extent_writepage(page, wbc, &epd);
-
-       write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd);
-       if (epd.bio) {
-               submit_one_bio(WRITE, epd.bio);
-       }
-       return ret;
-}
-EXPORT_SYMBOL(extent_write_full_page);
-
-
-int extent_writepages(struct extent_map_tree *tree,
-                     struct address_space *mapping,
-                     get_extent_t *get_extent,
-                     struct writeback_control *wbc)
-{
-       int ret = 0;
-       struct extent_page_data epd = {
-               .bio = NULL,
-               .tree = tree,
-               .get_extent = get_extent,
-       };
-
-       ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
-       if (epd.bio) {
-               submit_one_bio(WRITE, epd.bio);
-       }
-       return ret;
-}
-EXPORT_SYMBOL(extent_writepages);
-
-int extent_readpages(struct extent_map_tree *tree,
-                    struct address_space *mapping,
-                    struct list_head *pages, unsigned nr_pages,
-                    get_extent_t get_extent)
-{
-       struct bio *bio = NULL;
-       unsigned page_idx;
-       struct pagevec pvec;
-
-       pagevec_init(&pvec, 0);
-       for (page_idx = 0; page_idx < nr_pages; page_idx++) {
-               struct page *page = list_entry(pages->prev, struct page, lru);
-
-               prefetchw(&page->flags);
-               list_del(&page->lru);
-               /*
-                * what we want to do here is call add_to_page_cache_lru,
-                * but that isn't exported, so we reproduce it here
-                */
-               if (!add_to_page_cache(page, mapping,
-                                       page->index, GFP_KERNEL)) {
-
-                       /* open coding of lru_cache_add, also not exported */
-                       page_cache_get(page);
-                       if (!pagevec_add(&pvec, page))
-                               __pagevec_lru_add(&pvec);
-                       __extent_read_full_page(tree, page, get_extent, &bio);
-               }
-               page_cache_release(page);
-       }
-       if (pagevec_count(&pvec))
-               __pagevec_lru_add(&pvec);
-       BUG_ON(!list_empty(pages));
-       if (bio)
-               submit_one_bio(READ, bio);
-       return 0;
-}
-EXPORT_SYMBOL(extent_readpages);
-
-/*
- * basic invalidatepage code, this waits on any locked or writeback
- * ranges corresponding to the page, and then deletes any extent state
- * records from the tree
- */
-int extent_invalidatepage(struct extent_map_tree *tree,
-                         struct page *page, unsigned long offset)
-{
-       u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
-       u64 end = start + PAGE_CACHE_SIZE - 1;
-       size_t blocksize = page->mapping->host->i_sb->s_blocksize;
-
-       start += (offset + blocksize -1) & ~(blocksize - 1);
-       if (start > end)
-               return 0;
-
-       lock_extent(tree, start, end, GFP_NOFS);
-       wait_on_extent_writeback(tree, start, end);
-       clear_extent_bit(tree, start, end,
-                        EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
-                        1, 1, GFP_NOFS);
-       return 0;
-}
-EXPORT_SYMBOL(extent_invalidatepage);
-
-/*
- * simple commit_write call, set_range_dirty is used to mark both
- * the pages and the extent records as dirty
- */
-int extent_commit_write(struct extent_map_tree *tree,
-                       struct inode *inode, struct page *page,
-                       unsigned from, unsigned to)
-{
-       loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
-
-       set_page_extent_mapped(page);
-       set_page_dirty(page);
-
-       if (pos > inode->i_size) {
-               i_size_write(inode, pos);
-               mark_inode_dirty(inode);
-       }
-       return 0;
-}
-EXPORT_SYMBOL(extent_commit_write);
-
-int extent_prepare_write(struct extent_map_tree *tree,
-                        struct inode *inode, struct page *page,
-                        unsigned from, unsigned to, get_extent_t *get_extent)
-{
-       u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
-       u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
-       u64 block_start;
-       u64 orig_block_start;
-       u64 block_end;
-       u64 cur_end;
-       struct extent_map *em;
-       unsigned blocksize = 1 << inode->i_blkbits;
-       size_t page_offset = 0;
-       size_t block_off_start;
-       size_t block_off_end;
-       int err = 0;
-       int iocount = 0;
-       int ret = 0;
-       int isnew;
-
-       set_page_extent_mapped(page);
-
-       block_start = (page_start + from) & ~((u64)blocksize - 1);
-       block_end = (page_start + to - 1) | (blocksize - 1);
-       orig_block_start = block_start;
-
-       lock_extent(tree, page_start, page_end, GFP_NOFS);
-       while(block_start <= block_end) {
-               em = get_extent(inode, page, page_offset, block_start,
-                               block_end, 1);
-               if (IS_ERR(em) || !em) {
-                       goto err;
-               }
-               cur_end = min(block_end, em->end);
-               block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
-               block_off_end = block_off_start + blocksize;
-               isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
-
-               if (!PageUptodate(page) && isnew &&
-                   (block_off_end > to || block_off_start < from)) {
-                       void *kaddr;
-
-                       kaddr = kmap_atomic(page, KM_USER0);
-                       if (block_off_end > to)
-                               memset(kaddr + to, 0, block_off_end - to);
-                       if (block_off_start < from)
-                               memset(kaddr + block_off_start, 0,
-                                      from - block_off_start);
-                       flush_dcache_page(page);
-                       kunmap_atomic(kaddr, KM_USER0);
-               }
-               if ((em->block_start != EXTENT_MAP_HOLE &&
-                    em->block_start != EXTENT_MAP_INLINE) &&
-                   !isnew && !PageUptodate(page) &&
-                   (block_off_end > to || block_off_start < from) &&
-                   !test_range_bit(tree, block_start, cur_end,
-                                   EXTENT_UPTODATE, 1)) {
-                       u64 sector;
-                       u64 extent_offset = block_start - em->start;
-                       size_t iosize;
-                       sector = (em->block_start + extent_offset) >> 9;
-                       iosize = (cur_end - block_start + blocksize) &
-                               ~((u64)blocksize - 1);
-                       /*
-                        * we've already got the extent locked, but we
-                        * need to split the state such that our end_bio
-                        * handler can clear the lock.
-                        */
-                       set_extent_bit(tree, block_start,
-                                      block_start + iosize - 1,
-                                      EXTENT_LOCKED, 0, NULL, GFP_NOFS);
-                       ret = submit_extent_page(READ, tree, page,
-                                        sector, iosize, page_offset, em->bdev,
-                                        NULL, 1,
-                                        end_bio_extent_preparewrite);
-                       iocount++;
-                       block_start = block_start + iosize;
-               } else {
-                       set_extent_uptodate(tree, block_start, cur_end,
-                                           GFP_NOFS);
-                       unlock_extent(tree, block_start, cur_end, GFP_NOFS);
-                       block_start = cur_end + 1;
-               }
-               page_offset = block_start & (PAGE_CACHE_SIZE - 1);
-               free_extent_map(em);
-       }
-       if (iocount) {
-               wait_extent_bit(tree, orig_block_start,
-                               block_end, EXTENT_LOCKED);
-       }
-       check_page_uptodate(tree, page);
-err:
-       /* FIXME, zero out newly allocated blocks on error */
-       return err;
-}
-EXPORT_SYMBOL(extent_prepare_write);
-
-/*
- * a helper for releasepage.  As long as there are no locked extents
- * in the range corresponding to the page, both state records and extent
- * map records are removed
- */
-int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page)
-{
-       struct extent_map *em;
-       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
-       u64 end = start + PAGE_CACHE_SIZE - 1;
-       u64 orig_start = start;
-       int ret = 1;
-
-       while (start <= end) {
-               em = lookup_extent_mapping(tree, start, end);
-               if (!em || IS_ERR(em))
-                       break;
-               if (!test_range_bit(tree, em->start, em->end,
-                                   EXTENT_LOCKED, 0)) {
-                       remove_extent_mapping(tree, em);
-                       /* once for the rb tree */
-                       free_extent_map(em);
-               }
-               start = em->end + 1;
-               /* once for us */
-               free_extent_map(em);
-       }
-       if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0))
-               ret = 0;
-       else
-               clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
-                                1, 1, GFP_NOFS);
-       return ret;
-}
-EXPORT_SYMBOL(try_release_extent_mapping);
-
-sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
-               get_extent_t *get_extent)
-{
-       struct inode *inode = mapping->host;
-       u64 start = iblock << inode->i_blkbits;
-       u64 end = start + (1 << inode->i_blkbits) - 1;
-       sector_t sector = 0;
-       struct extent_map *em;
-
-       em = get_extent(inode, NULL, 0, start, end, 0);
-       if (!em || IS_ERR(em))
-               return 0;
-
-       if (em->block_start == EXTENT_MAP_INLINE ||
-           em->block_start == EXTENT_MAP_HOLE)
-               goto out;
-
-       sector = (em->block_start + start - em->start) >> inode->i_blkbits;
-out:
-       free_extent_map(em);
-       return sector;
-}
-
-static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb)
-{
-       if (list_empty(&eb->lru)) {
-               extent_buffer_get(eb);
-               list_add(&eb->lru, &tree->buffer_lru);
-               tree->lru_size++;
-               if (tree->lru_size >= BUFFER_LRU_MAX) {
-                       struct extent_buffer *rm;
-                       rm = list_entry(tree->buffer_lru.prev,
-                                       struct extent_buffer, lru);
-                       tree->lru_size--;
-                       list_del_init(&rm->lru);
-                       free_extent_buffer(rm);
-               }
-       } else
-               list_move(&eb->lru, &tree->buffer_lru);
-       return 0;
-}
-static struct extent_buffer *find_lru(struct extent_map_tree *tree,
-                                     u64 start, unsigned long len)
-{
-       struct list_head *lru = &tree->buffer_lru;
-       struct list_head *cur = lru->next;
-       struct extent_buffer *eb;
-
-       if (list_empty(lru))
-               return NULL;
-
-       do {
-               eb = list_entry(cur, struct extent_buffer, lru);
-               if (eb->start == start && eb->len == len) {
-                       extent_buffer_get(eb);
-                       return eb;
-               }
-               cur = cur->next;
-       } while (cur != lru);
-       return NULL;
-}
-
-static inline unsigned long num_extent_pages(u64 start, u64 len)
-{
-       return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
-               (start >> PAGE_CACHE_SHIFT);
-}
-
-static inline struct page *extent_buffer_page(struct extent_buffer *eb,
-                                             unsigned long i)
-{
-       struct page *p;
-       struct address_space *mapping;
-
-       if (i == 0)
-               return eb->first_page;
-       i += eb->start >> PAGE_CACHE_SHIFT;
-       mapping = eb->first_page->mapping;
-       read_lock_irq(&mapping->tree_lock);
-       p = radix_tree_lookup(&mapping->page_tree, i);
-       read_unlock_irq(&mapping->tree_lock);
-       return p;
-}
-
-static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree,
-                                                  u64 start,
-                                                  unsigned long len,
-                                                  gfp_t mask)
-{
-       struct extent_buffer *eb = NULL;
-
-       spin_lock(&tree->lru_lock);
-       eb = find_lru(tree, start, len);
-       spin_unlock(&tree->lru_lock);
-       if (eb) {
-               return eb;
-       }
-
-       eb = kmem_cache_zalloc(extent_buffer_cache, mask);
-       INIT_LIST_HEAD(&eb->lru);
-       eb->start = start;
-       eb->len = len;
-       atomic_set(&eb->refs, 1);
-
-       return eb;
-}
-
-static void __free_extent_buffer(struct extent_buffer *eb)
-{
-       kmem_cache_free(extent_buffer_cache, eb);
-}
-
-struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
-                                         u64 start, unsigned long len,
-                                         struct page *page0,
-                                         gfp_t mask)
-{
-       unsigned long num_pages = num_extent_pages(start, len);
-       unsigned long i;
-       unsigned long index = start >> PAGE_CACHE_SHIFT;
-       struct extent_buffer *eb;
-       struct page *p;
-       struct address_space *mapping = tree->mapping;
-       int uptodate = 1;
-
-       eb = __alloc_extent_buffer(tree, start, len, mask);
-       if (!eb || IS_ERR(eb))
-               return NULL;
-
-       if (eb->flags & EXTENT_BUFFER_FILLED)
-               goto lru_add;
-
-       if (page0) {
-               eb->first_page = page0;
-               i = 1;
-               index++;
-               page_cache_get(page0);
-               mark_page_accessed(page0);
-               set_page_extent_mapped(page0);
-               WARN_ON(!PageUptodate(page0));
-               set_page_extent_head(page0, len);
-       } else {
-               i = 0;
-       }
-       for (; i < num_pages; i++, index++) {
-               p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
-               if (!p) {
-                       WARN_ON(1);
-                       goto fail;
-               }
-               set_page_extent_mapped(p);
-               mark_page_accessed(p);
-               if (i == 0) {
-                       eb->first_page = p;
-                       set_page_extent_head(p, len);
-               } else {
-                       set_page_private(p, EXTENT_PAGE_PRIVATE);
-               }
-               if (!PageUptodate(p))
-                       uptodate = 0;
-               unlock_page(p);
-       }
-       if (uptodate)
-               eb->flags |= EXTENT_UPTODATE;
-       eb->flags |= EXTENT_BUFFER_FILLED;
-
-lru_add:
-       spin_lock(&tree->lru_lock);
-       add_lru(tree, eb);
-       spin_unlock(&tree->lru_lock);
-       return eb;
-
-fail:
-       spin_lock(&tree->lru_lock);
-       list_del_init(&eb->lru);
-       spin_unlock(&tree->lru_lock);
-       if (!atomic_dec_and_test(&eb->refs))
-               return NULL;
-       for (index = 1; index < i; index++) {
-               page_cache_release(extent_buffer_page(eb, index));
-       }
-       if (i > 0)
-               page_cache_release(extent_buffer_page(eb, 0));
-       __free_extent_buffer(eb);
-       return NULL;
-}
-EXPORT_SYMBOL(alloc_extent_buffer);
-
-struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
-                                        u64 start, unsigned long len,
-                                         gfp_t mask)
-{
-       unsigned long num_pages = num_extent_pages(start, len);
-       unsigned long i;
-       unsigned long index = start >> PAGE_CACHE_SHIFT;
-       struct extent_buffer *eb;
-       struct page *p;
-       struct address_space *mapping = tree->mapping;
-       int uptodate = 1;
-
-       eb = __alloc_extent_buffer(tree, start, len, mask);
-       if (!eb || IS_ERR(eb))
-               return NULL;
-
-       if (eb->flags & EXTENT_BUFFER_FILLED)
-               goto lru_add;
-
-       for (i = 0; i < num_pages; i++, index++) {
-               p = find_lock_page(mapping, index);
-               if (!p) {
-                       goto fail;
-               }
-               set_page_extent_mapped(p);
-               mark_page_accessed(p);
-
-               if (i == 0) {
-                       eb->first_page = p;
-                       set_page_extent_head(p, len);
-               } else {
-                       set_page_private(p, EXTENT_PAGE_PRIVATE);
-               }
-
-               if (!PageUptodate(p))
-                       uptodate = 0;
-               unlock_page(p);
-       }
-       if (uptodate)
-               eb->flags |= EXTENT_UPTODATE;
-       eb->flags |= EXTENT_BUFFER_FILLED;
-
-lru_add:
-       spin_lock(&tree->lru_lock);
-       add_lru(tree, eb);
-       spin_unlock(&tree->lru_lock);
-       return eb;
-fail:
-       spin_lock(&tree->lru_lock);
-       list_del_init(&eb->lru);
-       spin_unlock(&tree->lru_lock);
-       if (!atomic_dec_and_test(&eb->refs))
-               return NULL;
-       for (index = 1; index < i; index++) {
-               page_cache_release(extent_buffer_page(eb, index));
-       }
-       if (i > 0)
-               page_cache_release(extent_buffer_page(eb, 0));
-       __free_extent_buffer(eb);
-       return NULL;
-}
-EXPORT_SYMBOL(find_extent_buffer);
-
-void free_extent_buffer(struct extent_buffer *eb)
-{
-       unsigned long i;
-       unsigned long num_pages;
-
-       if (!eb)
-               return;
-
-       if (!atomic_dec_and_test(&eb->refs))
-               return;
-
-       WARN_ON(!list_empty(&eb->lru));
-       num_pages = num_extent_pages(eb->start, eb->len);
-
-       for (i = 1; i < num_pages; i++) {
-               page_cache_release(extent_buffer_page(eb, i));
-       }
-       page_cache_release(extent_buffer_page(eb, 0));
-       __free_extent_buffer(eb);
-}
-EXPORT_SYMBOL(free_extent_buffer);
-
-int clear_extent_buffer_dirty(struct extent_map_tree *tree,
-                             struct extent_buffer *eb)
-{
-       int set;
-       unsigned long i;
-       unsigned long num_pages;
-       struct page *page;
-
-       u64 start = eb->start;
-       u64 end = start + eb->len - 1;
-
-       set = clear_extent_dirty(tree, start, end, GFP_NOFS);
-       num_pages = num_extent_pages(eb->start, eb->len);
-
-       for (i = 0; i < num_pages; i++) {
-               page = extent_buffer_page(eb, i);
-               lock_page(page);
-               if (i == 0)
-                       set_page_extent_head(page, eb->len);
-               else
-                       set_page_private(page, EXTENT_PAGE_PRIVATE);
-
-               /*
-                * if we're on the last page or the first page and the
-                * block isn't aligned on a page boundary, do extra checks
-                * to make sure we don't clean page that is partially dirty
-                */
-               if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
-                   ((i == num_pages - 1) &&
-                    ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
-                       start = (u64)page->index << PAGE_CACHE_SHIFT;
-                       end  = start + PAGE_CACHE_SIZE - 1;
-                       if (test_range_bit(tree, start, end,
-                                          EXTENT_DIRTY, 0)) {
-                               unlock_page(page);
-                               continue;
-                       }
-               }
-               clear_page_dirty_for_io(page);
-               write_lock_irq(&page->mapping->tree_lock);
-               if (!PageDirty(page)) {
-                       radix_tree_tag_clear(&page->mapping->page_tree,
-                                               page_index(page),
-                                               PAGECACHE_TAG_DIRTY);
-               }
-               write_unlock_irq(&page->mapping->tree_lock);
-               unlock_page(page);
-       }
-       return 0;
-}
-EXPORT_SYMBOL(clear_extent_buffer_dirty);
-
-int wait_on_extent_buffer_writeback(struct extent_map_tree *tree,
-                                   struct extent_buffer *eb)
-{
-       return wait_on_extent_writeback(tree, eb->start,
-                                       eb->start + eb->len - 1);
-}
-EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
-
-int set_extent_buffer_dirty(struct extent_map_tree *tree,
-                            struct extent_buffer *eb)
-{
-       unsigned long i;
-       unsigned long num_pages;
-
-       num_pages = num_extent_pages(eb->start, eb->len);
-       for (i = 0; i < num_pages; i++) {
-               struct page *page = extent_buffer_page(eb, i);
-               /* writepage may need to do something special for the
-                * first page, we have to make sure page->private is
-                * properly set.  releasepage may drop page->private
-                * on us if the page isn't already dirty.
-                */
-               if (i == 0) {
-                       lock_page(page);
-                       set_page_extent_head(page, eb->len);
-               } else if (PagePrivate(page) &&
-                          page->private != EXTENT_PAGE_PRIVATE) {
-                       lock_page(page);
-                       set_page_extent_mapped(page);
-                       unlock_page(page);
-               }
-               __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
-               if (i == 0)
-                       unlock_page(page);
-       }
-       return set_extent_dirty(tree, eb->start,
-                               eb->start + eb->len - 1, GFP_NOFS);
-}
-EXPORT_SYMBOL(set_extent_buffer_dirty);
-
-int set_extent_buffer_uptodate(struct extent_map_tree *tree,
-                               struct extent_buffer *eb)
-{
-       unsigned long i;
-       struct page *page;
-       unsigned long num_pages;
-
-       num_pages = num_extent_pages(eb->start, eb->len);
-
-       set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-                           GFP_NOFS);
-       for (i = 0; i < num_pages; i++) {
-               page = extent_buffer_page(eb, i);
-               if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
-                   ((i == num_pages - 1) &&
-                    ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
-                       check_page_uptodate(tree, page);
-                       continue;
-               }
-               SetPageUptodate(page);
-       }
-       return 0;
-}
-EXPORT_SYMBOL(set_extent_buffer_uptodate);
-
-int extent_buffer_uptodate(struct extent_map_tree *tree,
-                            struct extent_buffer *eb)
-{
-       if (eb->flags & EXTENT_UPTODATE)
-               return 1;
-       return test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-                          EXTENT_UPTODATE, 1);
-}
-EXPORT_SYMBOL(extent_buffer_uptodate);
-
-int read_extent_buffer_pages(struct extent_map_tree *tree,
-                            struct extent_buffer *eb,
-                            u64 start,
-                            int wait)
-{
-       unsigned long i;
-       unsigned long start_i;
-       struct page *page;
-       int err;
-       int ret = 0;
-       unsigned long num_pages;
-
-       if (eb->flags & EXTENT_UPTODATE)
-               return 0;
-
-       if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-                          EXTENT_UPTODATE, 1)) {
-               return 0;
-       }
-
-       if (start) {
-               WARN_ON(start < eb->start);
-               start_i = (start >> PAGE_CACHE_SHIFT) -
-                       (eb->start >> PAGE_CACHE_SHIFT);
-       } else {
-               start_i = 0;
-       }
-
-       num_pages = num_extent_pages(eb->start, eb->len);
-       for (i = start_i; i < num_pages; i++) {
-               page = extent_buffer_page(eb, i);
-               if (PageUptodate(page)) {
-                       continue;
-               }
-               if (!wait) {
-                       if (TestSetPageLocked(page)) {
-                               continue;
-                       }
-               } else {
-                       lock_page(page);
-               }
-               if (!PageUptodate(page)) {
-                       err = page->mapping->a_ops->readpage(NULL, page);
-                       if (err) {
-                               ret = err;
-                       }
-               } else {
-                       unlock_page(page);
-               }
-       }
-
-       if (ret || !wait) {
-               return ret;
-       }
-
-       for (i = start_i; i < num_pages; i++) {
-               page = extent_buffer_page(eb, i);
-               wait_on_page_locked(page);
-               if (!PageUptodate(page)) {
-                       ret = -EIO;
-               }
-       }
-       if (!ret)
-               eb->flags |= EXTENT_UPTODATE;
-       return ret;
-}
-EXPORT_SYMBOL(read_extent_buffer_pages);
-
-void read_extent_buffer(struct extent_buffer *eb, void *dstv,
-                       unsigned long start,
-                       unsigned long len)
-{
-       size_t cur;
-       size_t offset;
-       struct page *page;
-       char *kaddr;
-       char *dst = (char *)dstv;
-       size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
-       unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
-       unsigned long num_pages = num_extent_pages(eb->start, eb->len);
-
-       WARN_ON(start > eb->len);
-       WARN_ON(start + len > eb->start + eb->len);
-
-       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
-
-       while(len > 0) {
-               page = extent_buffer_page(eb, i);
-               if (!PageUptodate(page)) {
-                       printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len);
-                       WARN_ON(1);
-               }
-               WARN_ON(!PageUptodate(page));
-
-               cur = min(len, (PAGE_CACHE_SIZE - offset));
-               kaddr = kmap_atomic(page, KM_USER1);
-               memcpy(dst, kaddr + offset, cur);
-               kunmap_atomic(kaddr, KM_USER1);
-
-               dst += cur;
-               len -= cur;
-               offset = 0;
-               i++;
-       }
-}
-EXPORT_SYMBOL(read_extent_buffer);
-
-int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
-                              unsigned long min_len, char **token, char **map,
-                              unsigned long *map_start,
-                              unsigned long *map_len, int km)
-{
-       size_t offset = start & (PAGE_CACHE_SIZE - 1);
-       char *kaddr;
-       struct page *p;
-       size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
-       unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
-       unsigned long end_i = (start_offset + start + min_len - 1) >>
-               PAGE_CACHE_SHIFT;
-
-       if (i != end_i)
-               return -EINVAL;
-
-       if (i == 0) {
-               offset = start_offset;
-               *map_start = 0;
-       } else {
-               offset = 0;
-               *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
-       }
-       if (start + min_len > eb->len) {
-printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len);
-               WARN_ON(1);
-       }
-
-       p = extent_buffer_page(eb, i);
-       WARN_ON(!PageUptodate(p));
-       kaddr = kmap_atomic(p, km);
-       *token = kaddr;
-       *map = kaddr + offset;
-       *map_len = PAGE_CACHE_SIZE - offset;
-       return 0;
-}
-EXPORT_SYMBOL(map_private_extent_buffer);
-
-int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
-                     unsigned long min_len,
-                     char **token, char **map,
-                     unsigned long *map_start,
-                     unsigned long *map_len, int km)
-{
-       int err;
-       int save = 0;
-       if (eb->map_token) {
-               unmap_extent_buffer(eb, eb->map_token, km);
-               eb->map_token = NULL;
-               save = 1;
-       }
-       err = map_private_extent_buffer(eb, start, min_len, token, map,
-                                      map_start, map_len, km);
-       if (!err && save) {
-               eb->map_token = *token;
-               eb->kaddr = *map;
-               eb->map_start = *map_start;
-               eb->map_len = *map_len;
-       }
-       return err;
-}
-EXPORT_SYMBOL(map_extent_buffer);
-
-void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
-{
-       kunmap_atomic(token, km);
-}
-EXPORT_SYMBOL(unmap_extent_buffer);
-
-int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
-                         unsigned long start,
-                         unsigned long len)
-{
-       size_t cur;
-       size_t offset;
-       struct page *page;
-       char *kaddr;
-       char *ptr = (char *)ptrv;
-       size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
-       unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
-       int ret = 0;
-
-       WARN_ON(start > eb->len);
-       WARN_ON(start + len > eb->start + eb->len);
-
-       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
-
-       while(len > 0) {
-               page = extent_buffer_page(eb, i);
-               WARN_ON(!PageUptodate(page));
-
-               cur = min(len, (PAGE_CACHE_SIZE - offset));
-
-               kaddr = kmap_atomic(page, KM_USER0);
-               ret = memcmp(ptr, kaddr + offset, cur);
-               kunmap_atomic(kaddr, KM_USER0);
-               if (ret)
-                       break;
-
-               ptr += cur;
-               len -= cur;
-               offset = 0;
-               i++;
-       }
-       return ret;
-}
-EXPORT_SYMBOL(memcmp_extent_buffer);
-
-void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
-                        unsigned long start, unsigned long len)
-{
-       size_t cur;
-       size_t offset;
-       struct page *page;
-       char *kaddr;
-       char *src = (char *)srcv;
-       size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
-       unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
-
-       WARN_ON(start > eb->len);
-       WARN_ON(start + len > eb->start + eb->len);
-
-       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
-
-       while(len > 0) {
-               page = extent_buffer_page(eb, i);
-               WARN_ON(!PageUptodate(page));
-
-               cur = min(len, PAGE_CACHE_SIZE - offset);
-               kaddr = kmap_atomic(page, KM_USER1);
-               memcpy(kaddr + offset, src, cur);
-               kunmap_atomic(kaddr, KM_USER1);
-
-               src += cur;
-               len -= cur;
-               offset = 0;
-               i++;
-       }
-}
-EXPORT_SYMBOL(write_extent_buffer);
-
-void memset_extent_buffer(struct extent_buffer *eb, char c,
-                         unsigned long start, unsigned long len)
-{
-       size_t cur;
-       size_t offset;
-       struct page *page;
-       char *kaddr;
-       size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
-       unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
-
-       WARN_ON(start > eb->len);
-       WARN_ON(start + len > eb->start + eb->len);
-
-       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
-
-       while(len > 0) {
-               page = extent_buffer_page(eb, i);
-               WARN_ON(!PageUptodate(page));
-
-               cur = min(len, PAGE_CACHE_SIZE - offset);
-               kaddr = kmap_atomic(page, KM_USER0);
-               memset(kaddr + offset, c, cur);
-               kunmap_atomic(kaddr, KM_USER0);
-
-               len -= cur;
-               offset = 0;
-               i++;
-       }
-}
-EXPORT_SYMBOL(memset_extent_buffer);
-
-void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
-                       unsigned long dst_offset, unsigned long src_offset,
-                       unsigned long len)
-{
-       u64 dst_len = dst->len;
-       size_t cur;
-       size_t offset;
-       struct page *page;
-       char *kaddr;
-       size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
-       unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
-
-       WARN_ON(src->len != dst_len);
-
-       offset = (start_offset + dst_offset) &
-               ((unsigned long)PAGE_CACHE_SIZE - 1);
-
-       while(len > 0) {
-               page = extent_buffer_page(dst, i);
-               WARN_ON(!PageUptodate(page));
-
-               cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
-
-               kaddr = kmap_atomic(page, KM_USER0);
-               read_extent_buffer(src, kaddr + offset, src_offset, cur);
-               kunmap_atomic(kaddr, KM_USER0);
-
-               src_offset += cur;
-               len -= cur;
-               offset = 0;
-               i++;
-       }
-}
-EXPORT_SYMBOL(copy_extent_buffer);
-
-static void move_pages(struct page *dst_page, struct page *src_page,
-                      unsigned long dst_off, unsigned long src_off,
-                      unsigned long len)
-{
-       char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
-       if (dst_page == src_page) {
-               memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
-       } else {
-               char *src_kaddr = kmap_atomic(src_page, KM_USER1);
-               char *p = dst_kaddr + dst_off + len;
-               char *s = src_kaddr + src_off + len;
-
-               while (len--)
-                       *--p = *--s;
-
-               kunmap_atomic(src_kaddr, KM_USER1);
-       }
-       kunmap_atomic(dst_kaddr, KM_USER0);
-}
-
-static void copy_pages(struct page *dst_page, struct page *src_page,
-                      unsigned long dst_off, unsigned long src_off,
-                      unsigned long len)
-{
-       char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
-       char *src_kaddr;
-
-       if (dst_page != src_page)
-               src_kaddr = kmap_atomic(src_page, KM_USER1);
-       else
-               src_kaddr = dst_kaddr;
-
-       memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
-       kunmap_atomic(dst_kaddr, KM_USER0);
-       if (dst_page != src_page)
-               kunmap_atomic(src_kaddr, KM_USER1);
-}
-
-void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
-                          unsigned long src_offset, unsigned long len)
-{
-       size_t cur;
-       size_t dst_off_in_page;
-       size_t src_off_in_page;
-       size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
-       unsigned long dst_i;
-       unsigned long src_i;
-
-       if (src_offset + len > dst->len) {
-               printk("memmove bogus src_offset %lu move len %lu len %lu\n",
-                      src_offset, len, dst->len);
-               BUG_ON(1);
-       }
-       if (dst_offset + len > dst->len) {
-               printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
-                      dst_offset, len, dst->len);
-               BUG_ON(1);
-       }
-
-       while(len > 0) {
-               dst_off_in_page = (start_offset + dst_offset) &
-                       ((unsigned long)PAGE_CACHE_SIZE - 1);
-               src_off_in_page = (start_offset + src_offset) &
-                       ((unsigned long)PAGE_CACHE_SIZE - 1);
-
-               dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
-               src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
-
-               cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
-                                              src_off_in_page));
-               cur = min_t(unsigned long, cur,
-                       (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
-
-               copy_pages(extent_buffer_page(dst, dst_i),
-                          extent_buffer_page(dst, src_i),
-                          dst_off_in_page, src_off_in_page, cur);
-
-               src_offset += cur;
-               dst_offset += cur;
-               len -= cur;
-       }
-}
-EXPORT_SYMBOL(memcpy_extent_buffer);
-
-void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
-                          unsigned long src_offset, unsigned long len)
-{
-       size_t cur;
-       size_t dst_off_in_page;
-       size_t src_off_in_page;
-       unsigned long dst_end = dst_offset + len - 1;
-       unsigned long src_end = src_offset + len - 1;
-       size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
-       unsigned long dst_i;
-       unsigned long src_i;
-
-       if (src_offset + len > dst->len) {
-               printk("memmove bogus src_offset %lu move len %lu len %lu\n",
-                      src_offset, len, dst->len);
-               BUG_ON(1);
-       }
-       if (dst_offset + len > dst->len) {
-               printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
-                      dst_offset, len, dst->len);
-               BUG_ON(1);
-       }
-       if (dst_offset < src_offset) {
-               memcpy_extent_buffer(dst, dst_offset, src_offset, len);
-               return;
-       }
-       while(len > 0) {
-               dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
-               src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
-
-               dst_off_in_page = (start_offset + dst_end) &
-                       ((unsigned long)PAGE_CACHE_SIZE - 1);
-               src_off_in_page = (start_offset + src_end) &
-                       ((unsigned long)PAGE_CACHE_SIZE - 1);
-
-               cur = min_t(unsigned long, len, src_off_in_page + 1);
-               cur = min(cur, dst_off_in_page + 1);
-               move_pages(extent_buffer_page(dst, dst_i),
-                          extent_buffer_page(dst, src_i),
-                          dst_off_in_page - cur + 1,
-                          src_off_in_page - cur + 1, cur);
-
-               dst_end -= cur;
-               src_end -= cur;
-               len -= cur;
-       }
-}
-EXPORT_SYMBOL(memmove_extent_buffer);
index ea60f5447b5bea2d8370f6cb34e7e55b95458e8e..56314217cfc0639aee6d40906df6523a3ac43563 100644 (file)
 
 #include <linux/rbtree.h>
 
+#define EXTENT_MAP_LAST_BYTE (u64)-4
 #define EXTENT_MAP_HOLE (u64)-3
 #define EXTENT_MAP_INLINE (u64)-2
 #define EXTENT_MAP_DELALLOC (u64)-1
 
-/* bits for the extent state */
-#define EXTENT_DIRTY 1
-#define EXTENT_WRITEBACK (1 << 1)
-#define EXTENT_UPTODATE (1 << 2)
-#define EXTENT_LOCKED (1 << 3)
-#define EXTENT_NEW (1 << 4)
-#define EXTENT_DELALLOC (1 << 5)
-#define EXTENT_DEFRAG (1 << 6)
-#define EXTENT_DEFRAG_DONE (1 << 7)
-#define EXTENT_BUFFER_FILLED (1 << 8)
-#define EXTENT_CSUM (1 << 9)
-#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
-
-/*
- * page->private values.  Every page that is controlled by the extent
- * map has page->private set to one.
- */
-#define EXTENT_PAGE_PRIVATE 1
-#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3
-
-
-struct extent_map_ops {
-       int (*fill_delalloc)(struct inode *inode, u64 start, u64 end);
-       int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
-       int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
-       int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end);
-       void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end);
-};
-
-struct extent_map_tree {
-       struct rb_root map;
-       struct rb_root state;
-       struct address_space *mapping;
-       u64 dirty_bytes;
-       rwlock_t lock;
-       struct extent_map_ops *ops;
-       spinlock_t lru_lock;
-       struct list_head buffer_lru;
-       int lru_size;
-};
-
-/* note, this must start with the same fields as fs/extent_map.c:tree_entry */
 struct extent_map {
-       u64 start;
-       u64 end; /* inclusive */
-       int in_tree;
        struct rb_node rb_node;
-       /* block_start and block_end are in bytes */
+
+       /* all of these are in bytes */
+       u64 start;
+       u64 len;
        u64 block_start;
-       u64 block_end; /* inclusive */
+       unsigned long flags;
        struct block_device *bdev;
        atomic_t refs;
-};
-
-/* note, this must start with the same fields as fs/extent_map.c:tree_entry */
-struct extent_state {
-       u64 start;
-       u64 end; /* inclusive */
        int in_tree;
-       struct rb_node rb_node;
-       wait_queue_head_t wq;
-       atomic_t refs;
-       unsigned long state;
-
-       /* for use by the FS */
-       u64 private;
-
-       struct list_head list;
 };
 
-struct extent_buffer {
-       u64 start;
-       unsigned long len;
-       char *map_token;
-       char *kaddr;
-       unsigned long map_start;
-       unsigned long map_len;
-       struct page *first_page;
-       struct list_head lru;
-       atomic_t refs;
-       int flags;
+struct extent_map_tree {
+       struct rb_root map;
+       struct extent_map *last;
+       spinlock_t lock;
 };
 
-typedef struct extent_map *(get_extent_t)(struct inode *inode,
-                                         struct page *page,
-                                         size_t page_offset,
-                                         u64 start, u64 end,
-                                         int create);
+static inline u64 extent_map_end(struct extent_map *em)
+{
+       if (em->start + em->len < em->start)
+               return (u64)-1;
+       return em->start + em->len;
+}
+
+static inline u64 extent_map_block_end(struct extent_map *em)
+{
+       if (em->block_start + em->len < em->block_start)
+               return (u64)-1;
+       return em->block_start + em->len;
+}
 
-void extent_map_tree_init(struct extent_map_tree *tree,
-                         struct address_space *mapping, gfp_t mask);
-void extent_map_tree_empty_lru(struct extent_map_tree *tree);
+void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask);
 struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
-                                        u64 start, u64 end);
+                                        u64 start, u64 len);
 int add_extent_mapping(struct extent_map_tree *tree,
                       struct extent_map *em);
 int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
-int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page);
-int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask);
-int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask);
+
 struct extent_map *alloc_extent_map(gfp_t mask);
 void free_extent_map(struct extent_map *em);
-int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
-                         get_extent_t *get_extent);
 int __init extent_map_init(void);
 void extent_map_exit(void);
-
-u64 count_range_bits(struct extent_map_tree *tree,
-                    u64 *start, u64 search_end,
-                    u64 max_bytes, unsigned long bits);
-
-int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
-                  int bits, int filled);
-int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
-                     int bits, gfp_t mask);
-int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
-                   int bits, gfp_t mask);
-int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
-                       gfp_t mask);
-int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
-                  gfp_t mask);
-int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
-                    gfp_t mask);
-int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
-                      gfp_t mask);
-int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end,
-                    gfp_t mask);
-int find_first_extent_bit(struct extent_map_tree *tree, u64 start,
-                         u64 *start_ret, u64 *end_ret, int bits);
-int extent_invalidatepage(struct extent_map_tree *tree,
-                         struct page *page, unsigned long offset);
-int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
-                         get_extent_t *get_extent,
-                         struct writeback_control *wbc);
-int extent_writepages(struct extent_map_tree *tree,
-                     struct address_space *mapping,
-                     get_extent_t *get_extent,
-                     struct writeback_control *wbc);
-int extent_readpages(struct extent_map_tree *tree,
-                    struct address_space *mapping,
-                    struct list_head *pages, unsigned nr_pages,
-                    get_extent_t get_extent);
-int extent_prepare_write(struct extent_map_tree *tree,
-                        struct inode *inode, struct page *page,
-                        unsigned from, unsigned to, get_extent_t *get_extent);
-int extent_commit_write(struct extent_map_tree *tree,
-                       struct inode *inode, struct page *page,
-                       unsigned from, unsigned to);
-sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
-               get_extent_t *get_extent);
-int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end);
-int set_state_private(struct extent_map_tree *tree, u64 start, u64 private);
-int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private);
-void set_page_extent_mapped(struct page *page);
-
-struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
-                                         u64 start, unsigned long len,
-                                         struct page *page0,
-                                         gfp_t mask);
-struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
-                                        u64 start, unsigned long len,
-                                         gfp_t mask);
-void free_extent_buffer(struct extent_buffer *eb);
-int read_extent_buffer_pages(struct extent_map_tree *tree,
-                            struct extent_buffer *eb, u64 start, int wait);
-
-static inline void extent_buffer_get(struct extent_buffer *eb)
-{
-       atomic_inc(&eb->refs);
-}
-
-int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
-                         unsigned long start,
-                         unsigned long len);
-void read_extent_buffer(struct extent_buffer *eb, void *dst,
-                       unsigned long start,
-                       unsigned long len);
-void write_extent_buffer(struct extent_buffer *eb, const void *src,
-                        unsigned long start, unsigned long len);
-void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
-                       unsigned long dst_offset, unsigned long src_offset,
-                       unsigned long len);
-void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
-                          unsigned long src_offset, unsigned long len);
-void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
-                          unsigned long src_offset, unsigned long len);
-void memset_extent_buffer(struct extent_buffer *eb, char c,
-                         unsigned long start, unsigned long len);
-int wait_on_extent_buffer_writeback(struct extent_map_tree *tree,
-                                   struct extent_buffer *eb);
-int clear_extent_buffer_dirty(struct extent_map_tree *tree,
-                             struct extent_buffer *eb);
-int set_extent_buffer_dirty(struct extent_map_tree *tree,
-                            struct extent_buffer *eb);
-int set_extent_buffer_uptodate(struct extent_map_tree *tree,
-                              struct extent_buffer *eb);
-int extent_buffer_uptodate(struct extent_map_tree *tree,
-                          struct extent_buffer *eb);
-int map_extent_buffer(struct extent_buffer *eb, unsigned long offset,
-                     unsigned long min_len, char **token, char **map,
-                     unsigned long *map_start,
-                     unsigned long *map_len, int km);
-int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
-                     unsigned long min_len, char **token, char **map,
-                     unsigned long *map_start,
-                     unsigned long *map_len, int km);
-void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
 #endif
index 1cd8c908811ef46f894f6abb179999c96a2a3665..c5bb00f92396a7a892b8bde0511d5ea48e558fca 100644 (file)
@@ -233,8 +233,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
        int err = 0;
        int i;
        struct inode *inode = fdentry(file)->d_inode;
-       struct extent_map *em;
-       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        u64 hint_byte;
        u64 num_bytes;
        u64 start_pos;
@@ -242,11 +241,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
        u64 end_pos = pos + write_bytes;
        u64 inline_size;
        loff_t isize = i_size_read(inode);
-       em = alloc_extent_map(GFP_NOFS);
-       if (!em)
-               return -ENOMEM;
-
-       em->bdev = inode->i_sb->s_bdev;
 
        start_pos = pos & ~((u64)root->sectorsize - 1);
        num_bytes = (write_bytes + pos - start_pos +
@@ -254,7 +248,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
 
        end_of_last_block = start_pos + num_bytes - 1;
 
-       lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS);
+       lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
        mutex_lock(&root->fs_info->fs_mutex);
        trans = btrfs_start_transaction(root, 1);
        if (!trans) {
@@ -268,7 +262,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
        if ((end_of_last_block & 4095) == 0) {
                printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block);
        }
-       set_extent_uptodate(em_tree, start_pos, end_of_last_block, GFP_NOFS);
+       set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS);
 
        /* FIXME...EIEIO, ENOSPC and more */
 
@@ -293,6 +287,8 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
                                                       inode->i_ino,
                                                       last_pos_in_file,
                                                       0, 0, hole_size);
+                       btrfs_drop_extent_cache(inode, last_pos_in_file,
+                                       last_pos_in_file + hole_size -1);
                        btrfs_check_file(root, inode);
                }
                if (err)
@@ -320,12 +316,12 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
                last_end += PAGE_CACHE_SIZE - 1;
                if (start_pos < isize) {
                        u64 delalloc_start = start_pos;
-                       existing_delalloc = count_range_bits(em_tree,
+                       existing_delalloc = count_range_bits(io_tree,
                                             &delalloc_start,
                                             end_of_last_block, (u64)-1,
                                             EXTENT_DELALLOC);
                }
-               set_extent_delalloc(em_tree, start_pos, end_of_last_block,
+               set_extent_delalloc(io_tree, start_pos, end_of_last_block,
                                 GFP_NOFS);
                spin_lock(&root->fs_info->delalloc_lock);
                root->fs_info->delalloc_bytes += (end_of_last_block + 1 -
@@ -346,6 +342,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
                inline_size -= start_pos;
                err = insert_inline_extent(trans, root, inode, start_pos,
                                           inline_size, pages, 0, num_pages);
+               btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1);
                BUG_ON(err);
        }
        if (end_pos > isize) {
@@ -356,8 +353,7 @@ failed:
        err = btrfs_end_transaction(trans, root);
 out_unlock:
        mutex_unlock(&root->fs_info->fs_mutex);
-       unlock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS);
-       free_extent_map(em);
+       unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
        return err;
 }
 
@@ -367,10 +363,21 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       /* lookup_extent_mapping() takes (start, len); our end is inclusive */
+       u64 len = end - start + 1;
 
+       /* start == 0 with end == (u64)-1 wraps len to 0; clamp to the max */
+       if (len == 0)
+               len = (u64)-1;
+
        while(1) {
+               spin_lock(&em_tree->lock);
-               em = lookup_extent_mapping(em_tree, start, end);
-               if (!em)
+               em = lookup_extent_mapping(em_tree, start, len);
+               if (!em) {
+                       spin_unlock(&em_tree->lock);
                        break;
+               }
                remove_extent_mapping(em_tree, em);
+               spin_unlock(&em_tree->lock);
+
                /* once for us */
                free_extent_map(em);
                /* once for the tree*/
index 67005480e1391b228b791084ccc8de7a790ee2bb..16d3aef45d189a561fafa15700181b1bde39ae93 100644 (file)
@@ -53,7 +53,7 @@ static struct inode_operations btrfs_file_inode_operations;
 static struct address_space_operations btrfs_aops;
 static struct address_space_operations btrfs_symlink_aops;
 static struct file_operations btrfs_dir_file_operations;
-static struct extent_map_ops btrfs_extent_map_ops;
+static struct extent_io_ops btrfs_extent_io_ops;
 
 static struct kmem_cache *btrfs_inode_cachep;
 struct kmem_cache *btrfs_trans_handle_cachep;
@@ -104,6 +104,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
        u64 num_bytes;
        u64 cur_alloc_size;
        u64 blocksize = root->sectorsize;
+       u64 orig_start = start;
+       u64 orig_num_bytes;
        struct btrfs_key ins;
        int ret;
 
@@ -115,6 +117,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
        num_bytes = max(blocksize,  num_bytes);
        ret = btrfs_drop_extents(trans, root, inode,
                                 start, start + num_bytes, start, &alloc_hint);
+       orig_num_bytes = num_bytes;
 
        if (alloc_hint == EXTENT_MAP_INLINE)
                goto out;
@@ -138,6 +141,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
                alloc_hint = ins.objectid + ins.offset;
                start += cur_alloc_size;
        }
+       btrfs_drop_extent_cache(inode, orig_start,
+                               orig_start + orig_num_bytes - 1);
        btrfs_add_ordered_inode(inode);
 out:
        btrfs_end_transaction(trans, root);
@@ -297,7 +302,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
        int ret = 0;
        struct inode *inode = page->mapping->host;
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct btrfs_csum_item *item;
        struct btrfs_path *path = NULL;
        u32 csum;
@@ -317,7 +322,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
        }
        read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
                           BTRFS_CRC32_SIZE);
-       set_state_private(em_tree, start, csum);
+       set_state_private(io_tree, start, csum);
 out:
        if (path)
                btrfs_free_path(path);
@@ -329,17 +334,19 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end)
 {
        size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
        struct inode *inode = page->mapping->host;
-       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        char *kaddr;
        u64 private;
        int ret;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        u32 csum = ~(u32)0;
        unsigned long flags;
+
        if (btrfs_test_opt(root, NODATASUM) ||
            btrfs_test_flag(inode, NODATASUM))
                return 0;
-       ret = get_state_private(em_tree, start, &private);
+
+       ret = get_state_private(io_tree, start, &private);
        local_irq_save(flags);
        kaddr = kmap_atomic(page, KM_IRQ0);
        if (ret) {
@@ -428,7 +435,7 @@ void btrfs_read_locked_inode(struct inode *inode)
        switch (inode->i_mode & S_IFMT) {
        case S_IFREG:
                inode->i_mapping->a_ops = &btrfs_aops;
-               BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
+               BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
                inode->i_fop = &btrfs_file_operations;
                inode->i_op = &btrfs_file_inode_operations;
                break;
@@ -873,7 +880,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page,
                              size_t zero_start)
 {
        char *kaddr;
-       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
@@ -884,12 +891,12 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page,
        WARN_ON(!PageLocked(page));
        set_page_extent_mapped(page);
 
-       lock_extent(em_tree, page_start, page_end, GFP_NOFS);
+       lock_extent(io_tree, page_start, page_end, GFP_NOFS);
        delalloc_start = page_start;
-       existing_delalloc = count_range_bits(&BTRFS_I(inode)->extent_tree,
+       existing_delalloc = count_range_bits(&BTRFS_I(inode)->io_tree,
                                             &delalloc_start, page_end,
                                             PAGE_CACHE_SIZE, EXTENT_DELALLOC);
-       set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start,
+       set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
                            page_end, GFP_NOFS);
 
        spin_lock(&root->fs_info->delalloc_lock);
@@ -903,7 +910,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page,
                kunmap(page);
        }
        set_page_dirty(page);
-       unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
+       unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
 
        return ret;
 }
@@ -961,7 +968,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
            attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
                struct btrfs_trans_handle *trans;
                struct btrfs_root *root = BTRFS_I(inode)->root;
-               struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+               struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 
                u64 mask = root->sectorsize - 1;
                u64 pos = (inode->i_size + mask) & ~mask;
@@ -986,7 +993,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
 
                btrfs_truncate_page(inode->i_mapping, inode->i_size);
 
-               lock_extent(em_tree, pos, block_end, GFP_NOFS);
+               lock_extent(io_tree, pos, block_end, GFP_NOFS);
                hole_size = block_end - hole_start;
 
                mutex_lock(&root->fs_info->fs_mutex);
@@ -1001,11 +1008,13 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
                                                       inode->i_ino,
                                                       hole_start, 0, 0,
                                                       hole_size);
+                       btrfs_drop_extent_cache(inode, hole_start,
+                                               hole_start + hole_size - 1);
                        btrfs_check_file(root, inode);
                }
                btrfs_end_transaction(trans, root);
                mutex_unlock(&root->fs_info->fs_mutex);
-               unlock_extent(em_tree, pos, block_end, GFP_NOFS);
+               unlock_extent(io_tree, pos, block_end, GFP_NOFS);
                if (err)
                        return err;
        }
@@ -1189,7 +1198,8 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
        struct btrfs_iget_args *args = p;
        inode->i_ino = args->ino;
        BTRFS_I(inode)->root = args->root;
-       extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
+       extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
+       extent_io_tree_init(&BTRFS_I(inode)->io_tree,
                             inode->i_mapping, GFP_NOFS);
        return 0;
 }
@@ -1485,7 +1495,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        if (!inode)
                return ERR_PTR(-ENOMEM);
 
-       extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
+       extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
+       extent_io_tree_init(&BTRFS_I(inode)->io_tree,
                             inode->i_mapping, GFP_NOFS);
        BTRFS_I(inode)->root = root;
 
@@ -1672,9 +1683,10 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
                inode->i_mapping->a_ops = &btrfs_aops;
                inode->i_fop = &btrfs_file_operations;
                inode->i_op = &btrfs_file_inode_operations;
-               extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
+               extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
+               extent_io_tree_init(&BTRFS_I(inode)->io_tree,
                                     inode->i_mapping, GFP_NOFS);
-               BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
+               BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
        }
        dir->i_sb->s_dirt = 1;
        btrfs_update_inode_block_group(trans, inode);
@@ -1816,7 +1828,7 @@ out_unlock:
 }
 
 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
-                                   size_t page_offset, u64 start, u64 end,
+                                   size_t page_offset, u64 start, u64 len,
                                    int create)
 {
        int ret;
@@ -1826,7 +1838,6 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
        u64 extent_end = 0;
        u64 objectid = inode->i_ino;
        u32 found_type;
-       int failed_insert = 0;
        struct btrfs_path *path;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_file_extent_item *item;
@@ -1834,6 +1845,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
        struct btrfs_key found_key;
        struct extent_map *em = NULL;
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct btrfs_trans_handle *trans = NULL;
 
        path = btrfs_alloc_path();
@@ -1841,24 +1853,26 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
        mutex_lock(&root->fs_info->fs_mutex);
 
 again:
-       em = lookup_extent_mapping(em_tree, start, end);
+       spin_lock(&em_tree->lock);
+       em = lookup_extent_mapping(em_tree, start, len);
+       spin_unlock(&em_tree->lock);
+
        if (em) {
                if (em->start > start) {
-                       printk("get_extent start %Lu em start %Lu\n",
-                              start, em->start);
+                       printk("get_extent lookup [%Lu %Lu] em [%Lu %Lu]\n",
+                              start, len, em->start, em->len);
                        WARN_ON(1);
                }
                goto out;
        }
+       em = alloc_extent_map(GFP_NOFS);
        if (!em) {
-               em = alloc_extent_map(GFP_NOFS);
-               if (!em) {
-                       err = -ENOMEM;
-                       goto out;
-               }
-               em->start = EXTENT_MAP_HOLE;
-               em->end = EXTENT_MAP_HOLE;
+               err = -ENOMEM;
+               goto out;
        }
+
+       em->start = EXTENT_MAP_HOLE;
+       em->len = (u64)-1;
        em->bdev = inode->i_sb->s_bdev;
        ret = btrfs_lookup_file_extent(trans, root, path,
                                       objectid, start, trans != NULL);
@@ -1893,28 +1907,25 @@ again:
                if (start < extent_start || start >= extent_end) {
                        em->start = start;
                        if (start < extent_start) {
-                               if (end < extent_start)
+                               if (start + len <= extent_start)
                                        goto not_found;
-                               em->end = extent_end - 1;
+                               em->len = extent_end - extent_start;
                        } else {
-                               em->end = end;
+                               em->len = len;
                        }
                        goto not_found_em;
                }
                bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
                if (bytenr == 0) {
                        em->start = extent_start;
-                       em->end = extent_end - 1;
+                       em->len = extent_end - extent_start;
                        em->block_start = EXTENT_MAP_HOLE;
-                       em->block_end = EXTENT_MAP_HOLE;
                        goto insert;
                }
                bytenr += btrfs_file_extent_offset(leaf, item);
                em->block_start = bytenr;
-               em->block_end = em->block_start +
-                       btrfs_file_extent_num_bytes(leaf, item) - 1;
                em->start = extent_start;
-               em->end = extent_end - 1;
+               em->len = extent_end - extent_start;
                goto insert;
        } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
                unsigned long ptr;
@@ -1925,25 +1936,24 @@ again:
 
                size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf,
                                                    path->slots[0]));
-               extent_end = (extent_start + size - 1) |
-                       ((u64)root->sectorsize - 1);
+               extent_end = (extent_start + size + root->sectorsize - 1) &
+                       ~((u64)root->sectorsize - 1);
                if (start < extent_start || start >= extent_end) {
                        em->start = start;
                        if (start < extent_start) {
-                               if (end < extent_start)
+                               if (start + len <= extent_start)
                                        goto not_found;
-                               em->end = extent_end;
+                               em->len = extent_end - extent_start;
                        } else {
-                               em->end = end;
+                               em->len = len;
                        }
                        goto not_found_em;
                }
                em->block_start = EXTENT_MAP_INLINE;
-               em->block_end = EXTENT_MAP_INLINE;
 
                if (!page) {
                        em->start = extent_start;
-                       em->end = extent_start + size - 1;
+                       em->len = size;
                        goto out;
                }
 
@@ -1952,8 +1962,7 @@ again:
                copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset,
                                size - extent_offset);
                em->start = extent_start + extent_offset;
-               em->end = (em->start + copy_size -1) |
-                       ((u64)root->sectorsize -1);
+               em->len = copy_size;
                map = kmap(page);
                ptr = btrfs_file_extent_inline_start(item) + extent_offset;
                if (create == 0 && !PageUptodate(page)) {
@@ -1974,7 +1983,8 @@ again:
                        btrfs_mark_buffer_dirty(leaf);
                }
                kunmap(page);
-               set_extent_uptodate(em_tree, em->start, em->end, GFP_NOFS);
+               set_extent_uptodate(io_tree, em->start,
+                                   extent_map_end(em) - 1, GFP_NOFS);
                goto insert;
        } else {
                printk("unkknown found_type %d\n", found_type);
@@ -1982,33 +1992,29 @@ again:
        }
 not_found:
        em->start = start;
-       em->end = end;
+       em->len = len;
 not_found_em:
        em->block_start = EXTENT_MAP_HOLE;
-       em->block_end = EXTENT_MAP_HOLE;
 insert:
        btrfs_release_path(root, path);
-       if (em->start > start || em->end < start) {
-               printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->end, start, end);
+       if (em->start > start || extent_map_end(em) <= start) {
+               printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len);
                err = -EIO;
                goto out;
        }
+
+       err = 0;
+       spin_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
        if (ret == -EEXIST) {
                free_extent_map(em);
-               em = NULL;
-               if (0 && failed_insert == 1) {
-                       btrfs_drop_extent_cache(inode, start, end);
-               }
-               failed_insert++;
-               if (failed_insert > 5) {
-                       printk("failing to insert %Lu %Lu\n", start, end);
+               em = lookup_extent_mapping(em_tree, start, len);
+               if (!em) {
                        err = -EIO;
-                       goto out;
+                       printk("failing to insert %Lu %Lu\n", start, len);
                }
-               goto again;
        }
-       err = 0;
+       spin_unlock(&em_tree->lock);
 out:
        btrfs_free_path(path);
        if (trans) {
@@ -2032,14 +2038,14 @@ static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
 
 int btrfs_readpage(struct file *file, struct page *page)
 {
-       struct extent_map_tree *tree;
-       tree = &BTRFS_I(page->mapping->host)->extent_tree;
+       struct extent_io_tree *tree;
+       tree = &BTRFS_I(page->mapping->host)->io_tree;
        return extent_read_full_page(tree, page, btrfs_get_extent);
 }
 
 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
 {
-       struct extent_map_tree *tree;
+       struct extent_io_tree *tree;
 
 
        if (current->flags & PF_MEMALLOC) {
@@ -2047,15 +2053,15 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
                unlock_page(page);
                return 0;
        }
-       tree = &BTRFS_I(page->mapping->host)->extent_tree;
+       tree = &BTRFS_I(page->mapping->host)->io_tree;
        return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
 }
 
 static int btrfs_writepages(struct address_space *mapping,
                            struct writeback_control *wbc)
 {
-       struct extent_map_tree *tree;
-       tree = &BTRFS_I(mapping->host)->extent_tree;
+       struct extent_io_tree *tree;
+       tree = &BTRFS_I(mapping->host)->io_tree;
        return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
 }
 
@@ -2063,19 +2069,21 @@ static int
 btrfs_readpages(struct file *file, struct address_space *mapping,
                struct list_head *pages, unsigned nr_pages)
 {
-       struct extent_map_tree *tree;
-       tree = &BTRFS_I(mapping->host)->extent_tree;
+       struct extent_io_tree *tree;
+       tree = &BTRFS_I(mapping->host)->io_tree;
        return extent_readpages(tree, mapping, pages, nr_pages,
                                btrfs_get_extent);
 }
 
 static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
 {
-       struct extent_map_tree *tree;
+       struct extent_io_tree *tree;
+       struct extent_map_tree *map;
        int ret;
 
-       tree = &BTRFS_I(page->mapping->host)->extent_tree;
-       ret = try_release_extent_mapping(tree, page);
+       tree = &BTRFS_I(page->mapping->host)->io_tree;
+       map = &BTRFS_I(page->mapping->host)->extent_tree;
+       ret = try_release_extent_mapping(map, tree, page);
        if (ret == 1) {
                ClearPagePrivate(page);
                set_page_private(page, 0);
@@ -2086,9 +2094,9 @@ static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
 
 static void btrfs_invalidatepage(struct page *page, unsigned long offset)
 {
-       struct extent_map_tree *tree;
+       struct extent_io_tree *tree;
 
-       tree = &BTRFS_I(page->mapping->host)->extent_tree;
+       tree = &BTRFS_I(page->mapping->host)->io_tree;
        extent_invalidatepage(tree, page, offset);
        btrfs_releasepage(page, GFP_NOFS);
 }
@@ -2374,7 +2382,7 @@ unsigned long btrfs_force_ra(struct address_space *mapping,
 int btrfs_defrag_file(struct file *file) {
        struct inode *inode = fdentry(file)->d_inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct page *page;
        unsigned long last_index;
        unsigned long ra_index = 0;
@@ -2414,13 +2422,13 @@ int btrfs_defrag_file(struct file *file) {
                page_start = (u64)page->index << PAGE_CACHE_SHIFT;
                page_end = page_start + PAGE_CACHE_SIZE - 1;
 
-               lock_extent(em_tree, page_start, page_end, GFP_NOFS);
+               lock_extent(io_tree, page_start, page_end, GFP_NOFS);
                delalloc_start = page_start;
                existing_delalloc =
-                       count_range_bits(&BTRFS_I(inode)->extent_tree,
+                       count_range_bits(&BTRFS_I(inode)->io_tree,
                                         &delalloc_start, page_end,
                                         PAGE_CACHE_SIZE, EXTENT_DELALLOC);
-               set_extent_delalloc(em_tree, page_start,
+               set_extent_delalloc(io_tree, page_start,
                                    page_end, GFP_NOFS);
 
                spin_lock(&root->fs_info->delalloc_lock);
@@ -2428,7 +2436,7 @@ int btrfs_defrag_file(struct file *file) {
                                                 existing_delalloc;
                spin_unlock(&root->fs_info->delalloc_lock);
 
-               unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
+               unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
                set_page_dirty(page);
                unlock_page(page);
                page_cache_release(page);
@@ -2842,9 +2850,10 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
                inode->i_mapping->a_ops = &btrfs_aops;
                inode->i_fop = &btrfs_file_operations;
                inode->i_op = &btrfs_file_inode_operations;
-               extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
+               extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
+               extent_io_tree_init(&BTRFS_I(inode)->io_tree,
                                     inode->i_mapping, GFP_NOFS);
-               BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
+               BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
        }
        dir->i_sb->s_dirt = 1;
        btrfs_update_inode_block_group(trans, inode);
@@ -2934,7 +2943,7 @@ static struct file_operations btrfs_dir_file_operations = {
 #endif
 };
 
-static struct extent_map_ops btrfs_extent_map_ops = {
+static struct extent_io_ops btrfs_extent_io_ops = {
        .fill_delalloc = run_delalloc_range,
        .writepage_io_hook = btrfs_writepage_io_hook,
        .readpage_io_hook = btrfs_readpage_io_hook,
index 8b52c69fda2eaaaaa0947140e17a633ea806e1ed..f8a1016600b182ae31814dba3852fb35885ab69c 100644 (file)
@@ -468,10 +468,15 @@ static int __init init_btrfs_fs(void)
        err = btrfs_init_cachep();
        if (err)
                goto free_transaction_sys;
-       err = extent_map_init();
+
+       err = extent_io_init();
        if (err)
                goto free_cachep;
 
+       err = extent_map_init();
+       if (err)
+               goto free_extent_io;
+
        err = register_filesystem(&btrfs_fs_type);
        if (err)
                goto free_extent_map;
@@ -479,6 +484,8 @@ static int __init init_btrfs_fs(void)
 
 free_extent_map:
        extent_map_exit();
+free_extent_io:
+       extent_io_exit();
 free_cachep:
        btrfs_destroy_cachep();
 free_transaction_sys:
@@ -492,6 +499,7 @@ static void __exit exit_btrfs_fs(void)
        btrfs_exit_transaction_sys();
        btrfs_destroy_cachep();
        extent_map_exit();
+       extent_io_exit();
        unregister_filesystem(&btrfs_fs_type);
        btrfs_exit_sysfs();
 }
index 163c01a244982977ac44745e3366a974888ff72b..b4a1bc62a784f04dadb609969596cbda40e15d0b 100644 (file)
@@ -70,7 +70,7 @@ static int join_transaction(struct btrfs_root *root)
                INIT_LIST_HEAD(&cur_trans->pending_snapshots);
                list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
                btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree);
-               extent_map_tree_init(&cur_trans->dirty_pages,
+               extent_io_tree_init(&cur_trans->dirty_pages,
                                     root->fs_info->btree_inode->i_mapping,
                                     GFP_NOFS);
        } else {
@@ -153,7 +153,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
        int ret;
        int err;
        int werr = 0;
-       struct extent_map_tree *dirty_pages;
+       struct extent_io_tree *dirty_pages;
        struct page *page;
        struct inode *btree_inode = root->fs_info->btree_inode;
        u64 start;
@@ -610,7 +610,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        struct btrfs_transaction *cur_trans;
        struct btrfs_transaction *prev_trans = NULL;
        struct list_head dirty_fs_roots;
-       struct extent_map_tree *pinned_copy;
+       struct extent_io_tree *pinned_copy;
        DEFINE_WAIT(wait);
        int ret;
 
@@ -639,7 +639,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        if (!pinned_copy)
                return -ENOMEM;
 
-       extent_map_tree_init(pinned_copy,
+       extent_io_tree_init(pinned_copy,
                             root->fs_info->btree_inode->i_mapping, GFP_NOFS);
 
        trans->transaction->in_commit = 1;
index fd52e9b23922c0e53cb45b49ddb78048b6472d8d..c3172ddb332116bfcf20347948fc547e1a1e29d5 100644 (file)
@@ -29,7 +29,7 @@ struct btrfs_transaction {
        int use_count;
        int commit_done;
        struct list_head list;
-       struct extent_map_tree dirty_pages;
+       struct extent_io_tree dirty_pages;
        unsigned long start_time;
        struct btrfs_ordered_inode_tree ordered_inode_tree;
        wait_queue_head_t writer_wait;