Btrfs: Search data ordered extents first for checksums on read
[linux-2.6-block.git] / fs / btrfs / extent_io.c
index 36a943e51808e1a18b7839de2a520257f914539c..e3547a992d5c80f5965b5413b0fb6b06ee1368a3 100644 (file)
@@ -26,6 +26,7 @@ static struct kmem_cache *extent_buffer_cache;
 
 static LIST_HEAD(buffers);
 static LIST_HEAD(states);
+static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED;
 
 #define BUFFER_LRU_MAX 64
 
@@ -64,15 +65,22 @@ free_state_cache:
 void extent_io_exit(void)
 {
        struct extent_state *state;
+       struct extent_buffer *eb;
 
        while (!list_empty(&states)) {
-               state = list_entry(states.next, struct extent_state, list);
+               state = list_entry(states.next, struct extent_state, leak_list);
                printk("state leak: start %Lu end %Lu state %lu in tree %p refs %d\n", state->start, state->end, state->state, state->tree, atomic_read(&state->refs));
-               list_del(&state->list);
+               list_del(&state->leak_list);
                kmem_cache_free(extent_state_cache, state);
 
        }
 
+       while (!list_empty(&buffers)) {
+               eb = list_entry(buffers.next, struct extent_buffer, leak_list);
+               printk("buffer leak start %Lu len %lu refs %d\n", eb->start, eb->len, atomic_read(&eb->refs));
+               list_del(&eb->leak_list);
+               kmem_cache_free(extent_buffer_cache, eb);
+       }
+
        if (extent_state_cache)
                kmem_cache_destroy(extent_state_cache);
        if (extent_buffer_cache)
@@ -83,39 +91,29 @@ void extent_io_tree_init(struct extent_io_tree *tree,
                          struct address_space *mapping, gfp_t mask)
 {
        tree->state.rb_node = NULL;
+       tree->buffer.rb_node = NULL;
        tree->ops = NULL;
        tree->dirty_bytes = 0;
        spin_lock_init(&tree->lock);
-       spin_lock_init(&tree->lru_lock);
+       spin_lock_init(&tree->buffer_lock);
        tree->mapping = mapping;
-       INIT_LIST_HEAD(&tree->buffer_lru);
-       tree->lru_size = 0;
-       tree->last = NULL;
 }
 EXPORT_SYMBOL(extent_io_tree_init);
 
-void extent_io_tree_empty_lru(struct extent_io_tree *tree)
-{
-       struct extent_buffer *eb;
-       while(!list_empty(&tree->buffer_lru)) {
-               eb = list_entry(tree->buffer_lru.next, struct extent_buffer,
-                               lru);
-               list_del_init(&eb->lru);
-               free_extent_buffer(eb);
-       }
-}
-EXPORT_SYMBOL(extent_io_tree_empty_lru);
-
 struct extent_state *alloc_extent_state(gfp_t mask)
 {
        struct extent_state *state;
+       unsigned long flags;
 
        state = kmem_cache_alloc(extent_state_cache, mask);
-       if (!state || IS_ERR(state))
+       if (!state)
                return state;
        state->state = 0;
        state->private = 0;
        state->tree = NULL;
+       spin_lock_irqsave(&leak_lock, flags);
+       list_add(&state->leak_list, &states);
+       spin_unlock_irqrestore(&leak_lock, flags);
 
        atomic_set(&state->refs, 1);
        init_waitqueue_head(&state->wq);
@@ -128,7 +126,11 @@ void free_extent_state(struct extent_state *state)
        if (!state)
                return;
        if (atomic_dec_and_test(&state->refs)) {
+               unsigned long flags;
                WARN_ON(state->tree);
+               spin_lock_irqsave(&leak_lock, flags);
+               list_del(&state->leak_list);
+               spin_unlock_irqrestore(&leak_lock, flags);
                kmem_cache_free(extent_state_cache, state);
        }
 }
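
The leak tracking added above is a debug pattern worth calling out: every live object is threaded onto a global list under a dedicated spinlock, and module exit walks the list to report survivors. A minimal standalone sketch of the same pattern, with illustrative names (tracked_thing/thing_cache are not from this patch):

	struct tracked_thing {
		struct list_head leak_list;
	};

	static LIST_HEAD(live_things);
	static spinlock_t track_lock = SPIN_LOCK_UNLOCKED;
	static struct kmem_cache *thing_cache;	/* created with kmem_cache_create() at init */

	static struct tracked_thing *thing_alloc(gfp_t mask)
	{
		struct tracked_thing *t = kmem_cache_alloc(thing_cache, mask);
		unsigned long flags;

		if (!t)
			return NULL;
		/* irqsave because frees can happen from bio end_io context */
		spin_lock_irqsave(&track_lock, flags);
		list_add(&t->leak_list, &live_things);
		spin_unlock_irqrestore(&track_lock, flags);
		return t;
	}

	static void thing_free(struct tracked_thing *t)
	{
		unsigned long flags;

		spin_lock_irqsave(&track_lock, flags);
		list_del(&t->leak_list);
		spin_unlock_irqrestore(&track_lock, flags);
		kmem_cache_free(thing_cache, t);
	}
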
@@ -170,12 +172,6 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
        struct tree_entry *entry;
        struct tree_entry *prev_entry = NULL;
 
-       if (tree->last) {
-               struct extent_state *state;
-               state = tree->last;
-               if (state->start <= offset && offset <= state->end)
-                       return &tree->last->rb_node;
-       }
        while(n) {
                entry = rb_entry(n, struct tree_entry, rb_node);
                prev = n;
@@ -186,7 +182,6 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
                else if (offset > entry->end)
                        n = n->rb_right;
                else {
-                       tree->last = rb_entry(n, struct extent_state, rb_node);
                        return n;
                }
        }
@@ -220,15 +215,55 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree,
 
        ret = __etree_search(tree, offset, &prev, NULL);
        if (!ret) {
-               if (prev) {
-                       tree->last = rb_entry(prev, struct extent_state,
-                                             rb_node);
-               }
                return prev;
        }
        return ret;
 }
 
+static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
+                                         u64 offset, struct rb_node *node)
+{
+       struct rb_root *root = &tree->buffer;
+       struct rb_node ** p = &root->rb_node;
+       struct rb_node * parent = NULL;
+       struct extent_buffer *eb;
+
+       while(*p) {
+               parent = *p;
+               eb = rb_entry(parent, struct extent_buffer, rb_node);
+
+               if (offset < eb->start)
+                       p = &(*p)->rb_left;
+               else if (offset > eb->start)
+                       p = &(*p)->rb_right;
+               else
+                       return eb;
+       }
+
+       rb_link_node(node, parent, p);
+       rb_insert_color(node, root);
+       return NULL;
+}
+
+static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
+                                          u64 offset)
+{
+       struct rb_root *root = &tree->buffer;
+       struct rb_node * n = root->rb_node;
+       struct extent_buffer *eb;
+
+       while(n) {
+               eb = rb_entry(n, struct extent_buffer, rb_node);
+               if (offset < eb->start)
+                       n = n->rb_left;
+               else if (offset > eb->start)
+                       n = n->rb_right;
+               else
+                       return eb;
+       }
+       return NULL;
+}
+
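
buffer_tree_insert() and buffer_search() replace the old per-tree LRU with an rbtree keyed by eb->start. Lookups take tree->buffer_lock and bump the refcount while the lock still pins the buffer; this is the pattern find_extent_buffer() uses further down, shown here in isolation:

	static struct extent_buffer *lookup_buffer_ref(struct extent_io_tree *tree,
						       u64 start)
	{
		struct extent_buffer *eb;

		spin_lock(&tree->buffer_lock);
		eb = buffer_search(tree, start);
		if (eb)
			atomic_inc(&eb->refs);	/* take the ref while the lock pins eb */
		spin_unlock(&tree->buffer_lock);
		return eb;
	}
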
 /*
  * utility function to look for merge candidates inside a given range.
  * Any extents with matching state are merged together into a single
@@ -254,8 +289,6 @@ static int merge_state(struct extent_io_tree *tree,
                    other->state == state->state) {
                        state->start = other->start;
                        other->tree = NULL;
-                       if (tree->last == other)
-                               tree->last = NULL;
                        rb_erase(&other->rb_node, &tree->state);
                        free_extent_state(other);
                }
@@ -267,8 +300,6 @@ static int merge_state(struct extent_io_tree *tree,
                    other->state == state->state) {
                        other->start = state->start;
                        state->tree = NULL;
-                       if (tree->last == state)
-                               tree->last = NULL;
                        rb_erase(&state->rb_node, &tree->state);
                        free_extent_state(state);
                }
@@ -331,7 +362,6 @@ static int insert_state(struct extent_io_tree *tree,
                return -EEXIST;
        }
        state->tree = tree;
-       tree->last = state;
        merge_state(tree, state);
        return 0;
 }
@@ -397,8 +427,6 @@ static int clear_state_bit(struct extent_io_tree *tree,
        if (delete || state->state == 0) {
                if (state->tree) {
                        clear_state_cb(tree, state, state->state);
-                       if (tree->last == state)
-                               tree->last = NULL;
                        rb_erase(&state->rb_node, &tree->state);
                        state->tree = NULL;
                        free_extent_state(state);
@@ -776,6 +804,13 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 }
 EXPORT_SYMBOL(set_extent_dirty);
 
+int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+                      gfp_t mask)
+{
+       return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_ordered);
+
 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                    int bits, gfp_t mask)
 {
@@ -795,8 +830,8 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
                     gfp_t mask)
 {
        return set_extent_bit(tree, start, end,
-                             EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
-                             mask);
+                             EXTENT_DELALLOC | EXTENT_DIRTY,
+                             0, NULL, mask);
 }
 EXPORT_SYMBOL(set_extent_delalloc);
 
@@ -808,6 +843,13 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 }
 EXPORT_SYMBOL(clear_extent_dirty);
 
+int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+                        gfp_t mask)
+{
+       return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
+}
+EXPORT_SYMBOL(clear_extent_ordered);
+
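
set_extent_ordered()/clear_extent_ordered() are thin wrappers that tag and untag a byte range with the new EXTENT_ORDERED bit. A hedged usage sketch, assuming the btrfs per-inode io_tree; the real call sites live in the ordered-data code, not in this file:

	static void mark_range_ordered(struct inode *inode, u64 start, u64 end)
	{
		/* BTRFS_I(inode)->io_tree is an assumption about the caller */
		struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;

		set_extent_ordered(io_tree, start, end, GFP_NOFS);
		/* ... data I/O runs while the bit holds off releasepage ... */
		clear_extent_ordered(io_tree, start, end, GFP_NOFS);
	}
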
 int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
                     gfp_t mask)
 {
@@ -940,7 +982,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
         * our range starts.
         */
        node = tree_search(tree, start);
-       if (!node || IS_ERR(node)) {
+       if (!node) {
                goto out;
        }
 
@@ -962,6 +1004,35 @@ out:
 }
 EXPORT_SYMBOL(find_first_extent_bit);
 
+struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
+                                                u64 start, int bits)
+{
+       struct rb_node *node;
+       struct extent_state *state;
+
+       /*
+        * this search will find all the extents that end after
+        * our range starts.
+        */
+       node = tree_search(tree, start);
+       if (!node) {
+               goto out;
+       }
+
+       while(1) {
+               state = rb_entry(node, struct extent_state, rb_node);
+               if (state->end >= start && (state->state & bits)) {
+                       return state;
+               }
+               node = rb_next(node);
+               if (!node)
+                       break;
+       }
+out:
+       return NULL;
+}
+EXPORT_SYMBOL(find_first_extent_bit_state);
+
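
Unlike find_first_extent_bit(), the new find_first_extent_bit_state() hands back the extent_state record itself, so the caller can read start/end/private without a second lookup. The record is only stable under the tree lock; a sketch of the intended use (the lock discipline here is an assumption based on how the rest of this file takes tree->lock):

	static u64 first_ordered_after(struct extent_io_tree *tree, u64 start)
	{
		struct extent_state *state;
		unsigned long flags;
		u64 found = (u64)-1;

		spin_lock_irqsave(&tree->lock, flags);
		state = find_first_extent_bit_state(tree, start, EXTENT_ORDERED);
		if (state)
			found = state->start;	/* copy out while the lock holds it */
		spin_unlock_irqrestore(&tree->lock, flags);
		return found;
	}
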
 u64 find_lock_delalloc_range(struct extent_io_tree *tree,
                             u64 *start, u64 *end, u64 max_bytes)
 {
@@ -978,8 +1049,9 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree,
         */
 search_again:
        node = tree_search(tree, cur_start);
-       if (!node || IS_ERR(node)) {
-               *end = (u64)-1;
+       if (!node) {
+               if (!found)
+                       *end = (u64)-1;
                goto out;
        }
 
@@ -1066,7 +1138,7 @@ u64 count_range_bits(struct extent_io_tree *tree,
         * our range starts.
         */
        node = tree_search(tree, cur_start);
-       if (!node || IS_ERR(node)) {
+       if (!node) {
                goto out;
        }
 
@@ -1167,7 +1239,7 @@ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
         * our range starts.
         */
        node = tree_search(tree, start);
-       if (!node || IS_ERR(node)) {
+       if (!node) {
                ret = -ENOENT;
                goto out;
        }
@@ -1194,7 +1266,7 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
         * our range starts.
         */
        node = tree_search(tree, start);
-       if (!node || IS_ERR(node)) {
+       if (!node) {
                ret = -ENOENT;
                goto out;
        }
@@ -1319,7 +1391,7 @@ static int end_bio_extent_writepage(struct bio *bio,
                                   unsigned int bytes_done, int err)
 #endif
 {
-       const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       int uptodate = err == 0;
        struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
        struct extent_state *state = bio->bi_private;
        struct extent_io_tree *tree = state->tree;
@@ -1328,6 +1400,7 @@ static int end_bio_extent_writepage(struct bio *bio,
        u64 end;
        u64 cur;
        int whole_page;
+       int ret;
        unsigned long flags;
 
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
@@ -1347,6 +1420,23 @@ static int end_bio_extent_writepage(struct bio *bio,
 
                if (--bvec >= bio->bi_io_vec)
                        prefetchw(&bvec->bv_page->flags);
+               if (tree->ops && tree->ops->writepage_end_io_hook) {
+                       ret = tree->ops->writepage_end_io_hook(page, start,
+                                                      end, state, uptodate);
+                       if (ret)
+                               uptodate = 0;
+               }
+
+               if (!uptodate && tree->ops &&
+                   tree->ops->writepage_io_failed_hook) {
+                       ret = tree->ops->writepage_io_failed_hook(bio, page,
+                                                        start, end, state);
+                       if (ret == 0) {
+                               state = NULL;
+                               uptodate = (err == 0);
+                               continue;
+                       }
+               }
 
                if (!uptodate) {
                        clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
@@ -1354,11 +1444,6 @@ static int end_bio_extent_writepage(struct bio *bio,
                        SetPageError(page);
                }
 
-               if (tree->ops && tree->ops->writepage_end_io_hook) {
-                       tree->ops->writepage_end_io_hook(page, start, end,
-                                                        state);
-               }
-
                /*
                 * bios can get merged in funny ways, and so we need to
                 * be careful with the state variable.  We know the
@@ -1488,6 +1573,17 @@ static int end_bio_extent_readpage(struct bio *bio,
                        if (ret)
                                uptodate = 0;
                }
+               if (!uptodate && tree->ops &&
+                   tree->ops->readpage_io_failed_hook) {
+                       ret = tree->ops->readpage_io_failed_hook(bio, page,
+                                                        start, end, state);
+                       if (ret == 0) {
+                               state = NULL;
+                               uptodate =
+                                       test_bit(BIO_UPTODATE, &bio->bi_flags);
+                               continue;
+                       }
+               }
 
                spin_lock_irqsave(&tree->lock, flags);
                if (!state || state->end != end) {
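
The retry logic above defines the failed-read contract: when readpage_io_failed_hook() returns 0, a resubmission to another mirror is in flight, so the end_io loop skips the bvec instead of erroring the page. A hedged sketch of a hook honoring that contract; resubmit_read_to_next_mirror() is a made-up helper for illustration:

	static int example_readpage_io_failed_hook(struct bio *failed_bio,
						   struct page *page, u64 start,
						   u64 end, struct extent_state *state)
	{
		/* hypothetical helper: queue a read of [start, end] from the
		 * next copy, returning 0 on success */
		if (resubmit_read_to_next_mirror(page, start, end) == 0)
			return 0;	/* retry in flight, caller skips this bvec */
		return -EIO;		/* out of copies, let the caller fail the page */
	}
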
@@ -1502,8 +1598,9 @@ static int end_bio_extent_readpage(struct bio *bio,
                        }
                        if (!state) {
                                spin_unlock_irqrestore(&tree->lock, flags);
-                               set_extent_uptodate(tree, start, end,
-                                                   GFP_ATOMIC);
+                               if (uptodate)
+                                       set_extent_uptodate(tree, start, end,
+                                                           GFP_ATOMIC);
                                unlock_extent(tree, start, end, GFP_ATOMIC);
                                goto next_io;
                        }
@@ -1521,8 +1618,10 @@ static int end_bio_extent_readpage(struct bio *bio,
                        } else {
                                state = NULL;
                        }
-                       set_state_cb(tree, clear, EXTENT_UPTODATE);
-                       clear->state |= EXTENT_UPTODATE;
+                       if (uptodate) {
+                               set_state_cb(tree, clear, EXTENT_UPTODATE);
+                               clear->state |= EXTENT_UPTODATE;
+                       }
                        clear_state_bit(tree, clear, EXTENT_LOCKED,
                                        1, 0);
                        if (cur == start)
@@ -1633,15 +1732,15 @@ extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
        }
 
        if (bio) {
+               bio->bi_size = 0;
                bio->bi_bdev = bdev;
                bio->bi_sector = first_sector;
        }
        return bio;
 }
 
-static int submit_one_bio(int rw, struct bio *bio)
+static int submit_one_bio(int rw, struct bio *bio, int mirror_num)
 {
-       u64 maxsector;
        int ret = 0;
        struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
        struct page *page = bvec->bv_page;
@@ -1669,14 +1768,11 @@ static int submit_one_bio(int rw, struct bio *bio)
 
        bio_get(bio);
 
-        maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
-       if (maxsector < bio->bi_sector) {
-               printk("sector too large max %Lu got %llu\n", maxsector,
-                       (unsigned long long)bio->bi_sector);
-               WARN_ON(1);
-       }
-
-       submit_bio(rw, bio);
+       if (tree->ops && tree->ops->submit_bio_hook)
+               tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
+                                          mirror_num);
+       else
+               submit_bio(rw, bio);
        if (bio_flagged(bio, BIO_EOPNOTSUPP))
                ret = -EOPNOTSUPP;
        bio_put(bio);
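
submit_one_bio() now routes through an optional submit_bio_hook with the (inode, rw, bio, mirror_num) shape seen above, letting the filesystem checksum or remap the bio before it hits the block layer. A minimal pass-through implementation, assuming only that signature:

	static int example_submit_bio_hook(struct inode *inode, int rw,
					   struct bio *bio, int mirror_num)
	{
		/* a real hook would csum the data or pick the device that
		 * backs mirror_num before submitting */
		submit_bio(rw, bio);
		return 0;
	}
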
@@ -1689,7 +1785,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
                              struct block_device *bdev,
                              struct bio **bio_ret,
                              unsigned long max_pages,
-                             bio_end_io_t end_io_func)
+                             bio_end_io_t end_io_func,
+                             int mirror_num)
 {
        int ret = 0;
        struct bio *bio;
@@ -1698,14 +1795,16 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
        if (bio_ret && *bio_ret) {
                bio = *bio_ret;
                if (bio->bi_sector + (bio->bi_size >> 9) != sector ||
+                   (tree->ops && tree->ops->merge_bio_hook &&
+                    tree->ops->merge_bio_hook(page, offset, size, bio)) ||
                    bio_add_page(bio, page, size, offset) < size) {
-                       ret = submit_one_bio(rw, bio);
+                       ret = submit_one_bio(rw, bio, mirror_num);
                        bio = NULL;
                } else {
                        return 0;
                }
        }
-       nr = min_t(int, max_pages, bio_get_nr_vecs(bdev));
+       nr = bio_get_nr_vecs(bdev);
        bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
        if (!bio) {
                printk("failed to allocate bio nr %d\n", nr);
@@ -1719,7 +1818,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
        if (bio_ret) {
                *bio_ret = bio;
        } else {
-               ret = submit_one_bio(rw, bio);
+               ret = submit_one_bio(rw, bio, mirror_num);
        }
 
        return ret;
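
With the merge_bio_hook check added above, a page joins the pending bio only if it is sector-contiguous, the hook does not veto, and bio_add_page() accepts it; any nonzero hook return forces a submit and a fresh bio. For illustration only, a hook that refuses merges crossing a fixed 64K boundary (the boundary is invented for the example):

	static int example_merge_bio_hook(struct page *page, unsigned long offset,
					  size_t size, struct bio *bio)
	{
		u64 start = (u64)bio->bi_sector << 9;
		u64 end = start + bio->bi_size + size - 1;

		/* nonzero: don't merge, submit the current bio first */
		return (start >> 16) != (end >> 16);
	}
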
@@ -1729,9 +1828,8 @@ void set_page_extent_mapped(struct page *page)
 {
        if (!PagePrivate(page)) {
                SetPagePrivate(page);
-               WARN_ON(!page->mapping->a_ops->invalidatepage);
-               set_page_private(page, EXTENT_PAGE_PRIVATE);
                page_cache_get(page);
+               set_page_private(page, EXTENT_PAGE_PRIVATE);
        }
 }
 
@@ -1748,7 +1846,7 @@ void set_page_extent_head(struct page *page, unsigned long len)
 static int __extent_read_full_page(struct extent_io_tree *tree,
                                   struct page *page,
                                   get_extent_t *get_extent,
-                                  struct bio **bio)
+                                  struct bio **bio, int mirror_num)
 {
        struct inode *inode = page->mapping->host;
        u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
@@ -1793,9 +1891,14 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                        unlock_extent(tree, cur, end, GFP_NOFS);
                        break;
                }
-
                extent_offset = cur - em->start;
+               if (extent_map_end(em) <= cur) {
+printk("bad mapping em [%Lu %Lu] cur %Lu\n", em->start, extent_map_end(em), cur);
+               }
                BUG_ON(extent_map_end(em) <= cur);
+               if (end < cur) {
+printk("2bad mapping end %Lu cur %Lu\n", end, cur);
+               }
                BUG_ON(end < cur);
 
                iosize = min(extent_map_end(em) - cur, end - cur + 1);
@@ -1846,18 +1949,18 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                                                          cur + iosize - 1);
                }
                if (!ret) {
-                       unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
-                       nr -= page->index;
+                       unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
+                       pnr -= page->index;
                        ret = submit_extent_page(READ, tree, page,
                                         sector, iosize, page_offset,
-                                        bdev, bio, nr,
-                                        end_bio_extent_readpage);
+                                        bdev, bio, pnr,
+                                        end_bio_extent_readpage, mirror_num);
+                       nr++;
                }
                if (ret)
                        SetPageError(page);
                cur = cur + iosize;
                page_offset += iosize;
-               nr++;
        }
        if (!nr) {
                if (!PageError(page))
@@ -1873,9 +1976,9 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
        struct bio *bio = NULL;
        int ret;
 
-       ret = __extent_read_full_page(tree, page, get_extent, &bio);
+       ret = __extent_read_full_page(tree, page, get_extent, &bio, 0);
        if (bio)
-               submit_one_bio(READ, bio);
+               submit_one_bio(READ, bio, 0);
        return ret;
 }
 EXPORT_SYMBOL(extent_read_full_page);
@@ -1901,12 +2004,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        u64 last_byte = i_size_read(inode);
        u64 block_start;
        u64 iosize;
+       u64 unlock_start;
        sector_t sector;
        struct extent_map *em;
        struct block_device *bdev;
        int ret;
        int nr = 0;
-       size_t page_offset = 0;
+       size_t pg_offset = 0;
        size_t blocksize;
        loff_t i_size = i_size_read(inode);
        unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
@@ -1914,8 +2018,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        u64 delalloc_end;
 
        WARN_ON(!PageLocked(page));
-       if (page->index > end_index) {
-               clear_extent_dirty(tree, start, page_end, GFP_NOFS);
+       pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
+       if (page->index > end_index ||
+          (page->index == end_index && !pg_offset)) {
+               page->mapping->a_ops->invalidatepage(page, 0);
                unlock_page(page);
                return 0;
        }
@@ -1923,13 +2029,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        if (page->index == end_index) {
                char *userpage;
 
-               size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
-
                userpage = kmap_atomic(page, KM_USER0);
-               memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset);
-               flush_dcache_page(page);
+               memset(userpage + pg_offset, 0,
+                      PAGE_CACHE_SIZE - pg_offset);
                kunmap_atomic(userpage, KM_USER0);
+               flush_dcache_page(page);
        }
+       pg_offset = 0;
 
        set_page_extent_mapped(page);
 
@@ -1952,6 +2058,17 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                delalloc_start = delalloc_end + 1;
        }
        lock_extent(tree, start, page_end, GFP_NOFS);
+       unlock_start = start;
+
+       if (tree->ops && tree->ops->writepage_start_hook) {
+               ret = tree->ops->writepage_start_hook(page, start, page_end);
+               if (ret == -EAGAIN) {
+                       unlock_extent(tree, start, page_end, GFP_NOFS);
+                       redirty_page_for_writepage(wbc, page);
+                       unlock_page(page);
+                       return 0;
+               }
+       }
 
        end = page_end;
        if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
@@ -1960,6 +2077,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 
        if (last_byte <= start) {
                clear_extent_dirty(tree, start, page_end, GFP_NOFS);
+               unlock_extent(tree, start, page_end, GFP_NOFS);
+               if (tree->ops && tree->ops->writepage_end_io_hook)
+                       tree->ops->writepage_end_io_hook(page, start,
+                                                        page_end, NULL, 1);
+               unlock_start = page_end + 1;
                goto done;
        }
 
@@ -1969,9 +2091,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        while (cur <= end) {
                if (cur >= last_byte) {
                        clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
+                       unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
+                       if (tree->ops && tree->ops->writepage_end_io_hook)
+                               tree->ops->writepage_end_io_hook(page, cur,
+                                                        page_end, NULL, 1);
+                       unlock_start = page_end + 1;
                        break;
                }
-               em = epd->get_extent(inode, page, page_offset, cur,
+               em = epd->get_extent(inode, page, pg_offset, cur,
                                     end - cur + 1, 1);
                if (IS_ERR(em) || !em) {
                        SetPageError(page);
@@ -1993,8 +2120,17 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                    block_start == EXTENT_MAP_INLINE) {
                        clear_extent_dirty(tree, cur,
                                           cur + iosize - 1, GFP_NOFS);
+
+                       unlock_extent(tree, unlock_start, cur + iosize -1,
+                                     GFP_NOFS);
+
+                       if (tree->ops && tree->ops->writepage_end_io_hook)
+                               tree->ops->writepage_end_io_hook(page, cur,
+                                                        cur + iosize - 1,
+                                                        NULL, 1);
                        cur = cur + iosize;
-                       page_offset += iosize;
+                       pg_offset += iosize;
+                       unlock_start = cur;
                        continue;
                }
 
@@ -2002,7 +2138,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
                                   EXTENT_DIRTY, 0)) {
                        cur = cur + iosize;
-                       page_offset += iosize;
+                       pg_offset += iosize;
                        continue;
                }
                clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
@@ -2012,10 +2148,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                } else {
                        ret = 0;
                }
-               if (ret)
+               if (ret) {
                        SetPageError(page);
-               else {
+               } else {
                        unsigned long max_nr = end_index + 1;
+
                        set_range_writeback(tree, cur, cur + iosize - 1);
                        if (!PageWriteback(page)) {
                                printk("warning page %lu not writeback, "
@@ -2025,14 +2162,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                        }
 
                        ret = submit_extent_page(WRITE, tree, page, sector,
-                                                iosize, page_offset, bdev,
+                                                iosize, pg_offset, bdev,
                                                 &epd->bio, max_nr,
-                                                end_bio_extent_writepage);
+                                                end_bio_extent_writepage, 0);
                        if (ret)
                                SetPageError(page);
                }
                cur = cur + iosize;
-               page_offset += iosize;
+               pg_offset += iosize;
                nr++;
        }
 done:
@@ -2041,13 +2178,13 @@ done:
                set_page_writeback(page);
                end_page_writeback(page);
        }
-       unlock_extent(tree, start, page_end, GFP_NOFS);
+       if (unlock_start <= page_end)
+               unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
        unlock_page(page);
        return 0;
 }
 
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
 /* Taken directly from 2.6.23 for 2.6.18 back port */
 typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
                                 void *data);
@@ -2194,7 +2331,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 
        write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd);
        if (epd.bio) {
-               submit_one_bio(WRITE, epd.bio);
+               submit_one_bio(WRITE, epd.bio, 0);
        }
        return ret;
 }
@@ -2215,7 +2352,7 @@ int extent_writepages(struct extent_io_tree *tree,
 
        ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
        if (epd.bio) {
-               submit_one_bio(WRITE, epd.bio);
+               submit_one_bio(WRITE, epd.bio, 0);
        }
        return ret;
 }
@@ -2247,7 +2384,8 @@ int extent_readpages(struct extent_io_tree *tree,
                        page_cache_get(page);
                        if (!pagevec_add(&pvec, page))
                                __pagevec_lru_add(&pvec);
-                       __extent_read_full_page(tree, page, get_extent, &bio);
+                       __extent_read_full_page(tree, page, get_extent,
+                                               &bio, 0);
                }
                page_cache_release(page);
        }
@@ -2255,7 +2393,7 @@ int extent_readpages(struct extent_io_tree *tree,
                __pagevec_lru_add(&pvec);
        BUG_ON(!list_empty(pages));
        if (bio)
-               submit_one_bio(READ, bio);
+               submit_one_bio(READ, bio, 0);
        return 0;
 }
 EXPORT_SYMBOL(extent_readpages);
@@ -2380,7 +2518,7 @@ int extent_prepare_write(struct extent_io_tree *tree,
                        ret = submit_extent_page(READ, tree, page,
                                         sector, iosize, page_offset, em->bdev,
                                         NULL, 1,
-                                        end_bio_extent_preparewrite);
+                                        end_bio_extent_preparewrite, 0);
                        iocount++;
                        block_start = block_start + iosize;
                } else {
@@ -2403,6 +2541,32 @@ err:
 }
 EXPORT_SYMBOL(extent_prepare_write);
 
+/*
+ * a helper for releasepage, this tests for areas of the page that
+ * are locked or under IO and drops the related state bits if it is safe
+ * to drop the page.
+ */
+int try_release_extent_state(struct extent_map_tree *map,
+                            struct extent_io_tree *tree, struct page *page,
+                            gfp_t mask)
+{
+       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+       u64 end = start + PAGE_CACHE_SIZE - 1;
+       int ret = 1;
+
+       if (test_range_bit(tree, start, end,
+                          EXTENT_IOBITS | EXTENT_ORDERED, 0))
+               ret = 0;
+       else {
+               if ((mask & GFP_NOFS) == GFP_NOFS)
+                       mask = GFP_NOFS;
+               clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
+                                1, 1, mask);
+       }
+       return ret;
+}
+EXPORT_SYMBOL(try_release_extent_state);
+
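
try_release_extent_state() now carries the state-bit half of the old try_release_extent_mapping() logic, refusing the release while EXTENT_IOBITS or the new EXTENT_ORDERED bit is set. A sketch of a ->releasepage built on the pair; the parameter plumbing is simplified, since a real caller digs map and tree out of the inode:

	static int example_releasepage(struct extent_map_tree *map,
				       struct extent_io_tree *tree,
				       struct page *page, gfp_t mask)
	{
		if (PageWriteback(page) || PageDirty(page))
			return 0;
		/* drops extent_map records when safe, then falls through to
		 * try_release_extent_state() for the state bits */
		return try_release_extent_mapping(map, tree, page, mask);
	}
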
 /*
  * a helper for releasepage.  As long as there are no locked extents
  * in the range corresponding to the page, both state records and extent
@@ -2415,19 +2579,20 @@ int try_release_extent_mapping(struct extent_map_tree *map,
        struct extent_map *em;
        u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 end = start + PAGE_CACHE_SIZE - 1;
-       u64 orig_start = start;
-       int ret = 1;
 
        if ((mask & __GFP_WAIT) &&
            page->mapping->host->i_size > 16 * 1024 * 1024) {
+               u64 len;
                while (start <= end) {
+                       len = end - start + 1;
                        spin_lock(&map->lock);
-                       em = lookup_extent_mapping(map, start, end);
+                       em = lookup_extent_mapping(map, start, len);
                        if (!em || IS_ERR(em)) {
                                spin_unlock(&map->lock);
                                break;
                        }
-                       if (em->start != start) {
+                       if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
+                           em->start != start) {
                                spin_unlock(&map->lock);
                                free_extent_map(em);
                                break;
@@ -2446,15 +2611,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
                        free_extent_map(em);
                }
        }
-       if (test_range_bit(tree, orig_start, end, EXTENT_IOBITS, 0))
-               ret = 0;
-       else {
-               if ((mask & GFP_NOFS) == GFP_NOFS)
-                       mask = GFP_NOFS;
-               clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
-                                1, 1, mask);
-       }
-       return ret;
+       return try_release_extent_state(map, tree, page, mask);
 }
 EXPORT_SYMBOL(try_release_extent_mapping);
 
@@ -2480,51 +2637,6 @@ out:
        return sector;
 }
 
-static int add_lru(struct extent_io_tree *tree, struct extent_buffer *eb)
-{
-       if (list_empty(&eb->lru)) {
-               extent_buffer_get(eb);
-               list_add(&eb->lru, &tree->buffer_lru);
-               tree->lru_size++;
-               if (tree->lru_size >= BUFFER_LRU_MAX) {
-                       struct extent_buffer *rm;
-                       rm = list_entry(tree->buffer_lru.prev,
-                                       struct extent_buffer, lru);
-                       tree->lru_size--;
-                       list_del_init(&rm->lru);
-                       free_extent_buffer(rm);
-               }
-       } else
-               list_move(&eb->lru, &tree->buffer_lru);
-       return 0;
-}
-static struct extent_buffer *find_lru(struct extent_io_tree *tree,
-                                     u64 start, unsigned long len)
-{
-       struct list_head *lru = &tree->buffer_lru;
-       struct list_head *cur = lru->next;
-       struct extent_buffer *eb;
-
-       if (list_empty(lru))
-               return NULL;
-
-       do {
-               eb = list_entry(cur, struct extent_buffer, lru);
-               if (eb->start == start && eb->len == len) {
-                       extent_buffer_get(eb);
-                       return eb;
-               }
-               cur = cur->next;
-       } while (cur != lru);
-       return NULL;
-}
-
-static inline unsigned long num_extent_pages(u64 start, u64 len)
-{
-       return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
-               (start >> PAGE_CACHE_SHIFT);
-}
-
 static inline struct page *extent_buffer_page(struct extent_buffer *eb,
                                              unsigned long i)
 {
@@ -2541,24 +2653,27 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb,
        return p;
 }
 
+static inline unsigned long num_extent_pages(u64 start, u64 len)
+{
+       return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
+               (start >> PAGE_CACHE_SHIFT);
+}
+
 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
                                                   u64 start,
                                                   unsigned long len,
                                                   gfp_t mask)
 {
        struct extent_buffer *eb = NULL;
-
-       spin_lock(&tree->lru_lock);
-       eb = find_lru(tree, start, len);
-       spin_unlock(&tree->lru_lock);
-       if (eb) {
-               return eb;
-       }
+       unsigned long flags;
 
        eb = kmem_cache_zalloc(extent_buffer_cache, mask);
-       INIT_LIST_HEAD(&eb->lru);
        eb->start = start;
        eb->len = len;
+       mutex_init(&eb->mutex);
+       spin_lock_irqsave(&leak_lock, flags);
+       list_add(&eb->leak_list, &buffers);
+       spin_unlock_irqrestore(&leak_lock, flags);
        atomic_set(&eb->refs, 1);
 
        return eb;
@@ -2566,6 +2681,10 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
 
 static void __free_extent_buffer(struct extent_buffer *eb)
 {
+       unsigned long flags;
+       spin_lock_irqsave(&leak_lock, flags);
+       list_del(&eb->leak_list);
+       spin_unlock_irqrestore(&leak_lock, flags);
        kmem_cache_free(extent_buffer_cache, eb);
 }
 
@@ -2578,17 +2697,24 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
        unsigned long i;
        unsigned long index = start >> PAGE_CACHE_SHIFT;
        struct extent_buffer *eb;
+       struct extent_buffer *exists = NULL;
        struct page *p;
        struct address_space *mapping = tree->mapping;
        int uptodate = 1;
 
+       spin_lock(&tree->buffer_lock);
+       eb = buffer_search(tree, start);
+       if (eb) {
+               atomic_inc(&eb->refs);
+               spin_unlock(&tree->buffer_lock);
+               return eb;
+       }
+       spin_unlock(&tree->buffer_lock);
+
        eb = __alloc_extent_buffer(tree, start, len, mask);
-       if (!eb || IS_ERR(eb))
+       if (!eb)
                return NULL;
 
-       if (eb->flags & EXTENT_BUFFER_FILLED)
-               goto lru_add;
-
        if (page0) {
                eb->first_page = page0;
                i = 1;
@@ -2596,8 +2722,8 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
                page_cache_get(page0);
                mark_page_accessed(page0);
                set_page_extent_mapped(page0);
-               WARN_ON(!PageUptodate(page0));
                set_page_extent_head(page0, len);
+               uptodate = PageUptodate(page0);
        } else {
                i = 0;
        }
@@ -2605,7 +2731,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
                p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
                if (!p) {
                        WARN_ON(1);
-                       goto fail;
+                       goto free_eb;
                }
                set_page_extent_mapped(p);
                mark_page_accessed(p);
@@ -2623,25 +2749,28 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
                eb->flags |= EXTENT_UPTODATE;
        eb->flags |= EXTENT_BUFFER_FILLED;
 
-lru_add:
-       spin_lock(&tree->lru_lock);
-       add_lru(tree, eb);
-       spin_unlock(&tree->lru_lock);
+       spin_lock(&tree->buffer_lock);
+       exists = buffer_tree_insert(tree, start, &eb->rb_node);
+       if (exists) {
+               /* add one reference for the caller */
+               atomic_inc(&exists->refs);
+               spin_unlock(&tree->buffer_lock);
+               goto free_eb;
+       }
+       spin_unlock(&tree->buffer_lock);
+
+       /* add one reference for the tree */
+       atomic_inc(&eb->refs);
        return eb;
 
-fail:
-       spin_lock(&tree->lru_lock);
-       list_del_init(&eb->lru);
-       spin_unlock(&tree->lru_lock);
+free_eb:
        if (!atomic_dec_and_test(&eb->refs))
-               return NULL;
-       for (index = 1; index < i; index++) {
+               return exists;
+       for (index = 1; index < i; index++)
                page_cache_release(extent_buffer_page(eb, index));
-       }
-       if (i > 0)
-               page_cache_release(extent_buffer_page(eb, 0));
+       page_cache_release(extent_buffer_page(eb, 0));
        __free_extent_buffer(eb);
-       return NULL;
+       return exists;
 }
 EXPORT_SYMBOL(alloc_extent_buffer);
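
alloc_extent_buffer() now follows the optimistic-insert pattern: build the buffer outside the lock, insert under tree->buffer_lock, and when buffer_tree_insert() reports a racing winner, take a reference on that one and free our copy. The refcount convention that falls out of it, as a usage sketch: the rbtree holds one reference, and each alloc/find returns one more that the caller must drop:

	static void use_buffer_once(struct extent_io_tree *tree, u64 start,
				    unsigned long len)
	{
		struct extent_buffer *eb;

		eb = alloc_extent_buffer(tree, start, len, NULL, GFP_NOFS);
		if (!eb)
			return;
		/* ... read or write the buffer ... */
		free_extent_buffer(eb);	/* drops the caller's ref; the tree's
					   ref keeps eb cached */
	}
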
 
@@ -2649,84 +2778,27 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
                                         u64 start, unsigned long len,
                                          gfp_t mask)
 {
-       unsigned long num_pages = num_extent_pages(start, len);
-       unsigned long i;
-       unsigned long index = start >> PAGE_CACHE_SHIFT;
        struct extent_buffer *eb;
-       struct page *p;
-       struct address_space *mapping = tree->mapping;
-       int uptodate = 1;
-
-       eb = __alloc_extent_buffer(tree, start, len, mask);
-       if (!eb || IS_ERR(eb))
-               return NULL;
 
-       if (eb->flags & EXTENT_BUFFER_FILLED)
-               goto lru_add;
+       spin_lock(&tree->buffer_lock);
+       eb = buffer_search(tree, start);
+       if (eb)
+               atomic_inc(&eb->refs);
+       spin_unlock(&tree->buffer_lock);
 
-       for (i = 0; i < num_pages; i++, index++) {
-               p = find_lock_page(mapping, index);
-               if (!p) {
-                       goto fail;
-               }
-               set_page_extent_mapped(p);
-               mark_page_accessed(p);
-
-               if (i == 0) {
-                       eb->first_page = p;
-                       set_page_extent_head(p, len);
-               } else {
-                       set_page_private(p, EXTENT_PAGE_PRIVATE);
-               }
-
-               if (!PageUptodate(p))
-                       uptodate = 0;
-               unlock_page(p);
-       }
-       if (uptodate)
-               eb->flags |= EXTENT_UPTODATE;
-       eb->flags |= EXTENT_BUFFER_FILLED;
-
-lru_add:
-       spin_lock(&tree->lru_lock);
-       add_lru(tree, eb);
-       spin_unlock(&tree->lru_lock);
        return eb;
-fail:
-       spin_lock(&tree->lru_lock);
-       list_del_init(&eb->lru);
-       spin_unlock(&tree->lru_lock);
-       if (!atomic_dec_and_test(&eb->refs))
-               return NULL;
-       for (index = 1; index < i; index++) {
-               page_cache_release(extent_buffer_page(eb, index));
-       }
-       if (i > 0)
-               page_cache_release(extent_buffer_page(eb, 0));
-       __free_extent_buffer(eb);
-       return NULL;
 }
 EXPORT_SYMBOL(find_extent_buffer);
 
 void free_extent_buffer(struct extent_buffer *eb)
 {
-       unsigned long i;
-       unsigned long num_pages;
-
        if (!eb)
                return;
 
        if (!atomic_dec_and_test(&eb->refs))
                return;
 
-       WARN_ON(!list_empty(&eb->lru));
-       num_pages = num_extent_pages(eb->start, eb->len);
-
-       for (i = 1; i < num_pages; i++) {
-               page_cache_release(extent_buffer_page(eb, i));
-       }
-       page_cache_release(extent_buffer_page(eb, 0));
-       __free_extent_buffer(eb);
+       WARN_ON(1);
 }
 EXPORT_SYMBOL(free_extent_buffer);
 
@@ -2822,6 +2894,25 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
 }
 EXPORT_SYMBOL(set_extent_buffer_dirty);
 
+int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
+                               struct extent_buffer *eb)
+{
+       unsigned long i;
+       struct page *page;
+       unsigned long num_pages;
+
+       num_pages = num_extent_pages(eb->start, eb->len);
+       eb->flags &= ~EXTENT_UPTODATE;
+
+       clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+                             GFP_NOFS);
+       for (i = 0; i < num_pages; i++) {
+               page = extent_buffer_page(eb, i);
+               ClearPageUptodate(page);
+       }
+       return 0;
+}
+
 int set_extent_buffer_uptodate(struct extent_io_tree *tree,
                                struct extent_buffer *eb)
 {
@@ -2847,32 +2938,81 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
 }
 EXPORT_SYMBOL(set_extent_buffer_uptodate);
 
+int extent_range_uptodate(struct extent_io_tree *tree,
+                         u64 start, u64 end)
+{
+       struct page *page;
+       int ret;
+       int pg_uptodate = 1;
+       int uptodate;
+       unsigned long index;
+
+       ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1);
+       if (ret)
+               return 1;
+       while(start <= end) {
+               index = start >> PAGE_CACHE_SHIFT;
+               page = find_get_page(tree->mapping, index);
+               uptodate = PageUptodate(page);
+               page_cache_release(page);
+               if (!uptodate) {
+                       pg_uptodate = 0;
+                       break;
+               }
+               start += PAGE_CACHE_SIZE;
+       }
+       return pg_uptodate;
+}
+
 int extent_buffer_uptodate(struct extent_io_tree *tree,
-                            struct extent_buffer *eb)
+                          struct extent_buffer *eb)
 {
+       int ret = 0;
+       unsigned long num_pages;
+       unsigned long i;
+       struct page *page;
+       int pg_uptodate = 1;
+
        if (eb->flags & EXTENT_UPTODATE)
                return 1;
-       return test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+
+       ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
                           EXTENT_UPTODATE, 1);
+       if (ret)
+               return ret;
+
+       num_pages = num_extent_pages(eb->start, eb->len);
+       for (i = 0; i < num_pages; i++) {
+               page = extent_buffer_page(eb, i);
+               if (!PageUptodate(page)) {
+                       pg_uptodate = 0;
+                       break;
+               }
+       }
+       return pg_uptodate;
 }
 EXPORT_SYMBOL(extent_buffer_uptodate);
 
 int read_extent_buffer_pages(struct extent_io_tree *tree,
                             struct extent_buffer *eb,
-                            u64 start,
-                            int wait)
+                            u64 start, int wait,
+                            get_extent_t *get_extent, int mirror_num)
 {
        unsigned long i;
        unsigned long start_i;
        struct page *page;
        int err;
        int ret = 0;
+       int locked_pages = 0;
+       int all_uptodate = 1;
+       int inc_all_pages = 0;
        unsigned long num_pages;
+       struct bio *bio = NULL;
 
        if (eb->flags & EXTENT_UPTODATE)
                return 0;
 
-       if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+       if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
                           EXTENT_UPTODATE, 1)) {
                return 0;
        }
@@ -2888,18 +3028,34 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
        num_pages = num_extent_pages(eb->start, eb->len);
        for (i = start_i; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
-               if (PageUptodate(page)) {
-                       continue;
-               }
                if (!wait) {
-                       if (TestSetPageLocked(page)) {
-                               continue;
-                       }
+                       if (TestSetPageLocked(page))
+                               goto unlock_exit;
                } else {
                        lock_page(page);
                }
+               locked_pages++;
+               if (!PageUptodate(page)) {
+                       all_uptodate = 0;
+               }
+       }
+       if (all_uptodate) {
+               if (start_i == 0)
+                       eb->flags |= EXTENT_UPTODATE;
+               goto unlock_exit;
+       }
+
+       for (i = start_i; i < num_pages; i++) {
+               page = extent_buffer_page(eb, i);
+               if (inc_all_pages)
+                       page_cache_get(page);
                if (!PageUptodate(page)) {
-                       err = page->mapping->a_ops->readpage(NULL, page);
+                       if (start_i == 0)
+                               inc_all_pages = 1;
+                       ClearPageError(page);
+                       err = __extent_read_full_page(tree, page,
+                                                     get_extent, &bio,
+                                                     mirror_num);
                        if (err) {
                                ret = err;
                        }
@@ -2908,6 +3064,9 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
                }
        }
 
+       if (bio)
+               submit_one_bio(READ, bio, mirror_num);
+
        if (ret || !wait) {
                return ret;
        }
@@ -2921,6 +3080,16 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
        if (!ret)
                eb->flags |= EXTENT_UPTODATE;
        return ret;
+
+unlock_exit:
+       i = start_i;
+       while(locked_pages > 0) {
+               page = extent_buffer_page(eb, i);
+               i++;
+               unlock_page(page);
+               locked_pages--;
+       }
+       return ret;
 }
 EXPORT_SYMBOL(read_extent_buffer_pages);
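
read_extent_buffer_pages() now takes the get_extent callback and a mirror_num, and it works in two passes: lock (or trylock) every page first, then push the stale ones through __extent_read_full_page() so they coalesce into one bio. A call sketch with the new signature; using btree_get_extent as the callback is an assumption about the btrfs caller, which is not shown in this file:

	static int read_eb_from_mirror(struct extent_io_tree *tree,
				       struct extent_buffer *eb, int mirror_num)
	{
		return read_extent_buffer_pages(tree, eb, 0, 1 /* wait */,
						btree_get_extent, mirror_num);
	}
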
 
@@ -2935,7 +3104,6 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
        char *dst = (char *)dstv;
        size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
        unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
-       unsigned long num_pages = num_extent_pages(eb->start, eb->len);
 
        WARN_ON(start > eb->len);
        WARN_ON(start + len > eb->start + eb->len);
@@ -2944,11 +3112,6 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
 
        while(len > 0) {
                page = extent_buffer_page(eb, i);
-               if (!PageUptodate(page)) {
-                       printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len);
-                       WARN_ON(1);
-               }
-               WARN_ON(!PageUptodate(page));
 
                cur = min(len, (PAGE_CACHE_SIZE - offset));
                kaddr = kmap_atomic(page, KM_USER1);
@@ -2992,7 +3155,6 @@ printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len,
        }
 
        p = extent_buffer_page(eb, i);
-       WARN_ON(!PageUptodate(p));
        kaddr = kmap_atomic(p, km);
        *token = kaddr;
        *map = kaddr + offset;
@@ -3052,7 +3214,6 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
 
        while(len > 0) {
                page = extent_buffer_page(eb, i);
-               WARN_ON(!PageUptodate(page));
 
                cur = min(len, (PAGE_CACHE_SIZE - offset));
 
@@ -3302,3 +3463,35 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
        }
 }
 EXPORT_SYMBOL(memmove_extent_buffer);
+
+int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
+{
+       u64 start = page_offset(page);
+       struct extent_buffer *eb;
+       int ret = 1;
+       unsigned long i;
+       unsigned long num_pages;
+
+       spin_lock(&tree->buffer_lock);
+       eb = buffer_search(tree, start);
+       if (!eb)
+               goto out;
+
+       if (atomic_read(&eb->refs) > 1) {
+               ret = 0;
+               goto out;
+       }
+       /* at this point we can safely release the extent buffer */
+       num_pages = num_extent_pages(eb->start, eb->len);
+       for (i = 0; i < num_pages; i++) {
+               struct page *page = extent_buffer_page(eb, i);
+               page_cache_release(page);
+       }
+       rb_erase(&eb->rb_node, &tree->buffer);
+       __free_extent_buffer(eb);
+out:
+       spin_unlock(&tree->buffer_lock);
+       return ret;
+}
+EXPORT_SYMBOL(try_release_extent_buffer);
+
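
try_release_extent_buffer() is the rbtree-era teardown path: with only the tree's reference left, it drops every page reference, unhooks the buffer from tree->buffer, and frees it. A sketch of how a btree releasepage might compose it with the state helper from earlier; the composition is assumed, as the real caller lives outside this file:

	static int example_btree_releasepage(struct extent_map_tree *map,
					     struct extent_io_tree *tree,
					     struct page *page, gfp_t mask)
	{
		int ret = try_release_extent_buffer(tree, page);

		if (ret)	/* no live extent_buffer keeps the page pinned */
			ret = try_release_extent_state(map, tree, page, mask);
		return ret;
	}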