Btrfs: Search data ordered extents first for checksums on read
[linux-2.6-block.git] / fs / btrfs / extent_io.c
index d4a63ae7ed1b24dec42525af419ff97b57d16927..e3547a992d5c80f5965b5413b0fb6b06ee1368a3 100644 (file)
@@ -91,29 +91,15 @@ void extent_io_tree_init(struct extent_io_tree *tree,
                          struct address_space *mapping, gfp_t mask)
 {
        tree->state.rb_node = NULL;
+       tree->buffer.rb_node = NULL;
        tree->ops = NULL;
        tree->dirty_bytes = 0;
        spin_lock_init(&tree->lock);
-       spin_lock_init(&tree->lru_lock);
+       spin_lock_init(&tree->buffer_lock);
        tree->mapping = mapping;
-       INIT_LIST_HEAD(&tree->buffer_lru);
-       tree->lru_size = 0;
-       tree->last = NULL;
 }
 EXPORT_SYMBOL(extent_io_tree_init);
 
-void extent_io_tree_empty_lru(struct extent_io_tree *tree)
-{
-       struct extent_buffer *eb;
-       while(!list_empty(&tree->buffer_lru)) {
-               eb = list_entry(tree->buffer_lru.next, struct extent_buffer,
-                               lru);
-               list_del_init(&eb->lru);
-               free_extent_buffer(eb);
-       }
-}
-EXPORT_SYMBOL(extent_io_tree_empty_lru);
-
 struct extent_state *alloc_extent_state(gfp_t mask)
 {
        struct extent_state *state;
@@ -186,12 +172,6 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
        struct tree_entry *entry;
        struct tree_entry *prev_entry = NULL;
 
-       if (tree->last) {
-               struct extent_state *state;
-               state = tree->last;
-               if (state->start <= offset && offset <= state->end)
-                       return &tree->last->rb_node;
-       }
        while(n) {
                entry = rb_entry(n, struct tree_entry, rb_node);
                prev = n;
@@ -202,7 +182,6 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
                else if (offset > entry->end)
                        n = n->rb_right;
                else {
-                       tree->last = rb_entry(n, struct extent_state, rb_node);
                        return n;
                }
        }
@@ -236,15 +215,55 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree,
 
        ret = __etree_search(tree, offset, &prev, NULL);
        if (!ret) {
-               if (prev) {
-                       tree->last = rb_entry(prev, struct extent_state,
-                                             rb_node);
-               }
                return prev;
        }
        return ret;
 }
 
+static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
+                                         u64 offset, struct rb_node *node)
+{
+       struct rb_root *root = &tree->buffer;
+       struct rb_node ** p = &root->rb_node;
+       struct rb_node * parent = NULL;
+       struct extent_buffer *eb;
+
+       while(*p) {
+               parent = *p;
+               eb = rb_entry(parent, struct extent_buffer, rb_node);
+
+               if (offset < eb->start)
+                       p = &(*p)->rb_left;
+               else if (offset > eb->start)
+                       p = &(*p)->rb_right;
+               else
+                       return eb;
+       }
+
+       rb_link_node(node, parent, p);
+       rb_insert_color(node, root);
+       return NULL;
+}
+
+static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
+                                          u64 offset)
+{
+       struct rb_root *root = &tree->buffer;
+       struct rb_node * n = root->rb_node;
+       struct extent_buffer *eb;
+
+       while(n) {
+               eb = rb_entry(n, struct extent_buffer, rb_node);
+               if (offset < eb->start)
+                       n = n->rb_left;
+               else if (offset > eb->start)
+                       n = n->rb_right;
+               else
+                       return eb;
+       }
+       return NULL;
+}
+
 /*
  * utility function to look for merge candidates inside a given range.
  * Any extents with matching state are merged together into a single
@@ -270,8 +289,6 @@ static int merge_state(struct extent_io_tree *tree,
                    other->state == state->state) {
                        state->start = other->start;
                        other->tree = NULL;
-                       if (tree->last == other)
-                               tree->last = state;
                        rb_erase(&other->rb_node, &tree->state);
                        free_extent_state(other);
                }
@@ -283,8 +300,6 @@ static int merge_state(struct extent_io_tree *tree,
                    other->state == state->state) {
                        other->start = state->start;
                        state->tree = NULL;
-                       if (tree->last == state)
-                               tree->last = other;
                        rb_erase(&state->rb_node, &tree->state);
                        free_extent_state(state);
                }
@@ -347,7 +362,6 @@ static int insert_state(struct extent_io_tree *tree,
                return -EEXIST;
        }
        state->tree = tree;
-       tree->last = state;
        merge_state(tree, state);
        return 0;
 }
@@ -413,9 +427,6 @@ static int clear_state_bit(struct extent_io_tree *tree,
        if (delete || state->state == 0) {
                if (state->tree) {
                        clear_state_cb(tree, state, state->state);
-                       if (tree->last == state) {
-                               tree->last = extent_state_next(state);
-                       }
                        rb_erase(&state->rb_node, &tree->state);
                        state->tree = NULL;
                        free_extent_state(state);
@@ -1817,9 +1828,8 @@ void set_page_extent_mapped(struct page *page)
 {
        if (!PagePrivate(page)) {
                SetPagePrivate(page);
-               WARN_ON(!page->mapping->a_ops->invalidatepage);
-               set_page_private(page, EXTENT_PAGE_PRIVATE);
                page_cache_get(page);
+               set_page_private(page, EXTENT_PAGE_PRIVATE);
        }
 }
 
@@ -1939,18 +1949,18 @@ printk("2bad mapping end %Lu cur %Lu\n", end, cur);
                                                          cur + iosize - 1);
                }
                if (!ret) {
-                       unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
-                       nr -= page->index;
+                       unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
+                       pnr -= page->index;
                        ret = submit_extent_page(READ, tree, page,
                                         sector, iosize, page_offset,
-                                        bdev, bio, nr,
+                                        bdev, bio, pnr,
                                         end_bio_extent_readpage, mirror_num);
+                       nr++;
                }
                if (ret)
                        SetPageError(page);
                cur = cur + iosize;
                page_offset += iosize;
-               nr++;
        }
        if (!nr) {
                if (!PageError(page))
@@ -2627,51 +2637,6 @@ out:
        return sector;
 }
 
-static int add_lru(struct extent_io_tree *tree, struct extent_buffer *eb)
-{
-       if (list_empty(&eb->lru)) {
-               extent_buffer_get(eb);
-               list_add(&eb->lru, &tree->buffer_lru);
-               tree->lru_size++;
-               if (tree->lru_size >= BUFFER_LRU_MAX) {
-                       struct extent_buffer *rm;
-                       rm = list_entry(tree->buffer_lru.prev,
-                                       struct extent_buffer, lru);
-                       tree->lru_size--;
-                       list_del_init(&rm->lru);
-                       free_extent_buffer(rm);
-               }
-       } else
-               list_move(&eb->lru, &tree->buffer_lru);
-       return 0;
-}
-static struct extent_buffer *find_lru(struct extent_io_tree *tree,
-                                     u64 start, unsigned long len)
-{
-       struct list_head *lru = &tree->buffer_lru;
-       struct list_head *cur = lru->next;
-       struct extent_buffer *eb;
-
-       if (list_empty(lru))
-               return NULL;
-
-       do {
-               eb = list_entry(cur, struct extent_buffer, lru);
-               if (eb->start == start && eb->len == len) {
-                       extent_buffer_get(eb);
-                       return eb;
-               }
-               cur = cur->next;
-       } while (cur != lru);
-       return NULL;
-}
-
-static inline unsigned long num_extent_pages(u64 start, u64 len)
-{
-       return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
-               (start >> PAGE_CACHE_SHIFT);
-}
-
 static inline struct page *extent_buffer_page(struct extent_buffer *eb,
                                              unsigned long i)
 {
@@ -2688,44 +2653,10 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb,
        return p;
 }
 
-int release_extent_buffer_tail_pages(struct extent_buffer *eb)
-{
-       unsigned long num_pages = num_extent_pages(eb->start, eb->len);
-       struct page *page;
-       unsigned long i;
-
-       if (num_pages == 1)
-               return 0;
-       for (i = 1; i < num_pages; i++) {
-               page = extent_buffer_page(eb, i);
-               page_cache_release(page);
-       }
-       return 0;
-}
-
-
-int invalidate_extent_lru(struct extent_io_tree *tree, u64 start,
-                         unsigned long len)
+static inline unsigned long num_extent_pages(u64 start, u64 len)
 {
-       struct list_head *lru = &tree->buffer_lru;
-       struct list_head *cur = lru->next;
-       struct extent_buffer *eb;
-       int found = 0;
-
-       spin_lock(&tree->lru_lock);
-       if (list_empty(lru))
-               goto out;
-
-       do {
-               eb = list_entry(cur, struct extent_buffer, lru);
-               if (eb->start <= start && eb->start + eb->len > start) {
-                       eb->flags &= ~EXTENT_UPTODATE;
-               }
-               cur = cur->next;
-       } while (cur != lru);
-out:
-       spin_unlock(&tree->lru_lock);
-       return found;
+       return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
+               (start >> PAGE_CACHE_SHIFT);
 }
 
 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
@@ -2736,17 +2667,10 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
        struct extent_buffer *eb = NULL;
        unsigned long flags;
 
-       spin_lock(&tree->lru_lock);
-       eb = find_lru(tree, start, len);
-       spin_unlock(&tree->lru_lock);
-       if (eb) {
-               return eb;
-       }
-
        eb = kmem_cache_zalloc(extent_buffer_cache, mask);
-       INIT_LIST_HEAD(&eb->lru);
        eb->start = start;
        eb->len = len;
+       mutex_init(&eb->mutex);
        spin_lock_irqsave(&leak_lock, flags);
        list_add(&eb->leak_list, &buffers);
        spin_unlock_irqrestore(&leak_lock, flags);
@@ -2773,17 +2697,24 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
        unsigned long i;
        unsigned long index = start >> PAGE_CACHE_SHIFT;
        struct extent_buffer *eb;
+       struct extent_buffer *exists = NULL;
        struct page *p;
        struct address_space *mapping = tree->mapping;
        int uptodate = 1;
 
+       spin_lock(&tree->buffer_lock);
+       eb = buffer_search(tree, start);
+       if (eb) {
+               atomic_inc(&eb->refs);
+               spin_unlock(&tree->buffer_lock);
+               return eb;
+       }
+       spin_unlock(&tree->buffer_lock);
+
        eb = __alloc_extent_buffer(tree, start, len, mask);
        if (!eb)
                return NULL;
 
-       if (eb->flags & EXTENT_BUFFER_FILLED)
-               goto lru_add;
-
        if (page0) {
                eb->first_page = page0;
                i = 1;
@@ -2800,7 +2731,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
                p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
                if (!p) {
                        WARN_ON(1);
-                       goto fail;
+                       goto free_eb;
                }
                set_page_extent_mapped(p);
                mark_page_accessed(p);
@@ -2818,25 +2749,28 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
                eb->flags |= EXTENT_UPTODATE;
        eb->flags |= EXTENT_BUFFER_FILLED;
 
-lru_add:
-       spin_lock(&tree->lru_lock);
-       add_lru(tree, eb);
-       spin_unlock(&tree->lru_lock);
+       spin_lock(&tree->buffer_lock);
+       exists = buffer_tree_insert(tree, start, &eb->rb_node);
+       if (exists) {
+               /* add one reference for the caller */
+               atomic_inc(&exists->refs);
+               spin_unlock(&tree->buffer_lock);
+               goto free_eb;
+       }
+       spin_unlock(&tree->buffer_lock);
+
+       /* add one reference for the tree */
+       atomic_inc(&eb->refs);
        return eb;
 
-fail:
-       spin_lock(&tree->lru_lock);
-       list_del_init(&eb->lru);
-       spin_unlock(&tree->lru_lock);
+free_eb:
        if (!atomic_dec_and_test(&eb->refs))
-               return NULL;
-       for (index = 1; index < i; index++) {
+               return exists;
+       for (index = 1; index < i; index++)
                page_cache_release(extent_buffer_page(eb, index));
-       }
-       if (i > 0)
-               page_cache_release(extent_buffer_page(eb, 0));
+       page_cache_release(extent_buffer_page(eb, 0));
        __free_extent_buffer(eb);
-       return NULL;
+       return exists;
 }
 EXPORT_SYMBOL(alloc_extent_buffer);
 
@@ -2844,89 +2778,27 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
                                         u64 start, unsigned long len,
                                          gfp_t mask)
 {
-       unsigned long num_pages = num_extent_pages(start, len);
-       unsigned long i;
-       unsigned long index = start >> PAGE_CACHE_SHIFT;
        struct extent_buffer *eb;
-       struct page *p;
-       struct address_space *mapping = tree->mapping;
-       int uptodate = 1;
 
-       eb = __alloc_extent_buffer(tree, start, len, mask);
-       if (!eb)
-               return NULL;
-
-       if (eb->flags & EXTENT_BUFFER_FILLED)
-               goto lru_add;
-
-       for (i = 0; i < num_pages; i++, index++) {
-               p = find_get_page(mapping, index);
-               if (!p) {
-                       goto fail;
-               }
-               if (TestSetPageLocked(p)) {
-                       page_cache_release(p);
-                       goto fail;
-               }
-
-               set_page_extent_mapped(p);
-               mark_page_accessed(p);
-
-               if (i == 0) {
-                       eb->first_page = p;
-                       set_page_extent_head(p, len);
-               } else {
-                       set_page_private(p, EXTENT_PAGE_PRIVATE);
-               }
-
-               if (!PageUptodate(p))
-                       uptodate = 0;
-               unlock_page(p);
-       }
-       if (uptodate)
-               eb->flags |= EXTENT_UPTODATE;
-       eb->flags |= EXTENT_BUFFER_FILLED;
+       spin_lock(&tree->buffer_lock);
+       eb = buffer_search(tree, start);
+       if (eb)
+               atomic_inc(&eb->refs);
+       spin_unlock(&tree->buffer_lock);
 
-lru_add:
-       spin_lock(&tree->lru_lock);
-       add_lru(tree, eb);
-       spin_unlock(&tree->lru_lock);
        return eb;
-fail:
-       spin_lock(&tree->lru_lock);
-       list_del_init(&eb->lru);
-       spin_unlock(&tree->lru_lock);
-       if (!atomic_dec_and_test(&eb->refs))
-               return NULL;
-       for (index = 1; index < i; index++) {
-               page_cache_release(extent_buffer_page(eb, index));
-       }
-       if (i > 0)
-               page_cache_release(extent_buffer_page(eb, 0));
-       __free_extent_buffer(eb);
-       return NULL;
 }
 EXPORT_SYMBOL(find_extent_buffer);
 
 void free_extent_buffer(struct extent_buffer *eb)
 {
-       unsigned long i;
-       unsigned long num_pages;
-
        if (!eb)
                return;
 
        if (!atomic_dec_and_test(&eb->refs))
                return;
 
-       WARN_ON(!list_empty(&eb->lru));
-       num_pages = num_extent_pages(eb->start, eb->len);
-
-       for (i = 1; i < num_pages; i++) {
-               page_cache_release(extent_buffer_page(eb, i));
-       }
-       page_cache_release(extent_buffer_page(eb, 0));
-       __free_extent_buffer(eb);
+       WARN_ON(1);
 }
 EXPORT_SYMBOL(free_extent_buffer);
 
@@ -2946,6 +2818,7 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
 
        for (i = 0; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
+               lock_page(page);
                if (i == 0)
                        set_page_extent_head(page, eb->len);
                else
@@ -2963,6 +2836,7 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
                        end  = start + PAGE_CACHE_SIZE - 1;
                        if (test_range_bit(tree, start, end,
                                           EXTENT_DIRTY, 0)) {
+                               unlock_page(page);
                                continue;
                        }
                }
@@ -2974,6 +2848,7 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
                                                PAGECACHE_TAG_DIRTY);
                }
                read_unlock_irq(&page->mapping->tree_lock);
+               unlock_page(page);
        }
        return 0;
 }
@@ -3002,12 +2877,17 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
                 * on us if the page isn't already dirty.
                 */
                if (i == 0) {
+                       lock_page(page);
                        set_page_extent_head(page, eb->len);
                } else if (PagePrivate(page) &&
                           page->private != EXTENT_PAGE_PRIVATE) {
+                       lock_page(page);
                        set_page_extent_mapped(page);
+                       unlock_page(page);
                }
                __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
+               if (i == 0)
+                       unlock_page(page);
        }
        return set_extent_dirty(tree, eb->start,
                                eb->start + eb->len - 1, GFP_NOFS);
@@ -3583,3 +3463,35 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
        }
 }
 EXPORT_SYMBOL(memmove_extent_buffer);
+
+int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
+{
+       u64 start = page_offset(page);
+       struct extent_buffer *eb;
+       int ret = 1;
+       unsigned long i;
+       unsigned long num_pages;
+
+       spin_lock(&tree->buffer_lock);
+       eb = buffer_search(tree, start);
+       if (!eb)
+               goto out;
+
+       if (atomic_read(&eb->refs) > 1) {
+               ret = 0;
+               goto out;
+       }
+       /* at this point we can safely release the extent buffer */
+       num_pages = num_extent_pages(eb->start, eb->len);
+       for (i = 0; i < num_pages; i++) {
+               struct page *page = extent_buffer_page(eb, i);
+               page_cache_release(page);
+       }
+       rb_erase(&eb->rb_node, &tree->buffer);
+       __free_extent_buffer(eb);
+out:
+       spin_unlock(&tree->buffer_lock);
+       return ret;
+}
+EXPORT_SYMBOL(try_release_extent_buffer);
+