static LIST_HEAD(buffers);
static LIST_HEAD(states);
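+/*
+ * leak_lock protects the global buffers and states lists, which track
+ * every live extent_buffer and extent_state so leaks can be reported
+ * when extent_io_exit runs.
+ */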
+static DEFINE_SPINLOCK(leak_lock);
#define BUFFER_LRU_MAX 64
void extent_io_exit(void)
{
struct extent_state *state;
+ struct extent_buffer *eb;
while (!list_empty(&states)) {
- state = list_entry(states.next, struct extent_state, list);
+ state = list_entry(states.next, struct extent_state, leak_list);
printk("state leak: start %Lu end %Lu state %lu in tree %p refs %d\n", state->start, state->end, state->state, state->tree, atomic_read(&state->refs));
- list_del(&state->list);
+ list_del(&state->leak_list);
kmem_cache_free(extent_state_cache, state);
}
+ while (!list_empty(&buffers)) {
+ eb = list_entry(buffers.next, struct extent_buffer, leak_list);
+ printk("buffer leak start %Lu len %lu refs %d\n", eb->start, eb->len, atomic_read(&eb->refs));
+ list_del(&eb->leak_list);
+ kmem_cache_free(extent_buffer_cache, eb);
+ }
if (extent_state_cache)
kmem_cache_destroy(extent_state_cache);
if (extent_buffer_cache)
struct address_space *mapping, gfp_t mask)
{
tree->state.rb_node = NULL;
+ tree->buffer.rb_node = NULL;
tree->ops = NULL;
tree->dirty_bytes = 0;
spin_lock_init(&tree->lock);
- spin_lock_init(&tree->lru_lock);
+ spin_lock_init(&tree->buffer_lock);
tree->mapping = mapping;
- INIT_LIST_HEAD(&tree->buffer_lru);
- tree->lru_size = 0;
- tree->last = NULL;
}
EXPORT_SYMBOL(extent_io_tree_init);
-void extent_io_tree_empty_lru(struct extent_io_tree *tree)
-{
- struct extent_buffer *eb;
- while(!list_empty(&tree->buffer_lru)) {
- eb = list_entry(tree->buffer_lru.next, struct extent_buffer,
- lru);
- list_del_init(&eb->lru);
- free_extent_buffer(eb);
- }
-}
-EXPORT_SYMBOL(extent_io_tree_empty_lru);
-
struct extent_state *alloc_extent_state(gfp_t mask)
{
struct extent_state *state;
+ unsigned long flags;
state = kmem_cache_alloc(extent_state_cache, mask);
- if (!state || IS_ERR(state))
+ if (!state)
return state;
state->state = 0;
state->private = 0;
state->tree = NULL;
+ spin_lock_irqsave(&leak_lock, flags);
+ list_add(&state->leak_list, &states);
+ spin_unlock_irqrestore(&leak_lock, flags);
atomic_set(&state->refs, 1);
init_waitqueue_head(&state->wq);
if (!state)
return;
if (atomic_dec_and_test(&state->refs)) {
+ unsigned long flags;
WARN_ON(state->tree);
+ spin_lock_irqsave(&leak_lock, flags);
+ list_del(&state->leak_list);
+ spin_unlock_irqrestore(&leak_lock, flags);
kmem_cache_free(extent_state_cache, state);
}
}
struct tree_entry *entry;
struct tree_entry *prev_entry = NULL;
- if (tree->last) {
- struct extent_state *state;
- state = tree->last;
- if (state->start <= offset && offset <= state->end)
- return &tree->last->rb_node;
- }
while(n) {
entry = rb_entry(n, struct tree_entry, rb_node);
prev = n;
else if (offset > entry->end)
n = n->rb_right;
else {
- tree->last = rb_entry(n, struct extent_state, rb_node);
return n;
}
}
ret = __etree_search(tree, offset, &prev, NULL);
if (!ret) {
- if (prev) {
- tree->last = rb_entry(prev, struct extent_state,
- rb_node);
- }
return prev;
}
return ret;
}
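+/*
+ * insert an extent_buffer into the tree's rbtree of buffers, indexed
+ * by start offset.  If a buffer is already cached at that offset it is
+ * returned instead; NULL means the new node was linked in.
+ */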
+static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
+ u64 offset, struct rb_node *node)
+{
+ struct rb_root *root = &tree->buffer;
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct extent_buffer *eb;
+
+ while(*p) {
+ parent = *p;
+ eb = rb_entry(parent, struct extent_buffer, rb_node);
+
+ if (offset < eb->start)
+ p = &(*p)->rb_left;
+ else if (offset > eb->start)
+ p = &(*p)->rb_right;
+ else
+ return eb;
+ }
+
+ rb_link_node(node, parent, p);
+ rb_insert_color(node, root);
+ return NULL;
+}
+
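+/*
+ * find the cached extent_buffer whose start matches offset, or NULL if
+ * there isn't one in this tree.
+ */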
+static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
+ u64 offset)
+{
+ struct rb_root *root = &tree->buffer;
+ struct rb_node *n = root->rb_node;
+ struct extent_buffer *eb;
+
+ while(n) {
+ eb = rb_entry(n, struct extent_buffer, rb_node);
+ if (offset < eb->start)
+ n = n->rb_left;
+ else if (offset > eb->start)
+ n = n->rb_right;
+ else
+ return eb;
+ }
+ return NULL;
+}
+
/*
* utility function to look for merge candidates inside a given range.
* Any extents with matching state are merged together into a single
other->state == state->state) {
state->start = other->start;
other->tree = NULL;
- if (tree->last == other)
- tree->last = NULL;
rb_erase(&other->rb_node, &tree->state);
free_extent_state(other);
}
other->state == state->state) {
other->start = state->start;
state->tree = NULL;
- if (tree->last == state)
- tree->last = NULL;
rb_erase(&state->rb_node, &tree->state);
free_extent_state(state);
}
return -EEXIST;
}
state->tree = tree;
- tree->last = state;
merge_state(tree, state);
return 0;
}
if (delete || state->state == 0) {
if (state->tree) {
clear_state_cb(tree, state, state->state);
- if (tree->last == state)
- tree->last = NULL;
rb_erase(&state->rb_node, &tree->state);
state->tree = NULL;
free_extent_state(state);
}
EXPORT_SYMBOL(set_extent_dirty);
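+/*
+ * mark a range as covered by a pending ordered extent, so that
+ * releasepage and friends know IO on it has not fully completed.
+ */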
+int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_ordered);
+
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask)
{
gfp_t mask)
{
return set_extent_bit(tree, start, end,
- EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
- mask);
+ EXTENT_DELALLOC | EXTENT_DIRTY,
+ 0, NULL, mask);
}
EXPORT_SYMBOL(set_extent_delalloc);
}
EXPORT_SYMBOL(clear_extent_dirty);
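+/* clear the ordered bit once the ordered extent covering a range finishes */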
+int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
+}
+EXPORT_SYMBOL(clear_extent_ordered);
+
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
* our range starts.
*/
node = tree_search(tree, start);
- if (!node || IS_ERR(node)) {
+ if (!node) {
goto out;
}
}
EXPORT_SYMBOL(find_first_extent_bit);
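+/*
+ * like find_first_extent_bit, but returns the matching extent_state
+ * itself rather than copying out its range.  The caller is expected to
+ * hold tree->lock while using the returned state.
+ */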
+struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
+ u64 start, int bits)
+{
+ struct rb_node *node;
+ struct extent_state *state;
+
+ /*
+ * this search will find all the extents that end after
+ * our range starts.
+ */
+ node = tree_search(tree, start);
+ if (!node) {
+ goto out;
+ }
+
+ while(1) {
+ state = rb_entry(node, struct extent_state, rb_node);
+ if (state->end >= start && (state->state & bits)) {
+ return state;
+ }
+ node = rb_next(node);
+ if (!node)
+ break;
+ }
+out:
+ return NULL;
+}
+EXPORT_SYMBOL(find_first_extent_bit_state);
+
u64 find_lock_delalloc_range(struct extent_io_tree *tree,
u64 *start, u64 *end, u64 max_bytes)
{
*/
search_again:
node = tree_search(tree, cur_start);
- if (!node || IS_ERR(node)) {
- *end = (u64)-1;
+ if (!node) {
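+ /* keep any delalloc range we already found */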
+ if (!found)
+ *end = (u64)-1;
goto out;
}
* our range starts.
*/
node = tree_search(tree, cur_start);
- if (!node || IS_ERR(node)) {
+ if (!node) {
goto out;
}
* our range starts.
*/
node = tree_search(tree, start);
- if (!node || IS_ERR(node)) {
+ if (!node) {
ret = -ENOENT;
goto out;
}
* our range starts.
*/
node = tree_search(tree, start);
- if (!node || IS_ERR(node)) {
+ if (!node) {
ret = -ENOENT;
goto out;
}
unsigned int bytes_done, int err)
#endif
{
- const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ int uptodate = err == 0;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
struct extent_state *state = bio->bi_private;
struct extent_io_tree *tree = state->tree;
u64 end;
u64 cur;
int whole_page;
+ int ret;
unsigned long flags;
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
if (--bvec >= bio->bi_io_vec)
prefetchw(&bvec->bv_page->flags);
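+ /*
+ * the end_io hook lets the fs record the result of the write;
+ * a non-zero return marks the range as failed.
+ */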
+ if (tree->ops && tree->ops->writepage_end_io_hook) {
+ ret = tree->ops->writepage_end_io_hook(page, start,
+ end, state, uptodate);
+ if (ret)
+ uptodate = 0;
+ }
+
+ if (!uptodate && tree->ops &&
+ tree->ops->writepage_io_failed_hook) {
+ ret = tree->ops->writepage_io_failed_hook(bio, page,
+ start, end, state);
+ if (ret == 0) {
+ state = NULL;
+ uptodate = (err == 0);
+ continue;
+ }
+ }
if (!uptodate) {
clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
SetPageError(page);
}
- if (tree->ops && tree->ops->writepage_end_io_hook) {
- tree->ops->writepage_end_io_hook(page, start, end,
- state);
- }
-
/*
* bios can get merged in funny ways, and so we need to
* be careful with the state variable. We know the
if (ret)
uptodate = 0;
}
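+ /*
+ * a zero return from the failed_hook means the read was
+ * resubmitted (e.g. against another mirror), so don't unlock
+ * or finish this page yet.
+ */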
+ if (!uptodate && tree->ops &&
+ tree->ops->readpage_io_failed_hook) {
+ ret = tree->ops->readpage_io_failed_hook(bio, page,
+ start, end, state);
+ if (ret == 0) {
+ state = NULL;
+ uptodate =
+ test_bit(BIO_UPTODATE, &bio->bi_flags);
+ continue;
+ }
+ }
spin_lock_irqsave(&tree->lock, flags);
if (!state || state->end != end) {
}
if (!state) {
spin_unlock_irqrestore(&tree->lock, flags);
- set_extent_uptodate(tree, start, end,
- GFP_ATOMIC);
+ if (uptodate)
+ set_extent_uptodate(tree, start, end,
+ GFP_ATOMIC);
unlock_extent(tree, start, end, GFP_ATOMIC);
goto next_io;
}
} else {
state = NULL;
}
- set_state_cb(tree, clear, EXTENT_UPTODATE);
- clear->state |= EXTENT_UPTODATE;
+ if (uptodate) {
+ set_state_cb(tree, clear, EXTENT_UPTODATE);
+ clear->state |= EXTENT_UPTODATE;
+ }
clear_state_bit(tree, clear, EXTENT_LOCKED,
1, 0);
if (cur == start)
}
if (bio) {
+ bio->bi_size = 0;
bio->bi_bdev = bdev;
bio->bi_sector = first_sector;
}
return bio;
}
-static int submit_one_bio(int rw, struct bio *bio)
+static int submit_one_bio(int rw, struct bio *bio, int mirror_num)
{
- u64 maxsector;
int ret = 0;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
struct page *page = bvec->bv_page;
bio_get(bio);
- maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
- if (maxsector < bio->bi_sector) {
- printk("sector too large max %Lu got %llu\n", maxsector,
- (unsigned long long)bio->bi_sector);
- WARN_ON(1);
- }
-
- submit_bio(rw, bio);
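+ /* give the fs a chance to map or clone the bio before it is issued */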
+ if (tree->ops && tree->ops->submit_bio_hook)
+ tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
+ mirror_num);
+ else
+ submit_bio(rw, bio);
if (bio_flagged(bio, BIO_EOPNOTSUPP))
ret = -EOPNOTSUPP;
bio_put(bio);
struct block_device *bdev,
struct bio **bio_ret,
unsigned long max_pages,
- bio_end_io_t end_io_func)
+ bio_end_io_t end_io_func,
+ int mirror_num)
{
int ret = 0;
struct bio *bio;
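+ /*
+ * merge_bio_hook lets the fs veto adding this page to the
+ * pending bio, e.g. when the merge would cross a boundary the
+ * lower layers can't handle as one IO.
+ */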
if (bio_ret && *bio_ret) {
bio = *bio_ret;
if (bio->bi_sector + (bio->bi_size >> 9) != sector ||
+ (tree->ops && tree->ops->merge_bio_hook &&
+ tree->ops->merge_bio_hook(page, offset, size, bio)) ||
bio_add_page(bio, page, size, offset) < size) {
- ret = submit_one_bio(rw, bio);
+ ret = submit_one_bio(rw, bio, mirror_num);
bio = NULL;
} else {
return 0;
if (bio_ret) {
*bio_ret = bio;
} else {
- ret = submit_one_bio(rw, bio);
+ ret = submit_one_bio(rw, bio, mirror_num);
}
return ret;
{
if (!PagePrivate(page)) {
SetPagePrivate(page);
- WARN_ON(!page->mapping->a_ops->invalidatepage);
- set_page_private(page, EXTENT_PAGE_PRIVATE);
page_cache_get(page);
+ set_page_private(page, EXTENT_PAGE_PRIVATE);
}
}
static int __extent_read_full_page(struct extent_io_tree *tree,
struct page *page,
get_extent_t *get_extent,
- struct bio **bio)
+ struct bio **bio, int mirror_num)
{
struct inode *inode = page->mapping->host;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
unlock_extent(tree, cur, end, GFP_NOFS);
break;
}
-
extent_offset = cur - em->start;
+ if (extent_map_end(em) <= cur) {
+printk("bad mapping em [%Lu %Lu] cur %Lu\n", em->start, extent_map_end(em), cur);
+ }
BUG_ON(extent_map_end(em) <= cur);
+ if (end < cur) {
+printk("2bad mapping end %Lu cur %Lu\n", end, cur);
+ }
BUG_ON(end < cur);
iosize = min(extent_map_end(em) - cur, end - cur + 1);
cur + iosize - 1);
}
if (!ret) {
- unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
- nr -= page->index;
+ unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
+ pnr -= page->index;
ret = submit_extent_page(READ, tree, page,
sector, iosize, page_offset,
- bdev, bio, nr,
- end_bio_extent_readpage);
+ bdev, bio, pnr,
+ end_bio_extent_readpage, mirror_num);
+ nr++;
}
if (ret)
SetPageError(page);
cur = cur + iosize;
page_offset += iosize;
- nr++;
}
if (!nr) {
if (!PageError(page))
struct bio *bio = NULL;
int ret;
- ret = __extent_read_full_page(tree, page, get_extent, &bio);
+ ret = __extent_read_full_page(tree, page, get_extent, &bio, 0);
if (bio)
- submit_one_bio(READ, bio);
+ submit_one_bio(READ, bio, 0);
return ret;
}
EXPORT_SYMBOL(extent_read_full_page);
u64 last_byte = i_size_read(inode);
u64 block_start;
u64 iosize;
+ u64 unlock_start;
sector_t sector;
struct extent_map *em;
struct block_device *bdev;
int ret;
int nr = 0;
- size_t page_offset = 0;
+ size_t pg_offset = 0;
size_t blocksize;
loff_t i_size = i_size_read(inode);
unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
u64 delalloc_end;
WARN_ON(!PageLocked(page));
- if (page->index > end_index) {
- clear_extent_dirty(tree, start, page_end, GFP_NOFS);
+ pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
+ if (page->index > end_index ||
+ (page->index == end_index && !pg_offset)) {
+ page->mapping->a_ops->invalidatepage(page, 0);
unlock_page(page);
return 0;
}
if (page->index == end_index) {
char *userpage;
- size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
-
userpage = kmap_atomic(page, KM_USER0);
- memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset);
- flush_dcache_page(page);
+ memset(userpage + pg_offset, 0,
+ PAGE_CACHE_SIZE - pg_offset);
kunmap_atomic(userpage, KM_USER0);
+ flush_dcache_page(page);
}
+ pg_offset = 0;
set_page_extent_mapped(page);
delalloc_start = delalloc_end + 1;
}
lock_extent(tree, start, page_end, GFP_NOFS);
+ unlock_start = start;
+
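+ /*
+ * the start hook gives the fs a chance to do any setup required
+ * before writeout; -EAGAIN means redirty the page and try again
+ * later.
+ */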
+ if (tree->ops && tree->ops->writepage_start_hook) {
+ ret = tree->ops->writepage_start_hook(page, start, page_end);
+ if (ret == -EAGAIN) {
+ unlock_extent(tree, start, page_end, GFP_NOFS);
+ redirty_page_for_writepage(wbc, page);
+ unlock_page(page);
+ return 0;
+ }
+ }
end = page_end;
if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
if (last_byte <= start) {
clear_extent_dirty(tree, start, page_end, GFP_NOFS);
+ unlock_extent(tree, start, page_end, GFP_NOFS);
+ if (tree->ops && tree->ops->writepage_end_io_hook)
+ tree->ops->writepage_end_io_hook(page, start,
+ page_end, NULL, 1);
+ unlock_start = page_end + 1;
goto done;
}
while (cur <= end) {
if (cur >= last_byte) {
clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
+ unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
+ if (tree->ops && tree->ops->writepage_end_io_hook)
+ tree->ops->writepage_end_io_hook(page, cur,
+ page_end, NULL, 1);
+ unlock_start = page_end + 1;
break;
}
- em = epd->get_extent(inode, page, page_offset, cur,
+ em = epd->get_extent(inode, page, pg_offset, cur,
end - cur + 1, 1);
if (IS_ERR(em) || !em) {
SetPageError(page);
block_start == EXTENT_MAP_INLINE) {
clear_extent_dirty(tree, cur,
cur + iosize - 1, GFP_NOFS);
+
+ unlock_extent(tree, unlock_start, cur + iosize -1,
+ GFP_NOFS);
+
+ if (tree->ops && tree->ops->writepage_end_io_hook)
+ tree->ops->writepage_end_io_hook(page, cur,
+ cur + iosize - 1,
+ NULL, 1);
cur = cur + iosize;
- page_offset += iosize;
+ pg_offset += iosize;
+ unlock_start = cur;
continue;
}
if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
EXTENT_DIRTY, 0)) {
cur = cur + iosize;
- page_offset += iosize;
+ pg_offset += iosize;
continue;
}
clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
} else {
ret = 0;
}
- if (ret)
+ if (ret) {
SetPageError(page);
- else {
+ } else {
unsigned long max_nr = end_index + 1;
+
set_range_writeback(tree, cur, cur + iosize - 1);
if (!PageWriteback(page)) {
printk("warning page %lu not writeback, "
}
ret = submit_extent_page(WRITE, tree, page, sector,
- iosize, page_offset, bdev,
+ iosize, pg_offset, bdev,
&epd->bio, max_nr,
- end_bio_extent_writepage);
+ end_bio_extent_writepage, 0);
if (ret)
SetPageError(page);
}
cur = cur + iosize;
- page_offset += iosize;
+ pg_offset += iosize;
nr++;
}
done:
set_page_writeback(page);
end_page_writeback(page);
}
- unlock_extent(tree, start, page_end, GFP_NOFS);
+ if (unlock_start <= page_end)
+ unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
unlock_page(page);
return 0;
}
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
/* Taken directly from 2.6.23 for 2.6.18 back port */
typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
void *data);
write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd);
if (epd.bio) {
- submit_one_bio(WRITE, epd.bio);
+ submit_one_bio(WRITE, epd.bio, 0);
}
return ret;
}
ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
if (epd.bio) {
- submit_one_bio(WRITE, epd.bio);
+ submit_one_bio(WRITE, epd.bio, 0);
}
return ret;
}
page_cache_get(page);
if (!pagevec_add(&pvec, page))
__pagevec_lru_add(&pvec);
- __extent_read_full_page(tree, page, get_extent, &bio);
+ __extent_read_full_page(tree, page, get_extent,
+ &bio, 0);
}
page_cache_release(page);
}
__pagevec_lru_add(&pvec);
BUG_ON(!list_empty(pages));
if (bio)
- submit_one_bio(READ, bio);
+ submit_one_bio(READ, bio, 0);
return 0;
}
EXPORT_SYMBOL(extent_readpages);
ret = submit_extent_page(READ, tree, page,
sector, iosize, page_offset, em->bdev,
NULL, 1,
- end_bio_extent_preparewrite);
+ end_bio_extent_preparewrite, 0);
iocount++;
block_start = block_start + iosize;
} else {
}
EXPORT_SYMBOL(extent_prepare_write);
+/*
+ * a helper for releasepage, this tests for areas of the page that
+ * are locked or under IO and drops the related state bits if it is safe
+ * to drop the page.
+ */
+int try_release_extent_state(struct extent_map_tree *map,
+ struct extent_io_tree *tree, struct page *page,
+ gfp_t mask)
+{
+ u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+ u64 end = start + PAGE_CACHE_SIZE - 1;
+ int ret = 1;
+
+ if (test_range_bit(tree, start, end,
+ EXTENT_IOBITS | EXTENT_ORDERED, 0))
+ ret = 0;
+ else {
+ if ((mask & GFP_NOFS) == GFP_NOFS)
+ mask = GFP_NOFS;
+ clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
+ 1, 1, mask);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(try_release_extent_state);
+
/*
* a helper for releasepage. As long as there are no locked extents
* in the range corresponding to the page, both state records and extent
struct extent_map *em;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 end = start + PAGE_CACHE_SIZE - 1;
- u64 orig_start = start;
- int ret = 1;
+
if ((mask & __GFP_WAIT) &&
page->mapping->host->i_size > 16 * 1024 * 1024) {
u64 len;
spin_unlock(&map->lock);
break;
}
- if (em->start != start) {
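+ /* pinned mappings still have IO pending against them, keep them */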
+ if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
+ em->start != start) {
spin_unlock(&map->lock);
free_extent_map(em);
break;
free_extent_map(em);
}
}
- if (test_range_bit(tree, orig_start, end, EXTENT_IOBITS, 0))
- ret = 0;
- else {
- if ((mask & GFP_NOFS) == GFP_NOFS)
- mask = GFP_NOFS;
- clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
- 1, 1, mask);
- }
- return ret;
+ return try_release_extent_state(map, tree, page, mask);
}
EXPORT_SYMBOL(try_release_extent_mapping);
return sector;
}
-static int add_lru(struct extent_io_tree *tree, struct extent_buffer *eb)
-{
- if (list_empty(&eb->lru)) {
- extent_buffer_get(eb);
- list_add(&eb->lru, &tree->buffer_lru);
- tree->lru_size++;
- if (tree->lru_size >= BUFFER_LRU_MAX) {
- struct extent_buffer *rm;
- rm = list_entry(tree->buffer_lru.prev,
- struct extent_buffer, lru);
- tree->lru_size--;
- list_del_init(&rm->lru);
- free_extent_buffer(rm);
- }
- } else
- list_move(&eb->lru, &tree->buffer_lru);
- return 0;
-}
-static struct extent_buffer *find_lru(struct extent_io_tree *tree,
- u64 start, unsigned long len)
-{
- struct list_head *lru = &tree->buffer_lru;
- struct list_head *cur = lru->next;
- struct extent_buffer *eb;
-
- if (list_empty(lru))
- return NULL;
-
- do {
- eb = list_entry(cur, struct extent_buffer, lru);
- if (eb->start == start && eb->len == len) {
- extent_buffer_get(eb);
- return eb;
- }
- cur = cur->next;
- } while (cur != lru);
- return NULL;
-}
-
-static inline unsigned long num_extent_pages(u64 start, u64 len)
-{
- return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
- (start >> PAGE_CACHE_SHIFT);
-}
-
static inline struct page *extent_buffer_page(struct extent_buffer *eb,
unsigned long i)
{
return p;
}
+static inline unsigned long num_extent_pages(u64 start, u64 len)
+{
+ return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
+ (start >> PAGE_CACHE_SHIFT);
+}
+
static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
u64 start,
unsigned long len,
gfp_t mask)
{
struct extent_buffer *eb = NULL;
-
- spin_lock(&tree->lru_lock);
- eb = find_lru(tree, start, len);
- spin_unlock(&tree->lru_lock);
- if (eb) {
- return eb;
- }
+ unsigned long flags;
eb = kmem_cache_zalloc(extent_buffer_cache, mask);
- INIT_LIST_HEAD(&eb->lru);
eb->start = start;
eb->len = len;
+ mutex_init(&eb->mutex);
+ spin_lock_irqsave(&leak_lock, flags);
+ list_add(&eb->leak_list, &buffers);
+ spin_unlock_irqrestore(&leak_lock, flags);
atomic_set(&eb->refs, 1);
return eb;
static void __free_extent_buffer(struct extent_buffer *eb)
{
+ unsigned long flags;
+ spin_lock_irqsave(&leak_lock, flags);
+ list_del(&eb->leak_list);
+ spin_unlock_irqrestore(&leak_lock, flags);
kmem_cache_free(extent_buffer_cache, eb);
}
unsigned long i;
unsigned long index = start >> PAGE_CACHE_SHIFT;
struct extent_buffer *eb;
+ struct extent_buffer *exists = NULL;
struct page *p;
struct address_space *mapping = tree->mapping;
int uptodate = 1;
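+ /* first see if the buffer is already cached in this tree */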
+ spin_lock(&tree->buffer_lock);
+ eb = buffer_search(tree, start);
+ if (eb) {
+ atomic_inc(&eb->refs);
+ spin_unlock(&tree->buffer_lock);
+ return eb;
+ }
+ spin_unlock(&tree->buffer_lock);
+
eb = __alloc_extent_buffer(tree, start, len, mask);
- if (!eb || IS_ERR(eb))
+ if (!eb)
return NULL;
- if (eb->flags & EXTENT_BUFFER_FILLED)
- goto lru_add;
-
if (page0) {
eb->first_page = page0;
i = 1;
page_cache_get(page0);
mark_page_accessed(page0);
set_page_extent_mapped(page0);
- WARN_ON(!PageUptodate(page0));
set_page_extent_head(page0, len);
+ uptodate = PageUptodate(page0);
} else {
i = 0;
}
p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
if (!p) {
WARN_ON(1);
- goto fail;
+ goto free_eb;
}
set_page_extent_mapped(p);
mark_page_accessed(p);
eb->flags |= EXTENT_UPTODATE;
eb->flags |= EXTENT_BUFFER_FILLED;
-lru_add:
- spin_lock(&tree->lru_lock);
- add_lru(tree, eb);
- spin_unlock(&tree->lru_lock);
+ spin_lock(&tree->buffer_lock);
+ exists = buffer_tree_insert(tree, start, &eb->rb_node);
+ if (exists) {
+ /* add one reference for the caller */
+ atomic_inc(&exists->refs);
+ spin_unlock(&tree->buffer_lock);
+ goto free_eb;
+ }
+ spin_unlock(&tree->buffer_lock);
+
+ /* add one reference for the tree */
+ atomic_inc(&eb->refs);
return eb;
-fail:
- spin_lock(&tree->lru_lock);
- list_del_init(&eb->lru);
- spin_unlock(&tree->lru_lock);
+free_eb:
if (!atomic_dec_and_test(&eb->refs))
- return NULL;
- for (index = 1; index < i; index++) {
+ return exists;
+ for (index = 1; index < i; index++)
page_cache_release(extent_buffer_page(eb, index));
- }
- if (i > 0)
- page_cache_release(extent_buffer_page(eb, 0));
+ page_cache_release(extent_buffer_page(eb, 0));
__free_extent_buffer(eb);
- return NULL;
+ return exists;
}
EXPORT_SYMBOL(alloc_extent_buffer);
u64 start, unsigned long len,
gfp_t mask)
{
- unsigned long num_pages = num_extent_pages(start, len);
- unsigned long i;
- unsigned long index = start >> PAGE_CACHE_SHIFT;
struct extent_buffer *eb;
- struct page *p;
- struct address_space *mapping = tree->mapping;
- int uptodate = 1;
-
- eb = __alloc_extent_buffer(tree, start, len, mask);
- if (!eb || IS_ERR(eb))
- return NULL;
-
- if (eb->flags & EXTENT_BUFFER_FILLED)
- goto lru_add;
- for (i = 0; i < num_pages; i++, index++) {
- p = find_lock_page(mapping, index);
- if (!p) {
- goto fail;
- }
- set_page_extent_mapped(p);
- mark_page_accessed(p);
-
- if (i == 0) {
- eb->first_page = p;
- set_page_extent_head(p, len);
- } else {
- set_page_private(p, EXTENT_PAGE_PRIVATE);
- }
-
- if (!PageUptodate(p))
- uptodate = 0;
- unlock_page(p);
- }
- if (uptodate)
- eb->flags |= EXTENT_UPTODATE;
- eb->flags |= EXTENT_BUFFER_FILLED;
+ spin_lock(&tree->buffer_lock);
+ eb = buffer_search(tree, start);
+ if (eb)
+ atomic_inc(&eb->refs);
+ spin_unlock(&tree->buffer_lock);
-lru_add:
- spin_lock(&tree->lru_lock);
- add_lru(tree, eb);
- spin_unlock(&tree->lru_lock);
return eb;
-fail:
- spin_lock(&tree->lru_lock);
- list_del_init(&eb->lru);
- spin_unlock(&tree->lru_lock);
- if (!atomic_dec_and_test(&eb->refs))
- return NULL;
- for (index = 1; index < i; index++) {
- page_cache_release(extent_buffer_page(eb, index));
- }
- if (i > 0)
- page_cache_release(extent_buffer_page(eb, 0));
- __free_extent_buffer(eb);
- return NULL;
}
EXPORT_SYMBOL(find_extent_buffer);
void free_extent_buffer(struct extent_buffer *eb)
{
- unsigned long i;
- unsigned long num_pages;
-
if (!eb)
return;
if (!atomic_dec_and_test(&eb->refs))
return;
- WARN_ON(!list_empty(&eb->lru));
- num_pages = num_extent_pages(eb->start, eb->len);
-
- for (i = 1; i < num_pages; i++) {
- page_cache_release(extent_buffer_page(eb, i));
- }
- page_cache_release(extent_buffer_page(eb, 0));
- __free_extent_buffer(eb);
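+ /*
+ * the rbtree holds its own reference, so the count should never
+ * reach zero here; buffers are really freed by
+ * try_release_extent_buffer once only the tree reference is left.
+ */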
+ WARN_ON(1);
}
EXPORT_SYMBOL(free_extent_buffer);
}
EXPORT_SYMBOL(set_extent_buffer_dirty);
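+/*
+ * clear the uptodate tracking for a buffer in both the state tree and
+ * the page flags, forcing the next read to go to disk.
+ */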
+int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
+ struct extent_buffer *eb)
+{
+ unsigned long i;
+ struct page *page;
+ unsigned long num_pages;
+
+ num_pages = num_extent_pages(eb->start, eb->len);
+ eb->flags &= ~EXTENT_UPTODATE;
+
+ clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+ GFP_NOFS);
+ for (i = 0; i < num_pages; i++) {
+ page = extent_buffer_page(eb, i);
+ ClearPageUptodate(page);
+ }
+ return 0;
+}
+
int set_extent_buffer_uptodate(struct extent_io_tree *tree,
struct extent_buffer *eb)
{
}
EXPORT_SYMBOL(set_extent_buffer_uptodate);
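+/*
+ * return 1 if every page backing the byte range is uptodate, checking
+ * the state tree first and falling back to the individual page flags.
+ */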
+int extent_range_uptodate(struct extent_io_tree *tree,
+ u64 start, u64 end)
+{
+ struct page *page;
+ int ret;
+ int pg_uptodate = 1;
+ int uptodate;
+ unsigned long index;
+
+ ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1);
+ if (ret)
+ return 1;
+ while(start <= end) {
+ index = start >> PAGE_CACHE_SHIFT;
+ page = find_get_page(tree->mapping, index);
+ if (!page) {
+ pg_uptodate = 0;
+ break;
+ }
+ uptodate = PageUptodate(page);
+ page_cache_release(page);
+ if (!uptodate) {
+ pg_uptodate = 0;
+ break;
+ }
+ start += PAGE_CACHE_SIZE;
+ }
+ return pg_uptodate;
+}
+
int extent_buffer_uptodate(struct extent_io_tree *tree,
- struct extent_buffer *eb)
+ struct extent_buffer *eb)
{
+ int ret = 0;
+ unsigned long num_pages;
+ unsigned long i;
+ struct page *page;
+ int pg_uptodate = 1;
+
if (eb->flags & EXTENT_UPTODATE)
return 1;
- return test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+
+ ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
EXTENT_UPTODATE, 1);
+ if (ret)
+ return ret;
+
+ num_pages = num_extent_pages(eb->start, eb->len);
+ for (i = 0; i < num_pages; i++) {
+ page = extent_buffer_page(eb, i);
+ if (!PageUptodate(page)) {
+ pg_uptodate = 0;
+ break;
+ }
+ }
+ return pg_uptodate;
}
EXPORT_SYMBOL(extent_buffer_uptodate);
int read_extent_buffer_pages(struct extent_io_tree *tree,
struct extent_buffer *eb,
u64 start, int wait,
- get_extent_t *get_extent)
+ get_extent_t *get_extent, int mirror_num)
{
unsigned long i;
unsigned long start_i;
struct page *page;
int err;
int ret = 0;
+ int locked_pages = 0;
+ int all_uptodate = 1;
+ int inc_all_pages = 0;
unsigned long num_pages;
struct bio *bio = NULL;
-
if (eb->flags & EXTENT_UPTODATE)
return 0;
- if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+ if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
EXTENT_UPTODATE, 1)) {
return 0;
}
num_pages = num_extent_pages(eb->start, eb->len);
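+ /*
+ * lock all the pages up front; if every one is already uptodate
+ * we can skip issuing reads entirely.
+ */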
for (i = start_i; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
- if (PageUptodate(page)) {
- continue;
- }
if (!wait) {
- if (TestSetPageLocked(page)) {
- continue;
- }
+ if (TestSetPageLocked(page))
+ goto unlock_exit;
} else {
lock_page(page);
}
+ locked_pages++;
if (!PageUptodate(page)) {
+ all_uptodate = 0;
+ }
+ }
+ if (all_uptodate) {
+ if (start_i == 0)
+ eb->flags |= EXTENT_UPTODATE;
+ goto unlock_exit;
+ }
+
+ for (i = start_i; i < num_pages; i++) {
+ page = extent_buffer_page(eb, i);
+ if (inc_all_pages)
+ page_cache_get(page);
+ if (!PageUptodate(page)) {
+ if (start_i == 0)
+ inc_all_pages = 1;
+ ClearPageError(page);
err = __extent_read_full_page(tree, page,
- get_extent, &bio);
+ get_extent, &bio,
+ mirror_num);
if (err) {
ret = err;
}
}
if (bio)
- submit_one_bio(READ, bio);
+ submit_one_bio(READ, bio, mirror_num);
if (ret || !wait) {
return ret;
if (!ret)
eb->flags |= EXTENT_UPTODATE;
return ret;
+
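+/* unwind the page locks taken above when bailing out early */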
+unlock_exit:
+ i = start_i;
+ while(locked_pages > 0) {
+ page = extent_buffer_page(eb, i);
+ i++;
+ unlock_page(page);
+ locked_pages--;
+ }
+ return ret;
}
EXPORT_SYMBOL(read_extent_buffer_pages);
char *dst = (char *)dstv;
size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
- unsigned long num_pages = num_extent_pages(eb->start, eb->len);
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
while(len > 0) {
page = extent_buffer_page(eb, i);
- if (!PageUptodate(page)) {
- printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len);
- WARN_ON(1);
- }
- WARN_ON(!PageUptodate(page));
cur = min(len, (PAGE_CACHE_SIZE - offset));
kaddr = kmap_atomic(page, KM_USER1);
}
p = extent_buffer_page(eb, i);
- WARN_ON(!PageUptodate(p));
kaddr = kmap_atomic(p, km);
*token = kaddr;
*map = kaddr + offset;
while(len > 0) {
page = extent_buffer_page(eb, i);
- WARN_ON(!PageUptodate(page));
cur = min(len, (PAGE_CACHE_SIZE - offset));
}
}
EXPORT_SYMBOL(memmove_extent_buffer);
+
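+/*
+ * releasepage helper for extent buffers: free the buffer cached on this
+ * page when nothing but the tree's own reference remains.  Returns 1 if
+ * the buffer could be released.
+ */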
+int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
+{
+ u64 start = page_offset(page);
+ struct extent_buffer *eb;
+ int ret = 1;
+ unsigned long i;
+ unsigned long num_pages;
+
+ spin_lock(&tree->buffer_lock);
+ eb = buffer_search(tree, start);
+ if (!eb)
+ goto out;
+
+ if (atomic_read(&eb->refs) > 1) {
+ ret = 0;
+ goto out;
+ }
+ /* at this point we can safely release the extent buffer */
+ num_pages = num_extent_pages(eb->start, eb->len);
+ for (i = 0; i < num_pages; i++) {
+ struct page *page = extent_buffer_page(eb, i);
+ page_cache_release(page);
+ }
+ rb_erase(&eb->rb_node, &tree->buffer);
+ __free_extent_buffer(eb);
+out:
+ spin_unlock(&tree->buffer_lock);
+ return ret;
+}
+EXPORT_SYMBOL(try_release_extent_buffer);
+