1 #include <linux/bitops.h>
2 #include <linux/slab.h>
6 #include <linux/pagemap.h>
7 #include <linux/page-flags.h>
8 #include <linux/module.h>
9 #include <linux/spinlock.h>
10 #include <linux/blkdev.h>
11 #include <linux/swap.h>
12 #include <linux/version.h>
13 #include <linux/writeback.h>
14 #include <linux/pagevec.h>
15 #include "extent_io.h"
16 #include "extent_map.h"
18 /* temporary define until extent_map moves out of btrfs */
/*
 * Prototype for the btrfs slab-cache creation helper (defined elsewhere).
 * NOTE(review): the ctor argument list continues on lines not visible in
 * this chunk.
 */
19 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
20 unsigned long extra_flags,
21 void (*ctor)(void *, struct kmem_cache *,
24 static struct kmem_cache *extent_state_cache;	/* slab for struct extent_state */
25 static struct kmem_cache *extent_buffer_cache;	/* slab for struct extent_buffer */
/* global debug list of live extent buffers; consumers not visible in this chunk */
27 static LIST_HEAD(buffers);
/* global debug list of live extent states; walked at module exit to report leaks */
28 static LIST_HEAD(states);
/*
 * Protects the global 'states' debug list (see alloc_extent_state /
 * free_extent_state).  SPIN_LOCK_UNLOCKED as a static initializer is
 * deprecated (and later removed from the kernel); DEFINE_SPINLOCK is
 * the supported spelling and is safe on all lock-debugging configs.
 */
30 static DEFINE_SPINLOCK(state_lock);
/* presumably caps the per-tree buffer_lru length — usage not visible here */
31 #define BUFFER_LRU_MAX 64
/* rb-tree linkage member; the enclosing struct's head line is elided */
37 struct rb_node rb_node;
/*
 * Bundle of per-call state for page writeout paths: the io tree plus the
 * extent-mapping callback.  NOTE(review): consumers are outside this chunk.
 */
40 struct extent_page_data {
42 struct extent_io_tree *tree;
43 get_extent_t *get_extent;
/*
 * Module init: create the slab caches for extent_state and extent_buffer
 * objects.  If the second cache cannot be created, the first is destroyed
 * on the error path below.
 */
46 int __init extent_io_init(void)
48 extent_state_cache = btrfs_cache_create("extent_state",
49 sizeof(struct extent_state), 0,
51 if (!extent_state_cache)
54 extent_buffer_cache = btrfs_cache_create("extent_buffers",
55 sizeof(struct extent_buffer), 0,
57 if (!extent_buffer_cache)
58 goto free_state_cache;
/* error unwind target: second cache failed, tear down the first */
62 kmem_cache_destroy(extent_state_cache);
/*
 * Module exit: any extent_state still on the global 'states' list is a
 * leak — report it, unlink it, and free it; then destroy both slab caches.
 */
66 void extent_io_exit(void)
68 struct extent_state *state;
70 while (!list_empty(&states)) {
71 state = list_entry(states.next, struct extent_state, list);
/* NOTE(review): printk lacks a KERN_* level prefix */
72 printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs));
73 list_del(&state->list);
74 kmem_cache_free(extent_state_cache, state);
78 if (extent_state_cache)
79 kmem_cache_destroy(extent_state_cache);
80 if (extent_buffer_cache)
81 kmem_cache_destroy(extent_buffer_cache);
/*
 * Initialize an extent_io_tree: empty state rb-tree, zeroed dirty-byte
 * counter, rwlock for the state tree, spinlock + list head for the
 * buffer LRU, and the backing address_space.
 */
84 void extent_io_tree_init(struct extent_io_tree *tree,
85 struct address_space *mapping, gfp_t mask)
87 tree->state.rb_node = NULL;
89 tree->dirty_bytes = 0;
90 rwlock_init(&tree->lock);
91 spin_lock_init(&tree->lru_lock);
92 tree->mapping = mapping;
93 INIT_LIST_HEAD(&tree->buffer_lru);
96 EXPORT_SYMBOL(extent_io_tree_init);
/*
 * Drop every extent_buffer from the tree's LRU list, releasing the LRU's
 * reference on each via free_extent_buffer().
 */
98 void extent_io_tree_empty_lru(struct extent_io_tree *tree)
100 struct extent_buffer *eb;
101 while(!list_empty(&tree->buffer_lru)) {
102 eb = list_entry(tree->buffer_lru.next, struct extent_buffer,
104 list_del_init(&eb->lru);
105 free_extent_buffer(eb);
108 EXPORT_SYMBOL(extent_io_tree_empty_lru);
/*
 * Allocate an extent_state from the slab, link it onto the global debug
 * list under state_lock, and initialize its refcount (1) and waitqueue.
 * NOTE(review): the early-return path for allocation failure is on lines
 * elided from this chunk.
 */
110 struct extent_state *alloc_extent_state(gfp_t mask)
112 struct extent_state *state;
115 state = kmem_cache_alloc(extent_state_cache, mask);
116 if (!state || IS_ERR(state))
122 spin_lock_irqsave(&state_lock, flags);
123 list_add(&state->list, &states);
124 spin_unlock_irqrestore(&state_lock, flags);
126 atomic_set(&state->refs, 1);
127 init_waitqueue_head(&state->wq);
130 EXPORT_SYMBOL(alloc_extent_state);
/*
 * Drop a reference on an extent_state.  On the final reference the state
 * must already be out of any tree (WARN otherwise); it is unlinked from
 * the global debug list under state_lock and returned to the slab.
 */
132 void free_extent_state(struct extent_state *state)
137 if (atomic_dec_and_test(&state->refs)) {
138 WARN_ON(state->in_tree);
139 spin_lock_irqsave(&state_lock, flags);
140 list_del(&state->list);
141 spin_unlock_irqrestore(&state_lock, flags);
142 kmem_cache_free(extent_state_cache, state);
145 EXPORT_SYMBOL(free_extent_state);
/*
 * Insert 'node' into an rb-tree whose entries carry [start,end] ranges,
 * descending left/right on 'offset' versus each entry's range.
 * NOTE(review): the branch that returns an already-present overlapping
 * node is on elided lines; only the link + recolor tail is visible.
 */
147 static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
148 struct rb_node *node)
150 struct rb_node ** p = &root->rb_node;
151 struct rb_node * parent = NULL;
152 struct tree_entry *entry;
156 entry = rb_entry(parent, struct tree_entry, rb_node);
158 if (offset < entry->start)
160 else if (offset > entry->end)
166 entry = rb_entry(node, struct tree_entry, rb_node);
168 rb_link_node(node, parent, p);
169 rb_insert_color(node, root);
/*
 * Search the range tree for the entry containing 'offset'.  When no entry
 * contains it, the two fixup loops below advance/retreat so that
 * *prev_ret points at the first entry ending after offset and *next_ret
 * at the last entry starting before it (exact return paths are on elided
 * lines).
 */
173 static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
174 struct rb_node **prev_ret,
175 struct rb_node **next_ret)
177 struct rb_node * n = root->rb_node;
178 struct rb_node *prev = NULL;
179 struct rb_node *orig_prev = NULL;
180 struct tree_entry *entry;
181 struct tree_entry *prev_entry = NULL;
184 entry = rb_entry(n, struct tree_entry, rb_node);
188 if (offset < entry->start)
190 else if (offset > entry->end)
/* walk forward until an entry ends at or after offset */
198 while(prev && offset > prev_entry->end) {
199 prev = rb_next(prev);
200 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
207 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
/* walk backward until an entry starts at or before offset */
208 while(prev && offset < prev_entry->start) {
209 prev = rb_prev(prev);
210 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
/*
 * Convenience wrapper around __tree_search: only the "first entry ending
 * after offset" hint is wanted; the return-value handling is elided here.
 */
217 static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
219 struct rb_node *prev;
221 ret = __tree_search(root, offset, &prev, NULL);
228 * utility function to look for merge candidates inside a given range.
229 * Any extents with matching state are merged together into a single
230 * extent in the tree. Extents with EXTENT_IO in their state field
231 * are not merged because the end_io handlers need to be able to do
232 * operations on them without sleeping (or doing allocations/splits).
234 * This should be called with the tree lock held.
236 static int merge_state(struct extent_io_tree *tree,
237 struct extent_state *state)
239 struct extent_state *other;
240 struct rb_node *other_node;
/* states under IO are never merged (see comment above) */
242 if (state->state & EXTENT_IOBITS)
/* try to absorb the adjacent state on the left */
245 other_node = rb_prev(&state->rb_node);
247 other = rb_entry(other_node, struct extent_state, rb_node);
248 if (other->end == state->start - 1 &&
249 other->state == state->state) {
250 state->start = other->start;
252 rb_erase(&other->rb_node, &tree->state);
253 free_extent_state(other);
/* try to merge into the adjacent state on the right */
256 other_node = rb_next(&state->rb_node);
258 other = rb_entry(other_node, struct extent_state, rb_node);
259 if (other->start == state->end + 1 &&
260 other->state == state->state) {
261 other->start = state->start;
263 rb_erase(&state->rb_node, &tree->state);
264 free_extent_state(state);
271 * insert an extent_state struct into the tree. 'bits' are set on the
272 * struct before it is inserted.
274 * This may return -EEXIST if the extent is already there, in which case the
275 * state struct is freed.
277 * The tree lock is not taken internally. This is a utility function and
278 * probably isn't what you want to call (see set/clear_extent_bit).
280 static int insert_state(struct extent_io_tree *tree,
281 struct extent_state *state, u64 start, u64 end,
284 struct rb_node *node;
/* NOTE(review): printk lacks a KERN_* level prefix */
287 printk("end < start %Lu %Lu\n", end, start);
/* keep the per-tree dirty-byte accounting in sync with EXTENT_DIRTY */
290 if (bits & EXTENT_DIRTY)
291 tree->dirty_bytes += end - start + 1;
292 state->state |= bits;
293 state->start = start;
295 node = tree_insert(&tree->state, end, &state->rb_node);
/* collision: an overlapping state already exists; free ours */
297 struct extent_state *found;
298 found = rb_entry(node, struct extent_state, rb_node);
299 printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end);
300 free_extent_state(state);
303 merge_state(tree, state);
308 * split a given extent state struct in two, inserting the preallocated
309 * struct 'prealloc' as the newly created second half. 'split' indicates an
310 * offset inside 'orig' where it should be split.
313 * the tree has 'orig' at [orig->start, orig->end]. After calling, there
314 * are two extent state structs in the tree:
315 * prealloc: [orig->start, split - 1]
316 * orig: [ split, orig->end ]
318 * The tree locks are not taken by this function. They need to be held
321 static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
322 struct extent_state *prealloc, u64 split)
324 struct rb_node *node;
/* prealloc becomes the front half [orig->start, split - 1] */
325 prealloc->start = orig->start;
326 prealloc->end = split - 1;
327 prealloc->state = orig->state;
330 node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
/* collision should be impossible here; report and free the prealloc */
332 struct extent_state *found;
333 found = rb_entry(node, struct extent_state, rb_node);
334 printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end);
335 free_extent_state(prealloc);
342 * utility function to clear some bits in an extent state struct.
343 * it will optionally wake up any one waiting on this state (wake == 1), or
344 * forcibly remove the state from the tree (delete == 1).
346 * If no bits are set on the state struct after clearing things, the
347 * struct is freed and removed from the tree
349 static int clear_state_bit(struct extent_io_tree *tree,
350 struct extent_state *state, int bits, int wake,
/* return value: which of the requested bits were actually set */
353 int ret = state->state & bits;
/* keep dirty-byte accounting in sync when EXTENT_DIRTY is dropped */
355 if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
356 u64 range = state->end - state->start + 1;
357 WARN_ON(range > tree->dirty_bytes);
358 tree->dirty_bytes -= range;
360 state->state &= ~bits;
/* no bits left (or forced delete): unlink and free the state */
363 if (delete || state->state == 0) {
364 if (state->in_tree) {
365 rb_erase(&state->rb_node, &tree->state);
367 free_extent_state(state);
/* otherwise see whether the trimmed state can merge with neighbors */
372 merge_state(tree, state);
378 * clear some bits on a range in the tree. This may require splitting
379 * or inserting elements in the tree, so the gfp mask is used to
380 * indicate which allocations or sleeping are allowed.
382 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
383 * the given range from the tree regardless of state (ie for truncate).
385 * the range [start, end] is inclusive.
387 * This takes the tree lock, and returns < 0 on error, > 0 if any of the
388 * bits were already set, or zero if none of the bits were already set.
390 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
391 int bits, int wake, int delete, gfp_t mask)
393 struct extent_state *state;
394 struct extent_state *prealloc = NULL;
395 struct rb_node *node;
/* preallocate a state for splits before taking the lock, if we may sleep */
401 if (!prealloc && (mask & __GFP_WAIT)) {
402 prealloc = alloc_extent_state(mask);
407 write_lock_irqsave(&tree->lock, flags);
409 * this search will find the extents that end after
412 node = tree_search(&tree->state, start);
415 state = rb_entry(node, struct extent_state, rb_node);
416 if (state->start > end)
418 WARN_ON(state->end < start);
421 * | ---- desired range ---- |
423 * | ------------- state -------------- |
425 * We need to split the extent we found, and may flip
426 * bits on second half.
428 * If the extent we found extends past our range, we
429 * just split and search again. It'll get split again
430 * the next time though.
432 * If the extent we found is inside our range, we clear
433 * the desired bit on it.
436 if (state->start < start) {
437 err = split_state(tree, state, prealloc, start);
438 BUG_ON(err == -EEXIST);
/* second half is fully inside the range: clear and advance */
442 if (state->end <= end) {
443 start = state->end + 1;
444 set |= clear_state_bit(tree, state, bits,
447 start = state->start;
452 * | ---- desired range ---- |
454 * We need to split the extent, and clear the bit
457 if (state->start <= end && state->end > end) {
458 err = split_state(tree, state, prealloc, end + 1);
459 BUG_ON(err == -EEXIST);
463 set |= clear_state_bit(tree, prealloc, bits,
/* state fully covered by the range */
469 start = state->end + 1;
470 set |= clear_state_bit(tree, state, bits, wake, delete);
474 write_unlock_irqrestore(&tree->lock, flags);
476 free_extent_state(prealloc);
/* out path: drop lock and possibly retry/cond_resched (elided lines) */
483 write_unlock_irqrestore(&tree->lock, flags);
484 if (mask & __GFP_WAIT)
488 EXPORT_SYMBOL(clear_extent_bit);
/*
 * Sleep until someone wakes this state's waitqueue.  The tree read lock
 * is dropped around the sleep and retaken afterwards; the actual
 * schedule() call is on a line elided from this chunk.
 */
490 static int wait_on_state(struct extent_io_tree *tree,
491 struct extent_state *state)
494 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
495 read_unlock_irq(&tree->lock);
497 read_lock_irq(&tree->lock);
498 finish_wait(&state->wq, &wait);
503 * waits for one or more bits to clear on a range in the state tree.
504 * The range [start, end] is inclusive.
505 * The tree lock is taken by this function
507 int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
509 struct extent_state *state;
510 struct rb_node *node;
512 read_lock_irq(&tree->lock);
516 * this search will find all the extents that end after
519 node = tree_search(&tree->state, start);
523 state = rb_entry(node, struct extent_state, rb_node);
525 if (state->start > end)
/* hold an extra ref across the sleep so the state can't vanish */
528 if (state->state & bits) {
529 start = state->start;
530 atomic_inc(&state->refs);
531 wait_on_state(tree, state);
532 free_extent_state(state);
535 start = state->end + 1;
/* be polite: yield the lock if a reschedule is pending */
540 if (need_resched()) {
541 read_unlock_irq(&tree->lock);
543 read_lock_irq(&tree->lock);
547 read_unlock_irq(&tree->lock);
550 EXPORT_SYMBOL(wait_extent_bit);
/*
 * OR 'bits' into a state, bumping the tree's dirty-byte accounting when
 * EXTENT_DIRTY is newly set on this state.
 */
552 static void set_state_bits(struct extent_io_tree *tree,
553 struct extent_state *state,
556 if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
557 u64 range = state->end - state->start + 1;
558 tree->dirty_bytes += range;
560 state->state |= bits;
564 * set some bits on a range in the tree. This may require allocations
565 * or sleeping, so the gfp mask is used to indicate what is allowed.
567 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
568 * range already has the desired bits set. The start of the existing
569 * range is returned in failed_start in this case.
571 * [start, end] is inclusive
572 * This takes the tree lock.
574 int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
575 int exclusive, u64 *failed_start, gfp_t mask)
577 struct extent_state *state;
578 struct extent_state *prealloc = NULL;
579 struct rb_node *node;
/* preallocate a state for splits/inserts before taking the lock */
586 if (!prealloc && (mask & __GFP_WAIT)) {
587 prealloc = alloc_extent_state(mask);
592 write_lock_irqsave(&tree->lock, flags);
594 * this search will find all the extents that end after
597 node = tree_search(&tree->state, start);
/* nothing overlaps: insert the whole range as one new state */
599 err = insert_state(tree, prealloc, start, end, bits);
601 BUG_ON(err == -EEXIST);
605 state = rb_entry(node, struct extent_state, rb_node);
606 last_start = state->start;
607 last_end = state->end;
610 * | ---- desired range ---- |
613 * Just lock what we found and keep going
615 if (state->start == start && state->end <= end) {
616 set = state->state & bits;
617 if (set && exclusive) {
618 *failed_start = state->start;
622 set_state_bits(tree, state, bits);
623 start = state->end + 1;
624 merge_state(tree, state);
629 * | ---- desired range ---- |
632 * | ------------- state -------------- |
634 * We need to split the extent we found, and may flip bits on
637 * If the extent we found extends past our
638 * range, we just split and search again. It'll get split
639 * again the next time though.
641 * If the extent we found is inside our range, we set the
644 if (state->start < start) {
645 set = state->state & bits;
646 if (exclusive && set) {
647 *failed_start = start;
651 err = split_state(tree, state, prealloc, start);
652 BUG_ON(err == -EEXIST);
656 if (state->end <= end) {
657 set_state_bits(tree, state, bits);
658 start = state->end + 1;
659 merge_state(tree, state);
661 start = state->start;
666 * | ---- desired range ---- |
667 * | state | or | state |
669 * There's a hole, we need to insert something in it and
670 * ignore the extent we found.
672 if (state->start > start) {
674 if (end < last_start)
677 this_end = last_start -1;
678 err = insert_state(tree, prealloc, start, this_end,
681 BUG_ON(err == -EEXIST);
684 start = this_end + 1;
688 * | ---- desired range ---- |
690 * We need to split the extent, and set the bit
693 if (state->start <= end && state->end > end) {
694 set = state->state & bits;
695 if (exclusive && set) {
696 *failed_start = start;
700 err = split_state(tree, state, prealloc, end + 1);
701 BUG_ON(err == -EEXIST);
703 set_state_bits(tree, prealloc, bits);
704 merge_state(tree, prealloc);
712 write_unlock_irqrestore(&tree->lock, flags);
714 free_extent_state(prealloc);
/* out path: drop lock and possibly retry/cond_resched (elided lines) */
721 write_unlock_irqrestore(&tree->lock, flags);
722 if (mask & __GFP_WAIT)
726 EXPORT_SYMBOL(set_extent_bit);
728 /* wrappers around set/clear extent bit */
/*
 * Each of the helpers below simply forwards to set_extent_bit /
 * clear_extent_bit / wait_extent_bit with a fixed bit combination.
 */
729 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
732 return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
735 EXPORT_SYMBOL(set_extent_dirty);
737 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
738 int bits, gfp_t mask)
740 return set_extent_bit(tree, start, end, bits, 0, NULL,
743 EXPORT_SYMBOL(set_extent_bits);
745 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
746 int bits, gfp_t mask)
748 return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
750 EXPORT_SYMBOL(clear_extent_bits);
/* delalloc ranges are also marked dirty */
752 int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
755 return set_extent_bit(tree, start, end,
756 EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
759 EXPORT_SYMBOL(set_extent_delalloc);
761 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
764 return clear_extent_bit(tree, start, end,
765 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
767 EXPORT_SYMBOL(clear_extent_dirty);
769 int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
772 return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
775 EXPORT_SYMBOL(set_extent_new);
777 int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
780 return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
782 EXPORT_SYMBOL(clear_extent_new);
784 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
787 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
790 EXPORT_SYMBOL(set_extent_uptodate);
792 int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
795 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
797 EXPORT_SYMBOL(clear_extent_uptodate);
799 int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
802 return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
805 EXPORT_SYMBOL(set_extent_writeback);
/* wake == 1: waiters on writeback completion get kicked */
807 int clear_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
810 return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
812 EXPORT_SYMBOL(clear_extent_writeback);
814 int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
816 return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
818 EXPORT_SYMBOL(wait_on_extent_writeback);
821 * locks a range in ascending order, waiting for any locked regions
822 * it hits on the way. [start,end] are inclusive, and this will sleep.
824 int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
/* exclusive set: -EEXIST means part of the range is already locked */
829 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
830 &failed_start, mask);
831 if (err == -EEXIST && (mask & __GFP_WAIT)) {
/* wait for the conflicting lock, then retry from where it began */
832 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
833 start = failed_start;
837 WARN_ON(start > end);
841 EXPORT_SYMBOL(lock_extent);
/* release an extent lock, waking any sleepers (wake == 1) */
843 int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
846 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
848 EXPORT_SYMBOL(unlock_extent);
851 * helper function to set pages and extents in the tree dirty
853 int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
855 unsigned long index = start >> PAGE_CACHE_SHIFT;
856 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
/* dirty each page in the range, dropping the find_get_page reference */
859 while (index <= end_index) {
860 page = find_get_page(tree->mapping, index);
862 __set_page_dirty_nobuffers(page);
863 page_cache_release(page);
/* then mark the byte range dirty in the state tree */
866 set_extent_dirty(tree, start, end, GFP_NOFS);
869 EXPORT_SYMBOL(set_range_dirty);
872 * helper function to set both pages and extents in the tree writeback
874 int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
876 unsigned long index = start >> PAGE_CACHE_SHIFT;
877 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
/* flag each page in the range for writeback, dropping the page ref */
880 while (index <= end_index) {
881 page = find_get_page(tree->mapping, index);
883 set_page_writeback(page);
884 page_cache_release(page);
/* then record writeback on the byte range in the state tree */
887 set_extent_writeback(tree, start, end, GFP_NOFS);
890 EXPORT_SYMBOL(set_range_writeback);
/*
 * Find the first extent at or after 'start' with any of 'bits' set,
 * returning its range via *start_ret / *end_ret.  Walks forward with
 * rb_next until a match is found or the tree ends.
 */
892 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
893 u64 *start_ret, u64 *end_ret, int bits)
895 struct rb_node *node;
896 struct extent_state *state;
899 read_lock_irq(&tree->lock);
901 * this search will find all the extents that end after
904 node = tree_search(&tree->state, start);
905 if (!node || IS_ERR(node)) {
910 state = rb_entry(node, struct extent_state, rb_node);
911 if (state->end >= start && (state->state & bits)) {
912 *start_ret = state->start;
913 *end_ret = state->end;
917 node = rb_next(node);
922 read_unlock_irq(&tree->lock);
925 EXPORT_SYMBOL(find_first_extent_bit);
/*
 * Find a contiguous run of EXTENT_DELALLOC states starting at *start,
 * lock each state (sleeping on any that are already EXTENT_LOCKED), and
 * return the locked range via *start / *end, capped at max_bytes.
 * NOTE(review): several exit/backtrack paths are on elided lines.
 */
927 u64 find_lock_delalloc_range(struct extent_io_tree *tree,
928 u64 *start, u64 *end, u64 max_bytes)
930 struct rb_node *node;
931 struct extent_state *state;
932 u64 cur_start = *start;
936 write_lock_irq(&tree->lock);
938 * this search will find all the extents that end after
942 node = tree_search(&tree->state, cur_start);
943 if (!node || IS_ERR(node)) {
949 state = rb_entry(node, struct extent_state, rb_node);
/* run must stay contiguous once we have found something */
950 if (found && state->start != cur_start) {
953 if (!(state->state & EXTENT_DELALLOC)) {
/* on the first hit, scan backwards past earlier delalloc states */
959 struct extent_state *prev_state;
960 struct rb_node *prev_node = node;
962 prev_node = rb_prev(prev_node);
965 prev_state = rb_entry(prev_node,
968 if (!(prev_state->state & EXTENT_DELALLOC))
/* someone else holds this state locked: sleep until woken, then retry */
974 if (state->state & EXTENT_LOCKED) {
976 atomic_inc(&state->refs);
977 prepare_to_wait(&state->wq, &wait,
978 TASK_UNINTERRUPTIBLE);
979 write_unlock_irq(&tree->lock);
981 write_lock_irq(&tree->lock);
982 finish_wait(&state->wq, &wait);
983 free_extent_state(state);
986 state->state |= EXTENT_LOCKED;
988 *start = state->start;
991 cur_start = state->end + 1;
992 node = rb_next(node);
995 total_bytes += state->end - state->start + 1;
996 if (total_bytes >= max_bytes)
1000 write_unlock_irq(&tree->lock);
/*
 * Count bytes in [ *start, search_end ] covered by states with 'bits'
 * set, stopping at max_bytes; *start is updated to the first matching
 * state's start.  The common "whole-tree dirty bytes" query is answered
 * from tree->dirty_bytes without walking.
 */
1004 u64 count_range_bits(struct extent_io_tree *tree,
1005 u64 *start, u64 search_end, u64 max_bytes,
1008 struct rb_node *node;
1009 struct extent_state *state;
1010 u64 cur_start = *start;
1011 u64 total_bytes = 0;
/* NOTE(review): printk lacks a KERN_* level prefix */
1014 if (search_end <= cur_start) {
1015 printk("search_end %Lu start %Lu\n", search_end, cur_start);
1020 write_lock_irq(&tree->lock);
/* fast path: full-tree dirty count is maintained incrementally */
1021 if (cur_start == 0 && bits == EXTENT_DIRTY) {
1022 total_bytes = tree->dirty_bytes;
1026 * this search will find all the extents that end after
1029 node = tree_search(&tree->state, cur_start);
1030 if (!node || IS_ERR(node)) {
1035 state = rb_entry(node, struct extent_state, rb_node);
1036 if (state->start > search_end)
/* clip the state's range to [cur_start, search_end] before counting */
1038 if (state->end >= cur_start && (state->state & bits)) {
1039 total_bytes += min(search_end, state->end) + 1 -
1040 max(cur_start, state->start);
1041 if (total_bytes >= max_bytes)
1044 *start = state->start;
1048 node = rb_next(node);
1053 write_unlock_irq(&tree->lock);
1057 * helper function to lock both pages and extents in the tree.
1058 * pages must be locked first.
1060 int lock_range(struct extent_io_tree *tree, u64 start, u64 end)
1062 unsigned long index = start >> PAGE_CACHE_SHIFT;
1063 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
/* grab_cache_page returns the page locked and referenced */
1067 while (index <= end_index) {
1068 page = grab_cache_page(tree->mapping, index);
1074 err = PTR_ERR(page);
1079 lock_extent(tree, start, end, GFP_NOFS);
1084 * we failed above in getting the page at 'index', so we undo here
1085 * up to but not including the page at 'index'
1088 index = start >> PAGE_CACHE_SHIFT;
1089 while (index < end_index) {
1090 page = find_get_page(tree->mapping, index);
1092 page_cache_release(page);
1097 EXPORT_SYMBOL(lock_range);
1100 * helper function to unlock both pages and extents in the tree.
1102 int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
1104 unsigned long index = start >> PAGE_CACHE_SHIFT;
1105 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
/* drop the reference taken by lock_range for each page in the range */
1108 while (index <= end_index) {
1109 page = find_get_page(tree->mapping, index);
1111 page_cache_release(page);
/* finally release the extent lock itself */
1114 unlock_extent(tree, start, end, GFP_NOFS);
1117 EXPORT_SYMBOL(unlock_range);
/*
 * Store 'private' on the extent_state whose range starts exactly at
 * 'start'.  The mismatch error path is on elided lines.
 */
1119 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
1121 struct rb_node *node;
1122 struct extent_state *state;
1125 write_lock_irq(&tree->lock);
1127 * this search will find all the extents that end after
1130 node = tree_search(&tree->state, start);
1131 if (!node || IS_ERR(node)) {
1135 state = rb_entry(node, struct extent_state, rb_node);
1136 if (state->start != start) {
1140 state->private = private;
1142 write_unlock_irq(&tree->lock);
/*
 * Read back the private value stored on the extent_state whose range
 * starts exactly at 'start' (counterpart of set_state_private).
 */
1146 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
1148 struct rb_node *node;
1149 struct extent_state *state;
1152 read_lock_irq(&tree->lock);
1154 * this search will find all the extents that end after
1157 node = tree_search(&tree->state, start);
1158 if (!node || IS_ERR(node)) {
1162 state = rb_entry(node, struct extent_state, rb_node);
1163 if (state->start != start) {
1167 *private = state->private;
1169 read_unlock_irq(&tree->lock);
1174 * searches a range in the state tree for a given mask.
1175 * If 'filled' == 1, this returns 1 only if ever extent in the tree
1176 * has the bits set. Otherwise, 1 is returned if any bit in the
1177 * range is found set.
1179 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1180 int bits, int filled)
1182 struct extent_state *state = NULL;
1183 struct rb_node *node;
1185 unsigned long flags;
1187 read_lock_irqsave(&tree->lock, flags);
1188 node = tree_search(&tree->state, start);
1189 while (node && start <= end) {
1190 state = rb_entry(node, struct extent_state, rb_node);
/* a gap before the next state fails a 'filled' query */
1192 if (filled && state->start > start) {
1197 if (state->start > end)
1200 if (state->state & bits) {
1204 } else if (filled) {
1208 start = state->end + 1;
1211 node = rb_next(node);
1218 read_unlock_irqrestore(&tree->lock, flags);
1221 EXPORT_SYMBOL(test_range_bit);
1224 * helper function to set a given page up to date if all the
1225 * extents in the tree for that page are up to date
1227 static int check_page_uptodate(struct extent_io_tree *tree,
1230 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1231 u64 end = start + PAGE_CACHE_SIZE - 1;
/* filled == 1: every extent covering the page must be uptodate */
1232 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
1233 SetPageUptodate(page);
1238 * helper function to unlock a page if all the extents in the tree
1239 * for that page are unlocked
1241 static int check_page_locked(struct extent_io_tree *tree,
1244 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1245 u64 end = start + PAGE_CACHE_SIZE - 1;
/* filled == 0: any remaining EXTENT_LOCKED keeps the page locked */
1246 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
1252 * helper function to end page writeback if all the extents
1253 * in the tree for that page are done with writeback
1255 static int check_page_writeback(struct extent_io_tree *tree,
1258 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1259 u64 end = start + PAGE_CACHE_SIZE - 1;
/* filled == 0: any remaining EXTENT_WRITEBACK keeps writeback pending */
1260 if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
1261 end_page_writeback(page);
1265 /* lots and lots of room for performance fixes in the end_bio funcs */
1268 * after a writepage IO is done, we need to:
1269 * clear the uptodate bits on error
1270 * clear the writeback bits in the extent tree for this IO
1271 * end_page_writeback if the page has no more pending IO
1273 * Scheduling is not allowed, so the extent state tree is expected
1274 * to have one and only one object corresponding to this IO.
/*
 * Dual prototype: bio completion handlers returned int and took
 * bytes_done before 2.6.24, void afterwards.
 */
1276 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1277 static void end_bio_extent_writepage(struct bio *bio, int err)
1279 static int end_bio_extent_writepage(struct bio *bio,
1280 unsigned int bytes_done, int err)
1283 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
/* walk the bio's vecs in reverse, starting at the last one */
1284 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1285 struct extent_io_tree *tree = bio->bi_private;
1290 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1296 struct page *page = bvec->bv_page;
1297 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1299 end = start + bvec->bv_len - 1;
/* a full-page vec means this IO owned the whole page */
1301 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1306 if (--bvec >= bio->bi_io_vec)
1307 prefetchw(&bvec->bv_page->flags);
/* on IO error, drop uptodate on both the extent range and the page */
1310 clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
1311 ClearPageUptodate(page);
1314 clear_extent_writeback(tree, start, end, GFP_ATOMIC);
1317 end_page_writeback(page);
1319 check_page_writeback(tree, page);
1320 if (tree->ops && tree->ops->writepage_end_io_hook)
1321 tree->ops->writepage_end_io_hook(page, start, end);
1322 } while (bvec >= bio->bi_io_vec);
1325 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1331 * after a readpage IO is done, we need to:
1332 * clear the uptodate bits on error
1333 * set the uptodate bits if things worked
1334 * set the page up to date if all extents in the tree are uptodate
1335 * clear the lock bit in the extent tree
1336 * unlock the page if there are no other extents locked for it
1338 * Scheduling is not allowed, so the extent state tree is expected
1339 * to have one and only one object corresponding to this IO.
/* dual prototype for pre/post 2.6.24 bio completion API */
1341 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1342 static void end_bio_extent_readpage(struct bio *bio, int err)
1344 static int end_bio_extent_readpage(struct bio *bio,
1345 unsigned int bytes_done, int err)
1348 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
/* walk the bio's vecs in reverse, starting at the last one */
1349 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1350 struct extent_io_tree *tree = bio->bi_private;
1356 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1362 struct page *page = bvec->bv_page;
1363 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1365 end = start + bvec->bv_len - 1;
1367 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1372 if (--bvec >= bio->bi_io_vec)
1373 prefetchw(&bvec->bv_page->flags);
/* give the fs a chance to veto uptodate (e.g. checksum failure) */
1375 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
1376 ret = tree->ops->readpage_end_io_hook(page, start, end);
1381 set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1383 SetPageUptodate(page);
1385 check_page_uptodate(tree, page);
1387 ClearPageUptodate(page);
/* drop the extent lock taken when the read was issued */
1391 unlock_extent(tree, start, end, GFP_ATOMIC);
1396 check_page_locked(tree, page);
1397 } while (bvec >= bio->bi_io_vec);
1400 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1406 * IO done from prepare_write is pretty simple, we just unlock
1407 * the structs in the extent tree when done, and set the uptodate bits
/* dual prototype for pre/post 2.6.24 bio completion API */
1410 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1411 static void end_bio_extent_preparewrite(struct bio *bio, int err)
1413 static int end_bio_extent_preparewrite(struct bio *bio,
1414 unsigned int bytes_done, int err)
1417 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
/* walk the bio's vecs in reverse, starting at the last one */
1418 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1419 struct extent_io_tree *tree = bio->bi_private;
1423 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1429 struct page *page = bvec->bv_page;
1430 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1432 end = start + bvec->bv_len - 1;
1434 if (--bvec >= bio->bi_io_vec)
1435 prefetchw(&bvec->bv_page->flags);
1438 set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1440 ClearPageUptodate(page);
/* drop the extent lock taken when the IO was issued */
1444 unlock_extent(tree, start, end, GFP_ATOMIC);
1446 } while (bvec >= bio->bi_io_vec);
1449 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
/*
 * Allocate a bio aimed at 'first_sector' on 'bdev'.  If allocation fails
 * while in memory-reclaim context (PF_MEMALLOC), retry with progressively
 * halved vec counts rather than giving up.
 */
1455 extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
1460 bio = bio_alloc(gfp_flags, nr_vecs);
1462 if (bio == NULL && (current->flags & PF_MEMALLOC)) {
1463 while (!bio && (nr_vecs /= 2))
1464 bio = bio_alloc(gfp_flags, nr_vecs);
1468 bio->bi_bdev = bdev;
1469 bio->bi_sector = first_sector;
/*
 * Sanity-check the bio's start sector against the device size, submit it,
 * and (on elided lines) report BIO_EOPNOTSUPP as an error.
 */
1474 static int submit_one_bio(int rw, struct bio *bio)
1481 maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
1482 if (maxsector < bio->bi_sector) {
/* NOTE(review): printk lacks a KERN_* level prefix */
1483 printk("sector too large max %Lu got %llu\n", maxsector,
1484 (unsigned long long)bio->bi_sector);
1488 submit_bio(rw, bio);
1489 if (bio_flagged(bio, BIO_EOPNOTSUPP))
/*
 * Add one page (or part of it) to an in-flight bio.  If *bio_ret holds a
 * bio that is contiguous with 'sector' and has room, the page is merged
 * into it; otherwise the pending bio is submitted and a fresh one is
 * allocated.  When bio_ret is non-NULL the new bio is handed back for
 * further merging instead of being submitted (tail logic partly elided).
 */
1495 static int submit_extent_page(int rw, struct extent_io_tree *tree,
1496 struct page *page, sector_t sector,
1497 size_t size, unsigned long offset,
1498 struct block_device *bdev,
1499 struct bio **bio_ret,
1500 unsigned long max_pages,
1501 bio_end_io_t end_io_func)
1507 if (bio_ret && *bio_ret) {
/* submit the old bio if it is not contiguous or cannot take the page */
1509 if (bio->bi_sector + (bio->bi_size >> 9) != sector ||
1510 bio_add_page(bio, page, size, offset) < size) {
1511 ret = submit_one_bio(rw, bio);
1517 nr = min_t(int, max_pages, bio_get_nr_vecs(bdev));
1518 bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
/* NOTE(review): printk lacks a KERN_* level prefix */
1520 printk("failed to allocate bio nr %d\n", nr);
1522 bio_add_page(bio, page, size, offset);
1523 bio->bi_end_io = end_io_func;
/* the tree pointer is recovered in the end_io handlers via bi_private */
1524 bio->bi_private = tree;
1528 ret = submit_one_bio(rw, bio);
/*
 * Mark a page as managed by the extent code: set PagePrivate, stash the
 * EXTENT_PAGE_PRIVATE marker in page->private, and take a page reference
 * that is held for as long as the private flag is set.
 */
1534 void set_page_extent_mapped(struct page *page)
1536 if (!PagePrivate(page)) {
1537 SetPagePrivate(page);
/* releasepage/invalidatepage must exist to drop this private ref later */
1538 WARN_ON(!page->mapping->a_ops->invalidatepage);
1539 set_page_private(page, EXTENT_PAGE_PRIVATE);
1540 page_cache_get(page);
/*
 * Tag @page as the first page of an extent buffer: page->private stores
 * the FIRST_PAGE marker OR'd with the buffer length shifted left by 2.
 */
1544 void set_page_extent_head(struct page *page, unsigned long len)
1546 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
1550 * basic readpage implementation. Locked extent state structs are inserted
1551 * into the tree that are removed when the IO is done (by the end_io
/*
 * Walk @page block-by-block: zero ranges past EOF or over holes, skip
 * ranges already EXTENT_UPTODATE, and submit read bios for the rest via
 * submit_extent_page().  The whole page range is locked in the io tree
 * up front; each handled sub-range is unlocked as it is resolved.
 * Partial listing: several locals, loop-advance lines and the bio_ret
 * parameter are not visible in this chunk.
 */
1554 static int __extent_read_full_page(struct extent_io_tree *tree,
1556 get_extent_t *get_extent,
1559 struct inode *inode = page->mapping->host;
1560 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1561 u64 page_end = start + PAGE_CACHE_SIZE - 1;
1565 u64 last_byte = i_size_read(inode);
1569 struct extent_map *em;
1570 struct block_device *bdev;
1573 size_t page_offset = 0;
1575 size_t blocksize = inode->i_sb->s_blocksize;
1577 set_page_extent_mapped(page);
1580 lock_extent(tree, start, end, GFP_NOFS);
1582 while (cur <= end) {
/* past EOF: zero the tail of the page and mark it up to date */
1583 if (cur >= last_byte) {
1585 iosize = PAGE_CACHE_SIZE - page_offset;
1586 userpage = kmap_atomic(page, KM_USER0);
1587 memset(userpage + page_offset, 0, iosize);
1588 flush_dcache_page(page);
1589 kunmap_atomic(userpage, KM_USER0);
1590 set_extent_uptodate(tree, cur, cur + iosize - 1,
1592 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1595 em = get_extent(inode, page, page_offset, cur,
1597 if (IS_ERR(em) || !em) {
1599 unlock_extent(tree, cur, end, GFP_NOFS);
1603 extent_offset = cur - em->start;
1604 BUG_ON(extent_map_end(em) <= cur);
/* clamp the IO size to the extent, the page end, and block alignment */
1607 iosize = min(extent_map_end(em) - cur, end - cur + 1);
1608 cur_end = min(extent_map_end(em) - 1, end);
1609 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
1610 sector = (em->block_start + extent_offset) >> 9;
1612 block_start = em->block_start;
1613 free_extent_map(em);
1616 /* we've found a hole, just zero and go on */
1617 if (block_start == EXTENT_MAP_HOLE) {
1619 userpage = kmap_atomic(page, KM_USER0);
1620 memset(userpage + page_offset, 0, iosize);
1621 flush_dcache_page(page);
1622 kunmap_atomic(userpage, KM_USER0);
1624 set_extent_uptodate(tree, cur, cur + iosize - 1,
1626 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1628 page_offset += iosize;
1631 /* the get_extent function already copied into the page */
1632 if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
1633 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1635 page_offset += iosize;
/* let the tree owner veto or handle the read itself */
1640 if (tree->ops && tree->ops->readpage_io_hook) {
1641 ret = tree->ops->readpage_io_hook(page, cur,
1645 unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
1647 ret = submit_extent_page(READ, tree, page,
1648 sector, iosize, page_offset,
1650 end_bio_extent_readpage);
1655 page_offset += iosize;
1659 if (!PageError(page))
1660 SetPageUptodate(page);
/*
 * Public readpage entry point: run __extent_read_full_page() with a
 * local bio cache, then submit whatever bio was accumulated.
 * Partial listing: the ret declaration and return are not visible.
 */
1666 int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
1667 get_extent_t *get_extent)
1669 struct bio *bio = NULL;
1672 ret = __extent_read_full_page(tree, page, get_extent, &bio);
1674 submit_one_bio(READ, bio);
1677 EXPORT_SYMBOL(extent_read_full_page);
1680 * the writepage semantics are similar to regular writepage. extent
1681 * records are inserted to lock ranges in the tree, and as dirty areas
1682 * are found, they are marked writeback. Then the lock bits are removed
1683 * and the end_io handler clears the writeback ranges
/*
 * Write one page: fill delalloc ranges first, then walk the page
 * block-by-block clearing dirty bits and submitting write bios.
 * @data is really a struct extent_page_data (tree + get_extent + bio
 * cache).  Partial listing: many locals, brace lines and loop-advance
 * statements are not visible in this chunk.
 */
1685 static int __extent_writepage(struct page *page, struct writeback_control *wbc,
1688 struct inode *inode = page->mapping->host;
1689 struct extent_page_data *epd = data;
1690 struct extent_io_tree *tree = epd->tree;
1691 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1693 u64 page_end = start + PAGE_CACHE_SIZE - 1;
1697 u64 last_byte = i_size_read(inode);
1701 struct extent_map *em;
1702 struct block_device *bdev;
1705 size_t page_offset = 0;
1707 loff_t i_size = i_size_read(inode);
1708 unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
1712 WARN_ON(!PageLocked(page));
/* page entirely past EOF: nothing to write, just clear dirty state */
1713 if (page->index > end_index) {
1714 clear_extent_dirty(tree, start, page_end, GFP_NOFS);
/* last page: zero the portion beyond i_size before writing */
1719 if (page->index == end_index) {
1722 size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
1724 userpage = kmap_atomic(page, KM_USER0);
1725 memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset);
1726 flush_dcache_page(page);
1727 kunmap_atomic(userpage, KM_USER0);
1730 set_page_extent_mapped(page);
/* find and fill every delalloc range that touches this page */
1732 delalloc_start = start;
1734 while(delalloc_end < page_end) {
1735 nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start,
1738 if (nr_delalloc == 0) {
1739 delalloc_start = delalloc_end + 1;
1742 tree->ops->fill_delalloc(inode, delalloc_start,
1744 clear_extent_bit(tree, delalloc_start,
1746 EXTENT_LOCKED | EXTENT_DELALLOC,
1748 delalloc_start = delalloc_end + 1;
1750 lock_extent(tree, start, page_end, GFP_NOFS);
/* delalloc should be fully drained by now; report if it is not */
1753 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
1754 printk("found delalloc bits after lock_extent\n");
1757 if (last_byte <= start) {
1758 clear_extent_dirty(tree, start, page_end, GFP_NOFS);
1762 set_extent_uptodate(tree, start, page_end, GFP_NOFS);
1763 blocksize = inode->i_sb->s_blocksize;
1765 while (cur <= end) {
1766 if (cur >= last_byte) {
1767 clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
1770 em = epd->get_extent(inode, page, page_offset, cur,
1772 if (IS_ERR(em) || !em) {
1777 extent_offset = cur - em->start;
1778 BUG_ON(extent_map_end(em) <= cur);
1780 iosize = min(extent_map_end(em) - cur, end - cur + 1);
1781 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
1782 sector = (em->block_start + extent_offset) >> 9;
1784 block_start = em->block_start;
1785 free_extent_map(em);
/* holes and inline extents need no device IO */
1788 if (block_start == EXTENT_MAP_HOLE ||
1789 block_start == EXTENT_MAP_INLINE) {
1790 clear_extent_dirty(tree, cur,
1791 cur + iosize - 1, GFP_NOFS);
1793 page_offset += iosize;
1797 /* leave this out until we have a page_mkwrite call */
1798 if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
1801 page_offset += iosize;
1804 clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
1805 if (tree->ops && tree->ops->writepage_io_hook) {
1806 ret = tree->ops->writepage_io_hook(page, cur,
1814 unsigned long max_nr = end_index + 1;
1815 set_range_writeback(tree, cur, cur + iosize - 1);
1816 if (!PageWriteback(page)) {
1817 printk("warning page %lu not writeback, "
1818 "cur %llu end %llu\n", page->index,
1819 (unsigned long long)cur,
1820 (unsigned long long)end)
1823 ret = submit_extent_page(WRITE, tree, page, sector,
1824 iosize, page_offset, bdev,
1826 end_bio_extent_writepage);
1831 page_offset += iosize;
1836 /* make sure the mapping tag for page dirty gets cleared */
1837 set_page_writeback(page);
1838 end_page_writeback(page);
1840 unlock_extent(tree, start, page_end, GFP_NOFS);
/*
 * Backport of write_cache_pages() for kernels <= 2.6.18 (see the comment
 * below); compiled only under the version guard.  Partial listing: locals
 * (index, done, scanned, nr_pages, i), several continue/break lines and
 * the final return are not visible in this chunk.
 */
1845 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
1847 /* Taken directly from 2.6.23 for 2.6.18 back port */
1848 typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
1852 * write_cache_pages - walk the list of dirty pages of the given address space
1853 * and write all of them.
1854 * @mapping: address space structure to write
1855 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
1856 * @writepage: function called for each page
1857 * @data: data passed to writepage function
1859 * If a page is already under I/O, write_cache_pages() skips it, even
1860 * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
1861 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
1862 * and msync() need to guarantee that all the data which was dirty at the time
1863 * the call was made get new I/O started against them. If wbc->sync_mode is
1864 * WB_SYNC_ALL then we were called for data integrity and we must wait for
1865 * existing IO to complete.
1867 static int write_cache_pages(struct address_space *mapping,
1868 struct writeback_control *wbc, writepage_t writepage,
1871 struct backing_dev_info *bdi = mapping->backing_dev_info;
1874 struct pagevec pvec;
1877 pgoff_t end; /* Inclusive */
1879 int range_whole = 0;
/* non-blocking callers bail out immediately on a congested device */
1881 if (wbc->nonblocking && bdi_write_congested(bdi)) {
1882 wbc->encountered_congestion = 1;
1886 pagevec_init(&pvec, 0);
1887 if (wbc->range_cyclic) {
1888 index = mapping->writeback_index; /* Start from prev offset */
1891 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1892 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1893 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
/* grab batches of dirty pages via the radix-tree dirty tag */
1898 while (!done && (index <= end) &&
1899 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1900 PAGECACHE_TAG_DIRTY,
1901 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
1905 for (i = 0; i < nr_pages; i++) {
1906 struct page *page = pvec.pages[i];
1909 * At this point we hold neither mapping->tree_lock nor
1910 * lock on the page itself: the page may be truncated or
1911 * invalidated (changing page->mapping to NULL), or even
1912 * swizzled back from swapper_space to tmpfs file
1917 if (unlikely(page->mapping != mapping)) {
1922 if (!wbc->range_cyclic && page->index > end) {
1928 if (wbc->sync_mode != WB_SYNC_NONE)
1929 wait_on_page_writeback(page);
1931 if (PageWriteback(page) ||
1932 !clear_page_dirty_for_io(page)) {
1937 ret = (*writepage)(page, wbc, data);
1939 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
1943 if (ret || (--(wbc->nr_to_write) <= 0))
1945 if (wbc->nonblocking && bdi_write_congested(bdi)) {
1946 wbc->encountered_congestion = 1;
1950 pagevec_release(&pvec);
1953 if (!scanned && !done) {
1955 * We hit the last page and there is more work to be done: wrap
1956 * back to the start of the file
1962 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1963 mapping->writeback_index = index;
/*
 * Write @page, then opportunistically write more dirty pages of the same
 * file (range starts one page past @page) via the local wbc_writepages
 * control, sharing one bio cache through epd.  Partial listing: some
 * initializers of epd/wbc_writepages and the return are not visible.
 */
1968 int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
1969 get_extent_t *get_extent,
1970 struct writeback_control *wbc)
1973 struct address_space *mapping = page->mapping;
1974 struct extent_page_data epd = {
1977 .get_extent = get_extent,
1979 struct writeback_control wbc_writepages = {
1981 .sync_mode = WB_SYNC_NONE,
1982 .older_than_this = NULL,
/* start the follow-on sweep just past the page we wrote explicitly */
1984 .range_start = page_offset(page) + PAGE_CACHE_SIZE,
1985 .range_end = (loff_t)-1,
1989 ret = __extent_writepage(page, wbc, &epd);
1991 write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd);
1993 submit_one_bio(WRITE, epd.bio);
1997 EXPORT_SYMBOL(extent_write_full_page);
/*
 * Writepages entry point: iterate the mapping's dirty pages through
 * __extent_writepage() with a shared bio cache, then flush the bio.
 * Partial listing: remaining epd initializers and the return are not
 * visible in this chunk.
 */
2000 int extent_writepages(struct extent_io_tree *tree,
2001 struct address_space *mapping,
2002 get_extent_t *get_extent,
2003 struct writeback_control *wbc)
2006 struct extent_page_data epd = {
2009 .get_extent = get_extent,
2012 ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
2014 submit_one_bio(WRITE, epd.bio);
2018 EXPORT_SYMBOL(extent_writepages);
/*
 * Readahead entry point: pull pages off the supplied list, insert each
 * into the page cache (open-coding add_to_page_cache_lru, which is not
 * exported), read it via __extent_read_full_page() with a shared bio,
 * and flush the bio at the end.  Pages that were already cached are
 * simply released.
 */
2020 int extent_readpages(struct extent_io_tree *tree,
2021 struct address_space *mapping,
2022 struct list_head *pages, unsigned nr_pages,
2023 get_extent_t get_extent)
2025 struct bio *bio = NULL;
2027 struct pagevec pvec;
2029 pagevec_init(&pvec, 0);
2030 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
2031 struct page *page = list_entry(pages->prev, struct page, lru);
2033 prefetchw(&page->flags);
2034 list_del(&page->lru);
2036 * what we want to do here is call add_to_page_cache_lru,
2037 * but that isn't exported, so we reproduce it here
2039 if (!add_to_page_cache(page, mapping,
2040 page->index, GFP_KERNEL)) {
2042 /* open coding of lru_cache_add, also not exported */
2043 page_cache_get(page);
2044 if (!pagevec_add(&pvec, page))
2045 __pagevec_lru_add(&pvec);
2046 __extent_read_full_page(tree, page, get_extent, &bio);
/* add_to_page_cache failed (already present): drop our reference */
2048 page_cache_release(page);
2050 if (pagevec_count(&pvec))
2051 __pagevec_lru_add(&pvec);
2052 BUG_ON(!list_empty(pages));
2054 submit_one_bio(READ, bio);
2060 * basic invalidatepage code, this waits on any locked or writeback
2061 * ranges corresponding to the page, and then deletes any extent state
2062 * records from the tree
/*
 * @offset is the byte offset within the page where invalidation begins;
 * start is rounded up to the next block boundary before the range is
 * locked, waited on, and its state bits cleared.
 */
2064 int extent_invalidatepage(struct extent_io_tree *tree,
2065 struct page *page, unsigned long offset)
2067 u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
2068 u64 end = start + PAGE_CACHE_SIZE - 1;
2069 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
2071 start += (offset + blocksize -1) & ~(blocksize - 1);
2075 lock_extent(tree, start, end, GFP_NOFS);
2076 wait_on_extent_writeback(tree, start, end);
2077 clear_extent_bit(tree, start, end,
2078 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
2082 EXPORT_SYMBOL(extent_invalidatepage);
2085 * simple commit_write call, set_range_dirty is used to mark both
2086 * the pages and the extent records as dirty
/*
 * Commit a write of [from, to) on @page: dirty the page and, if the
 * write extended the file, push i_size forward and dirty the inode.
 */
2088 int extent_commit_write(struct extent_io_tree *tree,
2089 struct inode *inode, struct page *page,
2090 unsigned from, unsigned to)
2092 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2094 set_page_extent_mapped(page);
2095 set_page_dirty(page);
2097 if (pos > inode->i_size) {
2098 i_size_write(inode, pos);
2099 mark_inode_dirty(inode);
2103 EXPORT_SYMBOL(extent_commit_write);
/*
 * prepare_write: make sure every block under [from, to) on @page is
 * either up to date or being read in.  New blocks get the parts outside
 * [from, to) zeroed; existing, not-uptodate blocks that the write will
 * only partially cover are read via submit_extent_page(), then the
 * function waits for those reads (EXTENT_LOCKED) to finish.
 * Partial listing: several locals, loop-exit lines and the return are
 * not visible in this chunk.
 */
2105 int extent_prepare_write(struct extent_io_tree *tree,
2106 struct inode *inode, struct page *page,
2107 unsigned from, unsigned to, get_extent_t *get_extent)
2109 u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2110 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
2112 u64 orig_block_start;
2115 struct extent_map *em;
2116 unsigned blocksize = 1 << inode->i_blkbits;
2117 size_t page_offset = 0;
2118 size_t block_off_start;
2119 size_t block_off_end;
2125 set_page_extent_mapped(page);
/* expand [from, to) to whole blocks */
2127 block_start = (page_start + from) & ~((u64)blocksize - 1);
2128 block_end = (page_start + to - 1) | (blocksize - 1);
2129 orig_block_start = block_start;
2131 lock_extent(tree, page_start, page_end, GFP_NOFS);
2132 while(block_start <= block_end) {
2133 em = get_extent(inode, page, page_offset, block_start,
2134 block_end - block_start + 1, 1);
2135 if (IS_ERR(em) || !em) {
2138 cur_end = min(block_end, extent_map_end(em) - 1);
2139 block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
2140 block_off_end = block_off_start + blocksize;
2141 isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
/* freshly allocated block partially covered by the write: zero the
 * portions the caller will not overwrite */
2143 if (!PageUptodate(page) && isnew &&
2144 (block_off_end > to || block_off_start < from)) {
2147 kaddr = kmap_atomic(page, KM_USER0);
2148 if (block_off_end > to)
2149 memset(kaddr + to, 0, block_off_end - to);
2150 if (block_off_start < from)
2151 memset(kaddr + block_off_start, 0,
2152 from - block_off_start);
2153 flush_dcache_page(page);
2154 kunmap_atomic(kaddr, KM_USER0);
/* existing on-disk block, not uptodate, partially covered: read it */
2156 if ((em->block_start != EXTENT_MAP_HOLE &&
2157 em->block_start != EXTENT_MAP_INLINE) &&
2158 !isnew && !PageUptodate(page) &&
2159 (block_off_end > to || block_off_start < from) &&
2160 !test_range_bit(tree, block_start, cur_end,
2161 EXTENT_UPTODATE, 1)) {
2163 u64 extent_offset = block_start - em->start;
2165 sector = (em->block_start + extent_offset) >> 9;
2166 iosize = (cur_end - block_start + blocksize) &
2167 ~((u64)blocksize - 1);
2169 * we've already got the extent locked, but we
2170 * need to split the state such that our end_bio
2171 * handler can clear the lock.
2173 set_extent_bit(tree, block_start,
2174 block_start + iosize - 1,
2175 EXTENT_LOCKED, 0, NULL, GFP_NOFS);
2176 ret = submit_extent_page(READ, tree, page,
2177 sector, iosize, page_offset, em->bdev,
2179 end_bio_extent_preparewrite);
2181 block_start = block_start + iosize;
2183 set_extent_uptodate(tree, block_start, cur_end,
2185 unlock_extent(tree, block_start, cur_end, GFP_NOFS);
2186 block_start = cur_end + 1;
2188 page_offset = block_start & (PAGE_CACHE_SIZE - 1);
2189 free_extent_map(em);
/* wait for any reads submitted above to complete */
2192 wait_extent_bit(tree, orig_block_start,
2193 block_end, EXTENT_LOCKED);
2195 check_page_uptodate(tree, page);
2197 /* FIXME, zero out newly allocated blocks on error */
2200 EXPORT_SYMBOL(extent_prepare_write);
2203 * a helper for releasepage. As long as there are no locked extents
2204 * in the range corresponding to the page, both state records and extent
2205 * map records are removed
/*
 * For each extent_map overlapping the page: if its range holds no
 * EXTENT_LOCKED bits, unlink it from the map tree (dropping the tree's
 * reference).  Finally, if the whole page range is unlocked, clear
 * EXTENT_UPTODATE state.  Returns whether the page may be released
 * (return statement not visible in this partial listing).
 */
2207 int try_release_extent_mapping(struct extent_map_tree *map,
2208 struct extent_io_tree *tree, struct page *page)
2210 struct extent_map *em;
2211 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2212 u64 end = start + PAGE_CACHE_SIZE - 1;
2213 u64 orig_start = start;
2216 while (start <= end) {
2217 spin_lock(&map->lock);
2218 em = lookup_extent_mapping(map, start, end);
2219 if (!em || IS_ERR(em)) {
2220 spin_unlock(&map->lock);
2223 if (!test_range_bit(tree, em->start, extent_map_end(em) - 1,
2224 EXTENT_LOCKED, 0)) {
2225 remove_extent_mapping(map, em);
2226 /* once for the rb tree */
2227 free_extent_map(em);
2229 start = extent_map_end(em);
2230 spin_unlock(&map->lock);
/* drop the reference taken by lookup_extent_mapping() */
2233 free_extent_map(em);
2235 if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0))
2238 clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
2242 EXPORT_SYMBOL(try_release_extent_mapping);
2244 sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
2245 get_extent_t *get_extent)
2247 struct inode *inode = mapping->host;
2248 u64 start = iblock << inode->i_blkbits;
2249 sector_t sector = 0;
2250 struct extent_map *em;
2252 em = get_extent(inode, NULL, 0, start, (1 << inode->i_blkbits), 0);
2253 if (!em || IS_ERR(em))
2256 if (em->block_start == EXTENT_MAP_INLINE ||
2257 em->block_start == EXTENT_MAP_HOLE)
2260 sector = (em->block_start + start - em->start) >> inode->i_blkbits;
2261 printk("bmap finds %Lu %Lu block %Lu\n", em->start, em->len, em->block_start);
2263 free_extent_map(em);
/*
 * Put @eb at the front of the tree's buffer LRU.  A buffer not yet on
 * the list gets an extra reference; when the LRU exceeds
 * BUFFER_LRU_MAX the oldest entry is unlinked and released.  Caller is
 * expected to hold tree->lru_lock (callers in this file take it around
 * add_lru).  Partial listing: braces/return not visible.
 */
2267 static int add_lru(struct extent_io_tree *tree, struct extent_buffer *eb)
2269 if (list_empty(&eb->lru)) {
2270 extent_buffer_get(eb);
2271 list_add(&eb->lru, &tree->buffer_lru);
2273 if (tree->lru_size >= BUFFER_LRU_MAX) {
2274 struct extent_buffer *rm;
/* evict the least-recently-used buffer from the tail */
2275 rm = list_entry(tree->buffer_lru.prev,
2276 struct extent_buffer, lru);
2278 list_del_init(&rm->lru);
2279 free_extent_buffer(rm);
/* already on the LRU: just move it to the front */
2282 list_move(&eb->lru, &tree->buffer_lru);
/*
 * Scan the tree's buffer LRU for a buffer exactly matching @start/@len;
 * on a hit, take a reference and return it.  Partial listing: the loop
 * header, cursor advance and NULL returns are not visible.
 */
2285 static struct extent_buffer *find_lru(struct extent_io_tree *tree,
2286 u64 start, unsigned long len)
2288 struct list_head *lru = &tree->buffer_lru;
2289 struct list_head *cur = lru->next;
2290 struct extent_buffer *eb;
2292 if (list_empty(lru))
2296 eb = list_entry(cur, struct extent_buffer, lru);
2297 if (eb->start == start && eb->len == len) {
2298 extent_buffer_get(eb);
2302 } while (cur != lru);
2306 static inline unsigned long num_extent_pages(u64 start, u64 len)
2308 return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
2309 (start >> PAGE_CACHE_SHIFT);
/*
 * Return page @i of extent buffer @eb.  Page 0 is cached in
 * eb->first_page; the rest are looked up in the mapping's radix tree
 * by absolute page-cache index.  Partial listing: locals and the final
 * return of the looked-up page are not visible.
 */
2312 static inline struct page *extent_buffer_page(struct extent_buffer *eb,
2316 struct address_space *mapping;
2319 return eb->first_page;
/* convert buffer-relative index to an absolute page-cache index */
2320 i += eb->start >> PAGE_CACHE_SHIFT;
2321 mapping = eb->first_page->mapping;
2322 read_lock_irq(&mapping->tree_lock);
2323 p = radix_tree_lookup(&mapping->page_tree, i);
2324 read_unlock_irq(&mapping->tree_lock);
/*
 * Get an extent_buffer for @start/@len: first try the per-tree LRU via
 * find_lru() (which already takes a reference), else zalloc a new one
 * from the slab cache with a single reference.  Partial listing: the
 * start/len parameters, eb field initialization and return are not
 * visible in this chunk.
 */
2328 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
2333 struct extent_buffer *eb = NULL;
2335 spin_lock(&tree->lru_lock);
2336 eb = find_lru(tree, start, len);
2337 spin_unlock(&tree->lru_lock);
2342 eb = kmem_cache_zalloc(extent_buffer_cache, mask);
2343 INIT_LIST_HEAD(&eb->lru);
2346 atomic_set(&eb->refs, 1);
/* Return @eb's memory to the extent_buffer slab cache. */
2351 static void __free_extent_buffer(struct extent_buffer *eb)
2353 kmem_cache_free(extent_buffer_cache, eb);
/*
 * Create (or revive from the LRU) an extent_buffer covering
 * [start, start + len), creating its backing pages in @mapping with
 * find_or_create_page().  An optional @page0 (not visible in this
 * partial listing's parameter list) seeds the first page.  On success
 * the buffer is marked uptodate/filled and put on the LRU; on failure
 * the pages and the buffer are released.  Partial listing: error-path
 * gotos, returns and some locals are not visible.
 */
2356 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
2357 u64 start, unsigned long len,
2361 unsigned long num_pages = num_extent_pages(start, len);
2363 unsigned long index = start >> PAGE_CACHE_SHIFT;
2364 struct extent_buffer *eb;
2366 struct address_space *mapping = tree->mapping;
2369 eb = __alloc_extent_buffer(tree, start, len, mask);
2370 if (!eb || IS_ERR(eb))
/* an LRU hit comes back already populated; nothing more to do */
2373 if (eb->flags & EXTENT_BUFFER_FILLED)
2377 eb->first_page = page0;
2380 page_cache_get(page0);
2381 mark_page_accessed(page0);
2382 set_page_extent_mapped(page0);
2383 WARN_ON(!PageUptodate(page0));
2384 set_page_extent_head(page0, len);
2388 for (; i < num_pages; i++, index++) {
2389 p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
2394 set_page_extent_mapped(p);
2395 mark_page_accessed(p);
2398 set_page_extent_head(p, len);
2400 set_page_private(p, EXTENT_PAGE_PRIVATE);
2402 if (!PageUptodate(p))
2407 eb->flags |= EXTENT_UPTODATE;
2408 eb->flags |= EXTENT_BUFFER_FILLED;
2411 spin_lock(&tree->lru_lock);
2413 spin_unlock(&tree->lru_lock);
/* error path: unlink from the LRU and release pages + buffer */
2417 spin_lock(&tree->lru_lock);
2418 list_del_init(&eb->lru);
2419 spin_unlock(&tree->lru_lock);
2420 if (!atomic_dec_and_test(&eb->refs))
2422 for (index = 1; index < i; index++) {
2423 page_cache_release(extent_buffer_page(eb, index));
2426 page_cache_release(extent_buffer_page(eb, 0));
2427 __free_extent_buffer(eb);
2430 EXPORT_SYMBOL(alloc_extent_buffer);
/*
 * Like alloc_extent_buffer(), but only attaches to pages that already
 * exist in the page cache (find_lock_page); it never creates pages.
 * Returns NULL-ish on a missing page (error handling lines not visible
 * in this partial listing).  On failure, pages and buffer are released
 * as in alloc_extent_buffer().
 */
2432 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
2433 u64 start, unsigned long len,
2436 unsigned long num_pages = num_extent_pages(start, len);
2438 unsigned long index = start >> PAGE_CACHE_SHIFT;
2439 struct extent_buffer *eb;
2441 struct address_space *mapping = tree->mapping;
2444 eb = __alloc_extent_buffer(tree, start, len, mask);
2445 if (!eb || IS_ERR(eb))
2448 if (eb->flags & EXTENT_BUFFER_FILLED)
2451 for (i = 0; i < num_pages; i++, index++) {
2452 p = find_lock_page(mapping, index);
2456 set_page_extent_mapped(p);
2457 mark_page_accessed(p);
2461 set_page_extent_head(p, len);
2463 set_page_private(p, EXTENT_PAGE_PRIVATE);
2466 if (!PageUptodate(p))
2471 eb->flags |= EXTENT_UPTODATE;
2472 eb->flags |= EXTENT_BUFFER_FILLED;
2475 spin_lock(&tree->lru_lock);
2477 spin_unlock(&tree->lru_lock);
/* error path mirrors alloc_extent_buffer(): drop LRU entry and pages */
2480 spin_lock(&tree->lru_lock);
2481 list_del_init(&eb->lru);
2482 spin_unlock(&tree->lru_lock);
2483 if (!atomic_dec_and_test(&eb->refs))
2485 for (index = 1; index < i; index++) {
2486 page_cache_release(extent_buffer_page(eb, index));
2489 page_cache_release(extent_buffer_page(eb, 0));
2490 __free_extent_buffer(eb);
2493 EXPORT_SYMBOL(find_extent_buffer);
/*
 * Drop a reference on @eb; when the count hits zero, release every
 * backing page and free the buffer itself.  The buffer must already be
 * off the LRU at that point (WARN_ON otherwise).  Partial listing:
 * NULL check/early return lines are not visible.
 */
2495 void free_extent_buffer(struct extent_buffer *eb)
2498 unsigned long num_pages;
2503 if (!atomic_dec_and_test(&eb->refs))
2506 WARN_ON(!list_empty(&eb->lru));
2507 num_pages = num_extent_pages(eb->start, eb->len);
2509 for (i = 1; i < num_pages; i++) {
2510 page_cache_release(extent_buffer_page(eb, i));
2512 page_cache_release(extent_buffer_page(eb, 0));
2513 __free_extent_buffer(eb);
2515 EXPORT_SYMBOL(free_extent_buffer);
/*
 * Clear the dirty state for @eb: clear EXTENT_DIRTY over its range in
 * the io tree, then clear each backing page's dirty bit and its radix
 * tree PAGECACHE_TAG_DIRTY tag.  Pages shared with another buffer
 * (buffer not page-aligned at either end) are skipped if part of the
 * page range is still dirty in the tree.  Partial listing: some locals
 * and continue/return lines are not visible.
 */
2517 int clear_extent_buffer_dirty(struct extent_io_tree *tree,
2518 struct extent_buffer *eb)
2522 unsigned long num_pages;
2525 u64 start = eb->start;
2526 u64 end = start + eb->len - 1;
2528 set = clear_extent_dirty(tree, start, end, GFP_NOFS);
2529 num_pages = num_extent_pages(eb->start, eb->len);
2531 for (i = 0; i < num_pages; i++) {
2532 page = extent_buffer_page(eb, i);
2535 set_page_extent_head(page, eb->len);
2537 set_page_private(page, EXTENT_PAGE_PRIVATE);
2540 * if we're on the last page or the first page and the
2541 * block isn't aligned on a page boundary, do extra checks
2542 * to make sure we don't clean page that is partially dirty
2544 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
2545 ((i == num_pages - 1) &&
2546 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
2547 start = (u64)page->index << PAGE_CACHE_SHIFT;
2548 end = start + PAGE_CACHE_SIZE - 1;
2549 if (test_range_bit(tree, start, end,
2555 clear_page_dirty_for_io(page);
/* clear the radix-tree dirty tag under the mapping's tree lock */
2556 write_lock_irq(&page->mapping->tree_lock);
2557 if (!PageDirty(page)) {
2558 radix_tree_tag_clear(&page->mapping->page_tree,
2560 PAGECACHE_TAG_DIRTY);
2562 write_unlock_irq(&page->mapping->tree_lock);
2567 EXPORT_SYMBOL(clear_extent_buffer_dirty);
/* Wait for writeback to finish over @eb's whole byte range. */
2569 int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
2570 struct extent_buffer *eb)
2572 return wait_on_extent_writeback(tree, eb->start,
2573 eb->start + eb->len - 1);
2575 EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
/*
 * Dirty @eb: re-establish each page's private tag (releasepage may have
 * dropped it), mark every page dirty without touching buffers, then set
 * EXTENT_DIRTY over the buffer's range in the io tree.
 */
2577 int set_extent_buffer_dirty(struct extent_io_tree *tree,
2578 struct extent_buffer *eb)
2581 unsigned long num_pages;
2583 num_pages = num_extent_pages(eb->start, eb->len);
2584 for (i = 0; i < num_pages; i++) {
2585 struct page *page = extent_buffer_page(eb, i);
2586 /* writepage may need to do something special for the
2587 * first page, we have to make sure page->private is
2588 * properly set. releasepage may drop page->private
2589 * on us if the page isn't already dirty.
2593 set_page_extent_head(page, eb->len);
2594 } else if (PagePrivate(page) &&
2595 page->private != EXTENT_PAGE_PRIVATE) {
2597 set_page_extent_mapped(page);
2600 __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
2604 return set_extent_dirty(tree, eb->start,
2605 eb->start + eb->len - 1, GFP_NOFS);
2607 EXPORT_SYMBOL(set_extent_buffer_dirty);
/*
 * Mark @eb up to date: set EXTENT_UPTODATE over its tree range, then
 * set PG_uptodate on each backing page.  Pages only partially covered
 * by the buffer (unaligned first/last page) go through
 * check_page_uptodate() instead of being marked unconditionally.
 */
2609 int set_extent_buffer_uptodate(struct extent_io_tree *tree,
2610 struct extent_buffer *eb)
2614 unsigned long num_pages;
2616 num_pages = num_extent_pages(eb->start, eb->len);
2618 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
2620 for (i = 0; i < num_pages; i++) {
2621 page = extent_buffer_page(eb, i);
2622 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
2623 ((i == num_pages - 1) &&
2624 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
2625 check_page_uptodate(tree, page);
2628 SetPageUptodate(page);
2632 EXPORT_SYMBOL(set_extent_buffer_uptodate);
2634 int extent_buffer_uptodate(struct extent_io_tree *tree,
2635 struct extent_buffer *eb)
2637 if (eb->flags & EXTENT_UPTODATE)
2639 return test_range_bit(tree, eb->start, eb->start + eb->len - 1,
2640 EXTENT_UPTODATE, 1);
2642 EXPORT_SYMBOL(extent_buffer_uptodate);
/*
 * Read @eb's pages from @start onward via the mapping's ->readpage,
 * skipping pages already uptodate, then wait for each page and fail if
 * any did not become uptodate.  On full success EXTENT_UPTODATE is set
 * on the buffer.  Partial listing: the wait flag parameter, error
 * accounting and returns are not visible in this chunk.
 */
2644 int read_extent_buffer_pages(struct extent_io_tree *tree,
2645 struct extent_buffer *eb,
2650 unsigned long start_i;
2654 unsigned long num_pages;
2656 if (eb->flags & EXTENT_UPTODATE)
/* disabled range-bit fast path (if (0 && ...)) */
2659 if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1,
2660 EXTENT_UPTODATE, 1)) {
2665 WARN_ON(start < eb->start);
2666 start_i = (start >> PAGE_CACHE_SHIFT) -
2667 (eb->start >> PAGE_CACHE_SHIFT);
2672 num_pages = num_extent_pages(eb->start, eb->len);
2673 for (i = start_i; i < num_pages; i++) {
2674 page = extent_buffer_page(eb, i);
2675 if (PageUptodate(page)) {
/* trylock: someone else holding the lock is presumably reading it */
2679 if (TestSetPageLocked(page)) {
2685 if (!PageUptodate(page)) {
2686 err = page->mapping->a_ops->readpage(NULL, page);
2699 for (i = start_i; i < num_pages; i++) {
2700 page = extent_buffer_page(eb, i);
2701 wait_on_page_locked(page);
2702 if (!PageUptodate(page)) {
2707 eb->flags |= EXTENT_UPTODATE;
2710 EXPORT_SYMBOL(read_extent_buffer_pages);
/*
 * Copy @len bytes at buffer offset @start out of @eb into @dstv,
 * page by page with atomic kmaps (KM_USER1).  Partial listing: the len
 * parameter, loop header/advance lines and locals are not visible.
 */
2712 void read_extent_buffer(struct extent_buffer *eb, void *dstv,
2713 unsigned long start,
2720 char *dst = (char *)dstv;
2721 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2722 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2723 unsigned long num_pages = num_extent_pages(eb->start, eb->len);
2725 WARN_ON(start > eb->len);
2726 WARN_ON(start + len > eb->start + eb->len);
2728 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
2731 page = extent_buffer_page(eb, i);
2732 if (!PageUptodate(page)) {
2733 printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len);
2736 WARN_ON(!PageUptodate(page));
/* copy at most to the end of the current page */
2738 cur = min(len, (PAGE_CACHE_SIZE - offset));
2739 kaddr = kmap_atomic(page, KM_USER1);
2740 memcpy(dst, kaddr + offset, cur);
2741 kunmap_atomic(kaddr, KM_USER1);
2749 EXPORT_SYMBOL(read_extent_buffer);
/*
 * Map a contiguous range of @eb into a kernel address: the range
 * [start, start + min_len) must lie within a single page.  On success
 * *map points at the requested offset, *map_start is the buffer offset
 * where the mapped page begins, and *map_len is the bytes available.
 * Partial listing: the single-page check (comparing i and end_i), the
 * token assignment and returns are not visible.
 */
2751 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
2752 unsigned long min_len, char **token, char **map,
2753 unsigned long *map_start,
2754 unsigned long *map_len, int km)
2756 size_t offset = start & (PAGE_CACHE_SIZE - 1);
2759 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2760 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2761 unsigned long end_i = (start_offset + start + min_len - 1) >>
2768 offset = start_offset;
2772 *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
2774 if (start + min_len > eb->len) {
2775 printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len);
2779 p = extent_buffer_page(eb, i);
2780 WARN_ON(!PageUptodate(p));
2781 kaddr = kmap_atomic(p, km);
2783 *map = kaddr + offset;
2784 *map_len = PAGE_CACHE_SIZE - offset;
2787 EXPORT_SYMBOL(map_private_extent_buffer);
/*
 * Cached variant of map_private_extent_buffer(): drops any mapping the
 * buffer already holds (eb->map_token), maps the new range, and caches
 * token/start/len on the buffer for reuse.  Partial listing: the cached
 * range check and return lines are not visible.
 */
2789 int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
2790 unsigned long min_len,
2791 char **token, char **map,
2792 unsigned long *map_start,
2793 unsigned long *map_len, int km)
2797 if (eb->map_token) {
2798 unmap_extent_buffer(eb, eb->map_token, km);
2799 eb->map_token = NULL;
2802 err = map_private_extent_buffer(eb, start, min_len, token, map,
2803 map_start, map_len, km);
2805 eb->map_token = *token;
2807 eb->map_start = *map_start;
2808 eb->map_len = *map_len;
2812 EXPORT_SYMBOL(map_extent_buffer);
/* Release a mapping obtained from map_extent_buffer()/map_private_...;
 * @token is the kmap_atomic address, @km the kmap slot used to map it. */
2814 void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
2816 kunmap_atomic(token, km);
2818 EXPORT_SYMBOL(unmap_extent_buffer);
/*
 * memcmp() @len bytes of @eb at buffer offset @start against @ptrv,
 * page by page; stops at the first difference.  Partial listing: the
 * len parameter, loop header/advance and the final return are not
 * visible in this chunk.
 */
2820 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
2821 unsigned long start,
2828 char *ptr = (char *)ptrv;
2829 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2830 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2833 WARN_ON(start > eb->len);
2834 WARN_ON(start + len > eb->start + eb->len);
2836 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
2839 page = extent_buffer_page(eb, i);
2840 WARN_ON(!PageUptodate(page));
2842 cur = min(len, (PAGE_CACHE_SIZE - offset));
2844 kaddr = kmap_atomic(page, KM_USER0);
2845 ret = memcmp(ptr, kaddr + offset, cur);
2846 kunmap_atomic(kaddr, KM_USER0);
2857 EXPORT_SYMBOL(memcmp_extent_buffer);
/*
 * Copy @len bytes from @srcv into @eb at buffer offset @start, page by
 * page with atomic kmaps (KM_USER1).  Partial listing: loop header and
 * pointer-advance lines are not visible.
 */
2859 void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
2860 unsigned long start, unsigned long len)
2866 char *src = (char *)srcv;
2867 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2868 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2870 WARN_ON(start > eb->len);
2871 WARN_ON(start + len > eb->start + eb->len);
2873 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
2876 page = extent_buffer_page(eb, i);
2877 WARN_ON(!PageUptodate(page));
2879 cur = min(len, PAGE_CACHE_SIZE - offset);
2880 kaddr = kmap_atomic(page, KM_USER1);
2881 memcpy(kaddr + offset, src, cur);
2882 kunmap_atomic(kaddr, KM_USER1);
2890 EXPORT_SYMBOL(write_extent_buffer);
/*
 * Fill @len bytes of @eb at buffer offset @start with byte @c, page by
 * page with atomic kmaps.  Partial listing: loop header and advance
 * lines are not visible.
 */
2892 void memset_extent_buffer(struct extent_buffer *eb, char c,
2893 unsigned long start, unsigned long len)
2899 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2900 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2902 WARN_ON(start > eb->len);
2903 WARN_ON(start + len > eb->start + eb->len);
2905 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
2908 page = extent_buffer_page(eb, i);
2909 WARN_ON(!PageUptodate(page));
2911 cur = min(len, PAGE_CACHE_SIZE - offset);
2912 kaddr = kmap_atomic(page, KM_USER0);
2913 memset(kaddr + offset, c, cur);
2914 kunmap_atomic(kaddr, KM_USER0);
2921 EXPORT_SYMBOL(memset_extent_buffer);
/*
 * Copy @len bytes from @src (at @src_offset) into @dst (at @dst_offset),
 * walking @dst page by page and pulling each chunk with
 * read_extent_buffer().  Partial listing: the len parameter, loop
 * header and advance lines are not visible.
 */
2923 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
2924 unsigned long dst_offset, unsigned long src_offset,
2927 u64 dst_len = dst->len;
2932 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
2933 unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
2935 WARN_ON(src->len != dst_len);
2937 offset = (start_offset + dst_offset) &
2938 ((unsigned long)PAGE_CACHE_SIZE - 1);
2941 page = extent_buffer_page(dst, i);
2942 WARN_ON(!PageUptodate(page));
2944 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
2946 kaddr = kmap_atomic(page, KM_USER0);
2947 read_extent_buffer(src, kaddr + offset, src_offset, cur);
2948 kunmap_atomic(kaddr, KM_USER0);
2956 EXPORT_SYMBOL(copy_extent_buffer);
/*
 * Overlap-safe byte move between pages: within a single page it is a
 * plain memmove(); across two pages it copies backwards from the end
 * (pointers p/s are set one past the ranges; the decrementing copy loop
 * is not visible in this partial listing).
 */
2958 static void move_pages(struct page *dst_page, struct page *src_page,
2959 unsigned long dst_off, unsigned long src_off,
2962 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
2963 if (dst_page == src_page) {
2964 memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
2966 char *src_kaddr = kmap_atomic(src_page, KM_USER1);
2967 char *p = dst_kaddr + dst_off + len;
2968 char *s = src_kaddr + src_off + len;
2973 kunmap_atomic(src_kaddr, KM_USER1);
2975 kunmap_atomic(dst_kaddr, KM_USER0);
/*
 * copy_pages - copy bytes between two (possibly identical) pages
 * @dst_page: page to write into
 * @src_page: page to read from (may be the same page as @dst_page)
 * @dst_off:  byte offset within @dst_page
 * @src_off:  byte offset within @src_page
 *
 * Maps the destination in KM_USER0 and, when the pages differ, the
 * source in KM_USER1; same-page copies reuse the destination mapping.
 * NOTE(review): uses memcpy even in the same-page case — callers are
 * presumably expected to pass non-overlapping ranges; confirm.
 */
2978 static void copy_pages(struct page *dst_page, struct page *src_page,
2979 unsigned long dst_off, unsigned long src_off,
2982 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
/* only take a second atomic mapping when the pages really differ */
2985 if (dst_page != src_page)
2986 src_kaddr = kmap_atomic(src_page, KM_USER1);
/* same page: read through the existing destination mapping */
2988 src_kaddr = dst_kaddr;
2990 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
2991 kunmap_atomic(dst_kaddr, KM_USER0);
/* drop the source mapping only if we actually took one */
2992 if (dst_page != src_page)
2993 kunmap_atomic(src_kaddr, KM_USER1);
/*
 * memcpy_extent_buffer - copy a byte range within a single extent buffer
 * @dst:        the extent buffer (both source and destination)
 * @dst_offset: byte offset within @dst to copy to
 * @src_offset: byte offset within @dst to copy from
 * @len:        number of bytes to copy
 *
 * Both ranges are validated against dst->len, then the copy proceeds a
 * page-chunk at a time via copy_pages(), clamping each chunk so it does
 * not cross a page boundary on either the source or destination side.
 * (Loop structure not fully visible in this extract.)
 *
 * NOTE(review): the printk messages below say "memmove" even though this
 * is the memcpy variant — misleading when debugging; flag for a fix.
 */
2996 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
2997 unsigned long src_offset, unsigned long len)
3000 size_t dst_off_in_page;
3001 size_t src_off_in_page;
/* byte offset of dst->start within its first backing page */
3002 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3003 unsigned long dst_i;
3004 unsigned long src_i;
/* reject ranges that run past the end of the buffer */
3006 if (src_offset + len > dst->len) {
3007 printk("memmove bogus src_offset %lu move len %lu len %lu\n",
3008 src_offset, len, dst->len);
3011 if (dst_offset + len > dst->len) {
3012 printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
3013 dst_offset, len, dst->len);
/* in-page offsets of the current source and destination positions */
3018 dst_off_in_page = (start_offset + dst_offset) &
3019 ((unsigned long)PAGE_CACHE_SIZE - 1);
3020 src_off_in_page = (start_offset + src_offset) &
3021 ((unsigned long)PAGE_CACHE_SIZE - 1);
/* page indices of the current source and destination positions */
3023 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
3024 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
/* clamp the chunk so it stays within one page on both sides */
3026 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
3028 cur = min_t(unsigned long, cur,
3029 (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
3031 copy_pages(extent_buffer_page(dst, dst_i),
3032 extent_buffer_page(dst, src_i),
3033 dst_off_in_page, src_off_in_page, cur);
3040 EXPORT_SYMBOL(memcpy_extent_buffer);
/*
 * memmove_extent_buffer - overlap-safe copy of a byte range within one
 * extent buffer
 * @dst:        the extent buffer (both source and destination)
 * @dst_offset: byte offset within @dst to copy to
 * @src_offset: byte offset within @dst to copy from
 * @len:        number of bytes to copy
 *
 * When the destination starts before the source a forward copy cannot
 * clobber unread source bytes, so the work is delegated to
 * memcpy_extent_buffer().  Otherwise the ranges are walked from their
 * *last* byte backwards, one page-bounded chunk at a time, using
 * move_pages() which is overlap-safe.  (Loop structure not fully
 * visible in this extract.)
 */
3042 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3043 unsigned long src_offset, unsigned long len)
3046 size_t dst_off_in_page;
3047 size_t src_off_in_page;
/* offsets of the final byte of each range (inclusive) */
3048 unsigned long dst_end = dst_offset + len - 1;
3049 unsigned long src_end = src_offset + len - 1;
/* byte offset of dst->start within its first backing page */
3050 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3051 unsigned long dst_i;
3052 unsigned long src_i;
/* reject ranges that run past the end of the buffer */
3054 if (src_offset + len > dst->len) {
3055 printk("memmove bogus src_offset %lu move len %lu len %lu\n",
3056 src_offset, len, dst->len);
3059 if (dst_offset + len > dst->len) {
3060 printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
3061 dst_offset, len, dst->len);
/* dst before src: a plain forward copy is already overlap-safe */
3064 if (dst_offset < src_offset) {
3065 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
/* pages holding the last byte of each range */
3069 dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
3070 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
/* in-page offsets of the last byte of each range */
3072 dst_off_in_page = (start_offset + dst_end) &
3073 ((unsigned long)PAGE_CACHE_SIZE - 1);
3074 src_off_in_page = (start_offset + src_end) &
3075 ((unsigned long)PAGE_CACHE_SIZE - 1);
/* chunk size: limited by remaining len and by how far back we can go
 * within the current page on each side (off_in_page + 1 bytes) */
3077 cur = min_t(unsigned long, len, src_off_in_page + 1);
3078 cur = min(cur, dst_off_in_page + 1);
/* move_pages copies tail-first, so overlapping ranges stay intact */
3079 move_pages(extent_buffer_page(dst, dst_i),
3080 extent_buffer_page(dst, src_i),
3081 dst_off_in_page - cur + 1,
3082 src_off_in_page - cur + 1, cur);
3089 EXPORT_SYMBOL(memmove_extent_buffer);