static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
+static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);
+
/*
* end_io_wq structs are used to do processing in task context when an IO is
* complete. This is used during reads to verify checksums, and it is used
struct extent_map *em;
int ret;
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (em) {
em->bdev =
BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
goto out;
}
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
em = alloc_extent_map(GFP_NOFS);
if (!em) {
em->block_start = 0;
em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
if (ret == -EEXIST) {
u64 failed_start = em->start;
free_extent_map(em);
em = NULL;
}
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
if (ret)
em = ERR_PTR(ret);
offset = page_offset(page);
em_tree = &BTRFS_I(inode)->extent_tree;
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
if (!em) {
__unplug_io_fn(bdi, page);
return;
free_extent_map(em);
}
+/*
+ * If this fails, caller must call bdi_destroy() to get rid of the
+ * bdi again.
+ */
static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
{
- bdi_init(bdi);
+ int err;
+
+ bdi->capabilities = BDI_CAP_MAP_COPY;
+ err = bdi_init(bdi);
+ if (err)
+ return err;
+
+ err = bdi_register(bdi, NULL, "btrfs-%d",
+ atomic_inc_return(&btrfs_bdi_num));
+ if (err)
+ return err;
+
bdi->ra_pages = default_backing_dev_info.ra_pages;
- bdi->state = 0;
- bdi->capabilities = default_backing_dev_info.capabilities;
bdi->unplug_io_fn = btrfs_unplug_io_fn;
bdi->unplug_io_data = info;
bdi->congested_fn = btrfs_congested_fn;
fs_info->sb = sb;
fs_info->max_extent = (u64)-1;
fs_info->max_inline = 8192 * 1024;
- setup_bdi(fs_info, &fs_info->bdi);
+ if (setup_bdi(fs_info, &fs_info->bdi))
+ goto fail_bdi;
fs_info->btree_inode = new_inode(sb);
fs_info->btree_inode->i_ino = 1;
fs_info->btree_inode->i_nlink = 1;
err = -EINVAL;
goto fail_iput;
}
-
+ printk("thread pool is %d\n", fs_info->thread_pool_size);
/*
* we need to start all the end_io workers up front because the
* queue work function gets called at interrupt time, and so it
fs_info->endio_workers.idle_thresh = 4;
fs_info->endio_meta_workers.idle_thresh = 4;
- fs_info->endio_write_workers.idle_thresh = 64;
- fs_info->endio_meta_write_workers.idle_thresh = 64;
+ fs_info->endio_write_workers.idle_thresh = 2;
+ fs_info->endio_meta_write_workers.idle_thresh = 2;
+
+ fs_info->endio_workers.atomic_worker_start = 1;
+ fs_info->endio_meta_workers.atomic_worker_start = 1;
+ fs_info->endio_write_workers.atomic_worker_start = 1;
+ fs_info->endio_meta_write_workers.atomic_worker_start = 1;
btrfs_start_workers(&fs_info->workers, 1);
btrfs_start_workers(&fs_info->submit_workers, 1);
btrfs_start_workers(&fs_info->delalloc_workers, 1);
btrfs_start_workers(&fs_info->fixup_workers, 1);
- btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
- btrfs_start_workers(&fs_info->endio_meta_workers,
- fs_info->thread_pool_size);
- btrfs_start_workers(&fs_info->endio_meta_write_workers,
- fs_info->thread_pool_size);
- btrfs_start_workers(&fs_info->endio_write_workers,
- fs_info->thread_pool_size);
+ btrfs_start_workers(&fs_info->endio_workers, 1);
+ btrfs_start_workers(&fs_info->endio_meta_workers, 1);
+ btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
+ btrfs_start_workers(&fs_info->endio_write_workers, 1);
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
btrfs_close_devices(fs_info->fs_devices);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
+fail_bdi:
bdi_destroy(&fs_info->bdi);
-
fail:
kfree(extent_root);
kfree(tree_root);
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
-#include <linux/smp_lock.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
#include <linux/swap.h>
}
ret = btrfs_drop_extents(trans, root, inode, start,
- aligned_end, aligned_end, start, &hint_byte);
+ aligned_end, aligned_end, start,
+ &hint_byte, 1);
BUG_ON(ret);
if (isize > actual_end)
inline_len, compressed_size,
compressed_pages);
BUG_ON(ret);
- btrfs_drop_extent_cache(inode, start, aligned_end, 0);
+ btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
return 0;
}
extent_clear_unlock_delalloc(inode,
&BTRFS_I(inode)->io_tree,
start, end, NULL, 1, 0,
- 0, 1, 1, 1);
+ 0, 1, 1, 1, 0);
ret = 0;
goto free_pages_out;
}
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
while (1) {
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
break;
async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
- NULL, 1, 1, 0, 1, 1, 0);
+ NULL, 1, 1, 0, 1, 1, 0, 0);
ret = btrfs_submit_compressed_write(inode,
async_extent->start,
extent_clear_unlock_delalloc(inode,
&BTRFS_I(inode)->io_tree,
start, end, NULL, 1, 1,
- 1, 1, 1, 1);
+ 1, 1, 1, 1, 0);
*nr_written = *nr_written +
(end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
*page_started = 1;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
while (1) {
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
break;
/* we're not doing compressed IO, don't unlock the first
* page (which the caller expects to stay locked), don't
* clear any dirty bits and don't set any writeback bits
+ *
+ * Do set the Private2 bit so we know this page was properly
+ * set up for writepage
*/
extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
start, start + ram_size - 1,
locked_page, unlock, 1,
- 1, 0, 0, 0);
+ 1, 0, 0, 0, 1);
disk_num_bytes -= cur_alloc_size;
num_bytes -= cur_alloc_size;
alloc_hint = ins.objectid + ins.offset;
int limit = 10 * 1024 * 1042;
clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED |
- EXTENT_DELALLOC, 1, 0, GFP_NOFS);
+ EXTENT_DELALLOC, 1, 0, NULL, GFP_NOFS);
while (start < end) {
async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
async_cow->inode = inode;
em->bdev = root->fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
while (1) {
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
break;
extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
cur_offset, cur_offset + num_bytes - 1,
- locked_page, 1, 1, 1, 0, 0, 0);
+ locked_page, 1, 1, 1, 0, 0, 0, 1);
cur_offset = extent_end;
if (cur_offset > end)
break;
lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
/* already ordered? We're done */
- if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
- EXTENT_ORDERED, 0)) {
+ if (PagePrivate2(page))
goto out;
- }
ordered = btrfs_lookup_ordered_extent(inode, page_start);
if (ordered) {
struct inode *inode = page->mapping->host;
struct btrfs_writepage_fixup *fixup;
struct btrfs_root *root = BTRFS_I(inode)->root;
- int ret;
- ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
- EXTENT_ORDERED, 0);
- if (ret)
+ /* this page is properly in the ordered list */
+ if (TestClearPagePrivate2(page))
return 0;
if (PageChecked(page))
BUG_ON(!path);
path->leave_spinning = 1;
+
+ /*
+ * we may be replacing one extent in the tree with another.
+ * The new extent is pinned in the extent map, and we don't want
+ * to drop it from the cache until it is completely in the btree.
+ *
+ * So, tell btrfs_drop_extents to leave this extent in the cache.
+ * The caller is expected to unpin it and allow it to be merged
+ * with the others.
+ */
ret = btrfs_drop_extents(trans, root, inode, file_pos,
file_pos + num_bytes, locked_end,
- file_pos, &hint);
+ file_pos, &hint, 0);
BUG_ON(ret);
ins.objectid = inode->i_ino;
btrfs_mark_buffer_dirty(leaf);
inode_add_bytes(inode, num_bytes);
- btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0);
ins.objectid = disk_bytenr;
ins.offset = disk_num_bytes;
ordered_extent->len,
compressed, 0, 0,
BTRFS_FILE_EXTENT_REG);
+ unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
+ ordered_extent->file_offset,
+ ordered_extent->len);
BUG_ON(ret);
}
unlock_extent(io_tree, ordered_extent->file_offset,
static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate)
{
+ ClearPagePrivate2(page);
return btrfs_finish_ordered_io(page->mapping->host, start, end);
}
failrec->last_mirror = 0;
failrec->bio_flags = 0;
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, failrec->len);
if (em->start > start || em->start + em->len < start) {
free_extent_map(em);
em = NULL;
}
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
if (!em || IS_ERR(em)) {
kfree(failrec);
return 0;
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
- test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) {
+ test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM,
GFP_NOFS);
return 0;
* any xattrs or acls
*/
maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino);
- if (!maybe_acls) {
- BTRFS_I(inode)->i_acl = NULL;
- BTRFS_I(inode)->i_default_acl = NULL;
- }
+ if (!maybe_acls)
+ cache_no_acl(inode);
BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
alloc_group_block, 0);
btrfs_update_inode(trans, root, dir);
btrfs_drop_nlink(inode);
ret = btrfs_update_inode(trans, root, inode);
- dir->i_sb->s_dirt = 1;
out:
return ret;
}
pending_del_nr);
}
btrfs_free_path(path);
- inode->i_sb->s_dirt = 1;
return ret;
}
cur_offset,
cur_offset + hole_size,
block_end,
- cur_offset, &hint_byte);
+ cur_offset, &hint_byte, 1);
if (err)
break;
err = btrfs_insert_file_extent(trans, root,
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_inode *entry;
- struct rb_node **p = &root->inode_tree.rb_node;
- struct rb_node *parent = NULL;
+ struct rb_node **p;
+ struct rb_node *parent;
+
+again:
+ p = &root->inode_tree.rb_node;
+ parent = NULL;
spin_lock(&root->inode_lock);
while (*p) {
entry = rb_entry(parent, struct btrfs_inode, rb_node);
if (inode->i_ino < entry->vfs_inode.i_ino)
- p = &(*p)->rb_left;
+ p = &parent->rb_left;
else if (inode->i_ino > entry->vfs_inode.i_ino)
- p = &(*p)->rb_right;
+ p = &parent->rb_right;
else {
WARN_ON(!(entry->vfs_inode.i_state &
(I_WILL_FREE | I_FREEING | I_CLEAR)));
- break;
+ rb_erase(parent, &root->inode_tree);
+ RB_CLEAR_NODE(parent);
+ spin_unlock(&root->inode_lock);
+ goto again;
}
}
rb_link_node(&BTRFS_I(inode)->rb_node, parent, p);
{
struct btrfs_root *root = BTRFS_I(inode)->root;
+ spin_lock(&root->inode_lock);
if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
- spin_lock(&root->inode_lock);
rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
- spin_unlock(&root->inode_lock);
RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
}
+ spin_unlock(&root->inode_lock);
}
static noinline void init_btrfs_i(struct inode *inode)
{
struct btrfs_inode *bi = BTRFS_I(inode);
- bi->i_acl = BTRFS_ACL_NOT_CACHED;
- bi->i_default_acl = BTRFS_ACL_NOT_CACHED;
-
bi->generation = 0;
bi->sequence = 0;
bi->last_trans = 0;
init_special_inode(inode, inode->i_mode, rdev);
btrfs_update_inode(trans, root, inode);
}
- dir->i_sb->s_dirt = 1;
btrfs_update_inode_block_group(trans, inode);
btrfs_update_inode_block_group(trans, dir);
out_unlock:
inode->i_op = &btrfs_file_inode_operations;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
}
- dir->i_sb->s_dirt = 1;
btrfs_update_inode_block_group(trans, inode);
btrfs_update_inode_block_group(trans, dir);
out_unlock:
if (err)
drop_inode = 1;
- dir->i_sb->s_dirt = 1;
btrfs_update_inode_block_group(trans, dir);
err = btrfs_update_inode(trans, root, inode);
d_instantiate(dentry, inode);
drop_on_err = 0;
- dir->i_sb->s_dirt = 1;
btrfs_update_inode_block_group(trans, inode);
btrfs_update_inode_block_group(trans, dir);
int compressed;
again:
- spin_lock(&em_tree->lock);
+ read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (em)
em->bdev = root->fs_info->fs_devices->latest_bdev;
- spin_unlock(&em_tree->lock);
+ read_unlock(&em_tree->lock);
if (em) {
if (em->start > start || em->start + em->len <= start)
map = kmap(page);
read_extent_buffer(leaf, map + pg_offset, ptr,
copy_size);
+ if (pg_offset + copy_size < PAGE_CACHE_SIZE) {
+ memset(map + pg_offset + copy_size, 0,
+ PAGE_CACHE_SIZE - pg_offset -
+ copy_size);
+ }
kunmap(page);
}
flush_dcache_page(page);
}
err = 0;
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
/* it is possible that someone inserted the extent into the tree
* while we had the lock dropped. It is also possible that
err = 0;
}
}
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
out:
if (path)
btrfs_free_path(path);
u64 page_start = page_offset(page);
u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+
+ /*
+ * we have the page locked, so new writeback can't start,
+ * and the dirty bit won't be cleared while we are here.
+ *
+ * Wait for IO on this page so that we can safely clear
+ * the PagePrivate2 bit and do ordered accounting
+ */
wait_on_page_writeback(page);
+
tree = &BTRFS_I(page->mapping->host)->io_tree;
if (offset) {
btrfs_releasepage(page, GFP_NOFS);
return;
}
-
lock_extent(tree, page_start, page_end, GFP_NOFS);
ordered = btrfs_lookup_ordered_extent(page->mapping->host,
page_offset(page));
*/
clear_extent_bit(tree, page_start, page_end,
EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_LOCKED, 1, 0, GFP_NOFS);
- btrfs_finish_ordered_io(page->mapping->host,
- page_start, page_end);
+ EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
+ /*
+ * whoever cleared the private bit is responsible
+ * for the finish_ordered_io
+ */
+ if (TestClearPagePrivate2(page)) {
+ btrfs_finish_ordered_io(page->mapping->host,
+ page_start, page_end);
+ }
btrfs_put_ordered_extent(ordered);
lock_extent(tree, page_start, page_end, GFP_NOFS);
}
clear_extent_bit(tree, page_start, page_end,
- EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_ORDERED,
- 1, 1, GFP_NOFS);
+ EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
+ 1, 1, NULL, GFP_NOFS);
__btrfs_releasepage(page, GFP_NOFS);
ClearPageChecked(page);
}
ClearPageChecked(page);
set_page_dirty(page);
+ SetPageUptodate(page);
BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
out_unlock:
+ if (!ret)
+ return VM_FAULT_LOCKED;
unlock_page(page);
out:
return ret;
ei->last_trans = 0;
ei->logged_trans = 0;
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
- ei->i_acl = BTRFS_ACL_NOT_CACHED;
- ei->i_default_acl = BTRFS_ACL_NOT_CACHED;
INIT_LIST_HEAD(&ei->i_orphan);
INIT_LIST_HEAD(&ei->ordered_operations);
return &ei->vfs_inode;
WARN_ON(!list_empty(&inode->i_dentry));
WARN_ON(inode->i_data.nrpages);
- if (BTRFS_I(inode)->i_acl &&
- BTRFS_I(inode)->i_acl != BTRFS_ACL_NOT_CACHED)
- posix_acl_release(BTRFS_I(inode)->i_acl);
- if (BTRFS_I(inode)->i_default_acl &&
- BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED)
- posix_acl_release(BTRFS_I(inode)->i_default_acl);
-
/*
* Make sure we're properly removed from the ordered operation
* lists.
inode->i_op = &btrfs_file_inode_operations;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
}
- dir->i_sb->s_dirt = 1;
btrfs_update_inode_block_group(trans, inode);
btrfs_update_inode_block_group(trans, dir);
if (drop_inode)
0, 0, 0,
BTRFS_FILE_EXTENT_PREALLOC);
BUG_ON(ret);
+ btrfs_drop_extent_cache(inode, cur_offset,
+ cur_offset + ins.offset -1, 0);
num_bytes -= ins.offset;
cur_offset += ins.offset;
alloc_hint = ins.objectid + ins.offset;