#define BTRFS_MAX_LEVEL 8
+/*
+ * files bigger than this get some pre-flushing when they are added
+ * to the ordered operations list. That way we limit the total
+ * work done by the commit
+ */
+#define BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT (8 * 1024 * 1024)
+
/* holds pointers to all of the tree roots */
#define BTRFS_ROOT_TREE_OBJECTID 1ULL
int locks[BTRFS_MAX_LEVEL];
int reada;
/* keep some upper locks as we walk down */
- int keep_locks;
- int skip_locking;
int lowest_level;
/*
* set by btrfs_split_item, tells search_slot to keep all locks
* and to force calls to keep space in the nodes
*/
- int search_for_split;
+ unsigned int search_for_split:1;
+ unsigned int keep_locks:1;
+ unsigned int skip_locking:1;
+ unsigned int leave_spinning:1;
};
/*
struct rw_semaphore groups_sem;
};
-struct btrfs_free_space {
- struct rb_node bytes_index;
- struct rb_node offset_index;
- u64 offset;
- u64 bytes;
+/*
+ * free clusters are used to claim free space in relatively large chunks,
+ * allowing us to do less seeky writes. They are used for all metadata
+ * allocations and data allocations in ssd mode.
+ */
+struct btrfs_free_cluster {
+ spinlock_t lock;
+ spinlock_t refill_lock;
+ struct rb_root root;
+
+ /* largest extent in this cluster */
+ u64 max_size;
+
+ /* first extent starting offset */
+ u64 window_start;
+
+ struct btrfs_block_group_cache *block_group;
+ /*
+ * when a cluster is allocated from a block group, we put the
+ * cluster onto a list in the block group so that it can
+ * be freed before the block group is freed.
+ */
+ struct list_head block_group_list;
};
struct btrfs_block_group_cache {
struct btrfs_key key;
struct btrfs_block_group_item item;
spinlock_t lock;
- struct mutex alloc_mutex;
struct mutex cache_mutex;
u64 pinned;
u64 reserved;
struct btrfs_space_info *space_info;
/* free space cache stuff */
+ spinlock_t tree_lock;
struct rb_root free_space_bytes;
struct rb_root free_space_offset;
/* usage count */
atomic_t count;
+
+ /* List of struct btrfs_free_clusters for this block group.
+ * Today it will only have one thing on it, but that may change
+ */
+ struct list_head cluster_list;
};
struct btrfs_leaf_ref_tree {
struct rb_root block_group_cache_tree;
struct extent_io_tree pinned_extents;
- struct extent_io_tree pending_del;
- struct extent_io_tree extent_ins;
/* logical->physical extent mapping */
struct btrfs_mapping_tree mapping_tree;
u64 generation;
u64 last_trans_committed;
- u64 last_trans_new_blockgroup;
+
+ /*
+ * this is updated to the current trans every time a full commit
+ * is required instead of the faster short fsync log commits
+ */
+ u64 last_trans_log_full_commit;
u64 open_ioctl_trans;
unsigned long mount_opt;
u64 max_extent;
struct mutex tree_log_mutex;
struct mutex transaction_kthread_mutex;
struct mutex cleaner_mutex;
- struct mutex extent_ins_mutex;
- struct mutex pinned_mutex;
struct mutex chunk_mutex;
struct mutex drop_mutex;
struct mutex volume_mutex;
struct mutex tree_reloc_mutex;
+
+ /*
+ * this protects the ordered operations list only while we are
+ * processing all of the entries on it. This way we make
+ * sure the commit code doesn't find the list temporarily empty
+ * because another function happens to be doing non-waiting preflush
+ * before jumping into the main commit.
+ */
+ struct mutex ordered_operations_mutex;
+
struct list_head trans_list;
struct list_head hashers;
struct list_head dead_roots;
* ordered extents
*/
spinlock_t ordered_extent_lock;
+
+ /*
+ * all of the data=ordered extents pending writeback
+ * these can span multiple transactions and basically include
+ * every dirty data page that isn't from nodatacow
+ */
struct list_head ordered_extents;
+
+ /*
+ * all of the inodes that have delalloc bytes. It is possible for
+ * this list to be empty even when there is still dirty data=ordered
+ * extents waiting to finish IO.
+ */
struct list_head delalloc_inodes;
+ /*
+ * special rename and truncate targets that must be on disk before
+ * we're allowed to commit. This is basically the ext3 style
+ * data=ordered list.
+ */
+ struct list_head ordered_operations;
+
/*
* there is a pool of worker threads for checksumming during writes
* and a pool for checksumming after reads. This is because readers
atomic_t throttle_gen;
u64 total_pinned;
+
+ /* protected by the delalloc lock, used to keep from writing
+ * metadata until there is a nice batch
+ */
+ u64 dirty_metadata_bytes;
struct list_head dirty_cowonly_roots;
struct btrfs_fs_devices *fs_devices;
spinlock_t delalloc_lock;
spinlock_t new_trans_lock;
u64 delalloc_bytes;
- u64 last_alloc;
- u64 last_data_alloc;
+
+ /* data_alloc_cluster is only used in ssd mode */
+ struct btrfs_free_cluster data_alloc_cluster;
+
+ /* all metadata allocations go through this cluster */
+ struct btrfs_free_cluster meta_alloc_cluster;
spinlock_t ref_cache_lock;
u64 total_ref_cache_size;
#define BTRFS_MOUNT_SSD (1 << 3)
#define BTRFS_MOUNT_DEGRADED (1 << 4)
#define BTRFS_MOUNT_COMPRESS (1 << 5)
+#define BTRFS_MOUNT_NOTREELOG (1 << 6)
+#define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
}
/* extent-tree.c */
+void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
+int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, unsigned long count);
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
-int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 bytenr,
- u64 num_bytes, u32 *refs);
int btrfs_update_pinned_extents(struct btrfs_root *root,
u64 bytenr, u64 num, int pin);
int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *leaf);
int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 objectid, u64 bytenr);
-int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
struct btrfs_block_group_cache *btrfs_lookup_block_group(
struct btrfs_fs_info *info,
u64 root_objectid, u64 ref_generation,
u64 owner_objectid);
int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 bytenr,
+ struct btrfs_root *root, u64 bytenr, u64 num_bytes,
u64 orig_parent, u64 parent,
u64 root_objectid, u64 ref_generation,
u64 owner_objectid);
int btrfs_init_acl(struct inode *inode, struct inode *dir);
int btrfs_acl_chmod(struct inode *inode);
-/* free-space-cache.c */
-int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
- u64 bytenr, u64 size);
-int btrfs_add_free_space_lock(struct btrfs_block_group_cache *block_group,
- u64 offset, u64 bytes);
-int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
- u64 bytenr, u64 size);
-int btrfs_remove_free_space_lock(struct btrfs_block_group_cache *block_group,
- u64 offset, u64 bytes);
-void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
- *block_group);
-struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache
- *block_group, u64 offset,
- u64 bytes);
-void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
- u64 bytes);
-u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
#endif