| 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | /* |
| 3 | * Copyright (C) 2007 Oracle. All rights reserved. |
| 4 | */ |
| 5 | |
| 6 | #ifndef BTRFS_TRANSACTION_H |
| 7 | #define BTRFS_TRANSACTION_H |
| 8 | |
| 9 | #include <linux/atomic.h> |
| 10 | #include <linux/refcount.h> |
| 11 | #include <linux/list.h> |
| 12 | #include <linux/time64.h> |
| 13 | #include <linux/mutex.h> |
| 14 | #include <linux/wait.h> |
| 15 | #include "btrfs_inode.h" |
| 16 | #include "delayed-ref.h" |
| 17 | #include "extent-io-tree.h" |
| 18 | #include "block-rsv.h" |
| 19 | #include "messages.h" |
| 20 | #include "misc.h" |
| 21 | |
/*
 * Forward declarations. The declarations in this header only use these types
 * through pointers, so their full definitions are not required here.
 * struct btrfs_pending_snapshot is defined further down in this file.
 *
 * The btrfs_abort_transaction() macro dereferences ->fs_info when expanded,
 * so its users need the full definition of struct btrfs_fs_info.
 *
 * NOTE(review): struct inode is not referenced anywhere in this header;
 * confirm whether any includer relies on this declaration before dropping it.
 */
struct dentry;
struct inode;
struct btrfs_pending_snapshot;
struct btrfs_fs_info;
struct btrfs_root_item;
struct btrfs_root;
struct btrfs_path;
| 29 | |
| 30 | /* |
| 31 | * Signal that a direct IO write is in progress, to avoid deadlock for sync |
| 32 | * direct IO writes when fsync is called during the direct IO write path. |
| 33 | */ |
| 34 | #define BTRFS_TRANS_DIO_WRITE_STUB ((void *) 1) |
| 35 | |
| 36 | /* Radix-tree tag for roots that are part of the transaction. */ |
| 37 | #define BTRFS_ROOT_TRANS_TAG 0 |
| 38 | |
/*
 * State of a struct btrfs_transaction (stored in its ->state member).
 *
 * The enumerators are numbered consecutively from 0 in the order listed.
 * TRANS_STATE_MAX is not a state: it is one more than the last real state,
 * i.e. the number of states.
 */
enum btrfs_trans_state {
	TRANS_STATE_RUNNING,
	TRANS_STATE_COMMIT_PREP,
	TRANS_STATE_COMMIT_START,
	TRANS_STATE_COMMIT_DOING,
	TRANS_STATE_UNBLOCKED,
	TRANS_STATE_SUPER_COMMITTED,
	TRANS_STATE_COMPLETED,
	TRANS_STATE_MAX,
};
| 49 | |
/*
 * BTRFS_TRANS_* flag numbers.
 *
 * NOTE(review): the consecutive values 0, 1, 2 suggest these are bit numbers
 * (for test_bit()/set_bit() style helpers) in btrfs_transaction::flags rather
 * than bit masks - confirm against the users.
 */
#define BTRFS_TRANS_HAVE_FREE_BGS	0
#define BTRFS_TRANS_DIRTY_BG_RUN	1
#define BTRFS_TRANS_CACHE_ENOSPC	2
| 53 | |
/*
 * In-memory state of one transaction, identified by @transid.
 *
 * Reference counted through @use_count.
 * NOTE(review): presumably released with btrfs_put_transaction() - confirm.
 */
struct btrfs_transaction {
	u64 transid;
	/*
	 * total external writers(USERSPACE/START/ATTACH) in this
	 * transaction, it must be zero before the transaction is
	 * being committed
	 */
	atomic_t num_extwriters;
	/*
	 * total writers in this transaction, it must be zero before the
	 * transaction can end
	 */
	atomic_t num_writers;
	refcount_t use_count;

	unsigned long flags;

	/* Protected by fs_info->trans_lock when we want to change it. */
	enum btrfs_trans_state state;
	/* Non-zero once the transaction was aborted; check with TRANS_ABORTED(). */
	int aborted;
	struct list_head list;
	struct extent_io_tree dirty_pages;
	time64_t start_time;
	wait_queue_head_t writer_wait;
	wait_queue_head_t commit_wait;
	struct list_head pending_snapshots;
	struct list_head dev_update_list;
	struct list_head switch_commits;
	struct list_head dirty_bgs;

	/*
	 * There is no explicit lock which protects io_bgs, rather its
	 * consistency is implied by the fact that all the sites which modify
	 * it do so under some form of transaction critical section, namely:
	 *
	 * - btrfs_start_dirty_block_groups - This function can only ever be
	 *   run by one of the transaction committers. Refer to
	 *   BTRFS_TRANS_DIRTY_BG_RUN usage in btrfs_commit_transaction
	 *
	 * - btrfs_write_dirty_blockgroups - this is called by
	 *   commit_cowonly_roots from transaction critical section
	 *   (TRANS_STATE_COMMIT_DOING)
	 *   NOTE(review): spelled "blockgroups" here but "block_groups" in the
	 *   item above - confirm the real function name.
	 *
	 * - btrfs_cleanup_dirty_bgs - called on transaction abort
	 */
	struct list_head io_bgs;
	struct list_head dropped_roots;
	struct extent_io_tree pinned_extents;

	/*
	 * We need to make sure block group deletion doesn't race with
	 * free space cache writeout. This mutex keeps them from stomping
	 * on each other.
	 */
	struct mutex cache_write_mutex;
	spinlock_t dirty_bgs_lock;
	/* Protected by spin lock fs_info->unused_bgs_lock. */
	struct list_head deleted_bgs;
	spinlock_t dropped_roots_lock;
	/* Holds qgroup_to_skip, see btrfs_set_skip_qgroup(). */
	struct btrfs_delayed_ref_root delayed_refs;
	struct btrfs_fs_info *fs_info;

	/*
	 * Number of ordered extents the transaction must wait for before
	 * committing. These are ordered extents started by a fast fsync.
	 */
	atomic_t pending_ordered;
	wait_queue_head_t pending_wait;
};
| 123 | |
/*
 * Building blocks for the TRANS_* values below. ENUM_BIT() is defined outside
 * this file.
 *
 * NOTE(review): presumably each ENUM_BIT(x) makes x a distinct single-bit
 * mask (the TRANS_* values below OR them together) - confirm in misc.h.
 */
enum {
	ENUM_BIT(__TRANS_FREEZABLE),
	ENUM_BIT(__TRANS_START),
	ENUM_BIT(__TRANS_ATTACH),
	ENUM_BIT(__TRANS_JOIN),
	ENUM_BIT(__TRANS_JOIN_NOLOCK),
	ENUM_BIT(__TRANS_DUMMY),
	ENUM_BIT(__TRANS_JOIN_NOSTART),
};

/*
 * Composite values built from the bits above; only TRANS_START and TRANS_JOIN
 * include __TRANS_FREEZABLE.
 * NOTE(review): presumably these are what btrfs_trans_handle::type holds -
 * confirm against the users.
 */
#define TRANS_START		(__TRANS_START | __TRANS_FREEZABLE)
#define TRANS_ATTACH		(__TRANS_ATTACH)
#define TRANS_JOIN		(__TRANS_JOIN | __TRANS_FREEZABLE)
#define TRANS_JOIN_NOLOCK	(__TRANS_JOIN_NOLOCK)
#define TRANS_JOIN_NOSTART	(__TRANS_JOIN_NOSTART)

/*
 * The START and ATTACH kinds, matching the kinds listed in the comment on
 * btrfs_transaction::num_extwriters.
 */
#define TRANS_EXTWRITERS	(__TRANS_START | __TRANS_ATTACH)
| 141 | |
/*
 * Handle through which a task works on a transaction (@transaction).
 *
 * The btrfs_start_transaction*(), btrfs_join_transaction*() and
 * btrfs_attach_transaction*() functions declared in this header return a
 * pointer to one, and btrfs_end_transaction() takes one as its argument.
 * NOTE(review): ownership and lifetime rules are not visible from this header
 * - confirm in the definitions.
 */
struct btrfs_trans_handle {
	u64 transid;
	u64 bytes_reserved;
	u64 delayed_refs_bytes_reserved;
	u64 chunk_bytes_reserved;
	unsigned long delayed_ref_updates;
	unsigned long delayed_ref_csum_deletions;
	struct btrfs_transaction *transaction;
	struct btrfs_block_rsv *block_rsv;
	struct btrfs_block_rsv *orig_rsv;
	/* Set by a task that wants to create a snapshot. */
	struct btrfs_pending_snapshot *pending_snapshot;
	refcount_t use_count;
	/* NOTE(review): presumably one of the TRANS_* values - confirm. */
	unsigned int type;
	/*
	 * Error code of transaction abort, set outside of locks and must use
	 * the READ_ONCE/WRITE_ONCE access
	 */
	short aborted;
	bool adding_csums;
	bool allocating_chunk;
	bool removing_chunk;
	bool reloc_reserved;
	bool in_fsync;
	struct btrfs_fs_info *fs_info;
	struct list_head new_bgs;
	struct btrfs_block_rsv delayed_rsv;
};
| 170 | |
| 171 | /* |
| 172 | * The abort status can be changed between calls and is not protected by locks. |
| 173 | * This accepts btrfs_transaction and btrfs_trans_handle as types. Once it's |
| 174 | * set to a non-zero value it does not change, so the macro should be in checks |
| 175 | * but is not necessary for further reads of the value. |
| 176 | */ |
| 177 | #define TRANS_ABORTED(trans) (unlikely(READ_ONCE((trans)->aborted))) |
| 178 | |
/*
 * Description of one snapshot creation request, linked through @list.
 * NOTE(review): presumably queued on btrfs_transaction::pending_snapshots -
 * confirm against the users.
 */
struct btrfs_pending_snapshot {
	struct dentry *dentry;
	struct btrfs_inode *dir;
	struct btrfs_root *root;
	struct btrfs_root_item *root_item;
	struct btrfs_root *snap;
	struct btrfs_qgroup_inherit *inherit;
	struct btrfs_path *path;
	/* block reservation for the operation */
	struct btrfs_block_rsv block_rsv;
	/*
	 * NOTE(review): presumably the result (0 or a negative errno) of
	 * creating this snapshot - confirm against the code that sets it.
	 */
	int error;
	/* Preallocated anonymous block device number */
	dev_t anon_dev;
	bool readonly;
	struct list_head list;
};
| 196 | |
/*
 * Stamp @inode with the transaction behind @trans.
 *
 * With inode->lock held, records:
 * - last_trans:      transid of the transaction behind @trans
 * - last_sub_trans:  the value of btrfs_get_root_log_transid(inode->root)
 * - last_log_commit: last_sub_trans - 1
 *
 * NOTE(review): setting last_log_commit one below last_sub_trans presumably
 * marks the inode as not yet logged in the current log transaction - confirm
 * against the readers of these fields.
 */
static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
					      struct btrfs_inode *inode)
{
	spin_lock(&inode->lock);
	inode->last_trans = trans->transaction->transid;
	inode->last_sub_trans = btrfs_get_root_log_transid(inode->root);
	/* Reads back the value stored on the previous line. */
	inode->last_log_commit = inode->last_sub_trans - 1;
	spin_unlock(&inode->lock);
}
| 206 | |
| 207 | /* |
| 208 | * Make qgroup codes to skip given qgroupid, means the old/new_roots for |
| 209 | * qgroup won't contain the qgroupid in it. |
| 210 | */ |
| 211 | static inline void btrfs_set_skip_qgroup(struct btrfs_trans_handle *trans, |
| 212 | u64 qgroupid) |
| 213 | { |
| 214 | struct btrfs_delayed_ref_root *delayed_refs; |
| 215 | |
| 216 | delayed_refs = &trans->transaction->delayed_refs; |
| 217 | WARN_ON(delayed_refs->qgroup_to_skip); |
| 218 | delayed_refs->qgroup_to_skip = qgroupid; |
| 219 | } |
| 220 | |
| 221 | static inline void btrfs_clear_skip_qgroup(struct btrfs_trans_handle *trans) |
| 222 | { |
| 223 | struct btrfs_delayed_ref_root *delayed_refs; |
| 224 | |
| 225 | delayed_refs = &trans->transaction->delayed_refs; |
| 226 | WARN_ON(!delayed_refs->qgroup_to_skip); |
| 227 | delayed_refs->qgroup_to_skip = 0; |
| 228 | } |
| 229 | |
| 230 | /* |
| 231 | * We want the transaction abort to print stack trace only for errors where the |
| 232 | * cause could be a bug, eg. due to ENOSPC, and not for common errors that are |
| 233 | * caused by external factors. |
| 234 | */ |
| 235 | static inline bool btrfs_abort_should_print_stack(int error) |
| 236 | { |
| 237 | switch (error) { |
| 238 | case -EIO: |
| 239 | case -EROFS: |
| 240 | case -ENOMEM: |
| 241 | return false; |
| 242 | } |
| 243 | return true; |
| 244 | } |
| 245 | |
| 246 | /* |
| 247 | * Call btrfs_abort_transaction as early as possible when an error condition is |
| 248 | * detected, that way the exact stack trace is reported for some errors. |
| 249 | */ |
| 250 | #define btrfs_abort_transaction(trans, error) \ |
| 251 | do { \ |
| 252 | bool __first = false; \ |
| 253 | /* Report first abort since mount */ \ |
| 254 | if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \ |
| 255 | &((trans)->fs_info->fs_state))) { \ |
| 256 | __first = true; \ |
| 257 | if (WARN(btrfs_abort_should_print_stack(error), \ |
| 258 | KERN_ERR \ |
| 259 | "BTRFS: Transaction aborted (error %d)\n", \ |
| 260 | (error))) { \ |
| 261 | /* Stack trace printed. */ \ |
| 262 | } else { \ |
| 263 | btrfs_err((trans)->fs_info, \ |
| 264 | "Transaction aborted (error %d)", \ |
| 265 | (error)); \ |
| 266 | } \ |
| 267 | } \ |
| 268 | __btrfs_abort_transaction((trans), __func__, \ |
| 269 | __LINE__, (error), __first); \ |
| 270 | } while (0) |
| 271 | |
/*
 * Transaction API. The functions below are only declared here.
 *
 * The btrfs_start_transaction*(), btrfs_join_transaction*() and
 * btrfs_attach_transaction*() variants all return a
 * struct btrfs_trans_handle *.
 * NOTE(review): how failure is reported (presumably ERR_PTR()) is not visible
 * from the prototypes - confirm in the definitions.
 */
int btrfs_end_transaction(struct btrfs_trans_handle *trans);
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
						   unsigned int num_items);
struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
					struct btrfs_root *root,
					unsigned int num_items);
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_join_transaction_spacecache(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_attach_transaction_barrier(
					struct btrfs_root *root);
int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid);

void btrfs_add_dead_root(struct btrfs_root *root);
void btrfs_maybe_wake_unfinished_drop(struct btrfs_fs_info *fs_info);
int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info);
int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans);
int btrfs_commit_current_transaction(struct btrfs_root *root);
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);
bool btrfs_should_end_transaction(struct btrfs_trans_handle *trans);
void btrfs_throttle(struct btrfs_fs_info *fs_info);
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
				struct btrfs_root *root);
int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
				struct extent_io_tree *dirty_pages, int mark);
int btrfs_wait_tree_log_extents(struct btrfs_root *root, int mark);
int btrfs_transaction_blocked(struct btrfs_fs_info *info);
void btrfs_put_transaction(struct btrfs_transaction *transaction);
void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root);
void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
/*
 * Out-of-line part of the btrfs_abort_transaction() macro, which supplies
 * @function (__func__), @line (__LINE__) and @first_hit (true only for the
 * first abort since mount).
 */
void __cold __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
				      const char *function,
				      unsigned int line, int error, bool first_hit);

/* Init and exit hooks; defined outside this header. */
int __init btrfs_transaction_init(void);
void __cold btrfs_transaction_exit(void);
| 311 | |
| 312 | #endif |