Commit | Line | Data |
---|---|---|
c1d7c514 | 1 | // SPDX-License-Identifier: GPL-2.0 |
925baedd CM |
2 | /* |
3 | * Copyright (C) 2008 Oracle. All rights reserved. | |
925baedd | 4 | */ |
c1d7c514 | 5 | |
925baedd | 6 | #include <linux/sched.h> |
925baedd CM |
7 | #include <linux/pagemap.h> |
8 | #include <linux/spinlock.h> | |
9 | #include <linux/page-flags.h> | |
4881ee5a | 10 | #include <asm/bug.h> |
602cbe91 | 11 | #include "misc.h" |
925baedd CM |
12 | #include "ctree.h" |
13 | #include "extent_io.h" | |
14 | #include "locking.h" | |
15 | ||
d4e253bb DS |
16 | /* |
17 | * Extent buffer locking | |
18 | * ===================== | |
19 | * | |
196d59ab JB |
20 | * We use a rw_semaphore for tree locking, and the semantics are exactly the |
21 | * same: | |
d4e253bb DS |
22 | * |
23 | * - reader/writer exclusion | |
24 | * - writer/writer exclusion | |
25 | * - reader/reader sharing | |
d4e253bb | 26 | * - try-lock semantics for readers and writers |
d4e253bb | 27 | * |
4048daed JB |
28 | * The rwsem implementation does opportunistic spinning which reduces number of |
29 | * times the locking task needs to sleep. | |
d4e253bb DS |
30 | */ |
31 | ||
/*
 * __btrfs_tree_read_lock - lock extent buffer for read
 * @eb:      the eb to be locked
 * @nest:    the nesting level to be used for lockdep
 * @recurse: unused
 *
 * This takes the read lock on the extent buffer, using the specified nesting
 * level for lockdep purposes.
 */
void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest,
			    bool recurse)
{
	u64 start_ns = 0;

	/* Only pay for the timestamp when the tracepoint is actually enabled. */
	if (trace_btrfs_tree_read_lock_enabled())
		start_ns = ktime_get_ns();

	down_read_nested(&eb->lock, nest);
	/*
	 * Debug aid recording the last locker.  NOTE(review): multiple
	 * concurrent readers may race updating this field; it appears to be
	 * best-effort information only.
	 */
	eb->lock_owner = current->pid;
	trace_btrfs_tree_read_lock(eb, start_ns);
}
53 | ||
/* Read-lock @eb with the default lockdep nesting level. */
void btrfs_tree_read_lock(struct extent_buffer *eb)
{
	__btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL, false);
}
58 | ||
/*
 * Try-lock for read.
 *
 * Return 1 if the rwlock has been taken, 0 otherwise
 */
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
	if (down_read_trylock(&eb->lock)) {
		/* Best-effort debug record of the last locker. */
		eb->lock_owner = current->pid;
		trace_btrfs_try_tree_read_lock(eb);
		return 1;
	}
	return 0;
}
73 | ||
/*
 * Try-lock for write.
 *
 * Return 1 if the rwlock has been taken, 0 otherwise
 */
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
	if (down_write_trylock(&eb->lock)) {
		/* Exclusive holder; safe to record the owner. */
		eb->lock_owner = current->pid;
		trace_btrfs_try_tree_write_lock(eb);
		return 1;
	}
	return 0;
}
88 | ||
/*
 * Release read lock.
 */
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_read_unlock(eb);
	/* Clear the owner record before the lock is actually dropped. */
	eb->lock_owner = 0;
	up_read(&eb->lock);
}
98 | ||
/*
 * __btrfs_tree_lock - lock eb for write
 * @eb:   the eb to lock
 * @nest: the nesting to use for the lock
 *
 * Returns with the eb->lock write locked.
 */
void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
	__acquires(&eb->lock)
{
	u64 start_ns = 0;

	/* Only pay for the timestamp when the tracepoint is actually enabled. */
	if (trace_btrfs_tree_lock_enabled())
		start_ns = ktime_get_ns();

	down_write_nested(&eb->lock, nest);
	/* Exclusive holder; safe to record the owner. */
	eb->lock_owner = current->pid;
	trace_btrfs_tree_lock(eb, start_ns);
}
118 | ||
/* Write-lock @eb with the default lockdep nesting level. */
void btrfs_tree_lock(struct extent_buffer *eb)
{
	__btrfs_tree_lock(eb, BTRFS_NESTING_NORMAL);
}
123 | ||
/*
 * Release the write lock.
 */
void btrfs_tree_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_unlock(eb);
	/* Clear the owner record before the lock is actually dropped. */
	eb->lock_owner = 0;
	up_write(&eb->lock);
}
ed2b1d36 | 133 | |
1f95ec01 DS |
134 | /* |
135 | * This releases any locks held in the path starting at level and going all the | |
136 | * way up to the root. | |
137 | * | |
138 | * btrfs_search_slot will keep the lock held on higher nodes in a few corner | |
139 | * cases, such as COW of the block at slot zero in the node. This ignores | |
140 | * those rules, and it should only be called when there are no more updates to | |
141 | * be done higher up in the tree. | |
142 | */ | |
143 | void btrfs_unlock_up_safe(struct btrfs_path *path, int level) | |
144 | { | |
145 | int i; | |
146 | ||
147 | if (path->keep_locks) | |
148 | return; | |
149 | ||
150 | for (i = level; i < BTRFS_MAX_LEVEL; i++) { | |
151 | if (!path->nodes[i]) | |
152 | continue; | |
153 | if (!path->locks[i]) | |
154 | continue; | |
155 | btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]); | |
156 | path->locks[i] = 0; | |
157 | } | |
158 | } | |
b908c334 DS |
159 | |
160 | /* | |
161 | * Loop around taking references on and locking the root node of the tree until | |
162 | * we end up with a lock on the root node. | |
163 | * | |
164 | * Return: root extent buffer with write lock held | |
165 | */ | |
166 | struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) | |
167 | { | |
168 | struct extent_buffer *eb; | |
169 | ||
170 | while (1) { | |
171 | eb = btrfs_root_node(root); | |
172 | btrfs_tree_lock(eb); | |
173 | if (eb == root->node) | |
174 | break; | |
175 | btrfs_tree_unlock(eb); | |
176 | free_extent_buffer(eb); | |
177 | } | |
178 | return eb; | |
179 | } | |
180 | ||
181 | /* | |
182 | * Loop around taking references on and locking the root node of the tree until | |
183 | * we end up with a lock on the root node. | |
184 | * | |
185 | * Return: root extent buffer with read lock held | |
186 | */ | |
1bb96598 | 187 | struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root) |
b908c334 DS |
188 | { |
189 | struct extent_buffer *eb; | |
190 | ||
191 | while (1) { | |
192 | eb = btrfs_root_node(root); | |
1bb96598 | 193 | btrfs_tree_read_lock(eb); |
b908c334 DS |
194 | if (eb == root->node) |
195 | break; | |
196 | btrfs_tree_read_unlock(eb); | |
197 | free_extent_buffer(eb); | |
198 | } | |
199 | return eb; | |
200 | } | |
2992df73 NB |
/*
 * DREW locks
 * ==========
 *
 * DREW stands for double-reader-writer-exclusion lock. It's used in situations
 * where you want to provide A-B exclusion but not AA or BB.
 *
 * The current implementation gives more priority to readers. If a reader and a
 * writer both race to acquire their respective sides of the lock, the writer
 * yields its lock as soon as it detects a concurrent reader. Additionally,
 * if there are pending readers, no new writers are allowed to come in and
 * acquire the lock.
 */
215 | ||
216 | int btrfs_drew_lock_init(struct btrfs_drew_lock *lock) | |
217 | { | |
218 | int ret; | |
219 | ||
220 | ret = percpu_counter_init(&lock->writers, 0, GFP_KERNEL); | |
221 | if (ret) | |
222 | return ret; | |
223 | ||
224 | atomic_set(&lock->readers, 0); | |
225 | init_waitqueue_head(&lock->pending_readers); | |
226 | init_waitqueue_head(&lock->pending_writers); | |
227 | ||
228 | return 0; | |
229 | } | |
230 | ||
/* Tear down a DREW lock; frees the per-cpu writer counter. */
void btrfs_drew_lock_destroy(struct btrfs_drew_lock *lock)
{
	percpu_counter_destroy(&lock->writers);
}
235 | ||
/* Return true if acquisition is successful, false otherwise */
bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
{
	/* Fast-path bailout: readers already hold (or are waiting for) the lock. */
	if (atomic_read(&lock->readers))
		return false;

	percpu_counter_inc(&lock->writers);

	/* Ensure writers count is updated before we check for pending readers */
	smp_mb();
	/*
	 * A reader may have raced in after the first check; if so, back out
	 * our writer count (waking any pending readers) and report failure.
	 */
	if (atomic_read(&lock->readers)) {
		btrfs_drew_write_unlock(lock);
		return false;
	}

	return true;
}
253 | ||
254 | void btrfs_drew_write_lock(struct btrfs_drew_lock *lock) | |
255 | { | |
256 | while (true) { | |
257 | if (btrfs_drew_try_write_lock(lock)) | |
258 | return; | |
259 | wait_event(lock->pending_writers, !atomic_read(&lock->readers)); | |
260 | } | |
261 | } | |
262 | ||
/*
 * Release the writer side of a DREW lock and wake up any readers waiting
 * for the writer count to reach zero.  The decrement must happen before
 * the wakeup so woken readers observe the updated count.
 */
void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
{
	percpu_counter_dec(&lock->writers);
	cond_wake_up(&lock->pending_readers);
}
268 | ||
/*
 * Acquire the reader side of a DREW lock, sleeping until all active
 * writers have finished.
 */
void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
{
	atomic_inc(&lock->readers);

	/*
	 * Ensure the pending reader count is perceived BEFORE this reader
	 * goes to sleep in case of active writers. This guarantees new writers
	 * won't be allowed and that the current reader will be woken up when
	 * the last active writer finishes its jobs.
	 */
	smp_mb__after_atomic();

	wait_event(lock->pending_readers,
		   percpu_counter_sum(&lock->writers) == 0);
}
284 | ||
/*
 * Release the reader side of a DREW lock; the last reader out wakes any
 * writers waiting for the reader count to reach zero.
 */
void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
{
	/*
	 * atomic_dec_and_test implies a full barrier, so woken up writers
	 * are guaranteed to see the decrement
	 */
	if (atomic_dec_and_test(&lock->readers))
		wake_up(&lock->pending_writers);
}