// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include "misc.h"
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"

#ifdef CONFIG_BTRFS_DEBUG
static inline void btrfs_assert_spinning_writers_get(struct extent_buffer *eb)
{
	WARN_ON(eb->spinning_writers);
	eb->spinning_writers++;
}

static inline void btrfs_assert_spinning_writers_put(struct extent_buffer *eb)
{
	WARN_ON(eb->spinning_writers != 1);
	eb->spinning_writers--;
}

static inline void btrfs_assert_no_spinning_writers(struct extent_buffer *eb)
{
	WARN_ON(eb->spinning_writers);
}

static inline void btrfs_assert_spinning_readers_get(struct extent_buffer *eb)
{
	atomic_inc(&eb->spinning_readers);
}

static inline void btrfs_assert_spinning_readers_put(struct extent_buffer *eb)
{
	WARN_ON(atomic_read(&eb->spinning_readers) == 0);
	atomic_dec(&eb->spinning_readers);
}

static inline void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb)
{
	atomic_inc(&eb->read_locks);
}

static inline void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb)
{
	atomic_dec(&eb->read_locks);
}

static inline void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
{
	BUG_ON(!atomic_read(&eb->read_locks));
}

static inline void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb)
{
	eb->write_locks++;
}

static inline void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb)
{
	eb->write_locks--;
}

#else
static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb) { }
static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb) { }
static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb) { }
static void btrfs_assert_spinning_readers_put(struct extent_buffer *eb) { }
static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb) { }
static void btrfs_assert_tree_read_locked(struct extent_buffer *eb) { }
static void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb) { }
static void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb) { }
static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb) { }
static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb) { }
#endif
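
/*
 * Overview (an illustrative summary sketch, not from the original file):
 * each extent_buffer lock below has two modes. A "spinning" lock holds
 * eb->lock (a rwlock) and must not sleep; a "blocking" lock drops the
 * rwlock and is tracked only by the blocking_readers/blocking_writers
 * counters, so the holder may schedule. A typical write-side sequence,
 * assuming a hypothetical caller that holds a reference to @eb:
 *
 *	btrfs_tree_lock(eb);			spinning write lock
 *	btrfs_set_lock_blocking_write(eb);	now safe to sleep
 *	...					work that may schedule
 *	btrfs_tree_unlock(eb);			drops either flavor
 */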

void btrfs_set_lock_blocking_read(struct extent_buffer *eb)
{
	trace_btrfs_set_lock_blocking_read(eb);
	/*
	 * No lock is required.  The lock owner may change if we have a read
	 * lock, but it won't change to or away from us.  If we have the write
	 * lock, we are the owner and it'll never change.
	 */
	if (eb->lock_nested && current->pid == eb->lock_owner)
		return;
	btrfs_assert_tree_read_locked(eb);
	atomic_inc(&eb->blocking_readers);
	btrfs_assert_spinning_readers_put(eb);
	read_unlock(&eb->lock);
}

void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
{
	trace_btrfs_set_lock_blocking_write(eb);
	/*
	 * No lock is required.  The lock owner may change if we have a read
	 * lock, but it won't change to or away from us.  If we have the write
	 * lock, we are the owner and it'll never change.
	 */
	if (eb->lock_nested && current->pid == eb->lock_owner)
		return;
	if (eb->blocking_writers == 0) {
		btrfs_assert_spinning_writers_put(eb);
		btrfs_assert_tree_locked(eb);
		WRITE_ONCE(eb->blocking_writers, 1);
		write_unlock(&eb->lock);
	}
}

/*
 * Take a spinning read lock. This will wait for any blocking
 * writers.
 */
void btrfs_tree_read_lock(struct extent_buffer *eb)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_read_lock_enabled())
		start_ns = ktime_get_ns();
again:
	read_lock(&eb->lock);
	BUG_ON(eb->blocking_writers == 0 &&
	       current->pid == eb->lock_owner);
	if (eb->blocking_writers) {
		if (current->pid == eb->lock_owner) {
			/*
			 * This extent is already write-locked by our thread.
			 * We allow an additional read lock to be added because
			 * it's for the same thread. btrfs_find_all_roots()
			 * depends on this as it may be called on a partly
			 * (write-)locked tree.
			 */
			BUG_ON(eb->lock_nested);
			eb->lock_nested = true;
			read_unlock(&eb->lock);
			trace_btrfs_tree_read_lock(eb, start_ns);
			return;
		}
		read_unlock(&eb->lock);
		wait_event(eb->write_lock_wq,
			   READ_ONCE(eb->blocking_writers) == 0);
		goto again;
	}
	btrfs_assert_tree_read_locks_get(eb);
	btrfs_assert_spinning_readers_get(eb);
	trace_btrfs_tree_read_lock(eb, start_ns);
}
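
/*
 * Usage sketch (illustrative, not from the original file): a read-side
 * section that starts out spinning and converts to blocking before doing
 * work that may schedule. The conversion and the blocking unlock pair up:
 *
 *	btrfs_tree_read_lock(eb);		spinning read lock
 *	btrfs_set_lock_blocking_read(eb);	drops the rwlock, may sleep
 *	...					work that may schedule
 *	btrfs_tree_read_unlock_blocking(eb);	releases the blocking lock
 *
 * A purely spinning section would instead end with btrfs_tree_read_unlock().
 */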

/*
 * Take a spinning read lock.
 * Returns 1 if we get the read lock and 0 if we don't.
 * This won't wait for blocking writers.
 */
int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
{
	if (READ_ONCE(eb->blocking_writers))
		return 0;

	read_lock(&eb->lock);
	/* Refetch value after lock */
	if (READ_ONCE(eb->blocking_writers)) {
		read_unlock(&eb->lock);
		return 0;
	}
	btrfs_assert_tree_read_locks_get(eb);
	btrfs_assert_spinning_readers_get(eb);
	trace_btrfs_tree_read_lock_atomic(eb);
	return 1;
}

/*
 * Try to take a spinning read lock.
 * Returns 1 if we get the read lock and 0 if we don't.
 * This won't wait for blocking writers.
 */
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
	if (READ_ONCE(eb->blocking_writers))
		return 0;

	if (!read_trylock(&eb->lock))
		return 0;

	/* Refetch value after lock */
	if (READ_ONCE(eb->blocking_writers)) {
		read_unlock(&eb->lock);
		return 0;
	}
	btrfs_assert_tree_read_locks_get(eb);
	btrfs_assert_spinning_readers_get(eb);
	trace_btrfs_try_tree_read_lock(eb);
	return 1;
}
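
/*
 * Usage sketch (illustrative, not from the original file): a caller that
 * prefers not to spin on a contended buffer can try the lock first and
 * fall back to the waiting variant:
 *
 *	if (!btrfs_try_tree_read_lock(eb))
 *		btrfs_tree_read_lock(eb);	waits for blocking writers
 */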

/*
 * Try to take a spinning write lock.
 * Returns 1 if we get the write lock and 0 if we don't.
 * This won't wait for blocking writers or readers.
 */
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
	if (READ_ONCE(eb->blocking_writers) || atomic_read(&eb->blocking_readers))
		return 0;

	write_lock(&eb->lock);
	/* Refetch value after lock */
	if (READ_ONCE(eb->blocking_writers) || atomic_read(&eb->blocking_readers)) {
		write_unlock(&eb->lock);
		return 0;
	}
	btrfs_assert_tree_write_locks_get(eb);
	btrfs_assert_spinning_writers_get(eb);
	eb->lock_owner = current->pid;
	trace_btrfs_try_tree_write_lock(eb);
	return 1;
}
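
/*
 * Note (illustrative): both try-lock variants use the same
 * check/lock/recheck pattern. The unlocked READ_ONCE() test is only an
 * optimization to bail out early without touching eb->lock; the decision
 * that counts is the refetch made while holding the rwlock, which is what
 * makes the result reliable.
 */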

/*
 * Drop a spinning read lock.
 */
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_read_unlock(eb);
	/*
	 * If we're nested, we have the write lock.  No new locking
	 * is needed as long as we are the lock owner.
	 * The write unlock will do a barrier for us, and the lock_nested
	 * field only matters to the lock owner.
	 */
	if (eb->lock_nested && current->pid == eb->lock_owner) {
		eb->lock_nested = false;
		return;
	}
	btrfs_assert_tree_read_locked(eb);
	btrfs_assert_spinning_readers_put(eb);
	btrfs_assert_tree_read_locks_put(eb);
	read_unlock(&eb->lock);
}

/*
 * Drop a blocking read lock.
 */
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
{
	trace_btrfs_tree_read_unlock_blocking(eb);
	/*
	 * If we're nested, we have the write lock.  No new locking
	 * is needed as long as we are the lock owner.
	 * The write unlock will do a barrier for us, and the lock_nested
	 * field only matters to the lock owner.
	 */
	if (eb->lock_nested && current->pid == eb->lock_owner) {
		eb->lock_nested = false;
		return;
	}
	btrfs_assert_tree_read_locked(eb);
	WARN_ON(atomic_read(&eb->blocking_readers) == 0);
	/* atomic_dec_and_test implies a barrier */
	if (atomic_dec_and_test(&eb->blocking_readers))
		cond_wake_up_nomb(&eb->read_lock_wq);
	btrfs_assert_tree_read_locks_put(eb);
}

/*
 * Take a spinning write lock. This will wait for both blocking
 * readers and writers.
 */
void btrfs_tree_lock(struct extent_buffer *eb)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_lock_enabled())
		start_ns = ktime_get_ns();

	WARN_ON(eb->lock_owner == current->pid);
again:
	wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
	wait_event(eb->write_lock_wq, READ_ONCE(eb->blocking_writers) == 0);
	write_lock(&eb->lock);
	/* Refetch value after lock */
	if (atomic_read(&eb->blocking_readers) ||
	    READ_ONCE(eb->blocking_writers)) {
		write_unlock(&eb->lock);
		goto again;
	}
	btrfs_assert_spinning_writers_get(eb);
	btrfs_assert_tree_write_locks_get(eb);
	eb->lock_owner = current->pid;
	trace_btrfs_tree_lock(eb, start_ns);
}
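
/*
 * Note (illustrative): unlike read locks, which may nest once on top of a
 * write lock held by the same thread, write locks are not recursive. The
 * WARN_ON above fires if the owner tries to take the lock again, so a
 * caller must never do:
 *
 *	btrfs_tree_lock(eb);
 *	btrfs_tree_lock(eb);	would wait on itself
 */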

/*
 * Drop a spinning or a blocking write lock.
 */
void btrfs_tree_unlock(struct extent_buffer *eb)
{
	/*
	 * This is read both locked and unlocked but always by the same thread
	 * that already owns the lock so we don't need to use READ_ONCE
	 */
	int blockers = eb->blocking_writers;

	BUG_ON(blockers > 1);

	btrfs_assert_tree_locked(eb);
	trace_btrfs_tree_unlock(eb);
	eb->lock_owner = 0;
	btrfs_assert_tree_write_locks_put(eb);

	if (blockers) {
		btrfs_assert_no_spinning_writers(eb);
		/* Unlocked write */
		WRITE_ONCE(eb->blocking_writers, 0);
		/*
		 * We need to order modifying blocking_writers above with
		 * actually waking up the sleepers to ensure they see the
		 * updated value of blocking_writers
		 */
		cond_wake_up(&eb->write_lock_wq);
	} else {
		btrfs_assert_spinning_writers_put(eb);
		write_unlock(&eb->lock);
	}
}

/*
 * Set all locked nodes in the path to blocking locks. This should be done
 * before scheduling.
 */
void btrfs_set_path_blocking(struct btrfs_path *p)
{
	int i;

	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
		if (!p->nodes[i] || !p->locks[i])
			continue;
		/*
		 * If we currently have a spinning reader or writer lock this
		 * will bump the count of blocking holders and drop the
		 * spinlock.
		 */
		if (p->locks[i] == BTRFS_READ_LOCK) {
			btrfs_set_lock_blocking_read(p->nodes[i]);
			p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
		} else if (p->locks[i] == BTRFS_WRITE_LOCK) {
			btrfs_set_lock_blocking_write(p->nodes[i]);
			p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
		}
	}
}
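
/*
 * Usage sketch (illustrative, not from the original file): a tree search
 * leaves spinning locks in the path, which must be converted before
 * anything that can schedule, e.g. a sleeping memory allocation:
 *
 *	ret = btrfs_search_slot(trans, root, key, path, ins_len, cow);
 *	btrfs_set_path_blocking(path);
 *	...					work that may schedule
 */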

/*
 * This releases any locks held in the path starting at level and going all
 * the way up to the root.
 *
 * btrfs_search_slot will keep the lock held on higher nodes in a few corner
 * cases, such as COW of the block at slot zero in the node. This ignores
 * those rules, and it should only be called when there are no more updates
 * to be done higher up in the tree.
 */
void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
	int i;

	if (path->keep_locks)
		return;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			continue;
		if (!path->locks[i])
			continue;
		btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
		path->locks[i] = 0;
	}
}
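
/*
 * Usage sketch (illustrative, not from the original file): once an update
 * is known to touch only the leaf, the interior locks can be dropped early
 * to reduce contention on the upper nodes:
 *
 *	btrfs_unlock_up_safe(path, 1);	keep only the leaf (level 0) locked
 */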