// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include "misc.h"
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"

/*
 * Extent buffer locking
 * =====================
 *
 * We use a rw_semaphore for tree locking, and the semantics are exactly the
 * same:
 *
 * - reader/writer exclusion
 * - writer/writer exclusion
 * - reader/reader sharing
 * - try-lock semantics for readers and writers
 *
 * The rwsem implementation does opportunistic spinning, which reduces the
 * number of times the locking task needs to sleep.
 */
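
/*
 * A minimal usage sketch (hypothetical caller, for illustration only; the
 * real callers live elsewhere in btrfs, e.g. ctree.c):
 *
 *	btrfs_tree_read_lock(eb);	shared with other readers
 *	... read-only access to eb ...
 *	btrfs_tree_read_unlock(eb);
 *
 *	btrfs_tree_lock(eb);		exclusive, excludes readers and writers
 *	... modify eb ...
 *	btrfs_tree_unlock(eb);
 */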

/*
 * __btrfs_tree_read_lock - lock extent buffer for read
 * @eb:   the eb to be locked
 * @nest: the nesting level to be used for lockdep
 *
 * This takes the read lock on the extent buffer, using the specified nesting
 * level for lockdep purposes.
 */
void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_read_lock_enabled())
		start_ns = ktime_get_ns();

	down_read_nested(&eb->lock, nest);
	eb->lock_owner = current->pid;
	trace_btrfs_tree_read_lock(eb, start_ns);
}

void btrfs_tree_read_lock(struct extent_buffer *eb)
{
	__btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Try-lock for read.
 *
 * Return 1 if the lock has been taken, 0 otherwise
 */
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
	if (down_read_trylock(&eb->lock)) {
		eb->lock_owner = current->pid;
		trace_btrfs_try_tree_read_lock(eb);
		return 1;
	}
	return 0;
}

/*
 * Try-lock for write.
 *
 * Return 1 if the lock has been taken, 0 otherwise
 */
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
	if (down_write_trylock(&eb->lock)) {
		eb->lock_owner = current->pid;
		trace_btrfs_try_tree_write_lock(eb);
		return 1;
	}
	return 0;
}

/*
 * Release the read lock.
 */
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_read_unlock(eb);
	eb->lock_owner = 0;
	up_read(&eb->lock);
}

/*
 * __btrfs_tree_lock - lock eb for write
 * @eb:   the eb to lock
 * @nest: the nesting to use for the lock
 *
 * Returns with the eb->lock write locked.
 */
void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
	__acquires(&eb->lock)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_lock_enabled())
		start_ns = ktime_get_ns();

	down_write_nested(&eb->lock, nest);
	eb->lock_owner = current->pid;
	trace_btrfs_tree_lock(eb, start_ns);
}

void btrfs_tree_lock(struct extent_buffer *eb)
{
	__btrfs_tree_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Release the write lock.
 */
void btrfs_tree_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_unlock(eb);
	eb->lock_owner = 0;
	up_write(&eb->lock);
}

/*
 * This releases any locks held in the path starting at level and going all the
 * way up to the root.
 *
 * btrfs_search_slot will keep the lock held on higher nodes in a few corner
 * cases, such as COW of the block at slot zero in the node. This ignores
 * those rules, and it should only be called when there are no more updates to
 * be done higher up in the tree.
 */
void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
	int i;

	if (path->keep_locks)
		return;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			continue;
		if (!path->locks[i])
			continue;
		btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
		path->locks[i] = 0;
	}
}
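
/*
 * Usage sketch (hypothetical caller, for illustration only): once no more
 * changes are needed above level 1, the upper locks can be dropped early to
 * reduce contention while the leaf is still being worked on:
 *
 *	btrfs_unlock_up_safe(path, 1);
 *	... continue working with path->nodes[0] under its own lock ...
 */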

/*
 * Loop around taking references on and locking the root node of the tree
 * until we end up with a lock on the current root node (the root may change
 * while we wait for the lock, e.g. due to a concurrent COW of the root).
 *
 * Return: root extent buffer with write lock held
 */
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		btrfs_tree_lock(eb);
		if (eb == root->node)
			break;
		btrfs_tree_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/*
 * Loop around taking references on and locking the root node of the tree
 * until we end up with a lock on the current root node (the root may change
 * while we wait for the lock, e.g. due to a concurrent COW of the root).
 *
 * Return: root extent buffer with read lock held
 */
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		btrfs_tree_read_lock(eb);
		if (eb == root->node)
			break;
		btrfs_tree_read_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/*
 * DREW locks
 * ==========
 *
 * DREW stands for double-reader-writer-exclusion lock. It's used in
 * situations where you want to provide A-B exclusion but not AA or BB.
 *
 * The current implementation gives more priority to readers. If a reader and
 * a writer both race to acquire their respective sides of the lock, the
 * writer yields its lock as soon as it detects a concurrent reader.
 * Additionally, if there are pending readers, no new writers are allowed to
 * come in and acquire the lock.
 */
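
/*
 * Usage sketch (hypothetical A and B sides, for illustration only; in btrfs
 * the lock is embedded in a larger structure and initialized once with
 * btrfs_drew_lock_init()):
 *
 *	A side (writer):			B side (reader):
 *	btrfs_drew_write_lock(&lock);		btrfs_drew_read_lock(&lock);
 *	... A work, excluded from B ...		... B work, excluded from A ...
 *	btrfs_drew_write_unlock(&lock);		btrfs_drew_read_unlock(&lock);
 *
 * Several A-side holders (or several B-side holders) may run concurrently;
 * only A vs B is excluded.
 */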

int btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
{
	int ret;

	ret = percpu_counter_init(&lock->writers, 0, GFP_KERNEL);
	if (ret)
		return ret;

	atomic_set(&lock->readers, 0);
	init_waitqueue_head(&lock->pending_readers);
	init_waitqueue_head(&lock->pending_writers);

	return 0;
}

void btrfs_drew_lock_destroy(struct btrfs_drew_lock *lock)
{
	percpu_counter_destroy(&lock->writers);
}

/* Return true if acquisition is successful, false otherwise */
bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
{
	if (atomic_read(&lock->readers))
		return false;

	percpu_counter_inc(&lock->writers);

	/* Ensure writers count is updated before we check for pending readers */
	smp_mb();
	if (atomic_read(&lock->readers)) {
		btrfs_drew_write_unlock(lock);
		return false;
	}

	return true;
}

void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
{
	while (true) {
		if (btrfs_drew_try_write_lock(lock))
			return;
		wait_event(lock->pending_writers, !atomic_read(&lock->readers));
	}
}

void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
{
	percpu_counter_dec(&lock->writers);
	cond_wake_up(&lock->pending_readers);
}

void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
{
	atomic_inc(&lock->readers);

	/*
	 * Ensure the pending reader count is perceived BEFORE this reader
	 * goes to sleep in case of active writers. This guarantees new
	 * writers won't be allowed and that the current reader will be woken
	 * up when the last active writer finishes its job.
	 */
	smp_mb__after_atomic();

	wait_event(lock->pending_readers,
		   percpu_counter_sum(&lock->writers) == 0);
}

void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
{
	/*
	 * atomic_dec_and_test implies a full barrier, so woken up writers
	 * are guaranteed to see the decrement.
	 */
	if (atomic_dec_and_test(&lock->readers))
		wake_up(&lock->pending_writers);
}