btrfs: use btrfs_tree_read_lock in btrfs_search_slot
fs/btrfs/locking.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include "misc.h"
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"

/*
 * Extent buffer locking
 * =====================
 *
 * We use a rw_semaphore for tree locking, and the semantics are exactly the
 * same:
 *
 * - reader/writer exclusion
 * - writer/writer exclusion
 * - reader/reader sharing
 * - try-lock semantics for readers and writers
 *
 * The rwsem implementation does opportunistic spinning, which reduces the
 * number of times the locking task needs to sleep.
 */

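/*
 * Illustrative usage sketch (not part of the original file): callers pair the
 * lock and unlock helpers below around access to a tree block, e.g.
 *
 *	btrfs_tree_read_lock(eb);
 *	... read items from eb ...
 *	btrfs_tree_read_unlock(eb);
 *
 * and for modifications:
 *
 *	btrfs_tree_lock(eb);
 *	... modify eb ...
 *	btrfs_tree_unlock(eb);
 *
 * The BTRFS_NESTING_* levels passed to the __btrfs_tree_*lock() variants are
 * lockdep annotations for callers that legitimately hold locks on several
 * tree blocks at once; they do not change the locking behaviour.
 */
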
/*
 * __btrfs_tree_read_lock - lock extent buffer for read
 * @eb:      the eb to be locked
 * @nest:    the nesting level to be used for lockdep
 * @recurse: unused
 *
 * This takes the read lock on the extent buffer, using the specified nesting
 * level for lockdep purposes.
 */
void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest,
			    bool recurse)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_read_lock_enabled())
		start_ns = ktime_get_ns();

	down_read_nested(&eb->lock, nest);
	eb->lock_owner = current->pid;
	trace_btrfs_tree_read_lock(eb, start_ns);
}

void btrfs_tree_read_lock(struct extent_buffer *eb)
{
	__btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL, false);
}

/*
 * Try-lock for read.
 *
 * Return 1 if the rwlock has been taken, 0 otherwise.
 */
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
	if (down_read_trylock(&eb->lock)) {
		eb->lock_owner = current->pid;
		trace_btrfs_try_tree_read_lock(eb);
		return 1;
	}
	return 0;
}

/*
 * Try-lock for write.
 *
 * Return 1 if the rwlock has been taken, 0 otherwise.
 */
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
	if (down_write_trylock(&eb->lock)) {
		eb->lock_owner = current->pid;
		trace_btrfs_try_tree_write_lock(eb);
		return 1;
	}
	return 0;
}

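/*
 * Usage sketch (illustrative only): the try-lock variants let a caller avoid
 * sleeping while it still holds other tree block locks, falling back to the
 * blocking lock once it has released whatever could deadlock or stall, e.g.
 *
 *	if (!btrfs_try_tree_write_lock(eb)) {
 *		... release conflicting locks ...
 *		btrfs_tree_lock(eb);
 *	}
 */
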
/*
 * Release read lock.
 */
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_read_unlock(eb);
	eb->lock_owner = 0;
	up_read(&eb->lock);
}

/*
 * __btrfs_tree_lock - lock eb for write
 * @eb:   the eb to lock
 * @nest: the nesting to use for the lock
 *
 * Returns with the eb->lock write locked.
 */
void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
	__acquires(&eb->lock)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_lock_enabled())
		start_ns = ktime_get_ns();

	down_write_nested(&eb->lock, nest);
	eb->lock_owner = current->pid;
	trace_btrfs_tree_lock(eb, start_ns);
}

void btrfs_tree_lock(struct extent_buffer *eb)
{
	__btrfs_tree_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Release the write lock.
 */
void btrfs_tree_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_unlock(eb);
	eb->lock_owner = 0;
	up_write(&eb->lock);
}

/*
 * This releases any locks held in the path starting at level and going all the
 * way up to the root.
 *
 * btrfs_search_slot will keep the lock held on higher nodes in a few corner
 * cases, such as COW of the block at slot zero in the node. This ignores
 * those rules, and it should only be called when there are no more updates to
 * be done higher up in the tree.
 */
void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
	int i;

	if (path->keep_locks)
		return;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			continue;
		if (!path->locks[i])
			continue;
		btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
		path->locks[i] = 0;
	}
}

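/*
 * Illustrative sketch (not part of the original file, error handling
 * abbreviated): a caller that only needs the leaf it found can drop the locks
 * btrfs_search_slot() may still hold on upper levels:
 *
 *	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 *	if (ret < 0)
 *		goto out;
 *	btrfs_unlock_up_safe(path, 1);
 *	... work on path->nodes[0] only ...
 */
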
/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node. The root node can be swapped out by
 * a concurrent COW between the time we read root->node and the time we acquire
 * the lock, hence the retry loop.
 *
 * Return: root extent buffer with write lock held
 */
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		btrfs_tree_lock(eb);
		if (eb == root->node)
			break;
		btrfs_tree_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node.
 *
 * Return: root extent buffer with read lock held
 */
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		btrfs_tree_read_lock(eb);
		if (eb == root->node)
			break;
		btrfs_tree_read_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/*
 * DREW locks
 * ==========
 *
 * DREW stands for double-reader-writer-exclusion lock. It's used in situations
 * where you want to provide A-B exclusion but not AA or BB.
 *
 * The current implementation gives priority to readers. If a reader and a
 * writer both race to acquire their respective sides of the lock, the writer
 * yields its lock as soon as it detects a concurrent reader. Additionally, if
 * there are pending readers, no new writers are allowed to come in and acquire
 * the lock.
 */

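/*
 * Usage sketch (illustrative only): any number of holders may share one side
 * of the lock, but the two sides never run concurrently:
 *
 *	side A (shared among A holders):
 *		btrfs_drew_read_lock(lock);
 *		... A-type work ...
 *		btrfs_drew_read_unlock(lock);
 *
 *	side B (shared among B holders, excluded from side A):
 *		if (btrfs_drew_try_write_lock(lock)) {
 *			... B-type work ...
 *			btrfs_drew_write_unlock(lock);
 *		}
 */
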
int btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
{
	int ret;

	ret = percpu_counter_init(&lock->writers, 0, GFP_KERNEL);
	if (ret)
		return ret;

	atomic_set(&lock->readers, 0);
	init_waitqueue_head(&lock->pending_readers);
	init_waitqueue_head(&lock->pending_writers);

	return 0;
}

void btrfs_drew_lock_destroy(struct btrfs_drew_lock *lock)
{
	percpu_counter_destroy(&lock->writers);
}

/* Return true if acquisition is successful, false otherwise */
bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
{
	if (atomic_read(&lock->readers))
		return false;

	percpu_counter_inc(&lock->writers);

	/* Ensure writers count is updated before we check for pending readers */
	smp_mb();
	if (atomic_read(&lock->readers)) {
		btrfs_drew_write_unlock(lock);
		return false;
	}

	return true;
}

void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
{
	while (true) {
		if (btrfs_drew_try_write_lock(lock))
			return;
		wait_event(lock->pending_writers, !atomic_read(&lock->readers));
	}
}

void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
{
	percpu_counter_dec(&lock->writers);
	cond_wake_up(&lock->pending_readers);
}

void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
{
	atomic_inc(&lock->readers);

	/*
	 * Ensure the pending reader count is perceived BEFORE this reader
	 * goes to sleep in case of active writers. This guarantees new writers
	 * won't be allowed and that the current reader will be woken up when
	 * the last active writer finishes its job.
	 */
	smp_mb__after_atomic();

	wait_event(lock->pending_readers,
		   percpu_counter_sum(&lock->writers) == 0);
}

void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
{
	/*
	 * atomic_dec_and_test() implies a full barrier, so woken up writers
	 * are guaranteed to see the decrement.
	 */
	if (atomic_dec_and_test(&lock->readers))
		wake_up(&lock->pending_writers);
}