// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include "misc.h"
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"

/*
 * Extent buffer locking
 * =====================
 *
 * We use a rw_semaphore for tree locking, and the semantics are exactly the
 * same:
 *
 * - reader/writer exclusion
 * - writer/writer exclusion
 * - reader/reader sharing
 * - try-lock semantics for readers and writers
 *
 * The rwsem implementation does opportunistic spinning, which reduces the
 * number of times the locking task needs to sleep.
 */
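
/*
 * A minimal usage sketch (hypothetical caller, for illustration only; the
 * real callers live elsewhere in btrfs, e.g. ctree.c):
 *
 *	btrfs_tree_read_lock(eb);	shared with other readers
 *	... read-only access to eb ...
 *	btrfs_tree_read_unlock(eb);
 *
 *	btrfs_tree_lock(eb);		exclusive, excludes readers and writers
 *	... modify eb ...
 *	btrfs_tree_unlock(eb);
 */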

/*
 * __btrfs_tree_read_lock - lock extent buffer for read
 * @eb:   the eb to be locked
 * @nest: the nesting level to be used for lockdep
 *
 * This takes the read lock on the extent buffer, using the specified nesting
 * level for lockdep purposes.
 */
void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_read_lock_enabled())
		start_ns = ktime_get_ns();

	down_read_nested(&eb->lock, nest);
	eb->lock_owner = current->pid;
	trace_btrfs_tree_read_lock(eb, start_ns);
}

void btrfs_tree_read_lock(struct extent_buffer *eb)
{
	__btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Try-lock for read.
 *
 * Return 1 if the lock has been taken, 0 otherwise
 */
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
	if (down_read_trylock(&eb->lock)) {
		eb->lock_owner = current->pid;
		trace_btrfs_try_tree_read_lock(eb);
		return 1;
	}
	return 0;
}

/*
 * Try-lock for write.
 *
 * Return 1 if the lock has been taken, 0 otherwise
 */
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
	if (down_write_trylock(&eb->lock)) {
		eb->lock_owner = current->pid;
		trace_btrfs_try_tree_write_lock(eb);
		return 1;
	}
	return 0;
}

/*
 * Release the read lock.
 */
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_read_unlock(eb);
	eb->lock_owner = 0;
	up_read(&eb->lock);
}

/*
 * __btrfs_tree_lock - lock eb for write
 * @eb:   the eb to lock
 * @nest: the nesting to use for the lock
 *
 * Returns with the eb->lock write locked.
 */
void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
	__acquires(&eb->lock)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_lock_enabled())
		start_ns = ktime_get_ns();

	down_write_nested(&eb->lock, nest);
	eb->lock_owner = current->pid;
	trace_btrfs_tree_lock(eb, start_ns);
}

void btrfs_tree_lock(struct extent_buffer *eb)
{
	__btrfs_tree_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Release the write lock.
 */
void btrfs_tree_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_unlock(eb);
	eb->lock_owner = 0;
	up_write(&eb->lock);
}

/*
 * This releases any locks held in the path starting at level and going all the
 * way up to the root.
 *
 * btrfs_search_slot will keep the lock held on higher nodes in a few corner
 * cases, such as COW of the block at slot zero in the node. This ignores
 * those rules, and it should only be called when there are no more updates to
 * be done higher up in the tree.
 */
void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
	int i;

	if (path->keep_locks)
		return;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			continue;
		if (!path->locks[i])
			continue;
		btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
		path->locks[i] = 0;
	}
}
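
/*
 * Usage sketch (hypothetical caller, for illustration only): once no more
 * changes are needed above level 1, the upper locks can be dropped early to
 * reduce contention while the leaf is still being worked on:
 *
 *	btrfs_unlock_up_safe(path, 1);
 *	... continue working with path->nodes[0] under its own lock ...
 */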

/*
 * Loop around taking references on and locking the root node of the tree
 * until we end up with a lock on the current root node (the root may change
 * while we wait for the lock, e.g. due to a concurrent COW of the root).
 *
 * Return: root extent buffer with write lock held
 */
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		btrfs_tree_lock(eb);
		if (eb == root->node)
			break;
		btrfs_tree_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/*
 * Loop around taking references on and locking the root node of the tree
 * until we end up with a lock on the current root node (the root may change
 * while we wait for the lock, e.g. due to a concurrent COW of the root).
 *
 * Return: root extent buffer with read lock held
 */
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		btrfs_tree_read_lock(eb);
		if (eb == root->node)
			break;
		btrfs_tree_read_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/*
 * DREW locks
 * ==========
 *
 * DREW stands for double-reader-writer-exclusion lock. It's used in
 * situations where you want to provide A-B exclusion but not AA or BB.
 *
 * The current implementation gives more priority to readers. If a reader and
 * a writer both race to acquire their respective sides of the lock, the
 * writer yields its lock as soon as it detects a concurrent reader.
 * Additionally, if there are pending readers, no new writers are allowed to
 * come in and acquire the lock.
 */
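
/*
 * Usage sketch (hypothetical A and B sides, for illustration only; in btrfs
 * the lock is embedded in a larger structure and initialized once with
 * btrfs_drew_lock_init()):
 *
 *	A side (writer):			B side (reader):
 *	btrfs_drew_write_lock(&lock);		btrfs_drew_read_lock(&lock);
 *	... A work, excluded from B ...		... B work, excluded from A ...
 *	btrfs_drew_write_unlock(&lock);		btrfs_drew_read_unlock(&lock);
 *
 * Several A-side holders (or several B-side holders) may run concurrently;
 * only A vs B is excluded.
 */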

int btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
{
	int ret;

	ret = percpu_counter_init(&lock->writers, 0, GFP_KERNEL);
	if (ret)
		return ret;

	atomic_set(&lock->readers, 0);
	init_waitqueue_head(&lock->pending_readers);
	init_waitqueue_head(&lock->pending_writers);

	return 0;
}

void btrfs_drew_lock_destroy(struct btrfs_drew_lock *lock)
{
	percpu_counter_destroy(&lock->writers);
}

/* Return true if acquisition is successful, false otherwise */
bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
{
	if (atomic_read(&lock->readers))
		return false;

	percpu_counter_inc(&lock->writers);

	/* Ensure writers count is updated before we check for pending readers */
	smp_mb();
	if (atomic_read(&lock->readers)) {
		btrfs_drew_write_unlock(lock);
		return false;
	}

	return true;
}

void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
{
	while (true) {
		if (btrfs_drew_try_write_lock(lock))
			return;
		wait_event(lock->pending_writers, !atomic_read(&lock->readers));
	}
}

void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
{
	percpu_counter_dec(&lock->writers);
	cond_wake_up(&lock->pending_readers);
}

void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
{
	atomic_inc(&lock->readers);

	/*
	 * Ensure the pending reader count is perceived BEFORE this reader
	 * goes to sleep in case of active writers. This guarantees new
	 * writers won't be allowed and that the current reader will be woken
	 * up when the last active writer finishes its job.
	 */
	smp_mb__after_atomic();

	wait_event(lock->pending_readers,
		   percpu_counter_sum(&lock->writers) == 0);
}

void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
{
	/*
	 * atomic_dec_and_test implies a full barrier, so woken up writers
	 * are guaranteed to see the decrement.
	 */
	if (atomic_dec_and_test(&lock->readers))
		wake_up(&lock->pending_writers);
}