// SPDX-License-Identifier: GPL-2.0
/* kernel/rwsem.c: R/W semaphores, public implementation
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from asm-i386/semaphore.h
 *
 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
 * and Michel Lespinasse <walken@google.com>
 *
 * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
 * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
 *
 * Rwsem count bit fields re-definition and rwsem rearchitecture by
 * Waiman Long <longman@redhat.com> and
 * Peter Zijlstra <peterz@infradead.org>.
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/task.h>
#include <linux/sched/debug.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/signal.h>
#include <linux/sched/clock.h>
#include <linux/export.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>
#include <trace/events/lock.h>

#ifndef CONFIG_PREEMPT_RT
#include "lock_events.h"

/*
 * The least significant 2 bits of the owner value have the following
 * meanings when set.
 *  - Bit 0: RWSEM_READER_OWNED - rwsem may be owned by readers (just a hint)
 *  - Bit 1: RWSEM_NONSPINNABLE - Cannot spin on a reader-owned lock
 *
 * When the rwsem is reader-owned and a spinning writer has timed out,
 * the nonspinnable bit will be set to disable optimistic spinning.
 *
 * When a writer acquires a rwsem, it puts its task_struct pointer
 * into the owner field. It is cleared after an unlock.
 *
 * When a reader acquires a rwsem, it will also put its task_struct
 * pointer into the owner field with the RWSEM_READER_OWNED bit set.
 * On unlock, the owner field will largely be left untouched. So
 * for a free or reader-owned rwsem, the owner value may contain
 * information about the last reader that acquired the rwsem.
 *
 * That information may be helpful in debugging cases where the system
 * seems to hang on a reader-owned rwsem, especially if only one reader
 * is involved. Ideally we would like to track all the readers that own
 * a rwsem, but the overhead is simply too big.
 *
 * A fast path reader optimistic lock stealing is supported when the rwsem
 * is previously owned by a writer and the following conditions are met:
 *  - rwsem is not currently writer owned
 *  - the handoff isn't set.
 */
#define RWSEM_READER_OWNED	(1UL << 0)
#define RWSEM_NONSPINNABLE	(1UL << 1)
#define RWSEM_OWNER_FLAGS_MASK	(RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)

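/*
 * Illustrative sketch (not part of this file or the kernel build): how a
 * task_struct pointer and the two flag bits share the owner word. Task
 * structure alignment guarantees the low two bits of its address are
 * zero, leaving them free for the flags. The short names below mirror
 * the macros above; everything else is hypothetical userspace code.
 *
 *	#include <assert.h>
 *	#include <stdint.h>
 *
 *	#define READER_OWNED	(1UL << 0)
 *	#define NONSPINNABLE	(1UL << 1)
 *	#define FLAGS_MASK	(READER_OWNED | NONSPINNABLE)
 *
 *	int main(void)
 *	{
 *		static long task_stub;	// aligned stand-in for a task_struct
 *		uintptr_t owner = (uintptr_t)&task_stub | READER_OWNED;
 *
 *		assert((void *)(owner & ~FLAGS_MASK) == (void *)&task_stub);
 *		assert(owner & READER_OWNED);
 *		assert(!(owner & NONSPINNABLE));
 *		return 0;
 *	}
 */
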
#ifdef CONFIG_DEBUG_RWSEMS
# define DEBUG_RWSEMS_WARN_ON(c, sem)	do {			\
	if (!debug_locks_silent &&				\
	    WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, magic = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
		#c, atomic_long_read(&(sem)->count),		\
		(unsigned long) sem->magic,			\
		atomic_long_read(&(sem)->owner), (long)current,	\
		list_empty(&(sem)->wait_list) ? "" : "not "))	\
			debug_locks_off();			\
	} while (0)
#else
# define DEBUG_RWSEMS_WARN_ON(c, sem)
#endif

/*
 * On 64-bit architectures, the bit definitions of the count are:
 *
 * Bit  0    - writer locked bit
 * Bit  1    - waiters present bit
 * Bit  2    - lock handoff bit
 * Bits 3-7  - reserved
 * Bits 8-62 - 55-bit reader count
 * Bit  63   - read fail bit
 *
 * On 32-bit architectures, the bit definitions of the count are:
 *
 * Bit  0    - writer locked bit
 * Bit  1    - waiters present bit
 * Bit  2    - lock handoff bit
 * Bits 3-7  - reserved
 * Bits 8-30 - 23-bit reader count
 * Bit  31   - read fail bit
 *
 * It is not likely that the most significant bit (read fail bit) will ever
 * be set. This guard bit is still checked anyway in the down_read() fastpath
 * just in case we need to use up more of the reader bits for other purposes
 * in the future.
 *
 * atomic_long_fetch_add() is used to obtain the reader lock, whereas
 * atomic_long_cmpxchg() will be used to obtain the writer lock.
 *
 * There are three places where the lock handoff bit may be set or cleared.
 * 1) rwsem_mark_wake() for readers		-- set, clear
 * 2) rwsem_try_write_lock() for writers	-- set, clear
 * 3) rwsem_del_waiter()			-- clear
 *
 * For all the above cases, wait_lock will be held. A writer must also
 * be the first one in the wait_list to be eligible for setting the handoff
 * bit. So concurrent setting/clearing of the handoff bit is not possible.
 */
#define RWSEM_WRITER_LOCKED	(1UL << 0)
#define RWSEM_FLAG_WAITERS	(1UL << 1)
#define RWSEM_FLAG_HANDOFF	(1UL << 2)
#define RWSEM_FLAG_READFAIL	(1UL << (BITS_PER_LONG - 1))

#define RWSEM_READER_SHIFT	8
#define RWSEM_READER_BIAS	(1UL << RWSEM_READER_SHIFT)
#define RWSEM_READER_MASK	(~(RWSEM_READER_BIAS - 1))
#define RWSEM_WRITER_MASK	RWSEM_WRITER_LOCKED
#define RWSEM_LOCK_MASK		(RWSEM_WRITER_MASK|RWSEM_READER_MASK)
#define RWSEM_READ_FAILED_MASK	(RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\
				 RWSEM_FLAG_HANDOFF|RWSEM_FLAG_READFAIL)

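/*
 * Illustrative sketch (not part of the kernel build): a tiny userspace
 * model of the count layout above. A count of 0x302 means three readers
 * (0x300 >> 8), waiters present (bit 1), no writer and no handoff. The
 * short names mirror the macros above; the program itself is made up.
 *
 *	#include <assert.h>
 *
 *	#define WRITER_LOCKED	(1UL << 0)
 *	#define FLAG_WAITERS	(1UL << 1)
 *	#define FLAG_HANDOFF	(1UL << 2)
 *	#define READER_SHIFT	8
 *
 *	int main(void)
 *	{
 *		unsigned long count = 0x302;
 *
 *		assert(!(count & WRITER_LOCKED));
 *		assert(count & FLAG_WAITERS);
 *		assert(!(count & FLAG_HANDOFF));
 *		assert((count >> READER_SHIFT) == 3);
 *		return 0;
 *	}
 */
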
/*
 * All writes to owner are protected by WRITE_ONCE() to make sure that
 * store tearing can't happen as optimistic spinners may read and use
 * the owner value concurrently without lock. Reads from owner, however,
 * may not need READ_ONCE() as long as the pointer value is only used
 * for comparison and isn't being dereferenced.
 *
 * Both rwsem_{set,clear}_owner() functions should be in the same
 * preempt disable section as the atomic op that changes sem->count.
 */
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
	lockdep_assert_preemption_disabled();
	atomic_long_set(&sem->owner, (long)current);
}

static inline void rwsem_clear_owner(struct rw_semaphore *sem)
{
	lockdep_assert_preemption_disabled();
	atomic_long_set(&sem->owner, 0);
}

/*
 * Test the flags in the owner field.
 */
static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags)
{
	return atomic_long_read(&sem->owner) & flags;
}

/*
 * The task_struct pointer of the last owning reader will be left in
 * the owner field.
 *
 * Note that the owner value just indicates the task has owned the rwsem
 * previously; it may not be the real owner or one of the real owners
 * anymore when that field is examined, so take it with a grain of salt.
 *
 * The reader non-spinnable bit is preserved.
 */
static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
					    struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED |
		(atomic_long_read(&sem->owner) & RWSEM_NONSPINNABLE);

	atomic_long_set(&sem->owner, val);
}

static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
{
	__rwsem_set_reader_owned(sem, current);
}

/*
 * Return true if the rwsem is owned by a reader.
 */
static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
{
#ifdef CONFIG_DEBUG_RWSEMS
	/*
	 * Check the count to see if it is write-locked.
	 */
	long count = atomic_long_read(&sem->count);

	if (count & RWSEM_WRITER_MASK)
		return false;
#endif
	return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
}

#ifdef CONFIG_DEBUG_RWSEMS
/*
 * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
 * is a task pointer in the owner field of a reader-owned rwsem, it will
 * be the real owner or one of the real owners. The only exception is when
 * the unlock is done by up_read_non_owner().
 */
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
	unsigned long val = atomic_long_read(&sem->owner);

	while ((val & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current) {
		if (atomic_long_try_cmpxchg(&sem->owner, &val,
					    val & RWSEM_OWNER_FLAGS_MASK))
			return;
	}
}
#else
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
}
#endif

/*
 * Set the RWSEM_NONSPINNABLE bit if the RWSEM_READER_OWNED flag
 * remains set. Otherwise, the operation will be aborted.
 */
static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
{
	unsigned long owner = atomic_long_read(&sem->owner);

	do {
		if (!(owner & RWSEM_READER_OWNED))
			break;
		if (owner & RWSEM_NONSPINNABLE)
			break;
	} while (!atomic_long_try_cmpxchg(&sem->owner, &owner,
					  owner | RWSEM_NONSPINNABLE));
}

static inline bool rwsem_read_trylock(struct rw_semaphore *sem, long *cntp)
{
	*cntp = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);

	if (WARN_ON_ONCE(*cntp < 0))
		rwsem_set_nonspinnable(sem);

	if (!(*cntp & RWSEM_READ_FAILED_MASK)) {
		rwsem_set_reader_owned(sem);
		return true;
	}

	return false;
}

static inline bool rwsem_write_trylock(struct rw_semaphore *sem)
{
	long tmp = RWSEM_UNLOCKED_VALUE;

	if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, RWSEM_WRITER_LOCKED)) {
		rwsem_set_owner(sem);
		return true;
	}

	return false;
}

/*
 * Return just the real task structure pointer of the owner
 */
static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
{
	return (struct task_struct *)
		(atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
}

/*
 * Return the real task structure pointer of the owner and the embedded
 * flags in the owner. pflags must be non-NULL.
 */
static inline struct task_struct *
rwsem_owner_flags(struct rw_semaphore *sem, unsigned long *pflags)
{
	unsigned long owner = atomic_long_read(&sem->owner);

	*pflags = owner & RWSEM_OWNER_FLAGS_MASK;
	return (struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);
}

/*
 * Guide to the rw_semaphore's count field.
 *
 * When the RWSEM_WRITER_LOCKED bit in count is set, the lock is owned
 * by a writer.
 *
 * The lock is owned by readers when
 * (1) the RWSEM_WRITER_LOCKED bit isn't set in count,
 * (2) some of the reader bits are set in count, and
 * (3) the owner field has the RWSEM_READER_OWNED bit set.
 *
 * Having some reader bits set is not enough to guarantee a reader-owned
 * lock as the readers may be in the process of backing out from the count
 * and a writer has just released the lock. So another writer may steal
 * the lock immediately after that.
 */

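/*
 * For illustration only: a hedged sketch (no such helper exists in this
 * file) combining the three conditions above into a single predicate.
 * As the guide notes, the result is a hint rather than a guarantee.
 *
 *	static inline bool rwsem_appears_reader_owned(struct rw_semaphore *sem)
 *	{
 *		long count = atomic_long_read(&sem->count);
 *
 *		return !(count & RWSEM_WRITER_LOCKED) &&
 *		       (count & RWSEM_READER_MASK) &&
 *		       (atomic_long_read(&sem->owner) & RWSEM_READER_OWNED);
 *	}
 */
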
/*
 * Initialize an rwsem:
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held semaphore:
	 */
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
#endif
#ifdef CONFIG_DEBUG_RWSEMS
	sem->magic = sem;
#endif
	atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
	raw_spin_lock_init(&sem->wait_lock);
	INIT_LIST_HEAD(&sem->wait_list);
	atomic_long_set(&sem->owner, 0L);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
	osq_lock_init(&sem->osq);
#endif
}
EXPORT_SYMBOL(__init_rwsem);

enum rwsem_waiter_type {
	RWSEM_WAITING_FOR_WRITE,
	RWSEM_WAITING_FOR_READ
};

struct rwsem_waiter {
	struct list_head list;
	struct task_struct *task;
	enum rwsem_waiter_type type;
	unsigned long timeout;
	bool handoff_set;
};
#define rwsem_first_waiter(sem) \
	list_first_entry(&sem->wait_list, struct rwsem_waiter, list)

enum rwsem_wake_type {
	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
	RWSEM_WAKE_READERS,	/* Wake readers only */
	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */
};

/*
 * The typical HZ value is either 250 or 1000. So set the minimum waiting
 * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
 * queue before initiating the handoff protocol.
 */
#define RWSEM_WAIT_TIMEOUT	DIV_ROUND_UP(HZ, 250)

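/*
 * Worked check of the expression above (added for illustration):
 * DIV_ROUND_UP(HZ, 250) gives 4 jiffies = 4ms at HZ=1000, 1 jiffy = 4ms
 * at HZ=250, and 1 jiffy = 10ms at HZ=100, so the timeout is always at
 * least 4ms or one jiffy, whichever is longer.
 */
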
/*
 * Magic number to batch-wakeup waiting readers, even when writers are
 * also present in the queue. This both limits the amount of work the
 * waking thread must do and also prevents any potential counter overflow,
 * however unlikely.
 */
#define MAX_READERS_WAKEUP	0x100

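/* 0x100 == 256, i.e. at most 256 readers are woken per wakeup call. */
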
static inline void
rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
{
	lockdep_assert_held(&sem->wait_lock);
	list_add_tail(&waiter->list, &sem->wait_list);
	/* caller will set RWSEM_FLAG_WAITERS */
}

/*
 * Remove a waiter from the wait_list and clear flags.
 *
 * Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of
 * this function. Modify with care.
 *
 * Return: true if wait_list isn't empty and false otherwise
 */
static inline bool
rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
{
	lockdep_assert_held(&sem->wait_lock);
	list_del(&waiter->list);
	if (likely(!list_empty(&sem->wait_list)))
		return true;

	atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count);
	return false;
}

/*
 * handle the lock release when there are processes blocked on it that can now run
 * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
 *   have been set.
 * - there must be someone on the queue
 * - the wait_lock must be held by the caller
 * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
 *   to actually wakeup the blocked task(s) and drop the reference count,
 *   preferably when the wait_lock is released
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only marked woken if downgrading is false
 *
 * Implies rwsem_del_waiter() for all woken readers.
 */
static void rwsem_mark_wake(struct rw_semaphore *sem,
			    enum rwsem_wake_type wake_type,
			    struct wake_q_head *wake_q)
{
	struct rwsem_waiter *waiter, *tmp;
	long oldcount, woken = 0, adjustment = 0;
	struct list_head wlist;

	lockdep_assert_held(&sem->wait_lock);

	/*
	 * Take a peek at the queue head waiter such that we can determine
	 * the wakeup(s) to perform.
	 */
	waiter = rwsem_first_waiter(sem);

	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
		if (wake_type == RWSEM_WAKE_ANY) {
			/*
			 * Mark writer at the front of the queue for wakeup.
			 * Until the task is actually awoken later by the
			 * caller, other writers are able to steal it.
			 * Readers, on the other hand, will block as they
			 * will notice the queued writer.
			 */
			wake_q_add(wake_q, waiter->task);
			lockevent_inc(rwsem_wake_writer);
		}

		return;
	}

	/*
	 * No reader wakeup if there are too many of them already.
	 */
	if (unlikely(atomic_long_read(&sem->count) < 0))
		return;

	/*
	 * Writers might steal the lock before we grant it to the next reader.
	 * We prefer to do the first reader grant before counting readers
	 * so we can bail out early if a writer stole the lock.
	 */
	if (wake_type != RWSEM_WAKE_READ_OWNED) {
		struct task_struct *owner;

		adjustment = RWSEM_READER_BIAS;
		oldcount = atomic_long_fetch_add(adjustment, &sem->count);
		if (unlikely(oldcount & RWSEM_WRITER_MASK)) {
			/*
			 * When we've been waiting "too" long (for writers
			 * to give up the lock), request a HANDOFF to
			 * force the issue.
			 */
			if (time_after(jiffies, waiter->timeout)) {
				if (!(oldcount & RWSEM_FLAG_HANDOFF)) {
					adjustment -= RWSEM_FLAG_HANDOFF;
					lockevent_inc(rwsem_rlock_handoff);
				}
				waiter->handoff_set = true;
			}

			atomic_long_add(-adjustment, &sem->count);
			return;
		}
		/*
		 * Set it to reader-owned to give spinners an early
		 * indication that readers now have the lock.
		 * The reader nonspinnable bit seen at slowpath entry of
		 * the reader is copied over.
		 */
		owner = waiter->task;
		__rwsem_set_reader_owned(sem, owner);
	}

	/*
	 * Grant up to MAX_READERS_WAKEUP read locks to all the readers in the
	 * queue. We know that the woken count will be at least 1 as we
	 * accounted for above. Note we increment the 'active part' of the
	 * count by the number of readers before waking any processes up.
	 *
	 * This is an adaptation of the phase-fair R/W locks where at the
	 * reader phase (first waiter is a reader), all readers are eligible
	 * to acquire the lock at the same time irrespective of their order
	 * in the queue. The writers acquire the lock according to their
	 * order in the queue.
	 *
	 * We have to do wakeup in 2 passes to prevent the possibility that
	 * the reader count may be decremented before it is incremented. It
	 * is because the to-be-woken waiter may not have slept yet. So it
	 * may see waiter->task got cleared, finish its critical section and
	 * do an unlock before the reader count increment. (A concrete
	 * interleaving is sketched after this function.)
	 *
	 * 1) Collect the read-waiters in a separate list, count them and
	 *    fully increment the reader count in rwsem.
	 * 2) For each waiter in the new list, clear waiter->task and
	 *    put them into wake_q to be woken up later.
	 */
	INIT_LIST_HEAD(&wlist);
	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
		if (waiter->type == RWSEM_WAITING_FOR_WRITE)
			continue;

		woken++;
		list_move_tail(&waiter->list, &wlist);

		/*
		 * Limit # of readers that can be woken up per wakeup call.
		 */
		if (unlikely(woken >= MAX_READERS_WAKEUP))
			break;
	}

	adjustment = woken * RWSEM_READER_BIAS - adjustment;
	lockevent_cond_inc(rwsem_wake_reader, woken);

	oldcount = atomic_long_read(&sem->count);
	if (list_empty(&sem->wait_list)) {
		/*
		 * Combined with list_move_tail() above, this implies
		 * rwsem_del_waiter().
		 */
		adjustment -= RWSEM_FLAG_WAITERS;
		if (oldcount & RWSEM_FLAG_HANDOFF)
			adjustment -= RWSEM_FLAG_HANDOFF;
	} else if (woken) {
		/*
		 * When we've woken a reader, we no longer need to force
		 * writers to give up the lock and we can clear HANDOFF.
		 */
		if (oldcount & RWSEM_FLAG_HANDOFF)
			adjustment -= RWSEM_FLAG_HANDOFF;
	}

	if (adjustment)
		atomic_long_add(adjustment, &sem->count);

	/* 2nd pass */
	list_for_each_entry_safe(waiter, tmp, &wlist, list) {
		struct task_struct *tsk;

		tsk = waiter->task;
		get_task_struct(tsk);

		/*
		 * Ensure calling get_task_struct() before setting the reader
		 * waiter to nil such that rwsem_down_read_slowpath() cannot
		 * race with do_exit() by always holding a reference count
		 * to the task to wakeup.
		 */
		smp_store_release(&waiter->task, NULL);
		/*
		 * Ensure issuing the wakeup (either by us or someone else)
		 * after setting the reader waiter to nil.
		 */
		wake_q_add_safe(wake_q, tsk);
	}
}

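/*
 * Added illustration (not in the original comments): the race the 2-pass
 * scheme above avoids, assuming a hypothetical single-pass design that
 * cleared waiter->task before fully incrementing the count:
 *
 *	waker				to-be-woken reader
 *	-----				------------------
 *	clears waiter->task
 *					sees task == NULL, proceeds
 *					runs its critical section
 *					up_read(): decrements count
 *	increments count for it
 *
 * The decrement would land before the matching increment. Pass 1 (count
 * first) followed by pass 2 (clear task, then wake) rules this out.
 */
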
/*
 * Remove a waiter and try to wake up other waiters in the wait queue.
 * This function is called from the out_nolock path of both the reader and
 * writer slowpaths with wait_lock held. It releases the wait_lock and
 * optionally wakes up waiters before it returns.
 */
static inline void
rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
		      struct wake_q_head *wake_q)
		      __releases(&sem->wait_lock)
{
	bool first = rwsem_first_waiter(sem) == waiter;

	wake_q_init(wake_q);

	/*
	 * If the wait_list isn't empty and the waiter to be deleted is
	 * the first waiter, we wake up the remaining waiters as they may
	 * be eligible to acquire or spin on the lock.
	 */
	if (rwsem_del_waiter(sem, waiter) && first)
		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, wake_q);
	raw_spin_unlock_irq(&sem->wait_lock);
	if (!wake_q_empty(wake_q))
		wake_up_q(wake_q);
}

/*
 * This function must be called with the sem->wait_lock held to prevent
 * race conditions between checking the rwsem wait list and setting the
 * sem->count accordingly.
 *
 * Implies rwsem_del_waiter() on success.
 */
static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
					struct rwsem_waiter *waiter)
{
	struct rwsem_waiter *first = rwsem_first_waiter(sem);
	long count, new;

	lockdep_assert_held(&sem->wait_lock);

	count = atomic_long_read(&sem->count);
	do {
		bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);

		if (has_handoff) {
			/*
			 * Honor the handoff bit and yield only when the
			 * first waiter is the one that set it. Otherwise,
			 * we still try to acquire the rwsem.
			 */
			if (first->handoff_set && (waiter != first))
				return false;
		}

		new = count;

		if (count & RWSEM_LOCK_MASK) {
			/*
			 * A waiter (first or not) can set the handoff bit
			 * if it is an RT task or has waited in the wait
			 * queue for too long.
			 */
			if (has_handoff || (!rt_task(waiter->task) &&
					    !time_after(jiffies, waiter->timeout)))
				return false;

			new |= RWSEM_FLAG_HANDOFF;
		} else {
			new |= RWSEM_WRITER_LOCKED;
			new &= ~RWSEM_FLAG_HANDOFF;

			if (list_is_singular(&sem->wait_list))
				new &= ~RWSEM_FLAG_WAITERS;
		}
	} while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));

	/*
	 * We have either acquired the lock with the handoff bit cleared or
	 * set the handoff bit. Only the first waiter can have its handoff_set
	 * set here to enable optimistic spinning in the slowpath loop.
	 */
	if (new & RWSEM_FLAG_HANDOFF) {
		first->handoff_set = true;
		lockevent_inc(rwsem_wlock_handoff);
		return false;
	}

	/*
	 * Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on
	 * success.
	 */
	list_del(&waiter->list);
	rwsem_set_owner(sem);
	return true;
}

/*
 * The rwsem_spin_on_owner() function returns the following 4 values
 * depending on the lock owner state.
 *   OWNER_NULL  : owner is currently NULL
 *   OWNER_WRITER: when owner changes and is a writer
 *   OWNER_READER: when owner changes and the new owner may be a reader.
 *   OWNER_NONSPINNABLE:
 *		   when optimistic spinning has to stop because either the
 *		   owner stops running, is unknown, or its timeslice has
 *		   been used up.
 */
enum owner_state {
	OWNER_NULL		= 1 << 0,
	OWNER_WRITER		= 1 << 1,
	OWNER_READER		= 1 << 2,
	OWNER_NONSPINNABLE	= 1 << 3,
};

#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
 * Try to acquire write lock before the writer has been put on wait queue.
 */
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
	long count = atomic_long_read(&sem->count);

	while (!(count & (RWSEM_LOCK_MASK|RWSEM_FLAG_HANDOFF))) {
		if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
						    count | RWSEM_WRITER_LOCKED)) {
			rwsem_set_owner(sem);
			lockevent_inc(rwsem_opt_lock);
			return true;
		}
	}
	return false;
}

static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *owner;
	unsigned long flags;
	bool ret = true;

	if (need_resched()) {
		lockevent_inc(rwsem_opt_fail);
		return false;
	}

	/*
	 * Disabling preemption is equivalent to an RCU read-side critical
	 * section, thus the task_struct structure won't go away.
	 */
	owner = rwsem_owner_flags(sem, &flags);
	/*
	 * Don't check the read-owner as the entry may be stale.
	 */
	if ((flags & RWSEM_NONSPINNABLE) ||
	    (owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner)))
		ret = false;

	lockevent_cond_inc(rwsem_opt_fail, !ret);
	return ret;
}

#define OWNER_SPINNABLE		(OWNER_NULL | OWNER_WRITER | OWNER_READER)

static inline enum owner_state
rwsem_owner_state(struct task_struct *owner, unsigned long flags)
{
	if (flags & RWSEM_NONSPINNABLE)
		return OWNER_NONSPINNABLE;

	if (flags & RWSEM_READER_OWNED)
		return OWNER_READER;

	return owner ? OWNER_WRITER : OWNER_NULL;
}

static noinline enum owner_state
rwsem_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *new, *owner;
	unsigned long flags, new_flags;
	enum owner_state state;

	lockdep_assert_preemption_disabled();

	owner = rwsem_owner_flags(sem, &flags);
	state = rwsem_owner_state(owner, flags);
	if (state != OWNER_WRITER)
		return state;

	for (;;) {
		/*
		 * When a waiting writer set the handoff flag, it may spin
		 * on the owner as well. Once that writer acquires the lock,
		 * we can spin on it. So we don't need to quit even when the
		 * handoff bit is set.
		 */
		new = rwsem_owner_flags(sem, &new_flags);
		if ((new != owner) || (new_flags != flags)) {
			state = rwsem_owner_state(new, new_flags);
			break;
		}

		/*
		 * Ensure we emit the owner->on_cpu dereference _after_
		 * checking that sem->owner still matches owner. If that
		 * fails, owner might point to free()d memory. If it still
		 * matches, our spinning context has already disabled
		 * preemption, which is equivalent to an RCU read-side
		 * critical section and ensures the memory stays valid.
		 */
		barrier();

		if (need_resched() || !owner_on_cpu(owner)) {
			state = OWNER_NONSPINNABLE;
			break;
		}

		cpu_relax();
	}

	return state;
}

/*
 * Calculate reader-owned rwsem spinning threshold for writer
 *
 * The more readers own the rwsem, the longer it will take for them to
 * wind down and free the rwsem. So the empirical formula used to
 * determine the actual spinning time limit here is:
 *
 *   Spinning threshold = (10 + nr_readers/2)us
 *
 * The limit is capped to a maximum of 25us (30 readers). This is just
 * a heuristic and is subject to change in the future.
 */
static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)
{
	long count = atomic_long_read(&sem->count);
	int readers = count >> RWSEM_READER_SHIFT;
	u64 delta;

	if (readers > 30)
		readers = 30;
	delta = (20 + readers) * NSEC_PER_USEC / 2;

	return sched_clock() + delta;
}

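/*
 * Worked check of the threshold formula (added for illustration):
 * (20 + readers) * NSEC_PER_USEC / 2 equals (10 + readers/2)us, so for
 * 10 readers delta = (20 + 10) * 1000 / 2 = 15000ns = 15us, and the
 * 30-reader cap yields the stated 25us maximum.
 */
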
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	bool taken = false;
	int prev_owner_state = OWNER_NULL;
	int loop = 0;
	u64 rspin_threshold = 0;

	/* sem->wait_lock should not be held when doing optimistic spinning */
	if (!osq_lock(&sem->osq))
		goto done;

	/*
	 * Optimistically spin on the owner field and attempt to acquire the
	 * lock whenever the owner changes. Spinning will be stopped when:
	 *  1) the owning writer isn't running; or
	 *  2) readers own the lock and spinning time has exceeded limit.
	 */
	for (;;) {
		enum owner_state owner_state;

		owner_state = rwsem_spin_on_owner(sem);
		if (!(owner_state & OWNER_SPINNABLE))
			break;

		/*
		 * Try to acquire the lock
		 */
		taken = rwsem_try_write_lock_unqueued(sem);

		if (taken)
			break;

		/*
		 * Time-based reader-owned rwsem optimistic spinning
		 */
		if (owner_state == OWNER_READER) {
			/*
			 * Re-initialize rspin_threshold every time when
			 * the owner state changes from non-reader to reader.
			 * This allows a writer to steal the lock in between
			 * 2 reader phases and have the threshold reset at
			 * the beginning of the 2nd reader phase.
			 */
			if (prev_owner_state != OWNER_READER) {
				if (rwsem_test_oflags(sem, RWSEM_NONSPINNABLE))
					break;
				rspin_threshold = rwsem_rspin_threshold(sem);
				loop = 0;
			}

			/*
			 * Check time threshold once every 16 iterations to
			 * avoid calling sched_clock() too frequently so
			 * as to reduce the average latency between the times
			 * when the lock becomes free and when the spinner
			 * is ready to do a trylock.
			 */
			else if (!(++loop & 0xf) && (sched_clock() > rspin_threshold)) {
				rwsem_set_nonspinnable(sem);
				lockevent_inc(rwsem_opt_nospin);
				break;
			}
		}

		/*
		 * An RT task cannot do optimistic spinning if it cannot
		 * be sure the lock holder is running or live-lock may
		 * happen if the current task and the lock holder happen
		 * to run in the same CPU. However, aborting optimistic
		 * spinning while a NULL owner is detected may miss some
		 * opportunity where spinning can continue without causing
		 * problem.
		 *
		 * There are 2 possible cases where an RT task may be able
		 * to continue spinning.
		 *
		 * 1) The lock owner is in the process of releasing the
		 *    lock, sem->owner is cleared but the lock has not
		 *    been released yet.
		 * 2) The lock was free and owner cleared, but another
		 *    task just comes in and acquires the lock before
		 *    we try to get it. The new owner may be a spinnable
		 *    writer.
		 *
		 * To take advantage of the two scenarios listed above, the
		 * RT task is made to retry one more time to see if it can
		 * acquire the lock or continue spinning on the new owning
		 * writer. Of course, if the time lag is long enough or the
		 * new owner is not a writer or spinnable, the RT task will
		 * quit spinning.
		 *
		 * If the owner is a writer, the need_resched() check is
		 * done inside rwsem_spin_on_owner(). If the owner is not
		 * a writer, the need_resched() check needs to be done here.
		 */
		if (owner_state != OWNER_WRITER) {
			if (need_resched())
				break;
			if (rt_task(current) &&
			   (prev_owner_state != OWNER_WRITER))
				break;
		}
		prev_owner_state = owner_state;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax();
	}
	osq_unlock(&sem->osq);
done:
	lockevent_cond_inc(rwsem_opt_fail, !taken);
	return taken;
}

/*
 * Clear the owner's RWSEM_NONSPINNABLE bit if it is set. This should
 * only be called when the reader count reaches 0.
 */
static inline void clear_nonspinnable(struct rw_semaphore *sem)
{
	if (unlikely(rwsem_test_oflags(sem, RWSEM_NONSPINNABLE)))
		atomic_long_andnot(RWSEM_NONSPINNABLE, &sem->owner);
}

#else
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
	return false;
}

static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	return false;
}

static inline void clear_nonspinnable(struct rw_semaphore *sem) { }

static inline enum owner_state
rwsem_spin_on_owner(struct rw_semaphore *sem)
{
	return OWNER_NONSPINNABLE;
}
#endif

/*
 * Prepare to wake up waiter(s) in the wait queue by putting them into the
 * given wake_q if the rwsem lock owner isn't a writer. If rwsem is likely
 * reader-owned, wake up read lock waiters in queue front or wake up any
 * front waiter otherwise.
 *
 * This is being called from both reader and writer slow paths.
 */
static inline void rwsem_cond_wake_waiter(struct rw_semaphore *sem, long count,
					  struct wake_q_head *wake_q)
{
	enum rwsem_wake_type wake_type;

	if (count & RWSEM_WRITER_MASK)
		return;

	if (count & RWSEM_READER_MASK) {
		wake_type = RWSEM_WAKE_READERS;
	} else {
		wake_type = RWSEM_WAKE_ANY;
		clear_nonspinnable(sem);
	}
	rwsem_mark_wake(sem, wake_type, wake_q);
}

/*
 * Wait for the read lock to be granted
 */
static struct rw_semaphore __sched *
rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int state)
{
	long adjustment = -RWSEM_READER_BIAS;
	long rcnt = (count >> RWSEM_READER_SHIFT);
	struct rwsem_waiter waiter;
	DEFINE_WAKE_Q(wake_q);

	/*
	 * To prevent a constant stream of readers from starving a sleeping
	 * writer, don't attempt optimistic lock stealing if the lock is
	 * very likely owned by readers.
	 */
	if ((atomic_long_read(&sem->owner) & RWSEM_READER_OWNED) &&
	    (rcnt > 1) && !(count & RWSEM_WRITER_LOCKED))
		goto queue;

	/*
	 * Reader optimistic lock stealing.
	 */
	if (!(count & (RWSEM_WRITER_LOCKED | RWSEM_FLAG_HANDOFF))) {
		rwsem_set_reader_owned(sem);
		lockevent_inc(rwsem_rlock_steal);

		/*
		 * Wake up other readers in the wait queue if it is
		 * the first reader.
		 */
		if ((rcnt == 1) && (count & RWSEM_FLAG_WAITERS)) {
			raw_spin_lock_irq(&sem->wait_lock);
			if (!list_empty(&sem->wait_list))
				rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED,
						&wake_q);
			raw_spin_unlock_irq(&sem->wait_lock);
			wake_up_q(&wake_q);
		}
		return sem;
	}

queue:
	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_READ;
	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
	waiter.handoff_set = false;

	raw_spin_lock_irq(&sem->wait_lock);
	if (list_empty(&sem->wait_list)) {
		/*
		 * In case the wait queue is empty and the lock isn't owned
		 * by a writer, this reader can exit the slowpath and return
		 * immediately as its RWSEM_READER_BIAS has already been set
		 * in the count.
		 */
		if (!(atomic_long_read(&sem->count) & RWSEM_WRITER_MASK)) {
			/* Provide lock ACQUIRE */
			smp_acquire__after_ctrl_dep();
			raw_spin_unlock_irq(&sem->wait_lock);
			rwsem_set_reader_owned(sem);
			lockevent_inc(rwsem_rlock_fast);
			return sem;
		}
		adjustment += RWSEM_FLAG_WAITERS;
	}
	rwsem_add_waiter(sem, &waiter);

	/* we're now waiting on the lock, but no longer actively locking */
	count = atomic_long_add_return(adjustment, &sem->count);

	rwsem_cond_wake_waiter(sem, count, &wake_q);
	raw_spin_unlock_irq(&sem->wait_lock);

	if (!wake_q_empty(&wake_q))
		wake_up_q(&wake_q);

	trace_contention_begin(sem, LCB_F_READ);

	/* wait to be given the lock */
	for (;;) {
		set_current_state(state);
		if (!smp_load_acquire(&waiter.task)) {
			/* Matches rwsem_mark_wake()'s smp_store_release(). */
			break;
		}
		if (signal_pending_state(state, current)) {
			raw_spin_lock_irq(&sem->wait_lock);
			if (waiter.task)
				goto out_nolock;
			raw_spin_unlock_irq(&sem->wait_lock);
			/* Ordered by sem->wait_lock against rwsem_mark_wake(). */
			break;
		}
		schedule_preempt_disabled();
		lockevent_inc(rwsem_sleep_reader);
	}

	__set_current_state(TASK_RUNNING);
	lockevent_inc(rwsem_rlock);
	trace_contention_end(sem, 0);
	return sem;

out_nolock:
	rwsem_del_wake_waiter(sem, &waiter, &wake_q);
	__set_current_state(TASK_RUNNING);
	lockevent_inc(rwsem_rlock_fail);
	trace_contention_end(sem, -EINTR);
	return ERR_PTR(-EINTR);
}

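/*
 * Illustrative sketch (not part of the kernel build): the waiter.task
 * handshake above, modeled in userspace with C11 atomics standing in for
 * smp_store_release()/smp_load_acquire(). Assumes a toolchain providing
 * C11 <threads.h>; all names are hypothetical. The waker publishes the
 * grant with a release store of NULL, so the waiter's acquire load
 * guarantees the grant is visible once the loop breaks.
 *
 *	#include <assert.h>
 *	#include <stdatomic.h>
 *	#include <threads.h>
 *
 *	static _Atomic(void *) waiter_task;
 *	static int lock_granted;
 *
 *	static int waker(void *unused)
 *	{
 *		lock_granted = 1;			// the "grant"
 *		atomic_store_explicit(&waiter_task, NULL,
 *				      memory_order_release);
 *		return 0;
 *	}
 *
 *	int main(void)
 *	{
 *		int self;
 *		thrd_t t;
 *
 *		atomic_store(&waiter_task, &self);
 *		thrd_create(&t, waker, NULL);
 *		while (atomic_load_explicit(&waiter_task,
 *					    memory_order_acquire))
 *			;				// models the wait loop
 *		assert(lock_granted == 1);		// ensured by acq/rel
 *		thrd_join(t, NULL);
 *		return 0;
 *	}
 */
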
/*
 * Wait until we successfully acquire the write lock
 */
static struct rw_semaphore __sched *
rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
{
	struct rwsem_waiter waiter;
	DEFINE_WAKE_Q(wake_q);

	/* do optimistic spinning and steal lock if possible */
	if (rwsem_can_spin_on_owner(sem) && rwsem_optimistic_spin(sem)) {
		/* rwsem_optimistic_spin() implies ACQUIRE on success */
		return sem;
	}

	/*
	 * Optimistic spinning failed, proceed to the slowpath
	 * and block until we can acquire the sem.
	 */
	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_WRITE;
	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
	waiter.handoff_set = false;

	raw_spin_lock_irq(&sem->wait_lock);
	rwsem_add_waiter(sem, &waiter);

	/* we're now waiting on the lock */
	if (rwsem_first_waiter(sem) != &waiter) {
		rwsem_cond_wake_waiter(sem, atomic_long_read(&sem->count),
				       &wake_q);
		if (!wake_q_empty(&wake_q)) {
			/*
			 * We want to minimize wait_lock hold time especially
			 * when a large number of readers are to be woken up.
			 */
			raw_spin_unlock_irq(&sem->wait_lock);
			wake_up_q(&wake_q);
			raw_spin_lock_irq(&sem->wait_lock);
		}
	} else {
		atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
	}

	/* wait until we successfully acquire the lock */
	set_current_state(state);
	trace_contention_begin(sem, LCB_F_WRITE);

	for (;;) {
		if (rwsem_try_write_lock(sem, &waiter)) {
			/* rwsem_try_write_lock() implies ACQUIRE on success */
			break;
		}

		raw_spin_unlock_irq(&sem->wait_lock);

		if (signal_pending_state(state, current))
			goto out_nolock;

		/*
		 * After setting the handoff bit and failing to acquire
		 * the lock, attempt to spin on the owner to accelerate the
		 * lock transfer. If the previous owner is an on-cpu writer
		 * and it has just released the lock, OWNER_NULL will be
		 * returned. In this case, we attempt to acquire the lock
		 * again without sleeping.
		 */
		if (waiter.handoff_set) {
			enum owner_state owner_state;

			owner_state = rwsem_spin_on_owner(sem);
			if (owner_state == OWNER_NULL)
				goto trylock_again;
		}

		schedule_preempt_disabled();
		lockevent_inc(rwsem_sleep_writer);
		set_current_state(state);
trylock_again:
		raw_spin_lock_irq(&sem->wait_lock);
	}
	__set_current_state(TASK_RUNNING);
	raw_spin_unlock_irq(&sem->wait_lock);
	lockevent_inc(rwsem_wlock);
	trace_contention_end(sem, 0);
	return sem;

out_nolock:
	__set_current_state(TASK_RUNNING);
	raw_spin_lock_irq(&sem->wait_lock);
	rwsem_del_wake_waiter(sem, &waiter, &wake_q);
	lockevent_inc(rwsem_wlock_fail);
	trace_contention_end(sem, -EINTR);
	return ERR_PTR(-EINTR);
}

/*
 * handle waking up a waiter on the semaphore
 * - up_read/up_write has decremented the active part of count if we come here
 */
static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
	unsigned long flags;
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irqsave(&sem->wait_lock, flags);

	if (!list_empty(&sem->wait_list))
		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
	wake_up_q(&wake_q);

	return sem;
}

/*
 * downgrade a write lock into a read lock
 * - caller incremented waiting part of count and discovered it still negative
 * - just wake up any readers at the front of the queue
 */
static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
	unsigned long flags;
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irqsave(&sem->wait_lock, flags);

	if (!list_empty(&sem->wait_list))
		rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
	wake_up_q(&wake_q);

	return sem;
}

/*
 * lock for reading
 */
static __always_inline int __down_read_common(struct rw_semaphore *sem, int state)
{
	int ret = 0;
	long count;

	preempt_disable();
	if (!rwsem_read_trylock(sem, &count)) {
		if (IS_ERR(rwsem_down_read_slowpath(sem, count, state))) {
			ret = -EINTR;
			goto out;
		}
		DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
	}
out:
	preempt_enable();
	return ret;
}

c995e638 PZ |
1262 | { |
1263 | __down_read_common(sem, TASK_UNINTERRUPTIBLE); | |
5dec94d4 WL |
1264 | } |
1265 | ||
92cc5d00 | 1266 | static __always_inline int __down_read_interruptible(struct rw_semaphore *sem) |
31784cff | 1267 | { |
c995e638 | 1268 | return __down_read_common(sem, TASK_INTERRUPTIBLE); |
31784cff EB |
1269 | } |
1270 | ||
92cc5d00 | 1271 | static __always_inline int __down_read_killable(struct rw_semaphore *sem) |
5dec94d4 | 1272 | { |
c995e638 | 1273 | return __down_read_common(sem, TASK_KILLABLE); |
5dec94d4 WL |
1274 | } |
1275 | ||
1276 | static inline int __down_read_trylock(struct rw_semaphore *sem) | |
1277 | { | |
3f524553 | 1278 | int ret = 0; |
fce45cd4 DB |
1279 | long tmp; |
1280 | ||
1281 | DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem); | |
1282 | ||
3f524553 | 1283 | preempt_disable(); |
14c24048 MS |
1284 | tmp = atomic_long_read(&sem->count); |
1285 | while (!(tmp & RWSEM_READ_FAILED_MASK)) { | |
5dec94d4 | 1286 | if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, |
14c24048 | 1287 | tmp + RWSEM_READER_BIAS)) { |
5dec94d4 | 1288 | rwsem_set_reader_owned(sem); |
3f524553 WL |
1289 | ret = 1; |
1290 | break; | |
5dec94d4 | 1291 | } |
14c24048 | 1292 | } |
3f524553 WL |
1293 | preempt_enable(); |
1294 | return ret; | |
5dec94d4 WL |
1295 | } |
1296 | ||
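/*
 * Illustrative sketch (not part of the kernel build): the trylock loop
 * above, modeled with C11 atomics. On failure, compare_exchange reloads
 * the current count into tmp, just as atomic_long_try_cmpxchg() does, so
 * each pass re-tests the failure mask against fresh state. The constants
 * assume the 64-bit layout documented earlier; the helper name is made up.
 *
 *	#include <stdatomic.h>
 *	#include <stdbool.h>
 *
 *	#define READER_BIAS	(1UL << 8)
 *	#define READ_FAILED	0x8000000000000007UL	// writer|waiters|handoff|readfail
 *
 *	static bool read_trylock(_Atomic unsigned long *count)
 *	{
 *		unsigned long tmp = atomic_load(count);
 *
 *		while (!(tmp & READ_FAILED)) {
 *			if (atomic_compare_exchange_strong(count, &tmp,
 *							   tmp + READER_BIAS))
 *				return true;	// read lock taken
 *		}
 *		return false;			// caller takes the slowpath
 *	}
 */
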
/*
 * lock for writing
 */
static inline int __down_write_common(struct rw_semaphore *sem, int state)
{
	int ret = 0;

	preempt_disable();
	if (unlikely(!rwsem_write_trylock(sem))) {
		if (IS_ERR(rwsem_down_write_slowpath(sem, state)))
			ret = -EINTR;
	}
	preempt_enable();
	return ret;
}

static inline void __down_write(struct rw_semaphore *sem)
{
	__down_write_common(sem, TASK_UNINTERRUPTIBLE);
}

static inline int __down_write_killable(struct rw_semaphore *sem)
{
	return __down_write_common(sem, TASK_KILLABLE);
}

static inline int __down_write_trylock(struct rw_semaphore *sem)
{
	int ret;

	preempt_disable();
	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
	ret = rwsem_write_trylock(sem);
	preempt_enable();

	return ret;
}

/*
 * unlock after reading
 */
static inline void __up_read(struct rw_semaphore *sem)
{
	long tmp;

	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
	DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);

	preempt_disable();
	rwsem_clear_reader_owned(sem);
	tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
	DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
	if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
		      RWSEM_FLAG_WAITERS)) {
		clear_nonspinnable(sem);
		rwsem_wake(sem);
	}
	preempt_enable();
}

/*
 * unlock after writing
 */
static inline void __up_write(struct rw_semaphore *sem)
{
	long tmp;

	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
	/*
	 * sem->owner may differ from current if the ownership is transferred
	 * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits.
	 */
	DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) &&
			    !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem);

	preempt_disable();
	rwsem_clear_owner(sem);
	tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
	if (unlikely(tmp & RWSEM_FLAG_WAITERS))
		rwsem_wake(sem);
	preempt_enable();
}

1380 | /* | |
1381 | * downgrade write lock to read lock | |
1382 | */ | |
1383 | static inline void __downgrade_write(struct rw_semaphore *sem) | |
1384 | { | |
1385 | long tmp; | |
1386 | ||
1387 | /* | |
1388 | * When downgrading from exclusive to shared ownership, | |
1389 | * anything inside the write-locked region cannot leak | |
1390 | * into the read side. In contrast, anything in the | |
1391 | * read-locked region is ok to be re-ordered into the | |
1392 | * write side. As such, rely on RELEASE semantics. | |
1393 | */ | |
94a9717b | 1394 | DEBUG_RWSEMS_WARN_ON(rwsem_owner(sem) != current, sem); |
1d61659c | 1395 | preempt_disable(); |
5dec94d4 WL |
1396 | tmp = atomic_long_fetch_add_release( |
1397 | -RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count); | |
1398 | rwsem_set_reader_owned(sem); | |
1399 | if (tmp & RWSEM_FLAG_WAITERS) | |
1400 | rwsem_downgrade_wake(sem); | |
1d61659c | 1401 | preempt_enable(); |
5dec94d4 | 1402 | } |
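/*
 * Worked count transition for the downgrade: from an uncontended write
 * lock, count goes from RWSEM_WRITER_LOCKED to RWSEM_READER_BIAS in a
 * single atomic step, i.e. straight to "one reader" with no window in
 * which the lock looks free to a competing writer. tmp is the pre-add
 * value, so the RWSEM_FLAG_WAITERS test catches anyone who queued while
 * the write lock was held and lets rwsem_downgrade_wake() admit the
 * waiting readers.
 */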
4fc828e2 | 1403 | |
42254105 TG |
1404 | #else /* !CONFIG_PREEMPT_RT */ |
1405 | ||
e17ba59b | 1406 | #define RT_MUTEX_BUILD_MUTEX |
42254105 TG |
1407 | #include "rtmutex.c" |
1408 | ||
1409 | #define rwbase_set_and_save_current_state(state) \ | |
1410 | set_current_state(state) | |
1411 | ||
1412 | #define rwbase_restore_current_state() \ | |
1413 | __set_current_state(TASK_RUNNING) | |
1414 | ||
1415 | #define rwbase_rtmutex_lock_state(rtm, state) \ | |
1416 | __rt_mutex_lock(rtm, state) | |
1417 | ||
1418 | #define rwbase_rtmutex_slowlock_locked(rtm, state) \ | |
add46132 | 1419 | __rt_mutex_slowlock_locked(rtm, NULL, state) |
42254105 TG |
1420 | |
1421 | #define rwbase_rtmutex_unlock(rtm) \ | |
1422 | __rt_mutex_unlock(rtm) | |
1423 | ||
1424 | #define rwbase_rtmutex_trylock(rtm) \ | |
1425 | __rt_mutex_trylock(rtm) | |
1426 | ||
1427 | #define rwbase_signal_pending_state(state, current) \ | |
1428 | signal_pending_state(state, current) | |
1429 | ||
d14f9e93 SAS |
1430 | #define rwbase_pre_schedule() \ |
1431 | rt_mutex_pre_schedule() | |
1432 | ||
42254105 | 1433 | #define rwbase_schedule() \ |
d14f9e93 SAS |
1434 | rt_mutex_schedule() |
1435 | ||
1436 | #define rwbase_post_schedule() \ | |
1437 | rt_mutex_post_schedule() | |
42254105 TG |
1438 | |
1439 | #include "rwbase_rt.c" | |
1440 | ||
15eb7c88 | 1441 | void __init_rwsem(struct rw_semaphore *sem, const char *name, |
42254105 TG |
1442 | struct lock_class_key *key) |
1443 | { | |
15eb7c88 MG |
1444 | init_rwbase_rt(&(sem)->rwbase); |
1445 | ||
1446 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
42254105 TG |
1447 | debug_check_no_locks_freed((void *)sem, sizeof(*sem)); |
1448 | lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP); | |
42254105 | 1449 | #endif |
15eb7c88 MG |
1450 | } |
1451 | EXPORT_SYMBOL(__init_rwsem); | |
42254105 TG |
1452 | |
1453 | static inline void __down_read(struct rw_semaphore *sem) | |
1454 | { | |
1455 | rwbase_read_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE); | |
1456 | } | |
1457 | ||
1458 | static inline int __down_read_interruptible(struct rw_semaphore *sem) | |
1459 | { | |
1460 | return rwbase_read_lock(&sem->rwbase, TASK_INTERRUPTIBLE); | |
1461 | } | |
1462 | ||
1463 | static inline int __down_read_killable(struct rw_semaphore *sem) | |
1464 | { | |
1465 | return rwbase_read_lock(&sem->rwbase, TASK_KILLABLE); | |
1466 | } | |
1467 | ||
1468 | static inline int __down_read_trylock(struct rw_semaphore *sem) | |
1469 | { | |
1470 | return rwbase_read_trylock(&sem->rwbase); | |
1471 | } | |
1472 | ||
1473 | static inline void __up_read(struct rw_semaphore *sem) | |
1474 | { | |
1475 | rwbase_read_unlock(&sem->rwbase, TASK_NORMAL); | |
1476 | } | |
1477 | ||
1478 | static inline void __sched __down_write(struct rw_semaphore *sem) | |
1479 | { | |
1480 | rwbase_write_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE); | |
1481 | } | |
1482 | ||
1483 | static inline int __sched __down_write_killable(struct rw_semaphore *sem) | |
1484 | { | |
1485 | return rwbase_write_lock(&sem->rwbase, TASK_KILLABLE); | |
1486 | } | |
1487 | ||
1488 | static inline int __down_write_trylock(struct rw_semaphore *sem) | |
1489 | { | |
1490 | return rwbase_write_trylock(&sem->rwbase); | |
1491 | } | |
1492 | ||
1493 | static inline void __up_write(struct rw_semaphore *sem) | |
1494 | { | |
1495 | rwbase_write_unlock(&sem->rwbase); | |
1496 | } | |
1497 | ||
1498 | static inline void __downgrade_write(struct rw_semaphore *sem) | |
1499 | { | |
1500 | rwbase_write_downgrade(&sem->rwbase); | |
1501 | } | |
1502 | ||
1503 | /* Debug stubs for the common API */ | |
1504 | #define DEBUG_RWSEMS_WARN_ON(c, sem) | |
1505 | ||
1506 | static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem, | |
1507 | struct task_struct *owner) | |
1508 | { | |
1509 | } | |
1510 | ||
1511 | static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem) | |
1512 | { | |
1513 | int count = atomic_read(&sem->rwbase.readers); | |
1514 | ||
1515 | return count < 0 && count != READER_BIAS; | |
1516 | } | |
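/*
 * Sketch of the encoding this check relies on (assuming the
 * READER_BIAS/WRITER_BIAS scheme used by rwbase_rt): readers reads
 * READER_BIAS (INT_MIN as a signed int) when unlocked, READER_BIAS + n
 * with n readers active (still negative), and a non-negative value once
 * a writer owns or is taking the lock. "Negative but not exactly
 * READER_BIAS" therefore singles out a reader-owned rwsem.
 */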
1517 | ||
1518 | #endif /* CONFIG_PREEMPT_RT */ | |
1519 | ||
c4e05116 IM |
1520 | /* |
1521 | * lock for reading | |
1522 | */ | |
c7af77b5 | 1523 | void __sched down_read(struct rw_semaphore *sem) |
c4e05116 IM |
1524 | { |
1525 | might_sleep(); | |
1526 | rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); | |
1527 | ||
4fe87745 | 1528 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); |
c4e05116 | 1529 | } |
c4e05116 IM |
1530 | EXPORT_SYMBOL(down_read); |
1531 | ||
31784cff EB |
1532 | int __sched down_read_interruptible(struct rw_semaphore *sem) |
1533 | { | |
1534 | might_sleep(); | |
1535 | rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); | |
1536 | ||
1537 | if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_interruptible)) { | |
1538 | rwsem_release(&sem->dep_map, _RET_IP_); | |
1539 | return -EINTR; | |
1540 | } | |
1541 | ||
1542 | return 0; | |
1543 | } | |
1544 | EXPORT_SYMBOL(down_read_interruptible); | |
1545 | ||
76f8507f KT |
1546 | int __sched down_read_killable(struct rw_semaphore *sem) |
1547 | { | |
1548 | might_sleep(); | |
1549 | rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); | |
1550 | ||
1551 | if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) { | |
5facae4f | 1552 | rwsem_release(&sem->dep_map, _RET_IP_); |
76f8507f KT |
1553 | return -EINTR; |
1554 | } | |
1555 | ||
76f8507f KT |
1556 | return 0; |
1557 | } | |
76f8507f KT |
1558 | EXPORT_SYMBOL(down_read_killable); |
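/*
 * Minimal caller-side sketch for the killable variant. Everything named
 * my_* below is illustrative and not part of this file; the point is
 * that a non-zero return means the lock was never taken, so the error
 * must be propagated rather than followed by up_read().
 */
struct my_data {
	struct rw_semaphore	my_sem;
	/* ... fields protected by my_sem ... */
};

static int my_read_op(struct my_data *d)
{
	int ret = down_read_killable(&d->my_sem);

	if (ret)		/* -EINTR: a fatal signal arrived while sleeping */
		return ret;
	/* ... read-side critical section ... */
	up_read(&d->my_sem);
	return 0;
}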
1559 | ||
c4e05116 IM |
1560 | /* |
1561 | * trylock for reading -- returns 1 if successful, 0 on contention | |
1562 | */ | |
1563 | int down_read_trylock(struct rw_semaphore *sem) | |
1564 | { | |
1565 | int ret = __down_read_trylock(sem); | |
1566 | ||
c7580c1e | 1567 | if (ret == 1) |
c4e05116 IM |
1568 | rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); |
1569 | return ret; | |
1570 | } | |
c4e05116 IM |
1571 | EXPORT_SYMBOL(down_read_trylock); |
1572 | ||
1573 | /* | |
1574 | * lock for writing | |
1575 | */ | |
c7af77b5 | 1576 | void __sched down_write(struct rw_semaphore *sem) |
c4e05116 IM |
1577 | { |
1578 | might_sleep(); | |
1579 | rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); | |
4fe87745 | 1580 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
c4e05116 | 1581 | } |
c4e05116 IM |
1582 | EXPORT_SYMBOL(down_write); |
1583 | ||
916633a4 MH |
1584 | /* |
1585 | * lock for writing -- killable; returns -EINTR if interrupted by a fatal signal | |
1586 | */ | |
1587 | int __sched down_write_killable(struct rw_semaphore *sem) | |
1588 | { | |
1589 | might_sleep(); | |
1590 | rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); | |
1591 | ||
6cef7ff6 WL |
1592 | if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, |
1593 | __down_write_killable)) { | |
5facae4f | 1594 | rwsem_release(&sem->dep_map, _RET_IP_); |
916633a4 MH |
1595 | return -EINTR; |
1596 | } | |
1597 | ||
916633a4 MH |
1598 | return 0; |
1599 | } | |
916633a4 MH |
1600 | EXPORT_SYMBOL(down_write_killable); |
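/*
 * Matching write-side sketch (reusing the illustrative struct my_data
 * from the read-side example above): same error discipline, the
 * semaphore is only released on the success path.
 */
static int my_write_op(struct my_data *d)
{
	int ret = down_write_killable(&d->my_sem);

	if (ret)
		return ret;	/* -EINTR, lock not held */
	/* ... update fields under exclusive ownership ... */
	up_write(&d->my_sem);
	return 0;
}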
1601 | ||
c4e05116 IM |
1602 | /* |
1603 | * trylock for writing -- returns 1 if successful, 0 on contention | |
1604 | */ | |
1605 | int down_write_trylock(struct rw_semaphore *sem) | |
1606 | { | |
1607 | int ret = __down_write_trylock(sem); | |
1608 | ||
c7580c1e | 1609 | if (ret == 1) |
428e6ce0 | 1610 | rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); |
4fc828e2 | 1611 | |
c4e05116 IM |
1612 | return ret; |
1613 | } | |
c4e05116 IM |
1614 | EXPORT_SYMBOL(down_write_trylock); |
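/*
 * Trylock sketch (illustrative caller): note the mutex-style return
 * convention documented above -- 1 on success, 0 on contention -- so
 * the result is treated as a boolean, not as an errno.
 */
static bool my_try_update(struct my_data *d)
{
	if (!down_write_trylock(&d->my_sem))
		return false;	/* contended; caller may retry or fall back */
	/* ... short update ... */
	up_write(&d->my_sem);
	return true;
}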
1615 | ||
1616 | /* | |
1617 | * release a read lock | |
1618 | */ | |
1619 | void up_read(struct rw_semaphore *sem) | |
1620 | { | |
5facae4f | 1621 | rwsem_release(&sem->dep_map, _RET_IP_); |
c4e05116 IM |
1622 | __up_read(sem); |
1623 | } | |
c4e05116 IM |
1624 | EXPORT_SYMBOL(up_read); |
1625 | ||
1626 | /* | |
1627 | * release a write lock | |
1628 | */ | |
1629 | void up_write(struct rw_semaphore *sem) | |
1630 | { | |
5facae4f | 1631 | rwsem_release(&sem->dep_map, _RET_IP_); |
c4e05116 IM |
1632 | __up_write(sem); |
1633 | } | |
c4e05116 IM |
1634 | EXPORT_SYMBOL(up_write); |
1635 | ||
1636 | /* | |
1637 | * downgrade write lock to read lock | |
1638 | */ | |
1639 | void downgrade_write(struct rw_semaphore *sem) | |
1640 | { | |
6419c4af | 1641 | lock_downgrade(&sem->dep_map, _RET_IP_); |
c4e05116 IM |
1642 | __downgrade_write(sem); |
1643 | } | |
c4e05116 | 1644 | EXPORT_SYMBOL(downgrade_write); |
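/*
 * Typical downgrade pattern (illustrative): publish state under the
 * write lock, then downgrade and keep reading it. Unlike an up_write()
 * followed by a fresh down_read(), there is no window in which another
 * writer can slip in and invalidate what was just published.
 */
static void my_publish_then_read(struct my_data *d)
{
	down_write(&d->my_sem);
	/* ... install new state ... */
	downgrade_write(&d->my_sem);
	/* ... use the state, now shared with other readers ... */
	up_read(&d->my_sem);
}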
4ea2176d IM |
1645 | |
1646 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
1647 | ||
1648 | void down_read_nested(struct rw_semaphore *sem, int subclass) | |
1649 | { | |
1650 | might_sleep(); | |
1651 | rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); | |
4fe87745 | 1652 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); |
4ea2176d | 1653 | } |
4ea2176d IM |
1654 | EXPORT_SYMBOL(down_read_nested); |
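/*
 * Nesting sketch: when two rwsems of the same lock class are always
 * taken in a well-defined order (say, a parent before its child), the
 * inner acquisition passes a distinct lockdep subclass so the validator
 * does not report a false self-deadlock. SINGLE_DEPTH_NESTING comes
 * from <linux/lockdep.h>; parent/child reuse the illustrative
 * struct my_data from the sketches above.
 */
static void my_lock_pair(struct my_data *parent, struct my_data *child)
{
	down_read(&parent->my_sem);
	down_read_nested(&child->my_sem, SINGLE_DEPTH_NESTING);
	/* ... both objects stable here ... */
	up_read(&child->my_sem);
	up_read(&parent->my_sem);
}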
1655 | ||
0f9368b5 EB |
1656 | int down_read_killable_nested(struct rw_semaphore *sem, int subclass) |
1657 | { | |
1658 | might_sleep(); | |
1659 | rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); | |
1660 | ||
1661 | if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) { | |
1662 | rwsem_release(&sem->dep_map, _RET_IP_); | |
1663 | return -EINTR; | |
1664 | } | |
1665 | ||
1666 | return 0; | |
1667 | } | |
1668 | EXPORT_SYMBOL(down_read_killable_nested); | |
1669 | ||
1b963c81 JK |
1670 | void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) |
1671 | { | |
1672 | might_sleep(); | |
1673 | rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); | |
1b963c81 JK |
1674 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
1675 | } | |
1b963c81 JK |
1676 | EXPORT_SYMBOL(_down_write_nest_lock); |
1677 | ||
84759c6d KO |
1678 | void down_read_non_owner(struct rw_semaphore *sem) |
1679 | { | |
1680 | might_sleep(); | |
84759c6d | 1681 | __down_read(sem); |
3f524553 WL |
1682 | /* |
1683 | * The owner value for a reader-owned lock is mostly for debugging | |
1684 | * purposes only and is not critical to the correct functioning of | |
1685 | * rwsem. So it is perfectly fine to set it in a preempt-enabled | |
1686 | * context here. | |
1687 | */ | |
925b9cd1 | 1688 | __rwsem_set_reader_owned(sem, NULL); |
84759c6d | 1689 | } |
84759c6d KO |
1690 | EXPORT_SYMBOL(down_read_non_owner); |
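/*
 * Non-owner sketch: the read lock is taken in one context and released
 * in another, which is exactly what the owner bookkeeping above cannot
 * track. A hypothetical use is pinning data across an asynchronous
 * hand-off (the my_async_* names are illustrative):
 */
static void my_async_start(struct my_data *d)
{
	down_read_non_owner(&d->my_sem);	/* pin: writers stay excluded */
	/* ... queue work that completes in another task ... */
}

/* runs in a different task/context */
static void my_async_complete(struct my_data *d)
{
	/* ... consume the pinned data ... */
	up_read_non_owner(&d->my_sem);
}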
1691 | ||
4ea2176d IM |
1692 | void down_write_nested(struct rw_semaphore *sem, int subclass) |
1693 | { | |
1694 | might_sleep(); | |
1695 | rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); | |
4fe87745 | 1696 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
4ea2176d | 1697 | } |
4ea2176d IM |
1698 | EXPORT_SYMBOL(down_write_nested); |
1699 | ||
887bddfa AV |
1700 | int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass) |
1701 | { | |
1702 | might_sleep(); | |
1703 | rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); | |
1704 | ||
6cef7ff6 WL |
1705 | if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, |
1706 | __down_write_killable)) { | |
5facae4f | 1707 | rwsem_release(&sem->dep_map, _RET_IP_); |
887bddfa AV |
1708 | return -EINTR; |
1709 | } | |
1710 | ||
887bddfa AV |
1711 | return 0; |
1712 | } | |
887bddfa AV |
1713 | EXPORT_SYMBOL(down_write_killable_nested); |
1714 | ||
84759c6d KO |
1715 | void up_read_non_owner(struct rw_semaphore *sem) |
1716 | { | |
94a9717b | 1717 | DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem); |
84759c6d KO |
1718 | __up_read(sem); |
1719 | } | |
84759c6d KO |
1720 | EXPORT_SYMBOL(up_read_non_owner); |
1721 | ||
4ea2176d | 1722 | #endif |