// SPDX-License-Identifier: GPL-2.0
/* kernel/rwsem.c: R/W semaphores, public implementation
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from asm-i386/semaphore.h
 *
 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
 * and Michel Lespinasse <walken@google.com>
 *
 * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
 * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
 *
 * Rwsem count bit fields re-definition and rwsem rearchitecture by
 * Waiman Long <longman@redhat.com> and
 * Peter Zijlstra <peterz@infradead.org>.
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/task.h>
#include <linux/sched/debug.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/signal.h>
#include <linux/sched/clock.h>
#include <linux/export.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>

#ifndef CONFIG_PREEMPT_RT
#include "lock_events.h"

/*
 * The least significant 2 bits of the owner value have the following
 * meanings when set.
 *  - Bit 0: RWSEM_READER_OWNED - The rwsem is owned by readers
 *  - Bit 1: RWSEM_NONSPINNABLE - Cannot spin on a reader-owned lock
 *
 * When the rwsem is reader-owned and a spinning writer has timed out,
 * the nonspinnable bit will be set to disable optimistic spinning.
 *
 * When a writer acquires a rwsem, it puts its task_struct pointer
 * into the owner field. It is cleared after an unlock.
 *
 * When a reader acquires a rwsem, it will also put its task_struct
 * pointer into the owner field with the RWSEM_READER_OWNED bit set.
 * On unlock, the owner field will largely be left untouched. So
 * for a free or reader-owned rwsem, the owner value may contain
 * information about the last reader that acquired the rwsem.
 *
 * That information may be helpful in debugging cases where the system
 * seems to hang on a reader-owned rwsem, especially if only one reader
 * is involved. Ideally we would like to track all the readers that own
 * a rwsem, but the overhead is simply too big.
 *
 * A fast path reader optimistic lock stealing is supported when the rwsem
 * was previously owned by a writer and the following conditions are met:
 *  - rwsem is not currently writer owned
 *  - the handoff isn't set.
 */
#define RWSEM_READER_OWNED	(1UL << 0)
#define RWSEM_NONSPINNABLE	(1UL << 1)
#define RWSEM_OWNER_FLAGS_MASK	(RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)

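/*
 * Illustrative note (not part of the original source): task_struct
 * pointers are at least word-aligned, so bits 0-1 of the owner value
 * are always zero for the pointer itself and are free to carry the two
 * flags above. For a hypothetical reader owner at 0xffff888012345678,
 * the owner field would hold 0xffff888012345679
 * (pointer | RWSEM_READER_OWNED), and masking with
 * ~RWSEM_OWNER_FLAGS_MASK recovers the task pointer.
 */
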
#ifdef CONFIG_DEBUG_RWSEMS
# define DEBUG_RWSEMS_WARN_ON(c, sem)	do {			\
	if (!debug_locks_silent &&				\
	    WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, magic = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
		#c, atomic_long_read(&(sem)->count),		\
		(unsigned long) sem->magic,			\
		atomic_long_read(&(sem)->owner), (long)current,	\
		list_empty(&(sem)->wait_list) ? "" : "not "))	\
			debug_locks_off();			\
	} while (0)
#else
# define DEBUG_RWSEMS_WARN_ON(c, sem)
#endif

/*
 * On 64-bit architectures, the bit definitions of the count are:
 *
 * Bit  0    - writer locked bit
 * Bit  1    - waiters present bit
 * Bit  2    - lock handoff bit
 * Bits 3-7  - reserved
 * Bits 8-62 - 55-bit reader count
 * Bit  63   - read fail bit
 *
 * On 32-bit architectures, the bit definitions of the count are:
 *
 * Bit  0    - writer locked bit
 * Bit  1    - waiters present bit
 * Bit  2    - lock handoff bit
 * Bits 3-7  - reserved
 * Bits 8-30 - 23-bit reader count
 * Bit  31   - read fail bit
 *
 * It is not likely that the most significant bit (read fail bit) will ever
 * be set. This guard bit is still checked anyway in the down_read() fastpath
 * just in case we need to use up more of the reader bits for other purposes
 * in the future.
 *
 * atomic_long_fetch_add() is used to obtain reader lock, whereas
 * atomic_long_cmpxchg() will be used to obtain writer lock.
 *
 * There are three places where the lock handoff bit may be set or cleared.
 * 1) rwsem_mark_wake() for readers		-- set, clear
 * 2) rwsem_try_write_lock() for writers	-- set, clear
 * 3) rwsem_del_waiter()			-- clear
 *
 * For all the above cases, wait_lock will be held. A writer must also
 * be the first one in the wait_list to be eligible for setting the handoff
 * bit. So concurrent setting/clearing of handoff bit is not possible.
 */
#define RWSEM_WRITER_LOCKED	(1UL << 0)
#define RWSEM_FLAG_WAITERS	(1UL << 1)
#define RWSEM_FLAG_HANDOFF	(1UL << 2)
#define RWSEM_FLAG_READFAIL	(1UL << (BITS_PER_LONG - 1))

#define RWSEM_READER_SHIFT	8
#define RWSEM_READER_BIAS	(1UL << RWSEM_READER_SHIFT)
#define RWSEM_READER_MASK	(~(RWSEM_READER_BIAS - 1))
#define RWSEM_WRITER_MASK	RWSEM_WRITER_LOCKED
#define RWSEM_LOCK_MASK		(RWSEM_WRITER_MASK|RWSEM_READER_MASK)
#define RWSEM_READ_FAILED_MASK	(RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\
				 RWSEM_FLAG_HANDOFF|RWSEM_FLAG_READFAIL)

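/*
 * Worked example (illustrative, not from the original source): with
 * three active readers and a waiter queued on a 64-bit system,
 *
 *	count = 3 * RWSEM_READER_BIAS + RWSEM_FLAG_WAITERS = 0x302
 *
 * (count & RWSEM_READER_MASK) >> RWSEM_READER_SHIFT recovers the
 * reader count of 3, count & RWSEM_WRITER_MASK is 0, and
 * count & RWSEM_FLAG_WAITERS is nonzero.
 */
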
/*
 * All writes to owner are protected by WRITE_ONCE() to make sure that
 * store tearing can't happen as optimistic spinners may read and use
 * the owner value concurrently without lock. Read from owner, however,
 * may not need READ_ONCE() as long as the pointer value is only used
 * for comparison and isn't being dereferenced.
 */
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
	atomic_long_set(&sem->owner, (long)current);
}

static inline void rwsem_clear_owner(struct rw_semaphore *sem)
{
	atomic_long_set(&sem->owner, 0);
}

/*
 * Test the flags in the owner field.
 */
static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags)
{
	return atomic_long_read(&sem->owner) & flags;
}

/*
 * The task_struct pointer of the last owning reader will be left in
 * the owner field.
 *
 * Note that the owner value just indicates the task has owned the rwsem
 * previously; it may not be the real owner or one of the real owners
 * anymore when that field is examined, so take it with a grain of salt.
 *
 * The reader non-spinnable bit is preserved.
 */
static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
					    struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED |
		(atomic_long_read(&sem->owner) & RWSEM_NONSPINNABLE);

	atomic_long_set(&sem->owner, val);
}

static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
{
	__rwsem_set_reader_owned(sem, current);
}

/*
 * Return true if the rwsem is owned by a reader.
 */
static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
{
#ifdef CONFIG_DEBUG_RWSEMS
	/*
	 * Check the count to see if it is write-locked.
	 */
	long count = atomic_long_read(&sem->count);

	if (count & RWSEM_WRITER_MASK)
		return false;
#endif
	return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
}

#ifdef CONFIG_DEBUG_RWSEMS
/*
 * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
 * is a task pointer in owner of a reader-owned rwsem, it will be the
 * real owner or one of the real owners. The only exception is when the
 * unlock is done by up_read_non_owner().
 */
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
	unsigned long val = atomic_long_read(&sem->owner);

	while ((val & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current) {
		if (atomic_long_try_cmpxchg(&sem->owner, &val,
					    val & RWSEM_OWNER_FLAGS_MASK))
			return;
	}
}
#else
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
}
#endif

/*
 * Set the RWSEM_NONSPINNABLE bits if the RWSEM_READER_OWNED flag
 * remains set. Otherwise, the operation will be aborted.
 */
static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
{
	unsigned long owner = atomic_long_read(&sem->owner);

	do {
		if (!(owner & RWSEM_READER_OWNED))
			break;
		if (owner & RWSEM_NONSPINNABLE)
			break;
	} while (!atomic_long_try_cmpxchg(&sem->owner, &owner,
					  owner | RWSEM_NONSPINNABLE));
}

static inline bool rwsem_read_trylock(struct rw_semaphore *sem, long *cntp)
{
	*cntp = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);

	if (WARN_ON_ONCE(*cntp < 0))
		rwsem_set_nonspinnable(sem);

	if (!(*cntp & RWSEM_READ_FAILED_MASK)) {
		rwsem_set_reader_owned(sem);
		return true;
	}

	return false;
}

static inline bool rwsem_write_trylock(struct rw_semaphore *sem)
{
	long tmp = RWSEM_UNLOCKED_VALUE;

	if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, RWSEM_WRITER_LOCKED)) {
		rwsem_set_owner(sem);
		return true;
	}

	return false;
}

/*
 * Return just the real task structure pointer of the owner
 */
static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
{
	return (struct task_struct *)
		(atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
}

/*
 * Return the real task structure pointer of the owner and the embedded
 * flags in the owner. pflags must be non-NULL.
 */
static inline struct task_struct *
rwsem_owner_flags(struct rw_semaphore *sem, unsigned long *pflags)
{
	unsigned long owner = atomic_long_read(&sem->owner);

	*pflags = owner & RWSEM_OWNER_FLAGS_MASK;
	return (struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);
}

/*
 * Guide to the rw_semaphore's count field.
 *
 * When the RWSEM_WRITER_LOCKED bit in count is set, the lock is owned
 * by a writer.
 *
 * The lock is owned by readers when
 * (1) the RWSEM_WRITER_LOCKED isn't set in count,
 * (2) some of the reader bits are set in count, and
 * (3) the owner field has the RWSEM_READER_OWNED bit set.
 *
 * Having some reader bits set is not enough to guarantee a reader-owned
 * lock as the readers may be in the process of backing out from the count
 * and a writer has just released the lock. So another writer may steal
 * the lock immediately after that.
 */

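/*
 * Illustrative sketch (not part of the original source): the reader-owned
 * test described above, expressed as code. Both conditions on count plus
 * the owner flag must hold:
 *
 *	long c = atomic_long_read(&sem->count);
 *	bool reader_owned = !(c & RWSEM_WRITER_LOCKED) &&
 *			    (c & RWSEM_READER_MASK) &&
 *			    (atomic_long_read(&sem->owner) & RWSEM_READER_OWNED);
 *
 * As the comment warns, this is only a hint, not a guarantee.
 */
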
/*
 * Initialize an rwsem:
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held semaphore:
	 */
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
#endif
#ifdef CONFIG_DEBUG_RWSEMS
	sem->magic = sem;
#endif
	atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
	raw_spin_lock_init(&sem->wait_lock);
	INIT_LIST_HEAD(&sem->wait_list);
	atomic_long_set(&sem->owner, 0L);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
	osq_lock_init(&sem->osq);
#endif
}
EXPORT_SYMBOL(__init_rwsem);

enum rwsem_waiter_type {
	RWSEM_WAITING_FOR_WRITE,
	RWSEM_WAITING_FOR_READ
};

struct rwsem_waiter {
	struct list_head list;
	struct task_struct *task;
	enum rwsem_waiter_type type;
	unsigned long timeout;

	/* Writer only, not initialized in reader */
	bool handoff_set;
};
#define rwsem_first_waiter(sem) \
	list_first_entry(&sem->wait_list, struct rwsem_waiter, list)

enum rwsem_wake_type {
	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
	RWSEM_WAKE_READERS,	/* Wake readers only */
	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */
};

/*
 * The typical HZ value is either 250 or 1000. So set the minimum waiting
 * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
 * queue before initiating the handoff protocol.
 */
#define RWSEM_WAIT_TIMEOUT	DIV_ROUND_UP(HZ, 250)

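/*
 * Illustrative arithmetic (not from the original source): with HZ=1000,
 * DIV_ROUND_UP(1000, 250) = 4 jiffies = 4ms; with HZ=250 it is 1 jiffy,
 * also 4ms; with HZ=100 it rounds up to 1 jiffy, i.e. 10ms.
 */
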
/*
 * Magic number to batch-wakeup waiting readers, even when writers are
 * also present in the queue. This both limits the amount of work the
 * waking thread must do and also prevents any potential counter overflow,
 * however unlikely.
 */
#define MAX_READERS_WAKEUP	0x100

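/*
 * Illustrative arithmetic (not from the original source): 0x100 = 256
 * readers woken per call, so one wakeup batch adds at most
 * 256 * RWSEM_READER_BIAS = 0x10000 to the count, far below the 55-bit
 * (or 23-bit) reader field, which is how overflow is kept off the table.
 */
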
static inline void
rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
{
	lockdep_assert_held(&sem->wait_lock);
	list_add_tail(&waiter->list, &sem->wait_list);
	/* caller will set RWSEM_FLAG_WAITERS */
}

/*
 * Remove a waiter from the wait_list and clear flags.
 *
 * Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of
 * this function. Modify with care.
 */
static inline void
rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
{
	lockdep_assert_held(&sem->wait_lock);
	list_del(&waiter->list);
	if (likely(!list_empty(&sem->wait_list)))
		return;

	atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count);
}

/*
 * handle the lock release when processes blocked on it can now run
 * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
 *   have been set.
 * - there must be someone on the queue
 * - the wait_lock must be held by the caller
 * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
 *   to actually wakeup the blocked task(s) and drop the reference count,
 *   preferably when the wait_lock is released
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only marked woken if downgrading is false
 *
 * Implies rwsem_del_waiter() for all woken readers.
 */
static void rwsem_mark_wake(struct rw_semaphore *sem,
			    enum rwsem_wake_type wake_type,
			    struct wake_q_head *wake_q)
{
	struct rwsem_waiter *waiter, *tmp;
	long oldcount, woken = 0, adjustment = 0;
	struct list_head wlist;

	lockdep_assert_held(&sem->wait_lock);

	/*
	 * Take a peek at the queue head waiter such that we can determine
	 * the wakeup(s) to perform.
	 */
	waiter = rwsem_first_waiter(sem);

	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
		if (wake_type == RWSEM_WAKE_ANY) {
			/*
			 * Mark writer at the front of the queue for wakeup.
			 * Until the task is actually awoken later by the
			 * caller, other writers are able to steal it.
			 * Readers, on the other hand, will block as they
			 * will notice the queued writer.
			 */
			wake_q_add(wake_q, waiter->task);
			lockevent_inc(rwsem_wake_writer);
		}

		return;
	}

	/*
	 * No reader wakeup if there are too many of them already.
	 */
	if (unlikely(atomic_long_read(&sem->count) < 0))
		return;

	/*
	 * Writers might steal the lock before we grant it to the next reader.
	 * We prefer to do the first reader grant before counting readers
	 * so we can bail out early if a writer stole the lock.
	 */
	if (wake_type != RWSEM_WAKE_READ_OWNED) {
		struct task_struct *owner;

		adjustment = RWSEM_READER_BIAS;
		oldcount = atomic_long_fetch_add(adjustment, &sem->count);
		if (unlikely(oldcount & RWSEM_WRITER_MASK)) {
			/*
			 * When we've been waiting "too" long (for writers
			 * to give up the lock), request a HANDOFF to
			 * force the issue.
			 */
			if (!(oldcount & RWSEM_FLAG_HANDOFF) &&
			    time_after(jiffies, waiter->timeout)) {
				adjustment -= RWSEM_FLAG_HANDOFF;
				lockevent_inc(rwsem_rlock_handoff);
			}

			atomic_long_add(-adjustment, &sem->count);
			return;
		}
		/*
		 * Set it to reader-owned to give spinners an early
		 * indication that readers now have the lock.
		 * The reader nonspinnable bit seen at slowpath entry of
		 * the reader is copied over.
		 */
		owner = waiter->task;
		__rwsem_set_reader_owned(sem, owner);
	}

	/*
	 * Grant up to MAX_READERS_WAKEUP read locks to all the readers in the
	 * queue. We know that the woken count will be at least 1, as we
	 * accounted for it above. Note we increment the 'active part' of the
	 * count by the number of readers before waking any processes up.
	 *
	 * This is an adaptation of the phase-fair R/W locks where at the
	 * reader phase (first waiter is a reader), all readers are eligible
	 * to acquire the lock at the same time irrespective of their order
	 * in the queue. The writers acquire the lock according to their
	 * order in the queue.
	 *
	 * We have to do wakeup in 2 passes to prevent the possibility that
	 * the reader count may be decremented before it is incremented. It
	 * is because the to-be-woken waiter may not have slept yet. So it
	 * may see waiter->task got cleared, finish its critical section and
	 * do an unlock before the reader count increment.
	 *
	 * 1) Collect the read-waiters in a separate list, count them and
	 *    fully increment the reader count in rwsem.
	 * 2) For each waiter in the new list, clear waiter->task and
	 *    put them into wake_q to be woken up later.
	 */
	INIT_LIST_HEAD(&wlist);
	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
		if (waiter->type == RWSEM_WAITING_FOR_WRITE)
			continue;

		woken++;
		list_move_tail(&waiter->list, &wlist);

		/*
		 * Limit # of readers that can be woken up per wakeup call.
		 */
		if (unlikely(woken >= MAX_READERS_WAKEUP))
			break;
	}

	adjustment = woken * RWSEM_READER_BIAS - adjustment;
	lockevent_cond_inc(rwsem_wake_reader, woken);

	oldcount = atomic_long_read(&sem->count);
	if (list_empty(&sem->wait_list)) {
		/*
		 * Combined with list_move_tail() above, this implies
		 * rwsem_del_waiter().
		 */
		adjustment -= RWSEM_FLAG_WAITERS;
		if (oldcount & RWSEM_FLAG_HANDOFF)
			adjustment -= RWSEM_FLAG_HANDOFF;
	} else if (woken) {
		/*
		 * When we've woken a reader, we no longer need to force
		 * writers to give up the lock and we can clear HANDOFF.
		 */
		if (oldcount & RWSEM_FLAG_HANDOFF)
			adjustment -= RWSEM_FLAG_HANDOFF;
	}

	if (adjustment)
		atomic_long_add(adjustment, &sem->count);

	/* 2nd pass */
	list_for_each_entry_safe(waiter, tmp, &wlist, list) {
		struct task_struct *tsk;

		tsk = waiter->task;
		get_task_struct(tsk);

		/*
		 * Ensure calling get_task_struct() before setting the reader
		 * waiter to nil such that rwsem_down_read_slowpath() cannot
		 * race with do_exit() by always holding a reference count
		 * to the task to wakeup.
		 */
		smp_store_release(&waiter->task, NULL);
		/*
		 * Ensure issuing the wakeup (either by us or someone else)
		 * after setting the reader waiter to nil.
		 */
		wake_q_add_safe(wake_q, tsk);
	}
}

/*
 * This function must be called with the sem->wait_lock held to prevent
 * race conditions between checking the rwsem wait list and setting the
 * sem->count accordingly.
 *
 * Implies rwsem_del_waiter() on success.
 */
static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
					struct rwsem_waiter *waiter)
{
	bool first = rwsem_first_waiter(sem) == waiter;
	long count, new;

	lockdep_assert_held(&sem->wait_lock);

	count = atomic_long_read(&sem->count);
	do {
		bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);

		if (has_handoff) {
			if (!first)
				return false;

			/* First waiter inherits a previously set handoff bit */
			waiter->handoff_set = true;
		}

		new = count;

		if (count & RWSEM_LOCK_MASK) {
			if (has_handoff || (!rt_task(waiter->task) &&
					    !time_after(jiffies, waiter->timeout)))
				return false;

			new |= RWSEM_FLAG_HANDOFF;
		} else {
			new |= RWSEM_WRITER_LOCKED;
			new &= ~RWSEM_FLAG_HANDOFF;

			if (list_is_singular(&sem->wait_list))
				new &= ~RWSEM_FLAG_WAITERS;
		}
	} while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));

	/*
	 * We have either acquired the lock with handoff bit cleared or
	 * set the handoff bit.
	 */
	if (new & RWSEM_FLAG_HANDOFF) {
		waiter->handoff_set = true;
		lockevent_inc(rwsem_wlock_handoff);
		return false;
	}

	/*
	 * Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on
	 * success.
	 */
	list_del(&waiter->list);
	rwsem_set_owner(sem);
	return true;
}

/*
 * The rwsem_spin_on_owner() function returns the following 4 values
 * depending on the lock owner state.
 *   OWNER_NULL  : owner is currently NULL
 *   OWNER_WRITER: when owner changes and is a writer
 *   OWNER_READER: when owner changes and the new owner may be a reader.
 *   OWNER_NONSPINNABLE:
 *		   when optimistic spinning has to stop because either the
 *		   owner stops running, is unknown, or its timeslice has
 *		   been used up.
 */
enum owner_state {
	OWNER_NULL		= 1 << 0,
	OWNER_WRITER		= 1 << 1,
	OWNER_READER		= 1 << 2,
	OWNER_NONSPINNABLE	= 1 << 3,
};

#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
 * Try to acquire write lock before the writer has been put on wait queue.
 */
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
	long count = atomic_long_read(&sem->count);

	while (!(count & (RWSEM_LOCK_MASK|RWSEM_FLAG_HANDOFF))) {
		if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
						    count | RWSEM_WRITER_LOCKED)) {
			rwsem_set_owner(sem);
			lockevent_inc(rwsem_opt_lock);
			return true;
		}
	}
	return false;
}

static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *owner;
	unsigned long flags;
	bool ret = true;

	if (need_resched()) {
		lockevent_inc(rwsem_opt_fail);
		return false;
	}

	preempt_disable();
	/*
	 * Disabling preemption is equivalent to an RCU read-side critical
	 * section, so the task_struct won't go away.
	 */
	owner = rwsem_owner_flags(sem, &flags);
	/*
	 * Don't check the read-owner as the entry may be stale.
	 */
	if ((flags & RWSEM_NONSPINNABLE) ||
	    (owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner)))
		ret = false;
	preempt_enable();

	lockevent_cond_inc(rwsem_opt_fail, !ret);
	return ret;
}

#define OWNER_SPINNABLE		(OWNER_NULL | OWNER_WRITER | OWNER_READER)

static inline enum owner_state
rwsem_owner_state(struct task_struct *owner, unsigned long flags)
{
	if (flags & RWSEM_NONSPINNABLE)
		return OWNER_NONSPINNABLE;

	if (flags & RWSEM_READER_OWNED)
		return OWNER_READER;

	return owner ? OWNER_WRITER : OWNER_NULL;
}

static noinline enum owner_state
rwsem_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *new, *owner;
	unsigned long flags, new_flags;
	enum owner_state state;

	lockdep_assert_preemption_disabled();

	owner = rwsem_owner_flags(sem, &flags);
	state = rwsem_owner_state(owner, flags);
	if (state != OWNER_WRITER)
		return state;

	for (;;) {
		/*
		 * When a waiting writer set the handoff flag, it may spin
		 * on the owner as well. Once that writer acquires the lock,
		 * we can spin on it. So we don't need to quit even when the
		 * handoff bit is set.
		 */
		new = rwsem_owner_flags(sem, &new_flags);
		if ((new != owner) || (new_flags != flags)) {
			state = rwsem_owner_state(new, new_flags);
			break;
		}

		/*
		 * Ensure we emit the owner->on_cpu dereference _after_
		 * checking sem->owner still matches owner. If that fails,
		 * owner might point to free()d memory. If it still matches,
		 * our spinning context already disabled preemption, which
		 * is equivalent to an RCU read-side critical section and
		 * ensures the memory stays valid.
		 */
		barrier();

		if (need_resched() || !owner_on_cpu(owner)) {
			state = OWNER_NONSPINNABLE;
			break;
		}

		cpu_relax();
	}

	return state;
}

/*
 * Calculate reader-owned rwsem spinning threshold for writer
 *
 * The more readers own the rwsem, the longer it will take for them to
 * wind down and free the rwsem. So the empirical formula used to
 * determine the actual spinning time limit here is:
 *
 *   Spinning threshold = (10 + nr_readers/2)us
 *
 * The limit is capped to a maximum of 25us (30 readers). This is just
 * a heuristic and is subject to change in the future.
 */
static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)
{
	long count = atomic_long_read(&sem->count);
	int readers = count >> RWSEM_READER_SHIFT;
	u64 delta;

	if (readers > 30)
		readers = 30;
	delta = (20 + readers) * NSEC_PER_USEC / 2;

	return sched_clock() + delta;
}

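/*
 * Worked example (illustrative, not from the original source): with 10
 * readers holding the lock, delta = (20 + 10) * NSEC_PER_USEC / 2 =
 * 15000ns = 15us, matching the (10 + nr_readers/2)us formula above.
 * With 30 or more readers the cap applies:
 * (20 + 30) * NSEC_PER_USEC / 2 = 25us.
 */
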
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	bool taken = false;
	int prev_owner_state = OWNER_NULL;
	int loop = 0;
	u64 rspin_threshold = 0;

	preempt_disable();

	/* sem->wait_lock should not be held when doing optimistic spinning */
	if (!osq_lock(&sem->osq))
		goto done;

	/*
	 * Optimistically spin on the owner field and attempt to acquire the
	 * lock whenever the owner changes. Spinning will be stopped when:
	 *  1) the owning writer isn't running; or
	 *  2) readers own the lock and spinning time has exceeded limit.
	 */
	for (;;) {
		enum owner_state owner_state;

		owner_state = rwsem_spin_on_owner(sem);
		if (!(owner_state & OWNER_SPINNABLE))
			break;

		/*
		 * Try to acquire the lock
		 */
		taken = rwsem_try_write_lock_unqueued(sem);

		if (taken)
			break;

		/*
		 * Time-based reader-owned rwsem optimistic spinning
		 */
		if (owner_state == OWNER_READER) {
			/*
			 * Re-initialize rspin_threshold every time when
			 * the owner state changes from non-reader to reader.
			 * This allows a writer to steal the lock in between
			 * 2 reader phases and have the threshold reset at
			 * the beginning of the 2nd reader phase.
			 */
			if (prev_owner_state != OWNER_READER) {
				if (rwsem_test_oflags(sem, RWSEM_NONSPINNABLE))
					break;
				rspin_threshold = rwsem_rspin_threshold(sem);
				loop = 0;
			}

			/*
			 * Check time threshold once every 16 iterations to
			 * avoid calling sched_clock() too frequently so
			 * as to reduce the average latency between the times
			 * when the lock becomes free and when the spinner
			 * is ready to do a trylock.
			 */
			else if (!(++loop & 0xf) && (sched_clock() > rspin_threshold)) {
				rwsem_set_nonspinnable(sem);
				lockevent_inc(rwsem_opt_nospin);
				break;
			}
		}

		/*
		 * An RT task cannot do optimistic spinning if it cannot
		 * be sure the lock holder is running, or live-lock may
		 * happen if the current task and the lock holder happen
		 * to run on the same CPU. However, aborting optimistic
		 * spinning while a NULL owner is detected may miss some
		 * opportunity where spinning can continue without causing
		 * problem.
		 *
		 * There are 2 possible cases where an RT task may be able
		 * to continue spinning.
		 *
		 * 1) The lock owner is in the process of releasing the
		 *    lock, sem->owner is cleared but the lock has not
		 *    been released yet.
		 * 2) The lock was free and owner cleared, but another
		 *    task just came in and acquired the lock before
		 *    we tried to get it. The new owner may be a spinnable
		 *    writer.
		 *
		 * To take advantage of the two scenarios listed above, the
		 * RT task is made to retry one more time to see if it can
		 * acquire the lock or continue spinning on the new owning
		 * writer. Of course, if the time lag is long enough or the
		 * new owner is not a writer or spinnable, the RT task will
		 * quit spinning.
		 *
		 * If the owner is a writer, the need_resched() check is
		 * done inside rwsem_spin_on_owner(). If the owner is not
		 * a writer, the need_resched() check needs to be done here.
		 */
		if (owner_state != OWNER_WRITER) {
			if (need_resched())
				break;
			if (rt_task(current) &&
			   (prev_owner_state != OWNER_WRITER))
				break;
		}
		prev_owner_state = owner_state;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax();
	}
	osq_unlock(&sem->osq);
done:
	preempt_enable();
	lockevent_cond_inc(rwsem_opt_fail, !taken);
	return taken;
}

/*
 * Clear the owner's RWSEM_NONSPINNABLE bit if it is set. This should
 * only be called when the reader count reaches 0.
 */
static inline void clear_nonspinnable(struct rw_semaphore *sem)
{
	if (rwsem_test_oflags(sem, RWSEM_NONSPINNABLE))
		atomic_long_andnot(RWSEM_NONSPINNABLE, &sem->owner);
}

#else
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
	return false;
}

static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	return false;
}

static inline void clear_nonspinnable(struct rw_semaphore *sem) { }

static inline enum owner_state
rwsem_spin_on_owner(struct rw_semaphore *sem)
{
	return OWNER_NONSPINNABLE;
}
#endif

/*
 * Wait for the read lock to be granted
 */
static struct rw_semaphore __sched *
rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int state)
{
	long adjustment = -RWSEM_READER_BIAS;
	long rcnt = (count >> RWSEM_READER_SHIFT);
	struct rwsem_waiter waiter;
	DEFINE_WAKE_Q(wake_q);
	bool wake = false;

	/*
	 * To prevent a constant stream of readers from starving a sleeping
	 * waiter, don't attempt optimistic lock stealing if the lock is
	 * currently owned by readers.
	 */
	if ((atomic_long_read(&sem->owner) & RWSEM_READER_OWNED) &&
	    (rcnt > 1) && !(count & RWSEM_WRITER_LOCKED))
		goto queue;

	/*
	 * Reader optimistic lock stealing.
	 */
	if (!(count & (RWSEM_WRITER_LOCKED | RWSEM_FLAG_HANDOFF))) {
		rwsem_set_reader_owned(sem);
		lockevent_inc(rwsem_rlock_steal);

		/*
		 * Wake up other readers in the wait queue if it is
		 * the first reader.
		 */
		if ((rcnt == 1) && (count & RWSEM_FLAG_WAITERS)) {
			raw_spin_lock_irq(&sem->wait_lock);
			if (!list_empty(&sem->wait_list))
				rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED,
						&wake_q);
			raw_spin_unlock_irq(&sem->wait_lock);
			wake_up_q(&wake_q);
		}
		return sem;
	}

queue:
	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_READ;
	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;

	raw_spin_lock_irq(&sem->wait_lock);
	if (list_empty(&sem->wait_list)) {
		/*
		 * In case the wait queue is empty and the lock isn't owned
		 * by a writer, this reader can exit the slowpath and return
		 * immediately as its RWSEM_READER_BIAS has already been set
		 * in the count.
		 */
		if (!(atomic_long_read(&sem->count) & RWSEM_WRITER_MASK)) {
			/* Provide lock ACQUIRE */
			smp_acquire__after_ctrl_dep();
			raw_spin_unlock_irq(&sem->wait_lock);
			rwsem_set_reader_owned(sem);
			lockevent_inc(rwsem_rlock_fast);
			return sem;
		}
		adjustment += RWSEM_FLAG_WAITERS;
	}
	rwsem_add_waiter(sem, &waiter);

	/* we're now waiting on the lock, but no longer actively locking */
	count = atomic_long_add_return(adjustment, &sem->count);

	/*
	 * If there are no active locks, wake the front queued process(es).
	 *
	 * If there are no writers and we are first in the queue,
	 * wake our own waiter to join the existing active readers!
	 */
	if (!(count & RWSEM_LOCK_MASK)) {
		clear_nonspinnable(sem);
		wake = true;
	}
	if (wake || (!(count & RWSEM_WRITER_MASK) &&
		    (adjustment & RWSEM_FLAG_WAITERS)))
		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

	raw_spin_unlock_irq(&sem->wait_lock);
	wake_up_q(&wake_q);

	/* wait to be given the lock */
	for (;;) {
		set_current_state(state);
		if (!smp_load_acquire(&waiter.task)) {
			/* Matches rwsem_mark_wake()'s smp_store_release(). */
			break;
		}
		if (signal_pending_state(state, current)) {
			raw_spin_lock_irq(&sem->wait_lock);
			if (waiter.task)
				goto out_nolock;
			raw_spin_unlock_irq(&sem->wait_lock);
			/* Ordered by sem->wait_lock against rwsem_mark_wake(). */
			break;
		}
		schedule();
		lockevent_inc(rwsem_sleep_reader);
	}

	__set_current_state(TASK_RUNNING);
	lockevent_inc(rwsem_rlock);
	return sem;

out_nolock:
	rwsem_del_waiter(sem, &waiter);
	raw_spin_unlock_irq(&sem->wait_lock);
	__set_current_state(TASK_RUNNING);
	lockevent_inc(rwsem_rlock_fail);
	return ERR_PTR(-EINTR);
}

/*
 * Wait until we successfully acquire the write lock
 */
static struct rw_semaphore __sched *
rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
{
	long count;
	struct rwsem_waiter waiter;
	DEFINE_WAKE_Q(wake_q);

	/* do optimistic spinning and steal lock if possible */
	if (rwsem_can_spin_on_owner(sem) && rwsem_optimistic_spin(sem)) {
		/* rwsem_optimistic_spin() implies ACQUIRE on success */
		return sem;
	}

	/*
	 * Optimistic spinning failed, proceed to the slowpath
	 * and block until we can acquire the sem.
	 */
	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_WRITE;
	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
	waiter.handoff_set = false;

	raw_spin_lock_irq(&sem->wait_lock);
	rwsem_add_waiter(sem, &waiter);

	/* we're now waiting on the lock */
	if (rwsem_first_waiter(sem) != &waiter) {
		count = atomic_long_read(&sem->count);

		/*
		 * If there were already threads queued before us and:
		 *  1) there are no active locks, wake the front
		 *     queued process(es) as the handoff bit might be set.
		 *  2) there are no active writers and some readers, the lock
		 *     must be read owned; so we try to wake any read lock
		 *     waiters that were queued ahead of us.
		 */
		if (count & RWSEM_WRITER_MASK)
			goto wait;

		rwsem_mark_wake(sem, (count & RWSEM_READER_MASK)
					? RWSEM_WAKE_READERS
					: RWSEM_WAKE_ANY, &wake_q);

		if (!wake_q_empty(&wake_q)) {
			/*
			 * We want to minimize wait_lock hold time especially
			 * when a large number of readers are to be woken up.
			 */
			raw_spin_unlock_irq(&sem->wait_lock);
			wake_up_q(&wake_q);
			wake_q_init(&wake_q);	/* Used again, reinit */
			raw_spin_lock_irq(&sem->wait_lock);
		}
	} else {
		atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
	}

wait:
	/* wait until we successfully acquire the lock */
	set_current_state(state);
	for (;;) {
		if (rwsem_try_write_lock(sem, &waiter)) {
			/* rwsem_try_write_lock() implies ACQUIRE on success */
			break;
		}

		raw_spin_unlock_irq(&sem->wait_lock);

		if (signal_pending_state(state, current))
			goto out_nolock;

1122 | /* |
1123 | * After setting the handoff bit and failing to acquire | |
1124 | * the lock, attempt to spin on owner to accelerate lock | |
1125 | * transfer. If the previous owner is a on-cpu writer and it | |
1126 | * has just released the lock, OWNER_NULL will be returned. | |
1127 | * In this case, we attempt to acquire the lock again | |
1128 | * without sleeping. | |
1129 | */ | |
d257cc8c | 1130 | if (waiter.handoff_set) { |
7cdacc5f YX |
1131 | enum owner_state owner_state; |
1132 | ||
1133 | preempt_disable(); | |
1134 | owner_state = rwsem_spin_on_owner(sem); | |
1135 | preempt_enable(); | |
1136 | ||
1137 | if (owner_state == OWNER_NULL) | |
1138 | goto trylock_again; | |
1139 | } | |

		schedule();
		lockevent_inc(rwsem_sleep_writer);
		set_current_state(state);
trylock_again:
		raw_spin_lock_irq(&sem->wait_lock);
	}
	__set_current_state(TASK_RUNNING);
	raw_spin_unlock_irq(&sem->wait_lock);
	lockevent_inc(rwsem_wlock);
	return sem;

out_nolock:
	__set_current_state(TASK_RUNNING);
	raw_spin_lock_irq(&sem->wait_lock);
	rwsem_del_waiter(sem, &waiter);
	if (!list_empty(&sem->wait_list))
		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
	raw_spin_unlock_irq(&sem->wait_lock);
	wake_up_q(&wake_q);
	lockevent_inc(rwsem_wlock_fail);
	return ERR_PTR(-EINTR);
}

/*
 * handle waking up a waiter on the semaphore
 * - up_read/up_write has decremented the active part of count if we come here
 */
static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
	unsigned long flags;
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irqsave(&sem->wait_lock, flags);

	if (!list_empty(&sem->wait_list))
		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
	wake_up_q(&wake_q);

	return sem;
}

/*
 * downgrade a write lock into a read lock
 * - caller incremented waiting part of count and discovered it still negative
 * - just wake up any readers at the front of the queue
 */
static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
	unsigned long flags;
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irqsave(&sem->wait_lock, flags);

	if (!list_empty(&sem->wait_list))
		rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
	wake_up_q(&wake_q);

	return sem;
}

/*
 * lock for reading
 */
static inline int __down_read_common(struct rw_semaphore *sem, int state)
{
	long count;

	if (!rwsem_read_trylock(sem, &count)) {
		if (IS_ERR(rwsem_down_read_slowpath(sem, count, state)))
			return -EINTR;
		DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
	}
	return 0;
}

static inline void __down_read(struct rw_semaphore *sem)
{
	__down_read_common(sem, TASK_UNINTERRUPTIBLE);
}

static inline int __down_read_interruptible(struct rw_semaphore *sem)
{
	return __down_read_common(sem, TASK_INTERRUPTIBLE);
}

static inline int __down_read_killable(struct rw_semaphore *sem)
{
	return __down_read_common(sem, TASK_KILLABLE);
}

static inline int __down_read_trylock(struct rw_semaphore *sem)
{
	long tmp;

	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);

	tmp = atomic_long_read(&sem->count);
	while (!(tmp & RWSEM_READ_FAILED_MASK)) {
		if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
						    tmp + RWSEM_READER_BIAS)) {
			rwsem_set_reader_owned(sem);
			return 1;
		}
	}
	return 0;
}

/*
 * lock for writing
 */
static inline int __down_write_common(struct rw_semaphore *sem, int state)
{
	if (unlikely(!rwsem_write_trylock(sem))) {
		if (IS_ERR(rwsem_down_write_slowpath(sem, state)))
			return -EINTR;
	}

	return 0;
}

static inline void __down_write(struct rw_semaphore *sem)
{
	__down_write_common(sem, TASK_UNINTERRUPTIBLE);
}

static inline int __down_write_killable(struct rw_semaphore *sem)
{
	return __down_write_common(sem, TASK_KILLABLE);
}

static inline int __down_write_trylock(struct rw_semaphore *sem)
{
	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
	return rwsem_write_trylock(sem);
}

/*
 * unlock after reading
 */
static inline void __up_read(struct rw_semaphore *sem)
{
	long tmp;

	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
	DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);

	rwsem_clear_reader_owned(sem);
	tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
	DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
	if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
		      RWSEM_FLAG_WAITERS)) {
		clear_nonspinnable(sem);
		rwsem_wake(sem);
	}
}

/*
 * unlock after writing
 */
static inline void __up_write(struct rw_semaphore *sem)
{
	long tmp;

	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
	/*
	 * sem->owner may differ from current if the ownership is transferred
	 * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits.
	 */
	DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) &&
			    !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem);

	rwsem_clear_owner(sem);
	tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
	if (unlikely(tmp & RWSEM_FLAG_WAITERS))
		rwsem_wake(sem);
}

/*
 * downgrade write lock to read lock
 */
static inline void __downgrade_write(struct rw_semaphore *sem)
{
	long tmp;

	/*
	 * When downgrading from exclusive to shared ownership,
	 * anything inside the write-locked region cannot leak
	 * into the read side. In contrast, anything in the
	 * read-locked region is ok to be re-ordered into the
	 * write side. As such, rely on RELEASE semantics.
	 */
	DEBUG_RWSEMS_WARN_ON(rwsem_owner(sem) != current, sem);
	tmp = atomic_long_fetch_add_release(
		-RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
	rwsem_set_reader_owned(sem);
	if (tmp & RWSEM_FLAG_WAITERS)
		rwsem_downgrade_wake(sem);
}

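/*
 * Usage sketch (illustrative, not part of the original source): a writer
 * that wants to keep reading what it just wrote can downgrade instead of
 * dropping the lock, so no other writer can slip in between:
 *
 *	down_write(&sem);
 *	... modify shared data ...
 *	downgrade_write(&sem);
 *	... keep reading alongside other readers ...
 *	up_read(&sem);
 */
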
42254105 TG |
1344 | #else /* !CONFIG_PREEMPT_RT */ |
1345 | ||
e17ba59b | 1346 | #define RT_MUTEX_BUILD_MUTEX |
42254105 TG |
1347 | #include "rtmutex.c" |
1348 | ||
1349 | #define rwbase_set_and_save_current_state(state) \ | |
1350 | set_current_state(state) | |
1351 | ||
1352 | #define rwbase_restore_current_state() \ | |
1353 | __set_current_state(TASK_RUNNING) | |
1354 | ||
1355 | #define rwbase_rtmutex_lock_state(rtm, state) \ | |
1356 | __rt_mutex_lock(rtm, state) | |
1357 | ||
1358 | #define rwbase_rtmutex_slowlock_locked(rtm, state) \ | |
add46132 | 1359 | __rt_mutex_slowlock_locked(rtm, NULL, state) |
42254105 TG |
1360 | |
1361 | #define rwbase_rtmutex_unlock(rtm) \ | |
1362 | __rt_mutex_unlock(rtm) | |
1363 | ||
1364 | #define rwbase_rtmutex_trylock(rtm) \ | |
1365 | __rt_mutex_trylock(rtm) | |
1366 | ||
1367 | #define rwbase_signal_pending_state(state, current) \ | |
1368 | signal_pending_state(state, current) | |
1369 | ||
1370 | #define rwbase_schedule() \ | |
1371 | schedule() | |
1372 | ||
1373 | #include "rwbase_rt.c" | |
1374 | ||
15eb7c88 | 1375 | void __init_rwsem(struct rw_semaphore *sem, const char *name, |
42254105 TG |
1376 | struct lock_class_key *key) |
1377 | { | |
15eb7c88 MG |
1378 | init_rwbase_rt(&(sem)->rwbase); |
1379 | ||
1380 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
42254105 TG |
1381 | debug_check_no_locks_freed((void *)sem, sizeof(*sem)); |
1382 | lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP); | |
42254105 | 1383 | #endif |
15eb7c88 MG |
1384 | } |
1385 | EXPORT_SYMBOL(__init_rwsem); | |
42254105 TG |
1386 | |
1387 | static inline void __down_read(struct rw_semaphore *sem) | |
1388 | { | |
1389 | rwbase_read_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE); | |
1390 | } | |
1391 | ||
1392 | static inline int __down_read_interruptible(struct rw_semaphore *sem) | |
1393 | { | |
1394 | return rwbase_read_lock(&sem->rwbase, TASK_INTERRUPTIBLE); | |
1395 | } | |
1396 | ||
1397 | static inline int __down_read_killable(struct rw_semaphore *sem) | |
1398 | { | |
1399 | return rwbase_read_lock(&sem->rwbase, TASK_KILLABLE); | |
1400 | } | |
1401 | ||
1402 | static inline int __down_read_trylock(struct rw_semaphore *sem) | |
1403 | { | |
1404 | return rwbase_read_trylock(&sem->rwbase); | |
1405 | } | |
1406 | ||
1407 | static inline void __up_read(struct rw_semaphore *sem) | |
1408 | { | |
1409 | rwbase_read_unlock(&sem->rwbase, TASK_NORMAL); | |
1410 | } | |
1411 | ||
1412 | static inline void __sched __down_write(struct rw_semaphore *sem) | |
1413 | { | |
1414 | rwbase_write_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE); | |
1415 | } | |
1416 | ||
1417 | static inline int __sched __down_write_killable(struct rw_semaphore *sem) | |
1418 | { | |
1419 | return rwbase_write_lock(&sem->rwbase, TASK_KILLABLE); | |
1420 | } | |
1421 | ||
1422 | static inline int __down_write_trylock(struct rw_semaphore *sem) | |
1423 | { | |
1424 | return rwbase_write_trylock(&sem->rwbase); | |
1425 | } | |
1426 | ||
1427 | static inline void __up_write(struct rw_semaphore *sem) | |
1428 | { | |
1429 | rwbase_write_unlock(&sem->rwbase); | |
1430 | } | |
1431 | ||
1432 | static inline void __downgrade_write(struct rw_semaphore *sem) | |
1433 | { | |
1434 | rwbase_write_downgrade(&sem->rwbase); | |
1435 | } | |
1436 | ||
1437 | /* Debug stubs for the common API */ | |
1438 | #define DEBUG_RWSEMS_WARN_ON(c, sem) | |
1439 | ||
1440 | static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem, | |
1441 | struct task_struct *owner) | |
1442 | { | |
1443 | } | |
1444 | ||
1445 | static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem) | |
1446 | { | |
1447 | int count = atomic_read(&sem->rwbase.readers); | |
1448 | ||
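	/*
	 * rwbase.readers is READER_BIAS (INT_MIN) when the lock is free,
	 * is incremented towards zero once per reader, and goes
	 * non-negative when a writer owns the lock -- so "negative but
	 * not exactly READER_BIAS" means at least one reader holds it.
	 */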
1449 | return count < 0 && count != READER_BIAS; | |
1450 | } | |
1451 | ||
1452 | #endif /* CONFIG_PREEMPT_RT */ | |
1453 | ||
c4e05116 IM |
1454 | /* |
1455 | * lock for reading | |
1456 | */ | |
c7af77b5 | 1457 | void __sched down_read(struct rw_semaphore *sem) |
c4e05116 IM |
1458 | { |
1459 | might_sleep(); | |
1460 | rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); | |
1461 | ||
4fe87745 | 1462 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); |
c4e05116 | 1463 | } |
c4e05116 IM |
1464 | EXPORT_SYMBOL(down_read); |
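/*
 * A minimal usage sketch. The my_dev_lock rwsem and its surroundings
 * are hypothetical, not part of this file; the point is that readers
 * may block (hence the might_sleep() above) and that any number of
 * readers can hold the lock concurrently:
 *
 *	static DECLARE_RWSEM(my_dev_lock);
 *
 *	down_read(&my_dev_lock);
 *	... inspect shared state; other readers may run concurrently ...
 *	up_read(&my_dev_lock);
 */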
1465 | ||
31784cff EB |
1466 | int __sched down_read_interruptible(struct rw_semaphore *sem) |
1467 | { | |
1468 | might_sleep(); | |
1469 | rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); | |
1470 | ||
1471 | if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_interruptible)) { | |
1472 | rwsem_release(&sem->dep_map, _RET_IP_); | |
1473 | return -EINTR; | |
1474 | } | |
1475 | ||
1476 | return 0; | |
1477 | } | |
1478 | EXPORT_SYMBOL(down_read_interruptible); | |
1479 | ||
76f8507f KT |
1480 | int __sched down_read_killable(struct rw_semaphore *sem) |
1481 | { | |
1482 | might_sleep(); | |
1483 | rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); | |
1484 | ||
1485 | if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) { | |
5facae4f | 1486 | rwsem_release(&sem->dep_map, _RET_IP_); |
76f8507f KT |
1487 | return -EINTR; |
1488 | } | |
1489 | ||
76f8507f KT |
1490 | return 0; |
1491 | } | |
76f8507f KT |
1492 | EXPORT_SYMBOL(down_read_killable); |
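/*
 * Error-handling sketch for the two signal-aware variants above
 * (hypothetical caller): both return 0 with the lock held, or -EINTR
 * without it -- on any pending signal for _interruptible, only on a
 * fatal one for _killable:
 *
 *	int ret = down_read_killable(&my_dev_lock);
 *
 *	if (ret)	(ret == -EINTR, the lock was NOT acquired)
 *		return ret;
 *	...
 *	up_read(&my_dev_lock);
 */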
1493 | ||
c4e05116 IM |
1494 | /* |
1495 | * trylock for reading -- returns 1 if successful, 0 if contention | |
1496 | */ | |
1497 | int down_read_trylock(struct rw_semaphore *sem) | |
1498 | { | |
1499 | int ret = __down_read_trylock(sem); | |
1500 | ||
c7580c1e | 1501 | if (ret == 1) |
c4e05116 IM |
1502 | rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); |
1503 | return ret; | |
1504 | } | |
c4e05116 IM |
1505 | EXPORT_SYMBOL(down_read_trylock); |
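/*
 * Unlike the killable/interruptible variants, which return 0 on
 * success, the trylock family returns 1 on success and 0 on failure,
 * and never sleeps. A hypothetical opportunistic reader:
 *
 *	if (down_read_trylock(&my_dev_lock)) {
 *		... got it; read state ...
 *		up_read(&my_dev_lock);
 *	} else {
 *		... contended; fall back or retry later ...
 *	}
 */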
1506 | ||
1507 | /* | |
1508 | * lock for writing | |
1509 | */ | |
c7af77b5 | 1510 | void __sched down_write(struct rw_semaphore *sem) |
c4e05116 IM |
1511 | { |
1512 | might_sleep(); | |
1513 | rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); | |
4fe87745 | 1514 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
c4e05116 | 1515 | } |
c4e05116 IM |
1516 | EXPORT_SYMBOL(down_write); |
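/*
 * Writer-side counterpart of the sketch above (same hypothetical
 * my_dev_lock): the write lock is exclusive against readers and
 * writers alike until up_write():
 *
 *	down_write(&my_dev_lock);
 *	... modify shared state ...
 *	up_write(&my_dev_lock);
 */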
1517 | ||
916633a4 MH |
1518 | /* |
1519 | * lock for writing; returns -EINTR instead if a fatal signal arrives first | |
1520 | */ | |
1521 | int __sched down_write_killable(struct rw_semaphore *sem) | |
1522 | { | |
1523 | might_sleep(); | |
1524 | rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); | |
1525 | ||
6cef7ff6 WL |
1526 | if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, |
1527 | __down_write_killable)) { | |
5facae4f | 1528 | rwsem_release(&sem->dep_map, _RET_IP_); |
916633a4 MH |
1529 | return -EINTR; |
1530 | } | |
1531 | ||
916633a4 MH |
1532 | return 0; |
1533 | } | |
916633a4 MH |
1534 | EXPORT_SYMBOL(down_write_killable); |
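/*
 * Same contract as down_read_killable() (hypothetical caller): 0
 * means the write lock is held, -EINTR means a fatal signal arrived
 * first and the lock was not taken:
 *
 *	if (down_write_killable(&my_dev_lock))
 *		return -EINTR;
 *	... modify shared state ...
 *	up_write(&my_dev_lock);
 */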
1535 | ||
c4e05116 IM |
1536 | /* |
1537 | * trylock for writing -- returns 1 if successful, 0 if contention | |
1538 | */ | |
1539 | int down_write_trylock(struct rw_semaphore *sem) | |
1540 | { | |
1541 | int ret = __down_write_trylock(sem); | |
1542 | ||
c7580c1e | 1543 | if (ret == 1) |
428e6ce0 | 1544 | rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); |
4fc828e2 | 1545 | |
c4e05116 IM |
1546 | return ret; |
1547 | } | |
c4e05116 IM |
1548 | EXPORT_SYMBOL(down_write_trylock); |
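/*
 * Same 1/0 convention as down_read_trylock(). A hypothetical
 * non-blocking path that gives up rather than waits:
 *
 *	if (!down_write_trylock(&my_dev_lock))
 *		return -EBUSY;
 *	... exclusive work ...
 *	up_write(&my_dev_lock);
 */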
1549 | ||
1550 | /* | |
1551 | * release a read lock | |
1552 | */ | |
1553 | void up_read(struct rw_semaphore *sem) | |
1554 | { | |
5facae4f | 1555 | rwsem_release(&sem->dep_map, _RET_IP_); |
c4e05116 IM |
1556 | __up_read(sem); |
1557 | } | |
c4e05116 IM |
1558 | EXPORT_SYMBOL(up_read); |
1559 | ||
1560 | /* | |
1561 | * release a write lock | |
1562 | */ | |
1563 | void up_write(struct rw_semaphore *sem) | |
1564 | { | |
5facae4f | 1565 | rwsem_release(&sem->dep_map, _RET_IP_); |
c4e05116 IM |
1566 | __up_write(sem); |
1567 | } | |
c4e05116 IM |
1568 | EXPORT_SYMBOL(up_write); |
1569 | ||
1570 | /* | |
1571 | * downgrade write lock to read lock | |
1572 | */ | |
1573 | void downgrade_write(struct rw_semaphore *sem) | |
1574 | { | |
6419c4af | 1575 | lock_downgrade(&sem->dep_map, _RET_IP_); |
c4e05116 IM |
1576 | __downgrade_write(sem); |
1577 | } | |
c4e05116 | 1578 | EXPORT_SYMBOL(downgrade_write); |
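/*
 * Typical use (hypothetical): install new state under the write lock,
 * then downgrade so blocked readers can proceed while this task keeps
 * reading, with no unlocked window in between. Note there is no
 * inverse operation -- a read lock cannot be upgraded to a write lock:
 *
 *	down_write(&my_dev_lock);
 *	... publish new state ...
 *	downgrade_write(&my_dev_lock);
 *	... keep using the state read-only ...
 *	up_read(&my_dev_lock);
 */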
4ea2176d IM |
1579 | |
1580 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
1581 | ||
1582 | void down_read_nested(struct rw_semaphore *sem, int subclass) | |
1583 | { | |
1584 | might_sleep(); | |
1585 | rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); | |
4fe87745 | 1586 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); |
4ea2176d | 1587 | } |
4ea2176d IM |
1588 | EXPORT_SYMBOL(down_read_nested); |
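/*
 * The _nested variants change lockdep bookkeeping only, not locking
 * behaviour: when two rwsems of the same lock class are legitimately
 * held at once in a fixed order (a hypothetical parent/child pair
 * below), a nonzero subclass stops lockdep from flagging the second
 * acquisition as a self-deadlock:
 *
 *	down_read(&parent->lock);
 *	down_read_nested(&child->lock, SINGLE_DEPTH_NESTING);
 *	...
 *	up_read(&child->lock);
 *	up_read(&parent->lock);
 */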
1589 | ||
0f9368b5 EB |
1590 | int down_read_killable_nested(struct rw_semaphore *sem, int subclass) |
1591 | { | |
1592 | might_sleep(); | |
1593 | rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); | |
1594 | ||
1595 | if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) { | |
1596 | rwsem_release(&sem->dep_map, _RET_IP_); | |
1597 | return -EINTR; | |
1598 | } | |
1599 | ||
1600 | return 0; | |
1601 | } | |
1602 | EXPORT_SYMBOL(down_read_killable_nested); | |
1603 | ||
1b963c81 JK |
1604 | void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) |
1605 | { | |
1606 | might_sleep(); | |
1607 | rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); | |
1b963c81 JK |
1608 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
1609 | } | |
1b963c81 JK |
1610 | EXPORT_SYMBOL(_down_write_nest_lock); |
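/*
 * This is normally reached through the down_write_nest_lock() wrapper
 * in <linux/rwsem.h>: it tells lockdep that an outer lock, already
 * held, serializes all takers, so any number of same-class rwsems may
 * be write-locked beneath it (sketch, hypothetical names):
 *
 *	mutex_lock(&all_objs_mutex);
 *	list_for_each_entry(obj, &obj_list, node)
 *		down_write_nest_lock(&obj->sem, &all_objs_mutex);
 */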
1611 | ||
84759c6d KO |
1612 | void down_read_non_owner(struct rw_semaphore *sem) |
1613 | { | |
1614 | might_sleep(); | |
84759c6d | 1615 | __down_read(sem); |
925b9cd1 | 1616 | __rwsem_set_reader_owned(sem, NULL); |
84759c6d | 1617 | } |
84759c6d KO |
1618 | EXPORT_SYMBOL(down_read_non_owner); |
1619 | ||
4ea2176d IM |
1620 | void down_write_nested(struct rw_semaphore *sem, int subclass) |
1621 | { | |
1622 | might_sleep(); | |
1623 | rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); | |
4fe87745 | 1624 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
4ea2176d | 1625 | } |
4ea2176d IM |
1626 | EXPORT_SYMBOL(down_write_nested); |
1627 | ||
887bddfa AV |
1628 | int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass) |
1629 | { | |
1630 | might_sleep(); | |
1631 | rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); | |
1632 | ||
6cef7ff6 WL |
1633 | if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, |
1634 | __down_write_killable)) { | |
5facae4f | 1635 | rwsem_release(&sem->dep_map, _RET_IP_); |
887bddfa AV |
1636 | return -EINTR; |
1637 | } | |
1638 | ||
887bddfa AV |
1639 | return 0; |
1640 | } | |
887bddfa AV |
1641 | EXPORT_SYMBOL(down_write_killable_nested); |
1642 | ||
84759c6d KO |
1643 | void up_read_non_owner(struct rw_semaphore *sem) |
1644 | { | |
94a9717b | 1645 | DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem); |
84759c6d KO |
1646 | __up_read(sem); |
1647 | } | |
84759c6d KO |
1648 | EXPORT_SYMBOL(up_read_non_owner); |
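/*
 * The _non_owner pair serves the rare case where the acquiring and
 * releasing tasks differ -- e.g. a read lock taken on behalf of async
 * work and dropped from its completion handler (hypothetical sketch).
 * As the code above shows, the lockdep acquire/release annotations
 * are skipped entirely, so misuse will not be caught:
 *
 *	... submitting task ...
 *	down_read_non_owner(&my_dev_lock);
 *	queue_work(my_wq, &req->work);
 *
 *	... work handler, typically a different task ...
 *	up_read_non_owner(&my_dev_lock);
 */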
1649 | ||
4ea2176d | 1650 | #endif |