// SPDX-License-Identifier: GPL-2.0
/* kernel/rwsem.c: R/W semaphores, public implementation
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from asm-i386/semaphore.h
 *
 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
 * and Michel Lespinasse <walken@google.com>
 *
 * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
 * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
 *
 * Rwsem count bit fields re-definition and rwsem rearchitecture by
 * Waiman Long <longman@redhat.com> and
 * Peter Zijlstra <peterz@infradead.org>.
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/task.h>
#include <linux/sched/debug.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/signal.h>
#include <linux/sched/clock.h>
#include <linux/export.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>

#ifndef CONFIG_PREEMPT_RT
#include "lock_events.h"

/*
 * The least significant 2 bits of the owner value have the following
 * meanings when set.
 *  - Bit 0: RWSEM_READER_OWNED - The rwsem is owned by readers
 *  - Bit 1: RWSEM_NONSPINNABLE - Cannot spin on a reader-owned lock
 *
 * When the rwsem is reader-owned and a spinning writer has timed out,
 * the nonspinnable bit will be set to disable optimistic spinning.
 *
 * When a writer acquires a rwsem, it puts its task_struct pointer
 * into the owner field. It is cleared after an unlock.
 *
 * When a reader acquires a rwsem, it will also put its task_struct
 * pointer into the owner field with the RWSEM_READER_OWNED bit set.
 * On unlock, the owner field will largely be left untouched. So
 * for a free or reader-owned rwsem, the owner value may contain
 * information about the last reader that acquired the rwsem.
 *
 * That information may be helpful in debugging cases where the system
 * seems to hang on a reader-owned rwsem, especially if only one reader
 * is involved. Ideally we would like to track all the readers that own
 * a rwsem, but the overhead is simply too big.
 *
 * A fast path reader optimistic lock stealing is supported when the rwsem
 * is previously owned by a writer and the following conditions are met:
 *  - OSQ is empty
 *  - rwsem is not currently writer owned
 *  - the handoff isn't set.
 */
#define RWSEM_READER_OWNED	(1UL << 0)
#define RWSEM_NONSPINNABLE	(1UL << 1)
#define RWSEM_OWNER_FLAGS_MASK	(RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)

#ifdef CONFIG_DEBUG_RWSEMS
# define DEBUG_RWSEMS_WARN_ON(c, sem)	do {			\
	if (!debug_locks_silent &&				\
	    WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, magic = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
		#c, atomic_long_read(&(sem)->count),		\
		(unsigned long) sem->magic,			\
		atomic_long_read(&(sem)->owner), (long)current,	\
		list_empty(&(sem)->wait_list) ? "" : "not "))	\
			debug_locks_off();			\
	} while (0)
#else
# define DEBUG_RWSEMS_WARN_ON(c, sem)
#endif

/*
 * On 64-bit architectures, the bit definitions of the count are:
 *
 * Bit  0    - writer locked bit
 * Bit  1    - waiters present bit
 * Bit  2    - lock handoff bit
 * Bits 3-7  - reserved
 * Bits 8-62 - 55-bit reader count
 * Bit  63   - read fail bit
 *
 * On 32-bit architectures, the bit definitions of the count are:
 *
 * Bit  0    - writer locked bit
 * Bit  1    - waiters present bit
 * Bit  2    - lock handoff bit
 * Bits 3-7  - reserved
 * Bits 8-30 - 23-bit reader count
 * Bit  31   - read fail bit
 *
 * It is not likely that the most significant bit (read fail bit) will ever
 * be set. This guard bit is still checked anyway in the down_read() fastpath
 * just in case we need to use up more of the reader bits for other purposes
 * in the future.
 *
 * atomic_long_fetch_add() is used to obtain reader lock, whereas
 * atomic_long_cmpxchg() will be used to obtain writer lock.
 *
 * There are three places where the lock handoff bit may be set or cleared.
 * 1) rwsem_mark_wake() for readers.
 * 2) rwsem_try_write_lock() for writers.
 * 3) Error path of rwsem_down_write_slowpath().
 *
 * For all the above cases, wait_lock will be held. A writer must also
 * be the first one in the wait_list to be eligible for setting the handoff
 * bit. So concurrent setting/clearing of handoff bit is not possible.
 */
#define RWSEM_WRITER_LOCKED	(1UL << 0)
#define RWSEM_FLAG_WAITERS	(1UL << 1)
#define RWSEM_FLAG_HANDOFF	(1UL << 2)
#define RWSEM_FLAG_READFAIL	(1UL << (BITS_PER_LONG - 1))

#define RWSEM_READER_SHIFT	8
#define RWSEM_READER_BIAS	(1UL << RWSEM_READER_SHIFT)
#define RWSEM_READER_MASK	(~(RWSEM_READER_BIAS - 1))
#define RWSEM_WRITER_MASK	RWSEM_WRITER_LOCKED
#define RWSEM_LOCK_MASK		(RWSEM_WRITER_MASK|RWSEM_READER_MASK)
#define RWSEM_READ_FAILED_MASK	(RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\
				 RWSEM_FLAG_HANDOFF|RWSEM_FLAG_READFAIL)

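/*
 * Worked example (illustrative only, values assumed): on a 64-bit build,
 * a count of 0x0000000000000202 decodes as RWSEM_FLAG_WAITERS (bit 1) set
 * and a reader count of 0x200 >> RWSEM_READER_SHIFT = 2, i.e. two readers
 * hold the lock while at least one waiter is queued. A down_read() fast
 * path whose add would spill the reader count into bit 63 would instead
 * see RWSEM_FLAG_READFAIL via RWSEM_READ_FAILED_MASK and take the slow
 * path.
 */
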
/*
 * All writes to owner are protected by WRITE_ONCE() to make sure that
 * store tearing can't happen as optimistic spinners may read and use
 * the owner value concurrently without lock. Read from owner, however,
 * may not need READ_ONCE() as long as the pointer value is only used
 * for comparison and isn't being dereferenced.
 */
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
	atomic_long_set(&sem->owner, (long)current);
}

static inline void rwsem_clear_owner(struct rw_semaphore *sem)
{
	atomic_long_set(&sem->owner, 0);
}

/*
 * Test the flags in the owner field.
 */
static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags)
{
	return atomic_long_read(&sem->owner) & flags;
}

/*
 * The task_struct pointer of the last owning reader will be left in
 * the owner field.
 *
 * Note that the owner value just indicates that the task has owned the
 * rwsem previously; it may not be the real owner or one of the real
 * owners anymore when that field is examined, so take it with a grain
 * of salt.
 *
 * The reader non-spinnable bit is preserved.
 */
static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
					    struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED |
		(atomic_long_read(&sem->owner) & RWSEM_NONSPINNABLE);

	atomic_long_set(&sem->owner, val);
}

static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
{
	__rwsem_set_reader_owned(sem, current);
}

/*
 * Return true if the rwsem is owned by a reader.
 */
static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
{
#ifdef CONFIG_DEBUG_RWSEMS
	/*
	 * Check the count to see if it is write-locked.
	 */
	long count = atomic_long_read(&sem->count);

	if (count & RWSEM_WRITER_MASK)
		return false;
#endif
	return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
}

#ifdef CONFIG_DEBUG_RWSEMS
/*
 * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
 * is a task pointer in the owner field of a reader-owned rwsem, it will
 * be the real owner or one of the real owners. The only exception is when
 * the unlock is done by up_read_non_owner().
 */
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
	unsigned long val = atomic_long_read(&sem->owner);

	while ((val & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current) {
		if (atomic_long_try_cmpxchg(&sem->owner, &val,
					    val & RWSEM_OWNER_FLAGS_MASK))
			return;
	}
}
#else
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
}
#endif

/*
 * Set the RWSEM_NONSPINNABLE bit if the RWSEM_READER_OWNED flag
 * remains set. Otherwise, the operation will be aborted.
 */
static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
{
	unsigned long owner = atomic_long_read(&sem->owner);

	do {
		if (!(owner & RWSEM_READER_OWNED))
			break;
		if (owner & RWSEM_NONSPINNABLE)
			break;
	} while (!atomic_long_try_cmpxchg(&sem->owner, &owner,
					  owner | RWSEM_NONSPINNABLE));
}

static inline bool rwsem_read_trylock(struct rw_semaphore *sem, long *cntp)
{
	*cntp = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);

	if (WARN_ON_ONCE(*cntp < 0))
		rwsem_set_nonspinnable(sem);

	if (!(*cntp & RWSEM_READ_FAILED_MASK)) {
		rwsem_set_reader_owned(sem);
		return true;
	}

	return false;
}

static inline bool rwsem_write_trylock(struct rw_semaphore *sem)
{
	long tmp = RWSEM_UNLOCKED_VALUE;

	if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, RWSEM_WRITER_LOCKED)) {
		rwsem_set_owner(sem);
		return true;
	}

	return false;
}

/*
 * Return just the real task structure pointer of the owner
 */
static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
{
	return (struct task_struct *)
		(atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
}

/*
 * Return the real task structure pointer of the owner and the embedded
 * flags in the owner. pflags must be non-NULL.
 */
static inline struct task_struct *
rwsem_owner_flags(struct rw_semaphore *sem, unsigned long *pflags)
{
	unsigned long owner = atomic_long_read(&sem->owner);

	*pflags = owner & RWSEM_OWNER_FLAGS_MASK;
	return (struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);
}

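/*
 * Worked example (illustrative only, address assumed): if a reader whose
 * task_struct sits at 0xffff888012345600 takes the lock, the owner word
 * becomes 0xffff888012345601 (the pointer ORed with RWSEM_READER_OWNED).
 * rwsem_owner_flags() then hands back the pointer with the low two bits
 * masked off and *pflags == RWSEM_READER_OWNED. The packing works because
 * task_struct alignment is well above 4 bytes, leaving the two least
 * significant pointer bits free for flags.
 */
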
/*
 * Guide to the rw_semaphore's count field.
 *
 * When the RWSEM_WRITER_LOCKED bit in count is set, the lock is owned
 * by a writer.
 *
 * The lock is owned by readers when
 * (1) the RWSEM_WRITER_LOCKED isn't set in count,
 * (2) some of the reader bits are set in count, and
 * (3) the owner field has the RWSEM_READER_OWNED bit set.
 *
 * Having some reader bits set is not enough to guarantee a reader-owned
 * lock as the readers may be in the process of backing out from the count
 * and a writer has just released the lock. So another writer may steal
 * the lock immediately after that.
 */

/*
 * Initialize an rwsem:
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held semaphore:
	 */
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
#endif
#ifdef CONFIG_DEBUG_RWSEMS
	sem->magic = sem;
#endif
	atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
	raw_spin_lock_init(&sem->wait_lock);
	INIT_LIST_HEAD(&sem->wait_list);
	atomic_long_set(&sem->owner, 0L);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
	osq_lock_init(&sem->osq);
#endif
}
EXPORT_SYMBOL(__init_rwsem);

enum rwsem_waiter_type {
	RWSEM_WAITING_FOR_WRITE,
	RWSEM_WAITING_FOR_READ
};

struct rwsem_waiter {
	struct list_head list;
	struct task_struct *task;
	enum rwsem_waiter_type type;
	unsigned long timeout;
};
#define rwsem_first_waiter(sem) \
	list_first_entry(&sem->wait_list, struct rwsem_waiter, list)

enum rwsem_wake_type {
	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
	RWSEM_WAKE_READERS,	/* Wake readers only */
	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */
};

enum writer_wait_state {
	WRITER_NOT_FIRST,	/* Writer is not first in wait list */
	WRITER_FIRST,		/* Writer is first in wait list */
	WRITER_HANDOFF		/* Writer is first & handoff needed */
};

/*
 * The typical HZ value is either 250 or 1000. So set the minimum waiting
 * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
 * queue before initiating the handoff protocol.
 */
#define RWSEM_WAIT_TIMEOUT	DIV_ROUND_UP(HZ, 250)

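/*
 * For instance (simple arithmetic on the formula above): with HZ=1000,
 * DIV_ROUND_UP(1000, 250) = 4 jiffies = 4ms; with HZ=250 it is 1 jiffy,
 * which is also 4ms; with HZ=100 it rounds up to 1 jiffy = 10ms, the
 * "higher than 4ms" case mentioned above.
 */
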
/*
 * Magic number to batch-wakeup waiting readers, even when writers are
 * also present in the queue. This both limits the amount of work the
 * waking thread must do and also prevents any potential counter overflow,
 * however unlikely.
 */
#define MAX_READERS_WAKEUP	0x100

/*
 * handle the lock release when processes blocked on it can now run
 * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
 *   have been set.
 * - there must be someone on the queue
 * - the wait_lock must be held by the caller
 * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
 *   to actually wakeup the blocked task(s) and drop the reference count,
 *   preferably when the wait_lock is released
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only marked woken if downgrading is false
 */
static void rwsem_mark_wake(struct rw_semaphore *sem,
			    enum rwsem_wake_type wake_type,
			    struct wake_q_head *wake_q)
{
	struct rwsem_waiter *waiter, *tmp;
	long oldcount, woken = 0, adjustment = 0;
	struct list_head wlist;

	lockdep_assert_held(&sem->wait_lock);

	/*
	 * Take a peek at the queue head waiter such that we can determine
	 * the wakeup(s) to perform.
	 */
	waiter = rwsem_first_waiter(sem);

	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
		if (wake_type == RWSEM_WAKE_ANY) {
			/*
			 * Mark writer at the front of the queue for wakeup.
			 * Until the task is actually awoken later by the
			 * caller, other writers are able to steal it.
			 * Readers, on the other hand, will block as they
			 * will notice the queued writer.
			 */
			wake_q_add(wake_q, waiter->task);
			lockevent_inc(rwsem_wake_writer);
		}

		return;
	}

	/*
	 * No reader wakeup if there are too many of them already.
	 */
	if (unlikely(atomic_long_read(&sem->count) < 0))
		return;

	/*
	 * Writers might steal the lock before we grant it to the next reader.
	 * We prefer to do the first reader grant before counting readers
	 * so we can bail out early if a writer stole the lock.
	 */
	if (wake_type != RWSEM_WAKE_READ_OWNED) {
		struct task_struct *owner;

		adjustment = RWSEM_READER_BIAS;
		oldcount = atomic_long_fetch_add(adjustment, &sem->count);
		if (unlikely(oldcount & RWSEM_WRITER_MASK)) {
			/*
			 * When we've been waiting "too" long (for writers
			 * to give up the lock), request a HANDOFF to
			 * force the issue.
			 */
			if (!(oldcount & RWSEM_FLAG_HANDOFF) &&
			    time_after(jiffies, waiter->timeout)) {
				adjustment -= RWSEM_FLAG_HANDOFF;
				lockevent_inc(rwsem_rlock_handoff);
			}

			atomic_long_add(-adjustment, &sem->count);
			return;
		}
		/*
		 * Set it to reader-owned to give spinners an early
		 * indication that readers now have the lock.
		 * The reader nonspinnable bit seen at slowpath entry of
		 * the reader is copied over.
		 */
		owner = waiter->task;
		__rwsem_set_reader_owned(sem, owner);
	}

	/*
	 * Grant up to MAX_READERS_WAKEUP read locks to all the readers in the
	 * queue. We know that the number woken will be at least 1 as we
	 * accounted for above. Note we increment the 'active part' of the
	 * count by the number of readers before waking any processes up.
	 *
	 * This is an adaptation of the phase-fair R/W locks where at the
	 * reader phase (first waiter is a reader), all readers are eligible
	 * to acquire the lock at the same time irrespective of their order
	 * in the queue. The writers acquire the lock according to their
	 * order in the queue.
	 *
	 * We have to do wakeup in 2 passes to prevent the possibility that
	 * the reader count may be decremented before it is incremented. It
	 * is because the to-be-woken waiter may not have slept yet. So it
	 * may see waiter->task got cleared, finish its critical section and
	 * do an unlock before the reader count increment.
	 *
	 * 1) Collect the read-waiters in a separate list, count them and
	 *    fully increment the reader count in rwsem.
	 * 2) For each waiter in the new list, clear waiter->task and
	 *    put them into wake_q to be woken up later.
	 */
	INIT_LIST_HEAD(&wlist);
	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
		if (waiter->type == RWSEM_WAITING_FOR_WRITE)
			continue;

		woken++;
		list_move_tail(&waiter->list, &wlist);

		/*
		 * Limit # of readers that can be woken up per wakeup call.
		 */
		if (woken >= MAX_READERS_WAKEUP)
			break;
	}

	adjustment = woken * RWSEM_READER_BIAS - adjustment;
	lockevent_cond_inc(rwsem_wake_reader, woken);
	if (list_empty(&sem->wait_list)) {
		/* hit end of list above */
		adjustment -= RWSEM_FLAG_WAITERS;
	}

	/*
	 * When we've woken a reader, we no longer need to force writers
	 * to give up the lock and we can clear HANDOFF.
	 */
	if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF))
		adjustment -= RWSEM_FLAG_HANDOFF;

	if (adjustment)
		atomic_long_add(adjustment, &sem->count);

	/* 2nd pass */
	list_for_each_entry_safe(waiter, tmp, &wlist, list) {
		struct task_struct *tsk;

		tsk = waiter->task;
		get_task_struct(tsk);

		/*
		 * Ensure calling get_task_struct() before setting the reader
		 * waiter to nil such that rwsem_down_read_slowpath() cannot
		 * race with do_exit() by always holding a reference count
		 * to the task to wakeup.
		 */
		smp_store_release(&waiter->task, NULL);
		/*
		 * Ensure issuing the wakeup (either by us or someone else)
		 * after setting the reader waiter to nil.
		 */
		wake_q_add_safe(wake_q, tsk);
	}
}

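/*
 * Worked example for the adjustment arithmetic above (illustrative only):
 * suppose wake_type != RWSEM_WAKE_READ_OWNED and three readers are queued.
 * The first grant already added RWSEM_READER_BIAS (256), so after the
 * first pass woken == 3 and adjustment becomes 3 * 256 - 256 = 512, i.e.
 * exactly the bias for the two readers not yet accounted for. If the wait
 * list went empty, RWSEM_FLAG_WAITERS (2) is subtracted from that value
 * as well before the single atomic_long_add() is issued.
 */
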
/*
 * This function must be called with the sem->wait_lock held to prevent
 * race conditions between checking the rwsem wait list and setting the
 * sem->count accordingly.
 *
 * If wstate is WRITER_HANDOFF, it will make sure that either the handoff
 * bit is set or the lock is acquired with handoff bit cleared.
 */
static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
					enum writer_wait_state wstate)
{
	long count, new;

	lockdep_assert_held(&sem->wait_lock);

	count = atomic_long_read(&sem->count);
	do {
		bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);

		if (has_handoff && wstate == WRITER_NOT_FIRST)
			return false;

		new = count;

		if (count & RWSEM_LOCK_MASK) {
			if (has_handoff || (wstate != WRITER_HANDOFF))
				return false;

			new |= RWSEM_FLAG_HANDOFF;
		} else {
			new |= RWSEM_WRITER_LOCKED;
			new &= ~RWSEM_FLAG_HANDOFF;

			if (list_is_singular(&sem->wait_list))
				new &= ~RWSEM_FLAG_WAITERS;
		}
	} while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));

	/*
	 * We have either acquired the lock with handoff bit cleared or
	 * set the handoff bit.
	 */
	if (new & RWSEM_FLAG_HANDOFF)
		return false;

	rwsem_set_owner(sem);
	return true;
}

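/*
 * Example transitions (illustrative only): a sole first-waiter writer
 * seeing count == RWSEM_FLAG_WAITERS on an otherwise free lock cmpxchg's
 * it to RWSEM_WRITER_LOCKED with RWSEM_FLAG_WAITERS cleared (singular
 * wait list) and returns true. The same writer in the WRITER_HANDOFF
 * state seeing readers in RWSEM_LOCK_MASK instead just ORs in
 * RWSEM_FLAG_HANDOFF and returns false, deferring the acquisition.
 */
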
/*
 * The rwsem_spin_on_owner() function returns the following 4 values
 * depending on the lock owner state.
 *   OWNER_NULL  : owner is currently NULL
 *   OWNER_WRITER: when owner changes and is a writer
 *   OWNER_READER: when owner changes and the new owner may be a reader.
 *   OWNER_NONSPINNABLE:
 *		   when optimistic spinning has to stop because either the
 *		   owner stops running, is unknown, or its timeslice has
 *		   been used up.
 */
enum owner_state {
	OWNER_NULL		= 1 << 0,
	OWNER_WRITER		= 1 << 1,
	OWNER_READER		= 1 << 2,
	OWNER_NONSPINNABLE	= 1 << 3,
};

#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
 * Try to acquire write lock before the writer has been put on wait queue.
 */
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
	long count = atomic_long_read(&sem->count);

	while (!(count & (RWSEM_LOCK_MASK|RWSEM_FLAG_HANDOFF))) {
		if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
					count | RWSEM_WRITER_LOCKED)) {
			rwsem_set_owner(sem);
			lockevent_inc(rwsem_opt_lock);
			return true;
		}
	}
	return false;
}

static inline bool owner_on_cpu(struct task_struct *owner)
{
	/*
	 * Because of the lock holder preemption issue, we skip spinning
	 * if the task is not on a CPU or its CPU is preempted.
	 */
	return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
}

static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *owner;
	unsigned long flags;
	bool ret = true;

	if (need_resched()) {
		lockevent_inc(rwsem_opt_fail);
		return false;
	}

	preempt_disable();
	/*
	 * Disabling preemption is equivalent to an RCU read-side critical
	 * section, thus the task_struct structure won't go away.
	 */
	owner = rwsem_owner_flags(sem, &flags);
	/*
	 * Don't check the read-owner as the entry may be stale.
	 */
	if ((flags & RWSEM_NONSPINNABLE) ||
	    (owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner)))
		ret = false;
	preempt_enable();

	lockevent_cond_inc(rwsem_opt_fail, !ret);
	return ret;
}

#define OWNER_SPINNABLE		(OWNER_NULL | OWNER_WRITER | OWNER_READER)

static inline enum owner_state
rwsem_owner_state(struct task_struct *owner, unsigned long flags)
{
	if (flags & RWSEM_NONSPINNABLE)
		return OWNER_NONSPINNABLE;

	if (flags & RWSEM_READER_OWNED)
		return OWNER_READER;

	return owner ? OWNER_WRITER : OWNER_NULL;
}

static noinline enum owner_state
rwsem_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *new, *owner;
	unsigned long flags, new_flags;
	enum owner_state state;

	lockdep_assert_preemption_disabled();

	owner = rwsem_owner_flags(sem, &flags);
	state = rwsem_owner_state(owner, flags);
	if (state != OWNER_WRITER)
		return state;

	for (;;) {
		/*
		 * When a waiting writer set the handoff flag, it may spin
		 * on the owner as well. Once that writer acquires the lock,
		 * we can spin on it. So we don't need to quit even when the
		 * handoff bit is set.
		 */
		new = rwsem_owner_flags(sem, &new_flags);
		if ((new != owner) || (new_flags != flags)) {
			state = rwsem_owner_state(new, new_flags);
			break;
		}

		/*
		 * Ensure we emit the owner->on_cpu dereference _after_
		 * checking sem->owner still matches owner. If that fails,
		 * owner might point to free()d memory. If it still matches,
		 * our spinning context has already disabled preemption,
		 * which is equivalent to an RCU read-side critical section
		 * and ensures the memory stays valid.
		 */
		barrier();

		if (need_resched() || !owner_on_cpu(owner)) {
			state = OWNER_NONSPINNABLE;
			break;
		}

		cpu_relax();
	}

	return state;
}

/*
 * Calculate reader-owned rwsem spinning threshold for writer
 *
 * The more readers own the rwsem, the longer it will take for them to
 * wind down and free the rwsem. So the empirical formula used to
 * determine the actual spinning time limit here is:
 *
 *   Spinning threshold = (10 + nr_readers/2)us
 *
 * The limit is capped to a maximum of 25us (30 readers). This is just
 * a heuristic and is subject to change in the future.
 */
static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)
{
	long count = atomic_long_read(&sem->count);
	int readers = count >> RWSEM_READER_SHIFT;
	u64 delta;

	if (readers > 30)
		readers = 30;
	delta = (20 + readers) * NSEC_PER_USEC / 2;

	return sched_clock() + delta;
}

static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	bool taken = false;
	int prev_owner_state = OWNER_NULL;
	int loop = 0;
	u64 rspin_threshold = 0;

	preempt_disable();

	/* sem->wait_lock should not be held when doing optimistic spinning */
	if (!osq_lock(&sem->osq))
		goto done;

	/*
	 * Optimistically spin on the owner field and attempt to acquire the
	 * lock whenever the owner changes. Spinning will be stopped when:
	 *  1) the owning writer isn't running; or
	 *  2) readers own the lock and spinning time has exceeded limit.
	 */
	for (;;) {
		enum owner_state owner_state;

		owner_state = rwsem_spin_on_owner(sem);
		if (!(owner_state & OWNER_SPINNABLE))
			break;

		/*
		 * Try to acquire the lock
		 */
		taken = rwsem_try_write_lock_unqueued(sem);

		if (taken)
			break;

		/*
		 * Time-based reader-owned rwsem optimistic spinning
		 */
		if (owner_state == OWNER_READER) {
			/*
			 * Re-initialize rspin_threshold every time when
			 * the owner state changes from non-reader to reader.
			 * This allows a writer to steal the lock in between
			 * 2 reader phases and have the threshold reset at
			 * the beginning of the 2nd reader phase.
			 */
			if (prev_owner_state != OWNER_READER) {
				if (rwsem_test_oflags(sem, RWSEM_NONSPINNABLE))
					break;
				rspin_threshold = rwsem_rspin_threshold(sem);
				loop = 0;
			}

			/*
			 * Check time threshold once every 16 iterations to
			 * avoid calling sched_clock() too frequently so
			 * as to reduce the average latency between the times
			 * when the lock becomes free and when the spinner
			 * is ready to do a trylock.
			 */
			else if (!(++loop & 0xf) && (sched_clock() > rspin_threshold)) {
				rwsem_set_nonspinnable(sem);
				lockevent_inc(rwsem_opt_nospin);
				break;
			}
		}

		/*
		 * An RT task cannot do optimistic spinning if it cannot
		 * be sure the lock holder is running or live-lock may
		 * happen if the current task and the lock holder happen
		 * to run on the same CPU. However, aborting optimistic
		 * spinning while a NULL owner is detected may miss some
		 * opportunity where spinning can continue without causing
		 * problem.
		 *
		 * There are 2 possible cases where an RT task may be able
		 * to continue spinning.
		 *
		 * 1) The lock owner is in the process of releasing the
		 *    lock, sem->owner is cleared but the lock has not
		 *    been released yet.
		 * 2) The lock was free and owner cleared, but another
		 *    task just comes in and acquires the lock before
		 *    we try to get it. The new owner may be a spinnable
		 *    writer.
		 *
		 * To take advantage of the two scenarios listed above, the
		 * RT task is made to retry one more time to see if it can
		 * acquire the lock or continue spinning on the new owning
		 * writer. Of course, if the time lag is long enough or the
		 * new owner is not a writer or spinnable, the RT task will
		 * quit spinning.
		 *
		 * If the owner is a writer, the need_resched() check is
		 * done inside rwsem_spin_on_owner(). If the owner is not
		 * a writer, the need_resched() check needs to be done here.
		 */
		if (owner_state != OWNER_WRITER) {
			if (need_resched())
				break;
			if (rt_task(current) &&
			   (prev_owner_state != OWNER_WRITER))
				break;
		}
		prev_owner_state = owner_state;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax();
	}
	osq_unlock(&sem->osq);
done:
	preempt_enable();
	lockevent_cond_inc(rwsem_opt_fail, !taken);
	return taken;
}

/*
 * Clear the owner's RWSEM_NONSPINNABLE bit if it is set. This should
 * only be called when the reader count reaches 0.
 */
static inline void clear_nonspinnable(struct rw_semaphore *sem)
{
	if (rwsem_test_oflags(sem, RWSEM_NONSPINNABLE))
		atomic_long_andnot(RWSEM_NONSPINNABLE, &sem->owner);
}

#else
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
	return false;
}

static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	return false;
}

static inline void clear_nonspinnable(struct rw_semaphore *sem) { }

static inline enum owner_state
rwsem_spin_on_owner(struct rw_semaphore *sem)
{
	return OWNER_NONSPINNABLE;
}
#endif

/*
 * Wait for the read lock to be granted
 */
static struct rw_semaphore __sched *
rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int state)
{
	long adjustment = -RWSEM_READER_BIAS;
	long rcnt = (count >> RWSEM_READER_SHIFT);
	struct rwsem_waiter waiter;
	DEFINE_WAKE_Q(wake_q);
	bool wake = false;

	/*
	 * To prevent a constant stream of readers from starving a sleeping
	 * waiter, don't attempt optimistic lock stealing if the lock is
	 * currently owned by readers.
	 */
	if ((atomic_long_read(&sem->owner) & RWSEM_READER_OWNED) &&
	    (rcnt > 1) && !(count & RWSEM_WRITER_LOCKED))
		goto queue;

	/*
	 * Reader optimistic lock stealing.
	 */
	if (!(count & (RWSEM_WRITER_LOCKED | RWSEM_FLAG_HANDOFF))) {
		rwsem_set_reader_owned(sem);
		lockevent_inc(rwsem_rlock_steal);

		/*
		 * Wake up other readers in the wait queue if it is
		 * the first reader.
		 */
		if ((rcnt == 1) && (count & RWSEM_FLAG_WAITERS)) {
			raw_spin_lock_irq(&sem->wait_lock);
			if (!list_empty(&sem->wait_list))
				rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED,
						&wake_q);
			raw_spin_unlock_irq(&sem->wait_lock);
			wake_up_q(&wake_q);
		}
		return sem;
	}

queue:
	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_READ;
	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;

	raw_spin_lock_irq(&sem->wait_lock);
	if (list_empty(&sem->wait_list)) {
		/*
		 * In case the wait queue is empty and the lock isn't owned
		 * by a writer or has the handoff bit set, this reader can
		 * exit the slowpath and return immediately as its
		 * RWSEM_READER_BIAS has already been set in the count.
		 */
		if (!(atomic_long_read(&sem->count) &
		     (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {
			/* Provide lock ACQUIRE */
			smp_acquire__after_ctrl_dep();
			raw_spin_unlock_irq(&sem->wait_lock);
			rwsem_set_reader_owned(sem);
			lockevent_inc(rwsem_rlock_fast);
			return sem;
		}
		adjustment += RWSEM_FLAG_WAITERS;
	}
	list_add_tail(&waiter.list, &sem->wait_list);

	/* we're now waiting on the lock, but no longer actively locking */
	count = atomic_long_add_return(adjustment, &sem->count);

	/*
	 * If there are no active locks, wake the front queued process(es).
	 *
	 * If there are no writers and we are first in the queue,
	 * wake our own waiter to join the existing active readers!
	 */
	if (!(count & RWSEM_LOCK_MASK)) {
		clear_nonspinnable(sem);
		wake = true;
	}
	if (wake || (!(count & RWSEM_WRITER_MASK) &&
		    (adjustment & RWSEM_FLAG_WAITERS)))
		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

	raw_spin_unlock_irq(&sem->wait_lock);
	wake_up_q(&wake_q);

	/* wait to be given the lock */
	for (;;) {
		set_current_state(state);
		if (!smp_load_acquire(&waiter.task)) {
			/* Matches rwsem_mark_wake()'s smp_store_release(). */
			break;
		}
		if (signal_pending_state(state, current)) {
			raw_spin_lock_irq(&sem->wait_lock);
			if (waiter.task)
				goto out_nolock;
			raw_spin_unlock_irq(&sem->wait_lock);
			/* Ordered by sem->wait_lock against rwsem_mark_wake(). */
			break;
		}
		schedule();
		lockevent_inc(rwsem_sleep_reader);
	}

	__set_current_state(TASK_RUNNING);
	lockevent_inc(rwsem_rlock);
	return sem;

out_nolock:
	list_del(&waiter.list);
	if (list_empty(&sem->wait_list)) {
		atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF,
				   &sem->count);
	}
	raw_spin_unlock_irq(&sem->wait_lock);
	__set_current_state(TASK_RUNNING);
	lockevent_inc(rwsem_rlock_fail);
	return ERR_PTR(-EINTR);
}

/*
 * Wait until we successfully acquire the write lock
 */
static struct rw_semaphore *
rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
{
	long count;
	enum writer_wait_state wstate;
	struct rwsem_waiter waiter;
	struct rw_semaphore *ret = sem;
	DEFINE_WAKE_Q(wake_q);

	/* do optimistic spinning and steal lock if possible */
	if (rwsem_can_spin_on_owner(sem) && rwsem_optimistic_spin(sem)) {
		/* rwsem_optimistic_spin() implies ACQUIRE on success */
		return sem;
	}

	/*
	 * Optimistic spinning failed, proceed to the slowpath
	 * and block until we can acquire the sem.
	 */
	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_WRITE;
	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;

	raw_spin_lock_irq(&sem->wait_lock);

	/* account for this before adding a new element to the list */
	wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST;

	list_add_tail(&waiter.list, &sem->wait_list);

	/* we're now waiting on the lock */
	if (wstate == WRITER_NOT_FIRST) {
		count = atomic_long_read(&sem->count);

		/*
		 * If there were already threads queued before us and:
		 *  1) there are no active locks, wake the front
		 *     queued process(es) as the handoff bit might be set.
		 *  2) there are no active writers and some readers, the lock
		 *     must be read owned; so we try to wake any read lock
		 *     waiters that were queued ahead of us.
		 */
		if (count & RWSEM_WRITER_MASK)
			goto wait;

		rwsem_mark_wake(sem, (count & RWSEM_READER_MASK)
					? RWSEM_WAKE_READERS
					: RWSEM_WAKE_ANY, &wake_q);

		if (!wake_q_empty(&wake_q)) {
			/*
			 * We want to minimize wait_lock hold time especially
			 * when a large number of readers are to be woken up.
			 */
			raw_spin_unlock_irq(&sem->wait_lock);
			wake_up_q(&wake_q);
			wake_q_init(&wake_q);	/* Used again, reinit */
			raw_spin_lock_irq(&sem->wait_lock);
		}
	} else {
		atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
	}

wait:
	/* wait until we successfully acquire the lock */
	set_current_state(state);
	for (;;) {
		if (rwsem_try_write_lock(sem, wstate)) {
			/* rwsem_try_write_lock() implies ACQUIRE on success */
			break;
		}

		raw_spin_unlock_irq(&sem->wait_lock);

		/*
		 * After setting the handoff bit and failing to acquire
		 * the lock, attempt to spin on owner to accelerate lock
		 * transfer. If the previous owner is an on-cpu writer and it
		 * has just released the lock, OWNER_NULL will be returned.
		 * In this case, we attempt to acquire the lock again
		 * without sleeping.
		 */
		if (wstate == WRITER_HANDOFF) {
			enum owner_state owner_state;

			preempt_disable();
			owner_state = rwsem_spin_on_owner(sem);
			preempt_enable();

			if (owner_state == OWNER_NULL)
				goto trylock_again;
		}

		/* Block until there are no active lockers. */
		for (;;) {
			if (signal_pending_state(state, current))
				goto out_nolock;

			schedule();
			lockevent_inc(rwsem_sleep_writer);
			set_current_state(state);
			/*
			 * If HANDOFF bit is set, unconditionally do
			 * a trylock.
			 */
			if (wstate == WRITER_HANDOFF)
				break;

			if ((wstate == WRITER_NOT_FIRST) &&
			    (rwsem_first_waiter(sem) == &waiter))
				wstate = WRITER_FIRST;

			count = atomic_long_read(&sem->count);
			if (!(count & RWSEM_LOCK_MASK))
				break;

			/*
			 * The setting of the handoff bit is deferred
			 * until rwsem_try_write_lock() is called.
			 */
			if ((wstate == WRITER_FIRST) && (rt_task(current) ||
			    time_after(jiffies, waiter.timeout))) {
				wstate = WRITER_HANDOFF;
				lockevent_inc(rwsem_wlock_handoff);
				break;
			}
		}
trylock_again:
		raw_spin_lock_irq(&sem->wait_lock);
	}
	__set_current_state(TASK_RUNNING);
	list_del(&waiter.list);
	raw_spin_unlock_irq(&sem->wait_lock);
	lockevent_inc(rwsem_wlock);

	return ret;

out_nolock:
	__set_current_state(TASK_RUNNING);
	raw_spin_lock_irq(&sem->wait_lock);
	list_del(&waiter.list);

	if (unlikely(wstate == WRITER_HANDOFF))
		atomic_long_add(-RWSEM_FLAG_HANDOFF, &sem->count);

	if (list_empty(&sem->wait_list))
		atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count);
	else
		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
	raw_spin_unlock_irq(&sem->wait_lock);
	wake_up_q(&wake_q);
	lockevent_inc(rwsem_wlock_fail);

	return ERR_PTR(-EINTR);
}

/*
 * handle waking up a waiter on the semaphore
 * - up_read/up_write has decremented the active part of count if we come here
 */
static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
	unsigned long flags;
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irqsave(&sem->wait_lock, flags);

	if (!list_empty(&sem->wait_list))
		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
	wake_up_q(&wake_q);

	return sem;
}

/*
 * downgrade a write lock into a read lock
 * - caller incremented waiting part of count and discovered it still negative
 * - just wake up any readers at the front of the queue
 */
static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
	unsigned long flags;
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irqsave(&sem->wait_lock, flags);

	if (!list_empty(&sem->wait_list))
		rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
	wake_up_q(&wake_q);

	return sem;
}

/*
 * lock for reading
 */
static inline int __down_read_common(struct rw_semaphore *sem, int state)
{
	long count;

	if (!rwsem_read_trylock(sem, &count)) {
		if (IS_ERR(rwsem_down_read_slowpath(sem, count, state)))
			return -EINTR;
		DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
	}
	return 0;
}

static inline void __down_read(struct rw_semaphore *sem)
{
	__down_read_common(sem, TASK_UNINTERRUPTIBLE);
}

static inline int __down_read_interruptible(struct rw_semaphore *sem)
{
	return __down_read_common(sem, TASK_INTERRUPTIBLE);
}

static inline int __down_read_killable(struct rw_semaphore *sem)
{
	return __down_read_common(sem, TASK_KILLABLE);
}

static inline int __down_read_trylock(struct rw_semaphore *sem)
{
	long tmp;

	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);

	/*
	 * Optimize for the case when the rwsem is not locked at all.
	 */
	tmp = RWSEM_UNLOCKED_VALUE;
	do {
		if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
					tmp + RWSEM_READER_BIAS)) {
			rwsem_set_reader_owned(sem);
			return 1;
		}
	} while (!(tmp & RWSEM_READ_FAILED_MASK));
	return 0;
}

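/*
 * Example (illustrative only): on an unlocked rwsem, tmp starts at
 * RWSEM_UNLOCKED_VALUE (0) and the cmpxchg installs 0 + RWSEM_READER_BIAS
 * = 0x100, i.e. one reader. If another reader got in first, the cmpxchg
 * fails with tmp reloaded to 0x100 and the loop retries with 0x200, only
 * giving up once tmp intersects RWSEM_READ_FAILED_MASK (e.g. a writer or
 * the handoff bit appeared).
 */
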
/*
 * lock for writing
 */
static inline int __down_write_common(struct rw_semaphore *sem, int state)
{
	if (unlikely(!rwsem_write_trylock(sem))) {
		if (IS_ERR(rwsem_down_write_slowpath(sem, state)))
			return -EINTR;
	}

	return 0;
}

static inline void __down_write(struct rw_semaphore *sem)
{
	__down_write_common(sem, TASK_UNINTERRUPTIBLE);
}

static inline int __down_write_killable(struct rw_semaphore *sem)
{
	return __down_write_common(sem, TASK_KILLABLE);
}

static inline int __down_write_trylock(struct rw_semaphore *sem)
{
	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
	return rwsem_write_trylock(sem);
}

/*
 * unlock after reading
 */
static inline void __up_read(struct rw_semaphore *sem)
{
	long tmp;

	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
	DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);

	rwsem_clear_reader_owned(sem);
	tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
	DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
	if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
		      RWSEM_FLAG_WAITERS)) {
		clear_nonspinnable(sem);
		rwsem_wake(sem);
	}
}

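/*
 * Decode of the wakeup test above (illustrative only): after the release,
 * (tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) == RWSEM_FLAG_WAITERS
 * means no writer bit and no remaining reader bias, but waiters queued;
 * e.g. tmp == 0x2 after the last reader dropped its 0x100 bias. Only
 * then is the wakeup path entered.
 */
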
/*
 * unlock after writing
 */
static inline void __up_write(struct rw_semaphore *sem)
{
	long tmp;

	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
	/*
	 * sem->owner may differ from current if the ownership is transferred
	 * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits.
	 */
	DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) &&
			    !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem);

	rwsem_clear_owner(sem);
	tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
	if (unlikely(tmp & RWSEM_FLAG_WAITERS))
		rwsem_wake(sem);
}

/*
 * downgrade write lock to read lock
 */
static inline void __downgrade_write(struct rw_semaphore *sem)
{
	long tmp;

	/*
	 * When downgrading from exclusive to shared ownership,
	 * anything inside the write-locked region cannot leak
	 * into the read side. In contrast, anything in the
	 * read-locked region is ok to be re-ordered into the
	 * write side. As such, rely on RELEASE semantics.
	 */
	DEBUG_RWSEMS_WARN_ON(rwsem_owner(sem) != current, sem);
	tmp = atomic_long_fetch_add_release(
		-RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
	rwsem_set_reader_owned(sem);
	if (tmp & RWSEM_FLAG_WAITERS)
		rwsem_downgrade_wake(sem);
}

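/*
 * Count arithmetic for the downgrade above (illustrative only): the
 * single atomic adds -RWSEM_WRITER_LOCKED + RWSEM_READER_BIAS = -1 + 256
 * = 255, so a count of 0x1 (writer, no waiters) becomes 0x100 (one
 * reader) in one step, with no window where the lock appears free.
 */
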
#else /* !CONFIG_PREEMPT_RT */

#define RT_MUTEX_BUILD_MUTEX
#include "rtmutex.c"

#define rwbase_set_and_save_current_state(state)	\
	set_current_state(state)

#define rwbase_restore_current_state()			\
	__set_current_state(TASK_RUNNING)

#define rwbase_rtmutex_lock_state(rtm, state)		\
	__rt_mutex_lock(rtm, state)

#define rwbase_rtmutex_slowlock_locked(rtm, state)	\
	__rt_mutex_slowlock_locked(rtm, NULL, state)

#define rwbase_rtmutex_unlock(rtm)			\
	__rt_mutex_unlock(rtm)

#define rwbase_rtmutex_trylock(rtm)			\
	__rt_mutex_trylock(rtm)

#define rwbase_signal_pending_state(state, current)	\
	signal_pending_state(state, current)

#define rwbase_schedule()				\
	schedule()

#include "rwbase_rt.c"

15eb7c88 | 1390 | void __init_rwsem(struct rw_semaphore *sem, const char *name, |
42254105 TG |
1391 | struct lock_class_key *key) |
1392 | { | |
15eb7c88 MG |
1393 | init_rwbase_rt(&(sem)->rwbase); |
1394 | ||
1395 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
42254105 TG |
1396 | debug_check_no_locks_freed((void *)sem, sizeof(*sem)); |
1397 | lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP); | |
42254105 | 1398 | #endif |
15eb7c88 MG |
1399 | } |
1400 | EXPORT_SYMBOL(__init_rwsem); | |
42254105 TG |
1401 | |
1402 | static inline void __down_read(struct rw_semaphore *sem) | |
1403 | { | |
1404 | rwbase_read_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE); | |
1405 | } | |
1406 | ||
1407 | static inline int __down_read_interruptible(struct rw_semaphore *sem) | |
1408 | { | |
1409 | return rwbase_read_lock(&sem->rwbase, TASK_INTERRUPTIBLE); | |
1410 | } | |
1411 | ||
1412 | static inline int __down_read_killable(struct rw_semaphore *sem) | |
1413 | { | |
1414 | return rwbase_read_lock(&sem->rwbase, TASK_KILLABLE); | |
1415 | } | |
1416 | ||
1417 | static inline int __down_read_trylock(struct rw_semaphore *sem) | |
1418 | { | |
1419 | return rwbase_read_trylock(&sem->rwbase); | |
1420 | } | |
1421 | ||
1422 | static inline void __up_read(struct rw_semaphore *sem) | |
1423 | { | |
1424 | rwbase_read_unlock(&sem->rwbase, TASK_NORMAL); | |
1425 | } | |
1426 | ||
1427 | static inline void __sched __down_write(struct rw_semaphore *sem) | |
1428 | { | |
1429 | rwbase_write_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE); | |
1430 | } | |
1431 | ||
1432 | static inline int __sched __down_write_killable(struct rw_semaphore *sem) | |
1433 | { | |
1434 | return rwbase_write_lock(&sem->rwbase, TASK_KILLABLE); | |
1435 | } | |
1436 | ||
1437 | static inline int __down_write_trylock(struct rw_semaphore *sem) | |
1438 | { | |
1439 | return rwbase_write_trylock(&sem->rwbase); | |
1440 | } | |
1441 | ||
1442 | static inline void __up_write(struct rw_semaphore *sem) | |
1443 | { | |
1444 | rwbase_write_unlock(&sem->rwbase); | |
1445 | } | |
1446 | ||
1447 | static inline void __downgrade_write(struct rw_semaphore *sem) | |
1448 | { | |
1449 | rwbase_write_downgrade(&sem->rwbase); | |
1450 | } | |
1451 | ||
1452 | /* Debug stubs for the common API */ | |
1453 | #define DEBUG_RWSEMS_WARN_ON(c, sem) | |
1454 | ||
1455 | static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem, | |
1456 | struct task_struct *owner) | |
1457 | { | |
1458 | } | |
1459 | ||
1460 | static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem) | |
1461 | { | |
1462 | int count = atomic_read(&sem->rwbase.readers); | |
1463 | ||
/*
 * ->readers rests at READER_BIAS when unlocked and is incremented
 * once per reader, so any other negative value means at least one
 * reader currently holds the lock.
 */
1464 | return count < 0 && count != READER_BIAS;
1465 | } | |
1466 | ||
1467 | #endif /* CONFIG_PREEMPT_RT */ | |
1468 | ||
c4e05116 IM |
1469 | /* |
1470 | * lock for reading | |
1471 | */ | |
c7af77b5 | 1472 | void __sched down_read(struct rw_semaphore *sem) |
c4e05116 IM |
1473 | { |
1474 | might_sleep(); | |
1475 | rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); | |
1476 | ||
4fe87745 | 1477 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); |
c4e05116 | 1478 | } |
c4e05116 IM |
1479 | EXPORT_SYMBOL(down_read); |
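/*
 * Basic usage sketch (my_sem/my_data are hypothetical; assumes
 * <linux/rwsem.h>): readers may sleep while waiting, so this must only
 * be called from process context.
 */
static DECLARE_RWSEM(my_sem);
static int my_data;

static int my_read(void)
{
	int v;

	down_read(&my_sem);
	v = my_data;		/* shared state, read-locked */
	up_read(&my_sem);
	return v;
}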
1480 | ||
31784cff EB |
1481 | int __sched down_read_interruptible(struct rw_semaphore *sem) |
1482 | { | |
1483 | might_sleep(); | |
1484 | rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); | |
1485 | ||
1486 | if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_interruptible)) { | |
1487 | rwsem_release(&sem->dep_map, _RET_IP_); | |
1488 | return -EINTR; | |
1489 | } | |
1490 | ||
1491 | return 0; | |
1492 | } | |
1493 | EXPORT_SYMBOL(down_read_interruptible); | |
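/*
 * Error-handling sketch for the interruptible form (hypothetical names,
 * continuing the my_sem example above): on -EINTR the lock was never
 * taken, so the caller must bail out without touching the protected
 * data or calling up_read(). The killable variant below behaves the
 * same, except only a fatal signal interrupts the wait.
 */
static int my_read_interruptible(int *out)
{
	int ret = down_read_interruptible(&my_sem);

	if (ret)		/* -EINTR: lock not held */
		return ret;
	*out = my_data;
	up_read(&my_sem);
	return 0;
}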
1494 | ||
76f8507f KT |
1495 | int __sched down_read_killable(struct rw_semaphore *sem) |
1496 | { | |
1497 | might_sleep(); | |
1498 | rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); | |
1499 | ||
1500 | if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) { | |
5facae4f | 1501 | rwsem_release(&sem->dep_map, _RET_IP_); |
76f8507f KT |
1502 | return -EINTR; |
1503 | } | |
1504 | ||
76f8507f KT |
1505 | return 0; |
1506 | } | |
76f8507f KT |
1507 | EXPORT_SYMBOL(down_read_killable); |
1508 | ||
c4e05116 IM |
1509 | /* |
1510 | * trylock for reading -- returns 1 if successful, 0 if contention | |
1511 | */ | |
1512 | int down_read_trylock(struct rw_semaphore *sem) | |
1513 | { | |
1514 | int ret = __down_read_trylock(sem); | |
1515 | ||
c7580c1e | 1516 | if (ret == 1) |
c4e05116 IM |
1517 | rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); |
1518 | return ret; | |
1519 | } | |
c4e05116 IM |
1520 | EXPORT_SYMBOL(down_read_trylock); |
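/*
 * Trylock sketch (hypothetical names): note the 1-on-success convention
 * documented above, so the result can be tested directly. Useful where
 * the caller cannot afford to block.
 */
static bool my_read_opportunistic(int *out)
{
	if (!down_read_trylock(&my_sem))
		return false;	/* contended, lock not held */
	*out = my_data;
	up_read(&my_sem);
	return true;
}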
1521 | ||
1522 | /* | |
1523 | * lock for writing | |
1524 | */ | |
c7af77b5 | 1525 | void __sched down_write(struct rw_semaphore *sem) |
c4e05116 IM |
1526 | { |
1527 | might_sleep(); | |
1528 | rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); | |
4fe87745 | 1529 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
c4e05116 | 1530 | } |
c4e05116 IM |
1531 | EXPORT_SYMBOL(down_write); |
1532 | ||
916633a4 MH |
1533 | /* |
1534 | * lock for writing, killable -- returns -EINTR if interrupted by a fatal signal | |
1535 | */ | |
1536 | int __sched down_write_killable(struct rw_semaphore *sem) | |
1537 | { | |
1538 | might_sleep(); | |
1539 | rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); | |
1540 | ||
6cef7ff6 WL |
1541 | if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, |
1542 | __down_write_killable)) { | |
5facae4f | 1543 | rwsem_release(&sem->dep_map, _RET_IP_); |
916633a4 MH |
1544 | return -EINTR; |
1545 | } | |
1546 | ||
916633a4 MH |
1547 | return 0; |
1548 | } | |
916633a4 MH |
1549 | EXPORT_SYMBOL(down_write_killable); |
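/*
 * Killable write sketch (hypothetical names), mirroring the pattern of
 * mmap_write_lock_killable() callers: only a fatal signal aborts the
 * wait, and on -EINTR the lock is not held.
 */
static int my_write(int v)
{
	if (down_write_killable(&my_sem))
		return -EINTR;
	my_data = v;		/* exclusive update */
	up_write(&my_sem);
	return 0;
}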
1550 | ||
c4e05116 IM |
1551 | /* |
1552 | * trylock for writing -- returns 1 if successful, 0 if contention | |
1553 | */ | |
1554 | int down_write_trylock(struct rw_semaphore *sem) | |
1555 | { | |
1556 | int ret = __down_write_trylock(sem); | |
1557 | ||
c7580c1e | 1558 | if (ret == 1) |
428e6ce0 | 1559 | rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); |
4fc828e2 | 1560 | |
c4e05116 IM |
1561 | return ret; |
1562 | } | |
c4e05116 IM |
1563 | EXPORT_SYMBOL(down_write_trylock); |
1564 | ||
1565 | /* | |
1566 | * release a read lock | |
1567 | */ | |
1568 | void up_read(struct rw_semaphore *sem) | |
1569 | { | |
5facae4f | 1570 | rwsem_release(&sem->dep_map, _RET_IP_); |
c4e05116 IM |
1571 | __up_read(sem); |
1572 | } | |
c4e05116 IM |
1573 | EXPORT_SYMBOL(up_read); |
1574 | ||
1575 | /* | |
1576 | * release a write lock | |
1577 | */ | |
1578 | void up_write(struct rw_semaphore *sem) | |
1579 | { | |
5facae4f | 1580 | rwsem_release(&sem->dep_map, _RET_IP_); |
c4e05116 IM |
1581 | __up_write(sem); |
1582 | } | |
c4e05116 IM |
1583 | EXPORT_SYMBOL(up_write); |
1584 | ||
1585 | /* | |
1586 | * downgrade write lock to read lock | |
1587 | */ | |
1588 | void downgrade_write(struct rw_semaphore *sem) | |
1589 | { | |
6419c4af | 1590 | lock_downgrade(&sem->dep_map, _RET_IP_); |
c4e05116 IM |
1591 | __downgrade_write(sem); |
1592 | } | |
c4e05116 | 1593 | EXPORT_SYMBOL(downgrade_write); |
4ea2176d IM |
1594 | |
1595 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
1596 | ||
1597 | void down_read_nested(struct rw_semaphore *sem, int subclass) | |
1598 | { | |
1599 | might_sleep(); | |
1600 | rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); | |
4fe87745 | 1601 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); |
4ea2176d | 1602 | } |
4ea2176d IM |
1603 | EXPORT_SYMBOL(down_read_nested); |
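/*
 * Lockdep-annotation sketch (hypothetical parent/child rwsems of the
 * same lock class, always acquired in this order): the non-zero
 * subclass tells lockdep the nesting is intentional rather than a
 * self-deadlock.
 */
static void my_lock_pair(struct rw_semaphore *parent,
			 struct rw_semaphore *child)
{
	down_read(parent);
	down_read_nested(child, SINGLE_DEPTH_NESTING);
	/* ... walk both objects ... */
	up_read(child);
	up_read(parent);
}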
1604 | ||
0f9368b5 EB |
1605 | int down_read_killable_nested(struct rw_semaphore *sem, int subclass) |
1606 | { | |
1607 | might_sleep(); | |
1608 | rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); | |
1609 | ||
1610 | if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) { | |
1611 | rwsem_release(&sem->dep_map, _RET_IP_); | |
1612 | return -EINTR; | |
1613 | } | |
1614 | ||
1615 | return 0; | |
1616 | } | |
1617 | EXPORT_SYMBOL(down_read_killable_nested); | |
1618 | ||
1b963c81 JK |
1619 | void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) |
1620 | { | |
1621 | might_sleep(); | |
1622 | rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); | |
1b963c81 JK |
1623 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
1624 | } | |
1b963c81 JK |
1625 | EXPORT_SYMBOL(_down_write_nest_lock); |
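/*
 * Nest-lock sketch (my_outer is a hypothetical mutex; assumes
 * <linux/mutex.h>): callers normally use the down_write_nest_lock()
 * wrapper rather than this function directly, telling lockdep that
 * acquiring several same-class rwsems is serialized by the outer lock.
 */
static DEFINE_MUTEX(my_outer);

static void my_write_lock_both(struct rw_semaphore *a,
			       struct rw_semaphore *b)
{
	mutex_lock(&my_outer);
	down_write_nest_lock(a, &my_outer);
	down_write_nest_lock(b, &my_outer);
	/* ... update both ... */
	up_write(b);
	up_write(a);
	mutex_unlock(&my_outer);
}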
1626 | ||
84759c6d KO |
1627 | void down_read_non_owner(struct rw_semaphore *sem) |
1628 | { | |
1629 | might_sleep(); | |
84759c6d | 1630 | __down_read(sem); |
925b9cd1 | 1631 | __rwsem_set_reader_owned(sem, NULL); |
84759c6d | 1632 | } |
84759c6d KO |
1633 | EXPORT_SYMBOL(down_read_non_owner); |
1634 | ||
4ea2176d IM |
1635 | void down_write_nested(struct rw_semaphore *sem, int subclass) |
1636 | { | |
1637 | might_sleep(); | |
1638 | rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); | |
4fe87745 | 1639 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
4ea2176d | 1640 | } |
4ea2176d IM |
1641 | EXPORT_SYMBOL(down_write_nested); |
1642 | ||
887bddfa AV |
1643 | int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass) |
1644 | { | |
1645 | might_sleep(); | |
1646 | rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); | |
1647 | ||
6cef7ff6 WL |
1648 | if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, |
1649 | __down_write_killable)) { | |
5facae4f | 1650 | rwsem_release(&sem->dep_map, _RET_IP_); |
887bddfa AV |
1651 | return -EINTR; |
1652 | } | |
1653 | ||
887bddfa AV |
1654 | return 0; |
1655 | } | |
887bddfa AV |
1656 | EXPORT_SYMBOL(down_write_killable_nested); |
1657 | ||
84759c6d KO |
1658 | void up_read_non_owner(struct rw_semaphore *sem) |
1659 | { | |
94a9717b | 1660 | DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem); |
84759c6d KO |
1661 | __up_read(sem); |
1662 | } | |
84759c6d KO |
1663 | EXPORT_SYMBOL(up_read_non_owner); |
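/*
 * Non-owner sketch (illustrative, loosely modeled on async-completion
 * users of this API): the lock is taken in one context and released in
 * another, so the usual owner tracking and "released by owner" checks
 * are relaxed.
 */
static void my_submit(void)
{
	down_read_non_owner(&my_sem);
	/* ... start async work; the completion path drops the lock ... */
}

static void my_complete(void)	/* may run in a different task */
{
	up_read_non_owner(&my_sem);
}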
1664 | ||
4ea2176d | 1665 | #endif |