Commit | Line | Data |
---|---|---|
e3b3d0f5 | 1 | // SPDX-License-Identifier: GPL-2.0 |
4898e640 PH |
2 | /* |
3 | * Ldisc rw semaphore | |
4 | * | |
5 | * The ldisc semaphore is semantically a rw_semaphore but which enforces | |
6 | * an alternate policy, namely: | |
7 | * 1) Supports lock wait timeouts | |
8 | * 2) Write waiter has priority | |
9 | * 3) Downgrading is not supported | |
10 | * | |
11 | * Implementation notes: | |
12 | * 1) Upper half of semaphore count is a wait count (differs from rwsem | |
13 | * in that rwsem normalizes the upper half to the wait bias) | |
14 | * 2) Lacks overflow checking | |
15 | * | |
16 | * The generic counting was copied and modified from include/asm-generic/rwsem.h | |
17 | * by Paul Mackerras <paulus@samba.org>. | |
18 | * | |
19 | * The scheduling policy was copied and modified from lib/rwsem.c | |
20 | * Written by David Howells (dhowells@redhat.com). | |
21 | * | |
22 | * This implementation incorporates the write lock stealing work of | |
23 | * Michel Lespinasse <walken@google.com>. | |
24 | * | |
25 | * Copyright (C) 2013 Peter Hurley <peter@hurleysoftware.com> | |
4898e640 PH |
26 | */ |
27 | ||
28 | #include <linux/list.h> | |
29 | #include <linux/spinlock.h> | |
30 | #include <linux/atomic.h> | |
31 | #include <linux/tty.h> | |
32 | #include <linux/sched.h> | |
b17b0153 | 33 | #include <linux/sched/debug.h> |
0881e7bd | 34 | #include <linux/sched/task.h> |
4898e640 PH |
35 | |
36 | ||
37 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
38 | # define __acq(l, s, t, r, c, n, i) \ | |
39 | lock_acquire(&(l)->dep_map, s, t, r, c, n, i) | |
40 | # define __rel(l, n, i) \ | |
41 | lock_release(&(l)->dep_map, n, i) | |
fb9edbe9 ON |
42 | #define lockdep_acquire(l, s, t, i) __acq(l, s, t, 0, 1, NULL, i) |
43 | #define lockdep_acquire_nest(l, s, t, n, i) __acq(l, s, t, 0, 1, n, i) | |
44 | #define lockdep_acquire_read(l, s, t, i) __acq(l, s, t, 1, 1, NULL, i) | |
45 | #define lockdep_release(l, n, i) __rel(l, n, i) | |
4898e640 PH |
46 | #else |
47 | # define lockdep_acquire(l, s, t, i) do { } while (0) | |
48 | # define lockdep_acquire_nest(l, s, t, n, i) do { } while (0) | |
49 | # define lockdep_acquire_read(l, s, t, i) do { } while (0) | |
50 | # define lockdep_release(l, n, i) do { } while (0) | |
51 | #endif | |
52 | ||
53 | #ifdef CONFIG_LOCK_STAT | |
54 | # define lock_stat(_lock, stat) lock_##stat(&(_lock)->dep_map, _RET_IP_) | |
55 | #else | |
56 | # define lock_stat(_lock, stat) do { } while (0) | |
57 | #endif | |
58 | ||
59 | ||
60 | #if BITS_PER_LONG == 64 | |
61 | # define LDSEM_ACTIVE_MASK 0xffffffffL | |
62 | #else | |
63 | # define LDSEM_ACTIVE_MASK 0x0000ffffL | |
64 | #endif | |
65 | ||
66 | #define LDSEM_UNLOCKED 0L | |
67 | #define LDSEM_ACTIVE_BIAS 1L | |
68 | #define LDSEM_WAIT_BIAS (-LDSEM_ACTIVE_MASK-1) | |
69 | #define LDSEM_READ_BIAS LDSEM_ACTIVE_BIAS | |
70 | #define LDSEM_WRITE_BIAS (LDSEM_WAIT_BIAS + LDSEM_ACTIVE_BIAS) | |
71 | ||
72 | struct ldsem_waiter { | |
73 | struct list_head list; | |
74 | struct task_struct *task; | |
75 | }; | |
76 | ||
4898e640 PH |
77 | /* |
78 | * Initialize an ldsem: | |
79 | */ | |
80 | void __init_ldsem(struct ld_semaphore *sem, const char *name, | |
81 | struct lock_class_key *key) | |
82 | { | |
83 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
84 | /* | |
85 | * Make sure we are not reinitializing a held semaphore: | |
86 | */ | |
87 | debug_check_no_locks_freed((void *)sem, sizeof(*sem)); | |
88 | lockdep_init_map(&sem->dep_map, name, key, 0); | |
89 | #endif | |
5fd691af | 90 | atomic_long_set(&sem->count, LDSEM_UNLOCKED); |
4898e640 PH |
91 | sem->wait_readers = 0; |
92 | raw_spin_lock_init(&sem->wait_lock); | |
93 | INIT_LIST_HEAD(&sem->read_wait); | |
94 | INIT_LIST_HEAD(&sem->write_wait); | |
95 | } | |
96 | ||
97 | static void __ldsem_wake_readers(struct ld_semaphore *sem) | |
98 | { | |
99 | struct ldsem_waiter *waiter, *next; | |
100 | struct task_struct *tsk; | |
101 | long adjust, count; | |
102 | ||
5fd691af PZ |
103 | /* |
104 | * Try to grant read locks to all readers on the read wait list. | |
4898e640 PH |
105 | * Note the 'active part' of the count is incremented by |
106 | * the number of readers before waking any processes up. | |
107 | */ | |
108 | adjust = sem->wait_readers * (LDSEM_ACTIVE_BIAS - LDSEM_WAIT_BIAS); | |
5fd691af | 109 | count = atomic_long_add_return(adjust, &sem->count); |
4898e640 PH |
110 | do { |
111 | if (count > 0) | |
112 | break; | |
5fd691af | 113 | if (atomic_long_try_cmpxchg(&sem->count, &count, count - adjust)) |
4898e640 PH |
114 | return; |
115 | } while (1); | |
116 | ||
117 | list_for_each_entry_safe(waiter, next, &sem->read_wait, list) { | |
118 | tsk = waiter->task; | |
119 | smp_mb(); | |
120 | waiter->task = NULL; | |
121 | wake_up_process(tsk); | |
122 | put_task_struct(tsk); | |
123 | } | |
124 | INIT_LIST_HEAD(&sem->read_wait); | |
125 | sem->wait_readers = 0; | |
126 | } | |
127 | ||
128 | static inline int writer_trylock(struct ld_semaphore *sem) | |
129 | { | |
5fd691af PZ |
130 | /* |
131 | * Only wake this writer if the active part of the count can be | |
4898e640 PH |
132 | * transitioned from 0 -> 1 |
133 | */ | |
5fd691af | 134 | long count = atomic_long_add_return(LDSEM_ACTIVE_BIAS, &sem->count); |
4898e640 PH |
135 | do { |
136 | if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS) | |
137 | return 1; | |
5fd691af | 138 | if (atomic_long_try_cmpxchg(&sem->count, &count, count - LDSEM_ACTIVE_BIAS)) |
4898e640 PH |
139 | return 0; |
140 | } while (1); | |
141 | } | |
142 | ||
143 | static void __ldsem_wake_writer(struct ld_semaphore *sem) | |
144 | { | |
145 | struct ldsem_waiter *waiter; | |
146 | ||
147 | waiter = list_entry(sem->write_wait.next, struct ldsem_waiter, list); | |
148 | wake_up_process(waiter->task); | |
149 | } | |
150 | ||
151 | /* | |
152 | * handle the lock release when processes blocked on it that can now run | |
153 | * - if we come here from up_xxxx(), then: | |
154 | * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) | |
155 | * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) | |
156 | * - the spinlock must be held by the caller | |
157 | * - woken process blocks are discarded from the list after having task zeroed | |
158 | */ | |
159 | static void __ldsem_wake(struct ld_semaphore *sem) | |
160 | { | |
161 | if (!list_empty(&sem->write_wait)) | |
162 | __ldsem_wake_writer(sem); | |
163 | else if (!list_empty(&sem->read_wait)) | |
164 | __ldsem_wake_readers(sem); | |
165 | } | |
166 | ||
167 | static void ldsem_wake(struct ld_semaphore *sem) | |
168 | { | |
169 | unsigned long flags; | |
170 | ||
171 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | |
172 | __ldsem_wake(sem); | |
173 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | |
174 | } | |
175 | ||
176 | /* | |
177 | * wait for the read lock to be granted | |
178 | */ | |
179 | static struct ld_semaphore __sched * | |
180 | down_read_failed(struct ld_semaphore *sem, long count, long timeout) | |
181 | { | |
182 | struct ldsem_waiter waiter; | |
4898e640 PH |
183 | long adjust = -LDSEM_ACTIVE_BIAS + LDSEM_WAIT_BIAS; |
184 | ||
185 | /* set up my own style of waitqueue */ | |
186 | raw_spin_lock_irq(&sem->wait_lock); | |
187 | ||
5fd691af PZ |
188 | /* |
189 | * Try to reverse the lock attempt but if the count has changed | |
4898e640 | 190 | * so that reversing fails, check if there are are no waiters, |
5fd691af PZ |
191 | * and early-out if not |
192 | */ | |
4898e640 | 193 | do { |
5fd691af PZ |
194 | if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust)) { |
195 | count += adjust; | |
4898e640 | 196 | break; |
5fd691af | 197 | } |
4898e640 PH |
198 | if (count > 0) { |
199 | raw_spin_unlock_irq(&sem->wait_lock); | |
200 | return sem; | |
201 | } | |
202 | } while (1); | |
203 | ||
204 | list_add_tail(&waiter.list, &sem->read_wait); | |
205 | sem->wait_readers++; | |
206 | ||
5376f2e7 DB |
207 | waiter.task = current; |
208 | get_task_struct(current); | |
4898e640 PH |
209 | |
210 | /* if there are no active locks, wake the new lock owner(s) */ | |
211 | if ((count & LDSEM_ACTIVE_MASK) == 0) | |
212 | __ldsem_wake(sem); | |
213 | ||
214 | raw_spin_unlock_irq(&sem->wait_lock); | |
215 | ||
216 | /* wait to be given the lock */ | |
217 | for (;;) { | |
642fa448 | 218 | set_current_state(TASK_UNINTERRUPTIBLE); |
4898e640 PH |
219 | |
220 | if (!waiter.task) | |
221 | break; | |
222 | if (!timeout) | |
223 | break; | |
224 | timeout = schedule_timeout(timeout); | |
225 | } | |
226 | ||
642fa448 | 227 | __set_current_state(TASK_RUNNING); |
4898e640 PH |
228 | |
229 | if (!timeout) { | |
5fd691af PZ |
230 | /* |
231 | * Lock timed out but check if this task was just | |
4898e640 | 232 | * granted lock ownership - if so, pretend there |
5fd691af PZ |
233 | * was no timeout; otherwise, cleanup lock wait. |
234 | */ | |
4898e640 PH |
235 | raw_spin_lock_irq(&sem->wait_lock); |
236 | if (waiter.task) { | |
5fd691af | 237 | atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count); |
4898e640 PH |
238 | list_del(&waiter.list); |
239 | raw_spin_unlock_irq(&sem->wait_lock); | |
240 | put_task_struct(waiter.task); | |
241 | return NULL; | |
242 | } | |
243 | raw_spin_unlock_irq(&sem->wait_lock); | |
244 | } | |
245 | ||
246 | return sem; | |
247 | } | |
248 | ||
249 | /* | |
250 | * wait for the write lock to be granted | |
251 | */ | |
252 | static struct ld_semaphore __sched * | |
253 | down_write_failed(struct ld_semaphore *sem, long count, long timeout) | |
254 | { | |
255 | struct ldsem_waiter waiter; | |
4898e640 PH |
256 | long adjust = -LDSEM_ACTIVE_BIAS; |
257 | int locked = 0; | |
258 | ||
259 | /* set up my own style of waitqueue */ | |
260 | raw_spin_lock_irq(&sem->wait_lock); | |
261 | ||
5fd691af PZ |
262 | /* |
263 | * Try to reverse the lock attempt but if the count has changed | |
4898e640 | 264 | * so that reversing fails, check if the lock is now owned, |
5fd691af PZ |
265 | * and early-out if so. |
266 | */ | |
4898e640 | 267 | do { |
5fd691af | 268 | if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust)) |
4898e640 PH |
269 | break; |
270 | if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS) { | |
271 | raw_spin_unlock_irq(&sem->wait_lock); | |
272 | return sem; | |
273 | } | |
274 | } while (1); | |
275 | ||
276 | list_add_tail(&waiter.list, &sem->write_wait); | |
277 | ||
5376f2e7 | 278 | waiter.task = current; |
4898e640 | 279 | |
642fa448 | 280 | set_current_state(TASK_UNINTERRUPTIBLE); |
4898e640 PH |
281 | for (;;) { |
282 | if (!timeout) | |
283 | break; | |
284 | raw_spin_unlock_irq(&sem->wait_lock); | |
285 | timeout = schedule_timeout(timeout); | |
286 | raw_spin_lock_irq(&sem->wait_lock); | |
642fa448 | 287 | set_current_state(TASK_UNINTERRUPTIBLE); |
f9ce5ccf GKH |
288 | locked = writer_trylock(sem); |
289 | if (locked) | |
4898e640 PH |
290 | break; |
291 | } | |
292 | ||
293 | if (!locked) | |
5fd691af | 294 | atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count); |
4898e640 PH |
295 | list_del(&waiter.list); |
296 | raw_spin_unlock_irq(&sem->wait_lock); | |
297 | ||
642fa448 | 298 | __set_current_state(TASK_RUNNING); |
4898e640 PH |
299 | |
300 | /* lock wait may have timed out */ | |
301 | if (!locked) | |
302 | return NULL; | |
303 | return sem; | |
304 | } | |
305 | ||
306 | ||
307 | ||
fc0285f2 | 308 | static int __ldsem_down_read_nested(struct ld_semaphore *sem, |
4898e640 PH |
309 | int subclass, long timeout) |
310 | { | |
311 | long count; | |
312 | ||
313 | lockdep_acquire_read(sem, subclass, 0, _RET_IP_); | |
314 | ||
5fd691af | 315 | count = atomic_long_add_return(LDSEM_READ_BIAS, &sem->count); |
4898e640 PH |
316 | if (count <= 0) { |
317 | lock_stat(sem, contended); | |
318 | if (!down_read_failed(sem, count, timeout)) { | |
319 | lockdep_release(sem, 1, _RET_IP_); | |
320 | return 0; | |
321 | } | |
322 | } | |
323 | lock_stat(sem, acquired); | |
324 | return 1; | |
325 | } | |
326 | ||
5ef6504e | 327 | static int __ldsem_down_write_nested(struct ld_semaphore *sem, |
4898e640 PH |
328 | int subclass, long timeout) |
329 | { | |
330 | long count; | |
331 | ||
332 | lockdep_acquire(sem, subclass, 0, _RET_IP_); | |
333 | ||
5fd691af | 334 | count = atomic_long_add_return(LDSEM_WRITE_BIAS, &sem->count); |
4898e640 PH |
335 | if ((count & LDSEM_ACTIVE_MASK) != LDSEM_ACTIVE_BIAS) { |
336 | lock_stat(sem, contended); | |
337 | if (!down_write_failed(sem, count, timeout)) { | |
338 | lockdep_release(sem, 1, _RET_IP_); | |
339 | return 0; | |
340 | } | |
341 | } | |
342 | lock_stat(sem, acquired); | |
343 | return 1; | |
344 | } | |
345 | ||
346 | ||
347 | /* | |
348 | * lock for reading -- returns 1 if successful, 0 if timed out | |
349 | */ | |
350 | int __sched ldsem_down_read(struct ld_semaphore *sem, long timeout) | |
351 | { | |
352 | might_sleep(); | |
353 | return __ldsem_down_read_nested(sem, 0, timeout); | |
354 | } | |
355 | ||
356 | /* | |
357 | * trylock for reading -- returns 1 if successful, 0 if contention | |
358 | */ | |
359 | int ldsem_down_read_trylock(struct ld_semaphore *sem) | |
360 | { | |
5fd691af | 361 | long count = atomic_long_read(&sem->count); |
4898e640 PH |
362 | |
363 | while (count >= 0) { | |
5fd691af | 364 | if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_READ_BIAS)) { |
4898e640 PH |
365 | lockdep_acquire_read(sem, 0, 1, _RET_IP_); |
366 | lock_stat(sem, acquired); | |
367 | return 1; | |
368 | } | |
369 | } | |
370 | return 0; | |
371 | } | |
372 | ||
373 | /* | |
374 | * lock for writing -- returns 1 if successful, 0 if timed out | |
375 | */ | |
376 | int __sched ldsem_down_write(struct ld_semaphore *sem, long timeout) | |
377 | { | |
378 | might_sleep(); | |
379 | return __ldsem_down_write_nested(sem, 0, timeout); | |
380 | } | |
381 | ||
382 | /* | |
383 | * trylock for writing -- returns 1 if successful, 0 if contention | |
384 | */ | |
385 | int ldsem_down_write_trylock(struct ld_semaphore *sem) | |
386 | { | |
5fd691af | 387 | long count = atomic_long_read(&sem->count); |
4898e640 PH |
388 | |
389 | while ((count & LDSEM_ACTIVE_MASK) == 0) { | |
5fd691af | 390 | if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_WRITE_BIAS)) { |
4898e640 PH |
391 | lockdep_acquire(sem, 0, 1, _RET_IP_); |
392 | lock_stat(sem, acquired); | |
393 | return 1; | |
394 | } | |
395 | } | |
396 | return 0; | |
397 | } | |
398 | ||
399 | /* | |
400 | * release a read lock | |
401 | */ | |
402 | void ldsem_up_read(struct ld_semaphore *sem) | |
403 | { | |
404 | long count; | |
405 | ||
406 | lockdep_release(sem, 1, _RET_IP_); | |
407 | ||
5fd691af | 408 | count = atomic_long_add_return(-LDSEM_READ_BIAS, &sem->count); |
4898e640 PH |
409 | if (count < 0 && (count & LDSEM_ACTIVE_MASK) == 0) |
410 | ldsem_wake(sem); | |
411 | } | |
412 | ||
413 | /* | |
414 | * release a write lock | |
415 | */ | |
416 | void ldsem_up_write(struct ld_semaphore *sem) | |
417 | { | |
418 | long count; | |
419 | ||
420 | lockdep_release(sem, 1, _RET_IP_); | |
421 | ||
5fd691af | 422 | count = atomic_long_add_return(-LDSEM_WRITE_BIAS, &sem->count); |
4898e640 PH |
423 | if (count < 0) |
424 | ldsem_wake(sem); | |
425 | } | |
426 | ||
427 | ||
428 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
429 | ||
430 | int ldsem_down_read_nested(struct ld_semaphore *sem, int subclass, long timeout) | |
431 | { | |
432 | might_sleep(); | |
433 | return __ldsem_down_read_nested(sem, subclass, timeout); | |
434 | } | |
435 | ||
436 | int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, | |
437 | long timeout) | |
438 | { | |
439 | might_sleep(); | |
440 | return __ldsem_down_write_nested(sem, subclass, timeout); | |
441 | } | |
442 | ||
443 | #endif |