Commit | Line | Data |
---|---|---|
e3b3d0f5 | 1 | // SPDX-License-Identifier: GPL-2.0 |
4898e640 PH |
2 | /* |
3 | * Ldisc rw semaphore | |
4 | * | |
5 | * The ldisc semaphore is semantically a rw_semaphore but which enforces | |
6 | * an alternate policy, namely: | |
7 | * 1) Supports lock wait timeouts | |
8 | * 2) Write waiter has priority | |
9 | * 3) Downgrading is not supported | |
10 | * | |
11 | * Implementation notes: | |
12 | * 1) Upper half of semaphore count is a wait count (differs from rwsem | |
13 | * in that rwsem normalizes the upper half to the wait bias) | |
14 | * 2) Lacks overflow checking | |
15 | * | |
16 | * The generic counting was copied and modified from include/asm-generic/rwsem.h | |
17 | * by Paul Mackerras <paulus@samba.org>. | |
18 | * | |
19 | * The scheduling policy was copied and modified from lib/rwsem.c | |
20 | * Written by David Howells (dhowells@redhat.com). | |
21 | * | |
22 | * This implementation incorporates the write lock stealing work of | |
23 | * Michel Lespinasse <walken@google.com>. | |
24 | * | |
25 | * Copyright (C) 2013 Peter Hurley <peter@hurleysoftware.com> | |
4898e640 PH |
26 | */ |
27 | ||
28 | #include <linux/list.h> | |
29 | #include <linux/spinlock.h> | |
30 | #include <linux/atomic.h> | |
31 | #include <linux/tty.h> | |
32 | #include <linux/sched.h> | |
b17b0153 | 33 | #include <linux/sched/debug.h> |
0881e7bd | 34 | #include <linux/sched/task.h> |
4898e640 PH |
35 | |
36 | ||
4898e640 PH |
37 | #if BITS_PER_LONG == 64 |
38 | # define LDSEM_ACTIVE_MASK 0xffffffffL | |
39 | #else | |
40 | # define LDSEM_ACTIVE_MASK 0x0000ffffL | |
41 | #endif | |
42 | ||
43 | #define LDSEM_UNLOCKED 0L | |
44 | #define LDSEM_ACTIVE_BIAS 1L | |
45 | #define LDSEM_WAIT_BIAS (-LDSEM_ACTIVE_MASK-1) | |
46 | #define LDSEM_READ_BIAS LDSEM_ACTIVE_BIAS | |
47 | #define LDSEM_WRITE_BIAS (LDSEM_WAIT_BIAS + LDSEM_ACTIVE_BIAS) | |
48 | ||
49 | struct ldsem_waiter { | |
50 | struct list_head list; | |
51 | struct task_struct *task; | |
52 | }; | |
53 | ||
4898e640 PH |
54 | /* |
55 | * Initialize an ldsem: | |
56 | */ | |
57 | void __init_ldsem(struct ld_semaphore *sem, const char *name, | |
58 | struct lock_class_key *key) | |
59 | { | |
60 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
61 | /* | |
62 | * Make sure we are not reinitializing a held semaphore: | |
63 | */ | |
64 | debug_check_no_locks_freed((void *)sem, sizeof(*sem)); | |
65 | lockdep_init_map(&sem->dep_map, name, key, 0); | |
66 | #endif | |
5fd691af | 67 | atomic_long_set(&sem->count, LDSEM_UNLOCKED); |
4898e640 PH |
68 | sem->wait_readers = 0; |
69 | raw_spin_lock_init(&sem->wait_lock); | |
70 | INIT_LIST_HEAD(&sem->read_wait); | |
71 | INIT_LIST_HEAD(&sem->write_wait); | |
72 | } | |
73 | ||
74 | static void __ldsem_wake_readers(struct ld_semaphore *sem) | |
75 | { | |
76 | struct ldsem_waiter *waiter, *next; | |
77 | struct task_struct *tsk; | |
78 | long adjust, count; | |
79 | ||
5fd691af PZ |
80 | /* |
81 | * Try to grant read locks to all readers on the read wait list. | |
4898e640 PH |
82 | * Note the 'active part' of the count is incremented by |
83 | * the number of readers before waking any processes up. | |
84 | */ | |
85 | adjust = sem->wait_readers * (LDSEM_ACTIVE_BIAS - LDSEM_WAIT_BIAS); | |
5fd691af | 86 | count = atomic_long_add_return(adjust, &sem->count); |
4898e640 PH |
87 | do { |
88 | if (count > 0) | |
89 | break; | |
5fd691af | 90 | if (atomic_long_try_cmpxchg(&sem->count, &count, count - adjust)) |
4898e640 PH |
91 | return; |
92 | } while (1); | |
93 | ||
94 | list_for_each_entry_safe(waiter, next, &sem->read_wait, list) { | |
95 | tsk = waiter->task; | |
952041a8 | 96 | smp_store_release(&waiter->task, NULL); |
4898e640 PH |
97 | wake_up_process(tsk); |
98 | put_task_struct(tsk); | |
99 | } | |
100 | INIT_LIST_HEAD(&sem->read_wait); | |
101 | sem->wait_readers = 0; | |
102 | } | |
103 | ||
104 | static inline int writer_trylock(struct ld_semaphore *sem) | |
105 | { | |
5fd691af PZ |
106 | /* |
107 | * Only wake this writer if the active part of the count can be | |
4898e640 PH |
108 | * transitioned from 0 -> 1 |
109 | */ | |
5fd691af | 110 | long count = atomic_long_add_return(LDSEM_ACTIVE_BIAS, &sem->count); |
4898e640 PH |
111 | do { |
112 | if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS) | |
113 | return 1; | |
5fd691af | 114 | if (atomic_long_try_cmpxchg(&sem->count, &count, count - LDSEM_ACTIVE_BIAS)) |
4898e640 PH |
115 | return 0; |
116 | } while (1); | |
117 | } | |
118 | ||
119 | static void __ldsem_wake_writer(struct ld_semaphore *sem) | |
120 | { | |
121 | struct ldsem_waiter *waiter; | |
122 | ||
123 | waiter = list_entry(sem->write_wait.next, struct ldsem_waiter, list); | |
124 | wake_up_process(waiter->task); | |
125 | } | |
126 | ||
127 | /* | |
128 | * handle the lock release when processes blocked on it that can now run | |
129 | * - if we come here from up_xxxx(), then: | |
130 | * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) | |
131 | * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) | |
132 | * - the spinlock must be held by the caller | |
133 | * - woken process blocks are discarded from the list after having task zeroed | |
134 | */ | |
135 | static void __ldsem_wake(struct ld_semaphore *sem) | |
136 | { | |
137 | if (!list_empty(&sem->write_wait)) | |
138 | __ldsem_wake_writer(sem); | |
139 | else if (!list_empty(&sem->read_wait)) | |
140 | __ldsem_wake_readers(sem); | |
141 | } | |
142 | ||
143 | static void ldsem_wake(struct ld_semaphore *sem) | |
144 | { | |
145 | unsigned long flags; | |
146 | ||
147 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | |
148 | __ldsem_wake(sem); | |
149 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | |
150 | } | |
151 | ||
152 | /* | |
153 | * wait for the read lock to be granted | |
154 | */ | |
155 | static struct ld_semaphore __sched * | |
156 | down_read_failed(struct ld_semaphore *sem, long count, long timeout) | |
157 | { | |
158 | struct ldsem_waiter waiter; | |
4898e640 PH |
159 | long adjust = -LDSEM_ACTIVE_BIAS + LDSEM_WAIT_BIAS; |
160 | ||
161 | /* set up my own style of waitqueue */ | |
162 | raw_spin_lock_irq(&sem->wait_lock); | |
163 | ||
5fd691af PZ |
164 | /* |
165 | * Try to reverse the lock attempt but if the count has changed | |
4898e640 | 166 | * so that reversing fails, check if there are are no waiters, |
5fd691af PZ |
167 | * and early-out if not |
168 | */ | |
4898e640 | 169 | do { |
5fd691af PZ |
170 | if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust)) { |
171 | count += adjust; | |
4898e640 | 172 | break; |
5fd691af | 173 | } |
4898e640 PH |
174 | if (count > 0) { |
175 | raw_spin_unlock_irq(&sem->wait_lock); | |
176 | return sem; | |
177 | } | |
178 | } while (1); | |
179 | ||
180 | list_add_tail(&waiter.list, &sem->read_wait); | |
181 | sem->wait_readers++; | |
182 | ||
5376f2e7 DB |
183 | waiter.task = current; |
184 | get_task_struct(current); | |
4898e640 PH |
185 | |
186 | /* if there are no active locks, wake the new lock owner(s) */ | |
187 | if ((count & LDSEM_ACTIVE_MASK) == 0) | |
188 | __ldsem_wake(sem); | |
189 | ||
190 | raw_spin_unlock_irq(&sem->wait_lock); | |
191 | ||
192 | /* wait to be given the lock */ | |
193 | for (;;) { | |
642fa448 | 194 | set_current_state(TASK_UNINTERRUPTIBLE); |
4898e640 | 195 | |
952041a8 | 196 | if (!smp_load_acquire(&waiter.task)) |
4898e640 PH |
197 | break; |
198 | if (!timeout) | |
199 | break; | |
200 | timeout = schedule_timeout(timeout); | |
201 | } | |
202 | ||
642fa448 | 203 | __set_current_state(TASK_RUNNING); |
4898e640 PH |
204 | |
205 | if (!timeout) { | |
5fd691af PZ |
206 | /* |
207 | * Lock timed out but check if this task was just | |
4898e640 | 208 | * granted lock ownership - if so, pretend there |
5fd691af PZ |
209 | * was no timeout; otherwise, cleanup lock wait. |
210 | */ | |
4898e640 PH |
211 | raw_spin_lock_irq(&sem->wait_lock); |
212 | if (waiter.task) { | |
5fd691af | 213 | atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count); |
2f588cee | 214 | sem->wait_readers--; |
4898e640 PH |
215 | list_del(&waiter.list); |
216 | raw_spin_unlock_irq(&sem->wait_lock); | |
217 | put_task_struct(waiter.task); | |
218 | return NULL; | |
219 | } | |
220 | raw_spin_unlock_irq(&sem->wait_lock); | |
221 | } | |
222 | ||
223 | return sem; | |
224 | } | |
225 | ||
226 | /* | |
227 | * wait for the write lock to be granted | |
228 | */ | |
229 | static struct ld_semaphore __sched * | |
230 | down_write_failed(struct ld_semaphore *sem, long count, long timeout) | |
231 | { | |
232 | struct ldsem_waiter waiter; | |
4898e640 PH |
233 | long adjust = -LDSEM_ACTIVE_BIAS; |
234 | int locked = 0; | |
235 | ||
236 | /* set up my own style of waitqueue */ | |
237 | raw_spin_lock_irq(&sem->wait_lock); | |
238 | ||
5fd691af PZ |
239 | /* |
240 | * Try to reverse the lock attempt but if the count has changed | |
4898e640 | 241 | * so that reversing fails, check if the lock is now owned, |
5fd691af PZ |
242 | * and early-out if so. |
243 | */ | |
4898e640 | 244 | do { |
5fd691af | 245 | if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust)) |
4898e640 PH |
246 | break; |
247 | if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS) { | |
248 | raw_spin_unlock_irq(&sem->wait_lock); | |
249 | return sem; | |
250 | } | |
251 | } while (1); | |
252 | ||
253 | list_add_tail(&waiter.list, &sem->write_wait); | |
254 | ||
5376f2e7 | 255 | waiter.task = current; |
4898e640 | 256 | |
642fa448 | 257 | set_current_state(TASK_UNINTERRUPTIBLE); |
4898e640 PH |
258 | for (;;) { |
259 | if (!timeout) | |
260 | break; | |
261 | raw_spin_unlock_irq(&sem->wait_lock); | |
262 | timeout = schedule_timeout(timeout); | |
263 | raw_spin_lock_irq(&sem->wait_lock); | |
642fa448 | 264 | set_current_state(TASK_UNINTERRUPTIBLE); |
f9ce5ccf GKH |
265 | locked = writer_trylock(sem); |
266 | if (locked) | |
4898e640 PH |
267 | break; |
268 | } | |
269 | ||
270 | if (!locked) | |
5fd691af | 271 | atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count); |
4898e640 | 272 | list_del(&waiter.list); |
231f8fd0 DS |
273 | |
274 | /* | |
275 | * In case of timeout, wake up every reader who gave the right of way | |
276 | * to writer. Prevent separation readers into two groups: | |
277 | * one that helds semaphore and another that sleeps. | |
278 | * (in case of no contention with a writer) | |
279 | */ | |
280 | if (!locked && list_empty(&sem->write_wait)) | |
281 | __ldsem_wake_readers(sem); | |
282 | ||
4898e640 PH |
283 | raw_spin_unlock_irq(&sem->wait_lock); |
284 | ||
642fa448 | 285 | __set_current_state(TASK_RUNNING); |
4898e640 PH |
286 | |
287 | /* lock wait may have timed out */ | |
288 | if (!locked) | |
289 | return NULL; | |
290 | return sem; | |
291 | } | |
292 | ||
293 | ||
294 | ||
fc0285f2 | 295 | static int __ldsem_down_read_nested(struct ld_semaphore *sem, |
4898e640 PH |
296 | int subclass, long timeout) |
297 | { | |
298 | long count; | |
299 | ||
c18ad0cf | 300 | rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); |
4898e640 | 301 | |
5fd691af | 302 | count = atomic_long_add_return(LDSEM_READ_BIAS, &sem->count); |
4898e640 | 303 | if (count <= 0) { |
c18ad0cf | 304 | lock_contended(&sem->dep_map, _RET_IP_); |
4898e640 | 305 | if (!down_read_failed(sem, count, timeout)) { |
c18ad0cf | 306 | rwsem_release(&sem->dep_map, 1, _RET_IP_); |
4898e640 PH |
307 | return 0; |
308 | } | |
309 | } | |
c18ad0cf | 310 | lock_acquired(&sem->dep_map, _RET_IP_); |
4898e640 PH |
311 | return 1; |
312 | } | |
313 | ||
5ef6504e | 314 | static int __ldsem_down_write_nested(struct ld_semaphore *sem, |
4898e640 PH |
315 | int subclass, long timeout) |
316 | { | |
317 | long count; | |
318 | ||
c18ad0cf | 319 | rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); |
4898e640 | 320 | |
5fd691af | 321 | count = atomic_long_add_return(LDSEM_WRITE_BIAS, &sem->count); |
4898e640 | 322 | if ((count & LDSEM_ACTIVE_MASK) != LDSEM_ACTIVE_BIAS) { |
c18ad0cf | 323 | lock_contended(&sem->dep_map, _RET_IP_); |
4898e640 | 324 | if (!down_write_failed(sem, count, timeout)) { |
c18ad0cf | 325 | rwsem_release(&sem->dep_map, 1, _RET_IP_); |
4898e640 PH |
326 | return 0; |
327 | } | |
328 | } | |
c18ad0cf | 329 | lock_acquired(&sem->dep_map, _RET_IP_); |
4898e640 PH |
330 | return 1; |
331 | } | |
332 | ||
333 | ||
334 | /* | |
335 | * lock for reading -- returns 1 if successful, 0 if timed out | |
336 | */ | |
337 | int __sched ldsem_down_read(struct ld_semaphore *sem, long timeout) | |
338 | { | |
339 | might_sleep(); | |
340 | return __ldsem_down_read_nested(sem, 0, timeout); | |
341 | } | |
342 | ||
343 | /* | |
344 | * trylock for reading -- returns 1 if successful, 0 if contention | |
345 | */ | |
346 | int ldsem_down_read_trylock(struct ld_semaphore *sem) | |
347 | { | |
5fd691af | 348 | long count = atomic_long_read(&sem->count); |
4898e640 PH |
349 | |
350 | while (count >= 0) { | |
5fd691af | 351 | if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_READ_BIAS)) { |
c18ad0cf PZ |
352 | rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); |
353 | lock_acquired(&sem->dep_map, _RET_IP_); | |
4898e640 PH |
354 | return 1; |
355 | } | |
356 | } | |
357 | return 0; | |
358 | } | |
359 | ||
360 | /* | |
361 | * lock for writing -- returns 1 if successful, 0 if timed out | |
362 | */ | |
363 | int __sched ldsem_down_write(struct ld_semaphore *sem, long timeout) | |
364 | { | |
365 | might_sleep(); | |
366 | return __ldsem_down_write_nested(sem, 0, timeout); | |
367 | } | |
368 | ||
369 | /* | |
370 | * trylock for writing -- returns 1 if successful, 0 if contention | |
371 | */ | |
372 | int ldsem_down_write_trylock(struct ld_semaphore *sem) | |
373 | { | |
5fd691af | 374 | long count = atomic_long_read(&sem->count); |
4898e640 PH |
375 | |
376 | while ((count & LDSEM_ACTIVE_MASK) == 0) { | |
5fd691af | 377 | if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_WRITE_BIAS)) { |
c18ad0cf PZ |
378 | rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); |
379 | lock_acquired(&sem->dep_map, _RET_IP_); | |
4898e640 PH |
380 | return 1; |
381 | } | |
382 | } | |
383 | return 0; | |
384 | } | |
385 | ||
386 | /* | |
387 | * release a read lock | |
388 | */ | |
389 | void ldsem_up_read(struct ld_semaphore *sem) | |
390 | { | |
391 | long count; | |
392 | ||
c18ad0cf | 393 | rwsem_release(&sem->dep_map, 1, _RET_IP_); |
4898e640 | 394 | |
5fd691af | 395 | count = atomic_long_add_return(-LDSEM_READ_BIAS, &sem->count); |
4898e640 PH |
396 | if (count < 0 && (count & LDSEM_ACTIVE_MASK) == 0) |
397 | ldsem_wake(sem); | |
398 | } | |
399 | ||
400 | /* | |
401 | * release a write lock | |
402 | */ | |
403 | void ldsem_up_write(struct ld_semaphore *sem) | |
404 | { | |
405 | long count; | |
406 | ||
c18ad0cf | 407 | rwsem_release(&sem->dep_map, 1, _RET_IP_); |
4898e640 | 408 | |
5fd691af | 409 | count = atomic_long_add_return(-LDSEM_WRITE_BIAS, &sem->count); |
4898e640 PH |
410 | if (count < 0) |
411 | ldsem_wake(sem); | |
412 | } | |
413 | ||
414 | ||
415 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
416 | ||
417 | int ldsem_down_read_nested(struct ld_semaphore *sem, int subclass, long timeout) | |
418 | { | |
419 | might_sleep(); | |
420 | return __ldsem_down_read_nested(sem, subclass, timeout); | |
421 | } | |
422 | ||
423 | int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, | |
424 | long timeout) | |
425 | { | |
426 | might_sleep(); | |
427 | return __ldsem_down_write_nested(sem, subclass, timeout); | |
428 | } | |
429 | ||
430 | #endif |