Commit | Line | Data |
---|---|---|
e3b3d0f5 | 1 | // SPDX-License-Identifier: GPL-2.0 |
4898e640 PH |
2 | /* |
3 | * Ldisc rw semaphore | |
4 | * | |
5 | * The ldisc semaphore is semantically a rw_semaphore, but one that enforces | |
6 | * an alternate policy, namely: | |
7 | * 1) Supports lock wait timeouts | |
8 | * 2) Write waiter has priority | |
9 | * 3) Downgrading is not supported | |
10 | * | |
11 | * Implementation notes: | |
12 | * 1) Upper half of semaphore count is a wait count (differs from rwsem | |
13 | * in that rwsem normalizes the upper half to the wait bias) | |
14 | * 2) Lacks overflow checking | |
15 | * | |
16 | * The generic counting was copied and modified from include/asm-generic/rwsem.h | |
17 | * by Paul Mackerras <paulus@samba.org>. | |
18 | * | |
19 | * The scheduling policy was copied and modified from lib/rwsem.c | |
20 | * Written by David Howells (dhowells@redhat.com). | |
21 | * | |
22 | * This implementation incorporates the write lock stealing work of | |
23 | * Michel Lespinasse <walken@google.com>. | |
24 | * | |
25 | * Copyright (C) 2013 Peter Hurley <peter@hurleysoftware.com> | |
4898e640 PH |
26 | */ |
27 | ||
28 | #include <linux/list.h> | |
29 | #include <linux/spinlock.h> | |
30 | #include <linux/atomic.h> | |
31 | #include <linux/tty.h> | |
32 | #include <linux/sched.h> | |
b17b0153 | 33 | #include <linux/sched/debug.h> |
0881e7bd | 34 | #include <linux/sched/task.h> |
4898e640 PH |
35 | |
36 | ||
4898e640 PH |
/*
 * Layout of sem->count: the low half (selected by LDSEM_ACTIVE_MASK) is the
 * active count, the upper half is the wait count.
 */
#if BITS_PER_LONG == 64
# define LDSEM_ACTIVE_MASK	0xffffffffL
#else
# define LDSEM_ACTIVE_MASK	0x0000ffffL
#endif

/* Count value of a semaphore with no holders and no waiters. */
#define LDSEM_UNLOCKED		0L

/* One unit in the active half; a reader contributes exactly this. */
#define LDSEM_ACTIVE_BIAS	1L
#define LDSEM_READ_BIAS		LDSEM_ACTIVE_BIAS

/* One unit in the wait half (negative: lowest bit above the active mask). */
#define LDSEM_WAIT_BIAS		(-LDSEM_ACTIVE_MASK - 1)

/* A writer contributes one wait unit plus one active unit. */
#define LDSEM_WRITE_BIAS	(LDSEM_WAIT_BIAS + LDSEM_ACTIVE_BIAS)
48 | ||
/*
 * One blocked task queued on an ld_semaphore.  Instances are declared as
 * locals in down_read_failed()/down_write_failed(), so a waiter is only
 * valid while that call is still in progress.
 */
struct ldsem_waiter {
	struct list_head list;		/* link in sem->read_wait or sem->write_wait */
	struct task_struct *task;	/* blocked task; cleared to NULL when a reader is granted the lock */
};
53 | ||
4898e640 PH |
54 | /* |
55 | * Initialize an ldsem: | |
56 | */ | |
57 | void __init_ldsem(struct ld_semaphore *sem, const char *name, | |
58 | struct lock_class_key *key) | |
59 | { | |
60 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
61 | /* | |
62 | * Make sure we are not reinitializing a held semaphore: | |
63 | */ | |
64 | debug_check_no_locks_freed((void *)sem, sizeof(*sem)); | |
65 | lockdep_init_map(&sem->dep_map, name, key, 0); | |
66 | #endif | |
5fd691af | 67 | atomic_long_set(&sem->count, LDSEM_UNLOCKED); |
4898e640 PH |
68 | sem->wait_readers = 0; |
69 | raw_spin_lock_init(&sem->wait_lock); | |
70 | INIT_LIST_HEAD(&sem->read_wait); | |
71 | INIT_LIST_HEAD(&sem->write_wait); | |
72 | } | |
73 | ||
74 | static void __ldsem_wake_readers(struct ld_semaphore *sem) | |
75 | { | |
76 | struct ldsem_waiter *waiter, *next; | |
77 | struct task_struct *tsk; | |
78 | long adjust, count; | |
79 | ||
5fd691af PZ |
80 | /* |
81 | * Try to grant read locks to all readers on the read wait list. | |
4898e640 PH |
82 | * Note the 'active part' of the count is incremented by |
83 | * the number of readers before waking any processes up. | |
84 | */ | |
85 | adjust = sem->wait_readers * (LDSEM_ACTIVE_BIAS - LDSEM_WAIT_BIAS); | |
5fd691af | 86 | count = atomic_long_add_return(adjust, &sem->count); |
4898e640 PH |
87 | do { |
88 | if (count > 0) | |
89 | break; | |
5fd691af | 90 | if (atomic_long_try_cmpxchg(&sem->count, &count, count - adjust)) |
4898e640 PH |
91 | return; |
92 | } while (1); | |
93 | ||
94 | list_for_each_entry_safe(waiter, next, &sem->read_wait, list) { | |
95 | tsk = waiter->task; | |
96 | smp_mb(); | |
97 | waiter->task = NULL; | |
98 | wake_up_process(tsk); | |
99 | put_task_struct(tsk); | |
100 | } | |
101 | INIT_LIST_HEAD(&sem->read_wait); | |
102 | sem->wait_readers = 0; | |
103 | } | |
104 | ||
105 | static inline int writer_trylock(struct ld_semaphore *sem) | |
106 | { | |
5fd691af PZ |
107 | /* |
108 | * Only wake this writer if the active part of the count can be | |
4898e640 PH |
109 | * transitioned from 0 -> 1 |
110 | */ | |
5fd691af | 111 | long count = atomic_long_add_return(LDSEM_ACTIVE_BIAS, &sem->count); |
4898e640 PH |
112 | do { |
113 | if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS) | |
114 | return 1; | |
5fd691af | 115 | if (atomic_long_try_cmpxchg(&sem->count, &count, count - LDSEM_ACTIVE_BIAS)) |
4898e640 PH |
116 | return 0; |
117 | } while (1); | |
118 | } | |
119 | ||
120 | static void __ldsem_wake_writer(struct ld_semaphore *sem) | |
121 | { | |
122 | struct ldsem_waiter *waiter; | |
123 | ||
124 | waiter = list_entry(sem->write_wait.next, struct ldsem_waiter, list); | |
125 | wake_up_process(waiter->task); | |
126 | } | |
127 | ||
128 | /* | |
129 | * handle the lock release when processes blocked on it that can now run | |
130 | * - if we come here from up_xxxx(), then: | |
131 | * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) | |
132 | * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) | |
133 | * - the spinlock must be held by the caller | |
134 | * - woken process blocks are discarded from the list after having task zeroed | |
135 | */ | |
136 | static void __ldsem_wake(struct ld_semaphore *sem) | |
137 | { | |
138 | if (!list_empty(&sem->write_wait)) | |
139 | __ldsem_wake_writer(sem); | |
140 | else if (!list_empty(&sem->read_wait)) | |
141 | __ldsem_wake_readers(sem); | |
142 | } | |
143 | ||
144 | static void ldsem_wake(struct ld_semaphore *sem) | |
145 | { | |
146 | unsigned long flags; | |
147 | ||
148 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | |
149 | __ldsem_wake(sem); | |
150 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | |
151 | } | |
152 | ||
153 | /* | |
154 | * wait for the read lock to be granted | |
155 | */ | |
156 | static struct ld_semaphore __sched * | |
157 | down_read_failed(struct ld_semaphore *sem, long count, long timeout) | |
158 | { | |
159 | struct ldsem_waiter waiter; | |
4898e640 PH |
160 | long adjust = -LDSEM_ACTIVE_BIAS + LDSEM_WAIT_BIAS; |
161 | ||
162 | /* set up my own style of waitqueue */ | |
163 | raw_spin_lock_irq(&sem->wait_lock); | |
164 | ||
5fd691af PZ |
165 | /* |
166 | * Try to reverse the lock attempt but if the count has changed | |
4898e640 | 167 | * so that reversing fails, check if there are are no waiters, |
5fd691af PZ |
168 | * and early-out if not |
169 | */ | |
4898e640 | 170 | do { |
5fd691af PZ |
171 | if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust)) { |
172 | count += adjust; | |
4898e640 | 173 | break; |
5fd691af | 174 | } |
4898e640 PH |
175 | if (count > 0) { |
176 | raw_spin_unlock_irq(&sem->wait_lock); | |
177 | return sem; | |
178 | } | |
179 | } while (1); | |
180 | ||
181 | list_add_tail(&waiter.list, &sem->read_wait); | |
182 | sem->wait_readers++; | |
183 | ||
5376f2e7 DB |
184 | waiter.task = current; |
185 | get_task_struct(current); | |
4898e640 PH |
186 | |
187 | /* if there are no active locks, wake the new lock owner(s) */ | |
188 | if ((count & LDSEM_ACTIVE_MASK) == 0) | |
189 | __ldsem_wake(sem); | |
190 | ||
191 | raw_spin_unlock_irq(&sem->wait_lock); | |
192 | ||
193 | /* wait to be given the lock */ | |
194 | for (;;) { | |
642fa448 | 195 | set_current_state(TASK_UNINTERRUPTIBLE); |
4898e640 PH |
196 | |
197 | if (!waiter.task) | |
198 | break; | |
199 | if (!timeout) | |
200 | break; | |
201 | timeout = schedule_timeout(timeout); | |
202 | } | |
203 | ||
642fa448 | 204 | __set_current_state(TASK_RUNNING); |
4898e640 PH |
205 | |
206 | if (!timeout) { | |
5fd691af PZ |
207 | /* |
208 | * Lock timed out but check if this task was just | |
4898e640 | 209 | * granted lock ownership - if so, pretend there |
5fd691af PZ |
210 | * was no timeout; otherwise, cleanup lock wait. |
211 | */ | |
4898e640 PH |
212 | raw_spin_lock_irq(&sem->wait_lock); |
213 | if (waiter.task) { | |
5fd691af | 214 | atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count); |
2f588cee | 215 | sem->wait_readers--; |
4898e640 PH |
216 | list_del(&waiter.list); |
217 | raw_spin_unlock_irq(&sem->wait_lock); | |
218 | put_task_struct(waiter.task); | |
219 | return NULL; | |
220 | } | |
221 | raw_spin_unlock_irq(&sem->wait_lock); | |
222 | } | |
223 | ||
224 | return sem; | |
225 | } | |
226 | ||
227 | /* | |
228 | * wait for the write lock to be granted | |
229 | */ | |
230 | static struct ld_semaphore __sched * | |
231 | down_write_failed(struct ld_semaphore *sem, long count, long timeout) | |
232 | { | |
233 | struct ldsem_waiter waiter; | |
4898e640 PH |
234 | long adjust = -LDSEM_ACTIVE_BIAS; |
235 | int locked = 0; | |
236 | ||
237 | /* set up my own style of waitqueue */ | |
238 | raw_spin_lock_irq(&sem->wait_lock); | |
239 | ||
5fd691af PZ |
240 | /* |
241 | * Try to reverse the lock attempt but if the count has changed | |
4898e640 | 242 | * so that reversing fails, check if the lock is now owned, |
5fd691af PZ |
243 | * and early-out if so. |
244 | */ | |
4898e640 | 245 | do { |
5fd691af | 246 | if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust)) |
4898e640 PH |
247 | break; |
248 | if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS) { | |
249 | raw_spin_unlock_irq(&sem->wait_lock); | |
250 | return sem; | |
251 | } | |
252 | } while (1); | |
253 | ||
254 | list_add_tail(&waiter.list, &sem->write_wait); | |
255 | ||
5376f2e7 | 256 | waiter.task = current; |
4898e640 | 257 | |
642fa448 | 258 | set_current_state(TASK_UNINTERRUPTIBLE); |
4898e640 PH |
259 | for (;;) { |
260 | if (!timeout) | |
261 | break; | |
262 | raw_spin_unlock_irq(&sem->wait_lock); | |
263 | timeout = schedule_timeout(timeout); | |
264 | raw_spin_lock_irq(&sem->wait_lock); | |
642fa448 | 265 | set_current_state(TASK_UNINTERRUPTIBLE); |
f9ce5ccf GKH |
266 | locked = writer_trylock(sem); |
267 | if (locked) | |
4898e640 PH |
268 | break; |
269 | } | |
270 | ||
271 | if (!locked) | |
5fd691af | 272 | atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count); |
4898e640 | 273 | list_del(&waiter.list); |
231f8fd0 DS |
274 | |
275 | /* | |
276 | * In case of timeout, wake up every reader who gave the right of way | |
277 | * to writer. Prevent separation readers into two groups: | |
278 | * one that helds semaphore and another that sleeps. | |
279 | * (in case of no contention with a writer) | |
280 | */ | |
281 | if (!locked && list_empty(&sem->write_wait)) | |
282 | __ldsem_wake_readers(sem); | |
283 | ||
4898e640 PH |
284 | raw_spin_unlock_irq(&sem->wait_lock); |
285 | ||
642fa448 | 286 | __set_current_state(TASK_RUNNING); |
4898e640 PH |
287 | |
288 | /* lock wait may have timed out */ | |
289 | if (!locked) | |
290 | return NULL; | |
291 | return sem; | |
292 | } | |
293 | ||
294 | ||
295 | ||
fc0285f2 | 296 | static int __ldsem_down_read_nested(struct ld_semaphore *sem, |
4898e640 PH |
297 | int subclass, long timeout) |
298 | { | |
299 | long count; | |
300 | ||
c18ad0cf | 301 | rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); |
4898e640 | 302 | |
5fd691af | 303 | count = atomic_long_add_return(LDSEM_READ_BIAS, &sem->count); |
4898e640 | 304 | if (count <= 0) { |
c18ad0cf | 305 | lock_contended(&sem->dep_map, _RET_IP_); |
4898e640 | 306 | if (!down_read_failed(sem, count, timeout)) { |
c18ad0cf | 307 | rwsem_release(&sem->dep_map, 1, _RET_IP_); |
4898e640 PH |
308 | return 0; |
309 | } | |
310 | } | |
c18ad0cf | 311 | lock_acquired(&sem->dep_map, _RET_IP_); |
4898e640 PH |
312 | return 1; |
313 | } | |
314 | ||
5ef6504e | 315 | static int __ldsem_down_write_nested(struct ld_semaphore *sem, |
4898e640 PH |
316 | int subclass, long timeout) |
317 | { | |
318 | long count; | |
319 | ||
c18ad0cf | 320 | rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); |
4898e640 | 321 | |
5fd691af | 322 | count = atomic_long_add_return(LDSEM_WRITE_BIAS, &sem->count); |
4898e640 | 323 | if ((count & LDSEM_ACTIVE_MASK) != LDSEM_ACTIVE_BIAS) { |
c18ad0cf | 324 | lock_contended(&sem->dep_map, _RET_IP_); |
4898e640 | 325 | if (!down_write_failed(sem, count, timeout)) { |
c18ad0cf | 326 | rwsem_release(&sem->dep_map, 1, _RET_IP_); |
4898e640 PH |
327 | return 0; |
328 | } | |
329 | } | |
c18ad0cf | 330 | lock_acquired(&sem->dep_map, _RET_IP_); |
4898e640 PH |
331 | return 1; |
332 | } | |
333 | ||
334 | ||
335 | /* | |
336 | * lock for reading -- returns 1 if successful, 0 if timed out | |
337 | */ | |
338 | int __sched ldsem_down_read(struct ld_semaphore *sem, long timeout) | |
339 | { | |
340 | might_sleep(); | |
341 | return __ldsem_down_read_nested(sem, 0, timeout); | |
342 | } | |
343 | ||
344 | /* | |
345 | * trylock for reading -- returns 1 if successful, 0 if contention | |
346 | */ | |
347 | int ldsem_down_read_trylock(struct ld_semaphore *sem) | |
348 | { | |
5fd691af | 349 | long count = atomic_long_read(&sem->count); |
4898e640 PH |
350 | |
351 | while (count >= 0) { | |
5fd691af | 352 | if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_READ_BIAS)) { |
c18ad0cf PZ |
353 | rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); |
354 | lock_acquired(&sem->dep_map, _RET_IP_); | |
4898e640 PH |
355 | return 1; |
356 | } | |
357 | } | |
358 | return 0; | |
359 | } | |
360 | ||
361 | /* | |
362 | * lock for writing -- returns 1 if successful, 0 if timed out | |
363 | */ | |
364 | int __sched ldsem_down_write(struct ld_semaphore *sem, long timeout) | |
365 | { | |
366 | might_sleep(); | |
367 | return __ldsem_down_write_nested(sem, 0, timeout); | |
368 | } | |
369 | ||
370 | /* | |
371 | * trylock for writing -- returns 1 if successful, 0 if contention | |
372 | */ | |
373 | int ldsem_down_write_trylock(struct ld_semaphore *sem) | |
374 | { | |
5fd691af | 375 | long count = atomic_long_read(&sem->count); |
4898e640 PH |
376 | |
377 | while ((count & LDSEM_ACTIVE_MASK) == 0) { | |
5fd691af | 378 | if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_WRITE_BIAS)) { |
c18ad0cf PZ |
379 | rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); |
380 | lock_acquired(&sem->dep_map, _RET_IP_); | |
4898e640 PH |
381 | return 1; |
382 | } | |
383 | } | |
384 | return 0; | |
385 | } | |
386 | ||
387 | /* | |
388 | * release a read lock | |
389 | */ | |
390 | void ldsem_up_read(struct ld_semaphore *sem) | |
391 | { | |
392 | long count; | |
393 | ||
c18ad0cf | 394 | rwsem_release(&sem->dep_map, 1, _RET_IP_); |
4898e640 | 395 | |
5fd691af | 396 | count = atomic_long_add_return(-LDSEM_READ_BIAS, &sem->count); |
4898e640 PH |
397 | if (count < 0 && (count & LDSEM_ACTIVE_MASK) == 0) |
398 | ldsem_wake(sem); | |
399 | } | |
400 | ||
401 | /* | |
402 | * release a write lock | |
403 | */ | |
404 | void ldsem_up_write(struct ld_semaphore *sem) | |
405 | { | |
406 | long count; | |
407 | ||
c18ad0cf | 408 | rwsem_release(&sem->dep_map, 1, _RET_IP_); |
4898e640 | 409 | |
5fd691af | 410 | count = atomic_long_add_return(-LDSEM_WRITE_BIAS, &sem->count); |
4898e640 PH |
411 | if (count < 0) |
412 | ldsem_wake(sem); | |
413 | } | |
414 | ||
415 | ||
416 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
417 | ||
/*
 * Lock for reading with an explicit lockdep @subclass.
 * May sleep.  Returns 1 if successful, 0 if timed out.
 */
int ldsem_down_read_nested(struct ld_semaphore *sem, int subclass, long timeout)
{
	int taken;

	might_sleep();
	taken = __ldsem_down_read_nested(sem, subclass, timeout);
	return taken;
}
423 | ||
/*
 * Lock for writing with an explicit lockdep @subclass.
 * May sleep.  Returns 1 if successful, 0 if timed out.
 */
int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
			    long timeout)
{
	int taken;

	might_sleep();
	taken = __ldsem_down_write_nested(sem, subclass, timeout);
	return taken;
}
430 | ||
431 | #endif |