// SPDX-License-Identifier: GPL-2.0-only
#include <linux/atomic.h>
#include <linux/percpu.h>
#include <linux/wait.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/errno.h>

int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
			const char *name, struct lock_class_key *key)
{
	sem->read_count = alloc_percpu(int);
	if (unlikely(!sem->read_count))
		return -ENOMEM;

	rcu_sync_init(&sem->rss);
	rcuwait_init(&sem->writer);
	init_waitqueue_head(&sem->waiters);
	atomic_set(&sem->block, 0);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
	return 0;
}
EXPORT_SYMBOL_GPL(__percpu_init_rwsem);

void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
{
	/*
	 * XXX: temporary kludge. The error path in alloc_super()
	 * assumes that percpu_free_rwsem() is safe after kzalloc().
	 */
	if (!sem->read_count)
		return;

	rcu_sync_dtor(&sem->rss);
	free_percpu(sem->read_count);
	sem->read_count = NULL; /* catch use after free bugs */
}
EXPORT_SYMBOL_GPL(percpu_free_rwsem);
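
/*
 * Usage sketch (hypothetical caller, not part of this file): readers pay only
 * a per-CPU increment on the fast path, while the rare writer pays for
 * rcu_sync and for waiting out all readers. The my_ctx/my_data names below
 * are made up for illustration; percpu_init_rwsem(), percpu_down_read(),
 * percpu_down_write() and percpu_free_rwsem() are the real API from
 * <linux/percpu-rwsem.h>.
 */
#include <linux/percpu-rwsem.h>

struct my_ctx {
	struct percpu_rw_semaphore rwsem;
	int my_data;
};

static int my_ctx_setup(struct my_ctx *ctx)
{
	return percpu_init_rwsem(&ctx->rwsem);	/* may return -ENOMEM */
}

static int my_ctx_read(struct my_ctx *ctx)
{
	int val;

	percpu_down_read(&ctx->rwsem);		/* cheap: per-CPU counter */
	val = ctx->my_data;
	percpu_up_read(&ctx->rwsem);
	return val;
}

static void my_ctx_write(struct my_ctx *ctx, int val)
{
	percpu_down_write(&ctx->rwsem);		/* slow: waits for all readers */
	ctx->my_data = val;
	percpu_up_write(&ctx->rwsem);
}

static void my_ctx_teardown(struct my_ctx *ctx)
{
	percpu_free_rwsem(&ctx->rwsem);
}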

static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
{
	this_cpu_inc(*sem->read_count);

	/*
	 * Due to having preemption disabled the decrement happens on
	 * the same CPU as the increment, avoiding the
	 * increment-on-one-CPU-and-decrement-on-another problem.
	 *
	 * If the reader misses the writer's assignment of sem->block, then the
	 * writer is guaranteed to see the reader's increment.
	 *
	 * Conversely, any readers that increment their sem->read_count after
	 * the writer looks are guaranteed to see the sem->block value, which
	 * in turn means that they are guaranteed to immediately decrement
	 * their sem->read_count, so that it doesn't matter that the writer
	 * missed them.
	 */

	smp_mb(); /* A matches D */

	/*
	 * If !sem->block the critical section starts here, matched by the
	 * release in percpu_up_write().
	 */
	if (likely(!atomic_read_acquire(&sem->block)))
		return true;

	this_cpu_dec(*sem->read_count);

	/* Prod writer to re-evaluate readers_active_check() */
	rcuwait_wake_up(&sem->writer);

	return false;
}
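
/*
 * Illustrative userspace model (a sketch under C11 atomics, not kernel code)
 * of the barrier pairing above: the reader publishes its increment before
 * checking sem->block (barrier "A"), and the writer publishes sem->block
 * before summing the counters (barrier "D", implied by atomic_xchg() in the
 * kernel). In this store-buffering pattern at least one side must observe
 * the other's store, so a fast-path reader can never slip past the writer
 * unnoticed.
 */
#include <stdatomic.h>
#include <stdbool.h>

static _Atomic int model_block;		/* stands in for sem->block         */
static _Atomic int model_read_count;	/* stands in for the read_count sum */

static bool model_reader_trylock(void)
{
	atomic_fetch_add_explicit(&model_read_count, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);		/* "A" */
	if (!atomic_load_explicit(&model_block, memory_order_acquire))
		return true;				/* fast path taken */
	atomic_fetch_sub_explicit(&model_read_count, 1, memory_order_relaxed);
	return false;					/* go to slow path */
}

static bool model_writer_sees_readers(void)
{
	atomic_store_explicit(&model_block, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);		/* "D" */
	/* Any reader not visible here must itself have seen model_block. */
	return atomic_load_explicit(&model_read_count, memory_order_relaxed) != 0;
}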

static inline bool __percpu_down_write_trylock(struct percpu_rw_semaphore *sem)
{
	if (atomic_read(&sem->block))
		return false;

	return atomic_xchg(&sem->block, 1) == 0;
}

static bool __percpu_rwsem_trylock(struct percpu_rw_semaphore *sem, bool reader)
{
	if (reader) {
		bool ret;

		preempt_disable();
		ret = __percpu_down_read_trylock(sem);
		preempt_enable();

		return ret;
	}
	return __percpu_down_write_trylock(sem);
}

/*
 * The return value of wait_queue_entry::func means:
 *
 *  <0 - error, wakeup is terminated and the error is returned
 *   0 - no wakeup, a next waiter is tried
 *  >0 - woken, if EXCLUSIVE, counted towards @nr_exclusive.
 *
 * We use EXCLUSIVE for both readers and writers to preserve FIFO order,
 * and play games with the return value to allow waking multiple readers.
 *
 * Specifically, we wake readers until we've woken a single writer, or until a
 * trylock fails.
 */
static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
				      unsigned int mode, int wake_flags,
				      void *key)
{
	bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
	struct percpu_rw_semaphore *sem = key;
	struct task_struct *p;

	/* concurrent against percpu_down_write(), can get stolen */
	if (!__percpu_rwsem_trylock(sem, reader))
		return 1;

	p = get_task_struct(wq_entry->private);
	list_del_init(&wq_entry->entry);
	smp_store_release(&wq_entry->private, NULL);

	wake_up_process(p);
	put_task_struct(p);

	return !reader; /* wake (readers until) 1 writer */
}

static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
{
	DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
	bool wait;

	spin_lock_irq(&sem->waiters.lock);
	/*
	 * Serialize against the wakeup in percpu_up_write(); if we fail
	 * the trylock, the wakeup must see us on the list.
	 */
	wait = !__percpu_rwsem_trylock(sem, reader);
	if (wait) {
		wq_entry.flags |= WQ_FLAG_EXCLUSIVE | reader * WQ_FLAG_CUSTOM;
		__add_wait_queue_entry_tail(&sem->waiters, &wq_entry);
	}
	spin_unlock_irq(&sem->waiters.lock);

	while (wait) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!smp_load_acquire(&wq_entry.private))
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);
}

bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
{
	if (__percpu_down_read_trylock(sem))
		return true;

	if (try)
		return false;

	preempt_enable();
	percpu_rwsem_wait(sem, /* .reader = */ true);
	preempt_disable();

	return true;
}
EXPORT_SYMBOL_GPL(__percpu_down_read);

#define per_cpu_sum(var)						\
({									\
	typeof(var) __sum = 0;						\
	int cpu;							\
	compiletime_assert_atomic_type(__sum);				\
	for_each_possible_cpu(cpu)					\
		__sum += per_cpu(var, cpu);				\
	__sum;								\
})

/*
 * Return true if the modular sum of the sem->read_count per-CPU variable is
 * zero. If this sum is zero, then it is stable due to the fact that if any
 * newly arriving readers increment a given counter, they will immediately
 * decrement that same counter.
 *
 * Assumes sem->block is set.
 */
static bool readers_active_check(struct percpu_rw_semaphore *sem)
{
	if (per_cpu_sum(*sem->read_count) != 0)
		return false;

	/*
	 * If we observed the decrement, ensure we see the entire critical
	 * section.
	 */

	smp_mb(); /* C matches B */

	return true;
}

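/*
 * Worked example (illustrative sketch, not part of this file): a reader may
 * take the lock on one CPU and release it on another, so an individual
 * counter can go negative; only the sum across CPUs is meaningful. Once
 * sem->block is set the sum is stable at zero, because any late reader undoes
 * its increment immediately.
 */
static int modular_sum_example(void)
{
	int read_count[2] = { 0, 0 };	/* pretend there are two CPUs */

	read_count[0]++;	/* percpu_down_read() ran on CPU0: sum == 1 */
	/* the reader migrates to CPU1 inside its critical section */
	read_count[1]--;	/* percpu_up_read() ran on CPU1:   sum == 0 */

	return read_count[0] + read_count[1];	/* 0, though neither entry is */
}
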
void percpu_down_write(struct percpu_rw_semaphore *sem)
{
	might_sleep();
	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);

	/* Notify readers to take the slow path. */
	rcu_sync_enter(&sem->rss);

	/*
	 * Try to set sem->block; this provides writer-writer exclusion.
	 * Having sem->block set makes new readers block.
	 */
	if (!__percpu_down_write_trylock(sem))
		percpu_rwsem_wait(sem, /* .reader = */ false);

	/* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */

	/*
	 * If the readers don't see our store of sem->block, then we are
	 * guaranteed to see their sem->read_count increment, and therefore
	 * will wait for them.
	 */

	/* Wait for all active readers to complete. */
	rcuwait_wait_event(&sem->writer, readers_active_check(sem),
			   TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL_GPL(percpu_down_write);

void percpu_up_write(struct percpu_rw_semaphore *sem)
{
	rwsem_release(&sem->dep_map, _RET_IP_);

	/*
	 * Signal that the writer is done, no fast path yet.
	 *
	 * One reason that we cannot just immediately flip to readers_fast is
	 * that new readers might fail to see the results of this writer's
	 * critical section.
	 *
	 * Therefore we force it through the slow path which guarantees an
	 * acquire and thereby guarantees the critical section's consistency.
	 */
	atomic_set_release(&sem->block, 0);

	/*
	 * Prod any pending reader/writer to make progress.
	 */
	__wake_up(&sem->waiters, TASK_NORMAL, 1, sem);

	/*
	 * Once this completes (at least one RCU-sched grace period hence) the
	 * reader fast path will be available again. Safe to use outside the
	 * exclusive write lock because it's counting.
	 */
	rcu_sync_exit(&sem->rss);
}
EXPORT_SYMBOL_GPL(percpu_up_write);