/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MMAP_LOCK_H
#define _LINUX_MMAP_LOCK_H

/* Avoid a dependency loop by declaring here. */
extern int rcuwait_wake_up(struct rcuwait *w);

#include <linux/lockdep.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/rwsem.h>
#include <linux/tracepoint-defs.h>
#include <linux/types.h>
#include <linux/cleanup.h>

#define MMAP_LOCK_INITIALIZER(name) \
        .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),

DECLARE_TRACEPOINT(mmap_lock_start_locking);
DECLARE_TRACEPOINT(mmap_lock_acquire_returned);
DECLARE_TRACEPOINT(mmap_lock_released);

#ifdef CONFIG_TRACING

void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write);
void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
                                           bool success);
void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write);

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
                                                   bool write)
{
        if (tracepoint_enabled(mmap_lock_start_locking))
                __mmap_lock_do_trace_start_locking(mm, write);
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
                                                      bool write, bool success)
{
        if (tracepoint_enabled(mmap_lock_acquire_returned))
                __mmap_lock_do_trace_acquire_returned(mm, write, success);
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
        if (tracepoint_enabled(mmap_lock_released))
                __mmap_lock_do_trace_released(mm, write);
}

#else /* !CONFIG_TRACING */

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
                                                   bool write)
{
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
                                                      bool write, bool success)
{
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
}

#endif /* CONFIG_TRACING */

static inline void mmap_assert_locked(const struct mm_struct *mm)
{
        rwsem_assert_held(&mm->mmap_lock);
}

static inline void mmap_assert_write_locked(const struct mm_struct *mm)
{
        rwsem_assert_held_write(&mm->mmap_lock);
}

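/*
 * Usage sketch (illustrative only, not part of this header): helpers that
 * require the caller to hold mmap_lock can document and enforce that contract
 * with the assertions above. The function and field names below are
 * hypothetical.
 *
 *      static void example_update_mm_state(struct mm_struct *mm)
 *      {
 *              mmap_assert_write_locked(mm);
 *              // safe to modify write-protected mm state here
 *      }
 */
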
#ifdef CONFIG_PER_VMA_LOCK

static inline void mm_lock_seqcount_init(struct mm_struct *mm)
{
        seqcount_init(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_begin(struct mm_struct *mm)
{
        do_raw_write_seqcount_begin(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_end(struct mm_struct *mm)
{
        ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq);
        do_raw_write_seqcount_end(&mm->mm_lock_seq);
}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
        /*
         * Since mmap_lock is a sleeping lock, and waiting for it to become
         * unlocked is more or less equivalent to taking it ourselves, don't
         * bother with the speculative path if mmap_lock is already
         * write-locked; take the slow path instead, which takes the lock.
         */
        return raw_seqcount_try_begin(&mm->mm_lock_seq, *seq);
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
        return read_seqcount_retry(&mm->mm_lock_seq, seq);
}

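/*
 * Usage sketch (illustrative only, not from this header): a lockless reader
 * can speculate across mmap_lock using the sequence count and fall back to
 * taking the lock when a writer was, or became, active. The reader body is
 * hypothetical.
 *
 *      unsigned int seq;
 *      bool ok = false;
 *
 *      if (mmap_lock_speculate_try_begin(mm, &seq)) {
 *              // read mm state without holding mmap_lock
 *              ok = !mmap_lock_speculate_retry(mm, seq);
 *      }
 *      if (!ok) {
 *              mmap_read_lock(mm);     // slow path: take the lock
 *              // ... repeat the read under the lock ...
 *              mmap_read_unlock(mm);
 *      }
 */
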
static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
        static struct lock_class_key lockdep_key;

        lockdep_init_map(&vma->vmlock_dep_map, "vm_lock", &lockdep_key, 0);
#endif
        if (reset_refcnt)
                refcount_set(&vma->vm_refcnt, 0);
        vma->vm_lock_seq = UINT_MAX;
}

static inline bool is_vma_writer_only(int refcnt)
{
        /*
         * With a writer and no readers, refcnt is VMA_LOCK_OFFSET if the vma
         * is detached and (VMA_LOCK_OFFSET + 1) if it is attached. Waiting on
         * a detached vma happens only in vma_mark_detached() and is a rare
         * case, therefore most of the time there will be no unnecessary wakeup.
         */
        return refcnt & VMA_LOCK_OFFSET && refcnt <= VMA_LOCK_OFFSET + 1;
}

static inline void vma_refcount_put(struct vm_area_struct *vma)
{
        /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
        struct mm_struct *mm = vma->vm_mm;
        int oldcnt;

        rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
        if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) {
                if (is_vma_writer_only(oldcnt - 1))
                        rcuwait_wake_up(&mm->vma_writer_wait);
        }
}

/*
 * Try to read-lock a vma. The function is allowed to occasionally yield a
 * false locked result to avoid performance overhead, in which case we fall
 * back to using mmap_lock. The function should never yield a false unlocked
 * result. A false locked result is possible if mm_lock_seq overflows or if the
 * vma gets reused and attached to a different mm before we lock it.
 * Returns the vma on success, NULL on failure to lock, and ERR_PTR(-EAGAIN)
 * if the vma got detached.
 */
static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
                                                    struct vm_area_struct *vma)
{
        int oldcnt;

        /*
         * Check before locking. A race might cause a false locked result.
         * We can use READ_ONCE() for the mm_lock_seq here, and don't need
         * ACQUIRE semantics, because this is just a lockless check whose result
         * we don't rely on for anything - the mm_lock_seq read against which we
         * need ordering is below.
         */
        if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence))
                return NULL;

        /*
         * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire()
         * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET.
         * Acquire fence is required here to avoid reordering against later
         * vm_lock_seq check and checks inside lock_vma_under_rcu().
         */
        if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
                                                              VMA_REF_LIMIT))) {
                /* return EAGAIN if vma got detached from under us */
                return oldcnt ? NULL : ERR_PTR(-EAGAIN);
        }

        rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
        /*
         * Overflow of vm_lock_seq/mm_lock_seq might produce a false locked
         * result. A false unlocked result is impossible because we modify and
         * check vma->vm_lock_seq under vma->vm_refcnt protection and
         * mm->mm_lock_seq modification invalidates all existing locks.
         *
         * We must use ACQUIRE semantics for the mm_lock_seq so that if we are
         * racing with vma_end_write_all(), we only start reading from the VMA
         * after it has been unlocked.
         * This pairs with RELEASE semantics in vma_end_write_all().
         */
        if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) {
                vma_refcount_put(vma);
                return NULL;
        }

        return vma;
}

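/*
 * Usage sketch (illustrative only; a simplified version of what
 * lock_vma_under_rcu() in mm/memory.c does with the return value). The lookup
 * step is elided and error handling is abbreviated.
 *
 *      rcu_read_lock();
 *      vma = <look up the vma covering addr under RCU>;
 *      if (vma)
 *              vma = vma_start_read(mm, vma);
 *      rcu_read_unlock();
 *
 *      if (IS_ERR_OR_NULL(vma)) {
 *              // NULL: the vma is (or was) write-locked; fall back to mmap_lock.
 *              // ERR_PTR(-EAGAIN): the vma was detached; the lookup can be retried.
 *      } else {
 *              // Read lock held: revalidate vma->vm_mm and the address range,
 *              // use the vma, then drop the lock with vma_end_read(vma).
 *      }
 */
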
/*
 * Use only while holding the mmap read lock, which guarantees that locking
 * will not fail (nobody can concurrently write-lock the vma). vma_start_read()
 * should not be used in such cases because it might fail due to mm_lock_seq
 * overflow. This functionality is used to obtain a vma read lock and then drop
 * the mmap read lock.
 */
static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
{
        int oldcnt;

        mmap_assert_locked(vma->vm_mm);
        if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
                                                              VMA_REF_LIMIT)))
                return false;

        rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
        return true;
}

/*
 * Use only while holding the mmap read lock, which guarantees that locking
 * will not fail (nobody can concurrently write-lock the vma). vma_start_read()
 * should not be used in such cases because it might fail due to mm_lock_seq
 * overflow. This functionality is used to obtain a vma read lock and then drop
 * the mmap read lock.
 */
static inline bool vma_start_read_locked(struct vm_area_struct *vma)
{
        return vma_start_read_locked_nested(vma, 0);
}

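/*
 * Usage sketch (illustrative only): converting an mmap read lock into a
 * per-VMA read lock so the mmap_lock can be dropped early. The surrounding
 * lookup and work are hypothetical.
 *
 *      mmap_read_lock(mm);
 *      vma = find_vma(mm, addr);
 *      if (vma && vma_start_read_locked(vma)) {
 *              mmap_read_unlock(mm);
 *              // operate on vma under its read lock only
 *              vma_end_read(vma);
 *      } else {
 *              // keep working under mmap_lock (or bail out)
 *              mmap_read_unlock(mm);
 *      }
 */
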
static inline void vma_end_read(struct vm_area_struct *vma)
{
        vma_refcount_put(vma);
}

/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
{
        mmap_assert_write_locked(vma->vm_mm);

        /*
         * The current task is holding mmap_write_lock, so neither
         * vma->vm_lock_seq nor mm->mm_lock_seq can be concurrently modified.
         */
        *mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
        return (vma->vm_lock_seq == *mm_lock_seq);
}

void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq);

/*
 * Begin writing to a VMA.
 * Exclude concurrent readers under the per-VMA lock until the currently
 * write-locked mmap_lock is dropped or downgraded.
 */
static inline void vma_start_write(struct vm_area_struct *vma)
{
        unsigned int mm_lock_seq;

        if (__is_vma_write_locked(vma, &mm_lock_seq))
                return;

        __vma_start_write(vma, mm_lock_seq);
}

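/*
 * Usage sketch (illustrative only): a writer takes mmap_lock for writing,
 * write-locks each vma it is about to modify, and all per-VMA write locks are
 * dropped together when the mmap_lock is released or downgraded (via
 * vma_end_write_all() below).
 *
 *      mmap_write_lock(mm);
 *      vma_start_write(vma);
 *      // ... modify the vma (flags, range, anon_vma, ...) ...
 *      mmap_write_unlock(mm);  // implies vma_end_write_all(mm)
 */
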
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{
        unsigned int mm_lock_seq;

        VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
}

static inline void vma_assert_locked(struct vm_area_struct *vma)
{
        unsigned int mm_lock_seq;

        VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 &&
                      !__is_vma_write_locked(vma, &mm_lock_seq), vma);
}

/*
 * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
 * assertions should be made either under mmap_write_lock or when the object
 * has been isolated under mmap_write_lock, ensuring no competing writers.
 */
static inline void vma_assert_attached(struct vm_area_struct *vma)
{
        WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
}

static inline void vma_assert_detached(struct vm_area_struct *vma)
{
        WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
}

static inline void vma_mark_attached(struct vm_area_struct *vma)
{
        vma_assert_write_locked(vma);
        vma_assert_detached(vma);
        refcount_set_release(&vma->vm_refcnt, 1);
}

void vma_mark_detached(struct vm_area_struct *vma);

struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
                                          unsigned long address);

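/*
 * Usage sketch (illustrative only): the pattern used by per-VMA-lock aware
 * page fault handlers. Fault handling details are elided.
 *
 *      vma = lock_vma_under_rcu(mm, address);
 *      if (!vma)
 *              goto fall_back_to_mmap_lock;
 *      // handle the fault under the vma read lock
 *      vma_end_read(vma);
 */
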
#else /* CONFIG_PER_VMA_LOCK */

static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
        return false;
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
        return true;
}

static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {}
static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
                                                    struct vm_area_struct *vma)
{ return NULL; }
static inline void vma_end_read(struct vm_area_struct *vma) {}
static inline void vma_start_write(struct vm_area_struct *vma) {}
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{ mmap_assert_write_locked(vma->vm_mm); }
static inline void vma_assert_attached(struct vm_area_struct *vma) {}
static inline void vma_assert_detached(struct vm_area_struct *vma) {}
static inline void vma_mark_attached(struct vm_area_struct *vma) {}
static inline void vma_mark_detached(struct vm_area_struct *vma) {}

static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
                unsigned long address)
{
        return NULL;
}

static inline void vma_assert_locked(struct vm_area_struct *vma)
{
        mmap_assert_locked(vma->vm_mm);
}

#endif /* CONFIG_PER_VMA_LOCK */

static inline void mmap_write_lock(struct mm_struct *mm)
{
        __mmap_lock_trace_start_locking(mm, true);
        down_write(&mm->mmap_lock);
        mm_lock_seqcount_begin(mm);
        __mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
{
        __mmap_lock_trace_start_locking(mm, true);
        down_write_nested(&mm->mmap_lock, subclass);
        mm_lock_seqcount_begin(mm);
        __mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline int mmap_write_lock_killable(struct mm_struct *mm)
{
        int ret;

        __mmap_lock_trace_start_locking(mm, true);
        ret = down_write_killable(&mm->mmap_lock);
        if (!ret)
                mm_lock_seqcount_begin(mm);
        __mmap_lock_trace_acquire_returned(mm, true, ret == 0);
        return ret;
}

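/*
 * Usage sketch (illustrative only): the killable variants return -EINTR when a
 * fatal signal is pending, and callers are expected to bail out rather than
 * wait.
 *
 *      if (mmap_write_lock_killable(mm))
 *              return -EINTR;
 *      // ... modify the address space ...
 *      mmap_write_unlock(mm);
 */
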
/*
 * Drop all currently-held per-VMA locks.
 * This is called from the mmap_lock implementation directly before releasing
 * a write-locked mmap_lock (or downgrading it to read-locked).
 * This should normally NOT be called manually from other places.
 * If you want to call this manually anyway, keep in mind that this will release
 * *all* VMA write locks, including ones from further up the stack.
 */
static inline void vma_end_write_all(struct mm_struct *mm)
{
        mmap_assert_write_locked(mm);
        mm_lock_seqcount_end(mm);
}

static inline void mmap_write_unlock(struct mm_struct *mm)
{
        __mmap_lock_trace_released(mm, true);
        vma_end_write_all(mm);
        up_write(&mm->mmap_lock);
}

static inline void mmap_write_downgrade(struct mm_struct *mm)
{
        __mmap_lock_trace_acquire_returned(mm, false, true);
        vma_end_write_all(mm);
        downgrade_write(&mm->mmap_lock);
}

static inline void mmap_read_lock(struct mm_struct *mm)
{
        __mmap_lock_trace_start_locking(mm, false);
        down_read(&mm->mmap_lock);
        __mmap_lock_trace_acquire_returned(mm, false, true);
}

static inline int mmap_read_lock_killable(struct mm_struct *mm)
{
        int ret;

        __mmap_lock_trace_start_locking(mm, false);
        ret = down_read_killable(&mm->mmap_lock);
        __mmap_lock_trace_acquire_returned(mm, false, ret == 0);
        return ret;
}

static inline bool mmap_read_trylock(struct mm_struct *mm)
{
        bool ret;

        __mmap_lock_trace_start_locking(mm, false);
        ret = down_read_trylock(&mm->mmap_lock) != 0;
        __mmap_lock_trace_acquire_returned(mm, false, ret);
        return ret;
}

static inline void mmap_read_unlock(struct mm_struct *mm)
{
        __mmap_lock_trace_released(mm, false);
        up_read(&mm->mmap_lock);
}

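/*
 * Usage sketch (illustrative only): opportunistic readers that prefer not to
 * sleep waiting for the lock use the trylock form and simply skip the work on
 * contention.
 *
 *      if (!mmap_read_trylock(mm))
 *              return;                 // contended, try again later
 *      // ... read the address space ...
 *      mmap_read_unlock(mm);
 */
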
DEFINE_GUARD(mmap_read_lock, struct mm_struct *,
             mmap_read_lock(_T), mmap_read_unlock(_T))

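/*
 * Usage sketch (illustrative only): with the guard definition above, the
 * scope-based helpers from <linux/cleanup.h> release the lock automatically.
 *
 *      guard(mmap_read_lock)(mm);
 *      // mmap_lock is read-locked here and released when the scope ends
 *
 * or, for a narrower region:
 *
 *      scoped_guard(mmap_read_lock, mm) {
 *              // read-locked only inside this block
 *      }
 */
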
static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
{
        __mmap_lock_trace_released(mm, false);
        up_read_non_owner(&mm->mmap_lock);
}

static inline int mmap_lock_is_contended(struct mm_struct *mm)
{
        return rwsem_is_contended(&mm->mmap_lock);
}

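/*
 * Usage sketch (illustrative only): long-running loops that hold mmap_lock for
 * reading can use the contention check to yield the lock periodically, as
 * several mm walkers do. The loop body is hypothetical.
 *
 *      mmap_read_lock(mm);
 *      while (<more work>) {
 *              // ... process one chunk ...
 *              if (mmap_lock_is_contended(mm)) {
 *                      mmap_read_unlock(mm);
 *                      cond_resched();
 *                      mmap_read_lock(mm);
 *                      // revalidate state after reacquiring the lock
 *              }
 *      }
 *      mmap_read_unlock(mm);
 */
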
#endif /* _LINUX_MMAP_LOCK_H */