Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
1da177e4 LT |
2 | /* |
3 | * mm/mremap.c | |
4 | * | |
5 | * (C) Copyright 1996 Linus Torvalds | |
6 | * | |
046c6884 | 7 | * Address space accounting code <alan@lxorguk.ukuu.org.uk> |
1da177e4 LT |
8 | * (C) Copyright 2002 Red Hat Inc, All Rights Reserved |
9 | */ | |
10 | ||
11 | #include <linux/mm.h> | |
ca3d76b0 | 12 | #include <linux/mm_inline.h> |
1da177e4 | 13 | #include <linux/hugetlb.h> |
1da177e4 | 14 | #include <linux/shm.h> |
1ff82995 | 15 | #include <linux/ksm.h> |
1da177e4 LT |
16 | #include <linux/mman.h> |
17 | #include <linux/swap.h> | |
c59ede7b | 18 | #include <linux/capability.h> |
1da177e4 | 19 | #include <linux/fs.h> |
6dec97dc | 20 | #include <linux/swapops.h> |
1da177e4 LT |
21 | #include <linux/highmem.h> |
22 | #include <linux/security.h> | |
23 | #include <linux/syscalls.h> | |
cddb8a5c | 24 | #include <linux/mmu_notifier.h> |
2581d202 | 25 | #include <linux/uaccess.h> |
72f87654 | 26 | #include <linux/userfaultfd_k.h> |
ca3d76b0 | 27 | #include <linux/mempolicy.h> |
1da177e4 | 28 | |
1da177e4 | 29 | #include <asm/cacheflush.h> |
3bbda69c | 30 | #include <asm/tlb.h> |
0881ace2 | 31 | #include <asm/pgalloc.h> |
1da177e4 | 32 | |
ba470de4 RR |
33 | #include "internal.h" |
34 | ||
221bf5ca LS |
35 | /* Classify the kind of remap operation being performed. */ |
36 | enum mremap_type { | |
37 | MREMAP_INVALID, /* Initial state. */ | |
38 | MREMAP_NO_RESIZE, /* old_len == new_len, if not moved, do nothing. */ | |
39 | MREMAP_SHRINK, /* old_len > new_len. */ | |
40 | MREMAP_EXPAND, /* old_len < new_len. */ | |
41 | }; | |
42 | ||
43 | /* | |
44 | * Describes a VMA mremap() operation and is threaded throughout it. | |
45 | * | |
46 | * Any of the fields may be mutated by the operation; however, these values will |
47 | * always accurately reflect the remap (for instance, we may adjust lengths and | |
48 | * delta to account for hugetlb alignment). | |
49 | */ | |
50 | struct vma_remap_struct { | |
51 | /* User-provided state. */ | |
52 | unsigned long addr; /* User-specified address from which we remap. */ | |
53 | unsigned long old_len; /* Length of range being remapped. */ | |
54 | unsigned long new_len; /* Desired new length of mapping. */ | |
000c0691 | 55 | const unsigned long flags; /* user-specified MREMAP_* flags. */ |
221bf5ca LS |
56 | unsigned long new_addr; /* Optionally, desired new address. */ |
57 | ||
58 | /* uffd state. */ | |
59 | struct vm_userfaultfd_ctx *uf; | |
60 | struct list_head *uf_unmap_early; | |
61 | struct list_head *uf_unmap; | |
62 | ||
63 | /* VMA state, determined in do_mremap(). */ | |
64 | struct vm_area_struct *vma; | |
65 | ||
66 | /* Internal state, determined in do_mremap(). */ | |
67 | unsigned long delta; /* Absolute delta of old_len,new_len. */ | |
2cf442d7 | 68 | bool populate_expand; /* mlock()'d expanded, must populate. */ |
221bf5ca LS |
69 | enum mremap_type remap_type; /* expand, shrink, etc. */ |
70 | bool mmap_locked; /* Is mm currently write-locked? */ | |
d5c8aec0 | 71 | unsigned long charged; /* If VM_ACCOUNT, # pages to account. */ |
d23cb648 | 72 | bool vmi_needs_invalidate; /* Is the VMA iterator invalidated? */ |
221bf5ca LS |
73 | }; |
74 | ||
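/*
 * Annotation (not part of the original source), illustrating how user input
 * maps onto this struct: a hypothetical call such as
 *
 *	mremap((void *)0x7f0000000000, 0x4000, 0x8000, MREMAP_MAYMOVE);
 *
 * yields vrm->addr = 0x7f0000000000, vrm->old_len = 0x4000,
 * vrm->new_len = 0x8000, hence vrm->delta = 0x4000 and vrm->remap_type =
 * MREMAP_EXPAND; equal lengths give MREMAP_NO_RESIZE and a smaller new_len
 * gives MREMAP_SHRINK.
 */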
c49dd340 | 75 | static pud_t *get_old_pud(struct mm_struct *mm, unsigned long addr) |
1da177e4 LT |
76 | { |
77 | pgd_t *pgd; | |
c2febafc | 78 | p4d_t *p4d; |
1da177e4 | 79 | pud_t *pud; |
1da177e4 LT |
80 | |
81 | pgd = pgd_offset(mm, addr); | |
82 | if (pgd_none_or_clear_bad(pgd)) | |
83 | return NULL; | |
84 | ||
c2febafc KS |
85 | p4d = p4d_offset(pgd, addr); |
86 | if (p4d_none_or_clear_bad(p4d)) | |
87 | return NULL; | |
88 | ||
89 | pud = pud_offset(p4d, addr); | |
1da177e4 LT |
90 | if (pud_none_or_clear_bad(pud)) |
91 | return NULL; | |
92 | ||
c49dd340 KS |
93 | return pud; |
94 | } | |
95 | ||
96 | static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) | |
97 | { | |
98 | pud_t *pud; | |
99 | pmd_t *pmd; | |
100 | ||
101 | pud = get_old_pud(mm, addr); | |
102 | if (!pud) | |
103 | return NULL; | |
104 | ||
1da177e4 | 105 | pmd = pmd_offset(pud, addr); |
37a1c49a | 106 | if (pmd_none(*pmd)) |
1da177e4 LT |
107 | return NULL; |
108 | ||
7be7a546 | 109 | return pmd; |
1da177e4 LT |
110 | } |
111 | ||
664dc4da | 112 | static pud_t *alloc_new_pud(struct mm_struct *mm, unsigned long addr) |
1da177e4 LT |
113 | { |
114 | pgd_t *pgd; | |
c2febafc | 115 | p4d_t *p4d; |
1da177e4 LT |
116 | |
117 | pgd = pgd_offset(mm, addr); | |
c2febafc KS |
118 | p4d = p4d_alloc(mm, pgd, addr); |
119 | if (!p4d) | |
120 | return NULL; | |
c49dd340 KS |
121 | |
122 | return pud_alloc(mm, p4d, addr); | |
123 | } | |
124 | ||
664dc4da | 125 | static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr) |
c49dd340 KS |
126 | { |
127 | pud_t *pud; | |
128 | pmd_t *pmd; | |
129 | ||
664dc4da | 130 | pud = alloc_new_pud(mm, addr); |
1da177e4 | 131 | if (!pud) |
c74df32c | 132 | return NULL; |
7be7a546 | 133 | |
1da177e4 | 134 | pmd = pmd_alloc(mm, pud, addr); |
57a8f0cd | 135 | if (!pmd) |
c74df32c | 136 | return NULL; |
7be7a546 | 137 | |
8ac1f832 | 138 | VM_BUG_ON(pmd_trans_huge(*pmd)); |
c74df32c | 139 | |
7be7a546 | 140 | return pmd; |
1da177e4 LT |
141 | } |
142 | ||
1d069b7d HD |
143 | static void take_rmap_locks(struct vm_area_struct *vma) |
144 | { | |
145 | if (vma->vm_file) | |
146 | i_mmap_lock_write(vma->vm_file->f_mapping); | |
147 | if (vma->anon_vma) | |
148 | anon_vma_lock_write(vma->anon_vma); | |
149 | } | |
150 | ||
151 | static void drop_rmap_locks(struct vm_area_struct *vma) | |
152 | { | |
153 | if (vma->anon_vma) | |
154 | anon_vma_unlock_write(vma->anon_vma); | |
155 | if (vma->vm_file) | |
156 | i_mmap_unlock_write(vma->vm_file->f_mapping); | |
157 | } | |
158 | ||
6dec97dc CG |
159 | static pte_t move_soft_dirty_pte(pte_t pte) |
160 | { | |
161 | /* | |
162 | * Set the soft dirty bit so we can notice |
163 | * in userspace that the ptes were moved. |
164 | */ | |
165 | #ifdef CONFIG_MEM_SOFT_DIRTY | |
166 | if (pte_present(pte)) | |
167 | pte = pte_mksoft_dirty(pte); | |
168 | else if (is_swap_pte(pte)) | |
169 | pte = pte_swp_mksoft_dirty(pte); | |
6dec97dc CG |
170 | #endif |
171 | return pte; | |
172 | } | |
173 | ||
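/*
 * Annotation: with CONFIG_MEM_SOFT_DIRTY enabled, the net effect of the
 * helper above is that every pte landing at the destination reads back as
 * soft-dirty (bit 55 of the corresponding /proc/<pid>/pagemap entry), so
 * checkpoint/restore style users tracking dirty memory do not miss pages
 * that were merely moved rather than written.
 */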
f822a9a8 DJ |
174 | static int mremap_folio_pte_batch(struct vm_area_struct *vma, unsigned long addr, |
175 | pte_t *ptep, pte_t pte, int max_nr) | |
176 | { | |
f822a9a8 DJ |
177 | struct folio *folio; |
178 | ||
179 | if (max_nr == 1) | |
180 | return 1; | |
181 | ||
0b5be138 LS |
182 | /* Avoid expensive folio lookup if we stand no chance of benefit. */ |
183 | if (pte_batch_hint(ptep, pte) == 1) | |
184 | return 1; | |
185 | ||
f822a9a8 DJ |
186 | folio = vm_normal_folio(vma, addr, pte); |
187 | if (!folio || !folio_test_large(folio)) | |
188 | return 1; | |
189 | ||
dd80cfd4 | 190 | return folio_pte_batch(folio, ptep, pte, max_nr); |
f822a9a8 DJ |
191 | } |
192 | ||
664dc4da LS |
193 | static int move_ptes(struct pagetable_move_control *pmc, |
194 | unsigned long extent, pmd_t *old_pmd, pmd_t *new_pmd) | |
1da177e4 | 195 | { |
664dc4da | 196 | struct vm_area_struct *vma = pmc->old; |
0cef0bb8 | 197 | bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma); |
1da177e4 | 198 | struct mm_struct *mm = vma->vm_mm; |
94dab12d | 199 | pte_t *old_ptep, *new_ptep; |
f822a9a8 | 200 | pte_t old_pte, pte; |
838d0235 | 201 | pmd_t dummy_pmdval; |
4c21e2f2 | 202 | spinlock_t *old_ptl, *new_ptl; |
5d190420 | 203 | bool force_flush = false; |
664dc4da LS |
204 | unsigned long old_addr = pmc->old_addr; |
205 | unsigned long new_addr = pmc->new_addr; | |
206 | unsigned long old_end = old_addr + extent; | |
5d190420 | 207 | unsigned long len = old_end - old_addr; |
f822a9a8 DJ |
208 | int max_nr_ptes; |
209 | int nr_ptes; | |
a5be621e | 210 | int err = 0; |
1da177e4 | 211 | |
38a76013 | 212 | /* |
c8c06efa | 213 | * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma |
38a76013 ML |
214 | * locks to ensure that rmap will always observe either the old or the |
215 | * new ptes. This is the easiest way to avoid races with | |
216 | * truncate_pagecache(), page migration, etc... | |
217 | * | |
218 | * When need_rmap_locks is false, we use other ways to avoid | |
219 | * such races: | |
220 | * | |
221 | * - During exec() shift_arg_pages(), we use a specially tagged vma | |
222100ee | 222 | * which rmap call sites look for using vma_is_temporary_stack(). |
38a76013 ML |
223 | * |
224 | * - During mremap(), new_vma is often known to be placed after vma | |
225 | * in rmap traversal order. This ensures rmap will always observe | |
226 | * either the old pte, or the new pte, or both (the page table locks | |
227 | * serialize access to individual ptes, but only rmap traversal | |
228 | * order guarantees that we won't miss both the old and new ptes). | |
229 | */ | |
664dc4da | 230 | if (pmc->need_rmap_locks) |
1d069b7d | 231 | take_rmap_locks(vma); |
1da177e4 | 232 | |
4c21e2f2 HD |
233 | /* |
234 | * We don't have to worry about the ordering of src and dst | |
c1e8d7c6 | 235 | * pte locks because exclusive mmap_lock prevents deadlock. |
4c21e2f2 | 236 | */ |
94dab12d DJ |
237 | old_ptep = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl); |
238 | if (!old_ptep) { | |
a5be621e HD |
239 | err = -EAGAIN; |
240 | goto out; | |
241 | } | |
838d0235 QZ |
242 | /* |
243 | * Now new_pte is none, so the hpage_collapse_scan_file() path cannot find |
244 | * this by traversing file->f_mapping, so there is no concurrency with |
245 | * retract_page_tables(). In addition, we already hold the exclusive |
246 | * mmap_lock, so this new_pte page is stable, and there is no need to get |
247 | * pmdval and do a pmd_same() check. |
248 | */ | |
94dab12d | 249 | new_ptep = pte_offset_map_rw_nolock(mm, new_pmd, new_addr, &dummy_pmdval, |
838d0235 | 250 | &new_ptl); |
94dab12d DJ |
251 | if (!new_ptep) { |
252 | pte_unmap_unlock(old_ptep, old_ptl); | |
a5be621e HD |
253 | err = -EAGAIN; |
254 | goto out; | |
255 | } | |
4c21e2f2 | 256 | if (new_ptl != old_ptl) |
f20dc5f7 | 257 | spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); |
3ea27719 | 258 | flush_tlb_batched_pending(vma->vm_mm); |
6606c3e0 | 259 | arch_enter_lazy_mmu_mode(); |
7be7a546 | 260 | |
f822a9a8 DJ |
261 | for (; old_addr < old_end; old_ptep += nr_ptes, old_addr += nr_ptes * PAGE_SIZE, |
262 | new_ptep += nr_ptes, new_addr += nr_ptes * PAGE_SIZE) { | |
94dab12d | 263 | VM_WARN_ON_ONCE(!pte_none(*new_ptep)); |
b36b701b | 264 | |
f822a9a8 DJ |
265 | nr_ptes = 1; |
266 | max_nr_ptes = (old_end - old_addr) >> PAGE_SHIFT; | |
267 | old_pte = ptep_get(old_ptep); | |
268 | if (pte_none(old_pte)) | |
7be7a546 | 269 | continue; |
5d190420 AL |
270 | |
271 | /* | |
eb66ae03 | 272 | * If we are remapping a valid PTE, make sure |
a2ce2666 | 273 | * to flush TLB before we drop the PTL for the |
eb66ae03 | 274 | * PTE. |
a2ce2666 | 275 | * |
eb66ae03 | 276 | * NOTE! Both old and new PTL matter: the old one |
a929e0d1 | 277 | * for racing with folio_mkclean(), the new one to |
eb66ae03 LT |
278 | * make sure the physical page stays valid until |
279 | * the TLB entry for the old mapping has been | |
280 | * flushed. | |
5d190420 | 281 | */ |
f822a9a8 DJ |
282 | if (pte_present(old_pte)) { |
283 | nr_ptes = mremap_folio_pte_batch(vma, old_addr, old_ptep, | |
284 | old_pte, max_nr_ptes); | |
5d190420 | 285 | force_flush = true; |
f822a9a8 | 286 | } |
3dfde978 | 287 | pte = get_and_clear_ptes(mm, old_addr, old_ptep, nr_ptes); |
82a616d0 | 288 | pte = move_pte(pte, old_addr, new_addr); |
6dec97dc | 289 | pte = move_soft_dirty_pte(pte); |
0cef0bb8 RR |
290 | |
291 | if (need_clear_uffd_wp && pte_marker_uffd_wp(pte)) | |
94dab12d | 292 | pte_clear(mm, new_addr, new_ptep); |
0cef0bb8 RR |
293 | else { |
294 | if (need_clear_uffd_wp) { | |
295 | if (pte_present(pte)) | |
296 | pte = pte_clear_uffd_wp(pte); | |
297 | else if (is_swap_pte(pte)) | |
298 | pte = pte_swp_clear_uffd_wp(pte); | |
299 | } | |
f822a9a8 | 300 | set_ptes(mm, new_addr, new_ptep, pte, nr_ptes); |
0cef0bb8 | 301 | } |
1da177e4 | 302 | } |
7be7a546 | 303 | |
6606c3e0 | 304 | arch_leave_lazy_mmu_mode(); |
eb66ae03 LT |
305 | if (force_flush) |
306 | flush_tlb_range(vma, old_end - len, old_end); | |
4c21e2f2 HD |
307 | if (new_ptl != old_ptl) |
308 | spin_unlock(new_ptl); | |
94dab12d DJ |
309 | pte_unmap(new_ptep - 1); |
310 | pte_unmap_unlock(old_ptep - 1, old_ptl); | |
a5be621e | 311 | out: |
664dc4da | 312 | if (pmc->need_rmap_locks) |
1d069b7d | 313 | drop_rmap_locks(vma); |
a5be621e | 314 | return err; |
1da177e4 LT |
315 | } |
316 | ||
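/*
 * Annotation: the flush_tlb_range() above covers [old_end - len, old_end),
 * i.e. the whole span this call handled, and is only issued when at least
 * one present pte was moved (force_flush); ptes that were none or swap
 * entries never had a TLB entry to begin with, so skipping the flush in
 * that case is safe.
 */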
3bbda69c AK |
317 | #ifndef arch_supports_page_table_move |
318 | #define arch_supports_page_table_move arch_supports_page_table_move | |
319 | static inline bool arch_supports_page_table_move(void) | |
320 | { | |
321 | return IS_ENABLED(CONFIG_HAVE_MOVE_PMD) || | |
322 | IS_ENABLED(CONFIG_HAVE_MOVE_PUD); | |
323 | } | |
324 | #endif | |
325 | ||
772e5b4a DH |
326 | static inline bool uffd_supports_page_table_move(struct pagetable_move_control *pmc) |
327 | { | |
328 | /* | |
329 | * If we are moving a VMA that has uffd-wp registered but with | |
330 | * remap events disabled (new VMA will not be registered with uffd), we | |
331 | * need to ensure that the uffd-wp state is cleared from all pgtables. | |
332 | * This means recursing into lower page tables in move_page_tables(). | |
333 | * | |
334 | * We might get called with VMAs reversed when recovering from a | |
335 | * failed page table move. In that case, the | |
336 | * "old"-but-actually-"originally new" VMA during recovery will not have | |
337 | * a uffd context. Recursing into lower page tables during the original | |
338 | * move but not during the recovery move will cause trouble, because we | |
339 | * run into already-existing page tables. So check both VMAs. | |
340 | */ | |
341 | return !vma_has_uffd_without_event_remap(pmc->old) && | |
342 | !vma_has_uffd_without_event_remap(pmc->new); | |
343 | } | |
344 | ||
2c91bd4a | 345 | #ifdef CONFIG_HAVE_MOVE_PMD |
664dc4da LS |
346 | static bool move_normal_pmd(struct pagetable_move_control *pmc, |
347 | pmd_t *old_pmd, pmd_t *new_pmd) | |
2c91bd4a JFG |
348 | { |
349 | spinlock_t *old_ptl, *new_ptl; | |
664dc4da | 350 | struct vm_area_struct *vma = pmc->old; |
2c91bd4a | 351 | struct mm_struct *mm = vma->vm_mm; |
6fa1066f | 352 | bool res = false; |
2c91bd4a JFG |
353 | pmd_t pmd; |
354 | ||
3bbda69c AK |
355 | if (!arch_supports_page_table_move()) |
356 | return false; | |
772e5b4a DH |
357 | if (!uffd_supports_page_table_move(pmc)) |
358 | return false; | |
2c91bd4a JFG |
359 | /* |
360 | * The destination pmd shouldn't be established, free_pgtables() | |
f81fdd0c LT |
361 | * should have released it. |
362 | * | |
363 | * However, there's a case during execve() where we use mremap | |
364 | * to move the initial stack, and in that case the target area | |
365 | * may overlap the source area (always moving down). | |
366 | * | |
367 | * If everything is PMD-aligned, that works fine, as moving | |
368 | * each pmd down will clear the source pmd. But if we first | |
369 | * have a few 4kB-only pages that get moved down, and then | |
370 | * hit the "now the rest is PMD-aligned, let's do everything | |
371 | * one pmd at a time", we will still have the old (now empty | |
372 | * of any 4kB pages, but still there) PMD in the page table | |
373 | * tree. | |
374 | * | |
375 | * Warn on it once - because we really should try to figure | |
376 | * out how to do this better - but then say "I won't move | |
377 | * this pmd". | |
378 | * | |
379 | * One alternative might be to just unmap the target pmd at | |
380 | * this point, and verify that it really is empty. We'll see. | |
2c91bd4a | 381 | */ |
f81fdd0c | 382 | if (WARN_ON_ONCE(!pmd_none(*new_pmd))) |
2c91bd4a JFG |
383 | return false; |
384 | ||
385 | /* | |
386 | * We don't have to worry about the ordering of src and dst | |
c1e8d7c6 | 387 | * ptlocks because exclusive mmap_lock prevents deadlock. |
2c91bd4a | 388 | */ |
664dc4da | 389 | old_ptl = pmd_lock(mm, old_pmd); |
2c91bd4a JFG |
390 | new_ptl = pmd_lockptr(mm, new_pmd); |
391 | if (new_ptl != old_ptl) | |
392 | spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); | |
393 | ||
2c91bd4a | 394 | pmd = *old_pmd; |
6fa1066f JH |
395 | |
396 | /* Racing with collapse? */ | |
397 | if (unlikely(!pmd_present(pmd) || pmd_leaf(pmd))) | |
398 | goto out_unlock; | |
399 | /* Clear the pmd */ | |
2c91bd4a | 400 | pmd_clear(old_pmd); |
6fa1066f | 401 | res = true; |
2c91bd4a JFG |
402 | |
403 | VM_BUG_ON(!pmd_none(*new_pmd)); | |
404 | ||
0881ace2 | 405 | pmd_populate(mm, new_pmd, pmd_pgtable(pmd)); |
664dc4da | 406 | flush_tlb_range(vma, pmc->old_addr, pmc->old_addr + PMD_SIZE); |
6fa1066f | 407 | out_unlock: |
2c91bd4a JFG |
408 | if (new_ptl != old_ptl) |
409 | spin_unlock(new_ptl); | |
410 | spin_unlock(old_ptl); | |
411 | ||
6fa1066f | 412 | return res; |
2c91bd4a | 413 | } |
c49dd340 | 414 | #else |
664dc4da LS |
415 | static inline bool move_normal_pmd(struct pagetable_move_control *pmc, |
416 | pmd_t *old_pmd, pmd_t *new_pmd) | |
c49dd340 KS |
417 | { |
418 | return false; | |
419 | } | |
420 | #endif | |
421 | ||
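/*
 * Annotation: on a typical configuration with 4KiB pages and 2MiB PMDs, a
 * successful move_normal_pmd() replaces what would otherwise be up to 512
 * individual pte moves with a single pmd_clear()/pmd_populate() pair and one
 * TLB range flush, which is the payoff justifying CONFIG_HAVE_MOVE_PMD.
 */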
d6655dff | 422 | #if CONFIG_PGTABLE_LEVELS > 2 && defined(CONFIG_HAVE_MOVE_PUD) |
664dc4da LS |
423 | static bool move_normal_pud(struct pagetable_move_control *pmc, |
424 | pud_t *old_pud, pud_t *new_pud) | |
c49dd340 KS |
425 | { |
426 | spinlock_t *old_ptl, *new_ptl; | |
664dc4da | 427 | struct vm_area_struct *vma = pmc->old; |
c49dd340 KS |
428 | struct mm_struct *mm = vma->vm_mm; |
429 | pud_t pud; | |
430 | ||
3bbda69c AK |
431 | if (!arch_supports_page_table_move()) |
432 | return false; | |
772e5b4a DH |
433 | if (!uffd_supports_page_table_move(pmc)) |
434 | return false; | |
c49dd340 KS |
435 | /* |
436 | * The destination pud shouldn't be established, free_pgtables() | |
437 | * should have released it. | |
438 | */ | |
439 | if (WARN_ON_ONCE(!pud_none(*new_pud))) | |
440 | return false; | |
441 | ||
442 | /* | |
443 | * We don't have to worry about the ordering of src and dst | |
444 | * ptlocks because exclusive mmap_lock prevents deadlock. | |
445 | */ | |
664dc4da | 446 | old_ptl = pud_lock(mm, old_pud); |
c49dd340 KS |
447 | new_ptl = pud_lockptr(mm, new_pud); |
448 | if (new_ptl != old_ptl) | |
449 | spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); | |
450 | ||
451 | /* Clear the pud */ | |
452 | pud = *old_pud; | |
453 | pud_clear(old_pud); | |
454 | ||
455 | VM_BUG_ON(!pud_none(*new_pud)); | |
456 | ||
0881ace2 | 457 | pud_populate(mm, new_pud, pud_pgtable(pud)); |
664dc4da | 458 | flush_tlb_range(vma, pmc->old_addr, pmc->old_addr + PUD_SIZE); |
c49dd340 KS |
459 | if (new_ptl != old_ptl) |
460 | spin_unlock(new_ptl); | |
461 | spin_unlock(old_ptl); | |
462 | ||
463 | return true; | |
464 | } | |
465 | #else | |
664dc4da LS |
466 | static inline bool move_normal_pud(struct pagetable_move_control *pmc, |
467 | pud_t *old_pud, pud_t *new_pud) | |
c49dd340 KS |
468 | { |
469 | return false; | |
470 | } | |
2c91bd4a JFG |
471 | #endif |
472 | ||
54a948a1 | 473 | #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) |
664dc4da LS |
474 | static bool move_huge_pud(struct pagetable_move_control *pmc, |
475 | pud_t *old_pud, pud_t *new_pud) | |
7d846db7 AK |
476 | { |
477 | spinlock_t *old_ptl, *new_ptl; | |
664dc4da | 478 | struct vm_area_struct *vma = pmc->old; |
7d846db7 AK |
479 | struct mm_struct *mm = vma->vm_mm; |
480 | pud_t pud; | |
481 | ||
482 | /* | |
483 | * The destination pud shouldn't be established, free_pgtables() | |
484 | * should have released it. | |
485 | */ | |
486 | if (WARN_ON_ONCE(!pud_none(*new_pud))) | |
487 | return false; | |
488 | ||
489 | /* | |
490 | * We don't have to worry about the ordering of src and dst | |
491 | * ptlocks because exclusive mmap_lock prevents deadlock. | |
492 | */ | |
664dc4da | 493 | old_ptl = pud_lock(mm, old_pud); |
7d846db7 AK |
494 | new_ptl = pud_lockptr(mm, new_pud); |
495 | if (new_ptl != old_ptl) | |
496 | spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); | |
497 | ||
498 | /* Clear the pud */ | |
499 | pud = *old_pud; | |
500 | pud_clear(old_pud); | |
501 | ||
502 | VM_BUG_ON(!pud_none(*new_pud)); | |
503 | ||
504 | /* Set the new pud */ | |
505 | /* mark soft_dirty when we add pud level soft dirty support */ |
664dc4da LS |
506 | set_pud_at(mm, pmc->new_addr, new_pud, pud); |
507 | flush_pud_tlb_range(vma, pmc->old_addr, pmc->old_addr + HPAGE_PUD_SIZE); | |
7d846db7 AK |
508 | if (new_ptl != old_ptl) |
509 | spin_unlock(new_ptl); | |
510 | spin_unlock(old_ptl); | |
511 | ||
512 | return true; | |
513 | } | |
514 | #else | |
664dc4da LS |
515 | static bool move_huge_pud(struct pagetable_move_control *pmc, |
516 | pud_t *old_pud, pud_t *new_pud) | |
517 | ||
7d846db7 AK |
518 | { |
519 | WARN_ON_ONCE(1); | |
520 | return false; | |
521 | ||
522 | } | |
523 | #endif | |
524 | ||
c49dd340 KS |
525 | enum pgt_entry { |
526 | NORMAL_PMD, | |
527 | HPAGE_PMD, | |
528 | NORMAL_PUD, | |
7d846db7 | 529 | HPAGE_PUD, |
c49dd340 KS |
530 | }; |
531 | ||
532 | /* | |
533 | * Returns an extent of the corresponding size for the pgt_entry specified if | |
534 | * valid. Else returns a smaller extent bounded by the end of the source and | |
535 | * destination pgt_entry. | |
536 | */ | |
a30a2909 | 537 | static __always_inline unsigned long get_extent(enum pgt_entry entry, |
664dc4da | 538 | struct pagetable_move_control *pmc) |
c49dd340 KS |
539 | { |
540 | unsigned long next, extent, mask, size; | |
664dc4da LS |
541 | unsigned long old_addr = pmc->old_addr; |
542 | unsigned long old_end = pmc->old_end; | |
543 | unsigned long new_addr = pmc->new_addr; | |
c49dd340 KS |
544 | |
545 | switch (entry) { | |
546 | case HPAGE_PMD: | |
547 | case NORMAL_PMD: | |
548 | mask = PMD_MASK; | |
549 | size = PMD_SIZE; | |
550 | break; | |
7d846db7 | 551 | case HPAGE_PUD: |
c49dd340 KS |
552 | case NORMAL_PUD: |
553 | mask = PUD_MASK; | |
554 | size = PUD_SIZE; | |
555 | break; | |
556 | default: | |
557 | BUILD_BUG(); | |
558 | break; | |
559 | } | |
560 | ||
561 | next = (old_addr + size) & mask; | |
562 | /* even if next overflowed, extent below will be ok */ | |
e05986ee KS |
563 | extent = next - old_addr; |
564 | if (extent > old_end - old_addr) | |
565 | extent = old_end - old_addr; | |
c49dd340 KS |
566 | next = (new_addr + size) & mask; |
567 | if (extent > next - new_addr) | |
568 | extent = next - new_addr; | |
569 | return extent; | |
570 | } | |
571 | ||
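/*
 * Worked example (annotation, values illustrative, taking PMD_SIZE = 2MiB):
 * with old_addr = 0x2ff000, old_end = 0x800000 and new_addr = 0x5ff000,
 * get_extent(NORMAL_PMD, pmc) finds the next source boundary at 0x400000,
 * giving extent = 0x101000, but the destination reaches its own boundary at
 * 0x600000 after only 0x1000 bytes, so 0x1000 is returned: the step is
 * bounded by whichever of source or destination hits a page table boundary
 * first.
 */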
664dc4da LS |
572 | /* |
573 | * Should move_pgt_entry() acquire the rmap locks? This is either expressed in | |
574 | * the PMC, or overridden in the case of normal, larger page tables. | |
575 | */ | |
576 | static bool should_take_rmap_locks(struct pagetable_move_control *pmc, | |
577 | enum pgt_entry entry) | |
578 | { | |
579 | switch (entry) { | |
580 | case NORMAL_PMD: | |
581 | case NORMAL_PUD: | |
582 | return true; | |
583 | default: | |
584 | return pmc->need_rmap_locks; | |
585 | } | |
586 | } | |
587 | ||
c49dd340 KS |
588 | /* |
589 | * Attempts to speedup the move by moving entry at the level corresponding to | |
590 | * pgt_entry. Returns true if the move was successful, else false. | |
591 | */ | |
664dc4da LS |
592 | static bool move_pgt_entry(struct pagetable_move_control *pmc, |
593 | enum pgt_entry entry, void *old_entry, void *new_entry) | |
c49dd340 KS |
594 | { |
595 | bool moved = false; | |
664dc4da | 596 | bool need_rmap_locks = should_take_rmap_locks(pmc, entry); |
c49dd340 KS |
597 | |
598 | /* See comment in move_ptes() */ | |
599 | if (need_rmap_locks) | |
664dc4da | 600 | take_rmap_locks(pmc->old); |
c49dd340 KS |
601 | |
602 | switch (entry) { | |
603 | case NORMAL_PMD: | |
664dc4da | 604 | moved = move_normal_pmd(pmc, old_entry, new_entry); |
c49dd340 KS |
605 | break; |
606 | case NORMAL_PUD: | |
664dc4da | 607 | moved = move_normal_pud(pmc, old_entry, new_entry); |
c49dd340 KS |
608 | break; |
609 | case HPAGE_PMD: | |
610 | moved = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && | |
664dc4da | 611 | move_huge_pmd(pmc->old, pmc->old_addr, pmc->new_addr, old_entry, |
c49dd340 KS |
612 | new_entry); |
613 | break; | |
7d846db7 AK |
614 | case HPAGE_PUD: |
615 | moved = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && | |
664dc4da | 616 | move_huge_pud(pmc, old_entry, new_entry); |
7d846db7 AK |
617 | break; |
618 | ||
c49dd340 KS |
619 | default: |
620 | WARN_ON_ONCE(1); | |
621 | break; | |
622 | } | |
623 | ||
624 | if (need_rmap_locks) | |
664dc4da | 625 | drop_rmap_locks(pmc->old); |
c49dd340 KS |
626 | |
627 | return moved; | |
628 | } | |
629 | ||
af8ca1c1 | 630 | /* |
b1e5a3de JFG |
631 | * A helper to check if aligning down is OK. The aligned address should fall |
632 | * on *no mapping*. For the stack moving down, that's a special move within | |
633 | * the VMA that is created to span the source and destination of the move, | |
634 | * so we make an exception for it. | |
af8ca1c1 | 635 | */ |
2a4077f4 LS |
636 | static bool can_align_down(struct pagetable_move_control *pmc, |
637 | struct vm_area_struct *vma, unsigned long addr_to_align, | |
638 | unsigned long mask) | |
af8ca1c1 JFG |
639 | { |
640 | unsigned long addr_masked = addr_to_align & mask; | |
641 | ||
642 | /* | |
643 | * If @addr_to_align of either source or destination is not the beginning | |
644 | * of the corresponding VMA, we can't align down or we will destroy part | |
645 | * of the current mapping. | |
646 | */ | |
2a4077f4 | 647 | if (!pmc->for_stack && vma->vm_start != addr_to_align) |
af8ca1c1 JFG |
648 | return false; |
649 | ||
b1e5a3de | 650 | /* In the stack case we explicitly permit in-VMA alignment. */ |
2a4077f4 | 651 | if (pmc->for_stack && addr_masked >= vma->vm_start) |
b1e5a3de JFG |
652 | return true; |
653 | ||
af8ca1c1 JFG |
654 | /* |
655 | * Make sure the realignment doesn't cause the address to fall on an | |
656 | * existing mapping. | |
657 | */ | |
658 | return find_vma_intersection(vma->vm_mm, addr_masked, vma->vm_start) == NULL; | |
659 | } | |
660 | ||
2a4077f4 LS |
661 | /* |
662 | * Determine if we are in fact able to realign for efficiency to a higher page |
663 | * table boundary. | |
664 | */ | |
665 | static bool can_realign_addr(struct pagetable_move_control *pmc, | |
666 | unsigned long pagetable_mask) | |
af8ca1c1 | 667 | { |
2a4077f4 LS |
668 | unsigned long align_mask = ~pagetable_mask; |
669 | unsigned long old_align = pmc->old_addr & align_mask; | |
670 | unsigned long new_align = pmc->new_addr & align_mask; | |
671 | unsigned long pagetable_size = align_mask + 1; | |
672 | unsigned long old_align_next = pagetable_size - old_align; | |
673 | ||
674 | /* | |
675 | * We don't want to have to go hunting for VMAs from the end of the old | |
676 | * VMA to the next page table boundary, and we want to make sure the |
677 | * operation is worthwhile. |
678 | * | |
679 | * So ensure that we only perform this realignment if the end of the | |
680 | * range being copied reaches or crosses the page table boundary. | |
681 | * | |
682 | * boundary boundary | |
683 | * .<- old_align -> . | |
684 | * . |----------------.-----------| | |
685 | * . | vma . | | |
686 | * . |----------------.-----------| | |
687 | * . <----------------.-----------> | |
688 | * . len_in | |
689 | * <-------------------------------> | |
690 | * . pagetable_size . | |
691 | * . <----------------> | |
692 | * . old_align_next . | |
693 | */ | |
694 | if (pmc->len_in < old_align_next) | |
695 | return false; | |
696 | ||
af8ca1c1 | 697 | /* Skip if the addresses are already aligned. */ |
2a4077f4 LS |
698 | if (old_align == 0) |
699 | return false; | |
af8ca1c1 JFG |
700 | |
701 | /* Only realign if the new and old addresses are mutually aligned. */ | |
2a4077f4 LS |
702 | if (old_align != new_align) |
703 | return false; | |
af8ca1c1 JFG |
704 | |
705 | /* Ensure realignment doesn't cause overlap with existing mappings. */ | |
2a4077f4 LS |
706 | if (!can_align_down(pmc, pmc->old, pmc->old_addr, pagetable_mask) || |
707 | !can_align_down(pmc, pmc->new, pmc->new_addr, pagetable_mask)) | |
708 | return false; | |
709 | ||
710 | return true; | |
711 | } | |
712 | ||
713 | /* | |
714 | * Opportunistically realign to specified boundary for faster copy. | |
715 | * | |
716 | * Consider an mremap() of a VMA with page table boundaries as below, and no | |
717 | * preceding VMAs from the lower page table boundary to the start of the VMA, | |
718 | * with the end of the range reaching or crossing the page table boundary. | |
719 | * | |
720 | * boundary boundary | |
721 | * . |----------------.-----------| | |
722 | * . | vma . | | |
723 | * . |----------------.-----------| | |
724 | * . pmc->old_addr . pmc->old_end | |
725 | * . <----------------------------> | |
726 | * . move these page tables | |
727 | * | |
728 | * If we proceed with moving page tables in this scenario, we will have a lot of | |
729 | * work to do traversing old page tables and establishing new ones in the | |
730 | * destination across multiple lower level page tables. | |
731 | * | |
732 | * The idea here is simply to align pmc->old_addr, pmc->new_addr down to the | |
733 | * page table boundary, so we can simply copy a single page table entry for the | |
734 | * aligned portion of the VMA instead: | |
735 | * | |
736 | * boundary boundary | |
737 | * . |----------------.-----------| | |
738 | * . | vma . | | |
739 | * . |----------------.-----------| | |
740 | * pmc->old_addr . pmc->old_end | |
741 | * <-------------------------------------------> | |
742 | * . move these page tables | |
743 | */ | |
744 | static void try_realign_addr(struct pagetable_move_control *pmc, | |
745 | unsigned long pagetable_mask) | |
746 | { | |
747 | ||
748 | if (!can_realign_addr(pmc, pagetable_mask)) | |
af8ca1c1 JFG |
749 | return; |
750 | ||
2a4077f4 LS |
751 | /* |
752 | * Simply align to page table boundaries. Note that we do NOT update the | |
753 | * pmc->old_end value, and since the move_page_tables() operation spans | |
754 | * from [old_addr, old_end) (offsetting new_addr as it is performed), | |
755 | * this simply changes the start of the copy, not the end. | |
756 | */ | |
757 | pmc->old_addr &= pagetable_mask; | |
758 | pmc->new_addr &= pagetable_mask; | |
af8ca1c1 JFG |
759 | } |
760 | ||
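/*
 * Worked example (annotation, values illustrative, taking PMD_SIZE = 2MiB):
 * old_addr = 0x2ff000 and new_addr = 0x4ff000 share the in-PMD offset
 * 0xff000, so old_align == new_align and old_align_next = 0x101000. Provided
 * pmc->len_in >= 0x101000 and can_align_down() succeeds for both VMAs, the
 * addresses are realigned down to 0x200000 and 0x400000 respectively,
 * allowing the first PMD to be moved as a whole rather than pte by pte.
 */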
664dc4da LS |
761 | /* Is the page table move operation done? */ |
762 | static bool pmc_done(struct pagetable_move_control *pmc) | |
763 | { | |
764 | return pmc->old_addr >= pmc->old_end; | |
765 | } | |
766 | ||
767 | /* Advance to the next page table, offset by extent bytes. */ | |
768 | static void pmc_next(struct pagetable_move_control *pmc, unsigned long extent) | |
769 | { | |
770 | pmc->old_addr += extent; | |
771 | pmc->new_addr += extent; | |
772 | } | |
773 | ||
774 | /* | |
775 | * Determine how many bytes in the specified input range have had their page | |
776 | * tables moved so far. | |
777 | */ | |
778 | static unsigned long pmc_progress(struct pagetable_move_control *pmc) | |
779 | { | |
780 | unsigned long orig_old_addr = pmc->old_end - pmc->len_in; | |
781 | unsigned long old_addr = pmc->old_addr; | |
782 | ||
783 | /* | |
784 | * Prevent negative return values when {old,new}_addr was realigned but | |
785 | * we broke out of the loop in move_page_tables() for the first PMD | |
786 | * itself. | |
787 | */ | |
788 | return old_addr < orig_old_addr ? 0 : old_addr - orig_old_addr; | |
789 | } | |
790 | ||
2a4077f4 | 791 | unsigned long move_page_tables(struct pagetable_move_control *pmc) |
1da177e4 | 792 | { |
664dc4da | 793 | unsigned long extent; |
ac46d4f3 | 794 | struct mmu_notifier_range range; |
7be7a546 | 795 | pmd_t *old_pmd, *new_pmd; |
7d846db7 | 796 | pud_t *old_pud, *new_pud; |
664dc4da | 797 | struct mm_struct *mm = pmc->old->vm_mm; |
1da177e4 | 798 | |
2a4077f4 | 799 | if (!pmc->len_in) |
01e67e04 PB |
800 | return 0; |
801 | ||
664dc4da | 802 | if (is_vm_hugetlb_page(pmc->old)) |
2a4077f4 LS |
803 | return move_hugetlb_page_tables(pmc->old, pmc->new, pmc->old_addr, |
804 | pmc->new_addr, pmc->len_in); | |
550a7d60 | 805 | |
af8ca1c1 JFG |
806 | /* |
807 | * If possible, realign addresses to PMD boundary for faster copy. | |
808 | * Only realign if the mremap copying hits a PMD boundary. | |
809 | */ | |
2a4077f4 | 810 | try_realign_addr(pmc, PMD_MASK); |
af8ca1c1 | 811 | |
664dc4da LS |
812 | flush_cache_range(pmc->old, pmc->old_addr, pmc->old_end); |
813 | mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, mm, | |
814 | pmc->old_addr, pmc->old_end); | |
ac46d4f3 | 815 | mmu_notifier_invalidate_range_start(&range); |
7b6efc2b | 816 | |
664dc4da | 817 | for (; !pmc_done(pmc); pmc_next(pmc, extent)) { |
1da177e4 | 818 | cond_resched(); |
c49dd340 KS |
819 | /* |
820 | * If extent is PUD-sized try to speed up the move by moving at the | |
821 | * PUD level if possible. | |
822 | */ | |
664dc4da | 823 | extent = get_extent(NORMAL_PUD, pmc); |
c49dd340 | 824 | |
664dc4da | 825 | old_pud = get_old_pud(mm, pmc->old_addr); |
7d846db7 AK |
826 | if (!old_pud) |
827 | continue; | |
664dc4da | 828 | new_pud = alloc_new_pud(mm, pmc->new_addr); |
7d846db7 AK |
829 | if (!new_pud) |
830 | break; | |
8a6a984c | 831 | if (pud_trans_huge(*old_pud)) { |
7d846db7 | 832 | if (extent == HPAGE_PUD_SIZE) { |
664dc4da | 833 | move_pgt_entry(pmc, HPAGE_PUD, old_pud, new_pud); |
7d846db7 | 834 | /* We ignore and continue on error? */ |
c49dd340 | 835 | continue; |
7d846db7 AK |
836 | } |
837 | } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) { | |
664dc4da | 838 | if (move_pgt_entry(pmc, NORMAL_PUD, old_pud, new_pud)) |
c49dd340 KS |
839 | continue; |
840 | } | |
841 | ||
664dc4da LS |
842 | extent = get_extent(NORMAL_PMD, pmc); |
843 | old_pmd = get_old_pmd(mm, pmc->old_addr); | |
7be7a546 HD |
844 | if (!old_pmd) |
845 | continue; | |
664dc4da | 846 | new_pmd = alloc_new_pmd(mm, pmc->new_addr); |
7be7a546 HD |
847 | if (!new_pmd) |
848 | break; | |
a5be621e | 849 | again: |
8a6a984c | 850 | if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd)) { |
c49dd340 | 851 | if (extent == HPAGE_PMD_SIZE && |
664dc4da | 852 | move_pgt_entry(pmc, HPAGE_PMD, old_pmd, new_pmd)) |
c49dd340 | 853 | continue; |
664dc4da | 854 | split_huge_pmd(pmc->old, old_pmd, pmc->old_addr); |
c49dd340 KS |
855 | } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PMD) && |
856 | extent == PMD_SIZE) { | |
2c91bd4a JFG |
857 | /* |
858 | * If the extent is PMD-sized, try to speed the move by | |
859 | * moving at the PMD level if possible. | |
860 | */ | |
664dc4da | 861 | if (move_pgt_entry(pmc, NORMAL_PMD, old_pmd, new_pmd)) |
2c91bd4a | 862 | continue; |
37a1c49a | 863 | } |
a5be621e HD |
864 | if (pmd_none(*old_pmd)) |
865 | continue; | |
2a4077f4 | 866 | if (pte_alloc(pmc->new->vm_mm, new_pmd)) |
37a1c49a | 867 | break; |
664dc4da | 868 | if (move_ptes(pmc, extent, old_pmd, new_pmd) < 0) |
a5be621e | 869 | goto again; |
1da177e4 | 870 | } |
7b6efc2b | 871 | |
ac46d4f3 | 872 | mmu_notifier_invalidate_range_end(&range); |
7be7a546 | 873 | |
664dc4da | 874 | return pmc_progress(pmc); |
1da177e4 LT |
875 | } |
876 | ||
221bf5ca LS |
877 | /* Set vrm->delta to the difference in VMA size specified by user. */ |
878 | static void vrm_set_delta(struct vma_remap_struct *vrm) | |
879 | { | |
880 | vrm->delta = abs_diff(vrm->old_len, vrm->new_len); | |
881 | } | |
882 | ||
883 | /* Determine what kind of remap this is - shrink, expand or no resize at all. */ | |
884 | static enum mremap_type vrm_remap_type(struct vma_remap_struct *vrm) | |
885 | { | |
886 | if (vrm->delta == 0) | |
887 | return MREMAP_NO_RESIZE; | |
888 | ||
889 | if (vrm->old_len > vrm->new_len) | |
890 | return MREMAP_SHRINK; | |
891 | ||
892 | return MREMAP_EXPAND; | |
893 | } | |
894 | ||
895 | /* | |
896 | * When moving a VMA to vrm->new_addr, does this result in the new and old VMAs |
897 | * overlapping? | |
898 | */ | |
899 | static bool vrm_overlaps(struct vma_remap_struct *vrm) | |
900 | { | |
901 | unsigned long start_old = vrm->addr; | |
902 | unsigned long start_new = vrm->new_addr; | |
903 | unsigned long end_old = vrm->addr + vrm->old_len; | |
904 | unsigned long end_new = vrm->new_addr + vrm->new_len; | |
905 | ||
906 | /* | |
907 | * start_old end_old | |
908 | * |-----------| | |
909 | * | | | |
910 | * |-----------| | |
911 | * |-------------| | |
912 | * | | | |
913 | * |-------------| | |
914 | * start_new end_new | |
915 | */ | |
916 | if (end_old > start_new && end_new > start_old) | |
917 | return true; | |
918 | ||
919 | return false; | |
920 | } | |
921 | ||
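/*
 * Worked example (annotation, values illustrative): remapping
 * [0x10000, 0x14000) to new_addr = 0x12000 with new_len = 0x4000 gives
 * end_old = 0x14000 > start_new = 0x12000 and end_new = 0x16000 >
 * start_old = 0x10000, so vrm_overlaps() returns true; a new_addr of
 * 0x14000 or above would not overlap.
 */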
000c0691 LS |
922 | /* |
923 | * Will a new address definitely be assigned? This is the case either if the user |
924 | * specifies it via MREMAP_FIXED, or if MREMAP_DONTUNMAP is used, indicating we |
925 | * will always determine a target address. |
926 | */ | |
221bf5ca LS |
927 | static bool vrm_implies_new_addr(struct vma_remap_struct *vrm) |
928 | { | |
929 | return vrm->flags & (MREMAP_FIXED | MREMAP_DONTUNMAP); | |
930 | } | |
931 | ||
932 | /* | |
933 | * Find an unmapped area for the requested vrm->new_addr. | |
934 | * | |
935 | * If MREMAP_FIXED then this is equivalent to a MAP_FIXED mmap() call. If only | |
936 | * MREMAP_DONTUNMAP is set, then this is equivalent to providing a hint to | |
937 | * mmap(), otherwise this is equivalent to mmap() specifying a NULL address. | |
938 | * | |
939 | * Returns 0 on success (with vrm->new_addr updated), or an error code upon | |
940 | * failure. | |
941 | */ | |
942 | static unsigned long vrm_set_new_addr(struct vma_remap_struct *vrm) | |
943 | { | |
944 | struct vm_area_struct *vma = vrm->vma; | |
945 | unsigned long map_flags = 0; | |
946 | /* Page Offset _into_ the VMA. */ | |
947 | pgoff_t internal_pgoff = (vrm->addr - vma->vm_start) >> PAGE_SHIFT; | |
948 | pgoff_t pgoff = vma->vm_pgoff + internal_pgoff; | |
949 | unsigned long new_addr = vrm_implies_new_addr(vrm) ? vrm->new_addr : 0; | |
950 | unsigned long res; | |
951 | ||
952 | if (vrm->flags & MREMAP_FIXED) | |
953 | map_flags |= MAP_FIXED; | |
954 | if (vma->vm_flags & VM_MAYSHARE) | |
955 | map_flags |= MAP_SHARED; | |
956 | ||
957 | res = get_unmapped_area(vma->vm_file, new_addr, vrm->new_len, pgoff, | |
958 | map_flags); | |
959 | if (IS_ERR_VALUE(res)) | |
960 | return res; | |
961 | ||
962 | vrm->new_addr = res; | |
963 | return 0; | |
964 | } | |
965 | ||
d5c8aec0 LS |
966 | /* |
967 | * Keep track of pages which have been added to the memory mapping. If the VMA | |
968 | * is accounted, also check to see if there is sufficient memory. | |
969 | * | |
970 | * Returns true on success, false if insufficient memory to charge. | |
971 | */ | |
000c0691 | 972 | static bool vrm_calc_charge(struct vma_remap_struct *vrm) |
1da177e4 | 973 | { |
d5c8aec0 LS |
974 | unsigned long charged; |
975 | ||
976 | if (!(vrm->vma->vm_flags & VM_ACCOUNT)) | |
977 | return true; | |
978 | ||
979 | /* | |
980 | * If we don't unmap the old mapping, then we account the entirety of | |
981 | * the length of the new one. Otherwise it's just the delta in size. | |
982 | */ | |
983 | if (vrm->flags & MREMAP_DONTUNMAP) | |
984 | charged = vrm->new_len >> PAGE_SHIFT; | |
985 | else | |
986 | charged = vrm->delta >> PAGE_SHIFT; | |
987 | ||
988 | ||
989 | /* This accounts 'charged' pages of memory. */ | |
990 | if (security_vm_enough_memory_mm(current->mm, charged)) | |
991 | return false; | |
992 | ||
993 | vrm->charged = charged; | |
994 | return true; | |
995 | } | |
996 | ||
997 | /* | |
998 | * An error has occurred, so we will not be using vrm->charged memory. Unaccount |
999 | * this memory if the VMA is accounted. | |
1000 | */ | |
1001 | static void vrm_uncharge(struct vma_remap_struct *vrm) | |
1002 | { | |
1003 | if (!(vrm->vma->vm_flags & VM_ACCOUNT)) | |
1004 | return; | |
1005 | ||
1006 | vm_unacct_memory(vrm->charged); | |
1007 | vrm->charged = 0; | |
1008 | } | |
1009 | ||
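/*
 * Worked example (annotation, values illustrative, assuming 4KiB pages):
 * expanding an accounted 16KiB mapping to 64KiB has delta = 48KiB, so
 * vrm_calc_charge() charges 12 pages; with MREMAP_DONTUNMAP the old mapping
 * remains, so the full new_len of 16 pages is charged instead. On failure,
 * vrm_uncharge() backs out exactly vrm->charged pages.
 */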
1010 | /* | |
1011 | * Update mm exec_vm, stack_vm, data_vm, and locked_vm fields as needed to |
1012 | * account for 'bytes' of memory used, updating mm->locked_vm if the VMA |
1013 | * is mlock()'d. |
1014 | */ | |
1015 | static void vrm_stat_account(struct vma_remap_struct *vrm, | |
1016 | unsigned long bytes) | |
1017 | { | |
1018 | unsigned long pages = bytes >> PAGE_SHIFT; | |
1019 | struct mm_struct *mm = current->mm; | |
1020 | struct vm_area_struct *vma = vrm->vma; | |
1021 | ||
1022 | vm_stat_account(mm, vma->vm_flags, pages); | |
2cf442d7 | 1023 | if (vma->vm_flags & VM_LOCKED) |
d5c8aec0 | 1024 | mm->locked_vm += pages; |
d5c8aec0 LS |
1025 | } |
1026 | ||
1027 | /* | |
1028 | * Perform checks before attempting to write a VMA prior to it being | |
1029 | * moved. | |
1030 | */ | |
b714ccb0 | 1031 | static unsigned long prep_move_vma(struct vma_remap_struct *vrm) |
d5c8aec0 LS |
1032 | { |
1033 | unsigned long err = 0; | |
1034 | struct vm_area_struct *vma = vrm->vma; | |
1035 | unsigned long old_addr = vrm->addr; | |
1036 | unsigned long old_len = vrm->old_len; | |
bfbe7110 | 1037 | vm_flags_t dummy = vma->vm_flags; |
1da177e4 LT |
1038 | |
1039 | /* | |
1040 | * We'd prefer to avoid failure later on in do_munmap: | |
1041 | * which may split one vma into three before unmapping. | |
1042 | */ | |
d5c8aec0 | 1043 | if (current->mm->map_count >= sysctl_max_map_count - 3) |
1da177e4 LT |
1044 | return -ENOMEM; |
1045 | ||
73d5e062 DS |
1046 | if (vma->vm_ops && vma->vm_ops->may_split) { |
1047 | if (vma->vm_start != old_addr) | |
1048 | err = vma->vm_ops->may_split(vma, old_addr); | |
1049 | if (!err && vma->vm_end != old_addr + old_len) | |
1050 | err = vma->vm_ops->may_split(vma, old_addr + old_len); | |
1051 | if (err) | |
1052 | return err; | |
1053 | } | |
1054 | ||
1ff82995 HD |
1055 | /* |
1056 | * Advise KSM to break any KSM pages in the area to be moved: | |
1057 | * it would be confusing if they were to turn up at the new | |
1058 | * location, where they happen to coincide with different KSM | |
1059 | * pages recently unmapped. But leave vma->vm_flags as it was, | |
1060 | * so KSM can come around to merge on vma and new_vma afterwards. | |
1061 | */ | |
7103ad32 | 1062 | err = ksm_madvise(vma, old_addr, old_addr + old_len, |
b714ccb0 | 1063 | MADV_UNMERGEABLE, &dummy); |
7103ad32 HD |
1064 | if (err) |
1065 | return err; | |
1ff82995 | 1066 | |
d5c8aec0 LS |
1067 | return 0; |
1068 | } | |
1069 | ||
b714ccb0 LS |
1070 | /* |
1071 | * Unmap the source VMA for a VMA move, turning it from a copy into a move, |
1072 | * being careful to ensure we do not underflow the memory account while doing |
1073 | * so if this is an accountable move. |
1074 | * |
1075 | * This is best effort; if we fail to unmap, then we simply try to correct the |
1076 | * accounting and exit. |
1077 | */ | |
1078 | static void unmap_source_vma(struct vma_remap_struct *vrm) | |
d5c8aec0 LS |
1079 | { |
1080 | struct mm_struct *mm = current->mm; | |
b714ccb0 LS |
1081 | unsigned long addr = vrm->addr; |
1082 | unsigned long len = vrm->old_len; | |
d5c8aec0 | 1083 | struct vm_area_struct *vma = vrm->vma; |
b714ccb0 | 1084 | VMA_ITERATOR(vmi, mm, addr); |
d5c8aec0 | 1085 | int err; |
b714ccb0 LS |
1086 | unsigned long vm_start; |
1087 | unsigned long vm_end; | |
1088 | /* | |
1089 | * It might seem odd that we check for MREMAP_DONTUNMAP here, given this | |
1090 | * function implies that we unmap the original VMA, which seems | |
1091 | * contradictory. | |
1092 | * | |
1093 | * However, this occurs when this operation was attempted and an error | |
1094 | * arose, in which case we _do_ wish to unmap the _new_ VMA, which means | |
1096 | * we actually _do_ want it to be unaccounted. |
1096 | */ | |
1097 | bool accountable_move = (vma->vm_flags & VM_ACCOUNT) && | |
1098 | !(vrm->flags & MREMAP_DONTUNMAP); | |
d5c8aec0 | 1099 | |
b714ccb0 LS |
1100 | /* |
1101 | * So we perform a trick here to prevent incorrect accounting. Any merge |
1102 | * or new VMA allocation performed in copy_vma() does not adjust |
1103 | * accounting; it is expected that callers handle this. |
1104 | * |
1105 | * And indeed we already have, accounting appropriately for both cases |
1106 | * in vrm_calc_charge(). |
1107 | * | |
1108 | * However, when we unmap the existing VMA (to effect the move), this | |
1109 | * code will, if the VMA has VM_ACCOUNT set, attempt to unaccount | |
1110 | * removed pages. | |
1111 | * | |
1112 | * To avoid this we temporarily clear this flag, reinstating on any | |
1113 | * portions of the original VMA that remain. | |
1114 | */ | |
1115 | if (accountable_move) { | |
1116 | vm_flags_clear(vma, VM_ACCOUNT); | |
1117 | /* We are about to split vma, so store the start/end. */ | |
1118 | vm_start = vma->vm_start; | |
1119 | vm_end = vma->vm_end; | |
1120 | } | |
d5c8aec0 | 1121 | |
b714ccb0 LS |
1122 | err = do_vmi_munmap(&vmi, mm, addr, len, vrm->uf_unmap, /* unlock= */false); |
1123 | vrm->vma = NULL; /* Invalidated. */ | |
d23cb648 | 1124 | vrm->vmi_needs_invalidate = true; |
b714ccb0 LS |
1125 | if (err) { |
1126 | /* OOM: unable to split vma, just get accounts right */ | |
1127 | vm_acct_memory(len >> PAGE_SHIFT); | |
1128 | return; | |
1129 | } | |
1130 | ||
1131 | /* | |
1132 | * If we mremap() from a VMA like this: | |
1133 | * | |
1134 | * addr end | |
1135 | * | | | |
1136 | * v v | |
1137 | * |-------------| | |
1138 | * | | | |
1139 | * |-------------| | |
1140 | * | |
1141 | * Having cleared VM_ACCOUNT from the whole VMA, after we unmap above | |
1142 | * we'll end up with: | |
1143 | * | |
1144 | * addr end | |
1145 | * | | | |
1146 | * v v | |
1147 | * |---| |---| | |
1148 | * | A | | B | | |
1149 | * |---| |---| | |
1150 | * | |
1151 | * The VMI is still pointing at addr, so vma_prev() will give us A, and | |
1152 | * a subsequent or lone vma_next() will give us B. |
1153 | * | |
1154 | * do_vmi_munmap() will have restored the VMI back to addr. | |
1155 | */ | |
1156 | if (accountable_move) { | |
1157 | unsigned long end = addr + len; | |
ad8ee77e | 1158 | |
b714ccb0 LS |
1159 | if (vm_start < addr) { |
1160 | struct vm_area_struct *prev = vma_prev(&vmi); | |
1161 | ||
1162 | vm_flags_set(prev, VM_ACCOUNT); /* Acquires VMA lock. */ | |
1163 | } | |
1164 | ||
1165 | if (vm_end > end) { | |
1166 | struct vm_area_struct *next = vma_next(&vmi); | |
1167 | ||
1168 | vm_flags_set(next, VM_ACCOUNT); /* Acquires VMA lock. */ | |
1169 | } | |
1170 | } | |
1171 | } | |
1172 | ||
1173 | /* | |
1174 | * Copy vrm->vma over to vrm->new_addr possibly adjusting size as part of the | |
1175 | * process. Additionally handle an error occurring on moving of page tables, | |
1176 | * where we reset vrm state to cause unmapping of the new VMA. | |
1177 | * | |
1178 | * Outputs the newly installed VMA to new_vma_ptr. Returns 0 on success or an | |
1179 | * error code. | |
1180 | */ | |
1181 | static int copy_vma_and_data(struct vma_remap_struct *vrm, | |
1182 | struct vm_area_struct **new_vma_ptr) | |
1183 | { | |
1184 | unsigned long internal_offset = vrm->addr - vrm->vma->vm_start; | |
1185 | unsigned long internal_pgoff = internal_offset >> PAGE_SHIFT; | |
1186 | unsigned long new_pgoff = vrm->vma->vm_pgoff + internal_pgoff; | |
1187 | unsigned long moved_len; | |
2a4077f4 | 1188 | struct vm_area_struct *vma = vrm->vma; |
b714ccb0 LS |
1189 | struct vm_area_struct *new_vma; |
1190 | int err = 0; | |
2a4077f4 | 1191 | PAGETABLE_MOVE(pmc, NULL, NULL, vrm->addr, vrm->new_addr, vrm->old_len); |
b714ccb0 | 1192 | |
2a4077f4 LS |
1193 | new_vma = copy_vma(&vma, vrm->new_addr, vrm->new_len, new_pgoff, |
1194 | &pmc.need_rmap_locks); | |
ad8ee77e | 1195 | if (!new_vma) { |
d5c8aec0 | 1196 | vrm_uncharge(vrm); |
b714ccb0 | 1197 | *new_vma_ptr = NULL; |
1da177e4 | 1198 | return -ENOMEM; |
ad8ee77e | 1199 | } |
d23cb648 LS |
1200 | /* By merging, we may have invalidated any iterator in use. */ |
1201 | if (vma != vrm->vma) | |
1202 | vrm->vmi_needs_invalidate = true; | |
1203 | ||
2a4077f4 LS |
1204 | vrm->vma = vma; |
1205 | pmc.old = vma; | |
1206 | pmc.new = new_vma; | |
1da177e4 | 1207 | |
2a4077f4 | 1208 | moved_len = move_page_tables(&pmc); |
b714ccb0 | 1209 | if (moved_len < vrm->old_len) |
df1eab30 | 1210 | err = -ENOMEM; |
b714ccb0 | 1211 | else if (vma->vm_ops && vma->vm_ops->mremap) |
14d07113 | 1212 | err = vma->vm_ops->mremap(new_vma); |
df1eab30 ON |
1213 | |
1214 | if (unlikely(err)) { | |
2a4077f4 LS |
1215 | PAGETABLE_MOVE(pmc_revert, new_vma, vma, vrm->new_addr, |
1216 | vrm->addr, moved_len); | |
1217 | ||
1da177e4 LT |
1218 | /* |
1219 | * On error, move entries back from new area to old, | |
1220 | * which will succeed since page tables still there, | |
1221 | * and then proceed to unmap new area instead of old. | |
1222 | */ | |
2a4077f4 LS |
1223 | pmc_revert.need_rmap_locks = true; |
1224 | move_page_tables(&pmc_revert); | |
1225 | ||
b714ccb0 LS |
1226 | vrm->vma = new_vma; |
1227 | vrm->old_len = vrm->new_len; | |
1228 | vrm->addr = vrm->new_addr; | |
4abad2ca | 1229 | } else { |
d5c8aec0 | 1230 | mremap_userfaultfd_prep(new_vma, vrm->uf); |
b2edffdd | 1231 | } |
1da177e4 | 1232 | |
ee40c992 | 1233 | fixup_hugetlb_reservations(vma); |
550a7d60 | 1234 | |
b714ccb0 LS |
1235 | *new_vma_ptr = new_vma; |
1236 | return err; | |
1237 | } | |
1238 | ||
1239 | /* | |
1240 | * Perform final tasks for the MREMAP_DONTUNMAP operation, clearing mlock() and |
1241 | * account flags on remaining VMA by convention (it cannot be mlock()'d any | |
1242 | * longer, as pages in range are no longer mapped), and removing anon_vma_chain | |
1243 | * links from it (if the entire VMA was copied over). | |
1244 | */ | |
1245 | static void dontunmap_complete(struct vma_remap_struct *vrm, | |
1246 | struct vm_area_struct *new_vma) | |
1247 | { | |
1248 | unsigned long start = vrm->addr; | |
1249 | unsigned long end = vrm->addr + vrm->old_len; | |
1250 | unsigned long old_start = vrm->vma->vm_start; | |
1251 | unsigned long old_end = vrm->vma->vm_end; | |
1252 | ||
1253 | /* | |
1254 | * We always clear VM_LOCKED[ONFAULT] | VM_ACCOUNT on the old | |
1255 | * vma. | |
1256 | */ | |
1257 | vm_flags_clear(vrm->vma, VM_LOCKED_MASK | VM_ACCOUNT); | |
1258 | ||
1259 | /* | |
1260 | * anon_vma links of the old vma are no longer needed after its page |
1261 | * table has been moved. | |
1262 | */ | |
1263 | if (new_vma != vrm->vma && start == old_start && end == old_end) | |
1264 | unlink_anon_vmas(vrm->vma); | |
1265 | ||
1266 | /* Because we won't unmap we don't need to touch locked_vm. */ | |
1267 | } | |
1268 | ||
1269 | static unsigned long move_vma(struct vma_remap_struct *vrm) | |
1270 | { | |
1271 | struct mm_struct *mm = current->mm; | |
1272 | struct vm_area_struct *new_vma; | |
1273 | unsigned long hiwater_vm; | |
1274 | int err; | |
1275 | ||
1276 | err = prep_move_vma(vrm); | |
1277 | if (err) | |
1278 | return err; | |
1279 | ||
000c0691 LS |
1280 | /* |
1281 | * If accounted, determine the number of pages the operation will |
1282 | * charge. | |
1283 | */ | |
1284 | if (!vrm_calc_charge(vrm)) | |
b714ccb0 LS |
1285 | return -ENOMEM; |
1286 | ||
1287 | /* We don't want racing faults. */ | |
1288 | vma_start_write(vrm->vma); | |
1289 | ||
1290 | /* Perform copy step. */ | |
1291 | err = copy_vma_and_data(vrm, &new_vma); | |
1292 | /* | |
1293 | * If we established the copied-to VMA, we attempt to recover from the | |
1294 | * error by setting the destination VMA to the source VMA and unmapping | |
1295 | * it below. | |
1296 | */ | |
1297 | if (err && !new_vma) | |
1298 | return err; | |
1da177e4 | 1299 | |
71799062 | 1300 | /* |
365e9c87 HD |
1301 | * If we failed to move page tables we still do total_vm increment |
1302 | * since do_munmap() will decrement it by old_len == new_len. | |
1303 | * | |
1304 | * Since total_vm is about to be raised artificially high for a | |
1305 | * moment, we need to restore high watermark afterwards: if stats | |
1306 | * are taken meanwhile, total_vm and hiwater_vm appear too high. | |
1307 | * If this were a serious issue, we'd add a flag to do_munmap(). | |
71799062 | 1308 | */ |
365e9c87 | 1309 | hiwater_vm = mm->hiwater_vm; |
71799062 | 1310 | |
b714ccb0 LS |
1311 | vrm_stat_account(vrm, vrm->new_len); |
1312 | if (unlikely(!err && (vrm->flags & MREMAP_DONTUNMAP))) | |
1313 | dontunmap_complete(vrm, new_vma); | |
1314 | else | |
1315 | unmap_source_vma(vrm); | |
e346b381 | 1316 | |
365e9c87 | 1317 | mm->hiwater_vm = hiwater_vm; |
1da177e4 | 1318 | |
b714ccb0 | 1319 | return err ? (unsigned long)err : vrm->new_addr; |
1da177e4 LT |
1320 | } |
1321 | ||
4b6b0a51 | 1322 | /* |
221bf5ca LS |
1323 | * The user has requested that the VMA be shrunk (i.e., old_len > new_len), so |
1324 | * execute this, optionally dropping the mmap lock when we do so. | |
4b6b0a51 | 1325 | * |
221bf5ca LS |
1326 | * In both cases this invalidates the VMA; however, if we don't drop the lock, |
1327 | * we then load the correct VMA into vrm->vma afterwards. |
1328 | */ | |
1329 | static unsigned long shrink_vma(struct vma_remap_struct *vrm, | |
1330 | bool drop_lock) | |
1331 | { | |
1332 | struct mm_struct *mm = current->mm; | |
1333 | unsigned long unmap_start = vrm->addr + vrm->new_len; | |
1334 | unsigned long unmap_bytes = vrm->delta; | |
1335 | unsigned long res; | |
1336 | VMA_ITERATOR(vmi, mm, unmap_start); | |
1337 | ||
1338 | VM_BUG_ON(vrm->remap_type != MREMAP_SHRINK); | |
1339 | ||
1340 | res = do_vmi_munmap(&vmi, mm, unmap_start, unmap_bytes, | |
1341 | vrm->uf_unmap, drop_lock); | |
1342 | vrm->vma = NULL; /* Invalidated. */ | |
1343 | if (res) | |
1344 | return res; | |
1345 | ||
1346 | /* | |
1347 | * If we've not dropped the lock, then we should reload the VMA to | |
1348 | * replace the invalidated VMA with the one that may have now been | |
1349 | * split. | |
1350 | */ | |
1351 | if (drop_lock) { | |
1352 | vrm->mmap_locked = false; | |
1353 | } else { | |
1354 | vrm->vma = vma_lookup(mm, vrm->addr); | |
1355 | if (!vrm->vma) | |
1356 | return -EFAULT; | |
1357 | } | |
1358 | ||
1359 | return 0; | |
1360 | } | |
1361 | ||
1362 | /* | |
1363 | * mremap_to() - remap a vma to a new location. | |
4b6b0a51 LH |
1364 | * Returns: The new address of the vma or an error. |
1365 | */ | |
221bf5ca | 1366 | static unsigned long mremap_to(struct vma_remap_struct *vrm) |
ecc1a899 AV |
1367 | { |
1368 | struct mm_struct *mm = current->mm; | |
221bf5ca | 1369 | unsigned long err; |
ecc1a899 | 1370 | |
221bf5ca | 1371 | if (vrm->flags & MREMAP_FIXED) { |
8be7258a JX |
1372 | /* |
1373 | * In mremap_to() the VMA is moved to the destination address, so we |
1374 | * munmap the destination first. |
1375 | * do_munmap() will check whether the destination is sealed. |
1376 | */ | |
221bf5ca LS |
1377 | err = do_munmap(mm, vrm->new_addr, vrm->new_len, |
1378 | vrm->uf_unmap_early); | |
1379 | vrm->vma = NULL; /* Invalidated. */ | |
d23cb648 | 1380 | vrm->vmi_needs_invalidate = true; |
221bf5ca LS |
1381 | if (err) |
1382 | return err; | |
ecc1a899 | 1383 | |
221bf5ca LS |
1384 | /* |
1385 | * If we remap a portion of a VMA elsewhere in the same VMA, | |
1386 | * this can invalidate the old VMA. Reset. | |
1387 | */ | |
1388 | vrm->vma = vma_lookup(mm, vrm->addr); | |
1389 | if (!vrm->vma) | |
1390 | return -EFAULT; | |
ecc1a899 AV |
1391 | } |
1392 | ||
221bf5ca LS |
1393 | if (vrm->remap_type == MREMAP_SHRINK) { |
1394 | err = shrink_vma(vrm, /* drop_lock= */false); | |
1395 | if (err) | |
1396 | return err; | |
ecc1a899 | 1397 | |
221bf5ca LS |
1398 | /* Set up for the move now shrink has been executed. */ |
1399 | vrm->old_len = vrm->new_len; | |
e346b381 BG |
1400 | } |
1401 | ||
221bf5ca LS |
1402 | /* MREMAP_DONTUNMAP expands by old_len since old_len == new_len */ |
1403 | if (vrm->flags & MREMAP_DONTUNMAP) { | |
1404 | vm_flags_t vm_flags = vrm->vma->vm_flags; | |
1405 | unsigned long pages = vrm->old_len >> PAGE_SHIFT; | |
9206de95 | 1406 | |
221bf5ca LS |
1407 | if (!may_expand_vm(mm, vm_flags, pages)) |
1408 | return -ENOMEM; | |
1409 | } | |
097eed10 | 1410 | |
221bf5ca LS |
1411 | err = vrm_set_new_addr(vrm); |
1412 | if (err) | |
1413 | return err; | |
e346b381 | 1414 | |
d5c8aec0 | 1415 | return move_vma(vrm); |
ecc1a899 AV |
1416 | } |
1417 | ||
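/*
 * Annotation, userspace view of the path above (illustrative): a call such as
 *
 *	mremap(old, len, len, MREMAP_MAYMOVE | MREMAP_FIXED, new)
 *
 * reaches mremap_to() and munmaps whatever is currently mapped at 'new'
 * first, whereas
 *
 *	mremap(old, len, len, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, new)
 *
 * also lands here but leaves the old range mapped (now empty of pages),
 * which is why may_expand_vm() is checked above for the full old_len.
 */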
1a0ef85f AV |
1418 | static int vma_expandable(struct vm_area_struct *vma, unsigned long delta) |
1419 | { | |
f106af4e | 1420 | unsigned long end = vma->vm_end + delta; |
396a44cc | 1421 | |
9206de95 | 1422 | if (end < vma->vm_end) /* overflow */ |
f106af4e | 1423 | return 0; |
396a44cc | 1424 | if (find_vma_intersection(vma->vm_mm, vma->vm_end, end)) |
f106af4e AV |
1425 | return 0; |
1426 | if (get_unmapped_area(NULL, vma->vm_start, end - vma->vm_start, | |
1427 | 0, MAP_FIXED) & ~PAGE_MASK) | |
1a0ef85f | 1428 | return 0; |
1a0ef85f AV |
1429 | return 1; |
1430 | } | |
1431 | ||
221bf5ca LS |
1432 | /* Determine whether we are actually able to execute an in-place expansion. */ |
1433 | static bool vrm_can_expand_in_place(struct vma_remap_struct *vrm) | |
85ea6bdd | 1434 | { |
221bf5ca LS |
1435 | /* Number of bytes from vrm->addr to end of VMA. */ |
1436 | unsigned long suffix_bytes = vrm->vma->vm_end - vrm->addr; | |
1437 | ||
1438 | /* If end of range aligns to end of VMA, we can just expand in-place. */ | |
1439 | if (suffix_bytes != vrm->old_len) | |
1440 | return false; | |
1441 | ||
1442 | /* Check whether this is feasible. */ | |
1443 | if (!vma_expandable(vrm->vma, vrm->delta)) | |
1444 | return false; | |
1445 | ||
1446 | return true; | |
85ea6bdd LS |
1447 | } |
1448 | ||
85ea6bdd LS |
1449 | /* |
1450 | * We know we can expand the VMA in-place by delta bytes, so do so.
1451 | *
1452 | * Charge the expansion if required and, on success, update mm_struct
1453 | * statistics (including locked_vm if the VMA is mlock()'d).
1454 | */ | |
221bf5ca | 1455 | static unsigned long expand_vma_in_place(struct vma_remap_struct *vrm) |
85ea6bdd LS |
1456 | { |
1457 | struct mm_struct *mm = current->mm; | |
221bf5ca | 1458 | struct vm_area_struct *vma = vrm->vma; |
85ea6bdd | 1459 | VMA_ITERATOR(vmi, mm, vma->vm_end); |
85ea6bdd | 1460 | |
000c0691 | 1461 | if (!vrm_calc_charge(vrm)) |
d5c8aec0 | 1462 | return -ENOMEM; |
85ea6bdd LS |
1463 | |
1464 | /* | |
1465 | * vma_merge_extend() is called on the extension we are
1466 | * adding to the already existing vma.
1467 | *
1468 | * It will merge this extension with the existing vma (the
1469 | * expand operation itself), and possibly also with the next
1470 | * vma if the expanded vma becomes adjacent to it and is
1471 | * otherwise compatible.
1472 | */ | |
36eed540 | 1473 | vma = vma_merge_extend(&vmi, vma, vrm->delta); |
85ea6bdd | 1474 | if (!vma) { |
d5c8aec0 | 1475 | vrm_uncharge(vrm); |
85ea6bdd LS |
1476 | return -ENOMEM; |
1477 | } | |
36eed540 | 1478 | vrm->vma = vma; |
85ea6bdd | 1479 | |
d5c8aec0 | 1480 | vrm_stat_account(vrm, vrm->delta); |
85ea6bdd LS |
1481 | |
1482 | return 0; | |
1483 | } | |
1484 | ||
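/*
 * Illustrative userspace sketch (not kernel code): growing a mapping in
 * place. The mapping is deliberately followed by free address space, so
 * mremap() without MREMAP_MAYMOVE can expand it at the same address, the
 * path taken by expand_vma_in_place() above. A minimal sketch assuming
 * glibc's mremap() wrapper.
 */
#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	size_t reserve = 16 * 4096, old_len = 4 * 4096, new_len = 8 * 4096;
	char *p;

	/* Reserve a larger region, then trim it so the tail is free. */
	p = mmap(NULL, reserve, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}
	munmap(p + old_len, reserve - old_len);

	/* No MREMAP_MAYMOVE: this succeeds only if it can grow in place. */
	if (mremap(p, old_len, new_len, 0) == MAP_FAILED) {
		perror("mremap");
		return EXIT_FAILURE;
	}

	printf("mapping grown in place at %p\n", (void *)p);
	munmap(p, new_len);
	return EXIT_SUCCESS;
}
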
221bf5ca | 1485 | static bool align_hugetlb(struct vma_remap_struct *vrm) |
85ea6bdd | 1486 | { |
221bf5ca | 1487 | struct hstate *h __maybe_unused = hstate_vma(vrm->vma); |
85ea6bdd | 1488 | |
221bf5ca LS |
1489 | vrm->old_len = ALIGN(vrm->old_len, huge_page_size(h)); |
1490 | vrm->new_len = ALIGN(vrm->new_len, huge_page_size(h)); | |
85ea6bdd LS |
1491 | |
1492 | /* addrs must be huge page aligned */ | |
221bf5ca | 1493 | if (vrm->addr & ~huge_page_mask(h)) |
85ea6bdd | 1494 | return false; |
221bf5ca | 1495 | if (vrm->new_addr & ~huge_page_mask(h)) |
85ea6bdd LS |
1496 | return false; |
1497 | ||
1498 | /* | |
1499 | * Don't allow remap expansion, because the underlying hugetlb | |
1500 | * reservation is not yet capable of handling split reservations.
1501 | */ | |
221bf5ca | 1502 | if (vrm->new_len > vrm->old_len) |
85ea6bdd LS |
1503 | return false; |
1504 | ||
85ea6bdd LS |
1505 | return true; |
1506 | } | |
1507 | ||
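/*
 * Illustrative sketch (not kernel code) of the alignment arithmetic in
 * align_hugetlb(), assuming a hypothetical 2 MiB huge page size. HUGE_SZ
 * and ALIGN_UP are stand-ins for huge_page_size() and the kernel's
 * ALIGN() macro: lengths round up to the next huge page multiple, while
 * addresses must already be huge page aligned.
 */
#include <stdio.h>

#define HUGE_SZ		(2UL << 20)			/* assumed 2 MiB */
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))	/* a: power of two */

int main(void)
{
	unsigned long old_len = 3UL << 20;	/* 3 MiB -> rounds up to 4 MiB */
	unsigned long addr = 0x40000000UL;	/* already 2 MiB aligned */

	printf("old_len %lu -> aligned %lu\n",
	       old_len, ALIGN_UP(old_len, HUGE_SZ));
	/* Mirrors the "addrs must be huge page aligned" check above. */
	printf("addr aligned? %s\n", (addr & (HUGE_SZ - 1)) ? "no" : "yes");
	return 0;
}
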
1508 | /* | |
1509 | * We are mremap()'ing without specifying a fixed address to move to, but are | |
1510 | * requesting that the VMA's size be increased. | |
1511 | * | |
1512 | * Try to do so in-place, if this fails, then move the VMA to a new location to | |
1513 | * action the change. | |
1514 | */ | |
221bf5ca | 1515 | static unsigned long expand_vma(struct vma_remap_struct *vrm) |
85ea6bdd LS |
1516 | { |
1517 | unsigned long err; | |
85ea6bdd | 1518 | |
85ea6bdd LS |
1519 | /* |
1520 | * If [addr, addr + old_len) spans precisely to the end of the VMA,
1521 | * try to expand it in-place.
1522 | */ | |
221bf5ca LS |
1523 | if (vrm_can_expand_in_place(vrm)) { |
1524 | err = expand_vma_in_place(vrm); | |
1525 | if (err) | |
85ea6bdd LS |
1526 | return err; |
1527 | ||
85ea6bdd | 1528 | /* OK we're done! */ |
f256a7a4 | 1529 | return vrm->addr; |
85ea6bdd LS |
1530 | } |
1531 | ||
1532 | /* | |
1533 | * We weren't able to expand the area in-place, so we need to
1534 | * create a new mapping and move into it.
1535 | */ | |
1536 | ||
1537 | /* We're not allowed to move the VMA, so error out. */ | |
d5c8aec0 | 1538 | if (!(vrm->flags & MREMAP_MAYMOVE)) |
85ea6bdd LS |
1539 | return -ENOMEM; |
1540 | ||
1541 | /* Find a new location to move the VMA to. */ | |
221bf5ca LS |
1542 | err = vrm_set_new_addr(vrm); |
1543 | if (err) | |
1544 | return err; | |
85ea6bdd | 1545 | |
d5c8aec0 | 1546 | return move_vma(vrm); |
85ea6bdd LS |
1547 | } |
1548 | ||
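/*
 * Illustrative userspace sketch (not kernel code): expanding a mapping
 * that cannot grow in place because another VMA sits directly behind it.
 * With MREMAP_MAYMOVE the kernel relocates it instead (the move_vma()
 * path taken by expand_vma() above), so the returned address may differ
 * from the original. A minimal sketch assuming glibc's mremap() wrapper.
 */
#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	size_t old_len = 4 * 4096, new_len = 16 * 4096;
	char *base, *q;

	/* Map old_len plus one extra page that will block in-place growth. */
	base = mmap(NULL, old_len + 4096, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (base == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}
	/* Changing protections turns the trailing page into a separate VMA. */
	if (mprotect(base + old_len, 4096, PROT_NONE)) {
		perror("mprotect");
		return EXIT_FAILURE;
	}

	/* Cannot grow in place (a VMA follows), so the kernel moves it. */
	q = mremap(base, old_len, new_len, MREMAP_MAYMOVE);
	if (q == MAP_FAILED) {
		perror("mremap");
		return EXIT_FAILURE;
	}

	printf("expanded: %p -> %p\n", (void *)base, (void *)q);
	munmap(q, new_len);
	munmap(base + old_len, 4096);
	return EXIT_SUCCESS;
}
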
1da177e4 | 1549 | /* |
221bf5ca LS |
1550 | * Attempt to resize the VMA in-place; if we cannot, then move the VMA to the
1551 | * first available address to perform the operation. | |
1da177e4 | 1552 | */ |
221bf5ca LS |
1553 | static unsigned long mremap_at(struct vma_remap_struct *vrm) |
1554 | { | |
1555 | unsigned long res; | |
1556 | ||
1557 | switch (vrm->remap_type) { | |
1558 | case MREMAP_INVALID: | |
1559 | break; | |
1560 | case MREMAP_NO_RESIZE: | |
1561 | /* NO-OP CASE - resizing to the same size. */ | |
1562 | return vrm->addr; | |
1563 | case MREMAP_SHRINK: | |
1564 | /* | |
1565 | * SHRINK CASE. Can always be done in-place. | |
1566 | * | |
1567 | * Simply unmap the shrunken portion of the VMA. This does all | |
1568 | * the needed commit accounting, and we indicate that the mmap | |
1569 | * lock should be dropped. | |
1570 | */ | |
1571 | res = shrink_vma(vrm, /* drop_lock= */true); | |
1572 | if (res) | |
1573 | return res; | |
1574 | ||
1575 | return vrm->addr; | |
1576 | case MREMAP_EXPAND: | |
1577 | return expand_vma(vrm); | |
1578 | } | |
1579 | ||
000c0691 LS |
1580 | /* Should not be possible. */ |
1581 | WARN_ON_ONCE(1); | |
1582 | return -EINVAL; | |
221bf5ca LS |
1583 | } |
1584 | ||
a85dc371 LS |
1585 | /* |
1586 | * Will this operation result in the VMA being expanded or moved and thus need | |
1587 | * to map a new portion of virtual address space? | |
1588 | */ | |
1589 | static bool vrm_will_map_new(struct vma_remap_struct *vrm) | |
1590 | { | |
1591 | if (vrm->remap_type == MREMAP_EXPAND) | |
1592 | return true; | |
1593 | ||
1594 | if (vrm_implies_new_addr(vrm)) | |
1595 | return true; | |
1596 | ||
1597 | return false; | |
1598 | } | |
1599 | ||
d23cb648 LS |
1600 | /* Does this remap ONLY move mappings? */ |
1601 | static bool vrm_move_only(struct vma_remap_struct *vrm) | |
1602 | { | |
1603 | if (!(vrm->flags & MREMAP_FIXED)) | |
1604 | return false; | |
1605 | ||
1606 | if (vrm->old_len != vrm->new_len) | |
1607 | return false; | |
1608 | ||
1609 | return true; | |
1610 | } | |
1611 | ||
9b2301bf LS |
1612 | static void notify_uffd(struct vma_remap_struct *vrm, bool failed) |
1613 | { | |
1614 | struct mm_struct *mm = current->mm; | |
1615 | ||
1616 | /* Regardless of success/failure, we always notify of any unmaps. */ | |
1617 | userfaultfd_unmap_complete(mm, vrm->uf_unmap_early); | |
1618 | if (failed) | |
1619 | mremap_userfaultfd_fail(vrm->uf); | |
1620 | else | |
1621 | mremap_userfaultfd_complete(vrm->uf, vrm->addr, | |
1622 | vrm->new_addr, vrm->old_len); | |
1623 | userfaultfd_unmap_complete(mm, vrm->uf_unmap); | |
1624 | } | |
1625 | ||
d23cb648 LS |
1626 | static bool vma_multi_allowed(struct vm_area_struct *vma) |
1627 | { | |
7c91e0b9 | 1628 | struct file *file = vma->vm_file; |
d23cb648 LS |
1629 | |
1630 | /* | |
1631 | * We can't support moving multiple uffd-armed VMAs, as the uffd
1632 | * notification requires the mmap lock to be dropped.
1633 | */ | |
1634 | if (userfaultfd_armed(vma)) | |
1635 | return false; | |
1636 | ||
1637 | /* | |
1638 | * A custom get_unmapped_area() hook might result in MREMAP_FIXED not
1639 | * being obeyed. | |
1640 | */ | |
7c91e0b9 LS |
1641 | if (!file || !file->f_op->get_unmapped_area) |
1642 | return true; | |
1643 | /* Known good. */ | |
1644 | if (vma_is_shmem(vma)) | |
1645 | return true; | |
1646 | if (is_vm_hugetlb_page(vma)) | |
1647 | return true; | |
1648 | if (file->f_op->get_unmapped_area == thp_get_unmapped_area) | |
1649 | return true; | |
d23cb648 | 1650 | |
7c91e0b9 | 1651 | return false; |
d23cb648 LS |
1652 | } |
1653 | ||
f256a7a4 LS |
1654 | static int check_prep_vma(struct vma_remap_struct *vrm) |
1655 | { | |
1656 | struct vm_area_struct *vma = vrm->vma; | |
9b2301bf LS |
1657 | struct mm_struct *mm = current->mm; |
1658 | unsigned long addr = vrm->addr; | |
1659 | unsigned long old_len, new_len, pgoff; | |
f256a7a4 LS |
1660 | |
1661 | if (!vma) | |
1662 | return -EFAULT; | |
1663 | ||
1664 | /* If mseal()'d, mremap() is prohibited. */ | |
8b291416 | 1665 | if (vma_is_sealed(vma)) |
f256a7a4 LS |
1666 | return -EPERM; |
1667 | ||
1668 | /* Align to hugetlb page size, if required. */ | |
1669 | if (is_vm_hugetlb_page(vma) && !align_hugetlb(vrm)) | |
1670 | return -EINVAL; | |
1671 | ||
1672 | vrm_set_delta(vrm); | |
1673 | vrm->remap_type = vrm_remap_type(vrm); | |
1674 | /* For convenience, we set new_addr even if VMA won't move. */ | |
1675 | if (!vrm_implies_new_addr(vrm)) | |
9b2301bf LS |
1676 | vrm->new_addr = addr; |
1677 | ||
1678 | /* Below only meaningful if we expand or move a VMA. */ | |
1679 | if (!vrm_will_map_new(vrm)) | |
1680 | return 0; | |
1681 | ||
1682 | old_len = vrm->old_len; | |
1683 | new_len = vrm->new_len; | |
1684 | ||
1685 | /* | |
1686 | * !old_len is a special case where an attempt is made to 'duplicate' | |
1687 | * a mapping. This makes no sense for private mappings as it will | |
1688 | * instead create a fresh/new mapping unrelated to the original. This | |
1689 | * is contrary to the basic idea of mremap which creates new mappings | |
1690 | * based on the original. There are no known use cases for this | |
1691 | * behavior. As a result, fail such attempts. | |
1692 | */ | |
1693 | if (!old_len && !(vma->vm_flags & (VM_SHARED | VM_MAYSHARE))) { | |
1694 | pr_warn_once("%s (%d): attempted to duplicate a private mapping with mremap. This is not supported.\n", | |
1695 | current->comm, current->pid); | |
1696 | return -EINVAL; | |
1697 | } | |
1698 | ||
1699 | if ((vrm->flags & MREMAP_DONTUNMAP) && | |
1700 | (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))) | |
1701 | return -EINVAL; | |
1702 | ||
1703 | /* | |
1704 | * We permit crossing of boundaries for the range being unmapped due to | |
1705 | * a shrink. | |
1706 | */ | |
1707 | if (vrm->remap_type == MREMAP_SHRINK) | |
1708 | old_len = new_len; | |
1709 | ||
d23cb648 LS |
1710 | /* |
1711 | * We can't remap across the end of VMAs, as another VMA may be | |
1712 | * adjacent: | |
1713 | * | |
1714 | * addr vma->vm_end | |
1715 | * |-----.----------| | |
1716 | * | . | | |
1717 | * |-----.----------| | |
1718 | * .<--------->xxx> | |
1719 | * old_len | |
1720 | * | |
1721 | * We also require that vma->vm_start <= addr < vma->vm_end. | |
1722 | */ | |
9b2301bf LS |
1723 | if (old_len > vma->vm_end - addr) |
1724 | return -EFAULT; | |
1725 | ||
1726 | if (new_len == old_len) | |
1727 | return 0; | |
1728 | ||
2cf442d7 LS |
1729 | /* We are expanding and the VMA is mlock()'d so we need to populate. */ |
1730 | if (vma->vm_flags & VM_LOCKED) | |
1731 | vrm->populate_expand = true; | |
1732 | ||
9b2301bf LS |
1733 | /* Need to be careful about a growing mapping */ |
1734 | pgoff = (addr - vma->vm_start) >> PAGE_SHIFT; | |
1735 | pgoff += vma->vm_pgoff; | |
1736 | if (pgoff + (new_len >> PAGE_SHIFT) < pgoff) | |
1737 | return -EINVAL; | |
f256a7a4 | 1738 | |
9b2301bf LS |
1739 | if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) |
1740 | return -EFAULT; | |
1741 | ||
1742 | if (!mlock_future_ok(mm, vma->vm_flags, vrm->delta)) | |
1743 | return -EAGAIN; | |
1744 | ||
1745 | if (!may_expand_vm(mm, vma->vm_flags, vrm->delta >> PAGE_SHIFT)) | |
1746 | return -ENOMEM; | |
a85dc371 | 1747 | |
f256a7a4 LS |
1748 | return 0; |
1749 | } | |
1750 | ||
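/*
 * Illustrative userspace sketch (not kernel code): the old_len == 0
 * special case handled in check_prep_vma() above. For a MAP_SHARED
 * mapping it creates a second, independent mapping of the same pages
 * (the DOS-emu "duplicate shm area" case); for a private mapping it is
 * rejected with -EINVAL. A minimal sketch assuming glibc's mremap()
 * wrapper.
 */
#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	size_t len = 4096;
	char *a, *b;

	a = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (a == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}

	/* old_len == 0: duplicate rather than move; the original stays mapped. */
	b = mremap(a, 0, len, MREMAP_MAYMOVE);
	if (b == MAP_FAILED) {
		perror("mremap");
		return EXIT_FAILURE;
	}

	strcpy(a, "hello");
	printf("alias sees: %s\n", b);	/* both map the same pages */

	munmap(b, len);
	munmap(a, len);
	return EXIT_SUCCESS;
}
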
9b2301bf LS |
1751 | /* |
1752 | * Are the parameters passed to mremap() valid? If so return 0, otherwise
1753 | * return an error.
1754 | */ | |
1755 | static unsigned long check_mremap_params(struct vma_remap_struct *vrm) | |
1756 | ||
e49e76c2 | 1757 | { |
9b2301bf LS |
1758 | unsigned long addr = vrm->addr; |
1759 | unsigned long flags = vrm->flags; | |
e49e76c2 | 1760 | |
9b2301bf LS |
1761 | /* Ensure no unexpected flag values. */ |
1762 | if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP)) | |
1763 | return -EINVAL; | |
1764 | ||
1765 | /* Start address must be page-aligned. */ | |
1766 | if (offset_in_page(addr)) | |
1767 | return -EINVAL; | |
1768 | ||
1769 | /* | |
1770 | * We allow a zero old-len as a special case | |
1771 | * for DOS-emu "duplicate shm area" thing. But | |
1772 | * a zero new-len is nonsensical. | |
1773 | */ | |
1774 | if (!vrm->new_len) | |
1775 | return -EINVAL; | |
1776 | ||
78d2d32f CL |
1777 | /* Is the new length silly? */ |
1778 | if (vrm->new_len > TASK_SIZE) | |
9b2301bf LS |
1779 | return -EINVAL; |
1780 | ||
1781 | /* Remainder of checks are for cases with specific new_addr. */ | |
1782 | if (!vrm_implies_new_addr(vrm)) | |
1783 | return 0; | |
1784 | ||
78d2d32f CL |
1785 | /* Is the new address silly? */ |
1786 | if (vrm->new_addr > TASK_SIZE - vrm->new_len) | |
1787 | return -EINVAL; | |
1788 | ||
9b2301bf LS |
1789 | /* The new address must be page-aligned. */ |
1790 | if (offset_in_page(vrm->new_addr)) | |
1791 | return -EINVAL; | |
1792 | ||
1793 | /* A fixed address implies a move. */ | |
1794 | if (!(flags & MREMAP_MAYMOVE)) | |
1795 | return -EINVAL; | |
1796 | ||
1797 | /* MREMAP_DONTUNMAP does not allow resizing in the process. */ | |
1798 | if (flags & MREMAP_DONTUNMAP && vrm->old_len != vrm->new_len) | |
1799 | return -EINVAL; | |
1800 | ||
1801 | /* Target VMA must not overlap source VMA. */ | |
1802 | if (vrm_overlaps(vrm)) | |
1803 | return -EINVAL; | |
1804 | ||
1805 | /* | |
1806 | * move_vma() needs us to stay 4 maps below the threshold, otherwise
1807 | * it will bail out at the very beginning.
1808 | * That is a problem if we have already unmapped the regions here
1809 | * (new_addr and old_addr), because userspace will not know the
1810 | * state of the VMAs after it gets -ENOMEM.
1811 | * So, to avoid such a scenario, we can pre-compute whether the whole
1812 | * operation has a high chance of succeeding map-wise.
1813 | * The worst case is when both VMAs (new_addr and old_addr) get
1814 | * split in 3 before unmapping them.
1815 | * That means 2 more maps (1 for each) on top of the ones we already hold.
1816 | * Check whether the current map count plus 2 still leaves us 4 maps below
1817 | * the threshold, otherwise return -ENOMEM here to be safe.
1818 | */ | |
1819 | if ((current->mm->map_count + 2) >= sysctl_max_map_count - 3) | |
1820 | return -ENOMEM; | |
1821 | ||
1822 | return 0; | |
e49e76c2 LS |
1823 | } |
1824 | ||
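/*
 * Illustrative userspace sketch (not kernel code): MREMAP_DONTUNMAP
 * (Linux 5.7+). The pages move to the new address but the old range stays
 * mapped (freshly zero-filled on next touch), and old_len must equal
 * new_len, as check_mremap_params() above enforces. A minimal sketch
 * assuming glibc's mremap() wrapper; the fallback #define of
 * MREMAP_DONTUNMAP is for older userspace headers.
 */
#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>

#ifndef MREMAP_DONTUNMAP
#define MREMAP_DONTUNMAP 4	/* value from <linux/mman.h> */
#endif

int main(void)
{
	size_t len = 4 * 4096;
	char *src, *dst;

	src = mmap(NULL, len, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (src == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}
	src[0] = 'x';

	/* old_len must equal new_len; MREMAP_MAYMOVE is required. */
	dst = mremap(src, len, len, MREMAP_MAYMOVE | MREMAP_DONTUNMAP);
	if (dst == MAP_FAILED) {
		perror("mremap");
		return EXIT_FAILURE;
	}

	/* dst[0] is 'x'; src[0] now reads back 0 from a fresh zero page. */
	printf("dst[0]=%c src[0]=%d\n", dst[0], src[0]);

	munmap(dst, len);
	munmap(src, len);
	return EXIT_SUCCESS;
}
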
d23cb648 LS |
1825 | static unsigned long remap_move(struct vma_remap_struct *vrm) |
1826 | { | |
1827 | struct vm_area_struct *vma; | |
1828 | unsigned long start = vrm->addr; | |
1829 | unsigned long end = vrm->addr + vrm->old_len; | |
1830 | unsigned long new_addr = vrm->new_addr; | |
d23cb648 LS |
1831 | unsigned long target_addr = new_addr; |
1832 | unsigned long res = -EFAULT; | |
1833 | unsigned long last_end; | |
d5f416c7 LS |
1834 | bool seen_vma = false; |
1835 | ||
d23cb648 LS |
1836 | VMA_ITERATOR(vmi, current->mm, start); |
1837 | ||
1838 | /* | |
1839 | * When moving VMAs we allow for batched moves across multiple VMAs, | |
1840 | * with all VMAs in the input range [addr, addr + old_len) being moved | |
1841 | * (and split as necessary). | |
1842 | */ | |
1843 | for_each_vma_range(vmi, vma, end) { | |
1844 | /* Account for start, end not aligned with VMA start, end. */ | |
1845 | unsigned long addr = max(vma->vm_start, start); | |
1846 | unsigned long len = min(end, vma->vm_end) - addr; | |
1847 | unsigned long offset, res_vma; | |
d5f416c7 | 1848 | bool multi_allowed; |
d23cb648 LS |
1849 | |
1850 | /* No gap permitted at the start of the range. */ | |
1851 | if (!seen_vma && start < vma->vm_start) | |
1852 | return -EFAULT; | |
1853 | ||
1854 | /* | |
1855 | * To sensibly move multiple VMAs, accounting for the fact that | |
1856 | * get_unmapped_area() may align even MAP_FIXED moves, we simply | |
1857 | * attempt to move such that the gaps between source VMAs remain | |
1858 | * consistent in destination VMAs, e.g.: | |
1859 | * | |
1860 | * X Y X Y | |
1861 | * <---> <-> <---> <-> | |
1862 | * |-------| |-----| |-----| |-------| |-----| |-----| | |
1863 | * | A | | B | | C | ---> | A' | | B' | | C' | | |
1864 | * |-------| |-----| |-----| |-------| |-----| |-----| | |
1865 | * new_addr | |
1866 | * | |
1867 | * So we map B' at A'->vm_end + X, and C' at B'->vm_end + Y. | |
1868 | */ | |
1869 | offset = seen_vma ? vma->vm_start - last_end : 0; | |
1870 | last_end = vma->vm_end; | |
1871 | ||
1872 | vrm->vma = vma; | |
1873 | vrm->addr = addr; | |
1874 | vrm->new_addr = target_addr + offset; | |
1875 | vrm->old_len = vrm->new_len = len; | |
1876 | ||
d5f416c7 LS |
1877 | multi_allowed = vma_multi_allowed(vma); |
1878 | if (!multi_allowed) { | |
1879 | /* This is not the first VMA, abort immediately. */ | |
1880 | if (seen_vma) | |
1881 | return -EFAULT; | |
1882 | /* This is the first VMA, but more follow, so abort. */
1883 | if (vma->vm_end < end) | |
1884 | return -EFAULT; | |
1885 | } | |
d23cb648 LS |
1886 | |
1887 | res_vma = check_prep_vma(vrm); | |
1888 | if (!res_vma) | |
1889 | res_vma = mremap_to(vrm); | |
1890 | if (IS_ERR_VALUE(res_vma)) | |
1891 | return res_vma; | |
1892 | ||
1893 | if (!seen_vma) { | |
d5f416c7 | 1894 | VM_WARN_ON_ONCE(multi_allowed && res_vma != new_addr); |
d23cb648 LS |
1895 | res = res_vma; |
1896 | } | |
1897 | ||
1898 | /* mmap lock is only dropped on shrink. */ | |
1899 | VM_WARN_ON_ONCE(!vrm->mmap_locked); | |
1900 | /* This is a move, no expand should occur. */ | |
1901 | VM_WARN_ON_ONCE(vrm->populate_expand); | |
1902 | ||
1903 | if (vrm->vmi_needs_invalidate) { | |
1904 | vma_iter_invalidate(&vmi); | |
1905 | vrm->vmi_needs_invalidate = false; | |
1906 | } | |
1907 | seen_vma = true; | |
1908 | target_addr = res_vma + vrm->new_len; | |
1909 | } | |
1910 | ||
1911 | return res; | |
1912 | } | |
1913 | ||
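/*
 * Illustrative userspace sketch (not kernel code): a batched move across
 * multiple VMAs, the case handled by remap_move() above. Two adjacent
 * anonymous VMAs (created by splitting one mapping with mprotect()) are
 * moved with a single MREMAP_FIXED call spanning both; gaps between
 * source VMAs would be preserved at the destination. A minimal sketch
 * assuming glibc's mremap() wrapper and a kernel recent enough to support
 * multi-VMA moves; older kernels return -EFAULT for this call.
 */
#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	size_t len = 8 * 4096;
	char *src, *dst, *moved;

	src = mmap(NULL, len, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	dst = mmap(NULL, len, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (src == MAP_FAILED || dst == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}

	/* Split src into two VMAs by giving the second half new protections. */
	if (mprotect(src + len / 2, len / 2, PROT_READ)) {
		perror("mprotect");
		return EXIT_FAILURE;
	}

	/* One call moves both VMAs: old_len == new_len and MREMAP_FIXED set. */
	moved = mremap(src, len, len, MREMAP_FIXED | MREMAP_MAYMOVE, dst);
	if (moved == MAP_FAILED) {
		perror("mremap");
		return EXIT_FAILURE;
	}

	printf("moved two VMAs: %p -> %p\n", (void *)src, (void *)moved);
	munmap(moved, len);
	return EXIT_SUCCESS;
}
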
221bf5ca | 1914 | static unsigned long do_mremap(struct vma_remap_struct *vrm) |
1da177e4 | 1915 | { |
d0de32d9 | 1916 | struct mm_struct *mm = current->mm; |
000c0691 | 1917 | unsigned long res; |
f9f11398 | 1918 | bool failed; |
057d3389 | 1919 | |
3215eace LS |
1920 | vrm->old_len = PAGE_ALIGN(vrm->old_len); |
1921 | vrm->new_len = PAGE_ALIGN(vrm->new_len); | |
1922 | ||
000c0691 LS |
1923 | res = check_mremap_params(vrm); |
1924 | if (res) | |
1925 | return res; | |
1da177e4 | 1926 | |
85ea6bdd | 1927 | if (mmap_write_lock_killable(mm)) |
dc0ef0df | 1928 | return -EINTR; |
221bf5ca | 1929 | vrm->mmap_locked = true; |
85ea6bdd | 1930 | |
d23cb648 LS |
1931 | if (vrm_move_only(vrm)) { |
1932 | res = remap_move(vrm); | |
1933 | } else { | |
1934 | vrm->vma = vma_lookup(current->mm, vrm->addr); | |
1935 | res = check_prep_vma(vrm); | |
1936 | if (res) | |
1937 | goto out; | |
85a06835 | 1938 | |
d23cb648 LS |
1939 | /* Actually execute mremap. */ |
1940 | res = vrm_implies_new_addr(vrm) ? mremap_to(vrm) : mremap_at(vrm); | |
1941 | } | |
85ea6bdd | 1942 | |
221bf5ca | 1943 | out: |
f9f11398 LS |
1944 | failed = IS_ERR_VALUE(res); |
1945 | ||
e49e76c2 | 1946 | if (vrm->mmap_locked) |
221bf5ca | 1947 | mmap_write_unlock(mm); |
1da177e4 | 1948 | |
2cf442d7 LS |
1949 | /* VMA mlock()'d + was expanded, so populate the expanded region. */
1950 | if (!failed && vrm->populate_expand) | |
e49e76c2 | 1951 | mm_populate(vrm->new_addr + vrm->old_len, vrm->delta); |
1da177e4 | 1952 | |
f9f11398 | 1953 | notify_uffd(vrm, failed); |
000c0691 | 1954 | return res; |
1da177e4 | 1955 | } |
221bf5ca LS |
1956 | |
1957 | /* | |
1958 | * Expand (or shrink) an existing mapping, potentially moving it at the | |
1959 | * same time (controlled by the MREMAP_MAYMOVE flag and available VM space) | |
1960 | * | |
1961 | * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise | |
1962 | * This option implies MREMAP_MAYMOVE. | |
1963 | */ | |
1964 | SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, | |
1965 | unsigned long, new_len, unsigned long, flags, | |
1966 | unsigned long, new_addr) | |
1967 | { | |
1968 | struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX; | |
1969 | LIST_HEAD(uf_unmap_early); | |
1970 | LIST_HEAD(uf_unmap); | |
1971 | /* | |
1972 | * There is a deliberate asymmetry here: we strip the pointer tag | |
1973 | * from the old address but leave the new address alone. This is | |
1974 | * for consistency with mmap(), where we prevent the creation of | |
1975 | * aliasing mappings in userspace by leaving the tag bits of the | |
1976 | * mapping address intact. A non-zero tag will cause the subsequent | |
1977 | * range checks to reject the address as invalid. | |
1978 | * | |
1979 | * See Documentation/arch/arm64/tagged-address-abi.rst for more | |
1980 | * information. | |
1981 | */ | |
1982 | struct vma_remap_struct vrm = { | |
1983 | .addr = untagged_addr(addr), | |
1984 | .old_len = old_len, | |
1985 | .new_len = new_len, | |
1986 | .flags = flags, | |
1987 | .new_addr = new_addr, | |
1988 | ||
1989 | .uf = &uf, | |
1990 | .uf_unmap_early = &uf_unmap_early, | |
1991 | .uf_unmap = &uf_unmap, | |
1992 | ||
1993 | .remap_type = MREMAP_INVALID, /* We set later. */ | |
1994 | }; | |
1995 | ||
1996 | return do_mremap(&vrm); | |
1997 | } |