arm64: mmu: drop paging_init comments
arch/arm64/mm/mmu.c

/*
 * Based on arch/arm/mm/mmu.c
 *
 * Copyright (C) 1995-2005 Russell King
 * Copyright (C) 2012 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/cache.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/kexec.h>
#include <linux/libfdt.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
#include <linux/fs.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

#include <asm/barrier.h>
#include <asm/cputype.h>
#include <asm/fixmap.h>
#include <asm/kasan.h>
#include <asm/kernel-pgtable.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sizes.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/ptdump.h>
#include <asm/tlbflush.h>

#define NO_BLOCK_MAPPINGS	BIT(0)
#define NO_CONT_MAPPINGS	BIT(1)

u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
u64 vabits_user __ro_after_init;
EXPORT_SYMBOL(vabits_user);

u64 kimage_voffset __ro_after_init;
EXPORT_SYMBOL(kimage_voffset);

/*
 * empty_zero_page is a special page that is used for zero-initialized data
 * and COW.
 */
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);

static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;

static DEFINE_SPINLOCK(swapper_pgdir_lock);

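/*
 * Update a swapper_pg_dir entry via the pgd fixmap slot rather than writing
 * it directly; this allows swapper_pg_dir itself to be write-protected.
 * Concurrent updaters are serialised by swapper_pgdir_lock.
 */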
void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
{
	pgd_t *fixmap_pgdp;

	spin_lock(&swapper_pgdir_lock);
	fixmap_pgdp = pgd_set_fixmap(__pa_symbol(pgdp));
	WRITE_ONCE(*fixmap_pgdp, pgd);
	/*
	 * We need dsb(ishst) here to ensure the page-table-walker sees
	 * our new entry before set_p?d() returns. The fixmap's
	 * flush_tlb_kernel_range() via clear_fixmap() does this for us.
	 */
	pgd_clear_fixmap();
	spin_unlock(&swapper_pgdir_lock);
}

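/*
 * Choose attributes for userspace mappings of physical memory (e.g.
 * /dev/mem): pfns outside RAM get Device attributes via pgprot_noncached(),
 * while O_SYNC mappings of RAM get Normal non-cacheable attributes via
 * pgprot_writecombine().
 */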
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
			      unsigned long size, pgprot_t vma_prot)
{
	if (!pfn_valid(pfn))
		return pgprot_noncached(vma_prot);
	else if (file->f_flags & O_SYNC)
		return pgprot_writecombine(vma_prot);
	return vma_prot;
}
EXPORT_SYMBOL(phys_mem_access_prot);

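/*
 * Allocate a zeroed page from memblock for use as an early page table,
 * before the page allocator is up and while the linear map may not yet
 * cover the new page.
 */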
static phys_addr_t __init early_pgtable_alloc(void)
{
	phys_addr_t phys;
	void *ptr;

	phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);

	/*
	 * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
	 * slot will be free, so we can (ab)use the FIX_PTE slot to initialise
	 * any level of table.
	 */
	ptr = pte_set_fixmap(phys);

	memset(ptr, 0, PAGE_SIZE);

	/*
	 * Implicit barriers also ensure the zeroed page is visible to the page
	 * table walker.
	 */
	pte_clear_fixmap();

	return phys;
}

static bool pgattr_change_is_safe(u64 old, u64 new)
{
	/*
	 * The following mapping attributes may be updated in live
	 * kernel mappings without the need for break-before-make.
	 */
	static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG;

	/* creating or taking down mappings is always safe */
	if (old == 0 || new == 0)
		return true;

	/* live contiguous mappings may not be manipulated at all */
	if ((old | new) & PTE_CONT)
		return false;

	/* Transitioning from Non-Global to Global is unsafe */
	if (old & ~new & PTE_NG)
		return false;

	return ((old ^ new) & ~mask) == 0;
}

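/*
 * The __create_pgd_mapping() helpers below walk down the table levels:
 * alloc_init_pud() -> alloc_init_cont_pmd() -> init_pmd() ->
 * alloc_init_cont_pte() -> init_pte(), allocating intermediate tables as
 * needed and using block/contiguous mappings where alignment and the
 * NO_BLOCK_MAPPINGS/NO_CONT_MAPPINGS flags permit.
 */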
static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
		     phys_addr_t phys, pgprot_t prot)
{
	pte_t *ptep;

	ptep = pte_set_fixmap_offset(pmdp, addr);
	do {
		pte_t old_pte = READ_ONCE(*ptep);

		set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));

		/*
		 * After the PTE entry has been populated once, we
		 * only allow updates to the permission attributes.
		 */
		BUG_ON(!pgattr_change_is_safe(pte_val(old_pte),
					      READ_ONCE(pte_val(*ptep))));

		phys += PAGE_SIZE;
	} while (ptep++, addr += PAGE_SIZE, addr != end);

	pte_clear_fixmap();
}

static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
				unsigned long end, phys_addr_t phys,
				pgprot_t prot,
				phys_addr_t (*pgtable_alloc)(void),
				int flags)
{
	unsigned long next;
	pmd_t pmd = READ_ONCE(*pmdp);

	BUG_ON(pmd_sect(pmd));
	if (pmd_none(pmd)) {
		phys_addr_t pte_phys;
		BUG_ON(!pgtable_alloc);
		pte_phys = pgtable_alloc();
		__pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
		pmd = READ_ONCE(*pmdp);
	}
	BUG_ON(pmd_bad(pmd));

	do {
		pgprot_t __prot = prot;

		next = pte_cont_addr_end(addr, end);

		/* use a contiguous mapping if the range is suitably aligned */
		if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) &&
		    (flags & NO_CONT_MAPPINGS) == 0)
			__prot = __pgprot(pgprot_val(prot) | PTE_CONT);

		init_pte(pmdp, addr, next, phys, __prot);

		phys += next - addr;
	} while (addr = next, addr != end);
}

static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
		     phys_addr_t phys, pgprot_t prot,
		     phys_addr_t (*pgtable_alloc)(void), int flags)
{
	unsigned long next;
	pmd_t *pmdp;

	pmdp = pmd_set_fixmap_offset(pudp, addr);
	do {
		pmd_t old_pmd = READ_ONCE(*pmdp);

		next = pmd_addr_end(addr, end);

		/* try section mapping first */
		if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
		    (flags & NO_BLOCK_MAPPINGS) == 0) {
			pmd_set_huge(pmdp, phys, prot);

			/*
			 * After the PMD entry has been populated once, we
			 * only allow updates to the permission attributes.
			 */
			BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
						      READ_ONCE(pmd_val(*pmdp))));
		} else {
			alloc_init_cont_pte(pmdp, addr, next, phys, prot,
					    pgtable_alloc, flags);

			BUG_ON(pmd_val(old_pmd) != 0 &&
			       pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp)));
		}
		phys += next - addr;
	} while (pmdp++, addr = next, addr != end);

	pmd_clear_fixmap();
}

static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
				unsigned long end, phys_addr_t phys,
				pgprot_t prot,
				phys_addr_t (*pgtable_alloc)(void), int flags)
{
	unsigned long next;
	pud_t pud = READ_ONCE(*pudp);

	/*
	 * Check for initial section mappings in the pgd/pud.
	 */
	BUG_ON(pud_sect(pud));
	if (pud_none(pud)) {
		phys_addr_t pmd_phys;
		BUG_ON(!pgtable_alloc);
		pmd_phys = pgtable_alloc();
		__pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
		pud = READ_ONCE(*pudp);
	}
	BUG_ON(pud_bad(pud));

	do {
		pgprot_t __prot = prot;

		next = pmd_cont_addr_end(addr, end);

		/* use a contiguous mapping if the range is suitably aligned */
		if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) &&
		    (flags & NO_CONT_MAPPINGS) == 0)
			__prot = __pgprot(pgprot_val(prot) | PTE_CONT);

		init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);

		phys += next - addr;
	} while (addr = next, addr != end);
}

static inline bool use_1G_block(unsigned long addr, unsigned long next,
				unsigned long phys)
{
	if (PAGE_SHIFT != 12)
		return false;

	if (((addr | next | phys) & ~PUD_MASK) != 0)
		return false;

	return true;
}

static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
			   phys_addr_t phys, pgprot_t prot,
			   phys_addr_t (*pgtable_alloc)(void),
			   int flags)
{
	unsigned long next;
	pud_t *pudp;
	pgd_t pgd = READ_ONCE(*pgdp);

	if (pgd_none(pgd)) {
		phys_addr_t pud_phys;
		BUG_ON(!pgtable_alloc);
		pud_phys = pgtable_alloc();
		__pgd_populate(pgdp, pud_phys, PUD_TYPE_TABLE);
		pgd = READ_ONCE(*pgdp);
	}
	BUG_ON(pgd_bad(pgd));

	pudp = pud_set_fixmap_offset(pgdp, addr);
	do {
		pud_t old_pud = READ_ONCE(*pudp);

		next = pud_addr_end(addr, end);

		/*
		 * For 4K granule only, attempt to put down a 1GB block
		 */
		if (use_1G_block(addr, next, phys) &&
		    (flags & NO_BLOCK_MAPPINGS) == 0) {
			pud_set_huge(pudp, phys, prot);

			/*
			 * After the PUD entry has been populated once, we
			 * only allow updates to the permission attributes.
			 */
			BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
						      READ_ONCE(pud_val(*pudp))));
		} else {
			alloc_init_cont_pmd(pudp, addr, next, phys, prot,
					    pgtable_alloc, flags);

			BUG_ON(pud_val(old_pud) != 0 &&
			       pud_val(old_pud) != READ_ONCE(pud_val(*pudp)));
		}
		phys += next - addr;
	} while (pudp++, addr = next, addr != end);

	pud_clear_fixmap();
}

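/*
 * Top-level helper: map [phys, phys + size) at virt in the table rooted at
 * pgdir, walking down via alloc_init_pud() and friends. Updates to live
 * entries are sanity-checked against pgattr_change_is_safe() on the way
 * down.
 */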
static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
				 unsigned long virt, phys_addr_t size,
				 pgprot_t prot,
				 phys_addr_t (*pgtable_alloc)(void),
				 int flags)
{
	unsigned long addr, length, end, next;
	pgd_t *pgdp = pgd_offset_raw(pgdir, virt);

	/*
	 * If the virtual and physical address don't have the same offset
	 * within a page, we cannot map the region as the caller expects.
	 */
	if (WARN_ON((phys ^ virt) & ~PAGE_MASK))
		return;

	phys &= PAGE_MASK;
	addr = virt & PAGE_MASK;
	length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));

	end = addr + length;
	do {
		next = pgd_addr_end(addr, end);
		alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
			       flags);
		phys += next - addr;
	} while (pgdp++, addr = next, addr != end);
}

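/*
 * Runtime page-table allocator (in contrast to early_pgtable_alloc()):
 * pages come from the page allocator, and pgtable_page_ctor() is run so
 * that the core mm treats them as page-table pages from here on.
 */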
static phys_addr_t pgd_pgtable_alloc(void)
{
	void *ptr = (void *)__get_free_page(PGALLOC_GFP);
	if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
		BUG();

	/* Ensure the zeroed page is visible to the page table walker */
	dsb(ishst);
	return __pa(ptr);
}

/*
 * This function can only be used to modify existing table entries,
 * without allocating new levels of table. Note that this permits the
 * creation of new section or page entries.
 */
static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
					  phys_addr_t size, pgprot_t prot)
{
	if (virt < VMALLOC_START) {
		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
			&phys, virt);
		return;
	}
	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
			     NO_CONT_MAPPINGS);
}

void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
			       unsigned long virt, phys_addr_t size,
			       pgprot_t prot, bool page_mappings_only)
{
	int flags = 0;

	BUG_ON(mm == &init_mm);

	if (page_mappings_only)
		flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;

	__create_pgd_mapping(mm->pgd, phys, virt, size, prot,
			     pgd_pgtable_alloc, flags);
}

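/* Change the attributes of an existing kernel mapping, then flush the TLB. */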
static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
				phys_addr_t size, pgprot_t prot)
{
	if (virt < VMALLOC_START) {
		pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n",
			&phys, virt);
		return;
	}

	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
			     NO_CONT_MAPPINGS);

	/* flush the TLBs after updating live kernel mappings */
	flush_tlb_kernel_range(virt, virt + size);
}

static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start,
				  phys_addr_t end, pgprot_t prot, int flags)
{
	__create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start,
			     prot, early_pgtable_alloc, flags);
}

void __init mark_linear_text_alias_ro(void)
{
	/*
	 * Remove the write permissions from the linear alias of .text/.rodata
	 */
	update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text),
			    (unsigned long)__init_begin - (unsigned long)_text,
			    PAGE_KERNEL_RO);
}

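/*
 * Create the linear mapping for all usable memory, deferring the kernel
 * image and crash kernel regions so that no inappropriate writable alias of
 * them is created at block granularity.
 */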
static void __init map_mem(pgd_t *pgdp)
{
	phys_addr_t kernel_start = __pa_symbol(_text);
	phys_addr_t kernel_end = __pa_symbol(__init_begin);
	struct memblock_region *reg;
	int flags = 0;

	if (rodata_full || debug_pagealloc_enabled())
		flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;

	/*
	 * Take care not to create a writable alias for the
	 * read-only text and rodata sections of the kernel image.
	 * So temporarily mark them as NOMAP to skip mappings in
	 * the following for-loop
	 */
	memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
#ifdef CONFIG_KEXEC_CORE
	if (crashk_res.end)
		memblock_mark_nomap(crashk_res.start,
				    resource_size(&crashk_res));
#endif

	/* map all the memory banks */
	for_each_memblock(memory, reg) {
		phys_addr_t start = reg->base;
		phys_addr_t end = start + reg->size;

		if (start >= end)
			break;
		if (memblock_is_nomap(reg))
			continue;

		__map_memblock(pgdp, start, end, PAGE_KERNEL, flags);
	}

	/*
	 * Map the linear alias of the [_text, __init_begin) interval
	 * as non-executable now, and remove the write permission in
	 * mark_linear_text_alias_ro() below (which will be called after
	 * alternative patching has completed). This makes the contents
	 * of the region accessible to subsystems such as hibernate,
	 * but protects it from inadvertent modification or execution.
	 * Note that contiguous mappings cannot be remapped in this way,
	 * so we should avoid them here.
	 */
	__map_memblock(pgdp, kernel_start, kernel_end,
		       PAGE_KERNEL, NO_CONT_MAPPINGS);
	memblock_clear_nomap(kernel_start, kernel_end - kernel_start);

#ifdef CONFIG_KEXEC_CORE
	/*
	 * Use page-level mappings here so that we can shrink the region
	 * in page granularity and put back unused memory to buddy system
	 * through /sys/kernel/kexec_crash_size interface.
	 */
	if (crashk_res.end) {
		__map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
			       PAGE_KERNEL,
			       NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
		memblock_clear_nomap(crashk_res.start,
				     resource_size(&crashk_res));
	}
#endif
}

void mark_rodata_ro(void)
{
	unsigned long section_size;

	/*
	 * mark .rodata as read only. Use __init_begin rather than __end_rodata
	 * to cover NOTES and EXCEPTION_TABLE.
	 */
	section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
	update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata,
			    section_size, PAGE_KERNEL_RO);

	debug_checkwx();
}

static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
				      pgprot_t prot, struct vm_struct *vma,
				      int flags, unsigned long vm_flags)
{
	phys_addr_t pa_start = __pa_symbol(va_start);
	unsigned long size = va_end - va_start;

	BUG_ON(!PAGE_ALIGNED(pa_start));
	BUG_ON(!PAGE_ALIGNED(size));

	__create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,
			     early_pgtable_alloc, flags);

	if (!(vm_flags & VM_NO_GUARD))
		size += PAGE_SIZE;

	vma->addr	= va_start;
	vma->phys_addr	= pa_start;
	vma->size	= size;
	vma->flags	= VM_MAP | vm_flags;
	vma->caller	= __builtin_return_address(0);

	vm_area_add_early(vma);
}

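/*
 * Parse the "rodata=" command line option. In addition to the usual boolean
 * values, "full" is accepted: it sets rodata_full, which forces the linear
 * map down to page granularity (see map_mem() and arch_add_memory()).
 */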
static int __init parse_rodata(char *arg)
{
	int ret = strtobool(arg, &rodata_enabled);
	if (!ret) {
		rodata_full = false;
		return 0;
	}

	/* permit 'full' in addition to boolean options */
	if (strcmp(arg, "full"))
		return -EINVAL;

	rodata_enabled = true;
	rodata_full = true;
	return 0;
}
early_param("rodata", parse_rodata);

#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
static int __init map_entry_trampoline(void)
{
	pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
	phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);

	/* The trampoline is always mapped and can therefore be global */
	pgprot_val(prot) &= ~PTE_NG;

	/* Map only the text into the trampoline page table */
	memset(tramp_pg_dir, 0, PGD_SIZE);
	__create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE,
			     prot, pgd_pgtable_alloc, 0);

	/* Map both the text and data into the kernel page table */
	__set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot);
	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
		extern char __entry_tramp_data_start[];

		__set_fixmap(FIX_ENTRY_TRAMP_DATA,
			     __pa_symbol(__entry_tramp_data_start),
			     PAGE_KERNEL_RO);
	}

	return 0;
}
core_initcall(map_entry_trampoline);
#endif

/*
 * Create fine-grained mappings for the kernel.
 */
static void __init map_kernel(pgd_t *pgdp)
{
	static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
				vmlinux_initdata, vmlinux_data;

	/*
	 * External debuggers may need to write directly to the text
	 * mapping to install SW breakpoints. Allow this (only) when
	 * explicitly requested with rodata=off.
	 */
	pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;

	/*
	 * Only rodata will be remapped with different permissions later on,
	 * all other segments are allowed to use contiguous mappings.
	 */
	map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
			   VM_NO_GUARD);
	map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
			   &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
	map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot,
			   &vmlinux_inittext, 0, VM_NO_GUARD);
	map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL,
			   &vmlinux_initdata, 0, VM_NO_GUARD);
	map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);

	if (!READ_ONCE(pgd_val(*pgd_offset_raw(pgdp, FIXADDR_START)))) {
		/*
		 * The fixmap falls in a separate pgd to the kernel, and doesn't
		 * live in the carveout for the swapper_pg_dir. We can simply
		 * re-use the existing dir for the fixmap.
		 */
		set_pgd(pgd_offset_raw(pgdp, FIXADDR_START),
			READ_ONCE(*pgd_offset_k(FIXADDR_START)));
	} else if (CONFIG_PGTABLE_LEVELS > 3) {
		/*
		 * The fixmap shares its top level pgd entry with the kernel
		 * mapping. This can really only occur when we are running
		 * with 16k/4 levels, so we can simply reuse the pud level
		 * entry instead.
		 */
		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
		pud_populate(&init_mm,
			     pud_set_fixmap_offset(pgdp, FIXADDR_START),
			     lm_alias(bm_pmd));
		pud_clear_fixmap();
	} else {
		BUG();
	}

	kasan_copy_shadow(pgdp);
}

void __init paging_init(void)
{
	pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir));

	map_kernel(pgdp);
	map_mem(pgdp);

	pgd_clear_fixmap();

	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
	init_mm.pgd = swapper_pg_dir;

	memblock_free(__pa_symbol(init_pg_dir),
		      __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir));

	memblock_allow_resize();
}

/*
 * Check whether a kernel address is valid (derived from arch/x86/).
 */
int kern_addr_valid(unsigned long addr)
{
	pgd_t *pgdp;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;
	pte_t *ptep, pte;

	if ((((long)addr) >> VA_BITS) != -1UL)
		return 0;

	pgdp = pgd_offset_k(addr);
	if (pgd_none(READ_ONCE(*pgdp)))
		return 0;

	pudp = pud_offset(pgdp, addr);
	pud = READ_ONCE(*pudp);
	if (pud_none(pud))
		return 0;

	if (pud_sect(pud))
		return pfn_valid(pud_pfn(pud));

	pmdp = pmd_offset(pudp, addr);
	pmd = READ_ONCE(*pmdp);
	if (pmd_none(pmd))
		return 0;

	if (pmd_sect(pmd))
		return pfn_valid(pmd_pfn(pmd));

	ptep = pte_offset_kernel(pmdp, addr);
	pte = READ_ONCE(*ptep);
	if (pte_none(pte))
		return 0;

	return pfn_valid(pte_pfn(pte));
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
#if !ARM64_SWAPPER_USES_SECTION_MAPS
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
		struct vmem_altmap *altmap)
{
	return vmemmap_populate_basepages(start, end, node);
}
#else	/* !ARM64_SWAPPER_USES_SECTION_MAPS */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
		struct vmem_altmap *altmap)
{
	unsigned long addr = start;
	unsigned long next;
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;

	do {
		next = pmd_addr_end(addr, end);

		pgdp = vmemmap_pgd_populate(addr, node);
		if (!pgdp)
			return -ENOMEM;

		pudp = vmemmap_pud_populate(pgdp, addr, node);
		if (!pudp)
			return -ENOMEM;

		pmdp = pmd_offset(pudp, addr);
		if (pmd_none(READ_ONCE(*pmdp))) {
			void *p = NULL;

			p = vmemmap_alloc_block_buf(PMD_SIZE, node);
			if (!p)
				return -ENOMEM;

			pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
		} else
			vmemmap_verify((pte_t *)pmdp, node, addr, next);
	} while (addr = next, addr != end);

	return 0;
}
#endif	/* !ARM64_SWAPPER_USES_SECTION_MAPS */
void vmemmap_free(unsigned long start, unsigned long end,
		struct vmem_altmap *altmap)
{
}
#endif	/* CONFIG_SPARSEMEM_VMEMMAP */

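/*
 * The fixmap walk helpers below return pointers into the statically
 * allocated bm_pud/bm_pmd/bm_pte tables via the kernel image mapping
 * (p*d_offset_kimg), since they may be called before the linear map is up.
 */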
static inline pud_t * fixmap_pud(unsigned long addr)
{
	pgd_t *pgdp = pgd_offset_k(addr);
	pgd_t pgd = READ_ONCE(*pgdp);

	BUG_ON(pgd_none(pgd) || pgd_bad(pgd));

	return pud_offset_kimg(pgdp, addr);
}

static inline pmd_t * fixmap_pmd(unsigned long addr)
{
	pud_t *pudp = fixmap_pud(addr);
	pud_t pud = READ_ONCE(*pudp);

	BUG_ON(pud_none(pud) || pud_bad(pud));

	return pmd_offset_kimg(pudp, addr);
}

static inline pte_t * fixmap_pte(unsigned long addr)
{
	return &bm_pte[pte_index(addr)];
}

/*
 * The p*d_populate functions call virt_to_phys implicitly so they can't be used
 * directly on kernel symbols (bm_p*d). This function is called too early to use
 * lm_alias so __p*d_populate functions must be used to populate with the
 * physical address from __pa_symbol.
 */
void __init early_fixmap_init(void)
{
	pgd_t *pgdp, pgd;
	pud_t *pudp;
	pmd_t *pmdp;
	unsigned long addr = FIXADDR_START;

	pgdp = pgd_offset_k(addr);
	pgd = READ_ONCE(*pgdp);
	if (CONFIG_PGTABLE_LEVELS > 3 &&
	    !(pgd_none(pgd) || pgd_page_paddr(pgd) == __pa_symbol(bm_pud))) {
		/*
		 * We only end up here if the kernel mapping and the fixmap
		 * share the top level pgd entry, which should only happen on
		 * 16k/4 levels configurations.
		 */
		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
		pudp = pud_offset_kimg(pgdp, addr);
	} else {
		if (pgd_none(pgd))
			__pgd_populate(pgdp, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
		pudp = fixmap_pud(addr);
	}
	if (pud_none(READ_ONCE(*pudp)))
		__pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
	pmdp = fixmap_pmd(addr);
	__pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));

	if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
	     || pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		pr_warn("pmdp %p != %p, %p\n",
			pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
			fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		pr_warn("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
	}
}

/*
 * Unusually, this is also called in IRQ context (ghes_iounmap_irq) so if we
 * ever need to use IPIs for TLB broadcasting, then we're in trouble here.
 */
void __set_fixmap(enum fixed_addresses idx,
			       phys_addr_t phys, pgprot_t flags)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *ptep;

	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);

	ptep = fixmap_pte(addr);

	if (pgprot_val(flags)) {
		set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags));
	} else {
		pte_clear(&init_mm, addr, ptep);
		flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
	}
}

void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
{
	const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
	int offset;
	void *dt_virt;

	/*
	 * Check whether the physical FDT address is set and meets the minimum
	 * alignment requirement. We rely on MIN_FDT_ALIGN being at least 8
	 * bytes so that we can always access the magic and size fields of the
	 * FDT header after mapping the first chunk; double-check here that
	 * this is indeed the case.
	 */
	BUILD_BUG_ON(MIN_FDT_ALIGN < 8);
	if (!dt_phys || dt_phys % MIN_FDT_ALIGN)
		return NULL;

	/*
	 * Make sure that the FDT region can be mapped without the need to
	 * allocate additional translation table pages, so that it is safe
	 * to call create_mapping_noalloc() this early.
	 *
	 * On 64k pages, the FDT will be mapped using PTEs, so we need to
	 * be in the same PMD as the rest of the fixmap.
	 * On 4k pages, we'll use section mappings for the FDT so we only
	 * have to be in the same PUD.
	 */
	BUILD_BUG_ON(dt_virt_base % SZ_2M);

	BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT !=
		     __fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT);

	offset = dt_phys % SWAPPER_BLOCK_SIZE;
	dt_virt = (void *)dt_virt_base + offset;

	/* map the first chunk so we can read the size from the header */
	create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE),
			       dt_virt_base, SWAPPER_BLOCK_SIZE, prot);

	if (fdt_magic(dt_virt) != FDT_MAGIC)
		return NULL;

	*size = fdt_totalsize(dt_virt);
	if (*size > MAX_FDT_SIZE)
		return NULL;

	if (offset + *size > SWAPPER_BLOCK_SIZE)
		create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
				       round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot);

	return dt_virt;
}

void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
{
	void *dt_virt;
	int size;

	dt_virt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO);
	if (!dt_virt)
		return NULL;

	memblock_reserve(dt_phys, size);
	return dt_virt;
}

int __init arch_ioremap_pud_supported(void)
{
	/* only 4k granule supports level 1 block mappings */
	return IS_ENABLED(CONFIG_ARM64_4K_PAGES);
}

int __init arch_ioremap_pmd_supported(void)
{
	return 1;
}

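/*
 * pud/pmd_set_huge() install a section mapping at the corresponding level,
 * as used by the huge-ioremap/vmap code. Live changes are restricted to
 * what pgattr_change_is_safe() allows.
 */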
int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
{
	pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
					pgprot_val(mk_sect_prot(prot)));
	pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot);

	/* Only allow permission changes for now */
	if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)),
				   pud_val(new_pud)))
		return 0;

	BUG_ON(phys & ~PUD_MASK);
	set_pud(pudp, new_pud);
	return 1;
}

int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
{
	pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
					pgprot_val(mk_sect_prot(prot)));
	pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot);

	/* Only allow permission changes for now */
	if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)),
				   pmd_val(new_pmd)))
		return 0;

	BUG_ON(phys & ~PMD_MASK);
	set_pmd(pmdp, new_pmd);
	return 1;
}

int pud_clear_huge(pud_t *pudp)
{
	if (!pud_sect(READ_ONCE(*pudp)))
		return 0;
	pud_clear(pudp);
	return 1;
}

int pmd_clear_huge(pmd_t *pmdp)
{
	if (!pmd_sect(READ_ONCE(*pmdp)))
		return 0;
	pmd_clear(pmdp);
	return 1;
}

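/*
 * The p?d_free_*_page() helpers below tear down the next-level table under
 * an entry so that a block mapping can replace it (used on the
 * ioremap/huge-vmap path), flushing the walk cache via
 * __flush_tlb_kernel_pgtable().
 */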
int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr)
{
	pte_t *table;
	pmd_t pmd;

	pmd = READ_ONCE(*pmdp);

	if (!pmd_table(pmd)) {
		VM_WARN_ON(1);
		return 1;
	}

	table = pte_offset_kernel(pmdp, addr);
	pmd_clear(pmdp);
	__flush_tlb_kernel_pgtable(addr);
	pte_free_kernel(NULL, table);
	return 1;
}

int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
{
	pmd_t *table;
	pmd_t *pmdp;
	pud_t pud;
	unsigned long next, end;

	pud = READ_ONCE(*pudp);

	if (!pud_table(pud)) {
		VM_WARN_ON(1);
		return 1;
	}

	table = pmd_offset(pudp, addr);
	pmdp = table;
	next = addr;
	end = addr + PUD_SIZE;
	do {
		pmd_free_pte_page(pmdp, next);
	} while (pmdp++, next += PMD_SIZE, next != end);

	pud_clear(pudp);
	__flush_tlb_kernel_pgtable(addr);
	pmd_free(NULL, table);
	return 1;
}

int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
{
	return 0;	/* Don't attempt a block mapping */
}

#ifdef CONFIG_MEMORY_HOTPLUG
int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
		    bool want_memblock)
{
	int flags = 0;

	if (rodata_full || debug_pagealloc_enabled())
		flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;

	__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
			     size, PAGE_KERNEL, pgd_pgtable_alloc, flags);

	return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT,
			   altmap, want_memblock);
}
#endif