// SPDX-License-Identifier: GPL-2.0
/*
 * Virtual Memory Map support
 *
 * (C) 2007 sgi. Christoph Lameter.
 *
 * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
 * virt_to_page, page_address() to be implemented as a base offset
 * calculation without memory access.
 *
 * However, virtual mappings need a page table and TLBs. Many Linux
 * architectures already map their physical space using 1-1 mappings
 * via TLBs. For those arches the virtual memory map is essentially
 * for free if we use the same page size as the 1-1 mappings. In that
 * case the overhead consists of a few additional pages that are
 * allocated to create a view of memory for vmemmap.
 *
 * The architecture is expected to provide a vmemmap_populate() function
 * to instantiate the mapping.
 */
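
/*
 * Illustration (not part of this file): with a virtual memory map, the
 * pfn <-> page conversions reduce to pointer arithmetic against the
 * vmemmap base. The generic definitions used when CONFIG_SPARSEMEM_VMEMMAP
 * is enabled live in include/asm-generic/memory_model.h and look roughly
 * like:
 *
 *	#define __pfn_to_page(pfn)	(vmemmap + (pfn))
 *	#define __page_to_pfn(page)	(unsigned long)((page) - vmemmap)
 */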
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/memblock.h>
#include <linux/memremap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>

#include <asm/dma.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>

#include "hugetlb_vmemmap.h"

/*
 * Flags for vmemmap_populate_range and friends.
 */
/* Get a ref on the head page struct page, for ZONE_DEVICE compound pages */
#define VMEMMAP_POPULATE_PAGEREF 0x0001

#include "internal.h"

/*
 * Allocate a block of memory to be used to back the virtual memory map
 * or to back the page tables that are used to create the mapping.
 * Uses the main allocators if they are available, else bootmem.
 */

static void * __ref __earlyonly_bootmem_alloc(int node,
				unsigned long size,
				unsigned long align,
				unsigned long goal)
{
	return memmap_alloc(size, align, goal, node, false);
}

void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{
	/* If the main allocator is up use that, fallback to bootmem. */
	if (slab_is_available()) {
		gfp_t gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
		int order = get_order(size);
		static bool warned;
		struct page *page;

		page = alloc_pages_node(node, gfp_mask, order);
		if (page)
			return page_address(page);

		if (!warned) {
			warn_alloc(gfp_mask & ~__GFP_NOWARN, NULL,
				   "vmemmap alloc failure: order:%u", order);
			warned = true;
		}
		return NULL;
	} else
		return __earlyonly_bootmem_alloc(node, size, size,
				__pa(MAX_DMA_ADDRESS));
}

static void * __meminit altmap_alloc_block_buf(unsigned long size,
					       struct vmem_altmap *altmap);

/* need to make sure the size is the same for all calls during the early stage */
void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node,
					 struct vmem_altmap *altmap)
{
	void *ptr;

	if (altmap)
		return altmap_alloc_block_buf(size, altmap);

	ptr = sparse_buffer_alloc(size);
	if (!ptr)
		ptr = vmemmap_alloc_block(size, node);
	return ptr;
}

static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
{
	return altmap->base_pfn + altmap->reserve + altmap->alloc
		+ altmap->align;
}

static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
{
	unsigned long allocated = altmap->alloc + altmap->align;

	if (altmap->free > allocated)
		return altmap->free - allocated;
	return 0;
}

static void * __meminit altmap_alloc_block_buf(unsigned long size,
					       struct vmem_altmap *altmap)
{
	unsigned long pfn, nr_pfns, nr_align;

	if (size & ~PAGE_MASK) {
		pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
				__func__, size);
		return NULL;
	}

	pfn = vmem_altmap_next_pfn(altmap);
	nr_pfns = size >> PAGE_SHIFT;
	nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
	nr_align = ALIGN(pfn, nr_align) - pfn;
	if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
		return NULL;

	altmap->alloc += nr_pfns;
	altmap->align += nr_align;
	pfn += nr_align;

	pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
			__func__, pfn, altmap->alloc, altmap->align, nr_pfns);
	return __va(__pfn_to_phys(pfn));
}
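
/*
 * Worked example of the accounting above (illustrative numbers, not taken
 * from this file): with base_pfn = 0x10000, reserve = 64, alloc = 448 and
 * align = 0, vmem_altmap_next_pfn() returns 0x10200. A 2MB request has
 * nr_pfns = 512, so nr_align = 512 (the lowest set bit of nr_pfns); since
 * pfn 0x10200 is already 512-aligned, no padding pfns are consumed and the
 * block starts at pfn 0x10200.
 */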

void __meminit vmemmap_verify(pte_t *pte, int node,
				unsigned long start, unsigned long end)
{
	unsigned long pfn = pte_pfn(ptep_get(pte));
	int actual_node = early_pfn_to_nid(pfn);

	if (node_distance(actual_node, node) > LOCAL_DISTANCE)
		pr_warn_once("[%lx-%lx] potential offnode page_structs\n",
			start, end - 1);
}

pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
				       struct vmem_altmap *altmap,
				       unsigned long ptpfn, unsigned long flags)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);
	if (pte_none(ptep_get(pte))) {
		pte_t entry;
		void *p;

		if (ptpfn == (unsigned long)-1) {
			p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
			if (!p)
				return NULL;
			ptpfn = PHYS_PFN(__pa(p));
		} else {
			/*
			 * When a PTE/PMD entry is freed from the init_mm
			 * there's a free_pages() call to this page allocated
			 * above. Thus this get_page() is paired with the
			 * put_page_testzero() on the freeing path.
			 * This can only be called by certain ZONE_DEVICE paths,
			 * and through vmemmap_populate_compound_pages() when
			 * slab is available.
			 */
			if (flags & VMEMMAP_POPULATE_PAGEREF)
				get_page(pfn_to_page(ptpfn));
		}
		entry = pfn_pte(ptpfn, PAGE_KERNEL);
		set_pte_at(&init_mm, addr, pte, entry);
	}
	return pte;
}

static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
{
	void *p = vmemmap_alloc_block(size, node);

	if (!p)
		return NULL;
	memset(p, 0, size);

	return p;
}

pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
{
	pmd_t *pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		kernel_pte_init(p);
		pmd_populate_kernel(&init_mm, pmd, p);
	}
	return pmd;
}

pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
{
	pud_t *pud = pud_offset(p4d, addr);
	if (pud_none(*pud)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pmd_init(p);
		pud_populate(&init_mm, pud, p);
	}
	return pud;
}

p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
{
	p4d_t *p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pud_init(p);
		p4d_populate(&init_mm, p4d, p);
	}
	return p4d;
}

pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
{
	pgd_t *pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pgd_populate(&init_mm, pgd, p);
	}
	return pgd;
}

static pte_t * __meminit vmemmap_populate_address(unsigned long addr, int node,
						  struct vmem_altmap *altmap,
						  unsigned long ptpfn,
						  unsigned long flags)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = vmemmap_pgd_populate(addr, node);
	if (!pgd)
		return NULL;
	p4d = vmemmap_p4d_populate(pgd, addr, node);
	if (!p4d)
		return NULL;
	pud = vmemmap_pud_populate(p4d, addr, node);
	if (!pud)
		return NULL;
	pmd = vmemmap_pmd_populate(pud, addr, node);
	if (!pmd)
		return NULL;
	pte = vmemmap_pte_populate(pmd, addr, node, altmap, ptpfn, flags);
	if (!pte)
		return NULL;
	vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);

	return pte;
}

static int __meminit vmemmap_populate_range(unsigned long start,
					    unsigned long end, int node,
					    struct vmem_altmap *altmap,
					    unsigned long ptpfn,
					    unsigned long flags)
{
	unsigned long addr = start;
	pte_t *pte;

	for (; addr < end; addr += PAGE_SIZE) {
		pte = vmemmap_populate_address(addr, node, altmap,
					       ptpfn, flags);
		if (!pte)
			return -ENOMEM;
	}

	return 0;
}

int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
					 int node, struct vmem_altmap *altmap)
{
	return vmemmap_populate_range(start, end, node, altmap, -1, 0);
}

/*
 * Undo populate_hvo, and replace it with a normal base page mapping.
 * Used in memory init in case an HVO mapping needs to be undone.
 *
 * This can happen when it is discovered that a memblock-allocated
 * hugetlb page spans multiple zones, which can only be verified
 * after zones have been initialized.
 *
 * We know that:
 * 1) The first @headsize / PAGE_SIZE vmemmap pages were individually
 *    allocated through memblock, and mapped.
 *
 * 2) The rest of the vmemmap pages are mirrors of the last head page.
 */
int __meminit vmemmap_undo_hvo(unsigned long addr, unsigned long end,
			       int node, unsigned long headsize)
{
	unsigned long maddr, pfn;
	pte_t *pte;
	int headpages;

	/*
	 * Should only be called early in boot, so nothing will
	 * be accessing these page structures.
	 */
	WARN_ON(!early_boot_irqs_disabled);

	headpages = headsize >> PAGE_SHIFT;

	/*
	 * Clear mirrored mappings for tail page structs.
	 */
	for (maddr = addr + headsize; maddr < end; maddr += PAGE_SIZE) {
		pte = virt_to_kpte(maddr);
		pte_clear(&init_mm, maddr, pte);
	}

	/*
	 * Clear and free mappings for head page and first tail page
	 * structs.
	 */
	for (maddr = addr; headpages-- > 0; maddr += PAGE_SIZE) {
		pte = virt_to_kpte(maddr);
		pfn = pte_pfn(ptep_get(pte));
		pte_clear(&init_mm, maddr, pte);
		memblock_phys_free(PFN_PHYS(pfn), PAGE_SIZE);
	}

	flush_tlb_kernel_range(addr, end);

	return vmemmap_populate(addr, end, node, NULL);
}

/*
 * Write protect the mirrored tail page structs for HVO. This will be
 * called from the hugetlb code when gathering and initializing the
 * memblock-allocated gigantic pages. The write protect can't be
 * done earlier, since it can't be guaranteed that the reserved
 * page structures will not be written to during initialization,
 * even if CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled.
 *
 * The PTEs are known to exist, and nothing else should be touching
 * these pages. The caller is responsible for any TLB flushing.
 */
void vmemmap_wrprotect_hvo(unsigned long addr, unsigned long end,
			   int node, unsigned long headsize)
{
	unsigned long maddr;
	pte_t *pte;

	for (maddr = addr + headsize; maddr < end; maddr += PAGE_SIZE) {
		pte = virt_to_kpte(maddr);
		ptep_set_wrprotect(&init_mm, maddr, pte);
	}
}

/*
 * Populate vmemmap pages HVO-style. The first page contains the head
 * page and needed tail pages, the other ones are mirrors of the first
 * page.
 */
int __meminit vmemmap_populate_hvo(unsigned long addr, unsigned long end,
				   int node, unsigned long headsize)
{
	pte_t *pte;
	unsigned long maddr;

	for (maddr = addr; maddr < addr + headsize; maddr += PAGE_SIZE) {
		pte = vmemmap_populate_address(maddr, node, NULL, -1, 0);
		if (!pte)
			return -ENOMEM;
	}

	/*
	 * Reuse the last page struct page mapped above for the rest.
	 */
	return vmemmap_populate_range(maddr, end, node, NULL,
				      pte_pfn(ptep_get(pte)), 0);
}
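
/*
 * Illustration (assumed numbers, not taken from this file): with 4K base
 * pages and a 64-byte struct page, a 2MB HugeTLB folio needs 512 * 64 bytes
 * = 32KB of vmemmap, i.e. 8 vmemmap pages. With headsize == PAGE_SIZE,
 * vmemmap_populate_hvo() backs only the first vmemmap page with real memory;
 * the PTEs for the remaining 7 pages all point at that same physical page
 * (later write-protected by vmemmap_wrprotect_hvo()), and vmemmap_undo_hvo()
 * reverses the arrangement.
 */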

void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
				      unsigned long addr, unsigned long next)
{
}

int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node,
				       unsigned long addr, unsigned long next)
{
	return 0;
}

int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
					 int node, struct vmem_altmap *altmap)
{
	unsigned long addr;
	unsigned long next;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	for (addr = start; addr < end; addr = next) {
		next = pmd_addr_end(addr, end);

		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;

		p4d = vmemmap_p4d_populate(pgd, addr, node);
		if (!p4d)
			return -ENOMEM;

		pud = vmemmap_pud_populate(p4d, addr, node);
		if (!pud)
			return -ENOMEM;

		pmd = pmd_offset(pud, addr);
		if (pmd_none(READ_ONCE(*pmd))) {
			void *p;

			p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
			if (p) {
				vmemmap_set_pmd(pmd, p, node, addr, next);
				continue;
			} else if (altmap) {
				/*
				 * No fallback: In any case we care about, the
				 * altmap should be reasonably sized and aligned
				 * such that vmemmap_alloc_block_buf() will always
				 * succeed. For consistency with the PTE case,
				 * return an error here as failure could indicate
				 * a configuration issue with the size of the altmap.
				 */
				return -ENOMEM;
			}
		} else if (vmemmap_check_pmd(pmd, node, addr, next))
			continue;
		if (vmemmap_populate_basepages(addr, next, node, altmap))
			return -ENOMEM;
	}
	return 0;
}

#ifndef vmemmap_populate_compound_pages
/*
 * For compound pages bigger than section size (e.g. x86 1G compound
 * pages with 2M subsection size) fill the rest of sections as tail
 * pages.
 *
 * Note that memremap_pages() resets @nr_range value and will increment
 * it after each successful range onlining. Thus the value of @nr_range
 * at section memmap populate corresponds to the in-progress range
 * being onlined here.
 */
static bool __meminit reuse_compound_section(unsigned long start_pfn,
					     struct dev_pagemap *pgmap)
{
	unsigned long nr_pages = pgmap_vmemmap_nr(pgmap);
	unsigned long offset = start_pfn -
		PHYS_PFN(pgmap->ranges[pgmap->nr_range].start);

	return !IS_ALIGNED(offset, nr_pages) && nr_pages > PAGES_PER_SUBSECTION;
}

static pte_t * __meminit compound_section_tail_page(unsigned long addr)
{
	pte_t *pte;

	addr -= PAGE_SIZE;

	/*
	 * Assuming sections are populated sequentially, the previous section's
	 * page data can be reused.
	 */
	pte = pte_offset_kernel(pmd_off_k(addr), addr);
	if (!pte)
		return NULL;

	return pte;
}

static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
						     unsigned long start,
						     unsigned long end, int node,
						     struct dev_pagemap *pgmap)
{
	unsigned long size, addr;
	pte_t *pte;
	int rc;

	if (reuse_compound_section(start_pfn, pgmap)) {
		pte = compound_section_tail_page(start);
		if (!pte)
			return -ENOMEM;

		/*
		 * Reuse the page that was populated in the prior iteration
		 * with just tail struct pages.
		 */
		return vmemmap_populate_range(start, end, node, NULL,
					      pte_pfn(ptep_get(pte)),
					      VMEMMAP_POPULATE_PAGEREF);
	}

	size = min(end - start, pgmap_vmemmap_nr(pgmap) * sizeof(struct page));
	for (addr = start; addr < end; addr += size) {
		unsigned long next, last = addr + size;

		/* Populate the head page vmemmap page */
		pte = vmemmap_populate_address(addr, node, NULL, -1, 0);
		if (!pte)
			return -ENOMEM;

		/* Populate the tail pages vmemmap page */
		next = addr + PAGE_SIZE;
		pte = vmemmap_populate_address(next, node, NULL, -1, 0);
		if (!pte)
			return -ENOMEM;

		/*
		 * Reuse the previous page for the rest of tail pages
		 * See layout diagram in Documentation/mm/vmemmap_dedup.rst
		 */
		next += PAGE_SIZE;
		rc = vmemmap_populate_range(next, last, node, NULL,
					    pte_pfn(ptep_get(pte)),
					    VMEMMAP_POPULATE_PAGEREF);
		if (rc)
			return -ENOMEM;
	}

	return 0;
}
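
/*
 * Illustration of the loop above (assumed numbers, not taken from this
 * file): for a 2MB ZONE_DEVICE compound page with 4K base pages and a
 * 64-byte struct page, pgmap_vmemmap_nr() is 512, so size is 512 * 64 bytes
 * = 32KB, i.e. 8 vmemmap pages per compound page. The first two vmemmap
 * pages (the head-page page plus the first tail-page page) get fresh
 * memory; the remaining six PTEs all reuse the second page's pfn, as
 * described in Documentation/mm/vmemmap_dedup.rst.
 */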

#endif

struct page * __meminit __populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
		struct dev_pagemap *pgmap)
{
	unsigned long start = (unsigned long) pfn_to_page(pfn);
	unsigned long end = start + nr_pages * sizeof(struct page);
	int r;

	if (WARN_ON_ONCE(!IS_ALIGNED(pfn, PAGES_PER_SUBSECTION) ||
		!IS_ALIGNED(nr_pages, PAGES_PER_SUBSECTION)))
		return NULL;

	if (vmemmap_can_optimize(altmap, pgmap))
		r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap);
	else
		r = vmemmap_populate(start, end, nid, altmap);

	if (r < 0)
		return NULL;

	if (system_state == SYSTEM_BOOTING)
		memmap_boot_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE));
	else
		memmap_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE));

	return pfn_to_page(pfn);
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT
/*
 * This is called just before initializing sections for a NUMA node.
 * Any special initialization that needs to be done before the
 * generic initialization can be done from here. Sections that
 * are initialized in hooks called from here will be skipped by
 * the generic initialization.
 */
void __init sparse_vmemmap_init_nid_early(int nid)
{
	hugetlb_vmemmap_init_early(nid);
}

/*
 * This is called just before the initialization of page structures
 * through memmap_init. Zones are now initialized, so any work that
 * requires zone information can be done from here.
 */
void __init sparse_vmemmap_init_nid_late(int nid)
{
	hugetlb_vmemmap_init_late(nid);
}
#endif