/*
 * address space "slices" (meta-segments) support
 *
 * Copyright (C) 2007 Benjamin Herrenschmidt, IBM Corporation.
 *
 * Based on hugetlb implementation
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/hugetlb.h>
#include <linux/sched/mm.h>
#include <linux/security.h>
#include <asm/mman.h>
#include <asm/mmu.h>
#include <asm/copro.h>
#include <asm/hugetlb.h>
#include <asm/mmu_context.h>

static DEFINE_SPINLOCK(slice_convert_lock);

#ifdef DEBUG
int _slice_debug = 1;

static void slice_print_mask(const char *label, const struct slice_mask *mask)
{
	if (!_slice_debug)
		return;
	pr_devel("%s low_slice: %*pbl\n", label,
			(int)SLICE_NUM_LOW, &mask->low_slices);
	pr_devel("%s high_slice: %*pbl\n", label,
			(int)SLICE_NUM_HIGH, mask->high_slices);
}

#define slice_dbg(fmt...) do { if (_slice_debug) pr_devel(fmt); } while (0)

#else
static void slice_print_mask(const char *label, const struct slice_mask *mask) {}
#define slice_dbg(fmt...)
#endif
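/*
 * Roughly, the address space is carved into "low" slices below SLICE_LOW_TOP,
 * tracked as bits in the u64 low_slices mask, and much larger "high" slices
 * above it, tracked in the high_slices bitmap. On 64-bit Book3S hash this is
 * typically 256MB low slices below 4GB and 1TB high slices above; other
 * platforms (e.g. 8xx) use different shifts and may have no high slices at
 * all (SLICE_NUM_HIGH == 0), which is why the high-slice paths below are
 * guarded.
 */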
static inline bool slice_addr_is_low(unsigned long addr)
{
	u64 tmp = (u64)addr;

	return tmp < SLICE_LOW_TOP;
}

static void slice_range_to_mask(unsigned long start, unsigned long len,
				struct slice_mask *ret)
{
	unsigned long end = start + len - 1;

	ret->low_slices = 0;
	if (SLICE_NUM_HIGH)
		bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);

	if (slice_addr_is_low(start)) {
		unsigned long mend = min(end,
					 (unsigned long)(SLICE_LOW_TOP - 1));

		ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
			- (1u << GET_LOW_SLICE_INDEX(start));
	}

	if (SLICE_NUM_HIGH && !slice_addr_is_low(end)) {
		unsigned long start_index = GET_HIGH_SLICE_INDEX(start);
		unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
		unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;

		bitmap_set(ret->high_slices, start_index, count);
	}
}
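/*
 * For example, assuming the usual 256MB low slices (SLICE_LOW_SHIFT == 28):
 * slice_range_to_mask(0x10000000, 0x30000000, &m) covers addresses
 * 0x10000000-0x3fffffff, i.e. low slices 1..3, so m.low_slices becomes
 * (1 << 4) - (1 << 1) = 0xe and m.high_slices stays empty.
 */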
static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
			      unsigned long len)
{
	struct vm_area_struct *vma;

	if ((mm_ctx_slb_addr_limit(&mm->context) - len) < addr)
		return 0;
	vma = find_vma(mm, addr);
	return (!vma || (addr + len) <= vm_start_gap(vma));
}

static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
{
	return !slice_area_is_free(mm, slice << SLICE_LOW_SHIFT,
				   1ul << SLICE_LOW_SHIFT);
}

static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
{
	unsigned long start = slice << SLICE_HIGH_SHIFT;
	unsigned long end = start + (1ul << SLICE_HIGH_SHIFT);

	/* Hack, so that each address is controlled by exactly one
	 * of the high or low area bitmaps, the first high area starts
	 * at 4GB, not 0 */
	if (start == 0)
		start = (unsigned long)SLICE_LOW_TOP;

	return !slice_area_is_free(mm, start, end - start);
}
static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
				unsigned long high_limit)
{
	unsigned long i;

	ret->low_slices = 0;
	if (SLICE_NUM_HIGH)
		bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);

	for (i = 0; i < SLICE_NUM_LOW; i++)
		if (!slice_low_has_vma(mm, i))
			ret->low_slices |= 1u << i;

	if (slice_addr_is_low(high_limit - 1))
		return;

	for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++)
		if (!slice_high_has_vma(mm, i))
			__set_bit(i, ret->high_slices);
}
static bool slice_check_range_fits(struct mm_struct *mm,
			   const struct slice_mask *available,
			   unsigned long start, unsigned long len)
{
	unsigned long end = start + len - 1;
	u64 low_slices = 0;

	if (slice_addr_is_low(start)) {
		unsigned long mend = min(end,
					 (unsigned long)(SLICE_LOW_TOP - 1));

		low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
				- (1u << GET_LOW_SLICE_INDEX(start));
	}
	if ((low_slices & available->low_slices) != low_slices)
		return false;

	if (SLICE_NUM_HIGH && !slice_addr_is_low(end)) {
		unsigned long start_index = GET_HIGH_SLICE_INDEX(start);
		unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
		unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;
		unsigned long i;

		for (i = start_index; i < start_index + count; i++) {
			if (!test_bit(i, available->high_slices))
				return false;
		}
	}

	return true;
}
static void slice_flush_segments(void *parm)
{
#ifdef CONFIG_PPC64
	struct mm_struct *mm = parm;
	unsigned long flags;

	if (mm != current->active_mm)
		return;

	copy_mm_to_paca(current->active_mm);

	local_irq_save(flags);
	slb_flush_and_restore_bolted();
	local_irq_restore(flags);
#endif
}
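/*
 * slice_convert() below re-labels every slice selected in @mask to the new
 * page size: it rewrites the packed 4-bit psize entries in the context's
 * low/high psize arrays and moves the corresponding bits between the cached
 * per-psize slice masks, all under slice_convert_lock so that concurrent
 * conversions (e.g. a 64k -> 4k demotion) cannot race. Since SLB entries
 * encode a segment's base page size, stale SLB entries for this mm need to
 * be flushed afterwards (slice_flush_segments() on each CPU, plus
 * copro_flush_all_slbs()) before the new size takes effect.
 */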
static void slice_convert(struct mm_struct *mm,
				const struct slice_mask *mask, int psize)
{
	int index, mask_index;
	/* Write the new slice psize bits */
	unsigned char *hpsizes, *lpsizes;
	struct slice_mask *psize_mask, *old_mask;
	unsigned long i, flags;
	int old_psize;

	slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
	slice_print_mask(" mask", mask);

	psize_mask = slice_mask_for_size(&mm->context, psize);

	/* We need to use a spinlock here to protect against
	 * concurrent 64k -> 4k demotion ...
	 */
	spin_lock_irqsave(&slice_convert_lock, flags);

	lpsizes = mm_ctx_low_slices(&mm->context);
	for (i = 0; i < SLICE_NUM_LOW; i++) {
		if (!(mask->low_slices & (1u << i)))
			continue;

		mask_index = i & 0x1;
		index = i >> 1;

		/* Update the slice_mask */
		old_psize = (lpsizes[index] >> (mask_index * 4)) & 0xf;
		old_mask = slice_mask_for_size(&mm->context, old_psize);
		old_mask->low_slices &= ~(1u << i);
		psize_mask->low_slices |= 1u << i;

		/* Update the sizes array */
		lpsizes[index] = (lpsizes[index] & ~(0xf << (mask_index * 4))) |
				(((unsigned long)psize) << (mask_index * 4));
	}

	hpsizes = mm_ctx_high_slices(&mm->context);
	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm_ctx_slb_addr_limit(&mm->context)); i++) {
		if (!test_bit(i, mask->high_slices))
			continue;

		mask_index = i & 0x1;
		index = i >> 1;

		/* Update the slice_mask */
		old_psize = (hpsizes[index] >> (mask_index * 4)) & 0xf;
		old_mask = slice_mask_for_size(&mm->context, old_psize);
		__clear_bit(i, old_mask->high_slices);
		__set_bit(i, psize_mask->high_slices);

		/* Update the sizes array */
		hpsizes[index] = (hpsizes[index] & ~(0xf << (mask_index * 4))) |
				(((unsigned long)psize) << (mask_index * 4));
	}

	slice_dbg(" lsps=%lx, hsps=%lx\n",
		  (unsigned long)mm_ctx_low_slices(&mm->context),
		  (unsigned long)mm_ctx_high_slices(&mm->context));

	spin_unlock_irqrestore(&slice_convert_lock, flags);

	copro_flush_all_slbs(mm);
}
/*
 * Compute which slice addr is part of;
 * set *boundary_addr to the start or end boundary of that slice
 * (depending on 'end' parameter);
 * return boolean indicating if the slice is marked as available in the
 * 'available' slice_mask.
 */
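/*
 * For example, with 256MB low slices, scanning upwards (end == 1) from an
 * address in low slice 2 reports whether slice 2 is available and sets
 * *boundary_addr to 3 << SLICE_LOW_SHIFT, the upper boundary of that slice;
 * scanning downwards (end == 0) sets it to the slice's start instead.
 */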
static bool slice_scan_available(unsigned long addr,
				 const struct slice_mask *available,
				 int end, unsigned long *boundary_addr)
{
	unsigned long slice;

	if (slice_addr_is_low(addr)) {
		slice = GET_LOW_SLICE_INDEX(addr);
		*boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
		return !!(available->low_slices & (1u << slice));
	} else {
		slice = GET_HIGH_SLICE_INDEX(addr);
		*boundary_addr = (slice + end) ?
			((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
		return !!test_bit(slice, available->high_slices);
	}
}
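/*
 * The two slice_find_area_* helpers below walk the address space one slice
 * boundary at a time, merging runs of available slices into a single
 * [low_limit, high_limit) window and handing each window to
 * vm_unmapped_area(). A page-aligned return value means success, which is
 * what the "!(found & ~PAGE_MASK)" checks test for: error values such as
 * -ENOMEM have low bits set when viewed as an unsigned address.
 */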
static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
					      unsigned long len,
					      const struct slice_mask *available,
					      int psize, unsigned long high_limit)
{
	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
	unsigned long addr, found, next_end;
	struct vm_unmapped_area_info info;

	info.flags = 0;
	info.length = len;
	info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
	info.align_offset = 0;

	addr = TASK_UNMAPPED_BASE;
	/*
	 * Check up to the maximum value allowed for this mmap request
	 */
	while (addr < high_limit) {
		info.low_limit = addr;
		if (!slice_scan_available(addr, available, 1, &addr))
			continue;

 next_slice:
		/*
		 * At this point [info.low_limit; addr) covers
		 * available slices only and ends at a slice boundary.
		 * Check if we need to reduce the range, or if we can
		 * extend it to cover the next available slice.
		 */
		if (addr >= high_limit)
			addr = high_limit;
		else if (slice_scan_available(addr, available, 1, &next_end)) {
			addr = next_end;
			goto next_slice;
		}
		info.high_limit = addr;

		found = vm_unmapped_area(&info);
		if (!(found & ~PAGE_MASK))
			return found;
	}

	return -ENOMEM;
}
static unsigned long slice_find_area_topdown(struct mm_struct *mm,
					     unsigned long len,
					     const struct slice_mask *available,
					     int psize, unsigned long high_limit)
{
	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
	unsigned long addr, found, prev;
	struct vm_unmapped_area_info info;
	unsigned long min_addr = max(PAGE_SIZE, mmap_min_addr);

	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
	info.length = len;
	info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
	info.align_offset = 0;

	addr = mm->mmap_base;
	/*
	 * If we are trying to allocate above DEFAULT_MAP_WINDOW,
	 * add the difference to mmap_base. Only apply this for
	 * requests whose high_limit is above DEFAULT_MAP_WINDOW.
	 */
	if (high_limit > DEFAULT_MAP_WINDOW)
		addr += mm_ctx_slb_addr_limit(&mm->context) - DEFAULT_MAP_WINDOW;

	while (addr > min_addr) {
		info.high_limit = addr;
		if (!slice_scan_available(addr - 1, available, 0, &addr))
			continue;

 prev_slice:
		/*
		 * At this point [addr; info.high_limit) covers
		 * available slices only and starts at a slice boundary.
		 * Check if we need to reduce the range, or if we can
		 * extend it to cover the previous available slice.
		 */
		if (addr < min_addr)
			addr = min_addr;
		else if (slice_scan_available(addr - 1, available, 0, &prev)) {
			addr = prev;
			goto prev_slice;
		}
		info.low_limit = addr;

		found = vm_unmapped_area(&info);
		if (!(found & ~PAGE_MASK))
			return found;
	}

	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	return slice_find_area_bottomup(mm, len, available, psize, high_limit);
}
static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
				     const struct slice_mask *mask, int psize,
				     int topdown, unsigned long high_limit)
{
	if (topdown)
		return slice_find_area_topdown(mm, len, mask, psize, high_limit);
	else
		return slice_find_area_bottomup(mm, len, mask, psize, high_limit);
}
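/*
 * Small helpers for combining slice masks. The SLICE_NUM_HIGH checks let the
 * same code build on configurations that have no high slices at all; in that
 * case the high_slices bitmap is empty and the bitmap operations are skipped.
 */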
static inline void slice_copy_mask(struct slice_mask *dst,
					const struct slice_mask *src)
{
	dst->low_slices = src->low_slices;
	if (!SLICE_NUM_HIGH)
		return;
	bitmap_copy(dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
}

static inline void slice_or_mask(struct slice_mask *dst,
					const struct slice_mask *src1,
					const struct slice_mask *src2)
{
	dst->low_slices = src1->low_slices | src2->low_slices;
	if (!SLICE_NUM_HIGH)
		return;
	bitmap_or(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH);
}

static inline void slice_andnot_mask(struct slice_mask *dst,
					const struct slice_mask *src1,
					const struct slice_mask *src2)
{
	dst->low_slices = src1->low_slices & ~src2->low_slices;
	if (!SLICE_NUM_HIGH)
		return;
	bitmap_andnot(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH);
}
#ifdef CONFIG_PPC_64K_PAGES
#define MMU_PAGE_BASE	MMU_PAGE_64K
#else
#define MMU_PAGE_BASE	MMU_PAGE_4K
#endif
unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
				      unsigned long flags, unsigned int psize,
				      int topdown)
{
	struct slice_mask good_mask;
	struct slice_mask potential_mask;
	const struct slice_mask *maskp;
	const struct slice_mask *compat_maskp = NULL;
	int fixed = (flags & MAP_FIXED);
	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
	unsigned long page_size = 1UL << pshift;
	struct mm_struct *mm = current->mm;
	unsigned long newaddr;
	unsigned long high_limit;

	high_limit = DEFAULT_MAP_WINDOW;
	if (addr >= high_limit || (fixed && (addr + len > high_limit)))
		high_limit = TASK_SIZE;

	if (len > high_limit)
		return -ENOMEM;
	if (len & (page_size - 1))
		return -EINVAL;
	if (fixed) {
		if (addr & (page_size - 1))
			return -EINVAL;
		if (addr > high_limit - len)
			return -ENOMEM;
	}

	if (high_limit > mm_ctx_slb_addr_limit(&mm->context)) {
		/*
		 * Increasing the slb_addr_limit does not require
		 * the slice mask cache to be recalculated because it should
		 * be already initialised beyond the old address limit.
		 */
		mm_ctx_set_slb_addr_limit(&mm->context, high_limit);

		on_each_cpu(slice_flush_segments, mm, 1);
	}

	/* Sanity checks */
	BUG_ON(mm->task_size == 0);
	BUG_ON(mm_ctx_slb_addr_limit(&mm->context) == 0);
	VM_BUG_ON(radix_enabled());
	slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
	slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d\n",
		  addr, len, flags, topdown);

	/* If hint, make sure it matches our alignment restrictions */
	if (!fixed && addr) {
		addr = _ALIGN_UP(addr, page_size);
		slice_dbg(" aligned addr=%lx\n", addr);
		/* Ignore hint if it's too large or overlaps a VMA */
		if (addr > high_limit - len || addr < mmap_min_addr ||
		    !slice_area_is_free(mm, addr, len))
			addr = 0;
	}

	/* First make up a "good" mask of slices that have the right size
	 * already
	 */
	maskp = slice_mask_for_size(&mm->context, psize);

	/*
	 * Here "good" means slices that are already the right page size,
	 * "compat" means slices that have a compatible page size (i.e.
	 * 4k in a 64k pagesize kernel), and "free" means slices without
	 * any VMAs.
	 *
	 * If MAP_FIXED:
	 *	check if fits in good | compat => OK
	 *	check if fits in good | compat | free => convert free
	 * If have hint:
	 *	check if hint fits in good => OK
	 *	check if hint fits in good | free => convert free
	 * If not fixed and no hint:
	 *	search in good, found => OK
	 *	search in good | free, found => convert free
	 *	search in good | compat | free, found => convert free.
	 */

	/*
	 * If we support combo pages, we can allow 64k pages in 4k slices
	 * The mask copies could be avoided in most cases here if we had
	 * a pointer to good mask for the next code to use.
	 */
	if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) {
		compat_maskp = slice_mask_for_size(&mm->context, MMU_PAGE_4K);
		if (fixed)
			slice_or_mask(&good_mask, maskp, compat_maskp);
		else
			slice_copy_mask(&good_mask, maskp);
	} else {
		slice_copy_mask(&good_mask, maskp);
	}

	slice_print_mask(" good_mask", &good_mask);
	if (compat_maskp)
		slice_print_mask(" compat_mask", compat_maskp);
	/* First check hint if it's valid or if we have MAP_FIXED */
	if (addr != 0 || fixed) {
		/* Check if we fit in the good mask. If we do, we just return,
		 * nothing else to do
		 */
		if (slice_check_range_fits(mm, &good_mask, addr, len)) {
			slice_dbg(" fits good !\n");
			newaddr = addr;
			goto return_addr;
		}
	} else {
		/* Now let's see if we can find something in the existing
		 * slices for that size
		 */
		newaddr = slice_find_area(mm, len, &good_mask,
					  psize, topdown, high_limit);
		if (newaddr != -ENOMEM) {
			/* Found within the good mask, we don't have to setup,
			 * we thus return directly
			 */
			slice_dbg(" found area at 0x%lx\n", newaddr);
			goto return_addr;
		}
	}
	/*
	 * We don't fit in the good mask, check what other slices are
	 * empty and thus can be converted
	 */
	slice_mask_for_free(mm, &potential_mask, high_limit);
	slice_or_mask(&potential_mask, &potential_mask, &good_mask);
	slice_print_mask(" potential", &potential_mask);

	if (addr != 0 || fixed) {
		if (slice_check_range_fits(mm, &potential_mask, addr, len)) {
			slice_dbg(" fits potential !\n");
			newaddr = addr;
			goto convert;
		}
	}

	/* If we have MAP_FIXED and failed the above steps, then error out */
	if (fixed)
		return -EBUSY;

	slice_dbg(" search...\n");

	/* If we had a hint that didn't work out, see if we can fit
	 * anywhere in the good area.
	 */
	if (addr) {
		newaddr = slice_find_area(mm, len, &good_mask,
					  psize, topdown, high_limit);
		if (newaddr != -ENOMEM) {
			slice_dbg(" found area at 0x%lx\n", newaddr);
			goto return_addr;
		}
	}

	/* Now let's see if we can find something in the existing slices
	 * for that size plus free slices
	 */
	newaddr = slice_find_area(mm, len, &potential_mask,
				  psize, topdown, high_limit);

	if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && newaddr == -ENOMEM &&
	    psize == MMU_PAGE_64K) {
		/* retry the search with 4k-page slices included */
		slice_or_mask(&potential_mask, &potential_mask, compat_maskp);
		newaddr = slice_find_area(mm, len, &potential_mask,
					  psize, topdown, high_limit);
	}

	if (newaddr == -ENOMEM)
		return -ENOMEM;

	slice_range_to_mask(newaddr, len, &potential_mask);
	slice_dbg(" found potential area at 0x%lx\n", newaddr);
	slice_print_mask(" mask", &potential_mask);
 convert:
	/*
	 * Try to allocate the context before we do slice convert
	 * so that we handle the context allocation failure gracefully.
	 */
	if (need_extra_context(mm, newaddr)) {
		if (alloc_extended_context(mm, newaddr) < 0)
			return -ENOMEM;
	}

	slice_andnot_mask(&potential_mask, &potential_mask, &good_mask);
	if (compat_maskp && !fixed)
		slice_andnot_mask(&potential_mask, &potential_mask, compat_maskp);
	if (potential_mask.low_slices ||
		(SLICE_NUM_HIGH &&
		 !bitmap_empty(potential_mask.high_slices, SLICE_NUM_HIGH))) {
		slice_convert(mm, &potential_mask, psize);
		if (psize > MMU_PAGE_BASE)
			on_each_cpu(slice_flush_segments, mm, 1);
	}
	return newaddr;

return_addr:
	if (need_extra_context(mm, newaddr)) {
		if (alloc_extended_context(mm, newaddr) < 0)
			return -ENOMEM;
	}
	return newaddr;
}
EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
unsigned long arch_get_unmapped_area(struct file *filp,
				     unsigned long addr,
				     unsigned long len,
				     unsigned long pgoff,
				     unsigned long flags)
{
	return slice_get_unmapped_area(addr, len, flags,
				       mm_ctx_user_psize(&current->mm->context), 0);
}

unsigned long arch_get_unmapped_area_topdown(struct file *filp,
					     const unsigned long addr0,
					     const unsigned long len,
					     const unsigned long pgoff,
					     const unsigned long flags)
{
	return slice_get_unmapped_area(addr0, len, flags,
				       mm_ctx_user_psize(&current->mm->context), 1);
}
unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
{
	unsigned char *psizes;
	int index, mask_index;

	VM_BUG_ON(radix_enabled());

	if (slice_addr_is_low(addr)) {
		psizes = mm_ctx_low_slices(&mm->context);
		index = GET_LOW_SLICE_INDEX(addr);
	} else {
		psizes = mm_ctx_high_slices(&mm->context);
		index = GET_HIGH_SLICE_INDEX(addr);
	}
	mask_index = index & 0x1;
	return (psizes[index >> 1] >> (mask_index * 4)) & 0xf;
}
EXPORT_SYMBOL_GPL(get_slice_psize);
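/*
 * The psize arrays store one 4-bit entry per slice, two slices per byte:
 * slice i lives in psizes[i >> 1], in the low nibble when i is even and in
 * the high nibble when i is odd. Reading slice 5, for example, extracts
 * (psizes[2] >> 4) & 0xf, which is exactly what get_slice_psize() above does.
 */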
void slice_init_new_context_exec(struct mm_struct *mm)
{
	unsigned char *hpsizes, *lpsizes;
	struct slice_mask *mask;
	unsigned int psize = mmu_virtual_psize;

	slice_dbg("slice_init_new_context_exec(mm=%p)\n", mm);

	/*
	 * In the case of exec, use the default limit. In the
	 * case of fork it is just inherited from the mm being
	 * duplicated.
	 */
	mm_ctx_set_slb_addr_limit(&mm->context, SLB_ADDR_LIMIT_DEFAULT);
	mm_ctx_set_user_psize(&mm->context, psize);

	/*
	 * Set all slice psizes to the default.
	 */
	lpsizes = mm_ctx_low_slices(&mm->context);
	memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1);

	hpsizes = mm_ctx_high_slices(&mm->context);
	memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1);
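	/*
	 * Each byte of the psize arrays holds two 4-bit slice entries, so the
	 * (psize << 4) | psize pattern above writes the default size into both
	 * nibbles at once and the memset length is half the slice count.
	 */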
	/*
	 * Slice mask cache starts zeroed, fill the default size cache.
	 */
	mask = slice_mask_for_size(&mm->context, psize);
	mask->low_slices = ~0UL;
	if (SLICE_NUM_HIGH)
		bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
}
#ifdef CONFIG_PPC_BOOK3S_64
void slice_setup_new_exec(void)
{
	struct mm_struct *mm = current->mm;

	slice_dbg("slice_setup_new_exec(mm=%p)\n", mm);

	if (!is_32bit_task())
		return;

	mm_ctx_set_slb_addr_limit(&mm->context, DEFAULT_MAP_WINDOW);
}
#endif
void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
			   unsigned long len, unsigned int psize)
{
	struct slice_mask mask;

	VM_BUG_ON(radix_enabled());

	slice_range_to_mask(start, len, &mask);
	slice_convert(mm, &mask, psize);
}
#ifdef CONFIG_HUGETLB_PAGE
/*
 * is_hugepage_only_range() is used by generic code to verify whether
 * a normal mmap mapping (non hugetlbfs) is valid on a given area.
 *
 * until the generic code provides a more generic hook and/or starts
 * calling arch get_unmapped_area for MAP_FIXED (which our implementation
 * here knows how to deal with), we hijack it to keep standard mappings
 * away from us.
 *
 * because of that generic code limitation, MAP_FIXED mapping cannot
 * "convert" back a slice with no VMAs to the standard page size, only
 * get_unmapped_area() can. It would be possible to fix it here but I
 * prefer working on fixing the generic code instead.
 *
 * WARNING: This will not work if hugetlbfs isn't enabled since the
 * generic code will redefine that function as 0 in that case. This is ok
 * for now as we only use slices with hugetlbfs enabled. This should
 * be fixed as the generic code gets fixed.
 */
int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
			   unsigned long len)
{
	const struct slice_mask *maskp;
	unsigned int psize = mm_ctx_user_psize(&mm->context);

	VM_BUG_ON(radix_enabled());

	maskp = slice_mask_for_size(&mm->context, psize);

	/* We need to account for 4k slices too */
	if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) {
		const struct slice_mask *compat_maskp;
		struct slice_mask available;

		compat_maskp = slice_mask_for_size(&mm->context, MMU_PAGE_4K);
		slice_or_mask(&available, maskp, compat_maskp);
		return !slice_check_range_fits(mm, &available, addr, len);
	}

	return !slice_check_range_fits(mm, maskp, addr, len);
}
#endif