arch/x86/mm/hugetlbpage.c
// SPDX-License-Identifier: GPL-2.0
/*
 * IA-32 Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <linux/compat.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/elf.h>

#if 0 /* This is just for testing */
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	struct page *page;
	struct vm_area_struct *vma;
	pte_t *pte;

	vma = find_vma(mm, address);
	if (!vma || !is_vm_hugetlb_page(vma))
		return ERR_PTR(-EINVAL);

	pte = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));

	/* hugetlb should be locked, and hence, prefaulted */
	WARN_ON(!pte || pte_none(*pte));

	/* Index into the compound page by the 4K-page offset within the huge page. */
	page = &pte_page(*pte)[(address / PAGE_SIZE) % (HPAGE_SIZE / PAGE_SIZE)];

	WARN_ON(!PageHead(page));

	return page;
}

int pmd_huge(pmd_t pmd)
{
	return 0;
}

int pud_huge(pud_t pud)
{
	return 0;
}

#else

/*
 * pmd_huge() returns 1 if @pmd is a hugetlb related entry, that is a normal
 * hugetlb entry or a non-present (migration or hwpoisoned) hugetlb entry.
 * Otherwise, returns 0.
 */
int pmd_huge(pmd_t pmd)
{
	return !pmd_none(pmd) &&
		(pmd_val(pmd) & (_PAGE_PRESENT|_PAGE_PSE)) != _PAGE_PRESENT;
}

int pud_huge(pud_t pud)
{
	return !!(pud_val(pud) & _PAGE_PSE);
}
#endif
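
/*
 * Illustrative sketch (not part of the upstream file): how a page table
 * walker might consume pmd_huge(). walk_example_pmd() is a hypothetical
 * helper added here only to show the three cases the check above
 * distinguishes: empty entries, huge mappings (including non-present
 * migration/hwpoison entries), and pointers to a lower page table.
 */
#if 0 /* example only */
static int walk_example_pmd(pmd_t *pmd)
{
	if (pmd_none(*pmd))
		return 0;	/* nothing mapped at this PMD */
	if (pmd_huge(*pmd))
		return 1;	/* 2M mapping, or a migration/hwpoison hugetlb entry */
	return 0;		/* present with PSE clear: points to a PTE page table */
}
#endif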

#ifdef CONFIG_HUGETLB_PAGE
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
		unsigned long addr, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct vm_unmapped_area_info info;

	info.flags = 0;
	info.length = len;
	info.low_limit = get_mmap_base(1);

	/*
	 * If the hint address is above DEFAULT_MAP_WINDOW, look for an
	 * unmapped area in the full address space.
	 */
	info.high_limit = in_32bit_syscall() ?
		task_size_32bit() : task_size_64bit(addr > DEFAULT_MAP_WINDOW);

	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
	info.align_offset = 0;
	return vm_unmapped_area(&info);
}

static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
		unsigned long addr, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct vm_unmapped_area_info info;

	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
	info.length = len;
	info.low_limit = PAGE_SIZE;
	info.high_limit = get_mmap_base(0);

	/*
	 * If the hint address is above DEFAULT_MAP_WINDOW, look for an
	 * unmapped area in the full address space.
	 */
	if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
		info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;

	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
	info.align_offset = 0;
	addr = vm_unmapped_area(&info);

	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	if (addr & ~PAGE_MASK) {
		VM_BUG_ON(addr != -ENOMEM);
		info.flags = 0;
		info.low_limit = TASK_UNMAPPED_BASE;
		info.high_limit = TASK_SIZE_LOW;
		addr = vm_unmapped_area(&info);
	}

	return addr;
}
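
/*
 * Note added for illustration (not in the upstream file): vm_unmapped_area()
 * returns either a page-aligned address or a negative errno value cast to
 * unsigned long. The "addr & ~PAGE_MASK" test above therefore detects the
 * error case without a separate flag, and the VM_BUG_ON() documents that
 * -ENOMEM is the only error expected on this path.
 */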

unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;

	if (len & ~huge_page_mask(h))
		return -EINVAL;

	if (len > TASK_SIZE)
		return -ENOMEM;

	/* No address checking. See comment at mmap_address_hint_valid() */
	if (flags & MAP_FIXED) {
		if (prepare_hugepage_range(file, addr, len))
			return -EINVAL;
		return addr;
	}

	if (addr) {
		addr &= huge_page_mask(h);
		if (!mmap_address_hint_valid(addr, len))
			goto get_unmapped_area;

		vma = find_vma(mm, addr);
		if (!vma || addr + len <= vm_start_gap(vma))
			return addr;
	}

get_unmapped_area:
	if (mm->get_unmapped_area == arch_get_unmapped_area)
		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
				pgoff, flags);
	else
		return hugetlb_get_unmapped_area_topdown(file, addr, len,
				pgoff, flags);
}
#endif /* CONFIG_HUGETLB_PAGE */
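
/*
 * Illustrative sketch (not part of the upstream file): a minimal userspace
 * caller that exercises hugetlb_get_unmapped_area() above. An anonymous
 * mmap(MAP_HUGETLB) is backed by an internal hugetlbfs file, so the address
 * search goes through this arch hook and the returned mapping is aligned to
 * the huge page size. Error handling is trimmed for brevity.
 */
#if 0 /* example only; userspace code, shown here for context */
#define _GNU_SOURCE		/* MAP_HUGETLB may need this on some libcs */
#include <sys/mman.h>
#include <stdio.h>

int main(void)
{
	size_t len = 2UL * 1024 * 1024;		/* one 2M huge page */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");			/* e.g. no huge pages reserved */
		return 1;
	}
	printf("huge mapping at %p\n", p);	/* huge-page-aligned address */
	munmap(p, len);
	return 0;
}
#endif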

#ifdef CONFIG_X86_64
static __init int setup_hugepagesz(char *opt)
{
	unsigned long ps = memparse(opt, &opt);
	if (ps == PMD_SIZE) {
		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
	} else if (ps == PUD_SIZE && boot_cpu_has(X86_FEATURE_GBPAGES)) {
		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
	} else {
		hugetlb_bad_size();
		printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n",
			ps >> 20);
		return 0;
	}
	return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
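
/*
 * Usage note (added for illustration, not in the upstream file): the handler
 * above parses the "hugepagesz=" kernel command line option, typically paired
 * with "hugepages=", e.g.
 *
 *     hugepagesz=2M hugepages=512
 *     hugepagesz=1G hugepages=4
 *
 * 1G requests are honoured only when the CPU advertises the GBPAGES feature;
 * any other size is rejected with the "Unsupported page size" message.
 */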

#ifdef CONFIG_CONTIG_ALLOC
static __init int gigantic_pages_init(void)
{
	/* With compaction or CMA we can allocate gigantic pages at runtime */
	if (boot_cpu_has(X86_FEATURE_GBPAGES) && !size_to_hstate(1UL << PUD_SHIFT))
		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
	return 0;
}
arch_initcall(gigantic_pages_init);
#endif
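
/*
 * Runtime-allocation note (added for illustration, not in the upstream file):
 * once the 1G hstate is registered above, gigantic pages can be requested
 * after boot, e.g.
 *
 *     echo 4 > /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
 *
 * which relies on CONFIG_CONTIG_ALLOC (compaction/CMA) to find physically
 * contiguous 1G ranges.
 */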
#endif