Commit | Line | Data |
---|---|---|
fa6c0231 ZY |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * A test of splitting PMD THPs and PTE-mapped THPs from a specified virtual | |
4 | * address range in a process via <debugfs>/split_huge_pages interface. | |
5 | */ | |
6 | ||
7 | #define _GNU_SOURCE | |
8 | #include <stdio.h> | |
9 | #include <stdlib.h> | |
fbe37501 | 10 | #include <stdarg.h> |
fa6c0231 ZY |
11 | #include <unistd.h> |
12 | #include <inttypes.h> | |
13 | #include <string.h> | |
14 | #include <fcntl.h> | |
15 | #include <sys/mman.h> | |
fbe37501 | 16 | #include <sys/mount.h> |
fa6c0231 ZY |
17 | #include <malloc.h> |
18 | #include <stdbool.h> | |
fc4d1823 | 19 | #include <time.h> |
642bc52a | 20 | #include "vm_util.h" |
73588704 | 21 | #include "../kselftest.h" |
fa6c0231 ZY |
22 | |
/* Base page size in bytes; main() sets it from getpagesize(). */
uint64_t pagesize;
/* log2 of pagesize; main() derives it with ffs(). */
unsigned int pageshift;
/* PMD-level huge page size in bytes; main() sets it from read_pmd_pagesize(). */
uint64_t pmd_pagesize;

/* debugfs control file that the split commands are written to. */
#define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
#define SMAP_PATH "/proc/self/smaps"
/* Capacity of the buffers holding debugfs commands and test file paths. */
#define INPUT_MAX 80

/*
 * Command formats written to SPLIT_DEBUGFS.
 *
 * PID_FMT:  pid, start address, end address, order.
 * PATH_FMT: file path, start page offset, end page offset, and a final
 *           integer (callers in this file pass 0; presumably the order).
 *
 * Every caller passes the two middle arguments as uint64_t, so the
 * conversions use PRIx64 rather than "%lx", which only matches where
 * unsigned long happens to be 64 bits wide.
 */
#define PID_FMT "%d,0x%" PRIx64 ",0x%" PRIx64 ",%d"
#define PATH_FMT "%s,0x%" PRIx64 ",0x%" PRIx64 ",%d"

/*
 * Masks applied to 64-bit pagemap / kpageflags entries.  They are built
 * from a 64-bit constant so the shift by 55 stays defined even where
 * unsigned long is 32 bits wide.
 */
#define PFN_MASK ((UINT64_C(1) << 55) - 1)
#define KPF_THP (UINT64_C(1) << 22)
36 | ||
37 | int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) | |
38 | { | |
39 | uint64_t paddr; | |
40 | uint64_t page_flags; | |
41 | ||
42 | if (pagemap_file) { | |
43 | pread(pagemap_file, &paddr, sizeof(paddr), | |
44 | ((long)vaddr >> pageshift) * sizeof(paddr)); | |
45 | ||
46 | if (kpageflags_file) { | |
47 | pread(kpageflags_file, &page_flags, sizeof(page_flags), | |
48 | (paddr & PFN_MASK) * sizeof(page_flags)); | |
49 | ||
50 | return !!(page_flags & KPF_THP); | |
51 | } | |
52 | } | |
53 | return 0; | |
54 | } | |
55 | ||
/*
 * Write a NUL-terminated command to @path in a single write() call.
 *
 * @path:   file to open write-only
 * @buf:    data to write
 * @buflen: length of @buf including its terminating NUL; the terminator
 *          itself is not written
 *
 * Any failure ends the test through ksft_exit_fail_msg(), which the rest of
 * this file relies on not to return.
 */
static void write_file(const char *path, const char *buf, size_t buflen)
{
	int fd;
	int saved_errno;
	ssize_t numwritten;

	/*
	 * buflen - 1 bytes are written; reject lengths that would underflow
	 * (0) or write nothing (1) before touching the file.
	 */
	if (buflen < 2)
		ksft_exit_fail_msg("Write failed\n");

	fd = open(path, O_WRONLY);
	if (fd == -1)
		ksft_exit_fail_msg("%s open failed: %s\n", path, strerror(errno));

	numwritten = write(fd, buf, buflen - 1);
	/* close() may overwrite errno, so capture the write() error first. */
	saved_errno = errno;
	close(fd);

	if (numwritten < 0)
		ksft_exit_fail_msg("%s write failed: %s\n", path,
				   strerror(saved_errno));
	if (numwritten < 1)
		ksft_exit_fail_msg("Write failed\n");
}
70 | ||
fbe37501 | 71 | static void write_debugfs(const char *fmt, ...) |
fa6c0231 ZY |
72 | { |
73 | char input[INPUT_MAX]; | |
74 | int ret; | |
fbe37501 ZY |
75 | va_list argp; |
76 | ||
77 | va_start(argp, fmt); | |
78 | ret = vsnprintf(input, INPUT_MAX, fmt, argp); | |
79 | va_end(argp); | |
fa6c0231 | 80 | |
73588704 MUA |
81 | if (ret >= INPUT_MAX) |
82 | ksft_exit_fail_msg("%s: Debugfs input is too long\n", __func__); | |
fa6c0231 | 83 | |
73588704 | 84 | write_file(SPLIT_DEBUGFS, input, ret + 1); |
fa6c0231 ZY |
85 | } |
86 | ||
fa6c0231 ZY |
87 | void split_pmd_thp(void) |
88 | { | |
89 | char *one_page; | |
90 | size_t len = 4 * pmd_pagesize; | |
fa6c0231 ZY |
91 | size_t i; |
92 | ||
93 | one_page = memalign(pmd_pagesize, len); | |
73588704 MUA |
94 | if (!one_page) |
95 | ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); | |
fa6c0231 ZY |
96 | |
97 | madvise(one_page, len, MADV_HUGEPAGE); | |
98 | ||
99 | for (i = 0; i < len; i++) | |
100 | one_page[i] = (char)i; | |
101 | ||
73588704 MUA |
102 | if (!check_huge_anon(one_page, 4, pmd_pagesize)) |
103 | ksft_exit_fail_msg("No THP is allocated\n"); | |
fa6c0231 ZY |
104 | |
105 | /* split all THPs */ | |
fbe37501 | 106 | write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, |
fc4d1823 | 107 | (uint64_t)one_page + len, 0); |
fa6c0231 ZY |
108 | |
109 | for (i = 0; i < len; i++) | |
73588704 MUA |
110 | if (one_page[i] != (char)i) |
111 | ksft_exit_fail_msg("%ld byte corrupted\n", i); | |
fa6c0231 ZY |
112 | |
113 | ||
73588704 MUA |
114 | if (!check_huge_anon(one_page, 0, pmd_pagesize)) |
115 | ksft_exit_fail_msg("Still AnonHugePages not split\n"); | |
fa6c0231 | 116 | |
73588704 | 117 | ksft_test_result_pass("Split huge pages successful\n"); |
fa6c0231 ZY |
118 | free(one_page); |
119 | } | |
120 | ||
121 | void split_pte_mapped_thp(void) | |
122 | { | |
123 | char *one_page, *pte_mapped, *pte_mapped2; | |
124 | size_t len = 4 * pmd_pagesize; | |
125 | uint64_t thp_size; | |
126 | size_t i; | |
127 | const char *pagemap_template = "/proc/%d/pagemap"; | |
128 | const char *kpageflags_proc = "/proc/kpageflags"; | |
129 | char pagemap_proc[255]; | |
130 | int pagemap_fd; | |
131 | int kpageflags_fd; | |
132 | ||
73588704 MUA |
133 | if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0) |
134 | ksft_exit_fail_msg("get pagemap proc error: %s\n", strerror(errno)); | |
fa6c0231 | 135 | |
73588704 MUA |
136 | pagemap_fd = open(pagemap_proc, O_RDONLY); |
137 | if (pagemap_fd == -1) | |
138 | ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno)); | |
fa6c0231 ZY |
139 | |
140 | kpageflags_fd = open(kpageflags_proc, O_RDONLY); | |
73588704 MUA |
141 | if (kpageflags_fd == -1) |
142 | ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno)); | |
fa6c0231 ZY |
143 | |
144 | one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE, | |
145 | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); | |
73588704 MUA |
146 | if (one_page == MAP_FAILED) |
147 | ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); | |
fa6c0231 ZY |
148 | |
149 | madvise(one_page, len, MADV_HUGEPAGE); | |
150 | ||
151 | for (i = 0; i < len; i++) | |
152 | one_page[i] = (char)i; | |
153 | ||
73588704 MUA |
154 | if (!check_huge_anon(one_page, 4, pmd_pagesize)) |
155 | ksft_exit_fail_msg("No THP is allocated\n"); | |
fa6c0231 ZY |
156 | |
157 | /* remap the first pagesize of first THP */ | |
158 | pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE); | |
159 | ||
160 | /* remap the Nth pagesize of Nth THP */ | |
161 | for (i = 1; i < 4; i++) { | |
162 | pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i, | |
163 | pagesize, pagesize, | |
164 | MREMAP_MAYMOVE|MREMAP_FIXED, | |
165 | pte_mapped + pagesize * i); | |
73588704 MUA |
166 | if (pte_mapped2 == MAP_FAILED) |
167 | ksft_exit_fail_msg("mremap failed: %s\n", strerror(errno)); | |
fa6c0231 ZY |
168 | } |
169 | ||
170 | /* smap does not show THPs after mremap, use kpageflags instead */ | |
171 | thp_size = 0; | |
172 | for (i = 0; i < pagesize * 4; i++) | |
173 | if (i % pagesize == 0 && | |
174 | is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) | |
175 | thp_size++; | |
176 | ||
73588704 MUA |
177 | if (thp_size != 4) |
178 | ksft_exit_fail_msg("Some THPs are missing during mremap\n"); | |
fa6c0231 ZY |
179 | |
180 | /* split all remapped THPs */ | |
fbe37501 | 181 | write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped, |
fc4d1823 | 182 | (uint64_t)pte_mapped + pagesize * 4, 0); |
fa6c0231 ZY |
183 | |
184 | /* smap does not show THPs after mremap, use kpageflags instead */ | |
185 | thp_size = 0; | |
186 | for (i = 0; i < pagesize * 4; i++) { | |
73588704 MUA |
187 | if (pte_mapped[i] != (char)i) |
188 | ksft_exit_fail_msg("%ld byte corrupted\n", i); | |
189 | ||
fa6c0231 ZY |
190 | if (i % pagesize == 0 && |
191 | is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) | |
192 | thp_size++; | |
193 | } | |
194 | ||
73588704 MUA |
195 | if (thp_size) |
196 | ksft_exit_fail_msg("Still %ld THPs not split\n", thp_size); | |
fa6c0231 | 197 | |
73588704 | 198 | ksft_test_result_pass("Split PTE-mapped huge pages successful\n"); |
fa6c0231 ZY |
199 | munmap(one_page, len); |
200 | close(pagemap_fd); | |
201 | close(kpageflags_fd); | |
202 | } | |
203 | ||
fbe37501 ZY |
204 | void split_file_backed_thp(void) |
205 | { | |
206 | int status; | |
207 | int fd; | |
208 | ssize_t num_written; | |
209 | char tmpfs_template[] = "/tmp/thp_split_XXXXXX"; | |
210 | const char *tmpfs_loc = mkdtemp(tmpfs_template); | |
211 | char testfile[INPUT_MAX]; | |
212 | uint64_t pgoff_start = 0, pgoff_end = 1024; | |
213 | ||
73588704 | 214 | ksft_print_msg("Please enable pr_debug in split_huge_pages_in_file() for more info.\n"); |
fbe37501 ZY |
215 | |
216 | status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m"); | |
217 | ||
73588704 MUA |
218 | if (status) |
219 | ksft_exit_fail_msg("Unable to create a tmpfs for testing\n"); | |
fbe37501 ZY |
220 | |
221 | status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc); | |
222 | if (status >= INPUT_MAX) { | |
73588704 | 223 | ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n"); |
fbe37501 ZY |
224 | } |
225 | ||
8b65ef5a | 226 | fd = open(testfile, O_CREAT|O_WRONLY, 0664); |
fbe37501 | 227 | if (fd == -1) { |
73588704 | 228 | ksft_perror("Cannot open testing file"); |
fbe37501 ZY |
229 | goto cleanup; |
230 | } | |
231 | ||
232 | /* write something to the file, so a file-backed THP can be allocated */ | |
9c7516d6 | 233 | num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1); |
fbe37501 ZY |
234 | close(fd); |
235 | ||
236 | if (num_written < 1) { | |
73588704 | 237 | ksft_perror("Fail to write data to testing file"); |
fbe37501 ZY |
238 | goto cleanup; |
239 | } | |
240 | ||
241 | /* split the file-backed THP */ | |
fc4d1823 | 242 | write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, 0); |
fbe37501 ZY |
243 | |
244 | status = unlink(testfile); | |
73588704 MUA |
245 | if (status) { |
246 | ksft_perror("Cannot remove testing file"); | |
247 | goto cleanup; | |
248 | } | |
fbe37501 | 249 | |
fbe37501 ZY |
250 | status = umount(tmpfs_loc); |
251 | if (status) { | |
73588704 MUA |
252 | rmdir(tmpfs_loc); |
253 | ksft_exit_fail_msg("Unable to umount %s\n", tmpfs_loc); | |
fbe37501 | 254 | } |
73588704 | 255 | |
fbe37501 | 256 | status = rmdir(tmpfs_loc); |
73588704 MUA |
257 | if (status) |
258 | ksft_exit_fail_msg("cannot remove tmp dir: %s\n", strerror(errno)); | |
fbe37501 | 259 | |
73588704 MUA |
260 | ksft_print_msg("Please check dmesg for more information\n"); |
261 | ksft_test_result_pass("File-backed THP split test done\n"); | |
262 | return; | |
263 | ||
264 | cleanup: | |
265 | umount(tmpfs_loc); | |
266 | rmdir(tmpfs_loc); | |
267 | ksft_exit_fail_msg("Error occurred\n"); | |
fbe37501 ZY |
268 | } |
269 | ||
fc4d1823 ZY |
270 | bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template, |
271 | const char **thp_fs_loc) | |
272 | { | |
273 | if (xfs_path) { | |
274 | *thp_fs_loc = xfs_path; | |
275 | return false; | |
276 | } | |
277 | ||
278 | *thp_fs_loc = mkdtemp(thp_fs_template); | |
279 | ||
280 | if (!*thp_fs_loc) | |
281 | ksft_exit_fail_msg("cannot create temp folder\n"); | |
282 | ||
283 | return true; | |
284 | } | |
285 | ||
/*
 * Remove the directory at @thp_fs_loc, but only when @created_tmp says it
 * was created by this test; a failed rmdir() ends the test.
 */
void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp)
{
	if (created_tmp && rmdir(thp_fs_loc) != 0)
		ksft_exit_fail_msg("cannot remove tmp dir: %s\n",
				   strerror(errno));
}
298 | ||
299 | int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, | |
300 | char **addr) | |
301 | { | |
302 | size_t i; | |
303 | int dummy; | |
304 | ||
305 | srand(time(NULL)); | |
306 | ||
307 | *fd = open(testfile, O_CREAT | O_RDWR, 0664); | |
308 | if (*fd == -1) | |
309 | ksft_exit_fail_msg("Failed to create a file at %s\n", testfile); | |
310 | ||
311 | for (i = 0; i < fd_size; i++) { | |
312 | unsigned char byte = (unsigned char)i; | |
313 | ||
314 | write(*fd, &byte, sizeof(byte)); | |
315 | } | |
316 | close(*fd); | |
317 | sync(); | |
318 | *fd = open("/proc/sys/vm/drop_caches", O_WRONLY); | |
319 | if (*fd == -1) { | |
320 | ksft_perror("open drop_caches"); | |
321 | goto err_out_unlink; | |
322 | } | |
323 | if (write(*fd, "3", 1) != 1) { | |
324 | ksft_perror("write to drop_caches"); | |
325 | goto err_out_unlink; | |
326 | } | |
327 | close(*fd); | |
328 | ||
329 | *fd = open(testfile, O_RDWR); | |
330 | if (*fd == -1) { | |
331 | ksft_perror("Failed to open testfile\n"); | |
332 | goto err_out_unlink; | |
333 | } | |
334 | ||
335 | *addr = mmap(NULL, fd_size, PROT_READ|PROT_WRITE, MAP_SHARED, *fd, 0); | |
336 | if (*addr == (char *)-1) { | |
337 | ksft_perror("cannot mmap"); | |
338 | goto err_out_close; | |
339 | } | |
340 | madvise(*addr, fd_size, MADV_HUGEPAGE); | |
341 | ||
342 | for (size_t i = 0; i < fd_size; i++) | |
343 | dummy += *(*addr + i); | |
344 | ||
345 | if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) { | |
346 | ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n"); | |
347 | munmap(*addr, fd_size); | |
348 | close(*fd); | |
349 | unlink(testfile); | |
350 | ksft_test_result_skip("Pagecache folio split skipped\n"); | |
351 | return -2; | |
352 | } | |
353 | return 0; | |
354 | err_out_close: | |
355 | close(*fd); | |
356 | err_out_unlink: | |
357 | unlink(testfile); | |
358 | ksft_exit_fail_msg("Failed to create large pagecache folios\n"); | |
359 | return -1; | |
360 | } | |
361 | ||
362 | void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_loc) | |
363 | { | |
364 | int fd; | |
365 | char *addr; | |
366 | size_t i; | |
367 | char testfile[INPUT_MAX]; | |
368 | int err = 0; | |
369 | ||
370 | err = snprintf(testfile, INPUT_MAX, "%s/test", fs_loc); | |
371 | ||
372 | if (err < 0) | |
373 | ksft_exit_fail_msg("cannot generate right test file name\n"); | |
374 | ||
375 | err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr); | |
376 | if (err) | |
377 | return; | |
378 | err = 0; | |
379 | ||
380 | write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order); | |
381 | ||
382 | for (i = 0; i < fd_size; i++) | |
383 | if (*(addr + i) != (char)i) { | |
384 | ksft_print_msg("%lu byte corrupted in the file\n", i); | |
385 | err = EXIT_FAILURE; | |
386 | goto out; | |
387 | } | |
388 | ||
389 | if (!check_huge_file(addr, 0, pmd_pagesize)) { | |
390 | ksft_print_msg("Still FilePmdMapped not split\n"); | |
391 | err = EXIT_FAILURE; | |
392 | goto out; | |
393 | } | |
394 | ||
395 | out: | |
396 | munmap(addr, fd_size); | |
397 | close(fd); | |
398 | unlink(testfile); | |
399 | if (err) | |
400 | ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order); | |
401 | ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order); | |
402 | } | |
403 | ||
fa6c0231 ZY |
404 | int main(int argc, char **argv) |
405 | { | |
fc4d1823 ZY |
406 | int i; |
407 | size_t fd_size; | |
408 | char *optional_xfs_path = NULL; | |
409 | char fs_loc_template[] = "/tmp/thp_fs_XXXXXX"; | |
410 | const char *fs_loc; | |
411 | bool created_tmp; | |
412 | ||
73588704 MUA |
413 | ksft_print_header(); |
414 | ||
fa6c0231 | 415 | if (geteuid() != 0) { |
73588704 MUA |
416 | ksft_print_msg("Please run the benchmark as root\n"); |
417 | ksft_finished(); | |
fa6c0231 ZY |
418 | } |
419 | ||
fc4d1823 ZY |
420 | if (argc > 1) |
421 | optional_xfs_path = argv[1]; | |
422 | ||
423 | ksft_set_plan(3+9); | |
73588704 | 424 | |
fa6c0231 ZY |
425 | pagesize = getpagesize(); |
426 | pageshift = ffs(pagesize) - 1; | |
427 | pmd_pagesize = read_pmd_pagesize(); | |
73588704 MUA |
428 | if (!pmd_pagesize) |
429 | ksft_exit_fail_msg("Reading PMD pagesize failed\n"); | |
fa6c0231 | 430 | |
fc4d1823 ZY |
431 | fd_size = 2 * pmd_pagesize; |
432 | ||
fa6c0231 ZY |
433 | split_pmd_thp(); |
434 | split_pte_mapped_thp(); | |
fbe37501 | 435 | split_file_backed_thp(); |
fa6c0231 | 436 | |
fc4d1823 ZY |
437 | created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template, |
438 | &fs_loc); | |
439 | for (i = 8; i >= 0; i--) | |
440 | split_thp_in_pagecache_to_order(fd_size, i, fs_loc); | |
441 | cleanup_thp_fs(fs_loc, created_tmp); | |
442 | ||
73588704 | 443 | ksft_finished(); |
fc4d1823 ZY |
444 | |
445 | return 0; | |
fa6c0231 | 446 | } |