/* Advice value handed to madvise() by the swap-out tests in this file. */
15 #define MADV_PAGEOUT 21
/* Fixed address (1UL << 30) at which the test mappings are requested. */
18 #define BASE_ADDR ((void *)(1UL << 30))
/* Filled at start-up from the "hpage_pmd_size" sysfs file via read_num(). */
19 static unsigned long hpage_pmd_size;
/* Filled at start-up from getpagesize(). */
20 static unsigned long page_size;
/* Filled at start-up as hpage_pmd_size / page_size. */
21 static int hpage_pmd_nr;
/* Directory prefix that the read_*()/write_*() helpers prepend to a knob name. */
23 #define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
/* File opened and scanned by check_huge() and check_swap(). */
24 #define PID_SMAPS "/proc/self/smaps"
/*
 * NOTE(review): only the opening lines of the three string tables (and one
 * enumerator) are visible in this listing; their entries are not shown.
 * Visible usage: write_settings() indexes each table with the matching
 * settings field, and read_string() walks a table until it hits a NULL entry.
 */
32 static const char *thp_enabled_strings[] = {
42 THP_DEFRAG_DEFER_MADVISE,
47 static const char *thp_defrag_strings[] = {
65 static const char *shmem_enabled_strings[] = {
/*
 * khugepaged tunables.  write_settings() writes each field to the file
 * "khugepaged/<field name>" below THP_SYSFS and save_settings() reads the
 * same files back.
 * NOTE(review): a `defrag` member is used elsewhere in the file but its
 * declaration is not visible in this listing.
 */
75 struct khugepaged_settings {
77 unsigned int alloc_sleep_millisecs;
78 unsigned int scan_sleep_millisecs;
79 unsigned int max_ptes_none;
80 unsigned int max_ptes_swap;
81 unsigned int max_ptes_shared;
82 unsigned long pages_to_scan;
/*
 * Members of the top-level settings aggregate (its opening line is not
 * visible here): three mode selectors that write_settings() uses as indices
 * into the *_strings tables, plus the khugepaged tunables above.
 */
86 enum thp_enabled thp_enabled;
87 enum thp_defrag thp_defrag;
88 enum shmem_enabled shmem_enabled;
90 struct khugepaged_settings khugepaged;
/*
 * Baseline configuration: applied by adjust_settings() and re-applied by the
 * tests that temporarily change a knob.  The khugepaged max_ptes_* and
 * pages_to_scan members are not set here; they are assigned at start-up once
 * hpage_pmd_nr is known.
 */
93 static struct settings default_settings = {
94 .thp_enabled = THP_MADVISE,
95 .thp_defrag = THP_DEFRAG_ALWAYS,
96 .shmem_enabled = SHMEM_NEVER,
/* Sleep intervals; milliseconds according to the field names. */
100 .alloc_sleep_millisecs = 10,
101 .scan_sleep_millisecs = 10,
/* Snapshot filled by save_settings() and written back by restore_settings(). */
105 static struct settings saved_settings;
/*
 * Checked first thing in restore_settings(); the fork-based tests set it
 * ("Do not touch settings on child exit").
 */
106 static bool skip_settings_restore;
/* The fork-based tests add WEXITSTATUS() of their child to this counter. */
108 static int exit_status;
/* Print msg in green (ANSI SGR 32), reset the colour and end the line. */
110 static void success(const char *msg)
112 printf(" \e[32m%s\e[0m\n", msg);
/*
 * Print msg in red (ANSI SGR 31), reset the colour and end the line.
 * NOTE(review): any bookkeeping besides the printf is not visible in this
 * listing.
 */
115 static void fail(const char *msg)
117 printf(" \e[31m%s\e[0m\n", msg);
/*
 * Read at most buflen - 1 bytes from the file at path into buf.
 * Returns the byte count reported by read(), converted to unsigned int;
 * read_string() treats a zero return as failure.
 * NOTE(review): the error paths and any NUL-termination of buf are not
 * visible in this listing -- confirm against the full source.
 */
121 static int read_file(const char *path, char *buf, size_t buflen)
126 fd = open(path, O_RDONLY);
/* One byte of buf is held back (buflen - 1). */
130 numread = read(fd, buf, buflen - 1);
139 return (unsigned int) numread;
/*
 * Write the first buflen - 1 bytes of buf to the file at path, which is
 * opened write-only (no O_CREAT, so it must already exist).
 * Callers in this file pass strlen() + 1 as buflen, i.e. the terminating NUL
 * is not written.  Returns the byte count reported by write(), converted to
 * unsigned int; callers treat a zero return as failure.
 */
142 static int write_file(const char *path, const char *buf, size_t buflen)
147 fd = open(path, O_WRONLY);
151 numwritten = write(fd, buf, buflen - 1);
156 return (unsigned int) numwritten;
/*
 * Read the sysfs file THP_SYSFS "<name>", isolate the text found between
 * '[' and ']' and look it up in the NULL-terminated table strings[].
 * NOTE(review): the return statements are not visible in this listing;
 * callers store the result in the enum-typed settings fields, so it is
 * presumably the index of the matching entry.
 */
159 static int read_string(const char *name, const char *strings[])
/* Build the full path; ret >= PATH_MAX means snprintf() had to truncate. */
166 ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
167 if (ret >= PATH_MAX) {
168 printf("%s: Pathname is too long\n", __func__);
172 if (!read_file(path, buf, sizeof(buf))) {
/* Find the opening bracket of the bracketed value. */
177 c = strchr(buf, '[');
179 printf("%s: Parse failure\n", __func__);
/* Move the text starting at c to the front of buf. */
184 memmove(buf, c, sizeof(buf) - (c - buf));
/* Find the closing bracket. */
186 c = strchr(buf, ']');
188 printf("%s: Parse failure\n", __func__);
/* Compare buf against each table entry until the NULL sentinel. */
194 while (strings[ret]) {
195 if (!strcmp(strings[ret], buf))
200 printf("Failed to parse %s\n", name);
/*
 * Write the string val to the sysfs file THP_SYSFS "<name>".
 * NOTE(review): the failure handling inside the two `if` bodies is only
 * partly visible in this listing.
 */
204 static void write_string(const char *name, const char *val)
/* Build the full path; ret >= PATH_MAX means snprintf() had to truncate. */
209 ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
210 if (ret >= PATH_MAX) {
211 printf("%s: Pathname is too long\n", __func__);
/* strlen + 1 because write_file() writes buflen - 1 bytes. */
215 if (!write_file(path, val, strlen(val) + 1)) {
/*
 * Read the sysfs file THP_SYSFS "<name>" and return its contents parsed as a
 * base-10 unsigned long.
 *
 * The return type carries no top-level `const`: a qualifier on a value
 * returned by copy has no effect and only provokes -Wignored-qualifiers.
 * NOTE(review): the condition guarding the perror() call and what follows it
 * are not visible in this listing.
 */
221 static unsigned long read_num(const char *name)
/* Build the full path; ret >= PATH_MAX means snprintf() had to truncate. */
227 ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
228 if (ret >= PATH_MAX) {
229 printf("%s: Pathname is too long\n", __func__);
233 ret = read_file(path, buf, sizeof(buf));
235 perror("read_file(read_num)");
/* No end pointer is requested, so text after the digits is ignored. */
239 return strtoul(buf, NULL, 10);
/*
 * Write num, formatted as a decimal number, to the sysfs file
 * THP_SYSFS "<name>".
 * NOTE(review): the failure handling inside the two `if` bodies is only
 * partly visible in this listing.
 */
242 static void write_num(const char *name, unsigned long num)
/* Build the full path; ret >= PATH_MAX means snprintf() had to truncate. */
248 ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
249 if (ret >= PATH_MAX) {
250 printf("%s: Pathname is too long\n", __func__);
/* num is unsigned long, so %lu (not %ld) is the matching conversion. */
254 sprintf(buf, "%lu", num);
/* strlen + 1 because write_file() writes buflen - 1 bytes. */
255 if (!write_file(path, buf, strlen(buf) + 1)) {
/*
 * Push every field of *settings to sysfs.  The three mode selectors are
 * translated to strings through their lookup tables; all other fields are
 * written as numbers.
 */
261 static void write_settings(struct settings *settings)
263 struct khugepaged_settings *khugepaged = &settings->khugepaged;
/* Top-level THP knobs. */
265 write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
266 write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
267 write_string("shmem_enabled",
268 shmem_enabled_strings[settings->shmem_enabled]);
269 write_num("use_zero_page", settings->use_zero_page);
/* Knobs in the khugepaged/ subdirectory. */
271 write_num("khugepaged/defrag", khugepaged->defrag);
272 write_num("khugepaged/alloc_sleep_millisecs",
273 khugepaged->alloc_sleep_millisecs);
274 write_num("khugepaged/scan_sleep_millisecs",
275 khugepaged->scan_sleep_millisecs);
276 write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
277 write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
278 write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
279 write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
/*
 * Write saved_settings back to sysfs.  save_settings() installs this function
 * as the handler for SIGTERM, SIGINT, SIGHUP and SIGQUIT, and it is also
 * called directly with sig == 0 at the end of the run.
 * NOTE(review): what happens after the skip_settings_restore check and after
 * the write-back is not visible in this listing.
 */
282 static void restore_settings(int sig)
284 if (skip_settings_restore)
287 printf("Restore THP and khugepaged settings...");
288 write_settings(&saved_settings);
/*
 * Snapshot the current THP and khugepaged sysfs values into saved_settings
 * and arrange for restore_settings() to run on the common termination
 * signals.
 */
296 static void save_settings(void)
298 printf("Save THP and khugepaged settings...");
/* Top-level knobs: bracketed strings are mapped through the lookup tables. */
299 saved_settings = (struct settings) {
300 .thp_enabled = read_string("enabled", thp_enabled_strings),
301 .thp_defrag = read_string("defrag", thp_defrag_strings),
/* NOTE(review): the designator line for this initializer is not visible here. */
303 read_string("shmem_enabled", shmem_enabled_strings),
304 .use_zero_page = read_num("use_zero_page"),
/* Knobs in the khugepaged/ subdirectory. */
306 saved_settings.khugepaged = (struct khugepaged_settings) {
307 .defrag = read_num("khugepaged/defrag"),
308 .alloc_sleep_millisecs =
309 read_num("khugepaged/alloc_sleep_millisecs"),
310 .scan_sleep_millisecs =
311 read_num("khugepaged/scan_sleep_millisecs"),
312 .max_ptes_none = read_num("khugepaged/max_ptes_none"),
313 .max_ptes_swap = read_num("khugepaged/max_ptes_swap"),
314 .max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
315 .pages_to_scan = read_num("khugepaged/pages_to_scan"),
/* Restore the snapshot if the run is interrupted. */
319 signal(SIGTERM, restore_settings);
320 signal(SIGINT, restore_settings);
321 signal(SIGHUP, restore_settings);
322 signal(SIGQUIT, restore_settings);
/* Apply default_settings to sysfs, announcing the step on stdout. */
325 static void adjust_settings(void)
328 printf("Adjust settings...");
329 write_settings(&default_settings);
/* Size of the line and pattern buffers used when scanning smaps. */
333 #define MAX_LINE_LENGTH 500
/*
 * Read lines from fp into buf (fgets() with a MAX_LINE_LENGTH limit) and
 * compare the start of each line against pattern.
 * NOTE(review): the return statements are not visible in this listing;
 * callers treat a false result as "no line starting with pattern was found".
 */
335 static bool check_for_pattern(FILE *fp, char *pattern, char *buf)
337 while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) {
/* Prefix match: only the first strlen(pattern) characters are compared. */
338 if (!strncmp(buf, pattern, strlen(pattern)))
/*
 * Look up the mapping that starts at addr in /proc/self/smaps and compare its
 * "AnonHugePages:" line against the line expected for exactly
 * hpage_pmd_size >> 10 kB.
 * NOTE(review): the return statements and the fclose() are not visible in
 * this listing; callers use the result as "the region is backed by a huge
 * page".
 */
344 static bool check_huge(void *addr)
349 char buffer[MAX_LINE_LENGTH];
350 char addr_pattern[MAX_LINE_LENGTH];
/* Pattern for the start address of the mapping, followed by '-'. */
352 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
353 (unsigned long) addr);
354 if (ret >= MAX_LINE_LENGTH) {
355 printf("%s: Pattern is too long\n", __func__);
360 fp = fopen(PID_SMAPS, "r");
362 printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
/* Advance the stream to the line that starts with the address pattern. */
365 if (!check_for_pattern(fp, addr_pattern, buffer))
/* hpage_pmd_size is unsigned long, so %lu (not %ld) is the matching conversion. */
368 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10lu kB",
369 hpage_pmd_size >> 10);
370 if (ret >= MAX_LINE_LENGTH) {
371 printf("%s: Pattern is too long\n", __func__);
375 * Fetch the AnonHugePages: in the same block and check whether it got
376 * the expected number of hugepages next.
378 if (!check_for_pattern(fp, "AnonHugePages:", buffer))
381 if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
/*
 * Look up the mapping that starts at addr in /proc/self/smaps and compare its
 * "Swap:" line against the line expected for the requested amount.
 * NOTE(review): the argument supplied for "%19ld", the return statements and
 * the fclose() are not visible in this listing.  If that argument is derived
 * from the unsigned long `size`, "%19lu" would be the matching conversion.
 */
391 static bool check_swap(void *addr, unsigned long size)
396 char buffer[MAX_LINE_LENGTH];
397 char addr_pattern[MAX_LINE_LENGTH];
/* Pattern for the start address of the mapping, followed by '-'. */
399 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
400 (unsigned long) addr);
401 if (ret >= MAX_LINE_LENGTH) {
402 printf("%s: Pattern is too long\n", __func__);
407 fp = fopen(PID_SMAPS, "r");
409 printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
/* Advance the stream to the line that starts with the address pattern. */
412 if (!check_for_pattern(fp, addr_pattern, buffer))
415 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
417 if (ret >= MAX_LINE_LENGTH) {
418 printf("%s: Pattern is too long\n", __func__);
422 * Fetch the Swap: in the same block and check whether it got
423 * the expected amount of swapped-out memory next.
425 if (!check_for_pattern(fp, "Swap:", buffer))
428 if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
/*
 * Map hpage_pmd_size bytes of private, anonymous, read/write memory at
 * BASE_ADDR.  MAP_FIXED is not used, so BASE_ADDR is only a hint; a mapping
 * at any other address is reported as a failure.
 * NOTE(review): what follows the error message and the return statement are
 * not visible in this listing.
 */
437 static void *alloc_mapping(void)
441 p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE,
442 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
443 if (p != BASE_ADDR) {
444 printf("Failed to allocate VMA at %p\n", BASE_ADDR);
/*
 * Write a marker into the first int of every page in [start, end), where
 * start and end are byte offsets from p.  The marker is the page index plus
 * 0xdead0000, which validate_memory() checks later.
 */
451 static void fill_memory(int *p, unsigned long start, unsigned long end)
455 for (i = start / page_size; i < end / page_size; i++)
/* i * page_size / sizeof(*p) is the int index of the start of page i. */
456 p[i * page_size / sizeof(*p)] = i + 0xdead0000;
/*
 * Check that the first int of every page in [start, end) (byte offsets from
 * p) still holds the marker written by fill_memory(), and report the first
 * mismatch found.
 * NOTE(review): what happens after the message is printed is not visible in
 * this listing.
 */
459 static void validate_memory(int *p, unsigned long start, unsigned long end)
463 for (i = start / page_size; i < end / page_size; i++) {
464 if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
465 printf("Page %d is corrupted: %#x\n",
466 i, p[i * page_size / sizeof(*p)]);
/*
 * Offer the region at p to khugepaged and wait for it to be scanned.
 * The region is marked MADV_HUGEPAGE, the khugepaged/full_scans counter is
 * polled until it has advanced by two, and the region is then marked
 * MADV_NOHUGEPAGE again.  msg is printed as the progress label.
 * Returns true when the wait timed out (timeout reached -1).
 * NOTE(review): the polling loop, its sleep and the condition guarding the
 * "Unexpected huge page" message are not visible in this listing.
 */
473 static bool wait_for_scan(const char *msg, char *p)
476 int timeout = 6; /* 3 seconds */
480 printf("Unexpected huge page\n");
484 madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
486 /* Wait until the second full_scan completed */
487 full_scans = read_num("khugepaged/full_scans") + 2;
489 printf("%s...", msg);
493 if (read_num("khugepaged/full_scans") >= full_scans)
/* Stop khugepaged from touching the region after the wait. */
499 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
501 return timeout == -1;
/*
 * Test: with thp_enabled switched to THP_ALWAYS, check huge page allocation
 * at fault time, then check the effect of MADV_DONTNEED on the first page.
 * NOTE(review): the mapping allocation, the memory access and the
 * success()/fail() reporting are not visible in this listing.
 */
504 static void alloc_at_fault(void)
/* Work on a copy so default_settings itself stays untouched. */
506 struct settings settings = default_settings;
509 settings.thp_enabled = THP_ALWAYS;
510 write_settings(&settings);
514 printf("Allocate huge page on fault...");
/* Back to the baseline configuration. */
520 write_settings(&default_settings);
522 madvise(p, page_size, MADV_DONTNEED);
523 printf("Split huge PMD on MADV_DONTNEED...");
528 munmap(p, hpage_pmd_size);
/*
 * Test: populate every page of the region, let khugepaged scan it and check
 * with check_huge() whether it was collapsed; the contents are validated
 * afterwards.
 * NOTE(review): the mapping allocation and the success()/fail() reporting
 * are not visible in this listing.
 */
531 static void collapse_full(void)
536 fill_memory(p, 0, hpage_pmd_size);
537 if (wait_for_scan("Collapse fully populated PTE table", p))
539 else if (check_huge(p))
543 validate_memory(p, 0, hpage_pmd_size);
544 munmap(p, hpage_pmd_size);
/*
 * Test: leave the region untouched, let khugepaged scan it and inspect the
 * result with check_huge() (label: "Do not collapse empty PTE table").
 * NOTE(review): the mapping allocation and the success()/fail() reporting
 * are not visible in this listing.
 */
547 static void collapse_empty(void)
552 if (wait_for_scan("Do not collapse empty PTE table", p))
554 else if (check_huge(p))
558 munmap(p, hpage_pmd_size);
/*
 * Test: populate only the first page of the region, let khugepaged scan it
 * and inspect the result with check_huge(); the first page is validated
 * afterwards.
 * NOTE(review): the mapping allocation and the success()/fail() reporting
 * are not visible in this listing.
 */
561 static void collapse_single_pte_entry(void)
566 fill_memory(p, 0, page_size);
567 if (wait_for_scan("Collapse PTE table with single PTE entry present", p))
569 else if (check_huge(p))
573 validate_memory(p, 0, page_size);
574 munmap(p, hpage_pmd_size);
/*
 * Test of the khugepaged/max_ptes_none limit, set here to hpage_pmd_nr / 2.
 * Round one populates hpage_pmd_nr - max_ptes_none - 1 pages, which leaves
 * one more page unpopulated than the limit; round two populates
 * hpage_pmd_nr - max_ptes_none pages, leaving exactly max_ptes_none
 * unpopulated.  The baseline settings are written back at the end.
 * NOTE(review): the mapping allocation and the success()/fail() reporting
 * are not visible in this listing.
 */
577 static void collapse_max_ptes_none(void)
579 int max_ptes_none = hpage_pmd_nr / 2;
580 struct settings settings = default_settings;
583 settings.khugepaged.max_ptes_none = max_ptes_none;
584 write_settings(&settings);
/* Round one: limit exceeded by one page. */
588 fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
589 if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p))
591 else if (check_huge(p))
595 validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
/* Round two: exactly at the limit. */
597 fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
598 if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p))
600 else if (check_huge(p))
604 validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
606 munmap(p, hpage_pmd_size);
607 write_settings(&default_settings);
/*
 * Test: populate the whole region, page out the first page with
 * MADV_PAGEOUT (confirmed through check_swap()), then let khugepaged scan
 * the region and inspect the result with check_huge().
 * NOTE(review): the mapping allocation, the error paths and the
 * success()/fail() reporting are not visible in this listing.
 */
610 static void collapse_swapin_single_pte(void)
614 fill_memory(p, 0, hpage_pmd_size);
616 printf("Swapout one page...");
617 if (madvise(p, page_size, MADV_PAGEOUT)) {
618 perror("madvise(MADV_PAGEOUT)");
/* Confirm via smaps that page_size bytes are accounted as swapped. */
621 if (check_swap(p, page_size)) {
628 if (wait_for_scan("Collapse with swapping in single PTE entry", p))
630 else if (check_huge(p))
634 validate_memory(p, 0, hpage_pmd_size);
636 munmap(p, hpage_pmd_size);
/*
 * Test of the khugepaged/max_ptes_swap limit (read from sysfs).
 * Round one pages out max_ptes_swap + 1 pages, one more than the limit;
 * round two repopulates the region and pages out exactly max_ptes_swap
 * pages.  Each round confirms the swap accounting with check_swap() before
 * letting khugepaged scan the region.
 * NOTE(review): the mapping allocation, the error paths and the
 * success()/fail() reporting are not visible in this listing.
 */
639 static void collapse_max_ptes_swap(void)
641 int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
/* Round one: limit exceeded by one page. */
646 fill_memory(p, 0, hpage_pmd_size);
647 printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
648 if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
649 perror("madvise(MADV_PAGEOUT)");
652 if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
659 if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p))
661 else if (check_huge(p))
665 validate_memory(p, 0, hpage_pmd_size);
/* Round two: exactly at the limit. */
667 fill_memory(p, 0, hpage_pmd_size);
668 printf("Swapout %d of %d pages...", max_ptes_swap, hpage_pmd_nr);
669 if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
670 perror("madvise(MADV_PAGEOUT)");
673 if (check_swap(p, max_ptes_swap * page_size)) {
680 if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p))
682 else if (check_huge(p))
686 validate_memory(p, 0, hpage_pmd_size);
688 munmap(p, hpage_pmd_size);
/*
 * Test: populate the region under MADV_HUGEPAGE, switch it to
 * MADV_NOHUGEPAGE, drop everything after the first page with MADV_DONTNEED
 * and then let khugepaged scan what is left.
 * NOTE(review): the mapping allocation, the intermediate checks and the
 * success()/fail() reporting are not visible in this listing.
 */
691 static void collapse_single_pte_entry_compound(void)
697 printf("Allocate huge page...");
698 madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
699 fill_memory(p, 0, hpage_pmd_size);
704 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
706 printf("Split huge page leaving single PTE mapping compound page...");
/* Discard the range from the second page to the end of the region. */
707 madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
713 if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p))
715 else if (check_huge(p))
719 validate_memory(p, 0, page_size);
720 munmap(p, hpage_pmd_size);
/*
 * Test: populate the region under MADV_HUGEPAGE, then apply MADV_NOHUGEPAGE
 * first to the leading page only and then to the whole region (the step
 * labelled "Split huge page ..."), and let khugepaged scan the result.
 * NOTE(review): the mapping allocation, the intermediate checks and the
 * success()/fail() reporting are not visible in this listing.
 */
723 static void collapse_full_of_compound(void)
729 printf("Allocate huge page...");
730 madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
731 fill_memory(p, 0, hpage_pmd_size);
737 printf("Split huge page leaving single PTE page table full of compound pages...");
738 madvise(p, page_size, MADV_NOHUGEPAGE);
739 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
745 if (wait_for_scan("Collapse PTE table full of compound pages", p))
747 else if (check_huge(p))
751 validate_memory(p, 0, hpage_pmd_size);
752 munmap(p, hpage_pmd_size);
/*
 * Test: build, in hpage_pmd_nr iterations, a region described by its progress
 * label as a "PTE page table full of different PTE-mapped compound pages",
 * then let khugepaged scan it.
 * Each iteration populates a huge page at BASE_ADDR, marks it
 * MADV_NOHUGEPAGE and uses two mremap() calls to move the accumulated range
 * out to BASE_ADDR + 2 * hpage_pmd_size and back to an address one page lower
 * than before.
 * NOTE(review): one size argument of each mremap() call, the error paths and
 * the success()/fail() reporting are not visible in this listing.
 */
755 static void collapse_compound_extreme(void)
761 for (i = 0; i < hpage_pmd_nr; i++) {
762 printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
763 i + 1, hpage_pmd_nr);
765 madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
766 fill_memory(BASE_ADDR, 0, hpage_pmd_size);
767 if (!check_huge(BASE_ADDR)) {
768 printf("Failed to allocate huge page\n");
771 madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);
/* Move the range that starts i pages below BASE_ADDR to the staging address. */
773 p = mremap(BASE_ADDR - i * page_size,
774 i * page_size + hpage_pmd_size,
776 MREMAP_MAYMOVE | MREMAP_FIXED,
777 BASE_ADDR + 2 * hpage_pmd_size);
778 if (p == MAP_FAILED) {
779 perror("mremap+unmap");
/* Move it back so that it starts i + 1 pages below BASE_ADDR. */
783 p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
785 (i + 1) * page_size + hpage_pmd_size,
786 MREMAP_MAYMOVE | MREMAP_FIXED,
787 BASE_ADDR - (i + 1) * page_size);
788 if (p == MAP_FAILED) {
789 perror("mremap+alloc");
/* Drop the part at BASE_ADDR and rewrite the markers relative to p. */
794 munmap(BASE_ADDR, hpage_pmd_size);
795 fill_memory(p, 0, hpage_pmd_size);
801 if (wait_for_scan("Collapse PTE table full of different compound pages", p))
803 else if (check_huge(p))
808 validate_memory(p, 0, hpage_pmd_size);
809 munmap(p, hpage_pmd_size);
/*
 * Test: populate one small page, share it over fork(), populate a second
 * page and let khugepaged scan the region; afterwards the parent's first
 * page is validated again.
 * NOTE(review): the fork()/wait() calls, the branch that separates child and
 * parent code and the success()/fail() reporting are not visible in this
 * listing, so which statements run in which process cannot be confirmed here.
 */
812 static void collapse_fork(void)
819 printf("Allocate small page...");
820 fill_memory(p, 0, page_size);
826 printf("Share small page over fork()...");
828 /* Do not touch settings on child exit */
829 skip_settings_restore = true;
/* Populate the second page of the region. */
837 fill_memory(p, page_size, 2 * page_size);
839 if (wait_for_scan("Collapse PTE table with single page shared with parent process", p))
841 else if (check_huge(p))
846 validate_memory(p, 0, page_size);
847 munmap(p, hpage_pmd_size);
/* Fold the child's exit status into the overall result. */
852 exit_status += WEXITSTATUS(wstatus);
854 printf("Check if parent still has small page...");
859 validate_memory(p, 0, page_size);
860 munmap(p, hpage_pmd_size);
/*
 * Test: populate a huge page, share it over fork(), apply MADV_NOHUGEPAGE
 * (the step labelled "Split huge page PMD in child process..."), rewrite the
 * first page and let khugepaged scan with khugepaged/max_ptes_shared raised
 * to hpage_pmd_nr - 1; the knob is then reset to its default value.
 * NOTE(review): the fork()/wait() calls, the branch that separates child and
 * parent code and the success()/fail() reporting are not visible in this
 * listing.
 */
863 static void collapse_fork_compound(void)
870 printf("Allocate huge page...");
871 madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
872 fill_memory(p, 0, hpage_pmd_size);
878 printf("Share huge page over fork()...");
880 /* Do not touch settings on child exit */
881 skip_settings_restore = true;
889 printf("Split huge page PMD in child process...");
890 madvise(p, page_size, MADV_NOHUGEPAGE);
891 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
896 fill_memory(p, 0, page_size);
/* Temporarily allow all but one PTE to be shared. */
898 write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
899 if (wait_for_scan("Collapse PTE table full of compound pages in child", p))
901 else if (check_huge(p))
/* Put the knob back to its baseline value. */
905 write_num("khugepaged/max_ptes_shared",
906 default_settings.khugepaged.max_ptes_shared);
908 validate_memory(p, 0, hpage_pmd_size);
909 munmap(p, hpage_pmd_size);
/* Fold the child's exit status into the overall result. */
914 exit_status += WEXITSTATUS(wstatus);
916 printf("Check if parent still has huge page...");
921 validate_memory(p, 0, hpage_pmd_size);
922 munmap(p, hpage_pmd_size);
/*
 * Test of the khugepaged/max_ptes_shared limit (read from sysfs).
 * A huge page is populated and shared over fork().  Round one rewrites the
 * first hpage_pmd_nr - max_ptes_shared - 1 pages, which leaves one more page
 * untouched than the limit; round two rewrites hpage_pmd_nr - max_ptes_shared
 * pages, leaving exactly max_ptes_shared untouched.
 *
 * The parameter list is spelled (void), like every other definition in this
 * file, so that the definition also provides a prototype.
 * NOTE(review): the fork()/wait() calls, the branch that separates child and
 * parent code and the success()/fail() reporting are not visible in this
 * listing.
 */
925 static void collapse_max_ptes_shared(void)
927 int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
933 printf("Allocate huge page...");
934 madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
935 fill_memory(p, 0, hpage_pmd_size);
941 printf("Share huge page over fork()...");
943 /* Do not touch settings on child exit */
944 skip_settings_restore = true;
/* Round one: limit exceeded by one page. */
952 printf("Trigger CoW on page %d of %d...",
953 hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
954 fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
960 if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p))
/* Note the negation: this round is keyed on check_huge() returning false. */
962 else if (!check_huge(p))
/* Round two: exactly at the limit. */
967 printf("Trigger CoW on page %d of %d...",
968 hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
969 fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size);
976 if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p))
978 else if (check_huge(p))
983 validate_memory(p, 0, hpage_pmd_size);
984 munmap(p, hpage_pmd_size);
/* Fold the child's exit status into the overall result. */
989 exit_status += WEXITSTATUS(wstatus);
991 printf("Check if parent still has huge page...");
996 validate_memory(p, 0, hpage_pmd_size);
997 munmap(p, hpage_pmd_size);
/*
 * NOTE(review): the header of the enclosing function is not visible in this
 * listing; the statements below are the start-up sequence and the list of
 * test cases, ending with the settings restore.
 */
/* Unbuffered stdout, so progress labels printed without a newline show up at once. */
1002 setbuf(stdout, NULL);
/* Establish the page-size globals used by every helper. */
1004 page_size = getpagesize();
1005 hpage_pmd_size = read_num("hpage_pmd_size");
1006 hpage_pmd_nr = hpage_pmd_size / page_size;
/* Complete default_settings with the values that depend on hpage_pmd_nr. */
1008 default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
1009 default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
1010 default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
1011 default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
/* Test cases (some calls are not visible in this listing). */
1019 collapse_single_pte_entry();
1020 collapse_max_ptes_none();
1021 collapse_swapin_single_pte();
1022 collapse_max_ptes_swap();
1023 collapse_single_pte_entry_compound();
1024 collapse_full_of_compound();
1025 collapse_compound_extreme();
1027 collapse_fork_compound();
1028 collapse_max_ptes_shared();
/* Same routine as the signal handler, invoked with sig == 0. */
1030 restore_settings(0);