blktrace: remove unnessary stop block trace in 'blk_trace_shutdown'
[linux-block.git] / tools / testing / selftests / vm / khugepaged.c
1 #define _GNU_SOURCE
2 #include <fcntl.h>
3 #include <limits.h>
4 #include <signal.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <stdbool.h>
8 #include <string.h>
9 #include <unistd.h>
10
11 #include <sys/mman.h>
12 #include <sys/wait.h>
13
14 #ifndef MADV_PAGEOUT
15 #define MADV_PAGEOUT 21
16 #endif
17
/* Fixed address (1 GiB) at which alloc_mapping() requires its VMA to land. */
#define BASE_ADDR ((void *)(1UL << 30))
/* Value of the THP "hpage_pmd_size" sysfs file, read once in main(). */
static unsigned long hpage_pmd_size;
/* Base page size as reported by getpagesize(), set in main(). */
static unsigned long page_size;
/* hpage_pmd_size / page_size, computed in main(). */
static int hpage_pmd_nr;

/* Directory prefix prepended to every knob name by read_*()/write_*(). */
#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
/* File scanned by check_huge() and check_swap(). */
#define PID_SMAPS "/proc/self/smaps"
25
/* Values of the THP "enabled" knob; also indexes thp_enabled_strings[]. */
enum thp_enabled {
	THP_ALWAYS,
	THP_MADVISE,
	THP_NEVER,
};

/*
 * Text form of each enum thp_enabled value.  The table is NULL-terminated
 * so that read_string() can walk it without knowing its length.
 */
static const char *thp_enabled_strings[] = {
	[THP_ALWAYS]	= "always",
	[THP_MADVISE]	= "madvise",
	[THP_NEVER]	= "never",
	[THP_NEVER + 1]	= NULL,
};
38
/* Values of the THP "defrag" knob; also indexes thp_defrag_strings[]. */
enum thp_defrag {
	THP_DEFRAG_ALWAYS,
	THP_DEFRAG_DEFER,
	THP_DEFRAG_DEFER_MADVISE,
	THP_DEFRAG_MADVISE,
	THP_DEFRAG_NEVER,
};

/*
 * Text form of each enum thp_defrag value.  The table is NULL-terminated
 * so that read_string() can walk it without knowing its length.
 */
static const char *thp_defrag_strings[] = {
	[THP_DEFRAG_ALWAYS]		= "always",
	[THP_DEFRAG_DEFER]		= "defer",
	[THP_DEFRAG_DEFER_MADVISE]	= "defer+madvise",
	[THP_DEFRAG_MADVISE]		= "madvise",
	[THP_DEFRAG_NEVER]		= "never",
	[THP_DEFRAG_NEVER + 1]		= NULL,
};
55
/* Values of the "shmem_enabled" knob; also indexes shmem_enabled_strings[]. */
enum shmem_enabled {
	SHMEM_ALWAYS,
	SHMEM_WITHIN_SIZE,
	SHMEM_ADVISE,
	SHMEM_NEVER,
	SHMEM_DENY,
	SHMEM_FORCE,
};

/*
 * Text form of each enum shmem_enabled value.  The table is NULL-terminated
 * so that read_string() can walk it without knowing its length.
 */
static const char *shmem_enabled_strings[] = {
	[SHMEM_ALWAYS]		= "always",
	[SHMEM_WITHIN_SIZE]	= "within_size",
	[SHMEM_ADVISE]		= "advise",
	[SHMEM_NEVER]		= "never",
	[SHMEM_DENY]		= "deny",
	[SHMEM_FORCE]		= "force",
	[SHMEM_FORCE + 1]	= NULL,
};
74
/*
 * Snapshot of the tunables under THP_SYSFS "khugepaged/".  Each member is
 * read from / written to the file of the same name by save_settings() and
 * write_settings().
 */
struct khugepaged_settings {
	bool defrag;
	unsigned int alloc_sleep_millisecs;
	unsigned int scan_sleep_millisecs;
	unsigned int max_ptes_none;
	unsigned int max_ptes_swap;
	unsigned int max_ptes_shared;
	unsigned long pages_to_scan;
};
84
/*
 * Complete set of THP knobs this test saves, overrides and restores.
 * The enum members index the matching *_strings[] tables when written
 * out by write_settings().
 */
struct settings {
	enum thp_enabled thp_enabled;		/* "enabled" */
	enum thp_defrag thp_defrag;		/* "defrag" */
	enum shmem_enabled shmem_enabled;	/* "shmem_enabled" */
	bool use_zero_page;			/* "use_zero_page" */
	struct khugepaged_settings khugepaged;	/* "khugepaged/..." files */
};
92
93 static struct settings default_settings = {
94         .thp_enabled = THP_MADVISE,
95         .thp_defrag = THP_DEFRAG_ALWAYS,
96         .shmem_enabled = SHMEM_NEVER,
97         .use_zero_page = 0,
98         .khugepaged = {
99                 .defrag = 1,
100                 .alloc_sleep_millisecs = 10,
101                 .scan_sleep_millisecs = 10,
102         },
103 };
104
/* System values captured by save_settings(); written back by restore_settings(). */
static struct settings saved_settings;
/* Set in forked children so that restore_settings() leaves sysfs alone. */
static bool skip_settings_restore;

/* Failure counter bumped by fail(); used as the process exit code. */
static int exit_status;
109
/* Print @msg in green, preceded by a space and followed by a newline. */
static void success(const char *msg)
{
	const char *green = "\e[32m";
	const char *reset = "\e[0m";

	printf(" %s%s%s\n", green, msg, reset);
}
114
115 static void fail(const char *msg)
116 {
117         printf(" \e[31m%s\e[0m\n", msg);
118         exit_status++;
119 }
120
/*
 * Read up to @buflen - 1 bytes from @path into @buf with a single read()
 * and NUL-terminate the result.
 *
 * Returns the number of bytes read, or 0 if the file could not be opened
 * or read() returned less than one byte.
 */
static int read_file(const char *path, char *buf, size_t buflen)
{
	ssize_t nbytes;
	int fd = open(path, O_RDONLY);

	if (fd == -1)
		return 0;

	nbytes = read(fd, buf, buflen - 1);
	close(fd);
	if (nbytes < 1)
		return 0;

	buf[nbytes] = '\0';
	return (unsigned int) nbytes;
}
141
/*
 * Write the first @buflen - 1 bytes of @buf to @path with a single
 * write().  Callers pass strlen() + 1 as @buflen, so the terminating NUL
 * is not written.
 *
 * Returns the number of bytes written, or 0 if the file could not be
 * opened or write() reported less than one byte.
 */
static int write_file(const char *path, const char *buf, size_t buflen)
{
	ssize_t nbytes;
	int fd = open(path, O_WRONLY);

	if (fd == -1)
		return 0;

	nbytes = write(fd, buf, buflen - 1);
	close(fd);

	return nbytes < 1 ? 0 : (unsigned int) nbytes;
}
158
159 static int read_string(const char *name, const char *strings[])
160 {
161         char path[PATH_MAX];
162         char buf[256];
163         char *c;
164         int ret;
165
166         ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
167         if (ret >= PATH_MAX) {
168                 printf("%s: Pathname is too long\n", __func__);
169                 exit(EXIT_FAILURE);
170         }
171
172         if (!read_file(path, buf, sizeof(buf))) {
173                 perror(path);
174                 exit(EXIT_FAILURE);
175         }
176
177         c = strchr(buf, '[');
178         if (!c) {
179                 printf("%s: Parse failure\n", __func__);
180                 exit(EXIT_FAILURE);
181         }
182
183         c++;
184         memmove(buf, c, sizeof(buf) - (c - buf));
185
186         c = strchr(buf, ']');
187         if (!c) {
188                 printf("%s: Parse failure\n", __func__);
189                 exit(EXIT_FAILURE);
190         }
191         *c = '\0';
192
193         ret = 0;
194         while (strings[ret]) {
195                 if (!strcmp(strings[ret], buf))
196                         return ret;
197                 ret++;
198         }
199
200         printf("Failed to parse %s\n", name);
201         exit(EXIT_FAILURE);
202 }
203
204 static void write_string(const char *name, const char *val)
205 {
206         char path[PATH_MAX];
207         int ret;
208
209         ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
210         if (ret >= PATH_MAX) {
211                 printf("%s: Pathname is too long\n", __func__);
212                 exit(EXIT_FAILURE);
213         }
214
215         if (!write_file(path, val, strlen(val) + 1)) {
216                 perror(path);
217                 exit(EXIT_FAILURE);
218         }
219 }
220
221 static const unsigned long read_num(const char *name)
222 {
223         char path[PATH_MAX];
224         char buf[21];
225         int ret;
226
227         ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
228         if (ret >= PATH_MAX) {
229                 printf("%s: Pathname is too long\n", __func__);
230                 exit(EXIT_FAILURE);
231         }
232
233         ret = read_file(path, buf, sizeof(buf));
234         if (ret < 0) {
235                 perror("read_file(read_num)");
236                 exit(EXIT_FAILURE);
237         }
238
239         return strtoul(buf, NULL, 10);
240 }
241
242 static void write_num(const char *name, unsigned long num)
243 {
244         char path[PATH_MAX];
245         char buf[21];
246         int ret;
247
248         ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
249         if (ret >= PATH_MAX) {
250                 printf("%s: Pathname is too long\n", __func__);
251                 exit(EXIT_FAILURE);
252         }
253
254         sprintf(buf, "%ld", num);
255         if (!write_file(path, buf, strlen(buf) + 1)) {
256                 perror(path);
257                 exit(EXIT_FAILURE);
258         }
259 }
260
261 static void write_settings(struct settings *settings)
262 {
263         struct khugepaged_settings *khugepaged = &settings->khugepaged;
264
265         write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
266         write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
267         write_string("shmem_enabled",
268                         shmem_enabled_strings[settings->shmem_enabled]);
269         write_num("use_zero_page", settings->use_zero_page);
270
271         write_num("khugepaged/defrag", khugepaged->defrag);
272         write_num("khugepaged/alloc_sleep_millisecs",
273                         khugepaged->alloc_sleep_millisecs);
274         write_num("khugepaged/scan_sleep_millisecs",
275                         khugepaged->scan_sleep_millisecs);
276         write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
277         write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
278         write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
279         write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
280 }
281
282 static void restore_settings(int sig)
283 {
284         if (skip_settings_restore)
285                 goto out;
286
287         printf("Restore THP and khugepaged settings...");
288         write_settings(&saved_settings);
289         success("OK");
290         if (sig)
291                 exit(EXIT_FAILURE);
292 out:
293         exit(exit_status);
294 }
295
296 static void save_settings(void)
297 {
298         printf("Save THP and khugepaged settings...");
299         saved_settings = (struct settings) {
300                 .thp_enabled = read_string("enabled", thp_enabled_strings),
301                 .thp_defrag = read_string("defrag", thp_defrag_strings),
302                 .shmem_enabled =
303                         read_string("shmem_enabled", shmem_enabled_strings),
304                 .use_zero_page = read_num("use_zero_page"),
305         };
306         saved_settings.khugepaged = (struct khugepaged_settings) {
307                 .defrag = read_num("khugepaged/defrag"),
308                 .alloc_sleep_millisecs =
309                         read_num("khugepaged/alloc_sleep_millisecs"),
310                 .scan_sleep_millisecs =
311                         read_num("khugepaged/scan_sleep_millisecs"),
312                 .max_ptes_none = read_num("khugepaged/max_ptes_none"),
313                 .max_ptes_swap = read_num("khugepaged/max_ptes_swap"),
314                 .max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
315                 .pages_to_scan = read_num("khugepaged/pages_to_scan"),
316         };
317         success("OK");
318
319         signal(SIGTERM, restore_settings);
320         signal(SIGINT, restore_settings);
321         signal(SIGHUP, restore_settings);
322         signal(SIGQUIT, restore_settings);
323 }
324
325 static void adjust_settings(void)
326 {
327
328         printf("Adjust settings...");
329         write_settings(&default_settings);
330         success("OK");
331 }
332
/* Size of the line buffers used when scanning PID_SMAPS. */
#define MAX_LINE_LENGTH 500

/*
 * Advance through @fp line by line until a line that begins with
 * @pattern is found.
 *
 * @buf must hold at least MAX_LINE_LENGTH bytes; on success it contains
 * the matching line and @fp is positioned just after it.  Returns false
 * if the end of the stream is reached without a match.
 */
static bool check_for_pattern(FILE *fp, char *pattern, char *buf)
{
	const size_t prefix_len = strlen(pattern);

	for (;;) {
		if (!fgets(buf, MAX_LINE_LENGTH, fp))
			return false;
		if (strncmp(buf, pattern, prefix_len) == 0)
			return true;
	}
}
343
344 static bool check_huge(void *addr)
345 {
346         bool thp = false;
347         int ret;
348         FILE *fp;
349         char buffer[MAX_LINE_LENGTH];
350         char addr_pattern[MAX_LINE_LENGTH];
351
352         ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
353                        (unsigned long) addr);
354         if (ret >= MAX_LINE_LENGTH) {
355                 printf("%s: Pattern is too long\n", __func__);
356                 exit(EXIT_FAILURE);
357         }
358
359
360         fp = fopen(PID_SMAPS, "r");
361         if (!fp) {
362                 printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
363                 exit(EXIT_FAILURE);
364         }
365         if (!check_for_pattern(fp, addr_pattern, buffer))
366                 goto err_out;
367
368         ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB",
369                        hpage_pmd_size >> 10);
370         if (ret >= MAX_LINE_LENGTH) {
371                 printf("%s: Pattern is too long\n", __func__);
372                 exit(EXIT_FAILURE);
373         }
374         /*
375          * Fetch the AnonHugePages: in the same block and check whether it got
376          * the expected number of hugeepages next.
377          */
378         if (!check_for_pattern(fp, "AnonHugePages:", buffer))
379                 goto err_out;
380
381         if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
382                 goto err_out;
383
384         thp = true;
385 err_out:
386         fclose(fp);
387         return thp;
388 }
389
390
391 static bool check_swap(void *addr, unsigned long size)
392 {
393         bool swap = false;
394         int ret;
395         FILE *fp;
396         char buffer[MAX_LINE_LENGTH];
397         char addr_pattern[MAX_LINE_LENGTH];
398
399         ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
400                        (unsigned long) addr);
401         if (ret >= MAX_LINE_LENGTH) {
402                 printf("%s: Pattern is too long\n", __func__);
403                 exit(EXIT_FAILURE);
404         }
405
406
407         fp = fopen(PID_SMAPS, "r");
408         if (!fp) {
409                 printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
410                 exit(EXIT_FAILURE);
411         }
412         if (!check_for_pattern(fp, addr_pattern, buffer))
413                 goto err_out;
414
415         ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
416                        size >> 10);
417         if (ret >= MAX_LINE_LENGTH) {
418                 printf("%s: Pattern is too long\n", __func__);
419                 exit(EXIT_FAILURE);
420         }
421         /*
422          * Fetch the Swap: in the same block and check whether it got
423          * the expected number of hugeepages next.
424          */
425         if (!check_for_pattern(fp, "Swap:", buffer))
426                 goto err_out;
427
428         if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
429                 goto err_out;
430
431         swap = true;
432 err_out:
433         fclose(fp);
434         return swap;
435 }
436
437 static void *alloc_mapping(void)
438 {
439         void *p;
440
441         p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE,
442                         MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
443         if (p != BASE_ADDR) {
444                 printf("Failed to allocate VMA at %p\n", BASE_ADDR);
445                 exit(EXIT_FAILURE);
446         }
447
448         return p;
449 }
450
451 static void fill_memory(int *p, unsigned long start, unsigned long end)
452 {
453         int i;
454
455         for (i = start / page_size; i < end / page_size; i++)
456                 p[i * page_size / sizeof(*p)] = i + 0xdead0000;
457 }
458
459 static void validate_memory(int *p, unsigned long start, unsigned long end)
460 {
461         int i;
462
463         for (i = start / page_size; i < end / page_size; i++) {
464                 if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
465                         printf("Page %d is corrupted: %#x\n",
466                                         i, p[i * page_size / sizeof(*p)]);
467                         exit(EXIT_FAILURE);
468                 }
469         }
470 }
471
472 #define TICK 500000
473 static bool wait_for_scan(const char *msg, char *p)
474 {
475         int full_scans;
476         int timeout = 6; /* 3 seconds */
477
478         /* Sanity check */
479         if (check_huge(p)) {
480                 printf("Unexpected huge page\n");
481                 exit(EXIT_FAILURE);
482         }
483
484         madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
485
486         /* Wait until the second full_scan completed */
487         full_scans = read_num("khugepaged/full_scans") + 2;
488
489         printf("%s...", msg);
490         while (timeout--) {
491                 if (check_huge(p))
492                         break;
493                 if (read_num("khugepaged/full_scans") >= full_scans)
494                         break;
495                 printf(".");
496                 usleep(TICK);
497         }
498
499         madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
500
501         return timeout == -1;
502 }
503
504 static void alloc_at_fault(void)
505 {
506         struct settings settings = default_settings;
507         char *p;
508
509         settings.thp_enabled = THP_ALWAYS;
510         write_settings(&settings);
511
512         p = alloc_mapping();
513         *p = 1;
514         printf("Allocate huge page on fault...");
515         if (check_huge(p))
516                 success("OK");
517         else
518                 fail("Fail");
519
520         write_settings(&default_settings);
521
522         madvise(p, page_size, MADV_DONTNEED);
523         printf("Split huge PMD on MADV_DONTNEED...");
524         if (!check_huge(p))
525                 success("OK");
526         else
527                 fail("Fail");
528         munmap(p, hpage_pmd_size);
529 }
530
531 static void collapse_full(void)
532 {
533         void *p;
534
535         p = alloc_mapping();
536         fill_memory(p, 0, hpage_pmd_size);
537         if (wait_for_scan("Collapse fully populated PTE table", p))
538                 fail("Timeout");
539         else if (check_huge(p))
540                 success("OK");
541         else
542                 fail("Fail");
543         validate_memory(p, 0, hpage_pmd_size);
544         munmap(p, hpage_pmd_size);
545 }
546
547 static void collapse_empty(void)
548 {
549         void *p;
550
551         p = alloc_mapping();
552         if (wait_for_scan("Do not collapse empty PTE table", p))
553                 fail("Timeout");
554         else if (check_huge(p))
555                 fail("Fail");
556         else
557                 success("OK");
558         munmap(p, hpage_pmd_size);
559 }
560
561 static void collapse_single_pte_entry(void)
562 {
563         void *p;
564
565         p = alloc_mapping();
566         fill_memory(p, 0, page_size);
567         if (wait_for_scan("Collapse PTE table with single PTE entry present", p))
568                 fail("Timeout");
569         else if (check_huge(p))
570                 success("OK");
571         else
572                 fail("Fail");
573         validate_memory(p, 0, page_size);
574         munmap(p, hpage_pmd_size);
575 }
576
577 static void collapse_max_ptes_none(void)
578 {
579         int max_ptes_none = hpage_pmd_nr / 2;
580         struct settings settings = default_settings;
581         void *p;
582
583         settings.khugepaged.max_ptes_none = max_ptes_none;
584         write_settings(&settings);
585
586         p = alloc_mapping();
587
588         fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
589         if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p))
590                 fail("Timeout");
591         else if (check_huge(p))
592                 fail("Fail");
593         else
594                 success("OK");
595         validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
596
597         fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
598         if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p))
599                 fail("Timeout");
600         else if (check_huge(p))
601                 success("OK");
602         else
603                 fail("Fail");
604         validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
605
606         munmap(p, hpage_pmd_size);
607         write_settings(&default_settings);
608 }
609
610 static void collapse_swapin_single_pte(void)
611 {
612         void *p;
613         p = alloc_mapping();
614         fill_memory(p, 0, hpage_pmd_size);
615
616         printf("Swapout one page...");
617         if (madvise(p, page_size, MADV_PAGEOUT)) {
618                 perror("madvise(MADV_PAGEOUT)");
619                 exit(EXIT_FAILURE);
620         }
621         if (check_swap(p, page_size)) {
622                 success("OK");
623         } else {
624                 fail("Fail");
625                 goto out;
626         }
627
628         if (wait_for_scan("Collapse with swapping in single PTE entry", p))
629                 fail("Timeout");
630         else if (check_huge(p))
631                 success("OK");
632         else
633                 fail("Fail");
634         validate_memory(p, 0, hpage_pmd_size);
635 out:
636         munmap(p, hpage_pmd_size);
637 }
638
639 static void collapse_max_ptes_swap(void)
640 {
641         int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
642         void *p;
643
644         p = alloc_mapping();
645
646         fill_memory(p, 0, hpage_pmd_size);
647         printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
648         if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
649                 perror("madvise(MADV_PAGEOUT)");
650                 exit(EXIT_FAILURE);
651         }
652         if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
653                 success("OK");
654         } else {
655                 fail("Fail");
656                 goto out;
657         }
658
659         if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p))
660                 fail("Timeout");
661         else if (check_huge(p))
662                 fail("Fail");
663         else
664                 success("OK");
665         validate_memory(p, 0, hpage_pmd_size);
666
667         fill_memory(p, 0, hpage_pmd_size);
668         printf("Swapout %d of %d pages...", max_ptes_swap, hpage_pmd_nr);
669         if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
670                 perror("madvise(MADV_PAGEOUT)");
671                 exit(EXIT_FAILURE);
672         }
673         if (check_swap(p, max_ptes_swap * page_size)) {
674                 success("OK");
675         } else {
676                 fail("Fail");
677                 goto out;
678         }
679
680         if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p))
681                 fail("Timeout");
682         else if (check_huge(p))
683                 success("OK");
684         else
685                 fail("Fail");
686         validate_memory(p, 0, hpage_pmd_size);
687 out:
688         munmap(p, hpage_pmd_size);
689 }
690
691 static void collapse_single_pte_entry_compound(void)
692 {
693         void *p;
694
695         p = alloc_mapping();
696
697         printf("Allocate huge page...");
698         madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
699         fill_memory(p, 0, hpage_pmd_size);
700         if (check_huge(p))
701                 success("OK");
702         else
703                 fail("Fail");
704         madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
705
706         printf("Split huge page leaving single PTE mapping compound page...");
707         madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
708         if (!check_huge(p))
709                 success("OK");
710         else
711                 fail("Fail");
712
713         if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p))
714                 fail("Timeout");
715         else if (check_huge(p))
716                 success("OK");
717         else
718                 fail("Fail");
719         validate_memory(p, 0, page_size);
720         munmap(p, hpage_pmd_size);
721 }
722
723 static void collapse_full_of_compound(void)
724 {
725         void *p;
726
727         p = alloc_mapping();
728
729         printf("Allocate huge page...");
730         madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
731         fill_memory(p, 0, hpage_pmd_size);
732         if (check_huge(p))
733                 success("OK");
734         else
735                 fail("Fail");
736
737         printf("Split huge page leaving single PTE page table full of compound pages...");
738         madvise(p, page_size, MADV_NOHUGEPAGE);
739         madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
740         if (!check_huge(p))
741                 success("OK");
742         else
743                 fail("Fail");
744
745         if (wait_for_scan("Collapse PTE table full of compound pages", p))
746                 fail("Timeout");
747         else if (check_huge(p))
748                 success("OK");
749         else
750                 fail("Fail");
751         validate_memory(p, 0, hpage_pmd_size);
752         munmap(p, hpage_pmd_size);
753 }
754
755 static void collapse_compound_extreme(void)
756 {
757         void *p;
758         int i;
759
760         p = alloc_mapping();
761         for (i = 0; i < hpage_pmd_nr; i++) {
762                 printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
763                                 i + 1, hpage_pmd_nr);
764
765                 madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
766                 fill_memory(BASE_ADDR, 0, hpage_pmd_size);
767                 if (!check_huge(BASE_ADDR)) {
768                         printf("Failed to allocate huge page\n");
769                         exit(EXIT_FAILURE);
770                 }
771                 madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);
772
773                 p = mremap(BASE_ADDR - i * page_size,
774                                 i * page_size + hpage_pmd_size,
775                                 (i + 1) * page_size,
776                                 MREMAP_MAYMOVE | MREMAP_FIXED,
777                                 BASE_ADDR + 2 * hpage_pmd_size);
778                 if (p == MAP_FAILED) {
779                         perror("mremap+unmap");
780                         exit(EXIT_FAILURE);
781                 }
782
783                 p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
784                                 (i + 1) * page_size,
785                                 (i + 1) * page_size + hpage_pmd_size,
786                                 MREMAP_MAYMOVE | MREMAP_FIXED,
787                                 BASE_ADDR - (i + 1) * page_size);
788                 if (p == MAP_FAILED) {
789                         perror("mremap+alloc");
790                         exit(EXIT_FAILURE);
791                 }
792         }
793
794         munmap(BASE_ADDR, hpage_pmd_size);
795         fill_memory(p, 0, hpage_pmd_size);
796         if (!check_huge(p))
797                 success("OK");
798         else
799                 fail("Fail");
800
801         if (wait_for_scan("Collapse PTE table full of different compound pages", p))
802                 fail("Timeout");
803         else if (check_huge(p))
804                 success("OK");
805         else
806                 fail("Fail");
807
808         validate_memory(p, 0, hpage_pmd_size);
809         munmap(p, hpage_pmd_size);
810 }
811
812 static void collapse_fork(void)
813 {
814         int wstatus;
815         void *p;
816
817         p = alloc_mapping();
818
819         printf("Allocate small page...");
820         fill_memory(p, 0, page_size);
821         if (!check_huge(p))
822                 success("OK");
823         else
824                 fail("Fail");
825
826         printf("Share small page over fork()...");
827         if (!fork()) {
828                 /* Do not touch settings on child exit */
829                 skip_settings_restore = true;
830                 exit_status = 0;
831
832                 if (!check_huge(p))
833                         success("OK");
834                 else
835                         fail("Fail");
836
837                 fill_memory(p, page_size, 2 * page_size);
838
839                 if (wait_for_scan("Collapse PTE table with single page shared with parent process", p))
840                         fail("Timeout");
841                 else if (check_huge(p))
842                         success("OK");
843                 else
844                         fail("Fail");
845
846                 validate_memory(p, 0, page_size);
847                 munmap(p, hpage_pmd_size);
848                 exit(exit_status);
849         }
850
851         wait(&wstatus);
852         exit_status += WEXITSTATUS(wstatus);
853
854         printf("Check if parent still has small page...");
855         if (!check_huge(p))
856                 success("OK");
857         else
858                 fail("Fail");
859         validate_memory(p, 0, page_size);
860         munmap(p, hpage_pmd_size);
861 }
862
863 static void collapse_fork_compound(void)
864 {
865         int wstatus;
866         void *p;
867
868         p = alloc_mapping();
869
870         printf("Allocate huge page...");
871         madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
872         fill_memory(p, 0, hpage_pmd_size);
873         if (check_huge(p))
874                 success("OK");
875         else
876                 fail("Fail");
877
878         printf("Share huge page over fork()...");
879         if (!fork()) {
880                 /* Do not touch settings on child exit */
881                 skip_settings_restore = true;
882                 exit_status = 0;
883
884                 if (check_huge(p))
885                         success("OK");
886                 else
887                         fail("Fail");
888
889                 printf("Split huge page PMD in child process...");
890                 madvise(p, page_size, MADV_NOHUGEPAGE);
891                 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
892                 if (!check_huge(p))
893                         success("OK");
894                 else
895                         fail("Fail");
896                 fill_memory(p, 0, page_size);
897
898                 write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
899                 if (wait_for_scan("Collapse PTE table full of compound pages in child", p))
900                         fail("Timeout");
901                 else if (check_huge(p))
902                         success("OK");
903                 else
904                         fail("Fail");
905                 write_num("khugepaged/max_ptes_shared",
906                                 default_settings.khugepaged.max_ptes_shared);
907
908                 validate_memory(p, 0, hpage_pmd_size);
909                 munmap(p, hpage_pmd_size);
910                 exit(exit_status);
911         }
912
913         wait(&wstatus);
914         exit_status += WEXITSTATUS(wstatus);
915
916         printf("Check if parent still has huge page...");
917         if (check_huge(p))
918                 success("OK");
919         else
920                 fail("Fail");
921         validate_memory(p, 0, hpage_pmd_size);
922         munmap(p, hpage_pmd_size);
923 }
924
925 static void collapse_max_ptes_shared()
926 {
927         int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
928         int wstatus;
929         void *p;
930
931         p = alloc_mapping();
932
933         printf("Allocate huge page...");
934         madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
935         fill_memory(p, 0, hpage_pmd_size);
936         if (check_huge(p))
937                 success("OK");
938         else
939                 fail("Fail");
940
941         printf("Share huge page over fork()...");
942         if (!fork()) {
943                 /* Do not touch settings on child exit */
944                 skip_settings_restore = true;
945                 exit_status = 0;
946
947                 if (check_huge(p))
948                         success("OK");
949                 else
950                         fail("Fail");
951
952                 printf("Trigger CoW on page %d of %d...",
953                                 hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
954                 fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
955                 if (!check_huge(p))
956                         success("OK");
957                 else
958                         fail("Fail");
959
960                 if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p))
961                         fail("Timeout");
962                 else if (!check_huge(p))
963                         success("OK");
964                 else
965                         fail("Fail");
966
967                 printf("Trigger CoW on page %d of %d...",
968                                 hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
969                 fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size);
970                 if (!check_huge(p))
971                         success("OK");
972                 else
973                         fail("Fail");
974
975
976                 if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p))
977                         fail("Timeout");
978                 else if (check_huge(p))
979                         success("OK");
980                 else
981                         fail("Fail");
982
983                 validate_memory(p, 0, hpage_pmd_size);
984                 munmap(p, hpage_pmd_size);
985                 exit(exit_status);
986         }
987
988         wait(&wstatus);
989         exit_status += WEXITSTATUS(wstatus);
990
991         printf("Check if parent still has huge page...");
992         if (check_huge(p))
993                 success("OK");
994         else
995                 fail("Fail");
996         validate_memory(p, 0, hpage_pmd_size);
997         munmap(p, hpage_pmd_size);
998 }
999
1000 int main(void)
1001 {
1002         setbuf(stdout, NULL);
1003
1004         page_size = getpagesize();
1005         hpage_pmd_size = read_num("hpage_pmd_size");
1006         hpage_pmd_nr = hpage_pmd_size / page_size;
1007
1008         default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
1009         default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
1010         default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
1011         default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
1012
1013         save_settings();
1014         adjust_settings();
1015
1016         alloc_at_fault();
1017         collapse_full();
1018         collapse_empty();
1019         collapse_single_pte_entry();
1020         collapse_max_ptes_none();
1021         collapse_swapin_single_pte();
1022         collapse_max_ptes_swap();
1023         collapse_single_pte_entry_compound();
1024         collapse_full_of_compound();
1025         collapse_compound_extreme();
1026         collapse_fork();
1027         collapse_fork_compound();
1028         collapse_max_ptes_shared();
1029
1030         restore_settings(0);
1031 }