Merge tag 'drm-intel-gt-next-2022-11-03' of git://anongit.freedesktop.org/drm/drm...
[linux-block.git] / tools / testing / selftests / vm / khugepaged.c
1 #define _GNU_SOURCE
2 #include <ctype.h>
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <limits.h>
6 #include <dirent.h>
7 #include <signal.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <stdbool.h>
11 #include <string.h>
12 #include <unistd.h>
13
14 #include <sys/mman.h>
15 #include <sys/wait.h>
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <sys/sysmacros.h>
19 #include <sys/vfs.h>
20
21 #include "linux/magic.h"
22
23 #include "vm_util.h"
24
/* Fallback definitions for madvise() advice values missing from old headers. */
#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT 21
#endif
#ifndef MADV_POPULATE_READ
#define MADV_POPULATE_READ 22
#endif
#ifndef MADV_COLLAPSE
#define MADV_COLLAPSE 25
#endif

/* Fixed address (1 GiB) where every test mapping is placed. */
#define BASE_ADDR ((void *)(1UL << 30))
static unsigned long hpage_pmd_size;	/* size in bytes of a PMD hugepage */
static unsigned long page_size;		/* base page size */
static int hpage_pmd_nr;		/* base pages per PMD hugepage */

#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
#define PID_SMAPS "/proc/self/smaps"
#define TEST_FILE "collapse_test_file"

#define MAX_LINE_LENGTH 500
/* Kind of backing memory a test VMA uses. */
enum vma_type {
	VMA_ANON,
	VMA_FILE,
	VMA_SHMEM,
};

/* Operations to set up, populate, verify and tear down one memory type. */
struct mem_ops {
	/* Map nr_hpages * hpage_pmd_size bytes at BASE_ADDR. */
	void *(*setup_area)(int nr_hpages);
	/* Undo setup_area(). */
	void (*cleanup_area)(void *p, unsigned long size);
	/* Fault in [start, end) of the area. */
	void (*fault)(void *p, unsigned long start, unsigned long end);
	/* Check that exactly nr_hpages pmd-mapped hugepages back addr. */
	bool (*check_huge)(void *addr, int nr_hpages);
	const char *name;
};

static struct mem_ops *file_ops;
static struct mem_ops *anon_ops;
static struct mem_ops *shmem_ops;
63
/* One way of triggering collapse: khugepaged scanning or MADV_COLLAPSE. */
struct collapse_context {
	void (*collapse)(const char *msg, char *p, int nr_hpages,
			 struct mem_ops *ops, bool expect);
	/* true if khugepaged's max_ptes_* sysfs limits apply here */
	bool enforce_pte_scan_limits;
	const char *name;
};

static struct collapse_context *khugepaged_context;
static struct collapse_context *madvise_context;

/* State describing the file used for file-backed collapse tests. */
struct file_info {
	const char *dir;	/* directory the test file is created in */
	char path[PATH_MAX];	/* full path of the test file */
	enum vma_type type;	/* VMA_FILE, or VMA_SHMEM when dir is tmpfs */
	int fd;			/* fd of the currently mapped test file */
	/* sysfs read_ahead_kb control of the owning block device */
	char dev_queue_read_ahead_path[PATH_MAX];
};

static struct file_info finfo;
83
/* Values of /sys/kernel/mm/transparent_hugepage/enabled. */
enum thp_enabled {
	THP_ALWAYS,
	THP_MADVISE,
	THP_NEVER,
};

/* Must stay in the same order as enum thp_enabled; NULL-terminated. */
static const char *thp_enabled_strings[] = {
	"always",
	"madvise",
	"never",
	NULL
};

/* Values of /sys/kernel/mm/transparent_hugepage/defrag. */
enum thp_defrag {
	THP_DEFRAG_ALWAYS,
	THP_DEFRAG_DEFER,
	THP_DEFRAG_DEFER_MADVISE,
	THP_DEFRAG_MADVISE,
	THP_DEFRAG_NEVER,
};

/* Must stay in the same order as enum thp_defrag; NULL-terminated. */
static const char *thp_defrag_strings[] = {
	"always",
	"defer",
	"defer+madvise",
	"madvise",
	"never",
	NULL
};

/* Values of /sys/kernel/mm/transparent_hugepage/shmem_enabled. */
enum shmem_enabled {
	SHMEM_ALWAYS,
	SHMEM_WITHIN_SIZE,
	SHMEM_ADVISE,
	SHMEM_NEVER,
	SHMEM_DENY,
	SHMEM_FORCE,
};

/* Must stay in the same order as enum shmem_enabled; NULL-terminated. */
static const char *shmem_enabled_strings[] = {
	"always",
	"within_size",
	"advise",
	"never",
	"deny",
	"force",
	NULL
};
132
/* Tunables under /sys/kernel/mm/transparent_hugepage/khugepaged/. */
struct khugepaged_settings {
	bool defrag;
	unsigned int alloc_sleep_millisecs;
	unsigned int scan_sleep_millisecs;
	unsigned int max_ptes_none;
	unsigned int max_ptes_swap;
	unsigned int max_ptes_shared;
	unsigned long pages_to_scan;
};

/* Complete snapshot of the THP sysfs state the tests manipulate. */
struct settings {
	enum thp_enabled thp_enabled;
	enum thp_defrag thp_defrag;
	enum shmem_enabled shmem_enabled;
	bool use_zero_page;
	struct khugepaged_settings khugepaged;
	/* block device read-ahead; only meaningful for VMA_FILE tests */
	unsigned long read_ahead_kb;
};

/* Host settings captured at startup and restored on exit or fatal signal. */
static struct settings saved_settings;
static bool skip_settings_restore;

/* Count of failed checks; doubles as the process exit code. */
static int exit_status;
156
/* Report a passing check: print msg in green. */
static void success(const char *msg)
{
	fputs(" \e[32m", stdout);
	fputs(msg, stdout);
	fputs("\e[0m\n", stdout);
}
161
162 static void fail(const char *msg)
163 {
164         printf(" \e[31m%s\e[0m\n", msg);
165         exit_status++;
166 }
167
/* Report a skipped check: print msg in yellow (does not affect status). */
static void skip(const char *msg)
{
	fputs(" \e[33m", stdout);
	fputs(msg, stdout);
	fputs("\e[0m\n", stdout);
}
172
/*
 * Read at most buflen - 1 bytes from path into buf and NUL-terminate the
 * result.  Returns the number of bytes read, or 0 on any failure (open
 * error, read error, or empty file) — note: never negative.
 */
static int read_file(const char *path, char *buf, size_t buflen)
{
	ssize_t got;
	int fd = open(path, O_RDONLY);

	if (fd == -1)
		return 0;

	got = read(fd, buf, buflen - 1);
	close(fd);
	if (got < 1)
		return 0;

	buf[got] = '\0';
	return (unsigned int) got;
}
193
/*
 * Write buflen - 1 bytes of buf to an existing file at path.  Exits the
 * program on open or write failure; on success returns the byte count.
 */
static int write_file(const char *path, const char *buf, size_t buflen)
{
	ssize_t put;
	int fd = open(path, O_WRONLY);

	if (fd == -1) {
		printf("open(%s)\n", path);
		exit(EXIT_FAILURE);
	}

	put = write(fd, buf, buflen - 1);
	close(fd);
	if (put < 1) {
		printf("write(%s)\n", buf);
		exit(EXIT_FAILURE);
	}

	return (unsigned int) put;
}
216
/*
 * Read a bracketed-choice sysfs file (e.g. "always [madvise] never") under
 * THP_SYSFS and return the index of the selected value within strings[],
 * which must be NULL-terminated.  Exits on I/O or parse failure.
 */
static int read_string(const char *name, const char *strings[])
{
	char path[PATH_MAX];
	char buf[256];
	char *c;
	int ret;

	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
	if (ret >= PATH_MAX) {
		printf("%s: Pathname is too long\n", __func__);
		exit(EXIT_FAILURE);
	}

	if (!read_file(path, buf, sizeof(buf))) {
		perror(path);
		exit(EXIT_FAILURE);
	}

	/* The active value is the one enclosed in square brackets. */
	c = strchr(buf, '[');
	if (!c) {
		printf("%s: Parse failure\n", __func__);
		exit(EXIT_FAILURE);
	}

	/* Shift everything after '[' to the front of buf. */
	c++;
	memmove(buf, c, sizeof(buf) - (c - buf));

	c = strchr(buf, ']');
	if (!c) {
		printf("%s: Parse failure\n", __func__);
		exit(EXIT_FAILURE);
	}
	*c = '\0';

	/* Match the isolated token against the candidate strings. */
	ret = 0;
	while (strings[ret]) {
		if (!strcmp(strings[ret], buf))
			return ret;
		ret++;
	}

	printf("Failed to parse %s\n", name);
	exit(EXIT_FAILURE);
}
261
262 static void write_string(const char *name, const char *val)
263 {
264         char path[PATH_MAX];
265         int ret;
266
267         ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
268         if (ret >= PATH_MAX) {
269                 printf("%s: Pathname is too long\n", __func__);
270                 exit(EXIT_FAILURE);
271         }
272
273         if (!write_file(path, val, strlen(val) + 1)) {
274                 perror(path);
275                 exit(EXIT_FAILURE);
276         }
277 }
278
/* Read an unsigned decimal number from the file at path; exits on failure. */
static const unsigned long _read_num(const char *path)
{
	char buf[21];

	/*
	 * Bug fix: read_file() returns 0 on failure and never a negative
	 * value, so the previous "< 0" check could never fire and a failed
	 * read would feed uninitialized data to strtoul().
	 */
	if (!read_file(path, buf, sizeof(buf))) {
		perror("read_file(read_num)");
		exit(EXIT_FAILURE);
	}

	return strtoul(buf, NULL, 10);
}
290
291 static const unsigned long read_num(const char *name)
292 {
293         char path[PATH_MAX];
294         int ret;
295
296         ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
297         if (ret >= PATH_MAX) {
298                 printf("%s: Pathname is too long\n", __func__);
299                 exit(EXIT_FAILURE);
300         }
301         return _read_num(path);
302 }
303
/* Write num, formatted as decimal, to the file at path; exits on failure. */
static void _write_num(const char *path, unsigned long num)
{
	char buf[21];

	/*
	 * Bug fix: num is unsigned long, so format with "%lu" — the old
	 * "%ld" printed values above LONG_MAX as negative numbers.  Use
	 * snprintf to keep the write inside buf.
	 */
	snprintf(buf, sizeof(buf), "%lu", num);
	if (!write_file(path, buf, strlen(buf) + 1)) {
		perror(path);
		exit(EXIT_FAILURE);
	}
}
314
315 static void write_num(const char *name, unsigned long num)
316 {
317         char path[PATH_MAX];
318         int ret;
319
320         ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
321         if (ret >= PATH_MAX) {
322                 printf("%s: Pathname is too long\n", __func__);
323                 exit(EXIT_FAILURE);
324         }
325         _write_num(path, num);
326 }
327
/* Apply a full struct settings snapshot to the live sysfs controls. */
static void write_settings(struct settings *settings)
{
	struct khugepaged_settings *khugepaged = &settings->khugepaged;

	write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
	write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
	write_string("shmem_enabled",
			shmem_enabled_strings[settings->shmem_enabled]);
	write_num("use_zero_page", settings->use_zero_page);

	write_num("khugepaged/defrag", khugepaged->defrag);
	write_num("khugepaged/alloc_sleep_millisecs",
			khugepaged->alloc_sleep_millisecs);
	write_num("khugepaged/scan_sleep_millisecs",
			khugepaged->scan_sleep_millisecs);
	write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
	write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
	write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
	write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);

	/* read_ahead_kb lives on the block device, not under THP sysfs. */
	if (file_ops && finfo.type == VMA_FILE)
		_write_num(finfo.dev_queue_read_ahead_path,
			   settings->read_ahead_kb);
}
352
/* Stack of settings so tests can temporarily override and then restore. */
#define MAX_SETTINGS_DEPTH 4
static struct settings settings_stack[MAX_SETTINGS_DEPTH];
static int settings_index;
356
357 static struct settings *current_settings(void)
358 {
359         if (!settings_index) {
360                 printf("Fail: No settings set");
361                 exit(EXIT_FAILURE);
362         }
363         return settings_stack + settings_index - 1;
364 }
365
366 static void push_settings(struct settings *settings)
367 {
368         if (settings_index >= MAX_SETTINGS_DEPTH) {
369                 printf("Fail: Settings stack exceeded");
370                 exit(EXIT_FAILURE);
371         }
372         settings_stack[settings_index++] = *settings;
373         write_settings(current_settings());
374 }
375
376 static void pop_settings(void)
377 {
378         if (settings_index <= 0) {
379                 printf("Fail: Settings stack empty");
380                 exit(EXIT_FAILURE);
381         }
382         --settings_index;
383         write_settings(current_settings());
384 }
385
/*
 * Restore the sysfs state captured by save_settings() and exit.  Also
 * installed as the handler for fatal signals: sig != 0 means we were
 * invoked as a signal handler and must exit with failure status.
 */
static void restore_settings(int sig)
{
	if (skip_settings_restore)
		goto out;

	printf("Restore THP and khugepaged settings...");
	write_settings(&saved_settings);
	success("OK");
	if (sig)
		exit(EXIT_FAILURE);
out:
	exit(exit_status);
}
399
/*
 * Snapshot the host's THP/khugepaged sysfs state into saved_settings and
 * arrange for restore_settings() to run on common fatal signals, so a
 * killed test run does not leave the machine misconfigured.
 */
static void save_settings(void)
{
	printf("Save THP and khugepaged settings...");
	saved_settings = (struct settings) {
		.thp_enabled = read_string("enabled", thp_enabled_strings),
		.thp_defrag = read_string("defrag", thp_defrag_strings),
		.shmem_enabled =
			read_string("shmem_enabled", shmem_enabled_strings),
		.use_zero_page = read_num("use_zero_page"),
	};
	saved_settings.khugepaged = (struct khugepaged_settings) {
		.defrag = read_num("khugepaged/defrag"),
		.alloc_sleep_millisecs =
			read_num("khugepaged/alloc_sleep_millisecs"),
		.scan_sleep_millisecs =
			read_num("khugepaged/scan_sleep_millisecs"),
		.max_ptes_none = read_num("khugepaged/max_ptes_none"),
		.max_ptes_swap = read_num("khugepaged/max_ptes_swap"),
		.max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
		.pages_to_scan = read_num("khugepaged/pages_to_scan"),
	};
	/* Device read-ahead only exists for file-backed (non-tmpfs) tests. */
	if (file_ops && finfo.type == VMA_FILE)
		saved_settings.read_ahead_kb =
				_read_num(finfo.dev_queue_read_ahead_path);

	success("OK");

	signal(SIGTERM, restore_settings);
	signal(SIGINT, restore_settings);
	signal(SIGHUP, restore_settings);
	signal(SIGQUIT, restore_settings);
}
432
433 static void get_finfo(const char *dir)
434 {
435         struct stat path_stat;
436         struct statfs fs;
437         char buf[1 << 10];
438         char path[PATH_MAX];
439         char *str, *end;
440
441         finfo.dir = dir;
442         stat(finfo.dir, &path_stat);
443         if (!S_ISDIR(path_stat.st_mode)) {
444                 printf("%s: Not a directory (%s)\n", __func__, finfo.dir);
445                 exit(EXIT_FAILURE);
446         }
447         if (snprintf(finfo.path, sizeof(finfo.path), "%s/" TEST_FILE,
448                      finfo.dir) >= sizeof(finfo.path)) {
449                 printf("%s: Pathname is too long\n", __func__);
450                 exit(EXIT_FAILURE);
451         }
452         if (statfs(finfo.dir, &fs)) {
453                 perror("statfs()");
454                 exit(EXIT_FAILURE);
455         }
456         finfo.type = fs.f_type == TMPFS_MAGIC ? VMA_SHMEM : VMA_FILE;
457         if (finfo.type == VMA_SHMEM)
458                 return;
459
460         /* Find owning device's queue/read_ahead_kb control */
461         if (snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/uevent",
462                      major(path_stat.st_dev), minor(path_stat.st_dev))
463             >= sizeof(path)) {
464                 printf("%s: Pathname is too long\n", __func__);
465                 exit(EXIT_FAILURE);
466         }
467         if (read_file(path, buf, sizeof(buf)) < 0) {
468                 perror("read_file(read_num)");
469                 exit(EXIT_FAILURE);
470         }
471         if (strstr(buf, "DEVTYPE=disk")) {
472                 /* Found it */
473                 if (snprintf(finfo.dev_queue_read_ahead_path,
474                              sizeof(finfo.dev_queue_read_ahead_path),
475                              "/sys/dev/block/%d:%d/queue/read_ahead_kb",
476                              major(path_stat.st_dev), minor(path_stat.st_dev))
477                     >= sizeof(finfo.dev_queue_read_ahead_path)) {
478                         printf("%s: Pathname is too long\n", __func__);
479                         exit(EXIT_FAILURE);
480                 }
481                 return;
482         }
483         if (!strstr(buf, "DEVTYPE=partition")) {
484                 printf("%s: Unknown device type: %s\n", __func__, path);
485                 exit(EXIT_FAILURE);
486         }
487         /*
488          * Partition of block device - need to find actual device.
489          * Using naming convention that devnameN is partition of
490          * device devname.
491          */
492         str = strstr(buf, "DEVNAME=");
493         if (!str) {
494                 printf("%s: Could not read: %s", __func__, path);
495                 exit(EXIT_FAILURE);
496         }
497         str += 8;
498         end = str;
499         while (*end) {
500                 if (isdigit(*end)) {
501                         *end = '\0';
502                         if (snprintf(finfo.dev_queue_read_ahead_path,
503                                      sizeof(finfo.dev_queue_read_ahead_path),
504                                      "/sys/block/%s/queue/read_ahead_kb",
505                                      str) >= sizeof(finfo.dev_queue_read_ahead_path)) {
506                                 printf("%s: Pathname is too long\n", __func__);
507                                 exit(EXIT_FAILURE);
508                         }
509                         return;
510                 }
511                 ++end;
512         }
513         printf("%s: Could not read: %s\n", __func__, path);
514         exit(EXIT_FAILURE);
515 }
516
/*
 * Return true if the smaps entry for the VMA starting at addr reports
 * exactly size bytes swapped out.  Parses /proc/self/smaps via
 * check_for_pattern() (vm_util.h).
 */
static bool check_swap(void *addr, unsigned long size)
{
	bool swap = false;
	int ret;
	FILE *fp;
	char buffer[MAX_LINE_LENGTH];
	char addr_pattern[MAX_LINE_LENGTH];

	/* VMA lines in smaps begin "start-end"; match on the start address. */
	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
		       (unsigned long) addr);
	if (ret >= MAX_LINE_LENGTH) {
		printf("%s: Pattern is too long\n", __func__);
		exit(EXIT_FAILURE);
	}


	fp = fopen(PID_SMAPS, "r");
	if (!fp) {
		printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
		exit(EXIT_FAILURE);
	}
	if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer)))
		goto err_out;

	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
		       size >> 10);
	if (ret >= MAX_LINE_LENGTH) {
		printf("%s: Pattern is too long\n", __func__);
		exit(EXIT_FAILURE);
	}
	/*
	 * Fetch the Swap: in the same block and check whether it got
	 * the expected number of hugepages next.
	 */
	if (!check_for_pattern(fp, "Swap:", buffer, sizeof(buffer)))
		goto err_out;

	if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
		goto err_out;

	swap = true;
err_out:
	fclose(fp);
	return swap;
}
562
563 static void *alloc_mapping(int nr)
564 {
565         void *p;
566
567         p = mmap(BASE_ADDR, nr * hpage_pmd_size, PROT_READ | PROT_WRITE,
568                  MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
569         if (p != BASE_ADDR) {
570                 printf("Failed to allocate VMA at %p\n", BASE_ADDR);
571                 exit(EXIT_FAILURE);
572         }
573
574         return p;
575 }
576
577 static void fill_memory(int *p, unsigned long start, unsigned long end)
578 {
579         int i;
580
581         for (i = start / page_size; i < end / page_size; i++)
582                 p[i * page_size / sizeof(*p)] = i + 0xdead0000;
583 }
584
585 /*
586  * MADV_COLLAPSE is a best-effort request and may fail if an internal
587  * resource is temporarily unavailable, in which case it will set errno to
588  * EAGAIN.  In such a case, immediately reattempt the operation one more
589  * time.
590  */
591 static int madvise_collapse_retry(void *p, unsigned long size)
592 {
593         bool retry = true;
594         int ret;
595
596 retry:
597         ret = madvise(p, size, MADV_COLLAPSE);
598         if (ret && errno == EAGAIN && retry) {
599                 retry = false;
600                 goto retry;
601         }
602         return ret;
603 }
604
605 /*
606  * Returns pmd-mapped hugepage in VMA marked VM_HUGEPAGE, filled with
607  * validate_memory()'able contents.
608  */
609 static void *alloc_hpage(struct mem_ops *ops)
610 {
611         void *p = ops->setup_area(1);
612
613         ops->fault(p, 0, hpage_pmd_size);
614
615         /*
616          * VMA should be neither VM_HUGEPAGE nor VM_NOHUGEPAGE.
617          * The latter is ineligible for collapse by MADV_COLLAPSE
618          * while the former might cause MADV_COLLAPSE to race with
619          * khugepaged on low-load system (like a test machine), which
620          * would cause MADV_COLLAPSE to fail with EAGAIN.
621          */
622         printf("Allocate huge page...");
623         if (madvise_collapse_retry(p, hpage_pmd_size)) {
624                 perror("madvise(MADV_COLLAPSE)");
625                 exit(EXIT_FAILURE);
626         }
627         if (!ops->check_huge(p, 1)) {
628                 perror("madvise(MADV_COLLAPSE)");
629                 exit(EXIT_FAILURE);
630         }
631         if (madvise(p, hpage_pmd_size, MADV_HUGEPAGE)) {
632                 perror("madvise(MADV_HUGEPAGE)");
633                 exit(EXIT_FAILURE);
634         }
635         success("OK");
636         return p;
637 }
638
639 static void validate_memory(int *p, unsigned long start, unsigned long end)
640 {
641         int i;
642
643         for (i = start / page_size; i < end / page_size; i++) {
644                 if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
645                         printf("Page %d is corrupted: %#x\n",
646                                         i, p[i * page_size / sizeof(*p)]);
647                         exit(EXIT_FAILURE);
648                 }
649         }
650 }
651
/* Anonymous memory: the fixed-address mapping is all the setup needed. */
static void *anon_setup_area(int nr_hpages)
{
	return alloc_mapping(nr_hpages);
}
656
/* Anonymous memory: tearing down is just unmapping. */
static void anon_cleanup_area(void *area, unsigned long size)
{
	munmap(area, size);
}
661
/* Populate anonymous pages by writing markers into them. */
static void anon_fault(void *area, unsigned long start, unsigned long end)
{
	fill_memory(area, start, end);
}
666
667 static bool anon_check_huge(void *addr, int nr_hpages)
668 {
669         return check_huge_anon(addr, nr_hpages, hpage_pmd_size);
670 }
671
672 static void *file_setup_area(int nr_hpages)
673 {
674         int fd;
675         void *p;
676         unsigned long size;
677
678         unlink(finfo.path);  /* Cleanup from previous failed tests */
679         printf("Creating %s for collapse%s...", finfo.path,
680                finfo.type == VMA_SHMEM ? " (tmpfs)" : "");
681         fd = open(finfo.path, O_DSYNC | O_CREAT | O_RDWR | O_TRUNC | O_EXCL,
682                   777);
683         if (fd < 0) {
684                 perror("open()");
685                 exit(EXIT_FAILURE);
686         }
687
688         size = nr_hpages * hpage_pmd_size;
689         p = alloc_mapping(nr_hpages);
690         fill_memory(p, 0, size);
691         write(fd, p, size);
692         close(fd);
693         munmap(p, size);
694         success("OK");
695
696         printf("Opening %s read only for collapse...", finfo.path);
697         finfo.fd = open(finfo.path, O_RDONLY, 777);
698         if (finfo.fd < 0) {
699                 perror("open()");
700                 exit(EXIT_FAILURE);
701         }
702         p = mmap(BASE_ADDR, size, PROT_READ | PROT_EXEC,
703                  MAP_PRIVATE, finfo.fd, 0);
704         if (p == MAP_FAILED || p != BASE_ADDR) {
705                 perror("mmap()");
706                 exit(EXIT_FAILURE);
707         }
708
709         /* Drop page cache */
710         write_file("/proc/sys/vm/drop_caches", "3", 2);
711         success("OK");
712         return p;
713 }
714
715 static void file_cleanup_area(void *p, unsigned long size)
716 {
717         munmap(p, size);
718         close(finfo.fd);
719         unlink(finfo.path);
720 }
721
722 static void file_fault(void *p, unsigned long start, unsigned long end)
723 {
724         if (madvise(((char *)p) + start, end - start, MADV_POPULATE_READ)) {
725                 perror("madvise(MADV_POPULATE_READ");
726                 exit(EXIT_FAILURE);
727         }
728 }
729
730 static bool file_check_huge(void *addr, int nr_hpages)
731 {
732         switch (finfo.type) {
733         case VMA_FILE:
734                 return check_huge_file(addr, nr_hpages, hpage_pmd_size);
735         case VMA_SHMEM:
736                 return check_huge_shmem(addr, nr_hpages, hpage_pmd_size);
737         default:
738                 exit(EXIT_FAILURE);
739                 return false;
740         }
741 }
742
743 static void *shmem_setup_area(int nr_hpages)
744 {
745         void *p;
746         unsigned long size = nr_hpages * hpage_pmd_size;
747
748         finfo.fd = memfd_create("khugepaged-selftest-collapse-shmem", 0);
749         if (finfo.fd < 0)  {
750                 perror("memfd_create()");
751                 exit(EXIT_FAILURE);
752         }
753         if (ftruncate(finfo.fd, size)) {
754                 perror("ftruncate()");
755                 exit(EXIT_FAILURE);
756         }
757         p = mmap(BASE_ADDR, size, PROT_READ | PROT_WRITE, MAP_SHARED, finfo.fd,
758                  0);
759         if (p != BASE_ADDR) {
760                 perror("mmap()");
761                 exit(EXIT_FAILURE);
762         }
763         return p;
764 }
765
766 static void shmem_cleanup_area(void *p, unsigned long size)
767 {
768         munmap(p, size);
769         close(finfo.fd);
770 }
771
772 static bool shmem_check_huge(void *addr, int nr_hpages)
773 {
774         return check_huge_shmem(addr, nr_hpages, hpage_pmd_size);
775 }
776
/* mem_ops for private anonymous memory. */
static struct mem_ops __anon_ops = {
	.setup_area = &anon_setup_area,
	.cleanup_area = &anon_cleanup_area,
	.fault = &anon_fault,
	.check_huge = &anon_check_huge,
	.name = "anon",
};

/* mem_ops for a read-only private file mapping (disk or tmpfs dir). */
static struct mem_ops __file_ops = {
	.setup_area = &file_setup_area,
	.cleanup_area = &file_cleanup_area,
	.fault = &file_fault,
	.check_huge = &file_check_huge,
	.name = "file",
};

/* mem_ops for a shared memfd mapping; writes fault it in like anon. */
static struct mem_ops __shmem_ops = {
	.setup_area = &shmem_setup_area,
	.cleanup_area = &shmem_cleanup_area,
	.fault = &anon_fault,
	.check_huge = &shmem_check_huge,
	.name = "shmem",
};
800
/*
 * Run MADV_COLLAPSE over nr_hpages starting at p and check both the
 * syscall's return value and the resulting mapping against expect.
 */
static void __madvise_collapse(const char *msg, char *p, int nr_hpages,
			       struct mem_ops *ops, bool expect)
{
	int ret;
	struct settings settings = *current_settings();

	printf("%s...", msg);

	/*
	 * Prevent khugepaged interference and tests that MADV_COLLAPSE
	 * ignores /sys/kernel/mm/transparent_hugepage/enabled
	 */
	settings.thp_enabled = THP_NEVER;
	settings.shmem_enabled = SHMEM_NEVER;
	push_settings(&settings);

	/* Clear VM_NOHUGEPAGE */
	madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);
	ret = madvise_collapse_retry(p, nr_hpages * hpage_pmd_size);
	/*
	 * madvise() returns 0 on success, so (bool)ret == expect means
	 * the return value contradicts the expectation (failed when
	 * success was expected, or vice versa).
	 */
	if (((bool)ret) == expect)
		fail("Fail: Bad return value");
	else if (!ops->check_huge(p, expect ? nr_hpages : 0))
		fail("Fail: check_huge()");
	else
		success("OK");

	pop_settings();
}
829
830 static void madvise_collapse(const char *msg, char *p, int nr_hpages,
831                              struct mem_ops *ops, bool expect)
832 {
833         /* Sanity check */
834         if (!ops->check_huge(p, 0)) {
835                 printf("Unexpected huge page\n");
836                 exit(EXIT_FAILURE);
837         }
838         __madvise_collapse(msg, p, nr_hpages, ops, expect);
839 }
840
/* Poll interval while waiting for khugepaged, in microseconds. */
#define TICK 500000
/*
 * Mark the region VM_HUGEPAGE and wait (up to ~3s) for khugepaged to
 * either collapse it or complete two more full scans.  Returns true on
 * timeout.  Leaves the region marked VM_NOHUGEPAGE so khugepaged does
 * not touch it afterwards.
 */
static bool wait_for_scan(const char *msg, char *p, int nr_hpages,
			  struct mem_ops *ops)
{
	int full_scans;
	int timeout = 6; /* 3 seconds */

	/* Sanity check */
	if (!ops->check_huge(p, 0)) {
		printf("Unexpected huge page\n");
		exit(EXIT_FAILURE);
	}

	madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);

	/* Wait until the second full_scan completed */
	full_scans = read_num("khugepaged/full_scans") + 2;

	printf("%s...", msg);
	while (timeout--) {
		if (ops->check_huge(p, nr_hpages))
			break;
		if (read_num("khugepaged/full_scans") >= full_scans)
			break;
		printf(".");
		usleep(TICK);
	}

	madvise(p, nr_hpages * hpage_pmd_size, MADV_NOHUGEPAGE);

	return timeout == -1;
}
873
/*
 * khugepaged collapse entry point: wait for the scanner, then verify the
 * outcome against expect.  A timeout counts as success when no collapse
 * was expected.
 */
static void khugepaged_collapse(const char *msg, char *p, int nr_hpages,
				struct mem_ops *ops, bool expect)
{
	if (wait_for_scan(msg, p, nr_hpages, ops)) {
		if (expect)
			fail("Timeout");
		else
			success("OK");
		return;
	}

	/*
	 * For file and shmem memory, khugepaged only retracts pte entries after
	 * putting the new hugepage in the page cache. The hugepage must be
	 * subsequently refaulted to install the pmd mapping for the mm.
	 */
	if (ops != &__anon_ops)
		ops->fault(p, 0, nr_hpages * hpage_pmd_size);

	if (ops->check_huge(p, expect ? nr_hpages : 0))
		success("OK");
	else
		fail("Fail");
}
898
/* Collapse driven by the khugepaged daemon; sysfs scan limits apply. */
static struct collapse_context __khugepaged_context = {
	.collapse = &khugepaged_collapse,
	.enforce_pte_scan_limits = true,
	.name = "khugepaged",
};

/* Collapse driven by MADV_COLLAPSE; ignores the max_ptes_* limits. */
static struct collapse_context __madvise_context = {
	.collapse = &madvise_collapse,
	.enforce_pte_scan_limits = false,
	.name = "madvise",
};
910
911 static bool is_tmpfs(struct mem_ops *ops)
912 {
913         return ops == &__file_ops && finfo.type == VMA_SHMEM;
914 }
915
/*
 * Test that with enabled=always a write fault allocates a hugepage
 * directly, and that MADV_DONTNEED on one base page splits the PMD.
 */
static void alloc_at_fault(void)
{
	struct settings settings = *current_settings();
	char *p;

	settings.thp_enabled = THP_ALWAYS;
	push_settings(&settings);

	p = alloc_mapping(1);
	*p = 1;		/* single write fault should allocate a hugepage */
	printf("Allocate huge page on fault...");
	if (check_huge_anon(p, 1, hpage_pmd_size))
		success("OK");
	else
		fail("Fail");

	pop_settings();

	madvise(p, page_size, MADV_DONTNEED);
	printf("Split huge PMD on MADV_DONTNEED...");
	if (check_huge_anon(p, 0, hpage_pmd_size))
		success("OK");
	else
		fail("Fail");
	munmap(p, hpage_pmd_size);
}
942
943 static void collapse_full(struct collapse_context *c, struct mem_ops *ops)
944 {
945         void *p;
946         int nr_hpages = 4;
947         unsigned long size = nr_hpages * hpage_pmd_size;
948
949         p = ops->setup_area(nr_hpages);
950         ops->fault(p, 0, size);
951         c->collapse("Collapse multiple fully populated PTE table", p, nr_hpages,
952                     ops, true);
953         validate_memory(p, 0, size);
954         ops->cleanup_area(p, size);
955 }
956
957 static void collapse_empty(struct collapse_context *c, struct mem_ops *ops)
958 {
959         void *p;
960
961         p = ops->setup_area(1);
962         c->collapse("Do not collapse empty PTE table", p, 1, ops, false);
963         ops->cleanup_area(p, hpage_pmd_size);
964 }
965
966 static void collapse_single_pte_entry(struct collapse_context *c, struct mem_ops *ops)
967 {
968         void *p;
969
970         p = ops->setup_area(1);
971         ops->fault(p, 0, page_size);
972         c->collapse("Collapse PTE table with single PTE entry present", p,
973                     1, ops, true);
974         ops->cleanup_area(p, hpage_pmd_size);
975 }
976
/*
 * Verify the khugepaged/max_ptes_none limit: with one more empty PTE
 * than allowed, collapse must fail for contexts that enforce the limit
 * (khugepaged) and succeed for those that don't (MADV_COLLAPSE).
 */
static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *ops)
{
	int max_ptes_none = hpage_pmd_nr / 2;
	struct settings settings = *current_settings();
	void *p;

	settings.khugepaged.max_ptes_none = max_ptes_none;
	push_settings(&settings);

	p = ops->setup_area(1);

	if (is_tmpfs(ops)) {
		/* shmem pages always in the page cache */
		printf("tmpfs...");
		skip("Skip");
		goto skip;
	}

	/* One PTE too many left empty: only unenforced contexts collapse. */
	ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
	c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1,
		    ops, !c->enforce_pte_scan_limits);
	validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);

	if (c->enforce_pte_scan_limits) {
		/* Exactly max_ptes_none empty: collapse must succeed. */
		ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
		c->collapse("Collapse with max_ptes_none PTEs empty", p, 1, ops,
			    true);
		validate_memory(p, 0,
				(hpage_pmd_nr - max_ptes_none) * page_size);
	}
skip:
	ops->cleanup_area(p, hpage_pmd_size);
	pop_settings();
}
1011
1012 static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_ops *ops)
1013 {
1014         void *p;
1015
1016         p = ops->setup_area(1);
1017         ops->fault(p, 0, hpage_pmd_size);
1018
1019         printf("Swapout one page...");
1020         if (madvise(p, page_size, MADV_PAGEOUT)) {
1021                 perror("madvise(MADV_PAGEOUT)");
1022                 exit(EXIT_FAILURE);
1023         }
1024         if (check_swap(p, page_size)) {
1025                 success("OK");
1026         } else {
1027                 fail("Fail");
1028                 goto out;
1029         }
1030
1031         c->collapse("Collapse with swapping in single PTE entry", p, 1, ops,
1032                     true);
1033         validate_memory(p, 0, hpage_pmd_size);
1034 out:
1035         ops->cleanup_area(p, hpage_pmd_size);
1036 }
1037
1038 static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *ops)
1039 {
1040         int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
1041         void *p;
1042
1043         p = ops->setup_area(1);
1044         ops->fault(p, 0, hpage_pmd_size);
1045
1046         printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
1047         if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
1048                 perror("madvise(MADV_PAGEOUT)");
1049                 exit(EXIT_FAILURE);
1050         }
1051         if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
1052                 success("OK");
1053         } else {
1054                 fail("Fail");
1055                 goto out;
1056         }
1057
1058         c->collapse("Maybe collapse with max_ptes_swap exceeded", p, 1, ops,
1059                     !c->enforce_pte_scan_limits);
1060         validate_memory(p, 0, hpage_pmd_size);
1061
1062         if (c->enforce_pte_scan_limits) {
1063                 ops->fault(p, 0, hpage_pmd_size);
1064                 printf("Swapout %d of %d pages...", max_ptes_swap,
1065                        hpage_pmd_nr);
1066                 if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
1067                         perror("madvise(MADV_PAGEOUT)");
1068                         exit(EXIT_FAILURE);
1069                 }
1070                 if (check_swap(p, max_ptes_swap * page_size)) {
1071                         success("OK");
1072                 } else {
1073                         fail("Fail");
1074                         goto out;
1075                 }
1076
1077                 c->collapse("Collapse with max_ptes_swap pages swapped out", p,
1078                             1, ops, true);
1079                 validate_memory(p, 0, hpage_pmd_size);
1080         }
1081 out:
1082         ops->cleanup_area(p, hpage_pmd_size);
1083 }
1084
1085 static void collapse_single_pte_entry_compound(struct collapse_context *c, struct mem_ops *ops)
1086 {
1087         void *p;
1088
1089         p = alloc_hpage(ops);
1090
1091         if (is_tmpfs(ops)) {
1092                 /* MADV_DONTNEED won't evict tmpfs pages */
1093                 printf("tmpfs...");
1094                 skip("Skip");
1095                 goto skip;
1096         }
1097
1098         madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
1099         printf("Split huge page leaving single PTE mapping compound page...");
1100         madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
1101         if (ops->check_huge(p, 0))
1102                 success("OK");
1103         else
1104                 fail("Fail");
1105
1106         c->collapse("Collapse PTE table with single PTE mapping compound page",
1107                     p, 1, ops, true);
1108         validate_memory(p, 0, page_size);
1109 skip:
1110         ops->cleanup_area(p, hpage_pmd_size);
1111 }
1112
1113 static void collapse_full_of_compound(struct collapse_context *c, struct mem_ops *ops)
1114 {
1115         void *p;
1116
1117         p = alloc_hpage(ops);
1118         printf("Split huge page leaving single PTE page table full of compound pages...");
1119         madvise(p, page_size, MADV_NOHUGEPAGE);
1120         madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
1121         if (ops->check_huge(p, 0))
1122                 success("OK");
1123         else
1124                 fail("Fail");
1125
1126         c->collapse("Collapse PTE table full of compound pages", p, 1, ops,
1127                     true);
1128         validate_memory(p, 0, hpage_pmd_size);
1129         ops->cleanup_area(p, hpage_pmd_size);
1130 }
1131
/*
 * Build a PTE page table in which every slot maps a page taken from a
 * *different* PTE-mapped compound page, then collapse it.  Each loop
 * iteration faults in a fresh hugepage at BASE_ADDR, splits its PMD, and
 * uses a pair of mremap() calls to graft one of its pages onto the front
 * of the region being assembled just below BASE_ADDR.
 */
static void collapse_compound_extreme(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;
	int i;

	p = ops->setup_area(1);
	for (i = 0; i < hpage_pmd_nr; i++) {
		printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
				i + 1, hpage_pmd_nr);

		/* Fault in a hugepage at BASE_ADDR... */
		madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
		ops->fault(BASE_ADDR, 0, hpage_pmd_size);
		if (!ops->check_huge(BASE_ADDR, 1)) {
			printf("Failed to allocate huge page\n");
			exit(EXIT_FAILURE);
		}
		/* ...then split its PMD so it becomes PTE-mapped. */
		madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);

		/*
		 * Shrinking move: park the i pages assembled so far plus the
		 * first page of the new compound page in a scratch slot above
		 * BASE_ADDR; the new size drops the remaining pages of the
		 * fresh hugepage from the mapping.
		 */
		p = mremap(BASE_ADDR - i * page_size,
				i * page_size + hpage_pmd_size,
				(i + 1) * page_size,
				MREMAP_MAYMOVE | MREMAP_FIXED,
				BASE_ADDR + 2 * hpage_pmd_size);
		if (p == MAP_FAILED) {
			perror("mremap+unmap");
			exit(EXIT_FAILURE);
		}

		/*
		 * Growing move: put the enlarged assembly back so it ends at
		 * BASE_ADDR, leaving BASE_ADDR itself free for the next
		 * iteration's hugepage.
		 */
		p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
				(i + 1) * page_size,
				(i + 1) * page_size + hpage_pmd_size,
				MREMAP_MAYMOVE | MREMAP_FIXED,
				BASE_ADDR - (i + 1) * page_size);
		if (p == MAP_FAILED) {
			perror("mremap+alloc");
			exit(EXIT_FAILURE);
		}
	}

	ops->cleanup_area(BASE_ADDR, hpage_pmd_size);
	ops->fault(p, 0, hpage_pmd_size);
	/* The assembled region must be PTE-mapped, not one PMD hugepage. */
	if (!ops->check_huge(p, 1))
		success("OK");
	else
		fail("Fail");

	c->collapse("Collapse PTE table full of different compound pages", p, 1,
		    ops, true);

	validate_memory(p, 0, hpage_pmd_size);
	ops->cleanup_area(p, hpage_pmd_size);
}
1184
1185 static void collapse_fork(struct collapse_context *c, struct mem_ops *ops)
1186 {
1187         int wstatus;
1188         void *p;
1189
1190         p = ops->setup_area(1);
1191
1192         printf("Allocate small page...");
1193         ops->fault(p, 0, page_size);
1194         if (ops->check_huge(p, 0))
1195                 success("OK");
1196         else
1197                 fail("Fail");
1198
1199         printf("Share small page over fork()...");
1200         if (!fork()) {
1201                 /* Do not touch settings on child exit */
1202                 skip_settings_restore = true;
1203                 exit_status = 0;
1204
1205                 if (ops->check_huge(p, 0))
1206                         success("OK");
1207                 else
1208                         fail("Fail");
1209
1210                 ops->fault(p, page_size, 2 * page_size);
1211                 c->collapse("Collapse PTE table with single page shared with parent process",
1212                             p, 1, ops, true);
1213
1214                 validate_memory(p, 0, page_size);
1215                 ops->cleanup_area(p, hpage_pmd_size);
1216                 exit(exit_status);
1217         }
1218
1219         wait(&wstatus);
1220         exit_status += WEXITSTATUS(wstatus);
1221
1222         printf("Check if parent still has small page...");
1223         if (ops->check_huge(p, 0))
1224                 success("OK");
1225         else
1226                 fail("Fail");
1227         validate_memory(p, 0, page_size);
1228         ops->cleanup_area(p, hpage_pmd_size);
1229 }
1230
1231 static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *ops)
1232 {
1233         int wstatus;
1234         void *p;
1235
1236         p = alloc_hpage(ops);
1237         printf("Share huge page over fork()...");
1238         if (!fork()) {
1239                 /* Do not touch settings on child exit */
1240                 skip_settings_restore = true;
1241                 exit_status = 0;
1242
1243                 if (ops->check_huge(p, 1))
1244                         success("OK");
1245                 else
1246                         fail("Fail");
1247
1248                 printf("Split huge page PMD in child process...");
1249                 madvise(p, page_size, MADV_NOHUGEPAGE);
1250                 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
1251                 if (ops->check_huge(p, 0))
1252                         success("OK");
1253                 else
1254                         fail("Fail");
1255                 ops->fault(p, 0, page_size);
1256
1257                 write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
1258                 c->collapse("Collapse PTE table full of compound pages in child",
1259                             p, 1, ops, true);
1260                 write_num("khugepaged/max_ptes_shared",
1261                           current_settings()->khugepaged.max_ptes_shared);
1262
1263                 validate_memory(p, 0, hpage_pmd_size);
1264                 ops->cleanup_area(p, hpage_pmd_size);
1265                 exit(exit_status);
1266         }
1267
1268         wait(&wstatus);
1269         exit_status += WEXITSTATUS(wstatus);
1270
1271         printf("Check if parent still has huge page...");
1272         if (ops->check_huge(p, 1))
1273                 success("OK");
1274         else
1275                 fail("Fail");
1276         validate_memory(p, 0, hpage_pmd_size);
1277         ops->cleanup_area(p, hpage_pmd_size);
1278 }
1279
1280 static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops *ops)
1281 {
1282         int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
1283         int wstatus;
1284         void *p;
1285
1286         p = alloc_hpage(ops);
1287         printf("Share huge page over fork()...");
1288         if (!fork()) {
1289                 /* Do not touch settings on child exit */
1290                 skip_settings_restore = true;
1291                 exit_status = 0;
1292
1293                 if (ops->check_huge(p, 1))
1294                         success("OK");
1295                 else
1296                         fail("Fail");
1297
1298                 printf("Trigger CoW on page %d of %d...",
1299                                 hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
1300                 ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
1301                 if (ops->check_huge(p, 0))
1302                         success("OK");
1303                 else
1304                         fail("Fail");
1305
1306                 c->collapse("Maybe collapse with max_ptes_shared exceeded", p,
1307                             1, ops, !c->enforce_pte_scan_limits);
1308
1309                 if (c->enforce_pte_scan_limits) {
1310                         printf("Trigger CoW on page %d of %d...",
1311                                hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
1312                         ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared) *
1313                                     page_size);
1314                         if (ops->check_huge(p, 0))
1315                                 success("OK");
1316                         else
1317                                 fail("Fail");
1318
1319                         c->collapse("Collapse with max_ptes_shared PTEs shared",
1320                                     p, 1, ops, true);
1321                 }
1322
1323                 validate_memory(p, 0, hpage_pmd_size);
1324                 ops->cleanup_area(p, hpage_pmd_size);
1325                 exit(exit_status);
1326         }
1327
1328         wait(&wstatus);
1329         exit_status += WEXITSTATUS(wstatus);
1330
1331         printf("Check if parent still has huge page...");
1332         if (ops->check_huge(p, 1))
1333                 success("OK");
1334         else
1335                 fail("Fail");
1336         validate_memory(p, 0, hpage_pmd_size);
1337         ops->cleanup_area(p, hpage_pmd_size);
1338 }
1339
1340 static void madvise_collapse_existing_thps(struct collapse_context *c,
1341                                            struct mem_ops *ops)
1342 {
1343         void *p;
1344
1345         p = ops->setup_area(1);
1346         ops->fault(p, 0, hpage_pmd_size);
1347         c->collapse("Collapse fully populated PTE table...", p, 1, ops, true);
1348         validate_memory(p, 0, hpage_pmd_size);
1349
1350         /* c->collapse() will find a hugepage and complain - call directly. */
1351         __madvise_collapse("Re-collapse PMD-mapped hugepage", p, 1, ops, true);
1352         validate_memory(p, 0, hpage_pmd_size);
1353         ops->cleanup_area(p, hpage_pmd_size);
1354 }
1355
1356 /*
1357  * Test race with khugepaged where page tables have been retracted and
1358  * pmd cleared.
1359  */
1360 static void madvise_retracted_page_tables(struct collapse_context *c,
1361                                           struct mem_ops *ops)
1362 {
1363         void *p;
1364         int nr_hpages = 1;
1365         unsigned long size = nr_hpages * hpage_pmd_size;
1366
1367         p = ops->setup_area(nr_hpages);
1368         ops->fault(p, 0, size);
1369
1370         /* Let khugepaged collapse and leave pmd cleared */
1371         if (wait_for_scan("Collapse and leave PMD cleared", p, nr_hpages,
1372                           ops)) {
1373                 fail("Timeout");
1374                 return;
1375         }
1376         success("OK");
1377         c->collapse("Install huge PMD from page cache", p, nr_hpages, ops,
1378                     true);
1379         validate_memory(p, 0, size);
1380         ops->cleanup_area(p, size);
1381 }
1382
/* Print command-line help to stderr and terminate unsuccessfully. */
static void usage(void)
{
	static const char * const help[] = {
		"\nUsage: ./khugepaged <test type> [dir]\n\n",
		"\t<test type>\t: <context>:<mem_type>\n",
		"\t<context>\t: [all|khugepaged|madvise]\n",
		"\t<mem_type>\t: [all|anon|file|shmem]\n",
		"\n\t\"file,all\" mem_type requires [dir] argument\n",
		"\n\t\"file,all\" mem_type requires kernel built with\n",
		"\tCONFIG_READ_ONLY_THP_FOR_FS=y\n",
		"\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n",
		"\tmounted with huge=madvise option for khugepaged tests to work\n",
	};
	size_t i;

	for (i = 0; i < sizeof(help) / sizeof(help[0]); i++)
		fputs(help[i], stderr);
	exit(1);
}
1396
1397 static void parse_test_type(int argc, const char **argv)
1398 {
1399         char *buf;
1400         const char *token;
1401
1402         if (argc == 1) {
1403                 /* Backwards compatibility */
1404                 khugepaged_context =  &__khugepaged_context;
1405                 madvise_context =  &__madvise_context;
1406                 anon_ops = &__anon_ops;
1407                 return;
1408         }
1409
1410         buf = strdup(argv[1]);
1411         token = strsep(&buf, ":");
1412
1413         if (!strcmp(token, "all")) {
1414                 khugepaged_context =  &__khugepaged_context;
1415                 madvise_context =  &__madvise_context;
1416         } else if (!strcmp(token, "khugepaged")) {
1417                 khugepaged_context =  &__khugepaged_context;
1418         } else if (!strcmp(token, "madvise")) {
1419                 madvise_context =  &__madvise_context;
1420         } else {
1421                 usage();
1422         }
1423
1424         if (!buf)
1425                 usage();
1426
1427         if (!strcmp(buf, "all")) {
1428                 file_ops =  &__file_ops;
1429                 anon_ops = &__anon_ops;
1430                 shmem_ops = &__shmem_ops;
1431         } else if (!strcmp(buf, "anon")) {
1432                 anon_ops = &__anon_ops;
1433         } else if (!strcmp(buf, "file")) {
1434                 file_ops =  &__file_ops;
1435         } else if (!strcmp(buf, "shmem")) {
1436                 shmem_ops = &__shmem_ops;
1437         } else {
1438                 usage();
1439         }
1440
1441         if (!file_ops)
1442                 return;
1443
1444         if (argc != 3)
1445                 usage();
1446 }
1447
/*
 * Entry point: configure THP/khugepaged sysfs settings for the run, then
 * execute every selected collapse test for each enabled
 * (collapse context, memory type) pair.
 */
int main(int argc, const char **argv)
{
	/* Baseline sysfs configuration pushed for the whole run. */
	struct settings default_settings = {
		.thp_enabled = THP_MADVISE,
		.thp_defrag = THP_DEFRAG_ALWAYS,
		.shmem_enabled = SHMEM_ADVISE,
		.use_zero_page = 0,
		.khugepaged = {
			.defrag = 1,
			/* Short sleep intervals keep khugepaged-driven tests fast. */
			.alloc_sleep_millisecs = 10,
			.scan_sleep_millisecs = 10,
		},
		/*
		 * When testing file-backed memory, the collapse path
		 * looks at how many pages are found in the page cache, not
		 * what pages are mapped. Disable read ahead optimization so
		 * pages don't find their way into the page cache unless
		 * we mem_ops->fault() them in.
		 */
		.read_ahead_kb = 0,
	};

	parse_test_type(argc, argv);

	if (file_ops)
		get_finfo(argv[2]);

	/* Unbuffered stdout so progress output appears immediately. */
	setbuf(stdout, NULL);

	page_size = getpagesize();
	hpage_pmd_size = read_pmd_pagesize();
	hpage_pmd_nr = hpage_pmd_size / page_size;

	/* Scale the PTE scan limits to the discovered PMD page count. */
	default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
	default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
	default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
	default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;

	/* Save current sysfs state so restore_settings() can put it back. */
	save_settings();
	push_settings(&default_settings);

	alloc_at_fault();

/* Run test t only when both its context c and mem ops o were selected. */
#define TEST(t, c, o) do { \
	if (c && o) { \
		printf("\nRun test: " #t " (%s:%s)\n", c->name, o->name); \
		t(c, o); \
	} \
	} while (0)

	TEST(collapse_full, khugepaged_context, anon_ops);
	TEST(collapse_full, khugepaged_context, file_ops);
	TEST(collapse_full, khugepaged_context, shmem_ops);
	TEST(collapse_full, madvise_context, anon_ops);
	TEST(collapse_full, madvise_context, file_ops);
	TEST(collapse_full, madvise_context, shmem_ops);

	TEST(collapse_empty, khugepaged_context, anon_ops);
	TEST(collapse_empty, madvise_context, anon_ops);

	TEST(collapse_single_pte_entry, khugepaged_context, anon_ops);
	TEST(collapse_single_pte_entry, khugepaged_context, file_ops);
	TEST(collapse_single_pte_entry, khugepaged_context, shmem_ops);
	TEST(collapse_single_pte_entry, madvise_context, anon_ops);
	TEST(collapse_single_pte_entry, madvise_context, file_ops);
	TEST(collapse_single_pte_entry, madvise_context, shmem_ops);

	TEST(collapse_max_ptes_none, khugepaged_context, anon_ops);
	TEST(collapse_max_ptes_none, khugepaged_context, file_ops);
	TEST(collapse_max_ptes_none, madvise_context, anon_ops);
	TEST(collapse_max_ptes_none, madvise_context, file_ops);

	TEST(collapse_single_pte_entry_compound, khugepaged_context, anon_ops);
	TEST(collapse_single_pte_entry_compound, khugepaged_context, file_ops);
	TEST(collapse_single_pte_entry_compound, madvise_context, anon_ops);
	TEST(collapse_single_pte_entry_compound, madvise_context, file_ops);

	TEST(collapse_full_of_compound, khugepaged_context, anon_ops);
	TEST(collapse_full_of_compound, khugepaged_context, file_ops);
	TEST(collapse_full_of_compound, khugepaged_context, shmem_ops);
	TEST(collapse_full_of_compound, madvise_context, anon_ops);
	TEST(collapse_full_of_compound, madvise_context, file_ops);
	TEST(collapse_full_of_compound, madvise_context, shmem_ops);

	TEST(collapse_compound_extreme, khugepaged_context, anon_ops);
	TEST(collapse_compound_extreme, madvise_context, anon_ops);

	TEST(collapse_swapin_single_pte, khugepaged_context, anon_ops);
	TEST(collapse_swapin_single_pte, madvise_context, anon_ops);

	TEST(collapse_max_ptes_swap, khugepaged_context, anon_ops);
	TEST(collapse_max_ptes_swap, madvise_context, anon_ops);

	TEST(collapse_fork, khugepaged_context, anon_ops);
	TEST(collapse_fork, madvise_context, anon_ops);

	TEST(collapse_fork_compound, khugepaged_context, anon_ops);
	TEST(collapse_fork_compound, madvise_context, anon_ops);

	TEST(collapse_max_ptes_shared, khugepaged_context, anon_ops);
	TEST(collapse_max_ptes_shared, madvise_context, anon_ops);

	TEST(madvise_collapse_existing_thps, madvise_context, anon_ops);
	TEST(madvise_collapse_existing_thps, madvise_context, file_ops);
	TEST(madvise_collapse_existing_thps, madvise_context, shmem_ops);

	TEST(madvise_retracted_page_tables, madvise_context, file_ops);
	TEST(madvise_retracted_page_tables, madvise_context, shmem_ops);

	restore_settings(0);
}