Merge tag 'i3c/for-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/i3c/linux
[linux-block.git] / tools / testing / selftests / cgroup / test_zswap.c
1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3
4 #include <linux/limits.h>
5 #include <unistd.h>
6 #include <stdio.h>
7 #include <signal.h>
8 #include <sys/sysinfo.h>
9 #include <string.h>
10 #include <sys/wait.h>
11 #include <sys/mman.h>
12
13 #include "../kselftest.h"
14 #include "cgroup_util.h"
15
/*
 * Read one unsigned decimal number from the start of the file at @path.
 *
 * On success the number is stored in *@value and 0 is returned. If the file
 * cannot be opened or does not start with a number, -1 is returned and
 * *@value is left untouched.
 */
static int read_int(const char *path, size_t *value)
{
        size_t parsed;
        FILE *file;
        int ret = 0;

        file = fopen(path, "r");
        if (!file)
                return -1;
        /* %zu is the conversion that matches size_t; %ld expects a long *. */
        if (fscanf(file, "%zu", &parsed) != 1)
                ret = -1;
        else
                *value = parsed;
        fclose(file);
        return ret;
}
29
/*
 * Write @value, followed by a newline, to /proc/sys/vm/min_free_kbytes.
 *
 * Returns the (positive) number of characters formatted by fprintf() on
 * success, or a negative value if the file cannot be opened, formatting
 * fails, or the buffered data cannot be flushed when the stream is closed.
 */
static int set_min_free_kb(size_t value)
{
        FILE *file;
        int ret;

        file = fopen("/proc/sys/vm/min_free_kbytes", "w");
        if (!file)
                return -1;
        /* %zu matches the size_t argument; %ld would expect a long. */
        ret = fprintf(file, "%zu\n", value);
        /*
         * stdio buffers the output, so a rejected write may only show up
         * when fclose() flushes the stream.
         */
        if (fclose(file) != 0)
                ret = -1;
        return ret;
}
42
/*
 * Fetch the current value of /proc/sys/vm/min_free_kbytes into *@value.
 * The return value is whatever read_int() reports for that file.
 */
static int read_min_free_kb(size_t *value)
{
        static const char min_free_path[] = "/proc/sys/vm/min_free_kbytes";

        return read_int(min_free_path, value);
}
47
/*
 * Fetch the number found in /sys/kernel/debug/zswap/stored_pages into
 * *@value. The return value is whatever read_int() reports for that file.
 */
static int get_zswap_stored_pages(size_t *value)
{
        static const char stored_pages_path[] =
                "/sys/kernel/debug/zswap/stored_pages";

        return read_int(stored_pages_path, value);
}
52
/*
 * Look up the "zswpwb" entry in the memory.stat file of cgroup @cg.
 *
 * The value comes back from cg_read_key_long() as a long and is narrowed to
 * int by the return statement.
 */
static int get_cg_wb_count(const char *cg)
{
        long written_back = cg_read_key_long(cg, "memory.stat", "zswpwb");

        return written_back;
}
57
/*
 * Look up the "zswpout " entry in the memory.stat file of @cgroup and
 * return what cg_read_key_long() yields for it.
 *
 * NOTE(review): the key carries a trailing space, presumably so that only
 * the exact counter name matches - confirm against cg_read_key_long().
 */
static long get_zswpout(const char *cgroup)
{
        long swapped_out = cg_read_key_long(cgroup, "memory.stat", "zswpout ");

        return swapped_out;
}
62
/*
 * Allocate a buffer, write one byte every 4095 bytes, then read the same
 * bytes back.
 *
 * @cgroup is unused. @arg carries the allocation size in bytes, cast to a
 * pointer by the caller.
 *
 * Returns 0 if every byte reads back as written, -1 if the allocation fails
 * or any byte differs.
 */
static int allocate_and_read_bytes(const char *cgroup, void *arg)
{
        size_t size = (size_t)arg;
        char *mem = malloc(size);
        int ret = 0;

        if (!mem)
                return -1;
        /*
         * Index with size_t: an int index would be compared against a size_t
         * bound and overflow for sizes above INT_MAX.
         */
        for (size_t i = 0; i < size; i += 4095)
                mem[i] = 'a';

        /* Go through the allocated memory to (z)swap in and out pages */
        for (size_t i = 0; i < size; i += 4095) {
                if (mem[i] != 'a')
                        ret = -1;
        }

        free(mem);
        return ret;
}
83
/*
 * Allocate a buffer, write one byte every 4095 bytes, then free it.
 *
 * @cgroup is unused. @arg carries the allocation size in bytes, cast to a
 * pointer by the caller.
 *
 * Returns 0 on success, -1 if the allocation fails.
 */
static int allocate_bytes(const char *cgroup, void *arg)
{
        size_t size = (size_t)arg;
        char *mem = malloc(size);

        if (!mem)
                return -1;
        /* size_t index: matches the bound's type and cannot overflow as int would. */
        for (size_t i = 0; i < size; i += 4095)
                mem[i] = 'a';
        free(mem);
        return 0;
}
96
/*
 * Create the cgroup @name under @root and write "1M" to its memory.max.
 *
 * Returns the cgroup path obtained from cg_name(), which the caller must
 * free(), or NULL on any failure. A cgroup that was created but could not be
 * configured is destroyed again before returning.
 */
static char *setup_test_group_1M(const char *root, const char *name)
{
        char *path = cg_name(root, name);

        if (!path)
                return NULL;

        if (cg_create(path) == 0) {
                if (cg_write(path, "memory.max", "1M") == 0)
                        return path;
                /* Created but not configurable: remove it again. */
                cg_destroy(path);
        }

        free(path);
        return NULL;
}
114
115 /*
116  * Sanity test to check that pages are written into zswap.
117  */
118 static int test_zswap_usage(const char *root)
119 {
120         long zswpout_before, zswpout_after;
121         int ret = KSFT_FAIL;
122         char *test_group;
123
124         test_group = cg_name(root, "no_shrink_test");
125         if (!test_group)
126                 goto out;
127         if (cg_create(test_group))
128                 goto out;
129         if (cg_write(test_group, "memory.max", "1M"))
130                 goto out;
131
132         zswpout_before = get_zswpout(test_group);
133         if (zswpout_before < 0) {
134                 ksft_print_msg("Failed to get zswpout\n");
135                 goto out;
136         }
137
138         /* Allocate more than memory.max to push memory into zswap */
139         if (cg_run(test_group, allocate_bytes, (void *)MB(4)))
140                 goto out;
141
142         /* Verify that pages come into zswap */
143         zswpout_after = get_zswpout(test_group);
144         if (zswpout_after <= zswpout_before) {
145                 ksft_print_msg("zswpout does not increase after test program\n");
146                 goto out;
147         }
148         ret = KSFT_PASS;
149
150 out:
151         cg_destroy(test_group);
152         free(test_group);
153         return ret;
154 }
155
156 /*
157  * Check that when memory.zswap.max = 0, no pages can go to the zswap pool for
158  * the cgroup.
159  */
160 static int test_swapin_nozswap(const char *root)
161 {
162         int ret = KSFT_FAIL;
163         char *test_group;
164         long swap_peak, zswpout;
165
166         test_group = cg_name(root, "no_zswap_test");
167         if (!test_group)
168                 goto out;
169         if (cg_create(test_group))
170                 goto out;
171         if (cg_write(test_group, "memory.max", "8M"))
172                 goto out;
173         if (cg_write(test_group, "memory.zswap.max", "0"))
174                 goto out;
175
176         /* Allocate and read more than memory.max to trigger swapin */
177         if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
178                 goto out;
179
180         /* Verify that pages are swapped out, but no zswap happened */
181         swap_peak = cg_read_long(test_group, "memory.swap.peak");
182         if (swap_peak < 0) {
183                 ksft_print_msg("failed to get cgroup's swap_peak\n");
184                 goto out;
185         }
186
187         if (swap_peak < MB(24)) {
188                 ksft_print_msg("at least 24MB of memory should be swapped out\n");
189                 goto out;
190         }
191
192         zswpout = get_zswpout(test_group);
193         if (zswpout < 0) {
194                 ksft_print_msg("failed to get zswpout\n");
195                 goto out;
196         }
197
198         if (zswpout > 0) {
199                 ksft_print_msg("zswapout > 0 when memory.zswap.max = 0\n");
200                 goto out;
201         }
202
203         ret = KSFT_PASS;
204
205 out:
206         cg_destroy(test_group);
207         free(test_group);
208         return ret;
209 }
210
211 /* Simple test to verify the (z)swapin code paths */
212 static int test_zswapin(const char *root)
213 {
214         int ret = KSFT_FAIL;
215         char *test_group;
216         long zswpin;
217
218         test_group = cg_name(root, "zswapin_test");
219         if (!test_group)
220                 goto out;
221         if (cg_create(test_group))
222                 goto out;
223         if (cg_write(test_group, "memory.max", "8M"))
224                 goto out;
225         if (cg_write(test_group, "memory.zswap.max", "max"))
226                 goto out;
227
228         /* Allocate and read more than memory.max to trigger (z)swap in */
229         if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
230                 goto out;
231
232         zswpin = cg_read_key_long(test_group, "memory.stat", "zswpin ");
233         if (zswpin < 0) {
234                 ksft_print_msg("failed to get zswpin\n");
235                 goto out;
236         }
237
238         if (zswpin < MB(24) / PAGE_SIZE) {
239                 ksft_print_msg("at least 24MB should be brought back from zswap\n");
240                 goto out;
241         }
242
243         ret = KSFT_PASS;
244
245 out:
246         cg_destroy(test_group);
247         free(test_group);
248         return ret;
249 }
250
251 /*
252  * When trying to store a memcg page in zswap, if the memcg hits its memory
253  * limit in zswap, writeback should affect only the zswapped pages of that
254  * memcg.
255  */
256 static int test_no_invasive_cgroup_shrink(const char *root)
257 {
258         int ret = KSFT_FAIL;
259         size_t control_allocation_size = MB(10);
260         char *control_allocation, *wb_group = NULL, *control_group = NULL;
261
262         wb_group = setup_test_group_1M(root, "per_memcg_wb_test1");
263         if (!wb_group)
264                 return KSFT_FAIL;
265         if (cg_write(wb_group, "memory.zswap.max", "10K"))
266                 goto out;
267         control_group = setup_test_group_1M(root, "per_memcg_wb_test2");
268         if (!control_group)
269                 goto out;
270
271         /* Push some test_group2 memory into zswap */
272         if (cg_enter_current(control_group))
273                 goto out;
274         control_allocation = malloc(control_allocation_size);
275         for (int i = 0; i < control_allocation_size; i += 4095)
276                 control_allocation[i] = 'a';
277         if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1)
278                 goto out;
279
280         /* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */
281         if (cg_run(wb_group, allocate_bytes, (void *)MB(10)))
282                 goto out;
283
284         /* Verify that only zswapped memory from gwb_group has been written back */
285         if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0)
286                 ret = KSFT_PASS;
287 out:
288         cg_enter_current(root);
289         if (control_group) {
290                 cg_destroy(control_group);
291                 free(control_group);
292         }
293         cg_destroy(wb_group);
294         free(wb_group);
295         if (control_allocation)
296                 free(control_allocation);
297         return ret;
298 }
299
/*
 * Arguments handed to no_kmem_bypass_child(). test_no_kmem_bypass() places
 * this structure in a MAP_SHARED mapping, so writes made by the child are
 * visible to the parent.
 */
struct no_kmem_bypass_child_args {
        /* Number of bytes the child allocates and touches. */
        size_t target_alloc_bytes;
        /*
         * Used as a flag: the parent clears it before spawning the child and
         * the child sets it once its allocation attempt is over.
         */
        size_t child_allocated;
};
304
305 static int no_kmem_bypass_child(const char *cgroup, void *arg)
306 {
307         struct no_kmem_bypass_child_args *values = arg;
308         void *allocation;
309
310         allocation = malloc(values->target_alloc_bytes);
311         if (!allocation) {
312                 values->child_allocated = true;
313                 return -1;
314         }
315         for (long i = 0; i < values->target_alloc_bytes; i += 4095)
316                 ((char *)allocation)[i] = 'a';
317         values->child_allocated = true;
318         pause();
319         free(allocation);
320         return 0;
321 }
322
323 /*
324  * When pages owned by a memcg are pushed to zswap by kswapd, they should be
325  * charged to that cgroup. This wasn't the case before commit
326  * cd08d80ecdac("mm: correctly charge compressed memory to its memcg").
327  *
328  * The test first allocates memory in a memcg, then raises min_free_kbytes to
329  * a very high value so that the allocation falls below low wm, then makes
330  * another allocation to trigger kswapd that should push the memcg-owned pages
331  * to zswap and verifies that the zswap pages are correctly charged.
332  *
333  * To be run on a VM with at most 4G of memory.
334  */
335 static int test_no_kmem_bypass(const char *root)
336 {
337         size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
338         struct no_kmem_bypass_child_args *values;
339         size_t trigger_allocation_size;
340         int wait_child_iteration = 0;
341         long stored_pages_threshold;
342         struct sysinfo sys_info;
343         int ret = KSFT_FAIL;
344         int child_status;
345         char *test_group;
346         pid_t child_pid;
347
348         /* Read sys info and compute test values accordingly */
349         if (sysinfo(&sys_info) != 0)
350                 return KSFT_FAIL;
351         if (sys_info.totalram > 5000000000)
352                 return KSFT_SKIP;
353         values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
354                         PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
355         if (values == MAP_FAILED)
356                 return KSFT_FAIL;
357         if (read_min_free_kb(&min_free_kb_original))
358                 return KSFT_FAIL;
359         min_free_kb_high = sys_info.totalram / 2000;
360         min_free_kb_low = sys_info.totalram / 500000;
361         values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
362                 sys_info.totalram * 5 / 100;
363         stored_pages_threshold = sys_info.totalram / 5 / 4096;
364         trigger_allocation_size = sys_info.totalram / 20;
365
366         /* Set up test memcg */
367         if (cg_write(root, "cgroup.subtree_control", "+memory"))
368                 goto out;
369         test_group = cg_name(root, "kmem_bypass_test");
370         if (!test_group)
371                 goto out;
372
373         /* Spawn memcg child and wait for it to allocate */
374         set_min_free_kb(min_free_kb_low);
375         if (cg_create(test_group))
376                 goto out;
377         values->child_allocated = false;
378         child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
379         if (child_pid < 0)
380                 goto out;
381         while (!values->child_allocated && wait_child_iteration++ < 10000)
382                 usleep(1000);
383
384         /* Try to wakeup kswapd and let it push child memory to zswap */
385         set_min_free_kb(min_free_kb_high);
386         for (int i = 0; i < 20; i++) {
387                 size_t stored_pages;
388                 char *trigger_allocation = malloc(trigger_allocation_size);
389
390                 if (!trigger_allocation)
391                         break;
392                 for (int i = 0; i < trigger_allocation_size; i += 4095)
393                         trigger_allocation[i] = 'b';
394                 usleep(100000);
395                 free(trigger_allocation);
396                 if (get_zswap_stored_pages(&stored_pages))
397                         break;
398                 if (stored_pages < 0)
399                         break;
400                 /* If memory was pushed to zswap, verify it belongs to memcg */
401                 if (stored_pages > stored_pages_threshold) {
402                         int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
403                         int delta = stored_pages * 4096 - zswapped;
404                         int result_ok = delta < stored_pages * 4096 / 4;
405
406                         ret = result_ok ? KSFT_PASS : KSFT_FAIL;
407                         break;
408                 }
409         }
410
411         kill(child_pid, SIGTERM);
412         waitpid(child_pid, &child_status, 0);
413 out:
414         set_min_free_kb(min_free_kb_original);
415         cg_destroy(test_group);
416         free(test_group);
417         return ret;
418 }
419
420 #define T(x) { x, #x }
421 struct zswap_test {
422         int (*fn)(const char *root);
423         const char *name;
424 } tests[] = {
425         T(test_zswap_usage),
426         T(test_swapin_nozswap),
427         T(test_zswapin),
428         T(test_no_kmem_bypass),
429         T(test_no_invasive_cgroup_shrink),
430 };
431 #undef T
432
/*
 * Report whether /sys/module/zswap exists, which main() uses to decide
 * whether the tests can run at all.
 */
static bool zswap_configured(void)
{
        if (access("/sys/module/zswap", F_OK) != 0)
                return false;
        return true;
}
437
438 int main(int argc, char **argv)
439 {
440         char root[PATH_MAX];
441         int i, ret = EXIT_SUCCESS;
442
443         if (cg_find_unified_root(root, sizeof(root)))
444                 ksft_exit_skip("cgroup v2 isn't mounted\n");
445
446         if (!zswap_configured())
447                 ksft_exit_skip("zswap isn't configured\n");
448
449         /*
450          * Check that memory controller is available:
451          * memory is listed in cgroup.controllers
452          */
453         if (cg_read_strstr(root, "cgroup.controllers", "memory"))
454                 ksft_exit_skip("memory controller isn't available\n");
455
456         if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
457                 if (cg_write(root, "cgroup.subtree_control", "+memory"))
458                         ksft_exit_skip("Failed to set memory controller\n");
459
460         for (i = 0; i < ARRAY_SIZE(tests); i++) {
461                 switch (tests[i].fn(root)) {
462                 case KSFT_PASS:
463                         ksft_test_result_pass("%s\n", tests[i].name);
464                         break;
465                 case KSFT_SKIP:
466                         ksft_test_result_skip("%s\n", tests[i].name);
467                         break;
468                 default:
469                         ret = EXIT_FAILURE;
470                         ksft_test_result_fail("%s\n", tests[i].name);
471                         break;
472                 }
473         }
474
475         return ret;
476 }