Merge tag 'm68k-for-v6.4-tag1' of git://git.kernel.org/pub/scm/linux/kernel/git/geert...
[linux-block.git] / tools / testing / selftests / cgroup / test_memcontrol.c
CommitLineData
84092dbc
RG
1/* SPDX-License-Identifier: GPL-2.0 */
2#define _GNU_SOURCE
3
4#include <linux/limits.h>
a987785d 5#include <linux/oom.h>
84092dbc
RG
6#include <fcntl.h>
7#include <stdio.h>
8#include <stdlib.h>
9#include <string.h>
10#include <sys/stat.h>
11#include <sys/types.h>
12#include <unistd.h>
5f8f0193
MR
13#include <sys/socket.h>
14#include <sys/wait.h>
15#include <arpa/inet.h>
16#include <netinet/in.h>
17#include <netdb.h>
18#include <errno.h>
6323ec54 19#include <sys/mman.h>
84092dbc
RG
20
21#include "../kselftest.h"
22#include "cgroup_util.h"
23
72b1e03a 24static bool has_localevents;
cdc69458
DV
25static bool has_recursiveprot;
26
84092dbc
RG
/*
 * This test creates two nested cgroups with and without enabling
 * the memory controller.
 *
 * Returns KSFT_PASS when the child under a parent with "+memory" in
 * cgroup.subtree_control sees the memory controller, and the child under
 * a parent without it does not; KSFT_FAIL otherwise.
 */
static int test_memcg_subtree_control(const char *root)
{
	char *parent, *child, *parent2 = NULL, *child2 = NULL;
	int ret = KSFT_FAIL;
	char buf[PAGE_SIZE];

	/* Create two nested cgroups with the memory controller enabled */
	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");
	if (!parent || !child)
		goto cleanup_free;

	if (cg_create(parent))
		goto cleanup_free;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup_parent;

	if (cg_create(child))
		goto cleanup_parent;

	/* With "+memory" delegated, the child must list "memory". */
	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
		goto cleanup_child;

	/* Create two nested cgroups without enabling memory controller */
	parent2 = cg_name(root, "memcg_test_1");
	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
	if (!parent2 || !child2)
		goto cleanup_free2;

	if (cg_create(parent2))
		goto cleanup_free2;

	if (cg_create(child2))
		goto cleanup_parent2;

	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
		goto cleanup_all;

	/* Without delegation, "memory" must be absent from the child. */
	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
		goto cleanup_all;

	ret = KSFT_PASS;

	/*
	 * The labels below deliberately fall through: each entry point
	 * tears down everything created up to that point, in reverse
	 * order (children before parents, destroy before free).
	 */
cleanup_all:
	cg_destroy(child2);
cleanup_parent2:
	cg_destroy(parent2);
cleanup_free2:
	free(parent2);
	free(child2);
cleanup_child:
	cg_destroy(child);
cleanup_parent:
	cg_destroy(parent);
cleanup_free:
	free(parent);
	free(child);

	return ret;
}
92
93static int alloc_anon_50M_check(const char *cgroup, void *arg)
94{
95 size_t size = MB(50);
96 char *buf, *ptr;
97 long anon, current;
98 int ret = -1;
99
100 buf = malloc(size);
c83f320e
IO
101 if (buf == NULL) {
102 fprintf(stderr, "malloc() failed\n");
103 return -1;
104 }
105
84092dbc
RG
106 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
107 *ptr = 0;
108
109 current = cg_read_long(cgroup, "memory.current");
110 if (current < size)
111 goto cleanup;
112
113 if (!values_close(size, current, 3))
114 goto cleanup;
115
116 anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
117 if (anon < 0)
118 goto cleanup;
119
120 if (!values_close(anon, current, 3))
121 goto cleanup;
122
123 ret = 0;
124cleanup:
125 free(buf);
126 return ret;
127}
128
/*
 * Create 50M of pagecache inside @cgroup via a temporary file, then
 * check that memory.current and the "file" counter of memory.stat
 * reflect roughly that amount.  Returns 0 on success, -1 on failure.
 */
static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	long current, file;
	int fd, ret = -1;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

	file = cg_read_key_long(cgroup, "memory.stat", "file ");
	if (file < 0 || !values_close(file, current, 10))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}
160
161/*
162 * This test create a memory cgroup, allocates
163 * some anonymous memory and some pagecache
164 * and check memory.current and some memory.stat values.
165 */
166static int test_memcg_current(const char *root)
167{
168 int ret = KSFT_FAIL;
169 long current;
170 char *memcg;
171
172 memcg = cg_name(root, "memcg_test");
173 if (!memcg)
174 goto cleanup;
175
176 if (cg_create(memcg))
177 goto cleanup;
178
179 current = cg_read_long(memcg, "memory.current");
180 if (current != 0)
181 goto cleanup;
182
183 if (cg_run(memcg, alloc_anon_50M_check, NULL))
184 goto cleanup;
185
186 if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
187 goto cleanup;
188
189 ret = KSFT_PASS;
190
191cleanup:
192 cg_destroy(memcg);
193 free(memcg);
194
195 return ret;
196}
197
84092dbc
RG
/*
 * Populate 50M of pagecache through @arg (a temp file descriptor cast
 * to a pointer), then linger until our original parent exits.  Used to
 * keep a known amount of file memory charged to the cgroup.
 */
static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
	int fd = (long)arg;
	int parent_pid = getppid();

	if (alloc_pagecache(fd, MB(50)))
		return -1;

	/* Stay alive (and charged) while the spawning parent exists. */
	while (getppid() == parent_pid)
		sleep(1);

	return 0;
}
211
a987785d
JK
212static int alloc_anon_noexit(const char *cgroup, void *arg)
213{
214 int ppid = getppid();
a3622a53
YA
215 size_t size = (unsigned long)arg;
216 char *buf, *ptr;
a987785d 217
a3622a53 218 buf = malloc(size);
c83f320e
IO
219 if (buf == NULL) {
220 fprintf(stderr, "malloc() failed\n");
221 return -1;
222 }
223
a3622a53
YA
224 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
225 *ptr = 0;
a987785d
JK
226
227 while (getppid() == ppid)
228 sleep(1);
229
a3622a53 230 free(buf);
a987785d
JK
231 return 0;
232}
233
/*
 * Wait (up to ~1 second, polling every 100ms) until all processes of
 * @cgroup have been killed asynchronously by the OOM killer.
 * Returns 0 once cgroup.procs is empty, -1 on timeout.
 */
static int cg_test_proc_killed(const char *cgroup)
{
	int attempt;

	for (attempt = 0; attempt < 10; attempt++) {
		if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
			return 0;

		usleep(100000);
	}
	return -1;
}
250
1c746977
YA
251static bool reclaim_until(const char *memcg, long goal);
252
84092dbc
RG
/*
 * First, this test creates the following hierarchy:
 * A       memory.min = 0,    memory.max = 200M
 * A/B     memory.min = 50M
 * A/B/C   memory.min = 75M,  memory.current = 50M
 * A/B/D   memory.min = 25M,  memory.current = 50M
 * A/B/E   memory.min = 0,    memory.current = 50M
 * A/B/F   memory.min = 500M, memory.current = 0
 *
 * (or memory.low if we test soft protection)
 *
 * Usages are pagecache and the test keeps a running
 * process in every leaf cgroup.
 * Then it creates A/G and creates a significant
 * memory pressure in A.
 *
 * Then it checks actual memory usages and expects that:
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 29M
 * A/B/D  memory.current ~= 21M
 * A/B/E  memory.current ~= 0
 * A/B/F  memory.current  = 0
 * (for origin of the numbers, see model in memcg_protection.m.)
 *
 * After that it tries to allocate more than there is
 * unprotected memory in A available, and checks that:
 * a) memory.min protects pagecache even in this case,
 * b) memory.low allows reclaiming page cache with low events.
 *
 * Then we try to reclaim from A/B/C using memory.reclaim until its
 * usage reaches 10M.
 * This makes sure that:
 * (a) We ignore the protection of the reclaim target memcg.
 * (b) The previously calculated emin value (~29M) should be dismissed.
 */
static int test_memcg_protection(const char *root, bool min)
{
	int ret = KSFT_FAIL, rc;
	char *parent[3] = {NULL};	/* A, A/B, A/G in the diagram above */
	char *children[4] = {NULL};	/* A/B/{C,D,E,F} */
	/* Same test body drives both hard (min) and soft (low) protection. */
	const char *attribute = min ? "memory.min" : "memory.low";
	long c[4];
	int i, attempts;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	if (cg_read_long(parent[0], attribute)) {
		/* No memory.min on older kernels is fine */
		if (min)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	/* Disable swap so reclaim pressure acts on pagecache only. */
	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		/* The last child (F) intentionally stays empty. */
		if (i > 2)
			continue;

		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
			      (void *)(long)fd);
	}

	/* Apply the protection values from the diagram above. */
	if (cg_write(parent[1], attribute, "50M"))
		goto cleanup;
	if (cg_write(children[0], attribute, "75M"))
		goto cleanup;
	if (cg_write(children[1], attribute, "25M"))
		goto cleanup;
	if (cg_write(children[2], attribute, "0"))
		goto cleanup;
	if (cg_write(children[3], attribute, "500M"))
		goto cleanup;

	/* Wait (bounded) for the three 50M pagecache charges to land. */
	attempts = 0;
	while (!values_close(cg_read_long(parent[1], "memory.current"),
			     MB(150), 3)) {
		if (attempts++ > 5)
			break;
		sleep(1);
	}

	/* Generate memory pressure in sibling A/G. */
	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	/* Expected distribution per the memcg_protection.m model. */
	if (!values_close(c[0], MB(29), 10))
		goto cleanup;

	if (!values_close(c[1], MB(21), 10))
		goto cleanup;

	if (c[3] != 0)
		goto cleanup;

	/* Try to allocate more than the unprotected memory in A. */
	rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
	if (min && !rc)
		goto cleanup;
	else if (!min && rc) {
		fprintf(stderr,
			"memory.low prevents from allocating anon memory\n");
		goto cleanup;
	}

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	/* Reclaiming from C itself must ignore C's own protection. */
	if (!reclaim_until(children[0], MB(10)))
		goto cleanup;

	if (min) {
		ret = KSFT_PASS;
		goto cleanup;
	}

	/* Soft protection only: verify low-event accounting per child. */
	for (i = 0; i < ARRAY_SIZE(children); i++) {
		/* Children 0 and 1 are expected to see low events. */
		int no_low_events_index = 1;
		long low, oom;

		oom = cg_read_key_long(children[i], "memory.events", "oom ");
		low = cg_read_key_long(children[i], "memory.events", "low ");

		if (oom)
			goto cleanup;
		if (i <= no_low_events_index && low <= 0)
			goto cleanup;
		if (i > no_low_events_index && low)
			goto cleanup;

	}

	ret = KSFT_PASS;

cleanup:
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}
451
f079a020
MK
/* Exercise hard protection (memory.min) via test_memcg_protection(). */
static int test_memcg_min(const char *root)
{
	return test_memcg_protection(root, true);
}
456
/* Exercise soft protection (memory.low) via test_memcg_protection(). */
static int test_memcg_low(const char *root)
{
	return test_memcg_protection(root, false);
}
461
84092dbc
RG
/*
 * Try to create 50M of pagecache in a cgroup whose memory.high or
 * memory.max is set to 30M, and verify that the usage ends up capped
 * around 30M.  Returns 0 on success, -1 on failure.
 */
static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	long current, high, max;
	int fd, ret = -1;

	/* Sanity check: one of the two limits must actually be 30M. */
	high = cg_read_long(cgroup, "memory.high");
	max = cg_read_long(cgroup, "memory.max");
	if (high != MB(30) && max != MB(30))
		return -1;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (values_close(current, MB(30), 5))
		ret = 0;

cleanup:
	close(fd);
	return ret;
}
492
493/*
494 * This test checks that memory.high limits the amount of
495 * memory which can be consumed by either anonymous memory
496 * or pagecache.
497 */
498static int test_memcg_high(const char *root)
499{
500 int ret = KSFT_FAIL;
501 char *memcg;
502 long high;
503
504 memcg = cg_name(root, "memcg_test");
505 if (!memcg)
506 goto cleanup;
507
508 if (cg_create(memcg))
509 goto cleanup;
510
511 if (cg_read_strcmp(memcg, "memory.high", "max\n"))
512 goto cleanup;
513
514 if (cg_write(memcg, "memory.swap.max", "0"))
515 goto cleanup;
516
517 if (cg_write(memcg, "memory.high", "30M"))
518 goto cleanup;
519
be74553f 520 if (cg_run(memcg, alloc_anon, (void *)MB(31)))
84092dbc
RG
521 goto cleanup;
522
523 if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
524 goto cleanup;
525
526 if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
527 goto cleanup;
528
529 high = cg_read_key_long(memcg, "memory.events", "high ");
530 if (high <= 0)
531 goto cleanup;
532
533 ret = KSFT_PASS;
534
535cleanup:
536 cg_destroy(memcg);
537 free(memcg);
538
539 return ret;
540}
541
6323ec54
SB
542static int alloc_anon_mlock(const char *cgroup, void *arg)
543{
544 size_t size = (size_t)arg;
545 void *buf;
546
547 buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
548 0, 0);
549 if (buf == MAP_FAILED)
550 return -1;
551
552 mlock(buf, size);
553 munmap(buf, size);
554 return 0;
555}
556
557/*
558 * This test checks that memory.high is able to throttle big single shot
559 * allocation i.e. large allocation within one kernel entry.
560 */
561static int test_memcg_high_sync(const char *root)
562{
563 int ret = KSFT_FAIL, pid, fd = -1;
564 char *memcg;
565 long pre_high, pre_max;
566 long post_high, post_max;
567
568 memcg = cg_name(root, "memcg_test");
569 if (!memcg)
570 goto cleanup;
571
572 if (cg_create(memcg))
573 goto cleanup;
574
575 pre_high = cg_read_key_long(memcg, "memory.events", "high ");
576 pre_max = cg_read_key_long(memcg, "memory.events", "max ");
577 if (pre_high < 0 || pre_max < 0)
578 goto cleanup;
579
580 if (cg_write(memcg, "memory.swap.max", "0"))
581 goto cleanup;
582
583 if (cg_write(memcg, "memory.high", "30M"))
584 goto cleanup;
585
586 if (cg_write(memcg, "memory.max", "140M"))
587 goto cleanup;
588
589 fd = memcg_prepare_for_wait(memcg);
590 if (fd < 0)
591 goto cleanup;
592
593 pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
594 if (pid < 0)
595 goto cleanup;
596
597 cg_wait_for(fd);
598
599 post_high = cg_read_key_long(memcg, "memory.events", "high ");
600 post_max = cg_read_key_long(memcg, "memory.events", "max ");
601 if (post_high < 0 || post_max < 0)
602 goto cleanup;
603
604 if (pre_high == post_high || pre_max != post_max)
605 goto cleanup;
606
607 ret = KSFT_PASS;
608
609cleanup:
610 if (fd >= 0)
611 close(fd);
612 cg_destroy(memcg);
613 free(memcg);
614
615 return ret;
616}
617
84092dbc
RG
618/*
619 * This test checks that memory.max limits the amount of
620 * memory which can be consumed by either anonymous memory
621 * or pagecache.
622 */
623static int test_memcg_max(const char *root)
624{
625 int ret = KSFT_FAIL;
626 char *memcg;
627 long current, max;
628
629 memcg = cg_name(root, "memcg_test");
630 if (!memcg)
631 goto cleanup;
632
633 if (cg_create(memcg))
634 goto cleanup;
635
636 if (cg_read_strcmp(memcg, "memory.max", "max\n"))
637 goto cleanup;
638
639 if (cg_write(memcg, "memory.swap.max", "0"))
640 goto cleanup;
641
642 if (cg_write(memcg, "memory.max", "30M"))
643 goto cleanup;
644
645 /* Should be killed by OOM killer */
646 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
647 goto cleanup;
648
649 if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
650 goto cleanup;
651
652 current = cg_read_long(memcg, "memory.current");
653 if (current > MB(30) || !current)
654 goto cleanup;
655
656 max = cg_read_key_long(memcg, "memory.events", "max ");
657 if (max <= 0)
658 goto cleanup;
659
660 ret = KSFT_PASS;
661
662cleanup:
663 cg_destroy(memcg);
664 free(memcg);
665
666 return ret;
667}
668
e5d64eda
YA
/*
 * Reclaim from @memcg until usage reaches @goal by writing to
 * memory.reclaim.
 *
 * This function will return false if the usage is already below the
 * goal.
 *
 * This function assumes that writing to memory.reclaim is the only
 * source of change in memory.current (no concurrent allocations or
 * reclaim).
 *
 * This function makes sure memory.reclaim is sane. It will return
 * false if memory.reclaim's error codes do not make sense, even if
 * the usage goal was satisfied.
 */
static bool reclaim_until(const char *memcg, long goal)
{
	char buf[64];
	int retries, err;
	long current, to_reclaim;
	/* Set once a memory.reclaim write has reported full success. */
	bool reclaimed = false;

	for (retries = 5; retries > 0; retries--) {
		current = cg_read_long(memcg, "memory.current");

		/* Done when usage is at (or within 3% of) the goal. */
		if (current < goal || values_close(current, goal, 3))
			break;
		/* Did memory.reclaim return 0 incorrectly? */
		else if (reclaimed)
			return false;

		/* Ask the kernel to reclaim exactly the remaining excess. */
		to_reclaim = current - goal;
		snprintf(buf, sizeof(buf), "%ld", to_reclaim);
		err = cg_write(memcg, "memory.reclaim", buf);
		if (!err)
			reclaimed = true;
		else if (err != -EAGAIN)
			/* Any error other than "partial reclaim" is bogus. */
			return false;
	}
	return reclaimed;
}
710
eae3cb2e
YA
711/*
712 * This test checks that memory.reclaim reclaims the given
713 * amount of memory (from both anon and file, if possible).
714 */
715static int test_memcg_reclaim(const char *root)
716{
717 int ret = KSFT_FAIL, fd, retries;
718 char *memcg;
e5d64eda 719 long current, expected_usage;
eae3cb2e
YA
720
721 memcg = cg_name(root, "memcg_test");
722 if (!memcg)
723 goto cleanup;
724
725 if (cg_create(memcg))
726 goto cleanup;
727
728 current = cg_read_long(memcg, "memory.current");
729 if (current != 0)
730 goto cleanup;
731
732 fd = get_temp_fd();
733 if (fd < 0)
734 goto cleanup;
735
736 cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);
737
738 /*
739 * If swap is enabled, try to reclaim from both anon and file, else try
740 * to reclaim from file only.
741 */
742 if (is_swap_enabled()) {
743 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
744 expected_usage = MB(100);
745 } else
746 expected_usage = MB(50);
747
748 /*
749 * Wait until current usage reaches the expected usage (or we run out of
750 * retries).
751 */
752 retries = 5;
753 while (!values_close(cg_read_long(memcg, "memory.current"),
754 expected_usage, 10)) {
755 if (retries--) {
756 sleep(1);
757 continue;
758 } else {
759 fprintf(stderr,
760 "failed to allocate %ld for memcg reclaim test\n",
761 expected_usage);
762 goto cleanup;
763 }
764 }
765
766 /*
767 * Reclaim until current reaches 30M, this makes sure we hit both anon
768 * and file if swap is enabled.
769 */
e5d64eda 770 if (!reclaim_until(memcg, MB(30)))
eae3cb2e 771 goto cleanup;
eae3cb2e
YA
772
773 ret = KSFT_PASS;
774cleanup:
775 cg_destroy(memcg);
776 free(memcg);
777 close(fd);
778
779 return ret;
780}
781
478b2784
MR
782static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
783{
784 long mem_max = (long)arg;
785 size_t size = MB(50);
786 char *buf, *ptr;
787 long mem_current, swap_current;
788 int ret = -1;
789
790 buf = malloc(size);
c83f320e
IO
791 if (buf == NULL) {
792 fprintf(stderr, "malloc() failed\n");
793 return -1;
794 }
795
478b2784
MR
796 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
797 *ptr = 0;
798
799 mem_current = cg_read_long(cgroup, "memory.current");
800 if (!mem_current || !values_close(mem_current, mem_max, 3))
801 goto cleanup;
802
803 swap_current = cg_read_long(cgroup, "memory.swap.current");
804 if (!swap_current ||
805 !values_close(mem_current + swap_current, size, 3))
806 goto cleanup;
807
808 ret = 0;
809cleanup:
810 free(buf);
811 return ret;
812}
813
814/*
815 * This test checks that memory.swap.max limits the amount of
816 * anonymous memory which can be swapped out.
817 */
818static int test_memcg_swap_max(const char *root)
819{
820 int ret = KSFT_FAIL;
821 char *memcg;
822 long max;
823
824 if (!is_swap_enabled())
825 return KSFT_SKIP;
826
827 memcg = cg_name(root, "memcg_test");
828 if (!memcg)
829 goto cleanup;
830
831 if (cg_create(memcg))
832 goto cleanup;
833
834 if (cg_read_long(memcg, "memory.swap.current")) {
835 ret = KSFT_SKIP;
836 goto cleanup;
837 }
838
839 if (cg_read_strcmp(memcg, "memory.max", "max\n"))
840 goto cleanup;
841
842 if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
843 goto cleanup;
844
845 if (cg_write(memcg, "memory.swap.max", "30M"))
846 goto cleanup;
847
848 if (cg_write(memcg, "memory.max", "30M"))
849 goto cleanup;
850
851 /* Should be killed by OOM killer */
852 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
853 goto cleanup;
854
855 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
856 goto cleanup;
857
858 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
859 goto cleanup;
860
861 if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
862 goto cleanup;
863
864 max = cg_read_key_long(memcg, "memory.events", "max ");
865 if (max <= 0)
866 goto cleanup;
867
868 ret = KSFT_PASS;
869
870cleanup:
871 cg_destroy(memcg);
872 free(memcg);
873
874 return ret;
875}
876
84092dbc
RG
877/*
878 * This test disables swapping and tries to allocate anonymous memory
879 * up to OOM. Then it checks for oom and oom_kill events in
880 * memory.events.
881 */
882static int test_memcg_oom_events(const char *root)
883{
884 int ret = KSFT_FAIL;
885 char *memcg;
886
887 memcg = cg_name(root, "memcg_test");
888 if (!memcg)
889 goto cleanup;
890
891 if (cg_create(memcg))
892 goto cleanup;
893
894 if (cg_write(memcg, "memory.max", "30M"))
895 goto cleanup;
896
897 if (cg_write(memcg, "memory.swap.max", "0"))
898 goto cleanup;
899
900 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
901 goto cleanup;
902
903 if (cg_read_strcmp(memcg, "cgroup.procs", ""))
904 goto cleanup;
905
906 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
907 goto cleanup;
908
909 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
910 goto cleanup;
911
912 ret = KSFT_PASS;
913
914cleanup:
915 cg_destroy(memcg);
916 free(memcg);
917
918 return ret;
919}
920
5f8f0193
MR
/*
 * Arguments handed to the forked tcp_server(): the port to listen on
 * and a pipe used to report the bind() status back to the parent.
 */
struct tcp_server_args {
	unsigned short port;
	int ctl[2];	/* control pipe: parent reads [0], server writes [1] */
};
925
/*
 * TCP server half of the socket-accounting test, run in a forked child.
 *
 * Binds an IPv6 listening socket to the port from @arg, reports the
 * bind() status to the parent through the control pipe (errno on
 * failure, 0 once listening), then accepts one client and keeps writing
 * 1M chunks until the peer disconnects (ECONNRESET counts as success).
 */
static int tcp_server(const char *cgroup, void *arg)
{
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

	/* The parent reads ctl[0]; this process only writes ctl[1]. */
	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		return ret;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		/* Tell the parent which errno bind() failed with. */
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	/* Signal successful bind + listen with a zero status. */
	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	ret = -1;
	for (;;) {
		uint8_t buf[0x100000];

		/* Stream data until the client closes the connection. */
		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			if (errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}
982
/*
 * TCP client half of the socket-accounting test.
 *
 * Connects to "localhost":@port and interleaves reads from the socket
 * with checks that the cgroup's memory.current and the "sock" counter
 * of memory.stat stay close to each other.
 *
 * Returns KSFT_PASS if the counters converged, KSFT_FAIL otherwise
 * (or the non-zero getaddrinfo() status on resolution failure).
 */
static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;

	/*
	 * %hu, not %hd: with %hd any port >= 32768 is formatted as a
	 * negative number, which makes getaddrinfo() fail for more than
	 * half of the randomly chosen ports (1000 + rand() % 60000).
	 */
	snprintf(servport, sizeof(servport), "%hu", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0) {
		/*
		 * Don't leak getaddrinfo()'s 0 status here: returning it
		 * unchanged would falsely report KSFT_PASS to the caller.
		 */
		ret = KSFT_FAIL;
		goto free_ainfo;
	}

	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
	if (ret < 0)
		goto close_sk;

	ret = KSFT_FAIL;
	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		if (values_close(current, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}
1030
/*
 * This test checks socket memory accounting.
 * The test forks a TCP server listens on a random port between 1000
 * and 61000. Once it gets a client connection, it starts writing to
 * its socket.
 * The TCP client interleaves reads from the socket with check whether
 * memory.current and memory.stat.sock are similar.
 */
static int test_memcg_sock(const char *root)
{
	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
	unsigned short port;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* Retry a few times in case a randomly chosen port is taken. */
	while (bind_retries--) {
		struct tcp_server_args args;

		if (pipe(args.ctl))
			goto cleanup;

		port = args.port = 1000 + rand() % 60000;

		pid = cg_run_nowait(memcg, tcp_server, &args);
		if (pid < 0)
			goto cleanup;

		/* Block until the server reports its bind() status. */
		close(args.ctl[1]);
		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
			goto cleanup;
		close(args.ctl[0]);

		if (!err)
			break;
		if (err != EADDRINUSE)
			goto cleanup;

		/* Port collision: reap the failed server and try again. */
		waitpid(pid, NULL, 0);
	}

	/* All retries collided with busy ports — inconclusive, skip. */
	if (err == EADDRINUSE) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (tcp_client(memcg, port) != KSFT_PASS)
		goto cleanup;

	/* The server must have exited cleanly. */
	waitpid(pid, &err, 0);
	if (WEXITSTATUS(err))
		goto cleanup;

	if (cg_read_long(memcg, "memory.current") < 0)
		goto cleanup;

	/* With all sockets closed, nothing may remain in stat "sock". */
	if (cg_read_key_long(memcg, "memory.stat", "sock "))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
1103
a987785d
JK
1104/*
1105 * This test disables swapping and tries to allocate anonymous memory
1106 * up to OOM with memory.group.oom set. Then it checks that all
c85bcc91
RG
1107 * processes in the leaf were killed. It also checks that oom_events
1108 * were propagated to the parent level.
a987785d
JK
1109 */
1110static int test_memcg_oom_group_leaf_events(const char *root)
1111{
1112 int ret = KSFT_FAIL;
1113 char *parent, *child;
72b1e03a 1114 long parent_oom_events;
a987785d
JK
1115
1116 parent = cg_name(root, "memcg_test_0");
1117 child = cg_name(root, "memcg_test_0/memcg_test_1");
1118
1119 if (!parent || !child)
1120 goto cleanup;
1121
1122 if (cg_create(parent))
1123 goto cleanup;
1124
1125 if (cg_create(child))
1126 goto cleanup;
1127
1128 if (cg_write(parent, "cgroup.subtree_control", "+memory"))
1129 goto cleanup;
1130
1131 if (cg_write(child, "memory.max", "50M"))
1132 goto cleanup;
1133
1134 if (cg_write(child, "memory.swap.max", "0"))
1135 goto cleanup;
1136
1137 if (cg_write(child, "memory.oom.group", "1"))
1138 goto cleanup;
1139
1140 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
1141 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1142 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1143 if (!cg_run(child, alloc_anon, (void *)MB(100)))
1144 goto cleanup;
1145
1146 if (cg_test_proc_killed(child))
1147 goto cleanup;
1148
1149 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
1150 goto cleanup;
1151
ff3b72a5
MK
1152 parent_oom_events = cg_read_key_long(
1153 parent, "memory.events", "oom_kill ");
1154 /*
1155 * If memory_localevents is not enabled (the default), the parent should
1156 * count OOM events in its children groups. Otherwise, it should not
1157 * have observed any events.
1158 */
1159 if (has_localevents && parent_oom_events != 0)
1160 goto cleanup;
1161 else if (!has_localevents && parent_oom_events <= 0)
a987785d
JK
1162 goto cleanup;
1163
1164 ret = KSFT_PASS;
1165
1166cleanup:
1167 if (child)
1168 cg_destroy(child);
1169 if (parent)
1170 cg_destroy(parent);
1171 free(child);
1172 free(parent);
1173
1174 return ret;
1175}
1176
1177/*
1178 * This test disables swapping and tries to allocate anonymous memory
1179 * up to OOM with memory.group.oom set. Then it checks that all
1180 * processes in the parent and leaf were killed.
1181 */
1182static int test_memcg_oom_group_parent_events(const char *root)
1183{
1184 int ret = KSFT_FAIL;
1185 char *parent, *child;
1186
1187 parent = cg_name(root, "memcg_test_0");
1188 child = cg_name(root, "memcg_test_0/memcg_test_1");
1189
1190 if (!parent || !child)
1191 goto cleanup;
1192
1193 if (cg_create(parent))
1194 goto cleanup;
1195
1196 if (cg_create(child))
1197 goto cleanup;
1198
1199 if (cg_write(parent, "memory.max", "80M"))
1200 goto cleanup;
1201
1202 if (cg_write(parent, "memory.swap.max", "0"))
1203 goto cleanup;
1204
1205 if (cg_write(parent, "memory.oom.group", "1"))
1206 goto cleanup;
1207
1208 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
1209 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1210 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1211
1212 if (!cg_run(child, alloc_anon, (void *)MB(100)))
1213 goto cleanup;
1214
1215 if (cg_test_proc_killed(child))
1216 goto cleanup;
1217 if (cg_test_proc_killed(parent))
1218 goto cleanup;
1219
1220 ret = KSFT_PASS;
1221
1222cleanup:
1223 if (child)
1224 cg_destroy(child);
1225 if (parent)
1226 cg_destroy(parent);
1227 free(child);
1228 free(parent);
1229
1230 return ret;
1231}
1232
1233/*
1234 * This test disables swapping and tries to allocate anonymous memory
1235 * up to OOM with memory.group.oom set. Then it checks that all
1236 * processes were killed except those set with OOM_SCORE_ADJ_MIN
1237 */
1238static int test_memcg_oom_group_score_events(const char *root)
1239{
1240 int ret = KSFT_FAIL;
1241 char *memcg;
1242 int safe_pid;
1243
1244 memcg = cg_name(root, "memcg_test_0");
1245
1246 if (!memcg)
1247 goto cleanup;
1248
1249 if (cg_create(memcg))
1250 goto cleanup;
1251
1252 if (cg_write(memcg, "memory.max", "50M"))
1253 goto cleanup;
1254
1255 if (cg_write(memcg, "memory.swap.max", "0"))
1256 goto cleanup;
1257
1258 if (cg_write(memcg, "memory.oom.group", "1"))
1259 goto cleanup;
1260
1261 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
1262 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
1263 goto cleanup;
1264
1265 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
1266 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
1267 goto cleanup;
1268
ff3b72a5
MK
1269 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
1270 goto cleanup;
a987785d
JK
1271
1272 if (kill(safe_pid, SIGKILL))
1273 goto cleanup;
1274
ff3b72a5
MK
1275 ret = KSFT_PASS;
1276
a987785d
JK
1277cleanup:
1278 if (memcg)
1279 cg_destroy(memcg);
1280 free(memcg);
1281
1282 return ret;
1283}
1284
84092dbc
RG
/* Build a { function, "function-name" } table entry from a test function. */
#define T(x) { x, #x }
struct memcg_test {
	int (*fn)(const char *root);	/* test body; gets the cgroup v2 root path */
	const char *name;		/* human-readable name for ksft reporting */
} tests[] = {
	T(test_memcg_subtree_control),
	T(test_memcg_current),
	T(test_memcg_min),
	T(test_memcg_low),
	T(test_memcg_high),
	T(test_memcg_high_sync),
	T(test_memcg_max),
	T(test_memcg_reclaim),
	T(test_memcg_oom_events),
	T(test_memcg_swap_max),
	T(test_memcg_sock),
	T(test_memcg_oom_group_leaf_events),
	T(test_memcg_oom_group_parent_events),
	T(test_memcg_oom_group_score_events),
};
#undef T
1306
int main(int argc, char **argv)
{
	char root[PATH_MAX];
	int i, proc_status, ret = EXIT_SUCCESS;

	/* All tests require the unified (v2) cgroup hierarchy. */
	if (cg_find_unified_root(root, sizeof(root)))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	/*
	 * Check that memory controller is available:
	 * memory is listed in cgroup.controllers
	 */
	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
		ksft_exit_skip("memory controller isn't available\n");

	/* Enable the memory controller for child cgroups if needed. */
	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
		if (cg_write(root, "cgroup.subtree_control", "+memory"))
			ksft_exit_skip("Failed to set memory controller\n");

	/* Record mount options that change event/protection semantics. */
	proc_status = proc_mount_contains("memory_recursiveprot");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_recursiveprot = proc_status;

	proc_status = proc_mount_contains("memory_localevents");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_localevents = proc_status;

	/* Run every test, reporting each result through the ksft API. */
	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}