Commit | Line | Data |
---|---|---|
43b0536c SP |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * DAMON-based page reclamation | |
4 | * | |
5 | * Author: SeongJae Park <sj@kernel.org> | |
6 | */ | |
7 | ||
8 | #define pr_fmt(fmt) "damon-reclaim: " fmt | |
9 | ||
10 | #include <linux/damon.h> | |
11 | #include <linux/ioport.h> | |
12 | #include <linux/module.h> | |
13 | #include <linux/sched.h> | |
14 | #include <linux/workqueue.h> | |
15 | ||
16 | #ifdef MODULE_PARAM_PREFIX | |
17 | #undef MODULE_PARAM_PREFIX | |
18 | #endif | |
19 | #define MODULE_PARAM_PREFIX "damon_reclaim." | |
20 | ||
21 | /* | |
22 | * Enable or disable DAMON_RECLAIM. | |
23 | * | |
24 | * You can enable DAMON_RECLAIM by setting the value of this parameter as ``Y``. | |
25 | * Setting it as ``N`` disables DAMON_RECLAIM. Note that DAMON_RECLAIM could | |
26 | * do no real monitoring and reclamation due to the watermarks-based activation | |
27 | * condition. Refer to below descriptions for the watermarks parameter for | |
28 | * this. | |
29 | */ | |
30 | static bool enabled __read_mostly; | |
43b0536c | 31 | |
e035c280 SP |
32 | /* |
33 | * Make DAMON_RECLAIM read the input parameters again, except ``enabled``. | |
34 | * | |
35 | * Input parameters that are updated while DAMON_RECLAIM is running are not applied | |
36 | * by default. Once this parameter is set as ``Y``, DAMON_RECLAIM reads values | |
37 | * of parameters except ``enabled`` again. Once the re-reading is done, this | |
38 | * parameter is set as ``N``. If invalid parameters are found while the | |
39 | * re-reading, DAMON_RECLAIM will be disabled. | |
40 | */ | |
41 | static bool commit_inputs __read_mostly; | |
42 | module_param(commit_inputs, bool, 0600); | |
43 | ||
43b0536c SP |
44 | /* |
45 | * Time threshold for cold memory regions identification in microseconds. | |
46 | * | |
47 | * If a memory region is not accessed for this or longer time, DAMON_RECLAIM | |
48 | * identifies the region as cold, and reclaims. 120 seconds by default. | |
49 | */ | |
50 | static unsigned long min_age __read_mostly = 120000000; | |
51 | module_param(min_age, ulong, 0600); | |
52 | ||
53 | /* | |
54 | * Limit of time for trying the reclamation in milliseconds. | |
55 | * | |
56 | * DAMON_RECLAIM tries to use only up to this time within a time window | |
57 | * (quota_reset_interval_ms) for trying reclamation of cold pages. This can be | |
58 | * used for limiting CPU consumption of DAMON_RECLAIM. If the value is zero, | |
59 | * the limit is disabled. | |
60 | * | |
61 | * 10 ms by default. | |
62 | */ | |
63 | static unsigned long quota_ms __read_mostly = 10; | |
64 | module_param(quota_ms, ulong, 0600); | |
65 | ||
66 | /* | |
67 | * Limit of size of memory for the reclamation in bytes. | |
68 | * | |
69 | * DAMON_RECLAIM charges amount of memory which it tried to reclaim within a | |
70 | * time window (quota_reset_interval_ms) and makes sure no more than this limit is | |
71 | * tried. This can be used for limiting consumption of CPU and IO. If this | |
72 | * value is zero, the limit is disabled. | |
73 | * | |
74 | * 128 MiB by default. | |
75 | */ | |
76 | static unsigned long quota_sz __read_mostly = 128 * 1024 * 1024; | |
77 | module_param(quota_sz, ulong, 0600); | |
78 | ||
79 | /* | |
80 | * The time/size quota charge reset interval in milliseconds. | |
81 | * | |
82 | * The charge reset interval for the quota of time (quota_ms) and size | |
83 | * (quota_sz). That is, DAMON_RECLAIM does not try reclamation for more than | |
84 | * quota_ms milliseconds or quota_sz bytes within quota_reset_interval_ms | |
85 | * milliseconds. | |
86 | * | |
87 | * 1 second by default. | |
88 | */ | |
89 | static unsigned long quota_reset_interval_ms __read_mostly = 1000; | |
90 | module_param(quota_reset_interval_ms, ulong, 0600); | |
91 | ||
92 | /* | |
93 | * The watermarks check time interval in microseconds. | |
94 | * | |
95 | * Minimal time to wait before checking the watermarks, when DAMON_RECLAIM is | |
96 | * enabled but inactive due to its watermarks rule. 5 seconds by default. | |
97 | */ | |
98 | static unsigned long wmarks_interval __read_mostly = 5000000; | |
99 | module_param(wmarks_interval, ulong, 0600); | |
100 | ||
101 | /* | |
102 | * Free memory rate (per thousand) for the high watermark. | |
103 | * | |
104 | * If free memory of the system in bytes per thousand bytes is higher than | |
105 | * this, DAMON_RECLAIM becomes inactive, so it does nothing but periodically | |
106 | * checks the watermarks. 500 (50%) by default. | |
107 | */ | |
108 | static unsigned long wmarks_high __read_mostly = 500; | |
109 | module_param(wmarks_high, ulong, 0600); | |
110 | ||
111 | /* | |
112 | * Free memory rate (per thousand) for the middle watermark. | |
113 | * | |
114 | * If free memory of the system in bytes per thousand bytes is between this and | |
115 | * the low watermark, DAMON_RECLAIM becomes active, so starts the monitoring | |
116 | * and the reclaiming. 400 (40%) by default. | |
117 | */ | |
118 | static unsigned long wmarks_mid __read_mostly = 400; | |
119 | module_param(wmarks_mid, ulong, 0600); | |
120 | ||
121 | /* | |
122 | * Free memory rate (per thousand) for the low watermark. | |
123 | * | |
124 | * If free memory of the system in bytes per thousand bytes is lower than this, | |
125 | * DAMON_RECLAIM becomes inactive, so it does nothing but periodically checks | |
126 | * the watermarks. In the case, the system falls back to the LRU-based page | |
127 | * granularity reclamation logic. 200 (20%) by default. | |
128 | */ | |
129 | static unsigned long wmarks_low __read_mostly = 200; | |
130 | module_param(wmarks_low, ulong, 0600); | |
131 | ||
132 | /* | |
133 | * Sampling interval for the monitoring in microseconds. | |
134 | * | |
135 | * The sampling interval of DAMON for the cold memory monitoring. Please refer | |
136 | * to the DAMON documentation for more detail. 5 ms by default. | |
137 | */ | |
138 | static unsigned long sample_interval __read_mostly = 5000; | |
139 | module_param(sample_interval, ulong, 0600); | |
140 | ||
141 | /* | |
142 | * Aggregation interval for the monitoring in microseconds. | |
143 | * | |
144 | * The aggregation interval of DAMON for the cold memory monitoring. Please | |
145 | * refer to the DAMON documentation for more detail. 100 ms by default. | |
146 | */ | |
147 | static unsigned long aggr_interval __read_mostly = 100000; | |
148 | module_param(aggr_interval, ulong, 0600); | |
149 | ||
150 | /* | |
151 | * Minimum number of monitoring regions. | |
152 | * | |
153 | * The minimal number of monitoring regions of DAMON for the cold memory | |
154 | * monitoring. This can be used to set lower-bound of the monitoring quality. | |
155 | * But, setting this too high could result in increased monitoring overhead. | |
156 | * Please refer to the DAMON documentation for more detail. 10 by default. | |
157 | */ | |
158 | static unsigned long min_nr_regions __read_mostly = 10; | |
159 | module_param(min_nr_regions, ulong, 0600); | |
160 | ||
161 | /* | |
162 | * Maximum number of monitoring regions. | |
163 | * | |
164 | * The maximum number of monitoring regions of DAMON for the cold memory | |
165 | * monitoring. This can be used to set upper-bound of the monitoring overhead. | |
166 | * However, setting this too low could result in bad monitoring quality. | |
167 | * Please refer to the DAMON documentation for more detail. 1000 by default. | |
168 | */ | |
169 | static unsigned long max_nr_regions __read_mostly = 1000; | |
170 | module_param(max_nr_regions, ulong, 0600); | |
171 | ||
172 | /* | |
173 | * Start of the target memory region in physical address. | |
174 | * | |
175 | * The start physical address of memory region that DAMON_RECLAIM will do work | |
176 | * against. By default, biggest System RAM is used as the region. | |
177 | */ | |
178 | static unsigned long monitor_region_start __read_mostly; | |
179 | module_param(monitor_region_start, ulong, 0600); | |
180 | ||
181 | /* | |
182 | * End of the target memory region in physical address. | |
183 | * | |
184 | * The end physical address of memory region that DAMON_RECLAIM will do work | |
185 | * against. By default, biggest System RAM is used as the region. | |
186 | */ | |
187 | static unsigned long monitor_region_end __read_mostly; | |
188 | module_param(monitor_region_end, ulong, 0600); | |
189 | ||
190 | /* | |
191 | * PID of the DAMON thread | |
192 | * | |
193 | * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread. | |
194 | * Else, -1. | |
195 | */ | |
196 | static int kdamond_pid __read_mostly = -1; | |
197 | module_param(kdamond_pid, int, 0400); | |
198 | ||
60e52e7c SP |
199 | /* |
200 | * Number of memory regions that tried to be reclaimed. | |
201 | */ | |
202 | static unsigned long nr_reclaim_tried_regions __read_mostly; | |
203 | module_param(nr_reclaim_tried_regions, ulong, 0400); | |
204 | ||
205 | /* | |
206 | * Total bytes of memory regions that tried to be reclaimed. | |
207 | */ | |
208 | static unsigned long bytes_reclaim_tried_regions __read_mostly; | |
209 | module_param(bytes_reclaim_tried_regions, ulong, 0400); | |
210 | ||
211 | /* | |
212 | * Number of memory regions that were successfully reclaimed. | |
213 | */ | |
214 | static unsigned long nr_reclaimed_regions __read_mostly; | |
215 | module_param(nr_reclaimed_regions, ulong, 0400); | |
216 | ||
217 | /* | |
218 | * Total bytes of memory regions that were successfully reclaimed. | |
219 | */ | |
220 | static unsigned long bytes_reclaimed_regions __read_mostly; | |
221 | module_param(bytes_reclaimed_regions, ulong, 0400); | |
222 | ||
223 | /* | |
224 | * Number of times that the time/space quota limits have been exceeded | |
225 | */ | |
226 | static unsigned long nr_quota_exceeds __read_mostly; | |
227 | module_param(nr_quota_exceeds, ulong, 0400); | |
228 | ||
43b0536c SP |
229 | static struct damon_ctx *ctx; |
230 | static struct damon_target *target; | |
231 | ||
/*
 * Accumulator handed to the System RAM walk callback.  It holds the address
 * range of the largest resource the walk has recorded so far.
 */
struct damon_reclaim_ram_walk_arg {
	unsigned long start;	/* copied from the recorded resource's ->start */
	unsigned long end;	/* copied from the recorded resource's ->end */
};
236 | ||
237 | static int walk_system_ram(struct resource *res, void *arg) | |
238 | { | |
239 | struct damon_reclaim_ram_walk_arg *a = arg; | |
240 | ||
2e14a8d3 | 241 | if (a->end - a->start < resource_size(res)) { |
43b0536c SP |
242 | a->start = res->start; |
243 | a->end = res->end; | |
244 | } | |
245 | return 0; | |
246 | } | |
247 | ||
248 | /* | |
249 | * Find biggest 'System RAM' resource and store its start and end address in | |
250 | * @start and @end, respectively. If no System RAM is found, returns false. | |
251 | */ | |
252 | static bool get_monitoring_region(unsigned long *start, unsigned long *end) | |
253 | { | |
254 | struct damon_reclaim_ram_walk_arg arg = {}; | |
255 | ||
256 | walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram); | |
257 | if (arg.end <= arg.start) | |
258 | return false; | |
259 | ||
260 | *start = arg.start; | |
261 | *end = arg.end; | |
262 | return true; | |
263 | } | |
264 | ||
265 | static struct damos *damon_reclaim_new_scheme(void) | |
266 | { | |
267 | struct damos_watermarks wmarks = { | |
268 | .metric = DAMOS_WMARK_FREE_MEM_RATE, | |
269 | .interval = wmarks_interval, | |
270 | .high = wmarks_high, | |
271 | .mid = wmarks_mid, | |
272 | .low = wmarks_low, | |
273 | }; | |
274 | struct damos_quota quota = { | |
275 | /* | |
276 | * Do not try reclamation for more than quota_ms milliseconds | |
277 | * or quota_sz bytes within quota_reset_interval_ms. | |
278 | */ | |
279 | .ms = quota_ms, | |
280 | .sz = quota_sz, | |
281 | .reset_interval = quota_reset_interval_ms, | |
282 | /* Within the quota, page out older regions first. */ | |
283 | .weight_sz = 0, | |
284 | .weight_nr_accesses = 0, | |
285 | .weight_age = 1 | |
286 | }; | |
287 | struct damos *scheme = damon_new_scheme( | |
288 | /* Find regions having PAGE_SIZE or larger size */ | |
289 | PAGE_SIZE, ULONG_MAX, | |
290 | /* and not accessed at all */ | |
291 | 0, 0, | |
292 | /* for min_age or more micro-seconds, and */ | |
293 | min_age / aggr_interval, UINT_MAX, | |
294 | /* page out those, as soon as found */ | |
295 | DAMOS_PAGEOUT, | |
296 | /* under the quota. */ | |
297 | "a, | |
298 | /* (De)activate this according to the watermarks. */ | |
299 | &wmarks); | |
300 | ||
301 | return scheme; | |
302 | } | |
303 | ||
e035c280 | 304 | static int damon_reclaim_apply_parameters(void) |
43b0536c | 305 | { |
43b0536c | 306 | struct damos *scheme; |
e035c280 SP |
307 | struct damon_addr_range addr_range; |
308 | int err = 0; | |
43b0536c SP |
309 | |
310 | err = damon_set_attrs(ctx, sample_interval, aggr_interval, 0, | |
311 | min_nr_regions, max_nr_regions); | |
312 | if (err) | |
313 | return err; | |
314 | ||
e035c280 SP |
315 | /* Will be freed by next 'damon_set_schemes()' below */ |
316 | scheme = damon_reclaim_new_scheme(); | |
317 | if (!scheme) | |
318 | return -ENOMEM; | |
319 | err = damon_set_schemes(ctx, &scheme, 1); | |
320 | if (err) | |
321 | return err; | |
322 | ||
43b0536c SP |
323 | if (monitor_region_start > monitor_region_end) |
324 | return -EINVAL; | |
325 | if (!monitor_region_start && !monitor_region_end && | |
326 | !get_monitoring_region(&monitor_region_start, | |
327 | &monitor_region_end)) | |
328 | return -EINVAL; | |
e035c280 SP |
329 | addr_range.start = monitor_region_start; |
330 | addr_range.end = monitor_region_end; | |
331 | return damon_set_regions(target, &addr_range, 1); | |
332 | } | |
43b0536c | 333 | |
e035c280 SP |
334 | static int damon_reclaim_turn(bool on) |
335 | { | |
336 | int err; | |
337 | ||
338 | if (!on) { | |
339 | err = damon_stop(&ctx, 1); | |
340 | if (!err) | |
341 | kdamond_pid = -1; | |
342 | return err; | |
43b0536c | 343 | } |
e035c280 SP |
344 | |
345 | err = damon_reclaim_apply_parameters(); | |
43b0536c | 346 | if (err) |
e035c280 | 347 | return err; |
43b0536c | 348 | |
8b9b0d33 | 349 | err = damon_start(&ctx, 1, true); |
e035c280 SP |
350 | if (err) |
351 | return err; | |
352 | kdamond_pid = ctx->kdamond->pid; | |
353 | return 0; | |
43b0536c SP |
354 | } |
355 | ||
356 | #define ENABLE_CHECK_INTERVAL_MS 1000 | |
357 | static struct delayed_work damon_reclaim_timer; | |
358 | static void damon_reclaim_timer_fn(struct work_struct *work) | |
359 | { | |
360 | static bool last_enabled; | |
361 | bool now_enabled; | |
362 | ||
363 | now_enabled = enabled; | |
364 | if (last_enabled != now_enabled) { | |
365 | if (!damon_reclaim_turn(now_enabled)) | |
366 | last_enabled = now_enabled; | |
367 | else | |
368 | enabled = last_enabled; | |
369 | } | |
370 | ||
059342d1 HT |
371 | if (enabled) |
372 | schedule_delayed_work(&damon_reclaim_timer, | |
43b0536c SP |
373 | msecs_to_jiffies(ENABLE_CHECK_INTERVAL_MS)); |
374 | } | |
375 | static DECLARE_DELAYED_WORK(damon_reclaim_timer, damon_reclaim_timer_fn); | |
376 | ||
29492829 SP |
377 | static bool damon_reclaim_initialized; |
378 | ||
059342d1 HT |
379 | static int enabled_store(const char *val, |
380 | const struct kernel_param *kp) | |
381 | { | |
382 | int rc = param_set_bool(val, kp); | |
383 | ||
384 | if (rc < 0) | |
385 | return rc; | |
386 | ||
29492829 SP |
387 | /* system_wq might not initialized yet */ |
388 | if (!damon_reclaim_initialized) | |
389 | return rc; | |
390 | ||
059342d1 HT |
391 | if (enabled) |
392 | schedule_delayed_work(&damon_reclaim_timer, 0); | |
393 | ||
394 | return 0; | |
395 | } | |
396 | ||
397 | static const struct kernel_param_ops enabled_param_ops = { | |
398 | .set = enabled_store, | |
399 | .get = param_get_bool, | |
400 | }; | |
401 | ||
402 | module_param_cb(enabled, &enabled_param_ops, &enabled, 0600); | |
403 | MODULE_PARM_DESC(enabled, | |
404 | "Enable or disable DAMON_RECLAIM (default: disabled)"); | |
405 | ||
60e52e7c SP |
406 | static int damon_reclaim_after_aggregation(struct damon_ctx *c) |
407 | { | |
408 | struct damos *s; | |
e035c280 | 409 | int err = 0; |
60e52e7c SP |
410 | |
411 | /* update the stats parameter */ | |
412 | damon_for_each_scheme(s, c) { | |
413 | nr_reclaim_tried_regions = s->stat.nr_tried; | |
414 | bytes_reclaim_tried_regions = s->stat.sz_tried; | |
415 | nr_reclaimed_regions = s->stat.nr_applied; | |
416 | bytes_reclaimed_regions = s->stat.sz_applied; | |
417 | nr_quota_exceeds = s->stat.qt_exceeds; | |
418 | } | |
e035c280 SP |
419 | |
420 | if (commit_inputs) { | |
421 | err = damon_reclaim_apply_parameters(); | |
422 | commit_inputs = false; | |
423 | } | |
424 | return err; | |
425 | } | |
426 | ||
427 | static int damon_reclaim_after_wmarks_check(struct damon_ctx *c) | |
428 | { | |
429 | int err = 0; | |
430 | ||
431 | if (commit_inputs) { | |
432 | err = damon_reclaim_apply_parameters(); | |
433 | commit_inputs = false; | |
434 | } | |
435 | return err; | |
60e52e7c SP |
436 | } |
437 | ||
43b0536c SP |
438 | static int __init damon_reclaim_init(void) |
439 | { | |
440 | ctx = damon_new_ctx(); | |
441 | if (!ctx) | |
442 | return -ENOMEM; | |
443 | ||
4d69c345 SP |
444 | if (damon_select_ops(ctx, DAMON_OPS_PADDR)) |
445 | return -EINVAL; | |
446 | ||
e035c280 | 447 | ctx->callback.after_wmarks_check = damon_reclaim_after_wmarks_check; |
60e52e7c | 448 | ctx->callback.after_aggregation = damon_reclaim_after_aggregation; |
43b0536c | 449 | |
1971bd63 | 450 | target = damon_new_target(); |
43b0536c SP |
451 | if (!target) { |
452 | damon_destroy_ctx(ctx); | |
453 | return -ENOMEM; | |
454 | } | |
455 | damon_add_target(ctx, target); | |
456 | ||
457 | schedule_delayed_work(&damon_reclaim_timer, 0); | |
29492829 SP |
458 | |
459 | damon_reclaim_initialized = true; | |
43b0536c SP |
460 | return 0; |
461 | } | |
462 | ||
463 | module_init(damon_reclaim_init); |