Commit | Line | Data |
---|---|---|
43b0536c SP |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * DAMON-based page reclamation | |
4 | * | |
5 | * Author: SeongJae Park <sj@kernel.org> | |
6 | */ | |
7 | ||
8 | #define pr_fmt(fmt) "damon-reclaim: " fmt | |
9 | ||
10 | #include <linux/damon.h> | |
11 | #include <linux/ioport.h> | |
12 | #include <linux/module.h> | |
13 | #include <linux/sched.h> | |
14 | #include <linux/workqueue.h> | |
15 | ||
16 | #ifdef MODULE_PARAM_PREFIX | |
17 | #undef MODULE_PARAM_PREFIX | |
18 | #endif | |
19 | #define MODULE_PARAM_PREFIX "damon_reclaim." | |
20 | ||
21 | /* | |
22 | * Enable or disable DAMON_RECLAIM. | |
23 | * | |
24 | * You can enable DAMON_RECLAIM by setting the value of this parameter as ``Y``. | |
25 | * Setting it as ``N`` disables DAMON_RECLAIM. Note that DAMON_RECLAIM could | |
26 | * do no real monitoring and reclamation due to the watermarks-based activation | |
27 | * condition. Refer to below descriptions for the watermarks parameter for | |
28 | * this. | |
29 | */ | |
30 | static bool enabled __read_mostly; | |
43b0536c | 31 | |
e035c280 SP |
32 | /* |
33 | * Make DAMON_RECLAIM read the input parameters again, except ``enabled``. | |
34 | * | |
35 | * Input parameters that are updated while DAMON_RECLAIM is running are not | |
36 | * applied by default. Once this parameter is set as ``Y``, DAMON_RECLAIM reads | |
37 | * values of parameters except ``enabled`` again. Once the re-reading is done, | |
38 | * this parameter is set as ``N``. If invalid parameters are found while the | |
39 | * re-reading, DAMON_RECLAIM will be disabled. | |
40 | */ | |
41 | static bool commit_inputs __read_mostly; | |
42 | module_param(commit_inputs, bool, 0600); | |
43 | ||
43b0536c SP |
44 | /* |
45 | * Time threshold for cold memory regions identification in microseconds. | |
46 | * | |
47 | * If a memory region is not accessed for this or longer time, DAMON_RECLAIM | |
48 | * identifies the region as cold, and reclaims. 120 seconds by default. | |
49 | */ | |
50 | static unsigned long min_age __read_mostly = 120000000; | |
51 | module_param(min_age, ulong, 0600); | |
52 | ||
53 | /* | |
54 | * Limit of time for trying the reclamation in milliseconds. | |
55 | * | |
56 | * DAMON_RECLAIM tries to use only up to this time within a time window | |
57 | * (quota_reset_interval_ms) for trying reclamation of cold pages. This can be | |
58 | * used for limiting CPU consumption of DAMON_RECLAIM. If the value is zero, | |
59 | * the limit is disabled. | |
60 | * | |
61 | * 10 ms by default. | |
62 | */ | |
63 | static unsigned long quota_ms __read_mostly = 10; | |
64 | module_param(quota_ms, ulong, 0600); | |
65 | ||
66 | /* | |
67 | * Limit of size of memory for the reclamation in bytes. | |
68 | * | |
69 | * DAMON_RECLAIM charges the amount of memory which it tried to reclaim within | |
70 | * a time window (quota_reset_interval_ms) and ensures no more than this limit | |
71 | * is tried. This can be used for limiting consumption of CPU and IO. If this | |
72 | * value is zero, the limit is disabled. | |
73 | * | |
74 | * 128 MiB by default. | |
75 | */ | |
76 | static unsigned long quota_sz __read_mostly = 128 * 1024 * 1024; | |
77 | module_param(quota_sz, ulong, 0600); | |
78 | ||
79 | /* | |
80 | * The time/size quota charge reset interval in milliseconds. | |
81 | * | |
82 | * The charge reset interval for the quota of time (quota_ms) and size | |
83 | * (quota_sz). That is, DAMON_RECLAIM does not try reclamation for more than | |
84 | * quota_ms milliseconds or quota_sz bytes within quota_reset_interval_ms | |
85 | * milliseconds. | |
86 | * | |
87 | * 1 second by default. | |
88 | */ | |
89 | static unsigned long quota_reset_interval_ms __read_mostly = 1000; | |
90 | module_param(quota_reset_interval_ms, ulong, 0600); | |
91 | ||
92 | /* | |
93 | * The watermarks check time interval in microseconds. | |
94 | * | |
95 | * Minimal time to wait before checking the watermarks, when DAMON_RECLAIM is | |
96 | * enabled but inactive due to its watermarks rule. 5 seconds by default. | |
97 | */ | |
98 | static unsigned long wmarks_interval __read_mostly = 5000000; | |
99 | module_param(wmarks_interval, ulong, 0600); | |
100 | ||
101 | /* | |
102 | * Free memory rate (per thousand) for the high watermark. | |
103 | * | |
104 | * If free memory of the system in bytes per thousand bytes is higher than | |
105 | * this, DAMON_RECLAIM becomes inactive, so it does nothing but periodically | |
106 | * checks the watermarks. 500 (50%) by default. | |
107 | */ | |
108 | static unsigned long wmarks_high __read_mostly = 500; | |
109 | module_param(wmarks_high, ulong, 0600); | |
110 | ||
111 | /* | |
112 | * Free memory rate (per thousand) for the middle watermark. | |
113 | * | |
114 | * If free memory of the system in bytes per thousand bytes is between this and | |
115 | * the low watermark, DAMON_RECLAIM becomes active, so starts the monitoring | |
116 | * and the reclaiming. 400 (40%) by default. | |
117 | */ | |
118 | static unsigned long wmarks_mid __read_mostly = 400; | |
119 | module_param(wmarks_mid, ulong, 0600); | |
120 | ||
121 | /* | |
122 | * Free memory rate (per thousand) for the low watermark. | |
123 | * | |
124 | * If free memory of the system in bytes per thousand bytes is lower than this, | |
125 | * DAMON_RECLAIM becomes inactive, so it does nothing but periodically checks | |
126 | * the watermarks. In the case, the system falls back to the LRU-based page | |
127 | * granularity reclamation logic. 200 (20%) by default. | |
128 | */ | |
129 | static unsigned long wmarks_low __read_mostly = 200; | |
130 | module_param(wmarks_low, ulong, 0600); | |
131 | ||
132 | /* | |
133 | * Sampling interval for the monitoring in microseconds. | |
134 | * | |
135 | * The sampling interval of DAMON for the cold memory monitoring. Please refer | |
136 | * to the DAMON documentation for more detail. 5 ms by default. | |
137 | */ | |
138 | static unsigned long sample_interval __read_mostly = 5000; | |
139 | module_param(sample_interval, ulong, 0600); | |
140 | ||
141 | /* | |
142 | * Aggregation interval for the monitoring in microseconds. | |
143 | * | |
144 | * The aggregation interval of DAMON for the cold memory monitoring. Please | |
145 | * refer to the DAMON documentation for more detail. 100 ms by default. | |
146 | */ | |
147 | static unsigned long aggr_interval __read_mostly = 100000; | |
148 | module_param(aggr_interval, ulong, 0600); | |
149 | ||
150 | /* | |
151 | * Minimum number of monitoring regions. | |
152 | * | |
153 | * The minimal number of monitoring regions of DAMON for the cold memory | |
154 | * monitoring. This can be used to set lower-bound of the monitoring quality. | |
155 | * But, setting this too high could result in increased monitoring overhead. | |
156 | * Please refer to the DAMON documentation for more detail. 10 by default. | |
157 | */ | |
158 | static unsigned long min_nr_regions __read_mostly = 10; | |
159 | module_param(min_nr_regions, ulong, 0600); | |
160 | ||
161 | /* | |
162 | * Maximum number of monitoring regions. | |
163 | * | |
164 | * The maximum number of monitoring regions of DAMON for the cold memory | |
165 | * monitoring. This can be used to set upper-bound of the monitoring overhead. | |
166 | * However, setting this too low could result in bad monitoring quality. | |
167 | * Please refer to the DAMON documentation for more detail. 1000 by default. | |
168 | */ | |
169 | static unsigned long max_nr_regions __read_mostly = 1000; | |
170 | module_param(max_nr_regions, ulong, 0600); | |
171 | ||
172 | /* | |
173 | * Start of the target memory region in physical address. | |
174 | * | |
175 | * The start physical address of memory region that DAMON_RECLAIM will do work | |
176 | * against. By default, biggest System RAM is used as the region. | |
177 | */ | |
178 | static unsigned long monitor_region_start __read_mostly; | |
179 | module_param(monitor_region_start, ulong, 0600); | |
180 | ||
181 | /* | |
182 | * End of the target memory region in physical address. | |
183 | * | |
184 | * The end physical address of memory region that DAMON_RECLAIM will do work | |
185 | * against. By default, biggest System RAM is used as the region. | |
186 | */ | |
187 | static unsigned long monitor_region_end __read_mostly; | |
188 | module_param(monitor_region_end, ulong, 0600); | |
189 | ||
190 | /* | |
191 | * PID of the DAMON thread | |
192 | * | |
193 | * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread. | |
194 | * Else, -1. | |
195 | */ | |
196 | static int kdamond_pid __read_mostly = -1; | |
197 | module_param(kdamond_pid, int, 0400); | |
198 | ||
60e52e7c SP |
199 | /* |
200 | * Number of memory regions that tried to be reclaimed. | |
201 | */ | |
202 | static unsigned long nr_reclaim_tried_regions __read_mostly; | |
203 | module_param(nr_reclaim_tried_regions, ulong, 0400); | |
204 | ||
205 | /* | |
206 | * Total bytes of memory regions that tried to be reclaimed. | |
207 | */ | |
208 | static unsigned long bytes_reclaim_tried_regions __read_mostly; | |
209 | module_param(bytes_reclaim_tried_regions, ulong, 0400); | |
210 | ||
211 | /* | |
212 | * Number of memory regions that were successfully reclaimed. | |
213 | */ | |
214 | static unsigned long nr_reclaimed_regions __read_mostly; | |
215 | module_param(nr_reclaimed_regions, ulong, 0400); | |
216 | ||
217 | /* | |
218 | * Total bytes of memory regions that were successfully reclaimed. | |
219 | */ | |
220 | static unsigned long bytes_reclaimed_regions __read_mostly; | |
221 | module_param(bytes_reclaimed_regions, ulong, 0400); | |
222 | ||
223 | /* | |
224 | * Number of times that the time/space quota limits have been exceeded | |
225 | */ | |
226 | static unsigned long nr_quota_exceeds __read_mostly; | |
227 | module_param(nr_quota_exceeds, ulong, 0400); | |
228 | ||
43b0536c SP |
229 | static struct damon_ctx *ctx; |
230 | static struct damon_target *target; | |
231 | ||
/*
 * Accumulator handed to walk_system_ram(): holds the start and end addresses
 * of the largest resource seen so far during the walk.
 */
struct damon_reclaim_ram_walk_arg {
	/* copied from the chosen resource's ->start */
	unsigned long start;
	/* copied from the chosen resource's ->end */
	unsigned long end;
};
236 | ||
237 | static int walk_system_ram(struct resource *res, void *arg) | |
238 | { | |
239 | struct damon_reclaim_ram_walk_arg *a = arg; | |
240 | ||
2e14a8d3 | 241 | if (a->end - a->start < resource_size(res)) { |
43b0536c SP |
242 | a->start = res->start; |
243 | a->end = res->end; | |
244 | } | |
245 | return 0; | |
246 | } | |
247 | ||
248 | /* | |
249 | * Find biggest 'System RAM' resource and store its start and end address in | |
250 | * @start and @end, respectively. If no System RAM is found, returns false. | |
251 | */ | |
252 | static bool get_monitoring_region(unsigned long *start, unsigned long *end) | |
253 | { | |
254 | struct damon_reclaim_ram_walk_arg arg = {}; | |
255 | ||
256 | walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram); | |
257 | if (arg.end <= arg.start) | |
258 | return false; | |
259 | ||
260 | *start = arg.start; | |
261 | *end = arg.end; | |
262 | return true; | |
263 | } | |
264 | ||
265 | static struct damos *damon_reclaim_new_scheme(void) | |
266 | { | |
f5a79d7c YD |
267 | struct damos_access_pattern pattern = { |
268 | /* Find regions having PAGE_SIZE or larger size */ | |
269 | .min_sz_region = PAGE_SIZE, | |
270 | .max_sz_region = ULONG_MAX, | |
271 | /* and not accessed at all */ | |
272 | .min_nr_accesses = 0, | |
273 | .max_nr_accesses = 0, | |
274 | /* for min_age or more micro-seconds */ | |
275 | .min_age_region = min_age / aggr_interval, | |
276 | .max_age_region = UINT_MAX, | |
277 | }; | |
43b0536c SP |
278 | struct damos_watermarks wmarks = { |
279 | .metric = DAMOS_WMARK_FREE_MEM_RATE, | |
280 | .interval = wmarks_interval, | |
281 | .high = wmarks_high, | |
282 | .mid = wmarks_mid, | |
283 | .low = wmarks_low, | |
284 | }; | |
285 | struct damos_quota quota = { | |
286 | /* | |
287 | * Do not try reclamation for more than quota_ms milliseconds | |
288 | * or quota_sz bytes within quota_reset_interval_ms. | |
289 | */ | |
290 | .ms = quota_ms, | |
291 | .sz = quota_sz, | |
292 | .reset_interval = quota_reset_interval_ms, | |
293 | /* Within the quota, page out older regions first. */ | |
294 | .weight_sz = 0, | |
295 | .weight_nr_accesses = 0, | |
296 | .weight_age = 1 | |
297 | }; | |
f5a79d7c YD |
298 | |
299 | return damon_new_scheme( | |
300 | &pattern, | |
43b0536c SP |
301 | /* page out those, as soon as found */ |
302 | DAMOS_PAGEOUT, | |
303 | /* under the quota. */ | |
304 | "a, | |
305 | /* (De)activate this according to the watermarks. */ | |
306 | &wmarks); | |
43b0536c SP |
307 | } |
308 | ||
e035c280 | 309 | static int damon_reclaim_apply_parameters(void) |
43b0536c | 310 | { |
43b0536c | 311 | struct damos *scheme; |
e035c280 SP |
312 | struct damon_addr_range addr_range; |
313 | int err = 0; | |
43b0536c SP |
314 | |
315 | err = damon_set_attrs(ctx, sample_interval, aggr_interval, 0, | |
316 | min_nr_regions, max_nr_regions); | |
317 | if (err) | |
318 | return err; | |
319 | ||
e035c280 SP |
320 | /* Will be freed by next 'damon_set_schemes()' below */ |
321 | scheme = damon_reclaim_new_scheme(); | |
322 | if (!scheme) | |
323 | return -ENOMEM; | |
324 | err = damon_set_schemes(ctx, &scheme, 1); | |
325 | if (err) | |
326 | return err; | |
327 | ||
43b0536c SP |
328 | if (monitor_region_start > monitor_region_end) |
329 | return -EINVAL; | |
330 | if (!monitor_region_start && !monitor_region_end && | |
331 | !get_monitoring_region(&monitor_region_start, | |
332 | &monitor_region_end)) | |
333 | return -EINVAL; | |
e035c280 SP |
334 | addr_range.start = monitor_region_start; |
335 | addr_range.end = monitor_region_end; | |
336 | return damon_set_regions(target, &addr_range, 1); | |
337 | } | |
43b0536c | 338 | |
e035c280 SP |
339 | static int damon_reclaim_turn(bool on) |
340 | { | |
341 | int err; | |
342 | ||
343 | if (!on) { | |
344 | err = damon_stop(&ctx, 1); | |
345 | if (!err) | |
346 | kdamond_pid = -1; | |
347 | return err; | |
43b0536c | 348 | } |
e035c280 SP |
349 | |
350 | err = damon_reclaim_apply_parameters(); | |
43b0536c | 351 | if (err) |
e035c280 | 352 | return err; |
43b0536c | 353 | |
8b9b0d33 | 354 | err = damon_start(&ctx, 1, true); |
e035c280 SP |
355 | if (err) |
356 | return err; | |
357 | kdamond_pid = ctx->kdamond->pid; | |
358 | return 0; | |
43b0536c SP |
359 | } |
360 | ||
43b0536c SP |
361 | static struct delayed_work damon_reclaim_timer; |
362 | static void damon_reclaim_timer_fn(struct work_struct *work) | |
363 | { | |
364 | static bool last_enabled; | |
365 | bool now_enabled; | |
366 | ||
367 | now_enabled = enabled; | |
368 | if (last_enabled != now_enabled) { | |
369 | if (!damon_reclaim_turn(now_enabled)) | |
370 | last_enabled = now_enabled; | |
371 | else | |
372 | enabled = last_enabled; | |
373 | } | |
43b0536c SP |
374 | } |
375 | static DECLARE_DELAYED_WORK(damon_reclaim_timer, damon_reclaim_timer_fn); | |
376 | ||
29492829 SP |
377 | static bool damon_reclaim_initialized; |
378 | ||
d79905c7 | 379 | static int damon_reclaim_enabled_store(const char *val, |
059342d1 HT |
380 | const struct kernel_param *kp) |
381 | { | |
382 | int rc = param_set_bool(val, kp); | |
383 | ||
384 | if (rc < 0) | |
385 | return rc; | |
386 | ||
29492829 SP |
387 | /* system_wq might not initialized yet */ |
388 | if (!damon_reclaim_initialized) | |
389 | return rc; | |
390 | ||
f943e7e3 | 391 | schedule_delayed_work(&damon_reclaim_timer, 0); |
059342d1 HT |
392 | return 0; |
393 | } | |
394 | ||
395 | static const struct kernel_param_ops enabled_param_ops = { | |
d79905c7 | 396 | .set = damon_reclaim_enabled_store, |
059342d1 HT |
397 | .get = param_get_bool, |
398 | }; | |
399 | ||
400 | module_param_cb(enabled, &enabled_param_ops, &enabled, 0600); | |
401 | MODULE_PARM_DESC(enabled, | |
402 | "Enable or disable DAMON_RECLAIM (default: disabled)"); | |
403 | ||
f25ab3bd SP |
404 | static int damon_reclaim_handle_commit_inputs(void) |
405 | { | |
406 | int err; | |
407 | ||
408 | if (!commit_inputs) | |
409 | return 0; | |
410 | ||
411 | err = damon_reclaim_apply_parameters(); | |
412 | commit_inputs = false; | |
413 | return err; | |
414 | } | |
415 | ||
60e52e7c SP |
416 | static int damon_reclaim_after_aggregation(struct damon_ctx *c) |
417 | { | |
418 | struct damos *s; | |
419 | ||
420 | /* update the stats parameter */ | |
421 | damon_for_each_scheme(s, c) { | |
422 | nr_reclaim_tried_regions = s->stat.nr_tried; | |
423 | bytes_reclaim_tried_regions = s->stat.sz_tried; | |
424 | nr_reclaimed_regions = s->stat.nr_applied; | |
425 | bytes_reclaimed_regions = s->stat.sz_applied; | |
426 | nr_quota_exceeds = s->stat.qt_exceeds; | |
427 | } | |
e035c280 | 428 | |
f25ab3bd | 429 | return damon_reclaim_handle_commit_inputs(); |
e035c280 SP |
430 | } |
431 | ||
/*
 * 'after_wmarks_check' callback of the DAMON context.
 *
 * Only handles a pending 'commit_inputs' request; @c is not used.
 *
 * Returns the result of damon_reclaim_handle_commit_inputs().
 */
static int damon_reclaim_after_wmarks_check(struct damon_ctx *c)
{
	int err = damon_reclaim_handle_commit_inputs();

	return err;
}
436 | ||
43b0536c SP |
437 | static int __init damon_reclaim_init(void) |
438 | { | |
439 | ctx = damon_new_ctx(); | |
440 | if (!ctx) | |
441 | return -ENOMEM; | |
442 | ||
188043c7 JN |
443 | if (damon_select_ops(ctx, DAMON_OPS_PADDR)) { |
444 | damon_destroy_ctx(ctx); | |
4d69c345 | 445 | return -EINVAL; |
188043c7 | 446 | } |
4d69c345 | 447 | |
e035c280 | 448 | ctx->callback.after_wmarks_check = damon_reclaim_after_wmarks_check; |
60e52e7c | 449 | ctx->callback.after_aggregation = damon_reclaim_after_aggregation; |
43b0536c | 450 | |
1971bd63 | 451 | target = damon_new_target(); |
43b0536c SP |
452 | if (!target) { |
453 | damon_destroy_ctx(ctx); | |
454 | return -ENOMEM; | |
455 | } | |
456 | damon_add_target(ctx, target); | |
457 | ||
458 | schedule_delayed_work(&damon_reclaim_timer, 0); | |
29492829 SP |
459 | |
460 | damon_reclaim_initialized = true; | |
43b0536c SP |
461 | return 0; |
462 | } | |
463 | ||
464 | module_init(damon_reclaim_init); |