Commit | Line | Data |
---|---|---|
2224d848 SP |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Data Access Monitor | |
4 | * | |
5 | * Author: SeongJae Park <sjpark@amazon.de> | |
6 | */ | |
7 | ||
8 | #define pr_fmt(fmt) "damon: " fmt | |
9 | ||
10 | #include <linux/damon.h> | |
11 | #include <linux/delay.h> | |
12 | #include <linux/kthread.h> | |
ee801b7d | 13 | #include <linux/mm.h> |
2224d848 | 14 | #include <linux/slab.h> |
38683e00 | 15 | #include <linux/string.h> |
2224d848 | 16 | |
2fcb9362 SP |
17 | #define CREATE_TRACE_POINTS |
18 | #include <trace/events/damon.h> | |
19 | ||
17ccae8b SP |
20 | #ifdef CONFIG_DAMON_KUNIT_TEST |
21 | #undef DAMON_MIN_REGION | |
22 | #define DAMON_MIN_REGION 1 | |
23 | #endif | |
24 | ||
2224d848 SP |
25 | static DEFINE_MUTEX(damon_lock); |
26 | static int nr_running_ctxs; | |
27 | ||
f23b8eee SP |
28 | /* |
29 | * Construct a damon_region struct | |
30 | * | |
31 | * Returns the pointer to the new struct if success, or NULL otherwise | |
32 | */ | |
33 | struct damon_region *damon_new_region(unsigned long start, unsigned long end) | |
34 | { | |
35 | struct damon_region *region; | |
36 | ||
37 | region = kmalloc(sizeof(*region), GFP_KERNEL); | |
38 | if (!region) | |
39 | return NULL; | |
40 | ||
41 | region->ar.start = start; | |
42 | region->ar.end = end; | |
43 | region->nr_accesses = 0; | |
44 | INIT_LIST_HEAD(®ion->list); | |
45 | ||
fda504fa SP |
46 | region->age = 0; |
47 | region->last_nr_accesses = 0; | |
48 | ||
f23b8eee SP |
49 | return region; |
50 | } | |
51 | ||
f23b8eee SP |
52 | void damon_add_region(struct damon_region *r, struct damon_target *t) |
53 | { | |
54 | list_add_tail(&r->list, &t->regions_list); | |
b9a6ac4e | 55 | t->nr_regions++; |
f23b8eee SP |
56 | } |
57 | ||
b9a6ac4e | 58 | static void damon_del_region(struct damon_region *r, struct damon_target *t) |
f23b8eee SP |
59 | { |
60 | list_del(&r->list); | |
b9a6ac4e | 61 | t->nr_regions--; |
f23b8eee SP |
62 | } |
63 | ||
/* Release the memory of region @r.  The region is not unlinked here. */
static void damon_free_region(struct damon_region *r)
{
	kfree(r);
}
68 | ||
b9a6ac4e | 69 | void damon_destroy_region(struct damon_region *r, struct damon_target *t) |
f23b8eee | 70 | { |
b9a6ac4e | 71 | damon_del_region(r, t); |
f23b8eee SP |
72 | damon_free_region(r); |
73 | } | |
74 | ||
1f366e42 SP |
75 | struct damos *damon_new_scheme( |
76 | unsigned long min_sz_region, unsigned long max_sz_region, | |
77 | unsigned int min_nr_accesses, unsigned int max_nr_accesses, | |
78 | unsigned int min_age_region, unsigned int max_age_region, | |
ee801b7d SP |
79 | enum damos_action action, struct damos_quota *quota, |
80 | struct damos_watermarks *wmarks) | |
1f366e42 SP |
81 | { |
82 | struct damos *scheme; | |
83 | ||
84 | scheme = kmalloc(sizeof(*scheme), GFP_KERNEL); | |
85 | if (!scheme) | |
86 | return NULL; | |
87 | scheme->min_sz_region = min_sz_region; | |
88 | scheme->max_sz_region = max_sz_region; | |
89 | scheme->min_nr_accesses = min_nr_accesses; | |
90 | scheme->max_nr_accesses = max_nr_accesses; | |
91 | scheme->min_age_region = min_age_region; | |
92 | scheme->max_age_region = max_age_region; | |
93 | scheme->action = action; | |
0e92c2ee | 94 | scheme->stat = (struct damos_stat){}; |
1f366e42 SP |
95 | INIT_LIST_HEAD(&scheme->list); |
96 | ||
1cd24303 | 97 | scheme->quota.ms = quota->ms; |
2b8a248d SP |
98 | scheme->quota.sz = quota->sz; |
99 | scheme->quota.reset_interval = quota->reset_interval; | |
38683e00 SP |
100 | scheme->quota.weight_sz = quota->weight_sz; |
101 | scheme->quota.weight_nr_accesses = quota->weight_nr_accesses; | |
102 | scheme->quota.weight_age = quota->weight_age; | |
1cd24303 SP |
103 | scheme->quota.total_charged_sz = 0; |
104 | scheme->quota.total_charged_ns = 0; | |
105 | scheme->quota.esz = 0; | |
2b8a248d SP |
106 | scheme->quota.charged_sz = 0; |
107 | scheme->quota.charged_from = 0; | |
50585192 SP |
108 | scheme->quota.charge_target_from = NULL; |
109 | scheme->quota.charge_addr_from = 0; | |
2b8a248d | 110 | |
ee801b7d SP |
111 | scheme->wmarks.metric = wmarks->metric; |
112 | scheme->wmarks.interval = wmarks->interval; | |
113 | scheme->wmarks.high = wmarks->high; | |
114 | scheme->wmarks.mid = wmarks->mid; | |
115 | scheme->wmarks.low = wmarks->low; | |
116 | scheme->wmarks.activated = true; | |
117 | ||
1f366e42 SP |
118 | return scheme; |
119 | } | |
120 | ||
121 | void damon_add_scheme(struct damon_ctx *ctx, struct damos *s) | |
122 | { | |
123 | list_add_tail(&s->list, &ctx->schemes); | |
124 | } | |
125 | ||
126 | static void damon_del_scheme(struct damos *s) | |
127 | { | |
128 | list_del(&s->list); | |
129 | } | |
130 | ||
/* Release the memory of scheme @s.  The scheme is not unlinked here. */
static void damon_free_scheme(struct damos *s)
{
	kfree(s);
}
135 | ||
136 | void damon_destroy_scheme(struct damos *s) | |
137 | { | |
138 | damon_del_scheme(s); | |
139 | damon_free_scheme(s); | |
140 | } | |
141 | ||
f23b8eee SP |
142 | /* |
143 | * Construct a damon_target struct | |
144 | * | |
145 | * Returns the pointer to the new struct if success, or NULL otherwise | |
146 | */ | |
1971bd63 | 147 | struct damon_target *damon_new_target(void) |
f23b8eee SP |
148 | { |
149 | struct damon_target *t; | |
150 | ||
151 | t = kmalloc(sizeof(*t), GFP_KERNEL); | |
152 | if (!t) | |
153 | return NULL; | |
154 | ||
1971bd63 | 155 | t->pid = NULL; |
b9a6ac4e | 156 | t->nr_regions = 0; |
f23b8eee SP |
157 | INIT_LIST_HEAD(&t->regions_list); |
158 | ||
159 | return t; | |
160 | } | |
161 | ||
162 | void damon_add_target(struct damon_ctx *ctx, struct damon_target *t) | |
163 | { | |
b9a6ac4e | 164 | list_add_tail(&t->list, &ctx->adaptive_targets); |
f23b8eee SP |
165 | } |
166 | ||
b5ca3e83 XH |
167 | bool damon_targets_empty(struct damon_ctx *ctx) |
168 | { | |
169 | return list_empty(&ctx->adaptive_targets); | |
170 | } | |
171 | ||
f23b8eee SP |
172 | static void damon_del_target(struct damon_target *t) |
173 | { | |
174 | list_del(&t->list); | |
175 | } | |
176 | ||
177 | void damon_free_target(struct damon_target *t) | |
178 | { | |
179 | struct damon_region *r, *next; | |
180 | ||
181 | damon_for_each_region_safe(r, next, t) | |
182 | damon_free_region(r); | |
183 | kfree(t); | |
184 | } | |
185 | ||
186 | void damon_destroy_target(struct damon_target *t) | |
187 | { | |
188 | damon_del_target(t); | |
189 | damon_free_target(t); | |
190 | } | |
191 | ||
b9a6ac4e SP |
192 | unsigned int damon_nr_regions(struct damon_target *t) |
193 | { | |
194 | return t->nr_regions; | |
195 | } | |
196 | ||
2224d848 SP |
197 | struct damon_ctx *damon_new_ctx(void) |
198 | { | |
199 | struct damon_ctx *ctx; | |
200 | ||
201 | ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); | |
202 | if (!ctx) | |
203 | return NULL; | |
204 | ||
205 | ctx->sample_interval = 5 * 1000; | |
206 | ctx->aggr_interval = 100 * 1000; | |
207 | ctx->primitive_update_interval = 60 * 1000 * 1000; | |
208 | ||
209 | ktime_get_coarse_ts64(&ctx->last_aggregation); | |
210 | ctx->last_primitive_update = ctx->last_aggregation; | |
211 | ||
212 | mutex_init(&ctx->kdamond_lock); | |
213 | ||
b9a6ac4e SP |
214 | ctx->min_nr_regions = 10; |
215 | ctx->max_nr_regions = 1000; | |
216 | ||
217 | INIT_LIST_HEAD(&ctx->adaptive_targets); | |
1f366e42 | 218 | INIT_LIST_HEAD(&ctx->schemes); |
2224d848 SP |
219 | |
220 | return ctx; | |
221 | } | |
222 | ||
f23b8eee | 223 | static void damon_destroy_targets(struct damon_ctx *ctx) |
2224d848 | 224 | { |
f23b8eee SP |
225 | struct damon_target *t, *next_t; |
226 | ||
227 | if (ctx->primitive.cleanup) { | |
2224d848 | 228 | ctx->primitive.cleanup(ctx); |
f23b8eee SP |
229 | return; |
230 | } | |
231 | ||
232 | damon_for_each_target_safe(t, next_t, ctx) | |
233 | damon_destroy_target(t); | |
234 | } | |
235 | ||
236 | void damon_destroy_ctx(struct damon_ctx *ctx) | |
237 | { | |
1f366e42 SP |
238 | struct damos *s, *next_s; |
239 | ||
f23b8eee | 240 | damon_destroy_targets(ctx); |
1f366e42 SP |
241 | |
242 | damon_for_each_scheme_safe(s, next_s, ctx) | |
243 | damon_destroy_scheme(s); | |
244 | ||
2224d848 SP |
245 | kfree(ctx); |
246 | } | |
247 | ||
248 | /** | |
249 | * damon_set_attrs() - Set attributes for the monitoring. | |
250 | * @ctx: monitoring context | |
251 | * @sample_int: time interval between samplings | |
252 | * @aggr_int: time interval between aggregations | |
253 | * @primitive_upd_int: time interval between monitoring primitive updates | |
b9a6ac4e SP |
254 | * @min_nr_reg: minimal number of regions |
255 | * @max_nr_reg: maximum number of regions | |
2224d848 SP |
256 | * |
257 | * This function should not be called while the kdamond is running. | |
258 | * Every time interval is in micro-seconds. | |
259 | * | |
260 | * Return: 0 on success, negative error code otherwise. | |
261 | */ | |
262 | int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, | |
b9a6ac4e SP |
263 | unsigned long aggr_int, unsigned long primitive_upd_int, |
264 | unsigned long min_nr_reg, unsigned long max_nr_reg) | |
2224d848 | 265 | { |
1afaf5cb | 266 | if (min_nr_reg < 3) |
b9a6ac4e | 267 | return -EINVAL; |
1afaf5cb | 268 | if (min_nr_reg > max_nr_reg) |
b9a6ac4e | 269 | return -EINVAL; |
b9a6ac4e | 270 | |
2224d848 SP |
271 | ctx->sample_interval = sample_int; |
272 | ctx->aggr_interval = aggr_int; | |
273 | ctx->primitive_update_interval = primitive_upd_int; | |
b9a6ac4e SP |
274 | ctx->min_nr_regions = min_nr_reg; |
275 | ctx->max_nr_regions = max_nr_reg; | |
2224d848 SP |
276 | |
277 | return 0; | |
278 | } | |
279 | ||
1f366e42 SP |
280 | /** |
281 | * damon_set_schemes() - Set data access monitoring based operation schemes. | |
282 | * @ctx: monitoring context | |
283 | * @schemes: array of the schemes | |
284 | * @nr_schemes: number of entries in @schemes | |
285 | * | |
286 | * This function should not be called while the kdamond of the context is | |
287 | * running. | |
288 | * | |
289 | * Return: 0 if success, or negative error code otherwise. | |
290 | */ | |
291 | int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes, | |
292 | ssize_t nr_schemes) | |
293 | { | |
294 | struct damos *s, *next; | |
295 | ssize_t i; | |
296 | ||
297 | damon_for_each_scheme_safe(s, next, ctx) | |
298 | damon_destroy_scheme(s); | |
299 | for (i = 0; i < nr_schemes; i++) | |
300 | damon_add_scheme(ctx, schemes[i]); | |
301 | return 0; | |
302 | } | |
303 | ||
4bc05954 SP |
304 | /** |
305 | * damon_nr_running_ctxs() - Return number of currently running contexts. | |
306 | */ | |
307 | int damon_nr_running_ctxs(void) | |
308 | { | |
309 | int nr_ctxs; | |
310 | ||
311 | mutex_lock(&damon_lock); | |
312 | nr_ctxs = nr_running_ctxs; | |
313 | mutex_unlock(&damon_lock); | |
314 | ||
315 | return nr_ctxs; | |
316 | } | |
317 | ||
b9a6ac4e SP |
318 | /* Returns the size upper limit for each monitoring region */ |
319 | static unsigned long damon_region_sz_limit(struct damon_ctx *ctx) | |
320 | { | |
321 | struct damon_target *t; | |
322 | struct damon_region *r; | |
323 | unsigned long sz = 0; | |
324 | ||
325 | damon_for_each_target(t, ctx) { | |
326 | damon_for_each_region(r, t) | |
327 | sz += r->ar.end - r->ar.start; | |
328 | } | |
329 | ||
330 | if (ctx->min_nr_regions) | |
331 | sz /= ctx->min_nr_regions; | |
332 | if (sz < DAMON_MIN_REGION) | |
333 | sz = DAMON_MIN_REGION; | |
334 | ||
335 | return sz; | |
336 | } | |
337 | ||
2224d848 SP |
338 | static int kdamond_fn(void *data); |
339 | ||
340 | /* | |
341 | * __damon_start() - Starts monitoring with given context. | |
342 | * @ctx: monitoring context | |
343 | * | |
344 | * This function should be called while damon_lock is hold. | |
345 | * | |
346 | * Return: 0 on success, negative error code otherwise. | |
347 | */ | |
348 | static int __damon_start(struct damon_ctx *ctx) | |
349 | { | |
350 | int err = -EBUSY; | |
351 | ||
352 | mutex_lock(&ctx->kdamond_lock); | |
353 | if (!ctx->kdamond) { | |
354 | err = 0; | |
2224d848 SP |
355 | ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d", |
356 | nr_running_ctxs); | |
357 | if (IS_ERR(ctx->kdamond)) { | |
358 | err = PTR_ERR(ctx->kdamond); | |
7ec1992b | 359 | ctx->kdamond = NULL; |
2224d848 SP |
360 | } |
361 | } | |
362 | mutex_unlock(&ctx->kdamond_lock); | |
363 | ||
364 | return err; | |
365 | } | |
366 | ||
367 | /** | |
368 | * damon_start() - Starts the monitorings for a given group of contexts. | |
369 | * @ctxs: an array of the pointers for contexts to start monitoring | |
370 | * @nr_ctxs: size of @ctxs | |
371 | * | |
372 | * This function starts a group of monitoring threads for a group of monitoring | |
373 | * contexts. One thread per each context is created and run in parallel. The | |
374 | * caller should handle synchronization between the threads by itself. If a | |
375 | * group of threads that created by other 'damon_start()' call is currently | |
376 | * running, this function does nothing but returns -EBUSY. | |
377 | * | |
378 | * Return: 0 on success, negative error code otherwise. | |
379 | */ | |
380 | int damon_start(struct damon_ctx **ctxs, int nr_ctxs) | |
381 | { | |
382 | int i; | |
383 | int err = 0; | |
384 | ||
385 | mutex_lock(&damon_lock); | |
386 | if (nr_running_ctxs) { | |
387 | mutex_unlock(&damon_lock); | |
388 | return -EBUSY; | |
389 | } | |
390 | ||
391 | for (i = 0; i < nr_ctxs; i++) { | |
392 | err = __damon_start(ctxs[i]); | |
393 | if (err) | |
394 | break; | |
395 | nr_running_ctxs++; | |
396 | } | |
397 | mutex_unlock(&damon_lock); | |
398 | ||
399 | return err; | |
400 | } | |
401 | ||
402 | /* | |
403 | * __damon_stop() - Stops monitoring of given context. | |
404 | * @ctx: monitoring context | |
405 | * | |
406 | * Return: 0 on success, negative error code otherwise. | |
407 | */ | |
408 | static int __damon_stop(struct damon_ctx *ctx) | |
409 | { | |
0f91d133 CD |
410 | struct task_struct *tsk; |
411 | ||
2224d848 | 412 | mutex_lock(&ctx->kdamond_lock); |
0f91d133 CD |
413 | tsk = ctx->kdamond; |
414 | if (tsk) { | |
415 | get_task_struct(tsk); | |
2224d848 | 416 | mutex_unlock(&ctx->kdamond_lock); |
0f91d133 CD |
417 | kthread_stop(tsk); |
418 | put_task_struct(tsk); | |
2224d848 SP |
419 | return 0; |
420 | } | |
421 | mutex_unlock(&ctx->kdamond_lock); | |
422 | ||
423 | return -EPERM; | |
424 | } | |
425 | ||
/**
 * damon_stop() - Stops the monitorings for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to stop monitoring
 * @nr_ctxs:	size of @ctxs
 *
 * Stopping is attempted in array order and ends at the first failure.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
{
	int err = 0;
	int idx;

	for (idx = 0; idx < nr_ctxs; idx++) {
		/* nr_running_ctxs is decremented in kdamond_fn */
		err = __damon_stop(ctxs[idx]);
		if (err)
			break;
	}

	return err;
}
446 | ||
447 | /* | |
448 | * damon_check_reset_time_interval() - Check if a time interval is elapsed. | |
449 | * @baseline: the time to check whether the interval has elapsed since | |
450 | * @interval: the time interval (microseconds) | |
451 | * | |
452 | * See whether the given time interval has passed since the given baseline | |
453 | * time. If so, it also updates the baseline to current time for next check. | |
454 | * | |
455 | * Return: true if the time interval has passed, or false otherwise. | |
456 | */ | |
457 | static bool damon_check_reset_time_interval(struct timespec64 *baseline, | |
458 | unsigned long interval) | |
459 | { | |
460 | struct timespec64 now; | |
461 | ||
462 | ktime_get_coarse_ts64(&now); | |
463 | if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) < | |
464 | interval * 1000) | |
465 | return false; | |
466 | *baseline = now; | |
467 | return true; | |
468 | } | |
469 | ||
470 | /* | |
471 | * Check whether it is time to flush the aggregated information | |
472 | */ | |
473 | static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx) | |
474 | { | |
475 | return damon_check_reset_time_interval(&ctx->last_aggregation, | |
476 | ctx->aggr_interval); | |
477 | } | |
478 | ||
f23b8eee SP |
479 | /* |
480 | * Reset the aggregated monitoring results ('nr_accesses' of each region). | |
481 | */ | |
482 | static void kdamond_reset_aggregated(struct damon_ctx *c) | |
483 | { | |
484 | struct damon_target *t; | |
76fd0285 | 485 | unsigned int ti = 0; /* target's index */ |
f23b8eee SP |
486 | |
487 | damon_for_each_target(t, c) { | |
488 | struct damon_region *r; | |
489 | ||
2fcb9362 | 490 | damon_for_each_region(r, t) { |
76fd0285 | 491 | trace_damon_aggregated(t, ti, r, damon_nr_regions(t)); |
fda504fa | 492 | r->last_nr_accesses = r->nr_accesses; |
f23b8eee | 493 | r->nr_accesses = 0; |
2fcb9362 | 494 | } |
76fd0285 | 495 | ti++; |
f23b8eee SP |
496 | } |
497 | } | |
498 | ||
2b8a248d SP |
499 | static void damon_split_region_at(struct damon_ctx *ctx, |
500 | struct damon_target *t, struct damon_region *r, | |
501 | unsigned long sz_r); | |
502 | ||
38683e00 SP |
503 | static bool __damos_valid_target(struct damon_region *r, struct damos *s) |
504 | { | |
505 | unsigned long sz; | |
506 | ||
507 | sz = r->ar.end - r->ar.start; | |
508 | return s->min_sz_region <= sz && sz <= s->max_sz_region && | |
509 | s->min_nr_accesses <= r->nr_accesses && | |
510 | r->nr_accesses <= s->max_nr_accesses && | |
511 | s->min_age_region <= r->age && r->age <= s->max_age_region; | |
512 | } | |
513 | ||
514 | static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t, | |
515 | struct damon_region *r, struct damos *s) | |
516 | { | |
517 | bool ret = __damos_valid_target(r, s); | |
518 | ||
519 | if (!ret || !s->quota.esz || !c->primitive.get_scheme_score) | |
520 | return ret; | |
521 | ||
522 | return c->primitive.get_scheme_score(c, t, r, s) >= s->quota.min_score; | |
523 | } | |
524 | ||
1f366e42 SP |
525 | static void damon_do_apply_schemes(struct damon_ctx *c, |
526 | struct damon_target *t, | |
527 | struct damon_region *r) | |
528 | { | |
529 | struct damos *s; | |
1f366e42 SP |
530 | |
531 | damon_for_each_scheme(s, c) { | |
2b8a248d SP |
532 | struct damos_quota *quota = &s->quota; |
533 | unsigned long sz = r->ar.end - r->ar.start; | |
1cd24303 | 534 | struct timespec64 begin, end; |
0e92c2ee | 535 | unsigned long sz_applied = 0; |
2b8a248d | 536 | |
ee801b7d SP |
537 | if (!s->wmarks.activated) |
538 | continue; | |
539 | ||
2b8a248d | 540 | /* Check the quota */ |
1cd24303 | 541 | if (quota->esz && quota->charged_sz >= quota->esz) |
2b8a248d SP |
542 | continue; |
543 | ||
50585192 SP |
544 | /* Skip previously charged regions */ |
545 | if (quota->charge_target_from) { | |
546 | if (t != quota->charge_target_from) | |
547 | continue; | |
548 | if (r == damon_last_region(t)) { | |
549 | quota->charge_target_from = NULL; | |
550 | quota->charge_addr_from = 0; | |
551 | continue; | |
552 | } | |
553 | if (quota->charge_addr_from && | |
554 | r->ar.end <= quota->charge_addr_from) | |
555 | continue; | |
556 | ||
557 | if (quota->charge_addr_from && r->ar.start < | |
558 | quota->charge_addr_from) { | |
559 | sz = ALIGN_DOWN(quota->charge_addr_from - | |
560 | r->ar.start, DAMON_MIN_REGION); | |
561 | if (!sz) { | |
562 | if (r->ar.end - r->ar.start <= | |
563 | DAMON_MIN_REGION) | |
564 | continue; | |
565 | sz = DAMON_MIN_REGION; | |
566 | } | |
567 | damon_split_region_at(c, t, r, sz); | |
568 | r = damon_next_region(r); | |
569 | sz = r->ar.end - r->ar.start; | |
570 | } | |
571 | quota->charge_target_from = NULL; | |
572 | quota->charge_addr_from = 0; | |
573 | } | |
574 | ||
38683e00 | 575 | if (!damos_valid_target(c, t, r, s)) |
1f366e42 | 576 | continue; |
2b8a248d SP |
577 | |
578 | /* Apply the scheme */ | |
579 | if (c->primitive.apply_scheme) { | |
1cd24303 SP |
580 | if (quota->esz && |
581 | quota->charged_sz + sz > quota->esz) { | |
582 | sz = ALIGN_DOWN(quota->esz - quota->charged_sz, | |
2b8a248d SP |
583 | DAMON_MIN_REGION); |
584 | if (!sz) | |
585 | goto update_stat; | |
586 | damon_split_region_at(c, t, r, sz); | |
587 | } | |
1cd24303 | 588 | ktime_get_coarse_ts64(&begin); |
0e92c2ee | 589 | sz_applied = c->primitive.apply_scheme(c, t, r, s); |
1cd24303 SP |
590 | ktime_get_coarse_ts64(&end); |
591 | quota->total_charged_ns += timespec64_to_ns(&end) - | |
592 | timespec64_to_ns(&begin); | |
2b8a248d | 593 | quota->charged_sz += sz; |
1cd24303 | 594 | if (quota->esz && quota->charged_sz >= quota->esz) { |
50585192 SP |
595 | quota->charge_target_from = t; |
596 | quota->charge_addr_from = r->ar.end + 1; | |
597 | } | |
2b8a248d | 598 | } |
2f0b548c SP |
599 | if (s->action != DAMOS_STAT) |
600 | r->age = 0; | |
2b8a248d SP |
601 | |
602 | update_stat: | |
0e92c2ee SP |
603 | s->stat.nr_tried++; |
604 | s->stat.sz_tried += sz; | |
605 | if (sz_applied) | |
606 | s->stat.nr_applied++; | |
607 | s->stat.sz_applied += sz_applied; | |
1f366e42 SP |
608 | } |
609 | } | |
610 | ||
1cd24303 SP |
611 | /* Shouldn't be called if quota->ms and quota->sz are zero */ |
612 | static void damos_set_effective_quota(struct damos_quota *quota) | |
613 | { | |
614 | unsigned long throughput; | |
615 | unsigned long esz; | |
616 | ||
617 | if (!quota->ms) { | |
618 | quota->esz = quota->sz; | |
619 | return; | |
620 | } | |
621 | ||
622 | if (quota->total_charged_ns) | |
623 | throughput = quota->total_charged_sz * 1000000 / | |
624 | quota->total_charged_ns; | |
625 | else | |
626 | throughput = PAGE_SIZE * 1024; | |
627 | esz = throughput * quota->ms; | |
628 | ||
629 | if (quota->sz && quota->sz < esz) | |
630 | esz = quota->sz; | |
631 | quota->esz = esz; | |
632 | } | |
633 | ||
1f366e42 SP |
634 | static void kdamond_apply_schemes(struct damon_ctx *c) |
635 | { | |
636 | struct damon_target *t; | |
2b8a248d SP |
637 | struct damon_region *r, *next_r; |
638 | struct damos *s; | |
639 | ||
640 | damon_for_each_scheme(s, c) { | |
641 | struct damos_quota *quota = &s->quota; | |
38683e00 SP |
642 | unsigned long cumulated_sz; |
643 | unsigned int score, max_score = 0; | |
2b8a248d | 644 | |
ee801b7d SP |
645 | if (!s->wmarks.activated) |
646 | continue; | |
647 | ||
1cd24303 | 648 | if (!quota->ms && !quota->sz) |
2b8a248d SP |
649 | continue; |
650 | ||
651 | /* New charge window starts */ | |
652 | if (time_after_eq(jiffies, quota->charged_from + | |
653 | msecs_to_jiffies( | |
654 | quota->reset_interval))) { | |
6268eac3 SP |
655 | if (quota->esz && quota->charged_sz >= quota->esz) |
656 | s->stat.qt_exceeds++; | |
1cd24303 | 657 | quota->total_charged_sz += quota->charged_sz; |
2b8a248d SP |
658 | quota->charged_from = jiffies; |
659 | quota->charged_sz = 0; | |
1cd24303 | 660 | damos_set_effective_quota(quota); |
2b8a248d | 661 | } |
38683e00 SP |
662 | |
663 | if (!c->primitive.get_scheme_score) | |
664 | continue; | |
665 | ||
666 | /* Fill up the score histogram */ | |
667 | memset(quota->histogram, 0, sizeof(quota->histogram)); | |
668 | damon_for_each_target(t, c) { | |
669 | damon_for_each_region(r, t) { | |
670 | if (!__damos_valid_target(r, s)) | |
671 | continue; | |
672 | score = c->primitive.get_scheme_score( | |
673 | c, t, r, s); | |
674 | quota->histogram[score] += | |
675 | r->ar.end - r->ar.start; | |
676 | if (score > max_score) | |
677 | max_score = score; | |
678 | } | |
679 | } | |
680 | ||
681 | /* Set the min score limit */ | |
682 | for (cumulated_sz = 0, score = max_score; ; score--) { | |
683 | cumulated_sz += quota->histogram[score]; | |
684 | if (cumulated_sz >= quota->esz || !score) | |
685 | break; | |
686 | } | |
687 | quota->min_score = score; | |
2b8a248d | 688 | } |
1f366e42 SP |
689 | |
690 | damon_for_each_target(t, c) { | |
2b8a248d | 691 | damon_for_each_region_safe(r, next_r, t) |
1f366e42 SP |
692 | damon_do_apply_schemes(c, t, r); |
693 | } | |
694 | } | |
695 | ||
88f86dcf SP |
696 | static inline unsigned long sz_damon_region(struct damon_region *r) |
697 | { | |
698 | return r->ar.end - r->ar.start; | |
699 | } | |
b9a6ac4e SP |
700 | |
701 | /* | |
702 | * Merge two adjacent regions into one region | |
703 | */ | |
704 | static void damon_merge_two_regions(struct damon_target *t, | |
705 | struct damon_region *l, struct damon_region *r) | |
706 | { | |
707 | unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r); | |
708 | ||
709 | l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) / | |
710 | (sz_l + sz_r); | |
fda504fa | 711 | l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r); |
b9a6ac4e SP |
712 | l->ar.end = r->ar.end; |
713 | damon_destroy_region(r, t); | |
714 | } | |
715 | ||
b9a6ac4e SP |
716 | /* |
717 | * Merge adjacent regions having similar access frequencies | |
718 | * | |
719 | * t target affected by this merge operation | |
720 | * thres '->nr_accesses' diff threshold for the merge | |
721 | * sz_limit size upper limit of each region | |
722 | */ | |
723 | static void damon_merge_regions_of(struct damon_target *t, unsigned int thres, | |
724 | unsigned long sz_limit) | |
725 | { | |
726 | struct damon_region *r, *prev = NULL, *next; | |
727 | ||
728 | damon_for_each_region_safe(r, next, t) { | |
d720bbbd | 729 | if (abs(r->nr_accesses - r->last_nr_accesses) > thres) |
fda504fa SP |
730 | r->age = 0; |
731 | else | |
732 | r->age++; | |
733 | ||
b9a6ac4e | 734 | if (prev && prev->ar.end == r->ar.start && |
d720bbbd | 735 | abs(prev->nr_accesses - r->nr_accesses) <= thres && |
b9a6ac4e SP |
736 | sz_damon_region(prev) + sz_damon_region(r) <= sz_limit) |
737 | damon_merge_two_regions(t, prev, r); | |
738 | else | |
739 | prev = r; | |
740 | } | |
741 | } | |
742 | ||
743 | /* | |
744 | * Merge adjacent regions having similar access frequencies | |
745 | * | |
746 | * threshold '->nr_accesses' diff threshold for the merge | |
747 | * sz_limit size upper limit of each region | |
748 | * | |
749 | * This function merges monitoring target regions which are adjacent and their | |
750 | * access frequencies are similar. This is for minimizing the monitoring | |
751 | * overhead under the dynamically changeable access pattern. If a merge was | |
752 | * unnecessarily made, later 'kdamond_split_regions()' will revert it. | |
753 | */ | |
754 | static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold, | |
755 | unsigned long sz_limit) | |
756 | { | |
757 | struct damon_target *t; | |
758 | ||
759 | damon_for_each_target(t, c) | |
760 | damon_merge_regions_of(t, threshold, sz_limit); | |
761 | } | |
762 | ||
763 | /* | |
764 | * Split a region in two | |
765 | * | |
766 | * r the region to be split | |
767 | * sz_r size of the first sub-region that will be made | |
768 | */ | |
769 | static void damon_split_region_at(struct damon_ctx *ctx, | |
770 | struct damon_target *t, struct damon_region *r, | |
771 | unsigned long sz_r) | |
772 | { | |
773 | struct damon_region *new; | |
774 | ||
775 | new = damon_new_region(r->ar.start + sz_r, r->ar.end); | |
776 | if (!new) | |
777 | return; | |
778 | ||
779 | r->ar.end = new->ar.start; | |
780 | ||
fda504fa SP |
781 | new->age = r->age; |
782 | new->last_nr_accesses = r->last_nr_accesses; | |
783 | ||
b9a6ac4e SP |
784 | damon_insert_region(new, r, damon_next_region(r), t); |
785 | } | |
786 | ||
787 | /* Split every region in the given target into 'nr_subs' regions */ | |
788 | static void damon_split_regions_of(struct damon_ctx *ctx, | |
789 | struct damon_target *t, int nr_subs) | |
790 | { | |
791 | struct damon_region *r, *next; | |
792 | unsigned long sz_region, sz_sub = 0; | |
793 | int i; | |
794 | ||
795 | damon_for_each_region_safe(r, next, t) { | |
796 | sz_region = r->ar.end - r->ar.start; | |
797 | ||
798 | for (i = 0; i < nr_subs - 1 && | |
799 | sz_region > 2 * DAMON_MIN_REGION; i++) { | |
800 | /* | |
801 | * Randomly select size of left sub-region to be at | |
802 | * least 10 percent and at most 90% of original region | |
803 | */ | |
804 | sz_sub = ALIGN_DOWN(damon_rand(1, 10) * | |
805 | sz_region / 10, DAMON_MIN_REGION); | |
806 | /* Do not allow blank region */ | |
807 | if (sz_sub == 0 || sz_sub >= sz_region) | |
808 | continue; | |
809 | ||
810 | damon_split_region_at(ctx, t, r, sz_sub); | |
811 | sz_region = sz_sub; | |
812 | } | |
813 | } | |
814 | } | |
815 | ||
816 | /* | |
817 | * Split every target region into randomly-sized small regions | |
818 | * | |
819 | * This function splits every target region into random-sized small regions if | |
820 | * current total number of the regions is equal or smaller than half of the | |
821 | * user-specified maximum number of regions. This is for maximizing the | |
822 | * monitoring accuracy under the dynamically changeable access patterns. If a | |
823 | * split was unnecessarily made, later 'kdamond_merge_regions()' will revert | |
824 | * it. | |
825 | */ | |
826 | static void kdamond_split_regions(struct damon_ctx *ctx) | |
827 | { | |
828 | struct damon_target *t; | |
829 | unsigned int nr_regions = 0; | |
830 | static unsigned int last_nr_regions; | |
831 | int nr_subregions = 2; | |
832 | ||
833 | damon_for_each_target(t, ctx) | |
834 | nr_regions += damon_nr_regions(t); | |
835 | ||
836 | if (nr_regions > ctx->max_nr_regions / 2) | |
837 | return; | |
838 | ||
839 | /* Maybe the middle of the region has different access frequency */ | |
840 | if (last_nr_regions == nr_regions && | |
841 | nr_regions < ctx->max_nr_regions / 3) | |
842 | nr_subregions = 3; | |
843 | ||
844 | damon_for_each_target(t, ctx) | |
845 | damon_split_regions_of(ctx, t, nr_subregions); | |
846 | ||
847 | last_nr_regions = nr_regions; | |
848 | } | |
849 | ||
2224d848 SP |
850 | /* |
851 | * Check whether it is time to check and apply the target monitoring regions | |
852 | * | |
853 | * Returns true if it is. | |
854 | */ | |
855 | static bool kdamond_need_update_primitive(struct damon_ctx *ctx) | |
856 | { | |
857 | return damon_check_reset_time_interval(&ctx->last_primitive_update, | |
858 | ctx->primitive_update_interval); | |
859 | } | |
860 | ||
861 | /* | |
862 | * Check whether current monitoring should be stopped | |
863 | * | |
864 | * The monitoring is stopped when either the user requested to stop, or all | |
865 | * monitoring targets are invalid. | |
866 | * | |
867 | * Returns true if need to stop current monitoring. | |
868 | */ | |
869 | static bool kdamond_need_stop(struct damon_ctx *ctx) | |
870 | { | |
f23b8eee | 871 | struct damon_target *t; |
2224d848 | 872 | |
0f91d133 | 873 | if (kthread_should_stop()) |
2224d848 SP |
874 | return true; |
875 | ||
876 | if (!ctx->primitive.target_valid) | |
877 | return false; | |
878 | ||
f23b8eee SP |
879 | damon_for_each_target(t, ctx) { |
880 | if (ctx->primitive.target_valid(t)) | |
881 | return false; | |
882 | } | |
883 | ||
884 | return true; | |
2224d848 SP |
885 | } |
886 | ||
ee801b7d SP |
887 | static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric) |
888 | { | |
889 | struct sysinfo i; | |
890 | ||
891 | switch (metric) { | |
892 | case DAMOS_WMARK_FREE_MEM_RATE: | |
893 | si_meminfo(&i); | |
894 | return i.freeram * 1000 / i.totalram; | |
895 | default: | |
896 | break; | |
897 | } | |
898 | return -EINVAL; | |
899 | } | |
900 | ||
901 | /* | |
902 | * Returns zero if the scheme is active. Else, returns time to wait for next | |
903 | * watermark check in micro-seconds. | |
904 | */ | |
905 | static unsigned long damos_wmark_wait_us(struct damos *scheme) | |
906 | { | |
907 | unsigned long metric; | |
908 | ||
909 | if (scheme->wmarks.metric == DAMOS_WMARK_NONE) | |
910 | return 0; | |
911 | ||
912 | metric = damos_wmark_metric_value(scheme->wmarks.metric); | |
913 | /* higher than high watermark or lower than low watermark */ | |
914 | if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) { | |
915 | if (scheme->wmarks.activated) | |
01078655 | 916 | pr_debug("deactivate a scheme (%d) for %s wmark\n", |
ee801b7d SP |
917 | scheme->action, |
918 | metric > scheme->wmarks.high ? | |
919 | "high" : "low"); | |
920 | scheme->wmarks.activated = false; | |
921 | return scheme->wmarks.interval; | |
922 | } | |
923 | ||
924 | /* inactive and higher than middle watermark */ | |
925 | if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) && | |
926 | !scheme->wmarks.activated) | |
927 | return scheme->wmarks.interval; | |
928 | ||
929 | if (!scheme->wmarks.activated) | |
930 | pr_debug("activate a scheme (%d)\n", scheme->action); | |
931 | scheme->wmarks.activated = true; | |
932 | return 0; | |
933 | } | |
934 | ||
935 | static void kdamond_usleep(unsigned long usecs) | |
936 | { | |
4de46a30 SP |
937 | /* See Documentation/timers/timers-howto.rst for the thresholds */ |
938 | if (usecs > 20 * USEC_PER_MSEC) | |
70e92748 | 939 | schedule_timeout_idle(usecs_to_jiffies(usecs)); |
ee801b7d | 940 | else |
70e92748 | 941 | usleep_idle_range(usecs, usecs + 1); |
ee801b7d SP |
942 | } |
943 | ||
944 | /* Returns negative error code if it's not activated but should return */ | |
945 | static int kdamond_wait_activation(struct damon_ctx *ctx) | |
946 | { | |
947 | struct damos *s; | |
948 | unsigned long wait_time; | |
949 | unsigned long min_wait_time = 0; | |
950 | ||
951 | while (!kdamond_need_stop(ctx)) { | |
952 | damon_for_each_scheme(s, ctx) { | |
953 | wait_time = damos_wmark_wait_us(s); | |
954 | if (!min_wait_time || wait_time < min_wait_time) | |
955 | min_wait_time = wait_time; | |
956 | } | |
957 | if (!min_wait_time) | |
958 | return 0; | |
959 | ||
960 | kdamond_usleep(min_wait_time); | |
961 | } | |
962 | return -EBUSY; | |
963 | } | |
964 | ||
2224d848 SP |
965 | /* |
966 | * The monitoring daemon that runs as a kernel thread | |
967 | */ | |
968 | static int kdamond_fn(void *data) | |
969 | { | |
970 | struct damon_ctx *ctx = (struct damon_ctx *)data; | |
f23b8eee SP |
971 | struct damon_target *t; |
972 | struct damon_region *r, *next; | |
b9a6ac4e SP |
973 | unsigned int max_nr_accesses = 0; |
974 | unsigned long sz_limit = 0; | |
0f91d133 | 975 | bool done = false; |
2224d848 | 976 | |
42e4cef5 | 977 | pr_debug("kdamond (%d) starts\n", current->pid); |
2224d848 SP |
978 | |
979 | if (ctx->primitive.init) | |
980 | ctx->primitive.init(ctx); | |
981 | if (ctx->callback.before_start && ctx->callback.before_start(ctx)) | |
0f91d133 | 982 | done = true; |
2224d848 | 983 | |
b9a6ac4e SP |
984 | sz_limit = damon_region_sz_limit(ctx); |
985 | ||
0f91d133 | 986 | while (!kdamond_need_stop(ctx) && !done) { |
ee801b7d SP |
987 | if (kdamond_wait_activation(ctx)) |
988 | continue; | |
989 | ||
2224d848 SP |
990 | if (ctx->primitive.prepare_access_checks) |
991 | ctx->primitive.prepare_access_checks(ctx); | |
992 | if (ctx->callback.after_sampling && | |
993 | ctx->callback.after_sampling(ctx)) | |
0f91d133 | 994 | done = true; |
2224d848 | 995 | |
70e92748 | 996 | kdamond_usleep(ctx->sample_interval); |
2224d848 SP |
997 | |
998 | if (ctx->primitive.check_accesses) | |
b9a6ac4e | 999 | max_nr_accesses = ctx->primitive.check_accesses(ctx); |
2224d848 SP |
1000 | |
1001 | if (kdamond_aggregate_interval_passed(ctx)) { | |
b9a6ac4e SP |
1002 | kdamond_merge_regions(ctx, |
1003 | max_nr_accesses / 10, | |
1004 | sz_limit); | |
2224d848 SP |
1005 | if (ctx->callback.after_aggregation && |
1006 | ctx->callback.after_aggregation(ctx)) | |
0f91d133 | 1007 | done = true; |
1f366e42 | 1008 | kdamond_apply_schemes(ctx); |
f23b8eee | 1009 | kdamond_reset_aggregated(ctx); |
b9a6ac4e | 1010 | kdamond_split_regions(ctx); |
2224d848 SP |
1011 | if (ctx->primitive.reset_aggregated) |
1012 | ctx->primitive.reset_aggregated(ctx); | |
1013 | } | |
1014 | ||
1015 | if (kdamond_need_update_primitive(ctx)) { | |
1016 | if (ctx->primitive.update) | |
1017 | ctx->primitive.update(ctx); | |
b9a6ac4e | 1018 | sz_limit = damon_region_sz_limit(ctx); |
2224d848 SP |
1019 | } |
1020 | } | |
f23b8eee SP |
1021 | damon_for_each_target(t, ctx) { |
1022 | damon_for_each_region_safe(r, next, t) | |
b9a6ac4e | 1023 | damon_destroy_region(r, t); |
f23b8eee | 1024 | } |
2224d848 | 1025 | |
0f91d133 CD |
1026 | if (ctx->callback.before_terminate) |
1027 | ctx->callback.before_terminate(ctx); | |
2224d848 SP |
1028 | if (ctx->primitive.cleanup) |
1029 | ctx->primitive.cleanup(ctx); | |
1030 | ||
42e4cef5 | 1031 | pr_debug("kdamond (%d) finishes\n", current->pid); |
2224d848 SP |
1032 | mutex_lock(&ctx->kdamond_lock); |
1033 | ctx->kdamond = NULL; | |
1034 | mutex_unlock(&ctx->kdamond_lock); | |
1035 | ||
1036 | mutex_lock(&damon_lock); | |
1037 | nr_running_ctxs--; | |
1038 | mutex_unlock(&damon_lock); | |
1039 | ||
5f7fe2b9 | 1040 | return 0; |
2224d848 | 1041 | } |
17ccae8b SP |
1042 | |
1043 | #include "core-test.h" |