Commit | Line | Data |
---|---|---|
2224d848 SP |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Data Access Monitor | |
4 | * | |
5 | * Author: SeongJae Park <sjpark@amazon.de> | |
6 | */ | |
7 | ||
8 | #define pr_fmt(fmt) "damon: " fmt | |
9 | ||
10 | #include <linux/damon.h> | |
11 | #include <linux/delay.h> | |
12 | #include <linux/kthread.h> | |
ee801b7d | 13 | #include <linux/mm.h> |
2224d848 | 14 | #include <linux/slab.h> |
38683e00 | 15 | #include <linux/string.h> |
2224d848 | 16 | |
2fcb9362 SP |
17 | #define CREATE_TRACE_POINTS |
18 | #include <trace/events/damon.h> | |
19 | ||
17ccae8b SP |
20 | #ifdef CONFIG_DAMON_KUNIT_TEST |
21 | #undef DAMON_MIN_REGION | |
22 | #define DAMON_MIN_REGION 1 | |
23 | #endif | |
24 | ||
2224d848 SP |
25 | static DEFINE_MUTEX(damon_lock); |
26 | static int nr_running_ctxs; | |
27 | ||
f23b8eee SP |
28 | /* |
29 | * Construct a damon_region struct | |
30 | * | |
31 | * Returns the pointer to the new struct if success, or NULL otherwise | |
32 | */ | |
33 | struct damon_region *damon_new_region(unsigned long start, unsigned long end) | |
34 | { | |
35 | struct damon_region *region; | |
36 | ||
37 | region = kmalloc(sizeof(*region), GFP_KERNEL); | |
38 | if (!region) | |
39 | return NULL; | |
40 | ||
41 | region->ar.start = start; | |
42 | region->ar.end = end; | |
43 | region->nr_accesses = 0; | |
44 | INIT_LIST_HEAD(®ion->list); | |
45 | ||
fda504fa SP |
46 | region->age = 0; |
47 | region->last_nr_accesses = 0; | |
48 | ||
f23b8eee SP |
49 | return region; |
50 | } | |
51 | ||
/*
 * Add a region between two other regions
 *
 * The caller must ensure @prev and @next are adjacent entries of @t's
 * regions list; @r is linked in between and @t's region count is updated.
 */
inline void damon_insert_region(struct damon_region *r,
		struct damon_region *prev, struct damon_region *next,
		struct damon_target *t)
{
	__list_add(&r->list, &prev->list, &next->list);
	t->nr_regions++;
}

/* Append @r to the end of @t's regions list */
void damon_add_region(struct damon_region *r, struct damon_target *t)
{
	list_add_tail(&r->list, &t->regions_list);
	t->nr_regions++;
}

/* Unlink @r from @t's regions list; the region itself is not freed */
static void damon_del_region(struct damon_region *r, struct damon_target *t)
{
	list_del(&r->list);
	t->nr_regions--;
}

/* Release the memory of @r; it must already be unlinked */
static void damon_free_region(struct damon_region *r)
{
	kfree(r);
}

/* Unlink @r from @t and free it */
void damon_destroy_region(struct damon_region *r, struct damon_target *t)
{
	damon_del_region(r, t);
	damon_free_region(r);
}
85 | ||
/*
 * Construct a new DAMON-based operation scheme (damos).
 *
 * The target access pattern (region size, access frequency, and age ranges),
 * the @action to apply to matching regions, the aggressiveness-limiting
 * @quota, and the activation @wmarks are copied into the new struct.
 * Runtime state (stats, charged amounts, effective quota) starts at zero,
 * and the scheme starts activated.
 *
 * Returns the pointer to the new struct if success, or NULL otherwise.
 */
struct damos *damon_new_scheme(
		unsigned long min_sz_region, unsigned long max_sz_region,
		unsigned int min_nr_accesses, unsigned int max_nr_accesses,
		unsigned int min_age_region, unsigned int max_age_region,
		enum damos_action action, struct damos_quota *quota,
		struct damos_watermarks *wmarks)
{
	struct damos *scheme;

	scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
	if (!scheme)
		return NULL;
	scheme->min_sz_region = min_sz_region;
	scheme->max_sz_region = max_sz_region;
	scheme->min_nr_accesses = min_nr_accesses;
	scheme->max_nr_accesses = max_nr_accesses;
	scheme->min_age_region = min_age_region;
	scheme->max_age_region = max_age_region;
	scheme->action = action;
	scheme->stat_count = 0;
	scheme->stat_sz = 0;
	INIT_LIST_HEAD(&scheme->list);

	/* User-given quota limits are copied; charging state starts fresh */
	scheme->quota.ms = quota->ms;
	scheme->quota.sz = quota->sz;
	scheme->quota.reset_interval = quota->reset_interval;
	scheme->quota.weight_sz = quota->weight_sz;
	scheme->quota.weight_nr_accesses = quota->weight_nr_accesses;
	scheme->quota.weight_age = quota->weight_age;
	scheme->quota.total_charged_sz = 0;
	scheme->quota.total_charged_ns = 0;
	scheme->quota.esz = 0;
	scheme->quota.charged_sz = 0;
	scheme->quota.charged_from = 0;
	scheme->quota.charge_target_from = NULL;
	scheme->quota.charge_addr_from = 0;

	scheme->wmarks.metric = wmarks->metric;
	scheme->wmarks.interval = wmarks->interval;
	scheme->wmarks.high = wmarks->high;
	scheme->wmarks.mid = wmarks->mid;
	scheme->wmarks.low = wmarks->low;
	scheme->wmarks.activated = true;

	return scheme;
}
132 | ||
/* Register scheme @s to the monitoring context @ctx */
void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
{
	list_add_tail(&s->list, &ctx->schemes);
}

/* Unlink @s from its context; the scheme itself is not freed */
static void damon_del_scheme(struct damos *s)
{
	list_del(&s->list);
}

/* Release the memory of @s; it must already be unlinked */
static void damon_free_scheme(struct damos *s)
{
	kfree(s);
}

/* Unlink @s from its context and free it */
void damon_destroy_scheme(struct damos *s)
{
	damon_del_scheme(s);
	damon_free_scheme(s);
}
153 | ||
f23b8eee SP |
154 | /* |
155 | * Construct a damon_target struct | |
156 | * | |
157 | * Returns the pointer to the new struct if success, or NULL otherwise | |
158 | */ | |
159 | struct damon_target *damon_new_target(unsigned long id) | |
160 | { | |
161 | struct damon_target *t; | |
162 | ||
163 | t = kmalloc(sizeof(*t), GFP_KERNEL); | |
164 | if (!t) | |
165 | return NULL; | |
166 | ||
167 | t->id = id; | |
b9a6ac4e | 168 | t->nr_regions = 0; |
f23b8eee SP |
169 | INIT_LIST_HEAD(&t->regions_list); |
170 | ||
171 | return t; | |
172 | } | |
173 | ||
/* Register @t to the monitoring context @ctx */
void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
{
	list_add_tail(&t->list, &ctx->adaptive_targets);
}

/* Return whether @ctx has no monitoring target */
bool damon_targets_empty(struct damon_ctx *ctx)
{
	return list_empty(&ctx->adaptive_targets);
}

/* Unlink @t from its context; the target itself is not freed */
static void damon_del_target(struct damon_target *t)
{
	list_del(&t->list);
}
188 | ||
189 | void damon_free_target(struct damon_target *t) | |
190 | { | |
191 | struct damon_region *r, *next; | |
192 | ||
193 | damon_for_each_region_safe(r, next, t) | |
194 | damon_free_region(r); | |
195 | kfree(t); | |
196 | } | |
197 | ||
/* Unlink @t from its context and free it along with its regions */
void damon_destroy_target(struct damon_target *t)
{
	damon_del_target(t);
	damon_free_target(t);
}

/* Return the current number of monitoring regions of @t */
unsigned int damon_nr_regions(struct damon_target *t)
{
	return t->nr_regions;
}
208 | ||
/*
 * Allocate and initialize a monitoring context with default attributes.
 *
 * Default intervals are in microseconds: 5 ms sampling, 100 ms aggregation,
 * and 60 s monitoring primitive update.  Returns NULL if allocation fails.
 */
struct damon_ctx *damon_new_ctx(void)
{
	struct damon_ctx *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

	ctx->sample_interval = 5 * 1000;
	ctx->aggr_interval = 100 * 1000;
	ctx->primitive_update_interval = 60 * 1000 * 1000;

	/* Interval tracking starts from the current time */
	ktime_get_coarse_ts64(&ctx->last_aggregation);
	ctx->last_primitive_update = ctx->last_aggregation;

	mutex_init(&ctx->kdamond_lock);

	ctx->min_nr_regions = 10;
	ctx->max_nr_regions = 1000;

	INIT_LIST_HEAD(&ctx->adaptive_targets);
	INIT_LIST_HEAD(&ctx->schemes);

	return ctx;
}
234 | ||
/*
 * Remove every monitoring target of @ctx.
 *
 * If the monitoring primitive registered a cleanup callback, target
 * destruction is delegated entirely to that callback.
 */
static void damon_destroy_targets(struct damon_ctx *ctx)
{
	struct damon_target *t, *next_t;

	if (ctx->primitive.cleanup) {
		ctx->primitive.cleanup(ctx);
		return;
	}

	damon_for_each_target_safe(t, next_t, ctx)
		damon_destroy_target(t);
}
247 | ||
/* Release @ctx together with all of its targets and schemes */
void damon_destroy_ctx(struct damon_ctx *ctx)
{
	struct damos *s, *next_s;

	damon_destroy_targets(ctx);

	damon_for_each_scheme_safe(s, next_s, ctx)
		damon_destroy_scheme(s);

	kfree(ctx);
}
259 | ||
/**
 * damon_set_targets() - Set monitoring targets.
 * @ctx:	monitoring context
 * @ids:	array of target ids
 * @nr_ids:	number of entries in @ids
 *
 * This function should not be called while the kdamond is running.  Any
 * targets previously set on @ctx are destroyed before the new ones are
 * created.  On allocation failure, targets created by this call so far are
 * destroyed as well.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_targets(struct damon_ctx *ctx,
		unsigned long *ids, ssize_t nr_ids)
{
	ssize_t i;
	struct damon_target *t, *next;

	damon_destroy_targets(ctx);

	for (i = 0; i < nr_ids; i++) {
		t = damon_new_target(ids[i]);
		if (!t) {
			/* The caller should do cleanup of the ids itself */
			damon_for_each_target_safe(t, next, ctx)
				damon_destroy_target(t);
			return -ENOMEM;
		}
		damon_add_target(ctx, t);
	}

	return 0;
}
291 | ||
2224d848 SP |
292 | /** |
293 | * damon_set_attrs() - Set attributes for the monitoring. | |
294 | * @ctx: monitoring context | |
295 | * @sample_int: time interval between samplings | |
296 | * @aggr_int: time interval between aggregations | |
297 | * @primitive_upd_int: time interval between monitoring primitive updates | |
b9a6ac4e SP |
298 | * @min_nr_reg: minimal number of regions |
299 | * @max_nr_reg: maximum number of regions | |
2224d848 SP |
300 | * |
301 | * This function should not be called while the kdamond is running. | |
302 | * Every time interval is in micro-seconds. | |
303 | * | |
304 | * Return: 0 on success, negative error code otherwise. | |
305 | */ | |
306 | int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, | |
b9a6ac4e SP |
307 | unsigned long aggr_int, unsigned long primitive_upd_int, |
308 | unsigned long min_nr_reg, unsigned long max_nr_reg) | |
2224d848 | 309 | { |
1afaf5cb | 310 | if (min_nr_reg < 3) |
b9a6ac4e | 311 | return -EINVAL; |
1afaf5cb | 312 | if (min_nr_reg > max_nr_reg) |
b9a6ac4e | 313 | return -EINVAL; |
b9a6ac4e | 314 | |
2224d848 SP |
315 | ctx->sample_interval = sample_int; |
316 | ctx->aggr_interval = aggr_int; | |
317 | ctx->primitive_update_interval = primitive_upd_int; | |
b9a6ac4e SP |
318 | ctx->min_nr_regions = min_nr_reg; |
319 | ctx->max_nr_regions = max_nr_reg; | |
2224d848 SP |
320 | |
321 | return 0; | |
322 | } | |
323 | ||
1f366e42 SP |
324 | /** |
325 | * damon_set_schemes() - Set data access monitoring based operation schemes. | |
326 | * @ctx: monitoring context | |
327 | * @schemes: array of the schemes | |
328 | * @nr_schemes: number of entries in @schemes | |
329 | * | |
330 | * This function should not be called while the kdamond of the context is | |
331 | * running. | |
332 | * | |
333 | * Return: 0 if success, or negative error code otherwise. | |
334 | */ | |
int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
			ssize_t nr_schemes)
{
	struct damos *s, *next;
	ssize_t i;

	/* Any previously set schemes are destroyed and replaced */
	damon_for_each_scheme_safe(s, next, ctx)
		damon_destroy_scheme(s);
	for (i = 0; i < nr_schemes; i++)
		damon_add_scheme(ctx, schemes[i]);
	return 0;
}
347 | ||
4bc05954 SP |
348 | /** |
349 | * damon_nr_running_ctxs() - Return number of currently running contexts. | |
350 | */ | |
int damon_nr_running_ctxs(void)
{
	int nr_ctxs;

	/* damon_lock serializes all accesses to nr_running_ctxs */
	mutex_lock(&damon_lock);
	nr_ctxs = nr_running_ctxs;
	mutex_unlock(&damon_lock);

	return nr_ctxs;
}
361 | ||
b9a6ac4e SP |
/*
 * Returns the size upper limit for each monitoring region
 *
 * The limit is the total size of all monitored address ranges evenly divided
 * by the minimum number of regions, clamped to at least DAMON_MIN_REGION.
 */
static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct damon_region *r;
	unsigned long sz = 0;

	damon_for_each_target(t, ctx) {
		damon_for_each_region(r, t)
			sz += r->ar.end - r->ar.start;
	}

	if (ctx->min_nr_regions)
		sz /= ctx->min_nr_regions;
	if (sz < DAMON_MIN_REGION)
		sz = DAMON_MIN_REGION;

	return sz;
}
381 | ||
2224d848 SP |
382 | static int kdamond_fn(void *data); |
383 | ||
384 | /* | |
385 | * __damon_start() - Starts monitoring with given context. | |
386 | * @ctx: monitoring context | |
387 | * | |
388 | * This function should be called while damon_lock is hold. | |
389 | * | |
390 | * Return: 0 on success, negative error code otherwise. | |
391 | */ | |
static int __damon_start(struct damon_ctx *ctx)
{
	int err = -EBUSY;

	mutex_lock(&ctx->kdamond_lock);
	if (!ctx->kdamond) {
		err = 0;
		ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
				nr_running_ctxs);
		if (IS_ERR(ctx->kdamond)) {
			err = PTR_ERR(ctx->kdamond);
			/* Reset to NULL so a later start attempt can retry */
			ctx->kdamond = NULL;
		}
	}
	mutex_unlock(&ctx->kdamond_lock);

	return err;
}
410 | ||
411 | /** | |
412 | * damon_start() - Starts the monitorings for a given group of contexts. | |
413 | * @ctxs: an array of the pointers for contexts to start monitoring | |
414 | * @nr_ctxs: size of @ctxs | |
415 | * | |
416 | * This function starts a group of monitoring threads for a group of monitoring | |
417 | * contexts. One thread per each context is created and run in parallel. The | |
418 | * caller should handle synchronization between the threads by itself. If a | |
419 | * group of threads that created by other 'damon_start()' call is currently | |
420 | * running, this function does nothing but returns -EBUSY. | |
421 | * | |
422 | * Return: 0 on success, negative error code otherwise. | |
423 | */ | |
int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
{
	int i;
	int err = 0;

	mutex_lock(&damon_lock);
	if (nr_running_ctxs) {
		mutex_unlock(&damon_lock);
		return -EBUSY;
	}

	for (i = 0; i < nr_ctxs; i++) {
		err = __damon_start(ctxs[i]);
		if (err)
			break;
		nr_running_ctxs++;
	}
	/*
	 * NOTE(review): on a mid-loop failure, contexts started by earlier
	 * iterations are left running while an error is returned — confirm
	 * callers expect this partial-start behavior.
	 */
	mutex_unlock(&damon_lock);

	return err;
}
445 | ||
446 | /* | |
447 | * __damon_stop() - Stops monitoring of given context. | |
448 | * @ctx: monitoring context | |
449 | * | |
450 | * Return: 0 on success, negative error code otherwise. | |
451 | */ | |
static int __damon_stop(struct damon_ctx *ctx)
{
	struct task_struct *tsk;

	mutex_lock(&ctx->kdamond_lock);
	tsk = ctx->kdamond;
	if (tsk) {
		/*
		 * Take a reference so the task cannot be released between
		 * dropping the lock and kthread_stop() returning.
		 */
		get_task_struct(tsk);
		mutex_unlock(&ctx->kdamond_lock);
		kthread_stop(tsk);
		put_task_struct(tsk);
		return 0;
	}
	mutex_unlock(&ctx->kdamond_lock);

	/* No kdamond was running for this context */
	return -EPERM;
}
469 | ||
470 | /** | |
471 | * damon_stop() - Stops the monitorings for a given group of contexts. | |
472 | * @ctxs: an array of the pointers for contexts to stop monitoring | |
473 | * @nr_ctxs: size of @ctxs | |
474 | * | |
475 | * Return: 0 on success, negative error code otherwise. | |
476 | */ | |
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
{
	int i, err = 0;

	for (i = 0; i < nr_ctxs; i++) {
		/* nr_running_ctxs is decremented in kdamond_fn */
		err = __damon_stop(ctxs[i]);
		if (err)
			return err;
	}

	return err;
}
490 | ||
/*
 * damon_check_reset_time_interval() - Check if a time interval is elapsed.
 * @baseline:	the time to check whether the interval has elapsed since
 * @interval:	the time interval (microseconds)
 *
 * See whether the given time interval has passed since the given baseline
 * time.  If so, it also updates the baseline to current time for next check.
 *
 * Return:	true if the time interval has passed, or false otherwise.
 */
static bool damon_check_reset_time_interval(struct timespec64 *baseline,
		unsigned long interval)
{
	struct timespec64 now;

	ktime_get_coarse_ts64(&now);
	/* @interval is in microseconds while the timestamps are nanoseconds */
	if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
			interval * 1000)
		return false;
	*baseline = now;
	return true;
}
513 | ||
/*
 * Check whether it is time to flush the aggregated information
 *
 * Also resets the aggregation baseline when the interval has passed (see
 * damon_check_reset_time_interval()).
 */
static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
{
	return damon_check_reset_time_interval(&ctx->last_aggregation,
			ctx->aggr_interval);
}
522 | ||
/*
 * Reset the aggregated monitoring results ('nr_accesses' of each region).
 *
 * The value being reset is preserved in 'last_nr_accesses' and reported via
 * the damon_aggregated tracepoint, so consumers do not lose it.
 */
static void kdamond_reset_aggregated(struct damon_ctx *c)
{
	struct damon_target *t;

	damon_for_each_target(t, c) {
		struct damon_region *r;

		damon_for_each_region(r, t) {
			trace_damon_aggregated(t, r, damon_nr_regions(t));
			r->last_nr_accesses = r->nr_accesses;
			r->nr_accesses = 0;
		}
	}
}
540 | ||
2b8a248d SP |
541 | static void damon_split_region_at(struct damon_ctx *ctx, |
542 | struct damon_target *t, struct damon_region *r, | |
543 | unsigned long sz_r); | |
544 | ||
38683e00 SP |
545 | static bool __damos_valid_target(struct damon_region *r, struct damos *s) |
546 | { | |
547 | unsigned long sz; | |
548 | ||
549 | sz = r->ar.end - r->ar.start; | |
550 | return s->min_sz_region <= sz && sz <= s->max_sz_region && | |
551 | s->min_nr_accesses <= r->nr_accesses && | |
552 | r->nr_accesses <= s->max_nr_accesses && | |
553 | s->min_age_region <= r->age && r->age <= s->max_age_region; | |
554 | } | |
555 | ||
/*
 * Check whether scheme @s should be applied to region @r of target @t.
 *
 * On top of the static access-pattern check, if the scheme has a non-zero
 * effective quota and the primitive provides a prioritization score, regions
 * scoring below the quota's minimum score are filtered out.
 */
static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
		struct damon_region *r, struct damos *s)
{
	bool ret = __damos_valid_target(r, s);

	if (!ret || !s->quota.esz || !c->primitive.get_scheme_score)
		return ret;

	return c->primitive.get_scheme_score(c, t, r, s) >= s->quota.min_score;
}
566 | ||
1f366e42 SP |
/*
 * Apply every eligible scheme of @c to region @r of target @t.
 *
 * Schemes that are deactivated by watermarks or have exhausted their
 * effective quota are skipped.  When a quota was exhausted mid-walk in a
 * previous pass, regions up to the recorded restart point
 * (charge_target_from/charge_addr_from) are skipped so charging resumes
 * where it stopped, splitting a region at the restart address if needed.
 * Applied sizes and elapsed times are charged to the scheme's quota, and
 * per-scheme statistics are updated.
 */
static void damon_do_apply_schemes(struct damon_ctx *c,
				   struct damon_target *t,
				   struct damon_region *r)
{
	struct damos *s;

	damon_for_each_scheme(s, c) {
		struct damos_quota *quota = &s->quota;
		unsigned long sz = r->ar.end - r->ar.start;
		struct timespec64 begin, end;

		if (!s->wmarks.activated)
			continue;

		/* Check the quota */
		if (quota->esz && quota->charged_sz >= quota->esz)
			continue;

		/* Skip previously charged regions */
		if (quota->charge_target_from) {
			if (t != quota->charge_target_from)
				continue;
			if (r == damon_last_region(t)) {
				/* Restart point not found; start over */
				quota->charge_target_from = NULL;
				quota->charge_addr_from = 0;
				continue;
			}
			if (quota->charge_addr_from &&
					r->ar.end <= quota->charge_addr_from)
				continue;

			if (quota->charge_addr_from && r->ar.start <
					quota->charge_addr_from) {
				/* Split off the already-charged front part */
				sz = ALIGN_DOWN(quota->charge_addr_from -
						r->ar.start, DAMON_MIN_REGION);
				if (!sz) {
					if (r->ar.end - r->ar.start <=
							DAMON_MIN_REGION)
						continue;
					sz = DAMON_MIN_REGION;
				}
				damon_split_region_at(c, t, r, sz);
				r = damon_next_region(r);
				sz = r->ar.end - r->ar.start;
			}
			quota->charge_target_from = NULL;
			quota->charge_addr_from = 0;
		}

		if (!damos_valid_target(c, t, r, s))
			continue;

		/* Apply the scheme */
		if (c->primitive.apply_scheme) {
			if (quota->esz &&
					quota->charged_sz + sz > quota->esz) {
				/* Apply only up to the remaining quota */
				sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
						DAMON_MIN_REGION);
				if (!sz)
					goto update_stat;
				damon_split_region_at(c, t, r, sz);
			}
			ktime_get_coarse_ts64(&begin);
			c->primitive.apply_scheme(c, t, r, s);
			ktime_get_coarse_ts64(&end);
			quota->total_charged_ns += timespec64_to_ns(&end) -
				timespec64_to_ns(&begin);
			quota->charged_sz += sz;
			if (quota->esz && quota->charged_sz >= quota->esz) {
				/* Remember where to resume the next pass */
				quota->charge_target_from = t;
				quota->charge_addr_from = r->ar.end + 1;
			}
		}
		/* The region's access pattern is presumed changed by the action */
		if (s->action != DAMOS_STAT)
			r->age = 0;

update_stat:
		s->stat_count++;
		s->stat_sz += sz;
	}
}
648 | ||
1cd24303 SP |
/* Shouldn't be called if quota->ms and quota->sz are zero */
static void damos_set_effective_quota(struct damos_quota *quota)
{
	unsigned long throughput;
	unsigned long esz;

	/* No time quota: use the size quota as is */
	if (!quota->ms) {
		quota->esz = quota->sz;
		return;
	}

	/*
	 * Convert the time quota into a size quota using the measured
	 * throughput of scheme application (bytes per second); before the
	 * first measurement, assume a default throughput.
	 */
	if (quota->total_charged_ns)
		throughput = quota->total_charged_sz * 1000000 /
			quota->total_charged_ns;
	else
		throughput = PAGE_SIZE * 1024;
	esz = throughput * quota->ms;

	/* The effective quota is the smaller of the two limits */
	if (quota->sz && quota->sz < esz)
		esz = quota->sz;
	quota->esz = esz;
}
671 | ||
1f366e42 SP |
/*
 * Apply every scheme of @c to every monitoring region.
 *
 * Before walking the regions, this refreshes each quota-limited scheme's
 * charge window and effective quota, and rebuilds the prioritization score
 * histogram to derive the minimum score a region must have to be applied
 * within the quota.
 */
static void kdamond_apply_schemes(struct damon_ctx *c)
{
	struct damon_target *t;
	struct damon_region *r, *next_r;
	struct damos *s;

	damon_for_each_scheme(s, c) {
		struct damos_quota *quota = &s->quota;
		unsigned long cumulated_sz;
		unsigned int score, max_score = 0;

		if (!s->wmarks.activated)
			continue;

		/* Unlimited schemes need no quota bookkeeping */
		if (!quota->ms && !quota->sz)
			continue;

		/* New charge window starts */
		if (time_after_eq(jiffies, quota->charged_from +
					msecs_to_jiffies(
						quota->reset_interval))) {
			quota->total_charged_sz += quota->charged_sz;
			quota->charged_from = jiffies;
			quota->charged_sz = 0;
			damos_set_effective_quota(quota);
		}

		if (!c->primitive.get_scheme_score)
			continue;

		/* Fill up the score histogram */
		memset(quota->histogram, 0, sizeof(quota->histogram));
		damon_for_each_target(t, c) {
			damon_for_each_region(r, t) {
				if (!__damos_valid_target(r, s))
					continue;
				score = c->primitive.get_scheme_score(
						c, t, r, s);
				quota->histogram[score] +=
					r->ar.end - r->ar.start;
				if (score > max_score)
					max_score = score;
			}
		}

		/*
		 * Set the min score limit: walk from the highest score down
		 * until the covered size reaches the effective quota.
		 */
		for (cumulated_sz = 0, score = max_score; ; score--) {
			cumulated_sz += quota->histogram[score];
			if (cumulated_sz >= quota->esz || !score)
				break;
		}
		quota->min_score = score;
	}

	damon_for_each_target(t, c) {
		damon_for_each_region_safe(r, next_r, t)
			damon_do_apply_schemes(c, t, r);
	}
}
731 | ||
88f86dcf SP |
/* Return the size of the address range of @r in bytes */
static inline unsigned long sz_damon_region(struct damon_region *r)
{
	return r->ar.end - r->ar.start;
}
b9a6ac4e SP |
736 | |
/*
 * Merge two adjacent regions into one region
 *
 * The access frequency and the age of the merged region become the
 * size-weighted averages of those of @l and @r; @r is destroyed.
 */
static void damon_merge_two_regions(struct damon_target *t,
		struct damon_region *l, struct damon_region *r)
{
	unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);

	l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
			(sz_l + sz_r);
	l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
	l->ar.end = r->ar.end;
	damon_destroy_region(r, t);
}
751 | ||
b9a6ac4e SP |
/*
 * Merge adjacent regions having similar access frequencies
 *
 * t		target affected by this merge operation
 * thres	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 */
static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
				   unsigned long sz_limit)
{
	struct damon_region *r, *prev = NULL, *next;

	damon_for_each_region_safe(r, next, t) {
		/* Age grows while the access pattern stays similar */
		if (abs(r->nr_accesses - r->last_nr_accesses) > thres)
			r->age = 0;
		else
			r->age++;

		/* Merge only adjacent, similarly-accessed, small-enough pairs */
		if (prev && prev->ar.end == r->ar.start &&
		    abs(prev->nr_accesses - r->nr_accesses) <= thres &&
		    sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
			damon_merge_two_regions(t, prev, r);
		else
			prev = r;
	}
}
778 | ||
/*
 * Merge adjacent regions having similar access frequencies
 *
 * threshold	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 *
 * This function merges monitoring target regions which are adjacent and their
 * access frequencies are similar.  This is for minimizing the monitoring
 * overhead under the dynamically changeable access pattern.  If a merge was
 * unnecessarily made, later 'kdamond_split_regions()' will revert it.
 */
static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
				  unsigned long sz_limit)
{
	struct damon_target *t;

	damon_for_each_target(t, c)
		damon_merge_regions_of(t, threshold, sz_limit);
}
798 | ||
/*
 * Split a region in two
 *
 * r		the region to be split
 * sz_r		size of the first sub-region that will be made
 *
 * If allocating the second sub-region fails, @r is silently left unsplit.
 */
static void damon_split_region_at(struct damon_ctx *ctx,
		struct damon_target *t, struct damon_region *r,
		unsigned long sz_r)
{
	struct damon_region *new;

	new = damon_new_region(r->ar.start + sz_r, r->ar.end);
	if (!new)
		return;

	r->ar.end = new->ar.start;

	/* The new sub-region inherits the monitoring history of @r */
	new->age = r->age;
	new->last_nr_accesses = r->last_nr_accesses;

	damon_insert_region(new, r, damon_next_region(r), t);
}
822 | ||
/* Split every region in the given target into 'nr_subs' regions */
static void damon_split_regions_of(struct damon_ctx *ctx,
				   struct damon_target *t, int nr_subs)
{
	struct damon_region *r, *next;
	unsigned long sz_region, sz_sub = 0;
	int i;

	damon_for_each_region_safe(r, next, t) {
		sz_region = r->ar.end - r->ar.start;

		for (i = 0; i < nr_subs - 1 &&
				sz_region > 2 * DAMON_MIN_REGION; i++) {
			/*
			 * Randomly select size of left sub-region to be at
			 * least 10 percent and at most 90% of original region
			 */
			sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
					sz_region / 10, DAMON_MIN_REGION);
			/* Do not allow blank region */
			if (sz_sub == 0 || sz_sub >= sz_region)
				continue;

			damon_split_region_at(ctx, t, r, sz_sub);
			/* Continue splitting the remaining left sub-region */
			sz_region = sz_sub;
		}
	}
}
851 | ||
/*
 * Split every target region into randomly-sized small regions
 *
 * This function splits every target region into random-sized small regions if
 * current total number of the regions is equal or smaller than half of the
 * user-specified maximum number of regions.  This is for maximizing the
 * monitoring accuracy under the dynamically changeable access patterns.  If a
 * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
 * it.
 */
static void kdamond_split_regions(struct damon_ctx *ctx)
{
	struct damon_target *t;
	unsigned int nr_regions = 0;
	/*
	 * NOTE(review): last_nr_regions is function-static, hence shared by
	 * all monitoring contexts; with multiple kdamonds this heuristic may
	 * react to another context's history — confirm this is intended.
	 */
	static unsigned int last_nr_regions;
	int nr_subregions = 2;

	damon_for_each_target(t, ctx)
		nr_regions += damon_nr_regions(t);

	if (nr_regions > ctx->max_nr_regions / 2)
		return;

	/* Maybe the middle of the region has different access frequency */
	if (last_nr_regions == nr_regions &&
			nr_regions < ctx->max_nr_regions / 3)
		nr_subregions = 3;

	damon_for_each_target(t, ctx)
		damon_split_regions_of(ctx, t, nr_subregions);

	last_nr_regions = nr_regions;
}
885 | ||
2224d848 SP |
/*
 * Check whether it is time to check and apply the target monitoring regions
 *
 * Returns true if it is.  Also resets the baseline for the next check (see
 * damon_check_reset_time_interval()).
 */
static bool kdamond_need_update_primitive(struct damon_ctx *ctx)
{
	return damon_check_reset_time_interval(&ctx->last_primitive_update,
			ctx->primitive_update_interval);
}
896 | ||
/*
 * Check whether current monitoring should be stopped
 *
 * The monitoring is stopped when either the user requested to stop, or all
 * monitoring targets are invalid.
 *
 * Returns true if need to stop current monitoring.
 */
static bool kdamond_need_stop(struct damon_ctx *ctx)
{
	struct damon_target *t;

	if (kthread_should_stop())
		return true;

	/* Without a validity-check callback, targets are assumed valid */
	if (!ctx->primitive.target_valid)
		return false;

	damon_for_each_target(t, ctx) {
		if (ctx->primitive.target_valid(t))
			return false;
	}

	return true;
}
922 | ||
ee801b7d SP |
923 | static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric) |
924 | { | |
925 | struct sysinfo i; | |
926 | ||
927 | switch (metric) { | |
928 | case DAMOS_WMARK_FREE_MEM_RATE: | |
929 | si_meminfo(&i); | |
930 | return i.freeram * 1000 / i.totalram; | |
931 | default: | |
932 | break; | |
933 | } | |
934 | return -EINVAL; | |
935 | } | |
936 | ||
937 | /* | |
938 | * Returns zero if the scheme is active. Else, returns time to wait for next | |
939 | * watermark check in micro-seconds. | |
940 | */ | |
941 | static unsigned long damos_wmark_wait_us(struct damos *scheme) | |
942 | { | |
943 | unsigned long metric; | |
944 | ||
945 | if (scheme->wmarks.metric == DAMOS_WMARK_NONE) | |
946 | return 0; | |
947 | ||
948 | metric = damos_wmark_metric_value(scheme->wmarks.metric); | |
949 | /* higher than high watermark or lower than low watermark */ | |
950 | if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) { | |
951 | if (scheme->wmarks.activated) | |
01078655 | 952 | pr_debug("deactivate a scheme (%d) for %s wmark\n", |
ee801b7d SP |
953 | scheme->action, |
954 | metric > scheme->wmarks.high ? | |
955 | "high" : "low"); | |
956 | scheme->wmarks.activated = false; | |
957 | return scheme->wmarks.interval; | |
958 | } | |
959 | ||
960 | /* inactive and higher than middle watermark */ | |
961 | if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) && | |
962 | !scheme->wmarks.activated) | |
963 | return scheme->wmarks.interval; | |
964 | ||
965 | if (!scheme->wmarks.activated) | |
966 | pr_debug("activate a scheme (%d)\n", scheme->action); | |
967 | scheme->wmarks.activated = true; | |
968 | return 0; | |
969 | } | |
970 | ||
971 | static void kdamond_usleep(unsigned long usecs) | |
972 | { | |
4de46a30 SP |
973 | /* See Documentation/timers/timers-howto.rst for the thresholds */ |
974 | if (usecs > 20 * USEC_PER_MSEC) | |
70e92748 | 975 | schedule_timeout_idle(usecs_to_jiffies(usecs)); |
ee801b7d | 976 | else |
70e92748 | 977 | usleep_idle_range(usecs, usecs + 1); |
ee801b7d SP |
978 | } |
979 | ||
980 | /* Returns negative error code if it's not activated but should return */ | |
981 | static int kdamond_wait_activation(struct damon_ctx *ctx) | |
982 | { | |
983 | struct damos *s; | |
984 | unsigned long wait_time; | |
985 | unsigned long min_wait_time = 0; | |
986 | ||
987 | while (!kdamond_need_stop(ctx)) { | |
988 | damon_for_each_scheme(s, ctx) { | |
989 | wait_time = damos_wmark_wait_us(s); | |
990 | if (!min_wait_time || wait_time < min_wait_time) | |
991 | min_wait_time = wait_time; | |
992 | } | |
993 | if (!min_wait_time) | |
994 | return 0; | |
995 | ||
996 | kdamond_usleep(min_wait_time); | |
997 | } | |
998 | return -EBUSY; | |
999 | } | |
1000 | ||
2224d848 SP |
/*
 * The monitoring daemon that runs as a kernel thread
 *
 * Repeats sampling/aggregation cycles per the context's configured intervals
 * until the user asks it to stop, all targets become invalid, or a callback
 * requests termination.  Always returns 0.
 */
static int kdamond_fn(void *data)
{
	struct damon_ctx *ctx = (struct damon_ctx *)data;
	struct damon_target *t;
	struct damon_region *r, *next;
	unsigned int max_nr_accesses = 0;
	unsigned long sz_limit = 0;
	bool done = false;	/* set when a callback asks for termination */

	pr_debug("kdamond (%d) starts\n", current->pid);

	/* Let the monitoring primitive set its internal state up */
	if (ctx->primitive.init)
		ctx->primitive.init(ctx);
	/* A nonzero return from before_start means "do not run" */
	if (ctx->callback.before_start && ctx->callback.before_start(ctx))
		done = true;

	sz_limit = damon_region_sz_limit(ctx);

	while (!kdamond_need_stop(ctx) && !done) {
		/*
		 * Sleep while no scheme is activated by its watermarks;
		 * a nonzero return means monitoring should stop, so loop
		 * back to re-evaluate the while condition.
		 */
		if (kdamond_wait_activation(ctx))
			continue;

		if (ctx->primitive.prepare_access_checks)
			ctx->primitive.prepare_access_checks(ctx);
		if (ctx->callback.after_sampling &&
				ctx->callback.after_sampling(ctx))
			done = true;

		kdamond_usleep(ctx->sample_interval);

		if (ctx->primitive.check_accesses)
			max_nr_accesses = ctx->primitive.check_accesses(ctx);

		if (kdamond_aggregate_interval_passed(ctx)) {
			/* merge threshold: 10% of this cycle's peak count */
			kdamond_merge_regions(ctx,
					max_nr_accesses / 10,
					sz_limit);
			if (ctx->callback.after_aggregation &&
					ctx->callback.after_aggregation(ctx))
				done = true;
			kdamond_apply_schemes(ctx);
			kdamond_reset_aggregated(ctx);
			kdamond_split_regions(ctx);
			if (ctx->primitive.reset_aggregated)
				ctx->primitive.reset_aggregated(ctx);
		}

		/* Periodically let the primitive refresh target regions */
		if (kdamond_need_update_primitive(ctx)) {
			if (ctx->primitive.update)
				ctx->primitive.update(ctx);
			sz_limit = damon_region_sz_limit(ctx);
		}
	}
	/* Tear down all monitoring regions of every target */
	damon_for_each_target(t, ctx) {
		damon_for_each_region_safe(r, next, t)
			damon_destroy_region(r, t);
	}

	if (ctx->callback.before_terminate)
		ctx->callback.before_terminate(ctx);
	if (ctx->primitive.cleanup)
		ctx->primitive.cleanup(ctx);

	pr_debug("kdamond (%d) finishes\n", current->pid);
	/* Clear the thread pointer so the context can be started again */
	mutex_lock(&ctx->kdamond_lock);
	ctx->kdamond = NULL;
	mutex_unlock(&ctx->kdamond_lock);

	/* Account this daemon's exit in the global running-contexts count */
	mutex_lock(&damon_lock);
	nr_running_ctxs--;
	mutex_unlock(&damon_lock);

	return 0;
}
17ccae8b SP |
1078 | |
1079 | #include "core-test.h" |