mm/damon/paddr: deduplicate damon_pa_{mark_accessed,deactivate_pages}()
[linux-block.git] / mm / damon / core.c
CommitLineData
2224d848
SP
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Data Access Monitor
4 *
5 * Author: SeongJae Park <sjpark@amazon.de>
6 */
7
8#define pr_fmt(fmt) "damon: " fmt
9
10#include <linux/damon.h>
11#include <linux/delay.h>
12#include <linux/kthread.h>
ee801b7d 13#include <linux/mm.h>
2224d848 14#include <linux/slab.h>
38683e00 15#include <linux/string.h>
2224d848 16
2fcb9362
SP
17#define CREATE_TRACE_POINTS
18#include <trace/events/damon.h>
19
17ccae8b
SP
20#ifdef CONFIG_DAMON_KUNIT_TEST
21#undef DAMON_MIN_REGION
22#define DAMON_MIN_REGION 1
23#endif
24
2224d848
SP
25static DEFINE_MUTEX(damon_lock);
26static int nr_running_ctxs;
8b9b0d33 27static bool running_exclusive_ctxs;
2224d848 28
9f7b053a
SP
29static DEFINE_MUTEX(damon_ops_lock);
30static struct damon_operations damon_registered_ops[NR_DAMON_OPS];
31
a1870944
DL
32static struct kmem_cache *damon_region_cache __ro_after_init;
33
9f7b053a 34/* Should be called under damon_ops_lock with id smaller than NR_DAMON_OPS */
152e5617 35static bool __damon_is_registered_ops(enum damon_ops_id id)
9f7b053a
SP
36{
37 struct damon_operations empty_ops = {};
38
39 if (!memcmp(&empty_ops, &damon_registered_ops[id], sizeof(empty_ops)))
40 return false;
41 return true;
42}
43
152e5617
SP
44/**
45 * damon_is_registered_ops() - Check if a given damon_operations is registered.
46 * @id: Id of the damon_operations to check if registered.
47 *
48 * Return: true if the ops is set, false otherwise.
49 */
50bool damon_is_registered_ops(enum damon_ops_id id)
51{
52 bool registered;
53
54 if (id >= NR_DAMON_OPS)
55 return false;
56 mutex_lock(&damon_ops_lock);
57 registered = __damon_is_registered_ops(id);
58 mutex_unlock(&damon_ops_lock);
59 return registered;
60}
61
9f7b053a
SP
62/**
63 * damon_register_ops() - Register a monitoring operations set to DAMON.
64 * @ops: monitoring operations set to register.
65 *
66 * This function registers a monitoring operations set of valid &struct
67 * damon_operations->id so that others can find and use them later.
68 *
69 * Return: 0 on success, negative error code otherwise.
70 */
71int damon_register_ops(struct damon_operations *ops)
72{
73 int err = 0;
74
75 if (ops->id >= NR_DAMON_OPS)
76 return -EINVAL;
77 mutex_lock(&damon_ops_lock);
78 /* Fail for already registered ops */
152e5617 79 if (__damon_is_registered_ops(ops->id)) {
9f7b053a
SP
80 err = -EINVAL;
81 goto out;
82 }
83 damon_registered_ops[ops->id] = *ops;
84out:
85 mutex_unlock(&damon_ops_lock);
86 return err;
87}
88
89/**
90 * damon_select_ops() - Select a monitoring operations to use with the context.
91 * @ctx: monitoring context to use the operations.
92 * @id: id of the registered monitoring operations to select.
93 *
94 * This function finds registered monitoring operations set of @id and make
95 * @ctx to use it.
96 *
97 * Return: 0 on success, negative error code otherwise.
98 */
99int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id)
100{
101 int err = 0;
102
103 if (id >= NR_DAMON_OPS)
104 return -EINVAL;
105
106 mutex_lock(&damon_ops_lock);
152e5617 107 if (!__damon_is_registered_ops(id))
9f7b053a
SP
108 err = -EINVAL;
109 else
110 ctx->ops = damon_registered_ops[id];
111 mutex_unlock(&damon_ops_lock);
112 return err;
113}
114
f23b8eee
SP
115/*
116 * Construct a damon_region struct
117 *
118 * Returns the pointer to the new struct if success, or NULL otherwise
119 */
120struct damon_region *damon_new_region(unsigned long start, unsigned long end)
121{
122 struct damon_region *region;
123
a1870944 124 region = kmem_cache_alloc(damon_region_cache, GFP_KERNEL);
f23b8eee
SP
125 if (!region)
126 return NULL;
127
128 region->ar.start = start;
129 region->ar.end = end;
130 region->nr_accesses = 0;
131 INIT_LIST_HEAD(&region->list);
132
fda504fa
SP
133 region->age = 0;
134 region->last_nr_accesses = 0;
135
f23b8eee
SP
136 return region;
137}
138
f23b8eee
SP
139void damon_add_region(struct damon_region *r, struct damon_target *t)
140{
141 list_add_tail(&r->list, &t->regions_list);
b9a6ac4e 142 t->nr_regions++;
f23b8eee
SP
143}
144
b9a6ac4e 145static void damon_del_region(struct damon_region *r, struct damon_target *t)
f23b8eee
SP
146{
147 list_del(&r->list);
b9a6ac4e 148 t->nr_regions--;
f23b8eee
SP
149}
150
151static void damon_free_region(struct damon_region *r)
152{
a1870944 153 kmem_cache_free(damon_region_cache, r);
f23b8eee
SP
154}
155
/* Remove @r from @t (list and counter) and then free it. */
void damon_destroy_region(struct damon_region *r, struct damon_target *t)
{
	/* unlink first so the list never references freed memory */
	damon_del_region(r, t);
	damon_free_region(r);
}
161
d0723bc0
SP
162/*
163 * Check whether a region is intersecting an address range
164 *
165 * Returns true if it is.
166 */
167static bool damon_intersect(struct damon_region *r,
168 struct damon_addr_range *re)
169{
170 return !(r->ar.end <= re->start || re->end <= r->ar.start);
171}
172
9c950c22
SP
173/*
174 * Fill holes in regions with new regions.
175 */
176static int damon_fill_regions_holes(struct damon_region *first,
177 struct damon_region *last, struct damon_target *t)
178{
179 struct damon_region *r = first;
180
181 damon_for_each_region_from(r, t) {
182 struct damon_region *next, *newr;
183
184 if (r == last)
185 break;
186 next = damon_next_region(r);
187 if (r->ar.end != next->ar.start) {
188 newr = damon_new_region(r->ar.end, next->ar.start);
189 if (!newr)
190 return -ENOMEM;
191 damon_insert_region(newr, r, next, t);
192 }
193 }
194 return 0;
195}
196
d0723bc0
SP
197/*
198 * damon_set_regions() - Set regions of a target for given address ranges.
199 * @t: the given target.
200 * @ranges: array of new monitoring target ranges.
201 * @nr_ranges: length of @ranges.
202 *
203 * This function adds new regions to, or modify existing regions of a
204 * monitoring target to fit in specific ranges.
205 *
206 * Return: 0 if success, or negative error code otherwise.
207 */
208int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
209 unsigned int nr_ranges)
210{
211 struct damon_region *r, *next;
212 unsigned int i;
9c950c22 213 int err;
d0723bc0
SP
214
215 /* Remove regions which are not in the new ranges */
216 damon_for_each_region_safe(r, next, t) {
217 for (i = 0; i < nr_ranges; i++) {
218 if (damon_intersect(r, &ranges[i]))
219 break;
220 }
221 if (i == nr_ranges)
222 damon_destroy_region(r, t);
223 }
224
36001cba 225 r = damon_first_region(t);
d0723bc0
SP
226 /* Add new regions or resize existing regions to fit in the ranges */
227 for (i = 0; i < nr_ranges; i++) {
228 struct damon_region *first = NULL, *last, *newr;
229 struct damon_addr_range *range;
230
231 range = &ranges[i];
232 /* Get the first/last regions intersecting with the range */
36001cba 233 damon_for_each_region_from(r, t) {
d0723bc0
SP
234 if (damon_intersect(r, range)) {
235 if (!first)
236 first = r;
237 last = r;
238 }
239 if (r->ar.start >= range->end)
240 break;
241 }
242 if (!first) {
243 /* no region intersects with this range */
244 newr = damon_new_region(
245 ALIGN_DOWN(range->start,
246 DAMON_MIN_REGION),
247 ALIGN(range->end, DAMON_MIN_REGION));
248 if (!newr)
249 return -ENOMEM;
250 damon_insert_region(newr, damon_prev_region(r), r, t);
251 } else {
252 /* resize intersecting regions to fit in this range */
253 first->ar.start = ALIGN_DOWN(range->start,
254 DAMON_MIN_REGION);
255 last->ar.end = ALIGN(range->end, DAMON_MIN_REGION);
9c950c22
SP
256
257 /* fill possible holes in the range */
258 err = damon_fill_regions_holes(first, last, t);
259 if (err)
260 return err;
d0723bc0
SP
261 }
262 }
263 return 0;
264}
265
f5a79d7c
YD
266struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
267 enum damos_action action, struct damos_quota *quota,
268 struct damos_watermarks *wmarks)
1f366e42
SP
269{
270 struct damos *scheme;
271
272 scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
273 if (!scheme)
274 return NULL;
f5a79d7c
YD
275 scheme->pattern.min_sz_region = pattern->min_sz_region;
276 scheme->pattern.max_sz_region = pattern->max_sz_region;
277 scheme->pattern.min_nr_accesses = pattern->min_nr_accesses;
278 scheme->pattern.max_nr_accesses = pattern->max_nr_accesses;
279 scheme->pattern.min_age_region = pattern->min_age_region;
280 scheme->pattern.max_age_region = pattern->max_age_region;
1f366e42 281 scheme->action = action;
0e92c2ee 282 scheme->stat = (struct damos_stat){};
1f366e42
SP
283 INIT_LIST_HEAD(&scheme->list);
284
1cd24303 285 scheme->quota.ms = quota->ms;
2b8a248d
SP
286 scheme->quota.sz = quota->sz;
287 scheme->quota.reset_interval = quota->reset_interval;
38683e00
SP
288 scheme->quota.weight_sz = quota->weight_sz;
289 scheme->quota.weight_nr_accesses = quota->weight_nr_accesses;
290 scheme->quota.weight_age = quota->weight_age;
1cd24303
SP
291 scheme->quota.total_charged_sz = 0;
292 scheme->quota.total_charged_ns = 0;
293 scheme->quota.esz = 0;
2b8a248d
SP
294 scheme->quota.charged_sz = 0;
295 scheme->quota.charged_from = 0;
50585192
SP
296 scheme->quota.charge_target_from = NULL;
297 scheme->quota.charge_addr_from = 0;
2b8a248d 298
ee801b7d
SP
299 scheme->wmarks.metric = wmarks->metric;
300 scheme->wmarks.interval = wmarks->interval;
301 scheme->wmarks.high = wmarks->high;
302 scheme->wmarks.mid = wmarks->mid;
303 scheme->wmarks.low = wmarks->low;
304 scheme->wmarks.activated = true;
305
1f366e42
SP
306 return scheme;
307}
308
309void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
310{
311 list_add_tail(&s->list, &ctx->schemes);
312}
313
314static void damon_del_scheme(struct damos *s)
315{
316 list_del(&s->list);
317}
318
/* Release the memory of scheme @s; the scheme is not unlinked. */
static void damon_free_scheme(struct damos *s)
{
	kfree(s);
}
323
/* Unlink scheme @s from its list and then free it. */
void damon_destroy_scheme(struct damos *s)
{
	/* unlink first so the list never references freed memory */
	damon_del_scheme(s);
	damon_free_scheme(s);
}
329
f23b8eee
SP
330/*
331 * Construct a damon_target struct
332 *
333 * Returns the pointer to the new struct if success, or NULL otherwise
334 */
1971bd63 335struct damon_target *damon_new_target(void)
f23b8eee
SP
336{
337 struct damon_target *t;
338
339 t = kmalloc(sizeof(*t), GFP_KERNEL);
340 if (!t)
341 return NULL;
342
1971bd63 343 t->pid = NULL;
b9a6ac4e 344 t->nr_regions = 0;
f23b8eee
SP
345 INIT_LIST_HEAD(&t->regions_list);
346
347 return t;
348}
349
350void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
351{
b9a6ac4e 352 list_add_tail(&t->list, &ctx->adaptive_targets);
f23b8eee
SP
353}
354
b5ca3e83
XH
355bool damon_targets_empty(struct damon_ctx *ctx)
356{
357 return list_empty(&ctx->adaptive_targets);
358}
359
f23b8eee
SP
360static void damon_del_target(struct damon_target *t)
361{
362 list_del(&t->list);
363}
364
365void damon_free_target(struct damon_target *t)
366{
367 struct damon_region *r, *next;
368
369 damon_for_each_region_safe(r, next, t)
370 damon_free_region(r);
371 kfree(t);
372}
373
/* Unlink target @t from its list, then free it together with its regions. */
void damon_destroy_target(struct damon_target *t)
{
	/* unlink first so the list never references freed memory */
	damon_del_target(t);
	damon_free_target(t);
}
379
b9a6ac4e
SP
380unsigned int damon_nr_regions(struct damon_target *t)
381{
382 return t->nr_regions;
383}
384
2224d848
SP
385struct damon_ctx *damon_new_ctx(void)
386{
387 struct damon_ctx *ctx;
388
389 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
390 if (!ctx)
391 return NULL;
392
393 ctx->sample_interval = 5 * 1000;
394 ctx->aggr_interval = 100 * 1000;
f7d911c3 395 ctx->ops_update_interval = 60 * 1000 * 1000;
2224d848
SP
396
397 ktime_get_coarse_ts64(&ctx->last_aggregation);
f7d911c3 398 ctx->last_ops_update = ctx->last_aggregation;
2224d848
SP
399
400 mutex_init(&ctx->kdamond_lock);
401
b9a6ac4e
SP
402 ctx->min_nr_regions = 10;
403 ctx->max_nr_regions = 1000;
404
405 INIT_LIST_HEAD(&ctx->adaptive_targets);
1f366e42 406 INIT_LIST_HEAD(&ctx->schemes);
2224d848
SP
407
408 return ctx;
409}
410
f23b8eee 411static void damon_destroy_targets(struct damon_ctx *ctx)
2224d848 412{
f23b8eee
SP
413 struct damon_target *t, *next_t;
414
f7d911c3
SP
415 if (ctx->ops.cleanup) {
416 ctx->ops.cleanup(ctx);
f23b8eee
SP
417 return;
418 }
419
420 damon_for_each_target_safe(t, next_t, ctx)
421 damon_destroy_target(t);
422}
423
424void damon_destroy_ctx(struct damon_ctx *ctx)
425{
1f366e42
SP
426 struct damos *s, *next_s;
427
f23b8eee 428 damon_destroy_targets(ctx);
1f366e42
SP
429
430 damon_for_each_scheme_safe(s, next_s, ctx)
431 damon_destroy_scheme(s);
432
2224d848
SP
433 kfree(ctx);
434}
435
436/**
437 * damon_set_attrs() - Set attributes for the monitoring.
438 * @ctx: monitoring context
439 * @sample_int: time interval between samplings
440 * @aggr_int: time interval between aggregations
f7d911c3 441 * @ops_upd_int: time interval between monitoring operations updates
b9a6ac4e
SP
442 * @min_nr_reg: minimal number of regions
443 * @max_nr_reg: maximum number of regions
2224d848
SP
444 *
445 * This function should not be called while the kdamond is running.
446 * Every time interval is in micro-seconds.
447 *
448 * Return: 0 on success, negative error code otherwise.
449 */
450int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
f7d911c3 451 unsigned long aggr_int, unsigned long ops_upd_int,
b9a6ac4e 452 unsigned long min_nr_reg, unsigned long max_nr_reg)
2224d848 453{
1afaf5cb 454 if (min_nr_reg < 3)
b9a6ac4e 455 return -EINVAL;
1afaf5cb 456 if (min_nr_reg > max_nr_reg)
b9a6ac4e 457 return -EINVAL;
b9a6ac4e 458
2224d848
SP
459 ctx->sample_interval = sample_int;
460 ctx->aggr_interval = aggr_int;
f7d911c3 461 ctx->ops_update_interval = ops_upd_int;
b9a6ac4e
SP
462 ctx->min_nr_regions = min_nr_reg;
463 ctx->max_nr_regions = max_nr_reg;
2224d848
SP
464
465 return 0;
466}
467
1f366e42
SP
468/**
469 * damon_set_schemes() - Set data access monitoring based operation schemes.
470 * @ctx: monitoring context
471 * @schemes: array of the schemes
472 * @nr_schemes: number of entries in @schemes
473 *
474 * This function should not be called while the kdamond of the context is
475 * running.
476 *
477 * Return: 0 if success, or negative error code otherwise.
478 */
479int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
480 ssize_t nr_schemes)
481{
482 struct damos *s, *next;
483 ssize_t i;
484
485 damon_for_each_scheme_safe(s, next, ctx)
486 damon_destroy_scheme(s);
487 for (i = 0; i < nr_schemes; i++)
488 damon_add_scheme(ctx, schemes[i]);
489 return 0;
490}
491
4bc05954
SP
492/**
493 * damon_nr_running_ctxs() - Return number of currently running contexts.
494 */
495int damon_nr_running_ctxs(void)
496{
497 int nr_ctxs;
498
499 mutex_lock(&damon_lock);
500 nr_ctxs = nr_running_ctxs;
501 mutex_unlock(&damon_lock);
502
503 return nr_ctxs;
504}
505
b9a6ac4e
SP
506/* Returns the size upper limit for each monitoring region */
507static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
508{
509 struct damon_target *t;
510 struct damon_region *r;
511 unsigned long sz = 0;
512
513 damon_for_each_target(t, ctx) {
514 damon_for_each_region(r, t)
515 sz += r->ar.end - r->ar.start;
516 }
517
518 if (ctx->min_nr_regions)
519 sz /= ctx->min_nr_regions;
520 if (sz < DAMON_MIN_REGION)
521 sz = DAMON_MIN_REGION;
522
523 return sz;
524}
525
2224d848
SP
526static int kdamond_fn(void *data);
527
528/*
529 * __damon_start() - Starts monitoring with given context.
530 * @ctx: monitoring context
531 *
532 * This function should be called while damon_lock is hold.
533 *
534 * Return: 0 on success, negative error code otherwise.
535 */
536static int __damon_start(struct damon_ctx *ctx)
537{
538 int err = -EBUSY;
539
540 mutex_lock(&ctx->kdamond_lock);
541 if (!ctx->kdamond) {
542 err = 0;
2224d848
SP
543 ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
544 nr_running_ctxs);
545 if (IS_ERR(ctx->kdamond)) {
546 err = PTR_ERR(ctx->kdamond);
7ec1992b 547 ctx->kdamond = NULL;
2224d848
SP
548 }
549 }
550 mutex_unlock(&ctx->kdamond_lock);
551
552 return err;
553}
554
555/**
556 * damon_start() - Starts the monitorings for a given group of contexts.
557 * @ctxs: an array of the pointers for contexts to start monitoring
558 * @nr_ctxs: size of @ctxs
8b9b0d33 559 * @exclusive: exclusiveness of this contexts group
2224d848
SP
560 *
561 * This function starts a group of monitoring threads for a group of monitoring
562 * contexts. One thread per each context is created and run in parallel. The
8b9b0d33
SP
563 * caller should handle synchronization between the threads by itself. If
564 * @exclusive is true and a group of threads that created by other
565 * 'damon_start()' call is currently running, this function does nothing but
566 * returns -EBUSY.
2224d848
SP
567 *
568 * Return: 0 on success, negative error code otherwise.
569 */
8b9b0d33 570int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive)
2224d848
SP
571{
572 int i;
573 int err = 0;
574
575 mutex_lock(&damon_lock);
8b9b0d33
SP
576 if ((exclusive && nr_running_ctxs) ||
577 (!exclusive && running_exclusive_ctxs)) {
2224d848
SP
578 mutex_unlock(&damon_lock);
579 return -EBUSY;
580 }
581
582 for (i = 0; i < nr_ctxs; i++) {
583 err = __damon_start(ctxs[i]);
584 if (err)
585 break;
586 nr_running_ctxs++;
587 }
8b9b0d33
SP
588 if (exclusive && nr_running_ctxs)
589 running_exclusive_ctxs = true;
2224d848
SP
590 mutex_unlock(&damon_lock);
591
592 return err;
593}
594
595/*
8b9b0d33 596 * __damon_stop() - Stops monitoring of a given context.
2224d848
SP
597 * @ctx: monitoring context
598 *
599 * Return: 0 on success, negative error code otherwise.
600 */
601static int __damon_stop(struct damon_ctx *ctx)
602{
0f91d133
CD
603 struct task_struct *tsk;
604
2224d848 605 mutex_lock(&ctx->kdamond_lock);
0f91d133
CD
606 tsk = ctx->kdamond;
607 if (tsk) {
608 get_task_struct(tsk);
2224d848 609 mutex_unlock(&ctx->kdamond_lock);
0f91d133
CD
610 kthread_stop(tsk);
611 put_task_struct(tsk);
2224d848
SP
612 return 0;
613 }
614 mutex_unlock(&ctx->kdamond_lock);
615
616 return -EPERM;
617}
618
/**
 * damon_stop() - Stops the monitorings for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to stop monitoring
 * @nr_ctxs:	size of @ctxs
 *
 * Contexts are stopped in array order.  The first failure ends the walk and
 * the remaining contexts are not touched.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
{
	int idx;

	for (idx = 0; idx < nr_ctxs; idx++) {
		/* nr_running_ctxs is decremented in kdamond_fn */
		int err = __damon_stop(ctxs[idx]);

		if (err)
			return err;
	}
	return 0;
}
638
639/*
640 * damon_check_reset_time_interval() - Check if a time interval is elapsed.
641 * @baseline: the time to check whether the interval has elapsed since
642 * @interval: the time interval (microseconds)
643 *
644 * See whether the given time interval has passed since the given baseline
645 * time. If so, it also updates the baseline to current time for next check.
646 *
647 * Return: true if the time interval has passed, or false otherwise.
648 */
649static bool damon_check_reset_time_interval(struct timespec64 *baseline,
650 unsigned long interval)
651{
652 struct timespec64 now;
653
654 ktime_get_coarse_ts64(&now);
655 if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
656 interval * 1000)
657 return false;
658 *baseline = now;
659 return true;
660}
661
662/*
663 * Check whether it is time to flush the aggregated information
664 */
665static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
666{
667 return damon_check_reset_time_interval(&ctx->last_aggregation,
668 ctx->aggr_interval);
669}
670
f23b8eee
SP
671/*
672 * Reset the aggregated monitoring results ('nr_accesses' of each region).
673 */
674static void kdamond_reset_aggregated(struct damon_ctx *c)
675{
676 struct damon_target *t;
76fd0285 677 unsigned int ti = 0; /* target's index */
f23b8eee
SP
678
679 damon_for_each_target(t, c) {
680 struct damon_region *r;
681
2fcb9362 682 damon_for_each_region(r, t) {
76fd0285 683 trace_damon_aggregated(t, ti, r, damon_nr_regions(t));
fda504fa 684 r->last_nr_accesses = r->nr_accesses;
f23b8eee 685 r->nr_accesses = 0;
2fcb9362 686 }
76fd0285 687 ti++;
f23b8eee
SP
688 }
689}
690
4ed98243
KX
691static void damon_split_region_at(struct damon_target *t,
692 struct damon_region *r, unsigned long sz_r);
2b8a248d 693
38683e00
SP
694static bool __damos_valid_target(struct damon_region *r, struct damos *s)
695{
696 unsigned long sz;
697
698 sz = r->ar.end - r->ar.start;
f5a79d7c
YD
699 return s->pattern.min_sz_region <= sz &&
700 sz <= s->pattern.max_sz_region &&
701 s->pattern.min_nr_accesses <= r->nr_accesses &&
702 r->nr_accesses <= s->pattern.max_nr_accesses &&
703 s->pattern.min_age_region <= r->age &&
704 r->age <= s->pattern.max_age_region;
38683e00
SP
705}
706
707static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
708 struct damon_region *r, struct damos *s)
709{
710 bool ret = __damos_valid_target(r, s);
711
f7d911c3 712 if (!ret || !s->quota.esz || !c->ops.get_scheme_score)
38683e00
SP
713 return ret;
714
f7d911c3 715 return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score;
38683e00
SP
716}
717
1f366e42
SP
718static void damon_do_apply_schemes(struct damon_ctx *c,
719 struct damon_target *t,
720 struct damon_region *r)
721{
722 struct damos *s;
1f366e42
SP
723
724 damon_for_each_scheme(s, c) {
2b8a248d
SP
725 struct damos_quota *quota = &s->quota;
726 unsigned long sz = r->ar.end - r->ar.start;
1cd24303 727 struct timespec64 begin, end;
0e92c2ee 728 unsigned long sz_applied = 0;
2b8a248d 729
ee801b7d
SP
730 if (!s->wmarks.activated)
731 continue;
732
2b8a248d 733 /* Check the quota */
1cd24303 734 if (quota->esz && quota->charged_sz >= quota->esz)
2b8a248d
SP
735 continue;
736
50585192
SP
737 /* Skip previously charged regions */
738 if (quota->charge_target_from) {
739 if (t != quota->charge_target_from)
740 continue;
741 if (r == damon_last_region(t)) {
742 quota->charge_target_from = NULL;
743 quota->charge_addr_from = 0;
744 continue;
745 }
746 if (quota->charge_addr_from &&
747 r->ar.end <= quota->charge_addr_from)
748 continue;
749
750 if (quota->charge_addr_from && r->ar.start <
751 quota->charge_addr_from) {
752 sz = ALIGN_DOWN(quota->charge_addr_from -
753 r->ar.start, DAMON_MIN_REGION);
754 if (!sz) {
755 if (r->ar.end - r->ar.start <=
756 DAMON_MIN_REGION)
757 continue;
758 sz = DAMON_MIN_REGION;
759 }
4ed98243 760 damon_split_region_at(t, r, sz);
50585192
SP
761 r = damon_next_region(r);
762 sz = r->ar.end - r->ar.start;
763 }
764 quota->charge_target_from = NULL;
765 quota->charge_addr_from = 0;
766 }
767
38683e00 768 if (!damos_valid_target(c, t, r, s))
1f366e42 769 continue;
2b8a248d
SP
770
771 /* Apply the scheme */
f7d911c3 772 if (c->ops.apply_scheme) {
1cd24303
SP
773 if (quota->esz &&
774 quota->charged_sz + sz > quota->esz) {
775 sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
2b8a248d
SP
776 DAMON_MIN_REGION);
777 if (!sz)
778 goto update_stat;
4ed98243 779 damon_split_region_at(t, r, sz);
2b8a248d 780 }
1cd24303 781 ktime_get_coarse_ts64(&begin);
f7d911c3 782 sz_applied = c->ops.apply_scheme(c, t, r, s);
1cd24303
SP
783 ktime_get_coarse_ts64(&end);
784 quota->total_charged_ns += timespec64_to_ns(&end) -
785 timespec64_to_ns(&begin);
2b8a248d 786 quota->charged_sz += sz;
1cd24303 787 if (quota->esz && quota->charged_sz >= quota->esz) {
50585192
SP
788 quota->charge_target_from = t;
789 quota->charge_addr_from = r->ar.end + 1;
790 }
2b8a248d 791 }
2f0b548c
SP
792 if (s->action != DAMOS_STAT)
793 r->age = 0;
2b8a248d
SP
794
795update_stat:
0e92c2ee
SP
796 s->stat.nr_tried++;
797 s->stat.sz_tried += sz;
798 if (sz_applied)
799 s->stat.nr_applied++;
800 s->stat.sz_applied += sz_applied;
1f366e42
SP
801 }
802}
803
1cd24303
SP
804/* Shouldn't be called if quota->ms and quota->sz are zero */
805static void damos_set_effective_quota(struct damos_quota *quota)
806{
807 unsigned long throughput;
808 unsigned long esz;
809
810 if (!quota->ms) {
811 quota->esz = quota->sz;
812 return;
813 }
814
815 if (quota->total_charged_ns)
816 throughput = quota->total_charged_sz * 1000000 /
817 quota->total_charged_ns;
818 else
819 throughput = PAGE_SIZE * 1024;
820 esz = throughput * quota->ms;
821
822 if (quota->sz && quota->sz < esz)
823 esz = quota->sz;
824 quota->esz = esz;
825}
826
1f366e42
SP
827static void kdamond_apply_schemes(struct damon_ctx *c)
828{
829 struct damon_target *t;
2b8a248d
SP
830 struct damon_region *r, *next_r;
831 struct damos *s;
832
833 damon_for_each_scheme(s, c) {
834 struct damos_quota *quota = &s->quota;
38683e00
SP
835 unsigned long cumulated_sz;
836 unsigned int score, max_score = 0;
2b8a248d 837
ee801b7d
SP
838 if (!s->wmarks.activated)
839 continue;
840
1cd24303 841 if (!quota->ms && !quota->sz)
2b8a248d
SP
842 continue;
843
844 /* New charge window starts */
845 if (time_after_eq(jiffies, quota->charged_from +
846 msecs_to_jiffies(
847 quota->reset_interval))) {
6268eac3
SP
848 if (quota->esz && quota->charged_sz >= quota->esz)
849 s->stat.qt_exceeds++;
1cd24303 850 quota->total_charged_sz += quota->charged_sz;
2b8a248d
SP
851 quota->charged_from = jiffies;
852 quota->charged_sz = 0;
1cd24303 853 damos_set_effective_quota(quota);
2b8a248d 854 }
38683e00 855
f7d911c3 856 if (!c->ops.get_scheme_score)
38683e00
SP
857 continue;
858
859 /* Fill up the score histogram */
860 memset(quota->histogram, 0, sizeof(quota->histogram));
861 damon_for_each_target(t, c) {
862 damon_for_each_region(r, t) {
863 if (!__damos_valid_target(r, s))
864 continue;
f7d911c3 865 score = c->ops.get_scheme_score(
38683e00
SP
866 c, t, r, s);
867 quota->histogram[score] +=
868 r->ar.end - r->ar.start;
869 if (score > max_score)
870 max_score = score;
871 }
872 }
873
874 /* Set the min score limit */
875 for (cumulated_sz = 0, score = max_score; ; score--) {
876 cumulated_sz += quota->histogram[score];
877 if (cumulated_sz >= quota->esz || !score)
878 break;
879 }
880 quota->min_score = score;
2b8a248d 881 }
1f366e42
SP
882
883 damon_for_each_target(t, c) {
2b8a248d 884 damon_for_each_region_safe(r, next_r, t)
1f366e42
SP
885 damon_do_apply_schemes(c, t, r);
886 }
887}
888
88f86dcf
SP
889static inline unsigned long sz_damon_region(struct damon_region *r)
890{
891 return r->ar.end - r->ar.start;
892}
b9a6ac4e
SP
893
894/*
895 * Merge two adjacent regions into one region
896 */
897static void damon_merge_two_regions(struct damon_target *t,
898 struct damon_region *l, struct damon_region *r)
899{
900 unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);
901
902 l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
903 (sz_l + sz_r);
fda504fa 904 l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
b9a6ac4e
SP
905 l->ar.end = r->ar.end;
906 damon_destroy_region(r, t);
907}
908
b9a6ac4e
SP
909/*
910 * Merge adjacent regions having similar access frequencies
911 *
912 * t target affected by this merge operation
913 * thres '->nr_accesses' diff threshold for the merge
914 * sz_limit size upper limit of each region
915 */
916static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
917 unsigned long sz_limit)
918{
919 struct damon_region *r, *prev = NULL, *next;
920
921 damon_for_each_region_safe(r, next, t) {
d720bbbd 922 if (abs(r->nr_accesses - r->last_nr_accesses) > thres)
fda504fa
SP
923 r->age = 0;
924 else
925 r->age++;
926
b9a6ac4e 927 if (prev && prev->ar.end == r->ar.start &&
d720bbbd 928 abs(prev->nr_accesses - r->nr_accesses) <= thres &&
b9a6ac4e
SP
929 sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
930 damon_merge_two_regions(t, prev, r);
931 else
932 prev = r;
933 }
934}
935
936/*
937 * Merge adjacent regions having similar access frequencies
938 *
939 * threshold '->nr_accesses' diff threshold for the merge
940 * sz_limit size upper limit of each region
941 *
942 * This function merges monitoring target regions which are adjacent and their
943 * access frequencies are similar. This is for minimizing the monitoring
944 * overhead under the dynamically changeable access pattern. If a merge was
945 * unnecessarily made, later 'kdamond_split_regions()' will revert it.
946 */
947static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
948 unsigned long sz_limit)
949{
950 struct damon_target *t;
951
952 damon_for_each_target(t, c)
953 damon_merge_regions_of(t, threshold, sz_limit);
954}
955
956/*
957 * Split a region in two
958 *
959 * r the region to be split
960 * sz_r size of the first sub-region that will be made
961 */
4ed98243
KX
962static void damon_split_region_at(struct damon_target *t,
963 struct damon_region *r, unsigned long sz_r)
b9a6ac4e
SP
964{
965 struct damon_region *new;
966
967 new = damon_new_region(r->ar.start + sz_r, r->ar.end);
968 if (!new)
969 return;
970
971 r->ar.end = new->ar.start;
972
fda504fa
SP
973 new->age = r->age;
974 new->last_nr_accesses = r->last_nr_accesses;
975
b9a6ac4e
SP
976 damon_insert_region(new, r, damon_next_region(r), t);
977}
978
979/* Split every region in the given target into 'nr_subs' regions */
4ed98243 980static void damon_split_regions_of(struct damon_target *t, int nr_subs)
b9a6ac4e
SP
981{
982 struct damon_region *r, *next;
983 unsigned long sz_region, sz_sub = 0;
984 int i;
985
986 damon_for_each_region_safe(r, next, t) {
987 sz_region = r->ar.end - r->ar.start;
988
989 for (i = 0; i < nr_subs - 1 &&
990 sz_region > 2 * DAMON_MIN_REGION; i++) {
991 /*
992 * Randomly select size of left sub-region to be at
993 * least 10 percent and at most 90% of original region
994 */
995 sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
996 sz_region / 10, DAMON_MIN_REGION);
997 /* Do not allow blank region */
998 if (sz_sub == 0 || sz_sub >= sz_region)
999 continue;
1000
4ed98243 1001 damon_split_region_at(t, r, sz_sub);
b9a6ac4e
SP
1002 sz_region = sz_sub;
1003 }
1004 }
1005}
1006
1007/*
1008 * Split every target region into randomly-sized small regions
1009 *
1010 * This function splits every target region into random-sized small regions if
1011 * current total number of the regions is equal or smaller than half of the
1012 * user-specified maximum number of regions. This is for maximizing the
1013 * monitoring accuracy under the dynamically changeable access patterns. If a
1014 * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
1015 * it.
1016 */
1017static void kdamond_split_regions(struct damon_ctx *ctx)
1018{
1019 struct damon_target *t;
1020 unsigned int nr_regions = 0;
1021 static unsigned int last_nr_regions;
1022 int nr_subregions = 2;
1023
1024 damon_for_each_target(t, ctx)
1025 nr_regions += damon_nr_regions(t);
1026
1027 if (nr_regions > ctx->max_nr_regions / 2)
1028 return;
1029
1030 /* Maybe the middle of the region has different access frequency */
1031 if (last_nr_regions == nr_regions &&
1032 nr_regions < ctx->max_nr_regions / 3)
1033 nr_subregions = 3;
1034
1035 damon_for_each_target(t, ctx)
4ed98243 1036 damon_split_regions_of(t, nr_subregions);
b9a6ac4e
SP
1037
1038 last_nr_regions = nr_regions;
1039}
1040
2224d848 1041/*
f7d911c3
SP
1042 * Check whether it is time to check and apply the operations-related data
1043 * structures.
2224d848
SP
1044 *
1045 * Returns true if it is.
1046 */
f7d911c3 1047static bool kdamond_need_update_operations(struct damon_ctx *ctx)
2224d848 1048{
f7d911c3
SP
1049 return damon_check_reset_time_interval(&ctx->last_ops_update,
1050 ctx->ops_update_interval);
2224d848
SP
1051}
1052
1053/*
1054 * Check whether current monitoring should be stopped
1055 *
1056 * The monitoring is stopped when either the user requested to stop, or all
1057 * monitoring targets are invalid.
1058 *
1059 * Returns true if need to stop current monitoring.
1060 */
1061static bool kdamond_need_stop(struct damon_ctx *ctx)
1062{
f23b8eee 1063 struct damon_target *t;
2224d848 1064
0f91d133 1065 if (kthread_should_stop())
2224d848
SP
1066 return true;
1067
f7d911c3 1068 if (!ctx->ops.target_valid)
2224d848
SP
1069 return false;
1070
f23b8eee 1071 damon_for_each_target(t, ctx) {
f7d911c3 1072 if (ctx->ops.target_valid(t))
f23b8eee
SP
1073 return false;
1074 }
1075
1076 return true;
2224d848
SP
1077}
1078
ee801b7d
SP
1079static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric)
1080{
1081 struct sysinfo i;
1082
1083 switch (metric) {
1084 case DAMOS_WMARK_FREE_MEM_RATE:
1085 si_meminfo(&i);
1086 return i.freeram * 1000 / i.totalram;
1087 default:
1088 break;
1089 }
1090 return -EINVAL;
1091}
1092
1093/*
1094 * Returns zero if the scheme is active. Else, returns time to wait for next
1095 * watermark check in micro-seconds.
1096 */
1097static unsigned long damos_wmark_wait_us(struct damos *scheme)
1098{
1099 unsigned long metric;
1100
1101 if (scheme->wmarks.metric == DAMOS_WMARK_NONE)
1102 return 0;
1103
1104 metric = damos_wmark_metric_value(scheme->wmarks.metric);
1105 /* higher than high watermark or lower than low watermark */
1106 if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) {
1107 if (scheme->wmarks.activated)
01078655 1108 pr_debug("deactivate a scheme (%d) for %s wmark\n",
ee801b7d
SP
1109 scheme->action,
1110 metric > scheme->wmarks.high ?
1111 "high" : "low");
1112 scheme->wmarks.activated = false;
1113 return scheme->wmarks.interval;
1114 }
1115
1116 /* inactive and higher than middle watermark */
1117 if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) &&
1118 !scheme->wmarks.activated)
1119 return scheme->wmarks.interval;
1120
1121 if (!scheme->wmarks.activated)
1122 pr_debug("activate a scheme (%d)\n", scheme->action);
1123 scheme->wmarks.activated = true;
1124 return 0;
1125}
1126
1127static void kdamond_usleep(unsigned long usecs)
1128{
4de46a30
SP
1129 /* See Documentation/timers/timers-howto.rst for the thresholds */
1130 if (usecs > 20 * USEC_PER_MSEC)
70e92748 1131 schedule_timeout_idle(usecs_to_jiffies(usecs));
ee801b7d 1132 else
70e92748 1133 usleep_idle_range(usecs, usecs + 1);
ee801b7d
SP
1134}
1135
1136/* Returns negative error code if it's not activated but should return */
1137static int kdamond_wait_activation(struct damon_ctx *ctx)
1138{
1139 struct damos *s;
1140 unsigned long wait_time;
1141 unsigned long min_wait_time = 0;
78049e94 1142 bool init_wait_time = false;
ee801b7d
SP
1143
1144 while (!kdamond_need_stop(ctx)) {
1145 damon_for_each_scheme(s, ctx) {
1146 wait_time = damos_wmark_wait_us(s);
78049e94
JK
1147 if (!init_wait_time || wait_time < min_wait_time) {
1148 init_wait_time = true;
ee801b7d 1149 min_wait_time = wait_time;
78049e94 1150 }
ee801b7d
SP
1151 }
1152 if (!min_wait_time)
1153 return 0;
1154
1155 kdamond_usleep(min_wait_time);
6e74d2bf
SP
1156
1157 if (ctx->callback.after_wmarks_check &&
1158 ctx->callback.after_wmarks_check(ctx))
1159 break;
ee801b7d
SP
1160 }
1161 return -EBUSY;
1162}
1163
2224d848
SP
1164/*
1165 * The monitoring daemon that runs as a kernel thread
1166 */
1167static int kdamond_fn(void *data)
1168{
cef4493f 1169 struct damon_ctx *ctx = data;
f23b8eee
SP
1170 struct damon_target *t;
1171 struct damon_region *r, *next;
b9a6ac4e
SP
1172 unsigned int max_nr_accesses = 0;
1173 unsigned long sz_limit = 0;
0f91d133 1174 bool done = false;
2224d848 1175
42e4cef5 1176 pr_debug("kdamond (%d) starts\n", current->pid);
2224d848 1177
f7d911c3
SP
1178 if (ctx->ops.init)
1179 ctx->ops.init(ctx);
2224d848 1180 if (ctx->callback.before_start && ctx->callback.before_start(ctx))
0f91d133 1181 done = true;
2224d848 1182
b9a6ac4e
SP
1183 sz_limit = damon_region_sz_limit(ctx);
1184
0f91d133 1185 while (!kdamond_need_stop(ctx) && !done) {
6e74d2bf
SP
1186 if (kdamond_wait_activation(ctx)) {
1187 done = true;
ee801b7d 1188 continue;
6e74d2bf 1189 }
ee801b7d 1190
f7d911c3
SP
1191 if (ctx->ops.prepare_access_checks)
1192 ctx->ops.prepare_access_checks(ctx);
2224d848 1193 if (ctx->callback.after_sampling &&
abacd635 1194 ctx->callback.after_sampling(ctx)) {
0f91d133 1195 done = true;
abacd635
SP
1196 continue;
1197 }
2224d848 1198
70e92748 1199 kdamond_usleep(ctx->sample_interval);
2224d848 1200
f7d911c3
SP
1201 if (ctx->ops.check_accesses)
1202 max_nr_accesses = ctx->ops.check_accesses(ctx);
2224d848
SP
1203
1204 if (kdamond_aggregate_interval_passed(ctx)) {
b9a6ac4e
SP
1205 kdamond_merge_regions(ctx,
1206 max_nr_accesses / 10,
1207 sz_limit);
2224d848 1208 if (ctx->callback.after_aggregation &&
abacd635 1209 ctx->callback.after_aggregation(ctx)) {
0f91d133 1210 done = true;
abacd635
SP
1211 continue;
1212 }
1f366e42 1213 kdamond_apply_schemes(ctx);
f23b8eee 1214 kdamond_reset_aggregated(ctx);
b9a6ac4e 1215 kdamond_split_regions(ctx);
f7d911c3
SP
1216 if (ctx->ops.reset_aggregated)
1217 ctx->ops.reset_aggregated(ctx);
2224d848
SP
1218 }
1219
f7d911c3
SP
1220 if (kdamond_need_update_operations(ctx)) {
1221 if (ctx->ops.update)
1222 ctx->ops.update(ctx);
b9a6ac4e 1223 sz_limit = damon_region_sz_limit(ctx);
2224d848
SP
1224 }
1225 }
f23b8eee
SP
1226 damon_for_each_target(t, ctx) {
1227 damon_for_each_region_safe(r, next, t)
b9a6ac4e 1228 damon_destroy_region(r, t);
f23b8eee 1229 }
2224d848 1230
0f91d133
CD
1231 if (ctx->callback.before_terminate)
1232 ctx->callback.before_terminate(ctx);
f7d911c3
SP
1233 if (ctx->ops.cleanup)
1234 ctx->ops.cleanup(ctx);
2224d848 1235
42e4cef5 1236 pr_debug("kdamond (%d) finishes\n", current->pid);
2224d848
SP
1237 mutex_lock(&ctx->kdamond_lock);
1238 ctx->kdamond = NULL;
1239 mutex_unlock(&ctx->kdamond_lock);
1240
1241 mutex_lock(&damon_lock);
1242 nr_running_ctxs--;
8b9b0d33
SP
1243 if (!nr_running_ctxs && running_exclusive_ctxs)
1244 running_exclusive_ctxs = false;
2224d848
SP
1245 mutex_unlock(&damon_lock);
1246
5f7fe2b9 1247 return 0;
2224d848 1248}
17ccae8b 1249
0d83b2d8
XH
/*
 * struct damon_system_ram_region - System RAM resource address region of
 *				    [@start, @end).
 * @start:	Start address of the region (inclusive).
 * @end:	End address of the region (exclusive).
 */
struct damon_system_ram_region {
	unsigned long start;
	unsigned long end;
};
1260
1261static int walk_system_ram(struct resource *res, void *arg)
1262{
1263 struct damon_system_ram_region *a = arg;
1264
1265 if (a->end - a->start < resource_size(res)) {
1266 a->start = res->start;
1267 a->end = res->end;
1268 }
1269 return 0;
1270}
1271
1272/*
1273 * Find biggest 'System RAM' resource and store its start and end address in
1274 * @start and @end, respectively. If no System RAM is found, returns false.
1275 */
1276bool damon_find_biggest_system_ram(unsigned long *start, unsigned long *end)
1277
1278{
1279 struct damon_system_ram_region arg = {};
1280
1281 walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram);
1282 if (arg.end <= arg.start)
1283 return false;
1284
1285 *start = arg.start;
1286 *end = arg.end;
1287 return true;
1288}
1289
a1870944
DL
1290static int __init damon_init(void)
1291{
1292 damon_region_cache = KMEM_CACHE(damon_region, 0);
1293 if (unlikely(!damon_region_cache)) {
1294 pr_err("creating damon_region_cache fails\n");
1295 return -ENOMEM;
1296 }
1297
1298 return 0;
1299}
1300
1301subsys_initcall(damon_init);
1302
17ccae8b 1303#include "core-test.h"