mm/damon/core: print kdamond start log in debug mode only
mm/damon/core.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Data Access Monitor
 *
 * Author: SeongJae Park <sjpark@amazon.de>
 */

#define pr_fmt(fmt) "damon: " fmt

#include <linux/damon.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/random.h>
#include <linux/slab.h>

#define CREATE_TRACE_POINTS
#include <trace/events/damon.h>

#ifdef CONFIG_DAMON_KUNIT_TEST
#undef DAMON_MIN_REGION
#define DAMON_MIN_REGION 1
#endif

/* Get a random number in [l, r) */
#define damon_rand(l, r) ((l) + prandom_u32_max((r) - (l)))

static DEFINE_MUTEX(damon_lock);
static int nr_running_ctxs;

/*
 * Construct a damon_region struct
 *
 * Returns the pointer to the new struct on success, or NULL otherwise
 */
struct damon_region *damon_new_region(unsigned long start, unsigned long end)
{
	struct damon_region *region;

	region = kmalloc(sizeof(*region), GFP_KERNEL);
	if (!region)
		return NULL;

	region->ar.start = start;
	region->ar.end = end;
	region->nr_accesses = 0;
	INIT_LIST_HEAD(&region->list);

	return region;
}

/*
 * Add a region between two other regions
 */
inline void damon_insert_region(struct damon_region *r,
		struct damon_region *prev, struct damon_region *next,
		struct damon_target *t)
{
	__list_add(&r->list, &prev->list, &next->list);
	t->nr_regions++;
}

void damon_add_region(struct damon_region *r, struct damon_target *t)
{
	list_add_tail(&r->list, &t->regions_list);
	t->nr_regions++;
}

static void damon_del_region(struct damon_region *r, struct damon_target *t)
{
	list_del(&r->list);
	t->nr_regions--;
}

static void damon_free_region(struct damon_region *r)
{
	kfree(r);
}

void damon_destroy_region(struct damon_region *r, struct damon_target *t)
{
	damon_del_region(r, t);
	damon_free_region(r);
}

/*
 * Construct a damon_target struct
 *
 * Returns the pointer to the new struct on success, or NULL otherwise
 */
struct damon_target *damon_new_target(unsigned long id)
{
	struct damon_target *t;

	t = kmalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return NULL;

	t->id = id;
	t->nr_regions = 0;
	INIT_LIST_HEAD(&t->regions_list);

	return t;
}

void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
{
	list_add_tail(&t->list, &ctx->adaptive_targets);
}

static void damon_del_target(struct damon_target *t)
{
	list_del(&t->list);
}

void damon_free_target(struct damon_target *t)
{
	struct damon_region *r, *next;

	damon_for_each_region_safe(r, next, t)
		damon_free_region(r);
	kfree(t);
}

void damon_destroy_target(struct damon_target *t)
{
	damon_del_target(t);
	damon_free_target(t);
}

unsigned int damon_nr_regions(struct damon_target *t)
{
	return t->nr_regions;
}
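
/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * caller could build a target covering one made-up address range like
 * below.
 *
 *	struct damon_target *t = damon_new_target(42);
 *	struct damon_region *r = damon_new_region(0x100000, 0x200000);
 *
 *	if (!t || !r)
 *		return -ENOMEM;	// a real caller would also free the other
 *	damon_add_region(r, t);
 *	// damon_nr_regions(t) == 1 at this point
 *	damon_free_target(t);	// frees every region of the target, too
 */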

struct damon_ctx *damon_new_ctx(void)
{
	struct damon_ctx *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

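	/*
	 * All intervals below are in microseconds: sample every 5ms,
	 * aggregate every 100ms, and update the primitives every 60s
	 * by default.
	 */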
	ctx->sample_interval = 5 * 1000;
	ctx->aggr_interval = 100 * 1000;
	ctx->primitive_update_interval = 60 * 1000 * 1000;

	ktime_get_coarse_ts64(&ctx->last_aggregation);
	ctx->last_primitive_update = ctx->last_aggregation;

	mutex_init(&ctx->kdamond_lock);

	ctx->min_nr_regions = 10;
	ctx->max_nr_regions = 1000;

	INIT_LIST_HEAD(&ctx->adaptive_targets);

	return ctx;
}

static void damon_destroy_targets(struct damon_ctx *ctx)
{
	struct damon_target *t, *next_t;

	if (ctx->primitive.cleanup) {
		ctx->primitive.cleanup(ctx);
		return;
	}

	damon_for_each_target_safe(t, next_t, ctx)
		damon_destroy_target(t);
}

void damon_destroy_ctx(struct damon_ctx *ctx)
{
	damon_destroy_targets(ctx);
	kfree(ctx);
}

/**
 * damon_set_targets() - Set monitoring targets.
 * @ctx:	monitoring context
 * @ids:	array of target ids
 * @nr_ids:	number of entries in @ids
 *
 * This function should not be called while the kdamond is running.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_targets(struct damon_ctx *ctx,
		unsigned long *ids, ssize_t nr_ids)
{
	ssize_t i;
	struct damon_target *t, *next;

	damon_destroy_targets(ctx);

	for (i = 0; i < nr_ids; i++) {
		t = damon_new_target(ids[i]);
		if (!t) {
			pr_err("Failed to alloc damon_target\n");
			/* The caller should do cleanup of the ids itself */
			damon_for_each_target_safe(t, next, ctx)
				damon_destroy_target(t);
			return -ENOMEM;
		}
		damon_add_target(ctx, t);
	}

	return 0;
}
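
/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * caller could (re)set the targets of an idle context to two made-up
 * target ids (e.g., pids, for virtual address space primitives) like
 * below.
 *
 *	unsigned long ids[] = {1234, 5678};	// hypothetical ids
 *
 *	if (damon_set_targets(ctx, ids, 2))
 *		pr_err("failed to set targets\n");
 */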

/**
 * damon_set_attrs() - Set attributes for the monitoring.
 * @ctx:		monitoring context
 * @sample_int:		time interval between samplings
 * @aggr_int:		time interval between aggregations
 * @primitive_upd_int:	time interval between monitoring primitive updates
 * @min_nr_reg:		minimal number of regions
 * @max_nr_reg:		maximum number of regions
 *
 * This function should not be called while the kdamond is running.
 * Every time interval is in microseconds.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
		unsigned long aggr_int, unsigned long primitive_upd_int,
		unsigned long min_nr_reg, unsigned long max_nr_reg)
{
	if (min_nr_reg < 3) {
		pr_err("min_nr_regions (%lu) must be at least 3\n",
				min_nr_reg);
		return -EINVAL;
	}
	if (min_nr_reg > max_nr_reg) {
		pr_err("invalid nr_regions. min (%lu) > max (%lu)\n",
				min_nr_reg, max_nr_reg);
		return -EINVAL;
	}

	ctx->sample_interval = sample_int;
	ctx->aggr_interval = aggr_int;
	ctx->primitive_update_interval = primitive_upd_int;
	ctx->min_nr_regions = min_nr_reg;
	ctx->max_nr_regions = max_nr_reg;

	return 0;
}
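
/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * caller could configure 5ms sampling, 200ms aggregation, and 1s
 * primitive updates with 10 to 1000 regions like below (all intervals
 * in microseconds).
 *
 *	if (damon_set_attrs(ctx, 5000, 200000, 1000000, 10, 1000))
 *		pr_err("invalid monitoring attributes\n");
 */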

/**
 * damon_nr_running_ctxs() - Return number of currently running contexts.
 */
int damon_nr_running_ctxs(void)
{
	int nr_ctxs;

	mutex_lock(&damon_lock);
	nr_ctxs = nr_running_ctxs;
	mutex_unlock(&damon_lock);

	return nr_ctxs;
}

/* Returns the size upper limit for each monitoring region */
static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct damon_region *r;
	unsigned long sz = 0;

	damon_for_each_target(t, ctx) {
		damon_for_each_region(r, t)
			sz += r->ar.end - r->ar.start;
	}

	if (ctx->min_nr_regions)
		sz /= ctx->min_nr_regions;
	if (sz < DAMON_MIN_REGION)
		sz = DAMON_MIN_REGION;

	return sz;
}
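
/*
 * For example (made-up numbers), if the targets cover 100 MiB in total
 * and min_nr_regions is 10, each region may grow up to 10 MiB; merging
 * regions beyond that limit could leave fewer than min_nr_regions
 * regions.
 */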

static bool damon_kdamond_running(struct damon_ctx *ctx)
{
	bool running;

	mutex_lock(&ctx->kdamond_lock);
	running = ctx->kdamond != NULL;
	mutex_unlock(&ctx->kdamond_lock);

	return running;
}

static int kdamond_fn(void *data);

/*
 * __damon_start() - Starts monitoring with the given context.
 * @ctx:	monitoring context
 *
 * This function should be called while damon_lock is held.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_start(struct damon_ctx *ctx)
{
	int err = -EBUSY;

	mutex_lock(&ctx->kdamond_lock);
	if (!ctx->kdamond) {
		err = 0;
		ctx->kdamond_stop = false;
		ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
				nr_running_ctxs);
		if (IS_ERR(ctx->kdamond)) {
			err = PTR_ERR(ctx->kdamond);
			ctx->kdamond = NULL;
		}
	}
	mutex_unlock(&ctx->kdamond_lock);

	return err;
}

/**
 * damon_start() - Starts monitoring for a given group of contexts.
 * @ctxs:	an array of pointers to the contexts to start monitoring
 * @nr_ctxs:	size of @ctxs
 *
 * This function starts a group of monitoring threads for a group of monitoring
 * contexts.  One thread per context is created and run in parallel.  The
 * caller should handle synchronization between the threads by itself.  If a
 * group of threads created by another 'damon_start()' call is currently
 * running, this function does nothing but returns -EBUSY.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
{
	int i;
	int err = 0;

	mutex_lock(&damon_lock);
	if (nr_running_ctxs) {
		mutex_unlock(&damon_lock);
		return -EBUSY;
	}

	for (i = 0; i < nr_ctxs; i++) {
		err = __damon_start(ctxs[i]);
		if (err)
			break;
		nr_running_ctxs++;
	}
	mutex_unlock(&damon_lock);

	return err;
}
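
/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * caller could run a single context and later stop it like below.
 *
 *	struct damon_ctx *ctx = damon_new_ctx();
 *
 *	if (!ctx)
 *		return -ENOMEM;
 *	// ... set targets and attributes here ...
 *	if (damon_start(&ctx, 1))
 *		return -EBUSY;
 *	// monitoring now runs in a "kdamond.N" kthread
 *	if (damon_stop(&ctx, 1))
 *		pr_err("kdamond was already stopped\n");
 *	damon_destroy_ctx(ctx);
 */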

/*
 * __damon_stop() - Stops monitoring of the given context.
 * @ctx:	monitoring context
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_stop(struct damon_ctx *ctx)
{
	mutex_lock(&ctx->kdamond_lock);
	if (ctx->kdamond) {
		ctx->kdamond_stop = true;
		mutex_unlock(&ctx->kdamond_lock);
		while (damon_kdamond_running(ctx))
			usleep_range(ctx->sample_interval,
					ctx->sample_interval * 2);
		return 0;
	}
	mutex_unlock(&ctx->kdamond_lock);

	return -EPERM;
}

/**
 * damon_stop() - Stops monitoring for a given group of contexts.
 * @ctxs:	an array of pointers to the contexts to stop monitoring
 * @nr_ctxs:	size of @ctxs
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
{
	int i, err = 0;

	for (i = 0; i < nr_ctxs; i++) {
		/* nr_running_ctxs is decremented in kdamond_fn */
		err = __damon_stop(ctxs[i]);
		if (err)
			return err;
	}

	return err;
}

/*
 * damon_check_reset_time_interval() - Check if a time interval has elapsed.
 * @baseline:	the time to check whether the interval has elapsed since
 * @interval:	the time interval (microseconds)
 *
 * See whether the given time interval has passed since the given baseline
 * time.  If so, it also updates the baseline to the current time for the next
 * check.
 *
 * Return: true if the time interval has passed, or false otherwise.
 */
static bool damon_check_reset_time_interval(struct timespec64 *baseline,
		unsigned long interval)
{
	struct timespec64 now;

	ktime_get_coarse_ts64(&now);
	if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
			interval * 1000)
		return false;
	*baseline = now;
	return true;
}
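
/*
 * For example (made-up numbers), with a 100000us (100ms) interval and a
 * caller polling every 5000us, this returns false 19 times and then true
 * on roughly the 20th call, at which point @baseline is reset and the
 * count starts over.
 */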

/*
 * Check whether it is time to flush the aggregated information
 */
static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
{
	return damon_check_reset_time_interval(&ctx->last_aggregation,
			ctx->aggr_interval);
}

/*
 * Reset the aggregated monitoring results ('nr_accesses' of each region).
 */
static void kdamond_reset_aggregated(struct damon_ctx *c)
{
	struct damon_target *t;

	damon_for_each_target(t, c) {
		struct damon_region *r;

		damon_for_each_region(r, t) {
			trace_damon_aggregated(t, r, damon_nr_regions(t));
			r->nr_accesses = 0;
		}
	}
}

#define sz_damon_region(r) ((r)->ar.end - (r)->ar.start)

/*
 * Merge two adjacent regions into one region
 */
static void damon_merge_two_regions(struct damon_target *t,
		struct damon_region *l, struct damon_region *r)
{
	unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);

	l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
			(sz_l + sz_r);
	l->ar.end = r->ar.end;
	damon_destroy_region(r, t);
}
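
/*
 * For example (made-up numbers), merging an 8 KiB region with
 * nr_accesses 10 into an adjacent 4 KiB region with nr_accesses 1
 * yields a 12 KiB region with nr_accesses
 * (10 * 8192 + 1 * 4096) / 12288 == 7, i.e., a size-weighted average.
 */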

#define diff_of(a, b) ((a) > (b) ? (a) - (b) : (b) - (a))

/*
 * Merge adjacent regions having similar access frequencies
 *
 * t		target affected by this merge operation
 * thres	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 */
static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
		unsigned long sz_limit)
{
	struct damon_region *r, *prev = NULL, *next;

	damon_for_each_region_safe(r, next, t) {
		if (prev && prev->ar.end == r->ar.start &&
		    diff_of(prev->nr_accesses, r->nr_accesses) <= thres &&
		    sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
			damon_merge_two_regions(t, prev, r);
		else
			prev = r;
	}
}

/*
 * Merge adjacent regions having similar access frequencies
 *
 * threshold	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 *
 * This function merges monitoring target regions which are adjacent and have
 * similar access frequencies.  This minimizes the monitoring overhead under
 * dynamically changing access patterns.  If a merge was unnecessarily made,
 * later 'kdamond_split_regions()' will revert it.
 */
static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
		unsigned long sz_limit)
{
	struct damon_target *t;

	damon_for_each_target(t, c)
		damon_merge_regions_of(t, threshold, sz_limit);
}

/*
 * Split a region in two
 *
 * r		the region to be split
 * sz_r		size of the first sub-region that will be made
 */
static void damon_split_region_at(struct damon_ctx *ctx,
		struct damon_target *t, struct damon_region *r,
		unsigned long sz_r)
{
	struct damon_region *new;

	new = damon_new_region(r->ar.start + sz_r, r->ar.end);
	if (!new)
		return;

	r->ar.end = new->ar.start;

	damon_insert_region(new, r, damon_next_region(r), t);
}

/* Split every region in the given target into 'nr_subs' regions */
static void damon_split_regions_of(struct damon_ctx *ctx,
		struct damon_target *t, int nr_subs)
{
	struct damon_region *r, *next;
	unsigned long sz_region, sz_sub = 0;
	int i;

	damon_for_each_region_safe(r, next, t) {
		sz_region = r->ar.end - r->ar.start;

		for (i = 0; i < nr_subs - 1 &&
				sz_region > 2 * DAMON_MIN_REGION; i++) {
			/*
			 * Randomly select size of left sub-region to be at
			 * least 10% and at most 90% of the original region
			 */
			sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
					sz_region / 10, DAMON_MIN_REGION);
			/* Do not allow blank region */
			if (sz_sub == 0 || sz_sub >= sz_region)
				continue;

			damon_split_region_at(ctx, t, r, sz_sub);
			sz_region = sz_sub;
		}
	}
}
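
/*
 * For example (made-up numbers), a 40 KiB region with DAMON_MIN_REGION
 * of 4 KiB and damon_rand(1, 10) returning 3 is split at
 * ALIGN_DOWN(3 * 40960 / 10, 4096) == 12288, i.e., into a 12 KiB and a
 * 28 KiB region.
 */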

/*
 * Split every target region into randomly-sized small regions
 *
 * This function splits every target region into random-sized small regions if
 * the current total number of the regions is equal to or smaller than half of
 * the user-specified maximum number of regions.  This maximizes the
 * monitoring accuracy under dynamically changing access patterns.  If a split
 * was unnecessarily made, later 'kdamond_merge_regions()' will revert it.
 */
static void kdamond_split_regions(struct damon_ctx *ctx)
{
	struct damon_target *t;
	unsigned int nr_regions = 0;
	static unsigned int last_nr_regions;
	int nr_subregions = 2;

	damon_for_each_target(t, ctx)
		nr_regions += damon_nr_regions(t);

	if (nr_regions > ctx->max_nr_regions / 2)
		return;

	/* Maybe the middle of the region has different access frequency */
	if (last_nr_regions == nr_regions &&
			nr_regions < ctx->max_nr_regions / 3)
		nr_subregions = 3;

	damon_for_each_target(t, ctx)
		damon_split_regions_of(ctx, t, nr_subregions);

	last_nr_regions = nr_regions;
}

/*
 * Check whether it is time to update the monitoring primitives, e.g., to
 * re-check and apply the target monitoring regions
 *
 * Returns true if it is.
 */
static bool kdamond_need_update_primitive(struct damon_ctx *ctx)
{
	return damon_check_reset_time_interval(&ctx->last_primitive_update,
			ctx->primitive_update_interval);
}

/*
 * Check whether the current monitoring should be stopped
 *
 * The monitoring is stopped when either the user requested the stop, or all
 * monitoring targets became invalid.
 *
 * Returns true if the monitoring should be stopped.
 */
static bool kdamond_need_stop(struct damon_ctx *ctx)
{
	struct damon_target *t;
	bool stop;

	mutex_lock(&ctx->kdamond_lock);
	stop = ctx->kdamond_stop;
	mutex_unlock(&ctx->kdamond_lock);
	if (stop)
		return true;

	if (!ctx->primitive.target_valid)
		return false;

	damon_for_each_target(t, ctx) {
		if (ctx->primitive.target_valid(t))
			return false;
	}

	return true;
}

static void set_kdamond_stop(struct damon_ctx *ctx)
{
	mutex_lock(&ctx->kdamond_lock);
	ctx->kdamond_stop = true;
	mutex_unlock(&ctx->kdamond_lock);
}

/*
 * The monitoring daemon that runs as a kernel thread
 */
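/*
 * Each iteration of the main loop below prepares and checks page
 * accesses through the configured primitives and sleeps for one
 * sampling interval.  Once per aggregation interval, it merges similar
 * adjacent regions, reports and resets the aggregated 'nr_accesses'
 * counters, and re-splits the regions; once per primitive update
 * interval, it refreshes the primitives and the region size limit.
 */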
static int kdamond_fn(void *data)
{
	struct damon_ctx *ctx = (struct damon_ctx *)data;
	struct damon_target *t;
	struct damon_region *r, *next;
	unsigned int max_nr_accesses = 0;
	unsigned long sz_limit = 0;

	mutex_lock(&ctx->kdamond_lock);
	pr_debug("kdamond (%d) starts\n", ctx->kdamond->pid);
	mutex_unlock(&ctx->kdamond_lock);

	if (ctx->primitive.init)
		ctx->primitive.init(ctx);
	if (ctx->callback.before_start && ctx->callback.before_start(ctx))
		set_kdamond_stop(ctx);

	sz_limit = damon_region_sz_limit(ctx);

	while (!kdamond_need_stop(ctx)) {
		if (ctx->primitive.prepare_access_checks)
			ctx->primitive.prepare_access_checks(ctx);
		if (ctx->callback.after_sampling &&
				ctx->callback.after_sampling(ctx))
			set_kdamond_stop(ctx);

		usleep_range(ctx->sample_interval, ctx->sample_interval + 1);

		if (ctx->primitive.check_accesses)
			max_nr_accesses = ctx->primitive.check_accesses(ctx);

		if (kdamond_aggregate_interval_passed(ctx)) {
			kdamond_merge_regions(ctx,
					max_nr_accesses / 10,
					sz_limit);
			if (ctx->callback.after_aggregation &&
					ctx->callback.after_aggregation(ctx))
				set_kdamond_stop(ctx);
			kdamond_reset_aggregated(ctx);
			kdamond_split_regions(ctx);
			if (ctx->primitive.reset_aggregated)
				ctx->primitive.reset_aggregated(ctx);
		}

		if (kdamond_need_update_primitive(ctx)) {
			if (ctx->primitive.update)
				ctx->primitive.update(ctx);
			sz_limit = damon_region_sz_limit(ctx);
		}
	}
	damon_for_each_target(t, ctx) {
		damon_for_each_region_safe(r, next, t)
			damon_destroy_region(r, t);
	}

	if (ctx->callback.before_terminate &&
			ctx->callback.before_terminate(ctx))
		set_kdamond_stop(ctx);
	if (ctx->primitive.cleanup)
		ctx->primitive.cleanup(ctx);

	pr_debug("kdamond (%d) finishes\n", ctx->kdamond->pid);
	mutex_lock(&ctx->kdamond_lock);
	ctx->kdamond = NULL;
	mutex_unlock(&ctx->kdamond_lock);

	mutex_lock(&damon_lock);
	nr_running_ctxs--;
	mutex_unlock(&damon_lock);

	do_exit(0);
}

#include "core-test.h"