[PATCH 03/13] mm/damon/core: make damon_set_attrs() be safe to be called from damon_call()
From: SeongJae Park
Date: Thu Mar 06 2025 - 12:59:52 EST
Currently all DAMON kernel API callers do online DAMON parameters commit
from damon_callback->after_aggregation because only those are safe place
to call the DAMON monitoring attributes update function, namely
damon_set_attrs().
Because damon_callback hooks provide no synchronization, the callers
work in asynchronous ways or implement their own inefficient and
complicated synchronization mechanisms. It also means online DAMON
parameters commit can take up to one aggregation interval. On large
systems having long aggregation intervals, that can be too slow. The
synchronization can be done in more efficient and simple way while
removing the latency constraint if it can be done using damon_call().
The fact that damon_call() can be executed in the middle of the
aggregation makes damon_set_attrs() unsafe to be called from it, though.
Two real problems can occur in the case. First, converting the not yet
completely aggregated nr_accesses for new user-set intervals can
arguably degrade the accuracy or at least make the logic complicated.
Second, kdamond_reset_aggregated() will not be called after the
monitoring results update, so next aggregation starts from unclean
state. This can result in inconsistent and unexpected nr_accesses_bp.
Make it safe as follows. Catch the middle-of-the-aggregation case from
damon_set_attrs() by checking the passed_sample_intervals and
next_aggregationsis of the context. And pass the information to
nr_accesses conversion logic. The logic works as before if it is not
the case (called after the current aggregation is completed). If it is
the case (committing parameters in the middle of the aggregation), it
drops the nr_accesses information that so far aggregated, and make the
status same to the beginning of this aggregation, but as if the last
aggregation was started with the updated sampling/aggregation intervals.
The middle-of-aggregastion check introduce yet another edge case,
though. This happens because kdamond_tune_intervals() can also call
damon_set_attrs() with the middle-of-aggregation check. Consider
damon_call() for parameters commit and kdamond_tune_intervals() are
called in same iteration of kdamond main loop. Because
kdamond_tune_interval() is called for aggregation intervals, it should
be the end of the aggregation. The first damon_set_attrs() call from
kdamond_call() understands it is the end of the aggregation and
correctly handle it. But, because the damon_set_attrs() updated
next_aggregation_sis of the context. Hence, the second
damon_set_attrs() invocation from kdamond_tune_interval() believes it is
called in the middle of the aggregation. It therefore resets aggregated
information so far. After that, kdamond_reset_interval() is called and
double-reset the aggregated information. Avoid this case, too, by
setting the next_aggregation_sis before kdamond_tune_intervals() is
invoked.
Signed-off-by: SeongJae Park <sj@xxxxxxxxxx>
---
mm/damon/core.c | 56 +++++++++++++++++++++++++++++--------
mm/damon/tests/core-kunit.h | 6 ++--
2 files changed, 48 insertions(+), 14 deletions(-)
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 0f359b4ebf1a..de30b788e13a 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -603,11 +603,25 @@ static unsigned int damon_nr_accesses_for_new_attrs(unsigned int nr_accesses,
}
static void damon_update_monitoring_result(struct damon_region *r,
- struct damon_attrs *old_attrs, struct damon_attrs *new_attrs)
+ struct damon_attrs *old_attrs, struct damon_attrs *new_attrs,
+ bool aggregating)
{
- r->nr_accesses = damon_nr_accesses_for_new_attrs(r->nr_accesses,
- old_attrs, new_attrs);
- r->nr_accesses_bp = r->nr_accesses * 10000;
+ if (!aggregating) {
+ r->nr_accesses = damon_nr_accesses_for_new_attrs(
+ r->nr_accesses, old_attrs, new_attrs);
+ r->nr_accesses_bp = r->nr_accesses * 10000;
+ } else {
+ /*
+ * if this is called in the middle of the aggregation, reset
+ * the aggregations we made so far for this aggregation
+ * interval. In other words, make the status like
+ * kdamond_reset_aggregated() is called.
+ */
+ r->last_nr_accesses = damon_nr_accesses_for_new_attrs(
+ r->last_nr_accesses, old_attrs, new_attrs);
+ r->nr_accesses_bp = r->last_nr_accesses * 10000;
+ r->nr_accesses = 0;
+ }
r->age = damon_age_for_new_attrs(r->age, old_attrs, new_attrs);
}
@@ -620,7 +634,7 @@ static void damon_update_monitoring_result(struct damon_region *r,
* ->nr_accesses and ->age of given damon_ctx's regions for new damon_attrs.
*/
static void damon_update_monitoring_results(struct damon_ctx *ctx,
- struct damon_attrs *new_attrs)
+ struct damon_attrs *new_attrs, bool aggregating)
{
struct damon_attrs *old_attrs = &ctx->attrs;
struct damon_target *t;
@@ -635,7 +649,7 @@ static void damon_update_monitoring_results(struct damon_ctx *ctx,
damon_for_each_target(t, ctx)
damon_for_each_region(r, t)
damon_update_monitoring_result(
- r, old_attrs, new_attrs);
+ r, old_attrs, new_attrs, aggregating);
}
/*
@@ -662,10 +676,10 @@ static bool damon_valid_intervals_goal(struct damon_attrs *attrs)
* @ctx: monitoring context
* @attrs: monitoring attributes
*
- * This function should be called while the kdamond is not running, or an
- * access check results aggregation is not ongoing (e.g., from
- * &struct damon_callback->after_aggregation or
- * &struct damon_callback->after_wmarks_check callbacks).
+ * This function should be called while the kdamond is not running, an access
+ * check results aggregation is not ongoing (e.g., from &struct
+ * damon_callback->after_aggregation or &struct
+ * damon_callback->after_wmarks_check callbacks), or from damon_call().
*
* Every time interval is in micro-seconds.
*
@@ -676,6 +690,8 @@ int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs)
unsigned long sample_interval = attrs->sample_interval ?
attrs->sample_interval : 1;
struct damos *s;
+ bool aggregating = ctx->passed_sample_intervals <
+ ctx->next_aggregation_sis;
if (!damon_valid_intervals_goal(attrs))
return -EINVAL;
@@ -696,7 +712,7 @@ int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs)
ctx->next_ops_update_sis = ctx->passed_sample_intervals +
attrs->ops_update_interval / sample_interval;
- damon_update_monitoring_results(ctx, attrs);
+ damon_update_monitoring_results(ctx, attrs, aggregating);
ctx->attrs = *attrs;
damon_for_each_scheme(s, ctx)
@@ -2452,6 +2468,24 @@ static int kdamond_fn(void *data)
if (ctx->attrs.intervals_goal.aggrs &&
ctx->passed_sample_intervals >=
ctx->next_intervals_tune_sis) {
+ /*
+ * ctx->next_aggregation_sis might be updated
+ * from kdamond_call(). In the case,
+ * damon_set_attrs() which will be called from
+ * kdamond_tune_interval() may wrongly think
+ * this is in the middle of the current
+ * aggregation, and make aggregation
+ * information reset for all regions. Then,
+ * following kdamond_reset_aggregated() call
+ * will make the region information invalid,
+ * particularly for ->nr_accesses_bp.
+ *
+ * Reset ->next_aggregation_sis to avoid that.
+ * It will anyway correctly updated after this
+ * if caluse.
+ */
+ ctx->next_aggregation_sis =
+ next_aggregation_sis;
ctx->next_intervals_tune_sis +=
ctx->attrs.aggr_samples *
ctx->attrs.intervals_goal.aggrs;
diff --git a/mm/damon/tests/core-kunit.h b/mm/damon/tests/core-kunit.h
index 532c6a6f21f9..be0fea9ee5fc 100644
--- a/mm/damon/tests/core-kunit.h
+++ b/mm/damon/tests/core-kunit.h
@@ -348,19 +348,19 @@ static void damon_test_update_monitoring_result(struct kunit *test)
new_attrs = (struct damon_attrs){
.sample_interval = 100, .aggr_interval = 10000,};
- damon_update_monitoring_result(r, &old_attrs, &new_attrs);
+ damon_update_monitoring_result(r, &old_attrs, &new_attrs, false);
KUNIT_EXPECT_EQ(test, r->nr_accesses, 15);
KUNIT_EXPECT_EQ(test, r->age, 2);
new_attrs = (struct damon_attrs){
.sample_interval = 1, .aggr_interval = 1000};
- damon_update_monitoring_result(r, &old_attrs, &new_attrs);
+ damon_update_monitoring_result(r, &old_attrs, &new_attrs, false);
KUNIT_EXPECT_EQ(test, r->nr_accesses, 150);
KUNIT_EXPECT_EQ(test, r->age, 2);
new_attrs = (struct damon_attrs){
.sample_interval = 1, .aggr_interval = 100};
- damon_update_monitoring_result(r, &old_attrs, &new_attrs);
+ damon_update_monitoring_result(r, &old_attrs, &new_attrs, false);
KUNIT_EXPECT_EQ(test, r->nr_accesses, 150);
KUNIT_EXPECT_EQ(test, r->age, 20);
--
2.39.5