Re: [PATCH v1 6/9] memcg: sleep during flushing stats in safe contexts

From: Johannes Weiner
Date: Tue Mar 28 2023 - 14:35:54 EST


On Tue, Mar 28, 2023 at 06:16:35AM +0000, Yosry Ahmed wrote:
> @@ -642,24 +642,57 @@ static void __mem_cgroup_flush_stats(void)
> * from memcg flushers (e.g. reclaim, refault, etc).
> */
> if (atomic_xchg(&stats_flush_ongoing, 1))
> - return;
> + return false;
>
> WRITE_ONCE(flush_next_time, jiffies_64 + 2*FLUSH_TIME);
> - cgroup_rstat_flush_atomic(root_mem_cgroup->css.cgroup);
> + return true;
> +}
> +
> +static void mem_cgroup_post_stats_flush(void)
> +{
> atomic_set(&stats_flush_threshold, 0);
> atomic_set(&stats_flush_ongoing, 0);
> }
>
> -void mem_cgroup_flush_stats(void)
> +static bool mem_cgroup_should_flush_stats(void)
> {
> - if (atomic_read(&stats_flush_threshold) > num_online_cpus())
> - __mem_cgroup_flush_stats();
> + return atomic_read(&stats_flush_threshold) > num_online_cpus();
> +}
> +
> +/* atomic functions, safe to call from any context */
> +static void __mem_cgroup_flush_stats_atomic(void)
> +{
> + if (mem_cgroup_pre_stats_flush()) {
> + cgroup_rstat_flush_atomic(root_mem_cgroup->css.cgroup);
> + mem_cgroup_post_stats_flush();
> + }
> +}

I'm afraid I wasn't very nuanced with my complaint about the bool
parameter in the previous version. In this case, when you can do a
common helper for a couple of API functions defined right below it,
and the callers don't spread throughout the codebase, using bools
makes things simpler while still being easily understandable:

static void do_flush_stats(bool may_sleep)
{
if (atomic_xchg(&stats_flush_ongoing, 1))
return;

WRITE_ONCE(flush_next_time, jiffies_64 + 2*FLUSH_TIME);
atomic_set(&stats_flush_threshold, 0);

if (!may_sleep)
cgroup_rstat_flush_atomic(root_mem_cgroup->css.cgroup);
else
cgroup_rstat_flush(root_mem_cgroup->css.cgroup);

atomic_set(&stats_flush_ongoing, 0);
}

void mem_cgroup_flush_stats(void)
{
if (atomic_read(&stats_flush_threshold) > num_online_cpus())
do_flush_stats(true);
}

void mem_cgroup_flush_stats_atomic(void)
{
if (atomic_read(&stats_flush_threshold) > num_online_cpus())
do_flush_stats(false);
}

> void mem_cgroup_flush_stats_ratelimited(void)
> {
> if (time_after64(jiffies_64, READ_ONCE(flush_next_time)))
> - mem_cgroup_flush_stats();
> + mem_cgroup_flush_stats_atomic();
> +}

This should probably be mem_cgroup_flush_stats_atomic_ratelimited().

(Whee, kinda long, but that's alright. Very specialized caller...)

Btw, can you guys think of a reason against moving the threshold check
into the common function? It would then apply to the time-limited
flushes as well, but that shouldn't hurt anything. This would make the
code even simpler:

static void do_flush_stats(bool may_sleep)
{
if (atomic_read(&stats_flush_threshold) <= num_online_cpus())
return;

if (atomic_xchg(&stats_flush_ongoing, 1))
return;

WRITE_ONCE(flush_next_time, jiffies_64 + 2*FLUSH_TIME);
atomic_set(&stats_flush_threshold, 0);

if (!may_sleep)
cgroup_rstat_flush_atomic(root_mem_cgroup->css.cgroup);
else
cgroup_rstat_flush(root_mem_cgroup->css.cgroup);

atomic_set(&stats_flush_ongoing, 0);
}

void mem_cgroup_flush_stats(void)
{
do_flush_stats(true);
}

void mem_cgroup_flush_stats_atomic(void)
{
do_flush_stats(false);
}

void mem_cgroup_flush_stats_atomic_ratelimited(void)
{
if (time_after64(jiffies_64, READ_ONCE(flush_next_time)))
do_flush_stats(false);
}

> @@ -2845,7 +2845,7 @@ static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc)
> * Flush the memory cgroup stats, so that we read accurate per-memcg
> * lruvec stats for heuristics.
> */
> - mem_cgroup_flush_stats();
> + mem_cgroup_flush_stats_atomic();

I'm thinking this one could be non-atomic as well. It's called fairly
high up in reclaim without any locks held.