Re: [PATCH 2/5] cgroup/dmem: Add reclaim callback for lowering max below current usage

From: Thomas Hellström

Date: Tue Apr 28 2026 - 03:05:04 EST


On Sat, 2026-04-25 at 14:42 +0800, Chen Ridong wrote:
>
>
> On 2026/3/27 16:15, Thomas Hellström wrote:
> > Add an optional reclaim callback to struct dmem_cgroup_region. When
> > dmem.max is set below current usage, invoke the callback to evict
> > memory and retry setting the limit rather than failing immediately.
> > Signal interruptions propagate back to the write() caller.
> >
> > RFC:
> > Because the max limit is updated only _after_ the usage has been
> > sufficiently lowered, this is likely to fail if aggressive allocators
> > are running in parallel with the reclaim.
> > Can we somehow enforce the new limit while the eviction is
> > happening?
> >
> > Assisted-by: GitHub Copilot:claude-sonnet-4.6
> > Signed-off-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
> > ---
> >   include/linux/cgroup_dmem.h | 11 +++++
> >   kernel/cgroup/dmem.c        | 94
> > +++++++++++++++++++++++++++++++++----
> >   2 files changed, 96 insertions(+), 9 deletions(-)
> >
> > diff --git a/include/linux/cgroup_dmem.h
> > b/include/linux/cgroup_dmem.h
> > index dd4869f1d736..61520a431740 100644
> > --- a/include/linux/cgroup_dmem.h
> > +++ b/include/linux/cgroup_dmem.h
> > @@ -26,6 +26,10 @@ bool dmem_cgroup_state_evict_valuable(struct
> > dmem_cgroup_pool_state *limit_pool,
> >          bool ignore_low, bool
> > *ret_hit_low);
> >  
> >   void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state
> > *pool);
> > +void dmem_cgroup_region_set_reclaim(struct dmem_cgroup_region
> > *region,
> > +     int (*reclaim)(struct
> > dmem_cgroup_pool_state *pool,
> > +    u64
> > target_bytes, void *priv),
> > +     void *priv);
> >   #else
> >   static inline __printf(2,3) struct dmem_cgroup_region *
> >   dmem_cgroup_register_region(u64 size, const char *name_fmt, ...)
> > @@ -62,5 +66,12 @@ bool dmem_cgroup_state_evict_valuable(struct
> > dmem_cgroup_pool_state *limit_pool,
> >   static inline void dmem_cgroup_pool_state_put(struct
> > dmem_cgroup_pool_state *pool)
> >   { }
> >  
> > +static inline void
> > +dmem_cgroup_region_set_reclaim(struct dmem_cgroup_region *region,
> > +        int (*reclaim)(struct
> > dmem_cgroup_pool_state *pool,
> > +       u64 target_bytes,
> > void *priv),
> > +        void *priv)
> > +{ }
> > +
> >   #endif
> >   #endif /* _CGROUP_DMEM_H */
> > diff --git a/kernel/cgroup/dmem.c b/kernel/cgroup/dmem.c
> > index 3e6d4c0b26a1..f993fb058b74 100644
> > --- a/kernel/cgroup/dmem.c
> > +++ b/kernel/cgroup/dmem.c
> > @@ -51,6 +51,18 @@ struct dmem_cgroup_region {
> >    * No new pools should be added to the region afterwards.
> >    */
> >    bool unregistered;
> > +
> > + /**
> > + * @reclaim: Optional callback invoked when dmem.max is
> > set below the
> > + * current usage of a pool. The driver should attempt to
> > free at least
> > + * @target_bytes from @pool. May be called multiple times
> > if usage
> > + * remains above the limit after returning.
> > + */
> > + int (*reclaim)(struct dmem_cgroup_pool_state *pool, u64
> > target_bytes,
> > +        void *priv);
> > +
> > + /** @reclaim_priv: Private data passed to @reclaim. */
> > + void *reclaim_priv;
> >   };
> >  
> >   struct dmemcg_state {
> > @@ -145,23 +157,59 @@ static void free_cg_pool(struct
> > dmem_cgroup_pool_state *pool)
> >   }
> >  
> >   static int
> > -set_resource_min(struct dmem_cgroup_pool_state *pool, u64 val)
> > +set_resource_min(struct dmem_cgroup_pool_state *pool, u64 val,
> > + struct dmem_cgroup_region *region)
> >   {
> >    page_counter_set_min(&pool->cnt, val);
> >    return 0;
> >   }
> >  
> >   static int
> > -set_resource_low(struct dmem_cgroup_pool_state *pool, u64 val)
> > +set_resource_low(struct dmem_cgroup_pool_state *pool, u64 val,
> > + struct dmem_cgroup_region *region)
> >   {
> >    page_counter_set_low(&pool->cnt, val);
> >    return 0;
> >   }
> >  
> >   static int
> > -set_resource_max(struct dmem_cgroup_pool_state *pool, u64 val)
> > +set_resource_max(struct dmem_cgroup_pool_state *pool, u64 val,
>
> Though we are discussing how to set the maximum, renaming 'val' to
> 'max' would improve readability in the next version.


Since all of the set_resource_xxx() functions use @val, renaming it would
be an unrelated change for this patch. Could it be done in a follow-up
patch instead?

Thanks,
Thomas