Re: [PATCH 1/4] sched/fair: free allocated memory on error in alloc_fair_sched_group()

From: Chen Yu
Date: Wed Aug 02 2023 - 04:18:24 EST


On 2023-08-02 at 15:01:05 +0800, Aaron Lu wrote:
> On Wed, Jul 19, 2023 at 10:13:55AM +0800, Aaron Lu wrote:
> > On Tue, Jul 18, 2023 at 11:13:12PM +0800, Chen Yu wrote:
> > > On 2023-07-18 at 21:41:17 +0800, Aaron Lu wrote:
> > > > There is one struct cfs_rq and one struct se on each cpu for a taskgroup
> > > > and when allocation for tg->cfs_rq[X] failed, the already allocated
> > > > tg->cfs_rq[0]..tg->cfs_rq[X-1] should be freed. The same for tg->se.
> > > >
> > > > Signed-off-by: Aaron Lu <aaron.lu@xxxxxxxxx>
> > > > ---
> > > > kernel/sched/fair.c | 23 ++++++++++++++++-------
> > > > 1 file changed, 16 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > > > index a80a73909dc2..0f913487928d 100644
> > > > --- a/kernel/sched/fair.c
> > > > +++ b/kernel/sched/fair.c
> > > > @@ -12443,10 +12443,10 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
> > > >
> > > > tg->cfs_rq = kcalloc(nr_cpu_ids, sizeof(cfs_rq), GFP_KERNEL);
> > > > if (!tg->cfs_rq)
> > > > - goto err;
> > > > + return 0;
> > > > tg->se = kcalloc(nr_cpu_ids, sizeof(se), GFP_KERNEL);
> > > > if (!tg->se)
> > > > - goto err;
> > > > + goto err_free_rq_pointer;
> > > >
> > > > tg->shares = NICE_0_LOAD;
> > > >
> > > > @@ -12456,12 +12456,12 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
> > > > cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
> > > > GFP_KERNEL, cpu_to_node(i));
> > > > if (!cfs_rq)
> > > > - goto err;
> > > > + goto err_free;
> > > >
> > > > se = kzalloc_node(sizeof(struct sched_entity_stats),
> > > > GFP_KERNEL, cpu_to_node(i));
> > > > if (!se)
> > > > - goto err_free_rq;
> > > > + goto err_free;
> > > >
> > > > init_cfs_rq(cfs_rq);
> > > > init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
> > > > @@ -12470,9 +12470,18 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
> > > >
> > > > return 1;
> > > >
> > > > -err_free_rq:
> > > > - kfree(cfs_rq);
> > > > -err:
> > > > +err_free:
> > > > + for_each_possible_cpu(i) {
> > > > + kfree(tg->cfs_rq[i]);
> > > > + kfree(tg->se[i]);
> > > > +
> > > > + if (!tg->cfs_rq[i] && !tg->se[i])
> > > > + break;
> > > > + }
> > > > + kfree(tg->se);
> > > > +err_free_rq_pointer:
> > > > + kfree(tg->cfs_rq);
> > > > +
> > > Not sure if I overlooked, if alloc_fair_sched_group() fails in sched_create_group(),
> > > would sched_free_group()->free_fair_sched_group() do the cleanup?
> >
> > You are right, I overlooked... Thanks for pointing this out.
>
> While preparing v2, one thing still looks strange in the existing code:
>
> int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
> {
> ... ...
>
> for_each_possible_cpu(i) {
> cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
> GFP_KERNEL, cpu_to_node(i));
> if (!cfs_rq)
> goto err;
>
> se = kzalloc_node(sizeof(struct sched_entity_stats),
> GFP_KERNEL, cpu_to_node(i));
> if (!se)
> goto err_free_rq;
> ~~~~~~~~~~~~~~~~
>
> Since free_fair_sched_group() will take care freeing these memories on
> error path, it looks unnecessary to do this free for a random cfs_rq
> here. Or do I miss something again...?
>
> init_cfs_rq(cfs_rq);
> init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
> init_entity_runnable_average(se);
> }
>
> return 1;
>
> err_free_rq:
> kfree(cfs_rq);
> err:
> return 0;
> }
>
> I plan to change it to this:
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index a80a73909dc2..ef2618ad26eb 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -12461,7 +12461,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
> se = kzalloc_node(sizeof(struct sched_entity_stats),
> GFP_KERNEL, cpu_to_node(i));
> if (!se)
> - goto err_free_rq;
> + goto err;
>
> init_cfs_rq(cfs_rq);
> init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
> @@ -12470,8 +12470,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
>
> return 1;
>
> -err_free_rq:
> - kfree(cfs_rq);
> err:
> return 0;
> }

It seems that the err_free_rq was introduced in
Commit dfc12eb26a28 ("sched: Fix memory leak in two error corner cases")
The memory leak was detected by the static tool, while that tools is
unaware of free_fair_sched_group() which will take care of this.
Not sure if a self-maintained function is preferred, or let other function
to take care of that. For now it seems to be a duplicated free. And
alloc_rt_sched_group() has the same issue.

thanks,
Chenyu