Re: [PATCH 2/6] x86,perf/cqm: Fix cqm memory leak and notifier leak

From: Thomas Gleixner
Date: Tue Mar 08 2016 - 04:24:14 EST


On Wed, 2 Mar 2016, Vikas Shivappa wrote:

Please fix the subject line prefix: "x86/perf/intel/cqm:"

> Fixes the hotcpu notifier leak and other global variable memory leaks
> during cqm(cache quality of service monitoring) initialization.
>
> Reviewed-by: Tony Luck <tony.luck@xxxxxxxxx>
> Signed-off-by: Vikas Shivappa <vikas.shivappa@xxxxxxxxxxxxxxx>
> ---
>
> Fixed the memory leak for cqm_rmid_ptrs as per Thomas feedback.
>
> arch/x86/kernel/cpu/perf_event_intel_cqm.c | 34 ++++++++++++++++++++++++------
> 1 file changed, 28 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
> index e6be335..37a93fa 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
> @@ -250,10 +250,13 @@ static int intel_cqm_setup_rmid_cache(void)
>
> return 0;
> fail:
> - while (r--)
> + while (r--) {
> kfree(cqm_rmid_ptrs[r]);
> + cqm_rmid_ptrs[r] = NULL;
> + }
>
> kfree(cqm_rmid_ptrs);
> + cqm_rmid_ptrs = NULL;

This partial rollback is crap. Why can't you utilize cqm_cleanup() ?
Performance certainly is not an issue here.

> return -ENOMEM;
> }
>
> @@ -1320,9 +1323,19 @@ static const struct x86_cpu_id intel_cqm_match[] = {
> {}
> };
>
> +static void cqm_cleanup(void)
> +{
> + int r = cqm_max_rmid;
> +
> + while (r--)
> + kfree(cqm_rmid_ptrs[r]);
> +
> + kfree(cqm_rmid_ptrs);
> +}
> +
> static int __init intel_cqm_init(void)
> {
> - char *str, scale[20];
> + char *str = NULL, scale[20];
> int i, cpu, ret;
>
> if (!x86_match_cpu(intel_cqm_match))
> @@ -1382,16 +1395,25 @@ static int __init intel_cqm_init(void)
> cqm_pick_event_reader(i);
> }
>
> - __perf_cpu_notifier(intel_cqm_cpu_notifier);
> -
> ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
> - if (ret)
> + if (ret) {
> pr_err("Intel CQM perf registration failed: %d\n", ret);
> - else
> + goto out;
> + } else {
> pr_info("Intel CQM monitoring enabled\n");
> + }
>
> + /*
> + * Register the hot cpu notifier once we are sure cqm
> + * is enabled to avoid notifier leak.
> + */
> + __perf_cpu_notifier(intel_cqm_cpu_notifier);
> out:
> cpu_notifier_register_done();
> + if (ret) {
> + kfree(str);
> + cqm_cleanup();

This is still broken. If intel_cqm_setup_rmid_cache() fails, you clear
cqm_rmid_ptrs and then jump to out. ret is -ENOMEM, so you will call
cqm_cleanup which will dereference cqm_rmid_ptrs .....

Find below a delta patch, which fixes that proper and safe.

Thanks,

tglx

8<----------------

arch/x86/events/intel/cqm.c | 40 +++++++++++++++++++---------------------
1 file changed, 19 insertions(+), 21 deletions(-)

--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -211,6 +211,20 @@ static void __put_rmid(u32 rmid)
list_add_tail(&entry->list, &cqm_rmid_limbo_lru);
}

+static void cqm_cleanup(void)
+{
+ int i;
+
+ if (!cqm_rmid_ptrs)
+ return;
+
+ for (i = 0; i < cqm_max_rmid; i++)
+ kfree(cqm_rmid_ptrs[i]);
+
+ kfree(cqm_rmid_ptrs);
+ cqm_rmid_ptrs = NULL;
+}
+
static int intel_cqm_setup_rmid_cache(void)
{
struct cqm_rmid_entry *entry;
@@ -218,7 +232,7 @@ static int intel_cqm_setup_rmid_cache(vo
int r = 0;

nr_rmids = cqm_max_rmid + 1;
- cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry *) *
+ cqm_rmid_ptrs = kzalloc(sizeof(struct cqm_rmid_entry *) *
nr_rmids, GFP_KERNEL);
if (!cqm_rmid_ptrs)
return -ENOMEM;
@@ -247,16 +261,10 @@ static int intel_cqm_setup_rmid_cache(vo
mutex_lock(&cache_mutex);
intel_cqm_rotation_rmid = __get_rmid();
mutex_unlock(&cache_mutex);
-
return 0;
-fail:
- while (r--) {
- kfree(cqm_rmid_ptrs[r]);
- cqm_rmid_ptrs[r] = NULL;
- }

- kfree(cqm_rmid_ptrs);
- cqm_rmid_ptrs = NULL;
+fail:
+ cqm_cleanup();
return -ENOMEM;
}

@@ -1313,16 +1321,6 @@ static const struct x86_cpu_id intel_cqm
{}
};

-static void cqm_cleanup(void)
-{
- int r = cqm_max_rmid;
-
- while (r--)
- kfree(cqm_rmid_ptrs[r]);
-
- kfree(cqm_rmid_ptrs);
-}
-
static int __init intel_cqm_init(void)
{
char *str = NULL, scale[20];
@@ -1389,10 +1387,10 @@ static int __init intel_cqm_init(void)
if (ret) {
pr_err("Intel CQM perf registration failed: %d\n", ret);
goto out;
- } else {
- pr_info("Intel CQM monitoring enabled\n");
}

+ pr_info("Intel CQM monitoring enabled\n");
+
/*
* Register the hot cpu notifier once we are sure cqm
* is enabled to avoid notifier leak.