Re: [PATCH 2/6] x86,perf/cqm: Fix cqm memory leak and notifier leak

From: Vikas Shivappa
Date: Tue Mar 08 2016 - 14:35:54 EST




On Tue, 8 Mar 2016, Thomas Gleixner wrote:

On Wed, 2 Mar 2016, Vikas Shivappa wrote:

Please fix the subject line prefix: "x86/perf/intel/cqm:"

Will fix..


Fixes the hotcpu notifier leak and other global variable memory leaks
during cqm(cache quality of service monitoring) initialization.

Reviewed-by: Tony Luck <tony.luck@xxxxxxxxx>
Signed-off-by: Vikas Shivappa <vikas.shivappa@xxxxxxxxxxxxxxx>
---

Fixed the memory leak for cqm_rmid_ptrs as per Thomas feedback.

arch/x86/kernel/cpu/perf_event_intel_cqm.c | 34 ++++++++++++++++++++++++------
1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index e6be335..37a93fa 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -250,10 +250,13 @@ static int intel_cqm_setup_rmid_cache(void)

return 0;
fail:
- while (r--)
+ while (r--) {
kfree(cqm_rmid_ptrs[r]);
+ cqm_rmid_ptrs[r] = NULL;
+ }

kfree(cqm_rmid_ptrs);
+ cqm_rmid_ptrs = NULL;

This partial rollback is crap. Why can't you utilize cqm_cleanup() ?
Performance certainly is not an issue here.

return -ENOMEM;
}

@@ -1320,9 +1323,19 @@ static const struct x86_cpu_id intel_cqm_match[] = {
{}
};

+static void cqm_cleanup(void)
+{
+ int r = cqm_max_rmid;
+
+ while (r--)
+ kfree(cqm_rmid_ptrs[r]);
+
+ kfree(cqm_rmid_ptrs);
+}
+
static int __init intel_cqm_init(void)
{
- char *str, scale[20];
+ char *str = NULL, scale[20];
int i, cpu, ret;

if (!x86_match_cpu(intel_cqm_match))
@@ -1382,16 +1395,25 @@ static int __init intel_cqm_init(void)
cqm_pick_event_reader(i);
}

- __perf_cpu_notifier(intel_cqm_cpu_notifier);
-
ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
- if (ret)
+ if (ret) {
pr_err("Intel CQM perf registration failed: %d\n", ret);
- else
+ goto out;
+ } else {
pr_info("Intel CQM monitoring enabled\n");
+ }

+ /*
+ * Register the hot cpu notifier once we are sure cqm
+ * is enabled to avoid notifier leak.
+ */
+ __perf_cpu_notifier(intel_cqm_cpu_notifier);
out:
cpu_notifier_register_done();
+ if (ret) {
+ kfree(str);
+ cqm_cleanup();

This is still broken. If intel_cqm_setup_rmid_cache() fails, you clear
cqm_rmid_ptrs and then jump to out. ret is -ENOMEM, so you will call
cqm_cleanup which will dereference cqm_rmid_ptrs .....

Find below a delta patch, which fixes that proper and safe.

Thanks for sending it. will resend the patch with your fix.

-Vikas


Thanks,

tglx

8<----------------

arch/x86/events/intel/cqm.c | 40 +++++++++++++++++++---------------------
1 file changed, 19 insertions(+), 21 deletions(-)

--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -211,6 +211,20 @@ static void __put_rmid(u32 rmid)
list_add_tail(&entry->list, &cqm_rmid_limbo_lru);
}

+static void cqm_cleanup(void)
+{
+ int i;
+
+ if (!cqm_rmid_ptrs)
+ return;
+
+ for (i = 0; i < cqm_max_rmid; i++)
+ kfree(cqm_rmid_ptrs[i]);
+
+ kfree(cqm_rmid_ptrs);
+ cqm_rmid_ptrs = NULL;
+}
+
static int intel_cqm_setup_rmid_cache(void)
{
struct cqm_rmid_entry *entry;
@@ -218,7 +232,7 @@ static int intel_cqm_setup_rmid_cache(vo
int r = 0;

nr_rmids = cqm_max_rmid + 1;
- cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry *) *
+ cqm_rmid_ptrs = kzalloc(sizeof(struct cqm_rmid_entry *) *
nr_rmids, GFP_KERNEL);
if (!cqm_rmid_ptrs)
return -ENOMEM;
@@ -247,16 +261,10 @@ static int intel_cqm_setup_rmid_cache(vo
mutex_lock(&cache_mutex);
intel_cqm_rotation_rmid = __get_rmid();
mutex_unlock(&cache_mutex);
-
return 0;
-fail:
- while (r--) {
- kfree(cqm_rmid_ptrs[r]);
- cqm_rmid_ptrs[r] = NULL;
- }

- kfree(cqm_rmid_ptrs);
- cqm_rmid_ptrs = NULL;
+fail:
+ cqm_cleanup();
return -ENOMEM;
}

@@ -1313,16 +1321,6 @@ static const struct x86_cpu_id intel_cqm
{}
};

-static void cqm_cleanup(void)
-{
- int r = cqm_max_rmid;
-
- while (r--)
- kfree(cqm_rmid_ptrs[r]);
-
- kfree(cqm_rmid_ptrs);
-}
-
static int __init intel_cqm_init(void)
{
char *str = NULL, scale[20];
@@ -1389,10 +1387,10 @@ static int __init intel_cqm_init(void)
if (ret) {
pr_err("Intel CQM perf registration failed: %d\n", ret);
goto out;
- } else {
- pr_info("Intel CQM monitoring enabled\n");
}

+ pr_info("Intel CQM monitoring enabled\n");
+
/*
* Register the hot cpu notifier once we are sure cqm
* is enabled to avoid notifier leak.