[PATCH 1/2] drm/amdgpu: Move racy global PMU list into device

From: Brian Norris
Date: Fri Oct 28 2022 - 18:49:12 EST


If there are multiple amdgpu devices, this list processing can be racy.

We're really treating this like a per-device list, so make that explicit
and remove the global list.

Signed-off-by: Brian Norris <briannorris@xxxxxxxxxxxx>
---

drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++++
drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c | 12 +++++-------
2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0e6ddf05c23c..e968b7f2417c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1063,6 +1063,10 @@ struct amdgpu_device {
struct work_struct reset_work;

bool job_hang;
+
+#if IS_ENABLED(CONFIG_PERF_EVENTS)
+ struct list_head pmu_list;
+#endif
};

static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
index 71ee361d0972..24f2055a2f23 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
@@ -23,6 +23,7 @@

#include <linux/perf_event.h>
#include <linux/init.h>
+#include <linux/list.h>
#include "amdgpu.h"
#include "amdgpu_pmu.h"

@@ -72,9 +73,6 @@ static ssize_t amdgpu_pmu_event_show(struct device *dev,
amdgpu_pmu_attr->event_str, amdgpu_pmu_attr->type);
}

-static LIST_HEAD(amdgpu_pmu_list);
-
-
struct amdgpu_pmu_attr {
const char *name;
const char *config;
@@ -558,7 +556,7 @@ static int init_pmu_entry_by_type_and_add(struct amdgpu_pmu_entry *pmu_entry,
pr_info("Detected AMDGPU %d Perf Events.\n", total_num_events);


- list_add_tail(&pmu_entry->entry, &amdgpu_pmu_list);
+ list_add_tail(&pmu_entry->entry, &pmu_entry->adev->pmu_list);

return 0;
err_register:
@@ -579,9 +577,7 @@ void amdgpu_pmu_fini(struct amdgpu_device *adev)
{
struct amdgpu_pmu_entry *pe, *temp;

- list_for_each_entry_safe(pe, temp, &amdgpu_pmu_list, entry) {
- if (pe->adev != adev)
- continue;
+ list_for_each_entry_safe(pe, temp, &adev->pmu_list, entry) {
list_del(&pe->entry);
perf_pmu_unregister(&pe->pmu);
kfree(pe->pmu.attr_groups);
@@ -623,6 +619,8 @@ int amdgpu_pmu_init(struct amdgpu_device *adev)
int ret = 0;
struct amdgpu_pmu_entry *pmu_entry, *pmu_entry_df;

+ INIT_LIST_HEAD(&adev->pmu_list);
+
switch (adev->asic_type) {
case CHIP_VEGA20:
pmu_entry_df = create_pmu_entry(adev, AMDGPU_PMU_PERF_TYPE_DF,
--
2.38.1.273.g43a17bfeac-goog