[GIT PULL] perf fixes

From: Ingo Molnar
Date: Fri Oct 05 2018 - 05:42:20 EST


Linus,

Please pull the latest perf-urgent-for-linus git tree from:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

# HEAD: d7cbbe49a9304520181fb8c9272d1327deec8453 perf/x86/amd/uncore: Set ThreadMask and SliceMask for L3 Cache perf events

Misc fixes:

- fix a CPU#0 hot unplug bug and a PCI enumeration bug in the x86 Intel uncore PMU driver
- fix a CPU event enumeration bug in the x86 AMD PMU driver
- fix a perf ring-buffer corruption bug when using tracepoints
- fix a PMU unregister locking bug

Thanks,

Ingo

------------------>
Jiri Olsa (1):
perf/ring_buffer: Prevent concurent ring buffer access

Kan Liang (1):
perf/x86/intel/uncore: Fix PCI BDF address of M3UPI on SKX

Masayoshi Mizuma (1):
perf/x86/intel/uncore: Use boot_cpu_data.phys_proc_id instead of hardcorded physical package ID 0

Natarajan, Janakarajan (1):
perf/x86/amd/uncore: Set ThreadMask and SliceMask for L3 Cache perf events

Peter Zijlstra (1):
perf/core: Fix perf_pmu_unregister() locking


arch/x86/events/amd/uncore.c | 10 ++++++++++
arch/x86/events/intel/uncore_snbep.c | 14 +++++++-------
arch/x86/include/asm/perf_event.h | 8 ++++++++
kernel/events/core.c | 11 ++++-------
4 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 981ba5e8241b..8671de126eac 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -36,6 +36,7 @@

static int num_counters_llc;
static int num_counters_nb;
+static bool l3_mask;

static HLIST_HEAD(uncore_unused_list);

@@ -209,6 +210,13 @@ static int amd_uncore_event_init(struct perf_event *event)
hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
hwc->idx = -1;

+ /*
+ * SliceMask and ThreadMask need to be set for certain L3 events in
+ * Family 17h. For other events, the two fields do not affect the count.
+ */
+ if (l3_mask)
+ hwc->config |= (AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK);
+
if (event->cpu < 0)
return -EINVAL;

@@ -525,6 +533,7 @@ static int __init amd_uncore_init(void)
amd_llc_pmu.name = "amd_l3";
format_attr_event_df.show = &event_show_df;
format_attr_event_l3.show = &event_show_l3;
+ l3_mask = true;
} else {
num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L2;
@@ -532,6 +541,7 @@ static int __init amd_uncore_init(void)
amd_llc_pmu.name = "amd_l2";
format_attr_event_df = format_attr_event;
format_attr_event_l3 = format_attr_event;
+ l3_mask = false;
}

amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 51d7c117e3c7..c07bee31abe8 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3061,7 +3061,7 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = {

void bdx_uncore_cpu_init(void)
{
- int pkg = topology_phys_to_logical_pkg(0);
+ int pkg = topology_phys_to_logical_pkg(boot_cpu_data.phys_proc_id);

if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
@@ -3931,16 +3931,16 @@ static const struct pci_device_id skx_uncore_pci_ids[] = {
.driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 5, SKX_PCI_UNCORE_M2PCIE, 3),
},
{ /* M3UPI0 Link 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C),
- .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, SKX_PCI_UNCORE_M3UPI, 0),
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 0),
},
{ /* M3UPI0 Link 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
- .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 1),
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204E),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 2, SKX_PCI_UNCORE_M3UPI, 1),
},
{ /* M3UPI1 Link 2 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C),
- .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 4, SKX_PCI_UNCORE_M3UPI, 2),
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 5, SKX_PCI_UNCORE_M3UPI, 2),
},
{ /* end: all zeroes */ }
};
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 12f54082f4c8..78241b736f2a 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -46,6 +46,14 @@
#define INTEL_ARCH_EVENT_MASK \
(ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT)

+#define AMD64_L3_SLICE_SHIFT 48
+#define AMD64_L3_SLICE_MASK \
+ ((0xFULL) << AMD64_L3_SLICE_SHIFT)
+
+#define AMD64_L3_THREAD_SHIFT 56
+#define AMD64_L3_THREAD_MASK \
+ ((0xFFULL) << AMD64_L3_THREAD_SHIFT)
+
#define X86_RAW_EVENT_MASK \
(ARCH_PERFMON_EVENTSEL_EVENT | \
ARCH_PERFMON_EVENTSEL_UMASK | \
diff --git a/kernel/events/core.c b/kernel/events/core.c
index dcb093e7b377..5a97f34bc14c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8314,6 +8314,8 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
goto unlock;

list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+ if (event->cpu != smp_processor_id())
+ continue;
if (event->attr.type != PERF_TYPE_TRACEPOINT)
continue;
if (event->attr.config != entry->type)
@@ -9431,9 +9433,7 @@ static void free_pmu_context(struct pmu *pmu)
if (pmu->task_ctx_nr > perf_invalid_context)
return;

- mutex_lock(&pmus_lock);
free_percpu(pmu->pmu_cpu_context);
- mutex_unlock(&pmus_lock);
}

/*
@@ -9689,12 +9689,8 @@ EXPORT_SYMBOL_GPL(perf_pmu_register);

void perf_pmu_unregister(struct pmu *pmu)
{
- int remove_device;
-
mutex_lock(&pmus_lock);
- remove_device = pmu_bus_running;
list_del_rcu(&pmu->entry);
- mutex_unlock(&pmus_lock);

/*
* We dereference the pmu list under both SRCU and regular RCU, so
@@ -9706,13 +9702,14 @@ void perf_pmu_unregister(struct pmu *pmu)
free_percpu(pmu->pmu_disable_count);
if (pmu->type >= PERF_TYPE_MAX)
idr_remove(&pmu_idr, pmu->type);
- if (remove_device) {
+ if (pmu_bus_running) {
if (pmu->nr_addr_filters)
device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
device_del(pmu->dev);
put_device(pmu->dev);
}
free_pmu_context(pmu);
+ mutex_unlock(&pmus_lock);
}
EXPORT_SYMBOL_GPL(perf_pmu_unregister);