[PATCH RFC] perf: Let PMUs provide supplementary information

From: Adrian Hunter
Date: Fri Jul 24 2015 - 03:48:52 EST


This is a follow-up to "[RFC PATCH] perf: Provide status of known PMUs" i.e.

https://lkml.kernel.org/r/1436428080-3098-1-git-send-email-adrian.hunter@xxxxxxxxx

There are 3 main changes:
- uses debugfs not sysfs
- doesn't require a PMU to be on the list of known PMUs
- renamed from 'known_pmus' to 'pmu_supplementary_info'

The purpose of the changes is to make optional whether or not PMU
supplementary information is provided for a PMU, thus easing the
maintenance burden. Maintenance is also eased by having all the
code in a separate file.

Also the status codes have changed as requested:

[PERF_PMU_STATUS_ENABLED] = "Enabled",
[PERF_PMU_STATUS_ERROR] = "Disabled: Driver error",
[PERF_PMU_STATUS_NOT_LOADED] = "Disabled: Driver not loaded",
[PERF_PMU_STATUS_NOT_CONFIG] = "Disabled: Driver not in kernel config",
[PERF_PMU_STATUS_NOT_SUPPORTED] = "Disabled: Not supported by the kernel",
[PERF_PMU_STATUS_WRONG_HW] = "Disabled: Not supported by the hardware",
[PERF_PMU_STATUS_WRONG_VENDOR] = "Disabled: Not supported by the hardware vendor",
[PERF_PMU_STATUS_WRONG_ARCH] = "Disabled: Not supported by the architecture",
[PERF_PMU_STATUS_UNKNOWN] = "Disabled: Unknown status",

Example:

$ cat /sys/kernel/debug/perf/pmu_supplementary_info/intel_pt
Enabled

Last time it wasn't clear that a custom status message can be provided.
For example, this code snippet from Intel PT:

if (prior_warn) {
x86_add_exclusive(x86_lbr_exclusive_pt);
status_msg = "PT is enabled at boot time, doing nothing";
ret = -EBUSY;
goto out;
}
...
out:
if (status_msg)
pr_warn("%s\n", status_msg);
perf_pmu_error_status("intel_pt", ret, status_msg);

Would result in:

$ cat /sys/kernel/debug/perf/pmu_supplementary_info/intel_pt
Disabled: Driver error - PT is enabled at boot time, doing nothing

Signed-off-by: Adrian Hunter <adrian.hunter@xxxxxxxxx>
---
arch/x86/kernel/cpu/perf_event.c | 3 +
arch/x86/kernel/cpu/perf_event_intel_bts.c | 20 +-
arch/x86/kernel/cpu/perf_event_intel_pt.c | 18 +-
include/linux/perf_event.h | 45 ++++
kernel/events/Makefile | 2 +-
kernel/events/pmu_supp_info.c | 384 +++++++++++++++++++++++++++++
6 files changed, 462 insertions(+), 10 deletions(-)
create mode 100644 kernel/events/pmu_supp_info.c

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3658de47900f..546163a42a30 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1670,12 +1670,15 @@ static int __init init_hw_perf_events(void)

switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
+ perf_pmu_vendor("Intel");
err = intel_pmu_init();
break;
case X86_VENDOR_AMD:
+ perf_pmu_vendor("AMD");
err = amd_pmu_init();
break;
default:
+ perf_pmu_vendor("Unknown");
err = -ENOTSUPP;
}
if (err != 0) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c
index 54690e885759..7c397cecce65 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_bts.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c
@@ -511,8 +511,19 @@ static void bts_event_read(struct perf_event *event)

static __init int bts_init(void)
{
- if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
- return -ENODEV;
+ const char *status_msg = NULL;
+ int ret;
+
+ if (!x86_pmu.bts) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (!boot_cpu_has(X86_FEATURE_DTES64)) {
+ status_msg = "requires 64-bit kernel";
+ ret = -ENOTSUPP;
+ goto out;
+ }

bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE;
bts_pmu.task_ctx_nr = perf_sw_context;
@@ -525,6 +536,9 @@ static __init int bts_init(void)
bts_pmu.setup_aux = bts_buffer_setup_aux;
bts_pmu.free_aux = bts_buffer_free_aux;

- return perf_pmu_register(&bts_pmu, "intel_bts", -1);
+ ret = perf_pmu_register(&bts_pmu, "intel_bts", -1);
+out:
+ perf_pmu_error_status("intel_bts", ret, status_msg);
+ return ret;
}
arch_initcall(bts_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index 5505371b414d..f4aa3831f8b5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -1122,6 +1122,7 @@ static int pt_event_init(struct perf_event *event)
static __init int pt_init(void)
{
int ret, cpu, prior_warn = 0;
+ const char *status_msg = NULL;

BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
get_online_cpus();
@@ -1136,18 +1137,19 @@ static __init int pt_init(void)

if (prior_warn) {
x86_add_exclusive(x86_lbr_exclusive_pt);
- pr_warn("PT is enabled at boot time, doing nothing\n");
-
- return -EBUSY;
+ status_msg = "PT is enabled at boot time, doing nothing";
+ ret = -EBUSY;
+ goto out;
}

ret = pt_pmu_hw_init();
if (ret)
- return ret;
+ goto out;

if (!pt_cap_get(PT_CAP_topa_output)) {
- pr_warn("ToPA output is not supported on this CPU\n");
- return -ENODEV;
+ status_msg = "ToPA output is not supported on this CPU";
+ ret = -ENODEV;
+ goto out;
}

if (!pt_cap_get(PT_CAP_topa_multiple_entries))
@@ -1166,6 +1168,10 @@ static __init int pt_init(void)
pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
pt_pmu.pmu.free_aux = pt_buffer_free_aux;
ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
+out:
+ if (status_msg)
+ pr_warn("%s\n", status_msg);
+ perf_pmu_error_status("intel_pt", ret, status_msg);

return ret;
}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 0af4c8b4b6d0..13886c2d9e93 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -185,6 +185,22 @@ struct perf_event;
#define PERF_PMU_CAP_ITRACE 0x20

/**
+ * enum perf_pmu_status - the PMU Supplementary Information status of PMUs
+ * @PERF_PMU_STATUS_ENABLED: the PMU is enabled (reported for an error code of 0)
+ * @PERF_PMU_STATUS_ERROR: disabled because of a driver error
+ * @PERF_PMU_STATUS_NOT_LOADED: disabled because the driver is not loaded
+ * @PERF_PMU_STATUS_NOT_CONFIG: disabled because the driver is not in the
+ *	kernel config
+ * @PERF_PMU_STATUS_NOT_SUPPORTED: disabled because it is not supported by the
+ *	kernel (reported for -ENOTSUPP)
+ * @PERF_PMU_STATUS_WRONG_HW: disabled because it is not supported by the
+ *	hardware (reported for -ENODEV)
+ * @PERF_PMU_STATUS_WRONG_VENDOR: disabled because it is not supported by the
+ *	hardware vendor
+ * @PERF_PMU_STATUS_WRONG_ARCH: disabled because it is not supported by the
+ *	architecture
+ * @PERF_PMU_STATUS_UNKNOWN: disabled, status not known; also the initial
+ *	status of a newly allocated entry
+ * @PERF_PMU_STATUS_MAX: number of status values, used for range checking
+ */
+enum perf_pmu_status {
+ PERF_PMU_STATUS_ENABLED,
+ PERF_PMU_STATUS_ERROR,
+ PERF_PMU_STATUS_NOT_LOADED,
+ PERF_PMU_STATUS_NOT_CONFIG,
+ PERF_PMU_STATUS_NOT_SUPPORTED,
+ PERF_PMU_STATUS_WRONG_HW,
+ PERF_PMU_STATUS_WRONG_VENDOR,
+ PERF_PMU_STATUS_WRONG_ARCH,
+ PERF_PMU_STATUS_UNKNOWN,
+ PERF_PMU_STATUS_MAX,
+};
+
+/**
* struct pmu - generic performance monitoring unit
*/
struct pmu {
@@ -639,6 +655,35 @@ extern void *perf_get_aux(struct perf_output_handle *handle);
extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
extern void perf_pmu_unregister(struct pmu *pmu);

+#ifdef CONFIG_DEBUG_FS
+
+/*
+ * PMU supplementary information is reported through debugfs and is
+ * implemented in kernel/events/pmu_supp_info.c, which is only built when
+ * CONFIG_DEBUG_FS is set.
+ */
+extern int perf_pmu_update_status(const char *name, enum perf_pmu_status status,
+ const char *status_msg);
+extern int perf_pmu_error_status(const char *name, int err,
+ const char *status_msg);
+extern void perf_pmu_vendor(const char *vendor);
+
+#else /* !CONFIG_DEBUG_FS */
+
+/*
+ * Without debugfs there is nowhere to report the status, so these stubs do
+ * nothing and report success to the caller.
+ */
+static inline int perf_pmu_update_status(const char *name,
+ enum perf_pmu_status status,
+ const char *status_msg)
+{
+ return 0;
+}
+
+static inline int perf_pmu_error_status(const char *name, int err,
+ const char *status_msg)
+{
+ return 0;
+}
+
+static inline void perf_pmu_vendor(const char *vendor)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
extern int perf_num_counters(void);
extern const char *perf_pmu_name(void);
extern void __perf_event_task_sched_in(struct task_struct *prev,
diff --git a/kernel/events/Makefile b/kernel/events/Makefile
index 2925188f50ea..f4ee64411af0 100644
--- a/kernel/events/Makefile
+++ b/kernel/events/Makefile
@@ -6,4 +6,4 @@ obj-y := core.o ring_buffer.o callchain.o

obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_UPROBES) += uprobes.o
-
+obj-$(CONFIG_DEBUG_FS) += pmu_supp_info.o
diff --git a/kernel/events/pmu_supp_info.c b/kernel/events/pmu_supp_info.c
new file mode 100644
index 000000000000..ac714ad09f97
--- /dev/null
+++ b/kernel/events/pmu_supp_info.c
@@ -0,0 +1,384 @@
+/*
+ * pmu_supp_info.c: PMU Supplementary Information
+ * Copyright (c) 2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+#include <linux/perf_event.h>
+
+/*
+ * Supplementary information kept for one PMU. Each entry is shown through
+ * one debugfs file named after the PMU.
+ */
+struct pmu_supp_info {
+ /* Link in pmu_supp_info_tree, which is ordered by strcmp() of name */
+ struct rb_node node;
+ /* PMU name: the tree key and the debugfs file name */
+ const char *name;
+ /* CPU vendor the PMU requires, copied from known_pmus[]; may be NULL */
+ const char *vendor;
+ /* Optional custom message appended to the status text; may be NULL */
+ const char *status_msg;
+ /* An enum perf_pmu_status value; out-of-range values show as unknown */
+ int status;
+};
+
+/* All PMU entries, keyed by PMU name */
+static struct rb_root pmu_supp_info_tree = RB_ROOT;
+/* Taken around every access to the tree, its entries and known_pmus[] */
+static DEFINE_MUTEX(pmu_supp_info_lock);
+/* debugfs directory for the per-PMU files; NULL until the initcall sets it */
+static struct dentry *pmu_supp_info_dir;
+
+/*
+ * A PMU that gets an entry even if its driver never reports a status.
+ */
+struct known_pmu {
+ /* PMU name; NULL terminates the known_pmus[] table */
+ const char *name;
+ /* CPU vendor the PMU requires, compared against perf_pmu_vendor() */
+ const char *vendor;
+ /* Initial enum perf_pmu_status value for the PMU's entry */
+ int status;
+};
+
+/* Initializer for one known_pmus[] element */
+#define KNOWN_PMU(_name, _vendor, _status) { \
+ .name = _name, \
+ .vendor = _vendor, \
+ .status = _status, \
+}
+
+/*
+ * Initial status of x86-only PMUs: unknown when building for x86 (it can
+ * later be changed by perf_pmu_vendor() or by a status update), wrong
+ * architecture otherwise.
+ */
+#if defined(CONFIG_X86)
+#define PERF_PMU_STATUS_ARCH_X86 PERF_PMU_STATUS_UNKNOWN
+#else
+#define PERF_PMU_STATUS_ARCH_X86 PERF_PMU_STATUS_WRONG_ARCH
+#endif
+
+/*
+ * Table of known PMUs, terminated by an entry with a NULL name. Not const:
+ * perf_pmu_vendor() rewrites the status of entries whose vendor mismatches.
+ */
+static struct known_pmu known_pmus[] = {
+ KNOWN_PMU("intel_pt", "Intel", PERF_PMU_STATUS_ARCH_X86),
+ KNOWN_PMU("intel_bts", "Intel", PERF_PMU_STATUS_ARCH_X86),
+ KNOWN_PMU(NULL, NULL, 0),
+};
+
+/*
+ * Text shown in debugfs for each status, indexed by enum perf_pmu_status.
+ * There is deliberately no entry for PERF_PMU_STATUS_MAX; callers must
+ * range-check the index first.
+ */
+static const char *pmu_status_msg[] = {
+	[PERF_PMU_STATUS_ENABLED]	= "Enabled",
+	[PERF_PMU_STATUS_ERROR]		= "Disabled: Driver error",
+	[PERF_PMU_STATUS_NOT_LOADED]	= "Disabled: Driver not loaded",
+	[PERF_PMU_STATUS_NOT_CONFIG]	= "Disabled: Driver not in kernel config",
+	[PERF_PMU_STATUS_NOT_SUPPORTED]	= "Disabled: Not supported by the kernel",
+	[PERF_PMU_STATUS_WRONG_HW]	= "Disabled: Not supported by the hardware",
+	[PERF_PMU_STATUS_WRONG_VENDOR]	= "Disabled: Not supported by the hardware vendor",
+	[PERF_PMU_STATUS_WRONG_ARCH]	= "Disabled: Not supported by the architecture",
+	[PERF_PMU_STATUS_UNKNOWN]	= "Disabled: Unknown status",
+};
+
+/*
+ * seq_file show callback: print the status line of the PMU this file
+ * belongs to, as "<status>", "<status> - <custom message>" or, for a vendor
+ * mismatch without a custom message, "<status> - requires <vendor> CPU".
+ *
+ * pmu_supp_info_lock is held until the output has been formatted:
+ * perf_pmu_update_status() and perf_pmu_vendor() free and replace
+ * status_msg under that lock, so using the pointer after dropping the lock
+ * could read freed memory.
+ *
+ * Always returns 0.
+ */
+static int pmu_supp_info_show(struct seq_file *seq, void *v)
+{
+	struct pmu_supp_info *pmuinfo = seq->private;
+	const char *msg, *vendor;
+	int status;
+
+	mutex_lock(&pmu_supp_info_lock);
+
+	status = pmuinfo->status;
+	msg = pmuinfo->status_msg;
+	vendor = pmuinfo->vendor;
+
+	/* pmu_status_msg[] has no entry for out-of-range values */
+	if (status < 0 || status >= PERF_PMU_STATUS_MAX)
+		status = PERF_PMU_STATUS_UNKNOWN;
+
+	if (msg)
+		seq_printf(seq, "%s - %s\n", pmu_status_msg[status], msg);
+	else if (status == PERF_PMU_STATUS_WRONG_VENDOR && vendor)
+		seq_printf(seq, "%s - requires %s CPU\n",
+			   pmu_status_msg[status], vendor);
+	else
+		seq_printf(seq, "%s\n", pmu_status_msg[status]);
+
+	mutex_unlock(&pmu_supp_info_lock);
+
+	return 0;
+}
+
+/*
+ * debugfs open callback: attach the PMU entry stored in the inode's private
+ * data to a single-record seq_file rendered by pmu_supp_info_show().
+ */
+static int pmu_supp_info_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, pmu_supp_info_show, inode->i_private);
+}
+
+/* File operations of the per-PMU debugfs files: a single-record seq_file */
+static const struct file_operations pmu_supp_info_fops = {
+ .open = pmu_supp_info_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/*
+ * Create the read-only debugfs file for @pmuinfo, named after the PMU.
+ *
+ * Does nothing and succeeds while the debugfs directory does not exist.
+ * Returns 0 on success or -ENODEV if the file could not be created.
+ */
+static int pmu_supp_info_add_file(struct pmu_supp_info *pmuinfo)
+{
+	struct dentry *file;
+
+	if (!pmu_supp_info_dir)
+		return 0;
+
+	file = debugfs_create_file(pmuinfo->name, S_IRUGO, pmu_supp_info_dir,
+				   pmuinfo, &pmu_supp_info_fops);
+
+	return file ? 0 : -ENODEV;
+}
+
+/*
+ * Find the entry for PMU @name in pmu_supp_info_tree.
+ *
+ * Returns the entry, or NULL if @name is NULL or no entry has that name.
+ * The tree is only read, so a plain node pointer is enough; no parent or
+ * link pointer needs to be tracked as it is for insertion.
+ */
+static struct pmu_supp_info *pmu_supp_info_lookup(const char *name)
+{
+	struct rb_node *node = pmu_supp_info_tree.rb_node;
+	struct pmu_supp_info *pmuinfo;
+	int cmp;
+
+	if (!name)
+		return NULL;
+
+	while (node) {
+		pmuinfo = container_of(node, struct pmu_supp_info, node);
+		cmp = strcmp(name, pmuinfo->name);
+		if (cmp < 0)
+			node = node->rb_left;
+		else if (cmp > 0)
+			node = node->rb_right;
+		else
+			return pmuinfo;
+	}
+
+	return NULL;
+}
+
+/*
+ * Link @new_pmuinfo into pmu_supp_info_tree at the position given by its
+ * name.
+ *
+ * Returns 0 on success, -EINVAL if the entry has no name, or -EEXIST if an
+ * entry with the same name is already in the tree.
+ */
+static int pmu_supp_info_insert(struct pmu_supp_info *new_pmuinfo)
+{
+	struct rb_node **link = &pmu_supp_info_tree.rb_node;
+	struct rb_node *parent = NULL;
+
+	if (!new_pmuinfo->name)
+		return -EINVAL;
+
+	/* Descend to the empty slot where the new node belongs */
+	while (*link) {
+		struct pmu_supp_info *cur;
+		int cmp;
+
+		parent = *link;
+		cur = container_of(parent, struct pmu_supp_info, node);
+		cmp = strcmp(new_pmuinfo->name, cur->name);
+		if (!cmp)
+			return -EEXIST;
+
+		link = cmp < 0 ? &parent->rb_left : &parent->rb_right;
+	}
+
+	rb_link_node(&new_pmuinfo->node, parent, link);
+	rb_insert_color(&new_pmuinfo->node, &pmu_supp_info_tree);
+
+	return 0;
+}
+
+/*
+ * Allocate a zeroed entry for PMU @name with status
+ * PERF_PMU_STATUS_UNKNOWN. The name is duplicated with kstrdup_const().
+ *
+ * Returns the new entry, or NULL if either allocation fails.
+ */
+static struct pmu_supp_info *pmu_supp_info_alloc(const char *name)
+{
+	struct pmu_supp_info *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+
+	if (!entry)
+		return NULL;
+
+	entry->status = PERF_PMU_STATUS_UNKNOWN;
+	entry->name = kstrdup_const(name, GFP_KERNEL);
+	if (entry->name)
+		return entry;
+
+	kfree(entry);
+	return NULL;
+}
+
+/*
+ * Free @pmuinfo together with every string it owns. All three strings are
+ * obtained with kstrdup_const(), so they are released with kfree_const();
+ * vendor and status_msg may still be NULL here.
+ */
+static void pmu_supp_info_free(struct pmu_supp_info *pmuinfo)
+{
+	kfree_const(pmuinfo->status_msg);
+	kfree_const(pmuinfo->vendor);
+	kfree_const(pmuinfo->name);
+	kfree(pmuinfo);
+}
+
+/*
+ * Create an entry for PMU @name: allocate it, insert it into
+ * pmu_supp_info_tree and create its debugfs file.
+ *
+ * Returns the new entry or an ERR_PTR() value. On failure nothing is left
+ * behind: the entry is removed from the tree again if needed and freed.
+ * Both callers in this file run with pmu_supp_info_lock held.
+ */
+static struct pmu_supp_info *pmu_supp_info_add(const char *name)
+{
+ struct pmu_supp_info *pmuinfo;
+ int err;
+
+ pmuinfo = pmu_supp_info_alloc(name);
+ if (!pmuinfo)
+ return ERR_PTR(-ENOMEM);
+
+ err = pmu_supp_info_insert(pmuinfo);
+ if (err)
+ goto out_free;
+
+ err = pmu_supp_info_add_file(pmuinfo);
+ if (err)
+ goto out_remove;
+
+ return pmuinfo;
+
+out_remove:
+ /* Unlink from the tree before the memory is released */
+ rb_erase(&pmuinfo->node, &pmu_supp_info_tree);
+out_free:
+ pmu_supp_info_free(pmuinfo);
+ return ERR_PTR(err);
+}
+
+/**
+ * perf_pmu_update_status - set the supplementary status of a PMU
+ * @name: PMU name; an entry is created if none exists yet
+ * @status: new status
+ * @status_msg: optional custom message shown after the status text, or NULL
+ *
+ * Any previous custom message is replaced by a copy of @status_msg.
+ *
+ * Return: 0 on success, -EINVAL if @name is NULL, the error from creating
+ * the entry, or -ENOMEM if @status_msg could not be copied. In the last
+ * case the status itself has still been updated, only the message is lost.
+ */
+int perf_pmu_update_status(const char *name, enum perf_pmu_status status,
+			   const char *status_msg)
+{
+	struct pmu_supp_info *pmuinfo;
+	int err = 0;
+
+	if (!name)
+		return -EINVAL;
+
+	mutex_lock(&pmu_supp_info_lock);
+
+	pmuinfo = pmu_supp_info_lookup(name);
+	if (!pmuinfo) {
+		pmuinfo = pmu_supp_info_add(name);
+		if (IS_ERR(pmuinfo)) {
+			err = PTR_ERR(pmuinfo);
+			goto out_unlock;
+		}
+	}
+
+	pmuinfo->status = status;
+	kfree_const(pmuinfo->status_msg);
+	pmuinfo->status_msg = kstrdup_const(status_msg, GFP_KERNEL);
+	/* A NULL copy is only an error if there was a message to copy */
+	if (status_msg && !pmuinfo->status_msg)
+		err = -ENOMEM;
+
+out_unlock:
+	mutex_unlock(&pmu_supp_info_lock);
+
+	return err;
+}
+
+/**
+ * perf_pmu_error_status - set the status of a PMU from an error code
+ * @name: PMU name
+ * @err: 0 or a negative error code from the PMU driver's initialization
+ * @status_msg: optional custom message, or NULL
+ *
+ * Maps 0 to enabled, -ENODEV to "not supported by the hardware", -ENOTSUPP
+ * to "not supported by the kernel" and anything else to a driver error.
+ *
+ * Return: the result of perf_pmu_update_status().
+ */
+int perf_pmu_error_status(const char *name, int err, const char *status_msg)
+{
+	enum perf_pmu_status status = PERF_PMU_STATUS_ERROR;
+
+	if (!err)
+		status = PERF_PMU_STATUS_ENABLED;
+	else if (err == -ENODEV)
+		status = PERF_PMU_STATUS_WRONG_HW;
+	else if (err == -ENOTSUPP)
+		status = PERF_PMU_STATUS_NOT_SUPPORTED;
+
+	return perf_pmu_update_status(name, status, status_msg);
+}
+
+
+/**
+ * perf_pmu_vendor - record the vendor of the CPU
+ * @vendor: vendor name; NULL is ignored
+ *
+ * Every known PMU and every existing entry that still has the status
+ * PERF_PMU_STATUS_UNKNOWN and requires a different vendor is changed to
+ * PERF_PMU_STATUS_WRONG_VENDOR. PMUs without a vendor, and PMUs whose
+ * status is already something other than unknown, are left alone.
+ */
+void perf_pmu_vendor(const char *vendor)
+{
+ struct pmu_supp_info *pmuinfo;
+ struct known_pmu *known_pmu;
+ struct rb_node *node;
+
+ if (!vendor)
+ return;
+
+ mutex_lock(&pmu_supp_info_lock);
+
+ /* Update the table so that entries created from it later get the status */
+ for (known_pmu = known_pmus; known_pmu->name; known_pmu++) {
+ if (known_pmu->status == PERF_PMU_STATUS_UNKNOWN &&
+ known_pmu->vendor && strcmp(known_pmu->vendor, vendor))
+ known_pmu->status = PERF_PMU_STATUS_WRONG_VENDOR;
+ }
+
+ /* Update the entries that already exist */
+ for (node = rb_first(&pmu_supp_info_tree); node; node = rb_next(node)) {
+ pmuinfo = rb_entry(node, struct pmu_supp_info, node);
+ if (pmuinfo->status == PERF_PMU_STATUS_UNKNOWN &&
+ pmuinfo->vendor && strcmp(pmuinfo->vendor, vendor)) {
+ pmuinfo->status = PERF_PMU_STATUS_WRONG_VENDOR;
+ /* Drop any custom message so the vendor hint is shown instead */
+ kfree_const(pmuinfo->status_msg);
+ pmuinfo->status_msg = NULL;
+ }
+ }
+
+ mutex_unlock(&pmu_supp_info_lock);
+}
+
+/*
+ * Make sure every PMU in known_pmus[] has an entry, and give each such
+ * entry its vendor. Called from perf_event_debugfs_init() with
+ * pmu_supp_info_lock held.
+ */
+static void __init pmu_supp_info_add_known_pmus(void)
+{
+ struct pmu_supp_info *pmuinfo;
+ struct known_pmu *known_pmu;
+
+ for (known_pmu = known_pmus; known_pmu->name; known_pmu++) {
+ pmuinfo = pmu_supp_info_lookup(known_pmu->name);
+ if (!pmuinfo) {
+ pmuinfo = pmu_supp_info_add(known_pmu->name);
+ if (IS_ERR(pmuinfo)) {
+ pr_warn("Failed to add pmu_supplementary_info for %s\n",
+ known_pmu->name);
+ continue;
+ }
+ /* Only a new entry takes the table's status; an existing one keeps its own */
+ pmuinfo->status = known_pmu->status;
+ }
+ if (known_pmu->vendor && !pmuinfo->vendor)
+ pmuinfo->vendor = kstrdup_const(known_pmu->vendor,
+ GFP_KERNEL);
+ }
+}
+
+/*
+ * Create the debugfs files of all entries that are already in the tree,
+ * i.e. those added while pmu_supp_info_dir was still NULL.
+ *
+ * Returns 0 on success or the error of the first file that failed; files
+ * created before the failure are not removed here.
+ */
+static int __init pmu_supp_info_add_files(void)
+{
+ struct pmu_supp_info *pmuinfo;
+ struct rb_node *node;
+ int err;
+
+ for (node = rb_first(&pmu_supp_info_tree); node; node = rb_next(node)) {
+ pmuinfo = rb_entry(node, struct pmu_supp_info, node);
+ err = pmu_supp_info_add_file(pmuinfo);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/*
+ * Create <debugfs>/perf/pmu_supplementary_info, then the files for the
+ * entries that were registered before debugfs was available and for the
+ * known PMUs.
+ *
+ * Returns 0 on success or -ENODEV if a directory or file could not be
+ * created. On failure the whole "perf" directory is removed again.
+ */
+static int __init perf_event_debugfs_init(void)
+{
+	struct dentry *root, *dentry;
+	int err;
+
+	mutex_lock(&pmu_supp_info_lock);
+
+	root = debugfs_create_dir("perf", NULL);
+	if (IS_ERR_OR_NULL(root))
+		goto out_err;
+
+	dentry = debugfs_create_dir("pmu_supplementary_info", root);
+	if (IS_ERR_OR_NULL(dentry))
+		goto out_err_remove;
+
+	pmu_supp_info_dir = dentry;
+
+	err = pmu_supp_info_add_files();
+	if (err)
+		goto out_err_remove;
+
+	pmu_supp_info_add_known_pmus();
+
+out_unlock:
+	mutex_unlock(&pmu_supp_info_lock);
+
+	return err;
+
+out_err_remove:
+	/*
+	 * The directory goes away together with root. Clear the pointer so
+	 * that later pmu_supp_info_add_file() calls do not use a stale dentry.
+	 */
+	pmu_supp_info_dir = NULL;
+	debugfs_remove_recursive(root);
+out_err:
+	pr_err("perf failed to create debugfs dir/files\n");
+	err = -ENODEV;
+	goto out_unlock;
+}
+device_initcall(perf_event_debugfs_init);
--
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/