[RFC 3/5] perf: Allow per PMU access control

From: Tvrtko Ursulin
Date: Wed Sep 19 2018 - 08:28:05 EST


From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>

For situations where sysadmins might want to allow different levels of
access control for different PMUs, create per-PMU perf_event_paranoid
controls in sysfs.

These work in the same fashion as the existing perf_event_paranoid
sysctl, which now becomes the parent control for each PMU.

On PMU registration each PMU inherits the global/parent value, and
whenever the sysctl is updated the new value is propagated to all
registered PMUs.

At any later point individual PMU access controls, located in
<sysfs>/devices/<pmu-name>/perf_event_paranoid, can be adjusted to achieve
fine-grained access control.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
Cc: Madhavan Srinivasan <maddy@xxxxxxxxxxxxxxxxxx>
Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx>
Cc: Alexey Budankov <alexey.budankov@xxxxxxxxxxxxxxx>
Cc: linux-kernel@xxxxxxxxxxxxxxx
Cc: x86@xxxxxxxxxx
---
include/linux/perf_event.h | 12 ++++++--
kernel/events/core.c | 59 ++++++++++++++++++++++++++++++++++++++
kernel/sysctl.c | 4 ++-
3 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 22906bcc1bcd..bb82e47f5343 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -271,6 +271,9 @@ struct pmu {
/* number of address filters this PMU can do */
unsigned int nr_addr_filters;

+ /* per PMU access control */
+ int perf_event_paranoid;
+
/*
* Fully disable/enable this PMU, can be used to protect from the PMI
* as well as for lazy/batch writing of the MSRs.
@@ -1169,6 +1172,9 @@ extern int sysctl_perf_cpu_time_max_percent;

extern void perf_sample_event_took(u64 sample_len_ns);

+extern int perf_proc_paranoid_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos);
extern int perf_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
@@ -1181,17 +1187,17 @@ int perf_event_max_stack_handler(struct ctl_table *table, int write,

static inline bool perf_paranoid_tracepoint_raw(const struct pmu *pmu)
{
- return sysctl_perf_event_paranoid > -1;
+ return pmu->perf_event_paranoid > -1; /* true when this PMU's own level is above -1 (was: global sysctl) */
}

static inline bool perf_paranoid_cpu(const struct pmu *pmu)
{
- return sysctl_perf_event_paranoid > 0;
+ return pmu->perf_event_paranoid > 0; /* true when this PMU's own level is above 0 (was: global sysctl) */
}

static inline bool perf_paranoid_kernel(const struct pmu *pmu)
{
- return sysctl_perf_event_paranoid > 1;
+ return pmu->perf_event_paranoid > 1; /* true when this PMU's own level is above 1 (was: global sysctl) */
}

extern void perf_event_init(void);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f556144bc0c5..35f122349508 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -432,6 +432,24 @@ static void update_perf_cpu_limits(void)

static bool perf_rotate_context(struct perf_cpu_context *cpuctx);

+int perf_proc_paranoid_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{ /* sysctl handler: after a successful write, copy the global level to every PMU */
+ int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ struct pmu *pmu;
+
+ if (ret || !write) /* non-zero ret or a read: nothing to propagate */
+ return ret;
+
+ mutex_lock(&pmus_lock); /* held for the whole walk of the pmus list */
+ list_for_each_entry(pmu, &pmus, entry)
+ pmu->perf_event_paranoid = sysctl_perf_event_paranoid; /* unconditional: replaces any per-PMU value */
+ mutex_unlock(&pmus_lock);
+
+ return 0;
+}
+
int perf_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
@@ -9430,6 +9448,41 @@ static void free_pmu_context(struct pmu *pmu)
mutex_unlock(&pmus_lock);
}

+/*
+ * Fine-grained access control: per-PMU perf_event_paranoid device attribute.
+ */
+static ssize_t
+perf_event_paranoid_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ struct pmu *pmu = dev_get_drvdata(dev); /* PMU is stored as the device's drvdata */
+
+ return snprintf(page, PAGE_SIZE - 1, "%d\n", pmu->perf_event_paranoid); /* level as decimal plus newline */
+}
+
+static ssize_t
+perf_event_paranoid_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{ /* parse buf as an int, range-check it, then set this PMU's level */
+ struct pmu *pmu = dev_get_drvdata(dev);
+ int ret, val;
+
+ ret = kstrtoint(buf, 0, &val); /* base 0; a kstrtoint() error is returned as-is */
+ if (ret)
+ return ret;
+
+ if (val < -1 || val > 2) /* only levels -1..2 are accepted */
+ return -EINVAL;
+
+ pmu->perf_event_paranoid = val; /* NOTE(review): written without pmus_lock, unlike perf_proc_paranoid_handler() - confirm intended */
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(perf_event_paranoid);
+
/*
* Let userspace know that this PMU supports address range filtering:
*/
@@ -9544,6 +9597,11 @@ static int pmu_dev_alloc(struct pmu *pmu)
if (ret)
goto free_dev;

+ /* Add fine-grained access control attribute. */
+ ret = device_create_file(pmu->dev, &dev_attr_perf_event_paranoid);
+ if (ret)
+ goto del_dev;
+
/* For PMUs with address filters, throw in an extra attribute: */
if (pmu->nr_addr_filters)
ret = device_create_file(pmu->dev, &dev_attr_nr_addr_filters);
@@ -9575,6 +9633,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
if (!pmu->pmu_disable_count)
goto unlock;

+ pmu->perf_event_paranoid = sysctl_perf_event_paranoid;
pmu->type = -1;
if (!name)
goto skip_type;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index cc02050fd0c4..83179c443c89 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1145,7 +1145,9 @@ static struct ctl_table kern_table[] = {
.data = &sysctl_perf_event_paranoid,
.maxlen = sizeof(sysctl_perf_event_paranoid),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = perf_proc_paranoid_handler,
+ .extra1 = &neg_one,
+ .extra2 = &two,
},
{
.procname = "perf_event_mlock_kb",
--
2.17.1