[RFC PATCH 12/17] perf: Track pinned events per user
From: Alexander Shishkin
Date: Tue Sep 05 2017 - 09:50:38 EST
Maintain a per-user, CPU-indexed array of shmemfs-backed events, in the
same way that mlock accounting is kept per user.
Signed-off-by: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
---
include/linux/sched/user.h | 6 ++++
kernel/events/core.c | 14 ++++-----
kernel/events/ring_buffer.c | 69 +++++++++++++++++++++++++++++++++++++--------
kernel/user.c | 1 +
4 files changed, 71 insertions(+), 19 deletions(-)
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 5d5415e129..bf10f95250 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -5,6 +5,7 @@
#include <linux/atomic.h>
struct key;
+struct perf_event;
/*
* Some day this will be a full-fledged user tracking system..
@@ -39,6 +40,11 @@ struct user_struct {
#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL)
atomic_long_t locked_vm;
#endif
+#ifdef CONFIG_PERF_EVENTS
+ atomic_long_t nr_pinnable_events;
+ struct mutex pinned_mutex;
+ struct perf_event ** __percpu pinned_events;
+#endif
};
extern int uids_sysfs_init(void);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1fed69d4ba..e00f1f6aaf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -384,7 +384,6 @@ static atomic_t perf_sched_count;
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
-static DEFINE_PER_CPU(struct perf_event *, shmem_events);
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
@@ -2086,7 +2085,8 @@ enum pin_event_t {
static enum pin_event_t pin_event_pages(struct perf_event *event)
{
- struct perf_event **pinned_event = this_cpu_ptr(&shmem_events);
+ struct user_struct *user = event->rb->mmap_user;
+ struct perf_event **pinned_event = this_cpu_ptr(user->pinned_events);
struct perf_event *old_event = *pinned_event;
if (old_event == event)
@@ -4281,13 +4281,14 @@ static void _free_event(struct perf_event *event)
unaccount_event(event);
if (event->attach_state & PERF_ATTACH_SHMEM) {
+ struct user_struct *user = event->rb->mmap_user;
struct perf_event_context *ctx = event->ctx;
int cpu;
atomic_set(&event->xpinned, 0);
for_each_possible_cpu(cpu) {
struct perf_event **pinned_event =
- per_cpu_ptr(&shmem_events, cpu);
+ per_cpu_ptr(user->pinned_events, cpu);
cmpxchg(pinned_event, event, NULL);
}
@@ -9530,7 +9531,7 @@ perf_event_detach(struct perf_event *event, struct perf_event *parent_event,
{
struct ring_buffer *parent_rb = parent_event ? parent_event->rb : NULL;
char *filename;
- int err;
+ int err = -ENOMEM;
filename = kasprintf(GFP_KERNEL, "%s:%x.event",
task ? "task" : "cpu",
@@ -9550,10 +9551,9 @@ perf_event_detach(struct perf_event *event, struct perf_event *parent_event,
if (err) {
tracefs_remove(event->dent);
event->dent = NULL;
- return err;
}
- return 0;
+ return err;
}
/*
* Allocate and initialize a event structure
@@ -10290,7 +10290,7 @@ SYSCALL_DEFINE5(perf_event_open,
}
if (detached) {
- err = perf_event_detach(event, task, NULL);
+ err = perf_event_detach(event, NULL, task, NULL);
if (err)
goto err_context;
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 896d441642..8d37e4e591 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -563,6 +563,44 @@ void *perf_get_aux(struct perf_output_handle *handle)
return handle->rb->aux_priv;
}
+/*
+ * Take a reference on the current user's per-cpu array of pinned events,
+ * allocating the array if nobody holds a reference yet.
+ *
+ * The reference is counted in user->nr_pinnable_events. The caller that
+ * allocates the array additionally takes a user_struct reference via
+ * get_current_user(); callers that find the array already in place get the
+ * user pointer back without an extra user_struct reference.
+ *
+ * Returns the user on success, or NULL if the per-cpu allocation fails or
+ * if the counter is zero while user->pinned_events is still set.
+ */
+static struct user_struct *get_users_pinned_events(void)
+{
+	struct user_struct *user = current_user(), *ret = NULL;
+
+	/* Fast path: the array exists and has at least one user. */
+	if (atomic_long_inc_not_zero(&user->nr_pinnable_events))
+		return user;
+
+	mutex_lock(&user->pinned_mutex);
+
+	/*
+	 * Another task may have allocated the array while we were waiting
+	 * for the mutex. Retry the increment instead of bailing out with
+	 * NULL, which the caller would report as a bogus -ENOMEM.
+	 */
+	if (atomic_long_inc_not_zero(&user->nr_pinnable_events)) {
+		ret = user;
+		goto unlock;
+	}
+
+	/* Counter is zero: the array is not expected to exist. */
+	if (WARN_ON_ONCE(!!user->pinned_events))
+		goto unlock;
+
+	user->pinned_events = alloc_percpu(struct perf_event *);
+	if (!user->pinned_events)
+		goto unlock;
+
+	atomic_long_inc(&user->nr_pinnable_events);
+	ret = get_current_user();
+
+unlock:
+	mutex_unlock(&user->pinned_mutex);
+
+	return ret;
+}
+
+/*
+ * Drop a reference on @user's per-cpu array of pinned events and free the
+ * array when the last reference goes away.
+ *
+ * The final 1 -> 0 transition of user->nr_pinnable_events is only made with
+ * user->pinned_mutex held, so that a concurrent get_users_pinned_events()
+ * holding the mutex never observes a zero counter together with a
+ * not-yet-freed user->pinned_events.
+ *
+ * NOTE(review): the user_struct reference taken by get_current_user() when
+ * the array was allocated is not dropped here -- confirm that it is
+ * released elsewhere.
+ */
+static void put_users_pinned_events(struct user_struct *user)
+{
+	/* Fast path: not the last reference, just decrement. */
+	if (atomic_long_add_unless(&user->nr_pinnable_events, -1, 1))
+		return;
+
+	mutex_lock(&user->pinned_mutex);
+	/* Re-check under the mutex; a new reference may have been taken. */
+	if (atomic_long_dec_and_test(&user->nr_pinnable_events)) {
+		free_percpu(user->pinned_events);
+		user->pinned_events = NULL;
+	}
+	mutex_unlock(&user->pinned_mutex);
+}
+
/*
* Check if the current user can afford @nr_pages, considering the
* perf_event_mlock sysctl and their mlock limit. If the former is exceeded,
@@ -574,11 +612,14 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
unsigned long nr_pages, unsigned long *locked)
{
unsigned long total, limit, pinned;
+ struct user_struct *user;
if (!mm)
mm = rb->mmap_mapping;
- rb->mmap_user = current_user();
+ user = get_users_pinned_events();
+ if (!user)
+ return -ENOMEM;
limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);
@@ -587,10 +628,7 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
*/
limit *= num_online_cpus();
- total = atomic_long_read(&rb->mmap_user->locked_vm) + nr_pages;
-
- free_uid(rb->mmap_user);
- rb->mmap_user = NULL;
+ total = atomic_long_read(&user->locked_vm) + nr_pages;
pinned = 0;
if (total > limit) {
@@ -599,7 +637,7 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
* limit needs to be accounted to the consumer's mm.
*/
if (!mm)
- return -EPERM;
+ goto err_put_user;
pinned = total - limit;
@@ -608,9 +646,8 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
total = mm->pinned_vm + pinned;
if ((total > limit) && perf_paranoid_tracepoint_raw() &&
- !capable(CAP_IPC_LOCK)) {
- return -EPERM;
- }
+ !capable(CAP_IPC_LOCK))
+ goto err_put_user;
*locked = pinned;
mm->pinned_vm += pinned;
@@ -619,10 +656,15 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
if (!rb->mmap_mapping)
rb->mmap_mapping = mm;
- rb->mmap_user = get_current_user();
- atomic_long_add(nr_pages, &rb->mmap_user->locked_vm);
+ rb->mmap_user = user;
+ atomic_long_add(nr_pages, &user->locked_vm);
return 0;
+
+err_put_user:
+ put_users_pinned_events(user);
+
+ return -EPERM;
}
static int ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
@@ -657,7 +699,7 @@ void ring_buffer_unaccount(struct ring_buffer *rb, bool aux)
if (rb->mmap_mapping)
rb->mmap_mapping->pinned_vm -= pinned;
- free_uid(rb->mmap_user);
+ put_users_pinned_events(rb->mmap_user);
}
#define PERF_AUX_GFP (GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY)
@@ -1124,6 +1166,7 @@ rb_shmem_account(struct ring_buffer *rb, struct ring_buffer *parent_rb)
rb->acct_refcount = parent_rb->acct_refcount;
atomic_inc(rb->acct_refcount);
+ rb->mmap_user = get_uid(parent_rb->mmap_user);
return 0;
}
@@ -1146,6 +1189,8 @@ rb_shmem_account(struct ring_buffer *rb, struct ring_buffer *parent_rb)
static void rb_shmem_unaccount(struct ring_buffer *rb)
{
+ free_uid(rb->mmap_user);
+
if (!atomic_dec_and_test(rb->acct_refcount)) {
rb->acct_refcount = NULL;
return;
diff --git a/kernel/user.c b/kernel/user.c
index 00281add65..e95a82d31d 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -185,6 +185,7 @@ struct user_struct *alloc_uid(kuid_t uid)
new->uid = uid;
atomic_set(&new->__count, 1);
+ mutex_init(&new->pinned_mutex);
/*
* Before adding this, check whether we raced
--
2.14.1