[RFC][PATCH 7/7] kprobes: Replace rp->free_instances with freelist

From: Peter Zijlstra
Date: Thu Aug 27 2020 - 12:21:54 EST


Replace the hlist-based rp->free_instances list with a freelist.
This gets rid of rp->lock, and as a result kretprobes are now fully
lockless.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
include/linux/kprobes.h | 11 ++++++--
kernel/kprobes.c | 63 +++++++++++++++++++-----------------------------
2 files changed, 34 insertions(+), 40 deletions(-)

--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -27,6 +27,7 @@
#include <linux/rcupdate.h>
#include <linux/mutex.h>
#include <linux/ftrace.h>
+#include <linux/freelist.h>
#include <asm/kprobes.h>

#ifdef CONFIG_KPROBES
@@ -151,14 +152,18 @@ struct kretprobe {
int maxactive;
int nmissed;
size_t data_size;
- struct hlist_head free_instances;
- raw_spinlock_t lock;
+ struct freelist_head freelist;
};

struct kretprobe_instance {
union {
+ /*
+ * Dodgy as heck, this relies on not clobbering freelist::refs.
+ * llist: only clobbers freelist::next.
+ * rcu: clobbers both, but only after rp::freelist is gone.
+ */
+ struct freelist_node freelist;
struct llist_node llist;
- struct hlist_node hlist;
struct rcu_head rcu;
};
struct kretprobe *rp;
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1219,12 +1219,8 @@ static void recycle_rp_inst(struct kretp
struct kretprobe *rp = ri->rp;

/* remove rp inst off the rprobe_inst_table */
- hlist_del(&ri->hlist);
- INIT_HLIST_NODE(&ri->hlist);
if (likely(rp)) {
- raw_spin_lock(&rp->lock);
- hlist_add_head(&ri->hlist, &rp->free_instances);
- raw_spin_unlock(&rp->lock);
+ freelist_add(&ri->freelist, &rp->freelist);
} else {
kfree_rcu(ri, rcu);
}
@@ -1286,10 +1282,13 @@ NOKPROBE_SYMBOL(kprobe_flush_task);
static inline void free_rp_inst(struct kretprobe *rp)
{
struct kretprobe_instance *ri;
- struct hlist_node *next;
+ struct freelist_node *node;
+
+ node = rp->freelist.head;
+ while (node) {
+ ri = container_of(node, struct kretprobe_instance, freelist);
+ node = node->next;

- hlist_for_each_entry_safe(ri, next, &rp->free_instances, hlist) {
- hlist_del(&ri->hlist);
kfree(ri);
}
}
@@ -1986,36 +1985,28 @@ NOKPROBE_SYMBOL(__kretprobe_trampoline_h
static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
{
struct kretprobe *rp = container_of(p, struct kretprobe, kp);
- unsigned long hash, flags = 0;
struct kretprobe_instance *ri;
+ struct freelist_node *fn;

- /* TODO: consider to only swap the RA after the last pre_handler fired */
- hash = hash_ptr(current, KPROBE_HASH_BITS);
- raw_spin_lock_irqsave(&rp->lock, flags);
- if (!hlist_empty(&rp->free_instances)) {
- ri = hlist_entry(rp->free_instances.first,
- struct kretprobe_instance, hlist);
- hlist_del(&ri->hlist);
- raw_spin_unlock_irqrestore(&rp->lock, flags);
-
- ri->rp = rp;
- ri->task = current;
-
- if (rp->entry_handler && rp->entry_handler(ri, regs)) {
- raw_spin_lock_irqsave(&rp->lock, flags);
- hlist_add_head(&ri->hlist, &rp->free_instances);
- raw_spin_unlock_irqrestore(&rp->lock, flags);
- return 0;
- }
-
- arch_prepare_kretprobe(ri, regs);
+ fn = freelist_try_get(&rp->freelist);
+ if (!fn) {
+ rp->nmissed++;
+ return 0;
+ }

- __llist_add(&ri->llist, &current->kretprobe_instances);
+ ri = container_of(fn, struct kretprobe_instance, freelist);
+ ri->rp = rp;
+ ri->task = current;

- } else {
- rp->nmissed++;
- raw_spin_unlock_irqrestore(&rp->lock, flags);
+ if (rp->entry_handler && rp->entry_handler(ri, regs)) {
+ freelist_add(&ri->freelist, &rp->freelist);
+ return 0;
}
+
+ arch_prepare_kretprobe(ri, regs);
+
+ __llist_add(&ri->llist, &current->kretprobe_instances);
+
return 0;
}
NOKPROBE_SYMBOL(pre_handler_kretprobe);
@@ -2072,8 +2063,7 @@ int register_kretprobe(struct kretprobe
rp->maxactive = num_possible_cpus();
#endif
}
- raw_spin_lock_init(&rp->lock);
- INIT_HLIST_HEAD(&rp->free_instances);
+ rp->freelist.head = NULL;
for (i = 0; i < rp->maxactive; i++) {
inst = kmalloc(sizeof(struct kretprobe_instance) +
rp->data_size, GFP_KERNEL);
@@ -2081,8 +2071,7 @@ int register_kretprobe(struct kretprobe
free_rp_inst(rp);
return -ENOMEM;
}
- INIT_HLIST_NODE(&inst->hlist);
- hlist_add_head(&inst->hlist, &rp->free_instances);
+ freelist_add(&inst->freelist, &rp->freelist);
}

rp->nmissed = 0;