[PATCH tip/master v4] kprobes: extable: Identify kprobes' insn-slots as kernel text area

From: Masami Hiramatsu
Date: Sun Jan 08 2017 - 10:00:39 EST


Make __kernel_text_address()/kernel_text_address() returns
true if the given address is on a kprobe's instruction slot,
which is generated by kprobes as a trampoline code.
This can help stacktraces to determine the address is on a
text area or not.

To implement this without any sleep in is_kprobe_*_slot(),
this also modify insn_cache page list as a rcu list. It may
increase processing deley (not processing time) for garbage
slot collection, because it requires to wait an additional
rcu grance period when freeing a page from the list.
However, since it is not a hot path, we may not take care of it.

Note: this can give a small overhead to stack unwinders because
this adds 2 checks in __kernel_text_address(). However, the
impact should be very small, kprobe_insn_pages list has 1 entry
per 256 probes(on x86, on arm/arm64 it will be 1024 probes),
and kprobe_optinsn_pages has 1 entry per 32 probes(on x86).
In most use cases, the number of kprobe events may be less
than 20, this means is_kprobe_*_slot() will check just 1 entry.

Signed-off-by: Masami Hiramatsu <mhiramat@xxxxxxxxxx>

---
V4:
- Add a performance note on patch description.
- Update for the latest tip/master.
---
include/linux/kprobes.h | 30 ++++++++++++++++++++
kernel/extable.c | 9 +++++-
kernel/kprobes.c | 69 +++++++++++++++++++++++++++++++++++------------
3 files changed, 89 insertions(+), 19 deletions(-)

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 8f68490..16ddfb8 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -278,9 +278,13 @@ struct kprobe_insn_cache {
int nr_garbage;
};

+#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
extern kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c);
extern void __free_insn_slot(struct kprobe_insn_cache *c,
kprobe_opcode_t *slot, int dirty);
+/* sleep-less address checking routine */
+extern bool __is_insn_slot_addr(struct kprobe_insn_cache *c,
+ unsigned long addr);

#define DEFINE_INSN_CACHE_OPS(__name) \
extern struct kprobe_insn_cache kprobe_##__name##_slots; \
@@ -294,6 +298,18 @@ static inline void free_##__name##_slot(kprobe_opcode_t *slot, int dirty)\
{ \
__free_insn_slot(&kprobe_##__name##_slots, slot, dirty); \
} \
+ \
+static inline bool is_kprobe_##__name##_slot(unsigned long addr) \
+{ \
+ return __is_insn_slot_addr(&kprobe_##__name##_slots, addr); \
+}
+#else /* __ARCH_WANT_KPROBES_INSN_SLOT */
+#define DEFINE_INSN_CACHE_OPS(__name) \
+static inline bool is_kprobe_##__name##_slot(unsigned long addr) \
+{ \
+ return 0; \
+}
+#endif

DEFINE_INSN_CACHE_OPS(insn);

@@ -330,7 +346,6 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
int write, void __user *buffer,
size_t *length, loff_t *ppos);
#endif
-
#endif /* CONFIG_OPTPROBES */
#ifdef CONFIG_KPROBES_ON_FTRACE
extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
@@ -481,6 +496,19 @@ static inline int enable_jprobe(struct jprobe *jp)
return enable_kprobe(&jp->kp);
}

+#ifndef CONFIG_KPROBES
+static inline bool is_kprobe_insn_slot(unsigned long addr)
+{
+ return false;
+}
+#endif
+#ifndef CONFIG_OPTPROBES
+static inline bool is_kprobe_optinsn_slot(unsigned long addr)
+{
+ return false;
+}
+#endif
+
#ifdef CONFIG_KPROBES
/*
* Blacklist ganerating macro. Specify functions which is not probed
diff --git a/kernel/extable.c b/kernel/extable.c
index e3beec4..e135947 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -20,6 +20,7 @@
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/init.h>
+#include <linux/kprobes.h>

#include <asm/sections.h>
#include <linux/uaccess.h>
@@ -104,6 +105,8 @@ int __kernel_text_address(unsigned long addr)
return 1;
if (is_ftrace_trampoline(addr))
return 1;
+ if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
+ return 1;
/*
* There might be init symbols in saved stacktraces.
* Give those symbols a chance to be printed in
@@ -123,7 +126,11 @@ int kernel_text_address(unsigned long addr)
return 1;
if (is_module_text_address(addr))
return 1;
- return is_ftrace_trampoline(addr);
+ if (is_ftrace_trampoline(addr))
+ return 1;
+ if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
+ return 1;
+ return 0;
}

/*
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 4346010..e7e3f18 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -149,9 +149,11 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
struct kprobe_insn_page *kip;
kprobe_opcode_t *slot = NULL;

+ /* Since the slot array is not protected by rcu, we need a mutex */
mutex_lock(&c->mutex);
retry:
- list_for_each_entry(kip, &c->pages, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(kip, &c->pages, list) {
if (kip->nused < slots_per_page(c)) {
int i;
for (i = 0; i < slots_per_page(c); i++) {
@@ -159,6 +161,7 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
kip->slot_used[i] = SLOT_USED;
kip->nused++;
slot = kip->insns + (i * c->insn_size);
+ rcu_read_unlock();
goto out;
}
}
@@ -167,6 +170,7 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
WARN_ON(1);
}
}
+ rcu_read_unlock();

/* If there are any garbage slots, collect it and try again. */
if (c->nr_garbage && collect_garbage_slots(c) == 0)
@@ -193,7 +197,7 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
kip->nused = 1;
kip->ngarbage = 0;
kip->cache = c;
- list_add(&kip->list, &c->pages);
+ list_add_rcu(&kip->list, &c->pages);
slot = kip->insns;
out:
mutex_unlock(&c->mutex);
@@ -213,7 +217,8 @@ static int collect_one_slot(struct kprobe_insn_page *kip, int idx)
* next time somebody inserts a probe.
*/
if (!list_is_singular(&kip->list)) {
- list_del(&kip->list);
+ list_del_rcu(&kip->list);
+ synchronize_rcu();
kip->cache->free(kip->insns);
kfree(kip);
}
@@ -248,29 +253,59 @@ void __free_insn_slot(struct kprobe_insn_cache *c,
kprobe_opcode_t *slot, int dirty)
{
struct kprobe_insn_page *kip;
+ long idx;

mutex_lock(&c->mutex);
- list_for_each_entry(kip, &c->pages, list) {
- long idx = ((long)slot - (long)kip->insns) /
- (c->insn_size * sizeof(kprobe_opcode_t));
- if (idx >= 0 && idx < slots_per_page(c)) {
- WARN_ON(kip->slot_used[idx] != SLOT_USED);
- if (dirty) {
- kip->slot_used[idx] = SLOT_DIRTY;
- kip->ngarbage++;
- if (++c->nr_garbage > slots_per_page(c))
- collect_garbage_slots(c);
- } else
- collect_one_slot(kip, idx);
+ rcu_read_lock();
+ list_for_each_entry_rcu(kip, &c->pages, list) {
+ idx = ((long)slot - (long)kip->insns) /
+ (c->insn_size * sizeof(kprobe_opcode_t));
+ if (idx >= 0 && idx < slots_per_page(c))
goto out;
- }
}
- /* Could not free this slot. */
+ /* Could not find this slot. */
WARN_ON(1);
+ kip = NULL;
out:
+ rcu_read_unlock();
+ /* Mark and sweep: this may sleep */
+ if (kip) {
+ /* Check double free */
+ WARN_ON(kip->slot_used[idx] != SLOT_USED);
+ if (dirty) {
+ kip->slot_used[idx] = SLOT_DIRTY;
+ kip->ngarbage++;
+ if (++c->nr_garbage > slots_per_page(c))
+ collect_garbage_slots(c);
+ } else
+ collect_one_slot(kip, idx);
+ }
mutex_unlock(&c->mutex);
}

+/*
+ * Check given address is on the page of kprobe instruction slots.
+ * This will be used for checking whether the address on a stack
+ * is on a text area or not.
+ */
+bool __is_insn_slot_addr(struct kprobe_insn_cache *c, unsigned long addr)
+{
+ struct kprobe_insn_page *kip;
+ bool ret = false;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(kip, &c->pages, list) {
+ if (addr >= (unsigned long)kip->insns &&
+ addr < (unsigned long)kip->insns + PAGE_SIZE) {
+ ret = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
#ifdef CONFIG_OPTPROBES
/* For optimized_kprobe buffer */
struct kprobe_insn_cache kprobe_optinsn_slots = {