Re: [RFC bpf-next 01/10] uprobe: Add session callbacks to uprobe_consumer

From: Jiri Olsa
Date: Mon Jun 10 2024 - 07:06:57 EST


On Thu, Jun 06, 2024 at 09:52:39AM -0700, Andrii Nakryiko wrote:
> On Thu, Jun 6, 2024 at 9:46 AM Jiri Olsa <olsajiri@xxxxxxxxx> wrote:
> >
> > On Wed, Jun 05, 2024 at 10:50:11PM +0200, Jiri Olsa wrote:
> > > On Wed, Jun 05, 2024 at 07:56:19PM +0200, Oleg Nesterov wrote:
> > > > On 06/05, Andrii Nakryiko wrote:
> > > > >
> > > > > so any such
> > > > > limitations will cause problems, issue reports, investigation, etc.
> > > >
> > > > Agreed...
> > > >
> > > > > As one possible solution, what if we do
> > > > >
> > > > > struct return_instance {
> > > > > ...
> > > > > u64 session_cookies[];
> > > > > };
> > > > >
> > > > > and allocate sizeof(struct return_instance) + 8 *
> > > > > <num-of-session-consumers> and then at runtime pass
> > > > > &session_cookies[i] as data pointer to session-aware callbacks?
> > > >
> > > > I too thought about this, but I guess it is not that simple.
> > > >
> > > > Just for example. Suppose we have 2 session-consumers C1 and C2.
> > > > What if uprobe_unregister(C1) comes before the probed function
> > > > returns?
> > > >
> > > > We need something like map_cookie_to_consumer().
> > >
> > > I guess we could have hash table in return_instance that gets 'consumer -> cookie' ?
> >
> > ok, hash table is probably too big for this.. I guess some solution that
> > would iterate consumers and cookies made sure it matches would be fine
> >
>
> Yes, I was hoping to avoid hash tables for this, and in the common
> case have no added overhead.

hi,
here's first stab on that.. the change below:
- extends current handlers with extra argument rather than adding new
set of handlers
- store session consumers objects within return_instance object and
- iterate these objects ^^^ in handle_uretprobe_chain

I guess it could be still polished, but I wonder if this could
be the right direction to do this.. thoughts? ;-)

thanks,
jirka


---
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index f46e0ca0169c..4e40e8352eac 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -34,15 +34,19 @@ enum uprobe_filter_ctx {
};

struct uprobe_consumer {
- int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
+ int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs,
+ unsigned long *data);
int (*ret_handler)(struct uprobe_consumer *self,
unsigned long func,
- struct pt_regs *regs);
+ struct pt_regs *regs,
+ unsigned long *data);
bool (*filter)(struct uprobe_consumer *self,
enum uprobe_filter_ctx ctx,
struct mm_struct *mm);

struct uprobe_consumer *next;
+ bool is_session;
+ unsigned int id;
};

#ifdef CONFIG_UPROBES
@@ -80,6 +84,12 @@ struct uprobe_task {
unsigned int depth;
};

+struct session_consumer {
+ long cookie;
+ unsigned int id;
+ int rc;
+};
+
struct return_instance {
struct uprobe *uprobe;
unsigned long func;
@@ -88,6 +98,8 @@ struct return_instance {
bool chained; /* true, if instance is nested */

struct return_instance *next; /* keep as stack */
+ int session_cnt;
+ struct session_consumer sc[1]; /* 1 for zero item marking the end */
};

enum rp_check {
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 2c83ba776fc7..cbd71dc06ef0 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -63,6 +63,8 @@ struct uprobe {
loff_t ref_ctr_offset;
unsigned long flags;

+ unsigned int session_cnt;
+
/*
* The generic code assumes that it has two members of unknown type
* owned by the arch-specific code:
@@ -750,11 +752,30 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
return uprobe;
}

+static void
+uprobe_consumer_account(struct uprobe *uprobe, struct uprobe_consumer *uc)
+{
+ static unsigned int session_id;
+
+ if (uc->is_session) {
+ uprobe->session_cnt++;
+ uc->id = ++session_id ?: ++session_id;
+ }
+}
+
+static void
+uprobe_consumer_unaccount(struct uprobe *uprobe, struct uprobe_consumer *uc)
+{
+ if (uc->is_session)
+ uprobe->session_cnt--;
+}
+
static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
{
down_write(&uprobe->consumer_rwsem);
uc->next = uprobe->consumers;
uprobe->consumers = uc;
+ uprobe_consumer_account(uprobe, uc);
up_write(&uprobe->consumer_rwsem);
}

@@ -773,6 +794,7 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
if (*con == uc) {
*con = uc->next;
ret = true;
+ uprobe_consumer_unaccount(uprobe, uc);
break;
}
}
@@ -1744,6 +1766,23 @@ static struct uprobe_task *get_utask(void)
return current->utask;
}

+static size_t ri_size(int session_cnt)
+{
+ struct return_instance *ri __maybe_unused;
+
+ return sizeof(*ri) + session_cnt * sizeof(ri->sc[0]);
+}
+
+static struct return_instance *alloc_return_instance(int session_cnt)
+{
+ struct return_instance *ri;
+
+ ri = kzalloc(ri_size(session_cnt), GFP_KERNEL);
+ if (ri)
+ ri->session_cnt = session_cnt;
+ return ri;
+}
+
static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
{
struct uprobe_task *n_utask;
@@ -1756,11 +1795,11 @@ static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)

p = &n_utask->return_instances;
for (o = o_utask->return_instances; o; o = o->next) {
- n = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
+ n = alloc_return_instance(o->session_cnt);
if (!n)
return -ENOMEM;

- *n = *o;
+ memcpy(n, o, ri_size(o->session_cnt));
get_uprobe(n->uprobe);
n->next = NULL;

@@ -1853,35 +1892,38 @@ static void cleanup_return_instances(struct uprobe_task *utask, bool chained,
utask->return_instances = ri;
}

-static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
+static struct return_instance *
+prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs,
+ struct return_instance *ri, int session_cnt)
{
- struct return_instance *ri;
struct uprobe_task *utask;
unsigned long orig_ret_vaddr, trampoline_vaddr;
bool chained;

if (!get_xol_area())
- return;
+ return ri;

utask = get_utask();
if (!utask)
- return;
+ return ri;

if (utask->depth >= MAX_URETPROBE_DEPTH) {
printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to"
" nestedness limit pid/tgid=%d/%d\n",
current->pid, current->tgid);
- return;
+ return ri;
}

- ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
- if (!ri)
- return;
+ if (!ri) {
+ ri = alloc_return_instance(session_cnt);
+ if (!ri)
+ return NULL;
+ }

trampoline_vaddr = get_trampoline_vaddr();
orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
if (orig_ret_vaddr == -1)
- goto fail;
+ return ri;

/* drop the entries invalidated by longjmp() */
chained = (orig_ret_vaddr == trampoline_vaddr);
@@ -1899,7 +1941,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
* attack from user-space.
*/
uprobe_warn(current, "handle tail call");
- goto fail;
+ return ri;
}
orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
}
@@ -1914,9 +1956,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
ri->next = utask->return_instances;
utask->return_instances = ri;

- return;
- fail:
- kfree(ri);
+ return NULL;
}

/* Prepare to single-step probed instruction out of line. */
@@ -2069,44 +2109,90 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
{
struct uprobe_consumer *uc;
int remove = UPROBE_HANDLER_REMOVE;
+ struct session_consumer *sc = NULL;
+ struct return_instance *ri = NULL;
bool need_prep = false; /* prepare return uprobe, when needed */

down_read(&uprobe->register_rwsem);
- for (uc = uprobe->consumers; uc; uc = uc->next) {
+ if (uprobe->session_cnt) {
+ ri = alloc_return_instance(uprobe->session_cnt);
+ if (!ri)
+ goto out;
+ }
+ for (uc = uprobe->consumers, sc = &ri->sc[0]; uc; uc = uc->next) {
int rc = 0;

if (uc->handler) {
- rc = uc->handler(uc, regs);
+ rc = uc->handler(uc, regs, uc->is_session ? &sc->cookie : NULL);
WARN(rc & ~UPROBE_HANDLER_MASK,
"bad rc=0x%x from %ps()\n", rc, uc->handler);
}

- if (uc->ret_handler)
+ if (uc->is_session) {
+ need_prep |= !rc;
+ remove = 0;
+ sc->id = uc->id;
+ sc->rc = rc;
+ sc++;
+ } else if (uc->ret_handler) {
need_prep = true;
+ }

remove &= rc;
}

if (need_prep && !remove)
- prepare_uretprobe(uprobe, regs); /* put bp at return */
+ ri = prepare_uretprobe(uprobe, regs, ri, uprobe->session_cnt); /* put bp at return */
+ kfree(ri);

if (remove && uprobe->consumers) {
WARN_ON(!uprobe_is_active(uprobe));
unapply_uprobe(uprobe, current->mm);
}
+ out:
up_read(&uprobe->register_rwsem);
}

+static struct session_consumer *
+consumer_find(struct session_consumer *sc, struct uprobe_consumer *uc)
+{
+ for (; sc && sc->id; sc++) {
+ if (sc->id == uc->id)
+ return sc;
+ }
+ return NULL;
+}
+
static void
handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
{
struct uprobe *uprobe = ri->uprobe;
+ struct session_consumer *sc, *tmp;
struct uprobe_consumer *uc;

down_read(&uprobe->register_rwsem);
- for (uc = uprobe->consumers; uc; uc = uc->next) {
- if (uc->ret_handler)
- uc->ret_handler(uc, ri->func, regs);
+ for (uc = uprobe->consumers, sc = &ri->sc[0]; uc; uc = uc->next) {
+ long *cookie = NULL;
+ int rc = 0;
+
+ if (uc->is_session) {
+ /*
+ * session_consumers are in order with uprobe_consumers,
+ * we just need to reflect that any uprobe_consumer could
+ * be removed or added
+ */
+ tmp = consumer_find(sc, uc);
+ if (tmp) {
+ rc = tmp->rc;
+ cookie = &tmp->cookie;
+ sc = tmp + 1;
+ } else {
+ rc = 1;
+ }
+ }
+
+ if (!rc && uc->ret_handler)
+ uc->ret_handler(uc, ri->func, regs, cookie);
}
up_read(&uprobe->register_rwsem);
}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f5154c051d2c..ae7c35379e4a 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -3329,7 +3329,8 @@ uprobe_multi_link_filter(struct uprobe_consumer *con, enum uprobe_filter_ctx ctx
}

static int
-uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs)
+uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs,
+ unsigned long *data)
{
struct bpf_uprobe *uprobe;

@@ -3338,7 +3339,8 @@ uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs)
}

static int
-uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs)
+uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs,
+ unsigned long *data)
{
struct bpf_uprobe *uprobe;

diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 8541fa1494ae..f7b17f08344c 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -88,9 +88,11 @@ static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev)
static int register_uprobe_event(struct trace_uprobe *tu);
static int unregister_uprobe_event(struct trace_uprobe *tu);

-static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
+static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs,
+ unsigned long *data);
static int uretprobe_dispatcher(struct uprobe_consumer *con,
- unsigned long func, struct pt_regs *regs);
+ unsigned long func, struct pt_regs *regs,
+ unsigned long *data);

#ifdef CONFIG_STACK_GROWSUP
static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
@@ -1500,7 +1502,8 @@ trace_uprobe_register(struct trace_event_call *event, enum trace_reg type,
}
}

-static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
+static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs,
+ unsigned long *data)
{
struct trace_uprobe *tu;
struct uprobe_dispatch_data udd;
@@ -1530,7 +1533,8 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
}

static int uretprobe_dispatcher(struct uprobe_consumer *con,
- unsigned long func, struct pt_regs *regs)
+ unsigned long func, struct pt_regs *regs,
+ unsigned long *data)
{
struct trace_uprobe *tu;
struct uprobe_dispatch_data udd;