Re: [PATCH v2 11/11] perf/uprobe: Add uretprobe timer

From: Andrii Nakryiko
Date: Fri Jul 12 2024 - 17:44:16 EST


+ bpf

On Thu, Jul 11, 2024 at 4:07 AM Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
>
> In order to put a bound on the uretprobe_srcu critical section, add a
> timer to uprobe_task. Upon every RI added or removed the timer is
> pushed forward to now + 1s. If the timer were ever to fire, it would
> convert the SRCU 'reference' to a refcount reference if possible.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> ---
> include/linux/uprobes.h | 8 +++++
> kernel/events/uprobes.c | 67 ++++++++++++++++++++++++++++++++++++++++++++----
> 2 files changed, 69 insertions(+), 6 deletions(-)
>
> --- a/include/linux/uprobes.h
> +++ b/include/linux/uprobes.h
> @@ -15,6 +15,7 @@
> #include <linux/rbtree.h>
> #include <linux/types.h>
> #include <linux/wait.h>
> +#include <linux/timer.h>
>
> struct vm_area_struct;
> struct mm_struct;
> @@ -79,6 +80,10 @@ struct uprobe_task {
> struct return_instance *return_instances;
> unsigned int depth;
> unsigned int active_srcu_idx;
> +
> + struct timer_list ri_timer;
> + struct callback_head ri_task_work;
> + struct task_struct *task;
> };
>
> struct return_instance {
> @@ -86,7 +91,8 @@ struct return_instance {
> unsigned long func;
> unsigned long stack; /* stack pointer */
> unsigned long orig_ret_vaddr; /* original return address */
> - bool chained; /* true, if instance is nested */
> + u8 chained; /* true, if instance is nested */
> + u8 has_ref;

Why bool -> u8 switch? You don't touch chained, so why change its
type? And for has_ref you interchangeably use 0 and true for the same
field. Let's stick to bool as there is nothing wrong with it?

> int srcu_idx;
>
> struct return_instance *next; /* keep as stack */

[...]

> @@ -1822,13 +1864,20 @@ static int dup_utask(struct task_struct
> return -ENOMEM;
>
> *n = *o;
> - __srcu_clone_read_lock(&uretprobes_srcu, n->srcu_idx);
> + if (n->uprobe) {
> + if (n->has_ref)
> + get_uprobe(n->uprobe);
> + else
> + __srcu_clone_read_lock(&uretprobes_srcu, n->srcu_idx);
> + }
> n->next = NULL;
>
> *p = n;
> p = &n->next;
> n_utask->depth++;
> }
> + if (n_utask->return_instances)
> + mod_timer(&n_utask->ri_timer, jiffies + HZ);

let's add #define for HZ, so it's adjusted in just one place (instead
of 3 as it is right now)

Also, we can have up to 64 levels of uretprobe nesting, so,
technically, the user can cause a delay of 64 seconds in total. Maybe
let's use something smaller than a full second? After all, if the
user-space function has high latency, then this refcount congestion is
much less of a problem. I'd set it to something like 50-100 ms for
starters.

>
> return 0;
> }
> @@ -1967,6 +2016,7 @@ static void prepare_uretprobe(struct upr
>
> ri->srcu_idx = __srcu_read_lock(&uretprobes_srcu);
> ri->uprobe = uprobe;
> + ri->has_ref = 0;
> ri->func = instruction_pointer(regs);
> ri->stack = user_stack_pointer(regs);
> ri->orig_ret_vaddr = orig_ret_vaddr;
> @@ -1976,6 +2026,8 @@ static void prepare_uretprobe(struct upr
> ri->next = utask->return_instances;
> utask->return_instances = ri;
>
> + mod_timer(&utask->ri_timer, jiffies + HZ);
> +
> return;
>
> err_mem:
> @@ -2204,6 +2256,9 @@ handle_uretprobe_chain(struct return_ins
> struct uprobe *uprobe = ri->uprobe;
> struct uprobe_consumer *uc;
>
> + if (!uprobe)
> + return;
> +
> guard(srcu)(&uprobes_srcu);
>
> for_each_consumer_rcu(uc, uprobe->consumers) {
> @@ -2250,8 +2305,10 @@ static void handle_trampoline(struct pt_
>
> instruction_pointer_set(regs, ri->orig_ret_vaddr);
> do {
> - if (valid)
> + if (valid) {
> handle_uretprobe_chain(ri, regs);
> + mod_timer(&utask->ri_timer, jiffies + HZ);
> + }
> ri = free_ret_instance(ri);
> utask->depth--;
> } while (ri != next);
>
>