Re: [PATCH 3/6] ftrace/x86: Warn and ignore graph tracing when RCU is disabled
From: Paul E. McKenney
Date: Sat Jan 28 2023 - 14:12:09 EST
On Thu, Jan 26, 2023 at 10:28:51AM +0100, Peter Zijlstra wrote:
> On Wed, Jan 25, 2023 at 10:46:58AM -0800, Paul E. McKenney wrote:
>
> > > Ofc. Paul might have an opinion on this glorious bodge ;-)
> >
> > For some definition of the word "glorious", to be sure. ;-)
> >
> > Am I correct that you have two things happening here? (1) Preventing
> > trace recursion and (2) forcing RCU to pay attention when needed.
>
> Mostly just (1), we're in an error situation, I'm not too worried about
> (2).
>
> > I cannot resist pointing out that you have re-invented RCU_NONIDLE(),
> > though avoiding much of the overhead when not needed. ;-)
>
> Yeah, this was the absolute minimal bodge I could come up with that
> shuts up the rcu_derefence warning thing.
>
> > I would have objections if this ever leaks out onto a non-error code path.
>
> Agreed.
>
> > There are things that need doing when RCU starts and stops watching,
> > and this approach omits those things. Which again is OK in this case,
> > where this code is only ever executed when something is already broken,
> > but definitely *not* OK when things are not already broken.
>
> And agreed.
>
> Current version of the bodge looks like so (will repost the whole series
> a little later today).
>
> I managed to tickle the recursion so that it was a test-case for the
> stack guard...
>
> With this on, it prints just the one WARN and lives.
>
> ---
> Subject: bug: Disable rcu_is_watching() during WARN/BUG
> From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Date: Wed Jan 25 13:57:49 CET 2023
>
> In order to avoid WARN/BUG from generating nested or even recursive
> warnings, force rcu_is_watching() true during
> WARN/lockdep_rcu_suspicious().
>
> Notably things like unwinding the stack can trigger rcu_dereference()
> warnings, which then triggers more unwinding which then triggers more
> warnings etc..
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
>From an RCU perspective:
Acked-by: Paul E. McKenney <paulmck@xxxxxxxxxx>
> ---
> include/linux/context_tracking.h | 27 +++++++++++++++++++++++++++
> kernel/locking/lockdep.c | 3 +++
> kernel/panic.c | 5 +++++
> lib/bug.c | 15 ++++++++++++++-
> 4 files changed, 49 insertions(+), 1 deletion(-)
>
> --- a/include/linux/context_tracking.h
> +++ b/include/linux/context_tracking.h
> @@ -130,9 +130,36 @@ static __always_inline unsigned long ct_
> return arch_atomic_add_return(incby, this_cpu_ptr(&context_tracking.state));
> }
>
> +static __always_inline bool warn_rcu_enter(void)
> +{
> + bool ret = false;
> +
> + /*
> + * Horrible hack to shut up recursive RCU isn't watching fail since
> + * lots of the actual reporting also relies on RCU.
> + */
> + preempt_disable_notrace();
> + if (rcu_dynticks_curr_cpu_in_eqs()) {
> + ret = true;
> + ct_state_inc(RCU_DYNTICKS_IDX);
> + }
> +
> + return ret;
> +}
> +
> +static __always_inline void warn_rcu_exit(bool rcu)
> +{
> + if (rcu)
> + ct_state_inc(RCU_DYNTICKS_IDX);
> + preempt_enable_notrace();
> +}
> +
> #else
> static inline void ct_idle_enter(void) { }
> static inline void ct_idle_exit(void) { }
> +
> +static __always_inline bool warn_rcu_enter(void) { return false; }
> +static __always_inline void warn_rcu_exit(bool rcu) { }
> #endif /* !CONFIG_CONTEXT_TRACKING_IDLE */
>
> #endif
> --- a/kernel/locking/lockdep.c
> +++ b/kernel/locking/lockdep.c
> @@ -55,6 +55,7 @@
> #include <linux/rcupdate.h>
> #include <linux/kprobes.h>
> #include <linux/lockdep.h>
> +#include <linux/context_tracking.h>
>
> #include <asm/sections.h>
>
> @@ -6555,6 +6556,7 @@ void lockdep_rcu_suspicious(const char *
> {
> struct task_struct *curr = current;
> int dl = READ_ONCE(debug_locks);
> + bool rcu = warn_rcu_enter();
>
> /* Note: the following can be executed concurrently, so be careful. */
> pr_warn("\n");
> @@ -6595,5 +6597,6 @@ void lockdep_rcu_suspicious(const char *
> lockdep_print_held_locks(curr);
> pr_warn("\nstack backtrace:\n");
> dump_stack();
> + warn_rcu_exit(rcu);
> }
> EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
> --- a/kernel/panic.c
> +++ b/kernel/panic.c
> @@ -34,6 +34,7 @@
> #include <linux/ratelimit.h>
> #include <linux/debugfs.h>
> #include <linux/sysfs.h>
> +#include <linux/context_tracking.h>
> #include <trace/events/error_report.h>
> #include <asm/sections.h>
>
> @@ -679,6 +680,7 @@ void __warn(const char *file, int line,
> void warn_slowpath_fmt(const char *file, int line, unsigned taint,
> const char *fmt, ...)
> {
> + bool rcu = warn_rcu_enter();
> struct warn_args args;
>
> pr_warn(CUT_HERE);
> @@ -693,11 +695,13 @@ void warn_slowpath_fmt(const char *file,
> va_start(args.args, fmt);
> __warn(file, line, __builtin_return_address(0), taint, NULL, &args);
> va_end(args.args);
> + warn_rcu_exit(rcu);
> }
> EXPORT_SYMBOL(warn_slowpath_fmt);
> #else
> void __warn_printk(const char *fmt, ...)
> {
> + bool rcu = warn_rcu_enter();
> va_list args;
>
> pr_warn(CUT_HERE);
> @@ -705,6 +709,7 @@ void __warn_printk(const char *fmt, ...)
> va_start(args, fmt);
> vprintk(fmt, args);
> va_end(args);
> + warn_rcu_exit(rcu);
> }
> EXPORT_SYMBOL(__warn_printk);
> #endif
> --- a/lib/bug.c
> +++ b/lib/bug.c
> @@ -47,6 +47,7 @@
> #include <linux/sched.h>
> #include <linux/rculist.h>
> #include <linux/ftrace.h>
> +#include <linux/context_tracking.h>
>
> extern struct bug_entry __start___bug_table[], __stop___bug_table[];
>
> @@ -153,7 +154,7 @@ struct bug_entry *find_bug(unsigned long
> return module_find_bug(bugaddr);
> }
>
> -enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
> +static enum bug_trap_type __report_bug(unsigned long bugaddr, struct pt_regs *regs)
> {
> struct bug_entry *bug;
> const char *file;
> @@ -209,6 +210,18 @@ enum bug_trap_type report_bug(unsigned l
> return BUG_TRAP_TYPE_BUG;
> }
>
> +enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
> +{
> + enum bug_trap_type ret;
> + bool rcu = false;
> +
> + rcu = warn_rcu_enter();
> + ret = __report_bug(bugaddr, regs);
> + warn_rcu_exit(rcu);
> +
> + return ret;
> +}
> +
> static void clear_once_table(struct bug_entry *start, struct bug_entry *end)
> {
> struct bug_entry *bug;