Re: [PATCH 3/6] ftrace/x86: Warn and ignore graph tracing when RCU is disabled

From: Peter Zijlstra
Date: Thu Jan 26 2023 - 04:30:23 EST


On Wed, Jan 25, 2023 at 10:46:58AM -0800, Paul E. McKenney wrote:

> > Ofc. Paul might have an opinion on this glorious bodge ;-)
>
> For some definition of the word "glorious", to be sure. ;-)
>
> Am I correct that you have two things happening here? (1) Preventing
> trace recursion and (2) forcing RCU to pay attention when needed.

Mostly just (1), we're in an error situation, I'm not too worried about
(2).

> I cannot resist pointing out that you have re-invented RCU_NONIDLE(),
> though avoiding much of the overhead when not needed. ;-)

Yeah, this was the absolute minimal bodge I could come up with that
shuts up the rcu_derefence warning thing.

> I would have objections if this ever leaks out onto a non-error code path.

Agreed.

> There are things that need doing when RCU starts and stops watching,
> and this approach omits those things. Which again is OK in this case,
> where this code is only ever executed when something is already broken,
> but definitely *not* OK when things are not already broken.

And agreed.

Current version of the bodge looks like so (will repost the whole series
a little later today).

I managed to tickle the recursion so that it was a test-case for the
stack guard...

With this on, it prints just the one WARN and lives.

---
Subject: bug: Disable rcu_is_watching() during WARN/BUG
From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Date: Wed Jan 25 13:57:49 CET 2023

In order to avoid WARN/BUG from generating nested or even recursive
warnings, force rcu_is_watching() true during
WARN/lockdep_rcu_suspicious().

Notably things like unwinding the stack can trigger rcu_dereference()
warnings, which then triggers more unwinding which then triggers more
warnings etc..

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
include/linux/context_tracking.h | 27 +++++++++++++++++++++++++++
kernel/locking/lockdep.c | 3 +++
kernel/panic.c | 5 +++++
lib/bug.c | 15 ++++++++++++++-
4 files changed, 49 insertions(+), 1 deletion(-)

--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -130,9 +130,36 @@ static __always_inline unsigned long ct_
return arch_atomic_add_return(incby, this_cpu_ptr(&context_tracking.state));
}

+static __always_inline bool warn_rcu_enter(void)
+{
+ bool ret = false;
+
+ /*
+ * Horrible hack to shut up recursive RCU isn't watching fail since
+ * lots of the actual reporting also relies on RCU.
+ */
+ preempt_disable_notrace();
+ if (rcu_dynticks_curr_cpu_in_eqs()) {
+ ret = true;
+ ct_state_inc(RCU_DYNTICKS_IDX);
+ }
+
+ return ret;
+}
+
+static __always_inline void warn_rcu_exit(bool rcu)
+{
+ if (rcu)
+ ct_state_inc(RCU_DYNTICKS_IDX);
+ preempt_enable_notrace();
+}
+
#else
static inline void ct_idle_enter(void) { }
static inline void ct_idle_exit(void) { }
+
+static __always_inline bool warn_rcu_enter(void) { return false; }
+static __always_inline void warn_rcu_exit(bool rcu) { }
#endif /* !CONFIG_CONTEXT_TRACKING_IDLE */

#endif
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -55,6 +55,7 @@
#include <linux/rcupdate.h>
#include <linux/kprobes.h>
#include <linux/lockdep.h>
+#include <linux/context_tracking.h>

#include <asm/sections.h>

@@ -6555,6 +6556,7 @@ void lockdep_rcu_suspicious(const char *
{
struct task_struct *curr = current;
int dl = READ_ONCE(debug_locks);
+ bool rcu = warn_rcu_enter();

/* Note: the following can be executed concurrently, so be careful. */
pr_warn("\n");
@@ -6595,5 +6597,6 @@ void lockdep_rcu_suspicious(const char *
lockdep_print_held_locks(curr);
pr_warn("\nstack backtrace:\n");
dump_stack();
+ warn_rcu_exit(rcu);
}
EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -34,6 +34,7 @@
#include <linux/ratelimit.h>
#include <linux/debugfs.h>
#include <linux/sysfs.h>
+#include <linux/context_tracking.h>
#include <trace/events/error_report.h>
#include <asm/sections.h>

@@ -679,6 +680,7 @@ void __warn(const char *file, int line,
void warn_slowpath_fmt(const char *file, int line, unsigned taint,
const char *fmt, ...)
{
+ bool rcu = warn_rcu_enter();
struct warn_args args;

pr_warn(CUT_HERE);
@@ -693,11 +695,13 @@ void warn_slowpath_fmt(const char *file,
va_start(args.args, fmt);
__warn(file, line, __builtin_return_address(0), taint, NULL, &args);
va_end(args.args);
+ warn_rcu_exit(rcu);
}
EXPORT_SYMBOL(warn_slowpath_fmt);
#else
void __warn_printk(const char *fmt, ...)
{
+ bool rcu = warn_rcu_enter();
va_list args;

pr_warn(CUT_HERE);
@@ -705,6 +709,7 @@ void __warn_printk(const char *fmt, ...)
va_start(args, fmt);
vprintk(fmt, args);
va_end(args);
+ warn_rcu_exit(rcu);
}
EXPORT_SYMBOL(__warn_printk);
#endif
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -47,6 +47,7 @@
#include <linux/sched.h>
#include <linux/rculist.h>
#include <linux/ftrace.h>
+#include <linux/context_tracking.h>

extern struct bug_entry __start___bug_table[], __stop___bug_table[];

@@ -153,7 +154,7 @@ struct bug_entry *find_bug(unsigned long
return module_find_bug(bugaddr);
}

-enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
+static enum bug_trap_type __report_bug(unsigned long bugaddr, struct pt_regs *regs)
{
struct bug_entry *bug;
const char *file;
@@ -209,6 +210,18 @@ enum bug_trap_type report_bug(unsigned l
return BUG_TRAP_TYPE_BUG;
}

+enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
+{
+ enum bug_trap_type ret;
+ bool rcu = false;
+
+ rcu = warn_rcu_enter();
+ ret = __report_bug(bugaddr, regs);
+ warn_rcu_exit(rcu);
+
+ return ret;
+}
+
static void clear_once_table(struct bug_entry *start, struct bug_entry *end)
{
struct bug_entry *bug;