Re: [PATCH V2 10/11] x86/rcu: Add rcu_preempt_count

From: Joel Fernandes
Date: Tue Apr 23 2024 - 14:09:26 EST


On Sun, Apr 7, 2024 at 5:06 AM Lai Jiangshan <jiangshanlai@xxxxxxxxx> wrote:
>
> From: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx>
>
> Implement PCPU_RCU_PREEMPT_COUNT for x86.
> Mainly copied from asm/preempt.h
>
> Make rcu_read_[un]lock() inlined for rcu-preempt.

The changelog looks wrong: rcu_read_[un]lock() was inlined in the previous
patch, not in this one. Can you confirm?

- Joel

> Make rcu_read_lock() only one instruction.
> Make rcu_read_unlock() only two instructions in the fast path.
>
> Cc: "Paul E. McKenney" <paulmck@xxxxxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Frederic Weisbecker <frederic@xxxxxxxxxx>
> Signed-off-by: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx>
> ---
> arch/x86/Kconfig | 1 +
> arch/x86/include/asm/current.h | 3 +
> arch/x86/include/asm/rcu_preempt.h | 107 +++++++++++++++++++++++++++++
> arch/x86/kernel/cpu/common.c | 4 ++
> 4 files changed, 115 insertions(+)
> create mode 100644 arch/x86/include/asm/rcu_preempt.h
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 4fff6ed46e90..e805cac3763d 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -256,6 +256,7 @@ config X86
> select HAVE_OBJTOOL if X86_64
> select HAVE_OPTPROBES
> select HAVE_PAGE_SIZE_4KB
> + select HAVE_PCPU_RCU_PREEMPT_COUNT
> select HAVE_PCSPKR_PLATFORM
> select HAVE_PERF_EVENTS
> select HAVE_PERF_EVENTS_NMI
> diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
> index bf5953883ec3..dcc2ef784120 100644
> --- a/arch/x86/include/asm/current.h
> +++ b/arch/x86/include/asm/current.h
> @@ -24,6 +24,9 @@ struct pcpu_hot {
> unsigned long top_of_stack;
> void *hardirq_stack_ptr;
> u16 softirq_pending;
> +#ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
> + int rcu_preempt_count;
> +#endif // #ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
> #ifdef CONFIG_X86_64
> bool hardirq_stack_inuse;
> #else
> diff --git a/arch/x86/include/asm/rcu_preempt.h b/arch/x86/include/asm/rcu_preempt.h
> new file mode 100644
> index 000000000000..cb25ebe038a5
> --- /dev/null
> +++ b/arch/x86/include/asm/rcu_preempt.h
> @@ -0,0 +1,107 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __ASM_RCU_PREEMPT_H
> +#define __ASM_RCU_PREEMPT_H
> +
> +#include <asm/rmwcc.h>
> +#include <asm/percpu.h>
> +#include <asm/current.h>
> +
> +#ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
> +
> +/* We use the MSB mostly because it's available */
> +#define RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED 0x80000000
> +
> +/*
> + * We use the RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED bit as an inverted
> + * current->rcu_read_unlock_special.s such that a decrement hitting 0
> + * means we can and should call rcu_read_unlock_special().
> + */
> +#define RCU_PREEMPT_INIT (0 + RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED)
> +
> +/*
> + * We mask the RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED bit so as not to
> + * confuse all current users that think a non-zero value indicates we
> + * are in a critical section.
> + */
> +static inline int pcpu_rcu_preempt_count(void)
> +{
> + return raw_cpu_read_4(pcpu_hot.rcu_preempt_count) & ~RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED;
> +}
> +
> +static inline void pcpu_rcu_preempt_count_set(int count)
> +{
> + int old, new;
> +
> + old = raw_cpu_read_4(pcpu_hot.rcu_preempt_count);
> + do {
> + new = (old & RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED) |
> + (count & ~RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED);
> + } while (!raw_cpu_try_cmpxchg_4(pcpu_hot.rcu_preempt_count, &old, new));
> +}
> +
> +/*
> + * We fold the RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED bit into the RCU
> + * preempt count such that rcu_read_unlock() can decrement and test for
> + * the need of unlock-special handling with a single instruction.
> + *
> + * We invert the actual bit, so that when the decrement hits 0 we know
> + * we both reach a quiescent state (no rcu preempt count) and need to
> + * handle unlock-special (the bit is cleared), normally to report the
> + * quiescent state immediately.
> + */
> +
> +static inline void pcpu_rcu_preempt_special_set(void)
> +{
> + raw_cpu_and_4(pcpu_hot.rcu_preempt_count, ~RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED);
> +}
> +
> +static inline void pcpu_rcu_preempt_special_clear(void)
> +{
> + raw_cpu_or_4(pcpu_hot.rcu_preempt_count, RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED);
> +}
> +
> +static inline bool pcpu_rcu_preempt_special_test(void)
> +{
> + return !(raw_cpu_read_4(pcpu_hot.rcu_preempt_count) & RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED);
> +}
> +
> +static inline void pcpu_rcu_preempt_switch(int count, bool special)
> +{
> + if (likely(!special))
> + raw_cpu_write(pcpu_hot.rcu_preempt_count, count | RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED);
> + else
> + raw_cpu_write(pcpu_hot.rcu_preempt_count, count);
> +}
> +
> +/*
> + * The various rcu_preempt_count add/sub methods
> + */
> +
> +static __always_inline void pcpu_rcu_preempt_count_add(int val)
> +{
> + raw_cpu_add_4(pcpu_hot.rcu_preempt_count, val);
> +}
> +
> +static __always_inline void pcpu_rcu_preempt_count_sub(int val)
> +{
> + raw_cpu_add_4(pcpu_hot.rcu_preempt_count, -val);
> +}
> +
> +/*
> + * Because we keep RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED set when we do
> + * _not_ need to handle unlock-special, a fast-path decrement hits zero
> + * only when the count drops to zero and unlock-special handling is needed.
> + */
> +static __always_inline bool pcpu_rcu_preempt_count_dec_and_test(void)
> +{
> + return GEN_UNARY_RMWcc("decl", __my_cpu_var(pcpu_hot.rcu_preempt_count), e,
> + __percpu_arg([var]));
> +}
> +
> +#define pcpu_rcu_read_unlock_special() \
> +do { \
> + rcu_read_unlock_special(); \
> +} while (0)
> +
> +#endif // #ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
> +
> +#endif /* __ASM_RCU_PREEMPT_H */
> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index 5c1e6d6be267..918b1f5cb75d 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -1995,6 +1995,10 @@ DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
> .current_task = &init_task,
> .preempt_count = INIT_PREEMPT_COUNT,
> .top_of_stack = TOP_OF_INIT_STACK,
> +
> +#ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
> + .rcu_preempt_count = RCU_PREEMPT_INIT,
> +#endif // #ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
> };
> EXPORT_PER_CPU_SYMBOL(pcpu_hot);
> EXPORT_PER_CPU_SYMBOL(const_pcpu_hot);
> --
> 2.19.1.6.gb485710b
>
>