Re: [PATCH] seqlock: Use WRITE_ONCE() when updating sequence

From: Paul E. McKenney
Date: Wed Dec 18 2024 - 14:15:13 EST


On Wed, Dec 18, 2024 at 06:12:41PM +0100, Peter Zijlstra wrote:
>
> +linux-toolchains
>
> On Wed, Dec 18, 2024 at 08:59:47AM -0800, Paul E. McKenney wrote:
>
> > > Perhaps something like: (*(volatile unsigned int *)&s->sequence)++; ?
> > > I'd have to check what the compiler makes of that.
> > >
> > > /me mucks about with godbolt for a bit...
> > >
> > > GCC doesn't optimize that, but Clang does.
> > >
> > > I would still very much refrain from making this change until both
> > > compilers can generate sane code for it.
> >
> > Is GCC on track to do this, or do we need to encourage them?
>
> I have no clue; probably wise to offer encouragement.

Hopefully your +linux-toolchain is a start.

> > Just to make sure I understand, your proposal is to create an INC_ONCE()
> > or similar, and add it once compiler support is there? Seems reasonable
> > to me, just checking.
>
> I suppose we can work in parallel, add INC_ONCE() now, but not have a
> proper definition for GCC.

This passes an rcutorture smoke test, so feel free to add:

Reviewed-by: Paul E. McKenney <paulmck@xxxxxxxxxx>

> ---
> arch/riscv/kvm/vmid.c | 2 +-
> arch/s390/kernel/idle.c | 2 +-
> drivers/md/dm-vdo/indexer/index-session.c | 2 +-
> fs/xfs/libxfs/xfs_iext_tree.c | 2 +-
> include/linux/compiler-gcc.h | 5 +++++
> include/linux/compiler.h | 4 ++++
> include/linux/rcupdate_trace.h | 2 +-
> include/linux/srcutiny.h | 2 +-
> io_uring/io_uring.c | 2 +-
> kernel/rcu/srcutree.c | 4 ++--
> kernel/rcu/tree_plugin.h | 2 +-
> kernel/sched/fair.c | 2 +-
> mm/kfence/kfence_test.c | 2 +-
> security/apparmor/apparmorfs.c | 2 +-
> 14 files changed, 22 insertions(+), 13 deletions(-)
>
> diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
> index ddc98714ce8e..805a5acf669c 100644
> --- a/arch/riscv/kvm/vmid.c
> +++ b/arch/riscv/kvm/vmid.c
> @@ -90,7 +90,7 @@ void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu)
>
> /* First user of a new VMID version? */
> if (unlikely(vmid_next == 0)) {
> - WRITE_ONCE(vmid_version, READ_ONCE(vmid_version) + 1);
> + INC_ONCE(vmid_version);
> vmid_next = 1;
>
> /*
> diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
> index 39cb8d0ae348..8fb7cd75fe62 100644
> --- a/arch/s390/kernel/idle.c
> +++ b/arch/s390/kernel/idle.c
> @@ -45,7 +45,7 @@ void account_idle_time_irq(void)
>
> /* Account time spent with enabled wait psw loaded as idle time. */
> WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
> - WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
> + INC_ONC(idle->idle_count);
> account_idle_time(cputime_to_nsecs(idle_time));
> }
>
> diff --git a/drivers/md/dm-vdo/indexer/index-session.c b/drivers/md/dm-vdo/indexer/index-session.c
> index aee0914d604a..c5a7dee9dc66 100644
> --- a/drivers/md/dm-vdo/indexer/index-session.c
> +++ b/drivers/md/dm-vdo/indexer/index-session.c
> @@ -152,7 +152,7 @@ static void enter_callback_stage(struct uds_request *request)
>
> static inline void count_once(u64 *count_ptr)
> {
> - WRITE_ONCE(*count_ptr, READ_ONCE(*count_ptr) + 1);
> + INC_ONCE(*count_ptr);
> }
>
> static void update_session_stats(struct uds_request *request)
> diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
> index 8796f2b3e534..a1fcd4cf2424 100644
> --- a/fs/xfs/libxfs/xfs_iext_tree.c
> +++ b/fs/xfs/libxfs/xfs_iext_tree.c
> @@ -626,7 +626,7 @@ xfs_iext_realloc_root(
> */
> static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp)
> {
> - WRITE_ONCE(ifp->if_seq, READ_ONCE(ifp->if_seq) + 1);
> + INC_ONCE(ifp->if_seq);
> }
>
> void
> diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
> index c9b58188ec61..77c253e29758 100644
> --- a/include/linux/compiler-gcc.h
> +++ b/include/linux/compiler-gcc.h
> @@ -137,3 +137,8 @@
> #if GCC_VERSION < 90100
> #undef __alloc_size__
> #endif
> +
> +/*
> + * GCC can't properly optimize the real one with volatile on.
> + */
> +#define INC_ONCE(v) (v)++
> diff --git a/include/linux/compiler.h b/include/linux/compiler.h
> index efd43df3a99a..b1b13dac1b9e 100644
> --- a/include/linux/compiler.h
> +++ b/include/linux/compiler.h
> @@ -8,6 +8,10 @@
>
> #ifdef __KERNEL__
>
> +#ifndef INC_ONCE
> +#define INC_ONCE(v) (*(volatile typeof(v) *)&(v))++
> +#endif
> +
> /*
> * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
> * to disable branch tracing on a per file basis.
> diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
> index e6c44eb428ab..adb12e7304da 100644
> --- a/include/linux/rcupdate_trace.h
> +++ b/include/linux/rcupdate_trace.h
> @@ -50,7 +50,7 @@ static inline void rcu_read_lock_trace(void)
> {
> struct task_struct *t = current;
>
> - WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1);
> + INC_ONCE(t->trc_reader_nesting);
> barrier();
> if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
> t->trc_reader_special.b.need_mb)
> diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
> index 1321da803274..bd4362323733 100644
> --- a/include/linux/srcutiny.h
> +++ b/include/linux/srcutiny.h
> @@ -66,7 +66,7 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp)
>
> preempt_disable(); // Needed for PREEMPT_AUTO
> idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1;
> - WRITE_ONCE(ssp->srcu_lock_nesting[idx], READ_ONCE(ssp->srcu_lock_nesting[idx]) + 1);
> + INC_ONCE(ssp->srcu_lock_nesting[idx]);
> preempt_enable();
> return idx;
> }
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 06ff41484e29..ef3d4871e775 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -373,7 +373,7 @@ static void io_account_cq_overflow(struct io_ring_ctx *ctx)
> {
> struct io_rings *r = ctx->rings;
>
> - WRITE_ONCE(r->cq_overflow, READ_ONCE(r->cq_overflow) + 1);
> + INC_ONCE(r->cq_overflow);
> ctx->cq_extra--;
> }
>
> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> index 5e2e53464794..a812af81dbff 100644
> --- a/kernel/rcu/srcutree.c
> +++ b/kernel/rcu/srcutree.c
> @@ -656,7 +656,7 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp)
> jbase += j - gpstart;
> if (!jbase) {
> ASSERT_EXCLUSIVE_WRITER(sup->srcu_n_exp_nodelay);
> - WRITE_ONCE(sup->srcu_n_exp_nodelay, READ_ONCE(sup->srcu_n_exp_nodelay) + 1);
> + INC_ONCE(sup->srcu_n_exp_nodelay);
> if (READ_ONCE(sup->srcu_n_exp_nodelay) > srcu_max_nodelay_phase)
> jbase = 1;
> }
> @@ -1856,7 +1856,7 @@ static void process_srcu(struct work_struct *work)
> j = jiffies;
> if (READ_ONCE(sup->reschedule_jiffies) == j) {
> ASSERT_EXCLUSIVE_WRITER(sup->reschedule_count);
> - WRITE_ONCE(sup->reschedule_count, READ_ONCE(sup->reschedule_count) + 1);
> + INC_ONCE(sup->reschedule_count);
> if (READ_ONCE(sup->reschedule_count) > srcu_max_nodelay)
> curdelay = 1;
> } else {
> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> index 3927ea5f7955..51f525089c27 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -387,7 +387,7 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
>
> static void rcu_preempt_read_enter(void)
> {
> - WRITE_ONCE(current->rcu_read_lock_nesting, READ_ONCE(current->rcu_read_lock_nesting) + 1);
> + INC_ONCE(current->rcu_read_lock_nesting);
> }
>
> static int rcu_preempt_read_exit(void)
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index f5329672815b..f0927407abbe 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3242,7 +3242,7 @@ static void reset_ptenuma_scan(struct task_struct *p)
> * statistical sampling. Use READ_ONCE/WRITE_ONCE, which are not
> * expensive, to avoid any form of compiler optimizations:
> */
> - WRITE_ONCE(p->mm->numa_scan_seq, READ_ONCE(p->mm->numa_scan_seq) + 1);
> + INC_ONCE(p->mm->numa_scan_seq);
> p->mm->numa_scan_offset = 0;
> }
>
> diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c
> index f65fb182466d..e0bf31b1875e 100644
> --- a/mm/kfence/kfence_test.c
> +++ b/mm/kfence/kfence_test.c
> @@ -517,7 +517,7 @@ static void test_kmalloc_aligned_oob_write(struct kunit *test)
> * fault immediately after it.
> */
> expect.addr = buf + size;
> - WRITE_ONCE(*expect.addr, READ_ONCE(*expect.addr) + 1);
> + INC_ONCE(*expect.addr);
> KUNIT_EXPECT_FALSE(test, report_available());
> test_free(buf);
> KUNIT_EXPECT_TRUE(test, report_matches(&expect));
> diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
> index 2c0185ebc900..63f5c8387764 100644
> --- a/security/apparmor/apparmorfs.c
> +++ b/security/apparmor/apparmorfs.c
> @@ -596,7 +596,7 @@ static __poll_t ns_revision_poll(struct file *file, poll_table *pt)
>
> void __aa_bump_ns_revision(struct aa_ns *ns)
> {
> - WRITE_ONCE(ns->revision, READ_ONCE(ns->revision) + 1);
> + INC_ONCE(ns->revision);
> wake_up_interruptible(&ns->wait);
> }
>