Re: [PATCH] seqlock: Use WRITE_ONCE() when updating sequence

From: Peter Zijlstra
Date: Wed Dec 18 2024 - 12:12:58 EST



+linux-toolchains

On Wed, Dec 18, 2024 at 08:59:47AM -0800, Paul E. McKenney wrote:

> > Perhaps something like: (*(volatile unsigned int *)&s->sequence)++; ?
> > I'd have to check what the compiler makes of that.
> >
> > /me mucks about with godbolt for a bit...
> >
> > GCC doesn't optimize that, but Clang does.
> >
> > I would still very much refrain from making this change until both
> > compilers can generate sane code for it.
>
> Is GCC on track to do this, or do we need to encourage them?

I have no clue; probably wise to offer encouragement.

> Just to make sure I understand, your proposal is to create an INC_ONCE()
> or similar, and add it once compiler support is there? Seems reasonable
> to me, just checking.

I suppose we can work in parallel, add INC_ONCE() now, but not have a
proper definition for GCC.

---
arch/riscv/kvm/vmid.c | 2 +-
arch/s390/kernel/idle.c | 2 +-
drivers/md/dm-vdo/indexer/index-session.c | 2 +-
fs/xfs/libxfs/xfs_iext_tree.c | 2 +-
include/linux/compiler-gcc.h | 5 +++++
include/linux/compiler.h | 4 ++++
include/linux/rcupdate_trace.h | 2 +-
include/linux/srcutiny.h | 2 +-
io_uring/io_uring.c | 2 +-
kernel/rcu/srcutree.c | 4 ++--
kernel/rcu/tree_plugin.h | 2 +-
kernel/sched/fair.c | 2 +-
mm/kfence/kfence_test.c | 2 +-
security/apparmor/apparmorfs.c | 2 +-
14 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
index ddc98714ce8e..805a5acf669c 100644
--- a/arch/riscv/kvm/vmid.c
+++ b/arch/riscv/kvm/vmid.c
@@ -90,7 +90,7 @@ void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu)

/* First user of a new VMID version? */
if (unlikely(vmid_next == 0)) {
- WRITE_ONCE(vmid_version, READ_ONCE(vmid_version) + 1);
+ INC_ONCE(vmid_version);
vmid_next = 1;

/*
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 39cb8d0ae348..8fb7cd75fe62 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -45,7 +45,7 @@ void account_idle_time_irq(void)

/* Account time spent with enabled wait psw loaded as idle time. */
WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
- WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
+ INC_ONC(idle->idle_count);
account_idle_time(cputime_to_nsecs(idle_time));
}

diff --git a/drivers/md/dm-vdo/indexer/index-session.c b/drivers/md/dm-vdo/indexer/index-session.c
index aee0914d604a..c5a7dee9dc66 100644
--- a/drivers/md/dm-vdo/indexer/index-session.c
+++ b/drivers/md/dm-vdo/indexer/index-session.c
@@ -152,7 +152,7 @@ static void enter_callback_stage(struct uds_request *request)

static inline void count_once(u64 *count_ptr)
{
- WRITE_ONCE(*count_ptr, READ_ONCE(*count_ptr) + 1);
+ INC_ONCE(*count_ptr);
}

static void update_session_stats(struct uds_request *request)
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index 8796f2b3e534..a1fcd4cf2424 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -626,7 +626,7 @@ xfs_iext_realloc_root(
*/
static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp)
{
- WRITE_ONCE(ifp->if_seq, READ_ONCE(ifp->if_seq) + 1);
+ INC_ONCE(ifp->if_seq);
}

void
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index c9b58188ec61..77c253e29758 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -137,3 +137,8 @@
#if GCC_VERSION < 90100
#undef __alloc_size__
#endif
+
+/*
+ * GCC can't properly optimize the real one with volatile on.
+ */
+#define INC_ONCE(v) (v)++
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index efd43df3a99a..b1b13dac1b9e 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -8,6 +8,10 @@

#ifdef __KERNEL__

+#ifndef INC_ONCE
+#define INC_ONCE(v) (*(volatile typeof(v) *)&(v))++
+#endif
+
/*
* Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
* to disable branch tracing on a per file basis.
diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
index e6c44eb428ab..adb12e7304da 100644
--- a/include/linux/rcupdate_trace.h
+++ b/include/linux/rcupdate_trace.h
@@ -50,7 +50,7 @@ static inline void rcu_read_lock_trace(void)
{
struct task_struct *t = current;

- WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1);
+ INC_ONCE(t->trc_reader_nesting);
barrier();
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
t->trc_reader_special.b.need_mb)
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 1321da803274..bd4362323733 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -66,7 +66,7 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp)

preempt_disable(); // Needed for PREEMPT_AUTO
idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1;
- WRITE_ONCE(ssp->srcu_lock_nesting[idx], READ_ONCE(ssp->srcu_lock_nesting[idx]) + 1);
+ INC_ONCE(ssp->srcu_lock_nesting[idx]);
preempt_enable();
return idx;
}
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 06ff41484e29..ef3d4871e775 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -373,7 +373,7 @@ static void io_account_cq_overflow(struct io_ring_ctx *ctx)
{
struct io_rings *r = ctx->rings;

- WRITE_ONCE(r->cq_overflow, READ_ONCE(r->cq_overflow) + 1);
+ INC_ONCE(r->cq_overflow);
ctx->cq_extra--;
}

diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 5e2e53464794..a812af81dbff 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -656,7 +656,7 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp)
jbase += j - gpstart;
if (!jbase) {
ASSERT_EXCLUSIVE_WRITER(sup->srcu_n_exp_nodelay);
- WRITE_ONCE(sup->srcu_n_exp_nodelay, READ_ONCE(sup->srcu_n_exp_nodelay) + 1);
+ INC_ONCE(sup->srcu_n_exp_nodelay);
if (READ_ONCE(sup->srcu_n_exp_nodelay) > srcu_max_nodelay_phase)
jbase = 1;
}
@@ -1856,7 +1856,7 @@ static void process_srcu(struct work_struct *work)
j = jiffies;
if (READ_ONCE(sup->reschedule_jiffies) == j) {
ASSERT_EXCLUSIVE_WRITER(sup->reschedule_count);
- WRITE_ONCE(sup->reschedule_count, READ_ONCE(sup->reschedule_count) + 1);
+ INC_ONCE(sup->reschedule_count);
if (READ_ONCE(sup->reschedule_count) > srcu_max_nodelay)
curdelay = 1;
} else {
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 3927ea5f7955..51f525089c27 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -387,7 +387,7 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)

static void rcu_preempt_read_enter(void)
{
- WRITE_ONCE(current->rcu_read_lock_nesting, READ_ONCE(current->rcu_read_lock_nesting) + 1);
+ INC_ONCE(current->rcu_read_lock_nesting);
}

static int rcu_preempt_read_exit(void)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f5329672815b..f0927407abbe 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3242,7 +3242,7 @@ static void reset_ptenuma_scan(struct task_struct *p)
* statistical sampling. Use READ_ONCE/WRITE_ONCE, which are not
* expensive, to avoid any form of compiler optimizations:
*/
- WRITE_ONCE(p->mm->numa_scan_seq, READ_ONCE(p->mm->numa_scan_seq) + 1);
+ INC_ONCE(p->mm->numa_scan_seq);
p->mm->numa_scan_offset = 0;
}

diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c
index f65fb182466d..e0bf31b1875e 100644
--- a/mm/kfence/kfence_test.c
+++ b/mm/kfence/kfence_test.c
@@ -517,7 +517,7 @@ static void test_kmalloc_aligned_oob_write(struct kunit *test)
* fault immediately after it.
*/
expect.addr = buf + size;
- WRITE_ONCE(*expect.addr, READ_ONCE(*expect.addr) + 1);
+ INC_ONCE(*expect.addr);
KUNIT_EXPECT_FALSE(test, report_available());
test_free(buf);
KUNIT_EXPECT_TRUE(test, report_matches(&expect));
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 2c0185ebc900..63f5c8387764 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -596,7 +596,7 @@ static __poll_t ns_revision_poll(struct file *file, poll_table *pt)

void __aa_bump_ns_revision(struct aa_ns *ns)
{
- WRITE_ONCE(ns->revision, READ_ONCE(ns->revision) + 1);
+ INC_ONCE(ns->revision);
wake_up_interruptible(&ns->wait);
}