[RFC PATCH 05/10] rcu: Enable RCU callbacks to benefit from expedited grace periods
From: Puranjay Mohan
Date: Fri Apr 17 2026 - 19:16:42 EST
Currently, RCU callbacks only track normal grace period sequence
numbers. This means callbacks must wait for normal grace periods to
complete even when expedited grace periods have already elapsed.
This commit uses the full rcu_gp_oldstate structure (which tracks both
normal and expedited GP sequences) throughout the callback
infrastructure.
The rcu_segcblist_advance() function now checks both normal and
expedited GP completion via poll_state_synchronize_rcu_full(), becoming
parameterless since it reads the GP state internally.
rcu_segcblist_accelerate() stores the full GP state (both normal and
expedited sequences) instead of just the normal sequence.
The rcu_accelerate_cbs() and rcu_accelerate_cbs_unlocked() functions use
get_state_synchronize_rcu_full() to capture both GP sequences. The NOCB
code uses poll_state_synchronize_rcu_full() for advance checks instead
of comparing only the normal GP sequence.
srcu_segcblist_advance() becomes a standalone implementation because it
compares SRCU sequences directly (it cannot use
poll_state_synchronize_rcu_full(), which reads RCU-specific globals).
srcu_segcblist_accelerate() sets rgos_exp to RCU_GET_STATE_NOT_TRACKED
so that poll_state_synchronize_rcu_full() compares only the
rgosp->rgos_norm field and ignores rgos_exp.
Reviewed-by: Paul E. McKenney <paulmck@xxxxxxxxxx>
Signed-off-by: Puranjay Mohan <puranjay@xxxxxxxxxx>
---
kernel/rcu/rcu_segcblist.c | 30 ++++++++++++++++++++++++------
kernel/rcu/rcu_segcblist.h | 2 +-
kernel/rcu/tree.c | 9 +++------
kernel/rcu/tree_nocb.h | 33 +++++++++++++++++++++++----------
4 files changed, 51 insertions(+), 23 deletions(-)
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 00e164db8b74..11174e2be3c2 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
+#include "rcu.h"
#include "rcu_segcblist.h"
/* Initialize simple callback list. */
@@ -494,9 +495,9 @@ static void rcu_segcblist_advance_compact(struct rcu_segcblist *rsclp, int i)
/*
* Advance the callbacks in the specified rcu_segcblist structure based
- * on the current value passed in for the grace-period counter.
+ * on the current value of the grace-period counter.
*/
-void rcu_segcblist_advance(struct rcu_segcblist *rsclp, struct rcu_gp_oldstate *rgosp)
+void rcu_segcblist_advance(struct rcu_segcblist *rsclp)
{
int i;
@@ -509,7 +510,7 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, struct rcu_gp_oldstate *
* are ready to invoke, and put them into the RCU_DONE_TAIL segment.
*/
for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
- if (ULONG_CMP_LT(rgosp->rgos_norm, rsclp->gp_seq_full[i].rgos_norm))
+ if (!poll_state_synchronize_rcu_full(&rsclp->gp_seq_full[i]))
break;
WRITE_ONCE(rsclp->tails[RCU_DONE_TAIL], rsclp->tails[i]);
rcu_segcblist_move_seglen(rsclp, i, RCU_DONE_TAIL);
@@ -595,7 +596,7 @@ bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, struct rcu_gp_oldstat
*/
for (; i < RCU_NEXT_TAIL; i++) {
WRITE_ONCE(rsclp->tails[i], rsclp->tails[RCU_NEXT_TAIL]);
- rsclp->gp_seq_full[i].rgos_norm = rgosp->rgos_norm;
+ rsclp->gp_seq_full[i] = *rgosp;
}
return true;
}
@@ -637,14 +638,31 @@ void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
void srcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq)
{
- struct rcu_gp_oldstate rgos = { .rgos_norm = seq };
+ int i;
- rcu_segcblist_advance(rsclp, &rgos);
+ WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
+ if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
+ return;
+
+ for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
+ if (ULONG_CMP_LT(seq, rsclp->gp_seq_full[i].rgos_norm))
+ break;
+ WRITE_ONCE(rsclp->tails[RCU_DONE_TAIL], rsclp->tails[i]);
+ rcu_segcblist_move_seglen(rsclp, i, RCU_DONE_TAIL);
+ }
+
+ /* If no callbacks moved, nothing more need be done. */
+ if (i == RCU_WAIT_TAIL)
+ return;
+
+ rcu_segcblist_advance_compact(rsclp, i);
}
bool srcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq)
{
struct rcu_gp_oldstate rgos = { .rgos_norm = seq };
+ if (IS_ENABLED(CONFIG_SMP))
+ rgos.rgos_exp = RCU_GET_STATE_NOT_TRACKED;
return rcu_segcblist_accelerate(rsclp, &rgos);
}
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 2c06ab830a3d..6e05fdf93e7b 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -139,7 +139,7 @@ void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
struct rcu_cblist *rclp);
void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
struct rcu_cblist *rclp);
-void rcu_segcblist_advance(struct rcu_segcblist *rsclp, struct rcu_gp_oldstate *rgosp);
+void rcu_segcblist_advance(struct rcu_segcblist *rsclp);
bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, struct rcu_gp_oldstate *rgosp);
void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
struct rcu_segcblist *src_rsclp);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 607fc5715cd1..35076092f754 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1164,7 +1164,7 @@ static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
* accelerating callback invocation to an earlier grace-period
* number.
*/
- rgos.rgos_norm = rcu_seq_snap(&rcu_state.gp_seq);
+ get_state_synchronize_rcu_full(&rgos);
if (rcu_segcblist_accelerate(&rdp->cblist, &rgos))
ret = rcu_start_this_gp(rnp, rdp, rgos.rgos_norm);
@@ -1193,7 +1193,7 @@ static void rcu_accelerate_cbs_unlocked(struct rcu_node *rnp,
bool needwake;
rcu_lockdep_assert_cblist_protected(rdp);
- rgos.rgos_norm = rcu_seq_snap(&rcu_state.gp_seq);
+ get_state_synchronize_rcu_full(&rgos);
if (!READ_ONCE(rdp->gpwrap) && ULONG_CMP_GE(rdp->gp_seq_needed, rgos.rgos_norm)) {
/* Old request still live, so mark recent callbacks. */
(void)rcu_segcblist_accelerate(&rdp->cblist, &rgos);
@@ -1218,8 +1218,6 @@ static void rcu_accelerate_cbs_unlocked(struct rcu_node *rnp,
*/
static bool rcu_advance_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
{
- struct rcu_gp_oldstate rgos;
-
rcu_lockdep_assert_cblist_protected(rdp);
raw_lockdep_assert_held_rcu_node(rnp);
@@ -1231,8 +1229,7 @@ static bool rcu_advance_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
* Find all callbacks whose ->gp_seq numbers indicate that they
* are ready to invoke, and put them into the RCU_DONE_TAIL sublist.
*/
- rgos.rgos_norm = rnp->gp_seq;
- rcu_segcblist_advance(&rdp->cblist, &rgos);
+ rcu_segcblist_advance(&rdp->cblist);
/* Classify any remaining callbacks. */
return rcu_accelerate_cbs(rnp, rdp);
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 1837eedfb8c2..7462cd5e2507 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -502,7 +502,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
}
if (j != rdp->nocb_gp_adv_time &&
rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq_full) &&
- rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq_full.rgos_norm)) {
+ poll_state_synchronize_rcu_full(&cur_gp_seq_full)) {
rcu_advance_cbs_nowake(rdp->mynode, rdp);
rdp->nocb_gp_adv_time = j;
}
@@ -731,7 +731,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
if (!rcu_segcblist_restempty(&rdp->cblist,
RCU_NEXT_READY_TAIL) ||
(rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq_full) &&
- rcu_seq_done(&rnp->gp_seq, cur_gp_seq_full.rgos_norm))) {
+ poll_state_synchronize_rcu_full(&cur_gp_seq_full))) {
raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
needwake_gp = rcu_advance_cbs(rnp, rdp);
wasempty = rcu_segcblist_restempty(&rdp->cblist,
@@ -742,7 +742,18 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
WARN_ON_ONCE(wasempty &&
!rcu_segcblist_restempty(&rdp->cblist,
RCU_NEXT_READY_TAIL));
- if (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq_full)) {
+ /*
+ * Only request a GP wait if the next pending callback's
+ * GP has not already completed (normal or expedited).
+ * If poll_state_synchronize_rcu_full() says it completed,
+ * then rcu_advance_cbs() above already moved those
+ * callbacks to RCU_DONE_TAIL, so there is no GP to wait
+ * for. Any remaining callbacks got new (future) GP
+ * numbers from rcu_accelerate_cbs() inside
+ * rcu_advance_cbs() and will be handled on the next pass.
+ */
+ if (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq_full) &&
+ !poll_state_synchronize_rcu_full(&cur_gp_seq_full)) {
if (!needwait_gp ||
ULONG_CMP_LT(cur_gp_seq_full.rgos_norm, wait_gp_seq))
wait_gp_seq = cur_gp_seq_full.rgos_norm;
@@ -919,7 +930,7 @@ static void nocb_cb_wait(struct rcu_data *rdp)
lockdep_assert_irqs_enabled();
rcu_nocb_lock_irqsave(rdp, flags);
if (rcu_segcblist_nextgp(cblist, &cur_gp_seq_full) &&
- rcu_seq_done(&rnp->gp_seq, cur_gp_seq_full.rgos_norm) &&
+ poll_state_synchronize_rcu_full(&cur_gp_seq_full) &&
raw_spin_trylock_rcu_node(rnp)) { /* irqs already disabled. */
needwake_gp = rcu_advance_cbs(rdp->mynode, rdp);
raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
@@ -1548,8 +1559,8 @@ static void show_rcu_nocb_gp_state(struct rcu_data *rdp)
static void show_rcu_nocb_state(struct rcu_data *rdp)
{
char bufd[22];
- char bufw[45];
- char bufr[45];
+ char bufw[64];
+ char bufr[64];
char bufn[22];
char bufb[22];
struct rcu_data *nocb_next_rdp;
@@ -1569,10 +1580,12 @@ static void show_rcu_nocb_state(struct rcu_data *rdp)
nocb_entry_rdp);
sprintf(bufd, "%ld", rsclp->seglen[RCU_DONE_TAIL]);
- sprintf(bufw, "%ld(%ld)", rsclp->seglen[RCU_WAIT_TAIL],
- rsclp->gp_seq_full[RCU_WAIT_TAIL].rgos_norm);
- sprintf(bufr, "%ld(%ld)", rsclp->seglen[RCU_NEXT_READY_TAIL],
- rsclp->gp_seq_full[RCU_NEXT_READY_TAIL].rgos_norm);
+ sprintf(bufw, "%ld(%ld/%ld)", rsclp->seglen[RCU_WAIT_TAIL],
+ rsclp->gp_seq_full[RCU_WAIT_TAIL].rgos_norm,
+ rsclp->gp_seq_full[RCU_WAIT_TAIL].rgos_exp);
+ sprintf(bufr, "%ld(%ld/%ld)", rsclp->seglen[RCU_NEXT_READY_TAIL],
+ rsclp->gp_seq_full[RCU_NEXT_READY_TAIL].rgos_norm,
+ rsclp->gp_seq_full[RCU_NEXT_READY_TAIL].rgos_exp);
sprintf(bufn, "%ld", rsclp->seglen[RCU_NEXT_TAIL]);
sprintf(bufb, "%ld", rcu_cblist_n_cbs(&rdp->nocb_bypass));
pr_info(" CB %d^%d->%d %c%c%c%c%c F%ld L%ld C%d %c%s%c%s%c%s%c%s%c%s q%ld %c CPU %d%s\n",
--
2.52.0