[PATCH 3/8] blk-cgroup: don't nest queue_lock under rcu in blkcg_print_blkgs()
From: Yu Kuai
Date: Sun Jun 07 2026 - 23:43:16 EST
From: Yu Kuai <yukuai@xxxxxxx>
With previous modification to delay freeing policy data after an RCU grace
period, prfill() can run under RCU instead of taking queue_lock. However,
policy teardown can still clear blkg->pd[plid] after blkcg_print_blkgs()
observes the policy enabled bit.
Load policy data once with READ_ONCE() and skip the blkg if teardown
already cleared it. Do the same in recursive stat walks for descendant
blkgs. Remove the stale BFQ debug queue_lock assertion because
blkcg_print_blkgs() no longer calls prfill() with queue_lock held. This
also lets ioc_qos_prfill() and ioc_cost_model_prfill() use IRQ-safe
ioc->lock locking without re-enabling IRQs while queue_lock is still held.
Signed-off-by: Yu Kuai <yukuai@xxxxxxx>
---
block/bfq-cgroup.c | 8 +++++---
block/blk-cgroup-rwstat.c | 15 +++++++++------
block/blk-cgroup.c | 22 +++++++++++++---------
block/blk-cgroup.h | 6 +++---
block/blk-iocost.c | 8 ++++----
5 files changed, 34 insertions(+), 25 deletions(-)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 56f60e36c799..904d9e0d9029 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -1146,20 +1146,22 @@ static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
struct blkcg_gq *blkg = pd_to_blkg(pd);
struct blkcg_gq *pos_blkg;
struct cgroup_subsys_state *pos_css;
u64 sum = 0;
- lockdep_assert_held(&blkg->q->queue_lock);
-
rcu_read_lock();
blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
+ struct blkg_policy_data *pd;
struct bfq_stat *stat;
if (!pos_blkg->online)
continue;
- stat = (void *)blkg_to_pd(pos_blkg, &blkcg_policy_bfq) + off;
+ pd = blkg_to_pd(pos_blkg, &blkcg_policy_bfq);
+ if (!pd)
+ continue;
+ stat = (void *)pd + off;
sum += bfq_stat_read(stat) + atomic64_read(&stat->aux_cnt);
}
rcu_read_unlock();
return __blkg_prfill_u64(sf, pd, sum);
diff --git a/block/blk-cgroup-rwstat.c b/block/blk-cgroup-rwstat.c
index a55fb0c53558..aae910713814 100644
--- a/block/blk-cgroup-rwstat.c
+++ b/block/blk-cgroup-rwstat.c
@@ -99,26 +99,29 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
{
struct blkcg_gq *pos_blkg;
struct cgroup_subsys_state *pos_css;
unsigned int i;
- lockdep_assert_held(&blkg->q->queue_lock);
+ WARN_ON_ONCE(!rcu_read_lock_held());
memset(sum, 0, sizeof(*sum));
- rcu_read_lock();
blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
struct blkg_rwstat *rwstat;
if (!pos_blkg->online)
continue;
- if (pol)
- rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off;
- else
+ if (pol) {
+ struct blkg_policy_data *pd = blkg_to_pd(pos_blkg, pol);
+
+ if (!pd)
+ continue;
+ rwstat = (void *)pd + off;
+ } else {
rwstat = (void *)pos_blkg + off;
+ }
for (i = 0; i < BLKG_RWSTAT_NR; i++)
sum->cnt[i] += blkg_rwstat_read_counter(rwstat, i);
}
- rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b55c43f72bcb..46fc65050c38 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -699,13 +699,13 @@ const char *blkg_dev_name(struct blkcg_gq *blkg)
* @data: data to be passed to @prfill
* @show_total: to print out sum of prfill return values or not
*
* This function invokes @prfill on each blkg of @blkcg if pd for the
* policy specified by @pol exists. @prfill is invoked with @sf, the
- * policy data and @data and the matching queue lock held. If @show_total
- * is %true, the sum of the return values from @prfill is printed with
- * "Total" label at the end.
+ * policy data and @data under RCU read lock. If @show_total is %true, the
+ * sum of the return values from @prfill is printed with "Total" label at the
+ * end.
*
* This is to be used to construct print functions for
* cftype->read_seq_string method.
*/
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
@@ -717,14 +717,18 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
struct blkcg_gq *blkg;
u64 total = 0;
rcu_read_lock();
hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
- spin_lock_irq(&blkg->q->queue_lock);
- if (blkcg_policy_enabled(blkg->q, pol))
- total += prfill(sf, blkg->pd[pol->plid], data);
- spin_unlock_irq(&blkg->q->queue_lock);
+ struct blkg_policy_data *pd;
+
+ if (!blkcg_policy_enabled(blkg->q, pol))
+ continue;
+
+ pd = blkg_to_pd(blkg, pol);
+ if (pd)
+ total += prfill(sf, pd, data);
}
rcu_read_unlock();
if (show_total)
seq_printf(sf, "Total %llu\n", (unsigned long long)total);
@@ -1591,11 +1595,11 @@ static void blkcg_policy_teardown_pds(struct request_queue *q,
if (pd) {
if (pd->online && pol->pd_offline_fn)
pol->pd_offline_fn(pd);
pd->online = false;
pol->pd_free_fn(pd);
- blkg->pd[pol->plid] = NULL;
+ WRITE_ONCE(blkg->pd[pol->plid], NULL);
}
spin_unlock(&blkcg->lock);
}
}
@@ -1683,11 +1687,11 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
spin_lock(&blkg->blkcg->lock);
pd->blkg = blkg;
pd->plid = pol->plid;
- blkg->pd[pol->plid] = pd;
+ WRITE_ONCE(blkg->pd[pol->plid], pd);
if (pol->pd_init_fn)
pol->pd_init_fn(pd);
if (pol->pd_online_fn)
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index fd206d1fa3c9..5402b4ff6f3f 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -279,13 +279,13 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
* @pol: policy of interest
*
* Return pointer to private data associated with the @blkg-@pol pair.
*/
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
- struct blkcg_policy *pol)
+ const struct blkcg_policy *pol)
{
- return blkg ? blkg->pd[pol->plid] : NULL;
+ return blkg ? READ_ONCE(blkg->pd[pol->plid]) : NULL;
}
static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
struct blkcg_policy *pol)
{
@@ -488,11 +488,11 @@ static inline int blkcg_activate_policy(struct gendisk *disk,
const struct blkcg_policy *pol) { return 0; }
static inline void blkcg_deactivate_policy(struct gendisk *disk,
const struct blkcg_policy *pol) { }
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
- struct blkcg_policy *pol) { return NULL; }
+ const struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }
static inline void blk_cgroup_bio_start(struct bio *bio) { }
static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; }
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index c136b1f46fcc..1f3f6e0f8901 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3188,11 +3188,11 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
struct ioc *ioc = pd_to_iocg(pd)->ioc;
if (!dname)
return 0;
- spin_lock(&ioc->lock);
+ spin_lock_irq(&ioc->lock);
seq_printf(sf, "%s enable=%d ctrl=%s rpct=%u.%02u rlat=%u wpct=%u.%02u wlat=%u min=%u.%02u max=%u.%02u\n",
dname, ioc->enabled, ioc->user_qos_params ? "user" : "auto",
ioc->params.qos[QOS_RPPM] / 10000,
ioc->params.qos[QOS_RPPM] % 10000 / 100,
ioc->params.qos[QOS_RLAT],
@@ -3201,11 +3201,11 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
ioc->params.qos[QOS_WLAT],
ioc->params.qos[QOS_MIN] / 10000,
ioc->params.qos[QOS_MIN] % 10000 / 100,
ioc->params.qos[QOS_MAX] / 10000,
ioc->params.qos[QOS_MAX] % 10000 / 100);
- spin_unlock(&ioc->lock);
+ spin_unlock_irq(&ioc->lock);
return 0;
}
static int ioc_qos_show(struct seq_file *sf, void *v)
{
@@ -3386,18 +3386,18 @@ static u64 ioc_cost_model_prfill(struct seq_file *sf,
u64 *u = ioc->params.i_lcoefs;
if (!dname)
return 0;
- spin_lock(&ioc->lock);
+ spin_lock_irq(&ioc->lock);
seq_printf(sf, "%s ctrl=%s model=linear "
"rbps=%llu rseqiops=%llu rrandiops=%llu "
"wbps=%llu wseqiops=%llu wrandiops=%llu\n",
dname, ioc->user_cost_model ? "user" : "auto",
u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS],
u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]);
- spin_unlock(&ioc->lock);
+ spin_unlock_irq(&ioc->lock);
return 0;
}
static int ioc_cost_model_show(struct seq_file *sf, void *v)
{
--
2.51.0