[PATCH v4] blk-cgroup: Replace u64 sync with spinlock for iostat

From: boy . wu
Date: Thu Jul 18 2024 - 04:42:16 EST


From: Boy Wu <boy.wu@xxxxxxxxxxxx>

On 32-bit SMP systems, if multiple CPUs call blkcg_print_stat()
concurrently, blkcg_fill_root_iostats() can run on several CPUs at once
and race on the sequence counter used by u64_stats_update_begin() and
u64_stats_update_end(), which can then cause a deadlock in
u64_stats_fetch_begin() in blkcg_print_one_stat().

Thus, replace u64_stats_sync with the blkg_stat_lock spinlock (taken
only when BITS_PER_LONG == 32) to protect iostat.

Fixes: ef45fe470e1e ("blk-cgroup: show global disk stats in root cgroup io.stat")
Signed-off-by: Boy Wu <boy.wu@xxxxxxxxxxxx>
---
Changes in v2:
- Update commit message
- Remove u64_stats_sync
- Replace spin_lock_irq with guard statement
- Replace blkg->q->queue_lock with blkg_stat_lock
Changes in v3:
- Update commit message
- Add spinlock to the blkg_iostat_set structure
- Replace all u64_stats_sync usage with spinlock for iostat
- Replace blkg_stat_lock with iostat.spinlock
Changes in v4:
- Update commit message
- Remove spinlock from the blkg_iostat_set structure
- Replace iostat.spinlock with blkg_stat_lock
- Take the lock only on 32-bit systems (#if BITS_PER_LONG == 32)
---
block/blk-cgroup.c | 34 +++++++++++++---------------------
1 file changed, 13 insertions(+), 21 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 37e6cc91d576..faa604c6fab9 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -329,7 +329,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
#endif

- u64_stats_init(&blkg->iostat.sync);
for_each_possible_cpu(cpu) {
u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync);
per_cpu_ptr(blkg->iostat_cpu, cpu)->blkg = blkg;
@@ -632,18 +631,18 @@ static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
static void __blkg_clear_stat(struct blkg_iostat_set *bis)
{
struct blkg_iostat cur = {0};
- unsigned long flags;

- flags = u64_stats_update_begin_irqsave(&bis->sync);
blkg_iostat_set(&bis->cur, &cur);
blkg_iostat_set(&bis->last, &cur);
- u64_stats_update_end_irqrestore(&bis->sync, flags);
}

static void blkg_clear_stat(struct blkcg_gq *blkg)
{
int cpu;

+#if BITS_PER_LONG == 32
+ guard(raw_spinlock_irqsave)(&blkg_stat_lock);
+#endif
for_each_possible_cpu(cpu) {
struct blkg_iostat_set *s = per_cpu_ptr(blkg->iostat_cpu, cpu);

@@ -995,15 +994,12 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur,
struct blkg_iostat *last)
{
struct blkg_iostat delta;
- unsigned long flags;

/* propagate percpu delta to global */
- flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
blkg_iostat_set(&delta, cur);
blkg_iostat_sub(&delta, last);
blkg_iostat_add(&blkg->iostat.cur, &delta);
blkg_iostat_add(last, &delta);
- u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
}

static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
@@ -1034,7 +1030,6 @@ static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
struct blkcg_gq *blkg = bisc->blkg;
struct blkcg_gq *parent = blkg->parent;
struct blkg_iostat cur;
- unsigned int seq;

/*
* Order assignment of `next_bisc` from `bisc->lnode.next` in
@@ -1051,10 +1046,7 @@ static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
goto propagate_up; /* propagate up to parent only */

/* fetch the current per-cpu values */
- do {
- seq = u64_stats_fetch_begin(&bisc->sync);
- blkg_iostat_set(&cur, &bisc->cur);
- } while (u64_stats_fetch_retry(&bisc->sync, seq));
+ blkg_iostat_set(&cur, &bisc->cur);

blkcg_iostat_update(blkg, &cur, &bisc->last);

@@ -1112,7 +1104,6 @@ static void blkcg_fill_root_iostats(void)
struct blkcg_gq *blkg = bdev->bd_disk->queue->root_blkg;
struct blkg_iostat tmp;
int cpu;
- unsigned long flags;

memset(&tmp, 0, sizeof(tmp));
for_each_possible_cpu(cpu) {
@@ -1133,10 +1124,10 @@ static void blkcg_fill_root_iostats(void)
tmp.bytes[BLKG_IOSTAT_DISCARD] +=
cpu_dkstats->sectors[STAT_DISCARD] << 9;
}
-
- flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
+#if BITS_PER_LONG == 32
+ guard(raw_spinlock_irqsave)(&blkg_stat_lock);
+#endif
blkg_iostat_set(&blkg->iostat.cur, &tmp);
- u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
}
}

@@ -1145,7 +1136,6 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
struct blkg_iostat_set *bis = &blkg->iostat;
u64 rbytes, wbytes, rios, wios, dbytes, dios;
const char *dname;
- unsigned seq;
int i;

if (!blkg->online)
@@ -1157,16 +1147,18 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)

seq_printf(s, "%s ", dname);

- do {
- seq = u64_stats_fetch_begin(&bis->sync);
-
+#if BITS_PER_LONG == 32
+ scoped_guard(raw_spinlock_irqsave, &blkg_stat_lock) {
+#endif
rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE];
dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD];
rios = bis->cur.ios[BLKG_IOSTAT_READ];
wios = bis->cur.ios[BLKG_IOSTAT_WRITE];
dios = bis->cur.ios[BLKG_IOSTAT_DISCARD];
- } while (u64_stats_fetch_retry(&bis->sync, seq));
+#if BITS_PER_LONG == 32
+ }
+#endif

if (rbytes || wbytes || rios || wios) {
seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
--
2.18.0