Re: [PATCH -next RFC] block: count 'ios' and 'sectors' when io is done for bio-based device

From: Yu Kuai
Date: Tue Mar 07 2023 - 05:45:40 EST


Hi,

在 2023/02/28 9:01, Yu Kuai 写道:
Hi,

friendly ping ...

Thanks,
Kuai

在 2023/02/23 17:12, Yu Kuai 写道:
From: Yu Kuai <yukuai3@xxxxxxxxxx>

While using iostat for raid, I observed very strange 'await'
occasionally, and turns out it's due to that 'ios' and 'sectors' is
counted in bdev_start_io_acct(), while 'nsecs' is counted in
bdev_end_io_acct(). I'm not sure why they are ccounted like that
but I think this behaviour is obviously wrong because user will get
wrong disk stats.

Fix the problem by counting 'ios' and 'sectors' when io is done, like
what rq-based device does.

Can anyone help to review this change? Or is there any reason to count
'ios' and 'sectors' when io is started?

Thanks,
Kuai

Fixes: 394ffa503bc4 ("blk: introduce generic io stat accounting help function")
Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx>
---
  block/blk-core.c              | 16 ++++++----------
  drivers/md/dm.c               |  6 +++---
  drivers/nvme/host/multipath.c |  8 ++++----
  include/linux/blkdev.h        |  5 ++---
  4 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 82b5b2c53f1e..fe1d320f5f07 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -953,16 +953,11 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end)
      }
  }
-unsigned long bdev_start_io_acct(struct block_device *bdev,
-                 unsigned int sectors, enum req_op op,
+unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
                   unsigned long start_time)
  {
-    const int sgrp = op_stat_group(op);
-
      part_stat_lock();
      update_io_ticks(bdev, start_time, false);
-    part_stat_inc(bdev, ios[sgrp]);
-    part_stat_add(bdev, sectors[sgrp], sectors);
      part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
      part_stat_unlock();
@@ -978,13 +973,12 @@ EXPORT_SYMBOL(bdev_start_io_acct);
   */
  unsigned long bio_start_io_acct(struct bio *bio)
  {
-    return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
-                  bio_op(bio), jiffies);
+    return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies);
  }
  EXPORT_SYMBOL_GPL(bio_start_io_acct);
  void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
-              unsigned long start_time)
+              unsigned int sectors, unsigned long start_time)
  {
      const int sgrp = op_stat_group(op);
      unsigned long now = READ_ONCE(jiffies);
@@ -992,6 +986,8 @@ void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
      part_stat_lock();
      update_io_ticks(bdev, now, true);
+    part_stat_inc(bdev, ios[sgrp]);
+    part_stat_add(bdev, sectors[sgrp], sectors);
      part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
      part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
      part_stat_unlock();
@@ -1001,7 +997,7 @@ EXPORT_SYMBOL(bdev_end_io_acct);
  void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
                    struct block_device *orig_bdev)
  {
-    bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
+    bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), start_time);
  }
  EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index eace45a18d45..f5cc330bb549 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -512,10 +512,10 @@ static void dm_io_acct(struct dm_io *io, bool end)
          sectors = io->sectors;
      if (!end)
-        bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio),
-                   start_time);
+        bdev_start_io_acct(bio->bi_bdev, bio_op(bio), start_time);
      else
-        bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time);
+        bdev_end_io_acct(bio->bi_bdev, bio_op(bio), sectors,
+                 start_time);
      if (static_branch_unlikely(&stats_enabled) &&
          unlikely(dm_stats_used(&md->stats))) {
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index fc39d01e7b63..9171452e2f6d 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -123,9 +123,8 @@ void nvme_mpath_start_request(struct request *rq)
          return;
      nvme_req(rq)->flags |= NVME_MPATH_IO_STATS;
-    nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0,
-                    blk_rq_bytes(rq) >> SECTOR_SHIFT,
-                    req_op(rq), jiffies);
+    nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, req_op(rq),
+                              jiffies);
  }
  EXPORT_SYMBOL_GPL(nvme_mpath_start_request);
@@ -136,7 +135,8 @@ void nvme_mpath_end_request(struct request *rq)
      if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
          return;
      bdev_end_io_acct(ns->head->disk->part0, req_op(rq),
-        nvme_req(rq)->start_time);
+             blk_rq_bytes(rq) >> SECTOR_SHIFT,
+             nvme_req(rq)->start_time);
  }
  void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d1aee08f8c18..941304f17492 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1446,11 +1446,10 @@ static inline void blk_wake_io_task(struct task_struct *waiter)
          wake_up_process(waiter);
  }
-unsigned long bdev_start_io_acct(struct block_device *bdev,
-                 unsigned int sectors, enum req_op op,
+unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
                   unsigned long start_time);
  void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
-        unsigned long start_time);
+              unsigned int sectors, unsigned long start_time);
  unsigned long bio_start_io_acct(struct bio *bio);
  void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,


.