Re: [PATCH 5.10 01/25] md: revert io stats accounting

From: Jack Wang
Date: Wed Jan 26 2022 - 05:10:00 EST


Hi,

Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> 于2022年1月14日周五 19:57写道:
>
> From: Guoqing Jiang <jgq516@xxxxxxxxx>
>
> commit ad3fc798800fb7ca04c1dfc439dba946818048d8 upstream.
>
> The commit 41d2d848e5c0 ("md: improve io stats accounting") could cause
> double fault problem per the report [1], and also it is not correct to
> change ->bi_end_io if md don't own it, so let's revert it.
>
> And io stats accounting will be replemented in later commits.
>
> [1]. https://lore.kernel.org/linux-raid/3bf04253-3fad-434a-63a7-20214e38cf26@xxxxxxxxx/T/#t
>
> Fixes: 41d2d848e5c0 ("md: improve io stats accounting")
> Signed-off-by: Guoqing Jiang <jiangguoqing@xxxxxxxxxx>
> Signed-off-by: Song Liu <song@xxxxxxxxxx>
> [GM: backport to 5.10-stable]
> Signed-off-by: Guillaume Morin <guillaume@xxxxxxxxxxx>
> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
> ---
> drivers/md/md.c | 57 +++++++++++---------------------------------------------
> drivers/md/md.h | 1
> 2 files changed, 12 insertions(+), 46 deletions(-)
>
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -459,34 +459,12 @@ check_suspended:
> }
> EXPORT_SYMBOL(md_handle_request);
>
> -struct md_io {
> - struct mddev *mddev;
> - bio_end_io_t *orig_bi_end_io;
> - void *orig_bi_private;
> - unsigned long start_time;
> - struct hd_struct *part;
> -};
> -
> -static void md_end_io(struct bio *bio)
> -{
> - struct md_io *md_io = bio->bi_private;
> - struct mddev *mddev = md_io->mddev;
> -
> - part_end_io_acct(md_io->part, bio, md_io->start_time);
> -
> - bio->bi_end_io = md_io->orig_bi_end_io;
> - bio->bi_private = md_io->orig_bi_private;
> -
> - mempool_free(md_io, &mddev->md_io_pool);
> -
> - if (bio->bi_end_io)
> - bio->bi_end_io(bio);
> -}
> -
> static blk_qc_t md_submit_bio(struct bio *bio)
> {
> const int rw = bio_data_dir(bio);
> + const int sgrp = op_stat_group(bio_op(bio));
> struct mddev *mddev = bio->bi_disk->private_data;
> + unsigned int sectors;
>
> if (mddev == NULL || mddev->pers == NULL) {
> bio_io_error(bio);
> @@ -507,26 +485,21 @@ static blk_qc_t md_submit_bio(struct bio
> return BLK_QC_T_NONE;
> }
>
> - if (bio->bi_end_io != md_end_io) {
> - struct md_io *md_io;
> -
> - md_io = mempool_alloc(&mddev->md_io_pool, GFP_NOIO);
> - md_io->mddev = mddev;
> - md_io->orig_bi_end_io = bio->bi_end_io;
> - md_io->orig_bi_private = bio->bi_private;
> -
> - bio->bi_end_io = md_end_io;
> - bio->bi_private = md_io;
> -
> - md_io->start_time = part_start_io_acct(mddev->gendisk,
> - &md_io->part, bio);
> - }
> -
> + /*
> + * save the sectors now since our bio can
> + * go away inside make_request
> + */
> + sectors = bio_sectors(bio);
This code snippet is not in the original patch, and it's not in the
latest upstream either.
> /* bio could be mergeable after passing to underlayer */
> bio->bi_opf &= ~REQ_NOMERGE;
>
> md_handle_request(mddev, bio);
>
> + part_stat_lock();
> + part_stat_inc(&mddev->gendisk->part0, ios[sgrp]);
> + part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors);
> + part_stat_unlock();
> +
Same here: this code snippet is not in the original patch, and it's
not in the latest upstream either.

I think it would be good to keep this the same as the upstream version.

Best!
Jinpu Wang @ IONOS
> return BLK_QC_T_NONE;
> }
>
> @@ -5636,7 +5609,6 @@ static void md_free(struct kobject *ko)
>
> bioset_exit(&mddev->bio_set);
> bioset_exit(&mddev->sync_set);
> - mempool_exit(&mddev->md_io_pool);
> kfree(mddev);
> }
>
> @@ -5732,11 +5704,6 @@ static int md_alloc(dev_t dev, char *nam
> */
> mddev->hold_active = UNTIL_STOP;
>
> - error = mempool_init_kmalloc_pool(&mddev->md_io_pool, BIO_POOL_SIZE,
> - sizeof(struct md_io));
> - if (error)
> - goto abort;
> -
> error = -ENOMEM;
> mddev->queue = blk_alloc_queue(NUMA_NO_NODE);
> if (!mddev->queue)
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -487,7 +487,6 @@ struct mddev {
> struct bio_set sync_set; /* for sync operations like
> * metadata and bitmap writes
> */
> - mempool_t md_io_pool;
>
> /* Generic flush handling.
> * The last to finish preflush schedules a worker to submit
>
>