[PATCH 008 of 35] Introduce bi_iocnt to count requests sharing one bio.

From: NeilBrown
Date: Mon Jul 30 2007 - 22:20:20 EST



This count is currently used only by raid5 (which previously overloaded
bi_phys_segments for the purpose), but it will be used more widely in future.

generic_make_request sets the count to 1, and bio_endio decrements it,
calling bi_end_io only when the count reaches zero. A make_request_fn
that does not complete the bio itself by calling bio_endio directly is
free to do whatever it likes with the count (for instance, taking one
extra reference for each sub-request it creates), as long as bio_endio
is ultimately called once per reference.
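
For example, a make_request_fn that wants one bio completed by two
separate requests can take one extra reference. This is a minimal
sketch only; the helper names are made up:

	static int split_make_request(struct request_queue *q, struct bio *bio)
	{
		/* generic_make_request() set bi_iocnt to 1; take one
		 * more reference so two completions are required */
		atomic_inc(&bio->bi_iocnt);

		submit_front_half(q, bio);	/* hypothetical helpers, */
		submit_back_half(q, bio);	/* each ending in bio_endio() */

		/* the original bi_end_io runs only after the second
		 * bio_endio() call drops bi_iocnt to zero */
		return 0;
	}

raid5's make_request (converted below) works the same way, taking one
reference per stripe the bio covers and dropping its own initial
reference with atomic_dec_and_test once submission is done.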

As some bios do not come through generic_make_request (some are stuck on
the head of the request queue by scsi), we initialise bi_iocnt in
bio_init too.
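
In outline, a bio that never passes through generic_make_request still
completes correctly, because bio_init leaves bi_iocnt at 1:

	/* a bio submitted outside generic_make_request() */
	bio_init(bio);			/* bi_iocnt starts at 1 */
	/* ... build and complete the request directly ... */
	bio_endio(bio, 0);		/* 1 -> 0: bi_end_io runs once */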

It now becomes important to call bio_endio exactly the right number of
times, so ordered_bio_endio can no longer use it to call
flush_dry_bio_endio. Remove that function and open-code its effect
inside ordered_bio_endio.
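
To see why the old trick no longer works, here is its shape in outline
(paraphrasing the code removed below):

	/* the old dry-run completion, now removed */
	endio = bio->bi_end_io;
	bio->bi_end_io = flush_dry_bio_endio;	/* substitute handler */
	bio_endio(bio, error);			/* first bio_endio: dry finish */
	bio->bi_end_io = endio;			/* real completion calls
						 * bio_endio a second time */

With bi_iocnt, the first, dry bio_endio call would consume the bio's
single reference, and the real completion would then find the count
already gone, so the original bi_end_io would never run.
ordered_bio_endio therefore just records any error and re-marks the bio
uptodate, leaving the one and only bio_endio call for the final
completion.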

Signed-off-by: Neil Brown <neilb@xxxxxxx>

### Diffstat output
./block/ll_rw_blk.c | 31 +++++++------------------------
./drivers/md/raid5.c | 30 +++++++++++-------------------
./fs/bio.c | 4 +++-
./include/linux/bio.h | 3 +++
4 files changed, 24 insertions(+), 44 deletions(-)

diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:20:51.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:20:52.000000000 +1000
@@ -527,44 +527,25 @@ int blk_do_ordered(struct request_queue
return 1;
}

-static void flush_dry_bio_endio(struct bio *bio, int error)
-{
-
- /*
- * This is dry run, restore bio_sector and size. We'll finish
- * this request again with the original bi_end_io after an
- * error occurs or post flush is complete.
- */
-
- /* Reset bio */
- set_bit(BIO_UPTODATE, &bio->bi_flags);
-}
-
static int ordered_bio_endio(struct request *rq, struct bio *bio,
int error)
{
struct request_queue *q = rq->q;
- bio_end_io_t *endio;
- void *private;

if (&q->bar_rq != rq)
return 0;

/*
* Okay, this is the barrier request in progress, dry finish it.
+ *
+ * We'll finish this request again with the original
+ * bi_end_io after an error occurs or post flush is complete.
*/
+
if (error && !q->orderr)
q->orderr = error;

- endio = bio->bi_end_io;
- private = bio->bi_private;
- bio->bi_end_io = flush_dry_bio_endio;
- bio->bi_private = q;
-
- bio_endio(bio, error);
-
- bio->bi_end_io = endio;
- bio->bi_private = private;
+ set_bit(BIO_UPTODATE, &bio->bi_flags);

return 1;
}
@@ -3149,6 +3130,8 @@ static inline void __generic_make_reques
int ret, nr_sectors = bio_sectors(bio);
dev_t old_dev;

+ atomic_set(&bio->bi_iocnt, 1);
+
might_sleep();
/* Test device or partition size, when known. */
maxsector = bio->bi_bdev->bd_inode->i_size >> 9;

diff .prev/drivers/md/raid5.c ./drivers/md/raid5.c
--- .prev/drivers/md/raid5.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/md/raid5.c 2007-07-31 11:20:52.000000000 +1000
@@ -851,7 +851,7 @@ static void ops_complete_biofill(void *s
dev_q->sector + STRIPE_SECTORS) {
rbi2 = r5_next_bio(rbi, dev_q->sector);
spin_lock_irq(&conf->device_lock);
- if (--rbi->bi_phys_segments == 0) {
+ if (atomic_dec_and_test(&rbi->bi_iocnt)) {
rbi->bi_next = return_bi;
return_bi = rbi;
}
@@ -2294,7 +2294,7 @@ static int add_queue_bio(struct stripe_q
if (*bip)
bi->bi_next = *bip;
*bip = bi;
- bi->bi_phys_segments ++;
+ atomic_inc(&bi->bi_iocnt);
spin_unlock_irq(&conf->device_lock);
spin_unlock(&sq->lock);

@@ -2395,7 +2395,7 @@ handle_requests_to_failed_array(raid5_co
sq->dev[i].sector + STRIPE_SECTORS) {
struct bio *nextbi = r5_next_bio(bi, sq->dev[i].sector);
clear_bit(BIO_UPTODATE, &bi->bi_flags);
- if (--bi->bi_phys_segments == 0) {
+ if (atomic_dec_and_test(&bi->bi_iocnt)) {
md_write_end(conf->mddev);
bi->bi_next = *return_bi;
*return_bi = bi;
@@ -2410,7 +2410,7 @@ handle_requests_to_failed_array(raid5_co
sq->dev[i].sector + STRIPE_SECTORS) {
struct bio *bi2 = r5_next_bio(bi, sq->dev[i].sector);
clear_bit(BIO_UPTODATE, &bi->bi_flags);
- if (--bi->bi_phys_segments == 0) {
+ if (atomic_dec_and_test(&bi->bi_iocnt)) {
md_write_end(conf->mddev);
bi->bi_next = *return_bi;
*return_bi = bi;
@@ -2435,7 +2435,7 @@ handle_requests_to_failed_array(raid5_co
struct bio *nextbi =
r5_next_bio(bi, sq->dev[i].sector);
clear_bit(BIO_UPTODATE, &bi->bi_flags);
- if (--bi->bi_phys_segments == 0) {
+ if (atomic_dec_and_test(&bi->bi_iocnt)) {
bi->bi_next = *return_bi;
*return_bi = bi;
}
@@ -2640,7 +2640,7 @@ static void handle_completed_write_reque
while (wbi && wbi->bi_sector <
dev_q->sector + STRIPE_SECTORS) {
wbi2 = r5_next_bio(wbi, dev_q->sector);
- if (--wbi->bi_phys_segments == 0) {
+ if (atomic_dec_and_test(&wbi->bi_iocnt)) {
md_write_end(conf->mddev);
wbi->bi_next = *return_bi;
*return_bi = wbi;
@@ -3426,7 +3426,7 @@ static void handle_stripe6(struct stripe
copy_data(0, rbi, dev->page, dev_q->sector);
rbi2 = r5_next_bio(rbi, dev_q->sector);
spin_lock_irq(&conf->device_lock);
- if (--rbi->bi_phys_segments == 0) {
+ if (atomic_dec_and_test(&rbi->bi_iocnt)) {
rbi->bi_next = return_bi;
return_bi = rbi;
}
@@ -3870,7 +3870,7 @@ static struct bio *remove_bio_from_retry
if(bi) {
conf->retry_read_aligned_list = bi->bi_next;
bi->bi_next = NULL;
- bi->bi_phys_segments = 1; /* biased count of active stripes */
+ atomic_set(&bi->bi_iocnt, 1);
bi->bi_hw_segments = 0; /* count of processed stripes */
}

@@ -4014,7 +4014,6 @@ static int make_request(struct request_q
sector_t logical_sector, last_sector;
struct stripe_queue *sq;
const int rw = bio_data_dir(bi);
- int remaining;

if (unlikely(bio_barrier(bi))) {
bio_endio(bi, -EOPNOTSUPP);
@@ -4034,7 +4033,7 @@ static int make_request(struct request_q
logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
last_sector = bi->bi_sector + (bi->bi_size>>9);
bi->bi_next = NULL;
- bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
+ atomic_set(&bi->bi_iocnt, 1);

for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
DEFINE_WAIT(w);
@@ -4131,10 +4130,7 @@ static int make_request(struct request_q
}

}
- spin_lock_irq(&conf->device_lock);
- remaining = --bi->bi_phys_segments;
- spin_unlock_irq(&conf->device_lock);
- if (remaining == 0) {
+ if (atomic_dec_and_test(&bi->bi_iocnt)) {

if ( rw == WRITE )
md_write_end(mddev);
@@ -4408,7 +4404,6 @@ static int retry_aligned_read(raid5_con
int dd_idx, pd_idx;
sector_t sector, logical_sector, last_sector;
int scnt = 0;
- int remaining;
int handled = 0;
int disks = conf->raid_disks;
int data_disks = disks - conf->max_degraded;
@@ -4455,10 +4450,7 @@ static int retry_aligned_read(raid5_con
handle_queue(sq, disks, data_disks);
handled++;
}
- spin_lock_irq(&conf->device_lock);
- remaining = --raid_bio->bi_phys_segments;
- spin_unlock_irq(&conf->device_lock);
- if (remaining == 0) {
+ if (atomic_dec_and_test(&raid_bio->bi_iocnt)) {

raid_bio->bi_end_io(raid_bio,
test_bit(BIO_UPTODATE, &raid_bio->bi_flags)

diff .prev/fs/bio.c ./fs/bio.c
--- .prev/fs/bio.c 2007-07-31 11:20:51.000000000 +1000
+++ ./fs/bio.c 2007-07-31 11:20:52.000000000 +1000
@@ -141,6 +141,7 @@ void bio_init(struct bio *bio)
bio->bi_max_vecs = 0;
bio->bi_end_io = NULL;
atomic_set(&bio->bi_cnt, 1);
+ atomic_set(&bio->bi_iocnt, 1);
bio->bi_private = NULL;
}

@@ -1013,7 +1014,8 @@ void bio_endio(struct bio *bio, int erro
if (error)
clear_bit(BIO_UPTODATE, &bio->bi_flags);

- if (bio->bi_end_io)
+ if (atomic_dec_and_test(&bio->bi_iocnt) &&
+ bio->bi_end_io)
bio->bi_end_io(bio, error);
}


diff .prev/include/linux/bio.h ./include/linux/bio.h
--- .prev/include/linux/bio.h 2007-07-31 11:20:51.000000000 +1000
+++ ./include/linux/bio.h 2007-07-31 11:20:52.000000000 +1000
@@ -108,6 +108,9 @@ struct bio {

bio_end_io_t *bi_end_io;
atomic_t bi_cnt; /* pin count */
+ atomic_t bi_iocnt; /* number of io requests
+ * referring to this bio
+ */

void *bi_private;
