Re: [PATCH 1/2] Don't merge different partition's IOs

From: Linus Torvalds
Date: Mon Dec 06 2010 - 11:09:07 EST


2010/12/6 Yasuaki Ishimatsu <isimatu.yasuaki@xxxxxxxxxxxxxx>:
>
> The problem is caused by merging different partition's I/Os. So the patch
> check whether a merging bio or request is a same partition as a request or not
> by using a partition's start sector and size.

I really think this is wrong.

We should just carry the partition information around in the req and
the bio, and just compare the pointers, rather than compare the range.
No need to even dereference the pointers, you should be able to just
do

/* don't merge if not on the same partition */
if (bio->part != req->part)
return 0;

or something.

This is doubly true since the accounting already does that horrible
partition lookup: rather than look it up, we should just _set_ it in
__generic_make_request(), where I think we already know it since we do
that whole blk_partition_remap().

So just something like the appended (TOTALLY UNTESTED) perhaps?

Note that this should get it right even for overlapping partitions etc.

Linus
block/blk-core.c | 2 ++
block/blk-merge.c | 9 +++++++++
include/linux/blk_types.h | 2 +-
include/linux/blkdev.h | 1 +
4 files changed, 13 insertions(+), 1 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 4ce953f..a0c13a9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1323,6 +1323,7 @@ static inline void blk_partition_remap(struct bio *bio)
{
struct block_device *bdev = bio->bi_bdev;

+ bio->bi_part = bdev;
if (bio_sectors(bio) && bdev != bdev->bd_contains) {
struct hd_struct *p = bdev->bd_part;

@@ -2437,6 +2438,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
rq->__data_len = bio->bi_size;
rq->bio = rq->biotail = bio;

+ rq->rq_part = bio->bi_part;
if (bio->bi_bdev)
rq->rq_disk = bio->bi_bdev->bd_disk;
}
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 77b7c26..3913f5f 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -230,6 +230,9 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
{
unsigned short max_sectors;

+ if (req->rq_part != bio->bi_part)
+ return 0;
+
if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC))
max_sectors = queue_max_hw_sectors(q);
else
@@ -254,6 +257,9 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
{
unsigned short max_sectors;

+ if (bio->bi_part != req->rq_part)
+ return 0;
+
if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC))
max_sectors = queue_max_hw_sectors(q);
else
@@ -392,6 +398,9 @@ static int attempt_merge(struct request_queue *q, struct request *req,
|| next->special)
return 0;

+ if (req->rq_part != next->rq_part)
+ return 0;
+
/*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 46ad519..0940480 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -34,7 +34,7 @@ struct bio {
sector_t bi_sector; /* device address in 512 byte
sectors */
struct bio *bi_next; /* request queue link */
- struct block_device *bi_bdev;
+ struct block_device *bi_bdev, *bi_part;
unsigned long bi_flags; /* status, command, etc */
unsigned long bi_rw; /* bottom bits READ/WRITE,
* top bits priority
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index aae86fd..f31bd4f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -114,6 +114,7 @@ struct request {
void *elevator_private2;
void *elevator_private3;

+ struct block_device *rq_part;
struct gendisk *rq_disk;
unsigned long start_time;
#ifdef CONFIG_BLK_CGROUP