Re: Slow I/O on USB media after commit f664a3cc17b7d0a2bc3b3ab96181e1029b0ec0e6

From: Ming Lei
Date: Sun Nov 24 2019 - 22:55:02 EST


On Sat, Nov 23, 2019 at 04:44:55PM +0100, Andrea Vai wrote:
> Il giorno sab, 23/11/2019 alle 15.28 +0800, Ming Lei ha scritto:
> >
> > Please post the log of 'lsusb -v', and I will try to make a patch
> > for
> > addressing the issue.
>
> attached,

Please apply the attached patch, then rebuild and install the kernel, and reboot into it.

This time, please don't switch the IO scheduler.

Thanks,
Ming
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5c9adcaa27ac..eecb46020bfb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1436,7 +1436,13 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
if (unlikely(blk_mq_hctx_stopped(hctx)))
return;

- if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
+ /*
+ * Some single-queue devices may need to dispatch IO in order
+ * which was guaranteed for the legacy queue via the big queue
+ * lock. Now we rely on the single hctx->run_work for that.
+ */
+ if (!async && !(hctx->flags & (BLK_MQ_F_BLOCKING |
+ BLK_MQ_F_STRICT_DISPATCH_ORDER))) {
int cpu = get_cpu();
if (cpumask_test_cpu(cpu, hctx->cpumask)) {
__blk_mq_run_hw_queue(hctx);
@@ -3042,6 +3048,10 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
if (!set->ops->get_budget ^ !set->ops->put_budget)
return -EINVAL;

+ if (set->queue_depth > 1 && (set->flags &
+ BLK_MQ_F_STRICT_DISPATCH_ORDER))
+ return -EINVAL;
+
if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
pr_info("blk-mq: reduced tag depth to %u\n",
BLK_MQ_MAX_DEPTH);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index d3d237a09a78..563188844143 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1939,6 +1939,9 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
shost->tag_set.flags |=
BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy);
+ if (shost->hostt->strict_dispatch_order)
+ shost->tag_set.flags |= BLK_MQ_F_STRICT_DISPATCH_ORDER;
+
shost->tag_set.driver_data = shost;

return blk_mq_alloc_tag_set(&shost->tag_set);
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index 6737fab94959..77795edad8e8 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -661,6 +661,18 @@ static const struct scsi_host_template usb_stor_host_template = {
/* we do our own delay after a device or bus reset */
.skip_settle_delay = 1,

+
+ /*
+ * Some USB storage, such as Kingston Technology DataTraveler 100
+ * G3/G4/SE9 G2 (ID 0951:1666), requires IO to be dispatched in
+ * sequential order, otherwise IO performance may drop drastically.
+ *
+ * can_queue is always 1, so we set .strict_dispatch_order for the
+ * USB mass storage HBA. Another reason is that there may be other
+ * devices of this kind too.
+ */
+ .strict_dispatch_order = 1,
+
/* sysfs device attributes */
.sdev_attrs = sysfs_device_attr_list,

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index dc03e059fdff..844539690a27 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -388,6 +388,7 @@ struct blk_mq_ops {
enum {
BLK_MQ_F_SHOULD_MERGE = 1 << 0,
BLK_MQ_F_TAG_SHARED = 1 << 1,
+ BLK_MQ_F_STRICT_DISPATCH_ORDER = 1 << 2,
BLK_MQ_F_BLOCKING = 1 << 5,
BLK_MQ_F_NO_SCHED = 1 << 6,
BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index d4452d0ea3c7..f932d6fa1a4c 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -442,6 +442,13 @@ struct scsi_host_template {
/* True if the low-level driver supports blk-mq only */
unsigned force_blk_mq:1;

+ /*
+ * True if the low-level driver needs IO to be dispatched in
+ * the order provided by the legacy IO path. The flag is only
+ * valid for single-queue devices.
+ */
+ unsigned strict_dispatch_order:1;
+
/*
* Countdown for host blocking with no commands outstanding.
*/