[PATCH RFC 9/9] scsi: scsi_debug: Drop sdebug_queue concept

From: John Garry
Date: Mon Mar 13 2023 - 08:50:06 EST


The struct sdebug_queue is to manage tags and hold the associated
queued command entry pointer (for that tag).

Since the tagset iters are now used for functions like
sdebug_blk_mq_poll(), there is no real need to manage these queues.
Indeed, blk-mq already provides what we need for managing tags and
requests.

However the sdebug_queue concept was still being used in max_queue_store()
to find tags which we need to "retire" when lowering sdebug_max_queue.
However the method to reduce sdebug_max_queue separate from
shost->can_queue is fundamentally broken.

The shost->can_queue value is initially used to set per-HW queue context
tag depth in the block layer. This ensures that the shost is not sent too
many commands which it can deal with. However lowering sdebug_max_queue
separately meant that we can easily overload the shost.

Indeed, since sdebug_q_arr is shared amount all shosts, it is possible
to easily consume all tags in sdebug_q_arr while the shosts are not
overloaded according to SCSI ml/block layer, i.e. shosts may be sent more
commands than they can consume.

Resolve this by changing per-HW queue context tag depth when we change
sdebug_max_queue.

The submit queue tags info shown in scsi_debug_show_info() is dropped -
tag info may be got from /sys/kernel/debug/block/ path.

Signed-off-by: John Garry <john.g.garry@xxxxxxxxxx>
---
We should not call sbitmap_queue_resize(), below - any better ideas?

drivers/scsi/scsi_debug.c | 246 +++++++-------------------------------
1 file changed, 42 insertions(+), 204 deletions(-)

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 7ef195072d97..2bfb6db538fc 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -341,8 +341,6 @@ struct sdebug_defer {
struct hrtimer hrt;
struct execute_work ew;
ktime_t cmpl_ts;/* time since boot to complete this cmd */
- int sqa_idx; /* index of sdebug_queue array */
- int hc_idx; /* hostwide tag index */
int issuing_cpu;
bool aborted; /* true when blk_abort_request() already called */
enum sdeb_defer_type defer_t;
@@ -360,12 +358,6 @@ struct sdebug_scsi_cmd {
spinlock_t lock;
};

-struct sdebug_queue {
- struct sdebug_queued_cmd *qc_arr[SDEBUG_CANQUEUE];
- unsigned long in_use_bm[SDEBUG_CANQUEUE_WORDS];
- spinlock_t qc_lock;
-};
-
static atomic_t sdebug_cmnd_count; /* number of incoming commands */
static atomic_t sdebug_completions; /* count of deferred completions */
static atomic_t sdebug_miss_cpus; /* submission + completion cpus differ */
@@ -848,8 +840,8 @@ static int sdeb_zbc_max_open = DEF_ZBC_MAX_OPEN_ZONES;
static int sdeb_zbc_nr_conv = DEF_ZBC_NR_CONV_ZONES;

static int submit_queues = DEF_SUBMIT_QUEUES; /* > 1 for multi-queue (mq) */
+
static int poll_queues = 2; /* iouring iopoll interface.*/
-static struct sdebug_queue *sdebug_q_arr; /* ptr to array of submit queues */

static DEFINE_RWLOCK(atomic_rw);
static DEFINE_RWLOCK(atomic_rw2);
@@ -4904,20 +4896,6 @@ static int resp_rwp_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
return res;
}

-static struct sdebug_queue *get_queue(struct scsi_cmnd *cmnd)
-{
- u16 hwq;
- u32 tag = blk_mq_unique_tag(scsi_cmd_to_rq(cmnd));
-
- hwq = blk_mq_unique_tag_to_hwq(tag);
-
- pr_debug("tag=%#x, hwq=%d\n", tag, hwq);
- if (WARN_ON_ONCE(hwq >= submit_queues))
- hwq = 0;
-
- return sdebug_q_arr + hwq;
-}
-
static u32 get_tag(struct scsi_cmnd *cmnd)
{
return blk_mq_unique_tag(scsi_cmd_to_rq(cmnd));
@@ -4927,68 +4905,30 @@ static u32 get_tag(struct scsi_cmnd *cmnd)
static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp)
{
struct sdebug_queued_cmd *sqcp = container_of(sd_dp, struct sdebug_queued_cmd, sd_dp);
- int qc_idx;
- int retiring = 0;
- unsigned long flags, iflags;
+ unsigned long flags;
struct scsi_cmnd *scp = sqcp->scmd;
struct sdebug_scsi_cmd *sdsc;
bool aborted;
- struct sdebug_queue *sqp;

- qc_idx = sd_dp->sqa_idx;
if (sdebug_statistics) {
atomic_inc(&sdebug_completions);
if (raw_smp_processor_id() != sd_dp->issuing_cpu)
atomic_inc(&sdebug_miss_cpus);
}
+
if (!scp) {
pr_err("scmd=NULL\n");
goto out;
}
- if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE))) {
- pr_err("wild qc_idx=%d\n", qc_idx);
- goto out;
- }

sdsc = scsi_cmd_priv(scp);
- sqp = get_queue(scp);
- spin_lock_irqsave(&sqp->qc_lock, iflags);
spin_lock_irqsave(&sdsc->lock, flags);
aborted = sd_dp->aborted;
if (unlikely(aborted))
sd_dp->aborted = false;
ASSIGN_QEUEUED_CMD(scp, NULL);

- if (unlikely(atomic_read(&retired_max_queue) > 0))
- retiring = 1;
-
- sqp->qc_arr[qc_idx] = NULL;
- if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) {
- spin_unlock_irqrestore(&sdsc->lock, flags);
- spin_unlock_irqrestore(&sqp->qc_lock, iflags);
- pr_err("Unexpected completion qc_idx=%d\n", qc_idx);
- goto out;
- }
-
- if (unlikely(retiring)) { /* user has reduced max_queue */
- int k, retval;
-
- retval = atomic_read(&retired_max_queue);
- if (qc_idx >= retval) {
- spin_unlock_irqrestore(&sdsc->lock, flags);
- spin_unlock_irqrestore(&sqp->qc_lock, iflags);
- pr_err("index %d too large\n", retval);
- goto out;
- }
- k = find_last_bit(sqp->in_use_bm, retval);
- if ((k < sdebug_max_queue) || (k == retval))
- atomic_set(&retired_max_queue, 0);
- else
- atomic_set(&retired_max_queue, k + 1);
- }
-
spin_unlock_irqrestore(&sdsc->lock, flags);
- spin_unlock_irqrestore(&sqp->qc_lock, iflags);

if (aborted) {
pr_info("bypassing scsi_done() due to aborted cmd, kicking-off EH\n");
@@ -5275,21 +5215,18 @@ static bool stop_qc_helper(struct sdebug_defer *sd_dp,
}


-static bool scsi_debug_stop_cmnd(struct scsi_cmnd *cmnd, int *sqa_idx)
+static bool scsi_debug_stop_cmnd(struct scsi_cmnd *cmnd)
{
enum sdeb_defer_type l_defer_t;
- struct sdebug_queued_cmd *sqcp;
struct sdebug_defer *sd_dp;
struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd);
+ struct sdebug_queued_cmd *sqcp = TO_QEUEUED_CMD(cmnd);

lockdep_assert_held(&sdsc->lock);

- sqcp = TO_QEUEUED_CMD(cmnd);
if (!sqcp)
return false;
sd_dp = &sqcp->sd_dp;
- if (sqa_idx)
- *sqa_idx = sd_dp->sqa_idx;
l_defer_t = READ_ONCE(sd_dp->defer_t);
ASSIGN_QEUEUED_CMD(cmnd, NULL);

@@ -5305,22 +5242,13 @@ static bool scsi_debug_stop_cmnd(struct scsi_cmnd *cmnd, int *sqa_idx)
static bool scsi_debug_abort_cmnd(struct scsi_cmnd *cmnd)
{
struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd);
- struct sdebug_queue *sqp = get_queue(cmnd);
- unsigned long flags, iflags;
- int k = -1;
+ unsigned long flags;
bool res;

spin_lock_irqsave(&sdsc->lock, flags);
- res = scsi_debug_stop_cmnd(cmnd, &k);
+ res = scsi_debug_stop_cmnd(cmnd);
spin_unlock_irqrestore(&sdsc->lock, flags);

- if (k >= 0) {
- spin_lock_irqsave(&sqp->qc_lock, iflags);
- clear_bit(k, sqp->in_use_bm);
- sqp->qc_arr[k] = NULL;
- spin_unlock_irqrestore(&sqp->qc_lock, iflags);
- }
-
return res;
}

@@ -5578,7 +5506,6 @@ struct sdebug_queued_cmd *sdebug_alloc_queued_cmd(struct scsi_cmnd *scmd)
INIT_WORK(&sd_dp->ew.work, sdebug_q_cmd_wq_complete);

sqcp->scmd = scmd;
- sd_dp->sqa_idx = -1;

return sqcp;
}
@@ -5597,13 +5524,11 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
struct request *rq = scsi_cmd_to_rq(cmnd);
bool polled = rq->cmd_flags & REQ_POLLED;
struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd);
- unsigned long iflags, flags;
+ unsigned long flags;
u64 ns_from_boot = 0;
- struct sdebug_queue *sqp;
struct sdebug_queued_cmd *sqcp;
struct scsi_device *sdp;
struct sdebug_defer *sd_dp;
- int k;

if (unlikely(devip == NULL)) {
if (scsi_result == 0)
@@ -5615,8 +5540,6 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
if (delta_jiff == 0)
goto respond_in_thread;

- sqp = get_queue(cmnd);
- spin_lock_irqsave(&sqp->qc_lock, iflags);

if (unlikely(sdebug_every_nth && (SDEBUG_OPT_RARE_TSF & sdebug_opts) &&
(scsi_result == 0))) {
@@ -5635,33 +5558,12 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
}
}

- k = find_first_zero_bit(sqp->in_use_bm, sdebug_max_queue);
- if (unlikely(k >= sdebug_max_queue)) {
- spin_unlock_irqrestore(&sqp->qc_lock, iflags);
- if (scsi_result)
- goto respond_in_thread;
- scsi_result = device_qfull_result;
- if (SDEBUG_OPT_Q_NOISE & sdebug_opts)
- sdev_printk(KERN_INFO, sdp, "%s: max_queue=%d exceeded: TASK SET FULL\n",
- __func__, sdebug_max_queue);
- goto respond_in_thread;
- }
- set_bit(k, sqp->in_use_bm);
-
sqcp = sdebug_alloc_queued_cmd(cmnd);
if (!sqcp) {
- clear_bit(k, sqp->in_use_bm);
- spin_unlock_irqrestore(&sqp->qc_lock, iflags);
+ pr_err("%s no alloc\n", __func__);
return SCSI_MLQUEUE_HOST_BUSY;
}
sd_dp = &sqcp->sd_dp;
- sd_dp->sqa_idx = k;
- sqp->qc_arr[k] = sqcp;
- spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-
- /* Set the hostwide tag */
- if (sdebug_host_max_queue)
- sd_dp->hc_idx = get_tag(cmnd);

if (polled)
ns_from_boot = ktime_get_boottime_ns();
@@ -5708,10 +5610,6 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
u64 d = ktime_get_boottime_ns() - ns_from_boot;

if (kt <= d) { /* elapsed duration >= kt */
- spin_lock_irqsave(&sqp->qc_lock, iflags);
- sqp->qc_arr[k] = NULL;
- clear_bit(k, sqp->in_use_bm);
- spin_unlock_irqrestore(&sqp->qc_lock, iflags);
/* call scsi_done() from this thread */
sdebug_free_queued_cmd(sqcp);
scsi_done(cmnd);
@@ -5974,8 +5872,7 @@ static int scsi_debug_write_info(struct Scsi_Host *host, char *buffer,
* output are not atomics so might be inaccurate in a busy system. */
static int scsi_debug_show_info(struct seq_file *m, struct Scsi_Host *host)
{
- int f, j, l;
- struct sdebug_queue *sqp;
+ int j;
struct sdebug_host_info *sdhp;

seq_printf(m, "scsi_debug adapter driver, version %s [%s]\n",
@@ -6003,16 +5900,6 @@ static int scsi_debug_show_info(struct seq_file *m, struct Scsi_Host *host)
atomic_read(&sdebug_a_tsf),
atomic_read(&sdeb_mq_poll_count));

- seq_printf(m, "submit_queues=%d\n", submit_queues);
- for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) {
- seq_printf(m, " queue %d:\n", j);
- f = find_first_bit(sqp->in_use_bm, sdebug_max_queue);
- if (f != sdebug_max_queue) {
- l = find_last_bit(sqp->in_use_bm, sdebug_max_queue);
- seq_printf(m, " in_use_bm BUSY: %s: %d,%d\n",
- "first,last bits", f, l);
- }
- }

seq_printf(m, "this host_no=%d\n", host->host_no);
if (!xa_empty(per_store_ap)) {
@@ -6428,30 +6315,36 @@ static ssize_t max_queue_show(struct device_driver *ddp, char *buf)
static ssize_t max_queue_store(struct device_driver *ddp, const char *buf,
size_t count)
{
- int j, n, k, a;
- struct sdebug_queue *sqp;
+ int n;

if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n > 0) &&
- (n <= SDEBUG_CANQUEUE) &&
- (sdebug_host_max_queue == 0)) {
+ (n <= SDEBUG_CANQUEUE)) {
+ struct sdebug_host_info *sdhp;
+
mutex_lock(&sdebug_host_list_mutex);
- block_unblock_all_queues(true);
- k = 0;
- for (j = 0, sqp = sdebug_q_arr; j < submit_queues;
- ++j, ++sqp) {
- a = find_last_bit(sqp->in_use_bm, SDEBUG_CANQUEUE);
- if (a > k)
- k = a;
+ list_for_each_entry(sdhp, &sdebug_host_list, host_list) {
+ struct Scsi_Host *shost = sdhp->shost;
+ struct blk_mq_tag_set *tag_set = &shost->tag_set;
+ struct scsi_device *sdev;
+ int i;
+
+ scsi_block_requests(shost);
+ shost_for_each_device(sdev, shost)
+ scsi_device_quiesce(sdev);
+
+ for (i = 0; i < tag_set->nr_hw_queues; i++) {
+ struct blk_mq_tags *tags = tag_set->tags[i];
+
+ /* yuck, we should not do this */
+ sbitmap_queue_resize(&tags->bitmap_tags, n);
+ }
+
+ shost_for_each_device(sdev, shost)
+ scsi_device_resume(sdev);
+ scsi_unblock_requests(shost);
}
- sdebug_max_queue = n;
- if (k == SDEBUG_CANQUEUE)
- atomic_set(&retired_max_queue, 0);
- else if (k >= n)
- atomic_set(&retired_max_queue, k + 1);
- else
- atomic_set(&retired_max_queue, 0);
- block_unblock_all_queues(false);
mutex_unlock(&sdebug_host_list_mutex);
+ sdebug_max_queue = n;
return count;
}
return -EINVAL;
@@ -6975,13 +6868,6 @@ static int __init scsi_debug_init(void)
sdebug_max_queue);
}

- sdebug_q_arr = kcalloc(submit_queues, sizeof(struct sdebug_queue),
- GFP_KERNEL);
- if (sdebug_q_arr == NULL)
- return -ENOMEM;
- for (k = 0; k < submit_queues; ++k)
- spin_lock_init(&sdebug_q_arr[k].qc_lock);
-
/*
* check for host managed zoned block device specified with
* ptype=0x14 or zbc=XXX.
@@ -6990,10 +6876,8 @@ static int __init scsi_debug_init(void)
sdeb_zbc_model = BLK_ZONED_HM;
} else if (sdeb_zbc_model_s && *sdeb_zbc_model_s) {
k = sdeb_zbc_model_str(sdeb_zbc_model_s);
- if (k < 0) {
- ret = k;
- goto free_q_arr;
- }
+ if (k < 0)
+ return k;
sdeb_zbc_model = k;
switch (sdeb_zbc_model) {
case BLK_ZONED_NONE:
@@ -7005,8 +6889,7 @@ static int __init scsi_debug_init(void)
break;
default:
pr_err("Invalid ZBC model\n");
- ret = -EINVAL;
- goto free_q_arr;
+ return -EINVAL;
}
}
if (sdeb_zbc_model != BLK_ZONED_NONE) {
@@ -7053,17 +6936,14 @@ static int __init scsi_debug_init(void)
sdebug_unmap_granularity <=
sdebug_unmap_alignment) {
pr_err("ERR: unmap_granularity <= unmap_alignment\n");
- ret = -EINVAL;
- goto free_q_arr;
+ return -EINVAL;
}
}
xa_init_flags(per_store_ap, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
if (want_store) {
idx = sdebug_add_store();
- if (idx < 0) {
- ret = idx;
- goto free_q_arr;
- }
+ if (idx < 0)
+ return idx;
}

pseudo_primary = root_device_register("pseudo_0");
@@ -7120,8 +7000,6 @@ static int __init scsi_debug_init(void)
root_device_unregister(pseudo_primary);
free_vm:
sdebug_erase_store(idx, NULL);
-free_q_arr:
- kfree(sdebug_q_arr);
return ret;
}

@@ -7138,7 +7016,6 @@ static void __exit scsi_debug_exit(void)

sdebug_erase_all_stores(false);
xa_destroy(per_store_ap);
- kfree(sdebug_q_arr);
}

device_initcall(scsi_debug_init);
@@ -7514,11 +7391,8 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque)
u32 unique_tag = blk_mq_unique_tag(rq);
u16 hwq = blk_mq_unique_tag_to_hwq(unique_tag);
struct sdebug_queued_cmd *sqcp;
- struct sdebug_queue *sqp;
unsigned long flags;
int queue_num = data->queue_num;
- bool retiring = false;
- int qc_idx;
ktime_t time;

/* We're only interested in one queue for this iteration */
@@ -7537,7 +7411,7 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque)
spin_unlock_irqrestore(&sdsc->lock, flags);
return true;
}
- sqp = sdebug_q_arr + queue_num;
+
sd_dp = &sqcp->sd_dp;


@@ -7552,36 +7426,6 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque)
return true;
}

- if (unlikely(atomic_read(&retired_max_queue) > 0))
- retiring = true;
-
- qc_idx = sd_dp->sqa_idx;
- sqp->qc_arr[qc_idx] = NULL;
- if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) {
- spin_unlock_irqrestore(&sdsc->lock, flags);
- pr_err("Unexpected completion sqp %p queue_num=%d qc_idx=%u\n",
- sqp, queue_num, qc_idx);
- sdebug_free_queued_cmd(sqcp);
- return true;
- }
-
- if (unlikely(retiring)) { /* user has reduced max_queue */
- int k, retval = atomic_read(&retired_max_queue);
-
- if (qc_idx >= retval) {
- pr_err("index %d too large\n", retval);
- spin_unlock_irqrestore(&sdsc->lock, flags);
- sdebug_free_queued_cmd(sqcp);
- return true;
- }
-
- k = find_last_bit(sqp->in_use_bm, retval);
- if ((k < sdebug_max_queue) || (k == retval))
- atomic_set(&retired_max_queue, 0);
- else
- atomic_set(&retired_max_queue, k + 1);
- }
-
ASSIGN_QEUEUED_CMD(cmd, NULL);
spin_unlock_irqrestore(&sdsc->lock, flags);

@@ -7601,20 +7445,14 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque)
static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
{
int num_entries = 0;
- unsigned long iflags;
- struct sdebug_queue *sqp;
struct sdebug_blk_mq_poll_data data = {
.queue_num = queue_num,
.num_entries = &num_entries,
};
- sqp = sdebug_q_arr + queue_num;
-
- spin_lock_irqsave(&sqp->qc_lock, iflags);

blk_mq_tagset_busy_iter(&shost->tag_set, sdebug_blk_mq_poll_iter,
&data);

- spin_unlock_irqrestore(&sqp->qc_lock, iflags);
if (num_entries > 0)
atomic_add(num_entries, &sdeb_mq_poll_count);
return num_entries;
--
2.35.3