Re: [RFC PATCH 3/3] nvme: Introduce nvme_execute_passthru_rq_nowait()

From: Sagi Grimberg
Date: Fri Oct 25 2019 - 17:40:27 EST



+#ifdef CONFIG_NVME_TARGET_PASSTHRU
+static void nvme_execute_passthru_rq_work(struct work_struct *w)
+{
+	struct nvme_request *req = container_of(w, struct nvme_request, work);
+	struct request *rq = blk_mq_rq_from_pdu(req);
+	rq_end_io_fn *done = rq->end_io;
+	void *end_io_data = rq->end_io_data;

Why is end_io_data stored to a local variable here? Where is it set?

blk_execute_rq() (which is called by nvme_execute_rq()) will overwrite
rq->end_io and rq->end_io_data. We store them here so we can call the
callback appropriately after the request completes. They would be set by
the caller in the same way they would be if the caller were using
blk_execute_rq_nowait().

I see..
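
(To make that concrete, a caller would look roughly like the sketch
below; the context struct and the names my_passthru_ctx, my_passthru_done,
my_submit_passthru and my_complete_command are made up for illustration,
and the request is allocated the way other passthru users do it, via
nvme_alloc_request():)
--
/* Illustrative sketch only -- the my_* names are hypothetical. */
static void my_passthru_done(struct request *rq, blk_status_t status)
{
	/* end_io_data was restored by nvme_execute_passthru_rq_work() */
	struct my_passthru_ctx *ctx = rq->end_io_data;

	my_complete_command(ctx, nvme_req(rq)->status);
	blk_mq_free_request(rq);
}

static int my_submit_passthru(struct request_queue *q, struct nvme_command *cmd,
			      struct my_passthru_ctx *ctx)
{
	struct request *rq;

	rq = nvme_alloc_request(q, cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* set end_io_data exactly as for blk_execute_rq_nowait() */
	rq->end_io_data = ctx;
	nvme_execute_passthru_rq_nowait(rq, my_passthru_done);
	return 0;
}
--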

+
+	nvme_execute_passthru_rq(rq);
+
+	if (done) {
+		rq->end_io_data = end_io_data;
+		done(rq, 0);
+	}
+}
+
+void nvme_execute_passthru_rq_nowait(struct request *rq, rq_end_io_fn *done)
+{
+	struct nvme_command *cmd = nvme_req(rq)->cmd;
+	struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl;
+	struct nvme_ns *ns = rq->q->queuedata;
+	struct gendisk *disk = ns ? ns->disk : NULL;
+	u32 effects;
+
+	/*
+	 * This function may be called in interrupt context, so we cannot sleep
+	 * but nvme_passthru_[start|end]() may sleep so we need to execute
+	 * the command in a work queue.
+	 */
+	effects = nvme_command_effects(ctrl, ns, cmd->common.opcode);
+	if (effects) {
+		rq->end_io = done;
+		INIT_WORK(&nvme_req(rq)->work, nvme_execute_passthru_rq_work);
+		queue_work(nvme_wq, &nvme_req(rq)->work);

This work will need to be flushed in nvme_stop_ctrl. That is
assuming that it will fail fast and not hang (which it should, given
that it's a passthru command allocated via nvme_alloc_request()).

Hmm, that's going to be a bit tricky. Seeing as the work_structs potentially
belong to a number of different requests, we can't just flush the
individual work items. I think we'd have to create a workqueue per ctrl
and flush that. Any objections to that?
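
(A rough sketch of that per-ctrl workqueue idea, with a hypothetical
passthru_wq field added to struct nvme_ctrl:)
--
	/* at controller init time */
	ctrl->passthru_wq = alloc_workqueue("nvme-passthru-wq-%d",
			WQ_UNBOUND | WQ_MEM_RECLAIM, 0, ctrl->instance);

	/* in nvme_execute_passthru_rq_nowait(), instead of nvme_wq */
	queue_work(ctrl->passthru_wq, &nvme_req(rq)->work);

	/* in nvme_stop_ctrl() */
	flush_workqueue(ctrl->passthru_wq);
--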

I'd object to that overhead...

How about marking the request when the workqueue path is taken, and
then in nvme_stop_ctrl adding a blk_mq_tagset_busy_iter that cancels
it in the callback?

Something like:
--
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index fa7ba09dca77..13dbbec5497d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3955,12 +3955,33 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 }
 EXPORT_SYMBOL_GPL(nvme_complete_async_event);

+static bool nvme_flush_async_passthru_request(struct request *rq,
+		void *data, bool reserved)
+{
+	if (!(nvme_req(rq)->flags & NVME_REQ_ASYNC_PASSTHRU))
+		return true;
+
+	dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
+		"Cancelling passthru I/O %d", rq->tag);
+	flush_work(&nvme_req(rq)->work);
+	return true;
+}
+
+static void nvme_flush_async_passthru_requests(struct nvme_ctrl *ctrl)
+{
+	blk_mq_tagset_busy_iter(ctrl->tagset,
+			nvme_flush_async_passthru_request, ctrl);
+	blk_mq_tagset_busy_iter(ctrl->admin_tagset,
+			nvme_flush_async_passthru_request, ctrl);
+}
+
 void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 {
 	nvme_mpath_stop(ctrl);
 	nvme_stop_keep_alive(ctrl);
 	flush_work(&ctrl->async_event_work);
 	cancel_work_sync(&ctrl->fw_act_work);
+	nvme_flush_async_passthru_requests(ctrl);
 }
 EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
--
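
(This assumes a new NVME_REQ_ASYNC_PASSTHRU flag in the nvme_request
flags enum; it would have to be set in the nowait path before the work
is queued, roughly:)
--
	if (effects) {
		/* mark the request so nvme_stop_ctrl() can flush its work */
		nvme_req(rq)->flags |= NVME_REQ_ASYNC_PASSTHRU;
		rq->end_io = done;
		INIT_WORK(&nvme_req(rq)->work, nvme_execute_passthru_rq_work);
		queue_work(nvme_wq, &nvme_req(rq)->work);
	}
--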