On 6/11/24 17:20, John Meneghini wrote:
From: Thomas Song <tsong@xxxxxxxxxxxxxxx>
+
+ if ((nvme_req(rq)->flags & NVME_MPATH_CNT_ACTIVE)) {
+ result = atomic_dec_if_positive(&ns->ctrl->nr_active);
+ WARN_ON_ONCE(result < 0);
+ }
if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
return;
can we remove result variable ? that is only used once,
how about something like this unless there is something wrong with
totally untested :-
+static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head)
{
- struct nvme_ns *ns, *found = NULL;
+ struct nvme_ns *ns, *old, *found = NULL;
+ int node = numa_node_id();
+
+ old = srcu_dereference(head->current_path[node], &head->srcu);
+
nit:- no need for white-line above ?
+inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
+{
+ switch (READ_ONCE(head->subsys->iopolicy)) {
+ case NVME_IOPOLICY_QD:
+ return nvme_queue_depth_path(head);
+ case NVME_IOPOLICY_RR:
+ return nvme_round_robin_path(head);
+ default:
+ return nvme_numa_path(head);
+ }
should we use another case for NVME_IOPOLICY_NUMA that will call
nvme_numa_path() and report ratelimited error on the default lable
before settling on nvme_numa_path()?
something like this totally untested :-
+}
+
static bool nvme_available_path(struct nvme_ns_head *head)
{
struct nvme_ns *ns;
@@ -803,6 +870,28 @@ static ssize_t nvme_subsys_iopolicy_show(struct device *dev,
nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]);
}
+static void nvme_subsys_iopolicy_update(struct nvme_subsystem *subsys,
+ int iopolicy)
+{
+ struct nvme_ctrl *ctrl;
+ int old_iopolicy = READ_ONCE(subsys->iopolicy);
+
+ if (old_iopolicy == iopolicy)
+ return;
+
+ WRITE_ONCE(subsys->iopolicy, iopolicy);
+
+ /* iopolicy changes clear the mpath by design */
+ mutex_lock(&nvme_subsystems_lock);
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
+ nvme_mpath_clear_ctrl_paths(ctrl);
+ mutex_unlock(&nvme_subsystems_lock);
+
+ pr_notice("%s: changed from %s to %s for subsysnqn %s\n", __func__,
+ nvme_iopolicy_names[old_iopolicy], nvme_iopolicy_names[iopolicy],
+ subsys->subnqn);
+}
+
static ssize_t nvme_subsys_iopolicy_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
@@ -812,7 +901,7 @@ static ssize_t nvme_subsys_iopolicy_store(struct device *dev,
for (i = 0; i < ARRAY_SIZE(nvme_iopolicy_names); i++) {
if (sysfs_streq(buf, nvme_iopolicy_names[i])) {
- WRITE_ONCE(subsys->iopolicy, i);
+ nvme_subsys_iopolicy_update(subsys, i);
return count;
}
}
@@ -923,6 +1012,9 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
!(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA))
return 0;
+ /* initialize this in the identify path to cover controller resets */
nit: If I'm not wrong, this function gets called from
|nvme_init_identify()|,
so it's pretty clear. That makes above comment kind of redundant ?
However, if others want that comment here, please ignore this message.
+ atomic_set(&ctrl->nr_active, 0);
+
if (!ctrl->max_namespaces ||
ctrl->max_namespaces > le32_to_cpu(id->nn)) {
dev_err(ctrl->device,
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 73442d3f504b..d6c1fe3e2832 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -50,6 +50,8 @@ extern struct workqueue_struct *nvme_wq;
extern struct workqueue_struct *nvme_reset_wq;that extern struct workqueue_struct *nvme_delete_wq;
+extern struct mutex nvme_subsystems_lock;
+
/*
* List of workarounds for devices that required behavior not specified in
* the standard.
@@ -195,6 +197,7 @@ enum {
NVME_REQ_CANCELLED = (1 << 0),
NVME_REQ_USERCMD = (1 << 1),
NVME_MPATH_IO_STATS = (1 << 2),
+ NVME_MPATH_CNT_ACTIVE = (1 << 3),
nit:- please align above to existing code ...