[PATCH 2/3] block: nvme: use map_changed to set irq affinity hint

From: Ming Lei
Date: Mon Sep 28 2015 - 23:21:24 EST


This patch uses the .map_changed callback to set the irq affinity
hint, so that the hint can be updated when the CPU topology
changes.

Signed-off-by: Ming Lei <tom.leiming@xxxxxxxxx>
---
drivers/block/nvme-core.c | 53 ++++++++++++++++++++++++++++++-----------------
1 file changed, 34 insertions(+), 19 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index b97fc3f..cac16a6f 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -105,6 +105,8 @@ struct nvme_queue {
struct device *q_dmadev;
struct nvme_dev *dev;
char irqname[24]; /* nvme4294967295-65535\0 */
+ unsigned long mapped:1;
+ unsigned long irq_affinity_set:1;
spinlock_t q_lock;
struct nvme_command *sq_cmds;
struct nvme_command __iomem *sq_cmds_io;
@@ -232,6 +234,37 @@ static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
return 0;
}

+/*
+ * All namespaces share one tagset, so every 'hctx' with the same index
+ * is backed by the same nvme queue and tags.  The sw queue to hw queue
+ * mapping is global and depends only on the CPU topology, so the irq
+ * affinity hint is set just once per mapping state, using the cpumask
+ * of whichever hctx reaches this callback first.
+ */
+static void nvme_map_changed(struct blk_mq_hw_ctx *hctx,
+ unsigned int hctx_idx, bool mapped)
+{
+ struct nvme_queue *nvmeq = hctx->driver_data;
+ struct nvme_dev *dev = nvmeq->dev;
+ unsigned int irq;
+
+ if (nvmeq->mapped != mapped) /* state flipped: redo the hint */
+ nvmeq->irq_affinity_set = 0;
+
+ nvmeq->mapped = mapped;
+
+ if (nvmeq->irq_affinity_set) /* hint already set for this state */
+ return;
+
+ irq = dev->entry[nvmeq->cq_vector].vector;
+ if (mapped)
+ irq_set_affinity_hint(irq, hctx->cpumask);
+ else
+ irq_set_affinity_hint(irq, NULL); /* unmapped: drop the hint */
+
+ nvmeq->irq_affinity_set = 1;
+}
+
static int nvme_init_request(void *data, struct request *req,
unsigned int hctx_idx, unsigned int rq_idx,
unsigned int numa_node)
@@ -1664,6 +1697,7 @@ static struct blk_mq_ops nvme_mq_ops = {
.queue_rq = nvme_queue_rq,
.map_queue = blk_mq_map_queue,
.init_hctx = nvme_init_hctx,
+ .map_changed = nvme_map_changed,
.init_request = nvme_init_request,
.timeout = nvme_timeout,
};
@@ -2953,22 +2987,6 @@ static const struct file_operations nvme_dev_fops = {
.compat_ioctl = nvme_dev_ioctl,
};

-static void nvme_set_irq_hints(struct nvme_dev *dev)
-{
- struct nvme_queue *nvmeq;
- int i;
-
- for (i = 0; i < dev->online_queues; i++) {
- nvmeq = dev->queues[i];
-
- if (!nvmeq->tags || !(*nvmeq->tags))
- continue;
-
- irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
- blk_mq_tags_cpumask(*nvmeq->tags));
- }
-}
-
static int nvme_dev_start(struct nvme_dev *dev)
{
int result;
@@ -3010,8 +3028,6 @@ static int nvme_dev_start(struct nvme_dev *dev)
if (result)
goto free_tags;

- nvme_set_irq_hints(dev);
-
dev->event_limit = 1;
return result;

@@ -3062,7 +3078,6 @@ static int nvme_dev_resume(struct nvme_dev *dev)
} else {
nvme_unfreeze_queues(dev);
nvme_dev_add(dev);
- nvme_set_irq_hints(dev);
}
return 0;
}
--
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/