[PATCH RFC 20/21] blk-mq: Allow combined hardware queues
From: Alexander Gordeev
Date: Fri Sep 16 2016 - 04:53:33 EST
This is the 4th and last step in a bid to enable mapping
of multiple device hardware queues to a single CPU.
Available hardware queues are evenly distributed to CPUs.
Still, there might be some number of queues left spare, but no
more than (number of queues) % (number of CPUs) in the worst
case.
CC: Jens Axboe <axboe@xxxxxxxxx>
CC: linux-nvme@xxxxxxxxxxxxxxxxxxx
Signed-off-by: Alexander Gordeev <agordeev@xxxxxxxxxx>
---
block/blk-mq-cpumap.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
block/blk-mq.c | 14 +-------------
block/blk-mq.h | 2 ++
3 files changed, 47 insertions(+), 13 deletions(-)
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index ee553a4..0b49f30 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -9,6 +9,7 @@
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/cpu.h>
+#include <linux/crash_dump.h>
#include <linux/blk-mq.h>
#include "blk.h"
@@ -86,6 +87,49 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
return 0;
}
+/*
+ * Fill in set->nr_co_queues and set->co_queue_size from set->nr_hw_queues
+ * and the CPUs present in @online_mask.
+ *
+ * Two CPU counts are taken from @online_mask: the total number of CPUs and
+ * the number of CPUs whose first sibling has not been seen yet ("unique"
+ * CPUs). The hardware queues are then divided evenly:
+ *
+ *   - at least as many h/w queues as CPUs:        one group per CPU;
+ *   - at least as many h/w queues as unique CPUs: one group per unique CPU;
+ *   - otherwise:                                  one h/w queue per group.
+ *
+ * The division is an integer one, so up to (divisor - 1) hardware queues
+ * may be left out of any group.
+ *
+ * NOTE(review): nr_co_queues/co_queue_size are read here as "number of
+ * combined queues" and "h/w queues per combined queue" - confirm against
+ * the patch that introduced those fields.
+ */
+void blk_mq_adjust_tag_set(struct blk_mq_tag_set *set,
+			   const struct cpumask *online_mask)
+{
+	unsigned int nr_cpus = 0, nr_uniq_cpus = 0, first_sibling;
+	cpumask_var_t cpus;
+	int i;
+
+	/*
+	 * If a crashdump is active, then we are potentially in a very
+	 * memory constrained environment. Limit us to 1 queue.
+	 */
+	if (is_kdump_kernel()) {
+		set->nr_co_queues = 1;
+		set->co_queue_size = 1;
+		return;
+	}
+
+	/* Without the scratch mask the CPUs cannot be counted. */
+	if (!alloc_cpumask_var(&cpus, GFP_ATOMIC))
+		goto default_map;
+
+	cpumask_clear(cpus);
+
+	for_each_cpu(i, online_mask) {
+		nr_cpus++;
+		first_sibling = get_first_sibling(i);
+		if (!cpumask_test_cpu(first_sibling, cpus))
+			nr_uniq_cpus++;
+		cpumask_set_cpu(i, cpus);
+	}
+
+	free_cpumask_var(cpus);
+
+	/*
+	 * An empty mask leaves both counters at zero; bail out before they
+	 * are used as divisors. A non-empty mask always yields at least one
+	 * unique CPU, because the first CPU visited finds the mask empty.
+	 */
+	if (!nr_cpus)
+		goto default_map;
+
+	if (set->nr_hw_queues >= nr_cpus) {
+		set->nr_co_queues = nr_cpus;
+		set->co_queue_size = set->nr_hw_queues / nr_cpus;
+		return;
+	}
+
+	if (set->nr_hw_queues >= nr_uniq_cpus) {
+		set->nr_co_queues = nr_uniq_cpus;
+		set->co_queue_size = set->nr_hw_queues / nr_uniq_cpus;
+		return;
+	}
+
+default_map:
+	set->nr_co_queues = set->nr_hw_queues;
+	set->co_queue_size = 1;
+
+	/*
+	 * There is no use for more h/w queues than cpus. This only matters
+	 * when the CPUs could not be counted above.
+	 */
+	if (set->nr_co_queues > nr_cpu_ids)
+		set->nr_co_queues = nr_cpu_ids;
+}
+
/*
* We have no quick way of doing reverse lookups. This is only used at
* queue init time, so runtime isn't important.
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 450a3ed..ee05ea9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -21,7 +21,6 @@
#include <linux/cache.h>
#include <linux/sched/sysctl.h>
#include <linux/delay.h>
-#include <linux/crash_dump.h>
#include <trace/events/block.h>
@@ -2286,24 +2285,13 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
* TODO Restore original queue depth and count limits
*/
- /*
- * If a crashdump is active, then we are potentially in a very
- * memory constrained environment. Limit us to 1 queue.
- */
- set->nr_co_queues = is_kdump_kernel() ? 1 : set->nr_hw_queues;
- set->co_queue_size = 1;
+ blk_mq_adjust_tag_set(set, cpu_online_mask);
if (queue_depth(set) < set->reserved_tags + BLK_MQ_TAG_MIN)
return -EINVAL;
if (queue_depth(set) > BLK_MQ_MAX_DEPTH)
return -EINVAL;
- /*
- * There is no use for more h/w queues than cpus.
- */
- if (set->nr_co_queues > nr_cpu_ids)
- set->nr_co_queues = nr_cpu_ids;
-
set->tags = kzalloc_node(set->nr_co_queues * sizeof(*set->tags),
GFP_KERNEL, set->numa_node);
if (!set->tags)
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 592e308..70704f7 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -49,6 +49,8 @@ void blk_mq_disable_hotplug(void);
*/
extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
const struct cpumask *online_mask);
+extern void blk_mq_adjust_tag_set(struct blk_mq_tag_set *set,
+ const struct cpumask *online_mask);
extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
/*
--
1.8.3.1