Re: [PATCH 3/4] blk-mq: establish new mapping before cpu starts handling requests

From: Ming Lei
Date: Wed Jun 24 2015 - 05:46:15 EST


On Sun, Jun 21, 2015 at 9:52 PM, Akinobu Mita <akinobu.mita@xxxxxxxxx> wrote:
> ctx->index_hw is zero for the CPUs which have never been onlined since
> the block queue was initialized. If one of those CPUs is hotadded and
> starts handling request before new mappings are established, pending

Could you explain a bit what the handling request is? The fact is that
blk_mq_queue_reinit() is run after all queues are put into freezing.

> bitmap is not correctly marked for inserted requests as ctx->index_hw
> is still zero. So flush_busy_ctxs can't find the requests on the
> software queue.
>
> Fix it by establishing new mapping before hotadded cpu starts handling
> requests.
>
> Signed-off-by: Akinobu Mita <akinobu.mita@xxxxxxxxx>
> Cc: Jens Axboe <axboe@xxxxxxxxx>
> ---
> block/blk-mq-cpumap.c | 9 +++++----
> block/blk-mq.c | 41 ++++++++++++++++++++++++++---------------
> block/blk-mq.h | 3 ++-
> 3 files changed, 33 insertions(+), 20 deletions(-)
>
> diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
> index 5f13f4d..99f65c0 100644
> --- a/block/blk-mq-cpumap.c
> +++ b/block/blk-mq-cpumap.c
> @@ -31,7 +31,8 @@ static int get_first_sibling(unsigned int cpu)
> return cpu;
> }
>
> -int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
> +int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
> + const struct cpumask *online_mask)
> {
> unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
> cpumask_var_t cpus;
> @@ -41,7 +42,7 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
>
> cpumask_clear(cpus);
> nr_cpus = nr_uniq_cpus = 0;
> - for_each_online_cpu(i) {
> + for_each_cpu(i, online_mask) {
> nr_cpus++;
> first_sibling = get_first_sibling(i);
> if (!cpumask_test_cpu(first_sibling, cpus))
> @@ -51,7 +52,7 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
>
> queue = 0;
> for_each_possible_cpu(i) {
> - if (!cpu_online(i)) {
> + if (!cpumask_test_cpu(i, online_mask)) {
> map[i] = 0;
> continue;
> }
> @@ -95,7 +96,7 @@ unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set)
> if (!map)
> return NULL;
>
> - if (!blk_mq_update_queue_map(map, set->nr_hw_queues))
> + if (!blk_mq_update_queue_map(map, set->nr_hw_queues, cpu_online_mask))
> return map;
>
> kfree(map);
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 6487710..64d93e4 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -1771,7 +1771,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
> }
> }
>
> -static void blk_mq_map_swqueue(struct request_queue *q)
> +static void blk_mq_map_swqueue(struct request_queue *q,
> + const struct cpumask *online_mask)
> {
> unsigned int i;
> struct blk_mq_hw_ctx *hctx;
> @@ -1788,7 +1789,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
> */
> queue_for_each_ctx(q, ctx, i) {
> /* If the cpu isn't online, the cpu is mapped to first hctx */
> - if (!cpu_online(i))
> + if (!cpumask_test_cpu(i, online_mask))
> continue;
>
> hctx = q->mq_ops->map_queue(q, i);
> @@ -2014,7 +2015,10 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
>
> blk_mq_add_queue_tag_set(set, q);
>
> - blk_mq_map_swqueue(q);
> + get_online_cpus();
> + blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues, cpu_online_mask);
> + blk_mq_map_swqueue(q, cpu_online_mask);
> + put_online_cpus();
>
> return q;
>
> @@ -2055,13 +2059,14 @@ void blk_mq_free_queue(struct request_queue *q)
> }
>
> /* Basically redo blk_mq_init_queue with queue frozen */
> -static void blk_mq_queue_reinit(struct request_queue *q)
> +static void blk_mq_queue_reinit(struct request_queue *q,
> + const struct cpumask *online_mask)
> {
> WARN_ON_ONCE(!q->mq_freeze_depth);
>
> blk_mq_sysfs_unregister(q);
>
> - blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues);
> + blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues, online_mask);
>
> /*
> * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe
> @@ -2069,25 +2074,31 @@ static void blk_mq_queue_reinit(struct request_queue *q)
> * involves free and re-allocate memory, worthy doing?)
> */
>
> - blk_mq_map_swqueue(q);
> + blk_mq_map_swqueue(q, online_mask);
>
> blk_mq_sysfs_register(q);
> }
>
> +static struct cpumask online_new;
> +
> static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
> unsigned long action, void *hcpu)
> {
> struct request_queue *q;
> + int cpu = (unsigned long)hcpu;
>
> - /*
> - * Before new mappings are established, hotadded cpu might already
> - * start handling requests. This doesn't break anything as we map
> - * offline CPUs to first hardware queue. We will re-init the queue
> - * below to get optimal settings.
> - */
> - if (action != CPU_DEAD && action != CPU_DEAD_FROZEN &&
> - action != CPU_ONLINE && action != CPU_ONLINE_FROZEN)
> + switch (action & ~CPU_TASKS_FROZEN) {
> + case CPU_DEAD:
> + case CPU_UP_CANCELED:
> + cpumask_copy(&online_new, cpu_online_mask);
> + break;
> + case CPU_UP_PREPARE:
> + cpumask_copy(&online_new, cpu_online_mask);
> + cpumask_set_cpu(cpu, &online_new);
> + break;
> + default:
> return NOTIFY_OK;
> + }
>
> mutex_lock(&all_q_mutex);
>
> @@ -2111,7 +2122,7 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
> }
>
> list_for_each_entry(q, &all_q_list, all_q_node)
> - blk_mq_queue_reinit(q);
> + blk_mq_queue_reinit(q, &online_new);
>
> list_for_each_entry(q, &all_q_list, all_q_node)
> blk_mq_unfreeze_queue(q);
> diff --git a/block/blk-mq.h b/block/blk-mq.h
> index 6a48c4c..f4fea79 100644
> --- a/block/blk-mq.h
> +++ b/block/blk-mq.h
> @@ -51,7 +51,8 @@ void blk_mq_disable_hotplug(void);
> * CPU -> queue mappings
> */
> extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set);
> -extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues);
> +extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
> + const struct cpumask *online_mask);
> extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
>
> /*
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> Please read the FAQ at http://www.tux.org/lkml/



--
Ming Lei
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/