[PATCH v1 2/2] blk-mq: fix CPU hotplug handling
From: Ming Lei
Date: Sun Apr 19 2015 - 11:32:46 EST
Firstly, hctx->tags has to be set to NULL in blk_mq_map_swqueue() whenever
the hw queue is to be disabled, no matter whether set->tags[i] is NULL or
not, because shared tags may already have been freed via another request queue.
The same situation has to be considered in blk_mq_hctx_cpu_online() too.
Finally, an unmapped hw queue can be remapped after the CPU topology changes,
so we need to allocate tags for the hw queue in blk_mq_map_swqueue() too.
The tags allocation for the hw queue can then be removed from the hctx CPU
online notifier; it is reasonable to do the allocation after remapping is done.
Cc: <stable@xxxxxxxxxxxxxxx>
Reported-by: Dongsu Park <dongsu.park@xxxxxxxxxxxxxxxx>
Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxxxxx>
---
block/blk-mq.c | 34 +++++++++++++---------------------
1 file changed, 13 insertions(+), 21 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1277f70..a0ae38a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1574,22 +1574,6 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
return NOTIFY_OK;
}
-static int blk_mq_hctx_cpu_online(struct blk_mq_hw_ctx *hctx, int cpu)
-{
- struct request_queue *q = hctx->queue;
- struct blk_mq_tag_set *set = q->tag_set;
-
- if (set->tags[hctx->queue_num])
- return NOTIFY_OK;
-
- set->tags[hctx->queue_num] = blk_mq_init_rq_map(set, hctx->queue_num);
- if (!set->tags[hctx->queue_num])
- return NOTIFY_STOP;
-
- hctx->tags = set->tags[hctx->queue_num];
- return NOTIFY_OK;
-}
-
static int blk_mq_hctx_notify(void *data, unsigned long action,
unsigned int cpu)
{
@@ -1597,8 +1581,11 @@ static int blk_mq_hctx_notify(void *data, unsigned long action,
if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
return blk_mq_hctx_cpu_offline(hctx, cpu);
- else if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
- return blk_mq_hctx_cpu_online(hctx, cpu);
+
+ /*
+ * In case of CPU online, tags will be reallocated
+ * after new mapping is done in blk_mq_map_swqueue().
+ */
return NOTIFY_OK;
}
@@ -1778,6 +1765,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
unsigned int i;
struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
+ struct blk_mq_tag_set *set = q->tag_set;
queue_for_each_hw_ctx(q, hctx, i) {
cpumask_clear(hctx->cpumask);
@@ -1806,16 +1794,20 @@ static void blk_mq_map_swqueue(struct request_queue *q)
* disable it and free the request entries.
*/
if (!hctx->nr_ctx) {
- struct blk_mq_tag_set *set = q->tag_set;
-
if (set->tags[i]) {
blk_mq_free_rq_map(set, set->tags[i], i);
set->tags[i] = NULL;
- hctx->tags = NULL;
}
+ hctx->tags = NULL;
continue;
}
+ /* unmapped hw queue can be remapped after CPU topo changed */
+ if (!set->tags[i])
+ set->tags[i] = blk_mq_init_rq_map(set, hctx->queue_num);
+ hctx->tags = set->tags[i];
+ WARN_ON(!hctx->tags);
+
/*
* Set the map size to the number of mapped software queues.
* This is more accurate and more efficient than looping
--
1.7.9.5
> Dongsu
>
> > I can not reproduce it in my VM.
> > One interesting point is that the oops always happened
> > on CPU3 in your tests, looks like the mapping is broken
> > for CPU3's ctx in case of CPU 1 offline?
> >
> > > Cheers,
> > > Dongsu
> > >
> > > ---- [beginning of call traces] ----
> > > [ 22.942214] smpboot: CPU 1 is now offline
> > > [ 30.686284] random: nonblocking pool is initialized
> > > [ 39.857305] fuse init (API version 7.23)
> > > [ 40.563853] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
> > > [ 40.564005] IP: [<ffffffff813b905d>] __bt_get.isra.5+0x7d/0x1e0
> > > [ 40.564005] PGD 7a363067 PUD 7cadc067 PMD 0
> > > [ 40.564005] Oops: 0000 [#1] SMP
> > > [ 40.564005] Modules linked in: fuse cpufreq_stats binfmt_misc 9p fscache dm_round_robin dm_multipath loop rtc_cmos 9pnet_virtio 9pnet serio_raw acpi_cpufreq i2c_piix4 virtio_net
> > > [ 40.564005] CPU: 3 PID: 6349 Comm: grub-mount Not tainted 4.0.0+ #320
> > > [ 40.564005] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140709_153950- 04/01/2014
> > > [ 40.564005] task: ffff880079011560 ti: ffff88007a1c8000 task.ti: ffff88007a1c8000
> > > [ 40.564005] RIP: 0010:[<ffffffff813b905d>] [<ffffffff813b905d>] __bt_get.isra.5+0x7d/0x1e0
> > > [ 40.564005] RSP: 0018:ffff88007a1cb838 EFLAGS: 00010246
> > > [ 40.564005] RAX: 0000000000000075 RBX: ffff88007913c400 RCX: 0000000000000078
> > > [ 40.564005] RDX: ffff88007fddbb80 RSI: 0000000000000010 RDI: ffff88007913c400
> > > [ 40.564005] RBP: ffff88007a1cb888 R08: ffff88007fddbb80 R09: 0000000000000001
> > > [ 40.564005] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000010
> > > [ 40.564005] R13: 0000000000000010 R14: ffff88007a1cb988 R15: ffff88007fddbb80
> > > [ 40.564005] FS: 00002b7c8b6807c0(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000
> > > [ 40.564005] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > [ 40.564005] CR2: 0000000000000018 CR3: 0000000079b0b000 CR4: 00000000001407e0
> > > [ 40.564005] Stack:
> > > [ 40.564005] ffff88007a1cb918 ffff88007fdd58c0 0000000000000078 ffffffff813b5d28
> > > [ 40.564005] ffff88007a1cb878 ffff88007913c400 0000000000000010 0000000000000010
> > > [ 40.564005] ffff88007a1cb988 ffff88007fddbb80 ffff88007a1cb908 ffffffff813b9225
> > > [ 40.564005] Call Trace:
> > > [ 40.564005] [<ffffffff813b5d28>] ? blk_mq_queue_enter+0x98/0x2b0
> > > [ 40.564005] [<ffffffff813b9225>] bt_get+0x65/0x1d0
> > > [ 40.564005] [<ffffffff813b5d28>] ? blk_mq_queue_enter+0x98/0x2b0
> > > [ 40.564005] [<ffffffff810c13e0>] ? wait_woken+0x90/0x90
> > > [ 40.564005] [<ffffffff813b9737>] blk_mq_get_tag+0xa7/0xd0
> > > [ 40.564005] [<ffffffff810acbe8>] ? sched_clock_cpu+0x88/0xb0
> > > [ 40.564005] [<ffffffff813b4c5b>] __blk_mq_alloc_request+0x1b/0x1f0
> > > [ 40.564005] [<ffffffff813b6a41>] blk_mq_map_request+0xb1/0x200
> > > [ 40.564005] [<ffffffff813b828e>] blk_mq_make_request+0x6e/0x2c0
> > > [ 40.564005] [<ffffffff813a871f>] ? generic_make_request_checks+0x1ff/0x3d0
> > > [ 40.564005] [<ffffffff813a1f3e>] ? bio_add_page+0x5e/0x70
> > > [ 40.564005] [<ffffffff813a89b0>] generic_make_request+0xc0/0x110
> > > [ 40.564005] [<ffffffff813a8a68>] submit_bio+0x68/0x150
> > > [ 40.564005] [<ffffffff8119a3cc>] ? lru_cache_add+0x1c/0x50
> > > [ 40.564005] [<ffffffff8123beca>] mpage_bio_submit+0x2a/0x40
> > > [ 40.564005] [<ffffffff8123cfbc>] mpage_readpages+0x10c/0x130
> > > [ 40.564005] [<ffffffff81235d90>] ? I_BDEV+0x10/0x10
> > > [ 40.564005] [<ffffffff81235d90>] ? I_BDEV+0x10/0x10
> > > [ 40.564005] [<ffffffff81189347>] ? __page_cache_alloc+0x137/0x160
> > > [ 40.564005] [<ffffffff812365bd>] blkdev_readpages+0x1d/0x20
> > > [ 40.564005] [<ffffffff81197eef>] __do_page_cache_readahead+0x28f/0x310
> > > [ 40.564005] [<ffffffff81197dbe>] ? __do_page_cache_readahead+0x15e/0x310
> > > [ 40.564005] [<ffffffff81198052>] ondemand_readahead+0xe2/0x460
> > > [ 40.564005] [<ffffffff8118a70d>] ? pagecache_get_page+0x2d/0x1b0
> > > [ 40.564005] [<ffffffff81198501>] page_cache_sync_readahead+0x31/0x50
> > > [ 40.564005] [<ffffffff8118b6bc>] generic_file_read_iter+0x4ec/0x600
> > > [ 40.564005] [<ffffffff81236967>] blkdev_read_iter+0x37/0x40
> > > [ 40.564005] [<ffffffff811f69ee>] new_sync_read+0x7e/0xb0
> > > [ 40.564005] [<ffffffff811f7c68>] __vfs_read+0x18/0x50
> > > [ 40.564005] [<ffffffff811f7d2d>] vfs_read+0x8d/0x150
> > > [ 40.564005] [<ffffffff811f7e39>] SyS_read+0x49/0xb0
> > > [ 40.564005] [<ffffffff816a50f2>] system_call_fastpath+0x12/0x17
> > > [ 40.564005] Code: 97 18 03 00 00 bf 04 00 00 00 41 f7 f1 83 f8 04 0f 43 f8 b8 ff ff ff ff 44 39 d7 0f 86 c1 00 00 00 41 8b 00 48 89 4d c0 49 89 f5 <8b> 4e 08 8b 56 0c 4c 89 45 b0 c7 45 c8 00 00 00 00 41 89 c4 89
> > > [ 40.564005] RIP [<ffffffff813b905d>] __bt_get.isra.5+0x7d/0x1e0
> > > [ 40.564005] RSP <ffff88007a1cb838>
> > > [ 40.564005] CR2: 0000000000000018
> > > [ 40.686846] ---[ end trace 32b76e93ea582fae ]---
> > > [ 40.688354] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
> > > [ 40.689123] IP: [<ffffffff813b905d>] __bt_get.isra.5+0x7d/0x1e0
> > > [ 40.689123] PGD 0
> > > [ 40.689123] Oops: 0000 [#2] SMP
> > > [ 40.689123] Modules linked in: fuse cpufreq_stats binfmt_misc 9p fscache dm_round_robin dm_multipath loop rtc_cmos 9pnet_virtio 9pnet serio_raw acpi_cpufreq i2c_piix4 virtio_net
> > > [ 40.689123] CPU: 3 PID: 559 Comm: kworker/3:2 Tainted: G D 4.0.0+ #320
> > > [ 40.689123] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140709_153950- 04/01/2014
> > > [ 40.689123] Workqueue: events_freezable_power_ disk_events_workfn
> > > [ 40.689123] task: ffff88007a17d580 ti: ffff88007caa4000 task.ti: ffff88007caa4000
> > > [ 40.689123] RIP: 0010:[<ffffffff813b905d>] [<ffffffff813b905d>] __bt_get.isra.5+0x7d/0x1e0
> > > [ 40.689123] RSP: 0018:ffff88007caa7958 EFLAGS: 00010246
> > > [ 40.689123] RAX: 0000000000000075 RBX: ffff88007913c400 RCX: 0000000000000078
> > > [ 40.689123] RDX: ffff88007fddbb80 RSI: 0000000000000010 RDI: ffff88007913c400
> > > [ 40.689123] RBP: ffff88007caa79a8 R08: ffff88007fddbb80 R09: 0000000000000000
> > > [ 40.689123] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
> > > [ 40.689123] R13: 0000000000000010 R14: ffff88007caa7ab8 R15: ffff88007fddbb80
> > > [ 40.689123] FS: 0000000000000000(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000
> > > [ 40.689123] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > [ 40.689123] CR2: 0000000000000018 CR3: 0000000001c0b000 CR4: 00000000001407e0
> > > [ 40.689123] Stack:
> > > [ 40.689123] ffffffff810c5ea5 0000000000000292 0000000000000078 0000000000000002
> > > [ 40.689123] 0000000000000000 ffff88007913c400 0000000000000000 0000000000000010
> > > [ 40.689123] ffff88007caa7ab8 ffff88007fddbb80 ffff88007caa7a28 ffffffff813b9225
> > > [ 40.689123] Call Trace:
> > > [ 40.689123] [<ffffffff810c5ea5>] ? cpuacct_charge+0x5/0x1b0
> > > [ 40.689123] [<ffffffff813b9225>] bt_get+0x65/0x1d0
> > > [ 40.689123] [<ffffffff810c13e0>] ? wait_woken+0x90/0x90
> > > [ 40.689123] [<ffffffff813b9737>] blk_mq_get_tag+0xa7/0xd0
> > > [ 40.689123] [<ffffffff813b4c5b>] __blk_mq_alloc_request+0x1b/0x1f0
> > > [ 40.689123] [<ffffffff813b67aa>] blk_mq_alloc_request+0x9a/0x230
> > > [ 40.689123] [<ffffffff813ab82c>] blk_get_request+0x2c/0xf0
> > > [ 40.689123] [<ffffffff81519dcd>] scsi_execute+0x3d/0x1f0
> > > [ 40.689123] [<ffffffff8151c3de>] scsi_execute_req_flags+0x8e/0x100
> > > [ 40.689123] [<ffffffff810c5ea5>] ? cpuacct_charge+0x5/0x1b0
> > > [ 40.689123] [<ffffffff8151ca63>] scsi_test_unit_ready+0x83/0x130
> > > [ 40.689123] [<ffffffff8152b7ae>] sd_check_events+0x14e/0x1b0
> > > [ 40.689123] [<ffffffff813bc461>] disk_check_events+0x51/0x170
> > > [ 40.689123] [<ffffffff813bc59c>] disk_events_workfn+0x1c/0x20
> > > [ 40.689123] [<ffffffff810937c9>] process_one_work+0x1c9/0x500
> > > [ 40.689123] [<ffffffff8109375d>] ? process_one_work+0x15d/0x500
> > > [ 40.689123] [<ffffffff81093ea7>] ? worker_thread+0xc7/0x460
> > > [ 40.689123] [<ffffffff81093e2b>] worker_thread+0x4b/0x460
> > > [ 40.689123] [<ffffffff81093de0>] ? rescuer_thread+0x2e0/0x2e0
> > > [ 40.689123] [<ffffffff81093de0>] ? rescuer_thread+0x2e0/0x2e0
> > > [ 40.689123] [<ffffffff81099697>] kthread+0xe7/0x100
> > > [ 40.689123] [<ffffffff810cb3ed>] ? trace_hardirqs_on+0xd/0x10
> > > [ 40.689123] [<ffffffff810995b0>] ? kthread_create_on_node+0x230/0x230
> > > [ 40.689123] [<ffffffff816a5048>] ret_from_fork+0x58/0x90
> > > [ 40.689123] [<ffffffff810995b0>] ? kthread_create_on_node+0x230/0x230
> > > [ 40.689123] Code: 97 18 03 00 00 bf 04 00 00 00 41 f7 f1 83 f8 04 0f 43 f8 b8 ff ff ff ff 44 39 d7 0f 86 c1 00 00 00 41 8b 00 48 89 4d c0 49 89 f5 <8b> 4e 08 8b 56 0c 4c 89 45 b0 c7 45 c8 00 00 00 00 41 89 c4 89
> > > [ 40.689123] RIP [<ffffffff813b905d>] __bt_get.isra.5+0x7d/0x1e0
> > > [ 40.689123] RSP <ffff88007caa7958>
> > > [ 40.689123] CR2: 0000000000000018
> > > [ 40.689123] ---[ end trace 32b76e93ea582faf ]---
> > > [ 40.844044] BUG: unable to handle kernel paging request at ffffffffffffff98
> > > [ 40.845007] IP: [<ffffffff81099de0>] kthread_data+0x10/0x20
> > > [ 40.845007] PGD 1c0c067 PUD 1c0e067 PMD 0
> > > [ 40.845007] Oops: 0000 [#3] SMP
> > > [ 40.845007] Modules linked in: fuse cpufreq_stats binfmt_misc 9p fscache dm_round_robin dm_multipath loop rtc_cmos 9pnet_virtio 9pnet serio_raw acpi_cpufreq i2c_piix4 virtio_net
> > > [ 40.845007] CPU: 3 PID: 559 Comm: kworker/3:2 Tainted: G D 4.0.0+ #320
> > > [ 40.845007] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140709_153950- 04/01/2014
> > > [ 40.845007] task: ffff88007a17d580 ti: ffff88007caa4000 task.ti: ffff88007caa4000
> > > [ 40.845007] RIP: 0010:[<ffffffff81099de0>] [<ffffffff81099de0>] kthread_data+0x10/0x20
> > > [ 40.845007] RSP: 0018:ffff88007caa75e8 EFLAGS: 00010092
> > > [ 40.845007] RAX: 0000000000000000 RBX: 0000000000000003 RCX: 000000000000000f
> > > [ 40.845007] RDX: 000000000000000f RSI: 0000000000000003 RDI: ffff88007a17d580
> > > [ 40.845007] RBP: ffff88007caa75e8 R08: ffff88007a17d610 R09: 0000000000000000
> > > [ 40.845007] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88007fdd4dc0
> > > [ 40.845007] R13: ffff88007a17d580 R14: 0000000000000003 R15: 0000000000000000
> > > [ 40.845007] FS: 0000000000000000(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000
> > > [ 40.845007] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > [ 40.845007] CR2: 0000000000000028 CR3: 0000000001c0b000 CR4: 00000000001407e0
> > > [ 40.845007] Stack:
> > > [ 40.845007] ffff88007caa7608 ffffffff81094495 ffff88007caa7608 00000000001d4dc0
> > > [ 40.845007] ffff88007caa7678 ffffffff8169ef42 ffffffff81079dde 0000000000000292
> > > [ 40.845007] ffffffff81079e12 ffff88007a17d580 ffff88007caa7678 0000000000000296
> > > [ 40.845007] Call Trace:
> > > [ 40.845007] [<ffffffff81094495>] wq_worker_sleeping+0x15/0xa0
> > > [ 40.845007] [<ffffffff8169ef42>] __schedule+0x932/0xc20
> > > [ 40.845007] [<ffffffff81079dde>] ? do_exit+0x6ee/0xb10
> > > [ 40.845007] [<ffffffff81079e12>] ? do_exit+0x722/0xb10
> > > [ 40.845007] [<ffffffff8169f267>] schedule+0x37/0x90
> > > [ 40.845007] [<ffffffff81079ee6>] do_exit+0x7f6/0xb10
> > > [ 40.845007] [<ffffffff810e17ee>] ? kmsg_dump+0xee/0x1f0
> > > [ 40.845007] [<ffffffff810064fd>] oops_end+0x8d/0xd0
> > > [ 40.845007] [<ffffffff810457d9>] no_context+0x119/0x370
> > > [ 40.845007] [<ffffffff810ac9b5>] ? sched_clock_local+0x25/0x90
> > > [ 40.845007] [<ffffffff81045ab5>] __bad_area_nosemaphore+0x85/0x210
> > > [ 40.845007] [<ffffffff81045c53>] bad_area_nosemaphore+0x13/0x20
> > > [ 40.845007] [<ffffffff81045f3e>] __do_page_fault+0xae/0x460
> > > [ 40.845007] [<ffffffff810462fc>] do_page_fault+0xc/0x10
> > > [ 40.845007] [<ffffffff816a6e02>] page_fault+0x22/0x30
> > > [ 40.845007] [<ffffffff813b905d>] ? __bt_get.isra.5+0x7d/0x1e0
> > > [ 40.845007] [<ffffffff810c826e>] ? __lock_is_held+0x5e/0x90
> > > [ 40.845007] [<ffffffff810c5ea5>] ? cpuacct_charge+0x5/0x1b0
> > > [ 40.845007] [<ffffffff813b9225>] bt_get+0x65/0x1d0
> > > [ 40.845007] [<ffffffff810c13e0>] ? wait_woken+0x90/0x90
> > > [ 40.845007] [<ffffffff813b9737>] blk_mq_get_tag+0xa7/0xd0
> > > [ 40.845007] [<ffffffff813b4c5b>] __blk_mq_alloc_request+0x1b/0x1f0
> > > [ 40.845007] [<ffffffff813b67aa>] blk_mq_alloc_request+0x9a/0x230
> > > [ 40.845007] [<ffffffff813ab82c>] blk_get_request+0x2c/0xf0
> > > [ 40.845007] [<ffffffff81519dcd>] scsi_execute+0x3d/0x1f0
> > > [ 40.845007] [<ffffffff8151c3de>] scsi_execute_req_flags+0x8e/0x100
> > > [ 40.845007] [<ffffffff810c5ea5>] ? cpuacct_charge+0x5/0x1b0
> > > [ 40.845007] [<ffffffff8151ca63>] scsi_test_unit_ready+0x83/0x130
> > > [ 40.845007] [<ffffffff8152b7ae>] sd_check_events+0x14e/0x1b0
> > > [ 40.845007] [<ffffffff813bc461>] disk_check_events+0x51/0x170
> > > [ 40.845007] [<ffffffff813bc59c>] disk_events_workfn+0x1c/0x20
> > > [ 40.845007] [<ffffffff810937c9>] process_one_work+0x1c9/0x500
> > > [ 40.845007] [<ffffffff8109375d>] ? process_one_work+0x15d/0x500
> > > [ 40.845007] [<ffffffff81093ea7>] ? worker_thread+0xc7/0x460
> > > [ 40.845007] [<ffffffff81093e2b>] worker_thread+0x4b/0x460
> > > [ 40.845007] [<ffffffff81093de0>] ? rescuer_thread+0x2e0/0x2e0
> > > [ 40.845007] [<ffffffff81093de0>] ? rescuer_thread+0x2e0/0x2e0
> > > [ 40.845007] [<ffffffff81099697>] kthread+0xe7/0x100
> > > [ 40.845007] [<ffffffff810cb3ed>] ? trace_hardirqs_on+0xd/0x10
> > > [ 40.845007] [<ffffffff810995b0>] ? kthread_create_on_node+0x230/0x230
> > > [ 40.845007] [<ffffffff816a5048>] ret_from_fork+0x58/0x90
> > > [ 40.845007] [<ffffffff810995b0>] ? kthread_create_on_node+0x230/0x230
> > > [ 40.845007] Code: 00 48 89 e5 5d 48 8b 40 88 48 c1 e8 02 83 e0 01 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 8b 87 20 04 00 00 55 48 89 e5 <48> 8b 40 98 5d c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00
> > > [ 40.845007] RIP [<ffffffff81099de0>] kthread_data+0x10/0x20
> > > [ 40.845007] RSP <ffff88007caa75e8>
> > > [ 40.845007] CR2: ffffffffffffff98
> > > [ 40.845007] ---[ end trace 32b76e93ea582fb0 ]---
> > > [ 40.845007] Fixing recursive fault but reboot is needed!
> > > ---- [end of call traces] ----
> > >
> > >> Thanks,
> > >> Ming Lei
> > >> >
> > >> > Regards,
> > >> > Dongsu
> > >> >
> > >> > ---- [beginning of call traces] ----
> > >> > [ 47.274292] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
> > >> > [ 47.275013] IP: [<ffffffff8140b31d>] __bt_get.isra.5+0x7d/0x1e0
> > >> > [ 47.275013] PGD 79c55067 PUD 7ba17067 PMD 0
> > >> > [ 47.275013] Oops: 0000 [#1] SMP
> > >> > [ 47.275013] Modules linked in: fuse cpufreq_stats binfmt_misc 9p fscache dm_round_robin loop dm_multipath 9pnet_virtio rtc_cmos 9pnet acpi_cpufreq serio_raw i2c_piix4 virtio_net
> > >> > [ 47.275013] CPU: 3 PID: 6232 Comm: blkid Not tainted 4.0.0 #303
> > >> > [ 47.275013] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140709_153950- 04/01/2014
> > >> > [ 47.275013] task: ffff88003dfbc020 ti: ffff880079bac000 task.ti: ffff880079bac000
> > >> > [ 47.275013] RIP: 0010:[<ffffffff8140b31d>] [<ffffffff8140b31d>] __bt_get.isra.5+0x7d/0x1e0
> > >> > [ 47.275013] RSP: 0018:ffff880079baf898 EFLAGS: 00010246
> > >> > [ 47.275013] RAX: 000000000000003c RBX: ffff880079198400 RCX: 0000000000000078
> > >> > [ 47.275013] RDX: ffff88007fddbb80 RSI: 0000000000000010 RDI: ffff880079198400
> > >> > [ 47.275013] RBP: ffff880079baf8e8 R08: ffff88007fddbb80 R09: 0000000000000000
> > >> > [ 47.275013] R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000010
> > >> > [ 47.275013] R13: 0000000000000010 R14: ffff880079baf9e8 R15: ffff88007fddbb80
> > >> > [ 47.275013] FS: 00002b270c049800(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000
> > >> > [ 47.275013] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > >> > [ 47.275013] CR2: 0000000000000018 CR3: 000000007ca8d000 CR4: 00000000001407e0
> > >> > [ 47.275013] Stack:
> > >> > [ 47.275013] ffff880079baf978 ffff88007fdd58c0 0000000000000078 ffffffff814071ff
> > >> > [ 47.275013] ffff880079baf8d8 ffff880079198400 0000000000000010 0000000000000010
> > >> > [ 47.275013] ffff880079baf9e8 ffff88007fddbb80 ffff880079baf968 ffffffff8140b4e5
> > >> > [ 47.275013] Call Trace:
> > >> > [ 47.275013] [<ffffffff814071ff>] ? blk_mq_queue_enter+0x9f/0x2d0
> > >> > [ 47.275013] [<ffffffff8140b4e5>] bt_get+0x65/0x1e0
> > >> > [ 47.275013] [<ffffffff814071ff>] ? blk_mq_queue_enter+0x9f/0x2d0
> > >> > [ 47.275013] [<ffffffff810c9b40>] ? wait_woken+0xa0/0xa0
> > >> > [ 47.275013] [<ffffffff8140ba07>] blk_mq_get_tag+0xa7/0xd0
> > >> > [ 47.275013] [<ffffffff8140630b>] __blk_mq_alloc_request+0x1b/0x200
> > >> > [ 47.275013] [<ffffffff81408736>] blk_mq_map_request+0xd6/0x4e0
> > >> > [ 47.275013] [<ffffffff8140a53e>] blk_mq_make_request+0x6e/0x2d0
> > >> > [ 47.275013] [<ffffffff813fb844>] ? generic_make_request_checks+0x674/0x6a0
> > >> > [ 47.275013] [<ffffffff813f23ae>] ? bio_add_page+0x5e/0x70
> > >> > [ 47.275013] [<ffffffff813fb930>] generic_make_request+0xc0/0x110
> > >> > [ 47.275013] [<ffffffff813fb9e8>] submit_bio+0x68/0x150
> > >> > [ 47.275013] [<ffffffff811b0c6c>] ? lru_cache_add+0x1c/0x50
> > >> > [ 47.275013] [<ffffffff8125972a>] mpage_bio_submit+0x2a/0x40
> > >> > [ 47.275013] [<ffffffff8125a81c>] mpage_readpages+0x10c/0x130
> > >> > [ 47.275013] [<ffffffff81254040>] ? I_BDEV+0x10/0x10
> > >> > [ 47.275013] [<ffffffff81254040>] ? I_BDEV+0x10/0x10
> > >> > [ 47.275013] [<ffffffff8119e417>] ? __page_cache_alloc+0x137/0x160
> > >> > [ 47.275013] [<ffffffff8125486d>] blkdev_readpages+0x1d/0x20
> > >> > [ 47.275013] [<ffffffff811ae43f>] __do_page_cache_readahead+0x29f/0x320
> > >> > [ 47.275013] [<ffffffff811ae305>] ? __do_page_cache_readahead+0x165/0x320
> > >> > [ 47.275013] [<ffffffff811aea14>] force_page_cache_readahead+0x34/0x60
> > >> > [ 47.275013] [<ffffffff811aea86>] page_cache_sync_readahead+0x46/0x50
> > >> > [ 47.275013] [<ffffffff811a094c>] generic_file_read_iter+0x52c/0x640
> > >> > [ 47.275013] [<ffffffff81254c17>] blkdev_read_iter+0x37/0x40
> > >> > [ 47.275013] [<ffffffff81211a0e>] new_sync_read+0x7e/0xb0
> > >> > [ 47.275013] [<ffffffff81212c88>] __vfs_read+0x18/0x50
> > >> > [ 47.275013] [<ffffffff81212d4d>] vfs_read+0x8d/0x150
> > >> > [ 47.275013] [<ffffffff81212e59>] SyS_read+0x49/0xb0
> > >> > [ 47.275013] [<ffffffff817063b2>] system_call_fastpath+0x12/0x17
> > >> > [ 47.275013] Code: 97 18 03 00 00 bf 04 00 00 00 41 f7 f1 83 f8 04 0f 43 f8 b8 ff ff ff ff 44 39 d7 0f 86 c1 00 00 00 41 8b 00 48 89 4d c0 49 89 f5 <8b> 4e 08 8b 56 0c 4c 89 45 b0 c7 45 c8 00 00 00 00 41 89 c4 89
> > >> > [ 47.275013] RIP [<ffffffff8140b31d>] __bt_get.isra.5+0x7d/0x1e0
> > >> > [ 47.275013] RSP <ffff880079baf898>
> > >> > [ 47.275013] CR2: 0000000000000018
> > >> > [ 47.275013] ---[ end trace 9a650b674f0fae74 ]---
> > >> > [ 47.701261] note: kworker/3:2[225] exited with preempt_count 1
> > >> > [ 47.815398] BUG: unable to handle kernel paging request at ffffffffffffff98
> > >> > [ 47.816324] IP: [<ffffffff810a00d0>] kthread_data+0x10/0x20
> > >> > [ 47.816324] PGD 1c0c067 PUD 1c0e067 PMD 0
> > >> > [ 47.816324] Oops: 0000 [#3] SMP
> > >> > [ 47.816324] Modules linked in: fuse cpufreq_stats binfmt_misc 9p fscache dm_round_robin loop dm_multipath 9pnet_virtio rtc_cmos 9pnet acpi_cpufreq serio_raw i2c_piix4 virtio_net
> > >> > [ 47.816324] CPU: 3 PID: 225 Comm: kworker/3:2 Tainted: G D W 4.0.0 #303
> > >> > [ 47.816324] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140709_153950- 04/01/2014
> > >> > [ 47.816324] task: ffff88007ac90000 ti: ffff88007906c000 task.ti: ffff88007906c000
> > >> > [ 47.816324] RIP: 0010:[<ffffffff810a00d0>] [<ffffffff810a00d0>] kthread_data+0x10/0x20
> > >> > [ 47.816324] RSP: 0018:ffff88007906f5e8 EFLAGS: 00010092
> > >> > [ 47.816324] RAX: 0000000000000000 RBX: 0000000000000003 RCX: 000000000000000f
> > >> > [ 47.816324] RDX: 000000000000000f RSI: 0000000000000003 RDI: ffff88007ac90000
> > >> > [ 47.816324] RBP: ffff88007906f5e8 R08: ffff88007ac90090 R09: 0000000000000000
> > >> > [ 47.816324] R10: 0000000000000000 R11: 0000000000000001 R12: ffff88007fdd4dc0
> > >> > [ 47.816324] R13: ffff88007ac90000 R14: 0000000000000003 R15: 0000000000000000
> > >> > [ 47.816324] FS: 0000000000000000(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000
> > >> > [ 47.816324] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > >> > [ 47.816324] CR2: 0000000000000028 CR3: 0000000001c0b000 CR4: 00000000001407e0
> > >> > [ 47.816324] Stack:
> > >> > [ 47.816324] ffff88007906f608 ffffffff81099f35 ffff88007906f608 00000000001d4dc0
> > >> > [ 47.816324] ffff88007906f678 ffffffff816ff757 ffffffff8107cfc6 0000000000000292
> > >> > [ 47.816324] ffffffff8107cffa ffff88007ac90000 ffff88007906f678 0000000000000296
> > >> > [ 47.816324] Call Trace:
> > >> > [ 47.816324] [<ffffffff81099f35>] wq_worker_sleeping+0x15/0xa0
> > >> > [ 47.816324] [<ffffffff816ff757>] __schedule+0xa77/0x1080
> > >> > [ 47.816324] [<ffffffff8107cfc6>] ? do_exit+0x756/0xbf0
> > >> > [ 47.816324] [<ffffffff8107cffa>] ? do_exit+0x78a/0xbf0
> > >> > [ 47.816324] [<ffffffff816ffd97>] schedule+0x37/0x90
> > >> > [ 47.816324] [<ffffffff8107d0d6>] do_exit+0x866/0xbf0
> > >> > [ 47.816324] [<ffffffff810ec14e>] ? kmsg_dump+0xfe/0x200
> > >> > [ 47.816324] [<ffffffff810068ad>] oops_end+0x8d/0xd0
> > >> > [ 47.816324] [<ffffffff81047849>] no_context+0x119/0x370
> > >> > [ 47.816324] [<ffffffff810ce795>] ? cpuacct_charge+0x5/0x1c0
> > >> > [ 47.816324] [<ffffffff810b4a25>] ? sched_clock_local+0x25/0x90
> > >> > [ 47.816324] [<ffffffff81047b25>] __bad_area_nosemaphore+0x85/0x210
> > >> > [ 47.816324] [<ffffffff81047cc3>] bad_area_nosemaphore+0x13/0x20
> > >> > [ 47.816324] [<ffffffff81047fb6>] __do_page_fault+0xb6/0x490
> > >> > [ 47.816324] [<ffffffff8104839c>] do_page_fault+0xc/0x10
> > >> > [ 47.816324] [<ffffffff817080c2>] page_fault+0x22/0x30
> > >> > [ 47.816324] [<ffffffff8140b31d>] ? __bt_get.isra.5+0x7d/0x1e0
> > >> > [ 47.816324] [<ffffffff8140b4e5>] bt_get+0x65/0x1e0
> > >> > [ 47.816324] [<ffffffff810c9b40>] ? wait_woken+0xa0/0xa0
> > >> > [ 47.816324] [<ffffffff8140ba07>] blk_mq_get_tag+0xa7/0xd0
> > >> > [ 47.816324] [<ffffffff8140630b>] __blk_mq_alloc_request+0x1b/0x200
> > >> > [ 47.816324] [<ffffffff81407f91>] blk_mq_alloc_request+0xa1/0x250
> > >> > [ 47.816324] [<ffffffff813fc74c>] blk_get_request+0x2c/0xf0
> > >> > [ 47.816324] [<ffffffff810a6acd>] ? __might_sleep+0x4d/0x90
> > >> > [ 47.816324] [<ffffffff815747dd>] scsi_execute+0x3d/0x1f0
> > >> > [ 47.816324] [<ffffffff815763be>] scsi_execute_req_flags+0x8e/0x100
> > >> > [ 47.816324] [<ffffffff81576a43>] scsi_test_unit_ready+0x83/0x130
> > >> > [ 47.816324] [<ffffffff8158672e>] sd_check_events+0x14e/0x1b0
> > >> > [ 47.816324] [<ffffffff8140e731>] disk_check_events+0x51/0x170
> > >> > [ 47.816324] [<ffffffff8140e86c>] disk_events_workfn+0x1c/0x20
> > >> > [ 47.816324] [<ffffffff81099128>] process_one_work+0x1e8/0x800
> > >> > [ 47.816324] [<ffffffff8109909d>] ? process_one_work+0x15d/0x800
> > >> > [ 47.816324] [<ffffffff8109981a>] ? worker_thread+0xda/0x470
> > >> > [ 47.816324] [<ffffffff81099793>] worker_thread+0x53/0x470
> > >> > [ 47.816324] [<ffffffff81099740>] ? process_one_work+0x800/0x800
> > >> > [ 47.816324] [<ffffffff81099740>] ? process_one_work+0x800/0x800
> > >> > [ 47.816324] [<ffffffff8109f652>] kthread+0xf2/0x110
> > >> > [ 47.816324] [<ffffffff810d3d4d>] ? trace_hardirqs_on+0xd/0x10
> > >> > [ 47.816324] [<ffffffff8109f560>] ? kthread_create_on_node+0x230/0x230
> > >> > [ 47.816324] [<ffffffff81706308>] ret_from_fork+0x58/0x90
> > >> > [ 47.816324] [<ffffffff8109f560>] ? kthread_create_on_node+0x230/0x230
> > >> > [ 47.816324] Code: 00 48 89 e5 5d 48 8b 40 88 48 c1 e8 02 83 e0 01 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 8b 87 20 04 00 00 55 48 89 e5 <48> 8b 40 98 5d c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00
> > >> > [ 47.816324] RIP [<ffffffff810a00d0>] kthread_data+0x10/0x20
> > >> > [ 47.816324] RSP <ffff88007906f5e8>
> > >> > [ 47.816324] CR2: ffffffffffffff98
> > >> > [ 47.816324] ---[ end trace 9a650b674f0fae76 ]---
> > >> > [ 47.816324] Fixing recursive fault but reboot is needed!
> > >> > ---- [end of call traces] ----
> > >> > --
> > >> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > >> > the body of a message to majordomo@xxxxxxxxxxxxxxx
> > >> > More majordomo info at http://vger.kernel.org/majordomo-info.html
> > >> > Please read the FAQ at http://www.tux.org/lkml/
> > >
> > >> From 9aed1bd79531d91513cd16ed90872e4349425acc Mon Sep 17 00:00:00 2001
> > >> From: Ming Lei <ming.lei@xxxxxxxxxxxxx>
> > >> Date: Fri, 17 Apr 2015 23:50:48 -0400
> > >> Subject: [PATCH 1/2] block: blk-mq: fix race between timeout and CPU hotplug
> > >>
> > >> Firstly, during CPU hotplug, even when the queue is frozen, the
> > >> timeout handler may still run and access hctx->tags, which may
> > >> cause a use-after-free, so this patch deactivates the timeout
> > >> handler inside the CPU hotplug notifier.
> > >>
> > >> Secondly, tags can be shared by more than one queue, so we
> > >> have to check whether the hctx has been disabled; otherwise
> > >> a use-after-free on tags can still be triggered.
> > >>
> > >> Cc: <stable@xxxxxxxxxxxxxxx>
> > >> Reported-by: Dongsu Park <dongsu.park@xxxxxxxxxxxxxxxx>
> > >> Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxxxxx>
> > >> ---
> > >> block/blk-mq.c | 13 ++++++++++---
> > >> 1 file changed, 10 insertions(+), 3 deletions(-)
> > >>
> > >> diff --git a/block/blk-mq.c b/block/blk-mq.c
> > >> index 67f01a0..58a3b4c 100644
> > >> --- a/block/blk-mq.c
> > >> +++ b/block/blk-mq.c
> > >> @@ -677,8 +677,11 @@ static void blk_mq_rq_timer(unsigned long priv)
> > >> data.next = blk_rq_timeout(round_jiffies_up(data.next));
> > >> mod_timer(&q->timeout, data.next);
> > >> } else {
> > >> - queue_for_each_hw_ctx(q, hctx, i)
> > >> - blk_mq_tag_idle(hctx);
> > >> + queue_for_each_hw_ctx(q, hctx, i) {
> > >> + /* the hctx may be disabled, so we have to check here */
> > >> + if (hctx->tags)
> > >> + blk_mq_tag_idle(hctx);
> > >> + }
> > >> }
> > >> }
> > >>
> > >> @@ -2085,9 +2088,13 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
> > >> */
> > >> list_for_each_entry(q, &all_q_list, all_q_node)
> > >> blk_mq_freeze_queue_start(q);
> > >> - list_for_each_entry(q, &all_q_list, all_q_node)
> > >> + list_for_each_entry(q, &all_q_list, all_q_node) {
> > >> blk_mq_freeze_queue_wait(q);
> > >>
> > >> + /* deactivate timeout handler */
> > >> + del_timer_sync(&q->timeout);
> > >> + }
> > >> +
> > >> list_for_each_entry(q, &all_q_list, all_q_node)
> > >> blk_mq_queue_reinit(q);
> > >>
> > >> --
> > >> 1.9.1
> > >>
> > >
> > >> From 8b70c8612543859173230fbd16a63bacf84ba23a Mon Sep 17 00:00:00 2001
> > >> From: Ming Lei <ming.lei@xxxxxxxxxxxxx>
> > >> Date: Sat, 18 Apr 2015 00:01:31 -0400
> > >> Subject: [PATCH 2/2] blk-mq: fix CPU hotplug handling
> > >>
> > >> Firstly the hctx->tags have to be set as NULL if it is to be disabled
> > >> no matter if set->tags[i] is NULL or not in blk_mq_map_swqueue() because
> > >> shared tags can be freed already from another request_queue.
> > >>
> > >> The same situation has to be considered in blk_mq_hctx_cpu_online()
> > >> too.
> > >>
> > >> Cc: <stable@xxxxxxxxxxxxxxx>
> > >> Reported-by: Dongsu Park <dongsu.park@xxxxxxxxxxxxxxxx>
> > >> Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxxxxx>
> > >> ---
> > >> block/blk-mq.c | 17 +++++++++++------
> > >> 1 file changed, 11 insertions(+), 6 deletions(-)
> > >>
> > >> diff --git a/block/blk-mq.c b/block/blk-mq.c
> > >> index 58a3b4c..612d5c6 100644
> > >> --- a/block/blk-mq.c
> > >> +++ b/block/blk-mq.c
> > >> @@ -1580,15 +1580,20 @@ static int blk_mq_hctx_cpu_online(struct blk_mq_hw_ctx *hctx, int cpu)
> > >> {
> > >> struct request_queue *q = hctx->queue;
> > >> struct blk_mq_tag_set *set = q->tag_set;
> > >> + struct blk_mq_tags *tags = set->tags[hctx->queue_num];
> > >>
> > >> - if (set->tags[hctx->queue_num])
> > >> + /* tags can be shared by more than one queues */
> > >> + if (hctx->tags)
> > >> return NOTIFY_OK;
> > >>
> > >> - set->tags[hctx->queue_num] = blk_mq_init_rq_map(set, hctx->queue_num);
> > >> - if (!set->tags[hctx->queue_num])
> > >> - return NOTIFY_STOP;
> > >> + if (!tags) {
> > >> + tags = blk_mq_init_rq_map(set, hctx->queue_num);
> > >> + if (!tags)
> > >> + return NOTIFY_STOP;
> > >> + set->tags[hctx->queue_num] = tags;
> > >> + }
> > >>
> > >> - hctx->tags = set->tags[hctx->queue_num];
> > >> + hctx->tags = tags;
> > >> return NOTIFY_OK;
> > >> }
> > >>
> > >> @@ -1813,8 +1818,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
> > >> if (set->tags[i]) {
> > >> blk_mq_free_rq_map(set, set->tags[i], i);
> > >> set->tags[i] = NULL;
> > >> - hctx->tags = NULL;
> > >> }
> > >> + hctx->tags = NULL;
> > >> continue;
> > >> }
> > >>
> > >> --
> > >> 1.9.1
> > >>
> > >
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > > the body of a message to majordomo@xxxxxxxxxxxxxxx
> > > More majordomo info at http://vger.kernel.org/majordomo-info.html
> > > Please read the FAQ at http://www.tux.org/lkml/
--
Ming Lei
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/