Re: [PATCH v2] virtio-blk: Avoid use-after-free on suspend/resume
From: Kim Suwan
Date: Thu Aug 11 2022 - 08:52:32 EST
On Thu, Aug 11, 2022 at 1:10 AM Shigeru Yoshida <syoshida@xxxxxxxxxx> wrote:
>
> hctx->driver_data is set to vq in virtblk_init_hctx(). However, vq is
> freed on suspend and reallocated on resume. So, hctx->driver_data is
> invalid after resume, and accessing it causes a use-after-free that
> crashes the kernel with a trace like the one below:
>
> [ 22.428391] Call Trace:
> [ 22.428899] <TASK>
> [ 22.429339] virtqueue_add_split+0x3eb/0x620
> [ 22.430035] ? __blk_mq_alloc_requests+0x17f/0x2d0
> [ 22.430789] ? kvm_clock_get_cycles+0x14/0x30
> [ 22.431496] virtqueue_add_sgs+0xad/0xd0
> [ 22.432108] virtblk_add_req+0xe8/0x150
> [ 22.432692] virtio_queue_rqs+0xeb/0x210
> [ 22.433330] blk_mq_flush_plug_list+0x1b8/0x280
> [ 22.434059] __blk_flush_plug+0xe1/0x140
> [ 22.434853] blk_finish_plug+0x20/0x40
> [ 22.435512] read_pages+0x20a/0x2e0
> [ 22.436063] ? folio_add_lru+0x62/0xa0
> [ 22.436652] page_cache_ra_unbounded+0x112/0x160
> [ 22.437365] filemap_get_pages+0xe1/0x5b0
> [ 22.437964] ? context_to_sid+0x70/0x100
> [ 22.438580] ? sidtab_context_to_sid+0x32/0x400
> [ 22.439979] filemap_read+0xcd/0x3d0
> [ 22.440917] xfs_file_buffered_read+0x4a/0xc0
> [ 22.441984] xfs_file_read_iter+0x65/0xd0
> [ 22.442970] __kernel_read+0x160/0x2e0
> [ 22.443921] bprm_execve+0x21b/0x640
> [ 22.444809] do_execveat_common.isra.0+0x1a8/0x220
> [ 22.446008] __x64_sys_execve+0x2d/0x40
> [ 22.446920] do_syscall_64+0x37/0x90
> [ 22.447773] entry_SYSCALL_64_after_hwframe+0x63/0xcd
>
> This patch fixes this issue by getting vq from vblk, and removes
> virtblk_init_hctx().
>
> Fixes: 4e0400525691 ("virtio-blk: support polling I/O")
> Cc: "Suwan Kim" <suwan.kim027@xxxxxxxxx>
> Signed-off-by: Shigeru Yoshida <syoshida@xxxxxxxxxx>
> ---
> drivers/block/virtio_blk.c | 24 ++++++++++--------------
> 1 file changed, 10 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 6fc7850c2b0a..d756423e0059 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -101,6 +101,14 @@ static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
> }
> }
>
> +static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
> +{
> + struct virtio_blk *vblk = hctx->queue->queuedata;
> + struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
> +
> + return vq;
> +}
> +
> static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
> {
> struct scatterlist hdr, status, *sgs[3];
> @@ -416,7 +424,7 @@ static void virtio_queue_rqs(struct request **rqlist)
> struct request *requeue_list = NULL;
>
> rq_list_for_each_safe(rqlist, req, next) {
> - struct virtio_blk_vq *vq = req->mq_hctx->driver_data;
> + struct virtio_blk_vq *vq = get_virtio_blk_vq(req->mq_hctx);
> bool kick;
>
> if (!virtblk_prep_rq_batch(req)) {
> @@ -837,7 +845,7 @@ static void virtblk_complete_batch(struct io_comp_batch *iob)
> static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
> {
> struct virtio_blk *vblk = hctx->queue->queuedata;
> - struct virtio_blk_vq *vq = hctx->driver_data;
> + struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
> struct virtblk_req *vbr;
> unsigned long flags;
> unsigned int len;
> @@ -862,22 +870,10 @@ static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
> return found;
> }
>
> -static int virtblk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
> - unsigned int hctx_idx)
> -{
> - struct virtio_blk *vblk = data;
> - struct virtio_blk_vq *vq = &vblk->vqs[hctx_idx];
> -
> - WARN_ON(vblk->tag_set.tags[hctx_idx] != hctx->tags);
> - hctx->driver_data = vq;
> - return 0;
> -}
> -
> static const struct blk_mq_ops virtio_mq_ops = {
> .queue_rq = virtio_queue_rq,
> .queue_rqs = virtio_queue_rqs,
> .commit_rqs = virtio_commit_rqs,
> - .init_hctx = virtblk_init_hctx,
> .complete = virtblk_request_done,
> .map_queues = virtblk_map_queues,
> .poll = virtblk_poll,
> --
> 2.37.1
>
Hi Shigeru,
It works well on my machine.
I missed that the vqs are reallocated in the freeze/restore path.
Thanks for debugging this issue. Michael already committed it.
Regards,
Suwan Kim