[PATCH v7 04/12] nvdimm: virtio_pmem: stop allocating child flush bio
From: Li Chen
Date: Tue Jun 30 2026 - 05:27:48 EST
pmem_submit_bio() passes the parent bio to nvdimm_flush() for
REQ_FUA. For virtio-pmem this makes async_pmem_flush() allocate
and submit a child PREFLUSH bio chained to the parent.
That child allocation is in the block submit path. Making it
blocking with GFP_NOIO can consume the same global bio mempool that
submit_bio() uses, while making it GFP_ATOMIC can fail under
pressure. A forced failure of the child allocation produced:
virtio_pmem: forcing child bio allocation failure for test
Buffer I/O error on dev pmem0, logical block 0, lost sync page write
EXT4-fs (pmem0): I/O error while writing superblock
EXT4-fs (pmem0): mount failed
Avoid the child bio without turning REQ_FUA into a synchronous
submit-path wait. Let provider flush callbacks return
NVDIMM_FLUSH_ASYNC after taking ownership of parent bio completion.
pmem_submit_bio() returns in that case, and virtio-pmem queues an
ordered WQ_MEM_RECLAIM work item that runs the existing host flush
path and completes the parent bio.
This keeps the asynchronous completion model of the child-bio path
while removing the child bio allocation from the submit path.
Signed-off-by: Li Chen <me@linux.beauty>
---
Changes in v7:
- Replace synchronous FUA flushing with provider-owned asynchronous parent bio
completion.
- Add NVDIMM_FLUSH_ASYNC and ordered WQ_MEM_RECLAIM flush work.
Changes in v6:
- Replace the child bio allocation fix with synchronous FUA flushing.
drivers/nvdimm/nd_virtio.c | 54 +++++++++++++++++++++++++-----------
drivers/nvdimm/pmem.c | 5 +++-
drivers/nvdimm/region_devs.c | 2 ++
drivers/nvdimm/virtio_pmem.c | 17 +++++++++++-
drivers/nvdimm/virtio_pmem.h | 4 +++
include/linux/libnvdimm.h | 9 ++++++
6 files changed, 73 insertions(+), 18 deletions(-)
diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
index 4176046627beb..8e16b7780be1a 100644
--- a/drivers/nvdimm/nd_virtio.c
+++ b/drivers/nvdimm/nd_virtio.c
@@ -9,6 +9,12 @@
#include "virtio_pmem.h"
#include "nd.h"
+struct virtio_pmem_flush_work {
+ struct work_struct work;
+ struct nd_region *nd_region;
+ struct bio *bio;
+};
+
/* The interrupt handler */
void virtio_pmem_host_ack(struct virtqueue *vq)
{
@@ -107,30 +113,46 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
return err;
};
+static void virtio_pmem_flush_work(struct work_struct *work)
+{
+ struct virtio_pmem_flush_work *flush;
+ int err;
+
+ flush = container_of(work, struct virtio_pmem_flush_work, work);
+ err = virtio_pmem_flush(flush->nd_region);
+ if (err > 0)
+ err = -EIO;
+ if (err)
+ flush->bio->bi_status = errno_to_blk_status(err);
+ bio_endio(flush->bio);
+ kfree(flush);
+}
+
/* The asynchronous flush callback function */
int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
{
- /*
- * Create child bio for asynchronous flush and chain with
- * parent bio. Otherwise directly call nd_region flush.
- */
- if (bio && bio->bi_iter.bi_sector != -1) {
- struct bio *child = bio_alloc(bio->bi_bdev, 0,
- REQ_OP_WRITE | REQ_PREFLUSH,
- GFP_ATOMIC);
+ struct virtio_device *vdev = nd_region->provider_data;
+ struct virtio_pmem *vpmem = vdev->priv;
+ struct virtio_pmem_flush_work *flush;
+ int err;
- if (!child)
+ if (bio && bio->bi_iter.bi_sector != -1) {
+ flush = kmalloc_obj(*flush, GFP_NOIO);
+ if (!flush)
return -ENOMEM;
- bio_clone_blkg_association(child, bio);
- child->bi_iter.bi_sector = -1;
- bio_chain(child, bio);
- submit_bio(child);
- return 0;
+
+ INIT_WORK(&flush->work, virtio_pmem_flush_work);
+ flush->nd_region = nd_region;
+ flush->bio = bio;
+ queue_work(vpmem->flush_wq, &flush->work);
+ return NVDIMM_FLUSH_ASYNC;
}
- if (virtio_pmem_flush(nd_region))
+
+ err = virtio_pmem_flush(nd_region);
+ if (err > 0)
return -EIO;
- return 0;
+ return err;
};
EXPORT_SYMBOL_GPL(async_pmem_flush);
MODULE_DESCRIPTION("Virtio Persistent Memory Driver");
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 82ee1ddb3a445..30a51c365ce8b 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -241,8 +241,11 @@ static void pmem_submit_bio(struct bio *bio)
bio_end_io_acct(bio, start);
}
- if ((bio->bi_opf & REQ_FUA) && !bio->bi_status)
+ if ((bio->bi_opf & REQ_FUA) && !bio->bi_status) {
ret = nvdimm_flush(nd_region, bio);
+ if (ret == NVDIMM_FLUSH_ASYNC)
+ return;
+ }
if (ret)
bio->bi_status = errno_to_blk_status(ret);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 7cd2c2f0d3121..c540f1cff9250 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1116,6 +1116,8 @@ int nvdimm_flush(struct nd_region *nd_region, struct bio *bio)
rc = generic_nvdimm_flush(nd_region);
else {
rc = nd_region->flush(nd_region, bio);
+ if (rc > 0)
+ return rc;
if (rc && rc != -ENOMEM)
rc = -EIO;
}
diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
index 77b1966619059..9cf822a6c0c38 100644
--- a/drivers/nvdimm/virtio_pmem.c
+++ b/drivers/nvdimm/virtio_pmem.c
@@ -67,10 +67,17 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
mutex_init(&vpmem->flush_lock);
vpmem->vdev = vdev;
vdev->priv = vpmem;
+ vpmem->flush_wq = alloc_ordered_workqueue("virtio-pmem-flush",
+ WQ_MEM_RECLAIM);
+ if (!vpmem->flush_wq) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
err = init_vq(vpmem);
if (err) {
dev_err(&vdev->dev, "failed to initialize virtio pmem vq's\n");
- goto out_err;
+ goto out_wq;
}
if (virtio_has_feature(vdev, VIRTIO_PMEM_F_SHMEM_REGION)) {
@@ -131,6 +138,8 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
nvdimm_bus_unregister(vpmem->nvdimm_bus);
out_vq:
vdev->config->del_vqs(vdev);
+out_wq:
+ destroy_workqueue(vpmem->flush_wq);
out_err:
return err;
}
@@ -138,14 +147,20 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
static void virtio_pmem_remove(struct virtio_device *vdev)
{
struct nvdimm_bus *nvdimm_bus = dev_get_drvdata(&vdev->dev);
+ struct virtio_pmem *vpmem = vdev->priv;
nvdimm_bus_unregister(nvdimm_bus);
+ drain_workqueue(vpmem->flush_wq);
vdev->config->del_vqs(vdev);
virtio_reset_device(vdev);
+ destroy_workqueue(vpmem->flush_wq);
}
static int virtio_pmem_freeze(struct virtio_device *vdev)
{
+ struct virtio_pmem *vpmem = vdev->priv;
+
+ drain_workqueue(vpmem->flush_wq);
vdev->config->del_vqs(vdev);
virtio_reset_device(vdev);
diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h
index f72cf17f9518f..e6dfc10ce0762 100644
--- a/drivers/nvdimm/virtio_pmem.h
+++ b/drivers/nvdimm/virtio_pmem.h
@@ -15,6 +15,7 @@
#include <linux/libnvdimm.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
+#include <linux/workqueue.h>
struct virtio_pmem_request {
struct virtio_pmem_req req;
@@ -39,6 +40,9 @@ struct virtio_pmem {
/* Serialize flush requests to the device. */
struct mutex flush_lock;
+ /* Complete asynchronous FUA flushes outside the submit path. */
+ struct workqueue_struct *flush_wq;
+
/* nvdimm bus registers virtio pmem device */
struct nvdimm_bus *nvdimm_bus;
struct nvdimm_bus_descriptor nd_desc;
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 28f086c4a1873..d929d83abf3be 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -126,6 +126,15 @@ struct nd_mapping_desc {
struct bio;
struct resource;
struct nd_region;
+
+/*
+ * Provider flush callback return values:
+ * 0: flush completed synchronously
+ * <0: flush failed
+ * >0: flush completion was queued and @bio will be completed later
+ */
+#define NVDIMM_FLUSH_ASYNC 1
+
struct nd_region_desc {
struct resource *res;
struct nd_mapping_desc *mapping;
--
2.52.0