Re: [PATCH v2] vdpasim: support doorbell mapping

From: Longpeng (Mike, Cloud Infrastructure Service Product Dept.)
Date: Sun Dec 11 2022 - 19:51:36 EST




在 2022/12/8 15:37, Jason Wang 写道:
On Tue, Dec 6, 2022 at 4:46 PM Longpeng (Mike, Cloud Infrastructure
Service Product Dept.) <longpeng2@xxxxxxxxxx> wrote:



在 2022/12/6 14:59, Jason Wang 写道:
On Mon, Dec 5, 2022 at 9:52 PM Longpeng(Mike) <longpeng2@xxxxxxxxxx> wrote:

From: Longpeng <longpeng2@xxxxxxxxxx>

Support doorbell mapping for vdpasim devices, then we can test the notify
passthrough feature even if there's no real hardware on hand.

Allocates a dummy page which is used to emulate the notify page of the device,
all VQs share the same notify register that initiated to 0xffff. A periodic
work will check whether there're requests need to process ( the value of the
notify register is 0xffff or not ).

This cap is disabled as default, users can enable it as follow:
modprobe vdpa_sim notify_passthrough=true

Signed-off-by: Longpeng <longpeng2@xxxxxxxxxx>
---
Changes v1->v2:
- support both kick mode and passthrough mode. [Jason]
- poll the notify register first. [Jason, Michael]
---
drivers/vdpa/vdpa_sim/vdpa_sim.c | 77 ++++++++++++++++++++++++++++++++
drivers/vdpa/vdpa_sim/vdpa_sim.h | 3 ++
2 files changed, 80 insertions(+)

diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 7438a89ce939..07096a04dabb 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/dma-map-ops.h>
+#include <asm/set_memory.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <linux/vhost_iotlb.h>
@@ -36,9 +37,16 @@ module_param(max_iotlb_entries, int, 0444);
MODULE_PARM_DESC(max_iotlb_entries,
"Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");

+static bool notify_passthrough;
+module_param(notify_passthrough, bool, 0444);
+MODULE_PARM_DESC(notify_passthrough,
+ "Enable vq notify(doorbell) area mapping. (default: false)");

I'm not sure if it's worth doing this, I think we can afford the cost
of periodic work (especially considering it's a simulator).

We can support kick mode and passthgourh mode simultaneously now, it is
a little unnecessary. I would remove it in next version.

+
#define VDPASIM_QUEUE_ALIGN PAGE_SIZE
#define VDPASIM_QUEUE_MAX 256
#define VDPASIM_VENDOR_ID 0
+#define VDPASIM_VRING_POLL_PERIOD 100 /* ms */
+#define VDPASIM_NOTIFY_DEFVAL 0xffff

static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
{
@@ -246,6 +254,28 @@ static const struct dma_map_ops vdpasim_dma_ops = {
static const struct vdpa_config_ops vdpasim_config_ops;
static const struct vdpa_config_ops vdpasim_batch_config_ops;

+static void vdpasim_notify_work(struct work_struct *work)
+{
+ struct vdpasim *vdpasim;
+ u16 *val;
+
+ vdpasim = container_of(work, struct vdpasim, notify_work.work);
+
+ if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
+ goto out;
+
+ if (!vdpasim->running)
+ goto out;
+
+ val = (u16 *)vdpasim->notify;
+ if (xchg(val, VDPASIM_NOTIFY_DEFVAL) != VDPASIM_NOTIFY_DEFVAL)
+ schedule_work(&vdpasim->work);
+
+out:
+ schedule_delayed_work(&vdpasim->notify_work,
+ msecs_to_jiffies(VDPASIM_VRING_POLL_PERIOD));
+}
+
struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
const struct vdpa_dev_set_config *config)
{
@@ -287,6 +317,18 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
set_dma_ops(dev, &vdpasim_dma_ops);
vdpasim->vdpa.mdev = dev_attr->mgmt_dev;

+ if (notify_passthrough) {
+ INIT_DELAYED_WORK(&vdpasim->notify_work, vdpasim_notify_work);
+
+ vdpasim->notify = __get_free_page(GFP_KERNEL | __GFP_ZERO);
+ if (!vdpasim->notify)
+ goto err_iommu;
+#ifdef CONFIG_X86
+ set_memory_uc(vdpasim->notify, 1);
+#endif

I had the same question with version 1, any reason for having this
part uncacheable? It's a hint that we have bugs somewhere. Are we
missing ACCESS/ORDER_PLATFORM or other features?

Oh, I had replied you in that thread.

The vm_page_prot of notify mapping VMA is pgprot_noncached (see
vhost_vdpa_fault) but the vdpasim->notify is WB, so we should set its
memtype to UC here and set it back to WB when releasing the device.

Ok, but any reason to have a X86 guard for this? This basically makes
it won't work for non-x86 archs?

Because only X86 defines __HAVE_PFNMAP_TRACKING, track_pfn_remap does nothing on other archs.

I think the correct way is to introduce a new config_ops to fetch the
correct pgprot. Then we are fine.

Is it worth doing this? It seems the vdpasim device is just a special case (using RAM to emulates the notify area) and other real HW devices work fine with the current implementation?

Thanks


Otherwise, the warning in trace_pfn_remap() would be triggered.
For example:
x86/PAT: CPU 16/KVM:17819 map pfn RAM range req uncached-minus for [mem
0x5151f3000-0x5151f3fff], got write-back

+ *(u16 *)vdpasim->notify = VDPASIM_NOTIFY_DEFVAL;

WRITE_ONCE()?

Okay.

+ }
+
vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL);
if (!vdpasim->config)
goto err_iommu;
@@ -495,6 +537,18 @@ static u8 vdpasim_get_status(struct vdpa_device *vdpa)
return status;
}

+static void vdpasim_set_notify_work(struct vdpasim *vdpasim, bool start)
+{
+ if (!notify_passthrough)
+ return;

Only two callers for this function: one is start another is stop. If
we decide to get rid of notify_passthrough, I'd rather opencode the
schedule/cancel_delayed().

Okay, thanks.

Thanks

+
+ if (start)
+ schedule_delayed_work(&vdpasim->notify_work,
+ msecs_to_jiffies(VDPASIM_VRING_POLL_PERIOD));
+ else
+ cancel_delayed_work_sync(&vdpasim->notify_work);
+}
+
static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
@@ -502,12 +556,14 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
spin_lock(&vdpasim->lock);
vdpasim->status = status;
spin_unlock(&vdpasim->lock);
+ vdpasim_set_notify_work(vdpasim, status & VIRTIO_CONFIG_S_DRIVER_OK);
}

static int vdpasim_reset(struct vdpa_device *vdpa, bool clear)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);

+ vdpasim_set_notify_work(vdpasim, false);
spin_lock(&vdpasim->lock);
vdpasim->status = 0;
vdpasim_do_reset(vdpasim);
@@ -672,11 +728,24 @@ static int vdpasim_dma_unmap(struct vdpa_device *vdpa, unsigned int asid,
return 0;
}

+static struct vdpa_notification_area
+vdpasim_get_vq_notification(struct vdpa_device *vdpa, u16 qid)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpa_notification_area notify;
+
+ notify.addr = virt_to_phys((void *)vdpasim->notify);
+ notify.size = PAGE_SIZE;
+
+ return notify;
+}
+
static void vdpasim_free(struct vdpa_device *vdpa)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
int i;

+ vdpasim_set_notify_work(vdpasim, false);
cancel_work_sync(&vdpasim->work);

for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
@@ -693,6 +762,12 @@ static void vdpasim_free(struct vdpa_device *vdpa)
vhost_iotlb_free(vdpasim->iommu);
kfree(vdpasim->vqs);
kfree(vdpasim->config);
+ if (vdpasim->notify) {
+#ifdef CONFIG_X86
+ set_memory_wb(vdpasim->notify, 1);
+#endif
+ free_page(vdpasim->notify);
+ }
}

static const struct vdpa_config_ops vdpasim_config_ops = {
@@ -704,6 +779,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
.get_vq_ready = vdpasim_get_vq_ready,
.set_vq_state = vdpasim_set_vq_state,
.get_vq_state = vdpasim_get_vq_state,
+ .get_vq_notification = vdpasim_get_vq_notification,
.get_vq_align = vdpasim_get_vq_align,
.get_vq_group = vdpasim_get_vq_group,
.get_device_features = vdpasim_get_device_features,
@@ -737,6 +813,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
.get_vq_ready = vdpasim_get_vq_ready,
.set_vq_state = vdpasim_set_vq_state,
.get_vq_state = vdpasim_get_vq_state,
+ .get_vq_notification = vdpasim_get_vq_notification,
.get_vq_align = vdpasim_get_vq_align,
.get_vq_group = vdpasim_get_vq_group,
.get_device_features = vdpasim_get_device_features,
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
index 0e78737dcc16..0769ccbd3911 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
@@ -69,6 +69,9 @@ struct vdpasim {
bool running;
/* spinlock to synchronize iommu table */
spinlock_t iommu_lock;
+ /* dummy notify page */
+ unsigned long notify;
+ struct delayed_work notify_work;
};

struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr,
--
2.23.0


.


.