Re: [PATCH v1 9/9] drivers: hv: dxgkrnl: Implement DXGSYNCFILE

From: Daniel Vetter
Date: Fri Jan 14 2022 - 13:03:43 EST


Hi all,

On Wed, Jan 12, 2022 at 11:55:14AM -0800, Iouri Tarassov wrote:
> Implement the LX_DXCREATESYNCFILE IOCTL (D3DKMTCreateSyncFile).
>
> dxgsyncfile is built on top of the Linux sync_file object and
> provides a way for the user mode to synchronize with the execution
> of the device DMA packets.
>
> The IOCTL creates a dxgsyncfile object for the given GPU synchronization
> object and a fence value. A sync_object file descriptor is returned to
> the caller. The caller could wait for the object by using poll().
> When the GPU synchronization object is signaled on the host, the host
> sends a message to the virtual machine and the sync_file object is
> signaled.
>
> Signed-off-by: Iouri Tarassov <iourit@xxxxxxxxxxxxxxxxxxx>

Adding dri-devel, which get_maintainer.pl should have done automatically
with the dma_fence wildcard match. Not sure why that didn't happen.

> +struct dxgsyncpoint {
> + struct dxghostevent hdr;
> + struct dma_fence base;

This doesn't work, unfortunately. For better or worse, memory fences like
monitored fences from wddm have completely different semantics from
dma_fence. You could probably hack this to be self-consistent for hyper-v,
but the problem is that then hv would have incompatible locking/nesting
rules compared to everything else, and dma_fence matters for memory
management, so this includes whether you're allowed to kmalloc(GFP_KERNEL)
or not, and that's just a bit too much.

I discussed this quickly with Jesse on irc and it sounds like the reason
you want the dma_fence is just to emulate the sync_file interface for
android. I think the correct solution here is to create a hv_dxg_sync_file
fd, which emulates the exact ioctls that Android needs, but with a wddm
monitored fence underneath instead of a dma_fence underneath.

This way we guarantee that no one ever accidentally mixes these
incompatible concepts up in the kernel, and Android should still be able
to happily run under hyperv.

Thoughts?

Also pls cc me on this sync work since even if you drop dma_fence use
completely I'd like to follow this a bit.

Cheers, Daniel

> + u64 fence_value;
> + u64 context;
> + spinlock_t lock;
> + u64 u64;
> +};
> +
> +#endif /* _DXGSYNCFILE_H */
> diff --git a/drivers/hv/dxgkrnl/dxgvmbus.c b/drivers/hv/dxgkrnl/dxgvmbus.c
> index 773d8f364b34..185fb7e38c21 100644
> --- a/drivers/hv/dxgkrnl/dxgvmbus.c
> +++ b/drivers/hv/dxgkrnl/dxgvmbus.c
> @@ -1311,15 +1311,18 @@ int create_existing_sysmem(struct dxgdevice *device,
> void *kmem = NULL;
> int ret = 0;
> struct dxgkvmb_command_setexistingsysmemstore *set_store_command;
> + struct dxgkvmb_command_setexistingsysmempages *set_pages_command;
> u64 alloc_size = host_alloc->allocation_size;
> u32 npages = alloc_size >> PAGE_SHIFT;
> struct dxgvmbusmsg msg = {.hdr = NULL};
> -
> - ret = init_message(&msg, device->adapter, device->process,
> - sizeof(*set_store_command));
> - if (ret)
> - goto cleanup;
> - set_store_command = (void *)msg.msg;
> + const u32 max_pfns_in_message =
> + (DXG_MAX_VM_BUS_PACKET_SIZE - sizeof(*set_pages_command) -
> + PAGE_SIZE) / sizeof(__u64);
> + u32 alloc_offset_in_pages = 0;
> + struct page **page_in;
> + u64 *pfn;
> + u32 pages_to_send;
> + u32 i;
>
> /*
> * Create a guest physical address list and set it as the allocation
> @@ -1330,6 +1333,7 @@ int create_existing_sysmem(struct dxgdevice *device,
> dev_dbg(dxgglobaldev, " Alloc size: %lld", alloc_size);
>
> dxgalloc->cpu_address = (void *)sysmem;
> +
> dxgalloc->pages = vzalloc(npages * sizeof(void *));
> if (dxgalloc->pages == NULL) {
> pr_err("failed to allocate pages");
> @@ -1347,31 +1351,80 @@ int create_existing_sysmem(struct dxgdevice *device,
> ret = -ENOMEM;
> goto cleanup;
> }
> - kmem = vmap(dxgalloc->pages, npages, VM_MAP, PAGE_KERNEL);
> - if (kmem == NULL) {
> - pr_err("vmap failed");
> - ret = -ENOMEM;
> - goto cleanup;
> - }
> - ret1 = vmbus_establish_gpadl(dxgglobal_get_vmbus(), kmem,
> - alloc_size, &dxgalloc->gpadl);
> - if (ret1) {
> - pr_err("establish_gpadl failed: %d", ret1);
> - ret = -ENOMEM;
> - goto cleanup;
> - }
> - dev_dbg(dxgglobaldev, "New gpadl %d", dxgalloc->gpadl.gpadl_handle);
> + if (!dxgglobal->map_guest_pages_enabled) {
> + ret = init_message(&msg, device->adapter, device->process,
> + sizeof(*set_store_command));
> + if (ret)
> + goto cleanup;
> + set_store_command = (void *)msg.msg;
>
> - command_vgpu_to_host_init2(&set_store_command->hdr,
> - DXGK_VMBCOMMAND_SETEXISTINGSYSMEMSTORE,
> - device->process->host_handle);
> - set_store_command->device = device->handle;
> - set_store_command->device = device->handle;
> - set_store_command->allocation = host_alloc->allocation;
> - set_store_command->gpadl = dxgalloc->gpadl.gpadl_handle;
> - ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size);
> - if (ret < 0)
> - pr_err("failed to set existing store: %x", ret);
> + kmem = vmap(dxgalloc->pages, npages, VM_MAP, PAGE_KERNEL);
> + if (kmem == NULL) {
> + pr_err("vmap failed");
> + ret = -ENOMEM;
> + goto cleanup;
> + }
> + ret1 = vmbus_establish_gpadl(dxgglobal_get_vmbus(), kmem,
> + alloc_size, &dxgalloc->gpadl);
> + if (ret1) {
> + pr_err("establish_gpadl failed: %d", ret1);
> + ret = -ENOMEM;
> + goto cleanup;
> + }
> + dev_dbg(dxgglobaldev, "New gpadl %d",
> + dxgalloc->gpadl.gpadl_handle);
> +
> + command_vgpu_to_host_init2(&set_store_command->hdr,
> + DXGK_VMBCOMMAND_SETEXISTINGSYSMEMSTORE,
> + device->process->host_handle);
> + set_store_command->device = device->handle;
> + set_store_command->allocation = host_alloc->allocation;
> + set_store_command->gpadl = dxgalloc->gpadl.gpadl_handle;
> + ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr,
> + msg.size);
> + if (ret < 0)
> + pr_err("failed to set existing store: %x", ret);
> + } else {
> + /*
> + * Send the list of the allocation PFNs to the host. The host
> + * will map the pages for GPU access.
> + */
> +
> + ret = init_message(&msg, device->adapter, device->process,
> + sizeof(*set_pages_command) +
> + max_pfns_in_message * sizeof(u64));
> + if (ret)
> + goto cleanup;
> + set_pages_command = (void *)msg.msg;
> + command_vgpu_to_host_init2(&set_pages_command->hdr,
> + DXGK_VMBCOMMAND_SETEXISTINGSYSMEMPAGES,
> + device->process->host_handle);
> + set_pages_command->device = device->handle;
> + set_pages_command->allocation = host_alloc->allocation;
> +
> + page_in = dxgalloc->pages;
> + while (alloc_offset_in_pages < npages) {
> + pfn = (u64 *)((char *)msg.msg +
> + sizeof(*set_pages_command));
> + pages_to_send = min(npages - alloc_offset_in_pages,
> + max_pfns_in_message);
> + set_pages_command->num_pages = pages_to_send;
> + set_pages_command->alloc_offset_in_pages =
> + alloc_offset_in_pages;
> +
> + for (i = 0; i < pages_to_send; i++)
> + *pfn++ = page_to_pfn(*page_in++);
> +
> + ret = dxgvmb_send_sync_msg_ntstatus(msg.channel,
> + msg.hdr,
> + msg.size);
> + if (ret < 0) {
> + pr_err("failed to set existing pages: %x", ret);
> + break;
> + }
> + alloc_offset_in_pages += pages_to_send;
> + }
> + }
>
> cleanup:
> if (kmem)
> @@ -2685,6 +2738,7 @@ int dxgvmb_send_wait_sync_object_cpu(struct dxgprocess *process,
> struct
> d3dkmt_waitforsynchronizationobjectfromcpu
> *args,
> + bool user_address,
> u64 cpu_event)
> {
> int ret = -EINVAL;
> @@ -2708,18 +2762,25 @@ int dxgvmb_send_wait_sync_object_cpu(struct dxgprocess *process,
> command->object_count = args->object_count;
> command->guest_event_pointer = (u64) cpu_event;
> current_pos = (u8 *) &command[1];
> - ret = copy_from_user(current_pos, args->objects, object_size);
> - if (ret) {
> - pr_err("%s failed to copy objects", __func__);
> - ret = -EINVAL;
> - goto cleanup;
> - }
> - current_pos += object_size;
> - ret = copy_from_user(current_pos, args->fence_values, fence_size);
> - if (ret) {
> - pr_err("%s failed to copy fences", __func__);
> - ret = -EINVAL;
> - goto cleanup;
> + if (user_address) {
> + ret = copy_from_user(current_pos, args->objects, object_size);
> + if (ret) {
> + pr_err("%s failed to copy objects", __func__);
> + ret = -EINVAL;
> + goto cleanup;
> + }
> + current_pos += object_size;
> + ret = copy_from_user(current_pos, args->fence_values,
> + fence_size);
> + if (ret) {
> + pr_err("%s failed to copy fences", __func__);
> + ret = -EINVAL;
> + goto cleanup;
> + }
> + } else {
> + memcpy(current_pos, args->objects, object_size);
> + current_pos += object_size;
> + memcpy(current_pos, args->fence_values, fence_size);
> }
>
> ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size);
> diff --git a/drivers/hv/dxgkrnl/dxgvmbus.h b/drivers/hv/dxgkrnl/dxgvmbus.h
> index a19ac804a320..dc766d95a0c4 100644
> --- a/drivers/hv/dxgkrnl/dxgvmbus.h
> +++ b/drivers/hv/dxgkrnl/dxgvmbus.h
> @@ -139,6 +139,8 @@ enum dxgkvmb_commandtype {
> DXGK_VMBCOMMAND_GETCONTEXTSCHEDULINGPRIORITY = 61,
> DXGK_VMBCOMMAND_QUERYCLOCKCALIBRATION = 62,
> DXGK_VMBCOMMAND_QUERYRESOURCEINFO = 64,
> + DXGK_VMBCOMMAND_LOGEVENT = 65,
> + DXGK_VMBCOMMAND_SETEXISTINGSYSMEMPAGES = 66,
> DXGK_VMBCOMMAND_INVALID
> };
>
> @@ -245,6 +247,16 @@ struct dxgkvmb_command_setexistingsysmemstore {
> u32 gpadl;
> };
>
> +/* Returns ntstatus */
> +struct dxgkvmb_command_setexistingsysmempages {
> + struct dxgkvmb_command_vgpu_to_host hdr;
> + struct d3dkmthandle device;
> + struct d3dkmthandle allocation;
> + u32 num_pages;
> + u32 alloc_offset_in_pages;
> + /* u64 pfn_array[num_pages] */
> +};
> +
> struct dxgkvmb_command_createprocess {
> struct dxgkvmb_command_vm_to_host hdr;
> void *process;
> diff --git a/drivers/hv/dxgkrnl/ioctl.c b/drivers/hv/dxgkrnl/ioctl.c
> index 9770fabf163e..9236b6b95973 100644
> --- a/drivers/hv/dxgkrnl/ioctl.c
> +++ b/drivers/hv/dxgkrnl/ioctl.c
> @@ -19,6 +19,7 @@
>
> #include "dxgkrnl.h"
> #include "dxgvmbus.h"
> +#include "dxgsyncfile.h"
>
> #undef pr_fmt
> #define pr_fmt(fmt) "dxgk:err: " fmt
> @@ -32,11 +33,6 @@ struct ioctl_desc {
> };
> static struct ioctl_desc ioctls[LX_IO_MAX + 1];
>
> -static char *errorstr(int ret)
> -{
> - return ret < 0 ? "err" : "";
> -}
> -
> static int dxgsyncobj_release(struct inode *inode, struct file *file)
> {
> struct dxgsharedsyncobject *syncobj = file->private_data;
> @@ -3561,7 +3557,7 @@ dxgk_wait_sync_object_cpu(struct dxgprocess *process, void *__user inargs)
> }
>
> ret = dxgvmb_send_wait_sync_object_cpu(process, adapter,
> - &args, event_id);
> + &args, true, event_id);
> if (ret < 0)
> goto cleanup;
>
> @@ -5457,4 +5453,6 @@ void init_ioctls(void)
> LX_DXQUERYSTATISTICS);
> SET_IOCTL(/*0x44 */ dxgk_share_object_with_host,
> LX_DXSHAREOBJECTWITHHOST);
> + SET_IOCTL(/*0x45 */ dxgk_create_sync_file,
> + LX_DXCREATESYNCFILE);
> }
> diff --git a/drivers/hv/dxgkrnl/misc.c b/drivers/hv/dxgkrnl/misc.c
> index ffb491641836..1b152c269265 100644
> --- a/drivers/hv/dxgkrnl/misc.c
> +++ b/drivers/hv/dxgkrnl/misc.c
> @@ -35,3 +35,9 @@ u16 *wcsncpy(u16 *dest, const u16 *src, size_t n)
> dest[i - 1] = 0;
> return dest;
> }
> +
> +char *errorstr(int ret)
> +{
> + return ret < 0 ? "err" : "";
> +}
> +
> diff --git a/drivers/hv/dxgkrnl/misc.h b/drivers/hv/dxgkrnl/misc.h
> index 7fe3fc45b67c..3079dd55c7a4 100644
> --- a/drivers/hv/dxgkrnl/misc.h
> +++ b/drivers/hv/dxgkrnl/misc.h
> @@ -44,6 +44,7 @@ extern const struct d3dkmthandle zerohandle;
> */
>
> u16 *wcsncpy(u16 *dest, const u16 *src, size_t n);
> +char *errorstr(int ret);
>
> enum dxglockstate {
> DXGLOCK_SHARED,
> --
> 2.32.0
>

--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch