Re: [PATCH 1/3] VFIO: take reference to the KVM module
From: Paolo Bonzini
Date: Fri Apr 10 2026 - 10:34:52 EST
On Fri, Apr 10, 2026 at 4:13 PM Sean Christopherson <seanjc@xxxxxxxxxx> wrote:
>
> +Dan
> > We could get rid of the reference count completely (get_file() as a
> > replacement for kvm_get_kvm(), get_file_active() as a replacement for
> > kvm_get_kvm_safe()). struct kvm would need to add a back pointer from
> > struct kvm to struct file,
>
> I wasn't thinking of dropping kvm_get_kvm() entirely, rather just not exporting
> it. Forcing internal KVM usage to grab a reference to the file doesn't add a
> whole lot value.
It adds not doing things in two different ways. The kvm_file is not
always available (and if we need to add it, it should be added in
struct kvm not struct kvm_device).
> > therefore adding and removing a reference count would have some additional
> > pointer chasing. KVM has too many kinds of files to seriously consider
> > passing around struct file* in virt/kvm/ and arch/*/kvm/, and you'd also have
> > pointer chasing to get filp->private_data so it wouldn't win much.
>
> Again, I'm not suggesting we do that. The conversion looks pretty straightforward;
> the compile-tested only, but it doesn't seem overly complex.
I don't like losing the type safety - though file_is_kvm() works as a
run-time check, we could keep a struct kvm_file wrapper and
kvm_file_get/kvm_file_get_kvm/kvm_file_put to manage the reference
count.
But yeah, it's better in other ways. The relatively new
get_file_active()/get_file() APIs make the change nicely transparent.
> drivers/s390/crypto/vfio_ap_ops.c would need similar treatment. I didn't try to
> handle it in my sketch because it's not clear to me how we want to deal with that
> thing, as it's accessing a pile of state/fields that really shouldn't be used by
> non-KVM code :-(
It accessing vcpu->run is not too bad, because vfio_ap_ops.c needs to
do what userspace usually does. Except it's not userspace and does not
have access to (say) QEMU's data structure for the memory map, which
is where the memslot hacks lie. We can provide a clean API for that
but I think it's unavoidable. :(
Paolo
> I added Dan because the PCI TSM stuff is picking up "struct kvm *kvm" references,
> and I want to head that off too, i.e. have it use the file approach instead of
> whatever it plans on doing (can't tell from the code, because there are no users).
>
> ---
> arch/x86/include/asm/kvm_page_track.h | 8 +++---
> arch/x86/kvm/mmu/page_track.c | 16 +++++++----
> drivers/vfio/group.c | 2 +-
> drivers/vfio/vfio.h | 12 ++++----
> drivers/vfio/vfio_main.c | 41 ++++-----------------------
> include/linux/kvm_host.h | 2 ++
> include/linux/vfio.h | 5 ++--
> virt/kvm/kvm_main.c | 14 +++++++--
> virt/kvm/vfio.c | 6 ++--
> 9 files changed, 46 insertions(+), 60 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
> index 3d040741044b..046a25c8fe4f 100644
> --- a/arch/x86/include/asm/kvm_page_track.h
> +++ b/arch/x86/include/asm/kvm_page_track.h
> @@ -44,13 +44,13 @@ struct kvm_page_track_notifier_node {
> struct kvm_page_track_notifier_node *node);
> };
>
> -int kvm_page_track_register_notifier(struct kvm *kvm,
> +int kvm_page_track_register_notifier(struct file *file,
> struct kvm_page_track_notifier_node *n);
> -void kvm_page_track_unregister_notifier(struct kvm *kvm,
> +void kvm_page_track_unregister_notifier(struct file *file,
> struct kvm_page_track_notifier_node *n);
>
> -int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn);
> -int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn);
> +int kvm_write_track_add_gfn(struct file *file, gfn_t gfn);
> +int kvm_write_track_remove_gfn(struct file *file, gfn_t gfn);
> #else
> /*
> * Allow defining a node in a structure even if page tracking is disabled, e.g.
> diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
> index 1b17b12393a8..f070dacc4028 100644
> --- a/arch/x86/kvm/mmu/page_track.c
> +++ b/arch/x86/kvm/mmu/page_track.c
> @@ -217,10 +217,11 @@ static int kvm_enable_external_write_tracking(struct kvm *kvm)
> * register the notifier so that event interception for the tracked guest
> * pages can be received.
> */
> -int kvm_page_track_register_notifier(struct kvm *kvm,
> +int kvm_page_track_register_notifier(struct file *file,
> struct kvm_page_track_notifier_node *n)
> {
> struct kvm_page_track_notifier_head *head;
> + struct kvm *kvm = file_to_kvm(file);
> int r;
>
> if (!kvm || kvm->mm != current->mm)
> @@ -232,7 +233,7 @@ int kvm_page_track_register_notifier(struct kvm *kvm,
> return r;
> }
>
> - kvm_get_kvm(kvm);
> + get_file(file);
>
> head = &kvm->arch.track_notifier_head;
>
> @@ -247,10 +248,11 @@ EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
> * stop receiving the event interception. It is the opposed operation of
> * kvm_page_track_register_notifier().
> */
> -void kvm_page_track_unregister_notifier(struct kvm *kvm,
> +void kvm_page_track_unregister_notifier(struct file *file,
> struct kvm_page_track_notifier_node *n)
> {
> struct kvm_page_track_notifier_head *head;
> + struct kvm *kvm = file_to_kvm(file);
>
> head = &kvm->arch.track_notifier_head;
>
> @@ -259,7 +261,7 @@ void kvm_page_track_unregister_notifier(struct kvm *kvm,
> write_unlock(&kvm->mmu_lock);
> synchronize_srcu(&head->track_srcu);
>
> - kvm_put_kvm(kvm);
> + fput(file);
> }
> EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
>
> @@ -319,8 +321,9 @@ void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
> * @kvm: the guest instance we are interested in.
> * @gfn: the guest page.
> */
> -int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
> +int kvm_write_track_add_gfn(struct file *file, gfn_t gfn)
> {
> + struct kvm *kvm = file_to_kvm(file);
> struct kvm_memory_slot *slot;
> int idx;
>
> @@ -349,8 +352,9 @@ EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn);
> * @kvm: the guest instance we are interested in.
> * @gfn: the guest page.
> */
> -int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn)
> +int kvm_write_track_remove_gfn(struct file *file, gfn_t gfn)
> {
> + struct kvm *kvm = file_to_kvm(file);
> struct kvm_memory_slot *slot;
> int idx;
>
> diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
> index 4f15016d2a5f..f4a81d1b63fa 100644
> --- a/drivers/vfio/group.c
> +++ b/drivers/vfio/group.c
> @@ -858,7 +858,7 @@ bool vfio_group_enforced_coherent(struct vfio_group *group)
> return ret;
> }
>
> -void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm)
> +void vfio_group_set_kvm(struct vfio_group *group, struct file *kvm)
> {
> spin_lock(&group->kvm_ref_lock);
> group->kvm = kvm;
> diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
> index 50128da18bca..aa93c1ec6b10 100644
> --- a/drivers/vfio/vfio.h
> +++ b/drivers/vfio/vfio.h
> @@ -23,7 +23,7 @@ struct vfio_device_file {
> u8 access_granted;
> u32 devid; /* only valid when iommufd is valid */
> spinlock_t kvm_ref_lock; /* protect kvm field */
> - struct kvm *kvm;
> + struct file *kvm;
> struct iommufd_ctx *iommufd; /* protected by struct vfio_device_set::lock */
> };
>
> @@ -88,7 +88,7 @@ struct vfio_group {
> #endif
> enum vfio_group_type type;
> struct mutex group_lock;
> - struct kvm *kvm;
> + struct file *kvm;
> struct file *opened_file;
> struct blocking_notifier_head notifier;
> struct iommufd_ctx *iommufd;
> @@ -108,7 +108,7 @@ void vfio_device_group_unuse_iommu(struct vfio_device *device);
> void vfio_df_group_close(struct vfio_device_file *df);
> struct vfio_group *vfio_group_from_file(struct file *file);
> bool vfio_group_enforced_coherent(struct vfio_group *group);
> -void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm);
> +void vfio_group_set_kvm(struct vfio_group *group, struct file *kvm);
> bool vfio_device_has_container(struct vfio_device *device);
> int __init vfio_group_init(void);
> void vfio_group_cleanup(void);
> @@ -171,7 +171,7 @@ static inline bool vfio_group_enforced_coherent(struct vfio_group *group)
> return true;
> }
>
> -static inline void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm)
> +static inline void vfio_group_set_kvm(struct vfio_group *group, struct file *kvm)
> {
> }
>
> @@ -435,11 +435,11 @@ static inline void vfio_virqfd_exit(void)
> #endif
>
> #if IS_ENABLED(CONFIG_KVM)
> -void vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm);
> +void vfio_device_get_kvm_safe(struct vfio_device *device, struct file *kvm);
> void vfio_device_put_kvm(struct vfio_device *device);
> #else
> static inline void vfio_device_get_kvm_safe(struct vfio_device *device,
> - struct kvm *kvm)
> + struct file *kvm)
> {
> }
>
> diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> index 742477546b15..7af53d1288f8 100644
> --- a/drivers/vfio/vfio_main.c
> +++ b/drivers/vfio/vfio_main.c
> @@ -433,35 +433,13 @@ void vfio_unregister_group_dev(struct vfio_device *device)
> EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
>
> #if IS_ENABLED(CONFIG_KVM)
> -void vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm)
> +void vfio_device_get_kvm_safe(struct vfio_device *device, struct file *kvm)
> {
> - void (*pfn)(struct kvm *kvm);
> - bool (*fn)(struct kvm *kvm);
> - bool ret;
> -
> lockdep_assert_held(&device->dev_set->lock);
>
> - if (!kvm)
> + if (!get_file_active(&kvm))
> return;
>
> - pfn = symbol_get(kvm_put_kvm);
> - if (WARN_ON(!pfn))
> - return;
> -
> - fn = symbol_get(kvm_get_kvm_safe);
> - if (WARN_ON(!fn)) {
> - symbol_put(kvm_put_kvm);
> - return;
> - }
> -
> - ret = fn(kvm);
> - symbol_put(kvm_get_kvm_safe);
> - if (!ret) {
> - symbol_put(kvm_put_kvm);
> - return;
> - }
> -
> - device->put_kvm = pfn;
> device->kvm = kvm;
> }
>
> @@ -472,14 +450,7 @@ void vfio_device_put_kvm(struct vfio_device *device)
> if (!device->kvm)
> return;
>
> - if (WARN_ON(!device->put_kvm))
> - goto clear;
> -
> - device->put_kvm(device->kvm);
> - device->put_kvm = NULL;
> - symbol_put(kvm_put_kvm);
> -
> -clear:
> + fput(device->kvm);
> device->kvm = NULL;
> }
> #endif
> @@ -1483,7 +1454,7 @@ bool vfio_file_enforced_coherent(struct file *file)
> }
> EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
>
> -static void vfio_device_file_set_kvm(struct file *file, struct kvm *kvm)
> +static void vfio_device_file_set_kvm(struct file *file, struct file *kvm)
> {
> struct vfio_device_file *df = file->private_data;
>
> @@ -1500,12 +1471,12 @@ static void vfio_device_file_set_kvm(struct file *file, struct kvm *kvm)
> /**
> * vfio_file_set_kvm - Link a kvm with VFIO drivers
> * @file: VFIO group file or VFIO device file
> - * @kvm: KVM to link
> + * @kvm: KVM file to link
> *
> * When a VFIO device is first opened the KVM will be available in
> * device->kvm if one was associated with the file.
> */
> -void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
> +void vfio_file_set_kvm(struct file *file, struct file *kvm)
> {
> struct vfio_group *group;
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6b76e7a6f4c2..7ea398d33e9f 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -1066,6 +1066,7 @@ void kvm_get_kvm(struct kvm *kvm);
> bool kvm_get_kvm_safe(struct kvm *kvm);
> void kvm_put_kvm(struct kvm *kvm);
> bool file_is_kvm(struct file *file);
> +struct kvm *file_to_kvm(struct file *file);
> void kvm_put_kvm_no_destroy(struct kvm *kvm);
>
> static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
> @@ -2310,6 +2311,7 @@ extern unsigned int halt_poll_ns_shrink;
>
> struct kvm_device {
> const struct kvm_device_ops *ops;
> + struct file *kvm_file;
> struct kvm *kvm;
> void *private;
> struct list_head vm_node;
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index e90859956514..6dd5dd2550a0 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -52,7 +52,7 @@ struct vfio_device {
> struct vfio_device_set *dev_set;
> struct list_head dev_set_list;
> unsigned int migration_flags;
> - struct kvm *kvm;
> + struct file *kvm;
>
> /* Members below here are private, not for driver use */
> unsigned int index;
> @@ -64,7 +64,6 @@ struct vfio_device {
> unsigned int open_count;
> struct completion comp;
> struct iommufd_access *iommufd_access;
> - void (*put_kvm)(struct kvm *kvm);
> struct inode *inode;
> #if IS_ENABLED(CONFIG_IOMMUFD)
> struct iommufd_device *iommufd_device;
> @@ -339,7 +338,7 @@ static inline bool vfio_file_has_dev(struct file *file, struct vfio_device *devi
> #endif
> bool vfio_file_is_valid(struct file *file);
> bool vfio_file_enforced_coherent(struct file *file);
> -void vfio_file_set_kvm(struct file *file, struct kvm *kvm);
> +void vfio_file_set_kvm(struct file *file, struct file *kvm);
>
> #define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long))
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 9093251beb39..b240a3cc5995 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -4808,7 +4808,7 @@ void kvm_unregister_device_ops(u32 type)
> kvm_device_ops_table[type] = NULL;
> }
>
> -static int kvm_ioctl_create_device(struct kvm *kvm,
> +static int kvm_ioctl_create_device(struct file *file, struct kvm *kvm,
> struct kvm_create_device *cd)
> {
> const struct kvm_device_ops *ops;
> @@ -4834,6 +4834,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
>
> dev->ops = ops;
> dev->kvm = kvm;
> + dev->kvm_file = file;
>
> mutex_lock(&kvm->lock);
> ret = ops->create(dev, type);
> @@ -5351,7 +5352,7 @@ static long kvm_vm_ioctl(struct file *filp,
> if (copy_from_user(&cd, argp, sizeof(cd)))
> goto out;
>
> - r = kvm_ioctl_create_device(kvm, &cd);
> + r = kvm_ioctl_create_device(filp, kvm, &cd);
> if (r)
> goto out;
>
> @@ -5483,6 +5484,15 @@ bool file_is_kvm(struct file *file)
> }
> EXPORT_SYMBOL_FOR_KVM_INTERNAL(file_is_kvm);
>
> +struct kvm *file_to_kvm(struct file *file)
> +{
> + if (!file_is_kvm(file))
> + return NULL;
> +
> + return file->private_data;
> +}
> +EXPORT_SYMBOL_FOR_KVM_INTERNAL(file_to_kvm);
> +
> static int kvm_dev_ioctl_create_vm(unsigned long type)
> {
> char fdname[ITOA_MAX_LEN + 1];
> diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
> index 9f9acb66cc1e..f6681dacf7b9 100644
> --- a/virt/kvm/vfio.c
> +++ b/virt/kvm/vfio.c
> @@ -35,9 +35,9 @@ struct kvm_vfio {
> bool noncoherent;
> };
>
> -static void kvm_vfio_file_set_kvm(struct file *file, struct kvm *kvm)
> +static void kvm_vfio_file_set_kvm(struct file *file, struct file *kvm)
> {
> - void (*fn)(struct file *file, struct kvm *kvm);
> + void (*fn)(struct file *file, struct file *kvm);
>
> fn = symbol_get(vfio_file_set_kvm);
> if (!fn)
> @@ -175,7 +175,7 @@ static int kvm_vfio_file_add(struct kvm_device *dev, unsigned int fd)
> kvf->file = get_file(filp);
> list_add_tail(&kvf->node, &kv->file_list);
>
> - kvm_vfio_file_set_kvm(kvf->file, dev->kvm);
> + kvm_vfio_file_set_kvm(kvf->file, dev->kvm_file);
> kvm_vfio_update_coherency(dev);
>
> out_unlock:
>
> base-commit: df83746075778958954aa0460cca55f4b3fc9c02
> --
>