Re: [PATCH v6 11/43] KVM: guest_memfd: Ensure pages are not in use before conversion
From: Fuad Tabba
Date: Wed May 20 2026 - 10:39:48 EST
On Thu, 7 May 2026 at 21:22, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@xxxxxxxxxx> wrote:
>
> From: Ackerley Tng <ackerleytng@xxxxxxxxxx>
>
> When converting memory to private in guest_memfd, it is necessary to ensure
> that the pages are not currently being accessed by any other part of the
> kernel or userspace to avoid any current user writing to guest private
> memory.
>
> guest_memfd checks for unexpected refcounts to determine whether a page is
> still in use. The only expected refcounts after unmapping the range
> requested for conversion are those that are held by guest_memfd itself.
>
> Update the kvm_memory_attributes2 structure to include an error_offset
> field. This allows KVM to report the exact offset where a conversion
> failed to userspace. If the safety check fails, return -EAGAIN and copy
> the error_offset back to userspace so that it can potentially retry the
> operation or handle the failure gracefully.
>
> Suggested-by: David Hildenbrand <david@xxxxxxxxxx>
> Signed-off-by: Ackerley Tng <ackerleytng@xxxxxxxxxx>
> Co-developed-by: Vishal Annapurve <vannapurve@xxxxxxxxxx>
> Signed-off-by: Vishal Annapurve <vannapurve@xxxxxxxxxx>
Reviewed-by: Fuad Tabba <tabba@xxxxxxxxxx>
Cheers,
/fuad
> ---
> include/uapi/linux/kvm.h | 3 ++-
> virt/kvm/guest_memfd.c | 65 ++++++++++++++++++++++++++++++++++++++++++++----
> 2 files changed, 62 insertions(+), 6 deletions(-)
>
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index e6bbf68a83813..0b55258573d3d 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1658,7 +1658,8 @@ struct kvm_memory_attributes2 {
> __u64 size;
> __u64 attributes;
> __u64 flags;
> - __u64 reserved[12];
> + __u64 error_offset;
> + __u64 reserved[11];
> };
>
> #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3)
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 91e89b188f583..9d82642a025e9 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -572,9 +572,42 @@ static int kvm_gmem_mas_preallocate(struct ma_state *mas, u64 attributes,
> return mas_preallocate(mas, xa_mk_value(attributes), GFP_KERNEL);
> }
>
> +static bool kvm_gmem_is_safe_for_conversion(struct inode *inode, pgoff_t start,
> + size_t nr_pages, pgoff_t *err_index)
> +{
> + struct address_space *mapping = inode->i_mapping;
> + const int filemap_get_folios_refcount = 1;
> + pgoff_t last = start + nr_pages - 1;
> + struct folio_batch fbatch;
> + bool safe = true;
> + int i;
> +
> + folio_batch_init(&fbatch);
> + while (safe && filemap_get_folios(mapping, &start, last, &fbatch)) {
> +
> + for (i = 0; i < folio_batch_count(&fbatch); ++i) {
> + struct folio *folio = fbatch.folios[i];
> +
> + if (folio_ref_count(folio) !=
> + folio_nr_pages(folio) + filemap_get_folios_refcount) {
> + safe = false;
> + *err_index = folio->index;
> + break;
> + }
> + }
> +
> + folio_batch_release(&fbatch);
> + cond_resched();
> + }
> +
> + return safe;
> +}
> +
> static int __kvm_gmem_set_attributes(struct inode *inode, pgoff_t start,
> - size_t nr_pages, uint64_t attrs)
> + size_t nr_pages, uint64_t attrs,
> + pgoff_t *err_index)
> {
> + bool to_private = attrs & KVM_MEMORY_ATTRIBUTE_PRIVATE;
> struct address_space *mapping = inode->i_mapping;
> struct gmem_inode *gi = GMEM_I(inode);
> pgoff_t end = start + nr_pages;
> @@ -588,8 +621,21 @@ static int __kvm_gmem_set_attributes(struct inode *inode, pgoff_t start,
>
> mas_init(&mas, mt, start);
> r = kvm_gmem_mas_preallocate(&mas, attrs, start, nr_pages);
> - if (r)
> + if (r) {
> + *err_index = start;
> goto out;
> + }
> +
> + if (to_private) {
> + unmap_mapping_pages(mapping, start, nr_pages, false);
> +
> + if (!kvm_gmem_is_safe_for_conversion(inode, start, nr_pages,
> + err_index)) {
> + mas_destroy(&mas);
> + r = -EAGAIN;
> + goto out;
> + }
> + }
>
> /*
> * From this point on guest_memfd has performed necessary
> @@ -609,9 +655,10 @@ static long kvm_gmem_set_attributes(struct file *file, void __user *argp)
> struct gmem_file *f = file->private_data;
> struct inode *inode = file_inode(file);
> struct kvm_memory_attributes2 attrs;
> + pgoff_t err_index;
> size_t nr_pages;
> pgoff_t index;
> - int i;
> + int i, r;
>
> if (copy_from_user(&attrs, argp, sizeof(attrs)))
> return -EFAULT;
> @@ -635,8 +682,16 @@ static long kvm_gmem_set_attributes(struct file *file, void __user *argp)
>
> nr_pages = attrs.size >> PAGE_SHIFT;
> index = attrs.offset >> PAGE_SHIFT;
> - return __kvm_gmem_set_attributes(inode, index, nr_pages,
> - attrs.attributes);
> + r = __kvm_gmem_set_attributes(inode, index, nr_pages, attrs.attributes,
> + &err_index);
> + if (r) {
> + attrs.error_offset = ((uint64_t)err_index) << PAGE_SHIFT;
> +
> + if (copy_to_user(argp, &attrs, sizeof(attrs)))
> + return -EFAULT;
> + }
> +
> + return r;
> }
>
> static long kvm_gmem_ioctl(struct file *file, unsigned int ioctl,
>
> --
> 2.54.0.563.g4f69b47b94-goog
>
>