Re: [PATCH v8 05/13] KVM: guest_memfd: Add flag to remove from direct map

From: Nikita Kalyazin

Date: Wed Jan 14 2026 - 08:56:11 EST




On 08/12/2025 08:43, Vlastimil Babka wrote:
On 12/5/25 17:58, Kalyazin, Nikita wrote:
+static int kvm_gmem_folio_zap_direct_map(struct folio *folio)
+{
+ int r = 0;
+ unsigned long addr = (unsigned long) folio_address(folio);
+ u64 gmem_flags = GMEM_I(folio_inode(folio))->flags;
+
+ if (kvm_gmem_folio_no_direct_map(folio) || !(gmem_flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP))
+ goto out;
+
+ r = set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio),
+ false);
+
+ if (r)
+ goto out;
+
+ folio->private = (void *) KVM_GMEM_FOLIO_NO_DIRECT_MAP;

With Dave's suggestion on patch 1/13 to have folio_zap_direct_map(), setting
this folio->private flag wouldn't be possible between the zap and tlb flush,
but it's not an issue to set it before the zap, right?

I can't see an issue with that. I have done it that way in v9.


+ flush_tlb_kernel_range(addr, addr + folio_size(folio));
+
+out:
+ return r;
+}
+
+static void kvm_gmem_folio_restore_direct_map(struct folio *folio)
+{
+ /*
+ * Direct map restoration cannot fail, as the only error condition
+ * for direct map manipulation is failure to allocate page tables
+ * when splitting huge pages, but this split would have already
+ * happened in set_direct_map_invalid_noflush() in kvm_gmem_folio_zap_direct_map().
+ * Thus set_direct_map_valid_noflush() here only updates prot bits.
+ */
+ if (kvm_gmem_folio_no_direct_map(folio))
+ set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio),
+ true);

I think you're missing clearing KVM_GMEM_FOLIO_NO_DIRECT_MAP from
folio->private here, which means that if there's another
kvm_gmem_folio_zap_direct_map() call on this folio in the future, it will do nothing?

You're very right, thanks. Fixed in v9.


+}
+
static inline void kvm_gmem_mark_prepared(struct folio *folio)
{
folio_mark_uptodate(folio);
@@ -398,6 +444,7 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
struct inode *inode = file_inode(vmf->vma->vm_file);
struct folio *folio;
vm_fault_t ret = VM_FAULT_LOCKED;
+ int err;

if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
return VM_FAULT_SIGBUS;
@@ -423,6 +470,12 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
kvm_gmem_mark_prepared(folio);
}

+ err = kvm_gmem_folio_zap_direct_map(folio);
+ if (err) {
+ ret = vmf_error(err);
+ goto out_folio;
+ }
+
vmf->page = folio_file_page(folio, vmf->pgoff);

out_folio:
@@ -533,6 +586,8 @@ static void kvm_gmem_free_folio(struct folio *folio)
kvm_pfn_t pfn = page_to_pfn(page);
int order = folio_order(folio);

+ kvm_gmem_folio_restore_direct_map(folio);
+
kvm_arch_gmem_invalidate(pfn, pfn + (1ul << order));
}

@@ -596,6 +651,9 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
/* Unmovable mappings are supposed to be marked unevictable as well. */
WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));

+ if (flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP)
+ mapping_set_no_direct_map(inode->i_mapping);
+
GMEM_I(inode)->flags = flags;

file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR, &kvm_gmem_fops);
@@ -807,6 +865,8 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
if (!is_prepared)
r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio);

+ kvm_gmem_folio_zap_direct_map(folio);
+
folio_unlock(folio);

if (!r)