filemap_add_folio(), called from filemap_grab_folio(), adds the folio
onto some LRU list, which is not necessary for guest_memfd since
guest_memfd folios don't participate in any swapping.
This patch reimplements part of filemap_add_folio() so that allocated
guest_memfd folios are added to the filemap without being placed on any
LRU list.
[...]
With shared-to-private conversions dependent on refcounts, keeping
guest_memfd folios off the LRU ensures that the LRU code no longer takes
any references on them, significantly reducing the chance of finding
elevated refcounts during conversion.
Signed-off-by: Ackerley Tng <ackerleytng@xxxxxxxxxx>
---
mm/filemap.c | 1 +
mm/memcontrol.c | 2 +
virt/kvm/guest_memfd.c | 91 ++++++++++++++++++++++++++++++++++++++----
3 files changed, 86 insertions(+), 8 deletions(-)
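
Note: kvm_gmem_filemap_add_folio(), called from kvm_gmem_get_folio()
below, is not shown in this excerpt. As a rough sketch of the idea only
(the helper name and structure here are illustrative, and the patch's
actual helper leaves memcg charging to its caller), an LRU-free add to
the page cache amounts to filemap_add_folio() minus the folio_add_lru()
call, assuming __filemap_add_folio() is made visible to this code
(presumably the one-line mm/filemap.c change above):

	/*
	 * Sketch only, not the patch's helper: filemap_add_folio()
	 * with the LRU insertion dropped.
	 */
	static int add_to_filemap_no_lru(struct address_space *mapping,
					 struct folio *folio, pgoff_t index,
					 gfp_t gfp)
	{
		void *shadow = NULL;
		int ret;

		__folio_set_locked(folio);
		ret = __filemap_add_folio(mapping, folio, index, gfp, &shadow);
		if (ret)
			__folio_clear_locked(folio);

		/* Unlike filemap_add_folio(), never folio_add_lru() here. */
		return ret;
	}

The page-cache reference taken by __filemap_add_folio() remains; what
goes away are the transient references the LRU batching code would
otherwise hold, which is what the refcount argument in the commit
message relies on.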
@@ -477,8 +509,46 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
/*
 * Returns a locked folio on success. The caller is responsible for
 * setting the up-to-date flag before the memory is mapped into the guest.
 */
static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
{
+ struct folio *folio;
+ gfp_t gfp;
+ int ret;
+
+repeat:
+ folio = filemap_lock_folio(inode->i_mapping, index);
+ if (!IS_ERR(folio))
+ return folio;
+
+ gfp = mapping_gfp_mask(inode->i_mapping);
+
/* TODO: Support huge pages. */
- return filemap_grab_folio(inode->i_mapping, index);
+ folio = filemap_alloc_folio(gfp, 0);
+ if (!folio)
+ return ERR_PTR(-ENOMEM);
+
+ ret = mem_cgroup_charge(folio, NULL, gfp);
+ if (ret) {
+ folio_put(folio);
+ return ERR_PTR(ret);
+ }
+
+ ret = kvm_gmem_filemap_add_folio(inode->i_mapping, folio, index);
+ if (ret) {
+ folio_put(folio);
+
+ /*
+ * There was a race: two threads tried to install a folio at the same
+ * index in the filemap. The losing thread should free the folio it
+ * allocated, then lock the folio added to the filemap by the winning
+ * thread.
+ */
+ if (ret == -EEXIST)
+ goto repeat;
+
+ return ERR_PTR(ret);
+ }
+
+ __folio_set_locked(folio);
+ return folio;
}
static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
@@ -956,23 +1026,28 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol
}
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
+static void kvm_gmem_invalidate(struct folio *folio)
+{
+ kvm_pfn_t pfn = folio_pfn(folio);
+
+ kvm_arch_gmem_invalidate(pfn, pfn + folio_nr_pages(folio));
+}
+#else
+static inline void kvm_gmem_invalidate(struct folio *folio) {}
+#endif
[...]
+
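
On the refcount point in the commit message: once guest_memfd folios
stay off the LRU, an otherwise idle folio in the filemap is expected to
hold exactly the page cache's references plus whatever the current
caller took. Purely as an illustration (this function is not part of
the series), a conversion path could detect transient users with a
check along these lines:

	/*
	 * Illustration only, not from this series: the page cache holds
	 * folio_nr_pages() references and the caller is assumed to hold
	 * one more; anything above that indicates a transient user
	 * (e.g. GUP or a speculative lookup).
	 */
	static bool gmem_folio_has_extra_refs(struct folio *folio)
	{
		return folio_ref_count(folio) > folio_nr_pages(folio) + 1;
	}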