[PATCH RFC v2 15/15] vfio/nvgrace-egm: register EGM PFNMAP range with memory_failure

From: ankita

Date: Mon Feb 23 2026 - 10:56:32 EST


From: Ankit Agrawal <ankita@xxxxxxxxxx>

EGM carveout memory is mapped directly into userspace (QEMU) and is not
added to the kernel. It is not managed by the kernel page allocator and
has no struct pages. The module can thus utilize the Linux memory manager's
memory_failure mechanism for regions with no struct pages. The Linux MM
code exposes register/unregister APIs allowing modules to register such
memory regions for memory_failure handling.

Register the EGM PFN range with the MM memory_failure infrastructure on
open, and unregister it on the last close. Provide a PFN-to-VMA offset
callback that validates the PFN is within the EGM region and the VMA,
then converts it to a file offset and records the poisoned offset in the
existing hashtable for reporting to userspace.

Signed-off-by: Ankit Agrawal <ankita@xxxxxxxxxx>
---
drivers/vfio/pci/nvgrace-gpu/egm.c | 100 +++++++++++++++++++++++++++++
1 file changed, 100 insertions(+)

diff --git a/drivers/vfio/pci/nvgrace-gpu/egm.c b/drivers/vfio/pci/nvgrace-gpu/egm.c
index 2e4024c25e8a..5b60db6294a8 100644
--- a/drivers/vfio/pci/nvgrace-gpu/egm.c
+++ b/drivers/vfio/pci/nvgrace-gpu/egm.c
@@ -6,6 +6,7 @@
#include <linux/vfio_pci_core.h>
#include <linux/nvgrace-egm.h>
#include <linux/egm.h>
+#include <linux/memory-failure.h>

#define MAX_EGM_NODES 4

@@ -23,6 +24,7 @@ struct chardev {
struct cdev cdev;
atomic_t open_count;
DECLARE_HASHTABLE(htbl, 0x10);
+ struct pfn_address_space pfn_address_space;
};

static struct nvgrace_egm_dev *
@@ -34,6 +36,94 @@ egm_chardev_to_nvgrace_egm_dev(struct chardev *egm_chardev)
return container_of(aux_dev, struct nvgrace_egm_dev, aux_dev);
}

+/*
+ * Translate an absolute PFN into a page offset relative to the start of
+ * the EGM region described by the device's egmphys/egmlength.
+ *
+ * @egm_chardev:          chardev whose backing EGM device is looked up.
+ * @pfn:                  page frame number to translate.
+ * @pfn_offset_in_region: written with (pfn - first PFN of the region) on
+ *                        success; left untouched on failure.
+ *
+ * Returns 0 on success, -EFAULT if @pfn lies outside the region.
+ */
+static int pfn_memregion_offset(struct chardev *egm_chardev,
+				unsigned long pfn,
+				pgoff_t *pfn_offset_in_region)
+{
+	struct nvgrace_egm_dev *egm_dev =
+		egm_chardev_to_nvgrace_egm_dev(egm_chardev);
+	unsigned long first_pfn = PHYS_PFN(egm_dev->egmphys);
+	unsigned long region_pages = egm_dev->egmlength >> PAGE_SHIFT;
+
+	/* Below the first page of the region. */
+	if (pfn < first_pfn)
+		return -EFAULT;
+
+	/* At or beyond the page following the last page of the region. */
+	if (pfn >= first_pfn + region_pages)
+		return -EFAULT;
+
+	*pfn_offset_in_region = pfn - first_pfn;
+	return 0;
+}
+
+/*
+ * Record a poisoned memory offset in the chardev's hashtable.
+ *
+ * @egm_chardev: chardev owning the hashtable (htbl).
+ * @mem_offset:  offset to record; also used as the hash key.
+ *
+ * An offset that is already present is not added a second time.
+ *
+ * Returns 0 if the offset is (now) tracked, -ENOMEM if a new entry could
+ * not be allocated.
+ */
+static int track_ecc_offset(struct chardev *egm_chardev,
+			    unsigned long mem_offset)
+{
+	struct h_node *entry;
+	struct h_node *new_entry;
+
+	/* Nothing to do when this offset has been recorded before. */
+	hash_for_each_possible(egm_chardev->htbl, entry, node, mem_offset) {
+		if (entry->mem_offset == mem_offset)
+			return 0;
+	}
+
+	new_entry = kzalloc(sizeof(*new_entry), GFP_NOFS);
+	if (!new_entry)
+		return -ENOMEM;
+
+	new_entry->mem_offset = mem_offset;
+	hash_add(egm_chardev->htbl, &new_entry->node, mem_offset);
+
+	return 0;
+}
+
+/*
+ * PFN-to-VMA-offset callback installed in the pfn_address_space that is
+ * registered for the EGM region.
+ *
+ * @vma:   VMA to test; vma->vm_file->private_data is read as the chardev.
+ * @pfn:   poisoned page frame number.
+ * @pgoff: on success, set to vma->vm_pgoff plus the distance (in pages)
+ *         of @pfn from the VMA's start within the region.
+ *
+ * The PFN must fall inside the EGM region and inside the window of the
+ * region covered by @vma; otherwise nothing is recorded.
+ *
+ * Returns 0 on success, -EFAULT if @pfn is outside the region or outside
+ * @vma, or the error returned by track_ecc_offset().
+ */
+static int nvgrace_egm_pfn_to_vma_pgoff(struct vm_area_struct *vma,
+					unsigned long pfn,
+					pgoff_t *pgoff)
+{
+	struct chardev *egm_chardev = vma->vm_file->private_data;
+	/* Bits of vm_pgoff below VFIO_PCI_OFFSET_SHIFT: offset in the region */
+	pgoff_t vma_offset_in_region = vma->vm_pgoff &
+		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+	pgoff_t vma_nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	pgoff_t pfn_offset_in_region;
+	int ret;
+
+	ret = pfn_memregion_offset(egm_chardev, pfn, &pfn_offset_in_region);
+	if (ret)
+		return ret;
+
+	/* Ensure PFN is not before VMA's start within the region */
+	if (pfn_offset_in_region < vma_offset_in_region)
+		return -EFAULT;
+
+	/*
+	 * Ensure PFN is not past VMA's end within the region. Without this
+	 * a VMA that maps only the leading part of the region would be
+	 * reported as containing the PFN, with a pgoff outside the VMA.
+	 */
+	if (pfn_offset_in_region - vma_offset_in_region >= vma_nr_pages)
+		return -EFAULT;
+
+	/* Calculate offset from VMA start */
+	*pgoff = vma->vm_pgoff +
+		 (pfn_offset_in_region - vma_offset_in_region);
+
+	/* Track and save the poisoned offset */
+	return track_ecc_offset(egm_chardev, *pgoff << PAGE_SHIFT);
+}
+
+/*
+ * Fill in the chardev's pfn_address_space with the EGM PFN range, the
+ * inode's address_space mapping and the PFN-to-VMA-offset callback, then
+ * register it through register_pfn_address_space().
+ *
+ * @inode:       inode whose i_mapping is stored in the pfn_address_space.
+ * @egm_chardev: chardev embedding the pfn_address_space to register.
+ *
+ * Returns the value returned by register_pfn_address_space().
+ */
+static int
+nvgrace_egm_vfio_pci_register_pfn_range(struct inode *inode,
+					struct chardev *egm_chardev)
+{
+	struct nvgrace_egm_dev *egm_dev =
+		egm_chardev_to_nvgrace_egm_dev(egm_chardev);
+	struct pfn_address_space *space = &egm_chardev->pfn_address_space;
+	unsigned long first_pfn = PHYS_PFN(egm_dev->egmphys);
+	unsigned long page_count = egm_dev->egmlength >> PAGE_SHIFT;
+
+	/* Inclusive PFN interval [start, last] covering the EGM region. */
+	space->node.start = first_pfn;
+	space->node.last = first_pfn + page_count - 1;
+
+	space->mapping = inode->i_mapping;
+	space->pfn_to_vma_pgoff = nvgrace_egm_pfn_to_vma_pgoff;
+
+	return register_pfn_address_space(space);
+}
+
static int nvgrace_egm_open(struct inode *inode, struct file *file)
{
struct chardev *egm_chardev =
@@ -41,6 +131,7 @@ static int nvgrace_egm_open(struct inode *inode, struct file *file)
struct nvgrace_egm_dev *egm_dev =
egm_chardev_to_nvgrace_egm_dev(egm_chardev);
void *memaddr;
+ int ret;

if (atomic_cmpxchg(&egm_chardev->open_count, 0, 1) != 0)
return -EBUSY;
@@ -77,6 +168,13 @@ static int nvgrace_egm_open(struct inode *inode, struct file *file)

file->private_data = egm_chardev;

+ ret = nvgrace_egm_vfio_pci_register_pfn_range(inode, egm_chardev);
+ if (ret && ret != -EOPNOTSUPP) {
+ file->private_data = NULL;
+ atomic_dec(&egm_chardev->open_count);
+ return ret;
+ }
+
return 0;
}

@@ -85,6 +183,8 @@ static int nvgrace_egm_release(struct inode *inode, struct file *file)
struct chardev *egm_chardev =
container_of(inode->i_cdev, struct chardev, cdev);

+ unregister_pfn_address_space(&egm_chardev->pfn_address_space);
+
file->private_data = NULL;

atomic_dec(&egm_chardev->open_count);
--
2.34.1