[PATCH 02/12] pci/p2pdma: Don't initialise page refcount to one

From: Alistair Popple
Date: Tue Sep 10 2024 - 00:16:06 EST


The reference counts for ZONE_DEVICE private pages should be
initialised by the driver when the pages are actually allocated by the
driver allocator, not when they are first created. This is currently
the case for MEMORY_DEVICE_PRIVATE and MEMORY_DEVICE_COHERENT pages
but not for MEMORY_DEVICE_PCI_P2PDMA pages, so fix that up.

Signed-off-by: Alistair Popple <apopple@xxxxxxxxxx>
---
drivers/pci/p2pdma.c | 6 ++++++
mm/memremap.c | 17 +++++++++++++----
mm/mm_init.c | 22 ++++++++++++++++++----
3 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 4f47a13..210b9f4 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -129,6 +129,12 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
}

/*
+ * Initialise the refcount for the freshly allocated page. As we have
+ * just allocated the page, no one else should be using it.
+ */
+ set_page_count(virt_to_page(kaddr), 1);
+
+ /*
* vm_insert_page() can sleep, so a reference is taken to mapping
* such that rcu_read_unlock() can be done before inserting the
* pages
diff --git a/mm/memremap.c b/mm/memremap.c
index 40d4547..07bbe0e 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -488,15 +488,24 @@ void free_zone_device_folio(struct folio *folio)
folio->mapping = NULL;
folio->page.pgmap->ops->page_free(folio_page(folio, 0));

- if (folio->page.pgmap->type != MEMORY_DEVICE_PRIVATE &&
- folio->page.pgmap->type != MEMORY_DEVICE_COHERENT)
+ switch (folio->page.pgmap->type) {
+ case MEMORY_DEVICE_PRIVATE:
+ case MEMORY_DEVICE_COHERENT:
+ put_dev_pagemap(folio->page.pgmap);
+ break;
+
+ case MEMORY_DEVICE_FS_DAX:
+ case MEMORY_DEVICE_GENERIC:
/*
* Reset the refcount to 1 to prepare for handing out the page
* again.
*/
folio_set_count(folio, 1);
- else
- put_dev_pagemap(folio->page.pgmap);
+ break;
+
+ case MEMORY_DEVICE_PCI_P2PDMA:
+ break;
+ }
}

void zone_device_page_init(struct page *page)
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 4ba5607..0489820 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1015,12 +1015,26 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
}

/*
- * ZONE_DEVICE pages are released directly to the driver page allocator
- * which will set the page count to 1 when allocating the page.
+ * ZONE_DEVICE pages other than MEMORY_DEVICE_GENERIC and
+ * MEMORY_DEVICE_FS_DAX pages are released directly to the driver page
+ * allocator, which will set the page count to 1 when allocating the
+ * page.
+ *
+ * MEMORY_DEVICE_GENERIC and MEMORY_DEVICE_FS_DAX pages automatically
+ * have their refcount reset to one whenever they are freed (i.e. after
+ * their refcount drops to 0).
*/
- if (pgmap->type == MEMORY_DEVICE_PRIVATE ||
- pgmap->type == MEMORY_DEVICE_COHERENT)
+ switch (pgmap->type) {
+ case MEMORY_DEVICE_PRIVATE:
+ case MEMORY_DEVICE_COHERENT:
+ case MEMORY_DEVICE_PCI_P2PDMA:
set_page_count(page, 0);
+ break;
+
+ case MEMORY_DEVICE_FS_DAX:
+ case MEMORY_DEVICE_GENERIC:
+ break;
+ }
}

/*
--
git-series 0.9.1