[PATCH v2 04/20] mm: introduce __get_dev_pagemap()

From: Dan Williams
Date: Fri Oct 09 2015 - 21:02:11 EST


There are several scenarios where we need to retrieve and update
metadata associated with a given devm_memremap_pages() mapping, and the
only lookup key available is a pfn in the range:

1/ We want to augment vmemmap_populate() (called via arch_add_memory())
to allocate memmap storage from pre-allocated pages reserved by the
device driver. At vmemmap_alloc_block_buf() time it grabs device pages
rather than page allocator pages. This is in support of
devm_memremap_pages() mappings where the memmap is too large to fit in
main memory (i.e. large persistent memory devices).

2/ Taking a reference against the mapping when inserting device pages
into the address_space radix of a given inode. This facilitates
unmap_mapping_range() and truncate_inode_pages() operations when the
driver is tearing down the mapping.

3/ get_user_pages() operations on ZONE_DEVICE memory require taking a
reference against the mapping so that the driver teardown path can
revoke and drain usage of device pages.

Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Dave Chinner <david@xxxxxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Ross Zwisler <ross.zwisler@xxxxxxxxxxxxxxx>
Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
---
include/linux/mm.h | 18 ++++++++++++++++++
kernel/memremap.c | 40 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 58 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 80001de019ba..30c3c8764649 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -717,6 +717,24 @@ static inline enum zone_type page_zonenum(const struct page *page)
return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
}

+/**
+ * struct dev_pagemap - metadata for ZONE_DEVICE mappings
+ * @dev: host device of the mapping for debug
+ */
+struct dev_pagemap {
+ /* TODO: vmem_altmap and percpu_ref count */
+ struct device *dev;
+};
+
+#ifdef CONFIG_ZONE_DEVICE
+struct dev_pagemap *__get_dev_pagemap(resource_size_t phys);
+#else
+static inline struct dev_pagemap *get_dev_pagemap(resource_size_t phys)
+{
+ return NULL;
+}
+#endif
+
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define SECTION_IN_PAGE_FLAGS
#endif
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 3218e8b1fc28..64bfd9fa93aa 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -10,6 +10,7 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
+#include <linux/rculist.h>
#include <linux/device.h>
#include <linux/types.h>
#include <linux/io.h>
@@ -138,18 +139,52 @@ void devm_memunmap(struct device *dev, void *addr)
EXPORT_SYMBOL(devm_memunmap);

#ifdef CONFIG_ZONE_DEVICE
+static LIST_HEAD(ranges);
+static DEFINE_SPINLOCK(range_lock);
+
struct page_map {
struct resource res;
+ struct dev_pagemap pgmap;
+ struct list_head list;
};

+static void add_page_map(struct page_map *page_map)
+{
+ spin_lock(&range_lock);
+ list_add_rcu(&page_map->list, &ranges);
+ spin_unlock(&range_lock);
+}
+
+static void del_page_map(struct page_map *page_map)
+{
+ spin_lock(&range_lock);
+ list_del_rcu(&page_map->list);
+ spin_unlock(&range_lock);
+}
+
static void devm_memremap_pages_release(struct device *dev, void *res)
{
struct page_map *page_map = res;

+ del_page_map(page_map);
+
/* pages are dead and unused, undo the arch mapping */
arch_remove_memory(page_map->res.start, resource_size(&page_map->res));
}

+/* assumes rcu_read_lock() held at entry */
+struct dev_pagemap *__get_dev_pagemap(resource_size_t phys)
+{
+ struct page_map *page_map;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ list_for_each_entry_rcu(page_map, &ranges, list)
+ if (phys >= page_map->res.start && phys <= page_map->res.end)
+ return &page_map->pgmap;
+ return NULL;
+}
+
void *devm_memremap_pages(struct device *dev, struct resource *res)
{
int is_ram = region_intersects(res->start, resource_size(res),
@@ -173,12 +208,17 @@ void *devm_memremap_pages(struct device *dev, struct resource *res)

memcpy(&page_map->res, res, sizeof(*res));

+ page_map->pgmap.dev = dev;
+ INIT_LIST_HEAD(&page_map->list);
+ add_page_map(page_map);
+
nid = dev_to_node(dev);
if (nid < 0)
nid = numa_mem_id();

error = arch_add_memory(nid, res->start, resource_size(res), true);
if (error) {
+ del_page_map(page_map);
devres_free(page_map);
return ERR_PTR(error);
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/