[PATCH 5/6] mm, memory_hotplug: Provide argument for the pgprot_t in arch_add_memory()

From: Logan Gunthorpe
Date: Mon Dec 09 2019 - 14:14:12 EST


devm_memremap_pages() is currently used by the PCI P2PDMA code to create
struct page mappings for IO memory. At present, these mappings are created
with PAGE_KERNEL, which implies setting the PAT bits to WB. However, on
x86, an MTRR register will typically override this and force the cache
type to UC-. In the case the firmware doesn't set this register, the
mapping is effectively WB and accessing it will typically result in a
machine check exception.

Other arches are unlikely to function correctly at present, seeing they
have no MTRR registers to fall back on.

To solve this, add a pgprot_t argument to arch_add_memory() so that the
caller can explicitly specify the page protection to map with.
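For illustration, a minimal sketch of what a caller could then do. The
pgprot_noncached() choice and the surrounding locals (nid, res,
restrictions, as in memremap_pages()) are only an example, not part of
this series:

	/* example: request an uncached mapping instead of the implied WB one */
	pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
	int rc;

	rc = arch_add_memory(nid, res->start, resource_size(res), prot,
			     &restrictions);
	if (rc)
		return rc;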

Of the arches that support MEMORY_HOTPLUG: for x86_64, powerpc, s390 and
arm64 it is a simple change to pass the pgprot_t down to their
respective functions which set up the page tables. For x86_32, set the
page tables explicitly using _set_memory_prot() (seeing they are
already mapped). For sh and ia64, reject anything but PAGE_KERNEL
settings -- this should be fine, for now, seeing these arches don't
support ZONE_DEVICE anyway.
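Note that pgprot_t is an opaque type (a struct on many configurations),
so the rejection check on those arches compares the raw values rather
than the pgprot_t itself, roughly:

	/* reject any protection other than the default kernel mapping */
	if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL))
		return -EINVAL;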

Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Cc: David Hildenbrand <david@xxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Signed-off-by: Logan Gunthorpe <logang@xxxxxxxxxxxx>
---
arch/arm64/mm/mmu.c            | 4 ++--
arch/ia64/mm/init.c            | 5 ++++-
arch/powerpc/mm/mem.c          | 4 ++--
arch/s390/mm/init.c            | 4 ++--
arch/sh/mm/init.c              | 5 ++++-
arch/x86/mm/init_32.c          | 7 ++++++-
arch/x86/mm/init_64.c          | 4 ++--
include/linux/memory_hotplug.h | 2 +-
mm/memory_hotplug.c            | 2 +-
mm/memremap.c                  | 2 +-
10 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 60c929f3683b..48b65272df15 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1050,7 +1050,7 @@ int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
}

#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size,
+int arch_add_memory(int nid, u64 start, u64 size, pgprot_t prot,
struct mhp_restrictions *restrictions)
{
int flags = 0;
@@ -1059,7 +1059,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;

__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
- size, PAGE_KERNEL, __pgd_pgtable_alloc, flags);
+ size, prot, __pgd_pgtable_alloc, flags);

return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT,
restrictions);
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index bf9df2625bc8..15a1efcecd83 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -669,13 +669,16 @@ mem_init (void)
}

#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size,
+int arch_add_memory(int nid, u64 start, u64 size, pgprot_t prot,
struct mhp_restrictions *restrictions)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;

+ if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL))
+ return -EINVAL;
+
ret = __add_pages(nid, start_pfn, nr_pages, restrictions);
if (ret)
printk("%s: Problem encountered in __add_pages() as ret=%d\n",
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 22525d8935ce..a901c2b65801 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -105,7 +105,7 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
return -ENODEV;
}

-int __ref arch_add_memory(int nid, u64 start, u64 size,
+int __ref arch_add_memory(int nid, u64 start, u64 size, pgprot_t prot,
struct mhp_restrictions *restrictions)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
@@ -115,7 +115,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
resize_hpt_for_hotplug(memblock_phys_mem_size());

start = (unsigned long)__va(start);
- rc = create_section_mapping(start, start + size, nid, PAGE_KERNEL);
+ rc = create_section_mapping(start, start + size, nid, prot);
if (rc) {
pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
start, start + size, rc);
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 263ebb074cdd..d3a67d8a1317 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -266,7 +266,7 @@ device_initcall(s390_cma_mem_init);

#endif /* CONFIG_CMA */

-int arch_add_memory(int nid, u64 start, u64 size,
+int arch_add_memory(int nid, u64 start, u64 size, pgprot_t prot,
struct mhp_restrictions *restrictions)
{
unsigned long start_pfn = PFN_DOWN(start);
@@ -276,7 +276,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
if (WARN_ON_ONCE(restrictions->altmap))
return -EINVAL;

- rc = vmem_add_mapping(start, size, PAGE_KERNEL);
+ rc = vmem_add_mapping(start, size, prot);
if (rc)
return rc;

diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index dfdbaa50946e..cf9f788115ff 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -405,13 +405,16 @@ void __init mem_init(void)
}

#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size,
+int arch_add_memory(int nid, u64 start, u64 size, pgprot_t prot,
struct mhp_restrictions *restrictions)
{
unsigned long start_pfn = PFN_DOWN(start);
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;

+ if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL))
+ return -EINVAL;
+
/* We only have ZONE_NORMAL, so this is easy.. */
ret = __add_pages(nid, start_pfn, nr_pages, restrictions);
if (unlikely(ret))
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index d3cdd9137f42..c0fe624eb304 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -852,11 +852,16 @@ void __init mem_init(void)
}

#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size,
+int arch_add_memory(int nid, u64 start, u64 size, pgprot_t prot,
struct mhp_restrictions *restrictions)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
+ int ret;
+
+ ret = _set_memory_prot(start, nr_pages, prot);
+ if (ret)
+ return ret;

return __add_pages(nid, start_pfn, nr_pages, restrictions);
}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 65a5093ec97b..c7d170d67b57 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -862,13 +862,13 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
return ret;
}

-int arch_add_memory(int nid, u64 start, u64 size,
+int arch_add_memory(int nid, u64 start, u64 size, pgprot_t prot,
struct mhp_restrictions *restrictions)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;

- init_memory_mapping(start, start + size, PAGE_KERNEL);
+ init_memory_mapping(start, start + size, prot);

return add_pages(nid, start_pfn, nr_pages, restrictions);
}
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index f46ea71b4ffd..82e8b3fcebab 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -111,7 +111,7 @@ extern void __online_page_free(struct page *page);

extern int try_online_node(int nid);

-extern int arch_add_memory(int nid, u64 start, u64 size,
+extern int arch_add_memory(int nid, u64 start, u64 size, pgprot_t prot,
struct mhp_restrictions *restrictions);
extern u64 max_mem_size;

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index df570e5c71cc..0a581a344a00 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1035,7 +1035,7 @@ int __ref add_memory_resource(int nid, struct resource *res)
new_node = ret;

/* call arch's memory hotadd */
- ret = arch_add_memory(nid, start, size, &restrictions);
+ ret = arch_add_memory(nid, start, size, PAGE_KERNEL, &restrictions);
if (ret < 0)
goto error;

diff --git a/mm/memremap.c b/mm/memremap.c
index 03ccbdfeb697..4edcca074e15 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -281,7 +281,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
}

error = arch_add_memory(nid, res->start, resource_size(res),
- &restrictions);
+ pgprot, &restrictions);
}

if (!error) {
--
2.20.1