[RFC][PATCH v2 12/21] x86/pgtable: allocate page table pages from DRAM

From: Fengguang Wu
Date: Wed Dec 26 2018 - 08:39:14 EST


On random reads/writes over large data sets, we find that nearly half of
the memory accesses are caused by TLB misses and hence hit the page table
pages. So it is better to keep page table pages in the faster DRAM nodes.

Signed-off-by: Fengguang Wu <fengguang.wu@xxxxxxxxx>
---
arch/x86/include/asm/pgalloc.h | 10 +++++++---
arch/x86/mm/pgtable.c | 22 ++++++++++++++++++----
2 files changed, 25 insertions(+), 7 deletions(-)

--- linux.orig/arch/x86/mm/pgtable.c 2018-12-26 19:41:57.494900885 +0800
+++ linux/arch/x86/mm/pgtable.c 2018-12-26 19:42:35.531621035 +0800
@@ -22,17 +22,30 @@ EXPORT_SYMBOL(physical_mask);
#endif

gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
+nodemask_t all_node_mask = NODE_MASK_ALL;
+
+unsigned long __get_free_pgtable_pages(gfp_t gfp_mask,
+ unsigned int order)
+{
+ struct page *page;
+
+ page = __alloc_pages_nodemask(gfp_mask, order, numa_node_id(), &all_node_mask);
+ if (!page)
+ return 0;
+ return (unsigned long) page_address(page);
+}
+EXPORT_SYMBOL(__get_free_pgtable_pages);

pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
- return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
+ return (pte_t *)__get_free_pgtable_pages(PGALLOC_GFP & ~__GFP_ACCOUNT, 0);
}

pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *pte;

- pte = alloc_pages(__userpte_alloc_gfp, 0);
+ pte = __alloc_pages_nodemask(__userpte_alloc_gfp, 0, numa_node_id(), &all_node_mask);
if (!pte)
return NULL;
if (!pgtable_page_ctor(pte)) {
@@ -241,7 +254,7 @@ static int preallocate_pmds(struct mm_st
gfp &= ~__GFP_ACCOUNT;

for (i = 0; i < count; i++) {
- pmd_t *pmd = (pmd_t *)__get_free_page(gfp);
+ pmd_t *pmd = (pmd_t *)__get_free_pgtable_pages(gfp, 0);
if (!pmd)
failed = true;
if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
@@ -422,7 +435,8 @@ static inline void _pgd_free(pgd_t *pgd)

static inline pgd_t *_pgd_alloc(void)
{
- return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
+ return (pgd_t *)__get_free_pgtable_pages(PGALLOC_GFP,
+ PGD_ALLOCATION_ORDER);
}

static inline void _pgd_free(pgd_t *pgd)
--- linux.orig/arch/x86/include/asm/pgalloc.h 2018-12-26 19:40:12.992251270 +0800
+++ linux/arch/x86/include/asm/pgalloc.h 2018-12-26 19:42:35.531621035 +0800
@@ -96,10 +96,11 @@ static inline pmd_t *pmd_alloc_one(struc
{
struct page *page;
gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO;
+ nodemask_t all_node_mask = NODE_MASK_ALL;

if (mm == &init_mm)
gfp &= ~__GFP_ACCOUNT;
- page = alloc_pages(gfp, 0);
+ page = __alloc_pages_nodemask(gfp, 0, numa_node_id(), &all_node_mask);
if (!page)
return NULL;
if (!pgtable_pmd_page_ctor(page)) {
@@ -141,13 +142,16 @@ static inline void p4d_populate(struct m
set_p4d(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
}

+extern unsigned long __get_free_pgtable_pages(gfp_t gfp_mask,
+ unsigned int order);
+
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
gfp_t gfp = GFP_KERNEL_ACCOUNT;

if (mm == &init_mm)
gfp &= ~__GFP_ACCOUNT;
- return (pud_t *)get_zeroed_page(gfp);
+ return (pud_t *)__get_free_pgtable_pages(gfp | __GFP_ZERO, 0);
}

static inline void pud_free(struct mm_struct *mm, pud_t *pud)
@@ -179,7 +183,7 @@ static inline p4d_t *p4d_alloc_one(struc

if (mm == &init_mm)
gfp &= ~__GFP_ACCOUNT;
- return (p4d_t *)get_zeroed_page(gfp);
+ return (p4d_t *)__get_free_pgtable_pages(gfp | __GFP_ZERO, 0);
}

static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)