Re: [PATCH v2] x86, mm: set NX across entire PMD at boot

From: Yinghai Lu
Date: Sun Nov 16 2014 - 22:31:04 EST


On Sun, Nov 16, 2014 at 1:26 PM, Thomas Gleixner <tglx@xxxxxxxxxxxxx> wrote:
> On Sat, 15 Nov 2014, Yinghai Lu wrote:
>> +static pmd_t *last_pmd;
>> /*
>> * The head.S code sets up the kernel high mapping:
>> *
>> @@ -408,9 +409,26 @@ void __init cleanup_highmap(void)
>> continue;
>> if (vaddr < (unsigned long) _text || vaddr > end)
>> set_pmd(pmd, __pmd(0));
>> + else
>> + last_pmd = pmd;
>
> Why do you need to store this? You can compute this.

I'm not quite sure about the xen path.

>
>> +static void __init cleanup_highmap_tail(unsigned long addr)
>
> Brilliant stuff. mark_rodata_ro() is called AFTER free_initmem() which
> will free exactly that code.

I missed that.

Please check this one, which addresses the three problems you pointed out.

Subject: [PATCH v2] x86, 64bit: cleanup highmap tail near partial 2M range

1. Use _brk_end instead of &_end in mark_rodata_ro().
_brk_end can move up to &_end, i.e. to __brk_limit. It is safe to
use _brk_end when mark_rodata_ro() is called because extend_brk()
is gone already at that point.
2. The [_brk_end, pmd_end) page range is already converted memory,
so it is not wasted.
3. Add cleanup_highmap_tail() for [_brk_end, pmd_end).

Kernel Layout:
[ 0.000000] .brk: [0x0437c000-0x043a1fff]

Actually used brk:
[ 0.272959] memblock_reserve: [0x0000000437c000-0x00000004382fff] flags 0x0 BRK

Before patch:
---[ High Kernel Mapping ]---
...
0xffffffff83400000-0xffffffff84200000 14M RW PSE GLB NX pmd
0xffffffff84200000-0xffffffff843a2000 1672K RW GLB NX pte
0xffffffff843a2000-0xffffffff84400000 376K RW GLB x pte
0xffffffff84400000-0xffffffffa0000000 444M pmd
After patch:
---[ High Kernel Mapping ]---
...
0xffffffff83400000-0xffffffff84200000 14M RW PSE GLB NX pmd
0xffffffff84200000-0xffffffff84383000 1548K RW GLB NX pte
0xffffffff84383000-0xffffffff84400000 500K pte
0xffffffff84400000-0xffffffffa0000000 444M pmd

-v2: according to tglx:
calculate the pmd position instead of passing last_pmd.
cleanup_highmap_tail() cannot be __init, as it is called from mark_rodata_ro(),
and mark_rodata_ro() is called after free_initmem().
highmap_end_pfn() should keep PMD_SIZE alignment on !CONFIG_DEBUG_RODATA.

Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>

---
arch/x86/mm/init_64.c | 22 +++++++++++++++++++++-
arch/x86/mm/pageattr.c | 4 ++++
2 files changed, 25 insertions(+), 1 deletion(-)

Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -411,6 +411,23 @@ void __init cleanup_highmap(void)
}
}

+static void cleanup_highmap_tail(unsigned long addr)
+{
+ int i;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = pgd_offset_k(addr);
+ pud = (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr);
+ pmd = (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr);
+ pte = (pte_t *)pmd_page_vaddr(*pmd) + pte_index(addr);
+
+ for (i = pte_index(addr); i < PTRS_PER_PTE; i++, pte++)
+ set_pte(pte, __pte(0));
+}
+
static unsigned long __meminit
phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
pgprot_t prot)
@@ -1124,7 +1141,8 @@ void mark_rodata_ro(void)
unsigned long end = (unsigned long) &__end_rodata_hpage_align;
unsigned long text_end = PFN_ALIGN(&__stop___ex_table);
unsigned long rodata_end = PFN_ALIGN(&__end_rodata);
- unsigned long all_end = PFN_ALIGN(&_end);
+ unsigned long all_end = PFN_ALIGN(_brk_end);
+ unsigned long pmd_end = roundup(all_end, PMD_SIZE);

printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
(end - start) >> 10);
@@ -1137,6 +1155,8 @@ void mark_rodata_ro(void)
* should also be not-executable.
*/
set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT);
+ if (all_end < pmd_end)
+ cleanup_highmap_tail(all_end);

rodata_test();

Index: linux-2.6/arch/x86/mm/pageattr.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/pageattr.c
+++ linux-2.6/arch/x86/mm/pageattr.c
@@ -100,7 +100,11 @@ static inline unsigned long highmap_star

static inline unsigned long highmap_end_pfn(void)
{
+#ifdef CONFIG_DEBUG_RODATA
+ return __pa_symbol(PFN_ALIGN(_brk_end)) >> PAGE_SHIFT;
+#else
return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
+#endif
}

#endif
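
For reference, here is a minimal userspace sketch (not kernel code) that
just redoes the tail arithmetic from the dumps above. PAGE_SIZE, PMD_SIZE
and the PFN_ALIGN/ROUNDUP helpers are redefined locally, and the _brk_end
value is taken from the memblock_reserve line, so treat it purely as an
illustration of how the [all_end, pmd_end) tail in mark_rodata_ro() works
out to the 500K pte range shown in the after-patch mapping:

/*
 * Standalone illustration only -- the constants mirror the kernel's,
 * and the example address comes from the memblock_reserve dump above.
 */
#include <stdio.h>

#define PAGE_SIZE	0x1000UL
#define PMD_SIZE	0x200000UL			/* 2M */
#define PFN_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define ROUNDUP(x, y)	((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
	unsigned long brk_end = 0xffffffff84383000UL;	/* actually used brk end */
	unsigned long all_end = PFN_ALIGN(brk_end);
	unsigned long pmd_end = ROUNDUP(all_end, PMD_SIZE);

	/* [all_end, pmd_end) is the partial-2M tail whose ptes get cleared */
	printf("all_end = %#lx\n", all_end);			/* 0xffffffff84383000 */
	printf("pmd_end = %#lx\n", pmd_end);			/* 0xffffffff84400000 */
	printf("tail    = %luK\n", (pmd_end - all_end) >> 10);	/* 500K */
	return 0;
}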