Re: [PATCH 4/5] RISC-V: Protect .init.text & .init.data

From: Atish Patra
Date: Mon Oct 12 2020 - 19:27:17 EST


On Mon, Oct 12, 2020 at 6:15 AM Greentime Hu <greentime.hu@xxxxxxxxxx> wrote:
>
> Atish Patra <atish.patra@xxxxxxx> 於 2020年10月10日 週六 上午5:13寫道:
> >
> > Currently, .init.text & .init.data are intermixed which makes it impossible
> > apply different permissions to them. .init.data shouldn't need exec
> > permissions while .init.text shouldn't have write permission.
> >
> > Keep them in separate sections so that different permissions are applied to
> > each section. This improves the kernel protection under
> > CONFIG_STRICT_KERNEL_RWX. We also need to restore the permissions for the
> > entire _init section after it is freed so that those pages can be used for
> > other purpose.
> >
> > Signed-off-by: Atish Patra <atish.patra@xxxxxxx>
> > ---
> > arch/riscv/include/asm/sections.h | 2 ++
> > arch/riscv/include/asm/set_memory.h | 2 ++
> > arch/riscv/kernel/setup.c | 4 ++++
> > arch/riscv/kernel/vmlinux.lds.S | 10 +++++++++-
> > arch/riscv/mm/init.c | 6 ++++++
> > arch/riscv/mm/pageattr.c | 6 ++++++
> > 6 files changed, 29 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h
> > index d60802bfafbc..730d2c4a844d 100644
> > --- a/arch/riscv/include/asm/sections.h
> > +++ b/arch/riscv/include/asm/sections.h
> > @@ -10,6 +10,8 @@
> > #include <asm-generic/sections.h>
> > extern char _start[];
> > extern char _start_kernel[];
> > +extern char __init_data_begin[], __init_data_end[];
> > +extern char __init_text_begin[], __init_text_end[];
> >
> > extern bool init_mem_is_free;
> >
> > diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
> > index 4cc3a4e2afd3..913429c9c1ae 100644
> > --- a/arch/riscv/include/asm/set_memory.h
> > +++ b/arch/riscv/include/asm/set_memory.h
> > @@ -15,6 +15,7 @@ int set_memory_ro(unsigned long addr, int numpages);
> > int set_memory_rw(unsigned long addr, int numpages);
> > int set_memory_x(unsigned long addr, int numpages);
> > int set_memory_nx(unsigned long addr, int numpages);
> > +int set_memory_default(unsigned long addr, int numpages);
> > void protect_kernel_text_data(void);
> > #else
> > static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
> > @@ -22,6 +23,7 @@ static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
> > static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
> > static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
> > static inline void protect_kernel_text_data(void) {};
> > +static inline int set_memory_default(unsigned long addr, int numpages) { return 0; }
> > #endif
> >
> > int set_direct_map_invalid_noflush(struct page *page);
> > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > index 4176a2affd1d..b8a35ef0eab0 100644
> > --- a/arch/riscv/kernel/setup.c
> > +++ b/arch/riscv/kernel/setup.c
> > @@ -129,6 +129,10 @@ bool init_mem_is_free = false;
> >
> > void free_initmem(void)
> > {
> > + unsigned long init_begin = (unsigned long)__init_begin;
> > + unsigned long init_end = (unsigned long)__init_end;
> > +
> > + set_memory_default(init_begin, (init_end - init_begin) >> PAGE_SHIFT);
> > free_initmem_default(POISON_FREE_INITMEM);
> > init_mem_is_free = true;
> > }
> > diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
> > index 0807633f0dc8..15b9882588ae 100644
> > --- a/arch/riscv/kernel/vmlinux.lds.S
> > +++ b/arch/riscv/kernel/vmlinux.lds.S
> > @@ -30,8 +30,8 @@ SECTIONS
> > . = ALIGN(PAGE_SIZE);
> >
> > __init_begin = .;
> > + __init_text_begin = .;
> > INIT_TEXT_SECTION(PAGE_SIZE)
> > - INIT_DATA_SECTION(16)
> > . = ALIGN(8);
> > __soc_early_init_table : {
> > __soc_early_init_table_start = .;
> > @@ -48,11 +48,19 @@ SECTIONS
> > {
> > EXIT_TEXT
> > }
> > +
> > + __init_text_end = .;
> > + . = ALIGN(SECTION_ALIGN);
> > + /* Start of init data section */
> > + __init_data_begin = .;
> > + INIT_DATA_SECTION(16)
> > .exit.data :
> > {
> > EXIT_DATA
> > }
> > PERCPU_SECTION(L1_CACHE_BYTES)
> > +
> > + __init_data_end = .;
> > __init_end = .;
> >
> > . = ALIGN(SECTION_ALIGN);
> > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> > index 7859a1d1b34d..3ef0eafcc7c7 100644
> > --- a/arch/riscv/mm/init.c
> > +++ b/arch/riscv/mm/init.c
> > @@ -627,11 +627,17 @@ void protect_kernel_text_data(void)
> > {
> > unsigned long text_start = (unsigned long)_text;
> > unsigned long text_end = (unsigned long)_etext;
> > + unsigned long init_text_start = (unsigned long)__init_text_begin;
> > + unsigned long init_text_end = (unsigned long)__init_text_end;
> > + unsigned long init_data_start = (unsigned long)__init_data_begin;
> > + unsigned long init_data_end = (unsigned long)__init_data_end;
> > unsigned long rodata_start = (unsigned long)__start_rodata;
> > unsigned long data_start = (unsigned long)_data;
> > unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
> >
> > + set_memory_ro(init_text_start, (init_text_end - init_text_start) >> PAGE_SHIFT);
> > set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
> > + set_memory_nx(init_data_start, (init_data_end - init_data_start) >> PAGE_SHIFT);
> > set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
> > set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
> > }
> > diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
> > index 19fecb362d81..aecedaf086ab 100644
> > --- a/arch/riscv/mm/pageattr.c
> > +++ b/arch/riscv/mm/pageattr.c
> > @@ -128,6 +128,12 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
> > return ret;
> > }
> >
> > +int set_memory_default(unsigned long addr, int numpages)
> > +{
> > + return __set_memory(addr, numpages, __pgprot(_PAGE_KERNEL | _PAGE_EXEC),
> > + __pgprot(0));
> > +}
> > +
> > int set_memory_ro(unsigned long addr, int numpages)
> > {
> > return __set_memory(addr, numpages, __pgprot(_PAGE_READ),
>
>
> Hi Atish,
>
> I tested this patchset and CONFIG_DEBUG_WX=y
>

Thanks for testing the patch.

> [ 2.664012] Freeing unused kernel memory: 114420K
> [ 2.666081] ------------[ cut here ]------------
> [ 2.666779] riscv/mm: Found insecure W+X mapping at address
> (____ptrval____)/0xffffffe000000000
> [ 2.668004] WARNING: CPU: 0 PID: 1 at arch/riscv/mm/ptdump.c:200
> note_page+0xc2/0x238
> [ 2.669147] Modules linked in:
> [ 2.669735] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> 5.9.0-rc8-00033-gfacf070a80ea #153
> [ 2.670466] epc: ffffffe00700697c ra : ffffffe00700697c sp : ffffffe0fba9bc10
> [ 2.671054] gp : ffffffe007e73078 tp : ffffffe0fba90000 t0 :
> ffffffe007e861d0
> [ 2.671683] t1 : 0000000000000064 t2 : ffffffe007801664 s0 :
> ffffffe0fba9bc60
> [ 2.672499] s1 : ffffffe0fba9bde8 a0 : 0000000000000053 a1 :
> 0000000000000020
> [ 2.673119] a2 : 0000000000000000 a3 : 00000000000000f4 a4 :
> d4328dc070ccb100
> [ 2.673729] a5 : d4328dc070ccb100 a6 : 0000000000000000 a7 :
> ffffffe007851e98
> [ 2.674333] s2 : ffffffe007000000 s3 : ffffffe0fba9bde8 s4 :
> 0000000087200000
> [ 2.674963] s5 : 00000000000000cb s6 : 0000000000000003 s7 :
> ffffffe007e7ac00
> [ 2.675564] s8 : ffffffe000000000 s9 : ffffffe007000000 s10:
> ffffffe000000000
> [ 2.676392] s11: ffffffe040000000 t3 : 000000000001da48 t4 :
> 000000000001da48
> [ 2.676992] t5 : ffffffe007e74490 t6 : ffffffe007e81432
> [ 2.677449] status: 0000000000000120 badaddr: 0000000000000000
> cause: 0000000000000003
> [ 2.678128] ---[ end trace 5f0a86dbe986db9b ]---
> [ 2.678952] Checked W+X mappings: failed, 28672 W+X pages found
> [ 2.679737] Run /init as init process
>

This would be triggered for the current kernel as well as we don't
protect the permission for __init section at all. As you correctly
pointed out, __init & .head sections are mapped with same permissions
because of 2MB mappings.

Currently, the entire head.text & init section have RWX permission.
This patch is trying remove the write permission from .init.text &
.head.text and execute permission from .init.data until kernel is
booted.
It doesn't provide full protection but better than the current scheme.

To remove the write permission of .head.txt only, we have to keep
.head.txt & .init.text section in separate sections. The linear
mapping would look like this in that case.
There are no issues as such but kernel size would increase by another 2M.

---[ Linear mapping ]---
0xffffffe000000000-0xffffffe000200000 0x0000000080200000 2M
PMD D A . . X . R V
0xffffffe000200000-0xffffffe000600000 0x0000000080400000 4M
PMD D A . . . W R V
0xffffffe000600000-0xffffffe000e00000 0x0000000080800000 8M
PMD D A . . X . R V
0xffffffe000e00000-0xffffffe001400000 0x0000000081000000 6M
PMD D A . . . . R V
0xffffffe001400000-0xffffffe03fe00000 0x0000000081600000 1002M
PMD D A . . . W R V

The other solution would be move init section below text. Keep text &
head in one section.
The .init.text & .init.data will be in separate sections after that.
Here is the mapping in that case.

---[ Linear mapping ]---
0xffffffe000000000-0xffffffe000800000 0x0000000080200000 8M
PMD D A . . X . R V
0xffffffe000800000-0xffffffe000c00000 0x0000000080a00000 4M
PMD D A . . . W R V
0xffffffe000c00000-0xffffffe001200000 0x0000000080e00000 6M
PMD D A . . . . R V
0xffffffe001200000-0xffffffe03fe00000 0x0000000081400000 1004M
PMD D A . . . W R V

I prefer the 2nd approach compared to the first one as it saves memory
and we can fix lockdep issue without adding arch_is_kernel_data
to sections.h (Guo's patch).

However, the 2nd approach throws this problem if
CONFIG_HARDENED_USERCOPY is enabled.

net/ipv4/ipconfig.o: in function `ic_setup_routes':
/home/atish/workspace/linux/net/ipv4/ipconfig.c:400:(.init.text+0x1c4):
relocation truncated to fit: R_RISCV_JAL against symbol `ip_rt_ioctl'
defined in .text section in net/ipv4/fib_frontend.o
make: *** [Makefile:1162: vmlinux] Error 1

I am currently looking into this to understand why R_RISCV_JAL is
generated a generic function invocation
where auipc + jalr should have been used.

> After debugging, I found it is because
> set_memory_ro()/set_memory_nx()/... can't handle the 4KB-aligned
> address if it is a 2MB mapping address.
> For example, if we gave 0xffffffe000001000 and this address is in a
> PMD mapping which is 2MB mapping, it will set the page attributes from
> 0xffffffe000000000.
> And this caused the CPU hotplug failed because it is in head.text section.
>
> #0 pageattr_pmd_entry (pmd=0xffffffe0ffdff000,
> addr=18446743936270602240, next=18446743936270766080,
> walk=0xffffffe007e03e98) at arch/riscv/mm/pageattr.c:70
> 70 pmd_t val = READ_ONCE(*pmd);
> => 0xffffffe007005e96 <pageattr_pmd_entry+0>: 41 11 addi sp,sp,-16
> 0xffffffe007005e98 <pageattr_pmd_entry+2>: 22 e4 sd s0,8(sp)
> 0xffffffe007005e9a <pageattr_pmd_entry+4>: 00 08 addi s0,sp,16
> 0xffffffe007005e9c <pageattr_pmd_entry+6>: 1c 61 ld a5,0(a0)
> (gdb) p/x addr
> $21 = 0xffffffe000001000
>
> # cat /sys/kernel/debug/kernel_page_tables
> ---[ Fixmap start ]---
> 0xffffffcefef00000-0xffffffceff000000 0x0000000082200000 1M
> PTE D A . . . W R V
> ---[ Fixmap end ]---
> ---[ PCI I/O start ]---
> ---[ PCI I/O end ]---
> ---[ vmalloc() area ]---
> 0xffffffd000090000-0xffffffd000093000 0x00000001741bb000 12K
> PTE D A . . . W R V
> 0xffffffdffffbb000-0xffffffdffffbe000 0x0000000175f97000 12K
> PTE D A . . . W R V
> 0xffffffdffffcc000-0xffffffdffffcf000 0x0000000175f9a000 12K
> PTE D A . . . W R V
> 0xffffffdffffdd000-0xffffffdffffe0000 0x0000000175f9d000 12K
> PTE D A . . . W R V
> 0xffffffdffffee000-0xffffffdfffff1000 0x0000000175fa0000 12K
> PTE D A . . . W R V
> ---[ vmalloc() end ]---
> ---[ Linear mapping ]---
> 0xffffffe000000000-0xffffffe007000000 0x0000000080200000 112M
> PMD D A . . X W R V
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> 0xffffffe007000000-0xffffffe007800000 0x0000000087200000 8M
> PMD D A . . X . R V
> 0xffffffe007800000-0xffffffe007e00000 0x0000000087a00000 6M
> PMD D A . . . . R V
> 0xffffffe007e00000-0xffffffe0ffe00000 0x0000000088000000 3968M
> PMD D A . . . W R V
> #
>
> Any idea?
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@xxxxxxxxxxxxxxxxxxx
> http://lists.infradead.org/mailman/listinfo/linux-riscv



--
Regards,
Atish