Re: [PATCH] x86/mpx: fix recursive munmap() corruption

From: Laurent Dufour
Date: Tue May 07 2019 - 12:36:50 EST


Le 01/05/2019 Ã 12:32, Michael Ellerman a ÃcritÂ:
Laurent Dufour <ldufour@xxxxxxxxxxxxxxxxxx> writes:
Le 23/04/2019 Ã 18:04, Dave Hansen a ÃcritÂ:
On 4/23/19 4:16 AM, Laurent Dufour wrote:
...
There are 2 assumptions here:
1. 'start' and 'end' are page aligned (this is guaranteed by __do_munmap().
2. the VDSO is 1 page (this is guaranteed by the union vdso_data_store on powerpc)

Are you sure about #2? The 'vdso64_pages' variable seems rather
unnecessary if the VDSO is only 1 page. ;)

Hum, not so sure now ;)
I got confused, only the header is one page.
The test is working as a best effort, and don't cover the case where
only few pages inside the VDSO are unmmapped (start >
mm->context.vdso_base). This is not what CRIU is doing and so this was
enough for CRIU support.

Michael, do you think there is a need to manage all the possibility
here, since the only user is CRIU and unmapping the VDSO is not a so
good idea for other processes ?

Couldn't we implement the semantic that if any part of the VDSO is
unmapped then vdso_base is set to zero? That should be fairly easy, eg:

if (start < vdso_end && end >= mm->context.vdso_base)
mm->context.vdso_base = 0;


We might need to add vdso_end to the mm->context, but that should be OK.

That seems like it would work for CRIU and make sense in general?

Sorry for the late answer, yes this would make more sense.

Here is a patch doing that.

Cheers,
Laurent


From 5b64a86c2a8042c7785c3d3f5e58e954a2c8c843 Mon Sep 17 00:00:00 2001
From: Laurent Dufour <ldufour@xxxxxxxxxxxxx>
Date: Tue, 7 May 2019 16:29:46 +0200
Subject: [PATCH] powerpc/vdso: handle generic unmap of the VDSO

Make the unmap of the VDSO more generic by checking for the start and end
of the VDSO.

This implies to add the vdso_end address in the mm_context_t structure.

Signed-off-by: Laurent Dufour <ldufour@xxxxxxxxxxxxx>
Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx>
Cc: Paul Mackerras <paulus@xxxxxxxxx>
---
arch/powerpc/include/asm/book3s/32/mmu-hash.h | 3 ++-
arch/powerpc/include/asm/book3s/64/mmu.h | 2 +-
arch/powerpc/include/asm/mm-arch-hooks.h | 5 ++++-
arch/powerpc/include/asm/mmu_context.h | 21 +++++++++++++++++--
arch/powerpc/include/asm/nohash/32/mmu-40x.h | 2 +-
arch/powerpc/include/asm/nohash/32/mmu-44x.h | 2 +-
arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 2 +-
arch/powerpc/include/asm/nohash/mmu-book3e.h | 2 +-
arch/powerpc/kernel/vdso.c | 2 ++
9 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
index 2e277ca0170f..452152b809fc 100644
--- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
@@ -29,6 +29,7 @@
#define BPP_RX 0x01 /* Read only */
#define BPP_RW 0x02 /* Read/write */

+
#ifndef __ASSEMBLY__
/* Contort a phys_addr_t into the right format/bits for a BAT */
#ifdef CONFIG_PHYS_64BIT
@@ -90,7 +91,7 @@ struct hash_pte {

typedef struct {
unsigned long id;
- unsigned long vdso_base;
+ unsigned long vdso_base, vdso_end;
} mm_context_t;

void update_bats(void);
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 74d24201fc4f..7a5a91a0696f 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -120,7 +120,7 @@ typedef struct {
struct npu_context *npu_context;
struct hash_mm_context *hash_context;

- unsigned long vdso_base;
+ unsigned long vdso_base, vdso_end;
/*
* pagetable fragment support
*/
diff --git a/arch/powerpc/include/asm/mm-arch-hooks.h b/arch/powerpc/include/asm/mm-arch-hooks.h
index f2a2da895897..1e2d527d3d1f 100644
--- a/arch/powerpc/include/asm/mm-arch-hooks.h
+++ b/arch/powerpc/include/asm/mm-arch-hooks.h
@@ -16,12 +16,15 @@ static inline void arch_remap(struct mm_struct *mm,
unsigned long old_start, unsigned long old_end,
unsigned long new_start, unsigned long new_end)
{
+ unsigned long length = mm->context.vdso_end - mm->context.vdso_base;
/*
* mremap() doesn't allow moving multiple vmas so we can limit the
* check to old_start == vdso_base.
*/
- if (old_start == mm->context.vdso_base)
+ if (old_start == mm->context.vdso_base) {
+ mm->context.vdso_end = new_start + length;
mm->context.vdso_base = new_start;
+ }
}
#define arch_remap arch_remap

diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 611204e588b9..c24f5ed0aeff 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -235,8 +235,25 @@ static inline void arch_unmap(struct mm_struct *mm,
struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- if (start <= mm->context.vdso_base && mm->context.vdso_base < end)
- mm->context.vdso_base = 0;
+ unsigned long vdso_base, vdso_end;
+
+ vdso_base = mm->context.vdso_base;
+ vdso_end = mm->context.vdso_end;
+
+ /*
+ * Partial unmapping of pages inside the VDSO, is consider equivalent
+ * to unmapping the VDSO.
+ *
+ * case 1 > | VDSO | <
+ * case 2 > | < |
+ * case 3 | > < |
+ * case 4 | > | <
+ */
+
+ if ((start <= vdso_base && vdso_end <= end) || /* 1 */
+ (vdso_base <= start && start < vdso_end) || /* 3,4 */
+ (vdso_base < end && end <= vdso_end)) /* 2,3 */
+ mm->context.vdso_base = mm->context.vdso_end = 0;
}

static inline void arch_bprm_mm_init(struct mm_struct *mm,
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-40x.h b/arch/powerpc/include/asm/nohash/32/mmu-40x.h
index 74f4edb5916e..98739ba9d36e 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu-40x.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-40x.h
@@ -57,7 +57,7 @@
typedef struct {
unsigned int id;
unsigned int active;
- unsigned long vdso_base;
+ unsigned long vdso_base, vdso_end;
} mm_context_t;

#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-44x.h b/arch/powerpc/include/asm/nohash/32/mmu-44x.h
index 28aa3b339c5e..de1d5b1c8cec 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu-44x.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-44x.h
@@ -108,7 +108,7 @@ extern unsigned int tlb_44x_index;
typedef struct {
unsigned int id;
unsigned int active;
- unsigned long vdso_base;
+ unsigned long vdso_base, vdso_end;
} mm_context_t;

/* patch sites */
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
index 76af5b0cb16e..414ce6638b20 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
@@ -209,7 +209,7 @@ struct slice_mask {
typedef struct {
unsigned int id;
unsigned int active;
- unsigned long vdso_base;
+ unsigned long vdso_base, vdso_end;
#ifdef CONFIG_PPC_MM_SLICES
u16 user_psize; /* page size index */
unsigned char low_slices_psize[SLICE_ARRAY_SIZE];
diff --git a/arch/powerpc/include/asm/nohash/mmu-book3e.h b/arch/powerpc/include/asm/nohash/mmu-book3e.h
index 4c9777d256fb..8f406ad9fe25 100644
--- a/arch/powerpc/include/asm/nohash/mmu-book3e.h
+++ b/arch/powerpc/include/asm/nohash/mmu-book3e.h
@@ -229,7 +229,7 @@ extern unsigned int tlbcam_index;
typedef struct {
unsigned int id;
unsigned int active;
- unsigned long vdso_base;
+ unsigned long vdso_base, vdso_end;
} mm_context_t;

/* Page size definitions, common between 32 and 64-bit
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index a31b6234fcd7..263f820cc666 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -182,6 +182,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
#endif

current->mm->context.vdso_base = 0;
+ current->mm->context.vdso_end = 0;

/* vDSO has a problem and was disabled, just don't "enable" it for the
* process
@@ -217,6 +218,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
* will fail to recognise it as a vDSO (since arch_vma_name fails).
*/
current->mm->context.vdso_base = vdso_base;
+ current->mm->context.vdso_end = vdso_base + (vdso_pages << PAGE_SHIFT);

/*
* our vma flags don't have VM_WRITE so by default, the process isn't
--
2.21.0