[PATCH 3.10 238/268] mm: Tighten x86 /dev/mem with zeroing reads

From: Willy Tarreau
Date: Mon Jun 19 2017 - 15:30:22 EST


From: Kees Cook <keescook@xxxxxxxxxxxx>

commit a4866aa812518ed1a37d8ea0c881dc946409de94 upstream.

Under CONFIG_STRICT_DEVMEM, reading System RAM through /dev/mem is
disallowed. However, on x86, the first 1MB was always allowed for BIOS
and similar things, regardless of it actually being System RAM. It was
possible for heap to end up getting allocated in low 1MB RAM, and then
read by things like x86info or dd, which would trip hardened usercopy:

usercopy: kernel memory exposure attempt detected from ffff880000090000 (dma-kmalloc-256) (4096 bytes)

This changes the x86 exception for the low 1MB by reading back zeros for
System RAM areas instead of blindly allowing them. More work is needed to
extend this to mmap, but currently mmap doesn't go through usercopy, so
hardened usercopy won't Oops the kernel.

Reported-by: Tommi Rantala <tommi.t.rantala@xxxxxxxxx>
Tested-by: Tommi Rantala <tommi.t.rantala@xxxxxxxxx>
Signed-off-by: Kees Cook <keescook@xxxxxxxxxxxx>
Cc: Brad Spengler <spender@xxxxxxxxxxxxxx>
Signed-off-by: Jiri Slaby <jslaby@xxxxxxx>
Signed-off-by: Willy Tarreau <w@xxxxxx>
---
arch/x86/mm/init.c | 41 +++++++++++++++++++--------
drivers/char/mem.c | 82 ++++++++++++++++++++++++++++++++++--------------------
2 files changed, 82 insertions(+), 41 deletions(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 7a5bf1b..4094078 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -475,21 +475,40 @@ void __init init_mem_mapping(void)
* devmem_is_allowed() checks to see if /dev/mem access to a certain address
* is valid. The argument is a physical page number.
*
- *
- * On x86, access has to be given to the first megabyte of ram because that area
- * contains bios code and data regions used by X and dosemu and similar apps.
- * Access has to be given to non-kernel-ram areas as well, these contain the PCI
- * mmio resources as well as potential bios/acpi data regions.
+ * On x86, access has to be given to the first megabyte of RAM because that
+ * area traditionally contains BIOS code and data regions used by X, dosemu,
+ * and similar apps. Since they map the entire memory range, the whole range
+ * must be allowed (for mapping), but any areas that would otherwise be
+ * disallowed are flagged as being "zero filled" instead of rejected.
+ * Access has to be given to non-kernel-ram areas as well, these contain the
+ * PCI mmio resources as well as potential bios/acpi data regions.
*/
int devmem_is_allowed(unsigned long pagenr)
{
- if (pagenr < 256)
- return 1;
- if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
+ if (page_is_ram(pagenr)) {
+ /*
+ * For disallowed memory regions in the low 1MB range,
+ * request that the page be shown as all zeros.
+ */
+ if (pagenr < 256)
+ return 2;
+
+ return 0;
+ }
+
+ /*
+ * This must follow RAM test, since System RAM is considered a
+ * restricted resource under CONFIG_STRICT_IOMEM.
+ */
+ if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) {
+ /* Low 1MB bypasses iomem restrictions. */
+ if (pagenr < 256)
+ return 1;
+
return 0;
- if (!page_is_ram(pagenr))
- return 1;
- return 0;
+ }
+
+ return 1;
}

void free_init_pages(char *what, unsigned long begin, unsigned long end)
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 598ece7..40d2e99 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -61,6 +61,10 @@ static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
#endif

#ifdef CONFIG_STRICT_DEVMEM
+static inline int page_is_allowed(unsigned long pfn)
+{
+ return devmem_is_allowed(pfn);
+}
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
u64 from = ((u64)pfn) << PAGE_SHIFT;
@@ -76,6 +80,10 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
return 1;
}
#else
+static inline int page_is_allowed(unsigned long pfn)
+{
+ return 1;
+}
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
return 1;
@@ -117,23 +125,31 @@ static ssize_t read_mem(struct file *file, char __user *buf,

while (count > 0) {
unsigned long remaining;
+ int allowed;

sz = size_inside_page(p, count);

- if (!range_is_allowed(p >> PAGE_SHIFT, count))
+ allowed = page_is_allowed(p >> PAGE_SHIFT);
+ if (!allowed)
return -EPERM;
+ if (allowed == 2) {
+ /* Show zeros for restricted memory. */
+ remaining = clear_user(buf, sz);
+ } else {
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur.
+ */
+ ptr = xlate_dev_mem_ptr(p);
+ if (!ptr)
+ return -EFAULT;

- /*
- * On ia64 if a page has been mapped somewhere as uncached, then
- * it must also be accessed uncached by the kernel or data
- * corruption may occur.
- */
- ptr = xlate_dev_mem_ptr(p);
- if (!ptr)
- return -EFAULT;
+ remaining = copy_to_user(buf, ptr, sz);
+
+ unxlate_dev_mem_ptr(p, ptr);
+ }

- remaining = copy_to_user(buf, ptr, sz);
- unxlate_dev_mem_ptr(p, ptr);
if (remaining)
return -EFAULT;

@@ -173,30 +189,36 @@ static ssize_t write_mem(struct file *file, const char __user *buf,
#endif

while (count > 0) {
+ int allowed;
+
sz = size_inside_page(p, count);

- if (!range_is_allowed(p >> PAGE_SHIFT, sz))
+ allowed = page_is_allowed(p >> PAGE_SHIFT);
+ if (!allowed)
return -EPERM;

- /*
- * On ia64 if a page has been mapped somewhere as uncached, then
- * it must also be accessed uncached by the kernel or data
- * corruption may occur.
- */
- ptr = xlate_dev_mem_ptr(p);
- if (!ptr) {
- if (written)
- break;
- return -EFAULT;
- }
+ /* Skip actual writing when a page is marked as restricted. */
+ if (allowed == 1) {
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur.
+ */
+ ptr = xlate_dev_mem_ptr(p);
+ if (!ptr) {
+ if (written)
+ break;
+ return -EFAULT;
+ }

- copied = copy_from_user(ptr, buf, sz);
- unxlate_dev_mem_ptr(p, ptr);
- if (copied) {
- written += sz - copied;
- if (written)
- break;
- return -EFAULT;
+ copied = copy_from_user(ptr, buf, sz);
+ unxlate_dev_mem_ptr(p, ptr);
+ if (copied) {
+ written += sz - copied;
+ if (written)
+ break;
+ return -EFAULT;
+ }
}

buf += sz;
--
2.8.0.rc2.1.gbe9624a