[PATCH 3.18 056/124] mm: Tighten x86 /dev/mem with zeroing reads

From: Greg Kroah-Hartman
Date: Thu Apr 20 2017 - 02:59:12 EST


3.18-stable review patch. If anyone has any objections, please let me know.

------------------

From: Kees Cook <keescook@xxxxxxxxxxxx>

commit a4866aa812518ed1a37d8ea0c881dc946409de94 upstream.

Under CONFIG_STRICT_DEVMEM, reading System RAM through /dev/mem is
disallowed. However, on x86, the first 1MB was always allowed for BIOS
and similar things, regardless of it actually being System RAM. It was
possible for heap to end up getting allocated in low 1MB RAM, and then
read by things like x86info or dd, which would trip hardened usercopy:

usercopy: kernel memory exposure attempt detected from ffff880000090000 (dma-kmalloc-256) (4096 bytes)

This changes the x86 exception for the low 1MB by reading back zeros for
System RAM areas instead of blindly allowing them. More work is needed to
extend this to mmap, but currently mmap doesn't go through usercopy, so
hardened usercopy won't Oops the kernel.

Reported-by: Tommi Rantala <tommi.t.rantala@xxxxxxxxx>
Tested-by: Tommi Rantala <tommi.t.rantala@xxxxxxxxx>
Signed-off-by: Kees Cook <keescook@xxxxxxxxxxxx>
Cc: Brad Spengler <spender@xxxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

---
arch/x86/mm/init.c | 40 +++++++++++++++++++------
drivers/char/mem.c | 82 +++++++++++++++++++++++++++++++++--------------------
2 files changed, 82 insertions(+), 40 deletions(-)

--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -581,20 +581,40 @@ void __init init_mem_mapping(void)
* is valid. The argument is a physical page number.
*
*
- * On x86, access has to be given to the first megabyte of ram because that area
- * contains bios code and data regions used by X and dosemu and similar apps.
- * Access has to be given to non-kernel-ram areas as well, these contain the PCI
- * mmio resources as well as potential bios/acpi data regions.
+ * On x86, access has to be given to the first megabyte of RAM because that
+ * area traditionally contains BIOS code and data regions used by X, dosemu,
+ * and similar apps. Since they map the entire memory range, the whole range
+ * must be allowed (for mapping), but any areas that would otherwise be
+ * disallowed are flagged as being "zero filled" instead of rejected.
+ * Access has to be given to non-kernel-ram areas as well, these contain the
+ * PCI mmio resources as well as potential bios/acpi data regions.
*/
int devmem_is_allowed(unsigned long pagenr)
{
- if (pagenr < 256)
- return 1;
- if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
+ if (page_is_ram(pagenr)) {
+ /*
+ * For disallowed memory regions in the low 1MB range,
+ * request that the page be shown as all zeros.
+ */
+ if (pagenr < 256)
+ return 2;
+
return 0;
- if (!page_is_ram(pagenr))
- return 1;
- return 0;
+ }
+
+ /*
+ * This must follow RAM test, since System RAM is considered a
+ * restricted resource under CONFIG_STRICT_IOMEM.
+ */
+ if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) {
+ /* Low 1MB bypasses iomem restrictions. */
+ if (pagenr < 256)
+ return 1;
+
+ return 0;
+ }
+
+ return 1;
}

void free_init_pages(char *what, unsigned long begin, unsigned long end)
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -59,6 +59,10 @@ static inline int valid_mmap_phys_addr_r
#endif

#ifdef CONFIG_STRICT_DEVMEM
+static inline int page_is_allowed(unsigned long pfn)
+{
+ return devmem_is_allowed(pfn);
+}
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
u64 from = ((u64)pfn) << PAGE_SHIFT;
@@ -78,6 +82,10 @@ static inline int range_is_allowed(unsig
return 1;
}
#else
+static inline int page_is_allowed(unsigned long pfn)
+{
+ return 1;
+}
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
return 1;
@@ -122,23 +130,31 @@ static ssize_t read_mem(struct file *fil

while (count > 0) {
unsigned long remaining;
+ int allowed;

sz = size_inside_page(p, count);

- if (!range_is_allowed(p >> PAGE_SHIFT, count))
+ allowed = page_is_allowed(p >> PAGE_SHIFT);
+ if (!allowed)
return -EPERM;
+ if (allowed == 2) {
+ /* Show zeros for restricted memory. */
+ remaining = clear_user(buf, sz);
+ } else {
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur.
+ */
+ ptr = xlate_dev_mem_ptr(p);
+ if (!ptr)
+ return -EFAULT;

- /*
- * On ia64 if a page has been mapped somewhere as uncached, then
- * it must also be accessed uncached by the kernel or data
- * corruption may occur.
- */
- ptr = xlate_dev_mem_ptr(p);
- if (!ptr)
- return -EFAULT;
+ remaining = copy_to_user(buf, ptr, sz);
+
+ unxlate_dev_mem_ptr(p, ptr);
+ }

- remaining = copy_to_user(buf, ptr, sz);
- unxlate_dev_mem_ptr(p, ptr);
if (remaining)
return -EFAULT;

@@ -181,30 +197,36 @@ static ssize_t write_mem(struct file *fi
#endif

while (count > 0) {
+ int allowed;
+
sz = size_inside_page(p, count);

- if (!range_is_allowed(p >> PAGE_SHIFT, sz))
+ allowed = page_is_allowed(p >> PAGE_SHIFT);
+ if (!allowed)
return -EPERM;

- /*
- * On ia64 if a page has been mapped somewhere as uncached, then
- * it must also be accessed uncached by the kernel or data
- * corruption may occur.
- */
- ptr = xlate_dev_mem_ptr(p);
- if (!ptr) {
- if (written)
- break;
- return -EFAULT;
- }
+ /* Skip actual writing when a page is marked as restricted. */
+ if (allowed == 1) {
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur.
+ */
+ ptr = xlate_dev_mem_ptr(p);
+ if (!ptr) {
+ if (written)
+ break;
+ return -EFAULT;
+ }

- copied = copy_from_user(ptr, buf, sz);
- unxlate_dev_mem_ptr(p, ptr);
- if (copied) {
- written += sz - copied;
- if (written)
- break;
- return -EFAULT;
+ copied = copy_from_user(ptr, buf, sz);
+ unxlate_dev_mem_ptr(p, ptr);
+ if (copied) {
+ written += sz - copied;
+ if (written)
+ break;
+ return -EFAULT;
+ }
}

buf += sz;