[PATCH 06/13] xen: detect pre-allocated memory interfering with e820 map

From: Juergen Gross
Date: Wed Feb 18 2015 - 01:53:54 EST


Currently, especially for dom0, guest memory areas whose guest pfns do not
match host areas populated with RAM are remapped to areas which are backed
by native RAM as well. This is done to be able to use identity mappings
(pfn == mfn) for I/O areas.

Up to now there is no check whether the memory to be remapped is already
in use. Remapping used memory will probably result in data corruption, as
the remapped memory is no longer reserved: any memory allocation after the
remap can claim that memory.

Add an infrastructure to check the reserved memory areas for conflicts
with the final E820 map and, in case of a conflict, to react via an
area-specific function.

This function has three options (a sketch of such a handler is shown
below):
- Panic.
- Handle the conflict by moving the data to another memory area. This is
indicated by a return value other than 0.
- Just return 0. This will delay invalidating the conflicting memory area
until just before doing the remap. This option is usable only for cases
where the memory is no longer needed when the remap operation starts,
e.g. for the p2m list, which has already been copied by then.
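
Purely for illustration (not part of this patch), an area-specific handler
matching the int (*func)(phys_addr_t, phys_addr_t) prototype could cover
the three options roughly like this; xen_example_can_relocate and
xen_example_relocate() are made-up placeholders:

  /* Hypothetical conflict handler - the helpers used here are made up. */
  static int __init xen_example_conflict(phys_addr_t start, phys_addr_t size)
  {
          /* Option 1: nothing can be done, crash with a clear message. */
          if (!xen_example_can_relocate)
                  panic("Xen: reserved area %pa+%pa conflicts with E820 map",
                        &start, &size);

          /* Option 2: move the data out of the way and return non-zero to
           * signal that the conflict has been handled. */
          if (xen_example_relocate(start, size))
                  return 1;

          /* Option 3: return 0 to delay invalidating the area until just
           * before the remap is done; only valid if the data is no longer
           * needed at that point (e.g. the already copied p2m list). */
          return 0;
  }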

When doing the remap, check that no reserved page is remapped.
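
No users of the new interface are introduced by this patch. Just to sketch
the intended use, a later caller could register e.g. the initrd area via
xen_add_reserved_area(); initrd_phys/initrd_len and the handler are
placeholders for however the caller knows the area:

  /* Illustration only - how a later caller might register the initrd.
   * The final argument asks xen_add_reserved_area() to do the
   * memblock_reserve() as well. */
  static void __init xen_example_reserve_initrd(phys_addr_t initrd_phys,
                                                phys_addr_t initrd_len)
  {
          xen_add_reserved_area(initrd_phys, initrd_len,
                                xen_example_conflict, 1);
  }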

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
arch/x86/xen/setup.c | 185 +++++++++++++++++++++++++++++++++++++++++++++++--
arch/x86/xen/xen-ops.h | 2 +
2 files changed, 182 insertions(+), 5 deletions(-)

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 0dda131..a0af554 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -59,6 +59,20 @@ static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
static unsigned long xen_remap_pfn;
static unsigned long xen_max_pfn;

+/*
+ * Areas with memblock_reserve()d memory to be checked against final E820 map.
+ * Each area has an associated function to handle conflicts (by either
+ * removing the conflict or by just crashing with an appropriate message).
+ * The array has a fixed size as there are only a few areas of interest which
+ * are well known: kernel, page tables, p2m, initrd.
+ */
+#define XEN_N_RESERVED_AREAS 4
+static struct {
+ phys_addr_t start;
+ phys_addr_t size;
+ int (*func)(phys_addr_t start, phys_addr_t size);
+} xen_reserved_area[XEN_N_RESERVED_AREAS] __initdata;
+
/*
* The maximum amount of extra memory compared to the base size. The
* main scaling factor is the size of struct page. At extreme ratios
@@ -365,10 +379,10 @@ static void __init xen_set_identity_and_remap_chunk(unsigned long start_pfn,
unsigned long end_pfn, unsigned long *released, unsigned long *remapped)
{
unsigned long pfn;
- unsigned long i = 0;
+ unsigned long i;
unsigned long n = end_pfn - start_pfn;

- while (i < n) {
+ for (i = 0; i < n; ) {
unsigned long cur_pfn = start_pfn + i;
unsigned long left = n - i;
unsigned long size = left;
@@ -411,6 +425,53 @@ static void __init xen_set_identity_and_remap_chunk(unsigned long start_pfn,
(unsigned long)__va(pfn << PAGE_SHIFT),
mfn_pte(pfn, PAGE_KERNEL_IO), 0);
}
+/* Check the memory area to be remapped for conflicts with reserved areas.
+ *
+ * Skip regions known to be reserved which are handled later. For these
+ * regions we have to increase the remapped counter in order to reserve
+ * extra memory space.
+ *
+ * In case a memory page already in use is to be remapped, just BUG().
+ */
+static void __init xen_set_identity_and_remap_chunk_chk(unsigned long start_pfn,
+ unsigned long end_pfn, unsigned long *released, unsigned long *remapped)
+{
+ unsigned long pfn;
+ unsigned long area_start, area_end;
+ unsigned i;
+
+ for (i = 0; i < XEN_N_RESERVED_AREAS; i++) {
+
+ if (!xen_reserved_area[i].size)
+ break;
+
+ area_start = PFN_DOWN(xen_reserved_area[i].start);
+ area_end = PFN_UP(xen_reserved_area[i].start +
+ xen_reserved_area[i].size);
+ if (area_start >= end_pfn || area_end <= start_pfn)
+ continue;
+
+ if (area_start > start_pfn)
+ xen_set_identity_and_remap_chunk(start_pfn, area_start,
+ released, remapped);
+
+ if (area_end < end_pfn)
+ xen_set_identity_and_remap_chunk(area_end, end_pfn,
+ released, remapped);
+
+ *remapped += min(area_end, end_pfn) -
+ max(area_start, start_pfn);
+
+ return;
+ }
+
+ /* Test for memory already in use */
+ for (pfn = start_pfn; pfn < end_pfn; pfn++)
+ BUG_ON(memblock_is_reserved(PFN_PHYS(pfn)));
+
+ xen_set_identity_and_remap_chunk(start_pfn, end_pfn,
+ released, remapped);
+}

static void __init xen_set_identity_and_remap(unsigned long *released,
unsigned long *remapped)
@@ -444,7 +505,7 @@ static void __init xen_set_identity_and_remap(unsigned long *released,
end_pfn = PFN_UP(entry->addr);

if (start_pfn < end_pfn)
- xen_set_identity_and_remap_chunk(start_pfn,
+ xen_set_identity_and_remap_chunk_chk(start_pfn,
end_pfn, &num_released, &num_remapped);
start = end;
}
@@ -456,6 +517,45 @@ static void __init xen_set_identity_and_remap(unsigned long *released,
pr_info("Released %ld page(s)\n", num_released);
}

+static void __init xen_late_set_identity_and_remap(void)
+{
+ const struct e820entry *entry;
+ int i, e;
+ unsigned long num_released = 0;
+ unsigned long num_remapped = 0;
+
+ for (i = 0; i < XEN_N_RESERVED_AREAS; i++) {
+ unsigned long area_start, area_end;
+
+ if (!xen_reserved_area[i].size)
+ return;
+
+ area_start = PFN_DOWN(xen_reserved_area[i].start);
+ area_end = PFN_UP(xen_reserved_area[i].start +
+ xen_reserved_area[i].size);
+
+ for (e = 0, entry = xen_e820_map; e < xen_e820_map_entries; e++, entry++) {
+ unsigned long start_pfn;
+ unsigned long end_pfn;
+
+ if (entry->type == E820_RAM)
+ continue;
+
+ start_pfn = PFN_DOWN(entry->addr);
+ end_pfn = PFN_UP(entry->addr + entry->size);
+
+ if (area_start >= end_pfn || area_end <= start_pfn)
+ continue;
+
+ start_pfn = max(area_start, start_pfn);
+ end_pfn = min(area_end, end_pfn);
+
+ xen_set_identity_and_remap_chunk(start_pfn, end_pfn,
+ &num_released, &num_remapped);
+ }
+ }
+}
+
/*
* Remap the memory prepared in xen_do_set_identity_and_remap_chunk().
* The remap information (which mfn remap to which pfn) is contained in the
@@ -472,6 +572,8 @@ void __init xen_remap_memory(void)
unsigned long pfn_s = ~0UL;
unsigned long len = 0;

+ xen_late_set_identity_and_remap();
+
mfn_save = virt_to_mfn(buf);

while (xen_remap_mfn != INVALID_P2M_ENTRY) {
@@ -560,6 +662,76 @@ static void __init xen_ignore_unusable(void)
}

/*
+ * Check reserved memory areas for conflicts with E820 map.
+ */
+static void __init xen_chk_e820_reserved(void)
+{
+ struct e820entry *entry;
+ unsigned areacnt, mapcnt;
+ phys_addr_t start, end;
+ int ok;
+
+ for (areacnt = 0; areacnt < XEN_N_RESERVED_AREAS; areacnt++) {
+ start = xen_reserved_area[areacnt].start;
+ end = start + xen_reserved_area[areacnt].size;
+ if (start == end)
+ return;
+
+ ok = 0;
+ entry = xen_e820_map;
+
+ for (mapcnt = 0; mapcnt < xen_e820_map_entries; mapcnt++) {
+ if (entry->type == E820_RAM && entry->addr <= start &&
+ (entry->addr + entry->size) >= end) {
+ ok = 1;
+ break;
+ }
+ entry++;
+ }
+
+ if (ok || !xen_reserved_area[areacnt].func(start, end - start))
+ continue;
+
+ for (mapcnt = areacnt; mapcnt < XEN_N_RESERVED_AREAS - 1;
+ mapcnt++)
+ xen_reserved_area[mapcnt] =
+ xen_reserved_area[mapcnt + 1];
+ xen_reserved_area[mapcnt].start = 0;
+ xen_reserved_area[mapcnt].size = 0;
+
+ areacnt--;
+ }
+}
+
+void __init xen_add_reserved_area(phys_addr_t start, phys_addr_t size,
+ int (*func)(phys_addr_t, phys_addr_t), int reserve)
+{
+ unsigned idx;
+
+ if (!size)
+ return;
+
+ BUG_ON(xen_reserved_area[XEN_N_RESERVED_AREAS - 1].size);
+
+ for (idx = XEN_N_RESERVED_AREAS - 1; idx > 0; idx--) {
+ if (!xen_reserved_area[idx - 1].size)
+ continue;
+
+ if (start > xen_reserved_area[idx - 1].start)
+ break;
+
+ xen_reserved_area[idx] = xen_reserved_area[idx - 1];
+ }
+
+ xen_reserved_area[idx].start = start;
+ xen_reserved_area[idx].size = size;
+ xen_reserved_area[idx].func = func;
+
+ if (reserve)
+ memblock_reserve(start, size);
+}
+
+/*
* Reserve Xen mfn_list.
* See comment above "struct start_info" in <xen/interface/xen.h>
* We tried to make the the memblock_reserve more selective so
@@ -608,6 +780,8 @@ char * __init xen_memory_setup(void)
int i;
int op;

+ xen_reserve_xen_mfnlist();
+
xen_max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
mem_end = PFN_PHYS(xen_max_pfn);

@@ -647,6 +821,9 @@ char * __init xen_memory_setup(void)
sanitize_e820_map(xen_e820_map, xen_e820_map_entries,
&xen_e820_map_entries);

+ /* Check for conflicts between used memory and memory map. */
+ xen_chk_e820_reserved();
+
max_pages = xen_get_max_pages();
if (max_pages > xen_max_pfn)
extra_pages += max_pages - xen_max_pfn;
@@ -718,8 +895,6 @@ char * __init xen_memory_setup(void)

sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);

- xen_reserve_xen_mfnlist();
-
return "Xen";
}

diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9e195c6..fee4f70 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -42,6 +42,8 @@ void xen_mm_unpin_all(void);
unsigned long __ref xen_chk_extra_mem(unsigned long pfn);
void __init xen_inv_extra_mem(void);
void __init xen_remap_memory(void);
+void __init xen_add_reserved_area(phys_addr_t start, phys_addr_t size,
+ int (*func)(phys_addr_t, phys_addr_t), int reserve);
char * __init xen_memory_setup(void);
char * xen_auto_xlated_memory_setup(void);
void __init xen_arch_setup(void);
--
2.1.4
