[PATCH] xen/p2m: Check __brk_limit before allocating.

From: Konrad Rzeszutek Wilk
Date: Tue Jul 24 2012 - 16:32:21 EST


The P2M code is smart enough to return that it cannot allocate
anymore and it can perculate up the stack without trouble.
So check the __brk_limit values before allocating from extend_brk.

This allows us to boot on machines where we do not have enough
__brk space. This means that instead of getting:

Freeing 9f-100 pfn range: 97 pages freed
Freeing b7ee0-ecd9b pfn range: 216763 pages freed
Released 216860 pages of unused memory
Set 295297 page(s) to 1-1 mapping
-Populating 100000-134f1c pfn range: 216860 pages added
+Populating 100000-134f1c pfn range: 30720 pages added

We would get a limited amount of pages populated back, but
without hitting this:

(XEN) domain_crash_sync called from entry.S
(XEN) CPU: 0
(XEN) RIP: e033:[<ffffffff818aad3b>]
(XEN) RFLAGS: 0000000000000206 EM: 1 CONTEXT: pv guest
(XEN) rax: ffffffff81a7c000 rbx: 000000000000003d rcx: 0000000000001000
(XEN) rdx: ffffffff81a7b000 rsi: 0000000000001000 rdi: 0000000000001000
(XEN) rbp: ffffffff81801cd8 rsp: ffffffff81801c98 r8: 0000000000100000
(XEN) r9: ffffffff81a7a000 r10: 0000000000000001 r11: 0000000000000003
(XEN) r12: 0000000000000004 r13: 0000000000000004 r14: 000000000000003d
(XEN) r15: 00000000000001e8 cr0: 000000008005003b cr4: 00000000000006f0
(XEN) cr3: 0000000125803000 cr2: 0000000000000000
(XEN) ds: 0000 es: 0000 fs: 0000 gs: 0000 ss: e02b cs: e033
(XEN) Guest stack trace from rsp=ffffffff81801c98:

.. which is extend_brk hitting a BUG_ON.

Note that git commit c3d93f880197953f86ab90d9da4744e926b38e33
(xen: populate correct number of pages when across mem boundary (v2))
exposed this bug).

Interestingly enough, most of the time we are not going to hit this
b/c the _brk space is quite large:
ffffffff81a25000 B __brk_base
ffffffff81e43000 B __brk_limit
= ~4MB.
vs earlier kernels (with this back-ported), the space is smaller:
ffffffff81a25000 B __brk_base
ffffffff81a7b000 B __brk_limit
= 344 kBytes.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
---
arch/x86/xen/p2m.c | 17 +++++++++++++++--
1 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 64effdc..b5bb26c 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -498,7 +498,14 @@ static bool alloc_p2m(unsigned long pfn)

return true;
}
-
+#include <asm/sections.h>
+bool __init can_extend_brk()
+{
+ /* Always reserve one for the DMI extend_brk call. */
+ if ((char *)(_brk_end + 2 * PAGE_SIZE) > __brk_limit)
+ return false;
+ return true;
+}
static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary)
{
unsigned topidx, mididx, idx;
@@ -524,6 +531,9 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary
return false;

/* Boundary cross-over for the edges: */
+ if (!can_extend_brk())
+ return false;
+
p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);

p2m_init(p2m);
@@ -540,7 +550,6 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary

return true;
}
-
static bool __init early_alloc_p2m(unsigned long pfn)
{
unsigned topidx = p2m_top_index(pfn);
@@ -550,6 +559,8 @@ static bool __init early_alloc_p2m(unsigned long pfn)
mid = p2m_top[topidx];
mid_mfn_p = p2m_top_mfn_p[topidx];
if (mid == p2m_mid_missing) {
+ if (!can_extend_brk())
+ return false;
mid = extend_brk(PAGE_SIZE, PAGE_SIZE);

p2m_mid_init(mid);
@@ -560,6 +571,8 @@ static bool __init early_alloc_p2m(unsigned long pfn)
}
/* And the save/restore P2M tables.. */
if (mid_mfn_p == p2m_mid_missing_mfn) {
+ if (!can_extend_brk())
+ return false;
mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_mfn_init(mid_mfn_p);

--
1.7.7.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/