[PATCH] mm: make reserve_bootmem can crossed the nodes

From: Yinghai Lu
Date: Thu Mar 13 2008 - 19:45:52 EST



[PATCH] mm: make reserve_bootmem can crossed the nodes

split reserve_bootmem_core to two function, one check conflicts, and one set bits.

and make reserve_bootmem to loop bdata_list to cross the nodes.

user could be crashkernel and ramdisk..., in case the range cross the nodes

Signed-off-by: Yinghai Lu <yhlu.kernel@xxxxxxxxx>

Index: linux-2.6/mm/bootmem.c
===================================================================
--- linux-2.6.orig/mm/bootmem.c
+++ linux-2.6/mm/bootmem.c
@@ -111,44 +111,69 @@ static unsigned long __init init_bootmem
* might be used for boot-time allocations - or it might get added
* to the free page pool later on.
*/
-static int __init reserve_bootmem_core(bootmem_data_t *bdata,
+static int __init can_reserve_bootmem_core(bootmem_data_t *bdata,
unsigned long addr, unsigned long size, int flags)
{
unsigned long sidx, eidx;
unsigned long i;
- int ret;
+
+ BUG_ON(!size);
+
+ /* out of range, don't hold other */
+ if (addr >= bdata->node_boot_start && addr < bdata->last_success)
+ return 0;

/*
- * round up, partially reserved pages are considered
- * fully reserved.
+ * Round up to index to the range.
*/
+ if (addr > bdata->node_boot_start)
+ sidx= PFN_DOWN(addr - bdata->node_boot_start);
+ else
+ sidx = 0;
+
+ eidx = PFN_UP(addr + size - bdata->node_boot_start);
+ if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
+ eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
+
+ for (i = sidx; i < eidx; i++)
+ if (test_bit(i, bdata->node_bootmem_map)) {
+ if (flags & BOOTMEM_EXCLUSIVE)
+ return -EBUSY;
+ }
+
+ return 0;
+
+}
+static void __init reserve_bootmem_core(bootmem_data_t *bdata,
+ unsigned long addr, unsigned long size, int flags)
+{
+ unsigned long sidx, eidx;
+ unsigned long i;
+
BUG_ON(!size);
- BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn);
- BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn);
- BUG_ON(addr < bdata->node_boot_start);

- sidx = PFN_DOWN(addr - bdata->node_boot_start);
+ /* out of range */
+ if (addr >= bdata->node_boot_start && addr < bdata->last_success)
+ return;
+
+ /*
+ * Round up to index to the range.
+ */
+ if (addr > bdata->node_boot_start)
+ sidx= PFN_DOWN(addr - bdata->node_boot_start);
+ else
+ sidx = 0;
+
eidx = PFN_UP(addr + size - bdata->node_boot_start);
+ if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
+ eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);

for (i = sidx; i < eidx; i++)
if (test_and_set_bit(i, bdata->node_bootmem_map)) {
#ifdef CONFIG_DEBUG_BOOTMEM
printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE);
#endif
- if (flags & BOOTMEM_EXCLUSIVE) {
- ret = -EBUSY;
- goto err;
- }
}
-
- return 0;
-
-err:
- /* unreserve memory we accidentally reserved */
- for (i--; i >= sidx; i--)
- clear_bit(i, bdata->node_bootmem_map);
-
- return ret;
}

static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
@@ -407,6 +432,11 @@ unsigned long __init init_bootmem_node(p
void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
unsigned long size, int flags)
{
+ int ret;
+
+ ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+ if (ret < 0)
+ return;
reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
}

@@ -432,7 +462,16 @@ unsigned long __init init_bootmem(unsign
int __init reserve_bootmem(unsigned long addr, unsigned long size,
int flags)
{
- return reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size, flags);
+ int ret;
+ bootmem_data_t *bdata;
+ list_for_each_entry(bdata, &bdata_list, list) {
+ ret = can_reserve_bootmem_core(bdata, addr, size, flags);
+ if (ret < 0)
+ return ret;
+ }
+ list_for_each_entry(bdata, &bdata_list, list)
+ reserve_bootmem_core(bdata, addr, size, flags);
+ return 0;
}
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */