Re: [PATCH] mm: make reserve_bootmem can crossed the nodes

From: Johannes Weiner
Date: Thu Mar 13 2008 - 20:16:34 EST


Hi,

"Yinghai Lu" <yhlu.kernel@xxxxxxxxx> writes:

> Index: linux-2.6/mm/bootmem.c
> ===================================================================
> --- linux-2.6.orig/mm/bootmem.c
> +++ linux-2.6/mm/bootmem.c
> @@ -111,44 +111,69 @@ static unsigned long __init init_bootmem
> * might be used for boot-time allocations - or it might get added
> * to the free page pool later on.
> */
> -static int __init reserve_bootmem_core(bootmem_data_t *bdata,
> +static int __init can_reserve_bootmem_core(bootmem_data_t *bdata,
> unsigned long addr, unsigned long size, int flags)
> {
> unsigned long sidx, eidx;
> unsigned long i;
> - int ret;
> +
> + BUG_ON(!size);
> +
> + /* out of range, don't hold other */
> + if (addr >= bdata->node_boot_start && addr < bdata->last_success)
> + return 0;
>
> /*
> - * round up, partially reserved pages are considered
> - * fully reserved.
> + * Round up to index to the range.
> */
> + if (addr > bdata->node_boot_start)
> + sidx= PFN_DOWN(addr - bdata->node_boot_start);
> + else
> + sidx = 0;
> +
> + eidx = PFN_UP(addr + size - bdata->node_boot_start);
> + if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
> + eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
> +
> + for (i = sidx; i < eidx; i++)
> + if (test_bit(i, bdata->node_bootmem_map)) {
> + if (flags & BOOTMEM_EXCLUSIVE)
> + return -EBUSY;
> + }
> +
> + return 0;
> +
> +}
> +static void __init reserve_bootmem_core(bootmem_data_t *bdata,
> + unsigned long addr, unsigned long size, int flags)
> +{
> + unsigned long sidx, eidx;
> + unsigned long i;
> +
> BUG_ON(!size);
> - BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn);
> - BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn);
> - BUG_ON(addr < bdata->node_boot_start);
>
> - sidx = PFN_DOWN(addr - bdata->node_boot_start);
> + /* out of range */
> + if (addr >= bdata->node_boot_start && addr < bdata->last_success)
> + return;
> +
> + /*
> + * Round up to index to the range.
> + */
> + if (addr > bdata->node_boot_start)
> + sidx= PFN_DOWN(addr - bdata->node_boot_start);
> + else
> + sidx = 0;
> +
> eidx = PFN_UP(addr + size - bdata->node_boot_start);
> + if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
> + eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
>
> for (i = sidx; i < eidx; i++)
> if (test_and_set_bit(i, bdata->node_bootmem_map)) {
> #ifdef CONFIG_DEBUG_BOOTMEM
> printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE);
> #endif
> - if (flags & BOOTMEM_EXCLUSIVE) {
> - ret = -EBUSY;
> - goto err;
> - }
> }
> -
> - return 0;
> -
> -err:
> - /* unreserve memory we accidentally reserved */
> - for (i--; i >= sidx; i--)
> - clear_bit(i, bdata->node_bootmem_map);
> -
> - return ret;
> }
>
> static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
> @@ -407,6 +432,11 @@ unsigned long __init init_bootmem_node(p
> void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
> unsigned long size, int flags)
> {
> + int ret;
> +
> + ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
> + if (ret < 0)
> + return;
> reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);

I don't get it, sorry. What is the purpose of
can_reserve_bootmem_core()? It does exactly what reserve_bootmem_core()
does, except for actually setting the bits. All the pre-checking you
wanted to have out of the way is repeated in reserve_bootmem_core()
(well, almost all of it).

Hannes
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/