Re: [linux-pm] [PATCH 1/2] Hibernate: Take overlapping zones into account

From: Nigel Cunningham
Date: Tue Nov 11 2008 - 16:25:31 EST


Hi Rafael.

On Tue, 2008-11-11 at 21:31 +0100, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rjw@xxxxxxx>
> Subject: Hibernate: Take overlapping zones into account
>
> It has been requested to make hibernation work with memory
> hotplugging enabled and for this purpose the hibernation code has to
> be reworked to take the possible overlapping of zones into account.
> Thus, rework the hibernation memory bitmaps code to prevent
> duplication of PFNs from occurring and add checks to make sure that
> one page frame will not be marked as saveable many times.
>
> Additionally, use list.h lists instead of open-coded lists to
> implement the memory bitmaps.
>
> Signed-off-by: Rafael J. Wysocki <rjw@xxxxxxx>
> Cc: Pavel Machek <pavel@xxxxxxx>
> Cc: Dave Hansen <dave@xxxxxxxxxxxxxxxxxx>
> Cc: Andy Whitcroft <apw@xxxxxxxxxxxx>
> ---
> kernel/power/snapshot.c | 326 ++++++++++++++++++++++++------------------------
> 1 file changed, 166 insertions(+), 160 deletions(-)
>
> Index: linux-2.6/kernel/power/snapshot.c
> ===================================================================
> --- linux-2.6.orig/kernel/power/snapshot.c
> +++ linux-2.6/kernel/power/snapshot.c
> @@ -25,6 +25,7 @@
> #include <linux/syscalls.h>
> #include <linux/console.h>
> #include <linux/highmem.h>
> +#include <linux/list.h>
>
> #include <asm/uaccess.h>
> #include <asm/mmu_context.h>
> @@ -192,12 +193,6 @@ static void *chain_alloc(struct chain_al
> return ret;
> }
>
> -static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
> -{
> - free_list_of_pages(ca->chain, clear_page_nosave);
> - memset(ca, 0, sizeof(struct chain_allocator));
> -}
> -
> /**
> * Data types related to memory bitmaps.
> *
> @@ -233,7 +228,7 @@ static void chain_free(struct chain_allo
> #define BM_BITS_PER_BLOCK (PAGE_SIZE << 3)
>
> struct bm_block {
> - struct bm_block *next; /* next element of the list */
> + struct list_head hook; /* hook into a list of bitmap blocks */
> unsigned long start_pfn; /* pfn represented by the first bit */
> unsigned long end_pfn; /* pfn represented by the last bit plus 1 */
> unsigned long *data; /* bitmap representing pages */
> @@ -244,24 +239,15 @@ static inline unsigned long bm_block_bit
> return bb->end_pfn - bb->start_pfn;
> }
>
> -struct zone_bitmap {
> - struct zone_bitmap *next; /* next element of the list */
> - unsigned long start_pfn; /* minimal pfn in this zone */
> - unsigned long end_pfn; /* maximal pfn in this zone plus 1 */
> - struct bm_block *bm_blocks; /* list of bitmap blocks */
> - struct bm_block *cur_block; /* recently used bitmap block */
> -};
> -
> /* struct bm_position is used for browsing memory bitmaps */
>
> struct bm_position {
> - struct zone_bitmap *zone_bm;
> struct bm_block *block;
> int bit;
> };
>
> struct memory_bitmap {
> - struct zone_bitmap *zone_bm_list; /* list of zone bitmaps */
> + struct list_head blocks; /* list of bitmap blocks */
> struct linked_page *p_list; /* list of pages used to store zone
> * bitmap objects and bitmap block
> * objects
> @@ -273,11 +259,7 @@ struct memory_bitmap {
>
> static void memory_bm_position_reset(struct memory_bitmap *bm)
> {
> - struct zone_bitmap *zone_bm;
> -
> - zone_bm = bm->zone_bm_list;
> - bm->cur.zone_bm = zone_bm;
> - bm->cur.block = zone_bm->bm_blocks;
> + bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook);
> bm->cur.bit = 0;
> }
>
> @@ -285,151 +267,183 @@ static void memory_bm_free(struct memory
>
> /**
> * create_bm_block_list - create a list of block bitmap objects
> - */
> -
> -static inline struct bm_block *
> -create_bm_block_list(unsigned int nr_blocks, struct chain_allocator *ca)
> + * @pages - number of page frames the bitmap blocks have to cover
> + * @list - list to put the allocated blocks into
> + * @ca - chain allocator to be used for allocating memory
> + */
> +static int create_bm_block_list(unsigned long pages,
> + struct list_head *list,
> + struct chain_allocator *ca)
> {
> - struct bm_block *bblist = NULL;
> + unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);
>
> while (nr_blocks-- > 0) {
> struct bm_block *bb;
>
> bb = chain_alloc(ca, sizeof(struct bm_block));
> if (!bb)
> - return NULL;
> -
> - bb->next = bblist;
> - bblist = bb;
> + return -ENOMEM;
> + list_add(&bb->hook, list);
> }
> - return bblist;
> +
> + return 0;
> }
>
> +struct mem_extent {
> + struct list_head hook;
> + unsigned long start;
> + unsigned long end;
> +};
> +
> /**
> - * create_zone_bm_list - create a list of zone bitmap objects
> + * free_mem_extents - free a list of memory extents
> + * @list - list of extents to empty
> */
> +static void free_mem_extents(struct list_head *list)
> +{
> + struct mem_extent *ext, *aux;
>
> -static inline struct zone_bitmap *
> -create_zone_bm_list(unsigned int nr_zones, struct chain_allocator *ca)
> + list_for_each_entry_safe(ext, aux, list, hook) {
> + list_del(&ext->hook);
> + kfree(ext);
> + }
> +}
> +
> +/**
> + * create_mem_extents - create a list of memory extents representing
> + * contiguous ranges of PFNs
> + * @list - list to put the extents into
> + * @gfp_mask - mask to use for memory allocations
> + */
> +static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
> {
> - struct zone_bitmap *zbmlist = NULL;
> + struct zone *zone;
>
> - while (nr_zones-- > 0) {
> - struct zone_bitmap *zbm;
> + INIT_LIST_HEAD(list);
>
> - zbm = chain_alloc(ca, sizeof(struct zone_bitmap));
> - if (!zbm)
> - return NULL;
> + for_each_zone(zone) {
> + unsigned long zone_start, zone_end;
> + struct mem_extent *ext, *cur, *aux;
> +
> + if (!populated_zone(zone))
> + continue;
> +
> + zone_start = zone->zone_start_pfn;
> + zone_end = zone->zone_start_pfn + zone->spanned_pages;
> +
> + list_for_each_entry(ext, list, hook)
> + if (zone_start <= ext->end)
> + break;
> +
> + if (&ext->hook == list || zone_end < ext->start) {
> + /* New extent is necessary */
> + struct mem_extent *new_ext;
> +
> + new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
> + if (!new_ext) {
> + free_mem_extents(list);
> + return -ENOMEM;
> + }
> + new_ext->start = zone_start;
> + new_ext->end = zone_end;
> + list_add_tail(&new_ext->hook, &ext->hook);

Is list_add_tail right here? It inserts the new extent immediately before
ext, so you seem to be relying on the list staying sorted by start pfn for
the list_for_each_entry above to pick the right insertion point.
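
For my own sanity I sketched the insertion in userspace -- a toy
re-implementation of just the list.h pieces involved, not the kernel code,
so take it with a grain of salt. insert_extent() below is my own mock and
only covers the "new extent is necessary" path, without the merging:

#include <stdio.h>
#include <stdlib.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }

/* Same linkage as the kernel's list_add_tail(): new ends up *before* head,
 * so passing &ext->hook as "head" inserts in front of ext. */
static void list_add_tail(struct list_head *new, struct list_head *head)
{
	new->prev = head->prev;
	new->next = head;
	head->prev->next = new;
	head->prev = new;
}

struct mem_extent {
	struct list_head hook;	/* must stay first for the casts below */
	unsigned long start, end;
};

/* Mimics only the no-overlap path of create_mem_extents(), to show
 * where the new node lands. */
static void insert_extent(struct list_head *list,
			  unsigned long start, unsigned long end)
{
	struct list_head *pos;
	struct mem_extent *new_ext = malloc(sizeof(*new_ext));

	if (!new_ext)
		return;
	new_ext->start = start;
	new_ext->end = end;
	/* find the first extent with end >= start, as in the patch */
	for (pos = list->next; pos != list; pos = pos->next)
		if (start <= ((struct mem_extent *)pos)->end)
			break;
	list_add_tail(&new_ext->hook, pos);	/* before pos, or at the tail */
}

int main(void)
{
	struct list_head list, *pos;

	INIT_LIST_HEAD(&list);
	insert_extent(&list, 300, 400);
	insert_extent(&list, 100, 200);	/* goes in front of [300,400) */
	insert_extent(&list, 500, 600);	/* appended at the tail */

	for (pos = list.next; pos != &list; pos = pos->next)
		printf("[%lu, %lu)\n", ((struct mem_extent *)pos)->start,
		       ((struct mem_extent *)pos)->end);
	return 0;
}

That prints [100, 200) [300, 400) [500, 600), so the ordering does work
out, but only because every earlier insertion kept the list sorted. A
one-line comment in create_mem_extents() stating that invariant would help
the next reader.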

> + continue;
> + }
>
> - zbm->next = zbmlist;
> - zbmlist = zbm;
> + /* Merge this zone's range of PFNs with the existing one */
> + if (zone_start < ext->start)
> + ext->start = zone_start;
> + if (zone_end > ext->end)
> + ext->end = zone_end;
> +
> + /* More merging may be possible */
> + cur = ext;
> + list_for_each_entry_safe_continue(cur, aux, list, hook) {
> + if (zone_end < cur->start)
> + break;
> + if (zone_end < cur->end)
> + ext->end = cur->end;
> + list_del(&cur->hook);

kfree? These entries were kzalloc()ed earlier in this function, so dropping
the merged-away ones from the list without freeing them looks like a leak.
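I.e. something like this (untested, just to show where I mean):

	list_for_each_entry_safe_continue(cur, aux, list, hook) {
		if (zone_end < cur->start)
			break;
		if (zone_end < cur->end)
			ext->end = cur->end;
		list_del(&cur->hook);
		kfree(cur);
	}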


> + }
> }
> - return zbmlist;
> +
> + return 0;
> }
>
> /**
> * memory_bm_create - allocate memory for a memory bitmap
> */
> -
> static int
> memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
> {
> struct chain_allocator ca;
> - struct zone *zone;
> - struct zone_bitmap *zone_bm;
> - struct bm_block *bb;
> - unsigned int nr;
> + struct list_head mem_extents;
> + struct mem_extent *ext;
> + int error;
>
> chain_init(&ca, gfp_mask, safe_needed);
> + INIT_LIST_HEAD(&bm->blocks);
>
> - /* Compute the number of zones */
> - nr = 0;
> - for_each_zone(zone)
> - if (populated_zone(zone))
> - nr++;
> -
> - /* Allocate the list of zones bitmap objects */
> - zone_bm = create_zone_bm_list(nr, &ca);
> - bm->zone_bm_list = zone_bm;
> - if (!zone_bm) {
> - chain_free(&ca, PG_UNSAFE_CLEAR);
> - return -ENOMEM;
> - }
> + error = create_mem_extents(&mem_extents, gfp_mask);
> + if (error)
> + return error;
>
> - /* Initialize the zone bitmap objects */
> - for_each_zone(zone) {
> - unsigned long pfn;
> + list_for_each_entry(ext, &mem_extents, hook) {
> + struct bm_block *bb;
> + unsigned long pfn = ext->start;
> + unsigned long pages = ext->end - ext->start;
>
> - if (!populated_zone(zone))
> - continue;
> + bb = list_entry(bm->blocks.prev, struct bm_block, hook);
>
> - zone_bm->start_pfn = zone->zone_start_pfn;
> - zone_bm->end_pfn = zone->zone_start_pfn + zone->spanned_pages;
> - /* Allocate the list of bitmap block objects */
> - nr = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
> - bb = create_bm_block_list(nr, &ca);
> - zone_bm->bm_blocks = bb;
> - zone_bm->cur_block = bb;
> - if (!bb)
> - goto Free;
> + error = create_bm_block_list(pages, bm->blocks.prev, &ca);
> + if (error)
> + goto Error;
>
> - nr = zone->spanned_pages;
> - pfn = zone->zone_start_pfn;
> - /* Initialize the bitmap block objects */
> - while (bb) {
> - unsigned long *ptr;
> -
> - ptr = get_image_page(gfp_mask, safe_needed);
> - bb->data = ptr;
> - if (!ptr)
> - goto Free;
> + list_for_each_entry_continue(bb, &bm->blocks, hook) {
> + bb->data = get_image_page(gfp_mask, safe_needed);
> + if (!bb->data) {
> + error = -ENOMEM;
> + goto Error;
> + }
>
> bb->start_pfn = pfn;
> - if (nr >= BM_BITS_PER_BLOCK) {
> + if (pages >= BM_BITS_PER_BLOCK) {
> pfn += BM_BITS_PER_BLOCK;
> - nr -= BM_BITS_PER_BLOCK;
> + pages -= BM_BITS_PER_BLOCK;
> } else {
> /* This is executed only once in the loop */
> - pfn += nr;
> + pfn += pages;
> }
> bb->end_pfn = pfn;
> - bb = bb->next;
> }
> - zone_bm = zone_bm->next;
> }
> +
> bm->p_list = ca.chain;
> memory_bm_position_reset(bm);
> - return 0;
> + Exit:
> + free_mem_extents(&mem_extents);
> + return error;
>
> - Free:
> + Error:
> bm->p_list = ca.chain;
> memory_bm_free(bm, PG_UNSAFE_CLEAR);
> - return -ENOMEM;
> + goto Exit;
> }
>
> /**
> * memory_bm_free - free memory occupied by the memory bitmap @bm
> */
> -
> static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
> {
> - struct zone_bitmap *zone_bm;
> + struct bm_block *bb;
>
> - /* Free the list of bit blocks for each zone_bitmap object */
> - zone_bm = bm->zone_bm_list;
> - while (zone_bm) {
> - struct bm_block *bb;
> + list_for_each_entry(bb, &bm->blocks, hook)
> + if (bb->data)
> + free_image_page(bb->data, clear_nosave_free);
>
> - bb = zone_bm->bm_blocks;
> - while (bb) {
> - if (bb->data)
> - free_image_page(bb->data, clear_nosave_free);
> - bb = bb->next;
> - }
> - zone_bm = zone_bm->next;
> - }
> free_list_of_pages(bm->p_list, clear_nosave_free);
> - bm->zone_bm_list = NULL;
> +
> + INIT_LIST_HEAD(&bm->blocks);
> }
>
> /**
> @@ -437,38 +451,33 @@ static void memory_bm_free(struct memory
> * to given pfn. The cur_zone_bm member of @bm and the cur_block member
> * of @bm->cur_zone_bm are updated.
> */
> -
> static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
> void **addr, unsigned int *bit_nr)
> {
> - struct zone_bitmap *zone_bm;
> struct bm_block *bb;
>
> - /* Check if the pfn is from the current zone */
> - zone_bm = bm->cur.zone_bm;
> - if (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
> - zone_bm = bm->zone_bm_list;
> - /* We don't assume that the zones are sorted by pfns */
> - while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
> - zone_bm = zone_bm->next;
> -
> - if (!zone_bm)
> - return -EFAULT;
> - }
> - bm->cur.zone_bm = zone_bm;
> - }
> - /* Check if the pfn corresponds to the current bitmap block */
> - bb = zone_bm->cur_block;
> + /*
> + * Check if the pfn corresponds to the current bitmap block and find
> + * the block where it fits if this is not the case.
> + */
> + bb = bm->cur.block;
> if (pfn < bb->start_pfn)
> - bb = zone_bm->bm_blocks;
> + list_for_each_entry_continue_reverse(bb, &bm->blocks, hook)
> + if (pfn >= bb->start_pfn)
> + break;
> +
> + if (pfn >= bb->end_pfn)
> + list_for_each_entry_continue(bb, &bm->blocks, hook)
> + if (pfn >= bb->start_pfn && pfn < bb->end_pfn)
> + break;
>
> - while (pfn >= bb->end_pfn) {
> - bb = bb->next;
> + if (&bb->hook == &bm->blocks)
> + return -EFAULT;
>
> - BUG_ON(!bb);
> - }
> - zone_bm->cur_block = bb;
> + /* The block has been found */
> + bm->cur.block = bb;
> pfn -= bb->start_pfn;
> + bm->cur.bit = pfn + 1;
> *bit_nr = pfn;
> *addr = bb->data;
> return 0;
> @@ -530,29 +539,21 @@ static int memory_bm_test_bit(struct mem
>
> static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
> {
> - struct zone_bitmap *zone_bm;
> struct bm_block *bb;
> int bit;
>
> + bb = bm->cur.block;
> do {
> - bb = bm->cur.block;
> - do {
> - bit = bm->cur.bit;
> - bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
> - if (bit < bm_block_bits(bb))
> - goto Return_pfn;
> -
> - bb = bb->next;
> - bm->cur.block = bb;
> - bm->cur.bit = 0;
> - } while (bb);
> - zone_bm = bm->cur.zone_bm->next;
> - if (zone_bm) {
> - bm->cur.zone_bm = zone_bm;
> - bm->cur.block = zone_bm->bm_blocks;
> - bm->cur.bit = 0;
> - }
> - } while (zone_bm);
> + bit = bm->cur.bit;
> + bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
> + if (bit < bm_block_bits(bb))
> + goto Return_pfn;
> +
> + bb = list_entry(bb->hook.next, struct bm_block, hook);
> + bm->cur.block = bb;
> + bm->cur.bit = 0;
> + } while (&bb->hook != &bm->blocks);
> +
> memory_bm_position_reset(bm);
> return BM_END_OF_MAP;

Is anything above here actually needed for the memory hotplug support, or
is it just the list.h conversion? If the latter, perhaps this could be two
patches?

> @@ -808,8 +809,7 @@ static unsigned int count_free_highmem_p
> * We should save the page if it isn't Nosave or NosaveFree, or Reserved,
> * and it isn't a part of a free chunk of pages.
> */
> -
> -static struct page *saveable_highmem_page(unsigned long pfn)
> +static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
> {
> struct page *page;
>
> @@ -817,6 +817,8 @@ static struct page *saveable_highmem_pag
> return NULL;
>
> page = pfn_to_page(pfn);
> + if (page_zone(page) != zone)
> + return NULL;
>
> BUG_ON(!PageHighMem(page));
>
> @@ -846,13 +848,16 @@ unsigned int count_highmem_pages(void)
> mark_free_pages(zone);
> max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
> for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
> - if (saveable_highmem_page(pfn))
> + if (saveable_highmem_page(zone, pfn))
> n++;
> }
> return n;
> }
> #else
> -static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; }
> +static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
> +{
> + return NULL;
> +}
> #endif /* CONFIG_HIGHMEM */
>
> /**
> @@ -863,8 +868,7 @@ static inline void *saveable_highmem_pag
> * of pages statically defined as 'unsaveable', and it isn't a part of
> * a free chunk of pages.
> */
> -
> -static struct page *saveable_page(unsigned long pfn)
> +static struct page *saveable_page(struct zone *zone, unsigned long pfn)
> {
> struct page *page;
>
> @@ -872,6 +876,8 @@ static struct page *saveable_page(unsign
> return NULL;
>
> page = pfn_to_page(pfn);
> + if (page_zone(page) != zone)
> + return NULL;
>
> BUG_ON(PageHighMem(page));
>
> @@ -903,7 +909,7 @@ unsigned int count_data_pages(void)
> mark_free_pages(zone);
> max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
> for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
> - if(saveable_page(pfn))
> + if (saveable_page(zone, pfn))
> n++;
> }
> return n;
> @@ -944,7 +950,7 @@ static inline struct page *
> page_is_saveable(struct zone *zone, unsigned long pfn)
> {
> return is_highmem(zone) ?
> - saveable_highmem_page(pfn) : saveable_page(pfn);
> + saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
> }
>
> static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> @@ -975,7 +981,7 @@ static void copy_data_page(unsigned long
> }
> }
> #else
> -#define page_is_saveable(zone, pfn) saveable_page(pfn)
> +#define page_is_saveable(zone, pfn) saveable_page(zone, pfn)
>
> static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> {

Regards,

Nigel
