swsusp: use non-contiguous memory on resume

From: Pavel Machek
Date: Fri Mar 04 2005 - 05:08:03 EST


From: Rafael J. Wysocki <rjw@xxxxxxx>
From: hugang <hugang@xxxxxxxxxxxx>

Subject: non-contiguous pagedir for resume

This fixes problem where we could have enough memory but not in
continuous chunk, and resume would fail.

Signed-off-by: Pavel Machek <pavel@xxxxxxx>

Please apply,
Pavel

--- linux-mm/kernel/power/swsusp.c 2005-02-28 01:14:08.000000000 +0100
+++ linux.middle/kernel/power/swsusp.c 2005-02-28 21:29:06.000000000 +0100
@@ -241,7 +241,7 @@
swp_entry_t entry;
int error = 0;

- entry = get_swap_page(NULL, swp_offset(*loc));
+ entry = get_swap_page();
if (swp_offset(entry) &&
swapfile_used[swp_type(entry)] == SWAPFILE_SUSPEND) {
error = rw_swap_page_sync(WRITE, entry,
@@ -605,19 +605,46 @@
return nr_copy;
}

+static inline void free_pagedir(struct pbe *pblist);
+
/**
- * free_pagedir - free pages allocated with alloc_pagedir()
+ * fill_pb_page - Create a list of PBEs on a given memory page
*/

-static inline void free_pagedir(struct pbe *pblist)
+static inline void fill_pb_page(struct pbe *pbpage)
{
- struct pbe *pbe;
+ struct pbe *p;

- while (pblist) {
- pbe = (pblist + PB_PAGE_SKIP)->next;
- free_page((unsigned long)pblist);
- pblist = pbe;
+ p = pbpage;
+ pbpage += PB_PAGE_SKIP;
+ do
+ p->next = p + 1;
+ while (++p < pbpage);
+}
+
+/**
+ * create_pbe_list - Create a list of PBEs on top of a given chain
+ * of memory pages allocated with alloc_pagedir()
+ */
+
+static void create_pbe_list(struct pbe *pblist, unsigned nr_pages)
+{
+ struct pbe *pbpage, *p;
+ unsigned num = PBES_PER_PAGE;
+
+ for_each_pb_page (pbpage, pblist) {
+ if (num >= nr_pages)
+ break;
+
+ fill_pb_page(pbpage);
+ num += PBES_PER_PAGE;
}
+ if (pbpage) {
+ for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++)
+ p->next = p + 1;
+ p->next = NULL;
+ }
+ pr_debug("create_pbe_list(): initialized %d PBEs\n", num);
}

/**
@@ -636,7 +663,7 @@
static struct pbe * alloc_pagedir(unsigned nr_pages)
{
unsigned num;
- struct pbe *pblist, *pbe, *p;
+ struct pbe *pblist, *pbe;

if (!nr_pages)
return NULL;
@@ -645,25 +672,32 @@
pblist = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages;
pbe = pbe->next, num += PBES_PER_PAGE) {
- p = pbe;
pbe += PB_PAGE_SKIP;
- do
- p->next = p + 1;
- while (p++ < pbe);
pbe->next = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
}
- if (pbe) {
- for (num -= PBES_PER_PAGE - 1, p = pbe; num < nr_pages; p++, num++)
- p->next = p + 1;
- } else { /* get_zeroed_page() failed */
+ if (!pbe) { /* get_zeroed_page() failed */
free_pagedir(pblist);
pblist = NULL;
}
- pr_debug("alloc_pagedir(): allocated %d PBEs\n", num);
return pblist;
}

/**
+ * free_pagedir - free pages allocated with alloc_pagedir()
+ */
+
+static inline void free_pagedir(struct pbe *pblist)
+{
+ struct pbe *pbe;
+
+ while (pblist) {
+ pbe = (pblist + PB_PAGE_SKIP)->next;
+ free_page((unsigned long)pblist);
+ pblist = pbe;
+ }
+}
+
+/**
* free_image_pages - Free pages allocated for snapshot
*/

@@ -760,12 +794,11 @@
if (!enough_swap())
return -ENOSPC;

- nr_copy_pages = calc_nr(nr_copy_pages);
-
if (!(pagedir_save = alloc_pagedir(nr_copy_pages))) {
printk(KERN_ERR "suspend: Allocating pagedir failed.\n");
return -ENOMEM;
}
+ create_pbe_list(pagedir_save, nr_copy_pages);
pagedir_nosave = pagedir_save;
if ((error = alloc_image_pages())) {
printk(KERN_ERR "suspend: Allocating image pages failed.\n");
@@ -790,6 +823,7 @@

drain_local_pages();
count_data_pages();
+ nr_copy_pages = calc_nr(nr_copy_pages);
printk("swsusp: Need to copy %u pages\n", nr_copy_pages);

error = swsusp_alloc();
@@ -868,7 +902,6 @@
/* Restore control flow magically appears here */
restore_processor_state();
restore_highmem();
- touch_softlockup_watchdog();
device_power_up();
local_irq_enable();
return error;
@@ -888,7 +921,8 @@
{
int error;
local_irq_disable();
- device_power_down(PMSG_FREEZE);
+ if (device_power_down(PMSG_FREEZE))
+ printk(KERN_ERR "Some devices failed to power down, very bad\n");
/* We'll ignore saved state, but this gets preempt count (etc) right */
save_processor_state();
error = swsusp_arch_resume();
@@ -918,44 +952,103 @@
return 0;
}

-/*
- * We check here that pagedir & pages it points to won't collide with pages
- * where we're going to restore from the loaded pages later
+/**
+ * On resume, for storing the PBE list and the image,
+ * we can only use memory pages that do not conflict with the pages
+ * which had been used before suspend.
+ *
+ * We don't know which pages are usable until we allocate them.
+ *
+ * Allocated but unusable (ie eaten) memory pages are linked together
+ * to create a list, so that we can free them easily
+ *
+ * We could have used a type other than (void *)
+ * for this purpose, but ...
*/
-static int __init check_pagedir(void)
+static void **eaten_memory = NULL;
+
+static inline void eat_page(void *page) {
+ void **c;
+
+ c = eaten_memory;
+ eaten_memory = page;
+ *eaten_memory = c;
+}
+
+static unsigned long __init get_usable_page(unsigned gfp_mask)
{
- int i;
+ unsigned long m;

- for(i=0; i < nr_copy_pages; i++) {
- unsigned long addr;
+ m = get_zeroed_page(gfp_mask);
+ while (does_collide_order(m, 0)) {
+ eat_page((void *)m);
+ m = get_zeroed_page(gfp_mask);
+ if (!m)
+ break;
+ }
+ return m;
+}

- do {
- addr = get_zeroed_page(GFP_ATOMIC);
- if(!addr)
- return -ENOMEM;
- } while (does_collide_order(addr, 0));
+static void __init free_eaten_memory(void)
+{
+ unsigned long m;
+ void **c;
+ int i = 0;

- (pagedir_nosave+i)->address = addr;
+ c = eaten_memory;
+ while (c) {
+ m = (unsigned long)c;
+ c = *c;
+ free_page(m);
+ i++;
}
- return 0;
+ eaten_memory = NULL;
+ pr_debug("swsusp: %d unused pages freed\n", i);
}

-static int __init swsusp_pagedir_relocate(void)
+/**
+ * check_pagedir - We ensure here that pages that the PBEs point to
+ * won't collide with pages where we're going to restore from the loaded
+ * pages later
+ */
+
+static int __init check_pagedir(struct pbe *pblist)
{
- /*
- * We have to avoid recursion (not to overflow kernel stack),
- * and that's why code looks pretty cryptic
+ struct pbe *p;
+
+ /* This is necessary, so that we can free allocated pages
+ * in case of failure
*/
- suspend_pagedir_t *old_pagedir = pagedir_nosave;
- void **eaten_memory = NULL;
- void **c = eaten_memory, *m, *f;
- int ret = 0;
+ for_each_pbe (p, pblist)
+ p->address = 0UL;
+
+ for_each_pbe (p, pblist) {
+ p->address = get_usable_page(GFP_ATOMIC);
+ if (!p->address)
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+/**
+ * swsusp_pagedir_relocate - It is possible, that some memory pages
+ * occupied by the list of PBEs collide with pages where we're going to
+ * restore from the loaded pages later. We relocate them here.
+ */
+
+static struct pbe * __init swsusp_pagedir_relocate(struct pbe *pblist)
+{
struct zone *zone;
- int i;
- struct pbe *p;
unsigned long zone_pfn;
+ struct pbe *pbpage, *tail, *p;
+ void *m;
+ int rel = 0, error = 0;

- printk("Relocating pagedir ");
+ if (!pblist) /* a sanity check */
+ return NULL;
+
+ pr_debug("swsusp: Relocating pagedir (%lu pages to check)\n",
+ swsusp_info.pagedir_pages);

/* Set page flags */

@@ -965,45 +1058,52 @@
zone->zone_start_pfn));
}

- /* Clear orig address */
+ /* Clear orig addresses */

- for(i = 0, p = pagedir_nosave; i < nr_copy_pages; i++, p++) {
+ for_each_pbe (p, pblist)
ClearPageNosaveFree(virt_to_page(p->orig_address));
- }

- if (!does_collide_order((unsigned long)old_pagedir, pagedir_order)) {
- printk("not necessary\n");
- return check_pagedir();
- }
+ tail = pblist + PB_PAGE_SKIP;

- while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order)) != NULL) {
- if (!does_collide_order((unsigned long)m, pagedir_order))
- break;
- eaten_memory = m;
- printk( "." );
- *eaten_memory = c;
- c = eaten_memory;
- }
+ /* Relocate colliding pages */
+
+ for_each_pb_page (pbpage, pblist) {
+ if (does_collide_order((unsigned long)pbpage, 0)) {
+ m = (void *)get_usable_page(GFP_ATOMIC | __GFP_COLD);
+ if (!m) {
+ error = -ENOMEM;
+ break;
+ }
+ memcpy(m, (void *)pbpage, PAGE_SIZE);
+ if (pbpage == pblist)
+ pblist = (struct pbe *)m;
+ else
+ tail->next = (struct pbe *)m;
+
+ eat_page((void *)pbpage);
+ pbpage = (struct pbe *)m;
+
+ /* We have to link the PBEs again */
+
+ for (p = pbpage; p < pbpage + PB_PAGE_SKIP; p++)
+ if (p->next) /* needed to save the end */
+ p->next = p + 1;

- if (!m) {
- printk("out of memory\n");
- ret = -ENOMEM;
- } else {
- pagedir_nosave =
- memcpy(m, old_pagedir, PAGE_SIZE << pagedir_order);
+ rel++;
+ }
+ tail = pbpage + PB_PAGE_SKIP;
}

- c = eaten_memory;
- while (c) {
- printk(":");
- f = c;
- c = *c;
- free_pages((unsigned long)f, pagedir_order);
+ if (error) {
+ printk("\nswsusp: Out of memory\n\n");
+ free_pagedir(pblist);
+ free_eaten_memory();
+ pblist = NULL;
}
- if (ret)
- return ret;
- printk("|\n");
- return check_pagedir();
+ else
+ printk("swsusp: Relocated %d pages\n", rel);
+
+ return pblist;
}

/**
@@ -1139,7 +1239,7 @@
*/
error = bio_write_page(0, &swsusp_header);
} else {
- pr_debug(KERN_ERR "swsusp: Suspend partition has wrong signature?\n");
+ printk(KERN_ERR "swsusp: Suspend partition has wrong signature?\n");
return -EINVAL;
}
if (!error)
@@ -1148,76 +1248,117 @@
}

/**
- * swsusp_read_data - Read image pages from swap.
+ * data_read - Read image pages from swap.
*
* You do not need to check for overlaps, check_pagedir()
* already did that.
*/

-static int __init data_read(void)
+static int __init data_read(struct pbe *pblist)
{
struct pbe * p;
- int error;
- int i;
- int mod = nr_copy_pages / 100;
+ int error = 0;
+ int i = 0;
+ int mod = swsusp_info.image_pages / 100;

if (!mod)
mod = 1;

- if ((error = swsusp_pagedir_relocate()))
- return error;
+ printk("swsusp: Reading image data (%lu pages): ",
+ swsusp_info.image_pages);
+
+ for_each_pbe (p, pblist) {
+ if (!(i % mod))
+ printk("\b\b\b\b%3d%%", i / mod);

- printk( "Reading image data (%d pages): ", nr_copy_pages );
- for(i = 0, p = pagedir_nosave; i < nr_copy_pages && !error; i++, p++) {
- if (!(i%mod))
- printk( "\b\b\b\b%3d%%", i / mod );
error = bio_read_page(swp_offset(p->swap_address),
(void *)p->address);
+ if (error)
+ return error;
+
+ i++;
}
- printk(" %d done.\n",i);
+ printk("\b\b\b\bdone\n");
return error;
-
}

extern dev_t __init name_to_dev_t(const char *line);

-static int __init read_pagedir(void)
+/**
+ * read_pagedir - Read page backup list pages from swap
+ */
+
+static int __init read_pagedir(struct pbe *pblist)
{
- unsigned long addr;
- int i, n = swsusp_info.pagedir_pages;
- int error = 0;
+ struct pbe *pbpage, *p;
+ unsigned i = 0;
+ int error;

- addr = __get_free_pages(GFP_ATOMIC, pagedir_order);
- if (!addr)
- return -ENOMEM;
- pagedir_nosave = (struct pbe *)addr;
+ if (!pblist)
+ return -EFAULT;

- pr_debug("swsusp: Reading pagedir (%d Pages)\n",n);
+ printk("swsusp: Reading pagedir (%lu pages)\n",
+ swsusp_info.pagedir_pages);

- for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) {
- unsigned long offset = swp_offset(swsusp_info.pagedir[i]);
- if (offset)
- error = bio_read_page(offset, (void *)addr);
- else
- error = -EFAULT;
+ for_each_pb_page (pbpage, pblist) {
+ unsigned long offset = swp_offset(swsusp_info.pagedir[i++]);
+
+ error = -EFAULT;
+ if (offset) {
+ p = (pbpage + PB_PAGE_SKIP)->next;
+ error = bio_read_page(offset, (void *)pbpage);
+ (pbpage + PB_PAGE_SKIP)->next = p;
+ }
+ if (error)
+ break;
}
+
if (error)
- free_pages((unsigned long)pagedir_nosave, pagedir_order);
+ free_page((unsigned long)pblist);
+
+ BUG_ON(i != swsusp_info.pagedir_pages);
+
return error;
}

+
static int __init read_suspend_image(void)
{
int error = 0;
+ struct pbe *p;

if ((error = check_sig()))
return error;
+
if ((error = check_header()))
return error;
- if ((error = read_pagedir()))
+
+ if (!(p = alloc_pagedir(nr_copy_pages)))
+ return -ENOMEM;
+
+ if ((error = read_pagedir(p)))
return error;
- if ((error = data_read()))
- free_pages((unsigned long)pagedir_nosave, pagedir_order);
+
+ create_pbe_list(p, nr_copy_pages);
+
+ if (!(pagedir_nosave = swsusp_pagedir_relocate(p)))
+ return -ENOMEM;
+
+ /* Allocate memory for the image and read the data from swap */
+
+ error = check_pagedir(pagedir_nosave);
+ free_eaten_memory();
+ if (!error)
+ error = data_read(pagedir_nosave);
+
+ if (error) { /* We fail cleanly */
+ for_each_pbe (p, pagedir_nosave)
+ if (p->address) {
+ free_page(p->address);
+ p->address = 0UL;
+ }
+ free_pagedir(pagedir_nosave);
+ }
return error;
}

--- linux-mm/arch/i386/kernel/asm-offsets.c 2004-10-01 00:29:59.000000000 +0200
+++ linux.middle/arch/i386/kernel/asm-offsets.c 2005-02-28 21:47:15.000000000 +0100
@@ -7,6 +7,7 @@
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/personality.h>
+#include <linux/suspend.h>
#include <asm/ucontext.h>
#include "sigframe.h"
#include <asm/fixmap.h>
@@ -56,6 +57,11 @@

OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
+ BLANK();
+
+ OFFSET(pbe_address, pbe, address);
+ OFFSET(pbe_orig_address, pbe, orig_address);
+ OFFSET(pbe_next, pbe, next);

/* Offset from the sysenter stack to tss.esp0 */
DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) -
--- linux-mm/arch/i386/power/swsusp.S 2004-10-01 00:29:59.000000000 +0200
+++ linux.middle/arch/i386/power/swsusp.S 2005-02-28 21:29:05.000000000 +0100
@@ -12,6 +12,7 @@
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
+#include <asm/asm_offsets.h>

.text

@@ -28,28 +29,28 @@
ret

ENTRY(swsusp_arch_resume)
- movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx
- movl %ecx,%cr3
+ movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx
+ movl %ecx,%cr3

- movl pagedir_nosave, %ebx
- xorl %eax, %eax
- xorl %edx, %edx
+ movl pagedir_nosave, %edx
.p2align 4,,7

copy_loop:
- movl 4(%ebx,%edx),%edi
- movl (%ebx,%edx),%esi
+ testl %edx, %edx
+ jz done
+
+ movl pbe_address(%edx), %esi
+ movl pbe_orig_address(%edx), %edi

movl $1024, %ecx
rep
movsl

- incl %eax
- addl $16, %edx
- cmpl nr_copy_pages,%eax
- jb copy_loop
+ movl pbe_next(%edx), %edx
+ jmp copy_loop
.p2align 4,,7

+done:
movl saved_context_esp, %esp
movl saved_context_ebp, %ebp
movl saved_context_ebx, %ebx
--- linux-mm/arch/x86_64/kernel/asm-offsets.c 2005-02-03 22:27:09.000000000 +0100
+++ linux.middle/arch/x86_64/kernel/asm-offsets.c 2005-02-28 21:29:05.000000000 +0100
@@ -62,8 +62,8 @@
offsetof (struct rt_sigframe32, uc.uc_mcontext));
BLANK();
#endif
- DEFINE(SIZEOF_PBE, sizeof(struct pbe));
DEFINE(pbe_address, offsetof(struct pbe, address));
DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
+ DEFINE(pbe_next, offsetof(struct pbe, next));
return 0;
}
--- linux-mm/arch/x86_64/kernel/suspend_asm.S 2005-02-03 22:27:09.000000000 +0100
+++ linux.middle/arch/x86_64/kernel/suspend_asm.S 2005-02-28 21:29:05.000000000 +0100
@@ -54,16 +54,10 @@
movq %rax, %cr4; # turn PGE back on

movq pagedir_nosave(%rip), %rdx
- /* compute the limit */
- movl nr_copy_pages(%rip), %eax
- testl %eax, %eax
- jz done
- movq %rdx,%r8
- movl $SIZEOF_PBE,%r9d
- mul %r9 # with rax, clobbers rdx
- movq %r8, %rdx
- addq %r8, %rax
loop:
+ testq %rdx, %rdx
+ jz done
+
/* get addresses from the pbe and copy the page */
movq pbe_address(%rdx), %rsi
movq pbe_orig_address(%rdx), %rdi
@@ -72,9 +66,8 @@
movsq

/* progress to the next pbe */
- addq $SIZEOF_PBE, %rdx
- cmpq %rax, %rdx
- jb loop
+ movq pbe_next(%rdx), %rdx
+ jmp loop
done:
movl $24, %eax
movl %eax, %ds


--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/