Re: [Bisected Regression in 2.6.35] A full tmpfs filesystem causes hibernation to hang

From: M. Vefa Bicakci
Date: Tue Aug 31 2010 - 07:26:11 EST


On 31/08/10 09:54 AM, KOSAKI Motohiro wrote:
>> Great!
>> I've attached more verbose debug message patch and trial bug fixing patch.
>> Could you please to try this?
>
> Oops, please apply attached patch instead 0002-add-gfp_noretry.patch.
>
> Thanks.

Hello!

I have applied the patches you mentioned, and rebuilt and tested the
2.6.35.4 kernel. I am really happy to say that your patches (cumulatively)
fixed the issue!

Unfortunately, because the hibernation is rather quick, I am having a
hard time getting screen-shots with my camera. If you would like, I can
try to put some sleeps around the code so that I can get the output for
you.

For the record, the attached patch is the cumulative version of all of
your patches. It applies cleanly to 2.6.35.4, and most importantly, it
fixes the issue.

All in all, thanks a lot!

Is there anything else I can do? Would you like me to try a trimmed
version of your patch, for example without the debugging parts and the
5-pass swap-out procedure? I am not sure whether the latter is essential.

Thanks again,

M. Vefa Bicakci
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 25ce010..c672931 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1257,12 +1257,15 @@ static unsigned long minimum_image_size(unsigned long saveable)
int hibernate_preallocate_memory(void)
{
struct zone *zone;
- unsigned long saveable, size, max_size, count, highmem, pages = 0;
+ unsigned long saveable, max_size, count, highmem, pages = 0;
unsigned long alloc, save_highmem, pages_highmem;
struct timeval start, stop;
int error;
+ unsigned long additional_size, img_size;
+ unsigned long pages_fraction;
+ unsigned long img_pages;

- printk(KERN_INFO "PM: Preallocating image memory... ");
+ printk(KERN_INFO "PM: Preallocating image memory... \n");
do_gettimeofday(&start);

error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY);
@@ -1287,9 +1290,9 @@ int hibernate_preallocate_memory(void)
count = saveable;
saveable += save_highmem;
highmem = save_highmem;
- size = 0;
+ additional_size = 0;
for_each_populated_zone(zone) {
- size += snapshot_additional_pages(zone);
+ additional_size += snapshot_additional_pages(zone);
if (is_highmem(zone))
highmem += zone_page_state(zone, NR_FREE_PAGES);
else
@@ -1298,25 +1301,36 @@ int hibernate_preallocate_memory(void)
count += highmem;
count -= totalreserve_pages;

+ printk(KERN_INFO "PM: save_highmem(%lu), saveable(%lu) count=(%lu)\n",
+ save_highmem, saveable, count);
+
/* Compute the maximum number of saveable pages to leave in memory. */
- max_size = (count - (size + PAGES_FOR_IO)) / 2 - 2 * SPARE_PAGES;
- size = DIV_ROUND_UP(image_size, PAGE_SIZE);
- if (size > max_size)
- size = max_size;
+ max_size = (count - (additional_size + PAGES_FOR_IO)) / 2 - 2 * SPARE_PAGES;
+ img_size = DIV_ROUND_UP(image_size, PAGE_SIZE);
+ if (img_size > max_size)
+ img_size = max_size;
+
+ printk(KERN_INFO "PM: max_size(%lu), image_size(%lu), img_size(%lu)\n",
+ max_size, image_size, img_size);
+
/*
* If the maximum is not less than the current number of saveable pages
* in memory, allocate page frames for the image and we're done.
*/
- if (size >= saveable) {
+ if (img_size >= saveable) {
+ printk(KERN_INFO "PM: size >= saveable. skip \n");
pages = preallocate_image_highmem(save_highmem);
pages += preallocate_image_memory(saveable - pages);
goto out;
}

/* Estimate the minimum size of the image. */
- pages = minimum_image_size(saveable);
- if (size < pages)
- size = min_t(unsigned long, pages, max_size);
+ img_pages = minimum_image_size(saveable);
+ if (img_size < img_pages)
+ img_size = min_t(unsigned long, img_pages, max_size);
+
+ printk(KERN_INFO "PM: img_pages(%lu), img_size(%lu)\n",
+ img_pages, img_size);

/*
* Let the memory management subsystem know that we're going to need a
@@ -1324,7 +1338,7 @@ int hibernate_preallocate_memory(void)
* NOTE: If this is not done, performance will be hurt badly in some
* test cases.
*/
- shrink_all_memory(saveable - size);
+ shrink_all_memory(saveable - img_size);

/*
* The number of saveable pages in memory was too high, so apply some
@@ -1334,16 +1348,31 @@ int hibernate_preallocate_memory(void)
* highmem and non-highmem zones separately.
*/
pages_highmem = preallocate_image_highmem(highmem / 2);
+ printk(KERN_INFO "PM: preallocate_image_highmem %lu %lu free(%u/%lu)\n",
+ highmem, pages_highmem, nr_free_highpages(), global_page_state(NR_FREE_PAGES));
+
alloc = (count - max_size) - pages_highmem;
pages = preallocate_image_memory(alloc);
- if (pages < alloc)
+ printk(KERN_INFO "PM: preallocate_image_memory %lu %lu free(%u/%lu)\n",
+ alloc, pages, nr_free_highpages(), global_page_state(NR_FREE_PAGES));
+
+ if (pages < alloc) {
+ printk(KERN_INFO "PM: pages(%lu) < alloc(%lu). error. \n",
+ pages, alloc);
goto err_out;
- size = max_size - size;
- alloc = size;
- size = preallocate_highmem_fraction(size, highmem, count);
- pages_highmem += size;
- alloc -= size;
+ }
+
+ alloc = max_size - img_size;
+
+ pages_fraction = preallocate_highmem_fraction(alloc, highmem, count);
+ printk(KERN_INFO "PM: preallocate_highmem_fraction %lu %lu %lu -> %lu free(%u/%lu)\n",
+ alloc, highmem, count, pages_fraction, nr_free_highpages(), global_page_state(NR_FREE_PAGES));
+
+ pages_highmem += pages_fraction;
+ alloc -= pages_fraction;
pages += preallocate_image_memory(alloc);
+ printk(KERN_INFO "PM: preallocate_image_memory %lu %lu free(%u/%lu)\n",
+ alloc, pages, nr_free_highpages(), global_page_state(NR_FREE_PAGES));
pages += pages_highmem;

/*
@@ -1351,18 +1380,20 @@ int hibernate_preallocate_memory(void)
* pages in memory, but we have allocated more. Release the excessive
* ones now.
*/
+ printk(KERN_INFO "PM: free_unnecessary_pages() \n");
free_unnecessary_pages();

out:
do_gettimeofday(&stop);
- printk(KERN_CONT "done (allocated %lu pages)\n", pages);
+ printk(KERN_INFO "done (allocated %lu pages)\n", pages);
swsusp_show_speed(&start, &stop, pages, "Allocated");

return 0;

err_out:
- printk(KERN_CONT "\n");
+ printk(KERN_INFO "PM: err_out \n");
swsusp_free();
+ printk(KERN_INFO "PM: swsusp_free() end \n");
return -ENOMEM;
}

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d24f761..3b72836 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -961,6 +961,43 @@ static struct ctl_table kern_table[] = {
{ }
};

+static int sysctl_shrink_all_memory;
+/* vm.shrink_all_memory: a successful write calls shrink_all_memory() with the value written. */
+static int shrink_all_memory_handler(ctl_table *table, int write,
+		void __user *buffer, size_t *length, loff_t *ppos)
+{
+	int ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
+	/* Propagate parse errors; a negative int would wrap to a huge unsigned long. */
+	if (!ret && write && sysctl_shrink_all_memory > 0)
+		shrink_all_memory(sysctl_shrink_all_memory);
+	return ret;
+}
+
+#include <linux/mm_types.h>
+#include <linux/mmzone.h>
+
+/* vm.reset_reclaim_stat: any write zeroes recent_scanned/recent_rotated of every populated zone. */
+static int reset_reclaim_stat_handler(ctl_table *table, int write,
+		void __user *buffer, size_t *length, loff_t *ppos)
+{
+	struct zone *z;
+	int lru;
+
+	if (!write)	/* reads do nothing and leave buffer and *length untouched */
+		return 0;
+	for_each_populated_zone(z) {
+		struct zone_reclaim_stat *stat = &z->reclaim_stat;
+
+		spin_lock_irq(&z->lru_lock);
+		for (lru = 0; lru < 2; lru++) {
+			stat->recent_scanned[lru] = 0;
+			stat->recent_rotated[lru] = 0;
+		}
+		spin_unlock_irq(&z->lru_lock);
+	}
+	return 0;
+}
+
static struct ctl_table vm_table[] = {
{
.procname = "overcommit_memory",
@@ -1318,6 +1355,20 @@ static struct ctl_table vm_table[] = {
.extra2 = &one,
},
#endif
+ {
+ .procname = "shrink_all_memory",
+ .data = &sysctl_shrink_all_memory,
+ .maxlen = sizeof(sysctl_shrink_all_memory),
+ .mode = 0644,
+ .proc_handler = shrink_all_memory_handler,
+ },
+ {
+ .procname = "reset_reclaim_stat",
+ .data = &sysctl_shrink_all_memory,
+ .maxlen = sizeof(sysctl_shrink_all_memory),
+ .mode = 0644,
+ .proc_handler = reset_reclaim_stat_handler,
+ },

/*
* NOTE: do not add new entries to this table unless you have read
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b94fe1b..100282c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -40,6 +40,7 @@
#include <linux/memcontrol.h>
#include <linux/delayacct.h>
#include <linux/sysctl.h>
+#include <linux/oom.h>

#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -611,7 +612,7 @@ static enum page_references page_check_references(struct page *page,
}

/* Reclaim if clean, defer dirty pages to writeback */
- if (referenced_page)
+ if (referenced_page && !PageSwapBacked(page))
return PAGEREF_RECLAIM_CLEAN;

return PAGEREF_RECLAIM;
@@ -1879,7 +1880,7 @@ out:
return sc->nr_reclaimed;

/* top priority shrink_zones still had more to do? don't OOM, then */
- if (scanning_global_lru(sc) && !all_unreclaimable)
+ if (scanning_global_lru(sc) && !all_unreclaimable && !oom_killer_disabled)
return 1;

return 0;
@@ -2395,6 +2396,7 @@ unsigned long zone_reclaimable_pages(struct zone *zone)
*/
unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
{
+ int i;
struct reclaim_state reclaim_state;
struct scan_control sc = {
.gfp_mask = GFP_HIGHUSER_MOVABLE,
@@ -2410,17 +2412,36 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
struct task_struct *p = current;
unsigned long nr_reclaimed;

+ printk(KERN_ERR "shrink_all_memory start\n");
+
p->flags |= PF_MEMALLOC;
lockdep_set_current_reclaim_state(sc.gfp_mask);
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;

- nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+ for (i = 1; i <= 5; i++) {
+ nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+
+ printk(KERN_INFO "PM: shrink memory: pass=%d, req:%ld reclaimed:%ld free:%ld\n",
+ i,
+ nr_to_reclaim,
+ nr_reclaimed,
+ global_page_state(NR_FREE_PAGES));
+
+ if (nr_to_reclaim <= nr_reclaimed)
+ break;
+ nr_to_reclaim -= nr_reclaimed;
+ }

p->reclaim_state = NULL;
lockdep_clear_current_reclaim_state();
p->flags &= ~PF_MEMALLOC;

+ printk(KERN_ERR "shrink_all_memory: req:%ld reclaimed:%ld free:%ld\n",
+ nr_to_reclaim,
+ nr_reclaimed,
+ global_page_state(NR_FREE_PAGES));
+
return nr_reclaimed;
}
#endif /* CONFIG_HIBERNATION */
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 7759941..bee3ba3 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -860,6 +860,33 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
zone->prev_priority,
zone->zone_start_pfn,
zone->inactive_ratio);
+
+ {
+ extern int vm_swappiness;
+ int anon_prio = vm_swappiness;
+ int file_prio = 200 - vm_swappiness;
+ unsigned long ap;
+ unsigned long fp;
+ struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat;
+
+ ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1);
+ ap /= reclaim_stat->recent_rotated[0] + 1;
+ fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
+ fp /= reclaim_stat->recent_rotated[1] + 1;
+
+ seq_printf(m,
+ "\n recent_anon_rotated: %lu"
+ "\n recent_anon_scanned: %lu"
+ "\n recent_file_rotated: %lu"
+ "\n recent_file_scanned: %lu"
+ "\n anon_ratio: %lu"
+ ,
+ zone->reclaim_stat.recent_rotated[0],
+ zone->reclaim_stat.recent_scanned[0],
+ zone->reclaim_stat.recent_rotated[1],
+ zone->reclaim_stat.recent_scanned[1],
+ (ap * 100) / (ap + fp +1));
+ }
seq_putc(m, '\n');
}