Re: [PATCH] mm: fix a regression with HIGHMEM introduced by changeset 7f1290f2f2a4d

From: Andrew Morton
Date: Thu Nov 15 2012 - 14:24:45 EST


On Wed, 14 Nov 2012 22:52:03 +0800
Jiang Liu <liuj97@xxxxxxxxx> wrote:

> So how about totally reverting the changeset 7f1290f2f2a4d2c3f1b7ce8e87256e052ca23125
> and I will post another version once I found a cleaner way?

We do need to get this regression fixed and I guess that a
straightforward revert is an acceptable way of doing that, for now.


I queued the below, with a plan to send it to Linus next week.


From: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Subject: revert "mm: fix-up zone present pages"

Revert

commit 7f1290f2f2a4d2c3f1b7ce8e87256e052ca23125
Author: Jianguo Wu <wujianguo@xxxxxxxxxx>
AuthorDate: Mon Oct 8 16:33:06 2012 -0700
Commit: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
CommitDate: Tue Oct 9 16:22:54 2012 +0900

mm: fix-up zone present pages


That patch tried to fix an issue when calculating zone->present_pages, but
it caused a regression on 32bit systems with HIGHMEM. With that
changeset, reset_zone_present_pages() resets all zone->present_pages to
zero, and fixup_zone_present_pages() is called to recalculate
zone->present_pages when the boot allocator frees core memory pages into
the buddy allocator. Because highmem pages are not freed by the bootmem
allocator, all highmem zones' present_pages end up as zero.

Various options for improving the situation are being discussed but for
now, let's return to the 3.6 code.

Cc: Jianguo Wu <wujianguo@xxxxxxxxxx>
Cc: Jiang Liu <jiang.liu@xxxxxxxxxx>
Cc: Petr Tesarik <ptesarik@xxxxxxx>
Cc: "Luck, Tony" <tony.luck@xxxxxxxxx>
Cc: Mel Gorman <mel@xxxxxxxxx>
Cc: Yinghai Lu <yinghai@xxxxxxxxxx>
Cc: Minchan Kim <minchan.kim@xxxxxxxxx>
Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
Cc: David Rientjes <rientjes@xxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

arch/ia64/mm/init.c | 1 -
include/linux/mm.h | 4 ----
mm/bootmem.c | 10 +---------
mm/memory_hotplug.c | 7 -------
mm/nobootmem.c | 3 ---
mm/page_alloc.c | 34 ----------------------------------
6 files changed, 1 insertion(+), 58 deletions(-)

diff -puN arch/ia64/mm/init.c~revert-1 arch/ia64/mm/init.c
--- a/arch/ia64/mm/init.c~revert-1
+++ a/arch/ia64/mm/init.c
@@ -637,7 +637,6 @@ mem_init (void)

high_memory = __va(max_low_pfn * PAGE_SIZE);

- reset_zone_present_pages();
for_each_online_pgdat(pgdat)
if (pgdat->bdata->node_bootmem_map)
totalram_pages += free_all_bootmem_node(pgdat);
diff -puN include/linux/mm.h~revert-1 include/linux/mm.h
--- a/include/linux/mm.h~revert-1
+++ a/include/linux/mm.h
@@ -1684,9 +1684,5 @@ static inline unsigned int debug_guardpa
static inline bool page_is_guard(struct page *page) { return false; }
#endif /* CONFIG_DEBUG_PAGEALLOC */

-extern void reset_zone_present_pages(void);
-extern void fixup_zone_present_pages(int nid, unsigned long start_pfn,
- unsigned long end_pfn);
-
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
diff -puN mm/bootmem.c~revert-1 mm/bootmem.c
--- a/mm/bootmem.c~revert-1
+++ a/mm/bootmem.c
@@ -198,8 +198,6 @@ static unsigned long __init free_all_boo
int order = ilog2(BITS_PER_LONG);

__free_pages_bootmem(pfn_to_page(start), order);
- fixup_zone_present_pages(page_to_nid(pfn_to_page(start)),
- start, start + BITS_PER_LONG);
count += BITS_PER_LONG;
start += BITS_PER_LONG;
} else {
@@ -210,9 +208,6 @@ static unsigned long __init free_all_boo
if (vec & 1) {
page = pfn_to_page(start + off);
__free_pages_bootmem(page, 0);
- fixup_zone_present_pages(
- page_to_nid(page),
- start + off, start + off + 1);
count++;
}
vec >>= 1;
@@ -226,11 +221,8 @@ static unsigned long __init free_all_boo
pages = bdata->node_low_pfn - bdata->node_min_pfn;
pages = bootmem_bootmap_pages(pages);
count += pages;
- while (pages--) {
- fixup_zone_present_pages(page_to_nid(page),
- page_to_pfn(page), page_to_pfn(page) + 1);
+ while (pages--)
__free_pages_bootmem(page++, 0);
- }

bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);

diff -puN mm/memory_hotplug.c~revert-1 mm/memory_hotplug.c
--- a/mm/memory_hotplug.c~revert-1
+++ a/mm/memory_hotplug.c
@@ -106,7 +106,6 @@ static void get_page_bootmem(unsigned lo
void __ref put_page_bootmem(struct page *page)
{
unsigned long type;
- struct zone *zone;

type = (unsigned long) page->lru.next;
BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
@@ -117,12 +116,6 @@ void __ref put_page_bootmem(struct page
set_page_private(page, 0);
INIT_LIST_HEAD(&page->lru);
__free_pages_bootmem(page, 0);
-
- zone = page_zone(page);
- zone_span_writelock(zone);
- zone->present_pages++;
- zone_span_writeunlock(zone);
- totalram_pages++;
}

}
diff -puN mm/nobootmem.c~revert-1 mm/nobootmem.c
--- a/mm/nobootmem.c~revert-1
+++ a/mm/nobootmem.c
@@ -116,8 +116,6 @@ static unsigned long __init __free_memor
return 0;

__free_pages_memory(start_pfn, end_pfn);
- fixup_zone_present_pages(pfn_to_nid(start >> PAGE_SHIFT),
- start_pfn, end_pfn);

return end_pfn - start_pfn;
}
@@ -128,7 +126,6 @@ unsigned long __init free_low_memory_cor
phys_addr_t start, end, size;
u64 i;

- reset_zone_present_pages();
for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL)
count += __free_memory_core(start, end);

diff -puN mm/page_alloc.c~revert-1 mm/page_alloc.c
--- a/mm/page_alloc.c~revert-1
+++ a/mm/page_alloc.c
@@ -6098,37 +6098,3 @@ void dump_page(struct page *page)
dump_page_flags(page->flags);
mem_cgroup_print_bad_page(page);
}
-
-/* reset zone->present_pages */
-void reset_zone_present_pages(void)
-{
- struct zone *z;
- int i, nid;
-
- for_each_node_state(nid, N_HIGH_MEMORY) {
- for (i = 0; i < MAX_NR_ZONES; i++) {
- z = NODE_DATA(nid)->node_zones + i;
- z->present_pages = 0;
- }
- }
-}
-
-/* calculate zone's present pages in buddy system */
-void fixup_zone_present_pages(int nid, unsigned long start_pfn,
- unsigned long end_pfn)
-{
- struct zone *z;
- unsigned long zone_start_pfn, zone_end_pfn;
- int i;
-
- for (i = 0; i < MAX_NR_ZONES; i++) {
- z = NODE_DATA(nid)->node_zones + i;
- zone_start_pfn = z->zone_start_pfn;
- zone_end_pfn = zone_start_pfn + z->spanned_pages;
-
- /* if the two regions intersect */
- if (!(zone_start_pfn >= end_pfn || zone_end_pfn <= start_pfn))
- z->present_pages += min(end_pfn, zone_end_pfn) -
- max(start_pfn, zone_start_pfn);
- }
-}
_

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/