[PATCH 1/3] makedumpfile: hugepage filtering: add hugepage filtering functions

From: Jingbai Ma
Date: Tue Nov 05 2013 - 08:45:54 EST


Add functions to exclude hugepages from the vmcore dump.

Signed-off-by: Jingbai Ma <jingbai.ma@xxxxxx>
---
makedumpfile.c | 272 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
makedumpfile.h | 19 ++++
2 files changed, 289 insertions(+), 2 deletions(-)

diff --git a/makedumpfile.c b/makedumpfile.c
index b42565c..f0b2531 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -46,6 +46,8 @@ unsigned long long pfn_cache_private;
unsigned long long pfn_user;
unsigned long long pfn_free;
unsigned long long pfn_hwpoison;
+unsigned long long pfn_free_huge;
+unsigned long long pfn_active_huge;

unsigned long long num_dumped;

@@ -1038,6 +1040,7 @@ get_symbol_info(void)
SYMBOL_INIT(mem_map, "mem_map");
SYMBOL_INIT(vmem_map, "vmem_map");
SYMBOL_INIT(mem_section, "mem_section");
+ SYMBOL_INIT(hstates, "hstates");
SYMBOL_INIT(pkmap_count, "pkmap_count");
SYMBOL_INIT_NEXT(pkmap_count_next, "pkmap_count");
SYMBOL_INIT(system_utsname, "system_utsname");
@@ -1174,6 +1177,19 @@ get_structure_info(void)
OFFSET_INIT(list_head.prev, "list_head", "prev");

/*
+ * Get offsets of the hstate's members.
+ */
+ SIZE_INIT(hstate, "hstate");
+ OFFSET_INIT(hstate.order, "hstate", "order");
+ OFFSET_INIT(hstate.nr_huge_pages, "hstate", "nr_huge_pages");
+ OFFSET_INIT(hstate.free_huge_pages, "hstate", "free_huge_pages");
+ OFFSET_INIT(hstate.hugepage_activelist, "hstate",
+ "hugepage_activelist");
+ OFFSET_INIT(hstate.hugepage_freelists, "hstate", "hugepage_freelists");
+ MEMBER_ARRAY_LENGTH_INIT(hstate.hugepage_freelists, "hstate",
+ "hugepage_freelists");
+
+ /*
* Get offsets of the node_memblk_s's members.
*/
SIZE_INIT(node_memblk_s, "node_memblk_s");
@@ -1555,6 +1571,7 @@ write_vmcoreinfo_data(void)
WRITE_SYMBOL("mem_map", mem_map);
WRITE_SYMBOL("vmem_map", vmem_map);
WRITE_SYMBOL("mem_section", mem_section);
+ WRITE_SYMBOL("hstates", hstates);
WRITE_SYMBOL("pkmap_count", pkmap_count);
WRITE_SYMBOL("pkmap_count_next", pkmap_count_next);
WRITE_SYMBOL("system_utsname", system_utsname);
@@ -1590,6 +1607,7 @@ write_vmcoreinfo_data(void)
WRITE_STRUCTURE_SIZE("zone", zone);
WRITE_STRUCTURE_SIZE("free_area", free_area);
WRITE_STRUCTURE_SIZE("list_head", list_head);
+ WRITE_STRUCTURE_SIZE("hstate", hstate);
WRITE_STRUCTURE_SIZE("node_memblk_s", node_memblk_s);
WRITE_STRUCTURE_SIZE("nodemask_t", nodemask_t);
WRITE_STRUCTURE_SIZE("pageflags", pageflags);
@@ -1628,6 +1646,13 @@ write_vmcoreinfo_data(void)
WRITE_MEMBER_OFFSET("vm_struct.addr", vm_struct.addr);
WRITE_MEMBER_OFFSET("vmap_area.va_start", vmap_area.va_start);
WRITE_MEMBER_OFFSET("vmap_area.list", vmap_area.list);
+ WRITE_MEMBER_OFFSET("hstate.order", hstate.order);
+ WRITE_MEMBER_OFFSET("hstate.nr_huge_pages", hstate.nr_huge_pages);
+ WRITE_MEMBER_OFFSET("hstate.free_huge_pages", hstate.free_huge_pages);
+ WRITE_MEMBER_OFFSET("hstate.hugepage_activelist",
+ hstate.hugepage_activelist);
+ WRITE_MEMBER_OFFSET("hstate.hugepage_freelists",
+ hstate.hugepage_freelists);
WRITE_MEMBER_OFFSET("log.ts_nsec", log.ts_nsec);
WRITE_MEMBER_OFFSET("log.len", log.len);
WRITE_MEMBER_OFFSET("log.text_len", log.text_len);
@@ -1647,6 +1672,9 @@ write_vmcoreinfo_data(void)
WRITE_ARRAY_LENGTH("zone.free_area", zone.free_area);
WRITE_ARRAY_LENGTH("free_area.free_list", free_area.free_list);

+ WRITE_ARRAY_LENGTH("hstate.hugepage_freelists",
+ hstate.hugepage_freelists);
+
WRITE_NUMBER("NR_FREE_PAGES", NR_FREE_PAGES);
WRITE_NUMBER("N_ONLINE", N_ONLINE);

@@ -1659,6 +1687,8 @@ write_vmcoreinfo_data(void)

WRITE_NUMBER("PAGE_BUDDY_MAPCOUNT_VALUE", PAGE_BUDDY_MAPCOUNT_VALUE);

+ WRITE_NUMBER("HUGE_MAX_HSTATE", HUGE_MAX_HSTATE);
+
/*
* write the source file of 1st kernel
*/
@@ -1874,6 +1904,7 @@ read_vmcoreinfo(void)
READ_SYMBOL("mem_map", mem_map);
READ_SYMBOL("vmem_map", vmem_map);
READ_SYMBOL("mem_section", mem_section);
+ READ_SYMBOL("hstates", hstates);
READ_SYMBOL("pkmap_count", pkmap_count);
READ_SYMBOL("pkmap_count_next", pkmap_count_next);
READ_SYMBOL("system_utsname", system_utsname);
@@ -1906,6 +1937,7 @@ read_vmcoreinfo(void)
READ_STRUCTURE_SIZE("zone", zone);
READ_STRUCTURE_SIZE("free_area", free_area);
READ_STRUCTURE_SIZE("list_head", list_head);
+ READ_STRUCTURE_SIZE("hstate", hstate);
READ_STRUCTURE_SIZE("node_memblk_s", node_memblk_s);
READ_STRUCTURE_SIZE("nodemask_t", nodemask_t);
READ_STRUCTURE_SIZE("pageflags", pageflags);
@@ -1940,6 +1972,13 @@ read_vmcoreinfo(void)
READ_MEMBER_OFFSET("vm_struct.addr", vm_struct.addr);
READ_MEMBER_OFFSET("vmap_area.va_start", vmap_area.va_start);
READ_MEMBER_OFFSET("vmap_area.list", vmap_area.list);
+ READ_MEMBER_OFFSET("hstate.order", hstate.order);
+ READ_MEMBER_OFFSET("hstate.nr_huge_pages", hstate.nr_huge_pages);
+ READ_MEMBER_OFFSET("hstate.free_huge_pages", hstate.free_huge_pages);
+ READ_MEMBER_OFFSET("hstate.hugepage_activelist",
+ hstate.hugepage_activelist);
+ READ_MEMBER_OFFSET("hstate.hugepage_freelists",
+ hstate.hugepage_freelists);
READ_MEMBER_OFFSET("log.ts_nsec", log.ts_nsec);
READ_MEMBER_OFFSET("log.len", log.len);
READ_MEMBER_OFFSET("log.text_len", log.text_len);
@@ -1950,6 +1989,8 @@ read_vmcoreinfo(void)
READ_ARRAY_LENGTH("node_memblk", node_memblk);
READ_ARRAY_LENGTH("zone.free_area", zone.free_area);
READ_ARRAY_LENGTH("free_area.free_list", free_area.free_list);
+ READ_ARRAY_LENGTH("hstate.hugepage_freelists",
+ hstate.hugepage_freelists);
READ_ARRAY_LENGTH("node_remap_start_pfn", node_remap_start_pfn);

READ_NUMBER("NR_FREE_PAGES", NR_FREE_PAGES);
@@ -1966,6 +2007,8 @@ read_vmcoreinfo(void)

READ_NUMBER("PAGE_BUDDY_MAPCOUNT_VALUE", PAGE_BUDDY_MAPCOUNT_VALUE);

+ READ_NUMBER("HUGE_MAX_HSTATE", HUGE_MAX_HSTATE);
+
return TRUE;
}

@@ -4040,6 +4083,214 @@ exclude_free_page(void)
return TRUE;
}

+/* Clear 2nd-bitmap bits of the (1 << order) pages at pfn; FALSE on failure. */
+inline int
+clear_huge_page(unsigned long long pfn, unsigned int order)
+{
+	unsigned long long i;
+
+	DEBUG_MSG("Exclude huge page. start pfn: %llu, order: %u\n",
+		  pfn, order);
+
+	for (i = 0; i < (1ULL << order); i++) {
+		if (!clear_bit_on_2nd_bitmap_for_kernel(pfn + i)) {
+			ERRMSG("Can't clear 2nd bitmap! pfn=0x%llx\n", pfn + i);
+			return FALSE;
+		}
+	}
+	return TRUE;
+}
+
+/*
+ * Clear every huge page on the hstates[] free lists and, when
+ * DL_EXCLUDE_ACTIVE_HUGE is set, active lists. TRUE on success, else FALSE.
+ */
+int
+_exclude_huge_page(void)
+{
+	int i, node, freelist_length;
+	unsigned long curr_hstate, curr_page, head, curr, previous, curr_prev;
+	struct timeval tv_start;
+	unsigned long long pfn;
+	unsigned int order;
+	unsigned long nr_huge_pages, free_huge_pages, active_huge_pages;
+
+	freelist_length = ARRAY_LENGTH(hstate.hugepage_freelists);
+	/* Exclude free huge pages (also done when only ACTIVE_HUGE is set) */
+	if (info->dump_level & (DL_EXCLUDE_FREE_HUGE
+			| DL_EXCLUDE_ACTIVE_HUGE)) {
+		gettimeofday(&tv_start, NULL);
+		for (i = 0; i < NUMBER(HUGE_MAX_HSTATE); i++) {
+			curr_hstate = SYMBOL(hstates) + SIZE(hstate) * i;
+			/* Read order */
+			if (!readmem(VADDR,
+				curr_hstate + OFFSET(hstate.order),
+				&order, sizeof(order))) {
+				ERRMSG("Can't get hstate.order!\n");
+				return FALSE;
+			}
+			/* Read free_huge_pages (used as the progress total) */
+			if (!readmem(VADDR,
+				curr_hstate + OFFSET(hstate.free_huge_pages),
+				&free_huge_pages, sizeof(free_huge_pages))) {
+				ERRMSG("Can't get hstate.free_huge_pages!\n");
+				return FALSE;
+			}
+			for (node = 0; node < freelist_length; node++) {
+				/* head = hstate.hugepage_freelists[node] */
+				head = curr_hstate
+					+ OFFSET(hstate.hugepage_freelists)
+					+ SIZE(list_head) * node;
+				if (!readmem(VADDR,
+					head + OFFSET(list_head.next),
+					&curr, sizeof(curr))) {
+					ERRMSG("Can't get free list!\n");
+					return FALSE;
+				}
+				curr_prev = head;
+				/* Walk the list; stop at head or a NULL link */
+				while (head != curr && curr != 0) {
+					print_progress(PROGRESS_FREE_HUGE,
+						pfn_free_huge, free_huge_pages);
+					if (!readmem(VADDR,
+						curr + OFFSET(list_head.prev),
+						&previous, sizeof(previous))) {
+						ERRMSG("Can't get free list!\n");
+						return FALSE;
+					}
+					if (previous != curr_prev) {
+						ERRMSG("Free list is broken!\n");
+						return FALSE;
+					}
+					curr_page = curr - OFFSET(page.lru);
+					pfn = page_to_pfn(curr_page);
+					if (!clear_huge_page(pfn, order))
+						return FALSE;
+					pfn_free_huge++;
+					curr_prev = curr;
+					if (!readmem(VADDR,
+						curr + OFFSET(list_head.next),
+						&curr, sizeof(curr))) {
+						ERRMSG("Can't get free list!\n");
+						return FALSE;
+					}
+				}
+			}
+		}
+		/* print [100 %] */
+		print_progress(PROGRESS_FREE_HUGE, 1, 1);
+		print_execution_time(PROGRESS_FREE_HUGE, &tv_start);
+	}
+
+	/* Exclude active huge pages */
+	if (info->dump_level & DL_EXCLUDE_ACTIVE_HUGE) {
+		gettimeofday(&tv_start, NULL);
+		for (i = 0; i < NUMBER(HUGE_MAX_HSTATE); i++) {
+			curr_hstate = SYMBOL(hstates) + SIZE(hstate) * i;
+			/* Read order */
+			if (!readmem(VADDR,
+				curr_hstate + OFFSET(hstate.order),
+				&order, sizeof(order))) {
+				ERRMSG("Can't get hstate.order!\n");
+				return FALSE;
+			}
+			/* Read nr_huge_pages */
+			if (!readmem(VADDR,
+				curr_hstate + OFFSET(hstate.nr_huge_pages),
+				&nr_huge_pages, sizeof(nr_huge_pages))) {
+				ERRMSG("Can't get hstate.nr_huge_pages!\n");
+				return FALSE;
+			}
+			/* Read free_huge_pages */
+			if (!readmem(VADDR,
+				curr_hstate + OFFSET(hstate.free_huge_pages),
+				&free_huge_pages, sizeof(free_huge_pages))) {
+				ERRMSG("Can't get hstate.free_huge_pages!\n");
+				return FALSE;
+			}
+			if (nr_huge_pages < free_huge_pages) {
+				ERRMSG("nr_huge_pages < free_huge_pages!\n");
+				return FALSE;
+			}
+			active_huge_pages = nr_huge_pages - free_huge_pages;
+			/* head = hstate.hugepage_activelist */
+			head = curr_hstate + OFFSET(hstate.hugepage_activelist);
+			if (!readmem(VADDR, head + OFFSET(list_head.next),
+				&curr, sizeof(curr))) {
+				ERRMSG("Can't get active list!\n");
+				return FALSE;
+			}
+			curr_prev = head;
+			/* Walk the list; stop at head or a NULL link */
+			while (head != curr && curr != 0) {
+				print_progress(PROGRESS_ACTIVE_HUGE,
+					pfn_active_huge, active_huge_pages);
+				if (!readmem(VADDR,
+					curr + OFFSET(list_head.prev),
+					&previous, sizeof(previous))) {
+					ERRMSG("Can't get active list!\n");
+					return FALSE;
+				}
+				if (previous != curr_prev) {
+					ERRMSG("Active list is broken!\n");
+					return FALSE;
+				}
+				curr_page = curr - OFFSET(page.lru);
+				pfn = page_to_pfn(curr_page);
+				if (!clear_huge_page(pfn, order))
+					return FALSE;
+				pfn_active_huge++;
+				curr_prev = curr;
+				if (!readmem(VADDR,
+					curr + OFFSET(list_head.next),
+					&curr, sizeof(curr))) {
+					ERRMSG("Can't get active list!\n");
+					return FALSE;
+				}
+			}
+		}
+		/* print [100 %] */
+		print_progress(PROGRESS_ACTIVE_HUGE, 1, 1);
+		print_execution_time(PROGRESS_ACTIVE_HUGE, &tv_start);
+	}
+
+	DEBUG_MSG("\n");
+	DEBUG_MSG("free huge pages : %llu\n", pfn_free_huge);
+	DEBUG_MSG("active huge pages: %llu\n", pfn_active_huge);
+
+	return TRUE;
+}
+
+/*
+ * Exclude huge pages from the dump. Verifies that the hstates symbol and
+ * the hstate size/offset/array-length information were found, then clears
+ * the pages via _exclude_huge_page(). Returns TRUE on success, else FALSE.
+ */
+int
+exclude_huge_page(void)
+{
+	/* Check having necessary information. */
+	if (SYMBOL(hstates) == NOT_FOUND_SYMBOL) {
+		ERRMSG("Can't get necessary symbols for huge pages.\n");
+		return FALSE;
+	}
+
+	if ((SIZE(hstate) == NOT_FOUND_STRUCTURE)
+	    || (OFFSET(hstate.order) == NOT_FOUND_STRUCTURE)
+	    || (OFFSET(hstate.nr_huge_pages) == NOT_FOUND_STRUCTURE)
+	    || (OFFSET(hstate.free_huge_pages) == NOT_FOUND_STRUCTURE)
+	    || (OFFSET(hstate.hugepage_activelist) == NOT_FOUND_STRUCTURE)
+	    || (OFFSET(hstate.hugepage_freelists) == NOT_FOUND_STRUCTURE)
+	    || (ARRAY_LENGTH(hstate.hugepage_freelists)
+		== NOT_FOUND_STRUCTURE)) {
+		ERRMSG("Can't get necessary structures for huge pages.\n");
+		return FALSE;
+	}
+
+	/* Detect huge pages and update 2nd-bitmap. */
+	return _exclude_huge_page() ? TRUE : FALSE;
+}
+
/*
* Let C be a cyclic buffer size and B a bitmap size used for
* representing maximum block size managed by buddy allocator.
@@ -4532,6 +4783,13 @@ exclude_unnecessary_pages_cyclic(void)
return FALSE;

/*
+ * Exclude huge pages.
+ */
+ if (info->dump_level & (DL_EXCLUDE_FREE_HUGE | DL_EXCLUDE_ACTIVE_HUGE))
+ if (!exclude_huge_page())
+ return FALSE;
+
+ /*
* Exclude cache pages, cache private pages, user data pages,
* free pages and hwpoison pages.
*/
@@ -4661,6 +4919,13 @@ create_2nd_bitmap(void)
return FALSE;

/*
+ * Exclude huge pages.
+ */
+ if (info->dump_level & (DL_EXCLUDE_FREE_HUGE | DL_EXCLUDE_ACTIVE_HUGE))
+ if (!exclude_huge_page())
+ return FALSE;
+
+ /*
* Exclude Xen user domain.
*/
if (info->flag_exclude_xen_dom) {
@@ -6513,6 +6778,7 @@ write_kdump_pages_and_bitmap_cyclic(struct cache_data *cd_header, struct cache_d
*/
pfn_zero = pfn_cache = pfn_cache_private = 0;
pfn_user = pfn_free = pfn_hwpoison = 0;
+ pfn_free_huge = pfn_active_huge = 0;
pfn_memhole = info->max_mapnr;

cd_header->offset
@@ -7416,7 +7682,8 @@ print_report(void)
pfn_original = info->max_mapnr - pfn_memhole;

pfn_excluded = pfn_zero + pfn_cache + pfn_cache_private
- + pfn_user + pfn_free + pfn_hwpoison;
+ + pfn_user + pfn_free + pfn_hwpoison
+ + pfn_free_huge + pfn_active_huge;
shrinking = (pfn_original - pfn_excluded) * 100;
shrinking = shrinking / pfn_original;

@@ -7429,6 +7696,9 @@ print_report(void)
pfn_cache_private);
REPORT_MSG(" User process data pages : 0x%016llx\n", pfn_user);
REPORT_MSG(" Free pages : 0x%016llx\n", pfn_free);
+ REPORT_MSG(" Free hugepage pages : 0x%016llx\n", pfn_free_huge);
+ REPORT_MSG(" Active hugepage pages : 0x%016llx\n",
+ pfn_active_huge);
REPORT_MSG(" Hwpoison pages : 0x%016llx\n", pfn_hwpoison);
REPORT_MSG(" Remaining pages : 0x%016llx\n",
pfn_original - pfn_excluded);
diff --git a/makedumpfile.h b/makedumpfile.h
index a5826e0..1a0a5fa 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -178,7 +178,7 @@ isAnon(unsigned long mapping)
* Dump Level
*/
#define MIN_DUMP_LEVEL (0)
-#define MAX_DUMP_LEVEL (31)
+#define MAX_DUMP_LEVEL (127)
#define NUM_ARRAY_DUMP_LEVEL (MAX_DUMP_LEVEL + 1) /* enough to allocate
all the dump_level */
#define DL_EXCLUDE_ZERO (0x001) /* Exclude Pages filled with Zeros */
@@ -189,6 +189,9 @@ isAnon(unsigned long mapping)
#define DL_EXCLUDE_USER_DATA (0x008) /* Exclude UserProcessData Pages */
#define DL_EXCLUDE_FREE (0x010) /* Exclude Free Pages */

+#define DL_EXCLUDE_FREE_HUGE (0x020) /* Exclude Free Huge Pages */
+#define DL_EXCLUDE_ACTIVE_HUGE (0x040) /* Exclude Active Huge Pages */
+

/*
* For parse_line()
@@ -1098,6 +1101,7 @@ struct symbol_table {
unsigned long long mem_map;
unsigned long long vmem_map;
unsigned long long mem_section;
+ unsigned long long hstates;
unsigned long long pkmap_count;
unsigned long long pkmap_count_next;
unsigned long long system_utsname;
@@ -1174,6 +1178,7 @@ struct size_table {
long zone;
long free_area;
long list_head;
+ long hstate;
long node_memblk_s;
long nodemask_t;

@@ -1232,6 +1237,13 @@ struct offset_table {
struct free_area {
long free_list;
} free_area;
+ struct hstate {
+ long order;
+ long nr_huge_pages;
+ long free_huge_pages;
+ long hugepage_activelist;
+ long hugepage_freelists;
+ } hstate;
struct list_head {
long next;
long prev;
@@ -1368,6 +1380,9 @@ struct array_table {
struct free_area_at {
long free_list;
} free_area;
+ struct hstate_at {
+ long hugepage_freelists;
+ } hstate;
struct kimage_at {
long segment;
} kimage;
@@ -1388,6 +1403,8 @@ struct number_table {
long PG_hwpoison;

long PAGE_BUDDY_MAPCOUNT_VALUE;
+
+ long HUGE_MAX_HSTATE;
};

struct srcfile_table {

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/