[patch 5/6] memory hotplug for hugetlbpages

From: Hirokazu Takahashi
Date: Tue Apr 06 2004 - 07:53:02 EST


This is part 5 of the memory hotplug patches for hugetlbpages. It adds a remap_hugetlb_pages() entry point that releases free hugepages belonging to a zone being removed and remaps the in-use ones onto other memory, and hooks it into remapd() in mm/memhotplug.c.
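
For reference, here is a minimal sketch of how the new hook is meant to be driven, based on the mm/memhotplug.c hunk at the bottom of this patch. remapd() and struct zone come from the earlier patches in this series; the function name remapd_sketch and the comments are illustrative only and are not part of the patch:

	#include <linux/hugetlb.h>	/* remap_hugetlb_pages() */

	/*
	 * Illustrative sketch: when remapping a zone, hugetlbpages are
	 * handled first through the new remap_hugetlb_pages() hook.  If
	 * any hugepages were processed, the daemon backs off and retries
	 * later; otherwise it falls through to ordinary LRU remapping.
	 */
	static int remapd_sketch(struct zone *zone)
	{
		if (remap_hugetlb_pages(zone))
			return 0;	/* hugepages handled; retry later */

		/* ... continue with normal LRU page remapping ... */
		return 0;
	}

A non-zero return simply makes remapd() back off and revisit the zone later, as shown in the actual hunk below.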

--- linux-2.6.5.ORG/include/linux/hugetlb.h Tue Apr 6 22:28:09 2032
+++ linux-2.6.5/include/linux/hugetlb.h Tue Apr 6 15:00:59 2032
@@ -31,6 +31,7 @@ int pmd_huge(pmd_t pmd);
extern int hugetlb_fault(struct mm_struct *, struct vm_area_struct *,
int, unsigned long);
int try_to_unmap_hugepage(struct page *, pte_addr_t, struct list_head *);
+int remap_hugetlb_pages(struct zone *);

extern int htlbpage_max;

@@ -83,6 +84,7 @@ static inline unsigned long hugetlb_tota
#define hugetlb_free_pgtables(tlb, prev, start, end) do { } while (0)
#define hugetlb_fault(mm, vma, write, addr) 0
#define try_to_unmap_hugepage(page, paddr, force) 0
+#define remap_hugetlb_pages(zone) 0

#ifndef HPAGE_MASK
#define HPAGE_MASK 0 /* Keep the compiler happy */
--- linux-2.6.5.ORG/arch/i386/mm/hugetlbpage.c Tue Apr 6 22:28:09 2032
+++ linux-2.6.5/arch/i386/mm/hugetlbpage.c Tue Apr 6 22:30:59 2032
@@ -13,6 +13,7 @@
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/rmap-locking.h>
+#include <linux/memhotplug.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/sysctl.h>
@@ -92,7 +93,10 @@ static struct page *dequeue_huge_page(vo
static struct page *alloc_fresh_huge_page(void)
{
static int nid = 0;
+ struct pglist_data *pgdat;
struct page *page;
+ while ((pgdat = NODE_DATA(nid)) == NULL || !pgdat->enabled)
+ nid = (nid + 1) % numnodes;
page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
nid = (nid + 1) % numnodes;
return page;
@@ -114,6 +118,8 @@ static struct page *alloc_hugetlb_page(v
htlbpagemem--;
spin_unlock(&htlbpage_lock);
set_page_count(page, 1);
+ page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
+ 1 << PG_referenced | 1 << PG_again);
page->lru.prev = (void *)free_huge_page;
for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i)
clear_highpage(&page[i]);
@@ -468,6 +474,15 @@ again:
goto again;
}
}
+
+ if (page->mapping == NULL) {
+ BUG_ON(! PageAgain(page));
+ /* This page will go back to freelists[] */
+ huge_page_release(page); /* XXX */
+ unlock_page(page);
+ goto again;
+ }
+
spin_lock(&mm->page_table_lock);
if (pte_none(*pte)) {
set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
@@ -614,7 +629,7 @@ static void update_and_free_page(struct
__free_pages(page, HUGETLB_PAGE_ORDER);
}

-static int try_to_free_low(int count)
+int try_to_free_hugepages(int idx, int count, struct zone *zone)
{
struct list_head *p;
struct page *page, *map;
@@ -622,7 +637,7 @@ static int try_to_free_low(int count)
map = NULL;
spin_lock(&htlbpage_lock);
/* all lowmem is on node 0 */
- list_for_each(p, &hugepage_freelists[0]) {
+ list_for_each(p, &hugepage_freelists[idx]) {
if (map) {
list_del(&map->list);
unregister_huge_page(map);
@@ -633,7 +648,8 @@ static int try_to_free_low(int count)
break;
}
page = list_entry(p, struct page, list);
- if (!PageHighMem(page))
+ if ((zone == NULL && !PageHighMem(page)) ||
+ (page_zone(page) == zone))
map = page;
}
if (map) {
@@ -647,6 +663,11 @@ static int try_to_free_low(int count)
return count;
}

+int try_to_free_low(int count)
+{
+ return try_to_free_hugepages(0, count, NULL);
+}
+
static int set_hugetlb_mem_size(int count)
{
int lcount;
@@ -686,6 +707,146 @@ static int set_hugetlb_mem_size(int coun
}
return (int) htlbzone_pages;
}
+
+#ifdef CONFIG_MEMHOTPLUG
+static int copy_hugepage(struct page *to, struct page *from)
+{
+ int size;
+ for (size = 0; size < HPAGE_SIZE; size += PAGE_SIZE) {
+ copy_highpage(to, from);
+ to++;
+ from++;
+ }
+ return 0;
+}
+
+/*
+ * Allocate a hugepage from Buddy system directly.
+ */
+static struct page *
+hugepage_remap_alloc(int nid)
+{
+ struct page *page;
+ /*
+ * ToDo:
+ * - NUMA-aware page allocation is required. We should allocate
+ * a hugepage from the node the process depends on.
+ * - New hugepages should be preallocated prior to remapping pages
+ * so that a shortage of memory can be detected before remapping starts.
+ * - New hugepages should be allocated from the node specified by nid.
+ */
+ page = alloc_fresh_huge_page();
+
+ if (page == NULL) {
+ printk(KERN_WARNING "remap: Failed to allocate new hugepage\n");
+ } else {
+ spin_lock(&htlbpage_lock);
+ register_huge_page(page);
+ enqueue_huge_page(page);
+ htlbpagemem++;
+ htlbzone_pages++;
+ spin_unlock(&htlbpage_lock);
+ }
+ page = alloc_hugetlb_page();
+ unregister_huge_page(page); /* XXXX */
+ return page;
+}
+
+/*
+ * Free a hugepage into Buddy system directly.
+ */
+static int
+hugepage_delete(struct page *page)
+{
+ BUG_ON(page_count(page) != 1);
+ BUG_ON(page->mapping);
+
+ spin_lock(&htlbpage_lock);
+ update_and_free_page(page);
+ spin_unlock(&htlbpage_lock);
+ return 0;
+}
+
+static int
+hugepage_register(struct page *page)
+{
+ spin_lock(&htlbpage_lock);
+ register_huge_page(page);
+ spin_unlock(&htlbpage_lock);
+ return 0;
+}
+
+static int
+hugepage_release_buffer(struct page *page)
+{
+ BUG();
+ return -1;
+}
+
+static struct remap_operations hugepage_remap_ops = {
+ .remap_alloc_page = hugepage_remap_alloc,
+ .remap_delete_page = hugepage_delete,
+ .remap_copy_page = copy_hugepage,
+ .remap_lru_add_page = hugepage_register,
+ .remap_release_buffers = hugepage_release_buffer,
+ .remap_prepare = NULL,
+ .remap_stick_page = NULL
+};
+
+int remap_hugetlb_pages(struct zone *zone)
+{
+ struct list_head *p;
+ struct page *page, *map;
+ int idx = zone->zone_pgdat->node_id;
+ LIST_HEAD(templist);
+ int ret = 0;
+
+ try_to_free_hugepages(idx, -htlbpagemem, zone);
+/* htlbpage_max = set_hugetlb_mem_size(htlbpage_max); */
+
+ map = NULL;
+ spin_lock(&htlbpage_lock);
+ list_for_each(p, &hugepage_alllists[idx]) {
+ page = list_entry(p, struct page, list);
+ if (map) {
+ page_cache_get(map-1);
+ unregister_huge_page(map-1);
+ list_add(&map->list, &templist);
+ map = NULL;
+ }
+ if (page_zone(page) == zone) {
+ map = page;
+ }
+ }
+ if (map) {
+ page_cache_get(map-1);
+ unregister_huge_page(map-1);
+ list_add(&map->list, &templist);
+ map = NULL;
+ }
+ spin_unlock(&htlbpage_lock);
+
+ while (!list_empty(&templist)) {
+ page = list_entry(templist.next, struct page, list);
+ list_del(&page->list);
+ INIT_LIST_HEAD(&page->list);
+ page--;
+
+ if (page_count(page) <= 1 || page->mapping == NULL ||
+ remap_onepage(page, REMAP_ANYNODE, 0, &hugepage_remap_ops)) {
+ /* free the page later */
+ spin_lock(&htlbpage_lock);
+ register_huge_page(page);
+ spin_unlock(&htlbpage_lock);
+ page_cache_release(page);
+ ret++;
+ }
+ }
+
+ htlbpage_max = set_hugetlb_mem_size(htlbpage_max);
+ return ret;
+}
+#endif /* CONFIG_MEMHOTPLUG */

int hugetlb_sysctl_handler(ctl_table *table, int write,
struct file *file, void *buffer, size_t *length)
--- linux-2.6.5.ORG/mm/memhotplug.c Tue Apr 6 22:28:09 2032
+++ linux-2.6.5/mm/memhotplug.c Tue Apr 6 15:00:59 2032
@@ -15,6 +15,7 @@
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/rmap-locking.h>
+#include <linux/hugetlb.h>
#include <linux/memhotplug.h>

#ifdef CONFIG_KDB
@@ -595,6 +596,8 @@ int remapd(void *p)
return 0;
}
atomic_inc(&remapd_count);
+ if (remap_hugetlb_pages(zone))
+ goto out;
on_each_cpu(lru_drain_schedule, NULL, 1, 1);
while(nr_failed < 100) {
spin_lock_irq(&zone->lru_lock);