hugetlb demand paging patch part [2/3]

From: Chen, Kenneth W
Date: Tue Apr 13 2004 - 18:37:04 EST


Part 2 of 3

2. hugetlb_demand_generic.patch - this handles bulk of hugetlb demand
paging for generic portion of the kernel. I've put hugetlb fault
handler in mm/hugetlbpage.c since the fault handler is *exactly* the
same for all arch, but that requires opening up huge_pte_alloc() and
set_huge_pte() functions in each arch. If people object to where it
should live, it takes me less than a minute to delete the common
code and replicate it in each of the 5 arches that support hugetlb.
Just let me know if that's the case.


fs/hugetlbfs/inode.c | 18 +++------------
include/linux/hugetlb.h | 9 ++++---
mm/hugetlb.c | 56 ++++++++++++++++++++++++++++++++++++++++++++----
mm/memory.c | 7 ------
4 files changed, 62 insertions(+), 28 deletions(-)

diff -Nurp linux-2.6.5/fs/hugetlbfs/inode.c linux-2.6.5.htlb/fs/hugetlbfs/inode.c
--- linux-2.6.5/fs/hugetlbfs/inode.c 2004-04-13 12:56:29.000000000 -0700
+++ linux-2.6.5.htlb/fs/hugetlbfs/inode.c 2004-04-13 12:42:35.000000000 -0700
@@ -218,11 +218,6 @@ static int hugetlb_acct_commit(struct in
{
return region_add(&inode->i_mapping->private_list, from, to);
}
-static void hugetlb_acct_undo(struct inode *inode, int chg)
-{
- hugetlb_put_quota(inode->i_mapping, chg);
- hugetlb_acct_memory(-chg);
-}
static void hugetlb_acct_release(struct inode *inode, int to)
{
int chg;
@@ -252,9 +247,8 @@ static struct backing_dev_info hugetlbfs
static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file->f_dentry->d_inode;
- struct address_space *mapping = inode->i_mapping;
loff_t len, vma_len;
- int ret;
+ int ret = 0;
int chg;

if (vma->vm_start & ~HPAGE_MASK)
@@ -281,15 +275,11 @@ static int hugetlbfs_file_mmap(struct fi
file_accessed(file);
vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
vma->vm_ops = &hugetlb_vm_ops;
- ret = hugetlb_prefault(mapping, vma);
len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
- if (ret == 0) {
- if (inode->i_size < len)
- inode->i_size = len;
- hugetlb_acct_commit(inode, VMACCTPG(vma->vm_pgoff),
+ if (inode->i_size < len)
+ inode->i_size = len;
+ hugetlb_acct_commit(inode, VMACCTPG(vma->vm_pgoff),
VMACCTPG(vma->vm_pgoff + (vma_len >> PAGE_SHIFT)));
- } else
- hugetlb_acct_undo(inode, chg);

unlock_out:
up(&inode->i_sem);
diff -Nurp linux-2.6.5/include/linux/hugetlb.h linux-2.6.5.htlb/include/linux/hugetlb.h
--- linux-2.6.5/include/linux/hugetlb.h 2004-04-13 12:56:29.000000000 -0700
+++ linux-2.6.5.htlb/include/linux/hugetlb.h 2004-04-13 12:02:31.000000000 -0700
@@ -14,10 +14,12 @@ static inline int is_vm_hugetlb_page(str

int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void *, size_t *);
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
-int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
void zap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
-int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
+pte_t *huge_pte_alloc(struct mm_struct *, unsigned long);
+void set_huge_pte(struct mm_struct *, struct vm_area_struct *, struct page *, pte_t *, int);
+int handle_hugetlb_mm_fault(struct mm_struct *, struct vm_area_struct *,
+ unsigned long, int);
void huge_page_release(struct page *);
int hugetlb_report_meminfo(char *);
int is_hugepage_mem_enough(size_t);
@@ -66,10 +68,9 @@ static inline unsigned long hugetlb_tota
return 0;
}

-#define follow_hugetlb_page(m,v,p,vs,a,b,i) ({ BUG(); 0; })
#define follow_huge_addr(mm, vma, addr, write) 0
#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
-#define hugetlb_prefault(mapping, vma) ({ BUG(); 0; })
+#define handle_hugetlb_mm_fault(mm, vma, addr, write) ({ BUG(); 0; })
#define zap_hugepage_range(vma, start, len) BUG()
#define unmap_hugepage_range(vma, start, end) BUG()
#define huge_page_release(page) BUG()
diff -Nurp linux-2.6.5/mm/hugetlb.c linux-2.6.5.htlb/mm/hugetlb.c
--- linux-2.6.5/mm/hugetlb.c 2004-04-13 12:56:13.000000000 -0700
+++ linux-2.6.5.htlb/mm/hugetlb.c 2004-04-13 12:18:29.000000000 -0700
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
#include <linux/sysctl.h>

const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
@@ -217,11 +218,58 @@ unsigned long hugetlb_total_pages(void)
}
EXPORT_SYMBOL(hugetlb_total_pages);

+int handle_hugetlb_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+ unsigned long addr, int write_access)
+{
+ pte_t *pte;
+ struct page *page;
+ struct address_space *mapping;
+ unsigned long idx; int ret; /* idx: huge-page index into backing file */
+
+ spin_lock(&mm->page_table_lock);
+ pte = huge_pte_alloc(mm, addr & HPAGE_MASK);
+ if (!pte) {
+ spin_unlock(&mm->page_table_lock);
+ return VM_FAULT_OOM; /* no memory for the page table itself */
+ }
+ if (!pte_none(*pte))
+ goto out; /* raced: another thread already instantiated it */
+ spin_unlock(&mm->page_table_lock);
+
+ mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
+ idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
+ + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
+retry:
+ page = find_get_page(mapping, idx);
+ if (!page) {
+ page = alloc_huge_page();
+ if (!page)
+ return VM_FAULT_OOM; /* huge page pool exhausted; don't spin */
+ ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
+ if (!ret) {
+ unlock_page(page);
+ } else {
+ free_huge_page(page);
+ if (ret == -EEXIST)
+ goto retry; /* lost insertion race; use the winner's page */
+ else
+ return VM_FAULT_OOM;
+ }
+ }
+
+ spin_lock(&mm->page_table_lock);
+ if (pte_none(*pte))
+ set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
+ else
+ page_cache_release(page); /* raced on the pte; drop our reference */
+out:
+ spin_unlock(&mm->page_table_lock);
+ return VM_FAULT_MINOR;
+}
+
/*
- * We cannot handle pagefaults against hugetlb pages at all. They cause
- * handle_mm_fault() to try to instantiate regular-sized pages in the
- * hugegpage VMA. do_page_fault() is supposed to trap this, so BUG is we get
- * this far.
+ * We should not get here because handle_mm_fault() is supposed to trap
+ * hugetlb page fault. BUG if we get here.
*/
static struct page *hugetlb_nopage(struct vm_area_struct *vma,
unsigned long address, int *unused)
diff -Nurp linux-2.6.5/mm/memory.c linux-2.6.5.htlb/mm/memory.c
--- linux-2.6.5/mm/memory.c 2004-04-13 12:56:13.000000000 -0700
+++ linux-2.6.5.htlb/mm/memory.c 2004-04-13 12:02:31.000000000 -0700
@@ -769,11 +769,6 @@ int get_user_pages(struct task_struct *t
if ((pages && vm_io) || !(flags & vma->vm_flags))
return i ? : -EFAULT;

- if (is_vm_hugetlb_page(vma)) {
- i = follow_hugetlb_page(mm, vma, pages, vmas,
- &start, &len, i);
- continue;
- }
spin_lock(&mm->page_table_lock);
do {
struct page *map = NULL;
@@ -1697,7 +1692,7 @@ int handle_mm_fault(struct mm_struct *mm
inc_page_state(pgfault);

if (is_vm_hugetlb_page(vma))
- return VM_FAULT_SIGBUS; /* mapping truncation does this. */
+ return handle_hugetlb_mm_fault(mm, vma, address, write_access);

/*
* We need the page table lock to synchronize with kswapd


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/