hugetlb page patch for 2.5.48-bug fixes

From: Rohit Seth (rseth@unix-os.sc.intel.com)
Date: Thu Nov 21 2002 - 17:05:53 EST


Linus, Andrew,

Attached is the hugetlbpage patch for 2.5.48, containing the following main
changes:

1) Bug fixes (mainly in the paths that handle unsuccessful attempts to obtain hugepages).
2) Removal of the radix tree field from the key structure (as it is not needed).
3) Accept the CAP_IPC_LOCK capability as permission to use hugepages.
4) Increment the key's reference count (key->count) during forks.

thanks,
rohit


--- linux-2.5.48/include/linux/hugetlb.h Sun Nov 17 20:29:45 2002
+++ linux-2.5.48.work//include/linux/hugetlb.h Thu Nov 21 11:49:57 2002
@@ -4,7 +4,17 @@
 #ifdef CONFIG_HUGETLB_PAGE
 
 struct ctl_table;
-struct hugetlb_key;
+struct hugetlb_key { /* per-key hugetlb bookkeeping; hugetlbpage.c keeps these in htlbpagek[] */
+ struct page *root; /* first huge page of the key (NULL when empty); further pages are chained through page->private */
+ loff_t size; /* size >> HPAGE_SHIFT bounds the page walk when the key is released */
+ atomic_t count; /* reference count: incremented when a vma carrying the key is copied, dropped with atomic_dec_and_test() on release */
+ spinlock_t lock; /* held by prefault_key() while it walks and extends the page chain */
+ int key; /* key value supplied by the caller at creation; reset to 0 once the pages are released */
+ int busy; /* presumably toggled by mark_key_busy()/clear_key_busy(); their bodies are not visible here */
+ uid_t uid; /* current->fsuid at creation */
+ gid_t gid; /* current->fsgid at creation */
+ umode_t mode; /* set from the prot argument at creation */
+};
 
 static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
 {
--- linux-2.5.48/arch/i386/mm/hugetlbpage.c Sun Nov 17 20:29:55 2002
+++ linux-2.5.48.work/arch/i386/mm/hugetlbpage.c Thu Nov 21 12:12:18 2002
@@ -19,6 +19,8 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
+#include <linux/sysctl.h>
+
 static long htlbpagemem;
 int htlbpage_max;
 static long htlbzone_pages;
@@ -29,18 +31,6 @@
 
 #define MAX_ID 32
 
-struct hugetlb_key {
- struct radix_tree_root tree;
- atomic_t count;
- spinlock_t lock;
- int key;
- int busy;
- uid_t uid;
- gid_t gid;
- umode_t mode;
- loff_t size;
-};
-
 static struct hugetlb_key htlbpagek[MAX_ID];
 
 static void mark_key_busy(struct hugetlb_key *hugetlb_key)
@@ -81,7 +71,7 @@
                 spin_lock(&htlbpage_lock);
                 hugetlb_key = find_key(key);
                 if (!hugetlb_key) {
- if (!capable(CAP_SYS_ADMIN) || !in_group_p(0))
+ if (!capable(CAP_SYS_ADMIN) && !capable(CAP_IPC_LOCK) && !in_group_p(0))
                                 hugetlb_key = ERR_PTR(-EPERM);
                         else if (!(flag & IPC_CREAT))
                                 hugetlb_key = ERR_PTR(-ENOENT);
@@ -96,7 +86,7 @@
                                         hugetlb_key = &htlbpagek[i];
                                         mark_key_busy(hugetlb_key);
                                         hugetlb_key->key = key;
- INIT_RADIX_TREE(&hugetlb_key->tree, GFP_ATOMIC);
+ hugetlb_key->root = NULL;
                                         hugetlb_key->uid = current->fsuid;
                                         hugetlb_key->gid = current->fsgid;
                                         hugetlb_key->mode = prot;
@@ -107,7 +97,6 @@
                         hugetlb_key = ERR_PTR(-EAGAIN);
                         spin_unlock(&htlbpage_lock);
                 } else if (check_size_prot(hugetlb_key, len, prot, flag) < 0) {
- hugetlb_key->key = 0;
                         hugetlb_key = ERR_PTR(-EINVAL);
                 }
         } while (hugetlb_key == ERR_PTR(-EAGAIN));
@@ -120,7 +109,10 @@
 {
         unsigned long index;
         unsigned long max_idx;
+ struct page *page, *prev;
 
+ if (key == NULL)
+ return;
         if (!atomic_dec_and_test(&key->count)) {
                 spin_lock(&htlbpage_lock);
                 clear_key_busy(key);
@@ -129,16 +121,19 @@
         }
 
         max_idx = (key->size >> HPAGE_SHIFT);
+ page = key->root;
         for (index = 0; index < max_idx; ++index) {
- struct page *page = radix_tree_lookup(&key->tree, index);
                 if (!page)
                         continue;
- huge_page_release(page);
+ prev = page;
+ page = (struct page *)page->private;
+ prev->private = 0UL;
+ huge_page_release(prev);
         }
         spin_lock(&htlbpage_lock);
         key->key = 0;
         clear_key_busy(key);
- INIT_RADIX_TREE(&key->tree, GFP_ATOMIC);
+ key->root = NULL;
         spin_unlock(&htlbpage_lock);
 }
 
@@ -247,7 +242,7 @@
                 vma->vm_end = end;
         }
         spin_unlock(&mm->page_table_lock);
- out_error1:
+out_error1:
         return -1;
 }
 
@@ -259,7 +254,10 @@
         struct page *ptepage;
         unsigned long addr = vma->vm_start;
         unsigned long end = vma->vm_end;
+ struct hugetlb_key *key = vma->vm_private_data;
 
+ if ( key )
+ atomic_inc(&key->count);
         while (addr < end) {
                 dst_pte = huge_pte_alloc(dst, addr);
                 if (!dst_pte)
@@ -352,6 +350,8 @@
         spin_unlock(&htlbpage_lock);
         for (address = start; address < end; address += HPAGE_SIZE) {
                 pte = huge_pte_offset(mm, address);
+ if (pte_none(*pte))
+ continue;
                 page = pte_page(*pte);
                 huge_page_release(page);
                 pte_clear(pte);
@@ -381,25 +381,10 @@
         return 0;
 }
 
-struct page *key_find_page(struct hugetlb_key *key, unsigned long index)
-{
- struct page *page = radix_tree_lookup(&key->tree, index);
- if (page)
- get_page(page);
- return page;
-}
-
-int key_add_page(struct page *page, struct hugetlb_key *key, unsigned long index)
-{
- int error = radix_tree_insert(&key->tree, index, page);
- if (!error)
- get_page(page);
- return error;
-}
-
-static int prefault_key(struct hugetlb_key *key, struct vm_area_struct *vma)
+static int prefault_key(struct hugetlb_key *key, struct vm_area_struct *vma, unsigned long *temp)
 {
         struct mm_struct *mm = current->mm;
+ struct page *page, *prev;
         unsigned long addr;
         int ret = 0;
 
@@ -408,21 +393,18 @@
 
         spin_lock(&mm->page_table_lock);
         spin_lock(&key->lock);
+ prev = page = key->root;
         for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
- unsigned long idx;
                 pte_t *pte = huge_pte_alloc(mm, addr);
- struct page *page;
 
                 if (!pte) {
+ spin_unlock(&key->lock);
                         ret = -ENOMEM;
                         goto out;
                 }
                 if (!pte_none(*pte))
                         continue;
 
- idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
- + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
- page = key_find_page(key, idx);
                 if (!page) {
                         page = alloc_hugetlb_page();
                         if (!page) {
@@ -430,13 +412,20 @@
                                 ret = -ENOMEM;
                                 goto out;
                         }
- key_add_page(page, key, idx);
+ if (key->root == NULL)
+ key->root = page;
+ else
+ prev->private = (unsigned long)page;
                 }
+ get_page(page);
                 set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
+ prev = page;
+ page = (struct page *)page->private;
         }
         spin_unlock(&key->lock);
 out:
         spin_unlock(&mm->page_table_lock);
+ *temp = addr;
         return ret;
 }
 
@@ -446,6 +435,7 @@
         struct vm_area_struct *vma;
         struct hugetlb_key *hugetlb_key;
         int retval = -ENOMEM;
+ unsigned long temp;
 
         hugetlb_key = alloc_key(key, len, prot, flag );
         spin_unlock(&htlbpage_lock);
@@ -455,17 +445,18 @@
         addr = do_mmap_pgoff(NULL, addr, len, (unsigned long) prot,
                         MAP_NORESERVE|MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0);
         if (IS_ERR((void *) addr))
- goto out_release;
+ goto out;
 
         vma = find_vma(mm, addr);
         if (!vma) {
                 retval = -EINVAL;
- goto out_release;
+ goto out;
         }
 
- retval = prefault_key(hugetlb_key, vma);
+ retval = prefault_key(hugetlb_key, vma, &temp);
+ addr = temp;
         if (retval)
- goto out;
+ goto out_release;
 
         vma->vm_flags |= (VM_HUGETLB | VM_RESERVED);
         vma->vm_ops = &hugetlb_vm_ops;
@@ -474,7 +465,7 @@
         clear_key_busy(hugetlb_key);
         spin_unlock(&htlbpage_lock);
         return retval;
-out:
+out_release:
         if (addr > vma->vm_start) {
                 unsigned long raddr;
                 raddr = vma->vm_end;
@@ -482,10 +473,8 @@
                 zap_hugepage_range(vma, vma->vm_start, vma->vm_end - vma->vm_start);
                 vma->vm_end = raddr;
         }
- spin_lock(&mm->page_table_lock);
         do_munmap(mm, vma->vm_start, len);
- spin_unlock(&mm->page_table_lock);
-out_release:
+out:
         hugetlb_release_key(hugetlb_key);
         return retval;
 }
@@ -533,10 +522,8 @@
 
 static int alloc_private_hugetlb_pages(int key, unsigned long addr, unsigned long len, int prot, int flag)
 {
- if (!capable(CAP_SYS_ADMIN)) {
- if (!in_group_p(0))
- return -EPERM;
- }
+ if (!capable(CAP_SYS_ADMIN) && !capable(CAP_IPC_LOCK) && !in_group_p(0))
+ return -EPERM;
         addr = do_mmap_pgoff(NULL, addr, len, prot,
                         MAP_NORESERVE|MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, 0);
         if (IS_ERR((void *) addr))
--- linux-2.5.48/arch/i386/kernel/sys_i386.c Sun Nov 17 20:29:56 2002
+++ linux-2.5.48.work/arch/i386/kernel/sys_i386.c Thu Nov 21 12:01:08 2002
@@ -294,17 +294,17 @@
 {
         struct mm_struct *mm = current->mm;
         struct vm_area_struct *vma;
- struct hugetlb_key *key;
         int retval;
 
- vma = find_vma(current->mm, addr);
- if (!vma || !(vma->vm_flags & VM_HUGETLB) || vma->vm_start != addr)
- return -EINVAL;
         down_write(&mm->mmap_sem);
- key = (struct hugetlb_key *)vma->vm_private_data;
+ vma = find_vma(current->mm, addr);
+ if (!vma || !(vma->vm_flags & VM_HUGETLB) || vma->vm_start != addr) {
+ retval = -EINVAL;
+ goto out;
+ }
         retval = do_munmap(vma->vm_mm, addr, vma->vm_end - addr);
+out:
         up_write(&mm->mmap_sem);
- hugetlb_release_key(key);
         return retval;
 }
 #else

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sat Nov 23 2002 - 22:00:38 EST