Linus, Andrew,
Attached is the hugetlbpage patch for 2.5.48 containing following main
changes:
1) Bug fixes (mainly in the unsuccessful attempts of hugepages).
2) Removal of Radix Tree field in key structure (as it is not needed).
3) Include the IPC_LOCK for permission to use hugepages.
4) Increment the key_counts during forks.
thanks,
rohit
--- linux-2.5.48/include/linux/hugetlb.h Sun Nov 17 20:29:45 2002
+++ linux-2.5.48.work//include/linux/hugetlb.h Thu Nov 21 11:49:57 2002
@@ -4,7 +4,17 @@
#ifdef CONFIG_HUGETLB_PAGE
struct ctl_table;
-struct hugetlb_key;
+struct hugetlb_key {
+ struct page *root;
+ loff_t size;
+ atomic_t count;
+ spinlock_t lock;
+ int key;
+ int busy;
+ uid_t uid;
+ gid_t gid;
+ umode_t mode;
+};
static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
{
--- linux-2.5.48/arch/i386/mm/hugetlbpage.c Sun Nov 17 20:29:55 2002
+++ linux-2.5.48.work/arch/i386/mm/hugetlbpage.c Thu Nov 21 12:12:18 2002
@@ -19,6 +19,8 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
+#include <linux/sysctl.h>
+
static long htlbpagemem;
int htlbpage_max;
static long htlbzone_pages;
@@ -29,18 +31,6 @@
#define MAX_ID 32
-struct hugetlb_key {
- struct radix_tree_root tree;
- atomic_t count;
- spinlock_t lock;
- int key;
- int busy;
- uid_t uid;
- gid_t gid;
- umode_t mode;
- loff_t size;
-};
-
static struct hugetlb_key htlbpagek[MAX_ID];
static void mark_key_busy(struct hugetlb_key *hugetlb_key)
@@ -81,7 +71,7 @@
spin_lock(&htlbpage_lock);
hugetlb_key = find_key(key);
if (!hugetlb_key) {
- if (!capable(CAP_SYS_ADMIN) || !in_group_p(0))
+ if (!capable(CAP_SYS_ADMIN) && !capable(CAP_IPC_LOCK) && !in_group_p(0))
hugetlb_key = ERR_PTR(-EPERM);
else if (!(flag & IPC_CREAT))
hugetlb_key = ERR_PTR(-ENOENT);
@@ -96,7 +86,7 @@
hugetlb_key = &htlbpagek[i];
mark_key_busy(hugetlb_key);
hugetlb_key->key = key;
- INIT_RADIX_TREE(&hugetlb_key->tree, GFP_ATOMIC);
+ hugetlb_key->root = NULL;
hugetlb_key->uid = current->fsuid;
hugetlb_key->gid = current->fsgid;
hugetlb_key->mode = prot;
@@ -107,7 +97,6 @@
hugetlb_key = ERR_PTR(-EAGAIN);
spin_unlock(&htlbpage_lock);
} else if (check_size_prot(hugetlb_key, len, prot, flag) < 0) {
- hugetlb_key->key = 0;
hugetlb_key = ERR_PTR(-EINVAL);
}
} while (hugetlb_key == ERR_PTR(-EAGAIN));
@@ -120,7 +109,10 @@
{
unsigned long index;
unsigned long max_idx;
+ struct page *page, *prev;
+ if (key == NULL)
+ return;
if (!atomic_dec_and_test(&key->count)) {
spin_lock(&htlbpage_lock);
clear_key_busy(key);
@@ -129,16 +121,19 @@
}
max_idx = (key->size >> HPAGE_SHIFT);
+ page = key->root;
for (index = 0; index < max_idx; ++index) {
- struct page *page = radix_tree_lookup(&key->tree, index);
if (!page)
continue;
- huge_page_release(page);
+ prev = page;
+ page = (struct page *)page->private;
+ prev->private = 0UL;
+ huge_page_release(prev);
}
spin_lock(&htlbpage_lock);
key->key = 0;
clear_key_busy(key);
- INIT_RADIX_TREE(&key->tree, GFP_ATOMIC);
+ key->root = NULL;
spin_unlock(&htlbpage_lock);
}
@@ -247,7 +242,7 @@
vma->vm_end = end;
}
spin_unlock(&mm->page_table_lock);
- out_error1:
+out_error1:
return -1;
}
@@ -259,7 +254,10 @@
struct page *ptepage;
unsigned long addr = vma->vm_start;
unsigned long end = vma->vm_end;
+ struct hugetlb_key *key = vma->vm_private_data;
+ if ( key )
+ atomic_inc(&key->count);
while (addr < end) {
dst_pte = huge_pte_alloc(dst, addr);
if (!dst_pte)
@@ -352,6 +350,8 @@
spin_unlock(&htlbpage_lock);
for (address = start; address < end; address += HPAGE_SIZE) {
pte = huge_pte_offset(mm, address);
+ if (pte_none(*pte))
+ continue;
page = pte_page(*pte);
huge_page_release(page);
pte_clear(pte);
@@ -381,25 +381,10 @@
return 0;
}
-struct page *key_find_page(struct hugetlb_key *key, unsigned long index)
-{
- struct page *page = radix_tree_lookup(&key->tree, index);
- if (page)
- get_page(page);
- return page;
-}
-
-int key_add_page(struct page *page, struct hugetlb_key *key, unsigned long index)
-{
- int error = radix_tree_insert(&key->tree, index, page);
- if (!error)
- get_page(page);
- return error;
-}
-
-static int prefault_key(struct hugetlb_key *key, struct vm_area_struct *vma)
+static int prefault_key(struct hugetlb_key *key, struct vm_area_struct *vma, unsigned long *temp)
{
struct mm_struct *mm = current->mm;
+ struct page *page, *prev;
unsigned long addr;
int ret = 0;
@@ -408,21 +393,18 @@
spin_lock(&mm->page_table_lock);
spin_lock(&key->lock);
+ prev = page = key->root;
for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
- unsigned long idx;
pte_t *pte = huge_pte_alloc(mm, addr);
- struct page *page;
if (!pte) {
+ spin_unlock(&key->lock);
ret = -ENOMEM;
goto out;
}
if (!pte_none(*pte))
continue;
- idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
- + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
- page = key_find_page(key, idx);
if (!page) {
page = alloc_hugetlb_page();
if (!page) {
@@ -430,13 +412,20 @@
ret = -ENOMEM;
goto out;
}
- key_add_page(page, key, idx);
+ if (key->root == NULL)
+ key->root = page;
+ else
+ prev->private = (unsigned long)page;
}
+ get_page(page);
set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
+ prev = page;
+ page = (struct page *)page->private;
}
spin_unlock(&key->lock);
out:
spin_unlock(&mm->page_table_lock);
+ *temp = addr;
return ret;
}
@@ -446,6 +435,7 @@
struct vm_area_struct *vma;
struct hugetlb_key *hugetlb_key;
int retval = -ENOMEM;
+ unsigned long temp;
hugetlb_key = alloc_key(key, len, prot, flag );
spin_unlock(&htlbpage_lock);
@@ -455,17 +445,18 @@
addr = do_mmap_pgoff(NULL, addr, len, (unsigned long) prot,
MAP_NORESERVE|MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0);
if (IS_ERR((void *) addr))
- goto out_release;
+ goto out;
vma = find_vma(mm, addr);
if (!vma) {
retval = -EINVAL;
- goto out_release;
+ goto out;
}
- retval = prefault_key(hugetlb_key, vma);
+ retval = prefault_key(hugetlb_key, vma, &temp);
+ addr = temp;
if (retval)
- goto out;
+ goto out_release;
vma->vm_flags |= (VM_HUGETLB | VM_RESERVED);
vma->vm_ops = &hugetlb_vm_ops;
@@ -474,7 +465,7 @@
clear_key_busy(hugetlb_key);
spin_unlock(&htlbpage_lock);
return retval;
-out:
+out_release:
if (addr > vma->vm_start) {
unsigned long raddr;
raddr = vma->vm_end;
@@ -482,10 +473,8 @@
zap_hugepage_range(vma, vma->vm_start, vma->vm_end - vma->vm_start);
vma->vm_end = raddr;
}
- spin_lock(&mm->page_table_lock);
do_munmap(mm, vma->vm_start, len);
- spin_unlock(&mm->page_table_lock);
-out_release:
+out:
hugetlb_release_key(hugetlb_key);
return retval;
}
@@ -533,10 +522,8 @@
static int alloc_private_hugetlb_pages(int key, unsigned long addr, unsigned long len, int prot, int flag)
{
- if (!capable(CAP_SYS_ADMIN)) {
- if (!in_group_p(0))
- return -EPERM;
- }
+ if (!capable(CAP_SYS_ADMIN) && !capable(CAP_IPC_LOCK) && !in_group_p(0))
+ return -EPERM;
addr = do_mmap_pgoff(NULL, addr, len, prot,
MAP_NORESERVE|MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, 0);
if (IS_ERR((void *) addr))
--- linux-2.5.48/arch/i386/kernel/sys_i386.c Sun Nov 17 20:29:56 2002
+++ linux-2.5.48.work/arch/i386/kernel/sys_i386.c Thu Nov 21 12:01:08 2002
@@ -294,17 +294,17 @@
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- struct hugetlb_key *key;
int retval;
- vma = find_vma(current->mm, addr);
- if (!vma || !(vma->vm_flags & VM_HUGETLB) || vma->vm_start != addr)
- return -EINVAL;
down_write(&mm->mmap_sem);
- key = (struct hugetlb_key *)vma->vm_private_data;
+ vma = find_vma(current->mm, addr);
+ if (!vma || !(vma->vm_flags & VM_HUGETLB) || vma->vm_start != addr) {
+ retval = -EINVAL;
+ goto out;
+ }
retval = do_munmap(vma->vm_mm, addr, vma->vm_end - addr);
+out:
up_write(&mm->mmap_sem);
- hugetlb_release_key(key);
return retval;
}
#else
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
This archive was generated by hypermail 2b29 : Sat Nov 23 2002 - 22:00:38 EST