RE: [PATCH] [0/6] HUGETLB memory commitment

From: Andy Whitcroft
Date: Thu Apr 01 2004 - 16:35:51 EST


--On 31 March 2004 00:51 -0800 "Chen, Kenneth W" <kenneth.w.chen@xxxxxxxxx> wrote:

Under common case, worked perfectly! But there are always corner cases.

I can think of two ugliness:
1. very sparse hugetlb file. I can mmap one hugetlb page, at offset
512 GB. This would account 512GB + 1 hugetlb page as committed_AS.
But I only asked for one page mapping. One can say it's a feature,
but I think it's a bug.

2. There is no error checking (to undo the committed_AS accounting) after
hugetlb_prefault(). hugetlb_prefault doesn't always succeed in allocat-
ing all the pages user asked for due to disk quota limit. It can have
partial allocation which would put the committed_AS in a wedged state.

O.k. Here is the latest version of the hugetlb commitment tracking patch
(hugetlb_tracking_R4). This now understands the difference between shm
allocated and mmap allocated and handles them differently. This should
fix 1. We now handle the commitments correctly under quota failures.

Please review.

-apw

---
arch/i386/mm/hugetlbpage.c | 30 +++++++++++++------
file | 1
fs/hugetlbfs/inode.c | 69 +++++++++++++++++++++++++++++++++++++++++++--
fs/proc/proc_misc.c | 1
include/linux/hugetlb.h | 5 +++
5 files changed, 93 insertions(+), 13 deletions(-)

diff -X /home/apw/lib/vdiff.excl -rupN reference/arch/i386/mm/hugetlbpage.c current/arch/i386/mm/hugetlbpage.c
--- reference/arch/i386/mm/hugetlbpage.c 2004-04-01 13:37:14.000000000 +0100
+++ current/arch/i386/mm/hugetlbpage.c 2004-04-01 21:54:54.000000000 +0100
@@ -72,6 +72,7 @@ static struct page *alloc_hugetlb_page(v
spin_unlock(&htlbpage_lock);
return NULL;
}
+printk(KERN_WARNING "alloc_hugetlb_page: alloced %08lx\n", (unsigned long) page);
htlbpagemem--;
spin_unlock(&htlbpage_lock);
set_page_count(page, 1);
@@ -282,6 +283,7 @@ static void free_huge_page(struct page *

INIT_LIST_HEAD(&page->list);

+printk(KERN_WARNING "free_huge_page: returned %08lx\n", (unsigned long) page);
spin_lock(&htlbpage_lock);
enqueue_huge_page(page);
htlbpagemem++;
@@ -334,6 +336,7 @@ int hugetlb_prefault(struct address_spac
struct mm_struct *mm = current->mm;
unsigned long addr;
int ret = 0;
+ struct page *page;

BUG_ON(vma->vm_start & ~HPAGE_MASK);
BUG_ON(vma->vm_end & ~HPAGE_MASK);
@@ -342,7 +345,6 @@ int hugetlb_prefault(struct address_spac
for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
unsigned long idx;
pte_t *pte = huge_pte_alloc(mm, addr);
- struct page *page;

if (!pte) {
ret = -ENOMEM;
@@ -355,30 +357,38 @@ int hugetlb_prefault(struct address_spac
+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
page = find_get_page(mapping, idx);
if (!page) {
- /* charge the fs quota first */
+ /* charge against commitment */
+ ret = hugetlb_charge_page(vma);
+ if (ret)
+ goto out;
+ /* charge the fs quota */
if (hugetlb_get_quota(mapping)) {
ret = -ENOMEM;
- goto out;
+ goto undo_charge;
}
page = alloc_hugetlb_page();
if (!page) {
- hugetlb_put_quota(mapping);
ret = -ENOMEM;
- goto out;
+ goto undo_quota;
}
ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
unlock_page(page);
- if (ret) {
- hugetlb_put_quota(mapping);
- free_huge_page(page);
- goto out;
- }
+ if (ret)
+ goto undo_page;
}
set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
}
out:
spin_unlock(&mm->page_table_lock);
return ret;
+
+undo_page:
+ free_huge_page(page);
+undo_quota:
+ hugetlb_put_quota(mapping);
+undo_charge:
+ hugetlb_uncharge_page(vma);
+ goto out;
}

static void update_and_free_page(struct page *page)
diff -X /home/apw/lib/vdiff.excl -rupN reference/file current/file
--- reference/file 1970-01-01 01:00:00.000000000 +0100
+++ current/file 2004-04-01 13:37:14.000000000 +0100
@@ -0,0 +1 @@
+this is more text
diff -X /home/apw/lib/vdiff.excl -rupN reference/fs/hugetlbfs/inode.c current/fs/hugetlbfs/inode.c
--- reference/fs/hugetlbfs/inode.c 2004-03-25 02:43:00.000000000 +0000
+++ current/fs/hugetlbfs/inode.c 2004-04-01 22:41:07.000000000 +0100
@@ -32,6 +32,53 @@
/* some random number */
#define HUGETLBFS_MAGIC 0x958458f6

+#define HUGETLBFS_NOACCT (~0UL)
+
+atomic_t hugetlb_committed_space = ATOMIC_INIT(0);
+
+int hugetlb_acct_memory(long delta)
+{
+printk(KERN_WARNING "hugetlb_acct_memory: delta<%ld>\n", delta);
+ atomic_add(delta, &hugetlb_committed_space);
+ if (delta > 0 && atomic_read(&hugetlb_committed_space) >
+ hugetlb_total_pages()) {
+ atomic_add(-delta, &hugetlb_committed_space);
+ return -ENOMEM;
+ }
+ return 0;
+}
+int hugetlb_charge_page(struct vm_area_struct *vma)
+{
+ int ret;
+
+ /* if this file is marked for commit on demand then see if we can
+ * commmit a page, if so account for it against this file. */
+ if (vma->vm_file->f_dentry->d_inode->i_blocks != ~0) {
+ ret = hugetlb_acct_memory(HPAGE_SIZE / PAGE_SIZE);
+ if (ret)
+ return ret;
+ vma->vm_file->f_dentry->d_inode->i_blocks++;
+ }
+ return 0;
+}
+int hugetlb_uncharge_page(struct vm_area_struct *vma)
+{
+ /* if this file is marked for commit on demand return a page. */
+ if (vma->vm_file->f_dentry->d_inode->i_blocks != ~0) {
+ hugetlb_acct_memory(-(HPAGE_SIZE / PAGE_SIZE));
+ vma->vm_file->f_dentry->d_inode->i_blocks--;
+ }
+ return 0;
+}
+
+int hugetlbfs_report_meminfo(char *buf)
+{
+#define K(x) ((x) << (PAGE_SHIFT - 10))
+ long htlb = atomic_read(&hugetlb_committed_space);
+ return sprintf(buf, "HugeCommitted_AS: %5lu kB\n", K(htlb));
+#undef K
+}
+
static struct super_operations hugetlbfs_ops;
static struct address_space_operations hugetlbfs_aops;
struct file_operations hugetlbfs_file_operations;
@@ -62,11 +109,11 @@ static int hugetlbfs_file_mmap(struct fi
vma_len = (loff_t)(vma->vm_end - vma->vm_start);

down(&inode->i_sem);
+ len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
file_accessed(file);
vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
vma->vm_ops = &hugetlb_vm_ops;
ret = hugetlb_prefault(mapping, vma);
- len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
if (ret == 0 && inode->i_size < len)
inode->i_size = len;
up(&inode->i_sem);
@@ -200,6 +247,11 @@ static void hugetlbfs_delete_inode(struc

if (inode->i_data.nrpages)
truncate_hugepages(&inode->i_data, 0);
+ if (inode->i_blocks != HUGETLBFS_NOACCT)
+ hugetlb_acct_memory(-(inode->i_blocks *
+ (HPAGE_SIZE / PAGE_SIZE)));
+ else
+ hugetlb_acct_memory(-(inode->i_size / PAGE_SIZE));

security_inode_delete(inode);

@@ -241,6 +293,11 @@ out_truncate:
spin_unlock(&inode_lock);
if (inode->i_data.nrpages)
truncate_hugepages(&inode->i_data, 0);
+ if (inode->i_blocks != HUGETLBFS_NOACCT)
+ hugetlb_acct_memory(-(inode->i_blocks *
+ (HPAGE_SIZE / PAGE_SIZE)));
+ else
+ hugetlb_acct_memory(-(inode->i_size / PAGE_SIZE));

if (sbinfo->free_inodes >= 0) {
spin_lock(&sbinfo->stat_lock);
@@ -350,6 +407,10 @@ static int hugetlbfs_setattr(struct dent
error = hugetlb_vmtruncate(inode, attr->ia_size);
if (error)
goto out;
+ /* We rely on the fact that the sizes are hugepage aligned,
+ * and that hugetlb_vmtruncate prevents extend. */
+ hugetlb_acct_memory((attr->ia_size - i_size_read(inode)) /
+ PAGE_SIZE);
attr->ia_valid &= ~ATTR_SIZE;
}
error = inode_setattr(inode, attr);
@@ -710,8 +771,9 @@ struct file *hugetlb_zero_setup(size_t s
if (!capable(CAP_IPC_LOCK))
return ERR_PTR(-EPERM);

- if (!is_hugepage_mem_enough(size))
- return ERR_PTR(-ENOMEM);
+ error = hugetlb_acct_memory(size / PAGE_SIZE);
+ if (error)
+ return ERR_PTR(error);

root = hugetlbfs_vfsmount->mnt_root;
snprintf(buf, 16, "%lu", hugetlbfs_counter());
@@ -736,6 +798,7 @@ struct file *hugetlb_zero_setup(size_t s
d_instantiate(dentry, inode);
inode->i_size = size;
inode->i_nlink = 0;
+ inode->i_blocks = HUGETLBFS_NOACCT;
file->f_vfsmnt = mntget(hugetlbfs_vfsmount);
file->f_dentry = dentry;
file->f_mapping = inode->i_mapping;
diff -X /home/apw/lib/vdiff.excl -rupN reference/fs/proc/proc_misc.c current/fs/proc/proc_misc.c
--- reference/fs/proc/proc_misc.c 2004-03-29 12:10:18.000000000 +0100
+++ current/fs/proc/proc_misc.c 2004-04-01 13:37:14.000000000 +0100
@@ -232,6 +232,7 @@ static int meminfo_read_proc(char *page,
);

len += hugetlb_report_meminfo(page + len);
+ len += hugetlbfs_report_meminfo(page + len);

return proc_calc_metrics(page, start, off, count, eof, len);
#undef K
diff -X /home/apw/lib/vdiff.excl -rupN reference/include/linux/hugetlb.h current/include/linux/hugetlb.h
--- reference/include/linux/hugetlb.h 2004-03-29 12:10:22.000000000 +0100
+++ current/include/linux/hugetlb.h 2004-04-01 21:56:56.000000000 +0100
@@ -115,11 +115,16 @@ static inline void set_file_hugepages(st
{
file->f_op = &hugetlbfs_file_operations;
}
+int hugetlbfs_report_meminfo(char *);
+int hugetlb_charge_page(struct vm_area_struct *vma);
+int hugetlb_uncharge_page(struct vm_area_struct *vma);
+
#else /* !CONFIG_HUGETLBFS */

#define is_file_hugepages(file) 0
#define set_file_hugepages(file) BUG()
#define hugetlb_zero_setup(size) ERR_PTR(-ENOSYS)
+#define hugetlbfs_report_meminfo(buf) 0

#endif /* !CONFIG_HUGETLBFS */




-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/