[PATCH] Account for MAP_SHARED mappings using VM_MAYSHARE and not VM_SHARED in hugetlbfs V2

From: Mel Gorman
Date: Tue May 26 2009 - 05:47:09 EST


Changelog since V1
o Convert follow_hugetlb_page to use VM_MAYSHARE

hugetlbfs reserves huge pages and accounts for them differently depending on
whether the mapping was mapped MAP_SHARED or MAP_PRIVATE. For MAP_SHARED
mappings, hugepages are reserved when mmap() is first called and are
tracked based on information associated with the inode. MAP_PRIVATE track
the reservations based on the VMA created as part of the mmap() operation.

However, the check hugetlbfs makes when determining if a VMA is MAP_SHARED
is with the VM_SHARED flag and not VM_MAYSHARE. For file-backed mappings,
such as hugetlbfs, VM_SHARED is set only if the mapping is MAP_SHARED
and the file was opened read-write. If a shared memory mapping was mapped
shared-read-write for populating of data and mapped shared-read-only by
other processes, then hugetlbfs gets inconsistent on how it accounts for
the creation of reservations and how they are consumed.

This patch alters mm/hugetlb.c and replaces VM_SHARED with VM_MAYSHARE when
the intent of the code was to check whether the VMA was mapped MAP_SHARED
or MAP_PRIVATE.

If this patch passes review, it's needed for 2.6.30 and -stable.

Signed-off-by: Mel Gorman <mel@xxxxxxxxx>
---
mm/hugetlb.c | 28 ++++++++++++++--------------
1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 28c655b..3687f42 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -316,7 +316,7 @@ static void resv_map_release(struct kref *ref)
static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
{
VM_BUG_ON(!is_vm_hugetlb_page(vma));
- if (!(vma->vm_flags & VM_SHARED))
+ if (!(vma->vm_flags & VM_MAYSHARE))
return (struct resv_map *)(get_vma_private_data(vma) &
~HPAGE_RESV_MASK);
return NULL;
@@ -325,7 +325,7 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
{
VM_BUG_ON(!is_vm_hugetlb_page(vma));
- VM_BUG_ON(vma->vm_flags & VM_SHARED);
+ VM_BUG_ON(vma->vm_flags & VM_MAYSHARE);

set_vma_private_data(vma, (get_vma_private_data(vma) &
HPAGE_RESV_MASK) | (unsigned long)map);
@@ -334,7 +334,7 @@ static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
{
VM_BUG_ON(!is_vm_hugetlb_page(vma));
- VM_BUG_ON(vma->vm_flags & VM_SHARED);
+ VM_BUG_ON(vma->vm_flags & VM_MAYSHARE);

set_vma_private_data(vma, get_vma_private_data(vma) | flags);
}
@@ -353,7 +353,7 @@ static void decrement_hugepage_resv_vma(struct hstate *h,
if (vma->vm_flags & VM_NORESERVE)
return;

- if (vma->vm_flags & VM_SHARED) {
+ if (vma->vm_flags & VM_MAYSHARE) {
/* Shared mappings always use reserves */
h->resv_huge_pages--;
} else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
@@ -369,14 +369,14 @@ static void decrement_hugepage_resv_vma(struct hstate *h,
void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
{
VM_BUG_ON(!is_vm_hugetlb_page(vma));
- if (!(vma->vm_flags & VM_SHARED))
+ if (!(vma->vm_flags & VM_MAYSHARE))
vma->vm_private_data = (void *)0;
}

/* Returns true if the VMA has associated reserve pages */
static int vma_has_reserves(struct vm_area_struct *vma)
{
- if (vma->vm_flags & VM_SHARED)
+ if (vma->vm_flags & VM_MAYSHARE)
return 1;
if (is_vma_resv_set(vma, HPAGE_RESV_OWNER))
return 1;
@@ -924,7 +924,7 @@ static long vma_needs_reservation(struct hstate *h,
struct address_space *mapping = vma->vm_file->f_mapping;
struct inode *inode = mapping->host;

- if (vma->vm_flags & VM_SHARED) {
+ if (vma->vm_flags & VM_MAYSHARE) {
pgoff_t idx = vma_hugecache_offset(h, vma, addr);
return region_chg(&inode->i_mapping->private_list,
idx, idx + 1);
@@ -949,7 +949,7 @@ static void vma_commit_reservation(struct hstate *h,
struct address_space *mapping = vma->vm_file->f_mapping;
struct inode *inode = mapping->host;

- if (vma->vm_flags & VM_SHARED) {
+ if (vma->vm_flags & VM_MAYSHARE) {
pgoff_t idx = vma_hugecache_offset(h, vma, addr);
region_add(&inode->i_mapping->private_list, idx, idx + 1);

@@ -1893,7 +1893,7 @@ retry_avoidcopy:
* at the time of fork() could consume its reserves on COW instead
* of the full address range.
*/
- if (!(vma->vm_flags & VM_SHARED) &&
+ if (!(vma->vm_flags & VM_MAYSHARE) &&
is_vma_resv_set(vma, HPAGE_RESV_OWNER) &&
old_page != pagecache_page)
outside_reserve = 1;
@@ -2000,7 +2000,7 @@ retry:
clear_huge_page(page, address, huge_page_size(h));
__SetPageUptodate(page);

- if (vma->vm_flags & VM_SHARED) {
+ if (vma->vm_flags & VM_MAYSHARE) {
int err;
struct inode *inode = mapping->host;

@@ -2104,7 +2104,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
goto out_mutex;
}

- if (!(vma->vm_flags & VM_SHARED))
+ if (!(vma->vm_flags & VM_MAYSHARE))
pagecache_page = hugetlbfs_pagecache_page(h,
vma, address);
}
@@ -2168,7 +2168,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
int remainder = *length;
struct hstate *h = hstate_vma(vma);
int zeropage_ok = 0;
- int shared = vma->vm_flags & VM_SHARED;
+ int shared = vma->vm_flags & VM_MAYSHARE;

spin_lock(&mm->page_table_lock);
while (vaddr < vma->vm_end && remainder) {
@@ -2289,7 +2289,7 @@ int hugetlb_reserve_pages(struct inode *inode,
* to reserve the full area even if read-only as mprotect() may be
* called to make the mapping read-write. Assume !vma is a shm mapping
*/
- if (!vma || vma->vm_flags & VM_SHARED)
+ if (!vma || vma->vm_flags & VM_MAYSHARE)
chg = region_chg(&inode->i_mapping->private_list, from, to);
else {
struct resv_map *resv_map = resv_map_alloc();
@@ -2330,7 +2330,7 @@ int hugetlb_reserve_pages(struct inode *inode,
* consumed reservations are stored in the map. Hence, nothing
* else has to be done for private mappings here
*/
- if (!vma || vma->vm_flags & VM_SHARED)
+ if (!vma || vma->vm_flags & VM_MAYSHARE)
region_add(&inode->i_mapping->private_list, from, to);
return 0;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/