[patch 12/39] remap_file_pages protection support: enhance syscall interface and swapout code
From: blaisorblade
Date: Fri Aug 12 2005 - 12:55:59 EST
From: Ingo Molnar <mingo@xxxxxxx>, Paolo 'Blaisorblade' Giarrusso <blaisorblade@xxxxxxxx>
This is the "main" patch for the syscall code, containing the core of what was
sent by Ingo Molnar, variously reworked.
Differently from his patch, I've *not* added a new syscall, choosing to add a
new flag (MAP_NOINHERIT) which the application must specify to get the new
behavior (prot != 0 is accepted and prot == 0 means PROT_NONE).
The changes to the page fault handler have been separated, even because that
has required considerable amount of effort.
Handle the possibility that remap_file_pages changes protections in
various places.
* Enable the 'prot' parameter for shared-writable mappings (the ones
which are the primary target for remap_file_pages), without breaking up the
vma
* Use pte_file PTE's also when protections don't match, not only when the
offset doesn't match; and add set_nonlinear_pte() for this testing
* Save the current protection too when clearing a nonlinear PTE, by
replacing pgoff_to_pte() uses with pgoff_prot_to_pte().
* Use the supplied protections on restore and on populate (partially
uncomplete, fixed in subsequent patches)
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@xxxxxxxx>
---
linux-2.6.git-paolo/include/linux/pagemap.h | 19 ++++++++++
linux-2.6.git-paolo/mm/fremap.c | 50 +++++++++++++++++-----------
linux-2.6.git-paolo/mm/memory.c | 14 ++++---
linux-2.6.git-paolo/mm/rmap.c | 3 -
4 files changed, 60 insertions(+), 26 deletions(-)
diff -puN include/linux/pagemap.h~rfp-enhance-syscall-and-swapout-code include/linux/pagemap.h
--- linux-2.6.git/include/linux/pagemap.h~rfp-enhance-syscall-and-swapout-code 2005-08-11 22:59:47.000000000 +0200
+++ linux-2.6.git-paolo/include/linux/pagemap.h 2005-08-11 22:59:47.000000000 +0200
@@ -159,6 +159,25 @@ static inline pgoff_t linear_page_index(
return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT);
}
+/***
+ * Checks if the PTE is nonlinear, and if yes sets it.
+ * @vma: the VMA in which @addr is; we don't check if it's VM_NONLINEAR, just
+ * if this PTE is nonlinear.
+ * @addr: the addr which @pte refers to.
+ * @pte: the old PTE value (to read its protections.
+ * @ptep: the PTE pointer (for setting it).
+ * @mm: passed to set_pte_at.
+ * @page: the page which was installed (to read its ->index, i.e. the old
+ * offset inside the file.
+ */
+static inline void set_nonlinear_pte(pte_t pte, pte_t * ptep, struct vm_area_struct *vma, struct mm_struct *mm, struct page* page, unsigned long addr)
+{
+ pgprot_t pgprot = pte_to_pgprot(pte);
+ if(linear_page_index(vma, addr) != page->index ||
+ pgprot_val(pgprot) != pgprot_val(vma->vm_page_prot))
+ set_pte_at(mm, addr, ptep, pgoff_prot_to_pte(page->index, pgprot));
+}
+
extern void FASTCALL(__lock_page(struct page *page));
extern void FASTCALL(unlock_page(struct page *page));
diff -puN mm/fremap.c~rfp-enhance-syscall-and-swapout-code mm/fremap.c
--- linux-2.6.git/mm/fremap.c~rfp-enhance-syscall-and-swapout-code 2005-08-11 22:59:47.000000000 +0200
+++ linux-2.6.git-paolo/mm/fremap.c 2005-08-11 23:01:14.000000000 +0200
@@ -54,7 +54,7 @@ static inline void zap_pte(struct mm_str
* previously existing mapping.
*/
int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, struct page *page, pgprot_t prot)
+ unsigned long addr, struct page *page, pgprot_t pgprot)
{
struct inode *inode;
pgoff_t size;
@@ -94,7 +94,7 @@ int install_page(struct mm_struct *mm, s
inc_mm_counter(mm,rss);
flush_icache_page(vma, page);
- set_pte_at(mm, addr, pte, mk_pte(page, prot));
+ set_pte_at(mm, addr, pte, mk_pte(page, pgprot));
page_add_file_rmap(page);
pte_val = *pte;
pte_unmap(pte);
@@ -113,7 +113,7 @@ EXPORT_SYMBOL(install_page);
* previously existing mapping.
*/
int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, unsigned long pgoff, pgprot_t prot)
+ unsigned long addr, unsigned long pgoff, pgprot_t pgprot)
{
int err = -ENOMEM;
pte_t *pte;
@@ -139,7 +139,7 @@ int install_file_pte(struct mm_struct *m
zap_pte(mm, vma, addr, pte);
- set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
+ set_pte_at(mm, addr, pte, pgoff_prot_to_pte(pgoff, pgprot));
pte_val = *pte;
pte_unmap(pte);
update_mmu_cache(vma, addr, pte_val);
@@ -157,31 +157,28 @@ err_unlock:
* file within an existing vma.
* @start: start of the remapped virtual memory range
* @size: size of the remapped virtual memory range
- * @prot: new protection bits of the range
+ * @prot: new protection bits of the range, must be 0 if not using MAP_NOINHERIT
* @pgoff: to be mapped page of the backing store file
- * @flags: 0 or MAP_NONBLOCKED - the later will cause no IO.
+ * @flags: bits MAP_NOINHERIT or MAP_NONBLOCKED - the later will cause no IO.
*
* this syscall works purely via pagetables, so it's the most efficient
* way to map the same (large) file into a given virtual window. Unlike
* mmap()/mremap() it does not create any new vmas. The new mappings are
* also safe across swapout.
- *
- * NOTE: the 'prot' parameter right now is ignored, and the vma's default
- * protection is used. Arbitrary protections might be implemented in the
- * future.
*/
asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
- unsigned long __prot, unsigned long pgoff, unsigned long flags)
+ unsigned long prot, unsigned long pgoff, unsigned long flags)
{
struct mm_struct *mm = current->mm;
+ pgprot_t pgprot;
struct address_space *mapping;
unsigned long end = start + size;
struct vm_area_struct *vma;
int err = -EINVAL;
int has_write_lock = 0;
- if (__prot)
- return err;
+ if (prot && !(flags & MAP_NOINHERIT))
+ goto out;
/*
* Sanitize the syscall parameters:
*/
@@ -200,7 +197,7 @@ asmlinkage long sys_remap_file_pages(uns
/* We need down_write() to change vma->vm_flags. */
down_read(&mm->mmap_sem);
- retry:
+retry:
vma = find_vma(mm, start);
/*
@@ -210,7 +207,22 @@ asmlinkage long sys_remap_file_pages(uns
* swapout cursor in a VM_NONLINEAR vma (unless VM_RESERVED
* or VM_LOCKED, but VM_LOCKED could be revoked later on).
*/
- if (vma && (vma->vm_flags & VM_SHARED) &&
+ if (!vma)
+ goto out_unlock;
+
+ if (flags & MAP_NOINHERIT) {
+ err = -EPERM;
+ if (((prot & PROT_READ) && !(vma->vm_flags & VM_MAYREAD)))
+ goto out_unlock;
+ if (((prot & PROT_WRITE) && !(vma->vm_flags & VM_MAYWRITE)))
+ goto out_unlock;
+ if (((prot & PROT_EXEC) && !(vma->vm_flags & VM_MAYEXEC)))
+ goto out_unlock;
+ pgprot = protection_map[calc_vm_prot_bits(prot) | VM_SHARED];
+ } else
+ pgprot = vma->vm_page_prot;
+
+ if ((vma->vm_flags & VM_SHARED) &&
(!vma->vm_private_data ||
(vma->vm_flags & (VM_NONLINEAR|VM_RESERVED))) &&
vma->vm_ops && vma->vm_ops->populate &&
@@ -236,9 +248,8 @@ asmlinkage long sys_remap_file_pages(uns
spin_unlock(&mapping->i_mmap_lock);
}
- err = vma->vm_ops->populate(vma, start, size,
- vma->vm_page_prot,
- pgoff, flags & MAP_NONBLOCK);
+ err = vma->vm_ops->populate(vma, start, size, pgprot, pgoff,
+ flags & MAP_NONBLOCK);
/*
* We can't clear VM_NONLINEAR because we'd have to do
@@ -246,11 +257,14 @@ asmlinkage long sys_remap_file_pages(uns
* downgrading the lock. (Locks can't be upgraded).
*/
}
+
+out_unlock:
if (likely(!has_write_lock))
up_read(&mm->mmap_sem);
else
up_write(&mm->mmap_sem);
+out:
return err;
}
diff -puN mm/memory.c~rfp-enhance-syscall-and-swapout-code mm/memory.c
--- linux-2.6.git/mm/memory.c~rfp-enhance-syscall-and-swapout-code 2005-08-11 22:59:47.000000000 +0200
+++ linux-2.6.git-paolo/mm/memory.c 2005-08-11 22:59:47.000000000 +0200
@@ -555,11 +555,11 @@ static void zap_pte_range(struct mmu_gat
tlb_remove_tlb_entry(tlb, pte, addr);
if (unlikely(!page))
continue;
- if (unlikely(details) && details->nonlinear_vma
- && linear_page_index(details->nonlinear_vma,
- addr) != page->index)
- set_pte_at(tlb->mm, addr, pte,
- pgoff_to_pte(page->index));
+ if (unlikely(details) && details->nonlinear_vma) {
+ set_nonlinear_pte(ptent, pte,
+ details->nonlinear_vma,
+ tlb->mm, page, addr);
+ }
if (pte_dirty(ptent))
set_page_dirty(page);
if (PageAnon(page))
@@ -1926,6 +1926,7 @@ static int do_file_page(struct mm_struct
unsigned long address, int write_access, pte_t *pte, pmd_t *pmd)
{
unsigned long pgoff;
+ pgprot_t pgprot;
int err;
BUG_ON(!vma->vm_ops || !vma->vm_ops->nopage);
@@ -1940,11 +1941,12 @@ static int do_file_page(struct mm_struct
}
pgoff = pte_to_pgoff(*pte);
+ pgprot = pte_to_pgprot(*pte);
pte_unmap(pte);
spin_unlock(&mm->page_table_lock);
- err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, vma->vm_page_prot, pgoff, 0);
+ err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, pgprot, pgoff, 0);
if (err == -ENOMEM)
return VM_FAULT_OOM;
if (err)
diff -puN mm/rmap.c~rfp-enhance-syscall-and-swapout-code mm/rmap.c
--- linux-2.6.git/mm/rmap.c~rfp-enhance-syscall-and-swapout-code 2005-08-11 22:59:47.000000000 +0200
+++ linux-2.6.git-paolo/mm/rmap.c 2005-08-11 22:59:47.000000000 +0200
@@ -660,8 +660,7 @@ static void try_to_unmap_cluster(unsigne
pteval = ptep_clear_flush(vma, address, pte);
/* If nonlinear, store the file page offset in the pte. */
- if (page->index != linear_page_index(vma, address))
- set_pte_at(mm, address, pte, pgoff_to_pte(page->index));
+ set_nonlinear_pte(pteval, pte, vma, mm, page, address);
/* Move the dirty bit to the physical page now the pte is gone. */
if (pte_dirty(pteval))
_
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/