[PATCH] mm: take i_mmap_lock in unmap_mapping_range() for DAX

From: Kirill A. Shutemov
Date: Fri Aug 07 2015 - 07:54:25 EST


DAX is not so special: we need i_mmap_lock to protect mapping->i_mmap.

__dax_pmd_fault() uses unmap_mapping_range() shoot out zero page from
all mappings. We need to drop i_mmap_lock there to avoid lock deadlock.

Re-aquiring the lock should be fine since we check i_size after the
point.

Not-yet-signed-off-by: Matthew Wilcox <willy@xxxxxxxxxxxxxxx>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
---
fs/dax.c | 35 +++++++++++++++++++----------------
mm/memory.c | 11 ++---------
2 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 9ef9b80cc132..ed54efedade6 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -554,6 +554,25 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE)
goto fallback;

+ if (buffer_unwritten(&bh) || buffer_new(&bh)) {
+ int i;
+ for (i = 0; i < PTRS_PER_PMD; i++)
+ clear_page(kaddr + i * PAGE_SIZE);
+ count_vm_event(PGMAJFAULT);
+ mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+ result |= VM_FAULT_MAJOR;
+ }
+
+ /*
+ * If we allocated new storage, make sure no process has any
+ * zero pages covering this hole
+ */
+ if (buffer_new(&bh)) {
+ i_mmap_unlock_write(mapping);
+ unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0);
+ i_mmap_lock_write(mapping);
+ }
+
/*
* If a truncate happened while we were allocating blocks, we may
* leave blocks allocated to the file that are beyond EOF. We can't
@@ -568,13 +587,6 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
if ((pgoff | PG_PMD_COLOUR) >= size)
goto fallback;

- /*
- * If we allocated new storage, make sure no process has any
- * zero pages covering this hole
- */
- if (buffer_new(&bh))
- unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0);
-
if (!write && !buffer_mapped(&bh) && buffer_uptodate(&bh)) {
spinlock_t *ptl;
pmd_t entry;
@@ -605,15 +617,6 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
goto fallback;

- if (buffer_unwritten(&bh) || buffer_new(&bh)) {
- int i;
- for (i = 0; i < PTRS_PER_PMD; i++)
- clear_page(kaddr + i * PAGE_SIZE);
- count_vm_event(PGMAJFAULT);
- mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
- result |= VM_FAULT_MAJOR;
- }
-
result |= vmf_insert_pfn_pmd(vma, address, pmd, pfn, write);
}

diff --git a/mm/memory.c b/mm/memory.c
index 5a3427bb3f32..670cdfa9f33e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2426,17 +2426,10 @@ void unmap_mapping_range(struct address_space *mapping,
if (details.last_index < details.first_index)
details.last_index = ULONG_MAX;

-
- /*
- * DAX already holds i_mmap_lock to serialise file truncate vs
- * page fault and page fault vs page fault.
- */
- if (!IS_DAX(mapping->host))
- i_mmap_lock_write(mapping);
+ i_mmap_lock_write(mapping);
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
unmap_mapping_range_tree(&mapping->i_mmap, &details);
- if (!IS_DAX(mapping->host))
- i_mmap_unlock_write(mapping);
+ i_mmap_unlock_write(mapping);
}
EXPORT_SYMBOL(unmap_mapping_range);

--
2.4.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/