Re: [PATCH v8 00/17] gfs2: Fix mmap + page fault deadlocks
From: Andreas Gruenbacher
Date: Wed Oct 27 2021 - 17:23:15 EST
One of the arguments against Dave Hansen's patch that eliminates the
pre-faulting was that it doubles the number of page faults in the slow
case (i.e., when the pages are not resident). This can be avoided by
using get_user_pages() to do the "post-faulting" instead. For what it's
worth, here's a patch for that (on top of this series).
Andreas
--
fs: Avoid predictable page faults for sys_write() user buffer pages
Introduce a new fault_in_iov_iter_slow_readable() helper for faulting in
an iterator via get_user_pages() instead of triggering page faults.
This is slower than a simple memory read when the underlying pages are
resident, but avoids the page fault overhead when the underlying pages
need to be faulted in.
Use fault_in_iov_iter_slow_readable() in generic_perform_write() and
iomap_write_iter() when reading from the user buffer fails.
Signed-off-by: Andreas Gruenbacher <agruenba@xxxxxxxxxx>
---
fs/iomap/buffered-io.c | 2 +-
include/linux/pagemap.h | 3 ++-
include/linux/uio.h | 17 ++++++++++++++++-
lib/iov_iter.c | 10 ++++++----
mm/filemap.c | 2 +-
mm/gup.c | 10 ++++++----
6 files changed, 32 insertions(+), 12 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index d8809cd9ab31..15a0b4bb9528 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -770,7 +770,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
bytes = copied;
goto again;
}
- if (fault_in_iov_iter_readable(i, bytes) != bytes)
+ if (fault_in_iov_iter_slow_readable(i, bytes) != bytes)
goto again;
status = -EFAULT;
break;
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2f7dd14083d9..43844ed5675f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -736,8 +736,9 @@ extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
* Fault in userspace address range.
*/
size_t fault_in_writeable(char __user *uaddr, size_t size);
-size_t fault_in_safe_writeable(const char __user *uaddr, size_t size);
size_t fault_in_readable(const char __user *uaddr, size_t size);
+size_t __fault_in_slow(const char __user *uaddr, size_t size,
+ unsigned int flags);
int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
pgoff_t index, gfp_t gfp_mask);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 6350354f97e9..b071f4445059 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include <linux/thread_info.h>
#include <uapi/linux/uio.h>
+#include <linux/mm.h>
struct page;
struct pipe_inode_info;
@@ -135,7 +136,21 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
void iov_iter_advance(struct iov_iter *i, size_t bytes);
void iov_iter_revert(struct iov_iter *i, size_t bytes);
size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes);
-size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t bytes);
+size_t __fault_in_iov_iter_slow(const struct iov_iter *i, size_t bytes,
+ unsigned int flags);
+
+static inline size_t fault_in_iov_iter_slow_readable(const struct iov_iter *i,
+ size_t bytes)
+{
+ return __fault_in_iov_iter_slow(i, bytes, 0);
+}
+
+static inline size_t fault_in_iov_iter_writeable(const struct iov_iter *i,
+ size_t bytes)
+{
+ return __fault_in_iov_iter_slow(i, bytes, FOLL_WRITE);
+}
+
size_t iov_iter_single_seg_count(const struct iov_iter *i);
size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
struct iov_iter *i);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 66a740e6e153..73789a5409f6 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -468,9 +468,10 @@ size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size)
EXPORT_SYMBOL(fault_in_iov_iter_readable);
/*
- * fault_in_iov_iter_writeable - fault in iov iterator for writing
+ * __fault_in_iov_iter_slow - fault in iov iterator for reading/writing
* @i: iterator
* @size: maximum length
+ * @flags: FOLL_* flags (FOLL_WRITE for writing)
*
* Faults in the iterator using get_user_pages(), i.e., without triggering
* hardware page faults. This is primarily useful when we already know that
@@ -481,7 +482,8 @@ EXPORT_SYMBOL(fault_in_iov_iter_readable);
*
* Always returns 0 for non-user-space iterators.
*/
-size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
+size_t __fault_in_iov_iter_slow(const struct iov_iter *i, size_t size,
+ unsigned int flags)
{
if (iter_is_iovec(i)) {
size_t count = min(size, iov_iter_count(i));
@@ -495,7 +497,7 @@ size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
if (unlikely(!len))
continue;
- ret = fault_in_safe_writeable(p->iov_base + skip, len);
+ ret = __fault_in_slow(p->iov_base + skip, len, flags);
count -= len - ret;
if (ret)
break;
@@ -504,7 +506,7 @@ size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
}
return 0;
}
-EXPORT_SYMBOL(fault_in_iov_iter_writeable);
+EXPORT_SYMBOL(__fault_in_iov_iter_slow);
void iov_iter_init(struct iov_iter *i, unsigned int direction,
const struct iovec *iov, unsigned long nr_segs,
diff --git a/mm/filemap.c b/mm/filemap.c
index 467cdb7d086d..7ca76f4aa974 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3787,7 +3787,7 @@ ssize_t generic_perform_write(struct file *file,
bytes = copied;
goto again;
}
- if (fault_in_iov_iter_readable(i, bytes) != bytes)
+ if (fault_in_iov_iter_slow_readable(i, bytes) != bytes)
goto again;
status = -EFAULT;
break;
diff --git a/mm/gup.c b/mm/gup.c
index e1c7e4bde11f..def9f478a621 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1694,9 +1694,10 @@ size_t fault_in_writeable(char __user *uaddr, size_t size)
EXPORT_SYMBOL(fault_in_writeable);
/*
- * fault_in_safe_writeable - fault in an address range for writing
+ * __fault_in_slow - fault in an address range for reading/writing
* @uaddr: start of address range
* @size: length of address range
+ * @flags: FOLL_* flags (FOLL_WRITE for writing)
*
* Faults in an address range using get_user_pages, i.e., without triggering
* hardware page faults. This is primarily useful when we already know that
@@ -1711,7 +1712,8 @@ EXPORT_SYMBOL(fault_in_writeable);
* Returns the number of bytes not faulted in, like copy_to_user() and
* copy_from_user().
*/
-size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
+size_t __fault_in_slow(const char __user *uaddr, size_t size,
+ unsigned int flags)
{
unsigned long start = (unsigned long)untagged_addr(uaddr);
unsigned long end, nstart, nend;
@@ -1743,7 +1745,7 @@ size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
nr_pages = (nend - nstart) / PAGE_SIZE;
ret = __get_user_pages_locked(mm, nstart, nr_pages,
NULL, NULL, &locked,
- FOLL_TOUCH | FOLL_WRITE);
+ FOLL_TOUCH | flags);
if (ret <= 0)
break;
nend = nstart + ret * PAGE_SIZE;
@@ -1754,7 +1756,7 @@ size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
return 0;
return size - min_t(size_t, nstart - start, size);
}
-EXPORT_SYMBOL(fault_in_safe_writeable);
+EXPORT_SYMBOL(__fault_in_slow);
/**
* fault_in_readable - fault in userspace address range for reading
--
2.26.3