[PATCH v3 5/5] mm: userfaultfd: add UFFDIO_CONTINUE_MODE_WP to install WP PTEs

From: Axel Rasmussen
Date: Mon Mar 06 2023 - 17:51:10 EST


UFFDIO_COPY already has UFFDIO_COPY_MODE_WP, so when installing a new
PTE to resolve a missing fault, one can install a write-protected one.
This is useful when using UFFDIO_REGISTER_MODE_{MISSING,WP} in
combination.

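So, add an analogous UFFDIO_CONTINUE_MODE_WP, which does the same thing
but for *minor* faults: the PTE installed by UFFDIO_CONTINUE is
write-protected. This is useful when using
UFFDIO_REGISTER_MODE_{MINOR,WP} in combination.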

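Update the selftest to do some very basic exercising of the new flag:
when write-protect testing is enabled, the minor fault test also
registers with UFFDIO_REGISTER_MODE_WP and passes
UFFDIO_CONTINUE_MODE_WP to UFFDIO_CONTINUE.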

Signed-off-by: Axel Rasmussen <axelrasmussen@xxxxxxxxxx>
---
fs/userfaultfd.c | 8 ++++++--
include/linux/userfaultfd_k.h | 2 +-
include/uapi/linux/userfaultfd.h | 7 +++++++
mm/userfaultfd.c | 5 +++--
tools/testing/selftests/mm/userfaultfd.c | 4 ++++
5 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 984b63b0fc75..b5750e20ae00 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1859,6 +1859,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
struct uffdio_continue uffdio_continue;
struct uffdio_continue __user *user_uffdio_continue;
struct uffdio_range range;
+ int flags = 0;

user_uffdio_continue = (struct uffdio_continue __user *)arg;

@@ -1881,12 +1882,15 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
/* double check for wraparound just in case. */
if (range.start + range.len <= range.start)
goto out;
- if (uffdio_continue.mode & ~UFFDIO_CONTINUE_MODE_DONTWAKE)
+ if (uffdio_continue.mode & ~(UFFDIO_CONTINUE_MODE_DONTWAKE |
+ UFFDIO_CONTINUE_MODE_WP))
goto out;
+ if (uffdio_continue.mode & UFFDIO_CONTINUE_MODE_WP)
+ flags |= MFILL_ATOMIC_WP;

if (mmget_not_zero(ctx->mm)) {
ret = mfill_atomic_continue(ctx->mm, &range,
- &ctx->mmap_changing);
+ &ctx->mmap_changing, flags);
mmput(ctx->mm);
} else {
return -ESRCH;
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index fcd95e3d3dcd..d691f898bae2 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -71,7 +71,7 @@ extern ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm,
atomic_t *mmap_changing);
extern ssize_t mfill_atomic_continue(struct mm_struct *dst_mm,
const struct uffdio_range *dst,
- atomic_t *mmap_changing);
+ atomic_t *mmap_changing, int flags);
extern int mwriteprotect_range(struct mm_struct *dst_mm,
const struct uffdio_range *range,
bool enable_wp, atomic_t *mmap_changing);
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 005e5e306266..14059a0861bf 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -297,6 +297,13 @@ struct uffdio_writeprotect {
struct uffdio_continue {
struct uffdio_range range;
#define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0)
+ /*
+ * UFFDIO_CONTINUE_MODE_WP will map the page write protected on
+ * the fly. UFFDIO_CONTINUE_MODE_WP is available only if the
+ * write protected ioctl is implemented for the range
+ * according to the uffdio_register.ioctls.
+ */
+#define UFFDIO_CONTINUE_MODE_WP ((__u64)1<<1)
__u64 mode;

/*
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 870e7489e8d1..6adbfc8dc277 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -691,10 +691,11 @@ ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm,

ssize_t mfill_atomic_continue(struct mm_struct *dst_mm,
const struct uffdio_range *dst,
- atomic_t *mmap_changing)
+ atomic_t *mmap_changing,
+ int flags)
{
return mfill_atomic(dst_mm, 0, dst,
- mmap_changing, MFILL_ATOMIC_CONTINUE);
+ mmap_changing, flags | MFILL_ATOMIC_CONTINUE);
}

long uffd_wp_range(struct vm_area_struct *dst_vma,
diff --git a/tools/testing/selftests/mm/userfaultfd.c b/tools/testing/selftests/mm/userfaultfd.c
index 7f22844ed704..41c1f9abc481 100644
--- a/tools/testing/selftests/mm/userfaultfd.c
+++ b/tools/testing/selftests/mm/userfaultfd.c
@@ -585,6 +585,8 @@ static void continue_range(int ufd, __u64 start, __u64 len)
req.range.start = start;
req.range.len = len;
req.mode = 0;
+ if (test_uffdio_wp)
+ req.mode |= UFFDIO_CONTINUE_MODE_WP;

if (ioctl(ufd, UFFDIO_CONTINUE, &req))
err("UFFDIO_CONTINUE failed for address 0x%" PRIx64,
@@ -1332,6 +1334,8 @@ static int userfaultfd_minor_test(void)
uffdio_register.range.start = (unsigned long)area_dst_alias;
uffdio_register.range.len = nr_pages * page_size;
uffdio_register.mode = UFFDIO_REGISTER_MODE_MINOR;
+ if (test_uffdio_wp)
+ uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
err("register failure");

--
2.40.0.rc0.216.gc4246ad0f0-goog