Re: [PATCH v6 05/15] mm: add VM_UFFD_RWP VMA flag
From: Lorenzo Stoakes
Date: Wed Jun 03 2026 - 08:58:39 EST
On Fri, May 29, 2026 at 06:26:34PM +0100, Kiryl Shutsemau (Meta) wrote:
> Preparatory patch for userfaultfd read-write protection (RWP). RWP
> extends userfaultfd protection from plain write-protection (WP) to
> full read-write protection: accesses to an RWP-protected range --
> reads as well as writes -- trap through userfaultfd.
>
> Reserve VM_UFFD_RWP, add the userfaultfd_rwp() and
> userfaultfd_protected() helpers, and wire up the smaps "ur" entry and
> the trace-flag table the rest of the series will use. The flag is
> gated on CONFIG_USERFAULTFD_RWP, which is introduced together with the
> UAPI in a later patch; until then VM_UFFD_RWP aliases VM_NONE and
> every downstream check folds to dead code.
>
> Nothing sets or queries the flag yet.
>
> Signed-off-by: Kiryl Shutsemau <kas@xxxxxxxxxx>
> Assisted-by: Claude:claude-opus-4-6
> Reviewed-by: Mike Rapoport (Microsoft) <rppt@xxxxxxxxxx>
> Reviewed-by: SeongJae Park <sj@xxxxxxxxxx>
LGTM, so:
Reviewed-by: Lorenzo Stoakes <ljs@xxxxxxxxxx>
> ---
> Documentation/filesystems/proc.rst | 1 +
> fs/proc/task_mmu.c | 3 +++
> include/linux/mm.h | 41 ++++++++++++++++++++----------
> include/linux/userfaultfd_k.h | 32 +++++++++++++++++++----
> include/trace/events/mmflags.h | 7 +++++
> 5 files changed, 65 insertions(+), 19 deletions(-)
>
> diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
> index db6167befb7b..db28207c5290 100644
> --- a/Documentation/filesystems/proc.rst
> +++ b/Documentation/filesystems/proc.rst
> @@ -607,6 +607,7 @@ encoded manner. The codes are the following:
> um userfaultfd missing tracking
> uw userfaultfd wr-protect tracking
> ui userfaultfd minor fault
> + ur userfaultfd read-write-protect tracking
> ss shadow/guarded control stack page
> sl sealed
> lf lock on fault pages
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index 939657aa334a..ca0f69b347e8 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -1237,6 +1237,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
> #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
> [ilog2(VM_UFFD_MINOR)] = "ui",
> #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
> +#ifdef CONFIG_USERFAULTFD_RWP
> + [ilog2(VM_UFFD_RWP)] = "ur",
> +#endif
> #ifdef CONFIG_ARCH_HAS_USER_SHADOW_STACK
> [ilog2(VM_SHADOW_STACK)] = "ss",
> #endif
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 485df9c2dbdd..5ac31fbadeef 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -353,6 +353,7 @@ enum {
> #endif
> DECLARE_VMA_BIT(UFFD_MINOR, 41),
> DECLARE_VMA_BIT(SEALED, 42),
> + DECLARE_VMA_BIT(UFFD_RWP, 43),
> /* Flags that reuse flags above. */
> DECLARE_VMA_BIT_ALIAS(PKEY_BIT0, HIGH_ARCH_0),
> DECLARE_VMA_BIT_ALIAS(PKEY_BIT1, HIGH_ARCH_1),
> @@ -496,12 +497,17 @@ enum {
> #else
> #define VM_UFFD_MINOR VM_NONE
> #endif
> +#ifdef CONFIG_USERFAULTFD_RWP
> +#define VM_UFFD_RWP INIT_VM_FLAG(UFFD_RWP)
> +#else
> +#define VM_UFFD_RWP VM_NONE
> +#endif
>
> /*
> - * vma_flags_t masks for the userfaultfd VMA flags. VMA_UFFD_MINOR is gated on
> - * the same config as VM_UFFD_MINOR -- which implies 64BIT, where the bit fits
> - * -- so an out-of-range bit is never fed to mk_vma_flags() on a build whose
> - * bitmap cannot hold it.
> + * vma_flags_t masks for the userfaultfd VMA flags. The two high-bit modes are
> + * gated on the same configs as their VM_* flags above -- both of which imply
> + * 64BIT -- so an out-of-range bit is never fed to mk_vma_flags() on a build
> + * whose bitmap cannot hold it.
> */
> #define VMA_UFFD_MISSING mk_vma_flags(VMA_UFFD_MISSING_BIT)
> #define VMA_UFFD_WP mk_vma_flags(VMA_UFFD_WP_BIT)
> @@ -510,6 +516,11 @@ enum {
> #else
> #define VMA_UFFD_MINOR EMPTY_VMA_FLAGS
> #endif
> +#ifdef CONFIG_USERFAULTFD_RWP
> +#define VMA_UFFD_RWP mk_vma_flags(VMA_UFFD_RWP_BIT)
> +#else
> +#define VMA_UFFD_RWP EMPTY_VMA_FLAGS
> +#endif
>
> #ifdef CONFIG_64BIT
> #define VM_ALLOW_ANY_UNCACHED INIT_VM_FLAG(ALLOW_ANY_UNCACHED)
> @@ -648,22 +659,24 @@ enum {
> * reconsistuted upon page fault, so necessitate page table copying upon fork.
> *
> * Note that these flags should be compared with the DESTINATION VMA not the
> - * source, as VM_UFFD_WP may not be propagated to destination, while all other
> - * flags will be.
> + * source: VM_UFFD_WP and VM_UFFD_RWP may be cleared on the destination
> + * (dup_userfaultfd() -> userfaultfd_reset_ctx() when the parent context did
> + * not negotiate UFFD_FEATURE_EVENT_FORK), while all other flags propagate.
> *
> * VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be
> * reasonably reconstructed on page fault.
> *
> * VM_UFFD_WP - Encodes metadata about an installed uffd
> - * write protect handler, which cannot be
> - * reconstructed on page fault.
> + * VM_UFFD_RWP write- or read-write-protect handler, which
> + * cannot be reconstructed on page fault.
> *
> - * We always copy pgtables when dst_vma has uffd-wp
> - * enabled even if it's file-backed
> - * (e.g. shmem). Because when uffd-wp is enabled,
> - * pgtable contains uffd-wp protection information,
> - * that's something we can't retrieve from page cache,
> - * and skip copying will lose those info.
> + * We always copy pgtables when dst_vma has the
> + * uffd PTE bit in use even if it's file-backed
> + * (e.g. shmem). Because when the uffd bit is
> + * in use, the pgtable contains the protection
> + * information, that's something we can't
> + * retrieve from page cache, and skip copying
> + * will lose those info.
> *
> * VM_MAYBE_GUARD - Could contain page guard region markers which
> * by design are a property of the page tables
> diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
> index c4f2cc6dfcf0..f3b2db27989b 100644
> --- a/include/linux/userfaultfd_k.h
> +++ b/include/linux/userfaultfd_k.h
> @@ -21,10 +21,11 @@
> #include <linux/hugetlb_inline.h>
>
> /* The set of all possible UFFD-related VM flags. */
> -#define __VM_UFFD_FLAGS (VM_UFFD_MISSING | VM_UFFD_WP | VM_UFFD_MINOR)
> +#define __VM_UFFD_FLAGS (VM_UFFD_MISSING | VM_UFFD_MINOR | \
> + VM_UFFD_WP | VM_UFFD_RWP)
>
> #define __VMA_UFFD_FLAGS mk_vma_flags_from_masks(VMA_UFFD_MISSING, VMA_UFFD_WP, \
> - VMA_UFFD_MINOR)
> + VMA_UFFD_MINOR, VMA_UFFD_RWP)
>
> /*
> * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
> @@ -179,7 +180,8 @@ static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
> static inline bool uffd_disable_huge_pmd_share(struct vm_area_struct *vma)
> {
> return vma_test_any_mask(vma,
> - mk_vma_flags_from_masks(VMA_UFFD_WP, VMA_UFFD_MINOR));
> + mk_vma_flags_from_masks(VMA_UFFD_WP, VMA_UFFD_MINOR,
> + VMA_UFFD_RWP));
I wonder if a vma_test_any_masks() helper is worth adding now :) [I'll do that separately though]
> }
>
> /*
> @@ -210,6 +212,16 @@ static inline bool userfaultfd_minor(struct vm_area_struct *vma)
> return vma_test_any_mask(vma, VMA_UFFD_MINOR);
> }
>
> +static inline bool userfaultfd_rwp(struct vm_area_struct *vma)
> +{
> + return vma_test_any_mask(vma, VMA_UFFD_RWP);
NIT: You could use vma_flags_test_single_mask() here to make it clear that only a
single flag is being tested. It's not a big deal, though.
> +}
> +
> +static inline bool userfaultfd_protected(struct vm_area_struct *vma)
> +{
> + return userfaultfd_wp(vma) || userfaultfd_rwp(vma);
> +}
> +
> static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma,
> pte_t pte)
> {
> @@ -330,6 +342,16 @@ static inline bool userfaultfd_minor(struct vm_area_struct *vma)
> return false;
> }
>
> +static inline bool userfaultfd_rwp(struct vm_area_struct *vma)
> +{
> + return false;
> +}
> +
> +static inline bool userfaultfd_protected(struct vm_area_struct *vma)
> +{
> + return false;
> +}
> +
> static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma,
> pte_t pte)
> {
> @@ -423,8 +445,8 @@ static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma)
> }
>
> /*
> - * Returns true if this is a swap pte and was uffd-wp wr-protected in either
> - * forms (pte marker or a normal swap pte), false otherwise.
> + * Returns true if this swap pte carries uffd-tracked state in either
> + * form (pte marker or a normal swap pte), false otherwise.
> */
> static inline bool pte_swp_uffd_any(pte_t pte)
> {
> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
> index a6e5a44c9b42..bfface3d0203 100644
> --- a/include/trace/events/mmflags.h
> +++ b/include/trace/events/mmflags.h
> @@ -194,6 +194,12 @@ IF_HAVE_PG_ARCH_3(arch_3)
> # define IF_HAVE_UFFD_MINOR(flag, name)
> #endif
>
> +#ifdef CONFIG_USERFAULTFD_RWP
> +# define IF_HAVE_UFFD_RWP(flag, name) {flag, name},
> +#else
> +# define IF_HAVE_UFFD_RWP(flag, name)
> +#endif
> +
> #if defined(CONFIG_64BIT) || defined(CONFIG_PPC32)
> # define IF_HAVE_VM_DROPPABLE(flag, name) {flag, name},
> #else
> @@ -215,6 +221,7 @@ IF_HAVE_UFFD_MINOR(VM_UFFD_MINOR, "uffd_minor" ) \
> {VM_PFNMAP, "pfnmap" }, \
> {VM_MAYBE_GUARD, "maybe_guard" }, \
> {VM_UFFD_WP, "uffd_wp" }, \
> +IF_HAVE_UFFD_RWP(VM_UFFD_RWP, "uffd_rwp" ) \
> {VM_LOCKED, "locked" }, \
> {VM_IO, "io" }, \
> {VM_SEQ_READ, "seqread" }, \
> --
> 2.54.0
>