[PATCH v3 1/3] mm/mprotect: Fix soft-dirty check in can_change_pte_writable()

From: Peter Xu
Date: Thu Jul 21 2022 - 14:33:49 EST


The check wanted to make sure when soft-dirty tracking is enabled we won't
grant write bit by accident, as a page fault is needed for dirty tracking.
The intention is correct but we didn't check it right because VM_SOFTDIRTY
set actually means soft-dirty tracking disabled. Fix it.

There's another thing tricky about soft-dirty is that, we can't check the
vma flag !(vma_flags & VM_SOFTDIRTY) directly but only check it after we
checked CONFIG_MEM_SOFT_DIRTY because otherwise VM_SOFTDIRTY will be
defined as zero, and !(vma_flags & VM_SOFTDIRTY) will constantly return
true. To avoid misuse, introduce a helper for checking whether vma has
soft-dirty tracking enabled.

We can easily verify this with any exclusive anonymous page, like program
below:

=======8<======
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdbool.h>

#define BIT_ULL(nr) (1ULL << (nr))
#define PM_SOFT_DIRTY BIT_ULL(55)

unsigned int psize;
char *page;

uint64_t pagemap_read_vaddr(int fd, void *vaddr)
{
uint64_t value;
int ret;

ret = pread(fd, &value, sizeof(uint64_t),
((uint64_t)vaddr >> 12) * sizeof(uint64_t));
assert(ret == sizeof(uint64_t));

return value;
}

void clear_refs_write(void)
{
int fd = open("/proc/self/clear_refs", O_RDWR);

assert(fd >= 0);
write(fd, "4", 2);
close(fd);
}

#define check_soft_dirty(str, expect) do { \
bool dirty = pagemap_read_vaddr(fd, page) & PM_SOFT_DIRTY; \
if (dirty != expect) { \
printf("ERROR: %s, soft-dirty=%d (expect: %d)\n", str, dirty, expect); \
exit(-1); \
} \
} while (0)

int main(void)
{
int fd = open("/proc/self/pagemap", O_RDONLY);

assert(fd >= 0);
psize = getpagesize();
page = mmap(NULL, psize, PROT_READ|PROT_WRITE,
MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
assert(page != MAP_FAILED);

*page = 1;
check_soft_dirty("Just faulted in page", 1);
clear_refs_write();
check_soft_dirty("Clear_refs written", 0);
mprotect(page, psize, PROT_READ);
check_soft_dirty("Marked RO", 0);
mprotect(page, psize, PROT_READ|PROT_WRITE);
check_soft_dirty("Marked RW", 0);
*page = 2;
check_soft_dirty("Wrote page again", 1);

munmap(page, psize);
close(fd);
printf("Test passed.\n");

return 0;
}
=======8<======

Here we attach a Fixes to commit 64fe24a3e05e only for easy tracking, as
this patch won't apply to a tree before that point. However the commit
wasn't the source of problem, it's just that then anonymous memory will
also suffer from this problem with mprotect().

Fixes: 64fe24a3e05e ("mm/mprotect: try avoiding write faults for exclusive anonymous pages when changing protection")
Signed-off-by: Peter Xu <peterx@xxxxxxxxxx>
---
mm/internal.h | 18 ++++++++++++++++++
mm/mmap.c | 2 +-
mm/mprotect.c | 2 +-
3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 15e8cb118832..e2d442e3c0b2 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -860,4 +860,22 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);

DECLARE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);

+static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
+{
+ /*
+ * NOTE: we must check this before VM_SOFTDIRTY on soft-dirty
+ * enablements, because when without soft-dirty being compiled in,
+ * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY)
+ * will be constantly true.
+ */
+ if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
+ return false;
+
+ /*
+ * Soft-dirty is kind of special: its tracking is enabled when the
+ * vma flags not set.
+ */
+ return !(vma->vm_flags & VM_SOFTDIRTY);
+}
+
#endif /* __MM_INTERNAL_H */
diff --git a/mm/mmap.c b/mm/mmap.c
index 125e8903c93c..93f9913409ea 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1518,7 +1518,7 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
return 0;

/* Do we need to track softdirty? */
- if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
+ if (vma_soft_dirty_enabled(vma))
return 1;

/* Specialty mapping? */
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 0420c3ed936c..c403e84129d4 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -49,7 +49,7 @@ static inline bool can_change_pte_writable(struct vm_area_struct *vma,
return false;

/* Do we need write faults for softdirty tracking? */
- if ((vma->vm_flags & VM_SOFTDIRTY) && !pte_soft_dirty(pte))
+ if (vma_soft_dirty_enabled(vma) && !pte_soft_dirty(pte))
return false;

/* Do we need write faults for uffd-wp tracking? */
--
2.32.0