Re: [tip: x86/mm] x86/mm/pat: Fix VM_PAT handling when fork() fails in copy_page_range()
From: David Hildenbrand
Date: Wed Mar 19 2025 - 06:17:22 EST
On 19.03.25 10:53, Borislav Petkov wrote:
On Wed, Mar 19, 2025 at 09:15:25AM +0100, David Hildenbrand wrote:
@Ingo, can you drop this patch for now? It's supposed to be "!get_pat_info()"
here, and now that a couple of months have passed, I want to re-verify
whether it's all working as expected with that.
(we could actually complain if get_pat_info() were to fail at this point; let
me think about that)
I'll resend once I get to it. Thanks!
That patch is deep into the x86/mm branch. We could
- rebase: not good, especially one week before the merge window
- send a revert: probably better along with an explanation why we're reverting
- do a small fix which disables it on top
- fix it properly: probably best! :-)
Ahh, the commit id is already supposed to be stable, got it.
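FWIW, the small fix on top would have been to just flip the check in the existing untrack_pfn_copy() (get_pat_info() returns 0 on success) and complain if the lookup were to fail at that point; untested sketch:

void untrack_pfn_copy(struct vm_area_struct *dst_vma,
		struct vm_area_struct *src_vma)
{
	resource_size_t paddr;
	unsigned long size;

	if (!(dst_vma->vm_flags & VM_PAT))
		return;

	/*
	 * As the page tables might not have been copied yet, the PAT
	 * information is obtained from the src VMA, just like during
	 * track_pfn_copy(). get_pat_info() returns 0 on success, so the
	 * reservation must be freed when the lookup succeeds; a failing
	 * lookup at this point would be unexpected, hence the warning.
	 */
	if (!WARN_ON_ONCE(get_pat_info(src_vma, &paddr, NULL))) {
		size = src_vma->vm_end - src_vma->vm_start;
		free_pfn_range(paddr, size);
	}

	/*
	 * Reservation was freed, any copied page tables will get cleaned
	 * up later, but without getting PAT involved again.
	 */
	vm_flags_clear(dst_vma, VM_PAT);
}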
But I'm currently testing the following as the proper fix instead, which avoids the second lookup completely.
From 0f42e29d5ba413affa2494f9cbbdf7b5b6b4ae2e Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@xxxxxxxxxx>
Date: Fri, 18 Oct 2024 12:44:59 +0200
Subject: [PATCH v1] x86/mm/pat: fix error handling in untrack_pfn_copy()
When track_pfn_copy() fails during fork(), we perform another get_pat_info()
in untrack_pfn_copy() to look up the PFN, but the check on its return value is
accidentally inverted: it is supposed to be "!get_pat_info()" here. So the
reservation obtained through track_pfn_copy() never gets freed when the lookup
succeeds, and free_pfn_range() gets called with an uninitialized paddr when
the lookup fails.

Let's fix it by avoiding the second get_pat_info() completely: have
track_pfn_copy() hand the PFN back to the caller, and simplify
untrack_pfn_copy() to simply call untrack_pfn() with that PFN.
Reported-by: Dan Carpenter <dan.carpenter@xxxxxxxxxx>
Closes: https://lore.kernel.org/r/1d5de3d6-999b-47ca-8d43-22703b8442bc@stanley.mountain
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: Peter Xu <peterx@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Ma Wupeng <mawupeng1@xxxxxxxxxx>
Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
---
arch/x86/mm/pat/memtype.c | 32 ++++----------------------------
include/linux/pgtable.h | 23 ++++++++++++++++++-----
mm/memory.c | 6 +++---
3 files changed, 25 insertions(+), 36 deletions(-)
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 3a9e6dd58e2f0..dc5c8e6e3001e 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -985,7 +985,7 @@ static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
}
int track_pfn_copy(struct vm_area_struct *dst_vma,
- struct vm_area_struct *src_vma)
+ struct vm_area_struct *src_vma, unsigned long *pfn)
{
const unsigned long vma_size = src_vma->vm_end - src_vma->vm_start;
resource_size_t paddr;
@@ -1002,36 +1002,12 @@ int track_pfn_copy(struct vm_area_struct *dst_vma,
if (get_pat_info(src_vma, &paddr, &pgprot))
return -EINVAL;
rc = reserve_pfn_range(paddr, vma_size, &pgprot, 1);
- if (!rc)
+ if (!rc) {
/* Reservation for the destination VMA succeeded. */
vm_flags_set(dst_vma, VM_PAT);
- return rc;
-}
-
-void untrack_pfn_copy(struct vm_area_struct *dst_vma,
- struct vm_area_struct *src_vma)
-{
- resource_size_t paddr;
- unsigned long size;
-
- if (!(dst_vma->vm_flags & VM_PAT))
- return;
-
- /*
- * As the page tables might not have been copied yet, the PAT
- * information is obtained from the src VMA, just like during
- * track_pfn_copy().
- */
- if (get_pat_info(src_vma, &paddr, NULL)) {
- size = src_vma->vm_end - src_vma->vm_start;
- free_pfn_range(paddr, size);
+ *pfn = PHYS_PFN(paddr);
}
-
- /*
- * Reservation was freed, any copied page tables will get cleaned
- * up later, but without getting PAT involved again.
- */
- vm_flags_clear(dst_vma, VM_PAT);
+ return rc;
}
/*
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index acf387d199d7b..97f8afccfec76 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1509,10 +1509,11 @@ static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
/*
* track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page
- * tables copied during copy_page_range().
+ * tables copied during copy_page_range(). Returns the pfn to be passed to
+ * untrack_pfn_copy() if anything goes wrong while copying page tables.
*/
static inline int track_pfn_copy(struct vm_area_struct *dst_vma,
- struct vm_area_struct *src_vma)
+ struct vm_area_struct *src_vma, unsigned long *pfn)
{
return 0;
}
@@ -1553,14 +1554,26 @@ extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
pfn_t pfn);
extern int track_pfn_copy(struct vm_area_struct *dst_vma,
- struct vm_area_struct *src_vma);
-extern void untrack_pfn_copy(struct vm_area_struct *dst_vma,
- struct vm_area_struct *src_vma);
+ struct vm_area_struct *src_vma, unsigned long *pfn);
extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size, bool mm_wr_locked);
extern void untrack_pfn_clear(struct vm_area_struct *vma);
#endif
+/*
+ * untrack_pfn_copy is called when a VM_PFNMAP VMA failed to copy during
+ * copy_page_range(), but after track_pfn_copy() was already called.
+ */
+static inline void untrack_pfn_copy(struct vm_area_struct *dst_vma,
+ unsigned long pfn)
+{
+ untrack_pfn(dst_vma, pfn, dst_vma->vm_end - dst_vma->vm_start, true);
+ /*
+ * Reservation was freed, any copied page tables will get cleaned
+ * up later, but without getting PAT involved again.
+ */
+}
+
#ifdef CONFIG_MMU
#ifdef __HAVE_COLOR_ZERO_PAGE
static inline int is_zero_pfn(unsigned long pfn)
diff --git a/mm/memory.c b/mm/memory.c
index e4b6e599c34d8..dc8efa1358e94 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1362,12 +1362,12 @@ int
copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
{
pgd_t *src_pgd, *dst_pgd;
- unsigned long next;
unsigned long addr = src_vma->vm_start;
unsigned long end = src_vma->vm_end;
struct mm_struct *dst_mm = dst_vma->vm_mm;
struct mm_struct *src_mm = src_vma->vm_mm;
struct mmu_notifier_range range;
+ unsigned long next, pfn;
bool is_cow;
int ret;
@@ -1378,7 +1378,7 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
return copy_hugetlb_page_range(dst_mm, src_mm, dst_vma, src_vma);
if (unlikely(src_vma->vm_flags & VM_PFNMAP)) {
- ret = track_pfn_copy(dst_vma, src_vma);
+ ret = track_pfn_copy(dst_vma, src_vma, &pfn);
if (ret)
return ret;
}
@@ -1425,7 +1425,7 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
mmu_notifier_invalidate_range_end(&range);
}
if (ret && unlikely(src_vma->vm_flags & VM_PFNMAP))
- untrack_pfn_copy(dst_vma, src_vma);
+ untrack_pfn_copy(dst_vma, pfn);
return ret;
}
--
2.48.1
--
Cheers,
David / dhildenb