Re: 6.6/regression/bisected - after commit a349d72fd9efc87c8fd1d16d3164752d84a7275b system stopped booting

From: Hugh Dickins
Date: Fri Sep 01 2023 - 05:08:31 EST


On Fri, 1 Sep 2023, Mikhail Gavrilov wrote:
> On Fri, Sep 1, 2023 at 12:29 PM Hugh Dickins <hughd@xxxxxxxxxx> wrote:
> >
> >
> > Thanks for all the info, which has helped in several ways. The only
> > thing I can do is to offer you a debug (and then keep running) patch -
> > suitable for the config you showed there, not for anyone else's config.
> >
> > I've never used stackdepot before, but I've tried this out in good and
> > bad cases, and expect it to work for you, shedding light on where is
> > going wrong - machine should boot up fine, and in dmesg you'll find one
> > stacktrace between "WARNING: pte_map..." and "End of pte_map..." lines.
> >
> > To apply on top of a349d72fd9ef ("mm/pgtable: add rcu_read_lock() and
> > rcu_read_unlock()s"), the bad end point of your bisection; but if you
> > prefer, I can provide a version to go on top of whatever later Linus
> > commit suits you.
> >
> > Patch not for general consumption, just for Mike's debugging:
> > please report back the stacktrace shown - thanks!
> >
>
> Thanks for digging into the problem.
> With the attached patch I got FTBFS when build kernel at commit a349d72fd9ef.
>
>
> LD [M] drivers/gpu/drm/amd/amdgpu/amdgpu.o
> MODPOST Module.symvers
> ERROR: modpost: "pte_unmap" [arch/x86/kvm/kvm.ko] undefined!
> ERROR: modpost: "pte_unmap" [drivers/vfio/vfio_iommu_type1.ko] undefined!
> make[2]: *** [scripts/Makefile.modpost:144: Module.symvers] Error 1
> make[1]: *** [/home/mikhail/packaging-work/git/linux/Makefile:1984:
> modpost] Error 2
> make: *** [Makefile:234: __sub-make] Error 2

Sorry about that, please try this instead, adds EXPORT_SYMBOL(pte_unmap).

---
include/linux/pgtable.h | 5 +----
mm/memory.c | 1 +
mm/mremap.c | 1 +
mm/pgtable-generic.c | 41 +++++++++++++++++++++++++++++++++++++++--
4 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 5134edcec668..131392f1c33e 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -106,10 +106,7 @@ static inline pte_t *__pte_map(pmd_t *pmd, unsigned long address)
{
return pte_offset_kernel(pmd, address);
}
-static inline void pte_unmap(pte_t *pte)
-{
- rcu_read_unlock();
-}
+void pte_unmap(pte_t *pte);
#endif

/* Find an entry in the second-level page table.. */
diff --git a/mm/memory.c b/mm/memory.c
index 44d11812a88f..b1ee8ab51978 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1033,6 +1033,7 @@ copy_pte_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
ret = -ENOMEM;
goto out;
}
+ pte_unmap(NULL); /* avoid warning when knowingly nested */
src_pte = pte_offset_map_nolock(src_mm, src_pmd, addr, &src_ptl);
if (!src_pte) {
pte_unmap_unlock(dst_pte, dst_ptl);
diff --git a/mm/mremap.c b/mm/mremap.c
index 11e06e4ab33b..56d981add487 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -175,6 +175,7 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
err = -EAGAIN;
goto out;
}
+ pte_unmap(NULL); /* avoid warning when knowingly nested */
new_pte = pte_offset_map_nolock(mm, new_pmd, new_addr, &new_ptl);
if (!new_pte) {
pte_unmap_unlock(old_pte, old_ptl);
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 400e5a045848..958ee5cf91b1 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -232,11 +232,48 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

+#include <linux/stacktrace.h>
+#include <linux/stackdepot.h>
+#include <linux/timekeeping.h>
+
+static depot_stack_handle_t depot_stack;
+
+static void pte_map(void)
+{
+ static bool done = false;
+ unsigned long entries[16];
+ unsigned int nr_entries;
+
+ /* rcu_read_lock(); */
+ if (raw_smp_processor_id() != 0 || done)
+ return;
+ if (depot_stack) {
+ pr_warn("WARNING: pte_map was not pte_unmapped:\n");
+ stack_depot_print(depot_stack);
+ pr_warn("End of pte_map warning.\n");
+ done = true;
+ return;
+ }
+ nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
+ depot_stack = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
+ if (ktime_get_seconds() > 1800) /* give up after half an hour */
+ done = true;
+}
+
+void pte_unmap(pte_t *pte)
+{
+ /* rcu_read_unlock(); */
+ if (raw_smp_processor_id() != 0)
+ return;
+ depot_stack = 0;
+}
+EXPORT_SYMBOL(pte_unmap);
+
pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp)
{
pmd_t pmdval;

- rcu_read_lock();
+ pte_map();
pmdval = pmdp_get_lockless(pmd);
if (pmdvalp)
*pmdvalp = pmdval;
@@ -250,7 +287,7 @@ pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp)
}
return __pte_map(&pmdval, addr);
nomap:
- rcu_read_unlock();
+ pte_unmap(NULL);
return NULL;
}

--
2.35.3