Re: [PATCH] mm, kasan: don't poison boot memory

From: George Kennedy
Date: Thu Feb 25 2021 - 11:08:02 EST




On 2/25/2021 10:22 AM, George Kennedy wrote:


On 2/25/2021 9:57 AM, Mike Rapoport wrote:
On Thu, Feb 25, 2021 at 07:38:19AM -0500, George Kennedy wrote:
On 2/25/2021 3:53 AM, Mike Rapoport wrote:
Hi George,

On 2/24/2021 5:37 AM, Mike Rapoport wrote:
On Tue, Feb 23, 2021 at 04:46:28PM -0500, George Kennedy wrote:
Mike,

Still no luck.

[   30.193723] iscsi: registered transport (iser)
[   30.195970] iBFT detected.
[   30.196571] BUG: unable to handle page fault for address: ffffffffff240004
Hmm, we cannot set ibft_addr to early pointer to the ACPI table.
Let's try something more disruptive and move the reservation back to
iscsi_ibft_find.c.

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 7bdc0239a943..c118dd54a747 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1551,6 +1551,7 @@ void __init acpi_boot_table_init(void)
        if (acpi_disabled)
            return;
+#if 0
        /*
         * Initialize the ACPI boot-time table parser.
         */
@@ -1558,6 +1559,7 @@ void __init acpi_boot_table_init(void)
            disable_acpi();
            return;
        }
+#endif
        acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d883176ef2ce..c615ce96c9a2 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -570,16 +570,6 @@ void __init reserve_standard_io_resources(void)
    }
-static __init void reserve_ibft_region(void)
-{
-    unsigned long addr, size = 0;
-
-    addr = find_ibft_region(&size);
-
-    if (size)
-        memblock_reserve(addr, size);
-}
-
    static bool __init snb_gfx_workaround_needed(void)
    {
    #ifdef CONFIG_PCI
@@ -1032,6 +1022,12 @@ void __init setup_arch(char **cmdline_p)
         */
        find_smp_config();
+    /*
+     * Initialize the ACPI boot-time table parser.
+     */
+    if (acpi_table_init())
+        disable_acpi();
+
        reserve_ibft_region();
        early_alloc_pgt_buf();
diff --git a/drivers/firmware/iscsi_ibft_find.c b/drivers/firmware/iscsi_ibft_find.c
index 64bb94523281..01be513843d6 100644
--- a/drivers/firmware/iscsi_ibft_find.c
+++ b/drivers/firmware/iscsi_ibft_find.c
@@ -47,7 +47,25 @@ static const struct {
    #define VGA_MEM 0xA0000 /* VGA buffer */
    #define VGA_SIZE 0x20000 /* 128kB */
-static int __init find_ibft_in_mem(void)
+static void __init *acpi_find_ibft_region(void)
+{
+    int i;
+    struct acpi_table_header *table = NULL;
+    acpi_status status;
+
+    if (acpi_disabled)
+        return NULL;
+
+    for (i = 0; i < ARRAY_SIZE(ibft_signs) && !ibft_addr; i++) {
+        status = acpi_get_table(ibft_signs[i].sign, 0, &table);
+        if (ACPI_SUCCESS(status))
+            return table;
+    }
+
+    return NULL;
+}
+
+static void __init *find_ibft_in_mem(void)
    {
        unsigned long pos;
        unsigned int len = 0;
@@ -70,35 +88,44 @@ static int __init find_ibft_in_mem(void)
                    /* if the length of the table extends past 1M,
                     * the table cannot be valid. */
                    if (pos + len <= (IBFT_END-1)) {
-                    ibft_addr = (struct acpi_table_ibft *)virt;
                        pr_info("iBFT found at 0x%lx.\n", pos);
-                    goto done;
+                    return virt;
                    }
                }
            }
        }
-done:
-    return len;
+
+    return NULL;
    }
+
+static void __init *find_ibft(void)
+{
+    /* iBFT 1.03 section 1.4.3.1 mandates that UEFI machines will
+     * only use ACPI for this */
+    if (!efi_enabled(EFI_BOOT))
+        return find_ibft_in_mem();
+    else
+        return acpi_find_ibft_region();
+}
+
    /*
     * Routine used to find the iSCSI Boot Format Table. The logical
     * kernel address is set in the ibft_addr global variable.
     */
-unsigned long __init find_ibft_region(unsigned long *sizep)
+void __init reserve_ibft_region(void)
    {
-    ibft_addr = NULL;
+    struct acpi_table_ibft *table;
+    unsigned long size;
-    /* iBFT 1.03 section 1.4.3.1 mandates that UEFI machines will
-     * only use ACPI for this */
+    table = find_ibft();
+    if (!table)
+        return;
-    if (!efi_enabled(EFI_BOOT))
-        find_ibft_in_mem();
-
-    if (ibft_addr) {
-        *sizep = PAGE_ALIGN(ibft_addr->header.length);
-        return (u64)virt_to_phys(ibft_addr);
-    }
+    size = PAGE_ALIGN(table->header.length);
+    memblock_reserve(virt_to_phys(table), size);
-    *sizep = 0;
-    return 0;
+    if (efi_enabled(EFI_BOOT))
+        acpi_put_table(&table->header);
+    else
+        ibft_addr = table;
    }
diff --git a/include/linux/iscsi_ibft.h b/include/linux/iscsi_ibft.h
index b7b45ca82bea..da813c891990 100644
--- a/include/linux/iscsi_ibft.h
+++ b/include/linux/iscsi_ibft.h
@@ -26,13 +26,9 @@ extern struct acpi_table_ibft *ibft_addr;
     * mapped address is set in the ibft_addr variable.
     */
    #ifdef CONFIG_ISCSI_IBFT_FIND
-unsigned long find_ibft_region(unsigned long *sizep);
+void reserve_ibft_region(void);
    #else
-static inline unsigned long find_ibft_region(unsigned long *sizep)
-{
-    *sizep = 0;
-    return 0;
-}
+static inline void reserve_ibft_region(void) {}
    #endif
    #endif /* ISCSI_IBFT_H */
Still no luck Mike,

We're back to the original problem where the only thing that worked was to
run "SetPageReserved(page)" before calling "kmap(page)". The page is being
"freed" before ibft_init() is called as a result of the recent buddy page
freeing changes.
I keep missing some little details each time :(
No worries. Thanks for all your help. Does this patch go on top of your
previous patch or is it standalone?
This is standalone.
George
Ok, let's try from the different angle.

diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index 4b9b329a5a92..ec43e1447336 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -7,6 +7,8 @@
    *
*****************************************************************************/
+#include <linux/memblock.h>
+
   #include <acpi/acpi.h>
   #include "accommon.h"
   #include "actables.h"
@@ -339,6 +341,21 @@ acpi_tb_parse_root_table(acpi_physical_address rsdp_address)
               acpi_tb_parse_fadt();
           }
+        if (ACPI_SUCCESS(status) &&
+ ACPI_COMPARE_NAMESEG(&acpi_gbl_root_table_list.
+                     tables[table_index].signature,
+                     ACPI_SIG_IBFT)) {
+            struct acpi_table_header *ibft;
+            struct acpi_table_desc *desc;
+
+            desc = &acpi_gbl_root_table_list.tables[table_index];
+            status = acpi_tb_get_table(desc, &ibft);
+            if (ACPI_SUCCESS(status)) {
+                memblock_reserve(address, ibft->length);
+                acpi_tb_put_table(desc);
+
+        }
+
   next_table:
           table_entry += table_entry_size;


Applied just your latest patch, but same failure.

I thought there was an earlier comment (which I can't find now) that stated that memblock_reserve() wouldn't reserve the page, which is what's needed here.
Mike,

Here was David's explanation of what he thinks is going on (or should be going on) from a few days ago:

QUOTE...
I assume that acpi_map()/acpi_unmap() map some firmware blob that is provided via firmware/bios/... to us.

should_use_kmap() tells us whether
a) we have a "struct page" and should kmap() that one
b) we don't have a "struct page" and should ioremap.

As it is a blob, the firmware should always reserve that memory region via memblock (e.g., memblock_reserve()), such that we either
1) don't create a memmap ("struct page") at all (-> case b) )
2) if we have to create e memmap, we mark the page PG_reserved and
*never* expose it to the buddy (-> case a) )


Are you telling me that in this case we might have a memmap for the HW blob that is *not* PG_reserved? In that case it most probably got exposed to the buddy where it can happily get allocated/freed.

The latent BUG would be that that blob gets exposed to the system like ordinary RAM, and not reserved via memblock early during boot. Assuming that blob has a low physical address, with my patch it will get allocated/used a lot earlier - which would mean we trigger this latent BUG now more easily.
...END_QUOTE

Your most recent patch has added the memblock_reserve(), but it's still missing the PG_reserved setting.

Thanks,
George


[   30.308229] iBFT detected..
[   30.308796] ==================================================================
[   30.308890] BUG: KASAN: use-after-free in ibft_init+0x134/0xc33
[   30.308890] Read of size 4 at addr ffff8880be453004 by task swapper/0/1
[   30.308890]
[   30.308890] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.11.0-f9593a0 #12
[   30.308890] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
[   30.308890] Call Trace:
[   30.308890]  dump_stack+0xdb/0x120
[   30.308890]  ? ibft_init+0x134/0xc33
[   30.308890]  print_address_description.constprop.7+0x41/0x60
[   30.308890]  ? ibft_init+0x134/0xc33
[   30.308890]  ? ibft_init+0x134/0xc33
[   30.308890]  kasan_report.cold.10+0x78/0xd1
[   30.308890]  ? ibft_init+0x134/0xc33
[   30.308890]  __asan_report_load_n_noabort+0xf/0x20
[   30.308890]  ibft_init+0x134/0xc33
[   30.308890]  ? write_comp_data+0x2f/0x90
[   30.308890]  ? ibft_check_initiator_for+0x159/0x159
[   30.308890]  ? write_comp_data+0x2f/0x90
[   30.308890]  ? ibft_check_initiator_for+0x159/0x159
[   30.308890]  do_one_initcall+0xc4/0x3e0
[   30.308890]  ? perf_trace_initcall_level+0x3e0/0x3e0
[   30.308890]  ? unpoison_range+0x14/0x40
[   30.308890]  ? ____kasan_kmalloc.constprop.5+0x8f/0xc0
[   30.308890]  ? kernel_init_freeable+0x420/0x652
[   30.308890]  ? __kasan_kmalloc+0x9/0x10
[   30.308890]  ? __sanitizer_cov_trace_pc+0x21/0x50
[   30.308890]  kernel_init_freeable+0x596/0x652
[   30.308890]  ? console_on_rootfs+0x7d/0x7d
[   30.308890]  ? __sanitizer_cov_trace_pc+0x21/0x50
[   30.308890]  ? rest_init+0xf0/0xf0
[   30.308890]  kernel_init+0x16/0x1d0
[   30.308890]  ? rest_init+0xf0/0xf0
[   30.308890]  ret_from_fork+0x22/0x30
[   30.308890]
[   30.308890] The buggy address belongs to the page:
[   30.308890] page:0000000001b7b17c refcount:0 mapcount:0 mapping:0000000000000000 index:0x1 pfn:0xbe453
[   30.308890] flags: 0xfffffc0000000()
[   30.308890] raw: 000fffffc0000000 ffffea0002ef9788 ffffea0002f91488 0000000000000000
[   30.308890] raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000
[   30.308890] page dumped because: kasan: bad access detected
[   30.308890] page_owner tracks the page as freed
[   30.308890] page last allocated via order 0, migratetype Movable, gfp_mask 0x100dca(GFP_HIGHUSER_MOVABLE|__GFP_ZERO), pid 204, ts 28121288605
[   30.308890]  prep_new_page+0xfb/0x140
[   30.308890]  get_page_from_freelist+0x3503/0x5730
[   30.308890]  __alloc_pages_nodemask+0x2d8/0x650
[   30.308890]  alloc_pages_vma+0xe2/0x560
[   30.308890]  __handle_mm_fault+0x930/0x26c0
[   30.308890]  handle_mm_fault+0x1f9/0x810
[   30.308890]  do_user_addr_fault+0x6f7/0xca0
[   30.308890]  exc_page_fault+0xaf/0x1a0
[   30.308890]  asm_exc_page_fault+0x1e/0x30
[   30.308890] page last free stack trace:
[   30.308890]  free_pcp_prepare+0x122/0x290
[   30.308890]  free_unref_page_list+0xe6/0x490
[   30.308890]  release_pages+0x2ed/0x1270
[   30.308890]  free_pages_and_swap_cache+0x245/0x2e0
[   30.308890]  tlb_flush_mmu+0x11e/0x680
[   30.308890]  tlb_finish_mmu+0xa6/0x3e0
[   30.308890]  exit_mmap+0x2b3/0x540
[   30.308890]  mmput+0x11d/0x450
[   30.308890]  do_exit+0xaa6/0x2d40
[   30.308890]  do_group_exit+0x128/0x340
[   30.308890]  __x64_sys_exit_group+0x43/0x50
[   30.308890]  do_syscall_64+0x37/0x50
[   30.308890]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   30.308890]
[   30.308890] Memory state around the buggy address:
[   30.308890]  ffff8880be452f00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[   30.308890]  ffff8880be452f80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[   30.308890] >ffff8880be453000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[   30.308890]                    ^
[   30.308890]  ffff8880be453080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[   30.308890]  ffff8880be453100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[   30.308890] ==================================================================

George