Re: [PATCH v3] x86/gart/kcore: Exclude GART aperture from kcore
From: Kairui Song
Date: Mon Feb 25 2019 - 02:42:31 EST
On Wed, Feb 13, 2019 at 4:28 PM Kairui Song <kasong@xxxxxxxxxx> wrote:
>
> On machines where the GART aperture is mapped over physical RAM,
> /proc/kcore contains the GART aperture range and reading it may lead
> to kernel panic.
>
> In 'commit 2a3e83c6f96c ("x86/gart: Exclude GART aperture from vmcore")',
> a workaround is applied for vmcore to let /proc/vmcore return zeroes
> when attempting to read the GART region, as vmcore have the same issue,
> and after 'commit 707d4eefbdb3 ("Revert "[PATCH] Insert GART region
> into resource map"")', userspace tools won't be able to detect GART
> region so have to avoid it from being reading in kernel.
>
> This patch applies a similar workaround for kcore. Let /proc/kcore
> return zeroes for GART aperture.
>
> Both vmcore and kcore maintain a memory mapping list, in the vmcore
> workaround we exclude the GART region by registering a hook for checking
> if PFN is valid before reading, because vmcore's memory mapping could
> be generated by userspace which doesn't know about GART. But for kcore
> it will be simpler to just alter the memory area list, kcore's area list
> is always generated by kernel on init.
>
> Kcore's memory area list is generated very late so can't exclude the
> overlapped area when GART is initialized, instead simply introduce a
> new area enum type KCORE_NORAM, register GART aperture as KCORE_NORAM
> and let kcore return zeros for all KCORE_NORAM area. This fixes the
> problem well with minor code changes.
>
> ---
> Update from V2:
> Instead of repeating the same hook infrastructure for kcore, introduce
> a new kcore area type to avoid reading from, and let kcore always bypass
> this kind of area.
>
> Update from V1:
> Fix a complie error when CONFIG_PROC_KCORE is not set
>
> arch/x86/kernel/aperture_64.c | 14 ++++++++++++++
> fs/proc/kcore.c | 13 +++++++++++++
> include/linux/kcore.h | 1 +
> 3 files changed, 28 insertions(+)
>
> diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
> index 58176b56354e..5fb04bdd3221 100644
> --- a/arch/x86/kernel/aperture_64.c
> +++ b/arch/x86/kernel/aperture_64.c
> @@ -31,6 +31,7 @@
> #include <asm/amd_nb.h>
> #include <asm/x86_init.h>
> #include <linux/crash_dump.h>
> +#include <linux/kcore.h>
>
> /*
> * Using 512M as goal, in case kexec will load kernel_big
> @@ -84,6 +85,17 @@ static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
> }
> #endif
>
> +#ifdef CONFIG_PROC_KCORE
> +static struct kcore_list kcore_gart;
> +
> +static void __init exclude_from_kcore(u64 aper_base, u32 aper_order) {
> + u32 aper_size = (32 * 1024 * 1024) << aper_order;
> + kclist_add(&kcore_gart, __va(aper_base), aper_size, KCORE_NORAM);
> +}
> +#else
> +static inline void __init exclude_from_kcore(u64 aper_base, u32 aper_order) { }
> +#endif
> +
> /* This code runs before the PCI subsystem is initialized, so just
> access the northbridge directly. */
>
> @@ -475,6 +487,7 @@ int __init gart_iommu_hole_init(void)
> * and fixed up the northbridge
> */
> exclude_from_vmcore(last_aper_base, last_aper_order);
> + exclude_from_kcore(last_aper_base, last_aper_order);
>
> return 1;
> }
> @@ -521,6 +534,7 @@ int __init gart_iommu_hole_init(void)
> * range through vmcore even though it should be part of the dump.
> */
> exclude_from_vmcore(aper_alloc, aper_order);
> + exclude_from_kcore(aper_alloc, aper_order);
>
> /* Fix up the north bridges */
> for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
> diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
> index bbcc185062bb..15e0d74d7c56 100644
> --- a/fs/proc/kcore.c
> +++ b/fs/proc/kcore.c
> @@ -75,6 +75,8 @@ static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len,
> size = 0;
>
> list_for_each_entry(m, &kclist_head, list) {
> + if (m->type == KCORE_NORAM)
> + continue;
> try = kc_vaddr_to_offset((size_t)m->addr + m->size);
> if (try > size)
> size = try;
> @@ -256,6 +258,9 @@ static int kcore_update_ram(void)
> list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
> if (pos->type == KCORE_RAM || pos->type == KCORE_VMEMMAP)
> list_move(&pos->list, &garbage);
> + /* Move NORAM area to head of the new list */
> + if (pos->type == KCORE_NORAM)
> + list_move(&pos->list, &list);
> }
> list_splice_tail(&list, &kclist_head);
>
> @@ -356,6 +361,8 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
>
> phdr = &phdrs[1];
> list_for_each_entry(m, &kclist_head, list) {
> + if (m->type == KCORE_NORAM)
> + continue;
> phdr->p_type = PT_LOAD;
> phdr->p_flags = PF_R | PF_W | PF_X;
> phdr->p_offset = kc_vaddr_to_offset(m->addr) + data_offset;
> @@ -465,6 +472,12 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
> goto out;
> }
> m = NULL; /* skip the list anchor */
> + } else if (m->type == KCORE_NORAM) {
> + /* for NORAM area just fill zero */
> + if (clear_user(buffer, tsz)) {
> + ret = -EFAULT;
> + goto out;
> + }
> } else if (m->type == KCORE_VMALLOC) {
> vread(buf, (char *)start, tsz);
> /* we have to zero-fill user buffer even if no read */
> diff --git a/include/linux/kcore.h b/include/linux/kcore.h
> index 8c3f8c14eeaa..372a4093f794 100644
> --- a/include/linux/kcore.h
> +++ b/include/linux/kcore.h
> @@ -13,6 +13,7 @@ enum kcore_type {
> KCORE_USER,
> KCORE_OTHER,
> KCORE_REMAP,
> + KCORE_NORAM,
> };
>
> struct kcore_list {
> --
> 2.20.1
>
Hi Jiri,
Could you help have a look of this fix too? I saw your reviewed V1,
and this V3 changed the approach a lot. Thanks!
--
Best Regards,
Kairui Song