Re: [PATCH 4/4 V3] Help to dump the old memory encrypted into vmcore file
From: lijiang
Date: Wed Jun 20 2018 - 00:50:26 EST
å 2018å06æ19æ 22:46, lijiang åé:
> å 2018å06æ19æ 11:16, Dave Young åé:
>> On 06/16/18 at 04:27pm, Lianbo Jiang wrote:
>>> In kdump mode, we need to dump the old memory into vmcore file,
>>> if SME is enabled in the first kernel, we must remap the old
>>> memory in encrypted manner, which will be automatically decrypted
>>> when we read from DRAM. It helps to parse the vmcore for some tools.
>>>
>>> Signed-off-by: Lianbo Jiang <lijiang@xxxxxxxxxx>
>>> ---
>>> Some changes:
>>> 1. add a new file and modify Makefile.
>>> 2. remove some code in sev_active().
>>>
>>> arch/x86/kernel/Makefile | 1 +
>>> arch/x86/kernel/crash_dump_encrypt.c | 53 ++++++++++++++++++++++++++++++++++++
>>> fs/proc/vmcore.c | 20 ++++++++++----
>>> include/linux/crash_dump.h | 11 ++++++++
>>> 4 files changed, 79 insertions(+), 6 deletions(-)
>>> create mode 100644 arch/x86/kernel/crash_dump_encrypt.c
>>>
>>> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
>>> index 02d6f5c..afb5bad 100644
>>> --- a/arch/x86/kernel/Makefile
>>> +++ b/arch/x86/kernel/Makefile
>>> @@ -96,6 +96,7 @@ obj-$(CONFIG_KEXEC_CORE) += machine_kexec_$(BITS).o
>>> obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o crash.o
>>> obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o
>>> obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
>>> +obj-$(CONFIG_AMD_MEM_ENCRYPT) += crash_dump_encrypt.o
>>> obj-y += kprobes/
>>> obj-$(CONFIG_MODULES) += module.o
>>> obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
>>> diff --git a/arch/x86/kernel/crash_dump_encrypt.c b/arch/x86/kernel/crash_dump_encrypt.c
>>> new file mode 100644
>>> index 0000000..e44ef33
>>> --- /dev/null
>>> +++ b/arch/x86/kernel/crash_dump_encrypt.c
>>> @@ -0,0 +1,53 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +/*
>>> + * Memory preserving reboot related code.
>>> + *
>>> + * Created by: Lianbo Jiang (lijiang@xxxxxxxxxx)
>>> + * Copyright (C) RedHat Corporation, 2018. All rights reserved
>>> + */
>>> +
>>> +#include <linux/errno.h>
>>> +#include <linux/crash_dump.h>
>>> +#include <linux/uaccess.h>
>>> +#include <linux/io.h>
>>> +
>>> +/**
>>> + * copy_oldmem_page_encrypted - copy one page from "oldmem encrypted"
>>> + * @pfn: page frame number to be copied
>>> + * @buf: target memory address for the copy; this can be in kernel address
>>> + * space or user address space (see @userbuf)
>>> + * @csize: number of bytes to copy
>>> + * @offset: offset in bytes into the page (based on pfn) to begin the copy
>>> + * @userbuf: if set, @buf is in user address space, use copy_to_user(),
>>> + * otherwise @buf is in kernel address space, use memcpy().
>>> + *
>>> + * Copy a page from "oldmem encrypted". For this page, there is no pte
>>> + * mapped in the current kernel. We stitch up a pte, similar to
>>> + * kmap_atomic.
>>> + */
>>> +
>>> +ssize_t copy_oldmem_page_encrypted(unsigned long pfn, char *buf,
>>> + size_t csize, unsigned long offset, int userbuf)
>>> +{
>>> + void *vaddr;
>>> +
>>> + if (!csize)
>>> + return 0;
>>> +
>>> + vaddr = (__force void *)ioremap_encrypted(pfn << PAGE_SHIFT,
>>> + PAGE_SIZE);
>>> + if (!vaddr)
>>> + return -ENOMEM;
>>> +
>>> + if (userbuf) {
>>> + if (copy_to_user((void __user *)buf, vaddr + offset, csize)) {
>>> + iounmap((void __iomem *)vaddr);
>>> + return -EFAULT;
>>> + }
>>> + } else
>>> + memcpy(buf, vaddr + offset, csize);
>>> +
>>> + set_iounmap_nonlazy();
>>> + iounmap((void __iomem *)vaddr);
>>> + return csize;
>>> +}
>>> diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
>>> index a45f0af..5200266 100644
>>> --- a/fs/proc/vmcore.c
>>> +++ b/fs/proc/vmcore.c
>>> @@ -25,6 +25,8 @@
>>> #include <linux/uaccess.h>
>>> #include <asm/io.h>
>>> #include "internal.h"
>>> +#include <linux/mem_encrypt.h>
>>> +#include <asm/pgtable.h>
>>>
>>> /* List representing chunks of contiguous memory areas and their offsets in
>>> * vmcore file.
>>> @@ -86,7 +88,8 @@ static int pfn_is_ram(unsigned long pfn)
>>>
>>> /* Reads a page from the oldmem device from given offset. */
>>> static ssize_t read_from_oldmem(char *buf, size_t count,
>>> - u64 *ppos, int userbuf)
>>> + u64 *ppos, int userbuf,
>>> + bool encrypted)
>>> {
>>> unsigned long pfn, offset;
>>> size_t nr_bytes;
>>> @@ -108,8 +111,11 @@ static ssize_t read_from_oldmem(char *buf, size_t count,
>>> if (pfn_is_ram(pfn) == 0)
>>> memset(buf, 0, nr_bytes);
>>> else {
>>> - tmp = copy_oldmem_page(pfn, buf, nr_bytes,
>>> - offset, userbuf);
>>> + tmp = encrypted ? copy_oldmem_page_encrypted(pfn,
>>> + buf, nr_bytes, offset, userbuf)
>>> + : copy_oldmem_page(pfn, buf, nr_bytes,
>>> + offset, userbuf);
>>> +
>>> if (tmp < 0)
>>> return tmp;
>>> }
>>> @@ -143,7 +149,7 @@ void __weak elfcorehdr_free(unsigned long long addr)
>>> */
>>> ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
>>> {
>>> - return read_from_oldmem(buf, count, ppos, 0);
>>> + return read_from_oldmem(buf, count, ppos, 0, false);
>>
>> The elf header actually stays in kdump kernel reserved memory so it is
>> not "oldmem", the original function is misleading and doing unnecessary
>> things. But as for your patch maybe using it as is is good for the time
>> being and add a code comment why the encrypted is "false".
>>
> Thank you, Dave. It is a good idea to add some comments for the code.
> I rechecked the code, the elf header should be still the old memory in the first kernel,
> but why is the old memory unencrypted? Because it copies the elf header from the memory
> encrypted(user space) to the memory unencrypted(kernel space) when SME is activated in the
> first kernel, this operation just leads to decryption.
>
I just know your means, we allocated the memory unencrypted(kernel space) in reserved
crash memory, from this point of view, it is not "oldmem". Thanks for your comments.
Thanks.
Lianbo
> Thanks.
> Lianbo
>> /* elfcorehdr stays in kdump kernel memory and it is not encrypted. */
>> return read_from_oldmem(buf, count, ppos, 0, false);
>>
>>
>> I'm thinking to move the function to something like below, still not sure
>> memremap works on every arches or not, still need more test
>>
>> diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
>> index cfb6674331fd..40c01cc42b38 100644
>> --- a/fs/proc/vmcore.c
>> +++ b/fs/proc/vmcore.c
>> @@ -136,6 +136,24 @@ static ssize_t read_from_oldmem(char *buf, size_t count,
>> return read;
>> }
>>
>> +static ssize_t read_from_mem(char *buf, size_t count, u64 *ppos)
>> +{
>> + resource_size_t offset = (resource_size_t)*ppos;
>> + char *kbuf;
>> +
>> + if (!count)
>> + return 0;
>> +
>> + kbuf = memremap(offset, count, MEMREMAP_WB);
>> + if (!kbuf)
>> + return 0;
>> +
>> + memcpy(buf, kbuf, count);
>> + memunmap(kbuf);
>> +
>> + return count;
>> +}
>> +
>> /*
>> * Architectures may override this function to allocate ELF header in 2nd kernel
>> */
>> @@ -155,7 +173,7 @@ void __weak elfcorehdr_free(unsigned long long addr)
>> */
>> ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
>> {
>> - return read_from_oldmem(buf, count, ppos, 0);
>> + return read_from_mem(buf, count, ppos);
>> }
>>
>> /*
>>
>>
>>> }
>>>
>>> /*
>>> @@ -151,7 +157,7 @@ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
>>> */
>>> ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
>>> {
>>> - return read_from_oldmem(buf, count, ppos, 0);
>>> + return read_from_oldmem(buf, count, ppos, 0, sme_active());
>>> }
>>>
>>> /*
>>> @@ -161,6 +167,7 @@ int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma,
>>> unsigned long from, unsigned long pfn,
>>> unsigned long size, pgprot_t prot)
>>> {
>>> + prot = pgprot_encrypted(prot);
>>> return remap_pfn_range(vma, from, pfn, size, prot);
>>> }
>>>
>>> @@ -235,7 +242,8 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
>>> m->offset + m->size - *fpos,
>>> buflen);
>>> start = m->paddr + *fpos - m->offset;
>>> - tmp = read_from_oldmem(buffer, tsz, &start, userbuf);
>>> + tmp = read_from_oldmem(buffer, tsz, &start, userbuf,
>>> + sme_active());
>>> if (tmp < 0)
>>> return tmp;
>>> buflen -= tsz;
>>> diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
>>> index f7ac2aa..f3414ff 100644
>>> --- a/include/linux/crash_dump.h
>>> +++ b/include/linux/crash_dump.h
>>> @@ -25,6 +25,17 @@ extern int remap_oldmem_pfn_range(struct vm_area_struct *vma,
>>>
>>> extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
>>> unsigned long, int);
>>> +#ifdef CONFIG_AMD_MEM_ENCRYPT
>>> +extern ssize_t copy_oldmem_page_encrypted(unsigned long pfn, char *buf,
>>> + size_t csize, unsigned long offset,
>>> + int userbuf);
>>> +#else
>>> +static inline ssize_t copy_oldmem_page_encrypted(unsigned long pfn, char *buf,
>>> + size_t csize, unsigned long offset,
>>> + int userbuf) {
>>
>> Personally I prefer below because it is too long:
>>
>> static inline
>> ssize_t copy_oldmem_page_encrypted(unsigned long pfn, char *buf, size_t csize,
>> unsigned long offset, int userbuf)
>> {
>> return 0;
>> }
>>
>>
>>> + return csize;
>>
>> As above it should be return 0;
>>
Great, Thank you, Dave.
Lianbo
>>> +}
>>> +#endif
>>> void vmcore_cleanup(void);
>>>
>>> /* Architecture code defines this if there are other possible ELF
>>> --
>>> 2.9.5
>>>
>>>
>>> _______________________________________________
>>> kexec mailing list
>>> kexec@xxxxxxxxxxxxxxxxxxx
>>> http://lists.infradead.org/mailman/listinfo/kexec
>>
>> Thanks
>> Dave
>>