Re: [PATCHv3] x86/boot/KASLR: skip the specified crashkernel region

From: Pingfan Liu
Date: Tue Apr 02 2019 - 23:00:04 EST


On Tue, Apr 2, 2019 at 2:46 PM Baoquan He <bhe@xxxxxxxxxx> wrote:
>
> On 04/02/19 at 12:10pm, Pingfan Liu wrote:
> > crashkernel=x@y or =range1:size1[,range2:size2,...]@offset option may
> > fail to reserve the required memory region if KASLR puts kernel into the
> > region. To avoid this uncertainty, ask KASLR to skip the required
> > region.
> >
> > Signed-off-by: Pingfan Liu <kernelfans@xxxxxxxxx>
> > Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> > Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> > Cc: Borislav Petkov <bp@xxxxxxxxx>
> > Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
> > Cc: Baoquan He <bhe@xxxxxxxxxx>
> > Cc: Will Deacon <will.deacon@xxxxxxx>
> > Cc: Nicolas Pitre <nico@xxxxxxxxxx>
> > Cc: Pingfan Liu <kernelfans@xxxxxxxxx>
> > Cc: Chao Fan <fanc.fnst@xxxxxxxxxxxxxx>
> > Cc: "Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx>
> > Cc: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx>
> > Cc: linux-kernel@xxxxxxxxxxxxxxx
> > ---
> > v2 -> v3: adding parsing of crashkernel=range1:size1[,range2:size2,...]@offset
> >
> > arch/x86/boot/compressed/kaslr.c | 116 ++++++++++++++++++++++++++++++++++++++-
> > 1 file changed, 114 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> > index 2e53c05..7f698f4 100644
> > --- a/arch/x86/boot/compressed/kaslr.c
> > +++ b/arch/x86/boot/compressed/kaslr.c
> > @@ -107,6 +107,7 @@ enum mem_avoid_index {
> > MEM_AVOID_BOOTPARAMS,
> > MEM_AVOID_MEMMAP_BEGIN,
> > MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1,
> > + MEM_AVOID_CRASHKERNEL,
> > MEM_AVOID_MAX,
> > };
> >
> > @@ -238,6 +239,115 @@ static void parse_gb_huge_pages(char *param, char *val)
> > }
> > }
> >
> > +/* code heavily copied from parse_crashkernel_mem() */
> > +static void handle_crashkernel_mem(char *cmdline,
> > + unsigned long long system_ram,
> > + unsigned long long *crash_size,
> > + unsigned long long *crash_base)
>
> This version looks better and the logic is simple. It will be much better
> if we can share code with parse_crashkernel_mem() since both of them look
> almost the same.
>
That will be a little hard, but I will give it a try.
> > +{
> > + char *tmp, *cur = cmdline;
> > +
> > + /* for each entry of the comma-separated list */
> > + do {
> > + unsigned long long start, end = ULLONG_MAX, size;
> > +
> > + /* get the start of the range */
> > + start = memparse(cur, &tmp);
> > + /* no value given */
> > + if (cur == tmp)
> > + return;
> > + cur = tmp;
> > + if (*cur != '-')
> > + return;
> > + cur++;
> > +
> > + /* if no ':' is here, then we read the end */
> > + if (*cur != ':') {
> > + end = memparse(cur, &tmp);
> > + /* no value given */
> > + if (cur == tmp)
> > + return;
> > + cur = tmp;
> > + /* invalid if crashkernel end <= start */
> > + if (end <= start)
> > + return;
> > + }
> > + /* expect ":" after range */
> > + if (*cur != ':')
> > + return;
> > + cur++;
> > +
> > + size = memparse(cur, &tmp);
> > + /* no size value given */
> > + if (cur == tmp)
> > + return;
> > + cur = tmp;
> > + if (size >= system_ram)
> > + return;
> > +
> > + /* match ? */
> > + if (system_ram >= start && system_ram < end) {
> > + *crash_size = size;
> > + break;
> > + }
> > + } while (*cur++ == ',');
> > +
> > + if (*crash_size > 0) {
> > + while (*cur && *cur != ' ' && *cur != '@')
> > + cur++;
> > + if (*cur == '@') {
> > + cur++;
> > + *crash_base = memparse(cur, &tmp);
> > + }
> > + }
> > +}
> > +
> > +/* handle crashkernel=x@y or =range1:size1[,range2:size2,...]@offset options */
> > +static void mem_avoid_specified_crashkernel_region(char *option)
>
> Maybe just add more words to explain the specified crashkernel region
> cases, but remove the word 'specified' from the function name?
>
OK.
> > +{
> > + unsigned long long crash_size, crash_base = 0;
> > + char *first_colon, *first_space, *cur = option;
> > +
> > + first_colon = strchr(option, ':');
> > + first_space = strchr(option, ' ');
> > + /* if contain ":" */
> > + if (first_colon && (!first_space || first_colon < first_space)) {
> > + int i;
> > + u64 total_sz = 0;
> > + struct boot_e820_entry *entry;
> > +
> > + for (i = 0; i < boot_params->e820_entries; i++) {
> > + entry = &boot_params->e820_table[i];
> > + /* Skip non-RAM entries. */
> > + if (entry->type != E820_TYPE_RAM)
> > + continue;
> > + total_sz += entry->size;
>
> Wrap this for loop into a static function to calculate the system RAM
> size?
>
OK.

Thanks for your review.

Regards,
Pingfan
> Other than these, I think this addition looks good. It won't impact the
> current handling, and it is very easy to recognize what it's doing. Thanks for
> the effort.
>
> Thanks
> Baoquan
> > + }
> > + handle_crashkernel_mem(option, total_sz, &crash_size,
> > + &crash_base);
> > + } else {
> > + crash_size = memparse(option, &cur);
> > + if (option == cur)
> > + return;
> > + while (*cur && *cur != ' ' && *cur != '@')
> > + cur++;
> > + if (*cur == '@') {
> > + option = cur + 1;
> > + crash_base = memparse(option, &cur);
> > + }
> > + }
> > + if (crash_base) {
> > + mem_avoid[MEM_AVOID_CRASHKERNEL].start = crash_base;
> > + mem_avoid[MEM_AVOID_CRASHKERNEL].size = crash_size;
> > + } else {
> > + /*
> > + * Clearing mem_avoid if no offset is given. This is consistent
> > + * with kernel, which uses the last crashkernel= option.
> > + */
> > + mem_avoid[MEM_AVOID_CRASHKERNEL].start = 0;
> > + mem_avoid[MEM_AVOID_CRASHKERNEL].size = 0;
> > + }
> > +}
> >
> > static void handle_mem_options(void)
> > {
> > @@ -248,7 +358,7 @@ static void handle_mem_options(void)
> > u64 mem_size;
> >
> > if (!strstr(args, "memmap=") && !strstr(args, "mem=") &&
> > - !strstr(args, "hugepages"))
> > + !strstr(args, "hugepages") && !strstr(args, "crashkernel="))
> > return;
> >
> > tmp_cmdline = malloc(len + 1);
> > @@ -284,6 +394,8 @@ static void handle_mem_options(void)
> > goto out;
> >
> > mem_limit = mem_size;
> > + } else if (strstr(param, "crashkernel")) {
> > + mem_avoid_specified_crashkernel_region(val);
> > }
> > }
> >
> > @@ -412,7 +524,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
> >
> > /* We don't need to set a mapping for setup_data. */
> >
> > - /* Mark the memmap regions we need to avoid */
> > + /* Mark the regions we need to avoid */
> > handle_mem_options();
> >
> > /* Enumerate the immovable memory regions */
> > --
> > 2.7.4
> >