Re: [PATCH 1/2] x86/efi: Correct ident mapping of efi old_map when kalsr enabled
From: Baoquan He
Date: Thu Apr 27 2017 - 06:47:21 EST
On 04/27/17 at 06:31pm, Baoquan He wrote:
> Hi Thomas,
>
> Thanks for reviewing!
>
> On 04/26/17 at 07:49am, Thomas Garnier wrote:
> > On Wed, Apr 26, 2017 at 3:43 AM, Baoquan He <bhe@xxxxxxxxxx> wrote:
> > > > arch/x86/platform/efi/efi_64.c | 35 +++++++++++++++++++++++++++--------
> > > > 1 file changed, 27 insertions(+), 8 deletions(-)
> > > >
> > > > diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
> > > > index 2ee7694..2e7baff 100644
> > > > --- a/arch/x86/platform/efi/efi_64.c
> > > > +++ b/arch/x86/platform/efi/efi_64.c
> > > > @@ -71,11 +71,12 @@ static void __init early_code_mapping_set_exec(int executable)
> > > >
> > > > pgd_t * __init efi_call_phys_prolog(void)
> > > > {
> > > > - unsigned long vaddress;
> > > > + unsigned long vaddr, left_vaddr;
> > > > + unsigned int num_entries;
> > > > pgd_t *save_pgd;
> > > > -
> > > > + pud_t *pud, *pud_k;
> > > > int n_pgds;
> > > > + int i;
> > > >
> > > > if (!efi_enabled(EFI_OLD_MEMMAP)) {
> > > > save_pgd = (pgd_t *)read_cr3();
> > > > @@ -88,10 +89,22 @@ pgd_t * __init efi_call_phys_prolog(void)
> > > > n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
> > > > save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
> > > >
> > > > - for (pgd = 0; pgd < n_pgds; pgd++) {
> > > > - save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
> > > > - vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
> > > > - set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
> > > > + for (i = 0; i < n_pgds; i++) {
> > > > + save_pgd[i] = *pgd_offset_k(i * PGDIR_SIZE);
> > > > +
> > > > + vaddr = (unsigned long)__va(i * PGDIR_SIZE);
> > > > + pud = pud_alloc_one(NULL, 0);
> >
> > Please check if pud is NULL.
Or panic if pud_alloc_one failed since kernel won't function well
anyway.
>
> I considered it a while. I didn't check because I thought it's still in
> kernel init stage, and at most 128 page frames are cost for 64TB,
> namely 512KB. If kernel can't give 512KB at this time, it will die soon.
> I would like to hear what people are suggesting. Since you have pointed
> it out, I will add checking here.
>
> However I think we can keep those allocated page and try our best to
> build as much ident mapping as possible. E.g if we have 10TB memory, but
> failed to allocate page for 11th pud table, we can break the for loop,
> leave those built ident mapping there since efi region could be located
> inside those 0~5TB region.
>
> Then inefi_call_phys_epilog() only free these allocated pud tables in
> efi_call_phys_prolog, check and avoid freeing those pud tables from
> direct mapping which still existed because of allocation failure in
> efi_call_phys_prolog.
>
> diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
> index 2e7baff..67920d4 100644
> --- a/arch/x86/platform/efi/efi_64.c
> +++ b/arch/x86/platform/efi/efi_64.c
> @@ -94,6 +94,11 @@ pgd_t * __init efi_call_phys_prolog(void)
>
> vaddr = (unsigned long)__va(i * PGDIR_SIZE);
> pud = pud_alloc_one(NULL, 0);
> + if (!pud) {
> + pr_err("Failed to allocate page for %d-th pud table "
> + "to build 1:1 mapping!\n", i);
> + break;
> + }
>
> num_entries = PTRS_PER_PUD - pud_index(vaddr);
> pud_k = pud_offset(pgd_offset_k(vaddr), vaddr);
> @@ -132,8 +137,10 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
>
> for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
> pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
> - pud = (pud_t *)pgd_page_vaddr(*pgd);
> - pud_free(NULL, pud);
> + if (*pgd != save_pgd[pgd_idx]) {
> + pud = (pud_t *)pgd_page_vaddr(*pgd);
> + pud_free(NULL, pud);
> + }
> set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
> }
>
>
> >
> > > > +
> > > > + num_entries = PTRS_PER_PUD - pud_index(vaddr);
> > > > + pud_k = pud_offset(pgd_offset_k(vaddr), vaddr);
> > > > + memcpy(pud, pud_k, num_entries);
> > > > + if (pud_index(vaddr) > 0) {
> >
> > You are using pud_index(vaddr) 3 times, might be worth using a local variable.
>
> Sure, will do, thanks.
>
> >
> > > > + left_vaddr = vaddr + (num_entries * PUD_SIZE);
> > > > + pud_k = pud_offset(pgd_offset_k(left_vaddr),
> > > > + left_vaddr);
> > > > + memcpy(pud + num_entries, pud_k, pud_index(vaddr));
> >
> > I think this section (or the overall for loop) would benefit with a
> > comment explaining explaining why you are shifting the new PUD like
> > this.
>
> Will write a paragraph.
> >
> > > > + }
> > > > + pgd_populate(NULL, pgd_offset_k(i * PGDIR_SIZE), pud);
> > > > }
> > > > out:
> > > > __flush_tlb_all();
> > > > @@ -106,6 +119,8 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
> > > > */
> > > > int pgd_idx;
> > > > int nr_pgds;
> > > > + pud_t *pud;
> > > > + pgd_t *pgd;
> > > >
> > > > if (!efi_enabled(EFI_OLD_MEMMAP)) {
> > > > write_cr3((unsigned long)save_pgd);
> > > > @@ -115,8 +130,12 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
> > > >
> > > > nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
> > > >
> > > > - for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++)
> > > > + for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
> > > > + pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
> > > > + pud = (pud_t *)pgd_page_vaddr(*pgd);
> > > > + pud_free(NULL, pud);
> > > > set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
> > > > + }
> > > >
> > > > kfree(save_pgd);
> > > >
> > > > --
> > > > 2.5.5
> > > >
> >
> >
> >
> >
> > --
> > Thomas