Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G
From: Yinghai Lu
Date: Sat Feb 21 2015 - 15:01:29 EST
On Fri, Feb 20, 2015 at 6:49 PM, Baoquan He <bhe@xxxxxxxxxx> wrote:
> On 02/20/15 at 03:53pm, Yinghai Lu wrote:
> At the beginning I did it just as you said, add IDT table and $PF
> handler. Get page fault address and built ident mapping around it when
> reload kernel above 4G. In this case 3 more pages are enough if kernel
> is put to another 512G and cross the boundary of 512G.
> kernel_ident_mapping_init code can be borrowed and need be adjusted a
> little bit. This works as expected, but a GPF reported and reboot to
> BIOS. That's why I made a simple debug patch as I pasted before to
> filter unnecessary interference.
Please use attached one to instead of the #PF handler in boot stage.
It works when hard-code to move output above 4G.
From: Yinghai Lu <yinghai@xxxxxxxxxx>
Subject: [PATCH] x86, boot: Enable ident_mapping for kasl above 4G for 64bit
split kernel_ident_mapping_init() and call that in boot::misc.c stage.
it will cover new range kernel space that is above 4G.
Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>
---
arch/x86/boot/compressed/misc.c | 10 ++++
arch/x86/boot/compressed/misc_pgt.c | 61 +++++++++++++++++++++++++++++
arch/x86/include/asm/page.h | 5 ++
arch/x86/mm/ident_map.c | 74 ++++++++++++++++++++++++++++++++++++
arch/x86/mm/init_64.c | 74 ------------------------------------
5 files changed, 151 insertions(+), 73 deletions(-)
Index: linux-2.6/arch/x86/boot/compressed/misc.c
===================================================================
--- linux-2.6.orig/arch/x86/boot/compressed/misc.c
+++ linux-2.6/arch/x86/boot/compressed/misc.c
@@ -9,6 +9,11 @@
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
*/
+#ifdef CONFIG_X86_64
+#define __pa(x) ((unsigned long)(x))
+#define __va(x) ((void *)((unsigned long)(x)))
+#endif
+
#include "misc.h"
#include "../string.h"
@@ -366,6 +371,8 @@ static void parse_elf(void *output)
free(phdrs);
}
+#include "misc_pgt.c"
+
asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
unsigned char *input_data,
unsigned long input_len,
@@ -421,6 +428,9 @@ asmlinkage __visible void *decompress_ke
error("Wrong destination address");
#endif
+ if (output != output_orig)
+ fill_linux64_pagetable((unsigned long)output, output_len);
+
debug_putstr("\nDecompressing Linux... ");
decompress(input_data, input_len, NULL, NULL, output, NULL, error);
parse_elf(output);
Index: linux-2.6/arch/x86/include/asm/page.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/page.h
+++ linux-2.6/arch/x86/include/asm/page.h
@@ -37,7 +37,10 @@ static inline void copy_user_page(void *
alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+#ifndef __pa
#define __pa(x) __phys_addr((unsigned long)(x))
+#endif
+
#define __pa_nodebug(x) __phys_addr_nodebug((unsigned long)(x))
/* __pa_symbol should be used for C visible symbols.
This seems to be the official gcc blessed way to do such arithmetic. */
@@ -51,7 +54,9 @@ static inline void copy_user_page(void *
#define __pa_symbol(x) \
__phys_addr_symbol(__phys_reloc_hide((unsigned long)(x)))
+#ifndef __va
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
+#endif
#define __boot_va(x) __va(x)
#define __boot_pa(x) __pa(x)
Index: linux-2.6/arch/x86/boot/compressed/misc_pgt.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/x86/boot/compressed/misc_pgt.c
@@ -0,0 +1,61 @@
+
+#ifdef CONFIG_X86_64
+#include <asm/init.h>
+#include <asm/pgtable.h>
+
+#include "../../mm/ident_map.h"
+
+struct alloc_pgt_data {
+ unsigned char *pgt_buf;
+ unsigned long pgt_buf_size;
+ unsigned long pgt_buf_offset;
+};
+
+static void *alloc_pgt_page(void *context)
+{
+ struct alloc_pgt_data *d = (struct alloc_pgt_data *)context;
+ unsigned char *p = (unsigned char *)d->pgt_buf;
+
+ if (d->pgt_buf_offset >= d->pgt_buf_size) {
+ debug_putstr("out of pgt_buf in misc.c\n");
+ return NULL;
+ }
+
+ p += d->pgt_buf_offset;
+ d->pgt_buf_offset += 4096;
+ memset(p, 0, 4096);
+
+ return p;
+}
+
+#define PGT_BUF_SIZE (4096*4)
+
+unsigned long __force_order;
+static unsigned char pgt_buf[PGT_BUF_SIZE] __aligned(4096);
+
+static void fill_linux64_pagetable(unsigned long start, unsigned long size)
+{
+ struct alloc_pgt_data data = {
+ .pgt_buf = (unsigned char *) pgt_buf,
+ .pgt_buf_size = sizeof(pgt_buf),
+ .pgt_buf_offset = 0,
+ };
+ struct x86_mapping_info mapping_info = {
+ .alloc_pgt_page = alloc_pgt_page,
+ .context = &data,
+ .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
+ };
+ unsigned long end = start + size;
+ pgd_t *level4p = (pgd_t *)read_cr3();
+
+ /* align boundary to 2M */
+ start = (start >> 21) << 21;
+ end = ((end + (1<<21) - 1) >> 21) << 21;
+ if (start >= (1UL<<32))
+ kernel_ident_mapping_init(&mapping_info, level4p, start, end);
+}
+#else
+static void fill_linux64_pagetable(unsigned long start, unsigned long size)
+{
+}
+#endif
Index: linux-2.6/arch/x86/mm/ident_map.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/x86/mm/ident_map.c
@@ -0,0 +1,74 @@
+
+static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
+ unsigned long addr, unsigned long end)
+{
+ addr &= PMD_MASK;
+ for (; addr < end; addr += PMD_SIZE) {
+ pmd_t *pmd = pmd_page + pmd_index(addr);
+
+ if (!pmd_present(*pmd))
+ set_pmd(pmd, __pmd(addr | pmd_flag));
+ }
+}
+static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+
+ for (; addr < end; addr = next) {
+ pud_t *pud = pud_page + pud_index(addr);
+ pmd_t *pmd;
+
+ next = (addr & PUD_MASK) + PUD_SIZE;
+ if (next > end)
+ next = end;
+
+ if (pud_present(*pud)) {
+ pmd = pmd_offset(pud, 0);
+ ident_pmd_init(info->pmd_flag, pmd, addr, next);
+ continue;
+ }
+ pmd = (pmd_t *)info->alloc_pgt_page(info->context);
+ if (!pmd)
+ return -ENOMEM;
+ ident_pmd_init(info->pmd_flag, pmd, addr, next);
+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+ }
+
+ return 0;
+}
+
+int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ int result;
+ int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
+
+ for (; addr < end; addr = next) {
+ pgd_t *pgd = pgd_page + pgd_index(addr) + off;
+ pud_t *pud;
+
+ next = (addr & PGDIR_MASK) + PGDIR_SIZE;
+ if (next > end)
+ next = end;
+
+ if (pgd_present(*pgd)) {
+ pud = pud_offset(pgd, 0);
+ result = ident_pud_init(info, pud, addr, next);
+ if (result)
+ return result;
+ continue;
+ }
+
+ pud = (pud_t *)info->alloc_pgt_page(info->context);
+ if (!pud)
+ return -ENOMEM;
+ result = ident_pud_init(info, pud, addr, next);
+ if (result)
+ return result;
+ set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+ }
+
+ return 0;
+}
Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -56,79 +56,7 @@
#include "mm_internal.h"
-static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
- unsigned long addr, unsigned long end)
-{
- addr &= PMD_MASK;
- for (; addr < end; addr += PMD_SIZE) {
- pmd_t *pmd = pmd_page + pmd_index(addr);
-
- if (!pmd_present(*pmd))
- set_pmd(pmd, __pmd(addr | pmd_flag));
- }
-}
-static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
- unsigned long addr, unsigned long end)
-{
- unsigned long next;
-
- for (; addr < end; addr = next) {
- pud_t *pud = pud_page + pud_index(addr);
- pmd_t *pmd;
-
- next = (addr & PUD_MASK) + PUD_SIZE;
- if (next > end)
- next = end;
-
- if (pud_present(*pud)) {
- pmd = pmd_offset(pud, 0);
- ident_pmd_init(info->pmd_flag, pmd, addr, next);
- continue;
- }
- pmd = (pmd_t *)info->alloc_pgt_page(info->context);
- if (!pmd)
- return -ENOMEM;
- ident_pmd_init(info->pmd_flag, pmd, addr, next);
- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
- }
-
- return 0;
-}
-
-int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
- unsigned long addr, unsigned long end)
-{
- unsigned long next;
- int result;
- int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
-
- for (; addr < end; addr = next) {
- pgd_t *pgd = pgd_page + pgd_index(addr) + off;
- pud_t *pud;
-
- next = (addr & PGDIR_MASK) + PGDIR_SIZE;
- if (next > end)
- next = end;
-
- if (pgd_present(*pgd)) {
- pud = pud_offset(pgd, 0);
- result = ident_pud_init(info, pud, addr, next);
- if (result)
- return result;
- continue;
- }
-
- pud = (pud_t *)info->alloc_pgt_page(info->context);
- if (!pud)
- return -ENOMEM;
- result = ident_pud_init(info, pud, addr, next);
- if (result)
- return result;
- set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
- }
-
- return 0;
-}
+#include "ident_map.c"
static int __init parse_direct_gbpages_off(char *arg)
{
From: Yinghai Lu <yinghai@xxxxxxxxxx>
Subject: [PATCH] x86, boot: Enable ident_mapping for kasl above 4G for 64bit
split kernel_ident_mapping_init() and call that in boot::misc.c stage.
it will cover new range kernel space that is above 4G.
Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>
---
arch/x86/boot/compressed/misc.c | 10 ++++
arch/x86/boot/compressed/misc_pgt.c | 61 +++++++++++++++++++++++++++++
arch/x86/include/asm/page.h | 5 ++
arch/x86/mm/ident_map.c | 74 ++++++++++++++++++++++++++++++++++++
arch/x86/mm/init_64.c | 74 ------------------------------------
5 files changed, 151 insertions(+), 73 deletions(-)
Index: linux-2.6/arch/x86/boot/compressed/misc.c
===================================================================
--- linux-2.6.orig/arch/x86/boot/compressed/misc.c
+++ linux-2.6/arch/x86/boot/compressed/misc.c
@@ -9,6 +9,11 @@
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
*/
+#ifdef CONFIG_X86_64
+#define __pa(x) ((unsigned long)(x))
+#define __va(x) ((void *)((unsigned long)(x)))
+#endif
+
#include "misc.h"
#include "../string.h"
@@ -366,6 +371,8 @@ static void parse_elf(void *output)
free(phdrs);
}
+#include "misc_pgt.c"
+
asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
unsigned char *input_data,
unsigned long input_len,
@@ -421,6 +428,9 @@ asmlinkage __visible void *decompress_ke
error("Wrong destination address");
#endif
+ if (output != output_orig)
+ fill_linux64_pagetable((unsigned long)output, output_len);
+
debug_putstr("\nDecompressing Linux... ");
decompress(input_data, input_len, NULL, NULL, output, NULL, error);
parse_elf(output);
Index: linux-2.6/arch/x86/include/asm/page.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/page.h
+++ linux-2.6/arch/x86/include/asm/page.h
@@ -37,7 +37,10 @@ static inline void copy_user_page(void *
alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+#ifndef __pa
#define __pa(x) __phys_addr((unsigned long)(x))
+#endif
+
#define __pa_nodebug(x) __phys_addr_nodebug((unsigned long)(x))
/* __pa_symbol should be used for C visible symbols.
This seems to be the official gcc blessed way to do such arithmetic. */
@@ -51,7 +54,9 @@ static inline void copy_user_page(void *
#define __pa_symbol(x) \
__phys_addr_symbol(__phys_reloc_hide((unsigned long)(x)))
+#ifndef __va
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
+#endif
#define __boot_va(x) __va(x)
#define __boot_pa(x) __pa(x)
Index: linux-2.6/arch/x86/boot/compressed/misc_pgt.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/x86/boot/compressed/misc_pgt.c
@@ -0,0 +1,61 @@
+
+#ifdef CONFIG_X86_64
+#include <asm/init.h>
+#include <asm/pgtable.h>
+
+#include "../../mm/ident_map.h"
+
+struct alloc_pgt_data {
+ unsigned char *pgt_buf;
+ unsigned long pgt_buf_size;
+ unsigned long pgt_buf_offset;
+};
+
+static void *alloc_pgt_page(void *context)
+{
+ struct alloc_pgt_data *d = (struct alloc_pgt_data *)context;
+ unsigned char *p = (unsigned char *)d->pgt_buf;
+
+ if (d->pgt_buf_offset >= d->pgt_buf_size) {
+ debug_putstr("out of pgt_buf in misc.c\n");
+ return NULL;
+ }
+
+ p += d->pgt_buf_offset;
+ d->pgt_buf_offset += 4096;
+ memset(p, 0, 4096);
+
+ return p;
+}
+
+#define PGT_BUF_SIZE (4096*4)
+
+unsigned long __force_order;
+static unsigned char pgt_buf[PGT_BUF_SIZE] __aligned(4096);
+
+static void fill_linux64_pagetable(unsigned long start, unsigned long size)
+{
+ struct alloc_pgt_data data = {
+ .pgt_buf = (unsigned char *) pgt_buf,
+ .pgt_buf_size = sizeof(pgt_buf),
+ .pgt_buf_offset = 0,
+ };
+ struct x86_mapping_info mapping_info = {
+ .alloc_pgt_page = alloc_pgt_page,
+ .context = &data,
+ .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
+ };
+ unsigned long end = start + size;
+ pgd_t *level4p = (pgd_t *)read_cr3();
+
+ /* align boundary to 2M */
+ start = (start >> 21) << 21;
+ end = ((end + (1<<21) - 1) >> 21) << 21;
+ if (start >= (1UL<<32))
+ kernel_ident_mapping_init(&mapping_info, level4p, start, end);
+}
+#else
+static void fill_linux64_pagetable(unsigned long start, unsigned long size)
+{
+}
+#endif
Index: linux-2.6/arch/x86/mm/ident_map.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/x86/mm/ident_map.c
@@ -0,0 +1,74 @@
+
+static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
+ unsigned long addr, unsigned long end)
+{
+ addr &= PMD_MASK;
+ for (; addr < end; addr += PMD_SIZE) {
+ pmd_t *pmd = pmd_page + pmd_index(addr);
+
+ if (!pmd_present(*pmd))
+ set_pmd(pmd, __pmd(addr | pmd_flag));
+ }
+}
+static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+
+ for (; addr < end; addr = next) {
+ pud_t *pud = pud_page + pud_index(addr);
+ pmd_t *pmd;
+
+ next = (addr & PUD_MASK) + PUD_SIZE;
+ if (next > end)
+ next = end;
+
+ if (pud_present(*pud)) {
+ pmd = pmd_offset(pud, 0);
+ ident_pmd_init(info->pmd_flag, pmd, addr, next);
+ continue;
+ }
+ pmd = (pmd_t *)info->alloc_pgt_page(info->context);
+ if (!pmd)
+ return -ENOMEM;
+ ident_pmd_init(info->pmd_flag, pmd, addr, next);
+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+ }
+
+ return 0;
+}
+
+int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ int result;
+ int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
+
+ for (; addr < end; addr = next) {
+ pgd_t *pgd = pgd_page + pgd_index(addr) + off;
+ pud_t *pud;
+
+ next = (addr & PGDIR_MASK) + PGDIR_SIZE;
+ if (next > end)
+ next = end;
+
+ if (pgd_present(*pgd)) {
+ pud = pud_offset(pgd, 0);
+ result = ident_pud_init(info, pud, addr, next);
+ if (result)
+ return result;
+ continue;
+ }
+
+ pud = (pud_t *)info->alloc_pgt_page(info->context);
+ if (!pud)
+ return -ENOMEM;
+ result = ident_pud_init(info, pud, addr, next);
+ if (result)
+ return result;
+ set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+ }
+
+ return 0;
+}
Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -56,79 +56,7 @@
#include "mm_internal.h"
-static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
- unsigned long addr, unsigned long end)
-{
- addr &= PMD_MASK;
- for (; addr < end; addr += PMD_SIZE) {
- pmd_t *pmd = pmd_page + pmd_index(addr);
-
- if (!pmd_present(*pmd))
- set_pmd(pmd, __pmd(addr | pmd_flag));
- }
-}
-static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
- unsigned long addr, unsigned long end)
-{
- unsigned long next;
-
- for (; addr < end; addr = next) {
- pud_t *pud = pud_page + pud_index(addr);
- pmd_t *pmd;
-
- next = (addr & PUD_MASK) + PUD_SIZE;
- if (next > end)
- next = end;
-
- if (pud_present(*pud)) {
- pmd = pmd_offset(pud, 0);
- ident_pmd_init(info->pmd_flag, pmd, addr, next);
- continue;
- }
- pmd = (pmd_t *)info->alloc_pgt_page(info->context);
- if (!pmd)
- return -ENOMEM;
- ident_pmd_init(info->pmd_flag, pmd, addr, next);
- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
- }
-
- return 0;
-}
-
-int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
- unsigned long addr, unsigned long end)
-{
- unsigned long next;
- int result;
- int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
-
- for (; addr < end; addr = next) {
- pgd_t *pgd = pgd_page + pgd_index(addr) + off;
- pud_t *pud;
-
- next = (addr & PGDIR_MASK) + PGDIR_SIZE;
- if (next > end)
- next = end;
-
- if (pgd_present(*pgd)) {
- pud = pud_offset(pgd, 0);
- result = ident_pud_init(info, pud, addr, next);
- if (result)
- return result;
- continue;
- }
-
- pud = (pud_t *)info->alloc_pgt_page(info->context);
- if (!pud)
- return -ENOMEM;
- result = ident_pud_init(info, pud, addr, next);
- if (result)
- return result;
- set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
- }
-
- return 0;
-}
+#include "ident_map.c"
static int __init parse_direct_gbpages_off(char *arg)
{