[PATCH v1 04/12] xen/hvmlite: Bootstrap HVMlite guest

From: Boris Ostrovsky
Date: Fri Jan 22 2016 - 16:38:46 EST


Start HVMlite guest at XEN_ELFNOTE_PHYS32_ENTRY address. Set up the hypercall
page, initialize boot_params, and enable early page tables.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
---
arch/x86/xen/Makefile | 1 +
arch/x86/xen/enlighten.c | 91 +++++++++++++++++++++++++-
arch/x86/xen/xen-hvmlite.S | 158 ++++++++++++++++++++++++++++++++++++++++++++
include/xen/xen.h | 6 ++
4 files changed, 255 insertions(+), 1 deletions(-)
create mode 100644 arch/x86/xen/xen-hvmlite.S

diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index e47e527..1d913d7 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -23,3 +23,4 @@ obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
obj-$(CONFIG_XEN_DOM0) += vga.o
obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o
obj-$(CONFIG_XEN_EFI) += efi.o
+obj-$(CONFIG_XEN_PVHVM) += xen-hvmlite.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2cf446a..2ed8b2b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -118,7 +118,8 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
*/
DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);

-enum xen_domain_type xen_domain_type = XEN_NATIVE;
+enum xen_domain_type xen_domain_type /* kept out of .bss: xen_prepare_hvmlite() sets it before .bss is cleared */
+ __attribute__((section(".data"))) = XEN_NATIVE;
EXPORT_SYMBOL_GPL(xen_domain_type);

unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
@@ -171,6 +172,17 @@ struct tls_descs {
*/
static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);

+#ifdef CONFIG_XEN_PVHVM
+/*
+ * State filled in on the HVMlite entry path. It is written before
+ * startup_{32|64} clear .bss, so none of it may be placed in .bss.
+ */
+struct boot_params xen_hvmlite_boot_params __attribute__((section(".data")));
+struct hvm_start_info hvmlite_start_info __attribute__((section(".data")));
+unsigned int hvmlite_start_info_sz = sizeof(hvmlite_start_info);
+int xen_hvmlite __attribute__((section(".data"))) = 0;
+#endif
+
static void clamp_max_cpus(void)
{
#ifdef CONFIG_SMP
@@ -1736,6 +1748,83 @@ asmlinkage __visible void __init xen_start_kernel(void)
#endif
}

+#ifdef CONFIG_XEN_PVHVM
+/* Build boot_params (e820 map, command line, ramdisk) for an HVMlite guest. */
+static void __init hvmlite_bootparams(void)
+{
+	struct boot_params *bp = &xen_hvmlite_boot_params;
+	struct xen_memory_map memmap;
+	unsigned int i;
+
+	memset(bp, 0, sizeof(*bp));
+
+	memmap.nr_entries = ARRAY_SIZE(bp->e820_map);
+	set_xen_guest_handle(memmap.buffer, bp->e820_map);
+	if (HYPERVISOR_memory_op(XENMEM_memory_map, &memmap)) {
+		xen_raw_console_write("XENMEM_memory_map failed\n");
+		BUG();
+	}
+
+	/* Reserve the ISA range, but only while e820_map still has a free
+	 * slot: Xen may fill the whole array, and writing past its end would
+	 * corrupt the rest of boot_params. */
+	i = memmap.nr_entries;
+	if (i < ARRAY_SIZE(bp->e820_map)) {
+		bp->e820_map[i].addr = ISA_START_ADDRESS;
+		bp->e820_map[i].size = ISA_END_ADDRESS - ISA_START_ADDRESS;
+		bp->e820_map[i].type = E820_RESERVED;
+		memmap.nr_entries++;
+	}
+
+	sanitize_e820_map(bp->e820_map, ARRAY_SIZE(bp->e820_map),
+			  &memmap.nr_entries);
+
+	bp->e820_entries = memmap.nr_entries;
+	for (i = 0; i < bp->e820_entries; i++)
+		e820_add_region(bp->e820_map[i].addr, bp->e820_map[i].size,
+				bp->e820_map[i].type);
+
+	bp->hdr.cmd_line_ptr = hvmlite_start_info.cmdline_paddr;
+
+	/* The first module is always ramdisk */
+	if (hvmlite_start_info.nr_modules) {
+		struct hvm_modlist_entry *modaddr =
+			__va(hvmlite_start_info.modlist_paddr);
+		bp->hdr.ramdisk_image = modaddr->paddr;
+		bp->hdr.ramdisk_size = modaddr->size;
+	}
+
+	/*
+	 * See Documentation/x86/boot.txt: version 2.12 supports the Xen entry
+	 * point but we use the default x86/PC environment (hardware_subarch 0).
+	 */
+	bp->hdr.version = 0x212;
+	bp->hdr.type_of_loader = 9; /* Xen loader */
+}
+
+/*
+ * Early HVMlite setup: install the hypercall page and fill in boot_params.
+ * Nothing reachable from here may use .bss; that segment is cleared later.
+ */
+void __init xen_prepare_hvmlite(void)
+{
+	u64 hcall_pa = __pa(hypercall_page);
+	u32 hcall_msr, eax, ecx, edx;
+
+	/* The second cpuid output is used as the MSR that takes the page address */
+	cpuid(xen_cpuid_base() + 2, &eax, &hcall_msr, &ecx, &edx);
+	wrmsr_safe(hcall_msr, (u32)hcall_pa, (u32)(hcall_pa >> 32));
+
+	xen_hvmlite = 1;
+	xen_domain_type = XEN_HVM_DOMAIN;
+	pv_info.name = "Xen HVMlite";
+
+	x86_init.oem.banner = xen_banner;
+	x86_init.oem.arch_setup = xen_init_kernel;
+
+	hvmlite_bootparams();
+}
+#endif
+
void __ref xen_hvm_init_shared_info(void)
{
int cpu;
diff --git a/arch/x86/xen/xen-hvmlite.S b/arch/x86/xen/xen-hvmlite.S
new file mode 100644
index 0000000..90f03d0
--- /dev/null
+++ b/arch/x86/xen/xen-hvmlite.S
@@ -0,0 +1,158 @@
+/*
+ * Copyright C 2016, Oracle and/or its affiliates. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+ .code32
+ .text
+#define _pa(x) ((x) - __START_KERNEL_map) /* link address -> presumably physical; used for GDT base, CR3, early stack */
+
+#include <linux/elfnote.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/asm.h>
+#include <asm/boot.h>
+#include <asm/processor-flags.h>
+#include <asm/msr.h>
+#include <xen/interface/elfnote.h>
+
+ __HEAD
+ .code32
+
+/* Entry point for HVMlite guests; start info is copied from (%ebx) below */
+ENTRY(hvmlite_start_xen)
+ cli
+ cld /* string ops below must count upwards */
+
+ mov $_pa(gdt), %eax
+ lgdt (%eax)
+
+ movl $(__BOOT_DS),%eax
+ movl %eax,%ds
+ movl %eax,%es
+ movl %eax,%ss
+
+ /* Stash hvm_start_info: hvmlite_start_info_sz bytes from (%ebx) */
+ mov $_pa(hvmlite_start_info), %edi
+ mov %ebx, %esi
+ mov $_pa(hvmlite_start_info_sz), %ecx
+ mov (%ecx), %ecx
+ rep
+ movsb
+
+ movl $_pa(early_stack_end), %eax /* stack for the C call below */
+ movl %eax, %esp
+
+ /* Enable PAE mode */
+ movl %cr4, %eax
+ orl $X86_CR4_PAE, %eax
+ movl %eax, %cr4
+
+#ifdef CONFIG_X86_64
+ /* Enable Long mode */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ wrmsr
+
+ /* Enable pre-constructed page tables */
+ mov $_pa(init_level4_pgt), %eax
+ movl %eax, %cr3
+ movl $(X86_CR0_PG | X86_CR0_PE), %eax
+ movl %eax, %cr0 /* CR0 is overwritten here, not OR-ed */
+
+ /* Jump to 64-bit mode. */
+ pushl $__KERNEL_CS
+ leal _pa(1f), %eax
+ pushl %eax
+ lret /* far return pops the offset and __KERNEL_CS pushed above */
+
+ /* 64-bit entry point */
+ .code64
+1:
+ call xen_prepare_hvmlite
+
+ /* startup_64 expects boot_params in %rsi */
+ mov $_pa(xen_hvmlite_boot_params), %rsi
+ movq $_pa(startup_64), %rax
+ jmp *%rax
+
+#else /* CONFIG_X86_64 */
+
+ /* Use initial_page table and set level 2 to map 2M pages */
+ movl $_pa(initial_pg_pmd), %edi
+ movl $(_PAGE_PSE | _PAGE_RW | _PAGE_PRESENT), %eax
+ movl $2048, %ecx /* 2048 entries * 2M = 4G */
+2:
+ movl %eax, 0(%edi) /* low 32 bits of each 8-byte entry */
+ addl $0x00200000, %eax
+ addl $8, %edi
+ decl %ecx
+ jnz 2b
+
+ /* Enable the boot paging */
+ movl $_pa(initial_page_table), %eax
+ movl %eax, %cr3
+ movl %cr0, %eax
+ orl $(X86_CR0_PG | X86_CR0_PE), %eax
+ movl %eax, %cr0
+
+ ljmp $__BOOT_CS,$3f
+3:
+ call xen_prepare_hvmlite
+ mov $_pa(xen_hvmlite_boot_params), %esi
+
+ /* startup_32 doesn't expect paging and PAE to be on */
+ ljmp $__BOOT_CS,$_pa(4f)
+4:
+ movl %cr0, %eax
+ andl $~X86_CR0_PG, %eax
+ movl %eax, %cr0
+ movl %cr4, %eax
+ andl $~X86_CR4_PAE, %eax
+ movl %eax, %cr4
+
+ /* Restore initial_pg_pmd to its original (zero) state */
+ movl $_pa(initial_pg_pmd), %edi
+ xorl %eax, %eax
+ movl $(PAGE_SIZE/4), %ecx /* NOTE(review): clears one page; loop above wrote 2048*8 bytes - confirm */
+ rep stosl
+
+ ljmp $0x10, $_pa(startup_32) /* NOTE(review): 0x10 presumably equals __BOOT_CS used above - confirm */
+#endif
+
+	.data
+gdt:
+	.word gdt_end - gdt - 1	/* limit: offset of the last valid byte */
+	.long _pa(gdt)		/* base: the table starts at this pseudo-descriptor */
+	.word 0			/* pads the pseudo-descriptor to 8 bytes (slot 0) */
+	.quad 0x0000000000000000 /* NULL descriptor */
+#ifdef CONFIG_X86_64
+	.quad 0x00af9a000000ffff /* __KERNEL_CS */
+#else
+	.quad 0x00cf9a000000ffff /* __KERNEL_CS */
+#endif
+	.quad 0x00cf92000000ffff /* __KERNEL_DS */
+gdt_end:
+
+	.bss
+	.balign 4
+early_stack:
+	.fill 4096, 1, 0	/* must hold xen_prepare_hvmlite() and its callees */
+early_stack_end:
+
+	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
+		_ASM_PTR _pa(hvmlite_start_xen)) /* entry address via _pa() */
diff --git a/include/xen/xen.h b/include/xen/xen.h
index 0c0e3ef..6a0d3f3 100644
--- a/include/xen/xen.h
+++ b/include/xen/xen.h
@@ -29,6 +29,12 @@ extern enum xen_domain_type xen_domain_type;
#define xen_initial_domain() (0)
#endif /* CONFIG_XEN_DOM0 */

+#ifdef CONFIG_XEN_PVHVM
+extern int xen_hvmlite; /* set to 1 by xen_prepare_hvmlite() */
+#else
+#define xen_hvmlite (0) /* constant 0 when CONFIG_XEN_PVHVM is off */
+#endif /* CONFIG_XEN_PVHVM */
+
#ifdef CONFIG_XEN_PVH
/* This functionality exists only for x86. The XEN_PVHVM support exists
* only in x86 world - hence on ARM it will be always disabled.
--
1.7.1