[PATCH 2/9] xen/x86: allow PVH Dom0 without XEN_PV=y

From: Jan Beulich
Date: Tue Sep 07 2021 - 06:08:24 EST


Decouple XEN_DOM0 from XEN_PV, converting some existing uses of XEN_DOM0
to a new XEN_PV_DOM0. (I'm not convinced all are really / should really
be PV-specific, but for starters I've tried to be conservative.)

For PVH Dom0 the hypervisor populates MADT with only x2APIC entries, so
without x2APIC support enabled in the kernel things aren't going to work
very well. (As opposed, DomU-s would only ever see LAPIC entries in MADT
as of now.) Note that this then requires PVH Dom0 to be 64-bit, as
X86_X2APIC depends on X86_64.

In the course of this xen_running_on_version_or_later() needs to be
available more broadly. Move it from a PV-specific to a generic file,
considering that what it does isn't really PV-specific at all anyway.

Note that xen/interface/version.h cannot be included on its own; in
enlighten.c, which uses SCHEDOP_* anyway, include xen/interface/sched.h
first to resolve the apparently sole missing type (xen_ulong_t).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
For drivers/xen/pci.c it's not clear to me what the intentions are: On
one hand I would think this is needed in PVH as well (especially for
hotplugged devices), yet otoh the hypervisor's hvm_physdev_op() doesn't
let the respective physdev-ops through. As a result with how I have
things, a lot of "Failed to add - passthrough or MSI/MSI-X might fail!"
can be observed.

For arch/x86/xen/vga.c I think it really is needed for PVH Dom0 as well,
except that the needed data doesn't get supplied yet by the hypervisor
afaict.

--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -10,6 +10,8 @@

#include <xen/xen.h>
#include <xen/features.h>
+#include <xen/interface/sched.h>
+#include <xen/interface/version.h>
#include <xen/page.h>

#include <asm/xen/hypercall.h>
@@ -257,6 +259,21 @@ int xen_vcpu_setup(int cpu)
return ((per_cpu(xen_vcpu, cpu) == NULL) ? -ENODEV : 0);
}

+/* Check if running on Xen version (major, minor) or later */
+bool xen_running_on_version_or_later(unsigned int major, unsigned int minor)
+{
+ unsigned int version;
+
+ if (!xen_domain())
+ return false;
+
+ version = HYPERVISOR_xen_version(XENVER_version, NULL);
+ if ((((version >> 16) == major) && ((version & 0xffff) >= minor)) ||
+ ((version >> 16) > major))
+ return true;
+ return false;
+}
+
void xen_reboot(int reason)
{
struct sched_shutdown r = { .reason = reason };
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -142,22 +142,6 @@ static void __init xen_pv_guest_late_ini
#endif
}

-/* Check if running on Xen version (major, minor) or later */
-bool
-xen_running_on_version_or_later(unsigned int major, unsigned int minor)
-{
- unsigned int version;
-
- if (!xen_domain())
- return false;
-
- version = HYPERVISOR_xen_version(XENVER_version, NULL);
- if ((((version >> 16) == major) && ((version & 0xffff) >= minor)) ||
- ((version >> 16) > major))
- return true;
- return false;
-}
-
static __read_mostly unsigned int cpuid_leaf5_ecx_val;
static __read_mostly unsigned int cpuid_leaf5_edx_val;

--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -14,16 +14,19 @@ static inline int pci_xen_hvm_init(void)
return -1;
}
#endif
-#if defined(CONFIG_XEN_DOM0)
+#ifdef CONFIG_XEN_PV_DOM0
int __init pci_xen_initial_domain(void);
-int xen_find_device_domain_owner(struct pci_dev *dev);
-int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
-int xen_unregister_device_domain_owner(struct pci_dev *dev);
#else
static inline int __init pci_xen_initial_domain(void)
{
return -1;
}
+#endif
+#ifdef CONFIG_XEN_DOM0
+int xen_find_device_domain_owner(struct pci_dev *dev);
+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
+int xen_unregister_device_domain_owner(struct pci_dev *dev);
+#else
static inline int xen_find_device_domain_owner(struct pci_dev *dev)
{
return -1;
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -113,7 +113,7 @@ static int acpi_register_gsi_xen_hvm(str
false /* no mapping of GSI to PIRQ */);
}

-#ifdef CONFIG_XEN_DOM0
+#ifdef CONFIG_XEN_PV_DOM0
static int xen_register_gsi(u32 gsi, int triggering, int polarity)
{
int rc, irq;
@@ -261,7 +261,7 @@ error:
return irq;
}

-#ifdef CONFIG_XEN_DOM0
+#ifdef CONFIG_XEN_PV_DOM0
static bool __read_mostly pci_seg_supported = true;

static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
@@ -375,10 +375,10 @@ static void xen_initdom_restore_msi_irqs
WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret);
}
}
-#else /* CONFIG_XEN_DOM0 */
+#else /* CONFIG_XEN_PV_DOM0 */
#define xen_initdom_setup_msi_irqs NULL
#define xen_initdom_restore_msi_irqs NULL
-#endif /* !CONFIG_XEN_DOM0 */
+#endif /* !CONFIG_XEN_PV_DOM0 */

static void xen_teardown_msi_irqs(struct pci_dev *dev)
{
@@ -555,7 +555,7 @@ int __init pci_xen_hvm_init(void)
return 0;
}

-#ifdef CONFIG_XEN_DOM0
+#ifdef CONFIG_XEN_PV_DOM0
int __init pci_xen_initial_domain(void)
{
int irq;
@@ -583,6 +583,9 @@ int __init pci_xen_initial_domain(void)
}
return 0;
}
+#endif
+
+#ifdef CONFIG_XEN_DOM0

struct xen_device_domain_owner {
domid_t domain;
@@ -656,4 +659,4 @@ int xen_unregister_device_domain_owner(s
return 0;
}
EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
-#endif
+#endif /* CONFIG_XEN_DOM0 */
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -43,13 +43,9 @@ config XEN_PV_SMP
def_bool y
depends on XEN_PV && SMP

-config XEN_DOM0
- bool "Xen PV Dom0 support"
- default y
- depends on XEN_PV && PCI_XEN && SWIOTLB_XEN
- depends on X86_IO_APIC && ACPI && PCI
- help
- Support running as a Xen PV Dom0 guest.
+config XEN_PV_DOM0
+ def_bool y
+ depends on XEN_PV && XEN_DOM0

config XEN_PVHVM
def_bool y
@@ -86,3 +82,12 @@ config XEN_PVH
def_bool n
help
Support for running as a Xen PVH guest.
+
+config XEN_DOM0
+ bool "Xen Dom0 support"
+ default XEN_PV
+ depends on (XEN_PV && SWIOTLB_XEN) || (XEN_PVH && X86_64)
+ depends on X86_IO_APIC && ACPI && PCI
+ select X86_X2APIC if XEN_PVH && X86_64
+ help
+ Support running as a Xen Dom0 guest.
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -45,7 +45,7 @@ obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinl

obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o

-obj-$(CONFIG_XEN_DOM0) += vga.o
+obj-$(CONFIG_XEN_PV_DOM0) += vga.o

obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o

--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -109,7 +109,7 @@ static inline void xen_uninit_lock_cpu(i

struct dom0_vga_console_info;

-#ifdef CONFIG_XEN_DOM0
+#ifdef CONFIG_XEN_PV_DOM0
void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size);
#else
static inline void __init xen_init_vga(const struct dom0_vga_console_info *info,
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -240,7 +240,7 @@ config XEN_PRIVCMD

config XEN_ACPI_PROCESSOR
tristate "Xen ACPI processor"
- depends on XEN && XEN_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ
+ depends on XEN && XEN_PV_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ
default m
help
This ACPI processor uploads Power Management information to the Xen
@@ -258,7 +258,7 @@ config XEN_ACPI_PROCESSOR

config XEN_MCE_LOG
bool "Xen platform mcelog"
- depends on XEN_DOM0 && X86_MCE
+ depends on XEN_PV_DOM0 && X86_MCE
help
Allow kernel fetching MCE error from Xen platform and
converting it into Linux mcelog format for mcelog tools