Re: [PATCH 2/3] xen/enlighten: Expose MWAIT and MWAIT_LEAF ifhypervisor OKs it.

From: Jan Beulich
Date: Fri Feb 24 2012 - 05:32:19 EST


>>> On 23.02.12 at 23:31, Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx> wrote:
> For the hypervisor to take advantage of the MWAIT support it needs
> to extract from the ACPI _CST the register address. But the
> hypervisor does not have the support to parse DSDT so it relies on
> the initial domain (dom0) to parse the ACPI Power Management information
> and push it up to the hypervisor. The pushing of the data is done
> by the processor_harveset_xen module which parses the information that
> the ACPI parser has graciously exposed in 'struct acpi_processor'.
>
> For the ACPI parser to also expose the Cx states for MWAIT, we need
> to expose the MWAIT capability (leaf 1). Furthermore we also need to
> expose the MWAIT_LEAF capability (leaf 5) for cstate.c to properly
> function.
>
> The hypervisor could expose these flags when it traps the XEN_EMULATE_PREFIX
> operations, but it can't do it since it needs to be backwards compatible.
> Instead we choose to use the native CPUID to figure out if the MWAIT
> capability exists and use the XEN_SET_PDC query hypercall to figure out
> if the hypervisor wants us to expose the MWAIT_LEAF capability or not.
>
> Note: The XEN_SET_PDC query was implemented in c/s 23783:
> "ACPI: add _PDC input override mechanism".
>
> With this in place, instead of
> C3 ACPI IOPORT 415
> we get now
> C3:ACPI FFH INTEL MWAIT 0x20
>
> Note: The cpu_idle which would be calling the mwait variants for idling
> never gets set b/c we set the default pm_idle to be the hypercall variant.
>
> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
> ---
> arch/x86/xen/enlighten.c | 92
> +++++++++++++++++++++++++++++++++++++-
> include/xen/interface/platform.h | 4 +-
> 2 files changed, 94 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index 12eb07b..4c82936 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -62,6 +62,14 @@
> #include <asm/reboot.h>
> #include <asm/stackprotector.h>
> #include <asm/hypervisor.h>
> +#include <asm/mwait.h>
> +
> +#ifdef CONFIG_ACPI
> +#include <asm/acpi.h>
> +#include <acpi/pdc_intel.h>
> +#include <acpi/processor.h>
> +#include <xen/interface/platform.h>
> +#endif
>
> #include "xen-ops.h"
> #include "mmu.h"
> @@ -200,13 +208,17 @@ static void __init xen_banner(void)
> static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
> static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
>
> +static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask;
> +static __read_mostly unsigned int cpuid_leaf5_ecx_val;
> +static __read_mostly unsigned int cpuid_leaf5_edx_val;
> +
> static void xen_cpuid(unsigned int *ax, unsigned int *bx,
> unsigned int *cx, unsigned int *dx)
> {
> unsigned maskebx = ~0;
> unsigned maskecx = ~0;
> unsigned maskedx = ~0;
> -
> + unsigned setecx = 0;
> /*
> * Mask out inconvenient features, to try and disable as many
> * unsupported kernel subsystems as possible.
> @@ -214,9 +226,18 @@ static void xen_cpuid(unsigned int *ax, unsigned int
> *bx,
> switch (*ax) {
> case 1:
> maskecx = cpuid_leaf1_ecx_mask;
> + setecx = cpuid_leaf1_ecx_set_mask;
> maskedx = cpuid_leaf1_edx_mask;
> break;
>
> + case CPUID_MWAIT_LEAF:
> + /* Synthesize the values.. */
> + *ax = 0;
> + *bx = 0;
> + *cx = cpuid_leaf5_ecx_val;
> + *dx = cpuid_leaf5_edx_val;
> + return;
> +
> case 0xb:
> /* Suppress extended topology stuff */
> maskebx = 0;
> @@ -232,9 +253,75 @@ static void xen_cpuid(unsigned int *ax, unsigned int
> *bx,
>
> *bx &= maskebx;
> *cx &= maskecx;
> + *cx |= setecx;
> *dx &= maskedx;
> +
> }
>
> +static bool __init xen_check_mwait(void)
> +{
> +#if CONFIG_ACPI

#ifdef

> + struct xen_platform_op op = {
> + .cmd = XENPF_set_processor_pminfo,
> + .u.set_pminfo.id = -1,
> + .u.set_pminfo.type = XEN_PM_PDC,
> + };
> + uint32_t buf[3];
> + unsigned int ax, bx, cx, dx;
> + unsigned int mwait_mask;
> +
> + /* We need to determine whether it is OK to expose the MWAIT
> + * capability to the kernel to harvest deeper than C3 states from ACPI
> + * _CST using the processor_harvest_xen.c module. For this to work, we
> + * need to gather the MWAIT_LEAF values (which the cstate.c code
> + * checks against). The hypervisor won't expose the MWAIT flag because
> + * it would break backwards compatibility; so we will find out directly
> + * from the hardware and hypercall.
> + */
> + if (!xen_initial_domain())
> + return false;
> +
> + ax = 1;
> + cx = 0;
> +
> + native_cpuid(&ax, &bx, &cx, &dx);
> +
> + mwait_mask = (1 << (X86_FEATURE_EST % 32)) |
> + (1 << (X86_FEATURE_MWAIT % 32));
> +
> + if ((cx & mwait_mask) != mwait_mask)
> + return false;
> +
> + /* We need to emulate the MWAIT_LEAF and for that we need both
> + * ecx and edx. The hypercall provides only partial information.
> + */
> +
> + ax = CPUID_MWAIT_LEAF;
> + bx = 0;
> + cx = 0;
> + dx = 0;
> +
> + native_cpuid(&ax, &bx, &cx, &dx);
> +
> + /* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so,
> + * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3.
> + */
> + buf[0] = ACPI_PDC_REVISION_ID;
> + buf[1] = 1;
> + buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP);
> +
> + set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
> +
> + if ((HYPERVISOR_dom0_op(&op) == 0) &&
> + (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) {
> + cpuid_leaf5_ecx_val = cx;
> + cpuid_leaf5_edx_val = dx;
> + }
> + return true;
> +#else
> + return false;
> +#endif
> +}
> static void __init xen_init_cpuid_mask(void)
> {
> unsigned int ax, bx, cx, dx;
> @@ -261,6 +348,9 @@ static void __init xen_init_cpuid_mask(void)
> /* Xen will set CR4.OSXSAVE if supported and not disabled by force */
> if ((cx & xsave_mask) != xsave_mask)
> cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */
> +
> + if (xen_check_mwait())
> + cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32));
> }
>
> static void xen_set_debugreg(int reg, unsigned long val)
> diff --git a/include/xen/interface/platform.h
> b/include/xen/interface/platform.h
> index c168468..6220b98 100644
> --- a/include/xen/interface/platform.h
> +++ b/include/xen/interface/platform.h
> @@ -200,7 +200,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t);
> #define XEN_PM_CX 0
> #define XEN_PM_PX 1
> #define XEN_PM_TX 2
> -
> +#define XEN_PM_PDC 3
> /* Px sub info type */
> #define XEN_PX_PCT 1
> #define XEN_PX_PSS 2
> @@ -286,6 +286,7 @@ struct xen_processor_performance {
> };
> DEFINE_GUEST_HANDLE_STRUCT(xen_processor_performance);
>
> +DEFINE_GUEST_HANDLE(uint32_t);

Do you really need to introduce (step by step) handles for all those
uintNN_t types in a way different from Xen's
(__DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t))?

Looks good to me otherwise.

Jan

> struct xenpf_set_processor_pminfo {
> /* IN variables */
> uint32_t id; /* ACPI CPU ID */
> @@ -293,6 +294,7 @@ struct xenpf_set_processor_pminfo {
> union {
> struct xen_processor_power power;/* Cx: _CST/_CSD */
> struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */
> + GUEST_HANDLE(uint32_t) pdc;
> };
> };
> DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo);
> --
> 1.7.9.48.g85da4d


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/