Re: [PATCH v2 2/2] LoongArch: Add steal time support in guest side

From: Huacai Chen
Date: Tue Apr 30 2024 - 04:24:08 EST


Hi, Bibo,

On Tue, Apr 30, 2024 at 9:45 AM Bibo Mao <maobibo@xxxxxxxxxxx> wrote:
>
> A per-CPU struct kvm_steal_time is added here. Its size is 64 bytes and
> it is also aligned to 64 bytes, so that the whole structure fits within
> one physical page.
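[ For context: the per-CPU area being registered is the 64-byte structure
  below. This is only a sketch assembled from the steal-time definitions
  the patch relies on, not a hunk from the patch itself, so treat the
  exact field names as an assumption. ]

	struct kvm_steal_time {
		__u64 steal;	/* time the vCPU was runnable but not running, in ns */
		__u32 version;	/* even when stable, odd while the host updates it */
		__u32 flags;
		__u32 pad[12];	/* pad to 64 bytes so the area stays in one page */
	};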
>
> When a vCPU is brought online, pv_enable_steal_time() is called. This
> function passes the guest physical address of struct kvm_steal_time to
> the hypervisor and tells it to enable steal time. When the vCPU goes
> offline, the physical address is set to 0, which tells the hypervisor to
> disable steal time.
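[ In code terms, the enable/disable protocol described above is the pair
  of notify hypercalls that appear further down in the patch; restated
  here only for readability, using the constants from the patch itself. ]

	/* enable: register the GPA of this CPU's steal-time area */
	kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME,
		       addr | KVM_STEAL_PHYS_VALID);

	/* disable: a zero address tells the hypervisor to stop updating it */
	kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, 0);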
>
> Here is the output of vmstat on the guest when there is workload on both
> the host and the guest. It includes the steal time (st) statistics.
>
> procs -----------memory---------- -----io---- -system-- ------cpu-----
> r b swpd free inact active bi bo in cs us sy id wa st
> 15 1 0 7583616 184112 72208 20 0 162 52 31 6 43 0 20
> 17 0 0 7583616 184704 72192 0 0 6318 6885 5 60 8 5 22
> 16 0 0 7583616 185392 72144 0 0 1766 1081 0 49 0 1 50
> 16 0 0 7583616 184816 72304 0 0 6300 6166 4 62 12 2 20
> 18 0 0 7583632 184480 72240 0 0 2814 1754 2 58 4 1 35
>
> Signed-off-by: Bibo Mao <maobibo@xxxxxxxxxxx>
> ---
> arch/loongarch/Kconfig | 11 +++
> arch/loongarch/include/asm/paravirt.h | 5 +
> arch/loongarch/kernel/paravirt.c | 131 ++++++++++++++++++++++++++
> arch/loongarch/kernel/time.c | 2 +
> 4 files changed, 149 insertions(+)
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index 0a1540a8853e..f3a03c33a052 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -592,6 +592,17 @@ config PARAVIRT
> over full virtualization. However, when run without a hypervisor
> the kernel is theoretically slower and slightly larger.
>
> +config PARAVIRT_TIME_ACCOUNTING
> + bool "Paravirtual steal time accounting"
> + select PARAVIRT
> + help
> + Select this option to enable fine granularity task steal time
> + accounting. Time spent executing other tasks in parallel with
> + the current vCPU is discounted from the vCPU power. To account for
> + that, there can be a small performance impact.
> +
> + If in doubt, say N here.
> +
Can we make this a hidden selection instead, i.e.:

config PARAVIRT_TIME_ACCOUNTING
	def_bool PARAVIRT

Because I think we needn't give users too many choices (which also means
much more effort to test).

PowerPC even hides all the PARAVIRT config options; see
arch/powerpc/platforms/pseries/Kconfig.
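For reference, the pattern there is roughly the following (quoted from
memory, so the exact lines and conditions may differ): the platform
option simply selects the accounting support, and the user never sees a
separate prompt.

	config PPC_SPLPAR
		bool "Support for shared-processor logical partitions"
		depends on PPC_PSERIES
		select PARAVIRT
		select PARAVIRT_TIME_ACCOUNTING
		default y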

Huacai

> config ARCH_SUPPORTS_KEXEC
> def_bool y
>
> diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h
> index 58f7b7b89f2c..fe27fb5e82b8 100644
> --- a/arch/loongarch/include/asm/paravirt.h
> +++ b/arch/loongarch/include/asm/paravirt.h
> @@ -17,11 +17,16 @@ static inline u64 paravirt_steal_clock(int cpu)
> }
>
> int pv_ipi_init(void);
> +int __init pv_time_init(void);
> #else
> static inline int pv_ipi_init(void)
> {
> return 0;
> }
>
> +static inline int pv_time_init(void)
> +{
> + return 0;
> +}
> #endif // CONFIG_PARAVIRT
> #endif
> diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
> index 9044ed62045c..3f83afc7e2d2 100644
> --- a/arch/loongarch/kernel/paravirt.c
> +++ b/arch/loongarch/kernel/paravirt.c
> @@ -5,10 +5,13 @@
> #include <linux/jump_label.h>
> #include <linux/kvm_para.h>
> #include <asm/paravirt.h>
> +#include <linux/reboot.h>
> #include <linux/static_call.h>
>
> struct static_key paravirt_steal_enabled;
> struct static_key paravirt_steal_rq_enabled;
> +static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
> +static int has_steal_clock;
>
> static u64 native_steal_clock(int cpu)
> {
> @@ -17,6 +20,57 @@ static u64 native_steal_clock(int cpu)
>
> DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
>
> +static bool steal_acc = true;
> +static int __init parse_no_stealacc(char *arg)
> +{
> + steal_acc = false;
> + return 0;
> +}
> +early_param("no-steal-acc", parse_no_stealacc);
> +
> +static u64 para_steal_clock(int cpu)
> +{
> + u64 steal;
> + struct kvm_steal_time *src;
> + int version;
> +
> + src = &per_cpu(steal_time, cpu);
> + do {
> +
> + version = src->version;
> + /* Make sure that the version is read before the steal */
> + virt_rmb();
> + steal = src->steal;
> + /* Make sure that the steal is read before the next version */
> + virt_rmb();
> +
> + } while ((version & 1) || (version != src->version));
> + return steal;
> +}
> +
> +static int pv_enable_steal_time(void)
> +{
> + int cpu = smp_processor_id();
> + struct kvm_steal_time *st;
> + unsigned long addr;
> +
> + if (!has_steal_clock)
> + return -EPERM;
> +
> + st = &per_cpu(steal_time, cpu);
> + addr = per_cpu_ptr_to_phys(st);
> +
> + /* The whole structure kvm_steal_time should be one page */
> + if (PFN_DOWN(addr) != PFN_DOWN(addr + sizeof(*st))) {
> + pr_warn("Illegal PV steal time addr %lx\n", addr);
> + return -EFAULT;
> + }
> +
> + addr |= KVM_STEAL_PHYS_VALID;
> + kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, addr);
> + return 0;
> +}
> +
> #ifdef CONFIG_SMP
> static void pv_send_ipi_single(int cpu, unsigned int action)
> {
> @@ -110,6 +164,32 @@ static void pv_init_ipi(void)
> if (r < 0)
> panic("SWI0 IRQ request failed\n");
> }
> +
> +static void pv_disable_steal_time(void)
> +{
> + if (has_steal_clock)
> + kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, 0);
> +}
> +
> +static int pv_time_cpu_online(unsigned int cpu)
> +{
> + unsigned long flags;
> +
> + local_irq_save(flags);
> + pv_enable_steal_time();
> + local_irq_restore(flags);
> + return 0;
> +}
> +
> +static int pv_time_cpu_down_prepare(unsigned int cpu)
> +{
> + unsigned long flags;
> +
> + local_irq_save(flags);
> + pv_disable_steal_time();
> + local_irq_restore(flags);
> + return 0;
> +}
> #endif
>
> static bool kvm_para_available(void)
> @@ -149,3 +229,54 @@ int __init pv_ipi_init(void)
>
> return 1;
> }
> +
> +static void pv_cpu_reboot(void *unused)
> +{
> + pv_disable_steal_time();
> +}
> +
> +static int pv_reboot_notify(struct notifier_block *nb, unsigned long code,
> + void *unused)
> +{
> + on_each_cpu(pv_cpu_reboot, NULL, 1);
> + return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block pv_reboot_nb = {
> + .notifier_call = pv_reboot_notify,
> +};
> +
> +int __init pv_time_init(void)
> +{
> + int feature;
> +
> + if (!cpu_has_hypervisor)
> + return 0;
> + if (!kvm_para_available())
> + return 0;
> +
> + feature = read_cpucfg(CPUCFG_KVM_FEATURE);
> + if (!(feature & KVM_FEATURE_STEAL_TIME))
> + return 0;
> +
> + has_steal_clock = 1;
> + if (pv_enable_steal_time()) {
> + has_steal_clock = 0;
> + return 0;
> + }
> +
> + register_reboot_notifier(&pv_reboot_nb);
> + static_call_update(pv_steal_clock, para_steal_clock);
> + static_key_slow_inc(&paravirt_steal_enabled);
> + if (steal_acc)
> + static_key_slow_inc(&paravirt_steal_rq_enabled);
> +
> +#ifdef CONFIG_SMP
> + if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
> + "loongarch/pvi_time:online",
> + pv_time_cpu_online, pv_time_cpu_down_prepare) < 0)
> + pr_err("Failed to install cpu hotplug callbacks\n");
> +#endif
> + pr_info("Using stolen time PV\n");
> + return 0;
> +}
> diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
> index fd5354f9be7c..46d7d40c87e3 100644
> --- a/arch/loongarch/kernel/time.c
> +++ b/arch/loongarch/kernel/time.c
> @@ -15,6 +15,7 @@
>
> #include <asm/cpu-features.h>
> #include <asm/loongarch.h>
> +#include <asm/paravirt.h>
> #include <asm/time.h>
>
> u64 cpu_clock_freq;
> @@ -214,4 +215,5 @@ void __init time_init(void)
>
> constant_clockevent_init();
> constant_clocksource_init();
> + pv_time_init();
> }
> --
> 2.39.3
>
>