Re: [PATCH v5 6/6] LoongArch: Add pv ipi support on LoongArch system

From: Huacai Chen
Date: Sat Feb 24 2024 - 04:19:29 EST


Hi, Bibo,

On Thu, Feb 22, 2024 at 11:28 AM Bibo Mao <maobibo@xxxxxxxxxxx> wrote:
>
> On LoongArch system, ipi hw uses iocsr registers, there is one iocsr
> register access on ipi sending, and two iocsr access on ipi receiving
> which is ipi interrupt handler. On VM mode all iocsr accessing will
> cause VM to trap into hypervisor. So with one ipi hw notification
> there will be three times of trap.
>
> PV ipi is added for VM, hypercall instruction is used for ipi sender,
> and hypervisor will inject SWI to destination vcpu. During SWI interrupt
> handler, only estat CSR register is written to clear irq. Estat CSR
> register access will not trap into hypervisor. So with pv ipi supported,
> there is one trap with pv ipi sender, and no trap with ipi receiver,
> there is only one trap with ipi notification.
>
> Also this patch adds ipi multicast support, the method is similar with
> x86. With ipi multicast support, ipi notification can be sent to at most
> 128 vcpus at one time. It reduces trap times into hypervisor greatly.
>
> Signed-off-by: Bibo Mao <maobibo@xxxxxxxxxxx>
> ---
> arch/loongarch/include/asm/hardirq.h | 1 +
> arch/loongarch/include/asm/kvm_host.h | 1 +
> arch/loongarch/include/asm/kvm_para.h | 123 +++++++++++++++++++++++++
> arch/loongarch/include/asm/loongarch.h | 1 +
> arch/loongarch/kernel/irq.c | 2 +-
> arch/loongarch/kernel/paravirt.c | 112 ++++++++++++++++++++++
> arch/loongarch/kernel/setup.c | 1 +
> arch/loongarch/kernel/smp.c | 2 +-
> arch/loongarch/kvm/exit.c | 73 ++++++++++++++-
> arch/loongarch/kvm/vcpu.c | 1 +
> 10 files changed, 313 insertions(+), 4 deletions(-)
>
> diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
> index 9f0038e19c7f..b26d596a73aa 100644
> --- a/arch/loongarch/include/asm/hardirq.h
> +++ b/arch/loongarch/include/asm/hardirq.h
> @@ -21,6 +21,7 @@ enum ipi_msg_type {
> typedef struct {
> unsigned int ipi_irqs[NR_IPI];
> unsigned int __softirq_pending;
> + atomic_t message ____cacheline_aligned_in_smp;
> } ____cacheline_aligned irq_cpustat_t;
>
> DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
> index 3ba16ef1fe69..0b96c6303cf7 100644
> --- a/arch/loongarch/include/asm/kvm_host.h
> +++ b/arch/loongarch/include/asm/kvm_host.h
> @@ -43,6 +43,7 @@ struct kvm_vcpu_stat {
> u64 idle_exits;
> u64 cpucfg_exits;
> u64 signal_exits;
> + u64 hypercall_exits;
> };
>
> #define KVM_MEM_HUGEPAGE_CAPABLE (1UL << 0)
> diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
> index af5d677a9052..a82bffbbf8a1 100644
> --- a/arch/loongarch/include/asm/kvm_para.h
> +++ b/arch/loongarch/include/asm/kvm_para.h
> @@ -8,6 +8,9 @@
> #define HYPERVISOR_KVM 1
> #define HYPERVISOR_VENDOR_SHIFT 8
> #define HYPERCALL_CODE(vendor, code) ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
> +#define KVM_HCALL_CODE_PV_SERVICE 0
> +#define KVM_HCALL_PV_SERVICE HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_PV_SERVICE)
> +#define KVM_HCALL_FUNC_PV_IPI 1
>
> /*
> * LoongArch hypercall return code
> @@ -16,6 +19,126 @@
> #define KVM_HCALL_INVALID_CODE -1UL
> #define KVM_HCALL_INVALID_PARAMETER -2UL
>
> +/*
> + * Hypercall interface for KVM hypervisor
> + *
> + * a0: function identifier
> + * a1-a6: args
> + * Return value will be placed in v0.
> + * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
> + */
> +static __always_inline long kvm_hypercall(u64 fid)
> +{
> + register long ret asm("v0");
> + register unsigned long fun asm("a0") = fid;
> +
> + __asm__ __volatile__(
> + "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
> + : "=r" (ret)
> + : "r" (fun)
> + : "memory"
> + );
> +
> + return ret;
> +}
> +
> +static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
> +{
> + register long ret asm("v0");
> + register unsigned long fun asm("a0") = fid;
> + register unsigned long a1 asm("a1") = arg0;
> +
> + __asm__ __volatile__(
> + "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
> + : "=r" (ret)
> + : "r" (fun), "r" (a1)
> + : "memory"
> + );
> +
> + return ret;
> +}
> +
> +static __always_inline long kvm_hypercall2(u64 fid,
> + unsigned long arg0, unsigned long arg1)
> +{
> + register long ret asm("v0");
> + register unsigned long fun asm("a0") = fid;
> + register unsigned long a1 asm("a1") = arg0;
> + register unsigned long a2 asm("a2") = arg1;
> +
> + __asm__ __volatile__(
> + "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
> + : "=r" (ret)
> + : "r" (fun), "r" (a1), "r" (a2)
> + : "memory"
> + );
> +
> + return ret;
> +}
> +
> +static __always_inline long kvm_hypercall3(u64 fid,
> + unsigned long arg0, unsigned long arg1, unsigned long arg2)
> +{
> + register long ret asm("v0");
> + register unsigned long fun asm("a0") = fid;
> + register unsigned long a1 asm("a1") = arg0;
> + register unsigned long a2 asm("a2") = arg1;
> + register unsigned long a3 asm("a3") = arg2;
> +
> + __asm__ __volatile__(
> + "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
> + : "=r" (ret)
> + : "r" (fun), "r" (a1), "r" (a2), "r" (a3)
> + : "memory"
> + );
> +
> + return ret;
> +}
> +
> +static __always_inline long kvm_hypercall4(u64 fid,
> + unsigned long arg0, unsigned long arg1, unsigned long arg2,
> + unsigned long arg3)
> +{
> + register long ret asm("v0");
> + register unsigned long fun asm("a0") = fid;
> + register unsigned long a1 asm("a1") = arg0;
> + register unsigned long a2 asm("a2") = arg1;
> + register unsigned long a3 asm("a3") = arg2;
> + register unsigned long a4 asm("a4") = arg3;
> +
> + __asm__ __volatile__(
> + "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
> + : "=r" (ret)
> + : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4)
> + : "memory"
> + );
> +
> + return ret;
> +}
> +
> +static __always_inline long kvm_hypercall5(u64 fid,
> + unsigned long arg0, unsigned long arg1, unsigned long arg2,
> + unsigned long arg3, unsigned long arg4)
> +{
> + register long ret asm("v0");
> + register unsigned long fun asm("a0") = fid;
> + register unsigned long a1 asm("a1") = arg0;
> + register unsigned long a2 asm("a2") = arg1;
> + register unsigned long a3 asm("a3") = arg2;
> + register unsigned long a4 asm("a4") = arg3;
> + register unsigned long a5 asm("a5") = arg4;
> +
> + __asm__ __volatile__(
> + "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
> + : "=r" (ret)
> + : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5)
> + : "memory"
> + );
> +
> + return ret;
> +}
> +
> +
> static inline unsigned int kvm_arch_para_features(void)
> {
> return 0;
> diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
> index a1d22e8b6f94..0ad36704cb4b 100644
> --- a/arch/loongarch/include/asm/loongarch.h
> +++ b/arch/loongarch/include/asm/loongarch.h
> @@ -167,6 +167,7 @@
> #define CPUCFG_KVM_SIG CPUCFG_KVM_BASE
> #define KVM_SIGNATURE "KVM\0"
> #define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
> +#define KVM_FEATURE_PV_IPI BIT(1)
>
> #ifndef __ASSEMBLY__
>
> diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
> index ce36897d1e5a..4863e6c1b739 100644
> --- a/arch/loongarch/kernel/irq.c
> +++ b/arch/loongarch/kernel/irq.c
> @@ -113,5 +113,5 @@ void __init init_IRQ(void)
> per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE);
> }
>
> - set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
> + set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
> }
> diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
> index 5cf794e8490f..4c30e1c73c72 100644
> --- a/arch/loongarch/kernel/paravirt.c
> +++ b/arch/loongarch/kernel/paravirt.c
> @@ -1,6 +1,7 @@
> // SPDX-License-Identifier: GPL-2.0
> #include <linux/export.h>
> #include <linux/types.h>
> +#include <linux/interrupt.h>
> #include <linux/jump_label.h>
> #include <linux/kvm_para.h>
> #include <asm/paravirt.h>
> @@ -16,6 +17,103 @@ static u64 native_steal_clock(int cpu)
>
> DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
>
> +#ifdef CONFIG_SMP
> +static void pv_send_ipi_single(int cpu, unsigned int action)
> +{
> + unsigned int min, old;
> + unsigned long bitmap = 0;
> + irq_cpustat_t *info = &per_cpu(irq_stat, cpu);
> +
> + action = BIT(action);
> + old = atomic_fetch_or(action, &info->message);
> + if (old == 0) {
> + min = cpu_logical_map(cpu);
> + bitmap = 1;
> + kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, bitmap, 0, min);
> + }
An early-return style would make this a little simpler, i.e.:

if (old)
return;

min = ......

> +}
> +
> +#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
> +static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action)
> +{
> + unsigned int cpu, i, min = 0, max = 0, old;
> + __uint128_t bitmap = 0;
> + irq_cpustat_t *info;
> +
> + if (cpumask_empty(mask))
> + return;
> +
> + action = BIT(action);
> + for_each_cpu(i, mask) {
> + info = &per_cpu(irq_stat, i);
> + old = atomic_fetch_or(action, &info->message);
> + if (old)
> + continue;
> +
> + cpu = cpu_logical_map(i);
> + if (!bitmap) {
> + min = max = cpu;
> + } else if (cpu > min && cpu < min + KVM_IPI_CLUSTER_SIZE) {
> + max = cpu > max ? cpu : max;
> + } else if (cpu < min && (max - cpu) < KVM_IPI_CLUSTER_SIZE) {
> + bitmap <<= min - cpu;
> + min = cpu;
> + } else {
> + /*
> + * Physical cpuids are sorted in ascending order. For the
> + * next mask calculation, send the IPI here directly and
> + * skip the remaining cpus.
> + */
> + kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI,
> + (unsigned long)bitmap,
> + (unsigned long)(bitmap >> BITS_PER_LONG), min);
> + min = max = cpu;
> + bitmap = 0;
> + }
> + __set_bit(cpu - min, (unsigned long *)&bitmap);
> + }
> +
> + if (bitmap)
> + kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, (unsigned long)bitmap,
> + (unsigned long)(bitmap >> BITS_PER_LONG), min);
> +}
> +
> +static irqreturn_t loongson_do_swi(int irq, void *dev)
> +{
> + irq_cpustat_t *info;
> + long action;
> +
> + /* Clear swi interrupt */
> + clear_csr_estat(1 << INT_SWI0);
> + info = this_cpu_ptr(&irq_stat);
> + action = atomic_xchg(&info->message, 0);
> + if (action & SMP_CALL_FUNCTION) {
> + generic_smp_call_function_interrupt();
> + info->ipi_irqs[IPI_CALL_FUNCTION]++;
> + }
> +
> + if (action & SMP_RESCHEDULE) {
> + scheduler_ipi();
> + info->ipi_irqs[IPI_RESCHEDULE]++;
> + }
> +
> + return IRQ_HANDLED;
> +}
> +
> +static void pv_init_ipi(void)
> +{
> + int r, swi0;
> +
> + swi0 = get_percpu_irq(INT_SWI0);
> + if (swi0 < 0)
> + panic("SWI0 IRQ mapping failed\n");
> + irq_set_percpu_devid(swi0);
> + r = request_percpu_irq(swi0, loongson_do_swi, "SWI0", &irq_stat);
> + if (r < 0)
> + panic("SWI0 IRQ request failed\n");
> +}
> +#endif
> +
> static bool kvm_para_available(void)
> {
> static int hypervisor_type;
> @@ -32,10 +130,24 @@ static bool kvm_para_available(void)
>
> int __init pv_ipi_init(void)
> {
> + int feature;
> +
> if (!cpu_has_hypervisor)
> return 0;
> if (!kvm_para_available())
> return 0;
>
> + /*
> + * check whether KVM hypervisor supports pv_ipi or not
> + */
> + feature = read_cpucfg(CPUCFG_KVM_FEATURE);
> +#ifdef CONFIG_SMP
> + if (feature & KVM_FEATURE_PV_IPI) {
> + smp_ops.init_ipi = pv_init_ipi;
> + smp_ops.send_ipi_single = pv_send_ipi_single;
> + smp_ops.send_ipi_mask = pv_send_ipi_mask;
> + }
> +#endif
> +
> return 1;
> }
> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> index b79a1244b56f..c95ed3224b7d 100644
> --- a/arch/loongarch/kernel/setup.c
> +++ b/arch/loongarch/kernel/setup.c
> @@ -368,6 +368,7 @@ void __init platform_init(void)
> pr_info("The BIOS Version: %s\n", b_info.bios_version);
>
> efi_runtime_init();
> + pv_ipi_init();
Moving the call site to loongson_smp_setup() would be better.

Huacai

> }
>
> static void __init check_kernel_sections_mem(void)
> diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
> index 2182e7cc2ed6..9e9fda1fe18a 100644
> --- a/arch/loongarch/kernel/smp.c
> +++ b/arch/loongarch/kernel/smp.c
> @@ -285,7 +285,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
> void loongson_init_secondary(void)
> {
> unsigned int cpu = smp_processor_id();
> - unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
> + unsigned int imask = ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
> ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;
>
> change_csr_ecfg(ECFG0_IM, imask);
> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
> index 6a38fd59d86d..46940e97975b 100644
> --- a/arch/loongarch/kvm/exit.c
> +++ b/arch/loongarch/kvm/exit.c
> @@ -227,6 +227,9 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
> case CPUCFG_KVM_SIG:
> vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
> break;
> + case CPUCFG_KVM_FEATURE:
> + vcpu->arch.gprs[rd] = KVM_FEATURE_PV_IPI;
> + break;
> default:
> vcpu->arch.gprs[rd] = 0;
> break;
> @@ -699,12 +702,78 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
> return RESUME_GUEST;
> }
>
> +static int kvm_pv_send_ipi(struct kvm_vcpu *vcpu)
> +{
> + unsigned long ipi_bitmap;
> + unsigned int min, cpu, i;
> + struct kvm_vcpu *dest;
> +
> + min = vcpu->arch.gprs[LOONGARCH_GPR_A3];
> + for (i = 0; i < 2; i++) {
> + ipi_bitmap = vcpu->arch.gprs[LOONGARCH_GPR_A1 + i];
> + if (!ipi_bitmap)
> + continue;
> +
> + cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG);
> + while (cpu < BITS_PER_LONG) {
> + dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min);
> + cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG,
> + cpu + 1);
> + if (!dest)
> + continue;
> +
> + /*
> + * Send SWI0 to dest vcpu to emulate IPI interrupt
> + */
> + kvm_queue_irq(dest, INT_SWI0);
> + kvm_vcpu_kick(dest);
> + }
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * hypercall emulation always return to guest, Caller should check retval.
> + */
> +static void kvm_handle_pv_service(struct kvm_vcpu *vcpu)
> +{
> + unsigned long func = vcpu->arch.gprs[LOONGARCH_GPR_A0];
> + long ret;
> +
> + switch (func) {
> + case KVM_HCALL_FUNC_PV_IPI:
> + kvm_pv_send_ipi(vcpu);
> + ret = KVM_HCALL_STATUS_SUCCESS;
> + break;
> + default:
> + ret = KVM_HCALL_INVALID_CODE;
> + break;
> + };
> +
> + vcpu->arch.gprs[LOONGARCH_GPR_A0] = ret;
> +}
> +
> static int kvm_handle_hypercall(struct kvm_vcpu *vcpu)
> {
> + larch_inst inst;
> + unsigned int code;
> +
> + inst.word = vcpu->arch.badi;
> + code = inst.reg0i15_format.immediate;
> update_pc(&vcpu->arch);
>
> - /* Treat it as noop intruction, only set return value */
> - vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE;
> + switch (code) {
> + case KVM_HCALL_PV_SERVICE:
> + vcpu->stat.hypercall_exits++;
> + kvm_handle_pv_service(vcpu);
> + break;
> + default:
> + /* Treat it as a no-op instruction, only set return value */
> + vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE;
> + break;
> + }
> +
> return RESUME_GUEST;
> }
>
> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
> index 40296d8ef297..24fd5e4647f3 100644
> --- a/arch/loongarch/kvm/vcpu.c
> +++ b/arch/loongarch/kvm/vcpu.c
> @@ -19,6 +19,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
> STATS_DESC_COUNTER(VCPU, idle_exits),
> STATS_DESC_COUNTER(VCPU, cpucfg_exits),
> STATS_DESC_COUNTER(VCPU, signal_exits),
> + STATS_DESC_COUNTER(VCPU, hypercall_exits)
> };
>
> const struct kvm_stats_header kvm_vcpu_stats_header = {
> --
> 2.39.3
>
>