Re: [PATCH 3/3] LoongArch: KVM: Set vcpu_is_preempted() macro rather than function

From: Bibo Mao

Date: Tue Mar 10 2026 - 23:51:40 EST




On 2026/3/11 上午11:35, Huacai Chen wrote:
Hi, Bibo,

On Wed, Mar 11, 2026 at 11:21 AM Bibo Mao <maobibo@xxxxxxxxxxx> wrote:

vcpu_is_preempted() is performance-sensitive and is called from
osq_lock(), so define it as a macro. That way the cpu parameter is not
evaluated most of the time, which avoids cache line thrashing across
NUMA nodes.
I remember that I had suggested you define vcpu_is_preempted() in the
header file but you refused.
Yes, I refused then; that was a mistake, and I am sorry for it :(

In some benchmarks, vcpu_is_preempted() is called in a busy loop; the effect is more pronounced with more CPUs, and it may become a bottleneck.

Regards
Bibo Mao


Huacai


Here is part of unixbench result on 3C5000 DualWay machine with 32
Cores and 2 Numa node.
origin with patch CONFIG_PARAVIRT disabled
execl 6871.9 7134.2 7190.8
fstime 425.5 959.9 956.1

From the test results, the macro method performs almost the same as with
CONFIG_PARAVIRT disabled, and there is some improvement compared with
the function method.

Signed-off-by: Bibo Mao <maobibo@xxxxxxxxxxx>
---
arch/loongarch/include/asm/qspinlock.h | 27 +++++++++++++++++++++-----
arch/loongarch/kernel/paravirt.c | 15 ++------------
2 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h
index 66244801db67..305afd37cdcb 100644
--- a/arch/loongarch/include/asm/qspinlock.h
+++ b/arch/loongarch/include/asm/qspinlock.h
@@ -5,8 +5,10 @@
#include <linux/jump_label.h>

#ifdef CONFIG_PARAVIRT
-
+#include <asm/kvm_para.h>
DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);
+DECLARE_STATIC_KEY_FALSE(virt_preempt_key);
+DECLARE_PER_CPU(struct kvm_steal_time, steal_time);

#define virt_spin_lock virt_spin_lock

@@ -34,10 +36,25 @@ static inline bool virt_spin_lock(struct qspinlock *lock)
return true;
}

-#define vcpu_is_preempted vcpu_is_preempted
-
-bool vcpu_is_preempted(int cpu);
-
+/*
+ * Macro is better than inline function here
+ * With inline function, parameter cpu is parsed even though it is not used.
+ * This may cause cache line thrashing across NUMA node.
+ * With macro method, parameter cpu is parsed only when it is used.
+ */
+#define vcpu_is_preempted(cpu) \
+({ \
+ bool __val; \
+ \
+ if (!static_branch_unlikely(&virt_preempt_key)) \
+ __val = false; \
+ else { \
+ struct kvm_steal_time *src; \
+ src = &per_cpu(steal_time, cpu); \
+ __val = !!(src->preempted & KVM_VCPU_PREEMPTED); \
+ } \
+ __val; \
+})
#endif /* CONFIG_PARAVIRT */

#include <asm-generic/qspinlock.h>
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
index b74fe6db49ab..2d1206e486e2 100644
--- a/arch/loongarch/kernel/paravirt.c
+++ b/arch/loongarch/kernel/paravirt.c
@@ -10,8 +10,8 @@
#include <asm/paravirt.h>

static int has_steal_clock;
-static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
-static DEFINE_STATIC_KEY_FALSE(virt_preempt_key);
+DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
+DEFINE_STATIC_KEY_FALSE(virt_preempt_key);
DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key);

static bool steal_acc = true;
@@ -261,17 +261,6 @@ static int pv_time_cpu_down_prepare(unsigned int cpu)
return 0;
}

-bool vcpu_is_preempted(int cpu)
-{
- struct kvm_steal_time *src;
-
- if (!static_branch_unlikely(&virt_preempt_key))
- return false;
-
- src = &per_cpu(steal_time, cpu);
- return !!(src->preempted & KVM_VCPU_PREEMPTED);
-}
-EXPORT_SYMBOL(vcpu_is_preempted);
#endif

static void pv_cpu_reboot(void *unused)
--
2.39.3