Re: [PATCH 3/3] LoongArch: KVM: Set vcpu_is_preempted() macro rather than function
From: Bibo Mao
Date: Tue Mar 10 2026 - 23:51:40 EST
On 2026/3/11 上午11:35, Huacai Chen wrote:
Hi, Bibo, yes, I refused then; that is a tragedy, and I am sorry for that :(
On Wed, Mar 11, 2026 at 11:21 AM Bibo Mao <maobibo@xxxxxxxxxxx> wrote:
I remember that I had suggested you define vcpu_is_preempted() in the
vcpu_is_preempted() is performance sensitive because it is called in
osq_lock(), so define it as a macro here. That way its argument is not
evaluated most of the time, which avoids cache line thrashing across NUMA nodes.
header file but you refused.
According to some benchmarks, vcpu_is_preempted() is called in a busy loop; the effect is more obvious with more CPUs, and it may become a bottleneck.
Regards
Bibo Mao
Huacai
Here is part of the UnixBench result on a 3C5000 dual-way machine with 32
cores and 2 NUMA nodes.
          origin    with patch    CONFIG_PARAVIRT disabled
execl     6871.9    7134.2        7190.8
fstime    425.5     959.9         956.1
From the test results, the macro method performs almost the same as with
CONFIG_PARAVIRT disabled, and there is some improvement compared with
the function method.
Signed-off-by: Bibo Mao <maobibo@xxxxxxxxxxx>
---
arch/loongarch/include/asm/qspinlock.h | 27 +++++++++++++++++++++-----
arch/loongarch/kernel/paravirt.c | 15 ++------------
2 files changed, 24 insertions(+), 18 deletions(-)
diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h
index 66244801db67..305afd37cdcb 100644
--- a/arch/loongarch/include/asm/qspinlock.h
+++ b/arch/loongarch/include/asm/qspinlock.h
@@ -5,8 +5,10 @@
#include <linux/jump_label.h>
#ifdef CONFIG_PARAVIRT
-
+#include <asm/kvm_para.h>
DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);
+DECLARE_STATIC_KEY_FALSE(virt_preempt_key);
+DECLARE_PER_CPU(struct kvm_steal_time, steal_time);
#define virt_spin_lock virt_spin_lock
@@ -34,10 +36,25 @@ static inline bool virt_spin_lock(struct qspinlock *lock)
return true;
}
-#define vcpu_is_preempted vcpu_is_preempted
-
-bool vcpu_is_preempted(int cpu);
-
+/*
+ * A macro is better than an inline function here.
+ * With an inline function, argument cpu is evaluated even if it is unused,
+ * which may cause cache line thrashing across NUMA nodes.
+ * With a macro, argument cpu is evaluated only when it is actually used.
+ */
+#define vcpu_is_preempted(cpu) \
+({ \
+ bool __val; \
+ \
+ if (!static_branch_unlikely(&virt_preempt_key)) \
+ __val = false; \
+ else { \
+ struct kvm_steal_time *src; \
+ src = &per_cpu(steal_time, cpu); \
+ __val = !!(src->preempted & KVM_VCPU_PREEMPTED); \
+ } \
+ __val; \
+})
#endif /* CONFIG_PARAVIRT */
#include <asm-generic/qspinlock.h>
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
index b74fe6db49ab..2d1206e486e2 100644
--- a/arch/loongarch/kernel/paravirt.c
+++ b/arch/loongarch/kernel/paravirt.c
@@ -10,8 +10,8 @@
#include <asm/paravirt.h>
static int has_steal_clock;
-static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
-static DEFINE_STATIC_KEY_FALSE(virt_preempt_key);
+DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
+DEFINE_STATIC_KEY_FALSE(virt_preempt_key);
DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key);
static bool steal_acc = true;
@@ -261,17 +261,6 @@ static int pv_time_cpu_down_prepare(unsigned int cpu)
return 0;
}
-bool vcpu_is_preempted(int cpu)
-{
- struct kvm_steal_time *src;
-
- if (!static_branch_unlikely(&virt_preempt_key))
- return false;
-
- src = &per_cpu(steal_time, cpu);
- return !!(src->preempted & KVM_VCPU_PREEMPTED);
-}
-EXPORT_SYMBOL(vcpu_is_preempted);
#endif
static void pv_cpu_reboot(void *unused)
--
2.39.3