Re: [PATCH] KVM: Pre-allocate 1 cpumask variable per cpu for both pv tlb and pv ipis

From: Vitaly Kuznetsov
Date: Tue Feb 04 2020 - 07:57:15 EST


Wanpeng Li <kernellwp@xxxxxxxxx> writes:

> From: Wanpeng Li <wanpengli@xxxxxxxxxxx>
>
> Nick Desaulniers reported:
>
> When building with:
> $ make CC=clang arch/x86/ CFLAGS=-Wframe-larger-than=1000
> The following warning is observed:
> arch/x86/kernel/kvm.c:494:13: warning: stack frame size of 1064 bytes in
> function 'kvm_send_ipi_mask_allbutself' [-Wframe-larger-than=]
> static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int
> vector)
> ^
> Debugging with:
> https://github.com/ClangBuiltLinux/frame-larger-than
> via:
> $ python3 frame_larger_than.py arch/x86/kernel/kvm.o \
> kvm_send_ipi_mask_allbutself
> points to the stack allocated `struct cpumask newmask` in
> `kvm_send_ipi_mask_allbutself`. The size of a `struct cpumask` is
> potentially large, as it's CONFIG_NR_CPUS divided by BITS_PER_LONG for
> the target architecture. CONFIG_NR_CPUS for X86_64 can be as high as
> 8192, making a single instance of a `struct cpumask` 1024 B.
>
> This patch fixes it by pre-allocating one cpumask variable per CPU and
> using it for both PV TLB flush and PV IPIs.
>
> Reported-by: Nick Desaulniers <ndesaulniers@xxxxxxxxxx>
> Acked-by: Nick Desaulniers <ndesaulniers@xxxxxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Nick Desaulniers <ndesaulniers@xxxxxxxxxx>
> Signed-off-by: Wanpeng Li <wanpengli@xxxxxxxxxxx>
> ---
> arch/x86/kernel/kvm.c | 33 +++++++++++++++++++++------------
> 1 file changed, 21 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 81045aab..b1e8efa 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -425,6 +425,8 @@ static void __init sev_map_percpu_data(void)
> }
> }
>
> +static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
> +
> #ifdef CONFIG_SMP
> #define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
>
> @@ -490,12 +492,12 @@ static void kvm_send_ipi_mask(const struct
> cpumask *mask, int vector)
> static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask,
> int vector)
> {
> unsigned int this_cpu = smp_processor_id();
> - struct cpumask new_mask;
> + struct cpumask *new_mask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
> const struct cpumask *local_mask;
>
> - cpumask_copy(&new_mask, mask);
> - cpumask_clear_cpu(this_cpu, &new_mask);
> - local_mask = &new_mask;
> + cpumask_copy(new_mask, mask);
> + cpumask_clear_cpu(this_cpu, new_mask);
> + local_mask = new_mask;
> __send_ipi_mask(local_mask, vector);
> }
>
> @@ -575,7 +577,6 @@ static void __init kvm_apf_trap_init(void)
> update_intr_gate(X86_TRAP_PF, async_page_fault);
> }
>
> -static DEFINE_PER_CPU(cpumask_var_t, __pv_tlb_mask);
>
> static void kvm_flush_tlb_others(const struct cpumask *cpumask,
> const struct flush_tlb_info *info)
> @@ -583,7 +584,7 @@ static void kvm_flush_tlb_others(const struct
> cpumask *cpumask,
> u8 state;
> int cpu;
> struct kvm_steal_time *src;
> - struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_tlb_mask);
> + struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
>
> cpumask_copy(flushmask, cpumask);
> /*
> @@ -624,6 +625,7 @@ static void __init kvm_guest_init(void)
> kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
> pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
> pv_ops.mmu.tlb_remove_table = tlb_remove_table;
> + pr_info("KVM setup pv remote TLB flush\n");
> }
>
> if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
> @@ -732,23 +734,30 @@ static __init int activate_jump_labels(void)
> }
> arch_initcall(activate_jump_labels);
>
> -static __init int kvm_setup_pv_tlb_flush(void)
> +static __init int kvm_alloc_cpumask(void)
> {
> int cpu;
> + bool alloc = false;
>
> if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
> !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
> - kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
> + kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
> + alloc = true;
> +
> +#if defined(CONFIG_SMP)
> + if (!alloc && kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI))

The '!alloc' check is superfluous: setting 'alloc' to true again when it is
already true is harmless.

> + alloc = true;
> +#endif
> +
> + if (alloc)
> for_each_possible_cpu(cpu) {
> - zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
> + zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
> GFP_KERNEL, cpu_to_node(cpu));
> }
> - pr_info("KVM setup pv remote TLB flush\n");
> - }
>
> return 0;
> }
> -arch_initcall(kvm_setup_pv_tlb_flush);
> +arch_initcall(kvm_alloc_cpumask);

Honestly, I'd simplify the check in kvm_alloc_cpumask() to

if (!kvm_para_available())
	return 0;

and allocate the masks in all other cases.

>
> #ifdef CONFIG_PARAVIRT_SPINLOCKS
>
> --
> 1.8.3.1
>

--
Vitaly