Re: [PATCH v2 2/4] arm64/fpsimd: Discover maximum vector length implemented by any CPU
From: Fuad Tabba
Date: Fri Jun 07 2024 - 03:34:40 EST
Hi Mark,
On Thu, Jun 6, 2024 at 4:32 PM Mark Brown <broonie@xxxxxxxxxx> wrote:
>
> When discovering the vector lengths for SVE and SME we do not currently
> record the maximum VL supported on any individual CPU. This is expected
> to be the same for all CPUs, but the architecture allows asymmetry; if we
> do encounter an asymmetric system then some CPUs may support VLs higher
> than the maximum Linux will use. Since the pKVM hypervisor needs to
> support saving and restoring anything the host can physically set, it
> needs to know the maximum value any CPU could have. Add support for
> enumerating it and validation for late CPUs.
>
> Signed-off-by: Mark Brown <broonie@xxxxxxxxxx>
> ---
> arch/arm64/include/asm/fpsimd.h | 13 +++++++++++++
> arch/arm64/kernel/fpsimd.c | 26 +++++++++++++++++++++++++-
> 2 files changed, 38 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index 51c21265b4fa..cd19713c9deb 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -188,6 +188,9 @@ struct vl_info {
> int max_vl;
> int max_virtualisable_vl;
>
> + /* Maximum vector length observed on any CPU */
> + int max_cpu_vl;
> +
> /*
> * Set of available vector lengths,
> * where length vq encoded as bit __vq_to_bit(vq):
> @@ -278,6 +281,11 @@ static inline int vec_max_virtualisable_vl(enum vec_type type)
> return vl_info[type].max_virtualisable_vl;
> }
>
> +static inline int vec_max_cpu_vl(enum vec_type type)
> +{
> + return vl_info[type].max_cpu_vl;
> +}
> +
> static inline int sve_max_vl(void)
> {
> return vec_max_vl(ARM64_VEC_SVE);
> @@ -288,6 +296,11 @@ static inline int sve_max_virtualisable_vl(void)
> return vec_max_virtualisable_vl(ARM64_VEC_SVE);
> }
>
> +static inline int sve_max_cpu_vl(void)
> +{
> + return vec_max_cpu_vl(ARM64_VEC_SVE);
> +}
> +
> /* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */
> static inline bool vq_available(enum vec_type type, unsigned int vq)
> {
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 22542fb81812..ee6fb8c4b16d 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -129,6 +129,7 @@ __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
> .min_vl = SVE_VL_MIN,
> .max_vl = SVE_VL_MIN,
> .max_virtualisable_vl = SVE_VL_MIN,
> + .max_cpu_vl = SVE_VL_MIN,
> },
> #endif
> #ifdef CONFIG_ARM64_SME
> @@ -1041,8 +1042,13 @@ static void vec_probe_vqs(struct vl_info *info,
> void __init vec_init_vq_map(enum vec_type type)
> {
> struct vl_info *info = &vl_info[type];
> + unsigned long b;
> +
> vec_probe_vqs(info, info->vq_map);
> bitmap_copy(info->vq_partial_map, info->vq_map, SVE_VQ_MAX);
> +
> + b = find_first_bit(info->vq_map, SVE_VQ_MAX);
> + info->max_cpu_vl = __bit_to_vl(b);
> }
>
> /*
> @@ -1054,11 +1060,16 @@ void vec_update_vq_map(enum vec_type type)
> {
> struct vl_info *info = &vl_info[type];
> DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
> + unsigned long b;
>
> vec_probe_vqs(info, tmp_map);
> bitmap_and(info->vq_map, info->vq_map, tmp_map, SVE_VQ_MAX);
> bitmap_or(info->vq_partial_map, info->vq_partial_map, tmp_map,
> SVE_VQ_MAX);
> +
> + b = find_first_bit(tmp_map, SVE_VQ_MAX);
> + if (__bit_to_vl(b) > info->max_cpu_vl)
> + info->max_cpu_vl = __bit_to_vl(b);
> }
>
> /*
> @@ -1069,10 +1080,23 @@ int vec_verify_vq_map(enum vec_type type)
> {
> struct vl_info *info = &vl_info[type];
> DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
> - unsigned long b;
> + unsigned long b, max_vl;
>
> vec_probe_vqs(info, tmp_map);
>
> + /*
> + * Currently the maximum VL is only used for pKVM, which
> + * doesn't allow late CPUs, but we don't expect asymmetry
> + * and if we encounter any then future users will need to
> + * handle it, so warn if we see anything.
> + */
> + max_vl = __bit_to_vl(find_first_bit(tmp_map, SVE_VQ_MAX));
> + if (max_vl > info->max_cpu_vl) {
> + pr_warn("%s: cpu%d: increases maximum VL to %u\n",
This should be %lu since max_vl is an unsigned long. Otherwise it
doesn't build with clang.
Cheers,
/fuad
> + info->name, smp_processor_id(), max_vl);
> + info->max_cpu_vl = max_vl;
> + }
> +
> bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
> if (bitmap_intersects(tmp_map, info->vq_map, SVE_VQ_MAX)) {
> pr_warn("%s: cpu%d: Required vector length(s) missing\n",
>
> --
> 2.39.2
>