Re: [PATCH v1 1/2] cpumask: Fix invalid uniprocessor mask assumption

From: Yury Norov
Date: Thu Jun 02 2022 - 18:50:36 EST


On Thu, Jun 02, 2022 at 11:04:19PM +0200, Sander Vanheule wrote:
> On uniprocessor builds, any CPU mask is assumed to contain exactly one
> CPU (cpu0). This ignores the existence of empty masks, resulting in
> incorrect behaviour for most of the implemented optimisations.
>
> Replace the uniprocessor optimisations with implementations that also
> take into account empty masks. Also drop the incorrectly optimised
> for_each_cpu implementations and use the generic implementations in all
> cases.
>
> Signed-off-by: Sander Vanheule <sander@xxxxxxxxxxxxx>
> ---
> include/linux/cpumask.h | 35 +++++++++++++++--------------------
> 1 file changed, 15 insertions(+), 20 deletions(-)
>
> diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
> index fe29ac7cc469..ce8c7b27f6c9 100644
> --- a/include/linux/cpumask.h
> +++ b/include/linux/cpumask.h
> @@ -117,51 +117,54 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu)
> }
>
> #if NR_CPUS == 1
> -/* Uniprocessor. Assume all masks are "1". */
> +/* Uniprocessor. Assume all valid masks are "1" or empty. */
> static inline unsigned int cpumask_first(const struct cpumask *srcp)
> {
> - return 0;
> + return !(*cpumask_bits(srcp) & 1);
> }

I think you can just drop this '#if NR_CPUS == 1' part and rely on the SMP
versions. find_first_bit() is optimized for short bitmaps, so I expect
no overhead compared to this.

> static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
> {
> - return 0;
> + return *cpumask_bits(srcp) & 1;
> }
>
> static inline unsigned int cpumask_first_and(const struct cpumask *srcp1,
> const struct cpumask *srcp2)
> {
> - return 0;
> + return !(*cpumask_bits(srcp1) & *cpumask_bits(srcp2) & 1);
> }
>
> static inline unsigned int cpumask_last(const struct cpumask *srcp)
> {
> - return 0;
> + return cpumask_first(srcp);
> }
>
> /* Valid inputs for n are -1 and 0. */
> static inline unsigned int cpumask_next(int n, const struct cpumask *srcp)
> {
> - return n+1;
> + return !!(n + 1 + cpumask_first(srcp));
> }
>
> static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
> {
> - return n+1;
> + return !!(n + 1 + cpumask_first_zero(srcp));
> }
>
> static inline unsigned int cpumask_next_and(int n,
> const struct cpumask *srcp,
> const struct cpumask *andp)
> {
> - return n+1;
> + return !!(n + 1 + cpumask_first_and(srcp, andp));
> }
>
> static inline unsigned int cpumask_next_wrap(int n, const struct cpumask *mask,
> int start, bool wrap)
> {
> - /* cpu0 unless stop condition, wrap and at cpu0, then nr_cpumask_bits */
> - return (wrap && n == 0);
> + /*
> + * nr_cpumask_bits at stop condition, wrap and at cpu0, or empty mask
> + * otherwise cpu0
> + */
> + return (wrap && n == 0) || cpumask_first(mask);
> }
>
> /* cpu must be a valid cpu, ie 0, so there's no other choice. */
> @@ -186,14 +189,6 @@ static inline int cpumask_any_distribute(const struct cpumask *srcp)
> return cpumask_first(srcp);
> }
>
> -#define for_each_cpu(cpu, mask) \
> - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
> -#define for_each_cpu_not(cpu, mask) \
> - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
> -#define for_each_cpu_wrap(cpu, mask, start) \
> - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start))
> -#define for_each_cpu_and(cpu, mask1, mask2) \
> - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask1, (void)mask2)
> #else
> /**
> * cpumask_first - get the first cpu in a cpumask
> @@ -259,11 +254,13 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
> }
>
> int __pure cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
> +extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap);

Is this extern needed?

Thanks,
Yury

> int __pure cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
> unsigned int cpumask_local_spread(unsigned int i, int node);
> int cpumask_any_and_distribute(const struct cpumask *src1p,
> const struct cpumask *src2p);
> int cpumask_any_distribute(const struct cpumask *srcp);
> +#endif /* SMP */
>
> /**
> * for_each_cpu - iterate over every cpu in a mask
> @@ -289,7 +286,6 @@ int cpumask_any_distribute(const struct cpumask *srcp);
> (cpu) = cpumask_next_zero((cpu), (mask)), \
> (cpu) < nr_cpu_ids;)
>
> -extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap);
>
> /**
> * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location
> @@ -324,7 +320,6 @@ extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool
> for ((cpu) = -1; \
> (cpu) = cpumask_next_and((cpu), (mask1), (mask2)), \
> (cpu) < nr_cpu_ids;)
> -#endif /* SMP */
>
> #define CPU_BITS_NONE \
> { \
> --
> 2.36.1