Re: [PATCH v3 2/2] vfs: avoid duplicating creds in faccessat if possible

From: Linus Torvalds
Date: Sat Mar 04 2023 - 15:35:09 EST


On Sat, Mar 4, 2023 at 11:19 AM Linus Torvalds
<torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:
>
> Rather than commit aa47a7c215e7, we should just have fixed 'setall()'
> and 'for_each_cpu()' to use nr_cpu_ids, and then the rest would
> continue to use nr_cpumask_bits.

Looking around, there's a few more "might set high bits in the
bitmask" cases - notably cpumask_complement(). It uses
bitmap_complement(), which in turn can set bits past the end of the
bit range.

Of course, that function is then used in exactly one place (ia64 ACPI):

cpumask_complement(&tmp_map, cpu_present_mask);
cpu = cpumask_first(&tmp_map);

it's basically a nasty way or writing

cpu = cpumask_first_zero(cpu_present_mask);

so the "fix" is to just do that cleanup, and get rid of
"cpumask_complement()" entirely.

So I would suggest we simply do something like the attached patch.

NOTE! This is *entirely* untested. See the _intent_ behind the patch
in the big comment above the 'nr_cpumask_bits' #define.

So this patch is more of a "maybe something like this?"

And no, nothing here helps the MAXSMP case. I don't think it's
entirely unfixable, but it's close. Some very involved static jump
infrastructure *might* make the MAXSMP case be something we could
generate good code for too, but the whole "we potentially have
thousands of CPUs" case really shouldn't have ever been used on normal
machines.

It is what it is. I think the best we can do is to try to generate
good code for a distribution that cares about good code. Once the
distro maintainers go "let's enable MAXSMP even though the kernel
Kconfig help file tells us not to", there's very little we as kernel
maintainers can do.

Linus
arch/ia64/kernel/acpi.c | 4 +---
include/linux/cpumask.h | 63 +++++++++++++++++++++++++++++++------------------
2 files changed, 41 insertions(+), 26 deletions(-)

diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 96d13cb7c19f..15f6cfddcc08 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -783,11 +783,9 @@ __init void prefill_possible_map(void)

static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
{
- cpumask_t tmp_map;
int cpu;

- cpumask_complement(&tmp_map, cpu_present_mask);
- cpu = cpumask_first(&tmp_map);
+ cpu = cpumask_first_zero(cpu_present_mask);
if (cpu >= nr_cpu_ids)
return -EINVAL;

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 10c92bd9b807..9b75ba191f39 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -50,8 +50,30 @@ static inline void set_nr_cpu_ids(unsigned int nr)
#endif
}

-/* Deprecated. Always use nr_cpu_ids. */
-#define nr_cpumask_bits nr_cpu_ids
+/*
+ * The difference between nr_cpumask_bits and nr_cpu_ids is that
+ * 'nr_cpu_ids' is the actual number of CPU ids in the system, while
+ * nr_cpumask_bits is a "reasonable upper value" that is often more
+ * efficient because it can be a fixed constant.
+ *
+ * So when clearing or traversing a cpumask, use 'nr_cpumask_bits',
+ * but when checking exact limits (and when _setting_ bits), use the
+ * tighter exact limit of 'nr_cpu_ids'.
+ *
+ * NOTE! The code depends on any exyta bits in nr_cpumask_bits a always
+ * being (a) allocated and (b) zero, so that the only effect of using
+ * 'nr_cpumask_bits' is that we might return a higher maximum CPU value
+ * (which is why we have that pattern of
+ *
+ * Returns >= nr_cpu_ids if no cpus set.
+ *
+ * for many of the functions - they can return that higher value).
+ */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+ #define nr_cpumask_bits ((unsigned int)NR_CPUS)
+#else
+ #define nr_cpumask_bits nr_cpu_ids
+#endif

/*
* The following particular system cpumasks and operations manage
@@ -114,7 +136,7 @@ static __always_inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bit
/* verify cpu argument to cpumask_* operators */
static __always_inline unsigned int cpumask_check(unsigned int cpu)
{
- cpu_max_bits_warn(cpu, nr_cpumask_bits);
+ cpu_max_bits_warn(cpu, nr_cpu_ids);
return cpu;
}

@@ -254,9 +276,12 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
* @mask: the cpumask pointer
*
* After the loop, cpu is >= nr_cpu_ids.
+ *
+ * Note that since we iterate over zeroes, we have to use the tighter
+ * 'nr_cpu_ids' limit to not iterate past the end.
*/
#define for_each_cpu_not(cpu, mask) \
- for_each_clear_bit(cpu, cpumask_bits(mask), nr_cpumask_bits)
+ for_each_clear_bit(cpu, cpumask_bits(mask), nr_cpu_ids)

#if NR_CPUS == 1
static inline
@@ -495,10 +520,14 @@ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *
/**
* cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask
* @dstp: the cpumask pointer
+ *
+ * Note: since we set bits, we should use the tighter 'bitmap_set()' with
+ * the eact number of bits, not 'bitmap_fill()' that will fill past the
+ * end.
*/
static inline void cpumask_setall(struct cpumask *dstp)
{
- bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits);
+ bitmap_set(cpumask_bits(dstp), 0, nr_cpu_ids);
}

/**
@@ -569,18 +598,6 @@ static inline bool cpumask_andnot(struct cpumask *dstp,
cpumask_bits(src2p), nr_cpumask_bits);
}

-/**
- * cpumask_complement - *dstp = ~*srcp
- * @dstp: the cpumask result
- * @srcp: the input to invert
- */
-static inline void cpumask_complement(struct cpumask *dstp,
- const struct cpumask *srcp)
-{
- bitmap_complement(cpumask_bits(dstp), cpumask_bits(srcp),
- nr_cpumask_bits);
-}
-
/**
* cpumask_equal - *src1p == *src2p
* @src1p: the first input
@@ -648,7 +665,7 @@ static inline bool cpumask_empty(const struct cpumask *srcp)
*/
static inline bool cpumask_full(const struct cpumask *srcp)
{
- return bitmap_full(cpumask_bits(srcp), nr_cpumask_bits);
+ return bitmap_full(cpumask_bits(srcp), nr_cpu_ids);
}

/**
@@ -694,7 +711,7 @@ static inline void cpumask_shift_left(struct cpumask *dstp,
const struct cpumask *srcp, int n)
{
bitmap_shift_left(cpumask_bits(dstp), cpumask_bits(srcp), n,
- nr_cpumask_bits);
+ nr_cpu_ids);
}

/**
@@ -742,7 +759,7 @@ static inline void cpumask_copy(struct cpumask *dstp,
static inline int cpumask_parse_user(const char __user *buf, int len,
struct cpumask *dstp)
{
- return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits);
+ return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpu_ids);
}

/**
@@ -757,7 +774,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len,
struct cpumask *dstp)
{
return bitmap_parselist_user(buf, len, cpumask_bits(dstp),
- nr_cpumask_bits);
+ nr_cpu_ids);
}

/**
@@ -769,7 +786,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len,
*/
static inline int cpumask_parse(const char *buf, struct cpumask *dstp)
{
- return bitmap_parse(buf, UINT_MAX, cpumask_bits(dstp), nr_cpumask_bits);
+ return bitmap_parse(buf, UINT_MAX, cpumask_bits(dstp), nr_cpu_ids);
}

/**
@@ -781,7 +798,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp)
*/
static inline int cpulist_parse(const char *buf, struct cpumask *dstp)
{
- return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits);
+ return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpu_ids);
}

/**