[tip: x86/cpu] x86/cacheinfo: Remove CPUID leaf 0x2 parsing loop

From: tip-bot2 for Ahmed S. Darwish
Date: Tue Mar 25 2025 - 05:43:22 EST


The following commit has been merged into the x86/cpu branch of tip:

Commit-ID: 09a1da4beb310420b720994f7b6599e8d0bce3e1
Gitweb: https://git.kernel.org/tip/09a1da4beb310420b720994f7b6599e8d0bce3e1
Author: Ahmed S. Darwish <darwi@xxxxxxxxxxxxx>
AuthorDate: Mon, 24 Mar 2025 14:32:57 +01:00
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitterDate: Tue, 25 Mar 2025 10:22:02 +01:00

x86/cacheinfo: Remove CPUID leaf 0x2 parsing loop

Leaf 0x2 output includes a "query count" byte where it was supposed to
specify the number of repeated CPUID leaf 0x2 subleaf 0 queries needed to
extract all of the CPU's cache and TLB descriptors.

Per current Intel manuals, all CPUs supporting this leaf "will always"
return an iteration count of 1.

Remove the leaf 0x2 query loop and just query the hardware once.

Note, as previously done at commit aec28d852ed2 ("x86/cpuid: Standardize
on u32 in <asm/cpuid/api.h>"), standardize on using 'u32' and 'u8' types.

Suggested-by: Ingo Molnar <mingo@xxxxxxxxxx>
Suggested-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Ahmed S. Darwish <darwi@xxxxxxxxxxxxx>
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: H. Peter Anvin <hpa@xxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Link: https://lore.kernel.org/r/20250324133324.23458-3-darwi@xxxxxxxxxxxxx
---
arch/x86/kernel/cpu/cacheinfo.c | 77 +++++++++++++++-----------------
1 file changed, 37 insertions(+), 40 deletions(-)

diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index b3a5209..36782fd 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -42,7 +42,7 @@ static cpumask_var_t cpu_cacheinfo_mask;
unsigned int memory_caching_control __ro_after_init;

struct _cache_table {
- unsigned char descriptor;
+ u8 descriptor;
char cache_type;
short size;
};
@@ -783,50 +783,47 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)

/* Don't use CPUID(2) if CPUID(4) is supported. */
if (!ci->num_leaves && c->cpuid_level > 1) {
- /* supports eax=2 call */
- int j, n;
- unsigned int regs[4];
- unsigned char *dp = (unsigned char *)regs;
-
- /* Number of times to iterate */
- n = cpuid_eax(2) & 0xFF;
-
- for (i = 0 ; i < n ; i++) {
- cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
-
- /* If bit 31 is set, this is an unknown format */
- for (j = 0 ; j < 4 ; j++)
- if (regs[j] & (1 << 31))
- regs[j] = 0;
-
- /* Byte 0 is level count, not a descriptor */
- for (j = 1 ; j < 16 ; j++) {
- unsigned char des = dp[j];
- unsigned char k = 0;
-
- /* look up this descriptor in the table */
- while (cache_table[k].descriptor != 0) {
- if (cache_table[k].descriptor == des) {
- switch (cache_table[k].cache_type) {
- case LVL_1_INST:
- l1i += cache_table[k].size;
- break;
- case LVL_1_DATA:
- l1d += cache_table[k].size;
- break;
- case LVL_2:
- l2 += cache_table[k].size;
- break;
- case LVL_3:
- l3 += cache_table[k].size;
- break;
- }
+ u32 regs[4];
+ u8 *desc = (u8 *)regs;

+ cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
+
+ /* Intel CPUs must report an iteration count of 1 */
+ if (desc[0] != 0x01)
+ return;
+
+ /* If a register's bit 31 is set, it is an unknown format */
+ for (int i = 0; i < 4; i++) {
+ if (regs[i] & (1 << 31))
+ regs[i] = 0;
+ }
+
+ /* Skip the first byte as it is not a descriptor */
+ for (int i = 1; i < 16; i++) {
+ u8 des = desc[i];
+ u8 k = 0;
+
+ /* look up this descriptor in the table */
+ while (cache_table[k].descriptor != 0) {
+ if (cache_table[k].descriptor == des) {
+ switch (cache_table[k].cache_type) {
+ case LVL_1_INST:
+ l1i += cache_table[k].size;
+ break;
+ case LVL_1_DATA:
+ l1d += cache_table[k].size;
+ break;
+ case LVL_2:
+ l2 += cache_table[k].size;
+ break;
+ case LVL_3:
+ l3 += cache_table[k].size;
break;
}

- k++;
+ break;
}
+ k++;
}
}
}