[tip: x86/fpu] x86/fpu/xstate: Introduce xfeature order table and accessor macro

From: tip-bot2 for Chang S. Bae
Date: Tue Mar 25 2025 - 06:18:30 EST


The following commit has been merged into the x86/fpu branch of tip:

Commit-ID: b03c328e9711b4af8e4a433a1c4dad38f2e160a2
Gitweb: https://git.kernel.org/tip/b03c328e9711b4af8e4a433a1c4dad38f2e160a2
Author: Chang S. Bae <chang.seok.bae@xxxxxxxxx>
AuthorDate: Thu, 20 Mar 2025 16:42:53 -07:00
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitterDate: Tue, 25 Mar 2025 11:02:20 +01:00

x86/fpu/xstate: Introduce xfeature order table and accessor macro

The kernel has largely assumed that higher xstate component numbers
correspond to later offsets in the buffer. However, this assumption no
longer holds for the non-compacted format, where a newer state component
may have a lower offset.

When iterating over xstate components in offset order, using the feature
number as an index may be misleading. At the same time, the CPU exposes
each component’s size and offset based on its feature number, making it a
key for state information.

To provide flexibility in handling xstate ordering, introduce a mapping
table: feature order -> feature number. The table is dynamically
populated based on the CPU-exposed features and is sorted in offset order
at boot time.

Additionally, add an accessor macro to facilitate sequential traversal of
xstate components based on their actual buffer positions, given a feature
bitmask. This accessor macro will be particularly useful for computing
custom non-compacted format sizes and iterating over xstate offsets in
non-compacted buffers.

Suggested-by: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Signed-off-by: Chang S. Bae <chang.seok.bae@xxxxxxxxx>
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: H. Peter Anvin <hpa@xxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Oleg Nesterov <oleg@xxxxxxxxxx>
Link: https://lore.kernel.org/r/20250320234301.8342-3-chang.seok.bae@xxxxxxxxx
---
arch/x86/kernel/fpu/xstate.c | 58 ++++++++++++++++++++++++++++++-----
1 file changed, 50 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 542c698..1e22103 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -14,6 +14,7 @@
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/coredump.h>
+#include <linux/sort.h>

#include <asm/fpu/api.h>
#include <asm/fpu/regset.h>
@@ -88,6 +89,31 @@ static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;

+/*
+ * Ordering of xstate components in uncompacted format: The xfeature
+ * number does not necessarily indicate its position in the XSAVE buffer.
+ * This array defines the traversal order of xstate features.
+ */
+static unsigned int xfeature_uncompact_order[XFEATURE_MAX] __ro_after_init =
+ { [ 0 ... XFEATURE_MAX - 1] = -1};
+
+static inline unsigned int next_xfeature_order(unsigned int i, u64 mask)
+{
+ for (; xfeature_uncompact_order[i] != -1; i++) {
+ if (mask & BIT_ULL(xfeature_uncompact_order[i]))
+ break;
+ }
+
+ return i;
+}
+
+/* Iterate xstate features in uncompacted order: */
+#define for_each_extended_xfeature_in_order(i, mask) \
+ for (i = 0; \
+ i = next_xfeature_order(i, mask), \
+ xfeature_uncompact_order[i] != -1; \
+ i++)
+
#define XSTATE_FLAG_SUPERVISOR BIT(0)
#define XSTATE_FLAG_ALIGNED64 BIT(1)

@@ -209,13 +235,20 @@ static bool xfeature_enabled(enum xfeature xfeature)
return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
}

+static int compare_xstate_offsets(const void *xfeature1, const void *xfeature2)
+{
+ return xstate_offsets[*(unsigned int *)xfeature1] -
+ xstate_offsets[*(unsigned int *)xfeature2];
+}
+
/*
* Record the offsets and sizes of various xstates contained
- * in the XSAVE state memory layout.
+ * in the XSAVE state memory layout. Also, create an ordered
+ * list of xfeatures for handling out-of-order offsets.
*/
static void __init setup_xstate_cache(void)
{
- u32 eax, ebx, ecx, edx, i;
+ u32 eax, ebx, ecx, edx, xfeature, i = 0;
/*
* The FP xstates and SSE xstates are legacy states. They are always
* in the fixed offsets in the xsave area in either compacted form
@@ -229,21 +262,30 @@ static void __init setup_xstate_cache(void)
xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state,
xmm_space);

- for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
- cpuid_count(CPUID_LEAF_XSTATE, i, &eax, &ebx, &ecx, &edx);
+ for_each_extended_xfeature(xfeature, fpu_kernel_cfg.max_features) {
+ cpuid_count(CPUID_LEAF_XSTATE, xfeature, &eax, &ebx, &ecx, &edx);

- xstate_sizes[i] = eax;
- xstate_flags[i] = ecx;
+ xstate_sizes[xfeature] = eax;
+ xstate_flags[xfeature] = ecx;

/*
* If an xfeature is supervisor state, the offset in EBX is
* invalid, leave it to -1.
*/
- if (xfeature_is_supervisor(i))
+ if (xfeature_is_supervisor(xfeature))
continue;

- xstate_offsets[i] = ebx;
+ xstate_offsets[xfeature] = ebx;
+
+ /* Populate the list of xfeatures before sorting */
+ xfeature_uncompact_order[i++] = xfeature;
}
+
+ /*
+ * Sort xfeatures by their offsets to support out-of-order
+ * offsets in the uncompacted format.
+ */
+ sort(xfeature_uncompact_order, i, sizeof(unsigned int), compare_xstate_offsets, NULL);
}

/*