Re: [PATCH v9 08/26] x86/fpu/xstate: Introduce helpers to manage the XSTATE buffer dynamically

From: Bae, Chang Seok
Date: Wed Aug 25 2021 - 12:02:14 EST


On Aug 18, 2021, at 12:46, Bae, Chang Seok <chang.seok.bae@xxxxxxxxx> wrote:
> Let me consider a case to mimic the situation somehow.

Once the below changes were applied on top of this series v9, the self-test
ran:

$ ./tools/testing/selftests/x86/amx_64
Inject tile data
Tile data was not written on ptracee.

Check the kernel messages:

$ sudo dmesg | tail -n 2
[ 82.780882] x86/fpu: Assume new re-allocation fails here and fpu->state
retains the old re-allocation (0x000000009f3a83cc)

The ptracee loaded tile data, so it’s XSTATE per-task buffer had been
re-allocated. Then, the ptracer attempted to inject new tile data but the
kernel returned ENOMEM along with the message. This emulates the behavior with
reallocation failure on the ptrace path.

[ 82.793127] process: x86/fpu: Free the re-allocated buffer at
0x000000009f3a83cc

The program exited. This message indicates the old buffer was freed at that
moment.

Thanks,
Chang


diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index ee71ffd7c221..3153dc91c715 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -170,8 +170,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
*
* Check if the expansion is possibly needed.
*/
- if (xfeatures_mask_user_dynamic &&
- ((fpu->state_mask & xfeatures_mask_user_dynamic) != xfeatures_mask_user_dynamic)) {
+ if (xfeatures_mask_user_dynamic) {
u64 state_mask, dynstate_mask;

/* Retrieve XSTATE_BV. */
@@ -186,9 +185,13 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
goto out;
}

- ret = alloc_xstate_buffer(fpu, dynstate_mask);
- if (ret)
+ if (fpu->state != &fpu->__default_state) {
+ pr_info("x86/fpu: Assume new re-allocation fails here and "
+ "fpu->state retains the old re-allocation (0x%p)\n",
+ fpu->state);
+ ret = -ENOMEM;
goto out;
+ }
}
}

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5b4f9b82aea1..c04098db58b6 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -99,8 +99,15 @@ void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size)

void arch_release_task_struct(struct task_struct *task)
{
- if (cpu_feature_enabled(X86_FEATURE_FPU))
- free_xstate_buffer(&task->thread.fpu);
+ if (cpu_feature_enabled(X86_FEATURE_FPU)) {
+ struct fpu *fpu = &task->thread.fpu;
+
+ if (fpu->state != &fpu->__default_state)
+ pr_info("x86/fpu: Free the re-allocated buffer at 0x%p\n",
+ fpu->state);
+
+ free_xstate_buffer(fpu);
+ }
}

/*
diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c
index afd8c66ca206..6393ec01a9a1 100644
--- a/tools/testing/selftests/x86/amx.c
+++ b/tools/testing/selftests/x86/amx.c
@@ -610,8 +610,6 @@ static void test_context_switch(void)

/* Ptrace test */

-static bool ptracee_state_perm;
-
static int inject_tiledata(pid_t target)
{
struct iovec iov;
@@ -624,12 +622,8 @@ static int inject_tiledata(pid_t target)
set_rand_tiledata(xsave_buffer + xsave_xtiledata_offset);
memcpy(tiledata, xsave_buffer + xsave_xtiledata_offset, xtiledata_size);

- if (ptrace(PTRACE_SETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov)) {
- if (errno != EFAULT)
- err(1, "PTRACE_SETREGSET");
- else
- return errno;
- }
+ if (ptrace(PTRACE_SETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
+ return errno;

if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
err(1, "PTRACE_GETREGSET");
@@ -640,18 +634,19 @@ static int inject_tiledata(pid_t target)
return -1;
}

-static void test_tile_write(void)
+static void test_kernel_xbuffer_free_with_ptrace_failure(void)
{
int status, rc;
pid_t child;
- bool pass;

child = fork();
if (child < 0) {
err(1, "fork");
} else if (!child) {
- if (ptracee_state_perm)
- enable_tiledata();
+ clear_xstate_header(xsave_buffer);
+ set_xstatebv(xsave_buffer, XFEATURE_MASK_XTILEDATA);
+ set_rand_tiledata(xsave_buffer + xsave_xtiledata_offset);
+ xrstor_safe(xsave_buffer, -1, -1);

if (ptrace(PTRACE_TRACEME, 0, NULL, NULL))
err(1, "PTRACE_TRACEME");
@@ -664,16 +659,11 @@ static void test_tile_write(void)
wait(&status);
} while (WSTOPSIG(status) != SIGTRAP);

- printf("\tInject tile data %s ARCH_SET_STATE_ENABLE\n",
- ptracee_state_perm ? "with" : "without");
+ printf("\tInject tile data\n");

rc = inject_tiledata(child);
- pass = (rc == EFAULT && !ptracee_state_perm) ||
- (!rc && ptracee_state_perm);
- if (!pass)
- nerrs++;
- printf("[%s]\tTile data was %swritten on ptracee.\n",
- pass ? "OK" : "FAIL", errs ? "not " : "");
+ if (rc)
+ printf("Tile data was not written on ptracee.\n");

ptrace(PTRACE_DETACH, child, NULL, NULL);
wait(&status);
@@ -681,17 +671,6 @@ static void test_tile_write(void)
err(1, "ptrace test");
}

-static void test_ptrace(void)
-{
- printf("[RUN]\tCheck ptrace() to inject tile data.\n");
-
- ptracee_state_perm = false;
- test_tile_write();
-
- ptracee_state_perm = true;
- test_tile_write();
-}
-
/* Signal handling test */

static bool init_tiledata, load_tiledata;
@@ -951,13 +930,8 @@ int main(int argc, char **argv)
if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
err(1, "sched_setaffinity to CPU 0");

- test_arch_prctl(argc, argv);
- test_ptrace();
-
enable_tiledata();
- test_context_switch();
- test_fork();
- test_signal();
+ test_kernel_xbuffer_free_with_ptrace_failure();

clearhandler(SIGILL);