[PATCH 07/15] perf test: Refactor parallel poll loop to drain all pipes simultaneously

From: Ian Rogers

Date: Mon Jun 01 2026 - 02:17:05 EST


When running tests in parallel with verbose output (-v), child processes
write to pipes. If a test produces significant output (e.G. Granite Rapids
metric parsing printing hundreds of lines), it fills the 64KB pipe buffer
and blocks.

Previously, the parent harness (finish_test) only polled the pipe of the
current test waiting to be printed. Other children blocked indefinitely
until the parent reached them, severely sequentializing execution.

Address this by implementing finish_tests_parallel() to poll and drain
output pipes from all running children simultaneously into per-child buffers,
employing safe strbuf_addstr string operations alongside thorough variable
orderings for strict ISO C90 compliance. Reaping occurs out of order as
children finish, while final result printing remains strictly in order.

This drops parallel verbose execution time for the PMU events suite from
~35 seconds down to ~5.9 seconds.

Assisted-by: Gemini-CLI:Google Gemini 3
Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
---
tools/perf/tests/builtin-test.c | 234 +++++++++++++++++++++++++++++++-
1 file changed, 232 insertions(+), 2 deletions(-)

diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 2ccb52a776cc..1ce7f596ad70 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -302,6 +302,9 @@ struct child_test {
struct test_suite *test;
int suite_num;
int test_case_num;
+ struct strbuf err_output;
+ int result;
+ bool done;
};

static jmp_buf run_test_jmp_buf;
@@ -356,6 +359,11 @@ static int run_test_child(struct child_process *process)

#define TEST_RUNNING -3

+static struct pollfd *global_pfds;
+static size_t *global_pfd_indices;
+
+static int strbuf_addstr_safe(struct strbuf *sb, const char *s);
+
static int print_test_result(struct test_suite *t, int curr_suite, int curr_test_case,
int result, int width, int running)
{
@@ -499,16 +507,233 @@ static void finish_test(struct child_test **child_tests, int running_test, int c
}
/* Clean up child process. */
ret = finish_command(&child_test->process);
+ if (child_test->err_output.len > 0) {
+ struct strbuf merged = STRBUF_INIT;
+
+ if (child_test->err_output.buf)
+ strbuf_addstr_safe(&merged, child_test->err_output.buf);
+ if (err_output.buf)
+ strbuf_addstr_safe(&merged, err_output.buf);
+ strbuf_release(&err_output);
+ err_output = merged;
+ }
if (verbose > 1 || (verbose == 1 && ret == TEST_FAIL))
fprintf(stderr, "%s", err_output.buf);

strbuf_release(&err_output);
+ strbuf_release(&child_test->err_output);
print_test_result(t, curr_suite, curr_test_case, ret, width, /*running=*/0);
if (err > 0)
close(err);
zfree(&child_tests[running_test]);
}

+static int strbuf_addstr_safe(struct strbuf *sb, const char *s)
+{
+ sigset_t set, oldset;
+ int ret;
+
+ sigemptyset(&set);
+ sigaddset(&set, SIGINT);
+ sigaddset(&set, SIGTERM);
+ sigprocmask(SIG_BLOCK, &set, &oldset);
+ ret = strbuf_addstr(sb, s);
+ sigprocmask(SIG_SETMASK, &oldset, NULL);
+ return ret;
+}
+
+static void drain_child_process_err(struct child_test *child)
+{
+ char buf[512];
+ ssize_t len;
+
+ while ((len = read(child->process.err, buf, sizeof(buf) - 1)) > 0) {
+ buf[len] = '\0';
+ strbuf_addstr_safe(&child->err_output, buf);
+ }
+}
+
+static int finish_tests_parallel(struct child_test **child_tests, size_t num_tests, int width)
+{
+ size_t next_to_print = 0;
+ struct pollfd *pfds;
+ size_t *pfd_indices;
+ size_t num_pfds = 0;
+ int last_running = -1;
+ size_t i;
+ int last_suite_printed = -1;
+
+ global_pfds = calloc(num_tests, sizeof(*pfds));
+ global_pfd_indices = calloc(num_tests, sizeof(*pfd_indices));
+ pfds = global_pfds;
+ pfd_indices = global_pfd_indices;
+ if (!pfds || !pfd_indices) {
+ free(pfds);
+ free(pfd_indices);
+ global_pfds = NULL;
+ global_pfd_indices = NULL;
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < num_tests; i++) {
+ struct child_test *child = child_tests[i];
+
+ if (!child)
+ continue;
+ strbuf_init(&child->err_output, 0);
+ if (child->process.err > 0)
+ fcntl(child->process.err, F_SETFL, O_NONBLOCK);
+ }
+
+ while (next_to_print < num_tests) {
+ size_t running_count = 0;
+ size_t p;
+
+ while (next_to_print < num_tests &&
+ (!child_tests[next_to_print] || child_tests[next_to_print]->done))
+ next_to_print++;
+
+ if (next_to_print >= num_tests)
+ break;
+
+ num_pfds = 0;
+
+ for (i = next_to_print; i < num_tests; i++) {
+ struct child_test *child = child_tests[i];
+
+ if (!child || child->done)
+ continue;
+
+ if (!check_if_command_finished(&child->process))
+ running_count++;
+
+ if (child->process.err > 0) {
+ pfds[num_pfds].fd = child->process.err;
+ pfds[num_pfds].events = POLLIN | POLLERR | POLLHUP | POLLNVAL;
+ pfd_indices[num_pfds] = i;
+ num_pfds++;
+ }
+ }
+
+ if (perf_use_color_default && running_count != (size_t)last_running) {
+ struct child_test *next_child = child_tests[next_to_print];
+
+ if (last_running != -1)
+ fprintf(debug_file(), PERF_COLOR_DELETE_LINE);
+
+ if (next_child) {
+ if (test_suite__num_test_cases(next_child->test) > 1 &&
+ last_suite_printed != next_child->suite_num) {
+ pr_info("%3d: %-*s:\n", next_child->suite_num + 1, width,
+ test_description(next_child->test, -1));
+ last_suite_printed = next_child->suite_num;
+ }
+ print_test_result(next_child->test, next_child->suite_num,
+ next_child->test_case_num, TEST_RUNNING, width,
+ running_count);
+ }
+ last_running = running_count;
+ }
+
+ if (num_pfds == 0) {
+ if (running_count > 0)
+ usleep(10 * 1000);
+ } else {
+ int pret = poll(pfds, num_pfds, 100);
+
+ if (pret > 0) {
+ for (p = 0; p < num_pfds; p++) {
+ if (pfds[p].revents) {
+ size_t idx = pfd_indices[p];
+ struct child_test *child = child_tests[idx];
+
+ drain_child_process_err(child);
+ /*
+ * If the child closed its end of the pipe (EOF) or encountered
+ * an error, close the file descriptor immediately and set it
+ * to -1. This removes it from the pfds array for subsequent
+ * iterations, preventing a tight CPU busy-loop while waiting
+ * for the process itself to exit.
+ */
+ if (pfds[p].revents & (POLLHUP | POLLERR | POLLNVAL)) {
+ close(child->process.err);
+ child->process.err = -1;
+ }
+ }
+ }
+ }
+ }
+
+ for (i = next_to_print; i < num_tests; i++) {
+ struct child_test *child = child_tests[i];
+
+ if (!child || child->done)
+ continue;
+
+ if (check_if_command_finished(&child->process)) {
+ if (child->process.err > 0) {
+ drain_child_process_err(child);
+ close(child->process.err);
+ child->process.err = -1;
+ }
+ child->result = finish_command(&child->process);
+ child->done = true;
+ }
+ }
+
+ while (next_to_print < num_tests) {
+ struct child_test *child = child_tests[next_to_print];
+
+ if (!child) {
+ next_to_print++;
+ continue;
+ }
+ if (!child->done)
+ break;
+
+ if (perf_use_color_default && last_running != -1) {
+ fprintf(debug_file(), PERF_COLOR_DELETE_LINE);
+ last_running = -1;
+ }
+
+ if (test_suite__num_test_cases(child->test) > 1 &&
+ last_suite_printed != child->suite_num) {
+ pr_info("%3d: %-*s:\n", child->suite_num + 1, width,
+ test_description(child->test, -1));
+ last_suite_printed = child->suite_num;
+ }
+
+ if (verbose > 1) {
+ if (test_suite__num_test_cases(child->test) > 1) {
+ pr_info("%3d.%1d: %s:\n", child->suite_num + 1,
+ child->test_case_num + 1,
+ test_description(child->test,
+ child->test_case_num));
+ } else {
+ pr_info("%3d: %s:\n", child->suite_num + 1,
+ test_description(child->test, -1));
+ }
+ }
+
+ if (verbose > 1 || (verbose == 1 && child->result == TEST_FAIL))
+ fprintf(stderr, "%s", child->err_output.buf);
+
+ print_test_result(child->test, child->suite_num, child->test_case_num,
+ child->result, width, 0);
+ strbuf_release(&child->err_output);
+ child_tests[next_to_print] = NULL;
+ zfree(&child);
+ next_to_print++;
+ }
+ }
+
+ free(global_pfds);
+ free(global_pfd_indices);
+ global_pfds = NULL;
+ global_pfd_indices = NULL;
+ return 0;
+}
+
static int start_test(struct test_suite *test, int curr_suite, int curr_test_case,
struct child_test **child, int width, int pass)
{
@@ -671,8 +896,9 @@ static int __cmd_test(struct test_suite **suites, int argc, const char *argv[],
}
if (!sequential) {
/* Parallel mode starts tests but doesn't finish them. Do that now. */
- for (size_t x = 0; x < num_tests; x++)
- finish_test(child_tests, x, num_tests, width);
+ err = finish_tests_parallel(child_tests, num_tests, width);
+ if (err)
+ goto err_out;
}
}
err_out:
@@ -683,6 +909,10 @@ static int __cmd_test(struct test_suite **suites, int argc, const char *argv[],
for (size_t x = 0; x < num_tests; x++)
finish_test(child_tests, x, num_tests, width);
}
+ free(global_pfds);
+ free(global_pfd_indices);
+ global_pfds = NULL;
+ global_pfd_indices = NULL;
free(child_tests);
return err;
}
--
2.54.0.823.g6e5bcc1fc9-goog