Re: [Patch v8 5/5] perf dwarf-regs: Add SIMD/eGPRs support for x86 DWARF registers
From: Ian Rogers
Date: Fri May 29 2026 - 13:56:29 EST
On Fri, May 29, 2026 at 1:30 AM Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx> wrote:
>
> Enhance the x86-specific DWARF register handling by adding support for
> SIMD registers and eGPRs.
>
> This update is based on the "DWARF Register Number Mapping" table from
> the "System V Application Binary Interface AMD64 Architecture Processor
> Supplement" (version 1.0).
>
> Modifications include:
> - Updating the x86_64_regidx_table[] array to incorporate SIMD registers
> and eGPRs.
> - Enhancing the __get_dwarf_regnum_for_perf_regnum_x86_64() function to
> retrieve the DWARF register index for eGPRs.
> - Enlarging the number of supported x86_64 registers to 146 to cover eGPRs
> and SIMD registers (get_libdw_frame_nregs()).
>
> Signed-off-by: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>
> ---
> .../util/dwarf-regs-arch/dwarf-regs-x86.c | 138 +++++++++++++++---
> tools/perf/util/dwarf-regs.c | 7 +-
> tools/perf/util/include/dwarf-regs.h | 7 +-
> tools/perf/util/unwind-libdw.c | 6 +-
> 4 files changed, 129 insertions(+), 29 deletions(-)
>
> diff --git a/tools/perf/util/dwarf-regs-arch/dwarf-regs-x86.c b/tools/perf/util/dwarf-regs-arch/dwarf-regs-x86.c
> index cadef120aeb4..b014a36d21b5 100644
> --- a/tools/perf/util/dwarf-regs-arch/dwarf-regs-x86.c
> +++ b/tools/perf/util/dwarf-regs-arch/dwarf-regs-x86.c
> @@ -90,22 +90,22 @@ static const struct dwarf_regs_idx x86_64_regidx_table[] = {
> { "r14", 14 }, { "r14d", 14 }, { "r14w", 14 }, { "r14b", 14 },
> { "r15", 15 }, { "r15d", 15 }, { "r15w", 15 }, { "r15b", 15 },
> // 16 - Return Address RA
> - { "xmm0", 17},
> - { "xmm1", 18},
> - { "xmm2", 19},
> - { "xmm3", 20},
> - { "xmm4", 21},
> - { "xmm5", 22},
> - { "xmm6", 23},
> - { "xmm7", 24},
> - { "xmm8", 25},
> - { "xmm9", 26},
> - { "xmm10", 27},
> - { "xmm11", 28},
> - { "xmm12", 29},
> - { "xmm13", 30},
> - { "xmm14", 31},
> - { "xmm15", 32},
> + { "zmm0", 17 }, { "ymm0", 17 }, { "xmm0", 17 },
> + { "zmm1", 18 }, { "ymm1", 18 }, { "xmm1", 18 },
> + { "zmm2", 19 }, { "ymm2", 19 }, { "xmm2", 19 },
> + { "zmm3", 20 }, { "ymm3", 20 }, { "xmm3", 20 },
> + { "zmm4", 21 }, { "ymm4", 21 }, { "xmm4", 21 },
> + { "zmm5", 22 }, { "ymm5", 22 }, { "xmm5", 22 },
> + { "zmm6", 23 }, { "ymm6", 23 }, { "xmm6", 23 },
> + { "zmm7", 24 }, { "ymm7", 24 }, { "xmm7", 24 },
> + { "zmm8", 25 }, { "ymm8", 25 }, { "xmm8", 25 },
> + { "zmm9", 26 }, { "ymm9", 26 }, { "xmm9", 26 },
> + { "zmm10", 27 }, { "ymm10", 27 }, { "xmm10", 27 },
> + { "zmm11", 28 }, { "ymm11", 28 }, { "xmm11", 28 },
> + { "zmm12", 29 }, { "ymm12", 29 }, { "xmm12", 29 },
> + { "zmm13", 30 }, { "ymm13", 30 }, { "xmm13", 30 },
> + { "zmm14", 31 }, { "ymm14", 31 }, { "xmm14", 31 },
> + { "zmm15", 32 }, { "ymm15", 32 }, { "xmm15", 32 },
> { "st0", 33},
> { "st1", 34},
> { "st2", 35},
> @@ -129,7 +129,7 @@ static const struct dwarf_regs_idx x86_64_regidx_table[] = {
> { "ds", 53},
> { "fs", 54},
> { "gs", 55},
> - // 56-47 - reserved
> + // 56-57 - reserved
> { "fs.base", 58},
> { "gs.base", 59},
> // 60-61 - reserved
> @@ -138,6 +138,49 @@ static const struct dwarf_regs_idx x86_64_regidx_table[] = {
> { "mxcsr", 64}, // 128-bit Media Control and Status
> { "fcw", 65}, // x87 Control Word
> { "fsw", 66}, // x87 Status Word
> + // 67-82 - Upper Vector Registers 16–31
> + { "zmm16", 67 }, { "ymm16", 67 }, { "xmm16", 67 },
> + { "zmm17", 68 }, { "ymm17", 68 }, { "xmm17", 68 },
> + { "zmm18", 69 }, { "ymm18", 69 }, { "xmm18", 69 },
> + { "zmm19", 70 }, { "ymm19", 70 }, { "xmm19", 70 },
> + { "zmm20", 71 }, { "ymm20", 71 }, { "xmm20", 71 },
> + { "zmm21", 72 }, { "ymm21", 72 }, { "xmm21", 72 },
> + { "zmm22", 73 }, { "ymm22", 73 }, { "xmm22", 73 },
> + { "zmm23", 74 }, { "ymm23", 74 }, { "xmm23", 74 },
> + { "zmm24", 75 }, { "ymm24", 75 }, { "xmm24", 75 },
> + { "zmm25", 76 }, { "ymm25", 76 }, { "xmm25", 76 },
> + { "zmm26", 77 }, { "ymm26", 77 }, { "xmm26", 77 },
> + { "zmm27", 78 }, { "ymm27", 78 }, { "xmm27", 78 },
> + { "zmm28", 79 }, { "ymm28", 79 }, { "xmm28", 79 },
> + { "zmm29", 80 }, { "ymm29", 80 }, { "xmm29", 80 },
> + { "zmm30", 81 }, { "ymm30", 81 }, { "xmm30", 81 },
> + { "zmm31", 82 }, { "ymm31", 82 }, { "xmm31", 82 },
> + // 118-125 - Vector Mask Registers 0–7
> + { "k0", 118 },
> + { "k1", 119 },
> + { "k2", 120 },
> + { "k3", 121 },
> + { "k4", 122 },
> + { "k5", 123 },
> + { "k6", 124 },
> + { "k7", 125 },
> + // 130-145 - APX Integer Registers 16-31
> + { "r16", 130 }, { "r16d", 130 }, { "r16w", 130 }, { "r16b", 130 },
> + { "r17", 131 }, { "r17d", 131 }, { "r17w", 131 }, { "r17b", 131 },
> + { "r18", 132 }, { "r18d", 132 }, { "r18w", 132 }, { "r18b", 132 },
> + { "r19", 133 }, { "r19d", 133 }, { "r19w", 133 }, { "r19b", 133 },
> + { "r20", 134 }, { "r20d", 134 }, { "r20w", 134 }, { "r20b", 134 },
> + { "r21", 135 }, { "r21d", 135 }, { "r21w", 135 }, { "r21b", 135 },
> + { "r22", 136 }, { "r22d", 136 }, { "r22w", 136 }, { "r22b", 136 },
> + { "r23", 137 }, { "r23d", 137 }, { "r23w", 137 }, { "r23b", 137 },
> + { "r24", 138 }, { "r24d", 138 }, { "r24w", 138 }, { "r24b", 138 },
> + { "r25", 139 }, { "r25d", 139 }, { "r25w", 139 }, { "r25b", 139 },
> + { "r26", 140 }, { "r26d", 140 }, { "r26w", 140 }, { "r26b", 140 },
> + { "r27", 141 }, { "r27d", 141 }, { "r27w", 141 }, { "r27b", 141 },
> + { "r28", 142 }, { "r28d", 142 }, { "r28w", 142 }, { "r28b", 142 },
> + { "r29", 143 }, { "r29d", 143 }, { "r29w", 143 }, { "r29b", 143 },
> + { "r30", 144 }, { "r30d", 144 }, { "r30w", 144 }, { "r30b", 144 },
> + { "r31", 145 }, { "r31d", 145 }, { "r31w", 145 }, { "r31b", 145 },
> // End of regular dwarf registers.
> { "rip", DWARF_REG_PC }, { "eip", DWARF_REG_PC }, { "ip", DWARF_REG_PC },
> };
> @@ -204,7 +247,7 @@ int __get_dwarf_regnum_for_perf_regnum_i386(int perf_regnum)
> return dwarf_i386_regnums[perf_regnum];
> }
>
> -int __get_dwarf_regnum_for_perf_regnum_x86_64(int perf_regnum)
> +int __get_dwarf_regnum_for_perf_regnum_x86_64(int perf_regnum, int abi)
> {
> static const int dwarf_x86_64_regnums[] = {
> [PERF_REG_X86_AX] = 0,
> @@ -248,13 +291,66 @@ int __get_dwarf_regnum_for_perf_regnum_x86_64(int perf_regnum)
> [PERF_REG_X86_XMM14] = 31,
> [PERF_REG_X86_XMM15] = 32,
> };
> + static const int dwarf_x86_64_regnums_apx[] = {
> + [PERF_REG_X86_AX] = 0,
> + [PERF_REG_X86_BX] = 3,
> + [PERF_REG_X86_CX] = 2,
> + [PERF_REG_X86_DX] = 1,
> + [PERF_REG_X86_SI] = 4,
> + [PERF_REG_X86_DI] = 5,
> + [PERF_REG_X86_BP] = 6,
> + [PERF_REG_X86_SP] = 7,
> + [PERF_REG_X86_IP] = 16,
> + [PERF_REG_X86_FLAGS] = 49,
> + [PERF_REG_X86_CS] = 51,
> + [PERF_REG_X86_SS] = 52,
> + [PERF_REG_X86_DS] = 53,
> + [PERF_REG_X86_ES] = 50,
> + [PERF_REG_X86_FS] = 54,
> + [PERF_REG_X86_GS] = 55,
> + [PERF_REG_X86_R8] = 8,
> + [PERF_REG_X86_R9] = 9,
> + [PERF_REG_X86_R10] = 10,
> + [PERF_REG_X86_R11] = 11,
> + [PERF_REG_X86_R12] = 12,
> + [PERF_REG_X86_R13] = 13,
> + [PERF_REG_X86_R14] = 14,
> + [PERF_REG_X86_R15] = 15,
> + [PERF_REG_X86_R16] = 130,
> + [PERF_REG_X86_R17] = 131,
> + [PERF_REG_X86_R18] = 132,
> + [PERF_REG_X86_R19] = 133,
> + [PERF_REG_X86_R20] = 134,
> + [PERF_REG_X86_R21] = 135,
> + [PERF_REG_X86_R22] = 136,
> + [PERF_REG_X86_R23] = 137,
> + [PERF_REG_X86_R24] = 138,
> + [PERF_REG_X86_R25] = 139,
> + [PERF_REG_X86_R26] = 140,
> + [PERF_REG_X86_R27] = 141,
> + [PERF_REG_X86_R28] = 142,
> + [PERF_REG_X86_R29] = 143,
> + [PERF_REG_X86_R30] = 144,
> + [PERF_REG_X86_R31] = 145,
> + };
>
> if (perf_regnum == 0)
> return 0;
>
> - if (perf_regnum < 0 || perf_regnum > (int)ARRAY_SIZE(dwarf_x86_64_regnums) ||
> - dwarf_x86_64_regnums[perf_regnum] == 0)
> + if (perf_regnum < 0)
> + return -ENOENT;
> +
> + if (!(abi & PERF_SAMPLE_REGS_ABI_SIMD) &&
> + (perf_regnum >= (int)ARRAY_SIZE(dwarf_x86_64_regnums) ||
> + dwarf_x86_64_regnums[perf_regnum] == 0))
> + return -ENOENT;
> +
> + if ((abi & PERF_SAMPLE_REGS_ABI_SIMD) &&
> + (perf_regnum >= (int)ARRAY_SIZE(dwarf_x86_64_regnums_apx) ||
> + dwarf_x86_64_regnums_apx[perf_regnum] == 0))
> return -ENOENT;
>
> - return dwarf_x86_64_regnums[perf_regnum];
> + return abi & PERF_SAMPLE_REGS_ABI_SIMD ?
> + dwarf_x86_64_regnums_apx[perf_regnum] :
> + dwarf_x86_64_regnums[perf_regnum];
> }
> diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c
> index 797f455eba0d..9e2a0c93ecc9 100644
> --- a/tools/perf/util/dwarf-regs.c
> +++ b/tools/perf/util/dwarf-regs.c
> @@ -158,7 +158,7 @@ static int get_libdw_frame_nregs(unsigned int machine, unsigned int flags __mayb
> {
> switch (machine) {
> case EM_X86_64:
> - return 17;
> + return 146; /* Support APX eGPRs. */
The 17 comes from libdw: I believe there are places in libdw's
stack-walking code where supplying additional registers causes libdw to
fail. The key caller is `libdw_set_initial_registers`, which passes the
flag `/*only_libdw_supported=*/true`; that is where capping the
register count at 17 takes effect. Presumably increasing this value
requires some degree of libdw support? We could add a feature test for
this, but it may be safer to leave the value at 17, as it is unlikely
we're using vector registers when walking the stack. A comment
capturing this would be good, since the intent evidently wasn't clear
enough from the code.
Thanks,
Ian
> case EM_386:
> return 9;
> case EM_ARM:
> @@ -187,13 +187,14 @@ static int get_libdw_frame_nregs(unsigned int machine, unsigned int flags __mayb
> }
>
> int get_dwarf_regnum_for_perf_regnum(int perf_regnum, unsigned int machine,
> - unsigned int flags, bool only_libdw_supported)
> + unsigned int flags,
> + bool only_libdw_supported, int abi)
> {
> int reg;
>
> switch (machine) {
> case EM_X86_64:
> - reg = __get_dwarf_regnum_for_perf_regnum_x86_64(perf_regnum);
> + reg = __get_dwarf_regnum_for_perf_regnum_x86_64(perf_regnum, abi);
> break;
> case EM_386:
> reg = __get_dwarf_regnum_for_perf_regnum_i386(perf_regnum);
> diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h
> index 46a764cf322f..92cf0af93e9e 100644
> --- a/tools/perf/util/include/dwarf-regs.h
> +++ b/tools/perf/util/include/dwarf-regs.h
> @@ -103,7 +103,7 @@ int __get_csky_regnum(const char *name, unsigned int flags);
> int __get_dwarf_regnum_i386(const char *name);
> int __get_dwarf_regnum_x86_64(const char *name);
> int __get_dwarf_regnum_for_perf_regnum_i386(int perf_regnum);
> -int __get_dwarf_regnum_for_perf_regnum_x86_64(int perf_regnum);
> +int __get_dwarf_regnum_for_perf_regnum_x86_64(int perf_regnum, int abi);
>
> int __get_dwarf_regnum_for_perf_regnum_arm(int perf_regnum);
> int __get_dwarf_regnum_for_perf_regnum_arm64(int perf_regnum);
> @@ -125,8 +125,9 @@ int get_dwarf_regnum(const char *name, unsigned int machine, unsigned int flags)
> /*
> * get_dwarf_regnum - Returns DWARF regnum from perf register number.
> */
> -int get_dwarf_regnum_for_perf_regnum(int perf_regnum, unsigned int machine, unsigned int flags,
> - bool only_libdw_supported);
> +int get_dwarf_regnum_for_perf_regnum(int perf_regnum, unsigned int machine,
> + unsigned int flags,
> + bool only_libdw_supported, int abi);
>
> void get_powerpc_regs(u32 raw_insn, int is_source, struct annotated_op_loc *op_loc);
>
> diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
> index 05e8e68bd49c..678db5a65ada 100644
> --- a/tools/perf/util/unwind-libdw.c
> +++ b/tools/perf/util/unwind-libdw.c
> @@ -273,7 +273,8 @@ static bool libdw_set_initial_registers(Dwfl_Thread *thread, void *arg)
> int dwarf_reg =
> get_dwarf_regnum_for_perf_regnum(perf_reg, e_machine,
> e_flags,
> - /*only_libdw_supported=*/true);
> + /*only_libdw_supported=*/true,
> + user_regs->abi);
> if (dwarf_reg > max_dwarf_reg)
> max_dwarf_reg = dwarf_reg;
> }
> @@ -288,7 +289,8 @@ static bool libdw_set_initial_registers(Dwfl_Thread *thread, void *arg)
> int dwarf_reg =
> get_dwarf_regnum_for_perf_regnum(perf_reg, e_machine,
> e_flags,
> - /*only_libdw_supported=*/true);
> + /*only_libdw_supported=*/true,
> + user_regs->abi);
> if (dwarf_reg >= 0) {
> val = 0;
> if (perf_reg_value(&val, user_regs, perf_reg) == 0)
> --
> 2.34.1
>