Re: [Patch v5 07/19] perf: Add sampling support for SIMD registers
From: Mi, Dapeng
Date: Mon Dec 08 2025 - 00:25:30 EST
On 12/5/2025 7:07 PM, Peter Zijlstra wrote:
> On Wed, Dec 03, 2025 at 02:54:48PM +0800, Dapeng Mi wrote:
>
>> @@ -545,6 +547,25 @@ struct perf_event_attr {
>> __u64 sig_data;
>>
>> __u64 config3; /* extension of config2 */
>> +
>> +
>> + /*
>> + * Defines set of SIMD registers to dump on samples.
>> + * The sample_simd_regs_enabled !=0 implies the
>> + * set of SIMD registers is used to config all SIMD registers.
>> + * If !sample_simd_regs_enabled, sample_regs_XXX may be used to
>> + * config some SIMD registers on X86.
>> + */
>> + union {
>> + __u16 sample_simd_regs_enabled;
>> + __u16 sample_simd_pred_reg_qwords;
>> + };
>> + __u32 sample_simd_pred_reg_intr;
>> + __u32 sample_simd_pred_reg_user;
>> + __u16 sample_simd_vec_reg_qwords;
>> + __u64 sample_simd_vec_reg_intr;
>> + __u64 sample_simd_vec_reg_user;
>> + __u32 __reserved_4;
>> };
> This is poorly aligned and causes holes.
>
> This:
>
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index d292f96bc06f..2deb8dd0ca37 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -545,6 +545,14 @@ struct perf_event_attr {
> __u64 sig_data;
>
> __u64 config3; /* extension of config2 */
> +
> + __u16 sample_simd_pred_reg_qwords;
> + __u32 sample_simd_pred_reg_intr;
> + __u32 sample_simd_pred_reg_user;
> + __u16 sample_simd_vec_reg_qwords;
> + __u64 sample_simd_vec_reg_intr;
> + __u64 sample_simd_vec_reg_user;
> + __u32 __reserved_4;
> };
>
> /*
>
> results in:
>
> __u64 config3; /* 128 8 */
> __u16 sample_simd_pred_reg_qwords; /* 136 2 */
>
> /* XXX 2 bytes hole, try to pack */
>
> __u32 sample_simd_pred_reg_intr; /* 140 4 */
> __u32 sample_simd_pred_reg_user; /* 144 4 */
> __u16 sample_simd_vec_reg_qwords; /* 148 2 */
>
> /* XXX 2 bytes hole, try to pack */
>
> __u64 sample_simd_vec_reg_intr; /* 152 8 */
> __u64 sample_simd_vec_reg_user; /* 160 8 */
> __u32 __reserved_4; /* 168 4 */
>
>
>
> A better layout might be:
>
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index d292f96bc06f..f72707e9df68 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -545,6 +545,15 @@ struct perf_event_attr {
> __u64 sig_data;
>
> __u64 config3; /* extension of config2 */
> +
> + __u16 sample_simd_pred_reg_qwords;
> + __u16 sample_simd_vec_reg_qwords;
> + __u32 __reserved_4;
> +
> + __u32 sample_simd_pred_reg_intr;
> + __u32 sample_simd_pred_reg_user;
> + __u64 sample_simd_vec_reg_intr;
> + __u64 sample_simd_vec_reg_user;
> };
>
> /*
>
> such that:
>
> __u64 config3; /* 128 8 */
> __u16 sample_simd_pred_reg_qwords; /* 136 2 */
> __u16 sample_simd_vec_reg_qwords; /* 138 2 */
> __u32 __reserved_4; /* 140 4 */
> __u32 sample_simd_pred_reg_intr; /* 144 4 */
> __u32 sample_simd_pred_reg_user; /* 148 4 */
> __u64 sample_simd_vec_reg_intr; /* 152 8 */
> __u64 sample_simd_vec_reg_user; /* 160 8 */
>
Sure. Thanks.
>