RE: [PATCH v4 3/6] cxl/events: Update General Media Event Record to CXL spec rev 3.1

From: Shiju Jose
Date: Wed Nov 27 2024 - 05:12:25 EST


>-----Original Message-----
>From: Steven Rostedt <rostedt@xxxxxxxxxxx>
>Sent: 26 November 2024 17:03
>To: Shiju Jose <shiju.jose@xxxxxxxxxx>
>Cc: dave.jiang@xxxxxxxxx; dan.j.williams@xxxxxxxxx; Jonathan Cameron
><jonathan.cameron@xxxxxxxxxx>; alison.schofield@xxxxxxxxx;
>nifan.cxl@xxxxxxxxx; vishal.l.verma@xxxxxxxxx; ira.weiny@xxxxxxxxx;
>dave@xxxxxxxxxxxx; linux-cxl@xxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx;
>Linuxarm <linuxarm@xxxxxxxxxx>; tanxiaofei <tanxiaofei@xxxxxxxxxx>;
>Zengtao (B) <prime.zeng@xxxxxxxxxxxxx>
>Subject: Re: [PATCH v4 3/6] cxl/events: Update General Media Event Record to
>CXL spec rev 3.1
>
>On Tue, 26 Nov 2024 11:51:23 +0000
>Shiju Jose <shiju.jose@xxxxxxxxxx> wrote:
>
>> We are encountering a parsing error ("FAILED TO PARSE") from
>> libtraceevent when it tries to parse some of the CXL trace events for the
>> user-space tool rasdaemon.
>> This issue appeared after new fields were added to the trace events.
>> It was found that the issue does not occur when all or some of the
>> decoded strings for the event's data and flags are removed from the
>> TP_printk() function in the kernel, and only the values are printed instead.
>> https://elixir.bootlin.com/linux/v6.12/source/drivers/cxl/core/trace.h
>> https://lore.kernel.org/lkml/20241120093745.1847-1-shiju.jose@huawei.com/
>>
>> Below is the information from the debugging in libtraceevent:
>> The failure occurs in the following functions and locations within libtraceevent:
>> File: src/event-parse.c
>> Function: event_read_format()
>> ret = event_read_fields(event->tep, event, &event->format.fields);
>> if (ret < 0)
>>         return ret;
>>
>> Function: event_read_fields()
>> if (test_type_token(type, token, TEP_EVENT_ITEM, "field"))
>> goto fail;
>>
>> Can you recognize if there are any limitations or issues that would
>> prevent libtraceevent from parsing the trace event in the condition
>> described above?
>
>Can you show me the output of the format files for the affected trace events:
>
> # cat /sys/kernel/tracing/cxl/<affected_event>/format
>
>You can attach it too if your email does whitespace mangling.

Hi Steve,

Please find attached the output of the format file for the CXL general media trace event.

>
>Thanks,
>
>-- Steve
Thanks,
Shiju
root@localhost:~# cat /sys/kernel/debug/tracing/events/cxl/cxl_general_media/format
name: cxl_general_media
ID: 1464
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;

field:__data_loc char[] memdev; offset:8; size:4; signed:0;
field:__data_loc char[] host; offset:12; size:4; signed:0;
field:int log; offset:16; size:4; signed:1;
field:uuid_t hdr_uuid; offset:20; size:16; signed:0;
field:u64 serial; offset:40; size:8; signed:0;
field:u32 hdr_flags; offset:48; size:4; signed:0;
field:u16 hdr_handle; offset:52; size:2; signed:0;
field:u16 hdr_related_handle; offset:54; size:2; signed:0;
field:u64 hdr_timestamp; offset:56; size:8; signed:0;
field:u8 hdr_length; offset:64; size:1; signed:0;
field:u8 hdr_maint_op_class; offset:65; size:1; signed:0;
field:u8 hdr_maint_op_sub_class; offset:66; size:1; signed:0;
field:u64 dpa; offset:72; size:8; signed:0;
field:u8 descriptor; offset:80; size:1; signed:0;
field:u8 type; offset:81; size:1; signed:0;
field:u8 transaction_type; offset:82; size:1; signed:0;
field:u8 channel; offset:83; size:1; signed:0;
field:u32 device; offset:84; size:4; signed:0;
field:u8 comp_id[16]; offset:88; size:16; signed:0;
field:u64 hpa; offset:104; size:8; signed:0;
field:uuid_t region_uuid; offset:112; size:16; signed:0;
field:u16 validity_flags; offset:128; size:2; signed:0;
field:u8 rank; offset:130; size:1; signed:0;
field:u8 dpa_flags; offset:131; size:1; signed:0;
field:__data_loc char[] region_name; offset:132; size:4; signed:0;
field:u8 sub_type; offset:136; size:1; signed:0;
field:u8 cme_threshold_ev_flags; offset:137; size:1; signed:0;
field:u32 cme_count; offset:140; size:4; signed:0;

print fmt: "memdev=%s host=%s serial=%lld log=%s : time=%llu uuid=%pUb len=%d flags='%s' handle=%x related_handle=%x maint_op_class=%u maint_op_sub_class=%u : dpa=%llx dpa_flags='%s' descriptor='%s' type='%s' transaction_type='%s' channel=%u rank=%u device=%x validity_flags='%s' comp_id=%shpa=%llx region=%s region_uuid=%pUb sub_type=%u cme_threshold_ev_flags=%u cme_count=%u", __get_str(memdev), __get_str(host), REC->serial, __print_symbolic(REC->log, { CXL_EVENT_TYPE_INFO, "Informational" }, { CXL_EVENT_TYPE_WARN, "Warning" }, { CXL_EVENT_TYPE_FAIL, "Failure" }, { CXL_EVENT_TYPE_FATAL, "Fatal" }), REC->hdr_timestamp, &REC->hdr_uuid, REC->hdr_length, __print_flags(REC->hdr_flags, " | ", { ((((1UL))) << (2)), "PERMANENT_CONDITION" }, { ((((1UL))) << (3)), "MAINTENANCE_NEEDED" }, { ((((1UL))) << (4)), "PERFORMANCE_DEGRADED" }, { ((((1UL))) << (5)), "HARDWARE_REPLACEMENT_NEEDED" }, { ((((1UL))) << (6)), "MAINT_OP_SUB_CLASS_VALID" } ), REC->hdr_handle, REC->hdr_related_handle, REC->hdr_maint_op_class, REC->hdr_maint_op_sub_class, REC->dpa, __print_flags(REC->dpa_flags, "|", { ((((1UL))) << (0)), "VOLATILE" }, { ((((1UL))) << (1)), "NOT_REPAIRABLE" } ), __print_flags(REC->descriptor, "|", { ((((1UL))) << (0)), "UNCORRECTABLE_EVENT" }, { ((((1UL))) << (1)), "THRESHOLD_EVENT" }, { ((((1UL))) << (2)), "POISON_LIST_OVERFLOW" } ), __print_symbolic(REC->type, { 0x00, "ECC Error" }, { 0x01, "Invalid Address" }, { 0x02, "Data Path Error" }, { 0x03, "TE State Violation" }, { 0x04, "Scrub Media ECC Error" }, { 0x05, "Adv Prog CME Counter Expiration" }, { 0x06, "CKID Violation" } ), __print_symbolic(REC->transaction_type, { 0x00, "Unknown" }, { 0x01, "Host Read" }, { 0x02, "Host Write" }, { 0x03, "Host Scan Media" }, { 0x04, "Host Inject Poison" }, { 0x05, "Internal Media Scrub" }, { 0x06, "Internal Media Management" }, { 0x07, "Internal Media Error Check Scrub" }, { 0x08, "Media Initialization" } ), REC->channel, REC->rank, REC->device, __print_flags(REC->validity_flags, "|", { 
((((1UL))) << (0)), "CHANNEL" }, { ((((1UL))) << (1)), "RANK" }, { ((((1UL))) << (2)), "DEVICE" }, { ((((1UL))) << (3)), "COMPONENT" }, { ((((1UL))) << (4)), "COMPONENT PLDM FORMAT" } ), __print_hex(REC->comp_id, 0x10), REC->hpa, __get_str(region_name), &REC->region_uuid, REC->sub_type, REC->cme_threshold_ev_flags, REC->cme_count
root@localhost:~#