Re: [PATCH V3 3/8] cxl/mem: Trace General Media Event Record

From: Ira Weiny
Date: Sun Dec 11 2022 - 11:08:41 EST


On Fri, Dec 09, 2022 at 02:04:23PM -0800, Dan Williams wrote:
> ira.weiny@ wrote:
> > From: Ira Weiny <ira.weiny@xxxxxxxxx>
> >
> > CXL rev 3.0 section 8.2.9.2.1.1 defines the General Media Event Record.
> >
> > Determine if the event read is a general media record and if so trace
> > the record as a General Media Event Record.
> >
> > Signed-off-by: Ira Weiny <ira.weiny@xxxxxxxxx>
> >
> > ---
> > Changes from V2:
> > Dan
> > Remove trace_*_enabled() calls
> > Pass struct device to trace points
> >
> > Changes from V1:
> > Jonathan
> > fix spec references for CXL rev 3.0
> > Make flags all caps
> >
> > Changes from RFC v2:
> > Output DPA flags as a single field
> > Ensure names of fields match what TP_print outputs
> > Steven
> > prefix TRACE_EVENT with 'cxl_'
> > Jonathan
> > Remove Reserved field
> >
> > Changes from RFC:
> > Add reserved byte array
> > Use common CXL event header record macros
> > Jonathan
> > Use unaligned_le{24,16} for unaligned fields
> > Don't use the inverse of phy addr mask
> > Dave Jiang
> > s/cxl_gen_media_event/general_media
> > s/cxl_evt_gen_media/cxl_event_gen_media
> > ---
> > drivers/cxl/core/mbox.c | 30 +++++++++-
> > drivers/cxl/core/trace.h | 124 +++++++++++++++++++++++++++++++++++++++
> > drivers/cxl/cxlmem.h | 19 ++++++
> > 3 files changed, 171 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> > index 2b25691a9b09..0d8c66f1cdc5 100644
> > --- a/drivers/cxl/core/mbox.c
> > +++ b/drivers/cxl/core/mbox.c
> > @@ -718,6 +718,32 @@ int cxl_enumerate_cmds(struct cxl_dev_state *cxlds)
> > }
> > EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
> >
> > +/*
> > + * General Media Event Record
> > + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
> > + */
> > +static const uuid_t gen_media_event_uuid =
> > + UUID_INIT(0xfbcd0a77, 0xc260, 0x417f,
> > + 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6);
> > +
> > +static void cxl_trace_event_record(const struct device *dev,
> > + enum cxl_event_log_type type,
> > + struct cxl_event_record_raw *record)
> > +{
> > + uuid_t *id = &record->hdr.id;
> > +
> > + if (uuid_equal(id, &gen_media_event_uuid)) {
> > + struct cxl_event_gen_media *rec =
> > + (struct cxl_event_gen_media *)record;
> > +
> > + trace_cxl_general_media(dev, type, rec);
> > + return;
> > + }
> > +
> > + /* For unknown record types print just the header */
> > + trace_cxl_generic_event(dev, type, record);
> > +}
> > +
> > static int cxl_clear_event_record(struct cxl_dev_state *cxlds,
> > enum cxl_event_log_type log,
> > struct cxl_get_event_payload *get_pl)
> > @@ -810,8 +836,8 @@ static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
> > int i;
> >
> > for (i = 0; i < nr_rec; i++)
> > - trace_cxl_generic_event(cxlds->dev, type,
> > - &payload->records[i]);
> > + cxl_trace_event_record(cxlds->dev, type,
> > + &payload->records[i]);
> >
> > rc = cxl_clear_event_record(cxlds, type, payload);
> > if (rc) {
> > diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
> > index 24eef6909f13..82462942590b 100644
> > --- a/drivers/cxl/core/trace.h
> > +++ b/drivers/cxl/core/trace.h
> > @@ -223,6 +223,130 @@ TRACE_EVENT(cxl_generic_event,
> > __print_hex(__entry->data, CXL_EVENT_RECORD_DATA_LENGTH))
> > );
> >
> > +/*
> > + * Physical Address field masks
> > + *
> > + * General Media Event Record
> > + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
> > + *
> > + * DRAM Event Record
> > + * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
> > + */
> > +#define CXL_DPA_FLAGS_MASK 0x3F
> > +#define CXL_DPA_MASK (~CXL_DPA_FLAGS_MASK)
> > +
> > +#define CXL_DPA_VOLATILE BIT(0)
> > +#define CXL_DPA_NOT_REPAIRABLE BIT(1)
> > +#define show_dpa_flags(flags) __print_flags(flags, "|", \
> > + { CXL_DPA_VOLATILE, "VOLATILE" }, \
> > + { CXL_DPA_NOT_REPAIRABLE, "NOT_REPAIRABLE" } \
> > +)
> > +
> > +/*
> > + * General Media Event Record - GMER
> > + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
> > + */
> > +#define CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT BIT(0)
> > +#define CXL_GMER_EVT_DESC_THRESHOLD_EVENT BIT(1)
> > +#define CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW BIT(2)
> > +#define show_event_desc_flags(flags) __print_flags(flags, "|", \
> > + { CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, "UNCORRECTABLE_EVENT" }, \
> > + { CXL_GMER_EVT_DESC_THRESHOLD_EVENT, "THRESHOLD_EVENT" }, \
> > + { CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW, "POISON_LIST_OVERFLOW" } \
> > +)
> > +
> > +#define CXL_GMER_MEM_EVT_TYPE_ECC_ERROR 0x00
> > +#define CXL_GMER_MEM_EVT_TYPE_INV_ADDR 0x01
> > +#define CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR 0x02
> > +#define show_mem_event_type(type) __print_symbolic(type, \
> > + { CXL_GMER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \
> > + { CXL_GMER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \
> > + { CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" } \
> > +)
> > +
> > +#define CXL_GMER_TRANS_UNKNOWN 0x00
> > +#define CXL_GMER_TRANS_HOST_READ 0x01
> > +#define CXL_GMER_TRANS_HOST_WRITE 0x02
> > +#define CXL_GMER_TRANS_HOST_SCAN_MEDIA 0x03
> > +#define CXL_GMER_TRANS_HOST_INJECT_POISON 0x04
> > +#define CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB 0x05
> > +#define CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT 0x06
> > +#define show_trans_type(type) __print_symbolic(type, \
> > + { CXL_GMER_TRANS_UNKNOWN, "Unknown" }, \
> > + { CXL_GMER_TRANS_HOST_READ, "Host Read" }, \
> > + { CXL_GMER_TRANS_HOST_WRITE, "Host Write" }, \
> > + { CXL_GMER_TRANS_HOST_SCAN_MEDIA, "Host Scan Media" }, \
> > + { CXL_GMER_TRANS_HOST_INJECT_POISON, "Host Inject Poison" }, \
> > + { CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, "Internal Media Scrub" }, \
> > + { CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT, "Internal Media Management" } \
> > +)
> > +
> > +#define CXL_GMER_VALID_CHANNEL BIT(0)
> > +#define CXL_GMER_VALID_RANK BIT(1)
> > +#define CXL_GMER_VALID_DEVICE BIT(2)
> > +#define CXL_GMER_VALID_COMPONENT BIT(3)
> > +#define show_valid_flags(flags) __print_flags(flags, "|", \
> > + { CXL_GMER_VALID_CHANNEL, "CHANNEL" }, \
> > + { CXL_GMER_VALID_RANK, "RANK" }, \
> > + { CXL_GMER_VALID_DEVICE, "DEVICE" }, \
> > + { CXL_GMER_VALID_COMPONENT, "COMPONENT" } \
> > +)
> > +
> > +TRACE_EVENT(cxl_general_media,
> > +
> > + TP_PROTO(const struct device *dev, enum cxl_event_log_type log,
> > + struct cxl_event_gen_media *rec),
> > +
> > + TP_ARGS(dev, log, rec),
> > +
> > + TP_STRUCT__entry(
> > + CXL_EVT_TP_entry
> > + /* General Media */
> > + __field(u64, dpa)
> > + __field(u8, descriptor)
> > + __field(u8, type)
> > + __field(u8, transaction_type)
> > + __field(u8, channel)
> > + __field(u32, device)
> > + __array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE)
> > + __field(u16, validity_flags)
> > + /* Following are out of order to pack trace record */
> > + __field(u8, rank)
> > + __field(u8, dpa_flags)
> > + ),
> > +
> > + TP_fast_assign(
> > + CXL_EVT_TP_fast_assign(dev, log, rec->hdr);
> > +
> > + /* General Media */
> > + __entry->dpa = le64_to_cpu(rec->phys_addr);
> > + __entry->dpa_flags = __entry->dpa & CXL_DPA_FLAGS_MASK;
> > + /* Mask after flags have been parsed */
> > + __entry->dpa &= CXL_DPA_MASK;
> > + __entry->descriptor = rec->descriptor;
> > + __entry->type = rec->type;
> > + __entry->transaction_type = rec->transaction_type;
> > + __entry->channel = rec->channel;
> > + __entry->rank = rec->rank;
> > + __entry->device = get_unaligned_le24(rec->device);
> > + memcpy(__entry->comp_id, &rec->component_id,
> > + CXL_EVENT_GEN_MED_COMP_ID_SIZE);
> > + __entry->validity_flags = get_unaligned_le16(&rec->validity_flags);
> > + ),
> > +
> > + CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' " \
> > + "descriptor='%s' type='%s' transaction_type='%s' channel=%u rank=%u " \
> > + "device=%x comp_id=%s validity_flags='%s'",
> > + __entry->dpa, show_dpa_flags(__entry->dpa_flags),
> > + show_event_desc_flags(__entry->descriptor),
> > + show_mem_event_type(__entry->type),
> > + show_trans_type(__entry->transaction_type),
> > + __entry->channel, __entry->rank, __entry->device,
> > + __print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE),
> > + show_valid_flags(__entry->validity_flags)
> > + )
> > +);
> > +
> > #endif /* _CXL_EVENTS_H */
> >
> > #define TRACE_INCLUDE_FILE trace
> > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> > index 350cb460e7fc..a5f5d4a380af 100644
> > --- a/drivers/cxl/cxlmem.h
> > +++ b/drivers/cxl/cxlmem.h
> > @@ -456,6 +456,25 @@ struct cxl_mbox_clear_event_payload {
> > (sizeof(__le16) * CXL_CLEAR_EVENT_MAX_HANDLES))) / \
> > sizeof(__le16))
> >
> > +/*
> > + * General Media Event Record
> > + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
> > + */
> > +#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10
> > +struct cxl_event_gen_media {
> > + struct cxl_event_record_hdr hdr;
> > + __le64 phys_addr;
> > + u8 descriptor;
> > + u8 type;
> > + u8 transaction_type;
> > + u8 validity_flags[2];
> > + u8 channel;
> > + u8 rank;
> > + u8 device[3];
> > + u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE];
> > + u8 reserved[0x2e];
>
> If you reflow this one again to make capitalization of symbols
> consistent in the trace prints perhaps change that to decimal, but
> that's not a blocker.

Done.

>
> Reviewed-by: Dan Williams <dan.j.williams@xxxxxxxxx>

Thanks!
Ira