Re: [PATCH 1/1] cxl/acpi.c: Add buggy BIOS hint for CXL ACPI lookup failure

From: Dan Williams
Date: Mon Apr 08 2024 - 16:45:42 EST


PJ Waskiewicz wrote:
[..]
> > Other than that seems reasonable to hint it is probably a bios
> > bug - however I wonder how many other cases we should do this for and
> > whether it is worth the effort of marking them all?
>
> I can confirm this was definitely a BIOS bug in this particular case.
> The vendor spun a quick test BIOS for us to test on an EMR and SPR host,
> and the _UID's were finally correct. I could successfully walk the CEDT
> and get to the CAPS structs I was after (link speed, bus width, etc.).

Oh, in that case I think there's no need to worry about a Linux quirk.

I do think cxl_acpi has multiple overlapping failure cases when what you
really want to know is whether:

"CXL host bridge can be found in CEDT.CHBS"

Turns out that straightforward message is aleady a driver message, but
it gets skipped in this case. So, I am thinking of cleanup /
clarification along the following lines:

1/ Lean on the existing cxl_get_chbs() validation paths to report on
errors

2/ Include the device-name rather than the UID since if UID is
unreliable it does not help you communicate with your BIOS vendor. I.e.
give a breadcrumb for the BIOS engineer to match the AML device name
with the CEDT content.

3/ Do not fail driver load on a single host-bridge parsing failure

4/ These are all cxl_acpi driver init events, so consistently use the
ACPI0017 device, and the cxl_acpi driver, as the originator of the error
message.

Would this clarification have saved you time with the debug?

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 32091379a97b..5a70d7312c64 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -511,29 +511,26 @@ static int cxl_get_chbs_iter(union acpi_subtable_headers *header, void *arg,
return 0;
}

-static int cxl_get_chbs(struct device *dev, struct acpi_device *hb,
- struct cxl_chbs_context *ctx)
+static void cxl_get_chbs(struct device *dev, struct acpi_device *hb,
+ struct cxl_chbs_context *ctx)
{
- unsigned long long uid;
int rc;

- rc = acpi_evaluate_integer(hb->handle, METHOD_NAME__UID, NULL, &uid);
- if (rc != AE_OK) {
- dev_err(dev, "unable to retrieve _UID\n");
- return -ENOENT;
- }
-
- dev_dbg(dev, "UID found: %lld\n", uid);
- *ctx = (struct cxl_chbs_context) {
+ *ctx = (struct cxl_chbs_context){
.dev = dev,
- .uid = uid,
.base = CXL_RESOURCE_NONE,
.cxl_version = UINT_MAX,
};

- acpi_table_parse_cedt(ACPI_CEDT_TYPE_CHBS, cxl_get_chbs_iter, ctx);
+ rc = acpi_evaluate_integer(hb->handle, METHOD_NAME__UID, NULL,
+ &ctx->uid);
+ if (rc != AE_OK) {
+ dev_dbg(dev, "unable to retrieve _UID\n");
+ return;
+ }

- return 0;
+ dev_dbg(dev, "UID found: %lld\n", ctx->uid);
+ acpi_table_parse_cedt(ACPI_CEDT_TYPE_CHBS, cxl_get_chbs_iter, ctx);
}

static int get_genport_coordinates(struct device *dev, struct cxl_dport *dport)
@@ -561,7 +558,6 @@ static int get_genport_coordinates(struct device *dev, struct cxl_dport *dport)
static int add_host_bridge_dport(struct device *match, void *arg)
{
int ret;
- acpi_status rc;
struct device *bridge;
struct cxl_dport *dport;
struct cxl_chbs_context ctx;
@@ -573,19 +569,16 @@ static int add_host_bridge_dport(struct device *match, void *arg)
if (!hb)
return 0;

- rc = cxl_get_chbs(match, hb, &ctx);
- if (rc)
- return rc;
-
+ cxl_get_chbs(match, hb, &ctx);
if (ctx.cxl_version == UINT_MAX) {
- dev_warn(match, "No CHBS found for Host Bridge (UID %lld)\n",
- ctx.uid);
+ dev_err(host, FW_BUG "No CHBS found for Host Bridge (%s)\n",
+ dev_name(match));
return 0;
}

if (ctx.base == CXL_RESOURCE_NONE) {
- dev_warn(match, "CHBS invalid for Host Bridge (UID %lld)\n",
- ctx.uid);
+ dev_err(host, FW_BUG "CHBS invalid for Host Bridge (%s)\n",
+ dev_name(match));
return 0;
}

@@ -650,13 +643,11 @@ static int add_host_bridge_uport(struct device *match, void *arg)
return 0;
}

- rc = cxl_get_chbs(match, hb, &ctx);
- if (rc)
- return rc;
-
+ cxl_get_chbs(match, hb, &ctx);
if (ctx.cxl_version == ACPI_CEDT_CHBS_VERSION_CXL11) {
- dev_warn(bridge,
- "CXL CHBS version mismatch, skip port registration\n");
+ dev_err(host,
+ FW_BUG "CXL CHBS version mismatch, skip port registration for %s\n",
+ dev_name(match));
return 0;
}