[PATCH v4 4/8] x86/amd_nb: Enhance SMN access error checking

From: Yazen Ghannam
Date: Thu Jun 06 2024 - 12:17:47 EST


AMD Zen-based systems use a System Management Network (SMN) that
provides access to implementation-specific registers.

SMN accesses are done indirectly through an index/data pair in PCI
config space. The accesses can fail for a variety of reasons.

Include code comments to describe some possible scenarios.

Require error checking for callers of amd_smn_read() and amd_smn_write().
This is needed because many error conditions cannot be checked by these
functions.

Also, drop the extern keyword as it's not needed. And remove a warning
that will not be triggered in many cases.

Signed-off-by: Yazen Ghannam <yazen.ghannam@xxxxxxx>
Reviewed-by: Mario Limonciello <mario.limonciello@xxxxxxx>
---
arch/x86/include/asm/amd_nb.h | 4 ++--
arch/x86/kernel/amd_nb.c | 39 ++++++++++++++++++++++++++++++++++-----
2 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 5c37944c8a5e..6f3b6aef47ba 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -21,8 +21,8 @@ extern int amd_numa_init(void);
extern int amd_get_subcaches(int);
extern int amd_set_subcaches(int, unsigned long);

-extern int amd_smn_read(u16 node, u32 address, u32 *value);
-extern int amd_smn_write(u16 node, u32 address, u32 value);
+int __must_check amd_smn_read(u16 node, u32 address, u32 *value);
+int __must_check amd_smn_write(u16 node, u32 address, u32 value);

struct amd_l3_cache {
unsigned indices;
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 027a8c7a2c9e..0616a7727898 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -180,6 +180,38 @@ static struct pci_dev *next_northbridge(struct pci_dev *dev,
return dev;
}

+/*
+ * SMN accesses may fail in ways that are difficult to detect here in the called
+ * functions smn_read() and smn_write(). Therefore, callers of these functions
+ * must do their own checking based on what behavior they expect.
+ *
+ * For SMN reads, the returned SMN value may be zero if the register is
+ * Read-as-Zero . Or it may be a "PCI Error Response", e.g. all 0xFFs. The "PCI
+ * Error Response" can be checked here, and a proper error code can be returned.
+ * But the Read-as-Zero response cannot be verified here. A value of 0 may be
+ * correct in some cases, so callers must check that this correct is for the
+ * register/fields they need.
+ *
+ * For SMN writes, success can be determined through a "write and read back"
+ * procedure. However, this is not robust when done here.
+ *
+ * Possible issues:
+ * 1) Bits that are "Write-1-to-Clear". In this case, the read value should
+ * *not* match the write value.
+ * 2) Bits that are "Read-as-Zero"/"Writes-Ignored". This information cannot be
+ * known here.
+ * 3) Bits that are "Reserved / Set to 1". Ditto above.
+ *
+ * Callers of amd_smn_write() should do the "write and read back" check themselves,
+ * if needed.
+ *
+ * For #1, they can see if their target bits got cleared.
+ *
+ * For #2 and #3, they can check if their target bits got set as intended.
+ *
+ * This matches what is done for rdmsr/wrmsr. As long as there's no #GP, then
+ * the operation is considered a success, and the caller does their own checking.
+ */
static int __amd_smn_rw(u16 node, u32 address, u32 *value, bool write)
{
struct pci_dev *root;
@@ -202,9 +234,6 @@ static int __amd_smn_rw(u16 node, u32 address, u32 *value, bool write)

err = (write ? pci_write_config_dword(root, 0x64, *value)
: pci_read_config_dword(root, 0x64, value));
- if (err)
- pr_warn("Error %s SMN address 0x%x.\n",
- (write ? "writing to" : "reading from"), address);

out_unlock:
mutex_unlock(&smn_mutex);
@@ -213,7 +242,7 @@ static int __amd_smn_rw(u16 node, u32 address, u32 *value, bool write)
return err;
}

-int amd_smn_read(u16 node, u32 address, u32 *value)
+int __must_check amd_smn_read(u16 node, u32 address, u32 *value)
{
int err = __amd_smn_rw(node, address, value, false);

@@ -226,7 +255,7 @@ int amd_smn_read(u16 node, u32 address, u32 *value)
}
EXPORT_SYMBOL_GPL(amd_smn_read);

-int amd_smn_write(u16 node, u32 address, u32 value)
+int __must_check amd_smn_write(u16 node, u32 address, u32 value)
{
return __amd_smn_rw(node, address, &value, true);
}

--
2.34.1