Re: [PATCH v3 09/12] x86/amd_nb: Move SMN access code to a new amd_node driver

From: Shyam Sundar S K
Date: Wed Jan 08 2025 - 00:30:45 EST




On 1/8/2025 03:58, Yazen Ghannam wrote:
> From: Mario Limonciello <mario.limonciello@xxxxxxx>
>
> SMN access was bolted into amd_nb mostly as convenience. This has
> limitations though that require incurring tech debt to keep it working.
>
> Move SMN access to the newly introduced AMD Node driver.
>

There are couple of nit-picks (see below), but aside from those, this
looks good to me.

Acked-by: Shyam Sundar S K <Shyam-sundar.S-k@xxxxxxx> # for PMF and
PMC drivers

> Signed-off-by: Mario Limonciello <mario.limonciello@xxxxxxx>
> Signed-off-by: Yazen Ghannam <yazen.ghannam@xxxxxxx>
> Signed-off-by: Borislav Petkov (AMD) <bp@xxxxxxxxx>
> Acked-by: Ilpo Järvinen <ilpo.jarvinen@xxxxxxxxxxxxxxx> # pdx86
> Link: https://lore.kernel.org/r/20241206161210.163701-11-yazen.ghannam@xxxxxxx
> ---
> MAINTAINERS | 1 +
> arch/x86/include/asm/amd_nb.h | 3 -
> arch/x86/include/asm/amd_node.h | 3 +
> arch/x86/kernel/amd_nb.c | 89 ---------------------------
> arch/x86/kernel/amd_node.c | 90 ++++++++++++++++++++++++++++
> arch/x86/pci/fixup.c | 4 +-
> drivers/edac/Kconfig | 1 +
> drivers/edac/amd64_edac.c | 1 +
> drivers/hwmon/Kconfig | 2 +-
> drivers/hwmon/k10temp.c | 2 +-
> drivers/platform/x86/amd/pmc/Kconfig | 2 +-
> drivers/platform/x86/amd/pmc/pmc.c | 3 +-
> drivers/platform/x86/amd/pmf/Kconfig | 2 +-
> drivers/platform/x86/amd/pmf/core.c | 2 +-
> drivers/ras/amd/atl/Kconfig | 1 +
> drivers/ras/amd/atl/internal.h | 1 +
> 16 files changed, 107 insertions(+), 100 deletions(-)
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 290989ab9f72..27a5bc2fc49b 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1122,6 +1122,7 @@ S: Supported
> F: drivers/i2c/busses/i2c-amd-asf-plat.c
>
> AMD NODE DRIVER
> +M: Mario Limonciello <mario.limonciello@xxxxxxx>
> M: Yazen Ghannam <yazen.ghannam@xxxxxxx>
> L: linux-kernel@xxxxxxxxxxxxxxx
> S: Supported
> diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
> index 094c3be81a8d..5e0333534abc 100644
> --- a/arch/x86/include/asm/amd_nb.h
> +++ b/arch/x86/include/asm/amd_nb.h
> @@ -21,9 +21,6 @@ extern int amd_numa_init(void);
> extern int amd_get_subcaches(int);
> extern int amd_set_subcaches(int, unsigned long);
>
> -int __must_check amd_smn_read(u16 node, u32 address, u32 *value);
> -int __must_check amd_smn_write(u16 node, u32 address, u32 value);
> -
> struct amd_l3_cache {
> unsigned indices;
> u8 subcaches[4];
> diff --git a/arch/x86/include/asm/amd_node.h b/arch/x86/include/asm/amd_node.h
> index 419a0ad13ef2..113ad3e8ee40 100644
> --- a/arch/x86/include/asm/amd_node.h
> +++ b/arch/x86/include/asm/amd_node.h
> @@ -30,4 +30,7 @@ static inline u16 amd_num_nodes(void)
> return topology_amd_nodes_per_pkg() * topology_max_packages();
> }
>
> +int __must_check amd_smn_read(u16 node, u32 address, u32 *value);
> +int __must_check amd_smn_write(u16 node, u32 address, u32 value);
> +
> #endif /*_ASM_X86_AMD_NODE_H_*/
> diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
> index e335d89ddad7..11fac09e3a8c 100644
> --- a/arch/x86/kernel/amd_nb.c
> +++ b/arch/x86/kernel/amd_nb.c
> @@ -15,9 +15,6 @@
> #include <linux/pci_ids.h>
> #include <asm/amd_nb.h>
>
> -/* Protect the PCI config register pairs used for SMN. */
> -static DEFINE_MUTEX(smn_mutex);
> -
> static u32 *flush_words;
>
> static const struct pci_device_id amd_nb_misc_ids[] = {
> @@ -59,92 +56,6 @@ struct amd_northbridge *node_to_amd_nb(int node)
> }
> EXPORT_SYMBOL_GPL(node_to_amd_nb);
>
> -/*
> - * SMN accesses may fail in ways that are difficult to detect here in the called
> - * functions amd_smn_read() and amd_smn_write(). Therefore, callers must do
> - * their own checking based on what behavior they expect.
> - *
> - * For SMN reads, the returned value may be zero if the register is Read-as-Zero.
> - * Or it may be a "PCI Error Response", e.g. all 0xFFs. The "PCI Error Response"
> - * can be checked here, and a proper error code can be returned.
> - *
> - * But the Read-as-Zero response cannot be verified here. A value of 0 may be
> - * correct in some cases, so callers must check that this correct is for the
> - * register/fields they need.
> - *
> - * For SMN writes, success can be determined through a "write and read back"
> - * However, this is not robust when done here.
> - *
> - * Possible issues:
> - *
> - * 1) Bits that are "Write-1-to-Clear". In this case, the read value should
> - * *not* match the write value.
> - *
> - * 2) Bits that are "Read-as-Zero"/"Writes-Ignored". This information cannot be
> - * known here.
> - *
> - * 3) Bits that are "Reserved / Set to 1". Ditto above.
> - *
> - * Callers of amd_smn_write() should do the "write and read back" check
> - * themselves, if needed.
> - *
> - * For #1, they can see if their target bits got cleared.
> - *
> - * For #2 and #3, they can check if their target bits got set as intended.
> - *
> - * This matches what is done for RDMSR/WRMSR. As long as there's no #GP, then
> - * the operation is considered a success, and the caller does their own
> - * checking.
> - */
> -static int __amd_smn_rw(u16 node, u32 address, u32 *value, bool write)
> -{
> - struct pci_dev *root;
> - int err = -ENODEV;
> -
> - if (node >= amd_northbridges.num)
> - goto out;
> -
> - root = node_to_amd_nb(node)->root;
> - if (!root)
> - goto out;
> -
> - mutex_lock(&smn_mutex);
> -
> - err = pci_write_config_dword(root, 0x60, address);
> - if (err) {
> - pr_warn("Error programming SMN address 0x%x.\n", address);
> - goto out_unlock;
> - }
> -
> - err = (write ? pci_write_config_dword(root, 0x64, *value)
> - : pci_read_config_dword(root, 0x64, value));
> -
> -out_unlock:
> - mutex_unlock(&smn_mutex);
> -
> -out:
> - return err;
> -}
> -
> -int __must_check amd_smn_read(u16 node, u32 address, u32 *value)
> -{
> - int err = __amd_smn_rw(node, address, value, false);
> -
> - if (PCI_POSSIBLE_ERROR(*value)) {
> - err = -ENODEV;
> - *value = 0;
> - }
> -
> - return err;
> -}
> -EXPORT_SYMBOL_GPL(amd_smn_read);
> -
> -int __must_check amd_smn_write(u16 node, u32 address, u32 value)
> -{
> - return __amd_smn_rw(node, address, &value, true);
> -}
> -EXPORT_SYMBOL_GPL(amd_smn_write);
> -
> static int amd_cache_northbridges(void)
> {
> struct amd_northbridge *nb;
> diff --git a/arch/x86/kernel/amd_node.c b/arch/x86/kernel/amd_node.c
> index 4eea8c7d8090..95e5ca0acc90 100644
> --- a/arch/x86/kernel/amd_node.c
> +++ b/arch/x86/kernel/amd_node.c
> @@ -8,6 +8,7 @@
> * Author: Yazen Ghannam <Yazen.Ghannam@xxxxxxx>
> */
>
> +#include <asm/amd_nb.h>
> #include <asm/amd_node.h>
>
> /*
> @@ -88,3 +89,92 @@ struct pci_dev *amd_node_get_root(u16 node)
> pci_dbg(root, "is root for AMD node %u\n", node);
> return root;
> }
> +
> +/* Protect the PCI config register pairs used for SMN. */
> +static DEFINE_MUTEX(smn_mutex);
> +
> +/*
> + * SMN accesses may fail in ways that are difficult to detect here in the called
> + * functions amd_smn_read() and amd_smn_write(). Therefore, callers must do
> + * their own checking based on what behavior they expect.
> + *
> + * For SMN reads, the returned value may be zero if the register is Read-as-Zero.
> + * Or it may be a "PCI Error Response", e.g. all 0xFFs. The "PCI Error Response"
> + * can be checked here, and a proper error code can be returned.
> + *
> + * But the Read-as-Zero response cannot be verified here. A value of 0 may be
> + * correct in some cases, so callers must check that this correct is for the
> + * register/fields they need.
> + *
> + * For SMN writes, success can be determined through a "write and read back"
> + * However, this is not robust when done here.
> + *
> + * Possible issues:
> + *
> + * 1) Bits that are "Write-1-to-Clear". In this case, the read value should
> + * *not* match the write value.
> + *
> + * 2) Bits that are "Read-as-Zero"/"Writes-Ignored". This information cannot be
> + * known here.
> + *
> + * 3) Bits that are "Reserved / Set to 1". Ditto above.
> + *
> + * Callers of amd_smn_write() should do the "write and read back" check
> + * themselves, if needed.
> + *
> + * For #1, they can see if their target bits got cleared.
> + *
> + * For #2 and #3, they can check if their target bits got set as intended.
> + *
> + * This matches what is done for RDMSR/WRMSR. As long as there's no #GP, then
> + * the operation is considered a success, and the caller does their own
> + * checking.
> + */
> +static int __amd_smn_rw(u16 node, u32 address, u32 *value, bool write)
> +{
> + struct pci_dev *root;
> + int err = -ENODEV;
> +
> + if (node >= amd_nb_num())
> + goto out;
> +
> + root = node_to_amd_nb(node)->root;
> + if (!root)
> + goto out;
> +
> + mutex_lock(&smn_mutex);
> +
> + err = pci_write_config_dword(root, 0x60, address);
> + if (err) {
> + pr_warn("Error programming SMN address 0x%x.\n", address);
> + goto out_unlock;
> + }
> +
> + err = (write ? pci_write_config_dword(root, 0x64, *value)
> + : pci_read_config_dword(root, 0x64, value));
> +
> +out_unlock:
> + mutex_unlock(&smn_mutex);
> +
> +out:
> + return err;
> +}
> +
> +int __must_check amd_smn_read(u16 node, u32 address, u32 *value)
> +{
> + int err = __amd_smn_rw(node, address, value, false);
> +
> + if (PCI_POSSIBLE_ERROR(*value)) {
> + err = -ENODEV;
> + *value = 0;
> + }
> +
> + return err;
> +}
> +EXPORT_SYMBOL_GPL(amd_smn_read);
> +
> +int __must_check amd_smn_write(u16 node, u32 address, u32 value)
> +{
> + return __amd_smn_rw(node, address, &value, true);
> +}
> +EXPORT_SYMBOL_GPL(amd_smn_write);
> diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
> index 0681ecfe3430..592fb9d97e77 100644
> --- a/arch/x86/pci/fixup.c
> +++ b/arch/x86/pci/fixup.c
> @@ -9,7 +9,7 @@
> #include <linux/pci.h>
> #include <linux/suspend.h>
> #include <linux/vgaarb.h>
> -#include <asm/amd_nb.h>
> +#include <asm/amd_node.h>
> #include <asm/hpet.h>
> #include <asm/pci_x86.h>
>
> @@ -828,7 +828,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma);
>
> #endif
>
> -#ifdef CONFIG_AMD_NB
> +#ifdef CONFIG_AMD_NODE
>
> #define AMD_15B8_RCC_DEV2_EPF0_STRAP2 0x10136008
> #define AMD_15B8_RCC_DEV2_EPF0_STRAP2_NO_SOFT_RESET_DEV2_F0_MASK 0x00000080L
> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
> index 06f7b43a6f78..cb97d7bdae31 100644
> --- a/drivers/edac/Kconfig
> +++ b/drivers/edac/Kconfig
> @@ -78,6 +78,7 @@ config EDAC_GHES
> config EDAC_AMD64
> tristate "AMD64 (Opteron, Athlon64)"
> depends on AMD_NB && EDAC_DECODE_MCE
> + depends on AMD_NODE
> imply AMD_ATL
> help
> Support for error detection and correction of DRAM ECC errors on
> diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
> index ddfbdb66b794..29465088639c 100644
> --- a/drivers/edac/amd64_edac.c
> +++ b/drivers/edac/amd64_edac.c
> @@ -2,6 +2,7 @@
> #include <linux/ras.h>
> #include "amd64_edac.h"
> #include <asm/amd_nb.h>
> +#include <asm/amd_node.h>
>
> static struct edac_pci_ctl_info *pci_ctl;
>
> diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
> index dd376602f3f1..ea13ea482a63 100644
> --- a/drivers/hwmon/Kconfig
> +++ b/drivers/hwmon/Kconfig
> @@ -324,7 +324,7 @@ config SENSORS_K8TEMP
>
> config SENSORS_K10TEMP
> tristate "AMD Family 10h+ temperature sensor"
> - depends on X86 && PCI && AMD_NB
> + depends on X86 && PCI && AMD_NODE
> help
> If you say yes here you get support for the temperature
> sensor(s) inside your CPU. Supported are later revisions of
> diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
> index cefa8cd184c8..d0b4cc9a5011 100644
> --- a/drivers/hwmon/k10temp.c
> +++ b/drivers/hwmon/k10temp.c
> @@ -20,7 +20,7 @@
> #include <linux/module.h>
> #include <linux/pci.h>
> #include <linux/pci_ids.h>
> -#include <asm/amd_nb.h>
> +#include <asm/amd_node.h>
> #include <asm/processor.h>
>
> MODULE_DESCRIPTION("AMD Family 10h+ CPU core temperature monitor");
> diff --git a/drivers/platform/x86/amd/pmc/Kconfig b/drivers/platform/x86/amd/pmc/Kconfig
> index 94f9563d8be7..eeffdafd686e 100644
> --- a/drivers/platform/x86/amd/pmc/Kconfig
> +++ b/drivers/platform/x86/amd/pmc/Kconfig
> @@ -5,7 +5,7 @@
>
> config AMD_PMC
> tristate "AMD SoC PMC driver"
> - depends on ACPI && PCI && RTC_CLASS && AMD_NB
> + depends on ACPI && PCI && RTC_CLASS && AMD_NODE
> depends on SUSPEND
> select SERIO
> help
> diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c
> index 26b878ee5191..941b7753dd78 100644
> --- a/drivers/platform/x86/amd/pmc/pmc.c
> +++ b/drivers/platform/x86/amd/pmc/pmc.c
> @@ -10,7 +10,6 @@
>
> #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>
> -#include <asm/amd_nb.h>
> #include <linux/acpi.h>
> #include <linux/bitfield.h>
> #include <linux/bits.h>
> @@ -28,6 +27,8 @@
> #include <linux/seq_file.h>
> #include <linux/uaccess.h>
>
> +#include <asm/amd_node.h>
> +
> #include "pmc.h"
>
> /* SMU communication registers */
> diff --git a/drivers/platform/x86/amd/pmf/Kconfig b/drivers/platform/x86/amd/pmf/Kconfig
> index 99d67cdbd91e..25b8f7ae3abd 100644
> --- a/drivers/platform/x86/amd/pmf/Kconfig
> +++ b/drivers/platform/x86/amd/pmf/Kconfig
> @@ -7,7 +7,7 @@ config AMD_PMF
> tristate "AMD Platform Management Framework"
> depends on ACPI && PCI
> depends on POWER_SUPPLY
> - depends on AMD_NB
> + depends on AMD_NODE
> select ACPI_PLATFORM_PROFILE
> depends on TEE && AMDTEE
> depends on AMD_SFH_HID
> diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c
> index 06a97c533cb8..7f88f3121cf5 100644
> --- a/drivers/platform/x86/amd/pmf/core.c
> +++ b/drivers/platform/x86/amd/pmf/core.c
> @@ -8,13 +8,13 @@
> * Author: Shyam Sundar S K <Shyam-sundar.S-k@xxxxxxx>
> */
>
> -#include <asm/amd_nb.h>
> #include <linux/debugfs.h>
> #include <linux/iopoll.h>
> #include <linux/module.h>
> #include <linux/pci.h>
> #include <linux/platform_device.h>
> #include <linux/power_supply.h>
> +#include <asm/amd_node.h>

You can adjust the first header inclusion to maintain alphabetical order.

> #include "pmf.h"
>
> /* PMF-SMU communication registers */
> diff --git a/drivers/ras/amd/atl/Kconfig b/drivers/ras/amd/atl/Kconfig
> index 551680073e43..6e03942cd7da 100644
> --- a/drivers/ras/amd/atl/Kconfig
> +++ b/drivers/ras/amd/atl/Kconfig
> @@ -10,6 +10,7 @@
> config AMD_ATL
> tristate "AMD Address Translation Library"
> depends on AMD_NB && X86_64 && RAS
> + depends on AMD_NODE

the above "depends on" can be updated to:

depends on AMD_NODE && X86_64 && RAS

instead of new "depends on AMD_NODE"

Thanks,
Shyam

> depends on MEMORY_FAILURE
> default N
> help
> diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
> index 143d04c779a8..f9be26d25348 100644
> --- a/drivers/ras/amd/atl/internal.h
> +++ b/drivers/ras/amd/atl/internal.h
> @@ -18,6 +18,7 @@
> #include <linux/ras.h>
>
> #include <asm/amd_nb.h>
> +#include <asm/amd_node.h>
>
> #include "reg_fields.h"
>