Re: [RFC 08/10] platform/x86/intel/ifs: Add IFS sysfs interface
From: Williams, Dan J
Date: Thu Mar 03 2022 - 19:31:51 EST
On Tue, 2022-03-01 at 11:54 -0800, Jithu Joseph wrote:
> Implement sysfs interface to trigger ifs test for a targeted core or
> all cores. For all core testing, the kernel will start testing from core 0
> and proceed to the next core one after another. After the ifs test on the
> last core, the test stops until the administrator starts another round of
> tests. A "targeted core" test runs a single ifs on a single core. The
> kernel will only test the target core.
>
> The basic usage is as below.
>
> 1. For all cores testing:
> echo 1 > /sys/devices/system/cpu/ifs/run_test
> cat /sys/devices/system/cpu/ifs/status
>
> 2. For "targeted core" testing:
> To start test, for example, cpu0:
> echo 1 > /sys/devices/system/cpu/cpu#/ifs/run_test
> cat /sys/devices/system/cpu/cpu#/ifs/status
>
> 3. For reloading IFS image: (e.g, when new IFS image is released)
> - copy the new image to /lib/firmware/intel/ifs/
> - rename it as {family/model/stepping}.{testname}
> - echo 1 > /sys/devices/system/cpu/ifs/reload
>
> This module accepts two tunable parameters. Defaults could be overridden
> by passing appropriate values during load time. The parameters are as
> described below.
>
> 1. noint: When set, system interrupts are not allowed to interrupt a scan.
> 2. retry: Maximum retry counter when the test is not executed due to an
> event such as interrupt.
>
> Originally-by: Kyung Min Park <kyung.min.park@xxxxxxxxx>
> Signed-off-by: Jithu Joseph <jithu.joseph@xxxxxxxxx>
> Reviewed-by: Ashok Raj <ashok.raj@xxxxxxxxx>
> Reviewed-by: Tony Luck <tony.luck@xxxxxxxxx>
> ---
> drivers/platform/x86/intel/ifs/Makefile | 2 +-
> drivers/platform/x86/intel/ifs/core.c | 8 +
> drivers/platform/x86/intel/ifs/ifs.h | 4 +
> drivers/platform/x86/intel/ifs/sysfs.c | 394 ++++++++++++++++++++++++
> 4 files changed, 407 insertions(+), 1 deletion(-)
> create mode 100644 drivers/platform/x86/intel/ifs/sysfs.c
>
> diff --git a/drivers/platform/x86/intel/ifs/Makefile b/drivers/platform/x86/intel/ifs/Makefile
> index 105b377de410..a2e05bf78c3e 100644
> --- a/drivers/platform/x86/intel/ifs/Makefile
> +++ b/drivers/platform/x86/intel/ifs/Makefile
> @@ -4,4 +4,4 @@
>
> obj-$(CONFIG_INTEL_IFS) += intel_ifs.o
>
> -intel_ifs-objs := core.o load.o
> +intel_ifs-objs := core.o load.o sysfs.o
> diff --git a/drivers/platform/x86/intel/ifs/core.c b/drivers/platform/x86/intel/ifs/core.c
> index 6747b523587a..c9ca385082e9 100644
> --- a/drivers/platform/x86/intel/ifs/core.c
> +++ b/drivers/platform/x86/intel/ifs/core.c
> @@ -283,11 +283,16 @@ static void ifs_first_time(unsigned int cpu)
>
> static int ifs_online_cpu(unsigned int cpu)
> {
> + int ret;
> +
> /* If the CPU is coming online for the first time*/
> if (per_cpu(ifs_state, cpu).first_time == 0)
> ifs_first_time(cpu);
>
> cpumask_clear_cpu(cpu, &(per_cpu(ifs_state, cpu).mask));
> + ret = ifs_sysfs_create(cpu);
> + if (ret)
> + return ret;
>
> per_cpu(ifs_state, cpu).scan_task = kthread_create_on_node(scan_test_worker, (void *)&cpu,
> cpu_to_node(cpu), "ifsCpu/%u",
> @@ -311,6 +316,7 @@ static int ifs_offline_cpu(unsigned int cpu)
>
> if (thread)
> kthread_stop(thread);
> + ifs_sysfs_remove(cpu);
>
> return 0;
> }
> @@ -336,6 +342,7 @@ static int __init ifs_init(void)
> return ret;
> }
>
> + cpu_ifs_init();
> init_completion(&test_thread_done);
> ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/ifs:online",
> ifs_online_cpu, ifs_offline_cpu);
> @@ -361,6 +368,7 @@ static void __exit ifs_exit(void)
> if (thread)
> kthread_stop(thread);
> }
> + cpu_ifs_exit();
> cpus_read_unlock();
> cpuhp_remove_state(cpuhp_scan_state);
>
> diff --git a/drivers/platform/x86/intel/ifs/ifs.h b/drivers/platform/x86/intel/ifs/ifs.h
> index fcbbb49faa19..4442ccd626c6 100644
> --- a/drivers/platform/x86/intel/ifs/ifs.h
> +++ b/drivers/platform/x86/intel/ifs/ifs.h
> @@ -143,6 +143,10 @@ struct ifs_state {
> DECLARE_PER_CPU(struct ifs_state, ifs_state);
>
> int load_ifs_binary(void);
> +void cpu_ifs_init(void);
> +void cpu_ifs_exit(void);
> +int ifs_sysfs_create(unsigned int cpu);
> +void ifs_sysfs_remove(unsigned int cpu);
> extern struct ifs_params ifs_params;
> extern atomic_t siblings_in;
> extern atomic_t siblings_out;
> diff --git a/drivers/platform/x86/intel/ifs/sysfs.c b/drivers/platform/x86/intel/ifs/sysfs.c
> new file mode 100644
> index 000000000000..f441968de642
> --- /dev/null
> +++ b/drivers/platform/x86/intel/ifs/sysfs.c
> @@ -0,0 +1,394 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright(c) 2021 Intel Corporation.
> + *
> + * Author: Jithu Joseph <jithu.joseph@xxxxxxxxx>
> + */
> +
> +#include <linux/cpu.h>
> +#include <linux/delay.h>
> +#include <linux/fs.h>
> +#include <linux/semaphore.h>
> +
> +#include "ifs.h"
> +
> +static DEFINE_SEMAPHORE(ifs_sem);
> +static int core_delay = 1;
> +static bool ifs_disabled;
> +
> +/*
> + * Initiate per core test. It wakes up all sibling threads that belongs to the
> + * target cpu. Once all sibling threads wake up, the scan test gets executed and
> + * wait for all sibling threads to finish the scan test.
> + */
> +static void do_core_test(int cpu)
> +{
> + int sibling;
> +
> + reinit_completion(&test_thread_done);
> + atomic_set(&siblings_in, 0);
> + atomic_set(&siblings_out, 0);
> +
> + cpu_sibl_ct = cpumask_weight(topology_sibling_cpumask(cpu));
> +
> + for_each_cpu(sibling, topology_sibling_cpumask(cpu))
> + cpumask_set_cpu(sibling, &per_cpu(ifs_state, sibling).mask);
> +
> + for_each_cpu(sibling, topology_sibling_cpumask(cpu))
> + wake_up_interruptible(&per_cpu(ifs_state, sibling).scan_wq);
> +
> + if (wait_for_completion_timeout(&test_thread_done, HZ) == 0) {
> + pr_err("Core locked up during IFS test? IFS disabled\n");
> + ifs_disabled = true;
> + }
> +}
> +
> +/*
> + * The sysfs interface to check the test status:
> + * To check the result, for example, cpu0
> + * cat /sys/devices/system/cpu/cpu0/ifs/details
> + */
> +static ssize_t details_show(struct device *dev,
> + struct device_attribute *attr,
> + char *buf)
> +{
> + unsigned int cpu = dev->id;
> + int ret;
> +
> + if (down_trylock(&ifs_sem))
> + return -EBUSY;
What is the ifs_sem protecting? This result is immediately invalid
after the lock is dropped anyway, so why hold it over reading the
value? You can't prevent 2 threads racing each other here.
> +
> + ret = sprintf(buf, "%llx\n", per_cpu(ifs_state, cpu).scan_details);
Should be sysfs_emit() which includes the page buffer safety.
Also, you likely want that format string to be %#llx so that userspace
knows explicitly that this is a hexadecimal value.
> + up(&ifs_sem);
> +
> + return ret;
> +}
> +
> +static DEVICE_ATTR_RO(details);
> +
> +/*
> + * The sysfs interface to check the test status:
> + * To check the status, for example, cpu0
> + * cat /sys/devices/system/cpu/cpu0/ifs/status
> + */
> +static ssize_t status_show(struct device *dev,
> + struct device_attribute *attr,
> + char *buf)
> +{
> + unsigned int cpu = dev->id;
> + u32 scan_result;
> + int ret;
> +
> + if (down_trylock(&ifs_sem))
> + return -EBUSY;
> +
> + scan_result = per_cpu(ifs_state, cpu).status;
> +
> + if (scan_result == SCAN_TEST_FAIL)
> + ret = sprintf(buf, "fail\n");
> + else if (scan_result == SCAN_NOT_TESTED)
> + ret = sprintf(buf, "untested\n");
> + else
> + ret = sprintf(buf, "pass\n");
sysfs_emit() for all of the above.
> +
> + up(&ifs_sem);
> +
> + return ret;
> +}
> +
> +static DEVICE_ATTR_RO(status);
> +
> +/*
> + * The sysfs interface for single core testing
> + * To start test, for example, cpu0
> + * echo 1 > /sys/devices/system/cpu/cpu0/ifs/run_test
> + * To check the result:
> + * cat /sys/devices/system/cpu/cpu0/ifs/result
Just have a CPU mask as an input parameter and avoid needing to hang
ifs sysfs attributes underneath /sys/devices/system/cpu/ifs.
> + * The sibling core gets tested at the same time.
> + */
> +static ssize_t run_test_store(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf, size_t count)
> +{
> + unsigned int cpu = dev->id;
> + bool var;
> + int rc;
> +
> + if (ifs_disabled)
> + return -ENXIO;
> +
> + rc = kstrtobool(buf, &var);
> + if (rc < 0 || var != 1)
> + return -EINVAL;
> +
> + if (down_trylock(&ifs_sem)) {
> + pr_info("another instance in progress.\n");
> + return -EBUSY;
> + }
> + cpu_hotplug_disable();
> + do_core_test(cpu);
> + cpu_hotplug_enable();
> + up(&ifs_sem);
> +
> + return count;
> +}
> +
> +static DEVICE_ATTR_WO(run_test);
> +
> +/* per-cpu scan sysfs attributes */
> +static struct attribute *ifs_attrs[] = {
> + &dev_attr_run_test.attr,
> + &dev_attr_status.attr,
> + &dev_attr_details.attr,
> + NULL
> +};
> +
> +const struct attribute_group ifs_attr_group = {
> + .attrs = ifs_attrs,
> + .name = "ifs",
> +};
> +
> +/* Creates the sysfs files under /sys/devices/system/cpu/cpuX/ifs */
> +int ifs_sysfs_create(unsigned int cpu)
> +{
> + struct device *dev;
> + int ret;
> +
> + dev = get_cpu_device(cpu);
get_cpu_device() neither takes a reference nor does it guarantee that
the cpu device stays registered. So this looks broken.
> + ret = sysfs_create_group(&dev->kobj, &ifs_attr_group);
Dynamic creation of sysfs attributes sometime after the driver loads is
not friendly for something that likely only needs the module loaded for
a test run and then unloaded again. I think this effort would be better
served by building a sysfs topology registered underneath an ifs
platform device.
The ifs platform device attributes can be defined statically such that
when the KOBJ_ADD event fires for the ifs platform device the sysfs
interface will already be up and ready.
With an attribute to configure the CPU mask for a test it also
eliminates the need to have per-CPU ifs/{run_test,result} files.
> + if (ret) {
> + pr_err("failed to create sysfs group\n");
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> +/* Removes the sysfs files under /sys/devices/system/cpu/cpuX/ifs */
> +void ifs_sysfs_remove(unsigned int cpu)
> +{
> + struct device *dev;
> +
> + dev = get_cpu_device(cpu);
> + sysfs_remove_group(&dev->kobj, &ifs_attr_group);
> +}
> +
> +/*
> + * Reload the IFS image. When user wants to install new IFS image
> + * image, reloading must be done.
> + */
> +static ssize_t reload_store(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf, size_t count)
> +{
> + bool var;
> + int rc;
> +
> + if (ifs_disabled)
> + return -ENXIO;
> +
> + rc = kstrtobool(buf, &var);
> + if (rc < 0 || var != 1)
> + return -EINVAL;
This could be simplified to sysfs_streq(buf, "1"); the same applies to allcpu_run_test_store() below.
> +
> + down(&ifs_sem);
> + rc = load_ifs_binary();
> + up(&ifs_sem);
> + if (rc < 0) {
> + pr_info("failed to reload ifs hash and test\n");
> + return rc;
> + }
> +
> + return count;
> +}
> +
> +static DEVICE_ATTR_WO(reload);
> +
> +static int run_allcpu_scan_test(void)
> +{
> + int cpu;
> +
> + if (down_trylock(&ifs_sem)) {
> + pr_info("another instance in progress.\n");
> + return -EBUSY;
> + }
> +
> + cpu_hotplug_disable();
> + for_each_cpu(cpu, cpu_online_mask) {
> + /* Only execute test on the first thread on each core */
> + if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu)
> + continue;
> + do_core_test(cpu);
> + mdelay(core_delay);
> + }
> + cpu_hotplug_enable();
> +
> + up(&ifs_sem);
> + return 0;
> +}
> +
> +/*
> + * The sysfs interface to execute scan test for all online cpus.
> + * The test can be triggered as below:
> + * echo 1 > /sys/devices/system/cpu/ifs/run_test
> + */
> +static ssize_t allcpu_run_test_store(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf, size_t count)
> +{
> + bool var;
> + int rc;
> +
> + if (ifs_disabled)
> + return -ENXIO;
> +
> + rc = kstrtobool(buf, &var);
> + if (rc < 0 || var != 1)
> + return -EINVAL;
You could just cut to the chase and do: sysfs_streq(buf, "1")
> +
> + rc = run_allcpu_scan_test();
> + if (rc < 0)
> + return rc;
> +
> + return count;
> +}
> +
> +/*
> + * Percpu and allcpu ifs have attributes named "run_test".
> + * Since the former is defined in this same file using DEVICE_ATTR_WO()
> + * the latter is defined directly.
> + */
> +static struct device_attribute dev_attr_allcpu_run_test = {
Same feedback to just use DEVICE_ATTR() for this.
> + .attr = { .name = "run_test", .mode = 0200 },
> + .store = allcpu_run_test_store,
> +};
> +
> +/*
> + * Currently loaded IFS image version.
> + */
> +static ssize_t image_version_show(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + return sprintf(buf, "%x\n", ifs_params.loaded_version);
more %#x and sysfs_emit() feedback...
> +}
> +
> +static DEVICE_ATTR_RO(image_version);
> +
> +/*
> + * Currently loaded IFS image version.
> + */
> +static ssize_t cpu_fail_list_show(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + int ret;
> +
> + if (down_trylock(&ifs_sem))
> + return -EBUSY;
> +
> + ret = sprintf(buf, "%*pbl\n", cpumask_pr_args(&ifs_params.fail_mask));
> + up(&ifs_sem);
> + return ret;
> +}
> +
> +static DEVICE_ATTR_RO(cpu_fail_list);
> +
> +static ssize_t cpu_untested_list_show(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + int ret;
> +
> + if (down_trylock(&ifs_sem))
> + return -EBUSY;
> +
> + ret = sprintf(buf, "%*pbl\n", cpumask_pr_args(&ifs_params.not_tested_mask));
> + up(&ifs_sem);
> +
> + return ret;
> +}
> +
> +static DEVICE_ATTR_RO(cpu_untested_list);
> +
> +static ssize_t cpu_pass_list_show(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + int ret;
> +
> + if (down_trylock(&ifs_sem))
> + return -EBUSY;
> +
> + ret = sprintf(buf, "%*pbl\n", cpumask_pr_args(&ifs_params.pass_mask));
> + up(&ifs_sem);
> +
> + return ret;
> +}
> +
> +static DEVICE_ATTR_RO(cpu_pass_list);
> +
> +/*
> + * Status for global ifs test
> + */
> +static ssize_t allcpu_status_show(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + int ret;
> +
> + if (down_trylock(&ifs_sem))
> + return -EBUSY;
> +
> + if (!cpumask_empty(&ifs_params.fail_mask))
> + ret = sprintf(buf, "fail\n");
> + else if (!cpumask_empty(&ifs_params.not_tested_mask))
> + ret = sprintf(buf, "untested\n");
> + else
> + ret = sprintf(buf, "pass\n");
> +
> + up(&ifs_sem);
> +
> + return ret;
> +}
> +
> +/*
> + * Percpu and allcpu ifs have attributes named "status".
> + * Since the former is defined in this same file using DEVICE_ATTR_RO()
> + * the latter is defined directly.
> + */
> +static struct device_attribute dev_attr_allcpu_status = {
> + .attr = { .name = "status", .mode = 0444 },
> + .show = allcpu_status_show,
> +};
Can still do the one-line declaration like this and skip the comment:
DEVICE_ATTR(status, 0444, allcpu_status_show, NULL);
> +
> +/* global scan sysfs attributes */
> +static struct attribute *cpu_ifs_attrs[] = {
> + &dev_attr_reload.attr,
> + &dev_attr_allcpu_run_test.attr,
> + &dev_attr_image_version.attr,
> + &dev_attr_cpu_fail_list.attr,
> + &dev_attr_cpu_untested_list.attr,
> + &dev_attr_cpu_pass_list.attr,
> + &dev_attr_allcpu_status.attr,
> + NULL
> +};
> +
> +const struct attribute_group cpu_ifs_attr_group = {
static?
> + .attrs = cpu_ifs_attrs,
> +};
> +
> +const struct attribute_group *cpu_ifs_attr_groups[] = {
static?
> + &cpu_ifs_attr_group,
> + NULL,
> +};
> +
> +static struct device *cpu_scan_device;
> +
> +/* Creates the sysfs files under /sys/devices/system/cpu/ifs */
> +void cpu_ifs_init(void)
> +{
> + struct device *root;
> +
> + root = cpu_subsys.dev_root;
> + cpu_scan_device = cpu_device_create(root, NULL, cpu_ifs_attr_groups, "ifs");
I just don't think ifs rises to the level of something that belongs in
the /sys/devices/system/cpu/ifs namespace vs /sys/devices/platform/ifs/,
especially since the module need not remain resident while a test is
not running. IIUC it's not a core capability that needs to be available
at all times; it's something that will be kicked off during service
downtime to take inventory of CPU health, right?