Re: [PATCH v8 1/3] uacce: supports device isolation feature

From: Greg KH
Date: Fri Sep 09 2022 - 04:30:24 EST


On Fri, Sep 02, 2022 at 03:13:02AM +0000, Kai Ye wrote:
> UACCE adds a hardware error isolation API. Users can configure the
> isolation frequency through a sysfs node, and UACCE reports the device
> isolation state to user space. If the AER error frequency exceeds the
> configured value within a certain period of time, the device will be
> isolated.
>
> Signed-off-by: Kai Ye <yekai13@xxxxxxxxxx>
> ---
> drivers/misc/uacce/uacce.c | 58 ++++++++++++++++++++++++++++++++++++++
> include/linux/uacce.h | 11 ++++++++
> 2 files changed, 69 insertions(+)
>
> diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
> index 281c54003edc..41f454c89cd1 100644
> --- a/drivers/misc/uacce/uacce.c
> +++ b/drivers/misc/uacce/uacce.c
> @@ -7,6 +7,8 @@
> #include <linux/slab.h>
> #include <linux/uacce.h>
>
> +#define MAX_ERR_ISOLATE_COUNT 65535

What units is this in? Shouldn't this be in a .h file somewhere, as it
is a limit you impose on a driver implementing this API?

> +
> static struct class *uacce_class;
> static dev_t uacce_devt;
> static DEFINE_MUTEX(uacce_mutex);
> @@ -339,12 +341,57 @@ static ssize_t region_dus_size_show(struct device *dev,
> uacce->qf_pg_num[UACCE_QFRT_DUS] << PAGE_SHIFT);
> }
>
> +static ssize_t isolate_show(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + struct uacce_device *uacce = to_uacce_device(dev);
> +
> + if (!uacce->ops->get_isolate_state)
> + return -ENODEV;
> +
> + return sysfs_emit(buf, "%d\n", uacce->ops->get_isolate_state(uacce));
> +}
> +
> +static ssize_t isolate_strategy_show(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + struct uacce_device *uacce = to_uacce_device(dev);
> + u32 val;
> +
> + val = uacce->ops->isolate_strategy_read(uacce);
> + if (val > MAX_ERR_ISOLATE_COUNT)
> + return -EINVAL;

How can a driver return a higher number here?

> +
> + return sysfs_emit(buf, "%u\n", val);
> +}
> +
> +static ssize_t isolate_strategy_store(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf, size_t count)
> +{
> + struct uacce_device *uacce = to_uacce_device(dev);
> + unsigned long val;
> + int ret;
> +
> + if (kstrtoul(buf, 0, &val) < 0)
> + return -EINVAL;
> +
> + if (val > MAX_ERR_ISOLATE_COUNT)
> + return -EINVAL;
> +
> + ret = uacce->ops->isolate_strategy_write(uacce, val);
> +
> + return ret ? ret : count;

Please write this out as a real if statement instead of using the
ternary operator.

> +}
> +
> static DEVICE_ATTR_RO(api);
> static DEVICE_ATTR_RO(flags);
> static DEVICE_ATTR_RO(available_instances);
> static DEVICE_ATTR_RO(algorithms);
> static DEVICE_ATTR_RO(region_mmio_size);
> static DEVICE_ATTR_RO(region_dus_size);
> +static DEVICE_ATTR_RO(isolate);
> +static DEVICE_ATTR_RW(isolate_strategy);
>
> static struct attribute *uacce_dev_attrs[] = {
> &dev_attr_api.attr,
> @@ -353,6 +400,8 @@ static struct attribute *uacce_dev_attrs[] = {
> &dev_attr_algorithms.attr,
> &dev_attr_region_mmio_size.attr,
> &dev_attr_region_dus_size.attr,
> + &dev_attr_isolate.attr,
> + &dev_attr_isolate_strategy.attr,
> NULL,
> };
>
> @@ -368,6 +417,15 @@ static umode_t uacce_dev_is_visible(struct kobject *kobj,
> (!uacce->qf_pg_num[UACCE_QFRT_DUS])))
> return 0;
>
> + if (attr == &dev_attr_isolate_strategy.attr &&
> + (!uacce->ops->isolate_strategy_read ||
> + !uacce->ops->isolate_strategy_write))

So you need either a read or write? Why not both?

thanks,

greg k-h