Re: [PATCH V3 2/2] powerpc/powernv : Add support for OPAL-OCC command/response interface
From: Cyril Bur
Date: Mon Jun 19 2017 - 02:50:06 EST
On Mon, 2017-06-19 at 11:44 +0530, Shilpasri G Bhat wrote:
> In P9, OCC (On-Chip-Controller) supports shared memory based
> commad-response interface. Within the shared memory there is an OPAL
> command buffer and OCC response buffer that can be used to send
> inband commands to OCC. This patch adds a platform driver to support
> the command/response interface between OCC and the host.
>
Hi Shilpasri,
Looks really nice now! I do have a few questions on the new stuff
around locking for read()ing the response. I've put one inline in the
read() code, feel free to disagree :)
My other question is now sort of the opposite problem, what if you have
two threads, one does a write() so now your read()ing mutex allows
reads and then thread A does write() at the 'same' time as thread B
does read(). Is there a risk of corruption? Would it be good to also
hold cmd_in_progress in read().
I'm also assuming that two writes is fine and that loosing the previous
response is by design - users might not care about the response, or
they might want ignore it or (as often happens in userspace) they don't
feel the need to check return codes.
> Signed-off-by: Shilpasri G Bhat <shilpa.bhat@xxxxxxxxxxxxxxxxxx>
> ---
> Changes from V2:
> - Use atomic_set while dropping locks
> - Add rsp_consumed flag to protect the read()
> - Change the logging level of prints
> - Add MAX_POSSIBLE_CHIPS
> - Move the endian conversion of data to opal_occ_cmd_prepare()
>
> arch/powerpc/include/asm/opal-api.h | 41 +++-
> arch/powerpc/include/asm/opal.h | 3 +
> arch/powerpc/platforms/powernv/Makefile | 2 +-
> arch/powerpc/platforms/powernv/opal-occ.c | 307 +++++++++++++++++++++++++
> arch/powerpc/platforms/powernv/opal-wrappers.S | 1 +
> arch/powerpc/platforms/powernv/opal.c | 8 +
> 6 files changed, 360 insertions(+), 2 deletions(-)
> create mode 100644 arch/powerpc/platforms/powernv/opal-occ.c
>
> diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
> index cb3e624..011d86c 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -42,6 +42,10 @@
> #define OPAL_I2C_STOP_ERR -24
> #define OPAL_XIVE_PROVISIONING -31
> #define OPAL_XIVE_FREE_ACTIVE -32
> +#define OPAL_OCC_INVALID_STATE -33
> +#define OPAL_OCC_BUSY -34
> +#define OPAL_OCC_CMD_TIMEOUT -35
> +#define OPAL_OCC_RSP_MISMATCH -36
>
> /* API Tokens (in r0) */
> #define OPAL_INVALID_CALL -1
> @@ -190,7 +194,8 @@
> #define OPAL_NPU_INIT_CONTEXT 146
> #define OPAL_NPU_DESTROY_CONTEXT 147
> #define OPAL_NPU_MAP_LPAR 148
> -#define OPAL_LAST 148
> +#define OPAL_OCC_COMMAND 149
> +#define OPAL_LAST 149
>
> /* Device tree flags */
>
> @@ -829,6 +834,40 @@ struct opal_prd_msg_header {
>
> struct opal_prd_msg;
>
> +enum occ_cmd {
> + OCC_CMD_AMESTER_PASS_THRU = 0,
> + OCC_CMD_CLEAR_SENSOR_DATA,
> + OCC_CMD_SET_POWER_CAP,
> + OCC_CMD_SET_POWER_SHIFTING_RATIO,
> + OCC_CMD_SELECT_SENSOR_GROUPS,
> + OCC_CMD_LAST
> +};
> +
> +struct opal_occ_cmd_rsp_msg {
> + __be64 cdata;
> + __be64 rdata;
> + __be16 cdata_size;
> + __be16 rdata_size;
> + u8 cmd;
> + u8 request_id;
> + u8 status;
> +};
> +
> +struct opal_occ_cmd_data {
> + __be16 size;
> + u8 cmd;
> + u8 data[];
> +};
> +
> +struct opal_occ_rsp_data {
> + __be16 size;
> + u8 status;
> + u8 data[];
> +};
> +
> +#define MAX_OPAL_CMD_DATA_LENGTH 4090
> +#define MAX_OCC_RSP_DATA_LENGTH 8698
> +
> #define OCC_RESET 0
> #define OCC_LOAD 1
> #define OCC_THROTTLE 2
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index 03ed493..e55ed79 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -346,6 +346,9 @@ static inline int opal_get_async_rc(struct opal_msg msg)
>
> void opal_wake_poller(void);
>
> +int64_t opal_occ_command(int chip_id, struct opal_occ_cmd_rsp_msg *msg,
> + bool retry);
> +
> #endif /* __ASSEMBLY__ */
>
> #endif /* _ASM_POWERPC_OPAL_H */
> diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
> index b5d98cb..f5f0902 100644
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o
> obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
> obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
> obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
> -obj-y += opal-kmsg.o
> +obj-y += opal-kmsg.o opal-occ.o
>
> obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
> obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o
> diff --git a/arch/powerpc/platforms/powernv/opal-occ.c b/arch/powerpc/platforms/powernv/opal-occ.c
> new file mode 100644
> index 0000000..d3deaa3
> --- /dev/null
> +++ b/arch/powerpc/platforms/powernv/opal-occ.c
> @@ -0,0 +1,307 @@
> +/*
> + * Copyright IBM Corporation 2017
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + */
> +
> +#define pr_fmt(fmt) "opal-occ: " fmt
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/platform_device.h>
> +#include <linux/miscdevice.h>
> +#include <linux/highmem.h>
> +#include <linux/slab.h>
> +#include <asm/opal.h>
> +
> +struct occ {
> + struct miscdevice dev;
> + struct opal_occ_rsp_data *rsp;
> + atomic_t session;
> + atomic_t cmd_in_progress;
> + atomic_t rsp_consumed;
> + int id;
> + u8 last_token;
> +} *occs;
> +static int nr_occs;
> +
> +static int __send_occ_command(struct opal_occ_cmd_rsp_msg *msg,
> + int chip_id, bool retry)
> +{
> + struct opal_msg async_msg;
> + int rc;
> +
> + rc = opal_occ_command(chip_id, msg, retry);
> + if (rc == OPAL_ASYNC_COMPLETION) {
> + rc = opal_async_wait_response(msg->request_id, &async_msg);
> + if (rc) {
> + pr_devel("Failed to wait for async response %d\n", rc);
> + return rc;
> + }
> + }
> +
> + return rc ? rc : opal_get_async_rc(async_msg);
> +}
> +
> +static int send_occ_command(struct opal_occ_cmd_rsp_msg *msg, struct occ *occ)
> +{
> + int token, rc;
> +
> + token = opal_async_get_unique_token_interruptible(occ->last_token);
> + if (token < 0) {
> + pr_devel("Failed to get the token for OCC command %d (%d)\n",
> + msg->cmd, token);
> + return token;
> + }
> +
> + msg->request_id = token;
> + rc = __send_occ_command(msg, occ->id, false);
> +
> + switch (rc) {
> + case OPAL_OCC_CMD_TIMEOUT:
> + case OPAL_OCC_RSP_MISMATCH:
> + pr_devel("Failed OCC command with %d. Retrying it again\n", rc);
> + occ->last_token = token;
> + opal_async_release_token(token);
> + token = opal_async_get_unique_token_interruptible(token);
> + if (token < 0) {
> + pr_devel("Failed to get the token for retry OCC command %d (%d)\n",
> + msg->cmd, token);
> + return opal_error_code(rc);
> + }
> +
> + msg->request_id = token;
> + rc = __send_occ_command(msg, occ->id, true);
> + break;
> + default:
> + break;
> + }
> +
> + occ->last_token = token;
> + opal_async_release_token(token);
> + return opal_error_code(rc);
> +}
> +
> +static int opal_occ_cmd_prepare(struct opal_occ_cmd_data *cmd, struct occ *occ)
> +{
> + struct opal_occ_cmd_rsp_msg msg;
> + int rc;
> +
> + msg.cmd = cmd->cmd;
> + msg.cdata = cpu_to_be64(__pa(cmd->data));
> + msg.cdata_size = cpu_to_be16(cmd->size);
> + msg.rdata = cpu_to_be64(__pa(occ->rsp->data));
> +
> + rc = send_occ_command(&msg, occ);
> + if (rc) {
> + pr_info("Failed OCC command %d with %d\n", cmd->cmd, rc);
> + return rc;
> + }
> +
> + occ->rsp->size = be16_to_cpu(msg.rdata_size);
> + occ->rsp->status = msg.status;
> + if (occ->rsp->size > MAX_OCC_RSP_DATA_LENGTH) {
> + pr_devel("Bigger OCC response size, clipping to %d\n",
> + MAX_OCC_RSP_DATA_LENGTH);
> + occ->rsp->size = MAX_OCC_RSP_DATA_LENGTH;
> + }
> +
> + atomic_set(&occ->rsp_consumed, 1);
> + return rc;
> +}
> +
> +static ssize_t opal_occ_write(struct file *file, const char __user *buf,
> + size_t count, loff_t *ppos)
> +{
> + struct miscdevice *dev = file->private_data;
> + struct occ *occ = container_of(dev, struct occ, dev);
> + struct opal_occ_cmd_data *cmd;
> + int rc;
> +
> + if (count < sizeof(*cmd))
> + return -EINVAL;
> +
> + if (atomic_cmpxchg(&occ->cmd_in_progress, 0, 1))
> + return -EBUSY;
> +
> + cmd = kmalloc(count, GFP_KERNEL);
> + if (!cmd) {
> + rc = -ENOMEM;
> + goto out;
> + }
> +
> + rc = copy_from_user(cmd, buf, count);
> + if (rc) {
> + pr_err("Failed to copy OCC command request message\n");
> + rc = -EFAULT;
> + goto free_cmd;
> + }
> +
> + if (cmd->size > MAX_OPAL_CMD_DATA_LENGTH) {
> + rc = -EINVAL;
> + goto free_cmd;
> + }
> +
> + rc = opal_occ_cmd_prepare(cmd, occ);
> + if (!rc)
> + rc = count;
> +
> +free_cmd:
> + kfree(cmd);
> +out:
> + atomic_set(&occ->cmd_in_progress, 0);
> + return rc;
> +}
> +
> +static ssize_t opal_occ_read(struct file *file, char __user *buf,
> + size_t count, loff_t *ppos)
> +{
> + struct miscdevice *dev = file->private_data;
> + struct occ *occ = container_of(dev, struct occ, dev);
> + int rc;
> +
> + if (count < sizeof(*occ->rsp) + occ->rsp->size)
> + return -EINVAL;
> +
> + if (!atomic_cmpxchg(&occ->rsp_consumed, 1, 0))
> + return -EBUSY;
> +
> + rc = copy_to_user((void __user *)buf, occ->rsp,
> + sizeof(occ->rsp) + occ->rsp->size);
> + if (rc) {
If copy_to_user() fails, has the response been consumed? I would argue
no also because what if rc is EAGAIN which (should) prompt userspace to
retry... and then they won't be able to get their response...
> + pr_err("Failed to copy OCC response data to user\n");
> + return rc;
> + }
> +
> + return sizeof(*occ->rsp) + occ->rsp->size;
> +}
> +
> +static int opal_occ_open(struct inode *inode, struct file *file)
> +{
> + struct miscdevice *dev = file->private_data;
> + struct occ *occ = container_of(dev, struct occ, dev);
> +
> + return atomic_cmpxchg(&occ->session, 0, 1) ? -EBUSY : 0;
> +}
> +
> +static int opal_occ_release(struct inode *inode, struct file *file)
> +{
> + struct miscdevice *dev = file->private_data;
> + struct occ *occ = container_of(dev, struct occ, dev);
> +
> + atomic_set(&occ->session, 0);
> +
> + return 0;
> +}
> +
> +static const struct file_operations opal_occ_fops = {
> + .open = opal_occ_open,
> + .read = opal_occ_read,
> + .write = opal_occ_write,
> + .release = opal_occ_release,
> + .owner = THIS_MODULE,
> +};
> +
> +#define MAX_POSSIBLE_CHIPS 256
> +
> +static int opal_occ_probe(struct platform_device *pdev)
> +{
> + unsigned int chip[MAX_POSSIBLE_CHIPS];
> + unsigned int cpu;
> + unsigned int prev_chip_id = UINT_MAX;
> + int i, rc;
> +
> + for_each_possible_cpu(cpu) {
> + unsigned int id = cpu_to_chip_id(cpu);
> +
> + if (prev_chip_id != id) {
> + int j = nr_occs;
> +
> + while (--j >= 0)
> + if (chip[j] == id)
> + continue;
> +
> + prev_chip_id = id;
> + chip[nr_occs++] = id;
> + WARN_ON_ONCE(nr_occs >= MAX_POSSIBLE_CHIPS - 1);
> + }
> + }
> +
> + occs = kcalloc(nr_occs, sizeof(*occs), GFP_KERNEL);
> + if (!occs)
> + return -ENOMEM;
> +
> + for (i = 0; i < nr_occs; i++) {
> + char name[10];
> +
> + occs[i].id = chip[i];
> + occs[i].dev.minor = MISC_DYNAMIC_MINOR;
> + snprintf(name, 10, "occ%d", chip[i]);
> + occs[i].dev.name = name;
> + occs[i].dev.fops = &opal_occ_fops;
> + occs[i].last_token = -1;
> + occs[i].rsp = kmalloc(sizeof(occs[i].rsp) +
> + MAX_OCC_RSP_DATA_LENGTH,
> + GFP_KERNEL);
> + if (!occs[i].rsp) {
> + rc = -ENOMEM;
> + goto free_occs;
> + }
> +
> + rc = misc_register(&occs[i].dev);
> + if (rc)
> + goto free_occ_rsp_data;
> + }
> +
> + return 0;
> +
> +free_occ_rsp_data:
> + kfree(occs[i].rsp);
> +free_occs:
> + while (--i >= 0) {
> + kfree(occs[i].rsp);
> + misc_deregister(&occs[i].dev);
> + }
> + kfree(occs);
> +
> + return rc;
> +}
> +
> +static int opal_occ_remove(struct platform_device *pdev)
> +{
> + int i;
> +
> + for (i = 0; i < nr_occs; i++) {
> + kfree(occs[i].rsp);
> + misc_deregister(&occs[i].dev);
> + }
> +
> + kfree(occs);
> + return 0;
> +}
> +
> +static const struct of_device_id opal_occ_match[] = {
> + { .compatible = "ibm,opal-occ-cmd-rsp-interface" },
> + { },
> +};
> +
> +static struct platform_driver opal_occ_driver = {
> + .driver = {
> + .name = "opal-occ",
> + .of_match_table = opal_occ_match,
> + },
> + .probe = opal_occ_probe,
> + .remove = opal_occ_remove,
> +};
> +
> +module_platform_driver(opal_occ_driver);
> +
> +MODULE_DESCRIPTION("PowerNV OPAL-OCC driver");
> +MODULE_LICENSE("GPL");
> diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
> index f620572..e6bf18d 100644
> --- a/arch/powerpc/platforms/powernv/opal-wrappers.S
> +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
> @@ -310,3 +310,4 @@ OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
> OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
> OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
> OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
> +OPAL_CALL(opal_occ_command, OPAL_OCC_COMMAND);
> diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
> index 59684b4..d87c61b 100644
> --- a/arch/powerpc/platforms/powernv/opal.c
> +++ b/arch/powerpc/platforms/powernv/opal.c
> @@ -815,6 +815,9 @@ static int __init opal_init(void)
> opal_pdev_init("ibm,opal-flash");
> opal_pdev_init("ibm,opal-prd");
>
> + /* Initialize platform device: OCC_OPAL command-response interface */
> + opal_pdev_init("ibm,opal-occ-cmd-rsp-interface");
> +
> /* Initialise platform device: oppanel interface */
> opal_pdev_init("ibm,opal-oppanel");
>
> @@ -859,6 +862,7 @@ void opal_shutdown(void)
> EXPORT_SYMBOL_GPL(opal_flash_write);
> EXPORT_SYMBOL_GPL(opal_flash_erase);
> EXPORT_SYMBOL_GPL(opal_prd_msg);
> +EXPORT_SYMBOL_GPL(opal_occ_command);
>
> /* Convert a region of vmalloc memory to an opal sg list */
> struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
> @@ -937,6 +941,10 @@ int opal_error_code(int rc)
> case OPAL_UNSUPPORTED: return -EIO;
> case OPAL_HARDWARE: return -EIO;
> case OPAL_INTERNAL_ERROR: return -EIO;
> + case OPAL_OCC_BUSY: return -EBUSY;
> + case OPAL_OCC_INVALID_STATE:
> + case OPAL_OCC_CMD_TIMEOUT:
> + case OPAL_OCC_RSP_MISMATCH: return -EIO;
> default:
> pr_err("%s: unexpected OPAL error %d\n", __func__, rc);
> return -EIO;