Re: [PATCH v4 14/25] nvdimm/ocxl: Add support for Admin commands

From: Dan Williams
Date: Thu Apr 02 2020 - 02:41:35 EST


On Sun, Mar 29, 2020 at 10:23 PM Alastair D'Silva <alastair@xxxxxxxxxxx> wrote:
>
> Admin commands for these devices are the primary means of interacting
> with the device controller to provide functionality beyond the load/store
> capabilities offered via the NPU.
>
> For example, SMART data, firmware update, and device error logs are
> implemented via admin commands.
>
> This patch requests the metadata required to issue admin commands, as well
> as some helper functions to construct and check the completion of the
> commands.
>
> Signed-off-by: Alastair D'Silva <alastair@xxxxxxxxxxx>
> ---
> drivers/nvdimm/ocxl/main.c | 65 ++++++
> drivers/nvdimm/ocxl/ocxlpmem.h | 50 ++++-
> drivers/nvdimm/ocxl/ocxlpmem_internal.c | 261 ++++++++++++++++++++++++
> 3 files changed, 375 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/nvdimm/ocxl/main.c b/drivers/nvdimm/ocxl/main.c
> index be76acd33d74..8db573036423 100644
> --- a/drivers/nvdimm/ocxl/main.c
> +++ b/drivers/nvdimm/ocxl/main.c
> @@ -217,6 +217,58 @@ static int register_lpc_mem(struct ocxlpmem *ocxlpmem)
> return 0;
> }
>
> +/**
> + * extract_command_metadata() - Extract command data from MMIO & save it for further use
> + * @ocxlpmem: the device metadata
> + * @offset: The base address of the command data structures (address of CREQO)
> + * @command_metadata: A pointer to the command metadata to populate
> + * Return: 0 on success, negative on failure
> + */
> +static int extract_command_metadata(struct ocxlpmem *ocxlpmem, u32 offset,
> + struct command_metadata *command_metadata)

How about "struct ocxlpmem *ocp" throughout all these patches? The
full duplication of the type name as the local variable name makes
this look like non-idiomatic Linux code to me. It had not quite hit me
until I saw "struct command_metadata *command_metadata" that just
strikes me as too literal and the person that gets to maintain this
code later will appreciate a smaller amount of typing.

Also, is it really the case that the layout of the admin command
metadata needs to be programmatically determined at runtime? I would
expect it to be a static command definition in the spec.


> +{
> + int rc;
> + u64 tmp;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, offset,
> + OCXL_LITTLE_ENDIAN, &tmp);
> + if (rc)
> + return rc;
> +
> + command_metadata->request_offset = tmp >> 32;
> + command_metadata->response_offset = tmp & 0xFFFFFFFF;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, offset + 8,
> + OCXL_LITTLE_ENDIAN, &tmp);
> + if (rc)
> + return rc;
> +
> + command_metadata->data_offset = tmp >> 32;
> + command_metadata->data_size = tmp & 0xFFFFFFFF;
> +
> + command_metadata->id = 0;
> +
> + return 0;
> +}
> +
> +/**
> + * setup_command_metadata() - Set up the command metadata
> + * @ocxlpmem: the device metadata
> + */
> +static int setup_command_metadata(struct ocxlpmem *ocxlpmem)
> +{
> + int rc;
> +
> + mutex_init(&ocxlpmem->admin_command.lock);
> +
> + rc = extract_command_metadata(ocxlpmem, GLOBAL_MMIO_ACMA_CREQO,
> + &ocxlpmem->admin_command);
> + if (rc)
> + return rc;
> +
> + return 0;
> +}
> +
> /**
> * allocate_minor() - Allocate a minor number to use for an OpenCAPI pmem device
> * @ocxlpmem: the device metadata
> @@ -421,6 +473,14 @@ static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>
> ocxlpmem->pdev = pci_dev_get(pdev);
>
> + ocxlpmem->timeouts[ADMIN_COMMAND_ERRLOG] = 2000; // ms
> + ocxlpmem->timeouts[ADMIN_COMMAND_HEARTBEAT] = 100; // ms
> + ocxlpmem->timeouts[ADMIN_COMMAND_SMART] = 100; // ms
> + ocxlpmem->timeouts[ADMIN_COMMAND_CONTROLLER_DUMP] = 1000; // ms
> + ocxlpmem->timeouts[ADMIN_COMMAND_CONTROLLER_STATS] = 100; // ms
> + ocxlpmem->timeouts[ADMIN_COMMAND_SHUTDOWN] = 1000; // ms
> + ocxlpmem->timeouts[ADMIN_COMMAND_FW_UPDATE] = 16000; // ms
> +
> pci_set_drvdata(pdev, ocxlpmem);
>
> ocxlpmem->ocxl_fn = ocxl_function_open(pdev);
> @@ -467,6 +527,11 @@ static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> goto err;
> }
>
> + if (setup_command_metadata(ocxlpmem)) {
> + dev_err(&pdev->dev, "Could not read command metadata\n");
> + goto err;
> + }
> +
> elapsed = 0;
> timeout = ocxlpmem->readiness_timeout +
> ocxlpmem->memory_available_timeout;
> diff --git a/drivers/nvdimm/ocxl/ocxlpmem.h b/drivers/nvdimm/ocxl/ocxlpmem.h
> index 3eadbe19f6d0..b72b3f909fc3 100644
> --- a/drivers/nvdimm/ocxl/ocxlpmem.h
> +++ b/drivers/nvdimm/ocxl/ocxlpmem.h
> @@ -7,6 +7,7 @@
> #include <linux/mm.h>
>
> #define LABEL_AREA_SIZE BIT_ULL(PA_SECTION_SHIFT)
> +#define DEFAULT_TIMEOUT 100
>
> #define GLOBAL_MMIO_CHI 0x000
> #define GLOBAL_MMIO_CHIC 0x008
> @@ -57,6 +58,7 @@
> #define GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED BIT_ULL(5) // CDC
> #define GLOBAL_MMIO_HCI_REQ_HEALTH_PERF BIT_ULL(6) // CHPD
>
> +// must be maintained with admin_command_names in ocxlpmem_internal.c
> #define ADMIN_COMMAND_HEARTBEAT 0x00u
> #define ADMIN_COMMAND_SHUTDOWN 0x01u
> #define ADMIN_COMMAND_FW_UPDATE 0x02u
> @@ -68,6 +70,13 @@
> #define ADMIN_COMMAND_CMD_CAPS 0x08u
> #define ADMIN_COMMAND_MAX 0x08u
>
> +#define NS_COMMAND_SECURE_ERASE 0x20ull
> +
> +#define NS_RESPONSE_SECURE_ERASE_ACCESSIBLE_SUCCESS 0x20
> +#define NS_RESPONSE_SECURE_ERASE_ACCESSIBLE_ATTEMPTED 0x28
> +#define NS_RESPONSE_SECURE_ERASE_DEFECTIVE_SUCCESS 0x30
> +#define NS_RESPONSE_SECURE_ERASE_DEFECTIVE_ATTEMPTED 0x38
> +
> #define STATUS_SUCCESS 0x00
> #define STATUS_MEM_UNAVAILABLE 0x20
> #define STATUS_BLOCKED_BG_TASK 0x21
> @@ -81,6 +90,16 @@
> #define STATUS_FW_ARG_INVALID STATUS_BAD_REQUEST_PARM
> #define STATUS_FW_INVALID STATUS_BAD_DATA_PARM
>
> +struct command_metadata {
> + u32 request_offset;
> + u32 response_offset;
> + u32 data_offset;
> + u32 data_size;
> + struct mutex lock; /* locks access to this command */
> + u16 id;
> + u8 op_code;
> +};
> +
> struct ocxlpmem {
> struct device dev;
> struct pci_dev *pdev;
> @@ -91,10 +110,11 @@ struct ocxlpmem {
> struct ocxl_afu *ocxl_afu;
> struct ocxl_context *ocxl_context;
> void *metadata_addr;
> + struct command_metadata admin_command;
> struct resource pmem_res;
> struct nd_region *nd_region;
> char fw_version[8 + 1];
> -
> + u32 timeouts[ADMIN_COMMAND_MAX + 1];
> u32 max_controller_dump_size;
> u16 scm_revision; // major/minor
> u8 readiness_timeout; /* The worst case time (in seconds) that the host
> @@ -123,3 +143,31 @@ struct ocxlpmem {
> * Returns 0 on success, negative on error
> */
> int ocxlpmem_chi(const struct ocxlpmem *ocxlpmem, u64 *chi);
> +
> +/**
> + * admin_command_execute() - Execute an admin command and wait for completion
> + *
> + * Additional MMIO registers (dependent on the command) may
> + * need to be initialized
> + *
> + * @ocxlpmem: the device metadata
> + * @op_code: the code for the admin command
> + * Returns 0 on success, -EINVAL for a bad op code, -EBUSY on timeout
> + */
> +int admin_command_execute(struct ocxlpmem *ocxlpmem, u8 op_code);
> +
> +/**
> + * admin_response_handled() - Notify the controller that the admin response has been handled
> + * @ocxlpmem: the device metadata
> + * Returns 0 on success, negative on failure
> + */
> +int admin_response_handled(const struct ocxlpmem *ocxlpmem);
> +
> +/**
> + * warn_status() - Emit a kernel warning showing a command status.
> + * @ocxlpmem: the device metadata
> + * @message: A message to accompany the warning
> + * @status: The command status
> + */
> +void warn_status(const struct ocxlpmem *ocxlpmem, const char *message,
> + u8 status);
> diff --git a/drivers/nvdimm/ocxl/ocxlpmem_internal.c b/drivers/nvdimm/ocxl/ocxlpmem_internal.c
> index 5578169b7515..7470a6ab3b08 100644
> --- a/drivers/nvdimm/ocxl/ocxlpmem_internal.c
> +++ b/drivers/nvdimm/ocxl/ocxlpmem_internal.c
> @@ -17,3 +17,264 @@ int ocxlpmem_chi(const struct ocxlpmem *ocxlpmem, u64 *chi)
>
> return 0;
> }
> +
> +#define COMMAND_REQUEST_SIZE (8 * sizeof(u64))
> +/**
> + * scm_command_request() - Set up a command request
> + * @cmd: The metadata for the type of command to be issued
> + * @op_code: the op code for the command
> + * @valid_bytes: the number of bytes in the header to preserve (these must be set before calling)
> + */
> +static int scm_command_request(const struct ocxlpmem *ocxlpmem,
> + struct command_metadata *cmd, u8 op_code,
> + u8 valid_bytes)

Ah, here's an abbreviation for command metadata.

> +{
> + u64 val = op_code;
> + int rc;
> + u8 i;
> +
> + cmd->op_code = op_code;
> + cmd->id++;
> +
> + val |= ((u64)cmd->id) << 16;
> +
> + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, cmd->request_offset,
> + OCXL_LITTLE_ENDIAN, val);
> + if (rc)
> + return rc;
> +
> + for (i = valid_bytes; i < COMMAND_REQUEST_SIZE; i += sizeof(u64)) {
> + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> + cmd->request_offset + i,
> + OCXL_LITTLE_ENDIAN, 0);
> + if (rc)
> + return rc;
> + }
> +
> + return 0;
> +}
> +
> +/**
> + * admin_command_request() - Issue an admin command request
> + * @ocxlpmem: the device metadata
> + * @op_code: The op-code for the command
> + *
> + * Returns an identifier for the command, or negative on error
> + */
> +static int admin_command_request(struct ocxlpmem *ocxlpmem, u8 op_code)
> +{
> + u8 valid_bytes = sizeof(u64);
> +
> + switch (op_code) {
> + case ADMIN_COMMAND_HEARTBEAT:
> + case ADMIN_COMMAND_SHUTDOWN:
> + case ADMIN_COMMAND_ERRLOG:
> + case ADMIN_COMMAND_CMD_CAPS:
> + valid_bytes += 0;
> + break;
> + case ADMIN_COMMAND_FW_UPDATE:
> + case ADMIN_COMMAND_SMART:
> + case ADMIN_COMMAND_CONTROLLER_STATS:
> + case ADMIN_COMMAND_CONTROLLER_DUMP:
> + valid_bytes += sizeof(u64);
> + break;
> + case ADMIN_COMMAND_FW_DEBUG:
> + valid_bytes += 3 * sizeof(u64);
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> + return scm_command_request(ocxlpmem, &ocxlpmem->admin_command, op_code,
> + valid_bytes);

Why isn't this just:

ocp_command_request(ocp, op_code, valid_bytes);

...because it's not clear to me why this switches from a generic name
like admin_command_request() and then switches to scm_. The
"command_metadata" is implied by the ocp context and the op_code,
right?


> +}
> +
> +static int command_response(const struct ocxlpmem *ocxlpmem,
> + const struct command_metadata *cmd)
> +{
> + u64 val;
> + u16 id;
> + u8 status;
> + int rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + cmd->response_offset,
> + OCXL_LITTLE_ENDIAN, &val);
> + if (rc)
> + return rc;
> +
> + status = val & 0xff;
> + id = (val >> 16) & 0xffff;
> +
> + if (id != cmd->id) {
> + dev_err(&ocxlpmem->dev,
> + "Expected response for command %d, but received response for command %d instead.\n",
> + cmd->id, id);
> + return -EBUSY;
> + }
> +
> + return status;
> +}
> +
> +/**
> + * admin_response() - Validate an admin response
> + * @ocxlpmem: the device metadata
> + * Returns the status code of the command, or negative on error
> + */
> +static int admin_response(const struct ocxlpmem *ocxlpmem)
> +{
> + return command_response(ocxlpmem, &ocxlpmem->admin_command);
> +}
> +
> +/**
> + * admin_command_exec() - Notify the controller to start processing a pending admin command
> + * @ocxlpmem: the device metadata
> + * Returns 0 on success, negative on error
> + */
> +static int admin_command_exec(const struct ocxlpmem *ocxlpmem)
> +{
> + return ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
> + OCXL_LITTLE_ENDIAN, GLOBAL_MMIO_HCI_ACRW);
> +}
> +
> +static bool admin_command_complete(const struct ocxlpmem *ocxlpmem)
> +{
> + u64 val = 0;
> +
> + int rc = ocxlpmem_chi(ocxlpmem, &val);
> +
> + WARN_ON(rc);
> +
> + return (val & GLOBAL_MMIO_CHI_ACRA) != 0;
> +}
> +
> +/**
> + * admin_command_complete_timeout() - Wait for an admin command to finish executing
> + * @ocxlpmem: the device metadata
> + * @command: the admin command to wait for completion (determines the timeout)
> + * Returns 0 on success, -EBUSY on timeout
> + */
> +static int admin_command_complete_timeout(const struct ocxlpmem *ocxlpmem,
> + int command)
> +{
> + unsigned long timeout = jiffies +
> + msecs_to_jiffies(ocxlpmem->timeouts[command]);
> +
> + // 32 is the next power of 2 greater than the 20ms minimum for msleep
> +#define TIMEOUT_SLEEP_MILLIS 32
> + do {
> + if (admin_command_complete(ocxlpmem))
> + return 0;
> + msleep(TIMEOUT_SLEEP_MILLIS);
> + } while (time_before(jiffies, timeout));
> +
> + if (admin_command_complete(ocxlpmem))
> + return 0;
> +
> + return -EBUSY;
> +}
> +
> +// Must be maintained with ADMIN_COMMAND_* in ocxlpmem.h
> +static const char * const admin_command_names[] = {
> + "heartbeat",
> + "shutdown",
> + "firmware update",
> + "firmware debug",
> + "retrieve error log",
> + "retrieve SMART data",
> + "controller statistics",
> + "controller dump",
> + "command capabilities",
> +};
> +
> +/**
> + * admin_command_name() - get the name of an admin command
> + * @ocxlpmem: the device metadata
> + * @op_code: the code for the admin command
> + * Returns a string representing the name of the command
> + */
> +static const char *admin_command_name(u8 op_code)
> +{
> + if (op_code > ADMIN_COMMAND_MAX)
> + return "unknown command";
> +
> + return admin_command_names[op_code];
> +}
> +
> +/**
> + * admin_command_execute() - Execute an admin command and wait for completion
> + *
> + * Additional MMIO registers (dependent on the command) may
> + * need to be initialized
> + *
> + * @ocxlpmem: the device metadata
> + * @op_code: the code for the admin command
> + * Returns 0 on success, -EINVAL for a bad op code, -EBUSY on timeout
> + */
> +int admin_command_execute(struct ocxlpmem *ocxlpmem, u8 op_code)
> +{
> + int rc;
> +
> + if (op_code > ADMIN_COMMAND_MAX)
> + return -EINVAL;
> +
> + rc = admin_command_request(ocxlpmem, op_code);
> + if (rc)
> + return rc;
> +
> + rc = admin_command_exec(ocxlpmem);
> + if (rc)
> + return rc;
> +
> + rc = admin_command_complete_timeout(ocxlpmem, op_code);
> + if (rc < 0) {
> + dev_warn(&ocxlpmem->dev, "%s timed out\n",
> + admin_command_name(op_code));
> + return rc;
> + }
> +
> + return admin_response(ocxlpmem);
> +}
> +
> +int admin_response_handled(const struct ocxlpmem *ocxlpmem)
> +{
> + // writing to the CHIC register clears the bit in CHI
> + return ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHIC,
> + OCXL_LITTLE_ENDIAN, GLOBAL_MMIO_CHI_ACRA);
> +}
> +
> +void warn_status(const struct ocxlpmem *ocxlpmem, const char *message,
> + u8 status)
> +{
> + const char *text = "Unknown";
> +
> + switch (status) {
> + case STATUS_SUCCESS:
> + text = "Success";
> + break;
> +
> + case STATUS_MEM_UNAVAILABLE:
> + text = "Persistent memory unavailable";
> + break;
> +
> + case STATUS_BAD_OPCODE:
> + text = "Bad opcode";
> + break;
> +
> + case STATUS_BAD_REQUEST_PARM:
> + text = "Bad request parameter";
> + break;
> +
> + case STATUS_BAD_DATA_PARM:
> + text = "Bad data parameter";
> + break;
> +
> + case STATUS_DEBUG_BLOCKED:
> + text = "Debug action blocked";
> + break;
> +
> + case STATUS_FAIL:
> + text = "Failed";
> + break;
> + }
> +
> + dev_warn(&ocxlpmem->dev, "%s: %s (%x)\n", message, text, status);
> +}
> --
> 2.24.1
>