Re: [PATCH v4 13/25] nvdimm/ocxl: Read the capability registers & wait for device ready

From: Dan Williams
Date: Wed Apr 01 2020 - 20:20:42 EST


On Sun, Mar 29, 2020 at 10:23 PM Alastair D'Silva <alastair@xxxxxxxxxxx> wrote:
>
> This patch reads timeouts & firmware version from the controller, and
> uses those timeouts to wait for the controller to report that it is ready
> before handing the memory over to libnvdimm.
>
> Signed-off-by: Alastair D'Silva <alastair@xxxxxxxxxxx>
> ---
> drivers/nvdimm/ocxl/Makefile | 2 +-
> drivers/nvdimm/ocxl/main.c | 85 +++++++++++++++++++++++++
> drivers/nvdimm/ocxl/ocxlpmem.h | 29 +++++++++
> drivers/nvdimm/ocxl/ocxlpmem_internal.c | 19 ++++++
> 4 files changed, 134 insertions(+), 1 deletion(-)
> create mode 100644 drivers/nvdimm/ocxl/ocxlpmem_internal.c
>
> diff --git a/drivers/nvdimm/ocxl/Makefile b/drivers/nvdimm/ocxl/Makefile
> index e0e8ade1987a..bab97082e062 100644
> --- a/drivers/nvdimm/ocxl/Makefile
> +++ b/drivers/nvdimm/ocxl/Makefile
> @@ -4,4 +4,4 @@ ccflags-$(CONFIG_PPC_WERROR) += -Werror
>
> obj-$(CONFIG_OCXL_PMEM) += ocxlpmem.o
>
> -ocxlpmem-y := main.o
> \ No newline at end of file
> +ocxlpmem-y := main.o ocxlpmem_internal.o
> diff --git a/drivers/nvdimm/ocxl/main.c b/drivers/nvdimm/ocxl/main.c
> index c0066fedf9cc..be76acd33d74 100644
> --- a/drivers/nvdimm/ocxl/main.c
> +++ b/drivers/nvdimm/ocxl/main.c
> @@ -8,6 +8,7 @@
>
> #include <linux/module.h>
> #include <misc/ocxl.h>
> +#include <linux/delay.h>
> #include <linux/ndctl.h>
> #include <linux/mm_types.h>
> #include <linux/memory_hotplug.h>
> @@ -327,6 +328,50 @@ static void remove(struct pci_dev *pdev)
> }
> }
>
> +/**
> + * read_device_metadata() - Retrieve config information from the AFU and save it for future use
> + * @ocxlpmem: the device metadata
> + * Return: 0 on success, negative on failure
> + */
> +static int read_device_metadata(struct ocxlpmem *ocxlpmem)
> +{
> + u64 val;
> + int rc;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CCAP0,
> + OCXL_LITTLE_ENDIAN, &val);

This calling convention would seem to defeat the ability of sparse to
validate endian correctness. That's independent of this series, but I
wonder how a reviewer is supposed to tell why this argument is sometimes
OCXL_LITTLE_ENDIAN and sometimes OCXL_HOST_ENDIAN.

> + if (rc)
> + return rc;
> +
> + ocxlpmem->scm_revision = val & 0xFFFF;
> + ocxlpmem->read_latency = (val >> 32) & 0xFFFF;
> + ocxlpmem->readiness_timeout = (val >> 48) & 0x0F;
> + ocxlpmem->memory_available_timeout = val >> 52;

Maybe some macros to parse out these register fields?

> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CCAP1,
> + OCXL_LITTLE_ENDIAN, &val);
> + if (rc)
> + return rc;
> +
> + ocxlpmem->max_controller_dump_size = val & 0xFFFFFFFF;
> +
> + // Extract firmware version text
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_FWVER,
> + OCXL_HOST_ENDIAN,
> + (u64 *)ocxlpmem->fw_version);
> + if (rc)
> + return rc;
> +
> + ocxlpmem->fw_version[8] = '\0';
> +
> + dev_info(&ocxlpmem->dev,
> + "Firmware version '%s' SCM revision %d:%d\n",
> + ocxlpmem->fw_version, ocxlpmem->scm_revision >> 4,
> + ocxlpmem->scm_revision & 0x0F);

Does the driver need to be chatty here? If this data is relevant,
should it appear in sysfs by default?

> +
> + return 0;
> +}
> +
> /**
> * probe_function0() - Set up function 0 for an OpenCAPI persistent memory device
> * This is important as it enables templates higher than 0 across all other
> @@ -359,6 +404,9 @@ static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> {
> struct ocxlpmem *ocxlpmem;
> int rc;
> + u64 chi;
> + u16 elapsed, timeout;
> + bool ready = false;
>
> if (PCI_FUNC(pdev->devfn) == 0)
> return probe_function0(pdev);
> @@ -413,6 +461,43 @@ static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> goto err;
> }
>
> + rc = read_device_metadata(ocxlpmem);
> + if (rc) {
> + dev_err(&pdev->dev, "Could not read metadata\n");
> + goto err;
> + }
> +
> + elapsed = 0;
> + timeout = ocxlpmem->readiness_timeout +
> + ocxlpmem->memory_available_timeout;
> +
> + while (true) {
> + rc = ocxlpmem_chi(ocxlpmem, &chi);
> + ready = (chi & (GLOBAL_MMIO_CHI_CRDY | GLOBAL_MMIO_CHI_MA)) ==
> + (GLOBAL_MMIO_CHI_CRDY | GLOBAL_MMIO_CHI_MA);
> +
> + if (ready)
> + break;
> +
> + if (elapsed++ > timeout) {
> + dev_err(&ocxlpmem->dev,
> + "OpenCAPI Persistent Memory ready timeout.\n");
> +
> + if (!(chi & GLOBAL_MMIO_CHI_CRDY))
> + dev_err(&ocxlpmem->dev,
> + "controller is not ready.\n");
> +
> + if (!(chi & GLOBAL_MMIO_CHI_MA))
> + dev_err(&ocxlpmem->dev,
> + "controller does not have memory available.\n");
> +
> + rc = -ENXIO;
> + goto err;
> + }
> +
> + msleep(1000);

At platform boot this is going to serialize / delay other PCI hardware
init. Do you need this determination to be synchronous with the call
to ->probe()? If not, let's move it out of line. For example, nvdimm
device registration is asynchronous by default, with options to flush
if userspace needs to know that the kernel has finished loading
drivers.

> + }
> +
> rc = register_lpc_mem(ocxlpmem);
> if (rc) {
> dev_err(&pdev->dev,
> diff --git a/drivers/nvdimm/ocxl/ocxlpmem.h b/drivers/nvdimm/ocxl/ocxlpmem.h
> index 322387873b4b..3eadbe19f6d0 100644
> --- a/drivers/nvdimm/ocxl/ocxlpmem.h
> +++ b/drivers/nvdimm/ocxl/ocxlpmem.h
> @@ -93,4 +93,33 @@ struct ocxlpmem {
> void *metadata_addr;
> struct resource pmem_res;
> struct nd_region *nd_region;
> + char fw_version[8 + 1];
> +
> + u32 max_controller_dump_size;
> + u16 scm_revision; // major/minor
> + u8 readiness_timeout; /* The worst case time (in seconds) that the host
> + * shall wait for the controller to become
> + * operational following a reset (CHI.CRDY).
> + */
> + u8 memory_available_timeout; /* The worst case time (in seconds) that
> + * the host shall wait for memory to
> + * become available following a reset
> + * (CHI.MA).
> + */
> +
> + u16 read_latency; /* The nominal measure of latency (in nanoseconds)
> + * associated with an unassisted read of a memory
> + * block.
> + * This represents the capability of the raw media
> + * technology without assistance
> + */
> };
> +
> +/**
> + * ocxlpmem_chi() - Get the value of the CHI register
> + * @ocxlpmem: the device metadata
> + * @chi: returns the CHI value
> + *
> + * Returns 0 on success, negative on error
> + */
> +int ocxlpmem_chi(const struct ocxlpmem *ocxlpmem, u64 *chi);
> diff --git a/drivers/nvdimm/ocxl/ocxlpmem_internal.c b/drivers/nvdimm/ocxl/ocxlpmem_internal.c
> new file mode 100644
> index 000000000000..5578169b7515
> --- /dev/null
> +++ b/drivers/nvdimm/ocxl/ocxlpmem_internal.c
> @@ -0,0 +1,19 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +// Copyright 2020 IBM Corp.
> +
> +#include <misc/ocxl.h>
> +#include <linux/delay.h>
> +#include "ocxlpmem.h"
> +
> +int ocxlpmem_chi(const struct ocxlpmem *ocxlpmem, u64 *chi)
> +{
> + u64 val;
> + int rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHI,
> + OCXL_LITTLE_ENDIAN, &val);
> + if (rc)
> + return rc;
> +
> + *chi = val;
> +
> + return 0;
> +}
> --
> 2.24.1
>