Re: [PATCH v2 1/4] MicroSemi Switchtec management interface driver
From: Bjorn Helgaas
Date: Thu Feb 23 2017 - 19:46:34 EST
[+cc Peter, Ingo, Arnaldo, Alexander, Christoph]
On Thu, Feb 02, 2017 at 11:06:00AM -0700, Logan Gunthorpe wrote:
> Microsemi's "Switchtec" line of PCI switch devices is already well
> supported by the kernel with standard PCI switch drivers. However, the
> Switchtec device advertises a special management endpoint with a separate
> PCI function address and class code. This endpoint enables some additional
> functionality which includes:
>
> * Packet and Byte Counters
> * Switch Firmware Upgrades
> * Event and Error logs
> * Querying port link status
> * Custom user firmware commands
Would it make any sense to integrate this with the perf tool? It
looks like switchtec-user measures bandwidth and latency, which sounds
sort of perf-ish.
> This patch introduces the switchtec kernel module which provides
> PCI driver that exposes a char device. The char device provides
> userspace access to this interface through read, write and (optionally)
> poll calls.
>
> A userspace tool and library which utilizes this interface is available
> at [1]. This tool takes inspiration (and borrows some code) from
> nvme-cli [2]. The tool is largely complete at this time but additional
> features may be added in the future.
>
> [1] https://github.com/sbates130272/switchtec-user
> [2] https://github.com/linux-nvme/nvme-cli
>
> Signed-off-by: Logan Gunthorpe <logang@xxxxxxxxxxxx>
> Signed-off-by: Stephen Bates <stephen.bates@xxxxxxxxxxxxx>
> ---
> MAINTAINERS | 8 +
> drivers/pci/Kconfig | 1 +
> drivers/pci/Makefile | 1 +
> drivers/pci/switch/Kconfig | 13 +
> drivers/pci/switch/Makefile | 1 +
> drivers/pci/switch/switchtec.c | 1028 ++++++++++++++++++++++++++++++++++++++++
> 6 files changed, 1052 insertions(+)
> create mode 100644 drivers/pci/switch/Kconfig
> create mode 100644 drivers/pci/switch/Makefile
> create mode 100644 drivers/pci/switch/switchtec.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 5f10c28..9c35198 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -9507,6 +9507,14 @@ S: Maintained
> F: Documentation/devicetree/bindings/pci/aardvark-pci.txt
> F: drivers/pci/host/pci-aardvark.c
>
> +PCI DRIVER FOR MICROSEMI SWITCHTEC
> +M: Kurt Schwemmer <kurt.schwemmer@xxxxxxxxxxxxx>
> +M: Stephen Bates <stephen.bates@xxxxxxxxxxxxx>
> +M: Logan Gunthorpe <logang@xxxxxxxxxxxx>
> +L: linux-pci@xxxxxxxxxxxxxxx
> +S: Maintained
> +F: drivers/pci/switch/switchtec*
> +
> PCI DRIVER FOR NVIDIA TEGRA
> M: Thierry Reding <thierry.reding@xxxxxxxxx>
> L: linux-tegra@xxxxxxxxxxxxxxx
> diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
> index 6555eb7..f72e8c5 100644
> --- a/drivers/pci/Kconfig
> +++ b/drivers/pci/Kconfig
> @@ -133,3 +133,4 @@ config PCI_HYPERV
>
> source "drivers/pci/hotplug/Kconfig"
> source "drivers/pci/host/Kconfig"
> +source "drivers/pci/switch/Kconfig"
> diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
> index 8db5079..15b46dd 100644
> --- a/drivers/pci/Makefile
> +++ b/drivers/pci/Makefile
> @@ -68,3 +68,4 @@ ccflags-$(CONFIG_PCI_DEBUG) := -DDEBUG
>
> # PCI host controller drivers
> obj-y += host/
> +obj-y += switch/
> diff --git a/drivers/pci/switch/Kconfig b/drivers/pci/switch/Kconfig
> new file mode 100644
> index 0000000..4c49648
> --- /dev/null
> +++ b/drivers/pci/switch/Kconfig
> @@ -0,0 +1,13 @@
> +menu "PCI switch controller drivers"
> + depends on PCI
> +
> +config PCI_SW_SWITCHTEC
> + tristate "MicroSemi Switchtec PCIe Switch Management Driver"
> + help
> + Enables support for the management interface for the MicroSemi
> + Switchtec series of PCIe switches. Supports userspace access
> + to submit MRPC commands to the switch via /dev/switchtecX
> + devices. See <file:Documentation/switchtec.txt> for more
> + information.
> +
> +endmenu
> diff --git a/drivers/pci/switch/Makefile b/drivers/pci/switch/Makefile
> new file mode 100644
> index 0000000..37d8cfb
> --- /dev/null
> +++ b/drivers/pci/switch/Makefile
> @@ -0,0 +1 @@
> +obj-$(CONFIG_PCI_SW_SWITCHTEC) += switchtec.o
> diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
> new file mode 100644
> index 0000000..e4a4294
> --- /dev/null
> +++ b/drivers/pci/switch/switchtec.c
> @@ -0,0 +1,1028 @@
> +/*
> + * Microsemi Switchtec(tm) PCIe Management Driver
> + * Copyright (c) 2017, Microsemi Corporation
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + */
> +
> +#include <linux/interrupt.h>
> +#include <linux/module.h>
> +#include <linux/fs.h>
> +#include <linux/uaccess.h>
> +#include <linux/poll.h>
> +#include <linux/pci.h>
> +#include <linux/cdev.h>
> +#include <linux/wait.h>
> +
> +MODULE_DESCRIPTION("Microsemi Switchtec(tm) PCI-E Management Driver");
Nit: s/PCI-E/PCIe/
> +MODULE_VERSION("0.1");
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Microsemi Corporation");
> +
> +static int max_devices = 16;
This static initialization is slightly misleading because
switchtec_init() immediately sets max_devices to at least 256.
> +module_param(max_devices, int, 0644);
> +MODULE_PARM_DESC(max_devices, "max number of switchtec device instances");
> +
> +static dev_t switchtec_devt;
> +static struct class *switchtec_class;
> +static DEFINE_IDA(switchtec_minor_ida);
> +
> +#define MICROSEMI_VENDOR_ID 0x11f8
> +#define MICROSEMI_NTB_CLASSCODE 0x068000
> +#define MICROSEMI_MGMT_CLASSCODE 0x058000
> +
> +#define SWITCHTEC_MRPC_PAYLOAD_SIZE 1024
> +#define SWITCHTEC_MAX_PFF_CSR 48
> +
> +#define SWITCHTEC_EVENT_OCCURRED BIT(0)
> +#define SWITCHTEC_EVENT_CLEAR BIT(0)
> +#define SWITCHTEC_EVENT_EN_LOG BIT(1)
> +#define SWITCHTEC_EVENT_EN_CLI BIT(2)
> +#define SWITCHTEC_EVENT_EN_IRQ BIT(3)
> +#define SWITCHTEC_EVENT_FATAL BIT(4)
> +
> +enum {
> + SWITCHTEC_GAS_MRPC_OFFSET = 0x0000,
> + SWITCHTEC_GAS_TOP_CFG_OFFSET = 0x1000,
> + SWITCHTEC_GAS_SW_EVENT_OFFSET = 0x1800,
> + SWITCHTEC_GAS_SYS_INFO_OFFSET = 0x2000,
> + SWITCHTEC_GAS_FLASH_INFO_OFFSET = 0x2200,
> + SWITCHTEC_GAS_PART_CFG_OFFSET = 0x4000,
> + SWITCHTEC_GAS_NTB_OFFSET = 0x10000,
> + SWITCHTEC_GAS_PFF_CSR_OFFSET = 0x134000,
> +};
> +
> +struct mrpc_regs {
> + u8 input_data[SWITCHTEC_MRPC_PAYLOAD_SIZE];
> + u8 output_data[SWITCHTEC_MRPC_PAYLOAD_SIZE];
> + u32 cmd;
> + u32 status;
> + u32 ret_value;
> +} __packed;
> +
> +enum mrpc_status {
> + SWITCHTEC_MRPC_STATUS_INPROGRESS = 1,
> + SWITCHTEC_MRPC_STATUS_DONE = 2,
> + SWITCHTEC_MRPC_STATUS_ERROR = 0xFF,
> + SWITCHTEC_MRPC_STATUS_INTERRUPTED = 0x100,
> +};
> +
> +struct sw_event_regs {
> + u64 event_report_ctrl;
> + u64 reserved1;
> + u64 part_event_bitmap;
> + u64 reserved2;
> + u32 global_summary;
> + u32 reserved3[3];
> + u32 stack_error_event_hdr;
> + u32 stack_error_event_data;
> + u32 reserved4[4];
> + u32 ppu_error_event_hdr;
> + u32 ppu_error_event_data;
> + u32 reserved5[4];
> + u32 isp_error_event_hdr;
> + u32 isp_error_event_data;
> + u32 reserved6[4];
> + u32 sys_reset_event_hdr;
> + u32 reserved7[5];
> + u32 fw_exception_hdr;
> + u32 reserved8[5];
> + u32 fw_nmi_hdr;
> + u32 reserved9[5];
> + u32 fw_non_fatal_hdr;
> + u32 reserved10[5];
> + u32 fw_fatal_hdr;
> + u32 reserved11[5];
> + u32 twi_mrpc_comp_hdr;
> + u32 twi_mrpc_comp_data;
> + u32 reserved12[4];
> + u32 twi_mrpc_comp_async_hdr;
> + u32 twi_mrpc_comp_async_data;
> + u32 reserved13[4];
> + u32 cli_mrpc_comp_hdr;
> + u32 cli_mrpc_comp_data;
> + u32 reserved14[4];
> + u32 cli_mrpc_comp_async_hdr;
> + u32 cli_mrpc_comp_async_data;
> + u32 reserved15[4];
> + u32 gpio_interrupt_hdr;
> + u32 gpio_interrupt_data;
> + u32 reserved16[4];
> +} __packed;
> +
> +struct sys_info_regs {
> + u32 device_id;
> + u32 device_version;
> + u32 firmware_version;
> + u32 reserved1;
> + u32 vendor_table_revision;
> + u32 table_format_version;
> + u32 partition_id;
> + u32 cfg_file_fmt_version;
> + u32 reserved2[58];
> + char vendor_id[8];
> + char product_id[16];
> + char product_revision[4];
> + char component_vendor[8];
> + u16 component_id;
> + u8 component_revision;
> +} __packed;
> +
> +struct flash_info_regs {
> + u32 flash_part_map_upd_idx;
> +
> + struct active_partition_info {
> + u32 address;
> + u32 build_version;
> + u32 build_string;
> + } active_img;
> +
> + struct active_partition_info active_cfg;
> + struct active_partition_info inactive_img;
> + struct active_partition_info inactive_cfg;
> +
> + u32 flash_length;
> +
> + struct partition_info {
> + u32 address;
> + u32 length;
> + } cfg0;
> +
> + struct partition_info cfg1;
> + struct partition_info img0;
> + struct partition_info img1;
> + struct partition_info nvlog;
> + struct partition_info vendor[8];
> +};
> +
> +struct ntb_info_regs {
> + u8 partition_count;
> + u8 partition_id;
> + u16 reserved1;
> + u64 ep_map;
> + u16 requester_id;
> +} __packed;
> +
> +struct part_cfg_regs {
> + u32 status;
> + u32 state;
> + u32 port_cnt;
> + u32 usp_port_mode;
> + u32 usp_pff_inst_id;
> + u32 vep_pff_inst_id;
> + u32 dsp_pff_inst_id[47];
> + u32 reserved1[11];
> + u16 vep_vector_number;
> + u16 usp_vector_number;
> + u32 port_event_bitmap;
> + u32 reserved2[3];
> + u32 part_event_summary;
> + u32 reserved3[3];
> + u32 part_reset_hdr;
> + u32 part_reset_data[5];
> + u32 mrpc_comp_hdr;
> + u32 mrpc_comp_data[5];
> + u32 mrpc_comp_async_hdr;
> + u32 mrpc_comp_async_data[5];
> + u32 dyn_binding_hdr;
> + u32 dyn_binding_data[5];
> + u32 reserved4[159];
> +} __packed;
> +
> +enum {
> + SWITCHTEC_PART_CFG_EVENT_RESET = 1 << 0,
> + SWITCHTEC_PART_CFG_EVENT_MRPC_CMP = 1 << 1,
> + SWITCHTEC_PART_CFG_EVENT_MRPC_ASYNC_CMP = 1 << 2,
> + SWITCHTEC_PART_CFG_EVENT_DYN_PART_CMP = 1 << 3,
> +};
> +
> +struct pff_csr_regs {
> + u16 vendor_id;
> + u16 device_id;
> + u32 pci_cfg_header[15];
> + u32 pci_cap_region[48];
> + u32 pcie_cap_region[448];
> + u32 indirect_gas_window[128];
> + u32 indirect_gas_window_off;
> + u32 reserved[127];
> + u32 pff_event_summary;
> + u32 reserved2[3];
> + u32 aer_in_p2p_hdr;
> + u32 aer_in_p2p_data[5];
> + u32 aer_in_vep_hdr;
> + u32 aer_in_vep_data[5];
> + u32 dpc_hdr;
> + u32 dpc_data[5];
> + u32 cts_hdr;
> + u32 cts_data[5];
> + u32 reserved3[6];
> + u32 hotplug_hdr;
> + u32 hotplug_data[5];
> + u32 ier_hdr;
> + u32 ier_data[5];
> + u32 threshold_hdr;
> + u32 threshold_data[5];
> + u32 power_mgmt_hdr;
> + u32 power_mgmt_data[5];
> + u32 tlp_throttling_hdr;
> + u32 tlp_throttling_data[5];
> + u32 force_speed_hdr;
> + u32 force_speed_data[5];
> + u32 credit_timeout_hdr;
> + u32 credit_timeout_data[5];
> + u32 link_state_hdr;
> + u32 link_state_data[5];
> + u32 reserved4[174];
> +} __packed;
> +
> +struct switchtec_dev {
> + struct pci_dev *pdev;
> + struct msix_entry msix[4];
> + struct device dev;
> + struct cdev cdev;
> + unsigned int event_irq;
> +
> + int partition;
> + int partition_count;
> + int pff_csr_count;
> + char pff_local[SWITCHTEC_MAX_PFF_CSR];
> +
> + void __iomem *mmio;
> + struct mrpc_regs __iomem *mmio_mrpc;
> + struct sw_event_regs __iomem *mmio_sw_event;
> + struct sys_info_regs __iomem *mmio_sys_info;
> + struct flash_info_regs __iomem *mmio_flash_info;
> + struct ntb_info_regs __iomem *mmio_ntb;
> + struct part_cfg_regs __iomem *mmio_part_cfg;
> + struct part_cfg_regs __iomem *mmio_part_cfg_all;
> + struct pff_csr_regs __iomem *mmio_pff_csr;
> +
> + /*
> + * The mrpc mutex must be held when accessing the other
> + * mrpc fields
> + */
> + struct mutex mrpc_mutex;
> + struct list_head mrpc_queue;
> + int mrpc_busy;
> + struct work_struct mrpc_work;
> + struct delayed_work mrpc_timeout;
> + wait_queue_head_t event_wq;
> + atomic_t event_cnt;
Why is this atomic_t? You only do an atomic_set() (in stdev_create())
and atomic_reads() -- no writes other than the initial one. So I'm
not sure what value atomic_t brings.
> +};
> +
> +static struct switchtec_dev *to_stdev(struct device *dev)
> +{
> + return container_of(dev, struct switchtec_dev, dev);
> +}
> +
> +struct switchtec_user {
> + struct switchtec_dev *stdev;
> +
> + enum mrpc_state {
> + MRPC_IDLE = 0,
> + MRPC_QUEUED,
> + MRPC_RUNNING,
> + MRPC_DONE,
> + } state;
> +
> + struct completion comp;
> + struct kref kref;
> + struct list_head list;
> +
> + u32 cmd;
> + u32 status;
> + u32 return_code;
> + size_t data_len;
> + unsigned char data[SWITCHTEC_MRPC_PAYLOAD_SIZE];
> + int event_cnt;
> +};
> +
> +static struct switchtec_user *stuser_create(struct switchtec_dev *stdev)
> +{
> + struct switchtec_user *stuser;
> +
> + stuser = kzalloc(sizeof(*stuser), GFP_KERNEL);
> + if (!stuser)
> + return ERR_PTR(-ENOMEM);
> +
> + get_device(&stdev->dev);
> + stuser->stdev = stdev;
> + kref_init(&stuser->kref);
> + INIT_LIST_HEAD(&stuser->list);
> + init_completion(&stuser->comp);
> + stuser->event_cnt = atomic_read(&stdev->event_cnt);
> +
> + dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser);
> +
> + return stuser;
> +}
> +
> +static void stuser_free(struct kref *kref)
> +{
> + struct switchtec_user *stuser;
> +
> + stuser = container_of(kref, struct switchtec_user, kref);
> +
> + dev_dbg(&stuser->stdev->dev, "%s: %p\n", __func__, stuser);
> +
> + put_device(&stuser->stdev->dev);
> + kfree(stuser);
> +}
> +
> +static void stuser_put(struct switchtec_user *stuser)
> +{
> + kref_put(&stuser->kref, stuser_free);
> +}
> +
> +static void stuser_set_state(struct switchtec_user *stuser,
> + enum mrpc_state state)
> +{
> + const char * const state_names[] = {
> + [MRPC_IDLE] = "IDLE",
> + [MRPC_QUEUED] = "QUEUED",
> + [MRPC_RUNNING] = "RUNNING",
> + [MRPC_DONE] = "DONE",
> + };
> +
> + stuser->state = state;
> +
> + dev_dbg(&stuser->stdev->dev, "stuser state %p -> %s",
> + stuser, state_names[state]);
> +}
> +
> +static int stdev_is_alive(struct switchtec_dev *stdev)
> +{
> + return stdev->mmio != NULL;
> +}
> +
> +static void mrpc_complete_cmd(struct switchtec_dev *stdev);
> +
> +static void mrpc_cmd_submit(struct switchtec_dev *stdev)
> +{
> + /* requires the mrpc_mutex to already be held when called */
> +
> + struct switchtec_user *stuser;
> +
> + if (stdev->mrpc_busy)
> + return;
> +
> + if (list_empty(&stdev->mrpc_queue))
> + return;
> +
> + stuser = list_entry(stdev->mrpc_queue.next, struct switchtec_user,
> + list);
> +
> + stuser_set_state(stuser, MRPC_RUNNING);
> + stdev->mrpc_busy = 1;
> + memcpy_toio(&stdev->mmio_mrpc->input_data,
> + stuser->data, stuser->data_len);
> + iowrite32(stuser->cmd, &stdev->mmio_mrpc->cmd);
> +
> + stuser->status = ioread32(&stdev->mmio_mrpc->status);
> + if (stuser->status != SWITCHTEC_MRPC_STATUS_INPROGRESS)
> + mrpc_complete_cmd(stdev);
> +
> + schedule_delayed_work(&stdev->mrpc_timeout,
> + msecs_to_jiffies(500));
> +}
> +
> +static void mrpc_queue_cmd(struct switchtec_user *stuser)
> +{
> + /* requires the mrpc_mutex to already be held when called */
> +
> + struct switchtec_dev *stdev = stuser->stdev;
> +
> + kref_get(&stuser->kref);
> + stuser_set_state(stuser, MRPC_QUEUED);
> + init_completion(&stuser->comp);
> + list_add_tail(&stuser->list, &stdev->mrpc_queue);
> +
> + mrpc_cmd_submit(stdev);
> +}
> +
> +static void mrpc_complete_cmd(struct switchtec_dev *stdev)
> +{
> + /* requires the mrpc_mutex to already be held when called */
> + struct switchtec_user *stuser;
> +
> + if (list_empty(&stdev->mrpc_queue))
> + return;
> +
> + stuser = list_entry(stdev->mrpc_queue.next, struct switchtec_user,
> + list);
> +
> + stuser->status = ioread32(&stdev->mmio_mrpc->status);
> + if (stuser->status == SWITCHTEC_MRPC_STATUS_INPROGRESS)
> + return;
> +
> + stuser_set_state(stuser, MRPC_DONE);
> + stuser->return_code = 0;
> +
> + if (stuser->status != SWITCHTEC_MRPC_STATUS_DONE)
> + goto out;
> +
> + stuser->return_code = ioread32(&stdev->mmio_mrpc->ret_value);
> + if (stuser->return_code != 0)
> + goto out;
> +
> + memcpy_fromio(stuser->data, &stdev->mmio_mrpc->output_data,
> + sizeof(stuser->data));
Apparently you always get 1K of data back, no matter what?
> +
> +out:
> + complete_all(&stuser->comp);
> + list_del_init(&stuser->list);
> + stuser_put(stuser);
> + stdev->mrpc_busy = 0;
> +
> + mrpc_cmd_submit(stdev);
> +}
> +
> +static void mrpc_event_work(struct work_struct *work)
> +{
> + struct switchtec_dev *stdev;
> +
> + stdev = container_of(work, struct switchtec_dev, mrpc_work);
> +
> + dev_dbg(&stdev->dev, "%s\n", __func__);
> +
> + mutex_lock(&stdev->mrpc_mutex);
> + cancel_delayed_work(&stdev->mrpc_timeout);
> + mrpc_complete_cmd(stdev);
> + mutex_unlock(&stdev->mrpc_mutex);
> +}
> +
> +static void mrpc_timeout_work(struct work_struct *work)
> +{
> + struct switchtec_dev *stdev;
> + u32 status;
> +
> + stdev = container_of(work, struct switchtec_dev, mrpc_timeout.work);
> +
> + dev_dbg(&stdev->dev, "%s\n", __func__);
> +
> + mutex_lock(&stdev->mrpc_mutex);
> +
> + if (stdev_is_alive(stdev)) {
> + status = ioread32(&stdev->mmio_mrpc->status);
> + if (status == SWITCHTEC_MRPC_STATUS_INPROGRESS) {
> + schedule_delayed_work(&stdev->mrpc_timeout,
> + msecs_to_jiffies(500));
> + goto out;
> + }
> + }
> +
> + mrpc_complete_cmd(stdev);
> +
> +out:
> + mutex_unlock(&stdev->mrpc_mutex);
> +}
> +
> +static int switchtec_dev_open(struct inode *inode, struct file *filp)
> +{
> + struct switchtec_dev *stdev;
> + struct switchtec_user *stuser;
> +
> + stdev = container_of(inode->i_cdev, struct switchtec_dev, cdev);
> +
> + stuser = stuser_create(stdev);
> + if (!stuser)
> + return PTR_ERR(stuser);
> +
> + filp->private_data = stuser;
> + nonseekable_open(inode, filp);
> +
> + dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser);
> +
> + return 0;
> +}
> +
> +static int switchtec_dev_release(struct inode *inode, struct file *filp)
> +{
> + struct switchtec_user *stuser = filp->private_data;
> +
> + stuser_put(stuser);
> +
> + return 0;
> +}
> +
> +static ssize_t switchtec_dev_write(struct file *filp, const char __user *data,
> + size_t size, loff_t *off)
> +{
> + struct switchtec_user *stuser = filp->private_data;
> + struct switchtec_dev *stdev = stuser->stdev;
> + int rc;
> +
> + if (!stdev_is_alive(stdev))
> + return -ENXIO;
What exactly does this protect against? Is it OK if stdev_is_alive()
becomes false immediately after we check?
> +
> + if (size < sizeof(stuser->cmd) ||
> + size > sizeof(stuser->cmd) + SWITCHTEC_MRPC_PAYLOAD_SIZE)
I think I would use "sizeof(stuser->data)" instead of
SWITCHTEC_MRPC_PAYLOAD_SIZE so it matches what mrpc_complete_cmd()
does. Same below in switchtec_dev_read().
mrpc_mutex doesn't protect the stuser things, does it? If not, you
could do this without the gotos. And I think it's a little easier to
be confident there are no buffer overruns:
if (stuser->state != MRPC_IDLE)
return -EBADE;
if (size < sizeof(stuser->cmd))
return -EINVAL;
rc = copy_from_user(&stuser->cmd, data, sizeof(stuser->cmd));
if (rc)
return -EFAULT;
size -= sizeof(stuser->cmd);
data += sizeof(stuser->cmd);
if (size > sizeof(stuser->data))
return -EINVAL;
rc = copy_from_user(&stuser->data, data, size);
if (rc)
return -EFAULT;
stuser->data_len = size;
if (mutex_lock_interruptible(&stdev->mrpc_mutex))
return -EINTR;
mrpc_queue_cmd(stuser);
mutex_unlock(&stdev->mrpc_mutex);
return size;
> + return -EINVAL;
> +
> + if (mutex_lock_interruptible(&stdev->mrpc_mutex))
> + return -EINTR;
> +
> + if (stuser->state != MRPC_IDLE) {
> + rc = -EBADE;
> + goto out;
> + }
> +
> + rc = copy_from_user(&stuser->cmd, data, sizeof(stuser->cmd));
> + if (rc) {
> + rc = -EFAULT;
> + goto out;
> + }
> +
> + data += sizeof(stuser->cmd);
> + rc = copy_from_user(&stuser->data, data, size - sizeof(stuser->cmd));
> + if (rc) {
> + rc = -EFAULT;
> + goto out;
> + }
> +
> + stuser->data_len = size - sizeof(stuser->cmd);
> +
> + mrpc_queue_cmd(stuser);
> +
> + rc = size;
> +
> +out:
> + mutex_unlock(&stdev->mrpc_mutex);
> +
> + return rc;
> +}
> +
> +static ssize_t switchtec_dev_read(struct file *filp, char __user *data,
> + size_t size, loff_t *off)
> +{
> + struct switchtec_user *stuser = filp->private_data;
> + struct switchtec_dev *stdev = stuser->stdev;
> + int rc;
> +
> + if (!stdev_is_alive(stdev))
> + return -ENXIO;
> +
> + if (size < sizeof(stuser->cmd) ||
> + size > sizeof(stuser->cmd) + SWITCHTEC_MRPC_PAYLOAD_SIZE)
> + return -EINVAL;
> +
> + if (stuser->state == MRPC_IDLE)
> + return -EBADE;
> +
> + if (filp->f_flags & O_NONBLOCK) {
> + if (!try_wait_for_completion(&stuser->comp))
> + return -EAGAIN;
> + } else {
> + rc = wait_for_completion_interruptible(&stuser->comp);
> + if (rc < 0)
> + return rc;
> + }
> +
> + if (mutex_lock_interruptible(&stdev->mrpc_mutex))
> + return -EINTR;
> +
> + if (stuser->state != MRPC_DONE)
> + return -EBADE;
> +
> + rc = copy_to_user(data, &stuser->return_code,
> + sizeof(stuser->return_code));
> + if (rc) {
> + rc = -EFAULT;
> + goto out;
> + }
> +
> + data += sizeof(stuser->return_code);
> + rc = copy_to_user(data, &stuser->data,
> + size - sizeof(stuser->return_code));
> + if (rc) {
> + rc = -EFAULT;
> + goto out;
> + }
> +
> + stuser_set_state(stuser, MRPC_IDLE);
> +
> + if (stuser->status == SWITCHTEC_MRPC_STATUS_DONE)
> + rc = size;
> + else if (stuser->status == SWITCHTEC_MRPC_STATUS_INTERRUPTED)
> + rc = -ENXIO;
> + else
> + rc = -EBADMSG;
> +
> +out:
> + mutex_unlock(&stdev->mrpc_mutex);
> +
> + return rc;
> +}
> +
> +static unsigned int switchtec_dev_poll(struct file *filp, poll_table *wait)
> +{
> + struct switchtec_user *stuser = filp->private_data;
> + struct switchtec_dev *stdev = stuser->stdev;
> + int ret = 0;
> +
> + poll_wait(filp, &stuser->comp.wait, wait);
> + poll_wait(filp, &stdev->event_wq, wait);
> +
> + if (!stdev_is_alive(stdev))
> + return POLLERR;
> +
> + if (try_wait_for_completion(&stuser->comp))
> + ret |= POLLIN | POLLRDNORM;
> +
> + if (stuser->event_cnt != atomic_read(&stdev->event_cnt))
> + ret |= POLLPRI | POLLRDBAND;
> +
> + return ret;
> +}
> +
> +static const struct file_operations switchtec_fops = {
> + .owner = THIS_MODULE,
> + .open = switchtec_dev_open,
> + .release = switchtec_dev_release,
> + .write = switchtec_dev_write,
> + .read = switchtec_dev_read,
> + .poll = switchtec_dev_poll,
> +};
> +
> +static void stdev_release(struct device *dev)
> +{
> + struct switchtec_dev *stdev = to_stdev(dev);
> +
> + ida_simple_remove(&switchtec_minor_ida,
> + MINOR(dev->devt));
> + kfree(stdev);
> +}
> +
> +static void stdev_unregister(struct switchtec_dev *stdev)
> +{
> + cdev_del(&stdev->cdev);
> + device_unregister(&stdev->dev);
> +}
> +
> +static struct switchtec_dev *stdev_create(struct pci_dev *pdev)
> +{
> + struct switchtec_dev *stdev;
> + int minor;
> + struct device *dev;
> + struct cdev *cdev;
> + int rc;
> +
> + stdev = kzalloc_node(sizeof(*stdev), GFP_KERNEL,
> + dev_to_node(&pdev->dev));
> + if (!stdev)
> + return ERR_PTR(-ENOMEM);
> +
> + stdev->pdev = pdev;
> + INIT_LIST_HEAD(&stdev->mrpc_queue);
> + mutex_init(&stdev->mrpc_mutex);
> + stdev->mrpc_busy = 0;
> + INIT_WORK(&stdev->mrpc_work, mrpc_event_work);
> + INIT_DELAYED_WORK(&stdev->mrpc_timeout, mrpc_timeout_work);
> + init_waitqueue_head(&stdev->event_wq);
> + atomic_set(&stdev->event_cnt, 0);
> +
> + minor = ida_simple_get(&switchtec_minor_ida, 0, 0,
> + GFP_KERNEL);
> + if (minor < 0)
> + return ERR_PTR(minor);
> +
> + dev = &stdev->dev;
> + device_initialize(dev);
> + dev->devt = MKDEV(MAJOR(switchtec_devt), minor);
> + dev->class = switchtec_class;
> + dev->parent = &pdev->dev;
> + dev->release = stdev_release;
> + dev_set_name(dev, "switchtec%d", minor);
> +
> + cdev = &stdev->cdev;
> + cdev_init(cdev, &switchtec_fops);
> + cdev->owner = THIS_MODULE;
> + cdev->kobj.parent = &dev->kobj;
> +
> + rc = cdev_add(&stdev->cdev, dev->devt, 1);
> + if (rc)
> + goto err_cdev;
> +
> + rc = device_add(dev);
> + if (rc) {
> + cdev_del(&stdev->cdev);
> + put_device(dev);
> + return ERR_PTR(rc);
> + }
> +
> + return stdev;
> +
> +err_cdev:
> + ida_simple_remove(&switchtec_minor_ida, minor);
> +
> + return ERR_PTR(rc);
> +}
> +
> +static irqreturn_t switchtec_event_isr(int irq, void *dev)
> +{
> + struct switchtec_dev *stdev = dev;
> + u32 reg;
> + irqreturn_t ret = IRQ_NONE;
> +
> + reg = ioread32(&stdev->mmio_part_cfg->mrpc_comp_hdr);
> + if (reg & SWITCHTEC_EVENT_OCCURRED) {
> + dev_dbg(&stdev->dev, "%s: mrpc comp\n", __func__);
> + ret = IRQ_HANDLED;
> + schedule_work(&stdev->mrpc_work);
> + iowrite32(reg, &stdev->mmio_part_cfg->mrpc_comp_hdr);
> + }
> +
> + return ret;
> +}
> +
> +static int switchtec_init_msix_isr(struct switchtec_dev *stdev)
> +{
> + struct pci_dev *pdev = stdev->pdev;
> + int rc, i, msix_count, node;
> +
> + node = dev_to_node(&pdev->dev);
Unused?
> + for (i = 0; i < ARRAY_SIZE(stdev->msix); ++i)
> + stdev->msix[i].entry = i;
> +
> + msix_count = pci_enable_msix_range(pdev, stdev->msix, 1,
> + ARRAY_SIZE(stdev->msix));
> + if (msix_count < 0)
> + return msix_count;
Maybe this could be simplified by using pci_alloc_irq_vectors()?
> + stdev->event_irq = ioread32(&stdev->mmio_part_cfg->vep_vector_number);
> + if (stdev->event_irq < 0 || stdev->event_irq >= msix_count) {
> + rc = -EFAULT;
> + goto err_msix_request;
> + }
Not sure what this is doing, but you immediately overwrite
stdev->event_irq below. If you're using it as just a temporary above
for doing some validation, I would just use a local variable to avoid
the connection with stdev->event_irq.
> + stdev->event_irq = stdev->msix[stdev->event_irq].vector;
Oh, I guess you allocate several MSI-X vectors, but you only actually
use one of them? Why is that? I was confused about why you allocate
several vectors, but there's only one devm_request_irq() call below.
> + dev_dbg(&stdev->dev, "Using msix interrupts: event_irq=%d\n",
> + stdev->event_irq);
> +
> + return 0;
> +
> +err_msix_request:
> + pci_disable_msix(pdev);
> + return rc;
> +}
> +
> +static int switchtec_init_msi_isr(struct switchtec_dev *stdev)
> +{
> + int rc;
> + struct pci_dev *pdev = stdev->pdev;
> +
> + /* Try to set up msi irq */
> + rc = pci_enable_msi_range(pdev, 1, 4);
> + if (rc < 0)
> + return rc;
> +
> + stdev->event_irq = ioread32(&stdev->mmio_part_cfg->vep_vector_number);
> + if (stdev->event_irq < 0 || stdev->event_irq >= 4) {
> + rc = -EFAULT;
> + goto err_msi_request;
> + }
> +
> + stdev->event_irq = pdev->irq + stdev->event_irq;
> + dev_dbg(&stdev->dev, "Using msi interrupts: event_irq=%d\n",
> + stdev->event_irq);
> +
> + return 0;
> +
> +err_msi_request:
> + pci_disable_msi(pdev);
> + return rc;
> +}
> +
> +static int switchtec_init_isr(struct switchtec_dev *stdev)
> +{
> + int rc;
> +
> + rc = switchtec_init_msix_isr(stdev);
> + if (rc)
> + rc = switchtec_init_msi_isr(stdev);
> +
> + if (rc)
> + return rc;
> +
> + rc = devm_request_irq(&stdev->pdev->dev, stdev->event_irq,
> + switchtec_event_isr, 0, KBUILD_MODNAME, stdev);
> +
> + return rc;
> +}
> +
> +static void switchtec_deinit_isr(struct switchtec_dev *stdev)
> +{
> + devm_free_irq(&stdev->pdev->dev, stdev->event_irq, stdev);
> + pci_disable_msix(stdev->pdev);
> + pci_disable_msi(stdev->pdev);
> +}
> +
> +static void init_pff(struct switchtec_dev *stdev)
> +{
> + int i;
> + u32 reg;
> + struct part_cfg_regs *pcfg = stdev->mmio_part_cfg;
> +
> + for (i = 0; i < SWITCHTEC_MAX_PFF_CSR; i++) {
> + reg = ioread16(&stdev->mmio_pff_csr[i].vendor_id);
> + if (reg != MICROSEMI_VENDOR_ID)
> + break;
> + }
> +
> + stdev->pff_csr_count = i;
> +
> + reg = ioread32(&pcfg->usp_pff_inst_id);
> + if (reg < SWITCHTEC_MAX_PFF_CSR)
> + stdev->pff_local[reg] = 1;
> +
> + reg = ioread32(&pcfg->vep_pff_inst_id);
> + if (reg < SWITCHTEC_MAX_PFF_CSR)
> + stdev->pff_local[reg] = 1;
> +
> + for (i = 0; i < ARRAY_SIZE(pcfg->dsp_pff_inst_id); i++) {
> + reg = ioread32(&pcfg->dsp_pff_inst_id[i]);
> + if (reg < SWITCHTEC_MAX_PFF_CSR)
> + stdev->pff_local[reg] = 1;
> + }
> +}
> +
> +static int switchtec_init_pci(struct switchtec_dev *stdev,
> + struct pci_dev *pdev)
> +{
> + int rc;
> +
> + rc = pcim_enable_device(pdev);
> + if (rc)
> + return rc;
> +
> + rc = pcim_iomap_regions(pdev, 0x1, KBUILD_MODNAME);
> + if (rc)
> + return rc;
> +
> + pci_set_master(pdev);
> +
> + stdev->mmio = pcim_iomap_table(pdev)[0];
> + stdev->mmio_mrpc = stdev->mmio + SWITCHTEC_GAS_MRPC_OFFSET;
> + stdev->mmio_sw_event = stdev->mmio + SWITCHTEC_GAS_SW_EVENT_OFFSET;
> + stdev->mmio_sys_info = stdev->mmio + SWITCHTEC_GAS_SYS_INFO_OFFSET;
> + stdev->mmio_flash_info = stdev->mmio + SWITCHTEC_GAS_FLASH_INFO_OFFSET;
> + stdev->mmio_ntb = stdev->mmio + SWITCHTEC_GAS_NTB_OFFSET;
> + stdev->partition = ioread8(&stdev->mmio_ntb->partition_id);
> + stdev->partition_count = ioread8(&stdev->mmio_ntb->partition_count);
> + stdev->mmio_part_cfg_all = stdev->mmio + SWITCHTEC_GAS_PART_CFG_OFFSET;
> + stdev->mmio_part_cfg = &stdev->mmio_part_cfg_all[stdev->partition];
> + stdev->mmio_pff_csr = stdev->mmio + SWITCHTEC_GAS_PFF_CSR_OFFSET;
> +
> + init_pff(stdev);
> +
> + iowrite32(SWITCHTEC_EVENT_CLEAR |
> + SWITCHTEC_EVENT_EN_IRQ,
Does this enable the device to generate IRQs? You haven't connected
the ISR yet (but I guess you probably also haven't told the device to
do anything that would actually generate an IRQ).
> + &stdev->mmio_part_cfg->mrpc_comp_hdr);
> +
> + pci_set_drvdata(pdev, stdev);
> +
> + return 0;
> +}
> +
> +static void switchtec_deinit_pci(struct switchtec_dev *stdev)
> +{
> + struct pci_dev *pdev = stdev->pdev;
> +
> + stdev->mmio = NULL;
> + pci_clear_master(pdev);
> + pci_set_drvdata(pdev, NULL);
> +}
> +
> +static int switchtec_pci_probe(struct pci_dev *pdev,
> + const struct pci_device_id *id)
> +{
> + struct switchtec_dev *stdev;
> + int rc;
> +
> + stdev = stdev_create(pdev);
> + if (!stdev)
> + return PTR_ERR(stdev);
> +
> + rc = switchtec_init_pci(stdev, pdev);
> + if (rc)
> + goto err_init_pci;
> +
> + rc = switchtec_init_isr(stdev);
> + if (rc) {
> + dev_err(&stdev->dev, "failed to init isr.\n");
> + goto err_init_isr;
> + }
> +
> + dev_info(&stdev->dev, "Management device registered.\n");
> +
> + return 0;
> +
> +err_init_isr:
> + switchtec_deinit_pci(stdev);
> +err_init_pci:
> + stdev_unregister(stdev);
> + return rc;
> +}
> +
> +static void switchtec_pci_remove(struct pci_dev *pdev)
> +{
> + struct switchtec_dev *stdev = pci_get_drvdata(pdev);
> +
> + switchtec_deinit_isr(stdev);
> + switchtec_deinit_pci(stdev);
> + stdev_unregister(stdev);
> +}
> +
> +#define SWITCHTEC_PCI_DEVICE(device_id) \
> + { \
> + .vendor = MICROSEMI_VENDOR_ID, \
> + .device = device_id, \
> + .subvendor = PCI_ANY_ID, \
> + .subdevice = PCI_ANY_ID, \
> + .class = MICROSEMI_MGMT_CLASSCODE, \
> + .class_mask = 0xFFFFFFFF, \
> + }, \
> + { \
> + .vendor = MICROSEMI_VENDOR_ID, \
> + .device = device_id, \
> + .subvendor = PCI_ANY_ID, \
> + .subdevice = PCI_ANY_ID, \
> + .class = MICROSEMI_NTB_CLASSCODE, \
> + .class_mask = 0xFFFFFFFF, \
> + }
> +
> +static const struct pci_device_id switchtec_pci_tbl[] = {
> + SWITCHTEC_PCI_DEVICE(0x8531), //PFX 24xG3
> + SWITCHTEC_PCI_DEVICE(0x8532), //PFX 32xG3
> + SWITCHTEC_PCI_DEVICE(0x8533), //PFX 48xG3
> + SWITCHTEC_PCI_DEVICE(0x8534), //PFX 64xG3
> + SWITCHTEC_PCI_DEVICE(0x8535), //PFX 80xG3
> + SWITCHTEC_PCI_DEVICE(0x8536), //PFX 96xG3
> + SWITCHTEC_PCI_DEVICE(0x8543), //PSX 48xG3
> + SWITCHTEC_PCI_DEVICE(0x8544), //PSX 64xG3
> + SWITCHTEC_PCI_DEVICE(0x8545), //PSX 80xG3
> + SWITCHTEC_PCI_DEVICE(0x8546), //PSX 96xG3
> + {0}
> +};
> +MODULE_DEVICE_TABLE(pci, switchtec_pci_tbl);
> +
> +static struct pci_driver switchtec_pci_driver = {
> + .name = KBUILD_MODNAME,
> + .id_table = switchtec_pci_tbl,
> + .probe = switchtec_pci_probe,
> + .remove = switchtec_pci_remove,
> +};
> +
> +static int __init switchtec_init(void)
> +{
> + int rc;
> +
> + max_devices = max(max_devices, 256);
> + rc = alloc_chrdev_region(&switchtec_devt, 0, max_devices,
> + "switchtec");
> + if (rc)
> + return rc;
> +
> + switchtec_class = class_create(THIS_MODULE, "switchtec");
> + if (IS_ERR(switchtec_class)) {
> + rc = PTR_ERR(switchtec_class);
> + goto err_create_class;
> + }
> +
> + rc = pci_register_driver(&switchtec_pci_driver);
> + if (rc)
> + goto err_pci_register;
> +
> + pr_info(KBUILD_MODNAME ": loaded.\n");
> +
> + return 0;
> +
> +err_pci_register:
> + class_destroy(switchtec_class);
> +
> +err_create_class:
> + unregister_chrdev_region(switchtec_devt, max_devices);
> +
> + return rc;
> +}
> +module_init(switchtec_init);
> +
> +static void __exit switchtec_exit(void)
> +{
> + pci_unregister_driver(&switchtec_pci_driver);
> + class_destroy(switchtec_class);
> + unregister_chrdev_region(switchtec_devt, max_devices);
> + ida_destroy(&switchtec_minor_ida);
> +
> + pr_info(KBUILD_MODNAME ": unloaded.\n");
> +}
> +module_exit(switchtec_exit);
> --
> 2.1.4