Re: [PATCH] habanalabs: Expose devices after initialization is done

From: Oded Gabbay
Date: Thu Aug 08 2019 - 10:12:09 EST


On Thu, Aug 8, 2019 at 3:25 PM Tomer Tayar <ttayar@xxxxxxxxx> wrote:
>
> The char devices are currently exposed to user before the device and
> driver initialization are done.
> This patch moves the cdev and device adding to the system to the end of
> the initialization sequence, while keeping the creation of the
> structures at the beginning to allow the usage of dev_*().
>
> Signed-off-by: Tomer Tayar <ttayar@xxxxxxxxx>
> ---
> drivers/misc/habanalabs/device.c | 163 ++++++++++++++++++---------
> drivers/misc/habanalabs/habanalabs.h | 2 +
> 2 files changed, 111 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
> index efde1fe7d286..9a5926888b99 100644
> --- a/drivers/misc/habanalabs/device.c
> +++ b/drivers/misc/habanalabs/device.c
> @@ -151,50 +151,94 @@ static const struct file_operations hl_ctrl_ops = {
> .compat_ioctl = hl_ioctl_control
> };
>
> +static void device_release_func(struct device *dev)
> +{
> + kfree(dev);
> +}
> +
> /*
> - * device_setup_cdev - setup cdev and device for habanalabs device
> + * device_init_cdev - Initialize cdev and device for habanalabs device
> *
> * @hdev: pointer to habanalabs device structure
> * @hclass: pointer to the class object of the device
> * @minor: minor number of the specific device
> * @fpos: file operations to install for this device
> * @name: name of the device as it will appear in the filesystem
> - * @cdev: pointer to the char device object that will be created
> - * @dev: pointer to the device object that will be created
> + * @cdev: pointer to the char device object that will be initialized
> + * @dev: pointer to the device object that will be initialized
> *
> - * Create a cdev and a Linux device for habanalabs's device. Need to be
> - * called at the end of the habanalabs device initialization process,
> - * because this function exposes the device to the user
> + * Initialize a cdev and a Linux device for habanalabs's device.
> */
> -static int device_setup_cdev(struct hl_device *hdev, struct class *hclass,
> +static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
> int minor, const struct file_operations *fops,
> char *name, struct cdev *cdev,
> struct device **dev)
> {
> - int err, devno = MKDEV(hdev->major, minor);
> -
> cdev_init(cdev, fops);
> cdev->owner = THIS_MODULE;
> - err = cdev_add(cdev, devno, 1);
> - if (err) {
> - pr_err("Failed to add char device %s\n", name);
> - return err;
> +
> + *dev = kzalloc(sizeof(**dev), GFP_KERNEL);
> + if (!*dev)
> + return -ENOMEM;
> +
> + device_initialize(*dev);
> + (*dev)->devt = MKDEV(hdev->major, minor);
> + (*dev)->class = hclass;
> + (*dev)->release = device_release_func;
> + dev_set_drvdata(*dev, hdev);
> + dev_set_name(*dev, "%s", name);
> +
> + return 0;
> +}
> +
> +static int device_cdev_sysfs_add(struct hl_device *hdev)
> +{
> + int rc;
> +
> + rc = cdev_device_add(&hdev->cdev, hdev->dev);
> + if (rc) {
> + dev_err(hdev->dev,
> + "failed to add a char device to the system\n");
> + return rc;
> }
>
> - *dev = device_create(hclass, NULL, devno, NULL, "%s", name);
> - if (IS_ERR(*dev)) {
> - pr_err("Failed to create device %s\n", name);
> - err = PTR_ERR(*dev);
> - goto err_device_create;
> + rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
> + if (rc) {
> + dev_err(hdev->dev,
> + "failed to add a control char device to the system\n");
> + goto delete_cdev_device;
> }
>
> - dev_set_drvdata(*dev, hdev);
> + /* hl_sysfs_init() must be done after adding the device to the system */
> + rc = hl_sysfs_init(hdev);
> + if (rc) {
> + dev_err(hdev->dev, "failed to initialize sysfs\n");
> + goto delete_ctrl_cdev_device;
> + }
> +
> + hdev->cdev_sysfs_created = true;
>
> return 0;
>
> -err_device_create:
> - cdev_del(cdev);
> - return err;
> +delete_ctrl_cdev_device:
> + cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
> +delete_cdev_device:
> + cdev_device_del(&hdev->cdev, hdev->dev);
> + return rc;
> +}
> +
> +static void device_cdev_sysfs_del(struct hl_device *hdev)
> +{
> + /* device_release() won't be called so must free devices explicitly */
> + if (!hdev->cdev_sysfs_created) {
> + kfree(hdev->dev_ctrl);
> + kfree(hdev->dev);
> + return;
> + }
> +
> + hl_sysfs_fini(hdev);
> + cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
> + cdev_device_del(&hdev->cdev, hdev->dev);
> }
>
> /*
> @@ -898,13 +942,16 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
> {
> int i, rc, cq_ready_cnt;
> char *name;
> + bool add_cdev_sysfs_on_err = false;
>
> name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
> - if (!name)
> - return -ENOMEM;
> + if (!name) {
> + rc = -ENOMEM;
> + goto out_disabled;
> + }
>
> - /* Create device */
> - rc = device_setup_cdev(hdev, hclass, hdev->id, &hl_ops, name,
> + /* Initialize cdev and device structures */
> + rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
> &hdev->cdev, &hdev->dev);
>
> kfree(name);
> @@ -915,22 +962,22 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
> name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
> if (!name) {
> rc = -ENOMEM;
> - goto release_device;
> + goto free_dev;
> }
>
> - /* Create control device */
> - rc = device_setup_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
> + /* Initialize cdev and device structures for control device */
> + rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
> name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
>
> kfree(name);
>
> if (rc)
> - goto release_device;
> + goto free_dev;
>
> /* Initialize ASIC function pointers and perform early init */
> rc = device_early_init(hdev);
> if (rc)
> - goto release_control_device;
> + goto free_dev_ctrl;
>
> /*
> * Start calling ASIC initialization. First S/W then H/W and finally
> @@ -1016,12 +1063,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
> goto release_ctx;
> }
>
> - rc = hl_sysfs_init(hdev);
> - if (rc) {
> - dev_err(hdev->dev, "failed to initialize sysfs\n");
> - goto free_cb_pool;
> - }
> -
> hl_debugfs_add_device(hdev);
>
> if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
> @@ -1030,6 +1071,12 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
> hdev->asic_funcs->hw_fini(hdev, true);
> }
>
> + /*
> + * From this point, in case of an error, add char devices and create
> + * sysfs nodes as part of the error flow, to allow debugging.
> + */
> + add_cdev_sysfs_on_err = true;
> +
> rc = hdev->asic_funcs->hw_init(hdev);
> if (rc) {
> dev_err(hdev->dev, "failed to initialize the H/W\n");
> @@ -1066,9 +1113,24 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
> }
>
> /*
> - * hl_hwmon_init must be called after device_late_init, because only
> + * Expose devices and sysfs nodes to user.
> + * From here there is no need to add char devices and create sysfs nodes
> + * in case of an error.
> + */
> + add_cdev_sysfs_on_err = false;
> + rc = device_cdev_sysfs_add(hdev);
> + if (rc) {
> + dev_err(hdev->dev,
> + "Failed to add char devices and sysfs nodes\n");
> + rc = 0;
> + goto out_disabled;
> + }
> +
> + /*
> + * hl_hwmon_init() must be called after device_late_init(), because only
> * there we get the information from the device about which
> - * hwmon-related sensors the device supports
> + * hwmon-related sensors the device supports.
> + * Furthermore, it must be done after adding the device to the system.
> */
> rc = hl_hwmon_init(hdev);
> if (rc) {
> @@ -1084,8 +1146,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
>
> return 0;
>
> -free_cb_pool:
> - hl_cb_pool_fini(hdev);
> release_ctx:
> if (hl_ctx_put(hdev->kernel_ctx) != 1)
> dev_err(hdev->dev,
> @@ -1106,14 +1166,14 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
> hdev->asic_funcs->sw_fini(hdev);
> early_fini:
> device_early_fini(hdev);
> -release_control_device:
> - device_destroy(hclass, hdev->dev_ctrl->devt);
> - cdev_del(&hdev->cdev_ctrl);
> -release_device:
> - device_destroy(hclass, hdev->dev->devt);
> - cdev_del(&hdev->cdev);
> +free_dev_ctrl:
> + kfree(hdev->dev_ctrl);
> +free_dev:
> + kfree(hdev->dev);
> out_disabled:
> hdev->disabled = true;
> + if (add_cdev_sysfs_on_err)
> + device_cdev_sysfs_add(hdev);
> if (hdev->pdev)
> dev_err(&hdev->pdev->dev,
> "Failed to initialize hl%d. Device is NOT usable !\n",
> @@ -1179,8 +1239,6 @@ void hl_device_fini(struct hl_device *hdev)
>
> hl_debugfs_remove_device(hdev);
>
> - hl_sysfs_fini(hdev);
> -
> /*
> * Halt the engines and disable interrupts so we won't get any more
> * completions from H/W and we won't have any accesses from the
> @@ -1223,11 +1281,8 @@ void hl_device_fini(struct hl_device *hdev)
>
> device_early_fini(hdev);
>
> - /* Hide device from user */
> - device_destroy(hdev->dev_ctrl->class, hdev->dev_ctrl->devt);
> - cdev_del(&hdev->cdev_ctrl);
> - device_destroy(hdev->dev->class, hdev->dev->devt);
> - cdev_del(&hdev->cdev);
> + /* Hide devices and sysfs nodes from user */
> + device_cdev_sysfs_del(hdev);
>
> pr_info("removed device successfully\n");
> }
> diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
> index aaaa29d98901..1bb181285589 100644
> --- a/drivers/misc/habanalabs/habanalabs.h
> +++ b/drivers/misc/habanalabs/habanalabs.h
> @@ -1216,6 +1216,7 @@ struct hl_device_reset_work {
> * @dma_mask: the dma mask that was set for this device
> * @in_debug: is device under debug. This, together with fpriv_list, enforces
> * that only a single user is configuring the debug infrastructure.
> + * @cdev_sysfs_created: were char devices and sysfs nodes created.
> */
> struct hl_device {
> struct pci_dev *pdev;
> @@ -1291,6 +1292,7 @@ struct hl_device {
> u8 device_cpu_disabled;
> u8 dma_mask;
> u8 in_debug;
> + u8 cdev_sysfs_created;
>
> /* Parameters for bring-up */
> u8 mmu_enable;
> --
> 2.17.1
>

This patch is:
Reviewed-by: Oded Gabbay <oded.gabbay@xxxxxxxxx>