[PATCH 3/3] drm/nouveau: clean up all clients on device removal
From: Jeremy Cline
Date: Tue Nov 03 2020 - 14:49:57 EST
The postclose handler can run after the device has been removed (or the
driver has been unbound) since userspace clients are free to hold the
file open the file as long as they want. Because the device removal
callback frees the entire nouveau_drm structure, any reference to it in
the postclose handler will result in a use-after-free.
To reproduce this, one must simply open the device file, unbind the
driver (or physically remove the device), and then close the device
file. This was found and can be reproduced easily with the IGT
core_hotunplug tests.
To avoid this, all clients are cleaned up in the device finialization
rather than deferring it to the postclose handler, and the postclose
handler is protected by a critical section which ensures the
drm_dev_unplug() and the postclose handler won't race.
This is not an ideal fix, since as I understand the proposed plan for
the kernel<->userspace interface for hotplug support, destroying the
client before the file is closed will cause problems. However, I believe
to properly fix this issue, the lifetime of the nouveau_drm structure
needs to be extended to match the drm_device, and this proved to be a
rather invasive change. Thus, I've broken this out so the fix can be
easily backported.
Signed-off-by: Jeremy Cline <jcline@xxxxxxxxxx>
---
drivers/gpu/drm/nouveau/nouveau_drm.c | 30 +++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index d182b877258a..74fab777f4d0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -628,6 +628,7 @@ nouveau_drm_device_init(struct drm_device *dev)
static void
nouveau_drm_device_fini(struct drm_device *dev)
{
+ struct nouveau_cli *cli, *temp_cli;
struct nouveau_drm *drm = nouveau_drm(dev);
if (nouveau_pmops_runtime()) {
@@ -652,6 +653,24 @@ nouveau_drm_device_fini(struct drm_device *dev)
nouveau_ttm_fini(drm);
nouveau_vga_fini(drm);
+ /*
+ * There may be existing clients from as-yet unclosed files. For now,
+ * clean them up here rather than deferring until the file is closed,
+ * but this likely not correct if we want to support hot-unplugging
+ * properly.
+ */
+ mutex_lock(&drm->clients_lock);
+ list_for_each_entry_safe(cli, temp_cli, &drm->clients, head) {
+ list_del(&cli->head);
+ mutex_lock(&cli->mutex);
+ if (cli->abi16)
+ nouveau_abi16_fini(cli->abi16);
+ mutex_unlock(&cli->mutex);
+ nouveau_cli_fini(cli);
+ kfree(cli);
+ }
+ mutex_unlock(&drm->clients_lock);
+
nouveau_cli_fini(&drm->client);
nouveau_cli_fini(&drm->master);
nvif_parent_dtor(&drm->parent);
@@ -1110,6 +1129,16 @@ nouveau_drm_postclose(struct drm_device *dev, struct drm_file *fpriv)
{
struct nouveau_cli *cli = nouveau_cli(fpriv);
struct nouveau_drm *drm = nouveau_drm(dev);
+ int dev_index;
+
+ /*
+ * The device is gone, and as it currently stands all clients are
+ * cleaned up in the removal codepath. In the future this may change
+ * so that we can support hot-unplugging, but for now we immediately
+ * return to avoid a double-free situation.
+ */
+ if (!drm_dev_enter(dev, &dev_index))
+ return;
pm_runtime_get_sync(dev->dev);
@@ -1126,6 +1155,7 @@ nouveau_drm_postclose(struct drm_device *dev, struct drm_file *fpriv)
kfree(cli);
pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
+ drm_dev_exit(dev_index);
}
static const struct drm_ioctl_desc
--
2.28.0