[PATCH] platform/chrome: cros_ec_rpmsg: Fix race with host command when probe failed.

From: Pi-Hsun Shih
Date: Wed Sep 04 2019 - 02:26:32 EST


Since the rpmsg_endpoint is created before probe is called, it's
possible that a host event is received during cros_ec_register, and
there would be some pending work in the host_event_work workqueue while
cros_ec_register is called.

If cros_ec_register fails, when the leftover work in host_event_work
run, the ec_dev from the drvdata of the rpdev could be already set to
NULL, causing kernel crash when trying to run cros_ec_get_next_event.

Fix this by creating the rpmsg_endpoint by ourself, and when
cros_ec_register fails (or on remove), destroy the endpoint first (to
make sure there's no more new calls to cros_ec_rpmsg_callback), and then
cancel all works in the host_event_work workqueue.

Fixes: 2de89fd98958 ("platform/chrome: cros_ec: Add EC host command support using rpmsg")
Signed-off-by: Pi-Hsun Shih <pihsun@xxxxxxxxxxxx>
---
drivers/platform/chrome/cros_ec_rpmsg.c | 33 +++++++++++++++++++++----
1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/drivers/platform/chrome/cros_ec_rpmsg.c b/drivers/platform/chrome/cros_ec_rpmsg.c
index 8b6bd775cc9a..0c3738c3244d 100644
--- a/drivers/platform/chrome/cros_ec_rpmsg.c
+++ b/drivers/platform/chrome/cros_ec_rpmsg.c
@@ -41,6 +41,7 @@ struct cros_ec_rpmsg {
struct rpmsg_device *rpdev;
struct completion xfer_ack;
struct work_struct host_event_work;
+ struct rpmsg_endpoint *ept;
};

/**
@@ -72,7 +73,6 @@ static int cros_ec_pkt_xfer_rpmsg(struct cros_ec_device *ec_dev,
struct cros_ec_command *ec_msg)
{
struct cros_ec_rpmsg *ec_rpmsg = ec_dev->priv;
- struct rpmsg_device *rpdev = ec_rpmsg->rpdev;
struct ec_host_response *response;
unsigned long timeout;
int len;
@@ -85,7 +85,7 @@ static int cros_ec_pkt_xfer_rpmsg(struct cros_ec_device *ec_dev,
dev_dbg(ec_dev->dev, "prepared, len=%d\n", len);

reinit_completion(&ec_rpmsg->xfer_ack);
- ret = rpmsg_send(rpdev->ept, ec_dev->dout, len);
+ ret = rpmsg_send(ec_rpmsg->ept, ec_dev->dout, len);
if (ret) {
dev_err(ec_dev->dev, "rpmsg send failed\n");
return ret;
@@ -196,11 +196,24 @@ static int cros_ec_rpmsg_callback(struct rpmsg_device *rpdev, void *data,
return 0;
}

+static struct rpmsg_endpoint *
+cros_ec_rpmsg_create_ept(struct rpmsg_device *rpdev)
+{
+ struct rpmsg_channel_info chinfo = {};
+
+ strscpy(chinfo.name, rpdev->id.name, RPMSG_NAME_SIZE);
+ chinfo.src = rpdev->src;
+ chinfo.dst = RPMSG_ADDR_ANY;
+
+ return rpmsg_create_ept(rpdev, cros_ec_rpmsg_callback, NULL, chinfo);
+}
+
static int cros_ec_rpmsg_probe(struct rpmsg_device *rpdev)
{
struct device *dev = &rpdev->dev;
struct cros_ec_rpmsg *ec_rpmsg;
struct cros_ec_device *ec_dev;
+ int ret;

ec_dev = devm_kzalloc(dev, sizeof(*ec_dev), GFP_KERNEL);
if (!ec_dev)
@@ -225,7 +238,18 @@ static int cros_ec_rpmsg_probe(struct rpmsg_device *rpdev)
INIT_WORK(&ec_rpmsg->host_event_work,
cros_ec_rpmsg_host_event_function);

- return cros_ec_register(ec_dev);
+ ec_rpmsg->ept = cros_ec_rpmsg_create_ept(rpdev);
+ if (!ec_rpmsg->ept)
+ return -ENOMEM;
+
+ ret = cros_ec_register(ec_dev);
+ if (ret < 0) {
+ rpmsg_destroy_ept(ec_rpmsg->ept);
+ cancel_work_sync(&ec_rpmsg->host_event_work);
+ return ret;
+ }
+
+ return 0;
}

static void cros_ec_rpmsg_remove(struct rpmsg_device *rpdev)
@@ -234,7 +258,7 @@ static void cros_ec_rpmsg_remove(struct rpmsg_device *rpdev)
struct cros_ec_rpmsg *ec_rpmsg = ec_dev->priv;

cros_ec_unregister(ec_dev);
-
+ rpmsg_destroy_ept(ec_rpmsg->ept);
cancel_work_sync(&ec_rpmsg->host_event_work);
}

@@ -271,7 +295,6 @@ static struct rpmsg_driver cros_ec_driver_rpmsg = {
},
.probe = cros_ec_rpmsg_probe,
.remove = cros_ec_rpmsg_remove,
- .callback = cros_ec_rpmsg_callback,
};

module_rpmsg_driver(cros_ec_driver_rpmsg);
--
2.23.0.187.g17f5b7556c-goog