Re: [PATCHv3 12/15] remoteproc/omap: add support for system suspend/resume

From: Suman Anna
Date: Fri Dec 20 2019 - 13:23:28 EST


On 12/20/19 5:04 AM, Tero Kristo wrote:
> On 20/12/2019 05:11, Suman Anna wrote:
>> Hi Mathieu, Tero,
>>
>> On 12/19/19 3:46 PM, Mathieu Poirier wrote:
>>> On Fri, Dec 13, 2019 at 02:55:34PM +0200, Tero Kristo wrote:
>>>> From: Suman Anna <s-anna@xxxxxx>
>>>>
>>>> This patch adds the support for system suspend/resume to the
>>>> OMAP remoteproc driver so that the OMAP remoteproc devices can
>>>> be suspended/resumed during a system suspend/resume. The support
>>>> is added through the driver PM .suspend/.resume callbacks, and
>>>> requires appropriate support from the OS running on the remote
>>>> processors.
>>>>
>>>> The IPU & DSP remote processors typically have their own private
>>>> modules like registers, internal memories, caches etc. The context
>>>> of these modules need to be saved and restored properly for a
>>>> suspend/resume to work. These are in general not accessible from
>>>> the MPU, so the remote processors themselves have to implement
>>>> the logic for the context save & restore of these modules.
>>>>
>>>> The OMAP remoteproc driver initiates a suspend by sending a mailbox
>>>> message requesting the remote processor to save its context and
>>>> enter into an idle/standby state. The remote processor should
>>>> usually stop whatever processing it is doing to switch to a context
>>>> save mode. The OMAP remoteproc driver detects the completion of
>>>> the context save by checking the module standby status for the
>>>> remoteproc device. It also stops any resources used by the remote
>>>> processors like the timers. The timers need to be running only
>>>> when the processor is active and executing, and need to be stopped
>>>> otherwise to allow the timer driver to reach low-power states. The
>>>> IOMMUs are automatically suspended by the PM core during the late
>>>> suspend stage, after the remoteproc suspend process is completed by
>>>> putting the remote processor cores into reset. Thereafter, the Linux
>>>> kernel can put the domain into further lower power states as possible.
>>>>
>>>> The resume sequence undoes the operations performed in the PM suspend
>>>> callback, by starting the timers and finally releasing the processors
>>>> from reset. This requires that the remote processor side OS be able to
>>>> distinguish a power-resume boot from a power-on/cold boot, restore the
>>>> context of its private modules saved during the suspend phase, and
>>>> resume executing code from where it was suspended. The IOMMUs would
>>>> have been resumed by the PM core during early resume, so they are
>>>> already enabled by the time remoteproc resume callback gets invoked.
>>>>
>>>> The remote processors should save their context into System RAM (DDR),
>>>> as any internal memories are not guaranteed to retain context as it
>>>> depends on the lowest power domain that the remote processor device
>>>> is put into. The management of the DDR contents will be managed by
>>>> the Linux kernel.
>>>>
>>>> Signed-off-by: Suman Anna <s-anna@xxxxxx>
>>>> [t-kristo@xxxxxx: converted to use ti-sysc instead of hwmod]
>>>> Signed-off-by: Tero Kristo <t-kristo@xxxxxx>
>>>> ---
>>>> Â drivers/remoteproc/omap_remoteproc.c | 179
>>>> +++++++++++++++++++++++++++
>>>> Â drivers/remoteproc/omap_remoteproc.h |Â 18 ++-
>>>> Â 2 files changed, 195 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/remoteproc/omap_remoteproc.c
>>>> b/drivers/remoteproc/omap_remoteproc.c
>>>> index 9c750c2ab29d..0a9b9f7d20da 100644
>>>> --- a/drivers/remoteproc/omap_remoteproc.c
>>>> +++ b/drivers/remoteproc/omap_remoteproc.c
>>>> @@ -16,6 +16,7 @@
>>>> Â #include <linux/kernel.h>
>>>> Â #include <linux/module.h>
>>>> Â #include <linux/err.h>
>>>> +#include <linux/io.h>
>>>> Â #include <linux/of_device.h>
>>>> Â #include <linux/of_reserved_mem.h>
>>>> Â #include <linux/platform_device.h>
>>>> @@ -23,10 +24,13 @@
>>>> Â #include <linux/remoteproc.h>
>>>> Â #include <linux/mailbox_client.h>
>>>> Â #include <linux/omap-mailbox.h>
>>>> +#include <linux/omap-iommu.h>
>>>
>>> Please move this up by one line.
>>>
>>>> Â #include <linux/regmap.h>
>>>> Â #include <linux/mfd/syscon.h>
>>>> Â #include <linux/reset.h>
>>>> Â #include <clocksource/timer-ti-dm.h>
>>>> +#include <linux/clk.h>
>>>> +#include <linux/clk/ti.h>
>>>
>>> Unless there is a dependency with timer-ti-dm.h, these should go just
>>> above linux/err.h
>>
>> No depencencies, can be reordered.
>
> Will fix these.
>
>>
>>>
>>>> Â Â #include <linux/platform_data/dmtimer-omap.h>
>>>> Â @@ -81,6 +85,9 @@ struct omap_rproc_timer {
>>>> ÂÂ * @timers: timer(s) info used by rproc
>>>> ÂÂ * @rproc: rproc handle
>>>> ÂÂ * @reset: reset handle
>>>> + * @pm_comp: completion primitive to sync for suspend response
>>>> + * @fck: functional clock for the remoteproc
>>>> + * @suspend_acked: state machine flag to store the suspend request ack
>>>> ÂÂ */
>>>> Â struct omap_rproc {
>>>> ÂÂÂÂÂ struct mbox_chan *mbox;
>>>> @@ -92,6 +99,9 @@ struct omap_rproc {
>>>> ÂÂÂÂÂ struct omap_rproc_timer *timers;
>>>> ÂÂÂÂÂ struct rproc *rproc;
>>>> ÂÂÂÂÂ struct reset_control *reset;
>>>> +ÂÂÂ struct completion pm_comp;
>>>> +ÂÂÂ struct clk *fck;
>>>> +ÂÂÂ bool suspend_acked;
>>>> Â };
>>>> Â Â /**
>>>> @@ -349,6 +359,11 @@ static void omap_rproc_mbox_callback(struct
>>>> mbox_client *client, void *data)
>>>> ÂÂÂÂÂ case RP_MBOX_ECHO_REPLY:
>>>> ÂÂÂÂÂÂÂÂÂ dev_info(dev, "received echo reply from %s\n", name);
>>>> ÂÂÂÂÂÂÂÂÂ break;
>>>> +ÂÂÂ case RP_MBOX_SUSPEND_ACK:
>>>
>>> We can't get away with implicit fallthroughs anymore - please add /*
>>> Fall through */"
>
> Ok, will do.
>
>>>
>>>> +ÂÂÂ case RP_MBOX_SUSPEND_CANCEL:
>>>> +ÂÂÂÂÂÂÂ oproc->suspend_acked = msg == RP_MBOX_SUSPEND_ACK;
>>>> +ÂÂÂÂÂÂÂ complete(&oproc->pm_comp);
>>>> +ÂÂÂÂÂÂÂ break;
>>>> ÂÂÂÂÂ default:
>>>> ÂÂÂÂÂÂÂÂÂ if (msg >= RP_MBOX_READY && msg < RP_MBOX_END_MSG)
>>>> ÂÂÂÂÂÂÂÂÂÂÂÂÂ return;
>>>> @@ -529,6 +544,157 @@ static const struct rproc_ops omap_rproc_ops = {
>>>> ÂÂÂÂÂ .da_to_vaÂÂÂ = omap_rproc_da_to_va,
>>>> Â };
>>>> Â +#ifdef CONFIG_PM
>>>> +static bool _is_rproc_in_standby(struct omap_rproc *oproc)
>>>> +{
>>>> +ÂÂÂ return ti_clk_is_in_standby(oproc->fck);
>>>> +}
>>>> +
>>>> +/* 1 sec is long enough time to let the remoteproc side suspend the
>>>> device */
>>>> +#define DEF_SUSPEND_TIMEOUT 1000
>>>> +static int _omap_rproc_suspend(struct rproc *rproc)
>>>> +{
>>>> +ÂÂÂ struct device *dev = rproc->dev.parent;
>>>> +ÂÂÂ struct omap_rproc *oproc = rproc->priv;
>>>> +ÂÂÂ unsigned long to = msecs_to_jiffies(DEF_SUSPEND_TIMEOUT);
>>>> +ÂÂÂ unsigned long ta = jiffies + to;
>>>> +ÂÂÂ int ret;
>>>> +
>>>> +ÂÂÂ reinit_completion(&oproc->pm_comp);
>>>> +ÂÂÂ oproc->suspend_acked = false;
>>>> +ÂÂÂ ret = mbox_send_message(oproc->mbox, (void
>>>> *)RP_MBOX_SUSPEND_SYSTEM);
>>>> +ÂÂÂ if (ret < 0) {
>>>> +ÂÂÂÂÂÂÂ dev_err(dev, "PM mbox_send_message failed: %d\n", ret);
>>>> +ÂÂÂÂÂÂÂ return ret;
>>>> +ÂÂÂ }
>>>> +
>>>> +ÂÂÂ ret = wait_for_completion_timeout(&oproc->pm_comp, to);
>>>> +ÂÂÂ if (!oproc->suspend_acked)
>>>> +ÂÂÂÂÂÂÂ return -EBUSY;
>>>> +
>>>> +ÂÂÂ /*
>>>> +ÂÂÂÂ * The remoteproc side is returning the ACK message before
>>>> saving the
>>>> +ÂÂÂÂ * context, because the context saving is performed within a
>>>> SYS/BIOS
>>>> +ÂÂÂÂ * function, and it cannot have any inter-dependencies against
>>>> the IPC
>>>> +ÂÂÂÂ * layer. Also, as the SYS/BIOS needs to preserve properly the
>>>> processor
>>>> +ÂÂÂÂ * register set, sending this ACK or signalling the completion
>>>> of the
>>>> +ÂÂÂÂ * context save through a shared memory variable can never be the
>>>> +ÂÂÂÂ * absolute last thing to be executed on the remoteproc side,
>>>> and the
>>>> +ÂÂÂÂ * MPU cannot use the ACK message as a sync point to put the
>>>> remoteproc
>>>> +ÂÂÂÂ * into reset. The only way to ensure that the remote processor
>>>> has
>>>> +ÂÂÂÂ * completed saving the context is to check that the module has
>>>> reached
>>>> +ÂÂÂÂ * STANDBY state (after saving the context, the SYS/BIOS
>>>> executes the
>>>> +ÂÂÂÂ * appropriate target-specific WFI instruction causing the
>>>> module to
>>>> +ÂÂÂÂ * enter STANDBY).
>>>> +ÂÂÂÂ */
>>>> +ÂÂÂ while (!_is_rproc_in_standby(oproc)) {
>>>> +ÂÂÂÂÂÂÂ if (time_after(jiffies, ta))
>>>> +ÂÂÂÂÂÂÂÂÂÂÂ return -ETIME;
>>>> +ÂÂÂÂÂÂÂ schedule();
>>>> +ÂÂÂ }
>>>> +
>>>> +ÂÂÂ reset_control_assert(oproc->reset);
>>>> +
>>>> +ÂÂÂ ret = omap_rproc_disable_timers(rproc, false);
>>>> +ÂÂÂ if (ret) {
>>>> +ÂÂÂÂÂÂÂ dev_err(dev, "disabling timers during suspend failed %d\n",
>>>> +ÂÂÂÂÂÂÂÂÂÂÂ ret);
>>>> +ÂÂÂÂÂÂÂ goto enable_device;
>>>> +ÂÂÂ }
>>>> +
>>>> +ÂÂÂ return 0;
>>>> +
>>>> +enable_device:
>>>> +ÂÂÂ reset_control_deassert(oproc->reset);
>>>> +ÂÂÂ return ret;
>>>> +}
>>>> +
>>>> +static int _omap_rproc_resume(struct rproc *rproc)
>>>> +{
>>>> +ÂÂÂ struct device *dev = rproc->dev.parent;
>>>> +ÂÂÂ struct omap_rproc *oproc = rproc->priv;
>>>> +ÂÂÂ int ret;
>>>> +
>>>> +ÂÂÂ /* boot address could be lost after suspend, so restore it */
>>>> +ÂÂÂ if (oproc->boot_data) {
>>>> +ÂÂÂÂÂÂÂ ret = omap_rproc_write_dsp_boot_addr(rproc);
>>>> +ÂÂÂÂÂÂÂ if (ret) {
>>>> +ÂÂÂÂÂÂÂÂÂÂÂ dev_err(dev, "boot address restore failed %d\n", ret);
>>>> +ÂÂÂÂÂÂÂÂÂÂÂ goto out;
>>>> +ÂÂÂÂÂÂÂ }
>>>> +ÂÂÂ }
>>>> +
>>>> +ÂÂÂ ret = omap_rproc_enable_timers(rproc, false);
>>>> +ÂÂÂ if (ret) {
>>>> +ÂÂÂÂÂÂÂ dev_err(dev, "enabling timers during resume failed %d\n",
>>>> +ÂÂÂÂÂÂÂÂÂÂÂ ret);
>>>
>>> The "ret);" fits on the live above.
>
> Ok, will fix.
>
>>>
>>>> +ÂÂÂÂÂÂÂ goto out;
>>>> +ÂÂÂ }
>>>> +
>>>> +ÂÂÂ reset_control_deassert(oproc->reset);
>>>> +
>>>> +out:
>>>> +ÂÂÂ return ret;
>>>> +}
>>>> +
>>>> +static int __maybe_unused omap_rproc_suspend(struct device *dev)
>>>
>>> The "__maybe_unused" can be dropped because this is within the #ifdef
>>> CONFIG_PM
>>> block.
>>
>> These are suspend/resume callbacks, so are actually controlled by
>> CONFIG_PM_SLEEP which can be disabled independently from runtime
>> suspend, and hence the annotation.
>
> I'll add these two behind CONFIG_PM_SLEEP, in addition to the whole lot
> being behind CONFIG_PM.

AFAIK, the preferred method currently is to use the __may_unused
annotation. The SET_SYSTEM_SLEEP_PM_OPS is under that ifdef and the
compiler is smart enough to drop it when CONFIG_PM_SLEEP is not defined.
So, no need to change this.

regards
Suman

>
>>
>>>
>>>> +{
>>>> +ÂÂÂ struct platform_device *pdev = to_platform_device(dev);
>>>> +ÂÂÂ struct rproc *rproc = platform_get_drvdata(pdev);
>>>> +ÂÂÂ int ret = 0;
>>>> +
>>>> +ÂÂÂ mutex_lock(&rproc->lock);
>>>> +ÂÂÂ if (rproc->state == RPROC_OFFLINE)
>>>> +ÂÂÂÂÂÂÂ goto out;
>>>> +
>>>> +ÂÂÂ if (rproc->state == RPROC_SUSPENDED)
>>>> +ÂÂÂÂÂÂÂ goto out;
>>>> +
>>>> +ÂÂÂ if (rproc->state != RPROC_RUNNING) {
>>>> +ÂÂÂÂÂÂÂ ret = -EBUSY;
>>>> +ÂÂÂÂÂÂÂ goto out;
>>>> +ÂÂÂ }
>>>> +
>>>> +ÂÂÂ ret = _omap_rproc_suspend(rproc);
>>>> +ÂÂÂ if (ret) {
>>>> +ÂÂÂÂÂÂÂ dev_err(dev, "suspend failed %d\n", ret);
>>>> +ÂÂÂÂÂÂÂ goto out;
>>>> +ÂÂÂ }
>>>> +
>>>> +ÂÂÂ rproc->state = RPROC_SUSPENDED;
>>>> +out:
>>>> +ÂÂÂ mutex_unlock(&rproc->lock);
>>>> +ÂÂÂ return ret;
>>>> +}
>>>> +
>>>> +static int __maybe_unused omap_rproc_resume(struct device *dev)
>>>
>>> Same comment as above.
>>>
>>>> +{
>>>> +ÂÂÂ struct platform_device *pdev = to_platform_device(dev);
>>>> +ÂÂÂ struct rproc *rproc = platform_get_drvdata(pdev);
>>>> +ÂÂÂ int ret = 0;
>>>> +
>>>> +ÂÂÂ mutex_lock(&rproc->lock);
>>>> +ÂÂÂ if (rproc->state == RPROC_OFFLINE)
>>>> +ÂÂÂÂÂÂÂ goto out;
>>>> +
>>>> +ÂÂÂ if (rproc->state != RPROC_SUSPENDED) {
>>>> +ÂÂÂÂÂÂÂ ret = -EBUSY;
>>>> +ÂÂÂÂÂÂÂ goto out;
>>>> +ÂÂÂ }
>>>> +
>>>> +ÂÂÂ ret = _omap_rproc_resume(rproc);
>>>> +ÂÂÂ if (ret) {
>>>> +ÂÂÂÂÂÂÂ dev_err(dev, "resume failed %d\n", ret);
>>>> +ÂÂÂÂÂÂÂ goto out;
>>>> +ÂÂÂ }
>>>> +
>>>> +ÂÂÂ rproc->state = RPROC_RUNNING;
>>>> +out:
>>>> +ÂÂÂ mutex_unlock(&rproc->lock);
>>>> +ÂÂÂ return ret;
>>>> +}
>>>> +#endif /* CONFIG_PM */
>>>> +
>>>> Â static const char * const ipu_mem_names[] = {
>>>> ÂÂÂÂÂ "l2ram", NULL
>>>> Â };
>>>> @@ -786,6 +952,14 @@ static int omap_rproc_probe(struct
>>>> platform_device *pdev)
>>>> ÂÂÂÂÂÂÂÂÂÂÂÂÂ oproc->num_timers);
>>>> ÂÂÂÂÂ }
>>>> Â +ÂÂÂ init_completion(&oproc->pm_comp);
>>>> +
>>>> +ÂÂÂ oproc->fck = devm_clk_get(&pdev->dev, 0);
>>>> +ÂÂÂ if (IS_ERR(oproc->fck)) {
>>>> +ÂÂÂÂÂÂÂ ret = PTR_ERR(oproc->fck);
>>>> +ÂÂÂÂÂÂÂ goto free_rproc;
>>>> +ÂÂÂ }
>>>> +
>>>
>>> oproc->fck is not released if an error occurs in
>>> of_reserved_mem_device_init()
>>> and rproc_add().
>>
>> We are using a devres managed API, so why do we need to release it
>> specifically?
>
> Yea I don't think this is needed.
>
>>
>>>
>>>> ÂÂÂÂÂ ret = of_reserved_mem_device_init(&pdev->dev);
>>>> ÂÂÂÂÂ if (ret) {
>>>> ÂÂÂÂÂÂÂÂÂ dev_err(&pdev->dev, "device does not have specific CMA
>>>> pool\n");
>>>> @@ -818,11 +992,16 @@ static int omap_rproc_remove(struct
>>>> platform_device *pdev)
>>>> ÂÂÂÂÂ return 0;
>>>> Â }
>>>> Â +static const struct dev_pm_ops omap_rproc_pm_ops = {
>>>> +ÂÂÂ SET_SYSTEM_SLEEP_PM_OPS(omap_rproc_suspend, omap_rproc_resume)
>>>> +};
>>>> +
>>>> Â static struct platform_driver omap_rproc_driver = {
>>>> ÂÂÂÂÂ .probe = omap_rproc_probe,
>>>> ÂÂÂÂÂ .remove = omap_rproc_remove,
>>>> ÂÂÂÂÂ .driver = {
>>>> ÂÂÂÂÂÂÂÂÂ .name = "omap-rproc",
>>>> +ÂÂÂÂÂÂÂ .pm = &omap_rproc_pm_ops,
>>>> ÂÂÂÂÂÂÂÂÂ .of_match_table = omap_rproc_of_match,
>>>> ÂÂÂÂÂ },
>>>> Â };
>>>> diff --git a/drivers/remoteproc/omap_remoteproc.h
>>>> b/drivers/remoteproc/omap_remoteproc.h
>>>> index 72f656c93caa..c73383e707c7 100644
>>>> --- a/drivers/remoteproc/omap_remoteproc.h
>>>> +++ b/drivers/remoteproc/omap_remoteproc.h
>>>> @@ -1,7 +1,7 @@
>>>> Â /*
>>>> ÂÂ * Remote processor messaging
>>>> ÂÂ *
>>>> - * Copyright (C) 2011 Texas Instruments, Inc.
>>>> + * Copyright (C) 2011-2018 Texas Instruments, Inc.
>>
>> %s/2018/2019/
>
> Yep, will fix.
>
> -Tero
>
>>
>> regards
>> Suman
>>
>>>> ÂÂ * Copyright (C) 2011 Google, Inc.
>>>> ÂÂ * All rights reserved.
>>>> ÂÂ *
>>>> @@ -57,6 +57,16 @@
>>>> ÂÂ * @RP_MBOX_ABORT_REQUEST: a "please crash" request, used for
>>>> testing the
>>>> ÂÂ * recovery mechanism (to some extent).
>>>> ÂÂ *
>>>> + * @RP_MBOX_SUSPEND_AUTO: auto suspend request for the remote
>>>> processor
>>>> + *
>>>> + * @RP_MBOX_SUSPEND_SYSTEM: system suspend request for the remote
>>>> processor
>>>> + *
>>>> + * @RP_MBOX_SUSPEND_ACK: successful response from remote processor
>>>> for a
>>>> + * suspend request
>>>> + *
>>>> + * @RP_MBOX_SUSPEND_CANCEL: a cancel suspend response from a remote
>>>> processor
>>>> + * on a suspend request
>>>> + *
>>>> ÂÂ * Introduce new message definitions if any here.
>>>> ÂÂ *
>>>> ÂÂ * @RP_MBOX_END_MSG: Indicates end of known/defined messages from
>>>> remote core
>>>> @@ -70,7 +80,11 @@ enum omap_rp_mbox_messages {
>>>> ÂÂÂÂÂ RP_MBOX_ECHO_REQUESTÂÂÂ = 0xFFFFFF03,
>>>> ÂÂÂÂÂ RP_MBOX_ECHO_REPLYÂÂÂ = 0xFFFFFF04,
>>>> ÂÂÂÂÂ RP_MBOX_ABORT_REQUESTÂÂÂ = 0xFFFFFF05,
>>>> -ÂÂÂ RP_MBOX_END_MSGÂÂÂÂÂÂÂ = 0xFFFFFF06,
>>>> +ÂÂÂ RP_MBOX_SUSPEND_AUTOÂÂÂ = 0xFFFFFF10,
>>>> +ÂÂÂ RP_MBOX_SUSPEND_SYSTEMÂÂÂ = 0xFFFFFF11,
>>>> +ÂÂÂ RP_MBOX_SUSPEND_ACKÂÂÂ = 0xFFFFFF12,
>>>> +ÂÂÂ RP_MBOX_SUSPEND_CANCELÂÂÂ = 0xFFFFFF13,
>>>> +ÂÂÂ RP_MBOX_END_MSGÂÂÂÂÂÂÂ = 0xFFFFFF14,
>>>> Â };
>>>> Â Â #endif /* _OMAP_RPMSG_H */
>>>> --Â
>>>> 2.17.1
>>>>
>>>> --
>>
>
> --
> Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
> Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki