Re: [PATCH v2] media: qcom: camss: fix VFE pm domain off

From: Bryan O'Donoghue
Date: Tue Dec 03 2024 - 11:50:30 EST


On 03/12/2024 01:02, barnabas.czeman@xxxxxxxxxxxxxx wrote:
On 2024-12-03 00:10, Bryan O'Donoghue wrote:
On 30/11/2024 22:58, barnabas.czeman@xxxxxxxxxxxxxx wrote:
On 2024-11-30 22:48, Bryan O'Donoghue wrote:
On 29/11/2024 23:52, barnabas.czeman@xxxxxxxxxxxxxx wrote:
On 2024-11-30 00:07, Bryan O'Donoghue wrote:
On 29/11/2024 22:45, barnabas.czeman@xxxxxxxxxxxxxx wrote:
On 2024-11-29 23:08, Bryan O'Donoghue wrote:
On 29/11/2024 13:46, barnabas.czeman@xxxxxxxxxxxxxx wrote:
On 2024-11-29 13:25, Bryan O'Donoghue wrote:
On 29/11/2024 11:44, barnabas.czeman@xxxxxxxxxxxxxx wrote:
The change does not describe how to reproduce the problem, which commit
base is tested, which platform is testes, there is no enough information,
unfortunately.
I can reproduce the problem with megapixels-sensorprofile on msm8953 and
it can be reproduced with megapixels on msm8996.
The base is the last commit on next.

Can you verify if vfe_domain_on has run and if so whether or not genpd_link is NULL when that function exists.

I have added some debug logs it seems pm_domain_on and pm_domain_off is called twice on the same object.
[   63.473360] qcom-camss 1b00020.camss: pm_domain_on 19842ce8 link 42973800
[   63.481524] qcom-camss 1b00020.camss: pm_domain_on 19840080 link 4e413800
[   63.481555] qcom-camss 1b00020.camss: pm_domain_on 19842ce8 link 42973800
[   63.481632] qcom-camss 1b00020.camss: pm_domain_off 19840080 link 4e413800
[   63.481641] qcom-camss 1b00020.camss: pm_domain_off 19842ce8 link 42973800
[   63.654004] qcom-camss 1b00020.camss: pm_domain_off 19842ce8 link 0
That's the question.

---
bod

Could you provide this output ?

index 80a62ba112950..b25b8f6b00be1 100644
--- a/drivers/media/platform/qcom/camss/camss-vfe.c
+++ b/drivers/media/platform/qcom/camss/camss-vfe.c
@@ -595,6 +595,9 @@ void vfe_isr_reset_ack(struct vfe_device *vfe)
  */
 void vfe_pm_domain_off(struct vfe_device *vfe)
 {
+dev_info(camss->dev, "%s VFE %d genpd %pK genpd_link %pK\n",
+        __func__, vfe->id, vfe->genpd, vfe->genpd_link);
+
        if (!vfe->genpd)
                return;

@@ -609,7 +612,8 @@ void vfe_pm_domain_off(struct vfe_device *vfe)
 int vfe_pm_domain_on(struct vfe_device *vfe)
 {
        struct camss *camss = vfe->camss;
-
+dev_info(camss->dev, "%s VFE %d genpd %pK genpd_link %pK\n",
+        __func__, vfe->id, vfe->genpd, vfe->genpd_link);
        if (!vfe->genpd)
                return 0;

---
bod
I think logging in pm_domain_on should be placed after device_link_add because only NULL
will be visible.
[   83.040694] qcom-camss 1b00020.camss: vfe_pm_domain_on VFE 1 genpd 000000009bd8355f genpd_link 0000000000000000
[   83.049293] qcom-camss 1b00020.camss: vfe_pm_domain_on VFE 0 genpd 00000000bfb65e7c genpd_link 0000000000000000
[   83.049353] qcom-camss 1b00020.camss: vfe_pm_domain_on VFE 1 genpd 000000009bd8355f genpd_link 00000000ccb0acd9
[   83.049641] qcom-camss 1b00020.camss: vfe_pm_domain_off VFE 0 genpd 00000000bfb65e7c genpd_link 00000000348ac3c1
[   83.049654] qcom-camss 1b00020.camss: vfe_pm_domain_off VFE 1 genpd 000000009bd8355f genpd_link 00000000ccb0acd9
[   83.241498] qcom-camss 1b00020.camss: vfe_pm_domain_off VFE 1 genpd 000000009bd8355f genpd_link 0000000000000000

Could you add

+++ b/drivers/media/platform/qcom/camss/camss-vfe.c
@@ -786,7 +786,7 @@ int vfe_get(struct vfe_device *vfe)
        int ret;

        mutex_lock(&vfe->power_lock);
-
+dev_info(vfe->camss->dev, "%s vfe %d power_count %d\n", __func__, vfe->id, vfe->power_count);
        if (vfe->power_count == 0) {
                ret = vfe->res->hw_ops->pm_domain_on(vfe);
                if (ret < 0)
@@ -823,6 +823,7 @@ int vfe_get(struct vfe_device *vfe)

        mutex_unlock(&vfe->power_lock);

+dev_info(camss->vfe->dev, "%s vfe %d err=%d\n", __func__, camss- >vfe- >id, 0);
        return 0;

 error_reset:
@@ -835,7 +836,7 @@ int vfe_get(struct vfe_device *vfe)

 error_pm_domain:
        mutex_unlock(&vfe->power_lock);
-
+dev_info(camss->vfe->dev, "%s vfe %d err=%d\n", __func__, camss- >vfe- >id, ret);
        return ret;
 }

?

---
bod
I have added little more from the logs because it is only failing in edge cases megapixels-sensorprofile failing by
different reason quickly and trying to release the device.
[   54.719030] qcom-camss 1b00020.camss: vfe_get vfe 0 err=0
[   54.750124] qcom-camss 1b00020.camss: vfe_get vfe 0 power_count 1
[   54.750236] qcom-camss 1b00020.camss: vfe_get vfe 0 err=0
[   54.751270] qcom-camss 1b00020.camss: vfe_pm_domain_on VFE 0 genpd 00000000beaef03c genpd_link 00000000251644d9

[   54.751433] qcom-camss 1b00020.camss: vfe_pm_domain_on VFE 1 genpd 000000007ce2da53 genpd_link 0000000000000000
[   54.755531] qcom-camss 1b00020.camss: vfe_pm_domain_off VFE 1 genpd 000000007ce2da53 genpd_link 0000000058dcd4d6

that's a bug genpd_link should be NULL unless power_count != 0

[  143.922868] qcom-camss 1b00020.camss: vfe_pm_domain_off VFE 1 genpd 000000007ce2da53 genpd_link 00000000d1fcd54b
[  144.126535] qcom-camss 1b00020.camss: vfe_pm_domain_off VFE 1 genpd 000000007ce2da53 genpd_link 0000000000000000

this is the corollary of the bug

can you provide the output of the attached please ?
[   50.787730] qcom-camss 1b00020.camss: vfe_get/806 vfe 1 power_count 0
[   50.794888] qcom-camss 1b00020.camss: vfe_get/811 vfe 1 power_count 0
[   50.795040] qcom-camss 1b00020.camss: vfe_get/816 vfe 1 power_count 0
[   50.795131] qcom-camss 1b00020.camss: vfe_get/822 vfe 1 power_count 0
[   50.795172] qcom-camss 1b00020.camss: vfe_get/827 vfe 1 power_count 0
[   50.795180] qcom-camss 1b00020.camss: vfe_get/830 vfe 1 power_count 0
[   50.795188] qcom-camss 1b00020.camss: vfe_get/841 vfe 1 power_count 1
[   50.795413] qcom-camss 1b00020.camss: vfe_put/868 vfe 1 power_count 1
[   50.795422] qcom-camss 1b00020.camss: vfe_put/874 vfe 1 power_count 1
[   50.795429] qcom-camss 1b00020.camss: vfe_put/882 vfe 1 power_count 1
[   50.795468] qcom-camss 1b00020.camss: vfe_put/884 vfe 1 power_count 1
[   50.799936] qcom-camss 1b00020.camss: vfe_put/886 vfe 1 power_count 1
[   50.800247] qcom-camss 1b00020.camss: vfe_put/888 vfe 1 power_count 1
[   50.800257] qcom-camss 1b00020.camss: vfe_put/891 vfe 1 power_count 1
[   50.800263] qcom-camss 1b00020.camss: vfe_put/893 vfe 1 power_count 0
[   51.086159] qcom-camss 1b00020.camss: vfe_get/801 vfe 0 power_count 0
[   51.088158] qcom-camss 1b00020.camss: vfe_get/806 vfe 0 power_count 0
[   51.092782] qcom-camss 1b00020.camss: vfe_get/811 vfe 0 power_count 0
[   51.092872] qcom-camss 1b00020.camss: vfe_get/816 vfe 0 power_count 0
[   51.092945] qcom-camss 1b00020.camss: vfe_get/822 vfe 0 power_count 0
[   51.092980] qcom-camss 1b00020.camss: vfe_get/827 vfe 0 power_count 0
[   51.092987] qcom-camss 1b00020.camss: vfe_get/830 vfe 0 power_count 0
[   51.092994] qcom-camss 1b00020.camss: vfe_get/841 vfe 0 power_count 1
[   51.117104] qcom-camss 1b00020.camss: vfe_get/841 vfe 0 power_count 2
[   52.181802] qcom-camss 1b00020.camss: vfe_put/868 vfe 0 power_count 2
[   52.181828] qcom-camss 1b00020.camss: vfe_put/891 vfe 0 power_count 2
[   52.181834] qcom-camss 1b00020.camss: vfe_put/893 vfe 0 power_count 1
[   52.189017] qcom-camss 1b00020.camss: vfe_get/841 vfe 0 power_count 2
[   64.920259] qcom-camss 1b00020.camss: vfe_get/841 vfe 0 power_count 3
[   64.920337] qcom-camss 1b00020.camss: vfe_get/841 vfe 0 power_count 4
[   64.920368] qcom-camss 1b00020.camss: vfe_get/801 vfe 1 power_count 0
[   64.920656] qcom-camss 1b00020.camss: vfe_get/806 vfe 1 power_count 0
[   64.920667] qcom-camss 1b00020.camss: vfe_get/811 vfe 1 power_count 0
[   64.920706] qcom-camss 1b00020.camss: vfe_get/816 vfe 1 power_count 0
[   64.920734] qcom-camss 1b00020.camss: vfe_get/822 vfe 1 power_count 0
[   64.920868] qcom-camss 1b00020.camss: vfe_get/827 vfe 1 power_count 0
[   64.920877] qcom-camss 1b00020.camss: vfe_get/830 vfe 1 power_count 0
[   64.920886] qcom-camss 1b00020.camss: vfe_get/841 vfe 1 power_count 1
[   64.920963] qcom-camss 1b00020.camss: vfe_get/841 vfe 1 power_count 2
[   64.921008] qcom-camss 1b00020.camss: vfe_get/841 vfe 1 power_count 3
[   64.921871] qcom-camss 1b00020.camss: vfe_put/868 vfe 0 power_count 4
[   64.921896] qcom-camss 1b00020.camss: vfe_put/891 vfe 0 power_count 4
[   64.921904] qcom-camss 1b00020.camss: vfe_put/893 vfe 0 power_count 3
[   64.927278] qcom-camss 1b00020.camss: vfe_get/841 vfe 0 power_count 4
[   65.096857] qcom-camss 1b00020.camss: vfe_put/868 vfe 1 power_count 3
[   65.096883] qcom-camss 1b00020.camss: vfe_put/891 vfe 1 power_count 3
[   65.096889] qcom-camss 1b00020.camss: vfe_put/893 vfe 1 power_count 2
[   65.096903] qcom-camss 1b00020.camss: vfe_put/868 vfe 1 power_count 2
[   65.096908] qcom-camss 1b00020.camss: vfe_put/891 vfe 1 power_count 2
[   65.096914] qcom-camss 1b00020.camss: vfe_put/893 vfe 1 power_count 1
[   65.096927] qcom-camss 1b00020.camss: vfe_put/868 vfe 1 power_count 1
[   65.096933] qcom-camss 1b00020.camss: vfe_put/874 vfe 1 power_count 1
[   65.096938] qcom-camss 1b00020.camss: vfe_put/882 vfe 1 power_count 1
[   65.096958] qcom-camss 1b00020.camss: vfe_put/884 vfe 1 power_count 1
[   65.096964] qcom-camss 1b00020.camss: vfe_put/886 vfe 1 power_count 1

Ah could you supply this output along with the output from the previous ?
[   55.993565] qcom-camss 1b00020.camss: vfe_pm_domain_on VFE 0 genpd 0000000003dcc927 genpd_link 00000000b216e0c0
[   55.993886] qcom-camss 1b00020.camss: vfe_pm_domain_on VFE 1 genpd 0000000012d2fc9c genpd_link 00000000e1d78ab3
[   55.993956] qcom-camss 1b00020.camss: vfe_pm_domain_off VFE 0 genpd 0000000003dcc927 genpd_link 00000000b216e0c0
[   55.993966] qcom-camss 1b00020.camss: vfe_pm_domain_off VFE 1 genpd 0000000012d2fc9c genpd_link 00000000e1d78ab3
[   95.804026] qcom-camss 1b00020.camss: vfe_get vfe 0 power_count 2
[   95.804092] qcom-camss 1b00020.camss: vfe_get/845 vfe 0 power_count 3
[   95.804104] qcom-camss 1b00020.camss: vfe_get vfe 0 err=0
[   95.804138] qcom-camss 1b00020.camss: vfe_get vfe 0 power_count 3
[   95.804158] qcom-camss 1b00020.camss: vfe_get/845 vfe 0 power_count 4
[   95.804169] qcom-camss 1b00020.camss: vfe_get vfe 0 err=0
[   95.804203] qcom-camss 1b00020.camss: vfe_get vfe 1 power_count 0
[   95.804214] qcom-camss 1b00020.camss: vfe_get/805 vfe 1 power_count 0
[   95.804526] qcom-camss 1b00020.camss: vfe_pm_domain_on VFE 1 genpd 0000000012d2fc9c genpd_link 00000000cf5c896a
[   95.804543] qcom-camss 1b00020.camss: vfe_get/810 vfe 1 power_count 0
[   95.804555] qcom-camss 1b00020.camss: vfe_get/815 vfe 1 power_count 0
[   95.804593] qcom-camss 1b00020.camss: vfe_get/820 vfe 1 power_count 0
[   95.804629] qcom-camss 1b00020.camss: vfe_get/826 vfe 1 power_count 0
[   95.804951] qcom-camss 1b00020.camss: vfe_get/831 vfe 1 power_count 0
[   95.804964] qcom-camss 1b00020.camss: vfe_get/834 vfe 1 power_count 0
[   95.804976] qcom-camss 1b00020.camss: vfe_get/845 vfe 1 power_count 1
[   95.804987] qcom-camss 1b00020.camss: vfe_get vfe 1 err=0
[   95.805028] qcom-camss 1b00020.camss: vfe_get vfe 1 power_count 1
[   95.805048] qcom-camss 1b00020.camss: vfe_get/845 vfe 1 power_count 2
[   95.805058] qcom-camss 1b00020.camss: vfe_get vfe 1 err=0
[   95.805094] qcom-camss 1b00020.camss: vfe_get vfe 1 power_count 2
[   95.805113] qcom-camss 1b00020.camss: vfe_get/845 vfe 1 power_count 3
[   95.805123] qcom-camss 1b00020.camss: vfe_get vfe 1 err=0
[   95.806117] qcom-camss 1b00020.camss: vfe_put/873 vfe 0 power_count 4
[   95.806131] qcom-camss 1b00020.camss: vfe_put/894 vfe 0 power_count 4
[   95.806142] qcom-camss 1b00020.camss: vfe_put/896 vfe 0 power_count 3
[   95.814108] qcom-camss 1b00020.camss: vfe_get vfe 0 power_count 3
[   95.814134] qcom-camss 1b00020.camss: vfe_get/845 vfe 0 power_count 4
[   95.814143] qcom-camss 1b00020.camss: vfe_get vfe 0 err=0
[   95.814886] qcom-camss 1b00020.camss: vfe_pm_domain_on VFE 0 genpd 0000000003dcc927 genpd_link 00000000b216e0c0
[   95.814910] qcom-camss 1b00020.camss: vfe_pm_domain_on VFE 1 genpd 0000000012d2fc9c genpd_link 00000000cf5c896a
[   95.815176] qcom-camss 1b00020.camss: vfe_pm_domain_off VFE 0 genpd 0000000003dcc927 genpd_link 00000000b216e0c0
[   95.815190] qcom-camss 1b00020.camss: vfe_pm_domain_off VFE 1 genpd 0000000012d2fc9c genpd_link 00000000cf5c896a
[   96.025733] qcom-camss 1b00020.camss: vfe_put/873 vfe 1 power_count 3
[   96.025756] qcom-camss 1b00020.camss: vfe_put/894 vfe 1 power_count 3
[   96.025762] qcom-camss 1b00020.camss: vfe_put/896 vfe 1 power_count 2
[   96.025775] qcom-camss 1b00020.camss: vfe_put/873 vfe 1 power_count 2
[   96.025790] qcom-camss 1b00020.camss: vfe_put/894 vfe 1 power_count 2
[   96.025806] qcom-camss 1b00020.camss: vfe_put/896 vfe 1 power_count 1
[   96.025839] qcom-camss 1b00020.camss: vfe_put/873 vfe 1 power_count 1
[   96.025856] qcom-camss 1b00020.camss: vfe_put/879 vfe 1 power_count 1
[   96.025907] qcom-camss 1b00020.camss: vfe_put/886 vfe 1 power_count 1
[   96.025952] qcom-camss 1b00020.camss: vfe_put/888 vfe 1 power_count 1
[   96.025972] qcom-camss 1b00020.camss: vfe_pm_domain_off VFE 1 genpd 0000000012d2fc9c genpd_link 0000000000000000

I'm thinking we are calling get() from inside of get().

---
bod

Pardon me and once more - your full demsg this time.

diff --git a/drivers/media/platform/qcom/camss/camss-vfe.c b/drivers/media/platform/qcom/camss/camss-vfe.c
index 80a62ba112950..fa5da05d02cf1 100644
--- a/drivers/media/platform/qcom/camss/camss-vfe.c
+++ b/drivers/media/platform/qcom/camss/camss-vfe.c
@@ -595,9 +595,12 @@ void vfe_isr_reset_ack(struct vfe_device *vfe)
*/
void vfe_pm_domain_off(struct vfe_device *vfe)
{
+ WARN(1, "off");
+dev_info(vfe->camss->dev, "%s/%d vfe %d vfe->genpd %pK genpd_link %pK power-count %d\n", __func__, __LINE__, vfe->id, vfe->genpd, vfe->genpd_link, vfe->power_count);
if (!vfe->genpd)
return;

+dev_info(vfe->camss->dev, "%s/%d vfe %d vfe->genpd %pK genpd_link %pK power-count %d\n", __func__, __LINE__, vfe->id, vfe->genpd, vfe->genpd_link, vfe->power_count);
device_link_del(vfe->genpd_link);
vfe->genpd_link = NULL;
}
@@ -609,14 +612,17 @@ void vfe_pm_domain_off(struct vfe_device *vfe)
int vfe_pm_domain_on(struct vfe_device *vfe)
{
struct camss *camss = vfe->camss;
-
+ WARN(1, "on");
+dev_info(vfe->camss->dev, "%s/%d vfe %d vfe->genpd %pK genpd_link %pK power-count %d\n", __func__, __LINE__, vfe->id, vfe->genpd, vfe->genpd_link, vfe->power_count);
if (!vfe->genpd)
return 0;

+dev_info(vfe->camss->dev, "%s/%d vfe %d vfe->genpd %pK genpd_link %pK power-count %d\n", __func__, __LINE__, vfe->id, vfe->genpd, vfe->genpd_link, vfe->power_count);
vfe->genpd_link = device_link_add(camss->dev, vfe->genpd,
DL_FLAG_STATELESS |
DL_FLAG_PM_RUNTIME |
DL_FLAG_RPM_ACTIVE);
+dev_info(vfe->camss->dev, "%s/%d vfe %d vfe->genpd %pK genpd_link %pK power-count %d\n", __func__, __LINE__, vfe->id, vfe->genpd, vfe->genpd_link, vfe->power_count);
if (!vfe->genpd_link)
return -EINVAL;

@@ -774,6 +780,7 @@ static int vfe_check_clock_rates(struct vfe_device *vfe)

return 0;
}
+#define vfe_bug_trace(vfe) dev_info(vfe->camss->dev, "%s/%d vfe %d power_count %d\n", __func__, __LINE__, vfe->id, vfe->power_count);

/*
* vfe_get - Power up and reset VFE module
@@ -786,31 +793,37 @@ int vfe_get(struct vfe_device *vfe)
int ret;

mutex_lock(&vfe->power_lock);
-
if (vfe->power_count == 0) {
+ vfe_bug_trace(vfe);
ret = vfe->res->hw_ops->pm_domain_on(vfe);
if (ret < 0)
goto error_pm_domain;

+ vfe_bug_trace(vfe);
ret = pm_runtime_resume_and_get(vfe->camss->dev);
if (ret < 0)
goto error_domain_off;

+ vfe_bug_trace(vfe);
ret = vfe_set_clock_rates(vfe);
if (ret < 0)
goto error_pm_runtime_get;

+ vfe_bug_trace(vfe);
ret = camss_enable_clocks(vfe->nclocks, vfe->clock,
vfe->camss->dev);
if (ret < 0)
goto error_pm_runtime_get;

+ vfe_bug_trace(vfe);
ret = vfe_reset(vfe);
if (ret < 0)
goto error_reset;

+ vfe_bug_trace(vfe);
vfe_reset_output_maps(vfe);

+ vfe_bug_trace(vfe);
vfe_init_outputs(vfe);

vfe->res->hw_ops->hw_version(vfe);
@@ -821,6 +834,7 @@ int vfe_get(struct vfe_device *vfe)
}
vfe->power_count++;

+ vfe_bug_trace(vfe);
mutex_unlock(&vfe->power_lock);

return 0;
@@ -835,7 +849,7 @@ int vfe_get(struct vfe_device *vfe)

error_pm_domain:
mutex_unlock(&vfe->power_lock);
-
+dev_info(vfe->camss->dev, "%s vfe %d err=%d\n", __func__, vfe->id, ret);
return ret;
}

@@ -847,20 +861,32 @@ void vfe_put(struct vfe_device *vfe)
{
mutex_lock(&vfe->power_lock);

+ vfe_bug_trace(vfe);
if (vfe->power_count == 0) {
+ vfe_bug_trace(vfe);
dev_err(vfe->camss->dev, "vfe power off on power_count == 0\n");
goto exit;
} else if (vfe->power_count == 1) {
+ vfe_bug_trace(vfe);
if (vfe->was_streaming) {
+ vfe_bug_trace(vfe);
vfe->was_streaming = 0;
+ vfe_bug_trace(vfe);
vfe->res->hw_ops->vfe_halt(vfe);
+ vfe_bug_trace(vfe);
}
+ vfe_bug_trace(vfe);
camss_disable_clocks(vfe->nclocks, vfe->clock);
+ vfe_bug_trace(vfe);
pm_runtime_put_sync(vfe->camss->dev);
+ vfe_bug_trace(vfe);
vfe->res->hw_ops->pm_domain_off(vfe);
+ vfe_bug_trace(vfe);
}

+ vfe_bug_trace(vfe);
vfe->power_count--;
+ vfe_bug_trace(vfe);

exit:
mutex_unlock(&vfe->power_lock);