Re: [PATCH v4 18/25] drm/msm/dpu: Reserve resources for CWB

From: Jessica Zhang
Date: Thu Jan 09 2025 - 17:54:08 EST




On 1/9/2025 2:13 PM, Dmitry Baryshkov wrote:
On Thu, 9 Jan 2025 at 23:26, Jessica Zhang <quic_jesszhan@xxxxxxxxxxx> wrote:



On 12/28/2024 8:47 PM, Dmitry Baryshkov wrote:
On Thu, Dec 26, 2024 at 02:49:28PM -0800, Jessica Zhang wrote:


On 12/20/2024 5:07 PM, Dmitry Baryshkov wrote:
On Fri, Dec 20, 2024 at 04:12:29PM -0800, Jessica Zhang wrote:


On 12/19/2024 9:52 PM, Dmitry Baryshkov wrote:
On Mon, Dec 16, 2024 at 04:43:29PM -0800, Jessica Zhang wrote:
Add support for RM to reserve dedicated CWB PINGPONGs and CWB muxes

For concurrent writeback, even-indexed CWB muxes must be assigned to
even-indexed LMs and odd-indexed CWB muxes for odd-indexed LMs. The same
even/odd rule applies for dedicated CWB PINGPONGs.

Track the CWB muxes in the global state and add a CWB-specific helper to
reserve the correct CWB muxes and dedicated PINGPONGs following the
even/odd rule.

Signed-off-by: Jessica Zhang <quic_jesszhan@xxxxxxxxxxx>
---
drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 34 ++++++++++--
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h | 2 +
drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h | 1 +
drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c | 83 +++++++++++++++++++++++++++++
4 files changed, 116 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index a895d48fe81ccc71d265e089992786e8b6268b1b..a95dc1f0c6a422485c7ba98743e944e1a4f43539 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -2,7 +2,7 @@
/*
* Copyright (C) 2013 Red Hat
* Copyright (c) 2014-2018, 2020-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*
* Author: Rob Clark <robdclark@xxxxxxxxx>
*/
@@ -28,6 +28,7 @@
#include "dpu_hw_dsc.h"
#include "dpu_hw_merge3d.h"
#include "dpu_hw_cdm.h"
+#include "dpu_hw_cwb.h"
#include "dpu_formats.h"
#include "dpu_encoder_phys.h"
#include "dpu_crtc.h"
@@ -133,6 +134,9 @@ enum dpu_enc_rc_states {
* @cur_slave: As above but for the slave encoder.
* @hw_pp: Handle to the pingpong blocks used for the display. No.
* pingpong blocks can be different than num_phys_encs.
+ * @hw_cwb: Handle to the CWB muxes used for concurrent writeback
+ * display. Number of CWB muxes can be different than
+ * num_phys_encs.
* @hw_dsc: Handle to the DSC blocks used for the display.
* @dsc_mask: Bitmask of used DSC blocks.
* @intfs_swapped: Whether or not the phys_enc interfaces have been swapped
@@ -177,6 +181,7 @@ struct dpu_encoder_virt {
struct dpu_encoder_phys *cur_master;
struct dpu_encoder_phys *cur_slave;
struct dpu_hw_pingpong *hw_pp[MAX_CHANNELS_PER_ENC];
+ struct dpu_hw_cwb *hw_cwb[MAX_CHANNELS_PER_ENC];
struct dpu_hw_dsc *hw_dsc[MAX_CHANNELS_PER_ENC];
unsigned int dsc_mask;
@@ -1138,7 +1143,10 @@ static void dpu_encoder_virt_atomic_mode_set(struct drm_encoder *drm_enc,
struct dpu_hw_blk *hw_pp[MAX_CHANNELS_PER_ENC];
struct dpu_hw_blk *hw_ctl[MAX_CHANNELS_PER_ENC];
struct dpu_hw_blk *hw_dsc[MAX_CHANNELS_PER_ENC];
+ struct dpu_hw_blk *hw_cwb[MAX_CHANNELS_PER_ENC];
int num_pp, num_dsc, num_ctl;
+ int num_cwb = 0;
+ bool is_cwb_encoder;
unsigned int dsc_mask = 0;
int i;
@@ -1152,6 +1160,8 @@ static void dpu_encoder_virt_atomic_mode_set(struct drm_encoder *drm_enc,
priv = drm_enc->dev->dev_private;
dpu_kms = to_dpu_kms(priv->kms);
+ is_cwb_encoder = drm_crtc_in_clone_mode(crtc_state) &&
+ dpu_enc->disp_info.intf_type == INTF_WB;
global_state = dpu_kms_get_existing_global_state(dpu_kms);
if (IS_ERR_OR_NULL(global_state)) {
@@ -1162,9 +1172,25 @@ static void dpu_encoder_virt_atomic_mode_set(struct drm_encoder *drm_enc,
trace_dpu_enc_mode_set(DRMID(drm_enc));
/* Query resource that have been reserved in atomic check step. */
- num_pp = dpu_rm_get_assigned_resources(&dpu_kms->rm, global_state,
- drm_enc->crtc, DPU_HW_BLK_PINGPONG, hw_pp,
- ARRAY_SIZE(hw_pp));
+ if (is_cwb_encoder) {
+ num_pp = dpu_rm_get_assigned_resources(&dpu_kms->rm, global_state,
+ drm_enc->crtc,
+ DPU_HW_BLK_DCWB_PINGPONG,
+ hw_pp, ARRAY_SIZE(hw_pp));
+ num_cwb = dpu_rm_get_assigned_resources(&dpu_kms->rm, global_state,
+ drm_enc->crtc,
+ DPU_HW_BLK_CWB,
+ hw_cwb, ARRAY_SIZE(hw_cwb));
+ } else {
+ num_pp = dpu_rm_get_assigned_resources(&dpu_kms->rm, global_state,
+ drm_enc->crtc,
+ DPU_HW_BLK_PINGPONG, hw_pp,
+ ARRAY_SIZE(hw_pp));
+ }
+
+ for (i = 0; i < num_cwb; i++)
+ dpu_enc->hw_cwb[i] = to_dpu_hw_cwb(hw_cwb[i]);
+
num_ctl = dpu_rm_get_assigned_resources(&dpu_kms->rm, global_state,
drm_enc->crtc, DPU_HW_BLK_CTL, hw_ctl, ARRAY_SIZE(hw_ctl));
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h
index ba7bb05efe9b8cac01a908e53121117e130f91ec..8d820cd1b5545d247515763039b341184e814e32 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h
@@ -77,12 +77,14 @@ enum dpu_hw_blk_type {
DPU_HW_BLK_LM,
DPU_HW_BLK_CTL,
DPU_HW_BLK_PINGPONG,
+ DPU_HW_BLK_DCWB_PINGPONG,
DPU_HW_BLK_INTF,
DPU_HW_BLK_WB,
DPU_HW_BLK_DSPP,
DPU_HW_BLK_MERGE_3D,
DPU_HW_BLK_DSC,
DPU_HW_BLK_CDM,
+ DPU_HW_BLK_CWB,
DPU_HW_BLK_MAX,
};
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
index 48d756d8f8c6e4ab94b72bac0418320f7dc8cda8..1fc8abda927fc094b369e0d1efc795b71d6a7fcb 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
@@ -128,6 +128,7 @@ struct dpu_global_state {
uint32_t dspp_to_crtc_id[DSPP_MAX - DSPP_0];
uint32_t dsc_to_crtc_id[DSC_MAX - DSC_0];
uint32_t cdm_to_crtc_id;
+ uint32_t cwb_to_crtc_id[CWB_MAX - CWB_0];
};
struct dpu_global_state
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
index 85adaf256b2c705d2d7df378b6ffc0e578f52bc3..ead24bb0ceb5d8ec4705f0d32330294d0b45b216 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
@@ -234,6 +234,55 @@ static int _dpu_rm_get_lm_peer(struct dpu_rm *rm, int primary_idx)
return -EINVAL;
}
+static int _dpu_rm_reserve_cwb_mux_and_pingpongs(struct dpu_rm *rm,
+ struct dpu_global_state *global_state,
+ uint32_t crtc_id,
+ struct msm_display_topology *topology)
+{
+ int num_cwb_pp = topology->num_lm, cwb_pp_count = 0;
+ int cwb_pp_start_idx = PINGPONG_CWB_0 - PINGPONG_0;
+ int cwb_pp_idx[MAX_BLOCKS];
+ int cwb_mux_idx[MAX_BLOCKS];
+
+ /*
+ * Reserve additional dedicated CWB PINGPONG blocks and muxes for each
+ * mixer
+ *
+ * TODO: add support reserving resources for platforms with no
+ * PINGPONG_CWB

What about doing it other way around: allocate CWBs first as required
(even/odd, proper count, etc). Then for each of CWBs allocate a PP block
(I think it's enough to simply make CWB blocks have a corresponding PP
index as a property). This way the driver can handle both legacy and
current platforms.

Hi Dmitry,

Sorry if I'm misunderstanding your suggestion, but the main change needed to
support platforms with no dedicated PINGPONG_CWB is where in the
rm->pingpong_blks list to start assigning pingpong blocks for the CWB mux.
I'm not sure how changing the order in which CWBs and the pingpong blocks
are assigned will address that.

(FWIW, the only change necessary to add support for non-dedicated
PINGPONG_CWBs platforms for this function should just be changing the
initialization value of cwb_pp_start_idx)

If I remember correctly, we have identified several generations of DPU
wrt. CWB handling:
- 8.1+ (or 8.0+?), DCWB, dedicated PP blocks
- 7.2, dedicated PP_1?
- 5.0+, shared PP blocks
- older DPUs, special handling of PP

If the driver allocates PP first and then first it has to allocated PP
(in a platform-specific way) and then go from PINGPONG to CWB (in a
platform-specific way). If CWB is allocated first, then you have only
one platform-specific piece of code that gets PINGPONG for the CWB (and
as this function is called after the CWB allocation, the major part of
the CWB / PP allocation is generic).

The issue with breaking this into separate helpers/functions is that the CWB
mux and PPB indices are dependent on each other. But I agree that we can
reserve CWB mux and the PPBs in 2 separate loops within this helper to
minimize the special platform-specific handling.

Doesn't it just PPB depend on CWB?

Sorry, poor wording on my part. Was referring specifically to this part
of the logic within the code chunk:

```
cwb_pp_idx[cwb_pp_count] = j;
cwb_mux_idx[cwb_pp_count] = j - cwb_pp_start_idx;
```

The point I wanted to make was that I think we can keep the CWB-specific
reservation of the the PPBs and muxes in this function, but I agree with
your suggestion to reserve the CWBs first and the PPBs second (just in
separate loops rather than separate functions).

Sounds fine with me. I don't think you even need a second loop, just
simple maths should be fine.

Got it.. do you mean just swapping the current for loop logic to loop over all the CWB blocks instead?

So something like:

```
for (int i = 0; i < ARRAY_SIZE(rm->mixer_blks) &&
cwb_pp_count < num_cwb_pp; i++) {
for (int j = cwb_pp_start_idx;
j < ARRAY_SIZE(rm->cwb_blks); j++) {
/*
* Odd LMs must be assigned to odd PINGPONGs and even
* LMs with even PINGPONGs
*/
if (reserved_by_other(global_state->cwb_to_crtc_id, j, crtc_id) ||
i % 2 != j % 2)
continue;

cwb_mux_idx[cwb_pp_count] = j;
cwb_pp_idx[cwb_pp_count] = j + cwb_pp_start_idx;
cwb_pp_count++;
break;
}
}
```







Also wanted to note that the comment doc on the PPB odd/even rule is
inaccurate -- technically the odd/even rule applies specifically to the CWB
mux as odd/even LMs are hardwired to their respective CWB muxes. Will
correct the comment doc to be more accurate.

Yes, please fix that.


Thanks,

Jessica Zhang


--
With best wishes
Dmitry



--
With best wishes
Dmitry