Re: [PATCH v17 1/2] drm/tegra: dc: Support memory bandwidth management

From: Thierry Reding
Date: Mon May 31 2021 - 12:59:34 EST


On Tue, May 11, 2021 at 02:27:08AM +0300, Dmitry Osipenko wrote:
> Display controller (DC) performs isochronous memory transfers, and thus,
> has a requirement for a minimum memory bandwidth that shall be fulfilled,
> otherwise framebuffer data can't be fetched fast enough and this results
> in a DC's data-FIFO underflow that follows by a visual corruption.
>
> The Memory Controller drivers provide facility for memory bandwidth
> management via interconnect API. Let's wire up the interconnect API
> support to the DC driver in order to fix the distorted display output
> on T30 Ouya, T124 TK1 and other Tegra devices.
>
> Tested-by: Peter Geis <pgwipeout@xxxxxxxxx> # Ouya T30
> Tested-by: Matt Merhar <mattmerhar@xxxxxxxxxxxxxx> # Ouya T30
> Tested-by: Nicolas Chauvet <kwizart@xxxxxxxxx> # PAZ00 T20 and TK1 T124
> Signed-off-by: Dmitry Osipenko <digetx@xxxxxxxxx>
> ---
> drivers/gpu/drm/tegra/Kconfig | 1 +
> drivers/gpu/drm/tegra/dc.c | 352 ++++++++++++++++++++++++++++++++++
> drivers/gpu/drm/tegra/dc.h | 14 ++
> drivers/gpu/drm/tegra/drm.c | 14 ++
> drivers/gpu/drm/tegra/hub.c | 3 +
> drivers/gpu/drm/tegra/plane.c | 116 +++++++++++
> drivers/gpu/drm/tegra/plane.h | 15 ++
> 7 files changed, 515 insertions(+)
>
[...]
> diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
[...]
> @@ -2011,7 +2143,215 @@ static void tegra_crtc_atomic_flush(struct drm_crtc *crtc,
> value = tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
> }
>
> +static bool tegra_plane_is_cursor(const struct drm_plane_state *state)
> +{
> + const struct tegra_dc_soc_info *soc = to_tegra_dc(state->crtc)->soc;
> + const struct drm_format_info *fmt = state->fb->format;
> + unsigned int src_w = drm_rect_width(&state->src) >> 16;
> + unsigned int dst_w = drm_rect_width(&state->dst);
> +
> + if (state->plane->type != DRM_PLANE_TYPE_CURSOR)
> + return false;
> +
> + if (soc->supports_cursor)
> + return true;
> +
> + if (src_w != dst_w || fmt->num_planes != 1 || src_w * fmt->cpp[0] > 256)
> + return false;

Technically there could be some random overlay window that matches these
conditions and is erroneously detected as being a cursor. I wonder if we
should add a field to a plane that marks it as being used as a cursor in
the cases where we don't support a hardware cursor.

[...]
> diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h
> index 29f19c3c6149..db10af097033 100644
> --- a/drivers/gpu/drm/tegra/dc.h
> +++ b/drivers/gpu/drm/tegra/dc.h
> @@ -15,6 +15,8 @@
>
> struct tegra_output;
>
> +#define TEGRA_DC_LEGACY_PLANES_NUM 7
> +
> struct tegra_dc_state {
> struct drm_crtc_state base;
>
> @@ -23,6 +25,8 @@ struct tegra_dc_state {
> unsigned int div;
>
> u32 planes;
> +
> + unsigned long plane_peak_bw[TEGRA_DC_LEGACY_PLANES_NUM];

Why can we not store this peak bandwidth value within the plane state? I
know that this isn't exactly per-plane data because it depends on the
state of other planes, but that doesn't really prevent the value from
living within the plane state. The plane state is, after all, part of the
global state, and hence any such state needs to be considered within the
context of that global atomic state.

I suppose that might make it a little bit more difficult to get at the
data, but I think the end result would be less confusing than having an
array here with potentially unused fields. It would also get rid of the
need to look up planes by their per-CRTC index.

> };
>
> static inline struct tegra_dc_state *to_dc_state(struct drm_crtc_state *state)
> @@ -33,6 +37,12 @@ static inline struct tegra_dc_state *to_dc_state(struct drm_crtc_state *state)
> return NULL;
> }
>
> +static inline const struct tegra_dc_state *
> +to_const_dc_state(const struct drm_crtc_state *state)
> +{
> + return to_dc_state((struct drm_crtc_state *)state);
> +}
> +
> struct tegra_dc_stats {
> unsigned long frames;
> unsigned long vblank;
> @@ -66,7 +76,9 @@ struct tegra_dc_soc_info {
> unsigned int num_overlay_formats;
> const u64 *modifiers;
> bool has_win_a_without_filters;
> + bool has_win_b_vfilter_mem_client;
> bool has_win_c_without_vert_filter;
> + bool plane_tiled_memory_bandwidth_x2;
> };
>
> struct tegra_dc {
> @@ -152,6 +164,8 @@ int tegra_dc_state_setup_clock(struct tegra_dc *dc,
> struct drm_crtc_state *crtc_state,
> struct clk *clk, unsigned long pclk,
> unsigned int div);
> +void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc,
> + struct drm_atomic_state *state);
>
> /* from rgb.c */
> int tegra_dc_rgb_probe(struct tegra_dc *dc);
> diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
> index d76bcb6c5622..a10513d7f2e0 100644
> --- a/drivers/gpu/drm/tegra/drm.c
> +++ b/drivers/gpu/drm/tegra/drm.c
> @@ -24,6 +24,7 @@
> #include <asm/dma-iommu.h>
> #endif
>
> +#include "dc.h"
> #include "drm.h"
> #include "gem.h"
>
> @@ -63,6 +64,17 @@ static const struct drm_mode_config_funcs tegra_drm_mode_config_funcs = {
> .atomic_commit = drm_atomic_helper_commit,
> };
>
> +static void tegra_atomic_post_commit(struct drm_device *drm,
> + struct drm_atomic_state *old_state)
> +{
> + struct drm_crtc_state *old_crtc_state __maybe_unused;
> + struct drm_crtc *crtc;
> + unsigned int i;
> +
> + for_each_old_crtc_in_state(old_state, crtc, old_crtc_state, i)
> + tegra_crtc_atomic_post_commit(crtc, old_state);
> +}
> +
> static void tegra_atomic_commit_tail(struct drm_atomic_state *old_state)
> {
> struct drm_device *drm = old_state->dev;
> @@ -82,6 +94,8 @@ static void tegra_atomic_commit_tail(struct drm_atomic_state *old_state)
> } else {
> drm_atomic_helper_commit_tail_rpm(old_state);
> }
> +
> + tegra_atomic_post_commit(drm, old_state);
> }
>
> static const struct drm_mode_config_helper_funcs
> diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c
> index bfae8a02f55b..f1bbc5991854 100644
> --- a/drivers/gpu/drm/tegra/hub.c
> +++ b/drivers/gpu/drm/tegra/hub.c
> @@ -358,6 +358,9 @@ static int tegra_shared_plane_atomic_check(struct drm_plane *plane,
> struct tegra_dc *dc = to_tegra_dc(new_plane_state->crtc);
> int err;
>
> + plane_state->peak_memory_bandwidth = 0;
> + plane_state->avg_memory_bandwidth = 0;
> +

Since ICC isn't supported yet on Tegra186 and later, does it even make
sense to initialize these?

Thierry

Attachment: signature.asc
Description: PGP signature