Re: [PATCH 3/3] memory: samsung: exynos5422-dmc: Add support for interrupt from performance counters

From: Lukasz Luba
Date: Tue Oct 01 2019 - 06:11:35 EST


Hi Krzysztof,

On 9/27/19 11:19 AM, Krzysztof Kozlowski wrote:
> On Wed, Sep 25, 2019 at 06:18:13PM +0200, Lukasz Luba wrote:
>> Introduce a new interrupt driven mechanism for managing speed of the
>> memory controller. The interrupts are generated due to performance
>> counters overflow. The performance counters might track memory reads,
>> writes, transfers, page misses, etc. In the basic algorithm tracking
>> read transfers and calculating memory pressure should be enough to
>> skip polling mode in devfreq.
>>
>> Signed-off-by: Lukasz Luba <l.luba@xxxxxxxxxxxxxxxxxxx>
>> ---
>> drivers/memory/samsung/exynos5422-dmc.c | 297 ++++++++++++++++++++++--
>> 1 file changed, 272 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/memory/samsung/exynos5422-dmc.c b/drivers/memory/samsung/exynos5422-dmc.c
>> index 0fe5f2186139..86e1844b97ef 100644
>> --- a/drivers/memory/samsung/exynos5422-dmc.c
>> +++ b/drivers/memory/samsung/exynos5422-dmc.c
>> @@ -8,6 +8,7 @@
>> #include <linux/devfreq.h>
>> #include <linux/devfreq-event.h>
>> #include <linux/device.h>
>> +#include <linux/interrupt.h>
>> #include <linux/io.h>
>> #include <linux/mfd/syscon.h>
>> #include <linux/module.h>
>> @@ -35,6 +36,29 @@
>> #define USE_BPLL_TIMINGS (0)
>> #define EXYNOS5_AREF_NORMAL (0x2e)
>>
>> +#define DREX_PPCCLKCON (0x0130)
>> +#define DREX_PEREV2CONFIG (0x013c)
>> +#define DREX_PMNC_PPC (0xE000)
>> +#define DREX_CNTENS_PPC (0xE010)
>> +#define DREX_CNTENC_PPC (0xE020)
>> +#define DREX_INTENS_PPC (0xE030)
>> +#define DREX_INTENC_PPC (0xE040)
>> +#define DREX_FLAG_PPC (0xE050)
>> +#define DREX_PMCNT2_PPC (0xE130)
>> +
>> +#define CC_RESET BIT(2)
>> +#define PPC_COUNTER_RESET BIT(1)
>> +#define PPC_ENABLE BIT(0)
>> +#define PEREV_CLK_EN BIT(0)
>> +#define PERF_CNT2 BIT(2)
>> +#define PERF_CCNT BIT(31)
>
> Describe to which registers these bitfields are applicable.
OK, I will add comment above them.
>
>> +
>> +#define READ_TRANSFER_CH0 (0x6d)
>> +#define READ_TRANSFER_CH1 (0x6f)
>
> The same. Otherwise they all look like some generic constants which is
> not true.
Make sense, agree. I will add comment above them.
>
>> +
>> +#define PERF_COUNTER_START_VALUE 0xff000000
>> +#define PERF_EVENT_UP_DOWN_THRESHOLD 900000000ULL
>> +
>> /**
>> * struct dmc_opp_table - Operating level desciption
>> *
>> @@ -85,6 +109,9 @@ struct exynos5_dmc {
>> struct clk *mout_mx_mspll_ccore_phy;
>> struct devfreq_event_dev **counter;
>> int num_counters;
>> + u64 last_overflow_ts[2];
>> + unsigned long load, total;
>
> One member per line. This decreases readability.
OK
>
>> + bool in_irq_mode;
>> };
>>
>> #define TIMING_FIELD(t_name, t_bit_beg, t_bit_end) \
>> @@ -653,6 +680,167 @@ static int exynos5_counters_get(struct exynos5_dmc *dmc,
>> return 0;
>> }
>>
>> +/**
>> + * exynos5_dmc_start_perf_events() - Setup and start performance event counters
>> + * @dmc: device for which the counters are going to be checked
>> + * @beg_value: initial value for the counter
>> + *
>> + * Function which enables needed counters, interrupts and sets initial values
>> + * then starts the counters.
>> + */
>> +static void exynos5_dmc_start_perf_events(struct exynos5_dmc *dmc,
>> + u32 beg_value)
>> +{
>> + /* Enable interrupts for counter 2 */
>> + writel(PERF_CNT2, dmc->base_drexi0 + DREX_INTENS_PPC);
>> + writel(PERF_CNT2, dmc->base_drexi1 + DREX_INTENS_PPC);
>
> Blank line.
>
>> + /* Enable counter 2 and CCNT */
>> + writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_CNTENS_PPC);
>> + writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_CNTENS_PPC);
>
> Blank line.
>
>> + /* Clear overflow flag for all counters */
>> + writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_FLAG_PPC);
>> + writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_FLAG_PPC);
>
> Blank line.
>
>> + /* Reset all counters */
>> + writel(CC_RESET | PPC_COUNTER_RESET, dmc->base_drexi0 + DREX_PMNC_PPC);
>> + writel(CC_RESET | PPC_COUNTER_RESET, dmc->base_drexi1 + DREX_PMNC_PPC);
>
> Blank line.
>
>> + /*
>> + * Set start value for the counters, the number of samples that
>> + * will be gathered is calculated as: 0xffffffff - beg_value
>> + */
>> + writel(beg_value, dmc->base_drexi0 + DREX_PMCNT2_PPC);
>> + writel(beg_value, dmc->base_drexi1 + DREX_PMCNT2_PPC);
>
> Blank line.
>
>> + /* Start all counters */
>> + writel(PPC_ENABLE, dmc->base_drexi0 + DREX_PMNC_PPC);
>> + writel(PPC_ENABLE, dmc->base_drexi1 + DREX_PMNC_PPC);
>> +}
>> +
>> +/**
>> + * exynos5_dmc_perf_events_calc() - Calculate utilization
>> + * @dmc: device for which the counters are going to be checked
>> + * @diff_ts: time between last interrupt and current one
>> + *
>> + * Function which calculates needed utilization for the devfreq governor.
>> + * It prepares values for 'busy_time' and 'total_time' based on elapsed time
>> + * between interrupts, which approximates utilization.
>> + */
>> +static void exynos5_dmc_perf_events_calc(struct exynos5_dmc *dmc, u64 diff_ts)
>> +{
>> + /*
>> + * This is a simple algorithm for managing traffic on DMC.
>> + * When there is almost no load the counters overflow every 4s,
>> + * no mater the DMC frequency.
>> + * The high load might be approximated using linear function.
>> + * Knowing that, simple calculation can provide 'busy_time' and
>> + * 'total_time' to the devfreq governor which picks up target
>> + * frequency.
>> + * We want a fast ramp up and slow decay in frequency change function.
>> + */
>> + if (diff_ts < PERF_EVENT_UP_DOWN_THRESHOLD) {
>> + /*
>> + * Set higher utilization for the simple_ondemand governor.
>> + * The governor should increase the frequency of the DMC.
>> + */
>> + dmc->load = 70;
>> + dmc->total = 100;
>> + } else {
>> + /*
>> + * Set low utilization for the simple_ondemand governor.
>> + * The governor should decrease the frequency of the DMC.
>> + */
>> + dmc->load = 35;
>> + dmc->total = 100;
>> + }
>> +
>> + dev_dbg(dmc->dev, "diff_ts=%llu\n", diff_ts);
>> +}
>> +
>> +/**
>> + * exynos5_dmc_perf_events_check() - Checks the status of the counters
>> + * @dmc: device for which the counters are going to be checked
>> + *
>> + * Function which is called from threaded IRQ to check the counters state
>> + * and to call approximation for the needed utilization.
>> + */
>> +static void exynos5_dmc_perf_events_check(struct exynos5_dmc *dmc)
>> +{
>> + u32 val;
>> + u64 diff_ts, ts;
>> +
>> + ts = ktime_get_ns();
>> +
>> + /* Stop all counters */
>> + writel(0, dmc->base_drexi0 + DREX_PMNC_PPC);
>> + writel(0, dmc->base_drexi1 + DREX_PMNC_PPC);
>> +
>> + /* Check the source in interrupt flag registers (which channel) */
>> + val = readl(dmc->base_drexi0 + DREX_FLAG_PPC);
>> + if (val) {
>> + diff_ts = ts - dmc->last_overflow_ts[0];
>> + dmc->last_overflow_ts[0] = ts;
>> + dev_dbg(dmc->dev, "drex0 0xE050 val= 0x%08x\n", val);
>> + } else {
>> + val = readl(dmc->base_drexi1 + DREX_FLAG_PPC);
>> + diff_ts = ts - dmc->last_overflow_ts[1];
>> + dmc->last_overflow_ts[1] = ts;
>> + dev_dbg(dmc->dev, "drex1 0xE050 val= 0x%08x\n", val);
>> + }
>> +
>> + exynos5_dmc_perf_events_calc(dmc, diff_ts);
>> +
>> + exynos5_dmc_start_perf_events(dmc, PERF_COUNTER_START_VALUE);
>> +}
>> +
>> +/**
>> + * exynos5_dmc_enable_perf_events() - Enable performance events
>> + * @dmc: device for which the counters are going to be checked
>> + *
>> + * Function which is setup needed environment and enables counters.
>> + */
>> +static void exynos5_dmc_enable_perf_events(struct exynos5_dmc *dmc)
>> +{
>> + u64 ts;
>> +
>> + /* Enable Performance Event Clock */
>> + writel(PEREV_CLK_EN, dmc->base_drexi0 + DREX_PPCCLKCON);
>> + writel(PEREV_CLK_EN, dmc->base_drexi1 + DREX_PPCCLKCON);
>> +
>> + /* Select read transfers as performance event2 */
>> + writel(READ_TRANSFER_CH0, dmc->base_drexi0 + DREX_PEREV2CONFIG);
>> + writel(READ_TRANSFER_CH1, dmc->base_drexi1 + DREX_PEREV2CONFIG);
>> +
>> + dmc->in_irq_mode = 1;
>
> Move this outside, to the probe. Logically it belongs there.
OK
>
>> +
>> + ts = ktime_get_ns();
>> + dmc->last_overflow_ts[0] = ts;
>> + dmc->last_overflow_ts[1] = ts;
>> +
>> + /* Devfreq shouldn't be faster than initialization, play safe though. */
>> + dmc->load = 99;
>> + dmc->total = 100;
>> +}
>> +
>> +/**
>> + * exynos5_dmc_disable_perf_events() - Disable performance events
>> + * @dmc: device for which the counters are going to be checked
>> + *
>> + * Function which stops, disables performance event counters and interrupts.
>> + */
>> +static void exynos5_dmc_disable_perf_events(struct exynos5_dmc *dmc)
>> +{
>> + /* Stop all counters */
>> + writel(0, dmc->base_drexi0 + DREX_PMNC_PPC);
>> + writel(0, dmc->base_drexi1 + DREX_PMNC_PPC);
>
> Blank line here and later.
OK - for all the 'blank line' hints.
>
>> + /* Disable interrupts for counter 2 */
>> + writel(PERF_CNT2, dmc->base_drexi0 + DREX_INTENC_PPC);
>> + writel(PERF_CNT2, dmc->base_drexi1 + DREX_INTENC_PPC);
>> + /* Disable counter 2 and CCNT */
>> + writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_CNTENC_PPC);
>> + writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_CNTENC_PPC);
>> + /* Clear overflow flag for all counters */
>> + writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_FLAG_PPC);
>> + writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_FLAG_PPC);
>> +}
>> +
>> /**
>> * exynos5_dmc_get_status() - Read current DMC performance statistics.
>> * @dev: device for which the statistics are requested
>> @@ -669,18 +857,24 @@ static int exynos5_dmc_get_status(struct device *dev,
>> unsigned long load, total;
>> int ret;
>>
>> - ret = exynos5_counters_get(dmc, &load, &total);
>> - if (ret < 0)
>> - return -EINVAL;
>> + if (dmc->in_irq_mode) {
>> + stat->current_frequency = dmc->curr_rate;
>> + stat->busy_time = dmc->load;
>> + stat->total_time = dmc->total;
>> + } else {
>> + ret = exynos5_counters_get(dmc, &load, &total);
>> + if (ret < 0)
>> + return -EINVAL;
>>
>> - /* To protect from overflow in calculation ratios, divide by 1024 */
>> - stat->busy_time = load >> 10;
>> - stat->total_time = total >> 10;
>> + /* To protect from overflow, divide by 1024 */
>> + stat->busy_time = load >> 10;
>> + stat->total_time = total >> 10;
>>
>> - ret = exynos5_counters_set_event(dmc);
>> - if (ret < 0) {
>> - dev_err(dev, "could not set event counter\n");
>> - return ret;
>> + ret = exynos5_counters_set_event(dmc);
>> + if (ret < 0) {
>> + dev_err(dev, "could not set event counter\n");
>> + return ret;
>> + }
>> }
>>
>> return 0;
>> @@ -712,7 +906,6 @@ static int exynos5_dmc_get_cur_freq(struct device *dev, unsigned long *freq)
>> * It provides to the devfreq framework needed functions and polling period.
>> */
>> static struct devfreq_dev_profile exynos5_dmc_df_profile = {
>> - .polling_ms = 500,
>> .target = exynos5_dmc_target,
>> .get_dev_status = exynos5_dmc_get_status,
>> .get_cur_freq = exynos5_dmc_get_cur_freq,
>> @@ -1108,6 +1301,26 @@ static inline int exynos5_dmc_set_pause_on_switching(struct exynos5_dmc *dmc)
>> return 0;
>> }
>>
>> +static irqreturn_t dmc_irq_thread(int irq, void *priv)
>> +{
>> + int res;
>> + struct exynos5_dmc *dmc = priv;
>> +
>> + dev_dbg(dmc->dev, "irq thread handler\n");
>
> Skip a debug in thread handler for memory. It can pollute your log (I
> guess depending on workload).
OK, I will remove it.
>
>> +
>> + mutex_lock(&dmc->df->lock);
>> +
>> + exynos5_dmc_perf_events_check(dmc);
>> +
>> + res = update_devfreq(dmc->df);
>> + if (res)
>> + dev_err(dmc->dev, "devfreq failed with %d\n", res);
>
> dev_warn()
OK
>
>> +
>> + mutex_unlock(&dmc->df->lock);
>> +
>> + return IRQ_HANDLED;
>> +}
>> +
>> /**
>> * exynos5_dmc_probe() - Probe function for the DMC driver
>> * @pdev: platform device for which the driver is going to be initialized
>> @@ -1125,6 +1338,7 @@ static int exynos5_dmc_probe(struct platform_device *pdev)
>> struct device_node *np = dev->of_node;
>> struct exynos5_dmc *dmc;
>> struct resource *res;
>> + int irq;
>>
>> dmc = devm_kzalloc(dev, sizeof(*dmc), GFP_KERNEL);
>> if (!dmc)
>> @@ -1172,24 +1386,48 @@ static int exynos5_dmc_probe(struct platform_device *pdev)
>> goto remove_clocks;
>> }
>>
>> - ret = exynos5_performance_counters_init(dmc);
>> - if (ret) {
>> - dev_warn(dev, "couldn't probe performance counters\n");
>> - goto remove_clocks;
>> - }
>> -
>> ret = exynos5_dmc_set_pause_on_switching(dmc);
>> if (ret) {
>> dev_warn(dev, "couldn't get access to PAUSE register\n");
>> goto err_devfreq_add;
>
> This is wrong now, I think.
Good point, should be 'goto remove_clocks'.
>
>> }
>>
>> - /*
>> - * Setup default thresholds for the devfreq governor.
>> - * The values are chosen based on experiments.
>> - */
>> - dmc->gov_data.upthreshold = 30;
>> - dmc->gov_data.downdifferential = 5;
>> + /* There is two modes in which the driver works: polling or IRQ */
>> + irq = platform_get_irq(pdev, 0);
>
> You need to document it in bindings.
OK
>
>> + if (irq < 0) {
>> + ret = exynos5_performance_counters_init(dmc);
>> + if (ret) {
>> + dev_warn(dev, "couldn't probe performance counters\n");
>> + goto remove_clocks;
>
> Weird, previous error jump goes to err_devfreq_add. This goes to error
> label which is narrower (less to cleanup).

Right.

Thank you for the review. I will address the issues in the next version.

Regards,
Lukasz