RE: [PATCH V4 3/4] x86/resctrl: Enable per-thread MBA
From: Babu Moger
Date: Mon May 18 2020 - 13:29:59 EST
> -----Original Message-----
> From: Reinette Chatre <reinette.chatre@xxxxxxxxx>
> Sent: Saturday, May 16, 2020 1:29 PM
> To: tglx@xxxxxxxxxxxxx; fenghua.yu@xxxxxxxxx; bp@xxxxxxxxx;
> tony.luck@xxxxxxxxx
> Cc: kuo-lang.tseng@xxxxxxxxx; ravi.v.shankar@xxxxxxxxx; mingo@xxxxxxxxxx;
> Moger, Babu <Babu.Moger@xxxxxxx>; hpa@xxxxxxxxx; x86@xxxxxxxxxx;
> linux-kernel@xxxxxxxxxxxxxxx; Reinette Chatre <reinette.chatre@xxxxxxxxx>
> Subject: [PATCH V4 3/4] x86/resctrl: Enable per-thread MBA
>
> From: Fenghua Yu <fenghua.yu@xxxxxxxxx>
>
> Current Memory Bandwidth Allocation (MBA) hardware has a limitation:
> all threads on the same core must have the same delay value. If there
> are different delay values across threads on one core, the original
> MBA implementation allocates the max delay value to the core and an
> updated implementation allocates either min or max delay value specified
> by a configuration MSR across threads on the core.
>
> Newer systems support per-thread MBA, in which each thread is allocated
> its own delay value.
>
> If per-thread MBA is supported, report "per-thread" in resctrl file
> "info/MB/thread_throttle_mode" to let user applications know that memory
> bandwidth is allocated per thread and to help them fine-tune MBA at the
> thread level.
>
> Signed-off-by: Fenghua Yu <fenghua.yu@xxxxxxxxx>
> [reinette: transition patch to use membw_throttle_mode enum]
> Signed-off-by: Reinette Chatre <reinette.chatre@xxxxxxxxx>
Reviewed-by: Babu Moger <babu.moger@xxxxxxx>
> ---
> Changes since V3:
> - Use new thread throttling mode property.
> - Remove unnecessary empty line. (Babu)
>
> Documentation/x86/resctrl_ui.rst | 3 +++
> arch/x86/kernel/cpu/resctrl/core.c | 5 ++++-
> arch/x86/kernel/cpu/resctrl/internal.h | 2 ++
> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 11 +++++++++++
> 4 files changed, 20 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/x86/resctrl_ui.rst
> b/Documentation/x86/resctrl_ui.rst
> index 861ee2816470..1b066d1aafad 100644
> --- a/Documentation/x86/resctrl_ui.rst
> +++ b/Documentation/x86/resctrl_ui.rst
> @@ -150,6 +150,9 @@ with respect to allocation:
> "max":
> the smallest percentage is applied
> to all threads
> + "per-thread":
> + bandwidth percentages are directly applied to
> + the threads running on the core
>
> If RDT monitoring is available there will be an "L3_MON" directory
> with the following files:
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c
> b/arch/x86/kernel/cpu/resctrl/core.c
> index 129ff0cec7a7..bf1ff07efac8 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -309,7 +309,10 @@ static bool __get_mem_config_intel(struct
> rdt_resource *r)
> }
> r->data_width = 3;
>
> - if (mba_cfg_supports_min_max_intel()) {
> + if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA)) {
> + r->membw.arch_throttle_mode =
> THREAD_THROTTLE_PER_THREAD;
> + thread_throttle_mode_init_ro();
> + } else if (mba_cfg_supports_min_max_intel()) {
> r->membw.arch_throttle_mode =
> THREAD_THROTTLE_MIN_MAX;
> thread_throttle_mode_init_rw();
> } else {
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h
> b/arch/x86/kernel/cpu/resctrl/internal.h
> index 6b9b21d67c9b..e198ea2a8468 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -391,11 +391,13 @@ struct rdt_cache {
> * @THREAD_THROTTLE_MAX_ONLY: Memory bandwidth is throttled at the
> core
> * always using smallest bandwidth percentage
> * assigned to threads, aka "max throttling"
> + * @THREAD_THROTTLE_PER_THREAD: Memory bandwidth is throttled at the thread
> */
> enum membw_throttle_mode {
> THREAD_THROTTLE_UNDEFINED = 0,
> THREAD_THROTTLE_MIN_MAX,
> THREAD_THROTTLE_MAX_ONLY,
> + THREAD_THROTTLE_PER_THREAD,
> };
>
> /**
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index 3ce6319b7226..088a1536bccc 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -1038,6 +1038,11 @@ static int max_threshold_occ_show(struct
> kernfs_open_file *of,
> * with the maximum delay value that from the software interface will be
> * the minimum of the bandwidth percentages assigned to the hardware threads
> * sharing the core.
> + *
> + * Some systems (identified by X86_FEATURE_PER_THREAD_MBA enumerated via CPUID)
> + * support per-thread MBA. On these systems hardware doesn't apply the minimum
> + * or maximum delay value to all threads in a core. Instead, each thread is
> + * throttled using the delay value that is assigned to that thread.
> */
> static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
> struct seq_file *seq, void *v)
> @@ -1047,12 +1052,18 @@ static int rdt_thread_throttle_mode_show(struct
> kernfs_open_file *of,
>
> mutex_lock(&rdtgroup_mutex);
>
> + if (r->membw.arch_throttle_mode ==
> THREAD_THROTTLE_PER_THREAD) {
> + seq_puts(seq, "per-thread\n");
> + goto out;
> + }
> +
> if (r->membw.arch_throttle_mode == THREAD_THROTTLE_MIN_MAX)
> throttle_mode = mba_cfg_msr &
> MBA_THROTTLE_MODE_MASK;
>
> seq_puts(seq,
> throttle_mode == MBA_THROTTLE_MODE_MIN ? "min\n" :
> "max\n");
>
> +out:
> mutex_unlock(&rdtgroup_mutex);
> return 0;
> }
> --
> 2.21.0