[PATCH v2 11/11] arch/x86: Introduce QOS feature for AMD

From: Moger, Babu
Date: Fri Oct 05 2018 - 16:56:19 EST


Enables QOS feature on AMD.
Following QoS sub-features are supported in AMD if the underlying
hardware supports it.
- L3 Cache allocation enforcement
- L3 Cache occupancy monitoring
- L3 Code-Data Prioritization support
- Memory Bandwidth Enforcement(Allocation)

There are differences in the way some of the features are implemented.
Separate those functions and add those as vendor specific functions.
The major difference is in MBA feature.
- AMD uses CPUID leaf 0x80000020 to initialize the MBA features.
- AMD uses direct bandwidth value instead of delay based on bandwidth
values.
- MSR register base addresses are different for MBA.
- Also AMD allows non-contiguous L3 cache bit masks.

Adds following functions to take care of the differences.
rdt_get_mem_config_amd : MBA initialization function
parse_bw_amd : Bandwidth parsing
mba_wrmsr_amd: Writes bandwidth value
cbm_validate_amd : L3 cache bitmask validation

Signed-off-by: Babu Moger <babu.moger@xxxxxxx>
---
arch/x86/kernel/cpu/rdt.c | 69 +++++++++++++++++++++++++-
arch/x86/kernel/cpu/rdt.h | 5 ++
arch/x86/kernel/cpu/rdt_ctrlmondata.c | 70 +++++++++++++++++++++++++++
3 files changed, 142 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/rdt.c b/arch/x86/kernel/cpu/rdt.c
index c7c2dbaae7bb..99b3a69457c7 100644
--- a/arch/x86/kernel/cpu/rdt.c
+++ b/arch/x86/kernel/cpu/rdt.c
@@ -61,6 +61,9 @@ mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m,
struct rdt_resource *r);
static void
cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
+static void
+mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m,
+ struct rdt_resource *r);

#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains)

@@ -280,6 +283,31 @@ static bool rdt_get_mem_config(struct rdt_resource *r)
return true;
}

+static bool rdt_get_mem_config_amd(struct rdt_resource *r)
+{
+ union cpuid_0x10_3_eax eax;
+ union cpuid_0x10_x_edx edx;
+ u32 ebx, ecx;
+
+ cpuid_count(0x80000020, 1, &eax.full, &ebx, &ecx, &edx.full);
+ r->num_closid = edx.split.cos_max + 1;
+ r->default_ctrl = MAX_MBA_BW_AMD;
+
+ /* AMD does not use delay. Set delay_linear to false by default */
+ r->membw.delay_linear = false;
+
+ /* FIX ME - May need to be read from MSR */
+ r->membw.min_bw = 0;
+ r->membw.bw_gran = 1;
+ /* Max value is 2048, Data width should be 4 in decimal */
+ r->data_width = 4;
+
+ r->alloc_capable = true;
+ r->alloc_enabled = true;
+
+ return true;
+}
+
static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
{
union cpuid_0x10_1_eax eax;
@@ -339,6 +367,16 @@ static int get_cache_id(int cpu, int level)
return -1;
}

+static void
+mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
+{
+ unsigned int i;
+
+ /* Write the bw values for mba. */
+ for (i = m->low; i < m->high; i++)
+ wrmsrl(r->msr_base + i, d->ctrl_val[i]);
+}
+
/*
* Map the memory b/w percentage value to delay values
* that can be written to QOS_MSRs.
@@ -788,8 +826,13 @@ static bool __init rdt_cpu_has(int flag)
static __init bool rdt_mba_config(void)
{
if (rdt_cpu_has(X86_FEATURE_MBA)) {
- if (rdt_get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA]))
- return true;
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+ if (rdt_get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA]))
+ return true;
+ } else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ if (rdt_get_mem_config_amd(&rdt_resources_all[RDT_RESOURCE_MBA]))
+ return true;
+ }
}

return false;
@@ -890,10 +933,32 @@ static __init void rdt_init_res_defs_intel(void)
}
}

+static __init void rdt_init_res_defs_amd(void)
+{
+ struct rdt_resource *r;
+
+ for_each_rdt_resource(r) {
+ if ((r->rid == RDT_RESOURCE_L3) ||
+ (r->rid == RDT_RESOURCE_L3DATA) ||
+ (r->rid == RDT_RESOURCE_L3CODE) ||
+ (r->rid == RDT_RESOURCE_L2) ||
+ (r->rid == RDT_RESOURCE_L2DATA) ||
+ (r->rid == RDT_RESOURCE_L2CODE))
+ r->cbm_validate = cbm_validate_amd;
+ else if (r->rid == RDT_RESOURCE_MBA) {
+ r->msr_base = IA32_MBA_BW_BASE;
+ r->msr_update = mba_wrmsr_amd;
+ r->parse_ctrlval = parse_bw_amd;
+ }
+ }
+}
+
static __init void rdt_init_res_defs(void)
{
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
rdt_init_res_defs_intel();
+ else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ rdt_init_res_defs_amd();
}

static enum cpuhp_state rdt_online;
diff --git a/arch/x86/kernel/cpu/rdt.h b/arch/x86/kernel/cpu/rdt.h
index cb7e5a4739fc..4bc8c5594650 100644
--- a/arch/x86/kernel/cpu/rdt.h
+++ b/arch/x86/kernel/cpu/rdt.h
@@ -11,6 +11,7 @@
#define IA32_L3_CBM_BASE 0xc90
#define IA32_L2_CBM_BASE 0xd10
#define IA32_MBA_THRTL_BASE 0xd50
+#define IA32_MBA_BW_BASE 0xc0000200

#define IA32_QM_CTR 0x0c8e
#define IA32_QM_EVTSEL 0x0c8d
@@ -34,6 +35,7 @@
#define MAX_MBA_BW 100u
#define MBA_IS_LINEAR 0x4
#define MBA_MAX_MBPS U32_MAX
+#define MAX_MBA_BW_AMD 0x800

#define RMID_VAL_ERROR BIT_ULL(63)
#define RMID_VAL_UNAVAIL BIT_ULL(62)
@@ -451,6 +453,8 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
struct rdt_domain *d);
int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
struct rdt_domain *d);
+int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d);

extern struct mutex rdtgroup_mutex;

@@ -583,5 +587,6 @@ bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
void __check_limbo(struct rdt_domain *d, bool force_free);
void update_mba_bw_intel(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm);
bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r);
+bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r);

#endif /* _ASM_X86_RDT_H */
diff --git a/arch/x86/kernel/cpu/rdt_ctrlmondata.c b/arch/x86/kernel/cpu/rdt_ctrlmondata.c
index af8506003ee8..f9eb0de5ba96 100644
--- a/arch/x86/kernel/cpu/rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/rdt_ctrlmondata.c
@@ -28,6 +28,52 @@
#include <linux/slab.h>
#include "rdt.h"

+/*
+ * Check whether MBA bandwidth percentage value is correct. The value is
+ * checked against the minimum and max bandwidth values specified by the
+ * hardware. The allocated bandwidth percentage is rounded to the next
+ * control step available on the hardware.
+ */
+static bool bw_validate_amd(char *buf, unsigned long *data,
+ struct rdt_resource *r)
+{
+ unsigned long bw;
+ int ret;
+
+ ret = kstrtoul(buf, 10, &bw);
+ if (ret) {
+ rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf);
+ return false;
+ }
+
+ if (bw < r->membw.min_bw || bw > r->default_ctrl) {
+ rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw,
+ r->membw.min_bw, r->default_ctrl);
+ return false;
+ }
+
+ *data = roundup(bw, (unsigned long)r->membw.bw_gran);
+ return true;
+}
+
+int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d)
+{
+ unsigned long bw_val;
+
+ if (d->have_new_ctrl) {
+ rdt_last_cmd_printf("duplicate domain %d\n", d->id);
+ return -EINVAL;
+ }
+
+ if (!bw_validate_amd(data->buf, &bw_val, r))
+ return -EINVAL;
+ d->new_ctrl = bw_val;
+ d->have_new_ctrl = true;
+
+ return 0;
+}
+
/*
* Check whether MBA bandwidth percentage value is correct. The value is
* checked against the minimum and max bandwidth values specified by the
@@ -123,6 +169,30 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
return true;
}

+/*
+ * Check whether a cache bit mask is valid. AMD allows non-contiguous
+ * bitmasks
+ */
+bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r)
+{
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul(buf, 16, &val);
+ if (ret) {
+ rdt_last_cmd_printf("non-hex character in mask %s\n", buf);
+ return false;
+ }
+
+ if (val > r->default_ctrl) {
+ rdt_last_cmd_puts("mask out of range\n");
+ return false;
+ }
+
+ *data = val;
+ return true;
+}
+
/*
* Read one cache bit mask (hex). Check that it is valid for the current
* resource type.
--
2.17.1