[PATCH 4/8] x86/intel_rdt/mba: Add MBA structures and initialize MBA

From: Vikas Shivappa
Date: Fri Apr 07 2017 - 20:34:45 EST


The MBA feature details, like the minimum bandwidth supported and the
b/w granularity, are obtained by executing CPUID with EAX=10H,
ECX=3.
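
For reference, this sub-leaf is decoded by the new rdt_get_mem_config()
below (field names as in the cpuid unions used by the patch):

    EAX: maximum throttle delay, minus-one notation
         (r->max_delay = eax.split.max_delay + 1)
    ECX: bit 2 (MBA_IS_LINEAR) set when the delay values are on a
         linear scale
    EDX: highest COS number
         (r->num_closid = edx.split.cos_max + 1)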

Set up and initialize the MBA specific extensions to data structures
such as the global list of RDT resources, the RDT resource structure
and the RDT domain structure.
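
As an illustrative example (the numbers are hypothetical): if the CPUID
data yields r->max_delay = 90 on a linear scale, then
min_bw = bw_gran = 100 - 90 = 10, i.e. memory b/w can be requested in
10% steps from 10% to 100%. A request of 70% is then programmed by
delay_bw_map() as a throttle value of 100 - 70 = 30 into
IA32_MBA_THRTL_BASE + closid.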

Signed-off-by: Vikas Shivappa <vikas.shivappa@xxxxxxxxxxxxxxx>
---
arch/x86/include/asm/intel_rdt.h | 92 +++++++++++++++---------
arch/x86/kernel/cpu/intel_rdt.c | 151 ++++++++++++++++++++++++++++++++++++---
2 files changed, 199 insertions(+), 44 deletions(-)

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 4c94f18..097134b 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -12,6 +12,7 @@
#define IA32_L3_QOS_CFG 0xc81
#define IA32_L3_CBM_BASE 0xc90
#define IA32_L2_CBM_BASE 0xd10
+#define IA32_MBA_THRTL_BASE 0xd50

#define L3_QOS_CDP_ENABLE 0x01ULL

@@ -69,39 +70,6 @@ struct rftype {
};

/**
- * struct rdt_resource - attributes of an RDT resource
- * @enabled: Is this feature enabled on this machine
- * @capable: Is this feature available on this machine
- * @name: Name to use in "schemata" file
- * @num_closid: Number of CLOSIDs available
- * @default_ctrl: Specifies default cache cbm or mem b/w percent.
- * @data_width: Character width of data when displaying
- * @min_cbm_bits: Minimum number of consecutive bits to be set
- * in a cache bit mask
- * @domains: All domains for this resource
- * @msr_base: Base MSR address for CBMs
- * @cache_level: Which cache level defines scope of this domain
- * @cbm_idx_multi: Multiplier of CBM index
- * @cbm_idx_offset: Offset of CBM index. CBM index is computed by:
- * closid * cbm_idx_multi + cbm_idx_offset
- */
-struct rdt_resource {
- bool enabled;
- bool capable;
- char *name;
- int num_closid;
- int cbm_len;
- int min_cbm_bits;
- u32 default_ctrl;
- int data_width;
- struct list_head domains;
- int msr_base;
- int cache_level;
- int cbm_idx_multi;
- int cbm_idx_offset;
-};
-
-/**
* struct rdt_domain - group of cpus sharing an RDT resource
* @list: all instances of this resource
* @id: unique id for this instance
@@ -131,6 +99,63 @@ struct msr_param {
int high;
};

+/**
+ * struct rdt_resource - attributes of an RDT resource
+ * @enabled: Is this feature enabled on this machine
+ * @capable: Is this feature available on this machine
+ * @name: Name to use in "schemata" file
+ * @num_closid: Number of CLOSIDs available
+ * @default_ctrl: Specifies default cache cbm or mem b/w percent.
+ * @data_width: Character width of data when displaying
+ * @msr_update: Function pointer to update QOS MSRs
+ * @domains: All domains for this resource
+ * @msr_base: Base MSR address for CBMs
+ * @cache_level: Which cache level defines scope of this domain
+ * @cbm_idx_multi: Multiplier of CBM index
+ * @cbm_idx_offset: Offset of CBM index. CBM index is computed by:
+ * closid * cbm_idx_multi + cbm_idx_offset
+ * @cbm_len: Number of cbm bits
+ * @min_cbm_bits: Minimum number of consecutive bits to be set
+ * in a cache bit mask
+ * @max_delay: Max throttle delay. Delay is the hardware
+ * understandable value for memory bandwidth.
+ * @min_bw: Minimum memory bandwidth percentage user
+ * can request
+ * @bw_gran: Granularity at which the memory bandwidth
+ * is allocated
+ * @delay_linear: True if memory b/w delay is in linear scale
+ * @mb_map: Mapping of memory b/w percentage to
+ * memory b/w delay values
+ */
+struct rdt_resource {
+ bool enabled;
+ bool capable;
+ char *name;
+ int num_closid;
+ u32 default_ctrl;
+ int data_width;
+ void (*msr_update) (struct rdt_domain *d, struct msr_param *m,
+ struct rdt_resource *r);
+ struct list_head domains;
+ int msr_base;
+ int cache_level;
+ int cbm_idx_multi;
+ int cbm_idx_offset;
+ union {
+ struct { /* cache ctrls */
+ int cbm_len;
+ int min_cbm_bits;
+ };
+ struct { /* mem ctrls */
+ u32 max_delay;
+ u32 min_bw;
+ u32 bw_gran;
+ u32 delay_linear;
+ u32 *mb_map;
+ };
+ };
+};
+
extern struct mutex rdtgroup_mutex;

extern struct rdt_resource rdt_resources_all[];
@@ -144,6 +169,7 @@ enum {
RDT_RESOURCE_L3DATA,
RDT_RESOURCE_L3CODE,
RDT_RESOURCE_L2,
+ RDT_RESOURCE_MBA,

/* Must be the last */
RDT_NUM_RESOURCES,
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index c4cf2e8..b9f1cfc 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -32,6 +32,9 @@
#include <asm/intel-family.h>
#include <asm/intel_rdt.h>

+#define MAX_MBA_BW 100u
+#define MBA_IS_LINEAR 0x4
+
/* Mutex to protect rdtgroup access. */
DEFINE_MUTEX(rdtgroup_mutex);

@@ -45,11 +48,17 @@
*/
int max_name_width, max_data_width;

+static void
+mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
+static void
+cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
+
struct rdt_resource rdt_resources_all[] = {
{
.name = "L3",
.domains = domain_init(RDT_RESOURCE_L3),
.msr_base = IA32_L3_CBM_BASE,
+ .msr_update = cat_wrmsr,
.min_cbm_bits = 1,
.cache_level = 3,
.cbm_idx_multi = 1,
@@ -59,6 +68,7 @@ struct rdt_resource rdt_resources_all[] = {
.name = "L3DATA",
.domains = domain_init(RDT_RESOURCE_L3DATA),
.msr_base = IA32_L3_CBM_BASE,
+ .msr_update = cat_wrmsr,
.min_cbm_bits = 1,
.cache_level = 3,
.cbm_idx_multi = 2,
@@ -68,6 +78,7 @@ struct rdt_resource rdt_resources_all[] = {
.name = "L3CODE",
.domains = domain_init(RDT_RESOURCE_L3CODE),
.msr_base = IA32_L3_CBM_BASE,
+ .msr_update = cat_wrmsr,
.min_cbm_bits = 1,
.cache_level = 3,
.cbm_idx_multi = 2,
@@ -77,11 +88,21 @@ struct rdt_resource rdt_resources_all[] = {
.name = "L2",
.domains = domain_init(RDT_RESOURCE_L2),
.msr_base = IA32_L2_CBM_BASE,
+ .msr_update = cat_wrmsr,
.min_cbm_bits = 1,
.cache_level = 2,
.cbm_idx_multi = 1,
.cbm_idx_offset = 0
},
+ {
+ .name = "MB",
+ .domains = domain_init(RDT_RESOURCE_MBA),
+ .msr_base = IA32_MBA_THRTL_BASE,
+ .msr_update = mba_wrmsr,
+ .cache_level = 3,
+ .cbm_idx_multi = 1,
+ .cbm_idx_offset = 0
+ },
};

static int cbm_idx(struct rdt_resource *r, int closid)
@@ -136,6 +157,53 @@ static inline bool cache_alloc_hsw_probe(void)
return false;
}

+/*
+ * rdt_get_mb_table() - get a mapping between the bandwidth (b/w)
+ * percentage values exposed to the user interface and the h/w
+ * understandable delay values.
+ *
+ * The non-linear delay values have a granularity of a power of two and
+ * the h/w does not guarantee a defined curve of configured delay values
+ * vs. the actually enforced b/w. Hence a pre-calibrated mapping is
+ * needed so the user can express the memory b/w as a percentage value.
+ */
+static inline bool rdt_get_mb_table(struct rdt_resource *r)
+{
+ /*
+ * As of now there are no Intel SKUs which support non-linear delay.
+ */
+ pr_info("MBA b/w map not implemented for cpu:%d, model:%d",
+ boot_cpu_data.x86, boot_cpu_data.x86_model);
+
+ return false;
+}
+
+static bool rdt_get_mem_config(struct rdt_resource *r)
+{
+ union cpuid_0x10_3_eax eax;
+ union cpuid_0x10_x_edx edx;
+ u32 ebx, ecx;
+
+ cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
+ r->num_closid = edx.split.cos_max + 1;
+ r->max_delay = eax.split.max_delay + 1;
+ r->default_ctrl = MAX_MBA_BW;
+ if (ecx & MBA_IS_LINEAR) {
+ r->delay_linear = true;
+ r->min_bw = MAX_MBA_BW - r->max_delay;
+ r->bw_gran = MAX_MBA_BW - r->max_delay;
+ } else {
+ if (!rdt_get_mb_table(r))
+ return false;
+ }
+ r->data_width = 3;
+
+ r->capable = true;
+ r->enabled = true;
+
+ return true;
+}
+
static void rdt_get_cache_config(int idx, struct rdt_resource *r)
{
union cpuid_0x10_1_eax eax;
@@ -212,7 +280,8 @@ static inline bool get_rdt_resources(void)
ret = true;
}

- if (boot_cpu_has(X86_FEATURE_MBA))
+ if (boot_cpu_has(X86_FEATURE_MBA) &&
+ rdt_get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA]))
ret = true;

rdt_init_padding();
@@ -233,6 +302,47 @@ static int get_cache_id(int cpu, int level)
return -1;
}

+/*
+ * Map the memory b/w percentage value to a delay value
+ * that can be written to the QOS_MSRs.
+ * There are currently no SKUs which support non-linear delay values.
+ */
+static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
+{
+ if (r->delay_linear)
+ return MAX_MBA_BW - bw;
+
+ WARN_ONCE(1, "Non Linear delay-bw map not supported but queried\n");
+ return r->default_ctrl;
+}
+
+static void
+mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
+{
+ int i;
+
+ for (i = m->low; i < m->high; i++) {
+ int idx = cbm_idx(r, i);
+
+ /*
+ * Write the delay value for MBA.
+ */
+ wrmsrl(r->msr_base + idx, delay_bw_map(d->ctrl_val[i], r));
+ }
+}
+
+static void
+cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
+{
+ int i;
+
+ for (i = m->low; i < m->high; i++) {
+ int idx = cbm_idx(r, i);
+
+ wrmsrl(r->msr_base + idx, d->ctrl_val[i]);
+ }
+}
+
void rdt_ctrl_update(void *arg)
{
struct msr_param *m = (struct msr_param *)arg;
@@ -291,6 +401,33 @@ static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
return NULL;
}

+static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
+{
+ struct msr_param m;
+ u32 *dc;
+ int i;
+
+ dc = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
+ if (!dc)
+ return -ENOMEM;
+
+ d->ctrl_val = dc;
+
+ /*
+ * Initialize the Control MSRs so that they impose no control:
+ * For Cache Allocation: set all bits in the cbm
+ * For Memory Allocation: set the requested b/w to 100%
+ */
+ for (i = 0; i < r->num_closid; i++, dc++)
+ *dc = r->default_ctrl;
+
+ m.low = 0;
+ m.high = r->num_closid;
+ r->msr_update(d, &m, r);
+
+ return 0;
+}
+
/*
* domain_add_cpu - Add a cpu to a resource's domain list.
*
@@ -306,7 +443,7 @@ static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
*/
static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
- int i, id = get_cache_id(cpu, r->cache_level);
+ int id = get_cache_id(cpu, r->cache_level);
struct list_head *add_pos = NULL;
struct rdt_domain *d;

@@ -327,19 +464,11 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)

d->id = id;

- d->ctrl_val = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
- if (!d->ctrl_val) {
+ if (domain_setup_ctrlval(r, d)) {
kfree(d);
return;
}

- for (i = 0; i < r->num_closid; i++) {
- int idx = cbm_idx(r, i);
-
- d->ctrl_val[i] = r->default_ctrl;
- wrmsrl(r->msr_base + idx, d->ctrl_val[i]);
- }
-
cpumask_set_cpu(cpu, &d->cpu_mask);
list_add_tail(&d->list, add_pos);
}
--
1.9.1