Re: [PATCH v5 10/18] x86/intel_rdt: Build structures for each resource based on cache topology

From: Fenghua Yu
Date: Wed Oct 26 2016 - 17:15:26 EST


On Wed, Oct 26, 2016 at 03:02:56PM +0200, Thomas Gleixner wrote:
> On Sat, 22 Oct 2016, Fenghua Yu wrote:
> > +void rdt_cbm_update(void *arg)
> > +{
> > + struct msr_param *m = (struct msr_param *)arg;
> > + struct rdt_resource *r = m->res;
> > + int i, cpu = smp_processor_id();
> > + struct rdt_domain *d;
> > +
> > + list_for_each_entry(d, &r->domains, list) {
>
> > +static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
> > + struct list_head **pos)
> > +{
> > + struct rdt_domain *d;
> > + struct list_head *l;
> > +
> > + if (id < 0)
> > + return ERR_PTR(id);
> > +
> > + list_for_each(l, &r->domains) {
> > + d = list_entry(l, struct rdt_domain, list);
>
> So above you converted to list_for_each_entry(). Is there a sensible
> reason, aside of being sloppy, why is this still using list_for_each()?

We use list_for_each() because we want to get the list_head "l". The l
is used to find the position that the new domain will be inserted.

The same function rdt_find_domain() takes care of two similar tasks (find
matched domain or find a position in the domain list to insert a new domain).
Maybe not good for two tasks to share the same function?

>
> > + /* When id is found, return its domain. */
> > + if (id == d->id)
> > + return d;
> > + /* Stop searching when finding id's position in sorted list. */
>
> What is the reason that this needs to be in a sorted list?
>
> I haven't found one so far. And if there is none, then this can use a hlist.
>
> > + if (id < d->id)
> > + break;
> > + }
> > + /*
> > + * No id is found in resource domains. Record the position
> > + * that the new domain will be added. The posistion is not used
> > + * when removing a domain.
>
> This comment makes no sense. If you want to document that a caller does not
> require the @pos argument, then you really should make it optional and do
>
> if (pos)
> *pos = l;
>
> But before doing that blindly, you want to explain why sorting is required
> at all.
>
> > + */
> > + *pos = l;
> > +
> > + return NULL;
> > +}
> > +
> > +static void domain_add_cpu(int cpu, struct rdt_resource *r)
> > +{
> > + int i, id = get_cache_id(cpu, r->cache_level);
> > + struct list_head *add_pos = NULL;
> > + struct rdt_domain *d;
> > +
> > + d = rdt_find_domain(r, id, &add_pos);
> > + if (IS_ERR(d)) {
> > + pr_warn("Could't find cache id for cpu %d\n", cpu);
> > + return;
> > + }
> > +
> > + if (d) {
> > + cpumask_set_cpu(cpu, &d->cpu_mask);
> > + return;
> > + }
> > +
> > + if (!add_pos) {
> > + pr_warn("Couldn't add cpu %d in %s domain\n", cpu, r->name);
>
> Errm, how can add_pos ever be NULL if you get here? Not at all AFAICT.
>
> > + return;
> > + }
> > +
> > + d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu));
> > + if (!d)
> > + return;
> > +
> > + d->id = id;
>
> Please move this after the allocation. This random code ordering just makes
> reading hard as one expects that d->id is a prerequisite for the
> allocation.
>
> > + d->cbm = kmalloc_array(r->num_closid, sizeof(*d->cbm), GFP_KERNEL);
> > + if (!d->cbm) {
> > + pr_warn("Failed to alloc CBM array for cpu %d\n", cpu);
> > + kfree(d);
> > + return;
> > + }
>
> New line please. Visually seperating logical code blocks enhances
> readability.
>
> > + for (i = 0; i < r->num_closid; i++) {
>
> Thanks,
>
> tglx

The following patch #10 is supposed to fix issues you pointed out above.

Is it good now?

---
arch/x86/include/asm/intel_rdt.h | 35 ++++++++
arch/x86/kernel/cpu/intel_rdt.c | 189 +++++++++++++++++++++++++++++++++++++++
2 files changed, 224 insertions(+)

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 9780409..c0d0a6e 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -39,6 +39,34 @@ struct rdt_resource {
int cbm_idx_offset;
};

+/**
+ * struct rdt_domain - group of cpus sharing an RDT resource
+ * @list: all instances of this resource
+ * @id: unique id for this instance
+ * @cpu_mask: which cpus share this resource
+ * @cbm: array of cache bit masks (indexed by CLOSID)
+ */
+struct rdt_domain {
+ struct list_head list;
+ int id;
+ struct cpumask cpu_mask;
+ u32 *cbm;
+};
+
+/**
+ * struct msr_param - set a range of MSRs from a domain
+ * @res: The resource to use
+ * @low: Beginning index from base MSR
+ * @high: End index
+ */
+struct msr_param {
+ struct rdt_resource *res;
+ int low;
+ int high;
+};
+
+extern struct mutex rdtgroup_mutex;
+
extern struct rdt_resource rdt_resources_all[];

enum {
@@ -56,6 +84,11 @@ enum {
r++) \
if (r->capable)

+#define for_each_enabled_rdt_resource(r) \
+ for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
+ r++) \
+ if (r->enabled)
+
/* CPUID.(EAX=10H, ECX=ResID=1).EAX */
union cpuid_0x10_1_eax {
struct {
@@ -71,4 +104,6 @@ union cpuid_0x10_1_edx {
} split;
unsigned int full;
};
+
+void rdt_cbm_update(void *arg);
#endif /* _ASM_X86_INTEL_RDT_H */
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 29308e1..23f1740 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -26,11 +26,16 @@

#include <linux/slab.h>
#include <linux/err.h>
+#include <linux/cacheinfo.h>
+#include <linux/cpuhotplug.h>

#include <asm/intel_rdt_common.h>
#include <asm/intel-family.h>
#include <asm/intel_rdt.h>

+/* Mutex to protect rdtgroup access. */
+DEFINE_MUTEX(rdtgroup_mutex);
+
#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains)

struct rdt_resource rdt_resources_all[] = {
@@ -72,6 +77,11 @@ struct rdt_resource rdt_resources_all[] = {
},
};

+static int cbm_idx(struct rdt_resource *r, int closid)
+{
+ return closid * r->cbm_idx_multi + r->cbm_idx_offset;
+}
+
/*
* cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs
* as they do not have CPUID enumeration support for Cache allocation.
@@ -176,14 +186,193 @@ static inline bool get_rdt_resources(void)
return ret;
}

+static int get_cache_id(int cpu, int level)
+{
+ struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
+ int i;
+
+ for (i = 0; i < ci->num_leaves; i++) {
+ if (ci->info_list[i].level == level)
+ return ci->info_list[i].id;
+ }
+
+ return -1;
+}
+
+void rdt_cbm_update(void *arg)
+{
+ struct msr_param *m = (struct msr_param *)arg;
+ struct rdt_resource *r = m->res;
+ int i, cpu = smp_processor_id();
+ struct rdt_domain *d;
+
+ list_for_each_entry(d, &r->domains, list) {
+ /* Find the domain that contains this CPU */
+ if (cpumask_test_cpu(cpu, &d->cpu_mask))
+ goto found;
+ }
+ pr_info_once("cpu %d not found in any domain for resource %s\n",
+ cpu, r->name);
+
+ return;
+
+found:
+ for (i = m->low; i < m->high; i++) {
+ int idx = cbm_idx(r, i);
+
+ wrmsrl(r->msr_base + idx, d->cbm[i]);
+ }
+}
+
+/*
+ * rdt_find_domain - Find a domain in a resource that matches input resource id
+ *
+ * Search a resource r's domain list to find the resource id. If the resource
+ * id is found in a domain, return the domain. Otherwise, if requested by
+ * caller, return the first domain whose id is bigger than the input id.
+ * The domain list is sorted by id in ascending order.
+ */
+static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
+ struct list_head **pos)
+{
+ struct rdt_domain *d;
+ struct list_head *l;
+
+ if (id < 0)
+ return ERR_PTR(id);
+
+ list_for_each(l, &r->domains) {
+ d = list_entry(l, struct rdt_domain, list);
+ /* When id is found, return its domain. */
+ if (id == d->id)
+ return d;
+ /* Stop searching when finding id's position in sorted list. */
+ if (id < d->id)
+ break;
+ }
+
+ if (pos)
+ *pos = l;
+
+ return NULL;
+}
+
+/*
+ * domain_add_cpu - Add a cpu to a resource's domain list.
+ *
+ * If an existing domain in the resource r's domain list matches the cpu's
+ * resource id, add the cpu in the domain.
+ *
+ * Otherwise, a new domain is allocated and inserted into right position
+ * in the domain list sorted by id in ascending order.
+ *
+ * The order in the domain list is visible to users when we print entries
+ * in the schemata file and schemata input is validated to have the same order
+ * as this list.
+ */
+static void domain_add_cpu(int cpu, struct rdt_resource *r)
+{
+ int i, id = get_cache_id(cpu, r->cache_level);
+ struct list_head *add_pos = NULL;
+ struct rdt_domain *d;
+
+ d = rdt_find_domain(r, id, &add_pos);
+ if (IS_ERR(d)) {
+ pr_warn("Could't find cache id for cpu %d\n", cpu);
+ return;
+ }
+
+ if (d) {
+ cpumask_set_cpu(cpu, &d->cpu_mask);
+ return;
+ }
+
+ d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu));
+ if (!d)
+ return;
+
+ d->cbm = kmalloc_array(r->num_closid, sizeof(*d->cbm), GFP_KERNEL);
+ if (!d->cbm) {
+ pr_warn("Failed to alloc CBM array for cpu %d\n", cpu);
+ kfree(d);
+ return;
+ }
+
+ for (i = 0; i < r->num_closid; i++) {
+ int idx = cbm_idx(r, i);
+
+ d->cbm[i] = r->max_cbm;
+ wrmsrl(r->msr_base + idx, d->cbm[i]);
+ }
+
+ d->id = id;
+ cpumask_set_cpu(cpu, &d->cpu_mask);
+ list_add_tail(&d->list, add_pos);
+ r->num_domains++;
+}
+
+static void domain_remove_cpu(int cpu, struct rdt_resource *r)
+{
+ int id = get_cache_id(cpu, r->cache_level);
+ struct rdt_domain *d;
+
+ d = rdt_find_domain(r, id, NULL);
+ if (IS_ERR_OR_NULL(d)) {
+ pr_warn("Could't find cache id for cpu %d\n", cpu);
+ return;
+ }
+
+ cpumask_clear_cpu(cpu, &d->cpu_mask);
+ if (cpumask_empty(&d->cpu_mask)) {
+ r->num_domains--;
+ kfree(d->cbm);
+ list_del(&d->list);
+ kfree(d);
+ }
+}
+
+static int intel_rdt_online_cpu(unsigned int cpu)
+{
+ struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+ struct rdt_resource *r;
+
+ mutex_lock(&rdtgroup_mutex);
+ for_each_capable_rdt_resource(r)
+ domain_add_cpu(cpu, r);
+ state->closid = 0;
+ wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, 0);
+ mutex_unlock(&rdtgroup_mutex);
+
+ return 0;
+}
+
+static int intel_rdt_offline_cpu(unsigned int cpu)
+{
+ struct rdt_resource *r;
+
+ mutex_lock(&rdtgroup_mutex);
+ for_each_capable_rdt_resource(r)
+ domain_remove_cpu(cpu, r);
+ mutex_unlock(&rdtgroup_mutex);
+
+ return 0;
+}
+
static int __init intel_rdt_late_init(void)
{
bool first_resource = true;
struct rdt_resource *r;
+ int state;

if (!get_rdt_resources())
return -ENODEV;

+ state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "x86/rdt/cat:online:",
+ intel_rdt_online_cpu, intel_rdt_offline_cpu);
+ if (state < 0)
+ return state;
+
pr_info("Intel RDT allocation detected: ");
for_each_capable_rdt_resource(r) {
if (!first_resource)
--
2.5.0