Re: [PATCH v2 07/23] x86/resctrl: Add domain offline callback for resctrl work

From: Babu Moger
Date: Tue Oct 19 2021 - 19:20:08 EST




On 10/1/21 11:02 AM, James Morse wrote:
> Because domains are exposed to user-space via resctrl, the filesystem
> must update its state when CPU hotplug callbacks are triggered.
>
> Some of this work is common to any architecture that would support
> resctrl, but the work is tied up with the architecture code to
> free the memory.
>
> Move the monitor subdir removal and the cancelling of the mbm/limbo
> works into a new resctrl_offline_domain() call. These bits are not
> specific to the architecture. Grouping them in one function allows
> that code to be moved to /fs/ and re-used by another architecture.
>
> Signed-off-by: James Morse <james.morse@xxxxxxx>
> ---
> Changes since v1:
> * Removed a redundant mon_capable check
> * Capitalisation
> * Removed inline comment
> * Added to the commit message
> ---
> arch/x86/kernel/cpu/resctrl/core.c | 26 ++---------------
> arch/x86/kernel/cpu/resctrl/internal.h | 2 --
> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 39 +++++++++++++++++++++++---
> include/linux/resctrl.h | 1 +
> 4 files changed, 38 insertions(+), 30 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index 7a2c24c5652c..1dd8428df008 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -523,27 +523,8 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
>
> cpumask_clear_cpu(cpu, &d->cpu_mask);
> if (cpumask_empty(&d->cpu_mask)) {
> - /*
> - * If resctrl is mounted, remove all the
> - * per domain monitor data directories.
> - */
> - if (static_branch_unlikely(&rdt_mon_enable_key))
> - rmdir_mondata_subdir_allrdtgrp(r, d->id);
> + resctrl_offline_domain(r, d);
> list_del(&d->list);
> - if (r->mon_capable && is_mbm_enabled())
> - cancel_delayed_work(&d->mbm_over);
> - if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) {
> - /*
> - * When a package is going down, forcefully
> - * decrement rmid->ebusy. There is no way to know
> - * that the L3 was flushed and hence may lead to
> - * incorrect counts in rare scenarios, but leaving
> - * the RMID as busy creates RMID leaks if the
> - * package never comes back.
> - */
> - __check_limbo(d, true);
> - cancel_delayed_work(&d->cqm_limbo);
> - }
>
> /*
> * rdt_domain "d" is going to be freed below, so clear
> @@ -551,11 +532,8 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
> */
> if (d->plr)
> d->plr->d = NULL;
> -
> - bitmap_free(d->rmid_busy_llc);
> - kfree(d->mbm_total);
> - kfree(d->mbm_local);
> domain_free(hw_dom);
> +
> return;
> }
>
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index be48a682dbdb..e12b55f815bf 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -522,8 +522,6 @@ void free_rmid(u32 rmid);
> int rdt_get_mon_l3_config(struct rdt_resource *r);
> void mon_event_count(void *info);
> int rdtgroup_mondata_show(struct seq_file *m, void *arg);
> -void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
> - unsigned int dom_id);
> void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
> struct rdt_domain *d, struct rdtgroup *rdtgrp,
> int evtid, int first);
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index 19691f9ab061..38670bb810cb 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -2499,14 +2499,12 @@ static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
> * Remove all subdirectories of mon_data of ctrl_mon groups
> * and monitor groups with given domain id.
> */
> -void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id)
> +static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
> + unsigned int dom_id)
> {
> struct rdtgroup *prgrp, *crgrp;
> char name[32];
>
> - if (!r->mon_capable)
> - return;
> -
> list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
> sprintf(name, "mon_%s_%02d", r->name, dom_id);
> kernfs_remove_by_name(prgrp->mon.mon_data_kn, name);
> @@ -3233,6 +3231,39 @@ static int __init rdtgroup_setup_root(void)
> return ret;
> }
>
> +void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
> +{
> + lockdep_assert_held(&rdtgroup_mutex);

Is this lockdep_assert_held() really required here?

> +
> + if (!r->mon_capable)
> + return;

I don't see the need for this r->mon_capable check either.

Thanks
Babu