Re: [PATCH v7 08/14] fs,x86,mpam/resctrl: Handle change in number of RMIDs on each mount
From: Reinette Chatre
Date: Mon Jun 08 2026 - 19:21:26 EST
Hi Tony,
On 6/1/26 12:56 PM, Tony Luck wrote:
> Application Energy Telemetry (AET) event enumeration takes place
> asynchronously. Linux builds the pmt_telemetry module into the kernel to
> kick of enumeration early enough that it completes before first mount of
> the resctrl file system.
>
> Allowing pmt_telemetry to be a loadable module means that it is possible
> for different numbers of RMIDs to be supported on each mount, depending
> on whether pmt_telemetry module is loaded.
>
> Add resctrl_arch_system_max_rmid_idx() interface to provide the maximum
> supported number of RMIDs on a system. For x86 this is RDT_RESOURCE_L3
> rdt_resource::mon.num_rmid (if L3 monitoring is enabled).
>
> Allocate the rmid_ptrs, rdt_l3_mon_domain::rmid_busy_llc, and
> rdt_l3_mon_domain::mbm_states based on the maximum possible.
This is a significant change to data structures expected to be
indexed by RMID. Could you please update kernel-doc of these members to reflect
the new usage?
>
> Initialize rmid_free_lru based on the number of RMIDs available for
> this mount.
>
Above three paragraps writes verbatim what can be seen in the patch self.
Could you please expand the changelog to help reader understand why these
changes are made?
This patch is quite difficult to decipher when one only has the code to consider.
> Note that some RMIDs may still be marked busy from a previous mount.
> Don't add these to the free list. Check current RMID limit in
> limbo_release_entry() and do not add out of range RMIDs to the
> free list.
>
> Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
> ---
...
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index 1af8f965fdd0..934492c7e643 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -129,6 +129,18 @@ u32 resctrl_arch_system_num_rmid_idx(void)
> return num_rmids == U32_MAX ? 0 : num_rmids;
> }
>
> +/**
> + * resctrl_arch_system_max_rmid_idx - Largest possible number of RMIDs
> + *
> + * Return: If L3 monitoring is supported, largest possible comes from L3.
> + */
> +u32 resctrl_arch_system_max_rmid_idx(void)
> +{
> + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
> +
> + return r->mon_capable ? r->mon.num_rmid : resctrl_arch_system_num_rmid_idx();
> +}
> +
> struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
> {
> if (l >= RDT_NUM_RESOURCES)
> diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c
> index 226ff6f532fa..7079870ca894 100644
> --- a/drivers/resctrl/mpam_resctrl.c
> +++ b/drivers/resctrl/mpam_resctrl.c
> @@ -272,6 +272,11 @@ u32 resctrl_arch_system_num_rmid_idx(void)
> return (mpam_pmg_max + 1) * (mpam_partid_max + 1);
> }
>
> +u32 resctrl_arch_system_max_rmid_idx(void)
> +{
> + return resctrl_arch_system_num_rmid_idx();
> +}
> +
> u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid)
> {
> return closid * (mpam_pmg_max + 1) + rmid;
> diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
> index 327e7a863614..b374e2f84a75 100644
> --- a/fs/resctrl/monitor.c
> +++ b/fs/resctrl/monitor.c
> @@ -115,10 +115,17 @@ static inline struct rmid_entry *__rmid_entry(u32 idx)
>
> static void limbo_release_entry(struct rmid_entry *entry)
> {
> + u32 idx_limit = resctrl_arch_system_num_rmid_idx();
Having the code switch idx_limit to sometimes be resctrl_arch_system_num_rmid_idx()
and other times resctrl_arch_system_max_rmid_idx() makes this change difficult to
follow. I think it will help to use different variable names to differentiate
the context in which it is being used and not leave reader trying to understand
why there are *two* limits.
> lockdep_assert_held(&rdtgroup_mutex);
>
> rmid_limbo_count--;
> - list_add_tail(&entry->list, &rmid_free_lru);
> +
> + /*
> + * Limbo may be freeing an RMID from a previous mount where there
> + * were more RMIDs available.
> + */
> + if (resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid) < idx_limit)
> + list_add_tail(&entry->list, &rmid_free_lru);
>
> if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
> closid_num_dirty_rmid[entry->closid]--;
> @@ -133,7 +140,7 @@ static void limbo_release_entry(struct rmid_entry *entry)
> void __check_limbo(struct rdt_l3_mon_domain *d, bool force_free)
> {
> struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
> - u32 idx_limit = resctrl_arch_system_num_rmid_idx();
> + u32 idx_limit = resctrl_arch_system_max_rmid_idx();
> struct rmid_entry *entry;
> u32 idx, cur_idx = 1;
> void *arch_mon_ctx;
> @@ -192,7 +199,7 @@ void __check_limbo(struct rdt_l3_mon_domain *d, bool force_free)
>
> bool has_busy_rmid(struct rdt_l3_mon_domain *d)
> {
> - u32 idx_limit = resctrl_arch_system_num_rmid_idx();
> + u32 idx_limit = resctrl_arch_system_max_rmid_idx();
>
> return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
> }
> @@ -916,24 +923,32 @@ int setup_rmid_lru_list(void)
> return 0;
>
> /*
> - * Called on every mount, but the number of RMIDs cannot change
> - * after the first mount, so keep using the same set of rmid_ptrs[]
> - * until resctrl_exit(). Note that the limbo handler continues to
> - * access rmid_ptrs[] after resctrl is unmounted.
> + * Allocate the largest number of RMIDs that this system will ever
> + * need. These cannot be freed until resctrl_exit() because the limbo
> + * handler continues to access rmid_ptrs[] after resctrl is unmounted.
> */
> - if (rmid_ptrs)
> - return 0;
> + if (!rmid_ptrs) {
> + idx_limit = resctrl_arch_system_max_rmid_idx();
> + rmid_ptrs = kzalloc_objs(struct rmid_entry, idx_limit);
> + if (!rmid_ptrs)
> + return -ENOMEM;
> +
> + for (i = 0; i < idx_limit; i++) {
> + entry = &rmid_ptrs[i];
> + INIT_LIST_HEAD(&entry->list);
> +
> + resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
> + }
> + }
>
> + /* Find how many RMIDs are needed for this mount */
> idx_limit = resctrl_arch_system_num_rmid_idx();
> - rmid_ptrs = kzalloc_objs(struct rmid_entry, idx_limit);
> - if (!rmid_ptrs)
> - return -ENOMEM;
>
> + INIT_LIST_HEAD(&rmid_free_lru);
> for (i = 0; i < idx_limit; i++) {
> entry = &rmid_ptrs[i];
> - INIT_LIST_HEAD(&entry->list);
> -
> - resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
> + if (i && entry->busy)
> + continue;
> list_add_tail(&entry->list, &rmid_free_lru);
> }
>
> @@ -1156,7 +1171,7 @@ static void mbm_cntr_free_all(struct rdt_resource *r, struct rdt_l3_mon_domain *
> */
> static void resctrl_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
> {
> - u32 idx_limit = resctrl_arch_system_num_rmid_idx();
> + u32 idx_limit = resctrl_arch_system_max_rmid_idx();
> enum resctrl_event_id evt;
> int idx;
>
> diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
> index d2a1f88d8782..6d647b71c5db 100644
> --- a/fs/resctrl/rdtgroup.c
> +++ b/fs/resctrl/rdtgroup.c
> @@ -4416,7 +4416,7 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *h
> */
> static int domain_setup_l3_mon_state(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
> {
> - u32 idx_limit = resctrl_arch_system_num_rmid_idx();
> + u32 idx_limit = resctrl_arch_system_max_rmid_idx();
> size_t tsize = sizeof(*d->mbm_states[0]);
> enum resctrl_event_id eventid;
> int idx;
Reinette