Re: [PATCH v2 11/32] perf/x86/intel/cqm: add per-package RMID rotation

From: Thomas Gleixner
Date: Wed May 18 2016 - 17:39:17 EST


On Wed, 11 May 2016, David Carrillo-Cisneros wrote:
> @@ -216,6 +216,9 @@ static int pkg_data_init_cpu(int cpu)
> INIT_LIST_HEAD(&pkg_data->istate_pmonrs_lru);
> INIT_LIST_HEAD(&pkg_data->ilstate_pmonrs_lru);
>
> + pkg_data->nr_instate_pmonrs = 0;
> + pkg_data->nr_ilstate_pmonrs = 0;

with kzalloc you can avoid all this 0 initialization mess.

> mutex_init(&pkg_data->pkg_data_mutex);
> raw_spin_lock_init(&pkg_data->pkg_data_lock);
>
> @@ -276,6 +279,10 @@ static struct pmonr *pmonr_alloc(int cpu)
> pmonr->monr = NULL;
> INIT_LIST_HEAD(&pmonr->rotation_entry);
>
> + pmonr->last_enter_istate = 0;
> + pmonr->last_enter_astate = 0;
> + pmonr->nr_enter_istate = 0;

Ditto.

> +/*
> + * Transition from (IL)state to (A)state.
> + */
> +static inline void
> +__pmonr__ilstate_to_astate(struct pmonr *pmonr)

That nicely fits in a single line

> +{
> + struct prmid *prmid;
> +
> + lockdep_assert_held(&__pkg_data(pmonr, pkg_data_lock));
> + WARN_ON_ONCE(!pmonr->limbo_prmid);

I'm giving up on that.

> + prmid = pmonr->limbo_prmid;
> + pmonr->limbo_prmid = NULL;
> + list_del_init(&pmonr->limbo_rotation_entry);
> +
> + __pkg_data(pmonr, nr_ilstate_pmonrs)--;
> + __pkg_data(pmonr, nr_instate_pmonrs)++;

Just once more before stop reading.

struct pkg_data *pkd = pmonr->pkg_data;

Will get rid of all this __pkg_data() nonsense.

In this function the compiler will be able to figure out that its always the
same thing on its own. But there is enough code where it will simply
reevaluate.

> +static inline void
> +__pmonr__ilstate_to_instate(struct pmonr *pmonr)
> +{
> + lockdep_assert_held(&__pkg_data(pmonr, pkg_data_lock));
> +
> + list_move_tail(&pmonr->limbo_prmid->pool_entry,
> + &__pkg_data(pmonr, free_prmids_pool));
> + pmonr->limbo_prmid = NULL;
> +
> + __pkg_data(pmonr, nr_ilstate_pmonrs)--;
> + __pkg_data(pmonr, nr_instate_pmonrs)++;
> +
> + list_del_init(&pmonr->limbo_rotation_entry);
> + __pmonr__set_istate_summary(pmonr);
> +}
> +
> +/* Count all limbo prmids, including the ones still attached to pmonrs.
> + * Maximum number of prmids is fixed by hw and generally small.
> + */
> +static int count_limbo_prmids(struct pkg_data *pkg_data)
> +{
> + unsigned int c = 0;
> + struct prmid *prmid;
> +
> + lockdep_assert_held(&pkg_data->pkg_data_mutex);
> +
> + list_for_each_entry(
> + prmid, &pkg_data->pmonr_limbo_prmids_pool, pool_entry) {
> + c++;
> + }
> + list_for_each_entry(
> + prmid, &pkg_data->nopmonr_limbo_prmids_pool, pool_entry) {
> + c++;
> + }

And why can't you track the number of queued entries at list_add/del time and spare these loops?

> + return c;
> +}
> +
> static int intel_cqm_setup_pkg_prmid_pools(u16 pkg_id)
> {
> int r;
> @@ -858,6 +937,586 @@ static bool __match_event(struct perf_event *a, struct perf_event *b)
> return false;
> }
>
> +/*
> + * Try to reuse limbo prmid's for pmonrs at the front of ilstate_pmonrs_lru.
> + */
> +static int __try_reuse_ilstate_pmonrs(struct pkg_data *pkg_data)
> +{
> + int reused = 0;
> + struct pmonr *pmonr;
> +
> + lockdep_assert_held(&pkg_data->pkg_data_mutex);
> + lockdep_assert_held(&pkg_data->pkg_data_lock);
> +
> + while ((pmonr = list_first_entry_or_null(
> + &pkg_data->istate_pmonrs_lru, struct pmonr, rotation_entry))) {
> +
> + if (__pmonr__in_instate(pmonr))
> + break;

That really deserves a comment that pmonr is removed from the rotation
list. It's non obvious that the function below will do this.

> + __pmonr__ilstate_to_astate(pmonr);
> + reused++;
> + }
> + return reused;
> +}
> +
> +static int try_reuse_ilstate_pmonrs(struct pkg_data *pkg_data)
> +{
> + int reused;
> + unsigned long flags;
> +#ifdef CONFIG_LOCKDEP
> + u16 pkg_id = topology_physical_package_id(smp_processor_id());
> +#endif
> +
> + lockdep_assert_held(&pkg_data->pkg_data_mutex);
> +
> + raw_spin_lock_irqsave_nested(&pkg_data->pkg_data_lock, flags, pkg_id);

Why do you need nested here? You are not nesting pkd->lock at all.

> + reused = __try_reuse_ilstate_pmonrs(pkg_data);
> + raw_spin_unlock_irqrestore(&pkg_data->pkg_data_lock, flags);
> + return reused;
> +}
> +
> +
> +/*
> + * A monr is only readable when all it's used pmonrs have a RMID.
> + * Therefore, the time a monr entered (A)state is the maximum of the
> + * last_enter_astate times for all (A)state pmonrs if no pmonr is in (I)state.
> + * A monr with any pmonr in (I)state has no entered (A)state.
> + * Returns monr_enter_astate time if available, otherwise min_inh_pkg is
> + * set to the smallest pkg_id where the monr's pmnor is in (I)state and
> + * the return value is undefined.
> + */
> +static unsigned long
> +__monr__last_enter_astate(struct monr *monr, int *min_inh_pkg)
> +{
> + struct pkg_data *pkg_data;
> + u16 pkg_id;
> + unsigned long flags, astate_time = 0;
> +
> + *min_inh_pkg = -1;
> + cqm_pkg_id_for_each_online(pkg_id) {
> + struct pmonr *pmonr;
> +
> + if (min_inh_pkg >= 0)
> + break;
> +
> + raw_spin_lock_irqsave_nested(
> + &pkg_data->pkg_data_lock, flags, pkg_id);

Ditto.

> +
> + pmonr = monr->pmonrs[pkg_id];
> + if (__pmonr__in_istate(pmonr) && min_inh_pkg < 0)
> + *min_inh_pkg = pkg_id;
> + else if (__pmonr__in_astate(pmonr) &&
> + astate_time < pmonr->last_enter_astate)
> + astate_time = pmonr->last_enter_astate;
> +
> + raw_spin_unlock_irqrestore(&pkg_data->pkg_data_lock, flags);
> + }
> + return astate_time;
> +}

> +/* It will remove the prmid from the list its attached, if used. */
> +static inline int __try_use_free_prmid(struct pkg_data *pkg_data,
> + struct prmid *prmid, bool *succeed)
> +{
> + struct pmonr *pmonr;
> + int nr_activated = 0;
> +
> + lockdep_assert_held(&pkg_data->pkg_data_mutex);
> + lockdep_assert_held(&pkg_data->pkg_data_lock);
> +
> + *succeed = false;
> + nr_activated += __try_reuse_ilstate_pmonrs(pkg_data);
> + pmonr = list_first_entry_or_null(&pkg_data->istate_pmonrs_lru,
> + struct pmonr, rotation_entry);
> + if (!pmonr)
> + return nr_activated;
> + WARN_ON_ONCE(__pmonr__in_ilstate(pmonr));
> + WARN_ON_ONCE(!__pmonr__in_instate(pmonr));
> +
> + /* the state transition function will move the prmid to
> + * the active lru list.
> + */
> + __pmonr__instate_to_astate(pmonr, prmid);
> + nr_activated++;
> + *succeed = true;
> + return nr_activated;

I really give up on this now. I have no idea what that code has to do with the
comment above the function and I really can't figure out what this succeed
flag is for.

And to be honest, I'm too tired to figure this out now, but I'm also tired of
looking at this maze in general. I think I gave you enough hints for now and I
won't look at the rest of this before the next iteration comes along.

Here is a summary of observations which I like to be addressed:

- I like the general idea of splitting this apart in bits and pieces. But
some of these splits are just mechanical or by throwing a dice. Please make
this understandable

- Don't do variable definitions in header files

- Do not forward declare inlines or static function in headers.

- Add proper error handling and cleanups right from the very beginning

- Make this code modular

- Rethink the naming conventions

- Rethink the state tracking

- Avoid these gazillion of macros/inlines which just obfuscate the code

- Reduce the number of asserts and WARN_ONs to a useful set.

- Handle unexpected conditions gracefully instead of emitting a useles
warning followed by an oops. If there is no way to handle it gracefully get
rid of the WARN_ONs as they are pointless.

- Use kzalloc instead of zeroing/NULLing each newly added struct member.

- Rethink the use of your unions. If they make sense, explain why.

- Use consistent formatting for comments and functions, proper kerneldoc and
use a consistent ordering of your local variables.

- Avoid pointless long struct member names which mostly provide redundant
information

- Use intermediate variables instead of annoying line breaks.

- Cleanup the lock_nested() abuse.

- Please provide locking rules so there is a single place which explains
which data is protected by which lock and explain the nesting rules.

- Rethink the lock nesting. More than 8 packages in a system are reality
today.

- Please provide a proper overview how this stuff works. Your changelogs talk
about improvements over the previous code and focus on implementation
details, but the big picture is missing completely, unless I failed to find
it.

Thanks,

tglx