Re: [PATCH 4/4] perf/powerpc: Implement group_read() txn interface for 24x7 counters

From: Sukadev Bhattiprolu
Date: Thu Mar 26 2015 - 01:58:11 EST


Peter Zijlstra [peterz@xxxxxxxxxxxxx] wrote:
|
| Is there a down-side to always doing the txn based group read? If an
| arch does not implement the read txn support it'll fall back to doing
| independent read ops, but we end up doing those anyway.
|
| That way we get less special case code.

We could, but would need to move the perf_event_read() earlier in
the perf_event_read_group(). Can we do something like this (it
could be broken into two patches, but merging for easier review)
Would something liks this work?

----

perf_event_read_value() is mostly computing event count, enabled
and running times. Move the perf_event_read() into caller and
rename perf_event_read_value() to perf_event_compute_values().

Then, in perf_event_read_group(), read the event counts using the
transaction interface for all PMUs.


----
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 8e6b7d8..5896cb1 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -144,9 +144,11 @@ static u64 read_pmc(struct kvm_pmc *pmc)

counter = pmc->counter;

- if (pmc->perf_event)
- counter += perf_event_read_value(pmc->perf_event,
+ if (pmc->perf_event) {
+ perf_event_read(pmc->perf_event);
+ counter += perf_event_compute_values(pmc->perf_event,
&enabled, &running);
+ }

/* FIXME: Scaling needed? */

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c8fe60e..1e30560 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -579,7 +579,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
void *context);
extern void perf_pmu_migrate_context(struct pmu *pmu,
int src_cpu, int dst_cpu);
-extern u64 perf_event_read_value(struct perf_event *event,
+extern u64 perf_event_compute_values(struct perf_event *event,
u64 *enabled, u64 *running);


diff --git a/kernel/events/core.c b/kernel/events/core.c
index a6abcd3..f7e4705 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3643,7 +3643,27 @@ static void orphans_remove_work(struct work_struct *work)
put_ctx(ctx);
}

-u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
+static int perf_event_read_values(struct perf_event *leader)
+{
+ int ret;
+ struct perf_event *sub;
+ struct pmu *pmu;
+
+ pmu = leader->pmu;
+
+ pmu->start_txn(pmu, PERF_PMU_TXN_READ);
+
+ pmu->read(leader);
+ list_for_each_entry(sub, &leader->sibling_list, group_entry)
+ pmu->read(sub);
+
+ ret = pmu->commit_txn(pmu, PERF_PMU_TXN_READ);
+
+ return ret;
+}
+
+u64 perf_event_compute_values(struct perf_event *event, u64 *enabled,
+ u64 *running)
{
struct perf_event *child;
u64 total = 0;
@@ -3653,7 +3673,6 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)

mutex_lock(&event->child_mutex);

- perf_event_read(event);
total += perf_event_count(event);

*enabled += event->total_time_enabled +
@@ -3671,7 +3690,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)

return total;
}
-EXPORT_SYMBOL_GPL(perf_event_read_value);
+EXPORT_SYMBOL_GPL(perf_event_compute_values);

static int perf_event_read_group(struct perf_event *event,
u64 read_format, char __user *buf)
@@ -3684,7 +3703,11 @@ static int perf_event_read_group(struct perf_event *event,

lockdep_assert_held(&ctx->mutex);

- count = perf_event_read_value(leader, &enabled, &running);
+ ret = perf_event_read_values(leader);
+ if (ret)
+ return ret;
+
+ count = perf_event_compute_values(leader, &enabled, &running);

values[n++] = 1 + leader->nr_siblings;
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
@@ -3705,7 +3728,7 @@ static int perf_event_read_group(struct perf_event *event,
list_for_each_entry(sub, &leader->sibling_list, group_entry) {
n = 0;

- values[n++] = perf_event_read_value(sub, &enabled, &running);
+ values[n++] = perf_event_compute_values(sub, &enabled, &running);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(sub);

@@ -3728,7 +3751,8 @@ static int perf_event_read_one(struct perf_event *event,
u64 values[4];
int n = 0;

- values[n++] = perf_event_read_value(event, &enabled, &running);
+ perf_event_read(event);
+ values[n++] = perf_event_compute_values(event, &enabled, &running);
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
values[n++] = enabled;
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/