[PATCH v2 7/8] ARCv2: perf: SMP support

From: Alexey Brodkin
Date: Wed Aug 05 2015 - 11:15:32 EST

Next message: Alexey Brodkin: "[PATCH v2 4/8] ARCv2: perf: Support sampling events using overflow interrupts"
Previous message: Uladzislau Rezki: "[PATCH] sched: check pinned tasks before nohz balance, linux-4.2-rc5"
In reply to: Alexey Brodkin: "[PATCH v2 8/8] ARCv2: perf: Finally introduce HS perf unit"
Next in thread: Alexey Brodkin: "[PATCH v2 4/8] ARCv2: perf: Support sampling events using overflow interrupts"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

* split off pmu info into singleton and per-cpu bits
* setup PMU on all cores

Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Signed-off-by: Alexey Brodkin <abrodkin@xxxxxxxxxxxx>
---

Compared to v1:
[1] Rebased on top of the previous patches, hence the changes in the patch itself
[2] Cosmetics

arch/arc/kernel/perf_event.c | 71 ++++++++++++++++++++++++++++++++++----------
1 file changed, 55 insertions(+), 16 deletions(-)

diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 3203141..008fa58 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -21,13 +21,25 @@

struct arc_pmu {
struct pmu pmu;
+ unsigned int irq;
int n_counters;
int n_events;
- unsigned long used_mask[BITS_TO_LONGS(ARC_PERF_MAX_COUNTERS)];
u64 max_period;
int ev_hw_idx[PERF_COUNT_ARC_HW_MAX];
+ u64 raw_events[ARC_PERF_MAX_EVENTS];
+};
+
+struct arc_pmu_cpu {
+ /*
+ * A 1 bit for an index indicates that the counter is being used for
+ * an event. A 0 means that the counter can be used.
+ */
+ unsigned long used_mask[BITS_TO_LONGS(ARC_PERF_MAX_COUNTERS)];
+
+ /*
+ * The events that are active on the PMU for the given index.
+ */
struct perf_event *act_counter[ARC_PERF_MAX_COUNTERS];
- u64 raw_events[ARC_PERF_MAX_EVENTS];
};

struct arc_callchain_trace {
@@ -69,6 +81,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
}

static struct arc_pmu *arc_pmu;
+static DEFINE_PER_CPU(struct arc_pmu_cpu, arc_pmu_cpu);

/* read counter #idx; note that counter# != event# on ARC! */
static uint64_t arc_pmu_read_counter(int idx)
@@ -323,10 +336,12 @@ static void arc_pmu_stop(struct perf_event *event, int flags)

static void arc_pmu_del(struct perf_event *event, int flags)
{
+ struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
+
arc_pmu_stop(event, PERF_EF_UPDATE);
- __clear_bit(event->hw.idx, arc_pmu->used_mask);
+ __clear_bit(event->hw.idx, pmu_cpu->used_mask);

- arc_pmu->act_counter[event->hw.idx] = 0;
+ pmu_cpu->act_counter[event->hw.idx] = 0;

perf_event_update_userpage(event);
}
@@ -334,22 +349,23 @@ static void arc_pmu_del(struct perf_event *event, int flags)
/* allocate hardware counter and optionally start counting */
static int arc_pmu_add(struct perf_event *event, int flags)
{
+ struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;

- if (__test_and_set_bit(idx, arc_pmu->used_mask)) {
- idx = find_first_zero_bit(arc_pmu->used_mask,
+ if (__test_and_set_bit(idx, pmu_cpu->used_mask)) {
+ idx = find_first_zero_bit(pmu_cpu->used_mask,
arc_pmu->n_counters);
if (idx == arc_pmu->n_counters)
return -EAGAIN;

- __set_bit(idx, arc_pmu->used_mask);
+ __set_bit(idx, pmu_cpu->used_mask);
hwc->idx = idx;
}

write_aux_reg(ARC_REG_PCT_INDEX, idx);

- arc_pmu->act_counter[idx] = event;
+ pmu_cpu->act_counter[idx] = event;

if (is_sampling_event(event)) {
/* Mimic full counter overflow as other arches do */
@@ -380,7 +396,7 @@ static int arc_pmu_add(struct perf_event *event, int flags)
static irqreturn_t arc_pmu_intr(int irq, void *dev)
{
struct perf_sample_data data;
- struct arc_pmu *arc_pmu = (struct arc_pmu *)dev;
+ struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
struct pt_regs *regs;
int active_ints;
int idx;
@@ -392,7 +408,7 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
regs = get_irq_regs();

for (idx = 0; idx < arc_pmu->n_counters; idx++) {
- struct perf_event *event = arc_pmu->act_counter[idx];
+ struct perf_event *event = pmu_cpu->act_counter[idx];
struct hw_perf_event *hwc;

if (!(active_ints & (1 << idx)))
@@ -435,6 +451,17 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)

#endif /* CONFIG_ISA_ARCV2 */

+void arc_cpu_pmu_irq_init(void)
+{
+ struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
+
+ arc_request_percpu_irq(arc_pmu->irq, smp_processor_id(), arc_pmu_intr,
+ "ARC perf counters", pmu_cpu);
+
+ /* Clear all pending interrupt flags */
+ write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff);
+}
+
static int arc_pmu_device_probe(struct platform_device *pdev)
{
struct arc_reg_pct_build pct_bcr;
@@ -543,18 +570,30 @@ static int arc_pmu_device_probe(struct platform_device *pdev)

if (has_interrupts) {
int irq = platform_get_irq(pdev, 0);
+ unsigned long flags;

if (irq < 0) {
pr_err("Cannot get IRQ number for the platform\n");
return -ENODEV;
}

- ret = devm_request_irq(&pdev->dev, irq, arc_pmu_intr, 0,
- "arc-pmu", arc_pmu);
- if (ret) {
- pr_err("could not allocate PMU IRQ\n");
- return ret;
- }
+ arc_pmu->irq = irq;
+
+ /*
+ * arc_cpu_pmu_irq_init() needs to be called on all cores for
+ * their respective local PMU.
+ * However we use an open-coded on_each_cpu() to ensure it is called
+ * on core0 first, so that arc_request_percpu_irq() sets up
+ * AUTOEN etc. Otherwise enable_percpu_irq() fails to enable
+ * perf IRQ on non-master cores.
+ * see arc_request_percpu_irq()
+ */
+ preempt_disable();
+ local_irq_save(flags);
+ arc_cpu_pmu_irq_init();
+ local_irq_restore(flags);
+ smp_call_function((smp_call_func_t)arc_cpu_pmu_irq_init, 0, 1);
+ preempt_enable();

/* Clean all pending interrupt flags */
write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff);
--
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Next message: Alexey Brodkin: "[PATCH v2 4/8] ARCv2: perf: Support sampling events using overflow interrupts"
Previous message: Uladzislau Rezki: "[PATCH] sched: check pinned tasks before nohz balance, linux-4.2-rc5"
In reply to: Alexey Brodkin: "[PATCH v2 8/8] ARCv2: perf: Finally introduce HS perf unit"
Next in thread: Alexey Brodkin: "[PATCH v2 4/8] ARCv2: perf: Support sampling events using overflow interrupts"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]