Re: [RFC] perf/x86/intel: Account interrupts for PEBS errors

From: Peter Zijlstra
Date: Wed Dec 14 2016 - 15:09:02 EST


On Wed, Dec 14, 2016 at 07:16:36PM +0100, Jiri Olsa wrote:

> > > +++ b/arch/x86/events/intel/ds.c
> > > @@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
> > > continue;
> > >
> > > /* log dropped samples number */
> > > - if (error[bit])
> > > + if (error[bit]) {
> > > perf_log_lost_samples(event, error[bit]);
> > >
> > > + if (perf_event_account_interrupt(event, 1))
> >
> > Seems a bit daft to expose the .throttle argument, since that would be
> > the only point of calling this.
>
> there's also the other caller from __perf_event_overflow

See the below patchlet ;-)

> > > +static int __perf_event_overflow(struct perf_event *event,
> > > + int throttle, struct perf_sample_data *data,
> > > + struct pt_regs *regs)
> > > +{
> > > + int events = atomic_read(&event->event_limit);
> > > + struct hw_perf_event *hwc = &event->hw;
> > > + int ret = 0;
> > > +
> > > + /*
> > > + * Non-sampling counters might still use the PMI to fold short
> > > + * hardware counters, ignore those.
> > > + */
> > > + if (unlikely(!is_sampling_event(event)))
> > > + return 0;
> > > +
> > > + ret = perf_event_account_interrupt(event, throttle);
> > > +
> > > if (event->attr.freq) {
> > > u64 now = perf_clock();
> > > s64 delta = now - hwc->freq_time_stamp;
> >
> > Arguably, everything in __perf_event_overflow() except for calling of
> > ->overflow_handler() should be done I think.
>
> well, I was wondering about that period adjustment bit
>
> but I wasn't sure about those pending_kill/pending_wakeup bits,
> they make sense to me only if we have some data to deliver

Hmm, maybe. Please add a comment, that way we can at least rediscover we
thought about this.


--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1392,7 +1392,7 @@ static void intel_pmu_drain_pebs_nhm(str
if (error[bit]) {
perf_log_lost_samples(event, error[bit]);

- if (perf_event_account_interrupt(event, 1))
+ if (perf_event_account_interrupt(event))
x86_pmu_stop(event, 0);
}

--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1259,7 +1259,7 @@ extern void perf_event_disable(struct pe
extern void perf_event_disable_local(struct perf_event *event);
extern void perf_event_disable_inatomic(struct perf_event *event);
extern void perf_event_task_tick(void);
-extern int perf_event_account_interrupt(struct perf_event *event, int throttle);
+extern int perf_event_account_interrupt(struct perf_event *event);
#else /* !CONFIG_PERF_EVENTS: */
static inline void *
perf_aux_output_begin(struct perf_output_handle *handle,
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7034,7 +7034,7 @@ static void perf_log_itrace_start(struct
perf_output_end(&handle);
}

-int perf_event_account_interrupt(struct perf_event *event, int throttle)
+static int __perf_event_account_interrupt(struct perf_event *event, int throttle)
{
struct hw_perf_event *hwc = &event->hw;
int ret = 0;
@@ -7059,6 +7059,11 @@ int perf_event_account_interrupt(struct
return ret;
}

+int perf_event_account_interrupt(struct perf_event *event)
+{
+ return __perf_event_account_interrupt(event, 1);
+}
+
/*
* Generic event overflow handling, sampling.
*/
@@ -7078,7 +7083,7 @@ static int __perf_event_overflow(struct
if (unlikely(!is_sampling_event(event)))
return 0;

- ret = perf_event_account_interrupt(event, throttle);
+ ret = __perf_event_account_interrupt(event, throttle);

if (event->attr.freq) {
u64 now = perf_clock();