[tip:perfcounters/core] perf_counter: Allow for a wakeup watermark

From: tip-bot for Peter Zijlstra
Date: Thu Sep 17 2009 - 14:08:22 EST


Commit-ID: 0c733e0676f27fa325f6ba42f4035db8523abc90
Gitweb: http://git.kernel.org/tip/0c733e0676f27fa325f6ba42f4035db8523abc90
Author: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
AuthorDate: Thu, 17 Sep 2009 19:01:10 +0200
Committer: Ingo Molnar <mingo@xxxxxxx>
CommitDate: Thu, 17 Sep 2009 20:02:46 +0200

perf_counter: Allow for a wakeup watermark

Currently we wake the mmap() consumer once every PAGE_SIZE of data
and/or once every wakeup_events events when specified.

For high speed sampling this results in too many wakeups wrt. the
buffer size, hence change this.

We move the default wakeup limit to 1/4 of the buffer size, and
provide a means to manually specify this limit in bytes (set
attr.watermark and attr.wakeup_watermark).

Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@xxxxxxx>


---
include/linux/perf_counter.h | 10 ++++++++--
kernel/perf_counter.c | 32 +++++++++++++++++++-------------
2 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 972f90d..6c1ef72 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -199,10 +199,14 @@ struct perf_counter_attr {
inherit_stat : 1, /* per task counts */
enable_on_exec : 1, /* next exec enables */
task : 1, /* trace fork/exit */
+ watermark : 1, /* wakeup_watermark */

- __reserved_1 : 50;
+ __reserved_1 : 49;

- __u32 wakeup_events; /* wakeup every n events */
+ union {
+ __u32 wakeup_events; /* wakeup every n events */
+ __u32 wakeup_watermark; /* bytes before wakeup */
+ };
__u32 __reserved_2;

__u64 __reserved_3;
@@ -521,6 +525,8 @@ struct perf_mmap_data {
atomic_t wakeup; /* needs a wakeup */
atomic_t lost; /* nr records lost */

+ long watermark; /* wakeup watermark */
+
struct perf_counter_mmap_page *user_page;
void *data_pages[0];
};
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index fe0d1ad..29b73b6 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2176,6 +2176,13 @@ static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
data->nr_pages = nr_pages;
atomic_set(&data->lock, -1);

+ if (counter->attr.watermark) {
+ data->watermark = min_t(long, PAGE_SIZE * nr_pages,
+ counter->attr.wakeup_watermark);
+ }
+ if (!data->watermark)
+ data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4);
+
rcu_assign_pointer(counter->data, data);

return 0;
@@ -2517,23 +2524,15 @@ struct perf_output_handle {
unsigned long flags;
};

-static bool perf_output_space(struct perf_mmap_data *data,
- unsigned int offset, unsigned int head)
+static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
+ unsigned long offset, unsigned long head)
{
- unsigned long tail;
unsigned long mask;

if (!data->writable)
return true;

mask = (data->nr_pages << PAGE_SHIFT) - 1;
- /*
- * Userspace could choose to issue a mb() before updating the tail
- * pointer. So that all reads will be completed before the write is
- * issued.
- */
- tail = ACCESS_ONCE(data->user_page->data_tail);
- smp_rmb();

offset = (offset - tail) & mask;
head = (head - tail) & mask;
@@ -2679,7 +2678,7 @@ static int perf_output_begin(struct perf_output_handle *handle,
{
struct perf_counter *output_counter;
struct perf_mmap_data *data;
- unsigned int offset, head;
+ unsigned long tail, offset, head;
int have_lost;
struct {
struct perf_event_header header;
@@ -2717,16 +2716,23 @@ static int perf_output_begin(struct perf_output_handle *handle,
perf_output_lock(handle);

do {
+ /*
+ * Userspace could choose to issue a mb() before updating the
+ * tail pointer. So that all reads will be completed before the
+ * write is issued.
+ */
+ tail = ACCESS_ONCE(data->user_page->data_tail);
+ smp_rmb();
offset = head = atomic_long_read(&data->head);
head += size;
- if (unlikely(!perf_output_space(data, offset, head)))
+ if (unlikely(!perf_output_space(data, tail, offset, head)))
goto fail;
} while (atomic_long_cmpxchg(&data->head, offset, head) != offset);

handle->offset = offset;
handle->head = head;

- if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT))
+ if (head - tail > data->watermark)
atomic_set(&data->wakeup, 1);

if (have_lost) {
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/