[PATCH 3/4] add tracepoints in rwsem
From: Török Edwin
Date: Sun Oct 12 2008 - 09:12:42 EST
This allows a tracing framework, such as ftrace, to measure the latency
during lock contention, and (unlike latencytop) makes it possible to show whom to blame for holding
the lock for too long.
The tracepoints consist of 3 events:
- a down_read or down_write failed - a tracer can use this as a point of reference
- the semaphore is granted after a failed down_read or down_write - usable to measure latency
- one writer, or all readers, are woken - a tracer can see who was/is holding the lock,
and blame it for the latencies of all reader/writer tasks on the waiter list
Signed-off-by: Török Edwin <edwintorok@xxxxxxxxx>
---
include/trace/rwsem.h | 35 +++++++++++++++++++++++++++++++++++
lib/rwsem-spinlock.c | 16 ++++++++--------
lib/rwsem.c | 13 +++++--------
3 files changed, 48 insertions(+), 16 deletions(-)
create mode 100644 include/trace/rwsem.h
diff --git a/include/trace/rwsem.h b/include/trace/rwsem.h
new file mode 100644
index 0000000..8afb773
--- /dev/null
+++ b/include/trace/rwsem.h
@@ -0,0 +1,35 @@
+#ifndef _TRACE_RWSEM_H
+#define _TRACE_RWSEM_H
+
+#include <linux/tracepoint.h>
+
+struct rwsem_waiter {
+ struct list_head list;
+ struct task_struct *task;
+ unsigned int flags;
+#define RWSEM_WAITING_FOR_READ 0x00000001
+#define RWSEM_WAITING_FOR_WRITE 0x00000002
+};
+
+/* some waiters from the waiter list are woken,
+ * some remain sleeping */
+DEFINE_TRACE(rwsem_wake,
+ TPPROTO(const struct rw_semaphore *sem,
+ const struct rwsem_waiter *waiter),
+ TPARGS(sem, waiter));
+
+/* a down_read() failed because a writer already has the lock,
+ * or a down_write() failed because another reader or writer
+ * already has the lock.
+ * sem is the contended semaphore; the event fires in the context of
+ * the task that is about to wait to be given the lock */
+DEFINE_TRACE(rwsem_lock_failed,
+ TPPROTO(const struct rw_semaphore *sem),
+ TPARGS(sem));
+
+/* the tsk was granted the lock, after previously failing to
+ * acquire the lock due to contention */
+DEFINE_TRACE(rwsem_lock_ok,
+ TPPROTO(const struct task_struct *tsk),
+ TPARGS(tsk));
+#endif
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
index 9df3ca5..9e32ce4 100644
--- a/lib/rwsem-spinlock.c
+++ b/lib/rwsem-spinlock.c
@@ -8,14 +8,7 @@
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/module.h>
-
-struct rwsem_waiter {
- struct list_head list;
- struct task_struct *task;
- unsigned int flags;
-#define RWSEM_WAITING_FOR_READ 0x00000001
-#define RWSEM_WAITING_FOR_WRITE 0x00000002
-};
+#include <trace/rwsem.h>
/*
* initialise the semaphore
@@ -69,6 +62,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
list_del(&waiter->list);
tsk = waiter->task;
/* Don't touch waiter after ->task has been NULLed */
+ trace_rwsem_wake(sem, waiter);
smp_mb();
waiter->task = NULL;
wake_up_process(tsk);
@@ -78,6 +72,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
/* grant an infinite number of read locks to the front of the queue */
dont_wake_writers:
+ trace_rwsem_wake(sem, waiter);
woken = 0;
while (waiter->flags & RWSEM_WAITING_FOR_READ) {
struct list_head *next = waiter->list.next;
@@ -115,6 +110,7 @@ __rwsem_wake_one_writer(struct rw_semaphore *sem)
list_del(&waiter->list);
tsk = waiter->task;
+ trace_rwsem_wake(sem, waiter);
smp_mb();
waiter->task = NULL;
wake_up_process(tsk);
@@ -152,6 +148,7 @@ void __sched __down_read(struct rw_semaphore *sem)
/* we don't need to touch the semaphore struct anymore */
spin_unlock_irq(&sem->wait_lock);
+ trace_rwsem_lock_failed(sem);
/* wait to be given the lock */
for (;;) {
if (!waiter.task)
@@ -160,6 +157,7 @@ void __sched __down_read(struct rw_semaphore *sem)
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
}
+ trace_rwsem_lock_ok(tsk);
tsk->state = TASK_RUNNING;
out:
;
@@ -218,6 +216,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
/* we don't need to touch the semaphore struct anymore */
spin_unlock_irq(&sem->wait_lock);
+ trace_rwsem_lock_failed(sem);
/* wait to be given the lock */
for (;;) {
if (!waiter.task)
@@ -226,6 +225,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
}
+ trace_rwsem_lock_ok(tsk);
tsk->state = TASK_RUNNING;
out:
;
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 3e3365e..a9c5571 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -7,6 +7,7 @@
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/module.h>
+#include <trace/rwsem.h>
/*
* Initialize an rwsem:
@@ -28,14 +29,6 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
EXPORT_SYMBOL(__init_rwsem);
-struct rwsem_waiter {
- struct list_head list;
- struct task_struct *task;
- unsigned int flags;
-#define RWSEM_WAITING_FOR_READ 0x00000001
-#define RWSEM_WAITING_FOR_WRITE 0x00000002
-};
-
/*
* handle the lock release when processes blocked on it that can now run
* - if we come here from up_xxxx(), then:
@@ -81,6 +74,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
*/
list_del(&waiter->list);
tsk = waiter->task;
+ trace_rwsem_wake(sem, waiter);
smp_mb();
waiter->task = NULL;
wake_up_process(tsk);
@@ -99,6 +93,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
* readers before waking any processes up
*/
readers_only:
+ trace_rwsem_wake(sem, waiter);
woken = 0;
do {
woken++;
@@ -171,6 +166,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
spin_unlock_irq(&sem->wait_lock);
+ trace_rwsem_lock_failed(sem);
/* wait to be given the lock */
for (;;) {
if (!waiter->task)
@@ -179,6 +175,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
}
+ trace_rwsem_lock_ok(tsk);
tsk->state = TASK_RUNNING;
return sem;
--
1.5.6.5
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/