[RFC PATCH 2/3] latencytop: split latency_lock into a global lock and a per-task lock

From: Feng Tang
Date: Mon Apr 29 2019 - 04:03:41 EST


Currently a single global "latency_lock" serializes updates to both
the global and the per-task latency data. Split it into one global
lock and one per-task lock to improve lock granularity and reduce
contention: tasks updating their own latency records no longer
contend with each other, and the global lock is held only for the
global accounting.
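
The resulting locking in __account_scheduler_latency() looks roughly
like the sketch below (illustrative of the before/after structure,
not the verbatim kernel code):

	/* Before: one global lock held across both updates. */
	raw_spin_lock_irqsave(&latency_lock, flags);
	account_global_scheduler_latency(tsk, &lat);
	/* ... scan/update tsk->latency_record[] ... */
	raw_spin_unlock_irqrestore(&latency_lock, flags);

	/* After: each lock protects only its own data; the per-task
	 * record scan no longer serializes all tasks behind the
	 * global lock.
	 */
	raw_spin_lock_irqsave(&latency_lock, flags);
	account_global_scheduler_latency(tsk, &lat);
	raw_spin_unlock_irqrestore(&latency_lock, flags);

	raw_spin_lock_irqsave(&tsk->latency_lock, flags);
	/* ... scan/update tsk->latency_record[] ... */
	raw_spin_unlock_irqrestore(&tsk->latency_lock, flags);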

Cc: Arjan van de Ven <arjan@xxxxxxxxxxxxxxx>
Signed-off-by: Feng Tang <feng.tang@xxxxxxxxx>
---
 include/linux/sched.h | 1 +
 init/init_task.c      | 3 +++
 kernel/fork.c         | 4 ++++
 kernel/latencytop.c   | 9 +++++----
 4 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f9b43c9..84cf13c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1095,6 +1095,7 @@ struct task_struct {
 	unsigned long dirty_paused_when;
 
 #ifdef CONFIG_LATENCYTOP
+	raw_spinlock_t latency_lock;
 	int latency_record_count;
 	struct latency_record latency_record[LT_SAVECOUNT];
 #endif
diff --git a/init/init_task.c b/init/init_task.c
index 5aebe3b..f7cc0fb 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -158,6 +158,9 @@ struct task_struct init_task
 	.numa_group = NULL,
 	.numa_faults = NULL,
 #endif
+#ifdef CONFIG_LATENCYTOP
+	.latency_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.latency_lock),
+#endif
 #ifdef CONFIG_KASAN
 	.kasan_depth = 1,
 #endif
diff --git a/kernel/fork.c b/kernel/fork.c
index b69248e..2109468 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1963,6 +1963,10 @@ static __latent_entropy struct task_struct *copy_process(
 #ifdef TIF_SYSCALL_EMU
 	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
 #endif
+
+#ifdef CONFIG_LATENCYTOP
+	raw_spin_lock_init(&p->latency_lock);
+#endif
 	clear_all_latency_tracing(p);
 
 	/* ok, now we should be set up.. */
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 96b4179..6d7a174 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -74,10 +74,10 @@ void clear_all_latency_tracing(struct task_struct *p)
 	if (!latencytop_enabled)
 		return;
 
-	raw_spin_lock_irqsave(&latency_lock, flags);
+	raw_spin_lock_irqsave(&p->latency_lock, flags);
 	memset(&p->latency_record, 0, sizeof(p->latency_record));
 	p->latency_record_count = 0;
-	raw_spin_unlock_irqrestore(&latency_lock, flags);
+	raw_spin_unlock_irqrestore(&p->latency_lock, flags);
 }
 
 static void clear_global_latency_tracing(void)
@@ -194,9 +194,10 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
 	store_stacktrace(tsk, &lat);
 
 	raw_spin_lock_irqsave(&latency_lock, flags);
-
 	account_global_scheduler_latency(tsk, &lat);
+	raw_spin_unlock_irqrestore(&latency_lock, flags);
 
+	raw_spin_lock_irqsave(&tsk->latency_lock, flags);
 	for (i = 0; i < tsk->latency_record_count; i++) {
 		struct latency_record *mylat;
 		int same = 1;
@@ -234,7 +235,7 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
 	memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));
 
 out_unlock:
-	raw_spin_unlock_irqrestore(&latency_lock, flags);
+	raw_spin_unlock_irqrestore(&tsk->latency_lock, flags);
 }
 
 static int lstats_show(struct seq_file *m, void *v)
--
2.7.4