Re: Possible deadlock errors in tools/perf/builtin-sched.c

From: 叶澄锋
Date: Sat Aug 28 2021 - 04:06:50 EST


Dear developers:

Thank you for checking.

It seems there are two possible deadlocks involving the locks
sched->work_done_wait_mutex and sched->start_work_mutex.

They are triggered when one thread (A) runs the function run_one_test
in a loop without releasing the two locks in the wait_for_tasks
function, and another thread (B) runs the function thread_func, which
acquires the two locks.

Because the two locks are not properly released in thread A, there
will be a deadlock if thread B tries to acquire the two locks.

The relevant code is below:

Thread A:

static void create_tasks(struct perf_sched *sched)
{
...;
err = pthread_mutex_lock(&sched->start_work_mutex);
...;
err = pthread_mutex_lock(&sched->work_done_wait_mutex);
...;
}

static int perf_sched__replay(struct perf_sched *sched)
{
...;
create_tasks(sched);
printf("------------------------------------------------------------\n");
for (i = 0; i < sched->replay_repeat; i++)
run_one_test(sched); // multiple reacquisition of the locks
                     // sched->work_done_wait_mutex and sched->start_work_mutex

return 0;
}

static void run_one_test(struct perf_sched *sched)
{
...;
wait_for_tasks(sched);
...;
}

static void wait_for_tasks(struct perf_sched *sched)
{
...;
pthread_mutex_unlock(&sched->work_done_wait_mutex);
...;
ret = pthread_mutex_lock(&sched->work_done_wait_mutex);
...;
pthread_mutex_unlock(&sched->start_work_mutex);
...;

ret = pthread_mutex_lock(&sched->start_work_mutex);
....;
}

Thread B:
static void *thread_func(void *ctx)
{
...;
ret = pthread_mutex_lock(&sched->start_work_mutex);
...;
ret = pthread_mutex_unlock(&sched->start_work_mutex);

...;

ret = pthread_mutex_lock(&sched->work_done_wait_mutex);
...;
ret = pthread_mutex_unlock(&sched->work_done_wait_mutex);
..;

}

PS: The previous email failed to reach the mailing list, so I am sending it again.

Thanks,

叶澄锋 <dg573847474@xxxxxxxxx> 于2021年8月28日周六 下午3:57写道:
>
> Dear developers:
>
> Thank you for your checking.
>
> It seems there are two deadlock errors on the lock sched->work_done_wait_mutex and sched->start_work_mutex.
>
> They are triggered due to one thread(A) runs function run_one_test locating in a loop and unreleasing the two locks in the wait_for_tasks function, and another thread(B) runs function thread_func acquiring the two locks.
>
> Because the two locks are not properly released in thread A, there will be a deadlock problem if thread B acquires the two locks.
>
> The related codes are below:
>
> Thread A:
>
> static void create_tasks(struct perf_sched *sched)
> {
> ...;
> err = pthread_mutex_lock(&sched->start_work_mutex);
> ...;
> err = pthread_mutex_lock(&sched->work_done_wait_mutex);
> ...;
> }
> static int perf_sched__replay(struct perf_sched *sched)
> {
> ...;
>
> create_tasks(sched);
> printf("------------------------------------------------------------\n");
> for (i = 0; i < sched->replay_repeat; i++)
> run_one_test(sched); // multiple reacquisition on the lock sched->work_done_wait_mutex and sched->start_work_mutex
>
> return 0;
> }
>
> static void run_one_test(struct perf_sched *sched)
> {
> ...;
> wait_for_tasks(sched);
> ...;
> }
> static void wait_for_tasks(struct perf_sched *sched)
> {
> ...;
> pthread_mutex_unlock(&sched->work_done_wait_mutex);
>
> ...;
> ret = pthread_mutex_lock(&sched->work_done_wait_mutex);
> ...;
> pthread_mutex_unlock(&sched->start_work_mutex);
>
> ...;
>
> ret = pthread_mutex_lock(&sched->start_work_mutex);
> ....;
> }
>
> Thread B:
>
> static void *thread_func(void *ctx)
> {
>
> ...;
> ret = pthread_mutex_lock(&sched->start_work_mutex);
> ...;
> ret = pthread_mutex_unlock(&sched->start_work_mutex);
>
> ...;
>
> ret = pthread_mutex_lock(&sched->work_done_wait_mutex);
> ...;
> ret = pthread_mutex_unlock(&sched->work_done_wait_mutex);
> ..;
>
> }
>
>
> Thanks,