Re: [BUG] scheduler doesn't balance thread to idle cpu for 3 seconds
From: Jan Stancek
Date: Thu Jan 28 2016 - 10:55:15 EST
On 01/27/2016 03:52 PM, Jan Stancek wrote:
> Hello,
>
> pthread_cond_wait_1/2 [1] occasionally fails for me on 4.5.0-rc1,
> on an x86_64 KVM guest with 2 CPUs.
>
> This test [1]:
> - spawns 2 SCHED_RR threads
> - first thread with higher priority sets alarm for 2 seconds and blocks on condition
> - second thread with lower priority is busy looping for 5 seconds
> - after 2 seconds alarm signal arrives and handler signals condition
> - high priority thread should resume running
I have slightly modified the testcase so that it finishes as soon as the high-priority
thread is done, and so that it compiles outside of the Open POSIX test suite.
The testcase is attached. I'm running it in the following way:
gcc -O2 -pthread pthread_cond_wait_1.c
while [ True ]; do
time ./a.out
sleep 1
done
for a couple thousand iterations. About half of those were run
on a system booted with init=/bin/bash.
>
> But rarely I see that high priority thread doesn't resume running until
> low priority thread completes its 5 second busy loop.
>
> Looking at traces (short version attached, long version at [2]),
> I see that after 2 seconds the scheduler tries to wake up the main thread, but it
> appears to do so on the same CPU where the SCHED_RR low-priority thread is running,
> so nothing happens. The scheduler then makes numerous balance attempts,
> but the main thread is not balanced to the idle CPU.
>
> My guess is this started with following commit, which changed weighted_cpuload():
> commit b92486cbf2aa230d00f160664858495c81d2b37b
> Author: Alex Shi <alex.shi@xxxxxxxxx>
> Date: Thu Jun 20 10:18:50 2013 +0800
> sched: Compute runnable load avg in cpu_load and cpu_avg_load_per_task
Here are some numbers gathered from kernels with HEAD at b92486c and
previous commit 83dfd52. System is 2 CPU KVM guest.
Each iteration measures how long it took for testcase to finish.
Ideally it should take about 2 seconds.
1. HEAD at 83dfd52 sched: Update cpu load after task_tick
finish time [s] | iterations
----------------------------------
[ 2, 2.2] | 3134
[ 2.2, 2.5] | 18
[ 2.5, 3] | 0
[ 3, 4] | 0
[ 4, 5] | 0
[ 5, 999] | 0
2. HEAD at b92486c sched: Compute runnable load avg in cpu_load and cpu_avg_load_per_task
finish time [s] | iterations
----------------------------------
[ 2, 2.2] | 1617
[ 2.2, 2.5] | 38
[ 2.5, 3] | 727
[ 3, 4] | 399
[ 4, 5] | 17
[ 5, 999] | 11
Regards,
Jan
>
> I could reproduce it with HEAD set at above commit, I couldn't reproduce it
> with 3.10 kernel so far.
>
> Regards,
> Jan
>
> [1] https://github.com/linux-test-project/ltp/blob/master/testcases/open_posix_testsuite/functional/threads/condvar/pthread_cond_wait_1.c
> [2] http://jan.stancek.eu/tmp/pthread_cond_wait_failure/sched-trace1.tar.bz2
>
/*
* Copyright (c) 2004, QUALCOMM Inc. All rights reserved.
* Created by: abisain REMOVE-THIS AT qualcomm DOT com
* This file is licensed under the GPL license. For the full content
* of this license, see the COPYING file at the top level of this
* source tree.
* Test that pthread_cond_signal()
* shall wakeup a high priority thread even when a low priority thread
* is running
* Steps:
* 1. Create a condition variable
* 2. Create a high priority thread and make it wait on the cond
* 3. Create a low priority thread and let it busy-loop
* 4. Signal the cond in a signal handler and check that high
* priority thread got woken up
*
*/
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <unistd.h>
#include <time.h>
/* Identifiers spliced into the diagnostic prefix below. */
#define TEST "5-1"
#define AREA "scheduler"
/* Prefix for error messages; expands to "unexpected error: scheduler 5-1: ". */
#define ERROR_PREFIX "unexpected error: " AREA " " TEST ": "
/* Scheduling priorities assigned to the two worker threads. */
#define HIGH_PRIORITY 10
#define LOW_PRIORITY 5
/* Upper bound, in seconds, on the low priority thread's busy loop. */
#define RUNTIME 5
/* Scheduling policy requested for both worker threads. */
#define POLICY SCHED_RR
/* Process exit codes used to report the test outcome. */
#define PTS_PASS 0
#define PTS_FAIL 1
#define PTS_UNRESOLVED 2
/* mutex required by the cond variable */
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
/* condition variable that the high priority thread blocks on */
pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
/* Flags that the threads use to indicate events:
 * woken_up - set by the high priority thread once its wait has returned,
 *            unless the low priority thread had already finished
 * low_done - set by the low priority thread when its busy loop ends
 */
volatile int woken_up = 0;
volatile int low_done = 0;
/* Handler installed for SIGALRM: signals the condition variable so that
 * the thread blocked on it is released. If the signal call reports an
 * error, a diagnostic is printed and the process exits with
 * PTS_UNRESOLVED.
 */
void signal_handler(int sig)
{
	(void) sig;

	/* Nothing more to do once the condition has been signalled. */
	if (pthread_cond_signal(&cond) == 0)
		return;

	printf(ERROR_PREFIX "pthread_cond_signal\n");
	exit(PTS_UNRESOLVED);
}
/* Returns the elapsed time t2 - t1 in seconds.
 * The whole-second delta is held as a float and the nanosecond delta is
 * scaled to seconds in double precision before the two are summed.
 */
float timediff(struct timespec t2, struct timespec t1)
{
	const float whole_secs = t2.tv_sec - t1.tv_sec;
	const double frac_secs = (t2.tv_nsec - t1.tv_nsec) / 1000000000.0;

	return whole_secs + frac_secs;
}
/*
 * Entry point of the high priority thread.
 *
 * Switches the calling thread to POLICY at HIGH_PRIORITY and verifies the
 * setting, installs signal_handler for SIGALRM, takes the mutex, arms a
 * 2 second alarm and blocks on the condition variable. Once the wait
 * returns it sets woken_up, unless the low priority thread has already
 * set low_done.
 *
 * tmp: unused.
 * Returns NULL. Any failing call prints a diagnostic and terminates the
 * process with PTS_UNRESOLVED.
 */
void *hi_priority_thread(void *tmp)
{
	struct sched_param param;
	int policy;
	int rc = 0;

	(void) tmp;

	param.sched_priority = HIGH_PRIORITY;
	rc = pthread_setschedparam(pthread_self(), POLICY, &param);
	if (rc != 0) {
		printf(ERROR_PREFIX "pthread_setschedparam\n");
		exit(PTS_UNRESOLVED);
	}

	/* Read the settings back to confirm they were really applied. */
	rc = pthread_getschedparam(pthread_self(), &policy, &param);
	if (rc != 0) {
		printf(ERROR_PREFIX "pthread_getschedparam\n");
		exit(PTS_UNRESOLVED);
	}
	if ((policy != POLICY) || (param.sched_priority != HIGH_PRIORITY)) {
		printf("Error: the policy or priority not correct\n");
		exit(PTS_UNRESOLVED);
	}

	/* Install a signal handler for ALRM.
	 * signal() returns the previous disposition on success, which is
	 * not necessarily 0, so failure is detected via SIG_ERR only.
	 */
	if (signal(SIGALRM, signal_handler) == SIG_ERR) {
		perror(ERROR_PREFIX "signal:");
		exit(PTS_UNRESOLVED);
	}

	/* acquire the mutex */
	rc = pthread_mutex_lock(&mutex);
	if (rc != 0) {
		printf(ERROR_PREFIX "pthread_mutex_lock\n");
		exit(PTS_UNRESOLVED);
	}

	/* Setup an alarm to go off in 2 seconds */
	alarm(2);

	/* Block, to be woken up by the signal handler */
	rc = pthread_cond_wait(&cond, &mutex);
	if (rc != 0) {
		printf(ERROR_PREFIX "pthread_cond_wait\n");
		exit(PTS_UNRESOLVED);
	}

	/* This variable is unprotected because the scheduling removes
	 * the contention
	 */
	if (low_done != 1)
		woken_up = 1;

	rc = pthread_mutex_unlock(&mutex);
	if (rc != 0) {
		printf(ERROR_PREFIX "pthread_mutex_unlock\n");
		exit(PTS_UNRESOLVED);
	}
	return NULL;
}
/*
 * Entry point of the low priority thread.
 *
 * Switches the calling thread to POLICY at LOW_PRIORITY and verifies the
 * setting, then busy-loops until either woken_up becomes non-zero or more
 * than RUNTIME seconds have elapsed, and finally sets low_done.
 *
 * tmp: unused.
 * Returns NULL. Any failing call prints a diagnostic and terminates the
 * process with PTS_UNRESOLVED.
 */
void *low_priority_thread(void *tmp)
{
	struct timespec start_time, current_time;
	struct sched_param param;
	int policy;
	int rc = 0;

	(void) tmp;

	param.sched_priority = LOW_PRIORITY;
	rc = pthread_setschedparam(pthread_self(), POLICY, &param);
	if (rc != 0) {
		printf(ERROR_PREFIX "pthread_setschedparam\n");
		exit(PTS_UNRESOLVED);
	}

	/* Read the settings back to confirm they were really applied. */
	rc = pthread_getschedparam(pthread_self(), &policy, &param);
	if (rc != 0) {
		printf(ERROR_PREFIX "pthread_getschedparam\n");
		exit(PTS_UNRESOLVED);
	}
	if ((policy != POLICY) || (param.sched_priority != LOW_PRIORITY)) {
		printf("Error: the policy or priority not correct\n");
		exit(PTS_UNRESOLVED);
	}

	/* grab the start time and busy loop for up to RUNTIME seconds */
	if (clock_gettime(CLOCK_REALTIME, &start_time) != 0) {
		perror(ERROR_PREFIX "clock_gettime");
		exit(PTS_UNRESOLVED);
	}

	/* Stop early as soon as the high priority thread reports its wakeup. */
	while (!woken_up) {
		/* A failed read would leave current_time uninitialised. */
		if (clock_gettime(CLOCK_REALTIME, &current_time) != 0) {
			perror(ERROR_PREFIX "clock_gettime");
			exit(PTS_UNRESOLVED);
		}
		if (timediff(current_time, start_time) > RUNTIME)
			break;
	}

	low_done = 1;
	return NULL;
}
/*
 * Creates a thread that runs entry, using a thread attribute object that
 * requests POLICY at the given priority. The thread id is stored in *id.
 * Any failing call prints a diagnostic and terminates the process with
 * PTS_UNRESOLVED.
 *
 * NOTE(review): pthread_attr_setinheritsched(PTHREAD_EXPLICIT_SCHED) is
 * not called, so on implementations that default to inheriting the
 * creator's scheduling these attributes may have no effect at creation
 * time - confirm. Both thread entry points also set their own policy and
 * priority via pthread_setschedparam().
 */
static void create_prio_thread(pthread_t *id, int priority,
			       void *(*entry)(void *))
{
	pthread_attr_t attr;
	struct sched_param param;
	int rc;

	rc = pthread_attr_init(&attr);
	if (rc != 0) {
		printf(ERROR_PREFIX "pthread_attr_init\n");
		exit(PTS_UNRESOLVED);
	}

	rc = pthread_attr_setschedpolicy(&attr, POLICY);
	if (rc != 0) {
		printf(ERROR_PREFIX "pthread_attr_setschedpolicy\n");
		exit(PTS_UNRESOLVED);
	}

	param.sched_priority = priority;
	rc = pthread_attr_setschedparam(&attr, &param);
	if (rc != 0) {
		printf(ERROR_PREFIX "pthread_attr_setschedparam\n");
		exit(PTS_UNRESOLVED);
	}

	rc = pthread_create(id, &attr, entry, NULL);
	if (rc != 0) {
		printf(ERROR_PREFIX "pthread_create\n");
		exit(PTS_UNRESOLVED);
	}

	/* The attribute object is no longer needed once the thread exists. */
	rc = pthread_attr_destroy(&attr);
	if (rc != 0) {
		printf(ERROR_PREFIX "pthread_attr_destroy\n");
		exit(PTS_UNRESOLVED);
	}
}

/*
 * Test driver: starts the high priority thread, then the low priority
 * thread, waits for both to finish and reports the result.
 *
 * Exits with PTS_PASS if woken_up was set, PTS_FAIL if it was not, and
 * PTS_UNRESOLVED if thread creation or joining failed.
 */
int main(void)
{
	pthread_t high_id, low_id;
	int rc;

	/* Create the higher priority thread first, then the low priority one */
	create_prio_thread(&high_id, HIGH_PRIORITY, hi_priority_thread);
	create_prio_thread(&low_id, LOW_PRIORITY, low_priority_thread);

	/* Wait for the threads to exit */
	rc = pthread_join(high_id, NULL);
	if (rc != 0) {
		printf(ERROR_PREFIX "pthread_join\n");
		exit(PTS_UNRESOLVED);
	}
	rc = pthread_join(low_id, NULL);
	if (rc != 0) {
		printf(ERROR_PREFIX "pthread_join\n");
		exit(PTS_UNRESOLVED);
	}

	/* Check the result */
	if (woken_up == 0) {
		printf("Test FAILED: high priority was not woken up\n");
		exit(PTS_FAIL);
	}

	printf("Test PASSED\n");
	exit(PTS_PASS);
}