Re: [tip:core/locking] x86/smp: Move waiting on contended ticketlock out of line

From: Rik van Riel
Date: Thu Feb 28 2013 - 16:15:31 EST


On 02/28/2013 03:26 PM, Linus Torvalds wrote:
On Thu, Feb 28, 2013 at 10:22 AM, Linus Torvalds
<torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:

I'm sure there are other things we could do to improve ipc lock times
even if we don't actually split the lock, but the security one might
be a good first step.

Btw, if somebody has a benchmark for threads using multiple ipc
semaphores (from the same semget() allocation) concurrently, and we
could have a simple way to see the contention without having to run
some big DB thing, that would also be nice. Maybe there is something
out there already? Google didn't find any, and the normal benchmarks
I'm aware of all just do one single (private) ipc semaphore per
process.

Nothing gets some people going like just having a nice benchmark to
show the effect.

I have modified one of the semop tests to use multiple semaphores.

To run the test, specify the number of threads. If you want the
number of semaphores to be different from the number of threads,
specify a second commandline argument.

$ ./semop-multi
usage: ./semop-multi <threads> [nsems]

#define _GNU_SOURCE
#include <sched.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <malloc.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/sem.h>

/* Benchmark duration in seconds. */
#define TEST_TIME 30
/* Upper bound on the number of semaphores in the set (mirrors a common
 * kernel SEMMNI-style limit; nsems is clamped to this in main()). */
#define SEMMNI 128

/* SysV semaphore set id returned by semget(). */
int semid;
/* Run flag: workers spin while state != 0; main clears it after TEST_TIME.
 * NOTE(review): written by main and read by workers with no synchronization
 * and no volatile/atomic qualifier — a data race; see worker(). */
int state = 1;
/* Per-thread operation counts, indexed by worker id; summed by main. */
unsigned long *results_array;
/* Countdown used for the startup rendezvous between main and workers. */
int threads_starting;
/* Workers signal thread_parent when all have started; main broadcasts
 * thread_worker to release them simultaneously. Both use thread_lock. */
pthread_cond_t thread_parent;
pthread_cond_t thread_worker;
pthread_mutex_t thread_lock;
/* Number of semaphores in the set (from argv[2] or thread count). */
int nsems;

/* Caller-defined union required by semctl(); glibc does not define it. */
union semun {
int val;
struct semid_ds *buf;
unsigned short int *array;
struct seminfo *__buf;
void *__pad;
};

/*
 * Worker thread body.
 *
 * arg encodes the worker's integer id. After a rendezvous with the main
 * thread, the worker repeatedly increments (sem_op = 1) and then decrements
 * (sem_op = -1) one semaphore of the set, hopping "id" semaphores ahead
 * each iteration so different threads spread across the set. Each
 * semop() counts as one operation; the total is stored in
 * results_array[id] when main clears "state".
 *
 * Returns NULL. Exits the whole process on semop() failure.
 */
void *
worker(void *arg)
{
	unsigned long count = 0;
	int id = (int)(unsigned long)arg;
	struct sembuf sembuff;

	sembuff.sem_num = 0;
	sembuff.sem_flg = 0;

	/* Rendezvous: report in, then block until main broadcasts the start. */
	pthread_mutex_lock(&thread_lock);
	threads_starting--;
	if (!threads_starting)
		pthread_cond_signal(&thread_parent);
	pthread_cond_wait(&thread_worker, &thread_lock);
	pthread_mutex_unlock(&thread_lock);

	/*
	 * FIX: read "state" through a volatile lvalue. It is a plain int
	 * written by the main thread with no synchronization; without
	 * volatile the compiler may hoist the load out of the loop and the
	 * worker would never observe state == 0, spinning forever.
	 * (atomic_int would be the modern fix, but the declaration lives
	 * outside this function.)
	 */
	while (*(volatile int *)&state) {

		/* Move "id" ahead through the semaphores */
		sembuff.sem_num = (sembuff.sem_num + id) % nsems;

		/* Increment the semaphore ("lock" in the original's terms) */
		sembuff.sem_op = 1;
		if (semop(semid, &sembuff, 1) < 0) {
			perror("semop");
			exit(1);
		}

		/* Decrement it back ("unlock") */
		sembuff.sem_op = -1;
		if (semop(semid, &sembuff, 1) < 0) {
			perror("semop");
			exit(1);
		}

		count += 2;
	}

	results_array[id] = count;

	return NULL;
}

/*
 * semop-multi: measure semop() throughput with <threads> workers hammering
 * a SysV semaphore set of [nsems] semaphores (default: one per thread).
 *
 * Usage: semop-multi <threads> [nsems]
 *        <threads> == 0 means one worker per online CPU.
 *
 * Returns 0 on success; exits with status 1 on any setup failure.
 */
int
main(int argc, char **argv)
{
	pthread_t *thread_array;
	pthread_attr_t thread_attr;
	int thread_count;
	unsigned short seminit[SEMMNI];
	union semun sem_un;
	cpu_set_t cpu;
	unsigned long total = 0;
	int i, ret;
	long cpus;

	cpus = sysconf(_SC_NPROCESSORS_ONLN);

	if (argc < 2) {
		printf("usage: %s <threads> [nsems]\n", argv[0]);
		exit(1);
	}

	thread_count = atoi(argv[1]);

	if (thread_count < 0) {
		printf("threads must be >= 0\n");
		exit(1);
	}

	/* 0 threads means "one per online CPU". */
	if (thread_count == 0)
		thread_count = cpus;

	if (argc > 2)
		nsems = atoi(argv[2]);
	else
		nsems = thread_count;
	if (nsems > SEMMNI)
		nsems = SEMMNI;
	/*
	 * FIX: clamp nsems to at least 1. atoi() returns 0 on garbage, and
	 * an explicit "0" argument was previously accepted; the workers
	 * compute "% nsems", which would divide by zero.
	 */
	if (nsems < 1)
		nsems = 1;

	printf("cpus %ld, threads: %d, semaphores: %d, test duration: %d secs\n", cpus, thread_count, nsems, TEST_TIME);

	thread_array = malloc(thread_count * sizeof(pthread_t));

	if (!thread_array) {
		perror("malloc(thread_array)");
		exit(1);
	}

	results_array = malloc(thread_count * sizeof(unsigned long));

	if (!results_array) {
		perror("malloc(results_array)");
		exit(1);
	}

	semid = semget(0x12345, nsems, 0777|IPC_CREAT );

	if (semid < 0) {
		perror("semget");
		exit(1);
	}

	/* Start every semaphore well above zero so the decrement side of
	 * the worker loop never blocks. */
	for (i = 0; i < SEMMNI; i++)
		seminit[i] = 200;
	sem_un.array = seminit;

	/* semnum is ignored for SETALL; pass the conventional 0. */
	if (semctl(semid, 0, SETALL, sem_un) < 0) {
		perror("semctl(setall)");
		exit(1);
	}

	pthread_mutex_init(&thread_lock, NULL);
	pthread_cond_init(&thread_parent, NULL);
	pthread_cond_init(&thread_worker, NULL);
	pthread_attr_init(&thread_attr);

	threads_starting = thread_count;

	for (i = 0; i < thread_count; i++) {

		/* Pin worker i to CPU i (round-robin over online CPUs). */
		CPU_ZERO(&cpu);
		CPU_SET(i % cpus, &cpu);

		ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu);

		if (ret) {
			printf("pthread_attr_setaffinity_np: %s\n", strerror(ret));
			exit(1);
		}

		ret = pthread_create(&thread_array[i], &thread_attr, worker, (void *)(unsigned long)i);

		if (ret) {
			printf("pthread_create: %s\n", strerror(ret));
			exit(1);
		}
	}

	pthread_attr_destroy(&thread_attr);

	/* Wait for every worker to reach the rendezvous, then release them
	 * all at once so they start the timed run together. */
	pthread_mutex_lock(&thread_lock);
	while (threads_starting)
		pthread_cond_wait(&thread_parent, &thread_lock);
	pthread_cond_broadcast(&thread_worker);
	pthread_mutex_unlock(&thread_lock);

	sleep(TEST_TIME);
	/* FIX: store through a volatile lvalue so the unsynchronized
	 * cross-thread stop flag is not optimized in surprising ways. */
	*(volatile int *)&state = 0;

	for (i = 0; i < thread_count; i++)
		pthread_join(thread_array[i], NULL);

	pthread_cond_destroy(&thread_parent);
	pthread_cond_destroy(&thread_worker);
	pthread_mutex_destroy(&thread_lock);

	/* semnum is ignored for IPC_RMID; pass the conventional 0. */
	if (semctl(semid, 0, IPC_RMID) < 0)
		perror("semctl(rmid)");

	for (i = 0; i < thread_count; i++)
		total += results_array[i];

	/* FIX: total is unsigned long — %lu, not %ld (mismatched format
	 * specifier and argument type is undefined behavior). */
	printf("total operations: %lu, ops/sec %lu\n", total, total / TEST_TIME);

	free(thread_array);
	free(results_array);

	return 0;
}