Re: [RFC PATCH] alispinlock: acceleration from lock integration on multi-core platform

From: Ling Ma
Date: Wed Feb 03 2016 - 01:00:58 EST


The attached thread.c shows that the new mechanism improves the output
of the user-space code (thread.c) by 1.14x (1174810406/1026910602;
kernel spinlock consumption is reduced from 25% to 15%), as shown below:

ORG NEW
38186815 43644156
38340186 43121265
38383155 44087753
38567102 43532586
38027878 43622700
38011581 43396376
37861959 43322857
37963215 43375528
38039247 43618315
37989106 43406187
37916912 44163029
39053184 43138581
37928359 43247866
37967417 43390352
37909796 43218250
37727531 43256009
38032818 43460496
38001860 43536100
38019929 44231331
37846621 43550597
37823231 44229887
38108158 43142689
37771900 43228168
37652536 43901042
37649114 43172690
37591314 43380004
38539678 43435592

Total 1026910602 1174810406

Thanks
Ling

2016-02-03 12:40 GMT+08:00 Ling Ma <ling.ma.program@xxxxxxxxx>:
> Longman,
>
> The attachment includes the user-space code (thread.c) and the kernel
> patch (ali_work_queue.patch) based on 4.3.0-rc4;
> we replaced every original spinlock (list_lock) in slab.h/c with the
> new mechanism.
>
> The thread.c test in user space causes heavy kernel spinlock contention in
> __kmalloc and kfree:
> perf top -d1 shows ~25% before ali_work_queue.patch; after applying
> this patch,
> the synchronization overhead from __kmalloc and kfree is
> reduced from 25% to ~15% on an Intel E5-2699V3
> (we also observed that the output of the user-space code (thread.c) is
> clearly improved).
>
> Peter, we will send the update version according to your comments.
>
> Thanks
> Ling
>
>
> 2016-01-19 23:36 GMT+08:00 Waiman Long <waiman.long@xxxxxxx>:
>> On 01/19/2016 03:52 AM, Ling Ma wrote:
>>>
>>> Is it acceptable for performance improvement or more comments on this
>>> patch?
>>>
>>> Thanks
>>> Ling
>>>
>>>
>>
>> Your alispinlock patchset should also include a use case where the lock is
>> used by some code within the kernel with test that can show a performance
>> improvement so that the reviewers can independently try it out and play
>> around with it. The kernel community will not accept any patch without a use
>> case in the kernel.
>>
>> Your lock_test.tar file is not good enough as it is not a performance test
>> of the patch that you sent out.
>>
>> Cheers,
>> Longman
/**
Test Case:
OpenDir, Get status and close it.
*/
#include <dirent.h>
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/fcntl.h>
#include <sys/stat.h>
#include <unistd.h>

/* Root directory; each worker thread builds its path as TEST_DIR/<thread index>. */
#define TEST_DIR "/tmp/thread"
/* Number of worker threads started by main() and size of the result[] table. */
#define MAX_TEST_THREAD (80)
/* Number of empty files each worker creates in its directory before scanning. */
#define MAX_TEST_FILE 5000

/*
 * Per-thread pointers to the workers' iteration counters. Each worker stores
 * a pointer into its own slot; main() sums the pointed-to values once per
 * second and then resets them to zero. No synchronization is used.
 */
static unsigned long *result[MAX_TEST_THREAD];
/*
 * Workers keep rescanning their directory while this is zero.
 * NOTE(review): nothing in the visible code ever sets it — confirm whether
 * a stop condition was intended.
 */
static int stop = 0;

static void* case_function(void *para)
{
int id = (int)(long)para;
DIR *pDir;
struct stat f_stat;
struct dirent *entry=NULL;
char path[256];
char cmd[512];

int filecnt = 0;
int dircnt = 0;
int filetotalsize = 0;
unsigned long myresult = 0;
int f = 0;

result[id] = &myresult;

/* Goto my path and construct empty file */
sprintf(path, "%s/%d", TEST_DIR, id);
printf("Creating temp file at %s....\n", path);

sprintf(cmd, "mkdir %s", path);
system(cmd);
chdir(path);
for (f = 0; f < MAX_TEST_FILE; f++)
{
char name[256];

sprintf(name, "%s/%d", path, f);
int t = open(name, O_RDWR | O_CREAT | O_TRUNC, S_IRWXU);
if (t != -1)
close(t);
else
{
printf("Errno = %d.\n", errno);
exit(errno);
}
}

again:
if ((pDir = opendir(path)) == NULL)
{
printf("打开 %s 错误:没有那个文件或目录\n", TEST_DIR);
goto err;
}

while ((entry = readdir(pDir)) != NULL)
{
struct stat buf;
if (entry->d_name[0] == '.')
continue;

//f = open(entry->d_name, 0);
f = stat(entry->d_name, &buf);

if (f)
close(f);
myresult++;


//printf("Filename %s, size %10d",entry->d_name, f_stat.st_size);
}

closedir(pDir);


/* Need to stop */
if (!stop)
goto again;
return 0;

err:
;
}

void main()
{
int i;
pthread_t thread;

system("mkdir "TEST_DIR);

for (i = 0; i < MAX_TEST_THREAD; i++)
{
pthread_create(&thread, NULL, case_function, (void*)(long)i);
}

while (1)
{
sleep(1);
unsigned long times = 0;
//printf("Statistics:\n");

for (i = 0; i < MAX_TEST_THREAD; i++)
{
//printf("%d\t", *result[i]);
times =times + *result[i];
}
printf("%ld\t\n", times);
for (i = 0; i < MAX_TEST_THREAD; i++)
*result[i] = 0;
}
}