Re: [PATCH 1/2] pidmap(2)

From: Alexey Dobriyan
Date: Wed Sep 06 2017 - 04:55:39 EST


On 9/6/17, Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> wrote:
> On Tue, 5 Sep 2017 22:05:00 +0300 Alexey Dobriyan <adobriyan@xxxxxxxxx>
> wrote:
>
>> Implement system call for bulk retrieving of pids in binary form.
>>
>> Using /proc is slower than necessary: 3 syscalls + another 3 for each
>> thread + converting with atoi().
>>
>> /proc may not be mounted, especially in containers. A natural extension of
>> hidepid=2 efforts is to not mount /proc at all.
>>
>> It could be used by programs like ps, top or CRIU. Speed increase will
>> become more drastic once combined with bulk retrieval of process
>> statistics.
>
> The patches are performance optimizations, but their changelogs contain
> no performance measurements!
>
> Demonstration of some compelling real-world performance benefits would
> help things along a lot.

I forgot the sheet with numbers at work. :^)
They're very embarrassing for /proc.

pidmap:
N=1<<16 times
~130 processes (~250 task_structs) on a regular desktop system
opendir + readdir + closedir /proc + the same for every /proc/$PID/task
(roughly what htop(1) does) vs pidmap

/proc 16.80+-0.73%
pidmap 0.06+-0.31%

fdmap:
N=1<<22 times
4 opened descriptors (0, 1, 2, 3)
opendir+readdir+closedir /proc/self/fd (lsof(1)) vs fdmap

/proc 8.31+-0.37%
fdmap 0.32+-0.72%

Currently performance improvements may not be huge or even visible.
That's because programs like ps/top/lsof _have_ to use /proc to retrieve
other information. If combined with bulk taskstats-ish retrieval interfaces,
they should run circles around /proc.
#include <sys/types.h>
#include <stdio.h>
#include <dirent.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * /proc side of the pidmap benchmark.
 *
 * Lists /proc, and for every entry whose name starts with a digit 1-9
 * opens /proc/<pid>/task and converts each entry name found there to an
 * integer with atoi(). The converted values are discarded; the function
 * only generates the directory-walking work that is being timed.
 *
 * Directories that cannot be opened are skipped instead of being passed
 * to readdir() as NULL.
 */
void f(void)
{
	DIR *proc;
	struct dirent *de;

	proc = opendir("/proc/");
	if (!proc)
		return;	/* /proc could not be opened: nothing to walk */

	while ((de = readdir(proc))) {
		char path[32];
		DIR *tasks;
		struct dirent *te;
		int pid;

		/* Only names starting with 1-9 are treated as pid directories. */
		if (de->d_name[0] < '1' || de->d_name[0] > '9')
			continue;

		pid = atoi(de->d_name);
		snprintf(path, sizeof(path), "/proc/%d/task", pid);

		tasks = opendir(path);
		if (!tasks)
			continue;	/* e.g. the process went away after being listed */

		/* Discard the first two entries (presumably "." and ".."). */
		readdir(tasks);
		readdir(tasks);

		while ((te = readdir(tasks))) {
			int tid = atoi(te->d_name);

			/*
			 * Empty asm that takes &tid and clobbers memory, so the
			 * compiler has to actually produce the atoi() result.
			 * __asm__ is used because plain "asm" is not a keyword
			 * in strict ISO C modes.
			 */
			__asm__ __volatile__ ("" :: "g" (&tid) : "memory");
		}
		closedir(tasks);
	}
	closedir(proc);
}

/*
 * Raw x86-64 "syscall" wrapper for system call number 333.
 *
 * pid, n and start are passed in rdi, rsi and rdx; r10 is set to 0.
 * Returns the value the kernel leaves in rax, with no errno translation.
 */
static inline long sys_pidmap(int *pid, unsigned int n, int start)
{
	register long arg4 __asm__ ("r10") = 0;
	long ret = 333;	/* rax: syscall number on entry, result on exit */

	__asm__ __volatile__ (
		"syscall"
		: "+a" (ret)
		: "D" (pid), "S" (n), "d" (start), "r" (arg4)
		: "rcx", "r11", "cc", "memory"
	);
	return ret;
}

/*
 * pidmap side of the benchmark: one sys_pidmap() call into a 1024-entry
 * stack buffer with start = 0. The return value is ignored.
 */
void g(void)
{
	int pids[1024];
	const unsigned int capacity = sizeof(pids) / sizeof(pids[0]);

	sys_pidmap(pids, capacity, 0);
}

/*
 * Benchmark driver: runs g() 1<<16 times.
 *
 * To time the /proc walk instead, call f() in the loop body in place
 * of g().
 */
int main(void)
{
	unsigned int iter = 0;

	while (iter < (1<<16)) {
		g();
		iter++;
	}

	return 0;
}
#include <sys/types.h>
#include <dirent.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * /proc side of the fdmap benchmark.
 *
 * Lists /proc/self/fd and converts every entry name to an integer with
 * atoi(). The converted values are discarded; the function only
 * generates the work that is being timed.
 *
 * Returns without doing anything if the directory cannot be opened,
 * instead of passing NULL to readdir().
 */
void f(void)
{
	DIR *fds;
	struct dirent *de;

	fds = opendir("/proc/self/fd");
	if (!fds)
		return;

	while ((de = readdir(fds))) {
		int fd = atoi(de->d_name);

		/*
		 * Empty asm that takes &fd and clobbers memory, so the
		 * compiler has to actually produce the atoi() result.
		 * __asm__ is used because plain "asm" is not a keyword in
		 * strict ISO C modes.
		 */
		__asm__ __volatile__ ("" :: "g" (&fd) : "memory");
	}
	closedir(fds);
}

/*
 * Raw x86-64 "syscall" wrapper for system call number 334.
 *
 * pid, fd and n are passed in rdi, rsi and rdx; start goes in r10 and
 * r8 is set to 0. Returns the value the kernel leaves in rax, with no
 * errno translation.
 */
static inline long sys_fdmap(int pid, int *fd, unsigned int n, int start)
{
	register long arg4 __asm__ ("r10") = start;
	register long arg5 __asm__ ("r8") = 0;
	long ret = 334;	/* rax: syscall number on entry, result on exit */

	__asm__ __volatile__ (
		"syscall"
		: "+a" (ret)
		: "D" (pid), "S" (fd), "d" (n), "r" (arg4), "r" (arg5)
		: "rcx", "r11", "cc", "memory"
	);
	return ret;
}

/*
 * fdmap side of the benchmark: one sys_fdmap() call with pid = 0 and
 * start = 0 into a 1024-entry stack buffer. The return value is ignored.
 */
void g(void)
{
	int fds[1024];
	const unsigned int capacity = sizeof(fds) / sizeof(fds[0]);

	sys_fdmap(0, fds, capacity, 0);
}

/*
 * Benchmark driver: duplicates descriptor 0 once, then runs g()
 * 1<<22 times.
 *
 * To time the /proc/self/fd walk instead, call f() in the loop body in
 * place of g().
 */
int main(void)
{
	unsigned int iter = 0;

	/* One extra open descriptor on top of the ones inherited at startup. */
	dup(0);

	while (iter < (1<<22)) {
		g();
		iter++;
	}

	return 0;
}