Re: + pid-delete-struct-pidmap-nr_free.patch added to -mm tree

From: Gargi Sharma
Date: Tue Oct 03 2017 - 13:39:46 EST


On Tue, Oct 3, 2017 at 9:45 PM, Oleg Nesterov <oleg@xxxxxxxxxx> wrote:
> On 10/02, Andrew Morton wrote:
>>
>> From: Alexey Dobriyan <adobriyan@xxxxxxxxx>
>> Subject: pid: delete struct pidmap::nr_free
>>
>> There is a check in pid allocation code to skip a full page:
>>
>> if (likely(atomic_read(&map->nr_free))) {
>> ...
>>
>> In practice it doesn't do anything. To skip a pidmap page one has to have
>> 32K consecutive pids allocated at the same time which doesn't happen.
>>
>> Currently the price is _every_ fork/exit on every system being slower than
>> necessary.
>
> Agreed, I too never understood how can this counter help.
>
> Add Gargi and Rik, the next version of "Replace PID bitmap allocation with IDR
> API" can conflict with this and the previous pid-delete-reserved_pids.patch.

I think this patch will become obsolete as pidmap will be removed. As
for the 1/2
patch of Alexey's series, I'll incorporate it so that rolled over PIDs
start from 1
instead of RESERVED_PIDS.

Thanks!
Gargi
>
> Oleg.
>
>>
>> Link: http://lkml.kernel.org/r/20170909203649.GB4791@avx2
>> Signed-off-by: Alexey Dobriyan <adobriyan@xxxxxxxxx>
>> Cc: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>
>> Cc: Kees Cook <keescook@xxxxxxxxxxxx>
>> Cc: Oleg Nesterov <oleg@xxxxxxxxxx>
>> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
>> ---
>>
>> include/linux/pid_namespace.h | 1 -
>> kernel/pid.c | 28 ++++++++++------------------
>> kernel/pid_namespace.c | 6 ------
>> 3 files changed, 10 insertions(+), 25 deletions(-)
>>
>> diff -puN include/linux/pid_namespace.h~pid-delete-struct-pidmap-nr_free include/linux/pid_namespace.h
>> --- a/include/linux/pid_namespace.h~pid-delete-struct-pidmap-nr_free
>> +++ a/include/linux/pid_namespace.h
>> @@ -11,7 +11,6 @@
>> #include <linux/ns_common.h>
>>
>> struct pidmap {
>> - atomic_t nr_free;
>> void *page;
>> };
>>
>> diff -puN kernel/pid.c~pid-delete-struct-pidmap-nr_free kernel/pid.c
>> --- a/kernel/pid.c~pid-delete-struct-pidmap-nr_free
>> +++ a/kernel/pid.c
>> @@ -68,9 +68,6 @@ static inline int mk_pid(struct pid_name
>> */
>> struct pid_namespace init_pid_ns = {
>> .kref = KREF_INIT(2),
>> - .pidmap = {
>> - [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
>> - },
>> .last_pid = 0,
>> .nr_hashed = PIDNS_HASH_ADDING,
>> .level = 0,
>> @@ -106,7 +103,6 @@ static void free_pidmap(struct upid *upi
>> int offset = nr & BITS_PER_PAGE_MASK;
>>
>> clear_bit(offset, map->page);
>> - atomic_inc(&map->nr_free);
>> }
>>
>> /*
>> @@ -181,20 +177,17 @@ static int alloc_pidmap(struct pid_names
>> if (unlikely(!map->page))
>> return -ENOMEM;
>> }
>> - if (likely(atomic_read(&map->nr_free))) {
>> - for ( ; ; ) {
>> - if (!test_and_set_bit(offset, map->page)) {
>> - atomic_dec(&map->nr_free);
>> - set_last_pid(pid_ns, last, pid);
>> - return pid;
>> - }
>> - offset = find_next_offset(map, offset);
>> - if (offset >= BITS_PER_PAGE)
>> - break;
>> - pid = mk_pid(pid_ns, map, offset);
>> - if (pid >= pid_max)
>> - break;
>> + for (;;) {
>> + if (!test_and_set_bit(offset, map->page)) {
>> + set_last_pid(pid_ns, last, pid);
>> + return pid;
>> }
>> + offset = find_next_offset(map, offset);
>> + if (offset >= BITS_PER_PAGE)
>> + break;
>> + pid = mk_pid(pid_ns, map, offset);
>> + if (pid >= pid_max)
>> + break;
>> }
>> if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
>> ++map;
>> @@ -591,7 +584,6 @@ void __init pidmap_init(void)
>> init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
>> /* Reserve PID 0. We never call free_pidmap(0) */
>> set_bit(0, init_pid_ns.pidmap[0].page);
>> - atomic_dec(&init_pid_ns.pidmap[0].nr_free);
>>
>> init_pid_ns.pid_cachep = KMEM_CACHE(pid,
>> SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
>> diff -puN kernel/pid_namespace.c~pid-delete-struct-pidmap-nr_free kernel/pid_namespace.c
>> --- a/kernel/pid_namespace.c~pid-delete-struct-pidmap-nr_free
>> +++ a/kernel/pid_namespace.c
>> @@ -98,7 +98,6 @@ static struct pid_namespace *create_pid_
>> struct pid_namespace *ns;
>> unsigned int level = parent_pid_ns->level + 1;
>> struct ucounts *ucounts;
>> - int i;
>> int err;
>>
>> err = -EINVAL;
>> @@ -139,11 +138,6 @@ static struct pid_namespace *create_pid_
>> INIT_WORK(&ns->proc_work, proc_cleanup_work);
>>
>> set_bit(0, ns->pidmap[0].page);
>> - atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
>> -
>> - for (i = 1; i < PIDMAP_ENTRIES; i++)
>> - atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
>> -
>> return ns;
>>
>> out_free_map:
>> _
>>
>> Patches currently in -mm which might be from adobriyan@xxxxxxxxx are
>>
>> proc-uninline-name_to_int.patch
>> proc-use-do-while-in-name_to_int.patch
>> seq_file-delete-small-value-optimization.patch
>> pid-delete-reserved_pids.patch
>> pid-delete-struct-pidmap-nr_free.patch
>>
>