Re: [PATCH 2/2] kernel/sys: do not grab tasklist_lock for sys_setpriority(PRIO_PROCESS)

From: Oleg Nesterov
Date: Tue May 12 2020 - 12:10:57 EST


On 05/11, Davidlohr Bueso wrote:
>
> This option does not iterate the tasklist and we already have
> an rcu aware stable pointer. Also, the nice value is not serialized
> by this lock. Reduce the scope of this lock to just PRIO_PGRP
> and PRIO_USER - which need to to set the priorities atomically
> to the list of tasks, at least vs fork().

looks correct, but probably the PRIO_USER case can avoid tasklist too?
It can't help to avoid the race with setuid().

(PRIO_PGRP needs tasklist_lock (see my previous email) but afaics it
can race with fork anyway, it can miss the new child which was not
added to the list yet, I hope we do not care).

So I'd suggest a single patch instead of 1-2, but I still don't understand
your PF_EXITING check in 1/2.

Oleg.

--- x/kernel/sys.c
+++ x/kernel/sys.c
@@ -214,7 +214,6 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
niceval = MAX_NICE;

rcu_read_lock();
- read_lock(&tasklist_lock);
switch (which) {
case PRIO_PROCESS:
if (who)
@@ -229,9 +228,11 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
pgrp = find_vpid(who);
else
pgrp = task_pgrp(current);
+ read_lock(&tasklist_lock);
do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
error = set_one_prio(p, niceval, error);
} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
+ read_unlock(&tasklist_lock);
break;
case PRIO_USER:
uid = make_kuid(cred->user_ns, who);
@@ -243,16 +244,15 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
if (!user)
goto out_unlock; /* No processes for this user */
}
- do_each_thread(g, p) {
+ for_each_process_thread(g, p) {
if (uid_eq(task_uid(p), uid) && task_pid_vnr(p))
error = set_one_prio(p, niceval, error);
- } while_each_thread(g, p);
+ }
if (!uid_eq(uid, cred->uid))
free_uid(user); /* For find_user() */
break;
}
out_unlock:
- read_unlock(&tasklist_lock);
rcu_read_unlock();
out:
return error;
@@ -277,7 +277,6 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
return -EINVAL;

rcu_read_lock();
- read_lock(&tasklist_lock);
switch (which) {
case PRIO_PROCESS:
if (who)
@@ -295,11 +294,13 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
pgrp = find_vpid(who);
else
pgrp = task_pgrp(current);
+ read_lock(&tasklist_lock);
do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
niceval = nice_to_rlimit(task_nice(p));
if (niceval > retval)
retval = niceval;
} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
+ read_unlock(&tasklist_lock);
break;
case PRIO_USER:
uid = make_kuid(cred->user_ns, who);
@@ -311,19 +312,18 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
if (!user)
goto out_unlock; /* No processes for this user */
}
- do_each_thread(g, p) {
+ for_each_process_thread(g, p) {
if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) {
niceval = nice_to_rlimit(task_nice(p));
if (niceval > retval)
retval = niceval;
}
- } while_each_thread(g, p);
+ }
if (!uid_eq(uid, cred->uid))
free_uid(user); /* for find_user() */
break;
}
out_unlock:
- read_unlock(&tasklist_lock);
rcu_read_unlock();

return retval;