Re: sched/autogroup: race if !sysctl_sched_autogroup_enabled ?

From: Mike Galbraith
Date: Sun Nov 13 2016 - 08:59:53 EST


On Fri, 2016-11-11 at 17:57 +0100, Oleg Nesterov wrote:

> I am starting to think that we should just move ->autogroup from signal_struct
> to task_struct. This will simplify the code and fix all these problems. But
> I need a simple fix for backporting anyway.

How about just rip out knobs that should have never been born. More
can probably go as well.

(sorry for slowness btw.. on vacation, trying the "go outside" thing;)

sched/autogroup: Rip out dynamic knobs

Autogroup never should have had knobs in the first place, and now, Oleg has
discovered that the dynamic enable/disable capability either has become, or
perhaps always was racy. Rip it all out, leaving only commandline disable.

Signed-off-by: Mike Galbraith <umgwanakikbuti@xxxxxxxxx>
---
fs/proc/base.c | 34 ---------------------------
include/linux/sched/sysctl.h | 4 ---
kernel/sched/auto_group.c | 53 +++++++------------------------------------
kernel/sched/auto_group.h | 5 ----
kernel/sysctl.c | 11 --------
5 files changed, 10 insertions(+), 97 deletions(-)

--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1455,39 +1455,6 @@ static int sched_autogroup_show(struct s
return 0;
}

-static ssize_t
-sched_autogroup_write(struct file *file, const char __user *buf,
- size_t count, loff_t *offset)
-{
- struct inode *inode = file_inode(file);
- struct task_struct *p;
- char buffer[PROC_NUMBUF];
- int nice;
- int err;
-
- memset(buffer, 0, sizeof(buffer));
- if (count > sizeof(buffer) - 1)
- count = sizeof(buffer) - 1;
- if (copy_from_user(buffer, buf, count))
- return -EFAULT;
-
- err = kstrtoint(strstrip(buffer), 0, &nice);
- if (err < 0)
- return err;
-
- p = get_proc_task(inode);
- if (!p)
- return -ESRCH;
-
- err = proc_sched_autogroup_set_nice(p, nice);
- if (err)
- count = err;
-
- put_task_struct(p);
-
- return count;
-}
-
static int sched_autogroup_open(struct inode *inode, struct file *filp)
{
int ret;
@@ -1504,7 +1471,6 @@ static int sched_autogroup_open(struct i
static const struct file_operations proc_pid_sched_autogroup_operations = {
.open = sched_autogroup_open,
.read = seq_read,
- .write = sched_autogroup_write,
.llseek = seq_lseek,
.release = single_release,
};
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -60,10 +60,6 @@ extern int sysctl_sched_rt_runtime;
extern unsigned int sysctl_sched_cfs_bandwidth_slice;
#endif

-#ifdef CONFIG_SCHED_AUTOGROUP
-extern unsigned int sysctl_sched_autogroup_enabled;
-#endif
-
extern int sched_rr_timeslice;

extern int sched_rr_handler(struct ctl_table *table, int write,
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -7,7 +7,7 @@
#include <linux/security.h>
#include <linux/export.h>

-unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
+unsigned int __read_mostly sched_autogroup_enabled = 1;
static struct autogroup autogroup_default;
static atomic_t autogroup_seq_nr;

@@ -109,7 +109,7 @@ static inline struct autogroup *autogrou

bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
{
- if (tg != &root_task_group)
+ if (!sched_autogroup_enabled || tg != &root_task_group)
return false;

/*
@@ -139,12 +139,9 @@ autogroup_move_group(struct task_struct

p->signal->autogroup = autogroup_kref_get(ag);

- if (!READ_ONCE(sysctl_sched_autogroup_enabled))
- goto out;
-
for_each_thread(p, t)
sched_move_task(t);
-out:
+
unlock_task_sighand(p, &flags);
autogroup_kref_put(prev);
}
@@ -152,8 +149,11 @@ autogroup_move_group(struct task_struct
/* Allocates GFP_KERNEL, cannot be called under any spinlock */
void sched_autogroup_create_attach(struct task_struct *p)
{
- struct autogroup *ag = autogroup_create();
+ struct autogroup *ag;

+ if (!sched_autogroup_enabled)
+ return;
+ ag = autogroup_create();
autogroup_move_group(p, ag);
/* drop extra reference added by autogroup_create() */
autogroup_kref_put(ag);
@@ -179,7 +179,7 @@ void sched_autogroup_exit(struct signal_

static int __init setup_autogroup(char *str)
{
- sysctl_sched_autogroup_enabled = 0;
+ sched_autogroup_enabled = 0;

return 1;
}
@@ -187,41 +187,6 @@ static int __init setup_autogroup(char *
__setup("noautogroup", setup_autogroup);

#ifdef CONFIG_PROC_FS
-
-int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
-{
- static unsigned long next = INITIAL_JIFFIES;
- struct autogroup *ag;
- int err;
-
- if (nice < MIN_NICE || nice > MAX_NICE)
- return -EINVAL;
-
- err = security_task_setnice(current, nice);
- if (err)
- return err;
-
- if (nice < 0 && !can_nice(current, nice))
- return -EPERM;
-
- /* this is a heavy operation taking global locks.. */
- if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next))
- return -EAGAIN;
-
- next = HZ / 10 + jiffies;
- ag = autogroup_task_get(p);
-
- down_write(&ag->lock);
- err = sched_group_set_shares(ag->tg, sched_prio_to_weight[nice + 20]);
- if (!err)
- ag->nice = nice;
- up_write(&ag->lock);
-
- autogroup_kref_put(ag);
-
- return err;
-}
-
void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
{
struct autogroup *ag = autogroup_task_get(p);
@@ -230,7 +195,7 @@ void proc_sched_autogroup_show_task(stru
goto out;

down_read(&ag->lock);
- seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice);
+ seq_printf(m, "/autogroup-%ld\n", ag->id);
up_read(&ag->lock);

out:
--- a/kernel/sched/auto_group.h
+++ b/kernel/sched/auto_group.h
@@ -13,7 +13,6 @@ struct autogroup {
struct task_group *tg;
struct rw_semaphore lock;
unsigned long id;
- int nice;
};

extern void autogroup_init(struct task_struct *init_task);
@@ -29,9 +28,7 @@ extern bool task_wants_autogroup(struct
static inline struct task_group *
autogroup_task_group(struct task_struct *p, struct task_group *tg)
{
- int enabled = READ_ONCE(sysctl_sched_autogroup_enabled);
-
- if (enabled && task_wants_autogroup(p, tg))
+ if (task_wants_autogroup(p, tg))
return p->signal->autogroup->tg;

return tg;
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -437,17 +437,6 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = sched_rr_handler,
},
-#ifdef CONFIG_SCHED_AUTOGROUP
- {
- .procname = "sched_autogroup_enabled",
- .data = &sysctl_sched_autogroup_enabled,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
- },
-#endif
#ifdef CONFIG_CFS_BANDWIDTH
{
.procname = "sched_cfs_bandwidth_slice_us",