Re: [REPORT] cfs-v4 vs sd-0.44

From: Ingo Molnar
Date: Sat Apr 21 2007 - 12:01:05 EST



* Con Kolivas <kernel@xxxxxxxxxxx> wrote:

> > Feels even better, mouse movements are very smooth even under high
> > load. I noticed that X gets reniced to -19 with this scheduler.
> > I've not looked at the code yet but this looked suspicious to me.
> > I've reniced it to 0 and it did not change any behaviour. Still
> > very good.
>
> Looks like this code does it:
>
> +int sysctl_sched_privileged_nice_level __read_mostly = -19;

Correct. Note that Willy reniced X back to 0, so it had no bearing on
his test. Also note that I pointed this change out in the -v4 CFS
announcement:

|| Changes since -v3:
||
|| - usability fix: automatic renicing of kernel threads such as
|| keventd, OOM tasks and tasks doing privileged hardware access
|| (such as Xorg).

i've attached it below in a standalone form, feel free to put it into
SD! :)

Ingo

---
arch/i386/kernel/ioport.c | 13 ++++++++++---
arch/x86_64/kernel/ioport.c | 8 ++++++--
drivers/block/loop.c | 5 ++++-
include/linux/sched.h | 7 +++++++
kernel/sched.c | 40 ++++++++++++++++++++++++++++++++++++++++
kernel/workqueue.c | 2 +-
mm/oom_kill.c | 4 +++-
7 files changed, 71 insertions(+), 8 deletions(-)

Index: linux/arch/i386/kernel/ioport.c
===================================================================
--- linux.orig/arch/i386/kernel/ioport.c
+++ linux/arch/i386/kernel/ioport.c
@@ -64,9 +64,15 @@ asmlinkage long sys_ioperm(unsigned long

if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
return -EINVAL;
- if (turn_on && !capable(CAP_SYS_RAWIO))
- return -EPERM;
-
+ if (turn_on) {
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+ /*
+ * Task will be accessing hardware IO ports,
+ * mark it as special with the scheduler too:
+ */
+ sched_privileged_task(current);
+ }
/*
* If it's the first ioperm() call in this thread's lifetime, set the
* IO bitmap up. ioperm() is much less timing critical than clone(),
@@ -145,6 +151,7 @@ asmlinkage long sys_iopl(unsigned long u
if (level > old) {
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
+ sched_privileged_task(current);
}
t->iopl = level << 12;
regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl;
Index: linux/arch/x86_64/kernel/ioport.c
===================================================================
--- linux.orig/arch/x86_64/kernel/ioport.c
+++ linux/arch/x86_64/kernel/ioport.c
@@ -41,8 +41,11 @@ asmlinkage long sys_ioperm(unsigned long

if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
return -EINVAL;
- if (turn_on && !capable(CAP_SYS_RAWIO))
- return -EPERM;
+ if (turn_on) {
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+ sched_privileged_task(current);
+ }

/*
* If it's the first ioperm() call in this thread's lifetime, set the
@@ -113,6 +116,7 @@ asmlinkage long sys_iopl(unsigned int le
if (level > old) {
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
+ sched_privileged_task(current);
}
regs->eflags = (regs->eflags &~ X86_EFLAGS_IOPL) | (level << 12);
return 0;
Index: linux/drivers/block/loop.c
===================================================================
--- linux.orig/drivers/block/loop.c
+++ linux/drivers/block/loop.c
@@ -588,7 +588,10 @@ static int loop_thread(void *data)
*/
current->flags |= PF_NOFREEZE;

- set_user_nice(current, -20);
+ /*
+ * The loop thread is important enough to be given a boost:
+ */
+ sched_privileged_task(current);

while (!kthread_should_stop() || lo->lo_bio) {

Index: linux/include/linux/sched.h
===================================================================
--- linux.orig/include/linux/sched.h
+++ linux/include/linux/sched.h
@@ -1256,6 +1256,13 @@ static inline int rt_mutex_getprio(struc
#endif

extern void set_user_nice(struct task_struct *p, long nice);
+/*
+ * Task has special privileges, give it more CPU power:
+ */
+extern void sched_privileged_task(struct task_struct *p);
+
+extern int sysctl_sched_privileged_nice_level;
+
extern int task_prio(const struct task_struct *p);
extern int task_nice(const struct task_struct *p);
extern int can_nice(const struct task_struct *p, const int nice);
Index: linux/kernel/sched.c
===================================================================
--- linux.orig/kernel/sched.c
+++ linux/kernel/sched.c
@@ -3251,6 +3251,46 @@ out_unlock:
EXPORT_SYMBOL(set_user_nice);

/*
+ * Nice level for privileged tasks (0 turns the boost off). Can be set
+ * at boot with privileged_nice_level=<signed nice level>.
+ */
+int sysctl_sched_privileged_nice_level __read_mostly = -19;
+
+static int __init privileged_nice_level_setup(char *str)
+{
+ sysctl_sched_privileged_nice_level = simple_strtol(str, NULL, 0);
+ return 1;
+}
+__setup("privileged_nice_level=", privileged_nice_level_setup);
+
+/*
+ * Boost a task that holds special privileges to the configured
+ * privileged nice level, unless it is already at least that favoured:
+ */
+void sched_privileged_task(struct task_struct *p)
+{
+ long target = sysctl_sched_privileged_nice_level;
+ long cur_nice = TASK_NICE(p);
+
+ /*
+ * A level of 0 switches the boost off, and a task whose nice
+ * value is already at or below the target is left alone:
+ */
+ if (unlikely(!target) || target >= cur_nice)
+ return;
+
+ /* Keep the requested level inside the valid nice range: */
+ if (target < -20)
+ target = -20;
+ else if (target > 19)
+ target = 19;
+
+ set_user_nice(p, target);
+}
+
+EXPORT_SYMBOL(sched_privileged_task);
+
+/*
* can_nice - check if a task can reduce its nice value
* @p: task
* @nice: nice value
Index: linux/kernel/workqueue.c
===================================================================
--- linux.orig/kernel/workqueue.c
+++ linux/kernel/workqueue.c
@@ -355,7 +355,7 @@ static int worker_thread(void *__cwq)
if (!cwq->freezeable)
current->flags |= PF_NOFREEZE;

- set_user_nice(current, -5);
+ sched_privileged_task(current);

/* Block and flush all signals */
sigfillset(&blocked);
Index: linux/mm/oom_kill.c
===================================================================
--- linux.orig/mm/oom_kill.c
+++ linux/mm/oom_kill.c
@@ -291,7 +291,9 @@ static void __oom_kill_task(struct task_
* all the memory it needs. That way it should be able to
* exit() and clear out its resources quickly...
*/
- p->time_slice = HZ;
+ if (p->policy == SCHED_NORMAL || p->policy == SCHED_BATCH)
+ sched_privileged_task(p);
+
set_tsk_thread_flag(p, TIF_MEMDIE);

force_sig(SIGKILL, p);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/