Re: 2.6.21-rc5-rt10 troubles

From: Ingo Molnar
Date: Wed Apr 04 2007 - 04:49:49 EST



* Rui Nuno Capela <rncbc@xxxxxxxxx> wrote:

> Ingo et al.
>
> I'm afraid I have no good news (once again). After building
> 2.6.21-rc5-rt8, and more recently -rt10, I've found some trouble
> running on a Core2 T7200 laptop (SMP). Especially after starting
> jackd, the whole system starts crawling to death. It just slows down
> into some kind of Big Freeze, with no output on the console
> whatsoever, so that I'm ultimately left with a brick in my hands.
>
> This behavior is consistent and occurs every time after jackd is
> started. It does not seem to occur on -rt5 and earlier.
>
> I wish I could give you more details, but the fact is I don't know
> where to look. The machine just freezes silently.

could you try rt11 (which fixes two bad bugs in rt10)? If rt11 freezes
too, could you try unapplying the attached patch? This patch is the
main delta between rt5 and rt11 (plus upstream changes, but those
shouldn't matter for this problem).
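
(to unapply it, something like "patch -R -p1 < softirq-preempt.patch"
run from the top of the kernel tree should work - the filename is just
a placeholder for whatever name you save the attachment under.)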

Ingo

----------------------->
Subject: [patch] softirq preemption: optimization
From: Ingo Molnar <mingo@xxxxxxx>

optimize softirq preemption by allowing a hardirq thread to pick up
pending softirq processing directly, instead of waking the softirq
thread: the irq thread loop now calls do_softirq_from_hardirq() after
do_hardirq(), wakeup_softirqd() skips the wakeup when the target
softirq thread runs at the same priority as the current hardirq
thread, and a new per-cpu softirq_running bitmask keeps a hardirq
context and ksoftirqd from executing the same softirq concurrently.
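
as a rough userspace sketch (not kernel code - the struct and names
below are made up for illustration, and the real check in
wakeup_softirqd() also requires !hardirq_count(), omitted here), the
wakeup short-cut boils down to:

#include <stdio.h>
#include <stdbool.h>

/* stand-in for the two task fields the patch consults */
struct task {
	int normal_prio;	/* effective scheduling priority */
	bool is_hardirq;	/* PF_HARDIRQ: running as an irq thread */
};

/*
 * model of the new wakeup_softirqd() short-cut: skip the wakeup
 * (and let do_softirq_from_hardirq() pick the work up later) only
 * if the current task is a hardirq thread running at the same
 * priority as the target softirq thread.
 */
static bool skip_wakeup(const struct task *cur, const struct task *softirqd)
{
	return cur->is_hardirq && cur->normal_prio == softirqd->normal_prio;
}

int main(void)
{
	struct task irq_thread = { .normal_prio = 50, .is_hardirq = true };
	struct task same_prio  = { .normal_prio = 50, .is_hardirq = false };
	struct task diff_prio  = { .normal_prio = 40, .is_hardirq = false };

	printf("same prio: %s\n", skip_wakeup(&irq_thread, &same_prio) ?
	       "merge into hardirq thread" : "wake the softirq thread");
	printf("diff prio: %s\n", skip_wakeup(&irq_thread, &diff_prio) ?
	       "merge into hardirq thread" : "wake the softirq thread");
	return 0;
}

the point being that the short-cut never changes the priority at which
a given softirq's work ends up running.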

Signed-off-by: Ingo Molnar <mingo@xxxxxxx>
---
 include/linux/interrupt.h |    1 
 kernel/irq/manage.c       |   24 +++----
 kernel/softirq.c          |  139 +++++++++++++++++++++++++++++++++++++---------
 3 files changed, 123 insertions(+), 41 deletions(-)

Index: linux/include/linux/interrupt.h
===================================================================
--- linux.orig/include/linux/interrupt.h
+++ linux/include/linux/interrupt.h
@@ -266,6 +266,7 @@ struct softirq_action
asmlinkage void do_softirq(void);
extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data);
extern void softirq_init(void);
+extern void do_softirq_from_hardirq(void);

#ifdef CONFIG_PREEMPT_HARDIRQS
# define __raise_softirq_irqoff(nr) raise_softirq_irqoff(nr)
Index: linux/kernel/irq/manage.c
===================================================================
--- linux.orig/kernel/irq/manage.c
+++ linux/kernel/irq/manage.c
@@ -628,14 +628,17 @@ static void thread_simple_irq(irq_desc_t
unsigned int irq = desc - irq_desc;
irqreturn_t action_ret;

+repeat:
if (action && !desc->depth) {
spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, action);
- cond_resched_hardirq_context();
spin_lock_irq(&desc->lock);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
}
+ if (desc->status & IRQ_PENDING)
+ goto repeat;
+
desc->status &= ~IRQ_INPROGRESS;
}

@@ -692,7 +695,6 @@ static void thread_edge_irq(irq_desc_t *
desc->status &= ~IRQ_PENDING;
spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, action);
- cond_resched_hardirq_context();
spin_lock_irq(&desc->lock);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
@@ -721,7 +723,6 @@ static void thread_do_irq(irq_desc_t *de
desc->status &= ~IRQ_PENDING;
spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, action);
- cond_resched_hardirq_context();
spin_lock_irq(&desc->lock);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
@@ -757,8 +758,6 @@ static void do_hardirq(struct irq_desc *
wake_up(&desc->wait_for_handler);
}

-extern asmlinkage void __do_softirq(void);
-
static int do_irqd(void * __desc)
{
struct sched_param param = { 0, };
@@ -781,16 +780,13 @@ static int do_irqd(void * __desc)

while (!kthread_should_stop()) {
local_irq_disable_nort();
- set_current_state(TASK_INTERRUPTIBLE);
-#ifndef CONFIG_PREEMPT_RT
- irq_enter();
-#endif
- do_hardirq(desc);
-#ifndef CONFIG_PREEMPT_RT
- irq_exit();
-#endif
+ do {
+ set_current_state(TASK_INTERRUPTIBLE);
+ do_hardirq(desc);
+ do_softirq_from_hardirq();
+ } while (current->state == TASK_RUNNING);
+
local_irq_enable_nort();
- cond_resched();
#ifdef CONFIG_SMP
/*
* Did IRQ affinities change?
Index: linux/kernel/softirq.c
===================================================================
--- linux.orig/kernel/softirq.c
+++ linux/kernel/softirq.c
@@ -100,8 +100,26 @@ static void wakeup_softirqd(int softirq)
/* Interrupts are disabled: no need to stop preemption */
struct task_struct *tsk = __get_cpu_var(ksoftirqd)[softirq].tsk;

- if (tsk && tsk->state != TASK_RUNNING)
- wake_up_process(tsk);
+ if (unlikely(!tsk))
+ return;
+#if defined(CONFIG_PREEMPT_SOFTIRQS) && defined(CONFIG_PREEMPT_HARDIRQS)
+ /*
+ * Optimization: if we are in a hardirq thread context, and
+ * if the priority of the softirq thread is the same as the
+ * priority of the hardirq thread, then 'merge' softirq
+ * processing into the hardirq context. (it will later on
+ * execute softirqs via do_softirq_from_hardirq()).
+ * So here we can skip the wakeup and can rely on the hardirq
+ * context processing it later on.
+ */
+ if ((current->flags & PF_HARDIRQ) && !hardirq_count() &&
+ (tsk->normal_prio == current->normal_prio))
+ return;
+#endif
+ /*
+ * Wake up the softirq task:
+ */
+ wake_up_process(tsk);
}

/*
@@ -250,50 +268,100 @@ EXPORT_SYMBOL(local_bh_enable_ip);
* we want to handle softirqs as soon as possible, but they
* should not be able to lock up the box.
*/
-#define MAX_SOFTIRQ_RESTART 10
+#define MAX_SOFTIRQ_RESTART 20
+
+static DEFINE_PER_CPU(u32, softirq_running);

-asmlinkage void ___do_softirq(void)
+static void ___do_softirq(const int same_prio_only)
{
+ int max_restart = MAX_SOFTIRQ_RESTART, max_loops = MAX_SOFTIRQ_RESTART;
+ __u32 pending, available_mask, same_prio_skipped;
struct softirq_action *h;
- __u32 pending;
- int max_restart = MAX_SOFTIRQ_RESTART;
- int cpu;
+ struct task_struct *tsk;
+ int cpu, softirq;

pending = local_softirq_pending();
account_system_vtime(current);

cpu = smp_processor_id();
restart:
+ available_mask = -1;
+ softirq = 0;
+ same_prio_skipped = 0;
/* Reset the pending bitmask before enabling irqs */
set_softirq_pending(0);

- local_irq_enable();
-
h = softirq_vec;

do {
+ u32 softirq_mask = 1 << softirq;
+
if (pending & 1) {
- {
- u32 preempt_count = preempt_count();
- h->action(h);
- if (preempt_count != preempt_count()) {
- print_symbol("BUG: softirq exited %s with wrong preemption count!\n", (unsigned long) h->action);
- printk("entered with %08x, exited with %08x.\n", preempt_count, preempt_count());
- preempt_count() = preempt_count;
+ u32 preempt_count = preempt_count();
+
+#if defined(CONFIG_PREEMPT_SOFTIRQS) && defined(CONFIG_PREEMPT_HARDIRQS)
+ /*
+ * If executed by a same-prio hardirq thread
+ * then skip pending softirqs that belong
+ * to softirq threads with different priority:
+ */
+ if (same_prio_only) {
+ tsk = __get_cpu_var(ksoftirqd)[softirq].tsk;
+ if (tsk && tsk->normal_prio !=
+ current->normal_prio) {
+ same_prio_skipped |= softirq_mask;
+ available_mask &= ~softirq_mask;
+ goto next;
}
}
+#endif
+ /*
+ * Is this softirq already being processed?
+ */
+ if (per_cpu(softirq_running, cpu) & softirq_mask) {
+ available_mask &= ~softirq_mask;
+ goto next;
+ }
+ per_cpu(softirq_running, cpu) |= softirq_mask;
+ local_irq_enable();
+
+ h->action(h);
+ if (preempt_count != preempt_count()) {
+ print_symbol("BUG: softirq exited %s with wrong preemption count!\n", (unsigned long) h->action);
+ printk("entered with %08x, exited with %08x.\n", preempt_count, preempt_count());
+ preempt_count() = preempt_count;
+ }
rcu_bh_qsctr_inc(cpu);
cond_resched_softirq_context();
+ local_irq_disable();
+ per_cpu(softirq_running, cpu) &= ~softirq_mask;
}
+next:
h++;
+ softirq++;
pending >>= 1;
} while (pending);

- local_irq_disable();
-
+ or_softirq_pending(same_prio_skipped);
pending = local_softirq_pending();
- if (pending && --max_restart)
- goto restart;
+ if (pending & available_mask) {
+ if (--max_restart)
+ goto restart;
+ /*
+ * With softirq threading there's no reason not to
+ * finish the workload we have:
+ */
+#ifdef CONFIG_PREEMPT_SOFTIRQS
+ if (--max_loops) {
+ if (printk_ratelimit())
+ printk("INFO: softirq overload: %08x\n", pending);
+ max_restart = MAX_SOFTIRQ_RESTART;
+ goto restart;
+ }
+ if (printk_ratelimit())
+ printk("BUG: softirq loop! %08x\n", pending);
+#endif
+ }

if (pending)
trigger_softirqs();
@@ -321,7 +389,7 @@ asmlinkage void __do_softirq(void)
p_flags = current->flags & PF_HARDIRQ;
current->flags &= ~PF_HARDIRQ;

- ___do_softirq();
+ ___do_softirq(0);

trace_softirq_exit();

@@ -350,8 +418,9 @@ void do_softirq_from_hardirq(void)
__local_bh_disable((unsigned long)__builtin_return_address(0));
p_flags = current->flags & PF_HARDIRQ;
current->flags &= ~PF_HARDIRQ;
+ current->flags |= PF_SOFTIRQ;

- ___do_softirq();
+ ___do_softirq(1);

trace_softirq_exit();

@@ -359,6 +428,9 @@ void do_softirq_from_hardirq(void)
_local_bh_enable();

current->flags |= p_flags;
+ current->flags &= ~PF_SOFTIRQ;
+
+ local_irq_enable();
}

#ifndef __ARCH_HAS_DO_SOFTIRQ
@@ -669,8 +741,9 @@ static int ksoftirqd(void * __data)
{
struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2 };
struct softirqdata *data = __data;
- u32 mask = (1 << data->nr);
+ u32 softirq_mask = (1 << data->nr);
struct softirq_action *h;
+ int cpu = data->cpu;

sys_sched_setscheduler(current->pid, SCHED_FIFO, &param);
// set_user_nice(current, -10);
@@ -684,7 +757,8 @@ static int ksoftirqd(void * __data)

while (!kthread_should_stop()) {
preempt_disable();
- if (!(local_softirq_pending() & mask)) {
+ if (!(local_softirq_pending() & softirq_mask)) {
+sleep_more:
__preempt_enable_no_resched();
schedule();
preempt_disable();
@@ -694,16 +768,26 @@ static int ksoftirqd(void * __data)
#endif
__set_current_state(TASK_RUNNING);

- while (local_softirq_pending() & mask) {
+ while (local_softirq_pending() & softirq_mask) {
/* Preempt disable stops cpu going offline.
If already offline, we'll be on wrong CPU:
don't process */
- if (cpu_is_offline(data->cpu))
+ if (cpu_is_offline(cpu))
goto wait_to_die;

local_irq_disable();
+ /*
+ * Is the softirq already being executed by
+ * a hardirq context?
+ */
+ if (per_cpu(softirq_running, cpu) & softirq_mask) {
+ local_irq_enable();
+ set_current_state(TASK_INTERRUPTIBLE);
+ goto sleep_more;
+ }
+ per_cpu(softirq_running, cpu) |= softirq_mask;
__preempt_enable_no_resched();
- set_softirq_pending(local_softirq_pending() & ~mask);
+ set_softirq_pending(local_softirq_pending() & ~softirq_mask);
local_bh_disable();
local_irq_enable();

@@ -713,6 +797,7 @@ static int ksoftirqd(void * __data)
rcu_bh_qsctr_inc(data->cpu);

local_irq_disable();
+ per_cpu(softirq_running, cpu) &= ~softirq_mask;
_local_bh_enable();
local_irq_enable();
