[PATCH V7 4/4] softirq: Allow early break of the softirq processing loop

From: qianjun.kernel
Date: Tue Sep 15 2020 - 08:13:49 EST


From: jun qian <qianjun.kernel@xxxxxxxxx>

Allow terminating the softirq processing loop before all pending vectors
have been handled. When the loop breaks early, record the next vector to
process in a per-CPU variable (pending_next_bit), so that on the next run
the interrupted vectors are serviced before any vectors that were raised
again in the meantime. This keeps a frequently re-raised low vector from
starving the higher ones.

Signed-off-by: jun qian <qianjun.kernel@xxxxxxxxx>
---
kernel/softirq.c | 113 ++++++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 91 insertions(+), 22 deletions(-)
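
Note for reviewers (illustration only, not part of the patch): the sketch
below models the pending_left/pending_again split in plain userspace C, so
the mask arithmetic can be checked outside the kernel. NR_SOFTIRQS is
assumed to be 10 here and split_pending() is a made-up helper; only the two
mask expressions match the patch.

#include <stdio.h>

#define NR_SOFTIRQS		10
#define SOFTIRQ_PENDING_MASK	((1UL << NR_SOFTIRQS) - 1)

/* Toy stand-in for the per-CPU pending_next_bit. */
static unsigned int pending_next_bit;

/*
 * Split a pending mask the way __do_softirq() does after this patch:
 * bits at or above pending_next_bit were left over from the broken run
 * and go first (pending_left); lower bits were raised again in the
 * meantime and run afterwards (pending_again).
 */
static void split_pending(unsigned long pending,
			  unsigned long *left, unsigned long *again)
{
	*left  = pending & (SOFTIRQ_PENDING_MASK << pending_next_bit);
	*again = pending & (SOFTIRQ_PENDING_MASK >>
			    (NR_SOFTIRQS - pending_next_bit));
}

int main(void)
{
	unsigned long left, again;

	/* Bits 1, 2, 4, 6, 8, 9 pending; the last run broke after bit 4. */
	pending_next_bit = 5;
	split_pending(0x356 /* 1101010110 */, &left, &again);

	printf("left  = %#lx\n", left);		/* 0x340: bits 6, 8, 9 */
	printf("again = %#lx\n", again);	/* 0x16:  bits 1, 2, 4 */
	return 0;
}

With pending_next_bit == 0 (the common case) the left mask covers all ten
vectors and the again mask is empty, so the split degrades to the current
mainline behavior.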

diff --git a/kernel/softirq.c b/kernel/softirq.c
index cbb59b5..29cf079 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -254,6 +254,22 @@ static inline bool __softirq_needs_break(u64 start)
return false;
}

+#define SOFTIRQ_PENDING_MASK ((1UL << NR_SOFTIRQS) - 1)
+
+/*
+ * pending_next_bit records where the processing has to resume when
+ * the loop is broken early. This per-CPU variable solves the
+ * following scenario:
+ * Assume bits 0 and 1 are pending when the processing starts. The
+ * loop breaks out after bit 0 has been handled and stores back bit 1
+ * as pending. Before ksoftirqd runs, bit 0 gets raised again and
+ * ksoftirqd handles bit 0 first, which again takes longer than the
+ * timeout. As a result, the bit 0 processing can starve all other
+ * softirqs.
+ *
+ * pending_next_bit makes the interrupted vectors run first instead.
+ */
+DEFINE_PER_CPU(u32, pending_next_bit);
+
asmlinkage __visible void __softirq_entry __do_softirq(void)
{
u64 start = sched_clock();
@@ -261,8 +277,11 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
unsigned int max_restart = MAX_SOFTIRQ_RESTART;
struct softirq_action *h;
unsigned long pending;
+ unsigned long pending_left, pending_again;
unsigned int vec_nr;
bool in_hardirq;
+ int next_bit;
+ unsigned long flags;

/*
* Mask out PF_MEMALLOC as the current task context is borrowed for the
@@ -283,25 +302,66 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)

local_irq_enable();

- for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) {
- int prev_count;
-
- __clear_bit(vec_nr, &pending);
-
- h = softirq_vec + vec_nr;
-
- prev_count = preempt_count();
-
- kstat_incr_softirqs_this_cpu(vec_nr);
+ /*
+ * pending_left holds the bits that were left unhandled when the
+ * previous loop broke early without finishing the vectors. These
+ * bits are handled first this time. pending_again holds the bits
+ * that have been raised again in the meantime. These bits are only
+ * handled after the pending_left bits are done.
+ *
+ * For example:
+ * If the pending bits are 1101010110 and the previous loop broke
+ * after bit 4 was handled, then pending_next_bit is 5, so
+ * pending_left is 1101000000 and pending_again is 0000010110.
+ */
+ next_bit = __this_cpu_read(pending_next_bit);
+ pending_left = pending &
+ (SOFTIRQ_PENDING_MASK << next_bit);
+ pending_again = pending &
+ (SOFTIRQ_PENDING_MASK >> (NR_SOFTIRQS - next_bit));
+
+ while (pending_left || pending_again) {
+ if (pending_left) {
+ pending = pending_left;
+ pending_left = 0;
+ } else {
+ pending = pending_again;
+ pending_again = 0;
+ }
+ for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) {
+ int prev_count;
+
+ __clear_bit(vec_nr, &pending);
+
+ h = softirq_vec + vec_nr;
+
+ prev_count = preempt_count();
+
+ kstat_incr_softirqs_this_cpu(vec_nr);
+
+ trace_softirq_entry(vec_nr);
+ h->action(h);
+ trace_softirq_exit(vec_nr);
+ if (unlikely(prev_count != preempt_count())) {
+ pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
+ vec_nr, softirq_to_name[vec_nr], h->action,
+ prev_count, preempt_count());
+ preempt_count_set(prev_count);
+ }

- trace_softirq_entry(vec_nr);
- h->action(h);
- trace_softirq_exit(vec_nr);
- if (unlikely(prev_count != preempt_count())) {
- pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
- vec_nr, softirq_to_name[vec_nr], h->action,
- prev_count, preempt_count());
- preempt_count_set(prev_count);
+ /* Allow an early break to avoid a long sched delay */
+ if (pending && __softirq_needs_break(start)) {
+ __this_cpu_write(pending_next_bit, vec_nr + 1);
+ /*
+ * Ensure that the remaining pending bits are
+ * handled on the next run.
+ */
+ local_irq_save(flags);
+ or_softirq_pending(pending | pending_again);
+ local_irq_restore(flags);
+ /*
+ * Also clear pending_again, otherwise the outer
+ * loop would keep processing it although the
+ * time budget is already exhausted.
+ */
+ pending_again = 0;
+ break;
+ }
}
}

@@ -309,12 +369,21 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
rcu_softirq_qs();
local_irq_disable();

- pending = local_softirq_pending();
- if (pending) {
- if (!__softirq_needs_break(start) && --max_restart)
- goto restart;
+ /* Collect the bits that are still unhandled */
+ pending |= pending_again;
+ if (!pending)
+ /*
+ * All of the pending bits have been handled,
+ * so reset pending_next_bit back to 0.
+ */
+ __this_cpu_write(pending_next_bit, 0);

+ pending |= local_softirq_pending();
+ if (pending) {
+ if (!__softirq_needs_break(start) && --max_restart)
+ goto restart;
wakeup_softirqd();
}

lockdep_softirq_end(in_hardirq);
--
1.8.3.1
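
Illustration only, not part of the patch: the starvation scenario described
in the pending_next_bit comment, replayed with a toy userspace model.
run_pass(), its vector budget, and the 10-vector width are assumptions of
this sketch; the resume bookkeeping follows the patch. The first pass breaks
after bit 0 and records pending_next_bit = 1, so the second pass services
the still-pending bit 1 before the re-raised bit 0.

#include <stdio.h>

#define NR_SOFTIRQS		10
#define SOFTIRQ_PENDING_MASK	((1UL << NR_SOFTIRQS) - 1)

/* Toy stand-in for the per-CPU pending_next_bit. */
static unsigned int next_bit;

/* One __do_softirq()-style pass that breaks after 'budget' vectors. */
static unsigned long run_pass(unsigned long pending, int budget)
{
	unsigned long masks[2];
	int i, vec;

	/* Resume order: leftover bits first, re-raised bits second. */
	masks[0] = pending & (SOFTIRQ_PENDING_MASK << next_bit);
	masks[1] = pending & (SOFTIRQ_PENDING_MASK >>
			      (NR_SOFTIRQS - next_bit));

	for (i = 0; i < 2; i++) {
		for (vec = 0; vec < NR_SOFTIRQS; vec++) {
			if (!(masks[i] & (1UL << vec)))
				continue;
			masks[i] &= ~(1UL << vec);
			printf("handled vec %d\n", vec);
			if (--budget == 0) {
				/* Early break: remember where to resume. */
				next_bit = vec + 1;
				return masks[0] | masks[1];
			}
		}
	}
	next_bit = 0;	/* everything handled, start from bit 0 again */
	return 0;
}

int main(void)
{
	unsigned long pending = 0x3;	/* bits 0 and 1 pending */

	pending = run_pass(pending, 1);	/* breaks right after bit 0 */
	pending |= 0x1;			/* bit 0 is raised again */
	run_pass(pending, NR_SOFTIRQS);	/* bit 1 runs before bit 0 */
	return 0;
}

Expected output: handled vec 0, handled vec 1, handled vec 0 -- the
re-raised bit 0 no longer starves bit 1.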