[RFC 5/6] softirq: Add time accounting per-softirq type
From: Dmitry Safonov
Date: Thu Jan 18 2018 - 11:13:56 EST
Warning: not merge-ready in any sense
As discussed, softirqs will be deferred or processed right away
according to how much CPU time softirqs of that type have already
spent on the CPU.
This will improve e.g. the handling of net-rx softirqs during a packet
storm and also give a userspace process a fair slice of CPU time to
serve incoming packets.

The time-based decision will work better than checking for a re-raised
softirq after processing the previous one: that check may fail to
trigger even under a softirq storm if softirqs are raised too slowly
(e.g. because of the hardware).
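The decision logic itself is not in this patch; a rough sketch of the
idea follows (defer_softirq() and softirq_fair_slice are made-up names
for illustration only, and a real policy would likely look at time
spent within a recent window rather than the raw total):

        /*
         * Illustration only, not part of this patch: punt a softirq
         * type to ksoftirqd once it has consumed more than its fair
         * share of CPU time. softirq_fair_slice is a hypothetical
         * per-type budget.
         */
        static bool defer_softirq(unsigned int vec_nr)
        {
                struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);

                return irqtime->total_si[vec_nr] > softirq_fair_slice;
        }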
Signed-off-by: Dmitry Safonov <dima@xxxxxxxxxx>
---
include/linux/hardirq.h | 2 +-
include/linux/vtime.h | 10 +++++-----
init/Kconfig | 10 ++++++++++
kernel/sched/cputime.c | 41 ++++++++++++++++++++++++++++++++++-------
kernel/sched/sched.h | 1 +
kernel/softirq.c | 16 ++++++++++++++--
6 files changed, 65 insertions(+), 15 deletions(-)
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 0fbbcdf0c178..8f42581ef38b 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -51,7 +51,7 @@ extern void irq_enter(void);
#define __irq_exit() \
do { \
trace_hardirq_exit(); \
- account_irq_exit_time(current); \
+ account_irq_exit_time(current, NULL); \
preempt_count_sub(HARDIRQ_OFFSET); \
} while (0)
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index a26ed10a4eac..ebe140e2a84f 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -97,21 +97,21 @@ static inline void vtime_flush(struct task_struct *tsk) { }
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-extern void irqtime_account_irq(struct task_struct *tsk);
+extern void irqtime_account_irq(struct task_struct *tsk, u64 *si_times);
#else
-static inline void irqtime_account_irq(struct task_struct *tsk) { }
+static inline void irqtime_account_irq(struct task_struct *tsk, u64 *si_times) { }
#endif
static inline void account_irq_enter_time(struct task_struct *tsk)
{
vtime_account_irq_enter(tsk);
- irqtime_account_irq(tsk);
+ irqtime_account_irq(tsk, NULL);
}
-static inline void account_irq_exit_time(struct task_struct *tsk)
+static inline void account_irq_exit_time(struct task_struct *tsk, u64 *si_times)
{
vtime_account_irq_exit(tsk);
- irqtime_account_irq(tsk);
+ irqtime_account_irq(tsk, si_times);
}
#endif /* _LINUX_KERNEL_VTIME_H */
diff --git a/init/Kconfig b/init/Kconfig
index a9a2e2c86671..9d09aa753299 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -387,6 +387,16 @@ config IRQ_TIME_ACCOUNTING
If in doubt, say N here.
+config FAIR_SOFTIRQ_SCHEDULE
+ bool "Fair schedule softirqs on process context"
+ depends on IRQ_TIME_ACCOUNTING
+ default n
+ help
+ Account softirq CPU time per softirq type. Process pending softirqs
+ in the current context only if that is fair to the current task.
+
+ If in doubt, say N here.
+
config BSD_PROCESS_ACCT
bool "BSD Process Accounting"
depends on MULTIUSER
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index faacba00a153..4da1df879c8a 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -34,35 +34,61 @@ void disable_sched_clock_irqtime(void)
sched_clock_irqtime = 0;
}
-static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
+static void __irqtime_account_delta(struct irqtime *irqtime, u64 delta,
enum cpu_usage_stat idx)
{
u64 *cpustat = kcpustat_this_cpu->cpustat;
- u64_stats_update_begin(&irqtime->sync);
cpustat[idx] += delta;
irqtime->total += delta;
irqtime->tick_delta += delta;
+}
+
+
+static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
+ enum cpu_usage_stat idx)
+{
+ u64_stats_update_begin(&irqtime->sync);
+ __irqtime_account_delta(irqtime, delta, idx);
u64_stats_update_end(&irqtime->sync);
}
-static void irqtime_account_softirq(struct irqtime *irqtime, s64 delta)
+static void irqtime_account_softirq(struct irqtime *irqtime, u64 *si_times, s64 delta)
{
+ unsigned int i;
+
+ u64_stats_update_begin(&irqtime->sync);
/*
* We do not account for softirq time from ksoftirqd here.
* We want to continue accounting softirq time to the ksoftirqd thread
* in that case, so as not to confuse the scheduler with a special task
* that does not consume any time but still wants to run.
*/
- if (!current_is_ksoftirqd())
- irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
+ if (!IS_ENABLED(CONFIG_FAIR_SOFTIRQ_SCHEDULE)) {
+ if (!current_is_ksoftirqd())
+ __irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
+ goto out;
+ }
+
+ if (!si_times)
+ goto out;
+
+ for (i = 0; i < NR_SOFTIRQS; i++) {
+ /* ksoftirqd keeps softirq time of its own type accounted to itself */
+ if (servicing_softirq(i))
+ continue;
+ __irqtime_account_delta(irqtime, si_times[i], CPUTIME_SOFTIRQ);
+ irqtime->total_si[i] += si_times[i];
+ }
+out:
+ u64_stats_update_end(&irqtime->sync);
}
/*
* Called before incrementing preempt_count on {soft,}irq_enter
* and before decrementing preempt_count on {soft,}irq_exit.
*/
-void irqtime_account_irq(struct task_struct *curr)
+void irqtime_account_irq(struct task_struct *curr, u64 *si_times)
{
struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
s64 delta;
@@ -76,9 +102,10 @@ void irqtime_account_irq(struct task_struct *curr)
irqtime->irq_start_time += delta;
if (hardirq_count()) {
+ WARN_ON_ONCE(si_times);
irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
} else if (in_serving_softirq()) {
- irqtime_account_softirq(irqtime, delta);
+ irqtime_account_softirq(irqtime, si_times, delta);
}
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b19552a212de..14e154c86dc5 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2055,6 +2055,7 @@ struct irqtime {
u64 total;
u64 tick_delta;
u64 irq_start_time;
+ u64 total_si[NR_SOFTIRQS];
struct u64_stats_sync sync;
};
diff --git a/kernel/softirq.c b/kernel/softirq.c
index fdde3788afba..516e31d3d5b4 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -22,6 +22,7 @@
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/ftrace.h>
+#include <linux/sched/clock.h>
#include <linux/smp.h>
#include <linux/smpboot.h>
#include <linux/tick.h>
@@ -287,6 +288,14 @@ static inline bool lockdep_softirq_start(void) { return false; }
static inline void lockdep_softirq_end(bool in_hardirq) { }
#endif
+static inline u64 time_softirq(u64 start)
+{
+ if (!IS_ENABLED(CONFIG_FAIR_SOFTIRQ_SCHEDULE))
+ return 0;
+
+ return local_clock() - start;
+}
+
asmlinkage __visible void __softirq_entry __do_softirq(__u32 mask)
{
unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
@@ -296,6 +305,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(__u32 mask)
bool in_hardirq;
__u32 pending;
int softirq_bit;
+ u64 si_times[NR_SOFTIRQS] = {0};
/*
* Mask out PF_MEMALLOC as the current task context is borrowed for the
@@ -322,6 +332,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(__u32 mask)
while ((softirq_bit = ffs(pending))) {
unsigned int vec_nr;
int prev_count;
+ u64 start_time = time_softirq(0);
h += softirq_bit - 1;
@@ -341,6 +352,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(__u32 mask)
}
h++;
pending >>= softirq_bit;
+ si_times[vec_nr] += time_softirq(start_time);
}
rcu_bh_qs();
@@ -357,7 +369,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(__u32 mask)
}
lockdep_softirq_end(in_hardirq);
- account_irq_exit_time(current);
+ account_irq_exit_time(current, si_times);
__local_bh_enable(SOFTIRQ_OFFSET);
WARN_ON_ONCE(in_interrupt());
current_restore_flags(old_flags, PF_MEMALLOC);
@@ -449,7 +461,7 @@ void irq_exit(void)
#else
lockdep_assert_irqs_disabled();
#endif
- account_irq_exit_time(current);
+ account_irq_exit_time(current, NULL);
preempt_count_sub(HARDIRQ_OFFSET);
if (!in_interrupt() && local_softirq_pending())
invoke_softirq();
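
For reference, a minimal sketch of how the new per-type totals could be
read out, assuming only the total_si[] field and the sync seqcount
added above (softirq_time_of() is a hypothetical helper, not part of
this series):

        static u64 softirq_time_of(int cpu, unsigned int vec_nr)
        {
                struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
                unsigned int seq;
                u64 total;

                /* Retry if a writer raced with us (needed on 32-bit) */
                do {
                        seq = u64_stats_fetch_begin(&irqtime->sync);
                        total = irqtime->total_si[vec_nr];
                } while (u64_stats_fetch_retry(&irqtime->sync, seq));

                return total;
        }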
--
2.13.6