[PATCH v4 04/13] task_isolation: Add task isolation hooks to arch-independent code

From: Alex Belits
Date: Wed Jul 22 2020 - 10:52:33 EST


This commit adds task isolation hooks as follows:

- __handle_domain_irq() and handle_domain_nmi() generate an
isolation warning for the local task

- irq_work_queue_on() generates an isolation warning for the remote
task being interrupted for irq_work (through
__smp_call_single_queue())

- generic_exec_single() generates a remote isolation warning for
the remote cpu being IPI'd (through __smp_call_single_queue())

- smp_call_function_many() generates a remote isolation warning for
the set of remote cpus being IPI'd (through
smp_call_function_many_cond())

- on_each_cpu_cond_mask() generates a remote isolation warning for
the set of remote cpus being IPI'd (through
smp_call_function_many_cond())

- __ttwu_queue_wakelist() generates a remote isolation warning for
the remote cpu being IPI'd (through __smp_call_single_queue())

- nmi_enter(), __context_tracking_exit(), __handle_domain_irq(),
handle_domain_nmi() and scheduler_ipi() clear low-level flags and
synchronize CPUs by calling task_isolation_kernel_enter()

Calls to task_isolation_remote() or task_isolation_interrupt() can
be placed in platform-independent code, as is done here, when that
results in fewer lines of code being changed; this is the case, for
example, for the users of the arch_send_call_function_*() APIs.
Alternatively, they can be placed in per-architecture code when
there are many callers, as is the case for smp_send_reschedule().

A further cleanup might be to create an intermediate layer, so that
for example smp_send_reschedule() is a single generic function that
just calls arch_smp_send_reschedule(), allowing generic code to be
called every time smp_send_reschedule() is invoked. For now, we
just update either the callers or the callees, whichever makes the
most sense.

Calls to task_isolation_kernel_enter() are intended for early
kernel entry code. They may be called in platform-independent or
platform-specific code.

It may be possible to clean up the low-level entry code and organize
the calls to task_isolation_kernel_enter() so that multiple
per-architecture or driver-specific calls to it are not needed. RCU
initialization may be a good reference point for those places in the
kernel (task_isolation_kernel_enter() should precede it); however,
right now it is not unified between architectures.

Signed-off-by: Chris Metcalf <cmetcalf@xxxxxxxxxxxx>
[abelits@xxxxxxxxxxx: adapted for kernel 5.8, added low-level flags handling]
Signed-off-by: Alex Belits <abelits@xxxxxxxxxxx>
---
include/linux/hardirq.h | 2 ++
include/linux/sched.h | 2 ++
kernel/context_tracking.c | 4 ++++
kernel/irq/irqdesc.c | 13 +++++++++++++
kernel/smp.c | 6 +++++-
5 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 03c9fece7d43..5aab1d0a580e 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -7,6 +7,7 @@
#include <linux/lockdep.h>
#include <linux/ftrace_irq.h>
#include <linux/vtime.h>
+#include <linux/isolation.h>
#include <asm/hardirq.h>

extern void synchronize_irq(unsigned int irq);
@@ -114,6 +115,7 @@ extern void rcu_nmi_exit(void);
#define nmi_enter() \
do { \
arch_nmi_enter(); \
+ task_isolation_kernel_enter(); \
printk_nmi_enter(); \
lockdep_off(); \
BUG_ON(in_nmi() == NMI_MASK); \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7fb7bb3fddaa..cacfa415dc59 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -32,6 +32,7 @@
#include <linux/posix-timers.h>
#include <linux/rseq.h>
#include <linux/kcsan.h>
+#include <linux/isolation.h>

/* task_struct member predeclarations (sorted alphabetically): */
struct audit_context;
@@ -1743,6 +1744,7 @@ extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk);
#ifdef CONFIG_SMP
static __always_inline void scheduler_ipi(void)
{
+ task_isolation_kernel_enter();
/*
* Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
* TIF_NEED_RESCHED remotely (for the first time) will also send
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 36a98c48aedc..481a722ddbce 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -21,6 +21,7 @@
#include <linux/hardirq.h>
#include <linux/export.h>
#include <linux/kprobes.h>
+#include <linux/isolation.h>

#define CREATE_TRACE_POINTS
#include <trace/events/context_tracking.h>
@@ -148,6 +149,8 @@ void noinstr __context_tracking_exit(enum ctx_state state)
if (!context_tracking_recursion_enter())
return;

+ task_isolation_kernel_enter();
+
if (__this_cpu_read(context_tracking.state) == state) {
if (__this_cpu_read(context_tracking.active)) {
/*
@@ -159,6 +162,7 @@ void noinstr __context_tracking_exit(enum ctx_state state)
instrumentation_begin();
vtime_user_exit(current);
trace_user_exit(0);
+ task_isolation_user_exit();
instrumentation_end();
}
}
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 1a7723604399..b351aac7732f 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -16,6 +16,7 @@
#include <linux/bitmap.h>
#include <linux/irqdomain.h>
#include <linux/sysfs.h>
+#include <linux/isolation.h>

#include "internals.h"

@@ -669,6 +670,8 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
unsigned int irq = hwirq;
int ret = 0;

+ task_isolation_kernel_enter();
+
irq_enter();

#ifdef CONFIG_IRQ_DOMAIN
@@ -676,6 +679,10 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
irq = irq_find_mapping(domain, hwirq);
#endif

+ task_isolation_interrupt((irq == hwirq) ?
+ "irq %d (%s)" : "irq %d (%s hwirq %d)",
+ irq, domain ? domain->name : "", hwirq);
+
/*
* Some hardware gives randomly wrong interrupts. Rather
* than crashing, do something sensible.
@@ -710,6 +717,8 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
unsigned int irq;
int ret = 0;

+ task_isolation_kernel_enter();
+
/*
* NMI context needs to be setup earlier in order to deal with tracing.
*/
@@ -717,6 +726,10 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,

irq = irq_find_mapping(domain, hwirq);

+ task_isolation_interrupt((irq == hwirq) ?
+ "NMI irq %d (%s)" : "NMI irq %d (%s hwirq %d)",
+ irq, domain ? domain->name : "", hwirq);
+
/*
* ack_bad_irq is not NMI-safe, just report
* an invalid interrupt.
diff --git a/kernel/smp.c b/kernel/smp.c
index aa17eedff5be..6a6849783948 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -20,6 +20,7 @@
#include <linux/sched.h>
#include <linux/sched/idle.h>
#include <linux/hypervisor.h>
+#include <linux/isolation.h>

#include "smpboot.h"
#include "sched/smp.h"
@@ -146,8 +147,10 @@ void __smp_call_single_queue(int cpu, struct llist_node *node)
* locking and barrier primitives. Generic code isn't really
* equipped to do the right thing...
*/
- if (llist_add(node, &per_cpu(call_single_queue, cpu)))
+ if (llist_add(node, &per_cpu(call_single_queue, cpu))) {
+ task_isolation_remote(cpu, "IPI function");
send_call_function_single_ipi(cpu);
+ }
}

/*
@@ -545,6 +548,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
}

/* Send a message to all CPUs in the map */
+ task_isolation_remote_cpumask(cfd->cpumask_ipi, "IPI function");
arch_send_call_function_ipi_mask(cfd->cpumask_ipi);

if (wait) {
--
2.26.2