[ANNOUNCE] v4.11.7-rt3

From: Sebastian Andrzej Siewior
Date: Tue Jun 27 2017 - 09:58:09 EST


Dear RT folks!

I'm pleased to announce the v4.11.7-rt3 patch set.

Changes since v4.11.7-rt2:

- Clearing a swap-slot took a sleeping lock in a preempt-disable
region. Fixed by dropping the preempt-disable region.

- The capability check code on arm64 took a mutex in a atomic section.
The backport of a few patches from upstream made this visible and
now the fix for this has been also backported.

- The removal of TASK_ALL in the last release uncovered a bug where we
mixed normal wake ups and wake ups made for waiters of sleeping
spinlock. Reported by Mike Galbraith.

- Lock stealing for RTMutex wasn't working in v4.11. Reported and
fixed by Mike Galbraith.

- Code now compiles for RT + !CONFIG_POSIX_TIMERS. Reported by kbuild
test robot.

Known issues
- CPU hotplug got a little better but can deadlock.

The delta patch against v4.11.7-rt2 is appended below and can be found here:

https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.11/incr/patch-4.11.7-rt2-rt3.patch.xz

You can get this release via the git tree at:

git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v4.11.7-rt3

The RT patch against v4.11.7 can be found here:

https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patch-4.11.7-rt3.patch.xz

The split quilt queue is available at:

https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.7-rt3.tar.xz

Sebastian
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -115,6 +115,7 @@ struct arm64_cpu_capabilities {

extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
+extern struct static_key_false arm64_const_caps_ready;

bool this_cpu_has_cap(unsigned int cap);

@@ -124,7 +125,7 @@ static inline bool cpu_have_feature(unsigned int num)
}

/* System capability check for constant caps */
-static inline bool cpus_have_const_cap(int num)
+static inline bool __cpus_have_const_cap(int num)
{
if (num >= ARM64_NCAPS)
return false;
@@ -138,6 +139,14 @@ static inline bool cpus_have_cap(unsigned int num)
return test_bit(num, cpu_hwcaps);
}

+static inline bool cpus_have_const_cap(int num)
+{
+ if (static_branch_likely(&arm64_const_caps_ready))
+ return __cpus_have_const_cap(num);
+ else
+ return cpus_have_cap(num);
+}
+
static inline void cpus_set_cap(unsigned int num)
{
if (num >= ARM64_NCAPS) {
@@ -145,7 +154,6 @@ static inline void cpus_set_cap(unsigned int num)
num, ARM64_NCAPS);
} else {
__set_bit(num, cpu_hwcaps);
- static_branch_enable(&cpu_hwcap_keys[num]);
}
}

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -24,6 +24,7 @@

#include <linux/types.h>
#include <linux/kvm_types.h>
+#include <asm/cpufeature.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
@@ -356,9 +357,12 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
unsigned long vector_ptr)
{
/*
- * Call initialization code, and switch to the full blown
- * HYP code.
+ * Call initialization code, and switch to the full blown HYP code.
+ * If the cpucaps haven't been finalized yet, something has gone very
+ * wrong, and hyp will crash and burn when it uses any
+ * cpus_have_const_cap() wrapper.
*/
+ BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
}

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -975,8 +975,16 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
*/
void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
{
- for (; caps->matches; caps++)
- if (caps->enable && cpus_have_cap(caps->capability))
+ for (; caps->matches; caps++) {
+ unsigned int num = caps->capability;
+
+ if (!cpus_have_cap(num))
+ continue;
+
+ /* Ensure cpus_have_const_cap(num) works */
+ static_branch_enable(&cpu_hwcap_keys[num]);
+
+ if (caps->enable) {
/*
* Use stop_machine() as it schedules the work allowing
* us to modify PSTATE, instead of on_each_cpu() which
@@ -984,6 +992,8 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
* we return.
*/
stop_machine(caps->enable, NULL, cpu_online_mask);
+ }
+ }
}

/*
@@ -1086,6 +1096,14 @@ static void __init setup_feature_capabilities(void)
enable_cpu_capabilities(arm64_features);
}

+DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready);
+EXPORT_SYMBOL(arm64_const_caps_ready);
+
+static void __init mark_const_caps_ready(void)
+{
+ static_branch_enable(&arm64_const_caps_ready);
+}
+
/*
* Check if the current CPU has a given feature capability.
* Should be called from non-preemptible context.
@@ -1112,6 +1130,7 @@ void __init setup_cpu_features(void)
/* Set the CPU feature capabilies */
setup_feature_capabilities();
enable_errata_workarounds();
+ mark_const_caps_ready();
setup_elf_hwcaps(arm64_elf_hwcaps);

if (system_supports_32bit_el0())
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -167,7 +167,7 @@ extern struct cred init_cred;
# define INIT_PERF_EVENTS(tsk)
#endif

-#ifdef CONFIG_PREEMPT_RT_BASE
+#if defined(CONFIG_POSIX_TIMERS) && defined(CONFIG_PREEMPT_RT_BASE)
# define INIT_TIMER_LIST .posix_timer_list = NULL,
#else
# define INIT_TIMER_LIST
diff --git a/include/linux/sched.h b/include/linux/sched.h
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -797,6 +797,7 @@ struct task_struct {
raw_spinlock_t pi_lock;

struct wake_q_node wake_q;
+ struct wake_q_node wake_q_sleeper;

#ifdef CONFIG_RT_MUTEXES
/* PI waiters blocked on a rt_mutex held by this task: */
diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h
--- a/include/linux/sched/wake_q.h
+++ b/include/linux/sched/wake_q.h
@@ -46,8 +46,20 @@ static inline void wake_q_init(struct wake_q_head *head)
head->lastp = &head->first;
}

-extern void wake_q_add(struct wake_q_head *head,
- struct task_struct *task);
+extern void __wake_q_add(struct wake_q_head *head,
+ struct task_struct *task, bool sleeper);
+static inline void wake_q_add(struct wake_q_head *head,
+ struct task_struct *task)
+{
+ __wake_q_add(head, task, false);
+}
+
+static inline void wake_q_add_sleeper(struct wake_q_head *head,
+ struct task_struct *task)
+{
+ __wake_q_add(head, task, true);
+}
+
extern void __wake_up_q(struct wake_q_head *head, bool sleeper);
static inline void wake_up_q(struct wake_q_head *head)
{
diff --git a/kernel/fork.c b/kernel/fork.c
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -575,6 +575,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->splice_pipe = NULL;
tsk->task_frag.page = NULL;
tsk->wake_q.next = NULL;
+ tsk->wake_q_sleeper.next = NULL;

account_kernel_stack(tsk, 1);

diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -236,26 +236,19 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
}
#endif

-#define STEAL_NORMAL 0
-#define STEAL_LATERAL 1
-
/*
* Only use with rt_mutex_waiter_{less,equal}()
*/
-#define task_to_waiter(p) \
- &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline }
+#define task_to_waiter(p) &(struct rt_mutex_waiter) \
+ { .prio = (p)->prio, .deadline = (p)->dl.deadline, .task = (p) }

static inline int
rt_mutex_waiter_less(struct rt_mutex_waiter *left,
- struct rt_mutex_waiter *right, int mode)
+ struct rt_mutex_waiter *right)
{
- if (mode == STEAL_NORMAL) {
- if (left->prio < right->prio)
- return 1;
- } else {
- if (left->prio <= right->prio)
- return 1;
- }
+ if (left->prio < right->prio)
+ return 1;
+
/*
* If both waiters have dl_prio(), we check the deadlines of the
* associated tasks.
@@ -287,6 +280,27 @@ rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
return 1;
}

+#define STEAL_NORMAL 0
+#define STEAL_LATERAL 1
+
+static inline int
+rt_mutex_steal(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, int mode)
+{
+ struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
+
+ if (waiter == top_waiter || rt_mutex_waiter_less(waiter, top_waiter))
+ return 1;
+
+ /*
+ * Note that RT tasks are excluded from lateral-steals
+ * to prevent the introduction of an unbounded latency.
+ */
+ if (mode == STEAL_NORMAL || rt_task(waiter->task))
+ return 0;
+
+ return rt_mutex_waiter_equal(waiter, top_waiter);
+}
+
static void
rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
@@ -298,7 +312,7 @@ rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
while (*link) {
parent = *link;
entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
- if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) {
+ if (rt_mutex_waiter_less(waiter, entry)) {
link = &parent->rb_left;
} else {
link = &parent->rb_right;
@@ -337,7 +351,7 @@ rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
while (*link) {
parent = *link;
entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
- if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) {
+ if (rt_mutex_waiter_less(waiter, entry)) {
link = &parent->rb_left;
} else {
link = &parent->rb_right;
@@ -847,6 +861,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
* @task: The task which wants to acquire the lock
* @waiter: The waiter that is queued to the lock's wait tree if the
* callsite called task_blocked_on_lock(), otherwise NULL
+ * @mode: Lock steal mode (STEAL_NORMAL, STEAL_LATERAL)
*/
static int __try_to_take_rt_mutex(struct rt_mutex *lock,
struct task_struct *task,
@@ -886,14 +901,11 @@ static int __try_to_take_rt_mutex(struct rt_mutex *lock,
*/
if (waiter) {
/*
- * If waiter is not the highest priority waiter of
- * @lock, give up.
+ * If waiter is not the highest priority waiter of @lock,
+ * or its peer when lateral steal is allowed, give up.
*/
- if (waiter != rt_mutex_top_waiter(lock)) {
- /* XXX rt_mutex_waiter_less() ? */
+ if (!rt_mutex_steal(lock, waiter, mode))
return 0;
- }
-
/*
* We can acquire the lock. Remove the waiter from the
* lock waiters tree.
@@ -910,25 +922,12 @@ static int __try_to_take_rt_mutex(struct rt_mutex *lock,
* not need to be dequeued.
*/
if (rt_mutex_has_waiters(lock)) {
- struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
-
- if (task != pown)
- return 0;
-
/*
- * Note that RT tasks are excluded from lateral-steals
- * to prevent the introduction of an unbounded latency.
+ * If @task->prio is greater than the top waiter
+ * priority (kernel view), or equal to it when a
+ * lateral steal is forbidden, @task lost.
*/
- if (rt_task(task))
- mode = STEAL_NORMAL;
- /*
- * If @task->prio is greater than or equal to
- * the top waiter priority (kernel view),
- * @task lost.
- */
- if (!rt_mutex_waiter_less(task_to_waiter(task),
- rt_mutex_top_waiter(lock),
- mode))
+ if (!rt_mutex_steal(lock, task_to_waiter(task), mode))
return 0;
/*
* The current top waiter stays enqueued. We
@@ -1507,7 +1506,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
*/
preempt_disable();
if (waiter->savestate)
- wake_q_add(wake_sleeper_q, waiter->task);
+ wake_q_add_sleeper(wake_sleeper_q, waiter->task);
else
wake_q_add(wake_q, waiter->task);
raw_spin_unlock(&current->pi_lock);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -437,9 +437,15 @@ static bool set_nr_if_polling(struct task_struct *p)
#endif
#endif

-void wake_q_add(struct wake_q_head *head, struct task_struct *task)
+void __wake_q_add(struct wake_q_head *head, struct task_struct *task,
+ bool sleeper)
{
- struct wake_q_node *node = &task->wake_q;
+ struct wake_q_node *node;
+
+ if (sleeper)
+ node = &task->wake_q_sleeper;
+ else
+ node = &task->wake_q;

/*
* Atomically grab the task, if ->wake_q is !nil already it means
@@ -468,12 +474,17 @@ void __wake_up_q(struct wake_q_head *head, bool sleeper)
while (node != WAKE_Q_TAIL) {
struct task_struct *task;

- task = container_of(node, struct task_struct, wake_q);
+ if (sleeper)
+ task = container_of(node, struct task_struct, wake_q_sleeper);
+ else
+ task = container_of(node, struct task_struct, wake_q);
BUG_ON(!task);
/* Task can safely be re-inserted now: */
node = node->next;
- task->wake_q.next = NULL;
-
+ if (sleeper)
+ task->wake_q_sleeper.next = NULL;
+ else
+ task->wake_q.next = NULL;
/*
* wake_up_process() implies a wmb() to pair with the queueing
* in wake_q_add() so as not to miss wakeups.
diff --git a/localversion-rt b/localversion-rt
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt2
+-rt3
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -267,11 +267,11 @@ int free_swap_slot(swp_entry_t entry)
{
struct swap_slots_cache *cache;

- cache = &get_cpu_var(swp_slots);
+ cache = raw_cpu_ptr(&swp_slots);
if (use_swap_slot_cache && cache->slots_ret) {
spin_lock_irq(&cache->free_lock);
/* Swap slots cache may be deactivated before acquiring lock */
- if (!use_swap_slot_cache) {
+ if (!use_swap_slot_cache || !cache->slots_ret) {
spin_unlock_irq(&cache->free_lock);
goto direct_free;
}
@@ -291,7 +291,6 @@ int free_swap_slot(swp_entry_t entry)
direct_free:
swapcache_free_entries(&entry, 1);
}
- put_cpu_var(swp_slots);

return 0;
}