[ANNOUNCE] v5.9-rc4-rt5

From: Sebastian Andrzej Siewior
Date: Tue Sep 08 2020 - 14:14:03 EST


Dear RT folks!

I'm pleased to announce the v5.9-rc4-rt5 patch set.

Changes since v5.9-rc4-rt4:

- A queuing discipline other than pfifo_fast led to a lockdep splat.
Reported by Mike Galbraith.

- The series "seqlock: Introduce PREEMPT_RT support" by Ahmed S.
Darwis has been updated to v2.

- Added the series "seqlock: Introduce seqcount_latch_t" by Ahmed S.
Darwis.

Known issues
- It has been pointed out that due to changes to the printk code the
internal buffer representation changed. This is only an issue if tools
like `crash' are used to extract the printk buffer from a kernel memory
image.

The delta patch against v5.9-rc4-rt4 is appended below and can be found here:

https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.9/incr/patch-5.9-rc4-rt4-rt5.patch.xz

You can get this release via the git tree at:

git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.9-rc4-rt5

The RT patch against v5.9-rc4 can be found here:

https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.9/older/patch-5.9-rc4-rt5.patch.xz

The split quilt queue is available at:

https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.9/older/patches-5.9-rc4-rt5.tar.xz

Sebastian

diff --git a/Documentation/locking/seqlock.rst b/Documentation/locking/seqlock.rst
index 62c5ad98c11ca..a334b584f2b34 100644
--- a/Documentation/locking/seqlock.rst
+++ b/Documentation/locking/seqlock.rst
@@ -139,6 +139,24 @@ with the associated LOCKTYPE lock acquired.

Read path: same as in :ref:`seqcount_t`.

+
+.. _seqcount_latch_t:
+
+Latch sequence counters (``seqcount_latch_t``)
+----------------------------------------------
+
+Latch sequence counters are a multiversion concurrency control mechanism
+where the embedded seqcount_t counter even/odd value is used to switch
+between two copies of protected data. This allows the sequence counter
+read path to safely interrupt its own write side critical section.
+
+Use seqcount_latch_t when the write side sections cannot be protected
+from interruption by readers. This is typically the case when the read
+side can be invoked from NMI handlers.
+
+Check `raw_write_seqcount_latch()` for more information.
+
+
.. _seqlock_t:

Sequential locks (``seqlock_t``)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 49d925043171a..f70dffc2771f5 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -54,7 +54,7 @@ struct clocksource *art_related_clocksource;

struct cyc2ns {
struct cyc2ns_data data[2]; /* 0 + 2*16 = 32 */
- seqcount_t seq; /* 32 + 4 = 36 */
+ seqcount_latch_t seq; /* 32 + 4 = 36 */

}; /* fits one cacheline */

@@ -73,14 +73,14 @@ __always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
preempt_disable_notrace();

do {
- seq = this_cpu_read(cyc2ns.seq.sequence);
+ seq = this_cpu_read(cyc2ns.seq.seqcount.sequence);
idx = seq & 1;

data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset);
data->cyc2ns_mul = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul);
data->cyc2ns_shift = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift);

- } while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence)));
+ } while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence)));
}

__always_inline void cyc2ns_read_end(void)
@@ -186,7 +186,7 @@ static void __init cyc2ns_init_boot_cpu(void)
{
struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns);

- seqcount_init(&c2n->seq);
+ seqcount_latch_init(&c2n->seq);
__set_cyc2ns_scale(tsc_khz, smp_processor_id(), rdtsc());
}

@@ -203,7 +203,7 @@ static void __init cyc2ns_init_secondary_cpus(void)

for_each_possible_cpu(cpu) {
if (cpu != this_cpu) {
- seqcount_init(&c2n->seq);
+ seqcount_latch_init(&c2n->seq);
c2n = per_cpu_ptr(&cyc2ns, cpu);
c2n->data[0] = data[0];
c2n->data[1] = data[1];
diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h
index 7d012faa509a4..3d1a9e716b803 100644
--- a/include/linux/rbtree_latch.h
+++ b/include/linux/rbtree_latch.h
@@ -42,8 +42,8 @@ struct latch_tree_node {
};

struct latch_tree_root {
- seqcount_t seq;
- struct rb_root tree[2];
+ seqcount_latch_t seq;
+ struct rb_root tree[2];
};

/**
@@ -206,7 +206,7 @@ latch_tree_find(void *key, struct latch_tree_root *root,
do {
seq = raw_read_seqcount_latch(&root->seq);
node = __lt_find(key, root, seq & 1, ops->comp);
- } while (read_seqcount_retry(&root->seq, seq));
+ } while (read_seqcount_latch_retry(&root->seq, seq));

return node;
}
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 75e3a73e27567..f73c7eb68f27c 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -54,7 +54,7 @@
*
* If the write serialization mechanism is one of the common kernel
* locking primitives, use a sequence counter with associated lock
- * (seqcount_LOCKTYPE_t) instead.
+ * (seqcount_LOCKNAME_t) instead.
*
* If it's desired to automatically handle the sequence counter writer
* serialization and non-preemptibility requirements, use a sequential
@@ -118,7 +118,7 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
#define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) }

/*
- * Sequence counters with associated locks (seqcount_LOCKTYPE_t)
+ * Sequence counters with associated locks (seqcount_LOCKNAME_t)
*
* A sequence counter which associates the lock used for writer
* serialization at initialization time. This enables lockdep to validate
@@ -133,19 +133,19 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
*/

/*
- * For PREEMPT_RT, seqcount_LOCKTYPE_t write side critical sections cannot
- * disable preemption. It can lead to higher latencies and the write side
+ * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot
+ * disable preemption. It can lead to higher latencies, and the write side
* sections will not be able to acquire locks which become sleeping locks
- * in RT (e.g. spinlock_t).
+ * (e.g. spinlock_t).
*
- * To remain preemptible while avoiding a possible livelock caused by a
- * reader preempting the write section, use a different technique: detect
- * if a seqcount_LOCKTYPE_t writer is in progress. If that is the case,
- * acquire then release the associated LOCKTYPE writer serialization
- * lock. This will force any possibly preempted writer to make progress
+ * To remain preemptible while avoiding a possible livelock caused by the
+ * reader preempting the writer, use a different technique: let the reader
+ * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the
+ * case, acquire then release the associated LOCKNAME writer serialization
+ * lock. This will allow any possibly-preempted writer to make progress
* until the end of its writer serialization lock critical section.
*
- * This lock-unlock technique must be implemented for all PREEMPT_RT
+ * This lock-unlock technique must be implemented for all of PREEMPT_RT
* sleeping locks. See Documentation/locking/locktypes.rst
*/
#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
@@ -155,54 +155,56 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
#endif

/**
- * typedef seqcount_LOCKTYPE_t - sequence counter with associated lock
+ * typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated
* @seqcount: The real sequence counter
- * @lock: Pointer to the associated spinlock
+ * @lock: Pointer to the associated lock
*
* A plain sequence counter with external writer synchronization by
- * LOCKTYPE @lock. The lock is associated to the sequence counter in the
+ * LOCKNAME @lock. The lock is associated to the sequence counter in the
* static initializer or init function. This enables lockdep to validate
* that the write side critical section is properly serialized.
*
- * LOCKTYPE: raw_spinlock, spinlock, rwlock, mutex, or ww_mutex.
- */
-
-/**
- * seqcount_LOCKTYPE_init() - runtime initializer for seqcount_LOCKTYPE_t
- * @s: Pointer to the seqcount_LOCKTYPE_t instance
- * @lock: Pointer to the associated LOCKTYPE
+ * LOCKNAME: raw_spinlock, spinlock, rwlock, mutex, or ww_mutex.
*/

/*
- * SEQCOUNT_LOCKTYPE() - Instantiate seqcount_LOCKTYPE_t and helpers
- * @locktype: "LOCKTYPE" part of seqcount_LOCKTYPE_t
- * @locktype_t: canonical/full LOCKTYPE C data type
+ * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t
+ * @s: Pointer to the seqcount_LOCKNAME_t instance
+ * @lock: Pointer to the associated lock
+ */
+
+/*
+ * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers
+ * seqprop_LOCKNAME_*() - Property accessors for seqcount_LOCKNAME_t
+ *
+ * @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t
+ * @locktype: LOCKNAME canonical C data type
* @preemptible: preemptibility of above locktype
* @lockmember: argument for lockdep_assert_held()
* @lockbase: associated lock release function (prefix only)
* @lock_acquire: associated lock acquisition function (full call)
*/
-#define SEQCOUNT_LOCKTYPE(locktype, locktype_t, preemptible, lockmember, lockbase, lock_acquire) \
-typedef struct seqcount_##locktype { \
+#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember, lockbase, lock_acquire) \
+typedef struct seqcount_##lockname { \
seqcount_t seqcount; \
- __SEQ_LOCK(locktype_t *lock); \
-} seqcount_##locktype##_t; \
+ __SEQ_LOCK(locktype *lock); \
+} seqcount_##lockname##_t; \
\
static __always_inline void \
-seqcount_##locktype##_init(seqcount_##locktype##_t *s, locktype_t *lock)\
+seqcount_##lockname##_init(seqcount_##lockname##_t *s, locktype *lock) \
{ \
seqcount_init(&s->seqcount); \
__SEQ_LOCK(s->lock = lock); \
} \
\
static __always_inline seqcount_t * \
-__seqcount_##locktype##_ptr(seqcount_##locktype##_t *s) \
+__seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \
{ \
return &s->seqcount; \
} \
\
static __always_inline unsigned \
-__seqcount_##locktype##_sequence(const seqcount_##locktype##_t *s) \
+__seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \
{ \
unsigned seq = READ_ONCE(s->seqcount.sequence); \
\
@@ -211,7 +213,7 @@ __seqcount_##locktype##_sequence(const seqcount_##locktype##_t *s) \
\
if (preemptible && unlikely(seq & 1)) { \
__SEQ_LOCK(lock_acquire); \
- __SEQ_LOCK(lockbase##_unlock((void *) s->lock)); \
+ __SEQ_LOCK(lockbase##_unlock(s->lock)); \
\
/* \
* Re-read the sequence counter since the (possibly \
@@ -224,13 +226,17 @@ __seqcount_##locktype##_sequence(const seqcount_##locktype##_t *s) \
} \
\
static __always_inline bool \
-__seqcount_##locktype##_preemptible(seqcount_##locktype##_t *s) \
+__seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \
{ \
- return preemptible; \
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \
+ return preemptible; \
+ \
+ /* PREEMPT_RT relies on the above LOCK+UNLOCK */ \
+ return false; \
} \
\
static __always_inline void \
-__seqcount_##locktype##_assert(seqcount_##locktype##_t *s) \
+__seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \
{ \
__SEQ_LOCK(lockdep_assert_held(lockmember)); \
}
@@ -239,56 +245,56 @@ __seqcount_##locktype##_assert(seqcount_##locktype##_t *s) \
* __seqprop() for seqcount_t
*/

-static inline seqcount_t *__seqcount_t_ptr(seqcount_t *s)
+static inline seqcount_t *__seqprop_ptr(seqcount_t *s)
{
return s;
}

-static inline unsigned __seqcount_t_sequence(seqcount_t *s)
+static inline unsigned __seqprop_sequence(const seqcount_t *s)
{
return READ_ONCE(s->sequence);
}

-static inline bool __seqcount_t_preemptible(seqcount_t *s)
+static inline bool __seqprop_preemptible(const seqcount_t *s)
{
return false;
}

-static inline void __seqcount_t_assert(seqcount_t *s)
+static inline void __seqprop_assert(const seqcount_t *s)
{
lockdep_assert_preemption_disabled();
}

#define __SEQ_RT IS_ENABLED(CONFIG_PREEMPT_RT)

-SEQCOUNT_LOCKTYPE(raw_spinlock, raw_spinlock_t, false, s->lock, raw_spin, raw_spin_lock(s->lock))
-SEQCOUNT_LOCKTYPE(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock))
-SEQCOUNT_LOCKTYPE(rwlock, rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock))
-SEQCOUNT_LOCKTYPE(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock))
-SEQCOUNT_LOCKTYPE(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mutex, ww_mutex_lock(s->lock, NULL))
+SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock, raw_spin, raw_spin_lock(s->lock))
+SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock))
+SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock))
+SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock))
+SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mutex, ww_mutex_lock(s->lock, NULL))

-/**
- * SEQCNT_LOCKTYPE_ZERO - static initializer for seqcount_LOCKTYPE_t
- * @name: Name of the seqcount_LOCKTYPE_t instance
- * @lock: Pointer to the associated LOCKTYPE
+/*
+ * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
+ * @name: Name of the seqcount_LOCKNAME_t instance
+ * @lock: Pointer to the associated LOCKNAME
*/

-#define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) { \
+#define SEQCOUNT_LOCKNAME_ZERO(seq_name, assoc_lock) { \
.seqcount = SEQCNT_ZERO(seq_name.seqcount), \
__SEQ_LOCK(.lock = (assoc_lock)) \
}

-#define SEQCNT_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock)
-#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock)
-#define SEQCNT_RWLOCK_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock)
-#define SEQCNT_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock)
-#define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock)
+#define SEQCNT_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock)
+#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock)
+#define SEQCNT_RWLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock)
+#define SEQCNT_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock)
+#define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock)

-#define __seqprop_case(s, locktype, prop) \
- seqcount_##locktype##_t: __seqcount_##locktype##_##prop((void *)(s))
+#define __seqprop_case(s, lockname, prop) \
+ seqcount_##lockname##_t: __seqprop_##lockname##_##prop((void *)(s))

#define __seqprop(s, prop) _Generic(*(s), \
- seqcount_t: __seqcount_t_##prop((void *)(s)), \
+ seqcount_t: __seqprop_##prop((void *)(s)), \
__seqprop_case((s), raw_spinlock, prop), \
__seqprop_case((s), spinlock, prop), \
__seqprop_case((s), rwlock, prop), \
@@ -302,7 +308,7 @@ SEQCOUNT_LOCKTYPE(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu

/**
* __read_seqcount_begin() - begin a seqcount_t read section w/o barrier
- * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
*
* __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
* barrier. Callers should ensure that smp_rmb() or equivalent ordering is
@@ -316,36 +322,32 @@ SEQCOUNT_LOCKTYPE(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu
*/
#define __read_seqcount_begin(s) \
({ \
- unsigned ret; \
+ unsigned seq; \
\
- while (true) { \
- ret = __seqcount_sequence(s); \
- if (likely(! (ret & 1))) \
- break; \
+ while ((seq = __seqcount_sequence(s)) & 1) \
cpu_relax(); \
- } \
\
kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \
- ret; \
+ seq; \
})

/**
* raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep
- * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
*
* Return: count to be passed to read_seqcount_retry()
*/
#define raw_read_seqcount_begin(s) \
({ \
- unsigned ret = __read_seqcount_begin(s); \
+ unsigned seq = __read_seqcount_begin(s); \
\
smp_rmb(); \
- ret; \
+ seq; \
})

/**
* read_seqcount_begin() - begin a seqcount_t read critical section
- * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
*
* Return: count to be passed to read_seqcount_retry()
*/
@@ -357,7 +359,7 @@ SEQCOUNT_LOCKTYPE(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu

/**
* raw_read_seqcount() - read the raw seqcount_t counter value
- * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
*
* raw_read_seqcount opens a read critical section of the given
* seqcount_t, without any lockdep checking, and without checking or
@@ -368,17 +370,17 @@ SEQCOUNT_LOCKTYPE(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu
*/
#define raw_read_seqcount(s) \
({ \
- unsigned ret = __seqcount_sequence(s); \
+ unsigned seq = __seqcount_sequence(s); \
\
smp_rmb(); \
kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \
- ret; \
+ seq; \
})

/**
* raw_seqcount_begin() - begin a seqcount_t read critical section w/o
* lockdep and w/o counter stabilization
- * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
*
* raw_seqcount_begin opens a read critical section of the given
* seqcount_t. Unlike read_seqcount_begin(), this function will not wait
@@ -403,7 +405,7 @@ SEQCOUNT_LOCKTYPE(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu

/**
* __read_seqcount_retry() - end a seqcount_t read section w/o barrier
- * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
* @start: count, from read_seqcount_begin()
*
* __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
@@ -427,7 +429,7 @@ static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start)

/**
* read_seqcount_retry() - end a seqcount_t read critical section
- * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
* @start: count, from read_seqcount_begin()
*
* read_seqcount_retry closes the read critical section of given
@@ -445,22 +447,13 @@ static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start)
return __read_seqcount_t_retry(s, start);
}

-/*
- * Automatically disable preemption for seqcount_LOCKTYPE_t writers, if the
- * associated lock does not implicitly disable preemption.
- *
- * Don't do it for PREEMPT_RT. Check __SEQ_LOCK() for rationale.
- */
-#define __seq_enforce_preemption_protection(s) \
- (!IS_ENABLED(CONFIG_PREEMPT_RT) && __seqcount_lock_preemptible(s))
-
/**
* raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
- * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
*/
#define raw_write_seqcount_begin(s) \
do { \
- if (__seq_enforce_preemption_protection(s)) \
+ if (__seqcount_lock_preemptible(s)) \
preempt_disable(); \
\
raw_write_seqcount_t_begin(__seqcount_ptr(s)); \
@@ -475,13 +468,13 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s)

/**
* raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep
- * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
*/
#define raw_write_seqcount_end(s) \
do { \
raw_write_seqcount_t_end(__seqcount_ptr(s)); \
\
- if (__seq_enforce_preemption_protection(s)) \
+ if (__seqcount_lock_preemptible(s)) \
preempt_enable(); \
} while (0)

@@ -495,7 +488,7 @@ static inline void raw_write_seqcount_t_end(seqcount_t *s)
/**
* write_seqcount_begin_nested() - start a seqcount_t write section with
* custom lockdep nesting level
- * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
* @subclass: lockdep nesting level
*
* See Documentation/locking/lockdep-design.rst
@@ -504,7 +497,7 @@ static inline void raw_write_seqcount_t_end(seqcount_t *s)
do { \
__seqcount_assert_lock_held(s); \
\
- if (__seq_enforce_preemption_protection(s)) \
+ if (__seqcount_lock_preemptible(s)) \
preempt_disable(); \
\
write_seqcount_t_begin_nested(__seqcount_ptr(s), subclass); \
@@ -518,7 +511,7 @@ static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)

/**
* write_seqcount_begin() - start a seqcount_t write side critical section
- * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
*
* write_seqcount_begin opens a write side critical section of the given
* seqcount_t.
@@ -531,7 +524,7 @@ static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
do { \
__seqcount_assert_lock_held(s); \
\
- if (__seq_enforce_preemption_protection(s)) \
+ if (__seqcount_lock_preemptible(s)) \
preempt_disable(); \
\
write_seqcount_t_begin(__seqcount_ptr(s)); \
@@ -544,7 +537,7 @@ static inline void write_seqcount_t_begin(seqcount_t *s)

/**
* write_seqcount_end() - end a seqcount_t write side critical section
- * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
*
* The write section must've been opened with write_seqcount_begin().
*/
@@ -552,7 +545,7 @@ static inline void write_seqcount_t_begin(seqcount_t *s)
do { \
write_seqcount_t_end(__seqcount_ptr(s)); \
\
- if (__seq_enforce_preemption_protection(s)) \
+ if (__seqcount_lock_preemptible(s)) \
preempt_enable(); \
} while (0)

@@ -564,7 +557,7 @@ static inline void write_seqcount_t_end(seqcount_t *s)

/**
* raw_write_seqcount_barrier() - do a seqcount_t write barrier
- * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
*
* This can be used to provide an ordering guarantee instead of the usual
* consistency guarantee. It is one wmb cheaper, because it can collapse
@@ -618,7 +611,7 @@ static inline void raw_write_seqcount_t_barrier(seqcount_t *s)
/**
* write_seqcount_invalidate() - invalidate in-progress seqcount_t read
* side operations
- * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
*
* After write_seqcount_invalidate, no seqcount_t read side operations
* will complete successfully and see data older than this.
@@ -634,34 +627,73 @@ static inline void write_seqcount_t_invalidate(seqcount_t *s)
kcsan_nestable_atomic_end();
}

-/**
- * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy
- * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
+/*
+ * Latch sequence counters (seqcount_latch_t)
*
- * Use seqcount_t latching to switch between two storage places protected
- * by a sequence counter. Doing so allows having interruptible, preemptible,
- * seqcount_t write side critical sections.
+ * A sequence counter variant where the counter even/odd value is used to
+ * switch between two copies of protected data. This allows the read path,
+ * typically NMIs, to safely interrupt the write side critical section.
*
- * Check raw_write_seqcount_latch() for more details and a full reader and
- * writer usage example.
- *
- * Return: sequence counter raw value. Use the lowest bit as an index for
- * picking which data copy to read. The full counter value must then be
- * checked with read_seqcount_retry().
+ * As the write sections are fully preemptible, no special handling for
+ * PREEMPT_RT is needed.
*/
-#define raw_read_seqcount_latch(s) \
- raw_read_seqcount_t_latch(__seqcount_ptr(s))
+typedef struct {
+ seqcount_t seqcount;
+} seqcount_latch_t;

-static inline int raw_read_seqcount_t_latch(seqcount_t *s)
-{
- /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
- int seq = READ_ONCE(s->sequence); /* ^^^ */
- return seq;
+/**
+ * SEQCNT_LATCH_ZERO() - static initializer for seqcount_latch_t
+ * @seq_name: Name of the seqcount_latch_t instance
+ */
+#define SEQCNT_LATCH_ZERO(seq_name) { \
+ .seqcount = SEQCNT_ZERO(seq_name.seqcount), \
}

/**
- * raw_write_seqcount_latch() - redirect readers to even/odd copy
- * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
+ * seqcount_latch_init() - runtime initializer for seqcount_latch_t
+ * @s: Pointer to the seqcount_latch_t instance
+ */
+static inline void seqcount_latch_init(seqcount_latch_t *s)
+{
+ seqcount_init(&s->seqcount);
+}
+
+/**
+ * raw_read_seqcount_latch() - pick even/odd latch data copy
+ * @s: Pointer to seqcount_latch_t
+ *
+ * See raw_write_seqcount_latch() for details and a full reader/writer
+ * usage example.
+ *
+ * Return: sequence counter raw value. Use the lowest bit as an index for
+ * picking which data copy to read. The full counter must then be checked
+ * with read_seqcount_latch_retry().
+ */
+static inline unsigned raw_read_seqcount_latch(const seqcount_latch_t *s)
+{
+ /*
+ * Pairs with the first smp_wmb() in raw_write_seqcount_latch().
+ * Due to the dependent load, a full smp_rmb() is not needed.
+ */
+ return READ_ONCE(s->seqcount.sequence);
+}
+
+/**
+ * read_seqcount_latch_retry() - end a seqcount_latch_t read section
+ * @s: Pointer to seqcount_latch_t
+ * @start: count, from raw_read_seqcount_latch()
+ *
+ * Return: true if a read section retry is required, else false
+ */
+static inline int
+read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
+{
+ return read_seqcount_retry(&s->seqcount, start);
+}
+
+/**
+ * raw_write_seqcount_latch() - redirect latch readers to even/odd copy
+ * @s: Pointer to seqcount_latch_t
*
* The latch technique is a multiversion concurrency control method that allows
* queries during non-atomic modifications. If you can guarantee queries never
@@ -680,7 +712,7 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s)
* The basic form is a data structure like::
*
* struct latch_struct {
- * seqcount_t seq;
+ * seqcount_latch_t seq;
* struct data_struct data[2];
* };
*
@@ -690,13 +722,13 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s)
* void latch_modify(struct latch_struct *latch, ...)
* {
* smp_wmb(); // Ensure that the last data[1] update is visible
- * latch->seq++;
+ * latch->seq.sequence++;
* smp_wmb(); // Ensure that the seqcount update is visible
*
* modify(latch->data[0], ...);
*
* smp_wmb(); // Ensure that the data[0] update is visible
- * latch->seq++;
+ * latch->seq.sequence++;
* smp_wmb(); // Ensure that the seqcount update is visible
*
* modify(latch->data[1], ...);
@@ -715,8 +747,8 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s)
* idx = seq & 0x01;
* entry = data_query(latch->data[idx], ...);
*
- * // read_seqcount_retry() includes needed smp_rmb()
- * } while (read_seqcount_retry(&latch->seq, seq));
+ * // This includes needed smp_rmb()
+ * } while (read_seqcount_latch_retry(&latch->seq, seq));
*
* return entry;
* }
@@ -735,19 +767,16 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s)
* to miss an entire modification sequence, once it resumes it might
* observe the new entry.
*
- * NOTE:
+ * NOTE2:
*
* When data is a dynamic data structure; one should use regular RCU
* patterns to manage the lifetimes of the objects within.
*/
-#define raw_write_seqcount_latch(s) \
- raw_write_seqcount_t_latch(__seqcount_ptr(s))
-
-static inline void raw_write_seqcount_t_latch(seqcount_t *s)
+static inline void raw_write_seqcount_latch(seqcount_latch_t *s)
{
- smp_wmb(); /* prior stores before incrementing "sequence" */
- s->sequence++;
- smp_wmb(); /* increment "sequence" before following stores */
+ smp_wmb(); /* prior stores before incrementing "sequence" */
+ s->seqcount.sequence++;
+ smp_wmb(); /* increment "sequence" before following stores */
}

/*
diff --git a/include/net/net_seq_lock.h b/include/net/net_seq_lock.h
index 95a497a72e511..67710bace7418 100644
--- a/include/net/net_seq_lock.h
+++ b/include/net/net_seq_lock.h
@@ -6,15 +6,6 @@
# define net_seq_begin(__r) read_seqbegin(__r)
# define net_seq_retry(__r, __s) read_seqretry(__r, __s)

-static inline int try_write_seqlock(seqlock_t *sl)
-{
- if (spin_trylock(&sl->lock)) {
- write_seqcount_begin(&sl->seqcount);
- return 1;
- }
- return 0;
-}
-
#else
# define net_seqlock_t seqcount_t
# define net_seq_begin(__r) read_seqcount_begin(__r)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 796ac453d9519..40be4443b6bdb 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -168,8 +168,16 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
return false;
}
#ifdef CONFIG_PREEMPT_RT
- if (try_write_seqlock(&qdisc->running))
+ if (spin_trylock(&qdisc->running.lock)) {
+ seqcount_t *s = &qdisc->running.seqcount.seqcount;
+ /*
+ * Variant of write_seqcount_t_begin() telling lockdep that a
+ * trylock was attempted.
+ */
+ raw_write_seqcount_t_begin(s);
+ seqcount_acquire(&s->dep_map, 0, 1, _RET_IP_);
return true;
+ }
return false;
#else
/* Variant of write_seqcount_begin() telling lockdep a trylock
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 1c03eec6ca9b9..0642013dace49 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -35,7 +35,7 @@
* into a single 64-byte cache line.
*/
struct clock_data {
- seqcount_t seq;
+ seqcount_latch_t seq;
struct clock_read_data read_data[2];
ktime_t wrap_kt;
unsigned long rate;
@@ -76,7 +76,7 @@ struct clock_read_data *sched_clock_read_begin(unsigned int *seq)

int sched_clock_read_retry(unsigned int seq)
{
- return read_seqcount_retry(&cd.seq, seq);
+ return read_seqcount_latch_retry(&cd.seq, seq);
}

unsigned long long notrace sched_clock(void)
@@ -258,7 +258,7 @@ void __init generic_sched_clock_init(void)
*/
static u64 notrace suspended_sched_clock_read(void)
{
- unsigned int seq = raw_read_seqcount(&cd.seq);
+ unsigned int seq = raw_read_seqcount_latch(&cd.seq);

return cd.read_data[seq & 1].epoch_cyc;
}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 4c47f388a83f1..999c981ae766f 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -64,7 +64,7 @@ static struct timekeeper shadow_timekeeper;
* See @update_fast_timekeeper() below.
*/
struct tk_fast {
- seqcount_raw_spinlock_t seq;
+ seqcount_latch_t seq;
struct tk_read_base base[2];
};

@@ -81,13 +81,13 @@ static struct clocksource dummy_clock = {
};

static struct tk_fast tk_fast_mono ____cacheline_aligned = {
- .seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_mono.seq, &timekeeper_lock),
+ .seq = SEQCNT_LATCH_ZERO(tk_fast_mono.seq),
.base[0] = { .clock = &dummy_clock, },
.base[1] = { .clock = &dummy_clock, },
};

static struct tk_fast tk_fast_raw ____cacheline_aligned = {
- .seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_raw.seq, &timekeeper_lock),
+ .seq = SEQCNT_LATCH_ZERO(tk_fast_raw.seq),
.base[0] = { .clock = &dummy_clock, },
.base[1] = { .clock = &dummy_clock, },
};
@@ -467,7 +467,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
tk_clock_read(tkr),
tkr->cycle_last,
tkr->mask));
- } while (read_seqcount_retry(&tkf->seq, seq));
+ } while (read_seqcount_latch_retry(&tkf->seq, seq));

return now;
}
@@ -533,7 +533,7 @@ static __always_inline u64 __ktime_get_real_fast_ns(struct tk_fast *tkf)
tk_clock_read(tkr),
tkr->cycle_last,
tkr->mask));
- } while (read_seqcount_retry(&tkf->seq, seq));
+ } while (read_seqcount_latch_retry(&tkf->seq, seq));

return now;
}
diff --git a/localversion-rt b/localversion-rt
index ad3da1bcab7e8..0efe7ba1930e1 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt4
+-rt5
diff --git a/mm/swap.c b/mm/swap.c
index d16d65d9b4e09..a1ec807e325d1 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -763,10 +763,20 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
*/
void lru_add_drain_all(void)
{
- static seqcount_t seqcount = SEQCNT_ZERO(seqcount);
- static DEFINE_MUTEX(lock);
+ /*
+ * lru_drain_gen - Global pages generation number
+ *
+ * (A) Definition: global lru_drain_gen = x implies that all generations
+ * 0 < n <= x are already *scheduled* for draining.
+ *
+ * This is an optimization for the highly-contended use case where a
+ * user space workload keeps constantly generating a flow of pages for
+ * each CPU.
+ */
+ static unsigned int lru_drain_gen;
static struct cpumask has_work;
- int cpu, seq;
+ static DEFINE_MUTEX(lock);
+ unsigned cpu, this_gen;

/*
* Make sure nobody triggers this path before mm_percpu_wq is fully
@@ -775,21 +785,54 @@ void lru_add_drain_all(void)
if (WARN_ON(!mm_percpu_wq))
return;

- seq = raw_read_seqcount_latch(&seqcount);
+ /*
+ * Guarantee pagevec counter stores visible by this CPU are visible to
+ * other CPUs before loading the current drain generation.
+ */
+ smp_mb();
+
+ /*
+ * (B) Locally cache global LRU draining generation number
+ *
+ * The read barrier ensures that the counter is loaded before the mutex
+ * is taken. It pairs with smp_mb() inside the mutex critical section
+ * at (D).
+ */
+ this_gen = smp_load_acquire(&lru_drain_gen);

mutex_lock(&lock);

/*
- * Piggyback on drain started and finished while we waited for lock:
- * all pages pended at the time of our enter were drained from vectors.
+ * (C) Exit the draining operation if a newer generation, from another
+ * lru_add_drain_all(), was already scheduled for draining. Check (A).
*/
- if (__read_seqcount_retry(&seqcount, seq))
+ if (unlikely(this_gen != lru_drain_gen))
goto done;

- raw_write_seqcount_latch(&seqcount);
+ /*
+ * (D) Increment global generation number
+ *
+ * Pairs with smp_load_acquire() at (B), outside of the critical
+ * section. Use a full memory barrier to guarantee that the new global
+ * drain generation number is stored before loading pagevec counters.
+ *
+ * This pairing must be done here, before the for_each_online_cpu loop
+ * below which drains the page vectors.
+ *
+ * Let x, y, and z represent some system CPU numbers, where x < y < z.
+ * Assume CPU #z is is in the middle of the for_each_online_cpu loop
+ * below and has already reached CPU #y's per-cpu data. CPU #x comes
+ * along, adds some pages to its per-cpu vectors, then calls
+ * lru_add_drain_all().
+ *
+ * If the paired barrier is done at any later step, e.g. after the
+ * loop, CPU #x will just exit at (C) and miss flushing out all of its
+ * added pages.
+ */
+ WRITE_ONCE(lru_drain_gen, lru_drain_gen + 1);
+ smp_mb();

cpumask_clear(&has_work);
-
for_each_online_cpu(cpu) {
struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);

@@ -801,7 +844,7 @@ void lru_add_drain_all(void)
need_activate_page_drain(cpu)) {
INIT_WORK(work, lru_add_drain_per_cpu);
queue_work_on(cpu, mm_percpu_wq, work);
- cpumask_set_cpu(cpu, &has_work);
+ __cpumask_set_cpu(cpu, &has_work);
}
}

@@ -816,7 +859,7 @@ void lru_add_drain_all(void)
{
lru_add_drain();
}
-#endif
+#endif /* CONFIG_SMP */

/**
* release_pages - batched put_page()