[ANNOUNCE] v5.15-rc7-rt15

From: Sebastian Andrzej Siewior
Date: Fri Oct 29 2021 - 04:34:20 EST


Dear RT folks!

I'm pleased to announce the v5.15-rc7-rt15 patch set.

Changes since v5.15-rc7-rt14:

- Avoid a sleeping-while-atomic warning in fscache. Reported by Gregor
Beck. A short sketch of the offending pattern is shown after this list.

- Redo the synchronisation in fs/namespace and remove cpu_chill() now
that it finally has no users.

- Disable NUMA_BALANCING. It is problematic because it may block a task
while memory is moved from one NUMA node to another. This was brought
up by Mel Gorman while discussing a different issue.

- Update the i915 patches to v2, which has been posted upstream.
Testing on hardware is appreciated.
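
A minimal sketch of the fscache pattern that caused the warning (not the
in-tree code, simplified for illustration): get_cpu_var() disables
preemption, and the subsequent wake_up() acquires the wait_queue_head_t
lock, which is a sleeping spinlock on PREEMPT_RT, hence the
sleeping-while-atomic report.

	/* simplified illustration of the removed per-CPU wait queue use */
	wait_queue_head_t *cong_wq = &get_cpu_var(fscache_object_cong_wait);

	if (queue_work(fscache_object_wq, &object->work)) {
		if (fscache_object_congested())
			wake_up(cong_wq);	/* takes a sleeping lock on RT */
	} else
		fscache_put_object(object, fscache_obj_put_queue);

	put_cpu_var(fscache_object_cong_wait);	/* re-enables preemption */

The fix in the delta below replaces the per-CPU wait queues with one
global fscache_object_cong_wait, so the wake-up no longer happens inside
a preempt-disabled section.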

Known issues
- netconsole triggers WARN.

- The "Memory controller" (CONFIG_MEMCG) has been disabled.

- Valentin Schneider reported a few splats on ARM64, see
https://lkml.kernel.org/r/20210810134127.1394269-1-valentin.schneider@xxxxxxx

The delta patch against v5.15-rc7-rt14 is appended below and can be found here:

https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.15/incr/patch-5.15-rc7-rt14-rt15.patch.xz

You can get this release via the git tree at:

git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.15-rc7-rt15

The RT patch against v5.15-rc7 can be found here:

https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.15/older/patch-5.15-rc7-rt15.patch.xz

The split quilt queue is available at:

https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.15/older/patches-5.15-rc7-rt15.tar.xz

Sebastian

diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index bed60dff93eff..601274ba86e46 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -161,11 +161,10 @@ static inline void intel_context_enter(struct intel_context *ce)
ce->ops->enter(ce);
}

-static inline void intel_context_mark_active(struct intel_context *ce,
- bool timeline_mutex_needed)
+static inline void intel_context_mark_active(struct intel_context *ce)
{
- if (timeline_mutex_needed)
- lockdep_assert_held(&ce->timeline->mutex);
+ lockdep_assert(lockdep_is_held(&ce->timeline->mutex) ||
+ test_bit(CONTEXT_IS_PARKED, &ce->flags));
++ce->active_count;
}

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index e54351a170e2c..1022be795e682 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -112,6 +112,7 @@ struct intel_context {
#define CONTEXT_FORCE_SINGLE_SUBMISSION 7
#define CONTEXT_NOPREEMPT 8
#define CONTEXT_LRCA_DIRTY 9
+#define CONTEXT_IS_PARKED 10

struct {
u64 timeout_us;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 02c0ab9fbb4b8..74775ae961b2b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -42,7 +42,7 @@ heartbeat_create(struct intel_context *ce, gfp_t gfp)
struct i915_request *rq;

intel_context_enter(ce);
- rq = __i915_request_create(ce, gfp, true);
+ rq = __i915_request_create(ce, gfp);
intel_context_exit(ce);

return rq;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index d75638d1d561e..e84f03a276d18 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -167,9 +167,10 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
* engine->wakeref.count, we may see the request completion and retire
* it causing an underflow of the engine->wakeref.
*/
+ set_bit(CONTEXT_IS_PARKED, &ce->flags);
GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);

- rq = __i915_request_create(ce, GFP_NOWAIT, false);
+ rq = __i915_request_create(ce, GFP_NOWAIT);
if (IS_ERR(rq))
/* Context switch failed, hope for the best! Maybe reset? */
goto out_unlock;
@@ -198,6 +199,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)

result = false;
out_unlock:
+ clear_bit(CONTEXT_IS_PARKED, &ce->flags);
return result;
}

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 3bab8f651b4e7..b9dd6100c6d17 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -833,8 +833,7 @@ static void __i915_request_ctor(void *arg)
}

struct i915_request *
-__i915_request_create(struct intel_context *ce, gfp_t gfp,
- bool timeline_mutex_needed)
+__i915_request_create(struct intel_context *ce, gfp_t gfp)
{
struct intel_timeline *tl = ce->timeline;
struct i915_request *rq;
@@ -958,7 +957,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp,

rq->infix = rq->ring->emit; /* end of header; start of user payload */

- intel_context_mark_active(ce, timeline_mutex_needed);
+ intel_context_mark_active(ce);
list_add_tail_rcu(&rq->link, &tl->requests);

return rq;
@@ -994,7 +993,7 @@ i915_request_create(struct intel_context *ce)
i915_request_retire(rq);

intel_context_enter(ce);
- rq = __i915_request_create(ce, GFP_KERNEL, true);
+ rq = __i915_request_create(ce, GFP_KERNEL);
intel_context_exit(ce); /* active reference transferred to request */
if (IS_ERR(rq))
goto err_unlock;
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index ba1ced79c8d2c..a2f713b4ac2f9 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -320,8 +320,7 @@ static inline bool dma_fence_is_i915(const struct dma_fence *fence)
struct kmem_cache *i915_request_slab_cache(void);

struct i915_request * __must_check
-__i915_request_create(struct intel_context *ce, gfp_t gfp,
- bool timeline_mutex_needed);
+__i915_request_create(struct intel_context *ce, gfp_t gfp);
struct i915_request * __must_check
i915_request_create(struct intel_context *ce);

@@ -610,7 +609,8 @@ i915_request_timeline(const struct i915_request *rq)
{
/* Valid only while the request is being constructed (or retired). */
return rcu_dereference_protected(rq->timeline,
- lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex));
+ lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex) ||
+ test_bit(CONTEXT_IS_PARKED, &rq->context->flags));
}

static inline struct i915_gem_context *
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 5259edacde380..b36b27c090496 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -343,7 +343,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000)

/* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */
-#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT)
+#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT)
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic())
#else
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0)
diff --git a/drivers/gpu/drm/selftests/test-drm_damage_helper.c b/drivers/gpu/drm/selftests/test-drm_damage_helper.c
index 1c19a5d3eefbf..8d8d8e214c283 100644
--- a/drivers/gpu/drm/selftests/test-drm_damage_helper.c
+++ b/drivers/gpu/drm/selftests/test-drm_damage_helper.c
@@ -30,6 +30,7 @@ static void mock_setup(struct drm_plane_state *state)
mock_device.driver = &mock_driver;
mock_device.mode_config.prop_fb_damage_clips = &mock_prop;
mock_plane.dev = &mock_device;
+ mock_obj_props.count = 0;
mock_plane.base.properties = &mock_obj_props;
mock_prop.base.id = 1; /* 0 is an invalid id */
mock_prop.dev = &mock_device;
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index c3e4804b8fcbf..9edb87e11680b 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -81,7 +81,6 @@ extern unsigned fscache_debug;
extern struct kobject *fscache_root;
extern struct workqueue_struct *fscache_object_wq;
extern struct workqueue_struct *fscache_op_wq;
-DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);

extern unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n);

diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index 4207f98e405fd..85f8cf3a323d5 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -41,8 +41,6 @@ struct kobject *fscache_root;
struct workqueue_struct *fscache_object_wq;
struct workqueue_struct *fscache_op_wq;

-DEFINE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
-
/* these values serve as lower bounds, will be adjusted in fscache_init() */
static unsigned fscache_object_max_active = 4;
static unsigned fscache_op_max_active = 2;
@@ -138,7 +136,6 @@ unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n)
static int __init fscache_init(void)
{
unsigned int nr_cpus = num_possible_cpus();
- unsigned int cpu;
int ret;

fscache_object_max_active =
@@ -161,9 +158,6 @@ static int __init fscache_init(void)
if (!fscache_op_wq)
goto error_op_wq;

- for_each_possible_cpu(cpu)
- init_waitqueue_head(&per_cpu(fscache_object_cong_wait, cpu));
-
ret = fscache_proc_init();
if (ret < 0)
goto error_proc;
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 6a675652129b2..78f332f2e98c8 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -798,6 +798,8 @@ void fscache_object_destroy(struct fscache_object *object)
}
EXPORT_SYMBOL(fscache_object_destroy);

+static DECLARE_WAIT_QUEUE_HEAD(fscache_object_cong_wait);
+
/*
* enqueue an object for metadata-type processing
*/
@@ -806,12 +808,10 @@ void fscache_enqueue_object(struct fscache_object *object)
_enter("{OBJ%x}", object->debug_id);

if (fscache_get_object(object, fscache_obj_get_queue) >= 0) {
- wait_queue_head_t *cong_wq =
- &get_cpu_var(fscache_object_cong_wait);

if (queue_work(fscache_object_wq, &object->work)) {
if (fscache_object_congested())
- wake_up(cong_wq);
+ wake_up(&fscache_object_cong_wait);
} else
fscache_put_object(object, fscache_obj_put_queue);

@@ -833,16 +833,15 @@ void fscache_enqueue_object(struct fscache_object *object)
*/
bool fscache_object_sleep_till_congested(signed long *timeoutp)
{
- wait_queue_head_t *cong_wq = this_cpu_ptr(&fscache_object_cong_wait);
DEFINE_WAIT(wait);

if (fscache_object_congested())
return true;

- add_wait_queue_exclusive(cong_wq, &wait);
+ add_wait_queue_exclusive(&fscache_object_cong_wait, &wait);
if (!fscache_object_congested())
*timeoutp = schedule_timeout(*timeoutp);
- finish_wait(cong_wq, &wait);
+ finish_wait(&fscache_object_cong_wait, &wait);

return fscache_object_congested();
}
diff --git a/fs/namespace.c b/fs/namespace.c
index c6e1e24be9408..3ab45b47b2860 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -14,7 +14,6 @@
#include <linux/mnt_namespace.h>
#include <linux/user_namespace.h>
#include <linux/namei.h>
-#include <linux/hrtimer.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/idr.h>
@@ -344,10 +343,23 @@ int __mnt_want_write(struct vfsmount *m)
* incremented count after it has set MNT_WRITE_HOLD.
*/
smp_mb();
+ might_lock(&mount_lock.lock);
while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
- preempt_enable();
- cpu_chill();
- preempt_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ cpu_relax();
+ } else {
+ /*
+ * This prevents priority inversion, if the task
+ * setting MNT_WRITE_HOLD got preempted on a remote
+ * CPU, and it prevents life lock if the task setting
+ * MNT_WRITE_HOLD has a lower priority and is bound to
+ * the same CPU as the task that is spinning here.
+ */
+ preempt_enable();
+ lock_mount_hash();
+ unlock_mount_hash();
+ preempt_disable();
+ }
}
/*
* After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
diff --git a/include/drm/drm_mode_object.h b/include/drm/drm_mode_object.h
index 1e5399e47c3a5..c34a3e8030e12 100644
--- a/include/drm/drm_mode_object.h
+++ b/include/drm/drm_mode_object.h
@@ -60,7 +60,7 @@ struct drm_mode_object {
void (*free_cb)(struct kref *kref);
};

-#define DRM_OBJECT_MAX_PROPERTY 42
+#define DRM_OBJECT_MAX_PROPERTY 24
/**
* struct drm_object_properties - property tracking for &drm_mode_object
*/
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 4a2230e409d2a..0ee140176f102 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -42,7 +42,6 @@ enum hrtimer_mode {
HRTIMER_MODE_PINNED = 0x02,
HRTIMER_MODE_SOFT = 0x04,
HRTIMER_MODE_HARD = 0x08,
- HRTIMER_MODE_CHILL = 0x10,

HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED,
HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED,
@@ -125,7 +124,6 @@ struct hrtimer {
u8 is_rel;
u8 is_soft;
u8 is_hard;
- u8 is_chill;
};

/**
@@ -538,10 +536,4 @@ int hrtimers_dead_cpu(unsigned int cpu);
#define hrtimers_dead_cpu NULL
#endif

-#ifdef CONFIG_PREEMPT_RT
-extern void cpu_chill(void);
-#else
-# define cpu_chill() cpu_relax()
-#endif
-
#endif
diff --git a/init/Kconfig b/init/Kconfig
index 28fd7b8e8c7d6..0b8a65ae1d72f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -901,7 +901,7 @@ config NUMA_BALANCING
bool "Memory placement aware NUMA scheduler"
depends on ARCH_SUPPORTS_NUMA_BALANCING
depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
- depends on SMP && NUMA && MIGRATION
+ depends on SMP && NUMA && MIGRATION && !PREEMPT_RT
help
This option adds support for automatic NUMA aware memory/task placement.
The mechanism is quite primitive and is based on migrating memory when
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 295e065d27905..0ea8702eb5163 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1570,7 +1570,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
base += hrtimer_clockid_to_base(clock_id);
timer->is_soft = softtimer;
timer->is_hard = !!(mode & HRTIMER_MODE_HARD);
- timer->is_chill = !!(mode & HRTIMER_MODE_CHILL);
timer->base = &cpu_base->clock_base[base];
timerqueue_init(&timer->node);
}
@@ -1937,7 +1936,7 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)

t->task = NULL;
if (task)
- wake_up_state(task, timer->is_chill ? TASK_RTLOCK_WAIT : TASK_NORMAL);
+ wake_up_process(task);

return HRTIMER_NORESTART;
}
@@ -2155,34 +2154,6 @@ SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp,
}
#endif

-#ifdef CONFIG_PREEMPT_RT
-/*
- * Sleep for 1 ms in hope whoever holds what we want will let it go.
- */
-void cpu_chill(void)
-{
- unsigned int freeze_flag = current->flags & PF_NOFREEZE;
- ktime_t chill_time;
-
- local_irq_disable();
- current_save_and_set_rtlock_wait_state();
- local_irq_enable();
-
- chill_time = ktime_set(0, NSEC_PER_MSEC);
-
- current->flags |= PF_NOFREEZE;
- schedule_hrtimeout(&chill_time,
- HRTIMER_MODE_REL_HARD| HRTIMER_MODE_CHILL);
- if (!freeze_flag)
- current->flags &= ~PF_NOFREEZE;
-
- local_irq_disable();
- current_restore_rtlock_saved_state();
- local_irq_enable();
-}
-EXPORT_SYMBOL(cpu_chill);
-#endif
-
/*
* Functions related to boot-time initialization:
*/
diff --git a/localversion-rt b/localversion-rt
index 08b3e75841adc..18777ec0c27d4 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt14
+-rt15