[ANNOUNCE] 3.18.7-rt2
From: Sebastian Andrzej Siewior
Date: Mon Feb 23 2015 - 04:06:30 EST
Dear RT folks!
I'm pleased to announce the v3.18.7-rt2 patch set.
There are still patches to collect, but I had this set tested for a
couple of days; it got big and I did not want to sit on it any longer.
Changes since v3.18.7-rt1:
- a patch for sched/deadline to get it to work on RT (Juri Lelli)
- a patch for proper lockdep annotation for the ww-mutex (Mike Galbraith)
- a patch for AIO to avoid "sleeping while atomic". Reported by Mike
Galbraith, fix suggested by Benjamin LaHaise
- a patch to use cmpxchg() in the ARM locking path on ARMv6 and later,
which actually improves performance (from Yong Zhang, resent by Nathan
Sullivan)
- a patch to avoid "sleeping while atomic" on ARM while accessing kernel
memory (Yadi Hu)
- a patch to avoid a race in the futex code (on ARM) which could result
in the unlocking task still owning the lock.
- a patch for memcontrol / cgroup to avoid "sleeping while atomic".
Reported by Nikita Yushchenko, fixed by Mike Galbraith (a sketch of the
underlying pattern follows this list)
- a patch to avoid a NULL-pointer dereference if an rtmutex is locked
(again) by its owner
- a patch to avoid crashes and get RT working on MIPS platforms with
dcache aliasing (Yang Shi)
- a patch to properly use the rtmutex deadlock detector in ww-mutex
which seems to cure a nouveau deadlock (Gustavo Bittencourt)
- a patch to properly check for a waiter in the rtmutex code instead of
dereferencing an invalid pointer (Brad Mouring)
- a patch to not disable interrupts in sas_ata on -RT (Paul
Gortmaker)
- a patch for better support of the UV x86 platform (Mike Galbraith)
- a patch to not wake up the MCE notify helper if it has not yet been
initialized (Paul Gortmaker)
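Several of the fixes above cure the same class of bug: get_cpu_var()
disables preemption, and taking a sleeping spinlock afterwards means
"sleeping while atomic" on -RT. The cure, visible in the memcontrol
hunk of the delta below, is get_cpu_light(), which only disables
migration on -RT. A minimal sketch of the pattern (the per-CPU
structure and names here are made up for illustration, not taken from
the series):

	struct my_stock {
		unsigned int nr;
	};
	static DEFINE_PER_CPU(struct my_stock, my_stock);

	static void my_refill(unsigned int nr)
	{
		struct my_stock *stock;
		int cpu = get_cpu_light();  /* migrate_disable() on -RT */

		stock = &per_cpu(my_stock, cpu);
		stock->nr += nr;  /* sleeping locks are fine here on -RT */
		put_cpu_light();
	}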
Known issues:
- bcache is disabled.
- lazy preempt on x86_64 leads to a crash under certain loads.
- CPU hotplug works in general, but Steven's test script usually
deadlocks on the second invocation.
- xor / raid_pq
I had max latency jumping up to 67563us on one CPU while the next
lower max was 58us. I tracked it down to the module init code of
xor and raid_pq. Both disable preemption while benchmarking the
performance of the individual implementations (a sketch follows
this list).
- cpufreq deadlocks
The issue has been identified and will be fixed in the next -RT
release.
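For the curious, the xor / raid_pq latency comes from a benchmark loop
along the lines of do_xor_speed() in crypto/xor.c. A simplified sketch
(declarations omitted), which keeps preemption off while each candidate
implementation is timed over several whole jiffies:

	preempt_disable();
	for (i = 0; i < 5; i++) {
		now = jiffies;
		count = 0;
		while (jiffies == now) {  /* run for one full jiffy */
			mb();  /* prevent the loop being optimized away */
			tmpl->do_2(BENCH_SIZE, b1, b2);
			mb();
			count++;
			mb();
		}
		if (count > max)
			max = count;
	}
	preempt_enable();

With several implementations to try, that can add up to tens of
milliseconds with preemption disabled, which matches the spike
observed above.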
The delta patch against 3.18.7-rt1 is appended below and can be found here:
https://www.kernel.org/pub/linux/kernel/projects/rt/3.18/incr/patch-3.18.7-rt1-rt2.patch.xz
The RT patch against 3.18.7 can be found here:
https://www.kernel.org/pub/linux/kernel/projects/rt/3.18/patch-3.18.7-rt2.patch.xz
The split quilt queue is available at:
https://www.kernel.org/pub/linux/kernel/projects/rt/3.18/patches-3.18.7-rt2.tar.xz
Sebastian
diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h
index abb2c3769b01..2386e9745ba4 100644
--- a/arch/arm/include/asm/cmpxchg.h
+++ b/arch/arm/include/asm/cmpxchg.h
@@ -129,6 +129,8 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
#else /* min ARCH >= ARMv6 */
+#define __HAVE_ARCH_CMPXCHG 1
+
extern void __bad_cmpxchg(volatile void *ptr, int size);
/*
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index 53e69dae796f..9d861f9f215b 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -93,6 +93,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
+ preempt_disable_rt();
+
__asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
"1: " TUSER(ldr) " %1, [%4]\n"
" teq %1, %2\n"
@@ -104,6 +106,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
: "cc", "memory");
*uval = val;
+
+ preempt_enable_rt();
return ret;
}
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index b40d4bab8e07..c15d2a0826c6 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -431,6 +431,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
if (addr < TASK_SIZE)
return do_page_fault(addr, fsr, regs);
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
if (user_mode(regs))
goto bad_area;
@@ -498,6 +501,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
do_bad_area(addr, fsr, regs);
return 0;
}
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index f42e35e42790..e09dae6ce80d 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -90,7 +90,7 @@ static void *__kmap_pgprot(struct page *page, unsigned long addr, pgprot_t prot)
BUG_ON(Page_dcache_dirty(page));
- pagefault_disable();
+ raw_pagefault_disable();
idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1);
idx += in_interrupt() ? FIX_N_COLOURS : 0;
vaddr = __fix_to_virt(FIX_CMAP_END - idx);
@@ -146,7 +146,7 @@ void kunmap_coherent(void)
tlbw_use_hazard();
write_c0_entryhi(old_ctx);
local_irq_restore(flags);
- pagefault_enable();
+ raw_pagefault_enable();
}
void copy_user_highpage(struct page *to, struct page *from,
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 2d60a7813dfe..ddc4c9e3d09d 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -615,9 +615,9 @@ struct bau_control {
cycles_t send_message;
cycles_t period_end;
cycles_t period_time;
- spinlock_t uvhub_lock;
- spinlock_t queue_lock;
- spinlock_t disable_lock;
+ raw_spinlock_t uvhub_lock;
+ raw_spinlock_t queue_lock;
+ raw_spinlock_t disable_lock;
/* tunables */
int max_concurr;
int max_concurr_const;
@@ -776,15 +776,15 @@ static inline int atom_asr(short i, struct atomic_short *v)
* to be lowered below the current 'v'. atomic_add_unless can only stop
* on equal.
*/
-static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
+static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u)
{
- spin_lock(lock);
+ raw_spin_lock(lock);
if (atomic_read(v) >= u) {
- spin_unlock(lock);
+ raw_spin_unlock(lock);
return 0;
}
atomic_inc(v);
- spin_unlock(lock);
+ raw_spin_unlock(lock);
return 1;
}
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index a00ad8f2a657..c2729abe02bc 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -492,7 +492,7 @@ struct uv_blade_info {
unsigned short nr_online_cpus;
unsigned short pnode;
short memory_nid;
- spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */
+ raw_spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */
unsigned long nmi_count; /* obsolete, see uv_hub_nmi */
};
extern struct uv_blade_info *uv_blade_info;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 8e9dcfd630e4..2cfedcae31b9 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -918,7 +918,7 @@ void __init uv_system_init(void)
uv_blade_info[blade].pnode = pnode;
uv_blade_info[blade].nr_possible_cpus = 0;
uv_blade_info[blade].nr_online_cpus = 0;
- spin_lock_init(&uv_blade_info[blade].nmi_lock);
+ raw_spin_lock_init(&uv_blade_info[blade].nmi_lock);
min_pnode = min(pnode, min_pnode);
max_pnode = max(pnode, max_pnode);
blade++;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index c23856109466..718fc6cc6c64 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1411,6 +1411,11 @@ static int mce_notify_work_init(void)
static void mce_notify_work(void)
{
+ if (WARN_ON_ONCE(!mce_notify_helper)) {
+ pr_info(HW_ERR "Machine check event before MCE init; ignored\n");
+ return;
+ }
+
wake_up_process(mce_notify_helper);
}
#else
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 3968d67d366b..7d444650bdd4 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -714,9 +714,9 @@ static void destination_plugged(struct bau_desc *bau_desc,
quiesce_local_uvhub(hmaster);
- spin_lock(&hmaster->queue_lock);
+ raw_spin_lock(&hmaster->queue_lock);
reset_with_ipi(&bau_desc->distribution, bcp);
- spin_unlock(&hmaster->queue_lock);
+ raw_spin_unlock(&hmaster->queue_lock);
end_uvhub_quiesce(hmaster);
@@ -736,9 +736,9 @@ static void destination_timeout(struct bau_desc *bau_desc,
quiesce_local_uvhub(hmaster);
- spin_lock(&hmaster->queue_lock);
+ raw_spin_lock(&hmaster->queue_lock);
reset_with_ipi(&bau_desc->distribution, bcp);
- spin_unlock(&hmaster->queue_lock);
+ raw_spin_unlock(&hmaster->queue_lock);
end_uvhub_quiesce(hmaster);
@@ -759,7 +759,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)
cycles_t tm1;
hmaster = bcp->uvhub_master;
- spin_lock(&hmaster->disable_lock);
+ raw_spin_lock(&hmaster->disable_lock);
if (!bcp->baudisabled) {
stat->s_bau_disabled++;
tm1 = get_cycles();
@@ -772,7 +772,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)
}
}
}
- spin_unlock(&hmaster->disable_lock);
+ raw_spin_unlock(&hmaster->disable_lock);
}
static void count_max_concurr(int stat, struct bau_control *bcp,
@@ -835,7 +835,7 @@ static void record_send_stats(cycles_t time1, cycles_t time2,
*/
static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat)
{
- spinlock_t *lock = &hmaster->uvhub_lock;
+ raw_spinlock_t *lock = &hmaster->uvhub_lock;
atomic_t *v;
v = &hmaster->active_descriptor_count;
@@ -968,7 +968,7 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
struct bau_control *hmaster;
hmaster = bcp->uvhub_master;
- spin_lock(&hmaster->disable_lock);
+ raw_spin_lock(&hmaster->disable_lock);
if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
stat->s_bau_reenabled++;
for_each_present_cpu(tcpu) {
@@ -980,10 +980,10 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
tbcp->period_giveups = 0;
}
}
- spin_unlock(&hmaster->disable_lock);
+ raw_spin_unlock(&hmaster->disable_lock);
return 0;
}
- spin_unlock(&hmaster->disable_lock);
+ raw_spin_unlock(&hmaster->disable_lock);
return -1;
}
@@ -1899,9 +1899,9 @@ static void __init init_per_cpu_tunables(void)
bcp->cong_reps = congested_reps;
bcp->disabled_period = sec_2_cycles(disabled_period);
bcp->giveup_limit = giveup_limit;
- spin_lock_init(&bcp->queue_lock);
- spin_lock_init(&bcp->uvhub_lock);
- spin_lock_init(&bcp->disable_lock);
+ raw_spin_lock_init(&bcp->queue_lock);
+ raw_spin_lock_init(&bcp->uvhub_lock);
+ raw_spin_lock_init(&bcp->disable_lock);
}
}
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index a244237f3cfa..a718fe0d2e73 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -58,7 +58,7 @@ static DEFINE_PER_CPU(struct clock_event_device, cpu_ced);
/* There is one of these allocated per node */
struct uv_rtc_timer_head {
- spinlock_t lock;
+ raw_spinlock_t lock;
/* next cpu waiting for timer, local node relative: */
int next_cpu;
/* number of cpus on this node: */
@@ -178,7 +178,7 @@ static __init int uv_rtc_allocate_timers(void)
uv_rtc_deallocate_timers();
return -ENOMEM;
}
- spin_lock_init(&head->lock);
+ raw_spin_lock_init(&head->lock);
head->ncpus = uv_blade_nr_possible_cpus(bid);
head->next_cpu = -1;
blade_info[bid] = head;
@@ -232,7 +232,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires)
unsigned long flags;
int next_cpu;
- spin_lock_irqsave(&head->lock, flags);
+ raw_spin_lock_irqsave(&head->lock, flags);
next_cpu = head->next_cpu;
*t = expires;
@@ -244,12 +244,12 @@ static int uv_rtc_set_timer(int cpu, u64 expires)
if (uv_setup_intr(cpu, expires)) {
*t = ULLONG_MAX;
uv_rtc_find_next_timer(head, pnode);
- spin_unlock_irqrestore(&head->lock, flags);
+ raw_spin_unlock_irqrestore(&head->lock, flags);
return -ETIME;
}
}
- spin_unlock_irqrestore(&head->lock, flags);
+ raw_spin_unlock_irqrestore(&head->lock, flags);
return 0;
}
@@ -268,7 +268,7 @@ static int uv_rtc_unset_timer(int cpu, int force)
unsigned long flags;
int rc = 0;
- spin_lock_irqsave(&head->lock, flags);
+ raw_spin_lock_irqsave(&head->lock, flags);
if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force)
rc = 1;
@@ -280,7 +280,7 @@ static int uv_rtc_unset_timer(int cpu, int force)
uv_rtc_find_next_timer(head, pnode);
}
- spin_unlock_irqrestore(&head->lock, flags);
+ raw_spin_unlock_irqrestore(&head->lock, flags);
return rc;
}
@@ -300,13 +300,18 @@ static int uv_rtc_unset_timer(int cpu, int force)
static cycle_t uv_read_rtc(struct clocksource *cs)
{
unsigned long offset;
+ cycle_t cycles;
+ preempt_disable();
if (uv_get_min_hub_revision_id() == 1)
offset = 0;
else
offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
- return (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
+ cycles = (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
+ preempt_enable();
+
+ return cycles;
}
/*
diff --git a/block/blk-mq.c b/block/blk-mq.c
index dce02cef145c..ec679b2c2229 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1272,9 +1272,7 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
if (list_empty(&plug->mq_list))
trace_block_plug(q);
else if (request_count >= BLK_MAX_REQUEST_COUNT) {
- spin_unlock(&data.ctx->cpu_lock);
blk_flush_plug_list(plug, false);
- spin_lock(&data.ctx->cpu_lock);
trace_block_plug(q);
}
list_add_tail(&rq->queuelist, &plug->mq_list);
@@ -1470,7 +1468,6 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
blk_mq_hctx_clear_pending(hctx, ctx);
}
spin_unlock(&ctx->lock);
- __blk_mq_put_ctx(ctx);
if (list_empty(&tmp))
return NOTIFY_OK;
@@ -1680,7 +1677,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
memset(__ctx, 0, sizeof(*__ctx));
__ctx->cpu = i;
spin_lock_init(&__ctx->lock);
- spin_lock_init(&__ctx->cpu_lock);
INIT_LIST_HEAD(&__ctx->rq_list);
__ctx->queue = q;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index d0d4780ddfb6..d1d78dfe4123 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -9,7 +9,6 @@ struct blk_mq_ctx {
struct list_head rq_list;
} ____cacheline_aligned_in_smp;
- spinlock_t cpu_lock;
unsigned int cpu;
unsigned int index_hw;
@@ -77,7 +76,6 @@ static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
struct blk_mq_ctx *ctx;
ctx = per_cpu_ptr(q->queue_ctx, cpu);
- spin_lock(&ctx->cpu_lock);
return ctx;
}
@@ -92,14 +90,8 @@ static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
return __blk_mq_get_ctx(q, get_cpu_light());
}
-static void __blk_mq_put_ctx(struct blk_mq_ctx *ctx)
-{
- spin_unlock(&ctx->cpu_lock);
-}
-
static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
{
- __blk_mq_put_ctx(ctx);
put_cpu_light();
}
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 766098af4eb7..d79daca3b00b 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -191,7 +191,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
/* TODO: audit callers to ensure they are ready for qc_issue to
* unconditionally re-enable interrupts
*/
- local_irq_save(flags);
+ local_irq_save_nort(flags);
spin_unlock(ap->lock);
/* If the device fell off, no sense in issuing commands */
@@ -261,7 +261,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
out:
spin_lock(ap->lock);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
return ret;
}
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 5906c14a4e64..e257819480a9 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -360,7 +360,8 @@ int max_lock_depth = 1024;
static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
{
- return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
+ return rt_mutex_real_waiter(p->pi_blocked_on) ?
+ p->pi_blocked_on->lock : NULL;
}
/*
@@ -1700,7 +1701,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
set_current_state(TASK_RUNNING);
if (unlikely(ret)) {
- remove_waiter(lock, &waiter);
+ if (rt_mutex_has_waiters(lock))
+ remove_waiter(lock, &waiter);
rt_mutex_handle_deadlock(ret, chwalk, &waiter);
} else if (ww_ctx) {
ww_mutex_account_lock(lock, ww_ctx);
@@ -2242,7 +2244,8 @@ __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_c
might_sleep();
mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
- ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx);
+ ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL,
+ RT_MUTEX_FULL_CHAINWALK, ww_ctx);
if (ret)
mutex_release(&lock->base.dep_map, 1, _RET_IP_);
else if (!ret && ww_ctx->acquired > 1)
@@ -2260,7 +2263,8 @@ __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
might_sleep();
mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
- ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx);
+ ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL,
+ RT_MUTEX_FULL_CHAINWALK, ww_ctx);
if (ret)
mutex_release(&lock->base.dep_map, 1, _RET_IP_);
else if (!ret && ww_ctx->acquired > 1)
diff --git a/localversion-rt b/localversion-rt
index 6f206be67cd2..c3054d08a112 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt1
+-rt2
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 498e8844a142..0c4538811188 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2379,14 +2379,17 @@ static void __init memcg_stock_init(void)
*/
static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
{
- struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
+ struct memcg_stock_pcp *stock;
+ int cpu = get_cpu_light();
+
+ stock = &per_cpu(memcg_stock, cpu);
if (stock->cached != memcg) { /* reset if necessary */
drain_stock(stock);
stock->cached = memcg;
}
stock->nr_pages += nr_pages;
- put_cpu_var(memcg_stock);
+ put_cpu_light();
}
/*