[ANNOUNCE] v4.16.8-rt3

From: Sebastian Andrzej Siewior
Date: Fri May 18 2018 - 04:23:35 EST


Dear RT folks!

I'm pleased to announce the v4.16.8-rt3 patch set.

Changes since v4.16.8-rt2:

- Drop two MCE related patches. They are no longer required since the
timer wheel rework.

- Replace the atomic_dec_lock_irqsave() patches with their
refcount_dec_and_lock_irqsave() counterparts.

- Make the sched/CFS timer fire in hard-irq context. The lock taken in
the timer callback needs to be taken with disabled interrupts. Patch
by Mike Galbraith.

- Added a local lock to squashfs' multi CPU decompressor in order to
avoid long preempt-disable sections. Reported by Alexander Stein,
patch by Julia Cartwright.

- A handful patches to the Inter-event patches which were collected by
Steven Rostedt.

- Add a locallock during saving/restoring of SIMD registers on ARM64.
On RT we are preemptible there so extra protection is required.

- Let softirq_count() return BH disable count. This should make the
workarounds, we currently have in queue, unnecessary.

Known issues
- A warning triggered in "rcu_note_context_switch" originated from
SyS_timer_gettime(). The issue was always there, it is now
visible. Reported by Grygorii Strashko and Daniel Wagner.

The delta patch against v4.16.8-rt2 is appended below and can be found here:

https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.16/incr/patch-4.16.8-rt2-rt3.patch.xz

You can get this release via the git tree at:

git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v4.16.8-rt3

The RT patch against v4.16.8 can be found here:

https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patch-4.16.8-rt3.patch.xz

The split quilt queue is available at:

https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.8-rt3.tar.xz

Sebastian

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index e7226c4c7493..3a5cd1908874 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -38,6 +38,7 @@
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
+#include <linux/locallock.h>

#include <asm/fpsimd.h>
#include <asm/cputype.h>
@@ -235,7 +236,7 @@ static void sve_user_enable(void)
* whether TIF_SVE is clear or set, since these are not vector length
* dependent.
*/
-
+static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock);
/*
* Update current's FPSIMD/SVE registers from thread_struct.
*
@@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_struct *task,
* non-SVE thread.
*/
if (task == current) {
+ local_lock(fpsimd_lock);
local_bh_disable();

task_fpsimd_save();
@@ -604,8 +606,10 @@ int sve_set_vector_length(struct task_struct *task,
if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
sve_to_fpsimd(task);

- if (task == current)
+ if (task == current) {
+ local_unlock(fpsimd_lock);
local_bh_enable();
+ }

/*
* Force reallocation of task SVE state to the correct size
@@ -838,6 +842,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
sve_alloc(current);

local_bh_disable();
+ local_lock(fpsimd_lock);

task_fpsimd_save();
fpsimd_to_sve(current);
@@ -849,6 +854,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
if (test_and_set_thread_flag(TIF_SVE))
WARN_ON(1); /* SVE access shouldn't have trapped */

+ local_unlock(fpsimd_lock);
local_bh_enable();
}

@@ -926,6 +932,7 @@ void fpsimd_flush_thread(void)
return;

local_bh_disable();
+ local_lock(fpsimd_lock);

memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
fpsimd_flush_task_state(current);
@@ -967,6 +974,7 @@ void fpsimd_flush_thread(void)

set_thread_flag(TIF_FOREIGN_FPSTATE);

+ local_unlock(fpsimd_lock);
local_bh_enable();
}

@@ -980,7 +988,9 @@ void fpsimd_preserve_current_state(void)
return;

local_bh_disable();
+ local_lock(fpsimd_lock);
task_fpsimd_save();
+ local_unlock(fpsimd_lock);
local_bh_enable();
}

@@ -1022,12 +1032,14 @@ void fpsimd_restore_current_state(void)
return;

local_bh_disable();
+ local_lock(fpsimd_lock);

if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
task_fpsimd_load();
fpsimd_bind_to_cpu();
}

+ local_unlock(fpsimd_lock);
local_bh_enable();
}

@@ -1042,6 +1054,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
return;

local_bh_disable();
+ local_lock(fpsimd_lock);

current->thread.fpsimd_state.user_fpsimd = *state;
if (system_supports_sve() && test_thread_flag(TIF_SVE))
@@ -1052,6 +1065,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_bind_to_cpu();

+ local_unlock(fpsimd_lock);
local_bh_enable();
}

@@ -1116,6 +1130,7 @@ void kernel_neon_begin(void)
BUG_ON(!may_use_simd());

local_bh_disable();
+ local_lock(fpsimd_lock);

__this_cpu_write(kernel_neon_busy, true);

@@ -1128,6 +1143,7 @@ void kernel_neon_begin(void)
/* Invalidate any task state remaining in the fpsimd regs: */
fpsimd_flush_cpu_state();

+ local_unlock(fpsimd_lock);
preempt_disable();

local_bh_enable();
diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
index 4812ee9c7896..97685a0c3175 100644
--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
@@ -14,7 +14,6 @@
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/poll.h>
-#include <linux/swork.h>

#include "mce-internal.h"

@@ -87,43 +86,13 @@ static void mce_do_trigger(struct work_struct *work)

static DECLARE_WORK(mce_trigger_work, mce_do_trigger);

-static void __mce_work_trigger(struct swork_event *event)
+
+void mce_work_trigger(void)
{
if (mce_helper[0])
schedule_work(&mce_trigger_work);
}

-#ifdef CONFIG_PREEMPT_RT_FULL
-static bool notify_work_ready __read_mostly;
-static struct swork_event notify_work;
-
-static int mce_notify_work_init(void)
-{
- int err;
-
- err = swork_get();
- if (err)
- return err;
-
- INIT_SWORK(&notify_work, __mce_work_trigger);
- notify_work_ready = true;
- return 0;
-}
-
-void mce_work_trigger(void)
-{
- if (notify_work_ready)
- swork_queue(&notify_work);
-}
-
-#else
-void mce_work_trigger(void)
-{
- __mce_work_trigger(NULL);
-}
-static inline int mce_notify_work_init(void) { return 0; }
-#endif
-
static ssize_t
show_trigger(struct device *s, struct device_attribute *attr, char *buf)
{
@@ -387,7 +356,7 @@ static __init int dev_mcelog_init_device(void)

return err;
}
- mce_notify_work_init();
+
mce_register_decode_chain(&dev_mcelog_nb);
return 0;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 53a8916d41b9..466f47301334 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -41,7 +41,6 @@
#include <linux/debugfs.h>
#include <linux/irq_work.h>
#include <linux/export.h>
-#include <linux/jiffies.h>
#include <linux/jump_label.h>

#include <asm/intel-family.h>
@@ -1364,7 +1363,7 @@ int memory_failure(unsigned long pfn, int flags)
static unsigned long check_interval = INITIAL_CHECK_INTERVAL;

static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
-static DEFINE_PER_CPU(struct hrtimer, mce_timer);
+static DEFINE_PER_CPU(struct timer_list, mce_timer);

static unsigned long mce_adjust_timer_default(unsigned long interval)
{
@@ -1373,18 +1372,26 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)

static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;

-static void __start_timer(struct hrtimer *t, unsigned long iv)
+static void __start_timer(struct timer_list *t, unsigned long interval)
{
- if (!iv)
- return;
- hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL),
- 0, HRTIMER_MODE_REL_PINNED);
+ unsigned long when = jiffies + interval;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ if (!timer_pending(t) || time_before(when, t->expires))
+ mod_timer(t, round_jiffies(when));
+
+ local_irq_restore(flags);
}

-static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer)
+static void mce_timer_fn(struct timer_list *t)
{
+ struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
unsigned long iv;

+ WARN_ON(cpu_t != t);
+
iv = __this_cpu_read(mce_next_interval);

if (mce_available(this_cpu_ptr(&cpu_info))) {
@@ -1407,11 +1414,7 @@ static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer)

done:
__this_cpu_write(mce_next_interval, iv);
- if (!iv)
- return HRTIMER_NORESTART;
-
- hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_nsecs(iv)));
- return HRTIMER_RESTART;
+ __start_timer(t, iv);
}

/*
@@ -1419,7 +1422,7 @@ static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer)
*/
void mce_timer_kick(unsigned long interval)
{
- struct hrtimer *t = this_cpu_ptr(&mce_timer);
+ struct timer_list *t = this_cpu_ptr(&mce_timer);
unsigned long iv = __this_cpu_read(mce_next_interval);

__start_timer(t, interval);
@@ -1434,7 +1437,7 @@ static void mce_timer_delete_all(void)
int cpu;

for_each_online_cpu(cpu)
- hrtimer_cancel(&per_cpu(mce_timer, cpu));
+ del_timer_sync(&per_cpu(mce_timer, cpu));
}

/*
@@ -1763,7 +1766,7 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
}
}

-static void mce_start_timer(struct hrtimer *t)
+static void mce_start_timer(struct timer_list *t)
{
unsigned long iv = check_interval * HZ;

@@ -1776,19 +1779,16 @@ static void mce_start_timer(struct hrtimer *t)

static void __mcheck_cpu_setup_timer(void)
{
- struct hrtimer *t = this_cpu_ptr(&mce_timer);
+ struct timer_list *t = this_cpu_ptr(&mce_timer);

- hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- t->function = mce_timer_fn;
+ timer_setup(t, mce_timer_fn, TIMER_PINNED);
}

static void __mcheck_cpu_init_timer(void)
{
- struct hrtimer *t = this_cpu_ptr(&mce_timer);
-
- hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- t->function = mce_timer_fn;
+ struct timer_list *t = this_cpu_ptr(&mce_timer);

+ timer_setup(t, mce_timer_fn, TIMER_PINNED);
mce_start_timer(t);
}

@@ -2307,7 +2307,7 @@ static int mce_cpu_dead(unsigned int cpu)

static int mce_cpu_online(unsigned int cpu)
{
- struct hrtimer *t = this_cpu_ptr(&mce_timer);
+ struct timer_list *t = this_cpu_ptr(&mce_timer);
int ret;

mce_device_create(cpu);
@@ -2324,10 +2324,10 @@ static int mce_cpu_online(unsigned int cpu)

static int mce_cpu_pre_down(unsigned int cpu)
{
- struct hrtimer *t = this_cpu_ptr(&mce_timer);
+ struct timer_list *t = this_cpu_ptr(&mce_timer);

mce_disable_cpu();
- hrtimer_cancel(t);
+ del_timer_sync(t);
mce_threshold_remove_device(cpu);
mce_device_remove(cpu);
return 0;
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 3c65f52b68f5..532fdf56c117 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1049,7 +1049,7 @@ int r5l_write_stripe(struct r5l_log *log, struct stripe_head *sh)
* don't delay.
*/
clear_bit(STRIPE_DELAYED, &sh->state);
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);

mutex_lock(&log->io_mutex);
/* meta + data */
@@ -1388,7 +1388,7 @@ static void r5c_flush_stripe(struct r5conf *conf, struct stripe_head *sh)
lockdep_assert_held(&conf->device_lock);

list_del_init(&sh->lru);
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);

set_bit(STRIPE_HANDLE, &sh->state);
atomic_inc(&conf->active_stripes);
@@ -1491,7 +1491,7 @@ static void r5c_do_reclaim(struct r5conf *conf)
*/
if (!list_empty(&sh->lru) &&
!test_bit(STRIPE_HANDLE, &sh->state) &&
- atomic_read(&sh->count) == 0) {
+ refcount_read(&sh->count) == 0) {
r5c_flush_stripe(conf, sh);
if (count++ >= R5C_RECLAIM_STRIPE_GROUP)
break;
@@ -2912,7 +2912,7 @@ int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh)
* don't delay.
*/
clear_bit(STRIPE_DELAYED, &sh->state);
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);

mutex_lock(&log->io_mutex);
/* meta + data */
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 42890a08375b..87840cfe7a80 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -388,7 +388,7 @@ int ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh)

set_bit(STRIPE_LOG_TRAPPED, &sh->state);
clear_bit(STRIPE_DELAYED, &sh->state);
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);

if (ppl_log_stripe(log, sh)) {
spin_lock_irq(&ppl_conf->no_mem_stripes_lock);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d8de7476d26a..eb967afd749a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -306,7 +306,7 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
static void __release_stripe(struct r5conf *conf, struct stripe_head *sh,
struct list_head *temp_inactive_list)
{
- if (atomic_dec_and_test(&sh->count))
+ if (refcount_dec_and_test(&sh->count))
do_release_stripe(conf, sh, temp_inactive_list);
}

@@ -398,7 +398,7 @@ void raid5_release_stripe(struct stripe_head *sh)

/* Avoid release_list until the last reference.
*/
- if (atomic_add_unless(&sh->count, -1, 1))
+ if (refcount_dec_not_one(&sh->count))
return;

if (unlikely(!conf->mddev->thread) ||
@@ -410,7 +410,7 @@ void raid5_release_stripe(struct stripe_head *sh)
return;
slow_path:
/* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */
- if (atomic_dec_and_lock_irqsave(&sh->count, &conf->device_lock, flags)) {
+ if (refcount_dec_and_lock_irqsave(&sh->count, &conf->device_lock, &flags)) {
INIT_LIST_HEAD(&list);
hash = sh->hash_lock_index;
do_release_stripe(conf, sh, &list);
@@ -499,7 +499,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
struct r5conf *conf = sh->raid_conf;
int i, seq;

- BUG_ON(atomic_read(&sh->count) != 0);
+ BUG_ON(refcount_read(&sh->count) != 0);
BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
BUG_ON(stripe_operations_active(sh));
BUG_ON(sh->batch_head);
@@ -676,11 +676,11 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
&conf->cache_state);
} else {
init_stripe(sh, sector, previous);
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
}
- } else if (!atomic_inc_not_zero(&sh->count)) {
+ } else if (!refcount_inc_not_zero(&sh->count)) {
spin_lock(&conf->device_lock);
- if (!atomic_read(&sh->count)) {
+ if (!refcount_read(&sh->count)) {
if (!test_bit(STRIPE_HANDLE, &sh->state))
atomic_inc(&conf->active_stripes);
BUG_ON(list_empty(&sh->lru) &&
@@ -696,7 +696,7 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
sh->group = NULL;
}
}
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
spin_unlock(&conf->device_lock);
}
} while (sh == NULL);
@@ -758,9 +758,9 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
hash = stripe_hash_locks_hash(head_sector);
spin_lock_irq(conf->hash_locks + hash);
head = __find_stripe(conf, head_sector, conf->generation);
- if (head && !atomic_inc_not_zero(&head->count)) {
+ if (head && !refcount_inc_not_zero(&head->count)) {
spin_lock(&conf->device_lock);
- if (!atomic_read(&head->count)) {
+ if (!refcount_read(&head->count)) {
if (!test_bit(STRIPE_HANDLE, &head->state))
atomic_inc(&conf->active_stripes);
BUG_ON(list_empty(&head->lru) &&
@@ -776,7 +776,7 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
head->group = NULL;
}
}
- atomic_inc(&head->count);
+ refcount_inc(&head->count);
spin_unlock(&conf->device_lock);
}
spin_unlock_irq(conf->hash_locks + hash);
@@ -845,7 +845,7 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
sh->batch_head->bm_seq = seq;
}

- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
unlock_out:
unlock_two_stripes(head, sh);
out:
@@ -1108,9 +1108,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
pr_debug("%s: for %llu schedule op %d on disc %d\n",
__func__, (unsigned long long)sh->sector,
bi->bi_opf, i);
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
if (sh != head_sh)
- atomic_inc(&head_sh->count);
+ refcount_inc(&head_sh->count);
if (use_new_offset(conf, sh))
bi->bi_iter.bi_sector = (sh->sector
+ rdev->new_data_offset);
@@ -1172,9 +1172,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
"replacement disc %d\n",
__func__, (unsigned long long)sh->sector,
rbi->bi_opf, i);
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
if (sh != head_sh)
- atomic_inc(&head_sh->count);
+ refcount_inc(&head_sh->count);
if (use_new_offset(conf, sh))
rbi->bi_iter.bi_sector = (sh->sector
+ rrdev->new_data_offset);
@@ -1352,7 +1352,7 @@ static void ops_run_biofill(struct stripe_head *sh)
}
}

- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL);
async_trigger_callback(&submit);
}
@@ -1430,7 +1430,7 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
if (i != target)
xor_srcs[count++] = sh->dev[i].page;

- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);

init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
ops_complete_compute, sh, to_addr_conv(sh, percpu, 0));
@@ -1519,7 +1519,7 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
dest = tgt->page;

- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);

if (target == qd_idx) {
count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL);
@@ -1594,7 +1594,7 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
pr_debug("%s: stripe: %llu faila: %d failb: %d\n",
__func__, (unsigned long long)sh->sector, faila, failb);

- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);

if (failb == syndrome_disks+1) {
/* Q disk is one of the missing disks */
@@ -1865,7 +1865,7 @@ ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
break;
}
if (i >= sh->disks) {
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
set_bit(R5_Discard, &sh->dev[pd_idx].flags);
ops_complete_reconstruct(sh);
return;
@@ -1906,7 +1906,7 @@ ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
flags = ASYNC_TX_ACK |
(prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);

- atomic_inc(&head_sh->count);
+ refcount_inc(&head_sh->count);
init_async_submit(&submit, flags, tx, ops_complete_reconstruct, head_sh,
to_addr_conv(sh, percpu, j));
} else {
@@ -1948,7 +1948,7 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
break;
}
if (i >= sh->disks) {
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
set_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
set_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
ops_complete_reconstruct(sh);
@@ -1972,7 +1972,7 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
struct stripe_head, batch_list) == head_sh;

if (last_stripe) {
- atomic_inc(&head_sh->count);
+ refcount_inc(&head_sh->count);
init_async_submit(&submit, txflags, tx, ops_complete_reconstruct,
head_sh, to_addr_conv(sh, percpu, j));
} else
@@ -2029,7 +2029,7 @@ static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
&sh->ops.zero_sum_result, &submit);

- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL);
tx = async_trigger_callback(&submit);
}
@@ -2048,7 +2048,7 @@ static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu
if (!checkp)
srcs[count] = NULL;

- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check,
sh, to_addr_conv(sh, percpu, 0));
async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE,
@@ -2150,7 +2150,7 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
INIT_LIST_HEAD(&sh->lru);
INIT_LIST_HEAD(&sh->r5c);
INIT_LIST_HEAD(&sh->log_list);
- atomic_set(&sh->count, 1);
+ refcount_set(&sh->count, 1);
sh->raid_conf = conf;
sh->log_start = MaxSector;
for (i = 0; i < disks; i++) {
@@ -2451,7 +2451,7 @@ static int drop_one_stripe(struct r5conf *conf)
spin_unlock_irq(conf->hash_locks + hash);
if (!sh)
return 0;
- BUG_ON(atomic_read(&sh->count));
+ BUG_ON(refcount_read(&sh->count));
shrink_buffers(sh);
free_stripe(conf->slab_cache, sh);
atomic_dec(&conf->active_stripes);
@@ -2483,7 +2483,7 @@ static void raid5_end_read_request(struct bio * bi)
break;

pr_debug("end_read_request %llu/%d, count: %d, error %d.\n",
- (unsigned long long)sh->sector, i, atomic_read(&sh->count),
+ (unsigned long long)sh->sector, i, refcount_read(&sh->count),
bi->bi_status);
if (i == disks) {
bio_reset(bi);
@@ -2620,7 +2620,7 @@ static void raid5_end_write_request(struct bio *bi)
}
}
pr_debug("end_write_request %llu/%d, count %d, error: %d.\n",
- (unsigned long long)sh->sector, i, atomic_read(&sh->count),
+ (unsigned long long)sh->sector, i, refcount_read(&sh->count),
bi->bi_status);
if (i == disks) {
bio_reset(bi);
@@ -4687,7 +4687,7 @@ static void handle_stripe(struct stripe_head *sh)
pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
"pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
(unsigned long long)sh->sector, sh->state,
- atomic_read(&sh->count), sh->pd_idx, sh->qd_idx,
+ refcount_read(&sh->count), sh->pd_idx, sh->qd_idx,
sh->check_state, sh->reconstruct_state);

analyse_stripe(sh, &s);
@@ -5062,7 +5062,7 @@ static void activate_bit_delay(struct r5conf *conf,
struct stripe_head *sh = list_entry(head.next, struct stripe_head, lru);
int hash;
list_del_init(&sh->lru);
- atomic_inc(&sh->count);
+ refcount_inc(&sh->count);
hash = sh->hash_lock_index;
__release_stripe(conf, sh, &temp_inactive_list[hash]);
}
@@ -5387,7 +5387,8 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
sh->group = NULL;
}
list_del_init(&sh->lru);
- BUG_ON(atomic_inc_return(&sh->count) != 1);
+ refcount_inc(&sh->count);
+ BUG_ON(refcount_read(&sh->count) != 1);
return sh;
}

diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 2796fb045885..8c6d39e9db41 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -4,7 +4,7 @@

#include <linux/raid/xor.h>
#include <linux/dmaengine.h>
-
+#include <linux/refcount.h>
/*
*
* Each stripe contains one buffer per device. Each buffer can be in
@@ -208,7 +208,7 @@ struct stripe_head {
short ddf_layout;/* use DDF ordering to calculate Q */
short hash_lock_index;
unsigned long state; /* state flags */
- atomic_t count; /* nr of active thread/requests */
+ refcount_t count; /* nr of active thread/requests */
int bm_seq; /* sequence number for bitmap flushes */
int disks; /* disks in stripe */
int overwrite_disks; /* total overwrite disks in stripe,
diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c
index 23a9c28ad8ea..6a73c4fa88e7 100644
--- a/fs/squashfs/decompressor_multi_percpu.c
+++ b/fs/squashfs/decompressor_multi_percpu.c
@@ -10,6 +10,7 @@
#include <linux/slab.h>
#include <linux/percpu.h>
#include <linux/buffer_head.h>
+#include <linux/locallock.h>

#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -25,6 +26,8 @@ struct squashfs_stream {
void *stream;
};

+static DEFINE_LOCAL_IRQ_LOCK(stream_lock);
+
void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
void *comp_opts)
{
@@ -79,10 +82,15 @@ int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
{
struct squashfs_stream __percpu *percpu =
(struct squashfs_stream __percpu *) msblk->stream;
- struct squashfs_stream *stream = get_cpu_ptr(percpu);
- int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
- offset, length, output);
- put_cpu_ptr(stream);
+ struct squashfs_stream *stream;
+ int res;
+
+ stream = get_locked_ptr(stream_lock, percpu);
+
+ res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
+ offset, length, output);
+
+ put_locked_ptr(stream_lock, stream);

if (res < 0)
ERROR("%s decompression failed, data probably corrupt\n",
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 0bd432a4d7bd..81c75934ca5b 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -12,6 +12,7 @@
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/kref.h>
+#include <linux/refcount.h>

struct page;
struct device;
@@ -76,7 +77,7 @@ enum wb_reason {
*/
struct bdi_writeback_congested {
unsigned long state; /* WB_[a]sync_congested flags */
- atomic_t refcnt; /* nr of attached wb's and blkg */
+ refcount_t refcnt; /* nr of attached wb's and blkg */

#ifdef CONFIG_CGROUP_WRITEBACK
struct backing_dev_info *__bdi; /* the associated bdi, set to NULL
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 82e8b73117d1..82aeba375154 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -403,13 +403,13 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
static inline struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
{
- atomic_inc(&bdi->wb_congested->refcnt);
+ refcount_inc(&bdi->wb_congested->refcnt);
return bdi->wb_congested;
}

static inline void wb_congested_put(struct bdi_writeback_congested *congested)
{
- if (atomic_dec_and_test(&congested->refcnt))
+ if (refcount_dec_and_test(&congested->refcnt))
kfree(congested);
}

diff --git a/include/linux/locallock.h b/include/linux/locallock.h
index d658c2552601..921eab83cd34 100644
--- a/include/linux/locallock.h
+++ b/include/linux/locallock.h
@@ -222,6 +222,14 @@ static inline int __local_unlock_irqrestore(struct local_irq_lock *lv,

#define put_locked_var(lvar, var) local_unlock(lvar);

+#define get_locked_ptr(lvar, var) \
+ ({ \
+ local_lock(lvar); \
+ this_cpu_ptr(var); \
+ })
+
+#define put_locked_ptr(lvar, var) local_unlock(lvar);
+
#define local_lock_cpu(lvar) \
({ \
local_lock(lvar); \
@@ -262,6 +270,8 @@ static inline void local_irq_lock_init(int lvar) { }

#define get_locked_var(lvar, var) get_cpu_var(var)
#define put_locked_var(lvar, var) put_cpu_var(var)
+#define get_locked_ptr(lvar, var) get_cpu_ptr(var)
+#define put_locked_ptr(lvar, var) put_cpu_ptr(var)

#define local_lock_cpu(lvar) get_cpu()
#define local_unlock_cpu(lvar) put_cpu()
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 0591df500e9d..043e431a7e8e 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -91,7 +91,7 @@
# define softirq_count() (preempt_count() & SOFTIRQ_MASK)
# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
#else
-# define softirq_count() (0UL)
+# define softirq_count() ((unsigned long)current->softirq_nestcnt)
extern int in_serving_softirq(void);
#endif

diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index 4193c41e383a..a685da2c4522 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -98,5 +98,7 @@ extern __must_check bool refcount_dec_if_one(refcount_t *r);
extern __must_check bool refcount_dec_not_one(refcount_t *r);
extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock);
extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock);
-
+extern __must_check bool refcount_dec_and_lock_irqsave(refcount_t *r,
+ spinlock_t *lock,
+ unsigned long *flags);
#endif /* _LINUX_REFCOUNT_H */
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 96fe289c4c6e..39ad98c09c58 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -4,6 +4,7 @@

#include <linux/uidgid.h>
#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <linux/ratelimit.h>

struct key;
@@ -12,7 +13,7 @@ struct key;
* Some day this will be a full-fledged user tracking system..
*/
struct user_struct {
- atomic_t __count; /* reference count */
+ refcount_t __count; /* reference count */
atomic_t processes; /* How many processes does this user have? */
atomic_t sigpending; /* How many pending signals does this user have? */
#ifdef CONFIG_FANOTIFY
@@ -59,7 +60,7 @@ extern struct user_struct root_user;
extern struct user_struct * alloc_uid(kuid_t);
static inline struct user_struct *get_uid(struct user_struct *u)
{
- atomic_inc(&u->__count);
+ refcount_inc(&u->__count);
return u;
}
extern void free_uid(struct user_struct *);
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index f6f72c583e83..15068e3ef74e 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -419,11 +419,6 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
#define atomic_dec_and_lock(atomic, lock) \
__cond_lock(lock, _atomic_dec_and_lock(atomic, lock))

-extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
- unsigned long *flags);
-#define atomic_dec_and_lock_irqsave(atomic, lock, flags) \
- __cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags)))
-
int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
size_t max_size, unsigned int cpu_mult,
gfp_t gfp);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 960ad0ce77d7..420624c49f38 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5007,9 +5007,9 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
cfs_b->period = ns_to_ktime(default_cfs_period());

INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
- hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
cfs_b->period_timer.function = sched_cfs_period_timer;
- hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
cfs_b->slack_timer.function = sched_cfs_slack_timer;
}

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index b87ea7421f3d..c555fe1164af 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -1686,8 +1686,6 @@ static const char *hist_field_name(struct hist_field *field,
else if (field->flags & HIST_FIELD_FL_LOG2 ||
field->flags & HIST_FIELD_FL_ALIAS)
field_name = hist_field_name(field->operands[0], ++level);
- else if (field->flags & HIST_FIELD_FL_TIMESTAMP)
- field_name = "common_timestamp";
else if (field->flags & HIST_FIELD_FL_CPU)
field_name = "cpu";
else if (field->flags & HIST_FIELD_FL_EXPR ||
@@ -1703,7 +1701,8 @@ static const char *hist_field_name(struct hist_field *field,
field_name = full_name;
} else
field_name = field->name;
- }
+ } else if (field->flags & HIST_FIELD_FL_TIMESTAMP)
+ field_name = "common_timestamp";

if (field_name == NULL)
field_name = "";
@@ -2467,6 +2466,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
else if (strcmp(modifier, "usecs") == 0)
*flags |= HIST_FIELD_FL_TIMESTAMP_USECS;
else {
+ hist_err("Invalid field modifier: ", modifier);
field = ERR_PTR(-EINVAL);
goto out;
}
@@ -2482,6 +2482,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
else {
field = trace_find_event_field(file->event_call, field_name);
if (!field || !field->size) {
+ hist_err("Couldn't find field: ", field_name);
field = ERR_PTR(-EINVAL);
goto out;
}
@@ -2776,6 +2777,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
expr->fn = hist_field_plus;
break;
default:
+ ret = -EINVAL;
goto free;
}

@@ -4411,7 +4413,7 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data)
struct tracing_map *map = hist_data->map;
struct ftrace_event_field *field;
struct hist_field *hist_field;
- int i, idx;
+ int i, idx = 0;

for_each_hist_field(i, hist_data) {
hist_field = hist_data->fields[i];
@@ -4857,22 +4859,24 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field)
if (hist_field->var.name)
seq_printf(m, "%s=", hist_field->var.name);

- if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP)
- seq_puts(m, "common_timestamp");
- else if (hist_field->flags & HIST_FIELD_FL_CPU)
+ if (hist_field->flags & HIST_FIELD_FL_CPU)
seq_puts(m, "cpu");
else if (field_name) {
if (hist_field->flags & HIST_FIELD_FL_VAR_REF ||
hist_field->flags & HIST_FIELD_FL_ALIAS)
seq_putc(m, '$');
seq_printf(m, "%s", field_name);
- }
+ } else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP)
+ seq_puts(m, "common_timestamp");

if (hist_field->flags) {
- const char *flags_str = get_hist_field_flags(hist_field);
+ if (!(hist_field->flags & HIST_FIELD_FL_VAR_REF) &&
+ !(hist_field->flags & HIST_FIELD_FL_EXPR)) {
+ const char *flags = get_hist_field_flags(hist_field);

- if (flags_str)
- seq_printf(m, ".%s", flags_str);
+ if (flags)
+ seq_printf(m, ".%s", flags);
+ }
}
}

diff --git a/kernel/user.c b/kernel/user.c
index 8959ad11d766..0df9b1640b2a 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -96,7 +96,7 @@ static DEFINE_SPINLOCK(uidhash_lock);

/* root_user.__count is 1, for init task cred */
struct user_struct root_user = {
- .__count = ATOMIC_INIT(1),
+ .__count = REFCOUNT_INIT(1),
.processes = ATOMIC_INIT(1),
.sigpending = ATOMIC_INIT(0),
.locked_shm = 0,
@@ -123,7 +123,7 @@ static struct user_struct *uid_hash_find(kuid_t uid, struct hlist_head *hashent)

hlist_for_each_entry(user, hashent, uidhash_node) {
if (uid_eq(user->uid, uid)) {
- atomic_inc(&user->__count);
+ refcount_inc(&user->__count);
return user;
}
}
@@ -169,7 +169,7 @@ void free_uid(struct user_struct *up)
if (!up)
return;

- if (atomic_dec_and_lock_irqsave(&up->__count, &uidhash_lock, flags))
+ if (refcount_dec_and_lock_irqsave(&up->__count, &uidhash_lock, &flags))
free_user(up, flags);
}

@@ -188,7 +188,7 @@ struct user_struct *alloc_uid(kuid_t uid)
goto out_unlock;

new->uid = uid;
- atomic_set(&new->__count, 1);
+ refcount_set(&new->__count, 1);
ratelimit_state_init(&new->ratelimit, HZ, 100);
ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE);

diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c
index f29d9cafc37a..347fa7ac2e8a 100644
--- a/lib/dec_and_lock.c
+++ b/lib/dec_and_lock.c
@@ -33,20 +33,3 @@ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
}

EXPORT_SYMBOL(_atomic_dec_and_lock);
-
-int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
- unsigned long *flags)
-{
- /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
- if (atomic_add_unless(atomic, -1, 1))
- return 0;
-
- /* Otherwise do it the slow way */
- spin_lock_irqsave(lock, *flags);
- if (atomic_dec_and_test(atomic))
- return 1;
- spin_unlock_irqrestore(lock, *flags);
- return 0;
-}
-
-EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave);
diff --git a/lib/refcount.c b/lib/refcount.c
index 0eb48353abe3..d3b81cefce91 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -350,3 +350,31 @@ bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock)
}
EXPORT_SYMBOL(refcount_dec_and_lock);

+/**
+ * refcount_dec_and_lock_irqsave - return holding spinlock with disabled
+ * interrupts if able to decrement refcount to 0
+ * @r: the refcount
+ * @lock: the spinlock to be locked
+ * @flags: saved IRQ-flags if the is acquired
+ *
+ * Same as refcount_dec_and_lock() above except that the spinlock is acquired
+ * with disabled interupts.
+ *
+ * Return: true and hold spinlock if able to decrement refcount to 0, false
+ * otherwise
+ */
+bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock,
+ unsigned long *flags)
+{
+ if (refcount_dec_not_one(r))
+ return false;
+
+ spin_lock_irqsave(lock, *flags);
+ if (!refcount_dec_and_test(r)) {
+ spin_unlock_irqrestore(lock, *flags);
+ return false;
+ }
+
+ return true;
+}
+EXPORT_SYMBOL(refcount_dec_and_lock_irqsave);
diff --git a/localversion-rt b/localversion-rt
index c3054d08a112..1445cd65885c 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt2
+-rt3
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 96ab54d1dc9a..087403ed5164 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -460,10 +460,10 @@ wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
if (new_congested) {
/* !found and storage for new one already allocated, insert */
congested = new_congested;
- new_congested = NULL;
rb_link_node(&congested->rb_node, parent, node);
rb_insert_color(&congested->rb_node, &bdi->cgwb_congested_tree);
- goto found;
+ spin_unlock_irqrestore(&cgwb_lock, flags);
+ return congested;
}

spin_unlock_irqrestore(&cgwb_lock, flags);
@@ -473,13 +473,13 @@ wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
if (!new_congested)
return NULL;

- atomic_set(&new_congested->refcnt, 0);
+ refcount_set(&new_congested->refcnt, 1);
new_congested->__bdi = bdi;
new_congested->blkcg_id = blkcg_id;
goto retry;

found:
- atomic_inc(&congested->refcnt);
+ refcount_inc(&congested->refcnt);
spin_unlock_irqrestore(&cgwb_lock, flags);
kfree(new_congested);
return congested;
@@ -495,7 +495,7 @@ void wb_congested_put(struct bdi_writeback_congested *congested)
{
unsigned long flags;

- if (!atomic_dec_and_lock_irqsave(&congested->refcnt, &cgwb_lock, flags))
+ if (!refcount_dec_and_lock_irqsave(&congested->refcnt, &cgwb_lock, &flags))
return;

/* bdi might already have been destroyed leaving @congested unlinked */
@@ -803,7 +803,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
if (!bdi->wb_congested)
return -ENOMEM;

- atomic_set(&bdi->wb_congested->refcnt, 1);
+ refcount_set(&bdi->wb_congested->refcnt, 1);

err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
if (err) {