Re: [tip:locking/core] sched/wait: Fix signal handling in bit wait helpers

From: Peter Zijlstra
Date: Tue Dec 08 2015 - 05:47:20 EST


On Fri, Dec 04, 2015 at 03:52:12AM -0800, tip-bot for Peter Zijlstra wrote:
> Commit-ID: 68985633bccb6066bf1803e316fbc6c1f5b796d6
> Gitweb: http://git.kernel.org/tip/68985633bccb6066bf1803e316fbc6c1f5b796d6
> Author: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> AuthorDate: Tue, 1 Dec 2015 14:04:04 +0100
> Committer: Ingo Molnar <mingo@xxxxxxxxxx>
> CommitDate: Fri, 4 Dec 2015 10:10:15 +0100
>
> sched/wait: Fix signal handling in bit wait helpers
>
> Vladimir reported getting RCU stall warnings and bisected it back to
> commit:
>
> 743162013d40 ("sched: Remove proliferation of wait_on_bit() action functions")
>
> That commit inadvertently reversed the calls to schedule() and signal_pending(),
> thereby not handling the case where the signal is received while we sleep.
>
> Reported-by: Vladimir Murzin <vladimir.murzin@xxxxxxx>
> Tested-by: Vladimir Murzin <vladimir.murzin@xxxxxxx>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
> Cc: Mike Galbraith <efault@xxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: mark.rutland@xxxxxxx
> Cc: neilb@xxxxxxx
> Cc: oleg@xxxxxxxxxx
> Fixes: 743162013d40 ("sched: Remove proliferation of wait_on_bit() action functions")
> Fixes: cbbce8220949 ("SCHED: add some "wait..on_bit...timeout()" interfaces.")
> Link: http://lkml.kernel.org/r/20151201130404.GL3816@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
> Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
> ---
> kernel/sched/wait.c | 16 ++++++++--------
> 1 file changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
> index 052e026..f10bd87 100644
> --- a/kernel/sched/wait.c
> +++ b/kernel/sched/wait.c
> @@ -583,18 +583,18 @@ EXPORT_SYMBOL(wake_up_atomic_t);
>
> __sched int bit_wait(struct wait_bit_key *word)
> {
> - if (signal_pending_state(current->state, current))
> - return 1;
> schedule();
> + if (signal_pending(current))
> + return -EINTR;
> return 0;
> }

*sigh*, so that patch was broken.. the below might fix it, but please
someone look at it, I seem to have a less than stellar track record
here...

---
Subject: sched/wait: Fix the signal handling fix

Jan Stancek reported that I wrecked things for him by fixing things for
Vladimir :/

His report was due to an UNINTERRUPTIBLE wait getting -EINTR, which
should not be possible; however, my previous patch made this possible by
unconditionally checking signal_pending().

We cannot use current->state as was done previously, because that
variable can be changed as early as the instruction after the store to
it. We must instead pass the initial state along and use that.

Fixes: 68985633bccb ("sched/wait: Fix signal handling in bit wait helpers")
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Mike Galbraith <efault@xxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: mark.rutland@xxxxxxx
Cc: neilb@xxxxxxx
Cc: oleg@xxxxxxxxxx
Cc: Vladimir Murzin <vladimir.murzin@xxxxxxx>
Reported-by: Jan Stancek <jstancek@xxxxxxxxxx>
Tested-by: Jan Stancek <jstancek@xxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
fs/cifs/inode.c | 6 +++---
fs/nfs/inode.c | 6 +++---
fs/nfs/internal.h | 2 +-
fs/nfs/pagelist.c | 2 +-
fs/nfs/pnfs.c | 4 ++--
include/linux/wait.h | 10 +++++-----
kernel/sched/wait.c | 20 ++++++++++----------
net/sunrpc/sched.c | 6 +++---
8 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 6b66dd5..a329f5b 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1831,11 +1831,11 @@ cifs_invalidate_mapping(struct inode *inode)
* @word: long word containing the bit lock
*/
static int
-cifs_wait_bit_killable(struct wait_bit_key *key)
+cifs_wait_bit_killable(struct wait_bit_key *key, int mode)
{
- if (fatal_signal_pending(current))
- return -ERESTARTSYS;
freezable_schedule_unsafe();
+ if (signal_pending_state(mode, current))
+ return -ERESTARTSYS;
return 0;
}

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 31b0a52..c7e8b87 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -75,11 +75,11 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
* nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
* @word: long word containing the bit lock
*/
-int nfs_wait_bit_killable(struct wait_bit_key *key)
+int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
{
- if (fatal_signal_pending(current))
- return -ERESTARTSYS;
freezable_schedule_unsafe();
+ if (signal_pending_state(mode, current))
+ return -ERESTARTSYS;
return 0;
}
EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 56cfde2..9dea85f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -379,7 +379,7 @@ extern int nfs_drop_inode(struct inode *);
extern void nfs_clear_inode(struct inode *);
extern void nfs_evict_inode(struct inode *);
void nfs_zap_acl_cache(struct inode *inode);
-extern int nfs_wait_bit_killable(struct wait_bit_key *key);
+extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);

/* super.c */
extern const struct super_operations nfs_sops;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index fe3ddd2..452a011 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -129,7 +129,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c)
set_bit(NFS_IO_INPROGRESS, &c->flags);
if (atomic_read(&c->io_count) == 0)
break;
- ret = nfs_wait_bit_killable(&q.key);
+ ret = nfs_wait_bit_killable(&q.key, TASK_KILLABLE);
} while (atomic_read(&c->io_count) != 0 && !ret);
finish_wait(wq, &q.wait);
return ret;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 5a8ae21..bec0384 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1466,11 +1466,11 @@ static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
}

/* stop waiting if someone clears NFS_LAYOUT_RETRY_LAYOUTGET bit. */
-static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key)
+static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key, int mode)
{
if (!test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, key->flags))
return 1;
- return nfs_wait_bit_killable(key);
+ return nfs_wait_bit_killable(key, mode);
}

static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 1e1bf9f..513b36f 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -145,7 +145,7 @@ __remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
list_del(&old->task_list);
}

-typedef int wait_bit_action_f(struct wait_bit_key *);
+typedef int wait_bit_action_f(struct wait_bit_key *, int mode);
void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
@@ -960,10 +960,10 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
} while (0)


-extern int bit_wait(struct wait_bit_key *);
-extern int bit_wait_io(struct wait_bit_key *);
-extern int bit_wait_timeout(struct wait_bit_key *);
-extern int bit_wait_io_timeout(struct wait_bit_key *);
+extern int bit_wait(struct wait_bit_key *, int);
+extern int bit_wait_io(struct wait_bit_key *, int);
+extern int bit_wait_timeout(struct wait_bit_key *, int);
+extern int bit_wait_io_timeout(struct wait_bit_key *, int);

/**
* wait_on_bit - wait for a bit to be cleared
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index f10bd87..f15d6b6 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -392,7 +392,7 @@ __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
do {
prepare_to_wait(wq, &q->wait, mode);
if (test_bit(q->key.bit_nr, q->key.flags))
- ret = (*action)(&q->key);
+ ret = (*action)(&q->key, mode);
} while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
finish_wait(wq, &q->wait);
return ret;
@@ -431,7 +431,7 @@ __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
prepare_to_wait_exclusive(wq, &q->wait, mode);
if (!test_bit(q->key.bit_nr, q->key.flags))
continue;
- ret = action(&q->key);
+ ret = action(&q->key, mode);
if (!ret)
continue;
abort_exclusive_wait(wq, &q->wait, mode, &q->key);
@@ -581,43 +581,43 @@ void wake_up_atomic_t(atomic_t *p)
}
EXPORT_SYMBOL(wake_up_atomic_t);

-__sched int bit_wait(struct wait_bit_key *word)
+__sched int bit_wait(struct wait_bit_key *word, int mode)
{
schedule();
- if (signal_pending(current))
+ if (signal_pending_state(mode, current))
return -EINTR;
return 0;
}
EXPORT_SYMBOL(bit_wait);

-__sched int bit_wait_io(struct wait_bit_key *word)
+__sched int bit_wait_io(struct wait_bit_key *word, int mode)
{
io_schedule();
- if (signal_pending(current))
+ if (signal_pending_state(mode, current))
return -EINTR;
return 0;
}
EXPORT_SYMBOL(bit_wait_io);

-__sched int bit_wait_timeout(struct wait_bit_key *word)
+__sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
{
unsigned long now = READ_ONCE(jiffies);
if (time_after_eq(now, word->timeout))
return -EAGAIN;
schedule_timeout(word->timeout - now);
- if (signal_pending(current))
+ if (signal_pending_state(mode, current))
return -EINTR;
return 0;
}
EXPORT_SYMBOL_GPL(bit_wait_timeout);

-__sched int bit_wait_io_timeout(struct wait_bit_key *word)
+__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
{
unsigned long now = READ_ONCE(jiffies);
if (time_after_eq(now, word->timeout))
return -EAGAIN;
io_schedule_timeout(word->timeout - now);
- if (signal_pending(current))
+ if (signal_pending_state(mode, current))
return -EINTR;
return 0;
}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index f14f24e..73ad57a 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -250,11 +250,11 @@ void rpc_destroy_wait_queue(struct rpc_wait_queue *queue)
}
EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);

-static int rpc_wait_bit_killable(struct wait_bit_key *key)
+static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode)
{
- if (fatal_signal_pending(current))
- return -ERESTARTSYS;
freezable_schedule_unsafe();
+ if (signal_pending_state(mode, current))
+ return -ERESTARTSYS;
return 0;
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/