[PATCH] fs: fix data races on inode->i_flctx

From: Dmitry Vyukov
Date: Mon Sep 21 2015 - 03:43:15 EST


locks_get_lock_context() uses cmpxchg() to install i_flctx.
cmpxchg() is a release operation which is correct. But it uses
a plain load to load i_flctx. This is incorrect. Subsequent loads
from i_flctx can hoist above the load of i_flctx pointer itself
and observe uninitialized garbage there. This in turn can lead
to corruption of ctx->flc_lock and other members.

Documentation/memory-barriers.txt explicitly requires to use
a barrier in such context:
"A load-load control dependency requires a full read memory barrier".

Use smp_load_acquire() in locks_get_lock_context() and in bunch
of other functions that can proceed concurrently with
locks_get_lock_context().

The data race was found with KernelThreadSanitizer (KTSAN).

Signed-off-by: Dmitry Vyukov <dvyukov@xxxxxxxxxx>
---
For the record, here is the KTSAN report:

ThreadSanitizer: data-race in _raw_spin_lock

Read at 0xffff8800bae50da0 of size 1 by thread 3060 on CPU 2:
[< inline >] __raw_spin_lock include/linux/spinlock_api_smp.h:158
[<ffffffff81ee37d0>] _raw_spin_lock+0x50/0x70 kernel/locking/spinlock.c:151
[< inline >] spin_lock include/linux/spinlock.h:312
[<ffffffff812e1187>] flock_lock_inode+0xb7/0x440 fs/locks.c:895
[<ffffffff812e16e6>] flock_lock_inode_wait+0x46/0x110 fs/locks.c:1871
[<ffffffff812e2814>] SyS_flock+0x224/0x23

Previous write at 0xffff8800bae50da0 of size 8 by thread 3063 on CPU 8:
[<ffffffff81248628>] kmem_cache_alloc+0xd8/0x2f0 mm/slab.c:3420
[<ffffffff812ddba0>] locks_get_lock_context+0x60/0x140 fs/locks.c:213
[<ffffffff812e112a>] flock_lock_inode+0x5a/0x440 fs/locks.c:882
[<ffffffff812e16e6>] flock_lock_inode_wait+0x46/0x110 fs/locks.c:1871
[< inline >] flock_lock_file_wait include/linux/fs.h:1219
[< inline >] SYSC_flock fs/locks.c:1941
[<ffffffff812e2814>] SyS_flock+0x224/0x230 fs/locks.c:1904
[<ffffffff81ee3e11>] entry_SYSCALL_64_fastpath+0x31/0x95 arch/x86/entry/entry_64.S:188
---
fs/locks.c | 63 +++++++++++++++++++++++++++++++++++---------------------------
1 file changed, 36 insertions(+), 27 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 2a54c80..316e474 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -205,28 +205,32 @@ static struct kmem_cache *filelock_cache __read_mostly;
static struct file_lock_context *
locks_get_lock_context(struct inode *inode, int type)
{
- struct file_lock_context *new;
+ struct file_lock_context *ctx;

- if (likely(inode->i_flctx) || type == F_UNLCK)
+ /* paired with cmpxchg() below */
+ ctx = smp_load_acquire(&inode->i_flctx);
+ if (likely(ctx) || type == F_UNLCK)
goto out;

- new = kmem_cache_alloc(flctx_cache, GFP_KERNEL);
- if (!new)
+ ctx = kmem_cache_alloc(flctx_cache, GFP_KERNEL);
+ if (!ctx)
goto out;

- spin_lock_init(&new->flc_lock);
- INIT_LIST_HEAD(&new->flc_flock);
- INIT_LIST_HEAD(&new->flc_posix);
- INIT_LIST_HEAD(&new->flc_lease);
+ spin_lock_init(&ctx->flc_lock);
+ INIT_LIST_HEAD(&ctx->flc_flock);
+ INIT_LIST_HEAD(&ctx->flc_posix);
+ INIT_LIST_HEAD(&ctx->flc_lease);

/*
* Assign the pointer if it's not already assigned. If it is, then
* free the context we just allocated.
*/
- if (cmpxchg(&inode->i_flctx, NULL, new))
- kmem_cache_free(flctx_cache, new);
+ if (cmpxchg(&inode->i_flctx, NULL, ctx)) {
+ kmem_cache_free(flctx_cache, ctx);
+ ctx = smp_load_acquire(&inode->i_flctx);
+ }
out:
- return inode->i_flctx;
+ return ctx;
}

void
@@ -762,7 +766,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
struct file_lock_context *ctx;
struct inode *inode = file_inode(filp);

- ctx = inode->i_flctx;
+ ctx = smp_load_acquire(&inode->i_flctx);
if (!ctx || list_empty_careful(&ctx->flc_posix)) {
fl->fl_type = F_UNLCK;
return;
@@ -1203,7 +1207,7 @@ int locks_mandatory_locked(struct file *file)
struct file_lock_context *ctx;
struct file_lock *fl;

- ctx = inode->i_flctx;
+ ctx = smp_load_acquire(&inode->i_flctx);
if (!ctx || list_empty_careful(&ctx->flc_posix))
return 0;

@@ -1388,7 +1392,7 @@ any_leases_conflict(struct inode *inode, struct file_lock *breaker)
int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
{
int error = 0;
- struct file_lock_context *ctx = inode->i_flctx;
+ struct file_lock_context *ctx;
struct file_lock *new_fl, *fl, *tmp;
unsigned long break_time;
int want_write = (mode & O_ACCMODE) != O_RDONLY;
@@ -1400,6 +1404,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
new_fl->fl_flags = type;

/* typically we will check that ctx is non-NULL before calling */
+ ctx = smp_load_acquire(&inode->i_flctx);
if (!ctx) {
WARN_ON_ONCE(1);
return error;
@@ -1494,9 +1499,10 @@ EXPORT_SYMBOL(__break_lease);
void lease_get_mtime(struct inode *inode, struct timespec *time)
{
bool has_lease = false;
- struct file_lock_context *ctx = inode->i_flctx;
+ struct file_lock_context *ctx;
struct file_lock *fl;

+ ctx = smp_load_acquire(&inode->i_flctx);
if (ctx && !list_empty_careful(&ctx->flc_lease)) {
spin_lock(&ctx->flc_lock);
if (!list_empty(&ctx->flc_lease)) {
@@ -1543,10 +1549,11 @@ int fcntl_getlease(struct file *filp)
{
struct file_lock *fl;
struct inode *inode = file_inode(filp);
- struct file_lock_context *ctx = inode->i_flctx;
+ struct file_lock_context *ctx;
int type = F_UNLCK;
LIST_HEAD(dispose);

+ ctx = smp_load_acquire(&inode->i_flctx);
if (ctx && !list_empty_careful(&ctx->flc_lease)) {
spin_lock(&ctx->flc_lock);
time_out_leases(file_inode(filp), &dispose);
@@ -1713,9 +1720,10 @@ static int generic_delete_lease(struct file *filp, void *owner)
struct file_lock *fl, *victim = NULL;
struct dentry *dentry = filp->f_path.dentry;
struct inode *inode = dentry->d_inode;
- struct file_lock_context *ctx = inode->i_flctx;
+ struct file_lock_context *ctx;
LIST_HEAD(dispose);

+ ctx = smp_load_acquire(&inode->i_flctx);
if (!ctx) {
trace_generic_delete_lease(inode, NULL);
return error;
@@ -2359,13 +2367,14 @@ out:
void locks_remove_posix(struct file *filp, fl_owner_t owner)
{
struct file_lock lock;
- struct file_lock_context *ctx = file_inode(filp)->i_flctx;
+ struct file_lock_context *ctx;

/*
* If there are no locks held on this file, we don't need to call
* posix_lock_file(). Another process could be setting a lock on this
* file at the same time, but we wouldn't remove that lock anyway.
*/
+ ctx = smp_load_acquire(&file_inode(filp)->i_flctx);
if (!ctx || list_empty(&ctx->flc_posix))
return;

@@ -2389,7 +2398,7 @@ EXPORT_SYMBOL(locks_remove_posix);

/* The i_flctx must be valid when calling into here */
static void
-locks_remove_flock(struct file *filp)
+locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
{
struct file_lock fl = {
.fl_owner = filp,
@@ -2400,7 +2409,6 @@ locks_remove_flock(struct file *filp)
.fl_end = OFFSET_MAX,
};
struct inode *inode = file_inode(filp);
- struct file_lock_context *flctx = inode->i_flctx;

if (list_empty(&flctx->flc_flock))
return;
@@ -2416,10 +2424,8 @@ locks_remove_flock(struct file *filp)

/* The i_flctx must be valid when calling into here */
static void
-locks_remove_lease(struct file *filp)
+locks_remove_lease(struct file *filp, struct file_lock_context *ctx)
{
- struct inode *inode = file_inode(filp);
- struct file_lock_context *ctx = inode->i_flctx;
struct file_lock *fl, *tmp;
LIST_HEAD(dispose);

@@ -2439,17 +2445,20 @@ locks_remove_lease(struct file *filp)
*/
void locks_remove_file(struct file *filp)
{
- if (!file_inode(filp)->i_flctx)
+ struct file_lock_context *ctx;
+
+ ctx = smp_load_acquire(&file_inode(filp)->i_flctx);
+ if (!ctx)
return;

/* remove any OFD locks */
locks_remove_posix(filp, filp);

/* remove flock locks */
- locks_remove_flock(filp);
+ locks_remove_flock(filp, ctx);

/* remove any leases */
- locks_remove_lease(filp);
+ locks_remove_lease(filp, ctx);
}

/**
@@ -2616,7 +2625,7 @@ void show_fd_locks(struct seq_file *f,
struct file_lock_context *ctx;
int id = 0;

- ctx = inode->i_flctx;
+ ctx = smp_load_acquire(&inode->i_flctx);
if (!ctx)
return;

--
2.6.0.rc0.131.gf624c3d

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/