[PATCH v2 1/2] debugfs: prevent access to possibly dead file_operations at file open

From: Nicolai Stange
Date: Mon Feb 08 2016 - 10:02:19 EST


Nothing prevents a dentry found by path lookup before a return of
__debugfs_remove() to actually get opened after that return. Now, after
the return of __debugfs_remove(), there are no guarantees whatsoever
regarding the memory the corresponding inode's file_operations object
had been kept in.

Since __debugfs_remove() is seldomly invoked, usually from module exit
handlers only, the race is hard to trigger and the impact is very low.

A discussion of the problem outlined above as well as a suggested
solution can be found in the (sub-)thread rooted at

http://lkml.kernel.org/g/20130401203445.GA20862@xxxxxxxxxxxxxxxxxx
("Yet another pipe related oops.")

Basically, Greg KH suggests to introduce an intermediate fops and
Al Viro points out that a pointer to the original ones may be stored in
->d_fsdata.

Follow this line of reasoning:
- Add SRCU as a reverse dependency of DEBUG_FS.
- Introduce a srcu_struct object for the debugfs subsystem.
- In debugfs_create_file(), store a pointer to the original
file_operations object in ->d_fsdata.
- In __debugfs_remove(), clear out that dentry->d_fsdata member again.
debugfs_remove() and debugfs_remove_recursive() wait for a SRCU grace
period before returning to their caller.
- Introduce an intermediate file_operations object named
"debugfs_proxy_file_operations". It's ->open() functions checks,
under the protection of a SRCU read lock, whether the "original"
file_operations pointed to by ->d_fsdata is still valid and if so,
tries to acquire a reference on the owning module. On success, it sets
the file object's ->f_op to the original file_operations and forwards
the ongoing open() call to the original ->open().
- For clarity, rename the former debugfs_file_operations to
debugfs_noop_file_operations -- they are in no way canonical.

The choice of SRCU over "normal" RCU is justified by the fact, that the
former may also be used to protect ->i_private data from going away
during the execution of a file's readers and writers which may (and do)
sleep.

Finally, introduce the fs/debugfs/internal.h header containing some
declarations internal to the debugfs implementation.

Signed-off-by: Nicolai Stange <nicstange@xxxxxxxxx>
---
fs/debugfs/file.c | 32 +++++++++++++++++++++++++++++++-
fs/debugfs/inode.c | 19 ++++++++++++++++---
fs/debugfs/internal.h | 24 ++++++++++++++++++++++++
include/linux/debugfs.h | 3 ---
lib/Kconfig.debug | 1 +
5 files changed, 72 insertions(+), 7 deletions(-)
create mode 100644 fs/debugfs/internal.h

diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index d2ba12e..35ceae7 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -22,6 +22,9 @@
#include <linux/slab.h>
#include <linux/atomic.h>
#include <linux/device.h>
+#include <linux/srcu.h>
+
+#include "internal.h"

static ssize_t default_read_file(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
@@ -35,13 +38,40 @@ static ssize_t default_write_file(struct file *file, const char __user *buf,
return count;
}

-const struct file_operations debugfs_file_operations = {
+const struct file_operations debugfs_noop_file_operations = {
.read = default_read_file,
.write = default_write_file,
.open = simple_open,
.llseek = noop_llseek,
};

+static int proxy_open(struct inode *inode, struct file *filp)
+{
+ struct dentry * const dentry = filp->f_path.dentry;
+ const struct file_operations __rcu *rcu_real_fops;
+ const struct file_operations *real_fops;
+ int srcu_idx;
+
+ srcu_idx = srcu_read_lock(&debugfs_srcu);
+ rcu_real_fops = (const struct file_operations __rcu *)dentry->d_fsdata;
+ real_fops = srcu_dereference(rcu_real_fops, &debugfs_srcu);
+ real_fops = fops_get(real_fops);
+ srcu_read_unlock(&debugfs_srcu, srcu_idx);
+
+ if (!real_fops)
+ return -ENOENT;
+
+ replace_fops(filp, real_fops);
+ if (filp->f_op->open)
+ return filp->f_op->open(inode, filp);
+
+ return 0;
+}
+
+const struct file_operations debugfs_proxy_file_operations = {
+ .open = proxy_open,
+};
+
static struct dentry *debugfs_create_mode(const char *name, umode_t mode,
struct dentry *parent, void *value,
const struct file_operations *fops,
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index bece948..fbf86cf 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -27,9 +27,14 @@
#include <linux/parser.h>
#include <linux/magic.h>
#include <linux/slab.h>
+#include <linux/srcu.h>
+
+#include "internal.h"

#define DEBUGFS_DEFAULT_MODE 0700

+DEFINE_SRCU(debugfs_srcu);
+
static struct vfsmount *debugfs_mount;
static int debugfs_mount_count;
static bool debugfs_registered;
@@ -340,8 +345,12 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode,
return failed_creating(dentry);

inode->i_mode = mode;
- inode->i_fop = fops ? fops : &debugfs_file_operations;
inode->i_private = data;
+
+ inode->i_fop = fops ? &debugfs_proxy_file_operations
+ : &debugfs_noop_file_operations;
+ rcu_assign_pointer(dentry->d_fsdata, (void *)fops);
+
d_instantiate(dentry, inode);
fsnotify_create(d_inode(dentry->d_parent), dentry);
return end_creating(dentry);
@@ -523,10 +532,12 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)

if (simple_positive(dentry)) {
dget(dentry);
- if (d_is_dir(dentry))
+ if (d_is_dir(dentry)) {
ret = simple_rmdir(d_inode(parent), dentry);
- else
+ } else {
simple_unlink(d_inode(parent), dentry);
+ rcu_assign_pointer(dentry->d_fsdata, NULL);
+ }
if (!ret)
d_delete(dentry);
dput(dentry);
@@ -565,6 +576,7 @@ void debugfs_remove(struct dentry *dentry)
inode_unlock(d_inode(parent));
if (!ret)
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+ synchronize_srcu(&debugfs_srcu);
}
EXPORT_SYMBOL_GPL(debugfs_remove);

@@ -642,6 +654,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
if (!__debugfs_remove(child, parent))
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
inode_unlock(d_inode(parent));
+ synchronize_srcu(&debugfs_srcu);
}
EXPORT_SYMBOL_GPL(debugfs_remove_recursive);

diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h
new file mode 100644
index 0000000..1f87f3c
--- /dev/null
+++ b/fs/debugfs/internal.h
@@ -0,0 +1,24 @@
+/*
+ * internal.h - declarations internal to debugfs
+ *
+ * Copyright (C) 2016 Nicolai Stange <nicstange@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ */
+
+#ifndef _DEBUGFS_INTERNAL_H_
+#define _DEBUGFS_INTERNAL_H_
+
+struct file_operations;
+struct srcu_struct;
+
+/* declared over in file.c */
+extern const struct file_operations debugfs_noop_file_operations;
+extern const struct file_operations debugfs_proxy_file_operations;
+
+extern struct srcu_struct debugfs_srcu;
+
+#endif /* _DEBUGFS_INTERNAL_H_ */
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 19c066d..8b44093 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -43,9 +43,6 @@ extern struct dentry *arch_debugfs_dir;

#if defined(CONFIG_DEBUG_FS)

-/* declared over in file.c */
-extern const struct file_operations debugfs_file_operations;
-
struct dentry *debugfs_create_file(const char *name, umode_t mode,
struct dentry *parent, void *data,
const struct file_operations *fops);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ecb9e75..080d243 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -257,6 +257,7 @@ config PAGE_OWNER

config DEBUG_FS
bool "Debug Filesystem"
+ select SRCU
help
debugfs is a virtual file system that kernel developers use to put
debugging files into. Enable this option to be able to read and
--
2.7.0