Re: [RFC PATCH 8/9] debugfs: defer debugfs_fsdata allocation to first usage

From: Johannes Berg
Date: Tue Apr 18 2017 - 05:37:47 EST


On Sun, 2017-04-16 at 11:51 +0200, Nicolai Stange wrote:
>
> +++ b/fs/debugfs/file.c
> @@ -53,6 +53,7 @@ const struct file_operations
> *debugfs_real_fops(const struct file *filp)
> Â{
> Â struct debugfs_fsdata *fsd = F_DENTRY(filp)->d_fsdata;
> Â
> + WARN_ON((unsigned long)fsd &
> DEBUGFS_FSDATA_IS_REAL_FOPS_BIT);
> Â return fsd->real_fops;

I'm not a fan of BUG_ON(), but consider this case: if you have a
completely bogus pointer here, you then return fsd->real_fops, which
will be even more bogus, and *then* the caller invokes a function
through it... that seems like a recipe for disaster.

So you could either return some valid ops (perhaps
debugfs_noop_file_operations, although those don't have .name or .poll,
so they don't cover everything), or just BUG_ON() here directly,
avoiding the incomprehensible crash later.

johannes

> ÂEXPORT_SYMBOL_GPL(debugfs_real_fops);
> @@ -74,9 +75,35 @@ EXPORT_SYMBOL_GPL(debugfs_real_fops);
> Â */
> Âint debugfs_file_get(struct dentry *dentry)
> Â{
> - struct debugfs_fsdata *fsd = dentry->d_fsdata;
> + struct debugfs_fsdata *fsd;
> + void *d_fsd;
> +
> + d_fsd = READ_ONCE(dentry->d_fsdata);
> + if (!((unsigned long)d_fsd &
> DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) {
> + fsd = d_fsd;
> + } else {
> + fsd = kmalloc(sizeof(*fsd), GFP_KERNEL);
> + if (!fsd)
> + return -ENOMEM;
> +
> + fsd->real_fops = (void *)((unsigned long)d_fsd &
> + ~DEBUGFS_FSDATA_IS_REAL_FOPS_BIT);
> + refcount_set(&fsd->active_users, 1);
> + init_completion(&fsd->active_users_drained);
> + if (cmpxchg(&dentry->d_fsdata, d_fsd, fsd) != d_fsd)
> {
> + kfree(fsd);
> + fsd = READ_ONCE(dentry->d_fsdata);
> + }
> + }
> Â
> - /* Avoid starvation of removers. */
> + /*
> +  * In case of a successful cmpxchg() above, this check is
> +  * strictly necessary and must follow it, see the comment in
> +  * __debugfs_remove_file().
> +  * OTOH, if the cmpxchg() hasn't been executed or wasn't
> +  * successful, this serves the purpose of not starving
> +  * removers.
> +  */
> Â if (d_unlinked(dentry))
> Â return -EIO;
> Â
> @@ -98,7 +125,7 @@ EXPORT_SYMBOL_GPL(debugfs_file_get);
> Â */
> Âvoid debugfs_file_put(struct dentry *dentry)
> Â{
> - struct debugfs_fsdata *fsd = dentry->d_fsdata;
> + struct debugfs_fsdata *fsd = READ_ONCE(dentry->d_fsdata);
> Â
> Â if (refcount_dec_and_test(&fsd->active_users))
> Â complete(&fsd->active_users_drained);
> @@ -109,10 +136,11 @@ static int open_proxy_open(struct inode *inode,
> struct file *filp)
> Â{
> Â struct dentry *dentry = F_DENTRY(filp);
> Â const struct file_operations *real_fops = NULL;
> - int r = 0;
> + int r;
> Â
> - if (debugfs_file_get(dentry))
> - return -ENOENT;
> + r = debugfs_file_get(dentry);
> + if (r)
> + return r == -EIO ? -ENOENT : r;
> Â
> Â real_fops = debugfs_real_fops(filp);
> Â real_fops = fops_get(real_fops);
> @@ -233,10 +261,11 @@ static int full_proxy_open(struct inode *inode,
> struct file *filp)
> Â struct dentry *dentry = F_DENTRY(filp);
> Â const struct file_operations *real_fops = NULL;
> Â struct file_operations *proxy_fops = NULL;
> - int r = 0;
> + int r;
> Â
> - if (debugfs_file_get(dentry))
> - return -ENOENT;
> + r = debugfs_file_get(dentry);
> + if (r)
> + return r == -EIO ? -ENOENT : r;
> Â
> Â real_fops = debugfs_real_fops(filp);
> Â real_fops = fops_get(real_fops);
> diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
> index 5550f11d60bd..2360c17ec00a 100644
> --- a/fs/debugfs/inode.c
> +++ b/fs/debugfs/inode.c
> @@ -184,7 +184,10 @@ static const struct super_operations
> debugfs_super_operations = {
> Â
> Âstatic void debugfs_release_dentry(struct dentry *dentry)
> Â{
> - kfree(dentry->d_fsdata);
> + void *fsd = dentry->d_fsdata;
> +
> + if (!((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))
> + kfree(dentry->d_fsdata);
> Â}
> Â
> Âstatic struct vfsmount *debugfs_automount(struct path *path)
> @@ -346,35 +349,25 @@ static struct dentry
> *__debugfs_create_file(const char *name, umode_t mode,
> Â{
> Â struct dentry *dentry;
> Â struct inode *inode;
> - struct debugfs_fsdata *fsd;
> -
> - fsd = kmalloc(sizeof(*fsd), GFP_KERNEL);
> - if (!fsd)
> - return NULL;
> Â
> Â if (!(mode & S_IFMT))
> Â mode |= S_IFREG;
> Â BUG_ON(!S_ISREG(mode));
> Â dentry = start_creating(name, parent);
> Â
> - if (IS_ERR(dentry)) {
> - kfree(fsd);
> + if (IS_ERR(dentry))
> Â return NULL;
> - }
> Â
> Â inode = debugfs_get_inode(dentry->d_sb);
> - if (unlikely(!inode)) {
> - kfree(fsd);
> + if (unlikely(!inode))
> Â return failed_creating(dentry);
> - }
> Â
> Â inode->i_mode = mode;
> Â inode->i_private = data;
> Â
> Â inode->i_fop = proxy_fops;
> - fsd->real_fops = real_fops;
> - refcount_set(&fsd->active_users, 1);
> - dentry->d_fsdata = fsd;
> + dentry->d_fsdata = (void *)((unsigned long)real_fops |
> + DEBUGFS_FSDATA_IS_REAL_FOPS_BIT);
> Â
> Â d_instantiate(dentry, inode);
> Â fsnotify_create(d_inode(dentry->d_parent), dentry);
> @@ -637,8 +630,17 @@ static void __debugfs_remove_file(struct dentry
> *dentry, struct dentry *parent)
> Â
> Â simple_unlink(d_inode(parent), dentry);
> Â d_delete(dentry);
> - fsd = dentry->d_fsdata;
> - init_completion(&fsd->active_users_drained);
> +
> + /*
> +  * Paired with the closing smp_mb() implied by a successful
> +  * cmpxchg() in debugfs_file_get(): either
> +  * debugfs_file_get() must see a dead dentry or we must see a
> +  * debugfs_fsdata instance at ->d_fsdata here (or both).
> + Â*/
> + smp_mb();
> + fsd = READ_ONCE(dentry->d_fsdata);
> + if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)
> + return;
> Â if (!refcount_dec_and_test(&fsd->active_users))
> Â wait_for_completion(&fsd->active_users_drained);
> Â}
> diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h
> index 0eea99432840..cb1e8139c398 100644
> --- a/fs/debugfs/internal.h
> +++ b/fs/debugfs/internal.h
> @@ -25,4 +25,12 @@ struct debugfs_fsdata {
> Â struct completion active_users_drained;
> Â};
> Â
> +/*
> + * A dentry's ->d_fsdata either points to the real fops or to a
> + * dynamically allocated debugfs_fsdata instance.
> + * In order to distinguish between these two cases, a real fops
> + * pointer gets its lowest bit set.
> + */
> +#define DEBUGFS_FSDATA_IS_REAL_FOPS_BIT BIT(0)
> +
> Â#endif /* _DEBUGFS_INTERNAL_H_ */