[PATCH 2/2] proc: covert procfs to use the general revoke implementation

From: Li Zhong
Date: Thu Jul 11 2013 - 07:08:11 EST


This patch tries to replace the current revoke logic in procfs with the
implementation suggested by Al Viro in
https://lkml.org/lkml/2013/4/5/15

Below is the replacement guideline copied from that mail:

procfs would have struct revokable embedded into proc_dir_entry, with freeing
of those guys RCUd. It would set ->f_op to ->proc_fops and call
make_revokable(file, &pde->revokable) in proc_reg_open(); no wrappers for other
methods needed anymore. All file_operations instances fed to
proc_create() et.al. would lose ->owner - it's already not needed for those,
actually. remove_proc_entry()/remove_proc_subtree() would call revoke_it()
on everything we are removing.

Signed-off-by: Li Zhong <zhong@xxxxxxxxxxxxxxxxxx>
---
fs/compat_ioctl.c | 8 +-
fs/eventpoll.c | 10 ++-
fs/file_table.c | 13 ++-
fs/ioctl.c | 7 +-
fs/proc/generic.c | 12 +--
fs/proc/inode.c | 229 +++++-----------------------------------------------
fs/proc/internal.h | 9 +--
fs/read_write.c | 30 +++++--
fs/select.c | 11 ++-
mm/mmap.c | 8 +-
mm/nommu.c | 16 +++-
11 files changed, 111 insertions(+), 242 deletions(-)

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 5d19acf..48da3bf 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -110,6 +110,7 @@
#include <linux/dvb/video.h>

#include <linux/sort.h>
+#include <linux/revoke.h>

#ifdef CONFIG_SPARC
#include <asm/fbio.h>
@@ -1584,7 +1585,12 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,

default:
if (f.file->f_op && f.file->f_op->compat_ioctl) {
- error = f.file->f_op->compat_ioctl(f.file, cmd, arg);
+ if (likely(start_using(f.file))) {
+ error = f.file->f_op->compat_ioctl(f.file,
+ cmd, arg);
+ stop_using(f.file);
+ } else
+ error = -ENOTTY;
if (error != -ENOIOCTLCMD)
goto out_fput;
}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9ad17b15..c73e8af 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -42,6 +42,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/compat.h>
+#include <linux/revoke.h>

/*
* LOCKING:
@@ -776,9 +777,16 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file)

static inline unsigned int ep_item_poll(struct epitem *epi, poll_table *pt)
{
+ unsigned int rv = DEFAULT_POLLMASK;
pt->_key = epi->event.events;

- return epi->ffd.file->f_op->poll(epi->ffd.file, pt) & epi->event.events;
+ if (likely(start_using(epi->ffd.file))) {
+ rv = epi->ffd.file->f_op->poll(epi->ffd.file, pt)
+ & epi->event.events;
+ stop_using(epi->ffd.file);
+ }
+
+ return rv;
}

static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
diff --git a/fs/file_table.c b/fs/file_table.c
index 08e719b..b3d55e0 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -26,6 +26,7 @@
#include <linux/hardirq.h>
#include <linux/task_work.h>
#include <linux/ima.h>
+#include <linux/revoke.h>

#include <linux/atomic.h>

@@ -244,14 +245,20 @@ static void __fput(struct file *file)
file->f_op->fasync(-1, file, 0);
}
ima_file_free(file);
- if (file->f_op && file->f_op->release)
- file->f_op->release(inode, file);
+ if (file->f_op && file->f_op->release) {
+ if (likely(!(file->f_revoke)))
+ file->f_op->release(inode, file);
+ else
+ release_revoke(file->f_revoke);
+ }
security_file_free(file);
if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
!(file->f_mode & FMODE_PATH))) {
cdev_put(inode->i_cdev);
}
- fops_put(file->f_op);
+ if (likely(!(file->f_revoke)))
+ fops_put(file->f_op);
+
put_pid(file->f_owner.pid);
if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
i_readcount_dec(inode);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index fd507fb..7610377 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -15,6 +15,7 @@
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/falloc.h>
+#include <linux/revoke.h>

#include <asm/ioctls.h>

@@ -40,7 +41,11 @@ static long vfs_ioctl(struct file *filp, unsigned int cmd,
if (!filp->f_op || !filp->f_op->unlocked_ioctl)
goto out;

- error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
+ if (likely(start_using(filp))) {
+ error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
+ stop_using(filp);
+ }
+
if (error == -ENOIOCTLCMD)
error = -ENOTTY;
out:
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 94441a4..3119937 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -23,6 +23,7 @@
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
+#include <linux/revoke.h>
#include <asm/uaccess.h>

#include "internal.h"
@@ -367,13 +368,14 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
if (!ent)
goto out;

+ spin_lock_init(&ent->revokable.lock);
+ INIT_HLIST_HEAD(&ent->revokable.list);
+
memcpy(ent->name, fn, len + 1);
ent->namelen = len;
ent->mode = mode;
ent->nlink = nlink;
atomic_set(&ent->count, 1);
- spin_lock_init(&ent->pde_unload_lock);
- INIT_LIST_HEAD(&ent->pde_openers);
out:
return ent;
}
@@ -488,7 +490,7 @@ static void free_proc_entry(struct proc_dir_entry *de)

if (S_ISLNK(de->mode))
kfree(de->data);
- kfree(de);
+ kfree_rcu(de, rcu);
}

void pde_put(struct proc_dir_entry *pde)
@@ -528,7 +530,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
return;
}

- proc_entry_rundown(de);
+ revoke_it(&de->revokable);

if (S_ISDIR(de->mode))
parent->nlink--;
@@ -577,7 +579,7 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
}
spin_unlock(&proc_subdir_lock);

- proc_entry_rundown(de);
+ revoke_it(&de->revokable);
next = de->parent;
if (S_ISDIR(de->mode))
next->nlink--;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 073aea6..17c0a66 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -12,7 +12,6 @@
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/completion.h>
-#include <linux/poll.h>
#include <linux/printk.h>
#include <linux/file.h>
#include <linux/limits.h>
@@ -23,6 +22,7 @@
#include <linux/slab.h>
#include <linux/mount.h>
#include <linux/magic.h>
+#include <linux/revoke.h>

#include <asm/uaccess.h>

@@ -130,168 +130,20 @@ static const struct super_operations proc_sops = {
.show_options = proc_show_options,
};

-enum {BIAS = -1U<<31};
-
-static inline int use_pde(struct proc_dir_entry *pde)
-{
- return atomic_inc_unless_negative(&pde->in_use);
-}
-
-static void unuse_pde(struct proc_dir_entry *pde)
-{
- if (atomic_dec_return(&pde->in_use) == BIAS)
- complete(pde->pde_unload_completion);
-}
-
-/* pde is locked */
-static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
-{
- if (pdeo->closing) {
- /* somebody else is doing that, just wait */
- DECLARE_COMPLETION_ONSTACK(c);
- pdeo->c = &c;
- spin_unlock(&pde->pde_unload_lock);
- wait_for_completion(&c);
- spin_lock(&pde->pde_unload_lock);
- } else {
- struct file *file;
- pdeo->closing = 1;
- spin_unlock(&pde->pde_unload_lock);
- file = pdeo->file;
- pde->proc_fops->release(file_inode(file), file);
- spin_lock(&pde->pde_unload_lock);
- list_del_init(&pdeo->lh);
- if (pdeo->c)
- complete(pdeo->c);
- kfree(pdeo);
- }
-}
-
-void proc_entry_rundown(struct proc_dir_entry *de)
-{
- DECLARE_COMPLETION_ONSTACK(c);
- /* Wait until all existing callers into module are done. */
- de->pde_unload_completion = &c;
- if (atomic_add_return(BIAS, &de->in_use) != BIAS)
- wait_for_completion(&c);
-
- spin_lock(&de->pde_unload_lock);
- while (!list_empty(&de->pde_openers)) {
- struct pde_opener *pdeo;
- pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
- close_pdeo(de, pdeo);
- }
- spin_unlock(&de->pde_unload_lock);
-}
-
-static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
-{
- struct proc_dir_entry *pde = PDE(file_inode(file));
- loff_t rv = -EINVAL;
- if (use_pde(pde)) {
- loff_t (*llseek)(struct file *, loff_t, int);
- llseek = pde->proc_fops->llseek;
- if (!llseek)
- llseek = default_llseek;
- rv = llseek(file, offset, whence);
- unuse_pde(pde);
- }
- return rv;
-}
-
-static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
-{
- ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
- struct proc_dir_entry *pde = PDE(file_inode(file));
- ssize_t rv = -EIO;
- if (use_pde(pde)) {
- read = pde->proc_fops->read;
- if (read)
- rv = read(file, buf, count, ppos);
- unuse_pde(pde);
- }
- return rv;
-}
-
-static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
-{
- ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
- struct proc_dir_entry *pde = PDE(file_inode(file));
- ssize_t rv = -EIO;
- if (use_pde(pde)) {
- write = pde->proc_fops->write;
- if (write)
- rv = write(file, buf, count, ppos);
- unuse_pde(pde);
- }
- return rv;
-}
-
-static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts)
-{
- struct proc_dir_entry *pde = PDE(file_inode(file));
- unsigned int rv = DEFAULT_POLLMASK;
- unsigned int (*poll)(struct file *, struct poll_table_struct *);
- if (use_pde(pde)) {
- poll = pde->proc_fops->poll;
- if (poll)
- rv = poll(file, pts);
- unuse_pde(pde);
- }
- return rv;
-}
-
-static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- struct proc_dir_entry *pde = PDE(file_inode(file));
- long rv = -ENOTTY;
- long (*ioctl)(struct file *, unsigned int, unsigned long);
- if (use_pde(pde)) {
- ioctl = pde->proc_fops->unlocked_ioctl;
- if (ioctl)
- rv = ioctl(file, cmd, arg);
- unuse_pde(pde);
- }
- return rv;
-}
-
-#ifdef CONFIG_COMPAT
-static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- struct proc_dir_entry *pde = PDE(file_inode(file));
- long rv = -ENOTTY;
- long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
- if (use_pde(pde)) {
- compat_ioctl = pde->proc_fops->compat_ioctl;
- if (compat_ioctl)
- rv = compat_ioctl(file, cmd, arg);
- unuse_pde(pde);
- }
- return rv;
-}
-#endif
-
-static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
-{
- struct proc_dir_entry *pde = PDE(file_inode(file));
- int rv = -EIO;
- int (*mmap)(struct file *, struct vm_area_struct *);
- if (use_pde(pde)) {
- mmap = pde->proc_fops->mmap;
- if (mmap)
- rv = mmap(file, vma);
- unuse_pde(pde);
- }
- return rv;
-}
-
static int proc_reg_open(struct inode *inode, struct file *file)
{
struct proc_dir_entry *pde = PDE(inode);
int rv = 0;
int (*open)(struct inode *, struct file *);
int (*release)(struct inode *, struct file *);
- struct pde_opener *pdeo;
+ const struct file_operations *old_fop = file->f_op;
+
+ open = pde->proc_fops->open;
+ release = pde->proc_fops->release;
+
+ file->f_op = pde->proc_fops;
+ if (open)
+ rv = open(inode, file);

/*
* What for, you ask? Well, we can have open, rmmod, remove_proc_entry
@@ -303,73 +155,28 @@ static int proc_reg_open(struct inode *inode, struct file *file)
* by hand in remove_proc_entry(). For this, save opener's credentials
* for later.
*/
- pdeo = kzalloc(sizeof(struct pde_opener), GFP_KERNEL);
- if (!pdeo)
- return -ENOMEM;
-
- if (!use_pde(pde)) {
- kfree(pdeo);
- return -ENOENT;
- }
- open = pde->proc_fops->open;
- release = pde->proc_fops->release;

- if (open)
- rv = open(inode, file);
+ if (!rv) {
+ rv = make_revokable(file, &pde->revokable);

- if (rv == 0 && release) {
- /* To know what to release. */
- pdeo->file = file;
- /* Strictly for "too late" ->release in proc_reg_release(). */
- spin_lock(&pde->pde_unload_lock);
- list_add(&pdeo->lh, &pde->pde_openers);
- spin_unlock(&pde->pde_unload_lock);
- } else
- kfree(pdeo);
-
- unuse_pde(pde);
- return rv;
-}
-
-static int proc_reg_release(struct inode *inode, struct file *file)
-{
- struct proc_dir_entry *pde = PDE(inode);
- struct pde_opener *pdeo;
- spin_lock(&pde->pde_unload_lock);
- list_for_each_entry(pdeo, &pde->pde_openers, lh) {
- if (pdeo->file == file) {
- close_pdeo(pde, pdeo);
- break;
+ if (rv) {
+ /* temporarily for ->owner issue */
+ file->f_op = old_fop;
+ if (release)
+ release(inode, file);
}
}
- spin_unlock(&pde->pde_unload_lock);
- return 0;
+
+ return rv;
}

static const struct file_operations proc_reg_file_ops = {
- .llseek = proc_reg_llseek,
- .read = proc_reg_read,
- .write = proc_reg_write,
- .poll = proc_reg_poll,
- .unlocked_ioctl = proc_reg_unlocked_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = proc_reg_compat_ioctl,
-#endif
- .mmap = proc_reg_mmap,
.open = proc_reg_open,
- .release = proc_reg_release,
};

#ifdef CONFIG_COMPAT
static const struct file_operations proc_reg_file_ops_no_compat = {
- .llseek = proc_reg_llseek,
- .read = proc_reg_read,
- .write = proc_reg_write,
- .poll = proc_reg_poll,
- .unlocked_ioctl = proc_reg_unlocked_ioctl,
- .mmap = proc_reg_mmap,
.open = proc_reg_open,
- .release = proc_reg_release,
};
#endif

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 651d09a..6c6c0c9 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -14,6 +14,7 @@
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/binfmts.h>
+#include <linux/revoke.h>

struct ctl_table_header;
struct mempolicy;
@@ -41,11 +42,8 @@ struct proc_dir_entry {
struct proc_dir_entry *next, *parent, *subdir;
void *data;
atomic_t count; /* use count */
- atomic_t in_use; /* number of callers into module in progress; */
- /* negative -> it's going away RSN */
- struct completion *pde_unload_completion;
- struct list_head pde_openers; /* who did ->open, but not ->release */
- spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
+ struct revokable revokable;
+ struct rcu_head rcu;
u8 namelen;
char name[];
};
@@ -208,7 +206,6 @@ extern const struct inode_operations proc_pid_link_inode_operations;
extern void proc_init_inodecache(void);
extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
extern int proc_fill_super(struct super_block *);
-extern void proc_entry_rundown(struct proc_dir_entry *);

/*
* proc_devtree.c
diff --git a/fs/read_write.c b/fs/read_write.c
index 122a384..b9ed1fd 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -17,6 +17,7 @@
#include <linux/pagemap.h>
#include <linux/splice.h>
#include <linux/compat.h>
+#include <linux/revoke.h>
#include "internal.h"

#include <asm/uaccess.h>
@@ -254,13 +255,20 @@ EXPORT_SYMBOL(default_llseek);
loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
{
loff_t (*fn)(struct file *, loff_t, int);
+ loff_t rv = -EINVAL;

fn = no_llseek;
if (file->f_mode & FMODE_LSEEK) {
if (file->f_op && file->f_op->llseek)
fn = file->f_op->llseek;
}
- return fn(file, offset, whence);
+
+ if (likely(start_using(file))) {
+ rv = fn(file, offset, whence);
+ stop_using(file);
+ }
+
+ return rv;
}
EXPORT_SYMBOL(vfs_llseek);

@@ -393,9 +401,13 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
ret = rw_verify_area(READ, file, pos, count);
if (ret >= 0) {
count = ret;
- if (file->f_op->read)
- ret = file->f_op->read(file, buf, count, pos);
- else
+ if (file->f_op->read) {
+ if (likely(start_using(file))) {
+ ret = file->f_op->read(file, buf, count, pos);
+ stop_using(file);
+ } else
+ ret = -EIO;
+ } else
ret = do_sync_read(file, buf, count, pos);
if (ret > 0) {
fsnotify_access(file);
@@ -471,9 +483,13 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
if (ret >= 0) {
count = ret;
file_start_write(file);
- if (file->f_op->write)
- ret = file->f_op->write(file, buf, count, pos);
- else
+ if (file->f_op->write) {
+ if (likely(start_using(file))) {
+ ret = file->f_op->write(file, buf, count, pos);
+ stop_using(file);
+ } else
+ ret = -EIO;
+ } else
ret = do_sync_write(file, buf, count, pos);
if (ret > 0) {
fsnotify_modify(file);
diff --git a/fs/select.c b/fs/select.c
index 6b14dc7..16e1e37 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -28,6 +28,7 @@
#include <linux/hrtimer.h>
#include <linux/sched/rt.h>
#include <linux/freezer.h>
+#include <linux/revoke.h>

#include <asm/uaccess.h>

@@ -452,7 +453,10 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
mask = DEFAULT_POLLMASK;
if (f_op && f_op->poll) {
wait_key_set(wait, in, out, bit);
- mask = (*f_op->poll)(f.file, wait);
+ if (likely(start_using(f.file))) {
+ mask = (*f_op->poll)(f.file, wait);
+ stop_using(f.file);
+ }
}
fdput(f);
if ((mask & POLLIN_SET) && (in & bit)) {
@@ -733,7 +737,10 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
mask = DEFAULT_POLLMASK;
if (f.file->f_op && f.file->f_op->poll) {
pwait->_key = pollfd->events|POLLERR|POLLHUP;
- mask = f.file->f_op->poll(f.file, pwait);
+ if (likely(start_using(f.file))) {
+ mask = f.file->f_op->poll(f.file, pwait);
+ stop_using(f.file);
+ }
}
/* Mask out unneeded events. */
mask &= pollfd->events | POLLERR | POLLHUP;
diff --git a/mm/mmap.c b/mm/mmap.c
index 8468ffd..5988ebe 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -36,6 +36,7 @@
#include <linux/sched/sysctl.h>
#include <linux/notifier.h>
#include <linux/memory.h>
+#include <linux/revoke.h>

#include <asm/uaccess.h>
#include <asm/cacheflush.h>
@@ -1554,7 +1555,12 @@ munmap_back:
correct_wcount = 1;
}
vma->vm_file = get_file(file);
- error = file->f_op->mmap(file, vma);
+ if (likely(start_using(file))) {
+ error = file->f_op->mmap(file, vma);
+ stop_using(file);
+ } else
+ error = -EIO;
+
if (error)
goto unmap_and_free_vma;

diff --git a/mm/nommu.c b/mm/nommu.c
index e44e6e0..0853fe2 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -30,6 +30,7 @@
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/sched/sysctl.h>
+#include <linux/revoke.h>

#include <asm/uaccess.h>
#include <asm/tlb.h>
@@ -1116,9 +1117,13 @@ static unsigned long determine_vm_flags(struct file *file,
*/
static int do_mmap_shared_file(struct vm_area_struct *vma)
{
- int ret;
+ int ret = -EIO;
+
+ if (likely(start_using(vma->vm_file))) {
+ ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
+ stop_using(vma->vm_file);
+ }

- ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
if (ret == 0) {
vma->vm_region->vm_top = vma->vm_region->vm_end;
return 0;
@@ -1143,14 +1148,17 @@ static int do_mmap_private(struct vm_area_struct *vma,
struct page *pages;
unsigned long total, point, n;
void *base;
- int ret, order;
+ int ret = -EIO, order;

/* invoke the file's mapping function so that it can keep track of
* shared mappings on devices or memory
* - VM_MAYSHARE will be set if it may attempt to share
*/
if (capabilities & BDI_CAP_MAP_DIRECT) {
- ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
+ if (likely(start_using(vma->vm_file))) {
+ ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
+ stop_using(vma->vm_file);
+ }
if (ret == 0) {
/* shouldn't return success if we're not sharing */
BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
--
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/