[PATCH][RESEND] fdinfo: handle large fdinfo buffers

From: Josef Bacik
Date: Fri Feb 19 2016 - 15:11:11 EST


Currently the way we deal with special fdinfo information is with
fop->show_fdinfo, which calls seq_printf() and carries on. This works in 99% of
cases, but for things like inotify/fanotify we can have lots of stuff that we
want to print out, so we can easily overflow the seq buffer and end up with no
output. To fix this, add a few more callbacks to file_operations to handle
stop/start/next in a way similar to the normal seq_operations. We keep track of
which state we are in inside fs/proc/fd.c and call out to the helpers if we need
to. This allows us to still get all the inotify/fanotify information from the
special file descriptors without overflowing the buffer. Thanks,

Signed-off-by: Josef Bacik <jbacik@xxxxxx>
---
-Just noticed that this didn't show up on linux-fsdevel for some reason;
apologies if this is the second time you are getting this email.

drivers/net/tun.c | 2 +-
fs/eventfd.c | 2 +-
fs/eventpoll.c | 2 +-
fs/notify/fanotify/fanotify_user.c | 3 +
fs/notify/fdinfo.c | 63 ++++++++------
fs/notify/fdinfo.h | 12 ++-
fs/notify/inotify/inotify_user.c | 3 +
fs/proc/fd.c | 170 +++++++++++++++++++++++++++++++------
fs/signalfd.c | 2 +-
fs/timerfd.c | 2 +-
fs/userfaultfd.c | 2 +-
include/linux/fs.h | 5 +-
kernel/bpf/syscall.c | 2 +-
13 files changed, 207 insertions(+), 63 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 88bb8cc..2c8158f 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2251,7 +2251,7 @@ static int tun_chr_close(struct inode *inode, struct file *file)
}

#ifdef CONFIG_PROC_FS
-static void tun_chr_show_fdinfo(struct seq_file *m, struct file *f)
+static void tun_chr_show_fdinfo(struct seq_file *m, struct file *f, void *v)
{
struct tun_struct *tun;
struct ifreq ifr;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index ed70cf9..5b10518 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -287,7 +287,7 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c
}

#ifdef CONFIG_PROC_FS
-static void eventfd_show_fdinfo(struct seq_file *m, struct file *f)
+static void eventfd_show_fdinfo(struct seq_file *m, struct file *f, void *v)
{
struct eventfd_ctx *ctx = f->private_data;

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index cde6074..29ac3fd 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -875,7 +875,7 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
}

#ifdef CONFIG_PROC_FS
-static void ep_show_fdinfo(struct seq_file *m, struct file *f)
+static void ep_show_fdinfo(struct seq_file *m, struct file *f, void *v)
{
struct eventpoll *ep = f->private_data;
struct rb_node *rbp;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 8e8e6bc..e946793 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -420,6 +420,9 @@ static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long ar
}

static const struct file_operations fanotify_fops = {
+ .start_fdinfo = fsnotify_start_fdinfo,
+ .next_fdinfo = fsnotify_next_fdinfo,
+ .stop_fdinfo = fsnotify_stop_fdinfo,
.show_fdinfo = fanotify_show_fdinfo,
.poll = fanotify_poll,
.read = fanotify_read,
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index fd98e51..5f06970 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -20,22 +20,6 @@

#if defined(CONFIG_INOTIFY_USER) || defined(CONFIG_FANOTIFY)

-static void show_fdinfo(struct seq_file *m, struct file *f,
- void (*show)(struct seq_file *m,
- struct fsnotify_mark *mark))
-{
- struct fsnotify_group *group = f->private_data;
- struct fsnotify_mark *mark;
-
- mutex_lock(&group->mark_mutex);
- list_for_each_entry(mark, &group->marks_list, g_list) {
- show(m, mark);
- if (seq_has_overflowed(m))
- break;
- }
- mutex_unlock(&group->mark_mutex);
-}
-
#if defined(CONFIG_EXPORTFS)
static void show_mark_fhandle(struct seq_file *m, struct inode *inode)
{
@@ -71,11 +55,18 @@ static void show_mark_fhandle(struct seq_file *m, struct inode *inode)

#ifdef CONFIG_INOTIFY_USER

-static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
+void inotify_show_fdinfo(struct seq_file *m, struct file *f, void *v)
{
+ struct fsnotify_group *group = f->private_data;
+ struct list_head *cur = v;
+ struct fsnotify_mark *mark = list_entry(cur, struct fsnotify_mark,
+ g_list);
struct inotify_inode_mark *inode_mark;
struct inode *inode;

+ if (v == &group->marks_list)
+ return;
+
if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE) ||
!(mark->flags & FSNOTIFY_MARK_FLAG_INODE))
return;
@@ -99,11 +90,6 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
}
}

-void inotify_show_fdinfo(struct seq_file *m, struct file *f)
-{
- show_fdinfo(m, f, inotify_fdinfo);
-}
-
#endif /* CONFIG_INOTIFY_USER */

#ifdef CONFIG_FANOTIFY
@@ -137,11 +123,19 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
}
}

-void fanotify_show_fdinfo(struct seq_file *m, struct file *f)
+void fanotify_show_fdinfo(struct seq_file *m, struct file *f, void *v)
{
struct fsnotify_group *group = f->private_data;
unsigned int flags = 0;

+ if (v != &group->marks_list) {
+ struct list_head *cur = v;
+ struct fsnotify_mark *mark = list_entry(cur,
+ struct fsnotify_mark,
+ g_list);
+ return fanotify_fdinfo(m, mark);
+ }
+
switch (group->priority) {
case FS_PRIO_0:
flags |= FAN_CLASS_NOTIF;
@@ -162,11 +156,30 @@ void fanotify_show_fdinfo(struct seq_file *m, struct file *f)

seq_printf(m, "fanotify flags:%x event-flags:%x\n",
flags, group->fanotify_data.f_flags);
+}
+#endif /* CONFIG_FANOTIFY */

- show_fdinfo(m, f, fanotify_fdinfo);
+void *fsnotify_next_fdinfo(struct seq_file *seq, struct file *f, void *v,
+ loff_t *pos)
+{
+ struct fsnotify_group *group = f->private_data;
+ return seq_list_next(v, &group->marks_list, pos);
}

-#endif /* CONFIG_FANOTIFY */
+void *fsnotify_start_fdinfo(struct seq_file *seq, struct file *f, loff_t *pos)
+{
+ struct fsnotify_group *group = f->private_data;
+
+ mutex_lock(&group->mark_mutex);
+ return seq_list_start_head(&group->marks_list, *pos);
+}
+
+void fsnotify_stop_fdinfo(struct seq_file *seq, struct file *f, void *v)
+{
+ struct fsnotify_group *group = f->private_data;
+
+ mutex_unlock(&group->mark_mutex);
+}

#endif /* CONFIG_INOTIFY_USER || CONFIG_FANOTIFY */

diff --git a/fs/notify/fdinfo.h b/fs/notify/fdinfo.h
index 9664c49..3547f0c 100644
--- a/fs/notify/fdinfo.h
+++ b/fs/notify/fdinfo.h
@@ -10,17 +10,25 @@ struct file;
#ifdef CONFIG_PROC_FS

#ifdef CONFIG_INOTIFY_USER
-void inotify_show_fdinfo(struct seq_file *m, struct file *f);
+void inotify_show_fdinfo(struct seq_file *m, struct file *f, void *v);
#endif

#ifdef CONFIG_FANOTIFY
-void fanotify_show_fdinfo(struct seq_file *m, struct file *f);
+void fanotify_show_fdinfo(struct seq_file *m, struct file *f, void *v);
#endif

+void *fsnotify_next_fdinfo(struct seq_file *seq, struct file *f, void *v,
+ loff_t *pos);
+void *fsnotify_start_fdinfo(struct seq_file *seq, struct file *f, loff_t *pos);
+void fsnotify_stop_fdinfo(struct seq_file *seq, struct file *f, void *v);
+
#else /* CONFIG_PROC_FS */

#define inotify_show_fdinfo NULL
#define fanotify_show_fdinfo NULL
+#define fsnotify_start_fdinfo NULL
+#define fsnotify_stop_fdinfo NULL
+#define fsnotify_next_fdinfo NULL

#endif /* CONFIG_PROC_FS */

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index b8d08d0..7301965 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -316,6 +316,9 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,

static const struct file_operations inotify_fops = {
.show_fdinfo = inotify_show_fdinfo,
+ .next_fdinfo = fsnotify_next_fdinfo,
+ .stop_fdinfo = fsnotify_stop_fdinfo,
+ .start_fdinfo = fsnotify_start_fdinfo,
.poll = inotify_poll,
.read = inotify_read,
.fasync = fsnotify_fasync,
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 56afa5e..2b3ead7 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -16,14 +16,135 @@
#include "internal.h"
#include "fd.h"

-static int seq_show(struct seq_file *m, void *v)
+enum proc_fdinfo_states {
+ FDINFO_GENERIC = 0,
+ FDINFO_LOCKS = 1,
+ FDINFO_PRIVATE = 2,
+ FDINFO_DONE = 3,
+};
+
+struct proc_fdinfo_ctx {
+ struct file *file;
+ struct files_struct *files;
+ int f_flags;
+ unsigned state;
+ loff_t ppos;
+};
+
+static int seq_fdinfo_show(struct seq_file *seq, void *v)
+{
+ struct proc_fdinfo_ctx *ctx = seq->private;
+ struct file *file = ctx->file;
+ struct files_struct *files = ctx->files;
+
+ switch (ctx->state) {
+ case FDINFO_GENERIC:
+ seq_printf(seq, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\n",
+ (long long)file->f_pos, ctx->f_flags,
+ real_mount(file->f_path.mnt)->mnt_id);
+ return 0;
+ case FDINFO_LOCKS:
+ show_fd_locks(seq, file, files);
+ return 0;
+ case FDINFO_PRIVATE:
+ if (!file->f_op->show_fdinfo)
+ return 1;
+ file->f_op->show_fdinfo(seq, file, v);
+ return 0;
+ default:
+ break;
+ }
+ return 1;
+}
+
+static void *seq_fdinfo_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct proc_fdinfo_ctx *ctx = seq->private;
+ struct file *file = ctx->file;
+ switch (ctx->state) {
+ case FDINFO_GENERIC:
+ case FDINFO_LOCKS:
+ ctx->state++;
+ *pos = 0;
+
+ /* We're switching states, we need to make sure the
+ * ->start_fdinfo stuff is run if it exists.
+ */
+ if (ctx->state == FDINFO_PRIVATE && file->f_op->start_fdinfo)
+ return file->f_op->start_fdinfo(seq, file, pos);
+ return pos;
+ case FDINFO_PRIVATE:
+ if (!file->f_op->next_fdinfo) {
+ ctx->state++;
+ return NULL;
+ }
+ return file->f_op->next_fdinfo(seq, ctx->file, v, pos);
+ default:
+ break;
+ }
+ return NULL;
+}
+
+static void seq_fdinfo_stop(struct seq_file *seq, void *v)
+{
+ struct proc_fdinfo_ctx *ctx = seq->private;
+ struct file *file = ctx->file;
+
+ if (ctx->state == FDINFO_PRIVATE &&
+ file->f_op->stop_fdinfo)
+ file->f_op->stop_fdinfo(seq, file, v);
+}
+
+static void *seq_fdinfo_start(struct seq_file *seq, loff_t *pos)
+{
+ struct proc_fdinfo_ctx *ctx = seq->private;
+ struct file *file = ctx->file;
+
+ switch (ctx->state) {
+ case FDINFO_GENERIC:
+ case FDINFO_LOCKS:
+ *pos = 0;
+ return pos;
+ case FDINFO_PRIVATE:
+ if (!file->f_op->show_fdinfo) {
+ ctx->state = FDINFO_DONE;
+ return NULL;
+ }
+ if (file->f_op->start_fdinfo)
+ return file->f_op->start_fdinfo(seq, file, pos);
+ return pos;
+ default:
+ break;
+ }
+ return NULL;
+}
+
+const struct seq_operations proc_fdinfo_seq_operations = {
+ .start = seq_fdinfo_start,
+ .stop = seq_fdinfo_stop,
+ .next = seq_fdinfo_next,
+ .show = seq_fdinfo_show,
+};
+
+static int seq_fdinfo_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq = file->private_data;
+ struct proc_fdinfo_ctx *ctx = seq->private;
+
+ fput(ctx->file);
+ return seq_release_private(inode, file);
+}
+
+static int seq_fdinfo_open(struct inode *inode, struct file *file)
{
struct files_struct *files = NULL;
int f_flags = 0, ret = -ENOENT;
- struct file *file = NULL;
+ struct file *target_file = NULL;
struct task_struct *task;
+ struct seq_file *seq;
+ struct proc_fdinfo_ctx *ctx;

- task = get_proc_task(m->private);
+ task = get_proc_task(inode);
if (!task)
return -ENOENT;

@@ -31,53 +152,46 @@ static int seq_show(struct seq_file *m, void *v)
put_task_struct(task);

if (files) {
- int fd = proc_fd(m->private);
+ int fd = proc_fd(inode);

spin_lock(&files->file_lock);
- file = fcheck_files(files, fd);
- if (file) {
+ target_file = fcheck_files(files, fd);
+ if (target_file) {
struct fdtable *fdt = files_fdtable(files);

- f_flags = file->f_flags;
+ f_flags = target_file->f_flags;
if (close_on_exec(fd, fdt))
f_flags |= O_CLOEXEC;

- get_file(file);
+ get_file(target_file);
ret = 0;
}
spin_unlock(&files->file_lock);
put_files_struct(files);
}
-
if (ret)
return ret;

- seq_printf(m, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\n",
- (long long)file->f_pos, f_flags,
- real_mount(file->f_path.mnt)->mnt_id);
-
- show_fd_locks(m, file, files);
- if (seq_has_overflowed(m))
- goto out;
-
- if (file->f_op->show_fdinfo)
- file->f_op->show_fdinfo(m, file);
-
-out:
- fput(file);
+ ret = seq_open_private(file, &proc_fdinfo_seq_operations,
+ sizeof(*ctx));
+ if (ret) {
+ fput(target_file);
+ return ret;
+ }
+ seq = file->private_data;
+ ctx = seq->private;
+ ctx->file = target_file;
+ ctx->files = files;
+ ctx->f_flags = f_flags;
+ ctx->state = FDINFO_GENERIC;
return 0;
}

-static int seq_fdinfo_open(struct inode *inode, struct file *file)
-{
- return single_open(file, seq_show, inode);
-}
-
static const struct file_operations proc_fdinfo_file_operations = {
.open = seq_fdinfo_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release,
+ .release = seq_fdinfo_release,
};

static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 270221f..892eb48 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -231,7 +231,7 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
}

#ifdef CONFIG_PROC_FS
-static void signalfd_show_fdinfo(struct seq_file *m, struct file *f)
+static void signalfd_show_fdinfo(struct seq_file *m, struct file *f, void *v)
{
struct signalfd_ctx *ctx = f->private_data;
sigset_t sigmask;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 053818d..e8145d6 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -288,7 +288,7 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
}

#ifdef CONFIG_PROC_FS
-static void timerfd_show(struct seq_file *m, struct file *file)
+static void timerfd_show(struct seq_file *m, struct file *file, void *v)
{
struct timerfd_ctx *ctx = file->private_data;
struct itimerspec t;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 5031170..f055f0b 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1191,7 +1191,7 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd,
}

#ifdef CONFIG_PROC_FS
-static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
+static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f, void *v)
{
struct userfaultfd_ctx *ctx = f->private_data;
wait_queue_t *wq;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ae68100..69f818a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1652,7 +1652,10 @@ struct file_operations {
int (*setlease)(struct file *, long, struct file_lock **, void **);
long (*fallocate)(struct file *file, int mode, loff_t offset,
loff_t len);
- void (*show_fdinfo)(struct seq_file *m, struct file *f);
+ void *(*start_fdinfo)(struct seq_file *m, struct file *f, loff_t *pos);
+ void *(*next_fdinfo)(struct seq_file *m, struct file *f, void *v, loff_t *pos);
+ void (*show_fdinfo)(struct seq_file *m, struct file *f, void *v);
+ void (*stop_fdinfo)(struct seq_file *m, struct file *f, void *v);
#ifndef CONFIG_MMU
unsigned (*mmap_capabilities)(struct file *);
#endif
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6373970..bbc6c31 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -114,7 +114,7 @@ static int bpf_map_release(struct inode *inode, struct file *filp)
}

#ifdef CONFIG_PROC_FS
-static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
+static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp, void *v)
{
const struct bpf_map *map = filp->private_data;

--
2.5.0