Re: [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up

From: Miklos Szeredi
Date: Tue Mar 07 2017 - 13:15:17 EST


On Sun, Feb 19, 2017 at 09:14:41AM +0000, Al Viro wrote:
> On Fri, Feb 17, 2017 at 05:09:29PM +0100, Miklos Szeredi wrote:
> > A file is opened for read-only, opened read-write (resulting in a copy up)
> > and modified. The data read back from the the read-only fd will be stale
> > in this case (the read-only file descriptor still refers to the lower,
> > unmodified file).
> >
> > This patchset fixes issues related to this corner case. This is a
> > requirement from various parties for accepting overlayfs as a "POSIX"
> > filesystem.
> >
> > When an operation (read, mmap, fsync) is done on an overlay fd opened
> > read-only that is referring to a lower file, check if it has been copied up
> > in the mean time. If so, open the upper file and use that for the operation.
> >
> > To make the performance impact minimal for non-overlay case, use a flag in
> > file->f_mode to indicate that this is an overlay file.
>
> This is one hell of a DoS vector - it's really easy to eat tons of struct
> file that way. Preparatory parts of that series make sense on their own,
> but your "let's allocate a struct file, call ->open() and schedule that
> struct file for closing upon the exit to userland on each kernel_read()"
> is not.

How about this? Do you see a problem with calling __fput() synchronously here?

Thanks,
Miklos

---
fs/Makefile | 2 -
fs/file_table.c | 2 -
fs/internal.h | 1
fs/overlay_util.c | 53 +++++++++++++++++++++++++++++++++++++++++++
include/linux/fs.h | 11 ++++++++
include/linux/overlay_util.h | 13 ++++++++++
6 files changed, 80 insertions(+), 2 deletions(-)

--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -31,6 +31,7 @@
#include <linux/workqueue.h>
#include <linux/percpu-rwsem.h>
#include <linux/delayed_call.h>
+#include <linux/overlay_util.h>

#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -1721,9 +1722,19 @@ struct inode_operations {
int (*set_acl)(struct inode *, struct posix_acl *, int);
} ____cacheline_aligned;

+
+static inline bool overlay_file_inconsistent(struct file *file)
+{
+ return unlikely(file->f_path.dentry->d_flags & DCACHE_OP_REAL) &&
+ unlikely(d_real_inode(file->f_path.dentry) != file_inode(file));
+}
+
static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
struct iov_iter *iter)
{
+ if (overlay_file_inconsistent(file))
+ return overlay_read_iter(file, kio, iter);
+
return file->f_op->read_iter(kio, iter);
}

--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o splice.o sync.o utimes.o \
- stack.o fs_struct.o statfs.o fs_pin.o nsfs.o
+ stack.o fs_struct.o statfs.o fs_pin.o nsfs.o overlay_util.o

ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o block_dev.o direct-io.o mpage.o
--- /dev/null
+++ b/fs/overlay_util.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/overlay_util.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include "internal.h"
+
+static struct file *overlay_clone_file(struct file *file)
+{
+ file = filp_clone_open(file);
+ if (!IS_ERR(file))
+ file->f_mode |= FMODE_NONOTIFY;
+
+ return file;
+}
+
+/*
+ * Do the release synchronously. Otherwise we'd have a DoS problem when doing
+ * multiple reads (e.g. through kernel_read()) and only releasing the cloned
+ * files when returning to userspace.
+ *
+ * There's no risk of final dput or final mntput happening, since caller holds
+ * ref to both through the original file.
+ */
+static void overlay_put_cloned_file(struct file *file)
+{
+ if (WARN_ON(!atomic_long_dec_and_test(&file->f_count)))
+ return;
+
+ __fput(file);
+}
+
+ssize_t overlay_read_iter(struct file *file, struct kiocb *kio,
+ struct iov_iter *iter)
+{
+ ssize_t ret;
+
+ file = overlay_clone_file(file);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ret = vfs_iter_read(file, iter, &kio->ki_pos);
+ overlay_put_cloned_file(file);
+
+ return ret;
+}
+EXPORT_SYMBOL(overlay_read_iter);
--- /dev/null
+++ b/include/linux/overlay_util.h
@@ -0,0 +1,13 @@
+#ifndef _LINUX_OVERLAY_FS_H
+#define _LINUX_OVERLAY_FS_H
+
+#include <linux/types.h>
+
+struct file;
+struct kiocb;
+struct iov_iter;
+
+extern ssize_t overlay_read_iter(struct file *file, struct kiocb *kio,
+ struct iov_iter *iter);
+
+#endif /* _LINUX_OVERLAY_FS_H */
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -184,7 +184,7 @@ EXPORT_SYMBOL(alloc_file);

/* the real guts of fput() - releasing the last reference to file
*/
-static void __fput(struct file *file)
+void __fput(struct file *file)
{
struct dentry *dentry = file->f_path.dentry;
struct vfsmount *mnt = file->f_path.mnt;
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -83,6 +83,7 @@ extern void chroot_fs_refs(const struct
* file_table.c
*/
extern struct file *get_empty_filp(void);
+extern void __fput(struct file *);

/*
* super.c