Re: [PATCH] fs: fix i_writecount on shmem and friends
From: NeilBrown
Date: Thu Mar 13 2014 - 00:08:20 EST
On Wed, 12 Mar 2014 18:19:25 +0000 Al Viro <viro@xxxxxxxxxxxxxxxxxx> wrote:
> On Tue, Mar 11, 2014 at 12:05:09PM -0700, Linus Torvalds wrote:
> >
> > which returns ETXTBSY (most easily seen by just stracing it).
> >
> > The patch would also seem to make sense, with the i_readcount_inc()
> > being immediately below for the FMODE_READ case.
>
> I think it's trying to fix the problem in the wrong place. The bug is real,
> all right, but it's not that alloc_file() for non-regulars doesn't grab
> writecount; it's that drop_file_write_access() drops it for those.
>
> What the hell would we want to play with that counter for, anyway? It's not
> as if they could be mmapped, so all it does is making pipe(2) and socket(2)
> more costly, for no visible reason.
>
> I would prefer to flip
> put_write_access(inode);
>
> if (special_file(inode->i_mode))
> return;
> in drop_file_write_access() instead.
>
> <goes to looks at i_writecount users>
> Oh, shit...
>
> drivers/md/md.c:
> /* similar to deny_write_access, but accounts for our holding a reference
> * to the file ourselves */
> static int deny_bitmap_write_access(struct file * file)
> {
> struct inode *inode = file->f_mapping->host;
>
> spin_lock(&inode->i_lock);
> if (atomic_read(&inode->i_writecount) > 1) {
> spin_unlock(&inode->i_lock);
> return -ETXTBSY;
> }
> atomic_set(&inode->i_writecount, -1);
> spin_unlock(&inode->i_lock);
>
> return 0;
> }
>
> Broken. get_write_access() will happily increment i_writecount e.g. from
> 1 to 2, without even looking at i_lock.
I guess someone changed exactly how i_writecount is used without check all
users ... that isn't like you Al :-)
> Moreover, it's paired with
> void restore_bitmap_write_access(struct file *file)
> {
> struct inode *inode = file->f_mapping->host;
>
> spin_lock(&inode->i_lock);
> atomic_set(&inode->i_writecount, 1);
> spin_unlock(&inode->i_lock);
> }
> Just what will happen if we do denywrite mmap() of that file in between?
> Even worse, the caller take file straight from fget(), with no sanity
> checks whatsoever. Just what will happen if I give it e.g. a directory?
> Or a procfs/sysfs/whatnot file, for that matter? Neil? I realize that
> it's root-only, but still...
But as you point out, even "fixing" it to match the current i_writecount
behaviour wouldn't really be a proper fix.
Probably best to stop messing with i_writecount and just use it to guard
against using the same bitmap twice.
It won't prevent some other process writing to the file but that shouldn't
happen anyway.
Maybe something like the following once I've tested it.
Thanks,
NeilBrown
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 4195a01b1535..9a8e66ae04f5 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1988,7 +1988,6 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
if (mddev->bitmap_info.file) {
struct file *f = mddev->bitmap_info.file;
mddev->bitmap_info.file = NULL;
- restore_bitmap_write_access(f);
fput(f);
}
} else {
diff --git a/drivers/md/md.c b/drivers/md/md.c
index e28c9d2a1166..223126046e02 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5181,32 +5181,6 @@ static int restart_array(struct mddev *mddev)
return 0;
}
-/* similar to deny_write_access, but accounts for our holding a reference
- * to the file ourselves */
-static int deny_bitmap_write_access(struct file * file)
-{
- struct inode *inode = file->f_mapping->host;
-
- spin_lock(&inode->i_lock);
- if (atomic_read(&inode->i_writecount) > 1) {
- spin_unlock(&inode->i_lock);
- return -ETXTBSY;
- }
- atomic_set(&inode->i_writecount, -1);
- spin_unlock(&inode->i_lock);
-
- return 0;
-}
-
-void restore_bitmap_write_access(struct file *file)
-{
- struct inode *inode = file->f_mapping->host;
-
- spin_lock(&inode->i_lock);
- atomic_set(&inode->i_writecount, 1);
- spin_unlock(&inode->i_lock);
-}
-
static void md_clean(struct mddev *mddev)
{
mddev->array_sectors = 0;
@@ -5427,7 +5401,6 @@ static int do_md_stop(struct mddev * mddev, int mode,
bitmap_destroy(mddev);
if (mddev->bitmap_info.file) {
- restore_bitmap_write_access(mddev->bitmap_info.file);
fput(mddev->bitmap_info.file);
mddev->bitmap_info.file = NULL;
}
@@ -5991,6 +5964,7 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
if (fd >= 0) {
+ struct inode *inode;
if (mddev->bitmap)
return -EEXIST; /* cannot add when bitmap is present */
mddev->bitmap_info.file = fget(fd);
@@ -6001,13 +5975,20 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
return -EBADF;
}
- err = deny_bitmap_write_access(mddev->bitmap_info.file);
- if (err) {
+ inode = mddev->bitmap_info.file->f_mapping->host;
+ if (!S_ISREG(inode->i_mode)) {
+ printk(KERN_ERR "%s: error: bitmap file must be a regular file\n",
+ mdname(mddev));
+ fput(mddev->bitmap_info.file);
+ mddev->bitmap_info.file = NULL;
+ return -EBADF;
+ }
+ if (atomic_read(&inode->i_writecount) != 1) {
printk(KERN_ERR "%s: error: bitmap file is already in use\n",
mdname(mddev));
fput(mddev->bitmap_info.file);
mddev->bitmap_info.file = NULL;
- return err;
+ return -EBUSY;
}
mddev->bitmap_info.offset = 0; /* file overrides offset */
} else if (mddev->bitmap == NULL)
@@ -6027,10 +6008,8 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
mddev->pers->quiesce(mddev, 0);
}
if (fd < 0) {
- if (mddev->bitmap_info.file) {
- restore_bitmap_write_access(mddev->bitmap_info.file);
+ if (mddev->bitmap_info.file)
fput(mddev->bitmap_info.file);
- }
mddev->bitmap_info.file = NULL;
}
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 07bba96de260..a49d991f3fe1 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -605,7 +605,6 @@ extern int md_check_no_bitmap(struct mddev *mddev);
extern int md_integrity_register(struct mddev *mddev);
extern void md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
-extern void restore_bitmap_write_access(struct file *file);
extern void mddev_init(struct mddev *mddev);
extern int md_run(struct mddev *mddev);
Attachment:
signature.asc
Description: PGP signature