[RESEND][PATCH] lseek: remove i_mutex

From: Hisashi Hifumi
Date: Wed May 13 2009 - 20:31:27 EST


Hi,

Following patch removes i_mutex from generic_file_llseek.
I think the reason of protecting lseek with i_mutex is just
touching i_size (and f_pos) atomically.
So i_mutex is no longer needed here by introducing i_size_read
to read i_size.
I also made f_pos access atomic here without i_mutex regarding
former i_mutex holders by introducing file_pos_read/file_pos_write
that use seqlock to preserve atomicity.

Following patch removes i_mutex from generic_file_llseek, and deletes
generic_file_llseek_nolock totally.

Currently there is i_mutex contention not only around lseek, but also fsync or write.
So, I think we can mitigate i_mutex contention between fsync lseek and write by
removing i_mutex.

Thanks.

Signed-off-by: Hisashi Hifumi <hifumi.hisashi@xxxxxxxxxxxxx>

diff -Nrup linux-2.6.30-rc5.org/fs/cifs/cifsfs.c linux-2.6.30-rc5.lseek/fs/cifs/cifsfs.c
--- linux-2.6.30-rc5.org/fs/cifs/cifsfs.c 2009-05-11 10:07:14.000000000 +0900
+++ linux-2.6.30-rc5.lseek/fs/cifs/cifsfs.c 2009-05-11 10:10:36.000000000 +0900
@@ -635,7 +635,7 @@ static loff_t cifs_llseek(struct file *f
if (retval < 0)
return (loff_t)retval;
}
- return generic_file_llseek_unlocked(file, offset, origin);
+ return generic_file_llseek(file, offset, origin);
}

#ifdef CONFIG_CIFS_EXPERIMENTAL
diff -Nrup linux-2.6.30-rc5.org/fs/file_table.c linux-2.6.30-rc5.lseek/fs/file_table.c
--- linux-2.6.30-rc5.org/fs/file_table.c 2009-05-11 10:07:14.000000000 +0900
+++ linux-2.6.30-rc5.lseek/fs/file_table.c 2009-05-11 10:10:36.000000000 +0900
@@ -126,6 +126,7 @@ struct file *get_empty_filp(void)

INIT_LIST_HEAD(&f->f_u.fu_list);
atomic_long_set(&f->f_count, 1);
+ f_pos_ordered_init(f);
rwlock_init(&f->f_owner.lock);
f->f_cred = get_cred(cred);
spin_lock_init(&f->f_lock);
diff -Nrup linux-2.6.30-rc5.org/fs/gfs2/ops_file.c linux-2.6.30-rc5.lseek/fs/gfs2/ops_file.c
--- linux-2.6.30-rc5.org/fs/gfs2/ops_file.c 2009-05-11 10:07:15.000000000 +0900
+++ linux-2.6.30-rc5.lseek/fs/gfs2/ops_file.c 2009-05-11 10:10:36.000000000 +0900
@@ -63,11 +63,11 @@ static loff_t gfs2_llseek(struct file *f
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
&i_gh);
if (!error) {
- error = generic_file_llseek_unlocked(file, offset, origin);
+ error = generic_file_llseek(file, offset, origin);
gfs2_glock_dq_uninit(&i_gh);
}
} else
- error = generic_file_llseek_unlocked(file, offset, origin);
+ error = generic_file_llseek(file, offset, origin);

return error;
}
diff -Nrup linux-2.6.30-rc5.org/fs/ncpfs/file.c linux-2.6.30-rc5.lseek/fs/ncpfs/file.c
--- linux-2.6.30-rc5.org/fs/ncpfs/file.c 2009-03-24 08:12:14.000000000 +0900
+++ linux-2.6.30-rc5.lseek/fs/ncpfs/file.c 2009-05-11 10:10:36.000000000 +0900
@@ -286,7 +286,7 @@ static loff_t ncp_remote_llseek(struct f
{
loff_t ret;
lock_kernel();
- ret = generic_file_llseek_unlocked(file, offset, origin);
+ ret = generic_file_llseek(file, offset, origin);
unlock_kernel();
return ret;
}
diff -Nrup linux-2.6.30-rc5.org/fs/nfs/file.c linux-2.6.30-rc5.lseek/fs/nfs/file.c
--- linux-2.6.30-rc5.org/fs/nfs/file.c 2009-05-11 10:07:15.000000000 +0900
+++ linux-2.6.30-rc5.lseek/fs/nfs/file.c 2009-05-11 10:10:36.000000000 +0900
@@ -188,10 +188,10 @@ static loff_t nfs_file_llseek(struct fil
return (loff_t)retval;

spin_lock(&inode->i_lock);
- loff = generic_file_llseek_unlocked(filp, offset, origin);
+ loff = generic_file_llseek(filp, offset, origin);
spin_unlock(&inode->i_lock);
} else
- loff = generic_file_llseek_unlocked(filp, offset, origin);
+ loff = generic_file_llseek(filp, offset, origin);
return loff;
}

diff -Nrup linux-2.6.30-rc5.org/fs/read_write.c linux-2.6.30-rc5.lseek/fs/read_write.c
--- linux-2.6.30-rc5.org/fs/read_write.c 2009-05-11 10:07:15.000000000 +0900
+++ linux-2.6.30-rc5.lseek/fs/read_write.c 2009-05-11 10:10:36.000000000 +0900
@@ -31,23 +31,61 @@ const struct file_operations generic_ro_

EXPORT_SYMBOL(generic_ro_fops);

+static inline loff_t file_pos_read(struct file *file)
+{
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+ loff_t pos;
+ unsigned int seq;
+
+ do {
+ seq = read_seqbegin(&file->f_pos_seqlock);
+ pos = file->f_pos;
+ } while (read_seqretry(&file->f_pos_seqlock, seq));
+ return pos;
+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
+ loff_t pos;
+
+ preempt_disable();
+ pos = file->f_pos;
+ preempt_enable();
+ return pos;
+#else
+ return file->f_pos;
+#endif
+}
+
+static inline void file_pos_write(struct file *file, loff_t pos)
+{
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+ write_seqlock(&file->f_pos_seqlock);
+ file->f_pos = pos;
+ write_sequnlock(&file->f_pos_seqlock);
+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
+ preempt_disable();
+ file->f_pos = pos;
+ preempt_enable();
+#else
+ file->f_pos = pos;
+#endif
+}
+
/**
- * generic_file_llseek_unlocked - lockless generic llseek implementation
+ * generic_file_llseek - generic llseek implementation for regular files
* @file: file structure to seek on
* @offset: file offset to seek to
* @origin: type of seek
*
- * Updates the file offset to the value specified by @offset and @origin.
- * Locking must be provided by the caller.
+ * This is a generic implemenation of ->llseek useable for all normal local
+ * filesystems. It just updates the file offset to the value specified by
+ * @offset and @origin.
*/
-loff_t
-generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
+loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
{
struct inode *inode = file->f_mapping->host;

switch (origin) {
case SEEK_END:
- offset += inode->i_size;
+ offset += i_size_read(inode);
break;
case SEEK_CUR:
/*
@@ -57,8 +95,8 @@ generic_file_llseek_unlocked(struct file
* write() or lseek() might have altered it
*/
if (offset == 0)
- return file->f_pos;
- offset += file->f_pos;
+ return file_pos_read(file);
+ offset += file_pos_read(file);
break;
}

@@ -66,35 +104,13 @@ generic_file_llseek_unlocked(struct file
return -EINVAL;

/* Special lock needed here? */
- if (offset != file->f_pos) {
- file->f_pos = offset;
+ if (offset != file_pos_read(file)) {
+ file_pos_write(file, offset);
file->f_version = 0;
}

return offset;
}
-EXPORT_SYMBOL(generic_file_llseek_unlocked);
-
-/**
- * generic_file_llseek - generic llseek implementation for regular files
- * @file: file structure to seek on
- * @offset: file offset to seek to
- * @origin: type of seek
- *
- * This is a generic implemenation of ->llseek useable for all normal local
- * filesystems. It just updates the file offset to the value specified by
- * @offset and @origin under i_mutex.
- */
-loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
-{
- loff_t rval;
-
- mutex_lock(&file->f_dentry->d_inode->i_mutex);
- rval = generic_file_llseek_unlocked(file, offset, origin);
- mutex_unlock(&file->f_dentry->d_inode->i_mutex);
-
- return rval;
-}
EXPORT_SYMBOL(generic_file_llseek);

loff_t no_llseek(struct file *file, loff_t offset, int origin)
@@ -359,16 +375,6 @@ ssize_t vfs_write(struct file *file, con

EXPORT_SYMBOL(vfs_write);

-static inline loff_t file_pos_read(struct file *file)
-{
- return file->f_pos;
-}
-
-static inline void file_pos_write(struct file *file, loff_t pos)
-{
- file->f_pos = pos;
-}
-
SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
{
struct file *file;
diff -Nrup linux-2.6.30-rc5.org/fs/smbfs/file.c linux-2.6.30-rc5.lseek/fs/smbfs/file.c
--- linux-2.6.30-rc5.org/fs/smbfs/file.c 2009-03-24 08:12:14.000000000 +0900
+++ linux-2.6.30-rc5.lseek/fs/smbfs/file.c 2009-05-11 10:10:36.000000000 +0900
@@ -426,7 +426,7 @@ static loff_t smb_remote_llseek(struct f
{
loff_t ret;
lock_kernel();
- ret = generic_file_llseek_unlocked(file, offset, origin);
+ ret = generic_file_llseek(file, offset, origin);
unlock_kernel();
return ret;
}
diff -Nrup linux-2.6.30-rc5.org/include/linux/fs.h linux-2.6.30-rc5.lseek/include/linux/fs.h
--- linux-2.6.30-rc5.org/include/linux/fs.h 2009-05-11 10:07:17.000000000 +0900
+++ linux-2.6.30-rc5.lseek/include/linux/fs.h 2009-05-11 10:10:36.000000000 +0900
@@ -896,6 +896,16 @@ static inline int ra_has_index(struct fi
#define FILE_MNT_WRITE_TAKEN 1
#define FILE_MNT_WRITE_RELEASED 2

+/*
+ * Use sequence lock to get consistent f_pos on 32-bit processors.
+ */
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+#define __NEED_F_POS_ORDERED
+#define f_pos_ordered_init(file) seqlock_init(&file->f_pos_seqlock)
+#else
+#define f_pos_ordered_init(file) do { } while (0)
+#endif
+
struct file {
/*
* fu_list becomes invalid after file_free is called and queued via
@@ -914,6 +924,9 @@ struct file {
unsigned int f_flags;
fmode_t f_mode;
loff_t f_pos;
+#ifdef __NEED_F_POS_ORDERED
+ seqlock_t f_pos_seqlock;
+#endif
struct fown_struct f_owner;
const struct cred *f_cred;
struct file_ra_state f_ra;
@@ -2215,8 +2228,6 @@ extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
-extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset,
- int origin);
extern int generic_file_open(struct inode * inode, struct file * filp);
extern int nonseekable_open(struct inode * inode, struct file * filp);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/