[PATCH 01/39] ocfs2: POSIX file locks support
From: Mark Fasheh
Date: Wed Sep 24 2008 - 18:01:17 EST
This is actually pretty easy since fs/dlm already handles the bulk of the
work. The Ocfs2 userspace cluster stack module already uses fs/dlm as the
underlying lock manager, so I only had to add the right calls.
Cluster-aware POSIX locks ("plocks") can be turned off by the same means as
UNIX locks - mount with 'localflocks', or create a local-only Ocfs2 volume.
Internally, the file system uses two sets of file_operations, depending on
whether cluster-aware plocks are required. This turns out to be easier than
implementing local-only versions of ->lock.
Signed-off-by: Mark Fasheh <mfasheh@xxxxxxxx>
---
fs/ocfs2/file.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++
fs/ocfs2/file.h | 2 +
fs/ocfs2/inode.c | 15 ++++++++++++-
fs/ocfs2/locks.c | 15 ++++++++++++++
fs/ocfs2/locks.h | 1 +
fs/ocfs2/stack_user.c | 33 +++++++++++++++++++++++++++++++
fs/ocfs2/stackglue.c | 20 +++++++++++++++++++
fs/ocfs2/stackglue.h | 19 ++++++++++++++++++
8 files changed, 154 insertions(+), 2 deletions(-)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ec2ed15..60232b1 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2236,6 +2236,10 @@ const struct inode_operations ocfs2_special_file_iops = {
.permission = ocfs2_permission,
};
+/*
+ * Other than ->lock, keep ocfs2_fops and ocfs2_dops in sync with
+ * ocfs2_fops_no_plocks and ocfs2_dops_no_plocks!
+ */
const struct file_operations ocfs2_fops = {
.llseek = generic_file_llseek,
.read = do_sync_read,
@@ -2250,6 +2254,7 @@ const struct file_operations ocfs2_fops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = ocfs2_compat_ioctl,
#endif
+ .lock = ocfs2_lock,
.flock = ocfs2_flock,
.splice_read = ocfs2_file_splice_read,
.splice_write = ocfs2_file_splice_write,
@@ -2266,5 +2271,51 @@ const struct file_operations ocfs2_dops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = ocfs2_compat_ioctl,
#endif
+ .lock = ocfs2_lock,
+ .flock = ocfs2_flock,
+};
+
+/*
+ * POSIX-lockless variants of our file_operations.
+ *
+ * These will be used if the underlying cluster stack does not support
+ * posix file locking, if the user passes the "localflocks" mount
+ * option, or if we have a local-only fs.
+ *
+ * ocfs2_flock is in here because all stacks handle UNIX file locks,
+ * so we still want it in the case of no stack support for
+ * plocks. Internally, it will do the right thing when asked to ignore
+ * the cluster.
+ */
+const struct file_operations ocfs2_fops_no_plocks = {
+ .llseek = generic_file_llseek,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .mmap = ocfs2_mmap,
+ .fsync = ocfs2_sync_file,
+ .release = ocfs2_file_release,
+ .open = ocfs2_file_open,
+ .aio_read = ocfs2_file_aio_read,
+ .aio_write = ocfs2_file_aio_write,
+ .unlocked_ioctl = ocfs2_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = ocfs2_compat_ioctl,
+#endif
+ .flock = ocfs2_flock,
+ .splice_read = ocfs2_file_splice_read,
+ .splice_write = ocfs2_file_splice_write,
+};
+
+const struct file_operations ocfs2_dops_no_plocks = {
+ .llseek = generic_file_llseek,
+ .read = generic_read_dir,
+ .readdir = ocfs2_readdir,
+ .fsync = ocfs2_sync_file,
+ .release = ocfs2_dir_release,
+ .open = ocfs2_dir_open,
+ .unlocked_ioctl = ocfs2_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = ocfs2_compat_ioctl,
+#endif
.flock = ocfs2_flock,
};
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 1e27b4d..5a6d3e4 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -28,6 +28,8 @@
extern const struct file_operations ocfs2_fops;
extern const struct file_operations ocfs2_dops;
+extern const struct file_operations ocfs2_fops_no_plocks;
+extern const struct file_operations ocfs2_dops_no_plocks;
extern const struct inode_operations ocfs2_file_iops;
extern const struct inode_operations ocfs2_special_file_iops;
struct ocfs2_alloc_context;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 7e9e4c7..99f012a 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -219,6 +219,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
struct super_block *sb;
struct ocfs2_super *osb;
int status = -EINVAL;
+ int use_plocks = 1;
mlog_entry("(0x%p, size:%llu)\n", inode,
(unsigned long long)le64_to_cpu(fe->i_size));
@@ -226,6 +227,10 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
sb = inode->i_sb;
osb = OCFS2_SB(sb);
+ if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) ||
+ ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks())
+ use_plocks = 0;
+
/* this means that read_inode cannot create a superblock inode
* today. change if needed. */
if (!OCFS2_IS_VALID_DINODE(fe) ||
@@ -295,13 +300,19 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
- inode->i_fop = &ocfs2_fops;
+ if (use_plocks)
+ inode->i_fop = &ocfs2_fops;
+ else
+ inode->i_fop = &ocfs2_fops_no_plocks;
inode->i_op = &ocfs2_file_iops;
i_size_write(inode, le64_to_cpu(fe->i_size));
break;
case S_IFDIR:
inode->i_op = &ocfs2_dir_iops;
- inode->i_fop = &ocfs2_dops;
+ if (use_plocks)
+ inode->i_fop = &ocfs2_dops;
+ else
+ inode->i_fop = &ocfs2_dops_no_plocks;
i_size_write(inode, le64_to_cpu(fe->i_size));
break;
case S_IFLNK:
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c
index 203f871..544ac62 100644
--- a/fs/ocfs2/locks.c
+++ b/fs/ocfs2/locks.c
@@ -24,6 +24,7 @@
*/
#include <linux/fs.h>
+#include <linux/fcntl.h>
#define MLOG_MASK_PREFIX ML_INODE
#include <cluster/masklog.h>
@@ -32,6 +33,7 @@
#include "dlmglue.h"
#include "file.h"
+#include "inode.h"
#include "locks.h"
static int ocfs2_do_flock(struct file *file, struct inode *inode,
@@ -123,3 +125,16 @@ int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl)
else
return ocfs2_do_flock(file, inode, cmd, fl);
}
+
+int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl)
+{
+ struct inode *inode = file->f_mapping->host;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+ if (!(fl->fl_flags & FL_POSIX))
+ return -ENOLCK;
+ if (__mandatory_lock(inode))
+ return -ENOLCK;
+
+ return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl);
+}
diff --git a/fs/ocfs2/locks.h b/fs/ocfs2/locks.h
index 9743ef2..496d488 100644
--- a/fs/ocfs2/locks.h
+++ b/fs/ocfs2/locks.h
@@ -27,5 +27,6 @@
#define OCFS2_LOCKS_H
int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl);
+int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl);
#endif /* OCFS2_LOCKS_H */
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 353fc35..faec2d8 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -28,6 +28,7 @@
#include "ocfs2.h" /* For struct ocfs2_lock_res */
#include "stackglue.h"
+#include <linux/dlm_plock.h>
/*
* The control protocol starts with a handshake. Until the handshake
@@ -746,6 +747,37 @@ static void user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
{
}
+static int user_plock(struct ocfs2_cluster_connection *conn,
+ u64 ino,
+ struct file *file,
+ int cmd,
+ struct file_lock *fl)
+{
+ /*
+ * This more or less just demuxes the plock request into any
+ * one of three dlm calls.
+ *
+ * Internally, fs/dlm will pass these to a misc device, which
+ * a userspace daemon will read and write to.
+ *
+ * For now, cancel requests (which happen internally only),
+ * are turned into unlocks. Most of this function taken from
+ * gfs2_lock.
+ */
+
+ if (cmd == F_CANCELLK) {
+ cmd = F_SETLK;
+ fl->fl_type = F_UNLCK;
+ }
+
+ if (IS_GETLK(cmd))
+ return dlm_posix_get(conn->cc_lockspace, ino, file, fl);
+ else if (fl->fl_type == F_UNLCK)
+ return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl);
+ else
+ return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl);
+}
+
/*
* Compare a requested locking protocol version against the current one.
*
@@ -839,6 +871,7 @@ static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
.dlm_unlock = user_dlm_unlock,
.lock_status = user_dlm_lock_status,
.lock_lvb = user_dlm_lvb,
+ .plock = user_plock,
.dump_lksb = user_dlm_dump_lksb,
};
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 07f348b..7150f5d 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -288,6 +288,26 @@ void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb);
+int ocfs2_stack_supports_plocks(void)
+{
+ return !!(active_stack && active_stack->sp_ops->plock);
+}
+EXPORT_SYMBOL_GPL(ocfs2_stack_supports_plocks);
+
+/*
+ * ocfs2_plock() can only be safely called if
+ * ocfs2_stack_supports_plocks() returned true
+ */
+int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino,
+ struct file *file, int cmd, struct file_lock *fl)
+{
+ WARN_ON_ONCE(active_stack->sp_ops->plock == NULL);
+ if (active_stack->sp_ops->plock)
+ return active_stack->sp_ops->plock(conn, ino, file, cmd, fl);
+ return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL_GPL(ocfs2_plock);
+
int ocfs2_cluster_connect(const char *stack_name,
const char *group,
int grouplen,
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index db56281..c571af3 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -28,6 +28,10 @@
#include "dlm/dlmapi.h"
#include <linux/dlm.h>
+/* Needed for plock-related prototypes */
+struct file;
+struct file_lock;
+
/*
* dlmconstants.h does not have a LOCAL flag. We hope to remove it
* some day, but right now we need it. Let's fake it. This value is larger
@@ -187,6 +191,17 @@ struct ocfs2_stack_operations {
void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb);
/*
+ * Cluster-aware posix locks
+ *
+ * This is NULL for stacks which do not support posix locks.
+ */
+ int (*plock)(struct ocfs2_cluster_connection *conn,
+ u64 ino,
+ struct file *file,
+ int cmd,
+ struct file_lock *fl);
+
+ /*
* This is an optoinal debugging hook. If provided, the
* stack can dump debugging information about this lock.
*/
@@ -240,6 +255,10 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb);
void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb);
void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb);
+int ocfs2_stack_supports_plocks(void);
+int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino,
+ struct file *file, int cmd, struct file_lock *fl);
+
void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto);
--
1.5.4.5
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/