[PATCH] Make POSIX locks compatible with the NPTL thread model

From: Trond Myklebust
Date: Wed Jun 23 2004 - 12:13:11 EST


Hi,

At some point in 2.5.x we introduced the NPTL thread model at the kernel
level, and hence redefined the idea of a process: a process appears
currently to be defined as one or more threads with the same tgid...
However we failed to completely update the POSIX locking code to reflect
that change: currently, the POSIX locking code defines the process to be
a set of one or more threads with the same tgid and a shared file
table...

As a result we end up with abominations like the steal_locks() function
that is required in order to move the locks from from one file table to
another on exec etc.

The following patch simplifies the POSIX locking code: it makes it
conform more strictly to the NPTL + Single Unix Spec by removing the
Linux-specific shared file table restriction for ordinary threads. It
retains the binary interface, though, in order to be able to continue to
distinguish remote locks (as created by lockd for instance) from local
locks.

Cheers,
Trond
fs/locks.c | 46 ++++------------------------------------------
fs/nfs/direct.c | 4 ++--
fs/nfs/file.c | 2 +-
fs/nfs/nfs4proc.c | 12 ++++++------
fs/nfs/nfs4state.c | 18 +++++++++---------
fs/nfs/nfs4xdr.c | 8 ++++----
fs/nfs/read.c | 4 ++--
fs/nfs/write.c | 4 ++--
fs/open.c | 2 +-
include/linux/fs.h | 2 +-
include/linux/nfs_fs.h | 8 ++++----
include/linux/nfs_page.h | 2 +-
include/linux/nfs_xdr.h | 4 ++--
13 files changed, 39 insertions(+), 77 deletions(-)

diff -u --recursive --new-file --show-c-function linux-2.6.7-00-fix_locks/fs/locks.c linux-2.6.7-01-lock_owner_fixup/fs/locks.c
--- linux-2.6.7-00-fix_locks/fs/locks.c 2004-06-23 12:46:29.000000000 -0400
+++ linux-2.6.7-01-lock_owner_fixup/fs/locks.c 2004-06-22 16:48:19.000000000 -0400
@@ -317,7 +317,7 @@ static int flock_to_posix_lock(struct fi
if (l->l_len == 0)
fl->fl_end = OFFSET_MAX;

- fl->fl_owner = current->files;
+ fl->fl_owner = 0;
fl->fl_pid = current->tgid;
fl->fl_file = filp;
fl->fl_flags = FL_POSIX;
@@ -357,7 +357,7 @@ static int flock64_to_posix_lock(struct
if (l->l_len == 0)
fl->fl_end = OFFSET_MAX;

- fl->fl_owner = current->files;
+ fl->fl_owner = 0;
fl->fl_pid = current->tgid;
fl->fl_file = filp;
fl->fl_flags = FL_POSIX;
@@ -920,7 +920,6 @@ int posix_lock_file(struct file *filp, s
*/
int locks_mandatory_locked(struct inode *inode)
{
- fl_owner_t owner = current->files;
unsigned int pid = current->tgid;
struct file_lock *fl;

@@ -931,7 +930,7 @@ int locks_mandatory_locked(struct inode
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (!IS_POSIX(fl))
continue;
- if (fl->fl_owner != owner)
+ if (fl->fl_owner != 0)
break;
if (fl->fl_pid != pid)
break;
@@ -961,7 +960,7 @@ int locks_mandatory_area(int read_write,
int error;

locks_init_lock(&fl);
- fl.fl_owner = current->files;
+ fl.fl_owner = 0;
fl.fl_pid = current->tgid;
fl.fl_file = filp;
fl.fl_flags = FL_POSIX | FL_ACCESS;
@@ -1985,18 +1984,6 @@ int lock_may_write(struct inode *inode,

EXPORT_SYMBOL(lock_may_write);

-static inline void __steal_locks(struct file *file, fl_owner_t from)
-{
- struct inode *inode = file->f_dentry->d_inode;
- struct file_lock *fl = inode->i_flock;
-
- while (fl) {
- if (fl->fl_file == file && fl->fl_owner == from)
- fl->fl_owner = current->files;
- fl = fl->fl_next;
- }
-}
-
/* When getting ready for executing a binary, we make sure that current
* has a files_struct on its own. Before dropping the old files_struct,
* we take over ownership of all locks for all file descriptors we own.
@@ -2005,31 +1992,6 @@ static inline void __steal_locks(struct
*/
void steal_locks(fl_owner_t from)
{
- struct files_struct *files = current->files;
- int i, j;
-
- if (from == files)
- return;
-
- lock_kernel();
- j = 0;
- for (;;) {
- unsigned long set;
- i = j * __NFDBITS;
- if (i >= files->max_fdset || i >= files->max_fds)
- break;
- set = files->open_fds->fds_bits[j++];
- while (set) {
- if (set & 1) {
- struct file *file = files->fd[i];
- if (file)
- __steal_locks(file, from);
- }
- i++;
- set >>= 1;
- }
- }
- unlock_kernel();
}
EXPORT_SYMBOL(steal_locks);

diff -u --recursive --new-file --show-c-function linux-2.6.7-00-fix_locks/fs/nfs/direct.c linux-2.6.7-01-lock_owner_fixup/fs/nfs/direct.c
--- linux-2.6.7-00-fix_locks/fs/nfs/direct.c 2004-06-16 14:58:26.000000000 -0400
+++ linux-2.6.7-01-lock_owner_fixup/fs/nfs/direct.c 2004-06-22 16:48:19.000000000 -0400
@@ -129,7 +129,7 @@ nfs_direct_read_seg(struct inode *inode,
.inode = inode,
.args = {
.fh = NFS_FH(inode),
- .lockowner = current->files,
+ .pid = current->tgid,
},
.res = {
.fattr = &rdata.fattr,
@@ -259,7 +259,7 @@ nfs_direct_write_seg(struct inode *inode
.inode = inode,
.args = {
.fh = NFS_FH(inode),
- .lockowner = current->files,
+ .pid = current->tgid,
},
.res = {
.fattr = &wdata.fattr,
diff -u --recursive --new-file --show-c-function linux-2.6.7-00-fix_locks/fs/nfs/file.c linux-2.6.7-01-lock_owner_fixup/fs/nfs/file.c
--- linux-2.6.7-00-fix_locks/fs/nfs/file.c 2004-06-16 14:58:08.000000000 -0400
+++ linux-2.6.7-01-lock_owner_fixup/fs/nfs/file.c 2004-06-22 16:48:19.000000000 -0400
@@ -340,7 +340,7 @@ nfs_lock(struct file *filp, int cmd, str
* Not sure whether that would be unique, though, or whether
* that would break in other places.
*/
- if (!fl->fl_owner || !(fl->fl_flags & FL_POSIX))
+ if (!(fl->fl_flags & FL_POSIX))
return -ENOLCK;

/*
diff -u --recursive --new-file --show-c-function linux-2.6.7-00-fix_locks/fs/nfs/nfs4proc.c linux-2.6.7-01-lock_owner_fixup/fs/nfs/nfs4proc.c
--- linux-2.6.7-00-fix_locks/fs/nfs/nfs4proc.c 2004-06-16 14:58:17.000000000 -0400
+++ linux-2.6.7-01-lock_owner_fixup/fs/nfs/nfs4proc.c 2004-06-22 16:48:19.000000000 -0400
@@ -1529,7 +1529,7 @@ nfs4_request_init(struct nfs_page *req,
state = (struct nfs4_state *)filp->private_data;
req->wb_state = state;
req->wb_cred = get_rpccred(state->owner->so_cred);
- req->wb_lockowner = current->files;
+ req->wb_pid = current->tgid;
}

static int
@@ -1653,7 +1653,7 @@ nfs4_request_compatible(struct nfs_page
state = (struct nfs4_state *)filp->private_data;
if (req->wb_state != state)
return 0;
- if (req->wb_lockowner != current->files)
+ if (req->wb_pid != current->tgid)
return 0;
cred = state->owner->so_cred;
if (req->wb_cred != cred)
@@ -1780,7 +1780,7 @@ nfs4_proc_getlk(struct nfs4_state *state

nlo.clientid = clp->cl_clientid;
down(&state->lock_sema);
- lsp = nfs4_find_lock_state(state, request->fl_owner);
+ lsp = nfs4_find_lock_state(state, request->fl_pid);
if (lsp)
nlo.id = lsp->ls_id;
else {
@@ -1839,7 +1839,7 @@ nfs4_proc_unlck(struct nfs4_state *state
int status = 0;

down(&state->lock_sema);
- lsp = nfs4_find_lock_state(state, request->fl_owner);
+ lsp = nfs4_find_lock_state(state, request->fl_pid);
if (!lsp)
goto out;
luargs.seqid = lsp->ls_seqid;
@@ -1886,7 +1886,7 @@ nfs4_proc_setlk(struct nfs4_state *state
int status;

down(&state->lock_sema);
- lsp = nfs4_find_lock_state(state, request->fl_owner);
+ lsp = nfs4_find_lock_state(state, request->fl_pid);
if (lsp == NULL) {
struct nfs4_state_owner *owner = state->owner;
struct nfs_open_to_lock otl = {
@@ -1895,7 +1895,7 @@ nfs4_proc_setlk(struct nfs4_state *state
},
};
status = -ENOMEM;
- lsp = nfs4_alloc_lock_state(state, request->fl_owner);
+ lsp = nfs4_alloc_lock_state(state, request->fl_pid);
if (!lsp)
goto out;
otl.lock_seqid = lsp->ls_seqid;
diff -u --recursive --new-file --show-c-function linux-2.6.7-00-fix_locks/fs/nfs/nfs4state.c linux-2.6.7-01-lock_owner_fixup/fs/nfs/nfs4state.c
--- linux-2.6.7-00-fix_locks/fs/nfs/nfs4state.c 2004-06-16 14:57:44.000000000 -0400
+++ linux-2.6.7-01-lock_owner_fixup/fs/nfs/nfs4state.c 2004-06-22 16:48:19.000000000 -0400
@@ -496,11 +496,11 @@ nfs4_close_state(struct nfs4_state *stat
* that is compatible with current->files
*/
static struct nfs4_lock_state *
-__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
+__nfs4_find_lock_state(struct nfs4_state *state, unsigned int pid)
{
struct nfs4_lock_state *pos;
list_for_each_entry(pos, &state->lock_states, ls_locks) {
- if (pos->ls_owner != fl_owner)
+ if (pos->ls_pid != pid)
continue;
atomic_inc(&pos->ls_count);
return pos;
@@ -509,11 +509,11 @@ __nfs4_find_lock_state(struct nfs4_state
}

struct nfs4_lock_state *
-nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
+nfs4_find_lock_state(struct nfs4_state *state, unsigned int pid)
{
struct nfs4_lock_state *lsp;
read_lock(&state->state_lock);
- lsp = __nfs4_find_lock_state(state, fl_owner);
+ lsp = __nfs4_find_lock_state(state, pid);
read_unlock(&state->state_lock);
return lsp;
}
@@ -525,7 +525,7 @@ nfs4_find_lock_state(struct nfs4_state *
* The caller must be holding state->lock_sema
*/
struct nfs4_lock_state *
-nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
+nfs4_alloc_lock_state(struct nfs4_state *state, unsigned int pid)
{
struct nfs4_lock_state *lsp;
struct nfs4_client *clp = state->owner->so_client;
@@ -537,7 +537,7 @@ nfs4_alloc_lock_state(struct nfs4_state
lsp->ls_id = -1;
memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data));
atomic_set(&lsp->ls_count, 1);
- lsp->ls_owner = fl_owner;
+ lsp->ls_pid = pid;
lsp->ls_parent = state;
INIT_LIST_HEAD(&lsp->ls_locks);
spin_lock(&clp->cl_lock);
@@ -551,12 +551,12 @@ nfs4_alloc_lock_state(struct nfs4_state
* requests.
*/
void
-nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
+nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, unsigned int pid)
{
if (test_bit(LK_STATE_IN_USE, &state->flags)) {
struct nfs4_lock_state *lsp;

- lsp = nfs4_find_lock_state(state, fl_owner);
+ lsp = nfs4_find_lock_state(state, pid);
if (lsp) {
memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
nfs4_put_lock_state(lsp);
@@ -628,7 +628,7 @@ nfs4_notify_unlck(struct inode *inode, s
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (!(fl->fl_flags & FL_POSIX))
continue;
- if (fl->fl_owner != lsp->ls_owner)
+ if (fl->fl_pid != lsp->ls_pid)
continue;
/* Exit if we find at least one lock which is not consumed */
if (nfs4_check_unlock(fl,request) == 0)
diff -u --recursive --new-file --show-c-function linux-2.6.7-00-fix_locks/fs/nfs/nfs4xdr.c linux-2.6.7-01-lock_owner_fixup/fs/nfs/nfs4xdr.c
--- linux-2.6.7-00-fix_locks/fs/nfs/nfs4xdr.c 2004-06-16 14:58:28.000000000 -0400
+++ linux-2.6.7-01-lock_owner_fixup/fs/nfs/nfs4xdr.c 2004-06-22 16:48:19.000000000 -0400
@@ -887,7 +887,7 @@ static int encode_putrootfh(struct xdr_s
return 0;
}

-static void encode_stateid(struct xdr_stream *xdr, struct nfs4_state *state, fl_owner_t lockowner)
+static void encode_stateid(struct xdr_stream *xdr, struct nfs4_state *state, unsigned int pid)
{
extern nfs4_stateid zero_stateid;
nfs4_stateid stateid;
@@ -895,7 +895,7 @@ static void encode_stateid(struct xdr_st

RESERVE_SPACE(16);
if (state != NULL) {
- nfs4_copy_stateid(&stateid, state, lockowner);
+ nfs4_copy_stateid(&stateid, state, pid);
WRITEMEM(stateid.data, sizeof(stateid.data));
} else
WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data));
@@ -908,7 +908,7 @@ static int encode_read(struct xdr_stream
RESERVE_SPACE(4);
WRITE32(OP_READ);

- encode_stateid(xdr, args->state, args->lockowner);
+ encode_stateid(xdr, args->state, args->pid);

RESERVE_SPACE(12);
WRITE64(args->offset);
@@ -1075,7 +1075,7 @@ static int encode_write(struct xdr_strea
RESERVE_SPACE(4);
WRITE32(OP_WRITE);

- encode_stateid(xdr, args->state, args->lockowner);
+ encode_stateid(xdr, args->state, args->pid);

RESERVE_SPACE(16);
WRITE64(args->offset);
diff -u --recursive --new-file --show-c-function linux-2.6.7-00-fix_locks/fs/nfs/read.c linux-2.6.7-01-lock_owner_fixup/fs/nfs/read.c
--- linux-2.6.7-00-fix_locks/fs/nfs/read.c 2004-06-16 14:57:41.000000000 -0400
+++ linux-2.6.7-01-lock_owner_fixup/fs/nfs/read.c 2004-06-22 16:48:19.000000000 -0400
@@ -108,7 +108,7 @@ nfs_readpage_sync(struct file *file, str
rdata->inode = inode;
INIT_LIST_HEAD(&rdata->pages);
rdata->args.fh = NFS_FH(inode);
- rdata->args.lockowner = current->files;
+ rdata->args.pid = current->tgid;
rdata->args.pages = &page;
rdata->args.pgbase = 0UL;
rdata->args.count = rsize;
@@ -225,7 +225,7 @@ static void nfs_read_rpcsetup(struct nfs
data->args.pgbase = req->wb_pgbase + offset;
data->args.pages = data->pagevec;
data->args.count = count;
- data->args.lockowner = req->wb_lockowner;
+ data->args.pid = req->wb_pid;
data->args.state = req->wb_state;

data->res.fattr = &data->fattr;
diff -u --recursive --new-file --show-c-function linux-2.6.7-00-fix_locks/fs/nfs/write.c linux-2.6.7-01-lock_owner_fixup/fs/nfs/write.c
--- linux-2.6.7-00-fix_locks/fs/nfs/write.c 2004-06-16 14:58:17.000000000 -0400
+++ linux-2.6.7-01-lock_owner_fixup/fs/nfs/write.c 2004-06-22 16:48:19.000000000 -0400
@@ -189,7 +189,7 @@ static int nfs_writepage_sync(struct fil
wdata->flags = how;
wdata->inode = inode;
wdata->args.fh = NFS_FH(inode);
- wdata->args.lockowner = current->files;
+ wdata->args.pid = current->tgid;
wdata->args.pages = &page;
wdata->args.stable = NFS_FILE_SYNC;
wdata->args.pgbase = offset;
@@ -868,7 +868,7 @@ static void nfs_write_rpcsetup(struct nf
data->args.pgbase = req->wb_pgbase + offset;
data->args.pages = data->pagevec;
data->args.count = count;
- data->args.lockowner = req->wb_lockowner;
+ data->args.pid = req->wb_pid;
data->args.state = req->wb_state;

data->res.fattr = &data->fattr;
diff -u --recursive --new-file --show-c-function linux-2.6.7-00-fix_locks/fs/open.c linux-2.6.7-01-lock_owner_fixup/fs/open.c
--- linux-2.6.7-00-fix_locks/fs/open.c 2004-06-16 14:57:46.000000000 -0400
+++ linux-2.6.7-01-lock_owner_fixup/fs/open.c 2004-06-22 16:48:19.000000000 -0400
@@ -1007,7 +1007,7 @@ int filp_close(struct file *filp, fl_own
}

dnotify_flush(filp, id);
- locks_remove_posix(filp, id);
+ locks_remove_posix(filp, 0);
fput(filp);
return retval;
}
diff -u --recursive --new-file --show-c-function linux-2.6.7-00-fix_locks/include/linux/fs.h linux-2.6.7-01-lock_owner_fixup/include/linux/fs.h
--- linux-2.6.7-00-fix_locks/include/linux/fs.h 2004-06-16 14:57:54.000000000 -0400
+++ linux-2.6.7-01-lock_owner_fixup/include/linux/fs.h 2004-06-22 16:48:19.000000000 -0400
@@ -625,7 +625,7 @@ struct file_lock {
struct file_lock *fl_next; /* singly linked list for this inode */
struct list_head fl_link; /* doubly linked list of all locks */
struct list_head fl_block; /* circular list of blocked processes */
- fl_owner_t fl_owner;
+ fl_owner_t fl_owner; /* 0 if lock owned by a local process */
unsigned int fl_pid;
wait_queue_head_t fl_wait;
struct file *fl_file;
diff -u --recursive --new-file --show-c-function linux-2.6.7-00-fix_locks/include/linux/nfs_fs.h linux-2.6.7-01-lock_owner_fixup/include/linux/nfs_fs.h
--- linux-2.6.7-00-fix_locks/include/linux/nfs_fs.h 2004-06-16 14:58:01.000000000 -0400
+++ linux-2.6.7-01-lock_owner_fixup/include/linux/nfs_fs.h 2004-06-22 16:48:19.000000000 -0400
@@ -595,7 +595,7 @@ struct nfs4_state_owner {

struct nfs4_lock_state {
struct list_head ls_locks; /* Other lock stateids */
- fl_owner_t ls_owner; /* POSIX lock owner */
+ unsigned int ls_pid; /* pid of owner process */
struct nfs4_state * ls_parent; /* Parent nfs4_state */
u32 ls_seqid;
u32 ls_id;
@@ -665,13 +665,13 @@ extern struct nfs4_state *nfs4_find_stat
extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp);
extern int nfs4_handle_error(struct nfs_server *, int);
extern void nfs4_schedule_state_recovery(struct nfs4_client *);
-extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t);
-extern struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t);
+extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, unsigned int pid);
+extern struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, unsigned int pid);
extern void nfs4_put_lock_state(struct nfs4_lock_state *state);
extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls);
extern void nfs4_notify_setlk(struct inode *, struct file_lock *, struct nfs4_lock_state *);
extern void nfs4_notify_unlck(struct inode *, struct file_lock *, struct nfs4_lock_state *);
-extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
+extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, unsigned int pid);



diff -u --recursive --new-file --show-c-function linux-2.6.7-00-fix_locks/include/linux/nfs_page.h linux-2.6.7-01-lock_owner_fixup/include/linux/nfs_page.h
--- linux-2.6.7-00-fix_locks/include/linux/nfs_page.h 2004-06-16 14:57:47.000000000 -0400
+++ linux-2.6.7-01-lock_owner_fixup/include/linux/nfs_page.h 2004-06-22 16:48:19.000000000 -0400
@@ -30,7 +30,7 @@ struct nfs_page {
struct list_head wb_list, /* Defines state of page: */
*wb_list_head; /* read/write/commit */
struct file *wb_file;
- fl_owner_t wb_lockowner;
+ unsigned int wb_pid;
struct inode *wb_inode;
struct rpc_cred *wb_cred;
struct nfs4_state *wb_state;
diff -u --recursive --new-file --show-c-function linux-2.6.7-00-fix_locks/include/linux/nfs_xdr.h linux-2.6.7-01-lock_owner_fixup/include/linux/nfs_xdr.h
--- linux-2.6.7-00-fix_locks/include/linux/nfs_xdr.h 2004-06-16 14:58:18.000000000 -0400
+++ linux-2.6.7-01-lock_owner_fixup/include/linux/nfs_xdr.h 2004-06-22 16:48:19.000000000 -0400
@@ -235,7 +235,7 @@ struct nfs_lockres {

struct nfs_readargs {
struct nfs_fh * fh;
- fl_owner_t lockowner;
+ unsigned int pid;
struct nfs4_state * state;
__u64 offset;
__u32 count;
@@ -259,7 +259,7 @@ struct nfs_readres {

struct nfs_writeargs {
struct nfs_fh * fh;
- fl_owner_t lockowner;
+ unsigned int pid;
struct nfs4_state * state;
__u64 offset;
__u32 count;