Patch to fix uninterruptible sleeps on tty and pipe writers

tytso@mit.edu
Wed, 9 Sep 1998 12:16:42 -0400


Hi Linus,

Here's a patch to fix the problem Alan and I had discussed with
you earlier regarding processes getting stuck in uninterruptible waits
if more than one process is trying to write() to a tty or a pipe and
the tty's or the pipe's write buffer is full. It is caused by the fact
that there is an uninterruptible semaphore in sys_write() which will
block after the first process is waiting inside the tty or pipe code.

The patch is versus 2.1.120, and I think it's important to get this bug
fixed before 2.2 comes out. In the long run, the correct fix is to move
the semaphore down into the individual filesystem and device driver
routines. For now, though, this is the smallest patch which fixes the
problem while still preserving the atomic write semantics guaranteed by
POSIX.

Could you apply this to the mainline kernel sources, please? Thanks!!

- Ted

Patch generated: on Fri Sep 4 10:03:01 EDT 1998 by tytso@rsts-11.mit.edu
against Linux version 2.1.117

===================================================================
RCS file: include/linux/RCS/fs.h,v
retrieving revision 1.1
diff -u -r1.1 include/linux/fs.h
--- include/linux/fs.h 1998/09/02 17:41:42 1.1
+++ include/linux/fs.h 1998/09/02 17:41:59
@@ -347,6 +347,7 @@
unsigned long i_version;
unsigned long i_nrpages;
struct semaphore i_sem;
+ struct semaphore i_atomic_write;
struct inode_operations *i_op;
struct super_block *i_sb;
struct wait_queue *i_wait;
===================================================================
RCS file: fs/RCS/inode.c,v
retrieving revision 1.1
diff -u -r1.1 fs/inode.c
--- fs/inode.c 1998/09/02 17:42:46 1.1
+++ fs/inode.c 1998/09/02 17:45:24
@@ -130,6 +130,7 @@
INIT_LIST_HEAD(&inode->i_hash);
INIT_LIST_HEAD(&inode->i_dentry);
sema_init(&inode->i_sem, 1);
+ sema_init(&inode->i_atomic_write, 1);
}

static inline void write_inode(struct inode *inode)
@@ -713,8 +714,11 @@
printk(KERN_ERR "iput: device %s inode %ld count changed, count=%d\n",
kdevname(inode->i_dev), inode->i_ino, inode->i_count);
if (atomic_read(&inode->i_sem.count) != 1)
-printk(KERN_ERR "iput: Aieee, semaphore in use device %s, count=%d\n",
-kdevname(inode->i_dev), atomic_read(&inode->i_sem.count));
+printk(KERN_ERR "iput: Aieee, semaphore in use inode %s/%ld, count=%d\n",
+kdevname(inode->i_dev), inode->i_ino, atomic_read(&inode->i_sem.count));
+if (atomic_read(&inode->i_atomic_write.count) != 1)
+printk(KERN_ERR "iput: Aieee, atomic write semaphore in use inode %s/%ld, count=%d\n",
+kdevname(inode->i_dev), inode->i_ino, atomic_read(&inode->i_sem.count));
#endif
}
if (inode->i_count > (1<<31)) {
===================================================================
RCS file: fs/RCS/pipe.c,v
retrieving revision 1.1
diff -u -r1.1 fs/pipe.c
--- fs/pipe.c 1998/09/02 17:43:00 1.1
+++ fs/pipe.c 1998/09/02 18:02:44
@@ -92,7 +92,7 @@
size_t count, loff_t *ppos)
{
struct inode * inode = filp->f_dentry->d_inode;
- ssize_t chars = 0, free = 0, written = 0;
+ ssize_t chars = 0, free = 0, written = 0, err=0;
char *pipebuf;

if (ppos != &filp->f_pos)
@@ -107,16 +107,26 @@
free = count;
else
free = 1; /* can't do it atomically, wait for any free space */
+ up(&inode->i_sem);
+ if (down_interruptible(&inode->i_atomic_write)) {
+ down(&inode->i_sem);
+ return -ERESTARTSYS;
+ }
while (count>0) {
while ((PIPE_FREE(*inode) < free) || PIPE_LOCK(*inode)) {
if (!PIPE_READERS(*inode)) { /* no readers */
send_sig(SIGPIPE,current,0);
- return written? :-EPIPE;
+ err = -EPIPE;
+ goto errout;
+ }
+ if (signal_pending(current)) {
+ err = -ERESTARTSYS;
+ goto errout;
+ }
+ if (filp->f_flags & O_NONBLOCK) {
+ err = -EAGAIN;
+ goto errout;
}
- if (signal_pending(current))
- return written? :-ERESTARTSYS;
- if (filp->f_flags & O_NONBLOCK)
- return written? :-EAGAIN;
interruptible_sleep_on(&PIPE_WAIT(*inode));
}
PIPE_LOCK(*inode)++;
@@ -139,7 +149,10 @@
}
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
mark_inode_dirty(inode);
- return written;
+errout:
+ up(&inode->i_atomic_write);
+ down(&inode->i_sem);
+ return written ? written : err;
}

static long long pipe_lseek(struct file * file, long long offset, int orig)
===================================================================
RCS file: drivers/char/RCS/tty_io.c,v
retrieving revision 1.1
diff -u -r1.1 drivers/char/tty_io.c
--- drivers/char/tty_io.c 1998/08/31 19:13:28 1.1
+++ drivers/char/tty_io.c 1998/09/02 18:02:27
@@ -638,7 +638,13 @@
size_t count)
{
ssize_t ret = 0, written = 0;
-
+ struct inode *inode = file->f_dentry->d_inode;
+
+ up(&inode->i_sem);
+ if (down_interruptible(&inode->i_atomic_write)) {
+ down(&inode->i_sem);
+ return -ERESTARTSYS;
+ }
for (;;) {
unsigned long size = PAGE_SIZE*2;
if (size > count)
@@ -661,6 +667,8 @@
file->f_dentry->d_inode->i_mtime = CURRENT_TIME;
ret = written;
}
+ up(&inode->i_atomic_write);
+ down(&inode->i_sem);
return ret;
}

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/faq.html