> You added a dead-lock:
> * shm_lock() is a spinlock.
> * shm_swap() is called from try_to_free_pages(), and that function could
> be called with the big kernel lock acquired; shm_swap() calls
> shm_lockall().
Yep. I also borked the chroot code since the fs_struct could be shared so
changing the dentry changes the entire thread group out of chroot briefly
(not good!). I now 'borrow' task 0's fs_struct so that the other threads
aren't touched.
> I documented the spinlock ordering in shm.c, ~ line 190.
Nod. I think this fixes it:
--- ../linux.vanilla/ipc/shm.c Sat Mar 11 13:59:17 2000
+++ ipc/shm.c Mon Mar 13 13:48:40 2000
@@ -71,6 +73,7 @@
unsigned long shm_npages; /* size of segment (pages) */
pte_t **shm_dir; /* ptr to arr of ptrs to frames */
int id;
+ int destroyed; /* set if the final detach kills */
union permap {
struct shmem {
time_t atime;
@@ -115,6 +118,7 @@
static void killseg_core(struct shmid_kernel *shp, int doacc);
static void shm_open (struct vm_area_struct *shmd);
static void shm_close (struct vm_area_struct *shmd);
+static void shm_remove_name(int id);
static struct page * shm_nopage(struct vm_area_struct *, unsigned long, int);
static int shm_swapout(struct page *, struct file *);
#ifdef CONFIG_PROC_FS
@@ -310,6 +314,20 @@
return 0;
}
+static struct fs_struct *shm_push_root(void)
+{
+ struct fs_struct *old,*new;
+ new=init_task_union.task.fs;
+ old=current->fs;
+ current->fs=new;
+ return old;
+}
+
+static void shm_pop_root(struct fs_struct *saved)
+{
+ current->fs=saved;
+}
+
static void shm_put_super(struct super_block *sb)
{
struct super_block **p = &shm_sb;
@@ -517,7 +535,7 @@
{
unsigned short dir = pages / PTRS_PER_PTE;
unsigned short last = pages % PTRS_PER_PTE;
- pte_t **ret, **ptr;
+ pte_t **ret, **ptr, *pte;
if (pages == 0)
return NULL;
@@ -531,7 +549,8 @@
*ptr = (pte_t *)__get_free_page (GFP_KERNEL);
if (!*ptr)
goto free;
- memset (*ptr, 0, PAGE_SIZE);
+ for (pte = *ptr; pte < *ptr + PTRS_PER_PTE; pte++)
+ pte_clear (pte);
}
/* The last one is probably not of PAGE_SIZE: we use kmalloc */
@@ -539,7 +558,8 @@
*ptr = kmalloc (last*sizeof(pte_t), GFP_KERNEL);
if (!*ptr)
goto free;
- memset (*ptr, 0, last*sizeof(pte_t));
+ for (pte = *ptr; pte < *ptr + last; pte++)
+ pte_clear (pte);
}
return ret;
@@ -696,9 +716,11 @@
{
struct shmid_kernel *shp;
int err, id = 0;
+ static int count=0;
if (!shm_sb) {
- printk ("shmget: shm filesystem not mounted\n");
+ if(count++<5)
+ printk(KERN_WARNING "shmget: shm filesystem not mounted\n");
return -EINVAL;
}
@@ -886,9 +908,11 @@
struct shm_setbuf setbuf;
struct shmid_kernel *shp;
int err, version;
+ static int count;
if (!shm_sb) {
- printk ("shmctl: shm filesystem not mounted\n");
+ if(count++<5)
+ printk (KERN_WARNING "shmctl: shm filesystem not mounted\n");
return -EINVAL;
}
@@ -1008,18 +1032,36 @@
}
case IPC_RMID:
{
- char *name;
+ /*
+ * We cannot simply remove the file. The SVID states
+ * that the block remains until the last person
+ * detaches from it, then is deleted. A shmat() on
+ * an RMID segment is legal in older Linux and if
+ * we change it apps break...
+ *
+ * Instead we set a destroyed flag, and then blow
+ * the name away when the usage hits zero.
+ */
if ((shmid % SEQ_MULTIPLIER)== zero_id)
return -EINVAL;
- name = shm_getname(shmid);
- if (IS_ERR(name))
- return PTR_ERR(name);
lock_kernel();
- err = do_unlink (name);
+ shp = shm_lock(shmid);
+ if(shp==NULL)
+ {
+ unlock_kernel();
+ return -EINVAL;
+ }
+ err=-EIDRM;
+ if(shm_checkid(shp,shmid)==0)
+ {
+ if(shp->shm_nattch==0)
+ shm_remove_name(shmid);
+ else
+ shp->destroyed=1;
+ err=0;
+ }
+ shm_unlock(shmid);
unlock_kernel();
- putname (name);
- if (err == -ENOENT)
- err = -EINVAL;
return err;
}
@@ -1099,6 +1141,7 @@
int err;
int flags;
char *name;
+ struct fs_struct *saved;
if (!shm_sb || (shmid % SEQ_MULTIPLIER) == zero_id)
return -EINVAL;
@@ -1119,11 +1162,14 @@
if (IS_ERR (name))
return PTR_ERR (name);
+ lock_kernel();
+ saved=shm_push_root();
file = filp_open (name, O_RDWR, 0);
+ shm_pop_root(saved);
+
putname (name);
if (IS_ERR (file))
goto bad_file;
- lock_kernel();
*raddr = do_mmap (file, addr, file->f_dentry->d_inode->i_size,
(shmflg & SHM_RDONLY ? PROT_READ :
PROT_READ | PROT_WRITE), flags, 0);
@@ -1136,6 +1182,7 @@
return err;
bad_file:
+ unlock_kernel();
if ((err = PTR_ERR(file)) == -ENOENT)
return -EINVAL;
return err;
@@ -1148,6 +1195,23 @@
}
/*
+ * Remove a name. Must be called with lock_kernel
+ */
+
+static void shm_remove_name(int id)
+{
+ char *name = shm_getname(id);
+ if (!IS_ERR(name))
+ {
+ struct fs_struct *saved;
+ saved=shm_push_root();
+ do_unlink (name);
+ shm_pop_root(saved);
+ putname (name);
+ }
+}
+
+/*
* remove the attach descriptor shmd.
* free memory for segment if it is marked destroyed.
* The descriptor has already been removed from the current->mm->mmap list
@@ -1164,7 +1228,14 @@
shp->shm_lprid = current->pid;
shp->shm_dtim = CURRENT_TIME;
shp->shm_nattch--;
- shm_unlock(id);
+ if(shp->shm_nattch==0 && shp->destroyed)
+ {
+ shp->destroyed=0;
+ shm_remove_name(id);
+ shm_unlock(id);
+ }
+ else
+ shm_unlock(id);
}
/*
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/
This archive was generated by hypermail 2b29 : Wed Mar 15 2000 - 21:00:24 EST