Re: PROPOSAL: Fixing the sys5 emulation bugs in shmfs

From: Alan Cox (alan@lxorguk.ukuu.org.uk)
Date: Mon Mar 13 2000 - 10:11:19 EST


> You added a dead-lock:
> * shm_lock() is a spinlock.
> * shm_swap() is called from try_to_free_pages(), and that function could
> be called with the big kernel lock acquired; shm_swap() calls
> shm_lockall().

Yep. I also borked the chroot code since the fs_struct could be shared so
changing the dentry changes the entire thread group out of chroot briefly
(not good!). I now 'borrow' task 0's fs_struct so that the other threads
aren't touched.

> I documented the spinlock ordering in shm.c, ~ line 190.

Nod. I think this fixes it:

--- ../linux.vanilla/ipc/shm.c Sat Mar 11 13:59:17 2000
+++ ipc/shm.c Mon Mar 13 13:48:40 2000
@@ -71,6 +73,7 @@
         unsigned long shm_npages; /* size of segment (pages) */
         pte_t **shm_dir; /* ptr to arr of ptrs to frames */
         int id;
+ int destroyed; /* set if the final detach kills */
         union permap {
                 struct shmem {
                         time_t atime;
@@ -115,6 +118,7 @@
 static void killseg_core(struct shmid_kernel *shp, int doacc);
 static void shm_open (struct vm_area_struct *shmd);
 static void shm_close (struct vm_area_struct *shmd);
+static void shm_remove_name(int id);
 static struct page * shm_nopage(struct vm_area_struct *, unsigned long, int);
 static int shm_swapout(struct page *, struct file *);
 #ifdef CONFIG_PROC_FS
@@ -310,6 +314,20 @@
         return 0;
 }
 
+static struct fs_struct *shm_push_root(void)
+{
+ struct fs_struct *old,*new;
+ new=init_task_union.task.fs;
+ old=current->fs;
+ current->fs=new;
+ return old;
+}
+
+static void shm_pop_root(struct fs_struct *saved)
+{
+ current->fs=saved;
+}
+
 static void shm_put_super(struct super_block *sb)
 {
         struct super_block **p = &shm_sb;
@@ -517,7 +535,7 @@
 {
         unsigned short dir = pages / PTRS_PER_PTE;
         unsigned short last = pages % PTRS_PER_PTE;
- pte_t **ret, **ptr;
+ pte_t **ret, **ptr, *pte;
 
         if (pages == 0)
                 return NULL;
@@ -531,7 +549,8 @@
                 *ptr = (pte_t *)__get_free_page (GFP_KERNEL);
                 if (!*ptr)
                         goto free;
- memset (*ptr, 0, PAGE_SIZE);
+ for (pte = *ptr; pte < *ptr + PTRS_PER_PTE; pte++)
+ pte_clear (pte);
         }
 
         /* The last one is probably not of PAGE_SIZE: we use kmalloc */
@@ -539,7 +558,8 @@
                 *ptr = kmalloc (last*sizeof(pte_t), GFP_KERNEL);
                 if (!*ptr)
                         goto free;
- memset (*ptr, 0, last*sizeof(pte_t));
+ for (pte = *ptr; pte < *ptr + last; pte++)
+ pte_clear (pte);
         }
         return ret;
 
@@ -696,9 +716,11 @@
 {
         struct shmid_kernel *shp;
         int err, id = 0;
+ static int count=0;
 
         if (!shm_sb) {
- printk ("shmget: shm filesystem not mounted\n");
+ if(count++<5)
+ printk(KERN_WARNING "shmget: shm filesystem not mounted\n");
                 return -EINVAL;
         }
 
@@ -886,9 +908,11 @@
         struct shm_setbuf setbuf;
         struct shmid_kernel *shp;
         int err, version;
+ static int count;
 
         if (!shm_sb) {
- printk ("shmctl: shm filesystem not mounted\n");
+ if(count++<5)
+ printk (KERN_WARNING "shmctl: shm filesystem not mounted\n");
                 return -EINVAL;
         }
 
@@ -1008,18 +1032,36 @@
         }
         case IPC_RMID:
         {
- char *name;
+ /*
+ * We cannot simply remove the file. The SVID states
+ * that the block remains until the last person
+ * detaches from it, then is deleted. A shmat() on
+ * an RMID segment is legal in older Linux and if
+ * we change it apps break...
+ *
+ * Instead we set a destroyed flag, and then blow
+ * the name away when the usage hits zero.
+ */
                 if ((shmid % SEQ_MULTIPLIER)== zero_id)
                         return -EINVAL;
- name = shm_getname(shmid);
- if (IS_ERR(name))
- return PTR_ERR(name);
                 lock_kernel();
- err = do_unlink (name);
+ shp = shm_lock(shmid);
+ if(shp==NULL)
+ {
+ unlock_kernel();
+ return -EINVAL;
+ }
+ err=-EIDRM;
+ if(shm_checkid(shp,shmid)==0)
+ {
+ if(shp->shm_nattch==0)
+ shm_remove_name(shmid);
+ else
+ shp->destroyed=1;
+ err=0;
+ }
+ shm_unlock(shmid);
                 unlock_kernel();
- putname (name);
- if (err == -ENOENT)
- err = -EINVAL;
                 return err;
         }
 
@@ -1099,6 +1141,7 @@
         int err;
         int flags;
         char *name;
+ struct fs_struct *saved;
 
         if (!shm_sb || (shmid % SEQ_MULTIPLIER) == zero_id)
                 return -EINVAL;
@@ -1119,11 +1162,14 @@
         if (IS_ERR (name))
                 return PTR_ERR (name);
 
+ lock_kernel();
+ saved=shm_push_root();
         file = filp_open (name, O_RDWR, 0);
+ shm_pop_root(saved);
+
         putname (name);
         if (IS_ERR (file))
                 goto bad_file;
- lock_kernel();
         *raddr = do_mmap (file, addr, file->f_dentry->d_inode->i_size,
                           (shmflg & SHM_RDONLY ? PROT_READ :
                            PROT_READ | PROT_WRITE), flags, 0);
@@ -1136,6 +1182,7 @@
         return err;
 
 bad_file:
+ unlock_kernel();
         if ((err = PTR_ERR(file)) == -ENOENT)
                 return -EINVAL;
         return err;
@@ -1148,6 +1195,23 @@
 }
 
 /*
+ * Remove a name. Must be called with lock_kernel
+ */
+
+static void shm_remove_name(int id)
+{
+ char *name = shm_getname(id);
+ if (!IS_ERR(name))
+ {
+ struct fs_struct *saved;
+ saved=shm_push_root();
+ do_unlink (name);
+ shm_pop_root(saved);
+ putname (name);
+ }
+}
+
+/*
  * remove the attach descriptor shmd.
  * free memory for segment if it is marked destroyed.
  * The descriptor has already been removed from the current->mm->mmap list
@@ -1164,7 +1228,14 @@
         shp->shm_lprid = current->pid;
         shp->shm_dtim = CURRENT_TIME;
         shp->shm_nattch--;
- shm_unlock(id);
+ if(shp->shm_nattch==0 && shp->destroyed)
+ {
+ shp->destroyed=0;
+ shm_remove_name(id);
+ shm_unlock(id);
+ }
+ else
+ shm_unlock(id);
 }
 
 /*

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Wed Mar 15 2000 - 21:00:24 EST