[PATCH] mm: rcu-protected get_mm_exe_file()

From: Konstantin Khlebnikov
Date: Mon Mar 16 2015 - 09:13:10 EST


This patch removes mm->mmap_sem from mm->exe_file read side.
Also it kills dup_mm_exe_file() and moves exe_file duplication into
dup_mmap() where both mmap_sems are locked.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxxxxxx>
Cc: Davidlohr Bueso <dbueso@xxxxxxx>
Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
---
fs/file.c | 3 +-
include/linux/fs.h | 1 +
include/linux/mm_types.h | 2 +-
kernel/fork.c | 56 ++++++++++++++++++++++++++++++----------------
4 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index ee738ea028fa..93c5f89c248b 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -638,8 +638,7 @@ static struct file *__fget(unsigned int fd, fmode_t mask)
file = fcheck_files(files, fd);
if (file) {
/* File object ref couldn't be taken */
- if ((file->f_mode & mask) ||
- !atomic_long_inc_not_zero(&file->f_count))
+ if ((file->f_mode & mask) || !get_file_rcu(file))
file = NULL;
}
rcu_read_unlock();
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1abc7f2a5730..29bc94cfa273 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -847,6 +847,7 @@ static inline struct file *get_file(struct file *f)
atomic_long_inc(&f->f_count);
return f;
}
+#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count)
#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1)
#define file_count(x) atomic_long_read(&(x)->f_count)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 590630eb59ba..8d37e26a1007 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -429,7 +429,7 @@ struct mm_struct {
#endif

/* store ref to file /proc/<pid>/exe symlink points to */
- struct file *exe_file;
+ struct file __rcu *exe_file;
#ifdef CONFIG_MMU_NOTIFIER
struct mmu_notifier_mm *mmu_notifier_mm;
#endif
diff --git a/kernel/fork.c b/kernel/fork.c
index 2113001ceb38..a7c596517bd6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -380,6 +380,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
*/
down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);

+ /* No ordering required: file already has been exposed. */
+ RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+
mm->total_vm = oldmm->total_vm;
mm->shared_vm = oldmm->shared_vm;
mm->exec_vm = oldmm->exec_vm;
@@ -505,7 +508,13 @@ static inline void mm_free_pgd(struct mm_struct *mm)
pgd_free(mm, mm->pgd);
}
#else
-#define dup_mmap(mm, oldmm) (0)
+static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+ down_write(&oldmm->mmap_sem);
+ RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+ up_write(&oldmm->mmap_sem);
+ return 0;
+}
#define mm_alloc_pgd(mm) (0)
#define mm_free_pgd(mm)
#endif /* CONFIG_MMU */
@@ -674,36 +683,47 @@ void mmput(struct mm_struct *mm)
}
EXPORT_SYMBOL_GPL(mmput);

+/**
+ * set_mm_exe_file - change a reference to the mm's executable file
+ *
+ * This changes mm's executale file (shown as symlink /proc/[pid]/exe).
+ *
+ * Main users are mmput(), sys_execve() and sys_prctl(PR_SET_MM_MAP/EXE_FILE).
+ * Callers prevent concurrent invocations: in mmput() nobody alive left,
+ * in execve task is single-threaded, prctl holds mmap_sem exclusively.
+ */
void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
{
+ struct file *old_exe_file = rcu_dereference_protected(mm->exe_file,
+ !atomic_read(&mm->mm_users) || current->in_execve ||
+ lock_is_held(&mm->mmap_sem));
+
if (new_exe_file)
get_file(new_exe_file);
- if (mm->exe_file)
- fput(mm->exe_file);
- mm->exe_file = new_exe_file;
+ rcu_assign_pointer(mm->exe_file, new_exe_file);
+ if (old_exe_file)
+ fput(old_exe_file);
}

+/**
+ * get_mm_exe_file - acquire a reference to the mm's executable file
+ *
+ * Returns %NULL if mm has no associated executable file.
+ * User must release file via fput().
+ */
struct file *get_mm_exe_file(struct mm_struct *mm)
{
struct file *exe_file;

- /* We need mmap_sem to protect against races with removal of exe_file */
- down_read(&mm->mmap_sem);
- exe_file = mm->exe_file;
- if (exe_file)
- get_file(exe_file);
- up_read(&mm->mmap_sem);
+ rcu_read_lock();
+ exe_file = rcu_dereference(mm->exe_file);
+ if (exe_file && !get_file_rcu(exe_file))
+ exe_file = NULL;
+ rcu_read_unlock();
return exe_file;
}
EXPORT_SYMBOL(get_mm_exe_file);

-static void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
-{
- /* It's safe to write the exe_file pointer without exe_file_lock because
- * this is called during fork when the task is not yet in /proc */
- newmm->exe_file = get_mm_exe_file(oldmm);
-}
-
/**
* get_task_mm - acquire a reference to the task's mm
*
@@ -865,8 +885,6 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
if (!mm_init(mm, tsk))
goto fail_nomem;

- dup_mm_exe_file(oldmm, mm);
-
err = dup_mmap(mm, oldmm);
if (err)
goto free_pt;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/