[tip:sched/numa] mm/mpol: Split and expose some mempolicy functions
From: tip-bot for Peter Zijlstra
Date: Fri May 18 2012 - 06:31:40 EST
Commit-ID: 5dca4a9119807320b6571ecae878249eef46fa92
Gitweb: http://git.kernel.org/tip/5dca4a9119807320b6571ecae878249eef46fa92
Author: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
AuthorDate: Mon, 9 Jan 2012 11:41:03 +0100
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Fri, 18 May 2012 08:16:20 +0200
mm/mpol: Split and expose some mempolicy functions
In order to allow creating 'custom' mpols, expose some guts. In
particular, expose the means to allocate fresh mpols and to bind them
to memory ranges, skipping the intermediate -- policy -- part of
sys_mbind().
Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
Cc: Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
Cc: Paul Turner <pjt@xxxxxxxxxx>
Cc: Dan Smith <danms@xxxxxxxxxx>
Cc: Bharata B Rao <bharata.rao@xxxxxxxxx>
Cc: Lee Schermerhorn <Lee.Schermerhorn@xxxxxx>
Cc: Christoph Lameter <cl@xxxxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxx>
Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Link: http://lkml.kernel.org/n/tip-g2rnququsbs1htvrg0b3w8a3@xxxxxxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
include/linux/mempolicy.h | 8 +++
mm/mempolicy.c | 111 +++++++++++++++++++++++++-------------------
2 files changed, 71 insertions(+), 48 deletions(-)
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 3942845..578fcd0 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -205,6 +205,12 @@ struct shared_policy {
spinlock_t lock;
};
+extern struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
+ nodemask_t *nodes);
+extern long mpol_do_mbind(unsigned long start, unsigned long len,
+ struct mempolicy *policy, unsigned long mode,
+ nodemask_t *nmask, unsigned long flags);
+
void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol);
int mpol_set_shared_policy(struct shared_policy *info,
struct vm_area_struct *vma,
@@ -218,6 +224,8 @@ struct mempolicy *get_vma_policy(struct task_struct *tsk,
extern void numa_default_policy(void);
extern void numa_policy_init(void);
+extern void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *new,
+ enum mpol_rebind_step step);
extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new,
enum mpol_rebind_step step);
extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d1de6b8..1ddbf22 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -243,7 +243,7 @@ static int mpol_set_nodemask(struct mempolicy *pol,
* This function just creates a new policy, does some check and simple
* initialization. You must invoke mpol_set_nodemask() to set nodes.
*/
-static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
+struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
nodemask_t *nodes)
{
struct mempolicy *policy;
@@ -389,7 +389,7 @@ static void mpol_rebind_preferred(struct mempolicy *pol,
* MPOL_REBIND_STEP1 - set all the newly nodes
* MPOL_REBIND_STEP2 - clean all the disallowed nodes
*/
-static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask,
+void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask,
enum mpol_rebind_step step)
{
if (!pol)
@@ -1056,55 +1056,28 @@ static struct page *new_vma_page(struct page *page, unsigned long private, int *
}
#endif
-static long do_mbind(unsigned long start, unsigned long len,
- unsigned short mode, unsigned short mode_flags,
- nodemask_t *nmask, unsigned long flags)
+long mpol_do_mbind(unsigned long start, unsigned long len,
+ struct mempolicy *new, unsigned long mode,
+ nodemask_t *nmask, unsigned long flags)
{
- struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
- struct mempolicy *new = NULL;
- unsigned long end;
+ struct vm_area_struct *vma;
int err, nr_failed = 0;
+ unsigned long end;
LIST_HEAD(pagelist);
- if (flags & ~(unsigned long)MPOL_MF_VALID)
- return -EINVAL;
- if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
- return -EPERM;
-
- if (start & ~PAGE_MASK)
- return -EINVAL;
-
- if (mode == MPOL_DEFAULT || mode == MPOL_NOOP)
- flags &= ~MPOL_MF_STRICT;
-
len = (len + PAGE_SIZE - 1) & PAGE_MASK;
end = start + len;
- if (end < start)
- return -EINVAL;
- if (end == start)
- return 0;
-
- if (mode != MPOL_NOOP) {
- new = mpol_new(mode, mode_flags, nmask);
- if (IS_ERR(new))
- return PTR_ERR(new);
-
- if (flags & MPOL_MF_LAZY)
- new->flags |= MPOL_F_MOF;
-
+ if (end < start) {
+ err = -EINVAL;
+ goto mpol_out;
}
- /*
- * If we are using the default policy then operation
- * on discontinuous address spaces is okay after all
- */
- if (!new)
- flags |= MPOL_MF_DISCONTIG_OK;
- pr_debug("mbind %lx-%lx mode:%d flags:%d nodes:%lx\n",
- start, start + len, mode, mode_flags,
- nmask ? nodes_addr(*nmask)[0] : -1);
+ if (end == start) {
+ err = 0;
+ goto mpol_out;
+ }
if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
err = migrate_prep();
@@ -1112,8 +1085,6 @@ static long do_mbind(unsigned long start, unsigned long len,
goto mpol_out;
}
- down_write(&mm->mmap_sem);
-
if (mode != MPOL_NOOP) {
NODEMASK_SCRATCH(scratch);
err = -ENOMEM;
@@ -1124,7 +1095,7 @@ static long do_mbind(unsigned long start, unsigned long len,
}
NODEMASK_SCRATCH_FREE(scratch);
if (err)
- goto mpol_out_unlock;
+ goto mpol_out;
}
vma = check_range(mm, start, end, nmask,
@@ -1132,12 +1103,12 @@ static long do_mbind(unsigned long start, unsigned long len,
err = PTR_ERR(vma); /* maybe ... */
if (IS_ERR(vma))
- goto mpol_out_unlock;
+ goto mpol_out_putback;
if (mode != MPOL_NOOP) {
err = mbind_range(mm, start, end, new);
if (err)
- goto mpol_out_unlock;
+ goto mpol_out_putback;
}
if (!list_empty(&pagelist)) {
@@ -1153,12 +1124,56 @@ static long do_mbind(unsigned long start, unsigned long len,
if (nr_failed && (flags & MPOL_MF_STRICT))
err = -EIO;
+mpol_out_putback:
putback_lru_pages(&pagelist);
-mpol_out_unlock:
- up_write(&mm->mmap_sem);
mpol_out:
+ return err;
+}
+
+static long do_mbind(unsigned long start, unsigned long len,
+ unsigned short mode, unsigned short mode_flags,
+ nodemask_t *nmask, unsigned long flags)
+{
+ struct mm_struct *mm = current->mm;
+ struct mempolicy *new = NULL;
+ int err;
+
+ if (flags & ~(unsigned long)MPOL_MF_VALID)
+ return -EINVAL;
+ if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
+ return -EPERM;
+
+ if (start & ~PAGE_MASK)
+ return -EINVAL;
+
+ if (mode == MPOL_DEFAULT || mode == MPOL_NOOP)
+ flags &= ~MPOL_MF_STRICT;
+
+ if (mode != MPOL_NOOP) {
+ new = mpol_new(mode, mode_flags, nmask);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
+
+ if (flags & MPOL_MF_LAZY)
+ new->flags |= MPOL_F_MOF;
+ }
+ /*
+ * If we are using the default policy then operation
+ * on discontinuous address spaces is okay after all
+ */
+ if (!new)
+ flags |= MPOL_MF_DISCONTIG_OK;
+
+ pr_debug("mbind %lx-%lx mode:%d flags:%d nodes:%lx\n",
+ start, start + len, mode, mode_flags,
+ nmask ? nodes_addr(*nmask)[0] : -1);
+
+ down_write(&mm->mmap_sem);
+ err = mpol_do_mbind(start, len, new, mode, nmask, flags);
+ up_write(&mm->mmap_sem);
mpol_put(new);
+
return err;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/