[PATCH 24/43] mm: numa: Migrate on reference policy

From: Mel Gorman
Date: Fri Nov 16 2012 - 06:27:53 EST


This is the simplest possible policy that still does something of note.
When a pte_numa fault is taken, the page is migrated immediately to the
node of the CPU that referenced it. Any replacement policy must at least
do better than this, and in all likelihood this policy regresses normal
workloads.

Signed-off-by: Mel Gorman <mgorman@xxxxxxx>
Acked-by: Rik van Riel <riel@xxxxxxxxxx>
---
include/uapi/linux/mempolicy.h | 1 +
mm/mempolicy.c | 38 ++++++++++++++++++++++++++++++++++++--
2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index 16fb4e6..0d11c3d 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -67,6 +67,7 @@ enum mpol_rebind_step {
#define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */
#define MPOL_F_REBINDING (1 << 2) /* identify policies in rebinding */
#define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */
+#define MPOL_F_MORON (1 << 4) /* Migrate On pte_numa Reference On Node */


#endif /* _UAPI_LINUX_MEMPOLICY_H */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 045714d..bcaa4fe 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -118,6 +118,26 @@ static struct mempolicy default_policy = {
.flags = MPOL_F_LOCAL,
};

+static struct mempolicy preferred_node_policy[MAX_NUMNODES];
+
+/* Return @p's mempolicy, else the local node's preferred policy, else NULL. */
+static struct mempolicy *get_task_policy(struct task_struct *p)
+{
+	struct mempolicy *pol = p->mempolicy;
+	int node;
+
+	if (pol)
+		return pol;
+
+	node = numa_node_id();
+	if (node == -1)
+		return NULL;	/* nothing to index; pol->mode must not be read */
+
+	/* preferred_node_policy is not initialised early in boot */
+	pol = &preferred_node_policy[node];
+	return pol->mode ? pol : NULL;
+}
+
static const struct mempolicy_operations {
int (*create)(struct mempolicy *pol, const nodemask_t *nodes);
/*
@@ -1706,7 +1726,7 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
struct mempolicy *get_vma_policy(struct task_struct *task,
struct vm_area_struct *vma, unsigned long addr)
{
- struct mempolicy *pol = task->mempolicy;
+ struct mempolicy *pol = get_task_policy(task);

if (vma) {
if (vma->vm_ops && vma->vm_ops->get_policy) {
@@ -2129,7 +2149,7 @@ retry_cpuset:
*/
struct page *alloc_pages_current(gfp_t gfp, unsigned order)
{
- struct mempolicy *pol = current->mempolicy;
+ struct mempolicy *pol = get_task_policy(current);
struct page *page;
unsigned int cpuset_mems_cookie;

@@ -2403,6 +2423,11 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
default:
BUG();
}
+
+ /* Migrate the page towards the node whose CPU is referencing it */
+ if (pol->flags & MPOL_F_MORON)
+ polnid = numa_node_id();
+
if (curnid != polnid)
ret = polnid;
out:
@@ -2591,6 +2616,15 @@ void __init numa_policy_init(void)
sizeof(struct sp_node),
0, SLAB_PANIC, NULL);

+ for_each_node(nid) {
+ preferred_node_policy[nid] = (struct mempolicy) {
+ .refcnt = ATOMIC_INIT(1),
+ .mode = MPOL_PREFERRED,
+ .flags = MPOL_F_MOF | MPOL_F_MORON,
+ .v = { .preferred_node = nid, },
+ };
+ }
+
/*
* Set interleaving policy for system init. Interleaving is only
* enabled across suitably sized nodes (default is >= 16MB), or
--
1.7.9.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/