Re: [PATCH] token based thrashing control

From: Rik van Riel
Date: Sun Aug 01 2004 - 20:37:40 EST


On Sun, 1 Aug 2004, Andrew Morton wrote:
> Rik van Riel <riel@xxxxxxxxxx> wrote:

> > However, for make -j 60 there's a dramatic difference between
> > a kernel with the token based swapout and a kernel without.
> >
> > normal 2.6.8-rc2: 1h20m runtime / ~26% CPU use average
> > 2.6.8-rc2 + token: 42m runtime / ~52% CPU use average
>
> OK. My test is usually around 50-60% CPU occupancy so we're not gaining
> in the moderate swapping range.

I wonder if measuring minor faults too would help here ...

Btw, here's a slightly updated patch. It fixes the definition of
put_swap_token for !CONFIG_SWAP and calls put_swap_token just before
mmdrop (see the kernel/fork.c hunk below).

I also cut the 4G/4G split line out of the mm/Makefile patch chunk,
so that should now apply better.

It doesn't have any functional changes I'm aware of.

--- linux-2.6.7/include/linux/swap.h.token 2004-07-30 13:22:17.000000000 -0400
+++ linux-2.6.7/include/linux/swap.h 2004-08-01 21:28:29.411274311 -0400
@@ -204,6 +204,27 @@
extern struct page * lookup_swap_cache(swp_entry_t);
extern struct page * read_swap_cache_async(swp_entry_t, struct vm_area_struct *vma,
unsigned long addr);
+/* linux/mm/thrash.c: token based thrashing protection */
+#ifdef CONFIG_SWAP
+extern struct mm_struct * swap_token_mm; /* mm that currently holds the token */
+extern void grab_swap_token(void); /* try to take the token for current->mm */
+extern void __put_swap_token(struct mm_struct *); /* locked release; callers use put_swap_token() */
+
+static inline int has_swap_token(struct mm_struct * mm)
+{
+	return swap_token_mm == mm;	/* unlocked peek: nonzero iff mm holds the token */
+}
+
+static inline void put_swap_token(struct mm_struct * mm)
+{
+	if (swap_token_mm == mm)	/* unlocked fast check; redone under the lock by the callee */
+		__put_swap_token(mm);
+}
+#else /* CONFIG_SWAP */
+#define put_swap_token(x) do { } while(0)	/* !CONFIG_SWAP: no token to release */
+#define grab_swap_token() do { } while(0)	/* function-like, so "grab_swap_token();" still compiles */
+#define has_swap_token(x) 0	/* function-like, so "has_swap_token(mm)" evaluates to 0 */
+#endif /* CONFIG_SWAP */

/* linux/mm/swapfile.c */
extern long total_swap_pages;
--- linux-2.6.7/include/linux/sched.h.token 2004-07-30 13:22:28.000000000 -0400
+++ linux-2.6.7/include/linux/sched.h 2004-07-30 13:22:29.000000000 -0400
@@ -239,6 +239,10 @@
/* Architecture-specific MM context */
mm_context_t context;

+ /* Token based thrashing protection. */
+ unsigned long swap_token_time; /* jiffies value before which this mm may not grab the token */
+ char recent_pagein; /* set by grab_swap_token() while this mm holds the token; cleared at each release check */
+
/* coredumping support */
int core_waiters;
struct completion *core_startup_done, core_done;
--- linux-2.6.7/kernel/fork.c.token 2004-07-30 13:22:27.000000000 -0400
+++ linux-2.6.7/kernel/fork.c 2004-08-01 20:44:50.000000000 -0400
@@ -36,6 +36,7 @@
#include <linux/mount.h>
#include <linux/audit.h>
#include <linux/rmap.h>
+#include <linux/swap.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -462,6 +463,7 @@
spin_unlock(&mmlist_lock);
exit_aio(mm);
exit_mmap(mm);
+ put_swap_token(mm);
mmdrop(mm);
}
}
--- linux-2.6.7/mm/memory.c.token 2004-07-30 13:22:28.000000000 -0400
+++ linux-2.6.7/mm/memory.c 2004-07-30 13:22:29.000000000 -0400
@@ -1433,6 +1433,7 @@
/* Had to read the page from swap area: Major fault */
ret = VM_FAULT_MAJOR;
inc_page_state(pgmajfault);
+ grab_swap_token();
}

mark_page_accessed(page);
--- linux-2.6.7/mm/filemap.c.token 2004-07-30 13:22:28.000000000 -0400
+++ linux-2.6.7/mm/filemap.c 2004-07-30 13:22:29.000000000 -0400
@@ -1195,6 +1195,7 @@
* effect.
*/
error = page_cache_read(file, pgoff);
+ grab_swap_token();

/*
* The page we want has now been added to the page cache.
--- /dev/null 2003-09-15 09:40:47.000000000 -0400
+++ linux-2.6.7/mm/thrash.c 2004-07-31 01:54:26.000000000 -0400
@@ -0,0 +1,100 @@
+/*
+ * mm/thrash.c
+ *
+ * Copyright (C) 2004, Red Hat, Inc.
+ * Copyright (C) 2004, Rik van Riel <riel@xxxxxxxxxx>
+ * Released under the GPL, see the file COPYING for details.
+ *
+ * Simple token based thrashing protection, using the algorithm
+ * described in: http://www.cs.wm.edu/~sjiang/token.pdf
+ */
+#include <linux/jiffies.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/swap.h>
+
+static spinlock_t swap_token_lock = SPIN_LOCK_UNLOCKED; /* serializes token hand-over (grab) and release (__put) */
+static unsigned long swap_token_timeout; /* jiffies value after which the holder counts as timed out */
+unsigned long swap_token_check; /* jiffies value after which the next grab attempt is allowed; NOTE(review): not static, although this patch only uses it in this file - confirm */
+struct mm_struct * swap_token_mm = &init_mm; /* current token holder; starts as init_mm and is reset to it on release */
+
+#define SWAP_TOKEN_CHECK_INTERVAL (HZ * 2) /* minimum gap between grab attempts, in jiffies */
+#define SWAP_TOKEN_TIMEOUT (HZ * 300) /* maximum hold time, and extra wait imposed on a timed-out holder, in jiffies */
+
+/*
+ * Should the holder "mm" give up the token?  Returns SWAP_TOKEN_ENOUGH_RSS
+ * if it flagged no pagein since the last check, SWAP_TOKEN_TIMED_OUT if
+ * swap_token_timeout has passed, else 0.  Always clears mm->recent_pagein.
+ */
+#define SWAP_TOKEN_ENOUGH_RSS 1
+#define SWAP_TOKEN_TIMED_OUT 2
+static int should_release_swap_token(struct mm_struct * mm)
+{
+	int verdict = SWAP_TOKEN_ENOUGH_RSS;
+
+	if (mm->recent_pagein)
+		verdict = time_after(jiffies, swap_token_timeout) ?
+				SWAP_TOKEN_TIMED_OUT : 0;
+	mm->recent_pagein = 0;
+	return verdict;
+}
+
+/*
+ * Try to take the swapout protection token for current->mm.  A real
+ * attempt is made at most once every SWAP_TOKEN_CHECK_INTERVAL, both
+ * to prevent SMP lock contention and to check that the process that
+ * held the token before is no longer thrashing.
+ */
+void grab_swap_token(void)
+{
+	struct mm_struct *self = current->mm;
+	struct mm_struct *holder;
+	unsigned long eligible;
+	int reason;
+
+	/* Already the holder: just flag that the token is still needed. */
+	if (has_swap_token(self)) {
+		self->recent_pagein = 1;
+		return;
+	}
+
+	/* Still inside the interval that follows the last attempt. */
+	if (!time_after(jiffies, swap_token_check))
+		return;
+
+	/* Not enforced yet: refusing the token when rss exceeds rlimit_rss. */
+	/* No token for an mm that is not yet eligible to hold it again. */
+	if (time_before(jiffies, self->swap_token_time))
+		return;
+
+	/* Never spin for the lock; on contention simply skip this attempt. */
+	if (!spin_trylock(&swap_token_lock))
+		return;
+
+	swap_token_check = jiffies + SWAP_TOKEN_CHECK_INTERVAL;
+	holder = swap_token_mm;
+	reason = should_release_swap_token(holder);
+	if (reason) {
+		/* A timed-out holder waits SWAP_TOKEN_TIMEOUT before retrying. */
+		eligible = jiffies;
+		if (reason == SWAP_TOKEN_TIMED_OUT)
+			eligible += SWAP_TOKEN_TIMEOUT;
+		holder->swap_token_time = eligible;
+		swap_token_timeout = jiffies + SWAP_TOKEN_TIMEOUT;
+		swap_token_mm = self;
+		printk("Took swap token, pid %d (%s)\n",
+			current->pid, current->comm);
+	}
+	spin_unlock(&swap_token_lock);
+}
+
+/* Called on process exit; the caller's unlocked holder check is redone under the lock. */
+void __put_swap_token(struct mm_struct * mm)
+{
+	spin_lock(&swap_token_lock);
+	if (likely(swap_token_mm == mm)) {
+		swap_token_mm = &init_mm;	/* hand the token back to init_mm */
+		swap_token_check = jiffies;	/* end the wait so the next grab attempt can proceed */
+	}
+	spin_unlock(&swap_token_lock);
+}
--- linux-2.6.7/mm/rmap.c.token 2004-07-30 13:22:24.000000000 -0400
+++ linux-2.6.7/mm/rmap.c 2004-08-01 21:15:29.861020222 -0400
@@ -230,6 +230,9 @@
if (ptep_clear_flush_young(vma, address, pte))
referenced++;

+ if (mm != current->mm && has_swap_token(mm))
+ referenced++;
+
(*mapcount)--;

out_unmap:
--- linux-2.6.7/mm/Makefile.token 2004-07-30 13:22:27.000000000 -0400
+++ linux-2.6.7/mm/Makefile 2004-07-30 13:22:29.000000000 -0400
@@ -12,6 +12,6 @@
readahead.o slab.o swap.o truncate.o vmscan.o \
$(mmu-y)

-obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o
+obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
obj-$(CONFIG_HUGETLBFS) += hugetlb.o
obj-$(CONFIG_NUMA) += mempolicy.o
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/