Memory Management Patch 2.1.42-1

Benjamin C R LaHaise (blah@dot.superaje.com)
Sat, 31 May 1997 07:13:08 +0000 ( )


Ok, here's a quick & dirty version of the patch I'm working on -- it's
*much* harder to hard-lock the machine now (although it becomes *very*
slow). This thing still needs cleanup & optimizations - something to do
when I wake up... (and yes, we should kill a recently started process
when the system starts being unusable) Note that I've only tested this
on machines w/o swap, so I need feedback! (I'm pulling out a slow MFM
drive in the morning... ;)

Oh, while testing the kernel, I did manage to hose the NFS client :-/

-benjamin
PS: This is ugly!!! Also watch out - I added a count so that
Right Alt - Scroll Lock (dump regs) will cause an Oops after 32 tries.
If your machine starts looping in schedule() (as I've seen around here a
bit), repeat 32 times and record the oops ;)

Also at ftp://dot.superaje.com/pub/linux/blah-mm-2.1.42-1.diff

====Snip: blah-mm-2.1.42-1.diff====
diff -u --recursive linux-2.1.42/arch/i386/mm/init.c linux/arch/i386/mm/init.c
--- linux-2.1.42/arch/i386/mm/init.c Mon May 12 13:35:38 1997
+++ linux/arch/i386/mm/init.c Thu May 29 03:08:37 1997
@@ -91,6 +91,7 @@
printk("%d free pages\n",free);
printk("%d reserved pages\n",reserved);
printk("%d pages shared\n",shared);
+ printk("%d async pages\n",atomic_read(&nr_async_pages));
show_buffers();
#ifdef CONFIG_NET
show_net_buffers();
diff -u --recursive linux-2.1.42/drivers/char/keyboard.c linux/drivers/char/keyboard.c
--- linux-2.1.42/drivers/char/keyboard.c Thu May 22 21:51:32 1997
+++ linux/drivers/char/keyboard.c Sat May 31 01:34:53 1997
@@ -346,8 +346,11 @@

static void show_ptregs(void)
{
+ static int count = 0;
if (pt_regs)
show_regs(pt_regs);
+ if (count++ > 32)
+ *(char *)0 = 0;
}

static void hold(void)
diff -u --recursive linux-2.1.42/include/linux/swapctl.h linux/include/linux/swapctl.h
--- linux-2.1.42/include/linux/swapctl.h Thu May 15 18:52:11 1997
+++ linux/include/linux/swapctl.h Sat May 31 00:32:31 1997
@@ -31,6 +31,17 @@
typedef struct swap_control_v5 swap_control_t;
extern swap_control_t swap_control;

+typedef struct kswapd_control_v1
+{
+ unsigned int minpages;
+ unsigned int max_async_pages;
+ unsigned int pages_shm;
+ unsigned int pages_mmap;
+ unsigned int pages_swap;
+} kswapd_control_v1;
+typedef kswapd_control_v1 kswapd_control_t;
+extern kswapd_control_t kswapd_ctl;
+
typedef struct swapstat_v1
{
unsigned int wakeups;
diff -u --recursive linux-2.1.42/kernel/sysctl.c linux/kernel/sysctl.c
--- linux-2.1.42/kernel/sysctl.c Mon May 12 13:35:44 1997
+++ linux/kernel/sysctl.c Wed May 28 23:27:19 1997
@@ -184,6 +184,10 @@
static ctl_table vm_table[] = {
{VM_SWAPCTL, "swapctl",
&swap_control, sizeof(swap_control_t), 0600, NULL, &proc_dointvec},
+ {VM_KSWAPD, "kswapd",
+ &kswapd_ctl, sizeof(kswapd_ctl), 0600, NULL, &proc_dointvec},
+ {VM_SWAPOUT, "kswapd-interval",
+ &swapout_interval, sizeof(int), 0600, NULL, &proc_dointvec},
{VM_FREEPG, "freepages",
&min_free_pages, 3*sizeof(int), 0600, NULL, &proc_dointvec},
{VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0600, NULL,
diff -u --recursive linux-2.1.42/mm/page_alloc.c linux/mm/page_alloc.c
--- linux-2.1.42/mm/page_alloc.c Wed May 28 23:25:26 1997
+++ linux/mm/page_alloc.c Sat May 31 00:42:26 1997
@@ -31,6 +31,13 @@
int nr_free_pages = 0;

/*
+ * Wait queue for free pages
+ */
+static struct wait_queue * page_wait = NULL;
+
+extern struct wait_queue * kswapd_wait;
+
+/*
* Free area management
*
* The free_area_list arrays point to the queue heads of the free areas
@@ -126,6 +133,10 @@
#undef list

spin_unlock_irqrestore(&page_alloc_lock, flags);
+
+ /* ick -- *bad* optimization */
+ if (page_wait)
+ wake_up(&page_wait);
}

void __free_page(struct page *page)
@@ -197,6 +208,7 @@
{
unsigned long flags;
int reserved_pages;
+ int tries = 0;

if (order >= NR_MEM_LISTS)
return 0;
@@ -217,12 +229,25 @@
spin_lock_irqsave(&page_alloc_lock, flags);
if ((priority==GFP_ATOMIC) || nr_free_pages > reserved_pages) {
RMQUEUE(order, dma);
+#if 0
spin_unlock_irqrestore(&page_alloc_lock, flags);
return 0;
+#endif
+ if (priority == GFP_BUFFER) { spin_unlock_irqrestore(&page_alloc_lock, flags);
+ return 0; }
}
spin_unlock_irqrestore(&page_alloc_lock, flags);
- if (priority != GFP_BUFFER && try_to_free_page(priority, dma, 1))
+ if (!tries) {
+ try_to_free_page(priority, dma, 1);
+ goto repeat;
+ }
+ if (++tries < 3) {
+ wake_up(&kswapd_wait);
+ if (nr_free_pages > reserved_pages)
+ goto repeat;
+ sleep_on(&page_wait);
goto repeat;
+ }
return 0;
}

@@ -270,11 +295,11 @@

/*
* select nr of pages we try to keep free for important stuff
- * with a minimum of 48 pages. This is totally arbitrary
+ * with a minimum of 24 pages. This is totally arbitrary
*/
i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+7);
- if (i < 48)
- i = 48;
+ if (i < 24)
+ i = 24;
min_free_pages = i;
free_pages_low = i + (i>>1);
free_pages_high = i + i;
diff -u --recursive linux-2.1.42/mm/vmscan.c linux/mm/vmscan.c
--- linux-2.1.42/mm/vmscan.c Wed May 28 23:25:27 1997
+++ linux/mm/vmscan.c Fri May 30 23:55:37 1997
@@ -7,6 +7,10 @@
* kswapd added: 7.1.96 sct
* Removed kswapd_ctl limits, and swap out as many pages as needed
* to bring the system back to free_pages_high: 2.4.97, Rik van Riel.
+ *
+ * Improve kswapd logic a bit -- work again on machines w/no swap,
+ * and bring back kswapd_ctl as a minimum # of pages to swap. 29.5.97
+ *
* Version: $Id: vmscan.c,v 1.23 1997/04/12 04:31:05 davem Exp $
*/

@@ -35,6 +39,21 @@
*/
static int next_swap_jiffies = 0;

+/*
+ * Was the last kswapd wakeup caused by nr_free_pages < free_pages_low
+ */
+static int last_wakeup_low = 0;
+
+/*
+ * Below what number of free pages do we start doing a try_to_free_page to age pages?
+ */
+int free_pages_age = 256;
+
+/*
+ * Interval for aging
+ */
+int age_interval = HZ * 30;
+
/*
* How often do we do a pageout scan during normal conditions?
* Default is four times a second.
@@ -44,13 +63,19 @@
/*
* The wait queue for waking up the pageout daemon:
*/
-static struct wait_queue * kswapd_wait = NULL;
+struct wait_queue * kswapd_wait = NULL;

/*
* We avoid doing a reschedule if the pageout daemon is already awake;
*/
static int kswapd_awake = 0;

+/*
+ * sysctl-modifiable parameters to control the aggressiveness of the
+ * page-searching within the kswapd page recovery daemon.
+ */
+kswapd_control_t kswapd_ctl = {4,32,-1,-1,-1};
+
static void init_swap_timer(void);

/*
@@ -396,6 +421,10 @@
return retval;
}

+int max(int a, int b)
+{
+ return a > b ? a : b;
+}
/*
* The background pageout daemon.
* Started as a kernel thread from the init process.
@@ -433,23 +462,32 @@
printk ("Started kswapd v%.*s\n", i, s);

while (1) {
+ /* if we were below the low mark, use half the normal swapout_interval */
+ next_swap_jiffies = jiffies + (last_wakeup_low ? swapout_interval >> 1 : swapout_interval);
+
kswapd_awake = 0;
current->signal = 0;
run_task_queue(&tq_disk);
interruptible_sleep_on(&kswapd_wait);
kswapd_awake = 1;
swapstats.wakeups++;
- /* Do the background pageout:
- * We now only swap out as many pages as needed.
- * When we are truly low on memory, we swap out
- * synchronously (WAIT == 1). -- Rik.
+ /*
+ * Slightly more intelligent use of try_to_free_pages: try
+ * to free enough pages to bring us back up free_pages_high
+ * when we are below free_pages_low. Otherwise try to free
+ * kswapd_ctl.minpages (= slow rise up to free_pages_high).
*/
- while(nr_free_pages < min_free_pages)
- try_to_free_page(GFP_KERNEL, 0, 1);
- while((nr_free_pages + atomic_read(&nr_async_pages)) < free_pages_low)
- try_to_free_page(GFP_KERNEL, 0, 1);
- while((nr_free_pages + atomic_read(&nr_async_pages)) < free_pages_high)
- try_to_free_page(GFP_KERNEL, 0, 0);
+ if (nr_free_pages < free_pages_low) {
+ i = max(free_pages_high - nr_free_pages - atomic_read(&nr_async_pages),
+ kswapd_ctl.minpages);
+ }
+ else
+ i = kswapd_ctl.minpages;
+ while (i-- > 0)
+ try_to_free_page(GFP_KERNEL, 0,
+ (nr_free_pages < free_pages_low) ||
+ (atomic_read(&nr_async_pages) >= kswapd_ctl.max_async_pages)
+ );
}
}

@@ -460,30 +498,31 @@
void swap_tick(void)
{
int want_wakeup = 0;
- static int last_wakeup_low = 0;

- if ((nr_free_pages + atomic_read(&nr_async_pages)) < free_pages_low) {
+ /*
+ * This looks confusing, but it's not complicated --
+ * we awaken kswapd immediately if the last wakeup was not
+ * caused by nr_free_pages being 'low'. Otherwise,
+ * wait until next_swap_jiffies.
+ */
+ if (nr_free_pages < free_pages_low) {
if (last_wakeup_low)
want_wakeup = jiffies >= next_swap_jiffies;
else
last_wakeup_low = want_wakeup = 1;
}
- else if (((nr_free_pages + atomic_read(&nr_async_pages)) < free_pages_high) &&
- jiffies >= next_swap_jiffies) {
+ else if ((nr_free_pages < free_pages_high) && (jiffies >= next_swap_jiffies)) {
last_wakeup_low = 0;
want_wakeup = 1;
}
+ else if ((nr_free_pages < free_pages_age) && (age_interval < (int)(jiffies - next_swap_jiffies)))
+ want_wakeup = 1;

- if (want_wakeup) {
+ if (want_wakeup) {
if (!kswapd_awake) {
wake_up(&kswapd_wait);
need_resched = 1;
}
- /* low on memory, we need to start swapping soon */
- if(last_wakeup_low)
- next_swap_jiffies = jiffies;
- else
- next_swap_jiffies = jiffies + swapout_interval;
}
timer_active |= (1<<SWAP_TIMER);
}