I've attached an updated misc swap memory patch to use the improved page
table cache interface, and have simplified the file system interface
following a suggestion sent in by Bjorn Ekwall.
The pgt cache interface works by having the kswapd task trim its own
CPU's cache synchronously, and uses a trim flag to request that the
other CPU idle tasks trim their pgt caches. Once the pgt cache has
drained on a particular CPU, the trim flag is reset and pgt cache
management returns to its previous behavior using the high/low water
marks.
This gives immediate access to the kswapd CPU cache memory and
reasonably contemporaneous access to the other pgt caches, and avoids
any cross-CPU cache accesses. I've left a couple of diagnostic printks
in the patch, and they show a reasonable pattern of pgt cache trimming
on the kswapd and other CPUs, interleaved with pages freed from the NFS
dircache.
The interface to trim_filesystem_memory() now uses a function pointer in the
filesystem structure for each registered fs. When the vm system calls
shrink_misc_mem, trim_filesystem_memory() iterates through the filesystems and
calls the free_fs_memory() function for each fs supporting it.
Subsequent calls remain with the previously selected fs until the memory
function returns failure.
At present only NFS supplies a free_fs_memory() function, but it will be
easy to add support for other filesystems as well.
The patch works very reliably here, and should be especially helpful on
systems with modest memory resources.
Regards,
Bill
--------------8E37ADCCD8977BBCC8E5F2BC
Content-Type: text/plain; charset=us-ascii; name="mm_miscmem117-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="mm_miscmem117-patch"
--- linux-2.1.117/mm/vmscan.c.old Wed Aug 19 21:23:21 1998
+++ linux-2.1.117/mm/vmscan.c Thu Aug 20 08:31:33 1998
@@ -439,6 +439,21 @@
return 0;
}
+extern int trim_pgt_cache(void);
+/*
+ * Try to free memory from various sources.
+ */
+static int shrink_misc_mem(int pri, int gfp_mask)
+{
+ if (shm_swap(pri, gfp_mask))
+ return 1;
+ if (trim_pgt_cache())
+ return 1;
+ if (trim_filesystem_memory())
+ return 1;
+ return 0;
+}
+
/*
* We are much more aggressive about trying to swap out than we used
* to be. This works out OK, because we now do proper aging on page
@@ -469,7 +484,7 @@
return 1;
state = 1;
case 1:
- if (shm_swap(i, gfp_mask))
+ if (shrink_misc_mem(i, gfp_mask))
return 1;
state = 2;
case 2:
--- linux-2.1.117/mm/memory.c.old Thu Aug 6 16:54:09 1998
+++ linux-2.1.117/mm/memory.c Thu Aug 20 08:26:14 1998
@@ -126,10 +126,64 @@
*/
int pgt_cache_water[2] = { 25, 50 };
-/* Returns the number of pages freed */
+/*
+ * Set when the vm system wants to trim
+ * extra pages from the cache lists.
+ */
+int pgt_trim_req[NR_CPUS] = {0, };
+
+
+/*
+ * Called by the vm system to reduce pgt cache memory.
+ */
+int trim_pgt_cache(void)
+{
+ int cpu;
+
+ /*
+ * Request that the other CPUs trim their caches.
+ */
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ if (cpu == smp_processor_id())
+ continue;
+ pgt_trim_req[cpu] = 1;
+ }
+ /*
+ * TBD: Request that the idle tasks get scheduled?
+ * (Want to trim memory as soon as possible.)
+ */
+
+ /* now check our own cache */
+ return do_check_pgt_cache(pgt_cache_water[0], 0);
+}
+
+
+/*
+ * Called from the CPU idle tasks to regulate the page table cache.
+ *
+ * Note: called without the kernel lock, so must be SMP-safe.
+ */
int check_pgt_cache(void)
{
- return do_check_pgt_cache(pgt_cache_water[0], pgt_cache_water[1]);
+ int result, high = pgt_cache_water[1];
+
+ /* Set the high limit to 0 if trimming was requested */
+ if (pgt_trim_req[smp_processor_id()])
+ high = 0;
+ result = do_check_pgt_cache(pgt_cache_water[0], high);
+
+if (result)
+printk("check_pgt_cache: task %s on CPU %d, freed %d\n",
+current->comm, smp_processor_id(), result);
+
+ /*
+ * Reset the trim flag if no memory was freed at limit == 0.
+ * (On some architectures pgtable_cache_size won't go to 0.)
+ */
+ if (!result && high == 0)
+ pgt_trim_req[smp_processor_id()] = 0;
+
+ return result;
}
@@ -148,7 +202,7 @@
free_one_pgd(page_dir + i);
/* keep the page table cache within bounds */
- check_pgt_cache();
+ do_check_pgt_cache(pgt_cache_water[0], pgt_cache_water[1]);
return;
out_bad:
@@ -177,7 +231,7 @@
pgd_free(page_dir);
/* keep the page table cache within bounds */
- check_pgt_cache();
+ do_check_pgt_cache(pgt_cache_water[0], pgt_cache_water[1]);
out:
return;
--- linux-2.1.117/include/linux/fs.h.old Wed Aug 19 21:52:23 1998
+++ linux-2.1.117/include/linux/fs.h Thu Aug 20 08:26:14 1998
@@ -658,10 +658,25 @@
int fs_flags;
struct super_block *(*read_super) (struct super_block *, void *, int);
struct file_system_type * next;
+ int (*free_fs_memory) (void);
};
+/* fs/super.c */
+
+extern struct file_system_type *get_fs_type(const char *name);
extern int register_filesystem(struct file_system_type *);
extern int unregister_filesystem(struct file_system_type *);
+extern int trim_filesystem_memory(void); /* called by shrink_misc_mem() */
+extern void sync_supers(kdev_t dev);
+extern struct super_block *get_super(kdev_t dev);
+extern void put_super(kdev_t dev);
+extern int fs_may_mount(kdev_t dev);
+extern void mount_root(void);
+
+#ifdef CONFIG_BLK_DEV_INITRD
+extern kdev_t real_root_dev;
+extern int change_root(kdev_t new_root_dev,const char *put_old);
+#endif
/* fs/open.c */
@@ -705,10 +720,8 @@
extern struct file_operations write_pipe_fops;
extern struct file_operations rdwr_pipe_fops;
-extern struct file_system_type *get_fs_type(const char *name);
extern int fs_may_remount_ro(struct super_block *);
-extern int fs_may_mount(kdev_t dev);
extern struct file *inuse_filps;
@@ -753,7 +766,6 @@
extern void write_inode_now(struct inode *inode);
extern void sync_dev(kdev_t dev);
extern int fsync_dev(kdev_t dev);
-extern void sync_supers(kdev_t dev);
extern int bmap(struct inode * inode,int block);
extern int notify_change(struct dentry *, struct iattr *);
extern int permission(struct inode * inode,int mask);
@@ -825,19 +837,11 @@
extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *);
extern ssize_t generic_file_write(struct file *, const char*, size_t, loff_t*);
-extern struct super_block *get_super(kdev_t dev);
-extern void put_super(kdev_t dev);
unsigned long generate_cluster(kdev_t dev, int b[], int size);
unsigned long generate_cluster_swab32(kdev_t dev, int b[], int size);
extern kdev_t ROOT_DEV;
extern void show_buffers(void);
-extern void mount_root(void);
-
-#ifdef CONFIG_BLK_DEV_INITRD
-extern kdev_t real_root_dev;
-extern int change_root(kdev_t new_root_dev,const char *put_old);
-#endif
extern ssize_t char_read(struct file *, char *, size_t, loff_t *);
extern ssize_t block_read(struct file *, char *, size_t, loff_t *);
--- linux-2.1.117/fs/super.c.old Wed Aug 19 21:23:21 1998
+++ linux-2.1.117/fs/super.c Thu Aug 20 13:45:54 1998
@@ -425,6 +425,40 @@
return fs;
}
+/*
+ * Cycle through the filesystems trying to free any
+ * reclaimable (e.g. cache) memory.
+ */
+int trim_filesystem_memory(void)
+{
+ static int index = 0;
+
+ while (1) {
+ struct file_system_type *fs = file_systems;
+ int i = index;
+
+ /* advance to the current fs */
+ while (fs && i--)
+ fs = fs->next;
+ if (!fs)
+ break;
+
+ /* try to reclaim memory from the selected fs */
+ if (fs->free_fs_memory && fs->free_fs_memory() != 0)
+ return 1;
+
+ /*
+ * The list may have changed during the call above,
+ * so we advance the index to find the next fs.
+ */
+ index++;
+ }
+
+ /* reset the index */
+ index = 0;
+ return 0;
+}
+
void __wait_on_super(struct super_block * sb)
{
struct wait_queue wait = { current, NULL };
--- linux-2.1.117/fs/nfs/dir.c.old Wed Aug 19 21:23:20 1998
+++ linux-2.1.117/fs/nfs/dir.c Thu Aug 20 08:26:15 1998
@@ -359,7 +359,8 @@
if (sb && sb->s_dev != cache->dev)
continue;
if (cache->locked) {
- printk("NFS: cache locked at umount %s\n",
+ printk(KERN_ERR
+ "NFS: cache locked at umount %s\n",
(cache->entry ? "(lost a page!)" : ""));
continue;
}
@@ -369,6 +370,41 @@
cache->entry = NULL;
}
}
+}
+
+/*
+ * Trim a page from the dir cache. Eventually the dir cache
+ * should be implemented as inode (page) cache, but for now
+ * this allows the memory to be reclaimed when needed.
+ */
+int
+nfs_trim_dircache(void)
+{
+ struct nfs_dirent *cache = dircache, *oldest = NULL;
+ unsigned long age = ~0UL;
+ int i;
+
+ /*
+ * Find the oldest cache entry with a freeable page.
+ */
+ for (i = NFS_MAX_DIRCACHE; i--; cache++) {
+ if (cache->locked)
+ continue;
+ if (!cache->entry)
+ continue;
+ if (cache->age <= age) {
+ oldest = cache;
+ age = cache->age;
+ }
+ }
+ if (oldest) {
+printk("nfs_trim_dircache: freeing page\n");
+ oldest->valid = 0;
+ free_page((unsigned long) oldest->entry);
+ oldest->entry = NULL;
+ return 1;
+ }
+ return 0;
}
/*
--- linux-2.1.117/fs/nfs/inode.c.old Thu Jul 23 16:26:10 1998
+++ linux-2.1.117/fs/nfs/inode.c Thu Aug 20 08:26:15 1998
@@ -818,14 +818,16 @@
goto out;
}
+extern int nfs_trim_dircache(void);
/*
* File system information
*/
static struct file_system_type nfs_fs_type = {
"nfs",
- 0 /* FS_NO_DCACHE - this doesn't work right now*/,
+ 0,
nfs_read_super,
- NULL
+ NULL,
+ nfs_trim_dircache
};
/*
--------------8E37ADCCD8977BBCC8E5F2BC--
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.altern.org/andrebalsa/doc/lkml-faq.html