Hi
After rethinking the patch (and with feedback from Linus), I have
made another version that should also work on SMP Sparc.
As always, any comments/feedback/bug reports/... are welcome.
Later, Juan.
ChangeLog:
v2.0:
- shrink_[id]_caches no longer return the number of freed pages, and
the code has been changed to reflect that. Even with just the per-CPU
caches it is difficult to do that bookkeeping; with NUMA, things will
only get worse. (The prototype change is summarised just before the
patch.)
- We now call smp_call_function with simple functions (i.e. ones that
don't take spinlocks). That last point was the problem on Sparc.
- slab_cache_all_sync has been renamed to smp_call_function_all_cpus
(it could be a good idea to move that function into a library); see
the sketch at the end of this list.
- For freeing the slabs, we now use the same _trick_ as
kmem_tune_cpucache(). The only difference is that, as we are low on
memory, we can't allocate a new cpucache_t *new[] to do the swap. To
get around that, we swap the existing per-CPU caches for NULLs (with
an smp_call_function), then free the slabs (we are now in normal
kernel context, so spinlocks/waits/... are safe), and once finished we
set ->avail = 0 and swap the arrays back in with smp_call_function
(see the sketch below). Note that these operations are serialised by
cache_chain_sem, which means we can never mess up the state.
- This last change allowed us to share even more code with the
cache-creation case.
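To make the drain changes concrete, here is the core of the new drain
path, condensed from the mm/slab.c hunk in the patch below (the
comments are added here for explanation and are not in the patch):

typedef struct ccupdate_struct_s {
	kmem_cache_t *cachep;
	cpucache_t *new[NR_CPUS];
} ccupdate_struct_t;

/*
 * Run func(arg) on every CPU: first locally with interrupts
 * disabled, then on all the other CPUs via smp_call_function().
 */
static void smp_call_function_all_cpus(void (*func)(void *arg), void *arg)
{
	local_irq_disable();
	func(arg);
	local_irq_enable();

	if (smp_call_function(func, arg, 1, 1))
		BUG();
}

/* Swap this CPU's cpucache pointer with the one in new->new[]. */
static void do_ccupdate_local(void *info)
{
	ccupdate_struct_t *new = (ccupdate_struct_t *)info;
	cpucache_t *old = cc_data(new->cachep);

	cc_data(new->cachep) = new->new[smp_processor_id()];
	new->new[smp_processor_id()] = old;
}

static void drain_cpu_caches(kmem_cache_t *cachep)
{
	ccupdate_struct_t new;
	int i;

	memset(&new.new, 0, sizeof(new.new));
	new.cachep = cachep;

	down(&cache_chain_sem);
	/* First swap: every CPU now sees a NULL cpucache. */
	smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);

	/* Back in normal context: free the old per-CPU entries. */
	for (i = 0; i < smp_num_cpus; i++) {
		cpucache_t *ccold = new.new[cpu_logical_map(i)];
		if (!ccold || (ccold->avail == 0))
			continue;
		local_irq_disable();
		free_block(cachep, cc_entry(ccold), ccold->avail);
		local_irq_enable();
		ccold->avail = 0;
	}
	/* Second swap: put the (now empty) caches back in place. */
	smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
	up(&cache_chain_sem);
}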
v1.0:
- shrink_[id]_caches return the count of freed pages (from Ingo and
Marcelo's patch)
- removes the ret variable in smp_call_function (it was unused)
- removes slab_cache_drain_mask/slab_drain_local_cache and their
references from the timer interrupt code. Those calls are now done
with smp_call_function, which lets us simplify the code a lot (we
don't need the cache_drain_wait queue anymore).
- Change the cache_drain_sem semaphore to the cache_all_lock spinlock,
as we now never sleep/schedule while holding it. The name is changed
because it is used not only by the drain routines but also by the
update ones.
- slab_drain_local_cache is split into two functions:
    slab_drain_local_cache
    do_ccupdate_local
as we know at compile time _which_ part of the function we want to
call.
- move the spinlock calls inside slab_cache_all_sync, as they are only
needed when calling that function. The wait queue is not needed
anymore. This function used global variables to pass arguments to
slab_drain_local_cache; that has been changed to pass them as function
arguments.
- the do_ccupdate & drain_cpu_caches code has been folded into
slab_cache_all_sync, as it was the same code except for one line.
- In the process, the net result is ~40 fewer lines of code.
Thanks to Phillip Rumpf, Stephen Tweedie, Alan Cox, Ingo & the rest
of the people who explained the SMP/cross-CPU mysteries to me.
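For reference, the shrinker interface change boils down to the
following prototypes (as in the include/linux/dcache.h hunk below);
the functions always returned 0 anyway, so no information is lost:

/* before: callers subtracted a freed-page count that was always 0 */
extern int shrink_dcache_memory(int, unsigned int);
extern int shrink_icache_memory(int, int);

/* after: void, and the callers in mm/vmscan.c just call them */
extern void shrink_dcache_memory(int, unsigned int);
extern void shrink_icache_memory(int, int);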
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/arch/i386/kernel/smp.c working/arch/i386/kernel/smp.c
--- base/arch/i386/kernel/smp.c Tue Sep 26 03:46:03 2000
+++ working/arch/i386/kernel/smp.c Wed Sep 27 23:45:30 2000
@@ -464,7 +464,7 @@
*/
{
struct call_data_struct data;
- int ret, cpus = smp_num_cpus-1;
+ int cpus = smp_num_cpus-1;
if (!cpus)
return 0;
@@ -485,7 +485,6 @@
while (atomic_read(&data.started) != cpus)
barrier();
- ret = 0;
if (wait)
while (atomic_read(&data.finished) != cpus)
barrier();
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/fs/dcache.c working/fs/dcache.c
--- base/fs/dcache.c Tue Sep 26 03:34:00 2000
+++ working/fs/dcache.c Thu Sep 28 17:15:28 2000
@@ -551,7 +551,7 @@
* ...
* 6 - base-level: try to shrink a bit.
*/
-int shrink_dcache_memory(int priority, unsigned int gfp_mask)
+void shrink_dcache_memory(int priority, unsigned int gfp_mask)
{
int count = 0;
@@ -567,19 +567,13 @@
* block allocations, but for now:
*/
if (!(gfp_mask & __GFP_IO))
- return 0;
+ return;
if (priority)
count = dentry_stat.nr_unused / priority;
+
prune_dcache(count);
- /* FIXME: kmem_cache_shrink here should tell us
- the number of pages freed, and it should
- work in a __GFP_DMA/__GFP_HIGHMEM behaviour
- to free only the interesting pages in
- function of the needs of the current allocation. */
kmem_cache_shrink(dentry_cache);
-
- return 0;
}
#define NAME_ALLOC_LEN(len) ((len+16) & ~15)
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/fs/inode.c working/fs/inode.c
--- base/fs/inode.c Tue Sep 26 03:34:00 2000
+++ working/fs/inode.c Thu Sep 28 17:15:40 2000
@@ -454,7 +454,7 @@
dispose_list(freeable);
}
-int shrink_icache_memory(int priority, int gfp_mask)
+void shrink_icache_memory(int priority, int gfp_mask)
{
int count = 0;
@@ -466,19 +466,13 @@
* in clear_inode() and friends..
*/
if (!(gfp_mask & __GFP_IO))
- return 0;
+ return;
if (priority)
count = inodes_stat.nr_unused / priority;
+
prune_icache(count);
- /* FIXME: kmem_cache_shrink here should tell us
- the number of pages freed, and it should
- work in a __GFP_DMA/__GFP_HIGHMEM behaviour
- to free only the interesting pages in
- function of the needs of the current allocation. */
kmem_cache_shrink(inode_cachep);
-
- return 0;
}
/*
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/include/linux/dcache.h working/include/linux/dcache.h
--- base/include/linux/dcache.h Wed Sep 27 00:16:36 2000
+++ working/include/linux/dcache.h Thu Sep 28 17:09:46 2000
@@ -163,11 +163,11 @@
#define shrink_dcache() prune_dcache(0)
struct zone_struct;
/* dcache memory management */
-extern int shrink_dcache_memory(int, unsigned int);
+extern void shrink_dcache_memory(int, unsigned int);
extern void prune_dcache(int);
/* icache memory management (defined in linux/fs/inode.c) */
-extern int shrink_icache_memory(int, int);
+extern void shrink_icache_memory(int, int);
extern void prune_icache(int);
/* only used at mount-time */
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/include/linux/slab.h working/include/linux/slab.h
--- base/include/linux/slab.h Wed Sep 27 00:16:36 2000
+++ working/include/linux/slab.h Thu Sep 28 17:09:51 2000
@@ -76,14 +76,6 @@
extern kmem_cache_t *fs_cachep;
extern kmem_cache_t *sigact_cachep;
-#ifdef CONFIG_SMP
-extern unsigned long slab_cache_drain_mask;
-extern void slab_drain_local_cache(void);
-#else
-#define slab_cache_drain_mask 0
-#define slab_drain_local_cache() do { } while (0)
-#endif
-
#endif /* __KERNEL__ */
#endif /* _LINUX_SLAB_H */
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/kernel/timer.c working/kernel/timer.c
--- base/kernel/timer.c Mon Aug 28 23:28:27 2000
+++ working/kernel/timer.c Wed Sep 27 23:38:09 2000
@@ -22,7 +22,6 @@
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
-#include <linux/slab.h>
#include <asm/uaccess.h>
@@ -596,9 +595,6 @@
kstat.per_cpu_system[cpu] += system;
} else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
kstat.per_cpu_system[cpu] += system;
-
- if (slab_cache_drain_mask & (1UL << cpu))
- slab_drain_local_cache();
}
/*
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/mm/slab.c working/mm/slab.c
--- base/mm/slab.c Tue Sep 26 03:34:05 2000
+++ working/mm/slab.c Fri Sep 29 01:26:32 2000
@@ -579,7 +579,6 @@
kmem_cache_free(cachep->slabp_cache, slabp);
}
-
/**
* kmem_cache_create - Create a cache.
* @name: A string which is used in /proc/slabinfo to identify this cache.
@@ -838,48 +837,60 @@
}
#ifdef CONFIG_SMP
-static DECLARE_MUTEX(cache_drain_sem);
-static kmem_cache_t *cache_to_drain = NULL;
-static DECLARE_WAIT_QUEUE_HEAD(cache_drain_wait);
-unsigned long slab_cache_drain_mask;
-
/*
- * Waits for all CPUs to execute slab_drain_local_cache().
- * Caller must be holding cache_drain_sem.
+ * Waits for all CPUs to execute func().
*/
-static void slab_drain_all_sync(void)
+static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg)
{
- DECLARE_WAITQUEUE(wait, current);
-
local_irq_disable();
- slab_drain_local_cache();
+ func(arg);
local_irq_enable();
- add_wait_queue(&cache_drain_wait, &wait);
- current->state = TASK_UNINTERRUPTIBLE;
- while (slab_cache_drain_mask != 0UL)
- schedule();
- current->state = TASK_RUNNING;
- remove_wait_queue(&cache_drain_wait, &wait);
+ if (smp_call_function(func, arg, 1, 1))
+ BUG();
+}
+typedef struct ccupdate_struct_s
+{
+ kmem_cache_t *cachep;
+ cpucache_t *new[NR_CPUS];
+} ccupdate_struct_t;
+
+static void do_ccupdate_local(void *info)
+{
+ ccupdate_struct_t *new = (ccupdate_struct_t *)info;
+ cpucache_t *old = cc_data(new->cachep);
+
+ cc_data(new->cachep) = new->new[smp_processor_id()];
+ new->new[smp_processor_id()] = old;
}
+static void free_block (kmem_cache_t* cachep, void** objpp, int len);
+
static void drain_cpu_caches(kmem_cache_t *cachep)
{
- unsigned long cpu_mask = 0;
+ ccupdate_struct_t new;
int i;
- for (i = 0; i < smp_num_cpus; i++)
- cpu_mask |= (1UL << cpu_logical_map(i));
+ memset(&new.new,0,sizeof(new.new));
- down(&cache_drain_sem);
+ new.cachep = cachep;
- cache_to_drain = cachep;
- slab_cache_drain_mask = cpu_mask;
- slab_drain_all_sync();
- cache_to_drain = NULL;
+ down(&cache_chain_sem);
+ smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
- up(&cache_drain_sem);
+ for (i = 0; i < smp_num_cpus; i++) {
+ cpucache_t* ccold = new.new[cpu_logical_map(i)];
+ if (!ccold || (ccold->avail == 0))
+ continue;
+ local_irq_disable();
+ free_block(cachep, cc_entry(ccold), ccold->avail);
+ local_irq_enable();
+ ccold->avail = 0;
+ }
+ smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
+ up(&cache_chain_sem);
}
+
#else
#define drain_cpu_caches(cachep) do { } while (0)
#endif
@@ -1593,56 +1604,6 @@
#ifdef CONFIG_SMP
-typedef struct ccupdate_struct_s
-{
- kmem_cache_t *cachep;
- cpucache_t *new[NR_CPUS];
-} ccupdate_struct_t;
-
-static ccupdate_struct_t *ccupdate_state = NULL;
-
-/* Called from per-cpu timer interrupt. */
-void slab_drain_local_cache(void)
-{
- if (ccupdate_state != NULL) {
- ccupdate_struct_t *new = ccupdate_state;
- cpucache_t *old = cc_data(new->cachep);
-
- cc_data(new->cachep) = new->new[smp_processor_id()];
- new->new[smp_processor_id()] = old;
- } else {
- kmem_cache_t *cachep = cache_to_drain;
- cpucache_t *cc = cc_data(cachep);
-
- if (cc && cc->avail) {
- free_block(cachep, cc_entry(cc), cc->avail);
- cc->avail = 0;
- }
- }
-
- clear_bit(smp_processor_id(), &slab_cache_drain_mask);
- if (slab_cache_drain_mask == 0)
- wake_up(&cache_drain_wait);
-}
-
-static void do_ccupdate(ccupdate_struct_t *data)
-{
- unsigned long cpu_mask = 0;
- int i;
-
- for (i = 0; i < smp_num_cpus; i++)
- cpu_mask |= (1UL << cpu_logical_map(i));
-
- down(&cache_drain_sem);
-
- ccupdate_state = data;
- slab_cache_drain_mask = cpu_mask;
- slab_drain_all_sync();
- ccupdate_state = NULL;
-
- up(&cache_drain_sem);
-}
-
/* called with cache_chain_sem acquired. */
static int kmem_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount)
{
@@ -1666,7 +1627,6 @@
for (i = 0; i< smp_num_cpus; i++) {
cpucache_t* ccnew;
-
ccnew = kmalloc(sizeof(void*)*limit+
sizeof(cpucache_t), GFP_KERNEL);
if (!ccnew)
@@ -1681,7 +1641,7 @@
cachep->batchcount = batchcount;
spin_unlock_irq(&cachep->spinlock);
- do_ccupdate(&new);
+ smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
for (i = 0; i < smp_num_cpus; i++) {
cpucache_t* ccold = new.new[cpu_logical_map(i)];
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/mm/vmscan.c working/mm/vmscan.c
--- base/mm/vmscan.c Tue Sep 26 03:34:05 2000
+++ working/mm/vmscan.c Thu Sep 28 17:09:34 2000
@@ -908,17 +908,8 @@
* refill_inactive() almost never fail when there's
* really plenty of memory free.
*/
- count -= shrink_dcache_memory(priority, gfp_mask);
- count -= shrink_icache_memory(priority, gfp_mask);
- /*
- * Not currently working, see fixme in shrink_?cache_memory
- * In the inner funtions there is a comment:
- * "To help debugging, a zero exit status indicates
- * all slabs were released." (-arca?)
- * lets handle it in a primitive but working way...
- * if (count <= 0)
- * goto done;
- */
+ shrink_dcache_memory(priority, gfp_mask);
+ shrink_icache_memory(priority, gfp_mask);
/* Try to get rid of some shared memory pages.. */
while (shm_swap(priority, gfp_mask)) {
@@ -985,8 +976,8 @@
* the inode and dentry cache whenever we do this.
*/
if (free_shortage() || inactive_shortage()) {
- ret += shrink_dcache_memory(6, gfp_mask);
- ret += shrink_icache_memory(6, gfp_mask);
+ shrink_dcache_memory(6, gfp_mask);
+ shrink_icache_memory(6, gfp_mask);
ret += refill_inactive(gfp_mask, user);
} else {
/*
--
In theory, practice and theory are the same, but in practice they are
different -- Larry McVoy