[patch] fork() speedup, 2.1.90

MOLNAR Ingo (mingo@chiara.csoma.elte.hu)
Thu, 19 Mar 1998 14:00:23 +0100 (CET)


The biggest source of overhead in fork() latency in current Linux is the
creation of new page tables. This patch implements 'page table caching',
a way to preserve constructed state after a process exits. Timings on a
100 MHz P5:

2.1.90, without patch:

hell:~> ./fork_lat
best fork() latency: 84340 cycles

2.1.90 + patch:

hell:~> ./fork_lat
best fork() latency: 47547 cycles
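
For reference, since the fork_lat source is not included in this mail,
a rough sketch of such a microbenchmark could look like the code below.
It is only an illustration (i386-specific, RDTSC around the fork() call,
minimum over a number of iterations); the real fork_lat may differ.

/*
 * Rough sketch of a fork() latency microbenchmark in the spirit of
 * fork_lat (the real source is not included in this mail).  i386
 * only: reads the cycle counter with RDTSC around the fork() call in
 * the parent and reports the best (minimum) result.
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

static inline unsigned long long rdtsc(void)
{
	unsigned long long t;
	__asm__ __volatile__("rdtsc" : "=A" (t));
	return t;
}

int main(void)
{
	unsigned long long t0, t1, best = ~0ULL;
	int i;

	for (i = 0; i < 1000; i++) {
		pid_t pid;

		t0 = rdtsc();
		pid = fork();
		t1 = rdtsc();

		if (pid == 0)
			_exit(0);		/* child: exit immediately */
		if (pid < 0) {
			perror("fork");
			exit(1);
		}
		waitpid(pid, NULL, 0);		/* reap the child */
		if (t1 - t0 < best)
			best = t1 - t0;
	}
	printf("best fork() latency: %llu cycles\n", best);
	return 0;
}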

the patch is not production quality yet (it's a very ugly prototype: the
page table cache doesn't listen to memory pressure yet, and it doesn't
work with 3-level paging), but it does almost everything it should do, so
I'm posting it here so people can comment on it before I clean it up. I
guess Linux needs some new/revised generic mechanism to preserve/cache
constructed state (just like the SLAB allocator does, but at page
granularity?).
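
As one illustration of the missing memory-pressure handling (purely
hypothetical, not part of the patch), a shrink function along these
lines could hand cached pages back to the page allocator:

/*
 * Hypothetical sketch, not part of the patch: one way the page table
 * cache could listen to memory pressure.  zero_pages and pgd_pages
 * are the free lists added to mm/memory.c below, so this would have
 * to live next to them; a real version would also need to be hooked
 * into the kernel's memory-pressure path.
 */
static int shrink_page_table_cache(int count)
{
	int freed = 0;

	while (freed < count && zero_pages) {
		unsigned long page = (unsigned long) zero_pages;

		zero_pages = (int *) zero_pages[0];
		free_page(page);	/* give the page back to the allocator */
		freed++;
	}
	while (freed < count && pgd_pages) {
		unsigned long page = (unsigned long) pgd_pages;

		pgd_pages = (int *) pgd_pages[0];
		free_page(page);
		freed++;
	}
	return freed;
}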

'Page table caching' is self-feeding; it needs no background mechanism
(ZeroD) to construct state. There are two slightly different cached
states, 'root page directory' and 'page tables'. Page tables are always
released completely zeroed; the root page directory additionally has the
upper kernel page table entries constructed. [There is a slight weakness
in the current patch: vmalloc.h:set_pgdir() has to update all cached page
directories as well, but this is not a big issue currently, given the low
number of vmalloc()s.]
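
To make the mechanism easier to follow before reading the diff, here is
a distilled, illustrative version of the caching trick (the patch below
is the real code):

/*
 * Distilled, illustrative version of the trick used in the patch
 * below (meant to sit in mm/memory.c, like the real code): a free
 * page is completely zero except for its first word, which is used
 * as the free-list link and is zeroed again on allocation, so pages
 * coming out of the cache need no clear_page().  The (int) casts
 * assume i386 pointer size, as in the patch.
 */
static int *cached_pages = NULL;		/* head of the free list */

static unsigned long cached_alloc(void)
{
	unsigned long page;

	if (!cached_pages) {			/* miss: fall back to the page allocator */
		page = __get_free_page(GFP_KERNEL);
		if (page)
			clear_page(page);
		return page;
	}
	page = (unsigned long) cached_pages;	/* hit: pop the head of the list */
	cached_pages = (int *) cached_pages[0];
	((int *) page)[0] = 0;			/* re-zero the word used as the link */
	return page;
}

static void cached_free(unsigned long page)
{
	((int *) page)[0] = (int) cached_pages;	/* link the page in as the new head */
	cached_pages = (int *) page;
}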

I'd especially like to know what kind of speedup it brings on newer x86
systems (could anyone with a PPro or a PII benchmark fork() with this
patch installed?). Also, this method of forking is much nicer to the
L1/L2 caches.

-- mingo

--- linux/mm/memory.c.orig Mon Mar 23 00:34:07 1998
+++ linux/mm/memory.c Wed Mar 25 01:51:33 1998
@@ -139,6 +139,9 @@
free_one_pgd(page_dir + i);
}

+pgd_t * pgd_alloc_page(void);
+void pgd_free_page (pgd_t * page);
+
/*
* This function frees up all page tables of a process when it exits. It
* is the same as "clear_page_tables()", except it also frees the old
@@ -157,19 +160,17 @@
}
for (i = 0 ; i < USER_PTRS_PER_PGD ; i++)
free_one_pgd(page_dir + i);
- pgd_free(page_dir);
+ pgd_free_page(page_dir);
}
}

int new_page_tables(struct task_struct * tsk)
{
- pgd_t * page_dir, * new_pg;
+ pgd_t * new_pg;

- if (!(new_pg = pgd_alloc()))
+ if (!(new_pg = pgd_alloc_page()))
return -ENOMEM;
- page_dir = pgd_offset(&init_mm, 0);
- memcpy(new_pg + USER_PTRS_PER_PGD, page_dir + USER_PTRS_PER_PGD,
- (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof (pgd_t));
+
SET_PAGE_DIR(tsk, new_pg);
tsk->mm->pgd = new_pg;
return 0;
@@ -915,3 +916,85 @@
no_memory:
oom(tsk);
}
+
+int zero1_hit=0, zero1_miss=0;
+int zero2_hit=0, zero2_miss=0;
+
+static int * zero_pages = NULL;
+
+static inline void check_zero_page (unsigned long page)
+{
+#if 0
+ int i;
+
+ for (i=0; i<4096/4; i++)
+ if (((int*)page)[i])
+ break;
+
+ if (i != 4096/4)
+ printk("page %08lx not zero\n", page);
+#endif
+}
+
+unsigned long get_zero_page(void)
+{
+ unsigned long page;
+
+ if (!zero_pages) {
+ page = __get_free_page(GFP_KERNEL);
+
+ clear_page(page);
+ zero1_miss++;
+ return page;
+ }
+ page = (unsigned long)zero_pages;
+ zero_pages = (int *)(zero_pages[0]);
+ ((int *)page)[0] = 0;
+ check_zero_page(page);
+ zero1_hit++;
+ return page;
+}
+
+void free_zero_page (unsigned long page)
+{
+ check_zero_page(page);
+
+ ((int *)page)[0] = (int)zero_pages;
+ zero_pages = (int *)page;
+}
+
+
+static int * pgd_pages = NULL;
+
+
+pgd_t * pgd_alloc_page(void)
+{
+ unsigned long page;
+
+ if (!pgd_pages) {
+ pgd_t * page_dir, * new_pg;
+ page = __get_free_page(GFP_KERNEL);
+
+ clear_page(page);
+ new_pg = (pgd_t *) page;
+ page_dir = pgd_offset(&init_mm, 0);
+ memcpy(new_pg + USER_PTRS_PER_PGD, page_dir + USER_PTRS_PER_PGD,
+ (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof (pgd_t));
+
+ zero2_miss++;
+ return (pgd_t *)page;
+ }
+ page = (unsigned long)pgd_pages;
+ pgd_pages = (int *)(pgd_pages[0]);
+ ((int *)page)[0] = 0;
+ zero2_hit++;
+ return (pgd_t *)page;
+}
+
+void pgd_free_page (pgd_t * page)
+{
+ ((int *)page)[0] = (int)pgd_pages;
+ pgd_pages = (int *)page;
+}
+
+
--- linux/include/asm-i386/pgtable.h.orig Tue Mar 24 23:10:45 1998
+++ linux/include/asm-i386/pgtable.h Wed Mar 25 00:41:04 1998
@@ -156,6 +156,10 @@
}
#endif
#endif
+
+extern unsigned long get_zero_page(void);
+extern void free_zero_page(unsigned long);
+
#endif /* !__ASSEMBLY__ */


@@ -431,7 +435,7 @@

extern inline void pte_free(pte_t * pte)
{
- free_page((unsigned long) pte);
+ free_zero_page((unsigned long) pte);
}

extern inline pte_t * pte_alloc(pmd_t * pmd, unsigned long address)
@@ -447,16 +451,15 @@

getnew:
{
- unsigned long page = __get_free_page(GFP_KERNEL);
+ unsigned long page = get_zero_page();
if (!pmd_none(*pmd))
goto freenew;
if (!page)
goto oom;
- memset((void *) page, 0, PAGE_SIZE);
pmd_val(*pmd) = _PAGE_TABLE + __pa(page);
return (pte_t *) (page + address);
freenew:
- free_page(page);
+ free_zero_page(page);
goto repeat;
}

@@ -488,7 +491,7 @@

extern inline pgd_t * pgd_alloc(void)
{
- return (pgd_t *) get_free_page(GFP_KERNEL);
+ return (pgd_t *) get_zero_page();
}

extern pgd_t swapper_pg_dir[1024];
