diff -urN -X dontdiff linux-2.4.5/arch/i386/kernel/i8259.c high-2.4.5/arch/i386/kernel/i8259.c
--- linux-2.4.5/arch/i386/kernel/i8259.c	Fri Feb  9 19:29:44 2001
+++ high-2.4.5/arch/i386/kernel/i8259.c	Tue Jul  3 11:20:38 2001
@@ -80,6 +80,7 @@
 BUILD_SMP_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
 BUILD_SMP_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
 BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
+BUILD_SMP_INTERRUPT(flush_all_interrupt,FLUSH_TLB_VECTOR)
 #endif
 
 /*
@@ -473,6 +474,9 @@
 
 	/* IPI for generic function call */
 	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+
+	/* IPI for flush all TLB function call */
+	set_intr_gate(FLUSH_TLB_VECTOR, flush_all_interrupt);
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
diff -urN -X dontdiff linux-2.4.5/arch/i386/kernel/smp.c high-2.4.5/arch/i386/kernel/smp.c
--- linux-2.4.5/arch/i386/kernel/smp.c	Tue Feb 13 22:13:43 2001
+++ high-2.4.5/arch/i386/kernel/smp.c	Tue Jul  3 12:04:08 2001
@@ -394,14 +394,45 @@
 	leave_mm(cpu);
 }
 
-static void flush_tlb_all_ipi(void* info)
+/*
+ * Flush the TLBs on all engines.
+ *
+ * This does not wait for the other engines to ack the TLB flush.
+ * This is OK (I think), but needs some explanation.
+ *
+ * For all cases of flush_tlb_all(), we do not need to sync with the
+ * flush on all engines. I should explain each case, but for now there
+ * is just a simple description of the highmem case. XXX
+ *
+ * For highmem, the TLBs are flushed under the kmap lock. This lock is taken
+ * without blocking interrupts, and _cannot_ be taken with interrupts disabled
+ * (else the system will deadlock - at least with the sync'ing version of
+ * flush_tlb_all()). New highmem virtual mappings are only created under the
+ * kmap_lock, and mappings are "released" under this lock.
+ * As new mappings cannot be created with ints disabled, or inside an
+ * interrupt context, if an engine is in either of these states it doesn't
+ * have to flush its TLB until it leaves the state (ie. interrupts become
+ * enabled).
+ */
+static void flush_tlb_all_ipi(void)
 {
 	do_flush_tlb_all_local();
 }
 
+static inline void smp_call_flush_all(void)
+{
+	int cpus = smp_num_cpus-1;
+
+	if (!cpus)
+		return;
+
+	/* Send a message to all other CPUs */
+	send_IPI_allbutself(FLUSH_TLB_VECTOR);
+}
+
 void flush_tlb_all(void)
 {
-	smp_call_function (flush_tlb_all_ipi,0,1,1);
+	smp_call_flush_all();
 	do_flush_tlb_all_local();
 }
 
@@ -483,6 +514,7 @@
 	return 0;
 }
 
+
 static void stop_this_cpu (void * dummy)
 {
 	/*
@@ -538,5 +570,12 @@
 	(*func)(info);
 	if (wait)
 		atomic_inc(&call_data->finished);
+}
+
+asmlinkage void smp_flush_all_interrupt(void)
+{
+	ack_APIC_irq();
+
+	flush_tlb_all_ipi();
 }
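To spell out the deadlock the comment above alludes to (a hypothetical interleaving, not code from this patch): the old synchronous path went through smp_call_function(), which spins until every other CPU has taken the IPI. If a CPU were ever allowed to take kmap_lock with interrupts disabled, you could get:

    CPU A                                  CPU B (interrupts disabled)
    -----                                  ---------------------------
    spin_lock(&kmap_lock);
    flush_all_zero_pkmaps()
      flush_tlb_all()
        smp_call_function(...)             spin_lock(&kmap_lock);  <- spins
          ...spins waiting for CPU B       ...never services the IPI,
             to ack the IPI...                so never acks it...

Neither CPU can make progress. The send_IPI_allbutself() version above never waits for the ack; it only relies on each engine having flushed by the time it can next touch a kmap'ed address, per the argument in the comment.
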
diff -urN -X dontdiff linux-2.4.5/include/asm-i386/highmem.h high-2.4.5/include/asm-i386/highmem.h
--- linux-2.4.5/include/asm-i386/highmem.h	Sat May 26 02:01:27 2001
+++ high-2.4.5/include/asm-i386/highmem.h	Tue Jul  3 12:07:44 2001
@@ -53,6 +53,11 @@
 #define PKMAP_NR(virt)	((virt-PKMAP_BASE) >> PAGE_SHIFT)
 #define PKMAP_ADDR(nr)	(PKMAP_BASE + ((nr) << PAGE_SHIFT))
 
+/*
+ * Should only be used in highmem.c.
+ */
+#define PKMAP_VIRT_TO_PKP(virt)	&pkmap[(((virt)-PKMAP_BASE) >> PAGE_SHIFT)]
+
 extern void * FASTCALL(kmap_high(struct page *page));
 extern void FASTCALL(kunmap_high(struct page *page));
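The new macro simply inverts PKMAP_ADDR(). A stand-alone userspace sketch of the arithmetic, using stand-in values for the i386 constants (pkmap[] itself is introduced by the mm/highmem.c changes below):

  #include <stdio.h>

  /* Stand-in values; the real ones come from the i386 headers. */
  #define PAGE_SHIFT 12
  #define PKMAP_BASE (0xfe000000UL)
  #define LAST_PKMAP 1024

  typedef struct pkmap_s {
          unsigned int pk_count;
          struct pkmap_s *pk_next;
  } pkmap_t;

  static pkmap_t pkmap[LAST_PKMAP];

  #define PKMAP_ADDR(nr)          (PKMAP_BASE + ((nr) << PAGE_SHIFT))
  #define PKMAP_VIRT_TO_PKP(virt) &pkmap[(((virt)-PKMAP_BASE) >> PAGE_SHIFT)]

  int main(void)
  {
          unsigned long vaddr = PKMAP_ADDR(42);    /* address of the 43rd slot */
          pkmap_t *pkp = PKMAP_VIRT_TO_PKP(vaddr);

          /* Recovers index 42: the macro is the inverse of PKMAP_ADDR(). */
          printf("index = %ld\n", (long)(pkp - pkmap));
          return 0;
  }
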
diff -urN -X dontdiff linux-2.4.5/include/asm-i386/hw_irq.h high-2.4.5/include/asm-i386/hw_irq.h
--- linux-2.4.5/include/asm-i386/hw_irq.h	Sat May 26 02:01:26 2001
+++ high-2.4.5/include/asm-i386/hw_irq.h	Tue Jul  3 12:07:44 2001
@@ -41,6 +41,8 @@
 #define INVALIDATE_TLB_VECTOR	0xfd
 #define RESCHEDULE_VECTOR	0xfc
 #define CALL_FUNCTION_VECTOR	0xfb
+/* kdb uses 0xfa, so use 0xf9 for the global TLB shootdown */
+#define FLUSH_TLB_VECTOR	0xf9
 
 /*
  * Local APIC timer IRQ vector is on a different priority level,
diff -urN -X dontdiff linux-2.4.5/init/main.c high-2.4.5/init/main.c
--- linux-2.4.5/init/main.c	Tue May 22 17:35:42 2001
+++ high-2.4.5/init/main.c	Tue Jul  3 11:20:38 2001
@@ -555,6 +555,14 @@
 		initrd_start = 0;
 	}
 #endif
+
+#ifdef CONFIG_HIGHMEM
+	{
+		extern void init_highmem(void);
+		init_highmem();
+	}
+#endif
+
 	mem_init();
 	kmem_cache_sizes_init();
 	mempages = num_physpages;
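The mm/highmem.c rewrite below replaces the linear search over pkmap_count[] with an O(1) free list threaded through the pkmap entries themselves. As a preview, here is a minimal userspace model of just that list discipline (the names mirror the patch; locking, the PTEs and the shared-use counts above 1 are left out):

  #include <assert.h>
  #include <stdio.h>

  #define LAST_PKMAP 1024

  typedef struct pkmap_s {
          unsigned int pk_count;
          struct pkmap_s *pk_next;
  } pkmap_t;

  static pkmap_t pkmap[LAST_PKMAP];
  static pkmap_t *pkmap_free;

  /* Mirrors init_highmem(): thread every entry onto the free list. */
  static void init_model(void)
  {
          int i;

          pkmap_free = &pkmap[0];
          for (i = 1; i < LAST_PKMAP; i++)
                  pkmap[i-1].pk_next = &pkmap[i];
          pkmap[i-1].pk_next = NULL;
  }

  /* Mirrors the fast path of map_new_virtual(): pop the head in O(1). */
  static pkmap_t *alloc_entry(void)
  {
          pkmap_t *pkp = pkmap_free;

          assert(pkp != NULL && pkp->pk_count == 0);
          pkmap_free = pkp->pk_next;
          pkp->pk_count = 1;
          return pkp;
  }

  /* Mirrors flush_all_zero_pkmaps(): a slot whose count has dropped to 1
     goes back on the list once its TLB entries are known to be gone. */
  static void free_entry(pkmap_t *pkp)
  {
          pkp->pk_count = 0;
          pkp->pk_next = pkmap_free;
          pkmap_free = pkp;
  }

  int main(void)
  {
          pkmap_t *pkp;

          init_model();
          pkp = alloc_entry();
          printf("got slot %ld\n", (long)(pkp - pkmap));
          free_entry(pkp);
          return 0;
  }

The win over the old code is that an allocation never scans: either pkmap_free is non-NULL and the head is taken, or the caller has to flush (or sleep) anyway.
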
diff -urN -X dontdiff linux-2.4.5/mm/highmem.c high-2.4.5/mm/highmem.c
--- linux-2.4.5/mm/highmem.c	Sat May 26 01:57:46 2001
+++ high-2.4.5/mm/highmem.c	Tue Jul  3 12:51:52 2001
@@ -20,7 +20,9 @@
 #include
 #include
 #include
+#include
 #include
+#include
 
 /*
  * Virtual_count is not a pure "count".
@@ -30,92 +32,153 @@
  * since the last TLB flush - so we can't use it.
  * n means that there are (n-1) current users of it.
  */
-static int pkmap_count[LAST_PKMAP];
-static unsigned int last_pkmap_nr;
-static spinlock_t kmap_lock = SPIN_LOCK_UNLOCKED;
-pte_t * pkmap_page_table;
+/*
+ * Use a structure so we can keep the "count" next to the linkage (ie. on the
+ * same L1 cache line).
+ * This may seem like overkill, but we really want to avoid linear searches.
+ * Could calculate the index (offset from pkmap), but including it doesn't
+ * hurt and gives us a nice power-of-2 sized structure (could ptr directly
+ * into pagetable? XXX).
+ */
+typedef struct pkmap_s {
+	unsigned int pk_count;
+	struct pkmap_s *pk_next;
+} pkmap_t;
+
+static pkmap_t pkmap[LAST_PKMAP];
+
+
+/*
+ * We don't want this lock false sharing with anything else, so L1
+ * cache align it.
+ */
+static spinlock_t kmap_lock __cacheline_aligned;
+
+/*
+ * Hopefully, these two will fall on the same L1 line.
+ */
+pte_t *pkmap_page_table;
+static pkmap_t *pkmap_free;
 
 static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);
 
+void
+init_highmem(void)
+{
+	int i;
+
+	spin_lock_init(&kmap_lock);
+
+	pkmap_free = &pkmap[0];
+
+	for (i=1; i < LAST_PKMAP; i++)
+		pkmap[i-1].pk_next = &pkmap[i];
+	pkmap[i-1].pk_next = NULL;
+}
+
 static void flush_all_zero_pkmaps(void)
 {
-	int i;
+	pkmap_t *pkp;
+	int i;
 
 	flush_cache_all();
 
-	for (i = 0; i < LAST_PKMAP; i++) {
-		struct page *page;
-		pte_t pte;
-		/*
-		 * zero means we don't have anything to do,
-		 * >1 means that it is still in use. Only
-		 * a count of 1 means that it is free but
-		 * needs to be unmapped
-		 */
-		if (pkmap_count[i] != 1)
+	pkp = &pkmap[0];
+	for (i=0; i < LAST_PKMAP; i++, pkp++) {
+		pte_t *pte;
+
+		/* 0: nothing to do; >1: still in use; ==1: free but still mapped */
+		if (pkp->pk_count != 1)
 			continue;
-		pkmap_count[i] = 0;
-		pte = ptep_get_and_clear(pkmap_page_table+i);
-		if (pte_none(pte))
+		pkp->pk_count = 0;
+
+		pkp->pk_next = pkmap_free;
+		pkmap_free = pkp;
+
+		pte = &pkmap_page_table[i];
+
+		/* sanity check */
+		if (pte_none(*pte))
 			BUG();
-		page = pte_page(pte);
-		page->virtual = NULL;
+
+		/*
+		 * Don't need an atomic fetch-and-clear op here;
+		 * no-one has the page mapped, and cannot get at
+		 * its virtual address (and hence PTE) without first
+		 * getting the kmap_lock (which is held here).
+		 * So no dangers, even with speculative execution.
+		 */
+		pte_page(*pte)->virtual = NULL;
+		pte_clear(pte);
 	}
+
 	flush_tlb_all();
 }
 
-static inline unsigned long map_new_virtual(struct page *page)
+static void fill_pkmap_free(void)
 {
-	unsigned long vaddr;
-	int count;
+	/*
+	 * Try to free entries.
+	 */
+	flush_all_zero_pkmaps();
 
-start:
-	count = LAST_PKMAP;
-	/* Find an empty entry */
-	for (;;) {
-		last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
-		if (!last_pkmap_nr) {
-			flush_all_zero_pkmaps();
-			count = LAST_PKMAP;
-		}
-		if (!pkmap_count[last_pkmap_nr])
-			break;	/* Found a usable entry */
-		if (--count)
-			continue;
+	/*
+	 * Any free after flushing?
+	 */
+	if (pkmap_free == NULL) {
+		DECLARE_WAITQUEUE(wait, current);
+
+		/*
+		 * Sleep for somebody else to unmap their entries.
+		 */
+		current->state = TASK_UNINTERRUPTIBLE;
+		add_wait_queue(&pkmap_map_wait, &wait);
+		spin_unlock(&kmap_lock);
+		schedule();
+		remove_wait_queue(&pkmap_map_wait, &wait);
+		spin_lock(&kmap_lock);
+	}
+}
+
+static inline void *map_new_virtual(struct page *page)
+{
+	pkmap_t *pkp;
+	int index;
+
+	while (pkmap_free == NULL) {
 		/*
-		 * Sleep for somebody else to unmap their entries
+		 * Keep code to handle out-of-free-pkmaps out of common
+		 * code path.
 		 */
-		{
-			DECLARE_WAITQUEUE(wait, current);
+		fill_pkmap_free();
 
-			current->state = TASK_UNINTERRUPTIBLE;
-			add_wait_queue(&pkmap_map_wait, &wait);
-			spin_unlock(&kmap_lock);
-			schedule();
-			remove_wait_queue(&pkmap_map_wait, &wait);
-			spin_lock(&kmap_lock);
-
-			/* Somebody else might have mapped it while we slept */
-			if (page->virtual)
-				return (unsigned long) page->virtual;
-
-			/* Re-start */
-			goto start;
-		}
+		/* Somebody else might have mapped it while we slept */
+		if (page->virtual)
+			return page->virtual;
 	}
-	vaddr = PKMAP_ADDR(last_pkmap_nr);
-	set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));
-	pkmap_count[last_pkmap_nr] = 1;
-	page->virtual = (void *) vaddr;
+	pkp = pkmap_free;
+	pkmap_free = pkp->pk_next;
+
+	if (pkp->pk_count != 0)
+		BUG();
+	pkp->pk_count = 1;
+
+	index = pkp - pkmap;
+
+	set_pte(&(pkmap_page_table[index]), mk_pte(page, kmap_prot));
 
-	return vaddr;
+	page->virtual = (void *)PKMAP_ADDR(index);
+
+	return page->virtual;
 }
 
+
 void *kmap_high(struct page *page)
 {
+	pkmap_t *pkp;
 	unsigned long vaddr;
 
 	/*
@@ -125,38 +188,58 @@
 	 * We cannot call this from interrupts, as it may block
 	 */
 	spin_lock(&kmap_lock);
-	vaddr = (unsigned long) page->virtual;
+	vaddr = (unsigned long)page->virtual;
 	if (!vaddr)
-		vaddr = map_new_virtual(page);
-	pkmap_count[PKMAP_NR(vaddr)]++;
-	if (pkmap_count[PKMAP_NR(vaddr)] < 2)
+		vaddr = (unsigned long)map_new_virtual(page);
+
+	pkp = PKMAP_VIRT_TO_PKP(vaddr);
+
+	pkp->pk_count++;
+	if (pkp->pk_count < 2)
 		BUG();
 	spin_unlock(&kmap_lock);
-	return (void*) vaddr;
+
+	return (void *)vaddr;
 }
 
 void kunmap_high(struct page *page)
 {
+	pkmap_t *pkp;
+	uint need_wakeup;
 	unsigned long vaddr;
-	unsigned long nr;
+
+	need_wakeup = 0;
 
 	spin_lock(&kmap_lock);
 	vaddr = (unsigned long) page->virtual;
 	if (!vaddr)
 		BUG();
-	nr = PKMAP_NR(vaddr);
+	pkp = PKMAP_VIRT_TO_PKP(vaddr);
 
 	/*
 	 * A count must never go down to zero
	 * without a TLB flush!
 	 */
-	switch (--pkmap_count[nr]) {
+	switch (--(pkp->pk_count)) {
 	case 0:
 		BUG();
 	case 1:
-		wake_up(&pkmap_map_wait);
+		/*
+		 * Avoid an unnecessary wake_up() function call.
+		 * The common case is pk_count == 1, but
+		 * no waiters.
+		 * The tasks queued in the wait-queue are guarded
+		 * by both the lock in the wait-queue-head and by
+		 * the kmap_lock. As the kmap_lock is held here,
+		 * no need for the wait-queue-head's lock. Simply
+		 * test if the queue is empty.
+		 */
+		need_wakeup = waitqueue_active(&pkmap_map_wait);
 	}
 	spin_unlock(&kmap_lock);
+
+	if (need_wakeup)
+		wake_up(&pkmap_map_wait);
 }
 
 #define POOL_SIZE 32
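
One last note on the kunmap_high() change: it is an instance of a generally useful pattern - test waitqueue_active() while still holding the lock that every sleeper takes before queueing itself (here the kmap_lock, held around add_wait_queue() in fill_pkmap_free()), then do the wake_up() only after the lock is dropped. The shape, with illustrative names (a sketch, not code from the patch):

  spin_lock(&guard_lock);
  /* ... state change that may let a sleeper make progress ... */
  need_wakeup = waitqueue_active(&wq);  /* exact: sleepers enqueue under guard_lock */
  spin_unlock(&guard_lock);

  if (need_wakeup)
          wake_up(&wq);                 /* skipped entirely in the common no-waiter case */

Because sleepers only enqueue while holding guard_lock, the test cannot miss a concurrent waiter, and the wake_up() call - with its own wait-queue-head locking - stays off the common path.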