diff -Nru linux-2.6.4-rc1/arch/i386/kernel/Makefile linux-64/arch/i386/kernel/Makefile
--- linux-2.6.4-rc1/arch/i386/kernel/Makefile	2004-03-02 09:45:43.214170408 -0800
+++ linux-64/arch/i386/kernel/Makefile	2004-03-01 17:34:12.000000000 -0800
@@ -32,6 +32,7 @@
 obj-$(CONFIG_HPET_TIMER)	+= time_hpet.o
 obj-$(CONFIG_EFI)		+= efi.o efi_stub.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
+obj-$(CONFIG_SCHED_SMT)		+= init_sched_domains.o
 
 EXTRA_AFLAGS	:= -traditional
 
diff -Nru linux-2.6.4-rc1/arch/i386/kernel/smpboot.c linux-64/arch/i386/kernel/smpboot.c
--- linux-2.6.4-rc1/arch/i386/kernel/smpboot.c	2004-03-02 09:45:43.245165696 -0800
+++ linux-64/arch/i386/kernel/smpboot.c	2004-03-01 17:34:12.000000000 -0800
@@ -1123,215 +1123,6 @@
 	synchronize_tsc_bp();
 }
 
-#ifdef CONFIG_SCHED_SMT
-#ifdef CONFIG_NUMA
-static struct sched_group sched_group_cpus[NR_CPUS];
-static struct sched_group sched_group_phys[NR_CPUS];
-static struct sched_group sched_group_nodes[MAX_NUMNODES];
-static DEFINE_PER_CPU(struct sched_domain, phys_domains);
-static DEFINE_PER_CPU(struct sched_domain, node_domains);
-__init void arch_init_sched_domains(void)
-{
-	int i;
-	struct sched_group *first_cpu = NULL, *last_cpu = NULL;
-
-	/* Set up domains */
-	for_each_cpu(i) {
-		struct sched_domain *cpu_domain = cpu_sched_domain(i);
-		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
-		struct sched_domain *node_domain = &per_cpu(node_domains, i);
-		int node = cpu_to_node(i);
-		cpumask_t nodemask = node_to_cpumask(node);
-
-		*cpu_domain = SD_SIBLING_INIT;
-		cpu_domain->span = cpu_sibling_map[i];
-
-		*phys_domain = SD_CPU_INIT;
-		phys_domain->span = nodemask;
-
-		*node_domain = SD_NODE_INIT;
-		node_domain->span = cpu_possible_map;
-	}
-
-	/* Set up CPU (sibling) groups */
-	for_each_cpu(i) {
-		struct sched_domain *cpu_domain = cpu_sched_domain(i);
-		int j;
-		first_cpu = last_cpu = NULL;
-
-		if (i != first_cpu(cpu_domain->span))
-			continue;
-
-		for_each_cpu_mask(j, cpu_domain->span) {
-			struct sched_group *cpu = &sched_group_cpus[j];
-
-			cpu->cpumask = CPU_MASK_NONE;
-			cpu_set(j, cpu->cpumask);
-			cpu->cpu_power = SCHED_LOAD_SCALE;
-
-			if (!first_cpu)
-				first_cpu = cpu;
-			if (last_cpu)
-				last_cpu->next = cpu;
-			last_cpu = cpu;
-		}
-		last_cpu->next = first_cpu;
-	}
-
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		int j;
-		cpumask_t nodemask;
-		struct sched_group *node = &sched_group_nodes[i];
-		cpus_and(nodemask, node_to_cpumask(i), cpu_possible_map);
-
-		if (cpus_empty(nodemask))
-			continue;
-
-		first_cpu = last_cpu = NULL;
-		/* Set up physical groups */
-		for_each_cpu_mask(j, nodemask) {
-			struct sched_domain *cpu_domain = cpu_sched_domain(j);
-			struct sched_group *cpu = &sched_group_phys[j];
-
-			if (j != first_cpu(cpu_domain->span))
-				continue;
-
-			cpu->cpumask = cpu_domain->span;
-			/*
-			 * Make each extra sibling increase power by 10% of
-			 * the basic CPU. This is very arbitrary.
-			 */
-			cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
-			node->cpu_power += cpu->cpu_power;
-
-			if (!first_cpu)
-				first_cpu = cpu;
-			if (last_cpu)
-				last_cpu->next = cpu;
-			last_cpu = cpu;
-		}
-		last_cpu->next = first_cpu;
-	}
-
-	/* Set up nodes */
-	first_cpu = last_cpu = NULL;
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		struct sched_group *cpu = &sched_group_nodes[i];
-		cpumask_t nodemask;
-		cpus_and(nodemask, node_to_cpumask(i), cpu_possible_map);
-
-		if (cpus_empty(nodemask))
-			continue;
-
-		cpu->cpumask = nodemask;
-		/* ->cpu_power already setup */
-
-		if (!first_cpu)
-			first_cpu = cpu;
-		if (last_cpu)
-			last_cpu->next = cpu;
-		last_cpu = cpu;
-	}
-	last_cpu->next = first_cpu;
-
-	mb();
-	for_each_cpu(i) {
-		int node = cpu_to_node(i);
-		struct sched_domain *cpu_domain = cpu_sched_domain(i);
-		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
-		struct sched_domain *node_domain = &per_cpu(node_domains, i);
-		struct sched_group *cpu_group = &sched_group_cpus[i];
-		struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)];
-		struct sched_group *node_group = &sched_group_nodes[node];
-
-		cpu_domain->parent = phys_domain;
-		phys_domain->parent = node_domain;
-
-		node_domain->groups = node_group;
-		phys_domain->groups = phys_group;
-		cpu_domain->groups = cpu_group;
-	}
-}
-#else /* CONFIG_NUMA */
-static struct sched_group sched_group_cpus[NR_CPUS];
-static struct sched_group sched_group_phys[NR_CPUS];
-static DEFINE_PER_CPU(struct sched_domain, phys_domains);
-__init void arch_init_sched_domains(void)
-{
-	int i;
-	struct sched_group *first_cpu = NULL, *last_cpu = NULL;
-
-	/* Set up domains */
-	for_each_cpu(i) {
-		struct sched_domain *cpu_domain = cpu_sched_domain(i);
-		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
-
-		*cpu_domain = SD_SIBLING_INIT;
-		cpu_domain->span = cpu_sibling_map[i];
-
-		*phys_domain = SD_CPU_INIT;
-		phys_domain->span = cpu_possible_map;
-	}
-
-	/* Set up CPU (sibling) groups */
-	for_each_cpu(i) {
-		struct sched_domain *cpu_domain = cpu_sched_domain(i);
-		int j;
-		first_cpu = last_cpu = NULL;
-
-		if (i != first_cpu(cpu_domain->span))
-			continue;
-
-		for_each_cpu_mask(j, cpu_domain->span) {
-			struct sched_group *cpu = &sched_group_cpus[j];
-
-			cpus_clear(cpu->cpumask);
-			cpu_set(j, cpu->cpumask);
-			cpu->cpu_power = SCHED_LOAD_SCALE;
-
-			if (!first_cpu)
-				first_cpu = cpu;
-			if (last_cpu)
-				last_cpu->next = cpu;
-			last_cpu = cpu;
-		}
-		last_cpu->next = first_cpu;
-	}
-
-	first_cpu = last_cpu = NULL;
-	/* Set up physical groups */
-	for_each_cpu(i) {
-		struct sched_domain *cpu_domain = cpu_sched_domain(i);
-		struct sched_group *cpu = &sched_group_phys[i];
-
-		if (i != first_cpu(cpu_domain->span))
-			continue;
-
-		cpu->cpumask = cpu_domain->span;
-		/* See SMT+NUMA setup for comment */
-		cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
-
-		if (!first_cpu)
-			first_cpu = cpu;
-		if (last_cpu)
-			last_cpu->next = cpu;
-		last_cpu = cpu;
-	}
-	last_cpu->next = first_cpu;
-
-	mb();
-	for_each_cpu(i) {
-		struct sched_domain *cpu_domain = cpu_sched_domain(i);
-		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
-		struct sched_group *cpu_group = &sched_group_cpus[i];
-		struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)];
-		cpu_domain->parent = phys_domain;
-		phys_domain->groups = phys_group;
-		cpu_domain->groups = cpu_group;
-	}
-}
-#endif /* CONFIG_NUMA */
-#endif /* CONFIG_SCHED_SMT */
 
 /* These are wrappers to interface to the new boot process.  Someone
    who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
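Note that the block removed above is not deleted: it moves verbatim into the new file arch/i386/kernel/init_sched_domains.c in the next hunk, so the x86_64 build can compile the same code instead of carrying a copy. For orientation, here is a condensed sketch of the shape being built -- illustrative, simplified types, not part of the patch: every CPU gets a bottom-level sibling domain whose ->parent chain climbs to the physical and (under CONFIG_NUMA) node domains, and each domain points at a circular, singly linked list of groups that partition its span. The first_cpu/last_cpu bookkeeping in the loops above is stitching those circular ->next links together.

	/*
	 * Illustrative sketch (simplified types, not the kernel's):
	 * walk a CPU's domain chain bottom-up and look at the first
	 * group on each level's circular list.
	 */
	#include <stdio.h>

	struct group  { struct group *next; unsigned long cpu_power; };
	struct domain { struct domain *parent; struct group *groups; const char *name; };

	static void walk_up(struct domain *sd)
	{
		for (; sd; sd = sd->parent)	/* sibling -> physical -> node */
			printf("%s: first group cpu_power=%lu\n",
			       sd->name, sd->groups->cpu_power);
	}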
diff -Nru linux-2.6.4-rc1/arch/i386/kernel/init_sched_domains.c linux-64/arch/i386/kernel/init_sched_domains.c
--- linux-2.6.4-rc1/arch/i386/kernel/init_sched_domains.c	1969-12-31 16:00:00.000000000 -0800
+++ linux-64/arch/i386/kernel/init_sched_domains.c	2004-03-01 17:34:12.000000000 -0800
@@ -0,0 +1,212 @@
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/percpu.h>
+
+
+#ifdef CONFIG_NUMA
+static struct sched_group sched_group_cpus[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
+static struct sched_group sched_group_nodes[MAX_NUMNODES];
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+static DEFINE_PER_CPU(struct sched_domain, node_domains);
+__init void arch_init_sched_domains(void)
+{
+	int i;
+	struct sched_group *first_cpu = NULL, *last_cpu = NULL;
+
+	/* Set up domains */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+		struct sched_domain *node_domain = &per_cpu(node_domains, i);
+		int node = cpu_to_node(i);
+		cpumask_t nodemask = node_to_cpumask(node);
+
+		*cpu_domain = SD_SIBLING_INIT;
+		cpu_domain->span = cpu_sibling_map[i];
+
+		*phys_domain = SD_CPU_INIT;
+		phys_domain->span = nodemask;
+
+		*node_domain = SD_NODE_INIT;
+		node_domain->span = cpu_possible_map;
+	}
+
+	/* Set up CPU (sibling) groups */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		int j;
+		first_cpu = last_cpu = NULL;
+
+		if (i != first_cpu(cpu_domain->span))
+			continue;
+
+		for_each_cpu_mask(j, cpu_domain->span) {
+			struct sched_group *cpu = &sched_group_cpus[j];
+
+			cpu->cpumask = CPU_MASK_NONE;
+			cpu_set(j, cpu->cpumask);
+			cpu->cpu_power = SCHED_LOAD_SCALE;
+
+			if (!first_cpu)
+				first_cpu = cpu;
+			if (last_cpu)
+				last_cpu->next = cpu;
+			last_cpu = cpu;
+		}
+		last_cpu->next = first_cpu;
+	}
+
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		int j;
+		cpumask_t nodemask;
+		struct sched_group *node = &sched_group_nodes[i];
+		cpus_and(nodemask, node_to_cpumask(i), cpu_possible_map);
+
+		if (cpus_empty(nodemask))
+			continue;
+
+		first_cpu = last_cpu = NULL;
+		/* Set up physical groups */
+		for_each_cpu_mask(j, nodemask) {
+			struct sched_domain *cpu_domain = cpu_sched_domain(j);
+			struct sched_group *cpu = &sched_group_phys[j];
+
+			if (j != first_cpu(cpu_domain->span))
+				continue;
+
+			cpu->cpumask = cpu_domain->span;
+			/*
+			 * Make each extra sibling increase power by 10% of
+			 * the basic CPU. This is very arbitrary.
+			 */
+			cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
+			node->cpu_power += cpu->cpu_power;
+
+			if (!first_cpu)
+				first_cpu = cpu;
+			if (last_cpu)
+				last_cpu->next = cpu;
+			last_cpu = cpu;
+		}
+		last_cpu->next = first_cpu;
+	}
+
+	/* Set up nodes */
+	first_cpu = last_cpu = NULL;
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		struct sched_group *cpu = &sched_group_nodes[i];
+		cpumask_t nodemask;
+		cpus_and(nodemask, node_to_cpumask(i), cpu_possible_map);
+
+		if (cpus_empty(nodemask))
+			continue;
+
+		cpu->cpumask = nodemask;
+		/* ->cpu_power already setup */
+
+		if (!first_cpu)
+			first_cpu = cpu;
+		if (last_cpu)
+			last_cpu->next = cpu;
+		last_cpu = cpu;
+	}
+	last_cpu->next = first_cpu;
+
+	mb();
+	for_each_cpu(i) {
+		int node = cpu_to_node(i);
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+		struct sched_domain *node_domain = &per_cpu(node_domains, i);
+		struct sched_group *cpu_group = &sched_group_cpus[i];
+		struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)];
+		struct sched_group *node_group = &sched_group_nodes[node];
+
+		cpu_domain->parent = phys_domain;
+		phys_domain->parent = node_domain;
+
+		node_domain->groups = node_group;
+		phys_domain->groups = phys_group;
+		cpu_domain->groups = cpu_group;
+	}
+}
+#else /* CONFIG_NUMA */
+static struct sched_group sched_group_cpus[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+__init void arch_init_sched_domains(void)
+{
+	int i;
+	struct sched_group *first_cpu = NULL, *last_cpu = NULL;
+
+	/* Set up domains */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+
+		*cpu_domain = SD_SIBLING_INIT;
+		cpu_domain->span = cpu_sibling_map[i];
+
+		*phys_domain = SD_CPU_INIT;
+		phys_domain->span = cpu_possible_map;
+	}
+
+	/* Set up CPU (sibling) groups */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		int j;
+		first_cpu = last_cpu = NULL;
+
+		if (i != first_cpu(cpu_domain->span))
+			continue;
+
+		for_each_cpu_mask(j, cpu_domain->span) {
+			struct sched_group *cpu = &sched_group_cpus[j];
+
+			cpus_clear(cpu->cpumask);
+			cpu_set(j, cpu->cpumask);
+			cpu->cpu_power = SCHED_LOAD_SCALE;
+
+			if (!first_cpu)
+				first_cpu = cpu;
+			if (last_cpu)
+				last_cpu->next = cpu;
+			last_cpu = cpu;
+		}
+		last_cpu->next = first_cpu;
+	}
+
+	first_cpu = last_cpu = NULL;
+	/* Set up physical groups */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		struct sched_group *cpu = &sched_group_phys[i];
+
+		if (i != first_cpu(cpu_domain->span))
+			continue;
+
+		cpu->cpumask = cpu_domain->span;
+		/* See SMT+NUMA setup for comment */
+		cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
+
+		if (!first_cpu)
+			first_cpu = cpu;
+		if (last_cpu)
+			last_cpu->next = cpu;
+		last_cpu = cpu;
+	}
+	last_cpu->next = first_cpu;
+
+	mb();
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+		struct sched_group *cpu_group = &sched_group_cpus[i];
+		struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)];
+		cpu_domain->parent = phys_domain;
+		phys_domain->groups = phys_group;
+		cpu_domain->groups = cpu_group;
+	}
+}
+#endif /* CONFIG_NUMA */
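The 10%-per-extra-sibling rule above keeps a HyperThreaded package from advertising itself to the balancer as N full CPUs. Assuming SCHED_LOAD_SCALE is 128, as in the scheduler patches of this era (an assumption, not something this diff shows), the integer arithmetic comes out as:

	/* Worked example of the cpu_power formula above (standalone C;
	 * SCHED_LOAD_SCALE=128 is an assumed value). */
	#include <stdio.h>

	#define SCHED_LOAD_SCALE 128UL

	int main(void)
	{
		unsigned long n;
		for (n = 1; n <= 4; n++)	/* 1 -> 128, 2 -> 140, 3 -> 153, 4 -> 166 */
			printf("%lu sibling(s): cpu_power = %lu\n", n,
			       SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * (n - 1) / 10);
		return 0;
	}

So a two-sibling package advertises 140 against 256 for two independent CPUs, and the node-level group's cpu_power is accumulated as the sum over its packages.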
diff -Nru linux-2.6.4-rc1/arch/x86_64/Kconfig linux-64/arch/x86_64/Kconfig
--- linux-2.6.4-rc1/arch/x86_64/Kconfig	2004-03-02 09:45:44.055042576 -0800
+++ linux-64/arch/x86_64/Kconfig	2004-03-01 17:34:12.000000000 -0800
@@ -222,6 +222,16 @@
 
 	  If you don't know what to do here, say N.
 
+config SCHED_SMT
+	bool "SMT (Hyperthreading) scheduler support"
+	depends on SMP
+	default off
+	help
+	  SMT scheduler support improves the CPU scheduler's decision making
+	  when dealing with Intel Pentium 4 chips with HyperThreading at a
+	  cost of slightly increased overhead in some places. If unsure say
+	  N here.
+
 config PREEMPT
 	bool "Preemptible Kernel"
 	---help---
diff -Nru linux-2.6.4-rc1/arch/x86_64/kernel/Makefile linux-64/arch/x86_64/kernel/Makefile
--- linux-2.6.4-rc1/arch/x86_64/kernel/Makefile	2004-03-02 09:45:44.111034064 -0800
+++ linux-64/arch/x86_64/kernel/Makefile	2004-03-01 17:34:34.000000000 -0800
@@ -25,6 +25,7 @@
 obj-$(CONFIG_GART_IOMMU)	+= pci-gart.o aperture.o
 obj-$(CONFIG_DUMMY_IOMMU)	+= pci-nommu.o pci-dma.o
 obj-$(CONFIG_SWIOTLB)		+= swiotlb.o
+obj-$(CONFIG_SCHED_SMT)		+= init_sched_domains.o
 
 obj-$(CONFIG_MODULES)		+= module.o
 obj-$(CONFIG_KGDB)		+= kgdb_stub.o
@@ -36,3 +37,4 @@
 topology-y			+= ../../i386/mach-default/topology.o
 swiotlb-$(CONFIG_SWIOTLB)	+= ../../ia64/lib/swiotlb.o
 microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o
+init_sched_domains-$(CONFIG_SCHED_SMT) += ../../i386/kernel/init_sched_domains.o
diff -Nru linux-2.6.4-rc1/arch/x86_64/kernel/smpboot.c linux-64/arch/x86_64/kernel/smpboot.c
--- linux-2.6.4-rc1/arch/x86_64/kernel/smpboot.c	2004-03-02 09:45:57.274032984 -0800
+++ linux-64/arch/x86_64/kernel/smpboot.c	2004-03-01 17:39:19.000000000 -0800
@@ -75,7 +75,7 @@
 /* Set when the idlers are all forked */
 int smp_threads_ready;
 
-int cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
 
 /*
  * Trampoline 80x86 program as an array.
@@ -872,35 +872,38 @@
 		Dprintk("Before bogocount - setting activated=1.\n");
 	}
 
+	Dprintk("Boot done.\n");
+
 	/*
-	 * If Hyper-Threading is avaialble, construct cpu_sibling_map[], so
-	 * that we can tell the sibling CPU efficiently.
+	 * construct cpu_sibling_map[], so that we can tell sibling CPUs
+	 * efficiently.
 	 */
 
-	if (cpu_has_ht && smp_num_siblings > 1) {
-		for (cpu = 0; cpu < NR_CPUS; cpu++)
-			cpu_sibling_map[cpu] = NO_PROC_ID;
-
-		for (cpu = 0; cpu < NR_CPUS; cpu++) {
-			int i;
-			if (!cpu_isset(cpu, cpu_callout_map))
-				continue;
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+		cpus_clear(cpu_sibling_map[cpu]);
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		int siblings = 0;
+		int i;
+		if (!cpu_isset(cpu, cpu_callout_map))
+			continue;
+		if (smp_num_siblings > 1) {
 			for (i = 0; i < NR_CPUS; i++) {
-				if (i == cpu || !cpu_isset(i, cpu_callout_map))
+				if (!cpu_isset(i, cpu_callout_map))
 					continue;
 				if (phys_proc_id[cpu] == phys_proc_id[i]) {
-					cpu_sibling_map[cpu] = i;
-					break;
+					siblings++;
+					cpu_set(i, cpu_sibling_map[cpu]);
 				}
 			}
-			if (cpu_sibling_map[cpu] == NO_PROC_ID) {
-				smp_num_siblings = 1;
-				printk(KERN_WARNING "WARNING: No sibling found for CPU %d.\n", cpu);
-			}
+		} else {
+			siblings++;
+			cpu_set(cpu, cpu_sibling_map[cpu]);
 		}
-	}
-
-	Dprintk("Boot done.\n");
+
+		if (siblings != smp_num_siblings)
+			printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
+	}
 
 	/*
 	 * Here we can be sure that there is an IO-APIC in the system. Let's
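The conversion above changes semantics as well as the type: the old code recorded at most one sibling per CPU (falling back to NO_PROC_ID, and switching smp_num_siblings back to 1 if a sibling was missing), while the new code records the full set of threads sharing a phys_proc_id, the CPU itself included, and merely warns when the count disagrees with smp_num_siblings. The full mask is exactly what SD_SIBLING_INIT's span wants in the domain setup above. A userspace model of the new construction, with a hypothetical two-package, two-thread box:

	/* Model of the cpumask sibling map built above (assumed 4-CPU
	 * topology; plain unsigned long stands in for cpumask_t). */
	#include <stdio.h>

	#define NCPUS 4

	int main(void)
	{
		int phys_id[NCPUS] = { 0, 0, 1, 1 };	/* hypothetical phys_proc_id[] */
		unsigned long sibling_map[NCPUS] = { 0 };
		int cpu, i;

		for (cpu = 0; cpu < NCPUS; cpu++)
			for (i = 0; i < NCPUS; i++)
				if (phys_id[cpu] == phys_id[i])
					sibling_map[cpu] |= 1UL << i;	/* cpu_set() */

		for (cpu = 0; cpu < NCPUS; cpu++)	/* 0x3 0x3 0xc 0xc */
			printf("cpu%d: sibling_map=0x%lx\n", cpu, sibling_map[cpu]);
		return 0;
	}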
diff -Nru linux-2.6.4-rc1/include/asm-x86_64/processor.h linux-64/include/asm-x86_64/processor.h
--- linux-2.6.4-rc1/include/asm-x86_64/processor.h	2004-03-02 09:45:50.190109904 -0800
+++ linux-64/include/asm-x86_64/processor.h	2004-03-01 17:34:12.000000000 -0800
@@ -451,4 +451,10 @@
 	       ti->task;					\
 })
 
+
+#ifdef CONFIG_SCHED_SMT
+#define ARCH_HAS_SCHED_DOMAIN
+#define ARCH_HAS_SCHED_WAKE_BALANCE
+#endif
+
 #endif /* __ASM_X86_64_PROCESSOR_H */
diff -Nru linux-2.6.4-rc1/include/asm-x86_64/smp.h linux-64/include/asm-x86_64/smp.h
--- linux-2.6.4-rc1/include/asm-x86_64/smp.h	2004-03-02 09:45:57.284031464 -0800
+++ linux-64/include/asm-x86_64/smp.h	2004-03-01 17:39:43.000000000 -0800
@@ -47,7 +47,7 @@
 extern void (*mtrr_hook) (void);
 extern void zap_low_mappings(void);
 void smp_stop_cpu(void);
-extern int cpu_sibling_map[];
+extern cpumask_t cpu_sibling_map[];
 
 #define SMP_TRAMPOLINE_BASE 0x6000
 
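The two defines are how an architecture opts out of the generic scheduler's default domain setup: with ARCH_HAS_SCHED_DOMAIN visible, kernel/sched.c is expected to call the architecture's arch_init_sched_domains() (the file added above) instead of building a flat domain itself, and ARCH_HAS_SCHED_WAKE_BALANCE similarly appears to enable balancing across siblings at task wakeup. A condensed sketch of that override convention -- hypothetical form, not the scheduler's actual code:

	/* Sketch of the arch-override pattern the defines rely on. */
	#ifdef ARCH_HAS_SCHED_DOMAIN
	extern void arch_init_sched_domains(void);	/* arch builds the tree */
	#else
	static void arch_init_sched_domains(void)
	{
		/* generic fallback: one flat domain spanning all CPUs */
	}
	#endif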