Re: finish_task_switch high in profiles in 2.6.7

From: William Lee Irwin III
Date: Thu Jun 24 2004 - 17:08:39 EST


On Thu, Jun 24, 2004 at 02:22:48PM -0700, William Lee Irwin III wrote:
> Brute-force port of schedprof to 2.6.7-final. Compile-tested on sparc64
> only. No runtime testing.
> Given that the context switch rate is actually *reduced* in 2.6.7 vs.
> 2.6.5, I expect that this will not, in fact, reveal anything useful.

While I'm spraying out untested code, I might as well do these, which
I've not even compiled. =)
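
If anyone wants to eyeball the results, the dump format these patches
assume is /proc/profile-ish: word 0 of /proc/schedprof holds the byte
step (1, or 1 << schedprof_shift with the last patch below), and word i
counts hits at _stext + (i - 1)*step. A rough userspace sketch along
those lines (my reading of the layout, not part of the patches, and it
presumes sizeof(atomic_t) == sizeof(unsigned int)):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	FILE *f = fopen("/proc/schedprof", "r");
	unsigned int step, count;
	unsigned long i;

	if (!f || fread(&step, sizeof(step), 1, f) != 1)
		return EXIT_FAILURE;
	/* word 0 was the step; word i covers _stext + (i - 1)*step */
	for (i = 1; fread(&count, sizeof(count), 1, f) == 1; i++)
		if (count)
			printf("_stext+0x%lx\t%u\n", (i - 1) * step, count);
	fclose(f);
	return EXIT_SUCCESS;
}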


-- wli
Index: schedprof-2.6.7/kernel/sched.c
===================================================================
--- schedprof-2.6.7.orig/kernel/sched.c 2004-06-24 14:02:48.292038264 -0700
+++ schedprof-2.6.7/kernel/sched.c 2004-06-24 14:39:50.035281952 -0700
@@ -4063,7 +4063,7 @@
if (schedprof_buf) {
unsigned long pc = (unsigned long)__pc;
pc -= min(pc, (unsigned long)_stext);
- atomic_inc(&schedprof_buf[min(pc, schedprof_len)]);
+ atomic_inc(&schedprof_buf[min(pc + 1, schedprof_len - 1)]);
}
}

@@ -4081,8 +4081,9 @@
if (!sched_profiling)
return;
schedprof_len = (unsigned long)(_etext - _stext) + 1;
- schedprof_buf = alloc_bootmem(schedprof_len*sizeof(atomic_t));
+ schedprof_buf = alloc_bootmem(sizeof(atomic_t)*schedprof_len);
printk(KERN_INFO "Scheduler call profiling enabled\n");
+ atomic_set(&schedprof_buf[0], 1);
}

#ifdef CONFIG_PROC_FS
@@ -4092,38 +4093,59 @@
read_sched_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
unsigned long p = *ppos;
- ssize_t read;
- char * pnt;
unsigned int sample_step = 1;

- if (p >= (schedprof_len+1)*sizeof(atomic_t))
+ if (p >= sizeof(atomic_t)*schedprof_len)
return 0;
- if (count > (schedprof_len+1)*sizeof(atomic_t) - p)
- count = (schedprof_len+1)*sizeof(atomic_t) - p;
- read = 0;
-
- while (p < sizeof(atomic_t) && count > 0) {
- put_user(*((char *)(&sample_step)+p),buf);
- buf++; p++; count--; read++;
- }
- pnt = (char *)schedprof_buf + p - sizeof(atomic_t);
- if (copy_to_user(buf,(void *)pnt,count))
+ count = min(schedprof_len*sizeof(atomic_t) - p, count);
+ if (copy_to_user(buf, (char *)schedprof_buf + p, count))
return -EFAULT;
- read += count;
- *ppos += read;
- return read;
+ *ppos += count;
+ return count;
}

static ssize_t write_sched_profile(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- memset(schedprof_buf, 0, sizeof(atomic_t)*schedprof_len);
+ memset(&schedprof_buf[1], 0, (schedprof_len-1)*sizeof(atomic_t));
return count;
}

+static int mmap_sched_profile(struct file *file, struct vm_area_struct *vma)
+{
+ unsigned long pfn, vaddr, base_pfn = __pa(schedprof_buf)/PAGE_SIZE;
+ if (vma->vm_pgoff + vma_pages(vma) > (PAGE_ALIGN(schedprof_len*sizeof(atomic_t)) >> PAGE_SHIFT))
+ return -ENODEV;
+ vma->vm_flags |= VM_RESERVED|VM_IO;
+ for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
+ pgd_t *pgd = pgd_offset(vma->vm_mm, vaddr);
+ pmd_t *pmd;
+ pte_t *pte, pte_val;
+
+ spin_lock(&vma->vm_mm->page_table_lock);
+ pmd = pmd_alloc(vma->vm_mm, pgd, vaddr);
+ if (!pmd)
+ goto enomem;
+ pte = pte_alloc_map(vma->vm_mm, pmd, vaddr);
+ if (!pte)
+ goto enomem;
+ pfn = base_pfn + linear_page_index(vma, vaddr);
+ pte_val = pfn_pte(pfn, vma->vm_page_prot);
+ set_pte(pte, pte_val);
+ update_mmu_cache(vma, vaddr, pte_val);
+ pte_unmap(pte);
+ spin_unlock(&vma->vm_mm->page_table_lock);
+ }
+ return 0;
+enomem:
+ spin_unlock(&vma->vm_mm->page_table_lock);
+ return -ENOMEM;
+}
+
static struct file_operations sched_profile_operations = {
.read = read_sched_profile,
.write = write_sched_profile,
+ .mmap = mmap_sched_profile,
};

static int proc_schedprof_init(void)
@@ -4134,7 +4156,7 @@
entry = create_proc_entry("schedprof", S_IWUSR | S_IRUGO, NULL);
if (entry) {
entry->proc_fops = &sched_profile_operations;
- entry->size = sizeof(atomic_t)*(schedprof_len + 1);
+ entry->size = sizeof(atomic_t)*schedprof_len;
}
return !!entry;
}
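
(Aside, not part of the patch itself: the point of the new ->mmap hook
is to let userspace poll the counters without repeated read() copies.
A sketch of how I'd expect it to be driven, under the same layout
assumption as above:)

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>

int main(void)
{
	struct stat st;
	unsigned int *buf;
	unsigned long i, nr, sum = 0;
	int fd = open("/proc/schedprof", O_RDONLY);

	if (fd < 0 || fstat(fd, &st) < 0)
		return 1;
	nr = st.st_size / sizeof(unsigned int);
	buf = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
	if (buf == MAP_FAILED)
		return 1;
	for (i = 1; i < nr; i++)	/* skip word 0, the step */
		sum += buf[i];
	printf("total samples: %lu\n", sum);
	munmap(buf, st.st_size);
	close(fd);
	return 0;
}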
Index: schedprof-2.6.7/kernel/sched.c
===================================================================
--- schedprof-2.6.7.orig/kernel/sched.c 2004-06-24 14:39:50.035281952 -0700
+++ schedprof-2.6.7/kernel/sched.c 2004-06-24 14:40:04.347106224 -0700
@@ -4152,13 +4152,13 @@
{
struct proc_dir_entry *entry;
if (!sched_profiling)
- return 1;
+ return 0;
entry = create_proc_entry("schedprof", S_IWUSR | S_IRUGO, NULL);
if (entry) {
entry->proc_fops = &sched_profile_operations;
entry->size = sizeof(atomic_t)*schedprof_len;
}
- return !!entry;
+ return !entry;
}
module_init(proc_schedprof_init);
#endif
Index: schedprof-2.6.7/kernel/sched.c
===================================================================
--- schedprof-2.6.7.orig/kernel/sched.c 2004-06-24 14:40:04.347106224 -0700
+++ schedprof-2.6.7/kernel/sched.c 2004-06-24 14:51:35.285067688 -0700
@@ -4052,7 +4052,7 @@
#endif /* defined(CONFIG_SMP) && defined(CONFIG_PREEMPT) */

static atomic_t *schedprof_buf;
-static int sched_profiling;
+static int sched_profiling, schedprof_shift;
static unsigned long schedprof_len;

#include <linux/bootmem.h>
@@ -4062,11 +4062,22 @@
{
if (schedprof_buf) {
unsigned long pc = (unsigned long)__pc;
- pc -= min(pc, (unsigned long)_stext);
+ pc = (pc - min(pc, (unsigned long)_stext)) >> schedprof_shift;
atomic_inc(&schedprof_buf[min(pc + 1, schedprof_len - 1)]);
}
}

+static int __init schedprof_shift_setup(char *s)
+{
+ int n;
+ if (get_option(&s, &n)) {
+ schedprof_shift = n;
+ sched_profiling = 1;
+ }
+ return 1;
+}
+__setup("schedprof_shift=", schedprof_shift_setup);
+
static int __init schedprof_setup(char *s)
{
int n;
@@ -4080,10 +4091,10 @@
{
if (!sched_profiling)
return;
- schedprof_len = (unsigned long)(_etext - _stext) + 1;
+ schedprof_len = ((unsigned long)(_etext - _stext) >> schedprof_shift) + 1;
schedprof_buf = alloc_bootmem(sizeof(atomic_t)*schedprof_len);
printk(KERN_INFO "Scheduler call profiling enabled\n");
- atomic_set(&schedprof_buf[0], 1);
+ atomic_set(&schedprof_buf[0], 1 << schedprof_shift);
}

#ifdef CONFIG_PROC_FS
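
And to spell out the shift arithmetic (my reading of the hunks above,
nothing the patch itself depends on): booting with schedprof_shift=N,
which also flips sched_profiling on per the setup hook, makes each
bucket cover 1 << N bytes of text and shrinks the bootmem array by
roughly that factor; word 0 reports the step so a post-processor can
map bucket i back to _stext + ((i - 1) << N) without knowing N ahead
of time. Something like the following, where schedprof_bucket_base()
is a hypothetical helper, purely for illustration:

static inline unsigned long schedprof_bucket_base(unsigned long i,
						  unsigned int step)
{
	/* bucket 0 holds the step; bucket i >= 1 starts here */
	return (i - 1) * step;	/* byte offset from _stext */
}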