preempt-timing-2.6.8-rc1

From: William Lee Irwin III
Date: Tue Jul 13 2004 - 07:29:13 EST


This patch uses the preemption counter increments and decrements to time
non-preemptible critical sections.

This is an instrumentation patch intended to help determine the causes of
scheduling latency related to long non-preemptible critical sections.

Changes from 2.6.7-based patch:
(1) fix unmap_vmas() check correctly this time
(2) add touch_preempt_timing() to cond_resched_lock()
(3) depend on preempt until it's worked out wtf goes wrong without it


-- wli

Index: timing-2.6.8-rc1/include/linux/preempt.h
===================================================================
--- timing-2.6.8-rc1.orig/include/linux/preempt.h 2004-07-11 10:34:18.000000000 -0700
+++ timing-2.6.8-rc1/include/linux/preempt.h 2004-07-13 03:56:37.887605624 -0700
@@ -9,17 +9,17 @@
#include <linux/config.h>
#include <linux/linkage.h>

-#define preempt_count() (current_thread_info()->preempt_count)
-
-#define inc_preempt_count() \
-do { \
- preempt_count()++; \
-} while (0)
+#ifdef CONFIG_PREEMPT_TIMING
+void inc_preempt_count(void);
+void dec_preempt_count(void);
+void touch_preempt_timing(void);
+#else
+#define touch_preempt_timing() do { } while (0)
+#define inc_preempt_count() do { preempt_count()++; } while (0)
+#define dec_preempt_count() do { preempt_count()--; } while (0)
+#endif

-#define dec_preempt_count() \
-do { \
- preempt_count()--; \
-} while (0)
+#define preempt_count() (current_thread_info()->preempt_count)

#ifdef CONFIG_PREEMPT

@@ -51,9 +51,15 @@

#else

+#ifdef CONFIG_PREEMPT_TIMING
+#define preempt_disable() inc_preempt_count()
+#define preempt_enable_no_resched() dec_preempt_count()
+#define preempt_enable() dec_preempt_count()
+#else
#define preempt_disable() do { } while (0)
#define preempt_enable_no_resched() do { } while (0)
#define preempt_enable() do { } while (0)
+#endif
#define preempt_check_resched() do { } while (0)

#endif
Index: timing-2.6.8-rc1/include/linux/sched.h
===================================================================
--- timing-2.6.8-rc1.orig/include/linux/sched.h 2004-07-11 10:33:55.000000000 -0700
+++ timing-2.6.8-rc1/include/linux/sched.h 2004-07-13 03:56:37.895604408 -0700
@@ -1021,6 +1021,7 @@
extern void __cond_resched(void);
static inline void cond_resched(void)
{
+ touch_preempt_timing();
if (need_resched())
__cond_resched();
}
@@ -1040,7 +1041,8 @@
preempt_enable_no_resched();
__cond_resched();
spin_lock(lock);
- }
+ } else
+ touch_preempt_timing();
}

/* Reevaluate whether the task has signals pending delivery.
Index: timing-2.6.8-rc1/init/Kconfig
===================================================================
--- timing-2.6.8-rc1.orig/init/Kconfig 2004-07-11 10:35:08.000000000 -0700
+++ timing-2.6.8-rc1/init/Kconfig 2004-07-13 03:56:37.898603952 -0700
@@ -279,6 +279,15 @@
Disabling this option will cause the kernel to be built without
support for epoll family of system calls.

+config PREEMPT_TIMING
+ bool "Non-preemptible critical section timing"
+ default n
+ depends on PREEMPT
+ help
+ This option measures the time spent in non-preemptible critical
+ sections and reports warnings when a boot-time configurable
+ latency threshold is exceeded.
+
source "drivers/block/Kconfig.iosched"

config CC_OPTIMIZE_FOR_SIZE
Index: timing-2.6.8-rc1/kernel/printk.c
===================================================================
--- timing-2.6.8-rc1.orig/kernel/printk.c 2004-07-11 10:35:31.000000000 -0700
+++ timing-2.6.8-rc1/kernel/printk.c 2004-07-13 03:56:37.901603496 -0700
@@ -650,10 +650,8 @@
*/
void console_conditional_schedule(void)
{
- if (console_may_schedule && need_resched()) {
- set_current_state(TASK_RUNNING);
- schedule();
- }
+ if (console_may_schedule)
+ cond_resched();
}
EXPORT_SYMBOL(console_conditional_schedule);

Index: timing-2.6.8-rc1/kernel/sched.c
===================================================================
--- timing-2.6.8-rc1.orig/kernel/sched.c 2004-07-11 10:35:08.000000000 -0700
+++ timing-2.6.8-rc1/kernel/sched.c 2004-07-13 03:56:37.914601520 -0700
@@ -4040,3 +4040,74 @@

EXPORT_SYMBOL(__preempt_write_lock);
#endif /* defined(CONFIG_SMP) && defined(CONFIG_PREEMPT) */
+
+#ifdef CONFIG_PREEMPT_TIMING
+#include <linux/kallsyms.h>
+
+static int preempt_thresh;
+static DEFINE_PER_CPU(u64, preempt_timings);
+static DEFINE_PER_CPU(unsigned long, preempt_entry);
+
+static int setup_preempt_thresh(char *s)
+{
+ int thresh;
+
+ get_option(&s, &thresh);
+ if (thresh > 0) {
+ preempt_thresh = thresh;
+ printk("Preemption threshold = %dms\n", preempt_thresh);
+ }
+ return 1;
+}
+__setup("preempt_thresh=", setup_preempt_thresh);
+
+static void __touch_preempt_timing(void *addr)
+{
+ __get_cpu_var(preempt_timings) = sched_clock();
+ __get_cpu_var(preempt_entry) = (unsigned long)addr;
+}
+
+void touch_preempt_timing(void)
+{
+ if (preempt_count() > 0 && system_state == SYSTEM_RUNNING &&
+ __get_cpu_var(preempt_entry))
+ __touch_preempt_timing(__builtin_return_address(0));
+}
+EXPORT_SYMBOL(touch_preempt_timing);
+
+void inc_preempt_count(void)
+{
+ preempt_count()++;
+ if (preempt_count() == 1 && system_state == SYSTEM_RUNNING)
+ __touch_preempt_timing(__builtin_return_address(0));
+}
+EXPORT_SYMBOL(inc_preempt_count);
+
+void dec_preempt_count(void)
+{
+ if (preempt_count() == 1 && system_state == SYSTEM_RUNNING &&
+ __get_cpu_var(preempt_entry)) {
+ u64 hold;
+ unsigned long preempt_exit
+ = (unsigned long)__builtin_return_address(0);
+ hold = sched_clock() - __get_cpu_var(preempt_timings) + 999999;
+ do_div(hold, 1000000);
+ if (preempt_thresh && hold > preempt_thresh &&
+ printk_ratelimit()) {
+ printk("%lums non-preemptible critical section "
+ "violated %d ms preempt threshold "
+ "starting at ",
+ (unsigned long)hold,
+ preempt_thresh);
+ print_symbol("%s", __get_cpu_var(preempt_entry));
+ printk(" and ending at ");
+ print_symbol("%s", preempt_exit);
+ printk("\n");
+ dump_stack();
+ }
+ __get_cpu_var(preempt_entry) = 0;
+ }
+ preempt_count()--;
+}
+EXPORT_SYMBOL(dec_preempt_count);
+#endif
Index: timing-2.6.8-rc1/mm/memory.c
===================================================================
--- timing-2.6.8-rc1.orig/mm/memory.c 2004-07-11 10:34:37.000000000 -0700
+++ timing-2.6.8-rc1/mm/memory.c 2004-07-13 03:56:37.920600608 -0700
@@ -567,14 +567,17 @@
zap_bytes -= block;
if ((long)zap_bytes > 0)
continue;
- if (!atomic && need_resched()) {
+ zap_bytes = ZAP_BLOCK_SIZE;
+ if (atomic)
+ continue;
+ if (need_resched()) {
int fullmm = tlb_is_full_mm(*tlbp);
tlb_finish_mmu(*tlbp, tlb_start, start);
cond_resched_lock(&mm->page_table_lock);
*tlbp = tlb_gather_mmu(mm, fullmm);
tlb_start_valid = 0;
- }
- zap_bytes = ZAP_BLOCK_SIZE;
+ } else
+ touch_preempt_timing();
}
}
return ret;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/