Re: [patch] remove the BKL (Big Kernel Lock), this time for real

From: Ingo Molnar
Date: Fri Sep 17 2004 - 09:25:00 EST



* Ingo Molnar <mingo@xxxxxxx> wrote:

> the attached patch is a simplified variant of the remove-bkl patch I
> sent two days ago: it doesn't do the ->cpus_allowed trick.
>
> The question is: is there any BKL-using kernel code that relies on the
> task not migrating to another CPU within the BKL critical section?

the attached debug patch is on top of the above patch and prints a
warning whenever code uses smp_processor_id() in a preemptible section
of code. The patch gets rid of a number of common false positives, but
more false positives are likely to remain.
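
to illustrate, here's a hypothetical example of the pattern the check is
after - per-CPU data indexed from preemptible context, where the task
can migrate between reading the CPU number and using it (some_counter
is a made-up per-CPU variable, just for illustration):

	/* BAD: preemptible - the task can migrate right after the read: */
	int cpu = smp_processor_id();
	per_cpu(some_counter, cpu)++;

	/* OK: get_cpu() disables preemption until put_cpu(): */
	int cpu = get_cpu();
	per_cpu(some_counter, cpu)++;
	put_cpu();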

The patch has already uncovered a bug: softirq invocation had a race
that could lead to short (<1 msec) softirq-processing delays in the
PREEMPT case, because preemption was re-enabled before pending softirqs
were processed. (The kernel/softirq.c hunk below keeps preemption
disabled until softirq processing is done.) More importantly, the patch
has so far found no BKL user that relies on smp_processor_id(), which
makes me hopeful that the problem is not widespread at all and that we
can opt for the simpler, semaphore-based BKS (Big Kernel Semaphore)
design.
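
(for reference, the BKS idea is to replace the spinlock-based BKL with
a plain sleeping semaphore plus a per-task recursion count - a rough
sketch of the design, not the actual patch:

	static DECLARE_MUTEX(kernel_sem);

	void lock_kernel(void)
	{
		int depth = current->lock_depth + 1;

		/* lock_depth starts at -1, so !depth means first entry: */
		if (likely(!depth))
			down(&kernel_sem);
		current->lock_depth = depth;
	}

	void unlock_kernel(void)
	{
		BUG_ON(current->lock_depth < 0);
		if (likely(--current->lock_depth < 0))
			up(&kernel_sem);
	}

since a semaphore can be held across schedule(), BKL sections become
preemptible - which is exactly why code that relied on implicit
CPU-locality under the BKL would break.)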

(patch tested on x86 and x86-64; it should work on all architectures.)
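
for reference, the smp_processor_id() layering the patch introduces (a
summary, not part of the patch):

	/*
	 * __smp_processor_id() - raw, arch-defined read of the CPU number
	 * smp_processor_id()   - under CONFIG_PREEMPT this becomes the
	 *                        checking function in kernel/sched.c,
	 *                        otherwise it maps to __smp_processor_id()
	 * _smp_processor_id()  - always the raw read, for users such as
	 *                        the networking statistics macros that are
	 *                        racy against migration but harmless
	 */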

Ingo


--- linux/arch/i386/lib/delay.c
+++ linux/arch/i386/lib/delay.c
@@ -34,7 +34,7 @@ inline void __const_udelay(unsigned long
xloops *= 4;
__asm__("mull %0"
:"=d" (xloops), "=&a" (d0)
- :"1" (xloops),"0" (current_cpu_data.loops_per_jiffy * (HZ/4)));
+ :"1" (xloops),"0" (boot_cpu_data.loops_per_jiffy * (HZ/4)));
__delay(++xloops);
}

--- linux/arch/x86_64/lib/delay.c
+++ linux/arch/x86_64/lib/delay.c
@@ -34,7 +34,7 @@ void __delay(unsigned long loops)

inline void __const_udelay(unsigned long xloops)
{
- __delay(((xloops * current_cpu_data.loops_per_jiffy) >> 32) * HZ);
+ __delay(((xloops * boot_cpu_data.loops_per_jiffy) >> 32) * HZ);
}

void __udelay(unsigned long usecs)
--- linux/include/asm-i386/smp.h
+++ linux/include/asm-i386/smp.h
@@ -50,7 +50,7 @@ extern u8 x86_cpu_to_apicid[];
* from the initial startup. We map APIC_BASE very early in page_setup(),
* so this is correct in the x86 case.
*/
-#define smp_processor_id() (current_thread_info()->cpu)
+#define __smp_processor_id() (current_thread_info()->cpu)

extern cpumask_t cpu_callout_map;
#define cpu_possible_map cpu_callout_map
--- linux/include/asm-x86_64/smp.h
+++ linux/include/asm-x86_64/smp.h
@@ -66,7 +66,7 @@ static inline int num_booting_cpus(void)
return cpus_weight(cpu_callout_map);
}

-#define smp_processor_id() read_pda(cpunumber)
+#define __smp_processor_id() read_pda(cpunumber)

extern __inline int hard_smp_processor_id(void)
{
--- linux/include/linux/smp.h
+++ linux/include/linux/smp.h
@@ -95,8 +95,11 @@ void smp_prepare_boot_cpu(void);
/*
* These macros fold the SMP functionality into a single CPU system
*/
-
-#define smp_processor_id() 0
+
+#if !defined(__smp_processor_id) || !defined(CONFIG_PREEMPT)
+# define smp_processor_id() 0
+#endif
+
#define hard_smp_processor_id() 0
#define smp_threads_ready 1
#define smp_call_function(func,info,retry,wait) ({ 0; })
@@ -107,6 +110,17 @@ static inline void smp_send_reschedule(i

#endif /* !SMP */

+#ifdef __smp_processor_id
+# ifdef CONFIG_PREEMPT
+ extern unsigned int smp_processor_id(void);
+# else
+# define smp_processor_id() __smp_processor_id()
+# endif
+# define _smp_processor_id() __smp_processor_id()
+#else
+# define _smp_processor_id() smp_processor_id()
+#endif
+
#define get_cpu() ({ preempt_disable(); smp_processor_id(); })
#define put_cpu() preempt_enable()
#define put_cpu_no_resched() preempt_enable_no_resched()
--- linux/include/net/route.h
+++ linux/include/net/route.h
@@ -105,7 +105,7 @@ struct rt_cache_stat

extern struct rt_cache_stat *rt_cache_stat;
#define RT_CACHE_STAT_INC(field) \
- (per_cpu_ptr(rt_cache_stat, smp_processor_id())->field++)
+ (per_cpu_ptr(rt_cache_stat, _smp_processor_id())->field++)

extern struct ip_rt_acct *ip_rt_acct;

--- linux/include/net/snmp.h
+++ linux/include/net/snmp.h
@@ -128,18 +128,18 @@ struct linux_mib {
#define SNMP_STAT_USRPTR(name) (name[1])

#define SNMP_INC_STATS_BH(mib, field) \
- (per_cpu_ptr(mib[0], smp_processor_id())->mibs[field]++)
+ (per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field]++)
#define SNMP_INC_STATS_OFFSET_BH(mib, field, offset) \
- (per_cpu_ptr(mib[0], smp_processor_id())->mibs[field + (offset)]++)
+ (per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field + (offset)]++)
#define SNMP_INC_STATS_USER(mib, field) \
- (per_cpu_ptr(mib[1], smp_processor_id())->mibs[field]++)
+ (per_cpu_ptr(mib[1], _smp_processor_id())->mibs[field]++)
#define SNMP_INC_STATS(mib, field) \
- (per_cpu_ptr(mib[!in_softirq()], smp_processor_id())->mibs[field]++)
+ (per_cpu_ptr(mib[!in_softirq()], _smp_processor_id())->mibs[field]++)
#define SNMP_DEC_STATS(mib, field) \
- (per_cpu_ptr(mib[!in_softirq()], smp_processor_id())->mibs[field]--)
+ (per_cpu_ptr(mib[!in_softirq()], _smp_processor_id())->mibs[field]--)
#define SNMP_ADD_STATS_BH(mib, field, addend) \
- (per_cpu_ptr(mib[0], smp_processor_id())->mibs[field] += addend)
+ (per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field] += addend)
#define SNMP_ADD_STATS_USER(mib, field, addend) \
- (per_cpu_ptr(mib[1], smp_processor_id())->mibs[field] += addend)
+ (per_cpu_ptr(mib[1], _smp_processor_id())->mibs[field] += addend)

#endif
--- linux/kernel/printk.c
+++ linux/kernel/printk.c
@@ -641,8 +641,9 @@ void release_console_sem(void)
_con_start = con_start;
_log_end = log_end;
con_start = log_end; /* Flush */
- spin_unlock_irqrestore(&logbuf_lock, flags);
+ spin_unlock(&logbuf_lock);
call_console_drivers(_con_start, _log_end);
+ local_irq_restore(flags);
}
console_locked = 0;
console_may_schedule = 0;
--- linux/kernel/sched.c
+++ linux/kernel/sched.c
@@ -2288,6 +2288,56 @@ static inline int dependent_sleeper(int
}
#endif

+#if defined(CONFIG_PREEMPT) && defined(__smp_processor_id)
+/*
+ * Debugging check.
+ */
+unsigned int smp_processor_id(void)
+{
+ unsigned long preempt_count = preempt_count();
+ int this_cpu = __smp_processor_id();
+ cpumask_t this_mask;
+
+ if (likely(preempt_count))
+ goto out;
+
+ if (irqs_disabled())
+ goto out;
+
+ /*
+ * Kernel threads bound to a single CPU can safely use
+ * smp_processor_id():
+ */
+ this_mask = cpumask_of_cpu(this_cpu);
+
+ if (cpus_equal(current->cpus_allowed, this_mask))
+ goto out;
+
+ /*
+ * It is valid to assume CPU-locality during early bootup:
+ */
+ if (system_state != SYSTEM_RUNNING)
+ goto out;
+
+ /*
+ * Avoid recursion:
+ */
+ preempt_disable();
+
+ if (!printk_ratelimit())
+ goto out_enable;
+
+ printk(KERN_ERR "using smp_processor_id() in preemptible code: %s/%d\n",
+ current->comm, current->pid);
+ dump_stack();
+
+out_enable:
+ preempt_enable_no_resched();
+out:
+ return this_cpu;
+}
+#endif
+
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)

/*
@@ -3406,7 +3456,7 @@ EXPORT_SYMBOL(yield);
*/
void __sched io_schedule(void)
{
- struct runqueue *rq = this_rq();
+ struct runqueue *rq = &per_cpu(runqueues, _smp_processor_id());

atomic_inc(&rq->nr_iowait);
schedule();
@@ -3417,7 +3467,7 @@ EXPORT_SYMBOL(io_schedule);

long __sched io_schedule_timeout(long timeout)
{
- struct runqueue *rq = this_rq();
+ struct runqueue *rq = &per_cpu(runqueues, _smp_processor_id());
long ret;

atomic_inc(&rq->nr_iowait);
--- linux/kernel/softirq.c
+++ linux/kernel/softirq.c
@@ -137,12 +137,19 @@ EXPORT_SYMBOL(do_softirq);

void local_bh_enable(void)
{
- __local_bh_enable();
WARN_ON(irqs_disabled());
- if (unlikely(!in_interrupt() &&
- local_softirq_pending()))
+ if (unlikely(!in_interrupt() && local_softirq_pending())) {
+ /*
+ * Keep preemption disabled until we are done with
+ * softirq processing:
+ */
+ preempt_count() -= SOFTIRQ_OFFSET - 1;
invoke_softirq();
- preempt_check_resched();
+ preempt_enable();
+ } else {
+ __local_bh_enable();
+ preempt_check_resched();
+ }
}
EXPORT_SYMBOL(local_bh_enable);
