Re: x86: CR4 update when IRQs are enabled
From: Nadav Amit
Date: Thu Nov 09 2017 - 01:29:46 EST
CCâing more people, and adding a patch to clarify...
Nadav Amit <nadav.amit@xxxxxxxxx> wrote:
> I am puzzled by the comment in tlb_state.cr4 , which says:
>
> /*
> * Access to this CR4 shadow and to H/W CR4 is protected by
> * disabling interrupts when modifying either one.
> */
>
> This does not seem to be true and adding a warning to CR4 writes when
> !irqs_disabled() immediately fires in identify_cpu() and
> __mcheck_cpu_init_generic(). While these two are called during boot, I think
> there other CR4 changes with enabled IRQs, for example, PR_SET_TSC.
>
> So my question(s): Is the comment correct? Is the current behavior correct?
So here is what I have in mind. I am not sure whether CONFIG_DEBUG_PREEMPT is
the right #ifdef. Let me know what you think.
-- >8 --
Subject: [PATCH] x86: disable IRQs before changing CR4
CR4 changes need to be performed while IRQs are disabled in order to
update the CR4 shadow and the actual register atomically.
Signed-off-by: Nadav Amit <namit@xxxxxxxxxx>
---
arch/x86/include/asm/tlbflush.h | 18 ++++++++++++------
arch/x86/kernel/cpu/common.c | 13 ++++++++++++-
arch/x86/kernel/cpu/mcheck/mce.c | 3 +++
arch/x86/kernel/cpu/mcheck/p5.c | 4 ++++
arch/x86/kernel/cpu/mcheck/winchip.c | 3 +++
arch/x86/kernel/process.c | 14 ++++++++++++--
6 files changed, 46 insertions(+), 9 deletions(-)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 50ea3482e1d1..bc70dd1cc7c6 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -89,6 +89,15 @@ static inline void cr4_init_shadow(void)
this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
}
+static inline void update_cr4(unsigned long cr4)
+{
+#ifdef CONFIG_DEBUG_PREEMPT
+ WARN_ON_ONCE(!irqs_disabled());
+#endif
+ this_cpu_write(cpu_tlbstate.cr4, cr4);
+ __write_cr4(cr4);
+}
+
/* Set in this cpu's CR4. */
static inline void cr4_set_bits(unsigned long mask)
{
@@ -97,8 +106,7 @@ static inline void cr4_set_bits(unsigned long mask)
cr4 = this_cpu_read(cpu_tlbstate.cr4);
if ((cr4 | mask) != cr4) {
cr4 |= mask;
- this_cpu_write(cpu_tlbstate.cr4, cr4);
- __write_cr4(cr4);
+ update_cr4(cr4);
}
}
@@ -110,8 +118,7 @@ static inline void cr4_clear_bits(unsigned long mask)
cr4 = this_cpu_read(cpu_tlbstate.cr4);
if ((cr4 & ~mask) != cr4) {
cr4 &= ~mask;
- this_cpu_write(cpu_tlbstate.cr4, cr4);
- __write_cr4(cr4);
+ update_cr4(cr4);
}
}
@@ -121,8 +128,7 @@ static inline void cr4_toggle_bits(unsigned long mask)
cr4 = this_cpu_read(cpu_tlbstate.cr4);
cr4 ^= mask;
- this_cpu_write(cpu_tlbstate.cr4, cr4);
- __write_cr4(cr4);
+ update_cr4(cr4);
}
/* Read the CR4 shadow. */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c8b39870f33e..82e6b41fd5e9 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -318,6 +318,8 @@ static bool pku_disabled;
static __always_inline void setup_pku(struct cpuinfo_x86 *c)
{
+ unsigned long flags;
+
/* check the boot processor, plus compile options for PKU: */
if (!cpu_feature_enabled(X86_FEATURE_PKU))
return;
@@ -327,7 +329,10 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c)
if (pku_disabled)
return;
+ local_irq_save(flags);
cr4_set_bits(X86_CR4_PKE);
+ local_irq_restore(flags);
+
/*
* Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
* cpuid bit to be set. We need to ensure that we
@@ -1069,6 +1074,7 @@ static void validate_apic_and_package_id(struct cpuinfo_x86 *c)
*/
static void identify_cpu(struct cpuinfo_x86 *c)
{
+ unsigned long flags;
int i;
c->loops_per_jiffy = loops_per_jiffy;
@@ -1121,9 +1127,14 @@ static void identify_cpu(struct cpuinfo_x86 *c)
/* Disable the PN if appropriate */
squash_the_stupid_serial_number(c);
- /* Set up SMEP/SMAP */
+ /*
+ * Set up SMEP/SMAP. Disable interrupts to prevent triggering a warning
+ * as CR4 changes must be done with disabled interrupts.
+ */
+ local_irq_save(flags);
setup_smep(c);
setup_smap(c);
+ local_irq_restore(flags);
/*
* The vendor-specific functions might have changed features.
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3b413065c613..497c07e33c25 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1508,6 +1508,7 @@ static void __mcheck_cpu_init_generic(void)
{
enum mcp_flags m_fl = 0;
mce_banks_t all_banks;
+ unsigned long flags;
u64 cap;
if (!mca_cfg.bootlog)
@@ -1519,7 +1520,9 @@ static void __mcheck_cpu_init_generic(void)
bitmap_fill(all_banks, MAX_NR_BANKS);
machine_check_poll(MCP_UC | m_fl, &all_banks);
+ local_irq_save(flags);
cr4_set_bits(X86_CR4_MCE);
+ local_irq_restore(flags);
rdmsrl(MSR_IA32_MCG_CAP, cap);
if (cap & MCG_CTL_P)
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 2a0717bf8033..d5d4963415e9 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -42,6 +42,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
/* Set up machine check reporting for processors with Intel style MCE: */
void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
{
+ unsigned long flags;
u32 l, h;
/* Default P5 to off as its often misconnected: */
@@ -62,7 +63,10 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
pr_info("Intel old style machine check architecture supported.\n");
/* Enable MCE: */
+ local_irq_save(flags);
cr4_set_bits(X86_CR4_MCE);
+ local_irq_restore(flags);
+
pr_info("Intel old style machine check reporting enabled on CPU#%d.\n",
smp_processor_id());
}
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index c6a722e1d011..6dd985e3849d 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -26,6 +26,7 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code)
/* Set up machine check reporting on the Winchip C6 series */
void winchip_mcheck_init(struct cpuinfo_x86 *c)
{
+ unsigned long flags;
u32 lo, hi;
machine_check_vector = winchip_machine_check;
@@ -37,7 +38,9 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
lo &= ~(1<<4); /* Enable MCE */
wrmsr(MSR_IDT_FCR1, lo, hi);
+ local_irq_save(flags);
cr4_set_bits(X86_CR4_MCE);
+ local_irq_restore(flags);
pr_info("Winchip machine check reporting enabled on CPU#0.\n");
}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3ca198080ea9..09e44e784745 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -127,25 +127,35 @@ void flush_thread(void)
void disable_TSC(void)
{
+ unsigned long flags;
+
preempt_disable();
- if (!test_and_set_thread_flag(TIF_NOTSC))
+ if (!test_and_set_thread_flag(TIF_NOTSC)) {
/*
* Must flip the CPU state synchronously with
* TIF_NOTSC in the current running context.
*/
+ local_irq_save(flags);
cr4_set_bits(X86_CR4_TSD);
+ local_irq_restore(flags);
+ }
preempt_enable();
}
static void enable_TSC(void)
{
+ unsigned long flags;
+
preempt_disable();
- if (test_and_clear_thread_flag(TIF_NOTSC))
+ if (test_and_clear_thread_flag(TIF_NOTSC)) {
/*
* Must flip the CPU state synchronously with
* TIF_NOTSC in the current running context.
*/
+ local_irq_save(flags);
cr4_clear_bits(X86_CR4_TSD);
+ local_irq_restore(flags);
+ }
preempt_enable();
}