[GIT PULL] x86 fixes

From: Ingo Molnar
Date: Thu Sep 17 2015 - 04:28:34 EST


Linus,

Please pull the latest x86-urgent-for-linus git tree from:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

# HEAD: 03da3ff1cfcd7774c8780d2547ba0d995f7dc03d x86/platform: Fix Geode LX timekeeping in the generic x86 build

- misc fixes all around the map
- block non-root vm86(old) if mmap_min_addr != 0
- two small debuggability improvements
- removal of obsolete paravirt op

out-of-topic modifications in x86-urgent-for-linus:
-----------------------------------------------------
tools/testing/selftests/x86/entry_from_vm86.c# 76fc5e7b2355: x86/vm86: Block non-root vm8

Thanks,

Ingo

------------------>
Andy Lutomirski (1):
x86/vm86: Block non-root vm86(old) if mmap_min_addr != 0

Borislav Petkov (1):
x86/cpu: Print family/model/stepping in hex

David Woodhouse (1):
x86/platform: Fix Geode LX timekeeping in the generic x86 build

Ingo Molnar (1):
x86/vm86: Fix the misleading CONFIG_VM86 Kconfig help text

Jan Beulich (1):
x86/ldt: Fix small LDT allocation for Xen

Juergen Gross (1):
x86/paravirt: Remove the unused pv_time_ops::get_tsc_khz method

Linda Knippers (1):
x86/mm/srat: Print non-volatile flag in SRAT

Shaohua Li (1):
x86/apic: Serialize LVTT and TSC_DEADLINE writes

Thomas Gleixner (2):
x86/alternatives: Make optimize_nops() interrupt safe and synced
x86/ioapic: Force affinity setting in setup_ioapic_dest()

Tim Chen (1):
x86/cpufeatures: Enable cpuid for Intel SHA extensions


arch/x86/Kconfig | 23 ++++++++++++-----------
arch/x86/include/asm/cpufeature.h | 1 +
arch/x86/include/asm/paravirt_types.h | 1 -
arch/x86/kernel/alternative.c | 5 +++++
arch/x86/kernel/apic/apic.c | 7 +++++++
arch/x86/kernel/apic/io_apic.c | 5 +++--
arch/x86/kernel/cpu/common.c | 4 ++--
arch/x86/kernel/ldt.c | 4 ++--
arch/x86/kernel/tsc.c | 17 ++++++++++-------
arch/x86/kernel/vm86_32.c | 27 +++++++++++++++++++++++++++
arch/x86/mm/srat.c | 5 +++--
tools/testing/selftests/x86/entry_from_vm86.c | 5 +++--
12 files changed, 75 insertions(+), 29 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 48f7433dac6f..d28815325ef0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1004,7 +1004,7 @@ config X86_THERMAL_VECTOR
depends on X86_MCE_INTEL

config X86_LEGACY_VM86
- bool "Legacy VM86 support (obsolete)"
+ bool "Legacy VM86 support"
default n
depends on X86_32
---help---
@@ -1016,19 +1016,20 @@ config X86_LEGACY_VM86
available to accelerate real mode DOS programs. However, any
recent version of DOSEMU, X, or vbetool should be fully
functional even without kernel VM86 support, as they will all
- fall back to (pretty well performing) software emulation.
+ fall back to software emulation. Nevertheless, if you are using
+ a 16-bit DOS program where 16-bit performance matters, vm86
+ mode might be faster than emulation and you might want to
+ enable this option.

- Anything that works on a 64-bit kernel is unlikely to need
- this option, as 64-bit kernels don't, and can't, support V8086
- mode. This option is also unrelated to 16-bit protected mode
- and is not needed to run most 16-bit programs under Wine.
+ Note that any app that works on a 64-bit kernel is unlikely to
+ need this option, as 64-bit kernels don't, and can't, support
+ V8086 mode. This option is also unrelated to 16-bit protected
+ mode and is not needed to run most 16-bit programs under Wine.

- Enabling this option adds considerable attack surface to the
- kernel and slows down system calls and exception handling.
+ Enabling this option increases the complexity of the kernel
+ and slows down exception handling a tiny bit.

- Unless you use very old userspace or need the last drop of
- performance in your real mode DOS games and can't use KVM,
- say N here.
+ If unsure, say N here.

config VM86
bool
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 477fc28050e4..e6cf2ad350d1 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -241,6 +241,7 @@
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */

/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index ce029e4fa7c6..31247b5bff7c 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -97,7 +97,6 @@ struct pv_lazy_ops {
struct pv_time_ops {
unsigned long long (*sched_clock)(void);
unsigned long long (*steal_clock)(int cpu);
- unsigned long (*get_tsc_khz)(void);
};

struct pv_cpu_ops {
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index c42827eb86cf..25f909362b7a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -338,10 +338,15 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)

static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
{
+ unsigned long flags;
+
if (instr[0] != 0x90)
return;

+ local_irq_save(flags);
add_nops(instr + (a->instrlen - a->padlen), a->padlen);
+ sync_core();
+ local_irq_restore(flags);

DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
instr, a->instrlen - a->padlen, a->padlen);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 3ca3e46aa405..24e94ce454e2 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -336,6 +336,13 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
apic_write(APIC_LVTT, lvtt_value);

if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
+ /*
+ * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
+ * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
+ * According to Intel, MFENCE can do the serialization here.
+ */
+ asm volatile("mfence" : : : "memory");
+
printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
return;
}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 38a76f826530..5c60bb162622 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2522,6 +2522,7 @@ void __init setup_ioapic_dest(void)
int pin, ioapic, irq, irq_entry;
const struct cpumask *mask;
struct irq_data *idata;
+ struct irq_chip *chip;

if (skip_ioapic_setup == 1)
return;
@@ -2545,9 +2546,9 @@ void __init setup_ioapic_dest(void)
else
mask = apic->target_cpus();

- irq_set_affinity(irq, mask);
+ chip = irq_data_get_irq_chip(idata);
+ chip->irq_set_affinity(idata, mask, false);
}
-
}
#endif

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 07ce52c22ec8..de22ea7ff82f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1110,10 +1110,10 @@ void print_cpu_info(struct cpuinfo_x86 *c)
else
printk(KERN_CONT "%d86", c->x86);

- printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model);
+ printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model);

if (c->x86_mask || c->cpuid_level >= 0)
- printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask);
+ printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask);
else
printk(KERN_CONT ")\n");

diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 2bcc0525f1c1..6acc9dd91f36 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -58,7 +58,7 @@ static struct ldt_struct *alloc_ldt_struct(int size)
if (alloc_size > PAGE_SIZE)
new_ldt->entries = vzalloc(alloc_size);
else
- new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL);

if (!new_ldt->entries) {
kfree(new_ldt);
@@ -95,7 +95,7 @@ static void free_ldt_struct(struct ldt_struct *ldt)
if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(ldt->entries);
else
- kfree(ldt->entries);
+ free_page((unsigned long)ldt->entries);
kfree(ldt);
}

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 79055cf2c497..51e62d6afd9a 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -21,6 +21,7 @@
#include <asm/hypervisor.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>
+#include <asm/geode.h>

unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@@ -1015,15 +1016,17 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable);

static void __init check_system_tsc_reliable(void)
{
-#ifdef CONFIG_MGEODE_LX
- /* RTSC counts during suspend */
+#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
+ if (is_geode_lx()) {
+ /* RTSC counts during suspend */
#define RTSC_SUSP 0x100
- unsigned long res_low, res_high;
+ unsigned long res_low, res_high;

- rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
- /* Geode_LX - the OLPC CPU has a very reliable TSC */
- if (res_low & RTSC_SUSP)
- tsc_clocksource_reliable = 1;
+ rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
+ /* Geode_LX - the OLPC CPU has a very reliable TSC */
+ if (res_low & RTSC_SUSP)
+ tsc_clocksource_reliable = 1;
+ }
#endif
if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
tsc_clocksource_reliable = 1;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index abd8b856bd2b..524619351961 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -45,6 +45,7 @@
#include <linux/audit.h>
#include <linux/stddef.h>
#include <linux/slab.h>
+#include <linux/security.h>

#include <asm/uaccess.h>
#include <asm/io.h>
@@ -232,6 +233,32 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
struct pt_regs *regs = current_pt_regs();
unsigned long err = 0;

+ err = security_mmap_addr(0);
+ if (err) {
+ /*
+ * vm86 cannot virtualize the address space, so vm86 users
+ * need to manage the low 1MB themselves using mmap. Given
+ * that BIOS places important data in the first page, vm86
+ * is essentially useless if mmap_min_addr != 0. DOSEMU,
+ * for example, won't even bother trying to use vm86 if it
+ * can't map a page at virtual address 0.
+ *
+ * To reduce the available kernel attack surface, simply
+ * disallow vm86(old) for users who cannot mmap at va 0.
+ *
+ * The implementation of security_mmap_addr will allow
+ * suitably privileged users to map va 0 even if
+ * vm.mmap_min_addr is set above 0, and we want this
+ * behavior for vm86 as well, as it ensures that legacy
+ * tools like vbetool will not fail just because of
+ * vm.mmap_min_addr.
+ */
+ pr_info_once("Denied a call to vm86(old) from %s[%d] (uid: %d). Set the vm.mmap_min_addr sysctl to 0 and/or adjust LSM mmap_min_addr policy to enable vm86 if you are using a vm86-based DOS emulator.\n",
+ current->comm, task_pid_nr(current),
+ from_kuid_munged(&init_user_ns, current_uid()));
+ return -EPERM;
+ }
+
if (!vm86) {
if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL)))
return -ENOMEM;
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 66338a60aa6e..c2aea63bee20 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -192,10 +192,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)

node_set(node, numa_nodes_parsed);

- pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s\n",
+ pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n",
node, pxm,
(unsigned long long) start, (unsigned long long) end - 1,
- hotpluggable ? " hotplug" : "");
+ hotpluggable ? " hotplug" : "",
+ ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : "");

/* Mark hotplug range in memblock. */
if (hotpluggable && memblock_mark_hotplug(start, ma->length))
diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c
index 9a43a59a9bb4..421c607a8856 100644
--- a/tools/testing/selftests/x86/entry_from_vm86.c
+++ b/tools/testing/selftests/x86/entry_from_vm86.c
@@ -116,8 +116,9 @@ static bool do_test(struct vm86plus_struct *v86, unsigned long eip,
v86->regs.eip = eip;
ret = vm86(VM86_ENTER, v86);

- if (ret == -1 && errno == ENOSYS) {
- printf("[SKIP]\tvm86 not supported\n");
+ if (ret == -1 && (errno == ENOSYS || errno == EPERM)) {
+ printf("[SKIP]\tvm86 %s\n",
+ errno == ENOSYS ? "not supported" : "not allowed");
return false;
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/