Re: Linux 4.19.7

From: Greg KH
Date: Wed Dec 05 2018 - 14:47:28 EST


diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index fa4eec22816d..0c404cda531a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4169,9 +4169,13 @@

spectre_v2= [X86] Control mitigation of Spectre variant 2
(indirect branch speculation) vulnerability.
+ The default operation protects the kernel from
+ user space attacks.

- on - unconditionally enable
- off - unconditionally disable
+ on - unconditionally enable, implies
+ spectre_v2_user=on
+ off - unconditionally disable, implies
+ spectre_v2_user=off
auto - kernel detects whether your CPU model is
vulnerable

@@ -4181,6 +4185,12 @@
CONFIG_RETPOLINE configuration option, and the
compiler with which the kernel was built.

+ Selecting 'on' will also enable the mitigation
+ against user space to user space task attacks.
+
+ Selecting 'off' will disable both the kernel and
+ the user space protections.
+
Specific mitigations can also be selected manually:

retpoline - replace indirect branches
@@ -4190,6 +4200,48 @@
Not specifying this option is equivalent to
spectre_v2=auto.

+ spectre_v2_user=
+ [X86] Control mitigation of Spectre variant 2
+ (indirect branch speculation) vulnerability between
+ user space tasks
+
+ on - Unconditionally enable mitigations. Is
+ enforced by spectre_v2=on
+
+ off - Unconditionally disable mitigations. Is
+ enforced by spectre_v2=off
+
+ prctl - Indirect branch speculation is enabled,
+ but mitigation can be enabled via prctl
+ per thread. The mitigation control state
+ is inherited on fork.
+
+ prctl,ibpb
+ - Like "prctl" above, but only STIBP is
+ controlled per thread. IBPB is issued
+ always when switching between different user
+ space processes.
+
+ seccomp
+ - Same as "prctl" above, but all seccomp
+ threads will enable the mitigation unless
+ they explicitly opt out.
+
+ seccomp,ibpb
+ - Like "seccomp" above, but only STIBP is
+ controlled per thread. IBPB is issued
+ always when switching between different
+ user space processes.
+
+ auto - Kernel selects the mitigation depending on
+ the available CPU features and vulnerability.
+
+ Default mitigation:
+ If CONFIG_SECCOMP=y then "seccomp", otherwise "prctl"
+
+ Not specifying this option is equivalent to
+ spectre_v2_user=auto.
+
spec_store_bypass_disable=
[HW] Control Speculative Store Bypass (SSB) Disable mitigation
(Speculative Store Bypass vulnerability)
diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst
index 32f3d55c54b7..c4dbe6f7cdae 100644
--- a/Documentation/userspace-api/spec_ctrl.rst
+++ b/Documentation/userspace-api/spec_ctrl.rst
@@ -92,3 +92,12 @@ Speculation misfeature controls
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
+
+- PR_SPEC_INDIR_BRANCH: Indirect Branch Speculation in User Processes
+ (Mitigate Spectre V2 style attacks against user processes)
+
+ Invocations:
+ * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0);
diff --git a/Makefile b/Makefile
index 20cbb8e84650..d2b4efcfb388 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 4
PATCHLEVEL = 19
-SUBLEVEL = 6
+SUBLEVEL = 7
EXTRAVERSION =
NAME = "People's Front"

diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi
index 2075120cfc4d..d8bf939a3aff 100644
--- a/arch/arm/boot/dts/rk3288-veyron.dtsi
+++ b/arch/arm/boot/dts/rk3288-veyron.dtsi
@@ -10,7 +10,11 @@
#include "rk3288.dtsi"

/ {
- memory@0 {
+ /*
+ * The default coreboot on veyron devices ignores memory@0 nodes
+ * and would instead create another memory node.
+ */
+ memory {
device_type = "memory";
reg = <0x0 0x0 0x0 0x80000000>;
};
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 5617932a83df..ee673c09aa6c 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -227,9 +227,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
unsigned long frame_pointer)
{
unsigned long return_hooker = (unsigned long) &return_to_handler;
- struct ftrace_graph_ent trace;
unsigned long old;
- int err;

if (unlikely(atomic_read(&current->tracing_graph_pause)))
return;
@@ -237,21 +235,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
old = *parent;
*parent = return_hooker;

- trace.func = self_addr;
- trace.depth = current->curr_ret_stack + 1;
-
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace)) {
+ if (function_graph_enter(old, self_addr, frame_pointer, NULL))
*parent = old;
- return;
- }
-
- err = ftrace_push_return_trace(old, self_addr, &trace.depth,
- frame_pointer, NULL);
- if (err == -EBUSY) {
- *parent = old;
- return;
- }
}

#ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
index e0d64f862322..8ce4a79d9360 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
@@ -153,7 +153,7 @@
};

&pcie0 {
- ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_LOW>;
+ ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_HIGH>;
num-lanes = <4>;
pinctrl-names = "default";
pinctrl-0 = <&pcie_clkreqn_cpm>;
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 50986e388d2b..57e962290df3 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -216,8 +216,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
{
unsigned long return_hooker = (unsigned long)&return_to_handler;
unsigned long old;
- struct ftrace_graph_ent trace;
- int err;

if (unlikely(atomic_read(&current->tracing_graph_pause)))
return;
@@ -229,18 +227,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
*/
old = *parent;

- trace.func = self_addr;
- trace.depth = current->curr_ret_stack + 1;
-
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace))
- return;
-
- err = ftrace_push_return_trace(old, self_addr, &trace.depth,
- frame_pointer, NULL);
- if (err == -EBUSY)
- return;
- else
+ if (!function_graph_enter(old, self_addr, frame_pointer, NULL))
*parent = return_hooker;
}

diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c
index d57563c58a26..224eea40e1ee 100644
--- a/arch/microblaze/kernel/ftrace.c
+++ b/arch/microblaze/kernel/ftrace.c
@@ -22,8 +22,7 @@
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
{
unsigned long old;
- int faulted, err;
- struct ftrace_graph_ent trace;
+ int faulted;
unsigned long return_hooker = (unsigned long)
&return_to_handler;

@@ -63,18 +62,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
return;
}

- err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
- if (err == -EBUSY) {
+ if (function_graph_enter(old, self_addr, 0, NULL))
*parent = old;
- return;
- }
-
- trace.func = self_addr;
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace)) {
- current->curr_ret_stack--;
- *parent = old;
- }
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 7f3dfdbc3657..b122cbb4aad1 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -322,7 +322,6 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
unsigned long fp)
{
unsigned long old_parent_ra;
- struct ftrace_graph_ent trace;
unsigned long return_hooker = (unsigned long)
&return_to_handler;
int faulted, insns;
@@ -369,12 +368,6 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
if (unlikely(faulted))
goto out;

- if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp,
- NULL) == -EBUSY) {
- *parent_ra_addr = old_parent_ra;
- return;
- }
-
/*
* Get the recorded ip of the current mcount calling site in the
* __mcount_loc section, which will be used to filter the function
@@ -382,13 +375,10 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
*/

insns = core_kernel_text(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
- trace.func = self_ra - (MCOUNT_INSN_SIZE * insns);
+ self_ra -= (MCOUNT_INSN_SIZE * insns);

- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace)) {
- current->curr_ret_stack--;
+ if (function_graph_enter(old_parent_ra, self_ra, fp, NULL))
*parent_ra_addr = old_parent_ra;
- }
return;
out:
ftrace_graph_stop();
diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
index a0a9679ad5de..8a41372551ff 100644
--- a/arch/nds32/kernel/ftrace.c
+++ b/arch/nds32/kernel/ftrace.c
@@ -211,29 +211,15 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
unsigned long frame_pointer)
{
unsigned long return_hooker = (unsigned long)&return_to_handler;
- struct ftrace_graph_ent trace;
unsigned long old;
- int err;

if (unlikely(atomic_read(&current->tracing_graph_pause)))
return;

old = *parent;

- trace.func = self_addr;
- trace.depth = current->curr_ret_stack + 1;
-
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace))
- return;
-
- err = ftrace_push_return_trace(old, self_addr, &trace.depth,
- frame_pointer, NULL);
-
- if (err == -EBUSY)
- return;
-
- *parent = return_hooker;
+ if (!function_graph_enter(old, self_addr, frame_pointer, NULL))
+ *parent = return_hooker;
}

noinline void ftrace_graph_caller(void)
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 6fa8535d3cce..e46a4157a894 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -30,7 +30,6 @@ static void __hot prepare_ftrace_return(unsigned long *parent,
unsigned long self_addr)
{
unsigned long old;
- struct ftrace_graph_ent trace;
extern int parisc_return_to_handler;

if (unlikely(ftrace_graph_is_dead()))
@@ -41,19 +40,9 @@ static void __hot prepare_ftrace_return(unsigned long *parent,

old = *parent;

- trace.func = self_addr;
- trace.depth = current->curr_ret_stack + 1;
-
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace))
- return;
-
- if (ftrace_push_return_trace(old, self_addr, &trace.depth,
- 0, NULL) == -EBUSY)
- return;
-
- /* activate parisc_return_to_handler() as return point */
- *parent = (unsigned long) &parisc_return_to_handler;
+ if (!function_graph_enter(old, self_addr, 0, NULL))
+ /* activate parisc_return_to_handler() as return point */
+ *parent = (unsigned long) &parisc_return_to_handler;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 4bfbb54dee51..19ef4f5866b6 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -697,7 +697,6 @@ int ftrace_disable_ftrace_graph_caller(void)
*/
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
{
- struct ftrace_graph_ent trace;
unsigned long return_hooker;

if (unlikely(ftrace_graph_is_dead()))
@@ -708,18 +707,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)

return_hooker = ppc_function_entry(return_to_handler);

- trace.func = ip;
- trace.depth = current->curr_ret_stack + 1;
-
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace))
- goto out;
-
- if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
- NULL) == -EBUSY)
- goto out;
-
- parent = return_hooker;
+ if (!function_graph_enter(parent, ip, 0, NULL))
+ parent = return_hooker;
out:
return parent;
}
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index 1157b6b52d25..c433f6d3dd64 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -132,7 +132,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
{
unsigned long return_hooker = (unsigned long)&return_to_handler;
unsigned long old;
- struct ftrace_graph_ent trace;
int err;

if (unlikely(atomic_read(&current->tracing_graph_pause)))
@@ -144,17 +143,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
*/
old = *parent;

- trace.func = self_addr;
- trace.depth = current->curr_ret_stack + 1;
-
- if (!ftrace_graph_entry(&trace))
- return;
-
- err = ftrace_push_return_trace(old, self_addr, &trace.depth,
- frame_pointer, parent);
- if (err == -EBUSY)
- return;
- *parent = return_hooker;
+ if (function_graph_enter(old, self_addr, frame_pointer, parent))
+ *parent = return_hooker;
}

#ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 84be7f02d0c2..39b13d71a8fe 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -203,22 +203,13 @@ device_initcall(ftrace_plt_init);
*/
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
{
- struct ftrace_graph_ent trace;
-
if (unlikely(ftrace_graph_is_dead()))
goto out;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
goto out;
ip -= MCOUNT_INSN_SIZE;
- trace.func = ip;
- trace.depth = current->curr_ret_stack + 1;
- /* Only trace if the calling function expects to. */
- if (!ftrace_graph_entry(&trace))
- goto out;
- if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
- NULL) == -EBUSY)
- goto out;
- parent = (unsigned long) return_to_handler;
+ if (!function_graph_enter(parent, ip, 0, NULL))
+ parent = (unsigned long) return_to_handler;
out:
return parent;
}
diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c
index 96dd9f7da250..1b04270e5460 100644
--- a/arch/sh/kernel/ftrace.c
+++ b/arch/sh/kernel/ftrace.c
@@ -321,8 +321,7 @@ int ftrace_disable_ftrace_graph_caller(void)
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
{
unsigned long old;
- int faulted, err;
- struct ftrace_graph_ent trace;
+ int faulted;
unsigned long return_hooker = (unsigned long)&return_to_handler;

if (unlikely(ftrace_graph_is_dead()))
@@ -365,18 +364,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
return;
}

- err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
- if (err == -EBUSY) {
+ if (function_graph_enter(old, self_addr, 0, NULL))
__raw_writel(old, parent);
- return;
- }
-
- trace.func = self_addr;
-
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace)) {
- current->curr_ret_stack--;
- __raw_writel(old, parent);
- }
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
index 915dda4ae412..684b84ce397f 100644
--- a/arch/sparc/kernel/ftrace.c
+++ b/arch/sparc/kernel/ftrace.c
@@ -126,20 +126,11 @@ unsigned long prepare_ftrace_return(unsigned long parent,
unsigned long frame_pointer)
{
unsigned long return_hooker = (unsigned long) &return_to_handler;
- struct ftrace_graph_ent trace;

if (unlikely(atomic_read(&current->tracing_graph_pause)))
return parent + 8UL;

- trace.func = self_addr;
- trace.depth = current->curr_ret_stack + 1;
-
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace))
- return parent + 8UL;
-
- if (ftrace_push_return_trace(parent, self_addr, &trace.depth,
- frame_pointer, NULL) == -EBUSY)
+ if (function_graph_enter(parent, self_addr, frame_pointer, NULL))
return parent + 8UL;

return return_hooker;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1a0be022f91d..44c6a82b7ce5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -441,10 +441,6 @@ config RETPOLINE
branches. Requires a compiler with -mindirect-branch=thunk-extern
support for full protection. The kernel may run slower.

- Without compiler support, at least indirect branches in assembler
- code are eliminated. Since this includes the syscall entry path,
- it is not entirely pointless.
-
config INTEL_RDT
bool "Intel Resource Director Technology support"
default n
@@ -1005,13 +1001,7 @@ config NR_CPUS
to the kernel image.

config SCHED_SMT
- bool "SMT (Hyperthreading) scheduler support"
- depends on SMP
- ---help---
- SMT scheduler support improves the CPU scheduler's decision making
- when dealing with Intel Pentium 4 chips with HyperThreading at a
- cost of slightly increased overhead in some places. If unsure say
- N here.
+ def_bool y if SMP

config SCHED_MC
def_bool y
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 8f6e7eb8ae9f..9298f0f3817a 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -223,9 +223,10 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables

# Avoid indirect branches in kernel to deal with Spectre
ifdef CONFIG_RETPOLINE
-ifneq ($(RETPOLINE_CFLAGS),)
- KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+ifeq ($(RETPOLINE_CFLAGS),)
+ $(error You are building kernel with non-retpoline compiler, please update your compiler.)
endif
+ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS)
endif

archscripts: scripts_basic
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index dfb2f7c0d019..c8d08da5b308 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -438,26 +438,6 @@ int x86_setup_perfctr(struct perf_event *event)
if (config == -1LL)
return -EINVAL;

- /*
- * Branch tracing:
- */
- if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
- !attr->freq && hwc->sample_period == 1) {
- /* BTS is not supported by this architecture. */
- if (!x86_pmu.bts_active)
- return -EOPNOTSUPP;
-
- /* BTS is currently only allowed for user-mode. */
- if (!attr->exclude_kernel)
- return -EOPNOTSUPP;
-
- /* disallow bts if conflicting events are present */
- if (x86_add_exclusive(x86_lbr_exclusive_lbr))
- return -EBUSY;
-
- event->destroy = hw_perf_lbr_event_destroy;
- }
-
hwc->config |= config;

return 0;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 035c37481f57..155fa4b53c56 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2358,16 +2358,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
static struct event_constraint *
intel_bts_constraints(struct perf_event *event)
{
- struct hw_perf_event *hwc = &event->hw;
- unsigned int hw_event, bts_event;
-
- if (event->attr.freq)
- return NULL;
-
- hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
- bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
-
- if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
+ if (unlikely(intel_pmu_has_bts(event)))
return &bts_constraint;

return NULL;
@@ -2986,10 +2977,51 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
return flags;
}

+static int intel_pmu_bts_config(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+
+ if (unlikely(intel_pmu_has_bts(event))) {
+ /* BTS is not supported by this architecture. */
+ if (!x86_pmu.bts_active)
+ return -EOPNOTSUPP;
+
+ /* BTS is currently only allowed for user-mode. */
+ if (!attr->exclude_kernel)
+ return -EOPNOTSUPP;
+
+ /* BTS is not allowed for precise events. */
+ if (attr->precise_ip)
+ return -EOPNOTSUPP;
+
+ /* disallow bts if conflicting events are present */
+ if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+ return -EBUSY;
+
+ event->destroy = hw_perf_lbr_event_destroy;
+ }
+
+ return 0;
+}
+
+static int core_pmu_hw_config(struct perf_event *event)
+{
+ int ret = x86_pmu_hw_config(event);
+
+ if (ret)
+ return ret;
+
+ return intel_pmu_bts_config(event);
+}
+
static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);

+ if (ret)
+ return ret;
+
+ ret = intel_pmu_bts_config(event);
if (ret)
return ret;

@@ -3015,7 +3047,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
/*
* BTS is set up earlier in this path, so don't account twice
*/
- if (!intel_pmu_has_bts(event)) {
+ if (!unlikely(intel_pmu_has_bts(event))) {
/* disallow lbr if conflicting events are present */
if (x86_add_exclusive(x86_lbr_exclusive_lbr))
return -EBUSY;
@@ -3478,7 +3510,7 @@ static __initconst const struct x86_pmu core_pmu = {
.enable_all = core_pmu_enable_all,
.enable = core_pmu_enable_event,
.disable = x86_pmu_disable_event,
- .hw_config = x86_pmu_hw_config,
+ .hw_config = core_pmu_hw_config,
.schedule_events = x86_schedule_events,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 156286335351..c5ad9cc61f4b 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -857,11 +857,16 @@ static inline int amd_pmu_init(void)

static inline bool intel_pmu_has_bts(struct perf_event *event)
{
- if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
- !event->attr.freq && event->hw.sample_period == 1)
- return true;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int hw_event, bts_event;
+
+ if (event->attr.freq)
+ return false;
+
+ hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
+ bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);

- return false;
+ return hw_event == bts_event && hwc->sample_period == 1;
}

int intel_pmu_save_and_restart(struct perf_event *event);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1c09a0d1771f..022845ee0c88 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1046,7 +1046,8 @@ struct kvm_x86_ops {
bool (*has_wbinvd_exit)(void);

u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu);
- void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
+ /* Returns actual tsc_offset set in active VMCS */
+ u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);

void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4731f0cf97c5..b3486c8b570a 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -41,9 +41,10 @@

#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
-#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
+#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */
+#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
-#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
+#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */

#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index fd2a8c1b88bc..032b6009baab 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -3,6 +3,8 @@
#ifndef _ASM_X86_NOSPEC_BRANCH_H_
#define _ASM_X86_NOSPEC_BRANCH_H_

+#include <linux/static_key.h>
+
#include <asm/alternative.h>
#include <asm/alternative-asm.h>
#include <asm/cpufeatures.h>
@@ -162,29 +164,35 @@
_ASM_PTR " 999b\n\t" \
".popsection\n\t"

-#if defined(CONFIG_X86_64) && defined(RETPOLINE)
+#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_X86_64

/*
- * Since the inline asm uses the %V modifier which is only in newer GCC,
- * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
+ * Inline asm uses the %V modifier which is only in newer GCC
+ * which is ensured when CONFIG_RETPOLINE is defined.
*/
# define CALL_NOSPEC \
ANNOTATE_NOSPEC_ALTERNATIVE \
- ALTERNATIVE( \
+ ALTERNATIVE_2( \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
"call __x86_indirect_thunk_%V[thunk_target]\n", \
- X86_FEATURE_RETPOLINE)
+ X86_FEATURE_RETPOLINE, \
+ "lfence;\n" \
+ ANNOTATE_RETPOLINE_SAFE \
+ "call *%[thunk_target]\n", \
+ X86_FEATURE_RETPOLINE_AMD)
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)

-#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
+#else /* CONFIG_X86_32 */
/*
* For i386 we use the original ret-equivalent retpoline, because
* otherwise we'll run out of registers. We don't care about CET
* here, anyway.
*/
# define CALL_NOSPEC \
- ALTERNATIVE( \
+ ANNOTATE_NOSPEC_ALTERNATIVE \
+ ALTERNATIVE_2( \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
" jmp 904f;\n" \
@@ -199,9 +207,14 @@
" ret;\n" \
" .align 16\n" \
"904: call 901b;\n", \
- X86_FEATURE_RETPOLINE)
+ X86_FEATURE_RETPOLINE, \
+ "lfence;\n" \
+ ANNOTATE_RETPOLINE_SAFE \
+ "call *%[thunk_target]\n", \
+ X86_FEATURE_RETPOLINE_AMD)

# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#endif
#else /* No retpoline for C / inline asm */
# define CALL_NOSPEC "call *%[thunk_target]\n"
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
@@ -210,13 +223,19 @@
/* The Spectre V2 mitigation variants */
enum spectre_v2_mitigation {
SPECTRE_V2_NONE,
- SPECTRE_V2_RETPOLINE_MINIMAL,
- SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
SPECTRE_V2_RETPOLINE_GENERIC,
SPECTRE_V2_RETPOLINE_AMD,
SPECTRE_V2_IBRS_ENHANCED,
};

+/* The indirect branch speculation control variants */
+enum spectre_v2_user_mitigation {
+ SPECTRE_V2_USER_NONE,
+ SPECTRE_V2_USER_STRICT,
+ SPECTRE_V2_USER_PRCTL,
+ SPECTRE_V2_USER_SECCOMP,
+};
+
/* The Speculative Store Bypass disable variants */
enum ssb_mitigation {
SPEC_STORE_BYPASS_NONE,
@@ -294,6 +313,10 @@ do { \
preempt_enable(); \
} while (0)

+DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
+DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
+DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
+
#endif /* __ASSEMBLY__ */

/*
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index ae7c2c5cd7f0..5393babc0598 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -53,12 +53,24 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
}

+static inline u64 stibp_tif_to_spec_ctrl(u64 tifn)
+{
+ BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);
+ return (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
+}
+
static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl)
{
BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
}

+static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl)
+{
+ BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);
+ return (spec_ctrl & SPEC_CTRL_STIBP) << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
+}
+
static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
{
return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
@@ -70,11 +82,7 @@ extern void speculative_store_bypass_ht_init(void);
static inline void speculative_store_bypass_ht_init(void) { }
#endif

-extern void speculative_store_bypass_update(unsigned long tif);
-
-static inline void speculative_store_bypass_update_current(void)
-{
- speculative_store_bypass_update(current_thread_info()->flags);
-}
+extern void speculation_ctrl_update(unsigned long tif);
+extern void speculation_ctrl_update_current(void);

#endif
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 36bd243843d6..7cf1a270d891 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -11,9 +11,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev,

__visible struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *next);
-struct tss_struct;
-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
- struct tss_struct *tss);

/* This runs runs on the previous thread's stack. */
static inline void prepare_switch_to(struct task_struct *next)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 2ff2a30a264f..82b73b75d67c 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -79,10 +79,12 @@ struct thread_info {
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
-#define TIF_SSBD 5 /* Reduced data speculation */
+#define TIF_SSBD 5 /* Speculative store bypass disable */
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
+#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
+#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
#define TIF_PATCH_PENDING 13 /* pending live patching update */
@@ -110,6 +112,8 @@ struct thread_info {
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
+#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
+#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
@@ -145,8 +149,18 @@ struct thread_info {
_TIF_FSCHECK)

/* flags to check in __switch_to() */
-#define _TIF_WORK_CTXSW \
- (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
+#define _TIF_WORK_CTXSW_BASE \
+ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \
+ _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
+
+/*
+ * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
+ */
+#ifdef CONFIG_SMP
+# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE | _TIF_SPEC_IB)
+#else
+# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE)
+#endif

#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 0e2130d8d6b1..79ec7add5f98 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -185,10 +185,14 @@ struct tlb_state {

#define LOADED_MM_SWITCHING ((struct mm_struct *)1)

+ /* Last user mm for optimizing IBPB */
+ union {
+ struct mm_struct *last_user_mm;
+ unsigned long last_user_mm_ibpb;
+ };
+
u16 loaded_mm_asid;
u16 next_asid;
- /* last user mm's ctx id */
- u64 last_ctx_id;

/*
* We can be in one of several states:
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 40bdaea97fe7..78928f56cf72 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/nospec.h>
#include <linux/prctl.h>
+#include <linux/sched/smt.h>

#include <asm/spec-ctrl.h>
#include <asm/cmdline.h>
@@ -35,12 +36,10 @@ static void __init spectre_v2_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);

-/*
- * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
- * writes to SPEC_CTRL contain whatever reserved bits have been set.
- */
-u64 __ro_after_init x86_spec_ctrl_base;
+/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
+u64 x86_spec_ctrl_base;
EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
+static DEFINE_MUTEX(spec_ctrl_mutex);

/*
* The vendor and possibly platform specific bits which can be modified in
@@ -55,6 +54,13 @@ static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
u64 __ro_after_init x86_amd_ls_cfg_base;
u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;

+/* Control conditional STIPB in switch_to() */
+DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp);
+/* Control conditional IBPB in switch_mm() */
+DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
+/* Control unconditional IBPB in switch_mm() */
+DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
+
void __init check_bugs(void)
{
identify_boot_cpu();
@@ -125,31 +131,6 @@ void __init check_bugs(void)
#endif
}

-/* The kernel command line selection */
-enum spectre_v2_mitigation_cmd {
- SPECTRE_V2_CMD_NONE,
- SPECTRE_V2_CMD_AUTO,
- SPECTRE_V2_CMD_FORCE,
- SPECTRE_V2_CMD_RETPOLINE,
- SPECTRE_V2_CMD_RETPOLINE_GENERIC,
- SPECTRE_V2_CMD_RETPOLINE_AMD,
-};
-
-static const char *spectre_v2_strings[] = {
- [SPECTRE_V2_NONE] = "Vulnerable",
- [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
- [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
- [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
- [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
- [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
-};
-
-#undef pr_fmt
-#define pr_fmt(fmt) "Spectre V2 : " fmt
-
-static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
- SPECTRE_V2_NONE;
-
void
x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
{
@@ -171,6 +152,10 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
static_cpu_has(X86_FEATURE_AMD_SSBD))
hostval |= ssbd_tif_to_spec_ctrl(ti->flags);

+ /* Conditional STIBP enabled? */
+ if (static_branch_unlikely(&switch_to_cond_stibp))
+ hostval |= stibp_tif_to_spec_ctrl(ti->flags);
+
if (hostval != guestval) {
msrval = setguest ? guestval : hostval;
wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
@@ -204,7 +189,7 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
ssbd_spec_ctrl_to_tif(hostval);

- speculative_store_bypass_update(tif);
+ speculation_ctrl_update(tif);
}
}
EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
@@ -219,6 +204,15 @@ static void x86_amd_ssb_disable(void)
wrmsrl(MSR_AMD64_LS_CFG, msrval);
}

+#undef pr_fmt
+#define pr_fmt(fmt) "Spectre V2 : " fmt
+
+static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+ SPECTRE_V2_NONE;
+
+static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init =
+ SPECTRE_V2_USER_NONE;
+
#ifdef RETPOLINE
static bool spectre_v2_bad_module;

@@ -240,67 +234,217 @@ static inline const char *spectre_v2_module_string(void)
static inline const char *spectre_v2_module_string(void) { return ""; }
#endif

-static void __init spec2_print_if_insecure(const char *reason)
+static inline bool match_option(const char *arg, int arglen, const char *opt)
{
- if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
- pr_info("%s selected on command line.\n", reason);
+ int len = strlen(opt);
+
+ return len == arglen && !strncmp(arg, opt, len);
}

-static void __init spec2_print_if_secure(const char *reason)
+/* The kernel command line selection for spectre v2 */
+enum spectre_v2_mitigation_cmd {
+ SPECTRE_V2_CMD_NONE,
+ SPECTRE_V2_CMD_AUTO,
+ SPECTRE_V2_CMD_FORCE,
+ SPECTRE_V2_CMD_RETPOLINE,
+ SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+ SPECTRE_V2_CMD_RETPOLINE_AMD,
+};
+
+enum spectre_v2_user_cmd {
+ SPECTRE_V2_USER_CMD_NONE,
+ SPECTRE_V2_USER_CMD_AUTO,
+ SPECTRE_V2_USER_CMD_FORCE,
+ SPECTRE_V2_USER_CMD_PRCTL,
+ SPECTRE_V2_USER_CMD_PRCTL_IBPB,
+ SPECTRE_V2_USER_CMD_SECCOMP,
+ SPECTRE_V2_USER_CMD_SECCOMP_IBPB,
+};
+
+static const char * const spectre_v2_user_strings[] = {
+ [SPECTRE_V2_USER_NONE] = "User space: Vulnerable",
+ [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection",
+ [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl",
+ [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl",
+};
+
+static const struct {
+ const char *option;
+ enum spectre_v2_user_cmd cmd;
+ bool secure;
+} v2_user_options[] __initdata = {
+ { "auto", SPECTRE_V2_USER_CMD_AUTO, false },
+ { "off", SPECTRE_V2_USER_CMD_NONE, false },
+ { "on", SPECTRE_V2_USER_CMD_FORCE, true },
+ { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false },
+ { "prctl,ibpb", SPECTRE_V2_USER_CMD_PRCTL_IBPB, false },
+ { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false },
+ { "seccomp,ibpb", SPECTRE_V2_USER_CMD_SECCOMP_IBPB, false },
+};
+
+static void __init spec_v2_user_print_cond(const char *reason, bool secure)
{
- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
- pr_info("%s selected on command line.\n", reason);
+ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
+ pr_info("spectre_v2_user=%s forced on command line.\n", reason);
}

-static inline bool retp_compiler(void)
+static enum spectre_v2_user_cmd __init
+spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
{
- return __is_defined(RETPOLINE);
+ char arg[20];
+ int ret, i;
+
+ switch (v2_cmd) {
+ case SPECTRE_V2_CMD_NONE:
+ return SPECTRE_V2_USER_CMD_NONE;
+ case SPECTRE_V2_CMD_FORCE:
+ return SPECTRE_V2_USER_CMD_FORCE;
+ default:
+ break;
+ }
+
+ ret = cmdline_find_option(boot_command_line, "spectre_v2_user",
+ arg, sizeof(arg));
+ if (ret < 0)
+ return SPECTRE_V2_USER_CMD_AUTO;
+
+ for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) {
+ if (match_option(arg, ret, v2_user_options[i].option)) {
+ spec_v2_user_print_cond(v2_user_options[i].option,
+ v2_user_options[i].secure);
+ return v2_user_options[i].cmd;
+ }
+ }
+
+ pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg);
+ return SPECTRE_V2_USER_CMD_AUTO;
}

-static inline bool match_option(const char *arg, int arglen, const char *opt)
+static void __init
+spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
{
- int len = strlen(opt);
+ enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
+ bool smt_possible = IS_ENABLED(CONFIG_SMP);
+ enum spectre_v2_user_cmd cmd;

- return len == arglen && !strncmp(arg, opt, len);
+ if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP))
+ return;
+
+ if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
+ cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+ smt_possible = false;
+
+ cmd = spectre_v2_parse_user_cmdline(v2_cmd);
+ switch (cmd) {
+ case SPECTRE_V2_USER_CMD_NONE:
+ goto set_mode;
+ case SPECTRE_V2_USER_CMD_FORCE:
+ mode = SPECTRE_V2_USER_STRICT;
+ break;
+ case SPECTRE_V2_USER_CMD_PRCTL:
+ case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
+ mode = SPECTRE_V2_USER_PRCTL;
+ break;
+ case SPECTRE_V2_USER_CMD_AUTO:
+ case SPECTRE_V2_USER_CMD_SECCOMP:
+ case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
+ if (IS_ENABLED(CONFIG_SECCOMP))
+ mode = SPECTRE_V2_USER_SECCOMP;
+ else
+ mode = SPECTRE_V2_USER_PRCTL;
+ break;
+ }
+
+ /* Initialize Indirect Branch Prediction Barrier */
+ if (boot_cpu_has(X86_FEATURE_IBPB)) {
+ setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
+
+ switch (cmd) {
+ case SPECTRE_V2_USER_CMD_FORCE:
+ case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
+ case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
+ static_branch_enable(&switch_mm_always_ibpb);
+ break;
+ case SPECTRE_V2_USER_CMD_PRCTL:
+ case SPECTRE_V2_USER_CMD_AUTO:
+ case SPECTRE_V2_USER_CMD_SECCOMP:
+ static_branch_enable(&switch_mm_cond_ibpb);
+ break;
+ default:
+ break;
+ }
+
+ pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
+ static_key_enabled(&switch_mm_always_ibpb) ?
+ "always-on" : "conditional");
+ }
+
+ /* If enhanced IBRS is enabled no STIPB required */
+ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+ return;
+
+ /*
+ * If SMT is not possible or STIBP is not available clear the STIPB
+ * mode.
+ */
+ if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP))
+ mode = SPECTRE_V2_USER_NONE;
+set_mode:
+ spectre_v2_user = mode;
+ /* Only print the STIBP mode when SMT possible */
+ if (smt_possible)
+ pr_info("%s\n", spectre_v2_user_strings[mode]);
}

+static const char * const spectre_v2_strings[] = {
+ [SPECTRE_V2_NONE] = "Vulnerable",
+ [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
+ [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
+ [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
+};
+
static const struct {
const char *option;
enum spectre_v2_mitigation_cmd cmd;
bool secure;
-} mitigation_options[] = {
- { "off", SPECTRE_V2_CMD_NONE, false },
- { "on", SPECTRE_V2_CMD_FORCE, true },
- { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
- { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
- { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
- { "auto", SPECTRE_V2_CMD_AUTO, false },
+} mitigation_options[] __initdata = {
+ { "off", SPECTRE_V2_CMD_NONE, false },
+ { "on", SPECTRE_V2_CMD_FORCE, true },
+ { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
+ { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
+ { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
+ { "auto", SPECTRE_V2_CMD_AUTO, false },
};

+static void __init spec_v2_print_cond(const char *reason, bool secure)
+{
+ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
+ pr_info("%s selected on command line.\n", reason);
+}
+
static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
{
+ enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
char arg[20];
int ret, i;
- enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;

if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
return SPECTRE_V2_CMD_NONE;
- else {
- ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
- if (ret < 0)
- return SPECTRE_V2_CMD_AUTO;

- for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
- if (!match_option(arg, ret, mitigation_options[i].option))
- continue;
- cmd = mitigation_options[i].cmd;
- break;
- }
+ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
+ if (ret < 0)
+ return SPECTRE_V2_CMD_AUTO;

- if (i >= ARRAY_SIZE(mitigation_options)) {
- pr_err("unknown option (%s). Switching to AUTO select\n", arg);
- return SPECTRE_V2_CMD_AUTO;
- }
+ for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
+ if (!match_option(arg, ret, mitigation_options[i].option))
+ continue;
+ cmd = mitigation_options[i].cmd;
+ break;
+ }
+
+ if (i >= ARRAY_SIZE(mitigation_options)) {
+ pr_err("unknown option (%s). Switching to AUTO select\n", arg);
+ return SPECTRE_V2_CMD_AUTO;
}

if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
@@ -317,11 +461,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return SPECTRE_V2_CMD_AUTO;
}

- if (mitigation_options[i].secure)
- spec2_print_if_secure(mitigation_options[i].option);
- else
- spec2_print_if_insecure(mitigation_options[i].option);
-
+ spec_v2_print_cond(mitigation_options[i].option,
+ mitigation_options[i].secure);
return cmd;
}

@@ -377,14 +518,12 @@ static void __init spectre_v2_select_mitigation(void)
pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
goto retpoline_generic;
}
- mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
- SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
+ mode = SPECTRE_V2_RETPOLINE_AMD;
setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
} else {
retpoline_generic:
- mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
- SPECTRE_V2_RETPOLINE_MINIMAL;
+ mode = SPECTRE_V2_RETPOLINE_GENERIC;
setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
}

@@ -403,12 +542,6 @@ static void __init spectre_v2_select_mitigation(void)
setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");

- /* Initialize Indirect Branch Prediction Barrier if supported */
- if (boot_cpu_has(X86_FEATURE_IBPB)) {
- setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
- pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
- }
-
/*
* Retpoline means the kernel is safe because it has no indirect
* branches. Enhanced IBRS protects firmware too, so, enable restricted
@@ -424,6 +557,66 @@ static void __init spectre_v2_select_mitigation(void)
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
+
+ /* Set up IBPB and STIBP depending on the general spectre V2 command */
+ spectre_v2_user_select_mitigation(cmd);
+
+ /* Enable STIBP if appropriate */
+ arch_smt_update();
+}
+
+static void update_stibp_msr(void * __unused)
+{
+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+}
+
+/* Update x86_spec_ctrl_base in case SMT state changed. */
+static void update_stibp_strict(void)
+{
+ u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP;
+
+ if (sched_smt_active())
+ mask |= SPEC_CTRL_STIBP;
+
+ if (mask == x86_spec_ctrl_base)
+ return;
+
+ pr_info("Update user space SMT mitigation: STIBP %s\n",
+ mask & SPEC_CTRL_STIBP ? "always-on" : "off");
+ x86_spec_ctrl_base = mask;
+ on_each_cpu(update_stibp_msr, NULL, 1);
+}
+
+/* Update the static key controlling the evaluation of TIF_SPEC_IB */
+static void update_indir_branch_cond(void)
+{
+ if (sched_smt_active())
+ static_branch_enable(&switch_to_cond_stibp);
+ else
+ static_branch_disable(&switch_to_cond_stibp);
+}
+
+void arch_smt_update(void)
+{
+ /* Enhanced IBRS implies STIBP. No update required. */
+ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+ return;
+
+ mutex_lock(&spec_ctrl_mutex);
+
+ switch (spectre_v2_user) {
+ case SPECTRE_V2_USER_NONE:
+ break;
+ case SPECTRE_V2_USER_STRICT:
+ update_stibp_strict();
+ break;
+ case SPECTRE_V2_USER_PRCTL:
+ case SPECTRE_V2_USER_SECCOMP:
+ update_indir_branch_cond();
+ break;
+ }
+
+ mutex_unlock(&spec_ctrl_mutex);
}

#undef pr_fmt
@@ -440,7 +633,7 @@ enum ssb_mitigation_cmd {
SPEC_STORE_BYPASS_CMD_SECCOMP,
};

-static const char *ssb_strings[] = {
+static const char * const ssb_strings[] = {
[SPEC_STORE_BYPASS_NONE] = "Vulnerable",
[SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled",
[SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl",
@@ -450,7 +643,7 @@ static const char *ssb_strings[] = {
static const struct {
const char *option;
enum ssb_mitigation_cmd cmd;
-} ssb_mitigation_options[] = {
+} ssb_mitigation_options[] __initdata = {
{ "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
{ "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
{ "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
@@ -561,10 +754,25 @@ static void ssb_select_mitigation(void)
#undef pr_fmt
#define pr_fmt(fmt) "Speculation prctl: " fmt

-static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
+static void task_update_spec_tif(struct task_struct *tsk)
{
- bool update;
+ /* Force the update of the real TIF bits */
+ set_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE);

+ /*
+ * Immediately update the speculation control MSRs for the current
+ * task, but for a non-current task delay setting the CPU
+ * mitigation until it is scheduled next.
+ *
+ * This can only happen for SECCOMP mitigation. For PRCTL it's
+ * always the current task.
+ */
+ if (tsk == current)
+ speculation_ctrl_update_current();
+}
+
+static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
return -ENXIO;
@@ -575,28 +783,56 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
if (task_spec_ssb_force_disable(task))
return -EPERM;
task_clear_spec_ssb_disable(task);
- update = test_and_clear_tsk_thread_flag(task, TIF_SSBD);
+ task_update_spec_tif(task);
break;
case PR_SPEC_DISABLE:
task_set_spec_ssb_disable(task);
- update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+ task_update_spec_tif(task);
break;
case PR_SPEC_FORCE_DISABLE:
task_set_spec_ssb_disable(task);
task_set_spec_ssb_force_disable(task);
- update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+ task_update_spec_tif(task);
break;
default:
return -ERANGE;
}
+ return 0;
+}

- /*
- * If being set on non-current task, delay setting the CPU
- * mitigation until it is next scheduled.
- */
- if (task == current && update)
- speculative_store_bypass_update_current();
-
+static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+ switch (ctrl) {
+ case PR_SPEC_ENABLE:
+ if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+ return 0;
+ /*
+ * Indirect branch speculation is always disabled in strict
+ * mode.
+ */
+ if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
+ return -EPERM;
+ task_clear_spec_ib_disable(task);
+ task_update_spec_tif(task);
+ break;
+ case PR_SPEC_DISABLE:
+ case PR_SPEC_FORCE_DISABLE:
+ /*
+ * Indirect branch speculation is always allowed when
+ * mitigation is force disabled.
+ */
+ if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+ return -EPERM;
+ if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
+ return 0;
+ task_set_spec_ib_disable(task);
+ if (ctrl == PR_SPEC_FORCE_DISABLE)
+ task_set_spec_ib_force_disable(task);
+ task_update_spec_tif(task);
+ break;
+ default:
+ return -ERANGE;
+ }
return 0;
}

@@ -606,6 +842,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
switch (which) {
case PR_SPEC_STORE_BYPASS:
return ssb_prctl_set(task, ctrl);
+ case PR_SPEC_INDIRECT_BRANCH:
+ return ib_prctl_set(task, ctrl);
default:
return -ENODEV;
}
@@ -616,6 +854,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
{
if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
+ if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP)
+ ib_prctl_set(task, PR_SPEC_FORCE_DISABLE);
}
#endif

@@ -638,11 +878,35 @@ static int ssb_prctl_get(struct task_struct *task)
}
}

+static int ib_prctl_get(struct task_struct *task)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ return PR_SPEC_NOT_AFFECTED;
+
+ switch (spectre_v2_user) {
+ case SPECTRE_V2_USER_NONE:
+ return PR_SPEC_ENABLE;
+ case SPECTRE_V2_USER_PRCTL:
+ case SPECTRE_V2_USER_SECCOMP:
+ if (task_spec_ib_force_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+ if (task_spec_ib_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+ return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+ case SPECTRE_V2_USER_STRICT:
+ return PR_SPEC_DISABLE;
+ default:
+ return PR_SPEC_NOT_AFFECTED;
+ }
+}
+
int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
{
switch (which) {
case PR_SPEC_STORE_BYPASS:
return ssb_prctl_get(task);
+ case PR_SPEC_INDIRECT_BRANCH:
+ return ib_prctl_get(task);
default:
return -ENODEV;
}
@@ -780,7 +1044,7 @@ early_param("l1tf", l1tf_cmdline);
#define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion"

#if IS_ENABLED(CONFIG_KVM_INTEL)
-static const char *l1tf_vmx_states[] = {
+static const char * const l1tf_vmx_states[] = {
[VMENTER_L1D_FLUSH_AUTO] = "auto",
[VMENTER_L1D_FLUSH_NEVER] = "vulnerable",
[VMENTER_L1D_FLUSH_COND] = "conditional cache flushes",
@@ -796,13 +1060,14 @@ static ssize_t l1tf_show_state(char *buf)

if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED ||
(l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER &&
- cpu_smt_control == CPU_SMT_ENABLED))
+ sched_smt_active())) {
return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG,
l1tf_vmx_states[l1tf_vmx_mitigation]);
+ }

return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG,
l1tf_vmx_states[l1tf_vmx_mitigation],
- cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled");
+ sched_smt_active() ? "vulnerable" : "disabled");
}
#else
static ssize_t l1tf_show_state(char *buf)
@@ -811,6 +1076,36 @@ static ssize_t l1tf_show_state(char *buf)
}
#endif

+static char *stibp_state(void)
+{
+ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+ return "";
+
+ switch (spectre_v2_user) {
+ case SPECTRE_V2_USER_NONE:
+ return ", STIBP: disabled";
+ case SPECTRE_V2_USER_STRICT:
+ return ", STIBP: forced";
+ case SPECTRE_V2_USER_PRCTL:
+ case SPECTRE_V2_USER_SECCOMP:
+ if (static_key_enabled(&switch_to_cond_stibp))
+ return ", STIBP: conditional";
+ }
+ return "";
+}
+
+static char *ibpb_state(void)
+{
+ if (boot_cpu_has(X86_FEATURE_IBPB)) {
+ if (static_key_enabled(&switch_mm_always_ibpb))
+ return ", IBPB: always-on";
+ if (static_key_enabled(&switch_mm_cond_ibpb))
+ return ", IBPB: conditional";
+ return ", IBPB: disabled";
+ }
+ return "";
+}
+
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
@@ -831,9 +1126,11 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
return sprintf(buf, "Mitigation: __user pointer sanitization\n");

case X86_BUG_SPECTRE_V2:
- return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
- boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+ return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+ ibpb_state(),
boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+ stibp_state(),
+ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
spectre_v2_module_string());

case X86_BUG_SPEC_STORE_BYPASS:
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index dd33c357548f..e12454e21b8a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -56,7 +56,7 @@
/* Threshold LVT offset is at MSR0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF 0xF000

-static bool thresholding_en;
+static bool thresholding_irq_en;

static const char * const th_names[] = {
"load_store",
@@ -534,9 +534,8 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,

set_offset:
offset = setup_APIC_mce_threshold(offset, new);
-
- if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
- mce_threshold_vector = amd_threshold_interrupt;
+ if (offset == new)
+ thresholding_irq_en = true;

done:
mce_threshold_block_init(&b, offset);
@@ -1357,9 +1356,6 @@ int mce_threshold_remove_device(unsigned int cpu)
{
unsigned int bank;

- if (!thresholding_en)
- return 0;
-
for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
@@ -1377,9 +1373,6 @@ int mce_threshold_create_device(unsigned int cpu)
struct threshold_bank **bp;
int err = 0;

- if (!thresholding_en)
- return 0;
-
bp = per_cpu(threshold_banks, cpu);
if (bp)
return 0;
@@ -1408,9 +1401,6 @@ static __init int threshold_init_device(void)
{
unsigned lcpu = 0;

- if (mce_threshold_vector == amd_threshold_interrupt)
- thresholding_en = true;
-
/* to hit CPUs online before the notifier is up */
for_each_online_cpu(lcpu) {
int err = mce_threshold_create_device(lcpu);
@@ -1419,6 +1409,9 @@ static __init int threshold_init_device(void)
return err;
}

+ if (thresholding_irq_en)
+ mce_threshold_vector = amd_threshold_interrupt;
+
return 0;
}
/*
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 61a949d84dfa..d99a8ee9e185 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -344,10 +344,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
}

+ local_bh_disable();
fpu->initialized = 1;
- preempt_disable();
fpu__restore(fpu);
- preempt_enable();
+ local_bh_enable();

return err;
} else {
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 01ebcb6f263e..7ee8067cbf45 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -994,7 +994,6 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
{
unsigned long old;
int faulted;
- struct ftrace_graph_ent trace;
unsigned long return_hooker = (unsigned long)
&return_to_handler;

@@ -1046,19 +1045,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
return;
}

- trace.func = self_addr;
- trace.depth = current->curr_ret_stack + 1;
-
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace)) {
+ if (function_graph_enter(old, self_addr, frame_pointer, parent))
*parent = old;
- return;
- }
-
- if (ftrace_push_return_trace(old, self_addr, &trace.depth,
- frame_pointer, parent) == -EBUSY) {
- *parent = old;
- return;
- }
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c93fcfdf1673..7d31192296a8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -40,6 +40,8 @@
#include <asm/prctl.h>
#include <asm/spec-ctrl.h>

+#include "process.h"
+
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
* no more per-task TSS's. The TSS size is kept cacheline-aligned
@@ -252,11 +254,12 @@ void arch_setup_new_exec(void)
enable_cpuid();
}

-static inline void switch_to_bitmap(struct tss_struct *tss,
- struct thread_struct *prev,
+static inline void switch_to_bitmap(struct thread_struct *prev,
struct thread_struct *next,
unsigned long tifp, unsigned long tifn)
{
+ struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+
if (tifn & _TIF_IO_BITMAP) {
/*
* Copy the relevant range of the IO bitmap.
@@ -395,32 +398,85 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
}

-static __always_inline void intel_set_ssb_state(unsigned long tifn)
+/*
+ * Update the MSRs managing speculation control, during context switch.
+ *
+ * tifp: Previous task's thread flags
+ * tifn: Next task's thread flags
+ */
+static __always_inline void __speculation_ctrl_update(unsigned long tifp,
+ unsigned long tifn)
{
- u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
+ unsigned long tif_diff = tifp ^ tifn;
+ u64 msr = x86_spec_ctrl_base;
+ bool updmsr = false;
+
+ /*
+ * If TIF_SSBD is different, select the proper mitigation
+ * method. Note that if SSBD mitigation is disabled or permanentely
+ * enabled this branch can't be taken because nothing can set
+ * TIF_SSBD.
+ */
+ if (tif_diff & _TIF_SSBD) {
+ if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) {
+ amd_set_ssb_virt_state(tifn);
+ } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
+ amd_set_core_ssb_state(tifn);
+ } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+ static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+ msr |= ssbd_tif_to_spec_ctrl(tifn);
+ updmsr = true;
+ }
+ }
+
+ /*
+ * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled,
+ * otherwise avoid the MSR write.
+ */
+ if (IS_ENABLED(CONFIG_SMP) &&
+ static_branch_unlikely(&switch_to_cond_stibp)) {
+ updmsr |= !!(tif_diff & _TIF_SPEC_IB);
+ msr |= stibp_tif_to_spec_ctrl(tifn);
+ }

- wrmsrl(MSR_IA32_SPEC_CTRL, msr);
+ if (updmsr)
+ wrmsrl(MSR_IA32_SPEC_CTRL, msr);
}

-static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
+static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
{
- if (static_cpu_has(X86_FEATURE_VIRT_SSBD))
- amd_set_ssb_virt_state(tifn);
- else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
- amd_set_core_ssb_state(tifn);
- else
- intel_set_ssb_state(tifn);
+ if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) {
+ if (task_spec_ssb_disable(tsk))
+ set_tsk_thread_flag(tsk, TIF_SSBD);
+ else
+ clear_tsk_thread_flag(tsk, TIF_SSBD);
+
+ if (task_spec_ib_disable(tsk))
+ set_tsk_thread_flag(tsk, TIF_SPEC_IB);
+ else
+ clear_tsk_thread_flag(tsk, TIF_SPEC_IB);
+ }
+ /* Return the updated threadinfo flags*/
+ return task_thread_info(tsk)->flags;
}

-void speculative_store_bypass_update(unsigned long tif)
+void speculation_ctrl_update(unsigned long tif)
{
+ /* Forced update. Make sure all relevant TIF flags are different */
preempt_disable();
- __speculative_store_bypass_update(tif);
+ __speculation_ctrl_update(~tif, tif);
preempt_enable();
}

-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
- struct tss_struct *tss)
+/* Called from seccomp/prctl update */
+void speculation_ctrl_update_current(void)
+{
+ preempt_disable();
+ speculation_ctrl_update(speculation_ctrl_update_tif(current));
+ preempt_enable();
+}
+
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev, *next;
unsigned long tifp, tifn;
@@ -430,7 +486,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,

tifn = READ_ONCE(task_thread_info(next_p)->flags);
tifp = READ_ONCE(task_thread_info(prev_p)->flags);
- switch_to_bitmap(tss, prev, next, tifp, tifn);
+ switch_to_bitmap(prev, next, tifp, tifn);

propagate_user_return_notify(prev_p, next_p);

@@ -451,8 +507,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
if ((tifp ^ tifn) & _TIF_NOCPUID)
set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));

- if ((tifp ^ tifn) & _TIF_SSBD)
- __speculative_store_bypass_update(tifn);
+ if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) {
+ __speculation_ctrl_update(tifp, tifn);
+ } else {
+ speculation_ctrl_update_tif(prev_p);
+ tifn = speculation_ctrl_update_tif(next_p);
+
+ /* Enforce MSR update to ensure consistent state */
+ __speculation_ctrl_update(~tifn, tifn);
+ }
}

/*
diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h
new file mode 100644
index 000000000000..898e97cf6629
--- /dev/null
+++ b/arch/x86/kernel/process.h
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Code shared between 32 and 64 bit
+
+#include <asm/spec-ctrl.h>
+
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p);
+
+/*
+ * This needs to be inline to optimize for the common case where no extra
+ * work needs to be done.
+ */
+static inline void switch_to_extra(struct task_struct *prev,
+ struct task_struct *next)
+{
+ unsigned long next_tif = task_thread_info(next)->flags;
+ unsigned long prev_tif = task_thread_info(prev)->flags;
+
+ if (IS_ENABLED(CONFIG_SMP)) {
+ /*
+ * Avoid __switch_to_xtra() invocation when conditional
+ * STIPB is disabled and the only different bit is
+ * TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not
+ * in the TIF_WORK_CTXSW masks.
+ */
+ if (!static_branch_likely(&switch_to_cond_stibp)) {
+ prev_tif &= ~_TIF_SPEC_IB;
+ next_tif &= ~_TIF_SPEC_IB;
+ }
+ }
+
+ /*
+ * __switch_to_xtra() handles debug registers, i/o bitmaps,
+ * speculation mitigations etc.
+ */
+ if (unlikely(next_tif & _TIF_WORK_CTXSW_NEXT ||
+ prev_tif & _TIF_WORK_CTXSW_PREV))
+ __switch_to_xtra(prev, next);
+}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 5046a3c9dec2..d3e593eb189f 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -59,6 +59,8 @@
#include <asm/intel_rdt_sched.h>
#include <asm/proto.h>

+#include "process.h"
+
void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
@@ -232,7 +234,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct fpu *prev_fpu = &prev->fpu;
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
- struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);

/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

@@ -264,12 +265,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
set_iopl_mask(next->iopl);

- /*
- * Now maybe handle debug registers and/or IO bitmaps
- */
- if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
- task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
- __switch_to_xtra(prev_p, next_p, tss);
+ switch_to_extra(prev_p, next_p);

/*
* Leave lazy mode, flushing any hypercalls made here.
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ea5ea850348d..a0854f283efd 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -59,6 +59,8 @@
#include <asm/unistd_32_ia32.h>
#endif

+#include "process.h"
+
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);

/* Prints also some state that isn't saved in the pt_regs */
@@ -422,7 +424,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct fpu *prev_fpu = &prev->fpu;
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
- struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);

WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
this_cpu_read(irq_count) != -1);
@@ -489,12 +490,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/* Reload sp0. */
update_task_stack(next_p);

- /*
- * Now maybe reload the debug registers and handle I/O bitmaps
- */
- if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
- task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
- __switch_to_xtra(prev_p, next_p, tss);
+ switch_to_extra(prev_p, next_p);

#ifdef CONFIG_XEN_PV
/*
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index fbb0e6df121b..2216d21e955d 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -571,6 +571,11 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
rcu_read_lock();
map = rcu_dereference(kvm->arch.apic_map);

+ if (unlikely(!map)) {
+ count = -EOPNOTSUPP;
+ goto out;
+ }
+
if (min > map->max_apic_id)
goto out;
/* Bits above cluster_size are masked in the caller. */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 51b953ad9d4e..1b82bc7c3cca 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -5013,9 +5013,9 @@ static bool need_remote_flush(u64 old, u64 new)
}

static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
- const u8 *new, int *bytes)
+ int *bytes)
{
- u64 gentry;
+ u64 gentry = 0;
int r;

/*
@@ -5027,22 +5027,12 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
*gpa &= ~(gpa_t)7;
*bytes = 8;
- r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8);
- if (r)
- gentry = 0;
- new = (const u8 *)&gentry;
}

- switch (*bytes) {
- case 4:
- gentry = *(const u32 *)new;
- break;
- case 8:
- gentry = *(const u64 *)new;
- break;
- default:
- gentry = 0;
- break;
+ if (*bytes == 4 || *bytes == 8) {
+ r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes);
+ if (r)
+ gentry = 0;
}

return gentry;
@@ -5146,8 +5136,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,

pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);

- gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes);
-
/*
* No need to care whether allocation memory is successful
* or not since pte prefetch is skiped if it does not have
@@ -5156,6 +5144,9 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
mmu_topup_memory_caches(vcpu);

spin_lock(&vcpu->kvm->mmu_lock);
+
+ gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
+
++vcpu->kvm->stat.mmu_pte_write;
kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 61ccfb13899e..4397fa0c448f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1444,7 +1444,7 @@ static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
return vcpu->arch.tsc_offset;
}

-static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
struct vcpu_svm *svm = to_svm(vcpu);
u64 g_tsc_offset = 0;
@@ -1462,6 +1462,7 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
svm->vmcb->control.tsc_offset = offset + g_tsc_offset;

mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
+ return svm->vmcb->control.tsc_offset;
}

static void avic_init_vmcb(struct vcpu_svm *svm)
@@ -2187,21 +2188,31 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
return ERR_PTR(err);
}

+static void svm_clear_current_vmcb(struct vmcb *vmcb)
+{
+ int i;
+
+ for_each_online_cpu(i)
+ cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL);
+}
+
static void svm_free_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);

+ /*
+ * The vmcb page can be recycled, causing a false negative in
+ * svm_vcpu_load(). So, ensure that no logical CPU has this
+ * vmcb page recorded as its current vmcb.
+ */
+ svm_clear_current_vmcb(svm->vmcb);
+
__free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
__free_page(virt_to_page(svm->nested.hsave));
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, svm);
- /*
- * The vmcb page can be recycled, causing a false negative in
- * svm_vcpu_load(). So do a full IBPB now.
- */
- indirect_branch_prediction_barrier();
}

static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -7145,7 +7156,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.has_wbinvd_exit = svm_has_wbinvd_exit,

.read_l1_tsc_offset = svm_read_l1_tsc_offset,
- .write_tsc_offset = svm_write_tsc_offset,
+ .write_l1_tsc_offset = svm_write_l1_tsc_offset,

.set_tdp_cr3 = set_tdp_cr3,

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9f3def7baa6d..e55f7a90d4b2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -170,6 +170,7 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
* refer SDM volume 3b section 21.6.13 & 22.1.3.
*/
static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP;
+module_param(ple_gap, uint, 0444);

static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
module_param(ple_window, uint, 0444);
@@ -3433,11 +3434,9 @@ static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
return vcpu->arch.tsc_offset;
}

-/*
- * writes 'offset' into guest's timestamp counter offset register
- */
-static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
+ u64 active_offset = offset;
if (is_guest_mode(vcpu)) {
/*
* We're here if L1 chose not to trap WRMSR to TSC. According
@@ -3445,17 +3444,16 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
* set for L2 remains unchanged, and still needs to be added
* to the newly set TSC to get L2's TSC.
*/
- struct vmcs12 *vmcs12;
- /* recalculate vmcs02.TSC_OFFSET: */
- vmcs12 = get_vmcs12(vcpu);
- vmcs_write64(TSC_OFFSET, offset +
- (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
- vmcs12->tsc_offset : 0));
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING))
+ active_offset += vmcs12->tsc_offset;
} else {
trace_kvm_write_tsc_offset(vcpu->vcpu_id,
vmcs_read64(TSC_OFFSET), offset);
- vmcs_write64(TSC_OFFSET, offset);
}
+
+ vmcs_write64(TSC_OFFSET, active_offset);
+ return active_offset;
}

/*
@@ -14203,7 +14201,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,

.read_l1_tsc_offset = vmx_read_l1_tsc_offset,
- .write_tsc_offset = vmx_write_tsc_offset,
+ .write_l1_tsc_offset = vmx_write_l1_tsc_offset,

.set_tdp_cr3 = vmx_set_cr3,

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ca717737347e..68b53f05a420 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1582,8 +1582,7 @@ EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);

static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
- kvm_x86_ops->write_tsc_offset(vcpu, offset);
- vcpu->arch.tsc_offset = offset;
+ vcpu->arch.tsc_offset = kvm_x86_ops->write_l1_tsc_offset(vcpu, offset);
}

static inline bool kvm_check_tsc_unstable(void)
@@ -1711,7 +1710,8 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc);
static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
s64 adjustment)
{
- kvm_vcpu_write_tsc_offset(vcpu, vcpu->arch.tsc_offset + adjustment);
+ u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
+ kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment);
}

static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
@@ -6788,6 +6788,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
clock_pairing.nsec = ts.tv_nsec;
clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
clock_pairing.flags = 0;
+ memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad));

ret = 0;
if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
@@ -7313,7 +7314,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
else {
if (vcpu->arch.apicv_active)
kvm_x86_ops->sync_pir_to_irr(vcpu);
- kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
+ if (ioapic_in_kernel(vcpu->kvm))
+ kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
}

if (is_guest_mode(vcpu))
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index e96b99eb800c..a6d1b0241aea 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -29,6 +29,12 @@
* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
*/

+/*
+ * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is
+ * stored in cpu_tlb_state.last_user_mm_ibpb.
+ */
+#define LAST_USER_MM_IBPB 0x1UL
+
/*
* We get here when we do something requiring a TLB invalidation
* but could not go invalidate all of the contexts. We do the
@@ -180,6 +186,89 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
}
}

+static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
+{
+ unsigned long next_tif = task_thread_info(next)->flags;
+ unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
+
+ return (unsigned long)next->mm | ibpb;
+}
+
+static void cond_ibpb(struct task_struct *next)
+{
+ if (!next || !next->mm)
+ return;
+
+ /*
+ * Both, the conditional and the always IBPB mode use the mm
+ * pointer to avoid the IBPB when switching between tasks of the
+ * same process. Using the mm pointer instead of mm->context.ctx_id
+ * opens a hypothetical hole vs. mm_struct reuse, which is more or
+ * less impossible to control by an attacker. Aside of that it
+ * would only affect the first schedule so the theoretically
+ * exposed data is not really interesting.
+ */
+ if (static_branch_likely(&switch_mm_cond_ibpb)) {
+ unsigned long prev_mm, next_mm;
+
+ /*
+ * This is a bit more complex than the always mode because
+ * it has to handle two cases:
+ *
+ * 1) Switch from a user space task (potential attacker)
+ * which has TIF_SPEC_IB set to a user space task
+ * (potential victim) which has TIF_SPEC_IB not set.
+ *
+ * 2) Switch from a user space task (potential attacker)
+ * which has TIF_SPEC_IB not set to a user space task
+ * (potential victim) which has TIF_SPEC_IB set.
+ *
+ * This could be done by unconditionally issuing IBPB when
+ * a task which has TIF_SPEC_IB set is either scheduled in
+ * or out. Though that results in two flushes when:
+ *
+ * - the same user space task is scheduled out and later
+ * scheduled in again and only a kernel thread ran in
+ * between.
+ *
+ * - a user space task belonging to the same process is
+ * scheduled in after a kernel thread ran in between
+ *
+ * - a user space task belonging to the same process is
+ * scheduled in immediately.
+ *
+ * Optimize this with reasonably small overhead for the
+ * above cases. Mangle the TIF_SPEC_IB bit into the mm
+ * pointer of the incoming task which is stored in
+ * cpu_tlbstate.last_user_mm_ibpb for comparison.
+ */
+ next_mm = mm_mangle_tif_spec_ib(next);
+ prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);
+
+ /*
+ * Issue IBPB only if the mm's are different and one or
+ * both have the IBPB bit set.
+ */
+ if (next_mm != prev_mm &&
+ (next_mm | prev_mm) & LAST_USER_MM_IBPB)
+ indirect_branch_prediction_barrier();
+
+ this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
+ }
+
+ if (static_branch_unlikely(&switch_mm_always_ibpb)) {
+ /*
+ * Only flush when switching to a user space task with a
+ * different context than the user space task which ran
+ * last on this CPU.
+ */
+ if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
+ indirect_branch_prediction_barrier();
+ this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
+ }
+ }
+}
+
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
@@ -254,27 +343,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
} else {
u16 new_asid;
bool need_flush;
- u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);

/*
* Avoid user/user BTB poisoning by flushing the branch
* predictor when switching between processes. This stops
* one process from doing Spectre-v2 attacks on another.
- *
- * As an optimization, flush indirect branches only when
- * switching into processes that disable dumping. This
- * protects high value processes like gpg, without having
- * too high performance overhead. IBPB is *expensive*!
- *
- * This will not flush branches when switching into kernel
- * threads. It will also not flush if we switch to idle
- * thread and back to the same process. It will flush if we
- * switch to a different non-dumpable process.
*/
- if (tsk && tsk->mm &&
- tsk->mm->context.ctx_id != last_ctx_id &&
- get_dumpable(tsk->mm) != SUID_DUMP_USER)
- indirect_branch_prediction_barrier();
+ cond_ibpb(tsk);

if (IS_ENABLED(CONFIG_VMAP_STACK)) {
/*
@@ -331,14 +406,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
}

- /*
- * Record last user mm's context id, so we can avoid
- * flushing branch buffer with IBPB if we switch back
- * to the same user.
- */
- if (next != &init_mm)
- this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
-
/* Make sure we write CR3 before loaded_mm. */
barrier();

@@ -419,7 +486,7 @@ void initialize_tlbstate_and_flush(void)
write_cr3(build_cr3(mm->pgd, 0));

/* Reinitialize tlbstate. */
- this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
+ this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
this_cpu_write(cpu_tlbstate.next_asid, 1);
this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
index 67904f55f188..120dd746a147 100644
--- a/arch/xtensa/kernel/asm-offsets.c
+++ b/arch/xtensa/kernel/asm-offsets.c
@@ -94,14 +94,14 @@ int main(void)
DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp));
DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable));
#if XTENSA_HAVE_COPROCESSORS
- DEFINE(THREAD_XTREGS_CP0, offsetof (struct thread_info, xtregs_cp));
- DEFINE(THREAD_XTREGS_CP1, offsetof (struct thread_info, xtregs_cp));
- DEFINE(THREAD_XTREGS_CP2, offsetof (struct thread_info, xtregs_cp));
- DEFINE(THREAD_XTREGS_CP3, offsetof (struct thread_info, xtregs_cp));
- DEFINE(THREAD_XTREGS_CP4, offsetof (struct thread_info, xtregs_cp));
- DEFINE(THREAD_XTREGS_CP5, offsetof (struct thread_info, xtregs_cp));
- DEFINE(THREAD_XTREGS_CP6, offsetof (struct thread_info, xtregs_cp));
- DEFINE(THREAD_XTREGS_CP7, offsetof (struct thread_info, xtregs_cp));
+ DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0));
+ DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1));
+ DEFINE(THREAD_XTREGS_CP2, offsetof(struct thread_info, xtregs_cp.cp2));
+ DEFINE(THREAD_XTREGS_CP3, offsetof(struct thread_info, xtregs_cp.cp3));
+ DEFINE(THREAD_XTREGS_CP4, offsetof(struct thread_info, xtregs_cp.cp4));
+ DEFINE(THREAD_XTREGS_CP5, offsetof(struct thread_info, xtregs_cp.cp5));
+ DEFINE(THREAD_XTREGS_CP6, offsetof(struct thread_info, xtregs_cp.cp6));
+ DEFINE(THREAD_XTREGS_CP7, offsetof(struct thread_info, xtregs_cp.cp7));
#endif
DEFINE(THREAD_XTREGS_USER, offsetof (struct thread_info, xtregs_user));
DEFINE(XTREGS_USER_SIZE, sizeof(xtregs_user_t));
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 483dcfb6e681..4bb68133a72a 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -94,18 +94,21 @@ void coprocessor_release_all(struct thread_info *ti)

void coprocessor_flush_all(struct thread_info *ti)
{
- unsigned long cpenable;
+ unsigned long cpenable, old_cpenable;
int i;

preempt_disable();

+ RSR_CPENABLE(old_cpenable);
cpenable = ti->cpenable;
+ WSR_CPENABLE(cpenable);

for (i = 0; i < XCHAL_CP_MAX; i++) {
if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti)
coprocessor_flush(ti, i);
cpenable >>= 1;
}
+ WSR_CPENABLE(old_cpenable);

preempt_enable();
}
diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
index c0845cb1cbb9..d9541be0605a 100644
--- a/arch/xtensa/kernel/ptrace.c
+++ b/arch/xtensa/kernel/ptrace.c
@@ -127,12 +127,37 @@ static int ptrace_setregs(struct task_struct *child, void __user *uregs)
}


+#if XTENSA_HAVE_COPROCESSORS
+#define CP_OFFSETS(cp) \
+ { \
+ .elf_xtregs_offset = offsetof(elf_xtregs_t, cp), \
+ .ti_offset = offsetof(struct thread_info, xtregs_cp.cp), \
+ .sz = sizeof(xtregs_ ## cp ## _t), \
+ }
+
+static const struct {
+ size_t elf_xtregs_offset;
+ size_t ti_offset;
+ size_t sz;
+} cp_offsets[] = {
+ CP_OFFSETS(cp0),
+ CP_OFFSETS(cp1),
+ CP_OFFSETS(cp2),
+ CP_OFFSETS(cp3),
+ CP_OFFSETS(cp4),
+ CP_OFFSETS(cp5),
+ CP_OFFSETS(cp6),
+ CP_OFFSETS(cp7),
+};
+#endif
+
static int ptrace_getxregs(struct task_struct *child, void __user *uregs)
{
struct pt_regs *regs = task_pt_regs(child);
struct thread_info *ti = task_thread_info(child);
elf_xtregs_t __user *xtregs = uregs;
int ret = 0;
+ int i __maybe_unused;

if (!access_ok(VERIFY_WRITE, uregs, sizeof(elf_xtregs_t)))
return -EIO;
@@ -140,8 +165,13 @@ static int ptrace_getxregs(struct task_struct *child, void __user *uregs)
#if XTENSA_HAVE_COPROCESSORS
/* Flush all coprocessor registers to memory. */
coprocessor_flush_all(ti);
- ret |= __copy_to_user(&xtregs->cp0, &ti->xtregs_cp,
- sizeof(xtregs_coprocessor_t));
+
+ for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i)
+ ret |= __copy_to_user((char __user *)xtregs +
+ cp_offsets[i].elf_xtregs_offset,
+ (const char *)ti +
+ cp_offsets[i].ti_offset,
+ cp_offsets[i].sz);
#endif
ret |= __copy_to_user(&xtregs->opt, &regs->xtregs_opt,
sizeof(xtregs->opt));
@@ -157,6 +187,7 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs)
struct pt_regs *regs = task_pt_regs(child);
elf_xtregs_t *xtregs = uregs;
int ret = 0;
+ int i __maybe_unused;

if (!access_ok(VERIFY_READ, uregs, sizeof(elf_xtregs_t)))
return -EFAULT;
@@ -166,8 +197,11 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs)
coprocessor_flush_all(ti);
coprocessor_release_all(ti);

- ret |= __copy_from_user(&ti->xtregs_cp, &xtregs->cp0,
- sizeof(xtregs_coprocessor_t));
+ for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i)
+ ret |= __copy_from_user((char *)ti + cp_offsets[i].ti_offset,
+ (const char __user *)xtregs +
+ cp_offsets[i].elf_xtregs_offset,
+ cp_offsets[i].sz);
#endif
ret |= __copy_from_user(&regs->xtregs_opt, &xtregs->opt,
sizeof(xtregs->opt));
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d58763b6b009..ce0e4d317d24 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2971,7 +2971,6 @@ static void binder_transaction(struct binder_proc *proc,
t->buffer = NULL;
goto err_binder_alloc_buf_failed;
}
- t->buffer->allow_user_free = 0;
t->buffer->debug_id = t->debug_id;
t->buffer->transaction = t;
t->buffer->target_node = target_node;
@@ -3465,14 +3464,18 @@ static int binder_thread_write(struct binder_proc *proc,

buffer = binder_alloc_prepare_to_free(&proc->alloc,
data_ptr);
- if (buffer == NULL) {
- binder_user_error("%d:%d BC_FREE_BUFFER u%016llx no match\n",
- proc->pid, thread->pid, (u64)data_ptr);
- break;
- }
- if (!buffer->allow_user_free) {
- binder_user_error("%d:%d BC_FREE_BUFFER u%016llx matched unreturned buffer\n",
- proc->pid, thread->pid, (u64)data_ptr);
+ if (IS_ERR_OR_NULL(buffer)) {
+ if (PTR_ERR(buffer) == -EPERM) {
+ binder_user_error(
+ "%d:%d BC_FREE_BUFFER u%016llx matched unreturned or currently freeing buffer\n",
+ proc->pid, thread->pid,
+ (u64)data_ptr);
+ } else {
+ binder_user_error(
+ "%d:%d BC_FREE_BUFFER u%016llx no match\n",
+ proc->pid, thread->pid,
+ (u64)data_ptr);
+ }
break;
}
binder_debug(BINDER_DEBUG_FREE_BUFFER,
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 64fd96eada31..030c98f35cca 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -151,16 +151,12 @@ static struct binder_buffer *binder_alloc_prepare_to_free_locked(
else {
/*
* Guard against user threads attempting to
- * free the buffer twice
+ * free the buffer when in use by kernel or
+ * after it's already been freed.
*/
- if (buffer->free_in_progress) {
- binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
- "%d:%d FREE_BUFFER u%016llx user freed buffer twice\n",
- alloc->pid, current->pid,
- (u64)user_ptr);
- return NULL;
- }
- buffer->free_in_progress = 1;
+ if (!buffer->allow_user_free)
+ return ERR_PTR(-EPERM);
+ buffer->allow_user_free = 0;
return buffer;
}
}
@@ -500,7 +496,7 @@ static struct binder_buffer *binder_alloc_new_buf_locked(

rb_erase(best_fit, &alloc->free_buffers);
buffer->free = 0;
- buffer->free_in_progress = 0;
+ buffer->allow_user_free = 0;
binder_insert_allocated_buffer_locked(alloc, buffer);
binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
"%d: binder_alloc_buf size %zd got %pK\n",
diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
index 9ef64e563856..fb3238c74c8a 100644
--- a/drivers/android/binder_alloc.h
+++ b/drivers/android/binder_alloc.h
@@ -50,8 +50,7 @@ struct binder_buffer {
unsigned free:1;
unsigned allow_user_free:1;
unsigned async_transaction:1;
- unsigned free_in_progress:1;
- unsigned debug_id:28;
+ unsigned debug_id:29;

struct binder_transaction *transaction;

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 75f38d19fcbe..dbc51154f122 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -1641,6 +1641,12 @@ static void atc_free_chan_resources(struct dma_chan *chan)
atchan->descs_allocated = 0;
atchan->status = 0;

+ /*
+ * Free atslave allocated in at_dma_xlate()
+ */
+ kfree(chan->private);
+ chan->private = NULL;
+
dev_vdbg(chan2dev(chan), "free_chan_resources: done\n");
}

@@ -1675,7 +1681,7 @@ static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec,
dma_cap_zero(mask);
dma_cap_set(DMA_SLAVE, mask);

- atslave = devm_kzalloc(&dmac_pdev->dev, sizeof(*atslave), GFP_KERNEL);
+ atslave = kzalloc(sizeof(*atslave), GFP_KERNEL);
if (!atslave)
return NULL;

@@ -2000,6 +2006,8 @@ static int at_dma_remove(struct platform_device *pdev)
struct resource *io;

at_dma_off(atdma);
+ if (pdev->dev.of_node)
+ of_dma_controller_free(pdev->dev.of_node);
dma_async_device_unregister(&atdma->dma_common);

dma_pool_destroy(atdma->memset_pool);
diff --git a/drivers/hid/hid-sensor-custom.c b/drivers/hid/hid-sensor-custom.c
index e8a114157f87..bb012bc032e0 100644
--- a/drivers/hid/hid-sensor-custom.c
+++ b/drivers/hid/hid-sensor-custom.c
@@ -358,7 +358,7 @@ static ssize_t show_value(struct device *dev, struct device_attribute *attr,
sensor_inst->hsdev,
sensor_inst->hsdev->usage,
usage, report_id,
- SENSOR_HUB_SYNC);
+ SENSOR_HUB_SYNC, false);
} else if (!strncmp(name, "units", strlen("units")))
value = sensor_inst->fields[field_index].attribute.units;
else if (!strncmp(name, "unit-expo", strlen("unit-expo")))
diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c
index 2b63487057c2..4256fdc5cd6d 100644
--- a/drivers/hid/hid-sensor-hub.c
+++ b/drivers/hid/hid-sensor-hub.c
@@ -299,7 +299,8 @@ EXPORT_SYMBOL_GPL(sensor_hub_get_feature);
int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev,
u32 usage_id,
u32 attr_usage_id, u32 report_id,
- enum sensor_hub_read_flags flag)
+ enum sensor_hub_read_flags flag,
+ bool is_signed)
{
struct sensor_hub_data *data = hid_get_drvdata(hsdev->hdev);
unsigned long flags;
@@ -331,10 +332,16 @@ int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev,
&hsdev->pending.ready, HZ*5);
switch (hsdev->pending.raw_size) {
case 1:
- ret_val = *(u8 *)hsdev->pending.raw_data;
+ if (is_signed)
+ ret_val = *(s8 *)hsdev->pending.raw_data;
+ else
+ ret_val = *(u8 *)hsdev->pending.raw_data;
break;
case 2:
- ret_val = *(u16 *)hsdev->pending.raw_data;
+ if (is_signed)
+ ret_val = *(s16 *)hsdev->pending.raw_data;
+ else
+ ret_val = *(u16 *)hsdev->pending.raw_data;
break;
case 4:
ret_val = *(u32 *)hsdev->pending.raw_data;
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 741857d80da1..2f164bd74687 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -482,6 +482,14 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
}
wait_for_completion(&msginfo->waitevent);

+ if (msginfo->response.gpadl_created.creation_status != 0) {
+ pr_err("Failed to establish GPADL: err = 0x%x\n",
+ msginfo->response.gpadl_created.creation_status);
+
+ ret = -EDQUOT;
+ goto cleanup;
+ }
+
if (channel->rescind) {
ret = -ENODEV;
goto cleanup;
diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c
index 41d97faf5013..38ff374a3ca4 100644
--- a/drivers/iio/accel/hid-sensor-accel-3d.c
+++ b/drivers/iio/accel/hid-sensor-accel-3d.c
@@ -149,6 +149,7 @@ static int accel_3d_read_raw(struct iio_dev *indio_dev,
int report_id = -1;
u32 address;
int ret_type;
+ s32 min;
struct hid_sensor_hub_device *hsdev =
accel_state->common_attributes.hsdev;

@@ -158,12 +159,14 @@ static int accel_3d_read_raw(struct iio_dev *indio_dev,
case IIO_CHAN_INFO_RAW:
hid_sensor_power_state(&accel_state->common_attributes, true);
report_id = accel_state->accel[chan->scan_index].report_id;
+ min = accel_state->accel[chan->scan_index].logical_minimum;
address = accel_3d_addresses[chan->scan_index];
if (report_id >= 0)
*val = sensor_hub_input_attr_get_raw_value(
accel_state->common_attributes.hsdev,
hsdev->usage, address, report_id,
- SENSOR_HUB_SYNC);
+ SENSOR_HUB_SYNC,
+ min < 0);
else {
*val = 0;
hid_sensor_power_state(&accel_state->common_attributes,
diff --git a/drivers/iio/gyro/hid-sensor-gyro-3d.c b/drivers/iio/gyro/hid-sensor-gyro-3d.c
index 36941e69f959..88e857c4baf4 100644
--- a/drivers/iio/gyro/hid-sensor-gyro-3d.c
+++ b/drivers/iio/gyro/hid-sensor-gyro-3d.c
@@ -111,6 +111,7 @@ static int gyro_3d_read_raw(struct iio_dev *indio_dev,
int report_id = -1;
u32 address;
int ret_type;
+ s32 min;

*val = 0;
*val2 = 0;
@@ -118,13 +119,15 @@ static int gyro_3d_read_raw(struct iio_dev *indio_dev,
case IIO_CHAN_INFO_RAW:
hid_sensor_power_state(&gyro_state->common_attributes, true);
report_id = gyro_state->gyro[chan->scan_index].report_id;
+ min = gyro_state->gyro[chan->scan_index].logical_minimum;
address = gyro_3d_addresses[chan->scan_index];
if (report_id >= 0)
*val = sensor_hub_input_attr_get_raw_value(
gyro_state->common_attributes.hsdev,
HID_USAGE_SENSOR_GYRO_3D, address,
report_id,
- SENSOR_HUB_SYNC);
+ SENSOR_HUB_SYNC,
+ min < 0);
else {
*val = 0;
hid_sensor_power_state(&gyro_state->common_attributes,
diff --git a/drivers/iio/humidity/hid-sensor-humidity.c b/drivers/iio/humidity/hid-sensor-humidity.c
index beab6d6fd6e1..4bc95f31c730 100644
--- a/drivers/iio/humidity/hid-sensor-humidity.c
+++ b/drivers/iio/humidity/hid-sensor-humidity.c
@@ -75,7 +75,8 @@ static int humidity_read_raw(struct iio_dev *indio_dev,
HID_USAGE_SENSOR_HUMIDITY,
HID_USAGE_SENSOR_ATMOSPHERIC_HUMIDITY,
humid_st->humidity_attr.report_id,
- SENSOR_HUB_SYNC);
+ SENSOR_HUB_SYNC,
+ humid_st->humidity_attr.logical_minimum < 0);
hid_sensor_power_state(&humid_st->common_attributes, false);

return IIO_VAL_INT;
diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c
index 406caaee9a3c..94f33250ba5a 100644
--- a/drivers/iio/light/hid-sensor-als.c
+++ b/drivers/iio/light/hid-sensor-als.c
@@ -93,6 +93,7 @@ static int als_read_raw(struct iio_dev *indio_dev,
int report_id = -1;
u32 address;
int ret_type;
+ s32 min;

*val = 0;
*val2 = 0;
@@ -102,8 +103,8 @@ static int als_read_raw(struct iio_dev *indio_dev,
case CHANNEL_SCAN_INDEX_INTENSITY:
case CHANNEL_SCAN_INDEX_ILLUM:
report_id = als_state->als_illum.report_id;
- address =
- HID_USAGE_SENSOR_LIGHT_ILLUM;
+ min = als_state->als_illum.logical_minimum;
+ address = HID_USAGE_SENSOR_LIGHT_ILLUM;
break;
default:
report_id = -1;
@@ -116,7 +117,8 @@ static int als_read_raw(struct iio_dev *indio_dev,
als_state->common_attributes.hsdev,
HID_USAGE_SENSOR_ALS, address,
report_id,
- SENSOR_HUB_SYNC);
+ SENSOR_HUB_SYNC,
+ min < 0);
hid_sensor_power_state(&als_state->common_attributes,
false);
} else {
diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c
index 45107f7537b5..cf5a0c242609 100644
--- a/drivers/iio/light/hid-sensor-prox.c
+++ b/drivers/iio/light/hid-sensor-prox.c
@@ -73,6 +73,7 @@ static int prox_read_raw(struct iio_dev *indio_dev,
int report_id = -1;
u32 address;
int ret_type;
+ s32 min;

*val = 0;
*val2 = 0;
@@ -81,8 +82,8 @@ static int prox_read_raw(struct iio_dev *indio_dev,
switch (chan->scan_index) {
case CHANNEL_SCAN_INDEX_PRESENCE:
report_id = prox_state->prox_attr.report_id;
- address =
- HID_USAGE_SENSOR_HUMAN_PRESENCE;
+ min = prox_state->prox_attr.logical_minimum;
+ address = HID_USAGE_SENSOR_HUMAN_PRESENCE;
break;
default:
report_id = -1;
@@ -95,7 +96,8 @@ static int prox_read_raw(struct iio_dev *indio_dev,
prox_state->common_attributes.hsdev,
HID_USAGE_SENSOR_PROX, address,
report_id,
- SENSOR_HUB_SYNC);
+ SENSOR_HUB_SYNC,
+ min < 0);
hid_sensor_power_state(&prox_state->common_attributes,
false);
} else {
diff --git a/drivers/iio/magnetometer/hid-sensor-magn-3d.c b/drivers/iio/magnetometer/hid-sensor-magn-3d.c
index d55c4885211a..f3c0d41e5a8c 100644
--- a/drivers/iio/magnetometer/hid-sensor-magn-3d.c
+++ b/drivers/iio/magnetometer/hid-sensor-magn-3d.c
@@ -163,21 +163,23 @@ static int magn_3d_read_raw(struct iio_dev *indio_dev,
int report_id = -1;
u32 address;
int ret_type;
+ s32 min;

*val = 0;
*val2 = 0;
switch (mask) {
case IIO_CHAN_INFO_RAW:
hid_sensor_power_state(&magn_state->magn_flux_attributes, true);
- report_id =
- magn_state->magn[chan->address].report_id;
+ report_id = magn_state->magn[chan->address].report_id;
+ min = magn_state->magn[chan->address].logical_minimum;
address = magn_3d_addresses[chan->address];
if (report_id >= 0)
*val = sensor_hub_input_attr_get_raw_value(
magn_state->magn_flux_attributes.hsdev,
HID_USAGE_SENSOR_COMPASS_3D, address,
report_id,
- SENSOR_HUB_SYNC);
+ SENSOR_HUB_SYNC,
+ min < 0);
else {
*val = 0;
hid_sensor_power_state(
diff --git a/drivers/iio/magnetometer/st_magn_buffer.c b/drivers/iio/magnetometer/st_magn_buffer.c
index 0a9e8fadfa9d..37ab30566464 100644
--- a/drivers/iio/magnetometer/st_magn_buffer.c
+++ b/drivers/iio/magnetometer/st_magn_buffer.c
@@ -30,11 +30,6 @@ int st_magn_trig_set_state(struct iio_trigger *trig, bool state)
return st_sensors_set_dataready_irq(indio_dev, state);
}

-static int st_magn_buffer_preenable(struct iio_dev *indio_dev)
-{
- return st_sensors_set_enable(indio_dev, true);
-}
-
static int st_magn_buffer_postenable(struct iio_dev *indio_dev)
{
int err;
@@ -50,7 +45,7 @@ static int st_magn_buffer_postenable(struct iio_dev *indio_dev)
if (err < 0)
goto st_magn_buffer_postenable_error;

- return err;
+ return st_sensors_set_enable(indio_dev, true);

st_magn_buffer_postenable_error:
kfree(mdata->buffer_data);
@@ -63,11 +58,11 @@ static int st_magn_buffer_predisable(struct iio_dev *indio_dev)
int err;
struct st_sensor_data *mdata = iio_priv(indio_dev);

- err = iio_triggered_buffer_predisable(indio_dev);
+ err = st_sensors_set_enable(indio_dev, false);
if (err < 0)
goto st_magn_buffer_predisable_error;

- err = st_sensors_set_enable(indio_dev, false);
+ err = iio_triggered_buffer_predisable(indio_dev);

st_magn_buffer_predisable_error:
kfree(mdata->buffer_data);
@@ -75,7 +70,6 @@ static int st_magn_buffer_predisable(struct iio_dev *indio_dev)
}

static const struct iio_buffer_setup_ops st_magn_buffer_setup_ops = {
- .preenable = &st_magn_buffer_preenable,
.postenable = &st_magn_buffer_postenable,
.predisable = &st_magn_buffer_predisable,
};
diff --git a/drivers/iio/orientation/hid-sensor-incl-3d.c b/drivers/iio/orientation/hid-sensor-incl-3d.c
index 1e5451d1ff88..bdc5e4554ee4 100644
--- a/drivers/iio/orientation/hid-sensor-incl-3d.c
+++ b/drivers/iio/orientation/hid-sensor-incl-3d.c
@@ -111,21 +111,23 @@ static int incl_3d_read_raw(struct iio_dev *indio_dev,
int report_id = -1;
u32 address;
int ret_type;
+ s32 min;

*val = 0;
*val2 = 0;
switch (mask) {
case IIO_CHAN_INFO_RAW:
hid_sensor_power_state(&incl_state->common_attributes, true);
- report_id =
- incl_state->incl[chan->scan_index].report_id;
+ report_id = incl_state->incl[chan->scan_index].report_id;
+ min = incl_state->incl[chan->scan_index].logical_minimum;
address = incl_3d_addresses[chan->scan_index];
if (report_id >= 0)
*val = sensor_hub_input_attr_get_raw_value(
incl_state->common_attributes.hsdev,
HID_USAGE_SENSOR_INCLINOMETER_3D, address,
report_id,
- SENSOR_HUB_SYNC);
+ SENSOR_HUB_SYNC,
+ min < 0);
else {
hid_sensor_power_state(&incl_state->common_attributes,
false);
diff --git a/drivers/iio/pressure/hid-sensor-press.c b/drivers/iio/pressure/hid-sensor-press.c
index 4c437918f1d2..d7b1c00ceb4d 100644
--- a/drivers/iio/pressure/hid-sensor-press.c
+++ b/drivers/iio/pressure/hid-sensor-press.c
@@ -77,6 +77,7 @@ static int press_read_raw(struct iio_dev *indio_dev,
int report_id = -1;
u32 address;
int ret_type;
+ s32 min;

*val = 0;
*val2 = 0;
@@ -85,8 +86,8 @@ static int press_read_raw(struct iio_dev *indio_dev,
switch (chan->scan_index) {
case CHANNEL_SCAN_INDEX_PRESSURE:
report_id = press_state->press_attr.report_id;
- address =
- HID_USAGE_SENSOR_ATMOSPHERIC_PRESSURE;
+ min = press_state->press_attr.logical_minimum;
+ address = HID_USAGE_SENSOR_ATMOSPHERIC_PRESSURE;
break;
default:
report_id = -1;
@@ -99,7 +100,8 @@ static int press_read_raw(struct iio_dev *indio_dev,
press_state->common_attributes.hsdev,
HID_USAGE_SENSOR_PRESSURE, address,
report_id,
- SENSOR_HUB_SYNC);
+ SENSOR_HUB_SYNC,
+ min < 0);
hid_sensor_power_state(&press_state->common_attributes,
false);
} else {
diff --git a/drivers/iio/temperature/hid-sensor-temperature.c b/drivers/iio/temperature/hid-sensor-temperature.c
index beaf6fd3e337..b592fc4f007e 100644
--- a/drivers/iio/temperature/hid-sensor-temperature.c
+++ b/drivers/iio/temperature/hid-sensor-temperature.c
@@ -76,7 +76,8 @@ static int temperature_read_raw(struct iio_dev *indio_dev,
HID_USAGE_SENSOR_TEMPERATURE,
HID_USAGE_SENSOR_DATA_ENVIRONMENTAL_TEMPERATURE,
temp_st->temperature_attr.report_id,
- SENSOR_HUB_SYNC);
+ SENSOR_HUB_SYNC,
+ temp_st->temperature_attr.logical_minimum < 0);
hid_sensor_power_state(
&temp_st->common_attributes,
false);
diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
index c824329f7012..0e4193cb08cf 100644
--- a/drivers/misc/mic/scif/scif_rma.c
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -416,7 +416,7 @@ static int scif_create_remote_lookup(struct scif_dev *remote_dev,
if (err)
goto error_window;
err = scif_map_page(&window->num_pages_lookup.lookup[j],
- vmalloc_dma_phys ?
+ vmalloc_num_pages ?
vmalloc_to_page(&window->num_pages[i]) :
virt_to_page(&window->num_pages[i]),
remote_dev);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 768f584f8392..88f8a8fa93cd 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1784,6 +1784,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
bool if_up = netif_running(nic->netdev);
struct bpf_prog *old_prog;
bool bpf_attached = false;
+ int ret = 0;

/* For now just support only the usual MTU sized frames */
if (prog && (dev->mtu > 1500)) {
@@ -1817,8 +1818,12 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
if (nic->xdp_prog) {
/* Attach BPF program */
nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1);
- if (!IS_ERR(nic->xdp_prog))
+ if (!IS_ERR(nic->xdp_prog)) {
bpf_attached = true;
+ } else {
+ ret = PTR_ERR(nic->xdp_prog);
+ nic->xdp_prog = NULL;
+ }
}

/* Calculate Tx queues needed for XDP and network stack */
@@ -1830,7 +1835,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
netif_trans_update(nic->netdev);
}

- return 0;
+ return ret;
}

static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 187a249ff2d1..fcaf18fa3904 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -585,10 +585,12 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
if (!sq->dmem.base)
return;

- if (sq->tso_hdrs)
+ if (sq->tso_hdrs) {
dma_free_coherent(&nic->pdev->dev,
sq->dmem.q_len * TSO_HEADER_SIZE,
sq->tso_hdrs, sq->tso_hdrs_phys);
+ sq->tso_hdrs = NULL;
+ }

/* Free pending skbs in the queue */
smp_rmb();
diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
index 1c9ad3630c77..dfd1ad0b1cb9 100644
--- a/drivers/net/ethernet/cortina/gemini.c
+++ b/drivers/net/ethernet/cortina/gemini.c
@@ -661,7 +661,7 @@ static void gmac_clean_txq(struct net_device *netdev, struct gmac_txq *txq,

u64_stats_update_begin(&port->tx_stats_syncp);
port->tx_frag_stats[nfrags]++;
- u64_stats_update_end(&port->ir_stats_syncp);
+ u64_stats_update_end(&port->tx_stats_syncp);
}
}

diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index 001b5f714c1b..aaedf1072460 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -1675,7 +1675,7 @@ static int lan743x_tx_napi_poll(struct napi_struct *napi, int weight)
netif_wake_queue(adapter->netdev);
}

- if (!napi_complete_done(napi, weight))
+ if (!napi_complete(napi))
goto done;

/* enable isr */
@@ -1684,7 +1684,7 @@ static int lan743x_tx_napi_poll(struct napi_struct *napi, int weight)
lan743x_csr_read(adapter, INT_STS);

done:
- return weight;
+ return 0;
}

static void lan743x_tx_ring_cleanup(struct lan743x_tx *tx)
@@ -1873,9 +1873,9 @@ static int lan743x_tx_open(struct lan743x_tx *tx)
tx->vector_flags = lan743x_intr_get_vector_flags(adapter,
INT_BIT_DMA_TX_
(tx->channel_number));
- netif_napi_add(adapter->netdev,
- &tx->napi, lan743x_tx_napi_poll,
- tx->ring_size - 1);
+ netif_tx_napi_add(adapter->netdev,
+ &tx->napi, lan743x_tx_napi_poll,
+ tx->ring_size - 1);
napi_enable(&tx->napi);

data = 0;
@@ -3020,6 +3020,7 @@ static const struct dev_pm_ops lan743x_pm_ops = {

static const struct pci_device_id lan743x_pcidev_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_SMSC, PCI_DEVICE_ID_SMSC_LAN7430) },
+ { PCI_DEVICE(PCI_VENDOR_ID_SMSC, PCI_DEVICE_ID_SMSC_LAN7431) },
{ 0, }
};

diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
index 0e82b6368798..2d6eea18973e 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.h
+++ b/drivers/net/ethernet/microchip/lan743x_main.h
@@ -548,6 +548,7 @@ struct lan743x_adapter;
/* SMSC acquired EFAR late 1990's, MCHP acquired SMSC 2012 */
#define PCI_VENDOR_ID_SMSC PCI_VENDOR_ID_EFAR
#define PCI_DEVICE_ID_SMSC_LAN7430 (0x7430)
+#define PCI_DEVICE_ID_SMSC_LAN7431 (0x7431)

#define PCI_CONFIG_LENGTH (0x1000)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 19ab8a7d1e48..733e35b7c4bb 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1930,6 +1930,14 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner)
new_driver->mdiodrv.driver.remove = phy_remove;
new_driver->mdiodrv.driver.owner = owner;

+ /* The following works around an issue where the PHY driver doesn't bind
+ * to the device, resulting in the genphy driver being used instead of
+ * the dedicated driver. The root cause of the issue isn't known yet
+ * and seems to be in the base driver core. Once this is fixed we may
+ * remove this workaround.
+ */
+ new_driver->mdiodrv.driver.probe_type = PROBE_FORCE_SYNCHRONOUS;
+
retval = driver_register(&new_driver->mdiodrv.driver);
if (retval) {
pr_err("%s: Error %d in registering driver\n",
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index e9f101c9bae2..bfbb39f93554 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -216,9 +216,9 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
* it just report sending a packet to the target
* (without actual packet transfer).
*/
- dev_kfree_skb_any(skb);
ndev->stats.tx_packets++;
ndev->stats.tx_bytes += skb->len;
+ dev_kfree_skb_any(skb);
}
}

diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c
index 7275761a1177..3d8a70d3ea9b 100644
--- a/drivers/net/usb/ipheth.c
+++ b/drivers/net/usb/ipheth.c
@@ -140,7 +140,6 @@ struct ipheth_device {
struct usb_device *udev;
struct usb_interface *intf;
struct net_device *net;
- struct sk_buff *tx_skb;
struct urb *tx_urb;
struct urb *rx_urb;
unsigned char *tx_buf;
@@ -230,6 +229,7 @@ static void ipheth_rcvbulk_callback(struct urb *urb)
case -ENOENT:
case -ECONNRESET:
case -ESHUTDOWN:
+ case -EPROTO:
return;
case 0:
break;
@@ -281,7 +281,6 @@ static void ipheth_sndbulk_callback(struct urb *urb)
dev_err(&dev->intf->dev, "%s: urb status: %d\n",
__func__, status);

- dev_kfree_skb_irq(dev->tx_skb);
if (status == 0)
netif_wake_queue(dev->net);
else
@@ -423,7 +422,7 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net)
if (skb->len > IPHETH_BUF_SIZE) {
WARN(1, "%s: skb too large: %d bytes\n", __func__, skb->len);
dev->net->stats.tx_dropped++;
- dev_kfree_skb_irq(skb);
+ dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}

@@ -443,12 +442,11 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net)
dev_err(&dev->intf->dev, "%s: usb_submit_urb: %d\n",
__func__, retval);
dev->net->stats.tx_errors++;
- dev_kfree_skb_irq(skb);
+ dev_kfree_skb_any(skb);
} else {
- dev->tx_skb = skb;
-
dev->net->stats.tx_packets++;
dev->net->stats.tx_bytes += skb->len;
+ dev_consume_skb_any(skb);
netif_stop_queue(net);
}

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index ddfa3f24204c..c2ca6cd3fbe0 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -70,7 +70,8 @@ static const unsigned long guest_offloads[] = {
VIRTIO_NET_F_GUEST_TSO4,
VIRTIO_NET_F_GUEST_TSO6,
VIRTIO_NET_F_GUEST_ECN,
- VIRTIO_NET_F_GUEST_UFO
+ VIRTIO_NET_F_GUEST_UFO,
+ VIRTIO_NET_F_GUEST_CSUM
};

struct virtnet_stat_desc {
@@ -2285,9 +2286,6 @@ static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
if (!vi->guest_offloads)
return 0;

- if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
- offloads = 1ULL << VIRTIO_NET_F_GUEST_CSUM;
-
return virtnet_set_guest_offloads(vi, offloads);
}

@@ -2297,8 +2295,6 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi)

if (!vi->guest_offloads)
return 0;
- if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
- offloads |= 1ULL << VIRTIO_NET_F_GUEST_CSUM;

return virtnet_set_guest_offloads(vi, offloads);
}
@@ -2316,8 +2312,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
&& (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
- virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO))) {
- NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first");
+ virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
+ virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) {
+ NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first");
return -EOPNOTSUPP;
}

diff --git a/drivers/pci/controller/dwc/pci-layerscape.c b/drivers/pci/controller/dwc/pci-layerscape.c
index 3724d3ef7008..7aa9a82b7ebd 100644
--- a/drivers/pci/controller/dwc/pci-layerscape.c
+++ b/drivers/pci/controller/dwc/pci-layerscape.c
@@ -88,7 +88,7 @@ static void ls_pcie_disable_outbound_atus(struct ls_pcie *pcie)
int i;

for (i = 0; i < PCIE_IATU_NUM; i++)
- dw_pcie_disable_atu(pcie->pci, DW_PCIE_REGION_OUTBOUND, i);
+ dw_pcie_disable_atu(pcie->pci, i, DW_PCIE_REGION_OUTBOUND);
}

static int ls1021_pcie_link_up(struct dw_pcie *pci)
diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
index 1e7b02221eac..de8635af4cde 100644
--- a/drivers/pci/controller/dwc/pcie-designware-ep.c
+++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -440,7 +440,6 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no,
tbl_offset = dw_pcie_readl_dbi(pci, reg);
bir = (tbl_offset & PCI_MSIX_TABLE_BIR);
tbl_offset &= PCI_MSIX_TABLE_OFFSET;
- tbl_offset >>= 3;

reg = PCI_BASE_ADDRESS_0 + (4 * bir);
bar_addr_upper = 0;
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 51b6c81671c1..afc4680c584f 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5473,9 +5473,13 @@ enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev)
u32 lnkcap2, lnkcap;

/*
- * PCIe r4.0 sec 7.5.3.18 recommends using the Supported Link
- * Speeds Vector in Link Capabilities 2 when supported, falling
- * back to Max Link Speed in Link Capabilities otherwise.
+ * Link Capabilities 2 was added in PCIe r3.0, sec 7.8.18. The
+ * implementation note there recommends using the Supported Link
+ * Speeds Vector in Link Capabilities 2 when supported.
+ *
+ * Without Link Capabilities 2, i.e., prior to PCIe r3.0, software
+ * should use the Supported Link Speeds field in Link Capabilities,
+ * where only 2.5 GT/s and 5.0 GT/s speeds were defined.
*/
pcie_capability_read_dword(dev, PCI_EXP_LNKCAP2, &lnkcap2);
if (lnkcap2) { /* PCIe r3.0-compliant */
@@ -5491,16 +5495,10 @@ enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev)
}

pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap);
- if (lnkcap) {
- if (lnkcap & PCI_EXP_LNKCAP_SLS_16_0GB)
- return PCIE_SPEED_16_0GT;
- else if (lnkcap & PCI_EXP_LNKCAP_SLS_8_0GB)
- return PCIE_SPEED_8_0GT;
- else if (lnkcap & PCI_EXP_LNKCAP_SLS_5_0GB)
- return PCIE_SPEED_5_0GT;
- else if (lnkcap & PCI_EXP_LNKCAP_SLS_2_5GB)
- return PCIE_SPEED_2_5GT;
- }
+ if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_5_0GB)
+ return PCIE_SPEED_5_0GT;
+ else if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_2_5GB)
+ return PCIE_SPEED_2_5GT;

return PCI_SPEED_UNKNOWN;
}
diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c
index 2751dba850c6..3e1abb455472 100644
--- a/drivers/rtc/rtc-hid-sensor-time.c
+++ b/drivers/rtc/rtc-hid-sensor-time.c
@@ -213,7 +213,7 @@ static int hid_rtc_read_time(struct device *dev, struct rtc_time *tm)
/* get a report with all values through requesting one value */
sensor_hub_input_attr_get_raw_value(time_state->common_attributes.hsdev,
HID_USAGE_SENSOR_TIME, hid_time_addresses[0],
- time_state->info[0].report_id, SENSOR_HUB_SYNC);
+ time_state->info[0].report_id, SENSOR_HUB_SYNC, false);
/* wait for all values (event) */
ret = wait_for_completion_killable_timeout(
&time_state->comp_last_time, HZ*6);
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index ffce6f39828a..b03515d43745 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -4524,8 +4524,8 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
{
struct qeth_ipa_cmd *cmd;
struct qeth_arp_query_info *qinfo;
- struct qeth_snmp_cmd *snmp;
unsigned char *data;
+ void *snmp_data;
__u16 data_len;

QETH_CARD_TEXT(card, 3, "snpcmdcb");
@@ -4533,7 +4533,6 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
cmd = (struct qeth_ipa_cmd *) sdata;
data = (unsigned char *)((char *)cmd - reply->offset);
qinfo = (struct qeth_arp_query_info *) reply->param;
- snmp = &cmd->data.setadapterparms.data.snmp;

if (cmd->hdr.return_code) {
QETH_CARD_TEXT_(card, 4, "scer1%x", cmd->hdr.return_code);
@@ -4546,10 +4545,15 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
return 0;
}
data_len = *((__u16 *)QETH_IPA_PDU_LEN_PDU1(data));
- if (cmd->data.setadapterparms.hdr.seq_no == 1)
- data_len -= (__u16)((char *)&snmp->data - (char *)cmd);
- else
- data_len -= (__u16)((char *)&snmp->request - (char *)cmd);
+ if (cmd->data.setadapterparms.hdr.seq_no == 1) {
+ snmp_data = &cmd->data.setadapterparms.data.snmp;
+ data_len -= offsetof(struct qeth_ipa_cmd,
+ data.setadapterparms.data.snmp);
+ } else {
+ snmp_data = &cmd->data.setadapterparms.data.snmp.request;
+ data_len -= offsetof(struct qeth_ipa_cmd,
+ data.setadapterparms.data.snmp.request);
+ }

/* check if there is enough room in userspace */
if ((qinfo->udata_len - qinfo->udata_offset) < data_len) {
@@ -4562,16 +4566,9 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
QETH_CARD_TEXT_(card, 4, "sseqn%i",
cmd->data.setadapterparms.hdr.seq_no);
/*copy entries to user buffer*/
- if (cmd->data.setadapterparms.hdr.seq_no == 1) {
- memcpy(qinfo->udata + qinfo->udata_offset,
- (char *)snmp,
- data_len + offsetof(struct qeth_snmp_cmd, data));
- qinfo->udata_offset += offsetof(struct qeth_snmp_cmd, data);
- } else {
- memcpy(qinfo->udata + qinfo->udata_offset,
- (char *)&snmp->request, data_len);
- }
+ memcpy(qinfo->udata + qinfo->udata_offset, snmp_data, data_len);
qinfo->udata_offset += data_len;
+
/* check if all replies received ... */
QETH_CARD_TEXT_(card, 4, "srtot%i",
cmd->data.setadapterparms.hdr.used_total);
diff --git a/drivers/staging/most/core.c b/drivers/staging/most/core.c
index f4c464625a67..52ad62722996 100644
--- a/drivers/staging/most/core.c
+++ b/drivers/staging/most/core.c
@@ -351,7 +351,7 @@ static ssize_t set_datatype_show(struct device *dev,

for (i = 0; i < ARRAY_SIZE(ch_data_type); i++) {
if (c->cfg.data_type & ch_data_type[i].most_ch_data_type)
- return snprintf(buf, PAGE_SIZE, ch_data_type[i].name);
+ return snprintf(buf, PAGE_SIZE, "%s", ch_data_type[i].name);
}
return snprintf(buf, PAGE_SIZE, "unconfigured\n");
}
diff --git a/drivers/staging/mt7621-dma/mtk-hsdma.c b/drivers/staging/mt7621-dma/mtk-hsdma.c
index df6ebf41bdea..5831f816c17b 100644
--- a/drivers/staging/mt7621-dma/mtk-hsdma.c
+++ b/drivers/staging/mt7621-dma/mtk-hsdma.c
@@ -335,6 +335,8 @@ static int mtk_hsdma_start_transfer(struct mtk_hsdam_engine *hsdma,
/* tx desc */
src = sg->src_addr;
for (i = 0; i < chan->desc->num_sgs; i++) {
+ tx_desc = &chan->tx_ring[chan->tx_idx];
+
if (len > HSDMA_MAX_PLEN)
tlen = HSDMA_MAX_PLEN;
else
@@ -344,7 +346,6 @@ static int mtk_hsdma_start_transfer(struct mtk_hsdam_engine *hsdma,
tx_desc->addr1 = src;
tx_desc->flags |= HSDMA_DESC_PLEN1(tlen);
} else {
- tx_desc = &chan->tx_ring[chan->tx_idx];
tx_desc->addr0 = src;
tx_desc->flags = HSDMA_DESC_PLEN0(tlen);

diff --git a/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c b/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c
index b8566ed898f1..aa98fbb17013 100644
--- a/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c
+++ b/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c
@@ -82,7 +82,7 @@ static int rt2880_pinctrl_dt_node_to_map(struct pinctrl_dev *pctrldev,
struct property *prop;
const char *function_name, *group_name;
int ret;
- int ngroups;
+ int ngroups = 0;
unsigned int reserved_maps = 0;

for_each_node_with_property(np_config, "group")
diff --git a/drivers/staging/rtl8723bs/hal/rtl8723bs_recv.c b/drivers/staging/rtl8723bs/hal/rtl8723bs_recv.c
index 85077947b9b8..85aba8a503cd 100644
--- a/drivers/staging/rtl8723bs/hal/rtl8723bs_recv.c
+++ b/drivers/staging/rtl8723bs/hal/rtl8723bs_recv.c
@@ -109,12 +109,12 @@ static void update_recvframe_phyinfo(union recv_frame *precvframe,
rx_bssid = get_hdr_bssid(wlanhdr);
pkt_info.bssid_match = ((!IsFrameTypeCtrl(wlanhdr)) &&
!pattrib->icv_err && !pattrib->crc_err &&
- !ether_addr_equal(rx_bssid, my_bssid));
+ ether_addr_equal(rx_bssid, my_bssid));

rx_ra = get_ra(wlanhdr);
my_hwaddr = myid(&padapter->eeprompriv);
pkt_info.to_self = pkt_info.bssid_match &&
- !ether_addr_equal(rx_ra, my_hwaddr);
+ ether_addr_equal(rx_ra, my_hwaddr);


pkt_info.is_beacon = pkt_info.bssid_match &&
diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
index af2234798fa8..db553f2e4c0b 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
@@ -1277,7 +1277,7 @@ static int cfg80211_rtw_get_station(struct wiphy *wiphy,

sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS);
sinfo->tx_packets = psta->sta_stats.tx_pkts;
-
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED);
}

/* for Ad-Hoc/AP mode */
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
index bc05c69383b8..fe431302a030 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
@@ -1787,6 +1787,7 @@ vchiq_compat_ioctl_await_completion(struct file *file,
struct vchiq_await_completion32 args32;
struct vchiq_completion_data32 completion32;
unsigned int *msgbufcount32;
+ unsigned int msgbufcount_native;
compat_uptr_t msgbuf32;
void *msgbuf;
void **msgbufptr;
@@ -1898,7 +1899,11 @@ vchiq_compat_ioctl_await_completion(struct file *file,
sizeof(completion32)))
return -EFAULT;

- args32.msgbufcount--;
+ if (get_user(msgbufcount_native, &args->msgbufcount))
+ return -EFAULT;
+
+ if (!msgbufcount_native)
+ args32.msgbufcount--;

msgbufcount32 =
&((struct vchiq_await_completion32 __user *)arg)->msgbufcount;
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index f9ff03e6af93..0690fcff0ea2 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -209,6 +209,9 @@ static const struct usb_device_id usb_quirk_list[] = {
/* Microsoft LifeCam-VX700 v2.0 */
{ USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME },

+ /* Cherry Stream G230 2.0 (G85-231) and 3.0 (G85-232) */
+ { USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME },
+
/* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */
{ USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT },
{ USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT },
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 2de1a3971a26..558949b826d0 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1461,9 +1461,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol)
unsigned transfer_in_flight;
unsigned started;

- if (dep->flags & DWC3_EP_STALL)
- return 0;
-
if (dep->number > 1)
trb = dwc3_ep_prev_trb(dep, dep->trb_enqueue);
else
@@ -1485,8 +1482,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol)
else
dep->flags |= DWC3_EP_STALL;
} else {
- if (!(dep->flags & DWC3_EP_STALL))
- return 0;

ret = dwc3_send_clear_stall_ep_cmd(dep);
if (ret)
diff --git a/drivers/usb/storage/unusual_realtek.h b/drivers/usb/storage/unusual_realtek.h
index d17cd95b55bb..6b2140f966ef 100644
--- a/drivers/usb/storage/unusual_realtek.h
+++ b/drivers/usb/storage/unusual_realtek.h
@@ -27,4 +27,14 @@ UNUSUAL_DEV(0x0bda, 0x0159, 0x0000, 0x9999,
"USB Card Reader",
USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),

+UNUSUAL_DEV(0x0bda, 0x0177, 0x0000, 0x9999,
+ "Realtek",
+ "USB Card Reader",
+ USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),
+
+UNUSUAL_DEV(0x0bda, 0x0184, 0x0000, 0x9999,
+ "Realtek",
+ "USB Card Reader",
+ USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),
+
#endif /* defined(CONFIG_USB_STORAGE_REALTEK) || ... */
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index dc52ce5e495f..834a3f5ef642 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -477,9 +477,9 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
int mirror_num = 0;
int failed_mirror = 0;

- clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
while (1) {
+ clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
mirror_num);
if (!ret) {
@@ -493,15 +493,6 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
break;
}

- /*
- * This buffer's crc is fine, but its contents are corrupted, so
- * there is no reason to read the other copies, they won't be
- * any less wrong.
- */
- if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags) ||
- ret == -EUCLEAN)
- break;
-
num_copies = btrfs_num_copies(fs_info,
eb->start, eb->len);
if (num_copies == 1)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 7d81cc415264..ca4902c66dc4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2088,6 +2088,30 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)

atomic_inc(&root->log_batch);

+ /*
+ * Before we acquired the inode's lock, someone may have dirtied more
+ * pages in the target range. We need to make sure that writeback for
+ * any such pages does not start while we are logging the inode, because
+ * if it does, any of the following might happen when we are not doing a
+ * full inode sync:
+ *
+ * 1) We log an extent after its writeback finishes but before its
+ * checksums are added to the csum tree, leading to -EIO errors
+ * when attempting to read the extent after a log replay.
+ *
+ * 2) We can end up logging an extent before its writeback finishes.
+ * Therefore after the log replay we will have a file extent item
+ * pointing to an unwritten extent (and no data checksums as well).
+ *
+ * So trigger writeback for any eventual new dirty pages and then we
+ * wait for all ordered extents to complete below.
+ */
+ ret = start_ordered_ops(inode, start, end);
+ if (ret) {
+ inode_unlock(inode);
+ goto out;
+ }
+
/*
* We have to do this here to avoid the priority inversion of waiting on
* IO of a lower priority task while holding a transaciton open.
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b070401406be..ff434663d65b 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2244,7 +2244,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
int i;
u64 *i_qgroups;
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_root *quota_root = fs_info->quota_root;
+ struct btrfs_root *quota_root;
struct btrfs_qgroup *srcgroup;
struct btrfs_qgroup *dstgroup;
u32 level_size = 0;
@@ -2254,6 +2254,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
goto out;

+ quota_root = fs_info->quota_root;
if (!quota_root) {
ret = -EINVAL;
goto out;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 60bf8dfe7df4..0526b6c473c7 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3963,6 +3963,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
restart:
if (update_backref_cache(trans, &rc->backref_cache)) {
btrfs_end_transaction(trans);
+ trans = NULL;
continue;
}

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 6601c9aa5e35..8ad145820ea8 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2235,6 +2235,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
vol = memdup_user((void __user *)arg, sizeof(*vol));
if (IS_ERR(vol))
return PTR_ERR(vol);
+ vol->name[BTRFS_PATH_NAME_MAX] = '\0';

switch (cmd) {
case BTRFS_IOC_SCAN_DEV:
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 093fb54cd316..199146036093 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -325,8 +325,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
*/
dio->iocb->ki_pos += transferred;

- if (dio->op == REQ_OP_WRITE)
- ret = generic_write_sync(dio->iocb, transferred);
+ if (ret > 0 && dio->op == REQ_OP_WRITE)
+ ret = generic_write_sync(dio->iocb, ret);
dio->iocb->ki_complete(dio->iocb, ret, 0);
}

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 73bd58fa13de..0c38e31ec938 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -895,6 +895,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
if (sb->s_magic != EXT2_SUPER_MAGIC)
goto cantfind_ext2;

+ opts.s_mount_opt = 0;
/* Set defaults before we parse the mount options */
def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
if (def_mount_opts & EXT2_DEFM_DEBUG)
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 62d9a659a8ff..dd8f10db82e9 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -612,9 +612,9 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
}

cleanup:
- brelse(bh);
if (!(bh && header == HDR(bh)))
kfree(header);
+ brelse(bh);
up_write(&EXT2_I(inode)->xattr_sem);

return error;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index b997e3116e37..c495db7165ae 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -831,16 +831,20 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)


ret = udf_dstrCS0toChar(sb, outstr, 31, pvoldesc->volIdent, 32);
- if (ret < 0)
- goto out_bh;
-
- strncpy(UDF_SB(sb)->s_volume_ident, outstr, ret);
+ if (ret < 0) {
+ strcpy(UDF_SB(sb)->s_volume_ident, "InvalidName");
+ pr_warn("incorrect volume identification, setting to "
+ "'InvalidName'\n");
+ } else {
+ strncpy(UDF_SB(sb)->s_volume_ident, outstr, ret);
+ }
udf_debug("volIdent[] = '%s'\n", UDF_SB(sb)->s_volume_ident);

ret = udf_dstrCS0toChar(sb, outstr, 127, pvoldesc->volSetIdent, 128);
- if (ret < 0)
+ if (ret < 0) {
+ ret = 0;
goto out_bh;
-
+ }
outstr[ret] = 0;
udf_debug("volSetIdent[] = '%s'\n", outstr);

diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 45234791fec2..5fcfa96463eb 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -351,6 +351,11 @@ static int udf_name_to_CS0(struct super_block *sb,
return u_len;
}

+/*
+ * Convert CS0 dstring to output charset. Warning: This function may truncate
+ * input string if it is too long as it is used for informational strings only
+ * and it is better to truncate the string than to refuse mounting a media.
+ */
int udf_dstrCS0toChar(struct super_block *sb, uint8_t *utf_o, int o_len,
const uint8_t *ocu_i, int i_len)
{
@@ -359,9 +364,12 @@ int udf_dstrCS0toChar(struct super_block *sb, uint8_t *utf_o, int o_len,
if (i_len > 0) {
s_len = ocu_i[i_len - 1];
if (s_len >= i_len) {
- pr_err("incorrect dstring lengths (%d/%d)\n",
- s_len, i_len);
- return -EINVAL;
+ pr_warn("incorrect dstring lengths (%d/%d),"
+ " truncating\n", s_len, i_len);
+ s_len = i_len - 1;
+ /* 2-byte encoding? Need to round properly... */
+ if (ocu_i[0] == 16)
+ s_len -= (s_len - 1) & 2;
}
}

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 356d2b8568c1..cd58939dc977 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1361,6 +1361,19 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
ret = -EINVAL;
if (!vma_can_userfault(cur))
goto out_unlock;
+
+ /*
+ * UFFDIO_COPY will fill file holes even without
+ * PROT_WRITE. This check enforces that if this is a
+ * MAP_SHARED, the process has write permission to the backing
+ * file. If VM_MAYWRITE is set it also enforces that on a
+ * MAP_SHARED vma: there is no F_WRITE_SEAL and no further
+ * F_WRITE_SEAL can be taken until the vma is destroyed.
+ */
+ ret = -EPERM;
+ if (unlikely(!(cur->vm_flags & VM_MAYWRITE)))
+ goto out_unlock;
+
/*
* If this vma contains ending address, and huge pages
* check alignment.
@@ -1406,6 +1419,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
BUG_ON(!vma_can_userfault(vma));
BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
vma->vm_userfaultfd_ctx.ctx != ctx);
+ WARN_ON(!(vma->vm_flags & VM_MAYWRITE));

/*
* Nothing to do: this vma is already registered into this
@@ -1552,6 +1566,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
cond_resched();

BUG_ON(!vma_can_userfault(vma));
+ WARN_ON(!(vma->vm_flags & VM_MAYWRITE));

/*
* Nothing to do: this vma is already registered into this
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index a397907e8d72..dd16e8218db3 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -777,8 +777,8 @@ struct ftrace_ret_stack {
extern void return_to_handler(void);

extern int
-ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
- unsigned long frame_pointer, unsigned long *retp);
+function_graph_enter(unsigned long ret, unsigned long func,
+ unsigned long frame_pointer, unsigned long *retp);

unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
unsigned long ret, unsigned long *retp);
diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h
index 331dc377c275..dc12f5c4b076 100644
--- a/include/linux/hid-sensor-hub.h
+++ b/include/linux/hid-sensor-hub.h
@@ -177,6 +177,7 @@ int sensor_hub_input_get_attribute_info(struct hid_sensor_hub_device *hsdev,
* @attr_usage_id: Attribute usage id as per spec
* @report_id: Report id to look for
* @flag: Synchronous or asynchronous read
+* @is_signed: If true then fields < 32 bits will be sign-extended
*
* Issues a synchronous or asynchronous read request for an input attribute.
* Returns data upto 32 bits.
@@ -190,7 +191,8 @@ enum sensor_hub_read_flags {
int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev,
u32 usage_id,
u32 attr_usage_id, u32 report_id,
- enum sensor_hub_read_flags flag
+ enum sensor_hub_read_flags flag,
+ bool is_signed
);

/**
diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h
index c79e859408e6..fd458389f7d1 100644
--- a/include/linux/net_dim.h
+++ b/include/linux/net_dim.h
@@ -406,6 +406,8 @@ static inline void net_dim(struct net_dim *dim,
}
/* fall through */
case NET_DIM_START_MEASURE:
+ net_dim_sample(end_sample.event_ctr, end_sample.pkt_ctr, end_sample.byte_ctr,
+ &dim->start_sample);
dim->state = NET_DIM_MEASURE_IN_PROGRESS;
break;
case NET_DIM_APPLY_NEW_PROFILE:
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 4f36431c380b..561feb560619 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -62,8 +62,8 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
#define PTRACE_MODE_READ 0x01
#define PTRACE_MODE_ATTACH 0x02
#define PTRACE_MODE_NOAUDIT 0x04
-#define PTRACE_MODE_FSCREDS 0x08
-#define PTRACE_MODE_REALCREDS 0x10
+#define PTRACE_MODE_FSCREDS 0x08
+#define PTRACE_MODE_REALCREDS 0x10

/* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
#define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 977cb57d7bc9..4abb5bd74b04 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1108,6 +1108,7 @@ struct task_struct {
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* Index of current stored address in ret_stack: */
int curr_ret_stack;
+ int curr_ret_depth;

/* Stack of return addresses for return function tracing: */
struct ftrace_ret_stack *ret_stack;
@@ -1439,6 +1440,8 @@ static inline bool is_percpu_thread(void)
#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */
#define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */
#define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/
+#define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */
+#define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */

#define TASK_PFA_TEST(name, func) \
static inline bool task_##func(struct task_struct *p) \
@@ -1470,6 +1473,13 @@ TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)

+TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable)
+TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable)
+TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable)
+
+TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
+TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
+
static inline void
current_restore_flags(unsigned long orig_flags, unsigned long flags)
{
diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h
new file mode 100644
index 000000000000..59d3736c454c
--- /dev/null
+++ b/include/linux/sched/smt.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SCHED_SMT_H
+#define _LINUX_SCHED_SMT_H
+
+#include <linux/static_key.h>
+
+#ifdef CONFIG_SCHED_SMT
+extern struct static_key_false sched_smt_present;
+
+static __always_inline bool sched_smt_active(void)
+{
+ return static_branch_likely(&sched_smt_present);
+}
+#else
+static inline bool sched_smt_active(void) { return false; }
+#endif
+
+void arch_smt_update(void);
+
+#endif
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 17a13e4785fc..e6ef9cc05e60 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1311,6 +1311,22 @@ static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg)
}
}

+static inline void skb_zcopy_set_nouarg(struct sk_buff *skb, void *val)
+{
+ skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t) val | 0x1UL);
+ skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG;
+}
+
+static inline bool skb_zcopy_is_nouarg(struct sk_buff *skb)
+{
+ return (uintptr_t) skb_shinfo(skb)->destructor_arg & 0x1UL;
+}
+
+static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb)
+{
+ return (void *)((uintptr_t) skb_shinfo(skb)->destructor_arg & ~0x1UL);
+}
+
/* Release a reference on a zerocopy structure */
static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
{
@@ -1320,7 +1336,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
if (uarg->callback == sock_zerocopy_callback) {
uarg->zerocopy = uarg->zerocopy && zerocopy;
sock_zerocopy_put(uarg);
- } else {
+ } else if (!skb_zcopy_is_nouarg(skb)) {
uarg->callback(uarg, zerocopy);
}

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 263e37271afd..d2c8f280e48f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -196,6 +196,7 @@ struct tcp_sock {
u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
u32 lsndtime; /* timestamp of last sent data packet (for restart window) */
u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */
+ u32 compressed_ack_rcv_nxt;

u32 tsoffset; /* timestamp offset */

diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index c0d7ea0bf5b6..b17201edfa09 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -212,6 +212,7 @@ struct prctl_mm_map {
#define PR_SET_SPECULATION_CTRL 53
/* Speculation control variants */
# define PR_SPEC_STORE_BYPASS 0
+# define PR_SPEC_INDIRECT_BRANCH 1
/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
# define PR_SPEC_NOT_AFFECTED 0
# define PR_SPEC_PRCTL (1UL << 0)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0097acec1c71..1699ff68c412 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -10,6 +10,7 @@
#include <linux/sched/signal.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/task.h>
+#include <linux/sched/smt.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/oom.h>
@@ -346,6 +347,12 @@ void cpu_hotplug_enable(void)
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
#endif /* CONFIG_HOTPLUG_CPU */

+/*
+ * Architectures that need SMT-specific errata handling during SMT hotplug
+ * should override this.
+ */
+void __weak arch_smt_update(void) { }
+
#ifdef CONFIG_HOTPLUG_SMT
enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
EXPORT_SYMBOL_GPL(cpu_smt_control);
@@ -982,6 +989,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
* concurrent CPU hotplug via cpu_add_remove_lock.
*/
lockup_detector_cleanup();
+ arch_smt_update();
return ret;
}

@@ -1110,6 +1118,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
ret = cpuhp_up_callbacks(cpu, st, target);
out:
cpus_write_unlock();
+ arch_smt_update();
return ret;
}

@@ -2052,8 +2061,10 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
*/
cpuhp_offline_cpu_device(cpu);
}
- if (!ret)
+ if (!ret) {
cpu_smt_control = ctrlval;
+ arch_smt_update();
+ }
cpu_maps_update_done();
return ret;
}
@@ -2064,6 +2075,7 @@ static int cpuhp_smt_enable(void)

cpu_maps_update_begin();
cpu_smt_control = CPU_SMT_ENABLED;
+ arch_smt_update();
for_each_present_cpu(cpu) {
/* Skip online CPUs and CPUs on offline nodes */
if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 35551110d277..2beda4b726e2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5741,15 +5741,10 @@ int sched_cpu_activate(unsigned int cpu)

#ifdef CONFIG_SCHED_SMT
/*
- * The sched_smt_present static key needs to be evaluated on every
- * hotplug event because at boot time SMT might be disabled when
- * the number of booted CPUs is limited.
- *
- * If then later a sibling gets hotplugged, then the key would stay
- * off and SMT scheduling would never be functional.
+ * When going up, increment the number of cores with SMT present.
*/
- if (cpumask_weight(cpu_smt_mask(cpu)) > 1)
- static_branch_enable_cpuslocked(&sched_smt_present);
+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+ static_branch_inc_cpuslocked(&sched_smt_present);
#endif
set_cpu_active(cpu, true);

@@ -5793,6 +5788,14 @@ int sched_cpu_deactivate(unsigned int cpu)
*/
synchronize_rcu_mult(call_rcu, call_rcu_sched);

+#ifdef CONFIG_SCHED_SMT
+ /*
+ * When going down, decrement the number of cores with SMT present.
+ */
+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+ static_branch_dec_cpuslocked(&sched_smt_present);
+#endif
+
if (!sched_smp_initialized)
return 0;

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9683f458aec7..6c25bbe87bd3 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -23,6 +23,7 @@
#include <linux/sched/prio.h>
#include <linux/sched/rt.h>
#include <linux/sched/signal.h>
+#include <linux/sched/smt.h>
#include <linux/sched/stat.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/task.h>
@@ -930,9 +931,6 @@ static inline int cpu_of(struct rq *rq)


#ifdef CONFIG_SCHED_SMT
-
-extern struct static_key_false sched_smt_present;
-
extern void __update_idle_core(struct rq *rq);

static inline void update_idle_core(struct rq *rq)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f536f601bd46..77734451cb05 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -817,7 +817,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip,
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static int profile_graph_entry(struct ftrace_graph_ent *trace)
{
- int index = trace->depth;
+ int index = current->curr_ret_stack;

function_profile_call(trace->func, 0, NULL, NULL);

@@ -852,7 +852,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
if (!fgraph_graph_time) {
int index;

- index = trace->depth;
+ index = current->curr_ret_stack;

/* Append this call time to the parent time to subtract */
if (index)
@@ -6814,6 +6814,7 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
atomic_set(&t->tracing_graph_pause, 0);
atomic_set(&t->trace_overrun, 0);
t->curr_ret_stack = -1;
+ t->curr_ret_depth = -1;
/* Make sure the tasks see the -1 first: */
smp_wmb();
t->ret_stack = ret_stack_list[start++];
@@ -7038,6 +7039,7 @@ graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
{
t->curr_ret_stack = -1;
+ t->curr_ret_depth = -1;
/*
* The idle task has no parent, it either has its own
* stack or no stack at all.
@@ -7068,6 +7070,7 @@ void ftrace_graph_init_task(struct task_struct *t)
/* Make sure we do not use the parent ret_stack */
t->ret_stack = NULL;
t->curr_ret_stack = -1;
+ t->curr_ret_depth = -1;

if (ftrace_graph_active) {
struct ftrace_ret_stack *ret_stack;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 169b3c44ee97..2561460d7baf 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -118,8 +118,8 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration,
struct trace_seq *s, u32 flags);

/* Add a function return address to the trace stack on thread info.*/
-int
-ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
+static int
+ftrace_push_return_trace(unsigned long ret, unsigned long func,
unsigned long frame_pointer, unsigned long *retp)
{
unsigned long long calltime;
@@ -177,9 +177,31 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
current->ret_stack[index].retp = retp;
#endif
- *depth = current->curr_ret_stack;
+ return 0;
+}
+
+int function_graph_enter(unsigned long ret, unsigned long func,
+ unsigned long frame_pointer, unsigned long *retp)
+{
+ struct ftrace_graph_ent trace;
+
+ trace.func = func;
+ trace.depth = ++current->curr_ret_depth;
+
+ if (ftrace_push_return_trace(ret, func,
+ frame_pointer, retp))
+ goto out;
+
+ /* Only trace if the calling function expects to */
+ if (!ftrace_graph_entry(&trace))
+ goto out_ret;

return 0;
+ out_ret:
+ current->curr_ret_stack--;
+ out:
+ current->curr_ret_depth--;
+ return -EBUSY;
}

/* Retrieve a function return address to the trace stack on thread info.*/
@@ -241,7 +263,13 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
trace->func = current->ret_stack[index].func;
trace->calltime = current->ret_stack[index].calltime;
trace->overrun = atomic_read(&current->trace_overrun);
- trace->depth = index;
+ trace->depth = current->curr_ret_depth--;
+ /*
+ * We still want to trace interrupts coming in if
+ * max_depth is set to 1. Make sure the decrement is
+ * seen before ftrace_graph_return.
+ */
+ barrier();
}

/*
@@ -255,6 +283,12 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)

ftrace_pop_return_trace(&trace, &ret, frame_pointer);
trace.rettime = trace_clock_local();
+ ftrace_graph_return(&trace);
+ /*
+ * The ftrace_graph_return() may still access the current
+ * ret_stack structure, we need to make sure the update of
+ * curr_ret_stack is after that.
+ */
barrier();
current->curr_ret_stack--;
/*
@@ -267,13 +301,6 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
return ret;
}

- /*
- * The trace should run after decrementing the ret counter
- * in case an interrupt were to come in. We don't want to
- * lose the interrupt if max_depth is set.
- */
- ftrace_graph_return(&trace);
-
if (unlikely(!ret)) {
ftrace_graph_stop();
WARN_ON(1);
diff --git a/lib/test_kmod.c b/lib/test_kmod.c
index e3ddd836491f..d82d022111e0 100644
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -1214,7 +1214,6 @@ void unregister_test_dev_kmod(struct kmod_test_device *test_dev)

dev_info(test_dev->dev, "removing interface\n");
misc_deregister(&test_dev->misc_dev);
- kfree(&test_dev->misc_dev.name);

mutex_unlock(&test_dev->config_mutex);
mutex_unlock(&test_dev->trigger_mutex);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index deed97fba979..15310f14c25e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2322,7 +2322,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
}
}

-static void freeze_page(struct page *page)
+static void unmap_page(struct page *page)
{
enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
@@ -2337,7 +2337,7 @@ static void freeze_page(struct page *page)
VM_BUG_ON_PAGE(!unmap_success, page);
}

-static void unfreeze_page(struct page *page)
+static void remap_page(struct page *page)
{
int i;
if (PageTransHuge(page)) {
@@ -2373,6 +2373,12 @@ static void __split_huge_page_tail(struct page *head, int tail,
(1L << PG_unevictable) |
(1L << PG_dirty)));

+ /* ->mapping in first tail page is compound_mapcount */
+ VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
+ page_tail);
+ page_tail->mapping = head->mapping;
+ page_tail->index = head->index + tail;
+
/* Page flags must be visible before we make the page non-compound. */
smp_wmb();

@@ -2393,12 +2399,6 @@ static void __split_huge_page_tail(struct page *head, int tail,
if (page_is_idle(head))
set_page_idle(page_tail);

- /* ->mapping in first tail page is compound_mapcount */
- VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
- page_tail);
- page_tail->mapping = head->mapping;
-
- page_tail->index = head->index + tail;
page_cpupid_xchg_last(page_tail, page_cpupid_last(head));

/*
@@ -2410,12 +2410,11 @@ static void __split_huge_page_tail(struct page *head, int tail,
}

static void __split_huge_page(struct page *page, struct list_head *list,
- unsigned long flags)
+ pgoff_t end, unsigned long flags)
{
struct page *head = compound_head(page);
struct zone *zone = page_zone(head);
struct lruvec *lruvec;
- pgoff_t end = -1;
int i;

lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);
@@ -2423,9 +2422,6 @@ static void __split_huge_page(struct page *page, struct list_head *list,
/* complete memcg works before add pages to LRU */
mem_cgroup_split_huge_fixup(head);

- if (!PageAnon(page))
- end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE);
-
for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
__split_huge_page_tail(head, i, lruvec, list);
/* Some pages can be beyond i_size: drop them from page cache */
@@ -2454,7 +2450,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,

spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);

- unfreeze_page(head);
+ remap_page(head);

for (i = 0; i < HPAGE_PMD_NR; i++) {
struct page *subpage = head + i;
@@ -2597,6 +2593,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
int count, mapcount, extra_pins, ret;
bool mlocked;
unsigned long flags;
+ pgoff_t end;

VM_BUG_ON_PAGE(is_huge_zero_page(page), page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -2619,6 +2616,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
ret = -EBUSY;
goto out;
}
+ end = -1;
mapping = NULL;
anon_vma_lock_write(anon_vma);
} else {
@@ -2632,10 +2630,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)

anon_vma = NULL;
i_mmap_lock_read(mapping);
+
+ /*
+ *__split_huge_page() may need to trim off pages beyond EOF:
+ * but on 32-bit, i_size_read() takes an irq-unsafe seqlock,
+ * which cannot be nested inside the page tree lock. So note
+ * end now: i_size itself may be changed at any moment, but
+ * head page lock is good enough to serialize the trimming.
+ */
+ end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
}

/*
- * Racy check if we can split the page, before freeze_page() will
+ * Racy check if we can split the page, before unmap_page() will
* split PMDs
*/
if (!can_split_huge_page(head, &extra_pins)) {
@@ -2644,7 +2651,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
}

mlocked = PageMlocked(page);
- freeze_page(head);
+ unmap_page(head);
VM_BUG_ON_PAGE(compound_mapcount(head), head);

/* Make sure the page is not on per-CPU pagevec as it takes pin */
@@ -2681,7 +2688,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
if (mapping)
__dec_node_page_state(page, NR_SHMEM_THPS);
spin_unlock(&pgdata->split_queue_lock);
- __split_huge_page(page, list, flags);
+ __split_huge_page(page, list, end, flags);
if (PageSwapCache(head)) {
swp_entry_t entry = { .val = page_private(head) };

@@ -2701,7 +2708,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
fail: if (mapping)
xa_unlock(&mapping->i_pages);
spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
- unfreeze_page(head);
+ remap_page(head);
ret = -EBUSY;
}

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index a31d740e6cd1..fde5820be24d 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1287,7 +1287,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
* collapse_shmem - collapse small tmpfs/shmem pages into huge one.
*
* Basic scheme is simple, details are more complex:
- * - allocate and freeze a new huge page;
+ * - allocate and lock a new huge page;
* - scan over radix tree replacing old pages the new one
* + swap in pages if necessary;
* + fill in gaps;
@@ -1295,11 +1295,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
* - if replacing succeed:
* + copy data over;
* + free old pages;
- * + unfreeze huge page;
+ * + unlock huge page;
* - if replacing failed;
* + put all pages back and unfreeze them;
* + restore gaps in the radix-tree;
- * + free huge page;
+ * + unlock and free huge page;
*/
static void collapse_shmem(struct mm_struct *mm,
struct address_space *mapping, pgoff_t start,
@@ -1330,18 +1330,15 @@ static void collapse_shmem(struct mm_struct *mm,
goto out;
}

+ __SetPageLocked(new_page);
+ __SetPageSwapBacked(new_page);
new_page->index = start;
new_page->mapping = mapping;
- __SetPageSwapBacked(new_page);
- __SetPageLocked(new_page);
- BUG_ON(!page_ref_freeze(new_page, 1));
-

/*
- * At this point the new_page is 'frozen' (page_count() is zero), locked
- * and not up-to-date. It's safe to insert it into radix tree, because
- * nobody would be able to map it or use it in other way until we
- * unfreeze it.
+ * At this point the new_page is locked and not up-to-date.
+ * It's safe to insert it into the page cache, because nobody would
+ * be able to map it or use it in another way until we unlock it.
*/

index = start;
@@ -1349,19 +1346,29 @@ static void collapse_shmem(struct mm_struct *mm,
radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
int n = min(iter.index, end) - index;

+ /*
+ * Stop if extent has been hole-punched, and is now completely
+ * empty (the more obvious i_size_read() check would take an
+ * irq-unsafe seqlock on 32-bit).
+ */
+ if (n >= HPAGE_PMD_NR) {
+ result = SCAN_TRUNCATED;
+ goto tree_locked;
+ }
+
/*
* Handle holes in the radix tree: charge it from shmem and
* insert relevant subpage of new_page into the radix-tree.
*/
if (n && !shmem_charge(mapping->host, n)) {
result = SCAN_FAIL;
- break;
+ goto tree_locked;
}
- nr_none += n;
for (; index < min(iter.index, end); index++) {
radix_tree_insert(&mapping->i_pages, index,
new_page + (index % HPAGE_PMD_NR));
}
+ nr_none += n;

/* We are done. */
if (index >= end)
@@ -1377,12 +1384,12 @@ static void collapse_shmem(struct mm_struct *mm,
result = SCAN_FAIL;
goto tree_unlocked;
}
- xa_lock_irq(&mapping->i_pages);
} else if (trylock_page(page)) {
get_page(page);
+ xa_unlock_irq(&mapping->i_pages);
} else {
result = SCAN_PAGE_LOCK;
- break;
+ goto tree_locked;
}

/*
@@ -1391,17 +1398,24 @@ static void collapse_shmem(struct mm_struct *mm,
*/
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(!PageUptodate(page), page);
- VM_BUG_ON_PAGE(PageTransCompound(page), page);
+
+ /*
+ * If file was truncated then extended, or hole-punched, before
+ * we locked the first page, then a THP might be there already.
+ */
+ if (PageTransCompound(page)) {
+ result = SCAN_PAGE_COMPOUND;
+ goto out_unlock;
+ }

if (page_mapping(page) != mapping) {
result = SCAN_TRUNCATED;
goto out_unlock;
}
- xa_unlock_irq(&mapping->i_pages);

if (isolate_lru_page(page)) {
result = SCAN_DEL_PAGE_LRU;
- goto out_isolate_failed;
+ goto out_unlock;
}

if (page_mapped(page))
@@ -1422,7 +1436,9 @@ static void collapse_shmem(struct mm_struct *mm,
*/
if (!page_ref_freeze(page, 3)) {
result = SCAN_PAGE_COUNT;
- goto out_lru;
+ xa_unlock_irq(&mapping->i_pages);
+ putback_lru_page(page);
+ goto out_unlock;
}

/*
@@ -1438,17 +1454,10 @@ static void collapse_shmem(struct mm_struct *mm,
slot = radix_tree_iter_resume(slot, &iter);
index++;
continue;
-out_lru:
- xa_unlock_irq(&mapping->i_pages);
- putback_lru_page(page);
-out_isolate_failed:
- unlock_page(page);
- put_page(page);
- goto tree_unlocked;
out_unlock:
unlock_page(page);
put_page(page);
- break;
+ goto tree_unlocked;
}

/*
@@ -1456,14 +1465,18 @@ static void collapse_shmem(struct mm_struct *mm,
* This code only triggers if there's nothing in radix tree
* beyond 'end'.
*/
- if (result == SCAN_SUCCEED && index < end) {
+ if (index < end) {
int n = end - index;

+ /* Stop if extent has been truncated, and is now empty */
+ if (n >= HPAGE_PMD_NR) {
+ result = SCAN_TRUNCATED;
+ goto tree_locked;
+ }
if (!shmem_charge(mapping->host, n)) {
result = SCAN_FAIL;
goto tree_locked;
}
-
for (; index < end; index++) {
radix_tree_insert(&mapping->i_pages, index,
new_page + (index % HPAGE_PMD_NR));
@@ -1471,59 +1484,64 @@ static void collapse_shmem(struct mm_struct *mm,
nr_none += n;
}

+ __inc_node_page_state(new_page, NR_SHMEM_THPS);
+ if (nr_none) {
+ struct zone *zone = page_zone(new_page);
+
+ __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
+ __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
+ }
+
tree_locked:
xa_unlock_irq(&mapping->i_pages);
tree_unlocked:

if (result == SCAN_SUCCEED) {
- unsigned long flags;
- struct zone *zone = page_zone(new_page);
-
/*
* Replacing old pages with new one has succeed, now we need to
* copy the content and free old pages.
*/
+ index = start;
list_for_each_entry_safe(page, tmp, &pagelist, lru) {
+ while (index < page->index) {
+ clear_highpage(new_page + (index % HPAGE_PMD_NR));
+ index++;
+ }
copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
page);
list_del(&page->lru);
- unlock_page(page);
- page_ref_unfreeze(page, 1);
page->mapping = NULL;
+ page_ref_unfreeze(page, 1);
ClearPageActive(page);
ClearPageUnevictable(page);
+ unlock_page(page);
put_page(page);
+ index++;
}
-
- local_irq_save(flags);
- __inc_node_page_state(new_page, NR_SHMEM_THPS);
- if (nr_none) {
- __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
- __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
+ while (index < end) {
+ clear_highpage(new_page + (index % HPAGE_PMD_NR));
+ index++;
}
- local_irq_restore(flags);

- /*
- * Remove pte page tables, so we can re-faulti
- * the page as huge.
- */
- retract_page_tables(mapping, start);
-
- /* Everything is ready, let's unfreeze the new_page */
- set_page_dirty(new_page);
SetPageUptodate(new_page);
- page_ref_unfreeze(new_page, HPAGE_PMD_NR);
+ page_ref_add(new_page, HPAGE_PMD_NR - 1);
+ set_page_dirty(new_page);
mem_cgroup_commit_charge(new_page, memcg, false, true);
lru_cache_add_anon(new_page);
- unlock_page(new_page);

+ /*
+ * Remove pte page tables, so we can re-fault the page as huge.
+ */
+ retract_page_tables(mapping, start);
*hpage = NULL;

khugepaged_pages_collapsed++;
} else {
/* Something went wrong: rollback changes to the radix-tree */
- shmem_uncharge(mapping->host, nr_none);
xa_lock_irq(&mapping->i_pages);
+ mapping->nrpages -= nr_none;
+ shmem_uncharge(mapping->host, nr_none);
+
radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
if (iter.index >= end)
break;
@@ -1546,19 +1564,18 @@ static void collapse_shmem(struct mm_struct *mm,
radix_tree_replace_slot(&mapping->i_pages, slot, page);
slot = radix_tree_iter_resume(slot, &iter);
xa_unlock_irq(&mapping->i_pages);
- putback_lru_page(page);
unlock_page(page);
+ putback_lru_page(page);
xa_lock_irq(&mapping->i_pages);
}
VM_BUG_ON(nr_none);
xa_unlock_irq(&mapping->i_pages);

- /* Unfreeze new_page, caller would take care about freeing it */
- page_ref_unfreeze(new_page, 1);
mem_cgroup_cancel_charge(new_page, memcg, true);
- unlock_page(new_page);
new_page->mapping = NULL;
}
+
+ unlock_page(new_page);
out:
VM_BUG_ON(!list_empty(&pagelist));
/* TODO: tracepoints */
diff --git a/mm/rmap.c b/mm/rmap.c
index 1e79fac3186b..85b7f9423352 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1627,16 +1627,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
address + PAGE_SIZE);
} else {
/*
- * We should not need to notify here as we reach this
- * case only from freeze_page() itself only call from
- * split_huge_page_to_list() so everything below must
- * be true:
- * - page is not anonymous
- * - page is locked
- *
- * So as it is a locked file back page thus it can not
- * be remove from the page cache and replace by a new
- * page before mmu_notifier_invalidate_range_end so no
+ * This is a locked file-backed page, thus it cannot
+ * be removed from the page cache and replaced by a new
+ * page before mmu_notifier_invalidate_range_end, so no
* concurrent thread might update its page table to
* point at new page while a device still is using this
* page.
diff --git a/mm/shmem.c b/mm/shmem.c
index 38d228a30fdc..0b02b539072e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -297,12 +297,14 @@ bool shmem_charge(struct inode *inode, long pages)
if (!shmem_inode_acct_block(inode, pages))
return false;

+ /* nrpages adjustment first, then shmem_recalc_inode() when balanced */
+ inode->i_mapping->nrpages += pages;
+
spin_lock_irqsave(&info->lock, flags);
info->alloced += pages;
inode->i_blocks += pages * BLOCKS_PER_PAGE;
shmem_recalc_inode(inode);
spin_unlock_irqrestore(&info->lock, flags);
- inode->i_mapping->nrpages += pages;

return true;
}
@@ -312,6 +314,8 @@ void shmem_uncharge(struct inode *inode, long pages)
struct shmem_inode_info *info = SHMEM_I(inode);
unsigned long flags;

+ /* nrpages adjustment done by __delete_from_page_cache() or caller */
+
spin_lock_irqsave(&info->lock, flags);
info->alloced -= pages;
inode->i_blocks -= pages * BLOCKS_PER_PAGE;
@@ -1547,11 +1551,13 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
{
struct page *oldpage, *newpage;
struct address_space *swap_mapping;
+ swp_entry_t entry;
pgoff_t swap_index;
int error;

oldpage = *pagep;
- swap_index = page_private(oldpage);
+ entry.val = page_private(oldpage);
+ swap_index = swp_offset(entry);
swap_mapping = page_mapping(oldpage);

/*
@@ -1570,7 +1576,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
__SetPageLocked(newpage);
__SetPageSwapBacked(newpage);
SetPageUptodate(newpage);
- set_page_private(newpage, swap_index);
+ set_page_private(newpage, entry.val);
SetPageSwapCache(newpage);

/*
diff --git a/mm/truncate.c b/mm/truncate.c
index 1d2fb2dca96f..71b65aab8077 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -520,9 +520,13 @@ void truncate_inode_pages_final(struct address_space *mapping)
*/
xa_lock_irq(&mapping->i_pages);
xa_unlock_irq(&mapping->i_pages);
-
- truncate_inode_pages(mapping, 0);
}
+
+ /*
+ * Cleancache needs notification even if there are no pages or shadow
+ * entries.
+ */
+ truncate_inode_pages(mapping, 0);
}
EXPORT_SYMBOL(truncate_inode_pages_final);

diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 5029f241908f..f0af11b1cdf3 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -205,8 +205,9 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
goto out_unlock;
/*
- * Only allow __mcopy_atomic_hugetlb on userfaultfd
- * registered ranges.
+ * Check the vma is registered in uffd, this is
+ * required to enforce the VM_MAYWRITE check done at
+ * uffd registration time.
*/
if (!dst_vma->vm_userfaultfd_ctx.ctx)
goto out_unlock;
@@ -449,13 +450,9 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
if (!dst_vma)
goto out_unlock;
/*
- * Be strict and only allow __mcopy_atomic on userfaultfd
- * registered ranges to prevent userland errors going
- * unnoticed. As far as the VM consistency is concerned, it
- * would be perfectly safe to remove this check, but there's
- * no useful usage for __mcopy_atomic ouside of userfaultfd
- * registered ranges. This is after all why these are ioctls
- * belonging to the userfaultfd and not syscalls.
+ * Check the vma is registered in uffd, this is required to
+ * enforce the VM_MAYWRITE check done at uffd registration
+ * time.
*/
if (!dst_vma->vm_userfaultfd_ctx.ctx)
goto out_unlock;
diff --git a/net/core/dev.c b/net/core/dev.c
index 097c02101450..22af88c47756 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5945,11 +5945,14 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
if (work_done)
timeout = n->dev->gro_flush_timeout;

+ /* When the NAPI instance uses a timeout and keeps postponing
+ * it, we need to bound somehow the time packets are kept in
+ * the GRO layer
+ */
+ napi_gro_flush(n, !!timeout);
if (timeout)
hrtimer_start(&n->timer, ns_to_ktime(timeout),
HRTIMER_MODE_REL_PINNED);
- else
- napi_gro_flush(n, false);
}
if (unlikely(!list_empty(&n->poll_list))) {
/* If n->poll_list is not empty, we need to mask irqs */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f817f336595d..abbbd7fd17fe 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4912,6 +4912,11 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
nf_reset(skb);
nf_reset_trace(skb);

+#ifdef CONFIG_NET_SWITCHDEV
+ skb->offload_fwd_mark = 0;
+ skb->offload_mr_fwd_mark = 0;
+#endif
+
if (!xnet)
return;

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 72898cbef43d..664fa7d8f7d9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4276,7 +4276,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
* If the sack array is full, forget about the last one.
*/
if (this_sack >= TCP_NUM_SACKS) {
- if (tp->compressed_ack)
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
tcp_send_ack(sk);
this_sack--;
tp->rx_opt.num_sacks--;
@@ -5196,7 +5196,17 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
if (!tcp_is_sack(tp) ||
tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
goto send_now;
- tp->compressed_ack++;
+
+ if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
+ tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
+ tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+ tp->compressed_ack = 0;
+ }
+
+ if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
+ goto send_now;

if (hrtimer_is_queued(&tp->compressed_ack_timer))
return;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 597dbd749f05..68f65ddf9e3c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -165,10 +165,10 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
{
struct tcp_sock *tp = tcp_sk(sk);

- if (unlikely(tp->compressed_ack)) {
+ if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
- tp->compressed_ack);
- tp->compressed_ack = 0;
+ tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+ tp->compressed_ack = TCP_FASTRETRANS_THRESH;
if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
__sock_put(sk);
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 7fdf222a0bdf..57eae8d70ba1 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -740,7 +740,7 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)

bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
- if (tp->compressed_ack)
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
tcp_send_ack(sk);
} else {
if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d6e94dc7e290..6477b131e809 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2394,7 +2394,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
void *ph;
__u32 ts;

- ph = skb_shinfo(skb)->destructor_arg;
+ ph = skb_zcopy_get_nouarg(skb);
packet_dec_pending(&po->tx_ring);

ts = __packet_set_timestamp(po, ph, skb);
@@ -2461,7 +2461,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->mark = po->sk.sk_mark;
skb->tstamp = sockc->transmit_time;
sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
- skb_shinfo(skb)->destructor_arg = ph.raw;
+ skb_zcopy_set_nouarg(skb, ph.raw);

skb_reserve(skb, hlen);
skb_reset_network_header(skb);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2afc4f8c37a7..488019766433 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -584,12 +584,15 @@ static void tipc_node_clear_links(struct tipc_node *node)
/* tipc_node_cleanup - delete nodes that does not
* have active links for NODE_CLEANUP_AFTER time
*/
-static int tipc_node_cleanup(struct tipc_node *peer)
+static bool tipc_node_cleanup(struct tipc_node *peer)
{
struct tipc_net *tn = tipc_net(peer->net);
bool deleted = false;

- spin_lock_bh(&tn->node_list_lock);
+ /* If lock held by tipc_node_stop() the node will be deleted anyway */
+ if (!spin_trylock_bh(&tn->node_list_lock))
+ return false;
+
tipc_node_write_lock(peer);

if (!node_is_up(peer) && time_after(jiffies, peer->delete_at)) {
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 54da4b070db3..64fac0ad32d6 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -248,10 +248,8 @@ ifdef CONFIG_GCOV_KERNEL
objtool_args += --no-unreachable
endif
ifdef CONFIG_RETPOLINE
-ifneq ($(RETPOLINE_CFLAGS),)
objtool_args += --retpoline
endif
-endif


ifdef CONFIG_MODVERSIONS
diff --git a/sound/core/control.c b/sound/core/control.c
index 9aa15bfc7936..649d3217590e 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -348,6 +348,40 @@ static int snd_ctl_find_hole(struct snd_card *card, unsigned int count)
return 0;
}

+/* add a new kcontrol object; call with card->controls_rwsem locked */
+static int __snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol)
+{
+ struct snd_ctl_elem_id id;
+ unsigned int idx;
+ unsigned int count;
+
+ id = kcontrol->id;
+ if (id.index > UINT_MAX - kcontrol->count)
+ return -EINVAL;
+
+ if (snd_ctl_find_id(card, &id)) {
+ dev_err(card->dev,
+ "control %i:%i:%i:%s:%i is already present\n",
+ id.iface, id.device, id.subdevice, id.name, id.index);
+ return -EBUSY;
+ }
+
+ if (snd_ctl_find_hole(card, kcontrol->count) < 0)
+ return -ENOMEM;
+
+ list_add_tail(&kcontrol->list, &card->controls);
+ card->controls_count += kcontrol->count;
+ kcontrol->id.numid = card->last_numid + 1;
+ card->last_numid += kcontrol->count;
+
+ id = kcontrol->id;
+ count = kcontrol->count;
+ for (idx = 0; idx < count; idx++, id.index++, id.numid++)
+ snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id);
+
+ return 0;
+}
+
/**
* snd_ctl_add - add the control instance to the card
* @card: the card instance
@@ -364,45 +398,18 @@ static int snd_ctl_find_hole(struct snd_card *card, unsigned int count)
*/
int snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol)
{
- struct snd_ctl_elem_id id;
- unsigned int idx;
- unsigned int count;
int err = -EINVAL;

if (! kcontrol)
return err;
if (snd_BUG_ON(!card || !kcontrol->info))
goto error;
- id = kcontrol->id;
- if (id.index > UINT_MAX - kcontrol->count)
- goto error;

down_write(&card->controls_rwsem);
- if (snd_ctl_find_id(card, &id)) {
- up_write(&card->controls_rwsem);
- dev_err(card->dev, "control %i:%i:%i:%s:%i is already present\n",
- id.iface,
- id.device,
- id.subdevice,
- id.name,
- id.index);
- err = -EBUSY;
- goto error;
- }
- if (snd_ctl_find_hole(card, kcontrol->count) < 0) {
- up_write(&card->controls_rwsem);
- err = -ENOMEM;
- goto error;
- }
- list_add_tail(&kcontrol->list, &card->controls);
- card->controls_count += kcontrol->count;
- kcontrol->id.numid = card->last_numid + 1;
- card->last_numid += kcontrol->count;
- id = kcontrol->id;
- count = kcontrol->count;
+ err = __snd_ctl_add(card, kcontrol);
up_write(&card->controls_rwsem);
- for (idx = 0; idx < count; idx++, id.index++, id.numid++)
- snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id);
+ if (err < 0)
+ goto error;
return 0;

error:
@@ -1361,9 +1368,12 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file,
kctl->tlv.c = snd_ctl_elem_user_tlv;

/* This function manage to free the instance on failure. */
- err = snd_ctl_add(card, kctl);
- if (err < 0)
- return err;
+ down_write(&card->controls_rwsem);
+ err = __snd_ctl_add(card, kctl);
+ if (err < 0) {
+ snd_ctl_free_one(kctl);
+ goto unlock;
+ }
offset = snd_ctl_get_ioff(kctl, &info->id);
snd_ctl_build_ioff(&info->id, kctl, offset);
/*
@@ -1374,10 +1384,10 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file,
* which locks the element.
*/

- down_write(&card->controls_rwsem);
card->user_ctl_count++;
- up_write(&card->controls_rwsem);

+ unlock:
+ up_write(&card->controls_rwsem);
return 0;
}

diff --git a/sound/isa/wss/wss_lib.c b/sound/isa/wss/wss_lib.c
index 32453f81b95a..3a5008837576 100644
--- a/sound/isa/wss/wss_lib.c
+++ b/sound/isa/wss/wss_lib.c
@@ -1531,7 +1531,6 @@ static int snd_wss_playback_open(struct snd_pcm_substream *substream)
if (err < 0) {
if (chip->release_dma)
chip->release_dma(chip, chip->dma_private_data, chip->dma1);
- snd_free_pages(runtime->dma_area, runtime->dma_bytes);
return err;
}
chip->playback_substream = substream;
@@ -1572,7 +1571,6 @@ static int snd_wss_capture_open(struct snd_pcm_substream *substream)
if (err < 0) {
if (chip->release_dma)
chip->release_dma(chip, chip->dma_private_data, chip->dma2);
- snd_free_pages(runtime->dma_area, runtime->dma_bytes);
return err;
}
chip->capture_substream = substream;
diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c
index f4459d1a9d67..27b468f057dd 100644
--- a/sound/pci/ac97/ac97_codec.c
+++ b/sound/pci/ac97/ac97_codec.c
@@ -824,7 +824,7 @@ static int snd_ac97_put_spsa(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_
{
struct snd_ac97 *ac97 = snd_kcontrol_chip(kcontrol);
int reg = kcontrol->private_value & 0xff;
- int shift = (kcontrol->private_value >> 8) & 0xff;
+ int shift = (kcontrol->private_value >> 8) & 0x0f;
int mask = (kcontrol->private_value >> 16) & 0xff;
// int invert = (kcontrol->private_value >> 24) & 0xff;
unsigned short value, old, new;
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 625cb6c7b7d6..5810be2c6c34 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2256,6 +2256,8 @@ static struct snd_pci_quirk power_save_blacklist[] = {
/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
SND_PCI_QUIRK(0x1849, 0xc892, "Asrock B85M-ITX", 0),
/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
+ SND_PCI_QUIRK(0x1849, 0x0397, "Asrock N68C-S UCC", 0),
+ /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
SND_PCI_QUIRK(0x1849, 0x7662, "Asrock H81M-HDS", 0),
/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index e58537e13ad3..cf5d26642bcd 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -388,6 +388,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
case 0x10ec0285:
case 0x10ec0298:
case 0x10ec0289:
+ case 0x10ec0300:
alc_update_coef_idx(codec, 0x10, 1<<9, 0);
break;
case 0x10ec0275:
@@ -2830,6 +2831,7 @@ enum {
ALC269_TYPE_ALC215,
ALC269_TYPE_ALC225,
ALC269_TYPE_ALC294,
+ ALC269_TYPE_ALC300,
ALC269_TYPE_ALC700,
};

@@ -2864,6 +2866,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec)
case ALC269_TYPE_ALC215:
case ALC269_TYPE_ALC225:
case ALC269_TYPE_ALC294:
+ case ALC269_TYPE_ALC300:
case ALC269_TYPE_ALC700:
ssids = alc269_ssids;
break;
@@ -5358,6 +5361,16 @@ static void alc274_fixup_bind_dacs(struct hda_codec *codec,
spec->gen.preferred_dacs = preferred_pairs;
}

+/* The DAC of NID 0x3 will introduce click/pop noise on headphones, so invalidate it */
+static void alc285_fixup_invalidate_dacs(struct hda_codec *codec,
+ const struct hda_fixup *fix, int action)
+{
+ if (action != HDA_FIXUP_ACT_PRE_PROBE)
+ return;
+
+ snd_hda_override_wcaps(codec, 0x03, 0);
+}
+
/* for hda_fixup_thinkpad_acpi() */
#include "thinkpad_helper.c"

@@ -5495,6 +5508,8 @@ enum {
ALC255_FIXUP_DELL_HEADSET_MIC,
ALC295_FIXUP_HP_X360,
ALC221_FIXUP_HP_HEADSET_MIC,
+ ALC285_FIXUP_LENOVO_HEADPHONE_NOISE,
+ ALC295_FIXUP_HP_AUTO_MUTE,
};

static const struct hda_fixup alc269_fixups[] = {
@@ -5659,6 +5674,8 @@ static const struct hda_fixup alc269_fixups[] = {
[ALC269_FIXUP_HP_MUTE_LED_MIC3] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc269_fixup_hp_mute_led_mic3,
+ .chained = true,
+ .chain_id = ALC295_FIXUP_HP_AUTO_MUTE
},
[ALC269_FIXUP_HP_GPIO_LED] = {
.type = HDA_FIXUP_FUNC,
@@ -6362,6 +6379,14 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC269_FIXUP_HEADSET_MIC
},
+ [ALC285_FIXUP_LENOVO_HEADPHONE_NOISE] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc285_fixup_invalidate_dacs,
+ },
+ [ALC295_FIXUP_HP_AUTO_MUTE] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc_fixup_auto_mute_via_amp,
+ },
};

static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6532,6 +6557,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8),
SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS),
SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE),
@@ -7034,6 +7060,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
{0x12, 0x90a60130},
{0x19, 0x03a11020},
{0x21, 0x0321101f}),
+ SND_HDA_PIN_QUIRK(0x10ec0285, 0x17aa, "Lenovo", ALC285_FIXUP_LENOVO_HEADPHONE_NOISE,
+ {0x12, 0x90a60130},
+ {0x14, 0x90170110},
+ {0x19, 0x04a11040},
+ {0x21, 0x04211020}),
SND_HDA_PIN_QUIRK(0x10ec0288, 0x1028, "Dell", ALC288_FIXUP_DELL1_MIC_NO_PRESENCE,
{0x12, 0x90a60120},
{0x14, 0x90170110},
@@ -7295,6 +7326,10 @@ static int patch_alc269(struct hda_codec *codec)
spec->gen.mixer_nid = 0; /* ALC2x4 does not have any loopback mixer path */
alc_update_coef_idx(codec, 0x6b, 0x0018, (1<<4) | (1<<3)); /* UAJ MIC Vref control by verb */
break;
+ case 0x10ec0300:
+ spec->codec_variant = ALC269_TYPE_ALC300;
+ spec->gen.mixer_nid = 0; /* no loopback on ALC300 */
+ break;
case 0x10ec0700:
case 0x10ec0701:
case 0x10ec0703:
@@ -8404,6 +8439,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = {
HDA_CODEC_ENTRY(0x10ec0295, "ALC295", patch_alc269),
HDA_CODEC_ENTRY(0x10ec0298, "ALC298", patch_alc269),
HDA_CODEC_ENTRY(0x10ec0299, "ALC299", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0300, "ALC300", patch_alc269),
HDA_CODEC_REV_ENTRY(0x10ec0861, 0x100340, "ALC660", patch_alc861),
HDA_CODEC_ENTRY(0x10ec0660, "ALC660-VD", patch_alc861vd),
HDA_CODEC_ENTRY(0x10ec0861, "ALC861", patch_alc861),
diff --git a/sound/soc/codecs/pcm186x.h b/sound/soc/codecs/pcm186x.h
index 2c6ba55bf394..bb3f0c42a1cd 100644
--- a/sound/soc/codecs/pcm186x.h
+++ b/sound/soc/codecs/pcm186x.h
@@ -139,7 +139,7 @@ enum pcm186x_type {
#define PCM186X_MAX_REGISTER PCM186X_CURR_TRIM_CTRL

/* PCM186X_PAGE */
-#define PCM186X_RESET 0xff
+#define PCM186X_RESET 0xfe

/* PCM186X_ADCX_INPUT_SEL_X */
#define PCM186X_ADC_INPUT_SEL_POL BIT(7)
diff --git a/sound/soc/intel/boards/cht_bsw_max98090_ti.c b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
index db6976f4ddaa..9d9f6e41d81c 100644
--- a/sound/soc/intel/boards/cht_bsw_max98090_ti.c
+++ b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
@@ -19,6 +19,7 @@
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

+#include <linux/dmi.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
@@ -35,6 +36,8 @@
#define CHT_PLAT_CLK_3_HZ 19200000
#define CHT_CODEC_DAI "HiFi"

+#define QUIRK_PMC_PLT_CLK_0 0x01
+
struct cht_mc_private {
struct clk *mclk;
struct snd_soc_jack jack;
@@ -385,11 +388,29 @@ static struct snd_soc_card snd_soc_card_cht = {
.num_controls = ARRAY_SIZE(cht_mc_controls),
};

+static const struct dmi_system_id cht_max98090_quirk_table[] = {
+ {
+ /* Swanky model Chromebook (Toshiba Chromebook 2) */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "Swanky"),
+ },
+ .driver_data = (void *)QUIRK_PMC_PLT_CLK_0,
+ },
+ {}
+};
+
static int snd_cht_mc_probe(struct platform_device *pdev)
{
+ const struct dmi_system_id *dmi_id;
struct device *dev = &pdev->dev;
int ret_val = 0;
struct cht_mc_private *drv;
+ const char *mclk_name;
+ int quirks = 0;
+
+ dmi_id = dmi_first_match(cht_max98090_quirk_table);
+ if (dmi_id)
+ quirks = (unsigned long)dmi_id->driver_data;

drv = devm_kzalloc(&pdev->dev, sizeof(*drv), GFP_KERNEL);
if (!drv)
@@ -411,11 +432,16 @@ static int snd_cht_mc_probe(struct platform_device *pdev)
snd_soc_card_cht.dev = &pdev->dev;
snd_soc_card_set_drvdata(&snd_soc_card_cht, drv);

- drv->mclk = devm_clk_get(&pdev->dev, "pmc_plt_clk_3");
+ if (quirks & QUIRK_PMC_PLT_CLK_0)
+ mclk_name = "pmc_plt_clk_0";
+ else
+ mclk_name = "pmc_plt_clk_3";
+
+ drv->mclk = devm_clk_get(&pdev->dev, mclk_name);
if (IS_ERR(drv->mclk)) {
dev_err(&pdev->dev,
- "Failed to get MCLK from pmc_plt_clk_3: %ld\n",
- PTR_ERR(drv->mclk));
+ "Failed to get MCLK from %s: %ld\n",
+ mclk_name, PTR_ERR(drv->mclk));
return PTR_ERR(drv->mclk);
}

diff --git a/sound/sparc/cs4231.c b/sound/sparc/cs4231.c
index e73c962590eb..079063d8038d 100644
--- a/sound/sparc/cs4231.c
+++ b/sound/sparc/cs4231.c
@@ -1146,10 +1146,8 @@ static int snd_cs4231_playback_open(struct snd_pcm_substream *substream)
runtime->hw = snd_cs4231_playback;

err = snd_cs4231_open(chip, CS4231_MODE_PLAY);
- if (err < 0) {
- snd_free_pages(runtime->dma_area, runtime->dma_bytes);
+ if (err < 0)
return err;
- }
chip->playback_substream = substream;
chip->p_periods_sent = 0;
snd_pcm_set_sync(substream);
@@ -1167,10 +1165,8 @@ static int snd_cs4231_capture_open(struct snd_pcm_substream *substream)
runtime->hw = snd_cs4231_capture;

err = snd_cs4231_open(chip, CS4231_MODE_RECORD);
- if (err < 0) {
- snd_free_pages(runtime->dma_area, runtime->dma_bytes);
+ if (err < 0)
return err;
- }
chip->capture_substream = substream;
chip->c_periods_sent = 0;
snd_pcm_set_sync(substream);
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index c0d7ea0bf5b6..b17201edfa09 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -212,6 +212,7 @@ struct prctl_mm_map {
#define PR_SET_SPECULATION_CTRL 53
/* Speculation control variants */
# define PR_SPEC_STORE_BYPASS 0
+# define PR_SPEC_INDIRECT_BRANCH 1
/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
# define PR_SPEC_NOT_AFFECTED 0
# define PR_SPEC_PRCTL (1UL << 0)