Re: Linux 5.4.19

From: Greg KH
Date: Tue Feb 11 2020 - 18:09:14 EST


diff --git a/MAINTAINERS b/MAINTAINERS
index 4f7ac27d8651..d1aeebb59e6a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8704,8 +8704,10 @@ L: isdn4linux@xxxxxxxxxxxxxxxxxxxxxx (subscribers-only)
L: netdev@xxxxxxxxxxxxxxx
W: http://www.isdn4linux.de
S: Maintained
-F: drivers/isdn/mISDN
-F: drivers/isdn/hardware
+F: drivers/isdn/mISDN/
+F: drivers/isdn/hardware/
+F: drivers/isdn/Kconfig
+F: drivers/isdn/Makefile

ISDN/CAPI SUBSYSTEM
M: Karsten Keil <isdn@xxxxxxxxxxxxxx>
diff --git a/Makefile b/Makefile
index b6c151fd5227..2f55d377f0db 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 5
PATCHLEVEL = 4
-SUBLEVEL = 18
+SUBLEVEL = 19
EXTRAVERSION =
NAME = Kleptomaniac Octopus

diff --git a/arch/Kconfig b/arch/Kconfig
index 5f8a5d84dbbe..43102756304c 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -396,9 +396,6 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE
config HAVE_RCU_TABLE_FREE
bool

-config HAVE_RCU_TABLE_NO_INVALIDATE
- bool
-
config HAVE_MMU_GATHER_PAGE_SIZE
bool

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 40002416efec..8e995ec796c8 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -14,13 +14,25 @@
#include <asm/cputype.h>

/* arm64 compatibility macros */
+#define PSR_AA32_MODE_FIQ FIQ_MODE
+#define PSR_AA32_MODE_SVC SVC_MODE
#define PSR_AA32_MODE_ABT ABT_MODE
#define PSR_AA32_MODE_UND UND_MODE
#define PSR_AA32_T_BIT PSR_T_BIT
+#define PSR_AA32_F_BIT PSR_F_BIT
#define PSR_AA32_I_BIT PSR_I_BIT
#define PSR_AA32_A_BIT PSR_A_BIT
#define PSR_AA32_E_BIT PSR_E_BIT
#define PSR_AA32_IT_MASK PSR_IT_MASK
+#define PSR_AA32_GE_MASK 0x000f0000
+#define PSR_AA32_DIT_BIT 0x00200000
+#define PSR_AA32_PAN_BIT 0x00400000
+#define PSR_AA32_SSBS_BIT 0x00800000
+#define PSR_AA32_Q_BIT PSR_Q_BIT
+#define PSR_AA32_V_BIT PSR_V_BIT
+#define PSR_AA32_C_BIT PSR_C_BIT
+#define PSR_AA32_Z_BIT PSR_Z_BIT
+#define PSR_AA32_N_BIT PSR_N_BIT

unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num);

@@ -41,6 +53,11 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
*__vcpu_spsr(vcpu) = v;
}

+static inline unsigned long host_spsr_to_spsr32(unsigned long spsr)
+{
+ return spsr;
+}
+
static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu,
u8 reg_num)
{
@@ -177,6 +194,11 @@ static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu)
return kvm_vcpu_get_hsr(vcpu) & HSR_SSE;
}

+static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+
static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu)
{
return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT;
diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h
index 7c0eddb0adb2..32fbf82e3ebc 100644
--- a/arch/arm/include/asm/kvm_mmio.h
+++ b/arch/arm/include/asm/kvm_mmio.h
@@ -14,6 +14,8 @@
struct kvm_decode {
unsigned long rt;
bool sign_extend;
+ /* Not used on 32-bit arm */
+ bool sixty_four;
};

void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S
index b408fa56eb89..6922dd8d3e2d 100644
--- a/arch/arm/mach-tegra/sleep-tegra30.S
+++ b/arch/arm/mach-tegra/sleep-tegra30.S
@@ -370,6 +370,14 @@ _pll_m_c_x_done:
pll_locked r1, r0, CLK_RESET_PLLC_BASE
pll_locked r1, r0, CLK_RESET_PLLX_BASE

+ tegra_get_soc_id TEGRA_APB_MISC_BASE, r1
+ cmp r1, #TEGRA30
+ beq 1f
+ ldr r1, [r0, #CLK_RESET_PLLP_BASE]
+ bic r1, r1, #(1<<31) @ disable PllP bypass
+ str r1, [r0, #CLK_RESET_PLLP_BASE]
+1:
+
mov32 r7, TEGRA_TMRUS_BASE
ldr r1, [r7]
add r1, r1, #LOCK_DELAY
@@ -630,7 +638,10 @@ tegra30_switch_cpu_to_clk32k:
str r0, [r4, #PMC_PLLP_WB0_OVERRIDE]

/* disable PLLP, PLLA, PLLC and PLLX */
+ tegra_get_soc_id TEGRA_APB_MISC_BASE, r1
+ cmp r1, #TEGRA30
ldr r0, [r5, #CLK_RESET_PLLP_BASE]
+ orrne r0, r0, #(1 << 31) @ enable PllP bypass on fast cluster
bic r0, r0, #(1 << 30)
str r0, [r5, #CLK_RESET_PLLP_BASE]
ldr r0, [r5, #CLK_RESET_PLLA_BASE]
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 7d042d5c43e3..27576c7b836e 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -221,7 +221,7 @@ EXPORT_SYMBOL(arm_coherent_dma_ops);

static int __dma_supported(struct device *dev, u64 mask, bool warn)
{
- unsigned long max_dma_pfn = min(max_pfn, arm_dma_pfn_limit);
+ unsigned long max_dma_pfn = min(max_pfn - 1, arm_dma_pfn_limit);

/*
* Translate the device's DMA mask to a PFN limit. This
diff --git a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi
index 501a7330dbc8..522d3ef72df5 100644
--- a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi
+++ b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi
@@ -73,6 +73,7 @@
regulator-always-on;
regulator-boot-on;
regulator-name = "vdd_apc";
+ regulator-initial-mode = <1>;
regulator-min-microvolt = <1048000>;
regulator-max-microvolt = <1384000>;
};
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 70b1469783f9..24bc0a3f26e2 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -261,7 +261,7 @@ static int ghash_setkey(struct crypto_shash *tfm,
static struct shash_alg ghash_alg[] = {{
.base.cra_name = "ghash",
.base.cra_driver_name = "ghash-neon",
- .base.cra_priority = 100,
+ .base.cra_priority = 150,
.base.cra_blocksize = GHASH_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct ghash_key),
.base.cra_module = THIS_MODULE,
diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
index 063c964af705..48bfbf70dbb0 100644
--- a/arch/arm64/include/asm/daifflags.h
+++ b/arch/arm64/include/asm/daifflags.h
@@ -36,7 +36,7 @@ static inline void local_daif_mask(void)
trace_hardirqs_off();
}

-static inline unsigned long local_daif_save(void)
+static inline unsigned long local_daif_save_flags(void)
{
unsigned long flags;

@@ -48,6 +48,15 @@ static inline unsigned long local_daif_save(void)
flags |= PSR_I_BIT;
}

+ return flags;
+}
+
+static inline unsigned long local_daif_save(void)
+{
+ unsigned long flags;
+
+ flags = local_daif_save_flags();
+
local_daif_mask();

return flags;
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index d69c1efc63e7..6ff84f1f3b4c 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -204,6 +204,38 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v;
}

+/*
+ * The layout of SPSR for an AArch32 state is different when observed from an
+ * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32
+ * view given an AArch64 view.
+ *
+ * In ARM DDI 0487E.a see:
+ *
+ * - The AArch64 view (SPSR_EL2) in section C5.2.18, page C5-426
+ * - The AArch32 view (SPSR_abt) in section G8.2.126, page G8-6256
+ * - The AArch32 view (SPSR_und) in section G8.2.132, page G8-6280
+ *
+ * Which show the following differences:
+ *
+ * | Bit | AA64 | AA32 | Notes |
+ * +-----+------+------+-----------------------------|
+ * | 24 | DIT | J | J is RES0 in ARMv8 |
+ * | 21 | SS | DIT | SS doesn't exist in AArch32 |
+ *
+ * ... and all other bits are (currently) common.
+ */
+static inline unsigned long host_spsr_to_spsr32(unsigned long spsr)
+{
+ const unsigned long overlap = BIT(24) | BIT(21);
+ unsigned long dit = !!(spsr & PSR_AA32_DIT_BIT);
+
+ spsr &= ~overlap;
+
+ spsr |= dit << 21;
+
+ return spsr;
+}
+
static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
{
u32 mode;
@@ -263,6 +295,11 @@ static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu)
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE);
}

+static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF);
+}
+
static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
{
return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h
index 02b5c48fd467..b204501a0c39 100644
--- a/arch/arm64/include/asm/kvm_mmio.h
+++ b/arch/arm64/include/asm/kvm_mmio.h
@@ -10,13 +10,11 @@
#include <linux/kvm_host.h>
#include <asm/kvm_arm.h>

-/*
- * This is annoying. The mmio code requires this, even if we don't
- * need any decoding. To be fixed.
- */
struct kvm_decode {
unsigned long rt;
bool sign_extend;
+ /* Witdth of the register accessed by the faulting instruction is 64-bits */
+ bool sixty_four;
};

void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index fbebb411ae20..bf57308fcd63 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -62,6 +62,7 @@
#define PSR_AA32_I_BIT 0x00000080
#define PSR_AA32_A_BIT 0x00000100
#define PSR_AA32_E_BIT 0x00000200
+#define PSR_AA32_PAN_BIT 0x00400000
#define PSR_AA32_SSBS_BIT 0x00800000
#define PSR_AA32_DIT_BIT 0x01000000
#define PSR_AA32_Q_BIT 0x08000000
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 7ed9294e2004..d1bb5b69f1ce 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -49,6 +49,7 @@
#define PSR_SSBS_BIT 0x00001000
#define PSR_PAN_BIT 0x00400000
#define PSR_UAO_BIT 0x00800000
+#define PSR_DIT_BIT 0x01000000
#define PSR_V_BIT 0x10000000
#define PSR_C_BIT 0x20000000
#define PSR_Z_BIT 0x40000000
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index 3a58e9db5cfe..a100483b47c4 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -274,7 +274,7 @@ int apei_claim_sea(struct pt_regs *regs)
if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES))
return err;

- current_flags = arch_local_save_flags();
+ current_flags = local_daif_save_flags();

/*
* SEA can interrupt SError, mask it and describe this as an NMI so
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index a9d25a305af5..a364a4ad5479 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -14,9 +14,6 @@
#include <asm/kvm_emulate.h>
#include <asm/esr.h>

-#define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \
- PSR_I_BIT | PSR_D_BIT)
-
#define CURRENT_EL_SP_EL0_VECTOR 0x0
#define CURRENT_EL_SP_ELx_VECTOR 0x200
#define LOWER_EL_AArch64_VECTOR 0x400
@@ -50,6 +47,69 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type;
}

+/*
+ * When an exception is taken, most PSTATE fields are left unchanged in the
+ * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all
+ * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx
+ * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0.
+ *
+ * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429.
+ * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426.
+ *
+ * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from
+ * MSB to LSB.
+ */
+static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu)
+{
+ unsigned long sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+ unsigned long old, new;
+
+ old = *vcpu_cpsr(vcpu);
+ new = 0;
+
+ new |= (old & PSR_N_BIT);
+ new |= (old & PSR_Z_BIT);
+ new |= (old & PSR_C_BIT);
+ new |= (old & PSR_V_BIT);
+
+ // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests)
+
+ new |= (old & PSR_DIT_BIT);
+
+ // PSTATE.UAO is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D5-2579.
+
+ // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0
+ // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented
+ // See ARM DDI 0487E.a, page D5-2578.
+ new |= (old & PSR_PAN_BIT);
+ if (!(sctlr & SCTLR_EL1_SPAN))
+ new |= PSR_PAN_BIT;
+
+ // PSTATE.SS is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D2-2452.
+
+ // PSTATE.IL is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D1-2306.
+
+ // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D13-3258
+ if (sctlr & SCTLR_ELx_DSSBS)
+ new |= PSR_SSBS_BIT;
+
+ // PSTATE.BTYPE is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, pages D1-2293 to D1-2294.
+
+ new |= PSR_D_BIT;
+ new |= PSR_A_BIT;
+ new |= PSR_I_BIT;
+ new |= PSR_F_BIT;
+
+ new |= PSR_MODE_EL1h;
+
+ return new;
+}
+
static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
{
unsigned long cpsr = *vcpu_cpsr(vcpu);
@@ -59,7 +119,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);

- *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+ *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu);
vcpu_write_spsr(vcpu, cpsr);

vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
@@ -94,7 +154,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);

- *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+ *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu);
vcpu_write_spsr(vcpu, cpsr);

/*
diff --git a/arch/mips/Makefile.postlink b/arch/mips/Makefile.postlink
index 4eea4188cb20..13e0beb9eee3 100644
--- a/arch/mips/Makefile.postlink
+++ b/arch/mips/Makefile.postlink
@@ -12,7 +12,7 @@ __archpost:
include scripts/Kbuild.include

CMD_RELOCS = arch/mips/boot/tools/relocs
-quiet_cmd_relocs = RELOCS $@
+quiet_cmd_relocs = RELOCS $@
cmd_relocs = $(CMD_RELOCS) $@

# `@true` prevents complaint when there is nothing to be done
diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile
index 528bd73d530a..4ed45ade32a1 100644
--- a/arch/mips/boot/Makefile
+++ b/arch/mips/boot/Makefile
@@ -123,7 +123,7 @@ $(obj)/vmlinux.its.S: $(addprefix $(srctree)/arch/mips/$(PLATFORM)/,$(ITS_INPUTS
targets += vmlinux.its
targets += vmlinux.gz.its
targets += vmlinux.bz2.its
-targets += vmlinux.lzmo.its
+targets += vmlinux.lzma.its
targets += vmlinux.lzo.its

quiet_cmd_cpp_its_S = ITS $@
diff --git a/arch/mips/kernel/syscalls/Makefile b/arch/mips/kernel/syscalls/Makefile
index a3d4bec695c6..6efb2f6889a7 100644
--- a/arch/mips/kernel/syscalls/Makefile
+++ b/arch/mips/kernel/syscalls/Makefile
@@ -18,7 +18,7 @@ quiet_cmd_syshdr = SYSHDR $@
'$(syshdr_pfx_$(basetarget))' \
'$(syshdr_offset_$(basetarget))'

-quiet_cmd_sysnr = SYSNR $@
+quiet_cmd_sysnr = SYSNR $@
cmd_sysnr = $(CONFIG_SHELL) '$(sysnr)' '$<' '$@' \
'$(sysnr_abis_$(basetarget))' \
'$(sysnr_pfx_$(basetarget))' \
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3e56c9c2f16e..2b1033f13210 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -221,8 +221,7 @@ config PPC
select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
- select HAVE_RCU_TABLE_FREE if SMP
- select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
+ select HAVE_RCU_TABLE_FREE
select HAVE_MMU_GATHER_PAGE_SIZE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
@@ -237,6 +236,7 @@ config PPC
select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE
select NEED_SG_DMA_LENGTH
select OF
+ select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE
select OF_EARLY_FLATTREE
select OLD_SIGACTION if PPC32
select OLD_SIGSUSPEND
diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c
index 1699e9531552..00c4d843a023 100644
--- a/arch/powerpc/boot/4xx.c
+++ b/arch/powerpc/boot/4xx.c
@@ -228,7 +228,7 @@ void ibm4xx_denali_fixup_memsize(void)
dpath = 8; /* 64 bits */

/* get address pins (rows) */
- val = SDRAM0_READ(DDR0_42);
+ val = SDRAM0_READ(DDR0_42);

row = DDR_GET_VAL(val, DDR_APIN, DDR_APIN_SHIFT);
if (row > max_row)
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index f9dc597b0b86..91c8f1d9bcee 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -102,11 +102,13 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
isync(); /* Context sync required after mtsrin() */
}

-static inline void allow_user_access(void __user *to, const void __user *from, u32 size)
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+ u32 size, unsigned long dir)
{
u32 addr, end;

- if (__builtin_constant_p(to) && to == NULL)
+ BUILD_BUG_ON(!__builtin_constant_p(dir));
+ if (!(dir & KUAP_WRITE))
return;

addr = (__force u32)to;
@@ -119,11 +121,16 @@ static inline void allow_user_access(void __user *to, const void __user *from, u
kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */
}

-static inline void prevent_user_access(void __user *to, const void __user *from, u32 size)
+static __always_inline void prevent_user_access(void __user *to, const void __user *from,
+ u32 size, unsigned long dir)
{
u32 addr = (__force u32)to;
u32 end = min(addr + size, TASK_SIZE);

+ BUILD_BUG_ON(!__builtin_constant_p(dir));
+ if (!(dir & KUAP_WRITE))
+ return;
+
if (!addr || addr >= TASK_SIZE || !size)
return;

@@ -131,12 +138,17 @@ static inline void prevent_user_access(void __user *to, const void __user *from,
kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */
}

-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
+ unsigned long begin = regs->kuap & 0xf0000000;
+ unsigned long end = regs->kuap << 28;
+
if (!is_write)
return false;

- return WARN(!regs->kuap, "Bug: write fault blocked by segment registers !");
+ return WARN(address < begin || address >= end,
+ "Bug: write fault blocked by segment registers !");
}

#endif /* CONFIG_PPC_KUAP */
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 998317702630..dc5c039eb28e 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -49,7 +49,6 @@ static inline void pgtable_free(void *table, unsigned index_size)

#define get_hugepd_cache_index(x) (x)

-#ifdef CONFIG_SMP
static inline void pgtable_free_tlb(struct mmu_gather *tlb,
void *table, int shift)
{
@@ -66,13 +65,6 @@ static inline void __tlb_remove_table(void *_table)

pgtable_free(table, shift);
}
-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- pgtable_free(table, shift);
-}
-#endif

static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
unsigned long address)
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index f254de956d6a..c8d1076e0ebb 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -77,25 +77,27 @@ static inline void set_kuap(unsigned long value)
isync();
}

-static inline void allow_user_access(void __user *to, const void __user *from,
- unsigned long size)
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size, unsigned long dir)
{
// This is written so we can resolve to a single case at build time
- if (__builtin_constant_p(to) && to == NULL)
+ BUILD_BUG_ON(!__builtin_constant_p(dir));
+ if (dir == KUAP_READ)
set_kuap(AMR_KUAP_BLOCK_WRITE);
- else if (__builtin_constant_p(from) && from == NULL)
+ else if (dir == KUAP_WRITE)
set_kuap(AMR_KUAP_BLOCK_READ);
else
set_kuap(0);
}

static inline void prevent_user_access(void __user *to, const void __user *from,
- unsigned long size)
+ unsigned long size, unsigned long dir)
{
set_kuap(AMR_KUAP_BLOCKED);
}

-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) &&
(regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index d5a44912902f..cae9e814593a 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -19,9 +19,7 @@ extern struct vmemmap_backing *vmemmap_list;
extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
extern void pmd_fragment_free(unsigned long *);
extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
-#ifdef CONFIG_SMP
extern void __tlb_remove_table(void *_table);
-#endif
void pte_frag_destroy(void *pte_frag);

static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index eea28ca679db..bc7d9d06a6d9 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -35,7 +35,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
{
int oldval = 0, ret;

- allow_write_to_user(uaddr, sizeof(*uaddr));
+ allow_read_write_user(uaddr, uaddr, sizeof(*uaddr));
pagefault_disable();

switch (op) {
@@ -62,7 +62,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,

*oval = oldval;

- prevent_write_to_user(uaddr, sizeof(*uaddr));
+ prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr));
return ret;
}

@@ -76,7 +76,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;

- allow_write_to_user(uaddr, sizeof(*uaddr));
+ allow_read_write_user(uaddr, uaddr, sizeof(*uaddr));
+
__asm__ __volatile__ (
PPC_ATOMIC_ENTRY_BARRIER
"1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\
@@ -97,7 +98,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
: "cc", "memory");

*uval = prev;
- prevent_write_to_user(uaddr, sizeof(*uaddr));
+ prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr));
+
return ret;
}

diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 5b5e39643a27..94f24928916a 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -2,6 +2,10 @@
#ifndef _ASM_POWERPC_KUP_H_
#define _ASM_POWERPC_KUP_H_

+#define KUAP_READ 1
+#define KUAP_WRITE 2
+#define KUAP_READ_WRITE (KUAP_READ | KUAP_WRITE)
+
#ifdef CONFIG_PPC64
#include <asm/book3s/64/kup-radix.h>
#endif
@@ -42,32 +46,48 @@ void setup_kuap(bool disabled);
#else
static inline void setup_kuap(bool disabled) { }
static inline void allow_user_access(void __user *to, const void __user *from,
- unsigned long size) { }
+ unsigned long size, unsigned long dir) { }
static inline void prevent_user_access(void __user *to, const void __user *from,
- unsigned long size) { }
-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) { return false; }
+ unsigned long size, unsigned long dir) { }
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+ return false;
+}
#endif /* CONFIG_PPC_KUAP */

static inline void allow_read_from_user(const void __user *from, unsigned long size)
{
- allow_user_access(NULL, from, size);
+ allow_user_access(NULL, from, size, KUAP_READ);
}

static inline void allow_write_to_user(void __user *to, unsigned long size)
{
- allow_user_access(to, NULL, size);
+ allow_user_access(to, NULL, size, KUAP_WRITE);
+}
+
+static inline void allow_read_write_user(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ allow_user_access(to, from, size, KUAP_READ_WRITE);
}

static inline void prevent_read_from_user(const void __user *from, unsigned long size)
{
- prevent_user_access(NULL, from, size);
+ prevent_user_access(NULL, from, size, KUAP_READ);
}

static inline void prevent_write_to_user(void __user *to, unsigned long size)
{
- prevent_user_access(to, NULL, size);
+ prevent_user_access(to, NULL, size, KUAP_WRITE);
+}
+
+static inline void prevent_read_write_user(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ prevent_user_access(to, from, size, KUAP_READ_WRITE);
}

#endif /* !__ASSEMBLY__ */

-#endif /* _ASM_POWERPC_KUP_H_ */
+#endif /* _ASM_POWERPC_KUAP_H_ */
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 1c3133b5f86a..6fe97465e350 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -34,18 +34,19 @@
#include <asm/reg.h>

static inline void allow_user_access(void __user *to, const void __user *from,
- unsigned long size)
+ unsigned long size, unsigned long dir)
{
mtspr(SPRN_MD_AP, MD_APG_INIT);
}

static inline void prevent_user_access(void __user *to, const void __user *from,
- unsigned long size)
+ unsigned long size, unsigned long dir)
{
mtspr(SPRN_MD_AP, MD_APG_KUAP);
}

-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000),
"Bug: fault blocked by AP register !");
diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h
index 332b13b4ecdb..29c43665a753 100644
--- a/arch/powerpc/include/asm/nohash/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/pgalloc.h
@@ -46,7 +46,6 @@ static inline void pgtable_free(void *table, int shift)

#define get_hugepd_cache_index(x) (x)

-#ifdef CONFIG_SMP
static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
{
unsigned long pgf = (unsigned long)table;
@@ -64,13 +63,6 @@ static inline void __tlb_remove_table(void *_table)
pgtable_free(table, shift);
}

-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
-{
- pgtable_free(table, shift);
-}
-#endif
-
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
unsigned long address)
{
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index b2c0be93929d..7f3a8b902325 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -26,6 +26,17 @@

#define tlb_flush tlb_flush
extern void tlb_flush(struct mmu_gather *tlb);
+/*
+ * book3s:
+ * Hash does not use the linux page-tables, so we can avoid
+ * the TLB invalidate for page-table freeing, Radix otoh does use the
+ * page-tables and needs the TLBI.
+ *
+ * nohash:
+ * We still do TLB invalidate in the __pte_free_tlb routine before we
+ * add the page table pages to mmu gather table batch.
+ */
+#define tlb_needs_table_invalidate() radix_enabled()

/* Get the generic bits... */
#include <asm-generic/tlb.h>
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index c92fe7fe9692..cafad1960e76 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -313,9 +313,9 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
unsigned long ret;

barrier_nospec();
- allow_user_access(to, from, n);
+ allow_read_write_user(to, from, n);
ret = __copy_tofrom_user(to, from, n);
- prevent_user_access(to, from, n);
+ prevent_read_write_user(to, from, n);
return ret;
}
#endif /* __powerpc64__ */
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index d60908ea37fb..59bb4f4ae316 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -179,7 +179,7 @@ transfer_to_handler:
2: /* if from kernel, check interrupted DOZE/NAP mode and
* check for stack overflow
*/
- kuap_save_and_lock r11, r12, r9, r2, r0
+ kuap_save_and_lock r11, r12, r9, r2, r6
addi r2, r12, -THREAD
lwz r9,KSP_LIMIT(r12)
cmplw r1,r9 /* if r1 <= ksp_limit */
@@ -284,6 +284,7 @@ reenable_mmu:
rlwinm r9,r9,0,~MSR_EE
lwz r12,_LINK(r11) /* and return to address in LR */
kuap_restore r11, r2, r3, r4, r5
+ lwz r2, GPR2(r11)
b fast_exception_return
#endif

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 709cf1fd4cf4..36abbe3c346d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2354,7 +2354,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
mutex_unlock(&kvm->lock);

if (!vcore)
- goto free_vcpu;
+ goto uninit_vcpu;

spin_lock(&vcore->lock);
++vcore->num_threads;
@@ -2371,6 +2371,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,

return vcpu;

+uninit_vcpu:
+ kvm_vcpu_uninit(vcpu);
free_vcpu:
kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index cc65af8fe6f7..3f6ad3f58628 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1769,10 +1769,12 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,

err = kvmppc_mmu_init(vcpu);
if (err < 0)
- goto uninit_vcpu;
+ goto free_shared_page;

return vcpu;

+free_shared_page:
+ free_page((unsigned long)vcpu->arch.shared);
uninit_vcpu:
kvm_vcpu_uninit(vcpu);
free_shadow_vcpu:
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 5a3373e06e60..235d57d6c205 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -638,7 +638,7 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
srcu_idx = srcu_read_lock(&kvm->srcu);
gfn = gpa_to_gfn(kvm_eq.qaddr);

- page_size = kvm_host_page_size(kvm, gfn);
+ page_size = kvm_host_page_size(vcpu, gfn);
if (1ull << kvm_eq.qshift > page_size) {
srcu_read_unlock(&kvm->srcu, srcu_idx);
pr_warn("Incompatible host page size %lx!\n", page_size);
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 75483b40fcb1..2bf7e1b4fd82 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -378,7 +378,6 @@ static inline void pgtable_free(void *table, int index)
}
}

-#ifdef CONFIG_SMP
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
{
unsigned long pgf = (unsigned long)table;
@@ -395,12 +394,6 @@ void __tlb_remove_table(void *_table)

return pgtable_free(table, index);
}
-#else
-void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
-{
- return pgtable_free(table, index);
-}
-#endif

#ifdef CONFIG_PROC_FS
atomic_long_t direct_pages_count[MMU_PAGE_COUNT];
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 8432c281de92..9298905cfe74 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -233,7 +233,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,

// Read/write fault in a valid region (the exception table search passed
// above), but blocked by KUAP is bad, it can never succeed.
- if (bad_kuap_fault(regs, is_write))
+ if (bad_kuap_fault(regs, address, is_write))
return true;

// What's left? Kernel fault on user in well defined regions (extable
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 2f9ddc29c535..c73205172447 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -173,10 +173,12 @@ static void dump_addr(struct pg_state *st, unsigned long addr)

static void note_prot_wx(struct pg_state *st, unsigned long addr)
{
+ pte_t pte = __pte(st->current_flags);
+
if (!IS_ENABLED(CONFIG_PPC_DEBUG_WX) || !st->check_wx)
return;

- if (!((st->current_flags & pgprot_val(PAGE_KERNEL_X)) == pgprot_val(PAGE_KERNEL_X)))
+ if (!pte_write(pte) || !pte_exec(pte))
return;

WARN_ONCE(1, "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n",
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 8e700390f3d6..4c3af2e9eb8e 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -360,8 +360,10 @@ static bool lmb_is_removable(struct drmem_lmb *lmb)

for (i = 0; i < scns_per_block; i++) {
pfn = PFN_DOWN(phys_addr);
- if (!pfn_present(pfn))
+ if (!pfn_present(pfn)) {
+ phys_addr += MIN_MEMORY_BLOCK_SIZE;
continue;
+ }

rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
phys_addr += MIN_MEMORY_BLOCK_SIZE;
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index d83364ebc5c5..8057aafd5f5e 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1894,15 +1894,14 @@ static void dump_300_sprs(void)

printf("pidr = %.16lx tidr = %.16lx\n",
mfspr(SPRN_PID), mfspr(SPRN_TIDR));
- printf("asdr = %.16lx psscr = %.16lx\n",
- mfspr(SPRN_ASDR), hv ? mfspr(SPRN_PSSCR)
- : mfspr(SPRN_PSSCR_PR));
+ printf("psscr = %.16lx\n",
+ hv ? mfspr(SPRN_PSSCR) : mfspr(SPRN_PSSCR_PR));

if (!hv)
return;

- printf("ptcr = %.16lx\n",
- mfspr(SPRN_PTCR));
+ printf("ptcr = %.16lx asdr = %.16lx\n",
+ mfspr(SPRN_PTCR), mfspr(SPRN_ASDR));
#endif
}

diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp.c
index 7fbf56aab661..e2279fed8f56 100644
--- a/arch/riscv/net/bpf_jit_comp.c
+++ b/arch/riscv/net/bpf_jit_comp.c
@@ -120,6 +120,11 @@ static bool seen_reg(int reg, struct rv_jit_context *ctx)
return false;
}

+static void mark_fp(struct rv_jit_context *ctx)
+{
+ __set_bit(RV_CTX_F_SEEN_S5, &ctx->flags);
+}
+
static void mark_call(struct rv_jit_context *ctx)
{
__set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
@@ -596,7 +601,8 @@ static void __build_epilogue(u8 reg, struct rv_jit_context *ctx)

emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx);
/* Set return value. */
- emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx);
+ if (reg == RV_REG_RA)
+ emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx);
emit(rv_jalr(RV_REG_ZERO, reg, 0), ctx);
}

@@ -1426,6 +1432,10 @@ static void build_prologue(struct rv_jit_context *ctx)
{
int stack_adjust = 0, store_offset, bpf_stack_adjust;

+ bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
+ if (bpf_stack_adjust)
+ mark_fp(ctx);
+
if (seen_reg(RV_REG_RA, ctx))
stack_adjust += 8;
stack_adjust += 8; /* RV_REG_FP */
@@ -1443,7 +1453,6 @@ static void build_prologue(struct rv_jit_context *ctx)
stack_adjust += 8;

stack_adjust = round_up(stack_adjust, 16);
- bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
stack_adjust += bpf_stack_adjust;

store_offset = stack_adjust - 8;
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 823578c6b9e2..3f5cb55cde35 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -33,6 +33,8 @@
#define ARCH_HAS_PREPARE_HUGEPAGE
#define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH

+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+
#include <asm/setup.h>
#ifndef __ASSEMBLY__

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d047e846e1b9..756c627f7e54 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2863,9 +2863,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
CR14_UNUSED_33 |
CR14_EXTERNAL_DAMAGE_SUBMASK;
- /* make sure the new fpc will be lazily loaded */
- save_fpu_regs();
- current->thread.fpu.fpc = 0;
+ vcpu->run->s.regs.fpc = 0;
vcpu->arch.sie_block->gbea = 1;
vcpu->arch.sie_block->pp = 0;
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
@@ -4354,7 +4352,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
switch (ioctl) {
case KVM_S390_STORE_STATUS:
idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = kvm_s390_vcpu_store_status(vcpu, arg);
+ r = kvm_s390_store_status_unloaded(vcpu, arg);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
case KVM_S390_SET_INITIAL_PSW: {
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index b0246c705a19..5674710a4841 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -2,7 +2,7 @@
/*
* IBM System z Huge TLB Page Support for Kernel.
*
- * Copyright IBM Corp. 2007,2016
+ * Copyright IBM Corp. 2007,2020
* Author(s): Gerald Schaefer <gerald.schaefer@xxxxxxxxxx>
*/

@@ -11,6 +11,9 @@

#include <linux/mm.h>
#include <linux/hugetlb.h>
+#include <linux/mman.h>
+#include <linux/sched/mm.h>
+#include <linux/security.h>

/*
* If the bit selected by single-bit bitmask "a" is set within "x", move
@@ -267,3 +270,98 @@ static __init int setup_hugepagesz(char *opt)
return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
+
+static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
+ unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ struct hstate *h = hstate_file(file);
+ struct vm_unmapped_area_info info;
+
+ info.flags = 0;
+ info.length = len;
+ info.low_limit = current->mm->mmap_base;
+ info.high_limit = TASK_SIZE;
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ return vm_unmapped_area(&info);
+}
+
+static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
+ unsigned long addr0, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ struct hstate *h = hstate_file(file);
+ struct vm_unmapped_area_info info;
+ unsigned long addr;
+
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ info.length = len;
+ info.low_limit = max(PAGE_SIZE, mmap_min_addr);
+ info.high_limit = current->mm->mmap_base;
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ addr = vm_unmapped_area(&info);
+
+ /*
+ * A failed mmap() very likely causes application failure,
+ * so fall back to the bottom-up function here. This scenario
+ * can happen with large stack limits and large mmap()
+ * allocations.
+ */
+ if (addr & ~PAGE_MASK) {
+ VM_BUG_ON(addr != -ENOMEM);
+ info.flags = 0;
+ info.low_limit = TASK_UNMAPPED_BASE;
+ info.high_limit = TASK_SIZE;
+ addr = vm_unmapped_area(&info);
+ }
+
+ return addr;
+}
+
+unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ struct hstate *h = hstate_file(file);
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int rc;
+
+ if (len & ~huge_page_mask(h))
+ return -EINVAL;
+ if (len > TASK_SIZE - mmap_min_addr)
+ return -ENOMEM;
+
+ if (flags & MAP_FIXED) {
+ if (prepare_hugepage_range(file, addr, len))
+ return -EINVAL;
+ goto check_asce_limit;
+ }
+
+ if (addr) {
+ addr = ALIGN(addr, huge_page_size(h));
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+ (!vma || addr + len <= vm_start_gap(vma)))
+ goto check_asce_limit;
+ }
+
+ if (mm->get_unmapped_area == arch_get_unmapped_area)
+ addr = hugetlb_get_unmapped_area_bottomup(file, addr, len,
+ pgoff, flags);
+ else
+ addr = hugetlb_get_unmapped_area_topdown(file, addr, len,
+ pgoff, flags);
+ if (addr & ~PAGE_MASK)
+ return addr;
+
+check_asce_limit:
+ if (addr + len > current->mm->context.asce_limit &&
+ addr + len <= TASK_SIZE) {
+ rc = crst_table_upgrade(mm, addr + len);
+ if (rc)
+ return (unsigned long) rc;
+ }
+ return addr;
+}
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index eb24cb1afc11..18e9fb6fcf1b 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -65,7 +65,6 @@ config SPARC64
select HAVE_KRETPROBES
select HAVE_KPROBES
select HAVE_RCU_TABLE_FREE if SMP
- select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_DYNAMIC_FTRACE
diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h
index a2f3fa61ee36..8cb8f3833239 100644
--- a/arch/sparc/include/asm/tlb_64.h
+++ b/arch/sparc/include/asm/tlb_64.h
@@ -28,6 +28,15 @@ void flush_tlb_pending(void);
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
#define tlb_flush(tlb) flush_tlb_pending()

+/*
+ * SPARC64's hardware TLB fill does not use the Linux page-tables
+ * and therefore we don't need a TLBI when freeing page-table pages.
+ */
+
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+#define tlb_needs_table_invalidate() (false)
+#endif
+
#include <asm-generic/tlb.h>

#endif /* _SPARC64_TLB_H */
diff --git a/arch/sparc/include/uapi/asm/ipcbuf.h b/arch/sparc/include/uapi/asm/ipcbuf.h
index 9d0d125500e2..084b8949ddff 100644
--- a/arch/sparc/include/uapi/asm/ipcbuf.h
+++ b/arch/sparc/include/uapi/asm/ipcbuf.h
@@ -15,19 +15,19 @@

struct ipc64_perm
{
- __kernel_key_t key;
- __kernel_uid_t uid;
- __kernel_gid_t gid;
- __kernel_uid_t cuid;
- __kernel_gid_t cgid;
+ __kernel_key_t key;
+ __kernel_uid32_t uid;
+ __kernel_gid32_t gid;
+ __kernel_uid32_t cuid;
+ __kernel_gid32_t cgid;
#ifndef __arch64__
- unsigned short __pad0;
+ unsigned short __pad0;
#endif
- __kernel_mode_t mode;
- unsigned short __pad1;
- unsigned short seq;
- unsigned long long __unused1;
- unsigned long long __unused2;
+ __kernel_mode_t mode;
+ unsigned short __pad1;
+ unsigned short seq;
+ unsigned long long __unused1;
+ unsigned long long __unused2;
};

#endif /* __SPARC_IPCBUF_H */
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 2ebc17d9c72c..19e94af9cc5d 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -140,6 +140,7 @@ extern void apic_soft_disable(void);
extern void lapic_shutdown(void);
extern void sync_Arb_IDs(void);
extern void init_bsp_APIC(void);
+extern void apic_intr_mode_select(void);
extern void apic_intr_mode_init(void);
extern void init_apic_mappings(void);
void register_lapic_address(unsigned long address);
@@ -188,6 +189,7 @@ static inline void disable_local_APIC(void) { }
# define setup_secondary_APIC_clock x86_init_noop
static inline void lapic_update_tsc_freq(void) { }
static inline void init_bsp_APIC(void) { }
+static inline void apic_intr_mode_select(void) { }
static inline void apic_intr_mode_init(void) { }
static inline void lapic_assign_system_vectors(void) { }
static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
@@ -452,6 +454,14 @@ static inline void ack_APIC_irq(void)
apic_eoi();
}

+
+static inline bool lapic_vector_set_in_irr(unsigned int vector)
+{
+ u32 irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+
+ return !!(irr & (1U << (vector % 32)));
+}
+
static inline unsigned default_get_apic_id(unsigned long x)
{
unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4fc61483919a..c1ed054c103c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -380,12 +380,12 @@ struct kvm_mmu {
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
- int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err,
+ int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err,
bool prefault);
void (*inject_page_fault)(struct kvm_vcpu *vcpu,
struct x86_exception *fault);
- gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
- struct x86_exception *exception);
+ gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa,
+ u32 access, struct x86_exception *exception);
gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception);
int (*sync_page)(struct kvm_vcpu *vcpu,
@@ -667,10 +667,10 @@ struct kvm_vcpu_arch {
bool pvclock_set_guest_stopped_request;

struct {
+ u8 preempted;
u64 msr_val;
u64 last_steal;
- struct gfn_to_hva_cache stime;
- struct kvm_steal_time steal;
+ struct gfn_to_pfn_cache cache;
} st;

u64 tsc_offset;
@@ -1128,6 +1128,7 @@ struct kvm_x86_ops {
bool (*xsaves_supported)(void);
bool (*umip_emulated)(void);
bool (*pt_supported)(void);
+ bool (*pku_supported)(void);

int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
@@ -1450,7 +1451,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);

int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);

-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 19435858df5f..96d9cd208610 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -51,12 +51,14 @@ struct x86_init_resources {
* are set up.
* @intr_init: interrupt init code
* @trap_init: platform specific trap setup
+ * @intr_mode_select: interrupt delivery mode selection
* @intr_mode_init: interrupt delivery mode setup
*/
struct x86_init_irqs {
void (*pre_vector_init)(void);
void (*intr_init)(void);
void (*trap_init)(void);
+ void (*intr_mode_select)(void);
void (*intr_mode_init)(void);
};

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 2b0faf86da1b..df891f874614 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -830,8 +830,17 @@ bool __init apic_needs_pit(void)
if (!tsc_khz || !cpu_khz)
return true;

- /* Is there an APIC at all? */
- if (!boot_cpu_has(X86_FEATURE_APIC))
+ /* Is there an APIC at all or is it disabled? */
+ if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic)
+ return true;
+
+ /*
+ * If interrupt delivery mode is legacy PIC or virtual wire without
+ * configuration, the local APIC timer wont be set up. Make sure
+ * that the PIT is initialized.
+ */
+ if (apic_intr_mode == APIC_PIC ||
+ apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG)
return true;

/* Virt guests may lack ARAT, but still have DEADLINE */
@@ -1322,7 +1331,7 @@ void __init sync_Arb_IDs(void)

enum apic_intr_mode_id apic_intr_mode __ro_after_init;

-static int __init apic_intr_mode_select(void)
+static int __init __apic_intr_mode_select(void)
{
/* Check kernel option */
if (disable_apic) {
@@ -1384,6 +1393,12 @@ static int __init apic_intr_mode_select(void)
return APIC_SYMMETRIC_IO;
}

+/* Select the interrupt delivery mode for the BSP */
+void __init apic_intr_mode_select(void)
+{
+ apic_intr_mode = __apic_intr_mode_select();
+}
+
/*
* An initial setup of the virtual wire mode.
*/
@@ -1440,8 +1455,6 @@ void __init apic_intr_mode_init(void)
{
bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);

- apic_intr_mode = apic_intr_mode_select();
-
switch (apic_intr_mode) {
case APIC_PIC:
pr_info("APIC: Keep in PIC mode(8259)\n");
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 7f7533462474..159bd0cb8548 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -23,10 +23,8 @@

static struct irq_domain *msi_default_domain;

-static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg)
{
- struct irq_cfg *cfg = irqd_cfg(data);
-
msg->address_hi = MSI_ADDR_BASE_HI;

if (x2apic_enabled())
@@ -47,6 +45,127 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
MSI_DATA_VECTOR(cfg->vector);
}

+static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+{
+ __irq_msi_compose_msg(irqd_cfg(data), msg);
+}
+
+static void irq_msi_update_msg(struct irq_data *irqd, struct irq_cfg *cfg)
+{
+ struct msi_msg msg[2] = { [1] = { }, };
+
+ __irq_msi_compose_msg(cfg, msg);
+ irq_data_get_irq_chip(irqd)->irq_write_msi_msg(irqd, msg);
+}
+
+static int
+msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force)
+{
+ struct irq_cfg old_cfg, *cfg = irqd_cfg(irqd);
+ struct irq_data *parent = irqd->parent_data;
+ unsigned int cpu;
+ int ret;
+
+ /* Save the current configuration */
+ cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd));
+ old_cfg = *cfg;
+
+ /* Allocate a new target vector */
+ ret = parent->chip->irq_set_affinity(parent, mask, force);
+ if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
+ return ret;
+
+ /*
+ * For non-maskable and non-remapped MSI interrupts the migration
+ * to a different destination CPU and a different vector has to be
+ * done careful to handle the possible stray interrupt which can be
+ * caused by the non-atomic update of the address/data pair.
+ *
+ * Direct update is possible when:
+ * - The MSI is maskable (remapped MSI does not use this code path)).
+ * The quirk bit is not set in this case.
+ * - The new vector is the same as the old vector
+ * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up)
+ * - The new destination CPU is the same as the old destination CPU
+ */
+ if (!irqd_msi_nomask_quirk(irqd) ||
+ cfg->vector == old_cfg.vector ||
+ old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR ||
+ cfg->dest_apicid == old_cfg.dest_apicid) {
+ irq_msi_update_msg(irqd, cfg);
+ return ret;
+ }
+
+ /*
+ * Paranoia: Validate that the interrupt target is the local
+ * CPU.
+ */
+ if (WARN_ON_ONCE(cpu != smp_processor_id())) {
+ irq_msi_update_msg(irqd, cfg);
+ return ret;
+ }
+
+ /*
+ * Redirect the interrupt to the new vector on the current CPU
+ * first. This might cause a spurious interrupt on this vector if
+ * the device raises an interrupt right between this update and the
+ * update to the final destination CPU.
+ *
+ * If the vector is in use then the installed device handler will
+ * denote it as spurious which is no harm as this is a rare event
+ * and interrupt handlers have to cope with spurious interrupts
+ * anyway. If the vector is unused, then it is marked so it won't
+ * trigger the 'No irq handler for vector' warning in do_IRQ().
+ *
+ * This requires to hold vector lock to prevent concurrent updates to
+ * the affected vector.
+ */
+ lock_vector_lock();
+
+ /*
+ * Mark the new target vector on the local CPU if it is currently
+ * unused. Reuse the VECTOR_RETRIGGERED state which is also used in
+ * the CPU hotplug path for a similar purpose. This cannot be
+ * undone here as the current CPU has interrupts disabled and
+ * cannot handle the interrupt before the whole set_affinity()
+ * section is done. In the CPU unplug case, the current CPU is
+ * about to vanish and will not handle any interrupts anymore. The
+ * vector is cleaned up when the CPU comes online again.
+ */
+ if (IS_ERR_OR_NULL(this_cpu_read(vector_irq[cfg->vector])))
+ this_cpu_write(vector_irq[cfg->vector], VECTOR_RETRIGGERED);
+
+ /* Redirect it to the new vector on the local CPU temporarily */
+ old_cfg.vector = cfg->vector;
+ irq_msi_update_msg(irqd, &old_cfg);
+
+ /* Now transition it to the target CPU */
+ irq_msi_update_msg(irqd, cfg);
+
+ /*
+ * All interrupts after this point are now targeted at the new
+ * vector/CPU.
+ *
+ * Drop vector lock before testing whether the temporary assignment
+ * to the local CPU was hit by an interrupt raised in the device,
+ * because the retrigger function acquires vector lock again.
+ */
+ unlock_vector_lock();
+
+ /*
+ * Check whether the transition raced with a device interrupt and
+ * is pending in the local APICs IRR. It is safe to do this outside
+ * of vector lock as the irq_desc::lock of this interrupt is still
+ * held and interrupts are disabled: The check is not accessing the
+ * underlying vector store. It's just checking the local APIC's
+ * IRR.
+ */
+ if (lapic_vector_set_in_irr(cfg->vector))
+ irq_data_get_irq_chip(irqd)->irq_retrigger(irqd);
+
+ return ret;
+}
+
/*
* IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
* which implement the MSI or MSI-X Capability Structure.
@@ -58,6 +177,7 @@ static struct irq_chip pci_msi_controller = {
.irq_ack = irq_chip_ack_parent,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_compose_msi_msg = irq_msi_compose_msg,
+ .irq_set_affinity = msi_set_affinity,
.flags = IRQCHIP_SKIP_SET_WAKE,
};

@@ -146,6 +266,8 @@ void __init arch_init_msi_domain(struct irq_domain *parent)
}
if (!msi_default_domain)
pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n");
+ else
+ msi_default_domain->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK;
}

#ifdef CONFIG_IRQ_REMAP
diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c
index 3e20d322bc98..032509adf9de 100644
--- a/arch/x86/kernel/cpu/tsx.c
+++ b/arch/x86/kernel/cpu/tsx.c
@@ -115,11 +115,12 @@ void __init tsx_init(void)
tsx_disable();

/*
- * tsx_disable() will change the state of the
- * RTM CPUID bit. Clear it here since it is now
- * expected to be not set.
+ * tsx_disable() will change the state of the RTM and HLE CPUID
+ * bits. Clear them here since they are now expected to be not
+ * set.
*/
setup_clear_cpu_cap(X86_FEATURE_RTM);
+ setup_clear_cpu_cap(X86_FEATURE_HLE);
} else if (tsx_ctrl_state == TSX_CTRL_ENABLE) {

/*
@@ -131,10 +132,10 @@ void __init tsx_init(void)
tsx_enable();

/*
- * tsx_enable() will change the state of the
- * RTM CPUID bit. Force it here since it is now
- * expected to be set.
+ * tsx_enable() will change the state of the RTM and HLE CPUID
+ * bits. Force them here since they are now expected to be set.
*/
setup_force_cpu_cap(X86_FEATURE_RTM);
+ setup_force_cpu_cap(X86_FEATURE_HLE);
}
}
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 7ce29cee9f9e..d8673d8a779b 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -91,10 +91,18 @@ void __init hpet_time_init(void)

static __init void x86_late_time_init(void)
{
+ /*
+ * Before PIT/HPET init, select the interrupt mode. This is required
+ * to make the decision whether PIT should be initialized correct.
+ */
+ x86_init.irqs.intr_mode_select();
+
+ /* Setup the legacy timers */
x86_init.timers.timer_init();
+
/*
- * After PIT/HPET timers init, select and setup
- * the final interrupt mode for delivering IRQs.
+ * After PIT/HPET timers init, set up the final interrupt mode for
+ * delivering IRQs.
*/
x86_init.irqs.intr_mode_init();
tsc_init();
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 18a799c8fa28..1838b10a299c 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -58,6 +58,7 @@ struct x86_init_ops x86_init __initdata = {
.pre_vector_init = init_ISA_irqs,
.intr_init = native_init_IRQ,
.trap_init = x86_init_noop,
+ .intr_mode_select = apic_intr_mode_select,
.intr_mode_init = apic_intr_mode_init
},

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b1d5a8c94a57..6fa946f983c9 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -352,6 +352,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
unsigned f_la57;
+ unsigned f_pku = kvm_x86_ops->pku_supported() ? F(PKU) : 0;

/* cpuid 7.0.ebx */
const u32 kvm_cpuid_7_0_ebx_x86_features =
@@ -363,7 +364,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)

/* cpuid 7.0.ecx*/
const u32 kvm_cpuid_7_0_ecx_x86_features =
- F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
+ F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) |
F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
@@ -392,6 +393,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
/* Set LA57 based on hardware capability. */
entry->ecx |= f_la57;
entry->ecx |= f_umip;
+ entry->ecx |= f_pku;
/* PKU is not yet implemented for shadow paging. */
if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
entry->ecx &= ~F(PKU);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 698efb8c3897..37aa9ce29b33 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -22,6 +22,7 @@
#include "kvm_cache_regs.h"
#include <asm/kvm_emulate.h>
#include <linux/stringify.h>
+#include <asm/fpu/api.h>
#include <asm/debugreg.h>
#include <asm/nospec-branch.h>

@@ -1075,8 +1076,23 @@ static void fetch_register_operand(struct operand *op)
}
}

+static void emulator_get_fpu(void)
+{
+ fpregs_lock();
+
+ fpregs_assert_state_consistent();
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ switch_fpu_return();
+}
+
+static void emulator_put_fpu(void)
+{
+ fpregs_unlock();
+}
+
static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
{
+ emulator_get_fpu();
switch (reg) {
case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
@@ -1098,11 +1114,13 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
#endif
default: BUG();
}
+ emulator_put_fpu();
}

static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
int reg)
{
+ emulator_get_fpu();
switch (reg) {
case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
@@ -1124,10 +1142,12 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
#endif
default: BUG();
}
+ emulator_put_fpu();
}

static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
+ emulator_get_fpu();
switch (reg) {
case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
@@ -1139,10 +1159,12 @@ static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
default: BUG();
}
+ emulator_put_fpu();
}

static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
+ emulator_get_fpu();
switch (reg) {
case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
@@ -1154,6 +1176,7 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
default: BUG();
}
+ emulator_put_fpu();
}

static int em_fninit(struct x86_emulate_ctxt *ctxt)
@@ -1161,7 +1184,9 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt)
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);

+ emulator_get_fpu();
asm volatile("fninit");
+ emulator_put_fpu();
return X86EMUL_CONTINUE;
}

@@ -1172,7 +1197,9 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);

+ emulator_get_fpu();
asm volatile("fnstcw %0": "+m"(fcw));
+ emulator_put_fpu();

ctxt->dst.val = fcw;

@@ -1186,7 +1213,9 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);

+ emulator_get_fpu();
asm volatile("fnstsw %0": "+m"(fsw));
+ emulator_put_fpu();

ctxt->dst.val = fsw;

@@ -4094,8 +4123,12 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;

+ emulator_get_fpu();
+
rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));

+ emulator_put_fpu();
+
if (rc != X86EMUL_CONTINUE)
return rc;

@@ -4138,6 +4171,8 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;

+ emulator_get_fpu();
+
if (size < __fxstate_size(16)) {
rc = fxregs_fixup(&fx_state, size);
if (rc != X86EMUL_CONTINUE)
@@ -4153,6 +4188,8 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));

out:
+ emulator_put_fpu();
+
return rc;
}

@@ -5212,16 +5249,28 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
ctxt->ad_bytes = def_ad_bytes ^ 6;
break;
case 0x26: /* ES override */
+ has_seg_override = true;
+ ctxt->seg_override = VCPU_SREG_ES;
+ break;
case 0x2e: /* CS override */
+ has_seg_override = true;
+ ctxt->seg_override = VCPU_SREG_CS;
+ break;
case 0x36: /* SS override */
+ has_seg_override = true;
+ ctxt->seg_override = VCPU_SREG_SS;
+ break;
case 0x3e: /* DS override */
has_seg_override = true;
- ctxt->seg_override = (ctxt->b >> 3) & 3;
+ ctxt->seg_override = VCPU_SREG_DS;
break;
case 0x64: /* FS override */
+ has_seg_override = true;
+ ctxt->seg_override = VCPU_SREG_FS;
+ break;
case 0x65: /* GS override */
has_seg_override = true;
- ctxt->seg_override = ctxt->b & 7;
+ ctxt->seg_override = VCPU_SREG_GS;
break;
case 0x40 ... 0x4f: /* REX */
if (mode != X86EMUL_MODE_PROT64)
@@ -5305,10 +5354,15 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
}
break;
case Escape:
- if (ctxt->modrm > 0xbf)
- opcode = opcode.u.esc->high[ctxt->modrm - 0xc0];
- else
+ if (ctxt->modrm > 0xbf) {
+ size_t size = ARRAY_SIZE(opcode.u.esc->high);
+ u32 index = array_index_nospec(
+ ctxt->modrm - 0xc0, size);
+
+ opcode = opcode.u.esc->high[index];
+ } else {
opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
+ }
break;
case InstrDual:
if ((ctxt->modrm >> 6) == 3)
@@ -5450,7 +5504,9 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
{
int rc;

+ emulator_get_fpu();
rc = asm_safe("fwait");
+ emulator_put_fpu();

if (unlikely(rc != X86EMUL_CONTINUE))
return emulate_exception(ctxt, MF_VECTOR, 0, false);
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 23ff65504d7e..26408434b9bc 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -809,11 +809,12 @@ static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
u32 index, u64 *pdata)
{
struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+ size_t size = ARRAY_SIZE(hv->hv_crash_param);

- if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
+ if (WARN_ON_ONCE(index >= size))
return -EINVAL;

- *pdata = hv->hv_crash_param[index];
+ *pdata = hv->hv_crash_param[array_index_nospec(index, size)];
return 0;
}

@@ -852,11 +853,12 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
u32 index, u64 data)
{
struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+ size_t size = ARRAY_SIZE(hv->hv_crash_param);

- if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
+ if (WARN_ON_ONCE(index >= size))
return -EINVAL;

- hv->hv_crash_param[index] = data;
+ hv->hv_crash_param[array_index_nospec(index, size)] = data;
return 0;
}

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 8b38bb4868a6..629a09ca9860 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -460,10 +460,14 @@ static int picdev_write(struct kvm_pic *s,
switch (addr) {
case 0x20:
case 0x21:
+ pic_lock(s);
+ pic_ioport_write(&s->pics[0], addr, data);
+ pic_unlock(s);
+ break;
case 0xa0:
case 0xa1:
pic_lock(s);
- pic_ioport_write(&s->pics[addr >> 7], addr, data);
+ pic_ioport_write(&s->pics[1], addr, data);
pic_unlock(s);
break;
case 0x4d0:
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index d859ae8890d0..24a6905d60ee 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -36,6 +36,7 @@
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/nospec.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/current.h>
@@ -68,13 +69,14 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
default:
{
u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
- u64 redir_content;
+ u64 redir_content = ~0ULL;

- if (redir_index < IOAPIC_NUM_PINS)
- redir_content =
- ioapic->redirtbl[redir_index].bits;
- else
- redir_content = ~0ULL;
+ if (redir_index < IOAPIC_NUM_PINS) {
+ u32 index = array_index_nospec(
+ redir_index, IOAPIC_NUM_PINS);
+
+ redir_content = ioapic->redirtbl[index].bits;
+ }

result = (ioapic->ioregsel & 0x1) ?
(redir_content >> 32) & 0xffffffff :
@@ -291,6 +293,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)

if (index >= IOAPIC_NUM_PINS)
return;
+ index = array_index_nospec(index, IOAPIC_NUM_PINS);
e = &ioapic->redirtbl[index];
mask_before = e->fields.mask;
/* Preserve read-only fields */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b29d00b661ff..15728971a430 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1926,15 +1926,20 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_LVTTHMR:
case APIC_LVTPC:
case APIC_LVT1:
- case APIC_LVTERR:
+ case APIC_LVTERR: {
/* TODO: Check vector */
+ size_t size;
+ u32 index;
+
if (!kvm_apic_sw_enabled(apic))
val |= APIC_LVT_MASKED;
-
- val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
+ size = ARRAY_SIZE(apic_lvt_mask);
+ index = array_index_nospec(
+ (reg - APIC_LVTT) >> 4, size);
+ val &= apic_lvt_mask[index];
kvm_lapic_set_reg(apic, reg, val);
-
break;
+ }

case APIC_LVTT:
if (!kvm_apic_sw_enabled(apic))
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2ce9da58611e..518100ea5ef4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -418,22 +418,24 @@ static inline bool is_access_track_spte(u64 spte)
* requires a full MMU zap). The flag is instead explicitly queried when
* checking for MMIO spte cache hits.
*/
-#define MMIO_SPTE_GEN_MASK GENMASK_ULL(18, 0)
+#define MMIO_SPTE_GEN_MASK GENMASK_ULL(17, 0)

#define MMIO_SPTE_GEN_LOW_START 3
#define MMIO_SPTE_GEN_LOW_END 11
#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
MMIO_SPTE_GEN_LOW_START)

-#define MMIO_SPTE_GEN_HIGH_START 52
-#define MMIO_SPTE_GEN_HIGH_END 61
+#define MMIO_SPTE_GEN_HIGH_START PT64_SECOND_AVAIL_BITS_SHIFT
+#define MMIO_SPTE_GEN_HIGH_END 62
#define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \
MMIO_SPTE_GEN_HIGH_START)
+
static u64 generation_mmio_spte_mask(u64 gen)
{
u64 mask;

WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
+ BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK);

mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK;
mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK;
@@ -444,8 +446,6 @@ static u64 get_mmio_spte_generation(u64 spte)
{
u64 gen;

- spte &= ~shadow_mmio_mask;
-
gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START;
gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START;
return gen;
@@ -538,16 +538,20 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
static u8 kvm_get_shadow_phys_bits(void)
{
/*
- * boot_cpu_data.x86_phys_bits is reduced when MKTME is detected
- * in CPU detection code, but MKTME treats those reduced bits as
- * 'keyID' thus they are not reserved bits. Therefore for MKTME
- * we should still return physical address bits reported by CPUID.
+ * boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected
+ * in CPU detection code, but the processor treats those reduced bits as
+ * 'keyID' thus they are not reserved bits. Therefore KVM needs to look at
+ * the physical address bits reported by CPUID.
*/
- if (!boot_cpu_has(X86_FEATURE_TME) ||
- WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008))
- return boot_cpu_data.x86_phys_bits;
+ if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008))
+ return cpuid_eax(0x80000008) & 0xff;

- return cpuid_eax(0x80000008) & 0xff;
+ /*
+ * Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with
+ * custom CPUID. Proceed with whatever the kernel found since these features
+ * aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008).
+ */
+ return boot_cpu_data.x86_phys_bits;
}

static void kvm_mmu_reset_all_pte_masks(void)
@@ -1282,12 +1286,12 @@ static bool mmu_gfn_lpage_is_disallowed(struct kvm_vcpu *vcpu, gfn_t gfn,
return __mmu_gfn_lpage_is_disallowed(gfn, level, slot);
}

-static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
+static int host_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn)
{
unsigned long page_size;
int i, ret = 0;

- page_size = kvm_host_page_size(kvm, gfn);
+ page_size = kvm_host_page_size(vcpu, gfn);

for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
if (page_size >= KVM_HPAGE_SIZE(i))
@@ -1337,7 +1341,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn,
if (unlikely(*force_pt_level))
return PT_PAGE_TABLE_LEVEL;

- host_level = host_mapping_level(vcpu->kvm, large_gfn);
+ host_level = host_mapping_level(vcpu, large_gfn);

if (host_level == PT_PAGE_TABLE_LEVEL)
return host_level;
@@ -3528,7 +3532,7 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte)
* - true: let the vcpu to access on the same address again.
* - false: let the real page fault path to fix it.
*/
-static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
+static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int level,
u32 error_code)
{
struct kvm_shadow_walk_iterator iterator;
@@ -3548,7 +3552,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
do {
u64 new_spte;

- for_each_shadow_entry_lockless(vcpu, gva, iterator, spte)
+ for_each_shadow_entry_lockless(vcpu, cr2_or_gpa, iterator, spte)
if (!is_shadow_present_pte(spte) ||
iterator.level < level)
break;
@@ -3626,7 +3630,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,

} while (true);

- trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep,
+ trace_fast_page_fault(vcpu, cr2_or_gpa, error_code, iterator.sptep,
spte, fault_handled);
walk_shadow_page_lockless_end(vcpu);

@@ -3634,10 +3638,11 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
}

static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
- gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable);
+ gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write,
+ bool *writable);
static int make_mmu_pages_available(struct kvm_vcpu *vcpu);

-static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
+static int nonpaging_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
gfn_t gfn, bool prefault)
{
int r;
@@ -3663,16 +3668,16 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
}

- if (fast_page_fault(vcpu, v, level, error_code))
+ if (fast_page_fault(vcpu, gpa, level, error_code))
return RET_PF_RETRY;

mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();

- if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable))
+ if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
return RET_PF_RETRY;

- if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
+ if (handle_abnormal_pfn(vcpu, gpa, gfn, pfn, ACC_ALL, &r))
return r;

r = RET_PF_RETRY;
@@ -3683,7 +3688,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
goto out_unlock;
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
- r = __direct_map(vcpu, v, write, map_writable, level, pfn,
+ r = __direct_map(vcpu, gpa, write, map_writable, level, pfn,
prefault, false);
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
@@ -3981,7 +3986,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots);

-static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
+static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gpa_t vaddr,
u32 access, struct x86_exception *exception)
{
if (exception)
@@ -3989,7 +3994,7 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
return vaddr;
}

-static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
+static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gpa_t vaddr,
u32 access,
struct x86_exception *exception)
{
@@ -4149,13 +4154,14 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
walk_shadow_page_lockless_end(vcpu);
}

-static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
+static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa,
u32 error_code, bool prefault)
{
- gfn_t gfn = gva >> PAGE_SHIFT;
+ gfn_t gfn = gpa >> PAGE_SHIFT;
int r;

- pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
+ /* Note, paging is disabled, ergo gva == gpa. */
+ pgprintk("%s: gva %lx error %x\n", __func__, gpa, error_code);

if (page_fault_handle_page_track(vcpu, error_code, gfn))
return RET_PF_EMULATE;
@@ -4167,11 +4173,12 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));


- return nonpaging_map(vcpu, gva & PAGE_MASK,
+ return nonpaging_map(vcpu, gpa & PAGE_MASK,
error_code, gfn, prefault);
}

-static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
+static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ gfn_t gfn)
{
struct kvm_arch_async_pf arch;

@@ -4180,11 +4187,13 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
arch.direct_map = vcpu->arch.mmu->direct_map;
arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu);

- return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
+ return kvm_setup_async_pf(vcpu, cr2_or_gpa,
+ kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
}

static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
- gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable)
+ gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write,
+ bool *writable)
{
struct kvm_memory_slot *slot;
bool async;
@@ -4204,12 +4213,12 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
return false; /* *pfn has correct page already */

if (!prefault && kvm_can_do_async_pf(vcpu)) {
- trace_kvm_try_async_get_page(gva, gfn);
+ trace_kvm_try_async_get_page(cr2_or_gpa, gfn);
if (kvm_find_async_pf_gfn(vcpu, gfn)) {
- trace_kvm_async_pf_doublefault(gva, gfn);
+ trace_kvm_async_pf_doublefault(cr2_or_gpa, gfn);
kvm_make_request(KVM_REQ_APF_HALT, vcpu);
return true;
- } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn))
+ } else if (kvm_arch_setup_async_pf(vcpu, cr2_or_gpa, gfn))
return true;
}

@@ -4222,6 +4231,12 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
{
int r = 1;

+#ifndef CONFIG_X86_64
+ /* A 64-bit CR2 should be impossible on 32-bit KVM. */
+ if (WARN_ON_ONCE(fault_address >> 32))
+ return -EFAULT;
+#endif
+
vcpu->arch.l1tf_flush_l1d = true;
switch (vcpu->arch.apf.host_apf_reason) {
default:
@@ -4259,7 +4274,7 @@ check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
return kvm_mtrr_check_gfn_range_consistency(vcpu, gfn, page_num);
}

-static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
+static int tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
bool prefault)
{
kvm_pfn_t pfn;
@@ -5516,7 +5531,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
return 0;
}

-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len)
{
int r, emulation_type = 0;
@@ -5525,18 +5540,18 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
/* With shadow page tables, fault_address contains a GVA or nGPA. */
if (vcpu->arch.mmu->direct_map) {
vcpu->arch.gpa_available = true;
- vcpu->arch.gpa_val = cr2;
+ vcpu->arch.gpa_val = cr2_or_gpa;
}

r = RET_PF_INVALID;
if (unlikely(error_code & PFERR_RSVD_MASK)) {
- r = handle_mmio_page_fault(vcpu, cr2, direct);
+ r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct);
if (r == RET_PF_EMULATE)
goto emulate;
}

if (r == RET_PF_INVALID) {
- r = vcpu->arch.mmu->page_fault(vcpu, cr2,
+ r = vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa,
lower_32_bits(error_code),
false);
WARN_ON(r == RET_PF_INVALID);
@@ -5556,7 +5571,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
*/
if (vcpu->arch.mmu->direct_map &&
(error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
- kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
+ kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa));
return 1;
}

@@ -5571,7 +5586,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
* explicitly shadowing L1's page tables, i.e. unprotecting something
* for L1 isn't going to magically fix whatever issue cause L2 to fail.
*/
- if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu))
+ if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu))
emulation_type = EMULTYPE_ALLOW_RETRY;
emulate:
/*
@@ -5586,7 +5601,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
return 1;
}

- return x86_emulate_instruction(vcpu, cr2, emulation_type, insn,
+ return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn,
insn_len);
}
EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
@@ -6249,7 +6264,7 @@ static void kvm_set_mmio_spte_mask(void)
* If reserved bit is not supported, clear the present bit to disable
* mmio page fault.
*/
- if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52)
+ if (shadow_phys_bits == 52)
mask &= ~1ull;

kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 7ca8831c7d1a..3c6522b84ff1 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -249,13 +249,13 @@ TRACE_EVENT(

TRACE_EVENT(
fast_page_fault,
- TP_PROTO(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
+ TP_PROTO(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 error_code,
u64 *sptep, u64 old_spte, bool retry),
- TP_ARGS(vcpu, gva, error_code, sptep, old_spte, retry),
+ TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, retry),

TP_STRUCT__entry(
__field(int, vcpu_id)
- __field(gva_t, gva)
+ __field(gpa_t, cr2_or_gpa)
__field(u32, error_code)
__field(u64 *, sptep)
__field(u64, old_spte)
@@ -265,7 +265,7 @@ TRACE_EVENT(

TP_fast_assign(
__entry->vcpu_id = vcpu->vcpu_id;
- __entry->gva = gva;
+ __entry->cr2_or_gpa = cr2_or_gpa;
__entry->error_code = error_code;
__entry->sptep = sptep;
__entry->old_spte = old_spte;
@@ -273,9 +273,9 @@ TRACE_EVENT(
__entry->retry = retry;
),

- TP_printk("vcpu %d gva %lx error_code %s sptep %p old %#llx"
+ TP_printk("vcpu %d gva %llx error_code %s sptep %p old %#llx"
" new %llx spurious %d fixed %d", __entry->vcpu_id,
- __entry->gva, __print_flags(__entry->error_code, "|",
+ __entry->cr2_or_gpa, __print_flags(__entry->error_code, "|",
kvm_mmu_trace_pferr_flags), __entry->sptep,
__entry->old_spte, __entry->new_spte,
__spte_satisfied(old_spte), __spte_satisfied(new_spte)
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 25ce3edd1872..7f0059aa30e1 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -192,11 +192,15 @@ static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit)
break;
case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000:
*seg = 1;
- *unit = msr - MSR_MTRRfix16K_80000;
+ *unit = array_index_nospec(
+ msr - MSR_MTRRfix16K_80000,
+ MSR_MTRRfix16K_A0000 - MSR_MTRRfix16K_80000 + 1);
break;
case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
*seg = 2;
- *unit = msr - MSR_MTRRfix4K_C0000;
+ *unit = array_index_nospec(
+ msr - MSR_MTRRfix4K_C0000,
+ MSR_MTRRfix4K_F8000 - MSR_MTRRfix4K_C0000 + 1);
break;
default:
return false;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 97b21e7fd013..c1d7b866a03f 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -291,11 +291,11 @@ static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte)
}

/*
- * Fetch a guest pte for a guest virtual address
+ * Fetch a guest pte for a guest virtual address, or for an L2's GPA.
*/
static int FNAME(walk_addr_generic)(struct guest_walker *walker,
struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
- gva_t addr, u32 access)
+ gpa_t addr, u32 access)
{
int ret;
pt_element_t pte;
@@ -496,7 +496,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
}

static int FNAME(walk_addr)(struct guest_walker *walker,
- struct kvm_vcpu *vcpu, gva_t addr, u32 access)
+ struct kvm_vcpu *vcpu, gpa_t addr, u32 access)
{
return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr,
access);
@@ -611,7 +611,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
* If the guest tries to write a write-protected page, we need to
* emulate this operation, return 1 to indicate this case.
*/
-static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
+static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
struct guest_walker *gw,
int write_fault, int hlevel,
kvm_pfn_t pfn, bool map_writable, bool prefault,
@@ -765,7 +765,7 @@ FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
* Returns: 1 if we need to emulate the instruction, 0 otherwise, or
* a negative value on error.
*/
-static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
+static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
bool prefault)
{
int write_fault = error_code & PFERR_WRITE_MASK;
@@ -945,18 +945,19 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
spin_unlock(&vcpu->kvm->mmu_lock);
}

-static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
+/* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */
+static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t addr, u32 access,
struct x86_exception *exception)
{
struct guest_walker walker;
gpa_t gpa = UNMAPPED_GVA;
int r;

- r = FNAME(walk_addr)(&walker, vcpu, vaddr, access);
+ r = FNAME(walk_addr)(&walker, vcpu, addr, access);

if (r) {
gpa = gfn_to_gpa(walker.gfn);
- gpa |= vaddr & ~PAGE_MASK;
+ gpa |= addr & ~PAGE_MASK;
} else if (exception)
*exception = walker.fault;

@@ -964,7 +965,8 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
}

#if PTTYPE != PTTYPE_EPT
-static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
+/* Note, gva_to_gpa_nested() is only used to translate L2 GVAs. */
+static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr,
u32 access,
struct x86_exception *exception)
{
@@ -972,6 +974,11 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
gpa_t gpa = UNMAPPED_GVA;
int r;

+#ifndef CONFIG_X86_64
+ /* A 64-bit GVA should be impossible on 32-bit KVM. */
+ WARN_ON_ONCE(vaddr >> 32);
+#endif
+
r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access);

if (r) {
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 58265f761c3b..3fc98afd72a8 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -2,6 +2,8 @@
#ifndef __KVM_X86_PMU_H
#define __KVM_X86_PMU_H

+#include <linux/nospec.h>
+
#define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu)
#define pmu_to_vcpu(pmu) (container_of((pmu), struct kvm_vcpu, arch.pmu))
#define pmc_to_pmu(pmc) (&(pmc)->vcpu->arch.pmu)
@@ -86,8 +88,12 @@ static inline bool pmc_is_enabled(struct kvm_pmc *pmc)
static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
u32 base)
{
- if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
- return &pmu->gp_counters[msr - base];
+ if (msr >= base && msr < base + pmu->nr_arch_gp_counters) {
+ u32 index = array_index_nospec(msr - base,
+ pmu->nr_arch_gp_counters);
+
+ return &pmu->gp_counters[index];
+ }

return NULL;
}
@@ -97,8 +103,12 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
{
int base = MSR_CORE_PERF_FIXED_CTR0;

- if (msr >= base && msr < base + pmu->nr_arch_fixed_counters)
- return &pmu->fixed_counters[msr - base];
+ if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) {
+ u32 index = array_index_nospec(msr - base,
+ pmu->nr_arch_fixed_counters);
+
+ return &pmu->fixed_counters[index];
+ }

return NULL;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c5673bda4b66..8d1be7c61f10 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5986,6 +5986,11 @@ static bool svm_has_wbinvd_exit(void)
return true;
}

+static bool svm_pku_supported(void)
+{
+ return false;
+}
+
#define PRE_EX(exit) { .exit_code = (exit), \
.stage = X86_ICPT_PRE_EXCEPT, }
#define POST_EX(exit) { .exit_code = (exit), \
@@ -7278,6 +7283,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.xsaves_supported = svm_xsaves_supported,
.umip_emulated = svm_umip_emulated,
.pt_supported = svm_pt_supported,
+ .pku_supported = svm_pku_supported,

.set_supported_cpuid = svm_set_supported_cpuid,

diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 7aa69716d516..283bdb7071af 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -145,6 +145,11 @@ static inline bool vmx_umip_emulated(void)
SECONDARY_EXEC_DESC;
}

+static inline bool vmx_pku_supported(void)
+{
+ return boot_cpu_has(X86_FEATURE_PKU);
+}
+
static inline bool cpu_has_vmx_rdtscp(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index d0523741fb03..931d3b5f3acd 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4663,8 +4663,10 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
vmx_instruction_info, true, len, &gva))
return 1;
/* _system ok, nested_vmx_check_permission has verified cpl=0 */
- if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e))
+ if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e)) {
kvm_inject_page_fault(vcpu, &e);
+ return 1;
+ }
}

return nested_vmx_succeed(vcpu);
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 3e9c059099e9..f8998a7bc7d5 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -84,10 +84,14 @@ static unsigned intel_find_arch_event(struct kvm_pmu *pmu,

static unsigned intel_find_fixed_event(int idx)
{
- if (idx >= ARRAY_SIZE(fixed_pmc_events))
+ u32 event;
+ size_t size = ARRAY_SIZE(fixed_pmc_events);
+
+ if (idx >= size)
return PERF_COUNT_HW_MAX;

- return intel_arch_events[fixed_pmc_events[idx]].event_type;
+ event = fixed_pmc_events[array_index_nospec(idx, size)];
+ return intel_arch_events[event].event_type;
}

/* check if a PMC is enabled by comparing it with globl_ctrl bits. */
@@ -128,16 +132,20 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu,
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
bool fixed = idx & (1u << 30);
struct kvm_pmc *counters;
+ unsigned int num_counters;

idx &= ~(3u << 30);
- if (!fixed && idx >= pmu->nr_arch_gp_counters)
- return NULL;
- if (fixed && idx >= pmu->nr_arch_fixed_counters)
+ if (fixed) {
+ counters = pmu->fixed_counters;
+ num_counters = pmu->nr_arch_fixed_counters;
+ } else {
+ counters = pmu->gp_counters;
+ num_counters = pmu->nr_arch_gp_counters;
+ }
+ if (idx >= num_counters)
return NULL;
- counters = fixed ? pmu->fixed_counters : pmu->gp_counters;
*mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP];
-
- return &counters[idx];
+ return &counters[array_index_nospec(idx, num_counters)];
}

static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f09a213fd5cb..dc7c166c4335 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2140,6 +2140,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
(index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_num_address_ranges)))
return 1;
+ if (is_noncanonical_address(data, vcpu))
+ return 1;
if (index % 2)
vmx->pt_desc.guest.addr_b[index / 2] = data;
else
@@ -7865,6 +7867,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.xsaves_supported = vmx_xsaves_supported,
.umip_emulated = vmx_umip_emulated,
.pt_supported = vmx_pt_supported,
+ .pku_supported = vmx_pku_supported,

.request_immediate_exit = vmx_request_immediate_exit,

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8d82ec0482fc..edde5ee8c6f5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -92,6 +92,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif

+static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
+
#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__

@@ -886,9 +888,38 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
}
EXPORT_SYMBOL_GPL(kvm_set_xcr);

+static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c)
+{
+ u64 reserved_bits = CR4_RESERVED_BITS;
+
+ if (!cpu_has(c, X86_FEATURE_XSAVE))
+ reserved_bits |= X86_CR4_OSXSAVE;
+
+ if (!cpu_has(c, X86_FEATURE_SMEP))
+ reserved_bits |= X86_CR4_SMEP;
+
+ if (!cpu_has(c, X86_FEATURE_SMAP))
+ reserved_bits |= X86_CR4_SMAP;
+
+ if (!cpu_has(c, X86_FEATURE_FSGSBASE))
+ reserved_bits |= X86_CR4_FSGSBASE;
+
+ if (!cpu_has(c, X86_FEATURE_PKU))
+ reserved_bits |= X86_CR4_PKE;
+
+ if (!cpu_has(c, X86_FEATURE_LA57) &&
+ !(cpuid_ecx(0x7) & bit(X86_FEATURE_LA57)))
+ reserved_bits |= X86_CR4_LA57;
+
+ if (!cpu_has(c, X86_FEATURE_UMIP) && !kvm_x86_ops->umip_emulated())
+ reserved_bits |= X86_CR4_UMIP;
+
+ return reserved_bits;
+}
+
static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
- if (cr4 & CR4_RESERVED_BITS)
+ if (cr4 & cr4_reserved_bits)
return -EINVAL;

if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE))
@@ -1054,9 +1085,11 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)

static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
+ size_t size = ARRAY_SIZE(vcpu->arch.db);
+
switch (dr) {
case 0 ... 3:
- vcpu->arch.db[dr] = val;
+ vcpu->arch.db[array_index_nospec(dr, size)] = val;
if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
vcpu->arch.eff_db[dr] = val;
break;
@@ -1093,9 +1126,11 @@ EXPORT_SYMBOL_GPL(kvm_set_dr);

int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
{
+ size_t size = ARRAY_SIZE(vcpu->arch.db);
+
switch (dr) {
case 0 ... 3:
- *val = vcpu->arch.db[dr];
+ *val = vcpu->arch.db[array_index_nospec(dr, size)];
break;
case 4:
/* fall through */
@@ -2490,7 +2525,10 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
default:
if (msr >= MSR_IA32_MC0_CTL &&
msr < MSR_IA32_MCx_CTL(bank_num)) {
- u32 offset = msr - MSR_IA32_MC0_CTL;
+ u32 offset = array_index_nospec(
+ msr - MSR_IA32_MC0_CTL,
+ MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
+
/* only 0 or all 1s can be written to IA32_MCi_CTL
* some Linux kernels though clear bit 10 in bank 4 to
* workaround a BIOS/GART TBL issue on AMD K8s, ignore
@@ -2586,45 +2624,47 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)

static void record_steal_time(struct kvm_vcpu *vcpu)
{
+ struct kvm_host_map map;
+ struct kvm_steal_time *st;
+
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;

- if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
- &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
+ /* -EAGAIN is returned in atomic context so we can just return. */
+ if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
+ &map, &vcpu->arch.st.cache, false))
return;

+ st = map.hva +
+ offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+
/*
* Doing a TLB flush here, on the guest's behalf, can avoid
* expensive IPIs.
*/
trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
- vcpu->arch.st.steal.preempted & KVM_VCPU_FLUSH_TLB);
- if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB)
+ st->preempted & KVM_VCPU_FLUSH_TLB);
+ if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
kvm_vcpu_flush_tlb(vcpu, false);

- if (vcpu->arch.st.steal.version & 1)
- vcpu->arch.st.steal.version += 1; /* first time write, random junk */
+ vcpu->arch.st.preempted = 0;

- vcpu->arch.st.steal.version += 1;
+ if (st->version & 1)
+ st->version += 1; /* first time write, random junk */

- kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
- &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+ st->version += 1;

smp_wmb();

- vcpu->arch.st.steal.steal += current->sched_info.run_delay -
+ st->steal += current->sched_info.run_delay -
vcpu->arch.st.last_steal;
vcpu->arch.st.last_steal = current->sched_info.run_delay;

- kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
- &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
-
smp_wmb();

- vcpu->arch.st.steal.version += 1;
+ st->version += 1;

- kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
- &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+ kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
}

int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -2777,11 +2817,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data & KVM_STEAL_RESERVED_MASK)
return 1;

- if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
- data & KVM_STEAL_VALID_BITS,
- sizeof(struct kvm_steal_time)))
- return 1;
-
vcpu->arch.st.msr_val = data;

if (!(data & KVM_MSR_ENABLED))
@@ -2917,7 +2952,10 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
default:
if (msr >= MSR_IA32_MC0_CTL &&
msr < MSR_IA32_MCx_CTL(bank_num)) {
- u32 offset = msr - MSR_IA32_MC0_CTL;
+ u32 offset = array_index_nospec(
+ msr - MSR_IA32_MC0_CTL,
+ MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
+
data = vcpu->arch.mce_banks[offset];
break;
}
@@ -3443,10 +3481,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)

kvm_x86_ops->vcpu_load(vcpu, cpu);

- fpregs_assert_state_consistent();
- if (test_thread_flag(TIF_NEED_FPU_LOAD))
- switch_fpu_return();
-
/* Apply any externally detected TSC adjustments (due to suspend) */
if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
@@ -3486,15 +3520,25 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)

static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
{
+ struct kvm_host_map map;
+ struct kvm_steal_time *st;
+
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;

- vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
+ if (vcpu->arch.st.preempted)
+ return;
+
+ if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
+ &vcpu->arch.st.cache, true))
+ return;

- kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
- &vcpu->arch.st.steal.preempted,
- offsetof(struct kvm_steal_time, preempted),
- sizeof(vcpu->arch.st.steal.preempted));
+ st = map.hva +
+ offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+
+ st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
+
+ kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -6365,11 +6409,11 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
return 1;
}

-static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
+static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
bool write_fault_to_shadow_pgtable,
int emulation_type)
{
- gpa_t gpa = cr2;
+ gpa_t gpa = cr2_or_gpa;
kvm_pfn_t pfn;

if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
@@ -6383,7 +6427,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
* Write permission should be allowed since only
* write access need to be emulated.
*/
- gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
+ gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);

/*
* If the mapping is invalid in guest, let cpu retry
@@ -6440,10 +6484,10 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
}

static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
- unsigned long cr2, int emulation_type)
+ gpa_t cr2_or_gpa, int emulation_type)
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
- unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
+ unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa;

last_retry_eip = vcpu->arch.last_retry_eip;
last_retry_addr = vcpu->arch.last_retry_addr;
@@ -6472,14 +6516,14 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
if (x86_page_table_writing_insn(ctxt))
return false;

- if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
+ if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa)
return false;

vcpu->arch.last_retry_eip = ctxt->eip;
- vcpu->arch.last_retry_addr = cr2;
+ vcpu->arch.last_retry_addr = cr2_or_gpa;

if (!vcpu->arch.mmu->direct_map)
- gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
+ gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);

kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));

@@ -6625,11 +6669,8 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
return false;
}

-int x86_emulate_instruction(struct kvm_vcpu *vcpu,
- unsigned long cr2,
- int emulation_type,
- void *insn,
- int insn_len)
+int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ int emulation_type, void *insn, int insn_len)
{
int r;
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
@@ -6675,8 +6716,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
- if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
- emulation_type))
+ if (reexecute_instruction(vcpu, cr2_or_gpa,
+ write_fault_to_spt,
+ emulation_type))
return 1;
if (ctxt->have_exception) {
/*
@@ -6710,7 +6752,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
return 1;
}

- if (retry_instruction(ctxt, cr2, emulation_type))
+ if (retry_instruction(ctxt, cr2_or_gpa, emulation_type))
return 1;

/* this is needed for vmware backdoor interface to work since it
@@ -6722,7 +6764,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,

restart:
/* Save the faulting GPA (cr2) in the address field */
- ctxt->exception.address = cr2;
+ ctxt->exception.address = cr2_or_gpa;

r = x86_emulate_insn(ctxt);

@@ -6730,7 +6772,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
return 1;

if (r == EMULATION_FAILED) {
- if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
+ if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt,
emulation_type))
return 1;

@@ -8172,8 +8214,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
trace_kvm_entry(vcpu->vcpu_id);
guest_enter_irqoff();

- /* The preempt notifier should have taken care of the FPU already. */
- WARN_ON_ONCE(test_thread_flag(TIF_NEED_FPU_LOAD));
+ fpregs_assert_state_consistent();
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ switch_fpu_return();

if (unlikely(vcpu->arch.switch_db_regs)) {
set_debugreg(0, 7);
@@ -8445,12 +8488,26 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
return 0;
}

+static void kvm_save_current_fpu(struct fpu *fpu)
+{
+ /*
+ * If the target FPU state is not resident in the CPU registers, just
+ * memcpy() from current, else save CPU state directly to the target.
+ */
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ memcpy(&fpu->state, &current->thread.fpu.state,
+ fpu_kernel_xstate_size);
+ else
+ copy_fpregs_to_fpstate(fpu);
+}
+
/* Swap (qemu) user FPU context for the guest FPU context. */
static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
fpregs_lock();

- copy_fpregs_to_fpstate(vcpu->arch.user_fpu);
+ kvm_save_current_fpu(vcpu->arch.user_fpu);
+
/* PKRU is separately restored in kvm_x86_ops->run. */
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
~XFEATURE_MASK_PKRU);
@@ -8466,7 +8523,8 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
fpregs_lock();

- copy_fpregs_to_fpstate(vcpu->arch.guest_fpu);
+ kvm_save_current_fpu(vcpu->arch.guest_fpu);
+
copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state);

fpregs_mark_activate();
@@ -8688,6 +8746,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
vcpu_load(vcpu);
+ if (kvm_mpx_supported())
+ kvm_load_guest_fpu(vcpu);

kvm_apic_accept_events(vcpu);
if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
@@ -8696,6 +8756,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
else
mp_state->mp_state = vcpu->arch.mp_state;

+ if (kvm_mpx_supported())
+ kvm_put_guest_fpu(vcpu);
vcpu_put(vcpu);
return 0;
}
@@ -9055,6 +9117,9 @@ static void fx_init(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask;
+ struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
+
+ kvm_release_pfn(cache->pfn, cache->dirty, cache);

kvmclock_reset(vcpu);

@@ -9125,7 +9190,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_mmu_unload(vcpu);
vcpu_put(vcpu);

- kvm_x86_ops->vcpu_free(vcpu);
+ kvm_arch_vcpu_free(vcpu);
}

void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -9317,6 +9382,8 @@ int kvm_arch_hardware_setup(void)
if (r != 0)
return r;

+ cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data);
+
if (kvm_has_tsc_control) {
/*
* Make sure the user can only configure tsc_khz values that
@@ -9719,11 +9786,18 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,

void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
{
+ struct kvm_vcpu *vcpu;
+ int i;
+
/*
* memslots->generation has been incremented.
* mmio generation may have reached its maximum value.
*/
kvm_mmu_invalidate_mmio_sptes(kvm, gen);
+
+ /* Force re-initialization of steal_time cache */
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_vcpu_kick(vcpu);
}

int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -9975,7 +10049,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu))
return;

- vcpu->arch.mmu->page_fault(vcpu, work->gva, 0, true);
+ vcpu->arch.mmu->page_fault(vcpu, work->cr2_or_gpa, 0, true);
}

static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
@@ -10088,7 +10162,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
{
struct x86_exception fault;

- trace_kvm_async_pf_not_present(work->arch.token, work->gva);
+ trace_kvm_async_pf_not_present(work->arch.token, work->cr2_or_gpa);
kvm_add_async_pf_gfn(vcpu, work->arch.gfn);

if (kvm_can_deliver_async_pf(vcpu) &&
@@ -10123,7 +10197,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
work->arch.token = ~0; /* broadcast wakeup */
else
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
- trace_kvm_async_pf_ready(work->arch.token, work->gva);
+ trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa);

if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED &&
!apf_get_user(vcpu, &val)) {
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index dbf7442a822b..de6b55484876 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -286,7 +286,7 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
int page_num);
bool kvm_vector_hashing_enabled(void);
-int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
+int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int emulation_type, void *insn, int insn_len);

#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5bfea374a160..6ea215cdeada 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1215,6 +1215,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
x86_platform.get_nmi_reason = xen_get_nmi_reason;

x86_init.resources.memory_setup = xen_memory_setup;
+ x86_init.irqs.intr_mode_select = x86_init_noop;
x86_init.irqs.intr_mode_init = x86_init_noop;
x86_init.oem.arch_setup = xen_arch_setup;
x86_init.oem.banner = xen_banner;
diff --git a/crypto/algapi.c b/crypto/algapi.c
index de30ddc952d8..bb8329e49956 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -257,6 +257,7 @@ void crypto_alg_tested(const char *name, int err)
struct crypto_alg *alg;
struct crypto_alg *q;
LIST_HEAD(list);
+ bool best;

down_write(&crypto_alg_sem);
list_for_each_entry(q, &crypto_alg_list, cra_list) {
@@ -280,6 +281,21 @@ void crypto_alg_tested(const char *name, int err)

alg->cra_flags |= CRYPTO_ALG_TESTED;

+ /* Only satisfy larval waiters if we are the best. */
+ best = true;
+ list_for_each_entry(q, &crypto_alg_list, cra_list) {
+ if (crypto_is_moribund(q) || !crypto_is_larval(q))
+ continue;
+
+ if (strcmp(alg->cra_name, q->cra_name))
+ continue;
+
+ if (q->cra_priority > alg->cra_priority) {
+ best = false;
+ break;
+ }
+ }
+
list_for_each_entry(q, &crypto_alg_list, cra_list) {
if (q == alg)
continue;
@@ -303,10 +319,12 @@ void crypto_alg_tested(const char *name, int err)
continue;
if ((q->cra_flags ^ alg->cra_flags) & larval->mask)
continue;
- if (!crypto_mod_get(alg))
- continue;

- larval->adult = alg;
+ if (best && crypto_mod_get(alg))
+ larval->adult = alg;
+ else
+ larval->adult = ERR_PTR(-EAGAIN);
+
continue;
}

@@ -669,11 +687,9 @@ EXPORT_SYMBOL_GPL(crypto_grab_spawn);

void crypto_drop_spawn(struct crypto_spawn *spawn)
{
- if (!spawn->alg)
- return;
-
down_write(&crypto_alg_sem);
- list_del(&spawn->list);
+ if (spawn->alg)
+ list_del(&spawn->list);
up_write(&crypto_alg_sem);
}
EXPORT_SYMBOL_GPL(crypto_drop_spawn);
@@ -681,22 +697,16 @@ EXPORT_SYMBOL_GPL(crypto_drop_spawn);
static struct crypto_alg *crypto_spawn_alg(struct crypto_spawn *spawn)
{
struct crypto_alg *alg;
- struct crypto_alg *alg2;

down_read(&crypto_alg_sem);
alg = spawn->alg;
- alg2 = alg;
- if (alg2)
- alg2 = crypto_mod_get(alg2);
- up_read(&crypto_alg_sem);
-
- if (!alg2) {
- if (alg)
- crypto_shoot_alg(alg);
- return ERR_PTR(-EAGAIN);
+ if (alg && !crypto_mod_get(alg)) {
+ alg->cra_flags |= CRYPTO_ALG_DYING;
+ alg = NULL;
}
+ up_read(&crypto_alg_sem);

- return alg;
+ return alg ?: ERR_PTR(-EAGAIN);
}

struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type,
diff --git a/crypto/api.c b/crypto/api.c
index d8ba54142620..eda0c56b8615 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -97,7 +97,7 @@ static void crypto_larval_destroy(struct crypto_alg *alg)
struct crypto_larval *larval = (void *)alg;

BUG_ON(!crypto_is_larval(alg));
- if (larval->adult)
+ if (!IS_ERR_OR_NULL(larval->adult))
crypto_mod_put(larval->adult);
kfree(larval);
}
@@ -178,6 +178,8 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg)
alg = ERR_PTR(-ETIMEDOUT);
else if (!alg)
alg = ERR_PTR(-ENOENT);
+ else if (IS_ERR(alg))
+ ;
else if (crypto_is_test_larval(larval) &&
!(alg->cra_flags & CRYPTO_ALG_TESTED))
alg = ERR_PTR(-EAGAIN);
@@ -344,13 +346,12 @@ static unsigned int crypto_ctxsize(struct crypto_alg *alg, u32 type, u32 mask)
return len;
}

-void crypto_shoot_alg(struct crypto_alg *alg)
+static void crypto_shoot_alg(struct crypto_alg *alg)
{
down_write(&crypto_alg_sem);
alg->cra_flags |= CRYPTO_ALG_DYING;
up_write(&crypto_alg_sem);
}
-EXPORT_SYMBOL_GPL(crypto_shoot_alg);

struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type,
u32 mask)
diff --git a/crypto/internal.h b/crypto/internal.h
index 93df7bec844a..e506a57e2243 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -68,7 +68,6 @@ void crypto_alg_tested(const char *name, int err);
void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
struct crypto_alg *nalg);
void crypto_remove_final(struct list_head *list);
-void crypto_shoot_alg(struct crypto_alg *alg);
struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type,
u32 mask);
void *crypto_create_tfm(struct crypto_alg *alg,
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index 81bbea7f2ba6..a4f3b3f342c8 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -24,6 +24,8 @@ static struct kset *pcrypt_kset;

struct pcrypt_instance_ctx {
struct crypto_aead_spawn spawn;
+ struct padata_shell *psenc;
+ struct padata_shell *psdec;
atomic_t tfm_count;
};

@@ -32,6 +34,12 @@ struct pcrypt_aead_ctx {
unsigned int cb_cpu;
};

+static inline struct pcrypt_instance_ctx *pcrypt_tfm_ictx(
+ struct crypto_aead *tfm)
+{
+ return aead_instance_ctx(aead_alg_instance(tfm));
+}
+
static int pcrypt_aead_setkey(struct crypto_aead *parent,
const u8 *key, unsigned int keylen)
{
@@ -63,7 +71,6 @@ static void pcrypt_aead_done(struct crypto_async_request *areq, int err)
struct padata_priv *padata = pcrypt_request_padata(preq);

padata->info = err;
- req->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

padata_do_serial(padata);
}
@@ -90,6 +97,9 @@ static int pcrypt_aead_encrypt(struct aead_request *req)
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct pcrypt_aead_ctx *ctx = crypto_aead_ctx(aead);
u32 flags = aead_request_flags(req);
+ struct pcrypt_instance_ctx *ictx;
+
+ ictx = pcrypt_tfm_ictx(aead);

memset(padata, 0, sizeof(struct padata_priv));

@@ -103,7 +113,7 @@ static int pcrypt_aead_encrypt(struct aead_request *req)
req->cryptlen, req->iv);
aead_request_set_ad(creq, req->assoclen);

- err = padata_do_parallel(pencrypt, padata, &ctx->cb_cpu);
+ err = padata_do_parallel(ictx->psenc, padata, &ctx->cb_cpu);
if (!err)
return -EINPROGRESS;

@@ -132,6 +142,9 @@ static int pcrypt_aead_decrypt(struct aead_request *req)
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct pcrypt_aead_ctx *ctx = crypto_aead_ctx(aead);
u32 flags = aead_request_flags(req);
+ struct pcrypt_instance_ctx *ictx;
+
+ ictx = pcrypt_tfm_ictx(aead);

memset(padata, 0, sizeof(struct padata_priv));

@@ -145,7 +158,7 @@ static int pcrypt_aead_decrypt(struct aead_request *req)
req->cryptlen, req->iv);
aead_request_set_ad(creq, req->assoclen);

- err = padata_do_parallel(pdecrypt, padata, &ctx->cb_cpu);
+ err = padata_do_parallel(ictx->psdec, padata, &ctx->cb_cpu);
if (!err)
return -EINPROGRESS;

@@ -192,6 +205,8 @@ static void pcrypt_free(struct aead_instance *inst)
struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst);

crypto_drop_aead(&ctx->spawn);
+ padata_free_shell(ctx->psdec);
+ padata_free_shell(ctx->psenc);
kfree(inst);
}

@@ -233,12 +248,22 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb,
if (!inst)
return -ENOMEM;

+ err = -ENOMEM;
+
ctx = aead_instance_ctx(inst);
+ ctx->psenc = padata_alloc_shell(pencrypt);
+ if (!ctx->psenc)
+ goto out_free_inst;
+
+ ctx->psdec = padata_alloc_shell(pdecrypt);
+ if (!ctx->psdec)
+ goto out_free_psenc;
+
crypto_set_aead_spawn(&ctx->spawn, aead_crypto_instance(inst));

err = crypto_grab_aead(&ctx->spawn, name, 0, 0);
if (err)
- goto out_free_inst;
+ goto out_free_psdec;

alg = crypto_spawn_aead_alg(&ctx->spawn);
err = pcrypt_init_instance(aead_crypto_instance(inst), &alg->base);
@@ -271,6 +296,10 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb,

out_drop_aead:
crypto_drop_aead(&ctx->spawn);
+out_free_psdec:
+ padata_free_shell(ctx->psdec);
+out_free_psenc:
+ padata_free_shell(ctx->psenc);
out_free_inst:
kfree(inst);
goto out;
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 558fedf8a7a1..254a7d98b9d4 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -38,6 +38,8 @@
#define PREFIX "ACPI: "

#define ACPI_BATTERY_VALUE_UNKNOWN 0xFFFFFFFF
+#define ACPI_BATTERY_CAPACITY_VALID(capacity) \
+ ((capacity) != 0 && (capacity) != ACPI_BATTERY_VALUE_UNKNOWN)

#define ACPI_BATTERY_DEVICE_NAME "Battery"

@@ -192,7 +194,8 @@ static int acpi_battery_is_charged(struct acpi_battery *battery)

static bool acpi_battery_is_degraded(struct acpi_battery *battery)
{
- return battery->full_charge_capacity && battery->design_capacity &&
+ return ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity) &&
+ ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity) &&
battery->full_charge_capacity < battery->design_capacity;
}

@@ -214,7 +217,7 @@ static int acpi_battery_get_property(struct power_supply *psy,
enum power_supply_property psp,
union power_supply_propval *val)
{
- int ret = 0;
+ int full_capacity = ACPI_BATTERY_VALUE_UNKNOWN, ret = 0;
struct acpi_battery *battery = to_acpi_battery(psy);

if (acpi_battery_present(battery)) {
@@ -263,14 +266,14 @@ static int acpi_battery_get_property(struct power_supply *psy,
break;
case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
- if (battery->design_capacity == ACPI_BATTERY_VALUE_UNKNOWN)
+ if (!ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity))
ret = -ENODEV;
else
val->intval = battery->design_capacity * 1000;
break;
case POWER_SUPPLY_PROP_CHARGE_FULL:
case POWER_SUPPLY_PROP_ENERGY_FULL:
- if (battery->full_charge_capacity == ACPI_BATTERY_VALUE_UNKNOWN)
+ if (!ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity))
ret = -ENODEV;
else
val->intval = battery->full_charge_capacity * 1000;
@@ -283,11 +286,17 @@ static int acpi_battery_get_property(struct power_supply *psy,
val->intval = battery->capacity_now * 1000;
break;
case POWER_SUPPLY_PROP_CAPACITY:
- if (battery->capacity_now && battery->full_charge_capacity)
- val->intval = battery->capacity_now * 100/
- battery->full_charge_capacity;
+ if (ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity))
+ full_capacity = battery->full_charge_capacity;
+ else if (ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity))
+ full_capacity = battery->design_capacity;
+
+ if (battery->capacity_now == ACPI_BATTERY_VALUE_UNKNOWN ||
+ full_capacity == ACPI_BATTERY_VALUE_UNKNOWN)
+ ret = -ENODEV;
else
- val->intval = 0;
+ val->intval = battery->capacity_now * 100/
+ full_capacity;
break;
case POWER_SUPPLY_PROP_CAPACITY_LEVEL:
if (battery->state & ACPI_BATTERY_STATE_CRITICAL)
@@ -333,6 +342,20 @@ static enum power_supply_property charge_battery_props[] = {
POWER_SUPPLY_PROP_SERIAL_NUMBER,
};

+static enum power_supply_property charge_battery_full_cap_broken_props[] = {
+ POWER_SUPPLY_PROP_STATUS,
+ POWER_SUPPLY_PROP_PRESENT,
+ POWER_SUPPLY_PROP_TECHNOLOGY,
+ POWER_SUPPLY_PROP_CYCLE_COUNT,
+ POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+ POWER_SUPPLY_PROP_VOLTAGE_NOW,
+ POWER_SUPPLY_PROP_CURRENT_NOW,
+ POWER_SUPPLY_PROP_CHARGE_NOW,
+ POWER_SUPPLY_PROP_MODEL_NAME,
+ POWER_SUPPLY_PROP_MANUFACTURER,
+ POWER_SUPPLY_PROP_SERIAL_NUMBER,
+};
+
static enum power_supply_property energy_battery_props[] = {
POWER_SUPPLY_PROP_STATUS,
POWER_SUPPLY_PROP_PRESENT,
@@ -794,20 +817,34 @@ static void __exit battery_hook_exit(void)
static int sysfs_add_battery(struct acpi_battery *battery)
{
struct power_supply_config psy_cfg = { .drv_data = battery, };
+ bool full_cap_broken = false;
+
+ if (!ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity) &&
+ !ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity))
+ full_cap_broken = true;

if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) {
- battery->bat_desc.properties = charge_battery_props;
- battery->bat_desc.num_properties =
- ARRAY_SIZE(charge_battery_props);
- } else if (battery->full_charge_capacity == 0) {
- battery->bat_desc.properties =
- energy_battery_full_cap_broken_props;
- battery->bat_desc.num_properties =
- ARRAY_SIZE(energy_battery_full_cap_broken_props);
+ if (full_cap_broken) {
+ battery->bat_desc.properties =
+ charge_battery_full_cap_broken_props;
+ battery->bat_desc.num_properties =
+ ARRAY_SIZE(charge_battery_full_cap_broken_props);
+ } else {
+ battery->bat_desc.properties = charge_battery_props;
+ battery->bat_desc.num_properties =
+ ARRAY_SIZE(charge_battery_props);
+ }
} else {
- battery->bat_desc.properties = energy_battery_props;
- battery->bat_desc.num_properties =
- ARRAY_SIZE(energy_battery_props);
+ if (full_cap_broken) {
+ battery->bat_desc.properties =
+ energy_battery_full_cap_broken_props;
+ battery->bat_desc.num_properties =
+ ARRAY_SIZE(energy_battery_full_cap_broken_props);
+ } else {
+ battery->bat_desc.properties = energy_battery_props;
+ battery->bat_desc.num_properties =
+ ARRAY_SIZE(energy_battery_props);
+ }
}

battery->bat_desc.name = acpi_device_bid(battery->device);
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 31014c7d3793..e63fd7bfd3a5 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -336,6 +336,11 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Precision 7510"),
},
},
+
+ /*
+ * Desktops which falsely report a backlight and which our heuristics
+ * for this do not catch.
+ */
{
.callback = video_detect_force_none,
.ident = "Dell OptiPlex 9020M",
@@ -344,6 +349,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 9020M"),
},
},
+ {
+ .callback = video_detect_force_none,
+ .ident = "MSI MS-7721",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "MSI"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MS-7721"),
+ },
+ },
{ },
};

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 134a8af51511..0e99a760aebd 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -273,10 +273,38 @@ static void dpm_wait_for_suppliers(struct device *dev, bool async)
device_links_read_unlock(idx);
}

-static void dpm_wait_for_superior(struct device *dev, bool async)
+static bool dpm_wait_for_superior(struct device *dev, bool async)
{
- dpm_wait(dev->parent, async);
+ struct device *parent;
+
+ /*
+ * If the device is resumed asynchronously and the parent's callback
+ * deletes both the device and the parent itself, the parent object may
+ * be freed while this function is running, so avoid that by reference
+ * counting the parent once more unless the device has been deleted
+ * already (in which case return right away).
+ */
+ mutex_lock(&dpm_list_mtx);
+
+ if (!device_pm_initialized(dev)) {
+ mutex_unlock(&dpm_list_mtx);
+ return false;
+ }
+
+ parent = get_device(dev->parent);
+
+ mutex_unlock(&dpm_list_mtx);
+
+ dpm_wait(parent, async);
+ put_device(parent);
+
dpm_wait_for_suppliers(dev, async);
+
+ /*
+ * If the parent's callback has deleted the device, attempting to resume
+ * it would be invalid, so avoid doing that then.
+ */
+ return device_pm_initialized(dev);
}

static void dpm_wait_for_consumers(struct device *dev, bool async)
@@ -621,7 +649,8 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn
if (!dev->power.is_noirq_suspended)
goto Out;

- dpm_wait_for_superior(dev, async);
+ if (!dpm_wait_for_superior(dev, async))
+ goto Out;

skip_resume = dev_pm_may_skip_resume(dev);

@@ -829,7 +858,8 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn
if (!dev->power.is_late_suspended)
goto Out;

- dpm_wait_for_superior(dev, async);
+ if (!dpm_wait_for_superior(dev, async))
+ goto Out;

callback = dpm_subsys_resume_early_cb(dev, state, &info);

@@ -944,7 +974,9 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
goto Complete;
}

- dpm_wait_for_superior(dev, async);
+ if (!dpm_wait_for_superior(dev, async))
+ goto Complete;
+
dpm_watchdog_set(&wd, dev);
device_lock(dev);

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 4e7ef35f1c8f..9c3b063e1a1f 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2850,7 +2850,7 @@ static int btusb_mtk_setup_firmware(struct hci_dev *hdev, const char *fwname)
err = btusb_mtk_hci_wmt_sync(hdev, &wmt_params);
if (err < 0) {
bt_dev_err(hdev, "Failed to send wmt rst (%d)", err);
- return err;
+ goto err_release_fw;
}

/* Wait a few moments for firmware activation done */
@@ -3819,6 +3819,10 @@ static int btusb_probe(struct usb_interface *intf,
* (DEVICE_REMOTE_WAKEUP)
*/
set_bit(BTUSB_WAKEUP_DISABLE, &data->flags);
+
+ err = usb_autopm_get_interface(intf);
+ if (err < 0)
+ goto out_free_dev;
}

if (id->driver_info & BTUSB_AMP) {
diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c
index 1ed85f120a1b..49b9f2f85bad 100644
--- a/drivers/clk/tegra/clk-tegra-periph.c
+++ b/drivers/clk/tegra/clk-tegra-periph.c
@@ -785,7 +785,11 @@ static struct tegra_periph_init_data gate_clks[] = {
GATE("ahbdma", "hclk", 33, 0, tegra_clk_ahbdma, 0),
GATE("apbdma", "pclk", 34, 0, tegra_clk_apbdma, 0),
GATE("kbc", "clk_32k", 36, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_kbc, 0),
- GATE("fuse", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse, 0),
+ /*
+ * Critical for RAM re-repair operation, which must occur on resume
+ * from LP1 system suspend and as part of CCPLEX cluster switching.
+ */
+ GATE("fuse", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse, CLK_IS_CRITICAL),
GATE("fuse_burn", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse_burn, 0),
GATE("kfuse", "clk_m", 40, TEGRA_PERIPH_ON_APB, tegra_clk_kfuse, 0),
GATE("apbif", "clk_m", 107, TEGRA_PERIPH_ON_APB, tegra_clk_apbif, 0),
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 8d8da763adc5..8910fd1ae3c6 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -217,7 +217,7 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
return ret;
}

-static int cppc_verify_policy(struct cpufreq_policy *policy)
+static int cppc_verify_policy(struct cpufreq_policy_data *policy)
{
cpufreq_verify_within_cpu_limits(policy);
return 0;
diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c
index cd53272e2fa2..f7a7bcf6f52e 100644
--- a/drivers/cpufreq/cpufreq-nforce2.c
+++ b/drivers/cpufreq/cpufreq-nforce2.c
@@ -291,7 +291,7 @@ static int nforce2_target(struct cpufreq_policy *policy,
* nforce2_verify - verifies a new CPUFreq policy
* @policy: new policy
*/
-static int nforce2_verify(struct cpufreq_policy *policy)
+static int nforce2_verify(struct cpufreq_policy_data *policy)
{
unsigned int fsb_pol_max;

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index a7db4f22a077..7679f8a91745 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -74,6 +74,9 @@ static void cpufreq_exit_governor(struct cpufreq_policy *policy);
static int cpufreq_start_governor(struct cpufreq_policy *policy);
static void cpufreq_stop_governor(struct cpufreq_policy *policy);
static void cpufreq_governor_limits(struct cpufreq_policy *policy);
+static int cpufreq_set_policy(struct cpufreq_policy *policy,
+ struct cpufreq_governor *new_gov,
+ unsigned int new_pol);

/**
* Two notifier lists: the "policy" list is involved in the
@@ -613,25 +616,22 @@ static struct cpufreq_governor *find_governor(const char *str_governor)
return NULL;
}

-static int cpufreq_parse_policy(char *str_governor,
- struct cpufreq_policy *policy)
+static unsigned int cpufreq_parse_policy(char *str_governor)
{
- if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
- policy->policy = CPUFREQ_POLICY_PERFORMANCE;
- return 0;
- }
- if (!strncasecmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) {
- policy->policy = CPUFREQ_POLICY_POWERSAVE;
- return 0;
- }
- return -EINVAL;
+ if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN))
+ return CPUFREQ_POLICY_PERFORMANCE;
+
+ if (!strncasecmp(str_governor, "powersave", CPUFREQ_NAME_LEN))
+ return CPUFREQ_POLICY_POWERSAVE;
+
+ return CPUFREQ_POLICY_UNKNOWN;
}

/**
* cpufreq_parse_governor - parse a governor string only for has_target()
+ * @str_governor: Governor name.
*/
-static int cpufreq_parse_governor(char *str_governor,
- struct cpufreq_policy *policy)
+static struct cpufreq_governor *cpufreq_parse_governor(char *str_governor)
{
struct cpufreq_governor *t;

@@ -645,7 +645,7 @@ static int cpufreq_parse_governor(char *str_governor,

ret = request_module("cpufreq_%s", str_governor);
if (ret)
- return -EINVAL;
+ return NULL;

mutex_lock(&cpufreq_governor_mutex);

@@ -656,12 +656,7 @@ static int cpufreq_parse_governor(char *str_governor,

mutex_unlock(&cpufreq_governor_mutex);

- if (t) {
- policy->governor = t;
- return 0;
- }
-
- return -EINVAL;
+ return t;
}

/**
@@ -762,28 +757,33 @@ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
const char *buf, size_t count)
{
+ char str_governor[16];
int ret;
- char str_governor[16];
- struct cpufreq_policy new_policy;
-
- memcpy(&new_policy, policy, sizeof(*policy));

ret = sscanf(buf, "%15s", str_governor);
if (ret != 1)
return -EINVAL;

if (cpufreq_driver->setpolicy) {
- if (cpufreq_parse_policy(str_governor, &new_policy))
+ unsigned int new_pol;
+
+ new_pol = cpufreq_parse_policy(str_governor);
+ if (!new_pol)
return -EINVAL;
+
+ ret = cpufreq_set_policy(policy, NULL, new_pol);
} else {
- if (cpufreq_parse_governor(str_governor, &new_policy))
+ struct cpufreq_governor *new_gov;
+
+ new_gov = cpufreq_parse_governor(str_governor);
+ if (!new_gov)
return -EINVAL;
- }

- ret = cpufreq_set_policy(policy, &new_policy);
+ ret = cpufreq_set_policy(policy, new_gov,
+ CPUFREQ_POLICY_UNKNOWN);

- if (new_policy.governor)
- module_put(new_policy.governor->owner);
+ module_put(new_gov->owner);
+ }

return ret ? ret : count;
}
@@ -1050,40 +1050,33 @@ __weak struct cpufreq_governor *cpufreq_default_governor(void)

static int cpufreq_init_policy(struct cpufreq_policy *policy)
{
- struct cpufreq_governor *gov = NULL, *def_gov = NULL;
- struct cpufreq_policy new_policy;
-
- memcpy(&new_policy, policy, sizeof(*policy));
-
- def_gov = cpufreq_default_governor();
+ struct cpufreq_governor *def_gov = cpufreq_default_governor();
+ struct cpufreq_governor *gov = NULL;
+ unsigned int pol = CPUFREQ_POLICY_UNKNOWN;

if (has_target()) {
- /*
- * Update governor of new_policy to the governor used before
- * hotplug
- */
+ /* Update policy governor to the one used before hotplug. */
gov = find_governor(policy->last_governor);
if (gov) {
pr_debug("Restoring governor %s for cpu %d\n",
- policy->governor->name, policy->cpu);
- } else {
- if (!def_gov)
- return -ENODATA;
+ policy->governor->name, policy->cpu);
+ } else if (def_gov) {
gov = def_gov;
+ } else {
+ return -ENODATA;
}
- new_policy.governor = gov;
} else {
/* Use the default policy if there is no last_policy. */
if (policy->last_policy) {
- new_policy.policy = policy->last_policy;
+ pol = policy->last_policy;
+ } else if (def_gov) {
+ pol = cpufreq_parse_policy(def_gov->name);
} else {
- if (!def_gov)
- return -ENODATA;
- cpufreq_parse_policy(def_gov->name, &new_policy);
+ return -ENODATA;
}
}

- return cpufreq_set_policy(policy, &new_policy);
+ return cpufreq_set_policy(policy, gov, pol);
}

static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
@@ -1111,13 +1104,10 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp

void refresh_frequency_limits(struct cpufreq_policy *policy)
{
- struct cpufreq_policy new_policy;
-
if (!policy_is_inactive(policy)) {
- new_policy = *policy;
pr_debug("updating policy for CPU %u\n", policy->cpu);

- cpufreq_set_policy(policy, &new_policy);
+ cpufreq_set_policy(policy, policy->governor, policy->policy);
}
}
EXPORT_SYMBOL(refresh_frequency_limits);
@@ -2361,43 +2351,46 @@ EXPORT_SYMBOL(cpufreq_get_policy);
/**
* cpufreq_set_policy - Modify cpufreq policy parameters.
* @policy: Policy object to modify.
- * @new_policy: New policy data.
+ * @new_gov: Policy governor pointer.
+ * @new_pol: Policy value (for drivers with built-in governors).
*
- * Pass @new_policy to the cpufreq driver's ->verify() callback. Next, copy the
- * min and max parameters of @new_policy to @policy and either invoke the
- * driver's ->setpolicy() callback (if present) or carry out a governor update
- * for @policy. That is, run the current governor's ->limits() callback (if the
- * governor field in @new_policy points to the same object as the one in
- * @policy) or replace the governor for @policy with the new one stored in
- * @new_policy.
+ * Invoke the cpufreq driver's ->verify() callback to sanity-check the frequency
+ * limits to be set for the policy, update @policy with the verified limits
+ * values and either invoke the driver's ->setpolicy() callback (if present) or
+ * carry out a governor update for @policy. That is, run the current governor's
+ * ->limits() callback (if @new_gov points to the same object as the one in
+ * @policy) or replace the governor for @policy with @new_gov.
*
* The cpuinfo part of @policy is not updated by this function.
*/
-int cpufreq_set_policy(struct cpufreq_policy *policy,
- struct cpufreq_policy *new_policy)
+static int cpufreq_set_policy(struct cpufreq_policy *policy,
+ struct cpufreq_governor *new_gov,
+ unsigned int new_pol)
{
+ struct cpufreq_policy_data new_data;
struct cpufreq_governor *old_gov;
int ret;

- pr_debug("setting new policy for CPU %u: %u - %u kHz\n",
- new_policy->cpu, new_policy->min, new_policy->max);
-
- memcpy(&new_policy->cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo));
-
+ memcpy(&new_data.cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo));
+ new_data.freq_table = policy->freq_table;
+ new_data.cpu = policy->cpu;
/*
* PM QoS framework collects all the requests from users and provide us
* the final aggregated value here.
*/
- new_policy->min = freq_qos_read_value(&policy->constraints, FREQ_QOS_MIN);
- new_policy->max = freq_qos_read_value(&policy->constraints, FREQ_QOS_MAX);
+ new_data.min = freq_qos_read_value(&policy->constraints, FREQ_QOS_MIN);
+ new_data.max = freq_qos_read_value(&policy->constraints, FREQ_QOS_MAX);
+
+ pr_debug("setting new policy for CPU %u: %u - %u kHz\n",
+ new_data.cpu, new_data.min, new_data.max);

/* verify the cpu speed can be set within this limit */
- ret = cpufreq_driver->verify(new_policy);
+ ret = cpufreq_driver->verify(&new_data);
if (ret)
return ret;

- policy->min = new_policy->min;
- policy->max = new_policy->max;
+ policy->min = new_data.min;
+ policy->max = new_data.max;
trace_cpu_frequency_limits(policy);

policy->cached_target_freq = UINT_MAX;
@@ -2406,12 +2399,12 @@ int cpufreq_set_policy(struct cpufreq_policy *policy,
policy->min, policy->max);

if (cpufreq_driver->setpolicy) {
- policy->policy = new_policy->policy;
+ policy->policy = new_pol;
pr_debug("setting range\n");
return cpufreq_driver->setpolicy(policy);
}

- if (new_policy->governor == policy->governor) {
+ if (new_gov == policy->governor) {
pr_debug("governor limits update\n");
cpufreq_governor_limits(policy);
return 0;
@@ -2428,7 +2421,7 @@ int cpufreq_set_policy(struct cpufreq_policy *policy,
}

/* start new governor */
- policy->governor = new_policy->governor;
+ policy->governor = new_gov;
ret = cpufreq_init_governor(policy);
if (!ret) {
ret = cpufreq_start_governor(policy);
diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c
index ded427e0a488..e117b0059123 100644
--- a/drivers/cpufreq/freq_table.c
+++ b/drivers/cpufreq/freq_table.c
@@ -60,7 +60,7 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
return 0;
}

-int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
+int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy,
struct cpufreq_frequency_table *table)
{
struct cpufreq_frequency_table *pos;
@@ -100,7 +100,7 @@ EXPORT_SYMBOL_GPL(cpufreq_frequency_table_verify);
* Generic routine to verify policy & frequency table, requires driver to set
* policy->freq_table prior to it.
*/
-int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy)
+int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy)
{
if (!policy->freq_table)
return -ENODEV;
diff --git a/drivers/cpufreq/gx-suspmod.c b/drivers/cpufreq/gx-suspmod.c
index e97b5733aa24..75b3ef7ec679 100644
--- a/drivers/cpufreq/gx-suspmod.c
+++ b/drivers/cpufreq/gx-suspmod.c
@@ -328,7 +328,7 @@ static void gx_set_cpuspeed(struct cpufreq_policy *policy, unsigned int khz)
* for the hardware supported by the driver.
*/

-static int cpufreq_gx_verify(struct cpufreq_policy *policy)
+static int cpufreq_gx_verify(struct cpufreq_policy_data *policy)
{
unsigned int tmp_freq = 0;
u8 tmp1, tmp2;
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 8ab31702cf6a..45499e0b9f2f 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2036,8 +2036,9 @@ static int intel_pstate_get_max_freq(struct cpudata *cpu)
cpu->pstate.max_freq : cpu->pstate.turbo_freq;
}

-static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
- struct cpudata *cpu)
+static void intel_pstate_update_perf_limits(struct cpudata *cpu,
+ unsigned int policy_min,
+ unsigned int policy_max)
{
int max_freq = intel_pstate_get_max_freq(cpu);
int32_t max_policy_perf, min_policy_perf;
@@ -2056,18 +2057,17 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
turbo_max = cpu->pstate.turbo_pstate;
}

- max_policy_perf = max_state * policy->max / max_freq;
- if (policy->max == policy->min) {
+ max_policy_perf = max_state * policy_max / max_freq;
+ if (policy_max == policy_min) {
min_policy_perf = max_policy_perf;
} else {
- min_policy_perf = max_state * policy->min / max_freq;
+ min_policy_perf = max_state * policy_min / max_freq;
min_policy_perf = clamp_t(int32_t, min_policy_perf,
0, max_policy_perf);
}

pr_debug("cpu:%d max_state %d min_policy_perf:%d max_policy_perf:%d\n",
- policy->cpu, max_state,
- min_policy_perf, max_policy_perf);
+ cpu->cpu, max_state, min_policy_perf, max_policy_perf);

/* Normalize user input to [min_perf, max_perf] */
if (per_cpu_limits) {
@@ -2081,7 +2081,7 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
global_min = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100);
global_min = clamp_t(int32_t, global_min, 0, global_max);

- pr_debug("cpu:%d global_min:%d global_max:%d\n", policy->cpu,
+ pr_debug("cpu:%d global_min:%d global_max:%d\n", cpu->cpu,
global_min, global_max);

cpu->min_perf_ratio = max(min_policy_perf, global_min);
@@ -2094,7 +2094,7 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
cpu->max_perf_ratio);

}
- pr_debug("cpu:%d max_perf_ratio:%d min_perf_ratio:%d\n", policy->cpu,
+ pr_debug("cpu:%d max_perf_ratio:%d min_perf_ratio:%d\n", cpu->cpu,
cpu->max_perf_ratio,
cpu->min_perf_ratio);
}
@@ -2114,7 +2114,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)

mutex_lock(&intel_pstate_limits_lock);

- intel_pstate_update_perf_limits(policy, cpu);
+ intel_pstate_update_perf_limits(cpu, policy->min, policy->max);

if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
/*
@@ -2143,8 +2143,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
return 0;
}

-static void intel_pstate_adjust_policy_max(struct cpufreq_policy *policy,
- struct cpudata *cpu)
+static void intel_pstate_adjust_policy_max(struct cpudata *cpu,
+ struct cpufreq_policy_data *policy)
{
if (!hwp_active &&
cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
@@ -2155,7 +2155,7 @@ static void intel_pstate_adjust_policy_max(struct cpufreq_policy *policy,
}
}

-static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
+static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy)
{
struct cpudata *cpu = all_cpu_data[policy->cpu];

@@ -2163,11 +2163,7 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
intel_pstate_get_max_freq(cpu));

- if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
- policy->policy != CPUFREQ_POLICY_PERFORMANCE)
- return -EINVAL;
-
- intel_pstate_adjust_policy_max(policy, cpu);
+ intel_pstate_adjust_policy_max(cpu, policy);

return 0;
}
@@ -2268,7 +2264,7 @@ static struct cpufreq_driver intel_pstate = {
.name = "intel_pstate",
};

-static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
+static int intel_cpufreq_verify_policy(struct cpufreq_policy_data *policy)
{
struct cpudata *cpu = all_cpu_data[policy->cpu];

@@ -2276,9 +2272,9 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
intel_pstate_get_max_freq(cpu));

- intel_pstate_adjust_policy_max(policy, cpu);
+ intel_pstate_adjust_policy_max(cpu, policy);

- intel_pstate_update_perf_limits(policy, cpu);
+ intel_pstate_update_perf_limits(cpu, policy->min, policy->max);

return 0;
}
diff --git a/drivers/cpufreq/longrun.c b/drivers/cpufreq/longrun.c
index 64b8689f7a4a..0b08be8bff76 100644
--- a/drivers/cpufreq/longrun.c
+++ b/drivers/cpufreq/longrun.c
@@ -122,7 +122,7 @@ static int longrun_set_policy(struct cpufreq_policy *policy)
* Validates a new CPUFreq policy. This function has to be called with
* cpufreq_driver locked.
*/
-static int longrun_verify_policy(struct cpufreq_policy *policy)
+static int longrun_verify_policy(struct cpufreq_policy_data *policy)
{
if (!policy)
return -EINVAL;
@@ -130,10 +130,6 @@ static int longrun_verify_policy(struct cpufreq_policy *policy)
policy->cpu = 0;
cpufreq_verify_within_cpu_limits(policy);

- if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) &&
- (policy->policy != CPUFREQ_POLICY_PERFORMANCE))
- return -EINVAL;
-
return 0;
}

diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
index fdc767fdbe6a..f90273006553 100644
--- a/drivers/cpufreq/pcc-cpufreq.c
+++ b/drivers/cpufreq/pcc-cpufreq.c
@@ -109,7 +109,7 @@ struct pcc_cpu {

static struct pcc_cpu __percpu *pcc_cpu_info;

-static int pcc_cpufreq_verify(struct cpufreq_policy *policy)
+static int pcc_cpufreq_verify(struct cpufreq_policy_data *policy)
{
cpufreq_verify_within_cpu_limits(policy);
return 0;
diff --git a/drivers/cpufreq/sh-cpufreq.c b/drivers/cpufreq/sh-cpufreq.c
index 5096c0ab781b..0ac265d47ef0 100644
--- a/drivers/cpufreq/sh-cpufreq.c
+++ b/drivers/cpufreq/sh-cpufreq.c
@@ -87,7 +87,7 @@ static int sh_cpufreq_target(struct cpufreq_policy *policy,
return work_on_cpu(policy->cpu, __sh_cpufreq_target, &data);
}

-static int sh_cpufreq_verify(struct cpufreq_policy *policy)
+static int sh_cpufreq_verify(struct cpufreq_policy_data *policy)
{
struct clk *cpuclk = &per_cpu(sh_cpuclk, policy->cpu);
struct cpufreq_frequency_table *freq_table;
diff --git a/drivers/cpufreq/unicore2-cpufreq.c b/drivers/cpufreq/unicore2-cpufreq.c
index 707dbc1b7ac8..98d392196df2 100644
--- a/drivers/cpufreq/unicore2-cpufreq.c
+++ b/drivers/cpufreq/unicore2-cpufreq.c
@@ -22,7 +22,7 @@ static struct cpufreq_driver ucv2_driver;
/* make sure that only the "userspace" governor is run
* -- anything else wouldn't make sense on this platform, anyway.
*/
-static int ucv2_verify_speed(struct cpufreq_policy *policy)
+static int ucv2_verify_speed(struct cpufreq_policy_data *policy)
{
if (policy->cpu)
return -EINVAL;
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index db99cee1991c..89f79d763ab8 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -88,7 +88,6 @@
struct atmel_aes_caps {
bool has_dualbuff;
bool has_cfb64;
- bool has_ctr32;
bool has_gcm;
bool has_xts;
bool has_authenc;
@@ -1013,8 +1012,9 @@ static int atmel_aes_ctr_transfer(struct atmel_aes_dev *dd)
struct atmel_aes_ctr_ctx *ctx = atmel_aes_ctr_ctx_cast(dd->ctx);
struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq);
struct scatterlist *src, *dst;
- u32 ctr, blocks;
size_t datalen;
+ u32 ctr;
+ u16 blocks, start, end;
bool use_dma, fragmented = false;

/* Check for transfer completion. */
@@ -1026,27 +1026,17 @@ static int atmel_aes_ctr_transfer(struct atmel_aes_dev *dd)
datalen = req->nbytes - ctx->offset;
blocks = DIV_ROUND_UP(datalen, AES_BLOCK_SIZE);
ctr = be32_to_cpu(ctx->iv[3]);
- if (dd->caps.has_ctr32) {
- /* Check 32bit counter overflow. */
- u32 start = ctr;
- u32 end = start + blocks - 1;
-
- if (end < start) {
- ctr |= 0xffffffff;
- datalen = AES_BLOCK_SIZE * -start;
- fragmented = true;
- }
- } else {
- /* Check 16bit counter overflow. */
- u16 start = ctr & 0xffff;
- u16 end = start + (u16)blocks - 1;
-
- if (blocks >> 16 || end < start) {
- ctr |= 0xffff;
- datalen = AES_BLOCK_SIZE * (0x10000-start);
- fragmented = true;
- }
+
+ /* Check 16bit counter overflow. */
+ start = ctr & 0xffff;
+ end = start + blocks - 1;
+
+ if (blocks >> 16 || end < start) {
+ ctr |= 0xffff;
+ datalen = AES_BLOCK_SIZE * (0x10000 - start);
+ fragmented = true;
}
+
use_dma = (datalen >= ATMEL_AES_DMA_THRESHOLD);

/* Jump to offset. */
@@ -2550,7 +2540,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
{
dd->caps.has_dualbuff = 0;
dd->caps.has_cfb64 = 0;
- dd->caps.has_ctr32 = 0;
dd->caps.has_gcm = 0;
dd->caps.has_xts = 0;
dd->caps.has_authenc = 0;
@@ -2561,7 +2550,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
case 0x500:
dd->caps.has_dualbuff = 1;
dd->caps.has_cfb64 = 1;
- dd->caps.has_ctr32 = 1;
dd->caps.has_gcm = 1;
dd->caps.has_xts = 1;
dd->caps.has_authenc = 1;
@@ -2570,7 +2558,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
case 0x200:
dd->caps.has_dualbuff = 1;
dd->caps.has_cfb64 = 1;
- dd->caps.has_ctr32 = 1;
dd->caps.has_gcm = 1;
dd->caps.max_burst_size = 4;
break;
diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c
index 0186b3df4c87..0d5576f6ad21 100644
--- a/drivers/crypto/ccp/ccp-dev-v3.c
+++ b/drivers/crypto/ccp/ccp-dev-v3.c
@@ -586,6 +586,7 @@ const struct ccp_vdata ccpv3_platform = {
.setup = NULL,
.perform = &ccp3_actions,
.offset = 0,
+ .rsamax = CCP_RSA_MAX_WIDTH,
};

const struct ccp_vdata ccpv3 = {
diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c
index d3e8faa03f15..3d7c8d9e54b9 100644
--- a/drivers/crypto/ccree/cc_aead.c
+++ b/drivers/crypto/ccree/cc_aead.c
@@ -237,7 +237,7 @@ static void cc_aead_complete(struct device *dev, void *cc_req, int err)
* revealed the decrypted message --> zero its memory.
*/
sg_zero_buffer(areq->dst, sg_nents(areq->dst),
- areq->cryptlen, 0);
+ areq->cryptlen, areq->assoclen);
err = -EBADMSG;
}
/*ENCRYPT*/
diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c
index 254b48797799..cd9c60268bf8 100644
--- a/drivers/crypto/ccree/cc_cipher.c
+++ b/drivers/crypto/ccree/cc_cipher.c
@@ -523,6 +523,7 @@ static void cc_setup_readiv_desc(struct crypto_tfm *tfm,
}
}

+
static void cc_setup_state_desc(struct crypto_tfm *tfm,
struct cipher_req_ctx *req_ctx,
unsigned int ivsize, unsigned int nbytes,
@@ -534,8 +535,6 @@ static void cc_setup_state_desc(struct crypto_tfm *tfm,
int cipher_mode = ctx_p->cipher_mode;
int flow_mode = ctx_p->flow_mode;
int direction = req_ctx->gen_ctx.op_type;
- dma_addr_t key_dma_addr = ctx_p->user.key_dma_addr;
- unsigned int key_len = ctx_p->keylen;
dma_addr_t iv_dma_addr = req_ctx->gen_ctx.iv_dma_addr;
unsigned int du_size = nbytes;

@@ -570,6 +569,47 @@ static void cc_setup_state_desc(struct crypto_tfm *tfm,
break;
case DRV_CIPHER_XTS:
case DRV_CIPHER_ESSIV:
+ case DRV_CIPHER_BITLOCKER:
+ break;
+ default:
+ dev_err(dev, "Unsupported cipher mode (%d)\n", cipher_mode);
+ }
+}
+
+
+static void cc_setup_xex_state_desc(struct crypto_tfm *tfm,
+ struct cipher_req_ctx *req_ctx,
+ unsigned int ivsize, unsigned int nbytes,
+ struct cc_hw_desc desc[],
+ unsigned int *seq_size)
+{
+ struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm);
+ struct device *dev = drvdata_to_dev(ctx_p->drvdata);
+ int cipher_mode = ctx_p->cipher_mode;
+ int flow_mode = ctx_p->flow_mode;
+ int direction = req_ctx->gen_ctx.op_type;
+ dma_addr_t key_dma_addr = ctx_p->user.key_dma_addr;
+ unsigned int key_len = ctx_p->keylen;
+ dma_addr_t iv_dma_addr = req_ctx->gen_ctx.iv_dma_addr;
+ unsigned int du_size = nbytes;
+
+ struct cc_crypto_alg *cc_alg =
+ container_of(tfm->__crt_alg, struct cc_crypto_alg,
+ skcipher_alg.base);
+
+ if (cc_alg->data_unit)
+ du_size = cc_alg->data_unit;
+
+ switch (cipher_mode) {
+ case DRV_CIPHER_ECB:
+ break;
+ case DRV_CIPHER_CBC:
+ case DRV_CIPHER_CBC_CTS:
+ case DRV_CIPHER_CTR:
+ case DRV_CIPHER_OFB:
+ break;
+ case DRV_CIPHER_XTS:
+ case DRV_CIPHER_ESSIV:
case DRV_CIPHER_BITLOCKER:
/* load XEX key */
hw_desc_init(&desc[*seq_size]);
@@ -881,12 +921,14 @@ static int cc_cipher_process(struct skcipher_request *req,

/* STAT_PHASE_2: Create sequence */

- /* Setup IV and XEX key used */
+ /* Setup state (IV) */
cc_setup_state_desc(tfm, req_ctx, ivsize, nbytes, desc, &seq_len);
/* Setup MLLI line, if needed */
cc_setup_mlli_desc(tfm, req_ctx, dst, src, nbytes, req, desc, &seq_len);
/* Setup key */
cc_setup_key_desc(tfm, req_ctx, nbytes, desc, &seq_len);
+ /* Setup state (IV and XEX key) */
+ cc_setup_xex_state_desc(tfm, req_ctx, ivsize, nbytes, desc, &seq_len);
/* Data processing */
cc_setup_flow_desc(tfm, req_ctx, dst, src, nbytes, desc, &seq_len);
/* Read next IV */
diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h
index ab31d4a68c80..7d2f7e2c0bb5 100644
--- a/drivers/crypto/ccree/cc_driver.h
+++ b/drivers/crypto/ccree/cc_driver.h
@@ -161,6 +161,7 @@ struct cc_drvdata {
int std_bodies;
bool sec_disabled;
u32 comp_mask;
+ bool pm_on;
};

struct cc_crypto_alg {
diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c
index dbc508fb719b..452bd77a9ba0 100644
--- a/drivers/crypto/ccree/cc_pm.c
+++ b/drivers/crypto/ccree/cc_pm.c
@@ -22,14 +22,8 @@ const struct dev_pm_ops ccree_pm = {
int cc_pm_suspend(struct device *dev)
{
struct cc_drvdata *drvdata = dev_get_drvdata(dev);
- int rc;

dev_dbg(dev, "set HOST_POWER_DOWN_EN\n");
- rc = cc_suspend_req_queue(drvdata);
- if (rc) {
- dev_err(dev, "cc_suspend_req_queue (%x)\n", rc);
- return rc;
- }
fini_cc_regs(drvdata);
cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_ENABLE);
cc_clk_off(drvdata);
@@ -63,13 +57,6 @@ int cc_pm_resume(struct device *dev)
/* check if tee fips error occurred during power down */
cc_tee_handle_fips_error(drvdata);

- rc = cc_resume_req_queue(drvdata);
- if (rc) {
- dev_err(dev, "cc_resume_req_queue (%x)\n", rc);
- return rc;
- }
-
- /* must be after the queue resuming as it uses the HW queue*/
cc_init_hash_sram(drvdata);

return 0;
@@ -80,12 +67,10 @@ int cc_pm_get(struct device *dev)
int rc = 0;
struct cc_drvdata *drvdata = dev_get_drvdata(dev);

- if (cc_req_queue_suspended(drvdata))
+ if (drvdata->pm_on)
rc = pm_runtime_get_sync(dev);
- else
- pm_runtime_get_noresume(dev);

- return rc;
+ return (rc == 1 ? 0 : rc);
}

int cc_pm_put_suspend(struct device *dev)
@@ -93,14 +78,11 @@ int cc_pm_put_suspend(struct device *dev)
int rc = 0;
struct cc_drvdata *drvdata = dev_get_drvdata(dev);

- if (!cc_req_queue_suspended(drvdata)) {
+ if (drvdata->pm_on) {
pm_runtime_mark_last_busy(dev);
rc = pm_runtime_put_autosuspend(dev);
- } else {
- /* Something wrong happens*/
- dev_err(dev, "request to suspend already suspended queue");
- rc = -EBUSY;
}
+
return rc;
}

@@ -117,7 +99,7 @@ int cc_pm_init(struct cc_drvdata *drvdata)
/* must be before the enabling to avoid resdundent suspending */
pm_runtime_set_autosuspend_delay(dev, CC_SUSPEND_TIMEOUT);
pm_runtime_use_autosuspend(dev);
- /* activate the PM module */
+ /* set us as active - note we won't do PM ops until cc_pm_go()! */
return pm_runtime_set_active(dev);
}

@@ -125,9 +107,11 @@ int cc_pm_init(struct cc_drvdata *drvdata)
void cc_pm_go(struct cc_drvdata *drvdata)
{
pm_runtime_enable(drvdata_to_dev(drvdata));
+ drvdata->pm_on = true;
}

void cc_pm_fini(struct cc_drvdata *drvdata)
{
pm_runtime_disable(drvdata_to_dev(drvdata));
+ drvdata->pm_on = false;
}
diff --git a/drivers/crypto/ccree/cc_request_mgr.c b/drivers/crypto/ccree/cc_request_mgr.c
index a947d5a2cf35..37e6fee37b13 100644
--- a/drivers/crypto/ccree/cc_request_mgr.c
+++ b/drivers/crypto/ccree/cc_request_mgr.c
@@ -41,7 +41,6 @@ struct cc_req_mgr_handle {
#else
struct tasklet_struct comptask;
#endif
- bool is_runtime_suspended;
};

struct cc_bl_item {
@@ -404,6 +403,7 @@ static void cc_proc_backlog(struct cc_drvdata *drvdata)
spin_lock(&mgr->bl_lock);
list_del(&bli->list);
--mgr->bl_len;
+ kfree(bli);
}

spin_unlock(&mgr->bl_lock);
@@ -677,52 +677,3 @@ static void comp_handler(unsigned long devarg)
cc_proc_backlog(drvdata);
dev_dbg(dev, "Comp. handler done.\n");
}
-
-/*
- * resume the queue configuration - no need to take the lock as this happens
- * inside the spin lock protection
- */
-#if defined(CONFIG_PM)
-int cc_resume_req_queue(struct cc_drvdata *drvdata)
-{
- struct cc_req_mgr_handle *request_mgr_handle =
- drvdata->request_mgr_handle;
-
- spin_lock_bh(&request_mgr_handle->hw_lock);
- request_mgr_handle->is_runtime_suspended = false;
- spin_unlock_bh(&request_mgr_handle->hw_lock);
-
- return 0;
-}
-
-/*
- * suspend the queue configuration. Since it is used for the runtime suspend
- * only verify that the queue can be suspended.
- */
-int cc_suspend_req_queue(struct cc_drvdata *drvdata)
-{
- struct cc_req_mgr_handle *request_mgr_handle =
- drvdata->request_mgr_handle;
-
- /* lock the send_request */
- spin_lock_bh(&request_mgr_handle->hw_lock);
- if (request_mgr_handle->req_queue_head !=
- request_mgr_handle->req_queue_tail) {
- spin_unlock_bh(&request_mgr_handle->hw_lock);
- return -EBUSY;
- }
- request_mgr_handle->is_runtime_suspended = true;
- spin_unlock_bh(&request_mgr_handle->hw_lock);
-
- return 0;
-}
-
-bool cc_req_queue_suspended(struct cc_drvdata *drvdata)
-{
- struct cc_req_mgr_handle *request_mgr_handle =
- drvdata->request_mgr_handle;
-
- return request_mgr_handle->is_runtime_suspended;
-}
-
-#endif
diff --git a/drivers/crypto/ccree/cc_request_mgr.h b/drivers/crypto/ccree/cc_request_mgr.h
index f46cf766fe4d..ff7746aaaf35 100644
--- a/drivers/crypto/ccree/cc_request_mgr.h
+++ b/drivers/crypto/ccree/cc_request_mgr.h
@@ -40,12 +40,4 @@ void complete_request(struct cc_drvdata *drvdata);

void cc_req_mgr_fini(struct cc_drvdata *drvdata);

-#if defined(CONFIG_PM)
-int cc_resume_req_queue(struct cc_drvdata *drvdata);
-
-int cc_suspend_req_queue(struct cc_drvdata *drvdata);
-
-bool cc_req_queue_suspended(struct cc_drvdata *drvdata);
-#endif
-
#endif /*__REQUEST_MGR_H__*/
diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig
index 504daff7687d..f7f0a1fb6895 100644
--- a/drivers/crypto/hisilicon/Kconfig
+++ b/drivers/crypto/hisilicon/Kconfig
@@ -35,6 +35,5 @@ config CRYPTO_DEV_HISI_ZIP
depends on ARM64 && PCI && PCI_MSI
select CRYPTO_DEV_HISI_QM
select CRYPTO_HISI_SGL
- select SG_SPLIT
help
Support for HiSilicon ZIP Driver
diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h
index ffb00d987d02..99f21d848d4f 100644
--- a/drivers/crypto/hisilicon/zip/zip.h
+++ b/drivers/crypto/hisilicon/zip/zip.h
@@ -12,6 +12,10 @@

/* hisi_zip_sqe dw3 */
#define HZIP_BD_STATUS_M GENMASK(7, 0)
+/* hisi_zip_sqe dw7 */
+#define HZIP_IN_SGE_DATA_OFFSET_M GENMASK(23, 0)
+/* hisi_zip_sqe dw8 */
+#define HZIP_OUT_SGE_DATA_OFFSET_M GENMASK(23, 0)
/* hisi_zip_sqe dw9 */
#define HZIP_REQ_TYPE_M GENMASK(7, 0)
#define HZIP_ALG_TYPE_ZLIB 0x02
diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c
index 59023545a1c4..cf34bfdfb3e6 100644
--- a/drivers/crypto/hisilicon/zip/zip_crypto.c
+++ b/drivers/crypto/hisilicon/zip/zip_crypto.c
@@ -45,10 +45,8 @@ enum hisi_zip_alg_type {

struct hisi_zip_req {
struct acomp_req *req;
- struct scatterlist *src;
- struct scatterlist *dst;
- size_t slen;
- size_t dlen;
+ int sskip;
+ int dskip;
struct hisi_acc_hw_sgl *hw_src;
struct hisi_acc_hw_sgl *hw_dst;
dma_addr_t dma_src;
@@ -94,13 +92,15 @@ static void hisi_zip_config_tag(struct hisi_zip_sqe *sqe, u32 tag)

static void hisi_zip_fill_sqe(struct hisi_zip_sqe *sqe, u8 req_type,
dma_addr_t s_addr, dma_addr_t d_addr, u32 slen,
- u32 dlen)
+ u32 dlen, int sskip, int dskip)
{
memset(sqe, 0, sizeof(struct hisi_zip_sqe));

- sqe->input_data_length = slen;
+ sqe->input_data_length = slen - sskip;
+ sqe->dw7 = FIELD_PREP(HZIP_IN_SGE_DATA_OFFSET_M, sskip);
+ sqe->dw8 = FIELD_PREP(HZIP_OUT_SGE_DATA_OFFSET_M, dskip);
sqe->dw9 = FIELD_PREP(HZIP_REQ_TYPE_M, req_type);
- sqe->dest_avail_out = dlen;
+ sqe->dest_avail_out = dlen - dskip;
sqe->source_addr_l = lower_32_bits(s_addr);
sqe->source_addr_h = upper_32_bits(s_addr);
sqe->dest_addr_l = lower_32_bits(d_addr);
@@ -301,11 +301,6 @@ static void hisi_zip_remove_req(struct hisi_zip_qp_ctx *qp_ctx,
{
struct hisi_zip_req_q *req_q = &qp_ctx->req_q;

- if (qp_ctx->qp->alg_type == HZIP_ALG_TYPE_COMP)
- kfree(req->dst);
- else
- kfree(req->src);
-
write_lock(&req_q->req_lock);
clear_bit(req->req_id, req_q->req_bitmap);
memset(req, 0, sizeof(struct hisi_zip_req));
@@ -333,8 +328,8 @@ static void hisi_zip_acomp_cb(struct hisi_qp *qp, void *data)
}
dlen = sqe->produced;

- hisi_acc_sg_buf_unmap(dev, req->src, req->hw_src);
- hisi_acc_sg_buf_unmap(dev, req->dst, req->hw_dst);
+ hisi_acc_sg_buf_unmap(dev, acomp_req->src, req->hw_src);
+ hisi_acc_sg_buf_unmap(dev, acomp_req->dst, req->hw_dst);

head_size = (qp->alg_type == 0) ? TO_HEAD_SIZE(qp->req_type) : 0;
acomp_req->dlen = dlen + head_size;
@@ -428,20 +423,6 @@ static size_t get_comp_head_size(struct scatterlist *src, u8 req_type)
}
}

-static int get_sg_skip_bytes(struct scatterlist *sgl, size_t bytes,
- size_t remains, struct scatterlist **out)
-{
-#define SPLIT_NUM 2
- size_t split_sizes[SPLIT_NUM];
- int out_mapped_nents[SPLIT_NUM];
-
- split_sizes[0] = bytes;
- split_sizes[1] = remains;
-
- return sg_split(sgl, 0, 0, SPLIT_NUM, split_sizes, out,
- out_mapped_nents, GFP_KERNEL);
-}
-
static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req,
struct hisi_zip_qp_ctx *qp_ctx,
size_t head_size, bool is_comp)
@@ -449,31 +430,7 @@ static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req,
struct hisi_zip_req_q *req_q = &qp_ctx->req_q;
struct hisi_zip_req *q = req_q->q;
struct hisi_zip_req *req_cache;
- struct scatterlist *out[2];
- struct scatterlist *sgl;
- size_t len;
- int ret, req_id;
-
- /*
- * remove/add zlib/gzip head, as hardware operations do not include
- * comp head. so split req->src to get sgl without heads in acomp, or
- * add comp head to req->dst ahead of that hardware output compressed
- * data in sgl splited from req->dst without comp head.
- */
- if (is_comp) {
- sgl = req->dst;
- len = req->dlen - head_size;
- } else {
- sgl = req->src;
- len = req->slen - head_size;
- }
-
- ret = get_sg_skip_bytes(sgl, head_size, len, out);
- if (ret)
- return ERR_PTR(ret);
-
- /* sgl for comp head is useless, so free it now */
- kfree(out[0]);
+ int req_id;

write_lock(&req_q->req_lock);

@@ -481,7 +438,6 @@ static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req,
if (req_id >= req_q->size) {
write_unlock(&req_q->req_lock);
dev_dbg(&qp_ctx->qp->qm->pdev->dev, "req cache is full!\n");
- kfree(out[1]);
return ERR_PTR(-EBUSY);
}
set_bit(req_id, req_q->req_bitmap);
@@ -489,16 +445,13 @@ static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req,
req_cache = q + req_id;
req_cache->req_id = req_id;
req_cache->req = req;
+
if (is_comp) {
- req_cache->src = req->src;
- req_cache->dst = out[1];
- req_cache->slen = req->slen;
- req_cache->dlen = req->dlen - head_size;
+ req_cache->sskip = 0;
+ req_cache->dskip = head_size;
} else {
- req_cache->src = out[1];
- req_cache->dst = req->dst;
- req_cache->slen = req->slen - head_size;
- req_cache->dlen = req->dlen;
+ req_cache->sskip = head_size;
+ req_cache->dskip = 0;
}

write_unlock(&req_q->req_lock);
@@ -510,6 +463,7 @@ static int hisi_zip_do_work(struct hisi_zip_req *req,
struct hisi_zip_qp_ctx *qp_ctx)
{
struct hisi_zip_sqe *zip_sqe = &qp_ctx->zip_sqe;
+ struct acomp_req *a_req = req->req;
struct hisi_qp *qp = qp_ctx->qp;
struct device *dev = &qp->qm->pdev->dev;
struct hisi_acc_sgl_pool *pool = &qp_ctx->sgl_pool;
@@ -517,16 +471,16 @@ static int hisi_zip_do_work(struct hisi_zip_req *req,
dma_addr_t output;
int ret;

- if (!req->src || !req->slen || !req->dst || !req->dlen)
+ if (!a_req->src || !a_req->slen || !a_req->dst || !a_req->dlen)
return -EINVAL;

- req->hw_src = hisi_acc_sg_buf_map_to_hw_sgl(dev, req->src, pool,
+ req->hw_src = hisi_acc_sg_buf_map_to_hw_sgl(dev, a_req->src, pool,
req->req_id << 1, &input);
if (IS_ERR(req->hw_src))
return PTR_ERR(req->hw_src);
req->dma_src = input;

- req->hw_dst = hisi_acc_sg_buf_map_to_hw_sgl(dev, req->dst, pool,
+ req->hw_dst = hisi_acc_sg_buf_map_to_hw_sgl(dev, a_req->dst, pool,
(req->req_id << 1) + 1,
&output);
if (IS_ERR(req->hw_dst)) {
@@ -535,8 +489,8 @@ static int hisi_zip_do_work(struct hisi_zip_req *req,
}
req->dma_dst = output;

- hisi_zip_fill_sqe(zip_sqe, qp->req_type, input, output, req->slen,
- req->dlen);
+ hisi_zip_fill_sqe(zip_sqe, qp->req_type, input, output, a_req->slen,
+ a_req->dlen, req->sskip, req->dskip);
hisi_zip_config_buf_type(zip_sqe, HZIP_SGL);
hisi_zip_config_tag(zip_sqe, req->req_id);

@@ -548,9 +502,9 @@ static int hisi_zip_do_work(struct hisi_zip_req *req,
return -EINPROGRESS;

err_unmap_output:
- hisi_acc_sg_buf_unmap(dev, req->dst, req->hw_dst);
+ hisi_acc_sg_buf_unmap(dev, a_req->dst, req->hw_dst);
err_unmap_input:
- hisi_acc_sg_buf_unmap(dev, req->src, req->hw_src);
+ hisi_acc_sg_buf_unmap(dev, a_req->src, req->hw_src);
return ret;
}

diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c
index 3cbefb41b099..2680e1525db5 100644
--- a/drivers/crypto/picoxcell_crypto.c
+++ b/drivers/crypto/picoxcell_crypto.c
@@ -1613,6 +1613,11 @@ static const struct of_device_id spacc_of_id_table[] = {
MODULE_DEVICE_TABLE(of, spacc_of_id_table);
#endif /* CONFIG_OF */

+static void spacc_tasklet_kill(void *data)
+{
+ tasklet_kill(data);
+}
+
static int spacc_probe(struct platform_device *pdev)
{
int i, err, ret;
@@ -1655,6 +1660,14 @@ static int spacc_probe(struct platform_device *pdev)
return -ENXIO;
}

+ tasklet_init(&engine->complete, spacc_spacc_complete,
+ (unsigned long)engine);
+
+ ret = devm_add_action(&pdev->dev, spacc_tasklet_kill,
+ &engine->complete);
+ if (ret)
+ return ret;
+
if (devm_request_irq(&pdev->dev, irq->start, spacc_spacc_irq, 0,
engine->name, engine)) {
dev_err(engine->dev, "failed to request IRQ\n");
@@ -1712,8 +1725,6 @@ static int spacc_probe(struct platform_device *pdev)
INIT_LIST_HEAD(&engine->completed);
INIT_LIST_HEAD(&engine->in_progress);
engine->in_flight = 0;
- tasklet_init(&engine->complete, spacc_spacc_complete,
- (unsigned long)engine);

platform_set_drvdata(pdev, engine);

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index ee1dc75f5ddc..1d733b57e60f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -247,7 +247,8 @@ bool dm_helpers_dp_mst_write_payload_allocation_table(
drm_dp_mst_reset_vcpi_slots(mst_mgr, mst_port);
}

- ret = drm_dp_update_payload_part1(mst_mgr);
+ /* It's OK for this to fail */
+ drm_dp_update_payload_part1(mst_mgr);

/* mst_mgr->->payloads are VC payload notify MST branch using DPCD or
* AUX message. The sequence is slot 1-63 allocated sequence for each
@@ -256,9 +257,6 @@ bool dm_helpers_dp_mst_write_payload_allocation_table(

get_payload_table(aconnector, proposed_table);

- if (ret)
- return false;
-
return true;
}

@@ -316,7 +314,6 @@ bool dm_helpers_dp_mst_send_payload_allocation(
struct amdgpu_dm_connector *aconnector;
struct drm_dp_mst_topology_mgr *mst_mgr;
struct drm_dp_mst_port *mst_port;
- int ret;

aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;

@@ -330,10 +327,8 @@ bool dm_helpers_dp_mst_send_payload_allocation(
if (!mst_mgr->mst_state)
return false;

- ret = drm_dp_update_payload_part2(mst_mgr);
-
- if (ret)
- return false;
+ /* It's OK for this to fail */
+ drm_dp_update_payload_part2(mst_mgr);

if (!enable)
drm_dp_mst_deallocate_vcpi(mst_mgr, mst_port);
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
index f2e73e6d46b8..10985134ce0b 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
@@ -73,7 +73,11 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c)
unsigned long prate;
unsigned int mask = ATMEL_HLCDC_CLKDIV_MASK | ATMEL_HLCDC_CLKPOL;
unsigned int cfg = 0;
- int div;
+ int div, ret;
+
+ ret = clk_prepare_enable(crtc->dc->hlcdc->sys_clk);
+ if (ret)
+ return;

vm.vfront_porch = adj->crtc_vsync_start - adj->crtc_vdisplay;
vm.vback_porch = adj->crtc_vtotal - adj->crtc_vsync_end;
@@ -95,14 +99,14 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c)
(adj->crtc_hdisplay - 1) |
((adj->crtc_vdisplay - 1) << 16));

+ prate = clk_get_rate(crtc->dc->hlcdc->sys_clk);
+ mode_rate = adj->crtc_clock * 1000;
if (!crtc->dc->desc->fixed_clksrc) {
+ prate *= 2;
cfg |= ATMEL_HLCDC_CLKSEL;
mask |= ATMEL_HLCDC_CLKSEL;
}

- prate = 2 * clk_get_rate(crtc->dc->hlcdc->sys_clk);
- mode_rate = adj->crtc_clock * 1000;
-
div = DIV_ROUND_UP(prate, mode_rate);
if (div < 2) {
div = 2;
@@ -117,8 +121,8 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c)
int div_low = prate / mode_rate;

if (div_low >= 2 &&
- ((prate / div_low - mode_rate) <
- 10 * (mode_rate - prate / div)))
+ (10 * (prate / div_low - mode_rate) <
+ (mode_rate - prate / div)))
/*
* At least 10 times better when using a higher
* frequency than requested, instead of a lower.
@@ -147,6 +151,8 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c)
ATMEL_HLCDC_VSPSU | ATMEL_HLCDC_VSPHO |
ATMEL_HLCDC_GUARDTIME_MASK | ATMEL_HLCDC_MODE_MASK,
cfg);
+
+ clk_disable_unprepare(crtc->dc->hlcdc->sys_clk);
}

static enum drm_mode_status
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index a48a4c21b1b3..c5e9e2305fff 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -2694,6 +2694,7 @@ static bool drm_dp_get_vc_payload_bw(int dp_link_bw,
int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool mst_state)
{
int ret = 0;
+ int i = 0;
struct drm_dp_mst_branch *mstb = NULL;

mutex_lock(&mgr->lock);
@@ -2754,10 +2755,21 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms
/* this can fail if the device is gone */
drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, 0);
ret = 0;
+ mutex_lock(&mgr->payload_lock);
memset(mgr->payloads, 0, mgr->max_payloads * sizeof(struct drm_dp_payload));
mgr->payload_mask = 0;
set_bit(0, &mgr->payload_mask);
+ for (i = 0; i < mgr->max_payloads; i++) {
+ struct drm_dp_vcpi *vcpi = mgr->proposed_vcpis[i];
+
+ if (vcpi) {
+ vcpi->vcpi = 0;
+ vcpi->num_slots = 0;
+ }
+ mgr->proposed_vcpis[i] = NULL;
+ }
mgr->vcpi_mask = 0;
+ mutex_unlock(&mgr->payload_lock);
}

out_unlock:
diff --git a/drivers/gpu/drm/drm_rect.c b/drivers/gpu/drm/drm_rect.c
index b8363aaa9032..818738e83d06 100644
--- a/drivers/gpu/drm/drm_rect.c
+++ b/drivers/gpu/drm/drm_rect.c
@@ -54,7 +54,12 @@ EXPORT_SYMBOL(drm_rect_intersect);

static u32 clip_scaled(u32 src, u32 dst, u32 clip)
{
- u64 tmp = mul_u32_u32(src, dst - clip);
+ u64 tmp;
+
+ if (dst == 0)
+ return 0;
+
+ tmp = mul_u32_u32(src, dst - clip);

/*
* Round toward 1.0 when clipping so that we don't accidentally
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c
index 772f0753ed38..aaf2f26f8505 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c
@@ -121,7 +121,7 @@ static void mdp4_dsi_encoder_enable(struct drm_encoder *encoder)
if (mdp4_dsi_encoder->enabled)
return;

- mdp4_crtc_set_config(encoder->crtc,
+ mdp4_crtc_set_config(encoder->crtc,
MDP4_DMA_CONFIG_PACK_ALIGN_MSB |
MDP4_DMA_CONFIG_DEFLKR_EN |
MDP4_DMA_CONFIG_DITHER_EN |
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 34bd73526afd..930674117533 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -1213,10 +1213,7 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm,
unsigned int i, j;
struct page *pg;

- if (num_pages < alloc_unit)
- return 0;
-
- for (i = 0; (i * alloc_unit) < num_pages; i++) {
+ for (i = 0; i < num_pages / alloc_unit; i++) {
if (bl_resp->hdr.size + sizeof(union dm_mem_page_range) >
PAGE_SIZE)
return i * alloc_unit;
@@ -1254,7 +1251,7 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm,

}

- return num_pages;
+ return i * alloc_unit;
}

static void balloon_up(struct work_struct *dummy)
@@ -1269,9 +1266,6 @@ static void balloon_up(struct work_struct *dummy)
long avail_pages;
unsigned long floor;

- /* The host balloons pages in 2M granularity. */
- WARN_ON_ONCE(num_pages % PAGES_IN_2M != 0);
-
/*
* We will attempt 2M allocations. However, if we fail to
* allocate 2M chunks, we will go back to 4k allocations.
@@ -1281,14 +1275,13 @@ static void balloon_up(struct work_struct *dummy)
avail_pages = si_mem_available();
floor = compute_balloon_floor();

- /* Refuse to balloon below the floor, keep the 2M granularity. */
+ /* Refuse to balloon below the floor. */
if (avail_pages < num_pages || avail_pages - num_pages < floor) {
pr_warn("Balloon request will be partially fulfilled. %s\n",
avail_pages < num_pages ? "Not enough memory." :
"Balloon floor reached.");

num_pages = avail_pages > floor ? (avail_pages - floor) : 0;
- num_pages -= num_pages % PAGES_IN_2M;
}

while (!done) {
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 163ff7ba92b7..fedf6829cdec 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -632,7 +632,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,

while (bcnt > 0) {
const size_t gup_num_pages = min_t(size_t,
- (bcnt + BIT(page_shift) - 1) >> page_shift,
+ ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
PAGE_SIZE / sizeof(struct page *));

down_read(&owning_mm->mmap_sem);
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
index 4950df3f71b6..5c73c0a790fa 100644
--- a/drivers/infiniband/hw/mlx5/gsi.c
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -507,8 +507,7 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr);
if (ret) {
/* Undo the effect of adding the outstanding wr */
- gsi->outstanding_pi = (gsi->outstanding_pi - 1) %
- gsi->cap.max_send_wr;
+ gsi->outstanding_pi--;
goto err;
}
spin_unlock_irqrestore(&gsi->lock, flags);
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index deb924e1d790..3d2b63585da9 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -329,6 +329,9 @@ struct cached_dev {
*/
atomic_t has_dirty;

+#define BCH_CACHE_READA_ALL 0
+#define BCH_CACHE_READA_META_ONLY 1
+ unsigned int cache_readahead_policy;
struct bch_ratelimit writeback_rate;
struct delayed_work writeback_rate_update;

diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 41adcd1546f1..4045ae748f17 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -391,13 +391,20 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
goto skip;

/*
- * Flag for bypass if the IO is for read-ahead or background,
- * unless the read-ahead request is for metadata
+ * If the bio is for read-ahead or background IO, bypass it or
+ * not depends on the following situations,
+ * - If the IO is for meta data, always cache it and no bypass
+ * - If the IO is not meta data, check dc->cache_reada_policy,
+ * BCH_CACHE_READA_ALL: cache it and not bypass
+ * BCH_CACHE_READA_META_ONLY: not cache it and bypass
+ * That is, read-ahead request for metadata always get cached
* (eg, for gfs2 or xfs).
*/
- if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) &&
- !(bio->bi_opf & (REQ_META|REQ_PRIO)))
- goto skip;
+ if ((bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND))) {
+ if (!(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
+ (dc->cache_readahead_policy != BCH_CACHE_READA_ALL))
+ goto skip;
+ }

if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
bio_sectors(bio) & (c->sb.block_size - 1)) {
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 627dcea0f5b6..7f0fb4b5755a 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -27,6 +27,12 @@ static const char * const bch_cache_modes[] = {
NULL
};

+static const char * const bch_reada_cache_policies[] = {
+ "all",
+ "meta-only",
+ NULL
+};
+
/* Default is 0 ("auto") */
static const char * const bch_stop_on_failure_modes[] = {
"auto",
@@ -100,6 +106,7 @@ rw_attribute(congested_write_threshold_us);
rw_attribute(sequential_cutoff);
rw_attribute(data_csum);
rw_attribute(cache_mode);
+rw_attribute(readahead_cache_policy);
rw_attribute(stop_when_cache_set_failed);
rw_attribute(writeback_metadata);
rw_attribute(writeback_running);
@@ -167,6 +174,11 @@ SHOW(__bch_cached_dev)
bch_cache_modes,
BDEV_CACHE_MODE(&dc->sb));

+ if (attr == &sysfs_readahead_cache_policy)
+ return bch_snprint_string_list(buf, PAGE_SIZE,
+ bch_reada_cache_policies,
+ dc->cache_readahead_policy);
+
if (attr == &sysfs_stop_when_cache_set_failed)
return bch_snprint_string_list(buf, PAGE_SIZE,
bch_stop_on_failure_modes,
@@ -352,6 +364,15 @@ STORE(__cached_dev)
}
}

+ if (attr == &sysfs_readahead_cache_policy) {
+ v = __sysfs_match_string(bch_reada_cache_policies, -1, buf);
+ if (v < 0)
+ return v;
+
+ if ((unsigned int) v != dc->cache_readahead_policy)
+ dc->cache_readahead_policy = v;
+ }
+
if (attr == &sysfs_stop_when_cache_set_failed) {
v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf);
if (v < 0)
@@ -466,6 +487,7 @@ static struct attribute *bch_cached_dev_files[] = {
&sysfs_data_csum,
#endif
&sysfs_cache_mode,
+ &sysfs_readahead_cache_policy,
&sysfs_stop_when_cache_set_failed,
&sysfs_writeback_metadata,
&sysfs_writeback_running,
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index eb9782fc93fe..492bbe0584d9 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -331,8 +331,14 @@ static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv,
static int crypt_iv_benbi_ctr(struct crypt_config *cc, struct dm_target *ti,
const char *opts)
{
- unsigned bs = crypto_skcipher_blocksize(any_tfm(cc));
- int log = ilog2(bs);
+ unsigned bs;
+ int log;
+
+ if (test_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags))
+ bs = crypto_aead_blocksize(any_tfm_aead(cc));
+ else
+ bs = crypto_skcipher_blocksize(any_tfm(cc));
+ log = ilog2(bs);

/* we need to calculate how far we must shift the sector count
* to get the cipher block count, we use this shift in _gen */
@@ -717,7 +723,7 @@ static int crypt_iv_eboiv_gen(struct crypt_config *cc, u8 *iv,
struct crypto_wait wait;
int err;

- req = skcipher_request_alloc(any_tfm(cc), GFP_KERNEL | GFP_NOFS);
+ req = skcipher_request_alloc(any_tfm(cc), GFP_NOIO);
if (!req)
return -ENOMEM;

diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index b88d6d701f5b..8bb723f1a569 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -387,16 +387,15 @@ static int subtree_equal(void *context, const void *value1_le, const void *value
* Variant that is used for in-core only changes or code that
* shouldn't put the pool in service on its own (e.g. commit).
*/
-static inline void __pmd_write_lock(struct dm_pool_metadata *pmd)
+static inline void pmd_write_lock_in_core(struct dm_pool_metadata *pmd)
__acquires(pmd->root_lock)
{
down_write(&pmd->root_lock);
}
-#define pmd_write_lock_in_core(pmd) __pmd_write_lock((pmd))

static inline void pmd_write_lock(struct dm_pool_metadata *pmd)
{
- __pmd_write_lock(pmd);
+ pmd_write_lock_in_core(pmd);
if (unlikely(!pmd->in_service))
pmd->in_service = true;
}
@@ -831,6 +830,7 @@ static int __commit_transaction(struct dm_pool_metadata *pmd)
* We need to know if the thin_disk_superblock exceeds a 512-byte sector.
*/
BUILD_BUG_ON(sizeof(struct thin_disk_superblock) > 512);
+ BUG_ON(!rwsem_is_locked(&pmd->root_lock));

if (unlikely(!pmd->in_service))
return 0;
@@ -953,6 +953,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd)
return -EBUSY;
}

+ pmd_write_lock_in_core(pmd);
if (!dm_bm_is_read_only(pmd->bm) && !pmd->fail_io) {
r = __commit_transaction(pmd);
if (r < 0)
@@ -961,6 +962,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd)
}
if (!pmd->fail_io)
__destroy_persistent_data_objects(pmd);
+ pmd_write_unlock(pmd);

kfree(pmd);
return 0;
@@ -1841,7 +1843,7 @@ int dm_pool_commit_metadata(struct dm_pool_metadata *pmd)
* Care is taken to not have commit be what
* triggers putting the thin-pool in-service.
*/
- __pmd_write_lock(pmd);
+ pmd_write_lock_in_core(pmd);
if (pmd->fail_io)
goto out;

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 43d1af1d8173..07c1b0334f57 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -442,7 +442,13 @@ static void writecache_notify_io(unsigned long error, void *context)
complete(&endio->c);
}

-static void ssd_commit_flushed(struct dm_writecache *wc)
+static void writecache_wait_for_ios(struct dm_writecache *wc, int direction)
+{
+ wait_event(wc->bio_in_progress_wait[direction],
+ !atomic_read(&wc->bio_in_progress[direction]));
+}
+
+static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
{
struct dm_io_region region;
struct dm_io_request req;
@@ -488,17 +494,20 @@ static void ssd_commit_flushed(struct dm_writecache *wc)
writecache_notify_io(0, &endio);
wait_for_completion_io(&endio.c);

+ if (wait_for_ios)
+ writecache_wait_for_ios(wc, WRITE);
+
writecache_disk_flush(wc, wc->ssd_dev);

memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size);
}

-static void writecache_commit_flushed(struct dm_writecache *wc)
+static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
{
if (WC_MODE_PMEM(wc))
wmb();
else
- ssd_commit_flushed(wc);
+ ssd_commit_flushed(wc, wait_for_ios);
}

static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev)
@@ -522,12 +531,6 @@ static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev)
writecache_error(wc, r, "error flushing metadata: %d", r);
}

-static void writecache_wait_for_ios(struct dm_writecache *wc, int direction)
-{
- wait_event(wc->bio_in_progress_wait[direction],
- !atomic_read(&wc->bio_in_progress[direction]));
-}
-
#define WFE_RETURN_FOLLOWING 1
#define WFE_LOWEST_SEQ 2

@@ -724,15 +727,12 @@ static void writecache_flush(struct dm_writecache *wc)
e = e2;
cond_resched();
}
- writecache_commit_flushed(wc);
-
- if (!WC_MODE_PMEM(wc))
- writecache_wait_for_ios(wc, WRITE);
+ writecache_commit_flushed(wc, true);

wc->seq_count++;
pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count));
writecache_flush_region(wc, &sb(wc)->seq_count, sizeof sb(wc)->seq_count);
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);

wc->overwrote_committed = false;

@@ -756,7 +756,7 @@ static void writecache_flush(struct dm_writecache *wc)
}

if (need_flush_after_free)
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
}

static void writecache_flush_work(struct work_struct *work)
@@ -809,7 +809,7 @@ static void writecache_discard(struct dm_writecache *wc, sector_t start, sector_
}

if (discarded_something)
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
}

static bool writecache_wait_for_writeback(struct dm_writecache *wc)
@@ -958,7 +958,7 @@ static void writecache_resume(struct dm_target *ti)

if (need_flush) {
writecache_flush_all_metadata(wc);
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
}

wc_unlock(wc);
@@ -1342,7 +1342,7 @@ static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head *
wc->writeback_size--;
n_walked++;
if (unlikely(n_walked >= ENDIO_LATENCY)) {
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
wc_unlock(wc);
wc_lock(wc);
n_walked = 0;
@@ -1423,7 +1423,7 @@ static int writecache_endio_thread(void *data)
writecache_wait_for_ios(wc, READ);
}

- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);

wc_unlock(wc);
}
@@ -1766,10 +1766,10 @@ static int init_memory(struct dm_writecache *wc)
write_original_sector_seq_count(wc, &wc->entries[b], -1, -1);

writecache_flush_all_metadata(wc);
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC));
writecache_flush_region(wc, &sb(wc)->magic, sizeof sb(wc)->magic);
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);

return 0;
}
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index ac1179ca80d9..5205cf9bbfd9 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -134,6 +134,7 @@ struct dmz_metadata {

sector_t zone_bitmap_size;
unsigned int zone_nr_bitmap_blocks;
+ unsigned int zone_bits_per_mblk;

unsigned int nr_bitmap_blocks;
unsigned int nr_map_blocks;
@@ -1167,7 +1168,10 @@ static int dmz_init_zones(struct dmz_metadata *zmd)

/* Init */
zmd->zone_bitmap_size = dev->zone_nr_blocks >> 3;
- zmd->zone_nr_bitmap_blocks = zmd->zone_bitmap_size >> DMZ_BLOCK_SHIFT;
+ zmd->zone_nr_bitmap_blocks =
+ max_t(sector_t, 1, zmd->zone_bitmap_size >> DMZ_BLOCK_SHIFT);
+ zmd->zone_bits_per_mblk = min_t(sector_t, dev->zone_nr_blocks,
+ DMZ_BLOCK_SIZE_BITS);

/* Allocate zone array */
zmd->zones = kcalloc(dev->nr_zones, sizeof(struct dm_zone), GFP_KERNEL);
@@ -1991,7 +1995,7 @@ int dmz_copy_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone,
dmz_release_mblock(zmd, to_mblk);
dmz_release_mblock(zmd, from_mblk);

- chunk_block += DMZ_BLOCK_SIZE_BITS;
+ chunk_block += zmd->zone_bits_per_mblk;
}

to_zone->weight = from_zone->weight;
@@ -2052,7 +2056,7 @@ int dmz_validate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone,

/* Set bits */
bit = chunk_block & DMZ_BLOCK_MASK_BITS;
- nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit);
+ nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit);

count = dmz_set_bits((unsigned long *)mblk->data, bit, nr_bits);
if (count) {
@@ -2131,7 +2135,7 @@ int dmz_invalidate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone,

/* Clear bits */
bit = chunk_block & DMZ_BLOCK_MASK_BITS;
- nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit);
+ nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit);

count = dmz_clear_bits((unsigned long *)mblk->data,
bit, nr_bits);
@@ -2191,6 +2195,7 @@ static int dmz_to_next_set_block(struct dmz_metadata *zmd, struct dm_zone *zone,
{
struct dmz_mblock *mblk;
unsigned int bit, set_bit, nr_bits;
+ unsigned int zone_bits = zmd->zone_bits_per_mblk;
unsigned long *bitmap;
int n = 0;

@@ -2205,15 +2210,15 @@ static int dmz_to_next_set_block(struct dmz_metadata *zmd, struct dm_zone *zone,
/* Get offset */
bitmap = (unsigned long *) mblk->data;
bit = chunk_block & DMZ_BLOCK_MASK_BITS;
- nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit);
+ nr_bits = min(nr_blocks, zone_bits - bit);
if (set)
- set_bit = find_next_bit(bitmap, DMZ_BLOCK_SIZE_BITS, bit);
+ set_bit = find_next_bit(bitmap, zone_bits, bit);
else
- set_bit = find_next_zero_bit(bitmap, DMZ_BLOCK_SIZE_BITS, bit);
+ set_bit = find_next_zero_bit(bitmap, zone_bits, bit);
dmz_release_mblock(zmd, mblk);

n += set_bit - bit;
- if (set_bit < DMZ_BLOCK_SIZE_BITS)
+ if (set_bit < zone_bits)
break;

nr_blocks -= nr_bits;
@@ -2316,7 +2321,7 @@ static void dmz_get_zone_weight(struct dmz_metadata *zmd, struct dm_zone *zone)
/* Count bits in this block */
bitmap = mblk->data;
bit = chunk_block & DMZ_BLOCK_MASK_BITS;
- nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit);
+ nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit);
n += dmz_count_bits(bitmap, bit, nr_bits);

dmz_release_mblock(zmd, mblk);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 1a5e328c443a..6d3cc235f842 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1880,6 +1880,7 @@ static void dm_init_normal_md_queue(struct mapped_device *md)
/*
* Initialize aspects of queue that aren't relevant for blk-mq
*/
+ md->queue->backing_dev_info->congested_data = md;
md->queue->backing_dev_info->congested_fn = dm_any_congested;
}

@@ -1970,7 +1971,12 @@ static struct mapped_device *alloc_dev(int minor)
if (!md->queue)
goto bad;
md->queue->queuedata = md;
- md->queue->backing_dev_info->congested_data = md;
+ /*
+ * default to bio-based required ->make_request_fn until DM
+ * table is loaded and md->type established. If request-based
+ * table is loaded: blk-mq will override accordingly.
+ */
+ blk_queue_make_request(md->queue, dm_make_request);

md->disk = alloc_disk_node(1, md->numa_node_id);
if (!md->disk)
@@ -2285,7 +2291,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
case DM_TYPE_DAX_BIO_BASED:
case DM_TYPE_NVME_BIO_BASED:
dm_init_normal_md_queue(md);
- blk_queue_make_request(md->queue, dm_make_request);
break;
case DM_TYPE_NONE:
WARN_ON_ONCE(true);
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c
index bd68f6fef694..d8b4125e338c 100644
--- a/drivers/md/persistent-data/dm-space-map-common.c
+++ b/drivers/md/persistent-data/dm-space-map-common.c
@@ -380,6 +380,33 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
return -ENOSPC;
}

+int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
+ dm_block_t begin, dm_block_t end, dm_block_t *b)
+{
+ int r;
+ uint32_t count;
+
+ do {
+ r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b);
+ if (r)
+ break;
+
+ /* double check this block wasn't used in the old transaction */
+ if (*b >= old_ll->nr_blocks)
+ count = 0;
+ else {
+ r = sm_ll_lookup(old_ll, *b, &count);
+ if (r)
+ break;
+
+ if (count)
+ begin = *b + 1;
+ }
+ } while (count);
+
+ return r;
+}
+
static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
int (*mutator)(void *context, uint32_t old, uint32_t *new),
void *context, enum allocation_event *ev)
diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h
index b3078d5eda0c..8de63ce39bdd 100644
--- a/drivers/md/persistent-data/dm-space-map-common.h
+++ b/drivers/md/persistent-data/dm-space-map-common.h
@@ -109,6 +109,8 @@ int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result);
int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result);
int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
dm_block_t end, dm_block_t *result);
+int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
+ dm_block_t begin, dm_block_t end, dm_block_t *result);
int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, enum allocation_event *ev);
int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev);
int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev);
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index 32adf6b4a9c7..bf4c5e2ccb6f 100644
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -167,8 +167,10 @@ static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
enum allocation_event ev;
struct sm_disk *smd = container_of(sm, struct sm_disk, sm);

- /* FIXME: we should loop round a couple of times */
- r = sm_ll_find_free_block(&smd->old_ll, smd->begin, smd->old_ll.nr_blocks, b);
+ /*
+ * Any block we allocate has to be free in both the old and current ll.
+ */
+ r = sm_ll_find_common_free_block(&smd->old_ll, &smd->ll, smd->begin, smd->ll.nr_blocks, b);
if (r)
return r;

diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c
index 25328582cc48..9e3c64ec2026 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.c
+++ b/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -448,7 +448,10 @@ static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
enum allocation_event ev;
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

- r = sm_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b);
+ /*
+ * Any block we allocate has to be free in both the old and current ll.
+ */
+ r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, smm->begin, smm->ll.nr_blocks, b);
if (r)
return r;

diff --git a/drivers/media/rc/iguanair.c b/drivers/media/rc/iguanair.c
index 872d6441e512..a7deca1fefb7 100644
--- a/drivers/media/rc/iguanair.c
+++ b/drivers/media/rc/iguanair.c
@@ -413,7 +413,7 @@ static int iguanair_probe(struct usb_interface *intf,
int ret, pipein, pipeout;
struct usb_host_interface *idesc;

- idesc = intf->altsetting;
+ idesc = intf->cur_altsetting;
if (idesc->desc.bNumEndpoints < 2)
return -ENODEV;

diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c
index 7741151606ef..6f80c251f641 100644
--- a/drivers/media/rc/rc-main.c
+++ b/drivers/media/rc/rc-main.c
@@ -1891,23 +1891,28 @@ int rc_register_device(struct rc_dev *dev)

dev->registered = true;

- if (dev->driver_type != RC_DRIVER_IR_RAW_TX) {
- rc = rc_setup_rx_device(dev);
- if (rc)
- goto out_dev;
- }
-
- /* Ensure that the lirc kfifo is setup before we start the thread */
+ /*
+ * once the the input device is registered in rc_setup_rx_device,
+ * userspace can open the input device and rc_open() will be called
+ * as a result. This results in driver code being allowed to submit
+ * keycodes with rc_keydown, so lirc must be registered first.
+ */
if (dev->allowed_protocols != RC_PROTO_BIT_CEC) {
rc = ir_lirc_register(dev);
if (rc < 0)
- goto out_rx;
+ goto out_dev;
+ }
+
+ if (dev->driver_type != RC_DRIVER_IR_RAW_TX) {
+ rc = rc_setup_rx_device(dev);
+ if (rc)
+ goto out_lirc;
}

if (dev->driver_type == RC_DRIVER_IR_RAW) {
rc = ir_raw_event_register(dev);
if (rc < 0)
- goto out_lirc;
+ goto out_rx;
}

dev_dbg(&dev->dev, "Registered rc%u (driver: %s)\n", dev->minor,
@@ -1915,11 +1920,11 @@ int rc_register_device(struct rc_dev *dev)

return 0;

+out_rx:
+ rc_free_rx_device(dev);
out_lirc:
if (dev->allowed_protocols != RC_PROTO_BIT_CEC)
ir_lirc_unregister(dev);
-out_rx:
- rc_free_rx_device(dev);
out_dev:
device_del(&dev->dev);
out_rx_free:
diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c
index 428235ca2635..2b688cc39bb8 100644
--- a/drivers/media/usb/uvc/uvc_driver.c
+++ b/drivers/media/usb/uvc/uvc_driver.c
@@ -1493,6 +1493,11 @@ static int uvc_scan_chain_forward(struct uvc_video_chain *chain,
break;
if (forward == prev)
continue;
+ if (forward->chain.next || forward->chain.prev) {
+ uvc_trace(UVC_TRACE_DESCR, "Found reference to "
+ "entity %d already in chain.\n", forward->id);
+ return -EINVAL;
+ }

switch (UVC_ENTITY_TYPE(forward)) {
case UVC_VC_EXTENSION_UNIT:
@@ -1574,6 +1579,13 @@ static int uvc_scan_chain_backward(struct uvc_video_chain *chain,
return -1;
}

+ if (term->chain.next || term->chain.prev) {
+ uvc_trace(UVC_TRACE_DESCR, "Found reference to "
+ "entity %d already in chain.\n",
+ term->id);
+ return -EINVAL;
+ }
+
if (uvc_trace_param & UVC_TRACE_PROBE)
printk(KERN_CONT " %d", term->id);

diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
index e1eaf1135c7f..7ad6db8dd9f6 100644
--- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
+++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
@@ -1183,36 +1183,38 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar
u32 aux_space;
int compatible_arg = 1;
long err = 0;
+ unsigned int ncmd;

/*
* 1. When struct size is different, converts the command.
*/
switch (cmd) {
- case VIDIOC_G_FMT32: cmd = VIDIOC_G_FMT; break;
- case VIDIOC_S_FMT32: cmd = VIDIOC_S_FMT; break;
- case VIDIOC_QUERYBUF32: cmd = VIDIOC_QUERYBUF; break;
- case VIDIOC_G_FBUF32: cmd = VIDIOC_G_FBUF; break;
- case VIDIOC_S_FBUF32: cmd = VIDIOC_S_FBUF; break;
- case VIDIOC_QBUF32: cmd = VIDIOC_QBUF; break;
- case VIDIOC_DQBUF32: cmd = VIDIOC_DQBUF; break;
- case VIDIOC_ENUMSTD32: cmd = VIDIOC_ENUMSTD; break;
- case VIDIOC_ENUMINPUT32: cmd = VIDIOC_ENUMINPUT; break;
- case VIDIOC_TRY_FMT32: cmd = VIDIOC_TRY_FMT; break;
- case VIDIOC_G_EXT_CTRLS32: cmd = VIDIOC_G_EXT_CTRLS; break;
- case VIDIOC_S_EXT_CTRLS32: cmd = VIDIOC_S_EXT_CTRLS; break;
- case VIDIOC_TRY_EXT_CTRLS32: cmd = VIDIOC_TRY_EXT_CTRLS; break;
- case VIDIOC_DQEVENT32: cmd = VIDIOC_DQEVENT; break;
- case VIDIOC_OVERLAY32: cmd = VIDIOC_OVERLAY; break;
- case VIDIOC_STREAMON32: cmd = VIDIOC_STREAMON; break;
- case VIDIOC_STREAMOFF32: cmd = VIDIOC_STREAMOFF; break;
- case VIDIOC_G_INPUT32: cmd = VIDIOC_G_INPUT; break;
- case VIDIOC_S_INPUT32: cmd = VIDIOC_S_INPUT; break;
- case VIDIOC_G_OUTPUT32: cmd = VIDIOC_G_OUTPUT; break;
- case VIDIOC_S_OUTPUT32: cmd = VIDIOC_S_OUTPUT; break;
- case VIDIOC_CREATE_BUFS32: cmd = VIDIOC_CREATE_BUFS; break;
- case VIDIOC_PREPARE_BUF32: cmd = VIDIOC_PREPARE_BUF; break;
- case VIDIOC_G_EDID32: cmd = VIDIOC_G_EDID; break;
- case VIDIOC_S_EDID32: cmd = VIDIOC_S_EDID; break;
+ case VIDIOC_G_FMT32: ncmd = VIDIOC_G_FMT; break;
+ case VIDIOC_S_FMT32: ncmd = VIDIOC_S_FMT; break;
+ case VIDIOC_QUERYBUF32: ncmd = VIDIOC_QUERYBUF; break;
+ case VIDIOC_G_FBUF32: ncmd = VIDIOC_G_FBUF; break;
+ case VIDIOC_S_FBUF32: ncmd = VIDIOC_S_FBUF; break;
+ case VIDIOC_QBUF32: ncmd = VIDIOC_QBUF; break;
+ case VIDIOC_DQBUF32: ncmd = VIDIOC_DQBUF; break;
+ case VIDIOC_ENUMSTD32: ncmd = VIDIOC_ENUMSTD; break;
+ case VIDIOC_ENUMINPUT32: ncmd = VIDIOC_ENUMINPUT; break;
+ case VIDIOC_TRY_FMT32: ncmd = VIDIOC_TRY_FMT; break;
+ case VIDIOC_G_EXT_CTRLS32: ncmd = VIDIOC_G_EXT_CTRLS; break;
+ case VIDIOC_S_EXT_CTRLS32: ncmd = VIDIOC_S_EXT_CTRLS; break;
+ case VIDIOC_TRY_EXT_CTRLS32: ncmd = VIDIOC_TRY_EXT_CTRLS; break;
+ case VIDIOC_DQEVENT32: ncmd = VIDIOC_DQEVENT; break;
+ case VIDIOC_OVERLAY32: ncmd = VIDIOC_OVERLAY; break;
+ case VIDIOC_STREAMON32: ncmd = VIDIOC_STREAMON; break;
+ case VIDIOC_STREAMOFF32: ncmd = VIDIOC_STREAMOFF; break;
+ case VIDIOC_G_INPUT32: ncmd = VIDIOC_G_INPUT; break;
+ case VIDIOC_S_INPUT32: ncmd = VIDIOC_S_INPUT; break;
+ case VIDIOC_G_OUTPUT32: ncmd = VIDIOC_G_OUTPUT; break;
+ case VIDIOC_S_OUTPUT32: ncmd = VIDIOC_S_OUTPUT; break;
+ case VIDIOC_CREATE_BUFS32: ncmd = VIDIOC_CREATE_BUFS; break;
+ case VIDIOC_PREPARE_BUF32: ncmd = VIDIOC_PREPARE_BUF; break;
+ case VIDIOC_G_EDID32: ncmd = VIDIOC_G_EDID; break;
+ case VIDIOC_S_EDID32: ncmd = VIDIOC_S_EDID; break;
+ default: ncmd = cmd; break;
}

/*
@@ -1221,11 +1223,11 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar
* argument into it.
*/
switch (cmd) {
- case VIDIOC_OVERLAY:
- case VIDIOC_STREAMON:
- case VIDIOC_STREAMOFF:
- case VIDIOC_S_INPUT:
- case VIDIOC_S_OUTPUT:
+ case VIDIOC_OVERLAY32:
+ case VIDIOC_STREAMON32:
+ case VIDIOC_STREAMOFF32:
+ case VIDIOC_S_INPUT32:
+ case VIDIOC_S_OUTPUT32:
err = alloc_userspace(sizeof(unsigned int), 0, &new_p64);
if (!err && assign_in_user((unsigned int __user *)new_p64,
(compat_uint_t __user *)p32))
@@ -1233,23 +1235,23 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar
compatible_arg = 0;
break;

- case VIDIOC_G_INPUT:
- case VIDIOC_G_OUTPUT:
+ case VIDIOC_G_INPUT32:
+ case VIDIOC_G_OUTPUT32:
err = alloc_userspace(sizeof(unsigned int), 0, &new_p64);
compatible_arg = 0;
break;

- case VIDIOC_G_EDID:
- case VIDIOC_S_EDID:
+ case VIDIOC_G_EDID32:
+ case VIDIOC_S_EDID32:
err = alloc_userspace(sizeof(struct v4l2_edid), 0, &new_p64);
if (!err)
err = get_v4l2_edid32(new_p64, p32);
compatible_arg = 0;
break;

- case VIDIOC_G_FMT:
- case VIDIOC_S_FMT:
- case VIDIOC_TRY_FMT:
+ case VIDIOC_G_FMT32:
+ case VIDIOC_S_FMT32:
+ case VIDIOC_TRY_FMT32:
err = bufsize_v4l2_format(p32, &aux_space);
if (!err)
err = alloc_userspace(sizeof(struct v4l2_format),
@@ -1262,7 +1264,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar
compatible_arg = 0;
break;

- case VIDIOC_CREATE_BUFS:
+ case VIDIOC_CREATE_BUFS32:
err = bufsize_v4l2_create(p32, &aux_space);
if (!err)
err = alloc_userspace(sizeof(struct v4l2_create_buffers),
@@ -1275,10 +1277,10 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar
compatible_arg = 0;
break;

- case VIDIOC_PREPARE_BUF:
- case VIDIOC_QUERYBUF:
- case VIDIOC_QBUF:
- case VIDIOC_DQBUF:
+ case VIDIOC_PREPARE_BUF32:
+ case VIDIOC_QUERYBUF32:
+ case VIDIOC_QBUF32:
+ case VIDIOC_DQBUF32:
err = bufsize_v4l2_buffer(p32, &aux_space);
if (!err)
err = alloc_userspace(sizeof(struct v4l2_buffer),
@@ -1291,7 +1293,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar
compatible_arg = 0;
break;

- case VIDIOC_S_FBUF:
+ case VIDIOC_S_FBUF32:
err = alloc_userspace(sizeof(struct v4l2_framebuffer), 0,
&new_p64);
if (!err)
@@ -1299,13 +1301,13 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar
compatible_arg = 0;
break;

- case VIDIOC_G_FBUF:
+ case VIDIOC_G_FBUF32:
err = alloc_userspace(sizeof(struct v4l2_framebuffer), 0,
&new_p64);
compatible_arg = 0;
break;

- case VIDIOC_ENUMSTD:
+ case VIDIOC_ENUMSTD32:
err = alloc_userspace(sizeof(struct v4l2_standard), 0,
&new_p64);
if (!err)
@@ -1313,16 +1315,16 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar
compatible_arg = 0;
break;

- case VIDIOC_ENUMINPUT:
+ case VIDIOC_ENUMINPUT32:
err = alloc_userspace(sizeof(struct v4l2_input), 0, &new_p64);
if (!err)
err = get_v4l2_input32(new_p64, p32);
compatible_arg = 0;
break;

- case VIDIOC_G_EXT_CTRLS:
- case VIDIOC_S_EXT_CTRLS:
- case VIDIOC_TRY_EXT_CTRLS:
+ case VIDIOC_G_EXT_CTRLS32:
+ case VIDIOC_S_EXT_CTRLS32:
+ case VIDIOC_TRY_EXT_CTRLS32:
err = bufsize_v4l2_ext_controls(p32, &aux_space);
if (!err)
err = alloc_userspace(sizeof(struct v4l2_ext_controls),
@@ -1334,7 +1336,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar
}
compatible_arg = 0;
break;
- case VIDIOC_DQEVENT:
+ case VIDIOC_DQEVENT32:
err = alloc_userspace(sizeof(struct v4l2_event), 0, &new_p64);
compatible_arg = 0;
break;
@@ -1352,9 +1354,9 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar
* Otherwise, it will pass the newly allocated @new_p64 argument.
*/
if (compatible_arg)
- err = native_ioctl(file, cmd, (unsigned long)p32);
+ err = native_ioctl(file, ncmd, (unsigned long)p32);
else
- err = native_ioctl(file, cmd, (unsigned long)new_p64);
+ err = native_ioctl(file, ncmd, (unsigned long)new_p64);

if (err == -ENOTTY)
return err;
@@ -1370,13 +1372,13 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar
* the blocks to maximum allowed value.
*/
switch (cmd) {
- case VIDIOC_G_EXT_CTRLS:
- case VIDIOC_S_EXT_CTRLS:
- case VIDIOC_TRY_EXT_CTRLS:
+ case VIDIOC_G_EXT_CTRLS32:
+ case VIDIOC_S_EXT_CTRLS32:
+ case VIDIOC_TRY_EXT_CTRLS32:
if (put_v4l2_ext_controls32(file, new_p64, p32))
err = -EFAULT;
break;
- case VIDIOC_S_EDID:
+ case VIDIOC_S_EDID32:
if (put_v4l2_edid32(new_p64, p32))
err = -EFAULT;
break;
@@ -1389,49 +1391,49 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar
* the original 32 bits structure.
*/
switch (cmd) {
- case VIDIOC_S_INPUT:
- case VIDIOC_S_OUTPUT:
- case VIDIOC_G_INPUT:
- case VIDIOC_G_OUTPUT:
+ case VIDIOC_S_INPUT32:
+ case VIDIOC_S_OUTPUT32:
+ case VIDIOC_G_INPUT32:
+ case VIDIOC_G_OUTPUT32:
if (assign_in_user((compat_uint_t __user *)p32,
((unsigned int __user *)new_p64)))
err = -EFAULT;
break;

- case VIDIOC_G_FBUF:
+ case VIDIOC_G_FBUF32:
err = put_v4l2_framebuffer32(new_p64, p32);
break;

- case VIDIOC_DQEVENT:
+ case VIDIOC_DQEVENT32:
err = put_v4l2_event32(new_p64, p32);
break;

- case VIDIOC_G_EDID:
+ case VIDIOC_G_EDID32:
err = put_v4l2_edid32(new_p64, p32);
break;

- case VIDIOC_G_FMT:
- case VIDIOC_S_FMT:
- case VIDIOC_TRY_FMT:
+ case VIDIOC_G_FMT32:
+ case VIDIOC_S_FMT32:
+ case VIDIOC_TRY_FMT32:
err = put_v4l2_format32(new_p64, p32);
break;

- case VIDIOC_CREATE_BUFS:
+ case VIDIOC_CREATE_BUFS32:
err = put_v4l2_create32(new_p64, p32);
break;

- case VIDIOC_PREPARE_BUF:
- case VIDIOC_QUERYBUF:
- case VIDIOC_QBUF:
- case VIDIOC_DQBUF:
+ case VIDIOC_PREPARE_BUF32:
+ case VIDIOC_QUERYBUF32:
+ case VIDIOC_QBUF32:
+ case VIDIOC_DQBUF32:
err = put_v4l2_buffer32(new_p64, p32);
break;

- case VIDIOC_ENUMSTD:
+ case VIDIOC_ENUMSTD32:
err = put_v4l2_standard32(new_p64, p32);
break;

- case VIDIOC_ENUMINPUT:
+ case VIDIOC_ENUMINPUT32:
err = put_v4l2_input32(new_p64, p32);
break;
}
diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c
index 66a6c6c236a7..28262190c3ab 100644
--- a/drivers/media/v4l2-core/videobuf-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf-dma-sg.c
@@ -349,8 +349,11 @@ int videobuf_dma_free(struct videobuf_dmabuf *dma)
BUG_ON(dma->sglen);

if (dma->pages) {
- for (i = 0; i < dma->nr_pages; i++)
+ for (i = 0; i < dma->nr_pages; i++) {
+ if (dma->direction == DMA_FROM_DEVICE)
+ set_page_dirty_lock(dma->pages[i]);
put_page(dma->pages[i]);
+ }
kfree(dma->pages);
dma->pages = NULL;
}
diff --git a/drivers/mfd/axp20x.c b/drivers/mfd/axp20x.c
index a4aaadaa0cb0..aa59496e4376 100644
--- a/drivers/mfd/axp20x.c
+++ b/drivers/mfd/axp20x.c
@@ -126,7 +126,7 @@ static const struct regmap_range axp288_writeable_ranges[] = {
static const struct regmap_range axp288_volatile_ranges[] = {
regmap_reg_range(AXP20X_PWR_INPUT_STATUS, AXP288_POWER_REASON),
regmap_reg_range(AXP288_BC_GLOBAL, AXP288_BC_GLOBAL),
- regmap_reg_range(AXP288_BC_DET_STAT, AXP288_BC_DET_STAT),
+ regmap_reg_range(AXP288_BC_DET_STAT, AXP20X_VBUS_IPSOUT_MGMT),
regmap_reg_range(AXP20X_CHRG_BAK_CTRL, AXP20X_CHRG_BAK_CTRL),
regmap_reg_range(AXP20X_IRQ1_EN, AXP20X_IPSOUT_V_HIGH_L),
regmap_reg_range(AXP20X_TIMER_CTRL, AXP20X_TIMER_CTRL),
diff --git a/drivers/mfd/da9062-core.c b/drivers/mfd/da9062-core.c
index e69626867c26..9143de7b77b8 100644
--- a/drivers/mfd/da9062-core.c
+++ b/drivers/mfd/da9062-core.c
@@ -248,7 +248,7 @@ static const struct mfd_cell da9062_devs[] = {
.name = "da9062-watchdog",
.num_resources = ARRAY_SIZE(da9062_wdt_resources),
.resources = da9062_wdt_resources,
- .of_compatible = "dlg,da9062-wdt",
+ .of_compatible = "dlg,da9062-watchdog",
},
{
.name = "da9062-thermal",
diff --git a/drivers/mfd/dln2.c b/drivers/mfd/dln2.c
index 381593fbe50f..7841c11411d0 100644
--- a/drivers/mfd/dln2.c
+++ b/drivers/mfd/dln2.c
@@ -722,6 +722,8 @@ static int dln2_probe(struct usb_interface *interface,
const struct usb_device_id *usb_id)
{
struct usb_host_interface *hostif = interface->cur_altsetting;
+ struct usb_endpoint_descriptor *epin;
+ struct usb_endpoint_descriptor *epout;
struct device *dev = &interface->dev;
struct dln2_dev *dln2;
int ret;
@@ -731,12 +733,19 @@ static int dln2_probe(struct usb_interface *interface,
hostif->desc.bNumEndpoints < 2)
return -ENODEV;

+ epin = &hostif->endpoint[0].desc;
+ epout = &hostif->endpoint[1].desc;
+ if (!usb_endpoint_is_bulk_out(epout))
+ return -ENODEV;
+ if (!usb_endpoint_is_bulk_in(epin))
+ return -ENODEV;
+
dln2 = kzalloc(sizeof(*dln2), GFP_KERNEL);
if (!dln2)
return -ENOMEM;

- dln2->ep_out = hostif->endpoint[0].desc.bEndpointAddress;
- dln2->ep_in = hostif->endpoint[1].desc.bEndpointAddress;
+ dln2->ep_out = epout->bEndpointAddress;
+ dln2->ep_in = epin->bEndpointAddress;
dln2->usb_dev = usb_get_dev(interface_to_usbdev(interface));
dln2->interface = interface;
usb_set_intfdata(interface, dln2);
diff --git a/drivers/mfd/rn5t618.c b/drivers/mfd/rn5t618.c
index da5cd9c92a59..ead2e79036a9 100644
--- a/drivers/mfd/rn5t618.c
+++ b/drivers/mfd/rn5t618.c
@@ -26,6 +26,7 @@ static bool rn5t618_volatile_reg(struct device *dev, unsigned int reg)
case RN5T618_WATCHDOGCNT:
case RN5T618_DCIRQ:
case RN5T618_ILIMDATAH ... RN5T618_AIN0DATAL:
+ case RN5T618_ADCCNT3:
case RN5T618_IR_ADC1 ... RN5T618_IR_ADC3:
case RN5T618_IR_GPR:
case RN5T618_IR_GPF:
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 66e354d51ee9..7083d8ddd495 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1134,17 +1134,22 @@ static void mmc_spi_initsequence(struct mmc_spi_host *host)
* SPI protocol. Another is that when chipselect is released while
* the card returns BUSY status, the clock must issue several cycles
* with chipselect high before the card will stop driving its output.
+ *
+ * SPI_CS_HIGH means "asserted" here. In some cases like when using
+ * GPIOs for chip select, SPI_CS_HIGH is set but this will be logically
+ * inverted by gpiolib, so if we want to ascertain to drive it high
+ * we should toggle the default with an XOR as we do here.
*/
- host->spi->mode |= SPI_CS_HIGH;
+ host->spi->mode ^= SPI_CS_HIGH;
if (spi_setup(host->spi) != 0) {
/* Just warn; most cards work without it. */
dev_warn(&host->spi->dev,
"can't change chip-select polarity\n");
- host->spi->mode &= ~SPI_CS_HIGH;
+ host->spi->mode ^= SPI_CS_HIGH;
} else {
mmc_spi_readbytes(host, 18);

- host->spi->mode &= ~SPI_CS_HIGH;
+ host->spi->mode ^= SPI_CS_HIGH;
if (spi_setup(host->spi) != 0) {
/* Wot, we can't get the same setup we had before? */
dev_err(&host->spi->dev,
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index 0ae986c42bc8..9378d5dc86c8 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -324,19 +324,22 @@ static int sdhci_at91_probe(struct platform_device *pdev)
priv->mainck = devm_clk_get(&pdev->dev, "baseclk");
if (IS_ERR(priv->mainck)) {
dev_err(&pdev->dev, "failed to get baseclk\n");
- return PTR_ERR(priv->mainck);
+ ret = PTR_ERR(priv->mainck);
+ goto sdhci_pltfm_free;
}

priv->hclock = devm_clk_get(&pdev->dev, "hclock");
if (IS_ERR(priv->hclock)) {
dev_err(&pdev->dev, "failed to get hclock\n");
- return PTR_ERR(priv->hclock);
+ ret = PTR_ERR(priv->hclock);
+ goto sdhci_pltfm_free;
}

priv->gck = devm_clk_get(&pdev->dev, "multclk");
if (IS_ERR(priv->gck)) {
dev_err(&pdev->dev, "failed to get multclk\n");
- return PTR_ERR(priv->gck);
+ ret = PTR_ERR(priv->gck);
+ goto sdhci_pltfm_free;
}

ret = sdhci_at91_set_clks_presets(&pdev->dev);
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index c9ea365c248c..5091e2c1c0e5 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -1604,7 +1604,7 @@ static u32 sdhci_read_present_state(struct sdhci_host *host)
return sdhci_readl(host, SDHCI_PRESENT_STATE);
}

-void amd_sdhci_reset(struct sdhci_host *host, u8 mask)
+static void amd_sdhci_reset(struct sdhci_host *host, u8 mask)
{
struct sdhci_pci_slot *slot = sdhci_priv(host);
struct pci_dev *pdev = slot->chip->pdev;
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 309c808351ac..f417fb680cd8 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -2310,15 +2310,16 @@ static const struct flash_info spi_nor_ids[] = {
{ "n25q256a", INFO(0x20ba19, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
{ "n25q256ax1", INFO(0x20bb19, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_QUAD_READ) },
{ "n25q512ax3", INFO(0x20ba20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
+ { "mt25qu512a", INFO6(0x20bb20, 0x104400, 64 * 1024, 1024,
+ SECT_4K | USE_FSR | SPI_NOR_DUAL_READ |
+ SPI_NOR_QUAD_READ | SPI_NOR_4B_OPCODES) },
+ { "n25q512a", INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K |
+ SPI_NOR_QUAD_READ) },
{ "n25q00", INFO(0x20ba21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) },
{ "n25q00a", INFO(0x20bb21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) },
{ "mt25ql02g", INFO(0x20ba22, 0, 64 * 1024, 4096,
SECT_4K | USE_FSR | SPI_NOR_QUAD_READ |
NO_CHIP_ERASE) },
- { "mt25qu512a (n25q512a)", INFO(0x20bb20, 0, 64 * 1024, 1024,
- SECT_4K | USE_FSR | SPI_NOR_DUAL_READ |
- SPI_NOR_QUAD_READ |
- SPI_NOR_4B_OPCODES) },
{ "mt25qu02g", INFO(0x20bb22, 0, 64 * 1024, 4096, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) },

/* Micron */
diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c
index 30621c67721a..604772fc4a96 100644
--- a/drivers/mtd/ubi/fastmap.c
+++ b/drivers/mtd/ubi/fastmap.c
@@ -64,7 +64,7 @@ static int self_check_seen(struct ubi_device *ubi, unsigned long *seen)
return 0;

for (pnum = 0; pnum < ubi->peb_count; pnum++) {
- if (test_bit(pnum, seen) && ubi->lookuptbl[pnum]) {
+ if (!test_bit(pnum, seen) && ubi->lookuptbl[pnum]) {
ubi_err(ubi, "self-check failed for PEB %d, fastmap didn't see it", pnum);
ret = -EINVAL;
}
@@ -1137,7 +1137,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi,
struct rb_node *tmp_rb;
int ret, i, j, free_peb_count, used_peb_count, vol_count;
int scrub_peb_count, erase_peb_count;
- unsigned long *seen_pebs = NULL;
+ unsigned long *seen_pebs;

fm_raw = ubi->fm_buf;
memset(ubi->fm_buf, 0, ubi->fm_size);
@@ -1151,7 +1151,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi,
dvbuf = new_fm_vbuf(ubi, UBI_FM_DATA_VOLUME_ID);
if (!dvbuf) {
ret = -ENOMEM;
- goto out_kfree;
+ goto out_free_avbuf;
}

avhdr = ubi_get_vid_hdr(avbuf);
@@ -1160,7 +1160,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi,
seen_pebs = init_seen(ubi);
if (IS_ERR(seen_pebs)) {
ret = PTR_ERR(seen_pebs);
- goto out_kfree;
+ goto out_free_dvbuf;
}

spin_lock(&ubi->volumes_lock);
@@ -1328,7 +1328,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi,
ret = ubi_io_write_vid_hdr(ubi, new_fm->e[0]->pnum, avbuf);
if (ret) {
ubi_err(ubi, "unable to write vid_hdr to fastmap SB!");
- goto out_kfree;
+ goto out_free_seen;
}

for (i = 0; i < new_fm->used_blocks; i++) {
@@ -1350,7 +1350,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi,
if (ret) {
ubi_err(ubi, "unable to write vid_hdr to PEB %i!",
new_fm->e[i]->pnum);
- goto out_kfree;
+ goto out_free_seen;
}
}

@@ -1360,7 +1360,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi,
if (ret) {
ubi_err(ubi, "unable to write fastmap to PEB %i!",
new_fm->e[i]->pnum);
- goto out_kfree;
+ goto out_free_seen;
}
}

@@ -1370,10 +1370,13 @@ static int ubi_write_fastmap(struct ubi_device *ubi,
ret = self_check_seen(ubi, seen_pebs);
dbg_bld("fastmap written!");

-out_kfree:
- ubi_free_vid_buf(avbuf);
- ubi_free_vid_buf(dvbuf);
+out_free_seen:
free_seen(seen_pebs);
+out_free_dvbuf:
+ ubi_free_vid_buf(dvbuf);
+out_free_avbuf:
+ ubi_free_vid_buf(avbuf);
+
out:
return ret;
}
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 4f2e6910c623..1cc2cd894f87 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -1383,26 +1383,31 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
bool do_tx_balance = true;
u32 hash_index = 0;
const u8 *hash_start = NULL;
- struct ipv6hdr *ip6hdr;

skb_reset_mac_header(skb);
eth_data = eth_hdr(skb);

switch (ntohs(skb->protocol)) {
case ETH_P_IP: {
- const struct iphdr *iph = ip_hdr(skb);
+ const struct iphdr *iph;

if (is_broadcast_ether_addr(eth_data->h_dest) ||
- iph->daddr == ip_bcast ||
- iph->protocol == IPPROTO_IGMP) {
+ !pskb_network_may_pull(skb, sizeof(*iph))) {
+ do_tx_balance = false;
+ break;
+ }
+ iph = ip_hdr(skb);
+ if (iph->daddr == ip_bcast || iph->protocol == IPPROTO_IGMP) {
do_tx_balance = false;
break;
}
hash_start = (char *)&(iph->daddr);
hash_size = sizeof(iph->daddr);
- }
break;
- case ETH_P_IPV6:
+ }
+ case ETH_P_IPV6: {
+ const struct ipv6hdr *ip6hdr;
+
/* IPv6 doesn't really use broadcast mac address, but leave
* that here just in case.
*/
@@ -1419,7 +1424,11 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
break;
}

- /* Additianally, DAD probes should not be tx-balanced as that
+ if (!pskb_network_may_pull(skb, sizeof(*ip6hdr))) {
+ do_tx_balance = false;
+ break;
+ }
+ /* Additionally, DAD probes should not be tx-balanced as that
* will lead to false positives for duplicate addresses and
* prevent address configuration from working.
*/
@@ -1429,17 +1438,26 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
break;
}

- hash_start = (char *)&(ipv6_hdr(skb)->daddr);
- hash_size = sizeof(ipv6_hdr(skb)->daddr);
+ hash_start = (char *)&ip6hdr->daddr;
+ hash_size = sizeof(ip6hdr->daddr);
break;
- case ETH_P_IPX:
- if (ipx_hdr(skb)->ipx_checksum != IPX_NO_CHECKSUM) {
+ }
+ case ETH_P_IPX: {
+ const struct ipxhdr *ipxhdr;
+
+ if (pskb_network_may_pull(skb, sizeof(*ipxhdr))) {
+ do_tx_balance = false;
+ break;
+ }
+ ipxhdr = (struct ipxhdr *)skb_network_header(skb);
+
+ if (ipxhdr->ipx_checksum != IPX_NO_CHECKSUM) {
/* something is wrong with this packet */
do_tx_balance = false;
break;
}

- if (ipx_hdr(skb)->ipx_type != IPX_TYPE_NCP) {
+ if (ipxhdr->ipx_type != IPX_TYPE_NCP) {
/* The only protocol worth balancing in
* this family since it has an "ARP" like
* mechanism
@@ -1448,9 +1466,11 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
break;
}

+ eth_data = eth_hdr(skb);
hash_start = (char *)eth_data->h_dest;
hash_size = ETH_ALEN;
break;
+ }
case ETH_P_ARP:
do_tx_balance = false;
if (bond_info->rlb_enabled)
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index a7132c1593c3..7ed667b304d1 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -680,7 +680,7 @@ int b53_configure_vlan(struct dsa_switch *ds)
b53_do_vlan_op(dev, VTA_CMD_CLEAR);
}

- b53_enable_vlan(dev, false, ds->vlan_filtering);
+ b53_enable_vlan(dev, dev->vlan_enabled, ds->vlan_filtering);

b53_for_each_port(dev, i)
b53_write16(dev, B53_VLAN_PAGE,
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 47b21096b577..fecd5e674e04 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -68,7 +68,9 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)

/* Force link status for IMP port */
reg = core_readl(priv, offset);
- reg |= (MII_SW_OR | LINK_STS | GMII_SPEED_UP_2G);
+ reg |= (MII_SW_OR | LINK_STS);
+ if (priv->type == BCM7278_DEVICE_ID)
+ reg |= GMII_SPEED_UP_2G;
core_writel(priv, reg, offset);

/* Enable Broadcast, Multicast, Unicast forwarding to IMP port */
diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c
index c5f64959a184..1142768969c2 100644
--- a/drivers/net/dsa/microchip/ksz9477_spi.c
+++ b/drivers/net/dsa/microchip/ksz9477_spi.c
@@ -101,6 +101,12 @@ static struct spi_driver ksz9477_spi_driver = {

module_spi_driver(ksz9477_spi_driver);

+MODULE_ALIAS("spi:ksz9477");
+MODULE_ALIAS("spi:ksz9897");
+MODULE_ALIAS("spi:ksz9893");
+MODULE_ALIAS("spi:ksz9563");
+MODULE_ALIAS("spi:ksz8563");
+MODULE_ALIAS("spi:ksz9567");
MODULE_AUTHOR("Woojung Huh <Woojung.Huh@xxxxxxxxxxxxx>");
MODULE_DESCRIPTION("Microchip KSZ9477 Series Switch SPI access Driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index b4c664957266..4a27577e137b 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -2728,6 +2728,9 @@ static int __maybe_unused bcm_sysport_resume(struct device *d)

umac_reset(priv);

+ /* Disable the UniMAC RX/TX */
+ umac_enable_set(priv, CMD_RX_EN | CMD_TX_EN, 0);
+
/* We may have been suspended and never received a WOL event that
* would turn off MPD detection, take care of that now
*/
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index cf292f7c3d3c..41297533b4a8 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -7873,7 +7873,7 @@ static void bnxt_setup_msix(struct bnxt *bp)
int tcs, i;

tcs = netdev_get_num_tc(dev);
- if (tcs > 1) {
+ if (tcs) {
int i, off, count;

for (i = 0; i < tcs; i++) {
@@ -9273,10 +9273,6 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init,
bnxt_debug_dev_exit(bp);
bnxt_disable_napi(bp);
del_timer_sync(&bp->timer);
- if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state) &&
- pci_is_enabled(bp->pdev))
- pci_disable_device(bp->pdev);
-
bnxt_free_skbs(bp);

/* Save ring stats before shutdown */
@@ -10052,8 +10048,15 @@ static void bnxt_fw_reset_close(struct bnxt *bp)
{
__bnxt_close_nic(bp, true, false);
bnxt_ulp_irq_stop(bp);
+ /* When firmware is fatal state, disable PCI device to prevent
+ * any potential bad DMAs before freeing kernel memory.
+ */
+ if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
+ pci_disable_device(bp->pdev);
bnxt_clear_int_mode(bp);
bnxt_hwrm_func_drv_unrgtr(bp);
+ if (pci_is_enabled(bp->pdev))
+ pci_disable_device(bp->pdev);
bnxt_free_ctx_mem(bp);
kfree(bp->ctx);
bp->ctx = NULL;
@@ -11359,9 +11362,9 @@ static void bnxt_remove_one(struct pci_dev *pdev)
bnxt_sriov_disable(bp);

bnxt_dl_fw_reporters_destroy(bp, true);
- bnxt_dl_unregister(bp);
pci_disable_pcie_error_reporting(pdev);
unregister_netdev(dev);
+ bnxt_dl_unregister(bp);
bnxt_shutdown_tc(bp);
bnxt_cancel_sp_work(bp);
bp->sp_event = 0;
@@ -11850,11 +11853,14 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
bnxt_init_tc(bp);
}

+ bnxt_dl_register(bp);
+
rc = register_netdev(dev);
if (rc)
- goto init_err_cleanup_tc;
+ goto init_err_cleanup;

- bnxt_dl_register(bp);
+ if (BNXT_PF(bp))
+ devlink_port_type_eth_set(&bp->dl_port, bp->dev);
bnxt_dl_fw_reporters_create(bp);

netdev_info(dev, "%s found at mem %lx, node addr %pM\n",
@@ -11864,7 +11870,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)

return 0;

-init_err_cleanup_tc:
+init_err_cleanup:
+ bnxt_dl_unregister(bp);
bnxt_shutdown_tc(bp);
bnxt_clear_int_mode(bp);

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 1e236e74ff2f..2d817ba0602c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -482,7 +482,6 @@ int bnxt_dl_register(struct bnxt *bp)
netdev_err(bp->dev, "devlink_port_register failed");
goto err_dl_param_unreg;
}
- devlink_port_type_eth_set(&bp->dl_port, bp->dev);

rc = devlink_port_params_register(&bp->dl_port, bnxt_dl_port_params,
ARRAY_SIZE(bnxt_dl_port_params));
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index f496b248bda3..95a94507cec1 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -73,7 +73,11 @@ struct sifive_fu540_macb_mgmt {
/* Max length of transmit frame must be a multiple of 8 bytes */
#define MACB_TX_LEN_ALIGN 8
#define MACB_MAX_TX_LEN ((unsigned int)((1 << MACB_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1)))
-#define GEM_MAX_TX_LEN ((unsigned int)((1 << GEM_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1)))
+/* Limit maximum TX length as per Cadence TSO errata. This is to avoid a
+ * false amba_error in TX path from the DMA assuming there is not enough
+ * space in the SRAM (16KB) even when there is.
+ */
+#define GEM_MAX_TX_LEN (unsigned int)(0x3FC0)

#define GEM_MTU_MIN_SIZE ETH_MIN_MTU
#define MACB_NETIF_LSO NETIF_F_TSO
@@ -1664,16 +1668,14 @@ static netdev_features_t macb_features_check(struct sk_buff *skb,

/* Validate LSO compatibility */

- /* there is only one buffer */
- if (!skb_is_nonlinear(skb))
+ /* there is only one buffer or protocol is not UDP */
+ if (!skb_is_nonlinear(skb) || (ip_hdr(skb)->protocol != IPPROTO_UDP))
return features;

/* length of header */
hdrlen = skb_transport_offset(skb);
- if (ip_hdr(skb)->protocol == IPPROTO_TCP)
- hdrlen += tcp_hdrlen(skb);

- /* For LSO:
+ /* For UFO only:
* When software supplies two or more payload buffers all payload buffers
* apart from the last must be a multiple of 8 bytes in size.
*/
diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c
index 0efdbd1a4a6f..32d470d4122a 100644
--- a/drivers/net/ethernet/dec/tulip/dmfe.c
+++ b/drivers/net/ethernet/dec/tulip/dmfe.c
@@ -2214,15 +2214,16 @@ static int __init dmfe_init_module(void)
if (cr6set)
dmfe_cr6_user_set = cr6set;

- switch(mode) {
- case DMFE_10MHF:
+ switch (mode) {
+ case DMFE_10MHF:
case DMFE_100MHF:
case DMFE_10MFD:
case DMFE_100MFD:
case DMFE_1M_HPNA:
dmfe_media_mode = mode;
break;
- default:dmfe_media_mode = DMFE_AUTO;
+ default:
+ dmfe_media_mode = DMFE_AUTO;
break;
}

diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c
index b1f30b194300..117ffe08800d 100644
--- a/drivers/net/ethernet/dec/tulip/uli526x.c
+++ b/drivers/net/ethernet/dec/tulip/uli526x.c
@@ -1809,8 +1809,8 @@ static int __init uli526x_init_module(void)
if (cr6set)
uli526x_cr6_user_set = cr6set;

- switch (mode) {
- case ULI526X_10MHF:
+ switch (mode) {
+ case ULI526X_10MHF:
case ULI526X_100MHF:
case ULI526X_10MFD:
case ULI526X_100MFD:
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index fcbe01f61aa4..e130233b5085 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -2483,6 +2483,9 @@ static void dpaa_adjust_link(struct net_device *net_dev)
mac_dev->adjust_link(mac_dev);
}

+/* The Aquantia PHYs are capable of performing rate adaptation */
+#define PHY_VEND_AQUANTIA 0x03a1b400
+
static int dpaa_phy_init(struct net_device *net_dev)
{
__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
@@ -2501,9 +2504,14 @@ static int dpaa_phy_init(struct net_device *net_dev)
return -ENODEV;
}

- /* Remove any features not supported by the controller */
- ethtool_convert_legacy_u32_to_link_mode(mask, mac_dev->if_support);
- linkmode_and(phy_dev->supported, phy_dev->supported, mask);
+ /* Unless the PHY is capable of rate adaptation */
+ if (mac_dev->phy_if != PHY_INTERFACE_MODE_XGMII ||
+ ((phy_dev->drv->phy_id & GENMASK(31, 10)) != PHY_VEND_AQUANTIA)) {
+ /* remove any features not supported by the controller */
+ ethtool_convert_legacy_u32_to_link_mode(mask,
+ mac_dev->if_support);
+ linkmode_and(phy_dev->supported, phy_dev->supported, mask);
+ }

phy_support_asym_pause(phy_dev);

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index e49820675c8c..6b1a81df1465 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -388,6 +388,8 @@ struct mvneta_pcpu_stats {
struct u64_stats_sync syncp;
u64 rx_packets;
u64 rx_bytes;
+ u64 rx_dropped;
+ u64 rx_errors;
u64 tx_packets;
u64 tx_bytes;
};
@@ -706,6 +708,8 @@ mvneta_get_stats64(struct net_device *dev,
struct mvneta_pcpu_stats *cpu_stats;
u64 rx_packets;
u64 rx_bytes;
+ u64 rx_dropped;
+ u64 rx_errors;
u64 tx_packets;
u64 tx_bytes;

@@ -714,19 +718,20 @@ mvneta_get_stats64(struct net_device *dev,
start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
rx_packets = cpu_stats->rx_packets;
rx_bytes = cpu_stats->rx_bytes;
+ rx_dropped = cpu_stats->rx_dropped;
+ rx_errors = cpu_stats->rx_errors;
tx_packets = cpu_stats->tx_packets;
tx_bytes = cpu_stats->tx_bytes;
} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));

stats->rx_packets += rx_packets;
stats->rx_bytes += rx_bytes;
+ stats->rx_dropped += rx_dropped;
+ stats->rx_errors += rx_errors;
stats->tx_packets += tx_packets;
stats->tx_bytes += tx_bytes;
}

- stats->rx_errors = dev->stats.rx_errors;
- stats->rx_dropped = dev->stats.rx_dropped;
-
stats->tx_dropped = dev->stats.tx_dropped;
}

@@ -1703,8 +1708,14 @@ static u32 mvneta_txq_desc_csum(int l3_offs, int l3_proto,
static void mvneta_rx_error(struct mvneta_port *pp,
struct mvneta_rx_desc *rx_desc)
{
+ struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
u32 status = rx_desc->status;

+ /* update per-cpu counter */
+ u64_stats_update_begin(&stats->syncp);
+ stats->rx_errors++;
+ u64_stats_update_end(&stats->syncp);
+
switch (status & MVNETA_RXD_ERR_CODE_MASK) {
case MVNETA_RXD_ERR_CRC:
netdev_err(pp->dev, "bad rx status %08x (crc error), size=%d\n",
@@ -1965,7 +1976,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
/* Check errors only for FIRST descriptor */
if (rx_status & MVNETA_RXD_ERR_SUMMARY) {
mvneta_rx_error(pp, rx_desc);
- dev->stats.rx_errors++;
/* leave the descriptor untouched */
continue;
}
@@ -1976,11 +1986,17 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
skb_size = max(rx_copybreak, rx_header_size);
rxq->skb = netdev_alloc_skb_ip_align(dev, skb_size);
if (unlikely(!rxq->skb)) {
+ struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+
netdev_err(dev,
"Can't allocate skb on queue %d\n",
rxq->id);
- dev->stats.rx_dropped++;
+
rxq->skb_alloc_err++;
+
+ u64_stats_update_begin(&stats->syncp);
+ stats->rx_dropped++;
+ u64_stats_update_end(&stats->syncp);
continue;
}
copy_size = min(skb_size, rx_bytes);
@@ -2137,7 +2153,6 @@ static int mvneta_rx_hwbm(struct napi_struct *napi,
mvneta_bm_pool_put_bp(pp->bm_priv, bm_pool,
rx_desc->buf_phys_addr);
err_drop_frame:
- dev->stats.rx_errors++;
mvneta_rx_error(pp, rx_desc);
/* leave the descriptor untouched */
continue;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
index d787bc0a4155..e09bc3858d57 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
@@ -45,7 +45,7 @@ void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id);

static inline bool mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev)
{
- if (!MLX5_CAP_GEN(mdev, tls))
+ if (!MLX5_CAP_GEN(mdev, tls_tx))
return false;

if (!MLX5_CAP_GEN(mdev, log_max_dek))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
index 71384ad1a443..ef1ed15a53b4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -269,7 +269,7 @@ struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
int datalen;
u32 skb_seq;

- if (MLX5_CAP_GEN(sq->channel->mdev, tls)) {
+ if (MLX5_CAP_GEN(sq->channel->mdev, tls_tx)) {
skb = mlx5e_ktls_handle_tx_skb(netdev, sq, skb, wqe, pi);
goto out;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index c76da309506b..72232e570af7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -850,6 +850,7 @@ void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
mutex_lock(&fpga_xfrm->lock);
if (!--fpga_xfrm->num_rules) {
mlx5_fpga_ipsec_release_sa_ctx(fpga_xfrm->sa_ctx);
+ kfree(fpga_xfrm->sa_ctx);
fpga_xfrm->sa_ctx = NULL;
}
mutex_unlock(&fpga_xfrm->lock);
@@ -1478,7 +1479,7 @@ int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs)))
return 0;

- if (!mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) {
+ if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) {
mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n");
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 791e14ac26f4..86e6bbb57482 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1555,16 +1555,16 @@ struct match_list_head {
struct match_list first;
};

-static void free_match_list(struct match_list_head *head)
+static void free_match_list(struct match_list_head *head, bool ft_locked)
{
if (!list_empty(&head->list)) {
struct match_list *iter, *match_tmp;

list_del(&head->first.list);
- tree_put_node(&head->first.g->node, false);
+ tree_put_node(&head->first.g->node, ft_locked);
list_for_each_entry_safe(iter, match_tmp, &head->list,
list) {
- tree_put_node(&iter->g->node, false);
+ tree_put_node(&iter->g->node, ft_locked);
list_del(&iter->list);
kfree(iter);
}
@@ -1573,7 +1573,8 @@ static void free_match_list(struct match_list_head *head)

static int build_match_list(struct match_list_head *match_head,
struct mlx5_flow_table *ft,
- const struct mlx5_flow_spec *spec)
+ const struct mlx5_flow_spec *spec,
+ bool ft_locked)
{
struct rhlist_head *tmp, *list;
struct mlx5_flow_group *g;
@@ -1598,7 +1599,7 @@ static int build_match_list(struct match_list_head *match_head,

curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC);
if (!curr_match) {
- free_match_list(match_head);
+ free_match_list(match_head, ft_locked);
err = -ENOMEM;
goto out;
}
@@ -1778,7 +1779,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft,
version = atomic_read(&ft->node.version);

/* Collect all fgs which has a matching match_criteria */
- err = build_match_list(&match_head, ft, spec);
+ err = build_match_list(&match_head, ft, spec, take_write);
if (err) {
if (take_write)
up_write_ref_node(&ft->node, false);
@@ -1792,7 +1793,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft,

rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest,
dest_num, version);
- free_match_list(&match_head);
+ free_match_list(&match_head, take_write);
if (!IS_ERR(rule) ||
(PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) {
if (take_write)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index a19790dee7b2..13e86f0b42f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -239,7 +239,7 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
return err;
}

- if (MLX5_CAP_GEN(dev, tls)) {
+ if (MLX5_CAP_GEN(dev, tls_tx)) {
err = mlx5_core_get_caps(dev, MLX5_CAP_TLS);
if (err)
return err;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
index 5bfdda19f64d..d8745f87f065 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_if.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -862,7 +862,7 @@ struct ionic_rxq_comp {
#define IONIC_RXQ_COMP_CSUM_F_VLAN 0x40
#define IONIC_RXQ_COMP_CSUM_F_CALC 0x80
u8 pkt_type_color;
-#define IONIC_RXQ_COMP_PKT_TYPE_MASK 0x0f
+#define IONIC_RXQ_COMP_PKT_TYPE_MASK 0x7f
};

enum ionic_pkt_type {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ptp.c b/drivers/net/ethernet/qlogic/qed/qed_ptp.c
index 0dacf2c18c09..3e613058e225 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ptp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ptp.c
@@ -44,8 +44,8 @@
/* Add/subtract the Adjustment_Value when making a Drift adjustment */
#define QED_DRIFT_CNTR_DIRECTION_SHIFT 31
#define QED_TIMESTAMP_MASK BIT(16)
-/* Param mask for Hardware to detect/timestamp the unicast PTP packets */
-#define QED_PTP_UCAST_PARAM_MASK 0xF
+/* Param mask for Hardware to detect/timestamp the L2/L4 unicast PTP packets */
+#define QED_PTP_UCAST_PARAM_MASK 0x70F

static enum qed_resc_lock qed_ptcdev_to_resc(struct qed_hwfn *p_hwfn)
{
diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c
index 8d88e4083456..7b65e79d6ae9 100644
--- a/drivers/net/ethernet/smsc/smc911x.c
+++ b/drivers/net/ethernet/smsc/smc911x.c
@@ -936,7 +936,7 @@ static void smc911x_phy_configure(struct work_struct *work)
if (lp->ctl_rspeed != 100)
my_ad_caps &= ~(ADVERTISE_100BASE4|ADVERTISE_100FULL|ADVERTISE_100HALF);

- if (!lp->ctl_rfduplx)
+ if (!lp->ctl_rfduplx)
my_ad_caps &= ~(ADVERTISE_100FULL|ADVERTISE_10FULL);

/* Update our Auto-Neg Advertisement Register */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index 7ec895407d23..e0a5fe83d8e0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -413,6 +413,7 @@ static int ethqos_configure(struct qcom_ethqos *ethqos)
dll_lock = rgmii_readl(ethqos, SDC4_STATUS);
if (dll_lock & SDC4_STATUS_DLL_LOCK)
break;
+ retry--;
} while (retry > 0);
if (!retry)
dev_err(&ethqos->pdev->dev,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 06dd65c419c4..582176d869c3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -4763,6 +4763,7 @@ int stmmac_suspend(struct device *dev)
{
struct net_device *ndev = dev_get_drvdata(dev);
struct stmmac_priv *priv = netdev_priv(ndev);
+ u32 chan;

if (!ndev || !netif_running(ndev))
return 0;
@@ -4776,6 +4777,9 @@ int stmmac_suspend(struct device *dev)

stmmac_disable_all_queues(priv);

+ for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
+ del_timer_sync(&priv->tx_queue[chan].txtimer);
+
/* Stop TX/RX DMA */
stmmac_stop_all_dma(priv);

diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 9b3ba98726d7..3a53d222bfcc 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -767,12 +767,12 @@ static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize)
int i;

gtp->addr_hash = kmalloc_array(hsize, sizeof(struct hlist_head),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (gtp->addr_hash == NULL)
return -ENOMEM;

gtp->tid_hash = kmalloc_array(hsize, sizeof(struct hlist_head),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (gtp->tid_hash == NULL)
goto err1;

diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 44c2d857a7fa..91b302f0192f 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -73,7 +73,7 @@ static const struct file_operations nsim_dev_take_snapshot_fops = {

static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
{
- char dev_ddir_name[16];
+ char dev_ddir_name[sizeof(DRV_NAME) + 10];

sprintf(dev_ddir_name, DRV_NAME "%u", nsim_dev->nsim_bus_dev->dev.id);
nsim_dev->ddir = debugfs_create_dir(dev_ddir_name, nsim_dev_ddir);
diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c
index a7b9cf3269bf..29a0917a81e6 100644
--- a/drivers/net/ppp/ppp_async.c
+++ b/drivers/net/ppp/ppp_async.c
@@ -874,15 +874,15 @@ ppp_async_input(struct asyncppp *ap, const unsigned char *buf,
skb = dev_alloc_skb(ap->mru + PPP_HDRLEN + 2);
if (!skb)
goto nomem;
- ap->rpkt = skb;
- }
- if (skb->len == 0) {
- /* Try to get the payload 4-byte aligned.
- * This should match the
- * PPP_ALLSTATIONS/PPP_UI/compressed tests in
- * process_input_packet, but we do not have
- * enough chars here to test buf[1] and buf[2].
- */
+ ap->rpkt = skb;
+ }
+ if (skb->len == 0) {
+ /* Try to get the payload 4-byte aligned.
+ * This should match the
+ * PPP_ALLSTATIONS/PPP_UI/compressed tests in
+ * process_input_packet, but we do not have
+ * enough chars here to test buf[1] and buf[2].
+ */
if (buf[0] != PPP_ALLSTATIONS)
skb_reserve(skb, 2 + (buf[0] & 1));
}
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
index 7cdfde9b3dea..575ed19e9195 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
@@ -430,6 +430,7 @@ brcmf_usbdev_qinit(struct list_head *q, int qsize)
usb_free_urb(req->urb);
list_del(q->next);
}
+ kfree(reqs);
return NULL;

}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index b3768d5d852a..8ad2d889179c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -3321,6 +3321,10 @@ static int iwl_mvm_send_sta_igtk(struct iwl_mvm *mvm,
igtk_cmd.sta_id = cpu_to_le32(sta_id);

if (remove_key) {
+ /* This is a valid situation for IGTK */
+ if (sta_id == IWL_MVM_INVALID_STA)
+ return 0;
+
igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_NOT_VALID);
} else {
struct ieee80211_key_seq seq;
@@ -3575,9 +3579,9 @@ int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm,
IWL_DEBUG_WEP(mvm, "mvm remove dynamic key: idx=%d sta=%d\n",
keyconf->keyidx, sta_id);

- if (mvm_sta && (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC ||
- keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 ||
- keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256))
+ if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC ||
+ keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 ||
+ keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256)
return iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id, true);

if (!__test_and_clear_bit(keyconf->hw_key_idx, mvm->fw_key_table)) {
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
index 6dd835f1efc2..fbfa0b15d0c8 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
@@ -232,6 +232,7 @@ static int mwifiex_process_country_ie(struct mwifiex_private *priv,

if (country_ie_len >
(IEEE80211_COUNTRY_STRING_LEN + MWIFIEX_MAX_TRIPLET_802_11D)) {
+ rcu_read_unlock();
mwifiex_dbg(priv->adapter, ERROR,
"11D: country_ie_len overflow!, deauth AP\n");
return -EINVAL;
diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c
index cda996f6954e..2b83156efe3f 100644
--- a/drivers/nfc/pn544/pn544.c
+++ b/drivers/nfc/pn544/pn544.c
@@ -693,7 +693,7 @@ static int pn544_hci_check_presence(struct nfc_hci_dev *hdev,
target->nfcid1_len != 10)
return -EOPNOTSUPP;

- return nfc_hci_send_cmd(hdev, NFC_HCI_RF_READER_A_GATE,
+ return nfc_hci_send_cmd(hdev, NFC_HCI_RF_READER_A_GATE,
PN544_RF_READER_CMD_ACTIVATE_NEXT,
target->nfcid1, target->nfcid1_len, NULL);
} else if (target->supported_protocols & (NFC_PROTO_JEWEL_MASK |
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index d16b55ffe79f..4e9004fe5c6f 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -105,6 +105,7 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
u16 qid = le16_to_cpu(c->qid);
u16 sqsize = le16_to_cpu(c->sqsize);
struct nvmet_ctrl *old;
+ u16 ret;

old = cmpxchg(&req->sq->ctrl, NULL, ctrl);
if (old) {
@@ -115,7 +116,8 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
if (!sqsize) {
pr_warn("queue size zero!\n");
req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
- return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ goto err;
}

/* note: convert queue size from 0's-based value to 1's-based value */
@@ -128,16 +130,19 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
}

if (ctrl->ops->install_queue) {
- u16 ret = ctrl->ops->install_queue(req->sq);
-
+ ret = ctrl->ops->install_queue(req->sq);
if (ret) {
pr_err("failed to install queue %d cntlid %d ret %x\n",
- qid, ret, ctrl->cntlid);
- return ret;
+ qid, ctrl->cntlid, ret);
+ goto err;
}
}

return 0;
+
+err:
+ req->sq->ctrl = NULL;
+ return ret;
}

static void nvmet_execute_admin_connect(struct nvmet_req *req)
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index 057d1ff87d5d..960542dea5ad 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -110,7 +110,7 @@ static void nvmem_cell_drop(struct nvmem_cell *cell)
list_del(&cell->node);
mutex_unlock(&nvmem_mutex);
of_node_put(cell->np);
- kfree(cell->name);
+ kfree_const(cell->name);
kfree(cell);
}

@@ -137,7 +137,9 @@ static int nvmem_cell_info_to_nvmem_cell(struct nvmem_device *nvmem,
cell->nvmem = nvmem;
cell->offset = info->offset;
cell->bytes = info->bytes;
- cell->name = info->name;
+ cell->name = kstrdup_const(info->name, GFP_KERNEL);
+ if (!cell->name)
+ return -ENOMEM;

cell->bit_offset = info->bit_offset;
cell->nbits = info->nbits;
@@ -327,7 +329,7 @@ static int nvmem_add_cells_from_of(struct nvmem_device *nvmem)
dev_err(dev, "cell %s unaligned to nvmem stride %d\n",
cell->name, nvmem->stride);
/* Cells already added will be freed later. */
- kfree(cell->name);
+ kfree_const(cell->name);
kfree(cell);
return -EINVAL;
}
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 37c2ccbefecd..d91618641be6 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -103,4 +103,8 @@ config OF_OVERLAY
config OF_NUMA
bool

+config OF_DMA_DEFAULT_COHERENT
+ # arches should select this if DMA is coherent by default for OF devices
+ bool
+
endif # OF
diff --git a/drivers/of/address.c b/drivers/of/address.c
index 978427a9d5e6..8f74c4626e0e 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -998,12 +998,16 @@ EXPORT_SYMBOL_GPL(of_dma_get_range);
* @np: device node
*
* It returns true if "dma-coherent" property was found
- * for this device in DT.
+ * for this device in the DT, or if DMA is coherent by
+ * default for OF devices on the current platform.
*/
bool of_dma_is_coherent(struct device_node *np)
{
struct device_node *node = of_node_get(np);

+ if (IS_ENABLED(CONFIG_OF_DMA_DEFAULT_COHERENT))
+ return true;
+
while (node) {
if (of_property_read_bool(node, "dma-coherent")) {
of_node_put(node);
diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c
index af677254a072..c8c702c494a2 100644
--- a/drivers/pci/controller/dwc/pci-keystone.c
+++ b/drivers/pci/controller/dwc/pci-keystone.c
@@ -422,7 +422,7 @@ static void ks_pcie_setup_rc_app_regs(struct keystone_pcie *ks_pcie)
lower_32_bits(start) | OB_ENABLEN);
ks_pcie_app_writel(ks_pcie, OB_OFFSET_HI(i),
upper_32_bits(start));
- start += OB_WIN_SIZE;
+ start += OB_WIN_SIZE * SZ_1M;
}

val = ks_pcie_app_readl(ks_pcie, CMD_STATUS);
@@ -510,7 +510,7 @@ static void ks_pcie_stop_link(struct dw_pcie *pci)
/* Disable Link training */
val = ks_pcie_app_readl(ks_pcie, CMD_STATUS);
val &= ~LTSSM_EN_VAL;
- ks_pcie_app_writel(ks_pcie, CMD_STATUS, LTSSM_EN_VAL | val);
+ ks_pcie_app_writel(ks_pcie, CMD_STATUS, val);
}

static int ks_pcie_start_link(struct dw_pcie *pci)
@@ -1354,7 +1354,7 @@ static int __init ks_pcie_probe(struct platform_device *pdev)
ret = of_property_read_u32(np, "num-viewport", &num_viewport);
if (ret < 0) {
dev_err(dev, "unable to read *num-viewport* property\n");
- return ret;
+ goto err_get_sync;
}

/*
diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c
index 673a1725ef38..090b632965e2 100644
--- a/drivers/pci/controller/pci-tegra.c
+++ b/drivers/pci/controller/pci-tegra.c
@@ -2798,7 +2798,7 @@ static int tegra_pcie_probe(struct platform_device *pdev)

pm_runtime_enable(pcie->dev);
err = pm_runtime_get_sync(pcie->dev);
- if (err) {
+ if (err < 0) {
dev_err(dev, "fail to enable pcie controller: %d\n", err);
goto teardown_msi;
}
diff --git a/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c b/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c
index 42bc5150dd92..febe0aef68d4 100644
--- a/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c
+++ b/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c
@@ -80,7 +80,7 @@ static int read_poll_timeout(void __iomem *addr, u32 mask)
if (readl_relaxed(addr) & mask)
return 0;

- usleep_range(DELAY_INTERVAL_US, DELAY_INTERVAL_US + 50);
+ usleep_range(DELAY_INTERVAL_US, DELAY_INTERVAL_US + 50);
} while (!time_after(jiffies, timeout));

return (readl_relaxed(addr) & mask) ? 0 : -ETIMEDOUT;
diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c
index cdab916fbf92..e330ec73c465 100644
--- a/drivers/platform/x86/intel_scu_ipc.c
+++ b/drivers/platform/x86/intel_scu_ipc.c
@@ -67,26 +67,22 @@
struct intel_scu_ipc_pdata_t {
u32 i2c_base;
u32 i2c_len;
- u8 irq_mode;
};

static const struct intel_scu_ipc_pdata_t intel_scu_ipc_lincroft_pdata = {
.i2c_base = 0xff12b000,
.i2c_len = 0x10,
- .irq_mode = 0,
};

/* Penwell and Cloverview */
static const struct intel_scu_ipc_pdata_t intel_scu_ipc_penwell_pdata = {
.i2c_base = 0xff12b000,
.i2c_len = 0x10,
- .irq_mode = 1,
};

static const struct intel_scu_ipc_pdata_t intel_scu_ipc_tangier_pdata = {
.i2c_base = 0xff00d000,
.i2c_len = 0x10,
- .irq_mode = 0,
};

struct intel_scu_ipc_dev {
@@ -99,6 +95,9 @@ struct intel_scu_ipc_dev {

static struct intel_scu_ipc_dev ipcdev; /* Only one for now */

+#define IPC_STATUS 0x04
+#define IPC_STATUS_IRQ BIT(2)
+
/*
* IPC Read Buffer (Read Only):
* 16 byte buffer for receiving data from SCU, if IPC command
@@ -120,11 +119,8 @@ static DEFINE_MUTEX(ipclock); /* lock used to prevent multiple call to SCU */
*/
static inline void ipc_command(struct intel_scu_ipc_dev *scu, u32 cmd)
{
- if (scu->irq_mode) {
- reinit_completion(&scu->cmd_complete);
- writel(cmd | IPC_IOC, scu->ipc_base);
- }
- writel(cmd, scu->ipc_base);
+ reinit_completion(&scu->cmd_complete);
+ writel(cmd | IPC_IOC, scu->ipc_base);
}

/*
@@ -610,9 +606,10 @@ EXPORT_SYMBOL(intel_scu_ipc_i2c_cntrl);
static irqreturn_t ioc(int irq, void *dev_id)
{
struct intel_scu_ipc_dev *scu = dev_id;
+ int status = ipc_read_status(scu);

- if (scu->irq_mode)
- complete(&scu->cmd_complete);
+ writel(status | IPC_STATUS_IRQ, scu->ipc_base + IPC_STATUS);
+ complete(&scu->cmd_complete);

return IRQ_HANDLED;
}
@@ -638,8 +635,6 @@ static int ipc_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (!pdata)
return -ENODEV;

- scu->irq_mode = pdata->irq_mode;
-
err = pcim_enable_device(pdev);
if (err)
return err;
diff --git a/drivers/power/supply/axp20x_ac_power.c b/drivers/power/supply/axp20x_ac_power.c
index 0d34a932b6d5..f74b0556bb6b 100644
--- a/drivers/power/supply/axp20x_ac_power.c
+++ b/drivers/power/supply/axp20x_ac_power.c
@@ -23,6 +23,8 @@
#define AXP20X_PWR_STATUS_ACIN_PRESENT BIT(7)
#define AXP20X_PWR_STATUS_ACIN_AVAIL BIT(6)

+#define AXP813_ACIN_PATH_SEL BIT(7)
+
#define AXP813_VHOLD_MASK GENMASK(5, 3)
#define AXP813_VHOLD_UV_TO_BIT(x) ((((x) / 100000) - 40) << 3)
#define AXP813_VHOLD_REG_TO_UV(x) \
@@ -40,6 +42,7 @@ struct axp20x_ac_power {
struct power_supply *supply;
struct iio_channel *acin_v;
struct iio_channel *acin_i;
+ bool has_acin_path_sel;
};

static irqreturn_t axp20x_ac_power_irq(int irq, void *devid)
@@ -86,6 +89,17 @@ static int axp20x_ac_power_get_property(struct power_supply *psy,
return ret;

val->intval = !!(reg & AXP20X_PWR_STATUS_ACIN_AVAIL);
+
+ /* ACIN_PATH_SEL disables ACIN even if ACIN_AVAIL is set. */
+ if (val->intval && power->has_acin_path_sel) {
+ ret = regmap_read(power->regmap, AXP813_ACIN_PATH_CTRL,
+ &reg);
+ if (ret)
+ return ret;
+
+ val->intval = !!(reg & AXP813_ACIN_PATH_SEL);
+ }
+
return 0;

case POWER_SUPPLY_PROP_VOLTAGE_NOW:
@@ -224,21 +238,25 @@ static const struct power_supply_desc axp813_ac_power_desc = {
struct axp_data {
const struct power_supply_desc *power_desc;
bool acin_adc;
+ bool acin_path_sel;
};

static const struct axp_data axp20x_data = {
- .power_desc = &axp20x_ac_power_desc,
- .acin_adc = true,
+ .power_desc = &axp20x_ac_power_desc,
+ .acin_adc = true,
+ .acin_path_sel = false,
};

static const struct axp_data axp22x_data = {
- .power_desc = &axp22x_ac_power_desc,
- .acin_adc = false,
+ .power_desc = &axp22x_ac_power_desc,
+ .acin_adc = false,
+ .acin_path_sel = false,
};

static const struct axp_data axp813_data = {
- .power_desc = &axp813_ac_power_desc,
- .acin_adc = false,
+ .power_desc = &axp813_ac_power_desc,
+ .acin_adc = false,
+ .acin_path_sel = true,
};

static int axp20x_ac_power_probe(struct platform_device *pdev)
@@ -282,6 +300,7 @@ static int axp20x_ac_power_probe(struct platform_device *pdev)
}

power->regmap = dev_get_regmap(pdev->dev.parent, NULL);
+ power->has_acin_path_sel = axp_data->acin_path_sel;

platform_set_drvdata(pdev, power);

diff --git a/drivers/power/supply/ltc2941-battery-gauge.c b/drivers/power/supply/ltc2941-battery-gauge.c
index da49436176cd..30a9014b2f95 100644
--- a/drivers/power/supply/ltc2941-battery-gauge.c
+++ b/drivers/power/supply/ltc2941-battery-gauge.c
@@ -449,7 +449,7 @@ static int ltc294x_i2c_remove(struct i2c_client *client)
{
struct ltc294x_info *info = i2c_get_clientdata(client);

- cancel_delayed_work(&info->work);
+ cancel_delayed_work_sync(&info->work);
power_supply_unregister(info->supply);
return 0;
}
diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c
index ca3dc3f3bb29..bb16c465426e 100644
--- a/drivers/regulator/helpers.c
+++ b/drivers/regulator/helpers.c
@@ -13,6 +13,8 @@
#include <linux/regulator/driver.h>
#include <linux/module.h>

+#include "internal.h"
+
/**
* regulator_is_enabled_regmap - standard is_enabled() for regmap users
*
@@ -881,3 +883,15 @@ void regulator_bulk_set_supply_names(struct regulator_bulk_data *consumers,
consumers[i].supply = supply_names[i];
}
EXPORT_SYMBOL_GPL(regulator_bulk_set_supply_names);
+
+/**
+ * regulator_is_equal - test whether two regulators are the same
+ *
+ * @reg1: first regulator to operate on
+ * @reg2: second regulator to operate on
+ */
+bool regulator_is_equal(struct regulator *reg1, struct regulator *reg2)
+{
+ return reg1->rdev == reg2->rdev;
+}
+EXPORT_SYMBOL_GPL(regulator_is_equal);
diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c
index 469d0bc9f5fe..00cf33573136 100644
--- a/drivers/scsi/csiostor/csio_scsi.c
+++ b/drivers/scsi/csiostor/csio_scsi.c
@@ -1383,7 +1383,7 @@ csio_device_reset(struct device *dev,
return -EINVAL;

/* Delete NPIV lnodes */
- csio_lnodes_exit(hw, 1);
+ csio_lnodes_exit(hw, 1);

/* Block upper IOs */
csio_lnodes_block_request(hw);
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 42cf38c1ea99..0cbe6740e0c9 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -4392,7 +4392,8 @@ dcmd_timeout_ocr_possible(struct megasas_instance *instance) {
if (instance->adapter_type == MFI_SERIES)
return KILL_ADAPTER;
else if (instance->unload ||
- test_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags))
+ test_bit(MEGASAS_FUSION_OCR_NOT_POSSIBLE,
+ &instance->reset_flags))
return IGNORE_TIMEOUT;
else
return INITIATE_OCR;
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index e301458bcbae..46bc062d873e 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -4847,6 +4847,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason)
if (instance->requestorId && !instance->skip_heartbeat_timer_del)
del_timer_sync(&instance->sriov_heartbeat_timer);
set_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags);
+ set_bit(MEGASAS_FUSION_OCR_NOT_POSSIBLE, &instance->reset_flags);
atomic_set(&instance->adprecovery, MEGASAS_ADPRESET_SM_POLLING);
instance->instancet->disable_intr(instance);
megasas_sync_irqs((unsigned long)instance);
@@ -5046,7 +5047,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason)
instance->skip_heartbeat_timer_del = 1;
retval = FAILED;
out:
- clear_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags);
+ clear_bit(MEGASAS_FUSION_OCR_NOT_POSSIBLE, &instance->reset_flags);
mutex_unlock(&instance->reset_mutex);
return retval;
}
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.h b/drivers/scsi/megaraid/megaraid_sas_fusion.h
index c013c80fe4e6..dd2e37e40d6b 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.h
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.h
@@ -89,6 +89,7 @@ enum MR_RAID_FLAGS_IO_SUB_TYPE {

#define MEGASAS_FP_CMD_LEN 16
#define MEGASAS_FUSION_IN_RESET 0
+#define MEGASAS_FUSION_OCR_NOT_POSSIBLE 1
#define RAID_1_PEER_CMDS 2
#define JBOD_MAPS_COUNT 2
#define MEGASAS_REDUCE_QD_COUNT 64
diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c
index 30afc59c1870..7bbff91f8883 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c
@@ -2519,12 +2519,6 @@ qla83xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked)
/* Driver Debug Functions. */
/****************************************************************************/

-static inline int
-ql_mask_match(uint level)
-{
- return (level & ql2xextended_error_logging) == level;
-}
-
/*
* This function is for formatting and logging debug information.
* It is to be used when vha is available. It formats the message
diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h
index bb01b680ce9f..433e95502808 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.h
+++ b/drivers/scsi/qla2xxx/qla_dbg.h
@@ -374,3 +374,9 @@ extern int qla24xx_dump_ram(struct qla_hw_data *, uint32_t, uint32_t *,
extern void qla24xx_pause_risc(struct device_reg_24xx __iomem *,
struct qla_hw_data *);
extern int qla24xx_soft_reset(struct qla_hw_data *);
+
+static inline int
+ql_mask_match(uint level)
+{
+ return (level & ql2xextended_error_logging) == level;
+}
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 1eb3fe281cc3..c57b95a20688 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2402,6 +2402,7 @@ typedef struct fc_port {
unsigned int scan_needed:1;
unsigned int n2n_flag:1;
unsigned int explicit_logout:1;
+ unsigned int prli_pend_timer:1;

struct completion nvme_del_done;
uint32_t nvme_prli_service_param;
@@ -2428,6 +2429,7 @@ typedef struct fc_port {
struct work_struct free_work;
struct work_struct reg_work;
uint64_t jiffies_at_registration;
+ unsigned long prli_expired;
struct qlt_plogi_ack_t *plogi_link[QLT_PLOGI_LINK_MAX];

uint16_t tgt_id;
@@ -4821,6 +4823,9 @@ struct sff_8247_a0 {
ha->current_topology == ISP_CFG_N || \
!ha->current_topology)

+#define PRLI_PHASE(_cls) \
+ ((_cls == DSC_LS_PRLI_PEND) || (_cls == DSC_LS_PRLI_COMP))
+
#include "qla_target.h"
#include "qla_gbl.h"
#include "qla_dbg.h"
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 9ffaa920fc8f..ac4c47fc5f4c 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -686,7 +686,7 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha,
port_id_t id;
u64 wwn;
u16 data[2];
- u8 current_login_state;
+ u8 current_login_state, nvme_cls;

fcport = ea->fcport;
ql_dbg(ql_dbg_disc, vha, 0xffff,
@@ -745,10 +745,17 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha,

loop_id = le16_to_cpu(e->nport_handle);
loop_id = (loop_id & 0x7fff);
- if (fcport->fc4f_nvme)
- current_login_state = e->current_login_state >> 4;
- else
- current_login_state = e->current_login_state & 0xf;
+ nvme_cls = e->current_login_state >> 4;
+ current_login_state = e->current_login_state & 0xf;
+
+ if (PRLI_PHASE(nvme_cls)) {
+ current_login_state = nvme_cls;
+ fcport->fc4_type &= ~FS_FC4TYPE_FCP;
+ fcport->fc4_type |= FS_FC4TYPE_NVME;
+ } else if (PRLI_PHASE(current_login_state)) {
+ fcport->fc4_type |= FS_FC4TYPE_FCP;
+ fcport->fc4_type &= ~FS_FC4TYPE_NVME;
+ }


ql_dbg(ql_dbg_disc, vha, 0x20e2,
@@ -1219,12 +1226,19 @@ qla24xx_async_prli(struct scsi_qla_host *vha, fc_port_t *fcport)
struct srb_iocb *lio;
int rval = QLA_FUNCTION_FAILED;

- if (!vha->flags.online)
+ if (!vha->flags.online) {
+ ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %d %8phC exit\n",
+ __func__, __LINE__, fcport->port_name);
return rval;
+ }

- if (fcport->fw_login_state == DSC_LS_PLOGI_PEND ||
- fcport->fw_login_state == DSC_LS_PRLI_PEND)
+ if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND ||
+ fcport->fw_login_state == DSC_LS_PRLI_PEND) &&
+ qla_dual_mode_enabled(vha)) {
+ ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %d %8phC exit\n",
+ __func__, __LINE__, fcport->port_name);
return rval;
+ }

sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
if (!sp)
@@ -1602,6 +1616,10 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport)
break;
default:
if (fcport->login_pause) {
+ ql_dbg(ql_dbg_disc, vha, 0x20d8,
+ "%s %d %8phC exit\n",
+ __func__, __LINE__,
+ fcport->port_name);
fcport->last_rscn_gen = fcport->rscn_gen;
fcport->last_login_gen = fcport->login_gen;
set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 7c5f2736ebee..3e9c5768815e 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -1897,6 +1897,18 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
inbuf = (uint32_t *)&sts->nvme_ersp_data;
outbuf = (uint32_t *)fd->rspaddr;
iocb->u.nvme.rsp_pyld_len = le16_to_cpu(sts->nvme_rsp_pyld_len);
+ if (unlikely(iocb->u.nvme.rsp_pyld_len >
+ sizeof(struct nvme_fc_ersp_iu))) {
+ if (ql_mask_match(ql_dbg_io)) {
+ WARN_ONCE(1, "Unexpected response payload length %u.\n",
+ iocb->u.nvme.rsp_pyld_len);
+ ql_log(ql_log_warn, fcport->vha, 0x5100,
+ "Unexpected response payload length %u.\n",
+ iocb->u.nvme.rsp_pyld_len);
+ }
+ iocb->u.nvme.rsp_pyld_len =
+ sizeof(struct nvme_fc_ersp_iu);
+ }
iter = iocb->u.nvme.rsp_pyld_len >> 2;
for (; iter; iter--)
*outbuf++ = swab32(*inbuf++);
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index eac76e934cbe..1ef8907314e5 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -6151,9 +6151,8 @@ qla2x00_dump_mctp_data(scsi_qla_host_t *vha, dma_addr_t req_dma, uint32_t addr,
mcp->mb[7] = LSW(MSD(req_dma));
mcp->mb[8] = MSW(addr);
/* Setting RAM ID to valid */
- mcp->mb[10] |= BIT_7;
/* For MCTP RAM ID is 0x40 */
- mcp->mb[10] |= 0x40;
+ mcp->mb[10] = BIT_7 | 0x40;

mcp->out_mb |= MBX_10|MBX_8|MBX_7|MBX_6|MBX_5|MBX_4|MBX_3|MBX_2|MBX_1|
MBX_0;
diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c
index 2b2028f2383e..c855d013ba8a 100644
--- a/drivers/scsi/qla2xxx/qla_nx.c
+++ b/drivers/scsi/qla2xxx/qla_nx.c
@@ -1612,8 +1612,7 @@ qla82xx_get_bootld_offset(struct qla_hw_data *ha)
return (u8 *)&ha->hablob->fw->data[offset];
}

-static __le32
-qla82xx_get_fw_size(struct qla_hw_data *ha)
+static u32 qla82xx_get_fw_size(struct qla_hw_data *ha)
{
struct qla82xx_uri_data_desc *uri_desc = NULL;

@@ -1624,7 +1623,7 @@ qla82xx_get_fw_size(struct qla_hw_data *ha)
return cpu_to_le32(uri_desc->size);
}

- return cpu_to_le32(*(u32 *)&ha->hablob->fw->data[FW_SIZE_OFFSET]);
+ return get_unaligned_le32(&ha->hablob->fw->data[FW_SIZE_OFFSET]);
}

static u8 *
@@ -1816,7 +1815,7 @@ qla82xx_fw_load_from_blob(struct qla_hw_data *ha)
}

flashaddr = FLASH_ADDR_START;
- size = (__force u32)qla82xx_get_fw_size(ha) / 8;
+ size = qla82xx_get_fw_size(ha) / 8;
ptr64 = (u64 *)qla82xx_get_fw_offs(ha);

for (i = 0; i < size; i++) {
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 74a378a91b71..cb8a892e2d39 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -1257,6 +1257,7 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess)
sess->deleted = QLA_SESS_DELETION_IN_PROGRESS;
spin_unlock_irqrestore(&sess->vha->work_lock, flags);

+ sess->prli_pend_timer = 0;
sess->disc_state = DSC_DELETE_PEND;

qla24xx_chk_fcp_state(sess);
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 2323432a0edb..5504ab11decc 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -4145,7 +4145,7 @@ static void qla4xxx_mem_free(struct scsi_qla_host *ha)
dma_free_coherent(&ha->pdev->dev, ha->queues_len, ha->queues,
ha->queues_dma);

- if (ha->fw_dump)
+ if (ha->fw_dump)
vfree(ha->fw_dump);

ha->queues_len = 0;
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 1e38bb967871..0d41a7dc1d6b 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -5023,6 +5023,7 @@ static int ufshcd_disable_auto_bkops(struct ufs_hba *hba)

hba->auto_bkops_enabled = false;
trace_ufshcd_auto_bkops_state(dev_name(hba->dev), "Disabled");
+ hba->is_urgent_bkops_lvl_checked = false;
out:
return err;
}
@@ -5047,6 +5048,7 @@ static void ufshcd_force_reset_auto_bkops(struct ufs_hba *hba)
hba->ee_ctrl_mask &= ~MASK_EE_URGENT_BKOPS;
ufshcd_disable_auto_bkops(hba);
}
+ hba->is_urgent_bkops_lvl_checked = false;
}

static inline int ufshcd_get_bkops_status(struct ufs_hba *hba, u32 *status)
@@ -5093,6 +5095,7 @@ static int ufshcd_bkops_ctrl(struct ufs_hba *hba,
err = ufshcd_enable_auto_bkops(hba);
else
err = ufshcd_disable_auto_bkops(hba);
+ hba->urgent_bkops_lvl = curr_status;
out:
return err;
}
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 1c8b349379af..77c4a9abe365 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -688,7 +688,9 @@ struct dwc3_ep {
#define DWC3_EP_STALL BIT(1)
#define DWC3_EP_WEDGE BIT(2)
#define DWC3_EP_TRANSFER_STARTED BIT(3)
+#define DWC3_EP_END_TRANSFER_PENDING BIT(4)
#define DWC3_EP_PENDING_REQUEST BIT(5)
+#define DWC3_EP_DELAY_START BIT(6)

/* This last one is specific to EP0 */
#define DWC3_EP0_DIR_IN BIT(31)
diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
index fd1b100d2927..6dee4dabc0a4 100644
--- a/drivers/usb/dwc3/ep0.c
+++ b/drivers/usb/dwc3/ep0.c
@@ -1136,8 +1136,10 @@ void dwc3_ep0_interrupt(struct dwc3 *dwc,
case DWC3_DEPEVT_EPCMDCMPLT:
cmd = DEPEVT_PARAMETER_CMD(event->parameters);

- if (cmd == DWC3_DEPCMD_ENDTRANSFER)
+ if (cmd == DWC3_DEPCMD_ENDTRANSFER) {
+ dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING;
dep->flags &= ~DWC3_EP_TRANSFER_STARTED;
+ }
break;
}
}
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 154f3f3e8cff..8b95be897078 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1447,6 +1447,12 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
list_add_tail(&req->list, &dep->pending_list);
req->status = DWC3_REQUEST_STATUS_QUEUED;

+ /* Start the transfer only after the END_TRANSFER is completed */
+ if (dep->flags & DWC3_EP_END_TRANSFER_PENDING) {
+ dep->flags |= DWC3_EP_DELAY_START;
+ return 0;
+ }
+
/*
* NOTICE: Isochronous endpoints should NEVER be prestarted. We must
* wait for a XferNotReady event so we will know what's the current
@@ -2625,8 +2631,14 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
cmd = DEPEVT_PARAMETER_CMD(event->parameters);

if (cmd == DWC3_DEPCMD_ENDTRANSFER) {
+ dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING;
dep->flags &= ~DWC3_EP_TRANSFER_STARTED;
dwc3_gadget_ep_cleanup_cancelled_requests(dep);
+ if ((dep->flags & DWC3_EP_DELAY_START) &&
+ !usb_endpoint_xfer_isoc(dep->endpoint.desc))
+ __dwc3_gadget_kick_transfer(dep);
+
+ dep->flags &= ~DWC3_EP_DELAY_START;
}
break;
case DWC3_DEPEVT_STREAMEVT:
@@ -2683,7 +2695,8 @@ static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force,
u32 cmd;
int ret;

- if (!(dep->flags & DWC3_EP_TRANSFER_STARTED))
+ if (!(dep->flags & DWC3_EP_TRANSFER_STARTED) ||
+ (dep->flags & DWC3_EP_END_TRANSFER_PENDING))
return;

/*
@@ -2728,6 +2741,8 @@ static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force,

if (!interrupt)
dep->flags &= ~DWC3_EP_TRANSFER_STARTED;
+ else
+ dep->flags |= DWC3_EP_END_TRANSFER_PENDING;

if (dwc3_is_usb31(dwc) || dwc->revision < DWC3_REVISION_310A)
udelay(100);
diff --git a/drivers/usb/gadget/function/f_ecm.c b/drivers/usb/gadget/function/f_ecm.c
index 460d5d7c984f..7f5cf488b2b1 100644
--- a/drivers/usb/gadget/function/f_ecm.c
+++ b/drivers/usb/gadget/function/f_ecm.c
@@ -52,6 +52,7 @@ struct f_ecm {
struct usb_ep *notify;
struct usb_request *notify_req;
u8 notify_state;
+ atomic_t notify_count;
bool is_open;

/* FIXME is_open needs some irq-ish locking
@@ -380,7 +381,7 @@ static void ecm_do_notify(struct f_ecm *ecm)
int status;

/* notification already in flight? */
- if (!req)
+ if (atomic_read(&ecm->notify_count))
return;

event = req->buf;
@@ -420,10 +421,10 @@ static void ecm_do_notify(struct f_ecm *ecm)
event->bmRequestType = 0xA1;
event->wIndex = cpu_to_le16(ecm->ctrl_id);

- ecm->notify_req = NULL;
+ atomic_inc(&ecm->notify_count);
status = usb_ep_queue(ecm->notify, req, GFP_ATOMIC);
if (status < 0) {
- ecm->notify_req = req;
+ atomic_dec(&ecm->notify_count);
DBG(cdev, "notify --> %d\n", status);
}
}
@@ -448,17 +449,19 @@ static void ecm_notify_complete(struct usb_ep *ep, struct usb_request *req)
switch (req->status) {
case 0:
/* no fault */
+ atomic_dec(&ecm->notify_count);
break;
case -ECONNRESET:
case -ESHUTDOWN:
+ atomic_set(&ecm->notify_count, 0);
ecm->notify_state = ECM_NOTIFY_NONE;
break;
default:
DBG(cdev, "event %02x --> %d\n",
event->bNotificationType, req->status);
+ atomic_dec(&ecm->notify_count);
break;
}
- ecm->notify_req = req;
ecm_do_notify(ecm);
}

@@ -907,6 +910,11 @@ static void ecm_unbind(struct usb_configuration *c, struct usb_function *f)

usb_free_all_descriptors(f);

+ if (atomic_read(&ecm->notify_count)) {
+ usb_ep_dequeue(ecm->notify, ecm->notify_req);
+ atomic_set(&ecm->notify_count, 0);
+ }
+
kfree(ecm->notify_req->buf);
usb_ep_free_request(ecm->notify, ecm->notify_req);
}
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 59d9d512dcda..ced2581cf99f 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1062,6 +1062,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
req->num_sgs = io_data->sgt.nents;
} else {
req->buf = data;
+ req->num_sgs = 0;
}
req->length = data_len;

@@ -1105,6 +1106,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
req->num_sgs = io_data->sgt.nents;
} else {
req->buf = data;
+ req->num_sgs = 0;
}
req->length = data_len;

diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c
index 2d6e76e4cffa..1d900081b1f0 100644
--- a/drivers/usb/gadget/function/f_ncm.c
+++ b/drivers/usb/gadget/function/f_ncm.c
@@ -53,6 +53,7 @@ struct f_ncm {
struct usb_ep *notify;
struct usb_request *notify_req;
u8 notify_state;
+ atomic_t notify_count;
bool is_open;

const struct ndp_parser_opts *parser_opts;
@@ -547,7 +548,7 @@ static void ncm_do_notify(struct f_ncm *ncm)
int status;

/* notification already in flight? */
- if (!req)
+ if (atomic_read(&ncm->notify_count))
return;

event = req->buf;
@@ -587,7 +588,8 @@ static void ncm_do_notify(struct f_ncm *ncm)
event->bmRequestType = 0xA1;
event->wIndex = cpu_to_le16(ncm->ctrl_id);

- ncm->notify_req = NULL;
+ atomic_inc(&ncm->notify_count);
+
/*
* In double buffering if there is a space in FIFO,
* completion callback can be called right after the call,
@@ -597,7 +599,7 @@ static void ncm_do_notify(struct f_ncm *ncm)
status = usb_ep_queue(ncm->notify, req, GFP_ATOMIC);
spin_lock(&ncm->lock);
if (status < 0) {
- ncm->notify_req = req;
+ atomic_dec(&ncm->notify_count);
DBG(cdev, "notify --> %d\n", status);
}
}
@@ -632,17 +634,19 @@ static void ncm_notify_complete(struct usb_ep *ep, struct usb_request *req)
case 0:
VDBG(cdev, "Notification %02x sent\n",
event->bNotificationType);
+ atomic_dec(&ncm->notify_count);
break;
case -ECONNRESET:
case -ESHUTDOWN:
+ atomic_set(&ncm->notify_count, 0);
ncm->notify_state = NCM_NOTIFY_NONE;
break;
default:
DBG(cdev, "event %02x --> %d\n",
event->bNotificationType, req->status);
+ atomic_dec(&ncm->notify_count);
break;
}
- ncm->notify_req = req;
ncm_do_notify(ncm);
spin_unlock(&ncm->lock);
}
@@ -1649,6 +1653,11 @@ static void ncm_unbind(struct usb_configuration *c, struct usb_function *f)
ncm_string_defs[0].id = 0;
usb_free_all_descriptors(f);

+ if (atomic_read(&ncm->notify_count)) {
+ usb_ep_dequeue(ncm->notify, ncm->notify_req);
+ atomic_set(&ncm->notify_count, 0);
+ }
+
kfree(ncm->notify_req->buf);
usb_ep_free_request(ncm->notify, ncm->notify_req);
}
diff --git a/drivers/usb/gadget/legacy/cdc2.c b/drivers/usb/gadget/legacy/cdc2.c
index da1c37933ca1..8d7a556ece30 100644
--- a/drivers/usb/gadget/legacy/cdc2.c
+++ b/drivers/usb/gadget/legacy/cdc2.c
@@ -225,7 +225,7 @@ static struct usb_composite_driver cdc_driver = {
.name = "g_cdc",
.dev = &device_desc,
.strings = dev_strings,
- .max_speed = USB_SPEED_HIGH,
+ .max_speed = USB_SPEED_SUPER,
.bind = cdc_bind,
.unbind = cdc_unbind,
};
diff --git a/drivers/usb/gadget/legacy/g_ffs.c b/drivers/usb/gadget/legacy/g_ffs.c
index b640ed3fcf70..ae6d8f7092b8 100644
--- a/drivers/usb/gadget/legacy/g_ffs.c
+++ b/drivers/usb/gadget/legacy/g_ffs.c
@@ -149,7 +149,7 @@ static struct usb_composite_driver gfs_driver = {
.name = DRIVER_NAME,
.dev = &gfs_dev_desc,
.strings = gfs_dev_strings,
- .max_speed = USB_SPEED_HIGH,
+ .max_speed = USB_SPEED_SUPER,
.bind = gfs_bind,
.unbind = gfs_unbind,
};
diff --git a/drivers/usb/gadget/legacy/multi.c b/drivers/usb/gadget/legacy/multi.c
index 50515f9e1022..ec9749845660 100644
--- a/drivers/usb/gadget/legacy/multi.c
+++ b/drivers/usb/gadget/legacy/multi.c
@@ -482,7 +482,7 @@ static struct usb_composite_driver multi_driver = {
.name = "g_multi",
.dev = &device_desc,
.strings = dev_strings,
- .max_speed = USB_SPEED_HIGH,
+ .max_speed = USB_SPEED_SUPER,
.bind = multi_bind,
.unbind = multi_unbind,
.needs_serial = 1,
diff --git a/drivers/usb/gadget/legacy/ncm.c b/drivers/usb/gadget/legacy/ncm.c
index 8465f081e921..c61e71ba7045 100644
--- a/drivers/usb/gadget/legacy/ncm.c
+++ b/drivers/usb/gadget/legacy/ncm.c
@@ -197,7 +197,7 @@ static struct usb_composite_driver ncm_driver = {
.name = "g_ncm",
.dev = &device_desc,
.strings = dev_strings,
- .max_speed = USB_SPEED_HIGH,
+ .max_speed = USB_SPEED_SUPER,
.bind = gncm_bind,
.unbind = gncm_unbind,
};
diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c
index 8b4ff9fff340..753645bb2527 100644
--- a/drivers/usb/typec/tcpm/tcpci.c
+++ b/drivers/usb/typec/tcpm/tcpci.c
@@ -591,6 +591,12 @@ static int tcpci_probe(struct i2c_client *client,
static int tcpci_remove(struct i2c_client *client)
{
struct tcpci_chip *chip = i2c_get_clientdata(client);
+ int err;
+
+ /* Disable chip interrupts before unregistering port */
+ err = tcpci_write16(chip->tcpci, TCPC_ALERT_MASK, 0);
+ if (err < 0)
+ return err;

tcpci_unregister_port(chip->tcpci);

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 9f4117766bb1..c962d9b370c6 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -474,7 +474,9 @@ static int init_vqs(struct virtio_balloon *vb)
names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
+ callbacks[VIRTIO_BALLOON_VQ_STATS] = NULL;
names[VIRTIO_BALLOON_VQ_STATS] = NULL;
+ callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;

if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
@@ -898,8 +900,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
if (IS_ERR(vb->vb_dev_info.inode)) {
err = PTR_ERR(vb->vb_dev_info.inode);
- kern_unmount(balloon_mnt);
- goto out_del_vqs;
+ goto out_kern_unmount;
}
vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
#endif
@@ -910,13 +911,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
*/
if (virtqueue_get_vring_size(vb->free_page_vq) < 2) {
err = -ENOSPC;
- goto out_del_vqs;
+ goto out_iput;
}
vb->balloon_wq = alloc_workqueue("balloon-wq",
WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
if (!vb->balloon_wq) {
err = -ENOMEM;
- goto out_del_vqs;
+ goto out_iput;
}
INIT_WORK(&vb->report_free_page_work, report_free_page_func);
vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP;
@@ -950,6 +951,12 @@ static int virtballoon_probe(struct virtio_device *vdev)
out_del_balloon_wq:
if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
destroy_workqueue(vb->balloon_wq);
+out_iput:
+#ifdef CONFIG_BALLOON_COMPACTION
+ iput(vb->vb_dev_info.inode);
+out_kern_unmount:
+ kern_unmount(balloon_mnt);
+#endif
out_del_vqs:
vdev->config->del_vqs(vdev);
out_free_vb:
@@ -965,6 +972,10 @@ static void remove_common(struct virtio_balloon *vb)
leak_balloon(vb, vb->num_pages);
update_balloon_size(vb);

+ /* There might be free pages that are being reported: release them. */
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+ return_free_pages_to_mm(vb, ULONG_MAX);
+
/* Now we reset the device so we can clean up the queues. */
vb->vdev->config->reset(vb->vdev);

diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index f2862f66c2ac..222d630c41fc 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -294,7 +294,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
/* Best option: one for change interrupt, one per vq. */
nvectors = 1;
for (i = 0; i < nvqs; ++i)
- if (callbacks[i])
+ if (names[i] && callbacks[i])
++nvectors;
} else {
/* Second best: one for change, shared for all vqs. */
diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c
index 21e8085b848b..861daf4f37b2 100644
--- a/drivers/watchdog/watchdog_core.c
+++ b/drivers/watchdog/watchdog_core.c
@@ -147,6 +147,25 @@ int watchdog_init_timeout(struct watchdog_device *wdd,
}
EXPORT_SYMBOL_GPL(watchdog_init_timeout);

+static int watchdog_reboot_notifier(struct notifier_block *nb,
+ unsigned long code, void *data)
+{
+ struct watchdog_device *wdd;
+
+ wdd = container_of(nb, struct watchdog_device, reboot_nb);
+ if (code == SYS_DOWN || code == SYS_HALT) {
+ if (watchdog_active(wdd)) {
+ int ret;
+
+ ret = wdd->ops->stop(wdd);
+ if (ret)
+ return NOTIFY_BAD;
+ }
+ }
+
+ return NOTIFY_DONE;
+}
+
static int watchdog_restart_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
@@ -235,6 +254,19 @@ static int __watchdog_register_device(struct watchdog_device *wdd)
}
}

+ if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) {
+ wdd->reboot_nb.notifier_call = watchdog_reboot_notifier;
+
+ ret = register_reboot_notifier(&wdd->reboot_nb);
+ if (ret) {
+ pr_err("watchdog%d: Cannot register reboot notifier (%d)\n",
+ wdd->id, ret);
+ watchdog_dev_unregister(wdd);
+ ida_simple_remove(&watchdog_ida, id);
+ return ret;
+ }
+ }
+
if (wdd->ops->restart) {
wdd->restart_nb.notifier_call = watchdog_restart_notifier;

@@ -289,6 +321,9 @@ static void __watchdog_unregister_device(struct watchdog_device *wdd)
if (wdd->ops->restart)
unregister_restart_handler(&wdd->restart_nb);

+ if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status))
+ unregister_reboot_notifier(&wdd->reboot_nb);
+
watchdog_dev_unregister(wdd);
ida_simple_remove(&watchdog_ida, wdd->id);
}
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index 62483a99105c..ce04edc69e5f 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -38,7 +38,6 @@
#include <linux/miscdevice.h> /* For handling misc devices */
#include <linux/module.h> /* For module stuff/... */
#include <linux/mutex.h> /* For mutexes */
-#include <linux/reboot.h> /* For reboot notifier */
#include <linux/slab.h> /* For memory functions */
#include <linux/types.h> /* For standard types (like size_t) */
#include <linux/watchdog.h> /* For watchdog specific items */
@@ -1077,25 +1076,6 @@ static void watchdog_cdev_unregister(struct watchdog_device *wdd)
put_device(&wd_data->dev);
}

-static int watchdog_reboot_notifier(struct notifier_block *nb,
- unsigned long code, void *data)
-{
- struct watchdog_device *wdd;
-
- wdd = container_of(nb, struct watchdog_device, reboot_nb);
- if (code == SYS_DOWN || code == SYS_HALT) {
- if (watchdog_active(wdd)) {
- int ret;
-
- ret = wdd->ops->stop(wdd);
- if (ret)
- return NOTIFY_BAD;
- }
- }
-
- return NOTIFY_DONE;
-}
-
/*
* watchdog_dev_register: register a watchdog device
* @wdd: watchdog device
@@ -1114,22 +1094,8 @@ int watchdog_dev_register(struct watchdog_device *wdd)
return ret;

ret = watchdog_register_pretimeout(wdd);
- if (ret) {
+ if (ret)
watchdog_cdev_unregister(wdd);
- return ret;
- }
-
- if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) {
- wdd->reboot_nb.notifier_call = watchdog_reboot_notifier;
-
- ret = devm_register_reboot_notifier(&wdd->wd_data->dev,
- &wdd->reboot_nb);
- if (ret) {
- pr_err("watchdog%d: Cannot register reboot notifier (%d)\n",
- wdd->id, ret);
- watchdog_dev_unregister(wdd);
- }
- }

return ret;
}
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index 6d12fc368210..a8d24433c8e9 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -94,7 +94,7 @@ static void watch_target(struct xenbus_watch *watch,
"%llu", &static_max) == 1))
static_max >>= PAGE_SHIFT - 10;
else
- static_max = new_target;
+ static_max = balloon_stats.current_pages;

target_diff = (xen_pv_domain() || xen_initial_domain()) ? 0
: static_max - balloon_stats.target_pages;
diff --git a/fs/aio.c b/fs/aio.c
index 0d9a559d488c..4115d5ad6b90 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1610,6 +1610,14 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
return 0;
}

+static void aio_poll_put_work(struct work_struct *work)
+{
+ struct poll_iocb *req = container_of(work, struct poll_iocb, work);
+ struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
+
+ iocb_put(iocb);
+}
+
static void aio_poll_complete_work(struct work_struct *work)
{
struct poll_iocb *req = container_of(work, struct poll_iocb, work);
@@ -1674,6 +1682,8 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
list_del_init(&req->wait.entry);

if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
+ struct kioctx *ctx = iocb->ki_ctx;
+
/*
* Try to complete the iocb inline if we can. Use
* irqsave/irqrestore because not all filesystems (e.g. fuse)
@@ -1683,8 +1693,14 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
list_del(&iocb->ki_list);
iocb->ki_res.res = mangle_poll(mask);
req->done = true;
- spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
- iocb_put(iocb);
+ if (iocb->ki_eventfd && eventfd_signal_count()) {
+ iocb = NULL;
+ INIT_WORK(&req->work, aio_poll_put_work);
+ schedule_work(&req->work);
+ }
+ spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+ if (iocb)
+ iocb_put(iocb);
} else {
schedule_work(&req->work);
}
diff --git a/fs/attr.c b/fs/attr.c
index df28035aa23e..b4bbdbd4c8ca 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -183,18 +183,12 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
inode->i_uid = attr->ia_uid;
if (ia_valid & ATTR_GID)
inode->i_gid = attr->ia_gid;
- if (ia_valid & ATTR_ATIME) {
- inode->i_atime = timestamp_truncate(attr->ia_atime,
- inode);
- }
- if (ia_valid & ATTR_MTIME) {
- inode->i_mtime = timestamp_truncate(attr->ia_mtime,
- inode);
- }
- if (ia_valid & ATTR_CTIME) {
- inode->i_ctime = timestamp_truncate(attr->ia_ctime,
- inode);
- }
+ if (ia_valid & ATTR_ATIME)
+ inode->i_atime = attr->ia_atime;
+ if (ia_valid & ATTR_MTIME)
+ inode->i_mtime = attr->ia_mtime;
+ if (ia_valid & ATTR_CTIME)
+ inode->i_ctime = attr->ia_ctime;
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;

@@ -268,8 +262,13 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
attr->ia_ctime = now;
if (!(ia_valid & ATTR_ATIME_SET))
attr->ia_atime = now;
+ else
+ attr->ia_atime = timestamp_truncate(attr->ia_atime, inode);
if (!(ia_valid & ATTR_MTIME_SET))
attr->ia_mtime = now;
+ else
+ attr->ia_mtime = timestamp_truncate(attr->ia_mtime, inode);
+
if (ia_valid & ATTR_KILL_PRIV) {
error = security_inode_need_killpriv(dentry);
if (error < 0)
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index da9b0f060a9d..a989105d39c8 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -330,12 +330,10 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem)
{
write_lock(&fs_info->tree_mod_log_lock);
- spin_lock(&fs_info->tree_mod_seq_lock);
if (!elem->seq) {
elem->seq = btrfs_inc_tree_mod_seq(fs_info);
list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
}
- spin_unlock(&fs_info->tree_mod_seq_lock);
write_unlock(&fs_info->tree_mod_log_lock);

return elem->seq;
@@ -355,7 +353,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
if (!seq_putting)
return;

- spin_lock(&fs_info->tree_mod_seq_lock);
+ write_lock(&fs_info->tree_mod_log_lock);
list_del(&elem->list);
elem->seq = 0;

@@ -366,19 +364,17 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
* blocker with lower sequence number exists, we
* cannot remove anything from the log
*/
- spin_unlock(&fs_info->tree_mod_seq_lock);
+ write_unlock(&fs_info->tree_mod_log_lock);
return;
}
min_seq = cur_elem->seq;
}
}
- spin_unlock(&fs_info->tree_mod_seq_lock);

/*
* anything that's lower than the lowest existing (read: blocked)
* sequence number can be removed from the tree.
*/
- write_lock(&fs_info->tree_mod_log_lock);
tm_root = &fs_info->tree_mod_log;
for (node = rb_first(tm_root); node; node = next) {
next = rb_next(node);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5e9f80b28fcf..290ca193c6c0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -671,14 +671,12 @@ struct btrfs_fs_info {
atomic_t nr_delayed_iputs;
wait_queue_head_t delayed_iputs_wait;

- /* this protects tree_mod_seq_list */
- spinlock_t tree_mod_seq_lock;
atomic64_t tree_mod_seq;
- struct list_head tree_mod_seq_list;

- /* this protects tree_mod_log */
+ /* this protects tree_mod_log and tree_mod_seq_list */
rwlock_t tree_mod_log_lock;
struct rb_root tree_mod_log;
+ struct list_head tree_mod_seq_list;

atomic_t async_delalloc_pages;

diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index df3bd880061d..dfdb7d4f8406 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -492,7 +492,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
if (head->is_data)
return;

- spin_lock(&fs_info->tree_mod_seq_lock);
+ read_lock(&fs_info->tree_mod_log_lock);
if (!list_empty(&fs_info->tree_mod_seq_list)) {
struct seq_list *elem;

@@ -500,7 +500,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
struct seq_list, list);
seq = elem->seq;
}
- spin_unlock(&fs_info->tree_mod_seq_lock);
+ read_unlock(&fs_info->tree_mod_log_lock);

again:
for (node = rb_first_cached(&head->ref_tree); node;
@@ -518,7 +518,7 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
struct seq_list *elem;
int ret = 0;

- spin_lock(&fs_info->tree_mod_seq_lock);
+ read_lock(&fs_info->tree_mod_log_lock);
if (!list_empty(&fs_info->tree_mod_seq_list)) {
elem = list_first_entry(&fs_info->tree_mod_seq_list,
struct seq_list, list);
@@ -531,7 +531,7 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
}
}

- spin_unlock(&fs_info->tree_mod_seq_lock);
+ read_unlock(&fs_info->tree_mod_log_lock);
return ret;
}

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index bae334212ee2..7becc5e96f92 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2016,7 +2016,7 @@ static void free_root_extent_buffers(struct btrfs_root *root)
}

/* helper to cleanup tree roots */
-static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
+static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root)
{
free_root_extent_buffers(info->tree_root);

@@ -2025,7 +2025,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
free_root_extent_buffers(info->csum_root);
free_root_extent_buffers(info->quota_root);
free_root_extent_buffers(info->uuid_root);
- if (chunk_root)
+ if (free_chunk_root)
free_root_extent_buffers(info->chunk_root);
free_root_extent_buffers(info->free_space_root);
}
@@ -2652,7 +2652,6 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
- spin_lock_init(&fs_info->tree_mod_seq_lock);
spin_lock_init(&fs_info->super_lock);
spin_lock_init(&fs_info->buffer_lock);
spin_lock_init(&fs_info->unused_bgs_lock);
@@ -3324,7 +3323,7 @@ int open_ctree(struct super_block *sb,
btrfs_put_block_group_cache(fs_info);

fail_tree_roots:
- free_root_pointers(fs_info, 1);
+ free_root_pointers(fs_info, true);
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);

fail_sb_buffer:
@@ -3356,7 +3355,7 @@ int open_ctree(struct super_block *sb,
if (!btrfs_test_opt(fs_info, USEBACKUPROOT))
goto fail_tree_roots;

- free_root_pointers(fs_info, 0);
+ free_root_pointers(fs_info, false);

/* don't use the log in recovery mode, it won't be valid */
btrfs_set_super_log_root(disk_super, 0);
@@ -4047,10 +4046,17 @@ void close_ctree(struct btrfs_fs_info *fs_info)
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
btrfs_stop_all_workers(fs_info);

- btrfs_free_block_groups(fs_info);
-
clear_bit(BTRFS_FS_OPEN, &fs_info->flags);
- free_root_pointers(fs_info, 1);
+ free_root_pointers(fs_info, true);
+
+ /*
+ * We must free the block groups after dropping the fs_roots as we could
+ * have had an IO error and have left over tree log blocks that aren't
+ * cleaned up until the fs roots are freed. This makes the block group
+ * accounting appear to be wrong because there's pending reserved bytes,
+ * so make sure we do the block group cleanup afterwards.
+ */
+ btrfs_free_block_groups(fs_info);

iput(fs_info->btree_inode);

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 33c6b191ca59..284540cdbbd9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1583,21 +1583,25 @@ void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
/* Find first extent with bits cleared */
while (1) {
node = __etree_search(tree, start, &next, &prev, NULL, NULL);
- if (!node) {
+ if (!node && !next && !prev) {
+ /*
+ * Tree is completely empty, send full range and let
+ * caller deal with it
+ */
+ *start_ret = 0;
+ *end_ret = -1;
+ goto out;
+ } else if (!node && !next) {
+ /*
+ * We are past the last allocated chunk, set start at
+ * the end of the last extent.
+ */
+ state = rb_entry(prev, struct extent_state, rb_node);
+ *start_ret = state->end + 1;
+ *end_ret = -1;
+ goto out;
+ } else if (!node) {
node = next;
- if (!node) {
- /*
- * We are past the last allocated chunk,
- * set start at the end of the last extent. The
- * device alloc tree should never be empty so
- * prev is always set.
- */
- ASSERT(prev);
- state = rb_entry(prev, struct extent_state, rb_node);
- *start_ret = state->end + 1;
- *end_ret = -1;
- goto out;
- }
}
/*
* At this point 'node' either contains 'start' or start is
@@ -3938,6 +3942,11 @@ int btree_write_cache_pages(struct address_space *mapping,
if (wbc->range_cyclic) {
index = mapping->writeback_index; /* Start from prev offset */
end = -1;
+ /*
+ * Start from the beginning does not need to cycle over the
+ * range, mark it as scanned.
+ */
+ scanned = (index == 0);
} else {
index = wbc->range_start >> PAGE_SHIFT;
end = wbc->range_end >> PAGE_SHIFT;
@@ -3955,7 +3964,6 @@ int btree_write_cache_pages(struct address_space *mapping,
tag))) {
unsigned i;

- scanned = 1;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];

@@ -4084,6 +4092,11 @@ static int extent_write_cache_pages(struct address_space *mapping,
if (wbc->range_cyclic) {
index = mapping->writeback_index; /* Start from prev offset */
end = -1;
+ /*
+ * Start from the beginning does not need to cycle over the
+ * range, mark it as scanned.
+ */
+ scanned = (index == 0);
} else {
index = wbc->range_start >> PAGE_SHIFT;
end = wbc->range_end >> PAGE_SHIFT;
@@ -4117,7 +4130,6 @@ static int extent_write_cache_pages(struct address_space *mapping,
&index, end, tag))) {
unsigned i;

- scanned = 1;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];

@@ -4177,7 +4189,16 @@ static int extent_write_cache_pages(struct address_space *mapping,
*/
scanned = 1;
index = 0;
- goto retry;
+
+ /*
+ * If we're looping we could run into a page that is locked by a
+ * writer and that writer could be waiting on writeback for a
+ * page in our current bio, and thus deadlock, so flush the
+ * write bio here.
+ */
+ ret = flush_write_bio(epd);
+ if (!ret)
+ goto retry;
}

if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8e86b2d700c4..d88b8d8897cc 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3244,6 +3244,7 @@ static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
struct inode *dst, u64 dst_loff)
{
+ const u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
int ret;

/*
@@ -3251,7 +3252,7 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
* source range to serialize with relocation.
*/
btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
- ret = btrfs_clone(src, dst, loff, len, len, dst_loff, 1);
+ ret = btrfs_clone(src, dst, loff, len, ALIGN(len, bs), dst_loff, 1);
btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);

return ret;
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 99fe9bf3fdac..98f9684e7ffc 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -121,7 +121,6 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
spin_lock_init(&fs_info->qgroup_lock);
spin_lock_init(&fs_info->super_lock);
spin_lock_init(&fs_info->fs_roots_radix_lock);
- spin_lock_init(&fs_info->tree_mod_seq_lock);
mutex_init(&fs_info->qgroup_ioctl_lock);
mutex_init(&fs_info->qgroup_rescan_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 123d9a614357..df7ce874a74b 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -441,8 +441,17 @@ static int test_find_first_clear_extent_bit(void)
int ret = -EINVAL;

test_msg("running find_first_clear_extent_bit test");
+
extent_io_tree_init(NULL, &tree, IO_TREE_SELFTEST, NULL);

+ /* Test correct handling of empty tree */
+ find_first_clear_extent_bit(&tree, 0, &start, &end, CHUNK_TRIMMED);
+ if (start != 0 || end != -1) {
+ test_err(
+ "error getting a range from completely empty tree: start %llu end %llu",
+ start, end);
+ goto out;
+ }
/*
* Set 1M-4M alloc/discard and 32M-64M thus leaving a hole between
* 4M-32M
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 8624bdee8c5b..ceffec752234 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -77,13 +77,14 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
}
}

-static noinline void switch_commit_roots(struct btrfs_transaction *trans)
+static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
{
+ struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root, *tmp;

down_write(&fs_info->commit_root_sem);
- list_for_each_entry_safe(root, tmp, &trans->switch_commits,
+ list_for_each_entry_safe(root, tmp, &cur_trans->switch_commits,
dirty_list) {
list_del_init(&root->dirty_list);
free_extent_buffer(root->commit_root);
@@ -95,16 +96,17 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans)
}

/* We can free old roots now. */
- spin_lock(&trans->dropped_roots_lock);
- while (!list_empty(&trans->dropped_roots)) {
- root = list_first_entry(&trans->dropped_roots,
+ spin_lock(&cur_trans->dropped_roots_lock);
+ while (!list_empty(&cur_trans->dropped_roots)) {
+ root = list_first_entry(&cur_trans->dropped_roots,
struct btrfs_root, root_list);
list_del_init(&root->root_list);
- spin_unlock(&trans->dropped_roots_lock);
+ spin_unlock(&cur_trans->dropped_roots_lock);
+ btrfs_free_log(trans, root);
btrfs_drop_and_free_fs_root(fs_info, root);
- spin_lock(&trans->dropped_roots_lock);
+ spin_lock(&cur_trans->dropped_roots_lock);
}
- spin_unlock(&trans->dropped_roots_lock);
+ spin_unlock(&cur_trans->dropped_roots_lock);
up_write(&fs_info->commit_root_sem);
}

@@ -1359,7 +1361,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
ret = commit_cowonly_roots(trans);
if (ret)
goto out;
- switch_commit_roots(trans->transaction);
+ switch_commit_roots(trans);
ret = btrfs_write_and_wait_transaction(trans);
if (ret)
btrfs_handle_fs_error(fs_info, ret,
@@ -1949,6 +1951,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
struct btrfs_transaction *prev_trans = NULL;
int ret;

+ /*
+ * Some places just start a transaction to commit it. We need to make
+ * sure that if this commit fails that the abort code actually marks the
+ * transaction as failed, so set trans->dirty to make the abort code do
+ * the right thing.
+ */
+ trans->dirty = true;
+
/* Stop the commit early if ->aborted is set */
if (unlikely(READ_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;
@@ -2237,7 +2247,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
list_add_tail(&fs_info->chunk_root->dirty_list,
&cur_trans->switch_commits);

- switch_commit_roots(cur_trans);
+ switch_commit_roots(trans);

ASSERT(list_empty(&cur_trans->dirty_bgs));
ASSERT(list_empty(&cur_trans->io_bgs));
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ab27e6cd9b3e..6f2178618c22 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3953,7 +3953,7 @@ static int log_csums(struct btrfs_trans_handle *trans,
static noinline int copy_items(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_path *dst_path,
- struct btrfs_path *src_path, u64 *last_extent,
+ struct btrfs_path *src_path,
int start_slot, int nr, int inode_only,
u64 logged_isize)
{
@@ -3964,7 +3964,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
struct btrfs_file_extent_item *extent;
struct btrfs_inode_item *inode_item;
struct extent_buffer *src = src_path->nodes[0];
- struct btrfs_key first_key, last_key, key;
int ret;
struct btrfs_key *ins_keys;
u32 *ins_sizes;
@@ -3972,9 +3971,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
int i;
struct list_head ordered_sums;
int skip_csum = inode->flags & BTRFS_INODE_NODATASUM;
- bool has_extents = false;
- bool need_find_last_extent = true;
- bool done = false;

INIT_LIST_HEAD(&ordered_sums);

@@ -3983,8 +3979,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
if (!ins_data)
return -ENOMEM;

- first_key.objectid = (u64)-1;
-
ins_sizes = (u32 *)ins_data;
ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));

@@ -4005,9 +3999,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,

src_offset = btrfs_item_ptr_offset(src, start_slot + i);

- if (i == nr - 1)
- last_key = ins_keys[i];
-
if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
inode_item = btrfs_item_ptr(dst_path->nodes[0],
dst_path->slots[0],
@@ -4021,20 +4012,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
src_offset, ins_sizes[i]);
}

- /*
- * We set need_find_last_extent here in case we know we were
- * processing other items and then walk into the first extent in
- * the inode. If we don't hit an extent then nothing changes,
- * we'll do the last search the next time around.
- */
- if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) {
- has_extents = true;
- if (first_key.objectid == (u64)-1)
- first_key = ins_keys[i];
- } else {
- need_find_last_extent = false;
- }
-
/* take a reference on file data extents so that truncates
* or deletes of this inode don't have to relog the inode
* again
@@ -4100,167 +4077,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
kfree(sums);
}

- if (!has_extents)
- return ret;
-
- if (need_find_last_extent && *last_extent == first_key.offset) {
- /*
- * We don't have any leafs between our current one and the one
- * we processed before that can have file extent items for our
- * inode (and have a generation number smaller than our current
- * transaction id).
- */
- need_find_last_extent = false;
- }
-
- /*
- * Because we use btrfs_search_forward we could skip leaves that were
- * not modified and then assume *last_extent is valid when it really
- * isn't. So back up to the previous leaf and read the end of the last
- * extent before we go and fill in holes.
- */
- if (need_find_last_extent) {
- u64 len;
-
- ret = btrfs_prev_leaf(inode->root, src_path);
- if (ret < 0)
- return ret;
- if (ret)
- goto fill_holes;
- if (src_path->slots[0])
- src_path->slots[0]--;
- src = src_path->nodes[0];
- btrfs_item_key_to_cpu(src, &key, src_path->slots[0]);
- if (key.objectid != btrfs_ino(inode) ||
- key.type != BTRFS_EXTENT_DATA_KEY)
- goto fill_holes;
- extent = btrfs_item_ptr(src, src_path->slots[0],
- struct btrfs_file_extent_item);
- if (btrfs_file_extent_type(src, extent) ==
- BTRFS_FILE_EXTENT_INLINE) {
- len = btrfs_file_extent_ram_bytes(src, extent);
- *last_extent = ALIGN(key.offset + len,
- fs_info->sectorsize);
- } else {
- len = btrfs_file_extent_num_bytes(src, extent);
- *last_extent = key.offset + len;
- }
- }
-fill_holes:
- /* So we did prev_leaf, now we need to move to the next leaf, but a few
- * things could have happened
- *
- * 1) A merge could have happened, so we could currently be on a leaf
- * that holds what we were copying in the first place.
- * 2) A split could have happened, and now not all of the items we want
- * are on the same leaf.
- *
- * So we need to adjust how we search for holes, we need to drop the
- * path and re-search for the first extent key we found, and then walk
- * forward until we hit the last one we copied.
- */
- if (need_find_last_extent) {
- /* btrfs_prev_leaf could return 1 without releasing the path */
- btrfs_release_path(src_path);
- ret = btrfs_search_slot(NULL, inode->root, &first_key,
- src_path, 0, 0);
- if (ret < 0)
- return ret;
- ASSERT(ret == 0);
- src = src_path->nodes[0];
- i = src_path->slots[0];
- } else {
- i = start_slot;
- }
-
- /*
- * Ok so here we need to go through and fill in any holes we may have
- * to make sure that holes are punched for those areas in case they had
- * extents previously.
- */
- while (!done) {
- u64 offset, len;
- u64 extent_end;
-
- if (i >= btrfs_header_nritems(src_path->nodes[0])) {
- ret = btrfs_next_leaf(inode->root, src_path);
- if (ret < 0)
- return ret;
- ASSERT(ret == 0);
- src = src_path->nodes[0];
- i = 0;
- need_find_last_extent = true;
- }
-
- btrfs_item_key_to_cpu(src, &key, i);
- if (!btrfs_comp_cpu_keys(&key, &last_key))
- done = true;
- if (key.objectid != btrfs_ino(inode) ||
- key.type != BTRFS_EXTENT_DATA_KEY) {
- i++;
- continue;
- }
- extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
- if (btrfs_file_extent_type(src, extent) ==
- BTRFS_FILE_EXTENT_INLINE) {
- len = btrfs_file_extent_ram_bytes(src, extent);
- extent_end = ALIGN(key.offset + len,
- fs_info->sectorsize);
- } else {
- len = btrfs_file_extent_num_bytes(src, extent);
- extent_end = key.offset + len;
- }
- i++;
-
- if (*last_extent == key.offset) {
- *last_extent = extent_end;
- continue;
- }
- offset = *last_extent;
- len = key.offset - *last_extent;
- ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode),
- offset, 0, 0, len, 0, len, 0, 0, 0);
- if (ret)
- break;
- *last_extent = extent_end;
- }
-
- /*
- * Check if there is a hole between the last extent found in our leaf
- * and the first extent in the next leaf. If there is one, we need to
- * log an explicit hole so that at replay time we can punch the hole.
- */
- if (ret == 0 &&
- key.objectid == btrfs_ino(inode) &&
- key.type == BTRFS_EXTENT_DATA_KEY &&
- i == btrfs_header_nritems(src_path->nodes[0])) {
- ret = btrfs_next_leaf(inode->root, src_path);
- need_find_last_extent = true;
- if (ret > 0) {
- ret = 0;
- } else if (ret == 0) {
- btrfs_item_key_to_cpu(src_path->nodes[0], &key,
- src_path->slots[0]);
- if (key.objectid == btrfs_ino(inode) &&
- key.type == BTRFS_EXTENT_DATA_KEY &&
- *last_extent < key.offset) {
- const u64 len = key.offset - *last_extent;
-
- ret = btrfs_insert_file_extent(trans, log,
- btrfs_ino(inode),
- *last_extent, 0,
- 0, len, 0, len,
- 0, 0, 0);
- *last_extent += len;
- }
- }
- }
- /*
- * Need to let the callers know we dropped the path so they should
- * re-search.
- */
- if (!ret && need_find_last_extent)
- ret = 1;
return ret;
}

@@ -4425,7 +4241,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
const u64 i_size = i_size_read(&inode->vfs_inode);
const u64 ino = btrfs_ino(inode);
struct btrfs_path *dst_path = NULL;
- u64 last_extent = (u64)-1;
+ bool dropped_extents = false;
int ins_nr = 0;
int start_slot;
int ret;
@@ -4447,8 +4263,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
if (slot >= btrfs_header_nritems(leaf)) {
if (ins_nr > 0) {
ret = copy_items(trans, inode, dst_path, path,
- &last_extent, start_slot,
- ins_nr, 1, 0);
+ start_slot, ins_nr, 1, 0);
if (ret < 0)
goto out;
ins_nr = 0;
@@ -4472,8 +4287,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
path->slots[0]++;
continue;
}
- if (last_extent == (u64)-1) {
- last_extent = key.offset;
+ if (!dropped_extents) {
/*
* Avoid logging extent items logged in past fsync calls
* and leading to duplicate keys in the log tree.
@@ -4487,6 +4301,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
} while (ret == -EAGAIN);
if (ret)
goto out;
+ dropped_extents = true;
}
if (ins_nr == 0)
start_slot = slot;
@@ -4501,7 +4316,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
}
}
if (ins_nr > 0) {
- ret = copy_items(trans, inode, dst_path, path, &last_extent,
+ ret = copy_items(trans, inode, dst_path, path,
start_slot, ins_nr, 1, 0);
if (ret > 0)
ret = 0;
@@ -4688,13 +4503,8 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,

if (slot >= nritems) {
if (ins_nr > 0) {
- u64 last_extent = 0;
-
ret = copy_items(trans, inode, dst_path, path,
- &last_extent, start_slot,
- ins_nr, 1, 0);
- /* can't be 1, extent items aren't processed */
- ASSERT(ret <= 0);
+ start_slot, ins_nr, 1, 0);
if (ret < 0)
return ret;
ins_nr = 0;
@@ -4718,13 +4528,8 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
cond_resched();
}
if (ins_nr > 0) {
- u64 last_extent = 0;
-
ret = copy_items(trans, inode, dst_path, path,
- &last_extent, start_slot,
- ins_nr, 1, 0);
- /* can't be 1, extent items aren't processed */
- ASSERT(ret <= 0);
+ start_slot, ins_nr, 1, 0);
if (ret < 0)
return ret;
}
@@ -4733,100 +4538,119 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
}

/*
- * If the no holes feature is enabled we need to make sure any hole between the
- * last extent and the i_size of our inode is explicitly marked in the log. This
- * is to make sure that doing something like:
- *
- * 1) create file with 128Kb of data
- * 2) truncate file to 64Kb
- * 3) truncate file to 256Kb
- * 4) fsync file
- * 5) <crash/power failure>
- * 6) mount fs and trigger log replay
- *
- * Will give us a file with a size of 256Kb, the first 64Kb of data match what
- * the file had in its first 64Kb of data at step 1 and the last 192Kb of the
- * file correspond to a hole. The presence of explicit holes in a log tree is
- * what guarantees that log replay will remove/adjust file extent items in the
- * fs/subvol tree.
- *
- * Here we do not need to care about holes between extents, that is already done
- * by copy_items(). We also only need to do this in the full sync path, where we
- * lookup for extents from the fs/subvol tree only. In the fast path case, we
- * lookup the list of modified extent maps and if any represents a hole, we
- * insert a corresponding extent representing a hole in the log tree.
+ * When using the NO_HOLES feature if we punched a hole that causes the
+ * deletion of entire leafs or all the extent items of the first leaf (the one
+ * that contains the inode item and references) we may end up not processing
+ * any extents, because there are no leafs with a generation matching the
+ * current transaction that have extent items for our inode. So we need to find
+ * if any holes exist and then log them. We also need to log holes after any
+ * truncate operation that changes the inode's size.
*/
-static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_inode *inode,
- struct btrfs_path *path)
+static int btrfs_log_holes(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_inode *inode,
+ struct btrfs_path *path)
{
struct btrfs_fs_info *fs_info = root->fs_info;
- int ret;
struct btrfs_key key;
- u64 hole_start;
- u64 hole_size;
- struct extent_buffer *leaf;
- struct btrfs_root *log = root->log_root;
const u64 ino = btrfs_ino(inode);
const u64 i_size = i_size_read(&inode->vfs_inode);
+ u64 prev_extent_end = 0;
+ int ret;

- if (!btrfs_fs_incompat(fs_info, NO_HOLES))
+ if (!btrfs_fs_incompat(fs_info, NO_HOLES) || i_size == 0)
return 0;

key.objectid = ino;
key.type = BTRFS_EXTENT_DATA_KEY;
- key.offset = (u64)-1;
+ key.offset = 0;

ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- ASSERT(ret != 0);
if (ret < 0)
return ret;

- ASSERT(path->slots[0] > 0);
- path->slots[0]--;
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-
- if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) {
- /* inode does not have any extents */
- hole_start = 0;
- hole_size = i_size;
- } else {
+ while (true) {
struct btrfs_file_extent_item *extent;
+ struct extent_buffer *leaf = path->nodes[0];
u64 len;

- /*
- * If there's an extent beyond i_size, an explicit hole was
- * already inserted by copy_items().
- */
- if (key.offset >= i_size)
- return 0;
+ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ return ret;
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ leaf = path->nodes[0];
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY)
+ break;
+
+ /* We have a hole, log it. */
+ if (prev_extent_end < key.offset) {
+ const u64 hole_len = key.offset - prev_extent_end;
+
+ /*
+ * Release the path to avoid deadlocks with other code
+ * paths that search the root while holding locks on
+ * leafs from the log root.
+ */
+ btrfs_release_path(path);
+ ret = btrfs_insert_file_extent(trans, root->log_root,
+ ino, prev_extent_end, 0,
+ 0, hole_len, 0, hole_len,
+ 0, 0, 0);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Search for the same key again in the root. Since it's
+ * an extent item and we are holding the inode lock, the
+ * key must still exist. If it doesn't just emit warning
+ * and return an error to fall back to a transaction
+ * commit.
+ */
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ return ret;
+ if (WARN_ON(ret > 0))
+ return -ENOENT;
+ leaf = path->nodes[0];
+ }

extent = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
-
if (btrfs_file_extent_type(leaf, extent) ==
- BTRFS_FILE_EXTENT_INLINE)
- return 0;
+ BTRFS_FILE_EXTENT_INLINE) {
+ len = btrfs_file_extent_ram_bytes(leaf, extent);
+ prev_extent_end = ALIGN(key.offset + len,
+ fs_info->sectorsize);
+ } else {
+ len = btrfs_file_extent_num_bytes(leaf, extent);
+ prev_extent_end = key.offset + len;
+ }

- len = btrfs_file_extent_num_bytes(leaf, extent);
- /* Last extent goes beyond i_size, no need to log a hole. */
- if (key.offset + len > i_size)
- return 0;
- hole_start = key.offset + len;
- hole_size = i_size - hole_start;
+ path->slots[0]++;
+ cond_resched();
}
- btrfs_release_path(path);

- /* Last extent ends at i_size. */
- if (hole_size == 0)
- return 0;
+ if (prev_extent_end < i_size) {
+ u64 hole_len;

- hole_size = ALIGN(hole_size, fs_info->sectorsize);
- ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0,
- hole_size, 0, hole_size, 0, 0, 0);
- return ret;
+ btrfs_release_path(path);
+ hole_len = ALIGN(i_size - prev_extent_end, fs_info->sectorsize);
+ ret = btrfs_insert_file_extent(trans, root->log_root,
+ ino, prev_extent_end, 0, 0,
+ hole_len, 0, hole_len,
+ 0, 0, 0);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
}

/*
@@ -5030,6 +4854,50 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
}
continue;
}
+ /*
+ * If the inode was already logged skip it - otherwise we can
+ * hit an infinite loop. Example:
+ *
+ * From the commit root (previous transaction) we have the
+ * following inodes:
+ *
+ * inode 257 a directory
+ * inode 258 with references "zz" and "zz_link" on inode 257
+ * inode 259 with reference "a" on inode 257
+ *
+ * And in the current (uncommitted) transaction we have:
+ *
+ * inode 257 a directory, unchanged
+ * inode 258 with references "a" and "a2" on inode 257
+ * inode 259 with reference "zz_link" on inode 257
+ * inode 261 with reference "zz" on inode 257
+ *
+ * When logging inode 261 the following infinite loop could
+ * happen if we don't skip already logged inodes:
+ *
+ * - we detect inode 258 as a conflicting inode, with inode 261
+ * on reference "zz", and log it;
+ *
+ * - we detect inode 259 as a conflicting inode, with inode 258
+ * on reference "a", and log it;
+ *
+ * - we detect inode 258 as a conflicting inode, with inode 259
+ * on reference "zz_link", and log it - again! After this we
+ * repeat the above steps forever.
+ */
+ spin_lock(&BTRFS_I(inode)->lock);
+ /*
+ * Check the inode's logged_trans only instead of
+ * btrfs_inode_in_log(). This is because the last_log_commit of
+ * the inode is not updated when we only log that it exists and
+ * and it has the full sync bit set (see btrfs_log_inode()).
+ */
+ if (BTRFS_I(inode)->logged_trans == trans->transid) {
+ spin_unlock(&BTRFS_I(inode)->lock);
+ btrfs_add_delayed_iput(inode);
+ continue;
+ }
+ spin_unlock(&BTRFS_I(inode)->lock);
/*
* We are safe logging the other inode without acquiring its
* lock as long as we log with the LOG_INODE_EXISTS mode. We
@@ -5129,7 +4997,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_key min_key;
struct btrfs_key max_key;
struct btrfs_root *log = root->log_root;
- u64 last_extent = 0;
int err = 0;
int ret;
int nritems;
@@ -5307,7 +5174,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
ins_start_slot = path->slots[0];
}
ret = copy_items(trans, inode, dst_path, path,
- &last_extent, ins_start_slot,
+ ins_start_slot,
ins_nr, inode_only,
logged_isize);
if (ret < 0) {
@@ -5330,17 +5197,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
if (ins_nr == 0)
goto next_slot;
ret = copy_items(trans, inode, dst_path, path,
- &last_extent, ins_start_slot,
+ ins_start_slot,
ins_nr, inode_only, logged_isize);
if (ret < 0) {
err = ret;
goto out_unlock;
}
ins_nr = 0;
- if (ret) {
- btrfs_release_path(path);
- continue;
- }
goto next_slot;
}

@@ -5353,18 +5216,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
goto next_slot;
}

- ret = copy_items(trans, inode, dst_path, path, &last_extent,
+ ret = copy_items(trans, inode, dst_path, path,
ins_start_slot, ins_nr, inode_only,
logged_isize);
if (ret < 0) {
err = ret;
goto out_unlock;
}
- if (ret) {
- ins_nr = 0;
- btrfs_release_path(path);
- continue;
- }
ins_nr = 1;
ins_start_slot = path->slots[0];
next_slot:
@@ -5378,13 +5236,12 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
}
if (ins_nr) {
ret = copy_items(trans, inode, dst_path, path,
- &last_extent, ins_start_slot,
+ ins_start_slot,
ins_nr, inode_only, logged_isize);
if (ret < 0) {
err = ret;
goto out_unlock;
}
- ret = 0;
ins_nr = 0;
}
btrfs_release_path(path);
@@ -5399,14 +5256,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
}
}
if (ins_nr) {
- ret = copy_items(trans, inode, dst_path, path, &last_extent,
+ ret = copy_items(trans, inode, dst_path, path,
ins_start_slot, ins_nr, inode_only,
logged_isize);
if (ret < 0) {
err = ret;
goto out_unlock;
}
- ret = 0;
ins_nr = 0;
}

@@ -5419,7 +5275,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
btrfs_release_path(path);
btrfs_release_path(dst_path);
- err = btrfs_log_trailing_hole(trans, root, inode, path);
+ err = btrfs_log_holes(trans, root, inode, path);
if (err)
goto out_unlock;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 97f1ba7c18b2..f7d9fc1a6fc2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -881,17 +881,28 @@ static struct btrfs_fs_devices *find_fsid_changed(
/*
* Handles the case where scanned device is part of an fs that had
* multiple successful changes of FSID but curently device didn't
- * observe it. Meaning our fsid will be different than theirs.
+ * observe it. Meaning our fsid will be different than theirs. We need
+ * to handle two subcases :
+ * 1 - The fs still continues to have different METADATA/FSID uuids.
+ * 2 - The fs is switched back to its original FSID (METADATA/FSID
+ * are equal).
*/
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
+ /* Changed UUIDs */
if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
BTRFS_FSID_SIZE) != 0 &&
memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid,
BTRFS_FSID_SIZE) == 0 &&
memcmp(fs_devices->fsid, disk_super->fsid,
- BTRFS_FSID_SIZE) != 0) {
+ BTRFS_FSID_SIZE) != 0)
+ return fs_devices;
+
+ /* Unchanged UUIDs */
+ if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
+ BTRFS_FSID_SIZE) == 0 &&
+ memcmp(fs_devices->fsid, disk_super->metadata_uuid,
+ BTRFS_FSID_SIZE) == 0)
return fs_devices;
- }
}

return NULL;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index e1cac715d19e..06d932ed097e 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -350,9 +350,14 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
}

rc = cifs_negotiate_protocol(0, tcon->ses);
- if (!rc && tcon->ses->need_reconnect)
+ if (!rc && tcon->ses->need_reconnect) {
rc = cifs_setup_session(0, tcon->ses, nls_codepage);
-
+ if ((rc == -EACCES) && !tcon->retry) {
+ rc = -EHOSTDOWN;
+ mutex_unlock(&tcon->ses->session_mutex);
+ goto failed;
+ }
+ }
if (rc || !tcon->need_reconnect) {
mutex_unlock(&tcon->ses->session_mutex);
goto out;
@@ -397,6 +402,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
case SMB2_SET_INFO:
rc = -EAGAIN;
}
+failed:
unload_nls(nls_codepage);
return rc;
}
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 680aba9c00d5..fd0b5dd68f9e 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -76,14 +76,11 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
if (ia_valid & ATTR_GID)
sd_iattr->ia_gid = iattr->ia_gid;
if (ia_valid & ATTR_ATIME)
- sd_iattr->ia_atime = timestamp_truncate(iattr->ia_atime,
- inode);
+ sd_iattr->ia_atime = iattr->ia_atime;
if (ia_valid & ATTR_MTIME)
- sd_iattr->ia_mtime = timestamp_truncate(iattr->ia_mtime,
- inode);
+ sd_iattr->ia_mtime = iattr->ia_mtime;
if (ia_valid & ATTR_CTIME)
- sd_iattr->ia_ctime = timestamp_truncate(iattr->ia_ctime,
- inode);
+ sd_iattr->ia_ctime = iattr->ia_ctime;
if (ia_valid & ATTR_MODE) {
umode_t mode = iattr->ia_mode;

diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
index c34fa7c61b43..4ee65b2b6247 100644
--- a/fs/crypto/keyring.c
+++ b/fs/crypto/keyring.c
@@ -664,9 +664,6 @@ static int check_for_busy_inodes(struct super_block *sb,
struct list_head *pos;
size_t busy_count = 0;
unsigned long ino;
- struct dentry *dentry;
- char _path[256];
- char *path = NULL;

spin_lock(&mk->mk_decrypted_inodes_lock);

@@ -685,22 +682,14 @@ static int check_for_busy_inodes(struct super_block *sb,
struct fscrypt_info,
ci_master_key_link)->ci_inode;
ino = inode->i_ino;
- dentry = d_find_alias(inode);
}
spin_unlock(&mk->mk_decrypted_inodes_lock);

- if (dentry) {
- path = dentry_path(dentry, _path, sizeof(_path));
- dput(dentry);
- }
- if (IS_ERR_OR_NULL(path))
- path = "(unknown)";
-
fscrypt_warn(NULL,
- "%s: %zu inode(s) still busy after removing key with %s %*phN, including ino %lu (%s)",
+ "%s: %zu inode(s) still busy after removing key with %s %*phN, including ino %lu",
sb->s_id, busy_count, master_key_spec_type(&mk->mk_spec),
master_key_spec_len(&mk->mk_spec), (u8 *)&mk->mk_spec.u,
- ino, path);
+ ino);
return -EBUSY;
}

diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 19f89f9fb10c..23b74b8e8f96 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -306,24 +306,22 @@ static int z_erofs_shifted_transform(const struct z_erofs_decompress_req *rq,
}

src = kmap_atomic(*rq->in);
- if (!rq->out[0]) {
- dst = NULL;
- } else {
+ if (rq->out[0]) {
dst = kmap_atomic(rq->out[0]);
memcpy(dst + rq->pageofs_out, src, righthalf);
+ kunmap_atomic(dst);
}

- if (rq->out[1] == *rq->in) {
- memmove(src, src + righthalf, rq->pageofs_out);
- } else if (nrpages_out == 2) {
- if (dst)
- kunmap_atomic(dst);
+ if (nrpages_out == 2) {
DBG_BUGON(!rq->out[1]);
- dst = kmap_atomic(rq->out[1]);
- memcpy(dst, src + righthalf, rq->pageofs_out);
+ if (rq->out[1] == *rq->in) {
+ memmove(src, src + righthalf, rq->pageofs_out);
+ } else {
+ dst = kmap_atomic(rq->out[1]);
+ memcpy(dst, src + righthalf, rq->pageofs_out);
+ kunmap_atomic(dst);
+ }
}
- if (dst)
- kunmap_atomic(dst);
kunmap_atomic(src);
return 0;
}
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 8aa0ea8c55e8..78e41c7c3d05 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -24,6 +24,8 @@
#include <linux/seq_file.h>
#include <linux/idr.h>

+DEFINE_PER_CPU(int, eventfd_wake_count);
+
static DEFINE_IDA(eventfd_ida);

struct eventfd_ctx {
@@ -60,12 +62,25 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
{
unsigned long flags;

+ /*
+ * Deadlock or stack overflow issues can happen if we recurse here
+ * through waitqueue wakeup handlers. If the caller users potentially
+ * nested waitqueues with custom wakeup handlers, then it should
+ * check eventfd_signal_count() before calling this function. If
+ * it returns true, the eventfd_signal() call should be deferred to a
+ * safe context.
+ */
+ if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
+ return 0;
+
spin_lock_irqsave(&ctx->wqh.lock, flags);
+ this_cpu_inc(eventfd_wake_count);
if (ULLONG_MAX - ctx->count < n)
n = ULLONG_MAX - ctx->count;
ctx->count += n;
if (waitqueue_active(&ctx->wqh))
wake_up_locked_poll(&ctx->wqh, EPOLLIN);
+ this_cpu_dec(eventfd_wake_count);
spin_unlock_irqrestore(&ctx->wqh.lock, flags);

return n;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 30c630d73f0f..065cd2d1bdc6 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1082,9 +1082,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)

if (EXT2_BLOCKS_PER_GROUP(sb) == 0)
goto cantfind_ext2;
- sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
- le32_to_cpu(es->s_first_data_block) - 1)
- / EXT2_BLOCKS_PER_GROUP(sb)) + 1;
+ sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
+ le32_to_cpu(es->s_first_data_block) - 1)
+ / EXT2_BLOCKS_PER_GROUP(sb)) + 1;
db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
EXT2_DESC_PER_BLOCK(sb);
sbi->s_group_desc = kmalloc_array (db_count,
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 6305d5ec25af..5ef8d7ae231b 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -673,9 +673,11 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
const char *str, const struct qstr *name)
{
struct qstr qstr = {.name = str, .len = len };
- struct inode *inode = dentry->d_parent->d_inode;
+ const struct dentry *parent = READ_ONCE(dentry->d_parent);
+ const struct inode *inode = READ_ONCE(parent->d_inode);

- if (!IS_CASEFOLDED(inode) || !EXT4_SB(inode->i_sb)->s_encoding) {
+ if (!inode || !IS_CASEFOLDED(inode) ||
+ !EXT4_SB(inode->i_sb)->s_encoding) {
if (len != name->len)
return -1;
return memcmp(str, name->name, len);
@@ -688,10 +690,11 @@ static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
{
const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb);
const struct unicode_map *um = sbi->s_encoding;
+ const struct inode *inode = READ_ONCE(dentry->d_inode);
unsigned char *norm;
int len, ret = 0;

- if (!IS_CASEFOLDED(dentry->d_inode) || !um)
+ if (!inode || !IS_CASEFOLDED(inode) || !um)
return 0;

norm = kmalloc(PATH_MAX, GFP_ATOMIC);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 12ceadef32c5..2cc9f2168b9e 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -478,17 +478,26 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
gfp_t gfp_flags = GFP_NOFS;
unsigned int enc_bytes = round_up(len, i_blocksize(inode));

+ /*
+ * Since bounce page allocation uses a mempool, we can only use
+ * a waiting mask (i.e. request guaranteed allocation) on the
+ * first page of the bio. Otherwise it can deadlock.
+ */
+ if (io->io_bio)
+ gfp_flags = GFP_NOWAIT | __GFP_NOWARN;
retry_encrypt:
bounce_page = fscrypt_encrypt_pagecache_blocks(page, enc_bytes,
0, gfp_flags);
if (IS_ERR(bounce_page)) {
ret = PTR_ERR(bounce_page);
- if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) {
- if (io->io_bio) {
+ if (ret == -ENOMEM &&
+ (io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) {
+ gfp_flags = GFP_NOFS;
+ if (io->io_bio)
ext4_io_submit(io);
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- }
- gfp_flags |= __GFP_NOFAIL;
+ else
+ gfp_flags |= __GFP_NOFAIL;
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry_encrypt;
}
bounce_page = NULL;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 4033778bcbbf..84280ad3786c 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -1068,24 +1068,27 @@ static int f2fs_d_compare(const struct dentry *dentry, unsigned int len,
const char *str, const struct qstr *name)
{
struct qstr qstr = {.name = str, .len = len };
+ const struct dentry *parent = READ_ONCE(dentry->d_parent);
+ const struct inode *inode = READ_ONCE(parent->d_inode);

- if (!IS_CASEFOLDED(dentry->d_parent->d_inode)) {
+ if (!inode || !IS_CASEFOLDED(inode)) {
if (len != name->len)
return -1;
- return memcmp(str, name, len);
+ return memcmp(str, name->name, len);
}

- return f2fs_ci_compare(dentry->d_parent->d_inode, name, &qstr, false);
+ return f2fs_ci_compare(inode, name, &qstr, false);
}

static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str)
{
struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
const struct unicode_map *um = sbi->s_encoding;
+ const struct inode *inode = READ_ONCE(dentry->d_inode);
unsigned char *norm;
int len, ret = 0;

- if (!IS_CASEFOLDED(dentry->d_inode))
+ if (!inode || !IS_CASEFOLDED(inode))
return 0;

norm = f2fs_kmalloc(sbi, PATH_MAX, GFP_ATOMIC);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index fae665691481..72f308790a8e 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -751,18 +751,12 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr)
inode->i_uid = attr->ia_uid;
if (ia_valid & ATTR_GID)
inode->i_gid = attr->ia_gid;
- if (ia_valid & ATTR_ATIME) {
- inode->i_atime = timestamp_truncate(attr->ia_atime,
- inode);
- }
- if (ia_valid & ATTR_MTIME) {
- inode->i_mtime = timestamp_truncate(attr->ia_mtime,
- inode);
- }
- if (ia_valid & ATTR_CTIME) {
- inode->i_ctime = timestamp_truncate(attr->ia_ctime,
- inode);
- }
+ if (ia_valid & ATTR_ATIME)
+ inode->i_atime = attr->ia_atime;
+ if (ia_valid & ATTR_MTIME)
+ inode->i_mtime = attr->ia_mtime;
+ if (ia_valid & ATTR_CTIME)
+ inode->i_ctime = attr->ia_ctime;
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 1443cee15863..ea8dbf1458c9 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1213,9 +1213,11 @@ static int f2fs_statfs_project(struct super_block *sb,
return PTR_ERR(dquot);
spin_lock(&dquot->dq_dqb_lock);

- limit = (dquot->dq_dqb.dqb_bsoftlimit ?
- dquot->dq_dqb.dqb_bsoftlimit :
- dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
+ limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
+ dquot->dq_dqb.dqb_bhardlimit);
+ if (limit)
+ limit >>= sb->s_blocksize_bits;
+
if (limit && buf->f_blocks > limit) {
curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
buf->f_blocks = limit;
@@ -1224,9 +1226,9 @@ static int f2fs_statfs_project(struct super_block *sb,
(buf->f_blocks - curblock) : 0;
}

- limit = dquot->dq_dqb.dqb_isoftlimit ?
- dquot->dq_dqb.dqb_isoftlimit :
- dquot->dq_dqb.dqb_ihardlimit;
+ limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
+ dquot->dq_dqb.dqb_ihardlimit);
+
if (limit && buf->f_files > limit) {
buf->f_files = limit;
buf->f_ffree =
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 335607b8c5c0..76ac9c7d32ec 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2063,7 +2063,7 @@ void wb_workfn(struct work_struct *work)
struct bdi_writeback, dwork);
long pages_written;

- set_worker_desc("flush-%s", dev_name(wb->bdi->dev));
+ set_worker_desc("flush-%s", bdi_dev_name(wb->bdi));
current->flags |= PF_SWAPWRITE;

if (likely(!current_is_workqueue_rescuer() ||
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index ce715380143c..695369f46f92 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1465,6 +1465,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
}
ia = NULL;
if (nres < 0) {
+ iov_iter_revert(iter, nbytes);
err = nres;
break;
}
@@ -1473,8 +1474,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
count -= nres;
res += nres;
pos += nres;
- if (nres != nbytes)
+ if (nres != nbytes) {
+ iov_iter_revert(iter, nbytes - nres);
break;
+ }
if (count) {
max_pages = iov_iter_npages(iter, fc->max_pages);
ia = fuse_io_alloc(io, max_pages);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 01ff37b76652..4a10b4e7092a 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -833,7 +833,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct gfs2_inode *ip = GFS2_I(inode);
- ssize_t written = 0, ret;
+ ssize_t ret;

ret = gfs2_rsqa_alloc(ip);
if (ret)
@@ -853,68 +853,58 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret <= 0)
- goto out;
-
- /* We can write back this queue in page reclaim */
- current->backing_dev_info = inode_to_bdi(inode);
+ goto out_unlock;

ret = file_remove_privs(file);
if (ret)
- goto out2;
+ goto out_unlock;

ret = file_update_time(file);
if (ret)
- goto out2;
+ goto out_unlock;

if (iocb->ki_flags & IOCB_DIRECT) {
struct address_space *mapping = file->f_mapping;
- loff_t pos, endbyte;
- ssize_t buffered;
+ ssize_t buffered, ret2;

- written = gfs2_file_direct_write(iocb, from);
- if (written < 0 || !iov_iter_count(from))
- goto out2;
+ ret = gfs2_file_direct_write(iocb, from);
+ if (ret < 0 || !iov_iter_count(from))
+ goto out_unlock;

- ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
- if (unlikely(ret < 0))
- goto out2;
- buffered = ret;
+ iocb->ki_flags |= IOCB_DSYNC;
+ current->backing_dev_info = inode_to_bdi(inode);
+ buffered = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+ current->backing_dev_info = NULL;
+ if (unlikely(buffered <= 0))
+ goto out_unlock;

/*
* We need to ensure that the page cache pages are written to
* disk and invalidated to preserve the expected O_DIRECT
- * semantics.
+ * semantics. If the writeback or invalidate fails, only report
+ * the direct I/O range as we don't know if the buffered pages
+ * made it to disk.
*/
- pos = iocb->ki_pos;
- endbyte = pos + buffered - 1;
- ret = filemap_write_and_wait_range(mapping, pos, endbyte);
- if (!ret) {
- iocb->ki_pos += buffered;
- written += buffered;
- invalidate_mapping_pages(mapping,
- pos >> PAGE_SHIFT,
- endbyte >> PAGE_SHIFT);
- } else {
- /*
- * We don't know how much we wrote, so just return
- * the number of bytes which were direct-written
- */
- }
+ iocb->ki_pos += buffered;
+ ret2 = generic_write_sync(iocb, buffered);
+ invalidate_mapping_pages(mapping,
+ (iocb->ki_pos - buffered) >> PAGE_SHIFT,
+ (iocb->ki_pos - 1) >> PAGE_SHIFT);
+ if (!ret || ret2 > 0)
+ ret += ret2;
} else {
+ current->backing_dev_info = inode_to_bdi(inode);
ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
- if (likely(ret > 0))
+ current->backing_dev_info = NULL;
+ if (likely(ret > 0)) {
iocb->ki_pos += ret;
+ ret = generic_write_sync(iocb, ret);
+ }
}

-out2:
- current->backing_dev_info = NULL;
-out:
+out_unlock:
inode_unlock(inode);
- if (likely(ret > 0)) {
- /* Handle various SYNC-type writes */
- ret = generic_write_sync(iocb, ret);
- }
- return written ? written : ret;
+ return ret;
}

static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index e7b9d39955d4..7ca84be20cf6 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -421,7 +421,7 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,

for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) {
if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
- if (lh.lh_sequence > head->lh_sequence)
+ if (lh.lh_sequence >= head->lh_sequence)
*head = lh;
else {
ret = true;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 1c58859aa592..ef485f892d1b 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -981,6 +981,7 @@ static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos)

static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ (*pos)++;
return NULL;
}

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e180033e35cf..05ed7be8a634 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -162,6 +162,17 @@ typedef struct {
bool eof;
} nfs_readdir_descriptor_t;

+static
+void nfs_readdir_init_array(struct page *page)
+{
+ struct nfs_cache_array *array;
+
+ array = kmap_atomic(page);
+ memset(array, 0, sizeof(struct nfs_cache_array));
+ array->eof_index = -1;
+ kunmap_atomic(array);
+}
+
/*
* we are freeing strings created by nfs_add_to_readdir_array()
*/
@@ -174,6 +185,7 @@ void nfs_readdir_clear_array(struct page *page)
array = kmap_atomic(page);
for (i = 0; i < array->size; i++)
kfree(array->array[i].string.name);
+ array->size = 0;
kunmap_atomic(array);
}

@@ -610,6 +622,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
int status = -ENOMEM;
unsigned int array_size = ARRAY_SIZE(pages);

+ nfs_readdir_init_array(page);
+
entry.prev_cookie = 0;
entry.cookie = desc->last_cookie;
entry.eof = 0;
@@ -626,8 +640,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
}

array = kmap(page);
- memset(array, 0, sizeof(struct nfs_cache_array));
- array->eof_index = -1;

status = nfs_readdir_alloc_pages(pages, array_size);
if (status < 0)
@@ -682,6 +694,7 @@ int nfs_readdir_filler(void *data, struct page* page)
unlock_page(page);
return 0;
error:
+ nfs_readdir_clear_array(page);
unlock_page(page);
return ret;
}
@@ -689,8 +702,6 @@ int nfs_readdir_filler(void *data, struct page* page)
static
void cache_page_release(nfs_readdir_descriptor_t *desc)
{
- if (!desc->page->mapping)
- nfs_readdir_clear_array(desc->page);
put_page(desc->page);
desc->page = NULL;
}
@@ -704,19 +715,28 @@ struct page *get_cache_page(nfs_readdir_descriptor_t *desc)

/*
* Returns 0 if desc->dir_cookie was found on page desc->page_index
+ * and locks the page to prevent removal from the page cache.
*/
static
-int find_cache_page(nfs_readdir_descriptor_t *desc)
+int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc)
{
int res;

desc->page = get_cache_page(desc);
if (IS_ERR(desc->page))
return PTR_ERR(desc->page);
-
- res = nfs_readdir_search_array(desc);
+ res = lock_page_killable(desc->page);
if (res != 0)
- cache_page_release(desc);
+ goto error;
+ res = -EAGAIN;
+ if (desc->page->mapping != NULL) {
+ res = nfs_readdir_search_array(desc);
+ if (res == 0)
+ return 0;
+ }
+ unlock_page(desc->page);
+error:
+ cache_page_release(desc);
return res;
}

@@ -731,7 +751,7 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
desc->last_cookie = 0;
}
do {
- res = find_cache_page(desc);
+ res = find_and_lock_cache_page(desc);
} while (res == -EAGAIN);
return res;
}
@@ -770,7 +790,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
desc->eof = true;

kunmap(desc->page);
- cache_page_release(desc);
dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
(unsigned long long)*desc->dir_cookie, res);
return res;
@@ -816,13 +835,13 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc)

status = nfs_do_filldir(desc);

+ out_release:
+ nfs_readdir_clear_array(desc->page);
+ cache_page_release(desc);
out:
dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
__func__, status);
return status;
- out_release:
- cache_page_release(desc);
- goto out;
}

/* The file offset position represents the dirent entry number. A
@@ -887,6 +906,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
break;

res = nfs_do_filldir(desc);
+ unlock_page(desc->page);
+ cache_page_release(desc);
if (res < 0)
break;
} while (!desc->eof);
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index ef55e9b1cd4e..3007b8945d38 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -791,6 +791,7 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct nfsd_file *nf, *new;
struct inode *inode;
unsigned int hashval;
+ bool retry = true;

/* FIXME: skip this if fh_dentry is already set? */
status = fh_verify(rqstp, fhp, S_IFREG,
@@ -826,6 +827,11 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,

/* Did construction of this file fail? */
if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+ if (!retry) {
+ status = nfserr_jukebox;
+ goto out;
+ }
+ retry = false;
nfsd_file_put_noref(nf);
goto retry;
}
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 2681c70283ce..e12409eca7cc 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -675,7 +675,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)

/* Client gets 2 lease periods to return it */
cutoff = ktime_add_ns(task->tk_start,
- nn->nfsd4_lease * NSEC_PER_SEC * 2);
+ (u64)nn->nfsd4_lease * NSEC_PER_SEC * 2);

if (ktime_before(now, cutoff)) {
rpc_delay(task, HZ/100); /* 10 mili-seconds */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 08f6eb2b73f8..1c82d7dd54df 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -6550,7 +6550,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}

if (fl_flags & FL_SLEEP) {
- nbl->nbl_time = jiffies;
+ nbl->nbl_time = get_seconds();
spin_lock(&nn->blocked_locks_lock);
list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 46f56afb6cb8..a080789b4d13 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -605,7 +605,7 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
struct nfsd4_blocked_lock {
struct list_head nbl_list;
struct list_head nbl_lru;
- unsigned long nbl_time;
+ time_t nbl_time;
struct file_lock nbl_lock;
struct knfsd_fh nbl_fh;
struct nfsd4_callback nbl_cb;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index cf423fea0c6f..fc38b9fe4549 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -975,6 +975,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
host_err = vfs_iter_write(file, &iter, &pos, flags);
if (host_err < 0)
goto out_nfserr;
+ *cnt = host_err;
nfsdstats.io_write += *cnt;
fsnotify_modify(file);

diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 6c7388430ad3..d4359a1df3d5 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2899,18 +2899,12 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
ia_valid |= ATTR_MTIME | ATTR_CTIME;
}
}
- if (ia_valid & ATTR_ATIME) {
- vi->i_atime = timestamp_truncate(attr->ia_atime,
- vi);
- }
- if (ia_valid & ATTR_MTIME) {
- vi->i_mtime = timestamp_truncate(attr->ia_mtime,
- vi);
- }
- if (ia_valid & ATTR_CTIME) {
- vi->i_ctime = timestamp_truncate(attr->ia_ctime,
- vi);
- }
+ if (ia_valid & ATTR_ATIME)
+ vi->i_atime = attr->ia_atime;
+ if (ia_valid & ATTR_MTIME)
+ vi->i_mtime = attr->ia_mtime;
+ if (ia_valid & ATTR_CTIME)
+ vi->i_ctime = attr->ia_ctime;
mark_inode_dirty(vi);
out:
return err;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9876db52913a..6cd5e4924e4d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2101,17 +2101,15 @@ static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos)
static int ocfs2_inode_lock_for_extent_tree(struct inode *inode,
struct buffer_head **di_bh,
int meta_level,
- int overwrite_io,
int write_sem,
int wait)
{
int ret = 0;

if (wait)
- ret = ocfs2_inode_lock(inode, NULL, meta_level);
+ ret = ocfs2_inode_lock(inode, di_bh, meta_level);
else
- ret = ocfs2_try_inode_lock(inode,
- overwrite_io ? NULL : di_bh, meta_level);
+ ret = ocfs2_try_inode_lock(inode, di_bh, meta_level);
if (ret < 0)
goto out;

@@ -2136,6 +2134,7 @@ static int ocfs2_inode_lock_for_extent_tree(struct inode *inode,

out_unlock:
brelse(*di_bh);
+ *di_bh = NULL;
ocfs2_inode_unlock(inode, meta_level);
out:
return ret;
@@ -2177,7 +2176,6 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
ret = ocfs2_inode_lock_for_extent_tree(inode,
&di_bh,
meta_level,
- overwrite_io,
write_sem,
wait);
if (ret < 0) {
@@ -2233,13 +2231,13 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
&di_bh,
meta_level,
write_sem);
+ meta_level = 1;
+ write_sem = 1;
ret = ocfs2_inode_lock_for_extent_tree(inode,
&di_bh,
meta_level,
- overwrite_io,
- 1,
+ write_sem,
wait);
- write_sem = 1;
if (ret < 0) {
if (ret != -EAGAIN)
mlog_errno(ret);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index e235a635d9ec..15e4fa288475 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -146,7 +146,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
struct inode *inode = file_inode(file);
struct fd real;
const struct cred *old_cred;
- ssize_t ret;
+ loff_t ret;

/*
* The two special cases below do not need to involve real fs,
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 47a91c9733a5..7255e6a5838f 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -504,7 +504,13 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p)
if (err)
goto fail;

- WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
+ /*
+ * Directory inode is always on overlay st_dev.
+ * Non-dir with ovl_same_dev() could be on pseudo st_dev in case
+ * of xino bits overflow.
+ */
+ WARN_ON_ONCE(S_ISDIR(stat.mode) &&
+ dir->d_sb->s_dev != stat.dev);
ino = stat.ino;
} else if (xinobits && !OVL_TYPE_UPPER(type)) {
ino = ovl_remap_lower_ino(ino, xinobits,
diff --git a/fs/read_write.c b/fs/read_write.c
index 5bbf587f5bc1..7458fccc59e1 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1777,10 +1777,9 @@ static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
* else. Assume that the offsets have already been checked for block
* alignment.
*
- * For deduplication we always scale down to the previous block because we
- * can't meaningfully compare post-EOF contents.
- *
- * For clone we only link a partial EOF block above the destination file's EOF.
+ * For clone we only link a partial EOF block above or at the destination file's
+ * EOF. For deduplication we accept a partial EOF block only if it ends at the
+ * destination file's EOF (can not link it into the middle of a file).
*
* Shorten the request if possible.
*/
@@ -1796,8 +1795,7 @@ static int generic_remap_check_len(struct inode *inode_in,
if ((*len & blkmask) == 0)
return 0;

- if ((remap_flags & REMAP_FILE_DEDUP) ||
- pos_out + *len < i_size_read(inode_out))
+ if (pos_out + *len < i_size_read(inode_out))
new_len &= ~blkmask;

if (new_len == *len)
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 0b98e3c8b461..6c0e19f7a21f 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -228,6 +228,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
if (nm.hash) {
ubifs_assert(c, fname_len(&nm) == 0);
ubifs_assert(c, fname_name(&nm) == NULL);
+ if (nm.hash & ~UBIFS_S_KEY_HASH_MASK)
+ goto done; /* ENOENT */
dent_key_init_hash(c, &key, dir->i_ino, nm.hash);
err = ubifs_tnc_lookup_dh(c, &key, dent, nm.minor_hash);
} else {
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index cd52585c8f4f..a771273fba7e 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -786,7 +786,9 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,

if (page_offset > end_index)
break;
- page = find_or_create_page(mapping, page_offset, ra_gfp_mask);
+ page = pagecache_get_page(mapping, page_offset,
+ FGP_LOCK|FGP_ACCESSED|FGP_CREAT|FGP_NOWAIT,
+ ra_gfp_mask);
if (!page)
break;
if (!PageUptodate(page))
@@ -1078,18 +1080,12 @@ static void do_attr_changes(struct inode *inode, const struct iattr *attr)
inode->i_uid = attr->ia_uid;
if (attr->ia_valid & ATTR_GID)
inode->i_gid = attr->ia_gid;
- if (attr->ia_valid & ATTR_ATIME) {
- inode->i_atime = timestamp_truncate(attr->ia_atime,
- inode);
- }
- if (attr->ia_valid & ATTR_MTIME) {
- inode->i_mtime = timestamp_truncate(attr->ia_mtime,
- inode);
- }
- if (attr->ia_valid & ATTR_CTIME) {
- inode->i_ctime = timestamp_truncate(attr->ia_ctime,
- inode);
- }
+ if (attr->ia_valid & ATTR_ATIME)
+ inode->i_atime = attr->ia_atime;
+ if (attr->ia_valid & ATTR_MTIME)
+ inode->i_mtime = attr->ia_mtime;
+ if (attr->ia_valid & ATTR_CTIME)
+ inode->i_ctime = attr->ia_ctime;
if (attr->ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;

diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 5dc5abca11c7..eeb1be259888 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -113,7 +113,8 @@ static int setflags(struct inode *inode, int flags)
if (err)
goto out_unlock;

- ui->flags = ioctl2ubifs(flags);
+ ui->flags &= ~ioctl2ubifs(UBIFS_SUPPORTED_IOCTL_FLAGS);
+ ui->flags |= ioctl2ubifs(flags);
ubifs_set_inode_flags(inode);
inode->i_ctime = current_time(inode);
release = ui->dirty;
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index a551eb3e9b89..6681c18e52b8 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -161,7 +161,7 @@ static int create_default_filesystem(struct ubifs_info *c)
sup = kzalloc(ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size), GFP_KERNEL);
mst = kzalloc(c->mst_node_alsz, GFP_KERNEL);
idx_node_size = ubifs_idx_node_sz(c, 1);
- idx = kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL);
+ idx = kzalloc(ALIGN(idx_node_size, c->min_io_size), GFP_KERNEL);
ino = kzalloc(ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size), GFP_KERNEL);
cs = kzalloc(ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size), GFP_KERNEL);

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 5e1e8ec0589e..7fc2f3f07c16 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1599,6 +1599,7 @@ static int mount_ubifs(struct ubifs_info *c)
vfree(c->ileb_buf);
vfree(c->sbuf);
kfree(c->bottom_up_buf);
+ kfree(c->sup_node);
ubifs_debugging_exit(c);
return err;
}
@@ -1641,6 +1642,7 @@ static void ubifs_umount(struct ubifs_info *c)
vfree(c->ileb_buf);
vfree(c->sbuf);
kfree(c->bottom_up_buf);
+ kfree(c->sup_node);
ubifs_debugging_exit(c);
}

diff --git a/fs/utimes.c b/fs/utimes.c
index 1ba3f7883870..090739322463 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -36,14 +36,14 @@ static int utimes_common(const struct path *path, struct timespec64 *times)
if (times[0].tv_nsec == UTIME_OMIT)
newattrs.ia_valid &= ~ATTR_ATIME;
else if (times[0].tv_nsec != UTIME_NOW) {
- newattrs.ia_atime = timestamp_truncate(times[0], inode);
+ newattrs.ia_atime = times[0];
newattrs.ia_valid |= ATTR_ATIME_SET;
}

if (times[1].tv_nsec == UTIME_OMIT)
newattrs.ia_valid &= ~ATTR_MTIME;
else if (times[1].tv_nsec != UTIME_NOW) {
- newattrs.ia_mtime = timestamp_truncate(times[1], inode);
+ newattrs.ia_mtime = times[1];
newattrs.ia_valid |= ATTR_MTIME_SET;
}
/*
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 04c0644006fd..c716ea81e653 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -137,13 +137,6 @@
* When used, an architecture is expected to provide __tlb_remove_table()
* which does the actual freeing of these pages.
*
- * HAVE_RCU_TABLE_NO_INVALIDATE
- *
- * This makes HAVE_RCU_TABLE_FREE avoid calling tlb_flush_mmu_tlbonly() before
- * freeing the page-table pages. This can be avoided if you use
- * HAVE_RCU_TABLE_FREE and your architecture does _NOT_ use the Linux
- * page-tables natively.
- *
* MMU_GATHER_NO_RANGE
*
* Use this if your architecture lacks an efficient flush_tlb_range().
@@ -189,8 +182,23 @@ struct mmu_table_batch {

extern void tlb_remove_table(struct mmu_gather *tlb, void *table);

+/*
+ * This allows an architecture that does not use the linux page-tables for
+ * hardware to skip the TLBI when freeing page tables.
+ */
+#ifndef tlb_needs_table_invalidate
+#define tlb_needs_table_invalidate() (true)
+#endif
+
+#else
+
+#ifdef tlb_needs_table_invalidate
+#error tlb_needs_table_invalidate() requires HAVE_RCU_TABLE_FREE
#endif

+#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
+
+
#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
/*
* If we can't allocate a page to make a big batch of page pointers
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 97967ce06de3..f88197c1ffc2 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -13,6 +13,7 @@
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/blkdev.h>
+#include <linux/device.h>
#include <linux/writeback.h>
#include <linux/blk-cgroup.h>
#include <linux/backing-dev-defs.h>
@@ -504,4 +505,13 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi)
(1 << WB_async_congested));
}

+extern const char *bdi_unknown_name;
+
+static inline const char *bdi_dev_name(struct backing_dev_info *bdi)
+{
+ if (!bdi || !bdi->dev)
+ return bdi_unknown_name;
+ return dev_name(bdi->dev);
+}
+
#endif /* _LINUX_BACKING_DEV_H */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 31b1b0e03df8..018dce868de6 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -148,6 +148,20 @@ struct cpufreq_policy {
struct notifier_block nb_max;
};

+/*
+ * Used for passing new cpufreq policy data to the cpufreq driver's ->verify()
+ * callback for sanitization. That callback is only expected to modify the min
+ * and max values, if necessary, and specifically it must not update the
+ * frequency table.
+ */
+struct cpufreq_policy_data {
+ struct cpufreq_cpuinfo cpuinfo;
+ struct cpufreq_frequency_table *freq_table;
+ unsigned int cpu;
+ unsigned int min; /* in kHz */
+ unsigned int max; /* in kHz */
+};
+
struct cpufreq_freqs {
struct cpufreq_policy *policy;
unsigned int old;
@@ -201,8 +215,6 @@ u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy);
struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu);
void cpufreq_cpu_release(struct cpufreq_policy *policy);
int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu);
-int cpufreq_set_policy(struct cpufreq_policy *policy,
- struct cpufreq_policy *new_policy);
void refresh_frequency_limits(struct cpufreq_policy *policy);
void cpufreq_update_policy(unsigned int cpu);
void cpufreq_update_limits(unsigned int cpu);
@@ -284,7 +296,7 @@ struct cpufreq_driver {

/* needed by all drivers */
int (*init)(struct cpufreq_policy *policy);
- int (*verify)(struct cpufreq_policy *policy);
+ int (*verify)(struct cpufreq_policy_data *policy);

/* define one out of two */
int (*setpolicy)(struct cpufreq_policy *policy);
@@ -415,8 +427,9 @@ static inline int cpufreq_thermal_control_enabled(struct cpufreq_driver *drv)
(drv->flags & CPUFREQ_IS_COOLING_DEV);
}

-static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy,
- unsigned int min, unsigned int max)
+static inline void cpufreq_verify_within_limits(struct cpufreq_policy_data *policy,
+ unsigned int min,
+ unsigned int max)
{
if (policy->min < min)
policy->min = min;
@@ -432,10 +445,10 @@ static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy,
}

static inline void
-cpufreq_verify_within_cpu_limits(struct cpufreq_policy *policy)
+cpufreq_verify_within_cpu_limits(struct cpufreq_policy_data *policy)
{
cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
- policy->cpuinfo.max_freq);
+ policy->cpuinfo.max_freq);
}

#ifdef CONFIG_CPU_FREQ
@@ -513,6 +526,7 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div,
* CPUFREQ GOVERNORS *
*********************************************************************/

+#define CPUFREQ_POLICY_UNKNOWN (0)
/*
* If (cpufreq_driver->target) exists, the ->governor decides what frequency
* within the limits is used. If (cpufreq_driver->setpolicy> exists, these
@@ -684,9 +698,9 @@ static inline void dev_pm_opp_free_cpufreq_table(struct device *dev,
int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
struct cpufreq_frequency_table *table);

-int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
+int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy,
struct cpufreq_frequency_table *table);
-int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy);
+int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy);

int cpufreq_table_index_unsorted(struct cpufreq_policy *policy,
unsigned int target_freq,
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index ffcc7724ca21..dc4fd8a6644d 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -12,6 +12,8 @@
#include <linux/fcntl.h>
#include <linux/wait.h>
#include <linux/err.h>
+#include <linux/percpu-defs.h>
+#include <linux/percpu.h>

/*
* CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
@@ -40,6 +42,13 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
__u64 *cnt);

+DECLARE_PER_CPU(int, eventfd_wake_count);
+
+static inline bool eventfd_signal_count(void)
+{
+ return this_cpu_read(eventfd_wake_count);
+}
+
#else /* CONFIG_EVENTFD */

/*
@@ -68,6 +77,11 @@ static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx,
return -ENOSYS;
}

+static inline bool eventfd_signal_count(void)
+{
+ return false;
+}
+
#endif

#endif /* _LINUX_EVENTFD_H */
diff --git a/include/linux/irq.h b/include/linux/irq.h
index fb301cf29148..f8755e5fcd74 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -209,6 +209,8 @@ struct irq_data {
* IRQD_SINGLE_TARGET - IRQ allows only a single affinity target
* IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set
* IRQD_CAN_RESERVE - Can use reservation mode
+ * IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change
+ * required
*/
enum {
IRQD_TRIGGER_MASK = 0xf,
@@ -231,6 +233,7 @@ enum {
IRQD_SINGLE_TARGET = (1 << 24),
IRQD_DEFAULT_TRIGGER_SET = (1 << 25),
IRQD_CAN_RESERVE = (1 << 26),
+ IRQD_MSI_NOMASK_QUIRK = (1 << 27),
};

#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
@@ -390,6 +393,21 @@ static inline bool irqd_can_reserve(struct irq_data *d)
return __irqd_to_state(d) & IRQD_CAN_RESERVE;
}

+static inline void irqd_set_msi_nomask_quirk(struct irq_data *d)
+{
+ __irqd_to_state(d) |= IRQD_MSI_NOMASK_QUIRK;
+}
+
+static inline void irqd_clr_msi_nomask_quirk(struct irq_data *d)
+{
+ __irqd_to_state(d) &= ~IRQD_MSI_NOMASK_QUIRK;
+}
+
+static inline bool irqd_msi_nomask_quirk(struct irq_data *d)
+{
+ return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK;
+}
+
#undef __irqd_to_state

static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 583e7abd07f9..aba5ada373d6 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -205,6 +205,13 @@ enum {
/* Irq domain implements MSI remapping */
IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5),

+ /*
+ * Quirk to handle MSI implementations which do not provide
+ * masking. Currently known to affect x86, but partially
+ * handled in core code.
+ */
+ IRQ_DOMAIN_MSI_NOMASK_QUIRK = (1 << 6),
+
/*
* Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
* for implementation specific purposes and ignored by the
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d41c521a39da..b81f0f1ded5f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -204,7 +204,7 @@ struct kvm_async_pf {
struct list_head queue;
struct kvm_vcpu *vcpu;
struct mm_struct *mm;
- gva_t gva;
+ gpa_t cr2_or_gpa;
unsigned long addr;
struct kvm_arch_async_pf arch;
bool wakeup_all;
@@ -212,8 +212,8 @@ struct kvm_async_pf {

void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
- struct kvm_arch_async_pf *arch);
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ unsigned long hva, struct kvm_arch_async_pf *arch);
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
#endif

@@ -728,6 +728,7 @@ void kvm_set_pfn_dirty(kvm_pfn_t pfn);
void kvm_set_pfn_accessed(kvm_pfn_t pfn);
void kvm_get_pfn(kvm_pfn_t pfn);

+void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
int len);
int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
@@ -750,7 +751,7 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
-unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
+unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);

struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
@@ -758,8 +759,12 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn
kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map);
+int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
+ struct gfn_to_pfn_cache *cache, bool atomic);
struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
+int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
+ struct gfn_to_pfn_cache *cache, bool dirty, bool atomic);
unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index bde5374ae021..2382cb58969d 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -18,7 +18,7 @@ struct kvm_memslots;

enum kvm_mr_change;

-#include <asm/types.h>
+#include <linux/types.h>

/*
* Address types:
@@ -49,4 +49,11 @@ struct gfn_to_hva_cache {
struct kvm_memory_slot *memslot;
};

+struct gfn_to_pfn_cache {
+ u64 generation;
+ gfn_t gfn;
+ kvm_pfn_t pfn;
+ bool dirty;
+};
+
#endif /* __KVM_TYPES_H__ */
diff --git a/include/linux/mfd/rohm-bd70528.h b/include/linux/mfd/rohm-bd70528.h
index 1013e60c5b25..b0109ee6dae2 100644
--- a/include/linux/mfd/rohm-bd70528.h
+++ b/include/linux/mfd/rohm-bd70528.h
@@ -317,7 +317,7 @@ enum {
#define BD70528_MASK_RTC_MINUTE 0x7f
#define BD70528_MASK_RTC_HOUR_24H 0x80
#define BD70528_MASK_RTC_HOUR_PM 0x20
-#define BD70528_MASK_RTC_HOUR 0x1f
+#define BD70528_MASK_RTC_HOUR 0x3f
#define BD70528_MASK_RTC_DAY 0x3f
#define BD70528_MASK_RTC_WEEK 0x07
#define BD70528_MASK_RTC_MONTH 0x1f
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 0836fe232f97..0cdc8d12785a 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1417,14 +1417,15 @@ struct mlx5_ifc_cmd_hca_cap_bits {

u8 reserved_at_440[0x20];

- u8 tls[0x1];
- u8 reserved_at_461[0x2];
+ u8 reserved_at_460[0x3];
u8 log_max_uctx[0x5];
u8 reserved_at_468[0x3];
u8 log_max_umem[0x5];
u8 max_num_eqs[0x10];

- u8 reserved_at_480[0x3];
+ u8 reserved_at_480[0x1];
+ u8 tls_tx[0x1];
+ u8 reserved_at_482[0x1];
u8 log_max_l2_table[0x5];
u8 reserved_at_488[0x8];
u8 log_uar_page_sz[0x10];
diff --git a/include/linux/padata.h b/include/linux/padata.h
index 23717eeaad23..cccab7a59787 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -9,6 +9,7 @@
#ifndef PADATA_H
#define PADATA_H

+#include <linux/compiler_types.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/list.h>
@@ -98,7 +99,7 @@ struct padata_cpumask {
* struct parallel_data - Internal control structure, covers everything
* that depends on the cpumask in use.
*
- * @pinst: padata instance.
+ * @sh: padata_shell object.
* @pqueue: percpu padata queues used for parallelization.
* @squeue: percpu padata queues used for serialuzation.
* @reorder_objects: Number of objects waiting in the reorder queues.
@@ -111,7 +112,7 @@ struct padata_cpumask {
* @lock: Reorder lock.
*/
struct parallel_data {
- struct padata_instance *pinst;
+ struct padata_shell *ps;
struct padata_parallel_queue __percpu *pqueue;
struct padata_serial_queue __percpu *squeue;
atomic_t reorder_objects;
@@ -124,14 +125,33 @@ struct parallel_data {
spinlock_t lock ____cacheline_aligned;
};

+/**
+ * struct padata_shell - Wrapper around struct parallel_data, its
+ * purpose is to allow the underlying control structure to be replaced
+ * on the fly using RCU.
+ *
+ * @pinst: padat instance.
+ * @pd: Actual parallel_data structure which may be substituted on the fly.
+ * @opd: Pointer to old pd to be freed by padata_replace.
+ * @list: List entry in padata_instance list.
+ */
+struct padata_shell {
+ struct padata_instance *pinst;
+ struct parallel_data __rcu *pd;
+ struct parallel_data *opd;
+ struct list_head list;
+};
+
/**
* struct padata_instance - The overall control structure.
*
* @cpu_notifier: cpu hotplug notifier.
* @parallel_wq: The workqueue used for parallel work.
* @serial_wq: The workqueue used for serial work.
- * @pd: The internal control structure.
+ * @pslist: List of padata_shell objects attached to this instance.
* @cpumask: User supplied cpumasks for parallel and serial works.
+ * @rcpumask: Actual cpumasks based on user cpumask and cpu_online_mask.
+ * @omask: Temporary storage used to compute the notification mask.
* @cpumask_change_notifier: Notifiers chain for user-defined notify
* callbacks that will be called when either @pcpu or @cbcpu
* or both cpumasks change.
@@ -143,8 +163,10 @@ struct padata_instance {
struct hlist_node node;
struct workqueue_struct *parallel_wq;
struct workqueue_struct *serial_wq;
- struct parallel_data *pd;
+ struct list_head pslist;
struct padata_cpumask cpumask;
+ struct padata_cpumask rcpumask;
+ cpumask_var_t omask;
struct blocking_notifier_head cpumask_change_notifier;
struct kobject kobj;
struct mutex lock;
@@ -156,7 +178,9 @@ struct padata_instance {

extern struct padata_instance *padata_alloc_possible(const char *name);
extern void padata_free(struct padata_instance *pinst);
-extern int padata_do_parallel(struct padata_instance *pinst,
+extern struct padata_shell *padata_alloc_shell(struct padata_instance *pinst);
+extern void padata_free_shell(struct padata_shell *ps);
+extern int padata_do_parallel(struct padata_shell *ps,
struct padata_priv *padata, int *cb_cpu);
extern void padata_do_serial(struct padata_priv *padata);
extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index a6fabd865211..176bfbd52d97 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -175,8 +175,7 @@
* Declaration/definition used for per-CPU variables that should be accessed
* as decrypted when memory encryption is enabled in the guest.
*/
-#if defined(CONFIG_VIRTUALIZATION) && defined(CONFIG_AMD_MEM_ENCRYPT)
-
+#ifdef CONFIG_AMD_MEM_ENCRYPT
#define DECLARE_PER_CPU_DECRYPTED(type, name) \
DECLARE_PER_CPU_SECTION(type, name, "..decrypted")

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 337a46391527..6a92fd3105a3 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -287,6 +287,8 @@ void regulator_bulk_set_supply_names(struct regulator_bulk_data *consumers,
const char *const *supply_names,
unsigned int num_supplies);

+bool regulator_is_equal(struct regulator *reg1, struct regulator *reg2);
+
#else

/*
@@ -593,6 +595,11 @@ regulator_bulk_set_supply_names(struct regulator_bulk_data *consumers,
{
}

+static inline bool
+regulator_is_equal(struct regulator *reg1, struct regulator *reg2)
+{
+ return false;
+}
#endif

static inline int regulator_set_voltage_triplet(struct regulator *regulator,
diff --git a/include/media/v4l2-rect.h b/include/media/v4l2-rect.h
index c86474dc7b55..8800a640c224 100644
--- a/include/media/v4l2-rect.h
+++ b/include/media/v4l2-rect.h
@@ -63,10 +63,10 @@ static inline void v4l2_rect_map_inside(struct v4l2_rect *r,
r->left = boundary->left;
if (r->top < boundary->top)
r->top = boundary->top;
- if (r->left + r->width > boundary->width)
- r->left = boundary->width - r->width;
- if (r->top + r->height > boundary->height)
- r->top = boundary->height - r->height;
+ if (r->left + r->width > boundary->left + boundary->width)
+ r->left = boundary->left + boundary->width - r->width;
+ if (r->top + r->height > boundary->top + boundary->height)
+ r->top = boundary->top + boundary->height - r->height;
}

/**
diff --git a/include/net/ipx.h b/include/net/ipx.h
index baf090390998..9d1342807b59 100644
--- a/include/net/ipx.h
+++ b/include/net/ipx.h
@@ -47,11 +47,6 @@ struct ipxhdr {
/* From af_ipx.c */
extern int sysctl_ipx_pprop_broadcasting;

-static __inline__ struct ipxhdr *ipx_hdr(struct sk_buff *skb)
-{
- return (struct ipxhdr *)skb_transport_header(skb);
-}
-
struct ipx_interface {
/* IPX address */
__be32 if_netnum;
diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
index e05b95e83d5a..fb9dce4c6928 100644
--- a/include/sound/hdaudio.h
+++ b/include/sound/hdaudio.h
@@ -8,6 +8,7 @@

#include <linux/device.h>
#include <linux/interrupt.h>
+#include <linux/io.h>
#include <linux/pm_runtime.h>
#include <linux/timecounter.h>
#include <sound/core.h>
@@ -330,6 +331,7 @@ struct hdac_bus {
bool chip_init:1; /* h/w initialized */

/* behavior flags */
+ bool aligned_mmio:1; /* aligned MMIO access */
bool sync_write:1; /* sync after verb write */
bool use_posbuf:1; /* use position buffer */
bool snoop:1; /* enable snooping */
@@ -405,34 +407,61 @@ void snd_hdac_bus_free_stream_pages(struct hdac_bus *bus);
unsigned int snd_hdac_aligned_read(void __iomem *addr, unsigned int mask);
void snd_hdac_aligned_write(unsigned int val, void __iomem *addr,
unsigned int mask);
-#define snd_hdac_reg_writeb(v, addr) snd_hdac_aligned_write(v, addr, 0xff)
-#define snd_hdac_reg_writew(v, addr) snd_hdac_aligned_write(v, addr, 0xffff)
-#define snd_hdac_reg_readb(addr) snd_hdac_aligned_read(addr, 0xff)
-#define snd_hdac_reg_readw(addr) snd_hdac_aligned_read(addr, 0xffff)
-#else /* CONFIG_SND_HDA_ALIGNED_MMIO */
-#define snd_hdac_reg_writeb(val, addr) writeb(val, addr)
-#define snd_hdac_reg_writew(val, addr) writew(val, addr)
-#define snd_hdac_reg_readb(addr) readb(addr)
-#define snd_hdac_reg_readw(addr) readw(addr)
-#endif /* CONFIG_SND_HDA_ALIGNED_MMIO */
-#define snd_hdac_reg_writel(val, addr) writel(val, addr)
-#define snd_hdac_reg_readl(addr) readl(addr)
+#define snd_hdac_aligned_mmio(bus) (bus)->aligned_mmio
+#else
+#define snd_hdac_aligned_mmio(bus) false
+#define snd_hdac_aligned_read(addr, mask) 0
+#define snd_hdac_aligned_write(val, addr, mask) do {} while (0)
+#endif
+
+static inline void snd_hdac_reg_writeb(struct hdac_bus *bus, void __iomem *addr,
+ u8 val)
+{
+ if (snd_hdac_aligned_mmio(bus))
+ snd_hdac_aligned_write(val, addr, 0xff);
+ else
+ writeb(val, addr);
+}
+
+static inline void snd_hdac_reg_writew(struct hdac_bus *bus, void __iomem *addr,
+ u16 val)
+{
+ if (snd_hdac_aligned_mmio(bus))
+ snd_hdac_aligned_write(val, addr, 0xffff);
+ else
+ writew(val, addr);
+}
+
+static inline u8 snd_hdac_reg_readb(struct hdac_bus *bus, void __iomem *addr)
+{
+ return snd_hdac_aligned_mmio(bus) ?
+ snd_hdac_aligned_read(addr, 0xff) : readb(addr);
+}
+
+static inline u16 snd_hdac_reg_readw(struct hdac_bus *bus, void __iomem *addr)
+{
+ return snd_hdac_aligned_mmio(bus) ?
+ snd_hdac_aligned_read(addr, 0xffff) : readw(addr);
+}
+
+#define snd_hdac_reg_writel(bus, addr, val) writel(val, addr)
+#define snd_hdac_reg_readl(bus, addr) readl(addr)

/*
* macros for easy use
*/
#define _snd_hdac_chip_writeb(chip, reg, value) \
- snd_hdac_reg_writeb(value, (chip)->remap_addr + (reg))
+ snd_hdac_reg_writeb(chip, (chip)->remap_addr + (reg), value)
#define _snd_hdac_chip_readb(chip, reg) \
- snd_hdac_reg_readb((chip)->remap_addr + (reg))
+ snd_hdac_reg_readb(chip, (chip)->remap_addr + (reg))
#define _snd_hdac_chip_writew(chip, reg, value) \
- snd_hdac_reg_writew(value, (chip)->remap_addr + (reg))
+ snd_hdac_reg_writew(chip, (chip)->remap_addr + (reg), value)
#define _snd_hdac_chip_readw(chip, reg) \
- snd_hdac_reg_readw((chip)->remap_addr + (reg))
+ snd_hdac_reg_readw(chip, (chip)->remap_addr + (reg))
#define _snd_hdac_chip_writel(chip, reg, value) \
- snd_hdac_reg_writel(value, (chip)->remap_addr + (reg))
+ snd_hdac_reg_writel(chip, (chip)->remap_addr + (reg), value)
#define _snd_hdac_chip_readl(chip, reg) \
- snd_hdac_reg_readl((chip)->remap_addr + (reg))
+ snd_hdac_reg_readl(chip, (chip)->remap_addr + (reg))

/* read/write a register, pass without AZX_REG_ prefix */
#define snd_hdac_chip_writel(chip, reg, value) \
@@ -540,17 +569,17 @@ int snd_hdac_get_stream_stripe_ctl(struct hdac_bus *bus,
*/
/* read/write a register, pass without AZX_REG_ prefix */
#define snd_hdac_stream_writel(dev, reg, value) \
- snd_hdac_reg_writel(value, (dev)->sd_addr + AZX_REG_ ## reg)
+ snd_hdac_reg_writel((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg, value)
#define snd_hdac_stream_writew(dev, reg, value) \
- snd_hdac_reg_writew(value, (dev)->sd_addr + AZX_REG_ ## reg)
+ snd_hdac_reg_writew((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg, value)
#define snd_hdac_stream_writeb(dev, reg, value) \
- snd_hdac_reg_writeb(value, (dev)->sd_addr + AZX_REG_ ## reg)
+ snd_hdac_reg_writeb((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg, value)
#define snd_hdac_stream_readl(dev, reg) \
- snd_hdac_reg_readl((dev)->sd_addr + AZX_REG_ ## reg)
+ snd_hdac_reg_readl((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg)
#define snd_hdac_stream_readw(dev, reg) \
- snd_hdac_reg_readw((dev)->sd_addr + AZX_REG_ ## reg)
+ snd_hdac_reg_readw((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg)
#define snd_hdac_stream_readb(dev, reg) \
- snd_hdac_reg_readb((dev)->sd_addr + AZX_REG_ ## reg)
+ snd_hdac_reg_readb((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg)

/* update a register, pass without AZX_REG_ prefix */
#define snd_hdac_stream_updatel(dev, reg, mask, val) \
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index c2ce6480b4b1..66282552db20 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -67,8 +67,8 @@ DECLARE_EVENT_CLASS(writeback_page_template,

TP_fast_assign(
strscpy_pad(__entry->name,
- mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)",
- 32);
+ bdi_dev_name(mapping ? inode_to_bdi(mapping->host) :
+ NULL), 32);
__entry->ino = mapping ? mapping->host->i_ino : 0;
__entry->index = page->index;
),
@@ -111,8 +111,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
struct backing_dev_info *bdi = inode_to_bdi(inode);

/* may be called for files on pseudo FSes w/ unregistered bdi */
- strscpy_pad(__entry->name,
- bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
+ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->flags = flags;
@@ -193,7 +192,7 @@ TRACE_EVENT(inode_foreign_history,
),

TP_fast_assign(
- strncpy(__entry->name, dev_name(inode_to_bdi(inode)->dev), 32);
+ strncpy(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
__entry->history = history;
@@ -222,7 +221,7 @@ TRACE_EVENT(inode_switch_wbs,
),

TP_fast_assign(
- strncpy(__entry->name, dev_name(old_wb->bdi->dev), 32);
+ strncpy(__entry->name, bdi_dev_name(old_wb->bdi), 32);
__entry->ino = inode->i_ino;
__entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb);
__entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb);
@@ -255,7 +254,7 @@ TRACE_EVENT(track_foreign_dirty,
struct address_space *mapping = page_mapping(page);
struct inode *inode = mapping ? mapping->host : NULL;

- strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+ strncpy(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->bdi_id = wb->bdi->id;
__entry->ino = inode ? inode->i_ino : 0;
__entry->memcg_id = wb->memcg_css->id;
@@ -288,7 +287,7 @@ TRACE_EVENT(flush_foreign,
),

TP_fast_assign(
- strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+ strncpy(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
__entry->frn_bdi_id = frn_bdi_id;
__entry->frn_memcg_id = frn_memcg_id;
@@ -318,7 +317,7 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template,

TP_fast_assign(
strscpy_pad(__entry->name,
- dev_name(inode_to_bdi(inode)->dev), 32);
+ bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->sync_mode = wbc->sync_mode;
__entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
@@ -361,9 +360,7 @@ DECLARE_EVENT_CLASS(writeback_work_class,
__field(unsigned int, cgroup_ino)
),
TP_fast_assign(
- strscpy_pad(__entry->name,
- wb->bdi->dev ? dev_name(wb->bdi->dev) :
- "(unknown)", 32);
+ strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->nr_pages = work->nr_pages;
__entry->sb_dev = work->sb ? work->sb->s_dev : 0;
__entry->sync_mode = work->sync_mode;
@@ -416,7 +413,7 @@ DECLARE_EVENT_CLASS(writeback_class,
__field(unsigned int, cgroup_ino)
),
TP_fast_assign(
- strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
),
TP_printk("bdi %s: cgroup_ino=%u",
@@ -438,7 +435,7 @@ TRACE_EVENT(writeback_bdi_register,
__array(char, name, 32)
),
TP_fast_assign(
- strscpy_pad(__entry->name, dev_name(bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
),
TP_printk("bdi %s",
__entry->name
@@ -463,7 +460,7 @@ DECLARE_EVENT_CLASS(wbc_class,
),

TP_fast_assign(
- strscpy_pad(__entry->name, dev_name(bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
__entry->nr_to_write = wbc->nr_to_write;
__entry->pages_skipped = wbc->pages_skipped;
__entry->sync_mode = wbc->sync_mode;
@@ -514,7 +511,7 @@ TRACE_EVENT(writeback_queue_io,
),
TP_fast_assign(
unsigned long *older_than_this = work->older_than_this;
- strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->older = older_than_this ? *older_than_this : 0;
__entry->age = older_than_this ?
(jiffies - *older_than_this) * 1000 / HZ : -1;
@@ -600,7 +597,7 @@ TRACE_EVENT(bdi_dirty_ratelimit,
),

TP_fast_assign(
- strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32);
__entry->write_bw = KBps(wb->write_bandwidth);
__entry->avg_write_bw = KBps(wb->avg_write_bandwidth);
__entry->dirty_rate = KBps(dirty_rate);
@@ -665,7 +662,7 @@ TRACE_EVENT(balance_dirty_pages,

TP_fast_assign(
unsigned long freerun = (thresh + bg_thresh) / 2;
- strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32);

__entry->limit = global_wb_domain.dirty_limit;
__entry->setpoint = (global_wb_domain.dirty_limit +
@@ -726,7 +723,7 @@ TRACE_EVENT(writeback_sb_inodes_requeue,

TP_fast_assign(
strscpy_pad(__entry->name,
- dev_name(inode_to_bdi(inode)->dev), 32);
+ bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->dirtied_when = inode->dirtied_when;
@@ -800,7 +797,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,

TP_fast_assign(
strscpy_pad(__entry->name,
- dev_name(inode_to_bdi(inode)->dev), 32);
+ bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->dirtied_when = inode->dirtied_when;
diff --git a/ipc/msg.c b/ipc/msg.c
index 8dec945fa030..767587ab45a3 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -377,7 +377,7 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
* NOTE: no locks must be held, the rwsem is taken inside this function.
*/
static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
- struct msqid64_ds *msqid64)
+ struct ipc64_perm *perm, int msg_qbytes)
{
struct kern_ipc_perm *ipcp;
struct msg_queue *msq;
@@ -387,7 +387,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
rcu_read_lock();

ipcp = ipcctl_obtain_check(ns, &msg_ids(ns), msqid, cmd,
- &msqid64->msg_perm, msqid64->msg_qbytes);
+ perm, msg_qbytes);
if (IS_ERR(ipcp)) {
err = PTR_ERR(ipcp);
goto out_unlock1;
@@ -409,18 +409,18 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
{
DEFINE_WAKE_Q(wake_q);

- if (msqid64->msg_qbytes > ns->msg_ctlmnb &&
+ if (msg_qbytes > ns->msg_ctlmnb &&
!capable(CAP_SYS_RESOURCE)) {
err = -EPERM;
goto out_unlock1;
}

ipc_lock_object(&msq->q_perm);
- err = ipc_update_perm(&msqid64->msg_perm, ipcp);
+ err = ipc_update_perm(perm, ipcp);
if (err)
goto out_unlock0;

- msq->q_qbytes = msqid64->msg_qbytes;
+ msq->q_qbytes = msg_qbytes;

msq->q_ctime = ktime_get_real_seconds();
/*
@@ -601,9 +601,10 @@ static long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf, int ver
case IPC_SET:
if (copy_msqid_from_user(&msqid64, buf, version))
return -EFAULT;
- /* fallthru */
+ return msgctl_down(ns, msqid, cmd, &msqid64.msg_perm,
+ msqid64.msg_qbytes);
case IPC_RMID:
- return msgctl_down(ns, msqid, cmd, &msqid64);
+ return msgctl_down(ns, msqid, cmd, NULL, 0);
default:
return -EINVAL;
}
@@ -735,9 +736,9 @@ static long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr, int versio
case IPC_SET:
if (copy_compat_msqid_from_user(&msqid64, uptr, version))
return -EFAULT;
- /* fallthru */
+ return msgctl_down(ns, msqid, cmd, &msqid64.msg_perm, msqid64.msg_qbytes);
case IPC_RMID:
- return msgctl_down(ns, msqid, cmd, &msqid64);
+ return msgctl_down(ns, msqid, cmd, NULL, 0);
default:
return -EINVAL;
}
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 3d3d61b5985b..b4b6b77f309c 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -293,7 +293,8 @@ struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
struct hlist_head *head = dev_map_index_hash(dtab, key);
struct bpf_dtab_netdev *dev;

- hlist_for_each_entry_rcu(dev, head, index_hlist)
+ hlist_for_each_entry_rcu(dev, head, index_hlist,
+ lockdep_is_held(&dtab->index_lock))
if (dev->idx == key)
return dev;

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6c829e22bad3..15b123bdcaf5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5823,7 +5823,15 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
*/
user_lock_limit *= num_online_cpus();

- user_locked = atomic_long_read(&user->locked_vm) + user_extra;
+ user_locked = atomic_long_read(&user->locked_vm);
+
+ /*
+ * sysctl_perf_event_mlock may have changed, so that
+ * user->locked_vm > user_lock_limit
+ */
+ if (user_locked > user_lock_limit)
+ user_locked = user_lock_limit;
+ user_locked += user_extra;

if (user_locked <= user_lock_limit) {
/* charge all to locked_vm */
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index c1eccd4f6520..a949bd39e343 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -114,6 +114,7 @@ static const struct irq_bit_descr irqdata_states[] = {
BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED),
BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN),
BIT_MASK_DESCR(IRQD_CAN_RESERVE),
+ BIT_MASK_DESCR(IRQD_MSI_NOMASK_QUIRK),

BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU),

diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index dd822fd8a7d5..480df3659720 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1459,6 +1459,7 @@ int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg)
if (rv) {
/* Restore the original irq_data. */
*root_irq_data = *child_irq_data;
+ kfree(child_irq_data);
goto error;
}

diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index ad26fbcfbfc8..eb95f6106a1e 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -453,8 +453,11 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
continue;

irq_data = irq_domain_get_irq_data(domain, desc->irq);
- if (!can_reserve)
+ if (!can_reserve) {
irqd_clr_can_reserve(irq_data);
+ if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
+ irqd_set_msi_nomask_quirk(irq_data);
+ }
ret = irq_domain_activate_irq(irq_data, can_reserve);
if (ret)
goto cleanup;
diff --git a/kernel/padata.c b/kernel/padata.c
index c3fec1413295..9c82ee4a9732 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -35,6 +35,8 @@

#define MAX_OBJ_NUM 1000

+static void padata_free_pd(struct parallel_data *pd);
+
static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
{
int cpu, target_cpu;
@@ -87,7 +89,7 @@ static void padata_parallel_worker(struct work_struct *parallel_work)
/**
* padata_do_parallel - padata parallelization function
*
- * @pinst: padata instance
+ * @ps: padatashell
* @padata: object to be parallelized
* @cb_cpu: pointer to the CPU that the serialization callback function should
* run on. If it's not in the serial cpumask of @pinst
@@ -98,16 +100,17 @@ static void padata_parallel_worker(struct work_struct *parallel_work)
* Note: Every object which is parallelized by padata_do_parallel
* must be seen by padata_do_serial.
*/
-int padata_do_parallel(struct padata_instance *pinst,
+int padata_do_parallel(struct padata_shell *ps,
struct padata_priv *padata, int *cb_cpu)
{
+ struct padata_instance *pinst = ps->pinst;
int i, cpu, cpu_index, target_cpu, err;
struct padata_parallel_queue *queue;
struct parallel_data *pd;

rcu_read_lock_bh();

- pd = rcu_dereference_bh(pinst->pd);
+ pd = rcu_dereference_bh(ps->pd);

err = -EINVAL;
if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
@@ -210,10 +213,10 @@ static struct padata_priv *padata_find_next(struct parallel_data *pd,

static void padata_reorder(struct parallel_data *pd)
{
+ struct padata_instance *pinst = pd->ps->pinst;
int cb_cpu;
struct padata_priv *padata;
struct padata_serial_queue *squeue;
- struct padata_instance *pinst = pd->pinst;
struct padata_parallel_queue *next_queue;

/*
@@ -283,6 +286,7 @@ static void padata_serial_worker(struct work_struct *serial_work)
struct padata_serial_queue *squeue;
struct parallel_data *pd;
LIST_HEAD(local_list);
+ int cnt;

local_bh_disable();
squeue = container_of(serial_work, struct padata_serial_queue, work);
@@ -292,6 +296,8 @@ static void padata_serial_worker(struct work_struct *serial_work)
list_replace_init(&squeue->serial.list, &local_list);
spin_unlock(&squeue->serial.lock);

+ cnt = 0;
+
while (!list_empty(&local_list)) {
struct padata_priv *padata;

@@ -301,9 +307,12 @@ static void padata_serial_worker(struct work_struct *serial_work)
list_del_init(&padata->list);

padata->serial(padata);
- atomic_dec(&pd->refcnt);
+ cnt++;
}
local_bh_enable();
+
+ if (atomic_sub_and_test(cnt, &pd->refcnt))
+ padata_free_pd(pd);
}

/**
@@ -341,36 +350,39 @@ void padata_do_serial(struct padata_priv *padata)
}
EXPORT_SYMBOL(padata_do_serial);

-static int padata_setup_cpumasks(struct parallel_data *pd,
- const struct cpumask *pcpumask,
- const struct cpumask *cbcpumask)
+static int padata_setup_cpumasks(struct padata_instance *pinst)
{
struct workqueue_attrs *attrs;
+ int err;
+
+ attrs = alloc_workqueue_attrs();
+ if (!attrs)
+ return -ENOMEM;
+
+ /* Restrict parallel_wq workers to pd->cpumask.pcpu. */
+ cpumask_copy(attrs->cpumask, pinst->cpumask.pcpu);
+ err = apply_workqueue_attrs(pinst->parallel_wq, attrs);
+ free_workqueue_attrs(attrs);
+
+ return err;
+}
+
+static int pd_setup_cpumasks(struct parallel_data *pd,
+ const struct cpumask *pcpumask,
+ const struct cpumask *cbcpumask)
+{
int err = -ENOMEM;

if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
goto out;
- cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
-
if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL))
goto free_pcpu_mask;
- cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask);
-
- attrs = alloc_workqueue_attrs();
- if (!attrs)
- goto free_cbcpu_mask;

- /* Restrict parallel_wq workers to pd->cpumask.pcpu. */
- cpumask_copy(attrs->cpumask, pd->cpumask.pcpu);
- err = apply_workqueue_attrs(pd->pinst->parallel_wq, attrs);
- free_workqueue_attrs(attrs);
- if (err < 0)
- goto free_cbcpu_mask;
+ cpumask_copy(pd->cpumask.pcpu, pcpumask);
+ cpumask_copy(pd->cpumask.cbcpu, cbcpumask);

return 0;

-free_cbcpu_mask:
- free_cpumask_var(pd->cpumask.cbcpu);
free_pcpu_mask:
free_cpumask_var(pd->cpumask.pcpu);
out:
@@ -414,12 +426,16 @@ static void padata_init_pqueues(struct parallel_data *pd)
}

/* Allocate and initialize the internal cpumask dependend resources. */
-static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
- const struct cpumask *pcpumask,
- const struct cpumask *cbcpumask)
+static struct parallel_data *padata_alloc_pd(struct padata_shell *ps)
{
+ struct padata_instance *pinst = ps->pinst;
+ const struct cpumask *cbcpumask;
+ const struct cpumask *pcpumask;
struct parallel_data *pd;

+ cbcpumask = pinst->rcpumask.cbcpu;
+ pcpumask = pinst->rcpumask.pcpu;
+
pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
if (!pd)
goto err;
@@ -432,15 +448,15 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
if (!pd->squeue)
goto err_free_pqueue;

- pd->pinst = pinst;
- if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0)
+ pd->ps = ps;
+ if (pd_setup_cpumasks(pd, pcpumask, cbcpumask))
goto err_free_squeue;

padata_init_pqueues(pd);
padata_init_squeues(pd);
atomic_set(&pd->seq_nr, -1);
atomic_set(&pd->reorder_objects, 0);
- atomic_set(&pd->refcnt, 0);
+ atomic_set(&pd->refcnt, 1);
spin_lock_init(&pd->lock);
pd->cpu = cpumask_first(pd->cpumask.pcpu);
INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
@@ -466,29 +482,6 @@ static void padata_free_pd(struct parallel_data *pd)
kfree(pd);
}

-/* Flush all objects out of the padata queues. */
-static void padata_flush_queues(struct parallel_data *pd)
-{
- int cpu;
- struct padata_parallel_queue *pqueue;
- struct padata_serial_queue *squeue;
-
- for_each_cpu(cpu, pd->cpumask.pcpu) {
- pqueue = per_cpu_ptr(pd->pqueue, cpu);
- flush_work(&pqueue->work);
- }
-
- if (atomic_read(&pd->reorder_objects))
- padata_reorder(pd);
-
- for_each_cpu(cpu, pd->cpumask.cbcpu) {
- squeue = per_cpu_ptr(pd->squeue, cpu);
- flush_work(&squeue->work);
- }
-
- BUG_ON(atomic_read(&pd->refcnt) != 0);
-}
-
static void __padata_start(struct padata_instance *pinst)
{
pinst->flags |= PADATA_INIT;
@@ -502,39 +495,67 @@ static void __padata_stop(struct padata_instance *pinst)
pinst->flags &= ~PADATA_INIT;

synchronize_rcu();
-
- get_online_cpus();
- padata_flush_queues(pinst->pd);
- put_online_cpus();
}

/* Replace the internal control structure with a new one. */
-static void padata_replace(struct padata_instance *pinst,
- struct parallel_data *pd_new)
+static int padata_replace_one(struct padata_shell *ps)
{
- struct parallel_data *pd_old = pinst->pd;
- int notification_mask = 0;
+ struct parallel_data *pd_new;

- pinst->flags |= PADATA_RESET;
+ pd_new = padata_alloc_pd(ps);
+ if (!pd_new)
+ return -ENOMEM;

- rcu_assign_pointer(pinst->pd, pd_new);
+ ps->opd = rcu_dereference_protected(ps->pd, 1);
+ rcu_assign_pointer(ps->pd, pd_new);

- synchronize_rcu();
+ return 0;
+}
+
+static int padata_replace(struct padata_instance *pinst, int cpu)
+{
+ int notification_mask = 0;
+ struct padata_shell *ps;
+ int err;
+
+ pinst->flags |= PADATA_RESET;

- if (!cpumask_equal(pd_old->cpumask.pcpu, pd_new->cpumask.pcpu))
+ cpumask_copy(pinst->omask, pinst->rcpumask.pcpu);
+ cpumask_and(pinst->rcpumask.pcpu, pinst->cpumask.pcpu,
+ cpu_online_mask);
+ if (cpu >= 0)
+ cpumask_clear_cpu(cpu, pinst->rcpumask.pcpu);
+ if (!cpumask_equal(pinst->omask, pinst->rcpumask.pcpu))
notification_mask |= PADATA_CPU_PARALLEL;
- if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
+
+ cpumask_copy(pinst->omask, pinst->rcpumask.cbcpu);
+ cpumask_and(pinst->rcpumask.cbcpu, pinst->cpumask.cbcpu,
+ cpu_online_mask);
+ if (cpu >= 0)
+ cpumask_clear_cpu(cpu, pinst->rcpumask.cbcpu);
+ if (!cpumask_equal(pinst->omask, pinst->rcpumask.cbcpu))
notification_mask |= PADATA_CPU_SERIAL;

- padata_flush_queues(pd_old);
- padata_free_pd(pd_old);
+ list_for_each_entry(ps, &pinst->pslist, list) {
+ err = padata_replace_one(ps);
+ if (err)
+ break;
+ }
+
+ synchronize_rcu();
+
+ list_for_each_entry_continue_reverse(ps, &pinst->pslist, list)
+ if (atomic_dec_and_test(&ps->opd->refcnt))
+ padata_free_pd(ps->opd);

if (notification_mask)
blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
notification_mask,
- &pd_new->cpumask);
+ &pinst->cpumask);

pinst->flags &= ~PADATA_RESET;
+
+ return err;
}

/**
@@ -587,7 +608,7 @@ static int __padata_set_cpumasks(struct padata_instance *pinst,
cpumask_var_t cbcpumask)
{
int valid;
- struct parallel_data *pd;
+ int err;

valid = padata_validate_cpumask(pinst, pcpumask);
if (!valid) {
@@ -600,19 +621,15 @@ static int __padata_set_cpumasks(struct padata_instance *pinst,
__padata_stop(pinst);

out_replace:
- pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
- if (!pd)
- return -ENOMEM;
-
cpumask_copy(pinst->cpumask.pcpu, pcpumask);
cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);

- padata_replace(pinst, pd);
+ err = padata_setup_cpumasks(pinst) ?: padata_replace(pinst, -1);

if (valid)
__padata_start(pinst);

- return 0;
+ return err;
}

/**
@@ -695,46 +712,32 @@ EXPORT_SYMBOL(padata_stop);

static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
{
- struct parallel_data *pd;
+ int err = 0;

if (cpumask_test_cpu(cpu, cpu_online_mask)) {
- pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
- pinst->cpumask.cbcpu);
- if (!pd)
- return -ENOMEM;
-
- padata_replace(pinst, pd);
+ err = padata_replace(pinst, -1);

if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu) &&
padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
__padata_start(pinst);
}

- return 0;
+ return err;
}

static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
{
- struct parallel_data *pd = NULL;
+ int err = 0;

if (cpumask_test_cpu(cpu, cpu_online_mask)) {
-
if (!padata_validate_cpumask(pinst, pinst->cpumask.pcpu) ||
!padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
__padata_stop(pinst);

- pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
- pinst->cpumask.cbcpu);
- if (!pd)
- return -ENOMEM;
-
- padata_replace(pinst, pd);
-
- cpumask_clear_cpu(cpu, pd->cpumask.cbcpu);
- cpumask_clear_cpu(cpu, pd->cpumask.pcpu);
+ err = padata_replace(pinst, cpu);
}

- return 0;
+ return err;
}

/**
@@ -817,8 +820,12 @@ static void __padata_free(struct padata_instance *pinst)
cpuhp_state_remove_instance_nocalls(hp_online, &pinst->node);
#endif

+ WARN_ON(!list_empty(&pinst->pslist));
+
padata_stop(pinst);
- padata_free_pd(pinst->pd);
+ free_cpumask_var(pinst->omask);
+ free_cpumask_var(pinst->rcpumask.cbcpu);
+ free_cpumask_var(pinst->rcpumask.pcpu);
free_cpumask_var(pinst->cpumask.pcpu);
free_cpumask_var(pinst->cpumask.cbcpu);
destroy_workqueue(pinst->serial_wq);
@@ -965,7 +972,6 @@ static struct padata_instance *padata_alloc(const char *name,
const struct cpumask *cbcpumask)
{
struct padata_instance *pinst;
- struct parallel_data *pd = NULL;

pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
if (!pinst)
@@ -993,14 +999,22 @@ static struct padata_instance *padata_alloc(const char *name,
!padata_validate_cpumask(pinst, cbcpumask))
goto err_free_masks;

- pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
- if (!pd)
+ if (!alloc_cpumask_var(&pinst->rcpumask.pcpu, GFP_KERNEL))
goto err_free_masks;
+ if (!alloc_cpumask_var(&pinst->rcpumask.cbcpu, GFP_KERNEL))
+ goto err_free_rcpumask_pcpu;
+ if (!alloc_cpumask_var(&pinst->omask, GFP_KERNEL))
+ goto err_free_rcpumask_cbcpu;

- rcu_assign_pointer(pinst->pd, pd);
+ INIT_LIST_HEAD(&pinst->pslist);

cpumask_copy(pinst->cpumask.pcpu, pcpumask);
cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
+ cpumask_and(pinst->rcpumask.pcpu, pcpumask, cpu_online_mask);
+ cpumask_and(pinst->rcpumask.cbcpu, cbcpumask, cpu_online_mask);
+
+ if (padata_setup_cpumasks(pinst))
+ goto err_free_omask;

pinst->flags = 0;

@@ -1016,6 +1030,12 @@ static struct padata_instance *padata_alloc(const char *name,

return pinst;

+err_free_omask:
+ free_cpumask_var(pinst->omask);
+err_free_rcpumask_cbcpu:
+ free_cpumask_var(pinst->rcpumask.cbcpu);
+err_free_rcpumask_pcpu:
+ free_cpumask_var(pinst->rcpumask.pcpu);
err_free_masks:
free_cpumask_var(pinst->cpumask.pcpu);
free_cpumask_var(pinst->cpumask.cbcpu);
@@ -1054,6 +1074,61 @@ void padata_free(struct padata_instance *pinst)
}
EXPORT_SYMBOL(padata_free);

+/**
+ * padata_alloc_shell - Allocate and initialize padata shell.
+ *
+ * @pinst: Parent padata_instance object.
+ */
+struct padata_shell *padata_alloc_shell(struct padata_instance *pinst)
+{
+ struct parallel_data *pd;
+ struct padata_shell *ps;
+
+ ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+ if (!ps)
+ goto out;
+
+ ps->pinst = pinst;
+
+ get_online_cpus();
+ pd = padata_alloc_pd(ps);
+ put_online_cpus();
+
+ if (!pd)
+ goto out_free_ps;
+
+ mutex_lock(&pinst->lock);
+ RCU_INIT_POINTER(ps->pd, pd);
+ list_add(&ps->list, &pinst->pslist);
+ mutex_unlock(&pinst->lock);
+
+ return ps;
+
+out_free_ps:
+ kfree(ps);
+out:
+ return NULL;
+}
+EXPORT_SYMBOL(padata_alloc_shell);
+
+/**
+ * padata_free_shell - free a padata shell
+ *
+ * @ps: padata shell to free
+ */
+void padata_free_shell(struct padata_shell *ps)
+{
+ struct padata_instance *pinst = ps->pinst;
+
+ mutex_lock(&pinst->lock);
+ list_del(&ps->list);
+ padata_free_pd(rcu_dereference_protected(ps->pd, 1));
+ mutex_unlock(&pinst->lock);
+
+ kfree(ps);
+}
+EXPORT_SYMBOL(padata_free_shell);
+
#ifdef CONFIG_HOTPLUG_CPU

static __init int padata_driver_init(void)
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 5dffade2d7cd..21acdff3bd27 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -530,7 +530,7 @@ static void srcu_gp_end(struct srcu_struct *ssp)
idx = rcu_seq_state(ssp->srcu_gp_seq);
WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
cbdelay = srcu_get_delay(ssp);
- ssp->srcu_last_gp_end = ktime_get_mono_fast_ns();
+ WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
rcu_seq_end(&ssp->srcu_gp_seq);
gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq))
@@ -762,6 +762,7 @@ static bool srcu_might_be_idle(struct srcu_struct *ssp)
unsigned long flags;
struct srcu_data *sdp;
unsigned long t;
+ unsigned long tlast;

/* If the local srcu_data structure has callbacks, not idle. */
local_irq_save(flags);
@@ -780,9 +781,9 @@ static bool srcu_might_be_idle(struct srcu_struct *ssp)

/* First, see if enough time has passed since the last GP. */
t = ktime_get_mono_fast_ns();
+ tlast = READ_ONCE(ssp->srcu_last_gp_end);
if (exp_holdoff == 0 ||
- time_in_range_open(t, ssp->srcu_last_gp_end,
- ssp->srcu_last_gp_end + exp_holdoff))
+ time_in_range_open(t, tlast, tlast + exp_holdoff))
return false; /* Too soon after last GP. */

/* Next, check for probable idleness. */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index d632cd019597..69c5aa64fcfd 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -134,7 +134,7 @@ static void __maybe_unused sync_exp_reset_tree(void)
rcu_for_each_node_breadth_first(rnp) {
raw_spin_lock_irqsave_rcu_node(rnp, flags);
WARN_ON_ONCE(rnp->expmask);
- rnp->expmask = rnp->expmaskinit;
+ WRITE_ONCE(rnp->expmask, rnp->expmaskinit);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
}
@@ -211,7 +211,7 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp,
rnp = rnp->parent;
raw_spin_lock_rcu_node(rnp); /* irqs already disabled */
WARN_ON_ONCE(!(rnp->expmask & mask));
- rnp->expmask &= ~mask;
+ WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask);
}
}

@@ -241,7 +241,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp,
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
- rnp->expmask &= ~mask;
+ WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask);
__rcu_report_exp_rnp(rnp, wake, flags); /* Releases rnp->lock. */
}

@@ -372,12 +372,10 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);

/* IPI the remaining CPUs for expedited quiescent state. */
- for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
+ for_each_leaf_node_cpu_mask(rnp, cpu, mask_ofl_ipi) {
unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);

- if (!(mask_ofl_ipi & mask))
- continue;
retry_ipi:
if (rcu_dynticks_in_eqs_since(rdp, rdp->exp_dynticks_snap)) {
mask_ofl_test |= mask;
@@ -491,7 +489,7 @@ static void synchronize_sched_expedited_wait(void)
struct rcu_data *rdp;

mask = leaf_node_cpu_bit(rnp, cpu);
- if (!(rnp->expmask & mask))
+ if (!(READ_ONCE(rnp->expmask) & mask))
continue;
ndetected++;
rdp = per_cpu_ptr(&rcu_data, cpu);
@@ -503,7 +501,8 @@ static void synchronize_sched_expedited_wait(void)
}
pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
jiffies - jiffies_start, rcu_state.expedited_sequence,
- rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]);
+ READ_ONCE(rnp_root->expmask),
+ ".T"[!!rnp_root->exp_tasks]);
if (ndetected) {
pr_err("blocking rcu_node structures:");
rcu_for_each_node_breadth_first(rnp) {
@@ -513,7 +512,7 @@ static void synchronize_sched_expedited_wait(void)
continue;
pr_cont(" l=%u:%d-%d:%#lx/%c",
rnp->level, rnp->grplo, rnp->grphi,
- rnp->expmask,
+ READ_ONCE(rnp->expmask),
".T"[!!rnp->exp_tasks]);
}
pr_cont("\n");
@@ -521,7 +520,7 @@ static void synchronize_sched_expedited_wait(void)
rcu_for_each_leaf_node(rnp) {
for_each_leaf_node_possible_cpu(rnp, cpu) {
mask = leaf_node_cpu_bit(rnp, cpu);
- if (!(rnp->expmask & mask))
+ if (!(READ_ONCE(rnp->expmask) & mask))
continue;
dump_cpu_task(cpu);
}
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index fa08d55f7040..f849e7429816 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -220,7 +220,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
* blocked tasks.
*/
if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) {
- rnp->gp_tasks = &t->rcu_node_entry;
+ WRITE_ONCE(rnp->gp_tasks, &t->rcu_node_entry);
WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq);
}
if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
@@ -340,7 +340,7 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
*/
static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
{
- return rnp->gp_tasks != NULL;
+ return READ_ONCE(rnp->gp_tasks) != NULL;
}

/* Bias and limit values for ->rcu_read_lock_nesting. */
@@ -493,7 +493,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
trace_rcu_unlock_preempted_task(TPS("rcu_preempt"),
rnp->gp_seq, t->pid);
if (&t->rcu_node_entry == rnp->gp_tasks)
- rnp->gp_tasks = np;
+ WRITE_ONCE(rnp->gp_tasks, np);
if (&t->rcu_node_entry == rnp->exp_tasks)
rnp->exp_tasks = np;
if (IS_ENABLED(CONFIG_RCU_BOOST)) {
@@ -612,7 +612,7 @@ static void rcu_read_unlock_special(struct task_struct *t)

t->rcu_read_unlock_special.b.exp_hint = false;
exp = (t->rcu_blocked_node && t->rcu_blocked_node->exp_tasks) ||
- (rdp->grpmask & rnp->expmask) ||
+ (rdp->grpmask & READ_ONCE(rnp->expmask)) ||
tick_nohz_full_cpu(rdp->cpu);
// Need to defer quiescent state until everything is enabled.
if (irqs_were_disabled && use_softirq &&
@@ -663,7 +663,7 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
dump_blkd_tasks(rnp, 10);
if (rcu_preempt_has_tasks(rnp) &&
(rnp->qsmaskinit || rnp->wait_blkd_tasks)) {
- rnp->gp_tasks = rnp->blkd_tasks.next;
+ WRITE_ONCE(rnp->gp_tasks, rnp->blkd_tasks.next);
t = container_of(rnp->gp_tasks, struct task_struct,
rcu_node_entry);
trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"),
@@ -757,7 +757,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n",
__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext);
pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n",
- __func__, rnp->gp_tasks, rnp->boost_tasks, rnp->exp_tasks);
+ __func__, READ_ONCE(rnp->gp_tasks), rnp->boost_tasks,
+ rnp->exp_tasks);
pr_info("%s: ->blkd_tasks", __func__);
i = 0;
list_for_each(lhp, &rnp->blkd_tasks) {
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 451f9d05ccfe..4b11f0309eee 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -88,6 +88,7 @@ static int alarmtimer_rtc_add_device(struct device *dev,
unsigned long flags;
struct rtc_device *rtc = to_rtc_device(dev);
struct wakeup_source *__ws;
+ int ret = 0;

if (rtcdev)
return -EBUSY;
@@ -102,8 +103,8 @@ static int alarmtimer_rtc_add_device(struct device *dev,
spin_lock_irqsave(&rtcdev_lock, flags);
if (!rtcdev) {
if (!try_module_get(rtc->owner)) {
- spin_unlock_irqrestore(&rtcdev_lock, flags);
- return -1;
+ ret = -1;
+ goto unlock;
}

rtcdev = rtc;
@@ -112,11 +113,12 @@ static int alarmtimer_rtc_add_device(struct device *dev,
ws = __ws;
__ws = NULL;
}
+unlock:
spin_unlock_irqrestore(&rtcdev_lock, flags);

wakeup_source_unregister(__ws);

- return 0;
+ return ret;
}

static inline void alarmtimer_rtc_timer_init(void)
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index fff5f64981c6..428beb69426a 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -293,8 +293,15 @@ static void clocksource_watchdog(struct timer_list *unused)
next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
if (next_cpu >= nr_cpu_ids)
next_cpu = cpumask_first(cpu_online_mask);
- watchdog_timer.expires += WATCHDOG_INTERVAL;
- add_timer_on(&watchdog_timer, next_cpu);
+
+ /*
+ * Arm timer if not already pending: could race with concurrent
+ * pair clocksource_stop_watchdog() clocksource_start_watchdog().
+ */
+ if (!timer_pending(&watchdog_timer)) {
+ watchdog_timer.expires += WATCHDOG_INTERVAL;
+ add_timer_on(&watchdog_timer, next_cpu);
+ }
out:
spin_unlock(&watchdog_lock);
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0708a41cfe2d..407d8bf4ed93 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5102,8 +5102,8 @@ static const struct file_operations ftrace_notrace_fops = {

static DEFINE_MUTEX(graph_lock);

-struct ftrace_hash *ftrace_graph_hash = EMPTY_HASH;
-struct ftrace_hash *ftrace_graph_notrace_hash = EMPTY_HASH;
+struct ftrace_hash __rcu *ftrace_graph_hash = EMPTY_HASH;
+struct ftrace_hash __rcu *ftrace_graph_notrace_hash = EMPTY_HASH;

enum graph_filter_type {
GRAPH_FILTER_NOTRACE = 0,
@@ -5378,8 +5378,15 @@ ftrace_graph_release(struct inode *inode, struct file *file)

mutex_unlock(&graph_lock);

- /* Wait till all users are no longer using the old hash */
- synchronize_rcu();
+ /*
+ * We need to do a hard force of sched synchronization.
+ * This is because we use preempt_disable() to do RCU, but
+ * the function tracers can be called where RCU is not watching
+ * (like before user_exit()). We can not rely on the RCU
+ * infrastructure to do the synchronization, thus we must do it
+ * ourselves.
+ */
+ schedule_on_each_cpu(ftrace_sync);

free_ftrace_hash(old_hash);
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d685c61085c0..a3c29d5fcc61 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -932,22 +932,31 @@ extern void __trace_graph_return(struct trace_array *tr,
unsigned long flags, int pc);

#ifdef CONFIG_DYNAMIC_FTRACE
-extern struct ftrace_hash *ftrace_graph_hash;
-extern struct ftrace_hash *ftrace_graph_notrace_hash;
+extern struct ftrace_hash __rcu *ftrace_graph_hash;
+extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash;

static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
{
unsigned long addr = trace->func;
int ret = 0;
+ struct ftrace_hash *hash;

preempt_disable_notrace();

- if (ftrace_hash_empty(ftrace_graph_hash)) {
+ /*
+ * Have to open code "rcu_dereference_sched()" because the
+ * function graph tracer can be called when RCU is not
+ * "watching".
+ * Protected with schedule_on_each_cpu(ftrace_sync)
+ */
+ hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible());
+
+ if (ftrace_hash_empty(hash)) {
ret = 1;
goto out;
}

- if (ftrace_lookup_ip(ftrace_graph_hash, addr)) {
+ if (ftrace_lookup_ip(hash, addr)) {

/*
* This needs to be cleared on the return functions
@@ -983,10 +992,20 @@ static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace)
static inline int ftrace_graph_notrace_addr(unsigned long addr)
{
int ret = 0;
+ struct ftrace_hash *notrace_hash;

preempt_disable_notrace();

- if (ftrace_lookup_ip(ftrace_graph_notrace_hash, addr))
+ /*
+ * Have to open code "rcu_dereference_sched()" because the
+ * function graph tracer can be called when RCU is not
+ * "watching".
+ * Protected with schedule_on_each_cpu(ftrace_sync)
+ */
+ notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash,
+ !preemptible());
+
+ if (ftrace_lookup_ip(notrace_hash, addr))
ret = 1;

preempt_enable_notrace();
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 205692181e7b..4be7fc84d6b6 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -470,11 +470,12 @@ struct action_data {
* When a histogram trigger is hit, the values of any
* references to variables, including variables being passed
* as parameters to synthetic events, are collected into a
- * var_ref_vals array. This var_ref_idx is the index of the
- * first param in the array to be passed to the synthetic
- * event invocation.
+ * var_ref_vals array. This var_ref_idx array is an array of
+ * indices into the var_ref_vals array, one for each synthetic
+ * event param, and is passed to the synthetic event
+ * invocation.
*/
- unsigned int var_ref_idx;
+ unsigned int var_ref_idx[TRACING_MAP_VARS_MAX];
struct synth_event *synth_event;
bool use_trace_keyword;
char *synth_event_name;
@@ -875,14 +876,14 @@ static struct trace_event_functions synth_event_funcs = {

static notrace void trace_event_raw_event_synth(void *__data,
u64 *var_ref_vals,
- unsigned int var_ref_idx)
+ unsigned int *var_ref_idx)
{
struct trace_event_file *trace_file = __data;
struct synth_trace_event *entry;
struct trace_event_buffer fbuffer;
struct ring_buffer *buffer;
struct synth_event *event;
- unsigned int i, n_u64;
+ unsigned int i, n_u64, val_idx;
int fields_size = 0;

event = trace_file->event_call->data;
@@ -905,15 +906,16 @@ static notrace void trace_event_raw_event_synth(void *__data,
goto out;

for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
+ val_idx = var_ref_idx[i];
if (event->fields[i]->is_string) {
- char *str_val = (char *)(long)var_ref_vals[var_ref_idx + i];
+ char *str_val = (char *)(long)var_ref_vals[val_idx];
char *str_field = (char *)&entry->fields[n_u64];

strscpy(str_field, str_val, STR_VAR_LEN_MAX);
n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
} else {
struct synth_field *field = event->fields[i];
- u64 val = var_ref_vals[var_ref_idx + i];
+ u64 val = var_ref_vals[val_idx];

switch (field->size) {
case 1:
@@ -1113,10 +1115,10 @@ static struct tracepoint *alloc_synth_tracepoint(char *name)
}

typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals,
- unsigned int var_ref_idx);
+ unsigned int *var_ref_idx);

static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals,
- unsigned int var_ref_idx)
+ unsigned int *var_ref_idx)
{
struct tracepoint *tp = event->tp;

@@ -2655,6 +2657,22 @@ static int init_var_ref(struct hist_field *ref_field,
goto out;
}

+static int find_var_ref_idx(struct hist_trigger_data *hist_data,
+ struct hist_field *var_field)
+{
+ struct hist_field *ref_field;
+ int i;
+
+ for (i = 0; i < hist_data->n_var_refs; i++) {
+ ref_field = hist_data->var_refs[i];
+ if (ref_field->var.idx == var_field->var.idx &&
+ ref_field->var.hist_data == var_field->hist_data)
+ return i;
+ }
+
+ return -ENOENT;
+}
+
/**
* create_var_ref - Create a variable reference and attach it to trigger
* @hist_data: The trigger that will be referencing the variable
@@ -4228,11 +4246,11 @@ static int trace_action_create(struct hist_trigger_data *hist_data,
struct trace_array *tr = hist_data->event_file->tr;
char *event_name, *param, *system = NULL;
struct hist_field *hist_field, *var_ref;
- unsigned int i, var_ref_idx;
+ unsigned int i;
unsigned int field_pos = 0;
struct synth_event *event;
char *synth_event_name;
- int ret = 0;
+ int var_ref_idx, ret = 0;

lockdep_assert_held(&event_mutex);

@@ -4249,8 +4267,6 @@ static int trace_action_create(struct hist_trigger_data *hist_data,

event->ref++;

- var_ref_idx = hist_data->n_var_refs;
-
for (i = 0; i < data->n_params; i++) {
char *p;

@@ -4299,6 +4315,14 @@ static int trace_action_create(struct hist_trigger_data *hist_data,
goto err;
}

+ var_ref_idx = find_var_ref_idx(hist_data, var_ref);
+ if (WARN_ON(var_ref_idx < 0)) {
+ ret = var_ref_idx;
+ goto err;
+ }
+
+ data->var_ref_idx[i] = var_ref_idx;
+
field_pos++;
kfree(p);
continue;
@@ -4317,7 +4341,6 @@ static int trace_action_create(struct hist_trigger_data *hist_data,
}

data->synth_event = event;
- data->var_ref_idx = var_ref_idx;
out:
return ret;
err:
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 9ae87be422f2..ab8b6436d53f 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -876,7 +876,8 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
for (i = 0; i < tp->nr_args; i++) {
parg = tp->args + i;
if (parg->count) {
- if (strcmp(parg->type->name, "string") == 0)
+ if ((strcmp(parg->type->name, "string") == 0) ||
+ (strcmp(parg->type->name, "ustring") == 0))
fmt = ", __get_str(%s[%d])";
else
fmt = ", REC->%s[%d]";
@@ -884,7 +885,8 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
pos += snprintf(buf + pos, LEN_OR_ZERO,
fmt, parg->name, j);
} else {
- if (strcmp(parg->type->name, "string") == 0)
+ if ((strcmp(parg->type->name, "string") == 0) ||
+ (strcmp(parg->type->name, "ustring") == 0))
fmt = ", __get_str(%s)";
else
fmt = ", REC->%s";
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index e288168661e1..e304196d7c28 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -89,8 +89,10 @@ static void tracing_sched_unregister(void)

static void tracing_start_sched_switch(int ops)
{
- bool sched_register = (!sched_cmdline_ref && !sched_tgid_ref);
+ bool sched_register;
+
mutex_lock(&sched_register_mutex);
+ sched_register = (!sched_cmdline_ref && !sched_tgid_ref);

switch (ops) {
case RECORD_CMDLINE:
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 49cc4d570a40..bd3d9ef7d39e 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -157,6 +157,7 @@ static noinline void __init kmalloc_oob_krealloc_more(void)
if (!ptr1 || !ptr2) {
pr_err("Allocation failed\n");
kfree(ptr1);
+ kfree(ptr2);
return;
}

diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index c360f6a6c844..62f05f605fb5 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -21,6 +21,7 @@ struct backing_dev_info noop_backing_dev_info = {
EXPORT_SYMBOL_GPL(noop_backing_dev_info);

static struct class *bdi_class;
+const char *bdi_unknown_name = "(unknown)";

/*
* bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ef4e9eb572a4..b5b4e310fe70 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5465,14 +5465,6 @@ static int mem_cgroup_move_account(struct page *page,
__mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages);
}

-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (compound && !list_empty(page_deferred_list(page))) {
- spin_lock(&from->deferred_split_queue.split_queue_lock);
- list_del_init(page_deferred_list(page));
- from->deferred_split_queue.split_queue_len--;
- spin_unlock(&from->deferred_split_queue.split_queue_lock);
- }
-#endif
/*
* It is safe to change page->mem_cgroup here because the page
* is referenced, charged, and isolated - we can't race with
@@ -5482,16 +5474,6 @@ static int mem_cgroup_move_account(struct page *page,
/* caller should have done css_get */
page->mem_cgroup = to;

-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (compound && list_empty(page_deferred_list(page))) {
- spin_lock(&to->deferred_split_queue.split_queue_lock);
- list_add_tail(page_deferred_list(page),
- &to->deferred_split_queue.split_queue);
- to->deferred_split_queue.split_queue_len++;
- spin_unlock(&to->deferred_split_queue.split_queue_lock);
- }
-#endif
-
spin_unlock_irqrestore(&from->move_lock, flags);

ret = 0;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index fab540685279..0aa154be3a52 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1738,8 +1738,6 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size)

BUG_ON(check_hotplug_memory_range(start, size));

- mem_hotplug_begin();
-
/*
* All memory blocks must be offlined before removing memory. Check
* whether all memory blocks in question are offline and return error
@@ -1754,9 +1752,14 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size)
memblock_free(start, size);
memblock_remove(start, size);

- /* remove memory block devices before removing memory */
+ /*
+ * Memory block device removal under the device_hotplug_lock is
+ * a barrier against racing online attempts.
+ */
remove_memory_block_devices(start, size);

+ mem_hotplug_begin();
+
arch_remove_memory(nid, start, size, NULL);
__release_memory_resource(start, size);

diff --git a/mm/migrate.c b/mm/migrate.c
index 6956627ebf8b..c4c313e47f12 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1631,8 +1631,19 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
start = i;
} else if (node != current_node) {
err = do_move_pages_to_node(mm, &pagelist, current_node);
- if (err)
+ if (err) {
+ /*
+ * Positive err means the number of failed
+ * pages to migrate. Since we are going to
+ * abort and return the number of non-migrated
+ * pages, so need to incude the rest of the
+ * nr_pages that have not been attempted as
+ * well.
+ */
+ if (err > 0)
+ err += nr_pages - i - 1;
goto out;
+ }
err = store_status(status, start, current_node, i - start);
if (err)
goto out;
@@ -1663,8 +1674,11 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
goto out_flush;

err = do_move_pages_to_node(mm, &pagelist, current_node);
- if (err)
+ if (err) {
+ if (err > 0)
+ err += nr_pages - i - 1;
goto out;
+ }
if (i > start) {
err = store_status(status, start, current_node, i - start);
if (err)
@@ -1678,6 +1692,13 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,

/* Make sure we do not overwrite the existing error */
err1 = do_move_pages_to_node(mm, &pagelist, current_node);
+ /*
+ * Don't have to report non-attempted pages here since:
+ * - If the above loop is done gracefully all pages have been
+ * attempted.
+ * - If the above loop is aborted it means a fatal error
+ * happened, should return ret.
+ */
if (!err1)
err1 = store_status(status, start, current_node, i - start);
if (err >= 0)
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 7d70e5c78f97..7c1b8f67af7b 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -102,14 +102,14 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
*/
static inline void tlb_table_invalidate(struct mmu_gather *tlb)
{
-#ifndef CONFIG_HAVE_RCU_TABLE_NO_INVALIDATE
- /*
- * Invalidate page-table caches used by hardware walkers. Then we still
- * need to RCU-sched wait while freeing the pages because software
- * walkers can still be in-flight.
- */
- tlb_flush_mmu_tlbonly(tlb);
-#endif
+ if (tlb_needs_table_invalidate()) {
+ /*
+ * Invalidate page-table caches used by hardware walkers. Then
+ * we still need to RCU-sched wait while freeing the pages
+ * because software walkers can still be in-flight.
+ */
+ tlb_flush_mmu_tlbonly(tlb);
+ }
}

static void tlb_remove_table_smp_sync(void *arg)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 45e39131a716..d387ca74cb5a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6933,7 +6933,8 @@ static u64 zero_pfn_range(unsigned long spfn, unsigned long epfn)
* This function also addresses a similar issue where struct pages are left
* uninitialized because the physical address range is not covered by
* memblock.memory or memblock.reserved. That could happen when memblock
- * layout is manually configured via memmap=.
+ * layout is manually configured via memmap=, or when the highest physical
+ * address (max_pfn) does not end on a section boundary.
*/
void __init zero_resv_unavail(void)
{
@@ -6951,7 +6952,16 @@ void __init zero_resv_unavail(void)
pgcnt += zero_pfn_range(PFN_DOWN(next), PFN_UP(start));
next = end;
}
- pgcnt += zero_pfn_range(PFN_DOWN(next), max_pfn);
+
+ /*
+ * Early sections always have a fully populated memmap for the whole
+ * section - see pfn_valid(). If the last section has holes at the
+ * end and that section is marked "online", the memmap will be
+ * considered initialized. Make sure that memmap has a well defined
+ * state.
+ */
+ pgcnt += zero_pfn_range(PFN_DOWN(next),
+ round_up(max_pfn, PAGES_PER_SECTION));

/*
* Struct pages that do not have backing memory. This could be because
diff --git a/mm/sparse.c b/mm/sparse.c
index 1100fdb9649c..69b41b6046a5 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -787,7 +787,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
ms->usage = NULL;
}
memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
- ms->section_mem_map = sparse_encode_mem_map(NULL, section_nr);
+ ms->section_mem_map = (unsigned long)NULL;
}

if (section_is_early && memmap)
diff --git a/net/core/devlink.c b/net/core/devlink.c
index ae614965c8c2..61bc67047f56 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3863,6 +3863,12 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
goto out_unlock;
}

+ /* return 0 if there is no further data to read */
+ if (start_offset >= region->size) {
+ err = 0;
+ goto out_unlock;
+ }
+
hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
&devlink_nl_family, NLM_F_ACK | NLM_F_MULTI,
DEVLINK_CMD_REGION_READ);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 536e032d95c8..246a258b1fac 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -1004,8 +1004,10 @@ static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack)
{
int cpu;

- if (!monitor_hw)
+ if (!monitor_hw) {
NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled");
+ return;
+ }

monitor_hw = false;

diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index ee561297d8a7..fbfd0db182b7 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -27,6 +27,8 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)

rcu_read_lock(); /* hsr->node_db, hsr->ports */
port = hsr_port_get_rcu(skb->dev);
+ if (!port)
+ goto finish_pass;

if (hsr_addr_is_self(port->hsr, eth_hdr(skb)->h_source)) {
/* Directly kill frames sent by ourselves */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3640e8563a10..deb466fc3d1f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2618,10 +2618,12 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->snd_cwnd = TCP_INIT_CWND;
tp->snd_cwnd_cnt = 0;
tp->window_clamp = 0;
+ tp->delivered = 0;
tp->delivered_ce = 0;
tcp_set_ca_state(sk, TCP_CA_Open);
tp->is_sack_reneg = 0;
tcp_clear_retrans(tp);
+ tp->total_retrans = 0;
inet_csk_delack_init(sk);
/* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
* issue in __tcp_select_window()
@@ -2633,10 +2635,14 @@ int tcp_disconnect(struct sock *sk, int flags)
sk->sk_rx_dst = NULL;
tcp_saved_syn_free(tp);
tp->compressed_ack = 0;
+ tp->segs_in = 0;
+ tp->segs_out = 0;
tp->bytes_sent = 0;
tp->bytes_acked = 0;
tp->bytes_received = 0;
tp->bytes_retrans = 0;
+ tp->data_segs_in = 0;
+ tp->data_segs_out = 0;
tp->duplicate_sack[0].start_seq = 0;
tp->duplicate_sack[0].end_seq = 0;
tp->dsack_dups = 0;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f9b5690e94fd..b11ccb53c7e0 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5719,6 +5719,9 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
struct nlattr *tb[IFLA_INET6_MAX + 1];
int err;

+ if (!idev)
+ return -EAFNOSUPPORT;
+
if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0)
BUG();

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index f82ea12bac37..425b95eb7e87 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -322,8 +322,13 @@ int l2tp_session_register(struct l2tp_session *session,

spin_lock_bh(&pn->l2tp_session_hlist_lock);

+ /* IP encap expects session IDs to be globally unique, while
+ * UDP encap doesn't.
+ */
hlist_for_each_entry(session_walk, g_head, global_hlist)
- if (session_walk->session_id == session->session_id) {
+ if (session_walk->session_id == session->session_id &&
+ (session_walk->tunnel->encap == L2TP_ENCAPTYPE_IP ||
+ tunnel->encap == L2TP_ENCAPTYPE_IP)) {
err = -EEXIST;
goto err_tlock_pnlock;
}
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index d8143a8c034d..a9df9dac57b2 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1293,31 +1293,34 @@ ip_set_dump_policy[IPSET_ATTR_CMD_MAX + 1] = {
};

static int
-dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
+ip_set_dump_start(struct netlink_callback *cb)
{
struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1];
struct nlattr *attr = (void *)nlh + min_len;
+ struct sk_buff *skb = cb->skb;
+ struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk));
u32 dump_type;
- ip_set_id_t index;
int ret;

ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, attr,
nlh->nlmsg_len - min_len,
ip_set_dump_policy, NULL);
if (ret)
- return ret;
+ goto error;

cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]);
if (cda[IPSET_ATTR_SETNAME]) {
+ ip_set_id_t index;
struct ip_set *set;

set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]),
&index);
- if (!set)
- return -ENOENT;
-
+ if (!set) {
+ ret = -ENOENT;
+ goto error;
+ }
dump_type = DUMP_ONE;
cb->args[IPSET_CB_INDEX] = index;
} else {
@@ -1333,10 +1336,17 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
cb->args[IPSET_CB_DUMP] = dump_type;

return 0;
+
+error:
+ /* We have to create and send the error message manually :-( */
+ if (nlh->nlmsg_flags & NLM_F_ACK) {
+ netlink_ack(cb->skb, nlh, ret, NULL);
+ }
+ return ret;
}

static int
-ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
+ip_set_dump_do(struct sk_buff *skb, struct netlink_callback *cb)
{
ip_set_id_t index = IPSET_INVALID_ID, max;
struct ip_set *set = NULL;
@@ -1347,18 +1357,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
bool is_destroyed;
int ret = 0;

- if (!cb->args[IPSET_CB_DUMP]) {
- ret = dump_init(cb, inst);
- if (ret < 0) {
- nlh = nlmsg_hdr(cb->skb);
- /* We have to create and send the error message
- * manually :-(
- */
- if (nlh->nlmsg_flags & NLM_F_ACK)
- netlink_ack(cb->skb, nlh, ret, NULL);
- return ret;
- }
- }
+ if (!cb->args[IPSET_CB_DUMP])
+ return -EINVAL;

if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max)
goto out;
@@ -1494,7 +1494,8 @@ static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb,

{
struct netlink_dump_control c = {
- .dump = ip_set_dump_start,
+ .start = ip_set_dump_start,
+ .dump = ip_set_dump_do,
.done = ip_set_dump_done,
};
return netlink_dump_start(ctnl, skb, nlh, &c);
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index d72ddb67bb74..4a6ca9723a12 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -194,6 +194,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
service_in_use:
write_unlock(&local->services_lock);
rxrpc_unuse_local(local);
+ rxrpc_put_local(local);
ret = -EADDRINUSE;
error_unlock:
release_sock(&rx->sk);
@@ -899,6 +900,7 @@ static int rxrpc_release_sock(struct sock *sk)
rxrpc_purge_queue(&sk->sk_receive_queue);

rxrpc_unuse_local(rx->local);
+ rxrpc_put_local(rx->local);
rx->local = NULL;
key_put(rx->key);
rx->key = NULL;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 5e99df80e80a..7d730c438404 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -490,6 +490,7 @@ enum rxrpc_call_flag {
RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */
RXRPC_CALL_RX_UNDERRUN, /* Got data underrun */
RXRPC_CALL_IS_INTR, /* The call is interruptible */
+ RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */
};

/*
@@ -1021,6 +1022,16 @@ void rxrpc_unuse_local(struct rxrpc_local *);
void rxrpc_queue_local(struct rxrpc_local *);
void rxrpc_destroy_all_locals(struct rxrpc_net *);

+static inline bool __rxrpc_unuse_local(struct rxrpc_local *local)
+{
+ return atomic_dec_return(&local->active_users) == 0;
+}
+
+static inline bool __rxrpc_use_local(struct rxrpc_local *local)
+{
+ return atomic_fetch_add_unless(&local->active_users, 1, 0) != 0;
+}
+
/*
* misc.c
*/
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index a31c18c09894..dbdbc4f18b5e 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -493,7 +493,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)

_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);

- if (conn)
+ if (conn && !test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
rxrpc_disconnect_call(call);
if (call->security)
call->security->free_call_crypto(call);
@@ -569,6 +569,7 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
struct rxrpc_net *rxnet = call->rxnet;

+ rxrpc_put_connection(call->conn);
rxrpc_put_peer(call->peer);
kfree(call->rxtx_buffer);
kfree(call->rxtx_annotations);
@@ -590,7 +591,6 @@ void rxrpc_cleanup_call(struct rxrpc_call *call)

ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
- ASSERTCMP(call->conn, ==, NULL);

rxrpc_cleanup_ring(call);
rxrpc_free_skb(call->tx_pending, rxrpc_skb_cleaned);
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 376370cd9285..ea7d4c21f889 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -785,6 +785,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
u32 cid;

spin_lock(&conn->channel_lock);
+ set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);

cid = call->cid;
if (cid) {
@@ -792,7 +793,6 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
chan = &conn->channels[channel];
}
trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
- call->conn = NULL;

/* Calls that have never actually been assigned a channel can simply be
* discarded. If the conn didn't get used either, it will follow
@@ -908,7 +908,6 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
spin_unlock(&rxnet->client_conn_cache_lock);
out_2:
spin_unlock(&conn->channel_lock);
- rxrpc_put_connection(conn);
_leave("");
return;

diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 808a4723f868..06fcff2ebbba 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -438,16 +438,12 @@ static void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn)
/*
* connection-level event processor
*/
-void rxrpc_process_connection(struct work_struct *work)
+static void rxrpc_do_process_connection(struct rxrpc_connection *conn)
{
- struct rxrpc_connection *conn =
- container_of(work, struct rxrpc_connection, processor);
struct sk_buff *skb;
u32 abort_code = RX_PROTOCOL_ERROR;
int ret;

- rxrpc_see_connection(conn);
-
if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
rxrpc_secure_connection(conn);

@@ -475,18 +471,32 @@ void rxrpc_process_connection(struct work_struct *work)
}
}

-out:
- rxrpc_put_connection(conn);
- _leave("");
return;

requeue_and_leave:
skb_queue_head(&conn->rx_queue, skb);
- goto out;
+ return;

protocol_error:
if (rxrpc_abort_connection(conn, ret, abort_code) < 0)
goto requeue_and_leave;
rxrpc_free_skb(skb, rxrpc_skb_freed);
- goto out;
+ return;
+}
+
+void rxrpc_process_connection(struct work_struct *work)
+{
+ struct rxrpc_connection *conn =
+ container_of(work, struct rxrpc_connection, processor);
+
+ rxrpc_see_connection(conn);
+
+ if (__rxrpc_use_local(conn->params.local)) {
+ rxrpc_do_process_connection(conn);
+ rxrpc_unuse_local(conn->params.local);
+ }
+
+ rxrpc_put_connection(conn);
+ _leave("");
+ return;
}
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 38d718e90dc6..19e141eeed17 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -223,9 +223,8 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
__rxrpc_disconnect_call(conn, call);
spin_unlock(&conn->channel_lock);

- call->conn = NULL;
+ set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
conn->idle_timestamp = jiffies;
- rxrpc_put_connection(conn);
}

/*
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 96d54e5bf7bc..ef10fbf71b15 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -599,10 +599,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
false, true,
rxrpc_propose_ack_input_data);

- if (seq0 == READ_ONCE(call->rx_hard_ack) + 1) {
- trace_rxrpc_notify_socket(call->debug_id, serial);
- rxrpc_notify_socket(call);
- }
+ trace_rxrpc_notify_socket(call->debug_id, serial);
+ rxrpc_notify_socket(call);

unlock:
spin_unlock(&call->input_lock);
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 36587260cabd..a6c1349e965d 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -364,11 +364,14 @@ void rxrpc_queue_local(struct rxrpc_local *local)
void rxrpc_put_local(struct rxrpc_local *local)
{
const void *here = __builtin_return_address(0);
+ unsigned int debug_id;
int n;

if (local) {
+ debug_id = local->debug_id;
+
n = atomic_dec_return(&local->usage);
- trace_rxrpc_local(local->debug_id, rxrpc_local_put, n, here);
+ trace_rxrpc_local(debug_id, rxrpc_local_put, n, here);

if (n == 0)
call_rcu(&local->rcu, rxrpc_local_rcu);
@@ -380,14 +383,11 @@ void rxrpc_put_local(struct rxrpc_local *local)
*/
struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local)
{
- unsigned int au;
-
local = rxrpc_get_local_maybe(local);
if (!local)
return NULL;

- au = atomic_fetch_add_unless(&local->active_users, 1, 0);
- if (au == 0) {
+ if (!__rxrpc_use_local(local)) {
rxrpc_put_local(local);
return NULL;
}
@@ -401,14 +401,11 @@ struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local)
*/
void rxrpc_unuse_local(struct rxrpc_local *local)
{
- unsigned int au;
-
if (local) {
- au = atomic_dec_return(&local->active_users);
- if (au == 0)
+ if (__rxrpc_unuse_local(local)) {
+ rxrpc_get_local(local);
rxrpc_queue_local(local);
- else
- rxrpc_put_local(local);
+ }
}
}

@@ -465,7 +462,7 @@ static void rxrpc_local_processor(struct work_struct *work)

do {
again = false;
- if (atomic_read(&local->active_users) == 0) {
+ if (!__rxrpc_use_local(local)) {
rxrpc_local_destroyer(local);
break;
}
@@ -479,6 +476,8 @@ static void rxrpc_local_processor(struct work_struct *work)
rxrpc_process_local_events(local);
again = true;
}
+
+ __rxrpc_unuse_local(local);
} while (again);

rxrpc_put_local(local);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 935bb60fff56..bad3d2420344 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -129,7 +129,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
rxrpc_serial_t *_serial)
{
- struct rxrpc_connection *conn = NULL;
+ struct rxrpc_connection *conn;
struct rxrpc_ack_buffer *pkt;
struct msghdr msg;
struct kvec iov[2];
@@ -139,18 +139,14 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
int ret;
u8 reason;

- spin_lock_bh(&call->lock);
- if (call->conn)
- conn = rxrpc_get_connection_maybe(call->conn);
- spin_unlock_bh(&call->lock);
- if (!conn)
+ if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
return -ECONNRESET;

pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
- if (!pkt) {
- rxrpc_put_connection(conn);
+ if (!pkt)
return -ENOMEM;
- }
+
+ conn = call->conn;

msg.msg_name = &call->peer->srx.transport;
msg.msg_namelen = call->peer->srx.transport_len;
@@ -244,7 +240,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
}

out:
- rxrpc_put_connection(conn);
kfree(pkt);
return ret;
}
@@ -254,7 +249,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
*/
int rxrpc_send_abort_packet(struct rxrpc_call *call)
{
- struct rxrpc_connection *conn = NULL;
+ struct rxrpc_connection *conn;
struct rxrpc_abort_buffer pkt;
struct msghdr msg;
struct kvec iov[1];
@@ -271,13 +266,11 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
test_bit(RXRPC_CALL_TX_LAST, &call->flags))
return 0;

- spin_lock_bh(&call->lock);
- if (call->conn)
- conn = rxrpc_get_connection_maybe(call->conn);
- spin_unlock_bh(&call->lock);
- if (!conn)
+ if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
return -ECONNRESET;

+ conn = call->conn;
+
msg.msg_name = &call->peer->srx.transport;
msg.msg_namelen = call->peer->srx.transport_len;
msg.msg_control = NULL;
@@ -312,8 +305,6 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr,
rxrpc_tx_point_call_abort);
rxrpc_tx_backoff(call, ret);
-
- rxrpc_put_connection(conn);
return ret;
}

diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 48f67a9b1037..923b263c401b 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -364,27 +364,31 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
if (!rxrpc_get_peer_maybe(peer))
continue;

- spin_unlock_bh(&rxnet->peer_hash_lock);
-
- keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME;
- slot = keepalive_at - base;
- _debug("%02x peer %u t=%d {%pISp}",
- cursor, peer->debug_id, slot, &peer->srx.transport);
+ if (__rxrpc_use_local(peer->local)) {
+ spin_unlock_bh(&rxnet->peer_hash_lock);
+
+ keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME;
+ slot = keepalive_at - base;
+ _debug("%02x peer %u t=%d {%pISp}",
+ cursor, peer->debug_id, slot, &peer->srx.transport);
+
+ if (keepalive_at <= base ||
+ keepalive_at > base + RXRPC_KEEPALIVE_TIME) {
+ rxrpc_send_keepalive(peer);
+ slot = RXRPC_KEEPALIVE_TIME;
+ }

- if (keepalive_at <= base ||
- keepalive_at > base + RXRPC_KEEPALIVE_TIME) {
- rxrpc_send_keepalive(peer);
- slot = RXRPC_KEEPALIVE_TIME;
+ /* A transmission to this peer occurred since last we
+ * examined it so put it into the appropriate future
+ * bucket.
+ */
+ slot += cursor;
+ slot &= mask;
+ spin_lock_bh(&rxnet->peer_hash_lock);
+ list_add_tail(&peer->keepalive_link,
+ &rxnet->peer_keepalive[slot & mask]);
+ rxrpc_unuse_local(peer->local);
}
-
- /* A transmission to this peer occurred since last we examined
- * it so put it into the appropriate future bucket.
- */
- slot += cursor;
- slot &= mask;
- spin_lock_bh(&rxnet->peer_hash_lock);
- list_add_tail(&peer->keepalive_link,
- &rxnet->peer_keepalive[slot & mask]);
rxrpc_put_peer_locked(peer);
}

diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index c22624131949..d36949d9382c 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -463,10 +463,8 @@ static u32 gen_tunnel(struct rsvp_head *data)

static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
[TCA_RSVP_CLASSID] = { .type = NLA_U32 },
- [TCA_RSVP_DST] = { .type = NLA_BINARY,
- .len = RSVP_DST_LEN * sizeof(u32) },
- [TCA_RSVP_SRC] = { .type = NLA_BINARY,
- .len = RSVP_DST_LEN * sizeof(u32) },
+ [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) },
+ [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) },
[TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
};

diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 3d4a1280352f..09b7dc5fe7e0 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -333,12 +333,31 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
cp->fall_through = p->fall_through;
cp->tp = tp;

+ if (tb[TCA_TCINDEX_HASH])
+ cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
+
+ if (tb[TCA_TCINDEX_MASK])
+ cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
+
+ if (tb[TCA_TCINDEX_SHIFT])
+ cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
+
+ if (!cp->hash) {
+ /* Hash not specified, use perfect hash if the upper limit
+ * of the hashing index is below the threshold.
+ */
+ if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
+ cp->hash = (cp->mask >> cp->shift) + 1;
+ else
+ cp->hash = DEFAULT_HASH_SIZE;
+ }
+
if (p->perfect) {
int i;

if (tcindex_alloc_perfect_hash(net, cp) < 0)
goto errout;
- for (i = 0; i < cp->hash; i++)
+ for (i = 0; i < min(cp->hash, p->hash); i++)
cp->perfect[i].res = p->perfect[i].res;
balloc = 1;
}
@@ -346,19 +365,10 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,

err = tcindex_filter_result_init(&new_filter_result, net);
if (err < 0)
- goto errout1;
+ goto errout_alloc;
if (old_r)
cr = r->res;

- if (tb[TCA_TCINDEX_HASH])
- cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
-
- if (tb[TCA_TCINDEX_MASK])
- cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
-
- if (tb[TCA_TCINDEX_SHIFT])
- cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
-
err = -EBUSY;

/* Hash already allocated, make sure that we still meet the
@@ -376,16 +386,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
if (tb[TCA_TCINDEX_FALL_THROUGH])
cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);

- if (!cp->hash) {
- /* Hash not specified, use perfect hash if the upper limit
- * of the hashing index is below the threshold.
- */
- if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
- cp->hash = (cp->mask >> cp->shift) + 1;
- else
- cp->hash = DEFAULT_HASH_SIZE;
- }
-
if (!cp->perfect && !cp->h)
cp->alloc_hash = cp->hash;

@@ -484,7 +484,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
tcindex_free_perfect_hash(cp);
else if (balloc == 2)
kfree(cp->h);
-errout1:
tcf_exts_destroy(&new_filter_result.exts);
errout:
kfree(cp);
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index c609373c8661..660fc45ee40f 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -31,6 +31,7 @@ static DEFINE_SPINLOCK(taprio_list_lock);

#define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
#define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
+#define TAPRIO_FLAGS_INVALID U32_MAX

struct sched_entry {
struct list_head list;
@@ -766,6 +767,7 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 },
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 },
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
+ [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 },
};

static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
@@ -1367,6 +1369,33 @@ static int taprio_mqprio_cmp(const struct net_device *dev,
return 0;
}

+/* The semantics of the 'flags' argument in relation to 'change()'
+ * requests, are interpreted following two rules (which are applied in
+ * this order): (1) an omitted 'flags' argument is interpreted as
+ * zero; (2) the 'flags' of a "running" taprio instance cannot be
+ * changed.
+ */
+static int taprio_new_flags(const struct nlattr *attr, u32 old,
+ struct netlink_ext_ack *extack)
+{
+ u32 new = 0;
+
+ if (attr)
+ new = nla_get_u32(attr);
+
+ if (old != TAPRIO_FLAGS_INVALID && old != new) {
+ NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!taprio_flags_valid(new)) {
+ NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
+ return -EINVAL;
+ }
+
+ return new;
+}
+
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -1375,7 +1404,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_mqprio_qopt *mqprio = NULL;
- u32 taprio_flags = 0;
unsigned long flags;
ktime_t start;
int i, err;
@@ -1388,21 +1416,14 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);

- if (tb[TCA_TAPRIO_ATTR_FLAGS]) {
- taprio_flags = nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]);
-
- if (q->flags != 0 && q->flags != taprio_flags) {
- NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
- return -EOPNOTSUPP;
- } else if (!taprio_flags_valid(taprio_flags)) {
- NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
- return -EINVAL;
- }
+ err = taprio_new_flags(tb[TCA_TAPRIO_ATTR_FLAGS],
+ q->flags, extack);
+ if (err < 0)
+ return err;

- q->flags = taprio_flags;
- }
+ q->flags = err;

- err = taprio_parse_mqprio_opt(dev, mqprio, extack, taprio_flags);
+ err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
if (err < 0)
return err;

@@ -1444,7 +1465,20 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,

taprio_set_picos_per_byte(dev, q);

- if (FULL_OFFLOAD_IS_ENABLED(taprio_flags))
+ if (mqprio) {
+ netdev_set_num_tc(dev, mqprio->num_tc);
+ for (i = 0; i < mqprio->num_tc; i++)
+ netdev_set_tc_queue(dev, i,
+ mqprio->count[i],
+ mqprio->offset[i]);
+
+ /* Always use supplied priority mappings */
+ for (i = 0; i <= TC_BITMASK; i++)
+ netdev_set_prio_tc_map(dev, i,
+ mqprio->prio_tc_map[i]);
+ }
+
+ if (FULL_OFFLOAD_IS_ENABLED(q->flags))
err = taprio_enable_offload(dev, mqprio, q, new_admin, extack);
else
err = taprio_disable_offload(dev, q, extack);
@@ -1464,27 +1498,14 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
}

- if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) &&
- !FULL_OFFLOAD_IS_ENABLED(taprio_flags) &&
+ if (!TXTIME_ASSIST_IS_ENABLED(q->flags) &&
+ !FULL_OFFLOAD_IS_ENABLED(q->flags) &&
!hrtimer_active(&q->advance_timer)) {
hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
q->advance_timer.function = advance_sched;
}

- if (mqprio) {
- netdev_set_num_tc(dev, mqprio->num_tc);
- for (i = 0; i < mqprio->num_tc; i++)
- netdev_set_tc_queue(dev, i,
- mqprio->count[i],
- mqprio->offset[i]);
-
- /* Always use supplied priority mappings */
- for (i = 0; i <= TC_BITMASK; i++)
- netdev_set_prio_tc_map(dev, i,
- mqprio->prio_tc_map[i]);
- }
-
- if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) {
+ if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
q->dequeue = taprio_dequeue_offload;
q->peek = taprio_peek_offload;
} else {
@@ -1501,9 +1522,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
goto unlock;
}

- if (TXTIME_ASSIST_IS_ENABLED(taprio_flags)) {
- setup_txtime(q, new_admin, start);
+ setup_txtime(q, new_admin, start);

+ if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
if (!oper) {
rcu_assign_pointer(q->oper_sched, new_admin);
err = 0;
@@ -1528,7 +1549,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,

spin_unlock_irqrestore(&q->current_entry_lock, flags);

- if (FULL_OFFLOAD_IS_ENABLED(taprio_flags))
+ if (FULL_OFFLOAD_IS_ENABLED(q->flags))
taprio_offload_config_changed(q);
}

@@ -1567,7 +1588,7 @@ static void taprio_destroy(struct Qdisc *sch)
}
q->qdiscs = NULL;

- netdev_set_num_tc(dev, 0);
+ netdev_reset_tc(dev);

if (q->oper_sched)
call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb);
@@ -1597,6 +1618,7 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
* and get the valid one on taprio_change().
*/
q->clockid = -1;
+ q->flags = TAPRIO_FLAGS_INVALID;

spin_lock(&taprio_list_lock);
list_add(&q->taprio_list, &taprio_list);
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 908b60a72d95..ed20fa8a6f70 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1245,6 +1245,7 @@ static int gss_proxy_save_rsc(struct cache_detail *cd,
dprintk("RPC: No creds found!\n");
goto out;
} else {
+ struct timespec64 boot;

/* steal creds */
rsci.cred = ud->creds;
@@ -1265,6 +1266,9 @@ static int gss_proxy_save_rsc(struct cache_detail *cd,
&expiry, GFP_KERNEL);
if (status)
goto out;
+
+ getboottime64(&boot);
+ expiry -= boot.tv_sec;
}

rsci.h.expiry_time = expiry;
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 42b571cde177..e7ad48c605e0 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -236,7 +236,7 @@ all:

clean:
$(MAKE) -C ../../ M=$(CURDIR) clean
- @rm -f *~
+ @find $(CURDIR) -type f -name '*~' -delete

$(LIBBPF): FORCE
# Fix up variables inherited from Kbuild that tools/ build system won't like
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index 0da6e9e7132e..8b862a7a6c6a 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -16,6 +16,10 @@ static const char *__doc__ =
#include <getopt.h>
#include <net/if.h>
#include <time.h>
+#include <linux/limits.h>
+
+#define __must_check
+#include <linux/err.h>

#include <arpa/inet.h>
#include <linux/if_link.h>
@@ -46,6 +50,10 @@ static int cpus_count_map_fd;
static int cpus_iterator_map_fd;
static int exception_cnt_map_fd;

+#define NUM_TP 5
+struct bpf_link *tp_links[NUM_TP] = { 0 };
+static int tp_cnt = 0;
+
/* Exit return codes */
#define EXIT_OK 0
#define EXIT_FAIL 1
@@ -88,6 +96,10 @@ static void int_exit(int sig)
printf("program on interface changed, not removing\n");
}
}
+ /* Detach tracepoints */
+ while (tp_cnt)
+ bpf_link__destroy(tp_links[--tp_cnt]);
+
exit(EXIT_OK);
}

@@ -588,23 +600,61 @@ static void stats_poll(int interval, bool use_separators, char *prog_name,
free_stats_record(prev);
}

+static struct bpf_link * attach_tp(struct bpf_object *obj,
+ const char *tp_category,
+ const char* tp_name)
+{
+ struct bpf_program *prog;
+ struct bpf_link *link;
+ char sec_name[PATH_MAX];
+ int len;
+
+ len = snprintf(sec_name, PATH_MAX, "tracepoint/%s/%s",
+ tp_category, tp_name);
+ if (len < 0)
+ exit(EXIT_FAIL);
+
+ prog = bpf_object__find_program_by_title(obj, sec_name);
+ if (!prog) {
+ fprintf(stderr, "ERR: finding progsec: %s\n", sec_name);
+ exit(EXIT_FAIL_BPF);
+ }
+
+ link = bpf_program__attach_tracepoint(prog, tp_category, tp_name);
+ if (IS_ERR(link))
+ exit(EXIT_FAIL_BPF);
+
+ return link;
+}
+
+static void init_tracepoints(struct bpf_object *obj) {
+ tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_err");
+ tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_map_err");
+ tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_exception");
+ tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_enqueue");
+ tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_kthread");
+}
+
static int init_map_fds(struct bpf_object *obj)
{
- cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map");
- rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt");
+ /* Maps updated by tracepoints */
redirect_err_cnt_map_fd =
bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt");
+ exception_cnt_map_fd =
+ bpf_object__find_map_fd_by_name(obj, "exception_cnt");
cpumap_enqueue_cnt_map_fd =
bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt");
cpumap_kthread_cnt_map_fd =
bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt");
+
+ /* Maps used by XDP */
+ rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt");
+ cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map");
cpus_available_map_fd =
bpf_object__find_map_fd_by_name(obj, "cpus_available");
cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count");
cpus_iterator_map_fd =
bpf_object__find_map_fd_by_name(obj, "cpus_iterator");
- exception_cnt_map_fd =
- bpf_object__find_map_fd_by_name(obj, "exception_cnt");

if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 ||
redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 ||
@@ -662,6 +712,7 @@ int main(int argc, char **argv)
strerror(errno));
return EXIT_FAIL;
}
+ init_tracepoints(obj);
if (init_map_fds(obj) < 0) {
fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n");
return EXIT_FAIL;
diff --git a/scripts/find-unused-docs.sh b/scripts/find-unused-docs.sh
index 3f46f8977dc4..ee6a50e33aba 100755
--- a/scripts/find-unused-docs.sh
+++ b/scripts/find-unused-docs.sh
@@ -54,7 +54,7 @@ for file in `find $1 -name '*.c'`; do
if [[ ${FILES_INCLUDED[$file]+_} ]]; then
continue;
fi
- str=$(scripts/kernel-doc -text -export "$file" 2>/dev/null)
+ str=$(scripts/kernel-doc -export "$file" 2>/dev/null)
if [[ -n "$str" ]]; then
echo "$file"
fi
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index abeb09c30633..ad22066eba04 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2832,42 +2832,39 @@ static int smack_socket_connect(struct socket *sock, struct sockaddr *sap,
int addrlen)
{
int rc = 0;
-#if IS_ENABLED(CONFIG_IPV6)
- struct sockaddr_in6 *sip = (struct sockaddr_in6 *)sap;
-#endif
-#ifdef SMACK_IPV6_SECMARK_LABELING
- struct smack_known *rsp;
- struct socket_smack *ssp;
-#endif

if (sock->sk == NULL)
return 0;
-
+ if (sock->sk->sk_family != PF_INET &&
+ (!IS_ENABLED(CONFIG_IPV6) || sock->sk->sk_family != PF_INET6))
+ return 0;
+ if (addrlen < offsetofend(struct sockaddr, sa_family))
+ return 0;
+ if (IS_ENABLED(CONFIG_IPV6) && sap->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sip = (struct sockaddr_in6 *)sap;
#ifdef SMACK_IPV6_SECMARK_LABELING
- ssp = sock->sk->sk_security;
+ struct smack_known *rsp;
#endif

- switch (sock->sk->sk_family) {
- case PF_INET:
- if (addrlen < sizeof(struct sockaddr_in) ||
- sap->sa_family != AF_INET)
- return -EINVAL;
- rc = smack_netlabel_send(sock->sk, (struct sockaddr_in *)sap);
- break;
- case PF_INET6:
- if (addrlen < SIN6_LEN_RFC2133 || sap->sa_family != AF_INET6)
- return -EINVAL;
+ if (addrlen < SIN6_LEN_RFC2133)
+ return 0;
#ifdef SMACK_IPV6_SECMARK_LABELING
rsp = smack_ipv6host_label(sip);
- if (rsp != NULL)
+ if (rsp != NULL) {
+ struct socket_smack *ssp = sock->sk->sk_security;
+
rc = smk_ipv6_check(ssp->smk_out, rsp, sip,
- SMK_CONNECTING);
+ SMK_CONNECTING);
+ }
#endif
#ifdef SMACK_IPV6_PORT_LABELING
rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING);
#endif
- break;
+ return rc;
}
+ if (sap->sa_family != AF_INET || addrlen < sizeof(struct sockaddr_in))
+ return 0;
+ rc = smack_netlabel_send(sock->sk, (struct sockaddr_in *)sap);
return rc;
}

diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c
index aee7c04d49e5..b61ba0321a72 100644
--- a/sound/drivers/dummy.c
+++ b/sound/drivers/dummy.c
@@ -915,7 +915,7 @@ static void print_formats(struct snd_dummy *dummy,
{
int i;

- for (i = 0; i < SNDRV_PCM_FORMAT_LAST; i++) {
+ for (i = 0; i <= SNDRV_PCM_FORMAT_LAST; i++) {
if (dummy->pcm_hw.formats & (1ULL << i))
snd_iprintf(buffer, " %s", snd_pcm_format_name(i));
}
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index f6cbb831b86a..85beb172d810 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2156,6 +2156,8 @@ static struct snd_pci_quirk power_save_blacklist[] = {
/* https://bugzilla.redhat.com/show_bug.cgi?id=1581607 */
SND_PCI_QUIRK(0x1558, 0x3501, "Clevo W35xSS_370SS", 0),
/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
+ SND_PCI_QUIRK(0x1558, 0x6504, "Clevo W65_67SB", 0),
+ /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
SND_PCI_QUIRK(0x1028, 0x0497, "Dell Precision T3600", 0),
/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
/* Note the P55A-UD3 and Z87-D3HP share the subsys id for the HDA dev */
@@ -2415,6 +2417,8 @@ static const struct pci_device_id azx_ids[] = {
/* Jasperlake */
{ PCI_DEVICE(0x8086, 0x38c8),
.driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
+ { PCI_DEVICE(0x8086, 0x4dc8),
+ .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
/* Tigerlake */
{ PCI_DEVICE(0x8086, 0xa0c8),
.driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c
index 8350954b7986..e5191584638a 100644
--- a/sound/pci/hda/hda_tegra.c
+++ b/sound/pci/hda/hda_tegra.c
@@ -398,6 +398,7 @@ static int hda_tegra_create(struct snd_card *card,
return err;

chip->bus.needs_damn_long_delay = 1;
+ chip->bus.core.aligned_mmio = 1;

err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
if (err < 0) {
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 488c17c9f375..8ac805a634f4 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -4153,6 +4153,7 @@ HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI", patch_i915_glk_hdmi),
HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi),
HDA_CODEC_ENTRY(0x8086280f, "Icelake HDMI", patch_i915_icl_hdmi),
HDA_CODEC_ENTRY(0x80862812, "Tigerlake HDMI", patch_i915_tgl_hdmi),
+HDA_CODEC_ENTRY(0x8086281a, "Jasperlake HDMI", patch_i915_icl_hdmi),
HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi),
HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI", patch_i915_byt_hdmi),
HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI", patch_i915_byt_hdmi),
diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c
index aa1f9637d895..e949b372cead 100644
--- a/sound/soc/codecs/sgtl5000.c
+++ b/sound/soc/codecs/sgtl5000.c
@@ -1344,7 +1344,8 @@ static int sgtl5000_set_power_regs(struct snd_soc_component *component)
* if vddio == vdda the source of charge pump should be
* assigned manually to VDDIO
*/
- if (vddio == vdda) {
+ if (regulator_is_equal(sgtl5000->supplies[VDDA].consumer,
+ sgtl5000->supplies[VDDIO].consumer)) {
lreg_ctrl |= SGTL5000_VDDC_ASSN_OVRD;
lreg_ctrl |= SGTL5000_VDDC_MAN_ASSN_VDDIO <<
SGTL5000_VDDC_MAN_ASSN_SHIFT;
diff --git a/sound/soc/intel/boards/skl_hda_dsp_common.c b/sound/soc/intel/boards/skl_hda_dsp_common.c
index 58409b6e476e..e3d405e57c5f 100644
--- a/sound/soc/intel/boards/skl_hda_dsp_common.c
+++ b/sound/soc/intel/boards/skl_hda_dsp_common.c
@@ -38,16 +38,19 @@ int skl_hda_hdmi_add_pcm(struct snd_soc_card *card, int device)
return 0;
}

-SND_SOC_DAILINK_DEFS(idisp1,
- DAILINK_COMP_ARRAY(COMP_CPU("iDisp1 Pin")),
+SND_SOC_DAILINK_DEF(idisp1_cpu,
+ DAILINK_COMP_ARRAY(COMP_CPU("iDisp1 Pin")));
+SND_SOC_DAILINK_DEF(idisp1_codec,
DAILINK_COMP_ARRAY(COMP_CODEC("ehdaudio0D2", "intel-hdmi-hifi1")));

-SND_SOC_DAILINK_DEFS(idisp2,
- DAILINK_COMP_ARRAY(COMP_CPU("iDisp2 Pin")),
+SND_SOC_DAILINK_DEF(idisp2_cpu,
+ DAILINK_COMP_ARRAY(COMP_CPU("iDisp2 Pin")));
+SND_SOC_DAILINK_DEF(idisp2_codec,
DAILINK_COMP_ARRAY(COMP_CODEC("ehdaudio0D2", "intel-hdmi-hifi2")));

-SND_SOC_DAILINK_DEFS(idisp3,
- DAILINK_COMP_ARRAY(COMP_CPU("iDisp3 Pin")),
+SND_SOC_DAILINK_DEF(idisp3_cpu,
+ DAILINK_COMP_ARRAY(COMP_CPU("iDisp3 Pin")));
+SND_SOC_DAILINK_DEF(idisp3_codec,
DAILINK_COMP_ARRAY(COMP_CODEC("ehdaudio0D2", "intel-hdmi-hifi3")));

SND_SOC_DAILINK_DEF(analog_cpu,
@@ -80,21 +83,21 @@ struct snd_soc_dai_link skl_hda_be_dai_links[HDA_DSP_MAX_BE_DAI_LINKS] = {
.id = 1,
.dpcm_playback = 1,
.no_pcm = 1,
- SND_SOC_DAILINK_REG(idisp1),
+ SND_SOC_DAILINK_REG(idisp1_cpu, idisp1_codec, platform),
},
{
.name = "iDisp2",
.id = 2,
.dpcm_playback = 1,
.no_pcm = 1,
- SND_SOC_DAILINK_REG(idisp2),
+ SND_SOC_DAILINK_REG(idisp2_cpu, idisp2_codec, platform),
},
{
.name = "iDisp3",
.id = 3,
.dpcm_playback = 1,
.no_pcm = 1,
- SND_SOC_DAILINK_REG(idisp3),
+ SND_SOC_DAILINK_REG(idisp3_cpu, idisp3_codec, platform),
},
{
.name = "Analog Playback and Capture",
diff --git a/sound/soc/meson/axg-fifo.c b/sound/soc/meson/axg-fifo.c
index 5a3749938900..d286dff3171d 100644
--- a/sound/soc/meson/axg-fifo.c
+++ b/sound/soc/meson/axg-fifo.c
@@ -108,10 +108,12 @@ static int axg_fifo_pcm_hw_params(struct snd_pcm_substream *ss,
{
struct snd_pcm_runtime *runtime = ss->runtime;
struct axg_fifo *fifo = axg_fifo_data(ss);
+ unsigned int burst_num, period, threshold;
dma_addr_t end_ptr;
- unsigned int burst_num;
int ret;

+ period = params_period_bytes(params);
+
ret = snd_pcm_lib_malloc_pages(ss, params_buffer_bytes(params));
if (ret < 0)
return ret;
@@ -122,9 +124,25 @@ static int axg_fifo_pcm_hw_params(struct snd_pcm_substream *ss,
regmap_write(fifo->map, FIFO_FINISH_ADDR, end_ptr);

/* Setup interrupt periodicity */
- burst_num = params_period_bytes(params) / AXG_FIFO_BURST;
+ burst_num = period / AXG_FIFO_BURST;
regmap_write(fifo->map, FIFO_INT_ADDR, burst_num);

+ /*
+ * Start the fifo request on the smallest of the following:
+ * - Half the fifo size
+ * - Half the period size
+ */
+ threshold = min(period / 2,
+ (unsigned int)AXG_FIFO_MIN_DEPTH / 2);
+
+ /*
+ * With the threshold in bytes, register value is:
+ * V = (threshold / burst) - 1
+ */
+ threshold /= AXG_FIFO_BURST;
+ regmap_field_write(fifo->field_threshold,
+ threshold ? threshold - 1 : 0);
+
/* Enable block count irq */
regmap_update_bits(fifo->map, FIFO_CTRL0,
CTRL0_INT_EN(FIFO_INT_COUNT_REPEAT),
@@ -360,6 +378,11 @@ int axg_fifo_probe(struct platform_device *pdev)
return fifo->irq;
}

+ fifo->field_threshold =
+ devm_regmap_field_alloc(dev, fifo->map, data->field_threshold);
+ if (IS_ERR(fifo->field_threshold))
+ return PTR_ERR(fifo->field_threshold);
+
return devm_snd_soc_register_component(dev, data->component_drv,
data->dai_drv, 1);
}
diff --git a/sound/soc/meson/axg-fifo.h b/sound/soc/meson/axg-fifo.h
index bb1e2ce50256..ab546a3cf940 100644
--- a/sound/soc/meson/axg-fifo.h
+++ b/sound/soc/meson/axg-fifo.h
@@ -9,7 +9,9 @@

struct clk;
struct platform_device;
+struct reg_field;
struct regmap;
+struct regmap_field;
struct reset_control;

struct snd_soc_component_driver;
@@ -50,8 +52,6 @@ struct snd_soc_pcm_runtime;
#define CTRL1_STATUS2_SEL_MASK GENMASK(11, 8)
#define CTRL1_STATUS2_SEL(x) ((x) << 8)
#define STATUS2_SEL_DDR_READ 0
-#define CTRL1_THRESHOLD_MASK GENMASK(23, 16)
-#define CTRL1_THRESHOLD(x) ((x) << 16)
#define CTRL1_FRDDR_DEPTH_MASK GENMASK(31, 24)
#define CTRL1_FRDDR_DEPTH(x) ((x) << 24)
#define FIFO_START_ADDR 0x08
@@ -67,12 +67,14 @@ struct axg_fifo {
struct regmap *map;
struct clk *pclk;
struct reset_control *arb;
+ struct regmap_field *field_threshold;
int irq;
};

struct axg_fifo_match_data {
const struct snd_soc_component_driver *component_drv;
struct snd_soc_dai_driver *dai_drv;
+ struct reg_field field_threshold;
};

extern const struct snd_pcm_ops axg_fifo_pcm_ops;
diff --git a/sound/soc/meson/axg-frddr.c b/sound/soc/meson/axg-frddr.c
index 6ab111c31b28..09773a9ae964 100644
--- a/sound/soc/meson/axg-frddr.c
+++ b/sound/soc/meson/axg-frddr.c
@@ -50,7 +50,7 @@ static int axg_frddr_dai_startup(struct snd_pcm_substream *substream,
struct snd_soc_dai *dai)
{
struct axg_fifo *fifo = snd_soc_dai_get_drvdata(dai);
- unsigned int fifo_depth, fifo_threshold;
+ unsigned int fifo_depth;
int ret;

/* Enable pclk to access registers and clock the fifo ip */
@@ -68,11 +68,8 @@ static int axg_frddr_dai_startup(struct snd_pcm_substream *substream,
* Depth and threshold are zero based.
*/
fifo_depth = AXG_FIFO_MIN_CNT - 1;
- fifo_threshold = (AXG_FIFO_MIN_CNT / 2) - 1;
- regmap_update_bits(fifo->map, FIFO_CTRL1,
- CTRL1_FRDDR_DEPTH_MASK | CTRL1_THRESHOLD_MASK,
- CTRL1_FRDDR_DEPTH(fifo_depth) |
- CTRL1_THRESHOLD(fifo_threshold));
+ regmap_update_bits(fifo->map, FIFO_CTRL1, CTRL1_FRDDR_DEPTH_MASK,
+ CTRL1_FRDDR_DEPTH(fifo_depth));

return 0;
}
@@ -153,8 +150,9 @@ static const struct snd_soc_component_driver axg_frddr_component_drv = {
};

static const struct axg_fifo_match_data axg_frddr_match_data = {
- .component_drv = &axg_frddr_component_drv,
- .dai_drv = &axg_frddr_dai_drv
+ .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23),
+ .component_drv = &axg_frddr_component_drv,
+ .dai_drv = &axg_frddr_dai_drv
};

static const struct snd_soc_dai_ops g12a_frddr_ops = {
@@ -271,8 +269,9 @@ static const struct snd_soc_component_driver g12a_frddr_component_drv = {
};

static const struct axg_fifo_match_data g12a_frddr_match_data = {
- .component_drv = &g12a_frddr_component_drv,
- .dai_drv = &g12a_frddr_dai_drv
+ .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23),
+ .component_drv = &g12a_frddr_component_drv,
+ .dai_drv = &g12a_frddr_dai_drv
};

/* On SM1, the output selection in on CTRL2 */
@@ -335,8 +334,9 @@ static const struct snd_soc_component_driver sm1_frddr_component_drv = {
};

static const struct axg_fifo_match_data sm1_frddr_match_data = {
- .component_drv = &sm1_frddr_component_drv,
- .dai_drv = &g12a_frddr_dai_drv
+ .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23),
+ .component_drv = &sm1_frddr_component_drv,
+ .dai_drv = &g12a_frddr_dai_drv
};

static const struct of_device_id axg_frddr_of_match[] = {
diff --git a/sound/soc/meson/axg-toddr.c b/sound/soc/meson/axg-toddr.c
index c8ea2145f576..ecf41c7549a6 100644
--- a/sound/soc/meson/axg-toddr.c
+++ b/sound/soc/meson/axg-toddr.c
@@ -89,7 +89,6 @@ static int axg_toddr_dai_startup(struct snd_pcm_substream *substream,
struct snd_soc_dai *dai)
{
struct axg_fifo *fifo = snd_soc_dai_get_drvdata(dai);
- unsigned int fifo_threshold;
int ret;

/* Enable pclk to access registers and clock the fifo ip */
@@ -107,11 +106,6 @@ static int axg_toddr_dai_startup(struct snd_pcm_substream *substream,
/* Apply single buffer mode to the interface */
regmap_update_bits(fifo->map, FIFO_CTRL0, CTRL0_TODDR_PP_MODE, 0);

- /* TODDR does not have a configurable fifo depth */
- fifo_threshold = AXG_FIFO_MIN_CNT - 1;
- regmap_update_bits(fifo->map, FIFO_CTRL1, CTRL1_THRESHOLD_MASK,
- CTRL1_THRESHOLD(fifo_threshold));
-
return 0;
}

@@ -185,8 +179,9 @@ static const struct snd_soc_component_driver axg_toddr_component_drv = {
};

static const struct axg_fifo_match_data axg_toddr_match_data = {
- .component_drv = &axg_toddr_component_drv,
- .dai_drv = &axg_toddr_dai_drv
+ .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23),
+ .component_drv = &axg_toddr_component_drv,
+ .dai_drv = &axg_toddr_dai_drv
};

static const struct snd_soc_dai_ops g12a_toddr_ops = {
@@ -218,8 +213,9 @@ static const struct snd_soc_component_driver g12a_toddr_component_drv = {
};

static const struct axg_fifo_match_data g12a_toddr_match_data = {
- .component_drv = &g12a_toddr_component_drv,
- .dai_drv = &g12a_toddr_dai_drv
+ .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23),
+ .component_drv = &g12a_toddr_component_drv,
+ .dai_drv = &g12a_toddr_dai_drv
};

static const char * const sm1_toddr_sel_texts[] = {
@@ -282,8 +278,9 @@ static const struct snd_soc_component_driver sm1_toddr_component_drv = {
};

static const struct axg_fifo_match_data sm1_toddr_match_data = {
- .component_drv = &sm1_toddr_component_drv,
- .dai_drv = &g12a_toddr_dai_drv
+ .field_threshold = REG_FIELD(FIFO_CTRL1, 12, 23),
+ .component_drv = &sm1_toddr_component_drv,
+ .dai_drv = &g12a_toddr_dai_drv
};

static const struct of_device_id axg_toddr_of_match[] = {
diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c
index 81f28f7ff1a0..12aec140819a 100644
--- a/sound/soc/sof/core.c
+++ b/sound/soc/sof/core.c
@@ -288,6 +288,46 @@ static int sof_machine_check(struct snd_sof_dev *sdev)
#endif
}

+/*
+ * FW Boot State Transition Diagram
+ *
+ * +-----------------------------------------------------------------------+
+ * | |
+ * ------------------ ------------------ |
+ * | | | | |
+ * | BOOT_FAILED | | READY_FAILED |-------------------------+ |
+ * | | | | | |
+ * ------------------ ------------------ | |
+ * ^ ^ | |
+ * | | | |
+ * (FW Boot Timeout) (FW_READY FAIL) | |
+ * | | | |
+ * | | | |
+ * ------------------ | ------------------ | |
+ * | | | | | | |
+ * | IN_PROGRESS |---------------+------------->| COMPLETE | | |
+ * | | (FW Boot OK) (FW_READY OK) | | | |
+ * ------------------ ------------------ | |
+ * ^ | | |
+ * | | | |
+ * (FW Loading OK) (System Suspend/Runtime Suspend)
+ * | | | |
+ * | | | |
+ * ------------------ ------------------ | | |
+ * | | | |<-----+ | |
+ * | PREPARE | | NOT_STARTED |<---------------------+ |
+ * | | | |<---------------------------+
+ * ------------------ ------------------
+ * | ^ | ^
+ * | | | |
+ * | +-----------------------+ |
+ * | (DSP Probe OK) |
+ * | |
+ * | |
+ * +------------------------------------+
+ * (System Suspend/Runtime Suspend)
+ */
+
static int sof_probe_continue(struct snd_sof_dev *sdev)
{
struct snd_sof_pdata *plat_data = sdev->pdata;
@@ -303,6 +343,8 @@ static int sof_probe_continue(struct snd_sof_dev *sdev)
return ret;
}

+ sdev->fw_state = SOF_FW_BOOT_PREPARE;
+
/* check machine info */
ret = sof_machine_check(sdev);
if (ret < 0) {
@@ -342,7 +384,12 @@ static int sof_probe_continue(struct snd_sof_dev *sdev)
goto fw_load_err;
}

- /* boot the firmware */
+ sdev->fw_state = SOF_FW_BOOT_IN_PROGRESS;
+
+ /*
+ * Boot the firmware. The FW boot status will be modified
+ * in snd_sof_run_firmware() depending on the outcome.
+ */
ret = snd_sof_run_firmware(sdev);
if (ret < 0) {
dev_err(sdev->dev, "error: failed to boot DSP firmware %d\n",
@@ -368,7 +415,7 @@ static int sof_probe_continue(struct snd_sof_dev *sdev)
if (ret < 0) {
dev_err(sdev->dev,
"error: failed to register DSP DAI driver %d\n", ret);
- goto fw_run_err;
+ goto fw_trace_err;
}

drv_name = plat_data->machine->drv_name;
@@ -382,7 +429,7 @@ static int sof_probe_continue(struct snd_sof_dev *sdev)

if (IS_ERR(plat_data->pdev_mach)) {
ret = PTR_ERR(plat_data->pdev_mach);
- goto fw_run_err;
+ goto fw_trace_err;
}

dev_dbg(sdev->dev, "created machine %s\n",
@@ -393,7 +440,8 @@ static int sof_probe_continue(struct snd_sof_dev *sdev)

return 0;

-#if !IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE)
+fw_trace_err:
+ snd_sof_free_trace(sdev);
fw_run_err:
snd_sof_fw_unload(sdev);
fw_load_err:
@@ -402,21 +450,10 @@ static int sof_probe_continue(struct snd_sof_dev *sdev)
snd_sof_free_debug(sdev);
dbg_err:
snd_sof_remove(sdev);
-#else
-
- /*
- * when the probe_continue is handled in a work queue, the
- * probe does not fail so we don't release resources here.
- * They will be released with an explicit call to
- * snd_sof_device_remove() when the PCI/ACPI device is removed
- */

-fw_run_err:
-fw_load_err:
-ipc_err:
-dbg_err:
-
-#endif
+ /* all resources freed, update state to match */
+ sdev->fw_state = SOF_FW_BOOT_NOT_STARTED;
+ sdev->first_boot = true;

return ret;
}
@@ -447,6 +484,7 @@ int snd_sof_device_probe(struct device *dev, struct snd_sof_pdata *plat_data)

sdev->pdata = plat_data;
sdev->first_boot = true;
+ sdev->fw_state = SOF_FW_BOOT_NOT_STARTED;
dev_set_drvdata(dev, sdev);

/* check all mandatory ops */
@@ -494,10 +532,12 @@ int snd_sof_device_remove(struct device *dev)
if (IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE))
cancel_work_sync(&sdev->probe_work);

- snd_sof_fw_unload(sdev);
- snd_sof_ipc_free(sdev);
- snd_sof_free_debug(sdev);
- snd_sof_free_trace(sdev);
+ if (sdev->fw_state > SOF_FW_BOOT_NOT_STARTED) {
+ snd_sof_fw_unload(sdev);
+ snd_sof_ipc_free(sdev);
+ snd_sof_free_debug(sdev);
+ snd_sof_free_trace(sdev);
+ }

/*
* Unregister machine driver. This will unbind the snd_card which
@@ -513,7 +553,8 @@ int snd_sof_device_remove(struct device *dev)
* scheduled on, when they are unloaded. Therefore, the DSP must be
* removed only after the topology has been unloaded.
*/
- snd_sof_remove(sdev);
+ if (sdev->fw_state > SOF_FW_BOOT_NOT_STARTED)
+ snd_sof_remove(sdev);

/* release firmware */
release_firmware(pdata->fw);
diff --git a/sound/soc/sof/intel/hda-loader.c b/sound/soc/sof/intel/hda-loader.c
index 65c2af3fcaab..356bb134ae93 100644
--- a/sound/soc/sof/intel/hda-loader.c
+++ b/sound/soc/sof/intel/hda-loader.c
@@ -278,7 +278,6 @@ int hda_dsp_cl_boot_firmware(struct snd_sof_dev *sdev)

/* init for booting wait */
init_waitqueue_head(&sdev->boot_wait);
- sdev->boot_complete = false;

/* prepare DMA for code loader stream */
tag = cl_stream_prepare(sdev, 0x40, stripped_firmware.size,
diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c
index 5a5163eef2ef..3c4b604412f0 100644
--- a/sound/soc/sof/intel/hda.c
+++ b/sound/soc/sof/intel/hda.c
@@ -166,7 +166,7 @@ void hda_dsp_dump_skl(struct snd_sof_dev *sdev, u32 flags)
panic = snd_sof_dsp_read(sdev, HDA_DSP_BAR,
HDA_ADSP_ERROR_CODE_SKL + 0x4);

- if (sdev->boot_complete) {
+ if (sdev->fw_state == SOF_FW_BOOT_COMPLETE) {
hda_dsp_get_registers(sdev, &xoops, &panic_info, stack,
HDA_DSP_STACK_DUMP_SIZE);
snd_sof_get_status(sdev, status, panic, &xoops, &panic_info,
@@ -193,7 +193,7 @@ void hda_dsp_dump(struct snd_sof_dev *sdev, u32 flags)
HDA_DSP_SRAM_REG_FW_STATUS);
panic = snd_sof_dsp_read(sdev, HDA_DSP_BAR, HDA_DSP_SRAM_REG_FW_TRACEP);

- if (sdev->boot_complete) {
+ if (sdev->fw_state == SOF_FW_BOOT_COMPLETE) {
hda_dsp_get_registers(sdev, &xoops, &panic_info, stack,
HDA_DSP_STACK_DUMP_SIZE);
snd_sof_get_status(sdev, status, panic, &xoops, &panic_info,
diff --git a/sound/soc/sof/ipc.c b/sound/soc/sof/ipc.c
index 7b6d69783e16..8984d965037d 100644
--- a/sound/soc/sof/ipc.c
+++ b/sound/soc/sof/ipc.c
@@ -348,19 +348,12 @@ void snd_sof_ipc_msgs_rx(struct snd_sof_dev *sdev)
break;
case SOF_IPC_FW_READY:
/* check for FW boot completion */
- if (!sdev->boot_complete) {
+ if (sdev->fw_state == SOF_FW_BOOT_IN_PROGRESS) {
err = sof_ops(sdev)->fw_ready(sdev, cmd);
- if (err < 0) {
- /*
- * this indicates a mismatch in ABI
- * between the driver and fw
- */
- dev_err(sdev->dev, "error: ABI mismatch %d\n",
- err);
- } else {
- /* firmware boot completed OK */
- sdev->boot_complete = true;
- }
+ if (err < 0)
+ sdev->fw_state = SOF_FW_BOOT_READY_FAILED;
+ else
+ sdev->fw_state = SOF_FW_BOOT_COMPLETE;

/* wake up firmware loader */
wake_up(&sdev->boot_wait);
diff --git a/sound/soc/sof/loader.c b/sound/soc/sof/loader.c
index a041adf0669d..ce114df5e4fc 100644
--- a/sound/soc/sof/loader.c
+++ b/sound/soc/sof/loader.c
@@ -511,7 +511,6 @@ int snd_sof_run_firmware(struct snd_sof_dev *sdev)
int init_core_mask;

init_waitqueue_head(&sdev->boot_wait);
- sdev->boot_complete = false;

/* create read-only fw_version debugfs to store boot version info */
if (sdev->first_boot) {
@@ -543,19 +542,27 @@ int snd_sof_run_firmware(struct snd_sof_dev *sdev)

init_core_mask = ret;

- /* now wait for the DSP to boot */
- ret = wait_event_timeout(sdev->boot_wait, sdev->boot_complete,
+ /*
+ * now wait for the DSP to boot. There are 3 possible outcomes:
+ * 1. Boot wait times out indicating FW boot failure.
+ * 2. FW boots successfully and fw_ready op succeeds.
+ * 3. FW boots but fw_ready op fails.
+ */
+ ret = wait_event_timeout(sdev->boot_wait,
+ sdev->fw_state > SOF_FW_BOOT_IN_PROGRESS,
msecs_to_jiffies(sdev->boot_timeout));
if (ret == 0) {
dev_err(sdev->dev, "error: firmware boot failure\n");
snd_sof_dsp_dbg_dump(sdev, SOF_DBG_REGS | SOF_DBG_MBOX |
SOF_DBG_TEXT | SOF_DBG_PCI);
- /* after this point FW_READY msg should be ignored */
- sdev->boot_complete = true;
+ sdev->fw_state = SOF_FW_BOOT_FAILED;
return -EIO;
}

- dev_info(sdev->dev, "firmware boot complete\n");
+ if (sdev->fw_state == SOF_FW_BOOT_COMPLETE)
+ dev_info(sdev->dev, "firmware boot complete\n");
+ else
+ return -EIO; /* FW boots but fw_ready op failed */

/* perform post fw run operations */
ret = snd_sof_dsp_post_fw_run(sdev);
diff --git a/sound/soc/sof/pm.c b/sound/soc/sof/pm.c
index e23beaeefe00..195af259e78e 100644
--- a/sound/soc/sof/pm.c
+++ b/sound/soc/sof/pm.c
@@ -269,6 +269,10 @@ static int sof_resume(struct device *dev, bool runtime_resume)
if (!sof_ops(sdev)->resume || !sof_ops(sdev)->runtime_resume)
return 0;

+ /* DSP was never successfully started, nothing to resume */
+ if (sdev->first_boot)
+ return 0;
+
/*
* if the runtime_resume flag is set, call the runtime_resume routine
* or else call the system resume routine
@@ -283,6 +287,8 @@ static int sof_resume(struct device *dev, bool runtime_resume)
return ret;
}

+ sdev->fw_state = SOF_FW_BOOT_PREPARE;
+
/* load the firmware */
ret = snd_sof_load_firmware(sdev);
if (ret < 0) {
@@ -292,7 +298,12 @@ static int sof_resume(struct device *dev, bool runtime_resume)
return ret;
}

- /* boot the firmware */
+ sdev->fw_state = SOF_FW_BOOT_IN_PROGRESS;
+
+ /*
+ * Boot the firmware. The FW boot status will be modified
+ * in snd_sof_run_firmware() depending on the outcome.
+ */
ret = snd_sof_run_firmware(sdev);
if (ret < 0) {
dev_err(sdev->dev,
@@ -338,6 +349,9 @@ static int sof_suspend(struct device *dev, bool runtime_suspend)
if (!sof_ops(sdev)->suspend)
return 0;

+ if (sdev->fw_state != SOF_FW_BOOT_COMPLETE)
+ goto power_down;
+
/* release trace */
snd_sof_release_trace(sdev);

@@ -375,6 +389,12 @@ static int sof_suspend(struct device *dev, bool runtime_suspend)
ret);
}

+power_down:
+
+ /* return if the DSP was not probed successfully */
+ if (sdev->fw_state == SOF_FW_BOOT_NOT_STARTED)
+ return 0;
+
/* power down all DSP cores */
if (runtime_suspend)
ret = snd_sof_dsp_runtime_suspend(sdev);
@@ -385,6 +405,9 @@ static int sof_suspend(struct device *dev, bool runtime_suspend)
"error: failed to power down DSP during suspend %d\n",
ret);

+ /* reset FW state */
+ sdev->fw_state = SOF_FW_BOOT_NOT_STARTED;
+
return ret;
}

diff --git a/sound/soc/sof/sof-priv.h b/sound/soc/sof/sof-priv.h
index 730f3259dd02..7b329bd99674 100644
--- a/sound/soc/sof/sof-priv.h
+++ b/sound/soc/sof/sof-priv.h
@@ -356,6 +356,15 @@ struct snd_sof_dai {
struct list_head list; /* list in sdev dai list */
};

+enum snd_sof_fw_state {
+ SOF_FW_BOOT_NOT_STARTED = 0,
+ SOF_FW_BOOT_PREPARE,
+ SOF_FW_BOOT_IN_PROGRESS,
+ SOF_FW_BOOT_FAILED,
+ SOF_FW_BOOT_READY_FAILED, /* firmware booted but fw_ready op failed */
+ SOF_FW_BOOT_COMPLETE,
+};
+
/*
* SOF Device Level.
*/
@@ -372,7 +381,7 @@ struct snd_sof_dev {

/* DSP firmware boot */
wait_queue_head_t boot_wait;
- u32 boot_complete;
+ enum snd_sof_fw_state fw_state;
u32 first_boot;

/* work queue in case the probe is implemented in two steps */
diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c
index 94b903d95afa..74c00c905d24 100644
--- a/sound/usb/mixer_scarlett_gen2.c
+++ b/sound/usb/mixer_scarlett_gen2.c
@@ -558,11 +558,11 @@ static const struct scarlett2_config

/* proprietary request/response format */
struct scarlett2_usb_packet {
- u32 cmd;
- u16 size;
- u16 seq;
- u32 error;
- u32 pad;
+ __le32 cmd;
+ __le16 size;
+ __le16 seq;
+ __le32 error;
+ __le32 pad;
u8 data[];
};

@@ -664,11 +664,11 @@ static int scarlett2_usb(
"Scarlett Gen 2 USB invalid response; "
"cmd tx/rx %d/%d seq %d/%d size %d/%d "
"error %d pad %d\n",
- le16_to_cpu(req->cmd), le16_to_cpu(resp->cmd),
+ le32_to_cpu(req->cmd), le32_to_cpu(resp->cmd),
le16_to_cpu(req->seq), le16_to_cpu(resp->seq),
resp_size, le16_to_cpu(resp->size),
- le16_to_cpu(resp->error),
- le16_to_cpu(resp->pad));
+ le32_to_cpu(resp->error),
+ le32_to_cpu(resp->pad));
err = -EINVAL;
goto unlock;
}
@@ -687,7 +687,7 @@ static int scarlett2_usb(
/* Send SCARLETT2_USB_DATA_CMD SCARLETT2_USB_CONFIG_SAVE */
static void scarlett2_config_save(struct usb_mixer_interface *mixer)
{
- u32 req = cpu_to_le32(SCARLETT2_USB_CONFIG_SAVE);
+ __le32 req = cpu_to_le32(SCARLETT2_USB_CONFIG_SAVE);

scarlett2_usb(mixer, SCARLETT2_USB_DATA_CMD,
&req, sizeof(u32),
@@ -713,11 +713,11 @@ static int scarlett2_usb_set_config(
const struct scarlett2_config config_item =
scarlett2_config_items[config_item_num];
struct {
- u32 offset;
- u32 bytes;
- s32 value;
+ __le32 offset;
+ __le32 bytes;
+ __le32 value;
} __packed req;
- u32 req2;
+ __le32 req2;
int err;
struct scarlett2_mixer_data *private = mixer->private_data;

@@ -753,8 +753,8 @@ static int scarlett2_usb_get(
int offset, void *buf, int size)
{
struct {
- u32 offset;
- u32 size;
+ __le32 offset;
+ __le32 size;
} __packed req;

req.offset = cpu_to_le32(offset);
@@ -794,8 +794,8 @@ static int scarlett2_usb_set_mix(struct usb_mixer_interface *mixer,
const struct scarlett2_device_info *info = private->info;

struct {
- u16 mix_num;
- u16 data[SCARLETT2_INPUT_MIX_MAX];
+ __le16 mix_num;
+ __le16 data[SCARLETT2_INPUT_MIX_MAX];
} __packed req;

int i, j;
@@ -850,9 +850,9 @@ static int scarlett2_usb_set_mux(struct usb_mixer_interface *mixer)
};

struct {
- u16 pad;
- u16 num;
- u32 data[SCARLETT2_MUX_MAX];
+ __le16 pad;
+ __le16 num;
+ __le32 data[SCARLETT2_MUX_MAX];
} __packed req;

req.pad = 0;
@@ -911,9 +911,9 @@ static int scarlett2_usb_get_meter_levels(struct usb_mixer_interface *mixer,
u16 *levels)
{
struct {
- u16 pad;
- u16 num_meters;
- u32 magic;
+ __le16 pad;
+ __le16 num_meters;
+ __le32 magic;
} __packed req;
u32 resp[SCARLETT2_NUM_METERS];
int i, err;
diff --git a/sound/usb/validate.c b/sound/usb/validate.c
index 389e8657434a..5a3c4f7882b0 100644
--- a/sound/usb/validate.c
+++ b/sound/usb/validate.c
@@ -110,7 +110,7 @@ static bool validate_processing_unit(const void *p,
default:
if (v->type == UAC1_EXTENSION_UNIT)
return true; /* OK */
- switch (d->wProcessType) {
+ switch (le16_to_cpu(d->wProcessType)) {
case UAC_PROCESS_UP_DOWNMIX:
case UAC_PROCESS_DOLBY_PROLOGIC:
if (d->bLength < len + 1) /* bNrModes */
@@ -125,7 +125,7 @@ static bool validate_processing_unit(const void *p,
case UAC_VERSION_2:
if (v->type == UAC2_EXTENSION_UNIT_V2)
return true; /* OK */
- switch (d->wProcessType) {
+ switch (le16_to_cpu(d->wProcessType)) {
case UAC2_PROCESS_UP_DOWNMIX:
case UAC2_PROCESS_DOLBY_PROLOCIC: /* SiC! */
if (d->bLength < len + 1) /* bNrModes */
@@ -142,7 +142,7 @@ static bool validate_processing_unit(const void *p,
len += 2; /* wClusterDescrID */
break;
}
- switch (d->wProcessType) {
+ switch (le16_to_cpu(d->wProcessType)) {
case UAC3_PROCESS_UP_DOWNMIX:
if (d->bLength < len + 1) /* bNrModes */
return false;
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index ad1b9e646c49..4cf93110c259 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -270,6 +270,7 @@ class ArchX86(Arch):
def __init__(self, exit_reasons):
self.sc_perf_evt_open = 298
self.ioctl_numbers = IOCTL_NUMBERS
+ self.exit_reason_field = 'exit_reason'
self.exit_reasons = exit_reasons

def debugfs_is_child(self, field):
@@ -289,6 +290,7 @@ class ArchPPC(Arch):
# numbers depend on the wordsize.
char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
+ self.exit_reason_field = 'exit_nr'
self.exit_reasons = {}

def debugfs_is_child(self, field):
@@ -300,6 +302,7 @@ class ArchA64(Arch):
def __init__(self):
self.sc_perf_evt_open = 241
self.ioctl_numbers = IOCTL_NUMBERS
+ self.exit_reason_field = 'esr_ec'
self.exit_reasons = AARCH64_EXIT_REASONS

def debugfs_is_child(self, field):
@@ -311,6 +314,7 @@ class ArchS390(Arch):
def __init__(self):
self.sc_perf_evt_open = 331
self.ioctl_numbers = IOCTL_NUMBERS
+ self.exit_reason_field = None
self.exit_reasons = None

def debugfs_is_child(self, field):
@@ -541,8 +545,8 @@ class TracepointProvider(Provider):
"""
filters = {}
filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS)
- if ARCH.exit_reasons:
- filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
+ if ARCH.exit_reason_field and ARCH.exit_reasons:
+ filters['kvm_exit'] = (ARCH.exit_reason_field, ARCH.exit_reasons)
return filters

def _get_available_fields(self):
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index d98838c5820c..b6403712c2f4 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2541,7 +2541,9 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
if (strncmp(local_name, targ_name, local_essent_len) == 0) {
pr_debug("[%d] %s: found candidate [%d] %s\n",
local_type_id, local_name, i, targ_name);
- new_ids = realloc(cand_ids->data, cand_ids->len + 1);
+ new_ids = reallocarray(cand_ids->data,
+ cand_ids->len + 1,
+ sizeof(*cand_ids->data));
if (!new_ids) {
err = -ENOMEM;
goto err_out;
diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh
index 0a832e265a50..c3ae1e8ae119 100755
--- a/tools/objtool/sync-check.sh
+++ b/tools/objtool/sync-check.sh
@@ -47,5 +47,3 @@ check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[
check arch/x86/include/asm/insn.h '-I "^#include [\"<]\(asm/\)*inat.h[\">]"'
check arch/x86/lib/inat.c '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"'
check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]"'
-
-cd -
diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c
index 2f55d4d23446..6e04304560ca 100644
--- a/tools/power/cpupower/lib/cpufreq.c
+++ b/tools/power/cpupower/lib/cpufreq.c
@@ -332,21 +332,74 @@ void cpufreq_put_available_governors(struct cpufreq_available_governors *any)
}


-struct cpufreq_frequencies
-*cpufreq_get_frequencies(const char *type, unsigned int cpu)
+struct cpufreq_available_frequencies
+*cpufreq_get_available_frequencies(unsigned int cpu)
{
- struct cpufreq_frequencies *first = NULL;
- struct cpufreq_frequencies *current = NULL;
+ struct cpufreq_available_frequencies *first = NULL;
+ struct cpufreq_available_frequencies *current = NULL;
char one_value[SYSFS_PATH_MAX];
char linebuf[MAX_LINE_LEN];
- char fname[MAX_LINE_LEN];
unsigned int pos, i;
unsigned int len;

- snprintf(fname, MAX_LINE_LEN, "scaling_%s_frequencies", type);
+ len = sysfs_cpufreq_read_file(cpu, "scaling_available_frequencies",
+ linebuf, sizeof(linebuf));
+ if (len == 0)
+ return NULL;

- len = sysfs_cpufreq_read_file(cpu, fname,
- linebuf, sizeof(linebuf));
+ pos = 0;
+ for (i = 0; i < len; i++) {
+ if (linebuf[i] == ' ' || linebuf[i] == '\n') {
+ if (i - pos < 2)
+ continue;
+ if (i - pos >= SYSFS_PATH_MAX)
+ goto error_out;
+ if (current) {
+ current->next = malloc(sizeof(*current));
+ if (!current->next)
+ goto error_out;
+ current = current->next;
+ } else {
+ first = malloc(sizeof(*first));
+ if (!first)
+ goto error_out;
+ current = first;
+ }
+ current->first = first;
+ current->next = NULL;
+
+ memcpy(one_value, linebuf + pos, i - pos);
+ one_value[i - pos] = '\0';
+ if (sscanf(one_value, "%lu", &current->frequency) != 1)
+ goto error_out;
+
+ pos = i + 1;
+ }
+ }
+
+ return first;
+
+ error_out:
+ while (first) {
+ current = first->next;
+ free(first);
+ first = current;
+ }
+ return NULL;
+}
+
+struct cpufreq_available_frequencies
+*cpufreq_get_boost_frequencies(unsigned int cpu)
+{
+ struct cpufreq_available_frequencies *first = NULL;
+ struct cpufreq_available_frequencies *current = NULL;
+ char one_value[SYSFS_PATH_MAX];
+ char linebuf[MAX_LINE_LEN];
+ unsigned int pos, i;
+ unsigned int len;
+
+ len = sysfs_cpufreq_read_file(cpu, "scaling_boost_frequencies",
+ linebuf, sizeof(linebuf));
if (len == 0)
return NULL;

@@ -391,9 +444,9 @@ struct cpufreq_frequencies
return NULL;
}

-void cpufreq_put_frequencies(struct cpufreq_frequencies *any)
+void cpufreq_put_available_frequencies(struct cpufreq_available_frequencies *any)
{
- struct cpufreq_frequencies *tmp, *next;
+ struct cpufreq_available_frequencies *tmp, *next;

if (!any)
return;
@@ -406,6 +459,11 @@ void cpufreq_put_frequencies(struct cpufreq_frequencies *any)
}
}

+void cpufreq_put_boost_frequencies(struct cpufreq_available_frequencies *any)
+{
+ cpufreq_put_available_frequencies(any);
+}
+
static struct cpufreq_affected_cpus *sysfs_get_cpu_list(unsigned int cpu,
const char *file)
{
diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h
index a55f0d19215b..95f4fd9e2656 100644
--- a/tools/power/cpupower/lib/cpufreq.h
+++ b/tools/power/cpupower/lib/cpufreq.h
@@ -20,10 +20,10 @@ struct cpufreq_available_governors {
struct cpufreq_available_governors *first;
};

-struct cpufreq_frequencies {
+struct cpufreq_available_frequencies {
unsigned long frequency;
- struct cpufreq_frequencies *next;
- struct cpufreq_frequencies *first;
+ struct cpufreq_available_frequencies *next;
+ struct cpufreq_available_frequencies *first;
};


@@ -124,11 +124,17 @@ void cpufreq_put_available_governors(
* cpufreq_put_frequencies after use.
*/

-struct cpufreq_frequencies
-*cpufreq_get_frequencies(const char *type, unsigned int cpu);
+struct cpufreq_available_frequencies
+*cpufreq_get_available_frequencies(unsigned int cpu);

-void cpufreq_put_frequencies(
- struct cpufreq_frequencies *first);
+void cpufreq_put_available_frequencies(
+ struct cpufreq_available_frequencies *first);
+
+struct cpufreq_available_frequencies
+*cpufreq_get_boost_frequencies(unsigned int cpu);
+
+void cpufreq_put_boost_frequencies(
+ struct cpufreq_available_frequencies *first);


/* determine affected CPUs
diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c
index e63cf55f81cf..6efc0f6b1b11 100644
--- a/tools/power/cpupower/utils/cpufreq-info.c
+++ b/tools/power/cpupower/utils/cpufreq-info.c
@@ -244,14 +244,14 @@ static int get_boost_mode_x86(unsigned int cpu)

static int get_boost_mode(unsigned int cpu)
{
- struct cpufreq_frequencies *freqs;
+ struct cpufreq_available_frequencies *freqs;

if (cpupower_cpu_info.vendor == X86_VENDOR_AMD ||
cpupower_cpu_info.vendor == X86_VENDOR_HYGON ||
cpupower_cpu_info.vendor == X86_VENDOR_INTEL)
return get_boost_mode_x86(cpu);

- freqs = cpufreq_get_frequencies("boost", cpu);
+ freqs = cpufreq_get_boost_frequencies(cpu);
if (freqs) {
printf(_(" boost frequency steps: "));
while (freqs->next) {
@@ -261,7 +261,7 @@ static int get_boost_mode(unsigned int cpu)
}
print_speed(freqs->frequency);
printf("\n");
- cpufreq_put_frequencies(freqs);
+ cpufreq_put_available_frequencies(freqs);
}

return 0;
@@ -475,7 +475,7 @@ static int get_latency(unsigned int cpu, unsigned int human)

static void debug_output_one(unsigned int cpu)
{
- struct cpufreq_frequencies *freqs;
+ struct cpufreq_available_frequencies *freqs;

get_driver(cpu);
get_related_cpus(cpu);
@@ -483,7 +483,7 @@ static void debug_output_one(unsigned int cpu)
get_latency(cpu, 1);
get_hardware_limits(cpu, 1);

- freqs = cpufreq_get_frequencies("available", cpu);
+ freqs = cpufreq_get_available_frequencies(cpu);
if (freqs) {
printf(_(" available frequency steps: "));
while (freqs->next) {
@@ -493,7 +493,7 @@ static void debug_output_one(unsigned int cpu)
}
print_speed(freqs->frequency);
printf("\n");
- cpufreq_put_frequencies(freqs);
+ cpufreq_put_available_frequencies(freqs);
}

get_available_governors(cpu);
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 5ecc267d98b0..fad615c22e4d 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -2,7 +2,7 @@
#include <test_progs.h>

ssize_t get_base_addr() {
- size_t start;
+ size_t start, offset;
char buf[256];
FILE *f;

@@ -10,10 +10,11 @@ ssize_t get_base_addr() {
if (!f)
return -errno;

- while (fscanf(f, "%zx-%*x %s %*s\n", &start, buf) == 2) {
+ while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n",
+ &start, buf, &offset) == 3) {
if (strcmp(buf, "r-xp") == 0) {
fclose(f);
- return start;
+ return start - offset;
}
}

diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index 3003fddc0613..cf6c87936c69 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -4,6 +4,7 @@
#include <sched.h>
#include <sys/socket.h>
#include <test_progs.h>
+#include "libbpf_internal.h"

static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
@@ -19,7 +20,7 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)

void test_perf_buffer(void)
{
- int err, prog_fd, nr_cpus, i, duration = 0;
+ int err, prog_fd, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0;
const char *prog_name = "kprobe/sys_nanosleep";
const char *file = "./test_perf_buffer.o";
struct perf_buffer_opts pb_opts = {};
@@ -29,15 +30,27 @@ void test_perf_buffer(void)
struct bpf_object *obj;
struct perf_buffer *pb;
struct bpf_link *link;
+ bool *online;

nr_cpus = libbpf_num_possible_cpus();
if (CHECK(nr_cpus < 0, "nr_cpus", "err %d\n", nr_cpus))
return;

+ err = parse_cpu_mask_file("/sys/devices/system/cpu/online",
+ &online, &on_len);
+ if (CHECK(err, "nr_on_cpus", "err %d\n", err))
+ return;
+
+ for (i = 0; i < on_len; i++)
+ if (online[i])
+ nr_on_cpus++;
+
/* load program */
err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd);
- if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
- return;
+ if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) {
+ obj = NULL;
+ goto out_close;
+ }

prog = bpf_object__find_program_by_title(obj, prog_name);
if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name))
@@ -64,6 +77,11 @@ void test_perf_buffer(void)
/* trigger kprobe on every CPU */
CPU_ZERO(&cpu_seen);
for (i = 0; i < nr_cpus; i++) {
+ if (i >= on_len || !online[i]) {
+ printf("skipping offline CPU #%d\n", i);
+ continue;
+ }
+
CPU_ZERO(&cpu_set);
CPU_SET(i, &cpu_set);

@@ -81,8 +99,8 @@ void test_perf_buffer(void)
if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
goto out_free_pb;

- if (CHECK(CPU_COUNT(&cpu_seen) != nr_cpus, "seen_cpu_cnt",
- "expect %d, seen %d\n", nr_cpus, CPU_COUNT(&cpu_seen)))
+ if (CHECK(CPU_COUNT(&cpu_seen) != nr_on_cpus, "seen_cpu_cnt",
+ "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen)))
goto out_free_pb;

out_free_pb:
@@ -91,4 +109,5 @@ void test_perf_buffer(void)
bpf_link__destroy(link);
out_close:
bpf_object__close(obj);
+ free(online);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index f62aa0eb959b..1735faf17536 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -49,8 +49,12 @@ void test_stacktrace_build_id_nmi(void)
pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
0 /* cpu 0 */, -1 /* group id */,
0 /* flags */);
- if (CHECK(pmu_fd < 0, "perf_event_open",
- "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n",
+ if (pmu_fd < 0 && errno == ENOENT) {
+ printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__);
+ test__skip();
+ goto cleanup;
+ }
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
pmu_fd, errno))
goto close_prog;

diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
index ea7d84f01235..e6be383a003f 100644
--- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
@@ -113,6 +113,12 @@ int _select_by_skb_data(struct sk_reuseport_md *reuse_md)
data_check.skb_ports[0] = th->source;
data_check.skb_ports[1] = th->dest;

+ if (th->fin)
+ /* The connection is being torn down at the end of a
+ * test. It can't contain a cmd, so return early.
+ */
+ return SK_PASS;
+
if ((th->doff << 2) + sizeof(*cmd) > data_check.len)
GOTO_DONE(DROP_ERR_SKB_DATA);
if (bpf_skb_load_bytes(reuse_md, th->doff << 2, &cmd_copy,
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 4a851513c842..779e11da979c 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -331,7 +331,7 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
FILE *file;
int i, fp;

- file = fopen(".sendpage_tst.tmp", "w+");
+ file = tmpfile();
if (!file) {
perror("create file for sendpage");
return 1;
@@ -340,13 +340,8 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
fwrite(&k, sizeof(char), 1, file);
fflush(file);
fseek(file, 0, SEEK_SET);
- fclose(file);

- fp = open(".sendpage_tst.tmp", O_RDONLY);
- if (fp < 0) {
- perror("reopen file for sendpage");
- return 1;
- }
+ fp = fileno(file);

clock_gettime(CLOCK_MONOTONIC, &s->start);
for (i = 0; i < cnt; i++) {
@@ -354,11 +349,11 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,

if (!drop && sent < 0) {
perror("send loop error");
- close(fp);
+ fclose(file);
return sent;
} else if (drop && sent >= 0) {
printf("sendpage loop error expected: %i\n", sent);
- close(fp);
+ fclose(file);
return -EIO;
}

@@ -366,7 +361,7 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
s->bytes_sent += sent;
}
clock_gettime(CLOCK_MONOTONIC, &s->end);
- close(fp);
+ fclose(file);
return 0;
}

diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
index e98c36750fae..d34fe06268d2 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
@@ -54,7 +54,7 @@ class SubPlugin(TdcPlugin):
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
- env=ENVIR)
+ env=os.environ.copy())
(rawout, serr) = proc.communicate()

if proc.returncode != 0 and len(serr) > 0:
diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c
index c4c57ba99e90..631d397ac81b 100644
--- a/virt/kvm/arm/aarch32.c
+++ b/virt/kvm/arm/aarch32.c
@@ -10,6 +10,7 @@
* Author: Christoffer Dall <c.dall@xxxxxxxxxxxxxxxxxxxxxx>
*/

+#include <linux/bits.h>
#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
@@ -28,25 +29,115 @@ static const u8 return_offsets[8][2] = {
[7] = { 4, 4 }, /* FIQ, unused */
};

+/*
+ * When an exception is taken, most CPSR fields are left unchanged in the
+ * handler. However, some are explicitly overridden (e.g. M[4:0]).
+ *
+ * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with
+ * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was
+ * obsoleted by the ARMv7 virtualization extensions and is RES0.
+ *
+ * For the SPSR layout seen from AArch32, see:
+ * - ARM DDI 0406C.d, page B1-1148
+ * - ARM DDI 0487E.a, page G8-6264
+ *
+ * For the SPSR_ELx layout for AArch32 seen from AArch64, see:
+ * - ARM DDI 0487E.a, page C5-426
+ *
+ * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from
+ * MSB to LSB.
+ */
+static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode)
+{
+ u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
+ unsigned long old, new;
+
+ old = *vcpu_cpsr(vcpu);
+ new = 0;
+
+ new |= (old & PSR_AA32_N_BIT);
+ new |= (old & PSR_AA32_Z_BIT);
+ new |= (old & PSR_AA32_C_BIT);
+ new |= (old & PSR_AA32_V_BIT);
+ new |= (old & PSR_AA32_Q_BIT);
+
+ // CPSR.IT[7:0] are set to zero upon any exception
+ // See ARM DDI 0487E.a, section G1.12.3
+ // See ARM DDI 0406C.d, section B1.8.3
+
+ new |= (old & PSR_AA32_DIT_BIT);
+
+ // CPSR.SSBS is set to SCTLR.DSSBS upon any exception
+ // See ARM DDI 0487E.a, page G8-6244
+ if (sctlr & BIT(31))
+ new |= PSR_AA32_SSBS_BIT;
+
+ // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0
+ // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented
+ // See ARM DDI 0487E.a, page G8-6246
+ new |= (old & PSR_AA32_PAN_BIT);
+ if (!(sctlr & BIT(23)))
+ new |= PSR_AA32_PAN_BIT;
+
+ // SS does not exist in AArch32, so ignore
+
+ // CPSR.IL is set to zero upon any exception
+ // See ARM DDI 0487E.a, page G1-5527
+
+ new |= (old & PSR_AA32_GE_MASK);
+
+ // CPSR.IT[7:0] are set to zero upon any exception
+ // See prior comment above
+
+ // CPSR.E is set to SCTLR.EE upon any exception
+ // See ARM DDI 0487E.a, page G8-6245
+ // See ARM DDI 0406C.d, page B4-1701
+ if (sctlr & BIT(25))
+ new |= PSR_AA32_E_BIT;
+
+ // CPSR.A is unchanged upon an exception to Undefined, Supervisor
+ // CPSR.A is set upon an exception to other modes
+ // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
+ // See ARM DDI 0406C.d, page B1-1182
+ new |= (old & PSR_AA32_A_BIT);
+ if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC)
+ new |= PSR_AA32_A_BIT;
+
+ // CPSR.I is set upon any exception
+ // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
+ // See ARM DDI 0406C.d, page B1-1182
+ new |= PSR_AA32_I_BIT;
+
+ // CPSR.F is set upon an exception to FIQ
+ // CPSR.F is unchanged upon an exception to other modes
+ // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
+ // See ARM DDI 0406C.d, page B1-1182
+ new |= (old & PSR_AA32_F_BIT);
+ if (mode == PSR_AA32_MODE_FIQ)
+ new |= PSR_AA32_F_BIT;
+
+ // CPSR.T is set to SCTLR.TE upon any exception
+ // See ARM DDI 0487E.a, page G8-5514
+ // See ARM DDI 0406C.d, page B1-1181
+ if (sctlr & BIT(30))
+ new |= PSR_AA32_T_BIT;
+
+ new |= mode;
+
+ return new;
+}
+
static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
{
- unsigned long cpsr;
- unsigned long new_spsr_value = *vcpu_cpsr(vcpu);
- bool is_thumb = (new_spsr_value & PSR_AA32_T_BIT);
+ unsigned long spsr = *vcpu_cpsr(vcpu);
+ bool is_thumb = (spsr & PSR_AA32_T_BIT);
u32 return_offset = return_offsets[vect_offset >> 2][is_thumb];
u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);

- cpsr = mode | PSR_AA32_I_BIT;
-
- if (sctlr & (1 << 30))
- cpsr |= PSR_AA32_T_BIT;
- if (sctlr & (1 << 25))
- cpsr |= PSR_AA32_E_BIT;
-
- *vcpu_cpsr(vcpu) = cpsr;
+ *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode);

/* Note: These now point to the banked copies */
- vcpu_write_spsr(vcpu, new_spsr_value);
+ vcpu_write_spsr(vcpu, host_spsr_to_spsr32(spsr));
*vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;

/* Branch to exception vector */
@@ -84,7 +175,7 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt,
fsr = &vcpu_cp15(vcpu, c5_DFSR);
}

- prepare_fault32(vcpu, PSR_AA32_MODE_ABT | PSR_AA32_A_BIT, vect_offset);
+ prepare_fault32(vcpu, PSR_AA32_MODE_ABT, vect_offset);

*far = addr;

diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c
index 6af5c91337f2..f274fabb4301 100644
--- a/virt/kvm/arm/mmio.c
+++ b/virt/kvm/arm/mmio.c
@@ -105,6 +105,9 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
data = (data ^ mask) - mask;
}

+ if (!vcpu->arch.mmio_decode.sixty_four)
+ data = data & 0xffffffff;
+
trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
&data);
data = vcpu_data_host_to_guest(vcpu, data, len);
@@ -125,6 +128,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len)
unsigned long rt;
int access_size;
bool sign_extend;
+ bool sixty_four;

if (kvm_vcpu_dabt_iss1tw(vcpu)) {
/* page table accesses IO mem: tell guest to fix its TTBR */
@@ -138,11 +142,13 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len)

*is_write = kvm_vcpu_dabt_iswrite(vcpu);
sign_extend = kvm_vcpu_dabt_issext(vcpu);
+ sixty_four = kvm_vcpu_dabt_issf(vcpu);
rt = kvm_vcpu_dabt_get_rd(vcpu);

*len = access_size;
vcpu->arch.mmio_decode.sign_extend = sign_extend;
vcpu->arch.mmio_decode.rt = rt;
+ vcpu->arch.mmio_decode.sixty_four = sixty_four;

return 0;
}
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 35305d6e68cc..d8ef708a2ef6 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -64,7 +64,7 @@ static void async_pf_execute(struct work_struct *work)
struct mm_struct *mm = apf->mm;
struct kvm_vcpu *vcpu = apf->vcpu;
unsigned long addr = apf->addr;
- gva_t gva = apf->gva;
+ gpa_t cr2_or_gpa = apf->cr2_or_gpa;
int locked = 1;

might_sleep();
@@ -92,7 +92,7 @@ static void async_pf_execute(struct work_struct *work)
* this point
*/

- trace_kvm_async_pf_completed(addr, gva);
+ trace_kvm_async_pf_completed(addr, cr2_or_gpa);

if (swq_has_sleeper(&vcpu->wq))
swake_up_one(&vcpu->wq);
@@ -165,8 +165,8 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
}
}

-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
- struct kvm_arch_async_pf *arch)
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ unsigned long hva, struct kvm_arch_async_pf *arch)
{
struct kvm_async_pf *work;

@@ -185,7 +185,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,

work->wakeup_all = false;
work->vcpu = vcpu;
- work->gva = gva;
+ work->cr2_or_gpa = cr2_or_gpa;
work->addr = hva;
work->arch = *arch;
work->mm = current->mm;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 13efc291b1c7..b5ea1bafe513 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1394,14 +1394,14 @@ bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);

-unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
+unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn)
{
struct vm_area_struct *vma;
unsigned long addr, size;

size = PAGE_SIZE;

- addr = gfn_to_hva(kvm, gfn);
+ addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gfn, NULL);
if (kvm_is_error_hva(addr))
return PAGE_SIZE;

@@ -1809,26 +1809,72 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(gfn_to_page);

-static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn,
- struct kvm_host_map *map)
+void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache)
+{
+ if (pfn == 0)
+ return;
+
+ if (cache)
+ cache->pfn = cache->gfn = 0;
+
+ if (dirty)
+ kvm_release_pfn_dirty(pfn);
+ else
+ kvm_release_pfn_clean(pfn);
+}
+
+static void kvm_cache_gfn_to_pfn(struct kvm_memory_slot *slot, gfn_t gfn,
+ struct gfn_to_pfn_cache *cache, u64 gen)
+{
+ kvm_release_pfn(cache->pfn, cache->dirty, cache);
+
+ cache->pfn = gfn_to_pfn_memslot(slot, gfn);
+ cache->gfn = gfn;
+ cache->dirty = false;
+ cache->generation = gen;
+}
+
+static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn,
+ struct kvm_host_map *map,
+ struct gfn_to_pfn_cache *cache,
+ bool atomic)
{
kvm_pfn_t pfn;
void *hva = NULL;
struct page *page = KVM_UNMAPPED_PAGE;
+ struct kvm_memory_slot *slot = __gfn_to_memslot(slots, gfn);
+ u64 gen = slots->generation;

if (!map)
return -EINVAL;

- pfn = gfn_to_pfn_memslot(slot, gfn);
+ if (cache) {
+ if (!cache->pfn || cache->gfn != gfn ||
+ cache->generation != gen) {
+ if (atomic)
+ return -EAGAIN;
+ kvm_cache_gfn_to_pfn(slot, gfn, cache, gen);
+ }
+ pfn = cache->pfn;
+ } else {
+ if (atomic)
+ return -EAGAIN;
+ pfn = gfn_to_pfn_memslot(slot, gfn);
+ }
if (is_error_noslot_pfn(pfn))
return -EINVAL;

if (pfn_valid(pfn)) {
page = pfn_to_page(pfn);
- hva = kmap(page);
+ if (atomic)
+ hva = kmap_atomic(page);
+ else
+ hva = kmap(page);
#ifdef CONFIG_HAS_IOMEM
- } else {
+ } else if (!atomic) {
hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
+ } else {
+ return -EINVAL;
#endif
}

@@ -1843,14 +1889,25 @@ static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn,
return 0;
}

+int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
+ struct gfn_to_pfn_cache *cache, bool atomic)
+{
+ return __kvm_map_gfn(kvm_memslots(vcpu->kvm), gfn, map,
+ cache, atomic);
+}
+EXPORT_SYMBOL_GPL(kvm_map_gfn);
+
int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
{
- return __kvm_map_gfn(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, map);
+ return __kvm_map_gfn(kvm_vcpu_memslots(vcpu), gfn, map,
+ NULL, false);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_map);

-void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
- bool dirty)
+static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot,
+ struct kvm_host_map *map,
+ struct gfn_to_pfn_cache *cache,
+ bool dirty, bool atomic)
{
if (!map)
return;
@@ -1858,23 +1915,45 @@ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
if (!map->hva)
return;

- if (map->page != KVM_UNMAPPED_PAGE)
- kunmap(map->page);
+ if (map->page != KVM_UNMAPPED_PAGE) {
+ if (atomic)
+ kunmap_atomic(map->hva);
+ else
+ kunmap(map->page);
+ }
#ifdef CONFIG_HAS_IOMEM
- else
+ else if (!atomic)
memunmap(map->hva);
+ else
+ WARN_ONCE(1, "Unexpected unmapping in atomic context");
#endif

- if (dirty) {
- kvm_vcpu_mark_page_dirty(vcpu, map->gfn);
- kvm_release_pfn_dirty(map->pfn);
- } else {
- kvm_release_pfn_clean(map->pfn);
- }
+ if (dirty)
+ mark_page_dirty_in_slot(memslot, map->gfn);
+
+ if (cache)
+ cache->dirty |= dirty;
+ else
+ kvm_release_pfn(map->pfn, dirty, NULL);

map->hva = NULL;
map->page = NULL;
}
+
+int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
+ struct gfn_to_pfn_cache *cache, bool dirty, bool atomic)
+{
+ __kvm_unmap_gfn(gfn_to_memslot(vcpu->kvm, map->gfn), map,
+ cache, dirty, atomic);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_unmap_gfn);
+
+void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
+{
+ __kvm_unmap_gfn(kvm_vcpu_gfn_to_memslot(vcpu, map->gfn), map, NULL,
+ dirty, false);
+}
EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);

struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)