Re: Linux 5.9.3

From: Greg Kroah-Hartman
Date: Sun Nov 01 2020 - 07:32:20 EST


diff --git a/Makefile b/Makefile
index 53e7f4ee2557..50e927f34853 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 5
PATCHLEVEL = 9
-SUBLEVEL = 2
+SUBLEVEL = 3
EXTRAVERSION =
NAME = Kleptomaniac Octopus

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 130569f90c54..3904f9ea1938 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -10,14 +10,14 @@
#
# Copyright (C) 1995-2001 by Russell King

-LDFLAGS_vmlinux :=--no-undefined -X
+LDFLAGS_vmlinux :=--no-undefined -X -z norelro
CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)

ifeq ($(CONFIG_RELOCATABLE), y)
# Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour
# for relative relocs, since this leads to better Image compression
# with the relocation offsets always being zero.
-LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro \
+LDFLAGS_vmlinux += -shared -Bsymbolic -z notext \
$(call ld-option, --no-apply-dynamic-relocs)
endif

diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index fe3a7695a420..966672b2213e 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -457,6 +457,12 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
return required;
}

+static void cpu_enable_ssbd_mitigation(const struct arm64_cpu_capabilities *cap)
+{
+ if (ssbd_state != ARM64_SSBD_FORCE_DISABLE)
+ cap->matches(cap, SCOPE_LOCAL_CPU);
+}
+
/* known invulnerable cores */
static const struct midr_range arm64_ssb_cpus[] = {
MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
@@ -599,6 +605,12 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope)
return (need_wa > 0);
}

+static void
+cpu_enable_branch_predictor_hardening(const struct arm64_cpu_capabilities *cap)
+{
+ cap->matches(cap, SCOPE_LOCAL_CPU);
+}
+
static const __maybe_unused struct midr_range tx2_family_cpus[] = {
MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
@@ -890,9 +902,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
},
#endif
{
+ .desc = "Branch predictor hardening",
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
.matches = check_branch_predictor,
+ .cpu_enable = cpu_enable_branch_predictor_hardening,
},
#ifdef CONFIG_RANDOMIZE_BASE
{
@@ -906,6 +920,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.capability = ARM64_SSBD,
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
.matches = has_ssbd_mitigation,
+ .cpu_enable = cpu_enable_ssbd_mitigation,
.midr_range_list = arm64_ssb_cpus,
},
#ifdef CONFIG_ARM64_ERRATUM_1418040
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 997da0221780..2b15b4870565 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -135,7 +135,7 @@ config PPC
select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE
- select ARCH_HAS_UACCESS_MCSAFE if PPC64
+ select ARCH_HAS_COPY_MC if PPC64
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_KEEP_MEMBLOCK
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index 283552cd0e58..2aa0e31e6884 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -53,9 +53,7 @@ void *__memmove(void *to, const void *from, __kernel_size_t n);
#ifndef CONFIG_KASAN
#define __HAVE_ARCH_MEMSET32
#define __HAVE_ARCH_MEMSET64
-#define __HAVE_ARCH_MEMCPY_MCSAFE

-extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz);
extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 00699903f1ef..20a35373cafc 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -435,6 +435,32 @@ do { \
extern unsigned long __copy_tofrom_user(void __user *to,
const void __user *from, unsigned long size);

+#ifdef CONFIG_ARCH_HAS_COPY_MC
+unsigned long __must_check
+copy_mc_generic(void *to, const void *from, unsigned long size);
+
+static inline unsigned long __must_check
+copy_mc_to_kernel(void *to, const void *from, unsigned long size)
+{
+ return copy_mc_generic(to, from, size);
+}
+#define copy_mc_to_kernel copy_mc_to_kernel
+
+static inline unsigned long __must_check
+copy_mc_to_user(void __user *to, const void *from, unsigned long n)
+{
+ if (likely(check_copy_size(from, n, true))) {
+ if (access_ok(to, n)) {
+ allow_write_to_user(to, n);
+ n = copy_mc_generic((void *)to, from, n);
+ prevent_write_to_user(to, n);
+ }
+ }
+
+ return n;
+}
+#endif
+
#ifdef __powerpc64__
static inline unsigned long
raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
@@ -523,20 +549,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
return ret;
}

-static __always_inline unsigned long __must_check
-copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
-{
- if (likely(check_copy_size(from, n, true))) {
- if (access_ok(to, n)) {
- allow_write_to_user(to, n);
- n = memcpy_mcsafe((void *)to, from, n);
- prevent_write_to_user(to, n);
- }
- }
-
- return n;
-}
-
unsigned long __arch_clear_user(void __user *addr, unsigned long size);

static inline unsigned long clear_user(void __user *addr, unsigned long size)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index d66a645503eb..69a91b571845 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
memcpy_power7.o

obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
- memcpy_64.o memcpy_mcsafe_64.o
+ memcpy_64.o copy_mc_64.o

ifndef CONFIG_PPC_QUEUED_SPINLOCKS
obj64-$(CONFIG_SMP) += locks.o
diff --git a/arch/powerpc/lib/copy_mc_64.S b/arch/powerpc/lib/copy_mc_64.S
new file mode 100644
index 000000000000..88d46c471493
--- /dev/null
+++ b/arch/powerpc/lib/copy_mc_64.S
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) IBM Corporation, 2011
+ * Derived from copyuser_power7.s by Anton Blanchard <anton@xxxxxxxxxx>
+ * Author - Balbir Singh <bsingharora@xxxxxxxxx>
+ */
+#include <asm/ppc_asm.h>
+#include <asm/errno.h>
+#include <asm/export.h>
+
+ .macro err1
+100:
+ EX_TABLE(100b,.Ldo_err1)
+ .endm
+
+ .macro err2
+200:
+ EX_TABLE(200b,.Ldo_err2)
+ .endm
+
+ .macro err3
+300: EX_TABLE(300b,.Ldone)
+ .endm
+
+.Ldo_err2:
+ ld r22,STK_REG(R22)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r14,STK_REG(R14)(r1)
+ addi r1,r1,STACKFRAMESIZE
+.Ldo_err1:
+ /* Do a byte by byte copy to get the exact remaining size */
+ mtctr r7
+46:
+err3; lbz r0,0(r4)
+ addi r4,r4,1
+err3; stb r0,0(r3)
+ addi r3,r3,1
+ bdnz 46b
+ li r3,0
+ blr
+
+.Ldone:
+ mfctr r3
+ blr
+
+
+_GLOBAL(copy_mc_generic)
+ mr r7,r5
+ cmpldi r5,16
+ blt .Lshort_copy
+
+.Lcopy:
+ /* Get the source 8B aligned */
+ neg r6,r4
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-3)
+
+ bf cr7*4+3,1f
+err1; lbz r0,0(r4)
+ addi r4,r4,1
+err1; stb r0,0(r3)
+ addi r3,r3,1
+ subi r7,r7,1
+
+1: bf cr7*4+2,2f
+err1; lhz r0,0(r4)
+ addi r4,r4,2
+err1; sth r0,0(r3)
+ addi r3,r3,2
+ subi r7,r7,2
+
+2: bf cr7*4+1,3f
+err1; lwz r0,0(r4)
+ addi r4,r4,4
+err1; stw r0,0(r3)
+ addi r3,r3,4
+ subi r7,r7,4
+
+3: sub r5,r5,r6
+ cmpldi r5,128
+
+ mflr r0
+ stdu r1,-STACKFRAMESIZE(r1)
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+ std r17,STK_REG(R17)(r1)
+ std r18,STK_REG(R18)(r1)
+ std r19,STK_REG(R19)(r1)
+ std r20,STK_REG(R20)(r1)
+ std r21,STK_REG(R21)(r1)
+ std r22,STK_REG(R22)(r1)
+ std r0,STACKFRAMESIZE+16(r1)
+
+ blt 5f
+ srdi r6,r5,7
+ mtctr r6
+
+ /* Now do cacheline (128B) sized loads and stores. */
+ .align 5
+4:
+err2; ld r0,0(r4)
+err2; ld r6,8(r4)
+err2; ld r8,16(r4)
+err2; ld r9,24(r4)
+err2; ld r10,32(r4)
+err2; ld r11,40(r4)
+err2; ld r12,48(r4)
+err2; ld r14,56(r4)
+err2; ld r15,64(r4)
+err2; ld r16,72(r4)
+err2; ld r17,80(r4)
+err2; ld r18,88(r4)
+err2; ld r19,96(r4)
+err2; ld r20,104(r4)
+err2; ld r21,112(r4)
+err2; ld r22,120(r4)
+ addi r4,r4,128
+err2; std r0,0(r3)
+err2; std r6,8(r3)
+err2; std r8,16(r3)
+err2; std r9,24(r3)
+err2; std r10,32(r3)
+err2; std r11,40(r3)
+err2; std r12,48(r3)
+err2; std r14,56(r3)
+err2; std r15,64(r3)
+err2; std r16,72(r3)
+err2; std r17,80(r3)
+err2; std r18,88(r3)
+err2; std r19,96(r3)
+err2; std r20,104(r3)
+err2; std r21,112(r3)
+err2; std r22,120(r3)
+ addi r3,r3,128
+ subi r7,r7,128
+ bdnz 4b
+
+ clrldi r5,r5,(64-7)
+
+ /* Up to 127B to go */
+5: srdi r6,r5,4
+ mtocrf 0x01,r6
+
+6: bf cr7*4+1,7f
+err2; ld r0,0(r4)
+err2; ld r6,8(r4)
+err2; ld r8,16(r4)
+err2; ld r9,24(r4)
+err2; ld r10,32(r4)
+err2; ld r11,40(r4)
+err2; ld r12,48(r4)
+err2; ld r14,56(r4)
+ addi r4,r4,64
+err2; std r0,0(r3)
+err2; std r6,8(r3)
+err2; std r8,16(r3)
+err2; std r9,24(r3)
+err2; std r10,32(r3)
+err2; std r11,40(r3)
+err2; std r12,48(r3)
+err2; std r14,56(r3)
+ addi r3,r3,64
+ subi r7,r7,64
+
+7: ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r22,STK_REG(R22)(r1)
+ addi r1,r1,STACKFRAMESIZE
+
+ /* Up to 63B to go */
+ bf cr7*4+2,8f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+err1; ld r8,16(r4)
+err1; ld r9,24(r4)
+ addi r4,r4,32
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+err1; std r8,16(r3)
+err1; std r9,24(r3)
+ addi r3,r3,32
+ subi r7,r7,32
+
+ /* Up to 31B to go */
+8: bf cr7*4+3,9f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+ addi r4,r4,16
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+ addi r3,r3,16
+ subi r7,r7,16
+
+9: clrldi r5,r5,(64-4)
+
+ /* Up to 15B to go */
+.Lshort_copy:
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+err1; lwz r0,0(r4) /* Less chance of a reject with word ops */
+err1; lwz r6,4(r4)
+ addi r4,r4,8
+err1; stw r0,0(r3)
+err1; stw r6,4(r3)
+ addi r3,r3,8
+ subi r7,r7,8
+
+12: bf cr7*4+1,13f
+err1; lwz r0,0(r4)
+ addi r4,r4,4
+err1; stw r0,0(r3)
+ addi r3,r3,4
+ subi r7,r7,4
+
+13: bf cr7*4+2,14f
+err1; lhz r0,0(r4)
+ addi r4,r4,2
+err1; sth r0,0(r3)
+ addi r3,r3,2
+ subi r7,r7,2
+
+14: bf cr7*4+3,15f
+err1; lbz r0,0(r4)
+err1; stb r0,0(r3)
+
+15: li r3,0
+ blr
+
+EXPORT_SYMBOL_GPL(copy_mc_generic);
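The err1/err2/err3 macros above attach exception-table entries whose handlers work out how many bytes were left uncopied before returning. As a rough, self-contained C model of that return contract (the names and fault model are illustrative, not part of this patch):

        /* Model of copy_mc_generic()'s contract: return 0 on success, else
         * the number of bytes NOT copied, so callers know how much of dst
         * is valid. */
        #include <stddef.h>

        static size_t copy_mc_model(unsigned char *dst, const unsigned char *src,
                                    size_t len, size_t fault_at)
        {
                size_t i;

                for (i = 0; i < len; i++) {
                        if (i == fault_at)      /* stand-in for a machine check */
                                return len - i; /* remaining count, as .Ldone returns via mfctr */
                        dst[i] = src[i];
                }
                return 0;                       /* mirrors "li r3,0; blr" */
        }
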
diff --git a/arch/powerpc/lib/memcpy_mcsafe_64.S b/arch/powerpc/lib/memcpy_mcsafe_64.S
deleted file mode 100644
index cb882d9a6d8a..000000000000
--- a/arch/powerpc/lib/memcpy_mcsafe_64.S
+++ /dev/null
@@ -1,242 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) IBM Corporation, 2011
- * Derived from copyuser_power7.s by Anton Blanchard <anton@xxxxxxxxxx>
- * Author - Balbir Singh <bsingharora@xxxxxxxxx>
- */
-#include <asm/ppc_asm.h>
-#include <asm/errno.h>
-#include <asm/export.h>
-
- .macro err1
-100:
- EX_TABLE(100b,.Ldo_err1)
- .endm
-
- .macro err2
-200:
- EX_TABLE(200b,.Ldo_err2)
- .endm
-
- .macro err3
-300: EX_TABLE(300b,.Ldone)
- .endm
-
-.Ldo_err2:
- ld r22,STK_REG(R22)(r1)
- ld r21,STK_REG(R21)(r1)
- ld r20,STK_REG(R20)(r1)
- ld r19,STK_REG(R19)(r1)
- ld r18,STK_REG(R18)(r1)
- ld r17,STK_REG(R17)(r1)
- ld r16,STK_REG(R16)(r1)
- ld r15,STK_REG(R15)(r1)
- ld r14,STK_REG(R14)(r1)
- addi r1,r1,STACKFRAMESIZE
-.Ldo_err1:
- /* Do a byte by byte copy to get the exact remaining size */
- mtctr r7
-46:
-err3; lbz r0,0(r4)
- addi r4,r4,1
-err3; stb r0,0(r3)
- addi r3,r3,1
- bdnz 46b
- li r3,0
- blr
-
-.Ldone:
- mfctr r3
- blr
-
-
-_GLOBAL(memcpy_mcsafe)
- mr r7,r5
- cmpldi r5,16
- blt .Lshort_copy
-
-.Lcopy:
- /* Get the source 8B aligned */
- neg r6,r4
- mtocrf 0x01,r6
- clrldi r6,r6,(64-3)
-
- bf cr7*4+3,1f
-err1; lbz r0,0(r4)
- addi r4,r4,1
-err1; stb r0,0(r3)
- addi r3,r3,1
- subi r7,r7,1
-
-1: bf cr7*4+2,2f
-err1; lhz r0,0(r4)
- addi r4,r4,2
-err1; sth r0,0(r3)
- addi r3,r3,2
- subi r7,r7,2
-
-2: bf cr7*4+1,3f
-err1; lwz r0,0(r4)
- addi r4,r4,4
-err1; stw r0,0(r3)
- addi r3,r3,4
- subi r7,r7,4
-
-3: sub r5,r5,r6
- cmpldi r5,128
-
- mflr r0
- stdu r1,-STACKFRAMESIZE(r1)
- std r14,STK_REG(R14)(r1)
- std r15,STK_REG(R15)(r1)
- std r16,STK_REG(R16)(r1)
- std r17,STK_REG(R17)(r1)
- std r18,STK_REG(R18)(r1)
- std r19,STK_REG(R19)(r1)
- std r20,STK_REG(R20)(r1)
- std r21,STK_REG(R21)(r1)
- std r22,STK_REG(R22)(r1)
- std r0,STACKFRAMESIZE+16(r1)
-
- blt 5f
- srdi r6,r5,7
- mtctr r6
-
- /* Now do cacheline (128B) sized loads and stores. */
- .align 5
-4:
-err2; ld r0,0(r4)
-err2; ld r6,8(r4)
-err2; ld r8,16(r4)
-err2; ld r9,24(r4)
-err2; ld r10,32(r4)
-err2; ld r11,40(r4)
-err2; ld r12,48(r4)
-err2; ld r14,56(r4)
-err2; ld r15,64(r4)
-err2; ld r16,72(r4)
-err2; ld r17,80(r4)
-err2; ld r18,88(r4)
-err2; ld r19,96(r4)
-err2; ld r20,104(r4)
-err2; ld r21,112(r4)
-err2; ld r22,120(r4)
- addi r4,r4,128
-err2; std r0,0(r3)
-err2; std r6,8(r3)
-err2; std r8,16(r3)
-err2; std r9,24(r3)
-err2; std r10,32(r3)
-err2; std r11,40(r3)
-err2; std r12,48(r3)
-err2; std r14,56(r3)
-err2; std r15,64(r3)
-err2; std r16,72(r3)
-err2; std r17,80(r3)
-err2; std r18,88(r3)
-err2; std r19,96(r3)
-err2; std r20,104(r3)
-err2; std r21,112(r3)
-err2; std r22,120(r3)
- addi r3,r3,128
- subi r7,r7,128
- bdnz 4b
-
- clrldi r5,r5,(64-7)
-
- /* Up to 127B to go */
-5: srdi r6,r5,4
- mtocrf 0x01,r6
-
-6: bf cr7*4+1,7f
-err2; ld r0,0(r4)
-err2; ld r6,8(r4)
-err2; ld r8,16(r4)
-err2; ld r9,24(r4)
-err2; ld r10,32(r4)
-err2; ld r11,40(r4)
-err2; ld r12,48(r4)
-err2; ld r14,56(r4)
- addi r4,r4,64
-err2; std r0,0(r3)
-err2; std r6,8(r3)
-err2; std r8,16(r3)
-err2; std r9,24(r3)
-err2; std r10,32(r3)
-err2; std r11,40(r3)
-err2; std r12,48(r3)
-err2; std r14,56(r3)
- addi r3,r3,64
- subi r7,r7,64
-
-7: ld r14,STK_REG(R14)(r1)
- ld r15,STK_REG(R15)(r1)
- ld r16,STK_REG(R16)(r1)
- ld r17,STK_REG(R17)(r1)
- ld r18,STK_REG(R18)(r1)
- ld r19,STK_REG(R19)(r1)
- ld r20,STK_REG(R20)(r1)
- ld r21,STK_REG(R21)(r1)
- ld r22,STK_REG(R22)(r1)
- addi r1,r1,STACKFRAMESIZE
-
- /* Up to 63B to go */
- bf cr7*4+2,8f
-err1; ld r0,0(r4)
-err1; ld r6,8(r4)
-err1; ld r8,16(r4)
-err1; ld r9,24(r4)
- addi r4,r4,32
-err1; std r0,0(r3)
-err1; std r6,8(r3)
-err1; std r8,16(r3)
-err1; std r9,24(r3)
- addi r3,r3,32
- subi r7,r7,32
-
- /* Up to 31B to go */
-8: bf cr7*4+3,9f
-err1; ld r0,0(r4)
-err1; ld r6,8(r4)
- addi r4,r4,16
-err1; std r0,0(r3)
-err1; std r6,8(r3)
- addi r3,r3,16
- subi r7,r7,16
-
-9: clrldi r5,r5,(64-4)
-
- /* Up to 15B to go */
-.Lshort_copy:
- mtocrf 0x01,r5
- bf cr7*4+0,12f
-err1; lwz r0,0(r4) /* Less chance of a reject with word ops */
-err1; lwz r6,4(r4)
- addi r4,r4,8
-err1; stw r0,0(r3)
-err1; stw r6,4(r3)
- addi r3,r3,8
- subi r7,r7,8
-
-12: bf cr7*4+1,13f
-err1; lwz r0,0(r4)
- addi r4,r4,4
-err1; stw r0,0(r3)
- addi r3,r3,4
- subi r7,r7,4
-
-13: bf cr7*4+2,14f
-err1; lhz r0,0(r4)
- addi r4,r4,2
-err1; sth r0,0(r3)
- addi r3,r3,2
- subi r7,r7,2
-
-14: bf cr7*4+3,15f
-err1; lbz r0,0(r4)
-err1; stb r0,0(r3)
-
-15: li r3,0
- blr
-
-EXPORT_SYMBOL_GPL(memcpy_mcsafe);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7101ac64bb20..e876b3a087f9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -75,7 +75,7 @@ config X86
select ARCH_HAS_PTE_DEVMAP if X86_64
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
- select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE
+ select ARCH_HAS_COPY_MC if X86_64
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_STRICT_KERNEL_RWX
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index ee1d3c5834c6..27b5e2bc6a01 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -62,7 +62,7 @@ config EARLY_PRINTK_USB_XDBC
You should normally say N here, unless you want to debug early
crashes or need a very simple printk logging facility.

-config MCSAFE_TEST
+config COPY_MC_TEST
def_bool n

config EFI_PGT_DUMP
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 26c36357c4c9..a023cbe21230 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -89,6 +89,7 @@ struct perf_ibs {
u64 max_period;
unsigned long offset_mask[1];
int offset_max;
+ unsigned int fetch_count_reset_broken : 1;
struct cpu_perf_ibs __percpu *pcpu;

struct attribute **format_attrs;
@@ -363,7 +364,12 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
struct hw_perf_event *hwc, u64 config)
{
- wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
+ u64 tmp = hwc->config | config;
+
+ if (perf_ibs->fetch_count_reset_broken)
+ wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask);
+
+ wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask);
}

/*
@@ -733,6 +739,13 @@ static __init void perf_event_ibs_init(void)
{
struct attribute **attr = ibs_op_format_attrs;

+ /*
+ * Some chips fail to reset the fetch count when it is written; instead
+ * they need a 0-1 transition of IbsFetchEn.
+ */
+ if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
+ perf_ibs_fetch.fetch_count_reset_broken = 1;
+
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");

if (ibs_caps & IBS_CAPS_OPCNT) {
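The hunk above implements the 0-1 IbsFetchEn transition described in the comment. A hedged standalone sketch of the same pattern (write_msr() and the bit position are assumptions for illustration; the kernel itself uses wrmsrl() and IBS_FETCH_ENABLE):

        #include <stdint.h>

        #define FETCH_EN (1ULL << 48)   /* assumed IbsFetchEn bit, for illustration */

        extern void write_msr(uint32_t msr, uint64_t val);  /* hypothetical helper */

        static void ibs_fetch_enable(uint32_t msr, uint64_t config, int reset_broken)
        {
                if (reset_broken)
                        write_msr(msr, config & ~FETCH_EN); /* force enable bit to 0 */

                write_msr(msr, config | FETCH_EN);          /* 0 -> 1 resets the count */
        }
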
diff --git a/arch/x86/include/asm/copy_mc_test.h b/arch/x86/include/asm/copy_mc_test.h
new file mode 100644
index 000000000000..e4991ba96726
--- /dev/null
+++ b/arch/x86/include/asm/copy_mc_test.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _COPY_MC_TEST_H_
+#define _COPY_MC_TEST_H_
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_COPY_MC_TEST
+extern unsigned long copy_mc_test_src;
+extern unsigned long copy_mc_test_dst;
+
+static inline void copy_mc_inject_src(void *addr)
+{
+ if (addr)
+ copy_mc_test_src = (unsigned long) addr;
+ else
+ copy_mc_test_src = ~0UL;
+}
+
+static inline void copy_mc_inject_dst(void *addr)
+{
+ if (addr)
+ copy_mc_test_dst = (unsigned long) addr;
+ else
+ copy_mc_test_dst = ~0UL;
+}
+#else /* CONFIG_COPY_MC_TEST */
+static inline void copy_mc_inject_src(void *addr)
+{
+}
+
+static inline void copy_mc_inject_dst(void *addr)
+{
+}
+#endif /* CONFIG_COPY_MC_TEST */
+
+#else /* __ASSEMBLY__ */
+#include <asm/export.h>
+
+#ifdef CONFIG_COPY_MC_TEST
+.macro COPY_MC_TEST_CTL
+ .pushsection .data
+ .align 8
+ .globl copy_mc_test_src
+ copy_mc_test_src:
+ .quad 0
+ EXPORT_SYMBOL_GPL(copy_mc_test_src)
+ .globl copy_mc_test_dst
+ copy_mc_test_dst:
+ .quad 0
+ EXPORT_SYMBOL_GPL(copy_mc_test_dst)
+ .popsection
+.endm
+
+.macro COPY_MC_TEST_SRC reg count target
+ leaq \count(\reg), %r9
+ cmp copy_mc_test_src, %r9
+ ja \target
+.endm
+
+.macro COPY_MC_TEST_DST reg count target
+ leaq \count(\reg), %r9
+ cmp copy_mc_test_dst, %r9
+ ja \target
+.endm
+#else
+.macro COPY_MC_TEST_CTL
+.endm
+
+.macro COPY_MC_TEST_SRC reg count target
+.endm
+
+.macro COPY_MC_TEST_DST reg count target
+.endm
+#endif /* CONFIG_COPY_MC_TEST */
+#endif /* __ASSEMBLY__ */
+#endif /* _COPY_MC_TEST_H_ */
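For context, a hedged sketch of how the injection hooks above could be exercised from CONFIG_COPY_MC_TEST=y kernel test code (a hypothetical harness, not part of this patch):

        /* Arm a fake poison boundary mid-buffer; the COPY_MC_TEST_SRC macro
         * branches to the fixup path once the copy cursor passes it. */
        static void copy_mc_selftest(void)
        {
                static char src[64], dst[64];
                unsigned long rem;

                copy_mc_inject_src(&src[32]);   /* accesses beyond here "fault" */
                rem = copy_mc_fragile(dst, src, sizeof(src));
                copy_mc_inject_src(NULL);       /* ~0UL disarms the injection */

                /* rem should be roughly the 32 uncopied tail bytes */
        }
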
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index cf503824529c..9b9112e4379a 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -174,6 +174,15 @@ extern void mce_unregister_decode_chain(struct notifier_block *nb);

extern int mce_p5_enabled;

+#ifdef CONFIG_ARCH_HAS_COPY_MC
+extern void enable_copy_mc_fragile(void);
+unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt);
+#else
+static inline void enable_copy_mc_fragile(void)
+{
+}
+#endif
+
#ifdef CONFIG_X86_MCE
int mcheck_init(void);
void mcheck_cpu_init(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/mcsafe_test.h b/arch/x86/include/asm/mcsafe_test.h
deleted file mode 100644
index eb59804b6201..000000000000
--- a/arch/x86/include/asm/mcsafe_test.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MCSAFE_TEST_H_
-#define _MCSAFE_TEST_H_
-
-#ifndef __ASSEMBLY__
-#ifdef CONFIG_MCSAFE_TEST
-extern unsigned long mcsafe_test_src;
-extern unsigned long mcsafe_test_dst;
-
-static inline void mcsafe_inject_src(void *addr)
-{
- if (addr)
- mcsafe_test_src = (unsigned long) addr;
- else
- mcsafe_test_src = ~0UL;
-}
-
-static inline void mcsafe_inject_dst(void *addr)
-{
- if (addr)
- mcsafe_test_dst = (unsigned long) addr;
- else
- mcsafe_test_dst = ~0UL;
-}
-#else /* CONFIG_MCSAFE_TEST */
-static inline void mcsafe_inject_src(void *addr)
-{
-}
-
-static inline void mcsafe_inject_dst(void *addr)
-{
-}
-#endif /* CONFIG_MCSAFE_TEST */
-
-#else /* __ASSEMBLY__ */
-#include <asm/export.h>
-
-#ifdef CONFIG_MCSAFE_TEST
-.macro MCSAFE_TEST_CTL
- .pushsection .data
- .align 8
- .globl mcsafe_test_src
- mcsafe_test_src:
- .quad 0
- EXPORT_SYMBOL_GPL(mcsafe_test_src)
- .globl mcsafe_test_dst
- mcsafe_test_dst:
- .quad 0
- EXPORT_SYMBOL_GPL(mcsafe_test_dst)
- .popsection
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
- leaq \count(\reg), %r9
- cmp mcsafe_test_src, %r9
- ja \target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
- leaq \count(\reg), %r9
- cmp mcsafe_test_dst, %r9
- ja \target
-.endm
-#else
-.macro MCSAFE_TEST_CTL
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
-.endm
-#endif /* CONFIG_MCSAFE_TEST */
-#endif /* __ASSEMBLY__ */
-#endif /* _MCSAFE_TEST_H_ */
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 75314c3dbe47..6e450827f677 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -82,38 +82,6 @@ int strcmp(const char *cs, const char *ct);

#endif

-#define __HAVE_ARCH_MEMCPY_MCSAFE 1
-__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src,
- size_t cnt);
-DECLARE_STATIC_KEY_FALSE(mcsafe_key);
-
-/**
- * memcpy_mcsafe - copy memory with indication if a machine check happened
- *
- * @dst: destination address
- * @src: source address
- * @cnt: number of bytes to copy
- *
- * Low level memory copy function that catches machine checks
- * We only call into the "safe" function on systems that can
- * actually do machine check recovery. Everyone else can just
- * use memcpy().
- *
- * Return 0 for success, or number of bytes not copied if there was an
- * exception.
- */
-static __always_inline __must_check unsigned long
-memcpy_mcsafe(void *dst, const void *src, size_t cnt)
-{
-#ifdef CONFIG_X86_MCE
- if (static_branch_unlikely(&mcsafe_key))
- return __memcpy_mcsafe(dst, src, cnt);
- else
-#endif
- memcpy(dst, src, cnt);
- return 0;
-}
-
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
#define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
void __memcpy_flushcache(void *dst, const void *src, size_t cnt);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index ecefaffd15d4..eff7fb847149 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -455,6 +455,15 @@ extern __must_check long strnlen_user(const char __user *str, long n);
unsigned long __must_check clear_user(void __user *mem, unsigned long len);
unsigned long __must_check __clear_user(void __user *mem, unsigned long len);

+#ifdef CONFIG_ARCH_HAS_COPY_MC
+unsigned long __must_check
+copy_mc_to_kernel(void *to, const void *from, unsigned len);
+#define copy_mc_to_kernel copy_mc_to_kernel
+
+unsigned long __must_check
+copy_mc_to_user(void *to, const void *from, unsigned len);
+#endif
+
/*
* movsl can be slow when source and dest are not both 8-byte aligned
*/
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index bc10e3dc64fe..e7265a552f4f 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -46,22 +46,6 @@ copy_user_generic(void *to, const void *from, unsigned len)
return ret;
}

-static __always_inline __must_check unsigned long
-copy_to_user_mcsafe(void *to, const void *from, unsigned len)
-{
- unsigned long ret;
-
- __uaccess_begin();
- /*
- * Note, __memcpy_mcsafe() is explicitly used since it can
- * handle exceptions / faults. memcpy_mcsafe() may fall back to
- * memcpy() which lacks this handling.
- */
- ret = __memcpy_mcsafe(to, from, len);
- __uaccess_end();
- return ret;
-}
-
static __always_inline __must_check unsigned long
raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
{
@@ -102,8 +86,4 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
kasan_check_write(dst, size);
return __copy_user_flushcache(dst, src, size);
}
-
-unsigned long
-mcsafe_handle_tail(char *to, char *from, unsigned len);
-
#endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 84eef4fa9599..de29c4a267c0 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -40,7 +40,6 @@
#include <linux/debugfs.h>
#include <linux/irq_work.h>
#include <linux/export.h>
-#include <linux/jump_label.h>
#include <linux/set_memory.h>
#include <linux/sync_core.h>
#include <linux/task_work.h>
@@ -2127,7 +2126,7 @@ void mce_disable_bank(int bank)
and older.
* mce=nobootlog Don't log MCEs from before booting.
* mce=bios_cmci_threshold Don't program the CMCI threshold
- * mce=recovery force enable memcpy_mcsafe()
+ * mce=recovery force enable copy_mc_fragile()
*/
static int __init mcheck_enable(char *str)
{
@@ -2735,13 +2734,10 @@ static void __init mcheck_debugfs_init(void)
static void __init mcheck_debugfs_init(void) { }
#endif

-DEFINE_STATIC_KEY_FALSE(mcsafe_key);
-EXPORT_SYMBOL_GPL(mcsafe_key);
-
static int __init mcheck_late_init(void)
{
if (mca_cfg.recovery)
- static_branch_inc(&mcsafe_key);
+ enable_copy_mc_fragile();

mcheck_debugfs_init();

diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 1b10717c9321..6d0df6a58873 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -8,6 +8,7 @@

#include <asm/hpet.h>
#include <asm/setup.h>
+#include <asm/mce.h>

#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)

@@ -624,10 +625,6 @@ static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev)
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
amd_disable_seq_and_redirect_scrub);

-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
-#include <linux/jump_label.h>
-#include <asm/string_64.h>
-
/* Ivy Bridge, Haswell, Broadwell */
static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
{
@@ -636,7 +633,7 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
pci_read_config_dword(pdev, 0x84, &capid0);

if (capid0 & 0x10)
- static_branch_inc(&mcsafe_key);
+ enable_copy_mc_fragile();
}

/* Skylake */
@@ -653,7 +650,7 @@ static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
* enabled, so memory machine check recovery is also enabled.
*/
if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0))
- static_branch_inc(&mcsafe_key);
+ enable_copy_mc_fragile();

}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
@@ -661,7 +658,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
#endif
-#endif

bool x86_apple_machine;
EXPORT_SYMBOL(x86_apple_machine);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 81a2fb711091..316ce1c09e84 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -195,7 +195,7 @@ static __always_inline void __user *error_get_trap_addr(struct pt_regs *regs)

DEFINE_IDTENTRY(exc_divide_error)
{
- do_error_trap(regs, 0, "divide_error", X86_TRAP_DE, SIGFPE,
+ do_error_trap(regs, 0, "divide error", X86_TRAP_DE, SIGFPE,
FPE_INTDIV, error_get_trap_addr(regs));
}

diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index aa067859a70b..bad4dee4f0e4 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -44,6 +44,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
lib-y := delay.o misc.o cmdline.o cpu.o
lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
+lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc.o copy_mc_64.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c
new file mode 100644
index 000000000000..c13e8c9ee926
--- /dev/null
+++ b/arch/x86/lib/copy_mc.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
+
+#include <linux/jump_label.h>
+#include <linux/uaccess.h>
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/mce.h>
+
+#ifdef CONFIG_X86_MCE
+/*
+ * See COPY_MC_TEST for self-test of the copy_mc_fragile()
+ * implementation.
+ */
+static DEFINE_STATIC_KEY_FALSE(copy_mc_fragile_key);
+
+void enable_copy_mc_fragile(void)
+{
+ static_branch_inc(&copy_mc_fragile_key);
+}
+#define copy_mc_fragile_enabled (static_branch_unlikely(&copy_mc_fragile_key))
+
+/*
+ * Similar to copy_user_handle_tail, probe for the write fault point, or
+ * source exception point.
+ */
+__visible notrace unsigned long
+copy_mc_fragile_handle_tail(char *to, char *from, unsigned len)
+{
+ for (; len; --len, to++, from++)
+ if (copy_mc_fragile(to, from, 1))
+ break;
+ return len;
+}
+#else
+/*
+ * No point in doing careful copying, or consulting a static key when
+ * there is no #MC handler in the CONFIG_X86_MCE=n case.
+ */
+void enable_copy_mc_fragile(void)
+{
+}
+#define copy_mc_fragile_enabled (0)
+#endif
+
+unsigned long copy_mc_enhanced_fast_string(void *dst, const void *src, unsigned len);
+
+/**
+ * copy_mc_to_kernel - memory copy that handles source exceptions
+ *
+ * @dst: destination address
+ * @src: source address
+ * @len: number of bytes to copy
+ *
+ * Call into the 'fragile' version on systems that benefit from avoiding
+ * corner case poison consumption scenarios. For example, accessing
+ * poison across 2 cachelines with a single instruction. Almost all
+ * other use cases can use copy_mc_enhanced_fast_string() for a fast
+ * recoverable copy, or fall back to plain memcpy.
+ *
+ * Return 0 for success, or number of bytes not copied if there was an
+ * exception.
+ */
+unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len)
+{
+ if (copy_mc_fragile_enabled)
+ return copy_mc_fragile(dst, src, len);
+ if (static_cpu_has(X86_FEATURE_ERMS))
+ return copy_mc_enhanced_fast_string(dst, src, len);
+ memcpy(dst, src, len);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(copy_mc_to_kernel);
+
+unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned len)
+{
+ unsigned long ret;
+
+ if (copy_mc_fragile_enabled) {
+ __uaccess_begin();
+ ret = copy_mc_fragile(dst, src, len);
+ __uaccess_end();
+ return ret;
+ }
+
+ if (static_cpu_has(X86_FEATURE_ERMS)) {
+ __uaccess_begin();
+ ret = copy_mc_enhanced_fast_string(dst, src, len);
+ __uaccess_end();
+ return ret;
+ }
+
+ return copy_user_generic(dst, src, len);
+}
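A short, hedged example of the caller-side contract for copy_mc_to_kernel() (a hypothetical pmem read helper; the dm-writecache conversion further down uses the same pattern):

        /* Nonzero means 'rem' tail bytes were never written to buf, typically
         * because poison was encountered in the source; report it as -EIO. */
        static int pmem_read(void *buf, const void *pmem_addr, size_t len)
        {
                unsigned long rem = copy_mc_to_kernel(buf, pmem_addr, len);

                return rem ? -EIO : 0;
        }
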
diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S
new file mode 100644
index 000000000000..892d8915f609
--- /dev/null
+++ b/arch/x86/lib/copy_mc_64.S
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
+
+#include <linux/linkage.h>
+#include <asm/copy_mc_test.h>
+#include <asm/export.h>
+#include <asm/asm.h>
+
+#ifndef CONFIG_UML
+
+#ifdef CONFIG_X86_MCE
+COPY_MC_TEST_CTL
+
+/*
+ * copy_mc_fragile - copy memory with indication if an exception / fault happened
+ *
+ * The 'fragile' version is opted into by platform quirks and takes
+ * pains to avoid unrecoverable corner cases like 'fast-string'
+ * instruction sequences, and consuming poison across a cacheline
+ * boundary. The non-fragile version is equivalent to memcpy()
+ * regardless of CPU machine-check-recovery capability.
+ */
+SYM_FUNC_START(copy_mc_fragile)
+ cmpl $8, %edx
+ /* Less than 8 bytes? Go to byte copy loop */
+ jb .L_no_whole_words
+
+ /* Check for bad alignment of source */
+ testl $7, %esi
+ /* Already aligned */
+ jz .L_8byte_aligned
+
+ /* Copy one byte at a time until source is 8-byte aligned */
+ movl %esi, %ecx
+ andl $7, %ecx
+ subl $8, %ecx
+ negl %ecx
+ subl %ecx, %edx
+.L_read_leading_bytes:
+ movb (%rsi), %al
+ COPY_MC_TEST_SRC %rsi 1 .E_leading_bytes
+ COPY_MC_TEST_DST %rdi 1 .E_leading_bytes
+.L_write_leading_bytes:
+ movb %al, (%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz .L_read_leading_bytes
+
+.L_8byte_aligned:
+ movl %edx, %ecx
+ andl $7, %edx
+ shrl $3, %ecx
+ jz .L_no_whole_words
+
+.L_read_words:
+ movq (%rsi), %r8
+ COPY_MC_TEST_SRC %rsi 8 .E_read_words
+ COPY_MC_TEST_DST %rdi 8 .E_write_words
+.L_write_words:
+ movq %r8, (%rdi)
+ addq $8, %rsi
+ addq $8, %rdi
+ decl %ecx
+ jnz .L_read_words
+
+ /* Any trailing bytes? */
+.L_no_whole_words:
+ andl %edx, %edx
+ jz .L_done_memcpy_trap
+
+ /* Copy trailing bytes */
+ movl %edx, %ecx
+.L_read_trailing_bytes:
+ movb (%rsi), %al
+ COPY_MC_TEST_SRC %rsi 1 .E_trailing_bytes
+ COPY_MC_TEST_DST %rdi 1 .E_trailing_bytes
+.L_write_trailing_bytes:
+ movb %al, (%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz .L_read_trailing_bytes
+
+ /* Copy successful. Return zero */
+.L_done_memcpy_trap:
+ xorl %eax, %eax
+.L_done:
+ ret
+SYM_FUNC_END(copy_mc_fragile)
+EXPORT_SYMBOL_GPL(copy_mc_fragile)
+
+ .section .fixup, "ax"
+ /*
+ * Return number of bytes not copied for any failure. Note that
+ * there is no "tail" handling since the source buffer is 8-byte
+ * aligned and poison is cacheline aligned.
+ */
+.E_read_words:
+ shll $3, %ecx
+.E_leading_bytes:
+ addl %edx, %ecx
+.E_trailing_bytes:
+ mov %ecx, %eax
+ jmp .L_done
+
+ /*
+ * For write fault handling, given the destination is unaligned,
+ * we handle faults on multi-byte writes with a byte-by-byte
+ * copy up to the write-protected page.
+ */
+.E_write_words:
+ shll $3, %ecx
+ addl %edx, %ecx
+ movl %ecx, %edx
+ jmp copy_mc_fragile_handle_tail
+
+ .previous
+
+ _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
+ _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
+ _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
+ _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
+ _ASM_EXTABLE(.L_write_words, .E_write_words)
+ _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
+#endif /* CONFIG_X86_MCE */
+
+/*
+ * copy_mc_enhanced_fast_string - memory copy with exception handling
+ *
+ * Fast string copy + fault / exception handling. If the CPU does
+ * support machine check exception recovery, but does not support
+ * recovering from fast-string exceptions, then this CPU needs to be
+ * added to the copy_mc_fragile_key set of quirks. Otherwise, absent any
+ * machine check recovery support this version should be no slower than
+ * standard memcpy.
+ */
+SYM_FUNC_START(copy_mc_enhanced_fast_string)
+ movq %rdi, %rax
+ movq %rdx, %rcx
+.L_copy:
+ rep movsb
+ /* Copy successful. Return zero */
+ xorl %eax, %eax
+ ret
+SYM_FUNC_END(copy_mc_enhanced_fast_string)
+
+ .section .fixup, "ax"
+.E_copy:
+ /*
+ * On fault %rcx is updated such that the copy instruction could
+ * optionally be restarted at the fault position, i.e. it
+ * contains 'bytes remaining'. A non-zero return indicates an error
+ * to copy_mc_generic() users, or a short transfer to
+ * user-copy routines.
+ */
+ movq %rcx, %rax
+ ret
+
+ .previous
+
+ _ASM_EXTABLE_FAULT(.L_copy, .E_copy)
+#endif /* !CONFIG_UML */
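The .E_read_words fixup above reconstitutes the uncopied byte count from the loop registers; a one-function C model of that arithmetic (variable names assumed for illustration):

        /* At fault time %ecx holds whole 8-byte words left and %edx holds
         * the trailing byte count, so the total not copied is: */
        static unsigned int bytes_not_copied(unsigned int words_left,
                                             unsigned int trailing)
        {
                return (words_left << 3) + trailing;    /* shll $3; addl %edx */
        }
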
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index bbcc05bcefad..037faac46b0c 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -4,7 +4,6 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
-#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>

@@ -187,117 +186,3 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
SYM_FUNC_END(memcpy_orig)

.popsection
-
-#ifndef CONFIG_UML
-
-MCSAFE_TEST_CTL
-
-/*
- * __memcpy_mcsafe - memory copy with machine check exception handling
- * Note that we only catch machine checks when reading the source addresses.
- * Writes to target are posted and don't generate machine checks.
- */
-SYM_FUNC_START(__memcpy_mcsafe)
- cmpl $8, %edx
- /* Less than 8 bytes? Go to byte copy loop */
- jb .L_no_whole_words
-
- /* Check for bad alignment of source */
- testl $7, %esi
- /* Already aligned */
- jz .L_8byte_aligned
-
- /* Copy one byte at a time until source is 8-byte aligned */
- movl %esi, %ecx
- andl $7, %ecx
- subl $8, %ecx
- negl %ecx
- subl %ecx, %edx
-.L_read_leading_bytes:
- movb (%rsi), %al
- MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
- MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
-.L_write_leading_bytes:
- movb %al, (%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .L_read_leading_bytes
-
-.L_8byte_aligned:
- movl %edx, %ecx
- andl $7, %edx
- shrl $3, %ecx
- jz .L_no_whole_words
-
-.L_read_words:
- movq (%rsi), %r8
- MCSAFE_TEST_SRC %rsi 8 .E_read_words
- MCSAFE_TEST_DST %rdi 8 .E_write_words
-.L_write_words:
- movq %r8, (%rdi)
- addq $8, %rsi
- addq $8, %rdi
- decl %ecx
- jnz .L_read_words
-
- /* Any trailing bytes? */
-.L_no_whole_words:
- andl %edx, %edx
- jz .L_done_memcpy_trap
-
- /* Copy trailing bytes */
- movl %edx, %ecx
-.L_read_trailing_bytes:
- movb (%rsi), %al
- MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
- MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
-.L_write_trailing_bytes:
- movb %al, (%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .L_read_trailing_bytes
-
- /* Copy successful. Return zero */
-.L_done_memcpy_trap:
- xorl %eax, %eax
-.L_done:
- ret
-SYM_FUNC_END(__memcpy_mcsafe)
-EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
-
- .section .fixup, "ax"
- /*
- * Return number of bytes not copied for any failure. Note that
- * there is no "tail" handling since the source buffer is 8-byte
- * aligned and poison is cacheline aligned.
- */
-.E_read_words:
- shll $3, %ecx
-.E_leading_bytes:
- addl %edx, %ecx
-.E_trailing_bytes:
- mov %ecx, %eax
- jmp .L_done
-
- /*
- * For write fault handling, given the destination is unaligned,
- * we handle faults on multi-byte writes with a byte-by-byte
- * copy up to the write-protected page.
- */
-.E_write_words:
- shll $3, %ecx
- addl %edx, %ecx
- movl %ecx, %edx
- jmp mcsafe_handle_tail
-
- .previous
-
- _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
- _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
- _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
- _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
- _ASM_EXTABLE(.L_write_words, .E_write_words)
- _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
-#endif
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 1847e993ac63..508c81e97ab1 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -56,27 +56,6 @@ unsigned long clear_user(void __user *to, unsigned long n)
}
EXPORT_SYMBOL(clear_user);

-/*
- * Similar to copy_user_handle_tail, probe for the write fault point,
- * but reuse __memcpy_mcsafe in case a new read error is encountered.
- * clac() is handled in _copy_to_iter_mcsafe().
- */
-__visible notrace unsigned long
-mcsafe_handle_tail(char *to, char *from, unsigned len)
-{
- for (; len; --len, to++, from++) {
- /*
- * Call the assembly routine back directly since
- * memcpy_mcsafe() may silently fallback to memcpy.
- */
- unsigned long rem = __memcpy_mcsafe(to, from, 1);
-
- if (rem)
- break;
- }
- return len;
-}
-
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
* clean_cache_range - write back a cache range with CLWB
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 00c62115f39c..0aaf31917061 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -33,6 +33,7 @@
#include <asm/hw_irq.h>
#include <asm/io_apic.h>
#include <asm/intel-mid.h>
+#include <asm/acpi.h>

#define PCIE_CAP_OFFSET 0x100

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 22e741e0b10c..351ac1a9a119 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1376,6 +1376,15 @@ asmlinkage __visible void __init xen_start_kernel(void)
x86_init.mpparse.get_smp_config = x86_init_uint_noop;

xen_boot_params_init_edd();
+
+#ifdef CONFIG_ACPI
+ /*
+ * Disable selecting "Firmware First mode" for correctable
+ * memory errors, as deciding this is the duty of the
+ * hypervisor.
+ */
+ acpi_disable_cmcff = 1;
+#endif
}

if (!boot_params.screen_info.orig_video_isVGA)
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index d991dd46e89c..98b8baa47dc5 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -240,6 +240,8 @@ enum {
as default lpm_policy */
AHCI_HFLAG_SUSPEND_PHYS = (1 << 26), /* handle PHYs during
suspend/resume */
+ AHCI_HFLAG_IGN_NOTSUPP_POWER_ON = (1 << 27), /* ignore -EOPNOTSUPP
+ from phy_power_on() */

/* ap->flags bits */

diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c
index d4bba3ace45d..3ad46d26d9d5 100644
--- a/drivers/ata/ahci_mvebu.c
+++ b/drivers/ata/ahci_mvebu.c
@@ -227,7 +227,7 @@ static const struct ahci_mvebu_plat_data ahci_mvebu_armada_380_plat_data = {

static const struct ahci_mvebu_plat_data ahci_mvebu_armada_3700_plat_data = {
.plat_config = ahci_mvebu_armada_3700_config,
- .flags = AHCI_HFLAG_SUSPEND_PHYS,
+ .flags = AHCI_HFLAG_SUSPEND_PHYS | AHCI_HFLAG_IGN_NOTSUPP_POWER_ON,
};

static const struct of_device_id ahci_mvebu_of_match[] = {
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 86261deeb4c5..de638dafce21 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -59,7 +59,7 @@ int ahci_platform_enable_phys(struct ahci_host_priv *hpriv)
}

rc = phy_power_on(hpriv->phys[i]);
- if (rc) {
+ if (rc && !(rc == -EOPNOTSUPP && (hpriv->flags & AHCI_HFLAG_IGN_NOTSUPP_POWER_ON))) {
phy_exit(hpriv->phys[i]);
goto disable_phys;
}
diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c
index 141ac600b64c..44b0ed8f6bb8 100644
--- a/drivers/ata/sata_rcar.c
+++ b/drivers/ata/sata_rcar.c
@@ -120,7 +120,7 @@
/* Descriptor table word 0 bit (when DTA32M = 1) */
#define SATA_RCAR_DTEND BIT(0)

-#define SATA_RCAR_DMA_BOUNDARY 0x1FFFFFFEUL
+#define SATA_RCAR_DMA_BOUNDARY 0x1FFFFFFFUL

/* Gen2 Physical Layer Control Registers */
#define RCAR_GEN2_PHY_CTL1_REG 0x1704
diff --git a/drivers/base/firmware_loader/fallback_platform.c b/drivers/base/firmware_loader/fallback_platform.c
index 685edb7dd05a..6958ab1a8059 100644
--- a/drivers/base/firmware_loader/fallback_platform.c
+++ b/drivers/base/firmware_loader/fallback_platform.c
@@ -17,7 +17,7 @@ int firmware_fallback_platform(struct fw_priv *fw_priv, u32 opt_flags)
if (!(opt_flags & FW_OPT_FALLBACK_PLATFORM))
return -ENOENT;

- rc = security_kernel_load_data(LOADING_FIRMWARE_EFI_EMBEDDED);
+ rc = security_kernel_load_data(LOADING_FIRMWARE);
if (rc)
return rc;

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 47aa90f9a7c2..dade36725b8f 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1450,14 +1450,13 @@ static int cpufreq_online(unsigned int cpu)
*/
if ((cpufreq_driver->flags & CPUFREQ_NEED_INITIAL_FREQ_CHECK)
&& has_target()) {
+ unsigned int old_freq = policy->cur;
+
/* Are we running at unknown frequency ? */
- ret = cpufreq_frequency_table_get_index(policy, policy->cur);
+ ret = cpufreq_frequency_table_get_index(policy, old_freq);
if (ret == -EINVAL) {
- /* Warn user and fix it */
- pr_warn("%s: CPU%d: Running at unlisted freq: %u KHz\n",
- __func__, policy->cpu, policy->cur);
- ret = __cpufreq_driver_target(policy, policy->cur - 1,
- CPUFREQ_RELATION_L);
+ ret = __cpufreq_driver_target(policy, old_freq - 1,
+ CPUFREQ_RELATION_L);

/*
* Reaching here after boot in a few seconds may not
@@ -1465,8 +1464,8 @@ static int cpufreq_online(unsigned int cpu)
* frequency for longer duration. Hence, a BUG_ON().
*/
BUG_ON(ret);
- pr_warn("%s: CPU%d: Unlisted initial frequency changed to: %u KHz\n",
- __func__, policy->cpu, policy->cur);
+ pr_info("%s: CPU%d: Running at unlisted initial frequency: %u KHz, changing to: %u KHz\n",
+ __func__, policy->cpu, old_freq, policy->cur);
}
}

diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c
index ec4f79049a06..d581c4e623f8 100644
--- a/drivers/crypto/chelsio/chtls/chtls_cm.c
+++ b/drivers/crypto/chelsio/chtls/chtls_cm.c
@@ -772,14 +772,13 @@ static int chtls_pass_open_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
if (rpl->status != CPL_ERR_NONE) {
pr_info("Unexpected PASS_OPEN_RPL status %u for STID %u\n",
rpl->status, stid);
- return CPL_RET_BUF_DONE;
+ } else {
+ cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
+ sock_put(listen_ctx->lsk);
+ kfree(listen_ctx);
+ module_put(THIS_MODULE);
}
- cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
- sock_put(listen_ctx->lsk);
- kfree(listen_ctx);
- module_put(THIS_MODULE);
-
- return 0;
+ return CPL_RET_BUF_DONE;
}

static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
@@ -796,15 +795,13 @@ static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
if (rpl->status != CPL_ERR_NONE) {
pr_info("Unexpected CLOSE_LISTSRV_RPL status %u for STID %u\n",
rpl->status, stid);
- return CPL_RET_BUF_DONE;
+ } else {
+ cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
+ sock_put(listen_ctx->lsk);
+ kfree(listen_ctx);
+ module_put(THIS_MODULE);
}
-
- cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
- sock_put(listen_ctx->lsk);
- kfree(listen_ctx);
- module_put(THIS_MODULE);
-
- return 0;
+ return CPL_RET_BUF_DONE;
}

static void chtls_purge_wr_queue(struct sock *sk)
@@ -1514,7 +1511,6 @@ static void add_to_reap_list(struct sock *sk)
struct chtls_sock *csk = sk->sk_user_data;

local_bh_disable();
- bh_lock_sock(sk);
release_tcp_port(sk); /* release the port immediately */

spin_lock(&reap_list_lock);
@@ -1523,7 +1519,6 @@ static void add_to_reap_list(struct sock *sk)
if (!csk->passive_reap_next)
schedule_work(&reap_task);
spin_unlock(&reap_list_lock);
- bh_unlock_sock(sk);
local_bh_enable();
}

diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c
index 9fb5ca6682ea..188d871f6b8c 100644
--- a/drivers/crypto/chelsio/chtls/chtls_io.c
+++ b/drivers/crypto/chelsio/chtls/chtls_io.c
@@ -1585,6 +1585,7 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
tp->urg_data = 0;

if ((avail + offset) >= skb->len) {
+ struct sk_buff *next_skb;
if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
tp->copied_seq += skb->len;
hws->rcvpld = skb->hdr_len;
@@ -1595,8 +1596,10 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
chtls_free_skb(sk, skb);
buffers_freed++;
hws->copied_seq = 0;
- if (copied >= target &&
- !skb_peek(&sk->sk_receive_queue))
+ next_skb = skb_peek(&sk->sk_receive_queue);
+ if (copied >= target && !next_skb)
+ break;
+ if (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR)
break;
}
} while (len > 0);
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index e5bfac79e5ac..04f5d79d4265 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -62,10 +62,12 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
status = efi_get_random_bytes(sizeof(phys_seed),
(u8 *)&phys_seed);
if (status == EFI_NOT_FOUND) {
- efi_info("EFI_RNG_PROTOCOL unavailable, no randomness supplied\n");
+ efi_info("EFI_RNG_PROTOCOL unavailable, KASLR will be disabled\n");
+ efi_nokaslr = true;
} else if (status != EFI_SUCCESS) {
- efi_err("efi_get_random_bytes() failed\n");
- return status;
+ efi_err("efi_get_random_bytes() failed (0x%lx), KASLR will be disabled\n",
+ status);
+ efi_nokaslr = true;
}
} else {
efi_info("KASLR disabled on kernel command line\n");
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index 11ecf3c4640e..368cd60000ee 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -136,7 +136,7 @@ static efi_status_t update_fdt(void *orig_fdt, unsigned long orig_fdt_size,
if (status)
goto fdt_set_fail;

- if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) {
efi_status_t efi_status;

efi_status = efi_get_random_bytes(sizeof(fdt_val64),
@@ -145,8 +145,6 @@ static efi_status_t update_fdt(void *orig_fdt, unsigned long orig_fdt_size,
status = fdt_setprop_var(fdt, node, "kaslr-seed", fdt_val64);
if (status)
goto fdt_set_fail;
- } else if (efi_status != EFI_NOT_FOUND) {
- return efi_status;
}
}

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 784219962193..ea469168cd44 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -326,6 +326,7 @@ static void print_context_stats(struct seq_file *m,
}
i915_gem_context_unlock_engines(ctx);

+ mutex_lock(&ctx->mutex);
if (!IS_ERR_OR_NULL(ctx->file_priv)) {
struct file_stats stats = {
.vm = rcu_access_pointer(ctx->vm),
@@ -346,6 +347,7 @@ static void print_context_stats(struct seq_file *m,

print_file_stats(m, name, stats);
}
+ mutex_unlock(&ctx->mutex);

spin_lock(&i915->gem.contexts.lock);
list_safe_reset_next(ctx, cn, link);
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 3a98439bba83..0abce004a959 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -647,13 +647,12 @@ static void process_one_req(struct work_struct *_work)
req->callback = NULL;

spin_lock_bh(&lock);
+ /*
+ * Although the work will normally have been canceled by the workqueue,
+ * it can still be requeued as long as it is on the req_list.
+ */
+ cancel_delayed_work(&req->work);
if (!list_empty(&req->list)) {
- /*
- * Although the work will normally have been canceled by the
- * workqueue, it can still be requeued as long as it is on the
- * req_list.
- */
- cancel_delayed_work(&req->work);
list_del_init(&req->list);
kfree(req);
}
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 6271d1e741cf..9ae4ce7df95c 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -49,7 +49,7 @@ do { \
#define pmem_assign(dest, src) ((dest) = (src))
#endif

-#if defined(__HAVE_ARCH_MEMCPY_MCSAFE) && defined(DM_WRITECACHE_HAS_PMEM)
+#if IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC) && defined(DM_WRITECACHE_HAS_PMEM)
#define DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
#endif

@@ -992,7 +992,8 @@ static void writecache_resume(struct dm_target *ti)
}
wc->freelist_size = 0;

- r = memcpy_mcsafe(&sb_seq_count, &sb(wc)->seq_count, sizeof(uint64_t));
+ r = copy_mc_to_kernel(&sb_seq_count, &sb(wc)->seq_count,
+ sizeof(uint64_t));
if (r) {
writecache_error(wc, r, "hardware memory error when reading superblock: %d", r);
sb_seq_count = cpu_to_le64(0);
@@ -1008,7 +1009,8 @@ static void writecache_resume(struct dm_target *ti)
e->seq_count = -1;
continue;
}
- r = memcpy_mcsafe(&wme, memory_entry(wc, e), sizeof(struct wc_memory_entry));
+ r = copy_mc_to_kernel(&wme, memory_entry(wc, e),
+ sizeof(struct wc_memory_entry));
if (r) {
writecache_error(wc, r, "hardware memory error when reading metadata entry %lu: %d",
(unsigned long)b, r);
@@ -1206,7 +1208,7 @@ static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data

if (rw == READ) {
int r;
- r = memcpy_mcsafe(buf, data, size);
+ r = copy_mc_to_kernel(buf, data, size);
flush_dcache_page(bio_page(bio));
if (unlikely(r)) {
writecache_error(wc, r, "hardware memory error when reading data: %d", r);
@@ -2349,7 +2351,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
}
}

- r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
+ r = copy_mc_to_kernel(&s, sb(wc), sizeof(struct wc_memory_superblock));
if (r) {
ti->error = "Hardware memory error when reading superblock";
goto bad;
@@ -2360,7 +2362,8 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->error = "Unable to initialize device";
goto bad;
}
- r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
+ r = copy_mc_to_kernel(&s, sb(wc),
+ sizeof(struct wc_memory_superblock));
if (r) {
ti->error = "Hardware memory error when reading superblock";
goto bad;
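The conversions above keep dm-writecache's recovery pattern: when copy_mc_to_kernel() reports a hardware memory error, log it and substitute a safe default instead of consuming the poison. A hedged distillation (the helper name is hypothetical; sb() and writecache_error() are the file's own):

        static uint64_t read_seq_count_safe(struct dm_writecache *wc)
        {
                uint64_t seq;
                int r = copy_mc_to_kernel(&seq, &sb(wc)->seq_count, sizeof(seq));

                if (r) {
                        writecache_error(wc, r, "hardware memory error: %d", r);
                        seq = 0;        /* the real code uses cpu_to_le64(0) */
                }
                return seq;
        }
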
diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c
index f2b2805942f5..c089eae71ccc 100644
--- a/drivers/misc/cardreader/rtsx_pcr.c
+++ b/drivers/misc/cardreader/rtsx_pcr.c
@@ -1155,10 +1155,6 @@ void rtsx_pci_init_ocp(struct rtsx_pcr *pcr)
rtsx_pci_write_register(pcr, REG_OCPGLITCH,
SD_OCP_GLITCH_MASK, pcr->hw_param.ocp_glitch);
rtsx_pci_enable_ocp(pcr);
- } else {
- /* OC power down */
- rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN,
- OC_POWER_DOWN);
}
}
}
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 25a9dd9c0c1b..2ba899f5659f 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -393,8 +393,8 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
*capp_unit_id = get_capp_unit_id(np, *phb_index);
of_node_put(np);
if (!*capp_unit_id) {
- pr_err("cxl: invalid capp unit id (phb_index: %d)\n",
- *phb_index);
+ pr_err("cxl: No capp unit found for PHB[%lld,%d]. Make sure the adapter is on a capi-compatible slot\n",
+ *chipid, *phb_index);
return -ENODEV;
}

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 7b7e8b7883c8..7b5d52192487 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1158,16 +1158,6 @@ static void bnxt_queue_sp_work(struct bnxt *bp)
schedule_work(&bp->sp_task);
}

-static void bnxt_cancel_sp_work(struct bnxt *bp)
-{
- if (BNXT_PF(bp)) {
- flush_workqueue(bnxt_pf_wq);
- } else {
- cancel_work_sync(&bp->sp_task);
- cancel_delayed_work_sync(&bp->fw_reset_task);
- }
-}
-
static void bnxt_sched_reset(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
{
if (!rxr->bnapi->in_reset) {
@@ -4306,7 +4296,8 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
u32 bar_offset = BNXT_GRCPF_REG_CHIMP_COMM;
u16 dst = BNXT_HWRM_CHNL_CHIMP;

- if (BNXT_NO_FW_ACCESS(bp))
+ if (BNXT_NO_FW_ACCESS(bp) &&
+ le16_to_cpu(req->req_type) != HWRM_FUNC_RESET)
return -EBUSY;

if (msg_len > BNXT_HWRM_MAX_REQ_LEN) {
@@ -9566,7 +9557,10 @@ int bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
{
int rc = 0;

- rc = __bnxt_open_nic(bp, irq_re_init, link_re_init);
+ if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state))
+ rc = -EIO;
+ if (!rc)
+ rc = __bnxt_open_nic(bp, irq_re_init, link_re_init);
if (rc) {
netdev_err(bp->dev, "nic open fail (rc: %x)\n", rc);
dev_close(bp->dev);
@@ -11787,15 +11781,17 @@ static void bnxt_remove_one(struct pci_dev *pdev)
if (BNXT_PF(bp))
bnxt_sriov_disable(bp);

- clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
- bnxt_cancel_sp_work(bp);
- bp->sp_event = 0;
-
- bnxt_dl_fw_reporters_destroy(bp, true);
if (BNXT_PF(bp))
devlink_port_type_clear(&bp->dl_port);
pci_disable_pcie_error_reporting(pdev);
unregister_netdev(dev);
+ clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+ /* Flush any pending tasks */
+ cancel_work_sync(&bp->sp_task);
+ cancel_delayed_work_sync(&bp->fw_reset_task);
+ bp->sp_event = 0;
+
+ bnxt_dl_fw_reporters_destroy(bp, true);
bnxt_dl_unregister(bp);
bnxt_shutdown_tc(bp);

@@ -12535,6 +12531,9 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
return PCI_ERS_RESULT_DISCONNECT;
}

+ if (state == pci_channel_io_frozen)
+ set_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state);
+
if (netif_running(netdev))
bnxt_close(netdev);

@@ -12561,7 +12560,7 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct bnxt *bp = netdev_priv(netdev);
- int err = 0;
+ int err = 0, off;
pci_ers_result_t result = PCI_ERS_RESULT_DISCONNECT;

netdev_info(bp->dev, "PCI Slot Reset\n");
@@ -12573,6 +12572,20 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
"Cannot re-enable PCI device after reset.\n");
} else {
pci_set_master(pdev);
+ /* Upon fatal error, the device's internal logic that latches the
+ * BAR values is reset and is only restored by rewriting the
+ * BARs.
+ *
+ * Since pci_restore_state() does not re-write the BARs when the
+ * value is the same as the one saved earlier, the driver needs to
+ * write the BARs to 0 to force a restore after a fatal error.
+ */
+ if (test_and_clear_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN,
+ &bp->state)) {
+ for (off = PCI_BASE_ADDRESS_0;
+ off <= PCI_BASE_ADDRESS_5; off += 4)
+ pci_write_config_dword(bp->pdev, off, 0);
+ }
pci_restore_state(pdev);
pci_save_state(pdev);

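For reference, the BAR-rewrite workaround above reduces to the following pattern (a minimal sketch with an illustrative helper name, not the driver's literal code):

#include <linux/pci.h>

/* Force pci_restore_state() to rewrite every BAR by clearing them
 * first; a BAR whose config-space value still matches the saved copy
 * would otherwise be skipped.
 */
static void force_bar_restore(struct pci_dev *pdev)
{
        int off;

        for (off = PCI_BASE_ADDRESS_0; off <= PCI_BASE_ADDRESS_5; off += 4)
                pci_write_config_dword(pdev, off, 0);
        pci_restore_state(pdev);        /* now rewrites all six BARs */
        pci_save_state(pdev);           /* re-arm for a future reset */
}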
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 0ef89dabfd61..2a02ca7b0f20 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1736,6 +1736,7 @@ struct bnxt {
#define BNXT_STATE_ABORT_ERR 5
#define BNXT_STATE_FW_FATAL_COND 6
#define BNXT_STATE_DRV_REGISTERED 7
+#define BNXT_STATE_PCI_CHANNEL_IO_FROZEN 8

#define BNXT_NO_FW_ACCESS(bp) \
(test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) || \
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 481498585ead..8eb976106d0c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -145,13 +145,13 @@ static int configure_filter_smac(struct adapter *adap, struct filter_entry *f)
int err;

/* do a set-tcb for smac-sel and CWR bit.. */
- err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1);
- if (err)
- goto smac_err;
-
err = set_tcb_field(adap, f, f->tid, TCB_SMAC_SEL_W,
TCB_SMAC_SEL_V(TCB_SMAC_SEL_M),
TCB_SMAC_SEL_V(f->smt->idx), 1);
+ if (err)
+ goto smac_err;
+
+ err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1);
if (!err)
return 0;

@@ -865,6 +865,7 @@ int set_filter_wr(struct adapter *adapter, int fidx)
FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) |
FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) |
FW_FILTER_WR_DMAC_V(f->fs.newdmac) |
+ FW_FILTER_WR_SMAC_V(f->fs.newsmac) |
FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT ||
f->fs.newvlan == VLAN_REWRITE) |
FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE ||
@@ -882,7 +883,7 @@ int set_filter_wr(struct adapter *adapter, int fidx)
FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) |
FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) |
FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld));
- fwr->smac_sel = 0;
+ fwr->smac_sel = f->smt->idx;
fwr->rx_chan_rx_rpl_iq =
htons(FW_FILTER_WR_RX_CHAN_V(0) |
FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id));
@@ -1326,11 +1327,8 @@ static void mk_act_open_req6(struct filter_entry *f, struct sk_buff *skb,
TX_QUEUE_V(f->fs.nat_mode) |
T5_OPT_2_VALID_F |
RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) |
- CONG_CNTRL_V((f->fs.action == FILTER_DROP) |
- (f->fs.dirsteer << 1)) |
PACE_V((f->fs.maskhash) |
- ((f->fs.dirsteerhash) << 1)) |
- CCTRL_ECN_V(f->fs.action == FILTER_SWITCH));
+ ((f->fs.dirsteerhash) << 1)));
}

static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb,
@@ -1366,11 +1364,8 @@ static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb,
TX_QUEUE_V(f->fs.nat_mode) |
T5_OPT_2_VALID_F |
RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) |
- CONG_CNTRL_V((f->fs.action == FILTER_DROP) |
- (f->fs.dirsteer << 1)) |
PACE_V((f->fs.maskhash) |
- ((f->fs.dirsteerhash) << 1)) |
- CCTRL_ECN_V(f->fs.action == FILTER_SWITCH));
+ ((f->fs.dirsteerhash) << 1)));
}

static int cxgb4_set_hash_filter(struct net_device *dev,
@@ -2042,6 +2037,20 @@ void hash_filter_rpl(struct adapter *adap, const struct cpl_act_open_rpl *rpl)
}
return;
}
+ switch (f->fs.action) {
+ case FILTER_PASS:
+ if (f->fs.dirsteer)
+ set_tcb_tflag(adap, f, tid,
+ TF_DIRECT_STEER_S, 1, 1);
+ break;
+ case FILTER_DROP:
+ set_tcb_tflag(adap, f, tid, TF_DROP_S, 1, 1);
+ break;
+ case FILTER_SWITCH:
+ set_tcb_tflag(adap, f, tid, TF_LPBK_S, 1, 1);
+ break;
+ }
+
break;

default:
@@ -2109,22 +2118,11 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
if (ctx)
ctx->result = 0;
} else if (ret == FW_FILTER_WR_FLT_ADDED) {
- int err = 0;
-
- if (f->fs.newsmac)
- err = configure_filter_smac(adap, f);
-
- if (!err) {
- f->pending = 0; /* async setup completed */
- f->valid = 1;
- if (ctx) {
- ctx->result = 0;
- ctx->tid = idx;
- }
- } else {
- clear_filter(adap, f);
- if (ctx)
- ctx->result = err;
+ f->pending = 0; /* async setup completed */
+ f->valid = 1;
+ if (ctx) {
+ ctx->result = 0;
+ ctx->tid = idx;
}
} else {
/* Something went wrong. Issue a warning about the
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h
index 50232e063f49..92473dda55d9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h
@@ -50,6 +50,10 @@
#define TCB_T_FLAGS_M 0xffffffffffffffffULL
#define TCB_T_FLAGS_V(x) ((__u64)(x) << TCB_T_FLAGS_S)

+#define TF_DROP_S 22
+#define TF_DIRECT_STEER_S 23
+#define TF_LPBK_S 59
+
#define TF_CCTRL_ECE_S 60
#define TF_CCTRL_CWR_S 61
#define TF_CCTRL_RFR_S 62
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index e972138a14ad..c20f6803e9d5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -3146,8 +3146,8 @@ static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev)
hclgevf_uninit_msi(hdev);
}

- hclgevf_pci_uninit(hdev);
hclgevf_cmd_uninit(hdev);
+ hclgevf_pci_uninit(hdev);
hclgevf_uninit_mac_list(hdev);
}

diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 7ef3369953b6..c3ec9ceed833 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1031,12 +1031,6 @@ static int ibmveth_is_packet_unsupported(struct sk_buff *skb,
ret = -EOPNOTSUPP;
}

- if (!ether_addr_equal(ether_header->h_source, netdev->dev_addr)) {
- netdev_dbg(netdev, "source packet MAC address does not match veth device's, dropping packet.\n");
- netdev->stats.tx_dropped++;
- ret = -EOPNOTSUPP;
- }
-
return ret;
}

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 3e0aab04d86f..f96bb3dab5a8 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1828,9 +1828,13 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
int rc;

rc = 0;
- ether_addr_copy(adapter->mac_addr, addr->sa_data);
- if (adapter->state != VNIC_PROBED)
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ if (adapter->state != VNIC_PROBED) {
+ ether_addr_copy(adapter->mac_addr, addr->sa_data);
rc = __ibmvnic_set_mac(netdev, addr->sa_data);
+ }

return rc;
}
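The ibmvnic fix above is an instance of the usual .ndo_set_mac_address validation pattern; a minimal sketch (handler name hypothetical):

#include <linux/etherdevice.h>
#include <linux/netdevice.h>

/* Sketch: reject invalid (zero/multicast) addresses before any state
 * is touched, so a bad request cannot clobber the stored MAC.
 */
static int example_set_mac(struct net_device *netdev, void *p)
{
        struct sockaddr *addr = p;

        if (!is_valid_ether_addr(addr->sa_data))
                return -EADDRNOTAVAIL;

        ether_addr_copy(netdev->dev_addr, addr->sa_data);
        return 0;
}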
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index ec45a03140d7..f6aa80fe343f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1485,6 +1485,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
if (!reload)
devlink_resources_unregister(devlink, NULL);
mlxsw_core->bus->fini(mlxsw_core->bus_priv);
+ if (!reload)
+ devlink_free(devlink);

return;

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index f3c0e241e1b4..dc8e1423ba9c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1546,11 +1546,14 @@ mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port)
u32 eth_proto_cap, eth_proto_admin, eth_proto_oper;
const struct mlxsw_sp_port_type_speed_ops *ops;
char ptys_pl[MLXSW_REG_PTYS_LEN];
+ u32 eth_proto_cap_masked;
int err;

ops = mlxsw_sp->port_type_speed_ops;

- /* Set advertised speeds to supported speeds. */
+ /* Set advertised speeds to speeds supported by both the driver
+ * and the device.
+ */
ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port,
0, false);
err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
@@ -1559,8 +1562,10 @@ mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port)

ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, &eth_proto_cap,
&eth_proto_admin, &eth_proto_oper);
+ eth_proto_cap_masked = ops->ptys_proto_cap_masked_get(eth_proto_cap);
ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port,
- eth_proto_cap, mlxsw_sp_port->link.autoneg);
+ eth_proto_cap_masked,
+ mlxsw_sp_port->link.autoneg);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
}

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 5240bf11b6c4..23d8e60a7187 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -340,6 +340,7 @@ struct mlxsw_sp_port_type_speed_ops {
u32 *p_eth_proto_cap,
u32 *p_eth_proto_admin,
u32 *p_eth_proto_oper);
+ u32 (*ptys_proto_cap_masked_get)(u32 eth_proto_cap);
};

static inline struct net_device *
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
index 14c78f73bb65..c4006fd74d9e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
@@ -1208,6 +1208,20 @@ mlxsw_sp1_reg_ptys_eth_unpack(struct mlxsw_sp *mlxsw_sp, char *payload,
p_eth_proto_oper);
}

+static u32 mlxsw_sp1_ptys_proto_cap_masked_get(u32 eth_proto_cap)
+{
+ u32 ptys_proto_cap_masked = 0;
+ int i;
+
+ for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) {
+ if (mlxsw_sp1_port_link_mode[i].mask & eth_proto_cap)
+ ptys_proto_cap_masked |=
+ mlxsw_sp1_port_link_mode[i].mask;
+ }
+
+ return ptys_proto_cap_masked;
+}
+
const struct mlxsw_sp_port_type_speed_ops mlxsw_sp1_port_type_speed_ops = {
.from_ptys_supported_port = mlxsw_sp1_from_ptys_supported_port,
.from_ptys_link = mlxsw_sp1_from_ptys_link,
@@ -1217,6 +1231,7 @@ const struct mlxsw_sp_port_type_speed_ops mlxsw_sp1_port_type_speed_ops = {
.to_ptys_speed = mlxsw_sp1_to_ptys_speed,
.reg_ptys_eth_pack = mlxsw_sp1_reg_ptys_eth_pack,
.reg_ptys_eth_unpack = mlxsw_sp1_reg_ptys_eth_unpack,
+ .ptys_proto_cap_masked_get = mlxsw_sp1_ptys_proto_cap_masked_get,
};

static const enum ethtool_link_mode_bit_indices
@@ -1632,6 +1647,20 @@ mlxsw_sp2_reg_ptys_eth_unpack(struct mlxsw_sp *mlxsw_sp, char *payload,
p_eth_proto_admin, p_eth_proto_oper);
}

+static u32 mlxsw_sp2_ptys_proto_cap_masked_get(u32 eth_proto_cap)
+{
+ u32 ptys_proto_cap_masked = 0;
+ int i;
+
+ for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) {
+ if (mlxsw_sp2_port_link_mode[i].mask & eth_proto_cap)
+ ptys_proto_cap_masked |=
+ mlxsw_sp2_port_link_mode[i].mask;
+ }
+
+ return ptys_proto_cap_masked;
+}
+
const struct mlxsw_sp_port_type_speed_ops mlxsw_sp2_port_type_speed_ops = {
.from_ptys_supported_port = mlxsw_sp2_from_ptys_supported_port,
.from_ptys_link = mlxsw_sp2_from_ptys_link,
@@ -1641,4 +1670,5 @@ const struct mlxsw_sp_port_type_speed_ops mlxsw_sp2_port_type_speed_ops = {
.to_ptys_speed = mlxsw_sp2_to_ptys_speed,
.reg_ptys_eth_pack = mlxsw_sp2_reg_ptys_eth_pack,
.reg_ptys_eth_unpack = mlxsw_sp2_reg_ptys_eth_unpack,
+ .ptys_proto_cap_masked_get = mlxsw_sp2_ptys_proto_cap_masked_get,
};
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 88b4b17ea22c..434bc0a7aa95 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -4563,7 +4563,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
}

rtl_irq_disable(tp);
- napi_schedule_irqoff(&tp->napi);
+ napi_schedule(&tp->napi);
out:
rtl_ack_events(tp, status);

@@ -4738,7 +4738,7 @@ static int rtl_open(struct net_device *dev)
rtl_request_firmware(tp);

retval = request_irq(pci_irq_vector(pdev, 0), rtl8169_interrupt,
- IRQF_NO_THREAD | IRQF_SHARED, dev->name, tp);
+ IRQF_SHARED, dev->name, tp);
if (retval < 0)
goto err_release_fw_2;

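The two r8169 hunks above go together: once IRQF_NO_THREAD is dropped, the handler may run force-threaded (e.g. under "threadirqs" or PREEMPT_RT) with hardirqs enabled, so the _irqoff NAPI variant is no longer safe. A minimal sketch of the resulting pattern (type and handler names illustrative):

#include <linux/interrupt.h>
#include <linux/netdevice.h>

struct example_priv {
        struct napi_struct napi;
};

static irqreturn_t example_isr(int irq, void *dev_instance)
{
        struct example_priv *priv = dev_instance;

        /* ... mask device interrupts here (hardware-specific) ... */

        /* napi_schedule() is safe in both hardirq and threaded context;
         * napi_schedule_irqoff() assumes hardirqs are already disabled.
         */
        napi_schedule(&priv->napi);
        return IRQ_HANDLED;
}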
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 99f7aae102ce..6c58ba186b2c 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1747,12 +1747,16 @@ static int ravb_hwtstamp_get(struct net_device *ndev, struct ifreq *req)
config.flags = 0;
config.tx_type = priv->tstamp_tx_ctrl ? HWTSTAMP_TX_ON :
HWTSTAMP_TX_OFF;
- if (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE_V2_L2_EVENT)
+ switch (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE) {
+ case RAVB_RXTSTAMP_TYPE_V2_L2_EVENT:
config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
- else if (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE_ALL)
+ break;
+ case RAVB_RXTSTAMP_TYPE_ALL:
config.rx_filter = HWTSTAMP_FILTER_ALL;
- else
+ break;
+ default:
config.rx_filter = HWTSTAMP_FILTER_NONE;
+ }

return copy_to_user(req->ifr_data, &config, sizeof(config)) ?
-EFAULT : 0;
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 8e47d0112e5d..10f910f8cbe5 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -663,10 +663,6 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,

gtp = netdev_priv(dev);

- err = gtp_encap_enable(gtp, data);
- if (err < 0)
- return err;
-
if (!data[IFLA_GTP_PDP_HASHSIZE]) {
hashsize = 1024;
} else {
@@ -677,12 +673,16 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,

err = gtp_hashtable_new(gtp, hashsize);
if (err < 0)
- goto out_encap;
+ return err;
+
+ err = gtp_encap_enable(gtp, data);
+ if (err < 0)
+ goto out_hashtable;

err = register_netdevice(dev);
if (err < 0) {
netdev_dbg(dev, "failed to register new netdev %d\n", err);
- goto out_hashtable;
+ goto out_encap;
}

gn = net_generic(dev_net(dev), gtp_net_id);
@@ -693,11 +693,11 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,

return 0;

+out_encap:
+ gtp_encap_disable(gtp);
out_hashtable:
kfree(gtp->addr_hash);
kfree(gtp->tid_hash);
-out_encap:
- gtp_encap_disable(gtp);
return err;
}
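The gtp_newlink() change above restores the standard goto-unwind ordering: resources are released in exactly the reverse order of acquisition. A compilable sketch of the idiom, with stubbed-out, illustrative steps:

static int alloc_hashtable(void)   { return 0; }  /* stand-ins for the */
static void free_hashtable(void)   { }            /* real setup steps  */
static int enable_encap(void)      { return 0; }
static void disable_encap(void)    { }
static int register_device(void)   { return 0; }

static int example_setup(void)
{
        int err;

        err = alloc_hashtable();        /* step 1 */
        if (err)
                return err;

        err = enable_encap();           /* step 2 */
        if (err)
                goto out_hashtable;

        err = register_device();       /* step 3 */
        if (err)
                goto out_encap;

        return 0;

out_encap:
        disable_encap();                /* undo step 2 */
out_hashtable:
        free_hashtable();               /* undo step 1 */
        return err;
}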

diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c
index bdbfeed359db..41e9af35a582 100644
--- a/drivers/net/ipa/gsi_trans.c
+++ b/drivers/net/ipa/gsi_trans.c
@@ -398,15 +398,24 @@ void gsi_trans_cmd_add(struct gsi_trans *trans, void *buf, u32 size,

/* assert(which < trans->tre_count); */

- /* Set the page information for the buffer. We also need to fill in
- * the DMA address and length for the buffer (something dma_map_sg()
- * normally does).
+ /* Commands are quite different from data transfer requests.
+ * Their payloads come from a pool whose memory is allocated
+ * using dma_alloc_coherent(). We therefore do *not* map them
+ * for DMA (unlike what we do for pages and skbs).
+ *
+ * When a transaction completes, the SGL is normally unmapped.
+ * A command transaction has direction DMA_NONE, which tells
+ * gsi_trans_complete() to skip the unmapping step.
+ *
+ * The only things we use directly in a command scatter/gather
+ * entry are the DMA address and length. We still need the SG
+ * table flags to be maintained though, so assign a NULL page
+ * pointer for that purpose.
*/
sg = &trans->sgl[which];
-
- sg_set_buf(sg, buf, size);
+ sg_assign_page(sg, NULL);
sg_dma_address(sg) = addr;
- sg_dma_len(sg) = sg->length;
+ sg_dma_len(sg) = size;

info = &trans->info[which];
info->opcode = opcode;
diff --git a/drivers/net/wireless/intersil/p54/p54pci.c b/drivers/net/wireless/intersil/p54/p54pci.c
index 9d96c8b8409d..ec53bb769a64 100644
--- a/drivers/net/wireless/intersil/p54/p54pci.c
+++ b/drivers/net/wireless/intersil/p54/p54pci.c
@@ -333,10 +333,12 @@ static void p54p_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
struct p54p_desc *desc;
dma_addr_t mapping;
u32 idx, i;
+ __le32 device_addr;

spin_lock_irqsave(&priv->lock, flags);
idx = le32_to_cpu(ring_control->host_idx[1]);
i = idx % ARRAY_SIZE(ring_control->tx_data);
+ device_addr = ((struct p54_hdr *)skb->data)->req_id;

mapping = dma_map_single(&priv->pdev->dev, skb->data, skb->len,
DMA_TO_DEVICE);
@@ -350,7 +352,7 @@ static void p54p_tx(struct ieee80211_hw *dev, struct sk_buff *skb)

desc = &ring_control->tx_data[i];
desc->host_addr = cpu_to_le32(mapping);
- desc->device_addr = ((struct p54_hdr *)skb->data)->req_id;
+ desc->device_addr = device_addr;
desc->len = cpu_to_le16(skb->len);
desc->flags = 0;

diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 45964acba944..22d865ba6353 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -268,7 +268,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
if (rw == READ) {
if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align)))
return -EIO;
- if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0)
+ if (copy_mc_to_kernel(buf, nsio->addr + offset, size) != 0)
return -EIO;
return 0;
}
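This release renames memcpy_mcsafe() to copy_mc_to_kernel() throughout; the calling convention is unchanged in that the return value is the number of bytes left uncopied, so zero means success. A minimal sketch of a caller (helper name illustrative):

#include <linux/errno.h>
#include <linux/uaccess.h>

static int read_with_mc_protection(void *dst, const void *src, size_t size)
{
        /* A machine check during the copy aborts it; the uncopied
         * remainder is returned and mapped to an I/O error here.
         */
        if (copy_mc_to_kernel(dst, src, size) != 0)
                return -EIO;
        return 0;
}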
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index fab29b514372..5c6939e004e2 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -125,7 +125,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
while (len) {
mem = kmap_atomic(page);
chunk = min_t(unsigned int, len, PAGE_SIZE - off);
- rem = memcpy_mcsafe(mem + off, pmem_addr, chunk);
+ rem = copy_mc_to_kernel(mem + off, pmem_addr, chunk);
kunmap_atomic(mem);
if (rem)
return BLK_STS_IOERR;
@@ -304,7 +304,7 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,

/*
* Use the 'no check' versions of copy_from_iter_flushcache() and
- * copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds
+ * copy_mc_to_iter() to bypass HARDENED_USERCOPY overhead. Bounds
* checking, both file offset and device offset, is handled by
* dax_iomap_actor()
*/
@@ -317,7 +317,7 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
- return _copy_to_iter_mcsafe(addr, bytes, i);
+ return _copy_mc_to_iter(addr, bytes, i);
}

static const struct dax_operations pmem_dax_ops = {
diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index 2e2e2a2ff51d..a3594ab7309b 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -1076,7 +1076,9 @@ static int advk_pcie_enable_phy(struct advk_pcie *pcie)
}

ret = phy_power_on(pcie->phy);
- if (ret) {
+ if (ret == -EOPNOTSUPP) {
+ dev_warn(&pcie->pdev->dev, "PHY unsupported by firmware\n");
+ } else if (ret) {
phy_exit(pcie->phy);
return ret;
}
diff --git a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
index 1a138be8bd6a..810f25a47632 100644
--- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
+++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
@@ -26,7 +26,6 @@
#define COMPHY_SIP_POWER_ON 0x82000001
#define COMPHY_SIP_POWER_OFF 0x82000002
#define COMPHY_SIP_PLL_LOCK 0x82000003
-#define COMPHY_FW_NOT_SUPPORTED (-1)

#define COMPHY_FW_MODE_SATA 0x1
#define COMPHY_FW_MODE_SGMII 0x2
@@ -112,10 +111,19 @@ static int mvebu_a3700_comphy_smc(unsigned long function, unsigned long lane,
unsigned long mode)
{
struct arm_smccc_res res;
+ s32 ret;

arm_smccc_smc(function, lane, mode, 0, 0, 0, 0, 0, &res);
+ ret = res.a0;

- return res.a0;
+ switch (ret) {
+ case SMCCC_RET_SUCCESS:
+ return 0;
+ case SMCCC_RET_NOT_SUPPORTED:
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
}

static int mvebu_a3700_comphy_get_fw_mode(int lane, int port,
@@ -220,7 +228,7 @@ static int mvebu_a3700_comphy_power_on(struct phy *phy)
}

ret = mvebu_a3700_comphy_smc(COMPHY_SIP_POWER_ON, lane->id, fw_param);
- if (ret == COMPHY_FW_NOT_SUPPORTED)
+ if (ret == -EOPNOTSUPP)
dev_err(lane->dev,
"unsupported SMC call, try updating your firmware\n");

diff --git a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
index e41367f36ee1..53ad127b100f 100644
--- a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
+++ b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
@@ -123,7 +123,6 @@

#define COMPHY_SIP_POWER_ON 0x82000001
#define COMPHY_SIP_POWER_OFF 0x82000002
-#define COMPHY_FW_NOT_SUPPORTED (-1)

/*
* A lane is described by the following bitfields:
@@ -273,10 +272,19 @@ static int mvebu_comphy_smc(unsigned long function, unsigned long phys,
unsigned long lane, unsigned long mode)
{
struct arm_smccc_res res;
+ s32 ret;

arm_smccc_smc(function, phys, lane, mode, 0, 0, 0, 0, &res);
+ ret = res.a0;

- return res.a0;
+ switch (ret) {
+ case SMCCC_RET_SUCCESS:
+ return 0;
+ case SMCCC_RET_NOT_SUPPORTED:
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
}

static int mvebu_comphy_get_mode(bool fw_mode, int lane, int port,
@@ -819,7 +827,7 @@ static int mvebu_comphy_power_on(struct phy *phy)
if (!ret)
return ret;

- if (ret == COMPHY_FW_NOT_SUPPORTED)
+ if (ret == -EOPNOTSUPP)
dev_err(priv->dev,
"unsupported SMC call, try updating your firmware\n");

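Both comphy drivers above now translate raw SMCCC results into standard errnos instead of comparing against a driver-local -1. The shared shape, consolidated into one sketch:

#include <linux/arm-smccc.h>
#include <linux/errno.h>

static int example_comphy_smc(unsigned long function, unsigned long lane,
                              unsigned long mode)
{
        struct arm_smccc_res res;

        arm_smccc_smc(function, lane, mode, 0, 0, 0, 0, 0, &res);

        switch ((s32)res.a0) {
        case SMCCC_RET_SUCCESS:
                return 0;
        case SMCCC_RET_NOT_SUPPORTED:
                return -EOPNOTSUPP;     /* lets callers detect old firmware */
        default:
                return -EINVAL;
        }
}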
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 67498594d7d7..87dc3fc15694 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -308,8 +308,9 @@ static void pl011_write(unsigned int val, const struct uart_amba_port *uap,
*/
static int pl011_fifo_to_tty(struct uart_amba_port *uap)
{
- u16 status;
unsigned int ch, flag, fifotaken;
+ int sysrq;
+ u16 status;

for (fifotaken = 0; fifotaken != 256; fifotaken++) {
status = pl011_read(uap, REG_FR);
@@ -344,10 +345,12 @@ static int pl011_fifo_to_tty(struct uart_amba_port *uap)
flag = TTY_FRAME;
}

- if (uart_handle_sysrq_char(&uap->port, ch & 255))
- continue;
+ spin_unlock(&uap->port.lock);
+ sysrq = uart_handle_sysrq_char(&uap->port, ch & 255);
+ spin_lock(&uap->port.lock);

- uart_insert_char(&uap->port, ch, UART011_DR_OE, ch, flag);
+ if (!sysrq)
+ uart_insert_char(&uap->port, ch, UART011_DR_OE, ch, flag);
}

return fifotaken;
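The pl011 change above enforces a general constraint: uart_handle_sysrq_char() can recurse into the same port (for instance via printk to this serial console), so it must not be called with port->lock held. The pattern in isolation (function name illustrative):

#include <linux/serial_core.h>

static void example_rx_char(struct uart_port *port, unsigned int ch,
                            unsigned int flag)
{
        int sysrq;

        /* Drop the port lock across the sysrq hook to avoid deadlock. */
        spin_unlock(&port->lock);
        sysrq = uart_handle_sysrq_char(port, ch);
        spin_lock(&port->lock);

        if (!sysrq)
                uart_insert_char(port, ch, 0, ch, flag);
}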
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 184b458820a3..6ff1e725f404 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -1000,7 +1000,7 @@ static void qcom_geni_serial_set_termios(struct uart_port *uport,
sampling_rate = UART_OVERSAMPLING;
/* Sampling rate is halved for IP versions >= 2.5 */
ver = geni_se_get_qup_hw_version(&port->se);
- if (GENI_SE_VERSION_MAJOR(ver) >= 2 && GENI_SE_VERSION_MINOR(ver) >= 5)
+ if (ver >= QUP_SE_VERSION_2_5)
sampling_rate /= 2;

clk_rate = get_clk_div_rate(baud, sampling_rate, &clk_div);
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 64a9025a87be..1f32db7b72b2 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -720,17 +720,18 @@ struct gntdev_copy_batch {
s16 __user *status[GNTDEV_COPY_BATCH];
unsigned int nr_ops;
unsigned int nr_pages;
+ bool writeable;
};

static int gntdev_get_page(struct gntdev_copy_batch *batch, void __user *virt,
- bool writeable, unsigned long *gfn)
+ unsigned long *gfn)
{
unsigned long addr = (unsigned long)virt;
struct page *page;
unsigned long xen_pfn;
int ret;

- ret = get_user_pages_fast(addr, 1, writeable ? FOLL_WRITE : 0, &page);
+ ret = get_user_pages_fast(addr, 1, batch->writeable ? FOLL_WRITE : 0, &page);
if (ret < 0)
return ret;

@@ -746,9 +747,13 @@ static void gntdev_put_pages(struct gntdev_copy_batch *batch)
{
unsigned int i;

- for (i = 0; i < batch->nr_pages; i++)
+ for (i = 0; i < batch->nr_pages; i++) {
+ if (batch->writeable && !PageDirty(batch->pages[i]))
+ set_page_dirty_lock(batch->pages[i]);
put_page(batch->pages[i]);
+ }
batch->nr_pages = 0;
+ batch->writeable = false;
}

static int gntdev_copy(struct gntdev_copy_batch *batch)
@@ -837,8 +842,9 @@ static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch,
virt = seg->source.virt + copied;
off = (unsigned long)virt & ~XEN_PAGE_MASK;
len = min(len, (size_t)XEN_PAGE_SIZE - off);
+ batch->writeable = false;

- ret = gntdev_get_page(batch, virt, false, &gfn);
+ ret = gntdev_get_page(batch, virt, &gfn);
if (ret < 0)
return ret;

@@ -856,8 +862,9 @@ static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch,
virt = seg->dest.virt + copied;
off = (unsigned long)virt & ~XEN_PAGE_MASK;
len = min(len, (size_t)XEN_PAGE_SIZE - off);
+ batch->writeable = true;

- ret = gntdev_get_page(batch, virt, true, &gfn);
+ ret = gntdev_get_page(batch, virt, &gfn);
if (ret < 0)
return ret;

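The gntdev change above fixes a classic get_user_pages_fast() write-path bug: a page the kernel has written to must be marked dirty before its reference is dropped, or the modification can be lost to reclaim or writeback. The release side in isolation (helper name illustrative):

#include <linux/mm.h>

static void put_written_page(struct page *page, bool writeable)
{
        /* Pages grabbed with FOLL_WRITE and then modified must be
         * dirtied before put_page(), as gntdev_put_pages() now does.
         */
        if (writeable && !PageDirty(page))
                set_page_dirty_lock(page);
        put_page(page);
}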
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 28bb5689333a..15880a68faad 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -141,6 +141,9 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,

name[len + EFI_VARIABLE_GUID_LEN+1] = '\0';

+ /* Replace invalid slashes, as kobject_set_name_vargs() does for /sys/firmware/efi/vars. */
+ strreplace(name, '/', '!');
+
inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0,
is_removable);
if (!inode)
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index c8c381eadcd6..5bde77d70852 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -473,8 +473,6 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler,
return -EOPNOTSUPP;
break;
case EROFS_XATTR_INDEX_TRUSTED:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
break;
case EROFS_XATTR_INDEX_SECURITY:
break;
diff --git a/fs/exec.c b/fs/exec.c
index a91003e28eaa..07910f5032e7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -62,6 +62,7 @@
#include <linux/oom.h>
#include <linux/compat.h>
#include <linux/vmalloc.h>
+#include <linux/io_uring.h>

#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -1895,6 +1896,11 @@ static int bprm_execve(struct linux_binprm *bprm,
struct files_struct *displaced;
int retval;

+ /*
+ * Cancel any io_uring activity across execve
+ */
+ io_uring_task_cancel();
+
retval = unshare_files(&displaced);
if (retval)
return retval;
diff --git a/fs/file.c b/fs/file.c
index 21c0893f2f1d..4559b5fec3bd 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -21,6 +21,7 @@
#include <linux/rcupdate.h>
#include <linux/close_range.h>
#include <net/sock.h>
+#include <linux/io_uring.h>

unsigned int sysctl_nr_open __read_mostly = 1024*1024;
unsigned int sysctl_nr_open_min = BITS_PER_LONG;
@@ -452,6 +453,7 @@ void exit_files(struct task_struct *tsk)
struct files_struct * files = tsk->files;

if (files) {
+ io_uring_files_cancel(files);
task_lock(tsk);
tsk->files = NULL;
task_unlock(tsk);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 02b3c36b3676..5078a6ca7dfc 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -785,15 +785,16 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
struct page *newpage;
struct pipe_buffer *buf = cs->pipebufs;

+ get_page(oldpage);
err = unlock_request(cs->req);
if (err)
- return err;
+ goto out_put_old;

fuse_copy_finish(cs);

err = pipe_buf_confirm(cs->pipe, buf);
if (err)
- return err;
+ goto out_put_old;

BUG_ON(!cs->nr_segs);
cs->currbuf = buf;
@@ -833,7 +834,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
if (err) {
unlock_page(newpage);
- return err;
+ goto out_put_old;
}

get_page(newpage);
@@ -852,14 +853,19 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (err) {
unlock_page(newpage);
put_page(newpage);
- return err;
+ goto out_put_old;
}

unlock_page(oldpage);
+ /* Drop ref for ap->pages[] array */
put_page(oldpage);
cs->len = 0;

- return 0;
+ err = 0;
+out_put_old:
+ /* Drop ref obtained in this function */
+ put_page(oldpage);
+ return err;

out_fallback_unlock:
unlock_page(newpage);
@@ -868,10 +874,10 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
cs->offset = buf->offset;

err = lock_request(cs->req);
- if (err)
- return err;
+ if (!err)
+ err = 1;

- return 1;
+ goto out_put_old;
}

static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
@@ -883,14 +889,16 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
if (cs->nr_segs >= cs->pipe->max_usage)
return -EIO;

+ get_page(page);
err = unlock_request(cs->req);
- if (err)
+ if (err) {
+ put_page(page);
return err;
+ }

fuse_copy_finish(cs);

buf = cs->pipebufs;
- get_page(page);
buf->page = page;
buf->offset = offset;
buf->len = count;
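The fuse hunks above pin oldpage with an extra reference for the whole function and funnel every exit through one label that drops it, instead of returning early with the page possibly gone. The shape of the pattern (the risky-operation stub is illustrative):

#include <linux/mm.h>

static int do_something_risky(struct page *page) { return 0; }  /* stub */

static int example_operate_on_page(struct page *page)
{
        int err;

        get_page(page);                 /* pin across the risky section */

        err = do_something_risky(page);
        if (err)
                goto out_put;

        err = 0;
out_put:
        put_page(page);                 /* dropped on every exit path */
        return err;
}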
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 414beb543883..19db17e99cf9 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -60,6 +60,7 @@ struct io_worker {
const struct cred *cur_creds;
const struct cred *saved_creds;
struct files_struct *restore_files;
+ struct nsproxy *restore_nsproxy;
struct fs_struct *restore_fs;
};

@@ -87,7 +88,7 @@ enum {
*/
struct io_wqe {
struct {
- spinlock_t lock;
+ raw_spinlock_t lock;
struct io_wq_work_list work_list;
unsigned long hash_map;
unsigned flags;
@@ -148,11 +149,12 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)

if (current->files != worker->restore_files) {
__acquire(&wqe->lock);
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
dropped_lock = true;

task_lock(current);
current->files = worker->restore_files;
+ current->nsproxy = worker->restore_nsproxy;
task_unlock(current);
}

@@ -166,7 +168,7 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
if (worker->mm) {
if (!dropped_lock) {
__acquire(&wqe->lock);
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
dropped_lock = true;
}
__set_current_state(TASK_RUNNING);
@@ -200,7 +202,6 @@ static void io_worker_exit(struct io_worker *worker)
{
struct io_wqe *wqe = worker->wqe;
struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);
- unsigned nr_workers;

/*
* If we're not at zero, someone else is holding a brief reference
@@ -220,23 +221,19 @@ static void io_worker_exit(struct io_worker *worker)
worker->flags = 0;
preempt_enable();

- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
hlist_nulls_del_rcu(&worker->nulls_node);
list_del_rcu(&worker->all_list);
if (__io_worker_unuse(wqe, worker)) {
__release(&wqe->lock);
- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
}
acct->nr_workers--;
- nr_workers = wqe->acct[IO_WQ_ACCT_BOUND].nr_workers +
- wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers;
- spin_unlock_irq(&wqe->lock);
-
- /* all workers gone, wq exit can proceed */
- if (!nr_workers && refcount_dec_and_test(&wqe->wq->refs))
- complete(&wqe->wq->done);
+ raw_spin_unlock_irq(&wqe->lock);

kfree_rcu(worker, rcu);
+ if (refcount_dec_and_test(&wqe->wq->refs))
+ complete(&wqe->wq->done);
}

static inline bool io_wqe_run_queue(struct io_wqe *wqe)
@@ -318,6 +315,7 @@ static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker)

worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
worker->restore_files = current->files;
+ worker->restore_nsproxy = current->nsproxy;
worker->restore_fs = current->fs;
io_wqe_inc_running(wqe, worker);
}
@@ -454,6 +452,7 @@ static void io_impersonate_work(struct io_worker *worker,
if (work->files && current->files != work->files) {
task_lock(current);
current->files = work->files;
+ current->nsproxy = work->nsproxy;
task_unlock(current);
}
if (work->fs && current->fs != work->fs)
@@ -504,7 +503,7 @@ static void io_worker_handle_work(struct io_worker *worker)
else if (!wq_list_empty(&wqe->work_list))
wqe->flags |= IO_WQE_FLAG_STALLED;

- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
if (!work)
break;
io_assign_current_work(worker, work);
@@ -538,17 +537,17 @@ static void io_worker_handle_work(struct io_worker *worker)
io_wqe_enqueue(wqe, linked);

if (hash != -1U && !next_hashed) {
- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
wqe->hash_map &= ~BIT_ULL(hash);
wqe->flags &= ~IO_WQE_FLAG_STALLED;
/* skip unnecessary unlock-lock wqe->lock */
if (!work)
goto get_next;
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
}
} while (work);

- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
} while (1);
}

@@ -563,7 +562,7 @@ static int io_wqe_worker(void *data)
while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
set_current_state(TASK_INTERRUPTIBLE);
loop:
- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
if (io_wqe_run_queue(wqe)) {
__set_current_state(TASK_RUNNING);
io_worker_handle_work(worker);
@@ -574,7 +573,7 @@ static int io_wqe_worker(void *data)
__release(&wqe->lock);
goto loop;
}
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
if (signal_pending(current))
flush_signals(current);
if (schedule_timeout(WORKER_IDLE_TIMEOUT))
@@ -586,11 +585,11 @@ static int io_wqe_worker(void *data)
}

if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
if (!wq_list_empty(&wqe->work_list))
io_worker_handle_work(worker);
else
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
}

io_worker_exit(worker);
@@ -630,14 +629,14 @@ void io_wq_worker_sleeping(struct task_struct *tsk)

worker->flags &= ~IO_WORKER_F_RUNNING;

- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
io_wqe_dec_running(wqe, worker);
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
}

static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
{
- struct io_wqe_acct *acct =&wqe->acct[index];
+ struct io_wqe_acct *acct = &wqe->acct[index];
struct io_worker *worker;

worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
@@ -656,7 +655,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
return false;
}

- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
list_add_tail_rcu(&worker->all_list, &wqe->all_list);
worker->flags |= IO_WORKER_F_FREE;
@@ -665,11 +664,12 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND))
worker->flags |= IO_WORKER_F_FIXED;
acct->nr_workers++;
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);

if (index == IO_WQ_ACCT_UNBOUND)
atomic_inc(&wq->user->processes);

+ refcount_inc(&wq->refs);
wake_up_process(worker->task);
return true;
}
@@ -685,28 +685,63 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
return acct->nr_workers < acct->max_workers;
}

+static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data)
+{
+ send_sig(SIGINT, worker->task, 1);
+ return false;
+}
+
+/*
+ * Iterate the passed in list and call the specific function for each
+ * worker that isn't exiting
+ */
+static bool io_wq_for_each_worker(struct io_wqe *wqe,
+ bool (*func)(struct io_worker *, void *),
+ void *data)
+{
+ struct io_worker *worker;
+ bool ret = false;
+
+ list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
+ if (io_worker_get(worker)) {
+ /* no task if node is/was offline */
+ if (worker->task)
+ ret = func(worker, data);
+ io_worker_release(worker);
+ if (ret)
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static bool io_wq_worker_wake(struct io_worker *worker, void *data)
+{
+ wake_up_process(worker->task);
+ return false;
+}
+
/*
* Manager thread. Tasked with creating new workers, if we need them.
*/
static int io_wq_manager(void *data)
{
struct io_wq *wq = data;
- int workers_to_create = num_possible_nodes();
int node;

/* create fixed workers */
- refcount_set(&wq->refs, workers_to_create);
+ refcount_set(&wq->refs, 1);
for_each_node(node) {
if (!node_online(node))
continue;
- if (!create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND))
- goto err;
- workers_to_create--;
+ if (create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND))
+ continue;
+ set_bit(IO_WQ_BIT_ERROR, &wq->state);
+ set_bit(IO_WQ_BIT_EXIT, &wq->state);
+ goto out;
}

- while (workers_to_create--)
- refcount_dec(&wq->refs);
-
complete(&wq->done);

while (!kthread_should_stop()) {
@@ -720,12 +755,12 @@ static int io_wq_manager(void *data)
if (!node_online(node))
continue;

- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND))
fork_worker[IO_WQ_ACCT_BOUND] = true;
if (io_wqe_need_worker(wqe, IO_WQ_ACCT_UNBOUND))
fork_worker[IO_WQ_ACCT_UNBOUND] = true;
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
if (fork_worker[IO_WQ_ACCT_BOUND])
create_io_worker(wq, wqe, IO_WQ_ACCT_BOUND);
if (fork_worker[IO_WQ_ACCT_UNBOUND])
@@ -738,12 +773,18 @@ static int io_wq_manager(void *data)
if (current->task_works)
task_work_run();

- return 0;
-err:
- set_bit(IO_WQ_BIT_ERROR, &wq->state);
- set_bit(IO_WQ_BIT_EXIT, &wq->state);
- if (refcount_sub_and_test(workers_to_create, &wq->refs))
+out:
+ if (refcount_dec_and_test(&wq->refs)) {
complete(&wq->done);
+ return 0;
+ }
+ /* if ERROR is set and we get here, we have workers to wake */
+ if (test_bit(IO_WQ_BIT_ERROR, &wq->state)) {
+ rcu_read_lock();
+ for_each_node(node)
+ io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
+ rcu_read_unlock();
+ }
return 0;
}

@@ -821,10 +862,10 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
}

work_flags = work->flags;
- spin_lock_irqsave(&wqe->lock, flags);
+ raw_spin_lock_irqsave(&wqe->lock, flags);
io_wqe_insert_work(wqe, work);
wqe->flags &= ~IO_WQE_FLAG_STALLED;
- spin_unlock_irqrestore(&wqe->lock, flags);
+ raw_spin_unlock_irqrestore(&wqe->lock, flags);

if ((work_flags & IO_WQ_WORK_CONCURRENT) ||
!atomic_read(&acct->nr_running))
@@ -850,37 +891,6 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
}

-static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data)
-{
- send_sig(SIGINT, worker->task, 1);
- return false;
-}
-
-/*
- * Iterate the passed in list and call the specific function for each
- * worker that isn't exiting
- */
-static bool io_wq_for_each_worker(struct io_wqe *wqe,
- bool (*func)(struct io_worker *, void *),
- void *data)
-{
- struct io_worker *worker;
- bool ret = false;
-
- list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
- if (io_worker_get(worker)) {
- /* no task if node is/was offline */
- if (worker->task)
- ret = func(worker, data);
- io_worker_release(worker);
- if (ret)
- break;
- }
- }
-
- return ret;
-}
-
void io_wq_cancel_all(struct io_wq *wq)
{
int node;
@@ -951,13 +961,13 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
unsigned long flags;

retry:
- spin_lock_irqsave(&wqe->lock, flags);
+ raw_spin_lock_irqsave(&wqe->lock, flags);
wq_list_for_each(node, prev, &wqe->work_list) {
work = container_of(node, struct io_wq_work, list);
if (!match->fn(work, match->data))
continue;
io_wqe_remove_pending(wqe, work, prev);
- spin_unlock_irqrestore(&wqe->lock, flags);
+ raw_spin_unlock_irqrestore(&wqe->lock, flags);
io_run_cancel(work, wqe);
match->nr_pending++;
if (!match->cancel_all)
@@ -966,7 +976,7 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
/* not safe to continue after unlock */
goto retry;
}
- spin_unlock_irqrestore(&wqe->lock, flags);
+ raw_spin_unlock_irqrestore(&wqe->lock, flags);
}

static void io_wqe_cancel_running_work(struct io_wqe *wqe,
@@ -1074,7 +1084,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
}
atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
wqe->wq = wq;
- spin_lock_init(&wqe->lock);
+ raw_spin_lock_init(&wqe->lock);
INIT_WQ_LIST(&wqe->work_list);
INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
INIT_LIST_HEAD(&wqe->all_list);
@@ -1113,12 +1123,6 @@ bool io_wq_get(struct io_wq *wq, struct io_wq_data *data)
return refcount_inc_not_zero(&wq->use_refs);
}

-static bool io_wq_worker_wake(struct io_worker *worker, void *data)
-{
- wake_up_process(worker->task);
- return false;
-}
-
static void __io_wq_destroy(struct io_wq *wq)
{
int node;
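The io-wq conversion above matters on PREEMPT_RT, where a plain spinlock_t becomes a sleeping lock: wqe->lock is taken from scheduler hooks such as io_wq_worker_sleeping(), where sleeping is forbidden, so it must be a raw_spinlock_t, which spins on every configuration. In isolation:

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_lock);

static void example_scheduler_hook(void)
{
        /* raw_spin_lock_irq() never sleeps, even on PREEMPT_RT, so it
         * is safe in contexts where scheduling is not allowed.
         */
        raw_spin_lock_irq(&example_lock);
        /* ... update state shared with the workers ... */
        raw_spin_unlock_irq(&example_lock);
}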
diff --git a/fs/io-wq.h b/fs/io-wq.h
index ddaf9614cf9b..2519830c8c55 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -88,6 +88,7 @@ struct io_wq_work {
struct files_struct *files;
struct mm_struct *mm;
const struct cred *creds;
+ struct nsproxy *nsproxy;
struct fs_struct *fs;
unsigned long fsize;
unsigned flags;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index aae0ef2ec34d..59ab8c5c2aaa 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -79,6 +79,7 @@
#include <linux/splice.h>
#include <linux/task_work.h>
#include <linux/pagemap.h>
+#include <linux/io_uring.h>

#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -265,7 +266,16 @@ struct io_ring_ctx {
/* IO offload */
struct io_wq *io_wq;
struct task_struct *sqo_thread; /* if using sq thread polling */
- struct mm_struct *sqo_mm;
+
+ /*
+ * For SQPOLL usage - we hold a reference to the parent task, so we
+ * have access to the ->files
+ */
+ struct task_struct *sqo_task;
+
+ /* Only used for accounting purposes */
+ struct mm_struct *mm_account;
+
wait_queue_head_t sqo_wait;

/*
@@ -275,8 +285,6 @@ struct io_ring_ctx {
*/
struct fixed_file_data *file_data;
unsigned nr_user_files;
- int ring_fd;
- struct file *ring_file;

/* if used, fixed mapped user buffers */
unsigned nr_user_bufs;
@@ -544,7 +552,6 @@ enum {
REQ_F_BUFFER_SELECTED_BIT,
REQ_F_NO_FILE_TABLE_BIT,
REQ_F_WORK_INITIALIZED_BIT,
- REQ_F_TASK_PINNED_BIT,

/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@@ -590,8 +597,6 @@ enum {
REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT),
/* io_wq_work is initialized */
REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT),
- /* req->task is refcounted */
- REQ_F_TASK_PINNED = BIT(REQ_F_TASK_PINNED_BIT),
};

struct async_poll {
@@ -933,14 +938,6 @@ struct sock *io_uring_get_socket(struct file *file)
}
EXPORT_SYMBOL(io_uring_get_socket);

-static void io_get_req_task(struct io_kiocb *req)
-{
- if (req->flags & REQ_F_TASK_PINNED)
- return;
- get_task_struct(req->task);
- req->flags |= REQ_F_TASK_PINNED;
-}
-
static inline void io_clean_op(struct io_kiocb *req)
{
if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED |
@@ -948,13 +945,6 @@ static inline void io_clean_op(struct io_kiocb *req)
__io_clean_op(req);
}

-/* not idempotent -- it doesn't clear REQ_F_TASK_PINNED */
-static void __io_put_req_task(struct io_kiocb *req)
-{
- if (req->flags & REQ_F_TASK_PINNED)
- put_task_struct(req->task);
-}
-
static void io_sq_thread_drop_mm(void)
{
struct mm_struct *mm = current->mm;
@@ -969,9 +959,10 @@ static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx)
{
if (!current->mm) {
if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL) ||
- !mmget_not_zero(ctx->sqo_mm)))
+ !ctx->sqo_task->mm ||
+ !mmget_not_zero(ctx->sqo_task->mm)))
return -EFAULT;
- kthread_use_mm(ctx->sqo_mm);
+ kthread_use_mm(ctx->sqo_task->mm);
}

return 0;
@@ -1226,14 +1217,34 @@ static void io_kill_timeout(struct io_kiocb *req)
}
}

-static void io_kill_timeouts(struct io_ring_ctx *ctx)
+static bool io_task_match(struct io_kiocb *req, struct task_struct *tsk)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+
+ if (!tsk || req->task == tsk)
+ return true;
+ if ((ctx->flags & IORING_SETUP_SQPOLL) && req->task == ctx->sqo_thread)
+ return true;
+ return false;
+}
+
+/*
+ * Returns true if we found and killed one or more timeouts
+ */
+static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk)
{
struct io_kiocb *req, *tmp;
+ int canceled = 0;

spin_lock_irq(&ctx->completion_lock);
- list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list)
- io_kill_timeout(req);
+ list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
+ if (io_task_match(req, tsk)) {
+ io_kill_timeout(req);
+ canceled++;
+ }
+ }
spin_unlock_irq(&ctx->completion_lock);
+ return canceled != 0;
}

static void __io_queue_deferred(struct io_ring_ctx *ctx)
@@ -1332,12 +1343,24 @@ static void io_cqring_mark_overflow(struct io_ring_ctx *ctx)
}
}

+static inline bool io_match_files(struct io_kiocb *req,
+ struct files_struct *files)
+{
+ if (!files)
+ return true;
+ if (req->flags & REQ_F_WORK_INITIALIZED)
+ return req->work.files == files;
+ return false;
+}
+
/* Returns true if there are no backlogged entries after the flush */
-static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
+static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
+ struct task_struct *tsk,
+ struct files_struct *files)
{
struct io_rings *rings = ctx->rings;
+ struct io_kiocb *req, *tmp;
struct io_uring_cqe *cqe;
- struct io_kiocb *req;
unsigned long flags;
LIST_HEAD(list);

@@ -1356,13 +1379,16 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
ctx->cq_overflow_flushed = 1;

cqe = NULL;
- while (!list_empty(&ctx->cq_overflow_list)) {
+ list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, compl.list) {
+ if (tsk && req->task != tsk)
+ continue;
+ if (!io_match_files(req, files))
+ continue;
+
cqe = io_get_cqring(ctx);
if (!cqe && !force)
break;

- req = list_first_entry(&ctx->cq_overflow_list, struct io_kiocb,
- compl.list);
list_move(&req->compl.list, &list);
if (cqe) {
WRITE_ONCE(cqe->user_data, req->user_data);
@@ -1406,7 +1432,12 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
WRITE_ONCE(cqe->user_data, req->user_data);
WRITE_ONCE(cqe->res, res);
WRITE_ONCE(cqe->flags, cflags);
- } else if (ctx->cq_overflow_flushed) {
+ } else if (ctx->cq_overflow_flushed || req->task->io_uring->in_idle) {
+ /*
+ * If we're in ring overflow flush mode, or in task cancel mode,
+ * then we cannot store the request for later flushing; we have
+ * to drop it on the floor.
+ */
WRITE_ONCE(ctx->rings->cq_overflow,
atomic_inc_return(&ctx->cached_cq_overflow));
} else {
@@ -1564,9 +1595,14 @@ static bool io_dismantle_req(struct io_kiocb *req)

static void __io_free_req_finish(struct io_kiocb *req)
{
+ struct io_uring_task *tctx = req->task->io_uring;
struct io_ring_ctx *ctx = req->ctx;

- __io_put_req_task(req);
+ atomic_long_inc(&tctx->req_complete);
+ if (tctx->in_idle)
+ wake_up(&tctx->wait);
+ put_task_struct(req->task);
+
if (likely(!io_is_fallback_req(req)))
kmem_cache_free(req_cachep, req);
else
@@ -1879,6 +1915,7 @@ static void io_req_free_batch_finish(struct io_ring_ctx *ctx,
if (rb->to_free)
__io_req_free_batch_flush(ctx, rb);
if (rb->task) {
+ atomic_long_add(rb->task_refs, &rb->task->io_uring->req_complete);
put_task_struct_many(rb->task, rb->task_refs);
rb->task = NULL;
}
@@ -1893,16 +1930,15 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req)
if (req->flags & REQ_F_LINK_HEAD)
io_queue_next(req);

- if (req->flags & REQ_F_TASK_PINNED) {
- if (req->task != rb->task) {
- if (rb->task)
- put_task_struct_many(rb->task, rb->task_refs);
- rb->task = req->task;
- rb->task_refs = 0;
+ if (req->task != rb->task) {
+ if (rb->task) {
+ atomic_long_add(rb->task_refs, &rb->task->io_uring->req_complete);
+ put_task_struct_many(rb->task, rb->task_refs);
}
- rb->task_refs++;
- req->flags &= ~REQ_F_TASK_PINNED;
+ rb->task = req->task;
+ rb->task_refs = 0;
}
+ rb->task_refs++;

WARN_ON_ONCE(io_dismantle_req(req));
rb->reqs[rb->to_free++] = req;
@@ -1978,7 +2014,7 @@ static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush)
if (noflush && !list_empty(&ctx->cq_overflow_list))
return -1U;

- io_cqring_overflow_flush(ctx, false);
+ io_cqring_overflow_flush(ctx, false, NULL, NULL);
}

/* See comment at the top of this file */
@@ -2527,9 +2563,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (kiocb->ki_flags & IOCB_NOWAIT)
req->flags |= REQ_F_NOWAIT;

- if (kiocb->ki_flags & IOCB_DIRECT)
- io_get_req_task(req);
-
if (force_nonblock)
kiocb->ki_flags |= IOCB_NOWAIT;

@@ -2541,7 +2574,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
kiocb->ki_flags |= IOCB_HIPRI;
kiocb->ki_complete = io_complete_rw_iopoll;
req->iopoll_completed = 0;
- io_get_req_task(req);
} else {
if (kiocb->ki_flags & IOCB_HIPRI)
return -EINVAL;
@@ -3109,8 +3141,6 @@ static bool io_rw_should_retry(struct io_kiocb *req)
kiocb->ki_flags |= IOCB_WAITQ;
kiocb->ki_flags &= ~IOCB_NOWAIT;
kiocb->ki_waitq = wait;
-
- io_get_req_task(req);
return true;
}

@@ -3959,8 +3989,7 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -EBADF;

req->close.fd = READ_ONCE(sqe->fd);
- if ((req->file && req->file->f_op == &io_uring_fops) ||
- req->close.fd == req->ctx->ring_fd)
+ if ((req->file && req->file->f_op == &io_uring_fops))
return -EBADF;

req->close.put_file = NULL;
@@ -4942,7 +4971,6 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
apoll->double_poll = NULL;

req->flags |= REQ_F_POLLED;
- io_get_req_task(req);
req->apoll = apoll;
INIT_HLIST_NODE(&req->hash_node);

@@ -5017,7 +5045,10 @@ static bool io_poll_remove_one(struct io_kiocb *req)
return do_complete;
}

-static void io_poll_remove_all(struct io_ring_ctx *ctx)
+/*
+ * Returns true if we found and killed one or more poll requests
+ */
+static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk)
{
struct hlist_node *tmp;
struct io_kiocb *req;
@@ -5028,13 +5059,17 @@ static void io_poll_remove_all(struct io_ring_ctx *ctx)
struct hlist_head *list;

list = &ctx->cancel_hash[i];
- hlist_for_each_entry_safe(req, tmp, list, hash_node)
- posted += io_poll_remove_one(req);
+ hlist_for_each_entry_safe(req, tmp, list, hash_node) {
+ if (io_task_match(req, tsk))
+ posted += io_poll_remove_one(req);
+ }
}
spin_unlock_irq(&ctx->completion_lock);

if (posted)
io_cqring_ev_posted(ctx);
+
+ return posted != 0;
}

static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
@@ -5123,8 +5158,6 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
#endif
poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP |
(events & EPOLLEXCLUSIVE);
-
- io_get_req_task(req);
return 0;
}

@@ -5633,6 +5666,22 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -EIOCBQUEUED;
}

+static void io_req_drop_files(struct io_kiocb *req)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->inflight_lock, flags);
+ list_del(&req->inflight_entry);
+ if (waitqueue_active(&ctx->inflight_wait))
+ wake_up(&ctx->inflight_wait);
+ spin_unlock_irqrestore(&ctx->inflight_lock, flags);
+ req->flags &= ~REQ_F_INFLIGHT;
+ put_files_struct(req->work.files);
+ put_nsproxy(req->work.nsproxy);
+ req->work.files = NULL;
+}
+
static void __io_clean_op(struct io_kiocb *req)
{
struct io_async_ctx *io = req->io;
@@ -5682,17 +5731,8 @@ static void __io_clean_op(struct io_kiocb *req)
req->flags &= ~REQ_F_NEED_CLEANUP;
}

- if (req->flags & REQ_F_INFLIGHT) {
- struct io_ring_ctx *ctx = req->ctx;
- unsigned long flags;
-
- spin_lock_irqsave(&ctx->inflight_lock, flags);
- list_del(&req->inflight_entry);
- if (waitqueue_active(&ctx->inflight_wait))
- wake_up(&ctx->inflight_wait);
- spin_unlock_irqrestore(&ctx->inflight_lock, flags);
- req->flags &= ~REQ_F_INFLIGHT;
- }
+ if (req->flags & REQ_F_INFLIGHT)
+ io_req_drop_files(req);
}

static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
@@ -6039,34 +6079,22 @@ static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,

static int io_grab_files(struct io_kiocb *req)
{
- int ret = -EBADF;
struct io_ring_ctx *ctx = req->ctx;

io_req_init_async(req);

if (req->work.files || (req->flags & REQ_F_NO_FILE_TABLE))
return 0;
- if (!ctx->ring_file)
- return -EBADF;

- rcu_read_lock();
+ req->work.files = get_files_struct(current);
+ get_nsproxy(current->nsproxy);
+ req->work.nsproxy = current->nsproxy;
+ req->flags |= REQ_F_INFLIGHT;
+
spin_lock_irq(&ctx->inflight_lock);
- /*
- * We use the f_ops->flush() handler to ensure that we can flush
- * out work accessing these files if the fd is closed. Check if
- * the fd has changed since we started down this path, and disallow
- * this operation if it has.
- */
- if (fcheck(ctx->ring_fd) == ctx->ring_file) {
- list_add(&req->inflight_entry, &ctx->inflight_list);
- req->flags |= REQ_F_INFLIGHT;
- req->work.files = current->files;
- ret = 0;
- }
+ list_add(&req->inflight_entry, &ctx->inflight_list);
spin_unlock_irq(&ctx->inflight_lock);
- rcu_read_unlock();
-
- return ret;
+ return 0;
}

static inline int io_prep_work_files(struct io_kiocb *req)
@@ -6221,8 +6249,10 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (nxt) {
req = nxt;

- if (req->flags & REQ_F_FORCE_ASYNC)
+ if (req->flags & REQ_F_FORCE_ASYNC) {
+ linked_timeout = NULL;
goto punt;
+ }
goto again;
}
exit:
@@ -6306,7 +6336,6 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
return ret;
}
trace_io_uring_link(ctx, req, head);
- io_get_req_task(req);
list_add_tail(&req->link_list, &head->link_list);

/* last request of a link, enqueue the link */
@@ -6431,6 +6460,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
/* one is dropped after submission, the other at completion */
refcount_set(&req->refs, 2);
req->task = current;
+ get_task_struct(req->task);
+ atomic_long_inc(&req->task->io_uring->req_issue);
req->result = 0;

if (unlikely(req->opcode >= IORING_OP_LAST))
@@ -6466,8 +6497,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
return io_req_set_file(state, req, READ_ONCE(sqe->fd));
}

-static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
- struct file *ring_file, int ring_fd)
+static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
{
struct io_submit_state state;
struct io_kiocb *link = NULL;
@@ -6476,7 +6506,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
/* if we have a backlog and couldn't flush it all, return BUSY */
if (test_bit(0, &ctx->sq_check_overflow)) {
if (!list_empty(&ctx->cq_overflow_list) &&
- !io_cqring_overflow_flush(ctx, false))
+ !io_cqring_overflow_flush(ctx, false, NULL, NULL))
return -EBUSY;
}

@@ -6488,9 +6518,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,

io_submit_state_start(&state, ctx, nr);

- ctx->ring_fd = ring_fd;
- ctx->ring_file = ring_file;
-
for (i = 0; i < nr; i++) {
const struct io_uring_sqe *sqe;
struct io_kiocb *req;
@@ -6659,7 +6686,7 @@ static int io_sq_thread(void *data)

mutex_lock(&ctx->uring_lock);
if (likely(!percpu_ref_is_dying(&ctx->refs)))
- ret = io_submit_sqes(ctx, to_submit, NULL, -1);
+ ret = io_submit_sqes(ctx, to_submit);
mutex_unlock(&ctx->uring_lock);
timeout = jiffies + ctx->sq_thread_idle;
}
@@ -7488,6 +7515,33 @@ static int io_init_wq_offload(struct io_ring_ctx *ctx,
return ret;
}

+static int io_uring_alloc_task_context(struct task_struct *task)
+{
+ struct io_uring_task *tctx;
+
+ tctx = kmalloc(sizeof(*tctx), GFP_KERNEL);
+ if (unlikely(!tctx))
+ return -ENOMEM;
+
+ xa_init(&tctx->xa);
+ init_waitqueue_head(&tctx->wait);
+ tctx->last = NULL;
+ tctx->in_idle = 0;
+ atomic_long_set(&tctx->req_issue, 0);
+ atomic_long_set(&tctx->req_complete, 0);
+ task->io_uring = tctx;
+ return 0;
+}
+
+void __io_uring_free(struct task_struct *tsk)
+{
+ struct io_uring_task *tctx = tsk->io_uring;
+
+ WARN_ON_ONCE(!xa_empty(&tctx->xa));
+ kfree(tctx);
+ tsk->io_uring = NULL;
+}
+
static int io_sq_offload_start(struct io_ring_ctx *ctx,
struct io_uring_params *p)
{
@@ -7523,6 +7577,9 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
ctx->sqo_thread = NULL;
goto err;
}
+ ret = io_uring_alloc_task_context(ctx->sqo_thread);
+ if (ret)
+ goto err;
wake_up_process(ctx->sqo_thread);
} else if (p->flags & IORING_SETUP_SQ_AFF) {
/* Can't have SQ_AFF without SQPOLL */
@@ -7571,11 +7628,11 @@ static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages,
if (ctx->limit_mem)
__io_unaccount_mem(ctx->user, nr_pages);

- if (ctx->sqo_mm) {
+ if (ctx->mm_account) {
if (acct == ACCT_LOCKED)
- ctx->sqo_mm->locked_vm -= nr_pages;
+ ctx->mm_account->locked_vm -= nr_pages;
else if (acct == ACCT_PINNED)
- atomic64_sub(nr_pages, &ctx->sqo_mm->pinned_vm);
+ atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm);
}
}

@@ -7590,11 +7647,11 @@ static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages,
return ret;
}

- if (ctx->sqo_mm) {
+ if (ctx->mm_account) {
if (acct == ACCT_LOCKED)
- ctx->sqo_mm->locked_vm += nr_pages;
+ ctx->mm_account->locked_vm += nr_pages;
else if (acct == ACCT_PINNED)
- atomic64_add(nr_pages, &ctx->sqo_mm->pinned_vm);
+ atomic64_add(nr_pages, &ctx->mm_account->pinned_vm);
}

return 0;
@@ -7898,9 +7955,12 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
{
io_finish_async(ctx);
io_sqe_buffer_unregister(ctx);
- if (ctx->sqo_mm) {
- mmdrop(ctx->sqo_mm);
- ctx->sqo_mm = NULL;
+
+ if (ctx->sqo_task) {
+ put_task_struct(ctx->sqo_task);
+ ctx->sqo_task = NULL;
+ mmdrop(ctx->mm_account);
+ ctx->mm_account = NULL;
}

io_sqe_files_unregister(ctx);
@@ -7977,7 +8037,7 @@ static void io_ring_exit_work(struct work_struct *work)
*/
do {
if (ctx->rings)
- io_cqring_overflow_flush(ctx, true);
+ io_cqring_overflow_flush(ctx, true, NULL, NULL);
io_iopoll_try_reap_events(ctx);
} while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
io_ring_ctx_free(ctx);
@@ -7989,15 +8049,15 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
percpu_ref_kill(&ctx->refs);
mutex_unlock(&ctx->uring_lock);

- io_kill_timeouts(ctx);
- io_poll_remove_all(ctx);
+ io_kill_timeouts(ctx, NULL);
+ io_poll_remove_all(ctx, NULL);

if (ctx->io_wq)
io_wq_cancel_all(ctx->io_wq);

/* if we failed setting up the ctx, we might not have any rings */
if (ctx->rings)
- io_cqring_overflow_flush(ctx, true);
+ io_cqring_overflow_flush(ctx, true, NULL, NULL);
io_iopoll_try_reap_events(ctx);
idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);

@@ -8032,7 +8092,7 @@ static bool io_wq_files_match(struct io_wq_work *work, void *data)
{
struct files_struct *files = data;

- return work->files == files;
+ return !files || work->files == files;
}

/*
@@ -8053,12 +8113,6 @@ static bool io_match_link(struct io_kiocb *preq, struct io_kiocb *req)
return false;
}

-static inline bool io_match_files(struct io_kiocb *req,
- struct files_struct *files)
-{
- return (req->flags & REQ_F_WORK_INITIALIZED) && req->work.files == files;
-}
-
static bool io_match_link_files(struct io_kiocb *req,
struct files_struct *files)
{
@@ -8174,11 +8228,14 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx,
}
}

-static void io_uring_cancel_files(struct io_ring_ctx *ctx,
+/*
+ * Returns true if we found and killed one or more files pinning requests
+ */
+static bool io_uring_cancel_files(struct io_ring_ctx *ctx,
struct files_struct *files)
{
if (list_empty_careful(&ctx->inflight_list))
- return;
+ return false;

io_cancel_defer_files(ctx, files);
/* cancel all at once, should be faster than doing it one by one*/
@@ -8190,7 +8247,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,

spin_lock_irq(&ctx->inflight_lock);
list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
- if (req->work.files != files)
+ if (files && req->work.files != files)
continue;
/* req is being completed, ignore */
if (!refcount_inc_not_zero(&req->refs))
@@ -8214,6 +8271,8 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
schedule();
finish_wait(&ctx->inflight_wait, &wait);
}
+
+ return true;
}

static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
@@ -8221,21 +8280,198 @@ static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
struct task_struct *task = data;

- return req->task == task;
+ return io_task_match(req, task);
+}
+
+static bool __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
+ struct task_struct *task,
+ struct files_struct *files)
+{
+ bool ret;
+
+ ret = io_uring_cancel_files(ctx, files);
+ if (!files) {
+ enum io_wq_cancel cret;
+
+ cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, task, true);
+ if (cret != IO_WQ_CANCEL_NOTFOUND)
+ ret = true;
+
+ /* SQPOLL thread does its own polling */
+ if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
+ while (!list_empty_careful(&ctx->iopoll_list)) {
+ io_iopoll_try_reap_events(ctx);
+ ret = true;
+ }
+ }
+
+ ret |= io_poll_remove_all(ctx, task);
+ ret |= io_kill_timeouts(ctx, task);
+ }
+
+ return ret;
+}
+
+/*
+ * We need to iteratively cancel requests, in case a request has dependent
+ * hard links. These persist even for failure of cancelations, hence keep
+ * looping until none are found.
+ */
+static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
+ struct files_struct *files)
+{
+ struct task_struct *task = current;
+
+ if (ctx->flags & IORING_SETUP_SQPOLL)
+ task = ctx->sqo_thread;
+
+ io_cqring_overflow_flush(ctx, true, task, files);
+
+ while (__io_uring_cancel_task_requests(ctx, task, files)) {
+ io_run_task_work();
+ cond_resched();
+ }
+}
+
+/*
+ * Note that this task has used io_uring. We use it for cancelation purposes.
+ */
+static int io_uring_add_task_file(struct file *file)
+{
+ struct io_uring_task *tctx = current->io_uring;
+
+ if (unlikely(!tctx)) {
+ int ret;
+
+ ret = io_uring_alloc_task_context(current);
+ if (unlikely(ret))
+ return ret;
+ tctx = current->io_uring;
+ }
+ if (tctx->last != file) {
+ void *old = xa_load(&tctx->xa, (unsigned long)file);
+
+ if (!old) {
+ get_file(file);
+ xa_store(&tctx->xa, (unsigned long)file, file, GFP_KERNEL);
+ }
+ tctx->last = file;
+ }
+
+ return 0;
+}
+
+/*
+ * Remove this io_uring_file -> task mapping.
+ */
+static void io_uring_del_task_file(struct file *file)
+{
+ struct io_uring_task *tctx = current->io_uring;
+
+ if (tctx->last == file)
+ tctx->last = NULL;
+ file = xa_erase(&tctx->xa, (unsigned long)file);
+ if (file)
+ fput(file);
+}
+
+static void __io_uring_attempt_task_drop(struct file *file)
+{
+ struct file *old = xa_load(&current->io_uring->xa, (unsigned long)file);
+
+ if (old == file)
+ io_uring_del_task_file(file);
+}
+
+/*
+ * Drop task note for this file if we're the only ones that hold it after
+ * pending fput()
+ */
+static void io_uring_attempt_task_drop(struct file *file, bool exiting)
+{
+ if (!current->io_uring)
+ return;
+ /*
+ * fput() is pending, will be 2 if the only other ref is our potential
+ * task file note. If the task is exiting, drop regardless of count.
+ */
+ if (!exiting && atomic_long_read(&file->f_count) != 2)
+ return;
+
+ __io_uring_attempt_task_drop(file);
+}
+
+void __io_uring_files_cancel(struct files_struct *files)
+{
+ struct io_uring_task *tctx = current->io_uring;
+ struct file *file;
+ unsigned long index;
+
+ /* make sure overflow events are dropped */
+ tctx->in_idle = true;
+
+ xa_for_each(&tctx->xa, index, file) {
+ struct io_ring_ctx *ctx = file->private_data;
+
+ io_uring_cancel_task_requests(ctx, files);
+ if (files)
+ io_uring_del_task_file(file);
+ }
+}
+
+static inline bool io_uring_task_idle(struct io_uring_task *tctx)
+{
+ return atomic_long_read(&tctx->req_issue) ==
+ atomic_long_read(&tctx->req_complete);
+}
+
+/*
+ * Find any io_uring fd that this task has registered or done IO on, and cancel
+ * requests.
+ */
+void __io_uring_task_cancel(void)
+{
+ struct io_uring_task *tctx = current->io_uring;
+ DEFINE_WAIT(wait);
+ long completions;
+
+ /* make sure overflow events are dropped */
+ tctx->in_idle = true;
+
+ while (!io_uring_task_idle(tctx)) {
+ /* read completions before cancelations */
+ completions = atomic_long_read(&tctx->req_complete);
+ __io_uring_files_cancel(NULL);
+
+ prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
+
+ /*
+ * If we've seen completions, retry. This avoids a race where
+ * a completion comes in before we did prepare_to_wait().
+ */
+ if (completions != atomic_long_read(&tctx->req_complete))
+ continue;
+ if (io_uring_task_idle(tctx))
+ break;
+ schedule();
+ }
+
+ finish_wait(&tctx->wait, &wait);
+ tctx->in_idle = false;
}

static int io_uring_flush(struct file *file, void *data)
{
struct io_ring_ctx *ctx = file->private_data;

- io_uring_cancel_files(ctx, data);
-
/*
* If the task is going away, cancel work it may have pending
*/
if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
- io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, current, true);
+ data = NULL;

+ io_uring_cancel_task_requests(ctx, data);
+ io_uring_attempt_task_drop(file, !data);
return 0;
}

@@ -8344,13 +8580,16 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
ret = 0;
if (ctx->flags & IORING_SETUP_SQPOLL) {
if (!list_empty_careful(&ctx->cq_overflow_list))
- io_cqring_overflow_flush(ctx, false);
+ io_cqring_overflow_flush(ctx, false, NULL, NULL);
if (flags & IORING_ENTER_SQ_WAKEUP)
wake_up(&ctx->sqo_wait);
submitted = to_submit;
} else if (to_submit) {
+ ret = io_uring_add_task_file(f.file);
+ if (unlikely(ret))
+ goto out;
mutex_lock(&ctx->uring_lock);
- submitted = io_submit_sqes(ctx, to_submit, f.file, fd);
+ submitted = io_submit_sqes(ctx, to_submit);
mutex_unlock(&ctx->uring_lock);

if (submitted != to_submit)
@@ -8560,6 +8799,7 @@ static int io_uring_get_fd(struct io_ring_ctx *ctx)
file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx,
O_RDWR | O_CLOEXEC);
if (IS_ERR(file)) {
+err_fd:
put_unused_fd(ret);
ret = PTR_ERR(file);
goto err;
@@ -8568,6 +8808,10 @@ static int io_uring_get_fd(struct io_ring_ctx *ctx)
#if defined(CONFIG_UNIX)
ctx->ring_sock->file = file;
#endif
+ if (unlikely(io_uring_add_task_file(file))) {
+ file = ERR_PTR(-ENOMEM);
+ goto err_fd;
+ }
fd_install(ret, file);
return ret;
err:
@@ -8645,8 +8889,16 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
ctx->user = user;
ctx->creds = get_current_cred();

+ ctx->sqo_task = get_task_struct(current);
+
+ /*
+ * This is just grabbed for accounting purposes. When a process exits,
+ * the mm is exited and dropped before the files, hence we need to hang
+ * on to this mm purely for the purposes of being able to unaccount
+ * memory (locked/pinned vm). It's not used for anything else.
+ */
mmgrab(current->mm);
- ctx->sqo_mm = current->mm;
+ ctx->mm_account = current->mm;

/*
* Account memory _before_ installing the file descriptor. Once
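
[The quiesce loop in __io_uring_task_cancel() above is the classic prepare_to_wait()/re-check/schedule() idiom, with one refinement: the completion counter is sampled before the cancel pass runs, so a completion that lands between cancelation and prepare_to_wait() forces a retry instead of becoming a missed wakeup. A condensed sketch of the same idiom; idle(), cancel_outstanding(), done and wq are stand-ins, not kernel symbols:

	DEFINE_WAIT(wait);

	while (!idle()) {
		/* sample progress before issuing cancelations */
		long seen = atomic_long_read(&done);

		cancel_outstanding();
		prepare_to_wait(&wq, &wait, TASK_UNINTERRUPTIBLE);
		if (seen != atomic_long_read(&done))
			continue;	/* raced with a completion, retry */
		if (idle())
			break;
		schedule();		/* completion side wakes &wq */
	}
	finish_wait(&wq, &wait);
]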
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7519ae003a08..7d4d04c9d3e6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2862,7 +2862,6 @@ extern int do_pipe_flags(int *, int);
id(UNKNOWN, unknown) \
id(FIRMWARE, firmware) \
id(FIRMWARE_PREALLOC_BUFFER, firmware) \
- id(FIRMWARE_EFI_EMBEDDED, firmware) \
id(MODULE, kernel-module) \
id(KEXEC_IMAGE, kexec-image) \
id(KEXEC_INITRAMFS, kexec-initramfs) \
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
new file mode 100644
index 000000000000..c09135a1ef13
--- /dev/null
+++ b/include/linux/io_uring.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _LINUX_IO_URING_H
+#define _LINUX_IO_URING_H
+
+#include <linux/sched.h>
+#include <linux/xarray.h>
+#include <linux/percpu-refcount.h>
+
+struct io_uring_task {
+ /* submission side */
+ struct xarray xa;
+ struct wait_queue_head wait;
+ struct file *last;
+ atomic_long_t req_issue;
+
+ /* completion side */
+ bool in_idle ____cacheline_aligned_in_smp;
+ atomic_long_t req_complete;
+};
+
+#if defined(CONFIG_IO_URING)
+void __io_uring_task_cancel(void);
+void __io_uring_files_cancel(struct files_struct *files);
+void __io_uring_free(struct task_struct *tsk);
+
+static inline void io_uring_task_cancel(void)
+{
+ if (current->io_uring && !xa_empty(&current->io_uring->xa))
+ __io_uring_task_cancel();
+}
+static inline void io_uring_files_cancel(struct files_struct *files)
+{
+ if (current->io_uring && !xa_empty(&current->io_uring->xa))
+ __io_uring_files_cancel(files);
+}
+static inline void io_uring_free(struct task_struct *tsk)
+{
+ if (tsk->io_uring)
+ __io_uring_free(tsk);
+}
+#else
+static inline void io_uring_task_cancel(void)
+{
+}
+static inline void io_uring_files_cancel(struct files_struct *files)
+{
+}
+static inline void io_uring_free(struct task_struct *tsk)
+{
+}
+#endif
+
+#endif
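
[The new header follows the usual guard pattern: cheap inline wrappers test current->io_uring (or tsk->io_uring) before calling the out-of-line __-prefixed workers, and the !CONFIG_IO_URING variants are empty inlines. Generic code can therefore call them unconditionally; a hypothetical caller, for illustration only, needs no #ifdef of its own:

	#include <linux/io_uring.h>

	/* sketch: compiles to nothing when CONFIG_IO_URING is unset */
	static void example_exit_path(void)
	{
		io_uring_files_cancel(current->files);
		io_uring_task_cancel();
	}
]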
diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h
index 6166e7c60869..b8da6f8e854b 100644
--- a/include/linux/mtd/pfow.h
+++ b/include/linux/mtd/pfow.h
@@ -128,7 +128,7 @@ static inline void print_drs_error(unsigned dsr)

if (!(dsr & DSR_AVAILABLE))
printk(KERN_NOTICE"DSR.15: (0) Device not Available\n");
- if (prog_status & 0x03)
+ if ((prog_status & 0x03) == 0x03)
printk(KERN_NOTICE"DSR.9,8: (11) Attempt to program invalid "
"half with 41h command\n");
else if (prog_status & 0x02)
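
[The old test fired for any nonzero low bit: prog_status & 0x03 is true for 0x01 and 0x02 as well as 0x03, so the "both bits set" message shadowed the two single-bit diagnostics that follow it. Comparing the masked value against the full mask restores the intended three-way decode, as a standalone check shows:

	#include <stdio.h>

	int main(void)
	{
		for (unsigned int s = 1; s <= 3; s++) {
			if ((s & 0x03) == 0x03)
				printf("%u: bits 9,8 == 11\n", s);
			else if (s & 0x02)
				printf("%u: bit 9 only\n", s);
			else if (s & 0x01)
				printf("%u: bit 8 only\n", s);
		}
		return 0;	/* 1: bit 8 only, 2: bit 9 only, 3: bits 9,8 == 11 */
	}
]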
diff --git a/include/linux/pm.h b/include/linux/pm.h
index a30a4b54df52..47aca6bac1d6 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -590,7 +590,7 @@ struct dev_pm_info {
#endif
#ifdef CONFIG_PM
struct hrtimer suspend_timer;
- unsigned long timer_expires;
+ u64 timer_expires;
struct work_struct work;
wait_queue_head_t wait_queue;
struct wake_irq *wakeirq;
diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h
index 8f385fbe5a0e..1c31f26ccc7a 100644
--- a/include/linux/qcom-geni-se.h
+++ b/include/linux/qcom-geni-se.h
@@ -248,6 +248,9 @@ struct geni_se {
#define GENI_SE_VERSION_MINOR(ver) ((ver & HW_VER_MINOR_MASK) >> HW_VER_MINOR_SHFT)
#define GENI_SE_VERSION_STEP(ver) (ver & HW_VER_STEP_MASK)

+/* QUP SE VERSION value for major number 2 and minor number 5 */
+#define QUP_SE_VERSION_2_5 0x20050000
+
/*
* Define bandwidth thresholds that cause the underlying Core 2X interconnect
* clock to run at the named frequency. These baseline values are recommended
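
[Assuming the HW_VER_* masks behind the GENI_SE_VERSION_* accessors above place the major number in bits 31:28, the minor in 27:16 and the step in 15:0, 0x20050000 decodes to major 2, minor 5, step 0, hence the macro name. A quick standalone check of that assumption:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t ver = 0x20050000;	/* QUP_SE_VERSION_2_5 */

		/* assumes major = bits 31:28, minor = 27:16, step = 15:0 */
		printf("major %u minor %u step %u\n",
		       ver >> 28, (ver >> 16) & 0xfff, ver & 0xffff);
		return 0;	/* prints: major 2 minor 5 step 0 */
	}
]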
diff --git a/include/linux/sched.h b/include/linux/sched.h
index afe01e232935..8bf2295ebee4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -63,6 +63,7 @@ struct sighand_struct;
struct signal_struct;
struct task_delay_info;
struct task_group;
+struct io_uring_task;

/*
* Task state bitmask. NOTE! These bits are also
@@ -935,6 +936,10 @@ struct task_struct {
/* Open file information: */
struct files_struct *files;

+#ifdef CONFIG_IO_URING
+ struct io_uring_task *io_uring;
+#endif
+
/* Namespaces: */
struct nsproxy *nsproxy;

diff --git a/include/linux/string.h b/include/linux/string.h
index 9b7a0632e87a..b1f3894a0a3e 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -161,20 +161,13 @@ extern int bcmp(const void *,const void *,__kernel_size_t);
#ifndef __HAVE_ARCH_MEMCHR
extern void * memchr(const void *,int,__kernel_size_t);
#endif
-#ifndef __HAVE_ARCH_MEMCPY_MCSAFE
-static inline __must_check unsigned long memcpy_mcsafe(void *dst,
- const void *src, size_t cnt)
-{
- memcpy(dst, src, cnt);
- return 0;
-}
-#endif
#ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE
static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt)
{
memcpy(dst, src, cnt);
}
#endif
+
void *memchr_inv(const void *s, int c, size_t n);
char *strreplace(char *s, char old, char new);

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 94b285411659..1ae36bc8db35 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -179,6 +179,19 @@ copy_in_user(void __user *to, const void __user *from, unsigned long n)
}
#endif

+#ifndef copy_mc_to_kernel
+/*
+ * Without arch opt-in this generic copy_mc_to_kernel() will not handle
+ * #MC (or arch equivalent) during source read.
+ */
+static inline unsigned long __must_check
+copy_mc_to_kernel(void *dst, const void *src, size_t cnt)
+{
+ memcpy(dst, src, cnt);
+ return 0;
+}
+#endif
+
static __always_inline void pagefault_disabled_inc(void)
{
current->pagefault_disabled++;
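
[As with the old memcpy_mcsafe(), the return value of copy_mc_to_kernel() is the number of bytes left uncopied: zero means full success, nonzero means the source read hit a machine check partway through. A minimal sketch of the resulting caller contract; read_possibly_poisoned() is illustrative, not a kernel function:

	static int read_possibly_poisoned(void *dst, const void *src, size_t len)
	{
		unsigned long rem = copy_mc_to_kernel(dst, src, len);

		if (rem)		/* the last 'rem' bytes were not copied */
			return -EIO;
		return 0;
	}
]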
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 3835a8a8e9ea..f14410c678bd 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -185,10 +185,10 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
#define _copy_from_iter_flushcache _copy_from_iter_nocache
#endif

-#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
-size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i);
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
#else
-#define _copy_to_iter_mcsafe _copy_to_iter
+#define _copy_mc_to_iter _copy_to_iter
#endif

static __always_inline __must_check
@@ -201,12 +201,12 @@ size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
}

static __always_inline __must_check
-size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i)
+size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
{
if (unlikely(!check_copy_size(addr, bytes, true)))
return 0;
else
- return _copy_to_iter_mcsafe(addr, bytes, i);
+ return _copy_mc_to_iter(addr, bytes, i);
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 224d194ad29d..e5b7fbabedfb 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -896,6 +896,12 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule)
return (struct nft_expr *)&rule->data[rule->dlen];
}

+static inline bool nft_expr_more(const struct nft_rule *rule,
+ const struct nft_expr *expr)
+{
+ return expr != nft_expr_last(rule) && expr->ops;
+}
+
static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule)
{
return (void *)&rule->data[rule->dlen];
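
[The helper folds the bounds check and the ops test into one predicate, in that order. That ordering matters for the nf_tables_offload.c call sites converted below, which previously evaluated expr->ops before comparing against nft_expr_last() and so peeked at the end-of-rule boundary. With the helper, the canonical walk over a rule's expressions reads:

	struct nft_expr *expr;

	for (expr = nft_expr_first(rule); nft_expr_more(rule, expr);
	     expr = nft_expr_next(expr)) {
		/* visit expr; expr->ops is known valid here */
	}
]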
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b6238b2209b7..f4ef5d5a1232 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1438,8 +1438,8 @@ union bpf_attr {
* Return
* The return value depends on the result of the test, and can be:
*
- * * 0, if the *skb* task belongs to the cgroup2.
- * * 1, if the *skb* task does not belong to the cgroup2.
+ * * 0, if current task belongs to the cgroup2.
+ * * 1, if current task does not belong to the cgroup2.
* * A negative error code, if an error occurred.
*
* long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
diff --git a/init/init_task.c b/init/init_task.c
index f6889fce64af..a56f0abb63e9 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -114,6 +114,9 @@ struct task_struct init_task
.thread = INIT_THREAD,
.fs = &init_fs,
.files = &init_files,
+#ifdef CONFIG_IO_URING
+ .io_uring = NULL,
+#endif
.signal = &init_signals,
.sighand = &init_sighand,
.nsproxy = &init_nsproxy,
diff --git a/kernel/fork.c b/kernel/fork.c
index a9ce750578ca..8934886d1654 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -95,6 +95,7 @@
#include <linux/stackleak.h>
#include <linux/kasan.h>
#include <linux/scs.h>
+#include <linux/io_uring.h>

#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@@ -728,6 +729,7 @@ void __put_task_struct(struct task_struct *tsk)
WARN_ON(refcount_read(&tsk->usage));
WARN_ON(tsk == current);

+ io_uring_free(tsk);
cgroup_free(tsk);
task_numa_free(tsk, true);
security_task_free(tsk);
@@ -2002,6 +2004,10 @@ static __latent_entropy struct task_struct *copy_process(
p->vtime.state = VTIME_INACTIVE;
#endif

+#ifdef CONFIG_IO_URING
+ p->io_uring = NULL;
+#endif
+
#if defined(SPLIT_RSS_COUNTING)
memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif
diff --git a/lib/Kconfig b/lib/Kconfig
index b4b98a03ff98..b46a9fd122c8 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -635,7 +635,12 @@ config UACCESS_MEMCPY
config ARCH_HAS_UACCESS_FLUSHCACHE
bool

-config ARCH_HAS_UACCESS_MCSAFE
+# arch has a concept of a recoverable synchronous exception due to a
+# memory-read error like x86 machine-check or ARM data-abort, and
+# implements copy_mc_to_{user,kernel} to abort and report
+# 'bytes-transferred' if that exception fires when accessing the source
+# buffer.
+config ARCH_HAS_COPY_MC
bool

# Temporary. Goes away when all archs are cleaned up
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 5e40786c8f12..d13304a034f5 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -637,30 +637,30 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
}
EXPORT_SYMBOL(_copy_to_iter);

-#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
-static int copyout_mcsafe(void __user *to, const void *from, size_t n)
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+static int copyout_mc(void __user *to, const void *from, size_t n)
{
if (access_ok(to, n)) {
instrument_copy_to_user(to, from, n);
- n = copy_to_user_mcsafe((__force void *) to, from, n);
+ n = copy_mc_to_user((__force void *) to, from, n);
}
return n;
}

-static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
+static unsigned long copy_mc_to_page(struct page *page, size_t offset,
const char *from, size_t len)
{
unsigned long ret;
char *to;

to = kmap_atomic(page);
- ret = memcpy_mcsafe(to + offset, from, len);
+ ret = copy_mc_to_kernel(to + offset, from, len);
kunmap_atomic(to);

return ret;
}

-static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
+static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
struct iov_iter *i)
{
struct pipe_inode_info *pipe = i->pipe;
@@ -678,7 +678,7 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
unsigned long rem;

- rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page,
+ rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
off, addr, chunk);
i->head = i_head;
i->iov_offset = off + chunk - rem;
@@ -695,18 +695,17 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
}

/**
- * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
+ * _copy_mc_to_iter - copy to iter with source memory error exception handling
* @addr: source kernel address
* @bytes: total transfer length
* @iter: destination iterator
*
- * The pmem driver arranges for filesystem-dax to use this facility via
- * dax_copy_to_iter() for protecting read/write to persistent memory.
- * Unless / until an architecture can guarantee identical performance
- * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
- * performance regression to switch more users to the mcsafe version.
+ * The pmem driver deploys this for the dax operation
+ * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
+ * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes
+ * successfully copied.
*
- * Otherwise, the main differences between this and typical _copy_to_iter().
+ * The main differences between this and typical _copy_to_iter().
*
* * Typical tail/residue handling after a fault retries the copy
* byte-by-byte until the fault happens again. Re-triggering machine
@@ -717,23 +716,22 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
* * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
* Compare to copy_to_iter() where only ITER_IOVEC attempts might return
* a short copy.
- *
- * See MCSAFE_TEST for self-test.
*/
-size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
+size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
const char *from = addr;
unsigned long rem, curr_addr, s_addr = (unsigned long) addr;

if (unlikely(iov_iter_is_pipe(i)))
- return copy_pipe_to_iter_mcsafe(addr, bytes, i);
+ return copy_mc_pipe_to_iter(addr, bytes, i);
if (iter_is_iovec(i))
might_fault();
iterate_and_advance(i, bytes, v,
- copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
+ copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
+ v.iov_len),
({
- rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
- (from += v.bv_len) - v.bv_len, v.bv_len);
+ rem = copy_mc_to_page(v.bv_page, v.bv_offset,
+ (from += v.bv_len) - v.bv_len, v.bv_len);
if (rem) {
curr_addr = (unsigned long) from;
bytes = curr_addr - s_addr - rem;
@@ -741,8 +739,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
}
}),
({
- rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
- v.iov_len);
+ rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
+ - v.iov_len, v.iov_len);
if (rem) {
curr_addr = (unsigned long) from;
bytes = curr_addr - s_addr - rem;
@@ -753,8 +751,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)

return bytes;
}
-EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
-#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
+EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
+#endif /* CONFIG_ARCH_HAS_COPY_MC */

size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
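
[Because _copy_mc_to_iter() can return short for ITER_KVEC, ITER_BVEC and ITER_PIPE (see the comment above), callers compare the return value against the requested length rather than assuming all-or-nothing; roughly, a caller sketch:

	size_t copied = copy_mc_to_iter(buf, len, iter);

	if (copied != len)		/* poison consumed partway */
		return copied ? copied : -EIO;
	return copied;
]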
diff --git a/mm/filemap.c b/mm/filemap.c
index f6d36ccc2351..407b94d8ce00 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2179,6 +2179,14 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
offset = *ppos & ~PAGE_MASK;

+ /*
+ * If we've already successfully copied some data, then we
+ * can no longer safely return -EIOCBQUEUED. Hence mark
+ * an async read NOWAIT at that point.
+ */
+ if (written && (iocb->ki_flags & IOCB_WAITQ))
+ iocb->ki_flags |= IOCB_NOWAIT;
+
for (;;) {
struct page *page;
pgoff_t end_index;
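
[The rule being enforced: once 'written' is nonzero the syscall has consumed data, so the read must report that byte count rather than -EIOCBQUEUED. Marking the kiocb IOCB_NOWAIT turns any later would-block point into -EAGAIN, which the function's existing exit path already folds into the partial count, roughly:

	if (error)
		return written ? written : error;	/* keep partial progress */
]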
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2135ee7c806d..001e16ee1506 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -483,6 +483,8 @@ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
return true;
if (tcp_rmem_pressure(sk))
return true;
+ if (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss)
+ return true;
}
if (sk->sk_prot->stream_memory_read)
return sk->sk_prot->stream_memory_read(sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 75be97f6a7da..9e14bf4fa38f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4840,7 +4840,8 @@ void tcp_data_ready(struct sock *sk)
int avail = tp->rcv_nxt - tp->copied_seq;

if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
- !sock_flag(sk, SOCK_DONE))
+ !sock_flag(sk, SOCK_DONE) &&
+ tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss)
return;

sk->sk_data_ready(sk);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4603b667973a..72f3ee47e478 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -302,7 +302,7 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx,
struct nft_expr *expr;

expr = nft_expr_first(rule);
- while (expr != nft_expr_last(rule) && expr->ops) {
+ while (nft_expr_more(rule, expr)) {
if (expr->ops->activate)
expr->ops->activate(ctx, expr);

@@ -317,7 +317,7 @@ static void nft_rule_expr_deactivate(const struct nft_ctx *ctx,
struct nft_expr *expr;

expr = nft_expr_first(rule);
- while (expr != nft_expr_last(rule) && expr->ops) {
+ while (nft_expr_more(rule, expr)) {
if (expr->ops->deactivate)
expr->ops->deactivate(ctx, expr, phase);

@@ -3036,7 +3036,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
* is called on error from nf_tables_newrule().
*/
expr = nft_expr_first(rule);
- while (expr != nft_expr_last(rule) && expr->ops) {
+ while (nft_expr_more(rule, expr)) {
next = nft_expr_next(expr);
nf_tables_expr_destroy(ctx, expr);
expr = next;
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 9ef37c1b7b3b..822b3edfb1b6 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -37,7 +37,7 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net,
struct nft_expr *expr;

expr = nft_expr_first(rule);
- while (expr->ops && expr != nft_expr_last(rule)) {
+ while (nft_expr_more(rule, expr)) {
if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION)
num_actions++;

@@ -61,7 +61,7 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net,
ctx->net = net;
ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC;

- while (expr->ops && expr != nft_expr_last(rule)) {
+ while (nft_expr_more(rule, expr)) {
if (!expr->ops->offload) {
err = -EOPNOTSUPP;
goto err_out;
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index e298ec3b3c9e..ca026e2bf8d2 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -408,6 +408,7 @@ static void __exit mpls_cleanup_module(void)
module_init(mpls_init_module);
module_exit(mpls_cleanup_module);

+MODULE_SOFTDEP("post: mpls_gso");
MODULE_AUTHOR("Netronome Systems <oss-drivers@xxxxxxxxxxxxx>");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MPLS manipulation actions");
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index faeabff283a2..838b3fd94d77 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -652,12 +652,12 @@ static void tc_block_indr_cleanup(struct flow_block_cb *block_cb)
block_cb->indr.binder_type,
&block->flow_block, tcf_block_shared(block),
&extack);
+ rtnl_lock();
down_write(&block->cb_lock);
list_del(&block_cb->driver_list);
list_move(&block_cb->list, &bo.cb_list);
- up_write(&block->cb_lock);
- rtnl_lock();
tcf_block_unbind(block, &bo);
+ up_write(&block->cb_lock);
rtnl_unlock();
}
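
[The reorder gives this teardown path the same acquisition order as the setup paths, rtnl_lock first and then cb_lock, and keeps tcf_block_unbind() under the write lock. Inconsistent ordering between two such paths is the textbook ABBA deadlock, sketched here with stand-in locks:

	static DEFINE_MUTEX(A);
	static DEFINE_MUTEX(B);

	void setup_path(void)
	{
		mutex_lock(&A);
		mutex_lock(&B);		/* A -> B */
		/* ... */
	}

	void teardown_path(void)
	{
		mutex_lock(&B);
		mutex_lock(&A);		/* B -> A: can deadlock vs setup_path() */
		/* ... */
	}
]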

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 84f82771cdf5..0c345e43a09a 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -330,7 +330,7 @@ static s64 tabledist(s64 mu, s32 sigma,

/* default uniform distribution */
if (dist == NULL)
- return ((rnd % (2 * sigma)) + mu) - sigma;
+ return ((rnd % (2 * (u32)sigma)) + mu) - sigma;

t = dist->table[rnd % dist->size];
x = (sigma % NETEM_DIST_SCALE) * t;
@@ -812,6 +812,10 @@ static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
q->slot_config.max_packets = INT_MAX;
if (q->slot_config.max_bytes == 0)
q->slot_config.max_bytes = INT_MAX;
+
+ /* capping dist_jitter to the range acceptable by tabledist() */
+ q->slot_config.dist_jitter = min_t(__s64, INT_MAX, abs(q->slot_config.dist_jitter));
+
q->slot.packets_left = q->slot_config.max_packets;
q->slot.bytes_left = q->slot_config.max_bytes;
if (q->slot_config.min_delay | q->slot_config.max_delay |
@@ -1037,6 +1041,9 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_NETEM_SLOT])
get_slot(q, tb[TCA_NETEM_SLOT]);

+ /* capping jitter to the range acceptable by tabledist() */
+ q->jitter = min_t(s64, abs(q->jitter), INT_MAX);
+
return ret;

get_table_failure:
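
[tabledist() takes sigma as s32 and computes rnd % (2 * sigma). Without the cap, a jitter above INT_MAX truncates when stored, and doubling a sigma above INT_MAX/2 overflows signed int, which is undefined behaviour. The (u32) cast makes the doubling well-defined wraparound, and clamping to INT_MAX keeps it from wrapping at all:

	#include <stdio.h>

	int main(void)
	{
		int sigma = 0x7fffffff;		/* INT_MAX */

		/* 2 * sigma as int would overflow (UB); as u32 it is defined */
		printf("%u\n", 2 * (unsigned int)sigma);	/* 4294967294 */
		return 0;
	}
]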
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 7c0e4fac9748..efa65ec5e686 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1616,7 +1616,11 @@ static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
if (rc) {
kfree(buf_desc);
- return (rc == -ENOMEM) ? ERR_PTR(-EAGAIN) : ERR_PTR(rc);
+ if (rc == -ENOMEM)
+ return ERR_PTR(-EAGAIN);
+ if (rc == -ENOSPC)
+ return ERR_PTR(-ENOSPC);
+ return ERR_PTR(-EIO);
}
buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
/* CDC header stored in buf. So, pretend it was smaller */
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 681224401871..bee159924a55 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -150,12 +150,11 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
if (fragid == FIRST_FRAGMENT) {
if (unlikely(head))
goto err;
- if (skb_cloned(frag))
- frag = skb_copy(frag, GFP_ATOMIC);
+ *buf = NULL;
+ frag = skb_unshare(frag, GFP_ATOMIC);
if (unlikely(!frag))
goto err;
head = *headbuf = frag;
- *buf = NULL;
TIPC_SKB_CB(head)->tail = NULL;
if (skb_is_nonlinear(head)) {
skb_walk_frags(head, tail) {
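
[skb_unshare() is the right primitive here: a cloned first fragment shares its data with another owner, so it must be copied before the TIPC_SKB_CB and frag-list surgery below can write to it. Moving '*buf = NULL' ahead of the call also matters, because skb_unshare() consumes the original reference on the copy path; its contract is roughly:

	/* rough shape of skb_unshare(), as relied on above */
	if (skb_cloned(frag)) {
		struct sk_buff *nskb = skb_copy(frag, GFP_ATOMIC);

		if (nskb)
			consume_skb(frag);	/* original ref is dropped */
		else
			kfree_skb(frag);
		frag = nskb;			/* may be NULL on OOM */
	}
]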
diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index 20f2efd57b11..bb709eda96cd 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -45,7 +45,7 @@ scm_version()

# Check for git and a git repo.
if test -z "$(git rev-parse --show-cdup 2>/dev/null)" &&
- head=$(git rev-parse --verify --short HEAD 2>/dev/null); then
+ head=$(git rev-parse --verify HEAD 2>/dev/null); then

# If we are at a tagged commit (like "v2.6.30-rc6"), we ignore
# it, because this version is defined in the top level Makefile.
@@ -59,11 +59,22 @@ scm_version()
fi
# If we are past a tagged commit (like
# "v2.6.30-rc5-302-g72357d5"), we pretty print it.
- if atag="$(git describe 2>/dev/null)"; then
- echo "$atag" | awk -F- '{printf("-%05d-%s", $(NF-1),$(NF))}'
-
- # If we don't have a tag at all we print -g{commitish}.
+ #
+ # Ensure the abbreviated sha1 has exactly 12
+ # hex characters, to make the output
+ # independent of git version, local
+ # core.abbrev settings and/or total number of
+ # objects in the current repository - passing
+ # --abbrev=12 ensures a minimum of 12, and the
+ # awk substr() then picks the 'g' and first 12
+ # hex chars.
+ if atag="$(git describe --abbrev=12 2>/dev/null)"; then
+ echo "$atag" | awk -F- '{printf("-%05d-%s", $(NF-1),substr($(NF),0,13))}'
+
+ # If we don't have a tag at all we print -g{commitish},
+ # again using exactly 12 hex chars.
else
+ head="$(echo $head | cut -c1-12)"
printf '%s%s' -g $head
fi
fi
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 0d36259b690d..e4b47759ba1c 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -181,6 +181,12 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
break;
case EVM_IMA_XATTR_DIGSIG:
case EVM_XATTR_PORTABLE_DIGSIG:
+ /* accept xattr with non-empty signature field */
+ if (xattr_len <= sizeof(struct signature_v2_hdr)) {
+ evm_status = INTEGRITY_FAIL;
+ goto out;
+ }
+
hdr = (struct signature_v2_hdr *)xattr_data;
digest.hdr.algo = hdr->hash_algo;
rc = evm_calc_hash(dentry, xattr_name, xattr_value,
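
[The added check follows the standard validate-before-cast rule: xattr_data is about to be reinterpreted as struct signature_v2_hdr and the bytes after the header treated as the signature, so a blob of at most header size cannot carry one. The general shape of the pattern; parse_blob() is illustrative only:

	static int parse_blob(const void *data, size_t len)
	{
		const struct signature_v2_hdr *hdr = data;

		if (len <= sizeof(*hdr))	/* must leave payload bytes */
			return -EINVAL;
		/* safe: header fields plus len - sizeof(*hdr) signature bytes */
		return 0;
	}
]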
diff --git a/tools/arch/x86/include/asm/mcsafe_test.h b/tools/arch/x86/include/asm/mcsafe_test.h
deleted file mode 100644
index 2ccd588fbad4..000000000000
--- a/tools/arch/x86/include/asm/mcsafe_test.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MCSAFE_TEST_H_
-#define _MCSAFE_TEST_H_
-
-.macro MCSAFE_TEST_CTL
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
-.endm
-#endif /* _MCSAFE_TEST_H_ */
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 45f8e1b02241..0b5b8ae56bd9 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -4,7 +4,6 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
-#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>

@@ -187,117 +186,3 @@ SYM_FUNC_START(memcpy_orig)
SYM_FUNC_END(memcpy_orig)

.popsection
-
-#ifndef CONFIG_UML
-
-MCSAFE_TEST_CTL
-
-/*
- * __memcpy_mcsafe - memory copy with machine check exception handling
- * Note that we only catch machine checks when reading the source addresses.
- * Writes to target are posted and don't generate machine checks.
- */
-SYM_FUNC_START(__memcpy_mcsafe)
- cmpl $8, %edx
- /* Less than 8 bytes? Go to byte copy loop */
- jb .L_no_whole_words
-
- /* Check for bad alignment of source */
- testl $7, %esi
- /* Already aligned */
- jz .L_8byte_aligned
-
- /* Copy one byte at a time until source is 8-byte aligned */
- movl %esi, %ecx
- andl $7, %ecx
- subl $8, %ecx
- negl %ecx
- subl %ecx, %edx
-.L_read_leading_bytes:
- movb (%rsi), %al
- MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
- MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
-.L_write_leading_bytes:
- movb %al, (%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .L_read_leading_bytes
-
-.L_8byte_aligned:
- movl %edx, %ecx
- andl $7, %edx
- shrl $3, %ecx
- jz .L_no_whole_words
-
-.L_read_words:
- movq (%rsi), %r8
- MCSAFE_TEST_SRC %rsi 8 .E_read_words
- MCSAFE_TEST_DST %rdi 8 .E_write_words
-.L_write_words:
- movq %r8, (%rdi)
- addq $8, %rsi
- addq $8, %rdi
- decl %ecx
- jnz .L_read_words
-
- /* Any trailing bytes? */
-.L_no_whole_words:
- andl %edx, %edx
- jz .L_done_memcpy_trap
-
- /* Copy trailing bytes */
- movl %edx, %ecx
-.L_read_trailing_bytes:
- movb (%rsi), %al
- MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
- MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
-.L_write_trailing_bytes:
- movb %al, (%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .L_read_trailing_bytes
-
- /* Copy successful. Return zero */
-.L_done_memcpy_trap:
- xorl %eax, %eax
-.L_done:
- ret
-SYM_FUNC_END(__memcpy_mcsafe)
-EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
-
- .section .fixup, "ax"
- /*
- * Return number of bytes not copied for any failure. Note that
- * there is no "tail" handling since the source buffer is 8-byte
- * aligned and poison is cacheline aligned.
- */
-.E_read_words:
- shll $3, %ecx
-.E_leading_bytes:
- addl %edx, %ecx
-.E_trailing_bytes:
- mov %ecx, %eax
- jmp .L_done
-
- /*
- * For write fault handling, given the destination is unaligned,
- * we handle faults on multi-byte writes with a byte-by-byte
- * copy up to the write-protected page.
- */
-.E_write_words:
- shll $3, %ecx
- addl %edx, %ecx
- movl %ecx, %edx
- jmp mcsafe_handle_tail
-
- .previous
-
- _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
- _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
- _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
- _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
- _ASM_EXTABLE(.L_write_words, .E_write_words)
- _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
-#endif
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b6238b2209b7..f4ef5d5a1232 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1438,8 +1438,8 @@ union bpf_attr {
* Return
* The return value depends on the result of the test, and can be:
*
- * * 0, if the *skb* task belongs to the cgroup2.
- * * 1, if the *skb* task does not belong to the cgroup2.
+ * * 0, if current task belongs to the cgroup2.
+ * * 1, if current task does not belong to the cgroup2.
* * A negative error code, if an error occurred.
*
* long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 90a66891441a..42ac19e0299c 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -548,8 +548,9 @@ static const char *uaccess_safe_builtin[] = {
"__ubsan_handle_shift_out_of_bounds",
/* misc */
"csum_partial_copy_generic",
- "__memcpy_mcsafe",
- "mcsafe_handle_tail",
+ "copy_mc_fragile",
+ "copy_mc_fragile_handle_tail",
+ "copy_mc_enhanced_fast_string",
"ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
NULL
};
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index dd68a40a790c..878db6a59a41 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -13,7 +13,6 @@ perf-y += synthesize.o
perf-y += kallsyms-parse.o
perf-y += find-bit-bench.o

-perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o

diff --git a/tools/perf/bench/mem-memcpy-x86-64-lib.c b/tools/perf/bench/mem-memcpy-x86-64-lib.c
deleted file mode 100644
index 4130734dde84..000000000000
--- a/tools/perf/bench/mem-memcpy-x86-64-lib.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy
- * of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy'
- * happy.
- */
-#include <linux/types.h>
-
-unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len);
-
-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len)
-{
- for (; len; --len, to++, from++) {
- /*
- * Call the assembly routine back directly since
- * memcpy_mcsafe() may silently fallback to memcpy.
- */
- unsigned long rem = __memcpy_mcsafe(to, from, 1);
-
- if (rem)
- break;
- }
- return len;
-}
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index a1a5dc645b40..2ac0fff6dad8 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -23,7 +23,8 @@
#include "nfit_test.h"
#include "../watermark.h"

-#include <asm/mcsafe_test.h>
+#include <asm/copy_mc_test.h>
+#include <asm/mce.h>

/*
* Generate an NFIT table to describe the following topology:
@@ -3283,7 +3284,7 @@ static struct platform_driver nfit_test_driver = {
.id_table = nfit_test_id,
};

-static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
+static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));

enum INJECT {
INJECT_NONE,
@@ -3291,7 +3292,7 @@ enum INJECT {
INJECT_DST,
};

-static void mcsafe_test_init(char *dst, char *src, size_t size)
+static void copy_mc_test_init(char *dst, char *src, size_t size)
{
size_t i;

@@ -3300,7 +3301,7 @@ static void mcsafe_test_init(char *dst, char *src, size_t size)
src[i] = (char) i;
}

-static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
+static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src,
size_t size, unsigned long rem)
{
size_t i;
@@ -3321,12 +3322,12 @@ static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
return true;
}

-void mcsafe_test(void)
+void copy_mc_test(void)
{
char *inject_desc[] = { "none", "source", "destination" };
enum INJECT inj;

- if (IS_ENABLED(CONFIG_MCSAFE_TEST)) {
+ if (IS_ENABLED(CONFIG_COPY_MC_TEST)) {
pr_info("%s: run...\n", __func__);
} else {
pr_info("%s: disabled, skip.\n", __func__);
@@ -3344,31 +3345,31 @@ void mcsafe_test(void)

switch (inj) {
case INJECT_NONE:
- mcsafe_inject_src(NULL);
- mcsafe_inject_dst(NULL);
- dst = &mcsafe_buf[2048];
- src = &mcsafe_buf[1024 - i];
+ copy_mc_inject_src(NULL);
+ copy_mc_inject_dst(NULL);
+ dst = &copy_mc_buf[2048];
+ src = &copy_mc_buf[1024 - i];
expect = 0;
break;
case INJECT_SRC:
- mcsafe_inject_src(&mcsafe_buf[1024]);
- mcsafe_inject_dst(NULL);
- dst = &mcsafe_buf[2048];
- src = &mcsafe_buf[1024 - i];
+ copy_mc_inject_src(&copy_mc_buf[1024]);
+ copy_mc_inject_dst(NULL);
+ dst = &copy_mc_buf[2048];
+ src = &copy_mc_buf[1024 - i];
expect = 512 - i;
break;
case INJECT_DST:
- mcsafe_inject_src(NULL);
- mcsafe_inject_dst(&mcsafe_buf[2048]);
- dst = &mcsafe_buf[2048 - i];
- src = &mcsafe_buf[1024];
+ copy_mc_inject_src(NULL);
+ copy_mc_inject_dst(&copy_mc_buf[2048]);
+ dst = &copy_mc_buf[2048 - i];
+ src = &copy_mc_buf[1024];
expect = 512 - i;
break;
}

- mcsafe_test_init(dst, src, 512);
- rem = __memcpy_mcsafe(dst, src, 512);
- valid = mcsafe_test_validate(dst, src, 512, expect);
+ copy_mc_test_init(dst, src, 512);
+ rem = copy_mc_fragile(dst, src, 512);
+ valid = copy_mc_test_validate(dst, src, 512, expect);
if (rem == expect && valid)
continue;
pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
@@ -3380,8 +3381,8 @@ void mcsafe_test(void)
}
}

- mcsafe_inject_src(NULL);
- mcsafe_inject_dst(NULL);
+ copy_mc_inject_src(NULL);
+ copy_mc_inject_dst(NULL);
}

static __init int nfit_test_init(void)
@@ -3392,7 +3393,7 @@ static __init int nfit_test_init(void)
libnvdimm_test();
acpi_nfit_test();
device_dax_test();
- mcsafe_test();
+ copy_mc_test();
dax_pmem_test();
dax_pmem_core_test();
#ifdef CONFIG_DEV_DAX_PMEM_COMPAT
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
index ddaf140b8255..994b11af765c 100644
--- a/tools/testing/selftests/powerpc/copyloops/.gitignore
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -12,4 +12,4 @@ memcpy_p7_t1
copyuser_64_exc_t0
copyuser_64_exc_t1
copyuser_64_exc_t2
-memcpy_mcsafe_64
+copy_mc_64
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 0917983a1c78..3095b1f1c02b 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4
TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
copyuser_p7_t0 copyuser_p7_t1 \
memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
- memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \
+ memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \
copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2

EXTRA_SOURCES := validate.c ../harness.c stubs.S
@@ -45,9 +45,9 @@ $(OUTPUT)/memcpy_p7_t%: memcpy_power7.S $(EXTRA_SOURCES)
-D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
-o $@ $^

-$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES)
+$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES)
$(CC) $(CPPFLAGS) $(CFLAGS) \
- -D COPY_LOOP=test_memcpy_mcsafe \
+ -D COPY_LOOP=test_copy_mc_generic \
-o $@ $^

$(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
diff --git a/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
new file mode 100644
index 000000000000..88d46c471493
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) IBM Corporation, 2011
+ * Derived from copyuser_power7.s by Anton Blanchard <anton@xxxxxxxxxx>
+ * Author - Balbir Singh <bsingharora@xxxxxxxxx>
+ */
+#include <asm/ppc_asm.h>
+#include <asm/errno.h>
+#include <asm/export.h>
+
+ .macro err1
+100:
+ EX_TABLE(100b,.Ldo_err1)
+ .endm
+
+ .macro err2
+200:
+ EX_TABLE(200b,.Ldo_err2)
+ .endm
+
+ .macro err3
+300: EX_TABLE(300b,.Ldone)
+ .endm
+
+.Ldo_err2:
+ ld r22,STK_REG(R22)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r14,STK_REG(R14)(r1)
+ addi r1,r1,STACKFRAMESIZE
+.Ldo_err1:
+ /* Do a byte by byte copy to get the exact remaining size */
+ mtctr r7
+46:
+err3; lbz r0,0(r4)
+ addi r4,r4,1
+err3; stb r0,0(r3)
+ addi r3,r3,1
+ bdnz 46b
+ li r3,0
+ blr
+
+.Ldone:
+ mfctr r3
+ blr
+
+
+_GLOBAL(copy_mc_generic)
+ mr r7,r5
+ cmpldi r5,16
+ blt .Lshort_copy
+
+.Lcopy:
+ /* Get the source 8B aligned */
+ neg r6,r4
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-3)
+
+ bf cr7*4+3,1f
+err1; lbz r0,0(r4)
+ addi r4,r4,1
+err1; stb r0,0(r3)
+ addi r3,r3,1
+ subi r7,r7,1
+
+1: bf cr7*4+2,2f
+err1; lhz r0,0(r4)
+ addi r4,r4,2
+err1; sth r0,0(r3)
+ addi r3,r3,2
+ subi r7,r7,2
+
+2: bf cr7*4+1,3f
+err1; lwz r0,0(r4)
+ addi r4,r4,4
+err1; stw r0,0(r3)
+ addi r3,r3,4
+ subi r7,r7,4
+
+3: sub r5,r5,r6
+ cmpldi r5,128
+
+ mflr r0
+ stdu r1,-STACKFRAMESIZE(r1)
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+ std r17,STK_REG(R17)(r1)
+ std r18,STK_REG(R18)(r1)
+ std r19,STK_REG(R19)(r1)
+ std r20,STK_REG(R20)(r1)
+ std r21,STK_REG(R21)(r1)
+ std r22,STK_REG(R22)(r1)
+ std r0,STACKFRAMESIZE+16(r1)
+
+ blt 5f
+ srdi r6,r5,7
+ mtctr r6
+
+ /* Now do cacheline (128B) sized loads and stores. */
+ .align 5
+4:
+err2; ld r0,0(r4)
+err2; ld r6,8(r4)
+err2; ld r8,16(r4)
+err2; ld r9,24(r4)
+err2; ld r10,32(r4)
+err2; ld r11,40(r4)
+err2; ld r12,48(r4)
+err2; ld r14,56(r4)
+err2; ld r15,64(r4)
+err2; ld r16,72(r4)
+err2; ld r17,80(r4)
+err2; ld r18,88(r4)
+err2; ld r19,96(r4)
+err2; ld r20,104(r4)
+err2; ld r21,112(r4)
+err2; ld r22,120(r4)
+ addi r4,r4,128
+err2; std r0,0(r3)
+err2; std r6,8(r3)
+err2; std r8,16(r3)
+err2; std r9,24(r3)
+err2; std r10,32(r3)
+err2; std r11,40(r3)
+err2; std r12,48(r3)
+err2; std r14,56(r3)
+err2; std r15,64(r3)
+err2; std r16,72(r3)
+err2; std r17,80(r3)
+err2; std r18,88(r3)
+err2; std r19,96(r3)
+err2; std r20,104(r3)
+err2; std r21,112(r3)
+err2; std r22,120(r3)
+ addi r3,r3,128
+ subi r7,r7,128
+ bdnz 4b
+
+ clrldi r5,r5,(64-7)
+
+ /* Up to 127B to go */
+5: srdi r6,r5,4
+ mtocrf 0x01,r6
+
+6: bf cr7*4+1,7f
+err2; ld r0,0(r4)
+err2; ld r6,8(r4)
+err2; ld r8,16(r4)
+err2; ld r9,24(r4)
+err2; ld r10,32(r4)
+err2; ld r11,40(r4)
+err2; ld r12,48(r4)
+err2; ld r14,56(r4)
+ addi r4,r4,64
+err2; std r0,0(r3)
+err2; std r6,8(r3)
+err2; std r8,16(r3)
+err2; std r9,24(r3)
+err2; std r10,32(r3)
+err2; std r11,40(r3)
+err2; std r12,48(r3)
+err2; std r14,56(r3)
+ addi r3,r3,64
+ subi r7,r7,64
+
+7: ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r22,STK_REG(R22)(r1)
+ addi r1,r1,STACKFRAMESIZE
+
+ /* Up to 63B to go */
+ bf cr7*4+2,8f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+err1; ld r8,16(r4)
+err1; ld r9,24(r4)
+ addi r4,r4,32
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+err1; std r8,16(r3)
+err1; std r9,24(r3)
+ addi r3,r3,32
+ subi r7,r7,32
+
+ /* Up to 31B to go */
+8: bf cr7*4+3,9f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+ addi r4,r4,16
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+ addi r3,r3,16
+ subi r7,r7,16
+
+9: clrldi r5,r5,(64-4)
+
+ /* Up to 15B to go */
+.Lshort_copy:
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+err1; lwz r0,0(r4) /* Less chance of a reject with word ops */
+err1; lwz r6,4(r4)
+ addi r4,r4,8
+err1; stw r0,0(r3)
+err1; stw r6,4(r3)
+ addi r3,r3,8
+ subi r7,r7,8
+
+12: bf cr7*4+1,13f
+err1; lwz r0,0(r4)
+ addi r4,r4,4
+err1; stw r0,0(r3)
+ addi r3,r3,4
+ subi r7,r7,4
+
+13: bf cr7*4+2,14f
+err1; lhz r0,0(r4)
+ addi r4,r4,2
+err1; sth r0,0(r3)
+ addi r3,r3,2
+ subi r7,r7,2
+
+14: bf cr7*4+3,15f
+err1; lbz r0,0(r4)
+err1; stb r0,0(r3)
+
+15: li r3,0
+ blr
+
+EXPORT_SYMBOL_GPL(copy_mc_generic);