Re: Linux 6.6.26

From: Greg Kroah-Hartman
Date: Wed Apr 10 2024 - 11:00:20 EST


diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
index 32a8893e5617..9edb2860a3e1 100644
--- a/Documentation/admin-guide/hw-vuln/spectre.rst
+++ b/Documentation/admin-guide/hw-vuln/spectre.rst
@@ -138,11 +138,10 @@ associated with the source address of the indirect branch. Specifically,
the BHB might be shared across privilege levels even in the presence of
Enhanced IBRS.

-Currently the only known real-world BHB attack vector is via
-unprivileged eBPF. Therefore, it's highly recommended to not enable
-unprivileged eBPF, especially when eIBRS is used (without retpolines).
-For a full mitigation against BHB attacks, it's recommended to use
-retpolines (or eIBRS combined with retpolines).
+Previously the only known real-world BHB attack vector was via unprivileged
+eBPF. Further research has found attacks that don't require unprivileged eBPF.
+For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or
+use the BHB clearing sequence.

Attack scenarios
----------------
@@ -430,6 +429,23 @@ The possible values in this file are:
'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB
=========================== =======================================================

+ - Branch History Injection (BHI) protection status:
+
+.. list-table::
+
+ * - BHI: Not affected
+ - System is not affected
+ * - BHI: Retpoline
+ - System is protected by retpoline
+ * - BHI: BHI_DIS_S
+ - System is protected by BHI_DIS_S
+ * - BHI: SW loop; KVM SW loop
+ - System is protected by software clearing sequence
+ * - BHI: Syscall hardening
+ - Syscalls are hardened against BHI
+ * - BHI: Syscall hardening; KVM: SW loop
+ - System is protected from userspace attacks by syscall hardening; KVM is protected by software clearing sequence
+
Full mitigation might require a microcode update from the CPU
vendor. When the necessary microcode is not available, the kernel will
report vulnerability.
@@ -484,7 +500,11 @@ Spectre variant 2

Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
boot, by setting the IBRS bit, and they're automatically protected against
- Spectre v2 variant attacks.
+ some Spectre v2 variant attacks. The BHB can still influence the choice of
+ indirect branch predictor entry, and although branch predictor entries are
+ isolated between modes when eIBRS is enabled, the BHB itself is not isolated
+ between modes. Systems which support BHI_DIS_S will set it to protect against
+ BHI attacks.

On Intel's enhanced IBRS systems, this includes cross-thread branch target
injections on SMT systems (STIBP). In other words, Intel eIBRS enables
@@ -638,6 +658,22 @@ kernel command line.
spectre_v2=off. Spectre variant 1 mitigations
cannot be disabled.

+ spectre_bhi=
+
+ [X86] Control mitigation of Branch History Injection
+ (BHI) vulnerability. Syscalls are hardened against BHI
+ regardless of this setting. This setting affects the deployment
+ of the HW BHI control and the SW BHB clearing sequence.
+
+ on
+ unconditionally enable.
+ off
+ unconditionally disable.
+ auto
+ enable if hardware mitigation
+ control(BHI_DIS_S) is available, otherwise
+ enable alternate mitigation in KVM.
+
For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt

Mitigation selection guide
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 7a36124dde5e..61199466c043 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5920,6 +5920,18 @@
sonypi.*= [HW] Sony Programmable I/O Control Device driver
See Documentation/admin-guide/laptops/sonypi.rst

+ spectre_bhi= [X86] Control mitigation of Branch History Injection
+ (BHI) vulnerability. Syscalls are hardened against BHI
+ reglardless of this setting. This setting affects the
+ deployment of the HW BHI control and the SW BHB
+ clearing sequence.
+
+ on - unconditionally enable.
+ off - unconditionally disable.
+ auto - (default) enable hardware mitigation
+ (BHI_DIS_S) if available, otherwise enable
+ alternate mitigation in KVM.
+
spectre_v2= [X86] Control mitigation of Spectre variant 2
(indirect branch speculation) vulnerability.
The default operation protects the kernel from
diff --git a/Makefile b/Makefile
index 022af2a9a6d9..77ad41bd298e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 6
PATCHLEVEL = 6
-SUBLEVEL = 25
+SUBLEVEL = 26
EXTRAVERSION =
NAME = Hurr durr I'ma ninja sloth

diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
index 5a33e16a8b67..c2f5e9f6679d 100644
--- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
@@ -970,6 +970,8 @@ bluetooth: bluetooth {
vddrf-supply = <&pp1300_l2c>;
vddch0-supply = <&pp3300_l10c>;
max-speed = <3200000>;
+
+ qcom,local-bd-address-broken;
};
};

diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index c94c0f8c9a73..d95416b93a9d 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -728,7 +728,6 @@ static void sve_init_header_from_task(struct user_sve_header *header,
{
unsigned int vq;
bool active;
- bool fpsimd_only;
enum vec_type task_type;

memset(header, 0, sizeof(*header));
@@ -744,12 +743,10 @@ static void sve_init_header_from_task(struct user_sve_header *header,
case ARM64_VEC_SVE:
if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
header->flags |= SVE_PT_VL_INHERIT;
- fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE);
break;
case ARM64_VEC_SME:
if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT))
header->flags |= SVE_PT_VL_INHERIT;
- fpsimd_only = false;
break;
default:
WARN_ON_ONCE(1);
@@ -757,7 +754,7 @@ static void sve_init_header_from_task(struct user_sve_header *header,
}

if (active) {
- if (fpsimd_only) {
+ if (target->thread.fp_type == FP_STATE_FPSIMD) {
header->flags |= SVE_PT_REGS_FPSIMD;
} else {
header->flags |= SVE_PT_REGS_SVE;
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index f155b8c9e98c..15aa9bad1c28 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -805,12 +805,15 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
* Perform the appropriate TLB invalidation based on the
* evicted pte value (if any).
*/
- if (kvm_pte_table(ctx->old, ctx->level))
- kvm_tlb_flush_vmid_range(mmu, ctx->addr,
- kvm_granule_size(ctx->level));
- else if (kvm_pte_valid(ctx->old))
+ if (kvm_pte_table(ctx->old, ctx->level)) {
+ u64 size = kvm_granule_size(ctx->level);
+ u64 addr = ALIGN_DOWN(ctx->addr, size);
+
+ kvm_tlb_flush_vmid_range(mmu, addr, size);
+ } else if (kvm_pte_valid(ctx->old)) {
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
ctx->addr, ctx->level);
+ }
}

if (stage2_pte_is_counted(ctx->old))
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 150d1c6543f7..29196dce9b91 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -876,7 +876,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
emit(A64_UXTH(is64, dst, dst), ctx);
break;
case 32:
- emit(A64_REV32(is64, dst, dst), ctx);
+ emit(A64_REV32(0, dst, dst), ctx);
/* upper 32 bits already cleared */
break;
case 64:
@@ -1189,7 +1189,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
} else {
emit_a64_mov_i(1, tmp, off, ctx);
if (sign_extend)
- emit(A64_LDRSW(dst, src_adj, off_adj), ctx);
+ emit(A64_LDRSW(dst, src, tmp), ctx);
else
emit(A64_LDR32(dst, src, tmp), ctx);
}
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 926bec775f41..9822366dc186 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -130,7 +130,7 @@ void set_pud_at(struct mm_struct *mm, unsigned long addr,

WARN_ON(pte_hw_valid(pud_pte(*pudp)));
assert_spin_locked(pud_lockptr(mm, pudp));
- WARN_ON(!(pud_large(pud)));
+ WARN_ON(!(pud_leaf(pud)));
#endif
trace_hugepage_set_pud(addr, pud_val(pud));
return set_pte_at(mm, addr, pudp_ptep(pudp), pud_pte(pud));
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index ec0cab9fbddd..72ec1d9bd3f3 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -319,7 +319,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)

#define __get_kernel_nofault(dst, src, type, err_label) \
do { \
- long __kr_err; \
+ long __kr_err = 0; \
\
__get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \
if (unlikely(__kr_err)) \
@@ -328,7 +328,7 @@ do { \

#define __put_kernel_nofault(dst, src, type, err_label) \
do { \
- long __kr_err; \
+ long __kr_err = 0; \
\
__put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \
if (unlikely(__kr_err)) \
diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h
index 10aaa83db89e..95050ebe9ad0 100644
--- a/arch/riscv/include/uapi/asm/auxvec.h
+++ b/arch/riscv/include/uapi/asm/auxvec.h
@@ -34,7 +34,7 @@
#define AT_L3_CACHEGEOMETRY 47

/* entries in ARCH_DLINFO */
-#define AT_VECTOR_SIZE_ARCH 9
+#define AT_VECTOR_SIZE_ARCH 10
#define AT_MINSIGSTKSZ 51

#endif /* _UAPI_ASM_RISCV_AUXVEC_H */
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
index 37e87fdcf6a0..30e12b310cab 100644
--- a/arch/riscv/kernel/patch.c
+++ b/arch/riscv/kernel/patch.c
@@ -80,6 +80,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len)
*/
lockdep_assert_held(&text_mutex);

+ preempt_disable();
+
if (across_pages)
patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1);

@@ -92,6 +94,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len)
if (across_pages)
patch_unmap(FIX_TEXT_POKE1);

+ preempt_enable();
+
return 0;
}
NOKPROBE_SYMBOL(__patch_insn_set);
@@ -122,6 +126,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len)
if (!riscv_patch_in_stop_machine)
lockdep_assert_held(&text_mutex);

+ preempt_disable();
+
if (across_pages)
patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1);

@@ -134,6 +140,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len)
if (across_pages)
patch_unmap(FIX_TEXT_POKE1);

+ preempt_enable();
+
return ret;
}
NOKPROBE_SYMBOL(__patch_insn_write);
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index e32d737e039f..83e223318822 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -26,8 +26,6 @@
#include <asm/cpuidle.h>
#include <asm/vector.h>

-register unsigned long gp_in_global __asm__("gp");
-
#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
#include <linux/stackprotector.h>
unsigned long __stack_chk_guard __read_mostly;
@@ -186,7 +184,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
if (unlikely(args->fn)) {
/* Kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
- childregs->gp = gp_in_global;
/* Supervisor/Machine, irqs on: */
childregs->status = SR_PP | SR_PIE;

diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c
index 39e72aa016a4..b467ba5ed910 100644
--- a/arch/riscv/kvm/aia_aplic.c
+++ b/arch/riscv/kvm/aia_aplic.c
@@ -137,11 +137,21 @@ static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending)
raw_spin_lock_irqsave(&irqd->lock, flags);

sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK;
- if (!pending &&
- ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) ||
- (sm == APLIC_SOURCECFG_SM_LEVEL_LOW)))
+ if (sm == APLIC_SOURCECFG_SM_INACTIVE)
goto skip_write_pending;

+ if (sm == APLIC_SOURCECFG_SM_LEVEL_HIGH ||
+ sm == APLIC_SOURCECFG_SM_LEVEL_LOW) {
+ if (!pending)
+ goto skip_write_pending;
+ if ((irqd->state & APLIC_IRQ_STATE_INPUT) &&
+ sm == APLIC_SOURCECFG_SM_LEVEL_LOW)
+ goto skip_write_pending;
+ if (!(irqd->state & APLIC_IRQ_STATE_INPUT) &&
+ sm == APLIC_SOURCECFG_SM_LEVEL_HIGH)
+ goto skip_write_pending;
+ }
+
if (pending)
irqd->state |= APLIC_IRQ_STATE_PENDING;
else
@@ -187,16 +197,31 @@ static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled)

static bool aplic_read_input(struct aplic *aplic, u32 irq)
{
- bool ret;
- unsigned long flags;
+ u32 sourcecfg, sm, raw_input, irq_inverted;
struct aplic_irq *irqd;
+ unsigned long flags;
+ bool ret = false;

if (!irq || aplic->nr_irqs <= irq)
return false;
irqd = &aplic->irqs[irq];

raw_spin_lock_irqsave(&irqd->lock, flags);
- ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false;
+
+ sourcecfg = irqd->sourcecfg;
+ if (sourcecfg & APLIC_SOURCECFG_D)
+ goto skip;
+
+ sm = sourcecfg & APLIC_SOURCECFG_SM_MASK;
+ if (sm == APLIC_SOURCECFG_SM_INACTIVE)
+ goto skip;
+
+ raw_input = (irqd->state & APLIC_IRQ_STATE_INPUT) ? 1 : 0;
+ irq_inverted = (sm == APLIC_SOURCECFG_SM_LEVEL_LOW ||
+ sm == APLIC_SOURCECFG_SM_EDGE_FALL) ? 1 : 0;
+ ret = !!(raw_input ^ irq_inverted);
+
+skip:
raw_spin_unlock_irqrestore(&irqd->lock, flags);

return ret;
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index 442a74f113cb..14e1a73ffcfe 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -360,7 +360,7 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
}
pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
pud_populate(&init_mm, pud, pmd);
- } else if (pud_large(*pud)) {
+ } else if (pud_leaf(*pud)) {
continue;
}
pgtable_pmd_populate(pud, addr, next, mode);
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index fb3ee7758b76..38290b0078c5 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -729,7 +729,7 @@ static inline int pud_bad(pud_t pud)
{
unsigned long type = pud_val(pud) & _REGION_ENTRY_TYPE_MASK;

- if (type > _REGION_ENTRY_TYPE_R3 || pud_large(pud))
+ if (type > _REGION_ENTRY_TYPE_R3 || pud_leaf(pud))
return 1;
if (type < _REGION_ENTRY_TYPE_R3)
return 0;
@@ -1396,7 +1396,7 @@ static inline unsigned long pud_deref(pud_t pud)
unsigned long origin_mask;

origin_mask = _REGION_ENTRY_ORIGIN;
- if (pud_large(pud))
+ if (pud_leaf(pud))
origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
return (unsigned long)__va(pud_val(pud) & origin_mask);
}
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 49a11f6dd7ae..26c08ee87740 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -653,6 +653,7 @@ SYM_DATA_START_LOCAL(daton_psw)
SYM_DATA_END(daton_psw)

.section .rodata, "a"
+ .balign 8
#define SYSCALL(esame,emu) .quad __s390x_ ## esame
SYM_DATA_START(sys_call_table)
#include "asm/syscall_table.h"
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 157e0a8d5157..d17bb1ef63f4 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -596,7 +596,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
pud = pud_offset(p4d, vmaddr);
VM_BUG_ON(pud_none(*pud));
/* large puds cannot yet be handled */
- if (pud_large(*pud))
+ if (pud_leaf(*pud))
return -EFAULT;
pmd = pmd_offset(pud, vmaddr);
VM_BUG_ON(pmd_none(*pmd));
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 297a6d897d5a..5f64f3d0fafb 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -224,7 +224,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
if (p4d_present(*p4dp)) {
pudp = pud_offset(p4dp, addr);
if (pud_present(*pudp)) {
- if (pud_large(*pudp))
+ if (pud_leaf(*pudp))
return (pte_t *) pudp;
pmdp = pmd_offset(pudp, addr);
}
@@ -240,7 +240,7 @@ int pmd_huge(pmd_t pmd)

int pud_huge(pud_t pud)
{
- return pud_large(pud);
+ return pud_leaf(pud);
}

bool __init arch_hugetlb_valid_size(unsigned long size)
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index b87e96c64b61..441f654d048d 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -274,7 +274,7 @@ static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
if (pud_none(*pudp))
return -EINVAL;
next = pud_addr_end(addr, end);
- if (pud_large(*pudp)) {
+ if (pud_leaf(*pudp)) {
need_split = !!(flags & SET_MEMORY_4K);
need_split |= !!(addr & ~PUD_MASK);
need_split |= !!(addr + PUD_SIZE > next);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 5cb92941540b..5e349869590a 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -479,7 +479,7 @@ static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
return -ENOENT;

/* Large PUDs are not supported yet. */
- if (pud_large(*pud))
+ if (pud_leaf(*pud))
return -EFAULT;

*pmdp = pmd_offset(pud, addr);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 6d276103c6d5..2d3f65da56ee 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -322,7 +322,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
if (!add) {
if (pud_none(*pud))
continue;
- if (pud_large(*pud)) {
+ if (pud_leaf(*pud)) {
if (IS_ALIGNED(addr, PUD_SIZE) &&
IS_ALIGNED(next, PUD_SIZE)) {
pud_clear(pud);
@@ -343,7 +343,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
if (!pmd)
goto out;
pud_populate(&init_mm, pud, pmd);
- } else if (pud_large(*pud)) {
+ } else if (pud_leaf(*pud)) {
continue;
}
ret = modify_pmd_table(pud, addr, next, add, direct);
@@ -586,7 +586,7 @@ pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc)
if (!pmd)
goto out;
pud_populate(&init_mm, pud, pmd);
- } else if (WARN_ON_ONCE(pud_large(*pud))) {
+ } else if (WARN_ON_ONCE(pud_leaf(*pud))) {
goto out;
}
pmd = pmd_offset(pud, addr);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e507692e51e7..8af02176f68b 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -516,11 +516,12 @@ static void bpf_skip(struct bpf_jit *jit, int size)
* PLT for hotpatchable calls. The calling convention is the same as for the
* ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered.
*/
-extern const char bpf_plt[];
-extern const char bpf_plt_ret[];
-extern const char bpf_plt_target[];
-extern const char bpf_plt_end[];
-#define BPF_PLT_SIZE 32
+struct bpf_plt {
+ char code[16];
+ void *ret;
+ void *target;
+} __packed;
+extern const struct bpf_plt bpf_plt;
asm(
".pushsection .rodata\n"
" .balign 8\n"
@@ -531,15 +532,14 @@ asm(
" .balign 8\n"
"bpf_plt_ret: .quad 0\n"
"bpf_plt_target: .quad 0\n"
- "bpf_plt_end:\n"
" .popsection\n"
);

-static void bpf_jit_plt(void *plt, void *ret, void *target)
+static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target)
{
- memcpy(plt, bpf_plt, BPF_PLT_SIZE);
- *(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret;
- *(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target ?: ret;
+ memcpy(plt, &bpf_plt, sizeof(*plt));
+ plt->ret = ret;
+ plt->target = target;
}

/*
@@ -662,9 +662,9 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
jit->prg = ALIGN(jit->prg, 8);
jit->prologue_plt = jit->prg;
if (jit->prg_buf)
- bpf_jit_plt(jit->prg_buf + jit->prg,
+ bpf_jit_plt((struct bpf_plt *)(jit->prg_buf + jit->prg),
jit->prg_buf + jit->prologue_plt_ret, NULL);
- jit->prg += BPF_PLT_SIZE;
+ jit->prg += sizeof(struct bpf_plt);
}

static int get_probe_mem_regno(const u8 *insn)
@@ -1901,9 +1901,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
struct bpf_jit jit;
int pass;

- if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE))
- return orig_fp;
-
if (!fp->jit_requested)
return orig_fp;

@@ -2009,14 +2006,11 @@ bool bpf_jit_supports_far_kfunc_call(void)
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
void *old_addr, void *new_addr)
{
+ struct bpf_plt expected_plt, current_plt, new_plt, *plt;
struct {
u16 opc;
s32 disp;
} __packed insn;
- char expected_plt[BPF_PLT_SIZE];
- char current_plt[BPF_PLT_SIZE];
- char new_plt[BPF_PLT_SIZE];
- char *plt;
char *ret;
int err;

@@ -2035,18 +2029,18 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
*/
} else {
/* Verify the PLT. */
- plt = (char *)ip + (insn.disp << 1);
- err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE);
+ plt = ip + (insn.disp << 1);
+ err = copy_from_kernel_nofault(&current_plt, plt,
+ sizeof(current_plt));
if (err < 0)
return err;
ret = (char *)ip + 6;
- bpf_jit_plt(expected_plt, ret, old_addr);
- if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE))
+ bpf_jit_plt(&expected_plt, ret, old_addr);
+ if (memcmp(&current_plt, &expected_plt, sizeof(current_plt)))
return -EINVAL;
/* Adjust the call address. */
- bpf_jit_plt(new_plt, ret, new_addr);
- s390_kernel_write(plt + (bpf_plt_target - bpf_plt),
- new_plt + (bpf_plt_target - bpf_plt),
+ bpf_jit_plt(&new_plt, ret, new_addr);
+ s390_kernel_write(&plt->target, &new_plt.target,
sizeof(void *));
}

diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index f83017992eaa..d7db4e737218 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1665,7 +1665,7 @@ bool kern_addr_valid(unsigned long addr)
if (pud_none(*pud))
return false;

- if (pud_large(*pud))
+ if (pud_leaf(*pud))
return pfn_valid(pud_pfn(*pud));

pmd = pmd_offset(pud, addr);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4b81e884a614..b4e6859542a3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2566,6 +2566,31 @@ config MITIGATION_RFDS
stored in floating point, vector and integer registers.
See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst>

+choice
+ prompt "Clear branch history"
+ depends on CPU_SUP_INTEL
+ default SPECTRE_BHI_ON
+ help
+ Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks
+ where the branch history buffer is poisoned to speculatively steer
+ indirect branches.
+ See <file:Documentation/admin-guide/hw-vuln/spectre.rst>
+
+config SPECTRE_BHI_ON
+ bool "on"
+ help
+ Equivalent to setting spectre_bhi=on command line parameter.
+config SPECTRE_BHI_OFF
+ bool "off"
+ help
+ Equivalent to setting spectre_bhi=off command line parameter.
+config SPECTRE_BHI_AUTO
+ bool "auto"
+ help
+ Equivalent to setting spectre_bhi=auto command line parameter.
+
+endchoice
+
endif

config ARCH_HAS_ADD_PAGES
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 71fc531b95b4..583c11664c63 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -84,7 +84,7 @@ LDFLAGS_vmlinux += -T
hostprogs := mkpiggy
HOST_EXTRACFLAGS += -I$(srctree)/tools/include

-sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p'
+sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__start_rodata\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p'

quiet_cmd_voffset = VOFFSET $@
cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index f711f2a85862..b5ecbd32a46f 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -330,6 +330,7 @@ static size_t parse_elf(void *output)
return ehdr.e_entry - LOAD_PHYSICAL_ADDR;
}

+const unsigned long kernel_text_size = VO___start_rodata - VO__text;
const unsigned long kernel_total_size = VO__end - VO__text;

static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4);
@@ -357,6 +358,19 @@ unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
return entry;
}

+/*
+ * Set the memory encryption xloadflag based on the mem_encrypt= command line
+ * parameter, if provided.
+ */
+static void parse_mem_encrypt(struct setup_header *hdr)
+{
+ int on = cmdline_find_option_bool("mem_encrypt=on");
+ int off = cmdline_find_option_bool("mem_encrypt=off");
+
+ if (on > off)
+ hdr->xloadflags |= XLF_MEM_ENCRYPTION;
+}
+
/*
* The compressed kernel image (ZO), has been moved so that its position
* is against the end of the buffer used to hold the uncompressed kernel
@@ -387,6 +401,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
/* Clear flags intended for solely in-kernel use. */
boot_params->hdr.loadflags &= ~KASLR_FLAG;

+ parse_mem_encrypt(&boot_params->hdr);
+
sanitize_boot_params(boot_params);

if (boot_params->screen_info.orig_video_mode == 7) {
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index 80d76aea1f7b..0a49218a516a 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -116,6 +116,9 @@ static bool fault_in_kernel_space(unsigned long address)
#undef __init
#define __init

+#undef __head
+#define __head
+
#define __BOOT_COMPRESSED

/* Basic instruction decoding support needed */
diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c
index d07be9d05cd0..ddd4efdc79d6 100644
--- a/arch/x86/coco/core.c
+++ b/arch/x86/coco/core.c
@@ -3,13 +3,17 @@
* Confidential Computing Platform Capability checks
*
* Copyright (C) 2021 Advanced Micro Devices, Inc.
+ * Copyright (C) 2024 Jason A. Donenfeld <Jason@xxxxxxxxx>. All Rights Reserved.
*
* Author: Tom Lendacky <thomas.lendacky@xxxxxxx>
*/

#include <linux/export.h>
#include <linux/cc_platform.h>
+#include <linux/string.h>
+#include <linux/random.h>

+#include <asm/archrandom.h>
#include <asm/coco.h>
#include <asm/processor.h>

@@ -148,3 +152,40 @@ u64 cc_mkdec(u64 val)
}
}
EXPORT_SYMBOL_GPL(cc_mkdec);
+
+__init void cc_random_init(void)
+{
+ /*
+ * The seed is 32 bytes (in units of longs), which is 256 bits, which
+ * is the security level that the RNG is targeting.
+ */
+ unsigned long rng_seed[32 / sizeof(long)];
+ size_t i, longs;
+
+ if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ return;
+
+ /*
+ * Since the CoCo threat model includes the host, the only reliable
+ * source of entropy that can be neither observed nor manipulated is
+ * RDRAND. Usually, RDRAND failure is considered tolerable, but since
+ * CoCo guests have no other unobservable source of entropy, it's
+ * important to at least ensure the RNG gets some initial random seeds.
+ */
+ for (i = 0; i < ARRAY_SIZE(rng_seed); i += longs) {
+ longs = arch_get_random_longs(&rng_seed[i], ARRAY_SIZE(rng_seed) - i);
+
+ /*
+ * A zero return value means that the guest doesn't have RDRAND
+ * or the CPU is physically broken, and in both cases that
+ * means most crypto inside of the CoCo instance will be
+ * broken, defeating the purpose of CoCo in the first place. So
+ * just panic here because it's absolutely unsafe to continue
+ * executing.
+ */
+ if (longs == 0)
+ panic("RDRAND is defective.");
+ }
+ add_device_randomness(rng_seed, sizeof(rng_seed));
+ memzero_explicit(rng_seed, sizeof(rng_seed));
+}
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 9c0b26ae5106..e72dac092245 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -48,7 +48,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)

if (likely(unr < NR_syscalls)) {
unr = array_index_nospec(unr, NR_syscalls);
- regs->ax = sys_call_table[unr](regs);
+ regs->ax = x64_sys_call(regs, unr);
return true;
}
return false;
@@ -65,7 +65,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)

if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
xnr = array_index_nospec(xnr, X32_NR_syscalls);
- regs->ax = x32_sys_call_table[xnr](regs);
+ regs->ax = x32_sys_call(regs, xnr);
return true;
}
return false;
@@ -114,7 +114,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)

if (likely(unr < IA32_NR_syscalls)) {
unr = array_index_nospec(unr, IA32_NR_syscalls);
- regs->ax = ia32_sys_call_table[unr](regs);
+ regs->ax = ia32_sys_call(regs, unr);
} else if (nr != -1) {
regs->ax = __ia32_sys_ni_syscall(regs);
}
@@ -141,7 +141,7 @@ static __always_inline bool int80_is_external(void)
}

/**
- * int80_emulation - 32-bit legacy syscall entry
+ * do_int80_emulation - 32-bit legacy syscall C entry from asm
*
* This entry point can be used by 32-bit and 64-bit programs to perform
* 32-bit system calls. Instances of INT $0x80 can be found inline in
@@ -159,7 +159,7 @@ static __always_inline bool int80_is_external(void)
* eax: system call number
* ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6
*/
-DEFINE_IDTENTRY_RAW(int80_emulation)
+__visible noinstr void do_int80_emulation(struct pt_regs *regs)
{
int nr;

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9f97a8bd11e8..5d96561c0d6a 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
/* clobbers %rax, make sure it is after saving the syscall nr */
IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY

call do_syscall_64 /* returns with IRQs disabled */

@@ -1549,3 +1550,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
call make_task_dead
SYM_CODE_END(rewind_stack_and_make_dead)
.popsection
+
+/*
+ * This sequence executes branches in order to remove user branch information
+ * from the branch history tracker in the Branch Predictor, therefore removing
+ * user influence on subsequent BTB lookups.
+ *
+ * It should be used on parts prior to Alder Lake. Newer parts should use the
+ * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
+ * virtualized on newer hardware the VMM should protect against BHI attacks by
+ * setting BHI_DIS_S for the guests.
+ *
+ * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging
+ * and not clearing the branch history. The call tree looks like:
+ *
+ * call 1
+ * call 2
+ * call 2
+ * call 2
+ * call 2
+ * call 2
+ * ret
+ * ret
+ * ret
+ * ret
+ * ret
+ * ret
+ *
+ * This means that the stack is non-constant and ORC can't unwind it with %rsp
+ * alone. Therefore we unconditionally set up the frame pointer, which allows
+ * ORC to unwind properly.
+ *
+ * The alignment is for performance and not for safety, and may be safely
+ * refactored in the future if needed.
+ */
+SYM_FUNC_START(clear_bhb_loop)
+ push %rbp
+ mov %rsp, %rbp
+ movl $5, %ecx
+ ANNOTATE_INTRA_FUNCTION_CALL
+ call 1f
+ jmp 5f
+ .align 64, 0xcc
+ ANNOTATE_INTRA_FUNCTION_CALL
+1: call 2f
+ RET
+ .align 64, 0xcc
+2: movl $5, %eax
+3: jmp 4f
+ nop
+4: sub $1, %eax
+ jnz 3b
+ sub $1, %ecx
+ jnz 1b
+ RET
+5: lfence
+ pop %rbp
+ RET
+SYM_FUNC_END(clear_bhb_loop)
+EXPORT_SYMBOL_GPL(clear_bhb_loop)
+STACK_FRAME_NON_STANDARD(clear_bhb_loop)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 306181e4fcb9..4c1dfc51c56e 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)

IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY

/*
* SYSENTER doesn't filter flags, so we need to clear NT and AC
@@ -209,6 +210,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)

IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY

movq %rsp, %rdi
call do_fast_syscall_32
@@ -277,3 +279,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
int3
SYM_CODE_END(entry_SYSCALL_compat)
+
+/*
+ * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries
+ * point to C routines, however since this is a system call interface the branch
+ * history needs to be scrubbed to protect against BHI attacks, and that
+ * scrubbing needs to take place in assembly code prior to entering any C
+ * routines.
+ */
+SYM_CODE_START(int80_emulation)
+ ANNOTATE_NOENDBR
+ UNWIND_HINT_FUNC
+ CLEAR_BRANCH_HISTORY
+ jmp do_int80_emulation
+SYM_CODE_END(int80_emulation)
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 8cfc9bc73e7f..c2235bae17ef 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -18,8 +18,25 @@
#include <asm/syscalls_32.h>
#undef __SYSCALL

+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
+#ifdef CONFIG_X86_32
#define __SYSCALL(nr, sym) __ia32_##sym,
-
-__visible const sys_call_ptr_t ia32_sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_32.h>
};
+#undef __SYSCALL
+#endif
+
+#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs);
+
+long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_32.h>
+ default: return __ia32_sys_ni_syscall(regs);
+ }
+};
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index be120eec1fc9..33b3f09e6f15 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -11,8 +11,23 @@
#include <asm/syscalls_64.h>
#undef __SYSCALL

+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
#define __SYSCALL(nr, sym) __x64_##sym,
-
-asmlinkage const sys_call_ptr_t sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_64.h>
};
+#undef __SYSCALL
+
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
+
+long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_64.h>
+ default: return __x64_sys_ni_syscall(regs);
+ }
+};
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index bdd0e03a1265..03de4a932131 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -11,8 +11,12 @@
#include <asm/syscalls_x32.h>
#undef __SYSCALL

-#define __SYSCALL(nr, sym) __x64_##sym,
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);

-asmlinkage const sys_call_ptr_t x32_sys_call_table[] = {
-#include <asm/syscalls_x32.h>
+long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_x32.h>
+ default: return __x64_sys_ni_syscall(regs);
+ }
};
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 5365d6acbf09..8ed10366c4a2 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
/*
* AMD Performance Monitor Family 17h and later:
*/
-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
@@ -262,10 +262,24 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};

+static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+};
+
static u64 amd_pmu_event_map(int hw_event)
{
- if (boot_cpu_data.x86 >= 0x17)
- return amd_f17h_perfmon_event_map[hw_event];
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
+ return amd_zen2_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN1))
+ return amd_zen1_perfmon_event_map[hw_event];

return amd_perfmon_event_map[hw_event];
}
@@ -904,8 +918,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!status)
goto done;

- /* Read branch records before unfreezing */
- if (status & GLOBAL_STATUS_LBRS_FROZEN) {
+ /* Read branch records */
+ if (x86_pmu.lbr_nr) {
amd_pmu_lbr_read();
status &= ~GLOBAL_STATUS_LBRS_FROZEN;
}
diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c
index eb31f850841a..110e34c59643 100644
--- a/arch/x86/events/amd/lbr.c
+++ b/arch/x86/events/amd/lbr.c
@@ -400,10 +400,12 @@ void amd_pmu_lbr_enable_all(void)
wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
}

- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }

- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}

@@ -416,10 +418,12 @@ void amd_pmu_lbr_disable_all(void)
return;

rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
-
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
}

__init int amd_pmu_lbr_init(void)
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index eb8dd8b8a1e8..2b53f696c3c9 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1236,11 +1236,11 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
struct pmu *pmu = event->pmu;

/*
- * Make sure we get updated with the first PEBS
- * event. It will trigger also during removal, but
- * that does not hurt:
+ * Make sure we get updated with the first PEBS event.
+ * During removal, ->pebs_data_cfg is still valid for
+ * the last PEBS event. Don't clear it.
*/
- if (cpuc->n_pebs == 1)
+ if ((cpuc->n_pebs == 1) && add)
cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;

if (needed_cb != pebs_needs_sched_cb(cpuc)) {
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index b1a98fa38828..0e82074517f6 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -13,6 +13,7 @@
#include <asm/preempt.h>
#include <asm/asm.h>
#include <asm/gsseg.h>
+#include <asm/nospec-branch.h>

#ifndef CONFIG_X86_CMPXCHG64
extern void cmpxchg8b_emu(void);
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index b3a7cfb0d99e..c945c893c52e 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -81,6 +81,7 @@

#ifndef __ASSEMBLY__
extern unsigned int output_len;
+extern const unsigned long kernel_text_size;
extern const unsigned long kernel_total_size;

unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
index 21940ef8d290..de03537a0182 100644
--- a/arch/x86/include/asm/coco.h
+++ b/arch/x86/include/asm/coco.h
@@ -22,6 +22,7 @@ static inline void cc_set_mask(u64 mask)

u64 cc_mkenc(u64 val);
u64 cc_mkdec(u64 val);
+void cc_random_init(void);
#else
static inline u64 cc_mkenc(u64 val)
{
@@ -32,6 +33,7 @@ static inline u64 cc_mkdec(u64 val)
{
return val;
}
+static inline void cc_random_init(void) { }
#endif

#endif /* _ASM_X86_COCO_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index a1273698fc43..686e92d2663e 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -33,6 +33,8 @@ enum cpuid_leafs
CPUID_7_EDX,
CPUID_8000_001F_EAX,
CPUID_8000_0021_EAX,
+ CPUID_LNX_5,
+ NR_CPUID_WORDS,
};

#define X86_CAP_FMT_NUM "%d:%d"
@@ -91,8 +93,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \
REQUIRED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 21))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 22))

#define DISABLED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
@@ -116,8 +119,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \
DISABLED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 21))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 22))

#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index bd33f6366c80..8c1593dd2c31 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -13,7 +13,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 21 /* N 32-bit words worth of info */
+#define NCAPINTS 22 /* N 32-bit words worth of info */
#define NBUGINTS 2 /* N 32-bit bug flags */

/*
@@ -218,7 +218,7 @@
#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_ZEN (7*32+28) /* "" CPU based on Zen microarchitecture */
+#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */
#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
@@ -312,6 +312,10 @@
#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */
#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */
+#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */
+#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */
+#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */
+#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */

/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
@@ -452,6 +456,18 @@
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */

+/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0x80000022, etc and Linux defined features.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
+#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
+#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */
+#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */
+#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
+
/*
* BUG word(s)
*/
@@ -499,4 +515,5 @@
#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 702d93fdd10e..88fcf08458d9 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -143,6 +143,7 @@
#define DISABLED_MASK18 (DISABLE_IBT)
#define DISABLED_MASK19 0
#define DISABLED_MASK20 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+#define DISABLED_MASK21 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)

#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 5f1d3c421f68..cc9ccf61b6bd 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_INIT_H
#define _ASM_X86_INIT_H

+#define __head __section(".head.text")
+
struct x86_mapping_info {
void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
void *context; /* context for alloc_pgt_page */
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index f4f526984629..76081a34fc23 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -46,8 +46,8 @@ void __init sme_unmap_bootdata(char *real_mode_data);
void __init sme_early_init(void);
void __init sev_setup_arch(void);

-void __init sme_encrypt_kernel(struct boot_params *bp);
-void __init sme_enable(struct boot_params *bp);
+void sme_encrypt_kernel(struct boot_params *bp);
+void sme_enable(struct boot_params *bp);

int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
@@ -81,8 +81,8 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
static inline void __init sme_early_init(void) { }
static inline void __init sev_setup_arch(void) { }

-static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
-static inline void __init sme_enable(struct boot_params *bp) { }
+static inline void sme_encrypt_kernel(struct boot_params *bp) { }
+static inline void sme_enable(struct boot_params *bp) { }

static inline void sev_es_init_vc_handling(void) { }

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index c75cc5610be3..621bac6b7401 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -50,10 +50,13 @@
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */
+#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT)

/* A mask for bits which the kernel toggles when controlling mitigations */
#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
- | SPEC_CTRL_RRSBA_DIS_S)
+ | SPEC_CTRL_RRSBA_DIS_S \
+ | SPEC_CTRL_BHI_DIS_S)

#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
@@ -152,6 +155,10 @@
* are restricted to targets in
* kernel.
*/
+#define ARCH_CAP_BHI_NO BIT(20) /*
+ * CPU is not affected by Branch
+ * History Injection.
+ */
#define ARCH_CAP_PBRSB_NO BIT(24) /*
* Not susceptible to Post-Barrier
* Return Stack Buffer Predictions.
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 8ae2cb30ade3..a8781c8763b4 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -271,11 +271,20 @@
.Lskip_rsb_\@:
.endm

+/*
+ * The CALL to srso_alias_untrain_ret() must be patched in directly at
+ * the spot where untraining must be done, ie., srso_alias_untrain_ret()
+ * must be the target of a CALL instruction instead of indirectly
+ * jumping to a wrapper which then calls it. Therefore, this macro is
+ * called outside of __UNTRAIN_RET below, for the time being, before the
+ * kernel can support nested alternatives with arbitrary nesting.
+ */
+.macro CALL_UNTRAIN_RET
#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)
-#define CALL_UNTRAIN_RET "call entry_untrain_ret"
-#else
-#define CALL_UNTRAIN_RET ""
+ ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \
+ "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
#endif
+.endm

/*
* Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
@@ -288,38 +297,24 @@
* As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
* where we have a stack but before any RET instruction.
*/
-.macro UNTRAIN_RET
-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
- defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
+.macro __UNTRAIN_RET ibpb_feature, call_depth_insns
+#if defined(CONFIG_RETHUNK) || defined(CONFIG_CPU_IBPB_ENTRY)
VALIDATE_UNRET_END
- ALTERNATIVE_3 "", \
- CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
- "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \
- __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
+ CALL_UNTRAIN_RET
+ ALTERNATIVE_2 "", \
+ "call entry_ibpb", \ibpb_feature, \
+ __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH
#endif
.endm

-.macro UNTRAIN_RET_VM
-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
- defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
- VALIDATE_UNRET_END
- ALTERNATIVE_3 "", \
- CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
- "call entry_ibpb", X86_FEATURE_IBPB_ON_VMEXIT, \
- __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
-#endif
-.endm
+#define UNTRAIN_RET \
+ __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH)

-.macro UNTRAIN_RET_FROM_CALL
-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
- defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
- VALIDATE_UNRET_END
- ALTERNATIVE_3 "", \
- CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
- "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \
- __stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH
-#endif
-.endm
+#define UNTRAIN_RET_VM \
+ __UNTRAIN_RET X86_FEATURE_IBPB_ON_VMEXIT, __stringify(RESET_CALL_DEPTH)
+
+#define UNTRAIN_RET_FROM_CALL \
+ __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH_FROM_CALL)


.macro CALL_DEPTH_ACCOUNT
@@ -340,6 +335,19 @@
ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
.endm

+#ifdef CONFIG_X86_64
+.macro CLEAR_BRANCH_HISTORY
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
+.endm
+
+.macro CLEAR_BRANCH_HISTORY_VMEXIT
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT
+.endm
+#else
+#define CLEAR_BRANCH_HISTORY
+#define CLEAR_BRANCH_HISTORY_VMEXIT
+#endif
+
#else /* __ASSEMBLY__ */

#define ANNOTATE_RETPOLINE_SAFE \
@@ -359,6 +367,22 @@ extern void __x86_return_thunk(void);
static inline void __x86_return_thunk(void) {}
#endif

+#ifdef CONFIG_CPU_UNRET_ENTRY
+extern void retbleed_return_thunk(void);
+#else
+static inline void retbleed_return_thunk(void) {}
+#endif
+
+extern void srso_alias_untrain_ret(void);
+
+#ifdef CONFIG_CPU_SRSO
+extern void srso_return_thunk(void);
+extern void srso_alias_return_thunk(void);
+#else
+static inline void srso_return_thunk(void) {}
+static inline void srso_alias_return_thunk(void) {}
+#endif
+
extern void retbleed_return_thunk(void);
extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);
@@ -370,6 +394,10 @@ extern void srso_alias_untrain_ret(void);
extern void entry_untrain_ret(void);
extern void entry_ibpb(void);

+#ifdef CONFIG_X86_64
+extern void clear_bhb_loop(void);
+#endif
+
extern void (*x86_return_thunk)(void);

#ifdef CONFIG_CALL_DEPTH_TRACKING
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index 7ba1726b71c7..e9187ddd3d1f 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -99,6 +99,7 @@
#define REQUIRED_MASK18 0
#define REQUIRED_MASK19 0
#define REQUIRED_MASK20 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+#define REQUIRED_MASK21 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)

#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 36f905797075..75a5388d4068 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -199,15 +199,15 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
struct snp_guest_request_ioctl;

void setup_ghcb(void);
-void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
- unsigned long npages);
-void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
- unsigned long npages);
+void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+ unsigned long npages);
+void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
+ unsigned long npages);
void snp_set_memory_shared(unsigned long vaddr, unsigned long npages);
void snp_set_memory_private(unsigned long vaddr, unsigned long npages);
void snp_set_wakeup_secondary_cpu(void);
bool snp_init(struct boot_params *bp);
-void __init __noreturn snp_abort(void);
+void __noreturn snp_abort(void);
void snp_dmi_setup(void);
int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 4fb36fba4b5a..03bb950eba69 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -16,19 +16,17 @@
#include <asm/thread_info.h> /* for TS_COMPAT */
#include <asm/unistd.h>

+/* This is used purely for kernel/trace/trace_syscalls.c */
typedef long (*sys_call_ptr_t)(const struct pt_regs *);
extern const sys_call_ptr_t sys_call_table[];

-#if defined(CONFIG_X86_32)
-#define ia32_sys_call_table sys_call_table
-#else
/*
* These may not exist, but still put the prototypes in so we
* can use IS_ENABLED().
*/
-extern const sys_call_ptr_t ia32_sys_call_table[];
-extern const sys_call_ptr_t x32_sys_call_table[];
-#endif
+extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x64_sys_call(const struct pt_regs *, unsigned int nr);

/*
* Only the low 32 bits of orig_ax are meaningful, so we return int.
@@ -127,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task)
}

void do_syscall_64(struct pt_regs *regs, int nr);
+void do_int80_emulation(struct pt_regs *regs);

#endif /* CONFIG_X86_32 */

diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 01d19fc22346..eeea058cf602 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -38,6 +38,7 @@
#define XLF_EFI_KEXEC (1<<4)
#define XLF_5LEVEL (1<<5)
#define XLF_5LEVEL_ENABLED (1<<6)
+#define XLF_MEM_ENCRYPTION (1<<7)

#ifndef __ASSEMBLY__

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 031bca974fbf..9fd91022d92d 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -66,20 +66,6 @@ static const int amd_erratum_400[] =
static const int amd_erratum_383[] =
AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));

-/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
-static const int amd_erratum_1054[] =
- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
-
-static const int amd_zenbleed[] =
- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x30, 0x0, 0x4f, 0xf),
- AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf),
- AMD_MODEL_RANGE(0x17, 0x90, 0x0, 0x91, 0xf),
- AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf));
-
-static const int amd_div0[] =
- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf),
- AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf));
-
static const int amd_erratum_1485[] =
AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf),
AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf));
@@ -620,6 +606,49 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
}

resctrl_cpu_detect(c);
+
+ /* Figure out Zen generations: */
+ switch (c->x86) {
+ case 0x17: {
+ switch (c->x86_model) {
+ case 0x00 ... 0x2f:
+ case 0x50 ... 0x5f:
+ setup_force_cpu_cap(X86_FEATURE_ZEN1);
+ break;
+ case 0x30 ... 0x4f:
+ case 0x60 ... 0x7f:
+ case 0x90 ... 0x91:
+ case 0xa0 ... 0xaf:
+ setup_force_cpu_cap(X86_FEATURE_ZEN2);
+ break;
+ default:
+ goto warn;
+ }
+ break;
+ }
+ case 0x19: {
+ switch (c->x86_model) {
+ case 0x00 ... 0x0f:
+ case 0x20 ... 0x5f:
+ setup_force_cpu_cap(X86_FEATURE_ZEN3);
+ break;
+ case 0x10 ... 0x1f:
+ case 0x60 ... 0xaf:
+ setup_force_cpu_cap(X86_FEATURE_ZEN4);
+ break;
+ default:
+ goto warn;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ return;
+
+warn:
+ WARN_ONCE(1, "Family 0x%x, model: 0x%x??\n", c->x86, c->x86_model);
}

static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
@@ -945,6 +974,19 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
clear_rdrand_cpuid_bit(c);
}

+static void fix_erratum_1386(struct cpuinfo_x86 *c)
+{
+ /*
+ * Work around Erratum 1386. The XSAVES instruction malfunctions in
+ * certain circumstances on Zen1/2 uarch, and not all parts have had
+ * updated microcode at the time of writing (March 2023).
+ *
+ * Affected parts all have no supervisor XSAVE states, meaning that
+ * the XSAVEC instruction (which works fine) is equivalent.
+ */
+ clear_cpu_cap(c, X86_FEATURE_XSAVES);
+}
+
void init_spectral_chicken(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_CPU_UNRET_ENTRY
@@ -965,24 +1007,19 @@ void init_spectral_chicken(struct cpuinfo_x86 *c)
}
}
#endif
- /*
- * Work around Erratum 1386. The XSAVES instruction malfunctions in
- * certain circumstances on Zen1/2 uarch, and not all parts have had
- * updated microcode at the time of writing (March 2023).
- *
- * Affected parts all have no supervisor XSAVE states, meaning that
- * the XSAVEC instruction (which works fine) is equivalent.
- */
- clear_cpu_cap(c, X86_FEATURE_XSAVES);
}

static void init_amd_zn(struct cpuinfo_x86 *c)
{
- set_cpu_cap(c, X86_FEATURE_ZEN);
-
+ setup_force_cpu_cap(X86_FEATURE_ZEN);
#ifdef CONFIG_NUMA
node_reclaim_distance = 32;
#endif
+}
+
+static void init_amd_zen1(struct cpuinfo_x86 *c)
+{
+ fix_erratum_1386(c);

/* Fix up CPUID bits, but only if not virtualised. */
if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) {
@@ -999,6 +1036,9 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO))
set_cpu_cap(c, X86_FEATURE_BTC_NO);
}
+
+ pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
+ setup_force_cpu_bug(X86_BUG_DIV0);
}

static bool cpu_has_zenbleed_microcode(void)
@@ -1023,11 +1063,8 @@ static bool cpu_has_zenbleed_microcode(void)
return true;
}

-static void zenbleed_check(struct cpuinfo_x86 *c)
+static void zen2_zenbleed_check(struct cpuinfo_x86 *c)
{
- if (!cpu_has_amd_erratum(c, amd_zenbleed))
- return;
-
if (cpu_has(c, X86_FEATURE_HYPERVISOR))
return;

@@ -1042,6 +1079,20 @@ static void zenbleed_check(struct cpuinfo_x86 *c)
}
}

+static void init_amd_zen2(struct cpuinfo_x86 *c)
+{
+ fix_erratum_1386(c);
+ zen2_zenbleed_check(c);
+}
+
+static void init_amd_zen3(struct cpuinfo_x86 *c)
+{
+}
+
+static void init_amd_zen4(struct cpuinfo_x86 *c)
+{
+}
+
static void init_amd(struct cpuinfo_x86 *c)
{
early_init_amd(c);
@@ -1080,6 +1131,15 @@ static void init_amd(struct cpuinfo_x86 *c)
case 0x19: init_amd_zn(c); break;
}

+ if (boot_cpu_has(X86_FEATURE_ZEN1))
+ init_amd_zen1(c);
+ else if (boot_cpu_has(X86_FEATURE_ZEN2))
+ init_amd_zen2(c);
+ else if (boot_cpu_has(X86_FEATURE_ZEN3))
+ init_amd_zen3(c);
+ else if (boot_cpu_has(X86_FEATURE_ZEN4))
+ init_amd_zen4(c);
+
/*
* Enable workaround for FXSAVE leak on CPUs
* without a XSaveErPtr feature
@@ -1131,7 +1191,7 @@ static void init_amd(struct cpuinfo_x86 *c)
* Counter May Be Inaccurate".
*/
if (cpu_has(c, X86_FEATURE_IRPERF) &&
- !cpu_has_amd_erratum(c, amd_erratum_1054))
+ (boot_cpu_has(X86_FEATURE_ZEN1) && c->x86_model > 0x2f))
msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT);

check_null_seg_clears_base(c);
@@ -1147,13 +1207,6 @@ static void init_amd(struct cpuinfo_x86 *c)
cpu_has(c, X86_FEATURE_AUTOIBRS))
WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS));

- zenbleed_check(c);
-
- if (cpu_has_amd_erratum(c, amd_div0)) {
- pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
- setup_force_cpu_bug(X86_BUG_DIV0);
- }
-
if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
cpu_has_amd_erratum(c, amd_erratum_1485))
msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
@@ -1313,7 +1366,7 @@ static void zenbleed_check_cpu(void *unused)
{
struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());

- zenbleed_check(c);
+ zen2_zenbleed_check(c);
}

void amd_check_microcode(void)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 3452f7271d07..3fc230155627 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -63,7 +63,7 @@ EXPORT_SYMBOL_GPL(x86_pred_cmd);

static DEFINE_MUTEX(spec_ctrl_mutex);

-void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk;
+void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk;

/* Update SPEC_CTRL MSR and its cached copy unconditionally */
static void update_spec_ctrl(u64 val)
@@ -1108,8 +1108,7 @@ static void __init retbleed_select_mitigation(void)
setup_force_cpu_cap(X86_FEATURE_RETHUNK);
setup_force_cpu_cap(X86_FEATURE_UNRET);

- if (IS_ENABLED(CONFIG_RETHUNK))
- x86_return_thunk = retbleed_return_thunk;
+ x86_return_thunk = retbleed_return_thunk;

if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
@@ -1607,6 +1606,79 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_
dump_stack();
}

+/*
+ * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by
+ * branch history in userspace. Not needed if BHI_NO is set.
+ */
+static bool __init spec_ctrl_bhi_dis(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_BHI_CTRL))
+ return false;
+
+ x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S;
+ update_spec_ctrl(x86_spec_ctrl_base);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW);
+
+ return true;
+}
+
+enum bhi_mitigations {
+ BHI_MITIGATION_OFF,
+ BHI_MITIGATION_ON,
+ BHI_MITIGATION_AUTO,
+};
+
+static enum bhi_mitigations bhi_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_SPECTRE_BHI_ON) ? BHI_MITIGATION_ON :
+ IS_ENABLED(CONFIG_SPECTRE_BHI_OFF) ? BHI_MITIGATION_OFF :
+ BHI_MITIGATION_AUTO;
+
+static int __init spectre_bhi_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off"))
+ bhi_mitigation = BHI_MITIGATION_OFF;
+ else if (!strcmp(str, "on"))
+ bhi_mitigation = BHI_MITIGATION_ON;
+ else if (!strcmp(str, "auto"))
+ bhi_mitigation = BHI_MITIGATION_AUTO;
+ else
+ pr_err("Ignoring unknown spectre_bhi option (%s)", str);
+
+ return 0;
+}
+early_param("spectre_bhi", spectre_bhi_parse_cmdline);
+
+static void __init bhi_select_mitigation(void)
+{
+ if (bhi_mitigation == BHI_MITIGATION_OFF)
+ return;
+
+ /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
+ !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA))
+ return;
+
+ if (spec_ctrl_bhi_dis())
+ return;
+
+ if (!IS_ENABLED(CONFIG_X86_64))
+ return;
+
+ /* Mitigate KVM by default */
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
+ pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n");
+
+ if (bhi_mitigation == BHI_MITIGATION_AUTO)
+ return;
+
+ /* Mitigate syscalls when the mitigation is forced =on */
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
+ pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n");
+}
+
static void __init spectre_v2_select_mitigation(void)
{
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -1718,6 +1790,9 @@ static void __init spectre_v2_select_mitigation(void)
mode == SPECTRE_V2_RETPOLINE)
spec_ctrl_disable_kernel_rrsba();

+ if (boot_cpu_has(X86_BUG_BHI))
+ bhi_select_mitigation();
+
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);

@@ -2695,15 +2770,15 @@ static char *stibp_state(void)

switch (spectre_v2_user_stibp) {
case SPECTRE_V2_USER_NONE:
- return ", STIBP: disabled";
+ return "; STIBP: disabled";
case SPECTRE_V2_USER_STRICT:
- return ", STIBP: forced";
+ return "; STIBP: forced";
case SPECTRE_V2_USER_STRICT_PREFERRED:
- return ", STIBP: always-on";
+ return "; STIBP: always-on";
case SPECTRE_V2_USER_PRCTL:
case SPECTRE_V2_USER_SECCOMP:
if (static_key_enabled(&switch_to_cond_stibp))
- return ", STIBP: conditional";
+ return "; STIBP: conditional";
}
return "";
}
@@ -2712,10 +2787,10 @@ static char *ibpb_state(void)
{
if (boot_cpu_has(X86_FEATURE_IBPB)) {
if (static_key_enabled(&switch_mm_always_ibpb))
- return ", IBPB: always-on";
+ return "; IBPB: always-on";
if (static_key_enabled(&switch_mm_cond_ibpb))
- return ", IBPB: conditional";
- return ", IBPB: disabled";
+ return "; IBPB: conditional";
+ return "; IBPB: disabled";
}
return "";
}
@@ -2725,14 +2800,31 @@ static char *pbrsb_eibrs_state(void)
if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
- return ", PBRSB-eIBRS: SW sequence";
+ return "; PBRSB-eIBRS: SW sequence";
else
- return ", PBRSB-eIBRS: Vulnerable";
+ return "; PBRSB-eIBRS: Vulnerable";
} else {
- return ", PBRSB-eIBRS: Not affected";
+ return "; PBRSB-eIBRS: Not affected";
}
}

+static const char * const spectre_bhi_state(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_BHI))
+ return "; BHI: Not affected";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW))
+ return "; BHI: BHI_DIS_S";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP))
+ return "; BHI: SW loop, KVM: SW loop";
+ else if (boot_cpu_has(X86_FEATURE_RETPOLINE) &&
+ !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA))
+ return "; BHI: Retpoline";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT))
+ return "; BHI: Syscall hardening, KVM: SW loop";
+
+ return "; BHI: Vulnerable (Syscall hardening enabled)";
+}
+
static ssize_t spectre_v2_show_state(char *buf)
{
if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
@@ -2745,13 +2837,15 @@ static ssize_t spectre_v2_show_state(char *buf)
spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");

- return sysfs_emit(buf, "%s%s%s%s%s%s%s\n",
+ return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n",
spectre_v2_strings[spectre_v2_enabled],
ibpb_state(),
- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "",
stibp_state(),
- boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
+ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "",
pbrsb_eibrs_state(),
+ spectre_bhi_state(),
+ /* this should always be at the end */
spectre_v2_module_string());
}

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 73cfac3fc9c4..fc4c9a7fb1e3 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1165,6 +1165,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#define NO_SPECTRE_V2 BIT(8)
#define NO_MMIO BIT(9)
#define NO_EIBRS_PBRSB BIT(10)
+#define NO_BHI BIT(11)

#define VULNWL(vendor, family, model, whitelist) \
X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
@@ -1227,18 +1228,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),

/* AMD Family 0xf - 0x12 */
- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),

/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
+ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
+ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),

/* Zhaoxin Family 7 */
- VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
- VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
+ VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
{}
};

@@ -1475,6 +1476,13 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
if (vulnerable_to_rfds(ia32_cap))
setup_force_cpu_bug(X86_BUG_RFDS);

+ /* When virtualized, eIBRS could be hidden, assume vulnerable */
+ if (!(ia32_cap & ARCH_CAP_BHI_NO) &&
+ !cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
+ (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
+ boot_cpu_has(X86_FEATURE_HYPERVISOR)))
+ setup_force_cpu_bug(X86_BUG_BHI);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 20ab11aec60b..e103c227acd3 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -2468,12 +2468,14 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr,
return -EINVAL;

b = &per_cpu(mce_banks_array, s->id)[bank];
-
if (!b->init)
return -ENODEV;

b->ctl = new;
+
+ mutex_lock(&mce_sysfs_mutex);
mce_restart();
+ mutex_unlock(&mce_sysfs_mutex);

return size;
}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 0dad49a09b7a..af5aa2c754c2 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
{ X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
+ { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
{ X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
{ X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
@@ -49,6 +50,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
{ X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
+ { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 },
{ 0, 0, 0, 0, 0 }
};

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index bbc21798df10..c58213bce294 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -41,6 +41,7 @@
#include <asm/trapnr.h>
#include <asm/sev.h>
#include <asm/tdx.h>
+#include <asm/init.h>

/*
* Manage page tables very early on.
@@ -84,8 +85,6 @@ static struct desc_ptr startup_gdt_descr = {
.address = 0,
};

-#define __head __section(".head.text")
-
static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
{
return ptr - (void *)_text + (void *)physaddr;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 15c700d35870..b223922248e9 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -196,12 +196,12 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
if (!smp_check_mpc(mpc, oem, str))
return 0;

- if (early) {
- /* Initialize the lapic mapping */
- if (!acpi_lapic)
- register_lapic_address(mpc->lapic);
+ /* Initialize the lapic mapping */
+ if (!acpi_lapic)
+ register_lapic_address(mpc->lapic);
+
+ if (early)
return 1;
- }

/* Now process the configuration blocks. */
while (count < mpc->length) {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index e63a8d05ce29..eb129277dcdd 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -35,6 +35,7 @@
#include <asm/bios_ebda.h>
#include <asm/bugs.h>
#include <asm/cacheinfo.h>
+#include <asm/coco.h>
#include <asm/cpu.h>
#include <asm/efi.h>
#include <asm/gart.h>
@@ -1120,6 +1121,7 @@ void __init setup_arch(char **cmdline_p)
* memory size.
*/
sev_setup_arch();
+ cc_random_init();

efi_fake_memmap();
efi_find_mirror();
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index 466fe09898cc..acbec4de3ec3 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -89,7 +89,8 @@ static bool __init sev_es_check_cpu_features(void)
return true;
}

-static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason)
+static void __head __noreturn
+sev_es_terminate(unsigned int set, unsigned int reason)
{
u64 val = GHCB_MSR_TERM_REQ;

@@ -326,13 +327,7 @@ static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid
*/
static const struct snp_cpuid_table *snp_cpuid_get_table(void)
{
- void *ptr;
-
- asm ("lea cpuid_table_copy(%%rip), %0"
- : "=r" (ptr)
- : "p" (&cpuid_table_copy));
-
- return ptr;
+ return &RIP_REL_REF(cpuid_table_copy);
}

/*
@@ -391,7 +386,7 @@ static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
return xsave_size;
}

-static bool
+static bool __head
snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
@@ -528,7 +523,8 @@ static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
* Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
* should be treated as fatal by caller.
*/
-static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
+static int __head
+snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();

@@ -570,7 +566,7 @@ static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_le
* page yet, so it only supports the MSR based communication with the
* hypervisor and only the CPUID exit-code.
*/
-void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
+void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
{
unsigned int subfn = lower_bits(regs->cx, 32);
unsigned int fn = lower_bits(regs->ax, 32);
@@ -1016,7 +1012,8 @@ struct cc_setup_data {
* Search for a Confidential Computing blob passed in as a setup_data entry
* via the Linux Boot Protocol.
*/
-static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
+static __head
+struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
{
struct cc_setup_data *sd = NULL;
struct setup_data *hdr;
@@ -1043,7 +1040,7 @@ static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
* mapping needs to be updated in sync with all the changes to virtual memory
* layout and related mapping facilities throughout the boot process.
*/
-static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
+static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
{
const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table;
int i;
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index a8db68a063c4..9905dc0e0b09 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -26,6 +26,7 @@
#include <linux/dmi.h>
#include <uapi/linux/sev-guest.h>

+#include <asm/init.h>
#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
#include <asm/sev.h>
@@ -683,8 +684,9 @@ static u64 __init get_jump_table_addr(void)
return ret;
}

-static void early_set_pages_state(unsigned long vaddr, unsigned long paddr,
- unsigned long npages, enum psc_op op)
+static void __head
+early_set_pages_state(unsigned long vaddr, unsigned long paddr,
+ unsigned long npages, enum psc_op op)
{
unsigned long paddr_end;
u64 val;
@@ -740,7 +742,7 @@ static void early_set_pages_state(unsigned long vaddr, unsigned long paddr,
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
}

-void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
unsigned long npages)
{
/*
@@ -2045,7 +2047,7 @@ bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
*
* Scan for the blob in that order.
*/
-static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
+static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
{
struct cc_blob_sev_info *cc_info;

@@ -2071,7 +2073,7 @@ static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
return cc_info;
}

-bool __init snp_init(struct boot_params *bp)
+bool __head snp_init(struct boot_params *bp)
{
struct cc_blob_sev_info *cc_info;

@@ -2093,7 +2095,7 @@ bool __init snp_init(struct boot_params *bp)
return true;
}

-void __init __noreturn snp_abort(void)
+void __head __noreturn snp_abort(void)
{
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index f15fb71f280e..54a5596adaa6 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -139,10 +139,7 @@ SECTIONS
STATIC_CALL_TEXT

ALIGN_ENTRY_TEXT_BEGIN
-#ifdef CONFIG_CPU_SRSO
*(.text..__x86.rethunk_untrain)
-#endif
-
ENTRY_TEXT

#ifdef CONFIG_CPU_SRSO
@@ -520,12 +517,12 @@ INIT_PER_CPU(irq_stack_backing_store);
"fixed_percpu_data is not at start of per-cpu area");
#endif

-#ifdef CONFIG_RETHUNK
+#ifdef CONFIG_CPU_UNRET_ENTRY
. = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
-. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
#endif

#ifdef CONFIG_CPU_SRSO
+. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
/*
* GNU ld cannot do XOR until 2.41.
* https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=f6f78318fca803c4907fb8d7f6ded8295f1947b1
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f7901cb4d2fa..11c484d72eab 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3120,7 +3120,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
if (pud_none(pud) || !pud_present(pud))
goto out;

- if (pud_large(pud)) {
+ if (pud_leaf(pud)) {
level = PG_LEVEL_1G;
goto out;
}
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index aadefcaa9561..2f4e155080ba 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -52,7 +52,7 @@ enum kvm_only_cpuid_leafs {
#define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1)
#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2)
#define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3)
-#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
+#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
#define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5)

/* CPUID level 0x80000007 (EDX). */
@@ -102,10 +102,12 @@ static const struct cpuid_reg reverse_cpuid[] = {
*/
static __always_inline void reverse_cpuid_check(unsigned int x86_leaf)
{
+ BUILD_BUG_ON(NR_CPUID_WORDS != NCAPINTS);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_1);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_2);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_3);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_4);
+ BUILD_BUG_ON(x86_leaf == CPUID_LNX_5);
BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid));
BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
}
@@ -126,6 +128,7 @@ static __always_inline u32 __feature_translate(int x86_feature)
KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC);
KVM_X86_TRANSLATE_FEATURE(PERFMON_V2);
KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
+ KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
default:
return x86_feature;
}
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index e86231c3b8a5..c5845f31c34d 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -84,9 +84,10 @@ struct enc_region {
};

/* Called with the sev_bitmap_lock held, or on shutdown */
-static int sev_flush_asids(int min_asid, int max_asid)
+static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid)
{
- int ret, asid, error = 0;
+ int ret, error = 0;
+ unsigned int asid;

/* Check if there are any ASIDs to reclaim before performing a flush */
asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
@@ -116,7 +117,7 @@ static inline bool is_mirroring_enc_context(struct kvm *kvm)
}

/* Must be called with the sev_bitmap_lock held */
-static bool __sev_recycle_asids(int min_asid, int max_asid)
+static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid)
{
if (sev_flush_asids(min_asid, max_asid))
return false;
@@ -143,8 +144,20 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev)

static int sev_asid_new(struct kvm_sev_info *sev)
{
- int asid, min_asid, max_asid, ret;
+ /*
+ * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
+ * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
+ * Note: min ASID can end up larger than the max if basic SEV support is
+ * effectively disabled by disallowing use of ASIDs for SEV guests.
+ */
+ unsigned int min_asid = sev->es_active ? 1 : min_sev_asid;
+ unsigned int max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
+ unsigned int asid;
bool retry = true;
+ int ret;
+
+ if (min_asid > max_asid)
+ return -ENOTTY;

WARN_ON(sev->misc_cg);
sev->misc_cg = get_current_misc_cg();
@@ -157,12 +170,6 @@ static int sev_asid_new(struct kvm_sev_info *sev)

mutex_lock(&sev_bitmap_lock);

- /*
- * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
- * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
- */
- min_asid = sev->es_active ? 1 : min_sev_asid;
- max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
again:
asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
if (asid > max_asid) {
@@ -187,7 +194,7 @@ static int sev_asid_new(struct kvm_sev_info *sev)
return ret;
}

-static int sev_get_asid(struct kvm *kvm)
+static unsigned int sev_get_asid(struct kvm *kvm)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

@@ -284,8 +291,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)

static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
{
+ unsigned int asid = sev_get_asid(kvm);
struct sev_data_activate activate;
- int asid = sev_get_asid(kvm);
int ret;

/* activate ASID on the given handle */
@@ -2234,8 +2241,10 @@ void __init sev_hardware_setup(void)
goto out;
}

- sev_asid_count = max_sev_asid - min_sev_asid + 1;
- WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count));
+ if (min_sev_asid <= max_sev_asid) {
+ sev_asid_count = max_sev_asid - min_sev_asid + 1;
+ WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count));
+ }
sev_supported = true;

/* SEV-ES support requested? */
@@ -2266,7 +2275,9 @@ void __init sev_hardware_setup(void)
out:
if (boot_cpu_has(X86_FEATURE_SEV))
pr_info("SEV %s (ASIDs %u - %u)\n",
- sev_supported ? "enabled" : "disabled",
+ sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" :
+ "unusable" :
+ "disabled",
min_sev_asid, max_sev_asid);
if (boot_cpu_has(X86_FEATURE_SEV_ES))
pr_info("SEV-ES %s (ASIDs %u - %u)\n",
@@ -2314,7 +2325,7 @@ int sev_cpu_init(struct svm_cpu_data *sd)
*/
static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
{
- int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid;
+ unsigned int asid = sev_get_asid(vcpu->kvm);

/*
* Note! The address must be a kernel address, as regular page walk
@@ -2632,7 +2643,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
void pre_sev_run(struct vcpu_svm *svm, int cpu)
{
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
- int asid = sev_get_asid(svm->vcpu.kvm);
+ unsigned int asid = sev_get_asid(svm->vcpu.kvm);

/* Assign the asid allocated with this SEV guest */
svm->asid = asid;
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 83843379813e..b82e6ed4f024 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -732,13 +732,13 @@ TRACE_EVENT(kvm_nested_intr_vmexit,
* Tracepoint for nested #vmexit because of interrupt pending
*/
TRACE_EVENT(kvm_invlpga,
- TP_PROTO(__u64 rip, int asid, u64 address),
+ TP_PROTO(__u64 rip, unsigned int asid, u64 address),
TP_ARGS(rip, asid, address),

TP_STRUCT__entry(
- __field( __u64, rip )
- __field( int, asid )
- __field( __u64, address )
+ __field( __u64, rip )
+ __field( unsigned int, asid )
+ __field( __u64, address )
),

TP_fast_assign(
@@ -747,7 +747,7 @@ TRACE_EVENT(kvm_invlpga,
__entry->address = address;
),

- TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx",
+ TP_printk("rip: 0x%016llx asid: %u address: 0x%016llx",
__entry->rip, __entry->asid, __entry->address)
);

diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 139960deb736..9522d46567f8 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)

call vmx_spec_ctrl_restore_host

+ CLEAR_BRANCH_HISTORY_VMEXIT
+
/* Put return value in AX */
mov %_ASM_BX, %_ASM_AX

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4aafd007964f..4ed8a7dc0536 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \
- ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR)
+ ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO)

static u64 kvm_get_arch_capabilities(void)
{
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index ea3a28e7b613..f0dae4fb6d07 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -14,19 +14,6 @@ ifdef CONFIG_KCSAN
CFLAGS_REMOVE_delay.o = $(CC_FLAGS_FTRACE)
endif

-# Early boot use of cmdline; don't instrument it
-ifdef CONFIG_AMD_MEM_ENCRYPT
-KCOV_INSTRUMENT_cmdline.o := n
-KASAN_SANITIZE_cmdline.o := n
-KCSAN_SANITIZE_cmdline.o := n
-
-ifdef CONFIG_FUNCTION_TRACER
-CFLAGS_REMOVE_cmdline.o = -pg
-endif
-
-CFLAGS_cmdline.o := -fno-stack-protector -fno-jump-tables
-endif
-
inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
quiet_cmd_inat_tables = GEN $@
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index cd86aeb5fdd3..ffa51f392e17 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -126,12 +126,13 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
#include <asm/GEN-for-each-reg.h>
#undef GEN
#endif
-/*
- * This function name is magical and is used by -mfunction-return=thunk-extern
- * for the compiler to generate JMPs to it.
- */
+
#ifdef CONFIG_RETHUNK

+ .section .text..__x86.return_thunk
+
+#ifdef CONFIG_CPU_SRSO
+
/*
* srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at
* special addresses:
@@ -147,9 +148,7 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
*
* As a result, srso_alias_safe_ret() becomes a safe return.
*/
-#ifdef CONFIG_CPU_SRSO
- .section .text..__x86.rethunk_untrain
-
+ .pushsection .text..__x86.rethunk_untrain
SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
@@ -158,17 +157,9 @@ SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
jmp srso_alias_return_thunk
SYM_FUNC_END(srso_alias_untrain_ret)
__EXPORT_THUNK(srso_alias_untrain_ret)
+ .popsection

- .section .text..__x86.rethunk_safe
-#else
-/* dummy definition for alternatives */
-SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
- ANNOTATE_UNRET_SAFE
- ret
- int3
-SYM_FUNC_END(srso_alias_untrain_ret)
-#endif
-
+ .pushsection .text..__x86.rethunk_safe
SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
lea 8(%_ASM_SP), %_ASM_SP
UNWIND_HINT_FUNC
@@ -177,14 +168,69 @@ SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
int3
SYM_FUNC_END(srso_alias_safe_ret)

- .section .text..__x86.return_thunk
-
-SYM_CODE_START(srso_alias_return_thunk)
+SYM_CODE_START_NOALIGN(srso_alias_return_thunk)
UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
call srso_alias_safe_ret
ud2
SYM_CODE_END(srso_alias_return_thunk)
+ .popsection
+
+/*
+ * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
+ * above. On kernel entry, srso_untrain_ret() is executed which is a
+ *
+ * movabs $0xccccc30824648d48,%rax
+ *
+ * and when the return thunk executes the inner label srso_safe_ret()
+ * later, it is a stack manipulation and a RET which is mispredicted and
+ * thus a "safe" one to use.
+ */
+ .align 64
+ .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc
+SYM_START(srso_untrain_ret, SYM_L_LOCAL, SYM_A_NONE)
+ ANNOTATE_NOENDBR
+ .byte 0x48, 0xb8
+
+/*
+ * This forces the function return instruction to speculate into a trap
+ * (UD2 in srso_return_thunk() below). This RET will then mispredict
+ * and execution will continue at the return site read from the top of
+ * the stack.
+ */
+SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
+ lea 8(%_ASM_SP), %_ASM_SP
+ ret
+ int3
+ int3
+ /* end of movabs */
+ lfence
+ call srso_safe_ret
+ ud2
+SYM_CODE_END(srso_safe_ret)
+SYM_FUNC_END(srso_untrain_ret)
+
+SYM_CODE_START(srso_return_thunk)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+ call srso_safe_ret
+ ud2
+SYM_CODE_END(srso_return_thunk)
+
+#define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret"
+#else /* !CONFIG_CPU_SRSO */
+#define JMP_SRSO_UNTRAIN_RET "ud2"
+/* Dummy for the alternative in CALL_UNTRAIN_RET. */
+SYM_CODE_START(srso_alias_untrain_ret)
+ ANNOTATE_UNRET_SAFE
+ ANNOTATE_NOENDBR
+ ret
+ int3
+SYM_FUNC_END(srso_alias_untrain_ret)
+__EXPORT_THUNK(srso_alias_untrain_ret)
+#endif /* CONFIG_CPU_SRSO */
+
+#ifdef CONFIG_CPU_UNRET_ENTRY

/*
* Some generic notes on the untraining sequences:
@@ -266,65 +312,19 @@ SYM_CODE_END(retbleed_return_thunk)
SYM_FUNC_END(retbleed_untrain_ret)
__EXPORT_THUNK(retbleed_untrain_ret)

-/*
- * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
- * above. On kernel entry, srso_untrain_ret() is executed which is a
- *
- * movabs $0xccccc30824648d48,%rax
- *
- * and when the return thunk executes the inner label srso_safe_ret()
- * later, it is a stack manipulation and a RET which is mispredicted and
- * thus a "safe" one to use.
- */
- .align 64
- .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc
-SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
- ANNOTATE_NOENDBR
- .byte 0x48, 0xb8
-
-/*
- * This forces the function return instruction to speculate into a trap
- * (UD2 in srso_return_thunk() below). This RET will then mispredict
- * and execution will continue at the return site read from the top of
- * the stack.
- */
-SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
- lea 8(%_ASM_SP), %_ASM_SP
- ret
- int3
- int3
- /* end of movabs */
- lfence
- call srso_safe_ret
- ud2
-SYM_CODE_END(srso_safe_ret)
-SYM_FUNC_END(srso_untrain_ret)
-__EXPORT_THUNK(srso_untrain_ret)
+#define JMP_RETBLEED_UNTRAIN_RET "jmp retbleed_untrain_ret"
+#else /* !CONFIG_CPU_UNRET_ENTRY */
+#define JMP_RETBLEED_UNTRAIN_RET "ud2"
+#endif /* CONFIG_CPU_UNRET_ENTRY */

-SYM_CODE_START(srso_return_thunk)
- UNWIND_HINT_FUNC
- ANNOTATE_NOENDBR
- call srso_safe_ret
- ud2
-SYM_CODE_END(srso_return_thunk)
+#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)

SYM_FUNC_START(entry_untrain_ret)
- ALTERNATIVE_2 "jmp retbleed_untrain_ret", \
- "jmp srso_untrain_ret", X86_FEATURE_SRSO, \
- "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
+ ALTERNATIVE JMP_RETBLEED_UNTRAIN_RET, JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO
SYM_FUNC_END(entry_untrain_ret)
__EXPORT_THUNK(entry_untrain_ret)

-SYM_CODE_START(__x86_return_thunk)
- UNWIND_HINT_FUNC
- ANNOTATE_NOENDBR
- ANNOTATE_UNRET_SAFE
- ret
- int3
-SYM_CODE_END(__x86_return_thunk)
-EXPORT_SYMBOL(__x86_return_thunk)
-
-#endif /* CONFIG_RETHUNK */
+#endif /* CONFIG_CPU_UNRET_ENTRY || CONFIG_CPU_SRSO */

#ifdef CONFIG_CALL_DEPTH_TRACKING

@@ -359,3 +359,22 @@ SYM_FUNC_START(__x86_return_skl)
SYM_FUNC_END(__x86_return_skl)

#endif /* CONFIG_CALL_DEPTH_TRACKING */
+
+/*
+ * This function name is magical and is used by -mfunction-return=thunk-extern
+ * for the compiler to generate JMPs to it.
+ *
+ * This code is only used during kernel boot or module init. All
+ * 'JMP __x86_return_thunk' sites are changed to something else by
+ * apply_returns().
+ */
+SYM_CODE_START(__x86_return_thunk)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+SYM_CODE_END(__x86_return_thunk)
+EXPORT_SYMBOL(__x86_return_thunk)
+
+#endif /* CONFIG_RETHUNK */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a9d69ec994b7..e23851796883 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -376,7 +376,7 @@ static void dump_pagetable(unsigned long address)
goto bad;

pr_cont("PUD %lx ", pud_val(*pud));
- if (!pud_present(*pud) || pud_large(*pud))
+ if (!pud_present(*pud) || pud_leaf(*pud))
goto out;

pmd = pmd_offset(pud, address);
@@ -1037,7 +1037,7 @@ spurious_kernel_fault(unsigned long error_code, unsigned long address)
if (!pud_present(*pud))
return 0;

- if (pud_large(*pud))
+ if (pud_leaf(*pud))
return spurious_kernel_fault_check(error_code, (pte_t *) pud);

pmd = pmd_offset(pud, address);
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index f50cc210a981..968d7005f4a7 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -26,31 +26,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
for (; addr < end; addr = next) {
pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
- bool use_gbpage;

next = (addr & PUD_MASK) + PUD_SIZE;
if (next > end)
next = end;

- /* if this is already a gbpage, this portion is already mapped */
- if (pud_large(*pud))
- continue;
-
- /* Is using a gbpage allowed? */
- use_gbpage = info->direct_gbpages;
-
- /* Don't use gbpage if it maps more than the requested region. */
- /* at the begining: */
- use_gbpage &= ((addr & ~PUD_MASK) == 0);
- /* ... or at the end: */
- use_gbpage &= ((next & ~PUD_MASK) == 0);
-
- /* Never overwrite existing mappings */
- use_gbpage &= !pud_present(*pud);
-
- if (use_gbpage) {
+ if (info->direct_gbpages) {
pud_t pudval;

+ if (pud_present(*pud))
+ continue;
+
+ addr &= PUD_MASK;
pudval = __pud((addr - info->offset) | info->page_flag);
set_pud(pud, pudval);
continue;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a190aae8ceaf..19d209b412d7 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -617,7 +617,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
}

if (!pud_none(*pud)) {
- if (!pud_large(*pud)) {
+ if (!pud_leaf(*pud)) {
pmd = pmd_offset(pud, 0);
paddr_last = phys_pmd_init(pmd, paddr,
paddr_end,
@@ -1163,7 +1163,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
if (!pud_present(*pud))
continue;

- if (pud_large(*pud) &&
+ if (pud_leaf(*pud) &&
IS_ALIGNED(addr, PUD_SIZE) &&
IS_ALIGNED(next, PUD_SIZE)) {
spin_lock(&init_mm.page_table_lock);
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 0302491d799d..fcf508c52bdc 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -115,7 +115,7 @@ static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
pud = pud_offset(p4d, addr);
do {
next = pud_addr_end(addr, end);
- if (!pud_large(*pud))
+ if (!pud_leaf(*pud))
kasan_populate_pud(pud, addr, next, nid);
} while (pud++, addr = next, addr != end);
}
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 0166ab1780cc..cc47a818a640 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -41,9 +41,9 @@
#include <linux/mem_encrypt.h>
#include <linux/cc_platform.h>

+#include <asm/init.h>
#include <asm/setup.h>
#include <asm/sections.h>
-#include <asm/cmdline.h>
#include <asm/coco.h>
#include <asm/sev.h>

@@ -95,10 +95,7 @@ struct sme_populate_pgd_data {
*/
static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch");

-static char sme_cmdline_arg[] __initdata = "mem_encrypt";
-static char sme_cmdline_on[] __initdata = "on";
-
-static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
+static void __head sme_clear_pgd(struct sme_populate_pgd_data *ppd)
{
unsigned long pgd_start, pgd_end, pgd_size;
pgd_t *pgd_p;
@@ -113,7 +110,7 @@ static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
memset(pgd_p, 0, pgd_size);
}

-static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
+static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
{
pgd_t *pgd;
p4d_t *p4d;
@@ -144,13 +141,13 @@ static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
set_pud(pud, __pud(PUD_FLAGS | __pa(pmd)));
}

- if (pud_large(*pud))
+ if (pud_leaf(*pud))
return NULL;

return pud;
}

-static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
+static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
{
pud_t *pud;
pmd_t *pmd;
@@ -166,7 +163,7 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags));
}

-static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
+static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd)
{
pud_t *pud;
pmd_t *pmd;
@@ -192,7 +189,7 @@ static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
set_pte(pte, __pte(ppd->paddr | ppd->pte_flags));
}

-static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
+static void __head __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
{
while (ppd->vaddr < ppd->vaddr_end) {
sme_populate_pgd_large(ppd);
@@ -202,7 +199,7 @@ static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
}
}

-static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
+static void __head __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
{
while (ppd->vaddr < ppd->vaddr_end) {
sme_populate_pgd(ppd);
@@ -212,7 +209,7 @@ static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
}
}

-static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
+static void __head __sme_map_range(struct sme_populate_pgd_data *ppd,
pmdval_t pmd_flags, pteval_t pte_flags)
{
unsigned long vaddr_end;
@@ -236,22 +233,22 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
__sme_map_range_pte(ppd);
}

-static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
+static void __head sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
{
__sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
}

-static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
+static void __head sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
{
__sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
}

-static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
+static void __head sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
{
__sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
}

-static unsigned long __init sme_pgtable_calc(unsigned long len)
+static unsigned long __head sme_pgtable_calc(unsigned long len)
{
unsigned long entries = 0, tables = 0;

@@ -288,7 +285,7 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
return entries + tables;
}

-void __init sme_encrypt_kernel(struct boot_params *bp)
+void __head sme_encrypt_kernel(struct boot_params *bp)
{
unsigned long workarea_start, workarea_end, workarea_len;
unsigned long execute_start, execute_end, execute_len;
@@ -323,9 +320,8 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
* memory from being cached.
*/

- /* Physical addresses gives us the identity mapped virtual addresses */
- kernel_start = __pa_symbol(_text);
- kernel_end = ALIGN(__pa_symbol(_end), PMD_SIZE);
+ kernel_start = (unsigned long)RIP_REL_REF(_text);
+ kernel_end = ALIGN((unsigned long)RIP_REL_REF(_end), PMD_SIZE);
kernel_len = kernel_end - kernel_start;

initrd_start = 0;
@@ -342,14 +338,6 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
}
#endif

- /*
- * We're running identity mapped, so we must obtain the address to the
- * SME encryption workarea using rip-relative addressing.
- */
- asm ("lea sme_workarea(%%rip), %0"
- : "=r" (workarea_start)
- : "p" (sme_workarea));
-
/*
* Calculate required number of workarea bytes needed:
* executable encryption area size:
@@ -359,7 +347,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
* pagetable structures for the encryption of the kernel
* pagetable structures for workarea (in case not currently mapped)
*/
- execute_start = workarea_start;
+ execute_start = workarea_start = (unsigned long)RIP_REL_REF(sme_workarea);
execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE;
execute_len = execute_end - execute_start;

@@ -502,13 +490,11 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
native_write_cr3(__native_read_cr3());
}

-void __init sme_enable(struct boot_params *bp)
+void __head sme_enable(struct boot_params *bp)
{
- const char *cmdline_ptr, *cmdline_arg, *cmdline_on;
unsigned int eax, ebx, ecx, edx;
unsigned long feature_mask;
unsigned long me_mask;
- char buffer[16];
bool snp;
u64 msr;

@@ -551,6 +537,9 @@ void __init sme_enable(struct boot_params *bp)

/* Check if memory encryption is enabled */
if (feature_mask == AMD_SME_BIT) {
+ if (!(bp->hdr.xloadflags & XLF_MEM_ENCRYPTION))
+ return;
+
/*
* No SME if Hypervisor bit is set. This check is here to
* prevent a guest from trying to enable SME. For running as a
@@ -570,31 +559,8 @@ void __init sme_enable(struct boot_params *bp)
msr = __rdmsr(MSR_AMD64_SYSCFG);
if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
return;
- } else {
- /* SEV state cannot be controlled by a command line option */
- goto out;
}

- /*
- * Fixups have not been applied to phys_base yet and we're running
- * identity mapped, so we must obtain the address to the SME command
- * line argument data using rip-relative addressing.
- */
- asm ("lea sme_cmdline_arg(%%rip), %0"
- : "=r" (cmdline_arg)
- : "p" (sme_cmdline_arg));
- asm ("lea sme_cmdline_on(%%rip), %0"
- : "=r" (cmdline_on)
- : "p" (sme_cmdline_on));
-
- cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
- ((u64)bp->ext_cmd_line_ptr << 32));
-
- if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0 ||
- strncmp(buffer, cmdline_on, sizeof(buffer)))
- return;
-
-out:
RIP_REL_REF(sme_me_mask) = me_mask;
physical_mask &= ~me_mask;
cc_vendor = CC_VENDOR_AMD;
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index de10800cd4dd..e7b9ac63bb02 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -950,6 +950,38 @@ static void free_pfn_range(u64 paddr, unsigned long size)
memtype_free(paddr, paddr + size);
}

+static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
+ pgprot_t *pgprot)
+{
+ unsigned long prot;
+
+ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT));
+
+ /*
+ * We need the starting PFN and cachemode used for track_pfn_remap()
+ * that covered the whole VMA. For most mappings, we can obtain that
+ * information from the page tables. For COW mappings, we might now
+ * suddenly have anon folios mapped and follow_phys() will fail.
+ *
+ * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to
+ * detect the PFN. If we need the cachemode as well, we're out of luck
+ * for now and have to fail fork().
+ */
+ if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) {
+ if (pgprot)
+ *pgprot = __pgprot(prot);
+ return 0;
+ }
+ if (is_cow_mapping(vma->vm_flags)) {
+ if (pgprot)
+ return -EINVAL;
+ *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
+ return 0;
+ }
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+}
+
/*
* track_pfn_copy is called when vma that is covering the pfnmap gets
* copied through copy_page_range().
@@ -960,20 +992,13 @@ static void free_pfn_range(u64 paddr, unsigned long size)
int track_pfn_copy(struct vm_area_struct *vma)
{
resource_size_t paddr;
- unsigned long prot;
unsigned long vma_size = vma->vm_end - vma->vm_start;
pgprot_t pgprot;

if (vma->vm_flags & VM_PAT) {
- /*
- * reserve the whole chunk covered by vma. We need the
- * starting address and protection from pte.
- */
- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
- WARN_ON_ONCE(1);
+ if (get_pat_info(vma, &paddr, &pgprot))
return -EINVAL;
- }
- pgprot = __pgprot(prot);
+ /* reserve the whole chunk covered by vma. */
return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
}

@@ -1048,7 +1073,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size, bool mm_wr_locked)
{
resource_size_t paddr;
- unsigned long prot;

if (vma && !(vma->vm_flags & VM_PAT))
return;
@@ -1056,11 +1080,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
/* free the chunk starting from pfn or the whole chunk */
paddr = (resource_size_t)pfn << PAGE_SHIFT;
if (!paddr && !size) {
- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
- WARN_ON_ONCE(1);
+ if (get_pat_info(vma, &paddr, NULL))
return;
- }
-
size = vma->vm_end - vma->vm_start;
}
free_pfn_range(paddr, size);
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index bda9f129835e..f3c4c756fe1e 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -684,7 +684,7 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
return NULL;

*level = PG_LEVEL_1G;
- if (pud_large(*pud) || !pud_present(*pud))
+ if (pud_leaf(*pud) || !pud_present(*pud))
return (pte_t *)pud;

pmd = pmd_offset(pud, address);
@@ -743,7 +743,7 @@ pmd_t *lookup_pmd_address(unsigned long address)
return NULL;

pud = pud_offset(p4d, address);
- if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud))
+ if (pud_none(*pud) || pud_leaf(*pud) || !pud_present(*pud))
return NULL;

return pmd_offset(pud, address);
@@ -1274,7 +1274,7 @@ static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end)
*/
while (end - start >= PUD_SIZE) {

- if (pud_large(*pud))
+ if (pud_leaf(*pud))
pud_clear(pud);
else
unmap_pmd_range(pud, start, start + PUD_SIZE);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 9deadf517f14..8e1ef5345b7a 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -774,7 +774,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
*/
int pud_clear_huge(pud_t *pud)
{
- if (pud_large(*pud)) {
+ if (pud_leaf(*pud)) {
pud_clear(pud);
return 1;
}
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 78414c6d1b5e..51b6b78e6b17 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -217,7 +217,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)

pud = pud_offset(p4d, address);
/* The user page tables do not use large mappings: */
- if (pud_large(*pud)) {
+ if (pud_leaf(*pud)) {
WARN_ON(1);
return NULL;
}
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 955133077c10..a6a4d3ca8ddc 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -344,7 +344,7 @@ static int emit_call(u8 **pprog, void *func, void *ip)
static int emit_rsb_call(u8 **pprog, void *func, void *ip)
{
OPTIMIZER_HIDE_VAR(func);
- x86_call_depth_emit_accounting(pprog, func);
+ ip += x86_call_depth_emit_accounting(pprog, func);
return emit_patch(pprog, func, ip, 0xE8);
}

diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c
index 6f955eb1e163..d8af46e67750 100644
--- a/arch/x86/power/hibernate.c
+++ b/arch/x86/power/hibernate.c
@@ -170,7 +170,7 @@ int relocate_restore_code(void)
goto out;
}
pud = pud_offset(p4d, relocated_restore_code);
- if (pud_large(*pud)) {
+ if (pud_leaf(*pud)) {
set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX));
goto out;
}
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index b6830554ff69..9d4a9311e819 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1082,7 +1082,7 @@ static void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin)
pmd_t *pmd_tbl;
int i;

- if (pud_large(*pud)) {
+ if (pud_leaf(*pud)) {
pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
xen_free_ro_pages(pa, PUD_SIZE);
return;
@@ -1863,7 +1863,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
if (!pud_present(pud))
return 0;
pa = pud_val(pud) & PTE_PFN_MASK;
- if (pud_large(pud))
+ if (pud_leaf(pud))
return pa + (vaddr & ~PUD_MASK);

pmd = native_make_pmd(xen_read_phys_ulong(pa + pmd_index(vaddr) *
diff --git a/drivers/acpi/acpica/dbnames.c b/drivers/acpi/acpica/dbnames.c
index b91155ea9c34..c9131259f717 100644
--- a/drivers/acpi/acpica/dbnames.c
+++ b/drivers/acpi/acpica/dbnames.c
@@ -550,8 +550,12 @@ acpi_db_walk_for_fields(acpi_handle obj_handle,
ACPI_FREE(buffer.pointer);

buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER;
- acpi_evaluate_object(obj_handle, NULL, NULL, &buffer);
-
+ status = acpi_evaluate_object(obj_handle, NULL, NULL, &buffer);
+ if (ACPI_FAILURE(status)) {
+ acpi_os_printf("Could Not evaluate object %p\n",
+ obj_handle);
+ return (AE_OK);
+ }
/*
* Since this is a field unit, surround the output in braces
*/
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index 45e48d653c60..80a45e11fb5b 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -787,37 +787,6 @@ static const struct ata_port_info mv_port_info[] = {
},
};

-static const struct pci_device_id mv_pci_tbl[] = {
- { PCI_VDEVICE(MARVELL, 0x5040), chip_504x },
- { PCI_VDEVICE(MARVELL, 0x5041), chip_504x },
- { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 },
- { PCI_VDEVICE(MARVELL, 0x5081), chip_508x },
- /* RocketRAID 1720/174x have different identifiers */
- { PCI_VDEVICE(TTI, 0x1720), chip_6042 },
- { PCI_VDEVICE(TTI, 0x1740), chip_6042 },
- { PCI_VDEVICE(TTI, 0x1742), chip_6042 },
-
- { PCI_VDEVICE(MARVELL, 0x6040), chip_604x },
- { PCI_VDEVICE(MARVELL, 0x6041), chip_604x },
- { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 },
- { PCI_VDEVICE(MARVELL, 0x6080), chip_608x },
- { PCI_VDEVICE(MARVELL, 0x6081), chip_608x },
-
- { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x },
-
- /* Adaptec 1430SA */
- { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 },
-
- /* Marvell 7042 support */
- { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 },
-
- /* Highpoint RocketRAID PCIe series */
- { PCI_VDEVICE(TTI, 0x2300), chip_7042 },
- { PCI_VDEVICE(TTI, 0x2310), chip_7042 },
-
- { } /* terminate list */
-};
-
static const struct mv_hw_ops mv5xxx_ops = {
.phy_errata = mv5_phy_errata,
.enable_leds = mv5_enable_leds,
@@ -4300,6 +4269,36 @@ static int mv_pci_init_one(struct pci_dev *pdev,
static int mv_pci_device_resume(struct pci_dev *pdev);
#endif

+static const struct pci_device_id mv_pci_tbl[] = {
+ { PCI_VDEVICE(MARVELL, 0x5040), chip_504x },
+ { PCI_VDEVICE(MARVELL, 0x5041), chip_504x },
+ { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 },
+ { PCI_VDEVICE(MARVELL, 0x5081), chip_508x },
+ /* RocketRAID 1720/174x have different identifiers */
+ { PCI_VDEVICE(TTI, 0x1720), chip_6042 },
+ { PCI_VDEVICE(TTI, 0x1740), chip_6042 },
+ { PCI_VDEVICE(TTI, 0x1742), chip_6042 },
+
+ { PCI_VDEVICE(MARVELL, 0x6040), chip_604x },
+ { PCI_VDEVICE(MARVELL, 0x6041), chip_604x },
+ { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 },
+ { PCI_VDEVICE(MARVELL, 0x6080), chip_608x },
+ { PCI_VDEVICE(MARVELL, 0x6081), chip_608x },
+
+ { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x },
+
+ /* Adaptec 1430SA */
+ { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 },
+
+ /* Marvell 7042 support */
+ { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 },
+
+ /* Highpoint RocketRAID PCIe series */
+ { PCI_VDEVICE(TTI, 0x2300), chip_7042 },
+ { PCI_VDEVICE(TTI, 0x2310), chip_7042 },
+
+ { } /* terminate list */
+};

static struct pci_driver mv_pci_driver = {
.name = DRV_NAME,
@@ -4312,6 +4311,7 @@ static struct pci_driver mv_pci_driver = {
#endif

};
+MODULE_DEVICE_TABLE(pci, mv_pci_tbl);

/**
* mv_print_info - Dump key info to kernel log for perusal.
@@ -4484,7 +4484,6 @@ static void __exit mv_exit(void)
MODULE_AUTHOR("Brett Russ");
MODULE_DESCRIPTION("SCSI low-level driver for Marvell SATA controllers");
MODULE_LICENSE("GPL v2");
-MODULE_DEVICE_TABLE(pci, mv_pci_tbl);
MODULE_VERSION(DRV_VERSION);
MODULE_ALIAS("platform:" DRV_NAME);

diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c
index b51d7a9d0d90..a482741eb181 100644
--- a/drivers/ata/sata_sx4.c
+++ b/drivers/ata/sata_sx4.c
@@ -957,8 +957,7 @@ static void pdc20621_get_from_dimm(struct ata_host *host, void *psource,

offset -= (idx * window_size);
idx++;
- dist = ((long) (window_size - (offset + size))) >= 0 ? size :
- (long) (window_size - offset);
+ dist = min(size, window_size - offset);
memcpy_fromio(psource, dimm_mmio + offset / 4, dist);

psource += dist;
@@ -1005,8 +1004,7 @@ static void pdc20621_put_to_dimm(struct ata_host *host, void *psource,
readl(mmio + PDC_DIMM_WINDOW_CTLR);
offset -= (idx * window_size);
idx++;
- dist = ((long)(s32)(window_size - (offset + size))) >= 0 ? size :
- (long) (window_size - offset);
+ dist = min(size, window_size - offset);
memcpy_toio(dimm_mmio + offset / 4, psource, dist);
writel(0x01, mmio + PDC_GENERAL_CTLR);
readl(mmio + PDC_GENERAL_CTLR);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 2cc0ab854168..0214288765c8 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -44,6 +44,7 @@ static bool fw_devlink_is_permissive(void);
static void __fw_devlink_link_to_consumers(struct device *dev);
static bool fw_devlink_drv_reg_done;
static bool fw_devlink_best_effort;
+static struct workqueue_struct *device_link_wq;

/**
* __fwnode_link_add - Create a link between two fwnode_handles.
@@ -531,12 +532,26 @@ static void devlink_dev_release(struct device *dev)
/*
* It may take a while to complete this work because of the SRCU
* synchronization in device_link_release_fn() and if the consumer or
- * supplier devices get deleted when it runs, so put it into the "long"
- * workqueue.
+ * supplier devices get deleted when it runs, so put it into the
+ * dedicated workqueue.
*/
- queue_work(system_long_wq, &link->rm_work);
+ queue_work(device_link_wq, &link->rm_work);
}

+/**
+ * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate
+ */
+void device_link_wait_removal(void)
+{
+ /*
+ * devlink removal jobs are queued in the dedicated work queue.
+ * To be sure that all removal jobs are terminated, ensure that any
+ * scheduled work has run to completion.
+ */
+ flush_workqueue(device_link_wq);
+}
+EXPORT_SYMBOL_GPL(device_link_wait_removal);
+
static struct class devlink_class = {
.name = "devlink",
.dev_groups = devlink_groups,
@@ -4090,9 +4105,14 @@ int __init devices_init(void)
sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj);
if (!sysfs_dev_char_kobj)
goto char_kobj_err;
+ device_link_wq = alloc_workqueue("device_link_wq", 0, 0);
+ if (!device_link_wq)
+ goto wq_err;

return 0;

+ wq_err:
+ kobject_put(sysfs_dev_char_kobj);
char_kobj_err:
kobject_put(sysfs_dev_block_kobj);
block_kobj_err:
diff --git a/drivers/base/regmap/regcache-maple.c b/drivers/base/regmap/regcache-maple.c
index 41edd6a430eb..55999a50ccc0 100644
--- a/drivers/base/regmap/regcache-maple.c
+++ b/drivers/base/regmap/regcache-maple.c
@@ -112,7 +112,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min,
unsigned long *entry, *lower, *upper;
unsigned long lower_index, lower_last;
unsigned long upper_index, upper_last;
- int ret;
+ int ret = 0;

lower = NULL;
upper = NULL;
@@ -145,7 +145,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min,
upper_index = max + 1;
upper_last = mas.last;

- upper = kmemdup(&entry[max + 1],
+ upper = kmemdup(&entry[max - mas.index + 1],
((mas.last - max) *
sizeof(unsigned long)),
map->alloc_flags);
@@ -244,7 +244,7 @@ static int regcache_maple_sync(struct regmap *map, unsigned int min,
unsigned long lmin = min;
unsigned long lmax = max;
unsigned int r, v, sync_start;
- int ret;
+ int ret = 0;
bool sync_needed = false;

map->cache_bypass = true;
diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
index 0211f704a358..5277090c6d6d 100644
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -758,11 +758,15 @@ EXPORT_SYMBOL_GPL(qca_uart_setup);

int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
{
+ bdaddr_t bdaddr_swapped;
struct sk_buff *skb;
int err;

- skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, bdaddr,
- HCI_EV_VENDOR, HCI_INIT_TIMEOUT);
+ baswap(&bdaddr_swapped, bdaddr);
+
+ skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6,
+ &bdaddr_swapped, HCI_EV_VENDOR,
+ HCI_INIT_TIMEOUT);
if (IS_ERR(skb)) {
err = PTR_ERR(skb);
bt_dev_err(hdev, "QCA Change address cmd failed (%d)", err);
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index f2d4985e036e..8861b8017fbd 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -7,7 +7,6 @@
*
* Copyright (C) 2007 Texas Instruments, Inc.
* Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved.
- * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*
* Acknowledgements:
* This file is based on hci_ll.c, which was...
@@ -226,6 +225,7 @@ struct qca_serdev {
struct qca_power *bt_power;
u32 init_speed;
u32 oper_speed;
+ bool bdaddr_property_broken;
const char *firmware_name;
};

@@ -1825,6 +1825,7 @@ static int qca_setup(struct hci_uart *hu)
const char *firmware_name = qca_get_firmware_name(hu);
int ret;
struct qca_btsoc_version ver;
+ struct qca_serdev *qcadev;
const char *soc_name;

ret = qca_check_speeds(hu);
@@ -1882,16 +1883,11 @@ static int qca_setup(struct hci_uart *hu)
case QCA_WCN6750:
case QCA_WCN6855:
case QCA_WCN7850:
+ set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);

- /* Set BDA quirk bit for reading BDA value from fwnode property
- * only if that property exist in DT.
- */
- if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) {
- set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
- bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later");
- } else {
- bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA");
- }
+ qcadev = serdev_device_get_drvdata(hu->serdev);
+ if (qcadev->bdaddr_property_broken)
+ set_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks);

hci_set_aosp_capable(hdev);

@@ -2264,6 +2260,9 @@ static int qca_serdev_probe(struct serdev_device *serdev)
if (!qcadev->oper_speed)
BT_DBG("UART will pick default operating speed");

+ qcadev->bdaddr_property_broken = device_property_read_bool(&serdev->dev,
+ "qcom,local-bd-address-broken");
+
if (data)
qcadev->btsoc_type = data->soc_type;
else
diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c
index c0979c8049b5..661de4add4c7 100644
--- a/drivers/dma-buf/st-dma-fence-chain.c
+++ b/drivers/dma-buf/st-dma-fence-chain.c
@@ -84,11 +84,11 @@ static int sanitycheck(void *arg)
return -ENOMEM;

chain = mock_chain(NULL, f, 1);
- if (!chain)
+ if (chain)
+ dma_fence_enable_sw_signaling(chain);
+ else
err = -ENOMEM;

- dma_fence_enable_sw_signaling(chain);
-
dma_fence_signal(f);
dma_fence_put(f);

diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index bfa30625f5d0..3dc2f9aaf08d 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -24,6 +24,8 @@ static bool efi_noinitrd;
static bool efi_nosoftreserve;
static bool efi_disable_pci_dma = IS_ENABLED(CONFIG_EFI_DISABLE_PCI_DMA);

+int efi_mem_encrypt;
+
bool __pure __efi_soft_reserve_enabled(void)
{
return !efi_nosoftreserve;
@@ -75,6 +77,12 @@ efi_status_t efi_parse_options(char const *cmdline)
efi_noinitrd = true;
} else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) {
efi_no5lvl = true;
+ } else if (IS_ENABLED(CONFIG_ARCH_HAS_MEM_ENCRYPT) &&
+ !strcmp(param, "mem_encrypt") && val) {
+ if (parse_option_str(val, "on"))
+ efi_mem_encrypt = 1;
+ else if (parse_option_str(val, "off"))
+ efi_mem_encrypt = -1;
} else if (!strcmp(param, "efi") && val) {
efi_nochunk = parse_option_str(val, "nochunk");
efi_novamap |= parse_option_str(val, "novamap");
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index c04b82ea40f2..fc18fd649ed7 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -37,8 +37,8 @@ extern bool efi_no5lvl;
extern bool efi_nochunk;
extern bool efi_nokaslr;
extern int efi_loglevel;
+extern int efi_mem_encrypt;
extern bool efi_novamap;
-
extern const efi_system_table_t *efi_system_table;

typedef union efi_dxe_services_table efi_dxe_services_table_t;
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index 8307950fe3ce..e4ae3db727ef 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -238,6 +238,15 @@ efi_status_t efi_adjust_memory_range_protection(unsigned long start,
rounded_end = roundup(start + size, EFI_PAGE_SIZE);

if (memattr != NULL) {
+ status = efi_call_proto(memattr, set_memory_attributes,
+ rounded_start,
+ rounded_end - rounded_start,
+ EFI_MEMORY_RO);
+ if (status != EFI_SUCCESS) {
+ efi_warn("Failed to set EFI_MEMORY_RO attribute\n");
+ return status;
+ }
+
status = efi_call_proto(memattr, clear_memory_attributes,
rounded_start,
rounded_end - rounded_start,
@@ -816,7 +825,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)

*kernel_entry = addr + entry;

- return efi_adjust_memory_range_protection(addr, kernel_total_size);
+ return efi_adjust_memory_range_protection(addr, kernel_text_size);
}

static void __noreturn enter_kernel(unsigned long kernel_addr,
@@ -888,6 +897,9 @@ void __noreturn efi_stub_entry(efi_handle_t handle,
}
}

+ if (efi_mem_encrypt > 0)
+ hdr->xloadflags |= XLF_MEM_ENCRYPTION;
+
status = efi_decompress_kernel(&kernel_entry);
if (status != EFI_SUCCESS) {
efi_err("Failed to decompress kernel\n");
diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index 4f3e66ece7f7..84125e55de10 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -655,6 +655,25 @@ static u32 line_event_id(int level)
GPIO_V2_LINE_EVENT_FALLING_EDGE;
}

+static inline char *make_irq_label(const char *orig)
+{
+ char *new;
+
+ if (!orig)
+ return NULL;
+
+ new = kstrdup_and_replace(orig, '/', ':', GFP_KERNEL);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+
+ return new;
+}
+
+static inline void free_irq_label(const char *label)
+{
+ kfree(label);
+}
+
#ifdef CONFIG_HTE

static enum hte_return process_hw_ts_thread(void *p)
@@ -942,6 +961,7 @@ static int debounce_setup(struct line *line, unsigned int debounce_period_us)
{
unsigned long irqflags;
int ret, level, irq;
+ char *label;

/* try hardware */
ret = gpiod_set_debounce(line->desc, debounce_period_us);
@@ -964,11 +984,17 @@ static int debounce_setup(struct line *line, unsigned int debounce_period_us)
if (irq < 0)
return -ENXIO;

+ label = make_irq_label(line->req->label);
+ if (IS_ERR(label))
+ return -ENOMEM;
+
irqflags = IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING;
ret = request_irq(irq, debounce_irq_handler, irqflags,
- line->req->label, line);
- if (ret)
+ label, line);
+ if (ret) {
+ free_irq_label(label);
return ret;
+ }
line->irq = irq;
} else {
ret = hte_edge_setup(line, GPIO_V2_LINE_FLAG_EDGE_BOTH);
@@ -1013,7 +1039,7 @@ static u32 gpio_v2_line_config_debounce_period(struct gpio_v2_line_config *lc,
static void edge_detector_stop(struct line *line)
{
if (line->irq) {
- free_irq(line->irq, line);
+ free_irq_label(free_irq(line->irq, line));
line->irq = 0;
}

@@ -1038,6 +1064,7 @@ static int edge_detector_setup(struct line *line,
unsigned long irqflags = 0;
u64 eflags;
int irq, ret;
+ char *label;

eflags = edflags & GPIO_V2_LINE_EDGE_FLAGS;
if (eflags && !kfifo_initialized(&line->req->events)) {
@@ -1074,11 +1101,17 @@ static int edge_detector_setup(struct line *line,
IRQF_TRIGGER_RISING : IRQF_TRIGGER_FALLING;
irqflags |= IRQF_ONESHOT;

+ label = make_irq_label(line->req->label);
+ if (IS_ERR(label))
+ return PTR_ERR(label);
+
/* Request a thread to read the events */
ret = request_threaded_irq(irq, edge_irq_handler, edge_irq_thread,
- irqflags, line->req->label, line);
- if (ret)
+ irqflags, label, line);
+ if (ret) {
+ free_irq_label(label);
return ret;
+ }

line->irq = irq;
return 0;
@@ -1943,7 +1976,7 @@ static void lineevent_free(struct lineevent_state *le)
blocking_notifier_chain_unregister(&le->gdev->device_notifier,
&le->device_unregistered_nb);
if (le->irq)
- free_irq(le->irq, le);
+ free_irq_label(free_irq(le->irq, le));
if (le->desc)
gpiod_free(le->desc);
kfree(le->label);
@@ -2091,6 +2124,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
int fd;
int ret;
int irq, irqflags = 0;
+ char *label;

if (copy_from_user(&eventreq, ip, sizeof(eventreq)))
return -EFAULT;
@@ -2175,15 +2209,23 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
if (ret)
goto out_free_le;

+ label = make_irq_label(le->label);
+ if (IS_ERR(label)) {
+ ret = PTR_ERR(label);
+ goto out_free_le;
+ }
+
/* Request a thread to read the events */
ret = request_threaded_irq(irq,
lineevent_irq_handler,
lineevent_irq_thread,
irqflags,
- le->label,
+ label,
le);
- if (ret)
+ if (ret) {
+ free_irq_label(label);
goto out_free_le;
+ }

le->irq = irq;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 85efd686e538..d59e8536192c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1369,6 +1369,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
void amdgpu_driver_release_kms(struct drm_device *dev);

int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
+int amdgpu_device_prepare(struct drm_device *dev);
int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 79261bec2654..062d78818da1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1549,6 +1549,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
} else {
pr_info("switched off\n");
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+ amdgpu_device_prepare(dev);
amdgpu_device_suspend(dev, true);
amdgpu_device_cache_pci_state(pdev);
/* Shut down the device */
@@ -4094,6 +4095,43 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
/*
* Suspend & resume.
*/
+/**
+ * amdgpu_device_prepare - prepare for device suspend
+ *
+ * @dev: drm dev pointer
+ *
+ * Prepare to put the hw in the suspend state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver suspend.
+ */
+int amdgpu_device_prepare(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int i, r;
+
+ if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+ return 0;
+
+ /* Evict the majority of BOs before starting suspend sequence */
+ r = amdgpu_device_evict_resources(adev);
+ if (r)
+ return r;
+
+ flush_delayed_work(&adev->gfx.gfx_off_delay_work);
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
/**
* amdgpu_device_suspend - initiate device suspend
*
@@ -4114,11 +4152,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)

adev->in_suspend = true;

- /* Evict the majority of BOs before grabbing the full access */
- r = amdgpu_device_evict_resources(adev);
- if (r)
- return r;
-
if (amdgpu_sriov_vf(adev)) {
amdgpu_virt_fini_data_exchange(adev);
r = amdgpu_virt_request_full_gpu(adev, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 3204c3a42f2a..f9bc38d20ce3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2386,8 +2386,9 @@ static int amdgpu_pmops_prepare(struct device *dev)
/* Return a positive number here so
* DPM_FLAG_SMART_SUSPEND works properly
*/
- if (amdgpu_device_supports_boco(drm_dev))
- return pm_runtime_suspended(dev);
+ if (amdgpu_device_supports_boco(drm_dev) &&
+ pm_runtime_suspended(dev))
+ return 1;

/* if we will not support s3 or s2i for the device
* then skip suspend
@@ -2396,7 +2397,7 @@ static int amdgpu_pmops_prepare(struct device *dev)
!amdgpu_acpi_is_s3_active(adev))
return 1;

- return 0;
+ return amdgpu_device_prepare(drm_dev);
}

static void amdgpu_pmops_complete(struct device *dev)
@@ -2598,6 +2599,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
if (amdgpu_device_supports_boco(drm_dev))
adev->mp1_state = PP_MP1_STATE_UNLOAD;

+ ret = amdgpu_device_prepare(drm_dev);
+ if (ret)
+ return ret;
ret = amdgpu_device_suspend(drm_dev, false);
if (ret) {
adev->in_runpm = false;
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 251dd800a2a6..7b5c1498941d 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -1179,9 +1179,10 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx)
dto_params.timing = &pipe_ctx->stream->timing;
dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
if (dccg) {
- dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst);
dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst);
+ if (dccg && dccg->funcs->set_dtbclk_dto)
+ dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
}
} else if (dccg && dccg->funcs->disable_symclk_se) {
dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index 1e3803739ae6..12af2859002f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -2728,18 +2728,17 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx)
}

if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) {
- dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
- dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst);
-
- phyd32clk = get_phyd32clk_src(link);
- dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk);
-
dto_params.otg_inst = tg->inst;
dto_params.pixclk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10;
dto_params.num_odm_segments = get_odm_segment_count(pipe_ctx);
dto_params.timing = &pipe_ctx->stream->timing;
dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr);
dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
+ dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
+ dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst);
+
+ phyd32clk = get_phyd32clk_src(link);
+ dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk);
} else {
}
if (hws->funcs.calculate_dccg_k1_k2_values && dc->res_pool->dccg->funcs->set_pixel_rate_div) {
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index abe829bbd54a..a9880fc53195 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -295,6 +295,7 @@ struct amd_ip_funcs {
int (*hw_init)(void *handle);
int (*hw_fini)(void *handle);
void (*late_fini)(void *handle);
+ int (*prepare_suspend)(void *handle);
int (*suspend)(void *handle);
int (*resume)(void *handle);
bool (*is_idle)(void *handle);
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 7352bde299d5..03bd3c7bd0dc 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -582,7 +582,12 @@ int drm_gem_map_attach(struct dma_buf *dma_buf,
{
struct drm_gem_object *obj = dma_buf->priv;

- if (!obj->funcs->get_sg_table)
+ /*
+ * drm_gem_map_dma_buf() requires obj->get_sg_table(), but drivers
+ * that implement their own ->map_dma_buf() do not.
+ */
+ if (dma_buf->ops->map_dma_buf == drm_gem_map_dma_buf &&
+ !obj->funcs->get_sg_table)
return -ENOSYS;

return drm_gem_pin(obj);
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 79f65eff6bb2..23400313d8a6 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -104,6 +104,7 @@ gt-y += \
gt/intel_ggtt_fencing.o \
gt/intel_gt.o \
gt/intel_gt_buffer_pool.o \
+ gt/intel_gt_ccs_mode.o \
gt/intel_gt_clock_utils.o \
gt/intel_gt_debugfs.o \
gt/intel_gt_engines_debugfs.o \
diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c
index b342fad180ca..61df6cd3f377 100644
--- a/drivers/gpu/drm/i915/display/intel_cursor.c
+++ b/drivers/gpu/drm/i915/display/intel_cursor.c
@@ -23,6 +23,8 @@
#include "intel_psr.h"
#include "skl_watermark.h"

+#include "gem/i915_gem_object.h"
+
/* Cursor formats */
static const u32 intel_cursor_formats[] = {
DRM_FORMAT_ARGB8888,
@@ -32,12 +34,10 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state)
{
struct drm_i915_private *dev_priv =
to_i915(plane_state->uapi.plane->dev);
- const struct drm_framebuffer *fb = plane_state->hw.fb;
- const struct drm_i915_gem_object *obj = intel_fb_obj(fb);
u32 base;

if (DISPLAY_INFO(dev_priv)->cursor_needs_physical)
- base = sg_dma_address(obj->mm.pages->sgl);
+ base = plane_state->phys_dma_addr;
else
base = intel_plane_ggtt_offset(plane_state);

diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 7fc92b1474cc..8b0dc2b75da4 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -701,6 +701,7 @@ struct intel_plane_state {
#define PLANE_HAS_FENCE BIT(0)

struct intel_fb_view view;
+ u32 phys_dma_addr; /* for cursor_needs_physical */

/* Plane pxp decryption state */
bool decrypt;
diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c
index fffd568070d4..a131656757f2 100644
--- a/drivers/gpu/drm/i915/display/intel_fb_pin.c
+++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c
@@ -254,6 +254,16 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state)
return PTR_ERR(vma);

plane_state->ggtt_vma = vma;
+
+ /*
+ * Pre-populate the dma address before we enter the vblank
+ * evade critical section as i915_gem_object_get_dma_address()
+ * will trigger might_sleep() even if it won't actually sleep,
+ * which is the case when the fb has already been pinned.
+ */
+ if (phys_cursor)
+ plane_state->phys_dma_addr =
+ i915_gem_object_get_dma_address(intel_fb_obj(fb), 0);
} else {
struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);

diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index ffc15d278a39..d557ecd4e1eb 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -20,6 +20,7 @@
#include "skl_scaler.h"
#include "skl_universal_plane.h"
#include "skl_watermark.h"
+#include "gt/intel_gt.h"
#include "pxp/intel_pxp.h"

static const u32 skl_plane_formats[] = {
@@ -2169,8 +2170,8 @@ static bool skl_plane_has_rc_ccs(struct drm_i915_private *i915,
enum pipe pipe, enum plane_id plane_id)
{
/* Wa_14017240301 */
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+ if (IS_GFX_GT_IP_STEP(to_gt(i915), IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(to_gt(i915), IP_VER(12, 71), STEP_A0, STEP_B0))
return false;

/* Wa_22011186057 */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index d24c0ce8805c..19156ba4b9ef 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -405,8 +405,8 @@ static int ext_set_pat(struct i915_user_extension __user *base, void *data)
BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));

- /* Limiting the extension only to Meteor Lake */
- if (!IS_METEORLAKE(i915))
+ /* Limiting the extension only to Xe_LPG and beyond */
+ if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 70))
return -ENODEV;

if (copy_from_user(&ext, base, sizeof(ext)))
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 7ad36198aab2..cddf8c16e9a7 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -4,9 +4,9 @@
*/

#include "gen8_engine_cs.h"
-#include "i915_drv.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
+#include "intel_gt.h"
#include "intel_lrc.h"
#include "intel_ring.h"

@@ -226,8 +226,8 @@ u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
static int mtl_dummy_pipe_control(struct i915_request *rq)
{
/* Wa_14016712196 */
- if (IS_MTL_GRAPHICS_STEP(rq->i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(rq->i915, P, STEP_A0, STEP_B0)) {
+ if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) ||
+ IS_DG2(rq->i915)) {
u32 *cs;

/* dummy PIPE_CONTROL + depth flush */
@@ -808,6 +808,7 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
struct drm_i915_private *i915 = rq->i915;
+ struct intel_gt *gt = rq->engine->gt;
u32 flags = (PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TLB_INVALIDATE |
PIPE_CONTROL_TILE_CACHE_FLUSH |
@@ -818,8 +819,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_FLUSH_ENABLE);

/* Wa_14016712196 */
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915))
/* dummy PIPE_CONTROL + depth flush */
cs = gen12_emit_pipe_control(cs, 0,
PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index e85d70a62123..765387639dab 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -912,6 +912,23 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
info->engine_mask &= ~BIT(GSC0);
}

+ /*
+ * Do not create the command streamer for CCS slices beyond the first.
+ * All the workload submitted to the first engine will be shared among
+ * all the slices.
+ *
+ * Once the user will be allowed to customize the CCS mode, then this
+ * check needs to be removed.
+ */
+ if (IS_DG2(gt->i915)) {
+ u8 first_ccs = __ffs(CCS_MASK(gt));
+
+ /* Mask off all the CCS engine */
+ info->engine_mask &= ~GENMASK(CCS3, CCS0);
+ /* Put back in the first CCS engine */
+ info->engine_mask |= BIT(_CCS(first_ccs));
+ }
+
return info->engine_mask;
}

@@ -1616,9 +1633,7 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine,
* Wa_22011802037: Prior to doing a reset, ensure CS is
* stopped, set ring stop bit and prefetch disable bit to halt CS
*/
- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
- (GRAPHICS_VER(engine->i915) >= 11 &&
- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70)))
+ if (intel_engine_reset_needs_wa_22011802037(engine->gt))
intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base),
_MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE));

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index a95615b345cd..5a3a5b29d150 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -21,7 +21,7 @@ static void intel_gsc_idle_msg_enable(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;

- if (IS_METEORLAKE(i915) && engine->id == GSC0) {
+ if (MEDIA_VER(i915) >= 13 && engine->id == GSC0) {
intel_uncore_write(engine->gt->uncore,
RC_PSMI_CTRL_GSCCS,
_MASKED_BIT_DISABLE(IDLE_MSG_DISABLE));
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 5a720e252312..42e09f158920 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3001,9 +3001,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
* Wa_22011802037: In addition to stopping the cs, we need
* to wait for any pending mi force wakeups
*/
- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
- (GRAPHICS_VER(engine->i915) >= 11 &&
- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70)))
+ if (intel_engine_reset_needs_wa_22011802037(engine->gt))
intel_engine_wait_for_pending_mi_fw(engine);

engine->execlists.reset_ccid = active_ccid(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 6c34547b58b5..6e63b46682f7 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -14,6 +14,37 @@
struct drm_i915_private;
struct drm_printer;

+/*
+ * Check that the GT is a graphics GT and has an IP version within the
+ * specified range (inclusive).
+ */
+#define IS_GFX_GT_IP_RANGE(gt, from, until) ( \
+ BUILD_BUG_ON_ZERO((from) < IP_VER(2, 0)) + \
+ BUILD_BUG_ON_ZERO((until) < (from)) + \
+ ((gt)->type != GT_MEDIA && \
+ GRAPHICS_VER_FULL((gt)->i915) >= (from) && \
+ GRAPHICS_VER_FULL((gt)->i915) <= (until)))
+
+/*
+ * Check that the GT is a graphics GT with a specific IP version and has
+ * a stepping in the range [from, until). The lower stepping bound is
+ * inclusive, the upper bound is exclusive. The most common use-case of this
+ * macro is for checking bounds for workarounds, which usually have a stepping
+ * ("from") at which the hardware issue is first present and another stepping
+ * ("until") at which a hardware fix is present and the software workaround is
+ * no longer necessary. E.g.,
+ *
+ * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0)
+ * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B1, STEP_FOREVER)
+ *
+ * "STEP_FOREVER" can be passed as "until" for workarounds that have no upper
+ * stepping bound for the specified IP version.
+ */
+#define IS_GFX_GT_IP_STEP(gt, ipver, from, until) ( \
+ BUILD_BUG_ON_ZERO((until) <= (from)) + \
+ (IS_GFX_GT_IP_RANGE((gt), (ipver), (ipver)) && \
+ IS_GRAPHICS_STEP((gt)->i915, (from), (until))))
+
#define GT_TRACE(gt, fmt, ...) do { \
const struct intel_gt *gt__ __maybe_unused = (gt); \
GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
new file mode 100644
index 000000000000..044219c5960a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
+#include "intel_gt_regs.h"
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt)
+{
+ int cslice;
+ u32 mode = 0;
+ int first_ccs = __ffs(CCS_MASK(gt));
+
+ if (!IS_DG2(gt->i915))
+ return;
+
+ /* Build the value for the fixed CCS load balancing */
+ for (cslice = 0; cslice < I915_MAX_CCS; cslice++) {
+ if (CCS_MASK(gt) & BIT(cslice))
+ /*
+ * If available, assign the cslice
+ * to the first available engine...
+ */
+ mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs);
+
+ else
+ /*
+ * ... otherwise, mark the cslice as
+ * unavailable if no CCS dispatches here
+ */
+ mode |= XEHP_CCS_MODE_CSLICE(cslice,
+ XEHP_CCS_MODE_CSLICE_MASK);
+ }
+
+ intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
new file mode 100644
index 000000000000..9e5549caeb26
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_CCS_MODE_H__
+#define __INTEL_GT_CCS_MODE_H__
+
+struct intel_gt;
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt);
+
+#endif /* __INTEL_GT_CCS_MODE_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index 2c0f1f3e28ff..c6dec485aefb 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -3,8 +3,7 @@
* Copyright © 2022 Intel Corporation
*/

-#include "i915_drv.h"
-
+#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
@@ -166,8 +165,8 @@ void intel_gt_mcr_init(struct intel_gt *gt)
gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table;
} else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
/* Wa_14016747170 */
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
fuse = REG_FIELD_GET(MTL_GT_L3_EXC_MASK,
intel_uncore_read(gt->uncore,
MTL_GT_ACTIVITY_FACTOR));
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 2cdfb2f713d0..64acab146b52 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1468,8 +1468,14 @@
#define ECOBITS_PPGTT_CACHE4B (0 << 8)

#define GEN12_RCU_MODE _MMIO(0x14800)
+#define XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1)
#define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0)

+#define XEHP_CCS_MODE _MMIO(0x14804)
+#define XEHP_CCS_MODE_CSLICE_MASK REG_GENMASK(2, 0) /* CCS0-3 + rsvd */
+#define XEHP_CCS_MODE_CSLICE_WIDTH ilog2(XEHP_CCS_MODE_CSLICE_MASK + 1)
+#define XEHP_CCS_MODE_CSLICE(cslice, ccs) (ccs << (cslice * XEHP_CCS_MODE_CSLICE_WIDTH))
+
#define CHV_FUSE_GT _MMIO(VLV_GUNIT_BASE + 0x2168)
#define CHV_FGT_DISABLE_SS0 (1 << 10)
#define CHV_FGT_DISABLE_SS1 (1 << 11)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index c378cc7c953c..b99efa348ad1 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1316,29 +1316,6 @@ gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
return cs;
}

-/*
- * On DG2 during context restore of a preempted context in GPGPU mode,
- * RCS restore hang is detected. This is extremely timing dependent.
- * To address this below sw wabb is implemented for DG2 A steppings.
- */
-static u32 *
-dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs)
-{
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG(ce->engine->mmio_base));
- *cs++ = 0x21;
-
- *cs++ = MI_LOAD_REGISTER_REG;
- *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
- *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT1);
-
- *cs++ = MI_LOAD_REGISTER_REG;
- *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
- *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT2);
-
- return cs;
-}
-
/*
* The bspec's tuning guide asks us to program a vertical watermark value of
* 0x3FF. However this register is not saved/restored properly by the
@@ -1363,21 +1340,15 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
cs = gen12_emit_cmd_buf_wa(ce, cs);
cs = gen12_emit_restore_scratch(ce, cs);

- /* Wa_22011450934:dg2 */
- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0))
- cs = dg2_emit_rcs_hang_wabb(ce, cs);
-
/* Wa_16013000631:dg2 */
- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
- IS_DG2_G11(ce->engine->i915))
+ if (IS_DG2_G11(ce->engine->i915))
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);

cs = gen12_emit_aux_table_inv(ce->engine, cs);

/* Wa_16014892111 */
- if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(ce->engine->i915, P, STEP_A0, STEP_B0) ||
+ if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
IS_DG2(ce->engine->i915))
cs = dg2_emit_draw_watermark_setting(cs);

@@ -1391,8 +1362,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
cs = gen12_emit_restore_scratch(ce, cs);

/* Wa_16013000631:dg2 */
- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
- IS_DG2_G11(ce->engine->i915))
+ if (IS_DG2_G11(ce->engine->i915))
if (ce->engine->class == COMPUTE_CLASS)
cs = gen8_emit_pipe_control(cs,
PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 2c014407225c..07269ff3be13 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -404,18 +404,6 @@ static const struct drm_i915_mocs_entry dg2_mocs_table[] = {
MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
};

-static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = {
- /* Wa_14011441408: Set Go to Memory for MOCS#0 */
- MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
- /* UC - Coherent; GO:Memory */
- MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
- /* UC - Non-Coherent; GO:Memory */
- MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
-
- /* WB - LC */
- MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
-};
-
static const struct drm_i915_mocs_entry pvc_mocs_table[] = {
/* Error */
MOCS_ENTRY(0, 0, L3_3_WB),
@@ -507,7 +495,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
memset(table, 0, sizeof(struct drm_i915_mocs_table));

table->unused_entries_index = I915_MOCS_PTE;
- if (IS_METEORLAKE(i915)) {
+ if (IS_GFX_GT_IP_RANGE(&i915->gt0, IP_VER(12, 70), IP_VER(12, 71))) {
table->size = ARRAY_SIZE(mtl_mocs_table);
table->table = mtl_mocs_table;
table->n_entries = MTL_NUM_MOCS_ENTRIES;
@@ -521,13 +509,8 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
table->wb_index = 2;
table->unused_entries_index = 2;
} else if (IS_DG2(i915)) {
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
- table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax);
- table->table = dg2_mocs_table_g10_ax;
- } else {
- table->size = ARRAY_SIZE(dg2_mocs_table);
- table->table = dg2_mocs_table;
- }
+ table->size = ARRAY_SIZE(dg2_mocs_table);
+ table->table = dg2_mocs_table;
table->uc_index = 1;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
table->unused_entries_index = 3;
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index ccdc1afbf11b..9e113e947326 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -118,14 +118,12 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
GEN6_RC_CTL_EI_MODE(1);

/*
- * Wa_16011777198 and BSpec 52698 - Render powergating must be off.
+ * BSpec 52698 - Render powergating must be off.
* FIXME BSpec is outdated, disabling powergating for MTL is just
* temporary wa and should be removed after fixing real cause
* of forcewake timeouts.
*/
- if (IS_METEORLAKE(gt->i915) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
pg_enable =
GEN9_MEDIA_PG_ENABLE |
GEN11_MEDIA_SAMPLER_PG_ENABLE;
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 5fa57a34cf4b..13fb8e5042c5 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -705,7 +705,7 @@ static int __reset_guc(struct intel_gt *gt)

static bool needs_wa_14015076503(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
- if (!IS_METEORLAKE(gt->i915) || !HAS_ENGINE(gt, GSC0))
+ if (MEDIA_VER_FULL(gt->i915) != IP_VER(13, 0) || !HAS_ENGINE(gt, GSC0))
return false;

if (!__HAS_ENGINE(engine_mask, GSC0))
@@ -1632,6 +1632,24 @@ void __intel_fini_wedge(struct intel_wedge_me *w)
w->gt = NULL;
}

+/*
+ * Wa_22011802037 requires that we (or the GuC) ensure that no command
+ * streamers are executing MI_FORCE_WAKE while an engine reset is initiated.
+ */
+bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt)
+{
+ if (GRAPHICS_VER(gt->i915) < 11)
+ return false;
+
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0))
+ return true;
+
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+ return false;
+
+ return true;
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_reset.c"
#include "selftest_hangcheck.c"
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index 25c975b6e8fc..f615b30b81c5 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -78,4 +78,6 @@ void __intel_fini_wedge(struct intel_wedge_me *w);
bool intel_has_gpu_reset(const struct intel_gt *gt);
bool intel_has_reset_engine(const struct intel_gt *gt);

+bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt);
+
#endif /* I915_RESET_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 092542f53aad..4feef874e6d6 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1161,7 +1161,7 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c
{
struct drm_i915_private *i915 = rps_to_i915(rps);

- if (IS_METEORLAKE(i915))
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
return mtl_get_freq_caps(rps, caps);
else
return __gen6_rps_get_freq_caps(rps, caps);
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 3ae0dbd39eaa..be060b32bd9c 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -10,6 +10,7 @@
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
#include "intel_gt_mcr.h"
#include "intel_gt_regs.h"
#include "intel_ring.h"
@@ -50,7 +51,8 @@
* registers belonging to BCS, VCS or VECS should be implemented in
* xcs_engine_wa_init(). Workarounds for registers not belonging to a specific
* engine's MMIO range but that are part of of the common RCS/CCS reset domain
- * should be implemented in general_render_compute_wa_init().
+ * should be implemented in general_render_compute_wa_init(). The settings
+ * about the CCS load balancing should be added in ccs_engine_wa_mode().
*
* - GT workarounds: the list of these WAs is applied whenever these registers
* revert to their default values: on GPU reset, suspend/resume [1]_, etc.
@@ -764,39 +766,15 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
{
dg2_ctx_gt_tuning_init(engine, wal);

- /* Wa_16011186671:dg2_g11 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
- wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
- wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
- }
-
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
- /* Wa_14010469329:dg2_g10 */
- wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
- XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
-
- /*
- * Wa_22010465075:dg2_g10
- * Wa_22010613112:dg2_g10
- * Wa_14010698770:dg2_g10
- */
- wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
- GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
- }
-
/* Wa_16013271637:dg2 */
wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
MSC_MSAA_REODER_BUF_BYPASS_DISABLE);

/* Wa_14014947963:dg2 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
- wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);
+ wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);

/* Wa_18018764978:dg2 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_C0, STEP_FOREVER) ||
- IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
- wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
+ wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);

/* Wa_15010599737:dg2 */
wa_mcr_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN);
@@ -805,27 +783,32 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
}

-static void mtl_ctx_gt_tuning_init(struct intel_engine_cs *engine,
- struct i915_wa_list *wal)
+static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
{
- struct drm_i915_private *i915 = engine->i915;
+ struct intel_gt *gt = engine->gt;

dg2_ctx_gt_tuning_init(engine, wal);

- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER))
+ /*
+ * Due to Wa_16014892111, the DRAW_WATERMARK tuning must be done in
+ * gen12_emit_indirect_ctx_rcs() rather than here on some early
+ * steppings.
+ */
+ if (!(IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)))
wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false);
}

-static void mtl_ctx_workarounds_init(struct intel_engine_cs *engine,
- struct i915_wa_list *wal)
+static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
{
- struct drm_i915_private *i915 = engine->i915;
+ struct intel_gt *gt = engine->gt;

- mtl_ctx_gt_tuning_init(engine, wal);
+ xelpg_ctx_gt_tuning_init(engine, wal);

- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
/* Wa_14014947963 */
wa_masked_field_set(wal, VF_PREEMPTION,
PREEMPTION_VERTEX_COUNT, 0x4000);
@@ -931,8 +914,8 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
if (engine->class != RENDER_CLASS)
goto done;

- if (IS_METEORLAKE(i915))
- mtl_ctx_workarounds_init(engine, wal);
+ if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
+ xelpg_ctx_workarounds_init(engine, wal);
else if (IS_PONTEVECCHIO(i915))
; /* noop; none at this time */
else if (IS_DG2(i915))
@@ -1606,31 +1589,11 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
static void
dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- struct intel_engine_cs *engine;
- int id;
-
xehp_init_mcr(gt, wal);

/* Wa_14011060649:dg2 */
wa_14011060649(gt, wal);

- /*
- * Although there are per-engine instances of these registers,
- * they technically exist outside the engine itself and are not
- * impacted by engine resets. Furthermore, they're part of the
- * GuC blacklist so trying to treat them as engine workarounds
- * will result in GuC initialization failure and a wedged GPU.
- */
- for_each_engine(engine, gt, id) {
- if (engine->class != VIDEO_DECODE_CLASS)
- continue;
-
- /* Wa_16010515920:dg2_g10 */
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
- wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base),
- ALNUNIT_CLKGATE_DIS);
- }
-
if (IS_DG2_G10(gt->i915)) {
/* Wa_22010523718:dg2 */
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
@@ -1641,65 +1604,6 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
DSS_ROUTER_CLKGATE_DIS);
}

- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) {
- /* Wa_14012362059:dg2 */
- wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
- }
-
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
- /* Wa_14010948348:dg2_g10 */
- wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS);
-
- /* Wa_14011037102:dg2_g10 */
- wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS);
-
- /* Wa_14011371254:dg2_g10 */
- wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
-
- /* Wa_14011431319:dg2_g10 */
- wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
- GAMTLBVDBOX7_CLKGATE_DIS |
- GAMTLBVDBOX6_CLKGATE_DIS |
- GAMTLBVDBOX5_CLKGATE_DIS |
- GAMTLBVDBOX4_CLKGATE_DIS |
- GAMTLBVDBOX3_CLKGATE_DIS |
- GAMTLBVDBOX2_CLKGATE_DIS |
- GAMTLBVDBOX1_CLKGATE_DIS |
- GAMTLBVDBOX0_CLKGATE_DIS |
- GAMTLBKCR_CLKGATE_DIS |
- GAMTLBGUC_CLKGATE_DIS |
- GAMTLBBLT_CLKGATE_DIS);
- wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
- GAMTLBGFXA1_CLKGATE_DIS |
- GAMTLBCOMPA0_CLKGATE_DIS |
- GAMTLBCOMPA1_CLKGATE_DIS |
- GAMTLBCOMPB0_CLKGATE_DIS |
- GAMTLBCOMPB1_CLKGATE_DIS |
- GAMTLBCOMPC0_CLKGATE_DIS |
- GAMTLBCOMPC1_CLKGATE_DIS |
- GAMTLBCOMPD0_CLKGATE_DIS |
- GAMTLBCOMPD1_CLKGATE_DIS |
- GAMTLBMERT_CLKGATE_DIS |
- GAMTLBVEBOX3_CLKGATE_DIS |
- GAMTLBVEBOX2_CLKGATE_DIS |
- GAMTLBVEBOX1_CLKGATE_DIS |
- GAMTLBVEBOX0_CLKGATE_DIS);
-
- /* Wa_14010569222:dg2_g10 */
- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
- GAMEDIA_CLKGATE_DIS);
-
- /* Wa_14011028019:dg2_g10 */
- wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
-
- /* Wa_14010680813:dg2_g10 */
- wa_mcr_write_or(wal, XEHP_GAMSTLB_CTRL,
- CONTROL_BLOCK_CLKGATE_DIS |
- EGRESS_BLOCK_CLKGATE_DIS |
- TAG_BLOCK_CLKGATE_DIS);
- }
-
/* Wa_14014830051:dg2 */
wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);

@@ -1741,14 +1645,15 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
static void
xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- /* Wa_14018778641 / Wa_18018781329 */
+ /* Wa_14018575942 / Wa_18018781329 */
+ wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);

/* Wa_22016670082 */
wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);

- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
/* Wa_14014830051 */
wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);

@@ -1791,10 +1696,8 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
*/
static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal)
{
- if (IS_METEORLAKE(gt->i915)) {
- if (gt->type != GT_MEDIA)
- wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
-
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) {
+ wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
}

@@ -1826,7 +1729,7 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
return;
}

- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
xelpg_gt_workarounds_init(gt, wal);
else if (IS_PONTEVECCHIO(i915))
pvc_gt_workarounds_init(gt, wal);
@@ -2242,29 +2145,10 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine)

switch (engine->class) {
case RENDER_CLASS:
- /*
- * Wa_1507100340:dg2_g10
- *
- * This covers 4 registers which are next to one another :
- * - PS_INVOCATION_COUNT
- * - PS_INVOCATION_COUNT_UDW
- * - PS_DEPTH_COUNT
- * - PS_DEPTH_COUNT_UDW
- */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
- whitelist_reg_ext(w, PS_INVOCATION_COUNT,
- RING_FORCE_TO_NONPRIV_ACCESS_RD |
- RING_FORCE_TO_NONPRIV_RANGE_4);
-
/* Required by recommended tuning setting (not a workaround) */
whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3);

break;
- case COMPUTE_CLASS:
- /* Wa_16011157294:dg2_g10 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
- whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
- break;
default:
break;
}
@@ -2294,7 +2178,7 @@ static void pvc_whitelist_build(struct intel_engine_cs *engine)
blacklist_trtt(engine);
}

-static void mtl_whitelist_build(struct intel_engine_cs *engine)
+static void xelpg_whitelist_build(struct intel_engine_cs *engine)
{
struct i915_wa_list *w = &engine->whitelist;

@@ -2316,8 +2200,10 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)

wa_init_start(w, engine->gt, "whitelist", engine->name);

- if (IS_METEORLAKE(i915))
- mtl_whitelist_build(engine);
+ if (engine->gt->type == GT_MEDIA)
+ ; /* none yet */
+ else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
+ xelpg_whitelist_build(engine);
else if (IS_PONTEVECCHIO(i915))
pvc_whitelist_build(engine);
else if (IS_DG2(i915))
@@ -2415,62 +2301,35 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
}
}

-static bool needs_wa_1308578152(struct intel_engine_cs *engine)
-{
- return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >=
- GEN_DSS_PER_GSLICE;
-}
-
static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = engine->i915;
+ struct intel_gt *gt = engine->gt;

- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
/* Wa_22014600077 */
wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
ENABLE_EU_COUNT_FOR_TDL_FLUSH);
}

- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
+ IS_DG2(i915)) {
/* Wa_1509727124 */
wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
SC_DISABLE_POWER_OPTIMIZATION_EBB);
}

- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(i915) || IS_DG2_G12(i915) ||
- IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_DG2(i915)) {
/* Wa_22012856258 */
wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
GEN12_DISABLE_READ_SUPPRESSION);
}

- if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
- /* Wa_14013392000:dg2_g11 */
- wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
- }
-
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
- /* Wa_14012419201:dg2 */
- wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4,
- GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
- }
-
- /* Wa_1308578152:dg2_g10 when first gslice is fused off */
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) &&
- needs_wa_1308578152(engine)) {
- wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON,
- GEN12_REPLAY_MODE_GRANULARITY);
- }
-
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
+ if (IS_DG2(i915)) {
/*
* Wa_22010960976:dg2
* Wa_14013347512:dg2
@@ -2479,34 +2338,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
}

- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
- /*
- * Wa_1608949956:dg2_g10
- * Wa_14010198302:dg2_g10
- */
- wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
- MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
- }
-
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0))
- /* Wa_22010430635:dg2 */
- wa_mcr_masked_en(wal,
- GEN9_ROW_CHICKEN4,
- GEN12_DISABLE_GRF_CLEAR);
-
- /* Wa_14013202645:dg2 */
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
- wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
-
- /* Wa_22012532006:dg2 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
- wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
- DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
-
- if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G10(i915)) {
+ if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) {
/* Wa_22014600077:dg2 */
wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
_MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
@@ -2514,6 +2346,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
true);
}

+ if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
+ IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
+ /*
+ * Wa_1606700617:tgl,dg1,adl-p
+ * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
+ * Wa_14010826681:tgl,dg1,rkl,adl-p
+ * Wa_18019627453:dg2
+ */
+ wa_masked_en(wal,
+ GEN9_CS_DEBUG_MODE1,
+ FF_DOP_CLOCK_GATE_DISABLE);
+ }
+
if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
/* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
@@ -2527,19 +2372,11 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
*/
wa_write_or(wal, GEN7_FF_THREAD_MODE,
GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
- }

- if (IS_ALDERLAKE_P(i915) || IS_DG2(i915) || IS_ALDERLAKE_S(i915) ||
- IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
- /*
- * Wa_1606700617:tgl,dg1,adl-p
- * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
- * Wa_14010826681:tgl,dg1,rkl,adl-p
- * Wa_18019627453:dg2
- */
- wa_masked_en(wal,
- GEN9_CS_DEBUG_MODE1,
- FF_DOP_CLOCK_GATE_DISABLE);
+ /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
+ wa_mcr_masked_en(wal,
+ GEN10_SAMPLER_MODE,
+ ENABLE_SMALLPL);
}

if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
@@ -2566,14 +2403,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN8_RC_SEMA_IDLE_MSG_DISABLE);
}

- if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) ||
- IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) {
- /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
- wa_mcr_masked_en(wal,
- GEN10_SAMPLER_MODE,
- ENABLE_SMALLPL);
- }
-
if (GRAPHICS_VER(i915) == 11) {
/* This is not an Wa. Enable for better image quality */
wa_masked_en(wal,
@@ -2975,10 +2804,12 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
* function invoked by __intel_engine_init_ctx_wa().
*/
static void
-add_render_compute_tuning_settings(struct drm_i915_private *i915,
+add_render_compute_tuning_settings(struct intel_gt *gt,
struct i915_wa_list *wal)
{
- if (IS_METEORLAKE(i915) || IS_DG2(i915))
+ struct drm_i915_private *i915 = gt->i915;
+
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915))
wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);

/*
@@ -2994,6 +2825,28 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915,
wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
}

+static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+{
+ struct intel_gt *gt = engine->gt;
+
+ if (!IS_DG2(gt->i915))
+ return;
+
+ /*
+ * Wa_14019159160: This workaround, along with others, leads to
+ * significant challenges in utilizing load balancing among the
+ * CCS slices. Consequently, an architectural decision has been
+ * made to completely disable automatic CCS load balancing.
+ */
+ wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE);
+
+ /*
+ * After having disabled automatic load balancing we need to
+ * assign all slices to a single CCS. We will call it CCS mode 1
+ */
+ intel_gt_apply_ccs_mode(gt);
+}
+
/*
* The workarounds in this function apply to shared registers in
* the general render reset domain that aren't tied to a
@@ -3007,8 +2860,9 @@ static void
general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = engine->i915;
+ struct intel_gt *gt = engine->gt;

- add_render_compute_tuning_settings(i915, wal);
+ add_render_compute_tuning_settings(gt, wal);

if (GRAPHICS_VER(i915) >= 11) {
/* This is not a Wa (although referred to as
@@ -3029,13 +2883,14 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE);
}

- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER))
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER) ||
+ IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 74), IP_VER(12, 74)))
/* Wa_14017856879 */
wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH);

- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
/*
* Wa_14017066071
* Wa_14017654203
@@ -3043,37 +2898,47 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
MTL_DISABLE_SAMPLER_SC_OOO);

- if (IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
/* Wa_22015279794 */
wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
DISABLE_PREFETCH_INTO_IC);

- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
+ IS_DG2(i915)) {
/* Wa_22013037850 */
wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
DISABLE_128B_EVICTION_COMMAND_UDW);
+
+ /* Wa_18017747507 */
+ wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
}

- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
IS_PONTEVECCHIO(i915) ||
IS_DG2(i915)) {
/* Wa_22014226127 */
wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
}

- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
- IS_DG2(i915)) {
- /* Wa_18017747507 */
- wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
+ if (IS_PONTEVECCHIO(i915) || IS_DG2(i915)) {
+ /* Wa_14015227452:dg2,pvc */
+ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
+
+ /* Wa_16015675438:dg2,pvc */
+ wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
+ }
+
+ if (IS_DG2(i915)) {
+ /*
+ * Wa_16011620976:dg2_g11
+ * Wa_22015475538:dg2
+ */
+ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
}

- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
- IS_DG2_G11(i915)) {
+ if (IS_DG2_G11(i915)) {
/*
* Wa_22012826095:dg2
* Wa_22013059131:dg2
@@ -3085,18 +2950,18 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
/* Wa_22013059131:dg2 */
wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0,
FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
- }

- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
/*
- * Wa_14010918519:dg2_g10
+ * Wa_22012654132
*
- * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping,
- * so ignoring verification.
+ * Note that register 0xE420 is write-only and cannot be read
+ * back for verification on DG2 (due to Wa_14012342262), so
+ * we need to explicitly skip the readback.
*/
- wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
- FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
- 0, false);
+ wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
+ _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
+ 0 /* write-only, so skip validation */,
+ true);
}

if (IS_XEHPSDV(i915)) {
@@ -3114,35 +2979,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
}
-
- if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) {
- /* Wa_14015227452:dg2,pvc */
- wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
-
- /* Wa_16015675438:dg2,pvc */
- wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
- }
-
- if (IS_DG2(i915)) {
- /*
- * Wa_16011620976:dg2_g11
- * Wa_22015475538:dg2
- */
- wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
- }
-
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || IS_DG2_G11(i915))
- /*
- * Wa_22012654132
- *
- * Note that register 0xE420 is write-only and cannot be read
- * back for verification on DG2 (due to Wa_14012342262), so
- * we need to explicitly skip the readback.
- */
- wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
- _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
- 0 /* write-only, so skip validation */,
- true);
}

static void
@@ -3158,8 +2994,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
* to a single RCS/CCS engine's workaround list since
* they're reset as part of the general render domain reset.
*/
- if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
+ if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) {
general_render_compute_wa_init(engine, wal);
+ ccs_engine_wa_mode(engine, wal);
+ }

if (engine->class == COMPUTE_CLASS)
ccs_engine_wa_init(engine, wal);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 569b5fe94c41..861d0c58388c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -272,18 +272,14 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50))
flags |= GUC_WA_POLLCS;

- /* Wa_16011759253:dg2_g10:a0 */
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
- flags |= GUC_WA_GAM_CREDITS;
-
/* Wa_14014475959 */
- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) ||
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
IS_DG2(gt->i915))
flags |= GUC_WA_HOLD_CCS_SWITCHOUT;

/*
- * Wa_14012197797:dg2_g10:a0,dg2_g11:a0
- * Wa_22011391025:dg2_g10,dg2_g11,dg2_g12
+ * Wa_14012197797
+ * Wa_22011391025
*
* The same WA bit is used for both and 22011391025 is applicable to
* all DG2.
@@ -292,22 +288,14 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
flags |= GUC_WA_DUAL_QUEUE;

/* Wa_22011802037: graphics version 11/12 */
- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) ||
- (GRAPHICS_VER(gt->i915) >= 11 &&
- GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70)))
+ if (intel_engine_reset_needs_wa_22011802037(gt))
flags |= GUC_WA_PRE_PARSER;

- /* Wa_16011777198:dg2 */
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))
- flags |= GUC_WA_RCS_RESET_BEFORE_RC6;
-
/*
- * Wa_22012727170:dg2_g10[a0-c0), dg2_g11[a0..)
- * Wa_22012727685:dg2_g11[a0..)
+ * Wa_22012727170
+ * Wa_22012727685
*/
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER))
+ if (IS_DG2_G11(gt->i915))
flags |= GUC_WA_CONTEXT_ISOLATION;

/* Wa_16015675438 */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 836e4d9d65ef..b5de5a9f5967 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1690,9 +1690,7 @@ static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
* Wa_22011802037: In addition to stopping the cs, we need
* to wait for any pending mi force wakeups
*/
- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
- (GRAPHICS_VER(engine->i915) >= 11 &&
- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) {
+ if (intel_engine_reset_needs_wa_22011802037(engine->gt)) {
intel_engine_stop_cs(engine);
intel_engine_wait_for_pending_mi_fw(engine);
}
@@ -4299,7 +4297,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)

/* Wa_14014475959:dg2 */
if (engine->class == COMPUTE_CLASS)
- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
+ if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
IS_DG2(engine->i915))
engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 4de44cf1026d..7a90a2e32c9f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -144,7 +144,7 @@ static const char *i915_cache_level_str(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915 = obj_to_i915(obj);

- if (IS_METEORLAKE(i915)) {
+ if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 71))) {
switch (obj->pat_index) {
case 0: return " WB";
case 1: return " WT";
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7a8ce7239bc9..e0e0493d6c1f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -658,10 +658,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define IS_XEHPSDV_GRAPHICS_STEP(__i915, since, until) \
(IS_XEHPSDV(__i915) && IS_GRAPHICS_STEP(__i915, since, until))

-#define IS_MTL_GRAPHICS_STEP(__i915, variant, since, until) \
- (IS_SUBPLATFORM(__i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_##variant) && \
- IS_GRAPHICS_STEP(__i915, since, until))
-
#define IS_MTL_DISPLAY_STEP(__i915, since, until) \
(IS_METEORLAKE(__i915) && \
IS_DISPLAY_STEP(__i915, since, until))
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 8f4a25d2cfc2..3f90403d86cb 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3255,11 +3255,10 @@ get_sseu_config(struct intel_sseu *out_sseu,
*/
u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915)
{
- /*
- * Wa_18013179988:dg2
- * Wa_14015846243:mtl
- */
- if (IS_DG2(i915) || IS_METEORLAKE(i915)) {
+ struct intel_gt *gt = to_gt(i915);
+
+ /* Wa_18013179988 */
+ if (IS_DG2(i915) || IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) {
intel_wakeref_t wakeref;
u32 reg, shift;

@@ -4564,7 +4563,7 @@ static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)

static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
- if (IS_METEORLAKE(perf->i915))
+ if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70))
return reg_in_range_table(addr, mtl_oa_mux_regs);
else
return reg_in_range_table(addr, gen12_oa_mux_regs);
diff --git a/drivers/gpu/drm/i915/intel_clock_gating.c b/drivers/gpu/drm/i915/intel_clock_gating.c
index 81a4d32734e9..c66eb6abd4a2 100644
--- a/drivers/gpu/drm/i915/intel_clock_gating.c
+++ b/drivers/gpu/drm/i915/intel_clock_gating.c
@@ -396,14 +396,6 @@ static void dg2_init_clock_gating(struct drm_i915_private *i915)
/* Wa_22010954014:dg2 */
intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0,
SGSI_SIDECLK_DIS);
-
- /*
- * Wa_14010733611:dg2_g10
- * Wa_22010146351:dg2_g10
- */
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0))
- intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0,
- SGR_DIS | SGGI_DIS);
}

static void pvc_init_clock_gating(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index aae780e4a4aa..2bbcdc649e86 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -804,15 +804,15 @@ op_remap(struct drm_gpuva_op_remap *r,
struct drm_gpuva_op_unmap *u = r->unmap;
struct nouveau_uvma *uvma = uvma_from_va(u->va);
u64 addr = uvma->va.va.addr;
- u64 range = uvma->va.va.range;
+ u64 end = uvma->va.va.addr + uvma->va.va.range;

if (r->prev)
addr = r->prev->va.addr + r->prev->va.range;

if (r->next)
- range = r->next->va.addr - addr;
+ end = r->next->va.addr;

- op_unmap_range(u, addr, range);
+ op_unmap_range(u, addr, end - addr);
}

static int
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
index eca45b83e4e6..c067ff550692 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -387,19 +387,19 @@ void panfrost_gpu_power_off(struct panfrost_device *pfdev)

gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present);
ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_PWRTRANS_LO,
- val, !val, 1, 1000);
+ val, !val, 1, 2000);
if (ret)
dev_err(pfdev->dev, "shader power transition timeout");

gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present);
ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_PWRTRANS_LO,
- val, !val, 1, 1000);
+ val, !val, 1, 2000);
if (ret)
dev_err(pfdev->dev, "tiler power transition timeout");

gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present);
ret = readl_poll_timeout(pfdev->iomem + L2_PWRTRANS_LO,
- val, !val, 0, 1000);
+ val, !val, 0, 2000);
if (ret)
dev_err(pfdev->dev, "l2 power transition timeout");
}
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index e7cd27e387df..470add73f7bd 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -4231,7 +4231,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
} else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) {
log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval);
} else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) {
- if (val >= (uint64_t)UINT_MAX * 1000 / HZ) {
+ if ((uint64_t)val >= (uint64_t)UINT_MAX * 1000 / HZ) {
r = -EINVAL;
ti->error = "Invalid bitmap_flush_interval argument";
goto bad;
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 5ad51271a534..b8fde22aebf9 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -5386,8 +5386,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.family = MV88E6XXX_FAMILY_6250,
.name = "Marvell 88E6020",
.num_databases = 64,
- .num_ports = 4,
+ /* Ports 2-4 are not routed to pins
+ * => usable ports 0, 1, 5, 6
+ */
+ .num_ports = 7,
.num_internal_phys = 2,
+ .invalid_port_mask = BIT(2) | BIT(3) | BIT(4),
.max_vid = 4095,
.port_base_addr = 0x8,
.phy_base_addr = 0x0,
diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c
index 833e55e4b961..52ddb4ef259e 100644
--- a/drivers/net/dsa/sja1105/sja1105_mdio.c
+++ b/drivers/net/dsa/sja1105/sja1105_mdio.c
@@ -94,7 +94,7 @@ int sja1110_pcs_mdio_read_c45(struct mii_bus *bus, int phy, int mmd, int reg)
return tmp & 0xffff;
}

-int sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int reg, int mmd,
+int sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int mmd, int reg,
u16 val)
{
struct sja1105_mdio_private *mdio_priv = bus->priv;
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
index 9cae5a309000..b3d04f49f77e 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
@@ -391,7 +391,9 @@ static void umac_reset(struct bcmasp_intf *intf)
umac_wl(intf, 0x0, UMC_CMD);
umac_wl(intf, UMC_CMD_SW_RESET, UMC_CMD);
usleep_range(10, 100);
- umac_wl(intf, 0x0, UMC_CMD);
+ /* We hold the umac in reset and bring it out of
+ * reset when phy link is up.
+ */
}

static void umac_set_hw_addr(struct bcmasp_intf *intf,
@@ -411,6 +413,8 @@ static void umac_enable_set(struct bcmasp_intf *intf, u32 mask,
u32 reg;

reg = umac_rl(intf, UMC_CMD);
+ if (reg & UMC_CMD_SW_RESET)
+ return;
if (enable)
reg |= mask;
else
@@ -429,7 +433,6 @@ static void umac_init(struct bcmasp_intf *intf)
umac_wl(intf, 0x800, UMC_FRM_LEN);
umac_wl(intf, 0xffff, UMC_PAUSE_CNTRL);
umac_wl(intf, 0x800, UMC_RX_MAX_PKT_SZ);
- umac_enable_set(intf, UMC_CMD_PROMISC, 1);
}

static int bcmasp_tx_poll(struct napi_struct *napi, int budget)
@@ -656,6 +659,12 @@ static void bcmasp_adj_link(struct net_device *dev)
UMC_CMD_HD_EN | UMC_CMD_RX_PAUSE_IGNORE |
UMC_CMD_TX_PAUSE_IGNORE);
reg |= cmd_bits;
+ if (reg & UMC_CMD_SW_RESET) {
+ reg &= ~UMC_CMD_SW_RESET;
+ umac_wl(intf, reg, UMC_CMD);
+ udelay(2);
+ reg |= UMC_CMD_TX_EN | UMC_CMD_RX_EN | UMC_CMD_PROMISC;
+ }
umac_wl(intf, reg, UMC_CMD);

intf->eee.eee_active = phy_init_eee(phydev, 0) >= 0;
@@ -1061,9 +1070,6 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect)

umac_init(intf);

- /* Disable the UniMAC RX/TX */
- umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 0);
-
umac_set_hw_addr(intf, dev->dev_addr);

intf->old_duplex = -1;
@@ -1083,9 +1089,6 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect)

bcmasp_enable_rx(intf, 1);

- /* Turn on UniMAC TX/RX */
- umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 1);
-
intf->crc_fwd = !!(umac_rl(intf, UMC_CMD) & UMC_CMD_CRC_FWD);

bcmasp_netif_start(dev);
@@ -1321,7 +1324,14 @@ static void bcmasp_suspend_to_wol(struct bcmasp_intf *intf)
if (intf->wolopts & WAKE_FILTER)
bcmasp_netfilt_suspend(intf);

- /* UniMAC receive needs to be turned on */
+ /* Bring UniMAC out of reset if needed and enable RX */
+ reg = umac_rl(intf, UMC_CMD);
+ if (reg & UMC_CMD_SW_RESET)
+ reg &= ~UMC_CMD_SW_RESET;
+
+ reg |= UMC_CMD_RX_EN | UMC_CMD_PROMISC;
+ umac_wl(intf, reg, UMC_CMD);
+
umac_enable_set(intf, UMC_CMD_RX_EN, 1);

if (intf->parent->wol_irq > 0) {
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 54da59286df4..7ca8cd78d557 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -2381,8 +2381,6 @@ static int fec_enet_mii_probe(struct net_device *ndev)
fep->link = 0;
fep->full_duplex = 0;

- phy_dev->mac_managed_pm = true;
-
phy_attached_info(phy_dev);

return 0;
@@ -2394,10 +2392,12 @@ static int fec_enet_mii_init(struct platform_device *pdev)
struct net_device *ndev = platform_get_drvdata(pdev);
struct fec_enet_private *fep = netdev_priv(ndev);
bool suppress_preamble = false;
+ struct phy_device *phydev;
struct device_node *node;
int err = -ENXIO;
u32 mii_speed, holdtime;
u32 bus_freq;
+ int addr;

/*
* The i.MX28 dual fec interfaces are not equal.
@@ -2511,6 +2511,13 @@ static int fec_enet_mii_init(struct platform_device *pdev)
goto err_out_free_mdiobus;
of_node_put(node);

+ /* find all the PHY devices on the bus and set mac_managed_pm to true */
+ for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
+ phydev = mdiobus_get_phy(fep->mii_bus, addr);
+ if (phydev)
+ phydev->mac_managed_pm = true;
+ }
+
mii_cnt++;

/* save fec0 mii_bus */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c
index f3c9395d8351..618f66d9586b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c
@@ -85,7 +85,7 @@ int hclge_comm_tqps_update_stats(struct hnae3_handle *handle,
hclge_comm_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_TX_STATS,
true);

- desc.data[0] = cpu_to_le32(tqp->index & 0x1ff);
+ desc.data[0] = cpu_to_le32(tqp->index);
ret = hclge_comm_cmd_send(hw, &desc, 1);
if (ret) {
dev_err(&hw->cmq.csq.pdev->dev,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 682239f33082..78181eea93c1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -78,6 +78,9 @@ static const struct hns3_stats hns3_rxq_stats[] = {
#define HNS3_NIC_LB_TEST_NO_MEM_ERR 1
#define HNS3_NIC_LB_TEST_TX_CNT_ERR 2
#define HNS3_NIC_LB_TEST_RX_CNT_ERR 3
+#define HNS3_NIC_LB_TEST_UNEXECUTED 4
+
+static int hns3_get_sset_count(struct net_device *netdev, int stringset);

static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en)
{
@@ -418,18 +421,26 @@ static void hns3_do_external_lb(struct net_device *ndev,
static void hns3_self_test(struct net_device *ndev,
struct ethtool_test *eth_test, u64 *data)
{
+ int cnt = hns3_get_sset_count(ndev, ETH_SS_TEST);
struct hns3_nic_priv *priv = netdev_priv(ndev);
struct hnae3_handle *h = priv->ae_handle;
int st_param[HNAE3_LOOP_NONE][2];
bool if_running = netif_running(ndev);
+ int i;
+
+ /* initialize the loopback test result, avoid marking an unexcuted
+ * loopback test as PASS.
+ */
+ for (i = 0; i < cnt; i++)
+ data[i] = HNS3_NIC_LB_TEST_UNEXECUTED;

if (hns3_nic_resetting(ndev)) {
netdev_err(ndev, "dev resetting!");
- return;
+ goto failure;
}

if (!(eth_test->flags & ETH_TEST_FL_OFFLINE))
- return;
+ goto failure;

if (netif_msg_ifdown(h))
netdev_info(ndev, "self test start\n");
@@ -451,6 +462,10 @@ static void hns3_self_test(struct net_device *ndev,

if (netif_msg_ifdown(h))
netdev_info(ndev, "self test end\n");
+ return;
+
+failure:
+ eth_test->flags |= ETH_TEST_FL_FAILED;
}

static void hns3_update_limit_promisc_mode(struct net_device *netdev,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index f1ca2cda2961..dfd0c5f4cb9f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -11614,6 +11614,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
if (ret)
goto err_pci_uninit;

+ devl_lock(hdev->devlink);
+
/* Firmware command queue initialize */
ret = hclge_comm_cmd_queue_init(hdev->pdev, &hdev->hw.hw);
if (ret)
@@ -11793,6 +11795,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)

hclge_task_schedule(hdev, round_jiffies_relative(HZ));

+ devl_unlock(hdev->devlink);
return 0;

err_mdiobus_unreg:
@@ -11805,6 +11808,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
err_cmd_uninit:
hclge_comm_cmd_uninit(hdev->ae_dev, &hdev->hw.hw);
err_devlink_uninit:
+ devl_unlock(hdev->devlink);
hclge_devlink_uninit(hdev);
err_pci_uninit:
pcim_iounmap(pdev, hdev->hw.hw.io_base);
diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c
index 4542e2bc28e8..f9328f2e669f 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_hw.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c
@@ -5,6 +5,7 @@
* Shared functions for accessing and configuring the MAC
*/

+#include <linux/bitfield.h>
#include "e1000.h"

static s32 e1000_check_downshift(struct e1000_hw *hw);
@@ -3260,8 +3261,7 @@ static s32 e1000_phy_igp_get_info(struct e1000_hw *hw,
return ret_val;

phy_info->mdix_mode =
- (e1000_auto_x_mode) ((phy_data & IGP01E1000_PSSR_MDIX) >>
- IGP01E1000_PSSR_MDIX_SHIFT);
+ (e1000_auto_x_mode)FIELD_GET(IGP01E1000_PSSR_MDIX, phy_data);

if ((phy_data & IGP01E1000_PSSR_SPEED_MASK) ==
IGP01E1000_PSSR_SPEED_1000MBPS) {
@@ -3272,11 +3272,11 @@ static s32 e1000_phy_igp_get_info(struct e1000_hw *hw,
if (ret_val)
return ret_val;

- phy_info->local_rx = ((phy_data & SR_1000T_LOCAL_RX_STATUS) >>
- SR_1000T_LOCAL_RX_STATUS_SHIFT) ?
+ phy_info->local_rx = FIELD_GET(SR_1000T_LOCAL_RX_STATUS,
+ phy_data) ?
e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok;
- phy_info->remote_rx = ((phy_data & SR_1000T_REMOTE_RX_STATUS) >>
- SR_1000T_REMOTE_RX_STATUS_SHIFT) ?
+ phy_info->remote_rx = FIELD_GET(SR_1000T_REMOTE_RX_STATUS,
+ phy_data) ?
e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok;

/* Get cable length */
@@ -3326,14 +3326,12 @@ static s32 e1000_phy_m88_get_info(struct e1000_hw *hw,
return ret_val;

phy_info->extended_10bt_distance =
- ((phy_data & M88E1000_PSCR_10BT_EXT_DIST_ENABLE) >>
- M88E1000_PSCR_10BT_EXT_DIST_ENABLE_SHIFT) ?
+ FIELD_GET(M88E1000_PSCR_10BT_EXT_DIST_ENABLE, phy_data) ?
e1000_10bt_ext_dist_enable_lower :
e1000_10bt_ext_dist_enable_normal;

phy_info->polarity_correction =
- ((phy_data & M88E1000_PSCR_POLARITY_REVERSAL) >>
- M88E1000_PSCR_POLARITY_REVERSAL_SHIFT) ?
+ FIELD_GET(M88E1000_PSCR_POLARITY_REVERSAL, phy_data) ?
e1000_polarity_reversal_disabled : e1000_polarity_reversal_enabled;

/* Check polarity status */
@@ -3347,27 +3345,25 @@ static s32 e1000_phy_m88_get_info(struct e1000_hw *hw,
return ret_val;

phy_info->mdix_mode =
- (e1000_auto_x_mode) ((phy_data & M88E1000_PSSR_MDIX) >>
- M88E1000_PSSR_MDIX_SHIFT);
+ (e1000_auto_x_mode)FIELD_GET(M88E1000_PSSR_MDIX, phy_data);

if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) {
/* Cable Length Estimation and Local/Remote Receiver Information
* are only valid at 1000 Mbps.
*/
phy_info->cable_length =
- (e1000_cable_length) ((phy_data &
- M88E1000_PSSR_CABLE_LENGTH) >>
- M88E1000_PSSR_CABLE_LENGTH_SHIFT);
+ (e1000_cable_length)FIELD_GET(M88E1000_PSSR_CABLE_LENGTH,
+ phy_data);

ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data);
if (ret_val)
return ret_val;

- phy_info->local_rx = ((phy_data & SR_1000T_LOCAL_RX_STATUS) >>
- SR_1000T_LOCAL_RX_STATUS_SHIFT) ?
+ phy_info->local_rx = FIELD_GET(SR_1000T_LOCAL_RX_STATUS,
+ phy_data) ?
e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok;
- phy_info->remote_rx = ((phy_data & SR_1000T_REMOTE_RX_STATUS) >>
- SR_1000T_REMOTE_RX_STATUS_SHIFT) ?
+ phy_info->remote_rx = FIELD_GET(SR_1000T_REMOTE_RX_STATUS,
+ phy_data) ?
e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok;
}

@@ -3515,7 +3511,7 @@ s32 e1000_init_eeprom_params(struct e1000_hw *hw)
if (ret_val)
return ret_val;
eeprom_size =
- (eeprom_size & EEPROM_SIZE_MASK) >> EEPROM_SIZE_SHIFT;
+ FIELD_GET(EEPROM_SIZE_MASK, eeprom_size);
/* 256B eeprom size was not supported in earlier hardware, so we
* bump eeprom_size up one to ensure that "1" (which maps to
* 256B) is never the result used in the shifting logic below.
@@ -4891,8 +4887,7 @@ static s32 e1000_get_cable_length(struct e1000_hw *hw, u16 *min_length,
&phy_data);
if (ret_val)
return ret_val;
- cable_length = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
- M88E1000_PSSR_CABLE_LENGTH_SHIFT;
+ cable_length = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data);

/* Convert the enum value to ranged values */
switch (cable_length) {
@@ -5001,8 +4996,7 @@ static s32 e1000_check_polarity(struct e1000_hw *hw,
&phy_data);
if (ret_val)
return ret_val;
- *polarity = ((phy_data & M88E1000_PSSR_REV_POLARITY) >>
- M88E1000_PSSR_REV_POLARITY_SHIFT) ?
+ *polarity = FIELD_GET(M88E1000_PSSR_REV_POLARITY, phy_data) ?
e1000_rev_polarity_reversed : e1000_rev_polarity_normal;

} else if (hw->phy_type == e1000_phy_igp) {
@@ -5072,8 +5066,8 @@ static s32 e1000_check_downshift(struct e1000_hw *hw)
if (ret_val)
return ret_val;

- hw->speed_downgraded = (phy_data & M88E1000_PSSR_DOWNSHIFT) >>
- M88E1000_PSSR_DOWNSHIFT_SHIFT;
+ hw->speed_downgraded = FIELD_GET(M88E1000_PSSR_DOWNSHIFT,
+ phy_data);
}

return E1000_SUCCESS;
diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
index be9c695dde12..c51fb6bf9c4e 100644
--- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c
+++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
@@ -92,8 +92,7 @@ static s32 e1000_init_nvm_params_80003es2lan(struct e1000_hw *hw)

nvm->type = e1000_nvm_eeprom_spi;

- size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >>
- E1000_EECD_SIZE_EX_SHIFT);
+ size = (u16)FIELD_GET(E1000_EECD_SIZE_EX_MASK, eecd);

/* Added to a constant, "size" becomes the left-shift value
* for setting word_size.
diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c
index 0b1e890dd583..969f855a79ee 100644
--- a/drivers/net/ethernet/intel/e1000e/82571.c
+++ b/drivers/net/ethernet/intel/e1000e/82571.c
@@ -157,8 +157,7 @@ static s32 e1000_init_nvm_params_82571(struct e1000_hw *hw)
fallthrough;
default:
nvm->type = e1000_nvm_eeprom_spi;
- size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >>
- E1000_EECD_SIZE_EX_SHIFT);
+ size = (u16)FIELD_GET(E1000_EECD_SIZE_EX_MASK, eecd);
/* Added to a constant, "size" becomes the left-shift value
* for setting word_size.
*/
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index 9835e6a90d56..fc0f98ea6133 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -654,8 +654,8 @@ static void e1000_get_drvinfo(struct net_device *netdev,
*/
snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
"%d.%d-%d",
- (adapter->eeprom_vers & 0xF000) >> 12,
- (adapter->eeprom_vers & 0x0FF0) >> 4,
+ FIELD_GET(0xF000, adapter->eeprom_vers),
+ FIELD_GET(0x0FF0, adapter->eeprom_vers),
(adapter->eeprom_vers & 0x000F));

strscpy(drvinfo->bus_info, pci_name(adapter->pdev),
@@ -925,8 +925,7 @@ static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data)
}

if (mac->type >= e1000_pch_lpt)
- wlock_mac = (er32(FWSM) & E1000_FWSM_WLOCK_MAC_MASK) >>
- E1000_FWSM_WLOCK_MAC_SHIFT;
+ wlock_mac = FIELD_GET(E1000_FWSM_WLOCK_MAC_MASK, er32(FWSM));

for (i = 0; i < mac->rar_entry_count; i++) {
if (mac->type >= e1000_pch_lpt) {
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
index 1fef6bb5a5fb..4b6e7536170a 100644
--- a/drivers/net/ethernet/intel/e1000e/hw.h
+++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -628,6 +628,7 @@ struct e1000_phy_info {
u32 id;
u32 reset_delay_us; /* in usec */
u32 revision;
+ u32 retry_count;

enum e1000_media_type media_type;

@@ -644,6 +645,7 @@ struct e1000_phy_info {
bool polarity_correction;
bool speed_downgraded;
bool autoneg_wait_to_complete;
+ bool retry_enabled;
};

struct e1000_nvm_info {
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 39e9fc601bf5..4d83c9a0c023 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -222,11 +222,18 @@ static bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw)
if (hw->mac.type >= e1000_pch_lpt) {
/* Only unforce SMBus if ME is not active */
if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) {
+ /* Switching PHY interface always returns MDI error
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
/* Unforce SMBus mode in PHY */
e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg);
phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg);

+ e1000e_enable_phy_retry(hw);
+
/* Unforce SMBus mode in MAC */
mac_reg = er32(CTRL_EXT);
mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
@@ -310,6 +317,11 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
goto out;
}

+ /* There is no guarantee that the PHY is accessible at this time
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
/* The MAC-PHY interconnect may be in SMBus mode. If the PHY is
* inaccessible and resetting the PHY is not blocked, toggle the
* LANPHYPC Value bit to force the interconnect to PCIe mode.
@@ -380,6 +392,8 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
break;
}

+ e1000e_enable_phy_retry(hw);
+
hw->phy.ops.release(hw);
if (!ret_val) {

@@ -449,6 +463,11 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw)

phy->id = e1000_phy_unknown;

+ if (hw->mac.type == e1000_pch_mtp) {
+ phy->retry_count = 2;
+ e1000e_enable_phy_retry(hw);
+ }
+
ret_val = e1000_init_phy_workarounds_pchlan(hw);
if (ret_val)
return ret_val;
@@ -1072,13 +1091,11 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link)

lat_enc_d = (lat_enc & E1000_LTRV_VALUE_MASK) *
(1U << (E1000_LTRV_SCALE_FACTOR *
- ((lat_enc & E1000_LTRV_SCALE_MASK)
- >> E1000_LTRV_SCALE_SHIFT)));
+ FIELD_GET(E1000_LTRV_SCALE_MASK, lat_enc)));

max_ltr_enc_d = (max_ltr_enc & E1000_LTRV_VALUE_MASK) *
- (1U << (E1000_LTRV_SCALE_FACTOR *
- ((max_ltr_enc & E1000_LTRV_SCALE_MASK)
- >> E1000_LTRV_SCALE_SHIFT)));
+ (1U << (E1000_LTRV_SCALE_FACTOR *
+ FIELD_GET(E1000_LTRV_SCALE_MASK, max_ltr_enc)));

if (lat_enc_d > max_ltr_enc_d)
lat_enc = max_ltr_enc;
@@ -1148,18 +1165,6 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
if (ret_val)
goto out;

- /* Force SMBus mode in PHY */
- ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
- if (ret_val)
- goto release;
- phy_reg |= CV_SMB_CTRL_FORCE_SMBUS;
- e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
-
- /* Force SMBus mode in MAC */
- mac_reg = er32(CTRL_EXT);
- mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
- ew32(CTRL_EXT, mac_reg);
-
/* Si workaround for ULP entry flow on i127/rev6 h/w. Enable
* LPLU and disable Gig speed when entering ULP
*/
@@ -1315,6 +1320,11 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
/* Toggle LANPHYPC Value bit */
e1000_toggle_lanphypc_pch_lpt(hw);

+ /* Switching PHY interface always returns MDI error
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
/* Unforce SMBus mode in PHY */
ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
if (ret_val) {
@@ -1335,6 +1345,8 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);

+ e1000e_enable_phy_retry(hw);
+
/* Unforce SMBus mode in MAC */
mac_reg = er32(CTRL_EXT);
mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
@@ -2075,8 +2087,7 @@ static s32 e1000_write_smbus_addr(struct e1000_hw *hw)
{
u16 phy_data;
u32 strap = er32(STRAP);
- u32 freq = (strap & E1000_STRAP_SMT_FREQ_MASK) >>
- E1000_STRAP_SMT_FREQ_SHIFT;
+ u32 freq = FIELD_GET(E1000_STRAP_SMT_FREQ_MASK, strap);
s32 ret_val;

strap &= E1000_STRAP_SMBUS_ADDRESS_MASK;
@@ -2562,8 +2573,7 @@ void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw)
hw->phy.ops.write_reg_page(hw, BM_RAR_H(i),
(u16)(mac_reg & 0xFFFF));
hw->phy.ops.write_reg_page(hw, BM_RAR_CTRL(i),
- (u16)((mac_reg & E1000_RAH_AV)
- >> 16));
+ FIELD_GET(E1000_RAH_AV, mac_reg));
}

e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg);
@@ -3205,7 +3215,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank)
&nvm_dword);
if (ret_val)
return ret_val;
- sig_byte = (u8)((nvm_dword & 0xFF00) >> 8);
+ sig_byte = FIELD_GET(0xFF00, nvm_dword);
if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) ==
E1000_ICH_NVM_SIG_VALUE) {
*bank = 0;
@@ -3218,7 +3228,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank)
&nvm_dword);
if (ret_val)
return ret_val;
- sig_byte = (u8)((nvm_dword & 0xFF00) >> 8);
+ sig_byte = FIELD_GET(0xFF00, nvm_dword);
if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) ==
E1000_ICH_NVM_SIG_VALUE) {
*bank = 1;
diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
index 5df7ad93f3d7..30515bfb259e 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.c
+++ b/drivers/net/ethernet/intel/e1000e/mac.c
@@ -52,7 +52,7 @@ void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw)
* for the device regardless of function swap state.
*/
reg = er32(STATUS);
- bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT;
+ bus->func = FIELD_GET(E1000_STATUS_FUNC_MASK, reg);
}

/**
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index f536c856727c..3692fce20195 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1788,8 +1788,7 @@ static irqreturn_t e1000_intr_msi(int __always_unused irq, void *data)
adapter->corr_errors +=
pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK;
adapter->uncorr_errors +=
- (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >>
- E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT;
+ FIELD_GET(E1000_PBECCSTS_UNCORR_ERR_CNT_MASK, pbeccsts);

/* Do the reset outside of interrupt context */
schedule_work(&adapter->reset_task);
@@ -1868,8 +1867,7 @@ static irqreturn_t e1000_intr(int __always_unused irq, void *data)
adapter->corr_errors +=
pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK;
adapter->uncorr_errors +=
- (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >>
- E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT;
+ FIELD_GET(E1000_PBECCSTS_UNCORR_ERR_CNT_MASK, pbeccsts);

/* Do the reset outside of interrupt context */
schedule_work(&adapter->reset_task);
@@ -5031,8 +5029,7 @@ static void e1000e_update_stats(struct e1000_adapter *adapter)
adapter->corr_errors +=
pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK;
adapter->uncorr_errors +=
- (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >>
- E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT;
+ FIELD_GET(E1000_PBECCSTS_UNCORR_ERR_CNT_MASK, pbeccsts);
}
}

@@ -6249,7 +6246,7 @@ static int e1000_init_phy_wakeup(struct e1000_adapter *adapter, u32 wufc)
phy_reg |= BM_RCTL_MPE;
phy_reg &= ~(BM_RCTL_MO_MASK);
if (mac_reg & E1000_RCTL_MO_3)
- phy_reg |= (((mac_reg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
+ phy_reg |= (FIELD_GET(E1000_RCTL_MO_3, mac_reg)
<< BM_RCTL_MO_SHIFT);
if (mac_reg & E1000_RCTL_BAM)
phy_reg |= BM_RCTL_BAM;
@@ -6626,6 +6623,7 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
struct e1000_hw *hw = &adapter->hw;
u32 ctrl, ctrl_ext, rctl, status, wufc;
int retval = 0;
+ u16 smb_ctrl;

/* Runtime suspend should only enable wakeup for link changes */
if (runtime)
@@ -6691,14 +6689,31 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
if (adapter->hw.phy.type == e1000_phy_igp_3) {
e1000e_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
} else if (hw->mac.type >= e1000_pch_lpt) {
- if (wufc && !(wufc & (E1000_WUFC_EX | E1000_WUFC_MC | E1000_WUFC_BC)))
+ if (wufc && !(wufc & (E1000_WUFC_EX | E1000_WUFC_MC | E1000_WUFC_BC))) {
/* ULP does not support wake from unicast, multicast
* or broadcast.
*/
retval = e1000_enable_ulp_lpt_lp(hw, !runtime);
+ if (retval)
+ return retval;
+ }
+
+ /* Force SMBUS to allow WOL */
+ /* Switching PHY interface always returns MDI error
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
+ e1e_rphy(hw, CV_SMB_CTRL, &smb_ctrl);
+ smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS;
+ e1e_wphy(hw, CV_SMB_CTRL, smb_ctrl);

- if (retval)
- return retval;
+ e1000e_enable_phy_retry(hw);
+
+ /* Force SMBus mode in MAC */
+ ctrl_ext = er32(CTRL_EXT);
+ ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS;
+ ew32(CTRL_EXT, ctrl_ext);
}

/* Ensure that the appropriate bits are set in LPI_CTRL
diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
index 08c3d477dd6f..395746bcf8f7 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.c
+++ b/drivers/net/ethernet/intel/e1000e/phy.c
@@ -107,6 +107,16 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw)
return e1e_wphy(hw, M88E1000_PHY_GEN_CONTROL, 0);
}

+void e1000e_disable_phy_retry(struct e1000_hw *hw)
+{
+ hw->phy.retry_enabled = false;
+}
+
+void e1000e_enable_phy_retry(struct e1000_hw *hw)
+{
+ hw->phy.retry_enabled = true;
+}
+
/**
* e1000e_read_phy_reg_mdic - Read MDI control register
* @hw: pointer to the HW structure
@@ -118,57 +128,73 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw)
**/
s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
{
+ u32 i, mdic = 0, retry_counter, retry_max;
struct e1000_phy_info *phy = &hw->phy;
- u32 i, mdic = 0;
+ bool success;

if (offset > MAX_PHY_REG_ADDRESS) {
e_dbg("PHY Address %d is out of range\n", offset);
return -E1000_ERR_PARAM;
}

+ retry_max = phy->retry_enabled ? phy->retry_count : 0;
+
/* Set up Op-code, Phy Address, and register offset in the MDI
* Control register. The MAC will take care of interfacing with the
* PHY to retrieve the desired data.
*/
- mdic = ((offset << E1000_MDIC_REG_SHIFT) |
- (phy->addr << E1000_MDIC_PHY_SHIFT) |
- (E1000_MDIC_OP_READ));
+ for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) {
+ success = true;

- ew32(MDIC, mdic);
+ mdic = ((offset << E1000_MDIC_REG_SHIFT) |
+ (phy->addr << E1000_MDIC_PHY_SHIFT) |
+ (E1000_MDIC_OP_READ));

- /* Poll the ready bit to see if the MDI read completed
- * Increasing the time out as testing showed failures with
- * the lower time out
- */
- for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
- udelay(50);
- mdic = er32(MDIC);
- if (mdic & E1000_MDIC_READY)
- break;
- }
- if (!(mdic & E1000_MDIC_READY)) {
- e_dbg("MDI Read PHY Reg Address %d did not complete\n", offset);
- return -E1000_ERR_PHY;
- }
- if (mdic & E1000_MDIC_ERROR) {
- e_dbg("MDI Read PHY Reg Address %d Error\n", offset);
- return -E1000_ERR_PHY;
- }
- if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) {
- e_dbg("MDI Read offset error - requested %d, returned %d\n",
- offset,
- (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
- return -E1000_ERR_PHY;
- }
- *data = (u16)mdic;
+ ew32(MDIC, mdic);

- /* Allow some time after each MDIC transaction to avoid
- * reading duplicate data in the next MDIC transaction.
- */
- if (hw->mac.type == e1000_pch2lan)
- udelay(100);
+ /* Poll the ready bit to see if the MDI read completed
+ * Increasing the time out as testing showed failures with
+ * the lower time out
+ */
+ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+ usleep_range(50, 60);
+ mdic = er32(MDIC);
+ if (mdic & E1000_MDIC_READY)
+ break;
+ }
+ if (!(mdic & E1000_MDIC_READY)) {
+ e_dbg("MDI Read PHY Reg Address %d did not complete\n",
+ offset);
+ success = false;
+ }
+ if (mdic & E1000_MDIC_ERROR) {
+ e_dbg("MDI Read PHY Reg Address %d Error\n", offset);
+ success = false;
+ }
+ if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
+ e_dbg("MDI Read offset error - requested %d, returned %d\n",
+ offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
+ success = false;
+ }

- return 0;
+ /* Allow some time after each MDIC transaction to avoid
+ * reading duplicate data in the next MDIC transaction.
+ */
+ if (hw->mac.type == e1000_pch2lan)
+ usleep_range(100, 150);
+
+ if (success) {
+ *data = (u16)mdic;
+ return 0;
+ }
+
+ if (retry_counter != retry_max) {
+ e_dbg("Perform retry on PHY transaction...\n");
+ mdelay(10);
+ }
+ }
+
+ return -E1000_ERR_PHY;
}

/**
@@ -181,57 +207,72 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
**/
s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
{
+ u32 i, mdic = 0, retry_counter, retry_max;
struct e1000_phy_info *phy = &hw->phy;
- u32 i, mdic = 0;
+ bool success;

if (offset > MAX_PHY_REG_ADDRESS) {
e_dbg("PHY Address %d is out of range\n", offset);
return -E1000_ERR_PARAM;
}

+ retry_max = phy->retry_enabled ? phy->retry_count : 0;
+
/* Set up Op-code, Phy Address, and register offset in the MDI
* Control register. The MAC will take care of interfacing with the
* PHY to retrieve the desired data.
*/
- mdic = (((u32)data) |
- (offset << E1000_MDIC_REG_SHIFT) |
- (phy->addr << E1000_MDIC_PHY_SHIFT) |
- (E1000_MDIC_OP_WRITE));
+ for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) {
+ success = true;

- ew32(MDIC, mdic);
+ mdic = (((u32)data) |
+ (offset << E1000_MDIC_REG_SHIFT) |
+ (phy->addr << E1000_MDIC_PHY_SHIFT) |
+ (E1000_MDIC_OP_WRITE));

- /* Poll the ready bit to see if the MDI read completed
- * Increasing the time out as testing showed failures with
- * the lower time out
- */
- for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
- udelay(50);
- mdic = er32(MDIC);
- if (mdic & E1000_MDIC_READY)
- break;
- }
- if (!(mdic & E1000_MDIC_READY)) {
- e_dbg("MDI Write PHY Reg Address %d did not complete\n", offset);
- return -E1000_ERR_PHY;
- }
- if (mdic & E1000_MDIC_ERROR) {
- e_dbg("MDI Write PHY Red Address %d Error\n", offset);
- return -E1000_ERR_PHY;
- }
- if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) {
- e_dbg("MDI Write offset error - requested %d, returned %d\n",
- offset,
- (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
- return -E1000_ERR_PHY;
- }
+ ew32(MDIC, mdic);

- /* Allow some time after each MDIC transaction to avoid
- * reading duplicate data in the next MDIC transaction.
- */
- if (hw->mac.type == e1000_pch2lan)
- udelay(100);
+ /* Poll the ready bit to see if the MDI read completed
+ * Increasing the time out as testing showed failures with
+ * the lower time out
+ */
+ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+ usleep_range(50, 60);
+ mdic = er32(MDIC);
+ if (mdic & E1000_MDIC_READY)
+ break;
+ }
+ if (!(mdic & E1000_MDIC_READY)) {
+ e_dbg("MDI Write PHY Reg Address %d did not complete\n",
+ offset);
+ success = false;
+ }
+ if (mdic & E1000_MDIC_ERROR) {
+ e_dbg("MDI Write PHY Reg Address %d Error\n", offset);
+ success = false;
+ }
+ if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
+ e_dbg("MDI Write offset error - requested %d, returned %d\n",
+ offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
+ success = false;
+ }

- return 0;
+ /* Allow some time after each MDIC transaction to avoid
+ * reading duplicate data in the next MDIC transaction.
+ */
+ if (hw->mac.type == e1000_pch2lan)
+ usleep_range(100, 150);
+
+ if (success)
+ return 0;
+
+ if (retry_counter != retry_max) {
+ e_dbg("Perform retry on PHY transaction...\n");
+ mdelay(10);
+ }
+ }
+
+ return -E1000_ERR_PHY;
}

/**
@@ -1793,8 +1834,7 @@ s32 e1000e_get_cable_length_m88(struct e1000_hw *hw)
if (ret_val)
return ret_val;

- index = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
- M88E1000_PSSR_CABLE_LENGTH_SHIFT);
+ index = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data);

if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1)
return -E1000_ERR_PHY;
@@ -3234,8 +3274,7 @@ s32 e1000_get_cable_length_82577(struct e1000_hw *hw)
if (ret_val)
return ret_val;

- length = ((phy_data & I82577_DSTATUS_CABLE_LENGTH) >>
- I82577_DSTATUS_CABLE_LENGTH_SHIFT);
+ length = FIELD_GET(I82577_DSTATUS_CABLE_LENGTH, phy_data);

if (length == E1000_CABLE_LENGTH_UNDEFINED)
return -E1000_ERR_PHY;
diff --git a/drivers/net/ethernet/intel/e1000e/phy.h b/drivers/net/ethernet/intel/e1000e/phy.h
index c48777d09523..049bb325b4b1 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.h
+++ b/drivers/net/ethernet/intel/e1000e/phy.h
@@ -51,6 +51,8 @@ s32 e1000e_read_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 *data);
s32 e1000e_write_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 data);
void e1000_power_up_phy_copper(struct e1000_hw *hw);
void e1000_power_down_phy_copper(struct e1000_hw *hw);
+void e1000e_disable_phy_retry(struct e1000_hw *hw);
+void e1000e_enable_phy_retry(struct e1000_hw *hw);
s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data);
s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data);
s32 e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index af1b0cde3670..aed5e0bf6313 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2013 - 2019 Intel Corporation. */

+#include <linux/bitfield.h>
#include "fm10k_pf.h"
#include "fm10k_vf.h"

@@ -1575,8 +1576,7 @@ static s32 fm10k_get_fault_pf(struct fm10k_hw *hw, int type,
if (func & FM10K_FAULT_FUNC_PF)
fault->func = 0;
else
- fault->func = 1 + ((func & FM10K_FAULT_FUNC_VF_MASK) >>
- FM10K_FAULT_FUNC_VF_SHIFT);
+ fault->func = 1 + FIELD_GET(FM10K_FAULT_FUNC_VF_MASK, func);

/* record fault type */
fault->type = func & FM10K_FAULT_FUNC_TYPE_MASK;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
index dc8ccd378ec9..7fb1961f2921 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2013 - 2019 Intel Corporation. */

+#include <linux/bitfield.h>
#include "fm10k_vf.h"

/**
@@ -126,15 +127,14 @@ static s32 fm10k_init_hw_vf(struct fm10k_hw *hw)
hw->mac.max_queues = i;

/* fetch default VLAN and ITR scale */
- hw->mac.default_vid = (fm10k_read_reg(hw, FM10K_TXQCTL(0)) &
- FM10K_TXQCTL_VID_MASK) >> FM10K_TXQCTL_VID_SHIFT;
+ hw->mac.default_vid = FIELD_GET(FM10K_TXQCTL_VID_MASK,
+ fm10k_read_reg(hw, FM10K_TXQCTL(0)));
/* Read the ITR scale from TDLEN. See the definition of
* FM10K_TDLEN_ITR_SCALE_SHIFT for more information about how TDLEN is
* used here.
*/
- hw->mac.itr_scale = (fm10k_read_reg(hw, FM10K_TDLEN(0)) &
- FM10K_TDLEN_ITR_SCALE_MASK) >>
- FM10K_TDLEN_ITR_SCALE_SHIFT;
+ hw->mac.itr_scale = FIELD_GET(FM10K_TDLEN_ITR_SCALE_MASK,
+ fm10k_read_reg(hw, FM10K_TDLEN(0)));

return 0;

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 55bb0b5310d5..3e6839ac1f0f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -4,47 +4,20 @@
#ifndef _I40E_H_
#define _I40E_H_

-#include <net/tcp.h>
-#include <net/udp.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/ioport.h>
-#include <linux/iommu.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/hashtable.h>
-#include <linux/string.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/sctp.h>
-#include <linux/pkt_sched.h>
-#include <linux/ipv6.h>
-#include <net/checksum.h>
-#include <net/ip6_checksum.h>
#include <linux/ethtool.h>
-#include <linux/if_vlan.h>
-#include <linux/if_macvlan.h>
-#include <linux/if_bridge.h>
-#include <linux/clocksource.h>
-#include <linux/net_tstamp.h>
+#include <linux/pci.h>
#include <linux/ptp_clock_kernel.h>
+#include <linux/types.h>
+#include <linux/avf/virtchnl.h>
+#include <linux/net/intel/i40e_client.h>
#include <net/pkt_cls.h>
-#include <net/pkt_sched.h>
-#include <net/tc_act/tc_gact.h>
-#include <net/tc_act/tc_mirred.h>
#include <net/udp_tunnel.h>
-#include <net/xdp_sock.h>
-#include <linux/bitfield.h>
-#include "i40e_type.h"
+#include "i40e_dcb.h"
+#include "i40e_debug.h"
+#include "i40e_io.h"
#include "i40e_prototype.h"
-#include <linux/net/intel/i40e_client.h>
-#include <linux/avf/virtchnl.h>
-#include "i40e_virtchnl_pf.h"
+#include "i40e_register.h"
#include "i40e_txrx.h"
-#include "i40e_dcb.h"

/* Useful i40e defaults */
#define I40E_MAX_VEB 16
@@ -108,7 +81,7 @@
#define I40E_MAX_BW_INACTIVE_ACCUM 4 /* accumulate 4 credits max */

/* driver state flags */
-enum i40e_state_t {
+enum i40e_state {
__I40E_TESTING,
__I40E_CONFIG_BUSY,
__I40E_CONFIG_DONE,
@@ -156,7 +129,7 @@ enum i40e_state_t {
BIT_ULL(__I40E_PF_RESET_AND_REBUILD_REQUESTED)

/* VSI state flags */
-enum i40e_vsi_state_t {
+enum i40e_vsi_state {
__I40E_VSI_DOWN,
__I40E_VSI_NEEDS_RESTART,
__I40E_VSI_SYNCING_FILTERS,
@@ -992,6 +965,7 @@ struct i40e_q_vector {
struct rcu_head rcu; /* to avoid race with update stats on free */
char name[I40E_INT_NAME_STR_LEN];
bool arm_wb_state;
+ bool in_busy_poll;
int irq_num; /* IRQ assigned to this q_vector */
} ____cacheline_internodealigned_in_smp;

@@ -1321,4 +1295,15 @@ static inline u32 i40e_is_tc_mqprio_enabled(struct i40e_pf *pf)
return pf->flags & I40E_FLAG_TC_MQPRIO;
}

+/**
+ * i40e_hw_to_pf - get pf pointer from the hardware structure
+ * @hw: pointer to the device HW structure
+ **/
+static inline struct i40e_pf *i40e_hw_to_pf(struct i40e_hw *hw)
+{
+ return container_of(hw, struct i40e_pf, hw);
+}
+
+struct device *i40e_hw_to_dev(struct i40e_hw *hw);
+
#endif /* _I40E_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index 100eb77b8dfe..9ce6e633cc2f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -1,9 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2013 - 2018 Intel Corporation. */

-#include "i40e_type.h"
+#include <linux/delay.h>
+#include "i40e_alloc.h"
#include "i40e_register.h"
-#include "i40e_adminq.h"
#include "i40e_prototype.h"

static void i40e_resume_aq(struct i40e_hw *hw);
@@ -51,7 +51,6 @@ static int i40e_alloc_adminq_asq_ring(struct i40e_hw *hw)
int ret_code;

ret_code = i40e_allocate_dma_mem(hw, &hw->aq.asq.desc_buf,
- i40e_mem_atq_ring,
(hw->aq.num_asq_entries *
sizeof(struct i40e_aq_desc)),
I40E_ADMINQ_DESC_ALIGNMENT);
@@ -78,7 +77,6 @@ static int i40e_alloc_adminq_arq_ring(struct i40e_hw *hw)
int ret_code;

ret_code = i40e_allocate_dma_mem(hw, &hw->aq.arq.desc_buf,
- i40e_mem_arq_ring,
(hw->aq.num_arq_entries *
sizeof(struct i40e_aq_desc)),
I40E_ADMINQ_DESC_ALIGNMENT);
@@ -136,7 +134,6 @@ static int i40e_alloc_arq_bufs(struct i40e_hw *hw)
for (i = 0; i < hw->aq.num_arq_entries; i++) {
bi = &hw->aq.arq.r.arq_bi[i];
ret_code = i40e_allocate_dma_mem(hw, bi,
- i40e_mem_arq_buf,
hw->aq.arq_buf_size,
I40E_ADMINQ_DESC_ALIGNMENT);
if (ret_code)
@@ -198,7 +195,6 @@ static int i40e_alloc_asq_bufs(struct i40e_hw *hw)
for (i = 0; i < hw->aq.num_asq_entries; i++) {
bi = &hw->aq.asq.r.asq_bi[i];
ret_code = i40e_allocate_dma_mem(hw, bi,
- i40e_mem_asq_buf,
hw->aq.asq_buf_size,
I40E_ADMINQ_DESC_ALIGNMENT);
if (ret_code)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.h b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
index 267f2e0a21ce..80125bea80a2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
@@ -4,7 +4,8 @@
#ifndef _I40E_ADMINQ_H_
#define _I40E_ADMINQ_H_

-#include "i40e_osdep.h"
+#include <linux/mutex.h>
+#include "i40e_alloc.h"
#include "i40e_adminq_cmd.h"

#define I40E_ADMINQ_DESC(R, i) \
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 3357d65a906b..18a1c3b6d72c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -4,6 +4,8 @@
#ifndef _I40E_ADMINQ_CMD_H_
#define _I40E_ADMINQ_CMD_H_

+#include <linux/bits.h>
+
/* This header file defines the i40e Admin Queue commands and is shared between
* i40e Firmware and Software.
*
diff --git a/drivers/net/ethernet/intel/i40e/i40e_alloc.h b/drivers/net/ethernet/intel/i40e/i40e_alloc.h
index a6c9a9e343d1..e0dde326255d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_alloc.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_alloc.h
@@ -4,25 +4,25 @@
#ifndef _I40E_ALLOC_H_
#define _I40E_ALLOC_H_

+#include <linux/types.h>
+
struct i40e_hw;

-/* Memory allocation types */
-enum i40e_memory_type {
- i40e_mem_arq_buf = 0, /* ARQ indirect command buffer */
- i40e_mem_asq_buf = 1,
- i40e_mem_atq_buf = 2, /* ATQ indirect command buffer */
- i40e_mem_arq_ring = 3, /* ARQ descriptor ring */
- i40e_mem_atq_ring = 4, /* ATQ descriptor ring */
- i40e_mem_pd = 5, /* Page Descriptor */
- i40e_mem_bp = 6, /* Backing Page - 4KB */
- i40e_mem_bp_jumbo = 7, /* Backing Page - > 4KB */
- i40e_mem_reserved
+/* memory allocation tracking */
+struct i40e_dma_mem {
+ void *va;
+ dma_addr_t pa;
+ u32 size;
+};
+
+struct i40e_virt_mem {
+ void *va;
+ u32 size;
};

/* prototype for functions used for dynamic memory allocation */
int i40e_allocate_dma_mem(struct i40e_hw *hw,
struct i40e_dma_mem *mem,
- enum i40e_memory_type type,
u64 size, u32 alignment);
int i40e_free_dma_mem(struct i40e_hw *hw,
struct i40e_dma_mem *mem);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
index 639c5a1ca853..306758428aef 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -6,7 +6,6 @@
#include <linux/net/intel/i40e_client.h>

#include "i40e.h"
-#include "i40e_prototype.h"

static LIST_HEAD(i40e_devices);
static DEFINE_MUTEX(i40e_device_mutex);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 1b493854f522..4d7caa119971 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1,11 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2013 - 2021 Intel Corporation. */

-#include "i40e.h"
-#include "i40e_type.h"
-#include "i40e_adminq.h"
-#include "i40e_prototype.h"
#include <linux/avf/virtchnl.h>
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/etherdevice.h>
+#include <linux/pci.h>
+#include "i40e_adminq_cmd.h"
+#include "i40e_devids.h"
+#include "i40e_prototype.h"
+#include "i40e_register.h"

/**
* i40e_set_mac_type - Sets MAC type
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
index f81e744c0fb3..d57dd30b024f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
@@ -1,9 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2013 - 2021 Intel Corporation. */

+#include <linux/bitfield.h>
#include "i40e_adminq.h"
-#include "i40e_prototype.h"
+#include "i40e_alloc.h"
#include "i40e_dcb.h"
+#include "i40e_prototype.h"

/**
* i40e_get_dcbx_status
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
index 195421d863ab..077a95dad32c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
@@ -2,8 +2,8 @@
/* Copyright(c) 2013 - 2021 Intel Corporation. */

#ifdef CONFIG_I40E_DCB
-#include "i40e.h"
#include <net/dcbnl.h>
+#include "i40e.h"

#define I40E_DCBNL_STATUS_SUCCESS 0
#define I40E_DCBNL_STATUS_ERROR 1
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ddp.c b/drivers/net/ethernet/intel/i40e/i40e_ddp.c
index 0e72abd178ae..21b3518c4096 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ddp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ddp.c
@@ -1,9 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2013 - 2018 Intel Corporation. */

+#include <linux/firmware.h>
#include "i40e.h"

-#include <linux/firmware.h>

/**
* i40e_ddp_profiles_eq - checks if DDP profiles are the equivalent
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debug.h b/drivers/net/ethernet/intel/i40e/i40e_debug.h
new file mode 100644
index 000000000000..27ebc72d8bfe
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_debug.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2023 Intel Corporation. */
+
+#ifndef _I40E_DEBUG_H_
+#define _I40E_DEBUG_H_
+
+#include <linux/dev_printk.h>
+
+/* debug masks - set these bits in hw->debug_mask to control output */
+enum i40e_debug_mask {
+ I40E_DEBUG_INIT = 0x00000001,
+ I40E_DEBUG_RELEASE = 0x00000002,
+
+ I40E_DEBUG_LINK = 0x00000010,
+ I40E_DEBUG_PHY = 0x00000020,
+ I40E_DEBUG_HMC = 0x00000040,
+ I40E_DEBUG_NVM = 0x00000080,
+ I40E_DEBUG_LAN = 0x00000100,
+ I40E_DEBUG_FLOW = 0x00000200,
+ I40E_DEBUG_DCB = 0x00000400,
+ I40E_DEBUG_DIAG = 0x00000800,
+ I40E_DEBUG_FD = 0x00001000,
+ I40E_DEBUG_PACKAGE = 0x00002000,
+ I40E_DEBUG_IWARP = 0x00F00000,
+ I40E_DEBUG_AQ_MESSAGE = 0x01000000,
+ I40E_DEBUG_AQ_DESCRIPTOR = 0x02000000,
+ I40E_DEBUG_AQ_DESC_BUFFER = 0x04000000,
+ I40E_DEBUG_AQ_COMMAND = 0x06000000,
+ I40E_DEBUG_AQ = 0x0F000000,
+
+ I40E_DEBUG_USER = 0xF0000000,
+
+ I40E_DEBUG_ALL = 0xFFFFFFFF
+};
+
+struct i40e_hw;
+struct device *i40e_hw_to_dev(struct i40e_hw *hw);
+
+#define hw_dbg(hw, S, A...) dev_dbg(i40e_hw_to_dev(hw), S, ##A)
+
+#define i40e_debug(h, m, s, ...) \
+do { \
+ if (((m) & (h)->debug_mask)) \
+ dev_info(i40e_hw_to_dev(hw), s, ##__VA_ARGS__); \
+} while (0)
+
+#endif /* _I40E_DEBUG_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 1a497cb07710..999c9708def5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -5,8 +5,9 @@

#include <linux/fs.h>
#include <linux/debugfs.h>
-
+#include <linux/if_bridge.h>
#include "i40e.h"
+#include "i40e_virtchnl_pf.h"

static struct dentry *i40e_dbg_root;

diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.h b/drivers/net/ethernet/intel/i40e/i40e_diag.h
index c3ce5f35211f..ece3a6b9a5c6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_diag.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_diag.h
@@ -4,7 +4,10 @@
#ifndef _I40E_DIAG_H_
#define _I40E_DIAG_H_

-#include "i40e_type.h"
+#include "i40e_adminq_cmd.h"
+
+/* forward-declare the HW struct for the compiler */
+struct i40e_hw;

enum i40e_lb_mode {
I40E_LB_MODE_NONE = 0x0,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index bd1321bf7e26..4e90570ba780 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -3,9 +3,10 @@

/* ethtool support for i40e */

-#include "i40e.h"
+#include "i40e_devids.h"
#include "i40e_diag.h"
#include "i40e_txrx_common.h"
+#include "i40e_virtchnl_pf.h"

/* ethtool statistics helpers */

diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
index 96ee63aca7a1..1742624ca62e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
@@ -1,10 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2013 - 2018 Intel Corporation. */

-#include "i40e.h"
-#include "i40e_osdep.h"
-#include "i40e_register.h"
#include "i40e_alloc.h"
+#include "i40e_debug.h"
#include "i40e_hmc.h"
#include "i40e_type.h"

@@ -22,7 +20,6 @@ int i40e_add_sd_table_entry(struct i40e_hw *hw,
enum i40e_sd_entry_type type,
u64 direct_mode_sz)
{
- enum i40e_memory_type mem_type __attribute__((unused));
struct i40e_hmc_sd_entry *sd_entry;
bool dma_mem_alloc_done = false;
struct i40e_dma_mem mem;
@@ -43,16 +40,13 @@ int i40e_add_sd_table_entry(struct i40e_hw *hw,

sd_entry = &hmc_info->sd_table.sd_entry[sd_index];
if (!sd_entry->valid) {
- if (I40E_SD_TYPE_PAGED == type) {
- mem_type = i40e_mem_pd;
+ if (type == I40E_SD_TYPE_PAGED)
alloc_len = I40E_HMC_PAGED_BP_SIZE;
- } else {
- mem_type = i40e_mem_bp_jumbo;
+ else
alloc_len = direct_mode_sz;
- }

/* allocate a 4K pd page or 2M backing page */
- ret_code = i40e_allocate_dma_mem(hw, &mem, mem_type, alloc_len,
+ ret_code = i40e_allocate_dma_mem(hw, &mem, alloc_len,
I40E_HMC_PD_BP_BUF_ALIGNMENT);
if (ret_code)
goto exit;
@@ -140,7 +134,7 @@ int i40e_add_pd_table_entry(struct i40e_hw *hw,
page = rsrc_pg;
} else {
/* allocate a 4K backing page */
- ret_code = i40e_allocate_dma_mem(hw, page, i40e_mem_bp,
+ ret_code = i40e_allocate_dma_mem(hw, page,
I40E_HMC_PAGED_BP_SIZE,
I40E_HMC_PD_BP_BUF_ALIGNMENT);
if (ret_code)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_hmc.h
index 9960da07a573..480e3a883cc7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.h
@@ -4,6 +4,10 @@
#ifndef _I40E_HMC_H_
#define _I40E_HMC_H_

+#include "i40e_alloc.h"
+#include "i40e_io.h"
+#include "i40e_register.h"
+
#define I40E_HMC_MAX_BP_COUNT 512

/* forward-declare the HW struct for the compiler */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_io.h b/drivers/net/ethernet/intel/i40e/i40e_io.h
new file mode 100644
index 000000000000..2a2ed9a1d476
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_io.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2023 Intel Corporation. */
+
+#ifndef _I40E_IO_H_
+#define _I40E_IO_H_
+
+/* get readq/writeq support for 32 bit kernels, use the low-first version */
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg)))
+#define rd32(a, reg) readl((a)->hw_addr + (reg))
+
+#define rd64(a, reg) readq((a)->hw_addr + (reg))
+#define i40e_flush(a) readl((a)->hw_addr + I40E_GLGEN_STAT)
+
+#endif /* _I40E_IO_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
index 474365bf0648..beaaf5c309d5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
@@ -1,13 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2013 - 2018 Intel Corporation. */

-#include "i40e.h"
-#include "i40e_osdep.h"
-#include "i40e_register.h"
-#include "i40e_type.h"
-#include "i40e_hmc.h"
+#include "i40e_alloc.h"
+#include "i40e_debug.h"
#include "i40e_lan_hmc.h"
-#include "i40e_prototype.h"
+#include "i40e_type.h"

/* lan specific interface functions */

diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
index 9f960404c2b3..305a276953b0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
@@ -4,6 +4,8 @@
#ifndef _I40E_LAN_HMC_H_
#define _I40E_LAN_HMC_H_

+#include "i40e_hmc.h"
+
/* forward-declare the HW struct for the compiler */
struct i40e_hw;

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index ae32e83a6990..a21fc92aa272 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1,19 +1,22 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2013 - 2021 Intel Corporation. */

-#include <linux/etherdevice.h>
-#include <linux/of_net.h>
-#include <linux/pci.h>
-#include <linux/bpf.h>
#include <generated/utsrelease.h>
#include <linux/crash_dump.h>
+#include <linux/if_bridge.h>
+#include <linux/if_macvlan.h>
+#include <linux/module.h>
+#include <net/pkt_cls.h>
+#include <net/xdp_sock_drv.h>

/* Local includes */
#include "i40e.h"
+#include "i40e_devids.h"
#include "i40e_diag.h"
+#include "i40e_lan_hmc.h"
+#include "i40e_virtchnl_pf.h"
#include "i40e_xsk.h"
-#include <net/udp_tunnel.h>
-#include <net/xdp_sock_drv.h>
+
/* All i40e tracepoints are defined by the include below, which
* must be included exactly once across the whole kernel with
* CREATE_TRACE_POINTS defined
@@ -126,16 +129,27 @@ static void netdev_hw_addr_refcnt(struct i40e_mac_filter *f,
}

/**
- * i40e_allocate_dma_mem_d - OS specific memory alloc for shared code
+ * i40e_hw_to_dev - get device pointer from the hardware structure
+ * @hw: pointer to the device HW structure
+ **/
+struct device *i40e_hw_to_dev(struct i40e_hw *hw)
+{
+ struct i40e_pf *pf = i40e_hw_to_pf(hw);
+
+ return &pf->pdev->dev;
+}
+
+/**
+ * i40e_allocate_dma_mem - OS specific memory alloc for shared code
* @hw: pointer to the HW structure
* @mem: ptr to mem struct to fill out
* @size: size of memory requested
* @alignment: what to align the allocation to
**/
-int i40e_allocate_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem,
- u64 size, u32 alignment)
+int i40e_allocate_dma_mem(struct i40e_hw *hw, struct i40e_dma_mem *mem,
+ u64 size, u32 alignment)
{
- struct i40e_pf *pf = (struct i40e_pf *)hw->back;
+ struct i40e_pf *pf = i40e_hw_to_pf(hw);

mem->size = ALIGN(size, alignment);
mem->va = dma_alloc_coherent(&pf->pdev->dev, mem->size, &mem->pa,
@@ -147,13 +161,13 @@ int i40e_allocate_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem,
}

/**
- * i40e_free_dma_mem_d - OS specific memory free for shared code
+ * i40e_free_dma_mem - OS specific memory free for shared code
* @hw: pointer to the HW structure
* @mem: ptr to mem struct to free
**/
-int i40e_free_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem)
+int i40e_free_dma_mem(struct i40e_hw *hw, struct i40e_dma_mem *mem)
{
- struct i40e_pf *pf = (struct i40e_pf *)hw->back;
+ struct i40e_pf *pf = i40e_hw_to_pf(hw);

dma_free_coherent(&pf->pdev->dev, mem->size, mem->va, mem->pa);
mem->va = NULL;
@@ -164,13 +178,13 @@ int i40e_free_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem)
}

/**
- * i40e_allocate_virt_mem_d - OS specific memory alloc for shared code
+ * i40e_allocate_virt_mem - OS specific memory alloc for shared code
* @hw: pointer to the HW structure
* @mem: ptr to mem struct to fill out
* @size: size of memory requested
**/
-int i40e_allocate_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem,
- u32 size)
+int i40e_allocate_virt_mem(struct i40e_hw *hw, struct i40e_virt_mem *mem,
+ u32 size)
{
mem->size = size;
mem->va = kzalloc(size, GFP_KERNEL);
@@ -182,11 +196,11 @@ int i40e_allocate_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem,
}

/**
- * i40e_free_virt_mem_d - OS specific memory free for shared code
+ * i40e_free_virt_mem - OS specific memory free for shared code
* @hw: pointer to the HW structure
* @mem: ptr to mem struct to free
**/
-int i40e_free_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem)
+int i40e_free_virt_mem(struct i40e_hw *hw, struct i40e_virt_mem *mem)
{
/* it's ok to kfree a NULL pointer */
kfree(mem->va);
@@ -1249,8 +1263,11 @@ int i40e_count_filters(struct i40e_vsi *vsi)
int bkt;
int cnt = 0;

- hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
- ++cnt;
+ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+ if (f->state == I40E_FILTER_NEW ||
+ f->state == I40E_FILTER_ACTIVE)
+ ++cnt;
+ }

return cnt;
}
@@ -3905,6 +3922,12 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
q_vector->tx.target_itr >> 1);
q_vector->tx.current_itr = q_vector->tx.target_itr;

+ /* Set ITR for software interrupts triggered after exiting
+ * busy-loop polling.
+ */
+ wr32(hw, I40E_PFINT_ITRN(I40E_SW_ITR, vector - 1),
+ I40E_ITR_20K);
+
wr32(hw, I40E_PFINT_RATEN(vector - 1),
i40e_intrl_usec_to_reg(vsi->int_rate_limit));

@@ -15644,10 +15667,10 @@ static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw)
**/
static inline void i40e_set_subsystem_device_id(struct i40e_hw *hw)
{
- struct pci_dev *pdev = ((struct i40e_pf *)hw->back)->pdev;
+ struct i40e_pf *pf = i40e_hw_to_pf(hw);

- hw->subsystem_device_id = pdev->subsystem_device ?
- pdev->subsystem_device :
+ hw->subsystem_device_id = pf->pdev->subsystem_device ?
+ pf->pdev->subsystem_device :
(ushort)(rd32(hw, I40E_PFPCI_SUBSYSID) & USHRT_MAX);
}

@@ -15717,7 +15740,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
set_bit(__I40E_DOWN, pf->state);

hw = &pf->hw;
- hw->back = pf;

pf->ioremap_len = min_t(int, pci_resource_len(pdev, 0),
I40E_MAX_CSR_SPACE);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 07a46adeab38..e5aec09d58e2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -1,6 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2013 - 2018 Intel Corporation. */

+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include "i40e_alloc.h"
#include "i40e_prototype.h"

/**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
deleted file mode 100644
index 2bd4de03dafa..000000000000
--- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#ifndef _I40E_OSDEP_H_
-#define _I40E_OSDEP_H_
-
-#include <linux/types.h>
-#include <linux/if_ether.h>
-#include <linux/if_vlan.h>
-#include <linux/tcp.h>
-#include <linux/pci.h>
-#include <linux/highuid.h>
-
-/* get readq/writeq support for 32 bit kernels, use the low-first version */
-#include <linux/io-64-nonatomic-lo-hi.h>
-
-/* File to be the magic between shared code and
- * actual OS primitives
- */
-
-#define hw_dbg(hw, S, A...) \
-do { \
- dev_dbg(&((struct i40e_pf *)hw->back)->pdev->dev, S, ##A); \
-} while (0)
-
-#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg)))
-#define rd32(a, reg) readl((a)->hw_addr + (reg))
-
-#define rd64(a, reg) readq((a)->hw_addr + (reg))
-#define i40e_flush(a) readl((a)->hw_addr + I40E_GLGEN_STAT)
-
-/* memory allocation tracking */
-struct i40e_dma_mem {
- void *va;
- dma_addr_t pa;
- u32 size;
-};
-
-#define i40e_allocate_dma_mem(h, m, unused, s, a) \
- i40e_allocate_dma_mem_d(h, m, s, a)
-#define i40e_free_dma_mem(h, m) i40e_free_dma_mem_d(h, m)
-
-struct i40e_virt_mem {
- void *va;
- u32 size;
-};
-
-#define i40e_allocate_virt_mem(h, m, s) i40e_allocate_virt_mem_d(h, m, s)
-#define i40e_free_virt_mem(h, m) i40e_free_virt_mem_d(h, m)
-
-#define i40e_debug(h, m, s, ...) \
-do { \
- if (((m) & (h)->debug_mask)) \
- pr_info("i40e %02x:%02x.%x " s, \
- (h)->bus.bus_id, (h)->bus.device, \
- (h)->bus.func, ##__VA_ARGS__); \
-} while (0)
-
-#endif /* _I40E_OSDEP_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index 3eeee224f1fb..2001fefa0c52 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -4,9 +4,9 @@
#ifndef _I40E_PROTOTYPE_H_
#define _I40E_PROTOTYPE_H_

-#include "i40e_type.h"
-#include "i40e_alloc.h"
#include <linux/avf/virtchnl.h>
+#include "i40e_debug.h"
+#include "i40e_type.h"

/* Prototypes for shared code functions that are not in
* the standard function pointer structures. These are
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index 8a26811140b4..65c714d0bfff 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -1,9 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2013 - 2018 Intel Corporation. */

-#include "i40e.h"
#include <linux/ptp_classify.h>
#include <linux/posix-clock.h>
+#include "i40e.h"
+#include "i40e_devids.h"

/* The XL710 timesync is very much like Intel's 82599 design when it comes to
* the fundamental clock design. However, the clock operations are much simpler
@@ -34,7 +35,7 @@ enum i40e_ptp_pin {
GPIO_4
};

-enum i40e_can_set_pins_t {
+enum i40e_can_set_pins {
CANT_DO_PINS = -1,
CAN_SET_PINS,
CAN_DO_PINS
@@ -192,7 +193,7 @@ static bool i40e_is_ptp_pin_dev(struct i40e_hw *hw)
* return CAN_DO_PINS if pins can be manipulated within a NIC or
* return CANT_DO_PINS otherwise.
**/
-static enum i40e_can_set_pins_t i40e_can_set_pins(struct i40e_pf *pf)
+static enum i40e_can_set_pins i40e_can_set_pins(struct i40e_pf *pf)
{
if (!i40e_is_ptp_pin_dev(&pf->hw)) {
dev_warn(&pf->pdev->dev,
@@ -1070,7 +1071,7 @@ static void i40e_ptp_set_pins_hw(struct i40e_pf *pf)
static int i40e_ptp_set_pins(struct i40e_pf *pf,
struct i40e_ptp_pins_settings *pins)
{
- enum i40e_can_set_pins_t pin_caps = i40e_can_set_pins(pf);
+ enum i40e_can_set_pins pin_caps = i40e_can_set_pins(pf);
int i = 0;

if (pin_caps == CANT_DO_PINS)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
index 7339003aa17c..989c18682473 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
@@ -202,7 +202,9 @@
#define I40E_GLGEN_MSCA_DEVADD_SHIFT 16
#define I40E_GLGEN_MSCA_PHYADD_SHIFT 21
#define I40E_GLGEN_MSCA_OPCODE_SHIFT 26
+#define I40E_GLGEN_MSCA_OPCODE_MASK(_i) I40E_MASK(_i, I40E_GLGEN_MSCA_OPCODE_SHIFT)
#define I40E_GLGEN_MSCA_STCODE_SHIFT 28
+#define I40E_GLGEN_MSCA_STCODE_MASK I40E_MASK(0x1, I40E_GLGEN_MSCA_STCODE_SHIFT)
#define I40E_GLGEN_MSCA_MDICMD_SHIFT 30
#define I40E_GLGEN_MSCA_MDICMD_MASK I40E_MASK(0x1, I40E_GLGEN_MSCA_MDICMD_SHIFT)
#define I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT 31
@@ -328,8 +330,11 @@
#define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3
#define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT)
#define I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT 5
+#define I40E_PFINT_DYN_CTLN_INTERVAL_MASK I40E_MASK(0xFFF, I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT)
#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24
#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT 25
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT)
#define I40E_PFINT_ICR0 0x00038780 /* Reset: CORER */
#define I40E_PFINT_ICR0_INTEVENT_SHIFT 0
#define I40E_PFINT_ICR0_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_INTEVENT_SHIFT)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 1df2f9338812..c962987d8b51 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1,14 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2013 - 2018 Intel Corporation. */

-#include <linux/prefetch.h>
#include <linux/bpf_trace.h>
+#include <linux/prefetch.h>
+#include <linux/sctp.h>
#include <net/mpls.h>
#include <net/xdp.h>
-#include "i40e.h"
-#include "i40e_trace.h"
-#include "i40e_prototype.h"
#include "i40e_txrx_common.h"
+#include "i40e_trace.h"
#include "i40e_xsk.h"

#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
@@ -2644,7 +2643,22 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
return failure ? budget : (int)total_rx_packets;
}

-static inline u32 i40e_buildreg_itr(const int type, u16 itr)
+/**
+ * i40e_buildreg_itr - build a value for writing to I40E_PFINT_DYN_CTLN register
+ * @itr_idx: interrupt throttling index
+ * @interval: interrupt throttling interval value in usecs
+ * @force_swint: force software interrupt
+ *
+ * The function builds a value for I40E_PFINT_DYN_CTLN register that
+ * is used to update interrupt throttling interval for specified ITR index
+ * and optionally enforces a software interrupt. If the @itr_idx is equal
+ * to I40E_ITR_NONE then no interval change is applied and only @force_swint
+ * parameter is taken into account. If the interval change and enforced
+ * software interrupt are not requested then the built value just enables
+ * appropriate vector interrupt.
+ **/
+static u32 i40e_buildreg_itr(enum i40e_dyn_idx itr_idx, u16 interval,
+ bool force_swint)
{
u32 val;

@@ -2658,23 +2672,33 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr)
* an event in the PBA anyway so we need to rely on the automask
* to hold pending events for us until the interrupt is re-enabled
*
- * The itr value is reported in microseconds, and the register
- * value is recorded in 2 microsecond units. For this reason we
- * only need to shift by the interval shift - 1 instead of the
- * full value.
+ * We have to shift the given value as it is reported in microseconds
+ * and the register value is recorded in 2 microsecond units.
*/
- itr &= I40E_ITR_MASK;
+ interval >>= 1;

+ /* 1. Enable vector interrupt
+ * 2. Update the interval for the specified ITR index
+ * (I40E_ITR_NONE in the register is used to indicate that
+ * no interval update is requested)
+ */
val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
- (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1));
+ FIELD_PREP(I40E_PFINT_DYN_CTLN_ITR_INDX_MASK, itr_idx) |
+ FIELD_PREP(I40E_PFINT_DYN_CTLN_INTERVAL_MASK, interval);
+
+ /* 3. Enforce software interrupt trigger if requested
+ * (These software interrupts rate is limited by ITR2 that is
+ * set to 20K interrupts per second)
+ */
+ if (force_swint)
+ val |= I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
+ I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK |
+ FIELD_PREP(I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK,
+ I40E_SW_ITR);

return val;
}

-/* a small macro to shorten up some long lines */
-#define INTREG I40E_PFINT_DYN_CTLN
-
/* The act of updating the ITR will cause it to immediately trigger. In order
* to prevent this from throwing off adaptive update statistics we defer the
* update so that it can only happen so often. So after either Tx or Rx are
@@ -2693,8 +2717,10 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr)
static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
struct i40e_q_vector *q_vector)
{
+ enum i40e_dyn_idx itr_idx = I40E_ITR_NONE;
struct i40e_hw *hw = &vsi->back->hw;
- u32 intval;
+ u16 interval = 0;
+ u32 itr_val;

/* If we don't have MSIX, then we only need to re-enable icr0 */
if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) {
@@ -2716,8 +2742,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
*/
if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
/* Rx ITR needs to be reduced, this is highest priority */
- intval = i40e_buildreg_itr(I40E_RX_ITR,
- q_vector->rx.target_itr);
+ itr_idx = I40E_RX_ITR;
+ interval = q_vector->rx.target_itr;
q_vector->rx.current_itr = q_vector->rx.target_itr;
q_vector->itr_countdown = ITR_COUNTDOWN_START;
} else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
@@ -2726,25 +2752,36 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
/* Tx ITR needs to be reduced, this is second priority
* Tx ITR needs to be increased more than Rx, fourth priority
*/
- intval = i40e_buildreg_itr(I40E_TX_ITR,
- q_vector->tx.target_itr);
+ itr_idx = I40E_TX_ITR;
+ interval = q_vector->tx.target_itr;
q_vector->tx.current_itr = q_vector->tx.target_itr;
q_vector->itr_countdown = ITR_COUNTDOWN_START;
} else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
/* Rx ITR needs to be increased, third priority */
- intval = i40e_buildreg_itr(I40E_RX_ITR,
- q_vector->rx.target_itr);
+ itr_idx = I40E_RX_ITR;
+ interval = q_vector->rx.target_itr;
q_vector->rx.current_itr = q_vector->rx.target_itr;
q_vector->itr_countdown = ITR_COUNTDOWN_START;
} else {
/* No ITR update, lowest priority */
- intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
if (q_vector->itr_countdown)
q_vector->itr_countdown--;
}

- if (!test_bit(__I40E_VSI_DOWN, vsi->state))
- wr32(hw, INTREG(q_vector->reg_idx), intval);
+ /* Do not update interrupt control register if VSI is down */
+ if (test_bit(__I40E_VSI_DOWN, vsi->state))
+ return;
+
+ /* Update ITR interval if necessary and enforce software interrupt
+ * if we are exiting busy poll.
+ */
+ if (q_vector->in_busy_poll) {
+ itr_val = i40e_buildreg_itr(itr_idx, interval, true);
+ q_vector->in_busy_poll = false;
+ } else {
+ itr_val = i40e_buildreg_itr(itr_idx, interval, false);
+ }
+ wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->reg_idx), itr_val);
}

/**
@@ -2859,6 +2896,8 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
*/
if (likely(napi_complete_done(napi, work_done)))
i40e_update_enable_itr(vsi, q_vector);
+ else
+ q_vector->in_busy_poll = true;

return min(work_done, budget - 1);
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 900b0d9ede9f..2b1d50873a4d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -5,6 +5,7 @@
#define _I40E_TXRX_H_

#include <net/xdp.h>
+#include "i40e_type.h"

/* Interrupt Throttling and Rate Limiting Goodies */
#define I40E_DEFAULT_IRQ_WORK 256
@@ -57,7 +58,7 @@ static inline u16 i40e_intrl_usec_to_reg(int intrl)
* mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any
* register but instead is a special value meaning "don't update" ITR0/1/2.
*/
-enum i40e_dyn_idx_t {
+enum i40e_dyn_idx {
I40E_IDX_ITR0 = 0,
I40E_IDX_ITR1 = 1,
I40E_IDX_ITR2 = 2,
@@ -67,6 +68,7 @@ enum i40e_dyn_idx_t {
/* these are indexes into ITRN registers */
#define I40E_RX_ITR I40E_IDX_ITR0
#define I40E_TX_ITR I40E_IDX_ITR1
+#define I40E_SW_ITR I40E_IDX_ITR2

/* Supported RSS offloads */
#define I40E_DEFAULT_RSS_HENA ( \
@@ -305,7 +307,7 @@ struct i40e_rx_queue_stats {
u64 page_busy_count;
};

-enum i40e_ring_state_t {
+enum i40e_ring_state {
__I40E_TX_FDIR_INIT_DONE,
__I40E_TX_XPS_INIT_DONE,
__I40E_RING_STATE_NBITS /* must be last */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
index 8c5118c8baaf..e26807fd2123 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
@@ -4,6 +4,8 @@
#ifndef I40E_TXRX_COMMON_
#define I40E_TXRX_COMMON_

+#include "i40e.h"
+
int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring);
void i40e_clean_programming_status(struct i40e_ring *rx_ring, u64 qword0_raw,
u64 qword1);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index 232131bedc3e..4092f82bcfb1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -4,12 +4,9 @@
#ifndef _I40E_TYPE_H_
#define _I40E_TYPE_H_

-#include "i40e_osdep.h"
-#include "i40e_register.h"
+#include <uapi/linux/if_ether.h>
#include "i40e_adminq.h"
#include "i40e_hmc.h"
-#include "i40e_lan_hmc.h"
-#include "i40e_devids.h"

/* I40E_MASK is a macro used on 32 bit registers */
#define I40E_MASK(mask, shift) ((u32)(mask) << (shift))
@@ -43,48 +40,14 @@ typedef void (*I40E_ADMINQ_CALLBACK)(struct i40e_hw *, struct i40e_aq_desc *);
#define I40E_QTX_CTL_VM_QUEUE 0x1
#define I40E_QTX_CTL_PF_QUEUE 0x2

-/* debug masks - set these bits in hw->debug_mask to control output */
-enum i40e_debug_mask {
- I40E_DEBUG_INIT = 0x00000001,
- I40E_DEBUG_RELEASE = 0x00000002,
-
- I40E_DEBUG_LINK = 0x00000010,
- I40E_DEBUG_PHY = 0x00000020,
- I40E_DEBUG_HMC = 0x00000040,
- I40E_DEBUG_NVM = 0x00000080,
- I40E_DEBUG_LAN = 0x00000100,
- I40E_DEBUG_FLOW = 0x00000200,
- I40E_DEBUG_DCB = 0x00000400,
- I40E_DEBUG_DIAG = 0x00000800,
- I40E_DEBUG_FD = 0x00001000,
- I40E_DEBUG_PACKAGE = 0x00002000,
- I40E_DEBUG_IWARP = 0x00F00000,
- I40E_DEBUG_AQ_MESSAGE = 0x01000000,
- I40E_DEBUG_AQ_DESCRIPTOR = 0x02000000,
- I40E_DEBUG_AQ_DESC_BUFFER = 0x04000000,
- I40E_DEBUG_AQ_COMMAND = 0x06000000,
- I40E_DEBUG_AQ = 0x0F000000,
-
- I40E_DEBUG_USER = 0xF0000000,
-
- I40E_DEBUG_ALL = 0xFFFFFFFF
-};
-
-#define I40E_MDIO_CLAUSE22_STCODE_MASK I40E_MASK(1, \
- I40E_GLGEN_MSCA_STCODE_SHIFT)
-#define I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK I40E_MASK(1, \
- I40E_GLGEN_MSCA_OPCODE_SHIFT)
-#define I40E_MDIO_CLAUSE22_OPCODE_READ_MASK I40E_MASK(2, \
- I40E_GLGEN_MSCA_OPCODE_SHIFT)
-
-#define I40E_MDIO_CLAUSE45_STCODE_MASK I40E_MASK(0, \
- I40E_GLGEN_MSCA_STCODE_SHIFT)
-#define I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK I40E_MASK(0, \
- I40E_GLGEN_MSCA_OPCODE_SHIFT)
-#define I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK I40E_MASK(1, \
- I40E_GLGEN_MSCA_OPCODE_SHIFT)
-#define I40E_MDIO_CLAUSE45_OPCODE_READ_MASK I40E_MASK(3, \
- I40E_GLGEN_MSCA_OPCODE_SHIFT)
+#define I40E_MDIO_CLAUSE22_STCODE_MASK I40E_GLGEN_MSCA_STCODE_MASK
+#define I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK I40E_GLGEN_MSCA_OPCODE_MASK(1)
+#define I40E_MDIO_CLAUSE22_OPCODE_READ_MASK I40E_GLGEN_MSCA_OPCODE_MASK(2)
+
+#define I40E_MDIO_CLAUSE45_STCODE_MASK I40E_GLGEN_MSCA_STCODE_MASK
+#define I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK I40E_GLGEN_MSCA_OPCODE_MASK(0)
+#define I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK I40E_GLGEN_MSCA_OPCODE_MASK(1)
+#define I40E_MDIO_CLAUSE45_OPCODE_READ_MASK I40E_GLGEN_MSCA_OPCODE_MASK(3)

#define I40E_PHY_COM_REG_PAGE 0x1E
#define I40E_PHY_LED_LINK_MODE_MASK 0xF0
@@ -525,7 +488,6 @@ struct i40e_dcbx_config {
/* Port hardware description */
struct i40e_hw {
u8 __iomem *hw_addr;
- void *back;

/* subsystem structs */
struct i40e_phy_info phy;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 082c09920999..7d47a0527454 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2,6 +2,8 @@
/* Copyright(c) 2013 - 2018 Intel Corporation. */

#include "i40e.h"
+#include "i40e_lan_hmc.h"
+#include "i40e_virtchnl_pf.h"

/*********************notification routines***********************/

@@ -1628,8 +1630,8 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
{
struct i40e_hw *hw = &pf->hw;
struct i40e_vf *vf;
- int i, v;
u32 reg;
+ int i;

/* If we don't have any VFs, then there is nothing to reset */
if (!pf->num_alloc_vfs)
@@ -1640,11 +1642,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
return false;

/* Begin reset on all VFs at once */
- for (v = 0; v < pf->num_alloc_vfs; v++) {
- vf = &pf->vf[v];
+ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
/* If VF is being reset no need to trigger reset again */
if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
- i40e_trigger_vf_reset(&pf->vf[v], flr);
+ i40e_trigger_vf_reset(vf, flr);
}

/* HW requires some time to make sure it can flush the FIFO for a VF
@@ -1653,14 +1654,13 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
* the VFs using a simple iterator that increments once that VF has
* finished resetting.
*/
- for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) {
+ for (i = 0, vf = &pf->vf[0]; i < 10 && vf < &pf->vf[pf->num_alloc_vfs]; ++i) {
usleep_range(10000, 20000);

/* Check each VF in sequence, beginning with the VF to fail
* the previous check.
*/
- while (v < pf->num_alloc_vfs) {
- vf = &pf->vf[v];
+ while (vf < &pf->vf[pf->num_alloc_vfs]) {
if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) {
reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id));
if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK))
@@ -1670,7 +1670,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
/* If the current VF has finished resetting, move on
* to the next VF in sequence.
*/
- v++;
+ ++vf;
}
}

@@ -1680,39 +1680,39 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
/* Display a warning if at least one VF didn't manage to reset in
* time, but continue on with the operation.
*/
- if (v < pf->num_alloc_vfs)
+ if (vf < &pf->vf[pf->num_alloc_vfs])
dev_err(&pf->pdev->dev, "VF reset check timeout on VF %d\n",
- pf->vf[v].vf_id);
+ vf->vf_id);
usleep_range(10000, 20000);

/* Begin disabling all the rings associated with VFs, but do not wait
* between each VF.
*/
- for (v = 0; v < pf->num_alloc_vfs; v++) {
+ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
/* On initial reset, we don't have any queues to disable */
- if (pf->vf[v].lan_vsi_idx == 0)
+ if (vf->lan_vsi_idx == 0)
continue;

/* If VF is reset in another thread just continue */
if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
continue;

- i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[v].lan_vsi_idx]);
+ i40e_vsi_stop_rings_no_wait(pf->vsi[vf->lan_vsi_idx]);
}

/* Now that we've notified HW to disable all of the VF rings, wait
* until they finish.
*/
- for (v = 0; v < pf->num_alloc_vfs; v++) {
+ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
/* On initial reset, we don't have any queues to disable */
- if (pf->vf[v].lan_vsi_idx == 0)
+ if (vf->lan_vsi_idx == 0)
continue;

/* If VF is reset in another thread just continue */
if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
continue;

- i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[v].lan_vsi_idx]);
+ i40e_vsi_wait_queues_disabled(pf->vsi[vf->lan_vsi_idx]);
}

/* Hw may need up to 50ms to finish disabling the RX queues. We
@@ -1721,12 +1721,12 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
mdelay(50);

/* Finish the reset on each VF */
- for (v = 0; v < pf->num_alloc_vfs; v++) {
+ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
/* If VF is reset in another thread just continue */
if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
continue;

- i40e_cleanup_reset_vf(&pf->vf[v]);
+ i40e_cleanup_reset_vf(vf);
}

i40e_flush(hw);
@@ -3143,11 +3143,12 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
/* Allow to delete VF primary MAC only if it was not set
* administratively by PF or if VF is trusted.
*/
- if (ether_addr_equal(addr, vf->default_lan_addr.addr) &&
- i40e_can_vf_change_mac(vf))
- was_unimac_deleted = true;
- else
- continue;
+ if (ether_addr_equal(addr, vf->default_lan_addr.addr)) {
+ if (i40e_can_vf_change_mac(vf))
+ was_unimac_deleted = true;
+ else
+ continue;
+ }

if (i40e_del_mac_filter(vsi, al->list[i].addr)) {
ret = -EINVAL;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index cf190762421c..66f95e2f3146 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -4,7 +4,9 @@
#ifndef _I40E_VIRTCHNL_PF_H_
#define _I40E_VIRTCHNL_PF_H_

-#include "i40e.h"
+#include <linux/avf/virtchnl.h>
+#include <linux/netdevice.h>
+#include "i40e_type.h"

#define I40E_MAX_VLANID 4095

diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 1f8ae6f5d980..65f38a57b3df 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -2,11 +2,7 @@
/* Copyright(c) 2018 Intel Corporation. */

#include <linux/bpf_trace.h>
-#include <linux/stringify.h>
#include <net/xdp_sock_drv.h>
-#include <net/xdp.h>
-
-#include "i40e.h"
#include "i40e_txrx_common.h"
#include "i40e_xsk.h"

diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
index 821df248f8be..ef156fad52f2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
@@ -4,6 +4,8 @@
#ifndef _I40E_XSK_H_
#define _I40E_XSK_H_

+#include <linux/types.h>
+
/* This value should match the pragma in the loop_unrolled_for
* macro. Why 4? It is strictly empirical. It seems to be a good
* compromise between the advantage of having simultaneous outstanding
@@ -20,7 +22,9 @@
#define loop_unrolled_for for
#endif

+struct i40e_ring;
struct i40e_vsi;
+struct net_device;
struct xsk_buff_pool;

int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c
index 1afd761d8052..f7988cf5efa5 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_common.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_common.c
@@ -1,10 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2013 - 2018 Intel Corporation. */

+#include <linux/avf/virtchnl.h>
+#include <linux/bitfield.h>
#include "iavf_type.h"
#include "iavf_adminq.h"
#include "iavf_prototype.h"
-#include <linux/avf/virtchnl.h>

/**
* iavf_set_mac_type - Sets MAC type
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index 892c6a4f03bb..1ac97bd606e3 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -1,11 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2013 - 2018 Intel Corporation. */

+#include <linux/bitfield.h>
+#include <linux/uaccess.h>
+
/* ethtool support for iavf */
#include "iavf.h"

-#include <linux/uaccess.h>
-
/* ethtool statistics helpers */

/**
diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
index 03e774bd2a5b..65ddcd81c993 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
@@ -3,6 +3,7 @@

/* flow director ethtool support for iavf */

+#include <linux/bitfield.h>
#include "iavf.h"

#define GTPU_PORT 2152
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
index 8c5f6096b002..f998ecf743c4 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2013 - 2018 Intel Corporation. */

+#include <linux/bitfield.h>
#include <linux/prefetch.h>

#include "iavf.h"
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 45f3e351653d..72ca2199c957 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -592,8 +592,9 @@ struct ice_aqc_recipe_data_elem {
struct ice_aqc_recipe_to_profile {
__le16 profile_id;
u8 rsvd[6];
- DECLARE_BITMAP(recipe_assoc, ICE_MAX_NUM_RECIPES);
+ __le64 recipe_assoc;
};
+static_assert(sizeof(struct ice_aqc_recipe_to_profile) == 16);

/* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3)
*/
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
index 23e197c3d02a..4e675c7c199f 100644
--- a/drivers/net/ethernet/intel/ice/ice_lag.c
+++ b/drivers/net/ethernet/intel/ice/ice_lag.c
@@ -2000,14 +2000,14 @@ int ice_init_lag(struct ice_pf *pf)
/* associate recipes to profiles */
for (n = 0; n < ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER; n++) {
err = ice_aq_get_recipe_to_profile(&pf->hw, n,
- (u8 *)&recipe_bits, NULL);
+ &recipe_bits, NULL);
if (err)
continue;

if (recipe_bits & BIT(ICE_SW_LKUP_DFLT)) {
recipe_bits |= BIT(lag->pf_recipe);
ice_aq_map_recipe_to_profile(&pf->hw, n,
- (u8 *)&recipe_bits, NULL);
+ recipe_bits, NULL);
}
}

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 7f4bc110ead4..2004120a58ac 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -3084,27 +3084,26 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
}

/**
- * ice_vsi_realloc_stat_arrays - Frees unused stat structures
+ * ice_vsi_realloc_stat_arrays - Frees unused stat structures or alloc new ones
* @vsi: VSI pointer
- * @prev_txq: Number of Tx rings before ring reallocation
- * @prev_rxq: Number of Rx rings before ring reallocation
*/
-static void
-ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq)
+static int
+ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi)
{
+ u16 req_txq = vsi->req_txq ? vsi->req_txq : vsi->alloc_txq;
+ u16 req_rxq = vsi->req_rxq ? vsi->req_rxq : vsi->alloc_rxq;
+ struct ice_ring_stats **tx_ring_stats;
+ struct ice_ring_stats **rx_ring_stats;
struct ice_vsi_stats *vsi_stat;
struct ice_pf *pf = vsi->back;
+ u16 prev_txq = vsi->alloc_txq;
+ u16 prev_rxq = vsi->alloc_rxq;
int i;

- if (!prev_txq || !prev_rxq)
- return;
- if (vsi->type == ICE_VSI_CHNL)
- return;
-
vsi_stat = pf->vsi_stats[vsi->idx];

- if (vsi->num_txq < prev_txq) {
- for (i = vsi->num_txq; i < prev_txq; i++) {
+ if (req_txq < prev_txq) {
+ for (i = req_txq; i < prev_txq; i++) {
if (vsi_stat->tx_ring_stats[i]) {
kfree_rcu(vsi_stat->tx_ring_stats[i], rcu);
WRITE_ONCE(vsi_stat->tx_ring_stats[i], NULL);
@@ -3112,14 +3111,36 @@ ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq)
}
}

- if (vsi->num_rxq < prev_rxq) {
- for (i = vsi->num_rxq; i < prev_rxq; i++) {
+ tx_ring_stats = vsi_stat->tx_ring_stats;
+ vsi_stat->tx_ring_stats =
+ krealloc_array(vsi_stat->tx_ring_stats, req_txq,
+ sizeof(*vsi_stat->tx_ring_stats),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!vsi_stat->tx_ring_stats) {
+ vsi_stat->tx_ring_stats = tx_ring_stats;
+ return -ENOMEM;
+ }
+
+ if (req_rxq < prev_rxq) {
+ for (i = req_rxq; i < prev_rxq; i++) {
if (vsi_stat->rx_ring_stats[i]) {
kfree_rcu(vsi_stat->rx_ring_stats[i], rcu);
WRITE_ONCE(vsi_stat->rx_ring_stats[i], NULL);
}
}
}
+
+ rx_ring_stats = vsi_stat->rx_ring_stats;
+ vsi_stat->rx_ring_stats =
+ krealloc_array(vsi_stat->rx_ring_stats, req_rxq,
+ sizeof(*vsi_stat->rx_ring_stats),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!vsi_stat->rx_ring_stats) {
+ vsi_stat->rx_ring_stats = rx_ring_stats;
+ return -ENOMEM;
+ }
+
+ return 0;
}

/**
@@ -3136,9 +3157,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
{
struct ice_vsi_cfg_params params = {};
struct ice_coalesce_stored *coalesce;
- int ret, prev_txq, prev_rxq;
- int prev_num_q_vectors = 0;
+ int prev_num_q_vectors;
struct ice_pf *pf;
+ int ret;

if (!vsi)
return -EINVAL;
@@ -3150,6 +3171,15 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf))
return -EINVAL;

+ ret = ice_vsi_realloc_stat_arrays(vsi);
+ if (ret)
+ goto err_vsi_cfg;
+
+ ice_vsi_decfg(vsi);
+ ret = ice_vsi_cfg_def(vsi, &params);
+ if (ret)
+ goto err_vsi_cfg;
+
coalesce = kcalloc(vsi->num_q_vectors,
sizeof(struct ice_coalesce_stored), GFP_KERNEL);
if (!coalesce)
@@ -3157,14 +3187,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)

prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);

- prev_txq = vsi->num_txq;
- prev_rxq = vsi->num_rxq;
-
- ice_vsi_decfg(vsi);
- ret = ice_vsi_cfg_def(vsi, &params);
- if (ret)
- goto err_vsi_cfg;
-
ret = ice_vsi_cfg_tc_lan(pf, vsi);
if (ret) {
if (vsi_flags & ICE_VSI_FLAG_INIT) {
@@ -3176,8 +3198,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
return ice_schedule_reset(pf, ICE_RESET_PFR);
}

- ice_vsi_realloc_stat_arrays(vsi, prev_txq, prev_rxq);
-
ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors);
kfree(coalesce);

@@ -3185,8 +3205,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)

err_vsi_cfg_tc_lan:
ice_vsi_decfg(vsi);
-err_vsi_cfg:
kfree(coalesce);
+err_vsi_cfg:
return ret;
}

diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 2f77b684ff76..4c6d58bb2690 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -2032,12 +2032,12 @@ ice_update_recipe_lkup_idx(struct ice_hw *hw,
* ice_aq_map_recipe_to_profile - Map recipe to packet profile
* @hw: pointer to the HW struct
* @profile_id: package profile ID to associate the recipe with
- * @r_bitmap: Recipe bitmap filled in and need to be returned as response
+ * @r_assoc: Recipe bitmap filled in and need to be returned as response
* @cd: pointer to command details structure or NULL
* Recipe to profile association (0x0291)
*/
int
-ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc,
struct ice_sq_cd *cd)
{
struct ice_aqc_recipe_to_profile *cmd;
@@ -2049,7 +2049,7 @@ ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
/* Set the recipe ID bit in the bitmask to let the device know which
* profile we are associating the recipe to
*/
- memcpy(cmd->recipe_assoc, r_bitmap, sizeof(cmd->recipe_assoc));
+ cmd->recipe_assoc = cpu_to_le64(r_assoc);

return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
}
@@ -2058,12 +2058,12 @@ ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
* ice_aq_get_recipe_to_profile - Map recipe to packet profile
* @hw: pointer to the HW struct
* @profile_id: package profile ID to associate the recipe with
- * @r_bitmap: Recipe bitmap filled in and need to be returned as response
+ * @r_assoc: Recipe bitmap filled in and need to be returned as response
* @cd: pointer to command details structure or NULL
* Associate profile ID with given recipe (0x0293)
*/
int
-ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc,
struct ice_sq_cd *cd)
{
struct ice_aqc_recipe_to_profile *cmd;
@@ -2076,7 +2076,7 @@ ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,

status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
if (!status)
- memcpy(r_bitmap, cmd->recipe_assoc, sizeof(cmd->recipe_assoc));
+ *r_assoc = le64_to_cpu(cmd->recipe_assoc);

return status;
}
@@ -2121,6 +2121,7 @@ int ice_alloc_recipe(struct ice_hw *hw, u16 *rid)
static void ice_get_recp_to_prof_map(struct ice_hw *hw)
{
DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
+ u64 recp_assoc;
u16 i;

for (i = 0; i < hw->switch_info->max_used_prof_index + 1; i++) {
@@ -2128,8 +2129,9 @@ static void ice_get_recp_to_prof_map(struct ice_hw *hw)

bitmap_zero(profile_to_recipe[i], ICE_MAX_NUM_RECIPES);
bitmap_zero(r_bitmap, ICE_MAX_NUM_RECIPES);
- if (ice_aq_get_recipe_to_profile(hw, i, (u8 *)r_bitmap, NULL))
+ if (ice_aq_get_recipe_to_profile(hw, i, &recp_assoc, NULL))
continue;
+ bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES);
bitmap_copy(profile_to_recipe[i], r_bitmap,
ICE_MAX_NUM_RECIPES);
for_each_set_bit(j, r_bitmap, ICE_MAX_NUM_RECIPES)
@@ -5431,22 +5433,24 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
*/
list_for_each_entry(fvit, &rm->fv_list, list_entry) {
DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
+ u64 recp_assoc;
u16 j;

status = ice_aq_get_recipe_to_profile(hw, fvit->profile_id,
- (u8 *)r_bitmap, NULL);
+ &recp_assoc, NULL);
if (status)
goto err_unroll;

+ bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES);
bitmap_or(r_bitmap, r_bitmap, rm->r_bitmap,
ICE_MAX_NUM_RECIPES);
status = ice_acquire_change_lock(hw, ICE_RES_WRITE);
if (status)
goto err_unroll;

+ bitmap_to_arr64(&recp_assoc, r_bitmap, ICE_MAX_NUM_RECIPES);
status = ice_aq_map_recipe_to_profile(hw, fvit->profile_id,
- (u8 *)r_bitmap,
- NULL);
+ recp_assoc, NULL);
ice_release_change_lock(hw);

if (status)
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index db7e501b7e0a..89ffa1b51b5a 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -424,10 +424,10 @@ int ice_aq_add_recipe(struct ice_hw *hw,
struct ice_aqc_recipe_data_elem *s_recipe_list,
u16 num_recipes, struct ice_sq_cd *cd);
int
-ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc,
struct ice_sq_cd *cd);
int
-ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc,
struct ice_sq_cd *cd);

#endif /* _ICE_SWITCH_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
index 80dc4bcdd3a4..b3e1bdcb80f8 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
@@ -26,24 +26,22 @@ static void ice_port_vlan_on(struct ice_vsi *vsi)
struct ice_vsi_vlan_ops *vlan_ops;
struct ice_pf *pf = vsi->back;

- if (ice_is_dvm_ena(&pf->hw)) {
- vlan_ops = &vsi->outer_vlan_ops;
-
- /* setup outer VLAN ops */
- vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan;
- vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan;
+ /* setup inner VLAN ops */
+ vlan_ops = &vsi->inner_vlan_ops;

- /* setup inner VLAN ops */
- vlan_ops = &vsi->inner_vlan_ops;
+ if (ice_is_dvm_ena(&pf->hw)) {
vlan_ops->add_vlan = noop_vlan_arg;
vlan_ops->del_vlan = noop_vlan_arg;
vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
- } else {
- vlan_ops = &vsi->inner_vlan_ops;

+ /* setup outer VLAN ops */
+ vlan_ops = &vsi->outer_vlan_ops;
+ vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan;
+ vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan;
+ } else {
vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan;
vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan;
}
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index 8d6e44ee1895..64dfc362d1dc 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -222,8 +222,7 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw)
}

/* set lan id */
- hw->bus.func = (rd32(E1000_STATUS) & E1000_STATUS_FUNC_MASK) >>
- E1000_STATUS_FUNC_SHIFT;
+ hw->bus.func = FIELD_GET(E1000_STATUS_FUNC_MASK, rd32(E1000_STATUS));

/* Set phy->phy_addr and phy->id. */
ret_val = igb_get_phy_id_82575(hw);
@@ -262,8 +261,8 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw)
if (ret_val)
goto out;

- data = (data & E1000_M88E1112_MAC_CTRL_1_MODE_MASK) >>
- E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT;
+ data = FIELD_GET(E1000_M88E1112_MAC_CTRL_1_MODE_MASK,
+ data);
if (data == E1000_M88E1112_AUTO_COPPER_SGMII ||
data == E1000_M88E1112_AUTO_COPPER_BASEX)
hw->mac.ops.check_for_link =
@@ -330,8 +329,7 @@ static s32 igb_init_nvm_params_82575(struct e1000_hw *hw)
u32 eecd = rd32(E1000_EECD);
u16 size;

- size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >>
- E1000_EECD_SIZE_EX_SHIFT);
+ size = FIELD_GET(E1000_EECD_SIZE_EX_MASK, eecd);

/* Added to a constant, "size" becomes the left-shift value
* for setting word_size.
@@ -2798,7 +2796,7 @@ static s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw)
return 0;

hw->nvm.ops.read(hw, ets_offset, 1, &ets_cfg);
- if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT)
+ if (FIELD_GET(NVM_ETS_TYPE_MASK, ets_cfg)
!= NVM_ETS_TYPE_EMC)
return E1000_NOT_IMPLEMENTED;

@@ -2808,10 +2806,8 @@ static s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw)

for (i = 1; i < num_sensors; i++) {
hw->nvm.ops.read(hw, (ets_offset + i), 1, &ets_sensor);
- sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >>
- NVM_ETS_DATA_INDEX_SHIFT);
- sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >>
- NVM_ETS_DATA_LOC_SHIFT);
+ sensor_index = FIELD_GET(NVM_ETS_DATA_INDEX_MASK, ets_sensor);
+ sensor_location = FIELD_GET(NVM_ETS_DATA_LOC_MASK, ets_sensor);

if (sensor_location != 0)
hw->phy.ops.read_i2c_byte(hw,
@@ -2859,20 +2855,17 @@ static s32 igb_init_thermal_sensor_thresh_generic(struct e1000_hw *hw)
return 0;

hw->nvm.ops.read(hw, ets_offset, 1, &ets_cfg);
- if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT)
+ if (FIELD_GET(NVM_ETS_TYPE_MASK, ets_cfg)
!= NVM_ETS_TYPE_EMC)
return E1000_NOT_IMPLEMENTED;

- low_thresh_delta = ((ets_cfg & NVM_ETS_LTHRES_DELTA_MASK) >>
- NVM_ETS_LTHRES_DELTA_SHIFT);
+ low_thresh_delta = FIELD_GET(NVM_ETS_LTHRES_DELTA_MASK, ets_cfg);
num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK);

for (i = 1; i <= num_sensors; i++) {
hw->nvm.ops.read(hw, (ets_offset + i), 1, &ets_sensor);
- sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >>
- NVM_ETS_DATA_INDEX_SHIFT);
- sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >>
- NVM_ETS_DATA_LOC_SHIFT);
+ sensor_index = FIELD_GET(NVM_ETS_DATA_INDEX_MASK, ets_sensor);
+ sensor_location = FIELD_GET(NVM_ETS_DATA_LOC_MASK, ets_sensor);
therm_limit = ets_sensor & NVM_ETS_DATA_HTHRESH_MASK;

hw->phy.ops.write_i2c_byte(hw,
diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c
index b9b9d35494d2..503b239868e8 100644
--- a/drivers/net/ethernet/intel/igb/e1000_i210.c
+++ b/drivers/net/ethernet/intel/igb/e1000_i210.c
@@ -5,9 +5,9 @@
* e1000_i211
*/

-#include <linux/types.h>
+#include <linux/bitfield.h>
#include <linux/if_ether.h>
-
+#include <linux/types.h>
#include "e1000_hw.h"
#include "e1000_i210.h"

@@ -473,7 +473,7 @@ s32 igb_read_invm_version(struct e1000_hw *hw,
/* Check if we have second version location used */
else if ((i == 1) &&
((*record & E1000_INVM_VER_FIELD_TWO) == 0)) {
- version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3;
+ version = FIELD_GET(E1000_INVM_VER_FIELD_ONE, *record);
status = 0;
break;
}
@@ -483,8 +483,8 @@ s32 igb_read_invm_version(struct e1000_hw *hw,
else if ((((*record & E1000_INVM_VER_FIELD_ONE) == 0) &&
((*record & 0x3) == 0)) || (((*record & 0x3) != 0) &&
(i != 1))) {
- version = (*next_record & E1000_INVM_VER_FIELD_TWO)
- >> 13;
+ version = FIELD_GET(E1000_INVM_VER_FIELD_TWO,
+ *next_record);
status = 0;
break;
}
@@ -493,15 +493,15 @@ s32 igb_read_invm_version(struct e1000_hw *hw,
*/
else if (((*record & E1000_INVM_VER_FIELD_TWO) == 0) &&
((*record & 0x3) == 0)) {
- version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3;
+ version = FIELD_GET(E1000_INVM_VER_FIELD_ONE, *record);
status = 0;
break;
}
}

if (!status) {
- invm_ver->invm_major = (version & E1000_INVM_MAJOR_MASK)
- >> E1000_INVM_MAJOR_SHIFT;
+ invm_ver->invm_major = FIELD_GET(E1000_INVM_MAJOR_MASK,
+ version);
invm_ver->invm_minor = version & E1000_INVM_MINOR_MASK;
}
/* Read Image Type */
@@ -520,7 +520,8 @@ s32 igb_read_invm_version(struct e1000_hw *hw,
((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) ||
((((*record & 0x3) != 0) && (i != 1)))) {
invm_ver->invm_img_type =
- (*next_record & E1000_INVM_IMGTYPE_FIELD) >> 23;
+ FIELD_GET(E1000_INVM_IMGTYPE_FIELD,
+ *next_record);
status = 0;
break;
}
diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c
index caf91c6f52b4..ceaec2cf08a4 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mac.c
+++ b/drivers/net/ethernet/intel/igb/e1000_mac.c
@@ -56,7 +56,7 @@ s32 igb_get_bus_info_pcie(struct e1000_hw *hw)
}

reg = rd32(E1000_STATUS);
- bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT;
+ bus->func = FIELD_GET(E1000_STATUS_FUNC_MASK, reg);

return 0;
}
diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.c b/drivers/net/ethernet/intel/igb/e1000_nvm.c
index fa136e6e9328..2dcd64d6dec3 100644
--- a/drivers/net/ethernet/intel/igb/e1000_nvm.c
+++ b/drivers/net/ethernet/intel/igb/e1000_nvm.c
@@ -1,9 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2007 - 2018 Intel Corporation. */

-#include <linux/if_ether.h>
+#include <linux/bitfield.h>
#include <linux/delay.h>
-
+#include <linux/if_ether.h>
#include "e1000_mac.h"
#include "e1000_nvm.h"

@@ -708,10 +708,10 @@ void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers)
*/
if ((etrack_test & NVM_MAJOR_MASK) != NVM_ETRACK_VALID) {
hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version);
- fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK)
- >> NVM_MAJOR_SHIFT;
- fw_vers->eep_minor = (fw_version & NVM_MINOR_MASK)
- >> NVM_MINOR_SHIFT;
+ fw_vers->eep_major = FIELD_GET(NVM_MAJOR_MASK,
+ fw_version);
+ fw_vers->eep_minor = FIELD_GET(NVM_MINOR_MASK,
+ fw_version);
fw_vers->eep_build = (fw_version & NVM_IMAGE_ID_MASK);
goto etrack_id;
}
@@ -753,15 +753,13 @@ void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers)
return;
}
hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version);
- fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK)
- >> NVM_MAJOR_SHIFT;
+ fw_vers->eep_major = FIELD_GET(NVM_MAJOR_MASK, fw_version);

/* check for old style version format in newer images*/
if ((fw_version & NVM_NEW_DEC_MASK) == 0x0) {
eeprom_verl = (fw_version & NVM_COMB_VER_MASK);
} else {
- eeprom_verl = (fw_version & NVM_MINOR_MASK)
- >> NVM_MINOR_SHIFT;
+ eeprom_verl = FIELD_GET(NVM_MINOR_MASK, fw_version);
}
/* Convert minor value to hex before assigning to output struct
* Val to be converted will not be higher than 99, per tool output
diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c
index a018000f7db9..bed94e50a669 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.c
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.c
@@ -1,9 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2007 - 2018 Intel Corporation. */

-#include <linux/if_ether.h>
+#include <linux/bitfield.h>
#include <linux/delay.h>
-
+#include <linux/if_ether.h>
#include "e1000_mac.h"
#include "e1000_phy.h"

@@ -1682,8 +1682,7 @@ s32 igb_get_cable_length_m88(struct e1000_hw *hw)
if (ret_val)
goto out;

- index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
- M88E1000_PSSR_CABLE_LENGTH_SHIFT;
+ index = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data);
if (index >= ARRAY_SIZE(e1000_m88_cable_length_table) - 1) {
ret_val = -E1000_ERR_PHY;
goto out;
@@ -1796,8 +1795,7 @@ s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw)
if (ret_val)
goto out;

- index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
- M88E1000_PSSR_CABLE_LENGTH_SHIFT;
+ index = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data);
if (index >= ARRAY_SIZE(e1000_m88_cable_length_table) - 1) {
ret_val = -E1000_ERR_PHY;
goto out;
@@ -2578,8 +2576,7 @@ s32 igb_get_cable_length_82580(struct e1000_hw *hw)
if (ret_val)
goto out;

- length = (phy_data & I82580_DSTATUS_CABLE_LENGTH) >>
- I82580_DSTATUS_CABLE_LENGTH_SHIFT;
+ length = FIELD_GET(I82580_DSTATUS_CABLE_LENGTH, phy_data);

if (length == E1000_CABLE_LENGTH_UNDEFINED)
ret_val = -E1000_ERR_PHY;
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 4ee849985e2b..92b2be06a6e9 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -2434,7 +2434,7 @@ static int igb_get_ts_info(struct net_device *dev,
}
}

-#define ETHER_TYPE_FULL_MASK ((__force __be16)~0)
+#define ETHER_TYPE_FULL_MASK cpu_to_be16(FIELD_MAX(U16_MAX))
static int igb_get_ethtool_nfc_entry(struct igb_adapter *adapter,
struct ethtool_rxnfc *cmd)
{
@@ -2733,8 +2733,8 @@ static int igb_rxnfc_write_vlan_prio_filter(struct igb_adapter *adapter,
u32 vlapqf;

vlapqf = rd32(E1000_VLAPQF);
- vlan_priority = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK)
- >> VLAN_PRIO_SHIFT;
+ vlan_priority = FIELD_GET(VLAN_PRIO_MASK,
+ ntohs(input->filter.vlan_tci));
queue_index = (vlapqf >> (vlan_priority * 4)) & E1000_VLAPQF_QUEUE_MASK;

/* check whether this vlan prio is already set */
@@ -2817,7 +2817,7 @@ static void igb_clear_vlan_prio_filter(struct igb_adapter *adapter,
u8 vlan_priority;
u32 vlapqf;

- vlan_priority = (vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+ vlan_priority = FIELD_GET(VLAN_PRIO_MASK, vlan_tci);

vlapqf = rd32(E1000_VLAPQF);
vlapqf &= ~E1000_VLAPQF_P_VALID(vlan_priority);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 11921141b607..4431e7693d45 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -7283,7 +7283,7 @@ static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
static int igb_set_vf_multicasts(struct igb_adapter *adapter,
u32 *msgbuf, u32 vf)
{
- int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
+ int n = FIELD_GET(E1000_VT_MSGINFO_MASK, msgbuf[0]);
u16 *hash_list = (u16 *)&msgbuf[1];
struct vf_data_storage *vf_data = &adapter->vf_data[vf];
int i;
@@ -7543,7 +7543,7 @@ static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf,

static int igb_set_vf_vlan_msg(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
{
- int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
+ int add = FIELD_GET(E1000_VT_MSGINFO_MASK, msgbuf[0]);
int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
int ret;

diff --git a/drivers/net/ethernet/intel/igbvf/mbx.c b/drivers/net/ethernet/intel/igbvf/mbx.c
index a3cd7ac48d4b..d15282ee5ea8 100644
--- a/drivers/net/ethernet/intel/igbvf/mbx.c
+++ b/drivers/net/ethernet/intel/igbvf/mbx.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2009 - 2018 Intel Corporation. */

+#include <linux/bitfield.h>
#include "mbx.h"

/**
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index 7ff2752dd763..c5012fa36af2 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -3,25 +3,25 @@

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/vmalloc.h>
-#include <linux/pagemap.h>
+#include <linux/bitfield.h>
#include <linux/delay.h>
-#include <linux/netdevice.h>
-#include <linux/tcp.h>
-#include <linux/ipv6.h>
-#include <linux/slab.h>
-#include <net/checksum.h>
-#include <net/ip6_checksum.h>
-#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
+#include <linux/init.h>
+#include <linux/ipv6.h>
+#include <linux/mii.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/pagemap.h>
+#include <linux/pci.h>
#include <linux/prefetch.h>
#include <linux/sctp.h>
-
+#include <linux/slab.h>
+#include <linux/tcp.h>
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
#include "igbvf.h"

char igbvf_driver_name[] = "igbvf";
@@ -273,9 +273,8 @@ static bool igbvf_clean_rx_irq(struct igbvf_adapter *adapter,
* that case, it fills the header buffer and spills the rest
* into the page.
*/
- hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info)
- & E1000_RXDADV_HDRBUFLEN_MASK) >>
- E1000_RXDADV_HDRBUFLEN_SHIFT;
+ hlen = le16_get_bits(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info,
+ E1000_RXDADV_HDRBUFLEN_MASK);
if (hlen > adapter->rx_ps_hdr_size)
hlen = adapter->rx_ps_hdr_size;

diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c
index 17546a035ab1..d2562c8e8015 100644
--- a/drivers/net/ethernet/intel/igc/igc_i225.c
+++ b/drivers/net/ethernet/intel/igc/igc_i225.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Intel Corporation */

+#include <linux/bitfield.h>
#include <linux/delay.h>

#include "igc_hw.h"
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index fc1de116d554..e83700ad7e62 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -1640,10 +1640,6 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,

if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) &&
skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
- /* FIXME: add support for retrieving timestamps from
- * the other timer registers before skipping the
- * timestamping request.
- */
unsigned long flags;
u32 tstamp_flags;

diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c
index 53b77c969c85..d0d9e7170154 100644
--- a/drivers/net/ethernet/intel/igc/igc_phy.c
+++ b/drivers/net/ethernet/intel/igc/igc_phy.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Intel Corporation */

+#include <linux/bitfield.h>
#include "igc_phy.h"

/**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index b2a0f2aaa05b..2e6e0365154a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -684,7 +684,7 @@ void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw)
u32 reg;

reg = IXGBE_READ_REG(hw, IXGBE_STATUS);
- bus->func = (reg & IXGBE_STATUS_LAN_ID) >> IXGBE_STATUS_LAN_ID_SHIFT;
+ bus->func = FIELD_GET(IXGBE_STATUS_LAN_ID, reg);
bus->lan_id = bus->func;

/* check for a port swap */
@@ -695,8 +695,8 @@ void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw)
/* Get MAC instance from EEPROM for configuring CS4227 */
if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP) {
hw->eeprom.ops.read(hw, IXGBE_EEPROM_CTRL_4, &ee_ctrl_4);
- bus->instance_id = (ee_ctrl_4 & IXGBE_EE_CTRL_4_INST_ID) >>
- IXGBE_EE_CTRL_4_INST_ID_SHIFT;
+ bus->instance_id = FIELD_GET(IXGBE_EE_CTRL_4_INST_ID,
+ ee_ctrl_4);
}
}

@@ -870,10 +870,9 @@ s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw)
* SPI EEPROM is assumed here. This code would need to
* change if a future EEPROM is not SPI.
*/
- eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >>
- IXGBE_EEC_SIZE_SHIFT);
+ eeprom_size = FIELD_GET(IXGBE_EEC_SIZE, eec);
eeprom->word_size = BIT(eeprom_size +
- IXGBE_EEPROM_WORD_SIZE_SHIFT);
+ IXGBE_EEPROM_WORD_SIZE_SHIFT);
}

if (eec & IXGBE_EEC_ADDR_SIZE)
@@ -3935,10 +3934,10 @@ s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw)
if (status)
return status;

- sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >>
- IXGBE_ETS_DATA_INDEX_SHIFT);
- sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >>
- IXGBE_ETS_DATA_LOC_SHIFT);
+ sensor_index = FIELD_GET(IXGBE_ETS_DATA_INDEX_MASK,
+ ets_sensor);
+ sensor_location = FIELD_GET(IXGBE_ETS_DATA_LOC_MASK,
+ ets_sensor);

if (sensor_location != 0) {
status = hw->phy.ops.read_i2c_byte(hw,
@@ -3982,8 +3981,7 @@ s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw)
if (status)
return status;

- low_thresh_delta = ((ets_cfg & IXGBE_ETS_LTHRES_DELTA_MASK) >>
- IXGBE_ETS_LTHRES_DELTA_SHIFT);
+ low_thresh_delta = FIELD_GET(IXGBE_ETS_LTHRES_DELTA_MASK, ets_cfg);
num_sensors = (ets_cfg & IXGBE_ETS_NUM_SENSORS_MASK);
if (num_sensors > IXGBE_MAX_SENSORS)
num_sensors = IXGBE_MAX_SENSORS;
@@ -3997,10 +3995,10 @@ s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw)
ets_offset + 1 + i);
continue;
}
- sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >>
- IXGBE_ETS_DATA_INDEX_SHIFT);
- sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >>
- IXGBE_ETS_DATA_LOC_SHIFT);
+ sensor_index = FIELD_GET(IXGBE_ETS_DATA_INDEX_MASK,
+ ets_sensor);
+ sensor_location = FIELD_GET(IXGBE_ETS_DATA_LOC_MASK,
+ ets_sensor);
therm_limit = ets_sensor & IXGBE_ETS_DATA_HTHRESH_MASK;

hw->phy.ops.write_i2c_byte(hw,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 13a6fca31004..866024f2b9ee 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -914,7 +914,13 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
goto err_out;
}

- xs = kzalloc(sizeof(*xs), GFP_KERNEL);
+ algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1);
+ if (unlikely(!algo)) {
+ err = -ENOENT;
+ goto err_out;
+ }
+
+ xs = kzalloc(sizeof(*xs), GFP_ATOMIC);
if (unlikely(!xs)) {
err = -ENOMEM;
goto err_out;
@@ -930,14 +936,8 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
memcpy(&xs->id.daddr.a4, sam->addr, sizeof(xs->id.daddr.a4));
xs->xso.dev = adapter->netdev;

- algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1);
- if (unlikely(!algo)) {
- err = -ENOENT;
- goto err_xs;
- }
-
aead_len = sizeof(*xs->aead) + IXGBE_IPSEC_KEY_BITS / 8;
- xs->aead = kzalloc(aead_len, GFP_KERNEL);
+ xs->aead = kzalloc(aead_len, GFP_ATOMIC);
if (unlikely(!xs->aead)) {
err = -ENOMEM;
goto err_xs;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index cb23aad5953b..f245f3df40fc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -11409,7 +11409,7 @@ static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev,
if ((pf_func & 1) == (pdev->devfn & 1)) {
unsigned int device_id;

- vf = (req_id & 0x7F) >> 1;
+ vf = FIELD_GET(0x7F, req_id);
e_dev_err("VF %d has caused a PCIe error\n", vf);
e_dev_err("TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: "
"%8.8x\tdw3: %8.8x\n",
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
index 930dc5071936..f28140a05f09 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
@@ -276,9 +276,8 @@ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw)
return 0;

if (hw->phy.nw_mng_if_sel) {
- phy_addr = (hw->phy.nw_mng_if_sel &
- IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >>
- IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT;
+ phy_addr = FIELD_GET(IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD,
+ hw->phy.nw_mng_if_sel);
if (ixgbe_probe_phy(hw, phy_addr))
return 0;
else
@@ -1447,8 +1446,7 @@ s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw)
ret_val = hw->eeprom.ops.read(hw, data_offset, &eword);
if (ret_val)
goto err_eeprom;
- control = (eword & IXGBE_CONTROL_MASK_NL) >>
- IXGBE_CONTROL_SHIFT_NL;
+ control = FIELD_GET(IXGBE_CONTROL_MASK_NL, eword);
edata = eword & IXGBE_DATA_MASK_NL;
switch (control) {
case IXGBE_DELAY_NL:
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 198ab9d97618..d0a6c220a12a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -363,8 +363,7 @@ int ixgbe_pci_sriov_configure(struct pci_dev *dev, int num_vfs)
static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter,
u32 *msgbuf, u32 vf)
{
- int entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK)
- >> IXGBE_VT_MSGINFO_SHIFT;
+ int entries = FIELD_GET(IXGBE_VT_MSGINFO_MASK, msgbuf[0]);
u16 *hash_list = (u16 *)&msgbuf[1];
struct vf_data_storage *vfinfo = &adapter->vfinfo[vf];
struct ixgbe_hw *hw = &adapter->hw;
@@ -971,7 +970,7 @@ static int ixgbe_set_vf_mac_addr(struct ixgbe_adapter *adapter,
static int ixgbe_set_vf_vlan_msg(struct ixgbe_adapter *adapter,
u32 *msgbuf, u32 vf)
{
- u32 add = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT;
+ u32 add = FIELD_GET(IXGBE_VT_MSGINFO_MASK, msgbuf[0]);
u32 vid = (msgbuf[1] & IXGBE_VLVF_VLANID_MASK);
u8 tcs = adapter->hw_tcs;

@@ -994,8 +993,7 @@ static int ixgbe_set_vf_macvlan_msg(struct ixgbe_adapter *adapter,
u32 *msgbuf, u32 vf)
{
u8 *new_mac = ((u8 *)(&msgbuf[1]));
- int index = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
- IXGBE_VT_MSGINFO_SHIFT;
+ int index = FIELD_GET(IXGBE_VT_MSGINFO_MASK, msgbuf[0]);
int err;

if (adapter->vfinfo[vf].pf_set_mac && !adapter->vfinfo[vf].trusted &&
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
index 15325c549d9b..57a912e4653f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
@@ -187,16 +187,16 @@ s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw)
s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw)
{
struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
- u32 eec;
- u16 eeprom_size;

if (eeprom->type == ixgbe_eeprom_uninitialized) {
+ u16 eeprom_size;
+ u32 eec;
+
eeprom->semaphore_delay = 10;
eeprom->type = ixgbe_flash;

eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
- eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >>
- IXGBE_EEC_SIZE_SHIFT);
+ eeprom_size = FIELD_GET(IXGBE_EEC_SIZE, eec);
eeprom->word_size = BIT(eeprom_size +
IXGBE_EEPROM_WORD_SIZE_SHIFT);

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index cdc912bba808..c1adc94a5a65 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -630,16 +630,16 @@ static s32 ixgbe_fc_autoneg_fw(struct ixgbe_hw *hw)
static s32 ixgbe_init_eeprom_params_X550(struct ixgbe_hw *hw)
{
struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
- u32 eec;
- u16 eeprom_size;

if (eeprom->type == ixgbe_eeprom_uninitialized) {
+ u16 eeprom_size;
+ u32 eec;
+
eeprom->semaphore_delay = 10;
eeprom->type = ixgbe_flash;

eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
- eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >>
- IXGBE_EEC_SIZE_SHIFT);
+ eeprom_size = FIELD_GET(IXGBE_EEC_SIZE, eec);
eeprom->word_size = BIT(eeprom_size +
IXGBE_EEPROM_WORD_SIZE_SHIFT);

@@ -714,8 +714,7 @@ static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr,
ret = ixgbe_iosf_wait(hw, &command);

if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) {
- error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >>
- IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT;
+ error = FIELD_GET(IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK, command);
hw_dbg(hw, "Failed to read, error %x\n", error);
ret = -EIO;
goto out;
@@ -1415,8 +1414,7 @@ static s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr,
ret = ixgbe_iosf_wait(hw, &command);

if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) {
- error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >>
- IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT;
+ error = FIELD_GET(IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK, command);
hw_dbg(hw, "Failed to write, error %x\n", error);
return -EIO;
}
@@ -3229,9 +3227,8 @@ static void ixgbe_read_mng_if_sel_x550em(struct ixgbe_hw *hw)
*/
if (hw->mac.type == ixgbe_mac_x550em_a &&
hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_MDIO_ACT) {
- hw->phy.mdio.prtad = (hw->phy.nw_mng_if_sel &
- IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >>
- IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT;
+ hw->phy.mdio.prtad = FIELD_GET(IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD,
+ hw->phy.nw_mng_if_sel);
}
}

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 6c18d3d2442e..2539c985f695 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -808,6 +808,11 @@ static int cgx_lmac_enadis_pause_frm(void *cgxd, int lmac_id,
if (!is_lmac_valid(cgx, lmac_id))
return -ENODEV;

+ cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL);
+ cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK;
+ cfg |= rx_pause ? CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK : 0x0;
+ cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg);
+
cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL);
cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK;
cfg |= rx_pause ? CGX_SMUX_RX_FRM_CTL_CTL_BCK : 0x0;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
index 9181ac5de912..19075f217d00 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
@@ -160,6 +160,8 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu)
continue;
lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu));
for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) {
+ if (iter >= MAX_LMAC_COUNT)
+ continue;
lmac = cgx_get_lmacid(rvu_cgx_pdata(cgx, rvu),
iter);
rvu->pf2cgxlmac_map[pf] = cgxlmac_id_to_bmap(cgx, lmac);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 55639c133dd0..91a4ea529d07 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -1669,7 +1669,7 @@ static int npc_fwdb_detect_load_prfl_img(struct rvu *rvu, uint64_t prfl_sz,
struct npc_coalesced_kpu_prfl *img_data = NULL;
int i = 0, rc = -EINVAL;
void __iomem *kpu_prfl_addr;
- u16 offset;
+ u32 offset;

img_data = (struct npc_coalesced_kpu_prfl __force *)rvu->kpu_prfl_addr;
if (le64_to_cpu(img_data->signature) == KPU_SIGN &&
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index b40bd0e46751..3f46d5e0fb2e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1933,7 +1933,7 @@ int otx2_open(struct net_device *netdev)
* mcam entries are enabled to receive the packets. Hence disable the
* packet I/O.
*/
- if (err == EIO)
+ if (err == -EIO)
goto err_disable_rxtx;
else if (err)
goto err_tx_stop_queues;
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
index aaf1faed4133..7bb92e2dacda 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/phy.h>
#include <linux/platform_device.h>
+#include <linux/rtnetlink.h>
#include <linux/skbuff.h>

#include "mlxbf_gige.h"
@@ -139,13 +140,10 @@ static int mlxbf_gige_open(struct net_device *netdev)
control |= MLXBF_GIGE_CONTROL_PORT_EN;
writeq(control, priv->base + MLXBF_GIGE_CONTROL);

- err = mlxbf_gige_request_irqs(priv);
- if (err)
- return err;
mlxbf_gige_cache_stats(priv);
err = mlxbf_gige_clean_port(priv);
if (err)
- goto free_irqs;
+ return err;

/* Clear driver's valid_polarity to match hardware,
* since the above call to clean_port() resets the
@@ -157,7 +155,7 @@ static int mlxbf_gige_open(struct net_device *netdev)

err = mlxbf_gige_tx_init(priv);
if (err)
- goto free_irqs;
+ goto phy_deinit;
err = mlxbf_gige_rx_init(priv);
if (err)
goto tx_deinit;
@@ -166,6 +164,10 @@ static int mlxbf_gige_open(struct net_device *netdev)
napi_enable(&priv->napi);
netif_start_queue(netdev);

+ err = mlxbf_gige_request_irqs(priv);
+ if (err)
+ goto napi_deinit;
+
/* Set bits in INT_EN that we care about */
int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR |
MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS |
@@ -182,11 +184,17 @@ static int mlxbf_gige_open(struct net_device *netdev)

return 0;

+napi_deinit:
+ netif_stop_queue(netdev);
+ napi_disable(&priv->napi);
+ netif_napi_del(&priv->napi);
+ mlxbf_gige_rx_deinit(priv);
+
tx_deinit:
mlxbf_gige_tx_deinit(priv);

-free_irqs:
- mlxbf_gige_free_irqs(priv);
+phy_deinit:
+ phy_stop(phydev);
return err;
}

@@ -487,8 +495,13 @@ static void mlxbf_gige_shutdown(struct platform_device *pdev)
{
struct mlxbf_gige *priv = platform_get_drvdata(pdev);

- writeq(0, priv->base + MLXBF_GIGE_INT_EN);
- mlxbf_gige_clean_port(priv);
+ rtnl_lock();
+ netif_device_detach(priv->netdev);
+
+ if (netif_running(priv->netdev))
+ dev_close(priv->netdev);
+
+ rtnl_unlock();
}

static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = {
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index c81cdeb4d4e7..0b6174748d2b 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -25,6 +25,8 @@
#define PCS_POWER_STATE_DOWN 0x6
#define PCS_POWER_STATE_UP 0x4

+#define RFE_RD_FIFO_TH_3_DWORDS 0x3
+
static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter)
{
u32 chip_rev;
@@ -3223,6 +3225,21 @@ static void lan743x_full_cleanup(struct lan743x_adapter *adapter)
lan743x_pci_cleanup(adapter);
}

+static void pci11x1x_set_rfe_rd_fifo_threshold(struct lan743x_adapter *adapter)
+{
+ u16 rev = adapter->csr.id_rev & ID_REV_CHIP_REV_MASK_;
+
+ if (rev == ID_REV_CHIP_REV_PCI11X1X_B0_) {
+ u32 misc_ctl;
+
+ misc_ctl = lan743x_csr_read(adapter, MISC_CTL_0);
+ misc_ctl &= ~MISC_CTL_0_RFE_READ_FIFO_MASK_;
+ misc_ctl |= FIELD_PREP(MISC_CTL_0_RFE_READ_FIFO_MASK_,
+ RFE_RD_FIFO_TH_3_DWORDS);
+ lan743x_csr_write(adapter, MISC_CTL_0, misc_ctl);
+ }
+}
+
static int lan743x_hardware_init(struct lan743x_adapter *adapter,
struct pci_dev *pdev)
{
@@ -3238,6 +3255,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter,
pci11x1x_strap_get_status(adapter);
spin_lock_init(&adapter->eth_syslock_spinlock);
mutex_init(&adapter->sgmii_rw_lock);
+ pci11x1x_set_rfe_rd_fifo_threshold(adapter);
} else {
adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS;
adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS;
diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
index 52609fc13ad9..f0b486f85450 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.h
+++ b/drivers/net/ethernet/microchip/lan743x_main.h
@@ -26,6 +26,7 @@
#define ID_REV_CHIP_REV_MASK_ (0x0000FFFF)
#define ID_REV_CHIP_REV_A0_ (0x00000000)
#define ID_REV_CHIP_REV_B0_ (0x00000010)
+#define ID_REV_CHIP_REV_PCI11X1X_B0_ (0x000000B0)

#define FPGA_REV (0x04)
#define FPGA_REV_GET_MINOR_(fpga_rev) (((fpga_rev) >> 8) & 0x000000FF)
@@ -311,6 +312,9 @@
#define SGMII_CTL_LINK_STATUS_SOURCE_ BIT(8)
#define SGMII_CTL_SGMII_POWER_DN_ BIT(1)

+#define MISC_CTL_0 (0x920)
+#define MISC_CTL_0_RFE_READ_FIFO_MASK_ GENMASK(6, 4)
+
/* Vendor Specific SGMII MMD details */
#define SR_VSMMD_PCS_ID1 0x0004
#define SR_VSMMD_PCS_ID2 0x0005
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 48ea4aeeea5d..e443d69e3951 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -601,7 +601,7 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size,

*alloc_size = mtu + MANA_RXBUF_PAD + *headroom;

- *datasize = ALIGN(mtu + ETH_HLEN, MANA_RX_DATA_ALIGN);
+ *datasize = mtu + ETH_HLEN;
}

static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu)
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 81fd31f6fac4..e6f1da66c450 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1201,17 +1201,40 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
}

+static void rtl_dash_loop_wait(struct rtl8169_private *tp,
+ const struct rtl_cond *c,
+ unsigned long usecs, int n, bool high)
+{
+ if (!tp->dash_enabled)
+ return;
+ rtl_loop_wait(tp, c, usecs, n, high);
+}
+
+static void rtl_dash_loop_wait_high(struct rtl8169_private *tp,
+ const struct rtl_cond *c,
+ unsigned long d, int n)
+{
+ rtl_dash_loop_wait(tp, c, d, n, true);
+}
+
+static void rtl_dash_loop_wait_low(struct rtl8169_private *tp,
+ const struct rtl_cond *c,
+ unsigned long d, int n)
+{
+ rtl_dash_loop_wait(tp, c, d, n, false);
+}
+
static void rtl8168dp_driver_start(struct rtl8169_private *tp)
{
r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START);
- rtl_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10);
+ rtl_dash_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10);
}

static void rtl8168ep_driver_start(struct rtl8169_private *tp)
{
r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_START);
r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01);
- rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30);
+ rtl_dash_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30);
}

static void rtl8168_driver_start(struct rtl8169_private *tp)
@@ -1225,7 +1248,7 @@ static void rtl8168_driver_start(struct rtl8169_private *tp)
static void rtl8168dp_driver_stop(struct rtl8169_private *tp)
{
r8168dp_oob_notify(tp, OOB_CMD_DRIVER_STOP);
- rtl_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10);
+ rtl_dash_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10);
}

static void rtl8168ep_driver_stop(struct rtl8169_private *tp)
@@ -1233,7 +1256,7 @@ static void rtl8168ep_driver_stop(struct rtl8169_private *tp)
rtl8168ep_stop_cmac(tp);
r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_STOP);
r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01);
- rtl_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10);
+ rtl_dash_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10);
}

static void rtl8168_driver_stop(struct rtl8169_private *tp)
@@ -5055,6 +5078,15 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
struct mii_bus *new_bus;
int ret;

+ /* On some boards with this chip version the BIOS is buggy and misses
+ * to reset the PHY page selector. This results in the PHY ID read
+ * accessing registers on a different page, returning a more or
+ * less random value. Fix this by resetting the page selector first.
+ */
+ if (tp->mac_version == RTL_GIGA_MAC_VER_25 ||
+ tp->mac_version == RTL_GIGA_MAC_VER_26)
+ r8169_mdio_write(tp, 0x1f, 0);
+
new_bus = devm_mdiobus_alloc(&pdev->dev);
if (!new_bus)
return -ENOMEM;
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 8fec0dbbbe7b..c6897e6ea362 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1288,25 +1288,16 @@ static int ravb_poll(struct napi_struct *napi, int budget)
struct net_device *ndev = napi->dev;
struct ravb_private *priv = netdev_priv(ndev);
const struct ravb_hw_info *info = priv->info;
- bool gptp = info->gptp || info->ccc_gac;
- struct ravb_rx_desc *desc;
unsigned long flags;
int q = napi - priv->napi;
int mask = BIT(q);
int quota = budget;
- unsigned int entry;
+ bool unmask;

- if (!gptp) {
- entry = priv->cur_rx[q] % priv->num_rx_ring[q];
- desc = &priv->gbeth_rx_ring[entry];
- }
/* Processing RX Descriptor Ring */
/* Clear RX interrupt */
ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0);
- if (gptp || desc->die_dt != DT_FEMPTY) {
- if (ravb_rx(ndev, &quota, q))
- goto out;
- }
+ unmask = !ravb_rx(ndev, &quota, q);

/* Processing TX Descriptor Ring */
spin_lock_irqsave(&priv->lock, flags);
@@ -1316,6 +1307,18 @@ static int ravb_poll(struct napi_struct *napi, int budget)
netif_wake_subqueue(ndev, q);
spin_unlock_irqrestore(&priv->lock, flags);

+ /* Receive error message handling */
+ priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors;
+ if (info->nc_queues)
+ priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors;
+ if (priv->rx_over_errors != ndev->stats.rx_over_errors)
+ ndev->stats.rx_over_errors = priv->rx_over_errors;
+ if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors)
+ ndev->stats.rx_fifo_errors = priv->rx_fifo_errors;
+
+ if (!unmask)
+ goto out;
+
napi_complete(napi);

/* Re-enable RX/TX interrupts */
@@ -1329,14 +1332,6 @@ static int ravb_poll(struct napi_struct *napi, int budget)
}
spin_unlock_irqrestore(&priv->lock, flags);

- /* Receive error message handling */
- priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors;
- if (info->nc_queues)
- priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors;
- if (priv->rx_over_errors != ndev->stats.rx_over_errors)
- ndev->stats.rx_over_errors = priv->rx_over_errors;
- if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors)
- ndev->stats.rx_fifo_errors = priv->rx_fifo_errors;
out:
return budget - quota;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index c6ff1fa0e04d..683c34e60963 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -92,19 +92,41 @@ static void dwmac4_rx_queue_priority(struct mac_device_info *hw,
u32 prio, u32 queue)
{
void __iomem *ioaddr = hw->pcsr;
- u32 base_register;
- u32 value;
+ u32 clear_mask = 0;
+ u32 ctrl2, ctrl3;
+ int i;

- base_register = (queue < 4) ? GMAC_RXQ_CTRL2 : GMAC_RXQ_CTRL3;
- if (queue >= 4)
- queue -= 4;
+ ctrl2 = readl(ioaddr + GMAC_RXQ_CTRL2);
+ ctrl3 = readl(ioaddr + GMAC_RXQ_CTRL3);

- value = readl(ioaddr + base_register);
+ /* The software must ensure that the same priority
+ * is not mapped to multiple Rx queues
+ */
+ for (i = 0; i < 4; i++)
+ clear_mask |= ((prio << GMAC_RXQCTRL_PSRQX_SHIFT(i)) &
+ GMAC_RXQCTRL_PSRQX_MASK(i));
+
+ ctrl2 &= ~clear_mask;
+ ctrl3 &= ~clear_mask;
+
+ /* First assign new priorities to a queue, then
+ * clear them from others queues
+ */
+ if (queue < 4) {
+ ctrl2 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) &
+ GMAC_RXQCTRL_PSRQX_MASK(queue);

- value &= ~GMAC_RXQCTRL_PSRQX_MASK(queue);
- value |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) &
+ writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2);
+ writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3);
+ } else {
+ queue -= 4;
+
+ ctrl3 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) &
GMAC_RXQCTRL_PSRQX_MASK(queue);
- writel(value, ioaddr + base_register);
+
+ writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3);
+ writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2);
+ }
}

static void dwmac4_tx_queue_priority(struct mac_device_info *hw,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index b5509f244ecd..24c53b7255a2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -105,17 +105,41 @@ static void dwxgmac2_rx_queue_prio(struct mac_device_info *hw, u32 prio,
u32 queue)
{
void __iomem *ioaddr = hw->pcsr;
- u32 value, reg;
+ u32 clear_mask = 0;
+ u32 ctrl2, ctrl3;
+ int i;

- reg = (queue < 4) ? XGMAC_RXQ_CTRL2 : XGMAC_RXQ_CTRL3;
- if (queue >= 4)
+ ctrl2 = readl(ioaddr + XGMAC_RXQ_CTRL2);
+ ctrl3 = readl(ioaddr + XGMAC_RXQ_CTRL3);
+
+ /* The software must ensure that the same priority
+ * is not mapped to multiple Rx queues
+ */
+ for (i = 0; i < 4; i++)
+ clear_mask |= ((prio << XGMAC_PSRQ_SHIFT(i)) &
+ XGMAC_PSRQ(i));
+
+ ctrl2 &= ~clear_mask;
+ ctrl3 &= ~clear_mask;
+
+ /* First assign new priorities to a queue, then
+ * clear them from others queues
+ */
+ if (queue < 4) {
+ ctrl2 |= (prio << XGMAC_PSRQ_SHIFT(queue)) &
+ XGMAC_PSRQ(queue);
+
+ writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2);
+ writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3);
+ } else {
queue -= 4;

- value = readl(ioaddr + reg);
- value &= ~XGMAC_PSRQ(queue);
- value |= (prio << XGMAC_PSRQ_SHIFT(queue)) & XGMAC_PSRQ(queue);
+ ctrl3 |= (prio << XGMAC_PSRQ_SHIFT(queue)) &
+ XGMAC_PSRQ(queue);

- writel(value, ioaddr + reg);
+ writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3);
+ writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2);
+ }
}

static void dwxgmac2_tx_queue_prio(struct mac_device_info *hw, u32 prio,
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
index e457ac9ae6d8..ad5c213dac07 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
@@ -20,6 +20,8 @@
#include "txgbe_phy.h"
#include "txgbe_hw.h"

+#define TXGBE_I2C_CLK_DEV_NAME "i2c_dw"
+
static int txgbe_swnodes_register(struct txgbe *txgbe)
{
struct txgbe_nodes *nodes = &txgbe->nodes;
@@ -551,8 +553,8 @@ static int txgbe_clock_register(struct txgbe *txgbe)
char clk_name[32];
struct clk *clk;

- snprintf(clk_name, sizeof(clk_name), "i2c_dw.%d",
- pci_dev_id(pdev));
+ snprintf(clk_name, sizeof(clk_name), "%s.%d",
+ TXGBE_I2C_CLK_DEV_NAME, pci_dev_id(pdev));

clk = clk_register_fixed_rate(NULL, clk_name, NULL, 0, 156250000);
if (IS_ERR(clk))
@@ -614,7 +616,7 @@ static int txgbe_i2c_register(struct txgbe *txgbe)

info.parent = &pdev->dev;
info.fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_I2C]);
- info.name = "i2c_designware";
+ info.name = TXGBE_I2C_CLK_DEV_NAME;
info.id = pci_dev_id(pdev);

info.res = &DEFINE_RES_IRQ(pdev->irq);
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index f81c4bcd85a2..cbd98ea4a84a 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -2388,6 +2388,7 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
struct hwtstamp_config config;
int txcfg = 0, rxcfg = 0;
int pkt_ts_enable;
+ int tx_mod;

if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
return -EFAULT;
@@ -2437,9 +2438,14 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_RX_TIMESTAMP_EN, pkt_ts_enable);
lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_TIMESTAMP_EN, pkt_ts_enable);

- if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC)
+ tx_mod = lanphy_read_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD);
+ if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) {
lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD,
- PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
+ tx_mod | PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
+ } else if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ON) {
+ lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD,
+ tx_mod & ~PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
+ }

if (config.rx_filter != HWTSTAMP_FILTER_NONE)
lan8814_config_ts_intr(ptp_priv->phydev, true);
@@ -2497,7 +2503,7 @@ static void lan8814_txtstamp(struct mii_timestamper *mii_ts,
}
}

-static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig)
+static bool lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig)
{
struct ptp_header *ptp_header;
u32 type;
@@ -2507,7 +2513,11 @@ static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig)
ptp_header = ptp_parse_header(skb, type);
skb_pull_inline(skb, ETH_HLEN);

+ if (!ptp_header)
+ return false;
+
*sig = (__force u16)(ntohs(ptp_header->sequence_id));
+ return true;
}

static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv,
@@ -2519,7 +2529,8 @@ static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv,
bool ret = false;
u16 skb_sig;

- lan8814_get_sig_rx(skb, &skb_sig);
+ if (!lan8814_get_sig_rx(skb, &skb_sig))
+ return ret;

/* Iterate over all RX timestamps and match it with the received skbs */
spin_lock_irqsave(&ptp_priv->rx_ts_lock, flags);
@@ -2799,7 +2810,7 @@ static int lan8814_ptpci_adjfine(struct ptp_clock_info *ptpci, long scaled_ppm)
return 0;
}

-static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig)
+static bool lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig)
{
struct ptp_header *ptp_header;
u32 type;
@@ -2807,7 +2818,11 @@ static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig)
type = ptp_classify_raw(skb);
ptp_header = ptp_parse_header(skb, type);

+ if (!ptp_header)
+ return false;
+
*sig = (__force u16)(ntohs(ptp_header->sequence_id));
+ return true;
}

static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv,
@@ -2821,7 +2836,8 @@ static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv,

spin_lock_irqsave(&ptp_priv->tx_queue.lock, flags);
skb_queue_walk_safe(&ptp_priv->tx_queue, skb, skb_tmp) {
- lan8814_get_sig_tx(skb, &skb_sig);
+ if (!lan8814_get_sig_tx(skb, &skb_sig))
+ continue;

if (memcmp(&skb_sig, &seq_id, sizeof(seq_id)))
continue;
@@ -2875,7 +2891,8 @@ static bool lan8814_match_skb(struct kszphy_ptp_priv *ptp_priv,

spin_lock_irqsave(&ptp_priv->rx_queue.lock, flags);
skb_queue_walk_safe(&ptp_priv->rx_queue, skb, skb_tmp) {
- lan8814_get_sig_rx(skb, &skb_sig);
+ if (!lan8814_get_sig_rx(skb, &skb_sig))
+ continue;

if (memcmp(&skb_sig, &rx_ts->seq_id, sizeof(rx_ts->seq_id)))
continue;
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index d837c1887416..e0e9b4c53cb0 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1273,6 +1273,8 @@ static void ax88179_get_mac_addr(struct usbnet *dev)

if (is_valid_ether_addr(mac)) {
eth_hw_addr_set(dev->net, mac);
+ if (!is_local_ether_addr(mac))
+ dev->net->addr_assign_type = NET_ADDR_PERM;
} else {
netdev_info(dev->net, "invalid MAC address, using random\n");
eth_hw_addr_random(dev->net);
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
index 168eda2132fb..9dcc1506bd0b 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
@@ -278,7 +278,7 @@ static inline void iwl_free_rxb(struct iwl_rx_cmd_buffer *r)
#define IWL_MGMT_TID 15
#define IWL_FRAME_LIMIT 64
#define IWL_MAX_RX_HW_QUEUES 16
-#define IWL_9000_MAX_RX_HW_QUEUES 6
+#define IWL_9000_MAX_RX_HW_QUEUES 1

/**
* enum iwl_wowlan_status - WoWLAN image/device status
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index aaa9840d0d4c..ee9d14250a26 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -352,7 +352,9 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
ieee80211_hw_set(hw, HAS_RATE_CONTROL);
}

- if (iwl_mvm_has_new_rx_api(mvm))
+ /* We want to use the mac80211's reorder buffer for 9000 */
+ if (iwl_mvm_has_new_rx_api(mvm) &&
+ mvm->trans->trans_cfg->device_family > IWL_DEVICE_FAMILY_9000)
ieee80211_hw_set(hw, SUPPORTS_REORDERING_BUFFER);

if (fw_has_capa(&mvm->fw->ucode_capa,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c
index 2ecd32bed752..045c862a8fc4 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c
@@ -132,14 +132,18 @@ struct iwl_rfi_freq_table_resp_cmd *iwl_rfi_get_freq_table(struct iwl_mvm *mvm)
if (ret)
return ERR_PTR(ret);

- if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != resp_size))
+ if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) !=
+ resp_size)) {
+ iwl_free_resp(&cmd);
return ERR_PTR(-EIO);
+ }

resp = kmemdup(cmd.resp_pkt->data, resp_size, GFP_KERNEL);
+ iwl_free_resp(&cmd);
+
if (!resp)
return ERR_PTR(-ENOMEM);

- iwl_free_resp(&cmd);
return resp;
}

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index bac0228b8c86..e9360b555ac9 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -236,21 +236,13 @@ static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm,
static void iwl_mvm_pass_packet_to_mac80211(struct iwl_mvm *mvm,
struct napi_struct *napi,
struct sk_buff *skb, int queue,
- struct ieee80211_sta *sta,
- struct ieee80211_link_sta *link_sta)
+ struct ieee80211_sta *sta)
{
if (unlikely(iwl_mvm_check_pn(mvm, skb, queue, sta))) {
kfree_skb(skb);
return;
}

- if (sta && sta->valid_links && link_sta) {
- struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
-
- rx_status->link_valid = 1;
- rx_status->link_id = link_sta->link_id;
- }
-
ieee80211_rx_napi(mvm->hw, sta, skb, napi);
}

@@ -636,7 +628,7 @@ static void iwl_mvm_release_frames(struct iwl_mvm *mvm,
while ((skb = __skb_dequeue(skb_list))) {
iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb,
reorder_buf->queue,
- sta, NULL /* FIXME */);
+ sta);
reorder_buf->num_stored--;
}
}
@@ -963,6 +955,9 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm,
baid = (reorder & IWL_RX_MPDU_REORDER_BAID_MASK) >>
IWL_RX_MPDU_REORDER_BAID_SHIFT;

+ if (mvm->trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_9000)
+ return false;
+
/*
* This also covers the case of receiving a Block Ack Request
* outside a BA session; we'll pass it to mac80211 and that
@@ -2486,6 +2481,11 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
if (IS_ERR(sta))
sta = NULL;
link_sta = rcu_dereference(mvm->fw_id_to_link_sta[id]);
+
+ if (sta && sta->valid_links && link_sta) {
+ rx_status->link_valid = 1;
+ rx_status->link_id = link_sta->link_id;
+ }
}
} else if (!is_multicast_ether_addr(hdr->addr2)) {
/*
@@ -2621,9 +2621,14 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,

if (!iwl_mvm_reorder(mvm, napi, queue, sta, skb, desc) &&
likely(!iwl_mvm_time_sync_frame(mvm, skb, hdr->addr2)) &&
- likely(!iwl_mvm_mei_filter_scan(mvm, skb)))
- iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta,
- link_sta);
+ likely(!iwl_mvm_mei_filter_scan(mvm, skb))) {
+ if (mvm->trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_9000 &&
+ (desc->mac_flags2 & IWL_RX_MPDU_MFLG2_AMSDU) &&
+ !(desc->amsdu_info & IWL_RX_MPDU_AMSDU_LAST_SUBFRAME))
+ rx_status->flag |= RX_FLAG_AMSDU_MORE;
+
+ iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta);
+ }
out:
rcu_read_unlock();
}
diff --git a/drivers/net/wwan/t7xx/t7xx_cldma.c b/drivers/net/wwan/t7xx/t7xx_cldma.c
index 9f43f256db1d..f0a4783baf1f 100644
--- a/drivers/net/wwan/t7xx/t7xx_cldma.c
+++ b/drivers/net/wwan/t7xx/t7xx_cldma.c
@@ -106,7 +106,7 @@ bool t7xx_cldma_tx_addr_is_set(struct t7xx_cldma_hw *hw_info, unsigned int qno)
{
u32 offset = REG_CLDMA_UL_START_ADDRL_0 + qno * ADDR_SIZE;

- return ioread64(hw_info->ap_pdn_base + offset);
+ return ioread64_lo_hi(hw_info->ap_pdn_base + offset);
}

void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qno, u64 address,
@@ -117,7 +117,7 @@ void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qn

reg = tx_rx == MTK_RX ? hw_info->ap_ao_base + REG_CLDMA_DL_START_ADDRL_0 :
hw_info->ap_pdn_base + REG_CLDMA_UL_START_ADDRL_0;
- iowrite64(address, reg + offset);
+ iowrite64_lo_hi(address, reg + offset);
}

void t7xx_cldma_hw_resume_queue(struct t7xx_cldma_hw *hw_info, unsigned int qno,
diff --git a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c
index cc70360364b7..554ba4669cc8 100644
--- a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c
+++ b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c
@@ -139,8 +139,9 @@ static int t7xx_cldma_gpd_rx_from_q(struct cldma_queue *queue, int budget, bool
return -ENODEV;
}

- gpd_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_DL_CURRENT_ADDRL_0 +
- queue->index * sizeof(u64));
+ gpd_addr = ioread64_lo_hi(hw_info->ap_pdn_base +
+ REG_CLDMA_DL_CURRENT_ADDRL_0 +
+ queue->index * sizeof(u64));
if (req->gpd_addr == gpd_addr || hwo_polling_count++ >= 100)
return 0;

@@ -318,8 +319,8 @@ static void t7xx_cldma_txq_empty_hndl(struct cldma_queue *queue)
struct t7xx_cldma_hw *hw_info = &md_ctrl->hw_info;

/* Check current processing TGPD, 64-bit address is in a table by Q index */
- ul_curr_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 +
- queue->index * sizeof(u64));
+ ul_curr_addr = ioread64_lo_hi(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 +
+ queue->index * sizeof(u64));
if (req->gpd_addr != ul_curr_addr) {
spin_unlock_irqrestore(&md_ctrl->cldma_lock, flags);
dev_err(md_ctrl->dev, "CLDMA%d queue %d is not empty\n",
diff --git a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c
index 76da4c15e3de..f071ec7ff23d 100644
--- a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c
+++ b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c
@@ -75,7 +75,7 @@ static void t7xx_pcie_mac_atr_tables_dis(void __iomem *pbase, enum t7xx_atr_src_
for (i = 0; i < ATR_TABLE_NUM_PER_ATR; i++) {
offset = ATR_PORT_OFFSET * port + ATR_TABLE_OFFSET * i;
reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset;
- iowrite64(0, reg);
+ iowrite64_lo_hi(0, reg);
}
}

@@ -112,17 +112,17 @@ static int t7xx_pcie_mac_atr_cfg(struct t7xx_pci_dev *t7xx_dev, struct t7xx_atr_

reg = pbase + ATR_PCIE_WIN0_T0_TRSL_ADDR + offset;
value = cfg->trsl_addr & ATR_PCIE_WIN0_ADDR_ALGMT;
- iowrite64(value, reg);
+ iowrite64_lo_hi(value, reg);

reg = pbase + ATR_PCIE_WIN0_T0_TRSL_PARAM + offset;
iowrite32(cfg->trsl_id, reg);

reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset;
value = (cfg->src_addr & ATR_PCIE_WIN0_ADDR_ALGMT) | (atr_size << 1) | BIT(0);
- iowrite64(value, reg);
+ iowrite64_lo_hi(value, reg);

/* Ensure ATR is set */
- ioread64(reg);
+ ioread64_lo_hi(reg);
return 0;
}

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index ad29f370034e..8d2aee88526c 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -285,6 +285,7 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
return NULL;
}
skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);
+ skb_mark_for_recycle(skb);

/* Align ip header to a 16 bytes boundary */
skb_reserve(skb, NET_IP_ALIGN);
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index 3bf27052832f..4d57a4e34105 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -9,6 +9,7 @@

#define pr_fmt(fmt) "OF: " fmt

+#include <linux/device.h>
#include <linux/of.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
@@ -667,6 +668,17 @@ void of_changeset_destroy(struct of_changeset *ocs)
{
struct of_changeset_entry *ce, *cen;

+ /*
+ * When a device is deleted, the device links to/from it are also queued
+ * for deletion. Until these device links are freed, the devices
+ * themselves aren't freed. If the device being deleted is due to an
+ * overlay change, this device might be holding a reference to a device
+ * node that will be freed. So, wait until all already pending device
+ * links are deleted before freeing a device node. This ensures we don't
+ * free any device node that has a non-zero reference count.
+ */
+ device_link_wait_removal();
+
list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node)
__of_changeset_entry_destroy(ce);
}
diff --git a/drivers/of/module.c b/drivers/of/module.c
index 0e8aa974f0f2..f58e624953a2 100644
--- a/drivers/of/module.c
+++ b/drivers/of/module.c
@@ -16,6 +16,14 @@ ssize_t of_modalias(const struct device_node *np, char *str, ssize_t len)
ssize_t csize;
ssize_t tsize;

+ /*
+ * Prevent a kernel oops in vsnprintf() -- it only allows passing a
+ * NULL ptr when the length is also 0. Also filter out the negative
+ * lengths...
+ */
+ if ((len > 0 && !str) || len < 0)
+ return -EINVAL;
+
/* Name & Type */
/* %p eats all alphanum characters, so %c must be used here */
csize = snprintf(str, len, "of:N%pOFn%c%s", np, 'T',
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
index c78a6fd6c57f..b4efdddb2ad9 100644
--- a/drivers/perf/riscv_pmu.c
+++ b/drivers/perf/riscv_pmu.c
@@ -313,6 +313,10 @@ static int riscv_pmu_event_init(struct perf_event *event)
u64 event_config = 0;
uint64_t cmask;

+ /* driver does not support branch stack sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
hwc->flags = 0;
mapped_event = rvpmu->event_map(event, &event_config);
if (mapped_event < 0) {
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index cd783290bde5..1148b4ecabdd 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1179,6 +1179,20 @@ static int qeth_check_irb_error(struct qeth_card *card, struct ccw_device *cdev,
}
}

+/**
+ * qeth_irq() - qeth interrupt handler
+ * @cdev: ccw device
+ * @intparm: expect pointer to iob
+ * @irb: Interruption Response Block
+ *
+ * In the good path:
+ * corresponding qeth channel is locked with last used iob as active_cmd.
+ * But this function is also called for error interrupts.
+ *
+ * Caller ensures that:
+ * Interrupts are disabled; ccw device lock is held;
+ *
+ */
static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
struct irb *irb)
{
@@ -1220,11 +1234,10 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
iob = (struct qeth_cmd_buffer *) (addr_t)intparm;
}

- qeth_unlock_channel(card, channel);
-
rc = qeth_check_irb_error(card, cdev, irb);
if (rc) {
/* IO was terminated, free its resources. */
+ qeth_unlock_channel(card, channel);
if (iob)
qeth_cancel_cmd(iob, rc);
return;
@@ -1268,6 +1281,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
rc = qeth_get_problem(card, cdev, irb);
if (rc) {
card->read_or_write_problem = 1;
+ qeth_unlock_channel(card, channel);
if (iob)
qeth_cancel_cmd(iob, rc);
qeth_clear_ipacmd_list(card);
@@ -1276,6 +1290,26 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
}
}

+ if (scsw_cmd_is_valid_cc(&irb->scsw) && irb->scsw.cmd.cc == 1 && iob) {
+ /* channel command hasn't started: retry.
+ * active_cmd is still set to last iob
+ */
+ QETH_CARD_TEXT(card, 2, "irqcc1");
+ rc = ccw_device_start_timeout(cdev, __ccw_from_cmd(iob),
+ (addr_t)iob, 0, 0, iob->timeout);
+ if (rc) {
+ QETH_DBF_MESSAGE(2,
+ "ccw retry on %x failed, rc = %i\n",
+ CARD_DEVID(card), rc);
+ QETH_CARD_TEXT_(card, 2, " err%d", rc);
+ qeth_unlock_channel(card, channel);
+ qeth_cancel_cmd(iob, rc);
+ }
+ return;
+ }
+
+ qeth_unlock_channel(card, channel);
+
if (iob) {
/* sanity check: */
if (irb->scsw.cmd.count > iob->length) {
diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c
index ca2e932dd9b7..f684eb5e0489 100644
--- a/drivers/scsi/myrb.c
+++ b/drivers/scsi/myrb.c
@@ -1775,9 +1775,9 @@ static ssize_t raid_state_show(struct device *dev,

name = myrb_devstate_name(ldev_info->state);
if (name)
- ret = snprintf(buf, 32, "%s\n", name);
+ ret = snprintf(buf, 64, "%s\n", name);
else
- ret = snprintf(buf, 32, "Invalid (%02X)\n",
+ ret = snprintf(buf, 64, "Invalid (%02X)\n",
ldev_info->state);
} else {
struct myrb_pdev_state *pdev_info = sdev->hostdata;
@@ -1796,9 +1796,9 @@ static ssize_t raid_state_show(struct device *dev,
else
name = myrb_devstate_name(pdev_info->state);
if (name)
- ret = snprintf(buf, 32, "%s\n", name);
+ ret = snprintf(buf, 64, "%s\n", name);
else
- ret = snprintf(buf, 32, "Invalid (%02X)\n",
+ ret = snprintf(buf, 64, "Invalid (%02X)\n",
pdev_info->state);
}
return ret;
@@ -1886,11 +1886,11 @@ static ssize_t raid_level_show(struct device *dev,

name = myrb_raidlevel_name(ldev_info->raid_level);
if (!name)
- return snprintf(buf, 32, "Invalid (%02X)\n",
+ return snprintf(buf, 64, "Invalid (%02X)\n",
ldev_info->state);
- return snprintf(buf, 32, "%s\n", name);
+ return snprintf(buf, 64, "%s\n", name);
}
- return snprintf(buf, 32, "Physical Drive\n");
+ return snprintf(buf, 64, "Physical Drive\n");
}
static DEVICE_ATTR_RO(raid_level);

@@ -1903,15 +1903,15 @@ static ssize_t rebuild_show(struct device *dev,
unsigned char status;

if (sdev->channel < myrb_logical_channel(sdev->host))
- return snprintf(buf, 32, "physical device - not rebuilding\n");
+ return snprintf(buf, 64, "physical device - not rebuilding\n");

status = myrb_get_rbld_progress(cb, &rbld_buf);

if (rbld_buf.ldev_num != sdev->id ||
status != MYRB_STATUS_SUCCESS)
- return snprintf(buf, 32, "not rebuilding\n");
+ return snprintf(buf, 64, "not rebuilding\n");

- return snprintf(buf, 32, "rebuilding block %u of %u\n",
+ return snprintf(buf, 64, "rebuilding block %u of %u\n",
rbld_buf.ldev_size - rbld_buf.blocks_left,
rbld_buf.ldev_size);
}
diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c
index a1eec65a9713..e824be9d9bbb 100644
--- a/drivers/scsi/myrs.c
+++ b/drivers/scsi/myrs.c
@@ -947,9 +947,9 @@ static ssize_t raid_state_show(struct device *dev,

name = myrs_devstate_name(ldev_info->dev_state);
if (name)
- ret = snprintf(buf, 32, "%s\n", name);
+ ret = snprintf(buf, 64, "%s\n", name);
else
- ret = snprintf(buf, 32, "Invalid (%02X)\n",
+ ret = snprintf(buf, 64, "Invalid (%02X)\n",
ldev_info->dev_state);
} else {
struct myrs_pdev_info *pdev_info;
@@ -958,9 +958,9 @@ static ssize_t raid_state_show(struct device *dev,
pdev_info = sdev->hostdata;
name = myrs_devstate_name(pdev_info->dev_state);
if (name)
- ret = snprintf(buf, 32, "%s\n", name);
+ ret = snprintf(buf, 64, "%s\n", name);
else
- ret = snprintf(buf, 32, "Invalid (%02X)\n",
+ ret = snprintf(buf, 64, "Invalid (%02X)\n",
pdev_info->dev_state);
}
return ret;
@@ -1066,13 +1066,13 @@ static ssize_t raid_level_show(struct device *dev,
ldev_info = sdev->hostdata;
name = myrs_raid_level_name(ldev_info->raid_level);
if (!name)
- return snprintf(buf, 32, "Invalid (%02X)\n",
+ return snprintf(buf, 64, "Invalid (%02X)\n",
ldev_info->dev_state);

} else
name = myrs_raid_level_name(MYRS_RAID_PHYSICAL);

- return snprintf(buf, 32, "%s\n", name);
+ return snprintf(buf, 64, "%s\n", name);
}
static DEVICE_ATTR_RO(raid_level);

@@ -1086,7 +1086,7 @@ static ssize_t rebuild_show(struct device *dev,
unsigned char status;

if (sdev->channel < cs->ctlr_info->physchan_present)
- return snprintf(buf, 32, "physical device - not rebuilding\n");
+ return snprintf(buf, 64, "physical device - not rebuilding\n");

ldev_info = sdev->hostdata;
ldev_num = ldev_info->ldev_num;
@@ -1098,11 +1098,11 @@ static ssize_t rebuild_show(struct device *dev,
return -EIO;
}
if (ldev_info->rbld_active) {
- return snprintf(buf, 32, "rebuilding block %zu of %zu\n",
+ return snprintf(buf, 64, "rebuilding block %zu of %zu\n",
(size_t)ldev_info->rbld_lba,
(size_t)ldev_info->cfg_devsize);
} else
- return snprintf(buf, 32, "not rebuilding\n");
+ return snprintf(buf, 64, "not rebuilding\n");
}

static ssize_t rebuild_store(struct device *dev,
@@ -1190,7 +1190,7 @@ static ssize_t consistency_check_show(struct device *dev,
unsigned short ldev_num;

if (sdev->channel < cs->ctlr_info->physchan_present)
- return snprintf(buf, 32, "physical device - not checking\n");
+ return snprintf(buf, 64, "physical device - not checking\n");

ldev_info = sdev->hostdata;
if (!ldev_info)
@@ -1198,11 +1198,11 @@ static ssize_t consistency_check_show(struct device *dev,
ldev_num = ldev_info->ldev_num;
myrs_get_ldev_info(cs, ldev_num, ldev_info);
if (ldev_info->cc_active)
- return snprintf(buf, 32, "checking block %zu of %zu\n",
+ return snprintf(buf, 64, "checking block %zu of %zu\n",
(size_t)ldev_info->cc_lba,
(size_t)ldev_info->cfg_devsize);
else
- return snprintf(buf, 32, "not checking\n");
+ return snprintf(buf, 64, "not checking\n");
}

static ssize_t consistency_check_store(struct device *dev,
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index e80c33cdad2b..c62f677084b4 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3754,7 +3754,7 @@ static int sd_probe(struct device *dev)

error = device_add_disk(dev, gd, NULL);
if (error) {
- put_device(&sdkp->disk_dev);
+ device_unregister(&sdkp->disk_dev);
put_disk(gd);
goto out;
}
diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c
index 3638e974f5d4..06bf58b7e5d7 100644
--- a/drivers/spi/spi-pci1xxxx.c
+++ b/drivers/spi/spi-pci1xxxx.c
@@ -275,6 +275,8 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id *
spi_bus->spi_int[iter] = devm_kzalloc(&pdev->dev,
sizeof(struct pci1xxxx_spi_internal),
GFP_KERNEL);
+ if (!spi_bus->spi_int[iter])
+ return -ENOMEM;
spi_sub_ptr = spi_bus->spi_int[iter];
spi_sub_ptr->spi_host = devm_spi_alloc_host(dev, sizeof(struct spi_controller));
if (!spi_sub_ptr->spi_host)
diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index 0e48ffd499b9..652eadbefe24 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -3,19 +3,20 @@
// Copyright (c) 2009 Samsung Electronics Co., Ltd.
// Jaswinder Singh <jassi.brar@xxxxxxxxxxx>

-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/delay.h>
+#include <linux/bitops.h>
+#include <linux/bits.h>
#include <linux/clk.h>
+#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_data/spi-s3c64xx.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/spi/spi.h>
-#include <linux/of.h>
-
-#include <linux/platform_data/spi-s3c64xx.h>

#define MAX_SPI_PORTS 12
#define S3C64XX_SPI_QUIRK_CS_AUTO (1 << 1)
@@ -76,6 +77,7 @@
#define S3C64XX_SPI_INT_RX_FIFORDY_EN (1<<1)
#define S3C64XX_SPI_INT_TX_FIFORDY_EN (1<<0)

+#define S3C64XX_SPI_ST_TX_FIFO_LVL_SHIFT 6
#define S3C64XX_SPI_ST_RX_OVERRUN_ERR (1<<5)
#define S3C64XX_SPI_ST_RX_UNDERRUN_ERR (1<<4)
#define S3C64XX_SPI_ST_TX_OVERRUN_ERR (1<<3)
@@ -106,9 +108,11 @@
#define FIFO_LVL_MASK(i) ((i)->port_conf->fifo_lvl_mask[i->port_id])
#define S3C64XX_SPI_ST_TX_DONE(v, i) (((v) & \
(1 << (i)->port_conf->tx_st_done)) ? 1 : 0)
-#define TX_FIFO_LVL(v, i) (((v) >> 6) & FIFO_LVL_MASK(i))
-#define RX_FIFO_LVL(v, i) (((v) >> (i)->port_conf->rx_lvl_offset) & \
- FIFO_LVL_MASK(i))
+#define TX_FIFO_LVL(v, sdd) (((v) & (sdd)->tx_fifomask) >> \
+ __ffs((sdd)->tx_fifomask))
+#define RX_FIFO_LVL(v, sdd) (((v) & (sdd)->rx_fifomask) >> \
+ __ffs((sdd)->rx_fifomask))
+#define FIFO_DEPTH(i) ((FIFO_LVL_MASK(i) >> 1) + 1)

#define S3C64XX_SPI_MAX_TRAILCNT 0x3ff
#define S3C64XX_SPI_TRAILCNT_OFF 19
@@ -133,6 +137,10 @@ struct s3c64xx_spi_dma_data {
* struct s3c64xx_spi_port_config - SPI Controller hardware info
* @fifo_lvl_mask: Bit-mask for {TX|RX}_FIFO_LVL bits in SPI_STATUS register.
* @rx_lvl_offset: Bit offset of RX_FIFO_LVL bits in SPI_STATUS regiter.
+ * @rx_fifomask: SPI_STATUS.RX_FIFO_LVL mask. Shifted mask defining the field's
+ * length and position.
+ * @tx_fifomask: SPI_STATUS.TX_FIFO_LVL mask. Shifted mask defining the field's
+ * length and position.
* @tx_st_done: Bit offset of TX_DONE bit in SPI_STATUS regiter.
* @clk_div: Internal clock divider
* @quirks: Bitmask of known quirks
@@ -150,6 +158,8 @@ struct s3c64xx_spi_dma_data {
struct s3c64xx_spi_port_config {
int fifo_lvl_mask[MAX_SPI_PORTS];
int rx_lvl_offset;
+ u32 rx_fifomask;
+ u32 tx_fifomask;
int tx_st_done;
int quirks;
int clk_div;
@@ -179,6 +189,11 @@ struct s3c64xx_spi_port_config {
* @tx_dma: Local transmit DMA data (e.g. chan and direction)
* @port_conf: Local SPI port configuartion data
* @port_id: Port identification number
+ * @fifo_depth: depth of the FIFO.
+ * @rx_fifomask: SPI_STATUS.RX_FIFO_LVL mask. Shifted mask defining the field's
+ * length and position.
+ * @tx_fifomask: SPI_STATUS.TX_FIFO_LVL mask. Shifted mask defining the field's
+ * length and position.
*/
struct s3c64xx_spi_driver_data {
void __iomem *regs;
@@ -198,6 +213,9 @@ struct s3c64xx_spi_driver_data {
struct s3c64xx_spi_dma_data tx_dma;
const struct s3c64xx_spi_port_config *port_conf;
unsigned int port_id;
+ unsigned int fifo_depth;
+ u32 rx_fifomask;
+ u32 tx_fifomask;
};

static void s3c64xx_flush_fifo(struct s3c64xx_spi_driver_data *sdd)
@@ -405,12 +423,10 @@ static bool s3c64xx_spi_can_dma(struct spi_controller *host,
{
struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host);

- if (sdd->rx_dma.ch && sdd->tx_dma.ch) {
- return xfer->len > (FIFO_LVL_MASK(sdd) >> 1) + 1;
- } else {
- return false;
- }
+ if (sdd->rx_dma.ch && sdd->tx_dma.ch)
+ return xfer->len >= sdd->fifo_depth;

+ return false;
}

static int s3c64xx_enable_datapath(struct s3c64xx_spi_driver_data *sdd,
@@ -495,9 +511,7 @@ static u32 s3c64xx_spi_wait_for_timeout(struct s3c64xx_spi_driver_data *sdd,
void __iomem *regs = sdd->regs;
unsigned long val = 1;
u32 status;
-
- /* max fifo depth available */
- u32 max_fifo = (FIFO_LVL_MASK(sdd) >> 1) + 1;
+ u32 max_fifo = sdd->fifo_depth;

if (timeout_ms)
val = msecs_to_loops(timeout_ms);
@@ -604,7 +618,7 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
* For any size less than the fifo size the below code is
* executed atleast once.
*/
- loops = xfer->len / ((FIFO_LVL_MASK(sdd) >> 1) + 1);
+ loops = xfer->len / sdd->fifo_depth;
buf = xfer->rx_buf;
do {
/* wait for data to be received in the fifo */
@@ -741,7 +755,7 @@ static int s3c64xx_spi_transfer_one(struct spi_controller *host,
struct spi_transfer *xfer)
{
struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host);
- const unsigned int fifo_len = (FIFO_LVL_MASK(sdd) >> 1) + 1;
+ const unsigned int fifo_len = sdd->fifo_depth;
const void *tx_buf = NULL;
void *rx_buf = NULL;
int target_len = 0, origin_len = 0;
@@ -769,10 +783,9 @@ static int s3c64xx_spi_transfer_one(struct spi_controller *host,
return status;
}

- if (!is_polling(sdd) && (xfer->len > fifo_len) &&
+ if (!is_polling(sdd) && xfer->len >= fifo_len &&
sdd->rx_dma.ch && sdd->tx_dma.ch) {
use_dma = 1;
-
} else if (xfer->len >= fifo_len) {
tx_buf = xfer->tx_buf;
rx_buf = xfer->rx_buf;
@@ -1146,6 +1159,23 @@ static inline const struct s3c64xx_spi_port_config *s3c64xx_spi_get_port_config(
return (const struct s3c64xx_spi_port_config *)platform_get_device_id(pdev)->driver_data;
}

+static void s3c64xx_spi_set_fifomask(struct s3c64xx_spi_driver_data *sdd)
+{
+ const struct s3c64xx_spi_port_config *port_conf = sdd->port_conf;
+
+ if (port_conf->rx_fifomask)
+ sdd->rx_fifomask = port_conf->rx_fifomask;
+ else
+ sdd->rx_fifomask = FIFO_LVL_MASK(sdd) <<
+ port_conf->rx_lvl_offset;
+
+ if (port_conf->tx_fifomask)
+ sdd->tx_fifomask = port_conf->tx_fifomask;
+ else
+ sdd->tx_fifomask = FIFO_LVL_MASK(sdd) <<
+ S3C64XX_SPI_ST_TX_FIFO_LVL_SHIFT;
+}
+
static int s3c64xx_spi_probe(struct platform_device *pdev)
{
struct resource *mem_res;
@@ -1191,6 +1221,10 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
sdd->port_id = pdev->id;
}

+ sdd->fifo_depth = FIFO_DEPTH(sdd);
+
+ s3c64xx_spi_set_fifomask(sdd);
+
sdd->cur_bpw = 8;

sdd->tx_dma.direction = DMA_MEM_TO_DEV;
@@ -1280,7 +1314,7 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
dev_dbg(&pdev->dev, "Samsung SoC SPI Driver loaded for Bus SPI-%d with %d Targets attached\n",
sdd->port_id, host->num_chipselect);
dev_dbg(&pdev->dev, "\tIOmem=[%pR]\tFIFO %dbytes\n",
- mem_res, (FIFO_LVL_MASK(sdd) >> 1) + 1);
+ mem_res, sdd->fifo_depth);

pm_runtime_mark_last_busy(&pdev->dev);
pm_runtime_put_autosuspend(&pdev->dev);
diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c
index 4853141cd10c..894622b6556a 100644
--- a/drivers/usb/typec/ucsi/ucsi_glink.c
+++ b/drivers/usb/typec/ucsi/ucsi_glink.c
@@ -254,6 +254,20 @@ static void pmic_glink_ucsi_notify(struct work_struct *work)
static void pmic_glink_ucsi_register(struct work_struct *work)
{
struct pmic_glink_ucsi *ucsi = container_of(work, struct pmic_glink_ucsi, register_work);
+ int orientation;
+ int i;
+
+ for (i = 0; i < PMIC_GLINK_MAX_PORTS; i++) {
+ if (!ucsi->port_orientation[i])
+ continue;
+ orientation = gpiod_get_value(ucsi->port_orientation[i]);
+
+ if (orientation >= 0) {
+ typec_switch_set(ucsi->port_switch[i],
+ orientation ? TYPEC_ORIENTATION_REVERSE
+ : TYPEC_ORIENTATION_NORMAL);
+ }
+ }

ucsi_register(ucsi->ucsi);
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fc8eb8d86ca2..5acb2cb79d4b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2410,12 +2410,65 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
return try_release_extent_state(tree, page, mask);
}

+struct btrfs_fiemap_entry {
+ u64 offset;
+ u64 phys;
+ u64 len;
+ u32 flags;
+};
+
+/*
+ * Indicate the caller of emit_fiemap_extent() that it needs to unlock the file
+ * range from the inode's io tree, unlock the subvolume tree search path, flush
+ * the fiemap cache and relock the file range and research the subvolume tree.
+ * The value here is something negative that can't be confused with a valid
+ * errno value and different from 1 because that's also a return value from
+ * fiemap_fill_next_extent() and also it's often used to mean some btree search
+ * did not find a key, so make it some distinct negative value.
+ */
+#define BTRFS_FIEMAP_FLUSH_CACHE (-(MAX_ERRNO + 1))
+
/*
- * To cache previous fiemap extent
+ * Used to:
*
- * Will be used for merging fiemap extent
+ * - Cache the next entry to be emitted to the fiemap buffer, so that we can
+ * merge extents that are contiguous and can be grouped as a single one;
+ *
+ * - Store extents ready to be written to the fiemap buffer in an intermediary
+ * buffer. This intermediary buffer is to ensure that in case the fiemap
+ * buffer is memory mapped to the fiemap target file, we don't deadlock
+ * during btrfs_page_mkwrite(). This is because during fiemap we are locking
+ * an extent range in order to prevent races with delalloc flushing and
+ * ordered extent completion, which is needed in order to reliably detect
+ * delalloc in holes and prealloc extents. And this can lead to a deadlock
+ * if the fiemap buffer is memory mapped to the file we are running fiemap
+ * against (a silly, useless in practice scenario, but possible) because
+ * btrfs_page_mkwrite() will try to lock the same extent range.
*/
struct fiemap_cache {
+ /* An array of ready fiemap entries. */
+ struct btrfs_fiemap_entry *entries;
+ /* Number of entries in the entries array. */
+ int entries_size;
+ /* Index of the next entry in the entries array to write to. */
+ int entries_pos;
+ /*
+ * Once the entries array is full, this indicates what's the offset for
+ * the next file extent item we must search for in the inode's subvolume
+ * tree after unlocking the extent range in the inode's io tree and
+ * releasing the search path.
+ */
+ u64 next_search_offset;
+ /*
+ * This matches struct fiemap_extent_info::fi_mapped_extents, we use it
+ * to count ourselves emitted extents and stop instead of relying on
+ * fiemap_fill_next_extent() because we buffer ready fiemap entries at
+ * the @entries array, and we want to stop as soon as we hit the max
+ * amount of extents to map, not just to save time but also to make the
+ * logic at extent_fiemap() simpler.
+ */
+ unsigned int extents_mapped;
+ /* Fields for the cached extent (unsubmitted, not ready, extent). */
u64 offset;
u64 phys;
u64 len;
@@ -2423,6 +2476,28 @@ struct fiemap_cache {
bool cached;
};

+static int flush_fiemap_cache(struct fiemap_extent_info *fieinfo,
+ struct fiemap_cache *cache)
+{
+ for (int i = 0; i < cache->entries_pos; i++) {
+ struct btrfs_fiemap_entry *entry = &cache->entries[i];
+ int ret;
+
+ ret = fiemap_fill_next_extent(fieinfo, entry->offset,
+ entry->phys, entry->len,
+ entry->flags);
+ /*
+ * Ignore 1 (reached max entries) because we keep track of that
+ * ourselves in emit_fiemap_extent().
+ */
+ if (ret < 0)
+ return ret;
+ }
+ cache->entries_pos = 0;
+
+ return 0;
+}
+
/*
* Helper to submit fiemap extent.
*
@@ -2437,8 +2512,8 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
struct fiemap_cache *cache,
u64 offset, u64 phys, u64 len, u32 flags)
{
+ struct btrfs_fiemap_entry *entry;
u64 cache_end;
- int ret = 0;

/* Set at the end of extent_fiemap(). */
ASSERT((flags & FIEMAP_EXTENT_LAST) == 0);
@@ -2451,7 +2526,9 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
* find an extent that starts at an offset behind the end offset of the
* previous extent we processed. This happens if fiemap is called
* without FIEMAP_FLAG_SYNC and there are ordered extents completing
- * while we call btrfs_next_leaf() (through fiemap_next_leaf_item()).
+ * after we had to unlock the file range, release the search path, emit
+ * the fiemap extents stored in the buffer (cache->entries array) and
+ * the lock the remainder of the range and re-search the btree.
*
* For example we are in leaf X processing its last item, which is the
* file extent item for file range [512K, 1M[, and after
@@ -2564,11 +2641,35 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,

emit:
/* Not mergeable, need to submit cached one */
- ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
- cache->len, cache->flags);
- cache->cached = false;
- if (ret)
- return ret;
+
+ if (cache->entries_pos == cache->entries_size) {
+ /*
+ * We will need to research for the end offset of the last
+ * stored extent and not from the current offset, because after
+ * unlocking the range and releasing the path, if there's a hole
+ * between that end offset and this current offset, a new extent
+ * may have been inserted due to a new write, so we don't want
+ * to miss it.
+ */
+ entry = &cache->entries[cache->entries_size - 1];
+ cache->next_search_offset = entry->offset + entry->len;
+ cache->cached = false;
+
+ return BTRFS_FIEMAP_FLUSH_CACHE;
+ }
+
+ entry = &cache->entries[cache->entries_pos];
+ entry->offset = cache->offset;
+ entry->phys = cache->phys;
+ entry->len = cache->len;
+ entry->flags = cache->flags;
+ cache->entries_pos++;
+ cache->extents_mapped++;
+
+ if (cache->extents_mapped == fieinfo->fi_extents_max) {
+ cache->cached = false;
+ return 1;
+ }
assign:
cache->cached = true;
cache->offset = offset;
@@ -2694,8 +2795,8 @@ static int fiemap_search_slot(struct btrfs_inode *inode, struct btrfs_path *path
* neighbour leaf).
* We also need the private clone because holding a read lock on an
* extent buffer of the subvolume's b+tree will make lockdep unhappy
- * when we call fiemap_fill_next_extent(), because that may cause a page
- * fault when filling the user space buffer with fiemap data.
+ * when we check if extents are shared, as backref walking may need to
+ * lock the same leaf we are processing.
*/
clone = btrfs_clone_extent_buffer(path->nodes[0]);
if (!clone)
@@ -2735,34 +2836,16 @@ static int fiemap_process_hole(struct btrfs_inode *inode,
* it beyond i_size.
*/
while (cur_offset < end && cur_offset < i_size) {
- struct extent_state *cached_state = NULL;
u64 delalloc_start;
u64 delalloc_end;
u64 prealloc_start;
- u64 lockstart;
- u64 lockend;
u64 prealloc_len = 0;
bool delalloc;

- lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize);
- lockend = round_up(end, inode->root->fs_info->sectorsize);
-
- /*
- * We are only locking for the delalloc range because that's the
- * only thing that can change here. With fiemap we have a lock
- * on the inode, so no buffered or direct writes can happen.
- *
- * However mmaps and normal page writeback will cause this to
- * change arbitrarily. We have to lock the extent lock here to
- * make sure that nobody messes with the tree while we're doing
- * btrfs_find_delalloc_in_range.
- */
- lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end,
delalloc_cached_state,
&delalloc_start,
&delalloc_end);
- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
if (!delalloc)
break;

@@ -2930,6 +3013,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
const u64 ino = btrfs_ino(inode);
+ struct extent_state *cached_state = NULL;
struct extent_state *delalloc_cached_state = NULL;
struct btrfs_path *path;
struct fiemap_cache cache = { 0 };
@@ -2942,18 +3026,23 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
bool stopped = false;
int ret;

+ cache.entries_size = PAGE_SIZE / sizeof(struct btrfs_fiemap_entry);
+ cache.entries = kmalloc_array(cache.entries_size,
+ sizeof(struct btrfs_fiemap_entry),
+ GFP_KERNEL);
backref_ctx = btrfs_alloc_backref_share_check_ctx();
path = btrfs_alloc_path();
- if (!backref_ctx || !path) {
+ if (!cache.entries || !backref_ctx || !path) {
ret = -ENOMEM;
goto out;
}

+restart:
range_start = round_down(start, sectorsize);
range_end = round_up(start + len, sectorsize);
prev_extent_end = range_start;

- btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
+ lock_extent(&inode->io_tree, range_start, range_end, &cached_state);

ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
if (ret < 0)
@@ -3079,7 +3168,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
if (ret < 0) {
goto out_unlock;
} else if (ret > 0) {
- /* fiemap_fill_next_extent() told us to stop. */
+ /* emit_fiemap_extent() told us to stop. */
stopped = true;
break;
}
@@ -3102,16 +3191,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
}

check_eof_delalloc:
- /*
- * Release (and free) the path before emitting any final entries to
- * fiemap_fill_next_extent() to keep lockdep happy. This is because
- * once we find no more file extent items exist, we may have a
- * non-cloned leaf, and fiemap_fill_next_extent() can trigger page
- * faults when copying data to the user space buffer.
- */
- btrfs_free_path(path);
- path = NULL;
-
if (!stopped && prev_extent_end < range_end) {
ret = fiemap_process_hole(inode, fieinfo, &cache,
&delalloc_cached_state, backref_ctx,
@@ -3125,28 +3204,16 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
const u64 i_size = i_size_read(&inode->vfs_inode);

if (prev_extent_end < i_size) {
- struct extent_state *cached_state = NULL;
u64 delalloc_start;
u64 delalloc_end;
- u64 lockstart;
- u64 lockend;
bool delalloc;

- lockstart = round_down(prev_extent_end, sectorsize);
- lockend = round_up(i_size, sectorsize);
-
- /*
- * See the comment in fiemap_process_hole as to why
- * we're doing the locking here.
- */
- lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
delalloc = btrfs_find_delalloc_in_range(inode,
prev_extent_end,
i_size - 1,
&delalloc_cached_state,
&delalloc_start,
&delalloc_end);
- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
if (!delalloc)
cache.flags |= FIEMAP_EXTENT_LAST;
} else {
@@ -3154,12 +3221,39 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
}
}

- ret = emit_last_fiemap_cache(fieinfo, &cache);
-
out_unlock:
- btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
+ unlock_extent(&inode->io_tree, range_start, range_end, &cached_state);
+
+ if (ret == BTRFS_FIEMAP_FLUSH_CACHE) {
+ btrfs_release_path(path);
+ ret = flush_fiemap_cache(fieinfo, &cache);
+ if (ret)
+ goto out;
+ len -= cache.next_search_offset - start;
+ start = cache.next_search_offset;
+ goto restart;
+ } else if (ret < 0) {
+ goto out;
+ }
+
+ /*
+ * Must free the path before emitting to the fiemap buffer because we
+ * may have a non-cloned leaf and if the fiemap buffer is memory mapped
+ * to a file, a write into it (through btrfs_page_mkwrite()) may trigger
+ * waiting for an ordered extent that in order to complete needs to
+ * modify that leaf, therefore leading to a deadlock.
+ */
+ btrfs_free_path(path);
+ path = NULL;
+
+ ret = flush_fiemap_cache(fieinfo, &cache);
+ if (ret)
+ goto out;
+
+ ret = emit_last_fiemap_cache(fieinfo, &cache);
out:
free_extent_state(delalloc_cached_state);
+ kfree(cache.entries);
btrfs_free_backref_share_ctx(backref_ctx);
btrfs_free_path(path);
return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ca79c2b8adc4..1ac14223ffb5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7813,6 +7813,7 @@ struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
+ struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
int ret;

ret = fiemap_prep(inode, fieinfo, start, &len, 0);
@@ -7838,7 +7839,26 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
return ret;
}

- return extent_fiemap(BTRFS_I(inode), fieinfo, start, len);
+ btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED);
+
+ /*
+ * We did an initial flush to avoid holding the inode's lock while
+ * triggering writeback and waiting for the completion of IO and ordered
+ * extents. Now after we locked the inode we do it again, because it's
+ * possible a new write may have happened in between those two steps.
+ */
+ if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) {
+ ret = btrfs_wait_ordered_range(inode, 0, LLONG_MAX);
+ if (ret) {
+ btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
+ return ret;
+ }
+ }
+
+ ret = extent_fiemap(btrfs_inode, fieinfo, start, len);
+ btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
+
+ return ret;
}

static int btrfs_writepages(struct address_space *mapping,
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 522596060252..c7e52d980cd7 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2886,12 +2886,9 @@ static void
nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
{
struct nfs4_client *clp = cb->cb_clp;
- struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

- spin_lock(&nn->client_lock);
clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
- put_client_renew_locked(clp);
- spin_unlock(&nn->client_lock);
+ drop_client(clp);
}

static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
@@ -6273,7 +6270,7 @@ deleg_reaper(struct nfsd_net *nn)
list_add(&clp->cl_ra_cblist, &cblist);

/* release in nfsd4_cb_recall_any_release */
- atomic_inc(&clp->cl_rpc_users);
+ kref_get(&clp->cl_nfsdfs.cl_ref);
set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
clp->cl_ra_time = ktime_get_boottime_seconds();
}
diff --git a/fs/pipe.c b/fs/pipe.c
index a234035cc375..ba4376341ddd 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -425,6 +425,18 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
bool was_empty = false;
bool wake_next_writer = false;

+ /*
+ * Reject writing to watch queue pipes before the point where we lock
+ * the pipe.
+ * Otherwise, lockdep would be unhappy if the caller already has another
+ * pipe locked.
+ * If we had to support locking a normal pipe and a notification pipe at
+ * the same time, we could set up lockdep annotations for that, but
+ * since we don't actually need that, it's simpler to just bail here.
+ */
+ if (pipe_has_watch_queue(pipe))
+ return -EXDEV;
+
/* Null write succeeds. */
if (unlikely(total_len == 0))
return 0;
@@ -437,11 +449,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
goto out;
}

- if (pipe_has_watch_queue(pipe)) {
- ret = -EXDEV;
- goto out;
- }
-
/*
* If it wasn't empty we try to merge new data into
* the last buffer.
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 15e1215bc4e5..1a9e705d6500 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -401,6 +401,7 @@ smb2_close_cached_fid(struct kref *ref)
{
struct cached_fid *cfid = container_of(ref, struct cached_fid,
refcount);
+ int rc;

spin_lock(&cfid->cfids->cfid_list_lock);
if (cfid->on_list) {
@@ -414,9 +415,10 @@ smb2_close_cached_fid(struct kref *ref)
cfid->dentry = NULL;

if (cfid->is_open) {
- SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
+ rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
cfid->fid.volatile_fid);
- atomic_dec(&cfid->tcon->num_remote_opens);
+ if (rc != -EBUSY && rc != -EAGAIN)
+ atomic_dec(&cfid->tcon->num_remote_opens);
}

free_cached_dir(cfid);
diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c
index 7206167f4184..6c85edb8635d 100644
--- a/fs/smb/client/cifs_debug.c
+++ b/fs/smb/client/cifs_debug.c
@@ -250,6 +250,8 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v)
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
spin_lock(&tcon->open_file_lock);
list_for_each_entry(cfile, &tcon->openFileList, tlist) {
@@ -654,6 +656,8 @@ static ssize_t cifs_stats_proc_write(struct file *file,
}
#endif /* CONFIG_CIFS_STATS2 */
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
atomic_set(&tcon->num_smbs_sent, 0);
spin_lock(&tcon->stat_lock);
@@ -732,6 +736,8 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
}
#endif /* STATS2 */
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
i++;
seq_printf(m, "\n%d) %s", i, tcon->tree_name);
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 2131638f26d0..fcb93a66e47c 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -159,6 +159,7 @@ struct workqueue_struct *decrypt_wq;
struct workqueue_struct *fileinfo_put_wq;
struct workqueue_struct *cifsoplockd_wq;
struct workqueue_struct *deferredclose_wq;
+struct workqueue_struct *serverclose_wq;
__u32 cifs_lock_secret;

/*
@@ -1877,6 +1878,13 @@ init_cifs(void)
goto out_destroy_cifsoplockd_wq;
}

+ serverclose_wq = alloc_workqueue("serverclose",
+ WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+ if (!serverclose_wq) {
+ rc = -ENOMEM;
+ goto out_destroy_serverclose_wq;
+ }
+
rc = cifs_init_inodecache();
if (rc)
goto out_destroy_deferredclose_wq;
@@ -1951,6 +1959,8 @@ init_cifs(void)
destroy_workqueue(decrypt_wq);
out_destroy_cifsiod_wq:
destroy_workqueue(cifsiod_wq);
+out_destroy_serverclose_wq:
+ destroy_workqueue(serverclose_wq);
out_clean_proc:
cifs_proc_clean();
return rc;
@@ -1980,6 +1990,7 @@ exit_cifs(void)
destroy_workqueue(cifsoplockd_wq);
destroy_workqueue(decrypt_wq);
destroy_workqueue(fileinfo_put_wq);
+ destroy_workqueue(serverclose_wq);
destroy_workqueue(cifsiod_wq);
cifs_proc_clean();
}
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 35a12413bbee..a878b1e5aa31 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -425,10 +425,10 @@ struct smb_version_operations {
/* set fid protocol-specific info */
void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32);
/* close a file */
- void (*close)(const unsigned int, struct cifs_tcon *,
+ int (*close)(const unsigned int, struct cifs_tcon *,
struct cifs_fid *);
/* close a file, returning file attributes and timestamps */
- void (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon,
+ int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon,
struct cifsFileInfo *pfile_info);
/* send a flush request to the server */
int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *);
@@ -1408,6 +1408,7 @@ struct cifsFileInfo {
bool invalidHandle:1; /* file closed via session abend */
bool swapfile:1;
bool oplock_break_cancelled:1;
+ bool offload:1; /* offload final part of _put to a wq */
unsigned int oplock_epoch; /* epoch from the lease break */
__u32 oplock_level; /* oplock/lease level from the lease break */
int count;
@@ -1416,6 +1417,7 @@ struct cifsFileInfo {
struct cifs_search_info srch_inf;
struct work_struct oplock_break; /* work for oplock breaks */
struct work_struct put; /* work for the final part of _put */
+ struct work_struct serverclose; /* work for serverclose */
struct delayed_work deferred;
bool deferred_close_scheduled; /* Flag to indicate close is scheduled */
char *symlink_target;
@@ -2073,6 +2075,7 @@ extern struct workqueue_struct *decrypt_wq;
extern struct workqueue_struct *fileinfo_put_wq;
extern struct workqueue_struct *cifsoplockd_wq;
extern struct workqueue_struct *deferredclose_wq;
+extern struct workqueue_struct *serverclose_wq;
extern __u32 cifs_lock_secret;

extern mempool_t *cifs_mid_poolp;
@@ -2278,4 +2281,14 @@ struct smb2_compound_vars {
struct smb2_file_link_info link_info;
};

+static inline bool cifs_ses_exiting(struct cifs_ses *ses)
+{
+ bool ret;
+
+ spin_lock(&ses->ses_lock);
+ ret = ses->ses_status == SES_EXITING;
+ spin_unlock(&ses->ses_lock);
+ return ret;
+}
+
#endif /* _CIFS_GLOB_H */
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 4c958129181d..97776dd12b6b 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -178,6 +178,8 @@ cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,

spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
spin_lock(&ses->chan_lock);
for (i = 0; i < ses->chan_count; i++) {
if (!ses->chans[i].server)
@@ -3981,13 +3983,14 @@ cifs_set_vol_auth(struct smb3_fs_context *ctx, struct cifs_ses *ses)
}

static struct cifs_tcon *
-cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
+__cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
{
int rc;
struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb);
struct cifs_ses *ses;
struct cifs_tcon *tcon = NULL;
struct smb3_fs_context *ctx;
+ char *origin_fullpath = NULL;

ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (ctx == NULL)
@@ -4011,6 +4014,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
ctx->sign = master_tcon->ses->sign;
ctx->seal = master_tcon->seal;
ctx->witness = master_tcon->use_witness;
+ ctx->dfs_root_ses = master_tcon->ses->dfs_root_ses;

rc = cifs_set_vol_auth(ctx, master_tcon->ses);
if (rc) {
@@ -4030,12 +4034,39 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
goto out;
}

+#ifdef CONFIG_CIFS_DFS_UPCALL
+ spin_lock(&master_tcon->tc_lock);
+ if (master_tcon->origin_fullpath) {
+ spin_unlock(&master_tcon->tc_lock);
+ origin_fullpath = dfs_get_path(cifs_sb, cifs_sb->ctx->source);
+ if (IS_ERR(origin_fullpath)) {
+ tcon = ERR_CAST(origin_fullpath);
+ origin_fullpath = NULL;
+ cifs_put_smb_ses(ses);
+ goto out;
+ }
+ } else {
+ spin_unlock(&master_tcon->tc_lock);
+ }
+#endif
+
tcon = cifs_get_tcon(ses, ctx);
if (IS_ERR(tcon)) {
cifs_put_smb_ses(ses);
goto out;
}

+#ifdef CONFIG_CIFS_DFS_UPCALL
+ if (origin_fullpath) {
+ spin_lock(&tcon->tc_lock);
+ tcon->origin_fullpath = origin_fullpath;
+ spin_unlock(&tcon->tc_lock);
+ origin_fullpath = NULL;
+ queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work,
+ dfs_cache_get_ttl() * HZ);
+ }
+#endif
+
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
if (cap_unix(ses))
reset_cifs_unix_caps(0, tcon, NULL, ctx);
@@ -4044,11 +4075,23 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
out:
kfree(ctx->username);
kfree_sensitive(ctx->password);
+ kfree(origin_fullpath);
kfree(ctx);

return tcon;
}

+static struct cifs_tcon *
+cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
+{
+ struct cifs_tcon *ret;
+
+ cifs_mount_lock();
+ ret = __cifs_construct_tcon(cifs_sb, fsuid);
+ cifs_mount_unlock();
+ return ret;
+}
+
struct cifs_tcon *
cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb)
{
diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c
index 580a27a3a7e6..855468a32904 100644
--- a/fs/smb/client/dir.c
+++ b/fs/smb/client/dir.c
@@ -189,6 +189,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
int disposition;
struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_open_parms oparms;
+ int rdwr_for_fscache = 0;

*oplock = 0;
if (tcon->ses->server->oplocks)
@@ -200,6 +201,10 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
return PTR_ERR(full_path);
}

+ /* If we're caching, we need to be able to fill in around partial writes. */
+ if (cifs_fscache_enabled(inode) && (oflags & O_ACCMODE) == O_WRONLY)
+ rdwr_for_fscache = 1;
+
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open &&
(CIFS_UNIX_POSIX_PATH_OPS_CAP &
@@ -276,6 +281,8 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
desired_access |= GENERIC_READ; /* is this too little? */
if (OPEN_FMODE(oflags) & FMODE_WRITE)
desired_access |= GENERIC_WRITE;
+ if (rdwr_for_fscache == 1)
+ desired_access |= GENERIC_READ;

disposition = FILE_OVERWRITE_IF;
if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
@@ -304,6 +311,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
if (!tcon->unix_ext && (mode & S_IWUGO) == 0)
create_options |= CREATE_OPTION_READONLY;

+retry_open:
oparms = (struct cifs_open_parms) {
.tcon = tcon,
.cifs_sb = cifs_sb,
@@ -317,8 +325,15 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
rc = server->ops->open(xid, &oparms, oplock, buf);
if (rc) {
cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc);
+ if (rc == -EACCES && rdwr_for_fscache == 1) {
+ desired_access &= ~GENERIC_READ;
+ rdwr_for_fscache = 2;
+ goto retry_open;
+ }
goto out;
}
+ if (rdwr_for_fscache == 2)
+ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index c711d5eb2987..53a8c633221b 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -206,12 +206,12 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
*/
}

-static inline int cifs_convert_flags(unsigned int flags)
+static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
{
if ((flags & O_ACCMODE) == O_RDONLY)
return GENERIC_READ;
else if ((flags & O_ACCMODE) == O_WRONLY)
- return GENERIC_WRITE;
+ return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
else if ((flags & O_ACCMODE) == O_RDWR) {
/* GENERIC_ALL is too much permission to request
can cause unnecessary access denied on create */
@@ -348,11 +348,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
int create_options = CREATE_NOT_DIR;
struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_open_parms oparms;
+ int rdwr_for_fscache = 0;

if (!server->ops->open)
return -ENOSYS;

- desired_access = cifs_convert_flags(f_flags);
+ /* If we're caching, we need to be able to fill in around partial writes. */
+ if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
+ rdwr_for_fscache = 1;
+
+ desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);

/*********************************************************************
* open flag mapping table:
@@ -389,6 +394,7 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
if (f_flags & O_DIRECT)
create_options |= CREATE_NO_BUFFER;

+retry_open:
oparms = (struct cifs_open_parms) {
.tcon = tcon,
.cifs_sb = cifs_sb,
@@ -400,8 +406,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
};

rc = server->ops->open(xid, &oparms, oplock, buf);
- if (rc)
+ if (rc) {
+ if (rc == -EACCES && rdwr_for_fscache == 1) {
+ desired_access = cifs_convert_flags(f_flags, 0);
+ rdwr_for_fscache = 2;
+ goto retry_open;
+ }
return rc;
+ }
+ if (rdwr_for_fscache == 2)
+ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);

/* TODO: Add support for calling posix query info but with passing in fid */
if (tcon->unix_ext)
@@ -445,6 +459,7 @@ cifs_down_write(struct rw_semaphore *sem)
}

static void cifsFileInfo_put_work(struct work_struct *work);
+void serverclose_work(struct work_struct *work);

struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
struct tcon_link *tlink, __u32 oplock,
@@ -491,6 +506,7 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
cfile->tlink = cifs_get_tlink(tlink);
INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
INIT_WORK(&cfile->put, cifsFileInfo_put_work);
+ INIT_WORK(&cfile->serverclose, serverclose_work);
INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
mutex_init(&cfile->fh_mutex);
spin_lock_init(&cfile->file_info_lock);
@@ -582,6 +598,40 @@ static void cifsFileInfo_put_work(struct work_struct *work)
cifsFileInfo_put_final(cifs_file);
}

+void serverclose_work(struct work_struct *work)
+{
+ struct cifsFileInfo *cifs_file = container_of(work,
+ struct cifsFileInfo, serverclose);
+
+ struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
+
+ struct TCP_Server_Info *server = tcon->ses->server;
+ int rc = 0;
+ int retries = 0;
+ int MAX_RETRIES = 4;
+
+ do {
+ if (server->ops->close_getattr)
+ rc = server->ops->close_getattr(0, tcon, cifs_file);
+ else if (server->ops->close)
+ rc = server->ops->close(0, tcon, &cifs_file->fid);
+
+ if (rc == -EBUSY || rc == -EAGAIN) {
+ retries++;
+ msleep(250);
+ }
+ } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
+ );
+
+ if (retries == MAX_RETRIES)
+ pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);
+
+ if (cifs_file->offload)
+ queue_work(fileinfo_put_wq, &cifs_file->put);
+ else
+ cifsFileInfo_put_final(cifs_file);
+}
+
/**
* cifsFileInfo_put - release a reference of file priv data
*
@@ -622,10 +672,13 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
struct cifs_fid fid = {};
struct cifs_pending_open open;
bool oplock_break_cancelled;
+ bool serverclose_offloaded = false;

spin_lock(&tcon->open_file_lock);
spin_lock(&cifsi->open_file_lock);
spin_lock(&cifs_file->file_info_lock);
+
+ cifs_file->offload = offload;
if (--cifs_file->count > 0) {
spin_unlock(&cifs_file->file_info_lock);
spin_unlock(&cifsi->open_file_lock);
@@ -667,13 +720,20 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
struct TCP_Server_Info *server = tcon->ses->server;
unsigned int xid;
+ int rc = 0;

xid = get_xid();
if (server->ops->close_getattr)
- server->ops->close_getattr(xid, tcon, cifs_file);
+ rc = server->ops->close_getattr(xid, tcon, cifs_file);
else if (server->ops->close)
- server->ops->close(xid, tcon, &cifs_file->fid);
+ rc = server->ops->close(xid, tcon, &cifs_file->fid);
_free_xid(xid);
+
+ if (rc == -EBUSY || rc == -EAGAIN) {
+ // Server close failed, hence offloading it as an async op
+ queue_work(serverclose_wq, &cifs_file->serverclose);
+ serverclose_offloaded = true;
+ }
}

if (oplock_break_cancelled)
@@ -681,10 +741,15 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,

cifs_del_pending_open(&open);

- if (offload)
- queue_work(fileinfo_put_wq, &cifs_file->put);
- else
- cifsFileInfo_put_final(cifs_file);
+ // if serverclose has been offloaded to wq (on failure), it will
+ // handle offloading put as well. If serverclose not offloaded,
+ // we need to handle offloading put here.
+ if (!serverclose_offloaded) {
+ if (offload)
+ queue_work(fileinfo_put_wq, &cifs_file->put);
+ else
+ cifsFileInfo_put_final(cifs_file);
+ }
}

int cifs_open(struct inode *inode, struct file *file)
@@ -834,11 +899,11 @@ int cifs_open(struct inode *inode, struct file *file)
use_cache:
fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
file->f_mode & FMODE_WRITE);
- if (file->f_flags & O_DIRECT &&
- (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
- file->f_flags & O_APPEND))
- cifs_invalidate_cache(file_inode(file),
- FSCACHE_INVAL_DIO_WRITE);
+ if (!(file->f_flags & O_DIRECT))
+ goto out;
+ if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
+ goto out;
+ cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);

out:
free_dentry_path(page);
@@ -903,6 +968,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
int disposition = FILE_OPEN;
int create_options = CREATE_NOT_DIR;
struct cifs_open_parms oparms;
+ int rdwr_for_fscache = 0;

xid = get_xid();
mutex_lock(&cfile->fh_mutex);
@@ -966,7 +1032,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

- desired_access = cifs_convert_flags(cfile->f_flags);
+ /* If we're caching, we need to be able to fill in around partial writes. */
+ if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
+ rdwr_for_fscache = 1;
+
+ desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);

/* O_SYNC also has bit for O_DSYNC so following check picks up either */
if (cfile->f_flags & O_SYNC)
@@ -978,6 +1048,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
if (server->ops->get_lease_key)
server->ops->get_lease_key(inode, &cfile->fid);

+retry_open:
oparms = (struct cifs_open_parms) {
.tcon = tcon,
.cifs_sb = cifs_sb,
@@ -1003,6 +1074,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
/* indicate that we need to relock the file */
oparms.reconnect = true;
}
+ if (rc == -EACCES && rdwr_for_fscache == 1) {
+ desired_access = cifs_convert_flags(cfile->f_flags, 0);
+ rdwr_for_fscache = 2;
+ goto retry_open;
+ }

if (rc) {
mutex_unlock(&cfile->fh_mutex);
@@ -1011,6 +1087,9 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
goto reopen_error_exit;
}

+ if (rdwr_for_fscache == 2)
+ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
+
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index e4a6b240d226..58567ae617b9 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -37,7 +37,7 @@
#include "rfc1002pdu.h"
#include "fs_context.h"

-static DEFINE_MUTEX(cifs_mount_mutex);
+DEFINE_MUTEX(cifs_mount_mutex);

static const match_table_t cifs_smb_version_tokens = {
{ Smb_1, SMB1_VERSION_STRING },
@@ -752,9 +752,9 @@ static int smb3_get_tree(struct fs_context *fc)

if (err)
return err;
- mutex_lock(&cifs_mount_mutex);
+ cifs_mount_lock();
ret = smb3_get_tree_common(fc);
- mutex_unlock(&cifs_mount_mutex);
+ cifs_mount_unlock();
return ret;
}

diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h
index cf46916286d0..8cfc25b609b6 100644
--- a/fs/smb/client/fs_context.h
+++ b/fs/smb/client/fs_context.h
@@ -293,4 +293,16 @@ extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb);
#define MAX_CACHED_FIDS 16
extern char *cifs_sanitize_prepath(char *prepath, gfp_t gfp);

+extern struct mutex cifs_mount_mutex;
+
+static inline void cifs_mount_lock(void)
+{
+ mutex_lock(&cifs_mount_mutex);
+}
+
+static inline void cifs_mount_unlock(void)
+{
+ mutex_unlock(&cifs_mount_mutex);
+}
+
#endif
diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c
index e5cad149f5a2..a4ee801b2939 100644
--- a/fs/smb/client/fscache.c
+++ b/fs/smb/client/fscache.c
@@ -12,6 +12,16 @@
#include "cifs_fs_sb.h"
#include "cifsproto.h"

+/*
+ * Key for fscache inode. [!] Contents must match comparisons in cifs_find_inode().
+ */
+struct cifs_fscache_inode_key {
+
+ __le64 uniqueid; /* server inode number */
+ __le64 createtime; /* creation time on server */
+ u8 type; /* S_IFMT file type */
+} __packed;
+
static void cifs_fscache_fill_volume_coherency(
struct cifs_tcon *tcon,
struct cifs_fscache_volume_coherency_data *cd)
@@ -97,15 +107,19 @@ void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon)
void cifs_fscache_get_inode_cookie(struct inode *inode)
{
struct cifs_fscache_inode_coherency_data cd;
+ struct cifs_fscache_inode_key key;
struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);

+ key.uniqueid = cpu_to_le64(cifsi->uniqueid);
+ key.createtime = cpu_to_le64(cifsi->createtime);
+ key.type = (inode->i_mode & S_IFMT) >> 12;
cifs_fscache_fill_coherency(&cifsi->netfs.inode, &cd);

cifsi->netfs.cache =
fscache_acquire_cookie(tcon->fscache, 0,
- &cifsi->uniqueid, sizeof(cifsi->uniqueid),
+ &key, sizeof(key),
&cd, sizeof(cd),
i_size_read(&cifsi->netfs.inode));
if (cifsi->netfs.cache)
diff --git a/fs/smb/client/fscache.h b/fs/smb/client/fscache.h
index a3d73720914f..1f2ea9f5cc9a 100644
--- a/fs/smb/client/fscache.h
+++ b/fs/smb/client/fscache.h
@@ -109,6 +109,11 @@ static inline void cifs_readahead_to_fscache(struct inode *inode,
__cifs_readahead_to_fscache(inode, pos, len);
}

+static inline bool cifs_fscache_enabled(struct inode *inode)
+{
+ return fscache_cookie_enabled(cifs_inode_cookie(inode));
+}
+
#else /* CONFIG_CIFS_FSCACHE */
static inline
void cifs_fscache_fill_coherency(struct inode *inode,
@@ -124,6 +129,7 @@ static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {}
static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update) {}
static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; }
static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {}
+static inline bool cifs_fscache_enabled(struct inode *inode) { return false; }

static inline int cifs_fscache_query_occupancy(struct inode *inode,
pgoff_t first, unsigned int nr_pages,
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index cb9e719e67ae..fa6330d586e8 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -1390,6 +1390,8 @@ cifs_find_inode(struct inode *inode, void *opaque)
{
struct cifs_fattr *fattr = opaque;

+ /* [!] The compared values must be the same in struct cifs_fscache_inode_key. */
+
/* don't match inode with different uniqueid */
if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
return 0;
diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c
index 73ededa8eba5..204dd7c47126 100644
--- a/fs/smb/client/ioctl.c
+++ b/fs/smb/client/ioctl.c
@@ -246,7 +246,9 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(server_it, &cifs_tcp_ses_list, tcp_ses_list) {
list_for_each_entry(ses_it, &server_it->smb_ses_list, smb_ses_list) {
- if (ses_it->Suid == out.session_id) {
+ spin_lock(&ses_it->ses_lock);
+ if (ses_it->ses_status != SES_EXITING &&
+ ses_it->Suid == out.session_id) {
ses = ses_it;
/*
* since we are using the session outside the crit
@@ -254,9 +256,11 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug
* so increment its refcount
*/
cifs_smb_ses_inc_refcount(ses);
+ spin_unlock(&ses_it->ses_lock);
found = true;
goto search_end;
}
+ spin_unlock(&ses_it->ses_lock);
}
}
search_end:
diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c
index c2137ea3c253..ef573e3f8e52 100644
--- a/fs/smb/client/misc.c
+++ b/fs/smb/client/misc.c
@@ -489,6 +489,8 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->tid != buf->Tid)
continue;
diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c
index 64e25233e85d..1aebcf95c195 100644
--- a/fs/smb/client/smb1ops.c
+++ b/fs/smb/client/smb1ops.c
@@ -753,11 +753,11 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
}

-static void
+static int
cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *fid)
{
- CIFSSMBClose(xid, tcon, fid->netfid);
+ return CIFSSMBClose(xid, tcon, fid->netfid);
}

static int
diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c
index 82b84a4941dd..cc72be5a93a9 100644
--- a/fs/smb/client/smb2misc.c
+++ b/fs/smb/client/smb2misc.c
@@ -622,6 +622,8 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server)
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
spin_lock(&tcon->open_file_lock);
cifs_stats_inc(
@@ -697,6 +699,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {

spin_lock(&tcon->open_file_lock);
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 978a9f409857..04fea874d0a3 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -1392,14 +1392,14 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
memcpy(cfile->fid.create_guid, fid->create_guid, 16);
}

-static void
+static int
smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *fid)
{
- SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
+ return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
}

-static void
+static int
smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
struct cifsFileInfo *cfile)
{
@@ -1410,7 +1410,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, &file_inf);
if (rc)
- return;
+ return rc;

inode = d_inode(cfile->dentry);

@@ -1439,6 +1439,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,

/* End of file and Attributes should not have to be updated on close */
spin_unlock(&inode->i_lock);
+ return rc;
}

static int
@@ -2429,6 +2430,8 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server)

spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) {
spin_lock(&tcon->tc_lock);
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 4d7d0bdf7a47..94bd4c6d2d68 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -3549,9 +3549,9 @@ __SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
memcpy(&pbuf->network_open_info,
&rsp->network_open_info,
sizeof(pbuf->network_open_info));
+ atomic_dec(&tcon->num_remote_opens);
}

- atomic_dec(&tcon->num_remote_opens);
close_exit:
SMB2_close_free(&rqst);
free_rsp_buf(resp_buftype, rsp);
diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h
index 0ebf91ffa236..4464a62228cf 100644
--- a/fs/smb/server/ksmbd_netlink.h
+++ b/fs/smb/server/ksmbd_netlink.h
@@ -166,7 +166,8 @@ struct ksmbd_share_config_response {
__u16 force_uid;
__u16 force_gid;
__s8 share_name[KSMBD_REQ_MAX_SHARE_NAME];
- __u32 reserved[112]; /* Reserved room */
+ __u32 reserved[111]; /* Reserved room */
+ __u32 payload_sz;
__u32 veto_list_sz;
__s8 ____payload[];
};
diff --git a/fs/smb/server/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c
index 328a412259dc..a2f0a2edceb8 100644
--- a/fs/smb/server/mgmt/share_config.c
+++ b/fs/smb/server/mgmt/share_config.c
@@ -158,7 +158,12 @@ static struct ksmbd_share_config *share_config_request(struct unicode_map *um,
share->name = kstrdup(name, GFP_KERNEL);

if (!test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) {
- share->path = kstrdup(ksmbd_share_config_path(resp),
+ int path_len = PATH_MAX;
+
+ if (resp->payload_sz)
+ path_len = resp->payload_sz - resp->veto_list_sz;
+
+ share->path = kstrndup(ksmbd_share_config_path(resp), path_len,
GFP_KERNEL);
if (share->path)
share->path_sz = strlen(share->path);
diff --git a/fs/smb/server/smb2ops.c b/fs/smb/server/smb2ops.c
index 27a9dce3e03a..8600f32c981a 100644
--- a/fs/smb/server/smb2ops.c
+++ b/fs/smb/server/smb2ops.c
@@ -228,6 +228,11 @@ void init_smb3_0_server(struct ksmbd_conn *conn)
conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;

+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION ||
+ (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) &&
+ conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION))
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
+
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
}
@@ -275,11 +280,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING |
SMB2_GLOBAL_CAP_DIRECTORY_LEASING;

- if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION ||
- (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) &&
- conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION))
- conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
-
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;

diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 199c31c275e5..924f08326eef 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -5631,8 +5631,9 @@ static int smb2_rename(struct ksmbd_work *work,
if (!file_info->ReplaceIfExists)
flags = RENAME_NOREPLACE;

- smb_break_all_levII_oplock(work, fp, 0);
rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags);
+ if (!rc)
+ smb_break_all_levII_oplock(work, fp, 0);
out:
kfree(new_name);
return rc;
diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c
index f29bb03f0dc4..8752ac82c557 100644
--- a/fs/smb/server/transport_ipc.c
+++ b/fs/smb/server/transport_ipc.c
@@ -65,6 +65,7 @@ struct ipc_msg_table_entry {
struct hlist_node ipc_table_hlist;

void *response;
+ unsigned int msg_sz;
};

static struct delayed_work ipc_timer_work;
@@ -275,6 +276,7 @@ static int handle_response(int type, void *payload, size_t sz)
}

memcpy(entry->response, payload, sz);
+ entry->msg_sz = sz;
wake_up_interruptible(&entry->wait);
ret = 0;
break;
@@ -453,6 +455,34 @@ static int ipc_msg_send(struct ksmbd_ipc_msg *msg)
return ret;
}

+static int ipc_validate_msg(struct ipc_msg_table_entry *entry)
+{
+ unsigned int msg_sz = entry->msg_sz;
+
+ if (entry->type == KSMBD_EVENT_RPC_REQUEST) {
+ struct ksmbd_rpc_command *resp = entry->response;
+
+ msg_sz = sizeof(struct ksmbd_rpc_command) + resp->payload_sz;
+ } else if (entry->type == KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST) {
+ struct ksmbd_spnego_authen_response *resp = entry->response;
+
+ msg_sz = sizeof(struct ksmbd_spnego_authen_response) +
+ resp->session_key_len + resp->spnego_blob_len;
+ } else if (entry->type == KSMBD_EVENT_SHARE_CONFIG_REQUEST) {
+ struct ksmbd_share_config_response *resp = entry->response;
+
+ if (resp->payload_sz) {
+ if (resp->payload_sz < resp->veto_list_sz)
+ return -EINVAL;
+
+ msg_sz = sizeof(struct ksmbd_share_config_response) +
+ resp->payload_sz;
+ }
+ }
+
+ return entry->msg_sz != msg_sz ? -EINVAL : 0;
+}
+
static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle)
{
struct ipc_msg_table_entry entry;
@@ -477,6 +507,13 @@ static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle
ret = wait_event_interruptible_timeout(entry.wait,
entry.response != NULL,
IPC_WAIT_TIMEOUT);
+ if (entry.response) {
+ ret = ipc_validate_msg(&entry);
+ if (ret) {
+ kvfree(entry.response);
+ entry.response = NULL;
+ }
+ }
out:
down_write(&ipc_msg_table_lock);
hash_del(&entry.ipc_table_hlist);
diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
index 1fb8f4df60cb..9848af78215b 100644
--- a/fs/vboxsf/super.c
+++ b/fs/vboxsf/super.c
@@ -151,7 +151,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
if (!sbi->nls) {
vbg_err("vboxsf: Count not load '%s' nls\n", nls_name);
err = -EINVAL;
- goto fail_free;
+ goto fail_destroy_idr;
}
}

@@ -224,6 +224,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id);
if (sbi->nls)
unload_nls(sbi->nls);
+fail_destroy_idr:
idr_destroy(&sbi->ino_idr);
kfree(sbi);
return err;
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 31029f4f7be8..c4aabbf002f7 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -86,7 +86,7 @@ void kvm_vcpu_pmu_resync_el0(void);
*/
#define kvm_pmu_update_vcpu_events(vcpu) \
do { \
- if (!has_vhe() && kvm_vcpu_has_pmu(vcpu)) \
+ if (!has_vhe() && kvm_arm_support_pmu_v3()) \
vcpu->arch.pmu.events = *kvm_get_pmu_events(); \
} while (0)

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index d0807ad43f93..6e950594215a 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -4,6 +4,11 @@
#ifndef _VIRTCHNL_H_
#define _VIRTCHNL_H_

+#include <linux/bitops.h>
+#include <linux/bits.h>
+#include <linux/overflow.h>
+#include <uapi/linux/if_ether.h>
+
/* Description:
* This header file describes the Virtual Function (VF) - Physical Function
* (PF) communication protocol used by the drivers for all devices starting
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9b08d792fa95..2ebb5d4d43dc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1524,12 +1524,26 @@ struct bpf_link {
enum bpf_link_type type;
const struct bpf_link_ops *ops;
struct bpf_prog *prog;
- struct work_struct work;
+ /* rcu is used before freeing, work can be used to schedule that
+ * RCU-based freeing before that, so they never overlap
+ */
+ union {
+ struct rcu_head rcu;
+ struct work_struct work;
+ };
};

struct bpf_link_ops {
void (*release)(struct bpf_link *link);
+ /* deallocate link resources callback, called without RCU grace period
+ * waiting
+ */
void (*dealloc)(struct bpf_link *link);
+ /* deallocate link resources callback, called after RCU grace period;
+ * if underlying BPF program is sleepable we go through tasks trace
+ * RCU GP and then "classic" RCU GP
+ */
+ void (*dealloc_deferred)(struct bpf_link *link);
int (*detach)(struct bpf_link *link);
int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog,
struct bpf_prog *old_prog);
diff --git a/include/linux/device.h b/include/linux/device.h
index 99496a0a5ddb..a070160fbcb8 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1250,6 +1250,7 @@ void device_link_del(struct device_link *link);
void device_link_remove(void *consumer, struct device *supplier);
void device_links_supplier_sync_state_pause(void);
void device_links_supplier_sync_state_resume(void);
+void device_link_wait_removal(void);

/* Create alias, so I can be autoloaded. */
#define MODULE_ALIAS_CHARDEV(major,minor) \
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 731beb3198c4..8215e193178a 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -250,7 +250,6 @@ struct io_ring_ctx {

struct io_submit_state submit_state;

- struct io_buffer_list *io_bl;
struct xarray io_bl_xa;

struct io_hash_table cancel_table_locked;
diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h
index 35f3a4a8ceb1..acf7e1a3f3de 100644
--- a/include/linux/secretmem.h
+++ b/include/linux/secretmem.h
@@ -13,10 +13,10 @@ static inline bool folio_is_secretmem(struct folio *folio)
/*
* Using folio_mapping() is quite slow because of the actual call
* instruction.
- * We know that secretmem pages are not compound and LRU so we can
+ * We know that secretmem pages are not compound, so we can
* save a couple of cycles here.
*/
- if (folio_test_large(folio) || !folio_test_lru(folio))
+ if (folio_test_large(folio))
return false;

mapping = (struct address_space *)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2922059908cc..9e61f6df6bc5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -736,8 +736,6 @@ typedef unsigned char *sk_buff_data_t;
* @list: queue head
* @ll_node: anchor in an llist (eg socket defer_list)
* @sk: Socket we are owned by
- * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in
- * fragmentation management
* @dev: Device we arrived on/are leaving by
* @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL
* @cb: Control buffer. Free for use by every layer. Put private vars here
@@ -860,10 +858,7 @@ struct sk_buff {
struct llist_node ll_node;
};

- union {
- struct sock *sk;
- int ip_defrag_offset;
- };
+ struct sock *sk;

union {
ktime_t tstamp;
diff --git a/include/linux/udp.h b/include/linux/udp.h
index d04188714dca..94e63b269540 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -140,6 +140,24 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk,
}
}

+DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key);
+#if IS_ENABLED(CONFIG_IPV6)
+DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+#endif
+
+static inline bool udp_encap_needed(void)
+{
+ if (static_branch_unlikely(&udp_encap_needed_key))
+ return true;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (static_branch_unlikely(&udpv6_encap_needed_key))
+ return true;
+#endif
+
+ return false;
+}
+
static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
{
if (!skb_is_gso(skb))
@@ -153,6 +171,16 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
!udp_test_bit(ACCEPT_FRAGLIST, sk))
return true;

+ /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still
+ * land in a tunnel as the socket check in udp_gro_receive cannot be
+ * foolproof.
+ */
+ if (udp_encap_needed() &&
+ READ_ONCE(udp_sk(sk)->encap_rcv) &&
+ !(skb_shinfo(skb)->gso_type &
+ (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM)))
+ return true;
+
return false;
}

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 0d231024570a..03e68a8e229f 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -176,6 +176,15 @@ enum {
*/
HCI_QUIRK_USE_BDADDR_PROPERTY,

+ /* When this quirk is set, the Bluetooth Device Address provided by
+ * the 'local-bd-address' fwnode property is incorrectly specified in
+ * big-endian order.
+ *
+ * This quirk can be set before hci_register_dev is called or
+ * during the hdev->setup vendor callback.
+ */
+ HCI_QUIRK_BDADDR_PROPERTY_BROKEN,
+
/* When this quirk is set, the duplicate filtering during
* scanning is based on Bluetooth devices addresses. To allow
* RSSI based updates, restart scanning if needed.
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 01a73bf74fa1..6ecac01115d9 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -173,6 +173,7 @@ void inet_csk_init_xmit_timers(struct sock *sk,
void (*delack_handler)(struct timer_list *),
void (*keepalive_handler)(struct timer_list *));
void inet_csk_clear_xmit_timers(struct sock *sk);
+void inet_csk_clear_xmit_timers_sync(struct sock *sk);

static inline void inet_csk_schedule_ack(struct sock *sk)
{
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 4d43adf18606..cd526fd31b45 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -39,7 +39,6 @@ enum TRI_STATE {
#define COMP_ENTRY_SIZE 64

#define RX_BUFFERS_PER_QUEUE 512
-#define MANA_RX_DATA_ALIGN 64

#define MAX_SEND_BUFFERS_PER_QUEUE 256

diff --git a/include/net/sock.h b/include/net/sock.h
index e70c903b04f3..25780942ec8b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1808,6 +1808,13 @@ static inline void sock_owned_by_me(const struct sock *sk)
#endif
}

+static inline void sock_not_owned_by_me(const struct sock *sk)
+{
+#ifdef CONFIG_LOCKDEP
+ WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks);
+#endif
+}
+
static inline bool sock_owned_by_user(const struct sock *sk)
{
sock_owned_by_me(sk);
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index aed10bae50ac..2c0a9a98272c 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -148,6 +148,7 @@ static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
static void io_queue_sqe(struct io_kiocb *req);

struct kmem_cache *req_cachep;
+static struct workqueue_struct *iou_wq __ro_after_init;

static int __read_mostly sysctl_io_uring_disabled;
static int __read_mostly sysctl_io_uring_group = -1;
@@ -343,7 +344,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
err:
kfree(ctx->cancel_table.hbs);
kfree(ctx->cancel_table_locked.hbs);
- kfree(ctx->io_bl);
xa_destroy(&ctx->io_bl_xa);
kfree(ctx);
return NULL;
@@ -2934,7 +2934,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_wq_put_hash(ctx->hash_map);
kfree(ctx->cancel_table.hbs);
kfree(ctx->cancel_table_locked.hbs);
- kfree(ctx->io_bl);
xa_destroy(&ctx->io_bl_xa);
kfree(ctx);
}
@@ -3182,7 +3181,7 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
* noise and overhead, there's no discernable change in runtime
* over using system_wq.
*/
- queue_work(system_unbound_wq, &ctx->exit_work);
+ queue_work(iou_wq, &ctx->exit_work);
}

static int io_uring_release(struct inode *inode, struct file *file)
@@ -3430,14 +3429,15 @@ static void *io_uring_validate_mmap_request(struct file *file,
ptr = ctx->sq_sqes;
break;
case IORING_OFF_PBUF_RING: {
+ struct io_buffer_list *bl;
unsigned int bgid;

bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
- rcu_read_lock();
- ptr = io_pbuf_get_address(ctx, bgid);
- rcu_read_unlock();
- if (!ptr)
- return ERR_PTR(-EINVAL);
+ bl = io_pbuf_get_bl(ctx, bgid);
+ if (IS_ERR(bl))
+ return bl;
+ ptr = bl->buf_ring;
+ io_put_bl(ctx, bl);
break;
}
default:
@@ -4666,6 +4666,8 @@ static int __init io_uring_init(void)
offsetof(struct io_kiocb, cmd.data),
sizeof_field(struct io_kiocb, cmd.data), NULL);

+ iou_wq = alloc_workqueue("iou_exit", WQ_UNBOUND, 64);
+
#ifdef CONFIG_SYSCTL
register_sysctl_init("kernel", kernel_io_uring_disabled_table);
#endif
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index e8516f3bbbaa..26a00920042c 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -17,8 +17,6 @@

#define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf))

-#define BGID_ARRAY 64
-
/* BIDs are addressed by a 16-bit field in a CQE */
#define MAX_BIDS_PER_BGID (1 << 16)

@@ -31,13 +29,9 @@ struct io_provide_buf {
__u16 bid;
};

-static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
- struct io_buffer_list *bl,
- unsigned int bgid)
+static inline struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
+ unsigned int bgid)
{
- if (bl && bgid < BGID_ARRAY)
- return &bl[bgid];
-
return xa_load(&ctx->io_bl_xa, bgid);
}

@@ -53,7 +47,7 @@ static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
{
lockdep_assert_held(&ctx->uring_lock);

- return __io_buffer_get_list(ctx, ctx->io_bl, bgid);
+ return __io_buffer_get_list(ctx, bgid);
}

static int io_buffer_add_list(struct io_ring_ctx *ctx,
@@ -65,11 +59,7 @@ static int io_buffer_add_list(struct io_ring_ctx *ctx,
* always under the ->uring_lock, but the RCU lookup from mmap does.
*/
bl->bgid = bgid;
- smp_store_release(&bl->is_ready, 1);
-
- if (bgid < BGID_ARRAY)
- return 0;
-
+ atomic_set(&bl->refs, 1);
return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
}

@@ -215,24 +205,6 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
return ret;
}

-static __cold int io_init_bl_list(struct io_ring_ctx *ctx)
-{
- struct io_buffer_list *bl;
- int i;
-
- bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL);
- if (!bl)
- return -ENOMEM;
-
- for (i = 0; i < BGID_ARRAY; i++) {
- INIT_LIST_HEAD(&bl[i].buf_list);
- bl[i].bgid = i;
- }
-
- smp_store_release(&ctx->io_bl, bl);
- return 0;
-}
-
/*
* Mark the given mapped range as free for reuse
*/
@@ -301,22 +273,22 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
return i;
}

+void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
+{
+ if (atomic_dec_and_test(&bl->refs)) {
+ __io_remove_buffers(ctx, bl, -1U);
+ kfree_rcu(bl, rcu);
+ }
+}
+
void io_destroy_buffers(struct io_ring_ctx *ctx)
{
struct io_buffer_list *bl;
unsigned long index;
- int i;
-
- for (i = 0; i < BGID_ARRAY; i++) {
- if (!ctx->io_bl)
- break;
- __io_remove_buffers(ctx, &ctx->io_bl[i], -1U);
- }

xa_for_each(&ctx->io_bl_xa, index, bl) {
xa_erase(&ctx->io_bl_xa, bl->bgid);
- __io_remove_buffers(ctx, bl, -1U);
- kfree_rcu(bl, rcu);
+ io_put_bl(ctx, bl);
}

while (!list_empty(&ctx->io_buffers_pages)) {
@@ -485,12 +457,6 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)

io_ring_submit_lock(ctx, issue_flags);

- if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) {
- ret = io_init_bl_list(ctx);
- if (ret)
- goto err;
- }
-
bl = io_buffer_get_list(ctx, p->bgid);
if (unlikely(!bl)) {
bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
@@ -503,14 +469,9 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
if (ret) {
/*
* Doesn't need rcu free as it was never visible, but
- * let's keep it consistent throughout. Also can't
- * be a lower indexed array group, as adding one
- * where lookup failed cannot happen.
+ * let's keep it consistent throughout.
*/
- if (p->bgid >= BGID_ARRAY)
- kfree_rcu(bl, rcu);
- else
- WARN_ON_ONCE(1);
+ kfree_rcu(bl, rcu);
goto err;
}
}
@@ -675,12 +636,6 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
if (reg.ring_entries >= 65536)
return -EINVAL;

- if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) {
- int ret = io_init_bl_list(ctx);
- if (ret)
- return ret;
- }
-
bl = io_buffer_get_list(ctx, reg.bgid);
if (bl) {
/* if mapped buffer ring OR classic exists, don't allow */
@@ -729,31 +684,40 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
if (!bl->is_mapped)
return -EINVAL;

- __io_remove_buffers(ctx, bl, -1U);
- if (bl->bgid >= BGID_ARRAY) {
- xa_erase(&ctx->io_bl_xa, bl->bgid);
- kfree_rcu(bl, rcu);
- }
+ xa_erase(&ctx->io_bl_xa, bl->bgid);
+ io_put_bl(ctx, bl);
return 0;
}

-void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
+struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
+ unsigned long bgid)
{
struct io_buffer_list *bl;
+ bool ret;

- bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid);
-
- if (!bl || !bl->is_mmap)
- return NULL;
/*
- * Ensure the list is fully setup. Only strictly needed for RCU lookup
- * via mmap, and in that case only for the array indexed groups. For
- * the xarray lookups, it's either visible and ready, or not at all.
+ * We have to be a bit careful here - we're inside mmap and cannot grab
+ * the uring_lock. This means the buffer_list could be simultaneously
+ * going away, if someone is trying to be sneaky. Look it up under rcu
+ * so we know it's not going away, and attempt to grab a reference to
+ * it. If the ref is already zero, then fail the mapping. If successful,
+ * the caller will call io_put_bl() to drop the the reference at at the
+ * end. This may then safely free the buffer_list (and drop the pages)
+ * at that point, vm_insert_pages() would've already grabbed the
+ * necessary vma references.
*/
- if (!smp_load_acquire(&bl->is_ready))
- return NULL;
-
- return bl->buf_ring;
+ rcu_read_lock();
+ bl = xa_load(&ctx->io_bl_xa, bgid);
+ /* must be a mmap'able buffer ring and have pages */
+ ret = false;
+ if (bl && bl->is_mmap)
+ ret = atomic_inc_not_zero(&bl->refs);
+ rcu_read_unlock();
+
+ if (ret)
+ return bl;
+
+ return ERR_PTR(-EINVAL);
}

/*
diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index 3d0cb6b8c1ed..8d7929369501 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -25,12 +25,12 @@ struct io_buffer_list {
__u16 head;
__u16 mask;

+ atomic_t refs;
+
/* ring mapped provided buffers */
__u8 is_mapped;
/* ring mapped provided buffers, but mmap'ed by application */
__u8 is_mmap;
- /* bl is visible from an RCU point of view for lookup */
- __u8 is_ready;
};

struct io_buffer {
@@ -60,7 +60,9 @@ unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);

void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);

-void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid);
+void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl);
+struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
+ unsigned long bgid);

static inline void io_kbuf_recycle_ring(struct io_kiocb *req)
{
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4b7d186c7622..4902a7487f07 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2866,17 +2866,46 @@ void bpf_link_inc(struct bpf_link *link)
atomic64_inc(&link->refcnt);
}

+static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
+{
+ struct bpf_link *link = container_of(rcu, struct bpf_link, rcu);
+
+ /* free bpf_link and its containing memory */
+ link->ops->dealloc_deferred(link);
+}
+
+static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
+{
+ if (rcu_trace_implies_rcu_gp())
+ bpf_link_defer_dealloc_rcu_gp(rcu);
+ else
+ call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp);
+}
+
/* bpf_link_free is guaranteed to be called from process context */
static void bpf_link_free(struct bpf_link *link)
{
+ bool sleepable = false;
+
bpf_link_free_id(link->id);
if (link->prog) {
+ sleepable = link->prog->aux->sleepable;
/* detach BPF program, clean up used resources */
link->ops->release(link);
bpf_prog_put(link->prog);
}
- /* free bpf_link and its containing memory */
- link->ops->dealloc(link);
+ if (link->ops->dealloc_deferred) {
+ /* schedule BPF link deallocation; if underlying BPF program
+ * is sleepable, we need to first wait for RCU tasks trace
+ * sync, then go through "classic" RCU grace period
+ */
+ if (sleepable)
+ call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
+ else
+ call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
+ }
+ if (link->ops->dealloc)
+ link->ops->dealloc(link);
}

static void bpf_link_put_deferred(struct work_struct *work)
@@ -3381,7 +3410,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,

static const struct bpf_link_ops bpf_raw_tp_link_lops = {
.release = bpf_raw_tp_link_release,
- .dealloc = bpf_raw_tp_link_dealloc,
+ .dealloc_deferred = bpf_raw_tp_link_dealloc,
.show_fdinfo = bpf_raw_tp_link_show_fdinfo,
.fill_link_info = bpf_raw_tp_link_fill_link_info,
};
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 396c4c66932f..c9fc734989c6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6637,6 +6637,11 @@ static int check_stack_access_within_bounds(
err = check_stack_slot_within_bounds(env, min_off, state, type);
if (!err && max_off > 0)
err = -EINVAL; /* out of stack access into non-negative offsets */
+ if (!err && access_size < 0)
+ /* access_size should not be negative (or overflow an int); others checks
+ * along the way should have prevented such an access.
+ */
+ err = -EFAULT; /* invalid negative access size; integer overflow? */

if (err) {
if (tnum_is_const(reg->var_off)) {
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 1d76f3b014ae..1e79084a9d9d 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2639,7 +2639,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,

static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
.release = bpf_kprobe_multi_link_release,
- .dealloc = bpf_kprobe_multi_link_dealloc,
+ .dealloc_deferred = bpf_kprobe_multi_link_dealloc,
.fill_link_info = bpf_kprobe_multi_link_fill_link_info,
};

@@ -3065,6 +3065,9 @@ static void bpf_uprobe_multi_link_release(struct bpf_link *link)

umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt);
+ if (umulti_link->task)
+ put_task_struct(umulti_link->task);
+ path_put(&umulti_link->path);
}

static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
@@ -3072,16 +3075,13 @@ static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
struct bpf_uprobe_multi_link *umulti_link;

umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
- if (umulti_link->task)
- put_task_struct(umulti_link->task);
- path_put(&umulti_link->path);
kvfree(umulti_link->uprobes);
kfree(umulti_link);
}

static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
.release = bpf_uprobe_multi_link_release,
- .dealloc = bpf_uprobe_multi_link_dealloc,
+ .dealloc_deferred = bpf_uprobe_multi_link_dealloc,
};

static int uprobe_prog_run(struct bpf_uprobe *uprobe,
diff --git a/mm/memory.c b/mm/memory.c
index 78e05d3e9e4a..e44d4d887cf6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5674,6 +5674,10 @@ int follow_phys(struct vm_area_struct *vma,
goto out;
pte = ptep_get(ptep);

+ /* Never return PFNs of anon folios in COW mappings. */
+ if (vm_normal_folio(vma, address, pte))
+ goto unlock;
+
if ((flags & FOLL_WRITE) && !pte_write(pte))
goto unlock;

diff --git a/net/9p/client.c b/net/9p/client.c
index e265a0ca6bdd..f7e90b4769bb 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1583,7 +1583,7 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
received = rsize;
}

- p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
+ p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", received);

if (non_zc) {
int n = copy_to_iter(dataptr, received, to);
@@ -1609,9 +1609,6 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
int total = 0;
*err = 0;

- p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n",
- fid->fid, offset, iov_iter_count(from));
-
while (iov_iter_count(from)) {
int count = iov_iter_count(from);
int rsize = fid->iounit;
@@ -1623,6 +1620,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
if (count < rsize)
rsize = count;

+ p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d (/%d)\n",
+ fid->fid, offset, rsize, count);
+
/* Don't bother zerocopy for small IO (< 1024) */
if (clnt->trans_mod->zc_request && rsize > 1024) {
req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, from, 0,
@@ -1650,7 +1650,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
written = rsize;
}

- p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
+ p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written);

p9_req_put(clnt, req);
iov_iter_revert(from, count - written - iov_iter_count(from));
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index c5462486dbca..282ec581c072 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -105,7 +105,7 @@ void ax25_dev_device_down(struct net_device *dev)
spin_lock_bh(&ax25_dev_lock);

#ifdef CONFIG_AX25_DAMA_SLAVE
- ax25_ds_del_timer(ax25_dev);
+ timer_shutdown_sync(&ax25_dev->dama.slave_timer);
#endif

/*
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 233453807b50..ce3ff2fa72e5 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -218,10 +218,12 @@ static int conn_info_min_age_set(void *data, u64 val)
{
struct hci_dev *hdev = data;

- if (val == 0 || val > hdev->conn_info_max_age)
+ hci_dev_lock(hdev);
+ if (val == 0 || val > hdev->conn_info_max_age) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }

- hci_dev_lock(hdev);
hdev->conn_info_min_age = val;
hci_dev_unlock(hdev);

@@ -246,10 +248,12 @@ static int conn_info_max_age_set(void *data, u64 val)
{
struct hci_dev *hdev = data;

- if (val == 0 || val < hdev->conn_info_min_age)
+ hci_dev_lock(hdev);
+ if (val == 0 || val < hdev->conn_info_min_age) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }

- hci_dev_lock(hdev);
hdev->conn_info_max_age = val;
hci_dev_unlock(hdev);

@@ -567,10 +571,12 @@ static int sniff_min_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;

- if (val == 0 || val % 2 || val > hdev->sniff_max_interval)
+ hci_dev_lock(hdev);
+ if (val == 0 || val % 2 || val > hdev->sniff_max_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }

- hci_dev_lock(hdev);
hdev->sniff_min_interval = val;
hci_dev_unlock(hdev);

@@ -595,10 +601,12 @@ static int sniff_max_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;

- if (val == 0 || val % 2 || val < hdev->sniff_min_interval)
+ hci_dev_lock(hdev);
+ if (val == 0 || val % 2 || val < hdev->sniff_min_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }

- hci_dev_lock(hdev);
hdev->sniff_max_interval = val;
hci_dev_unlock(hdev);

@@ -850,10 +858,12 @@ static int conn_min_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;

- if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }

- hci_dev_lock(hdev);
hdev->le_conn_min_interval = val;
hci_dev_unlock(hdev);

@@ -878,10 +888,12 @@ static int conn_max_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;

- if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }

- hci_dev_lock(hdev);
hdev->le_conn_max_interval = val;
hci_dev_unlock(hdev);

@@ -990,10 +1002,12 @@ static int adv_min_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;

- if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }

- hci_dev_lock(hdev);
hdev->le_adv_min_interval = val;
hci_dev_unlock(hdev);

@@ -1018,10 +1032,12 @@ static int adv_max_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;

- if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }

- hci_dev_lock(hdev);
hdev->le_adv_max_interval = val;
hci_dev_unlock(hdev);

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 2bb8ab9302a9..bb0e5902a3e6 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3219,6 +3219,31 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
if (test_bit(HCI_ENCRYPT, &hdev->flags))
set_bit(HCI_CONN_ENCRYPT, &conn->flags);

+ /* "Link key request" completed ahead of "connect request" completes */
+ if (ev->encr_mode == 1 && !test_bit(HCI_CONN_ENCRYPT, &conn->flags) &&
+ ev->link_type == ACL_LINK) {
+ struct link_key *key;
+ struct hci_cp_read_enc_key_size cp;
+
+ key = hci_find_link_key(hdev, &ev->bdaddr);
+ if (key) {
+ set_bit(HCI_CONN_ENCRYPT, &conn->flags);
+
+ if (!(hdev->commands[20] & 0x10)) {
+ conn->enc_key_size = HCI_LINK_KEY_SIZE;
+ } else {
+ cp.handle = cpu_to_le16(conn->handle);
+ if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE,
+ sizeof(cp), &cp)) {
+ bt_dev_err(hdev, "sending read key size failed");
+ conn->enc_key_size = HCI_LINK_KEY_SIZE;
+ }
+ }
+
+ hci_encrypt_cfm(conn, ev->status);
+ }
+ }
+
/* Get remote features */
if (conn->type == ACL_LINK) {
struct hci_cp_read_remote_features cp;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 9b241eabca3e..d6c0633bfe5b 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -3292,7 +3292,10 @@ static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev)
if (ret < 0 || !bacmp(&ba, BDADDR_ANY))
return;

- bacpy(&hdev->public_addr, &ba);
+ if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks))
+ baswap(&hdev->public_addr, &ba);
+ else
+ bacpy(&hdev->public_addr, &ba);
}

struct hci_init_stage {
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index aa23479b20b2..ed62c1026fe9 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1111,6 +1111,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
struct ebt_table_info *newinfo;
struct ebt_replace tmp;

+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;

@@ -1423,6 +1425,8 @@ static int update_counters(struct net *net, sockptr_t arg, unsigned int len)
{
struct ebt_replace hlp;

+ if (len < sizeof(hlp))
+ return -EINVAL;
if (copy_from_sockptr(&hlp, arg, sizeof(hlp)))
return -EFAULT;

@@ -2352,6 +2356,8 @@ static int compat_update_counters(struct net *net, sockptr_t arg,
{
struct compat_ebt_replace hlp;

+ if (len < sizeof(hlp))
+ return -EINVAL;
if (copy_from_sockptr(&hlp, arg, sizeof(hlp)))
return -EFAULT;

diff --git a/net/core/gro.c b/net/core/gro.c
index 0759277dc14e..cefddf65f7db 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -195,8 +195,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
}

merge:
- /* sk owenrship - if any - completely transferred to the aggregated packet */
+ /* sk ownership - if any - completely transferred to the aggregated packet */
skb->destructor = NULL;
+ skb->sk = NULL;
delta_truesize = skb->truesize;
if (offset > headlen) {
unsigned int eat = offset - headlen;
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 27d733c0f65e..8598466a3805 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -411,6 +411,9 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
struct sock *sk;
int err = 0;

+ if (irqs_disabled())
+ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
+
spin_lock_bh(&stab->lock);
sk = *psk;
if (!sk_test || sk_test == sk)
@@ -933,6 +936,9 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
struct bpf_shtab_elem *elem;
int ret = -ENOENT;

+ if (irqs_disabled())
+ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
+
hash = sock_hash_bucket_hash(key, key_size);
bucket = sock_hash_select_bucket(htab, hash);

diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index e5742f2a2d52..1b6457f357bd 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -220,7 +220,8 @@ void hsr_del_port(struct hsr_port *port)
netdev_update_features(master->dev);
dev_set_mtu(master->dev, hsr_get_max_mtu(hsr));
netdev_rx_handler_unregister(port->dev);
- dev_set_promiscuity(port->dev, -1);
+ if (!port->hsr->fwd_offloaded)
+ dev_set_promiscuity(port->dev, -1);
netdev_upper_dev_unlink(port->dev, master->dev);
}

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 762817d6c8d7..a018981b4514 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -289,6 +289,7 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l
struct sock_reuseport *reuseport_cb;
struct inet_bind_hashbucket *head2;
struct inet_bind2_bucket *tb2;
+ bool conflict = false;
bool reuseport_cb_ok;

rcu_read_lock();
@@ -301,18 +302,20 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l

spin_lock(&head2->lock);

- inet_bind_bucket_for_each(tb2, &head2->chain)
- if (inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk))
- break;
+ inet_bind_bucket_for_each(tb2, &head2->chain) {
+ if (!inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk))
+ continue;

- if (tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
- reuseport_ok)) {
- spin_unlock(&head2->lock);
- return true;
+ if (!inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok))
+ continue;
+
+ conflict = true;
+ break;
}

spin_unlock(&head2->lock);
- return false;
+
+ return conflict;
}

/*
@@ -774,6 +777,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
}
EXPORT_SYMBOL(inet_csk_clear_xmit_timers);

+void inet_csk_clear_xmit_timers_sync(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ /* ongoing timer handlers need to acquire socket lock. */
+ sock_not_owned_by_me(sk);
+
+ icsk->icsk_pending = icsk->icsk_ack.pending = 0;
+
+ sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
+ sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
+ sk_stop_timer_sync(sk, &sk->sk_timer);
+}
+
void inet_csk_delete_keepalive_timer(struct sock *sk)
{
sk_stop_timer(sk, &sk->sk_timer);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 7072fc0783ef..c88c9034d630 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -24,6 +24,8 @@
#include <net/ip.h>
#include <net/ipv6.h>

+#include "../core/sock_destructor.h"
+
/* Use skb->cb to track consecutive/adjacent fragments coming at
* the end of the queue. Nodes in the rb-tree queue will
* contain "runs" of one or more adjacent fragments.
@@ -39,6 +41,7 @@ struct ipfrag_skb_cb {
};
struct sk_buff *next_frag;
int frag_run_len;
+ int ip_defrag_offset;
};

#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
@@ -396,12 +399,12 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
*/
if (!last)
fragrun_create(q, skb); /* First fragment. */
- else if (last->ip_defrag_offset + last->len < end) {
+ else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) {
/* This is the common case: skb goes to the end. */
/* Detect and discard overlaps. */
- if (offset < last->ip_defrag_offset + last->len)
+ if (offset < FRAG_CB(last)->ip_defrag_offset + last->len)
return IPFRAG_OVERLAP;
- if (offset == last->ip_defrag_offset + last->len)
+ if (offset == FRAG_CB(last)->ip_defrag_offset + last->len)
fragrun_append_to_last(q, skb);
else
fragrun_create(q, skb);
@@ -418,13 +421,13 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,

parent = *rbn;
curr = rb_to_skb(parent);
- curr_run_end = curr->ip_defrag_offset +
+ curr_run_end = FRAG_CB(curr)->ip_defrag_offset +
FRAG_CB(curr)->frag_run_len;
- if (end <= curr->ip_defrag_offset)
+ if (end <= FRAG_CB(curr)->ip_defrag_offset)
rbn = &parent->rb_left;
else if (offset >= curr_run_end)
rbn = &parent->rb_right;
- else if (offset >= curr->ip_defrag_offset &&
+ else if (offset >= FRAG_CB(curr)->ip_defrag_offset &&
end <= curr_run_end)
return IPFRAG_DUP;
else
@@ -438,7 +441,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
rb_insert_color(&skb->rbnode, &q->rb_fragments);
}

- skb->ip_defrag_offset = offset;
+ FRAG_CB(skb)->ip_defrag_offset = offset;

return IPFRAG_OK;
}
@@ -448,13 +451,28 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
struct sk_buff *parent)
{
struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments);
- struct sk_buff **nextp;
+ void (*destructor)(struct sk_buff *);
+ unsigned int orig_truesize = 0;
+ struct sk_buff **nextp = NULL;
+ struct sock *sk = skb->sk;
int delta;

+ if (sk && is_skb_wmem(skb)) {
+ /* TX: skb->sk might have been passed as argument to
+ * dst->output and must remain valid until tx completes.
+ *
+ * Move sk to reassembled skb and fix up wmem accounting.
+ */
+ orig_truesize = skb->truesize;
+ destructor = skb->destructor;
+ }
+
if (head != skb) {
fp = skb_clone(skb, GFP_ATOMIC);
- if (!fp)
- return NULL;
+ if (!fp) {
+ head = skb;
+ goto out_restore_sk;
+ }
FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
if (RB_EMPTY_NODE(&skb->rbnode))
FRAG_CB(parent)->next_frag = fp;
@@ -463,6 +481,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
&q->rb_fragments);
if (q->fragments_tail == skb)
q->fragments_tail = fp;
+
+ if (orig_truesize) {
+ /* prevent skb_morph from releasing sk */
+ skb->sk = NULL;
+ skb->destructor = NULL;
+ }
skb_morph(skb, head);
FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
rb_replace_node(&head->rbnode, &skb->rbnode,
@@ -470,13 +494,13 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
consume_skb(head);
head = skb;
}
- WARN_ON(head->ip_defrag_offset != 0);
+ WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0);

delta = -head->truesize;

/* Head of list must not be cloned. */
if (skb_unclone(head, GFP_ATOMIC))
- return NULL;
+ goto out_restore_sk;

delta += head->truesize;
if (delta)
@@ -492,7 +516,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,

clone = alloc_skb(0, GFP_ATOMIC);
if (!clone)
- return NULL;
+ goto out_restore_sk;
skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
skb_frag_list_init(head);
for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
@@ -509,6 +533,21 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
nextp = &skb_shinfo(head)->frag_list;
}

+out_restore_sk:
+ if (orig_truesize) {
+ int ts_delta = head->truesize - orig_truesize;
+
+ /* if this reassembled skb is fragmented later,
+ * fraglist skbs will get skb->sk assigned from head->sk,
+ * and each frag skb will be released via sock_wfree.
+ *
+ * Update sk_wmem_alloc.
+ */
+ head->sk = sk;
+ head->destructor = destructor;
+ refcount_add(ts_delta, &sk->sk_wmem_alloc);
+ }
+
return nextp;
}
EXPORT_SYMBOL(inet_frag_reasm_prepare);
@@ -516,6 +555,8 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare);
void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
void *reasm_data, bool try_coalesce)
{
+ struct sock *sk = is_skb_wmem(head) ? head->sk : NULL;
+ const unsigned int head_truesize = head->truesize;
struct sk_buff **nextp = reasm_data;
struct rb_node *rbn;
struct sk_buff *fp;
@@ -579,6 +620,9 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
head->prev = NULL;
head->tstamp = q->stamp;
head->mono_delivery_time = q->mono_delivery_time;
+
+ if (sk)
+ refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc);
}
EXPORT_SYMBOL(inet_frag_reasm_finish);

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index a4941f53b523..fb947d1613fe 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -384,6 +384,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
}

skb_dst_drop(skb);
+ skb_orphan(skb);
return -EINPROGRESS;

insert_error:
@@ -487,7 +488,6 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
struct ipq *qp;

__IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
- skb_orphan(skb);

/* Lookup (or create) queue header */
qp = ip_find(net, ip_hdr(skb), user, vif);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 5169c3c72cff..f21a1a540372 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -280,8 +280,13 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
tpi->flags | TUNNEL_NO_KEY,
iph->saddr, iph->daddr, 0);
} else {
+ if (unlikely(!pskb_may_pull(skb,
+ gre_hdr_len + sizeof(*ershdr))))
+ return PACKET_REJECT;
+
ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
ver = ershdr->ver;
+ iph = ip_hdr(skb);
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
tpi->flags | TUNNEL_KEY,
iph->saddr, iph->daddr, tpi->key);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 2407066b0fec..b150c9929b12 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -956,6 +956,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct arpt_entry *iter;

+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;

@@ -1254,6 +1256,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct arpt_entry *iter;

+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 7da1df4997d0..487670759578 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1108,6 +1108,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ipt_entry *iter;

+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;

@@ -1492,6 +1494,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ipt_entry *iter;

+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 68bb8d6bcc11..f8df35f7352a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2931,6 +2931,8 @@ void tcp_close(struct sock *sk, long timeout)
lock_sock(sk);
__tcp_close(sk, timeout);
release_sock(sk);
+ if (!sk->sk_net_refcnt)
+ inet_csk_clear_xmit_timers_sync(sk);
sock_put(sk);
}
EXPORT_SYMBOL(tcp_close);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 848072793fa9..70a9a4a48216 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -584,6 +584,13 @@ static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk,
}

DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
+EXPORT_SYMBOL(udp_encap_needed_key);
+
+#if IS_ENABLED(CONFIG_IPV6)
+DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+EXPORT_SYMBOL(udpv6_encap_needed_key);
+#endif
+
void udp_encap_enable(void)
{
static_branch_inc(&udp_encap_needed_key);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 6c95d28d0c4a..c3d67423ae18 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -449,8 +449,9 @@ static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
NAPI_GRO_CB(p)->count++;
p->data_len += skb->len;

- /* sk owenrship - if any - completely transferred to the aggregated packet */
+ /* sk ownership - if any - completely transferred to the aggregated packet */
skb->destructor = NULL;
+ skb->sk = NULL;
p->truesize += skb->truesize;
p->len += skb->len;

@@ -551,11 +552,19 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
unsigned int off = skb_gro_offset(skb);
int flush = 1;

- /* we can do L4 aggregation only if the packet can't land in a tunnel
- * otherwise we could corrupt the inner stream
+ /* We can do L4 aggregation only if the packet can't land in a tunnel
+ * otherwise we could corrupt the inner stream. Detecting such packets
+ * cannot be foolproof and the aggregation might still happen in some
+ * cases. Such packets should be caught in udp_unexpected_gso later.
*/
NAPI_GRO_CB(skb)->is_flist = 0;
if (!sk || !udp_sk(sk)->gro_receive) {
+ /* If the packet was locally encapsulated in a UDP tunnel that
+ * wasn't detected above, do not GRO.
+ */
+ if (skb->encapsulation)
+ goto out;
+
if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
NAPI_GRO_CB(skb)->is_flist = sk ? !udp_test_bit(GRO_ENABLED, sk) : 1;

@@ -719,13 +728,7 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;

- if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
- if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
- skb->csum_level++;
- } else {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->csum_level = 0;
- }
+ __skb_incr_checksum_unnecessary(skb);

return 0;
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 4fc2cae0d116..54294f6a8ec5 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -645,19 +645,19 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
if (!w) {
/* New dump:
*
- * 1. hook callback destructor.
- */
- cb->args[3] = (long)cb->done;
- cb->done = fib6_dump_done;
-
- /*
- * 2. allocate and initialize walker.
+ * 1. allocate and initialize walker.
*/
w = kzalloc(sizeof(*w), GFP_ATOMIC);
if (!w)
return -ENOMEM;
w->func = fib6_dump_node;
cb->args[2] = (long)w;
+
+ /* 2. hook callback destructor.
+ */
+ cb->args[3] = (long)cb->done;
+ cb->done = fib6_dump_done;
+
}

arg.skb = skb;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 070d87abf7c0..26c3287beb29 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -528,6 +528,9 @@ static int ip6erspan_rcv(struct sk_buff *skb,
struct ip6_tnl *tunnel;
u8 ver;

+ if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
+ return PACKET_REJECT;
+
ipv6h = ipv6_hdr(skb);
ershdr = (struct erspan_base_hdr *)skb->data;
ver = ershdr->ver;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index fd9f049d6d41..636b360311c5 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1125,6 +1125,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ip6t_entry *iter;

+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;

@@ -1501,6 +1503,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ip6t_entry *iter;

+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;

diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b2dd48911c8d..efbec7ee27d0 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -294,6 +294,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
}

skb_dst_drop(skb);
+ skb_orphan(skb);
return -EINPROGRESS;

insert_error:
@@ -469,7 +470,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb);

- skb_orphan(skb);
fq = fq_find(net, fhdr->identification, user, hdr,
skb->dev ? skb->dev->ifindex : 0);
if (fq == NULL) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 438476a31313..d31beb65db08 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -450,7 +450,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
goto try_again;
}

-DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
void udpv6_encap_enable(void)
{
static_branch_inc(&udpv6_encap_needed_key);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 6b95ba241ebe..626d7b362dc7 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -174,13 +174,7 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;

- if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
- if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
- skb->csum_level++;
- } else {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->csum_level = 0;
- }
+ __skb_incr_checksum_unnecessary(skb);

return 0;
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index b54951ae07aa..01ac690af779 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -55,28 +55,14 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
return READ_ONCE(msk->wnd_end);
}

-static bool mptcp_is_tcpsk(struct sock *sk)
+static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk)
{
- struct socket *sock = sk->sk_socket;
-
- if (unlikely(sk->sk_prot == &tcp_prot)) {
- /* we are being invoked after mptcp_accept() has
- * accepted a non-mp-capable flow: sk is a tcp_sk,
- * not an mptcp one.
- *
- * Hand the socket over to tcp so all further socket ops
- * bypass mptcp.
- */
- WRITE_ONCE(sock->ops, &inet_stream_ops);
- return true;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- } else if (unlikely(sk->sk_prot == &tcpv6_prot)) {
- WRITE_ONCE(sock->ops, &inet6_stream_ops);
- return true;
+ if (sk->sk_prot == &tcpv6_prot)
+ return &inet6_stream_ops;
#endif
- }
-
- return false;
+ WARN_ON_ONCE(sk->sk_prot != &tcp_prot);
+ return &inet_stream_ops;
}

static int __mptcp_socket_create(struct mptcp_sock *msk)
@@ -3328,44 +3314,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT;
}

-static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err,
- bool kern)
-{
- struct sock *newsk;
-
- pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
- newsk = inet_csk_accept(ssk, flags, err, kern);
- if (!newsk)
- return NULL;
-
- pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
- if (sk_is_mptcp(newsk)) {
- struct mptcp_subflow_context *subflow;
- struct sock *new_mptcp_sock;
-
- subflow = mptcp_subflow_ctx(newsk);
- new_mptcp_sock = subflow->conn;
-
- /* is_mptcp should be false if subflow->conn is missing, see
- * subflow_syn_recv_sock()
- */
- if (WARN_ON_ONCE(!new_mptcp_sock)) {
- tcp_sk(newsk)->is_mptcp = 0;
- goto out;
- }
-
- newsk = new_mptcp_sock;
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
- } else {
- MPTCP_INC_STATS(sock_net(ssk),
- MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
- }
-
-out:
- newsk->sk_kern_sock = kern;
- return newsk;
-}
-
void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
{
struct mptcp_subflow_context *subflow, *tmp;
@@ -3802,7 +3750,6 @@ static struct proto mptcp_prot = {
.connect = mptcp_connect,
.disconnect = mptcp_disconnect,
.close = mptcp_close,
- .accept = mptcp_accept,
.setsockopt = mptcp_setsockopt,
.getsockopt = mptcp_getsockopt,
.shutdown = mptcp_shutdown,
@@ -3912,18 +3859,36 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssk)
return -EINVAL;

- newsk = mptcp_accept(ssk, flags, &err, kern);
+ pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
+ newsk = inet_csk_accept(ssk, flags, &err, kern);
if (!newsk)
return err;

- lock_sock(newsk);
-
- __inet_accept(sock, newsock, newsk);
- if (!mptcp_is_tcpsk(newsock->sk)) {
- struct mptcp_sock *msk = mptcp_sk(newsk);
+ pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
+ if (sk_is_mptcp(newsk)) {
struct mptcp_subflow_context *subflow;
+ struct sock *new_mptcp_sock;
+
+ subflow = mptcp_subflow_ctx(newsk);
+ new_mptcp_sock = subflow->conn;
+
+ /* is_mptcp should be false if subflow->conn is missing, see
+ * subflow_syn_recv_sock()
+ */
+ if (WARN_ON_ONCE(!new_mptcp_sock)) {
+ tcp_sk(newsk)->is_mptcp = 0;
+ goto tcpfallback;
+ }
+
+ newsk = new_mptcp_sock;
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
+
+ newsk->sk_kern_sock = kern;
+ lock_sock(newsk);
+ __inet_accept(sock, newsock, newsk);

set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
+ msk = mptcp_sk(newsk);
msk->in_accept_queue = 0;

/* set ssk->sk_socket of accept()ed flows to mptcp socket.
@@ -3945,6 +3910,19 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (unlikely(list_is_singular(&msk->conn_list)))
mptcp_set_state(newsk, TCP_CLOSE);
}
+ } else {
+tcpfallback:
+ newsk->sk_kern_sock = kern;
+ lock_sock(newsk);
+ __inet_accept(sock, newsock, newsk);
+ /* we are being invoked after accepting a non-mp-capable
+ * flow: sk is a tcp_sk, not an mptcp one.
+ *
+ * Hand the socket over to tcp so all further socket ops
+ * bypass mptcp.
+ */
+ WRITE_ONCE(newsock->sk->sk_socket->ops,
+ mptcp_fallback_tcp_ops(newsock->sk));
}
release_sock(newsk);

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index ab41700bee68..23ee96c6abcb 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -905,6 +905,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
return child;

fallback:
+ if (fallback)
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
mptcp_subflow_drop_ctx(child);
return child;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f10419ba6e0b..2a4649df8f08 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1200,6 +1200,26 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table)
#define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \
__NFT_TABLE_F_WAS_AWAKEN)

+static bool nft_table_pending_update(const struct nft_ctx *ctx)
+{
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
+ struct nft_trans *trans;
+
+ if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ return true;
+
+ list_for_each_entry(trans, &nft_net->commit_list, list) {
+ if (trans->ctx.table == ctx->table &&
+ ((trans->msg_type == NFT_MSG_NEWCHAIN &&
+ nft_trans_chain_update(trans)) ||
+ (trans->msg_type == NFT_MSG_DELCHAIN &&
+ nft_is_base_chain(trans->ctx.chain))))
+ return true;
+ }
+
+ return false;
+}
+
static int nf_tables_updtable(struct nft_ctx *ctx)
{
struct nft_trans *trans;
@@ -1223,7 +1243,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
return -EOPNOTSUPP;

/* No dormant off/on/off/on games in single transaction */
- if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ if (nft_table_pending_update(ctx))
return -EINVAL;

trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
@@ -2420,6 +2440,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
struct nft_stats __percpu *stats = NULL;
struct nft_chain_hook hook = {};

+ if (table->flags & __NFT_TABLE_F_UPDATE)
+ return -EINVAL;
+
if (flags & NFT_CHAIN_BINDING)
return -EOPNOTSUPP;

@@ -2621,6 +2644,13 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
}
}

+ if (table->flags & __NFT_TABLE_F_UPDATE &&
+ !list_empty(&hook.list)) {
+ NL_SET_BAD_ATTR(extack, attr);
+ err = -EOPNOTSUPP;
+ goto err_hooks;
+ }
+
if (!(table->flags & NFT_TABLE_F_DORMANT) &&
nft_is_base_chain(chain) &&
!list_empty(&hook.list)) {
@@ -2850,6 +2880,9 @@ static int nft_delchain_hook(struct nft_ctx *ctx,
struct nft_trans *trans;
int err;

+ if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ return -EOPNOTSUPP;
+
err = nft_chain_parse_hook(ctx->net, basechain, nla, &chain_hook,
ctx->family, chain->flags, extack);
if (err < 0)
@@ -2934,7 +2967,8 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);

if (nla[NFTA_CHAIN_HOOK]) {
- if (chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYCHAIN ||
+ chain->flags & NFT_CHAIN_HW_OFFLOAD)
return -EOPNOTSUPP;

if (nft_is_base_chain(chain)) {
@@ -8134,11 +8168,12 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx,
return err;
}

+/* call under rcu_read_lock */
static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
{
const struct nf_flowtable_type *type;

- list_for_each_entry(type, &nf_tables_flowtables, list) {
+ list_for_each_entry_rcu(type, &nf_tables_flowtables, list) {
if (family == type->family)
return type;
}
@@ -8150,9 +8185,13 @@ nft_flowtable_type_get(struct net *net, u8 family)
{
const struct nf_flowtable_type *type;

+ rcu_read_lock();
type = __nft_flowtable_type_get(family);
- if (type != NULL && try_module_get(type->owner))
+ if (type != NULL && try_module_get(type->owner)) {
+ rcu_read_unlock();
return type;
+ }
+ rcu_read_unlock();

lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
@@ -10053,9 +10092,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
if (nft_trans_chain_update(trans)) {
nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN,
&nft_trans_chain_hooks(trans));
- nft_netdev_unregister_hooks(net,
- &nft_trans_chain_hooks(trans),
- true);
+ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) {
+ nft_netdev_unregister_hooks(net,
+ &nft_trans_chain_hooks(trans),
+ true);
+ }
} else {
nft_chain_del(trans->ctx.chain);
nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN,
@@ -10294,10 +10335,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
struct nft_trans *trans, *next;
LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
+ int err = 0;

if (action == NFNL_ABORT_VALIDATE &&
nf_tables_validate(net) < 0)
- return -EAGAIN;
+ err = -EAGAIN;

list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list,
list) {
@@ -10327,9 +10369,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_NEWCHAIN:
if (nft_trans_chain_update(trans)) {
- nft_netdev_unregister_hooks(net,
- &nft_trans_chain_hooks(trans),
- true);
+ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) {
+ nft_netdev_unregister_hooks(net,
+ &nft_trans_chain_hooks(trans),
+ true);
+ }
free_percpu(nft_trans_chain_stats(trans));
kfree(nft_trans_chain_name(trans));
nft_trans_destroy(trans);
@@ -10483,12 +10527,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nf_tables_abort_release(trans);
}

- if (action == NFNL_ABORT_AUTOLOAD)
- nf_tables_module_autoload(net);
- else
- nf_tables_module_autoload_cleanup(net);
-
- return 0;
+ return err;
}

static int nf_tables_abort(struct net *net, struct sk_buff *skb,
@@ -10501,6 +10540,17 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
gc_seq = nft_gc_seq_begin(nft_net);
ret = __nf_tables_abort(net, action);
nft_gc_seq_end(nft_net, gc_seq);
+
+ WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
+
+ /* module autoload needs to happen after GC sequence update because it
+ * temporarily releases and grabs mutex again.
+ */
+ if (action == NFNL_ABORT_AUTOLOAD)
+ nf_tables_module_autoload(net);
+ else
+ nf_tables_module_autoload_cleanup(net);
+
mutex_unlock(&nft_net->commit_mutex);

return ret;
@@ -11301,9 +11351,10 @@ static void __net_exit nf_tables_exit_net(struct net *net)

gc_seq = nft_gc_seq_begin(nft_net);

- if (!list_empty(&nft_net->commit_list) ||
- !list_empty(&nft_net->module_list))
- __nf_tables_abort(net, NFNL_ABORT_NONE);
+ WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
+
+ if (!list_empty(&nft_net->module_list))
+ nf_tables_module_autoload_cleanup(net);

__nft_release_tables(net);

@@ -11395,6 +11446,7 @@ static void __exit nf_tables_module_exit(void)
unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
nft_chain_filter_fini();
nft_chain_route_fini();
+ nf_tables_trans_destroy_flush_work();
unregister_pernet_subsys(&nf_tables_net_ops);
cancel_work_sync(&trans_gc_work);
cancel_work_sync(&trans_destroy_work);
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 12684d835cb5..772ddb5824d9 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1516,6 +1516,11 @@ static void nci_rx_work(struct work_struct *work)
nfc_send_to_raw_sock(ndev->nfc_dev, skb,
RAW_PAYLOAD_NCI, NFC_DIRECTION_RX);

+ if (!nci_plen(skb->data)) {
+ kfree_skb(skb);
+ break;
+ }
+
/* Process frame */
switch (nci_mt(skb->data)) {
case NCI_MT_RSP_PKT:
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index a4e3c5de998b..00dbcd4d28e6 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -302,7 +302,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
}
ret = PTR_ERR(trans_private);
/* Trigger connection so that its ready for the next retry */
- if (ret == -ENODEV)
+ if (ret == -ENODEV && cp)
rds_conn_connect_if_down(cp->cp_conn);
goto out;
}
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index dffa990a9629..e34f1be15164 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -241,13 +241,13 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
struct tcf_skbmod *d = to_skbmod(a);
unsigned char *b = skb_tail_pointer(skb);
struct tcf_skbmod_params *p;
- struct tc_skbmod opt = {
- .index = d->tcf_index,
- .refcnt = refcount_read(&d->tcf_refcnt) - ref,
- .bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
- };
+ struct tc_skbmod opt;
struct tcf_t t;

+ memset(&opt, 0, sizeof(opt));
+ opt.index = d->tcf_index;
+ opt.refcnt = refcount_read(&d->tcf_refcnt) - ref,
+ opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind;
spin_lock_bh(&d->tcf_lock);
opt.action = d->tcf_action;
p = rcu_dereference_protected(d->skbmod_p,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e9eaf637220e..5f25a2595add 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -809,7 +809,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
!qdisc_is_offloaded);
/* TODO: perform the search on a per txq basis */
- sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
+ sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
WARN_ON_ONCE(parentid != TC_H_ROOT);
break;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index e0ce4276274b..933e12e3a55c 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1216,15 +1216,6 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
* MSG_SPLICE_PAGES is used exclusively to reduce the number of
* copy operations in this path. Therefore the caller must ensure
* that the pages backing @xdr are unchanging.
- *
- * Note that the send is non-blocking. The caller has incremented
- * the reference count on each page backing the RPC message, and
- * the network layer will "put" these pages when transmission is
- * complete.
- *
- * This is safe for our RPC services because the memory backing
- * the head and tail components is never kmalloc'd. These always
- * come from pages in the svc_rqst::rq_pages array.
*/
static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
rpc_fraghdr marker, unsigned int *sentp)
@@ -1254,6 +1245,7 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
1 + count, sizeof(marker) + rqstp->rq_res.len);
ret = sock_sendmsg(svsk->sk_sock, &msg);
+ page_frag_free(buf);
if (ret < 0)
return ret;
*sentp += ret;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index acf5bb74fd38..df166f6afad8 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1976,10 +1976,10 @@ int tls_sw_recvmsg(struct sock *sk,
if (unlikely(flags & MSG_ERRQUEUE))
return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);

- psock = sk_psock_get(sk);
err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT);
if (err < 0)
return err;
+ psock = sk_psock_get(sk);
bpf_strp_enabled = sk_psock_strp_enabled(psock);

/* If crypto failed the connection is broken */
@@ -2152,12 +2152,15 @@ int tls_sw_recvmsg(struct sock *sk,
}

/* Drain records from the rx_list & copy if required */
- if (is_peek || is_kvec)
+ if (is_peek)
err = process_rx_list(ctx, msg, &control, copied + peeked,
decrypted - peeked, is_peek, NULL);
else
err = process_rx_list(ctx, msg, &control, 0,
async_copy_bytes, is_peek, NULL);
+
+ /* we could have copied less than we wanted, and possibly nothing */
+ decrypted += max(err, 0) - async_copy_bytes;
}

copied += decrypted;
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index a64bf601b480..2925f5d27ad3 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -109,7 +109,6 @@ virtio_transport_send_pkt_work(struct work_struct *work)
if (!skb)
break;

- virtio_transport_deliver_tap_pkt(skb);
reply = virtio_vsock_skb_reply(skb);

sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
@@ -128,6 +127,8 @@ virtio_transport_send_pkt_work(struct work_struct *work)
break;
}

+ virtio_transport_deliver_tap_pkt(skb);
+
if (reply) {
struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
int val;
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index 0669bac5e900..3f899cc7e99a 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -414,8 +414,8 @@ class PrinterRST(Printer):
version = version.stdout.decode().rstrip()
except:
try:
- version = subprocess.run(['make', 'kernelversion'], cwd=linuxRoot,
- capture_output=True, check=True)
+ version = subprocess.run(['make', '-s', '--no-print-directory', 'kernelversion'],
+ cwd=linuxRoot, capture_output=True, check=True)
version = version.stdout.decode().rstrip()
except:
return 'Linux'
diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
index c9e38ad937fd..3c54125eb373 100644
--- a/scripts/mod/Makefile
+++ b/scripts/mod/Makefile
@@ -5,7 +5,7 @@ CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO)
hostprogs-always-y += modpost mk_elfconfig
always-y += empty.o

-modpost-objs := modpost.o file2alias.o sumversion.o
+modpost-objs := modpost.o file2alias.o sumversion.o symsearch.o

devicetable-offsets-file := devicetable-offsets.h

@@ -16,7 +16,7 @@ targets += $(devicetable-offsets-file) devicetable-offsets.s

# dependencies on generated files need to be listed explicitly

-$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h
+$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h
$(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file)

quiet_cmd_elfconfig = MKELF $@
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 5191fdbd3fa2..7d53942445d7 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -22,7 +22,6 @@
#include <errno.h>
#include "modpost.h"
#include "../../include/linux/license.h"
-#include "../../include/linux/module_symbol.h"

static bool module_enabled;
/* Are we using CONFIG_MODVERSIONS? */
@@ -577,11 +576,14 @@ static int parse_elf(struct elf_info *info, const char *filename)
*p = TO_NATIVE(*p);
}

+ symsearch_init(info);
+
return 1;
}

static void parse_elf_finish(struct elf_info *info)
{
+ symsearch_finish(info);
release_file(info->hdr, info->size);
}

@@ -1042,75 +1044,16 @@ static int secref_whitelist(const char *fromsec, const char *fromsym,
return 1;
}

-/*
- * If there's no name there, ignore it; likewise, ignore it if it's
- * one of the magic symbols emitted used by current tools.
- *
- * Otherwise if find_symbols_between() returns those symbols, they'll
- * fail the whitelist tests and cause lots of false alarms ... fixable
- * only by merging __exit and __init sections into __text, bloating
- * the kernel (which is especially evil on embedded platforms).
- */
-static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
-{
- const char *name = elf->strtab + sym->st_name;
-
- if (!name || !strlen(name))
- return 0;
- return !is_mapping_symbol(name);
-}
-
-/* Look up the nearest symbol based on the section and the address */
-static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr,
- unsigned int secndx, bool allow_negative,
- Elf_Addr min_distance)
-{
- Elf_Sym *sym;
- Elf_Sym *near = NULL;
- Elf_Addr sym_addr, distance;
- bool is_arm = (elf->hdr->e_machine == EM_ARM);
-
- for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
- if (get_secindex(elf, sym) != secndx)
- continue;
- if (!is_valid_name(elf, sym))
- continue;
-
- sym_addr = sym->st_value;
-
- /*
- * For ARM Thumb instruction, the bit 0 of st_value is set
- * if the symbol is STT_FUNC type. Mask it to get the address.
- */
- if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
- sym_addr &= ~1;
-
- if (addr >= sym_addr)
- distance = addr - sym_addr;
- else if (allow_negative)
- distance = sym_addr - addr;
- else
- continue;
-
- if (distance <= min_distance) {
- min_distance = distance;
- near = sym;
- }
-
- if (min_distance == 0)
- break;
- }
- return near;
-}
-
static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr,
unsigned int secndx)
{
- return find_nearest_sym(elf, addr, secndx, false, ~0);
+ return symsearch_find_nearest(elf, addr, secndx, false, ~0);
}

static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym)
{
+ Elf_Sym *new_sym;
+
/* If the supplied symbol has a valid name, return it */
if (is_valid_name(elf, sym))
return sym;
@@ -1119,7 +1062,9 @@ static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym)
* Strive to find a better symbol name, but the resulting name may not
* match the symbol referenced in the original code.
*/
- return find_nearest_sym(elf, addr, get_secindex(elf, sym), true, 20);
+ new_sym = symsearch_find_nearest(elf, addr, get_secindex(elf, sym),
+ true, 20);
+ return new_sym ? new_sym : sym;
}

static bool is_executable_section(struct elf_info *elf, unsigned int secndx)
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 5f94c2c9f2d9..6413f26fcb6b 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -10,6 +10,7 @@
#include <fcntl.h>
#include <unistd.h>
#include <elf.h>
+#include "../../include/linux/module_symbol.h"

#include "list.h"
#include "elfconfig.h"
@@ -128,6 +129,8 @@ struct elf_info {
* take shndx from symtab_shndx_start[N] instead */
Elf32_Word *symtab_shndx_start;
Elf32_Word *symtab_shndx_stop;
+
+ struct symsearch *symsearch;
};

/* Accessor for sym->st_shndx, hides ugliness of "64k sections" */
@@ -154,6 +157,28 @@ static inline unsigned int get_secindex(const struct elf_info *info,
return index;
}

+/*
+ * If there's no name there, ignore it; likewise, ignore it if it's
+ * one of the magic symbols emitted used by current tools.
+ *
+ * Internal symbols created by tools should be ignored by modpost.
+ */
+static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
+{
+ const char *name = elf->strtab + sym->st_name;
+
+ if (!name || !strlen(name))
+ return 0;
+ return !is_mapping_symbol(name);
+}
+
+/* symsearch.c */
+void symsearch_init(struct elf_info *elf);
+void symsearch_finish(struct elf_info *elf);
+Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
+ unsigned int secndx, bool allow_negative,
+ Elf_Addr min_distance);
+
/* file2alias.c */
void handle_moddevtable(struct module *mod, struct elf_info *info,
Elf_Sym *sym, const char *symname);
diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c
new file mode 100644
index 000000000000..aa4ed51f9960
--- /dev/null
+++ b/scripts/mod/symsearch.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Helper functions for finding the symbol in an ELF which is "nearest"
+ * to a given address.
+ */
+
+#include "modpost.h"
+
+struct syminfo {
+ unsigned int symbol_index;
+ unsigned int section_index;
+ Elf_Addr addr;
+};
+
+/*
+ * Container used to hold an entire binary search table.
+ * Entries in table are ascending, sorted first by section_index,
+ * then by addr, and last by symbol_index. The sorting by
+ * symbol_index is used to ensure predictable behavior when
+ * multiple symbols are present with the same address; all
+ * symbols past the first are effectively ignored, by eliding
+ * them in symsearch_fixup().
+ */
+struct symsearch {
+ unsigned int table_size;
+ struct syminfo table[];
+};
+
+static int syminfo_compare(const void *s1, const void *s2)
+{
+ const struct syminfo *sym1 = s1;
+ const struct syminfo *sym2 = s2;
+
+ if (sym1->section_index > sym2->section_index)
+ return 1;
+ if (sym1->section_index < sym2->section_index)
+ return -1;
+ if (sym1->addr > sym2->addr)
+ return 1;
+ if (sym1->addr < sym2->addr)
+ return -1;
+ if (sym1->symbol_index > sym2->symbol_index)
+ return 1;
+ if (sym1->symbol_index < sym2->symbol_index)
+ return -1;
+ return 0;
+}
+
+static unsigned int symbol_count(struct elf_info *elf)
+{
+ unsigned int result = 0;
+
+ for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
+ if (is_valid_name(elf, sym))
+ result++;
+ }
+ return result;
+}
+
+/*
+ * Populate the search array that we just allocated.
+ * Be slightly paranoid here. The ELF file is mmap'd and could
+ * conceivably change between symbol_count() and symsearch_populate().
+ * If we notice any difference, bail out rather than potentially
+ * propagating errors or crashing.
+ */
+static void symsearch_populate(struct elf_info *elf,
+ struct syminfo *table,
+ unsigned int table_size)
+{
+ bool is_arm = (elf->hdr->e_machine == EM_ARM);
+
+ for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
+ if (is_valid_name(elf, sym)) {
+ if (table_size-- == 0)
+ fatal("%s: size mismatch\n", __func__);
+ table->symbol_index = sym - elf->symtab_start;
+ table->section_index = get_secindex(elf, sym);
+ table->addr = sym->st_value;
+
+ /*
+ * For ARM Thumb instruction, the bit 0 of st_value is
+ * set if the symbol is STT_FUNC type. Mask it to get
+ * the address.
+ */
+ if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
+ table->addr &= ~1;
+
+ table++;
+ }
+ }
+
+ if (table_size != 0)
+ fatal("%s: size mismatch\n", __func__);
+}
+
+/*
+ * Do any fixups on the table after sorting.
+ * For now, this just finds adjacent entries which have
+ * the same section_index and addr, and it propagates
+ * the first symbol_index over the subsequent entries,
+ * so that only one symbol_index is seen for any given
+ * section_index and addr. This ensures that whether
+ * we're looking at an address from "above" or "below"
+ * that we see the same symbol_index.
+ * This does leave some duplicate entries in the table;
+ * in practice, these are a small fraction of the
+ * total number of entries, and they are harmless to
+ * the binary search algorithm other than a few occasional
+ * unnecessary comparisons.
+ */
+static void symsearch_fixup(struct syminfo *table, unsigned int table_size)
+{
+ /* Don't look at index 0, it will never change. */
+ for (unsigned int i = 1; i < table_size; i++) {
+ if (table[i].addr == table[i - 1].addr &&
+ table[i].section_index == table[i - 1].section_index) {
+ table[i].symbol_index = table[i - 1].symbol_index;
+ }
+ }
+}
+
+void symsearch_init(struct elf_info *elf)
+{
+ unsigned int table_size = symbol_count(elf);
+
+ elf->symsearch = NOFAIL(malloc(sizeof(struct symsearch) +
+ sizeof(struct syminfo) * table_size));
+ elf->symsearch->table_size = table_size;
+
+ symsearch_populate(elf, elf->symsearch->table, table_size);
+ qsort(elf->symsearch->table, table_size,
+ sizeof(struct syminfo), syminfo_compare);
+
+ symsearch_fixup(elf->symsearch->table, table_size);
+}
+
+void symsearch_finish(struct elf_info *elf)
+{
+ free(elf->symsearch);
+ elf->symsearch = NULL;
+}
+
+/*
+ * Find the syminfo which is in secndx and "nearest" to addr.
+ * allow_negative: allow returning a symbol whose address is > addr.
+ * min_distance: ignore symbols which are further away than this.
+ *
+ * Returns a pointer into the symbol table for success.
+ * Returns NULL if no legal symbol is found within the requested range.
+ */
+Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
+ unsigned int secndx, bool allow_negative,
+ Elf_Addr min_distance)
+{
+ unsigned int hi = elf->symsearch->table_size;
+ unsigned int lo = 0;
+ struct syminfo *table = elf->symsearch->table;
+ struct syminfo target;
+
+ target.addr = addr;
+ target.section_index = secndx;
+ target.symbol_index = ~0; /* compares greater than any actual index */
+ while (hi > lo) {
+ unsigned int mid = lo + (hi - lo) / 2; /* Avoids overflow */
+
+ if (syminfo_compare(&table[mid], &target) > 0)
+ hi = mid;
+ else
+ lo = mid + 1;
+ }
+
+ /*
+ * table[hi], if it exists, is the first entry in the array which
+ * lies beyond target. table[hi - 1], if it exists, is the last
+ * entry in the array which comes before target, including the
+ * case where it perfectly matches the section and the address.
+ *
+ * Note -- if the address we're looking up falls perfectly
+ * in the middle of two symbols, this is written to always
+ * prefer the symbol with the lower address.
+ */
+ Elf_Sym *result = NULL;
+
+ if (allow_negative &&
+ hi < elf->symsearch->table_size &&
+ table[hi].section_index == secndx &&
+ table[hi].addr - addr <= min_distance) {
+ min_distance = table[hi].addr - addr;
+ result = &elf->symtab_start[table[hi].symbol_index];
+ }
+ if (hi > 0 &&
+ table[hi - 1].section_index == secndx &&
+ addr - table[hi - 1].addr <= min_distance) {
+ result = &elf->symtab_start[table[hi - 1].symbol_index];
+ }
+ return result;
+}
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 6fa640263216..2c23a5a28608 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -2135,7 +2135,6 @@ static struct file_system_type sel_fs_type = {
.kill_sb = sel_kill_sb,
};

-static struct vfsmount *selinuxfs_mount __ro_after_init;
struct path selinux_null __ro_after_init;

static int __init init_sel_fs(void)
@@ -2157,18 +2156,21 @@ static int __init init_sel_fs(void)
return err;
}

- selinux_null.mnt = selinuxfs_mount = kern_mount(&sel_fs_type);
- if (IS_ERR(selinuxfs_mount)) {
+ selinux_null.mnt = kern_mount(&sel_fs_type);
+ if (IS_ERR(selinux_null.mnt)) {
pr_err("selinuxfs: could not mount!\n");
- err = PTR_ERR(selinuxfs_mount);
- selinuxfs_mount = NULL;
+ err = PTR_ERR(selinux_null.mnt);
+ selinux_null.mnt = NULL;
+ return err;
}
+
selinux_null.dentry = d_hash_and_lookup(selinux_null.mnt->mnt_root,
&null_name);
if (IS_ERR(selinux_null.dentry)) {
pr_err("selinuxfs: could not lookup null!\n");
err = PTR_ERR(selinux_null.dentry);
selinux_null.dentry = NULL;
+ return err;
}

return err;
diff --git a/sound/pci/emu10k1/emu10k1_callback.c b/sound/pci/emu10k1/emu10k1_callback.c
index d36234b88fb4..941bfbf812ed 100644
--- a/sound/pci/emu10k1/emu10k1_callback.c
+++ b/sound/pci/emu10k1/emu10k1_callback.c
@@ -255,7 +255,7 @@ lookup_voices(struct snd_emux *emu, struct snd_emu10k1 *hw,
/* check if sample is finished playing (non-looping only) */
if (bp != best + V_OFF && bp != best + V_FREE &&
(vp->reg.sample_mode & SNDRV_SFNT_SAMPLE_SINGLESHOT)) {
- val = snd_emu10k1_ptr_read(hw, CCCA_CURRADDR, vp->ch) - 64;
+ val = snd_emu10k1_ptr_read(hw, CCCA_CURRADDR, vp->ch);
if (val >= vp->reg.loopstart)
bp = best + V_OFF;
}
@@ -362,7 +362,7 @@ start_voice(struct snd_emux_voice *vp)

map = (hw->silent_page.addr << hw->address_mode) | (hw->address_mode ? MAP_PTI_MASK1 : MAP_PTI_MASK0);

- addr = vp->reg.start + 64;
+ addr = vp->reg.start;
temp = vp->reg.parm.filterQ;
ccca = (temp << 28) | addr;
if (vp->apitch < 0xe400)
@@ -430,9 +430,6 @@ start_voice(struct snd_emux_voice *vp)
/* Q & current address (Q 4bit value, MSB) */
CCCA, ccca,

- /* cache */
- CCR, REG_VAL_PUT(CCR_CACHEINVALIDSIZE, 64),
-
/* reset volume */
VTFT, vtarget | vp->ftarget,
CVCF, vtarget | CVCF_CURRENTFILTER_MASK,
diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c
index 7adc1d373d65..27848d646963 100644
--- a/sound/pci/hda/cs35l56_hda.c
+++ b/sound/pci/hda/cs35l56_hda.c
@@ -978,14 +978,14 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int id)
pm_runtime_mark_last_busy(cs35l56->base.dev);
pm_runtime_enable(cs35l56->base.dev);

+ cs35l56->base.init_done = true;
+
ret = component_add(cs35l56->base.dev, &cs35l56_hda_comp_ops);
if (ret) {
dev_err(cs35l56->base.dev, "Register component failed: %d\n", ret);
goto pm_err;
}

- cs35l56->base.init_done = true;
-
return 0;

pm_err:
diff --git a/sound/pci/hda/cs35l56_hda_i2c.c b/sound/pci/hda/cs35l56_hda_i2c.c
index 757a4d193e0f..c31f60b0421e 100644
--- a/sound/pci/hda/cs35l56_hda_i2c.c
+++ b/sound/pci/hda/cs35l56_hda_i2c.c
@@ -49,10 +49,19 @@ static const struct i2c_device_id cs35l56_hda_i2c_id[] = {
{}
};

+static const struct acpi_device_id cs35l56_acpi_hda_match[] = {
+ { "CSC3554", 0 },
+ { "CSC3556", 0 },
+ { "CSC3557", 0 },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, cs35l56_acpi_hda_match);
+
static struct i2c_driver cs35l56_hda_i2c_driver = {
.driver = {
- .name = "cs35l56-hda",
- .pm = &cs35l56_hda_pm_ops,
+ .name = "cs35l56-hda",
+ .acpi_match_table = cs35l56_acpi_hda_match,
+ .pm = &cs35l56_hda_pm_ops,
},
.id_table = cs35l56_hda_i2c_id,
.probe = cs35l56_hda_i2c_probe,
diff --git a/sound/pci/hda/cs35l56_hda_spi.c b/sound/pci/hda/cs35l56_hda_spi.c
index 756aec342eab..52c9e04b3c55 100644
--- a/sound/pci/hda/cs35l56_hda_spi.c
+++ b/sound/pci/hda/cs35l56_hda_spi.c
@@ -49,10 +49,19 @@ static const struct spi_device_id cs35l56_hda_spi_id[] = {
{}
};

+static const struct acpi_device_id cs35l56_acpi_hda_match[] = {
+ { "CSC3554", 0 },
+ { "CSC3556", 0 },
+ { "CSC3557", 0 },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, cs35l56_acpi_hda_match);
+
static struct spi_driver cs35l56_hda_spi_driver = {
.driver = {
- .name = "cs35l56-hda",
- .pm = &cs35l56_hda_pm_ops,
+ .name = "cs35l56-hda",
+ .acpi_match_table = cs35l56_acpi_hda_match,
+ .pm = &cs35l56_hda_pm_ops,
},
.id_table = cs35l56_hda_spi_id,
.probe = cs35l56_hda_spi_probe,
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 0db9326b6f84..b1c2fb43cab6 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10072,7 +10072,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x10ec, 0x1252, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
SND_PCI_QUIRK(0x10ec, 0x12cc, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
- SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE),
+ SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_ASPIRE_HEADSET_MIC),
SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP),
SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP),
@@ -10302,6 +10302,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1d05, 0x1147, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP),
+ SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC),
diff --git a/sound/soc/amd/acp/acp-pci.c b/sound/soc/amd/acp/acp-pci.c
index a32c14a109b7..223238f662f8 100644
--- a/sound/soc/amd/acp/acp-pci.c
+++ b/sound/soc/amd/acp/acp-pci.c
@@ -107,7 +107,10 @@ static int acp_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id
goto unregister_dmic_dev;
}

- acp_init(chip);
+ ret = acp_init(chip);
+ if (ret)
+ goto unregister_dmic_dev;
+
res = devm_kcalloc(&pci->dev, num_res, sizeof(struct resource), GFP_KERNEL);
if (!res) {
ret = -ENOMEM;
diff --git a/sound/soc/codecs/rt5682-sdw.c b/sound/soc/codecs/rt5682-sdw.c
index e67c2e19cb1a..1fdbef5fd6cb 100644
--- a/sound/soc/codecs/rt5682-sdw.c
+++ b/sound/soc/codecs/rt5682-sdw.c
@@ -763,12 +763,12 @@ static int __maybe_unused rt5682_dev_resume(struct device *dev)
return 0;

if (!slave->unattach_request) {
+ mutex_lock(&rt5682->disable_irq_lock);
if (rt5682->disable_irq == true) {
- mutex_lock(&rt5682->disable_irq_lock);
sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF);
rt5682->disable_irq = false;
- mutex_unlock(&rt5682->disable_irq_lock);
}
+ mutex_unlock(&rt5682->disable_irq_lock);
goto regmap_sync;
}

diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c
index 935e597022d3..b8471b2d8f4f 100644
--- a/sound/soc/codecs/rt711-sdca-sdw.c
+++ b/sound/soc/codecs/rt711-sdca-sdw.c
@@ -438,13 +438,13 @@ static int __maybe_unused rt711_sdca_dev_resume(struct device *dev)
return 0;

if (!slave->unattach_request) {
+ mutex_lock(&rt711->disable_irq_lock);
if (rt711->disable_irq == true) {
- mutex_lock(&rt711->disable_irq_lock);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
rt711->disable_irq = false;
- mutex_unlock(&rt711->disable_irq_lock);
}
+ mutex_unlock(&rt711->disable_irq_lock);
goto regmap_sync;
}

diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c
index 3f5773310ae8..988451f24a75 100644
--- a/sound/soc/codecs/rt711-sdw.c
+++ b/sound/soc/codecs/rt711-sdw.c
@@ -536,12 +536,12 @@ static int __maybe_unused rt711_dev_resume(struct device *dev)
return 0;

if (!slave->unattach_request) {
+ mutex_lock(&rt711->disable_irq_lock);
if (rt711->disable_irq == true) {
- mutex_lock(&rt711->disable_irq_lock);
sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF);
rt711->disable_irq = false;
- mutex_unlock(&rt711->disable_irq_lock);
}
+ mutex_unlock(&rt711->disable_irq_lock);
goto regmap_sync;
}

diff --git a/sound/soc/codecs/rt712-sdca-sdw.c b/sound/soc/codecs/rt712-sdca-sdw.c
index 6b644a89c589..ba877432cea6 100644
--- a/sound/soc/codecs/rt712-sdca-sdw.c
+++ b/sound/soc/codecs/rt712-sdca-sdw.c
@@ -438,13 +438,14 @@ static int __maybe_unused rt712_sdca_dev_resume(struct device *dev)
return 0;

if (!slave->unattach_request) {
+ mutex_lock(&rt712->disable_irq_lock);
if (rt712->disable_irq == true) {
- mutex_lock(&rt712->disable_irq_lock);
+
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
rt712->disable_irq = false;
- mutex_unlock(&rt712->disable_irq_lock);
}
+ mutex_unlock(&rt712->disable_irq_lock);
goto regmap_sync;
}

diff --git a/sound/soc/codecs/rt722-sdca-sdw.c b/sound/soc/codecs/rt722-sdca-sdw.c
index a38ec5862214..43a4e79e5696 100644
--- a/sound/soc/codecs/rt722-sdca-sdw.c
+++ b/sound/soc/codecs/rt722-sdca-sdw.c
@@ -464,13 +464,13 @@ static int __maybe_unused rt722_sdca_dev_resume(struct device *dev)
return 0;

if (!slave->unattach_request) {
+ mutex_lock(&rt722->disable_irq_lock);
if (rt722->disable_irq == true) {
- mutex_lock(&rt722->disable_irq_lock);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_6);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
rt722->disable_irq = false;
- mutex_unlock(&rt722->disable_irq_lock);
}
+ mutex_unlock(&rt722->disable_irq_lock);
goto regmap_sync;
}

diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index 72b90a7ee4b6..b9c20e29fe63 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -683,11 +683,12 @@ static void wm_adsp_control_remove(struct cs_dsp_coeff_ctl *cs_ctl)
int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name, int type,
unsigned int alg, void *buf, size_t len)
{
- struct cs_dsp_coeff_ctl *cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg);
+ struct cs_dsp_coeff_ctl *cs_ctl;
struct wm_coeff_ctl *ctl;
int ret;

mutex_lock(&dsp->cs_dsp.pwr_lock);
+ cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg);
ret = cs_dsp_coeff_write_ctrl(cs_ctl, 0, buf, len);
mutex_unlock(&dsp->cs_dsp.pwr_lock);

diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c
index 2d25748ca706..b27e89ff6a16 100644
--- a/sound/soc/soc-ops.c
+++ b/sound/soc/soc-ops.c
@@ -263,7 +263,7 @@ int snd_soc_get_volsw(struct snd_kcontrol *kcontrol,
int max = mc->max;
int min = mc->min;
int sign_bit = mc->sign_bit;
- unsigned int mask = (1 << fls(max)) - 1;
+ unsigned int mask = (1ULL << fls(max)) - 1;
unsigned int invert = mc->invert;
int val;
int ret;
diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c
index 4c54ce212de6..cc006d7038d9 100644
--- a/sound/soc/sof/amd/acp.c
+++ b/sound/soc/sof/amd/acp.c
@@ -522,6 +522,10 @@ int amd_sof_acp_probe(struct snd_sof_dev *sdev)
goto unregister_dev;
}

+ ret = acp_init(sdev);
+ if (ret < 0)
+ goto free_smn_dev;
+
sdev->ipc_irq = pci->irq;
ret = request_threaded_irq(sdev->ipc_irq, acp_irq_handler, acp_irq_thread,
IRQF_SHARED, "AudioDSP", sdev);
@@ -531,10 +535,6 @@ int amd_sof_acp_probe(struct snd_sof_dev *sdev)
goto free_smn_dev;
}

- ret = acp_init(sdev);
- if (ret < 0)
- goto free_ipc_irq;
-
sdev->dsp_box.offset = 0;
sdev->dsp_box.size = BOX_SIZE_512;

diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 798e60b5454b..845a4023ba44 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -219,7 +219,7 @@
#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_ZEN (7*32+28) /* "" CPU based on Zen microarchitecture */
+#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */
#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index 897af958cee8..575b7e248e52 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -198,8 +198,11 @@ class Type(SpecAttr):
presence = ''
for i in range(0, len(ref)):
presence = f"{var}->{'.'.join(ref[:i] + [''])}_present.{ref[i]}"
- if self.presence_type() == 'bit':
- code.append(presence + ' = 1;')
+ # Every layer below last is a nest, so we know it uses bit presence
+ # last layer is "self" and may be a complex type
+ if i == len(ref) - 1 and self.presence_type() != 'bit':
+ continue
+ code.append(presence + ' = 1;')
code += self._setter_lines(ri, member, presence)

func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}"
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index c7fa61f0dff8..0c603bec5e20 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -3,7 +3,7 @@
#include <stdbool.h>
#include <sys/mman.h>
#include <err.h>
-#include <string.h> /* ffsl() */
+#include <strings.h> /* ffsl() */
#include <unistd.h> /* _SC_PAGESIZE */

#define BIT_ULL(nr) (1ULL << (nr))
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 3b971d1617d8..7647c74adb26 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -1,6 +1,11 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
. "$(dirname "${0}")/mptcp_lib.sh"

time_start=$(date +%s)
@@ -13,7 +18,6 @@ sout=""
cin_disconnect=""
cin=""
cout=""
-ksft_skip=4
capture=false
timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
@@ -131,6 +135,8 @@ ns4="ns4-$rndh"
TEST_COUNT=0
TEST_GROUP=""

+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
cleanup()
{
rm -f "$cin_disconnect" "$cout_disconnect"
@@ -225,8 +231,9 @@ set_ethtool_flags() {
local dev="$2"
local flags="$3"

- ip netns exec $ns ethtool -K $dev $flags 2>/dev/null
- [ $? -eq 0 ] && echo "INFO: set $ns dev $dev: ethtool -K $flags"
+ if ip netns exec $ns ethtool -K $dev $flags 2>/dev/null; then
+ echo "INFO: set $ns dev $dev: ethtool -K $flags"
+ fi
}

set_random_ethtool_flags() {
@@ -363,7 +370,7 @@ do_transfer()
local extra_args="$7"

local port
- port=$((10000+$TEST_COUNT))
+ port=$((10000+TEST_COUNT))
TEST_COUNT=$((TEST_COUNT+1))

if [ "$rcvbuf" -gt 0 ]; then
@@ -420,12 +427,20 @@ do_transfer()
nstat -n
fi

- local stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
- local stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
- local stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
- local stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
- local stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
- local stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+ local stat_synrx_last_l
+ local stat_ackrx_last_l
+ local stat_cookietx_last
+ local stat_cookierx_last
+ local stat_csum_err_s
+ local stat_csum_err_c
+ local stat_tcpfb_last_l
+ stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+ stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+ stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+ stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+ stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+ stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+ stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")

timeout ${timeout_test} \
ip netns exec ${listener_ns} \
@@ -488,11 +503,18 @@ do_transfer()
check_transfer $cin $sout "file received by server"
rets=$?

- local stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
- local stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
- local stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
- local stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
- local stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+ local stat_synrx_now_l
+ local stat_ackrx_now_l
+ local stat_cookietx_now
+ local stat_cookierx_now
+ local stat_ooo_now
+ local stat_tcpfb_now_l
+ stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+ stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+ stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+ stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+ stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+ stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")

expect_synrx=$((stat_synrx_last_l))
expect_ackrx=$((stat_ackrx_last_l))
@@ -501,8 +523,8 @@ do_transfer()
cookies=${cookies##*=}

if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then
- expect_synrx=$((stat_synrx_last_l+$connect_per_transfer))
- expect_ackrx=$((stat_ackrx_last_l+$connect_per_transfer))
+ expect_synrx=$((stat_synrx_last_l+connect_per_transfer))
+ expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer))
fi

if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then
@@ -510,7 +532,7 @@ do_transfer()
"${stat_synrx_now_l}" "${expect_synrx}" 1>&2
retc=1
fi
- if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} -a ${stat_ooo_now} -eq 0 ]; then
+ if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then
if [ ${stat_ooo_now} -eq 0 ]; then
printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \
"${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2
@@ -521,18 +543,20 @@ do_transfer()
fi

if $checksum; then
- local csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
- local csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+ local csum_err_s
+ local csum_err_c
+ csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+ csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")

local csum_err_s_nr=$((csum_err_s - stat_csum_err_s))
if [ $csum_err_s_nr -gt 0 ]; then
- printf "[ FAIL ]\nserver got $csum_err_s_nr data checksum error[s]"
+ printf "[ FAIL ]\nserver got %d data checksum error[s]" ${csum_err_s_nr}
rets=1
fi

local csum_err_c_nr=$((csum_err_c - stat_csum_err_c))
if [ $csum_err_c_nr -gt 0 ]; then
- printf "[ FAIL ]\nclient got $csum_err_c_nr data checksum error[s]"
+ printf "[ FAIL ]\nclient got %d data checksum error[s]" ${csum_err_c_nr}
retc=1
fi
fi
@@ -544,6 +568,11 @@ do_transfer()
mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
fi

+ if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then
+ mptcp_lib_pr_fail "unexpected fallback to TCP"
+ rets=1
+ fi
+
if [ $cookies -eq 2 ];then
if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
printf " WARN: CookieSent: did not advance"
@@ -701,7 +730,7 @@ run_test_transparent()
return
fi

-ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF"
+ if ! ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF"
flush ruleset
table inet mangle {
chain divert {
@@ -712,7 +741,7 @@ table inet mangle {
}
}
EOF
- if [ $? -ne 0 ]; then
+ then
echo "SKIP: $msg, could not load nft ruleset"
mptcp_lib_fail_if_expected_feature "nft rules"
mptcp_lib_result_skip "${TEST_GROUP}"
@@ -727,8 +756,7 @@ EOF
local_addr="0.0.0.0"
fi

- ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100
- if [ $? -ne 0 ]; then
+ if ! ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100; then
ip netns exec "$listener_ns" nft flush ruleset
echo "SKIP: $msg, ip $r6flag rule failed"
mptcp_lib_fail_if_expected_feature "ip rule"
@@ -736,8 +764,7 @@ EOF
return
fi

- ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100
- if [ $? -ne 0 ]; then
+ if ! ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100; then
ip netns exec "$listener_ns" nft flush ruleset
ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
echo "SKIP: $msg, ip route add local $local_addr failed"
@@ -900,7 +927,7 @@ stop_if_error "Could not even run ping tests"
echo -n "INFO: Using loss of $tc_loss "
test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms "

-reorder_delay=$(($tc_delay / 4))
+reorder_delay=$((tc_delay / 4))

if [ -z "${tc_reorder}" ]; then
reorder1=$((RANDOM%10))
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 34c342346967..00cf4efac4c2 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -796,7 +796,7 @@ pm_nl_check_endpoint()
[ -n "$_flags" ]; flags="flags $_flags"
shift
elif [ $1 = "dev" ]; then
- [ -n "$2" ]; dev="dev $1"
+ [ -n "$2" ]; dev="dev $2"
shift
elif [ $1 = "id" ]; then
_id=$2
@@ -3507,6 +3507,8 @@ endpoint_tests()
local tests_pid=$!

wait_mpj $ns2
+ pm_nl_check_endpoint "creation" \
+ $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2
chk_subflow_nr "before delete" 2
chk_mptcp_info subflows 1 subflows 1

diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c
index 7c5b12664b03..bfb07dc49518 100644
--- a/tools/testing/selftests/net/reuseaddr_conflict.c
+++ b/tools/testing/selftests/net/reuseaddr_conflict.c
@@ -109,6 +109,6 @@ int main(void)
fd1 = open_port(0, 1);
if (fd1 >= 0)
error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6");
- fprintf(stderr, "Success");
+ fprintf(stderr, "Success\n");
return 0;
}
diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh
index 31e5f0f8859d..be8e66abc74e 100755
--- a/tools/testing/selftests/net/test_vxlan_mdb.sh
+++ b/tools/testing/selftests/net/test_vxlan_mdb.sh
@@ -984,6 +984,7 @@ encap_params_common()
local plen=$1; shift
local enc_ethtype=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local src=$1; shift
local mz=$1; shift

@@ -1002,11 +1003,11 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020"

run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Destination IP - match"

- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Destination IP - no match"

@@ -1019,20 +1020,20 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020"

run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Default destination port - match"

- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Default destination port - no match"

run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Non-default destination port - match"

- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Non-default destination port - no match"

@@ -1045,11 +1046,11 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020"

run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Default destination VNI - match"

- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Default destination VNI - no match"

@@ -1057,11 +1058,11 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020"

run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Non-default destination VNI - match"

- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Non-default destination VNI - no match"

@@ -1079,6 +1080,7 @@ encap_params_ipv4_ipv4()
local plen=32
local enc_ethtype="ip"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129

echo
@@ -1086,7 +1088,7 @@ encap_params_ipv4_ipv4()
echo "------------------------------------------------------------------"

encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn"
+ $grp $grp_dmac $src "mausezahn"
}

encap_params_ipv6_ipv4()
@@ -1098,6 +1100,7 @@ encap_params_ipv6_ipv4()
local plen=32
local enc_ethtype="ip"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1

echo
@@ -1105,7 +1108,7 @@ encap_params_ipv6_ipv4()
echo "------------------------------------------------------------------"

encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn -6"
+ $grp $grp_dmac $src "mausezahn -6"
}

encap_params_ipv4_ipv6()
@@ -1117,6 +1120,7 @@ encap_params_ipv4_ipv6()
local plen=128
local enc_ethtype="ipv6"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129

echo
@@ -1124,7 +1128,7 @@ encap_params_ipv4_ipv6()
echo "------------------------------------------------------------------"

encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn"
+ $grp $grp_dmac $src "mausezahn"
}

encap_params_ipv6_ipv6()
@@ -1136,6 +1140,7 @@ encap_params_ipv6_ipv6()
local plen=128
local enc_ethtype="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1

echo
@@ -1143,7 +1148,7 @@ encap_params_ipv6_ipv6()
echo "------------------------------------------------------------------"

encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn -6"
+ $grp $grp_dmac $src "mausezahn -6"
}

starg_exclude_ir_common()
@@ -1154,6 +1159,7 @@ starg_exclude_ir_common()
local vtep2_ip=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1175,14 +1181,14 @@ starg_exclude_ir_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010"

# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
log_test $? 0 "Block excluded source - second VTEP"

# Check that valid source is forwarded to both VTEPs.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1192,14 +1198,14 @@ starg_exclude_ir_common()
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"

# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Block excluded source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
log_test $? 0 "Block excluded source after removal - second VTEP"

# Check that valid source is forwarded to the remaining VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Forward valid source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1214,6 +1220,7 @@ starg_exclude_ir_ipv4_ipv4()
local vtep2_ip=198.51.100.200
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145

@@ -1222,7 +1229,7 @@ starg_exclude_ir_ipv4_ipv4()
echo "-------------------------------------------------------------"

starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}

starg_exclude_ir_ipv6_ipv4()
@@ -1233,6 +1240,7 @@ starg_exclude_ir_ipv6_ipv4()
local vtep2_ip=198.51.100.200
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1

@@ -1241,7 +1249,7 @@ starg_exclude_ir_ipv6_ipv4()
echo "-------------------------------------------------------------"

starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}

starg_exclude_ir_ipv4_ipv6()
@@ -1252,6 +1260,7 @@ starg_exclude_ir_ipv4_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145

@@ -1260,7 +1269,7 @@ starg_exclude_ir_ipv4_ipv6()
echo "-------------------------------------------------------------"

starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}

starg_exclude_ir_ipv6_ipv6()
@@ -1271,6 +1280,7 @@ starg_exclude_ir_ipv6_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1

@@ -1279,7 +1289,7 @@ starg_exclude_ir_ipv6_ipv6()
echo "-------------------------------------------------------------"

starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}

starg_include_ir_common()
@@ -1290,6 +1300,7 @@ starg_include_ir_common()
local vtep2_ip=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1311,14 +1322,14 @@ starg_include_ir_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010"

# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
log_test $? 0 "Block excluded source - second VTEP"

# Check that valid source is forwarded to both VTEPs.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1328,14 +1339,14 @@ starg_include_ir_common()
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"

# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Block excluded source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
log_test $? 0 "Block excluded source after removal - second VTEP"

# Check that valid source is forwarded to the remaining VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Forward valid source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1350,6 +1361,7 @@ starg_include_ir_ipv4_ipv4()
local vtep2_ip=198.51.100.200
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145

@@ -1358,7 +1370,7 @@ starg_include_ir_ipv4_ipv4()
echo "-------------------------------------------------------------"

starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}

starg_include_ir_ipv6_ipv4()
@@ -1369,6 +1381,7 @@ starg_include_ir_ipv6_ipv4()
local vtep2_ip=198.51.100.200
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1

@@ -1377,7 +1390,7 @@ starg_include_ir_ipv6_ipv4()
echo "-------------------------------------------------------------"

starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}

starg_include_ir_ipv4_ipv6()
@@ -1388,6 +1401,7 @@ starg_include_ir_ipv4_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145

@@ -1396,7 +1410,7 @@ starg_include_ir_ipv4_ipv6()
echo "-------------------------------------------------------------"

starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}

starg_include_ir_ipv6_ipv6()
@@ -1407,6 +1421,7 @@ starg_include_ir_ipv6_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1

@@ -1415,7 +1430,7 @@ starg_include_ir_ipv6_ipv6()
echo "-------------------------------------------------------------"

starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}

starg_exclude_p2mp_common()
@@ -1425,6 +1440,7 @@ starg_exclude_p2mp_common()
local mcast_grp=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1442,12 +1458,12 @@ starg_exclude_p2mp_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0"

# Check that invalid source is not forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source"

# Check that valid source is forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source"

@@ -1455,7 +1471,7 @@ starg_exclude_p2mp_common()
run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"

# Check that valid source is not received anymore.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Receive of valid source after removal from group"
}
@@ -1467,6 +1483,7 @@ starg_exclude_p2mp_ipv4_ipv4()
local mcast_grp=238.1.1.1
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145

@@ -1474,7 +1491,7 @@ starg_exclude_p2mp_ipv4_ipv4()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"

- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}

@@ -1485,6 +1502,7 @@ starg_exclude_p2mp_ipv6_ipv4()
local mcast_grp=238.1.1.1
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1

@@ -1492,7 +1510,7 @@ starg_exclude_p2mp_ipv6_ipv4()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"

- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}

@@ -1503,6 +1521,7 @@ starg_exclude_p2mp_ipv4_ipv6()
local mcast_grp=ff0e::2
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145

@@ -1510,7 +1529,7 @@ starg_exclude_p2mp_ipv4_ipv6()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"

- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}

@@ -1521,6 +1540,7 @@ starg_exclude_p2mp_ipv6_ipv6()
local mcast_grp=ff0e::2
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1

@@ -1528,7 +1548,7 @@ starg_exclude_p2mp_ipv6_ipv6()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"

- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}

@@ -1539,6 +1559,7 @@ starg_include_p2mp_common()
local mcast_grp=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1556,12 +1577,12 @@ starg_include_p2mp_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0"

# Check that invalid source is not forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source"

# Check that valid source is forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source"

@@ -1569,7 +1590,7 @@ starg_include_p2mp_common()
run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"

# Check that valid source is not received anymore.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Receive of valid source after removal from group"
}
@@ -1581,6 +1602,7 @@ starg_include_p2mp_ipv4_ipv4()
local mcast_grp=238.1.1.1
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145

@@ -1588,7 +1610,7 @@ starg_include_p2mp_ipv4_ipv4()
echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"

- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}

@@ -1599,6 +1621,7 @@ starg_include_p2mp_ipv6_ipv4()
local mcast_grp=238.1.1.1
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1

@@ -1606,7 +1629,7 @@ starg_include_p2mp_ipv6_ipv4()
echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"

- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}

@@ -1617,6 +1640,7 @@ starg_include_p2mp_ipv4_ipv6()
local mcast_grp=ff0e::2
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145

@@ -1624,7 +1648,7 @@ starg_include_p2mp_ipv4_ipv6()
echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"

- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}

@@ -1635,6 +1659,7 @@ starg_include_p2mp_ipv6_ipv6()
local mcast_grp=ff0e::2
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1

@@ -1642,7 +1667,7 @@ starg_include_p2mp_ipv6_ipv6()
echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"

- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}

@@ -1654,6 +1679,7 @@ egress_vni_translation_common()
local plen=$1; shift
local proto=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local src=$1; shift
local mz=$1; shift

@@ -1689,20 +1715,20 @@ egress_vni_translation_common()
# Make sure that packets sent from the first VTEP over VLAN 10 are
# received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on
# the second VTEP, since it is configured as PVID.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
log_test $? 0 "Egress VNI translation - PVID configured"

# Remove PVID flag from VLAN 4000 on the second VTEP and make sure
# packets are no longer received by the SVI interface.
run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
log_test $? 0 "Egress VNI translation - no PVID configured"

# Reconfigure the PVID and make sure packets are received again.
run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2
log_test $? 0 "Egress VNI translation - PVID reconfigured"
}
@@ -1715,6 +1741,7 @@ egress_vni_translation_ipv4_ipv4()
local plen=32
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129

echo
@@ -1722,7 +1749,7 @@ egress_vni_translation_ipv4_ipv4()
echo "----------------------------------------------------------------"

egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn"
+ $grp_dmac $src "mausezahn"
}

egress_vni_translation_ipv6_ipv4()
@@ -1733,6 +1760,7 @@ egress_vni_translation_ipv6_ipv4()
local plen=32
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1

echo
@@ -1740,7 +1768,7 @@ egress_vni_translation_ipv6_ipv4()
echo "----------------------------------------------------------------"

egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn -6"
+ $grp_dmac $src "mausezahn -6"
}

egress_vni_translation_ipv4_ipv6()
@@ -1751,6 +1779,7 @@ egress_vni_translation_ipv4_ipv6()
local plen=128
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129

echo
@@ -1758,7 +1787,7 @@ egress_vni_translation_ipv4_ipv6()
echo "----------------------------------------------------------------"

egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn"
+ $grp_dmac $src "mausezahn"
}

egress_vni_translation_ipv6_ipv6()
@@ -1769,6 +1798,7 @@ egress_vni_translation_ipv6_ipv6()
local plen=128
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1

echo
@@ -1776,7 +1806,7 @@ egress_vni_translation_ipv6_ipv6()
echo "----------------------------------------------------------------"

egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn -6"
+ $grp_dmac $src "mausezahn -6"
}

all_zeros_mdb_common()
@@ -1789,12 +1819,18 @@ all_zeros_mdb_common()
local vtep4_ip=$1; shift
local plen=$1; shift
local ipv4_grp=239.1.1.1
+ local ipv4_grp_dmac=01:00:5e:01:01:01
local ipv4_unreg_grp=239.2.2.2
+ local ipv4_unreg_grp_dmac=01:00:5e:02:02:02
local ipv4_ll_grp=224.0.0.100
+ local ipv4_ll_grp_dmac=01:00:5e:00:00:64
local ipv4_src=192.0.2.129
local ipv6_grp=ff0e::1
+ local ipv6_grp_dmac=33:33:00:00:00:01
local ipv6_unreg_grp=ff0e::2
+ local ipv6_unreg_grp_dmac=33:33:00:00:00:02
local ipv6_ll_grp=ff02::1
+ local ipv6_ll_grp_dmac=33:33:00:00:00:01
local ipv6_src=2001:db8:100::1

# Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic
@@ -1830,7 +1866,7 @@ all_zeros_mdb_common()

# Send registered IPv4 multicast and make sure it only arrives to the
# first VTEP.
- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_grp_dmac -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Registered IPv4 multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
@@ -1838,7 +1874,7 @@ all_zeros_mdb_common()

# Send unregistered IPv4 multicast that is not link-local and make sure
# it arrives to the first and second VTEPs.
- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_unreg_grp_dmac -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Unregistered IPv4 multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1846,7 +1882,7 @@ all_zeros_mdb_common()

# Send IPv4 link-local multicast traffic and make sure it does not
# arrive to any VTEP.
- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_ll_grp_dmac -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Link-local IPv4 multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1881,7 +1917,7 @@ all_zeros_mdb_common()

# Send registered IPv6 multicast and make sure it only arrives to the
# third VTEP.
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_grp_dmac -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 103 1
log_test $? 0 "Registered IPv6 multicast - third VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 104 0
@@ -1889,7 +1925,7 @@ all_zeros_mdb_common()

# Send unregistered IPv6 multicast that is not link-local and make sure
# it arrives to the third and fourth VTEPs.
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_unreg_grp_dmac -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 103 2
log_test $? 0 "Unregistered IPv6 multicast - third VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 104 1
@@ -1897,7 +1933,7 @@ all_zeros_mdb_common()

# Send IPv6 link-local multicast traffic and make sure it does not
# arrive to any VTEP.
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_ll_grp_dmac -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 103 2
log_test $? 0 "Link-local IPv6 multicast - third VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 104 1
@@ -1972,6 +2008,7 @@ mdb_fdb_common()
local plen=$1; shift
local proto=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local src=$1; shift
local mz=$1; shift

@@ -1995,7 +2032,7 @@ mdb_fdb_common()

# Send IP multicast traffic and make sure it is forwarded by the MDB
# and only arrives to the first VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "IP multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
@@ -2012,7 +2049,7 @@ mdb_fdb_common()
# Remove the MDB entry and make sure that IP multicast is now forwarded
# by the FDB to the second VTEP.
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "IP multicast after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 2
@@ -2028,14 +2065,15 @@ mdb_fdb_ipv4_ipv4()
local plen=32
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129

echo
echo "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay"
echo "------------------------------------------------------"

- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn"
}

mdb_fdb_ipv6_ipv4()
@@ -2047,14 +2085,15 @@ mdb_fdb_ipv6_ipv4()
local plen=32
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1

echo
echo "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay"
echo "------------------------------------------------------"

- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn -6"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn -6"
}

mdb_fdb_ipv4_ipv6()
@@ -2066,14 +2105,15 @@ mdb_fdb_ipv4_ipv6()
local plen=128
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129

echo
echo "Data path: MDB with FDB - IPv4 overlay / IPv6 underlay"
echo "------------------------------------------------------"

- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn"
}

mdb_fdb_ipv6_ipv6()
@@ -2085,14 +2125,15 @@ mdb_fdb_ipv6_ipv6()
local plen=128
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1

echo
echo "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay"
echo "------------------------------------------------------"

- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn -6"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn -6"
}

mdb_grp1_loop()
@@ -2127,7 +2168,9 @@ mdb_torture_common()
local vtep1_ip=$1; shift
local vtep2_ip=$1; shift
local grp1=$1; shift
+ local grp1_dmac=$1; shift
local grp2=$1; shift
+ local grp2_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
local pid1
@@ -2152,9 +2195,9 @@ mdb_torture_common()
pid1=$!
mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 &
pid2=$!
- ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+ ip netns exec $ns1 $mz br0.10 -a own -b $grp1_dmac -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
pid3=$!
- ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+ ip netns exec $ns1 $mz br0.10 -a own -b $grp2_dmac -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
pid4=$!

sleep 30
@@ -2170,15 +2213,17 @@ mdb_torture_ipv4_ipv4()
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local grp1=239.1.1.1
+ local grp1_dmac=01:00:5e:01:01:01
local grp2=239.2.2.2
+ local grp2_dmac=01:00:5e:02:02:02
local src=192.0.2.129

echo
echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay"
echo "----------------------------------------------------------"

- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn"
}

mdb_torture_ipv6_ipv4()
@@ -2187,15 +2232,17 @@ mdb_torture_ipv6_ipv4()
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local grp1=ff0e::1
+ local grp1_dmac=33:33:00:00:00:01
local grp2=ff0e::2
+ local grp2_dmac=33:33:00:00:00:02
local src=2001:db8:100::1

echo
echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay"
echo "----------------------------------------------------------"

- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn -6"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn -6"
}

mdb_torture_ipv4_ipv6()
@@ -2204,15 +2251,17 @@ mdb_torture_ipv4_ipv6()
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local grp1=239.1.1.1
+ local grp1_dmac=01:00:5e:01:01:01
local grp2=239.2.2.2
+ local grp2_dmac=01:00:5e:02:02:02
local src=192.0.2.129

echo
echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay"
echo "----------------------------------------------------------"

- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn"
}

mdb_torture_ipv6_ipv6()
@@ -2221,15 +2270,17 @@ mdb_torture_ipv6_ipv6()
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local grp1=ff0e::1
+ local grp1_dmac=33:33:00:00:00:01
local grp2=ff0e::2
+ local grp2_dmac=33:33:00:00:00:02
local src=2001:db8:100::1

echo
echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay"
echo "----------------------------------------------------------"

- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn -6"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn -6"
}

################################################################################
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index 9cd5e885e91f..f4549e6894dd 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -241,7 +241,7 @@ for family in 4 6; do

create_vxlan_pair
ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
- run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1
+ run_test "GRO frag list over UDP tunnel" $OL_NET$DST 10 10
cleanup

# use NAT to circumvent GRO FWD check
@@ -254,13 +254,7 @@ for family in 4 6; do
# load arp cache before running the test to reduce the amount of
# stray traffic on top of the UDP tunnel
ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null
- run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST
- cleanup
-
- create_vxlan_pair
- run_bench "UDP tunnel fwd perf" $OL_NET$DST
- ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
- run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST
+ run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST
cleanup
done