Re: Linux 6.12.77

From: Greg Kroah-Hartman

Date: Fri Mar 13 2026 - 12:38:41 EST

diff --git a/Documentation/hwmon/aht10.rst b/Documentation/hwmon/aht10.rst
index 213644b4ecba..7903b6434326 100644
--- a/Documentation/hwmon/aht10.rst
+++ b/Documentation/hwmon/aht10.rst
@@ -20,6 +20,14 @@ Supported chips:

English: http://www.aosong.com/userfiles/files/media/Data%20Sheet%20AHT20.pdf

+ * Aosong DHT20
+
+ Prefix: 'dht20'
+
+ Addresses scanned: None
+
+ Datasheet: https://www.digikey.co.nz/en/htmldatasheets/production/9184855/0/0/1/101020932
+
Author: Johannes Cornelis Draaijer <jcdra1@xxxxxxxxx>

@@ -33,7 +41,7 @@ The address of this i2c device may only be 0x38
Special Features
----------------

-AHT20 has additional CRC8 support which is sent as the last byte of the sensor
+AHT20, DHT20 has additional CRC8 support which is sent as the last byte of the sensor
values.

Usage Notes
diff --git a/Makefile b/Makefile
index ae059d188511..930fcea203d7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 6
PATCHLEVEL = 12
-SUBLEVEL = 76
+SUBLEVEL = 77
EXTRAVERSION =
NAME = Baby Opossum Posse

diff --git a/arch/Kconfig b/arch/Kconfig
index 593452b43dd4..1812e4e4d714 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -135,6 +135,7 @@ config KPROBES_ON_FTRACE
config UPROBES
def_bool n
depends on ARCH_SUPPORTS_UPROBES
+ select TASKS_TRACE_RCU
help
Uprobes is the user-space counterpart to kprobes: they
enable instrumentation applications (such as 'perf probe')
diff --git a/arch/arm/include/asm/string.h b/arch/arm/include/asm/string.h
index c35250c4991b..96fc6cf460ec 100644
--- a/arch/arm/include/asm/string.h
+++ b/arch/arm/include/asm/string.h
@@ -39,13 +39,17 @@ static inline void *memset32(uint32_t *p, uint32_t v, __kernel_size_t n)
}

#define __HAVE_ARCH_MEMSET64
-extern void *__memset64(uint64_t *, uint32_t low, __kernel_size_t, uint32_t hi);
+extern void *__memset64(uint64_t *, uint32_t first, __kernel_size_t, uint32_t second);
static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
{
- if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
- return __memset64(p, v, n * 8, v >> 32);
- else
- return __memset64(p, v >> 32, n * 8, v);
+ union {
+ uint64_t val;
+ struct {
+ uint32_t first, second;
+ };
+ } word = { .val = v };
+
+ return __memset64(p, word.first, n * 8, word.second);
}

/*
diff --git a/arch/arm64/boot/dts/rockchip/rk3568.dtsi b/arch/arm64/boot/dts/rockchip/rk3568.dtsi
index 6fd67ae27117..0d16f74949b6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3568.dtsi
@@ -97,7 +97,7 @@ pcie3x1: pcie@fe270000 {
<0x0 0xf2000000 0x0 0x00100000>;
ranges = <0x01000000 0x0 0xf2100000 0x0 0xf2100000 0x0 0x00100000>,
<0x02000000 0x0 0xf2200000 0x0 0xf2200000 0x0 0x01e00000>,
- <0x03000000 0x0 0x40000000 0x3 0x40000000 0x0 0x40000000>;
+ <0x03000000 0x3 0x40000000 0x3 0x40000000 0x0 0x40000000>;
reg-names = "dbi", "apb", "config";
resets = <&cru SRST_PCIE30X1_POWERUP>;
reset-names = "pipe";
@@ -150,7 +150,7 @@ pcie3x2: pcie@fe280000 {
<0x0 0xf0000000 0x0 0x00100000>;
ranges = <0x01000000 0x0 0xf0100000 0x0 0xf0100000 0x0 0x00100000>,
<0x02000000 0x0 0xf0200000 0x0 0xf0200000 0x0 0x01e00000>,
- <0x03000000 0x0 0x40000000 0x3 0x80000000 0x0 0x40000000>;
+ <0x03000000 0x3 0x80000000 0x3 0x80000000 0x0 0x40000000>;
reg-names = "dbi", "apb", "config";
resets = <&cru SRST_PCIE30X2_POWERUP>;
reset-names = "pipe";
diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
index bc0f57a26c2f..32ccc5755554 100644
--- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
@@ -1045,7 +1045,7 @@ pcie2x1: pcie@fe260000 {
power-domains = <&power RK3568_PD_PIPE>;
ranges = <0x01000000 0x0 0xf4100000 0x0 0xf4100000 0x0 0x00100000>,
<0x02000000 0x0 0xf4200000 0x0 0xf4200000 0x0 0x01e00000>,
- <0x03000000 0x0 0x40000000 0x3 0x00000000 0x0 0x40000000>;
+ <0x03000000 0x3 0x00000000 0x3 0x00000000 0x0 0x40000000>;
resets = <&cru SRST_PCIE20_POWERUP>;
reset-names = "pipe";
#address-cells = <3>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi
index ad4331bc0780..68801eb5713d 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi
@@ -1650,7 +1650,7 @@ pcie2x1l1: pcie@fe180000 {
power-domains = <&power RK3588_PD_PCIE>;
ranges = <0x01000000 0x0 0xf3100000 0x0 0xf3100000 0x0 0x00100000>,
<0x02000000 0x0 0xf3200000 0x0 0xf3200000 0x0 0x00e00000>,
- <0x03000000 0x0 0x40000000 0x9 0xc0000000 0x0 0x40000000>;
+ <0x03000000 0x9 0xc0000000 0x9 0xc0000000 0x0 0x40000000>;
reg = <0xa 0x40c00000 0x0 0x00400000>,
<0x0 0xfe180000 0x0 0x00010000>,
<0x0 0xf3000000 0x0 0x00100000>;
@@ -1701,7 +1701,7 @@ pcie2x1l2: pcie@fe190000 {
power-domains = <&power RK3588_PD_PCIE>;
ranges = <0x01000000 0x0 0xf4100000 0x0 0xf4100000 0x0 0x00100000>,
<0x02000000 0x0 0xf4200000 0x0 0xf4200000 0x0 0x00e00000>,
- <0x03000000 0x0 0x40000000 0xa 0x00000000 0x0 0x40000000>;
+ <0x03000000 0xa 0x00000000 0xa 0x00000000 0x0 0x40000000>;
reg = <0xa 0x41000000 0x0 0x00400000>,
<0x0 0xfe190000 0x0 0x00010000>,
<0x0 0xf4000000 0x0 0x00100000>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-extra.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-extra.dtsi
index 0ce0934ec6b7..8af2e5b59e1a 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-extra.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-extra.dtsi
@@ -168,7 +168,7 @@ pcie3x4: pcie@fe150000 {
power-domains = <&power RK3588_PD_PCIE>;
ranges = <0x01000000 0x0 0xf0100000 0x0 0xf0100000 0x0 0x00100000>,
<0x02000000 0x0 0xf0200000 0x0 0xf0200000 0x0 0x00e00000>,
- <0x03000000 0x0 0x40000000 0x9 0x00000000 0x0 0x40000000>;
+ <0x03000000 0x9 0x00000000 0x9 0x00000000 0x0 0x40000000>;
reg = <0xa 0x40000000 0x0 0x00400000>,
<0x0 0xfe150000 0x0 0x00010000>,
<0x0 0xf0000000 0x0 0x00100000>;
@@ -254,7 +254,7 @@ pcie3x2: pcie@fe160000 {
power-domains = <&power RK3588_PD_PCIE>;
ranges = <0x01000000 0x0 0xf1100000 0x0 0xf1100000 0x0 0x00100000>,
<0x02000000 0x0 0xf1200000 0x0 0xf1200000 0x0 0x00e00000>,
- <0x03000000 0x0 0x40000000 0x9 0x40000000 0x0 0x40000000>;
+ <0x03000000 0x9 0x40000000 0x9 0x40000000 0x0 0x40000000>;
reg = <0xa 0x40400000 0x0 0x00400000>,
<0x0 0xfe160000 0x0 0x00010000>,
<0x0 0xf1000000 0x0 0x00100000>;
@@ -303,7 +303,7 @@ pcie2x1l0: pcie@fe170000 {
power-domains = <&power RK3588_PD_PCIE>;
ranges = <0x01000000 0x0 0xf2100000 0x0 0xf2100000 0x0 0x00100000>,
<0x02000000 0x0 0xf2200000 0x0 0xf2200000 0x0 0x00e00000>,
- <0x03000000 0x0 0x40000000 0x9 0x80000000 0x0 0x40000000>;
+ <0x03000000 0x9 0x80000000 0x9 0x80000000 0x0 0x40000000>;
reg = <0xa 0x40800000 0x0 0x00400000>,
<0x0 0xfe170000 0x0 0x00010000>,
<0x0 0xf2000000 0x0 0x00100000>;
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 5c09c788aaa6..94ba3bfca2ba 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1561,6 +1561,9 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
case SYS_ID_AA64MMFR3_EL1:
val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE |
ID_AA64MMFR3_EL1_S1PIE;
+
+ if (!system_supports_poe())
+ val &= ~ID_AA64MMFR3_EL1_S1POE;
break;
case SYS_ID_MMFR4_EL1:
val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX);
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 82b57436f2f1..9310196e0a09 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1880,7 +1880,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
image_size = extable_offset + extable_size;
ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr,
- sizeof(u32), &header, &image_ptr,
+ sizeof(u64), &header, &image_ptr,
jit_fill_hole);
if (!ro_header) {
prog = orig_prog;
diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h
index 3c2fb16b11b6..f81375e5e89c 100644
--- a/arch/loongarch/include/asm/setup.h
+++ b/arch/loongarch/include/asm/setup.h
@@ -7,6 +7,7 @@
#define _LOONGARCH_SETUP_H

#include <linux/types.h>
+#include <linux/threads.h>
#include <asm/sections.h>
#include <uapi/asm/setup.h>

@@ -14,6 +15,8 @@

extern unsigned long eentry;
extern unsigned long tlbrentry;
+extern unsigned long pcpu_handlers[NR_CPUS];
+extern long exception_handlers[VECSIZE * 128 / sizeof(long)];
extern char init_command_line[COMMAND_LINE_SIZE];
extern void tlb_init(int cpu);
extern void cpu_cache_init(void);
diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c
index 471652c0c865..e8b95f1bc578 100644
--- a/arch/loongarch/kernel/unwind_orc.c
+++ b/arch/loongarch/kernel/unwind_orc.c
@@ -357,13 +357,21 @@ static bool is_entry_func(unsigned long addr)

static inline unsigned long bt_address(unsigned long ra)
{
- extern unsigned long eentry;
+#if defined(CONFIG_NUMA) && !defined(CONFIG_PREEMPT_RT)
+ int cpu;
+ int vec_sz = sizeof(exception_handlers);

- if (__kernel_text_address(ra))
- return ra;
+ for_each_possible_cpu(cpu) {
+ if (!pcpu_handlers[cpu])
+ continue;

- if (__module_text_address(ra))
- return ra;
+ if (ra >= pcpu_handlers[cpu] &&
+ ra < pcpu_handlers[cpu] + vec_sz) {
+ ra = ra + eentry - pcpu_handlers[cpu];
+ break;
+ }
+ }
+#endif

if (ra >= eentry && ra < eentry + EXCCODE_INT_END * VECSIZE) {
unsigned long func;
@@ -382,10 +390,13 @@ static inline unsigned long bt_address(unsigned long ra)
break;
}

- return func + offset;
+ ra = func + offset;
}

- return ra;
+ if (__kernel_text_address(ra))
+ return ra;
+
+ return 0;
}

bool unwind_next_frame(struct unwind_state *state)
@@ -399,7 +410,7 @@ bool unwind_next_frame(struct unwind_state *state)
return false;

/* Don't let modules unload while we're reading their ORC data. */
- preempt_disable();
+ guard(rcu)();

if (is_entry_func(state->pc))
goto end;
@@ -511,17 +522,12 @@ bool unwind_next_frame(struct unwind_state *state)
goto err;
}

- if (!__kernel_text_address(state->pc))
- goto err;
-
- preempt_enable();
return true;

err:
state->error = true;

end:
- preempt_enable();
state->stack_info.type = STACK_TYPE_UNKNOWN;
return false;
}
diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c
index c9ee6892d81c..d4c42dc67134 100644
--- a/arch/loongarch/kernel/unwind_prologue.c
+++ b/arch/loongarch/kernel/unwind_prologue.c
@@ -22,10 +22,6 @@ extern const int unwind_hint_lasx;
extern const int unwind_hint_lbt;
extern const int unwind_hint_ri;
extern const int unwind_hint_watch;
-extern unsigned long eentry;
-#ifdef CONFIG_NUMA
-extern unsigned long pcpu_handlers[NR_CPUS];
-#endif

static inline bool scan_handlers(unsigned long entry_offset)
{
diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c
index f46c15d6e7ea..24add95ecb65 100644
--- a/arch/loongarch/mm/tlb.c
+++ b/arch/loongarch/mm/tlb.c
@@ -260,7 +260,6 @@ static void output_pgtable_bits_defines(void)
#ifdef CONFIG_NUMA
unsigned long pcpu_handlers[NR_CPUS];
#endif
-extern long exception_handlers[VECSIZE * 128 / sizeof(long)];

static void setup_tlb_handler(int cpu)
{
diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h
index 09f763b9eb40..133059d9a949 100644
--- a/arch/s390/include/asm/idle.h
+++ b/arch/s390/include/asm/idle.h
@@ -23,5 +23,6 @@ extern struct device_attribute dev_attr_idle_count;
extern struct device_attribute dev_attr_idle_time_us;

void psw_idle(struct s390_idle_data *data, unsigned long psw_mask);
+void update_timer_idle(void);

#endif /* _S390_IDLE_H */
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 39cb8d0ae348..0f9e53f0a068 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -21,11 +21,10 @@

static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);

-void account_idle_time_irq(void)
+void update_timer_idle(void)
{
struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
struct lowcore *lc = get_lowcore();
- unsigned long idle_time;
u64 cycles_new[8];
int i;

@@ -35,13 +34,19 @@ void account_idle_time_irq(void)
this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]);
}

- idle_time = lc->int_clock - idle->clock_idle_enter;
-
lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock;
lc->last_update_clock = lc->int_clock;

lc->system_timer += lc->last_update_timer - idle->timer_idle_enter;
lc->last_update_timer = lc->sys_enter_timer;
+}
+
+void account_idle_time_irq(void)
+{
+ struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+ unsigned long idle_time;
+
+ idle_time = get_lowcore()->int_clock - idle->clock_idle_enter;

/* Account time spent with enabled wait psw loaded as idle time. */
WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 2639a3d12736..1fe941dc86c3 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -140,6 +140,10 @@ void noinstr do_io_irq(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
bool from_idle;

+ from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
+ if (from_idle)
+ update_timer_idle();
+
irq_enter_rcu();

if (user_mode(regs)) {
@@ -148,7 +152,6 @@ void noinstr do_io_irq(struct pt_regs *regs)
current->thread.last_break = regs->last_break;
}

- from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
if (from_idle)
account_idle_time_irq();

@@ -176,6 +179,10 @@ void noinstr do_ext_irq(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
bool from_idle;

+ from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
+ if (from_idle)
+ update_timer_idle();
+
irq_enter_rcu();

if (user_mode(regs)) {
@@ -188,7 +195,6 @@ void noinstr do_ext_irq(struct pt_regs *regs)
regs->int_parm = get_lowcore()->ext_params;
regs->int_parm_long = get_lowcore()->ext_params2;

- from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
if (from_idle)
account_idle_time_irq();

diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 234a0ba30510..122d30b10440 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -225,10 +225,6 @@ static u64 vtime_delta(void)
return timer - lc->last_update_timer;
}

-/*
- * Update process times based on virtual cpu times stored by entry.S
- * to the lowcore fields user_timer, system_timer & steal_clock.
- */
void vtime_account_kernel(struct task_struct *tsk)
{
struct lowcore *lc = get_lowcore();
@@ -238,27 +234,17 @@ void vtime_account_kernel(struct task_struct *tsk)
lc->guest_timer += delta;
else
lc->system_timer += delta;
-
- virt_timer_forward(delta);
}
EXPORT_SYMBOL_GPL(vtime_account_kernel);

void vtime_account_softirq(struct task_struct *tsk)
{
- u64 delta = vtime_delta();
-
- get_lowcore()->softirq_timer += delta;
-
- virt_timer_forward(delta);
+ get_lowcore()->softirq_timer += vtime_delta();
}

void vtime_account_hardirq(struct task_struct *tsk)
{
- u64 delta = vtime_delta();
-
- get_lowcore()->hardirq_timer += delta;
-
- virt_timer_forward(delta);
+ get_lowcore()->hardirq_timer += vtime_delta();
}

/*
diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
index f004a4dc74c2..563e439b743f 100644
--- a/arch/x86/entry/entry_fred.c
+++ b/arch/x86/entry/entry_fred.c
@@ -159,8 +159,6 @@ void __init fred_complete_exception_setup(void)
static noinstr void fred_extint(struct pt_regs *regs)
{
unsigned int vector = regs->fred_ss.vector;
- unsigned int index = array_index_nospec(vector - FIRST_SYSTEM_VECTOR,
- NR_SYSTEM_VECTORS);

if (WARN_ON_ONCE(vector < FIRST_EXTERNAL_VECTOR))
return;
@@ -169,7 +167,8 @@ static noinstr void fred_extint(struct pt_regs *regs)
irqentry_state_t state = irqentry_enter(regs);

instrumentation_begin();
- sysvec_table[index](regs);
+ sysvec_table[array_index_nospec(vector - FIRST_SYSTEM_VECTOR,
+ NR_SYSTEM_VECTORS)](regs);
instrumentation_end();
irqentry_exit(regs, state);
} else {
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 521aad70e41b..69920800da36 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -138,7 +138,7 @@ extern void __init efi_apply_memmap_quirks(void);
extern int __init efi_reuse_config(u64 tables, int nr_tables);
extern void efi_delete_dummy_variable(void);
extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr);
-extern void efi_free_boot_services(void);
+extern void efi_unmap_boot_services(void);

void arch_efi_call_virt_setup(void);
void arch_efi_call_virt_teardown(void);
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 63adda8a143f..a1acff7782db 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -35,6 +35,7 @@
#include <asm/smp.h>
#include <asm/i8259.h>
#include <asm/setup.h>
+#include <asm/hypervisor.h>

#include "sleep.h" /* To include x86_acpi_suspend_lowlevel */
static int __initdata acpi_force = 0;
@@ -164,11 +165,14 @@ static bool __init acpi_is_processor_usable(u32 lapic_flags)
if (lapic_flags & ACPI_MADT_ENABLED)
return true;

- if (!acpi_support_online_capable ||
- (lapic_flags & ACPI_MADT_ONLINE_CAPABLE))
- return true;
+ if (acpi_support_online_capable)
+ return lapic_flags & ACPI_MADT_ONLINE_CAPABLE;

- return false;
+ /*
+ * QEMU expects legacy "Enabled=0" LAPIC entries to be counted as usable
+ * in order to support CPU hotplug in guests.
+ */
+ return !hypervisor_is_type(X86_HYPER_NATIVE);
}

static int __init
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index b2e313ea17bf..03d3e1f1a407 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -27,7 +27,6 @@
#include <xen/xen.h>

#include <asm/apic.h>
-#include <asm/hypervisor.h>
#include <asm/io_apic.h>
#include <asm/mpspec.h>
#include <asm/smp.h>
@@ -239,20 +238,6 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
cpuid_to_apicid[cpu] = apic_id;
topo_set_cpuids(cpu, apic_id, acpi_id);
} else {
- u32 pkgid = topo_apicid(apic_id, TOPO_PKG_DOMAIN);
-
- /*
- * Check for present APICs in the same package when running
- * on bare metal. Allow the bogosity in a guest.
- */
- if (hypervisor_is_type(X86_HYPER_NATIVE) &&
- topo_unit_count(pkgid, TOPO_PKG_DOMAIN, phys_cpu_present_map)) {
- pr_info_once("Ignoring hot-pluggable APIC ID %x in present package.\n",
- apic_id);
- topo_info.nr_rejected_cpus++;
- return;
- }
-
topo_info.nr_disabled_cpus++;
}

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f673aaa0490..0d9035993ed3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11285,8 +11285,7 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu)
if (is_guest_mode(vcpu)) {
int r = kvm_check_nested_events(vcpu);

- WARN_ON_ONCE(r == -EBUSY);
- if (r < 0)
+ if (r < 0 && r != -EBUSY)
return 0;
}

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 88a96816de9a..6727cff19d92 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -858,7 +858,7 @@ static void __init __efi_enter_virtual_mode(void)
}

efi_check_for_embedded_firmwares();
- efi_free_boot_services();
+ efi_unmap_boot_services();

if (!efi_is_mixed())
efi_native_runtime_setup();
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index f0cc00032751..df3b45ef1420 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -341,7 +341,7 @@ void __init efi_reserve_boot_services(void)

/*
* Because the following memblock_reserve() is paired
- * with memblock_free_late() for this region in
+ * with free_reserved_area() for this region in
* efi_free_boot_services(), we must be extremely
* careful not to reserve, and subsequently free,
* critical regions of memory (like the kernel image) or
@@ -404,17 +404,33 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md)
pr_err("Failed to unmap VA mapping for 0x%llx\n", va);
}

-void __init efi_free_boot_services(void)
+struct efi_freeable_range {
+ u64 start;
+ u64 end;
+};
+
+static struct efi_freeable_range *ranges_to_free;
+
+void __init efi_unmap_boot_services(void)
{
struct efi_memory_map_data data = { 0 };
efi_memory_desc_t *md;
int num_entries = 0;
+ int idx = 0;
+ size_t sz;
void *new, *new_md;

/* Keep all regions for /sys/kernel/debug/efi */
if (efi_enabled(EFI_DBG))
return;

+ sz = sizeof(*ranges_to_free) * efi.memmap.nr_map + 1;
+ ranges_to_free = kzalloc(sz, GFP_KERNEL);
+ if (!ranges_to_free) {
+ pr_err("Failed to allocate storage for freeable EFI regions\n");
+ return;
+ }
+
for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
@@ -471,7 +487,15 @@ void __init efi_free_boot_services(void)
start = SZ_1M;
}

- memblock_free_late(start, size);
+ /*
+ * With CONFIG_DEFERRED_STRUCT_PAGE_INIT parts of the memory
+ * map are still not initialized and we can't reliably free
+ * memory here.
+ * Queue the ranges to free at a later point.
+ */
+ ranges_to_free[idx].start = start;
+ ranges_to_free[idx].end = start + size;
+ idx++;
}

if (!num_entries)
@@ -512,6 +536,31 @@ void __init efi_free_boot_services(void)
}
}

+static int __init efi_free_boot_services(void)
+{
+ struct efi_freeable_range *range = ranges_to_free;
+ unsigned long freed = 0;
+
+ if (!ranges_to_free)
+ return 0;
+
+ while (range->start) {
+ void *start = phys_to_virt(range->start);
+ void *end = phys_to_virt(range->end);
+
+ free_reserved_area(start, end, -1, NULL);
+ freed += (end - start);
+ range++;
+ }
+ kfree(ranges_to_free);
+
+ if (freed)
+ pr_info("Freeing EFI boot services memory: %ldK\n", freed / SZ_1K);
+
+ return 0;
+}
+arch_initcall(efi_free_boot_services);
+
/*
* A number of config table entries get remapped to virtual addresses
* after entering EFI virtual mode. However, the kexec kernel requires
diff --git a/drivers/acpi/apei/Makefile b/drivers/acpi/apei/Makefile
index 2c474e6477e1..346cdf0a0ef9 100644
--- a/drivers/acpi/apei/Makefile
+++ b/drivers/acpi/apei/Makefile
@@ -1,6 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_ACPI_APEI) += apei.o
obj-$(CONFIG_ACPI_APEI_GHES) += ghes.o
+# clang versions prior to 18 may blow out the stack with KASAN
+ifeq ($(CONFIG_COMPILE_TEST)_$(CONFIG_CC_IS_CLANG)_$(call clang-min-version, 180000),y_y_)
+KASAN_SANITIZE_ghes.o := n
+endif
obj-$(CONFIG_ACPI_APEI_EINJ) += einj.o
einj-y := einj-core.o
einj-$(CONFIG_ACPI_APEI_EINJ_CXL) += einj-cxl.o
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 39dcefb1fdd5..237d8fd2a2cf 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2439,18 +2439,7 @@ static void ata_dev_config_zac(struct ata_device *dev)
dev->zac_zones_optimal_nonseq = U32_MAX;
dev->zac_zones_max_open = U32_MAX;

- /*
- * Always set the 'ZAC' flag for Host-managed devices.
- */
- if (dev->class == ATA_DEV_ZAC)
- dev->flags |= ATA_DFLAG_ZAC;
- else if (ata_id_zoned_cap(dev->id) == 0x01)
- /*
- * Check for host-aware devices.
- */
- dev->flags |= ATA_DFLAG_ZAC;
-
- if (!(dev->flags & ATA_DFLAG_ZAC))
+ if (!ata_dev_is_zac(dev))
return;

if (!ata_identify_page_supported(dev, ATA_LOG_ZONED_INFORMATION)) {
@@ -5532,6 +5521,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
mutex_init(&ap->scsi_scan_mutex);
INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug);
INIT_DELAYED_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
+ INIT_WORK(&ap->deferred_qc_work, ata_scsi_deferred_qc_work);
INIT_LIST_HEAD(&ap->eh_done_q);
init_waitqueue_head(&ap->eh_wait_q);
init_completion(&ap->park_req_pending);
@@ -6152,9 +6142,11 @@ static void ata_port_detach(struct ata_port *ap)
/* wait till EH commits suicide */
ata_port_wait_eh(ap);

- /* it better be dead now */
+ /* It better be dead now and not have any remaining deferred qc. */
WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED));
+ WARN_ON(ap->deferred_qc);

+ cancel_work_sync(&ap->deferred_qc_work);
cancel_delayed_work_sync(&ap->hotplug_task);
cancel_delayed_work_sync(&ap->scsi_rescan_task);

diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 205c62cf9e32..59788a34871a 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -642,12 +642,29 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
set_host_byte(scmd, DID_OK);

ata_qc_for_each_raw(ap, qc, i) {
- if (qc->flags & ATA_QCFLAG_ACTIVE &&
- qc->scsicmd == scmd)
+ if (qc->scsicmd != scmd)
+ continue;
+ if ((qc->flags & ATA_QCFLAG_ACTIVE) ||
+ qc == ap->deferred_qc)
break;
}

- if (i < ATA_MAX_QUEUE) {
+ if (i < ATA_MAX_QUEUE && qc == ap->deferred_qc) {
+ /*
+ * This is a deferred command that timed out while
+ * waiting for the command queue to drain. Since the qc
+ * is not active yet (deferred_qc is still set, so the
+ * deferred qc work has not issued the command yet),
+ * simply signal the timeout by finishing the SCSI
+ * command and clear the deferred qc to prevent the
+ * deferred qc work from issuing this qc.
+ */
+ WARN_ON_ONCE(qc->flags & ATA_QCFLAG_ACTIVE);
+ ap->deferred_qc = NULL;
+ cancel_work(&ap->deferred_qc_work);
+ set_host_byte(scmd, DID_TIME_OUT);
+ scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
+ } else if (i < ATA_MAX_QUEUE) {
/* the scmd has an associated qc */
if (!(qc->flags & ATA_QCFLAG_EH)) {
/* which hasn't failed yet, timeout */
@@ -826,7 +843,7 @@ void ata_port_wait_eh(struct ata_port *ap)
retry:
spin_lock_irqsave(ap->lock, flags);

- while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
+ while (ata_port_eh_scheduled(ap)) {
prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
spin_unlock_irqrestore(ap->lock, flags);
schedule();
@@ -920,6 +937,12 @@ static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)

ap->pflags |= ATA_PFLAG_EH_PENDING;

+ /*
+ * If we have a deferred qc, requeue it so that it is retried once EH
+ * completes.
+ */
+ ata_scsi_requeue_deferred_qc(ap);
+
if (!fastdrain)
return;

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 097080c8b82d..d7c88a111ea3 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1671,8 +1671,78 @@ static void ata_qc_done(struct ata_queued_cmd *qc)
done(cmd);
}

+void ata_scsi_deferred_qc_work(struct work_struct *work)
+{
+ struct ata_port *ap =
+ container_of(work, struct ata_port, deferred_qc_work);
+ struct ata_queued_cmd *qc;
+ unsigned long flags;
+
+ spin_lock_irqsave(ap->lock, flags);
+
+ /*
+ * If we still have a deferred qc and we are not in EH, issue it. In
+ * such case, we should not need any more deferring the qc, so warn if
+ * qc_defer() says otherwise.
+ */
+ qc = ap->deferred_qc;
+ if (qc && !ata_port_eh_scheduled(ap)) {
+ WARN_ON_ONCE(ap->ops->qc_defer(qc));
+ ap->deferred_qc = NULL;
+ ata_qc_issue(qc);
+ }
+
+ spin_unlock_irqrestore(ap->lock, flags);
+}
+
+void ata_scsi_requeue_deferred_qc(struct ata_port *ap)
+{
+ struct ata_queued_cmd *qc = ap->deferred_qc;
+ struct scsi_cmnd *scmd;
+
+ lockdep_assert_held(ap->lock);
+
+ /*
+ * If we have a deferred qc when a reset occurs or NCQ commands fail,
+ * do not try to be smart about what to do with this deferred command
+ * and simply retry it by completing it with DID_SOFT_ERROR.
+ */
+ if (!qc)
+ return;
+
+ scmd = qc->scsicmd;
+ ap->deferred_qc = NULL;
+ cancel_work(&ap->deferred_qc_work);
+ ata_qc_free(qc);
+ scmd->result = (DID_SOFT_ERROR << 16);
+ scsi_done(scmd);
+}
+
+static void ata_scsi_schedule_deferred_qc(struct ata_port *ap)
+{
+ struct ata_queued_cmd *qc = ap->deferred_qc;
+
+ lockdep_assert_held(ap->lock);
+
+ /*
+ * If we have a deferred qc, then qc_defer() is defined and we can use
+ * this callback to determine if this qc is good to go, unless EH has
+ * been scheduled.
+ */
+ if (!qc)
+ return;
+
+ if (ata_port_eh_scheduled(ap)) {
+ ata_scsi_requeue_deferred_qc(ap);
+ return;
+ }
+ if (!ap->ops->qc_defer(qc))
+ queue_work(system_highpri_wq, &ap->deferred_qc_work);
+}
+
static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
{
+ struct ata_port *ap = qc->ap;
struct scsi_cmnd *cmd = qc->scsicmd;
u8 *cdb = cmd->cmnd;
bool have_sense = qc->flags & ATA_QCFLAG_SENSE_VALID;
@@ -1700,6 +1770,8 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
}

ata_qc_done(qc);
+
+ ata_scsi_schedule_deferred_qc(ap);
}

static int ata_scsi_qc_issue(struct ata_port *ap, struct ata_queued_cmd *qc)
@@ -1709,6 +1781,16 @@ static int ata_scsi_qc_issue(struct ata_port *ap, struct ata_queued_cmd *qc)
if (!ap->ops->qc_defer)
goto issue;

+ /*
+ * If we already have a deferred qc, then rely on the SCSI layer to
+ * requeue and defer all incoming commands until the deferred qc is
+ * processed, once all on-going commands complete.
+ */
+ if (ap->deferred_qc) {
+ ata_qc_free(qc);
+ return SCSI_MLQUEUE_DEVICE_BUSY;
+ }
+
/* Check if the command needs to be deferred. */
ret = ap->ops->qc_defer(qc);
switch (ret) {
@@ -1727,6 +1809,18 @@ static int ata_scsi_qc_issue(struct ata_port *ap, struct ata_queued_cmd *qc)
}

if (ret) {
+ /*
+ * We must defer this qc: if this is not an NCQ command, keep
+ * this qc as a deferred one and report to the SCSI layer that
+ * we issued it so that it is not requeued. The deferred qc will
+ * be issued with the port deferred_qc_work once all on-going
+ * commands complete.
+ */
+ if (!ata_is_ncq(qc->tf.protocol)) {
+ ap->deferred_qc = qc;
+ return 0;
+ }
+
/* Force a requeue of the command to defer its execution. */
ata_qc_free(qc);
return ret;
@@ -1806,15 +1900,10 @@ static int ata_scsi_translate(struct ata_device *dev, struct scsi_cmnd *cmd,
return 0;
}

-struct ata_scsi_args {
- struct ata_device *dev;
- u16 *id;
- struct scsi_cmnd *cmd;
-};
-
/**
* ata_scsi_rbuf_fill - wrapper for SCSI command simulators
- * @args: device IDENTIFY data / SCSI command of interest.
+ * @dev: Target device.
+ * @cmd: SCSI command of interest.
* @actor: Callback hook for desired SCSI command simulator
*
* Takes care of the hard work of simulating a SCSI command...
@@ -1827,30 +1916,30 @@ struct ata_scsi_args {
* LOCKING:
* spin_lock_irqsave(host lock)
*/
-static void ata_scsi_rbuf_fill(struct ata_scsi_args *args,
- unsigned int (*actor)(struct ata_scsi_args *args, u8 *rbuf))
+static void ata_scsi_rbuf_fill(struct ata_device *dev, struct scsi_cmnd *cmd,
+ unsigned int (*actor)(struct ata_device *dev,
+ struct scsi_cmnd *cmd, u8 *rbuf))
{
unsigned int rc;
- struct scsi_cmnd *cmd = args->cmd;
unsigned long flags;

spin_lock_irqsave(&ata_scsi_rbuf_lock, flags);

memset(ata_scsi_rbuf, 0, ATA_SCSI_RBUF_SIZE);
- rc = actor(args, ata_scsi_rbuf);
- if (rc == 0)
+ rc = actor(dev, cmd, ata_scsi_rbuf);
+ if (rc == 0) {
sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd),
ata_scsi_rbuf, ATA_SCSI_RBUF_SIZE);
+ cmd->result = SAM_STAT_GOOD;
+ }

spin_unlock_irqrestore(&ata_scsi_rbuf_lock, flags);
-
- if (rc == 0)
- cmd->result = SAM_STAT_GOOD;
}

/**
- * ata_scsiop_inq_std - Simulate INQUIRY command
- * @args: device IDENTIFY data / SCSI command of interest.
+ * ata_scsiop_inq_std - Simulate standard INQUIRY command
+ * @dev: Target device.
+ * @cmd: SCSI command of interest.
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
*
* Returns standard device identification data associated
@@ -1859,7 +1948,8 @@ static void ata_scsi_rbuf_fill(struct ata_scsi_args *args,
* LOCKING:
* spin_lock_irqsave(host lock)
*/
-static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf)
+static unsigned int ata_scsiop_inq_std(struct ata_device *dev,
+ struct scsi_cmnd *cmd, u8 *rbuf)
{
static const u8 versions[] = {
0x00,
@@ -1900,30 +1990,30 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf)
* Set the SCSI Removable Media Bit (RMB) if the ATA removable media
* device bit (obsolete since ATA-8 ACS) is set.
*/
- if (ata_id_removable(args->id))
+ if (ata_id_removable(dev->id))
hdr[1] |= (1 << 7);

- if (args->dev->class == ATA_DEV_ZAC) {
+ if (dev->class == ATA_DEV_ZAC) {
hdr[0] = TYPE_ZBC;
hdr[2] = 0x7; /* claim SPC-5 version compatibility */
}

- if (args->dev->flags & ATA_DFLAG_CDL)
+ if (dev->flags & ATA_DFLAG_CDL)
hdr[2] = 0xd; /* claim SPC-6 version compatibility */

memcpy(rbuf, hdr, sizeof(hdr));
memcpy(&rbuf[8], "ATA ", 8);
- ata_id_string(args->id, &rbuf[16], ATA_ID_PROD, 16);
+ ata_id_string(dev->id, &rbuf[16], ATA_ID_PROD, 16);

/* From SAT, use last 2 words from fw rev unless they are spaces */
- ata_id_string(args->id, &rbuf[32], ATA_ID_FW_REV + 2, 4);
+ ata_id_string(dev->id, &rbuf[32], ATA_ID_FW_REV + 2, 4);
if (strncmp(&rbuf[32], " ", 4) == 0)
- ata_id_string(args->id, &rbuf[32], ATA_ID_FW_REV, 4);
+ ata_id_string(dev->id, &rbuf[32], ATA_ID_FW_REV, 4);

if (rbuf[32] == 0 || rbuf[32] == ' ')
memcpy(&rbuf[32], "n/a ", 4);

- if (ata_id_zoned_cap(args->id) || args->dev->class == ATA_DEV_ZAC)
+ if (ata_id_zoned_cap(dev->id) || dev->class == ATA_DEV_ZAC)
memcpy(rbuf + 58, versions_zbc, sizeof(versions_zbc));
else
memcpy(rbuf + 58, versions, sizeof(versions));
@@ -1933,7 +2023,8 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf)

/**
* ata_scsiop_inq_00 - Simulate INQUIRY VPD page 0, list of pages
- * @args: device IDENTIFY data / SCSI command of interest.
+ * @dev: Target device.
+ * @cmd: SCSI command of interest.
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
*
* Returns list of inquiry VPD pages available.
@@ -1941,7 +2032,8 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf)
* LOCKING:
* spin_lock_irqsave(host lock)
*/
-static unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf)
+static unsigned int ata_scsiop_inq_00(struct ata_device *dev,
+ struct scsi_cmnd *cmd, u8 *rbuf)
{
int i, num_pages = 0;
static const u8 pages[] = {
@@ -1957,8 +2049,7 @@ static unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf)
};

for (i = 0; i < sizeof(pages); i++) {
- if (pages[i] == 0xb6 &&
- !(args->dev->flags & ATA_DFLAG_ZAC))
+ if (pages[i] == 0xb6 && !ata_dev_is_zac(dev))
continue;
rbuf[num_pages + 4] = pages[i];
num_pages++;
@@ -1969,7 +2060,8 @@ static unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf)

/**
* ata_scsiop_inq_80 - Simulate INQUIRY VPD page 80, device serial number
- * @args: device IDENTIFY data / SCSI command of interest.
+ * @dev: Target device.
+ * @cmd: SCSI command of interest.
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
*
* Returns ATA device serial number.
@@ -1977,7 +2069,8 @@ static unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf)
* LOCKING:
* spin_lock_irqsave(host lock)
*/
-static unsigned int ata_scsiop_inq_80(struct ata_scsi_args *args, u8 *rbuf)
+static unsigned int ata_scsiop_inq_80(struct ata_device *dev,
+ struct scsi_cmnd *cmd, u8 *rbuf)
{
static const u8 hdr[] = {
0,
@@ -1987,14 +2080,15 @@ static unsigned int ata_scsiop_inq_80(struct ata_scsi_args *args, u8 *rbuf)
};

memcpy(rbuf, hdr, sizeof(hdr));
- ata_id_string(args->id, (unsigned char *) &rbuf[4],
+ ata_id_string(dev->id, (unsigned char *) &rbuf[4],
ATA_ID_SERNO, ATA_ID_SERNO_LEN);
return 0;
}

/**
* ata_scsiop_inq_83 - Simulate INQUIRY VPD page 83, device identity
- * @args: device IDENTIFY data / SCSI command of interest.
+ * @dev: Target device.
+ * @cmd: SCSI command of interest.
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
*
* Yields two logical unit device identification designators:
@@ -2005,7 +2099,8 @@ static unsigned int ata_scsiop_inq_80(struct ata_scsi_args *args, u8 *rbuf)
* LOCKING:
* spin_lock_irqsave(host lock)
*/
-static unsigned int ata_scsiop_inq_83(struct ata_scsi_args *args, u8 *rbuf)
+static unsigned int ata_scsiop_inq_83(struct ata_device *dev,
+ struct scsi_cmnd *cmd, u8 *rbuf)
{
const int sat_model_serial_desc_len = 68;
int num;
@@ -2017,7 +2112,7 @@ static unsigned int ata_scsiop_inq_83(struct ata_scsi_args *args, u8 *rbuf)
rbuf[num + 0] = 2;
rbuf[num + 3] = ATA_ID_SERNO_LEN;
num += 4;
- ata_id_string(args->id, (unsigned char *) rbuf + num,
+ ata_id_string(dev->id, (unsigned char *) rbuf + num,
ATA_ID_SERNO, ATA_ID_SERNO_LEN);
num += ATA_ID_SERNO_LEN;

@@ -2029,21 +2124,21 @@ static unsigned int ata_scsiop_inq_83(struct ata_scsi_args *args, u8 *rbuf)
num += 4;
memcpy(rbuf + num, "ATA ", 8);
num += 8;
- ata_id_string(args->id, (unsigned char *) rbuf + num, ATA_ID_PROD,
+ ata_id_string(dev->id, (unsigned char *) rbuf + num, ATA_ID_PROD,
ATA_ID_PROD_LEN);
num += ATA_ID_PROD_LEN;
- ata_id_string(args->id, (unsigned char *) rbuf + num, ATA_ID_SERNO,
+ ata_id_string(dev->id, (unsigned char *) rbuf + num, ATA_ID_SERNO,
ATA_ID_SERNO_LEN);
num += ATA_ID_SERNO_LEN;

- if (ata_id_has_wwn(args->id)) {
+ if (ata_id_has_wwn(dev->id)) {
/* SAT defined lu world wide name */
/* piv=0, assoc=lu, code_set=binary, designator=NAA */
rbuf[num + 0] = 1;
rbuf[num + 1] = 3;
rbuf[num + 3] = ATA_ID_WWN_LEN;
num += 4;
- ata_id_string(args->id, (unsigned char *) rbuf + num,
+ ata_id_string(dev->id, (unsigned char *) rbuf + num,
ATA_ID_WWN, ATA_ID_WWN_LEN);
num += ATA_ID_WWN_LEN;
}
@@ -2053,7 +2148,8 @@ static unsigned int ata_scsiop_inq_83(struct ata_scsi_args *args, u8 *rbuf)

/**
* ata_scsiop_inq_89 - Simulate INQUIRY VPD page 89, ATA info
- * @args: device IDENTIFY data / SCSI command of interest.
+ * @dev: Target device.
+ * @cmd: SCSI command of interest.
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
*
* Yields SAT-specified ATA VPD page.
@@ -2061,7 +2157,8 @@ static unsigned int ata_scsiop_inq_83(struct ata_scsi_args *args, u8 *rbuf)
* LOCKING:
* spin_lock_irqsave(host lock)
*/
-static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf)
+static unsigned int ata_scsiop_inq_89(struct ata_device *dev,
+ struct scsi_cmnd *cmd, u8 *rbuf)
{
rbuf[1] = 0x89; /* our page code */
rbuf[2] = (0x238 >> 8); /* page size fixed at 238h */
@@ -2082,13 +2179,24 @@ static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf)

rbuf[56] = ATA_CMD_ID_ATA;

- memcpy(&rbuf[60], &args->id[0], 512);
+ memcpy(&rbuf[60], &dev->id[0], 512);
return 0;
}

-static unsigned int ata_scsiop_inq_b0(struct ata_scsi_args *args, u8 *rbuf)
+/**
+ * ata_scsiop_inq_b0 - Simulate INQUIRY VPD page B0, Block Limits
+ * @dev: Target device.
+ * @cmd: SCSI command of interest.
+ * @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
+ *
+ * Return data for the VPD page B0h (Block Limits).
+ *
+ * LOCKING:
+ * spin_lock_irqsave(host lock)
+ */
+static unsigned int ata_scsiop_inq_b0(struct ata_device *dev,
+ struct scsi_cmnd *cmd, u8 *rbuf)
{
- struct ata_device *dev = args->dev;
u16 min_io_sectors;

rbuf[1] = 0xb0;
@@ -2101,7 +2209,7 @@ static unsigned int ata_scsiop_inq_b0(struct ata_scsi_args *args, u8 *rbuf)
* logical than physical sector size we need to figure out what the
* latter is.
*/
- min_io_sectors = 1 << ata_id_log2_per_physical_sector(args->id);
+ min_io_sectors = 1 << ata_id_log2_per_physical_sector(dev->id);
put_unaligned_be16(min_io_sectors, &rbuf[6]);

/*
@@ -2113,7 +2221,7 @@ static unsigned int ata_scsiop_inq_b0(struct ata_scsi_args *args, u8 *rbuf)
* that we support some form of unmap - in thise case via WRITE SAME
* with the unmap bit set.
*/
- if (ata_id_has_trim(args->id)) {
+ if (ata_id_has_trim(dev->id)) {
u64 max_blocks = 65535 * ATA_MAX_TRIM_RNUM;

if (dev->quirks & ATA_QUIRK_MAX_TRIM_128M)
@@ -2126,11 +2234,24 @@ static unsigned int ata_scsiop_inq_b0(struct ata_scsi_args *args, u8 *rbuf)
return 0;
}

-static unsigned int ata_scsiop_inq_b1(struct ata_scsi_args *args, u8 *rbuf)
+/**
+ * ata_scsiop_inq_b1 - Simulate INQUIRY VPD page B1, Block Device
+ * Characteristics
+ * @dev: Target device.
+ * @cmd: SCSI command of interest.
+ * @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
+ *
+ * Return data for the VPD page B1h (Block Device Characteristics).
+ *
+ * LOCKING:
+ * spin_lock_irqsave(host lock)
+ */
+static unsigned int ata_scsiop_inq_b1(struct ata_device *dev,
+ struct scsi_cmnd *cmd, u8 *rbuf)
{
- int form_factor = ata_id_form_factor(args->id);
- int media_rotation_rate = ata_id_rotation_rate(args->id);
- u8 zoned = ata_id_zoned_cap(args->id);
+ int form_factor = ata_id_form_factor(dev->id);
+ int media_rotation_rate = ata_id_rotation_rate(dev->id);
+ u8 zoned = ata_id_zoned_cap(dev->id);

rbuf[1] = 0xb1;
rbuf[3] = 0x3c;
@@ -2143,7 +2264,20 @@ static unsigned int ata_scsiop_inq_b1(struct ata_scsi_args *args, u8 *rbuf)
return 0;
}

-static unsigned int ata_scsiop_inq_b2(struct ata_scsi_args *args, u8 *rbuf)
+/**
+ * ata_scsiop_inq_b2 - Simulate INQUIRY VPD page B2, Logical Block
+ * Provisioning
+ * @dev: Target device.
+ * @cmd: SCSI command of interest.
+ * @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
+ *
+ * Return data for the VPD page B2h (Logical Block Provisioning).
+ *
+ * LOCKING:
+ * spin_lock_irqsave(host lock)
+ */
+static unsigned int ata_scsiop_inq_b2(struct ata_device *dev,
+ struct scsi_cmnd *cmd, u8 *rbuf)
{
/* SCSI Thin Provisioning VPD page: SBC-3 rev 22 or later */
rbuf[1] = 0xb2;
@@ -2153,8 +2287,26 @@ static unsigned int ata_scsiop_inq_b2(struct ata_scsi_args *args, u8 *rbuf)
return 0;
}

-static unsigned int ata_scsiop_inq_b6(struct ata_scsi_args *args, u8 *rbuf)
+/**
+ * ata_scsiop_inq_b6 - Simulate INQUIRY VPD page B6, Zoned Block Device
+ * Characteristics
+ * @dev: Target device.
+ * @cmd: SCSI command of interest.
+ * @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
+ *
+ * Return data for the VPD page B2h (Zoned Block Device Characteristics).
+ *
+ * LOCKING:
+ * spin_lock_irqsave(host lock)
+ */
+static unsigned int ata_scsiop_inq_b6(struct ata_device *dev,
+ struct scsi_cmnd *cmd, u8 *rbuf)
{
+ if (!ata_dev_is_zac(dev)) {
+ ata_scsi_set_invalid_field(dev, cmd, 2, 0xff);
+ return 1;
+ }
+
/*
* zbc-r05 SCSI Zoned Block device characteristics VPD page
*/
@@ -2164,21 +2316,39 @@ static unsigned int ata_scsiop_inq_b6(struct ata_scsi_args *args, u8 *rbuf)
/*
* URSWRZ bit is only meaningful for host-managed ZAC drives
*/
- if (args->dev->zac_zoned_cap & 1)
+ if (dev->zac_zoned_cap & 1)
rbuf[4] |= 1;
- put_unaligned_be32(args->dev->zac_zones_optimal_open, &rbuf[8]);
- put_unaligned_be32(args->dev->zac_zones_optimal_nonseq, &rbuf[12]);
- put_unaligned_be32(args->dev->zac_zones_max_open, &rbuf[16]);
+ put_unaligned_be32(dev->zac_zones_optimal_open, &rbuf[8]);
+ put_unaligned_be32(dev->zac_zones_optimal_nonseq, &rbuf[12]);
+ put_unaligned_be32(dev->zac_zones_max_open, &rbuf[16]);

return 0;
}

-static unsigned int ata_scsiop_inq_b9(struct ata_scsi_args *args, u8 *rbuf)
+/**
+ * ata_scsiop_inq_b9 - Simulate INQUIRY VPD page B9, Concurrent Positioning
+ * Ranges
+ * @dev: Target device.
+ * @cmd: SCSI command of interest.
+ * @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
+ *
+ * Return data for the VPD page B9h (Concurrent Positioning Ranges).
+ *
+ * LOCKING:
+ * spin_lock_irqsave(host lock)
+ */
+static unsigned int ata_scsiop_inq_b9(struct ata_device *dev,
+ struct scsi_cmnd *cmd, u8 *rbuf)
{
- struct ata_cpr_log *cpr_log = args->dev->cpr_log;
+ struct ata_cpr_log *cpr_log = dev->cpr_log;
u8 *desc = &rbuf[64];
int i;

+ if (!cpr_log) {
+ ata_scsi_set_invalid_field(dev, cmd, 2, 0xff);
+ return 1;
+ }
+
/* SCSI Concurrent Positioning Ranges VPD page: SBC-5 rev 1 or later */
rbuf[1] = 0xb9;
put_unaligned_be16(64 + (int)cpr_log->nr_cpr * 32 - 4, &rbuf[2]);
@@ -2193,6 +2363,57 @@ static unsigned int ata_scsiop_inq_b9(struct ata_scsi_args *args, u8 *rbuf)
return 0;
}

+/**
+ * ata_scsiop_inquiry - Simulate INQUIRY command
+ * @dev: Target device.
+ * @cmd: SCSI command of interest.
+ * @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
+ *
+ * Returns data associated with an INQUIRY command output.
+ *
+ * LOCKING:
+ * spin_lock_irqsave(host lock)
+ */
+static unsigned int ata_scsiop_inquiry(struct ata_device *dev,
+ struct scsi_cmnd *cmd, u8 *rbuf)
+{
+ const u8 *scsicmd = cmd->cmnd;
+
+ /* is CmdDt set? */
+ if (scsicmd[1] & 2) {
+ ata_scsi_set_invalid_field(dev, cmd, 1, 0xff);
+ return 1;
+ }
+
+ /* Is EVPD clear? */
+ if ((scsicmd[1] & 1) == 0)
+ return ata_scsiop_inq_std(dev, cmd, rbuf);
+
+ switch (scsicmd[2]) {
+ case 0x00:
+ return ata_scsiop_inq_00(dev, cmd, rbuf);
+ case 0x80:
+ return ata_scsiop_inq_80(dev, cmd, rbuf);
+ case 0x83:
+ return ata_scsiop_inq_83(dev, cmd, rbuf);
+ case 0x89:
+ return ata_scsiop_inq_89(dev, cmd, rbuf);
+ case 0xb0:
+ return ata_scsiop_inq_b0(dev, cmd, rbuf);
+ case 0xb1:
+ return ata_scsiop_inq_b1(dev, cmd, rbuf);
+ case 0xb2:
+ return ata_scsiop_inq_b2(dev, cmd, rbuf);
+ case 0xb6:
+ return ata_scsiop_inq_b6(dev, cmd, rbuf);
+ case 0xb9:
+ return ata_scsiop_inq_b9(dev, cmd, rbuf);
+ default:
+ ata_scsi_set_invalid_field(dev, cmd, 2, 0xff);
+ return 1;
+ }
+}
+
/**
* modecpy - Prepare response for MODE SENSE
* @dest: output buffer
@@ -2413,7 +2634,8 @@ static unsigned int ata_msense_rw_recovery(u8 *buf, bool changeable)

/**
* ata_scsiop_mode_sense - Simulate MODE SENSE 6, 10 commands
- * @args: device IDENTIFY data / SCSI command of interest.
+ * @dev: Target device.
+ * @cmd: SCSI command of interest.
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
*
* Simulate MODE SENSE commands. Assume this is invoked for direct
@@ -2423,10 +2645,10 @@ static unsigned int ata_msense_rw_recovery(u8 *buf, bool changeable)
* LOCKING:
* spin_lock_irqsave(host lock)
*/
-static unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf)
+static unsigned int ata_scsiop_mode_sense(struct ata_device *dev,
+ struct scsi_cmnd *cmd, u8 *rbuf)
{
- struct ata_device *dev = args->dev;
- u8 *scsicmd = args->cmd->cmnd, *p = rbuf;
+ u8 *scsicmd = cmd->cmnd, *p = rbuf;
static const u8 sat_blk_desc[] = {
0, 0, 0, 0, /* number of blocks: sat unspecified */
0,
@@ -2491,17 +2713,17 @@ static unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf)
break;

case CACHE_MPAGE:
- p += ata_msense_caching(args->id, p, page_control == 1);
+ p += ata_msense_caching(dev->id, p, page_control == 1);
break;

case CONTROL_MPAGE:
- p += ata_msense_control(args->dev, p, spg, page_control == 1);
+ p += ata_msense_control(dev, p, spg, page_control == 1);
break;

case ALL_MPAGES:
p += ata_msense_rw_recovery(p, page_control == 1);
- p += ata_msense_caching(args->id, p, page_control == 1);
- p += ata_msense_control(args->dev, p, spg, page_control == 1);
+ p += ata_msense_caching(dev->id, p, page_control == 1);
+ p += ata_msense_control(dev, p, spg, page_control == 1);
break;

default: /* invalid page code */
@@ -2530,18 +2752,19 @@ static unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf)
return 0;

invalid_fld:
- ata_scsi_set_invalid_field(dev, args->cmd, fp, bp);
+ ata_scsi_set_invalid_field(dev, cmd, fp, bp);
return 1;

saving_not_supp:
- ata_scsi_set_sense(dev, args->cmd, ILLEGAL_REQUEST, 0x39, 0x0);
+ ata_scsi_set_sense(dev, cmd, ILLEGAL_REQUEST, 0x39, 0x0);
/* "Saving parameters not supported" */
return 1;
}

/**
* ata_scsiop_read_cap - Simulate READ CAPACITY[ 16] commands
- * @args: device IDENTIFY data / SCSI command of interest.
+ * @dev: Target device.
+ * @cmd: SCSI command of interest.
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
*
* Simulate READ CAPACITY commands.
@@ -2549,9 +2772,10 @@ static unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf)
* LOCKING:
* None.
*/
-static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf)
+static unsigned int ata_scsiop_read_cap(struct ata_device *dev,
+ struct scsi_cmnd *cmd, u8 *rbuf)
{
- struct ata_device *dev = args->dev;
+ u8 *scsicmd = cmd->cmnd;
u64 last_lba = dev->n_sectors - 1; /* LBA of the last block */
u32 sector_size; /* physical sector size in bytes */
u8 log2_per_phys;
@@ -2561,7 +2785,7 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf)
log2_per_phys = ata_id_log2_per_physical_sector(dev->id);
lowest_aligned = ata_id_logical_sector_offset(dev->id, log2_per_phys);

- if (args->cmd->cmnd[0] == READ_CAPACITY) {
+ if (scsicmd[0] == READ_CAPACITY) {
if (last_lba >= 0xffffffffULL)
last_lba = 0xffffffff;

@@ -2576,48 +2800,59 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf)
rbuf[5] = sector_size >> (8 * 2);
rbuf[6] = sector_size >> (8 * 1);
rbuf[7] = sector_size;
- } else {
- /* sector count, 64-bit */
- rbuf[0] = last_lba >> (8 * 7);
- rbuf[1] = last_lba >> (8 * 6);
- rbuf[2] = last_lba >> (8 * 5);
- rbuf[3] = last_lba >> (8 * 4);
- rbuf[4] = last_lba >> (8 * 3);
- rbuf[5] = last_lba >> (8 * 2);
- rbuf[6] = last_lba >> (8 * 1);
- rbuf[7] = last_lba;

- /* sector size */
- rbuf[ 8] = sector_size >> (8 * 3);
- rbuf[ 9] = sector_size >> (8 * 2);
- rbuf[10] = sector_size >> (8 * 1);
- rbuf[11] = sector_size;
-
- rbuf[12] = 0;
- rbuf[13] = log2_per_phys;
- rbuf[14] = (lowest_aligned >> 8) & 0x3f;
- rbuf[15] = lowest_aligned;
-
- if (ata_id_has_trim(args->id) &&
- !(dev->quirks & ATA_QUIRK_NOTRIM)) {
- rbuf[14] |= 0x80; /* LBPME */
-
- if (ata_id_has_zero_after_trim(args->id) &&
- dev->quirks & ATA_QUIRK_ZERO_AFTER_TRIM) {
- ata_dev_info(dev, "Enabling discard_zeroes_data\n");
- rbuf[14] |= 0x40; /* LBPRZ */
- }
+ return 0;
+ }
+
+ /*
+ * READ CAPACITY 16 command is defined as a service action
+ * (SERVICE_ACTION_IN_16 command).
+ */
+ if (scsicmd[0] != SERVICE_ACTION_IN_16 ||
+ (scsicmd[1] & 0x1f) != SAI_READ_CAPACITY_16) {
+ ata_scsi_set_invalid_field(dev, cmd, 1, 0xff);
+ return 1;
+ }
+
+ /* sector count, 64-bit */
+ rbuf[0] = last_lba >> (8 * 7);
+ rbuf[1] = last_lba >> (8 * 6);
+ rbuf[2] = last_lba >> (8 * 5);
+ rbuf[3] = last_lba >> (8 * 4);
+ rbuf[4] = last_lba >> (8 * 3);
+ rbuf[5] = last_lba >> (8 * 2);
+ rbuf[6] = last_lba >> (8 * 1);
+ rbuf[7] = last_lba;
+
+ /* sector size */
+ rbuf[ 8] = sector_size >> (8 * 3);
+ rbuf[ 9] = sector_size >> (8 * 2);
+ rbuf[10] = sector_size >> (8 * 1);
+ rbuf[11] = sector_size;
+
+ if (ata_id_zoned_cap(dev->id) || dev->class == ATA_DEV_ZAC)
+ rbuf[12] = (1 << 4); /* RC_BASIS */
+ rbuf[13] = log2_per_phys;
+ rbuf[14] = (lowest_aligned >> 8) & 0x3f;
+ rbuf[15] = lowest_aligned;
+
+ if (ata_id_has_trim(dev->id) && !(dev->quirks & ATA_QUIRK_NOTRIM)) {
+ rbuf[14] |= 0x80; /* LBPME */
+
+ if (ata_id_has_zero_after_trim(dev->id) &&
+ dev->quirks & ATA_QUIRK_ZERO_AFTER_TRIM) {
+ ata_dev_info(dev, "Enabling discard_zeroes_data\n");
+ rbuf[14] |= 0x40; /* LBPRZ */
}
- if (ata_id_zoned_cap(args->id) ||
- args->dev->class == ATA_DEV_ZAC)
- rbuf[12] = (1 << 4); /* RC_BASIS */
}
+
return 0;
}

/**
* ata_scsiop_report_luns - Simulate REPORT LUNS command
- * @args: device IDENTIFY data / SCSI command of interest.
+ * @dev: Target device.
+ * @cmd: SCSI command of interest.
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
*
* Simulate REPORT LUNS command.
@@ -2625,7 +2860,8 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf)
* LOCKING:
* spin_lock_irqsave(host lock)
*/
-static unsigned int ata_scsiop_report_luns(struct ata_scsi_args *args, u8 *rbuf)
+static unsigned int ata_scsiop_report_luns(struct ata_device *dev,
+ struct scsi_cmnd *cmd, u8 *rbuf)
{
rbuf[3] = 8; /* just one lun, LUN 0, size 8 bytes */

@@ -3340,7 +3576,8 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc)

/**
* ata_scsiop_maint_in - Simulate a subset of MAINTENANCE_IN
- * @args: device MAINTENANCE_IN data / SCSI command of interest.
+ * @dev: Target device.
+ * @cmd: SCSI command of interest.
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
*
* Yields a subset to satisfy scsi_report_opcode()
@@ -3348,17 +3585,21 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc)
* LOCKING:
* spin_lock_irqsave(host lock)
*/
-static unsigned int ata_scsiop_maint_in(struct ata_scsi_args *args, u8 *rbuf)
+static unsigned int ata_scsiop_maint_in(struct ata_device *dev,
+ struct scsi_cmnd *cmd, u8 *rbuf)
{
- struct ata_device *dev = args->dev;
- u8 *cdb = args->cmd->cmnd;
+ u8 *cdb = cmd->cmnd;
u8 supported = 0, cdlp = 0, rwcdlp = 0;
- unsigned int err = 0;
+
+ if ((cdb[1] & 0x1f) != MI_REPORT_SUPPORTED_OPERATION_CODES) {
+ ata_scsi_set_invalid_field(dev, cmd, 1, 0xff);
+ return 1;
+ }

if (cdb[2] != 1 && cdb[2] != 3) {
ata_dev_warn(dev, "invalid command format %d\n", cdb[2]);
- err = 2;
- goto out;
+ ata_scsi_set_invalid_field(dev, cmd, 1, 0xff);
+ return 1;
}

switch (cdb[3]) {
@@ -3426,11 +3667,12 @@ static unsigned int ata_scsiop_maint_in(struct ata_scsi_args *args, u8 *rbuf)
default:
break;
}
-out:
+
/* One command format */
rbuf[0] = rwcdlp;
rbuf[1] = cdlp | supported;
- return err;
+
+ return 0;
}

/**
@@ -4191,9 +4433,10 @@ int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev)
* scsi_queue_rq() will defer commands if scsi_host_in_recovery().
* However, this check is done without holding the ap->lock (a libata
* specific lock), so we can have received an error irq since then,
- * therefore we must check if EH is pending, while holding ap->lock.
+ * therefore we must check if EH is pending or running, while holding
+ * ap->lock.
*/
- if (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS))
+ if (ata_port_eh_scheduled(ap))
return SCSI_MLQUEUE_DEVICE_BUSY;

if (unlikely(!scmd->cmd_len))
@@ -4294,78 +4537,26 @@ EXPORT_SYMBOL_GPL(ata_scsi_queuecmd);

void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd)
{
- struct ata_scsi_args args;
const u8 *scsicmd = cmd->cmnd;
u8 tmp8;

- args.dev = dev;
- args.id = dev->id;
- args.cmd = cmd;
-
switch(scsicmd[0]) {
case INQUIRY:
- if (scsicmd[1] & 2) /* is CmdDt set? */
- ata_scsi_set_invalid_field(dev, cmd, 1, 0xff);
- else if ((scsicmd[1] & 1) == 0) /* is EVPD clear? */
- ata_scsi_rbuf_fill(&args, ata_scsiop_inq_std);
- else switch (scsicmd[2]) {
- case 0x00:
- ata_scsi_rbuf_fill(&args, ata_scsiop_inq_00);
- break;
- case 0x80:
- ata_scsi_rbuf_fill(&args, ata_scsiop_inq_80);
- break;
- case 0x83:
- ata_scsi_rbuf_fill(&args, ata_scsiop_inq_83);
- break;
- case 0x89:
- ata_scsi_rbuf_fill(&args, ata_scsiop_inq_89);
- break;
- case 0xb0:
- ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b0);
- break;
- case 0xb1:
- ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b1);
- break;
- case 0xb2:
- ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b2);
- break;
- case 0xb6:
- if (dev->flags & ATA_DFLAG_ZAC)
- ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b6);
- else
- ata_scsi_set_invalid_field(dev, cmd, 2, 0xff);
- break;
- case 0xb9:
- if (dev->cpr_log)
- ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b9);
- else
- ata_scsi_set_invalid_field(dev, cmd, 2, 0xff);
- break;
- default:
- ata_scsi_set_invalid_field(dev, cmd, 2, 0xff);
- break;
- }
+ ata_scsi_rbuf_fill(dev, cmd, ata_scsiop_inquiry);
break;

case MODE_SENSE:
case MODE_SENSE_10:
- ata_scsi_rbuf_fill(&args, ata_scsiop_mode_sense);
+ ata_scsi_rbuf_fill(dev, cmd, ata_scsiop_mode_sense);
break;

case READ_CAPACITY:
- ata_scsi_rbuf_fill(&args, ata_scsiop_read_cap);
- break;
-
case SERVICE_ACTION_IN_16:
- if ((scsicmd[1] & 0x1f) == SAI_READ_CAPACITY_16)
- ata_scsi_rbuf_fill(&args, ata_scsiop_read_cap);
- else
- ata_scsi_set_invalid_field(dev, cmd, 1, 0xff);
+ ata_scsi_rbuf_fill(dev, cmd, ata_scsiop_read_cap);
break;

case REPORT_LUNS:
- ata_scsi_rbuf_fill(&args, ata_scsiop_report_luns);
+ ata_scsi_rbuf_fill(dev, cmd, ata_scsiop_report_luns);
break;

case REQUEST_SENSE:
@@ -4393,10 +4584,7 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd)
break;

case MAINTENANCE_IN:
- if ((scsicmd[1] & 0x1f) == MI_REPORT_SUPPORTED_OPERATION_CODES)
- ata_scsi_rbuf_fill(&args, ata_scsiop_maint_in);
- else
- ata_scsi_set_invalid_field(dev, cmd, 1, 0xff);
+ ata_scsi_rbuf_fill(dev, cmd, ata_scsiop_maint_in);
break;

/* all other commands */
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index d07693bd054e..1a2d0f7115b5 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -44,6 +44,18 @@ static inline bool ata_sstatus_online(u32 sstatus)
return (sstatus & 0xf) == 0x3;
}

+static inline bool ata_dev_is_zac(struct ata_device *dev)
+{
+ /* Host managed device or host aware device */
+ return dev->class == ATA_DEV_ZAC ||
+ ata_id_zoned_cap(dev->id) == 0x01;
+}
+
+static inline bool ata_port_eh_scheduled(struct ata_port *ap)
+{
+ return ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS);
+}
+
#ifdef CONFIG_ATA_FORCE
extern void ata_force_cbl(struct ata_port *ap);
#else
@@ -154,6 +166,8 @@ void ata_scsi_sdev_config(struct scsi_device *sdev);
int ata_scsi_dev_config(struct scsi_device *sdev, struct queue_limits *lim,
struct ata_device *dev);
int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev);
+void ata_scsi_deferred_qc_work(struct work_struct *work);
+void ata_scsi_requeue_deferred_qc(struct ata_port *ap);

/* libata-eh.c */
extern unsigned int ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd);
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 742b2908ff68..b3dbf6c76e98 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -483,38 +483,20 @@ void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i)

int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i)
{
- struct lru_cache *al = device->act_log;
/* for bios crossing activity log extent boundaries,
* we may need to activate two extents in one go */
unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
- unsigned nr_al_extents;
- unsigned available_update_slots;
unsigned enr;

- D_ASSERT(device, first <= last);
-
- nr_al_extents = 1 + last - first; /* worst case: all touched extends are cold. */
- available_update_slots = min(al->nr_elements - al->used,
- al->max_pending_changes - al->pending_changes);
-
- /* We want all necessary updates for a given request within the same transaction
- * We could first check how many updates are *actually* needed,
- * and use that instead of the worst-case nr_al_extents */
- if (available_update_slots < nr_al_extents) {
- /* Too many activity log extents are currently "hot".
- *
- * If we have accumulated pending changes already,
- * we made progress.
- *
- * If we cannot get even a single pending change through,
- * stop the fast path until we made some progress,
- * or requests to "cold" extents could be starved. */
- if (!al->pending_changes)
- __set_bit(__LC_STARVING, &device->act_log->flags);
- return -ENOBUFS;
+ if (i->partially_in_al_next_enr) {
+ D_ASSERT(device, first < i->partially_in_al_next_enr);
+ D_ASSERT(device, last >= i->partially_in_al_next_enr);
+ first = i->partially_in_al_next_enr;
}

+ D_ASSERT(device, first <= last);
+
/* Is resync active in this area? */
for (enr = first; enr <= last; enr++) {
struct lc_element *tmp;
@@ -529,14 +511,21 @@ int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *
}
}

- /* Checkout the refcounts.
- * Given that we checked for available elements and update slots above,
- * this has to be successful. */
+ /* Try to checkout the refcounts. */
for (enr = first; enr <= last; enr++) {
struct lc_element *al_ext;
al_ext = lc_get_cumulative(device->act_log, enr);
- if (!al_ext)
- drbd_info(device, "LOGIC BUG for enr=%u\n", enr);
+
+ if (!al_ext) {
+ /* Did not work. We may have exhausted the possible
+ * changes per transaction. Or raced with someone
+ * "locking" it against changes.
+ * Remember where to continue from.
+ */
+ if (enr > first)
+ i->partially_in_al_next_enr = enr;
+ return -ENOBUFS;
+ }
}
return 0;
}
@@ -556,7 +545,11 @@ void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i)

for (enr = first; enr <= last; enr++) {
extent = lc_find(device->act_log, enr);
- if (!extent) {
+ /* Yes, this masks a bug elsewhere. However, during normal
+ * operation this is harmless, so no need to crash the kernel
+ * by the BUG_ON(refcount == 0) in lc_put().
+ */
+ if (!extent || extent->refcnt == 0) {
drbd_err(device, "al_complete_io() called on inactive extent %u\n", enr);
continue;
}
diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h
index 366489b72fe9..5d3213b81eed 100644
--- a/drivers/block/drbd/drbd_interval.h
+++ b/drivers/block/drbd/drbd_interval.h
@@ -8,12 +8,15 @@
struct drbd_interval {
struct rb_node rb;
sector_t sector; /* start sector of the interval */
- unsigned int size; /* size in bytes */
sector_t end; /* highest interval end in subtree */
+ unsigned int size; /* size in bytes */
unsigned int local:1 /* local or remote request? */;
unsigned int waiting:1; /* someone is waiting for completion */
unsigned int completed:1; /* this has been completed already;
* ignore for conflict detection */
+
+ /* to resume a partially successful drbd_al_begin_io_nonblock(); */
+ unsigned int partially_in_al_next_enr;
};

static inline void drbd_clear_interval(struct drbd_interval *i)
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 380e6584a4ee..fce2060579eb 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -621,7 +621,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
break;

case READ_COMPLETED_WITH_ERROR:
- drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size);
+ drbd_set_out_of_sync(first_peer_device(device),
+ req->i.sector, req->i.size);
drbd_report_io_error(device, req);
__drbd_chk_io_error(device, DRBD_READ_ERROR);
fallthrough;
diff --git a/drivers/clk/tegra/clk-tegra124-emc.c b/drivers/clk/tegra/clk-tegra124-emc.c
index 0f6fb776b229..5f1af6dfe715 100644
--- a/drivers/clk/tegra/clk-tegra124-emc.c
+++ b/drivers/clk/tegra/clk-tegra124-emc.c
@@ -197,8 +197,8 @@ static struct tegra_emc *emc_ensure_emc_driver(struct tegra_clk_emc *tegra)
tegra->emc_node = NULL;

tegra->emc = platform_get_drvdata(pdev);
+ put_device(&pdev->dev);
if (!tegra->emc) {
- put_device(&pdev->dev);
pr_err("%s: cannot find EMC driver\n", __func__);
return NULL;
}
diff --git a/drivers/firmware/efi/mokvar-table.c b/drivers/firmware/efi/mokvar-table.c
index 4eb0dff4dfaf..bd84a22805b5 100644
--- a/drivers/firmware/efi/mokvar-table.c
+++ b/drivers/firmware/efi/mokvar-table.c
@@ -85,7 +85,7 @@ static struct kobject *mokvar_kobj;
* as an alternative to ordinary EFI variables, due to platform-dependent
* limitations. The memory occupied by this table is marked as reserved.
*
- * This routine must be called before efi_free_boot_services() in order
+ * This routine must be called before efi_unmap_boot_services() in order
* to guarantee that it can mark the table as reserved.
*
* Implicit inputs:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
index a7ecc33ddf22..ef5356b5a65e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -583,6 +583,7 @@ static void aca_error_fini(struct aca_error *aerr)
aca_bank_error_remove(aerr, bank_error);

out_unlock:
+ mutex_unlock(&aerr->lock);
mutex_destroy(&aerr->lock);
}

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index cab75f5c9f2f..361184355e23 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4648,7 +4648,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
* before ip_fini_early to prevent kfd locking refcount issues by calling
* amdgpu_amdkfd_suspend()
*/
- if (drm_dev_is_unplugged(adev_to_drm(adev)))
+ if (pci_dev_is_disconnected(adev->pdev))
amdgpu_amdkfd_device_fini_sw(adev);

amdgpu_device_ip_fini_early(adev);
@@ -4660,7 +4660,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)

amdgpu_gart_dummy_page_fini(adev);

- if (drm_dev_is_unplugged(adev_to_drm(adev)))
+ if (pci_dev_is_disconnected(adev->pdev))
amdgpu_device_unmap_mmio(adev);

}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
index 38face981c3e..0d3c18f04ac3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
@@ -171,13 +171,9 @@ static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t

copy_pos += sizeof(uint32_t);

- ta_bin = kzalloc(ta_bin_len, GFP_KERNEL);
- if (!ta_bin)
- return -ENOMEM;
- if (copy_from_user((void *)ta_bin, &buf[copy_pos], ta_bin_len)) {
- ret = -EFAULT;
- goto err_free_bin;
- }
+ ta_bin = memdup_user(&buf[copy_pos], ta_bin_len);
+ if (IS_ERR(ta_bin))
+ return PTR_ERR(ta_bin);

/* Set TA context and functions */
set_ta_context_funcs(psp, ta_type, &context);
@@ -327,26 +323,22 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
return -EFAULT;
copy_pos += sizeof(uint32_t);

- shared_buf = kzalloc(shared_buf_len, GFP_KERNEL);
- if (!shared_buf)
- return -ENOMEM;
- if (copy_from_user((void *)shared_buf, &buf[copy_pos], shared_buf_len)) {
- ret = -EFAULT;
- goto err_free_shared_buf;
- }
+ shared_buf = memdup_user(&buf[copy_pos], shared_buf_len);
+ if (IS_ERR(shared_buf))
+ return PTR_ERR(shared_buf);

set_ta_context_funcs(psp, ta_type, &context);

if (!context || !context->initialized) {
dev_err(adev->dev, "TA is not initialized\n");
ret = -EINVAL;
- goto err_free_shared_buf;
+ goto free_shared_buf;
}

if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_invoke) {
dev_err(adev->dev, "Unsupported function to invoke TA\n");
ret = -EOPNOTSUPP;
- goto err_free_shared_buf;
+ goto free_shared_buf;
}

context->session_id = ta_id;
@@ -354,7 +346,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
mutex_lock(&psp->ras_context.mutex);
ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len);
if (ret)
- goto err_free_shared_buf;
+ goto unlock;

ret = psp_fn_ta_invoke(psp, cmd_id);
if (ret || context->resp_status) {
@@ -362,15 +354,17 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
ret, context->resp_status);
if (!ret) {
ret = -EINVAL;
- goto err_free_shared_buf;
+ goto unlock;
}
}

if (copy_to_user((char *)&buf[copy_pos], context->mem_context.shared_buf, shared_buf_len))
ret = -EFAULT;

-err_free_shared_buf:
+unlock:
mutex_unlock(&psp->ras_context.mutex);
+
+free_shared_buf:
kfree(shared_buf);

return ret;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
index 6de0cced6c9d..8400330dfe3e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
@@ -186,29 +186,37 @@ static ssize_t vidi_store_connection(struct device *dev,
const char *buf, size_t len)
{
struct vidi_context *ctx = dev_get_drvdata(dev);
- int ret;
+ int ret, new_connected;

- ret = kstrtoint(buf, 0, &ctx->connected);
+ ret = kstrtoint(buf, 0, &new_connected);
if (ret)
return ret;
-
- if (ctx->connected > 1)
+ if (new_connected > 1)
return -EINVAL;

+ mutex_lock(&ctx->lock);
+
/*
* Use fake edid data for test. If raw_edid is set then it can't be
* tested.
*/
if (ctx->raw_edid) {
DRM_DEV_DEBUG_KMS(dev, "edid data is not fake data.\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto fail;
}

+ ctx->connected = new_connected;
+ mutex_unlock(&ctx->lock);
+
DRM_DEV_DEBUG_KMS(dev, "requested connection.\n");

drm_helper_hpd_irq_event(ctx->drm_dev);

return len;
+fail:
+ mutex_unlock(&ctx->lock);
+ return ret;
}

static DEVICE_ATTR(connection, 0644, vidi_show_connection,
@@ -238,21 +246,38 @@ int vidi_connection_ioctl(struct drm_device *drm_dev, void *data,
return -EINVAL;
}

+ mutex_lock(&ctx->lock);
if (ctx->connected == vidi->connection) {
+ mutex_unlock(&ctx->lock);
DRM_DEV_DEBUG_KMS(ctx->dev,
"same connection request.\n");
return -EINVAL;
}
+ mutex_unlock(&ctx->lock);

if (vidi->connection) {
const struct drm_edid *drm_edid;
- const struct edid *raw_edid;
+ const void __user *edid_userptr = u64_to_user_ptr(vidi->edid);
+ void *edid_buf;
+ struct edid hdr;
size_t size;

- raw_edid = (const struct edid *)(unsigned long)vidi->edid;
- size = (raw_edid->extensions + 1) * EDID_LENGTH;
+ if (copy_from_user(&hdr, edid_userptr, sizeof(hdr)))
+ return -EFAULT;
+
+ size = (hdr.extensions + 1) * EDID_LENGTH;
+
+ edid_buf = kmalloc(size, GFP_KERNEL);
+ if (!edid_buf)
+ return -ENOMEM;

- drm_edid = drm_edid_alloc(raw_edid, size);
+ if (copy_from_user(edid_buf, edid_userptr, size)) {
+ kfree(edid_buf);
+ return -EFAULT;
+ }
+
+ drm_edid = drm_edid_alloc(edid_buf, size);
+ kfree(edid_buf);
if (!drm_edid)
return -ENOMEM;

@@ -262,14 +287,21 @@ int vidi_connection_ioctl(struct drm_device *drm_dev, void *data,
"edid data is invalid.\n");
return -EINVAL;
}
+ mutex_lock(&ctx->lock);
ctx->raw_edid = drm_edid;
+ mutex_unlock(&ctx->lock);
} else {
/* with connection = 0, free raw_edid */
+ mutex_lock(&ctx->lock);
drm_edid_free(ctx->raw_edid);
ctx->raw_edid = NULL;
+ mutex_unlock(&ctx->lock);
}

+ mutex_lock(&ctx->lock);
ctx->connected = vidi->connection;
+ mutex_unlock(&ctx->lock);
+
drm_helper_hpd_irq_event(ctx->drm_dev);

return 0;
@@ -284,7 +316,7 @@ static enum drm_connector_status vidi_detect(struct drm_connector *connector,
* connection request would come from user side
* to do hotplug through specific ioctl.
*/
- return ctx->connected ? connector_status_connected :
+ return READ_ONCE(ctx->connected) ? connector_status_connected :
connector_status_disconnected;
}

@@ -307,13 +339,14 @@ static int vidi_get_modes(struct drm_connector *connector)
const struct drm_edid *drm_edid;
int count;

+ mutex_lock(&ctx->lock);
+
if (ctx->raw_edid)
drm_edid = drm_edid_dup(ctx->raw_edid);
else
drm_edid = drm_edid_alloc(fake_edid_info, sizeof(fake_edid_info));

- if (!drm_edid)
- return 0;
+ mutex_unlock(&ctx->lock);

drm_edid_connector_update(connector, drm_edid);

@@ -459,9 +492,13 @@ static void vidi_remove(struct platform_device *pdev)
{
struct vidi_context *ctx = platform_get_drvdata(pdev);

+ mutex_lock(&ctx->lock);
+
drm_edid_free(ctx->raw_edid);
ctx->raw_edid = NULL;

+ mutex_unlock(&ctx->lock);
+
component_del(&pdev->dev, &vidi_component_ops);
}

diff --git a/drivers/gpu/drm/logicvc/logicvc_drm.c b/drivers/gpu/drm/logicvc/logicvc_drm.c
index 01a37e28c080..6d88f8645988 100644
--- a/drivers/gpu/drm/logicvc/logicvc_drm.c
+++ b/drivers/gpu/drm/logicvc/logicvc_drm.c
@@ -90,7 +90,6 @@ static int logicvc_drm_config_parse(struct logicvc_drm *logicvc)
struct device *dev = drm_dev->dev;
struct device_node *of_node = dev->of_node;
struct logicvc_drm_config *config = &logicvc->config;
- struct device_node *layers_node;
int ret;

logicvc_of_property_parse_bool(of_node, LOGICVC_OF_PROPERTY_DITHERING,
@@ -126,7 +125,8 @@ static int logicvc_drm_config_parse(struct logicvc_drm *logicvc)
if (ret)
return ret;

- layers_node = of_get_child_by_name(of_node, "layers");
+ struct device_node *layers_node __free(device_node) =
+ of_get_child_by_name(of_node, "layers");
if (!layers_node) {
drm_err(drm_dev, "Missing non-optional layers node\n");
return -EINVAL;
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 4dde0dc525ce..4f43c0fa4019 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -404,6 +404,7 @@ static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched)
/**
* drm_sched_job_done - complete a job
* @s_job: pointer to the job which is done
+ * @result: 0 on success, -ERRNO on error
*
* Finish the job's fence and wake up the worker thread.
*/
diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c
index e0fc12d514d7..cd8347396082 100644
--- a/drivers/gpu/drm/solomon/ssd130x.c
+++ b/drivers/gpu/drm/solomon/ssd130x.c
@@ -736,6 +736,7 @@ static int ssd130x_update_rect(struct ssd130x_device *ssd130x,
unsigned int height = drm_rect_height(rect);
unsigned int line_length = DIV_ROUND_UP(width, 8);
unsigned int page_height = SSD130X_PAGE_HEIGHT;
+ u8 page_start = ssd130x->page_offset + y / page_height;
unsigned int pages = DIV_ROUND_UP(height, page_height);
struct drm_device *drm = &ssd130x->drm;
u32 array_idx = 0;
@@ -773,14 +774,11 @@ static int ssd130x_update_rect(struct ssd130x_device *ssd130x,
*/

if (!ssd130x->page_address_mode) {
- u8 page_start;
-
/* Set address range for horizontal addressing mode */
ret = ssd130x_set_col_range(ssd130x, ssd130x->col_offset + x, width);
if (ret < 0)
return ret;

- page_start = ssd130x->page_offset + y / page_height;
ret = ssd130x_set_page_range(ssd130x, page_start, pages);
if (ret < 0)
return ret;
@@ -812,7 +810,7 @@ static int ssd130x_update_rect(struct ssd130x_device *ssd130x,
*/
if (ssd130x->page_address_mode) {
ret = ssd130x_set_page_pos(ssd130x,
- ssd130x->page_offset + i,
+ page_start + i,
ssd130x->col_offset + x);
if (ret < 0)
return ret;
diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c
index 532a8f4bee7f..a796dc674237 100644
--- a/drivers/gpu/drm/tegra/dsi.c
+++ b/drivers/gpu/drm/tegra/dsi.c
@@ -1540,11 +1540,9 @@ static int tegra_dsi_ganged_probe(struct tegra_dsi *dsi)
return -EPROBE_DEFER;

dsi->slave = platform_get_drvdata(gangster);
-
- if (!dsi->slave) {
- put_device(&gangster->dev);
+ put_device(&gangster->dev);
+ if (!dsi->slave)
return -EPROBE_DEFER;
- }

dsi->slave->master = dsi;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 0c1bd3acf359..6b921db2dcd2 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -1161,7 +1161,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv,
ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo);
if (ret != 0) {
drm_dbg(&dev_priv->drm, "Could not find or use MOB buffer.\n");
- return PTR_ERR(vmw_bo);
+ return ret;
}
vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_MOB, VMW_BO_DOMAIN_MOB);
ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo);
@@ -1217,7 +1217,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo);
if (ret != 0) {
drm_dbg(&dev_priv->drm, "Could not find or use GMR region.\n");
- return PTR_ERR(vmw_bo);
+ return ret;
}
vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM,
VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
index de2498749e27..5bb710824d72 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -274,6 +274,13 @@ int vmw_bo_dirty_add(struct vmw_bo *vbo)
return ret;
}

+static void vmw_bo_dirty_free(struct kref *kref)
+{
+ struct vmw_bo_dirty *dirty = container_of(kref, struct vmw_bo_dirty, ref_count);
+
+ kvfree(dirty);
+}
+
/**
* vmw_bo_dirty_release - Release a dirty-tracking user from a buffer object
* @vbo: The buffer object
@@ -288,7 +295,7 @@ void vmw_bo_dirty_release(struct vmw_bo *vbo)
{
struct vmw_bo_dirty *dirty = vbo->dirty;

- if (dirty && kref_put(&dirty->ref_count, (void *)kvfree))
+ if (dirty && kref_put(&dirty->ref_count, vmw_bo_dirty_free))
vbo->dirty = NULL;
}

diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index d3773a985387..ae9e6df2f4e1 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -102,10 +102,12 @@ int xe_reg_sr_add(struct xe_reg_sr *sr,
*pentry = *e;
ret = xa_err(xa_store(&sr->xa, idx, pentry, GFP_KERNEL));
if (ret)
- goto fail;
+ goto fail_free;

return 0;

+fail_free:
+ kfree(pentry);
fail:
xe_gt_err(gt,
"discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s, mcr: %s): ret=%d\n",
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index fb31e09acb51..c9e8969f99fc 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -259,6 +259,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc

i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);

+ /* Don't preempt fence signaling */
+ dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
if (job->user_fence.used) {
i = emit_flush_dw(dw, i);
i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
@@ -322,6 +325,9 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,

i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);

+ /* Don't preempt fence signaling */
+ dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
if (job->user_fence.used) {
i = emit_flush_dw(dw, i);
i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
@@ -371,6 +377,9 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,

i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);

+ /* Don't preempt fence signaling */
+ dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
i = emit_render_cache_flush(job, dw, i);

if (job->user_fence.used)
diff --git a/drivers/hid/hid-cmedia.c b/drivers/hid/hid-cmedia.c
index 528d7f361215..8bf5649b0c79 100644
--- a/drivers/hid/hid-cmedia.c
+++ b/drivers/hid/hid-cmedia.c
@@ -99,7 +99,7 @@ static int cmhid_raw_event(struct hid_device *hid, struct hid_report *report,
{
struct cmhid *cm = hid_get_drvdata(hid);

- if (len != CM6533_JD_RAWEV_LEN)
+ if (len != CM6533_JD_RAWEV_LEN || !(hid->claimed & HID_CLAIMED_INPUT))
goto out;
if (memcmp(data+CM6533_JD_SFX_OFFSET, ji_sfx, sizeof(ji_sfx)))
goto out;
diff --git a/drivers/hid/hid-creative-sb0540.c b/drivers/hid/hid-creative-sb0540.c
index b4c8e7a5d3e0..dfd6add353d1 100644
--- a/drivers/hid/hid-creative-sb0540.c
+++ b/drivers/hid/hid-creative-sb0540.c
@@ -153,7 +153,7 @@ static int creative_sb0540_raw_event(struct hid_device *hid,
u64 code, main_code;
int key;

- if (len != 6)
+ if (len != 6 || !(hid->claimed & HID_CLAIMED_INPUT))
return 0;

/* From daemons/hw_hiddev.c sb0540_rec() in lirc */
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index dfa39a37405e..0a65490dfcb4 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -832,6 +832,7 @@
#define USB_DEVICE_ID_LENOVO_X1_TAB3 0x60b5
#define USB_DEVICE_ID_LENOVO_X12_TAB 0x60fe
#define USB_DEVICE_ID_LENOVO_X12_TAB2 0x61ae
+#define USB_DEVICE_ID_LENOVO_YOGABOOK9I 0x6161
#define USB_DEVICE_ID_LENOVO_OPTICAL_USB_MOUSE_600E 0x600e
#define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D 0x608d
#define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_6019 0x6019
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index c3a914458358..eb148988484b 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -72,6 +72,7 @@ MODULE_LICENSE("GPL");
#define MT_QUIRK_FORCE_MULTI_INPUT BIT(20)
#define MT_QUIRK_DISABLE_WAKEUP BIT(21)
#define MT_QUIRK_ORIENTATION_INVERT BIT(22)
+#define MT_QUIRK_YOGABOOK9I BIT(24)

#define MT_INPUTMODE_TOUCHSCREEN 0x02
#define MT_INPUTMODE_TOUCHPAD 0x03
@@ -215,6 +216,8 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app);
#define MT_CLS_GOOGLE 0x0111
#define MT_CLS_RAZER_BLADE_STEALTH 0x0112
#define MT_CLS_SMART_TECH 0x0113
+#define MT_CLS_YOGABOOK9I 0x0115
+#define MT_CLS_EGALAX_P80H84 0x0116
#define MT_CLS_SIS 0x0457

#define MT_DEFAULT_MAXCONTACT 10
@@ -405,6 +408,19 @@ static const struct mt_class mt_classes[] = {
.quirks = MT_QUIRK_NOT_SEEN_MEANS_UP |
MT_QUIRK_ALWAYS_VALID |
MT_QUIRK_CONTACT_CNT_ACCURATE,
+ },
+ { .name = MT_CLS_YOGABOOK9I,
+ .quirks = MT_QUIRK_ALWAYS_VALID |
+ MT_QUIRK_FORCE_MULTI_INPUT |
+ MT_QUIRK_SEPARATE_APP_REPORT |
+ MT_QUIRK_HOVERING |
+ MT_QUIRK_YOGABOOK9I,
+ .export_all_inputs = true
+ },
+ { .name = MT_CLS_EGALAX_P80H84,
+ .quirks = MT_QUIRK_ALWAYS_VALID |
+ MT_QUIRK_IGNORE_DUPLICATES |
+ MT_QUIRK_CONTACT_CNT_ACCURATE,
},
{ }
};
@@ -1493,6 +1509,38 @@ static void mt_report(struct hid_device *hid, struct hid_report *report)
if (rdata && rdata->is_mt_collection)
return mt_touch_report(hid, rdata);

+ /* Lenovo Yoga Book 9i requires consuming and dropping certain bogus reports */
+ if (rdata && rdata->application &&
+ (rdata->application->quirks & MT_QUIRK_YOGABOOK9I)) {
+
+ bool all_zero_report = true;
+
+ for (int f = 0; f < report->maxfield && all_zero_report; f++) {
+ struct hid_field *fld = report->field[f];
+
+ for (int i = 0; i < fld->report_count; i++) {
+ unsigned int usage = fld->usage[i].hid;
+
+ if (usage == HID_DG_INRANGE ||
+ usage == HID_DG_TIPSWITCH ||
+ usage == HID_DG_BARRELSWITCH ||
+ usage == HID_DG_BARRELSWITCH2 ||
+ usage == HID_DG_CONTACTID ||
+ usage == HID_DG_TILT_X ||
+ usage == HID_DG_TILT_Y) {
+
+ if (fld->value[i] != 0) {
+ all_zero_report = false;
+ break;
+ }
+ }
+ }
+ }
+
+ if (all_zero_report)
+ return;
+ }
+
if (field && field->hidinput && field->hidinput->input)
input_sync(field->hidinput->input);
}
@@ -1683,6 +1731,30 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi)
break;
}

+ /* Lenovo Yoga Book 9i requires custom naming to allow differentiation in udev */
+ if (hi->report && td->mtclass.quirks & MT_QUIRK_YOGABOOK9I) {
+ switch (hi->report->id) {
+ case 48:
+ suffix = "Touchscreen Top";
+ break;
+ case 56:
+ suffix = "Touchscreen Bottom";
+ break;
+ case 20:
+ suffix = "Stylus Top";
+ break;
+ case 40:
+ suffix = "Stylus Bottom";
+ break;
+ case 80:
+ suffix = "Emulated Touchpad";
+ break;
+ default:
+ suffix = "";
+ break;
+ }
+ }
+
if (suffix) {
hi->input->name = devm_kasprintf(&hdev->dev, GFP_KERNEL,
"%s %s", hdev->name, suffix);
@@ -2029,8 +2101,9 @@ static const struct hid_device_id mt_devices[] = {
{ .driver_data = MT_CLS_EGALAX_SERIAL,
MT_USB_DEVICE(USB_VENDOR_ID_DWAV,
USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_C000) },
- { .driver_data = MT_CLS_EGALAX,
- MT_USB_DEVICE(USB_VENDOR_ID_DWAV,
+ { .driver_data = MT_CLS_EGALAX_P80H84,
+ HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH_WIN_8,
+ USB_VENDOR_ID_DWAV,
USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_C002) },

/* Elan devices */
@@ -2160,6 +2233,12 @@ static const struct hid_device_id mt_devices[] = {
USB_VENDOR_ID_LENOVO,
USB_DEVICE_ID_LENOVO_X12_TAB2) },

+ /* Lenovo Yoga Book 9i */
+ { .driver_data = MT_CLS_YOGABOOK9I,
+ HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8,
+ USB_VENDOR_ID_LENOVO,
+ USB_DEVICE_ID_LENOVO_YOGABOOK9I) },
+
/* Logitech devices */
{ .driver_data = MT_CLS_NSMU,
HID_DEVICE(BUS_BLUETOOTH, HID_GROUP_MULTITOUCH_WIN_8,
diff --git a/drivers/hid/hid-zydacron.c b/drivers/hid/hid-zydacron.c
index 3bdb26f45592..1aae80f848f5 100644
--- a/drivers/hid/hid-zydacron.c
+++ b/drivers/hid/hid-zydacron.c
@@ -114,7 +114,7 @@ static int zc_raw_event(struct hid_device *hdev, struct hid_report *report,
unsigned key;
unsigned short index;

- if (report->id == data[0]) {
+ if (report->id == data[0] && (hdev->claimed & HID_CLAIMED_INPUT)) {

/* break keys */
for (index = 0; index < 4; index++) {
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 58480a3f4683..19622dd6ec93 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -245,12 +245,12 @@ config SENSORS_ADT7475
will be called adt7475.

config SENSORS_AHT10
- tristate "Aosong AHT10, AHT20"
+ tristate "Aosong AHT10, AHT20, DHT20"
depends on I2C
select CRC8
help
- If you say yes here, you get support for the Aosong AHT10 and AHT20
- temperature and humidity sensors
+ If you say yes here, you get support for the Aosong AHT10, AHT20 and
+ DHT20 temperature and humidity sensors

This driver can also be built as a module. If so, the module
will be called aht10.
diff --git a/drivers/hwmon/aht10.c b/drivers/hwmon/aht10.c
index 312ef3e98754..4099b5ba0982 100644
--- a/drivers/hwmon/aht10.c
+++ b/drivers/hwmon/aht10.c
@@ -37,6 +37,10 @@
#define AHT10_CMD_MEAS 0b10101100
#define AHT10_CMD_RST 0b10111010

+#define AHT20_CMD_INIT 0b10111110
+
+#define DHT20_CMD_INIT 0b01110001
+
/*
* Flags in the answer byte/command
*/
@@ -48,11 +52,12 @@

#define AHT10_MAX_POLL_INTERVAL_LEN 30

-enum aht10_variant { aht10, aht20 };
+enum aht10_variant { aht10, aht20, dht20};

static const struct i2c_device_id aht10_id[] = {
{ "aht10", aht10 },
{ "aht20", aht20 },
+ { "dht20", dht20 },
{ },
};
MODULE_DEVICE_TABLE(i2c, aht10_id);
@@ -77,6 +82,7 @@ MODULE_DEVICE_TABLE(i2c, aht10_id);
* AHT10/AHT20
* @crc8: crc8 support flag
* @meas_size: measurements data size
+ * @init_cmd: Initialization command
*/

struct aht10_data {
@@ -92,6 +98,7 @@ struct aht10_data {
int humidity;
bool crc8;
unsigned int meas_size;
+ u8 init_cmd;
};

/**
@@ -101,13 +108,13 @@ struct aht10_data {
*/
static int aht10_init(struct aht10_data *data)
{
- const u8 cmd_init[] = {AHT10_CMD_INIT, AHT10_CAL_ENABLED | AHT10_MODE_CYC,
+ const u8 cmd_init[] = {data->init_cmd, AHT10_CAL_ENABLED | AHT10_MODE_CYC,
0x00};
int res;
u8 status;
struct i2c_client *client = data->client;

- res = i2c_master_send(client, cmd_init, 3);
+ res = i2c_master_send(client, cmd_init, sizeof(cmd_init));
if (res < 0)
return res;

@@ -352,9 +359,17 @@ static int aht10_probe(struct i2c_client *client)
data->meas_size = AHT20_MEAS_SIZE;
data->crc8 = true;
crc8_populate_msb(crc8_table, AHT20_CRC8_POLY);
+ data->init_cmd = AHT20_CMD_INIT;
+ break;
+ case dht20:
+ data->meas_size = AHT20_MEAS_SIZE;
+ data->crc8 = true;
+ crc8_populate_msb(crc8_table, AHT20_CRC8_POLY);
+ data->init_cmd = DHT20_CMD_INIT;
break;
default:
data->meas_size = AHT10_MEAS_SIZE;
+ data->init_cmd = AHT10_CMD_INIT;
break;
}

diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
index e233aafa8856..5cfb98a0512f 100644
--- a/drivers/hwmon/it87.c
+++ b/drivers/hwmon/it87.c
@@ -3590,10 +3590,13 @@ static int it87_resume(struct device *dev)
{
struct platform_device *pdev = to_platform_device(dev);
struct it87_data *data = dev_get_drvdata(dev);
+ int err;

it87_resume_sio(pdev);

- it87_lock(data);
+ err = it87_lock(data);
+ if (err)
+ return err;

it87_check_pwm(dev);
it87_check_limit_regs(data);
diff --git a/drivers/hwmon/max16065.c b/drivers/hwmon/max16065.c
index 4c9e7892a73c..43fbb9b26b10 100644
--- a/drivers/hwmon/max16065.c
+++ b/drivers/hwmon/max16065.c
@@ -151,27 +151,27 @@ static struct max16065_data *max16065_update_device(struct device *dev)
int i;

for (i = 0; i < data->num_adc; i++)
- data->adc[i]
- = max16065_read_adc(client, MAX16065_ADC(i));
+ WRITE_ONCE(data->adc[i],
+ max16065_read_adc(client, MAX16065_ADC(i)));

if (data->have_current) {
- data->adc[MAX16065_NUM_ADC]
- = max16065_read_adc(client, MAX16065_CSP_ADC);
- data->curr_sense
- = i2c_smbus_read_byte_data(client,
- MAX16065_CURR_SENSE);
+ WRITE_ONCE(data->adc[MAX16065_NUM_ADC],
+ max16065_read_adc(client, MAX16065_CSP_ADC));
+ WRITE_ONCE(data->curr_sense,
+ i2c_smbus_read_byte_data(client, MAX16065_CURR_SENSE));
}

for (i = 0; i < 2; i++)
- data->fault[i]
- = i2c_smbus_read_byte_data(client, MAX16065_FAULT(i));
+ WRITE_ONCE(data->fault[i],
+ i2c_smbus_read_byte_data(client, MAX16065_FAULT(i)));

/*
* MAX16067 and MAX16068 have separate undervoltage and
* overvoltage alarm bits. Squash them together.
*/
if (data->chip == max16067 || data->chip == max16068)
- data->fault[0] |= data->fault[1];
+ WRITE_ONCE(data->fault[0],
+ data->fault[0] | data->fault[1]);

data->last_updated = jiffies;
data->valid = true;
@@ -185,7 +185,7 @@ static ssize_t max16065_alarm_show(struct device *dev,
{
struct sensor_device_attribute_2 *attr2 = to_sensor_dev_attr_2(da);
struct max16065_data *data = max16065_update_device(dev);
- int val = data->fault[attr2->nr];
+ int val = READ_ONCE(data->fault[attr2->nr]);

if (val < 0)
return val;
@@ -203,7 +203,7 @@ static ssize_t max16065_input_show(struct device *dev,
{
struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
struct max16065_data *data = max16065_update_device(dev);
- int adc = data->adc[attr->index];
+ int adc = READ_ONCE(data->adc[attr->index]);

if (unlikely(adc < 0))
return adc;
@@ -216,7 +216,7 @@ static ssize_t max16065_current_show(struct device *dev,
struct device_attribute *da, char *buf)
{
struct max16065_data *data = max16065_update_device(dev);
- int curr_sense = data->curr_sense;
+ int curr_sense = READ_ONCE(data->curr_sense);

if (unlikely(curr_sense < 0))
return curr_sense;
diff --git a/drivers/hwmon/max6639.c b/drivers/hwmon/max6639.c
index c955b0f3a8d3..0b0a9f4c2307 100644
--- a/drivers/hwmon/max6639.c
+++ b/drivers/hwmon/max6639.c
@@ -19,7 +19,6 @@
#include <linux/hwmon-sysfs.h>
#include <linux/err.h>
#include <linux/mutex.h>
-#include <linux/platform_data/max6639.h>
#include <linux/regmap.h>
#include <linux/util_macros.h>

@@ -531,14 +530,49 @@ static int rpm_range_to_reg(int range)
return 1; /* default: 4000 RPM */
}

+static int max6639_probe_child_from_dt(struct i2c_client *client,
+ struct device_node *child,
+ struct max6639_data *data)
+
+{
+ struct device *dev = &client->dev;
+ u32 i;
+ int err, val;
+
+ err = of_property_read_u32(child, "reg", &i);
+ if (err) {
+ dev_err(dev, "missing reg property of %pOFn\n", child);
+ return err;
+ }
+
+ if (i > 1) {
+ dev_err(dev, "Invalid fan index reg %d\n", i);
+ return -EINVAL;
+ }
+
+ err = of_property_read_u32(child, "pulses-per-revolution", &val);
+ if (!err) {
+ if (val < 1 || val > 5) {
+ dev_err(dev, "invalid pulses-per-revolution %d of %pOFn\n", val, child);
+ return -EINVAL;
+ }
+ data->ppr[i] = val;
+ }
+
+ err = of_property_read_u32(child, "max-rpm", &val);
+ if (!err)
+ data->rpm_range[i] = rpm_range_to_reg(val);
+
+ return 0;
+}
+
static int max6639_init_client(struct i2c_client *client,
struct max6639_data *data)
{
- struct max6639_platform_data *max6639_info =
- dev_get_platdata(&client->dev);
- int i;
- int rpm_range = 1; /* default: 4000 RPM */
- int err, ppr;
+ struct device *dev = &client->dev;
+ const struct device_node *np = dev->of_node;
+ struct device_node *child;
+ int i, err;

/* Reset chip to default values, see below for GCONFIG setup */
err = regmap_write(data->regmap, MAX6639_REG_GCONFIG, MAX6639_GCONFIG_POR);
@@ -546,21 +580,29 @@ static int max6639_init_client(struct i2c_client *client,
return err;

/* Fans pulse per revolution is 2 by default */
- if (max6639_info && max6639_info->ppr > 0 &&
- max6639_info->ppr < 5)
- ppr = max6639_info->ppr;
- else
- ppr = 2;
+ data->ppr[0] = 2;
+ data->ppr[1] = 2;
+
+ /* default: 4000 RPM */
+ data->rpm_range[0] = 1;
+ data->rpm_range[1] = 1;

- data->ppr[0] = ppr;
- data->ppr[1] = ppr;
+ for_each_child_of_node(np, child) {
+ if (strcmp(child->name, "fan"))
+ continue;

- if (max6639_info)
- rpm_range = rpm_range_to_reg(max6639_info->rpm_range);
- data->rpm_range[0] = rpm_range;
- data->rpm_range[1] = rpm_range;
+ err = max6639_probe_child_from_dt(client, child, data);
+ if (err) {
+ of_node_put(child);
+ return err;
+ }
+ }

for (i = 0; i < MAX6639_NUM_CHANNELS; i++) {
+ err = regmap_set_bits(data->regmap, MAX6639_REG_OUTPUT_MASK, BIT(1 - i));
+ if (err)
+ return err;
+
/* Set Fan pulse per revolution */
err = max6639_set_ppr(data, i, data->ppr[i]);
if (err)
@@ -573,12 +615,7 @@ static int max6639_init_client(struct i2c_client *client,
return err;

/* Fans PWM polarity high by default */
- if (max6639_info) {
- if (max6639_info->pwm_polarity == 0)
- err = regmap_write(data->regmap, MAX6639_REG_FAN_CONFIG2a(i), 0x00);
- else
- err = regmap_write(data->regmap, MAX6639_REG_FAN_CONFIG2a(i), 0x02);
- }
+ err = regmap_write(data->regmap, MAX6639_REG_FAN_CONFIG2a(i), 0x02);
if (err)
return err;

diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index be7ca6a0ebeb..24363acfc3f8 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -303,9 +303,10 @@ struct i801_priv {

/*
* If set to true the host controller registers are reserved for
- * ACPI AML use.
+ * ACPI AML use. Needs extra protection by acpi_lock.
*/
bool acpi_reserved;
+ struct mutex acpi_lock;
};

#define FEATURE_SMBUS_PEC BIT(0)
@@ -893,8 +894,11 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
int hwpec, ret;
struct i801_priv *priv = i2c_get_adapdata(adap);

- if (priv->acpi_reserved)
+ mutex_lock(&priv->acpi_lock);
+ if (priv->acpi_reserved) {
+ mutex_unlock(&priv->acpi_lock);
return -EBUSY;
+ }

pm_runtime_get_sync(&priv->pci_dev->dev);

@@ -935,6 +939,7 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,

pm_runtime_mark_last_busy(&priv->pci_dev->dev);
pm_runtime_put_autosuspend(&priv->pci_dev->dev);
+ mutex_unlock(&priv->acpi_lock);
return ret;
}

@@ -1586,7 +1591,7 @@ i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits,
* further access from the driver itself. This device is now owned
* by the system firmware.
*/
- i2c_lock_bus(&priv->adapter, I2C_LOCK_SEGMENT);
+ mutex_lock(&priv->acpi_lock);

if (!priv->acpi_reserved && i801_acpi_is_smbus_ioport(priv, address)) {
priv->acpi_reserved = true;
@@ -1606,7 +1611,7 @@ i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits,
else
status = acpi_os_write_port(address, (u32)*value, bits);

- i2c_unlock_bus(&priv->adapter, I2C_LOCK_SEGMENT);
+ mutex_unlock(&priv->acpi_lock);

return status;
}
@@ -1666,6 +1671,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
priv->adapter.dev.parent = &dev->dev;
acpi_use_parent_companion(&priv->adapter.dev);
priv->adapter.retries = 3;
+ mutex_init(&priv->acpi_lock);

priv->pci_dev = dev;
priv->features = id->driver_data;
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index c33a36d5c43c..ac01b6c10327 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -4589,7 +4589,7 @@ static int irdma_create_user_ah(struct ib_ah *ibah,
#define IRDMA_CREATE_AH_MIN_RESP_LEN offsetofend(struct irdma_create_ah_resp, rsvd)
struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah);
struct irdma_device *iwdev = to_iwdev(ibah->pd->device);
- struct irdma_create_ah_resp uresp;
+ struct irdma_create_ah_resp uresp = {};
struct irdma_ah *parent_ah;
int err;

diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 6a1e2e79ddc3..c01ac0e478c6 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -428,6 +428,8 @@ static int mthca_create_srq(struct ib_srq *ibsrq,

if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) {
mthca_free_srq(to_mdev(ibsrq->device), srq);
+ mthca_unmap_user_db(to_mdev(ibsrq->device), &context->uar,
+ context->db_tab, ucmd.db_index);
return -EFAULT;
}

@@ -436,6 +438,7 @@ static int mthca_create_srq(struct ib_srq *ibsrq,

static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
+ mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
if (udata) {
struct mthca_ucontext *context =
rdma_udata_to_drv_context(
@@ -446,8 +449,6 @@ static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
mthca_unmap_user_db(to_mdev(srq->device), &context->uar,
context->db_tab, to_msrq(srq)->db_index);
}
-
- mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
return 0;
}

diff --git a/drivers/input/mouse/synaptics_i2c.c b/drivers/input/mouse/synaptics_i2c.c
index a0d707e47d93..29da66af36d7 100644
--- a/drivers/input/mouse/synaptics_i2c.c
+++ b/drivers/input/mouse/synaptics_i2c.c
@@ -372,7 +372,7 @@ static irqreturn_t synaptics_i2c_irq(int irq, void *dev_id)
{
struct synaptics_i2c *touch = dev_id;

- mod_delayed_work(system_wq, &touch->dwork, 0);
+ mod_delayed_work(system_dfl_wq, &touch->dwork, 0);

return IRQ_HANDLED;
}
@@ -448,7 +448,7 @@ static void synaptics_i2c_work_handler(struct work_struct *work)
* We poll the device once in THREAD_IRQ_SLEEP_SECS and
* if error is detected, we try to reset and reconfigure the touchpad.
*/
- mod_delayed_work(system_wq, &touch->dwork, delay);
+ mod_delayed_work(system_dfl_wq, &touch->dwork, delay);
}

static int synaptics_i2c_open(struct input_dev *input)
@@ -461,7 +461,7 @@ static int synaptics_i2c_open(struct input_dev *input)
return ret;

if (polling_req)
- mod_delayed_work(system_wq, &touch->dwork,
+ mod_delayed_work(system_dfl_wq, &touch->dwork,
msecs_to_jiffies(NO_DATA_SLEEP_MSECS));

return 0;
@@ -615,13 +615,16 @@ static int synaptics_i2c_resume(struct device *dev)
int ret;
struct i2c_client *client = to_i2c_client(dev);
struct synaptics_i2c *touch = i2c_get_clientdata(client);
+ struct input_dev *input = touch->input;

ret = synaptics_i2c_reset_config(client);
if (ret)
return ret;

- mod_delayed_work(system_wq, &touch->dwork,
- msecs_to_jiffies(NO_DATA_SLEEP_MSECS));
+ guard(mutex)(&input->mutex);
+ if (input_device_enabled(input))
+ mod_delayed_work(system_dfl_wq, &touch->dwork,
+ msecs_to_jiffies(NO_DATA_SLEEP_MSECS));

return 0;
}
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 90fdfa5f7d1d..3d1d43675bf2 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -867,6 +867,14 @@ static void __context_flush_dev_iotlb(struct device_domain_info *info)
if (!info->ats_enabled)
return;

+ /*
+ * Skip dev-IOTLB flush for inaccessible PCIe devices to prevent the
+ * Intel IOMMU from waiting indefinitely for an ATS invalidation that
+ * cannot complete.
+ */
+ if (!pci_device_is_present(to_pci_dev(info->dev)))
+ return;
+
qi_flush_dev_iotlb(info->iommu, PCI_DEVID(info->bus, info->devfn),
info->pfsid, info->ats_qdep, 0, MAX_AGAW_PFN_WIDTH);

diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index c0cf4fed13e0..b58b3cd807d4 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -154,8 +154,13 @@ static void plic_irq_disable(struct irq_data *d)
static void plic_irq_eoi(struct irq_data *d)
{
struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
+ u32 __iomem *reg;
+ bool enabled;
+
+ reg = handler->enable_base + (d->hwirq / 32) * sizeof(u32);
+ enabled = readl(reg) & BIT(d->hwirq % 32);

- if (unlikely(irqd_irq_disabled(d))) {
+ if (unlikely(!enabled)) {
plic_toggle(handler, d->hwirq, 1);
writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
plic_toggle(handler, d->hwirq, 0);
diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c
index 92c2fb618c8e..b4d52b814055 100644
--- a/drivers/mailbox/mailbox.c
+++ b/drivers/mailbox/mailbox.c
@@ -6,18 +6,17 @@
* Author: Jassi Brar <jassisinghbrar@xxxxxxxxx>
*/

-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#include <linux/mutex.h>
+#include <linux/cleanup.h>
#include <linux/delay.h>
-#include <linux/slab.h>
-#include <linux/err.h>
-#include <linux/module.h>
#include <linux/device.h>
-#include <linux/bitops.h>
+#include <linux/err.h>
#include <linux/mailbox_client.h>
#include <linux/mailbox_controller.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/of.h>
+#include <linux/property.h>
+#include <linux/spinlock.h>

#include "mailbox.h"

@@ -325,7 +324,7 @@ static int __mbox_bind_client(struct mbox_chan *chan, struct mbox_client *cl)
int ret;

if (chan->cl || !try_module_get(chan->mbox->dev->driver->owner)) {
- dev_dbg(dev, "%s: mailbox not free\n", __func__);
+ dev_err(dev, "%s: mailbox not free\n", __func__);
return -EBUSY;
}

@@ -373,13 +372,9 @@ static int __mbox_bind_client(struct mbox_chan *chan, struct mbox_client *cl)
*/
int mbox_bind_client(struct mbox_chan *chan, struct mbox_client *cl)
{
- int ret;
+ guard(mutex)(&con_mutex);

- mutex_lock(&con_mutex);
- ret = __mbox_bind_client(chan, cl);
- mutex_unlock(&con_mutex);
-
- return ret;
+ return __mbox_bind_client(chan, cl);
}
EXPORT_SYMBOL_GPL(mbox_bind_client);

@@ -402,47 +397,65 @@ EXPORT_SYMBOL_GPL(mbox_bind_client);
*/
struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index)
{
- struct device *dev = cl->dev;
+ struct fwnode_reference_args fwspec;
+ struct fwnode_handle *fwnode;
struct mbox_controller *mbox;
struct of_phandle_args spec;
struct mbox_chan *chan;
+ struct device *dev;
+ unsigned int i;
int ret;

- if (!dev || !dev->of_node) {
- pr_debug("%s: No owner device node\n", __func__);
+ dev = cl->dev;
+ if (!dev) {
+ pr_debug("No owner device\n");
return ERR_PTR(-ENODEV);
}

- mutex_lock(&con_mutex);
+ fwnode = dev_fwnode(dev);
+ if (!fwnode) {
+ dev_dbg(dev, "No owner fwnode\n");
+ return ERR_PTR(-ENODEV);
+ }

- ret = of_parse_phandle_with_args(dev->of_node, "mboxes", "#mbox-cells",
- index, &spec);
+ ret = fwnode_property_get_reference_args(fwnode, "mboxes", "#mbox-cells",
+ 0, index, &fwspec);
if (ret) {
- dev_dbg(dev, "%s: can't parse \"mboxes\" property\n", __func__);
- mutex_unlock(&con_mutex);
+ dev_err(dev, "%s: can't parse \"%s\" property\n", __func__, "mboxes");
return ERR_PTR(ret);
}

- chan = ERR_PTR(-EPROBE_DEFER);
- list_for_each_entry(mbox, &mbox_cons, node)
- if (mbox->dev->of_node == spec.np) {
- chan = mbox->of_xlate(mbox, &spec);
- if (!IS_ERR(chan))
- break;
+ spec.np = to_of_node(fwspec.fwnode);
+ spec.args_count = fwspec.nargs;
+ for (i = 0; i < spec.args_count; i++)
+ spec.args[i] = fwspec.args[i];
+
+ scoped_guard(mutex, &con_mutex) {
+ chan = ERR_PTR(-EPROBE_DEFER);
+ list_for_each_entry(mbox, &mbox_cons, node) {
+ if (device_match_fwnode(mbox->dev, fwspec.fwnode)) {
+ if (mbox->fw_xlate) {
+ chan = mbox->fw_xlate(mbox, &fwspec);
+ if (!IS_ERR(chan))
+ break;
+ } else if (mbox->of_xlate) {
+ chan = mbox->of_xlate(mbox, &spec);
+ if (!IS_ERR(chan))
+ break;
+ }
+ }
}

- of_node_put(spec.np);
+ fwnode_handle_put(fwspec.fwnode);

- if (IS_ERR(chan)) {
- mutex_unlock(&con_mutex);
- return chan;
- }
+ if (IS_ERR(chan))
+ return chan;

- ret = __mbox_bind_client(chan, cl);
- if (ret)
- chan = ERR_PTR(ret);
+ ret = __mbox_bind_client(chan, cl);
+ if (ret)
+ chan = ERR_PTR(ret);
+ }

- mutex_unlock(&con_mutex);
return chan;
}
EXPORT_SYMBOL_GPL(mbox_request_channel);
@@ -450,15 +463,8 @@ EXPORT_SYMBOL_GPL(mbox_request_channel);
struct mbox_chan *mbox_request_channel_byname(struct mbox_client *cl,
const char *name)
{
- struct device_node *np = cl->dev->of_node;
- int index;
+ int index = device_property_match_string(cl->dev, "mbox-names", name);

- if (!np) {
- dev_err(cl->dev, "%s() currently only supports DT\n", __func__);
- return ERR_PTR(-EINVAL);
- }
-
- index = of_property_match_string(np, "mbox-names", name);
if (index < 0) {
dev_err(cl->dev, "%s() could not locate channel named \"%s\"\n",
__func__, name);
@@ -495,16 +501,13 @@ void mbox_free_channel(struct mbox_chan *chan)
}
EXPORT_SYMBOL_GPL(mbox_free_channel);

-static struct mbox_chan *
-of_mbox_index_xlate(struct mbox_controller *mbox,
- const struct of_phandle_args *sp)
+static struct mbox_chan *fw_mbox_index_xlate(struct mbox_controller *mbox,
+ const struct fwnode_reference_args *sp)
{
- int ind = sp->args[0];
-
- if (ind >= mbox->num_chans)
+ if (sp->nargs < 1 || sp->args[0] >= mbox->num_chans)
return ERR_PTR(-EINVAL);

- return &mbox->chans[ind];
+ return &mbox->chans[sp->args[0]];
}

/**
@@ -550,12 +553,11 @@ int mbox_controller_register(struct mbox_controller *mbox)
spin_lock_init(&chan->lock);
}

- if (!mbox->of_xlate)
- mbox->of_xlate = of_mbox_index_xlate;
+ if (!mbox->fw_xlate && !mbox->of_xlate)
+ mbox->fw_xlate = fw_mbox_index_xlate;

- mutex_lock(&con_mutex);
- list_add_tail(&mbox->node, &mbox_cons);
- mutex_unlock(&con_mutex);
+ scoped_guard(mutex, &con_mutex)
+ list_add_tail(&mbox->node, &mbox_cons);

return 0;
}
@@ -572,17 +574,15 @@ void mbox_controller_unregister(struct mbox_controller *mbox)
if (!mbox)
return;

- mutex_lock(&con_mutex);
-
- list_del(&mbox->node);
+ scoped_guard(mutex, &con_mutex) {
+ list_del(&mbox->node);

- for (i = 0; i < mbox->num_chans; i++)
- mbox_free_channel(&mbox->chans[i]);
+ for (i = 0; i < mbox->num_chans; i++)
+ mbox_free_channel(&mbox->chans[i]);

- if (mbox->txdone_poll)
- hrtimer_cancel(&mbox->poll_hrt);
-
- mutex_unlock(&con_mutex);
+ if (mbox->txdone_poll)
+ hrtimer_cancel(&mbox->poll_hrt);
+ }
}
EXPORT_SYMBOL_GPL(mbox_controller_unregister);

diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c
index 804fb339f735..a67cce02d157 100644
--- a/drivers/media/dvb-core/dmxdev.c
+++ b/drivers/media/dvb-core/dmxdev.c
@@ -168,7 +168,9 @@ static int dvb_dvr_open(struct inode *inode, struct file *file)
mutex_unlock(&dmxdev->mutex);
return -ENOMEM;
}
- dvb_ringbuffer_init(&dmxdev->dvr_buffer, mem, DVR_BUFFER_SIZE);
+ dmxdev->dvr_buffer.data = mem;
+ dmxdev->dvr_buffer.size = DVR_BUFFER_SIZE;
+ dvb_ringbuffer_reset(&dmxdev->dvr_buffer);
if (dmxdev->may_do_mmap)
dvb_vb2_init(&dmxdev->dvr_vb2_ctx, "dvr",
file->f_flags & O_NONBLOCK);
diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig
index 5cb596f38de3..470307135254 100644
--- a/drivers/media/i2c/Kconfig
+++ b/drivers/media/i2c/Kconfig
@@ -748,7 +748,7 @@ config VIDEO_AK7375

config VIDEO_DW9714
tristate "DW9714 lens voice coil support"
- depends on I2C && VIDEO_DEV
+ depends on GPIOLIB && I2C && VIDEO_DEV
select MEDIA_CONTROLLER
select VIDEO_V4L2_SUBDEV_API
select V4L2_ASYNC
diff --git a/drivers/media/i2c/dw9714.c b/drivers/media/i2c/dw9714.c
index 2ddd7daa79e2..8fee13e9b3a0 100644
--- a/drivers/media/i2c/dw9714.c
+++ b/drivers/media/i2c/dw9714.c
@@ -2,6 +2,7 @@
// Copyright (c) 2015--2017 Intel Corporation.

#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
#include <linux/i2c.h>
#include <linux/module.h>
#include <linux/pm_runtime.h>
@@ -38,6 +39,7 @@ struct dw9714_device {
struct v4l2_subdev sd;
u16 current_val;
struct regulator *vcc;
+ struct gpio_desc *powerdown_gpio;
};

static inline struct dw9714_device *to_dw9714_vcm(struct v4l2_ctrl *ctrl)
@@ -137,6 +139,28 @@ static int dw9714_init_controls(struct dw9714_device *dev_vcm)
return hdl->error;
}

+static int dw9714_power_up(struct dw9714_device *dw9714_dev)
+{
+ int ret;
+
+ ret = regulator_enable(dw9714_dev->vcc);
+ if (ret)
+ return ret;
+
+ gpiod_set_value_cansleep(dw9714_dev->powerdown_gpio, 0);
+
+ usleep_range(12000, 14000);
+
+ return 0;
+}
+
+static int dw9714_power_down(struct dw9714_device *dw9714_dev)
+{
+ gpiod_set_value_cansleep(dw9714_dev->powerdown_gpio, 1);
+
+ return regulator_disable(dw9714_dev->vcc);
+}
+
static int dw9714_probe(struct i2c_client *client)
{
struct dw9714_device *dw9714_dev;
@@ -151,13 +175,18 @@ static int dw9714_probe(struct i2c_client *client)
if (IS_ERR(dw9714_dev->vcc))
return PTR_ERR(dw9714_dev->vcc);

- rval = regulator_enable(dw9714_dev->vcc);
- if (rval < 0) {
- dev_err(&client->dev, "failed to enable vcc: %d\n", rval);
- return rval;
- }
+ dw9714_dev->powerdown_gpio = devm_gpiod_get_optional(&client->dev,
+ "powerdown",
+ GPIOD_OUT_HIGH);
+ if (IS_ERR(dw9714_dev->powerdown_gpio))
+ return dev_err_probe(&client->dev,
+ PTR_ERR(dw9714_dev->powerdown_gpio),
+ "could not get powerdown gpio\n");

- usleep_range(1000, 2000);
+ rval = dw9714_power_up(dw9714_dev);
+ if (rval)
+ return dev_err_probe(&client->dev, rval,
+ "failed to power up: %d\n", rval);

v4l2_i2c_subdev_init(&dw9714_dev->sd, client, &dw9714_ops);
dw9714_dev->sd.flags |= V4L2_SUBDEV_FL_HAS_DEVNODE |
@@ -185,7 +214,7 @@ static int dw9714_probe(struct i2c_client *client)
return 0;

err_cleanup:
- regulator_disable(dw9714_dev->vcc);
+ dw9714_power_down(dw9714_dev);
v4l2_ctrl_handler_free(&dw9714_dev->ctrls_vcm);
media_entity_cleanup(&dw9714_dev->sd.entity);

@@ -200,10 +229,10 @@ static void dw9714_remove(struct i2c_client *client)

pm_runtime_disable(&client->dev);
if (!pm_runtime_status_suspended(&client->dev)) {
- ret = regulator_disable(dw9714_dev->vcc);
+ ret = dw9714_power_down(dw9714_dev);
if (ret) {
dev_err(&client->dev,
- "Failed to disable vcc: %d\n", ret);
+ "Failed to power down: %d\n", ret);
}
}
pm_runtime_set_suspended(&client->dev);
@@ -234,9 +263,9 @@ static int __maybe_unused dw9714_vcm_suspend(struct device *dev)
usleep_range(DW9714_CTRL_DELAY_US, DW9714_CTRL_DELAY_US + 10);
}

- ret = regulator_disable(dw9714_dev->vcc);
+ ret = dw9714_power_down(dw9714_dev);
if (ret)
- dev_err(dev, "Failed to disable vcc: %d\n", ret);
+ dev_err(dev, "Failed to power down: %d\n", ret);

return ret;
}
@@ -257,12 +286,11 @@ static int __maybe_unused dw9714_vcm_resume(struct device *dev)
if (pm_runtime_suspended(&client->dev))
return 0;

- ret = regulator_enable(dw9714_dev->vcc);
+ ret = dw9714_power_up(dw9714_dev);
if (ret) {
- dev_err(dev, "Failed to enable vcc: %d\n", ret);
+ dev_err(dev, "Failed to power up: %d\n", ret);
return ret;
}
- usleep_range(1000, 2000);

for (val = dw9714_dev->current_val % DW9714_CTRL_STEPS;
val < dw9714_dev->current_val + DW9714_CTRL_STEPS - 1;
diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c
index 2bc034dff691..2d7f7cc5bfa9 100644
--- a/drivers/memory/mtk-smi.c
+++ b/drivers/memory/mtk-smi.c
@@ -564,6 +564,7 @@ static int mtk_smi_larb_probe(struct platform_device *pdev)
err_pm_disable:
pm_runtime_disable(dev);
device_link_remove(dev, larb->smi_common_dev);
+ put_device(larb->smi_common_dev);
return ret;
}

@@ -574,6 +575,7 @@ static void mtk_smi_larb_remove(struct platform_device *pdev)
device_link_remove(&pdev->dev, larb->smi_common_dev);
pm_runtime_disable(&pdev->dev);
component_del(&pdev->dev, &mtk_smi_larb_component_ops);
+ put_device(larb->smi_common_dev);
}

static int __maybe_unused mtk_smi_larb_resume(struct device *dev)
@@ -799,6 +801,7 @@ static void mtk_smi_common_remove(struct platform_device *pdev)
if (common->plat->type == MTK_SMI_GEN2_SUB_COMM)
device_link_remove(&pdev->dev, common->smi_common_dev);
pm_runtime_disable(&pdev->dev);
+ put_device(common->smi_common_dev);
}

static int __maybe_unused mtk_smi_common_resume(struct device *dev)
diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c
index 0472bcdff130..b5729d6c0b47 100644
--- a/drivers/net/arcnet/com20020-pci.c
+++ b/drivers/net/arcnet/com20020-pci.c
@@ -115,6 +115,8 @@ static const struct attribute_group com20020_state_group = {
.attrs = com20020_state_attrs,
};

+static struct com20020_pci_card_info card_info_2p5mbit;
+
static void com20020pci_remove(struct pci_dev *pdev);

static int com20020pci_probe(struct pci_dev *pdev,
@@ -140,7 +142,7 @@ static int com20020pci_probe(struct pci_dev *pdev,

ci = (struct com20020_pci_card_info *)id->driver_data;
if (!ci)
- return -EINVAL;
+ ci = &card_info_2p5mbit;

priv->ci = ci;
mm = &ci->misc_map;
@@ -347,6 +349,18 @@ static struct com20020_pci_card_info card_info_5mbit = {
.flags = ARC_IS_5MBIT,
};

+static struct com20020_pci_card_info card_info_2p5mbit = {
+ .name = "ARC-PCI",
+ .devcount = 1,
+ .chan_map_tbl = {
+ {
+ .bar = 2,
+ .offset = 0x00,
+ .size = 0x08,
+ },
+ },
+};
+
static struct com20020_pci_card_info card_info_sohard = {
.name = "SOHARD SH ARC-PCI",
.devcount = 1,
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index dd1f8cad953b..2ac455a9d1bb 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -322,7 +322,7 @@ static bool bond_sk_check(struct bonding *bond)
}
}

-bool bond_xdp_check(struct bonding *bond, int mode)
+bool __bond_xdp_check(int mode, int xmit_policy)
{
switch (mode) {
case BOND_MODE_ROUNDROBIN:
@@ -333,7 +333,7 @@ bool bond_xdp_check(struct bonding *bond, int mode)
/* vlan+srcmac is not supported with XDP as in most cases the 802.1q
* payload is not in the packet due to hardware offload.
*/
- if (bond->params.xmit_policy != BOND_XMIT_POLICY_VLAN_SRCMAC)
+ if (xmit_policy != BOND_XMIT_POLICY_VLAN_SRCMAC)
return true;
fallthrough;
default:
@@ -341,6 +341,11 @@ bool bond_xdp_check(struct bonding *bond, int mode)
}
}

+bool bond_xdp_check(struct bonding *bond, int mode)
+{
+ return __bond_xdp_check(mode, bond->params.xmit_policy);
+}
+
/*---------------------------------- VLAN -----------------------------------*/

/* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index a37b47b8ea8e..33af81a55a45 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -1546,6 +1546,8 @@ static int bond_option_fail_over_mac_set(struct bonding *bond,
static int bond_option_xmit_hash_policy_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
+ if (bond->xdp_prog && !__bond_xdp_check(BOND_MODE(bond), newval->value))
+ return -EOPNOTSUPP;
netdev_dbg(bond->dev, "Setting xmit hash policy to %s (%llu)\n",
newval->string, newval->value);
bond->params.xmit_policy = newval->value;
diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c
index ec5c64006a16..74906aa98be3 100644
--- a/drivers/net/can/spi/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c
@@ -1201,6 +1201,7 @@ static int mcp251x_open(struct net_device *net)
{
struct mcp251x_priv *priv = netdev_priv(net);
struct spi_device *spi = priv->spi;
+ bool release_irq = false;
unsigned long flags = 0;
int ret;

@@ -1244,12 +1245,24 @@ static int mcp251x_open(struct net_device *net)
return 0;

out_free_irq:
- free_irq(spi->irq, priv);
+ /* The IRQ handler might be running, and if so it will be waiting
+ * for the lock. But free_irq() must wait for the handler to finish
+ * so calling it here would deadlock.
+ *
+ * Setting priv->force_quit will let the handler exit right away
+ * without any access to the hardware. This make it safe to call
+ * free_irq() after the lock is released.
+ */
+ priv->force_quit = 1;
+ release_irq = true;
+
mcp251x_hw_sleep(spi);
out_close:
mcp251x_power_enable(priv->transceiver, 0);
close_candev(net);
mutex_unlock(&priv->mcp_lock);
+ if (release_irq)
+ free_irq(spi->irq, priv);
return ret;
}

diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index fac8ac79df59..d8c881130e90 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -445,6 +445,11 @@ static void ems_usb_read_bulk_callback(struct urb *urb)
start = CPC_HEADER_SIZE;

while (msg_count) {
+ if (start + CPC_MSG_HEADER_LEN > urb->actual_length) {
+ netdev_err(netdev, "format error\n");
+ break;
+ }
+
msg = (struct ems_cpc_msg *)&ibuf[start];

switch (msg->type) {
@@ -474,7 +479,7 @@ static void ems_usb_read_bulk_callback(struct urb *urb)
start += CPC_MSG_HEADER_LEN + msg->length;
msg_count--;

- if (start > urb->transfer_buffer_length) {
+ if (start > urb->actual_length) {
netdev_err(netdev, "format error\n");
break;
}
diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c
index d483cb7cfbcd..154588cb7b32 100644
--- a/drivers/net/can/usb/etas_es58x/es58x_core.c
+++ b/drivers/net/can/usb/etas_es58x/es58x_core.c
@@ -1461,12 +1461,18 @@ static void es58x_read_bulk_callback(struct urb *urb)
}

resubmit_urb:
+ usb_anchor_urb(urb, &es58x_dev->rx_urbs);
ret = usb_submit_urb(urb, GFP_ATOMIC);
+ if (!ret)
+ return;
+
+ usb_unanchor_urb(urb);
+
if (ret == -ENODEV) {
for (i = 0; i < es58x_dev->num_can_ch; i++)
if (es58x_dev->netdev[i])
netif_device_detach(es58x_dev->netdev[i]);
- } else if (ret)
+ } else
dev_err_ratelimited(dev,
"Failed resubmitting read bulk urb: %pe\n",
ERR_PTR(ret));
diff --git a/drivers/net/can/usb/f81604.c b/drivers/net/can/usb/f81604.c
index e0cfa1460b0b..6b8b2795c018 100644
--- a/drivers/net/can/usb/f81604.c
+++ b/drivers/net/can/usb/f81604.c
@@ -413,6 +413,7 @@ static void f81604_read_bulk_callback(struct urb *urb)
{
struct f81604_can_frame *frame = urb->transfer_buffer;
struct net_device *netdev = urb->context;
+ struct f81604_port_priv *priv = netdev_priv(netdev);
int ret;

if (!netif_device_present(netdev))
@@ -445,10 +446,15 @@ static void f81604_read_bulk_callback(struct urb *urb)
f81604_process_rx_packet(netdev, frame);

resubmit_urb:
+ usb_anchor_urb(urb, &priv->urbs_anchor);
ret = usb_submit_urb(urb, GFP_ATOMIC);
+ if (!ret)
+ return;
+ usb_unanchor_urb(urb);
+
if (ret == -ENODEV)
netif_device_detach(netdev);
- else if (ret)
+ else
netdev_err(netdev,
"%s: failed to resubmit read bulk urb: %pe\n",
__func__, ERR_PTR(ret));
@@ -620,6 +626,12 @@ static void f81604_read_int_callback(struct urb *urb)
netdev_info(netdev, "%s: Int URB aborted: %pe\n", __func__,
ERR_PTR(urb->status));

+ if (urb->actual_length < sizeof(*data)) {
+ netdev_warn(netdev, "%s: short int URB: %u < %zu\n",
+ __func__, urb->actual_length, sizeof(*data));
+ goto resubmit_urb;
+ }
+
switch (urb->status) {
case 0: /* success */
break;
@@ -646,10 +658,15 @@ static void f81604_read_int_callback(struct urb *urb)
f81604_handle_tx(priv, data);

resubmit_urb:
+ usb_anchor_urb(urb, &priv->urbs_anchor);
ret = usb_submit_urb(urb, GFP_ATOMIC);
+ if (!ret)
+ return;
+ usb_unanchor_urb(urb);
+
if (ret == -ENODEV)
netif_device_detach(netdev);
- else if (ret)
+ else
netdev_err(netdev, "%s: failed to resubmit int urb: %pe\n",
__func__, ERR_PTR(ret));
}
@@ -874,9 +891,27 @@ static void f81604_write_bulk_callback(struct urb *urb)
if (!netif_device_present(netdev))
return;

- if (urb->status)
- netdev_info(netdev, "%s: Tx URB error: %pe\n", __func__,
- ERR_PTR(urb->status));
+ if (!urb->status)
+ return;
+
+ switch (urb->status) {
+ case -ENOENT:
+ case -ECONNRESET:
+ case -ESHUTDOWN:
+ return;
+ default:
+ break;
+ }
+
+ if (net_ratelimit())
+ netdev_err(netdev, "%s: Tx URB error: %pe\n", __func__,
+ ERR_PTR(urb->status));
+
+ can_free_echo_skb(netdev, 0, NULL);
+ netdev->stats.tx_dropped++;
+ netdev->stats.tx_errors++;
+
+ netif_wake_queue(netdev);
}

static void f81604_clear_reg_work(struct work_struct *work)
diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c
index 07406daf7c88..6c90b4a7d955 100644
--- a/drivers/net/can/usb/ucan.c
+++ b/drivers/net/can/usb/ucan.c
@@ -749,7 +749,7 @@ static void ucan_read_bulk_callback(struct urb *urb)
len = le16_to_cpu(m->len);

/* check sanity (length of content) */
- if (urb->actual_length - pos < len) {
+ if ((len == 0) || (urb->actual_length - pos < len)) {
netdev_warn(up->netdev,
"invalid message (short; no data; l:%d)\n",
urb->actual_length);
diff --git a/drivers/net/dsa/realtek/rtl8365mb.c b/drivers/net/dsa/realtek/rtl8365mb.c
index ad7044b295ec..74a8336174e5 100644
--- a/drivers/net/dsa/realtek/rtl8365mb.c
+++ b/drivers/net/dsa/realtek/rtl8365mb.c
@@ -769,7 +769,7 @@ static int rtl8365mb_phy_ocp_write(struct realtek_priv *priv, int phy,
out:
rtl83xx_unlock(priv);

- return 0;
+ return ret;
}

static int rtl8365mb_phy_read(struct realtek_priv *priv, int phy, int regnum)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index aa25a8a0a106..d99d2295eab0 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -514,7 +514,7 @@
#define MAC_SSIR_SSINC_INDEX 16
#define MAC_SSIR_SSINC_WIDTH 8
#define MAC_TCR_SS_INDEX 29
-#define MAC_TCR_SS_WIDTH 2
+#define MAC_TCR_SS_WIDTH 3
#define MAC_TCR_TE_INDEX 0
#define MAC_TCR_TE_WIDTH 1
#define MAC_TCR_VNE_INDEX 24
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index e6a249236022..c6fcddbff3f5 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1181,7 +1181,6 @@ int xgbe_powerdown(struct net_device *netdev, unsigned int caller)
{
struct xgbe_prv_data *pdata = netdev_priv(netdev);
struct xgbe_hw_if *hw_if = &pdata->hw_if;
- unsigned long flags;

DBGPR("-->xgbe_powerdown\n");

@@ -1192,8 +1191,6 @@ int xgbe_powerdown(struct net_device *netdev, unsigned int caller)
return -EINVAL;
}

- spin_lock_irqsave(&pdata->lock, flags);
-
if (caller == XGMAC_DRIVER_CONTEXT)
netif_device_detach(netdev);

@@ -1209,8 +1206,6 @@ int xgbe_powerdown(struct net_device *netdev, unsigned int caller)

pdata->power_down = 1;

- spin_unlock_irqrestore(&pdata->lock, flags);
-
DBGPR("<--xgbe_powerdown\n");

return 0;
@@ -1220,7 +1215,6 @@ int xgbe_powerup(struct net_device *netdev, unsigned int caller)
{
struct xgbe_prv_data *pdata = netdev_priv(netdev);
struct xgbe_hw_if *hw_if = &pdata->hw_if;
- unsigned long flags;

DBGPR("-->xgbe_powerup\n");

@@ -1231,8 +1225,6 @@ int xgbe_powerup(struct net_device *netdev, unsigned int caller)
return -EINVAL;
}

- spin_lock_irqsave(&pdata->lock, flags);
-
pdata->power_down = 0;

xgbe_napi_enable(pdata, 0);
@@ -1247,8 +1239,6 @@ int xgbe_powerup(struct net_device *netdev, unsigned int caller)

xgbe_start_timers(pdata);

- spin_unlock_irqrestore(&pdata->lock, flags);
-
DBGPR("<--xgbe_powerup\n");

return 0;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index 0e8698928e4d..6e8fafb2acba 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -185,7 +185,6 @@ struct xgbe_prv_data *xgbe_alloc_pdata(struct device *dev)
pdata->netdev = netdev;
pdata->dev = dev;

- spin_lock_init(&pdata->lock);
spin_lock_init(&pdata->xpcs_lock);
mutex_init(&pdata->rss_mutex);
spin_lock_init(&pdata->tstamp_lock);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 7526a0906b39..c98461252053 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -1083,9 +1083,6 @@ struct xgbe_prv_data {
unsigned int pp3;
unsigned int pp4;

- /* Overall device lock */
- spinlock_t lock;
-
/* XPCS indirect addressing lock */
spinlock_t xpcs_lock;
unsigned int xpcs_window_def_reg;
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index e78f40078477..a7c8ec0bdfe5 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -1532,7 +1532,7 @@ static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg)
if_id = (status & 0xFFFF0000) >> 16;
if (if_id >= ethsw->sw_attr.num_ifs) {
dev_err(dev, "Invalid if_id %d in IRQ status\n", if_id);
- goto out;
+ goto out_clear;
}
port_priv = ethsw->ports[if_id];

@@ -1552,6 +1552,7 @@ static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg)
dpaa2_switch_port_connect_mac(port_priv);
}

+out_clear:
err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
DPSW_IRQ_INDEX_IF, status);
if (err)
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
index ba331899d186..d4a1041e456d 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -33,6 +33,7 @@

/* Extended Device Control */
#define E1000_CTRL_EXT_LPCD 0x00000004 /* LCD Power Cycle Done */
+#define E1000_CTRL_EXT_DPG_EN 0x00000008 /* Dynamic Power Gating Enable */
#define E1000_CTRL_EXT_SDP3_DATA 0x00000080 /* Value of SW Definable Pin 3 */
#define E1000_CTRL_EXT_FORCE_SMBUS 0x00000800 /* Force SMBus mode */
#define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index df4e7d781cb1..f9328caefe44 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -4925,6 +4925,15 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw)
reg |= E1000_KABGTXD_BGSQLBIAS;
ew32(KABGTXD, reg);

+ /* The hardware reset value of the DPG_EN bit is 1.
+ * Clear DPG_EN to prevent unexpected autonomous power gating.
+ */
+ if (hw->mac.type >= e1000_pch_ptp) {
+ reg = er32(CTRL_EXT);
+ reg &= ~E1000_CTRL_EXT_DPG_EN;
+ ew32(CTRL_EXT, reg);
+ }
+
return 0;
}

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 31c83fc69cf4..e7a06db26c91 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3619,6 +3619,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
u16 pf_q = vsi->base_queue + ring->queue_index;
struct i40e_hw *hw = &vsi->back->hw;
struct i40e_hmc_obj_rxq rx_ctx;
+ u32 xdp_frame_sz;
int err = 0;
bool ok;

@@ -3628,49 +3629,47 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
memset(&rx_ctx, 0, sizeof(rx_ctx));

ring->rx_buf_len = vsi->rx_buf_len;
+ xdp_frame_sz = i40e_rx_pg_size(ring) / 2;

/* XDP RX-queue info only needed for RX rings exposed to XDP */
if (ring->vsi->type != I40E_VSI_MAIN)
goto skip;

- if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
- err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
- ring->queue_index,
- ring->q_vector->napi.napi_id,
- ring->rx_buf_len);
- if (err)
- return err;
- }
-
ring->xsk_pool = i40e_xsk_pool(ring);
if (ring->xsk_pool) {
- xdp_rxq_info_unreg(&ring->xdp_rxq);
+ xdp_frame_sz = xsk_pool_get_rx_frag_step(ring->xsk_pool);
ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool);
err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
ring->queue_index,
ring->q_vector->napi.napi_id,
- ring->rx_buf_len);
+ xdp_frame_sz);
if (err)
return err;
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_XSK_BUFF_POOL,
NULL);
if (err)
- return err;
+ goto unreg_xdp;
dev_info(&vsi->back->pdev->dev,
"Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
ring->queue_index);

} else {
+ err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+ ring->queue_index,
+ ring->q_vector->napi.napi_id,
+ xdp_frame_sz);
+ if (err)
+ return err;
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_PAGE_SHARED,
NULL);
if (err)
- return err;
+ goto unreg_xdp;
}

skip:
- xdp_init_buff(&ring->xdp, i40e_rx_pg_size(ring) / 2, &ring->xdp_rxq);
+ xdp_init_buff(&ring->xdp, xdp_frame_sz, &ring->xdp_rxq);

rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,
BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
@@ -3704,7 +3703,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
dev_info(&vsi->back->pdev->dev,
"Failed to clear LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n",
ring->queue_index, pf_q, err);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto unreg_xdp;
}

/* set the context in the HMC */
@@ -3713,7 +3713,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
dev_info(&vsi->back->pdev->dev,
"Failed to set LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n",
ring->queue_index, pf_q, err);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto unreg_xdp;
}

/* configure Rx buffer alignment */
@@ -3721,7 +3722,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
if (I40E_2K_TOO_SMALL_WITH_PADDING) {
dev_info(&vsi->back->pdev->dev,
"2k Rx buffer is too small to fit standard MTU and skb_shared_info\n");
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto unreg_xdp;
}
clear_ring_build_skb_enabled(ring);
} else {
@@ -3751,6 +3753,11 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
}

return 0;
+unreg_xdp:
+ if (ring->vsi->type == I40E_VSI_MAIN)
+ xdp_rxq_info_unreg(&ring->xdp_rxq);
+
+ return err;
}

/**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_trace.h b/drivers/net/ethernet/intel/i40e/i40e_trace.h
index 759f3d1c4c8f..dde0ccd789ed 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_trace.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_trace.h
@@ -88,7 +88,7 @@ TRACE_EVENT(i40e_napi_poll,
__entry->rx_clean_complete = rx_clean_complete;
__entry->tx_clean_complete = tx_clean_complete;
__entry->irq_num = q->irq_num;
- __entry->curr_cpu = get_cpu();
+ __entry->curr_cpu = smp_processor_id();
__assign_str(qname);
__assign_str(dev_name);
__assign_bitmask(irq_affinity, cpumask_bits(&q->affinity_mask),
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index ca7517a68a2c..bca8398a6ab4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1469,6 +1469,9 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
if (!rx_ring->rx_bi)
return;

+ if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+
if (rx_ring->xsk_pool) {
i40e_xsk_clean_rx_ring(rx_ring);
goto skip_free;
@@ -1526,8 +1529,6 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
void i40e_free_rx_resources(struct i40e_ring *rx_ring)
{
i40e_clean_rx_ring(rx_ring);
- if (rx_ring->vsi->type == I40E_VSI_MAIN)
- xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
rx_ring->xdp_prog = NULL;
kfree(rx_ring->rx_bi);
rx_ring->rx_bi = NULL;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 422af897d933..dcd4f172ddc8 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -2630,7 +2630,22 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter)
netdev->watchdog_timeo = 5 * HZ;

netdev->min_mtu = ETH_MIN_MTU;
- netdev->max_mtu = LIBIE_MAX_MTU;
+
+ /* PF/VF API: vf_res->max_mtu is max frame size (not MTU).
+ * Convert to MTU.
+ */
+ if (!adapter->vf_res->max_mtu) {
+ netdev->max_mtu = LIBIE_MAX_MTU;
+ } else if (adapter->vf_res->max_mtu < LIBETH_RX_LL_LEN + ETH_MIN_MTU ||
+ adapter->vf_res->max_mtu >
+ LIBETH_RX_LL_LEN + LIBIE_MAX_MTU) {
+ netdev_warn_once(adapter->netdev,
+ "invalid max frame size %d from PF, using default MTU %d",
+ adapter->vf_res->max_mtu, LIBIE_MAX_MTU);
+ netdev->max_mtu = LIBIE_MAX_MTU;
+ } else {
+ netdev->max_mtu = adapter->vf_res->max_mtu - LIBETH_RX_LL_LEN;
+ }

if (!is_valid_ether_addr(adapter->hw.mac.addr)) {
dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n",
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index 3ddf7b1e85ef..6d33783ac8db 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -3477,7 +3477,7 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport)
continue;

name = kasprintf(GFP_KERNEL, "%s-%s-%s-%d", drv_name, if_name,
- vec_name, vidx);
+ vec_name, vector);

err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0,
name, q_vector);
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index 449c55c09b4a..fd515964869a 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -555,28 +555,43 @@ static void octep_clean_irqs(struct octep_device *oct)
}

/**
- * octep_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue.
+ * octep_update_pkt() - Update IQ/OQ IN/OUT_CNT registers.
*
* @iq: Octeon Tx queue data structure.
* @oq: Octeon Rx queue data structure.
*/
-static void octep_enable_ioq_irq(struct octep_iq *iq, struct octep_oq *oq)
+static void octep_update_pkt(struct octep_iq *iq, struct octep_oq *oq)
{
- u32 pkts_pend = oq->pkts_pending;
+ u32 pkts_pend = READ_ONCE(oq->pkts_pending);
+ u32 last_pkt_count = READ_ONCE(oq->last_pkt_count);
+ u32 pkts_processed = READ_ONCE(iq->pkts_processed);
+ u32 pkt_in_done = READ_ONCE(iq->pkt_in_done);

netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no);
- if (iq->pkts_processed) {
- writel(iq->pkts_processed, iq->inst_cnt_reg);
- iq->pkt_in_done -= iq->pkts_processed;
- iq->pkts_processed = 0;
+ if (pkts_processed) {
+ writel(pkts_processed, iq->inst_cnt_reg);
+ readl(iq->inst_cnt_reg);
+ WRITE_ONCE(iq->pkt_in_done, (pkt_in_done - pkts_processed));
+ WRITE_ONCE(iq->pkts_processed, 0);
}
- if (oq->last_pkt_count - pkts_pend) {
- writel(oq->last_pkt_count - pkts_pend, oq->pkts_sent_reg);
- oq->last_pkt_count = pkts_pend;
+ if (last_pkt_count - pkts_pend) {
+ writel(last_pkt_count - pkts_pend, oq->pkts_sent_reg);
+ readl(oq->pkts_sent_reg);
+ WRITE_ONCE(oq->last_pkt_count, pkts_pend);
}

/* Flush the previous wrties before writing to RESEND bit */
- wmb();
+ smp_wmb();
+}
+
+/**
+ * octep_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue.
+ *
+ * @iq: Octeon Tx queue data structure.
+ * @oq: Octeon Rx queue data structure.
+ */
+static void octep_enable_ioq_irq(struct octep_iq *iq, struct octep_oq *oq)
+{
writeq(1UL << OCTEP_OQ_INTR_RESEND_BIT, oq->pkts_sent_reg);
writeq(1UL << OCTEP_IQ_INTR_RESEND_BIT, iq->inst_cnt_reg);
}
@@ -602,7 +617,8 @@ static int octep_napi_poll(struct napi_struct *napi, int budget)
if (tx_pending || rx_done >= budget)
return budget;

- napi_complete(napi);
+ octep_update_pkt(ioq_vector->iq, ioq_vector->oq);
+ napi_complete_done(napi, rx_done);
octep_enable_ioq_irq(ioq_vector->iq, ioq_vector->oq);
return rx_done;
}
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
index f2a7c6a76c74..74de19166488 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
@@ -324,10 +324,16 @@ static int octep_oq_check_hw_for_pkts(struct octep_device *oct,
struct octep_oq *oq)
{
u32 pkt_count, new_pkts;
+ u32 last_pkt_count, pkts_pending;

pkt_count = readl(oq->pkts_sent_reg);
- new_pkts = pkt_count - oq->last_pkt_count;
+ last_pkt_count = READ_ONCE(oq->last_pkt_count);
+ new_pkts = pkt_count - last_pkt_count;

+ if (pkt_count < last_pkt_count) {
+ dev_err(oq->dev, "OQ-%u pkt_count(%u) < oq->last_pkt_count(%u)\n",
+ oq->q_no, pkt_count, last_pkt_count);
+ }
/* Clear the hardware packets counter register if the rx queue is
* being processed continuously with-in a single interrupt and
* reached half its max value.
@@ -338,8 +344,9 @@ static int octep_oq_check_hw_for_pkts(struct octep_device *oct,
pkt_count = readl(oq->pkts_sent_reg);
new_pkts += pkt_count;
}
- oq->last_pkt_count = pkt_count;
- oq->pkts_pending += new_pkts;
+ WRITE_ONCE(oq->last_pkt_count, pkt_count);
+ pkts_pending = READ_ONCE(oq->pkts_pending);
+ WRITE_ONCE(oq->pkts_pending, (pkts_pending + new_pkts));
return new_pkts;
}

@@ -414,7 +421,7 @@ static int __octep_oq_process_rx(struct octep_device *oct,
u16 rx_ol_flags;
u32 read_idx;

- read_idx = oq->host_read_idx;
+ read_idx = READ_ONCE(oq->host_read_idx);
rx_bytes = 0;
desc_used = 0;
for (pkt = 0; pkt < pkts_to_process; pkt++) {
@@ -499,7 +506,7 @@ static int __octep_oq_process_rx(struct octep_device *oct,
napi_gro_receive(oq->napi, skb);
}

- oq->host_read_idx = read_idx;
+ WRITE_ONCE(oq->host_read_idx, read_idx);
oq->refill_count += desc_used;
oq->stats->packets += pkt;
oq->stats->bytes += rx_bytes;
@@ -522,22 +529,26 @@ int octep_oq_process_rx(struct octep_oq *oq, int budget)
{
u32 pkts_available, pkts_processed, total_pkts_processed;
struct octep_device *oct = oq->octep_dev;
+ u32 pkts_pending;

pkts_available = 0;
pkts_processed = 0;
total_pkts_processed = 0;
while (total_pkts_processed < budget) {
/* update pending count only when current one exhausted */
- if (oq->pkts_pending == 0)
+ pkts_pending = READ_ONCE(oq->pkts_pending);
+ if (pkts_pending == 0)
octep_oq_check_hw_for_pkts(oct, oq);
+ pkts_pending = READ_ONCE(oq->pkts_pending);
pkts_available = min(budget - total_pkts_processed,
- oq->pkts_pending);
+ pkts_pending);
if (!pkts_available)
break;

pkts_processed = __octep_oq_process_rx(oct, oq,
pkts_available);
- oq->pkts_pending -= pkts_processed;
+ pkts_pending = READ_ONCE(oq->pkts_pending);
+ WRITE_ONCE(oq->pkts_pending, (pkts_pending - pkts_processed));
total_pkts_processed += pkts_processed;
}

diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c
index b9430c4a33a3..72c1e9415efa 100644
--- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c
@@ -288,28 +288,45 @@ static void octep_vf_clean_irqs(struct octep_vf_device *oct)
}

/**
- * octep_vf_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue.
+ * octep_vf_update_pkt() - Update IQ/OQ IN/OUT_CNT registers.
*
* @iq: Octeon Tx queue data structure.
* @oq: Octeon Rx queue data structure.
*/
-static void octep_vf_enable_ioq_irq(struct octep_vf_iq *iq, struct octep_vf_oq *oq)
+
+static void octep_vf_update_pkt(struct octep_vf_iq *iq, struct octep_vf_oq *oq)
{
- u32 pkts_pend = oq->pkts_pending;
+ u32 pkts_pend = READ_ONCE(oq->pkts_pending);
+ u32 last_pkt_count = READ_ONCE(oq->last_pkt_count);
+ u32 pkts_processed = READ_ONCE(iq->pkts_processed);
+ u32 pkt_in_done = READ_ONCE(iq->pkt_in_done);

netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no);
- if (iq->pkts_processed) {
- writel(iq->pkts_processed, iq->inst_cnt_reg);
- iq->pkt_in_done -= iq->pkts_processed;
- iq->pkts_processed = 0;
+ if (pkts_processed) {
+ writel(pkts_processed, iq->inst_cnt_reg);
+ readl(iq->inst_cnt_reg);
+ WRITE_ONCE(iq->pkt_in_done, (pkt_in_done - pkts_processed));
+ WRITE_ONCE(iq->pkts_processed, 0);
}
- if (oq->last_pkt_count - pkts_pend) {
- writel(oq->last_pkt_count - pkts_pend, oq->pkts_sent_reg);
- oq->last_pkt_count = pkts_pend;
+ if (last_pkt_count - pkts_pend) {
+ writel(last_pkt_count - pkts_pend, oq->pkts_sent_reg);
+ readl(oq->pkts_sent_reg);
+ WRITE_ONCE(oq->last_pkt_count, pkts_pend);
}

/* Flush the previous wrties before writing to RESEND bit */
smp_wmb();
+}
+
+/**
+ * octep_vf_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue.
+ *
+ * @iq: Octeon Tx queue data structure.
+ * @oq: Octeon Rx queue data structure.
+ */
+static void octep_vf_enable_ioq_irq(struct octep_vf_iq *iq,
+ struct octep_vf_oq *oq)
+{
writeq(1UL << OCTEP_VF_OQ_INTR_RESEND_BIT, oq->pkts_sent_reg);
writeq(1UL << OCTEP_VF_IQ_INTR_RESEND_BIT, iq->inst_cnt_reg);
}
@@ -335,6 +352,7 @@ static int octep_vf_napi_poll(struct napi_struct *napi, int budget)
if (tx_pending || rx_done >= budget)
return budget;

+ octep_vf_update_pkt(ioq_vector->iq, ioq_vector->oq);
if (likely(napi_complete_done(napi, rx_done)))
octep_vf_enable_ioq_irq(ioq_vector->iq, ioq_vector->oq);

diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c
index 6f865dbbba6c..b579d5b545c4 100644
--- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c
@@ -325,9 +325,16 @@ static int octep_vf_oq_check_hw_for_pkts(struct octep_vf_device *oct,
struct octep_vf_oq *oq)
{
u32 pkt_count, new_pkts;
+ u32 last_pkt_count, pkts_pending;

pkt_count = readl(oq->pkts_sent_reg);
- new_pkts = pkt_count - oq->last_pkt_count;
+ last_pkt_count = READ_ONCE(oq->last_pkt_count);
+ new_pkts = pkt_count - last_pkt_count;
+
+ if (pkt_count < last_pkt_count) {
+ dev_err(oq->dev, "OQ-%u pkt_count(%u) < oq->last_pkt_count(%u)\n",
+ oq->q_no, pkt_count, last_pkt_count);
+ }

/* Clear the hardware packets counter register if the rx queue is
* being processed continuously with-in a single interrupt and
@@ -339,8 +346,9 @@ static int octep_vf_oq_check_hw_for_pkts(struct octep_vf_device *oct,
pkt_count = readl(oq->pkts_sent_reg);
new_pkts += pkt_count;
}
- oq->last_pkt_count = pkt_count;
- oq->pkts_pending += new_pkts;
+ WRITE_ONCE(oq->last_pkt_count, pkt_count);
+ pkts_pending = READ_ONCE(oq->pkts_pending);
+ WRITE_ONCE(oq->pkts_pending, (pkts_pending + new_pkts));
return new_pkts;
}

@@ -369,7 +377,7 @@ static int __octep_vf_oq_process_rx(struct octep_vf_device *oct,
struct sk_buff *skb;
u32 read_idx;

- read_idx = oq->host_read_idx;
+ read_idx = READ_ONCE(oq->host_read_idx);
rx_bytes = 0;
desc_used = 0;
for (pkt = 0; pkt < pkts_to_process; pkt++) {
@@ -463,7 +471,7 @@ static int __octep_vf_oq_process_rx(struct octep_vf_device *oct,
napi_gro_receive(oq->napi, skb);
}

- oq->host_read_idx = read_idx;
+ WRITE_ONCE(oq->host_read_idx, read_idx);
oq->refill_count += desc_used;
oq->stats->packets += pkt;
oq->stats->bytes += rx_bytes;
@@ -486,22 +494,26 @@ int octep_vf_oq_process_rx(struct octep_vf_oq *oq, int budget)
{
u32 pkts_available, pkts_processed, total_pkts_processed;
struct octep_vf_device *oct = oq->octep_vf_dev;
+ u32 pkts_pending;

pkts_available = 0;
pkts_processed = 0;
total_pkts_processed = 0;
while (total_pkts_processed < budget) {
/* update pending count only when current one exhausted */
- if (oq->pkts_pending == 0)
+ pkts_pending = READ_ONCE(oq->pkts_pending);
+ if (pkts_pending == 0)
octep_vf_oq_check_hw_for_pkts(oct, oq);
+ pkts_pending = READ_ONCE(oq->pkts_pending);
pkts_available = min(budget - total_pkts_processed,
- oq->pkts_pending);
+ pkts_pending);
if (!pkts_available)
break;

pkts_processed = __octep_vf_oq_process_rx(oct, oq,
pkts_available);
- oq->pkts_pending -= pkts_processed;
+ pkts_pending = READ_ONCE(oq->pkts_pending);
+ WRITE_ONCE(oq->pkts_pending, (pkts_pending - pkts_processed));
total_pkts_processed += pkts_processed;
}

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 64d86068b51e..45d4bac984a5 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -3566,12 +3566,21 @@ static int mtk_xdp_setup(struct net_device *dev, struct bpf_prog *prog,
mtk_stop(dev);

old_prog = rcu_replace_pointer(eth->prog, prog, lockdep_rtnl_is_held());
+
+ if (netif_running(dev) && need_update) {
+ int err;
+
+ err = mtk_open(dev);
+ if (err) {
+ rcu_assign_pointer(eth->prog, old_prog);
+
+ return err;
+ }
+ }
+
if (old_prog)
bpf_prog_put(old_prog);

- if (netif_running(dev) && need_update)
- return mtk_open(dev);
-
return 0;
}

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
index 702ea5a00b56..aced7589d20d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
@@ -86,7 +86,7 @@ static void loongson_default_data(struct pci_dev *pdev,
/* Get bus_id, this can be overwritten later */
plat->bus_id = pci_dev_id(pdev);

- plat->clk_csr = 2; /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */
+ plat->clk_csr = 1; /* clk_csr_i = 100-150MHz & MDC = clk_csr_i/62 */
plat->has_gmac = 1;
plat->force_sf_dma_mode = 1;

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 112287a6e9ab..396216633149 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -6679,9 +6679,13 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid

if (priv->hw->num_vlan) {
ret = stmmac_add_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid);
- if (ret)
+ if (ret) {
+ clear_bit(vid, priv->active_vlans);
+ stmmac_vlan_update(priv, is_double);
goto err_pm_put;
+ }
}
+
err_pm_put:
pm_runtime_put(priv->device);

@@ -6702,15 +6706,21 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi
is_double = true;

clear_bit(vid, priv->active_vlans);
+ ret = stmmac_vlan_update(priv, is_double);
+ if (ret) {
+ set_bit(vid, priv->active_vlans);
+ goto del_vlan_error;
+ }

if (priv->hw->num_vlan) {
ret = stmmac_del_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid);
- if (ret)
+ if (ret) {
+ set_bit(vid, priv->active_vlans);
+ stmmac_vlan_update(priv, is_double);
goto del_vlan_error;
+ }
}

- ret = stmmac_vlan_update(priv, is_double);
-
del_vlan_error:
pm_runtime_put(priv->device);

diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 6b5cff087686..68049bb2bd98 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -290,7 +290,7 @@ static void am65_cpsw_nuss_ndo_slave_set_rx_mode(struct net_device *ndev)
cpsw_ale_set_allmulti(common->ale,
ndev->flags & IFF_ALLMULTI, port->port_id);

- port_mask = ALE_PORT_HOST;
+ port_mask = BIT(port->port_id) | ALE_PORT_HOST;
/* Clear all mcast from ALE */
cpsw_ale_flush_multicast(common->ale, port_mask, -1);

diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
index dc5e247ca5d1..a6bb09545c60 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.c
+++ b/drivers/net/ethernet/ti/cpsw_ale.c
@@ -443,14 +443,13 @@ static void cpsw_ale_flush_mcast(struct cpsw_ale *ale, u32 *ale_entry,
ale->port_mask_bits);
if ((mask & port_mask) == 0)
return; /* ports dont intersect, not interested */
- mask &= ~port_mask;
+ mask &= (~port_mask | ALE_PORT_HOST);

- /* free if only remaining port is host port */
- if (mask)
+ if (mask == 0x0 || mask == ALE_PORT_HOST)
+ cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE);
+ else
cpsw_ale_set_port_mask(ale_entry, mask,
ale->port_mask_bits);
- else
- cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE);
}

int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask, int vid)
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
index 055c5765bd86..5e1133c322a7 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
@@ -307,6 +307,14 @@ static int prueth_emac_common_start(struct prueth *prueth)
if (ret)
goto disable_class;

+ /* Reset link state to force reconfiguration in
+ * emac_adjust_link(). Without this, if the link was already up
+ * before restart, emac_adjust_link() won't detect any state
+ * change and will skip critical configuration like writing
+ * speed to firmware.
+ */
+ emac->link = 0;
+
mutex_lock(&emac->ndev->phydev->lock);
emac_adjust_link(emac->ndev);
mutex_unlock(&emac->ndev->phydev->lock);
diff --git a/drivers/net/usb/kalmia.c b/drivers/net/usb/kalmia.c
index 613fc6910f14..ee9c48f7f68f 100644
--- a/drivers/net/usb/kalmia.c
+++ b/drivers/net/usb/kalmia.c
@@ -132,11 +132,18 @@ kalmia_bind(struct usbnet *dev, struct usb_interface *intf)
{
int status;
u8 ethernet_addr[ETH_ALEN];
+ static const u8 ep_addr[] = {
+ 1 | USB_DIR_IN,
+ 2 | USB_DIR_OUT,
+ 0};

/* Don't bind to AT command interface */
if (intf->cur_altsetting->desc.bInterfaceClass != USB_CLASS_VENDOR_SPEC)
return -EINVAL;

+ if (!usb_check_bulk_endpoints(intf, ep_addr))
+ return -ENODEV;
+
dev->in = usb_rcvbulkpipe(dev->udev, 0x81 & USB_ENDPOINT_NUMBER_MASK);
dev->out = usb_sndbulkpipe(dev->udev, 0x02 & USB_ENDPOINT_NUMBER_MASK);
dev->status = NULL;
diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c
index e01d14f6c366..cb2472b59e10 100644
--- a/drivers/net/usb/kaweth.c
+++ b/drivers/net/usb/kaweth.c
@@ -883,6 +883,13 @@ static int kaweth_probe(
const eth_addr_t bcast_addr = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
int result = 0;
int rv = -EIO;
+ static const u8 bulk_ep_addr[] = {
+ 1 | USB_DIR_IN,
+ 2 | USB_DIR_OUT,
+ 0};
+ static const u8 int_ep_addr[] = {
+ 3 | USB_DIR_IN,
+ 0};

dev_dbg(dev,
"Kawasaki Device Probe (Device number:%d): 0x%4.4x:0x%4.4x:0x%4.4x\n",
@@ -896,6 +903,12 @@ static int kaweth_probe(
(int)udev->descriptor.bLength,
(int)udev->descriptor.bDescriptorType);

+ if (!usb_check_bulk_endpoints(intf, bulk_ep_addr) ||
+ !usb_check_int_endpoints(intf, int_ep_addr)) {
+ dev_err(dev, "couldn't find required endpoints\n");
+ return -ENODEV;
+ }
+
netdev = alloc_etherdev(sizeof(*kaweth));
if (!netdev)
return -ENOMEM;
diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index 0f16a133c75d..475b066081c7 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -815,8 +815,19 @@ static void unlink_all_urbs(pegasus_t *pegasus)

static int alloc_urbs(pegasus_t *pegasus)
{
+ static const u8 bulk_ep_addr[] = {
+ 1 | USB_DIR_IN,
+ 2 | USB_DIR_OUT,
+ 0};
+ static const u8 int_ep_addr[] = {
+ 3 | USB_DIR_IN,
+ 0};
int res = -ENOMEM;

+ if (!usb_check_bulk_endpoints(pegasus->intf, bulk_ep_addr) ||
+ !usb_check_int_endpoints(pegasus->intf, int_ep_addr))
+ return -ENODEV;
+
pegasus->rx_urb = usb_alloc_urb(0, GFP_KERNEL);
if (!pegasus->rx_urb) {
return res;
@@ -1171,6 +1182,7 @@ static int pegasus_probe(struct usb_interface *intf,

pegasus = netdev_priv(net);
pegasus->dev_index = dev_index;
+ pegasus->intf = intf;

res = alloc_urbs(pegasus);
if (res < 0) {
@@ -1182,7 +1194,6 @@ static int pegasus_probe(struct usb_interface *intf,

INIT_DELAYED_WORK(&pegasus->carrier_check, check_carrier);

- pegasus->intf = intf;
pegasus->usb = dev;
pegasus->net = net;

diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index c78451ed06ec..2dbd7772363b 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -2153,6 +2153,11 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
{
struct ipv6hdr *pip6;

+ /* check if nd_tbl is not initiliazed due to
+ * ipv6.disable=1 set during boot
+ */
+ if (!ipv6_stub->nd_tbl)
+ return false;
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
return false;
pip6 = ipv6_hdr(skb);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
index a3db65254e37..268f414f0a02 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
@@ -396,6 +396,7 @@ mt76_connac2_mac_write_txwi_80211(struct mt76_dev *dev, __le32 *txwi,
u32 val;

if (ieee80211_is_action(fc) &&
+ skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 + 1 + 2 &&
mgmt->u.action.category == WLAN_CATEGORY_BACK &&
mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) {
u16 capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c
index f1bd0c174acf..2ab439f28e16 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c
@@ -671,6 +671,7 @@ mt7925_mac_write_txwi_80211(struct mt76_dev *dev, __le32 *txwi,
u32 val;

if (ieee80211_is_action(fc) &&
+ skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 &&
mgmt->u.action.category == WLAN_CATEGORY_BACK &&
mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ)
tid = MT_TX_ADDBA;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
index 0990a3d481f2..b7a5426c933d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
@@ -759,6 +759,7 @@ mt7996_mac_write_txwi_80211(struct mt7996_dev *dev, __le32 *txwi,
u32 val;

if (ieee80211_is_action(fc) &&
+ skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 &&
mgmt->u.action.category == WLAN_CATEGORY_BACK &&
mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ)
tid = MT_TX_ADDBA;
diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
index c92bb8815320..85fd5090e0b8 100644
--- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c
+++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
@@ -666,7 +666,7 @@ static int rsi_mac80211_config(struct ieee80211_hw *hw,
struct rsi_hw *adapter = hw->priv;
struct rsi_common *common = adapter->priv;
struct ieee80211_conf *conf = &hw->conf;
- int status = -EOPNOTSUPP;
+ int status = 0;

mutex_lock(&common->mutex);

diff --git a/drivers/net/wireless/st/cw1200/pm.c b/drivers/net/wireless/st/cw1200/pm.c
index a20ab577a364..212b6f2af8de 100644
--- a/drivers/net/wireless/st/cw1200/pm.c
+++ b/drivers/net/wireless/st/cw1200/pm.c
@@ -264,12 +264,14 @@ int cw1200_wow_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
wiphy_err(priv->hw->wiphy,
"PM request failed: %d. WoW is disabled.\n", ret);
cw1200_wow_resume(hw);
+ mutex_unlock(&priv->conf_mutex);
return -EBUSY;
}

/* Force resume if event is coming from the device. */
if (atomic_read(&priv->bh_rx)) {
cw1200_wow_resume(hw);
+ mutex_unlock(&priv->conf_mutex);
return -EAGAIN;
}

diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index 42805ed7ca12..da6db99b0d57 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -1879,6 +1879,8 @@ static int __maybe_unused wl1271_op_resume(struct ieee80211_hw *hw)
wl->wow_enabled);
WARN_ON(!wl->wow_enabled);

+ mutex_lock(&wl->mutex);
+
ret = pm_runtime_force_resume(wl->dev);
if (ret < 0) {
wl1271_error("ELP wakeup failure!");
@@ -1895,8 +1897,6 @@ static int __maybe_unused wl1271_op_resume(struct ieee80211_hw *hw)
run_irq_work = true;
spin_unlock_irqrestore(&wl->wl_lock, flags);

- mutex_lock(&wl->mutex);
-
/* test the recovery flag before calling any SDIO functions */
pending_recovery = test_bit(WL1271_FLAG_RECOVERY_IN_PROGRESS,
&wl->flags);
diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c
index 018a80674f06..0f12f86ebb02 100644
--- a/drivers/nfc/pn533/usb.c
+++ b/drivers/nfc/pn533/usb.c
@@ -628,6 +628,7 @@ static void pn533_usb_disconnect(struct usb_interface *interface)
usb_free_urb(phy->out_urb);
usb_free_urb(phy->ack_urb);
kfree(phy->ack_buffer);
+ usb_put_dev(phy->udev);

nfc_info(&interface->dev, "NXP PN533 NFC device disconnected\n");
}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index a766290b1ee8..de4b9e9db45d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4609,6 +4609,13 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
if (ret)
return ret;

+ /*
+ * If a previous admin queue exists (e.g., from before a reset),
+ * put it now before allocating a new one to avoid orphaning it.
+ */
+ if (ctrl->admin_q)
+ blk_put_queue(ctrl->admin_q);
+
ctrl->admin_q = blk_mq_alloc_queue(set, &lim, NULL);
if (IS_ERR(ctrl->admin_q)) {
ret = PTR_ERR(ctrl->admin_q);
diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c
index 80dd09aa01a3..e1d07f824b13 100644
--- a/drivers/nvme/host/pr.c
+++ b/drivers/nvme/host/pr.c
@@ -200,7 +200,8 @@ static int nvme_pr_resv_report(struct block_device *bdev, void *data,
static int nvme_pr_read_keys(struct block_device *bdev,
struct pr_keys *keys_info)
{
- u32 rse_len, num_keys = keys_info->num_keys;
+ size_t rse_len;
+ u32 num_keys = keys_info->num_keys;
struct nvme_reservation_status_ext *rse;
int ret, i;
bool eds;
@@ -210,7 +211,10 @@ static int nvme_pr_read_keys(struct block_device *bdev,
* enough to get enough keys to fill the return keys buffer.
*/
rse_len = struct_size(rse, regctl_eds, num_keys);
- rse = kzalloc(rse_len, GFP_KERNEL);
+ if (rse_len > U32_MAX)
+ return -EINVAL;
+
+ rse = kvzalloc(rse_len, GFP_KERNEL);
if (!rse)
return -ENOMEM;

@@ -235,7 +239,7 @@ static int nvme_pr_read_keys(struct block_device *bdev,
}

free_rse:
- kfree(rse);
+ kvfree(rse);
return ret;
}

diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c
index 5b924597a4de..81f272b15476 100644
--- a/drivers/of/kexec.c
+++ b/drivers/of/kexec.c
@@ -128,7 +128,6 @@ int __init ima_get_kexec_buffer(void **addr, size_t *size)
{
int ret, len;
unsigned long tmp_addr;
- unsigned long start_pfn, end_pfn;
size_t tmp_size;
const void *prop;

@@ -144,17 +143,9 @@ int __init ima_get_kexec_buffer(void **addr, size_t *size)
if (!tmp_size)
return -ENOENT;

- /*
- * Calculate the PFNs for the buffer and ensure
- * they are with in addressable memory.
- */
- start_pfn = PHYS_PFN(tmp_addr);
- end_pfn = PHYS_PFN(tmp_addr + tmp_size - 1);
- if (!page_is_ram(start_pfn) || !page_is_ram(end_pfn)) {
- pr_warn("IMA buffer at 0x%lx, size = 0x%zx beyond memory\n",
- tmp_addr, tmp_size);
- return -EINVAL;
- }
+ ret = ima_validate_range(tmp_addr, tmp_size);
+ if (ret)
+ return ret;

*addr = __va(tmp_addr);
*size = tmp_size;
diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
index 00289948f9c1..189675747b2b 100644
--- a/drivers/pci/controller/dwc/pcie-designware-ep.c
+++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -290,6 +290,20 @@ static int dw_pcie_find_index(struct dw_pcie_ep *ep, phys_addr_t addr,
return -EINVAL;
}

+static u64 dw_pcie_ep_align_addr(struct pci_epc *epc, u64 pci_addr,
+ size_t *pci_size, size_t *offset)
+{
+ struct dw_pcie_ep *ep = epc_get_drvdata(epc);
+ struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+ u64 mask = pci->region_align - 1;
+ size_t ofst = pci_addr & mask;
+
+ *pci_size = ALIGN(ofst + *pci_size, epc->mem->window.page_size);
+ *offset = ofst;
+
+ return pci_addr & ~mask;
+}
+
static void dw_pcie_ep_unmap_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
phys_addr_t addr)
{
@@ -467,6 +481,7 @@ static const struct pci_epc_ops epc_ops = {
.write_header = dw_pcie_ep_write_header,
.set_bar = dw_pcie_ep_set_bar,
.clear_bar = dw_pcie_ep_clear_bar,
+ .align_addr = dw_pcie_ep_align_addr,
.map_addr = dw_pcie_ep_map_addr,
.unmap_addr = dw_pcie_ep_unmap_addr,
.set_msi = dw_pcie_ep_set_msi,
@@ -511,7 +526,8 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no,
u32 msg_addr_lower, msg_addr_upper, reg;
struct dw_pcie_ep_func *ep_func;
struct pci_epc *epc = ep->epc;
- unsigned int aligned_offset;
+ size_t map_size = sizeof(u32);
+ size_t offset;
u16 msg_ctrl, msg_data;
bool has_upper;
u64 msg_addr;
@@ -539,14 +555,13 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no,
}
msg_addr = ((u64)msg_addr_upper) << 32 | msg_addr_lower;

- aligned_offset = msg_addr & (epc->mem->window.page_size - 1);
- msg_addr = ALIGN_DOWN(msg_addr, epc->mem->window.page_size);
+ msg_addr = dw_pcie_ep_align_addr(epc, msg_addr, &map_size, &offset);
ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr,
- epc->mem->window.page_size);
+ map_size);
if (ret)
return ret;

- writel(msg_data | (interrupt_num - 1), ep->msi_mem + aligned_offset);
+ writel(msg_data | (interrupt_num - 1), ep->msi_mem + offset);

dw_pcie_ep_unmap_addr(epc, func_no, 0, ep->msi_mem_phys);

@@ -597,8 +612,9 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no,
struct pci_epf_msix_tbl *msix_tbl;
struct dw_pcie_ep_func *ep_func;
struct pci_epc *epc = ep->epc;
+ size_t map_size = sizeof(u32);
+ size_t offset;
u32 reg, msg_data, vec_ctrl;
- unsigned int aligned_offset;
u32 tbl_offset;
u64 msg_addr;
int ret;
@@ -623,14 +639,16 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no,
return -EPERM;
}

- aligned_offset = msg_addr & (epc->mem->window.page_size - 1);
- msg_addr = ALIGN_DOWN(msg_addr, epc->mem->window.page_size);
+ msg_addr = dw_pcie_ep_align_addr(epc, msg_addr, &map_size, &offset);
ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr,
- epc->mem->window.page_size);
+ map_size);
if (ret)
return ret;

- writel(msg_data, ep->msi_mem + aligned_offset);
+ writel(msg_data, ep->msi_mem + offset);
+
+ /* flush posted write before unmap */
+ readl(ep->msi_mem + offset);

dw_pcie_ep_unmap_addr(epc, func_no, 0, ep->msi_mem_phys);

diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c
index de665342dc16..75c668829003 100644
--- a/drivers/pci/endpoint/pci-epc-core.c
+++ b/drivers/pci/endpoint/pci-epc-core.c
@@ -128,6 +128,18 @@ enum pci_barno pci_epc_get_next_free_bar(const struct pci_epc_features
}
EXPORT_SYMBOL_GPL(pci_epc_get_next_free_bar);

+static bool pci_epc_function_is_valid(struct pci_epc *epc,
+ u8 func_no, u8 vfunc_no)
+{
+ if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
+ return false;
+
+ if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+ return false;
+
+ return true;
+}
+
/**
* pci_epc_get_features() - get the features supported by EPC
* @epc: the features supported by *this* EPC device will be returned
@@ -145,10 +157,7 @@ const struct pci_epc_features *pci_epc_get_features(struct pci_epc *epc,
{
const struct pci_epc_features *epc_features;

- if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
- return NULL;
-
- if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+ if (!pci_epc_function_is_valid(epc, func_no, vfunc_no))
return NULL;

if (!epc->ops->get_features)
@@ -218,10 +227,7 @@ int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
{
int ret;

- if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
- return -EINVAL;
-
- if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+ if (!pci_epc_function_is_valid(epc, func_no, vfunc_no))
return -EINVAL;

if (!epc->ops->raise_irq)
@@ -262,10 +268,7 @@ int pci_epc_map_msi_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
{
int ret;

- if (IS_ERR_OR_NULL(epc))
- return -EINVAL;
-
- if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+ if (!pci_epc_function_is_valid(epc, func_no, vfunc_no))
return -EINVAL;

if (!epc->ops->map_msi_irq)
@@ -293,10 +296,7 @@ int pci_epc_get_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
{
int interrupt;

- if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
- return 0;
-
- if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+ if (!pci_epc_function_is_valid(epc, func_no, vfunc_no))
return 0;

if (!epc->ops->get_msi)
@@ -329,11 +329,10 @@ int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no, u8 interrupts)
int ret;
u8 encode_int;

- if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions ||
- interrupts < 1 || interrupts > 32)
+ if (!pci_epc_function_is_valid(epc, func_no, vfunc_no))
return -EINVAL;

- if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+ if (interrupts < 1 || interrupts > 32)
return -EINVAL;

if (!epc->ops->set_msi)
@@ -361,10 +360,7 @@ int pci_epc_get_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
{
int interrupt;

- if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
- return 0;
-
- if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+ if (!pci_epc_function_is_valid(epc, func_no, vfunc_no))
return 0;

if (!epc->ops->get_msix)
@@ -397,11 +393,10 @@ int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
{
int ret;

- if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions ||
- interrupts < 1 || interrupts > 2048)
+ if (!pci_epc_function_is_valid(epc, func_no, vfunc_no))
return -EINVAL;

- if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+ if (interrupts < 1 || interrupts > 2048)
return -EINVAL;

if (!epc->ops->set_msix)
@@ -428,10 +423,7 @@ EXPORT_SYMBOL_GPL(pci_epc_set_msix);
void pci_epc_unmap_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
phys_addr_t phys_addr)
{
- if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
- return;
-
- if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+ if (!pci_epc_function_is_valid(epc, func_no, vfunc_no))
return;

if (!epc->ops->unmap_addr)
@@ -459,10 +451,7 @@ int pci_epc_map_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
{
int ret;

- if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
- return -EINVAL;
-
- if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+ if (!pci_epc_function_is_valid(epc, func_no, vfunc_no))
return -EINVAL;

if (!epc->ops->map_addr)
@@ -477,6 +466,109 @@ int pci_epc_map_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
}
EXPORT_SYMBOL_GPL(pci_epc_map_addr);

+/**
+ * pci_epc_mem_map() - allocate and map a PCI address to a CPU address
+ * @epc: the EPC device on which the CPU address is to be allocated and mapped
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
+ * @pci_addr: PCI address to which the CPU address should be mapped
+ * @pci_size: the number of bytes to map starting from @pci_addr
+ * @map: where to return the mapping information
+ *
+ * Allocate a controller memory address region and map it to a RC PCI address
+ * region, taking into account the controller physical address mapping
+ * constraints using the controller operation align_addr(). If this operation is
+ * not defined, we assume that there are no alignment constraints for the
+ * mapping.
+ *
+ * The effective size of the PCI address range mapped from @pci_addr is
+ * indicated by @map->pci_size. This size may be less than the requested
+ * @pci_size. The local virtual CPU address for the mapping is indicated by
+ * @map->virt_addr (@map->phys_addr indicates the physical address).
+ * The size and CPU address of the controller memory allocated and mapped are
+ * respectively indicated by @map->map_size and @map->virt_base (and
+ * @map->phys_base for the physical address of @map->virt_base).
+ *
+ * Returns 0 on success and a negative error code in case of error.
+ */
+int pci_epc_mem_map(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+ u64 pci_addr, size_t pci_size, struct pci_epc_map *map)
+{
+ size_t map_size = pci_size;
+ size_t map_offset = 0;
+ int ret;
+
+ if (!pci_epc_function_is_valid(epc, func_no, vfunc_no))
+ return -EINVAL;
+
+ if (!pci_size || !map)
+ return -EINVAL;
+
+ /*
+ * Align the PCI address to map. If the controller defines the
+ * .align_addr() operation, use it to determine the PCI address to map
+ * and the size of the mapping. Otherwise, assume that the controller
+ * has no alignment constraint.
+ */
+ memset(map, 0, sizeof(*map));
+ map->pci_addr = pci_addr;
+ if (epc->ops->align_addr)
+ map->map_pci_addr =
+ epc->ops->align_addr(epc, pci_addr,
+ &map_size, &map_offset);
+ else
+ map->map_pci_addr = pci_addr;
+ map->map_size = map_size;
+ if (map->map_pci_addr + map->map_size < pci_addr + pci_size)
+ map->pci_size = map->map_pci_addr + map->map_size - pci_addr;
+ else
+ map->pci_size = pci_size;
+
+ map->virt_base = pci_epc_mem_alloc_addr(epc, &map->phys_base,
+ map->map_size);
+ if (!map->virt_base)
+ return -ENOMEM;
+
+ map->phys_addr = map->phys_base + map_offset;
+ map->virt_addr = map->virt_base + map_offset;
+
+ ret = pci_epc_map_addr(epc, func_no, vfunc_no, map->phys_base,
+ map->map_pci_addr, map->map_size);
+ if (ret) {
+ pci_epc_mem_free_addr(epc, map->phys_base, map->virt_base,
+ map->map_size);
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pci_epc_mem_map);
+
+/**
+ * pci_epc_mem_unmap() - unmap and free a CPU address region
+ * @epc: the EPC device on which the CPU address is allocated and mapped
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
+ * @map: the mapping information
+ *
+ * Unmap and free a CPU address region that was allocated and mapped with
+ * pci_epc_mem_map().
+ */
+void pci_epc_mem_unmap(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+ struct pci_epc_map *map)
+{
+ if (!pci_epc_function_is_valid(epc, func_no, vfunc_no))
+ return;
+
+ if (!map || !map->virt_base)
+ return;
+
+ pci_epc_unmap_addr(epc, func_no, vfunc_no, map->phys_base);
+ pci_epc_mem_free_addr(epc, map->phys_base, map->virt_base,
+ map->map_size);
+}
+EXPORT_SYMBOL_GPL(pci_epc_mem_unmap);
+
/**
* pci_epc_clear_bar() - reset the BAR
* @epc: the EPC device for which the BAR has to be cleared
@@ -489,12 +581,11 @@ EXPORT_SYMBOL_GPL(pci_epc_map_addr);
void pci_epc_clear_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
struct pci_epf_bar *epf_bar)
{
- if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions ||
- (epf_bar->barno == BAR_5 &&
- epf_bar->flags & PCI_BASE_ADDRESS_MEM_TYPE_64))
+ if (!pci_epc_function_is_valid(epc, func_no, vfunc_no))
return;

- if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+ if (epf_bar->barno == BAR_5 &&
+ epf_bar->flags & PCI_BASE_ADDRESS_MEM_TYPE_64)
return;

if (!epc->ops->clear_bar)
@@ -521,18 +612,16 @@ int pci_epc_set_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
int ret;
int flags = epf_bar->flags;

- if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions ||
- (epf_bar->barno == BAR_5 &&
- flags & PCI_BASE_ADDRESS_MEM_TYPE_64) ||
+ if (!pci_epc_function_is_valid(epc, func_no, vfunc_no))
+ return -EINVAL;
+
+ if ((epf_bar->barno == BAR_5 && flags & PCI_BASE_ADDRESS_MEM_TYPE_64) ||
(flags & PCI_BASE_ADDRESS_SPACE_IO &&
flags & PCI_BASE_ADDRESS_IO_MASK) ||
(upper_32_bits(epf_bar->size) &&
!(flags & PCI_BASE_ADDRESS_MEM_TYPE_64)))
return -EINVAL;

- if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
- return -EINVAL;
-
if (!epc->ops->set_bar)
return 0;

@@ -561,10 +650,7 @@ int pci_epc_write_header(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
{
int ret;

- if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
- return -EINVAL;
-
- if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+ if (!pci_epc_function_is_valid(epc, func_no, vfunc_no))
return -EINVAL;

/* Only Virtual Function #1 has deviceID */
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 9e419f14738a..9e71eb4d1010 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -263,8 +263,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
if ((sizeof(pci_bus_addr_t) < 8 || sizeof(resource_size_t) < 8)
&& sz64 > 0x100000000ULL) {
res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED;
- res->start = 0;
- res->end = 0;
+ resource_set_range(res, 0, 0);
pci_err(dev, "%s: can't handle BAR larger than 4GB (size %#010llx)\n",
res_name, (unsigned long long)sz64);
goto out;
@@ -273,8 +272,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
if ((sizeof(pci_bus_addr_t) < 8) && l) {
/* Above 32-bit boundary; try to reallocate */
res->flags |= IORESOURCE_UNSET;
- res->start = 0;
- res->end = sz64 - 1;
+ resource_set_range(res, 0, sz64);
pci_info(dev, "%s: can't handle BAR above 4GB (bus address %#010llx)\n",
res_name, (unsigned long long)l64);
goto out;
diff --git a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c
index 8b3f3b945e20..7734dae06a4a 100644
--- a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c
+++ b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c
@@ -569,10 +569,9 @@ static int cs42l43_pin_probe(struct platform_device *pdev)
if (child) {
ret = devm_add_action_or_reset(&pdev->dev,
cs42l43_fwnode_put, child);
- if (ret) {
- fwnode_handle_put(child);
+ if (ret)
return ret;
- }
+
if (!child->dev)
child->dev = priv->dev;
fwnode = child;
diff --git a/drivers/pinctrl/pinctrl-equilibrium.c b/drivers/pinctrl/pinctrl-equilibrium.c
index c82491da2cc9..5204466c6b3e 100644
--- a/drivers/pinctrl/pinctrl-equilibrium.c
+++ b/drivers/pinctrl/pinctrl-equilibrium.c
@@ -22,7 +22,7 @@
#define PIN_NAME_LEN 10
#define PAD_REG_OFF 0x100

-static void eqbr_gpio_disable_irq(struct irq_data *d)
+static void eqbr_irq_mask(struct irq_data *d)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc);
@@ -35,7 +35,7 @@ static void eqbr_gpio_disable_irq(struct irq_data *d)
gpiochip_disable_irq(gc, offset);
}

-static void eqbr_gpio_enable_irq(struct irq_data *d)
+static void eqbr_irq_unmask(struct irq_data *d)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc);
@@ -49,7 +49,7 @@ static void eqbr_gpio_enable_irq(struct irq_data *d)
raw_spin_unlock_irqrestore(&gctrl->lock, flags);
}

-static void eqbr_gpio_ack_irq(struct irq_data *d)
+static void eqbr_irq_ack(struct irq_data *d)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc);
@@ -61,10 +61,17 @@ static void eqbr_gpio_ack_irq(struct irq_data *d)
raw_spin_unlock_irqrestore(&gctrl->lock, flags);
}

-static void eqbr_gpio_mask_ack_irq(struct irq_data *d)
+static void eqbr_irq_mask_ack(struct irq_data *d)
{
- eqbr_gpio_disable_irq(d);
- eqbr_gpio_ack_irq(d);
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc);
+ unsigned int offset = irqd_to_hwirq(d);
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&gctrl->lock, flags);
+ writel(BIT(offset), gctrl->membase + GPIO_IRNENCLR);
+ writel(BIT(offset), gctrl->membase + GPIO_IRNCR);
+ raw_spin_unlock_irqrestore(&gctrl->lock, flags);
}

static inline void eqbr_cfg_bit(void __iomem *addr,
@@ -91,7 +98,7 @@ static int eqbr_irq_type_cfg(struct gpio_irq_type *type,
return 0;
}

-static int eqbr_gpio_set_irq_type(struct irq_data *d, unsigned int type)
+static int eqbr_irq_set_type(struct irq_data *d, unsigned int type)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc);
@@ -165,11 +172,11 @@ static void eqbr_irq_handler(struct irq_desc *desc)

static const struct irq_chip eqbr_irq_chip = {
.name = "gpio_irq",
- .irq_mask = eqbr_gpio_disable_irq,
- .irq_unmask = eqbr_gpio_enable_irq,
- .irq_ack = eqbr_gpio_ack_irq,
- .irq_mask_ack = eqbr_gpio_mask_ack_irq,
- .irq_set_type = eqbr_gpio_set_irq_type,
+ .irq_ack = eqbr_irq_ack,
+ .irq_mask = eqbr_irq_mask,
+ .irq_mask_ack = eqbr_irq_mask_ack,
+ .irq_unmask = eqbr_irq_unmask,
+ .irq_set_type = eqbr_irq_set_type,
.flags = IRQCHIP_IMMUTABLE,
GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
diff --git a/drivers/platform/x86/dell/dell-wmi-base.c b/drivers/platform/x86/dell/dell-wmi-base.c
index 841a5414d28a..01f3ff21c888 100644
--- a/drivers/platform/x86/dell/dell-wmi-base.c
+++ b/drivers/platform/x86/dell/dell-wmi-base.c
@@ -80,6 +80,12 @@ static const struct dmi_system_id dell_wmi_smbios_list[] __initconst = {
static const struct key_entry dell_wmi_keymap_type_0000[] = {
{ KE_IGNORE, 0x003a, { KEY_CAPSLOCK } },

+ /* Audio mute toggle */
+ { KE_KEY, 0x0109, { KEY_MUTE } },
+
+ /* Mic mute toggle */
+ { KE_KEY, 0x0150, { KEY_MICMUTE } },
+
/* Meta key lock */
{ KE_IGNORE, 0xe000, { KEY_RIGHTMETA } },

diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/passwordattr-interface.c b/drivers/platform/x86/dell/dell-wmi-sysman/passwordattr-interface.c
index 86ec962aace9..e586f7957946 100644
--- a/drivers/platform/x86/dell/dell-wmi-sysman/passwordattr-interface.c
+++ b/drivers/platform/x86/dell/dell-wmi-sysman/passwordattr-interface.c
@@ -93,7 +93,6 @@ int set_new_password(const char *password_type, const char *new)
if (ret < 0)
goto out;

- print_hex_dump_bytes("set new password data: ", DUMP_PREFIX_NONE, buffer, buffer_size);
ret = call_password_interface(wmi_priv.password_attr_wdev, buffer, buffer_size);
/* on success copy the new password to current password */
if (!ret)
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 2c67d9758e6b..e4fe90b70e50 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -9499,14 +9499,16 @@ static int tpacpi_battery_get(int what, int battery, int *ret)
{
switch (what) {
case THRESHOLD_START:
- if ACPI_FAILURE(tpacpi_battery_acpi_eval(GET_START, ret, battery))
+ if (!battery_info.batteries[battery].start_support ||
+ ACPI_FAILURE(tpacpi_battery_acpi_eval(GET_START, ret, battery)))
return -ENODEV;

/* The value is in the low 8 bits of the response */
*ret = *ret & 0xFF;
return 0;
case THRESHOLD_STOP:
- if ACPI_FAILURE(tpacpi_battery_acpi_eval(GET_STOP, ret, battery))
+ if (!battery_info.batteries[battery].stop_support ||
+ ACPI_FAILURE(tpacpi_battery_acpi_eval(GET_STOP, ret, battery)))
return -ENODEV;
/* Value is in lower 8 bits */
*ret = *ret & 0xFF;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 08e6b8ed601c..5b9830a28c8d 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -12044,6 +12044,8 @@ lpfc_sli4_pci_mem_unset(struct lpfc_hba *phba)
iounmap(phba->sli4_hba.conf_regs_memmap_p);
if (phba->sli4_hba.dpp_regs_memmap_p)
iounmap(phba->sli4_hba.dpp_regs_memmap_p);
+ if (phba->sli4_hba.dpp_regs_memmap_wc_p)
+ iounmap(phba->sli4_hba.dpp_regs_memmap_wc_p);
break;
case LPFC_SLI_INTF_IF_TYPE_1:
break;
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 2a1f2b201715..7dba06fa82d8 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -15916,6 +15916,32 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset)
return NULL;
}

+static __maybe_unused void __iomem *
+lpfc_dpp_wc_map(struct lpfc_hba *phba, uint8_t dpp_barset)
+{
+
+ /* DPP region is supposed to cover 64-bit BAR2 */
+ if (dpp_barset != WQ_PCI_BAR_4_AND_5) {
+ lpfc_log_msg(phba, KERN_WARNING, LOG_INIT,
+ "3273 dpp_barset x%x != WQ_PCI_BAR_4_AND_5\n",
+ dpp_barset);
+ return NULL;
+ }
+
+ if (!phba->sli4_hba.dpp_regs_memmap_wc_p) {
+ void __iomem *dpp_map;
+
+ dpp_map = ioremap_wc(phba->pci_bar2_map,
+ pci_resource_len(phba->pcidev,
+ PCI_64BIT_BAR4));
+
+ if (dpp_map)
+ phba->sli4_hba.dpp_regs_memmap_wc_p = dpp_map;
+ }
+
+ return phba->sli4_hba.dpp_regs_memmap_wc_p;
+}
+
/**
* lpfc_modify_hba_eq_delay - Modify Delay Multiplier on EQs
* @phba: HBA structure that EQs are on.
@@ -16879,9 +16905,6 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
uint8_t dpp_barset;
uint32_t dpp_offset;
uint8_t wq_create_version;
-#ifdef CONFIG_X86
- unsigned long pg_addr;
-#endif

/* sanity check on queue memory */
if (!wq || !cq)
@@ -17067,14 +17090,15 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,

#ifdef CONFIG_X86
/* Enable combined writes for DPP aperture */
- pg_addr = (unsigned long)(wq->dpp_regaddr) & PAGE_MASK;
- rc = set_memory_wc(pg_addr, 1);
- if (rc) {
+ bar_memmap_p = lpfc_dpp_wc_map(phba, dpp_barset);
+ if (!bar_memmap_p) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"3272 Cannot setup Combined "
"Write on WQ[%d] - disable DPP\n",
wq->queue_id);
phba->cfg_enable_dpp = 0;
+ } else {
+ wq->dpp_regaddr = bar_memmap_p + dpp_offset;
}
#else
phba->cfg_enable_dpp = 0;
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index c1e9ec0243ba..9caada8cbe58 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -783,6 +783,9 @@ struct lpfc_sli4_hba {
void __iomem *dpp_regs_memmap_p; /* Kernel memory mapped address for
* dpp registers
*/
+ void __iomem *dpp_regs_memmap_wc_p;/* Kernel memory mapped address for
+ * dpp registers with write combining
+ */
union {
struct {
/* IF Type 0, BAR 0 PCI cfg space reg mem map */
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index 4daab8b6d675..0f911228cb2f 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -476,8 +476,9 @@ int pm8001_queue_command(struct sas_task *task, gfp_t gfp_flags)
} else {
task->task_done(task);
}
- rc = -ENODEV;
- goto err_out;
+ spin_unlock_irqrestore(&pm8001_ha->lock, flags);
+ pm8001_dbg(pm8001_ha, IO, "pm8001_task_exec device gone\n");
+ return 0;
}

ccb = pm8001_ccb_alloc(pm8001_ha, pm8001_dev, task);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 36e0b3105460..372807485517 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -354,6 +354,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
* since we use this queue depth most of times.
*/
if (scsi_realloc_sdev_budget_map(sdev, depth)) {
+ kref_put(&sdev->host->tagset_refcnt, scsi_mq_free_tags);
put_device(&starget->dev);
kfree(sdev);
goto out;
diff --git a/drivers/staging/media/tegra-video/vi.c b/drivers/staging/media/tegra-video/vi.c
index 57a856a21e90..463410349d07 100644
--- a/drivers/staging/media/tegra-video/vi.c
+++ b/drivers/staging/media/tegra-video/vi.c
@@ -440,7 +440,7 @@ static int __tegra_channel_try_format(struct tegra_vi_channel *chan,
.target = V4L2_SEL_TGT_CROP_BOUNDS,
};
struct v4l2_rect *try_crop;
- int ret;
+ int ret = 0;

subdev = tegra_channel_get_remote_source_subdev(chan);
if (!subdev)
@@ -484,8 +484,10 @@ static int __tegra_channel_try_format(struct tegra_vi_channel *chan,
} else {
ret = v4l2_subdev_call(subdev, pad, get_selection,
NULL, &sdsel);
- if (ret)
- return -EINVAL;
+ if (ret) {
+ ret = -EINVAL;
+ goto out_free;
+ }

try_crop->width = sdsel.r.width;
try_crop->height = sdsel.r.height;
@@ -497,14 +499,15 @@ static int __tegra_channel_try_format(struct tegra_vi_channel *chan,

ret = v4l2_subdev_call(subdev, pad, set_fmt, sd_state, &fmt);
if (ret < 0)
- return ret;
+ goto out_free;

v4l2_fill_pix_format(pix, &fmt.format);
chan->vi->ops->vi_fmt_align(pix, fmtinfo->bpp);

+out_free:
__v4l2_subdev_state_free(sd_state);

- return 0;
+ return ret;
}

static int tegra_channel_try_format(struct file *file, void *fh,
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 68b40e01d5a0..5af0c64d6a55 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -108,8 +108,8 @@ static ssize_t target_core_item_dbroot_store(struct config_item *item,
const char *page, size_t count)
{
ssize_t read_bytes;
- struct file *fp;
ssize_t r = -EINVAL;
+ struct path path = {};

mutex_lock(&target_devices_lock);
if (target_devices) {
@@ -131,17 +131,14 @@ static ssize_t target_core_item_dbroot_store(struct config_item *item,
db_root_stage[read_bytes - 1] = '\0';

/* validate new db root before accepting it */
- fp = filp_open(db_root_stage, O_RDONLY, 0);
- if (IS_ERR(fp)) {
+ r = kern_path(db_root_stage, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
+ if (r) {
pr_err("db_root: cannot open: %s\n", db_root_stage);
+ if (r == -ENOTDIR)
+ pr_err("db_root: not a directory: %s\n", db_root_stage);
goto unlock;
}
- if (!S_ISDIR(file_inode(fp)->i_mode)) {
- filp_close(fp, NULL);
- pr_err("db_root: not a directory: %s\n", db_root_stage);
- goto unlock;
- }
- filp_close(fp, NULL);
+ path_put(&path);

strncpy(db_root, db_root_stage, read_bytes);
pr_debug("Target_Core_ConfigFS: db_root set to %s\n", db_root);
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index ba0cc2a051ff..ad5866149e24 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -4348,14 +4348,6 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd)
spin_unlock_irqrestore(hba->host->host_lock, flags);
mutex_unlock(&hba->uic_cmd_mutex);

- /*
- * If the h8 exit fails during the runtime resume process, it becomes
- * stuck and cannot be recovered through the error handler. To fix
- * this, use link recovery instead of the error handler.
- */
- if (ret && hba->pm_op_in_progress)
- ret = ufshcd_link_recovery(hba);
-
return ret;
}

@@ -9947,7 +9939,15 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
} else {
dev_err(hba->dev, "%s: hibern8 exit failed %d\n",
__func__, ret);
- goto vendor_suspend;
+ /*
+ * If the h8 exit fails during the runtime resume
+ * process, it becomes stuck and cannot be recovered
+ * through the error handler. To fix this, use link
+ * recovery instead of the error handler.
+ */
+ ret = ufshcd_link_recovery(hba);
+ if (ret)
+ goto vendor_suspend;
}
} else if (ufshcd_is_link_off(hba)) {
/*
diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c
index 465e9267b49c..f0e32227c0b7 100644
--- a/drivers/usb/cdns3/core.c
+++ b/drivers/usb/cdns3/core.c
@@ -524,14 +524,13 @@ EXPORT_SYMBOL_GPL(cdns_suspend);

int cdns_resume(struct cdns *cdns)
{
+ bool power_lost = cdns_power_is_lost(cdns);
enum usb_role real_role;
bool role_changed = false;
int ret = 0;

- if (cdns_power_is_lost(cdns)) {
- if (cdns->role_sw) {
- cdns->role = cdns_role_get(cdns->role_sw);
- } else {
+ if (power_lost) {
+ if (!cdns->role_sw) {
real_role = cdns_hw_role_state_machine(cdns);
if (real_role != cdns->role) {
ret = cdns_hw_role_switch(cdns);
@@ -552,8 +551,8 @@ int cdns_resume(struct cdns *cdns)
}
}

- if (cdns->roles[cdns->role]->resume)
- cdns->roles[cdns->role]->resume(cdns, cdns_power_is_lost(cdns));
+ if (!role_changed && cdns->roles[cdns->role]->resume)
+ cdns->roles[cdns->role]->resume(cdns, power_lost);

return 0;
}
diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c
index 296703939846..520756159d3d 100644
--- a/drivers/xen/xen-acpi-processor.c
+++ b/drivers/xen/xen-acpi-processor.c
@@ -379,11 +379,8 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
acpi_psd[acpi_id].domain);
}

- status = acpi_evaluate_object(handle, "_CST", NULL, &buffer);
- if (ACPI_FAILURE(status)) {
- if (!pblk)
- return AE_OK;
- }
+ if (!pblk && !acpi_has_method(handle, "_CST"))
+ return AE_OK;
/* .. and it has a C-state */
__set_bit(acpi_id, acpi_id_cst_present);

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 79daf6fac58f..c579713e9899 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1877,7 +1877,9 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
list_sort(NULL, &fs_info->reclaim_bgs, reclaim_bgs_cmp);
while (!list_empty(&fs_info->reclaim_bgs)) {
u64 zone_unusable;
- u64 reclaimed;
+ u64 used;
+ u64 reserved;
+ u64 old_total;
int ret = 0;

bg = list_first_entry(&fs_info->reclaim_bgs,
@@ -1953,6 +1955,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
zone_unusable = bg->zone_unusable;

spin_unlock(&bg->lock);
+ old_total = space_info->total_bytes;
spin_unlock(&space_info->lock);

/*
@@ -1973,28 +1976,52 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
if (ret < 0)
goto next;

+ /*
+ * The amount of bytes reclaimed corresponds to the sum of the
+ * "used" and "reserved" counters. We have set the block group
+ * to RO above, which prevents reservations from happening but
+ * we may have existing reservations for which allocation has
+ * not yet been done - btrfs_update_block_group() was not yet
+ * called, which is where we will transfer a reserved extent's
+ * size from the "reserved" counter to the "used" counter - this
+ * happens when running delayed references. When we relocate the
+ * chunk below, relocation first flushes dellaloc, waits for
+ * ordered extent completion (which is where we create delayed
+ * references for data extents) and commits the current
+ * transaction (which runs delayed references), and only after
+ * it does the actual work to move extents out of the block
+ * group. So the reported amount of reclaimed bytes is
+ * effectively the sum of the 'used' and 'reserved' counters.
+ */
+ spin_lock(&bg->lock);
+ used = bg->used;
+ reserved = bg->reserved;
+ spin_unlock(&bg->lock);
+
btrfs_info(fs_info,
- "reclaiming chunk %llu with %llu%% used %llu%% unusable",
+ "reclaiming chunk %llu with %llu%% used %llu%% reserved %llu%% unusable",
bg->start,
- div64_u64(bg->used * 100, bg->length),
+ div64_u64(used * 100, bg->length),
+ div64_u64(reserved * 100, bg->length),
div64_u64(zone_unusable * 100, bg->length));
trace_btrfs_reclaim_block_group(bg);
- reclaimed = bg->used;
ret = btrfs_relocate_chunk(fs_info, bg->start);
if (ret) {
btrfs_dec_block_group_ro(bg);
btrfs_err(fs_info, "error relocating chunk %llu",
bg->start);
- reclaimed = 0;
+ used = 0;
+ reserved = 0;
spin_lock(&space_info->lock);
space_info->reclaim_errors++;
- if (READ_ONCE(space_info->periodic_reclaim))
- space_info->periodic_reclaim_ready = false;
spin_unlock(&space_info->lock);
}
spin_lock(&space_info->lock);
space_info->reclaim_count++;
- space_info->reclaim_bytes += reclaimed;
+ space_info->reclaim_bytes += used;
+ space_info->reclaim_bytes += reserved;
+ if (space_info->total_bytes < old_total)
+ btrfs_set_periodic_reclaim_ready(space_info, true);
spin_unlock(&space_info->lock);

next:
diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c
index fb414643f223..8ce58aa4189f 100644
--- a/fs/btrfs/direct-io.c
+++ b/fs/btrfs/direct-io.c
@@ -868,6 +868,22 @@ ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
goto buffered;
}
+ /*
+ * We can't control the folios being passed in, applications can write
+ * to them while a direct IO write is in progress. This means the
+ * content might change after we calculated the data checksum.
+ * Therefore we can end up storing a checksum that doesn't match the
+ * persisted data.
+ *
+ * To be extra safe and avoid false data checksum mismatch, if the
+ * inode requires data checksum, just fallback to buffered IO.
+ * For buffered IO we have full control of page cache and can ensure
+ * no one is modifying the content during writeback.
+ */
+ if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
+ btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
+ goto buffered;
+ }

/*
* The iov_iter can be mapped to the same file range we are writing to.
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 034cd7b1d0f5..fa4d22f6f29d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3119,7 +3119,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
if (incompat & ~BTRFS_FEATURE_INCOMPAT_SUPP) {
btrfs_err(fs_info,
"cannot mount because of unknown incompat features (0x%llx)",
- incompat);
+ incompat & ~BTRFS_FEATURE_INCOMPAT_SUPP);
return -EINVAL;
}

@@ -3151,7 +3151,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
if (compat_ro_unsupp && is_rw_mount) {
btrfs_err(fs_info,
"cannot mount read-write because of unknown compat_ro features (0x%llx)",
- compat_ro);
+ compat_ro_unsupp);
return -EINVAL;
}

@@ -3164,7 +3164,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
!btrfs_test_opt(fs_info, NOLOGREPLAY)) {
btrfs_err(fs_info,
"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
- compat_ro);
+ compat_ro_unsupp);
return -EINVAL;
}

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b1d9595762ef..09ebe5acbe43 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4629,7 +4629,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
spin_unlock(&dest->root_item_lock);
btrfs_warn(fs_info,
"attempt to delete subvolume %llu with active swapfile",
- btrfs_root_id(root));
+ btrfs_root_id(dest));
ret = -EPERM;
goto out_up_write;
}
diff --git a/fs/btrfs/misc.h b/fs/btrfs/misc.h
index 0d599fd847c9..1212674d7a1b 100644
--- a/fs/btrfs/misc.h
+++ b/fs/btrfs/misc.h
@@ -10,6 +10,13 @@
#include <linux/math64.h>
#include <linux/rbtree.h>

+/*
+ * Convenience macros to define a pointer with the __free(kfree) and
+ * __free(kvfree) cleanup attributes and initialized to NULL.
+ */
+#define AUTO_KFREE(name) *name __free(kfree) = NULL
+#define AUTO_KVFREE(name) *name __free(kvfree) = NULL
+
/*
* Enumerate bits using enum autoincrement. Define the @name as the n-th bit.
*/
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 3cbb9f22d394..513c2bfa8d62 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -634,7 +634,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
btrfs_warn_rl(fs_info,
"tree block %llu mirror %u has bad fsid, has %pU want %pU",
logical, stripe->mirror_num,
- header->fsid, fs_info->fs_devices->fsid);
+ header->fsid, fs_info->fs_devices->metadata_uuid);
return;
}
if (memcmp(header->chunk_tree_uuid, fs_info->chunk_tree_uuid,
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index cae4ec21bab4..af19f7a3e74a 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -2031,12 +2031,11 @@ static bool is_reclaim_urgent(struct btrfs_space_info *space_info)
return unalloc < data_chunk_size;
}

-static void do_reclaim_sweep(const struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info, int raid)
+static bool do_reclaim_sweep(struct btrfs_space_info *space_info, int raid)
{
struct btrfs_block_group *bg;
int thresh_pct;
- bool try_again = true;
+ bool will_reclaim = false;
bool urgent;

spin_lock(&space_info->lock);
@@ -2054,7 +2053,7 @@ static void do_reclaim_sweep(const struct btrfs_fs_info *fs_info,
spin_lock(&bg->lock);
thresh = mult_perc(bg->length, thresh_pct);
if (bg->used < thresh && bg->reclaim_mark) {
- try_again = false;
+ will_reclaim = true;
reclaim = true;
}
bg->reclaim_mark++;
@@ -2071,12 +2070,13 @@ static void do_reclaim_sweep(const struct btrfs_fs_info *fs_info,
* If we have any staler groups, we don't touch the fresher ones, but if we
* really need a block group, do take a fresh one.
*/
- if (try_again && urgent) {
- try_again = false;
+ if (!will_reclaim && urgent) {
+ urgent = false;
goto again;
}

up_read(&space_info->groups_sem);
+ return will_reclaim;
}

void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes)
@@ -2086,7 +2086,8 @@ void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s6
lockdep_assert_held(&space_info->lock);
space_info->reclaimable_bytes += bytes;

- if (space_info->reclaimable_bytes >= chunk_sz)
+ if (space_info->reclaimable_bytes > 0 &&
+ space_info->reclaimable_bytes >= chunk_sz)
btrfs_set_periodic_reclaim_ready(space_info, true);
}

@@ -2113,7 +2114,6 @@ bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info)

spin_lock(&space_info->lock);
ret = space_info->periodic_reclaim_ready;
- btrfs_set_periodic_reclaim_ready(space_info, false);
spin_unlock(&space_info->lock);

return ret;
@@ -2127,7 +2127,9 @@ void btrfs_reclaim_sweep(const struct btrfs_fs_info *fs_info)
list_for_each_entry(space_info, &fs_info->space_info, list) {
if (!btrfs_should_periodic_reclaim(space_info))
continue;
- for (raid = 0; raid < BTRFS_NR_RAID_TYPES; raid++)
- do_reclaim_sweep(fs_info, space_info, raid);
+ for (raid = 0; raid < BTRFS_NR_RAID_TYPES; raid++) {
+ if (do_reclaim_sweep(space_info, raid))
+ btrfs_set_periodic_reclaim_ready(space_info, false);
+ }
}
}
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 3bb7a376bd3f..60bba7fbeb35 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -1701,7 +1701,7 @@ static int check_extent_data_ref(struct extent_buffer *leaf,
objectid > BTRFS_LAST_FREE_OBJECTID)) {
extent_err(leaf, slot,
"invalid extent data backref objectid value %llu",
- root);
+ objectid);
return -EUCLEAN;
}
if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) {
@@ -1882,7 +1882,7 @@ static int check_dev_extent_item(const struct extent_buffer *leaf,
if (unlikely(prev_key->offset + prev_len > key->offset)) {
generic_err(leaf, slot,
"dev extent overlap, prev offset %llu len %llu current offset %llu",
- prev_key->objectid, prev_len, key->offset);
+ prev_key->offset, prev_len, key->offset);
return -EUCLEAN;
}
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 181cb3f56ab4..e0c5ff2e08c1 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1386,7 +1386,8 @@ static int btrfs_load_block_group_single(struct btrfs_block_group *bg,
static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
- unsigned long *active)
+ unsigned long *active,
+ u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;

@@ -1409,6 +1410,27 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
zone_info[1].physical);
return -EIO;
}
+
+ /*
+ * When the last extent is removed, last_alloc can be smaller than the other write
+ * pointer. In that case, last_alloc should be moved to the corresponding write
+ * pointer position.
+ */
+ for (int i = 0; i < map->num_stripes; i++) {
+ if (zone_info[i].alloc_offset == WP_CONVENTIONAL)
+ continue;
+ if (last_alloc <= zone_info[i].alloc_offset) {
+ last_alloc = zone_info[i].alloc_offset;
+ break;
+ }
+ }
+
+ if (zone_info[0].alloc_offset == WP_CONVENTIONAL)
+ zone_info[0].alloc_offset = last_alloc;
+
+ if (zone_info[1].alloc_offset == WP_CONVENTIONAL)
+ zone_info[1].alloc_offset = last_alloc;
+
if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) {
btrfs_err(bg->fs_info,
"zoned: write pointer offset mismatch of zones in DUP profile");
@@ -1429,7 +1451,8 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
- unsigned long *active)
+ unsigned long *active,
+ u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
int i;
@@ -1443,10 +1466,27 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
/* In case a device is missing we have a cap of 0, so don't use it. */
bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);

+ /*
+ * When the last extent is removed, last_alloc can be smaller than the other write
+ * pointer. In that case, last_alloc should be moved to the corresponding write
+ * pointer position.
+ */
for (i = 0; i < map->num_stripes; i++) {
if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
zone_info[i].alloc_offset == WP_CONVENTIONAL)
continue;
+ if (last_alloc <= zone_info[i].alloc_offset) {
+ last_alloc = zone_info[i].alloc_offset;
+ break;
+ }
+ }
+
+ for (i = 0; i < map->num_stripes; i++) {
+ if (zone_info[i].alloc_offset == WP_MISSING_DEV)
+ continue;
+
+ if (zone_info[i].alloc_offset == WP_CONVENTIONAL)
+ zone_info[i].alloc_offset = last_alloc;

if ((zone_info[0].alloc_offset != zone_info[i].alloc_offset) &&
!btrfs_test_opt(fs_info, DEGRADED)) {
@@ -1477,9 +1517,14 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
- unsigned long *active)
+ unsigned long *active,
+ u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
+ u64 stripe_nr = 0, stripe_offset = 0;
+ u64 prev_offset = 0;
+ u32 stripe_index = 0;
+ bool has_partial = false, has_conventional = false;

if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) {
btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree",
@@ -1487,11 +1532,79 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
return -EINVAL;
}

+ /*
+ * When the last extent is removed, last_alloc can be smaller than the other write
+ * pointer. In that case, last_alloc should be moved to the corresponding write
+ * pointer position.
+ */
for (int i = 0; i < map->num_stripes; i++) {
+ u64 alloc;
+
if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
zone_info[i].alloc_offset == WP_CONVENTIONAL)
continue;

+ stripe_nr = zone_info[i].alloc_offset >> BTRFS_STRIPE_LEN_SHIFT;
+ stripe_offset = zone_info[i].alloc_offset & BTRFS_STRIPE_LEN_MASK;
+ if (stripe_offset == 0 && stripe_nr > 0) {
+ stripe_nr--;
+ stripe_offset = BTRFS_STRIPE_LEN;
+ }
+ alloc = ((stripe_nr * map->num_stripes + i) << BTRFS_STRIPE_LEN_SHIFT) +
+ stripe_offset;
+ last_alloc = max(last_alloc, alloc);
+
+ /* Partially written stripe found. It should be last. */
+ if (zone_info[i].alloc_offset & BTRFS_STRIPE_LEN_MASK)
+ break;
+ }
+ stripe_nr = 0;
+ stripe_offset = 0;
+
+ if (last_alloc) {
+ u32 factor = map->num_stripes;
+
+ stripe_nr = last_alloc >> BTRFS_STRIPE_LEN_SHIFT;
+ stripe_offset = last_alloc & BTRFS_STRIPE_LEN_MASK;
+ stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
+ }
+
+ for (int i = 0; i < map->num_stripes; i++) {
+ if (zone_info[i].alloc_offset == WP_MISSING_DEV)
+ continue;
+
+ if (zone_info[i].alloc_offset == WP_CONVENTIONAL) {
+ has_conventional = true;
+ zone_info[i].alloc_offset = btrfs_stripe_nr_to_offset(stripe_nr);
+
+ if (stripe_index > i)
+ zone_info[i].alloc_offset += BTRFS_STRIPE_LEN;
+ else if (stripe_index == i)
+ zone_info[i].alloc_offset += stripe_offset;
+ }
+
+ /* Verification */
+ if (i != 0) {
+ if (unlikely(prev_offset < zone_info[i].alloc_offset)) {
+ btrfs_err(fs_info,
+ "zoned: stripe position disorder found in block group %llu",
+ bg->start);
+ return -EIO;
+ }
+
+ if (unlikely(has_partial &&
+ (zone_info[i].alloc_offset & BTRFS_STRIPE_LEN_MASK))) {
+ btrfs_err(fs_info,
+ "zoned: multiple partial written stripe found in block group %llu",
+ bg->start);
+ return -EIO;
+ }
+ }
+ prev_offset = zone_info[i].alloc_offset;
+
+ if ((zone_info[i].alloc_offset & BTRFS_STRIPE_LEN_MASK) != 0)
+ has_partial = true;
+
if (test_bit(0, active) != test_bit(i, active)) {
if (!btrfs_zone_activate(bg))
return -EIO;
@@ -1503,15 +1616,34 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
bg->alloc_offset += zone_info[i].alloc_offset;
}

+ /* Check if all devices stay in the same stripe row. */
+ if (unlikely(zone_info[0].alloc_offset -
+ zone_info[map->num_stripes - 1].alloc_offset > BTRFS_STRIPE_LEN)) {
+ btrfs_err(fs_info, "zoned: stripe gap too large in block group %llu", bg->start);
+ return -EIO;
+ }
+
+ if (unlikely(has_conventional && bg->alloc_offset < last_alloc)) {
+ btrfs_err(fs_info, "zoned: allocated extent stays beyond write pointers %llu %llu",
+ bg->alloc_offset, last_alloc);
+ return -EIO;
+ }
+
return 0;
}

static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
- unsigned long *active)
+ unsigned long *active,
+ u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
+ u64 AUTO_KFREE(raid0_allocs);
+ u64 stripe_nr = 0, stripe_offset = 0;
+ u32 stripe_index = 0;
+ bool has_partial = false, has_conventional = false;
+ u64 prev_offset = 0;

if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) {
btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree",
@@ -1519,17 +1651,114 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
return -EINVAL;
}

+ raid0_allocs = kcalloc(map->num_stripes / map->sub_stripes, sizeof(*raid0_allocs),
+ GFP_NOFS);
+ if (!raid0_allocs)
+ return -ENOMEM;
+
+ /*
+ * When the last extent is removed, last_alloc can be smaller than the other write
+ * pointer. In that case, last_alloc should be moved to the corresponding write
+ * pointer position.
+ */
+ for (int i = 0; i < map->num_stripes; i += map->sub_stripes) {
+ u64 alloc = zone_info[i].alloc_offset;
+
+ for (int j = 1; j < map->sub_stripes; j++) {
+ int idx = i + j;
+
+ if (zone_info[idx].alloc_offset == WP_MISSING_DEV ||
+ zone_info[idx].alloc_offset == WP_CONVENTIONAL)
+ continue;
+ if (alloc == WP_MISSING_DEV || alloc == WP_CONVENTIONAL) {
+ alloc = zone_info[idx].alloc_offset;
+ } else if (unlikely(zone_info[idx].alloc_offset != alloc)) {
+ btrfs_err(fs_info,
+ "zoned: write pointer mismatch found in block group %llu",
+ bg->start);
+ return -EIO;
+ }
+ }
+
+ raid0_allocs[i / map->sub_stripes] = alloc;
+ if (alloc == WP_CONVENTIONAL)
+ continue;
+ if (unlikely(alloc == WP_MISSING_DEV)) {
+ btrfs_err(fs_info,
+ "zoned: cannot recover write pointer of block group %llu due to missing device",
+ bg->start);
+ return -EIO;
+ }
+
+ stripe_nr = alloc >> BTRFS_STRIPE_LEN_SHIFT;
+ stripe_offset = alloc & BTRFS_STRIPE_LEN_MASK;
+ if (stripe_offset == 0 && stripe_nr > 0) {
+ stripe_nr--;
+ stripe_offset = BTRFS_STRIPE_LEN;
+ }
+
+ alloc = ((stripe_nr * (map->num_stripes / map->sub_stripes) +
+ (i / map->sub_stripes)) <<
+ BTRFS_STRIPE_LEN_SHIFT) + stripe_offset;
+ last_alloc = max(last_alloc, alloc);
+ }
+ stripe_nr = 0;
+ stripe_offset = 0;
+
+ if (last_alloc) {
+ u32 factor = map->num_stripes / map->sub_stripes;
+
+ stripe_nr = last_alloc >> BTRFS_STRIPE_LEN_SHIFT;
+ stripe_offset = last_alloc & BTRFS_STRIPE_LEN_MASK;
+ stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
+ }
+
for (int i = 0; i < map->num_stripes; i++) {
+ int idx = i / map->sub_stripes;
+
+ if (raid0_allocs[idx] == WP_CONVENTIONAL) {
+ has_conventional = true;
+ raid0_allocs[idx] = btrfs_stripe_nr_to_offset(stripe_nr);
+
+ if (stripe_index > idx)
+ raid0_allocs[idx] += BTRFS_STRIPE_LEN;
+ else if (stripe_index == idx)
+ raid0_allocs[idx] += stripe_offset;
+ }
+
+ if ((i % map->sub_stripes) == 0) {
+ /* Verification */
+ if (i != 0) {
+ if (unlikely(prev_offset < raid0_allocs[idx])) {
+ btrfs_err(fs_info,
+ "zoned: stripe position disorder found in block group %llu",
+ bg->start);
+ return -EIO;
+ }
+
+ if (unlikely(has_partial &&
+ (raid0_allocs[idx] & BTRFS_STRIPE_LEN_MASK))) {
+ btrfs_err(fs_info,
+ "zoned: multiple partial written stripe found in block group %llu",
+ bg->start);
+ return -EIO;
+ }
+ }
+ prev_offset = raid0_allocs[idx];
+
+ if ((raid0_allocs[idx] & BTRFS_STRIPE_LEN_MASK) != 0)
+ has_partial = true;
+ }
+
if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
zone_info[i].alloc_offset == WP_CONVENTIONAL)
- continue;
+ zone_info[i].alloc_offset = raid0_allocs[idx];

if (test_bit(0, active) != test_bit(i, active)) {
if (!btrfs_zone_activate(bg))
return -EIO;
- } else {
- if (test_bit(0, active))
- set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
+ } else if (test_bit(0, active)) {
+ set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
}

if ((i % map->sub_stripes) == 0) {
@@ -1538,6 +1767,20 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
}
}

+ /* Check if all devices stay in the same stripe row. */
+ if (unlikely(zone_info[0].alloc_offset -
+ zone_info[map->num_stripes - 1].alloc_offset > BTRFS_STRIPE_LEN)) {
+ btrfs_err(fs_info, "zoned: stripe gap too large in block group %llu",
+ bg->start);
+ return -EIO;
+ }
+
+ if (unlikely(has_conventional && bg->alloc_offset < last_alloc)) {
+ btrfs_err(fs_info, "zoned: allocated extent stays beyond write pointers %llu %llu",
+ bg->alloc_offset, last_alloc);
+ return -EIO;
+ }
+
return 0;
}

@@ -1619,18 +1862,22 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
ret = btrfs_load_block_group_single(cache, &zone_info[0], active);
break;
case BTRFS_BLOCK_GROUP_DUP:
- ret = btrfs_load_block_group_dup(cache, map, zone_info, active);
+ ret = btrfs_load_block_group_dup(cache, map, zone_info, active,
+ last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID1:
case BTRFS_BLOCK_GROUP_RAID1C3:
case BTRFS_BLOCK_GROUP_RAID1C4:
- ret = btrfs_load_block_group_raid1(cache, map, zone_info, active);
+ ret = btrfs_load_block_group_raid1(cache, map, zone_info,
+ active, last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID0:
- ret = btrfs_load_block_group_raid0(cache, map, zone_info, active);
+ ret = btrfs_load_block_group_raid0(cache, map, zone_info,
+ active, last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID10:
- ret = btrfs_load_block_group_raid10(cache, map, zone_info, active);
+ ret = btrfs_load_block_group_raid10(cache, map, zone_info,
+ active, last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID5:
case BTRFS_BLOCK_GROUP_RAID6:
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 8711fac20804..aa4318271eee 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -2012,7 +2012,8 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
* @ep: the &struct eventpoll to be currently checked.
* @depth: Current depth of the path being checked.
*
- * Return: depth of the subtree, or INT_MAX if we found a loop or went too deep.
+ * Return: depth of the subtree, or a value bigger than EP_MAX_NESTS if we found
+ * a loop or went too deep.
*/
static int ep_loop_check_proc(struct eventpoll *ep, int depth)
{
@@ -2031,7 +2032,7 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth)
struct eventpoll *ep_tovisit;
ep_tovisit = epi->ffd.file->private_data;
if (ep_tovisit == inserting_into || depth > EP_MAX_NESTS)
- result = INT_MAX;
+ result = EP_MAX_NESTS+1;
else
result = max(result, ep_loop_check_proc(ep_tovisit, depth + 1) + 1);
if (result > EP_MAX_NESTS)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d8a059ec1ad6..bcdd8f381869 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -213,15 +213,6 @@ enum criteria {
#define EXT4_MB_USE_RESERVED 0x2000
/* Do strict check for free blocks while retrying block allocation */
#define EXT4_MB_STRICT_CHECK 0x4000
-/* Large fragment size list lookup succeeded at least once for
- * CR_POWER2_ALIGNED */
-#define EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED 0x8000
-/* Avg fragment size rb tree lookup succeeded at least once for
- * CR_GOAL_LEN_FAST */
-#define EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED 0x00010000
-/* Avg fragment size rb tree lookup succeeded at least once for
- * CR_BEST_AVAIL_LEN */
-#define EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED 0x00020000

struct ext4_allocation_request {
/* target inode for block we're allocating */
@@ -1588,10 +1579,8 @@ struct ext4_sb_info {
struct list_head s_discard_list;
struct work_struct s_discard_work;
atomic_t s_retry_alloc_pending;
- struct list_head *s_mb_avg_fragment_size;
- rwlock_t *s_mb_avg_fragment_size_locks;
- struct list_head *s_mb_largest_free_orders;
- rwlock_t *s_mb_largest_free_orders_locks;
+ struct xarray *s_mb_avg_fragment_size;
+ struct xarray *s_mb_largest_free_orders;

/* tunables */
unsigned long s_stripe;
@@ -1621,9 +1610,6 @@ struct ext4_sb_info {
atomic_t s_bal_len_goals; /* len goal hits */
atomic_t s_bal_breaks; /* too long searches */
atomic_t s_bal_2orders; /* 2^order hits */
- atomic_t s_bal_p2_aligned_bad_suggestions;
- atomic_t s_bal_goal_fast_bad_suggestions;
- atomic_t s_bal_best_avail_bad_suggestions;
atomic64_t s_bal_cX_groups_considered[EXT4_MB_NUM_CRS];
atomic64_t s_bal_cX_hits[EXT4_MB_NUM_CRS];
atomic64_t s_bal_cX_failed[EXT4_MB_NUM_CRS]; /* cX loop didn't find blocks */
@@ -3455,8 +3441,6 @@ struct ext4_group_info {
void *bb_bitmap;
#endif
struct rw_semaphore alloc_sem;
- struct list_head bb_avg_fragment_size_node;
- struct list_head bb_largest_free_order_node;
ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block
* regions, index is order.
* bb_counters[3] = 5 means
@@ -3507,23 +3491,28 @@ static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi)
return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD);
}

+static inline bool ext4_try_lock_group(struct super_block *sb, ext4_group_t group)
+{
+ if (!spin_trylock(ext4_group_lock_ptr(sb, group)))
+ return false;
+ /*
+ * We're able to grab the lock right away, so drop the lock
+ * contention counter.
+ */
+ atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0);
+ return true;
+}
+
static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
{
- spinlock_t *lock = ext4_group_lock_ptr(sb, group);
- if (spin_trylock(lock))
- /*
- * We're able to grab the lock right away, so drop the
- * lock contention counter.
- */
- atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0);
- else {
+ if (!ext4_try_lock_group(sb, group)) {
/*
* The lock is busy, so bump the contention counter,
* and then wait on the spin lock.
*/
atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1,
EXT4_MAX_CONTENTION);
- spin_lock(lock);
+ spin_lock(ext4_group_lock_ptr(sb, group));
}
}

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 05d4a6330086..bd556a3eac19 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3754,10 +3754,6 @@ static struct ext4_ext_path *ext4_split_convert_extents(handle_t *handle,
>> inode->i_sb->s_blocksize_bits;
if (eof_block < map->m_lblk + map->m_len)
eof_block = map->m_lblk + map->m_len;
- /*
- * It is safe to convert extent to initialized via explicit
- * zeroout only if extent is fully inside i_size or new_size.
- */
depth = ext_depth(inode);
ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
@@ -3766,11 +3762,19 @@ static struct ext4_ext_path *ext4_split_convert_extents(handle_t *handle,
/* Convert to unwritten */
if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) {
split_flag |= EXT4_EXT_DATA_ENTIRE_VALID1;
- /* Convert to initialized */
- } else if (flags & EXT4_GET_BLOCKS_CONVERT) {
+ /* Split the existing unwritten extent */
+ } else if (flags & (EXT4_GET_BLOCKS_UNWRIT_EXT |
+ EXT4_GET_BLOCKS_CONVERT)) {
+ /*
+ * It is safe to convert extent to initialized via explicit
+ * zeroout only if extent is fully inside i_size or new_size.
+ */
split_flag |= ee_block + ee_len <= eof_block ?
EXT4_EXT_MAY_ZEROOUT : 0;
- split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2);
+ split_flag |= EXT4_EXT_MARK_UNWRIT2;
+ /* Convert to initialized */
+ if (flags & EXT4_GET_BLOCKS_CONVERT)
+ split_flag |= EXT4_EXT_DATA_VALID2;
}
flags |= EXT4_GET_BLOCKS_PRE_IO;
return ext4_split_extent(handle, inode, path, map, split_flag, flags,
@@ -3949,7 +3953,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
/* get_block() before submitting IO, split the extent */
if (flags & EXT4_GET_BLOCKS_PRE_IO) {
path = ext4_split_convert_extents(handle, inode, map, path,
- flags | EXT4_GET_BLOCKS_CONVERT, allocated);
+ flags, allocated);
if (IS_ERR(path))
return path;
/*
diff --git a/fs/ext4/mballoc-test.c b/fs/ext4/mballoc-test.c
index 8eacba6e780a..0f81094fc0db 100644
--- a/fs/ext4/mballoc-test.c
+++ b/fs/ext4/mballoc-test.c
@@ -804,8 +804,6 @@ static void test_mb_mark_used(struct kunit *test)
grp->bb_free = EXT4_CLUSTERS_PER_GROUP(sb);
grp->bb_largest_free_order = -1;
grp->bb_avg_fragment_size_order = -1;
- INIT_LIST_HEAD(&grp->bb_largest_free_order_node);
- INIT_LIST_HEAD(&grp->bb_avg_fragment_size_node);
mbt_generate_test_ranges(sb, ranges, TEST_RANGE_COUNT);
for (i = 0; i < TEST_RANGE_COUNT; i++)
test_mb_mark_used_range(test, &e4b, ranges[i].start,
@@ -880,8 +878,6 @@ static void test_mb_free_blocks(struct kunit *test)
grp->bb_free = 0;
grp->bb_largest_free_order = -1;
grp->bb_avg_fragment_size_order = -1;
- INIT_LIST_HEAD(&grp->bb_largest_free_order_node);
- INIT_LIST_HEAD(&grp->bb_avg_fragment_size_node);
memset(bitmap, 0xff, sb->s_blocksize);

mbt_generate_test_ranges(sb, ranges, TEST_RANGE_COUNT);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index edfffd15b295..45e44b6e7238 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -132,25 +132,30 @@
* If "mb_optimize_scan" mount option is set, we maintain in memory group info
* structures in two data structures:
*
- * 1) Array of largest free order lists (sbi->s_mb_largest_free_orders)
+ * 1) Array of largest free order xarrays (sbi->s_mb_largest_free_orders)
*
- * Locking: sbi->s_mb_largest_free_orders_locks(array of rw locks)
+ * Locking: Writers use xa_lock, readers use rcu_read_lock.
*
- * This is an array of lists where the index in the array represents the
+ * This is an array of xarrays where the index in the array represents the
* largest free order in the buddy bitmap of the participating group infos of
- * that list. So, there are exactly MB_NUM_ORDERS(sb) (which means total
- * number of buddy bitmap orders possible) number of lists. Group-infos are
- * placed in appropriate lists.
+ * that xarray. So, there are exactly MB_NUM_ORDERS(sb) (which means total
+ * number of buddy bitmap orders possible) number of xarrays. Group-infos are
+ * placed in appropriate xarrays.
*
- * 2) Average fragment size lists (sbi->s_mb_avg_fragment_size)
+ * 2) Average fragment size xarrays (sbi->s_mb_avg_fragment_size)
*
- * Locking: sbi->s_mb_avg_fragment_size_locks(array of rw locks)
+ * Locking: Writers use xa_lock, readers use rcu_read_lock.
*
- * This is an array of lists where in the i-th list there are groups with
+ * This is an array of xarrays where in the i-th xarray there are groups with
* average fragment size >= 2^i and < 2^(i+1). The average fragment size
* is computed as ext4_group_info->bb_free / ext4_group_info->bb_fragments.
- * Note that we don't bother with a special list for completely empty groups
- * so we only have MB_NUM_ORDERS(sb) lists.
+ * Note that we don't bother with a special xarray for completely empty
+ * groups so we only have MB_NUM_ORDERS(sb) xarrays. Group-infos are placed
+ * in appropriate xarrays.
+ *
+ * In xarray, the index is the block group number, the value is the block group
+ * information, and a non-empty value indicates the block group is present in
+ * the current xarray.
*
* When "mb_optimize_scan" mount option is set, mballoc consults the above data
* structures to decide the order in which groups are to be traversed for
@@ -420,8 +425,8 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);

-static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
- ext4_group_t group, enum criteria cr);
+static int ext4_mb_scan_group(struct ext4_allocation_context *ac,
+ ext4_group_t group);

static int ext4_try_to_trim_range(struct super_block *sb,
struct ext4_buddy *e4b, ext4_grpblk_t start,
@@ -869,121 +874,165 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
if (new == old)
return;

- if (old >= 0) {
- write_lock(&sbi->s_mb_avg_fragment_size_locks[old]);
- list_del(&grp->bb_avg_fragment_size_node);
- write_unlock(&sbi->s_mb_avg_fragment_size_locks[old]);
- }
+ if (old >= 0)
+ xa_erase(&sbi->s_mb_avg_fragment_size[old], grp->bb_group);

grp->bb_avg_fragment_size_order = new;
if (new >= 0) {
- write_lock(&sbi->s_mb_avg_fragment_size_locks[new]);
- list_add_tail(&grp->bb_avg_fragment_size_node,
- &sbi->s_mb_avg_fragment_size[new]);
- write_unlock(&sbi->s_mb_avg_fragment_size_locks[new]);
+ /*
+ * Cannot use __GFP_NOFAIL because we hold the group lock.
+ * Although allocation for insertion may fails, it's not fatal
+ * as we have linear traversal to fall back on.
+ */
+ int err = xa_insert(&sbi->s_mb_avg_fragment_size[new],
+ grp->bb_group, grp, GFP_ATOMIC);
+ if (err)
+ mb_debug(sb, "insert group: %u to s_mb_avg_fragment_size[%d] failed, err %d",
+ grp->bb_group, new, err);
}
}

+static ext4_group_t ext4_get_allocation_groups_count(
+ struct ext4_allocation_context *ac)
+{
+ ext4_group_t ngroups = ext4_get_groups_count(ac->ac_sb);
+
+ /* non-extent files are limited to low blocks/groups */
+ if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
+ ngroups = EXT4_SB(ac->ac_sb)->s_blockfile_groups;
+
+ /* Pairs with smp_wmb() in ext4_update_super() */
+ smp_rmb();
+
+ return ngroups;
+}
+
+static int ext4_mb_scan_groups_xa_range(struct ext4_allocation_context *ac,
+ struct xarray *xa,
+ ext4_group_t start, ext4_group_t end)
+{
+ struct super_block *sb = ac->ac_sb;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ enum criteria cr = ac->ac_criteria;
+ ext4_group_t ngroups = ext4_get_allocation_groups_count(ac);
+ unsigned long group = start;
+ struct ext4_group_info *grp;
+
+ if (WARN_ON_ONCE(end > ngroups || start >= end))
+ return 0;
+
+ xa_for_each_range(xa, group, grp, start, end - 1) {
+ int err;
+
+ if (sbi->s_mb_stats)
+ atomic64_inc(&sbi->s_bal_cX_groups_considered[cr]);
+
+ err = ext4_mb_scan_group(ac, grp->bb_group);
+ if (err || ac->ac_status != AC_STATUS_CONTINUE)
+ return err;
+
+ cond_resched();
+ }
+
+ return 0;
+}
+
+/*
+ * Find a suitable group of given order from the largest free orders xarray.
+ */
+static inline int
+ext4_mb_scan_groups_largest_free_order_range(struct ext4_allocation_context *ac,
+ int order, ext4_group_t start,
+ ext4_group_t end)
+{
+ struct xarray *xa = &EXT4_SB(ac->ac_sb)->s_mb_largest_free_orders[order];
+
+ if (xa_empty(xa))
+ return 0;
+
+ return ext4_mb_scan_groups_xa_range(ac, xa, start, end);
+}
+
/*
* Choose next group by traversing largest_free_order lists. Updates *new_cr if
* cr level needs an update.
*/
-static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context *ac,
- enum criteria *new_cr, ext4_group_t *group)
+static int ext4_mb_scan_groups_p2_aligned(struct ext4_allocation_context *ac,
+ ext4_group_t group)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
- struct ext4_group_info *iter;
int i;
+ int ret = 0;
+ ext4_group_t start, end;

- if (ac->ac_status == AC_STATUS_FOUND)
- return;
-
- if (unlikely(sbi->s_mb_stats && ac->ac_flags & EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED))
- atomic_inc(&sbi->s_bal_p2_aligned_bad_suggestions);
-
+ start = group;
+ end = ext4_get_allocation_groups_count(ac);
+wrap_around:
for (i = ac->ac_2order; i < MB_NUM_ORDERS(ac->ac_sb); i++) {
- if (list_empty(&sbi->s_mb_largest_free_orders[i]))
- continue;
- read_lock(&sbi->s_mb_largest_free_orders_locks[i]);
- if (list_empty(&sbi->s_mb_largest_free_orders[i])) {
- read_unlock(&sbi->s_mb_largest_free_orders_locks[i]);
- continue;
- }
- list_for_each_entry(iter, &sbi->s_mb_largest_free_orders[i],
- bb_largest_free_order_node) {
- if (sbi->s_mb_stats)
- atomic64_inc(&sbi->s_bal_cX_groups_considered[CR_POWER2_ALIGNED]);
- if (likely(ext4_mb_good_group(ac, iter->bb_group, CR_POWER2_ALIGNED))) {
- *group = iter->bb_group;
- ac->ac_flags |= EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED;
- read_unlock(&sbi->s_mb_largest_free_orders_locks[i]);
- return;
- }
- }
- read_unlock(&sbi->s_mb_largest_free_orders_locks[i]);
+ ret = ext4_mb_scan_groups_largest_free_order_range(ac, i,
+ start, end);
+ if (ret || ac->ac_status != AC_STATUS_CONTINUE)
+ return ret;
+ }
+ if (start) {
+ end = start;
+ start = 0;
+ goto wrap_around;
}

+ if (sbi->s_mb_stats)
+ atomic64_inc(&sbi->s_bal_cX_failed[ac->ac_criteria]);
+
/* Increment cr and search again if no group is found */
- *new_cr = CR_GOAL_LEN_FAST;
+ ac->ac_criteria = CR_GOAL_LEN_FAST;
+ return ret;
}

/*
- * Find a suitable group of given order from the average fragments list.
+ * Find a suitable group of given order from the average fragments xarray.
*/
-static struct ext4_group_info *
-ext4_mb_find_good_group_avg_frag_lists(struct ext4_allocation_context *ac, int order)
+static int
+ext4_mb_scan_groups_avg_frag_order_range(struct ext4_allocation_context *ac,
+ int order, ext4_group_t start,
+ ext4_group_t end)
{
- struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
- struct list_head *frag_list = &sbi->s_mb_avg_fragment_size[order];
- rwlock_t *frag_list_lock = &sbi->s_mb_avg_fragment_size_locks[order];
- struct ext4_group_info *grp = NULL, *iter;
- enum criteria cr = ac->ac_criteria;
+ struct xarray *xa = &EXT4_SB(ac->ac_sb)->s_mb_avg_fragment_size[order];

- if (list_empty(frag_list))
- return NULL;
- read_lock(frag_list_lock);
- if (list_empty(frag_list)) {
- read_unlock(frag_list_lock);
- return NULL;
- }
- list_for_each_entry(iter, frag_list, bb_avg_fragment_size_node) {
- if (sbi->s_mb_stats)
- atomic64_inc(&sbi->s_bal_cX_groups_considered[cr]);
- if (likely(ext4_mb_good_group(ac, iter->bb_group, cr))) {
- grp = iter;
- break;
- }
- }
- read_unlock(frag_list_lock);
- return grp;
+ if (xa_empty(xa))
+ return 0;
+
+ return ext4_mb_scan_groups_xa_range(ac, xa, start, end);
}

/*
* Choose next group by traversing average fragment size list of suitable
* order. Updates *new_cr if cr level needs an update.
*/
-static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context *ac,
- enum criteria *new_cr, ext4_group_t *group)
+static int ext4_mb_scan_groups_goal_fast(struct ext4_allocation_context *ac,
+ ext4_group_t group)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
- struct ext4_group_info *grp = NULL;
- int i;
-
- if (unlikely(ac->ac_flags & EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED)) {
- if (sbi->s_mb_stats)
- atomic_inc(&sbi->s_bal_goal_fast_bad_suggestions);
+ int i, ret = 0;
+ ext4_group_t start, end;
+
+ start = group;
+ end = ext4_get_allocation_groups_count(ac);
+wrap_around:
+ i = mb_avg_fragment_size_order(ac->ac_sb, ac->ac_g_ex.fe_len);
+ for (; i < MB_NUM_ORDERS(ac->ac_sb); i++) {
+ ret = ext4_mb_scan_groups_avg_frag_order_range(ac, i,
+ start, end);
+ if (ret || ac->ac_status != AC_STATUS_CONTINUE)
+ return ret;
}
-
- for (i = mb_avg_fragment_size_order(ac->ac_sb, ac->ac_g_ex.fe_len);
- i < MB_NUM_ORDERS(ac->ac_sb); i++) {
- grp = ext4_mb_find_good_group_avg_frag_lists(ac, i);
- if (grp) {
- *group = grp->bb_group;
- ac->ac_flags |= EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED;
- return;
- }
+ if (start) {
+ end = start;
+ start = 0;
+ goto wrap_around;
}

+ if (sbi->s_mb_stats)
+ atomic64_inc(&sbi->s_bal_cX_failed[ac->ac_criteria]);
/*
* CR_BEST_AVAIL_LEN works based on the concept that we have
* a larger normalized goal len request which can be trimmed to
@@ -993,9 +1042,11 @@ static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context *
* See function ext4_mb_normalize_request() (EXT4_MB_HINT_DATA).
*/
if (ac->ac_flags & EXT4_MB_HINT_DATA)
- *new_cr = CR_BEST_AVAIL_LEN;
+ ac->ac_criteria = CR_BEST_AVAIL_LEN;
else
- *new_cr = CR_GOAL_LEN_SLOW;
+ ac->ac_criteria = CR_GOAL_LEN_SLOW;
+
+ return ret;
}

/*
@@ -1007,18 +1058,14 @@ static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context *
* preallocations. However, we make sure that we don't trim the request too
* much and fall to CR_GOAL_LEN_SLOW in that case.
*/
-static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context *ac,
- enum criteria *new_cr, ext4_group_t *group)
+static int ext4_mb_scan_groups_best_avail(struct ext4_allocation_context *ac,
+ ext4_group_t group)
{
+ int ret = 0;
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
- struct ext4_group_info *grp = NULL;
int i, order, min_order;
unsigned long num_stripe_clusters = 0;
-
- if (unlikely(ac->ac_flags & EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED)) {
- if (sbi->s_mb_stats)
- atomic_inc(&sbi->s_bal_best_avail_bad_suggestions);
- }
+ ext4_group_t start, end;

/*
* mb_avg_fragment_size_order() returns order in a way that makes
@@ -1050,6 +1097,9 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
if (1 << min_order < ac->ac_o_ex.fe_len)
min_order = fls(ac->ac_o_ex.fe_len);

+ start = group;
+ end = ext4_get_allocation_groups_count(ac);
+wrap_around:
for (i = order; i >= min_order; i--) {
int frag_order;
/*
@@ -1072,17 +1122,24 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
frag_order = mb_avg_fragment_size_order(ac->ac_sb,
ac->ac_g_ex.fe_len);

- grp = ext4_mb_find_good_group_avg_frag_lists(ac, frag_order);
- if (grp) {
- *group = grp->bb_group;
- ac->ac_flags |= EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED;
- return;
- }
+ ret = ext4_mb_scan_groups_avg_frag_order_range(ac, frag_order,
+ start, end);
+ if (ret || ac->ac_status != AC_STATUS_CONTINUE)
+ return ret;
+ }
+ if (start) {
+ end = start;
+ start = 0;
+ goto wrap_around;
}

/* Reset goal length to original goal length before falling into CR_GOAL_LEN_SLOW */
ac->ac_g_ex.fe_len = ac->ac_orig_goal_len;
- *new_cr = CR_GOAL_LEN_SLOW;
+ if (sbi->s_mb_stats)
+ atomic64_inc(&sbi->s_bal_cX_failed[ac->ac_criteria]);
+ ac->ac_criteria = CR_GOAL_LEN_SLOW;
+
+ return ret;
}

static inline int should_optimize_scan(struct ext4_allocation_context *ac)
@@ -1095,59 +1152,78 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac)
}

/*
- * Return next linear group for allocation.
+ * next linear group for allocation.
*/
-static ext4_group_t
-next_linear_group(ext4_group_t group, ext4_group_t ngroups)
+static void next_linear_group(ext4_group_t *group, ext4_group_t ngroups)
{
/*
* Artificially restricted ngroups for non-extent
* files makes group > ngroups possible on first loop.
*/
- return group + 1 >= ngroups ? 0 : group + 1;
+ *group = *group + 1 >= ngroups ? 0 : *group + 1;
}

-/*
- * ext4_mb_choose_next_group: choose next group for allocation.
- *
- * @ac Allocation Context
- * @new_cr This is an output parameter. If the there is no good group
- * available at current CR level, this field is updated to indicate
- * the new cr level that should be used.
- * @group This is an input / output parameter. As an input it indicates the
- * next group that the allocator intends to use for allocation. As
- * output, this field indicates the next group that should be used as
- * determined by the optimization functions.
- * @ngroups Total number of groups
- */
-static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
- enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
+static int ext4_mb_scan_groups_linear(struct ext4_allocation_context *ac,
+ ext4_group_t ngroups, ext4_group_t *start, ext4_group_t count)
{
- *new_cr = ac->ac_criteria;
+ int ret, i;
+ enum criteria cr = ac->ac_criteria;
+ struct super_block *sb = ac->ac_sb;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ ext4_group_t group = *start;

- if (!should_optimize_scan(ac)) {
- *group = next_linear_group(*group, ngroups);
- return;
+ for (i = 0; i < count; i++, next_linear_group(&group, ngroups)) {
+ ret = ext4_mb_scan_group(ac, group);
+ if (ret || ac->ac_status != AC_STATUS_CONTINUE)
+ return ret;
+ cond_resched();
}

+ *start = group;
+ if (count == ngroups)
+ ac->ac_criteria++;
+
+ /* Processed all groups and haven't found blocks */
+ if (sbi->s_mb_stats && i == ngroups)
+ atomic64_inc(&sbi->s_bal_cX_failed[cr]);
+
+ return 0;
+}
+
+static int ext4_mb_scan_groups(struct ext4_allocation_context *ac)
+{
+ int ret = 0;
+ ext4_group_t start;
+ struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
+ ext4_group_t ngroups = ext4_get_allocation_groups_count(ac);
+
+ /* searching for the right group start from the goal value specified */
+ start = ac->ac_g_ex.fe_group;
+ ac->ac_prefetch_grp = start;
+ ac->ac_prefetch_nr = 0;
+
+ if (!should_optimize_scan(ac))
+ return ext4_mb_scan_groups_linear(ac, ngroups, &start, ngroups);
+
/*
* Optimized scanning can return non adjacent groups which can cause
* seek overhead for rotational disks. So try few linear groups before
* trying optimized scan.
*/
- if (ac->ac_groups_linear_remaining) {
- *group = next_linear_group(*group, ngroups);
- ac->ac_groups_linear_remaining--;
- return;
- }
+ if (sbi->s_mb_max_linear_groups)
+ ret = ext4_mb_scan_groups_linear(ac, ngroups, &start,
+ sbi->s_mb_max_linear_groups);
+ if (ret || ac->ac_status != AC_STATUS_CONTINUE)
+ return ret;

- if (*new_cr == CR_POWER2_ALIGNED) {
- ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group);
- } else if (*new_cr == CR_GOAL_LEN_FAST) {
- ext4_mb_choose_next_group_goal_fast(ac, new_cr, group);
- } else if (*new_cr == CR_BEST_AVAIL_LEN) {
- ext4_mb_choose_next_group_best_avail(ac, new_cr, group);
- } else {
+ switch (ac->ac_criteria) {
+ case CR_POWER2_ALIGNED:
+ return ext4_mb_scan_groups_p2_aligned(ac, start);
+ case CR_GOAL_LEN_FAST:
+ return ext4_mb_scan_groups_goal_fast(ac, start);
+ case CR_BEST_AVAIL_LEN:
+ return ext4_mb_scan_groups_best_avail(ac, start);
+ default:
/*
* TODO: For CR_GOAL_LEN_SLOW, we can arrange groups in an
* rb tree sorted by bb_free. But until that happens, we should
@@ -1155,6 +1231,8 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
*/
WARN_ON(1);
}
+
+ return 0;
}

/*
@@ -1175,18 +1253,25 @@ mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
if (new == old)
return;

- if (old >= 0 && !list_empty(&grp->bb_largest_free_order_node)) {
- write_lock(&sbi->s_mb_largest_free_orders_locks[old]);
- list_del_init(&grp->bb_largest_free_order_node);
- write_unlock(&sbi->s_mb_largest_free_orders_locks[old]);
+ if (old >= 0) {
+ struct xarray *xa = &sbi->s_mb_largest_free_orders[old];
+
+ if (!xa_empty(xa) && xa_load(xa, grp->bb_group))
+ xa_erase(xa, grp->bb_group);
}

grp->bb_largest_free_order = new;
if (test_opt2(sb, MB_OPTIMIZE_SCAN) && new >= 0 && grp->bb_free) {
- write_lock(&sbi->s_mb_largest_free_orders_locks[new]);
- list_add_tail(&grp->bb_largest_free_order_node,
- &sbi->s_mb_largest_free_orders[new]);
- write_unlock(&sbi->s_mb_largest_free_orders_locks[new]);
+ /*
+ * Cannot use __GFP_NOFAIL because we hold the group lock.
+ * Although allocation for insertion may fails, it's not fatal
+ * as we have linear traversal to fall back on.
+ */
+ int err = xa_insert(&sbi->s_mb_largest_free_orders[new],
+ grp->bb_group, grp, GFP_ATOMIC);
+ if (err)
+ mb_debug(sb, "insert group: %u to s_mb_largest_free_orders[%d] failed, err %d",
+ grp->bb_group, new, err);
}
}

@@ -2582,6 +2667,30 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
}
}

+static void __ext4_mb_scan_group(struct ext4_allocation_context *ac)
+{
+ bool is_stripe_aligned;
+ struct ext4_sb_info *sbi;
+ enum criteria cr = ac->ac_criteria;
+
+ ac->ac_groups_scanned++;
+ if (cr == CR_POWER2_ALIGNED)
+ return ext4_mb_simple_scan_group(ac, ac->ac_e4b);
+
+ sbi = EXT4_SB(ac->ac_sb);
+ is_stripe_aligned = false;
+ if ((sbi->s_stripe >= sbi->s_cluster_ratio) &&
+ !(ac->ac_g_ex.fe_len % EXT4_NUM_B2C(sbi, sbi->s_stripe)))
+ is_stripe_aligned = true;
+
+ if ((cr == CR_GOAL_LEN_FAST || cr == CR_BEST_AVAIL_LEN) &&
+ is_stripe_aligned)
+ ext4_mb_scan_aligned(ac, ac->ac_e4b);
+
+ if (ac->ac_status == AC_STATUS_CONTINUE)
+ ext4_mb_complex_scan_group(ac, ac->ac_e4b);
+}
+
/*
* This is also called BEFORE we load the buddy bitmap.
* Returns either 1 or 0 indicating that the group is either suitable
@@ -2771,6 +2880,37 @@ ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
return group;
}

+/*
+ * Batch reads of the block allocation bitmaps to get
+ * multiple READs in flight; limit prefetching at inexpensive
+ * CR, otherwise mballoc can spend a lot of time loading
+ * imperfect groups
+ */
+static void ext4_mb_might_prefetch(struct ext4_allocation_context *ac,
+ ext4_group_t group)
+{
+ struct ext4_sb_info *sbi;
+
+ if (ac->ac_prefetch_grp != group)
+ return;
+
+ sbi = EXT4_SB(ac->ac_sb);
+ if (ext4_mb_cr_expensive(ac->ac_criteria) ||
+ ac->ac_prefetch_ios < sbi->s_mb_prefetch_limit) {
+ unsigned int nr = sbi->s_mb_prefetch;
+
+ if (ext4_has_feature_flex_bg(ac->ac_sb)) {
+ nr = 1 << sbi->s_log_groups_per_flex;
+ nr -= group & (nr - 1);
+ nr = umin(nr, sbi->s_mb_prefetch);
+ }
+
+ ac->ac_prefetch_nr = nr;
+ ac->ac_prefetch_grp = ext4_mb_prefetch(ac->ac_sb, group, nr,
+ &ac->ac_prefetch_ios);
+ }
+}
+
/*
* Prefetching reads the block bitmap into the buffer cache; but we
* need to make sure that the buddy bitmap in the page cache has been
@@ -2804,24 +2944,58 @@ void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
}
}

+static int ext4_mb_scan_group(struct ext4_allocation_context *ac,
+ ext4_group_t group)
+{
+ int ret;
+ struct super_block *sb = ac->ac_sb;
+ enum criteria cr = ac->ac_criteria;
+
+ ext4_mb_might_prefetch(ac, group);
+
+ /* prevent unnecessary buddy loading. */
+ if (cr < CR_ANY_FREE && spin_is_locked(ext4_group_lock_ptr(sb, group)))
+ return 0;
+
+ /* This now checks without needing the buddy page */
+ ret = ext4_mb_good_group_nolock(ac, group, cr);
+ if (ret <= 0) {
+ if (!ac->ac_first_err)
+ ac->ac_first_err = ret;
+ return 0;
+ }
+
+ ret = ext4_mb_load_buddy(sb, group, ac->ac_e4b);
+ if (ret)
+ return ret;
+
+ /* skip busy group */
+ if (cr >= CR_ANY_FREE)
+ ext4_lock_group(sb, group);
+ else if (!ext4_try_lock_group(sb, group))
+ goto out_unload;
+
+ /* We need to check again after locking the block group. */
+ if (unlikely(!ext4_mb_good_group(ac, group, cr)))
+ goto out_unlock;
+
+ __ext4_mb_scan_group(ac);
+
+out_unlock:
+ ext4_unlock_group(sb, group);
+out_unload:
+ ext4_mb_unload_buddy(ac->ac_e4b);
+ return ret;
+}
+
static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
- ext4_group_t prefetch_grp = 0, ngroups, group, i;
- enum criteria new_cr, cr = CR_GOAL_LEN_FAST;
- int err = 0, first_err = 0;
- unsigned int nr = 0, prefetch_ios = 0;
- struct ext4_sb_info *sbi;
- struct super_block *sb;
+ ext4_group_t i;
+ int err = 0;
+ struct super_block *sb = ac->ac_sb;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_buddy e4b;
- int lost;
-
- sb = ac->ac_sb;
- sbi = EXT4_SB(sb);
- ngroups = ext4_get_groups_count(sb);
- /* non-extent files are limited to low blocks/groups */
- if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
- ngroups = sbi->s_blockfile_groups;

BUG_ON(ac->ac_status == AC_STATUS_FOUND);

@@ -2867,107 +3041,21 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
* start with CR_GOAL_LEN_FAST, unless it is power of 2
* aligned, in which case let's do that faster approach first.
*/
+ ac->ac_criteria = CR_GOAL_LEN_FAST;
if (ac->ac_2order)
- cr = CR_POWER2_ALIGNED;
-repeat:
- for (; cr < EXT4_MB_NUM_CRS && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
- ac->ac_criteria = cr;
- /*
- * searching for the right group start
- * from the goal value specified
- */
- group = ac->ac_g_ex.fe_group;
- ac->ac_groups_linear_remaining = sbi->s_mb_max_linear_groups;
- prefetch_grp = group;
- nr = 0;
+ ac->ac_criteria = CR_POWER2_ALIGNED;

- for (i = 0, new_cr = cr; i < ngroups; i++,
- ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups)) {
- int ret = 0;
-
- cond_resched();
- if (new_cr != cr) {
- cr = new_cr;
- goto repeat;
- }
-
- /*
- * Batch reads of the block allocation bitmaps
- * to get multiple READs in flight; limit
- * prefetching at inexpensive CR, otherwise mballoc
- * can spend a lot of time loading imperfect groups
- */
- if ((prefetch_grp == group) &&
- (ext4_mb_cr_expensive(cr) ||
- prefetch_ios < sbi->s_mb_prefetch_limit)) {
- nr = sbi->s_mb_prefetch;
- if (ext4_has_feature_flex_bg(sb)) {
- nr = 1 << sbi->s_log_groups_per_flex;
- nr -= group & (nr - 1);
- nr = min(nr, sbi->s_mb_prefetch);
- }
- prefetch_grp = ext4_mb_prefetch(sb, group,
- nr, &prefetch_ios);
- }
-
- /* This now checks without needing the buddy page */
- ret = ext4_mb_good_group_nolock(ac, group, cr);
- if (ret <= 0) {
- if (!first_err)
- first_err = ret;
- continue;
- }
-
- err = ext4_mb_load_buddy(sb, group, &e4b);
- if (err)
- goto out;
-
- ext4_lock_group(sb, group);
-
- /*
- * We need to check again after locking the
- * block group
- */
- ret = ext4_mb_good_group(ac, group, cr);
- if (ret == 0) {
- ext4_unlock_group(sb, group);
- ext4_mb_unload_buddy(&e4b);
- continue;
- }
-
- ac->ac_groups_scanned++;
- if (cr == CR_POWER2_ALIGNED)
- ext4_mb_simple_scan_group(ac, &e4b);
- else {
- bool is_stripe_aligned =
- (sbi->s_stripe >=
- sbi->s_cluster_ratio) &&
- !(ac->ac_g_ex.fe_len %
- EXT4_NUM_B2C(sbi, sbi->s_stripe));
-
- if ((cr == CR_GOAL_LEN_FAST ||
- cr == CR_BEST_AVAIL_LEN) &&
- is_stripe_aligned)
- ext4_mb_scan_aligned(ac, &e4b);
-
- if (ac->ac_status == AC_STATUS_CONTINUE)
- ext4_mb_complex_scan_group(ac, &e4b);
- }
-
- ext4_unlock_group(sb, group);
- ext4_mb_unload_buddy(&e4b);
-
- if (ac->ac_status != AC_STATUS_CONTINUE)
- break;
- }
- /* Processed all groups and haven't found blocks */
- if (sbi->s_mb_stats && i == ngroups)
- atomic64_inc(&sbi->s_bal_cX_failed[cr]);
+ ac->ac_e4b = &e4b;
+ ac->ac_prefetch_ios = 0;
+ ac->ac_first_err = 0;
+repeat:
+ while (ac->ac_criteria < EXT4_MB_NUM_CRS) {
+ err = ext4_mb_scan_groups(ac);
+ if (err)
+ goto out;

- if (i == ngroups && ac->ac_criteria == CR_BEST_AVAIL_LEN)
- /* Reset goal length to original goal length before
- * falling into CR_GOAL_LEN_SLOW */
- ac->ac_g_ex.fe_len = ac->ac_orig_goal_len;
+ if (ac->ac_status != AC_STATUS_CONTINUE)
+ break;
}

if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
@@ -2978,6 +3066,8 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
*/
ext4_mb_try_best_found(ac, &e4b);
if (ac->ac_status != AC_STATUS_FOUND) {
+ int lost;
+
/*
* Someone more lucky has already allocated it.
* The only thing we can do is just take first
@@ -2993,7 +3083,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
ac->ac_b_ex.fe_len = 0;
ac->ac_status = AC_STATUS_CONTINUE;
ac->ac_flags |= EXT4_MB_HINT_FIRST;
- cr = CR_ANY_FREE;
+ ac->ac_criteria = CR_ANY_FREE;
goto repeat;
}
}
@@ -3001,15 +3091,15 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
if (sbi->s_mb_stats && ac->ac_status == AC_STATUS_FOUND)
atomic64_inc(&sbi->s_bal_cX_hits[ac->ac_criteria]);
out:
- if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
- err = first_err;
+ if (!err && ac->ac_status != AC_STATUS_FOUND && ac->ac_first_err)
+ err = ac->ac_first_err;

mb_debug(sb, "Best len %d, origin len %d, ac_status %u, ac_flags 0x%x, cr %d ret %d\n",
ac->ac_b_ex.fe_len, ac->ac_o_ex.fe_len, ac->ac_status,
- ac->ac_flags, cr, err);
+ ac->ac_flags, ac->ac_criteria, err);

- if (nr)
- ext4_mb_prefetch_fini(sb, prefetch_grp, nr);
+ if (ac->ac_prefetch_nr)
+ ext4_mb_prefetch_fini(sb, ac->ac_prefetch_grp, ac->ac_prefetch_nr);

return err;
}
@@ -3134,8 +3224,6 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
atomic_read(&sbi->s_bal_cX_ex_scanned[CR_POWER2_ALIGNED]));
seq_printf(seq, "\t\tuseless_loops: %llu\n",
atomic64_read(&sbi->s_bal_cX_failed[CR_POWER2_ALIGNED]));
- seq_printf(seq, "\t\tbad_suggestions: %u\n",
- atomic_read(&sbi->s_bal_p2_aligned_bad_suggestions));

/* CR_GOAL_LEN_FAST stats */
seq_puts(seq, "\tcr_goal_fast_stats:\n");
@@ -3148,8 +3236,6 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
atomic_read(&sbi->s_bal_cX_ex_scanned[CR_GOAL_LEN_FAST]));
seq_printf(seq, "\t\tuseless_loops: %llu\n",
atomic64_read(&sbi->s_bal_cX_failed[CR_GOAL_LEN_FAST]));
- seq_printf(seq, "\t\tbad_suggestions: %u\n",
- atomic_read(&sbi->s_bal_goal_fast_bad_suggestions));

/* CR_BEST_AVAIL_LEN stats */
seq_puts(seq, "\tcr_best_avail_stats:\n");
@@ -3163,8 +3249,6 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
atomic_read(&sbi->s_bal_cX_ex_scanned[CR_BEST_AVAIL_LEN]));
seq_printf(seq, "\t\tuseless_loops: %llu\n",
atomic64_read(&sbi->s_bal_cX_failed[CR_BEST_AVAIL_LEN]));
- seq_printf(seq, "\t\tbad_suggestions: %u\n",
- atomic_read(&sbi->s_bal_best_avail_bad_suggestions));

/* CR_GOAL_LEN_SLOW stats */
seq_puts(seq, "\tcr_goal_slow_stats:\n");
@@ -3240,6 +3324,7 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
unsigned long position = ((unsigned long) v);
struct ext4_group_info *grp;
unsigned int count;
+ unsigned long idx;

position--;
if (position >= MB_NUM_ORDERS(sb)) {
@@ -3248,11 +3333,8 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
seq_puts(seq, "avg_fragment_size_lists:\n");

count = 0;
- read_lock(&sbi->s_mb_avg_fragment_size_locks[position]);
- list_for_each_entry(grp, &sbi->s_mb_avg_fragment_size[position],
- bb_avg_fragment_size_node)
+ xa_for_each(&sbi->s_mb_avg_fragment_size[position], idx, grp)
count++;
- read_unlock(&sbi->s_mb_avg_fragment_size_locks[position]);
seq_printf(seq, "\tlist_order_%u_groups: %u\n",
(unsigned int)position, count);
return 0;
@@ -3264,11 +3346,8 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
seq_puts(seq, "max_free_order_lists:\n");
}
count = 0;
- read_lock(&sbi->s_mb_largest_free_orders_locks[position]);
- list_for_each_entry(grp, &sbi->s_mb_largest_free_orders[position],
- bb_largest_free_order_node)
+ xa_for_each(&sbi->s_mb_largest_free_orders[position], idx, grp)
count++;
- read_unlock(&sbi->s_mb_largest_free_orders_locks[position]);
seq_printf(seq, "\tlist_order_%u_groups: %u\n",
(unsigned int)position, count);

@@ -3388,8 +3467,6 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
init_rwsem(&meta_group_info[i]->alloc_sem);
meta_group_info[i]->bb_free_root = RB_ROOT;
- INIT_LIST_HEAD(&meta_group_info[i]->bb_largest_free_order_node);
- INIT_LIST_HEAD(&meta_group_info[i]->bb_avg_fragment_size_node);
meta_group_info[i]->bb_largest_free_order = -1; /* uninit */
meta_group_info[i]->bb_avg_fragment_size_order = -1; /* uninit */
meta_group_info[i]->bb_group = group;
@@ -3599,6 +3676,30 @@ static void ext4_discard_work(struct work_struct *work)
ext4_mb_unload_buddy(&e4b);
}

+static inline void ext4_mb_avg_fragment_size_destroy(struct ext4_sb_info *sbi)
+{
+ if (!sbi->s_mb_avg_fragment_size)
+ return;
+
+ for (int i = 0; i < MB_NUM_ORDERS(sbi->s_sb); i++)
+ xa_destroy(&sbi->s_mb_avg_fragment_size[i]);
+
+ kfree(sbi->s_mb_avg_fragment_size);
+ sbi->s_mb_avg_fragment_size = NULL;
+}
+
+static inline void ext4_mb_largest_free_orders_destroy(struct ext4_sb_info *sbi)
+{
+ if (!sbi->s_mb_largest_free_orders)
+ return;
+
+ for (int i = 0; i < MB_NUM_ORDERS(sbi->s_sb); i++)
+ xa_destroy(&sbi->s_mb_largest_free_orders[i]);
+
+ kfree(sbi->s_mb_largest_free_orders);
+ sbi->s_mb_largest_free_orders = NULL;
+}
+
int ext4_mb_init(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -3644,41 +3745,24 @@ int ext4_mb_init(struct super_block *sb)
} while (i < MB_NUM_ORDERS(sb));

sbi->s_mb_avg_fragment_size =
- kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct list_head),
+ kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct xarray),
GFP_KERNEL);
if (!sbi->s_mb_avg_fragment_size) {
ret = -ENOMEM;
goto out;
}
- sbi->s_mb_avg_fragment_size_locks =
- kmalloc_array(MB_NUM_ORDERS(sb), sizeof(rwlock_t),
- GFP_KERNEL);
- if (!sbi->s_mb_avg_fragment_size_locks) {
- ret = -ENOMEM;
- goto out;
- }
- for (i = 0; i < MB_NUM_ORDERS(sb); i++) {
- INIT_LIST_HEAD(&sbi->s_mb_avg_fragment_size[i]);
- rwlock_init(&sbi->s_mb_avg_fragment_size_locks[i]);
- }
+ for (i = 0; i < MB_NUM_ORDERS(sb); i++)
+ xa_init(&sbi->s_mb_avg_fragment_size[i]);
+
sbi->s_mb_largest_free_orders =
- kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct list_head),
+ kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct xarray),
GFP_KERNEL);
if (!sbi->s_mb_largest_free_orders) {
ret = -ENOMEM;
goto out;
}
- sbi->s_mb_largest_free_orders_locks =
- kmalloc_array(MB_NUM_ORDERS(sb), sizeof(rwlock_t),
- GFP_KERNEL);
- if (!sbi->s_mb_largest_free_orders_locks) {
- ret = -ENOMEM;
- goto out;
- }
- for (i = 0; i < MB_NUM_ORDERS(sb); i++) {
- INIT_LIST_HEAD(&sbi->s_mb_largest_free_orders[i]);
- rwlock_init(&sbi->s_mb_largest_free_orders_locks[i]);
- }
+ for (i = 0; i < MB_NUM_ORDERS(sb); i++)
+ xa_init(&sbi->s_mb_largest_free_orders[i]);

spin_lock_init(&sbi->s_md_lock);
sbi->s_mb_free_pending = 0;
@@ -3751,10 +3835,8 @@ int ext4_mb_init(struct super_block *sb)
free_percpu(sbi->s_locality_groups);
sbi->s_locality_groups = NULL;
out:
- kfree(sbi->s_mb_avg_fragment_size);
- kfree(sbi->s_mb_avg_fragment_size_locks);
- kfree(sbi->s_mb_largest_free_orders);
- kfree(sbi->s_mb_largest_free_orders_locks);
+ ext4_mb_avg_fragment_size_destroy(sbi);
+ ext4_mb_largest_free_orders_destroy(sbi);
kfree(sbi->s_mb_offsets);
sbi->s_mb_offsets = NULL;
kfree(sbi->s_mb_maxs);
@@ -3821,10 +3903,8 @@ void ext4_mb_release(struct super_block *sb)
kvfree(group_info);
rcu_read_unlock();
}
- kfree(sbi->s_mb_avg_fragment_size);
- kfree(sbi->s_mb_avg_fragment_size_locks);
- kfree(sbi->s_mb_largest_free_orders);
- kfree(sbi->s_mb_largest_free_orders_locks);
+ ext4_mb_avg_fragment_size_destroy(sbi);
+ ext4_mb_largest_free_orders_destroy(sbi);
kfree(sbi->s_mb_offsets);
kfree(sbi->s_mb_maxs);
iput(sbi->s_buddy_cache);
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index f8280de3e882..15a049f05d04 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -192,8 +192,13 @@ struct ext4_allocation_context {
*/
ext4_grpblk_t ac_orig_goal_len;

+ ext4_group_t ac_prefetch_grp;
+ unsigned int ac_prefetch_ios;
+ unsigned int ac_prefetch_nr;
+
+ int ac_first_err;
+
__u32 ac_flags; /* allocation hints */
- __u32 ac_groups_linear_remaining;
__u16 ac_groups_scanned;
__u16 ac_found;
__u16 ac_cX_found[EXT4_MB_NUM_CRS];
@@ -204,6 +209,8 @@ struct ext4_allocation_context {
__u8 ac_2order; /* if request is to allocate 2^N blocks and
* N > 0, the field stores N, otherwise 0 */
__u8 ac_op; /* operation, for history only */
+
+ struct ext4_buddy *ac_e4b;
struct folio *ac_bitmap_folio;
struct folio *ac_buddy_folio;
struct ext4_prealloc_space *ac_pa;
diff --git a/fs/namespace.c b/fs/namespace.c
index 3869d2b32ac2..df6d6a2c6b89 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1590,23 +1590,33 @@ static struct mount *mnt_find_id_at_reverse(struct mnt_namespace *ns, u64 mnt_id
static void *m_start(struct seq_file *m, loff_t *pos)
{
struct proc_mounts *p = m->private;
+ struct mount *mnt;

down_read(&namespace_sem);

- return mnt_find_id_at(p->ns, *pos);
+ mnt = mnt_find_id_at(p->ns, *pos);
+ if (mnt)
+ *pos = mnt->mnt_id_unique;
+ return mnt;
}

static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct mount *next = NULL, *mnt = v;
+ struct mount *mnt = v;
struct rb_node *node = rb_next(&mnt->mnt_node);

- ++*pos;
if (node) {
- next = node_to_mount(node);
+ struct mount *next = node_to_mount(node);
*pos = next->mnt_id_unique;
+ return next;
}
- return next;
+
+ /*
+ * No more mounts. Set pos past current mount's ID so that if
+ * iteration restarts, mnt_find_id_at() returns NULL.
+ */
+ *pos = mnt->mnt_id_unique + 1;
+ return NULL;
}

static void m_stop(struct seq_file *m, void *v)
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index eb012f943912..d80be50dad76 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1724,7 +1724,7 @@ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info)
scope = nla_data(attr);
}

- ret = nfsd_svc(nrpools, nthreads, net, get_current_cred(), scope);
+ ret = nfsd_svc(nrpools, nthreads, net, current_cred(), scope);
if (ret > 0)
ret = 0;
out_unlock:
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 3c83bda8298e..823b4e1ce2d0 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -2197,7 +2197,6 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
/* find first : in payload */
payload = upayload->data;
delim = strnchr(payload, upayload->datalen, ':');
- cifs_dbg(FYI, "payload=%s\n", payload);
if (!delim) {
cifs_dbg(FYI, "Unable to find ':' in payload (datalen=%d)\n",
upayload->datalen);
diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c
index 1c65787657dd..cac14c7b3fbc 100644
--- a/fs/smb/client/smb2inode.c
+++ b/fs/smb/client/smb2inode.c
@@ -315,7 +315,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
cfile->fid.volatile_fid,
SMB_FIND_FILE_POSIX_INFO,
SMB2_O_INFO_FILE, 0,
- sizeof(struct smb311_posix_qinfo *) +
+ sizeof(struct smb311_posix_qinfo) +
(PATH_MAX * 2) +
(sizeof(struct smb_sid) * 2), 0, NULL);
} else {
@@ -325,7 +325,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
COMPOUND_FID,
SMB_FIND_FILE_POSIX_INFO,
SMB2_O_INFO_FILE, 0,
- sizeof(struct smb311_posix_qinfo *) +
+ sizeof(struct smb311_posix_qinfo) +
(PATH_MAX * 2) +
(sizeof(struct smb_sid) * 2), 0, NULL);
}
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 23e61ecf1b51..b6821815248e 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -1666,19 +1666,17 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
is_binding = (ses->ses_status == SES_GOOD);
spin_unlock(&ses->ses_lock);

- /* keep session key if binding */
- if (!is_binding) {
- kfree_sensitive(ses->auth_key.response);
- ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
- GFP_KERNEL);
- if (!ses->auth_key.response) {
- cifs_dbg(VFS, "Kerberos can't allocate (%u bytes) memory\n",
- msg->sesskey_len);
- rc = -ENOMEM;
- goto out_put_spnego_key;
- }
- ses->auth_key.len = msg->sesskey_len;
+ kfree_sensitive(ses->auth_key.response);
+ ses->auth_key.response = kmemdup(msg->data,
+ msg->sesskey_len,
+ GFP_KERNEL);
+ if (!ses->auth_key.response) {
+ cifs_dbg(VFS, "%s: can't allocate (%u bytes) memory\n",
+ __func__, msg->sesskey_len);
+ rc = -ENOMEM;
+ goto out_put_spnego_key;
}
+ ses->auth_key.len = msg->sesskey_len;

sess_data->iov[1].iov_base = msg->data + msg->sesskey_len;
sess_data->iov[1].iov_len = msg->secblob_len;
@@ -3935,7 +3933,7 @@ int
SMB311_posix_query_info(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, struct smb311_posix_qinfo *data, u32 *plen)
{
- size_t output_len = sizeof(struct smb311_posix_qinfo *) +
+ size_t output_len = sizeof(struct smb311_posix_qinfo) +
(sizeof(struct smb_sid) * 2) + (PATH_MAX * 2);
*plen = 0;

diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c
index a77e5a489b1c..98c27dda2410 100644
--- a/fs/smb/client/transport.c
+++ b/fs/smb/client/transport.c
@@ -1026,16 +1026,21 @@ cifs_cancelled_callback(struct mid_q_entry *mid)
}

/*
- * Return a channel (master if none) of @ses that can be used to send
- * regular requests.
+ * cifs_pick_channel - pick an eligible channel for network operations
*
- * If we are currently binding a new channel (negprot/sess.setup),
- * return the new incomplete channel.
+ * @ses: session reference
+ *
+ * Select an eligible channel (not terminating and not marked as needing
+ * reconnect), preferring the least loaded one. If no eligible channel is
+ * found, fall back to the primary channel (index 0).
+ *
+ * Return: TCP_Server_Info pointer for the chosen channel, or NULL if @ses is
+ * NULL.
*/
struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
{
uint index = 0;
- unsigned int min_in_flight = UINT_MAX, max_in_flight = 0;
+ unsigned int min_in_flight = UINT_MAX;
struct TCP_Server_Info *server = NULL;
int i, start, cur;

@@ -1065,14 +1070,8 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
min_in_flight = server->in_flight;
index = cur;
}
- if (server->in_flight > max_in_flight)
- max_in_flight = server->in_flight;
}

- /* if all channels are equally loaded, fall back to round-robin */
- if (min_in_flight == max_in_flight)
- index = (uint)start % ses->chan_count;
-
server = ses->chans[index].server;
spin_unlock(&ses->chan_lock);

diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c
index 66198ed26aec..352cf9e47ebe 100644
--- a/fs/smb/server/mgmt/user_session.c
+++ b/fs/smb/server/mgmt/user_session.c
@@ -32,12 +32,14 @@ static void free_channel_list(struct ksmbd_session *sess)
struct channel *chann;
unsigned long index;

+ down_write(&sess->chann_lock);
xa_for_each(&sess->ksmbd_chann_list, index, chann) {
xa_erase(&sess->ksmbd_chann_list, index);
kfree(chann);
}

xa_destroy(&sess->ksmbd_chann_list);
+ up_write(&sess->chann_lock);
}

static void __session_rpc_close(struct ksmbd_session *sess,
@@ -220,7 +222,9 @@ static int ksmbd_chann_del(struct ksmbd_conn *conn, struct ksmbd_session *sess)
{
struct channel *chann;

+ down_write(&sess->chann_lock);
chann = xa_erase(&sess->ksmbd_chann_list, (long)conn);
+ up_write(&sess->chann_lock);
if (!chann)
return -ENOENT;

@@ -454,6 +458,7 @@ static struct ksmbd_session *__session_create(int protocol)
rwlock_init(&sess->tree_conns_lock);
atomic_set(&sess->refcnt, 2);
init_rwsem(&sess->rpc_lock);
+ init_rwsem(&sess->chann_lock);

ret = __init_smb2_session(sess);
if (ret)
diff --git a/fs/smb/server/mgmt/user_session.h b/fs/smb/server/mgmt/user_session.h
index c5749d6ec715..cba7f688f6b5 100644
--- a/fs/smb/server/mgmt/user_session.h
+++ b/fs/smb/server/mgmt/user_session.h
@@ -49,6 +49,7 @@ struct ksmbd_session {
char sess_key[CIFS_KEY_SIZE];

struct hlist_node hlist;
+ struct rw_semaphore chann_lock;
struct xarray ksmbd_chann_list;
struct xarray tree_conns;
struct ida tree_conn_ida;
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index ac8248479cba..0d7ba57c1ca6 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -78,7 +78,13 @@ static inline bool check_session_id(struct ksmbd_conn *conn, u64 id)

struct channel *lookup_chann_list(struct ksmbd_session *sess, struct ksmbd_conn *conn)
{
- return xa_load(&sess->ksmbd_chann_list, (long)conn);
+ struct channel *chann;
+
+ down_read(&sess->chann_lock);
+ chann = xa_load(&sess->ksmbd_chann_list, (long)conn);
+ up_read(&sess->chann_lock);
+
+ return chann;
}

/**
@@ -1560,8 +1566,10 @@ static int ntlm_authenticate(struct ksmbd_work *work,
return -ENOMEM;

chann->conn = conn;
+ down_write(&sess->chann_lock);
old = xa_store(&sess->ksmbd_chann_list, (long)conn, chann,
KSMBD_DEFAULT_GFP);
+ up_write(&sess->chann_lock);
if (xa_is_err(old)) {
kfree(chann);
return xa_err(old);
@@ -1592,7 +1600,7 @@ static int krb5_authenticate(struct ksmbd_work *work,
struct ksmbd_conn *conn = work->conn;
struct ksmbd_session *sess = work->sess;
char *in_blob, *out_blob;
- struct channel *chann = NULL;
+ struct channel *chann = NULL, *old;
u64 prev_sess_id;
int in_len, out_len;
int retval;
@@ -1658,7 +1666,14 @@ static int krb5_authenticate(struct ksmbd_work *work,
return -ENOMEM;

chann->conn = conn;
- xa_store(&sess->ksmbd_chann_list, (long)conn, chann, KSMBD_DEFAULT_GFP);
+ down_write(&sess->chann_lock);
+ old = xa_store(&sess->ksmbd_chann_list, (long)conn,
+ chann, KSMBD_DEFAULT_GFP);
+ up_write(&sess->chann_lock);
+ if (xa_is_err(old)) {
+ kfree(chann);
+ return xa_err(old);
+ }
}
}

diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 5062326d0efb..25bf038b880a 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -340,6 +340,9 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer,
if (unlikely(length < 0))
return -EIO;

+ if (unlikely(*offset < 0 || *offset >= SQUASHFS_METADATA_SIZE))
+ return -EIO;
+
while (length) {
entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0);
if (entry->error) {
diff --git a/fs/xattr.c b/fs/xattr.c
index 0191ac2590e0..23b9e4b1f3dd 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -697,9 +697,9 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
int error;

CLASS(fd, f)(fd);
- if (!fd_file(f))
- return -EBADF;

+ if (fd_empty(f))
+ return -EBADF;
audit_file(fd_file(f));
error = setxattr_copy(name, &ctx);
if (error)
@@ -809,16 +809,13 @@ SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname,
SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name,
void __user *, value, size_t, size)
{
- struct fd f = fdget(fd);
- ssize_t error = -EBADF;
+ CLASS(fd, f)(fd);

- if (!fd_file(f))
- return error;
+ if (fd_empty(f))
+ return -EBADF;
audit_file(fd_file(f));
- error = getxattr(file_mnt_idmap(fd_file(f)), fd_file(f)->f_path.dentry,
+ return getxattr(file_mnt_idmap(fd_file(f)), fd_file(f)->f_path.dentry,
name, value, size);
- fdput(f);
- return error;
}

/*
@@ -885,15 +882,12 @@ SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list,

SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
{
- struct fd f = fdget(fd);
- ssize_t error = -EBADF;
+ CLASS(fd, f)(fd);

- if (!fd_file(f))
- return error;
+ if (fd_empty(f))
+ return -EBADF;
audit_file(fd_file(f));
- error = listxattr(fd_file(f)->f_path.dentry, list, size);
- fdput(f);
- return error;
+ return listxattr(fd_file(f)->f_path.dentry, list, size);
}

/*
@@ -950,12 +944,12 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname,

SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
{
- struct fd f = fdget(fd);
+ CLASS(fd, f)(fd);
char kname[XATTR_NAME_MAX + 1];
- int error = -EBADF;
+ int error;

- if (!fd_file(f))
- return error;
+ if (fd_empty(f))
+ return -EBADF;
audit_file(fd_file(f));

error = strncpy_from_user(kname, name, sizeof(kname));
@@ -970,7 +964,6 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
fd_file(f)->f_path.dentry, kname);
mnt_drop_write_file(fd_file(f));
}
- fdput(f);
return error;
}

diff --git a/include/linux/ima.h b/include/linux/ima.h
index 0bae61a15b60..abf8923f8fc5 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -32,6 +32,9 @@ static inline void ima_appraise_parse_cmdline(void) {}

#ifdef CONFIG_IMA_KEXEC
extern void ima_add_kexec_buffer(struct kimage *image);
+extern void ima_kexec_post_load(struct kimage *image);
+#else
+static inline void ima_kexec_post_load(struct kimage *image) {}
#endif

#else
@@ -66,6 +69,7 @@ static inline int ima_measure_critical_data(const char *event_label,
#ifdef CONFIG_HAVE_IMA_KEXEC
int __init ima_free_kexec_buffer(void);
int __init ima_get_kexec_buffer(void **addr, size_t *size);
+int ima_validate_range(phys_addr_t phys, size_t size);
#endif

#ifdef CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT
diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h
index 35227d47cfc9..dc272b514a01 100644
--- a/include/linux/indirect_call_wrapper.h
+++ b/include/linux/indirect_call_wrapper.h
@@ -16,22 +16,26 @@
*/
#define INDIRECT_CALL_1(f, f1, ...) \
({ \
- likely(f == f1) ? f1(__VA_ARGS__) : f(__VA_ARGS__); \
+ typeof(f) __f1 = (f); \
+ likely(__f1 == f1) ? f1(__VA_ARGS__) : __f1(__VA_ARGS__); \
})
#define INDIRECT_CALL_2(f, f2, f1, ...) \
({ \
- likely(f == f2) ? f2(__VA_ARGS__) : \
- INDIRECT_CALL_1(f, f1, __VA_ARGS__); \
+ typeof(f) __f2 = (f); \
+ likely(__f2 == f2) ? f2(__VA_ARGS__) : \
+ INDIRECT_CALL_1(__f2, f1, __VA_ARGS__); \
})
#define INDIRECT_CALL_3(f, f3, f2, f1, ...) \
({ \
- likely(f == f3) ? f3(__VA_ARGS__) : \
- INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__); \
+ typeof(f) __f3 = (f); \
+ likely(__f3 == f3) ? f3(__VA_ARGS__) : \
+ INDIRECT_CALL_2(__f3, f2, f1, __VA_ARGS__); \
})
#define INDIRECT_CALL_4(f, f4, f3, f2, f1, ...) \
({ \
- likely(f == f4) ? f4(__VA_ARGS__) : \
- INDIRECT_CALL_3(f, f3, f2, f1, __VA_ARGS__); \
+ typeof(f) __f4 = (f); \
+ likely(__f4 == f4) ? f4(__VA_ARGS__) : \
+ INDIRECT_CALL_3(__f4, f3, f2, f1, __VA_ARGS__); \
})

#define INDIRECT_CALLABLE_DECLARE(f) f
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 6e9fb667a1c5..5385349f0b8a 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -249,6 +249,38 @@ struct resource *lookup_resource(struct resource *root, resource_size_t start);
int adjust_resource(struct resource *res, resource_size_t start,
resource_size_t size);
resource_size_t resource_alignment(struct resource *res);
+
+/**
+ * resource_set_size - Calculate resource end address from size and start
+ * @res: Resource descriptor
+ * @size: Size of the resource
+ *
+ * Calculate the end address for @res based on @size.
+ *
+ * Note: The start address of @res must be set when calling this function.
+ * Prefer resource_set_range() if setting both the start address and @size.
+ */
+static inline void resource_set_size(struct resource *res, resource_size_t size)
+{
+ res->end = res->start + size - 1;
+}
+
+/**
+ * resource_set_range - Set resource start and end addresses
+ * @res: Resource descriptor
+ * @start: Start address for the resource
+ * @size: Size of the resource
+ *
+ * Set @res start address and calculate the end address based on @size.
+ */
+static inline void resource_set_range(struct resource *res,
+ resource_size_t start,
+ resource_size_t size)
+{
+ res->start = start;
+ resource_set_size(res, size);
+}
+
static inline resource_size_t resource_size(const struct resource *res)
{
return res->end - res->start + 1;
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index f0e9f8eda7a3..7d6b12f8b8d0 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -467,13 +467,19 @@ extern bool kexec_file_dbg_print;
#define kexec_dprintk(fmt, arg...) \
do { if (kexec_file_dbg_print) pr_info(fmt, ##arg); } while (0)

+extern void *kimage_map_segment(struct kimage *image, unsigned long addr, unsigned long size);
+extern void kimage_unmap_segment(void *buffer);
#else /* !CONFIG_KEXEC_CORE */
struct pt_regs;
struct task_struct;
+struct kimage;
static inline void __crash_kexec(struct pt_regs *regs) { }
static inline void crash_kexec(struct pt_regs *regs) { }
static inline int kexec_should_crash(struct task_struct *p) { return 0; }
static inline int kexec_crash_loaded(void) { return 0; }
+static inline void *kimage_map_segment(struct kimage *image, unsigned long addr, unsigned long size)
+{ return NULL; }
+static inline void kimage_unmap_segment(void *buffer) { }
#define kexec_in_progress false
#endif /* CONFIG_KEXEC_CORE */

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 1983a98e3d67..14c835f5d661 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -155,7 +155,6 @@ enum {
ATA_DFLAG_DEVSLP = (1 << 27), /* device supports Device Sleep */
ATA_DFLAG_ACPI_DISABLED = (1 << 28), /* ACPI for the device is disabled */
ATA_DFLAG_D_SENSE = (1 << 29), /* Descriptor sense requested */
- ATA_DFLAG_ZAC = (1 << 30), /* ZAC device */

ATA_DFLAG_FEATURES_MASK = (ATA_DFLAG_TRUSTED | ATA_DFLAG_DA | \
ATA_DFLAG_DEVSLP | ATA_DFLAG_NCQ_SEND_RECV | \
@@ -898,6 +897,9 @@ struct ata_port {
u64 qc_active;
int nr_active_links; /* #links with active qcs */

+ struct work_struct deferred_qc_work;
+ struct ata_queued_cmd *deferred_qc;
+
struct ata_link link; /* host default link */
struct ata_link *slave_link; /* see ata_slave_link_init() */

diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h
index 734694912ef7..c6eea9afb943 100644
--- a/include/linux/mailbox_client.h
+++ b/include/linux/mailbox_client.h
@@ -7,8 +7,8 @@
#ifndef __MAILBOX_CLIENT_H
#define __MAILBOX_CLIENT_H

-#include <linux/of.h>
#include <linux/device.h>
+#include <linux/of.h>

struct mbox_chan;

diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h
index 6fee33cb52f5..b91379922cb3 100644
--- a/include/linux/mailbox_controller.h
+++ b/include/linux/mailbox_controller.h
@@ -3,11 +3,11 @@
#ifndef __MAILBOX_CONTROLLER_H
#define __MAILBOX_CONTROLLER_H

+#include <linux/completion.h>
+#include <linux/device.h>
+#include <linux/hrtimer.h>
#include <linux/of.h>
#include <linux/types.h>
-#include <linux/hrtimer.h>
-#include <linux/device.h>
-#include <linux/completion.h>

struct mbox_chan;

@@ -66,6 +66,7 @@ struct mbox_chan_ops {
* no interrupt rises. Ignored if 'txdone_irq' is set.
* @txpoll_period: If 'txdone_poll' is in effect, the API polls for
* last TX's status after these many millisecs
+ * @fw_xlate: Controller driver specific mapping of channel via fwnode
* @of_xlate: Controller driver specific mapping of channel via DT
* @poll_hrt: API private. hrtimer used to poll for TXDONE on all
* channels.
@@ -79,6 +80,8 @@ struct mbox_controller {
bool txdone_irq;
bool txdone_poll;
unsigned txpoll_period;
+ struct mbox_chan *(*fw_xlate)(struct mbox_controller *mbox,
+ const struct fwnode_reference_args *sp);
struct mbox_chan *(*of_xlate)(struct mbox_controller *mbox,
const struct of_phandle_args *sp);
/* Internal to API */
diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index 42ef06136bd1..de8cc3658220 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -32,11 +32,43 @@ pci_epc_interface_string(enum pci_epc_interface_type type)
}
}

+/**
+ * struct pci_epc_map - information about EPC memory for mapping a RC PCI
+ * address range
+ * @pci_addr: start address of the RC PCI address range to map
+ * @pci_size: size of the RC PCI address range mapped from @pci_addr
+ * @map_pci_addr: RC PCI address used as the first address mapped (may be lower
+ * than @pci_addr)
+ * @map_size: size of the controller memory needed for mapping the RC PCI address
+ * range @pci_addr..@pci_addr+@pci_size
+ * @phys_base: base physical address of the allocated EPC memory for mapping the
+ * RC PCI address range
+ * @phys_addr: physical address at which @pci_addr is mapped
+ * @virt_base: base virtual address of the allocated EPC memory for mapping the
+ * RC PCI address range
+ * @virt_addr: virtual address at which @pci_addr is mapped
+ */
+struct pci_epc_map {
+ u64 pci_addr;
+ size_t pci_size;
+
+ u64 map_pci_addr;
+ size_t map_size;
+
+ phys_addr_t phys_base;
+ phys_addr_t phys_addr;
+ void __iomem *virt_base;
+ void __iomem *virt_addr;
+};
+
/**
* struct pci_epc_ops - set of function pointers for performing EPC operations
* @write_header: ops to populate configuration space header
* @set_bar: ops to configure the BAR
* @clear_bar: ops to reset the BAR
+ * @align_addr: operation to get the mapping address, mapping size and offset
+ * into a controller memory window needed to map an RC PCI address
+ * region
* @map_addr: ops to map CPU address to PCI address
* @unmap_addr: ops to unmap CPU address and PCI address
* @set_msi: ops to set the requested number of MSI interrupts in the MSI
@@ -61,6 +93,8 @@ struct pci_epc_ops {
struct pci_epf_bar *epf_bar);
void (*clear_bar)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
struct pci_epf_bar *epf_bar);
+ u64 (*align_addr)(struct pci_epc *epc, u64 pci_addr, size_t *size,
+ size_t *offset);
int (*map_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
phys_addr_t addr, u64 pci_addr, size_t size);
void (*unmap_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
@@ -278,6 +312,10 @@ void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc,
phys_addr_t *phys_addr, size_t size);
void pci_epc_mem_free_addr(struct pci_epc *epc, phys_addr_t phys_addr,
void __iomem *virt_addr, size_t size);
+int pci_epc_mem_map(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+ u64 pci_addr, size_t pci_size, struct pci_epc_map *map);
+void pci_epc_mem_unmap(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+ struct pci_epc_map *map);

#else
static inline void pci_epc_init_notify(struct pci_epc *epc)
diff --git a/include/linux/platform_data/max6639.h b/include/linux/platform_data/max6639.h
deleted file mode 100644
index 65bfdb4fdc15..000000000000
--- a/include/linux/platform_data/max6639.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_MAX6639_H
-#define _LINUX_MAX6639_H
-
-#include <linux/types.h>
-
-/* platform data for the MAX6639 temperature sensor and fan control */
-
-struct max6639_platform_data {
- bool pwm_polarity; /* Polarity low (0) or high (1, default) */
- int ppr; /* Pulses per rotation 1..4 (default == 2) */
- int rpm_range; /* 2000, 4000 (default), 8000 or 16000 */
-};
-
-#endif /* _LINUX_MAX6639_H */
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index d8424abcf726..03934249b91e 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -244,6 +244,7 @@ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node);

int ring_buffer_map(struct trace_buffer *buffer, int cpu,
struct vm_area_struct *vma);
+void ring_buffer_map_dup(struct trace_buffer *buffer, int cpu);
int ring_buffer_unmap(struct trace_buffer *buffer, int cpu);
int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu);
#endif /* _LINUX_RING_BUFFER_H */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 59c2695e12e7..23642bb1a103 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -427,7 +427,7 @@ enum wq_consts {
/*
* System-wide workqueues which are always present.
*
- * system_wq is the one used by schedule[_delayed]_work[_on]().
+ * system_percpu_wq is the one used by schedule[_delayed]_work[_on]().
* Multi-CPU multi-threaded. There are users which expect relatively
* short queue flush time. Don't queue works which can run for too
* long.
@@ -438,7 +438,7 @@ enum wq_consts {
* system_long_wq is similar to system_wq but may host long running
* works. Queue flushing might take relatively long.
*
- * system_unbound_wq is unbound workqueue. Workers are not bound to
+ * system_dfl_wq is unbound workqueue. Workers are not bound to
* any specific CPU, not concurrency managed, and all queued works are
* executed immediately as long as max_active limit is not reached and
* resources are available.
@@ -455,10 +455,12 @@ enum wq_consts {
* system_bh[_highpri]_wq are convenience interface to softirq. BH work items
* are executed in the queueing CPU's BH context in the queueing order.
*/
-extern struct workqueue_struct *system_wq;
+extern struct workqueue_struct *system_wq; /* use system_percpu_wq, this will be removed */
+extern struct workqueue_struct *system_percpu_wq;
extern struct workqueue_struct *system_highpri_wq;
extern struct workqueue_struct *system_long_wq;
extern struct workqueue_struct *system_unbound_wq;
+extern struct workqueue_struct *system_dfl_wq;
extern struct workqueue_struct *system_freezable_wq;
extern struct workqueue_struct *system_power_efficient_wq;
extern struct workqueue_struct *system_freezable_power_efficient_wq;
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 77ee0c657e2c..d8103b2270d9 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -68,6 +68,7 @@ struct tc_action {
#define TCA_ACT_FLAGS_REPLACE (1U << (TCA_ACT_FLAGS_USER_BITS + 2))
#define TCA_ACT_FLAGS_NO_RTNL (1U << (TCA_ACT_FLAGS_USER_BITS + 3))
#define TCA_ACT_FLAGS_AT_INGRESS (1U << (TCA_ACT_FLAGS_USER_BITS + 4))
+#define TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT (1U << (TCA_ACT_FLAGS_USER_BITS + 5))

/* Update lastuse only if needed, to avoid dirtying a cache line.
* We use a temp variable to avoid fetching jiffies twice.
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 9fb40a592020..66940d41d485 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -696,6 +696,7 @@ void bond_debug_register(struct bonding *bond);
void bond_debug_unregister(struct bonding *bond);
void bond_debug_reregister(struct bonding *bond);
const char *bond_mode_name(int mode);
+bool __bond_xdp_check(int mode, int xmit_policy);
bool bond_xdp_check(struct bonding *bond, int mode);
void bond_setup(struct net_device *bond_dev);
unsigned int bond_get_num_tx_queues(void);
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 967e4dc555fa..339b92cd5cec 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -544,7 +544,7 @@ static inline u32 fib_multipath_hash_from_keys(const struct net *net,
siphash_aligned_key_t hash_key;
u32 mp_seed;

- mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed;
+ mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed);
fib_multipath_hash_construct_key(&hash_key, mp_seed);

return flow_hash_from_keys_seed(keys, &hash_key);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index d440583aa4b2..79296ed87b9b 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1851,6 +1851,11 @@ struct nft_trans_gc {
struct rcu_head rcu;
};

+static inline int nft_trans_gc_space(const struct nft_trans_gc *trans)
+{
+ return NFT_TRANS_GC_BATCHCOUNT - trans->count;
+}
+
static inline void nft_ctx_update(struct nft_ctx *ctx,
const struct nft_trans *trans)
{
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 1e002b1dea62..75a0d6095d2e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -758,13 +758,23 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb)
static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
{
struct Qdisc *qdisc;
+ bool nolock;

for (; i < dev->num_tx_queues; i++) {
qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc);
if (qdisc) {
+ nolock = qdisc->flags & TCQ_F_NOLOCK;
+
+ if (nolock)
+ spin_lock_bh(&qdisc->seqlock);
spin_lock_bh(qdisc_lock(qdisc));
qdisc_reset(qdisc);
spin_unlock_bh(qdisc_lock(qdisc));
+ if (nolock) {
+ clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+ clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
+ spin_unlock_bh(&qdisc->seqlock);
+ }
}
}
}
diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h
index c7f24a2da1ca..24d4d5a62b3c 100644
--- a/include/net/tc_act/tc_ife.h
+++ b/include/net/tc_act/tc_ife.h
@@ -13,15 +13,13 @@ struct tcf_ife_params {
u8 eth_src[ETH_ALEN];
u16 eth_type;
u16 flags;
-
+ struct list_head metalist;
struct rcu_head rcu;
};

struct tcf_ife_info {
struct tc_action common;
struct tcf_ife_params __rcu *params;
- /* list of metaids allowed */
- struct list_head metalist;
};
#define to_ife(a) ((struct tcf_ife_info *)a)

diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 0a5dca2b2b3f..997e28dd3896 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -47,6 +47,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool);
}

+static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool)
+{
+ return pool->unaligned ? 0 : xsk_pool_get_chunk_size(pool);
+}
+
static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
struct xdp_rxq_info *rxq)
{
@@ -126,8 +131,8 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
if (likely(!xdp_buff_has_frags(xdp)))
goto out;

- list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
- list_del(&pos->xskb_list_node);
+ list_for_each_entry_safe(pos, tmp, xskb_list, list_node) {
+ list_del_init(&pos->list_node);
xp_free(pos);
}

@@ -140,7 +145,7 @@ static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
{
struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp);

- list_add_tail(&frag->xskb_list_node, &frag->pool->xskb_list);
+ list_add_tail(&frag->list_node, &frag->pool->xskb_list);
}

static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
@@ -150,9 +155,9 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
struct xdp_buff_xsk *frag;

frag = list_first_entry_or_null(&xskb->pool->xskb_list,
- struct xdp_buff_xsk, xskb_list_node);
+ struct xdp_buff_xsk, list_node);
if (frag) {
- list_del(&frag->xskb_list_node);
+ list_del_init(&frag->list_node);
ret = &frag->xdp;
}

@@ -163,7 +168,7 @@ static inline void xsk_buff_del_tail(struct xdp_buff *tail)
{
struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp);

- list_del(&xskb->xskb_list_node);
+ list_del_init(&xskb->list_node);
}

static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
@@ -172,7 +177,7 @@ static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
struct xdp_buff_xsk *frag;

frag = list_last_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk,
- xskb_list_node);
+ list_node);
return &frag->xdp;
}

@@ -296,6 +301,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
return 0;
}

+static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool)
+{
+ return 0;
+}
+
static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
struct xdp_rxq_info *rxq)
{
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 823fd5c7a3b1..e21062cf6229 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -29,8 +29,7 @@ struct xdp_buff_xsk {
dma_addr_t frame_dma;
struct xsk_buff_pool *pool;
u64 orig_addr;
- struct list_head free_list_node;
- struct list_head xskb_list_node;
+ struct list_head list_node;
};

#define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb))
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index f3c9de0a497c..bf6c143551ec 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -699,7 +699,7 @@
#define PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */
#define PCI_EXP_LNKSTA2 0x32 /* Link Status 2 */
#define PCI_EXP_LNKSTA2_FLIT 0x0400 /* Flit Mode Status */
-#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */
+#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x34 /* end of v2 EPs w/ link */
#define PCI_EXP_SLTCAP2 0x34 /* Slot Capabilities 2 */
#define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */
#define PCI_EXP_SLTCTL2 0x38 /* Slot Control 2 */
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 3aa002a47a96..39b7efa396b8 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -588,18 +588,22 @@ static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifin
}

/* Get ifindex of each upper device. 'indexes' must be able to hold at
- * least MAX_NEST_DEV elements.
- * Returns the number of ifindexes added.
+ * least 'max' elements.
+ * Returns the number of ifindexes added, or -EOVERFLOW if there are too
+ * many upper devices.
*/
-static int get_upper_ifindexes(struct net_device *dev, int *indexes)
+static int get_upper_ifindexes(struct net_device *dev, int *indexes, int max)
{
struct net_device *upper;
struct list_head *iter;
int n = 0;

netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+ if (n >= max)
+ return -EOVERFLOW;
indexes[n++] = upper->ifindex;
}
+
return n;
}

@@ -615,7 +619,11 @@ int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
int err;

if (exclude_ingress) {
- num_excluded = get_upper_ifindexes(dev_rx, excluded_devices);
+ num_excluded = get_upper_ifindexes(dev_rx, excluded_devices,
+ ARRAY_SIZE(excluded_devices) - 1);
+ if (num_excluded < 0)
+ return num_excluded;
+
excluded_devices[num_excluded++] = dev_rx->ifindex;
}

@@ -733,7 +741,11 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
int err;

if (exclude_ingress) {
- num_excluded = get_upper_ifindexes(dev, excluded_devices);
+ num_excluded = get_upper_ifindexes(dev, excluded_devices,
+ ARRAY_SIZE(excluded_devices) - 1);
+ if (num_excluded < 0)
+ return num_excluded;
+
excluded_devices[num_excluded++] = dev->ifindex;
}

diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index dbe7754b4f4e..894cd6f205f5 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -749,10 +749,8 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
mutex_lock(&tr->mutex);

shim_link = cgroup_shim_find(tr, bpf_func);
- if (shim_link) {
+ if (shim_link && !IS_ERR(bpf_link_inc_not_zero(&shim_link->link.link))) {
/* Reusing existing shim attached by the other program. */
- bpf_link_inc(&shim_link->link.link);
-
mutex_unlock(&tr->mutex);
bpf_trampoline_put(tr); /* bpf_trampoline_get above */
return 0;
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 1b93eb7b29c5..77b07548c302 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2126,7 +2126,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
WARN_ON(!is_in_v2_mode() &&
!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));

- cpuset_update_tasks_cpumask(cp, cp->effective_cpus);
+ cpuset_update_tasks_cpumask(cp, tmp->new_cpus);

/*
* On default hierarchy, inherit the CS_SCHED_LOAD_BALANCE
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 01a87cd9b5cc..814b6536b09d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10001,6 +10001,13 @@ int perf_event_overflow(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
+ /*
+ * Entry point from hardware PMI, interrupts should be disabled here.
+ * This serializes us against perf_event_remove_from_context() in
+ * things like perf_event_release_kernel().
+ */
+ lockdep_assert_irqs_disabled();
+
return __perf_event_overflow(event, 1, data, regs);
}

@@ -10077,6 +10084,19 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
{
struct hw_perf_event *hwc = &event->hw;

+ /*
+ * This is:
+ * - software preempt
+ * - tracepoint preempt
+ * - tp_target_task irq (ctx->lock)
+ * - uprobes preempt/irq
+ * - kprobes preempt/irq
+ * - hw_breakpoint irq
+ *
+ * Any of these are sufficient to hold off RCU and thus ensure @event
+ * exists.
+ */
+ lockdep_assert_preemption_disabled();
local64_add(nr, &event->count);

if (!regs)
@@ -10085,6 +10105,16 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
if (!is_sampling_event(event))
return;

+ /*
+ * Serialize against event_function_call() IPIs like normal overflow
+ * event handling. Specifically, must not allow
+ * perf_event_release_kernel() -> perf_remove_from_context() to make
+ * progress and 'release' the event from under us.
+ */
+ guard(irqsave)();
+ if (event->state != PERF_EVENT_STATE_ACTIVE)
+ return;
+
if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) {
data->period = nr;
return perf_swevent_overflow(event, 1, data, regs);
@@ -10584,6 +10614,11 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
struct perf_sample_data data;
struct perf_event *event;

+ /*
+ * Per being a tracepoint, this runs with preemption disabled.
+ */
+ lockdep_assert_preemption_disabled();
+
struct perf_raw_record raw = {
.frag = {
.size = entry_size,
@@ -10906,6 +10941,11 @@ void perf_bp_event(struct perf_event *bp, void *data)
struct perf_sample_data sample;
struct pt_regs *regs = data;

+ /*
+ * Exception context, will have interrupts disabled.
+ */
+ lockdep_assert_irqs_disabled();
+
perf_sample_data_init(&sample, bp->attr.bp_addr, 0);

if (!bp->hw.state && !perf_exclude_event(bp, regs))
@@ -11358,7 +11398,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)

if (regs && !perf_exclude_event(event, regs)) {
if (!(event->attr.exclude_idle && is_idle_task(current)))
- if (__perf_event_overflow(event, 1, &data, regs))
+ if (perf_event_overflow(event, &data, regs))
ret = HRTIMER_NORESTART;
}

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index e30c4dd345f4..e3c8d9900ca7 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -26,6 +26,7 @@
#include <linux/task_work.h>
#include <linux/shmem_fs.h>
#include <linux/khugepaged.h>
+#include <linux/rcupdate_trace.h>

#include <linux/uprobes.h>

@@ -42,8 +43,6 @@ static struct rb_root uprobes_tree = RB_ROOT;
static DEFINE_RWLOCK(uprobes_treelock); /* serialize rbtree access */
static seqcount_rwlock_t uprobes_seqcount = SEQCNT_RWLOCK_ZERO(uprobes_seqcount, &uprobes_treelock);

-DEFINE_STATIC_SRCU(uprobes_srcu);
-
#define UPROBES_HASH_SZ 13
/* serialize uprobe->pending_list */
static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
@@ -667,7 +666,7 @@ static void put_uprobe(struct uprobe *uprobe)
delayed_uprobe_remove(uprobe, NULL);
mutex_unlock(&delayed_uprobe_lock);

- call_srcu(&uprobes_srcu, &uprobe->rcu, uprobe_free_rcu);
+ call_rcu_tasks_trace(&uprobe->rcu, uprobe_free_rcu);
}

static __always_inline
@@ -722,7 +721,7 @@ static struct uprobe *find_uprobe_rcu(struct inode *inode, loff_t offset)
struct rb_node *node;
unsigned int seq;

- lockdep_assert(srcu_read_lock_held(&uprobes_srcu));
+ lockdep_assert(rcu_read_lock_trace_held());

do {
seq = read_seqcount_begin(&uprobes_seqcount);
@@ -950,8 +949,7 @@ static bool filter_chain(struct uprobe *uprobe, struct mm_struct *mm)
bool ret = false;

down_read(&uprobe->consumer_rwsem);
- list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
- srcu_read_lock_held(&uprobes_srcu)) {
+ list_for_each_entry(uc, &uprobe->consumers, cons_node) {
ret = consumer_filter(uc, mm);
if (ret)
break;
@@ -1172,7 +1170,7 @@ void uprobe_unregister_sync(void)
* unlucky enough caller can free consumer's memory and cause
* handler_chain() or handle_uretprobe_chain() to do an use-after-free.
*/
- synchronize_srcu(&uprobes_srcu);
+ synchronize_rcu_tasks_trace();
}
EXPORT_SYMBOL_GPL(uprobe_unregister_sync);

@@ -1256,19 +1254,18 @@ EXPORT_SYMBOL_GPL(uprobe_register);
int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool add)
{
struct uprobe_consumer *con;
- int ret = -ENOENT, srcu_idx;
+ int ret = -ENOENT;

down_write(&uprobe->register_rwsem);

- srcu_idx = srcu_read_lock(&uprobes_srcu);
- list_for_each_entry_srcu(con, &uprobe->consumers, cons_node,
- srcu_read_lock_held(&uprobes_srcu)) {
+ rcu_read_lock_trace();
+ list_for_each_entry_rcu(con, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
if (con == uc) {
ret = register_for_each_vma(uprobe, add ? uc : NULL);
break;
}
}
- srcu_read_unlock(&uprobes_srcu, srcu_idx);
+ rcu_read_unlock_trace();

up_write(&uprobe->register_rwsem);

@@ -2150,8 +2147,7 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)

current->utask->auprobe = &uprobe->arch;

- list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
- srcu_read_lock_held(&uprobes_srcu)) {
+ list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
int rc = 0;

if (uc->handler) {
@@ -2189,15 +2185,13 @@ handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
{
struct uprobe *uprobe = ri->uprobe;
struct uprobe_consumer *uc;
- int srcu_idx;

- srcu_idx = srcu_read_lock(&uprobes_srcu);
- list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
- srcu_read_lock_held(&uprobes_srcu)) {
+ rcu_read_lock_trace();
+ list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
if (uc->ret_handler)
uc->ret_handler(uc, ri->func, regs);
}
- srcu_read_unlock(&uprobes_srcu, srcu_idx);
+ rcu_read_unlock_trace();
}

static struct return_instance *find_next_ret_chain(struct return_instance *ri)
@@ -2282,13 +2276,13 @@ static void handle_swbp(struct pt_regs *regs)
{
struct uprobe *uprobe;
unsigned long bp_vaddr;
- int is_swbp, srcu_idx;
+ int is_swbp;

bp_vaddr = uprobe_get_swbp_addr(regs);
if (bp_vaddr == uprobe_get_trampoline_vaddr())
return uprobe_handle_trampoline(regs);

- srcu_idx = srcu_read_lock(&uprobes_srcu);
+ rcu_read_lock_trace();

uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp);
if (!uprobe) {
@@ -2353,7 +2347,7 @@ static void handle_swbp(struct pt_regs *regs)

out:
/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
- srcu_read_unlock(&uprobes_srcu, srcu_idx);
+ rcu_read_unlock_trace();
}

/*
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index c0caa14880c3..6c15cd5b9cae 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -867,6 +867,60 @@ int kimage_load_segment(struct kimage *image,
return result;
}

+void *kimage_map_segment(struct kimage *image,
+ unsigned long addr, unsigned long size)
+{
+ unsigned long src_page_addr, dest_page_addr = 0;
+ unsigned long eaddr = addr + size;
+ kimage_entry_t *ptr, entry;
+ struct page **src_pages;
+ unsigned int npages;
+ void *vaddr = NULL;
+ int i;
+
+ /*
+ * Collect the source pages and map them in a contiguous VA range.
+ */
+ npages = PFN_UP(eaddr) - PFN_DOWN(addr);
+ src_pages = kmalloc_array(npages, sizeof(*src_pages), GFP_KERNEL);
+ if (!src_pages) {
+ pr_err("Could not allocate ima pages array.\n");
+ return NULL;
+ }
+
+ i = 0;
+ for_each_kimage_entry(image, ptr, entry) {
+ if (entry & IND_DESTINATION) {
+ dest_page_addr = entry & PAGE_MASK;
+ } else if (entry & IND_SOURCE) {
+ if (dest_page_addr >= addr && dest_page_addr < eaddr) {
+ src_page_addr = entry & PAGE_MASK;
+ src_pages[i++] =
+ virt_to_page(__va(src_page_addr));
+ if (i == npages)
+ break;
+ dest_page_addr += PAGE_SIZE;
+ }
+ }
+ }
+
+ /* Sanity check. */
+ WARN_ON(i < npages);
+
+ vaddr = vmap(src_pages, npages, VM_MAP, PAGE_KERNEL);
+ kfree(src_pages);
+
+ if (!vaddr)
+ pr_err("Could not map ima buffer.\n");
+
+ return vaddr;
+}
+
+void kimage_unmap_segment(void *segment_buffer)
+{
+ vunmap(segment_buffer);
+}
+
struct kexec_load_limit {
/* Mutex protects the limit count. */
struct mutex mutex;
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 810005f927d7..e6ee81dd1e45 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -432,8 +432,9 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
* auxiliary vector AT_RSEQ_ALIGN. If rseq_len is the original rseq
* size, the required alignment is the original struct rseq alignment.
*
- * In order to be valid, rseq_len is either the original rseq size, or
- * large enough to contain all supported fields, as communicated to
+ * The rseq_len is required to be greater or equal to the original rseq
+ * size. In order to be valid, rseq_len is either the original rseq size,
+ * or large enough to contain all supported fields, as communicated to
* user-space through the ELF auxiliary vector AT_RSEQ_FEATURE_SIZE.
*/
if (rseq_len < ORIG_RSEQ_SIZE ||
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 9e72543e0099..6958ae79464d 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -7154,6 +7154,27 @@ int ring_buffer_map(struct trace_buffer *buffer, int cpu,
return err;
}

+/*
+ * This is called when a VMA is duplicated (e.g., on fork()) to increment
+ * the user_mapped counter without remapping pages.
+ */
+void ring_buffer_map_dup(struct trace_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+
+ if (WARN_ON(!cpumask_test_cpu(cpu, buffer->cpumask)))
+ return;
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ guard(mutex)(&cpu_buffer->mapping_lock);
+
+ if (cpu_buffer->user_mapped)
+ __rb_inc_dec_mapped(cpu_buffer, true);
+ else
+ WARN(1, "Unexpected buffer stat, it should be mapped");
+}
+
int ring_buffer_unmap(struct trace_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index fb76a7262abf..a543bb9d86b8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8274,6 +8274,18 @@ static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
static inline void put_snapshot_map(struct trace_array *tr) { }
#endif

+/*
+ * This is called when a VMA is duplicated (e.g., on fork()) to increment
+ * the user_mapped counter without remapping pages.
+ */
+static void tracing_buffers_mmap_open(struct vm_area_struct *vma)
+{
+ struct ftrace_buffer_info *info = vma->vm_file->private_data;
+ struct trace_iterator *iter = &info->iter;
+
+ ring_buffer_map_dup(iter->array_buffer->buffer, iter->cpu_file);
+}
+
static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
{
struct ftrace_buffer_info *info = vma->vm_file->private_data;
@@ -8293,6 +8305,7 @@ static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long a
}

static const struct vm_operations_struct tracing_buffers_vmops = {
+ .open = tracing_buffers_mmap_open,
.close = tracing_buffers_mmap_close,
.may_split = tracing_buffers_may_split,
};
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index d5dbda9b0e4b..1e4e699c2547 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -19,6 +19,9 @@ static DEFINE_MUTEX(trigger_cmd_mutex);

void trigger_data_free(struct event_trigger_data *data)
{
+ if (!data)
+ return;
+
if (data->cmd_ops->set_filter)
data->cmd_ops->set_filter(NULL, data, NULL);

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 9f7f7244bdc8..3840d7ce9cda 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -508,12 +508,16 @@ static struct kthread_worker *pwq_release_worker __ro_after_init;

struct workqueue_struct *system_wq __ro_after_init;
EXPORT_SYMBOL(system_wq);
+struct workqueue_struct *system_percpu_wq __ro_after_init;
+EXPORT_SYMBOL(system_percpu_wq);
struct workqueue_struct *system_highpri_wq __ro_after_init;
EXPORT_SYMBOL_GPL(system_highpri_wq);
struct workqueue_struct *system_long_wq __ro_after_init;
EXPORT_SYMBOL_GPL(system_long_wq);
struct workqueue_struct *system_unbound_wq __ro_after_init;
EXPORT_SYMBOL_GPL(system_unbound_wq);
+struct workqueue_struct *system_dfl_wq __ro_after_init;
+EXPORT_SYMBOL_GPL(system_dfl_wq);
struct workqueue_struct *system_freezable_wq __ro_after_init;
EXPORT_SYMBOL_GPL(system_freezable_wq);
struct workqueue_struct *system_power_efficient_wq __ro_after_init;
@@ -7848,10 +7852,11 @@ void __init workqueue_init_early(void)
}

system_wq = alloc_workqueue("events", 0, 0);
+ system_percpu_wq = alloc_workqueue("events", 0, 0);
system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
system_long_wq = alloc_workqueue("events_long", 0, 0);
- system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
- WQ_MAX_ACTIVE);
+ system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_MAX_ACTIVE);
+ system_dfl_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_MAX_ACTIVE);
system_freezable_wq = alloc_workqueue("events_freezable",
WQ_FREEZABLE, 0);
system_power_efficient_wq = alloc_workqueue("events_power_efficient",
@@ -7862,8 +7867,8 @@ void __init workqueue_init_early(void)
system_bh_wq = alloc_workqueue("events_bh", WQ_BH, 0);
system_bh_highpri_wq = alloc_workqueue("events_bh_highpri",
WQ_BH | WQ_HIGHPRI, 0);
- BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
- !system_unbound_wq || !system_freezable_wq ||
+ BUG_ON(!system_wq || !system_percpu_wq|| !system_highpri_wq || !system_long_wq ||
+ !system_unbound_wq || !system_freezable_wq || !system_dfl_wq ||
!system_power_efficient_wq ||
!system_freezable_power_efficient_wq ||
!system_bh_wq || !system_bh_highpri_wq);
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 42e8047c6510..4a8ca2d7ff59 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1260,24 +1260,28 @@ static void lec_arp_clear_vccs(struct lec_arp_table *entry)
struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc);
struct net_device *dev = (struct net_device *)vcc->proto_data;

- vcc->pop = vpriv->old_pop;
- if (vpriv->xoff)
- netif_wake_queue(dev);
- kfree(vpriv);
- vcc->user_back = NULL;
- vcc->push = entry->old_push;
- vcc_release_async(vcc, -EPIPE);
+ if (vpriv) {
+ vcc->pop = vpriv->old_pop;
+ if (vpriv->xoff)
+ netif_wake_queue(dev);
+ kfree(vpriv);
+ vcc->user_back = NULL;
+ vcc->push = entry->old_push;
+ vcc_release_async(vcc, -EPIPE);
+ }
entry->vcc = NULL;
}
if (entry->recv_vcc) {
struct atm_vcc *vcc = entry->recv_vcc;
struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc);

- kfree(vpriv);
- vcc->user_back = NULL;
+ if (vpriv) {
+ kfree(vpriv);
+ vcc->user_back = NULL;

- entry->recv_vcc->push = entry->old_recv_push;
- vcc_release_async(entry->recv_vcc, -EPIPE);
+ entry->recv_vcc->push = entry->old_recv_push;
+ vcc_release_async(entry->recv_vcc, -EPIPE);
+ }
entry->recv_vcc = NULL;
}
}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 26b79feb385d..3768cc9c8ecb 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -72,7 +72,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
eth_hdr(skb)->h_proto == htons(ETH_P_RARP)) &&
br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) {
br_do_proxy_suppress_arp(skb, br, vid, NULL);
- } else if (IS_ENABLED(CONFIG_IPV6) &&
+ } else if (ipv6_mod_enabled() &&
skb->protocol == htons(ETH_P_IPV6) &&
br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) &&
pskb_may_pull(skb, sizeof(struct ipv6hdr) +
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 44459c9d2ce7..e22088b07e70 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -165,7 +165,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
(skb->protocol == htons(ETH_P_ARP) ||
skb->protocol == htons(ETH_P_RARP))) {
br_do_proxy_suppress_arp(skb, br, vid, p);
- } else if (IS_ENABLED(CONFIG_IPV6) &&
+ } else if (ipv6_mod_enabled() &&
skb->protocol == htons(ETH_P_IPV6) &&
br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) &&
pskb_may_pull(skb, sizeof(struct ipv6hdr) +
diff --git a/net/can/bcm.c b/net/can/bcm.c
index e33ff2a5b20c..152cc29e87d7 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1170,6 +1170,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
if (!op)
return -ENOMEM;

+ spin_lock_init(&op->bcm_tx_lock);
op->can_id = msg_head->can_id;
op->nframes = msg_head->nframes;
op->cfsiz = CFSIZ(msg_head->flags);
diff --git a/net/core/filter.c b/net/core/filter.c
index 182a7388e84f..1f96c3aa01ca 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4154,12 +4154,14 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1];
struct xdp_rxq_info *rxq = xdp->rxq;
- unsigned int tailroom;
+ int tailroom;

if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz)
return -EOPNOTSUPP;

- tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);
+ tailroom = rxq->frag_size - skb_frag_size(frag) -
+ skb_frag_off(frag) % rxq->frag_size;
+ WARN_ON_ONCE(tailroom < 0);
if (unlikely(offset > tailroom))
return -EINVAL;

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index a79b2a52ce01..8d411cce0aed 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -481,7 +481,8 @@ static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed)
proc_fib_multipath_hash_rand_seed),
};

- WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed, new);
+ WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.user_seed, new.user_seed);
+ WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed, new.mp_seed);
}

static int proc_fib_multipath_hash_seed(const struct ctl_table *table, int write,
@@ -495,7 +496,7 @@ static int proc_fib_multipath_hash_seed(const struct ctl_table *table, int write
int ret;

mphs = &net->ipv4.sysctl_fib_multipath_hash_seed;
- user_seed = mphs->user_seed;
+ user_seed = READ_ONCE(mphs->user_seed);

tmp = *table;
tmp.data = &user_seed;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index aeac45af3a22..7b9279d4c363 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1064,7 +1064,8 @@ static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
*/
if (netif_is_l3_slave(dev) &&
!rt6_need_strict(&res->f6i->fib6_dst.addr))
- dev = l3mdev_master_dev_rcu(dev);
+ dev = l3mdev_master_dev_rcu(dev) ? :
+ dev_net(dev)->loopback_dev;
else if (!netif_is_l3_master(dev))
dev = dev_net(dev)->loopback_dev;
/* last case is netif_is_l3_master(dev) is true in which
@@ -3576,7 +3577,6 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
netdevice_tracker *dev_tracker = &fib6_nh->fib_nh_dev_tracker;
struct net_device *dev = NULL;
struct inet6_dev *idev = NULL;
- int addr_type;
int err;

fib6_nh->fib_nh_family = AF_INET6;
@@ -3618,11 +3618,10 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,

fib6_nh->fib_nh_weight = 1;

- /* We cannot add true routes via loopback here,
- * they would result in kernel looping; promote them to reject routes
+ /* Reset the nexthop device to the loopback device in case of reject
+ * routes.
*/
- addr_type = ipv6_addr_type(&cfg->fc_dst);
- if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
+ if (cfg->fc_flags & RTF_REJECT) {
/* hold loopback dev/idev if we haven't done so. */
if (dev != net->loopback_dev) {
if (dev) {
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 37e11320553e..00bdf36e333e 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1631,6 +1631,9 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata,
if (!mesh_matches_local(sdata, elems))
goto free;

+ if (!elems->mesh_chansw_params_ie)
+ goto free;
+
ifmsh->chsw_ttl = elems->mesh_chansw_params_ie->mesh_ttl;
if (!--ifmsh->chsw_ttl)
fwd_csa = false;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index e0766a817f4a..61e793592638 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -6256,6 +6256,9 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata,
control = le16_to_cpu(prof->control);
link_id = control & IEEE80211_MLE_STA_RECONF_CONTROL_LINK_ID;

+ if (link_id >= IEEE80211_MLD_MAX_NUM_LINKS)
+ continue;
+
removed_links |= BIT(link_id);

/* the MAC address should not be included, but handle it */
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 8dccd3598166..c1b9b00907bb 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -10151,11 +10151,6 @@ static void nft_trans_gc_queue_work(struct nft_trans_gc *trans)
schedule_work(&trans_gc_work);
}

-static int nft_trans_gc_space(struct nft_trans_gc *trans)
-{
- return NFT_TRANS_GC_BATCHCOUNT - trans->count;
-}
-
struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
unsigned int gc_seq, gfp_t gfp)
{
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 642152e9c322..ab5045bf3e59 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1667,11 +1667,11 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
}

/**
- * pipapo_gc() - Drop expired entries from set, destroy start and end elements
+ * pipapo_gc_scan() - Drop expired entries from set and link them to gc list
* @set: nftables API set representation
* @m: Matching data
*/
-static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
+static void pipapo_gc_scan(struct nft_set *set, struct nft_pipapo_match *m)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct net *net = read_pnet(&set->net);
@@ -1684,6 +1684,8 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
if (!gc)
return;

+ list_add(&gc->list, &priv->gc_head);
+
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
const struct nft_pipapo_field *f;
@@ -1711,9 +1713,13 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
* NFT_SET_ELEM_DEAD_BIT.
*/
if (__nft_set_elem_expired(&e->ext, tstamp)) {
- gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
- if (!gc)
- return;
+ if (!nft_trans_gc_space(gc)) {
+ gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
+ if (!gc)
+ return;
+
+ list_add(&gc->list, &priv->gc_head);
+ }

nft_pipapo_gc_deactivate(net, set, e);
pipapo_drop(m, rulemap);
@@ -1727,10 +1733,30 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
}
}

- gc = nft_trans_gc_catchall_sync(gc);
+ priv->last_gc = jiffies;
+}
+
+/**
+ * pipapo_gc_queue() - Free expired elements
+ * @set: nftables API set representation
+ */
+static void pipapo_gc_queue(struct nft_set *set)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_trans_gc *gc, *next;
+
+ /* always do a catchall cycle: */
+ gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
if (gc) {
+ gc = nft_trans_gc_catchall_sync(gc);
+ if (gc)
+ nft_trans_gc_queue_sync_done(gc);
+ }
+
+ /* always purge queued gc elements. */
+ list_for_each_entry_safe(gc, next, &priv->gc_head, list) {
+ list_del(&gc->list);
nft_trans_gc_queue_sync_done(gc);
- priv->last_gc = jiffies;
}
}

@@ -1784,6 +1810,10 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
*
* We also need to create a new working copy for subsequent insertions and
* deletions.
+ *
+ * After the live copy has been replaced by the clone, we can safely queue
+ * expired elements that have been collected by pipapo_gc_scan() for
+ * memory reclaim.
*/
static void nft_pipapo_commit(struct nft_set *set)
{
@@ -1794,7 +1824,7 @@ static void nft_pipapo_commit(struct nft_set *set)
return;

if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
- pipapo_gc(set, priv->clone);
+ pipapo_gc_scan(set, priv->clone);

old = rcu_replace_pointer(priv->match, priv->clone,
nft_pipapo_transaction_mutex_held(set));
@@ -1802,6 +1832,8 @@ static void nft_pipapo_commit(struct nft_set *set)

if (old)
call_rcu(&old->rcu, pipapo_reclaim_match);
+
+ pipapo_gc_queue(set);
}

static void nft_pipapo_abort(const struct nft_set *set)
@@ -2259,6 +2291,7 @@ static int nft_pipapo_init(const struct nft_set *set,
f->mt = NULL;
}

+ INIT_LIST_HEAD(&priv->gc_head);
rcu_assign_pointer(priv->match, m);

return 0;
@@ -2308,6 +2341,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m;

+ WARN_ON_ONCE(!list_empty(&priv->gc_head));
+
m = rcu_dereference_protected(priv->match, true);

if (priv->clone) {
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index 4a2ff85ce1c4..49000f5510b2 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -156,12 +156,14 @@ struct nft_pipapo_match {
* @clone: Copy where pending insertions and deletions are kept
* @width: Total bytes to be matched for one packet, including padding
* @last_gc: Timestamp of last garbage collection run, jiffies
+ * @gc_head: list of nft_trans_gc to queue up for mem reclaim
*/
struct nft_pipapo {
struct nft_pipapo_match __rcu *match;
struct nft_pipapo_match *clone;
int width;
unsigned long last_gc;
+ struct list_head gc_head;
};

struct nft_pipapo_elem;
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 1bdaf680b488..18ff1c23769a 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1024,18 +1024,23 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target,
struct nci_conn_info *conn_info;

conn_info = ndev->rf_conn_info;
- if (!conn_info)
+ if (!conn_info) {
+ kfree_skb(skb);
return -EPROTO;
+ }

pr_debug("target_idx %d, len %d\n", target->idx, skb->len);

if (!ndev->target_active_prot) {
pr_err("unable to exchange data, no active target\n");
+ kfree_skb(skb);
return -EINVAL;
}

- if (test_and_set_bit(NCI_DATA_EXCHANGE, &ndev->flags))
+ if (test_and_set_bit(NCI_DATA_EXCHANGE, &ndev->flags)) {
+ kfree_skb(skb);
return -EBUSY;
+ }

/* store cb and context to be used on receiving data */
conn_info->data_exchange_cb = cb;
@@ -1471,10 +1476,20 @@ static bool nci_valid_size(struct sk_buff *skb)
unsigned int hdr_size = NCI_CTRL_HDR_SIZE;

if (skb->len < hdr_size ||
- !nci_plen(skb->data) ||
skb->len < hdr_size + nci_plen(skb->data)) {
return false;
}
+
+ if (!nci_plen(skb->data)) {
+ /* Allow zero length in proprietary notifications (0x20 - 0x3F). */
+ if (nci_opcode_oid(nci_opcode(skb->data)) >= 0x20 &&
+ nci_mt(skb->data) == NCI_MT_NTF_PKT)
+ return true;
+
+ /* Disallow zero length otherwise. */
+ return false;
+ }
+
return true;
}

diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c
index 3d36ea5701f0..7a3fb2a397a1 100644
--- a/net/nfc/nci/data.c
+++ b/net/nfc/nci/data.c
@@ -33,7 +33,8 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb,
conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id);
if (!conn_info) {
kfree_skb(skb);
- goto exit;
+ clear_bit(NCI_DATA_EXCHANGE, &ndev->flags);
+ return;
}

cb = conn_info->data_exchange_cb;
@@ -45,6 +46,12 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb,
del_timer_sync(&ndev->data_timer);
clear_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags);

+ /* Mark the exchange as done before calling the callback.
+ * The callback (e.g. rawsock_data_exchange_complete) may
+ * want to immediately queue another data exchange.
+ */
+ clear_bit(NCI_DATA_EXCHANGE, &ndev->flags);
+
if (cb) {
/* forward skb to nfc core */
cb(cb_context, skb, err);
@@ -54,9 +61,6 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb,
/* no waiting callback, free skb */
kfree_skb(skb);
}
-
-exit:
- clear_bit(NCI_DATA_EXCHANGE, &ndev->flags);
}

/* ----------------- NCI TX Data ----------------- */
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 5125392bb68e..028b4daafaf8 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -67,6 +67,17 @@ static int rawsock_release(struct socket *sock)
if (sock->type == SOCK_RAW)
nfc_sock_unlink(&raw_sk_list, sk);

+ if (sk->sk_state == TCP_ESTABLISHED) {
+ /* Prevent rawsock_tx_work from starting new transmits and
+ * wait for any in-progress work to finish. This must happen
+ * before the socket is orphaned to avoid a race where
+ * rawsock_tx_work runs after the NCI device has been freed.
+ */
+ sk->sk_shutdown |= SEND_SHUTDOWN;
+ cancel_work_sync(&nfc_rawsock(sk)->tx_work);
+ rawsock_write_queue_purge(sk);
+ }
+
sock_orphan(sk);
sock_put(sk);

diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 3cc2f303bf78..b66dfcc3efaa 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -495,18 +495,24 @@ bool rds_tcp_tune(struct socket *sock)
struct rds_tcp_net *rtn;

tcp_sock_set_nodelay(sock->sk);
- lock_sock(sk);
/* TCP timer functions might access net namespace even after
* a process which created this net namespace terminated.
*/
if (!sk->sk_net_refcnt) {
- if (!maybe_get_net(net)) {
- release_sock(sk);
+ if (!maybe_get_net(net))
return false;
- }
+ /*
+ * sk_net_refcnt_upgrade() must be called before lock_sock()
+ * because it does a GFP_KERNEL allocation, which can trigger
+ * fs_reclaim and create a circular lock dependency with the
+ * socket lock. The fields it modifies (sk_net_refcnt,
+ * ns_tracker) are not accessed by any concurrent code path
+ * at this point.
+ */
sk_net_refcnt_upgrade(sk);
put_net(net);
}
+ lock_sock(sk);
rtn = net_generic(net, rds_tcp_netid);
if (rtn->sndbuf_size > 0) {
sk->sk_sndbuf = rtn->sndbuf_size;
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 2197eb625658..945b64be4c1f 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1358,6 +1358,12 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
return -EINVAL;
}

+ if (bind && !(flags & TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Attaching ct to a non ingress/clsact qdisc is unsupported");
+ return -EOPNOTSUPP;
+ }
+
err = nla_parse_nested(tb, TCA_CT_MAX, nla, ct_policy, extack);
if (err < 0)
return err;
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 8e8f6af731d5..4ad01d4e820d 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -293,8 +293,8 @@ static int load_metaops_and_vet(u32 metaid, void *val, int len, bool rtnl_held)
/* called when adding new meta information
*/
static int __add_metainfo(const struct tcf_meta_ops *ops,
- struct tcf_ife_info *ife, u32 metaid, void *metaval,
- int len, bool atomic, bool exists)
+ struct tcf_ife_params *p, u32 metaid, void *metaval,
+ int len, bool atomic)
{
struct tcf_meta_info *mi = NULL;
int ret = 0;
@@ -313,45 +313,40 @@ static int __add_metainfo(const struct tcf_meta_ops *ops,
}
}

- if (exists)
- spin_lock_bh(&ife->tcf_lock);
- list_add_tail(&mi->metalist, &ife->metalist);
- if (exists)
- spin_unlock_bh(&ife->tcf_lock);
+ list_add_tail(&mi->metalist, &p->metalist);

return ret;
}

static int add_metainfo_and_get_ops(const struct tcf_meta_ops *ops,
- struct tcf_ife_info *ife, u32 metaid,
- bool exists)
+ struct tcf_ife_params *p, u32 metaid)
{
int ret;

if (!try_module_get(ops->owner))
return -ENOENT;
- ret = __add_metainfo(ops, ife, metaid, NULL, 0, true, exists);
+ ret = __add_metainfo(ops, p, metaid, NULL, 0, true);
if (ret)
module_put(ops->owner);
return ret;
}

-static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
- int len, bool exists)
+static int add_metainfo(struct tcf_ife_params *p, u32 metaid, void *metaval,
+ int len)
{
const struct tcf_meta_ops *ops = find_ife_oplist(metaid);
int ret;

if (!ops)
return -ENOENT;
- ret = __add_metainfo(ops, ife, metaid, metaval, len, false, exists);
+ ret = __add_metainfo(ops, p, metaid, metaval, len, false);
if (ret)
/*put back what find_ife_oplist took */
module_put(ops->owner);
return ret;
}

-static int use_all_metadata(struct tcf_ife_info *ife, bool exists)
+static int use_all_metadata(struct tcf_ife_params *p)
{
struct tcf_meta_ops *o;
int rc = 0;
@@ -359,7 +354,7 @@ static int use_all_metadata(struct tcf_ife_info *ife, bool exists)

read_lock(&ife_mod_lock);
list_for_each_entry(o, &ifeoplist, list) {
- rc = add_metainfo_and_get_ops(o, ife, o->metaid, exists);
+ rc = add_metainfo_and_get_ops(o, p, o->metaid);
if (rc == 0)
installed += 1;
}
@@ -371,7 +366,7 @@ static int use_all_metadata(struct tcf_ife_info *ife, bool exists)
return -EINVAL;
}

-static int dump_metalist(struct sk_buff *skb, struct tcf_ife_info *ife)
+static int dump_metalist(struct sk_buff *skb, struct tcf_ife_params *p)
{
struct tcf_meta_info *e;
struct nlattr *nest;
@@ -379,14 +374,14 @@ static int dump_metalist(struct sk_buff *skb, struct tcf_ife_info *ife)
int total_encoded = 0;

/*can only happen on decode */
- if (list_empty(&ife->metalist))
+ if (list_empty(&p->metalist))
return 0;

nest = nla_nest_start_noflag(skb, TCA_IFE_METALST);
if (!nest)
goto out_nlmsg_trim;

- list_for_each_entry(e, &ife->metalist, metalist) {
+ list_for_each_entry(e, &p->metalist, metalist) {
if (!e->ops->get(skb, e))
total_encoded += 1;
}
@@ -403,13 +398,11 @@ static int dump_metalist(struct sk_buff *skb, struct tcf_ife_info *ife)
return -1;
}

-/* under ife->tcf_lock */
-static void _tcf_ife_cleanup(struct tc_action *a)
+static void __tcf_ife_cleanup(struct tcf_ife_params *p)
{
- struct tcf_ife_info *ife = to_ife(a);
struct tcf_meta_info *e, *n;

- list_for_each_entry_safe(e, n, &ife->metalist, metalist) {
+ list_for_each_entry_safe(e, n, &p->metalist, metalist) {
list_del(&e->metalist);
if (e->metaval) {
if (e->ops->release)
@@ -422,18 +415,23 @@ static void _tcf_ife_cleanup(struct tc_action *a)
}
}

+static void tcf_ife_cleanup_params(struct rcu_head *head)
+{
+ struct tcf_ife_params *p = container_of(head, struct tcf_ife_params,
+ rcu);
+
+ __tcf_ife_cleanup(p);
+ kfree(p);
+}
+
static void tcf_ife_cleanup(struct tc_action *a)
{
struct tcf_ife_info *ife = to_ife(a);
struct tcf_ife_params *p;

- spin_lock_bh(&ife->tcf_lock);
- _tcf_ife_cleanup(a);
- spin_unlock_bh(&ife->tcf_lock);
-
p = rcu_dereference_protected(ife->params, 1);
if (p)
- kfree_rcu(p, rcu);
+ call_rcu(&p->rcu, tcf_ife_cleanup_params);
}

static int load_metalist(struct nlattr **tb, bool rtnl_held)
@@ -455,8 +453,7 @@ static int load_metalist(struct nlattr **tb, bool rtnl_held)
return 0;
}

-static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
- bool exists, bool rtnl_held)
+static int populate_metalist(struct tcf_ife_params *p, struct nlattr **tb)
{
int len = 0;
int rc = 0;
@@ -468,7 +465,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
val = nla_data(tb[i]);
len = nla_len(tb[i]);

- rc = add_metainfo(ife, i, val, len, exists);
+ rc = add_metainfo(p, i, val, len);
if (rc)
return rc;
}
@@ -523,6 +520,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return -ENOMEM;
+ INIT_LIST_HEAD(&p->metalist);

if (tb[TCA_IFE_METALST]) {
err = nla_parse_nested_deprecated(tb2, IFE_META_MAX,
@@ -567,8 +565,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
}

ife = to_ife(*a);
- if (ret == ACT_P_CREATED)
- INIT_LIST_HEAD(&ife->metalist);

err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
if (err < 0)
@@ -600,8 +596,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
}

if (tb[TCA_IFE_METALST]) {
- err = populate_metalist(ife, tb2, exists,
- !(flags & TCA_ACT_FLAGS_NO_RTNL));
+ err = populate_metalist(p, tb2);
if (err)
goto metadata_parse_err;
} else {
@@ -610,7 +605,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
* as we can. You better have at least one else we are
* going to bail out
*/
- err = use_all_metadata(ife, exists);
+ err = use_all_metadata(p);
if (err)
goto metadata_parse_err;
}
@@ -626,13 +621,14 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
if (p)
- kfree_rcu(p, rcu);
+ call_rcu(&p->rcu, tcf_ife_cleanup_params);

return ret;
metadata_parse_err:
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
release_idr:
+ __tcf_ife_cleanup(p);
kfree(p);
tcf_idr_release(*a, bind);
return err;
@@ -679,7 +675,7 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
if (nla_put(skb, TCA_IFE_TYPE, 2, &p->eth_type))
goto nla_put_failure;

- if (dump_metalist(skb, ife)) {
+ if (dump_metalist(skb, p)) {
/*ignore failure to dump metalist */
pr_info("Failed to dump metalist\n");
}
@@ -693,13 +689,13 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
return -1;
}

-static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
+static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_params *p,
u16 metaid, u16 mlen, void *mdata)
{
struct tcf_meta_info *e;

/* XXX: use hash to speed up */
- list_for_each_entry(e, &ife->metalist, metalist) {
+ list_for_each_entry_rcu(e, &p->metalist, metalist) {
if (metaid == e->metaid) {
if (e->ops) {
/* We check for decode presence already */
@@ -716,10 +712,13 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
{
struct tcf_ife_info *ife = to_ife(a);
int action = ife->tcf_action;
+ struct tcf_ife_params *p;
u8 *ifehdr_end;
u8 *tlv_data;
u16 metalen;

+ p = rcu_dereference_bh(ife->params);
+
bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
tcf_lastuse_update(&ife->tcf_tm);

@@ -745,7 +744,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
return TC_ACT_SHOT;
}

- if (find_decode_metaid(skb, ife, mtype, dlen, curr_data)) {
+ if (find_decode_metaid(skb, p, mtype, dlen, curr_data)) {
/* abuse overlimits to count when we receive metadata
* but dont have an ops for it
*/
@@ -769,12 +768,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
/*XXX: check if we can do this at install time instead of current
* send data path
**/
-static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_info *ife)
+static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_params *p)
{
- struct tcf_meta_info *e, *n;
+ struct tcf_meta_info *e;
int tot_run_sz = 0, run_sz = 0;

- list_for_each_entry_safe(e, n, &ife->metalist, metalist) {
+ list_for_each_entry_rcu(e, &p->metalist, metalist) {
if (e->ops->check_presence) {
run_sz = e->ops->check_presence(skb, e);
tot_run_sz += run_sz;
@@ -795,7 +794,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
OUTERHDR:TOTMETALEN:{TLVHDR:Metadatum:TLVHDR..}:ORIGDATA
where ORIGDATA = original ethernet header ...
*/
- u16 metalen = ife_get_sz(skb, ife);
+ u16 metalen = ife_get_sz(skb, p);
int hdrm = metalen + skb->dev->hard_header_len + IFE_METAHDRLEN;
unsigned int skboff = 0;
int new_len = skb->len + hdrm;
@@ -833,25 +832,21 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
if (!ife_meta)
goto drop;

- spin_lock(&ife->tcf_lock);
-
/* XXX: we dont have a clever way of telling encode to
* not repeat some of the computations that are done by
* ops->presence_check...
*/
- list_for_each_entry(e, &ife->metalist, metalist) {
+ list_for_each_entry_rcu(e, &p->metalist, metalist) {
if (e->ops->encode) {
err = e->ops->encode(skb, (void *)(ife_meta + skboff),
e);
}
if (err < 0) {
/* too corrupt to keep around if overwritten */
- spin_unlock(&ife->tcf_lock);
goto drop;
}
skboff += err;
}
- spin_unlock(&ife->tcf_lock);
oethh = (struct ethhdr *)skb->data;

if (!is_zero_ether_addr(p->eth_src))
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index a3bab5e27e71..d301d0ea2d31 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -2222,6 +2222,11 @@ static bool is_qdisc_ingress(__u32 classid)
return (TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS));
}

+static bool is_ingress_or_clsact(struct tcf_block *block, struct Qdisc *q)
+{
+ return tcf_block_shared(block) || (q && !!(q->flags & TCQ_F_INGRESS));
+}
+
static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
struct netlink_ext_ack *extack)
{
@@ -2415,6 +2420,8 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
flags |= TCA_ACT_FLAGS_NO_RTNL;
if (is_qdisc_ingress(parent))
flags |= TCA_ACT_FLAGS_AT_INGRESS;
+ if (is_ingress_or_clsact(block, q))
+ flags |= TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT;
err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
flags, extack);
if (err == 0) {
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index 306e046276d4..a4b07b661b77 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -115,12 +115,12 @@ static void ets_offload_change(struct Qdisc *sch)
struct ets_sched *q = qdisc_priv(sch);
struct tc_ets_qopt_offload qopt;
unsigned int w_psum_prev = 0;
- unsigned int q_psum = 0;
- unsigned int q_sum = 0;
unsigned int quantum;
unsigned int w_psum;
unsigned int weight;
unsigned int i;
+ u64 q_psum = 0;
+ u64 q_sum = 0;

if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return;
@@ -138,8 +138,12 @@ static void ets_offload_change(struct Qdisc *sch)

for (i = 0; i < q->nbands; i++) {
quantum = q->classes[i].quantum;
- q_psum += quantum;
- w_psum = quantum ? q_psum * 100 / q_sum : 0;
+ if (quantum) {
+ q_psum += quantum;
+ w_psum = div64_u64(q_psum * 100, q_sum);
+ } else {
+ w_psum = 0;
+ }
weight = w_psum - w_psum_prev;
w_psum_prev = w_psum;

diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 1af9768cd8ff..682daf79af37 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -815,6 +815,7 @@ static void fq_reset(struct Qdisc *sch)
for (idx = 0; idx < FQ_BANDS; idx++) {
q->band_flows[idx].new_flows.first = NULL;
q->band_flows[idx].old_flows.first = NULL;
+ q->band_pkt_count[idx] = 0;
}
q->delayed = RB_ROOT;
q->flows = 0;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 73c051f129d3..46e70f2e39cc 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1174,6 +1174,7 @@ void wiphy_unregister(struct wiphy *wiphy)
/* this has nothing to do now but make sure it's gone */
cancel_work_sync(&rdev->wiphy_work);

+ cancel_work_sync(&rdev->rfkill_block);
cancel_work_sync(&rdev->conn_work);
flush_work(&rdev->event_work);
cancel_delayed_work_sync(&rdev->dfs_update_channels_wk);
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index b7e3e46ec16d..e58c81cd79ee 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -239,14 +239,14 @@ int ieee80211_radiotap_iterator_next(
default:
if (!iterator->current_namespace ||
iterator->_arg_index >= iterator->current_namespace->n_bits) {
- if (iterator->current_namespace == &radiotap_ns)
- return -ENOENT;
align = 0;
} else {
align = iterator->current_namespace->align_size[iterator->_arg_index].align;
size = iterator->current_namespace->align_size[iterator->_arg_index].size;
}
if (!align) {
+ if (iterator->current_namespace == &radiotap_ns)
+ return -ENOENT;
/* skip all subsequent data */
iterator->_arg = iterator->_next_ns_data;
/* give up on this namespace */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index f031b07baa57..ed1aeaded9be 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -159,26 +159,32 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
struct xdp_buff_xsk *pos, *tmp;
struct list_head *xskb_list;
u32 contd = 0;
+ u32 num_desc;
int err;

- if (frags)
- contd = XDP_PKT_CONTD;
+ if (likely(!frags)) {
+ err = __xsk_rcv_zc(xs, xskb, len, contd);
+ if (err)
+ goto err;
+ return 0;
+ }

- err = __xsk_rcv_zc(xs, xskb, len, contd);
- if (err)
+ contd = XDP_PKT_CONTD;
+ num_desc = xdp_get_shared_info_from_buff(xdp)->nr_frags + 1;
+ if (xskq_prod_nb_free(xs->rx, num_desc) < num_desc) {
+ xs->rx_queue_full++;
+ err = -ENOBUFS;
goto err;
- if (likely(!frags))
- return 0;
+ }

+ __xsk_rcv_zc(xs, xskb, len, contd);
xskb_list = &xskb->pool->xskb_list;
- list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
+ list_for_each_entry_safe(pos, tmp, xskb_list, list_node) {
if (list_is_singular(xskb_list))
contd = 0;
len = pos->xdp.data_end - pos->xdp.data;
- err = __xsk_rcv_zc(xs, pos, len, contd);
- if (err)
- goto err;
- list_del(&pos->xskb_list_node);
+ __xsk_rcv_zc(xs, pos, len, contd);
+ list_del_init(&pos->list_node);
}

return 0;
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index b69dbd8615fc..9db08365fcb0 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -102,8 +102,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
xskb = &pool->heads[i];
xskb->pool = pool;
xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
- INIT_LIST_HEAD(&xskb->free_list_node);
- INIT_LIST_HEAD(&xskb->xskb_list_node);
+ INIT_LIST_HEAD(&xskb->list_node);
if (pool->unaligned)
pool->free_heads[i] = xskb;
else
@@ -550,8 +549,8 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
} else {
pool->free_list_cnt--;
xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk,
- free_list_node);
- list_del_init(&xskb->free_list_node);
+ list_node);
+ list_del_init(&xskb->list_node);
}

xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
@@ -617,8 +616,8 @@ static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u3

i = nb_entries;
while (i--) {
- xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node);
- list_del_init(&xskb->free_list_node);
+ xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, list_node);
+ list_del_init(&xskb->list_node);

*xdp = &xskb->xdp;
xdp++;
@@ -688,11 +687,11 @@ EXPORT_SYMBOL(xp_can_alloc);

void xp_free(struct xdp_buff_xsk *xskb)
{
- if (!list_empty(&xskb->free_list_node))
+ if (!list_empty(&xskb->list_node))
return;

xskb->pool->free_list_cnt++;
- list_add(&xskb->free_list_node, &xskb->pool->free_list);
+ list_add(&xskb->list_node, &xskb->pool->free_list);
}
EXPORT_SYMBOL(xp_free);

diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs
index 824da0e9738a..7b38fca9f242 100644
--- a/rust/kernel/kunit.rs
+++ b/rust/kernel/kunit.rs
@@ -13,6 +13,10 @@
/// Public but hidden since it should only be used from KUnit generated code.
#[doc(hidden)]
pub fn err(args: fmt::Arguments<'_>) {
+ // `args` is unused if `CONFIG_PRINTK` is not set - this avoids a build-time warning.
+ #[cfg(not(CONFIG_PRINTK))]
+ let _ = args;
+
// SAFETY: The format string is null-terminated and the `%pA` specifier matches the argument we
// are passing.
#[cfg(CONFIG_PRINTK)]
@@ -29,6 +33,10 @@ pub fn err(args: fmt::Arguments<'_>) {
/// Public but hidden since it should only be used from KUnit generated code.
#[doc(hidden)]
pub fn info(args: fmt::Arguments<'_>) {
+ // `args` is unused if `CONFIG_PRINTK` is not set - this avoids a build-time warning.
+ #[cfg(not(CONFIG_PRINTK))]
+ let _ = args;
+
// SAFETY: The format string is null-terminated and the `%pA` specifier matches the argument we
// are passing.
#[cfg(CONFIG_PRINTK)]
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 584b40718ecb..c12406e62e58 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -32,6 +32,7 @@
#include "include/crypto.h"
#include "include/ipc.h"
#include "include/label.h"
+#include "include/lib.h"
#include "include/policy.h"
#include "include/policy_ns.h"
#include "include/resource.h"
@@ -62,6 +63,7 @@
* securityfs and apparmorfs filesystems.
*/

+#define IREF_POISON 101

/*
* support fns
@@ -79,7 +81,7 @@ static void rawdata_f_data_free(struct rawdata_f_data *private)
if (!private)
return;

- aa_put_loaddata(private->loaddata);
+ aa_put_i_loaddata(private->loaddata);
kvfree(private);
}

@@ -153,6 +155,71 @@ static int aafs_show_path(struct seq_file *seq, struct dentry *dentry)
return 0;
}

+static struct aa_ns *get_ns_common_ref(struct aa_common_ref *ref)
+{
+ if (ref) {
+ struct aa_label *reflabel = container_of(ref, struct aa_label,
+ count);
+ return aa_get_ns(labels_ns(reflabel));
+ }
+
+ return NULL;
+}
+
+static struct aa_proxy *get_proxy_common_ref(struct aa_common_ref *ref)
+{
+ if (ref)
+ return aa_get_proxy(container_of(ref, struct aa_proxy, count));
+
+ return NULL;
+}
+
+static struct aa_loaddata *get_loaddata_common_ref(struct aa_common_ref *ref)
+{
+ if (ref)
+ return aa_get_i_loaddata(container_of(ref, struct aa_loaddata,
+ count));
+ return NULL;
+}
+
+static void aa_put_common_ref(struct aa_common_ref *ref)
+{
+ if (!ref)
+ return;
+
+ switch (ref->reftype) {
+ case REF_RAWDATA:
+ aa_put_i_loaddata(container_of(ref, struct aa_loaddata,
+ count));
+ break;
+ case REF_PROXY:
+ aa_put_proxy(container_of(ref, struct aa_proxy,
+ count));
+ break;
+ case REF_NS:
+ /* ns count is held on its unconfined label */
+ aa_put_ns(labels_ns(container_of(ref, struct aa_label, count)));
+ break;
+ default:
+ AA_BUG(true, "unknown refcount type");
+ break;
+ }
+}
+
+static void aa_get_common_ref(struct aa_common_ref *ref)
+{
+ kref_get(&ref->count);
+}
+
+static void aafs_evict(struct inode *inode)
+{
+ struct aa_common_ref *ref = inode->i_private;
+
+ clear_inode(inode);
+ aa_put_common_ref(ref);
+ inode->i_private = (void *) IREF_POISON;
+}
+
static void aafs_free_inode(struct inode *inode)
{
if (S_ISLNK(inode->i_mode))
@@ -162,6 +229,7 @@ static void aafs_free_inode(struct inode *inode)

static const struct super_operations aafs_super_ops = {
.statfs = simple_statfs,
+ .evict_inode = aafs_evict,
.free_inode = aafs_free_inode,
.show_path = aafs_show_path,
};
@@ -262,7 +330,8 @@ static int __aafs_setup_d_inode(struct inode *dir, struct dentry *dentry,
* aafs_remove(). Will return ERR_PTR on failure.
*/
static struct dentry *aafs_create(const char *name, umode_t mode,
- struct dentry *parent, void *data, void *link,
+ struct dentry *parent,
+ struct aa_common_ref *data, void *link,
const struct file_operations *fops,
const struct inode_operations *iops)
{
@@ -299,6 +368,9 @@ static struct dentry *aafs_create(const char *name, umode_t mode,
goto fail_dentry;
inode_unlock(dir);

+ if (data)
+ aa_get_common_ref(data);
+
return dentry;

fail_dentry:
@@ -323,7 +395,8 @@ static struct dentry *aafs_create(const char *name, umode_t mode,
* see aafs_create
*/
static struct dentry *aafs_create_file(const char *name, umode_t mode,
- struct dentry *parent, void *data,
+ struct dentry *parent,
+ struct aa_common_ref *data,
const struct file_operations *fops)
{
return aafs_create(name, mode, parent, data, NULL, fops, NULL);
@@ -404,7 +477,8 @@ static struct aa_loaddata *aa_simple_write_to_buffer(const char __user *userbuf,

data->size = copy_size;
if (copy_from_user(data->data, userbuf, copy_size)) {
- aa_put_loaddata(data);
+ /* trigger free - don't need to put pcount */
+ aa_put_i_loaddata(data);
return ERR_PTR(-EFAULT);
}

@@ -412,7 +486,8 @@ static struct aa_loaddata *aa_simple_write_to_buffer(const char __user *userbuf,
}

static ssize_t policy_update(u32 mask, const char __user *buf, size_t size,
- loff_t *pos, struct aa_ns *ns)
+ loff_t *pos, struct aa_ns *ns,
+ const struct cred *ocred)
{
struct aa_loaddata *data;
struct aa_label *label;
@@ -423,7 +498,7 @@ static ssize_t policy_update(u32 mask, const char __user *buf, size_t size,
/* high level check about policy management - fine grained in
* below after unpack
*/
- error = aa_may_manage_policy(current_cred(), label, ns, mask);
+ error = aa_may_manage_policy(current_cred(), label, ns, ocred, mask);
if (error)
goto end_section;

@@ -431,7 +506,10 @@ static ssize_t policy_update(u32 mask, const char __user *buf, size_t size,
error = PTR_ERR(data);
if (!IS_ERR(data)) {
error = aa_replace_profiles(ns, label, mask, data);
- aa_put_loaddata(data);
+ /* put pcount, which will put count and free if no
+ * profiles referencing it.
+ */
+ aa_put_profile_loaddata(data);
}
end_section:
end_current_label_crit_section(label);
@@ -443,8 +521,9 @@ static ssize_t policy_update(u32 mask, const char __user *buf, size_t size,
static ssize_t profile_load(struct file *f, const char __user *buf, size_t size,
loff_t *pos)
{
- struct aa_ns *ns = aa_get_ns(f->f_inode->i_private);
- int error = policy_update(AA_MAY_LOAD_POLICY, buf, size, pos, ns);
+ struct aa_ns *ns = get_ns_common_ref(f->f_inode->i_private);
+ int error = policy_update(AA_MAY_LOAD_POLICY, buf, size, pos, ns,
+ f->f_cred);

aa_put_ns(ns);

@@ -460,9 +539,9 @@ static const struct file_operations aa_fs_profile_load = {
static ssize_t profile_replace(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
{
- struct aa_ns *ns = aa_get_ns(f->f_inode->i_private);
+ struct aa_ns *ns = get_ns_common_ref(f->f_inode->i_private);
int error = policy_update(AA_MAY_LOAD_POLICY | AA_MAY_REPLACE_POLICY,
- buf, size, pos, ns);
+ buf, size, pos, ns, f->f_cred);
aa_put_ns(ns);

return error;
@@ -480,14 +559,14 @@ static ssize_t profile_remove(struct file *f, const char __user *buf,
struct aa_loaddata *data;
struct aa_label *label;
ssize_t error;
- struct aa_ns *ns = aa_get_ns(f->f_inode->i_private);
+ struct aa_ns *ns = get_ns_common_ref(f->f_inode->i_private);

label = begin_current_label_crit_section();
/* high level check about policy management - fine grained in
* below after unpack
*/
error = aa_may_manage_policy(current_cred(), label, ns,
- AA_MAY_REMOVE_POLICY);
+ f->f_cred, AA_MAY_REMOVE_POLICY);
if (error)
goto out;

@@ -501,7 +580,7 @@ static ssize_t profile_remove(struct file *f, const char __user *buf,
if (!IS_ERR(data)) {
data->data[size] = 0;
error = aa_remove_profiles(ns, label, data->data, size);
- aa_put_loaddata(data);
+ aa_put_profile_loaddata(data);
}
out:
end_current_label_crit_section(label);
@@ -570,7 +649,7 @@ static int ns_revision_open(struct inode *inode, struct file *file)
if (!rev)
return -ENOMEM;

- rev->ns = aa_get_ns(inode->i_private);
+ rev->ns = get_ns_common_ref(inode->i_private);
if (!rev->ns)
rev->ns = aa_get_current_ns();
file->private_data = rev;
@@ -1048,7 +1127,7 @@ static const struct file_operations seq_profile_ ##NAME ##_fops = { \
static int seq_profile_open(struct inode *inode, struct file *file,
int (*show)(struct seq_file *, void *))
{
- struct aa_proxy *proxy = aa_get_proxy(inode->i_private);
+ struct aa_proxy *proxy = get_proxy_common_ref(inode->i_private);
int error = single_open(file, show, proxy);

if (error) {
@@ -1240,18 +1319,17 @@ static const struct file_operations seq_rawdata_ ##NAME ##_fops = { \
static int seq_rawdata_open(struct inode *inode, struct file *file,
int (*show)(struct seq_file *, void *))
{
- struct aa_loaddata *data = __aa_get_loaddata(inode->i_private);
+ struct aa_loaddata *data = get_loaddata_common_ref(inode->i_private);
int error;

if (!data)
- /* lost race this ent is being reaped */
return -ENOENT;

error = single_open(file, show, data);
if (error) {
AA_BUG(file->private_data &&
((struct seq_file *)file->private_data)->private);
- aa_put_loaddata(data);
+ aa_put_i_loaddata(data);
}

return error;
@@ -1262,7 +1340,7 @@ static int seq_rawdata_release(struct inode *inode, struct file *file)
struct seq_file *seq = (struct seq_file *) file->private_data;

if (seq)
- aa_put_loaddata(seq->private);
+ aa_put_i_loaddata(seq->private);

return single_release(inode, file);
}
@@ -1374,9 +1452,8 @@ static int rawdata_open(struct inode *inode, struct file *file)
if (!aa_current_policy_view_capable(NULL))
return -EACCES;

- loaddata = __aa_get_loaddata(inode->i_private);
+ loaddata = get_loaddata_common_ref(inode->i_private);
if (!loaddata)
- /* lost race: this entry is being reaped */
return -ENOENT;

private = rawdata_f_data_alloc(loaddata->size);
@@ -1401,7 +1478,7 @@ static int rawdata_open(struct inode *inode, struct file *file)
return error;

fail_private_alloc:
- aa_put_loaddata(loaddata);
+ aa_put_i_loaddata(loaddata);
return error;
}

@@ -1418,7 +1495,6 @@ static void remove_rawdata_dents(struct aa_loaddata *rawdata)

for (i = 0; i < AAFS_LOADDATA_NDENTS; i++) {
if (!IS_ERR_OR_NULL(rawdata->dents[i])) {
- /* no refcounts on i_private */
aafs_remove(rawdata->dents[i]);
rawdata->dents[i] = NULL;
}
@@ -1461,35 +1537,37 @@ int __aa_fs_create_rawdata(struct aa_ns *ns, struct aa_loaddata *rawdata)
return PTR_ERR(dir);
rawdata->dents[AAFS_LOADDATA_DIR] = dir;

- dent = aafs_create_file("abi", S_IFREG | 0444, dir, rawdata,
+ dent = aafs_create_file("abi", S_IFREG | 0444, dir, &rawdata->count,
&seq_rawdata_abi_fops);
if (IS_ERR(dent))
goto fail;
rawdata->dents[AAFS_LOADDATA_ABI] = dent;

- dent = aafs_create_file("revision", S_IFREG | 0444, dir, rawdata,
- &seq_rawdata_revision_fops);
+ dent = aafs_create_file("revision", S_IFREG | 0444, dir,
+ &rawdata->count,
+ &seq_rawdata_revision_fops);
if (IS_ERR(dent))
goto fail;
rawdata->dents[AAFS_LOADDATA_REVISION] = dent;

if (aa_g_hash_policy) {
dent = aafs_create_file("sha256", S_IFREG | 0444, dir,
- rawdata, &seq_rawdata_hash_fops);
+ &rawdata->count,
+ &seq_rawdata_hash_fops);
if (IS_ERR(dent))
goto fail;
rawdata->dents[AAFS_LOADDATA_HASH] = dent;
}

dent = aafs_create_file("compressed_size", S_IFREG | 0444, dir,
- rawdata,
+ &rawdata->count,
&seq_rawdata_compressed_size_fops);
if (IS_ERR(dent))
goto fail;
rawdata->dents[AAFS_LOADDATA_COMPRESSED_SIZE] = dent;

- dent = aafs_create_file("raw_data", S_IFREG | 0444,
- dir, rawdata, &rawdata_fops);
+ dent = aafs_create_file("raw_data", S_IFREG | 0444, dir,
+ &rawdata->count, &rawdata_fops);
if (IS_ERR(dent))
goto fail;
rawdata->dents[AAFS_LOADDATA_DATA] = dent;
@@ -1497,13 +1575,11 @@ int __aa_fs_create_rawdata(struct aa_ns *ns, struct aa_loaddata *rawdata)

rawdata->ns = aa_get_ns(ns);
list_add(&rawdata->list, &ns->rawdata_list);
- /* no refcount on inode rawdata */

return 0;

fail:
remove_rawdata_dents(rawdata);
-
return PTR_ERR(dent);
}
#endif /* CONFIG_SECURITY_APPARMOR_EXPORT_BINARY */
@@ -1527,13 +1603,10 @@ void __aafs_profile_rmdir(struct aa_profile *profile)
__aafs_profile_rmdir(child);

for (i = AAFS_PROF_SIZEOF - 1; i >= 0; --i) {
- struct aa_proxy *proxy;
if (!profile->dents[i])
continue;

- proxy = d_inode(profile->dents[i])->i_private;
aafs_remove(profile->dents[i]);
- aa_put_proxy(proxy);
profile->dents[i] = NULL;
}
}
@@ -1567,14 +1640,7 @@ static struct dentry *create_profile_file(struct dentry *dir, const char *name,
struct aa_profile *profile,
const struct file_operations *fops)
{
- struct aa_proxy *proxy = aa_get_proxy(profile->label.proxy);
- struct dentry *dent;
-
- dent = aafs_create_file(name, S_IFREG | 0444, dir, proxy, fops);
- if (IS_ERR(dent))
- aa_put_proxy(proxy);
-
- return dent;
+ return aafs_create_file(name, S_IFREG | 0444, dir, &profile->label.proxy->count, fops);
}

#ifdef CONFIG_SECURITY_APPARMOR_EXPORT_BINARY
@@ -1620,7 +1686,8 @@ static const char *rawdata_get_link_base(struct dentry *dentry,
struct delayed_call *done,
const char *name)
{
- struct aa_proxy *proxy = inode->i_private;
+ struct aa_common_ref *ref = inode->i_private;
+ struct aa_proxy *proxy = container_of(ref, struct aa_proxy, count);
struct aa_label *label;
struct aa_profile *profile;
char *target;
@@ -1762,27 +1829,24 @@ int __aafs_profile_mkdir(struct aa_profile *profile, struct dentry *parent)
if (profile->rawdata) {
if (aa_g_hash_policy) {
dent = aafs_create("raw_sha256", S_IFLNK | 0444, dir,
- profile->label.proxy, NULL, NULL,
- &rawdata_link_sha256_iops);
+ &profile->label.proxy->count, NULL,
+ NULL, &rawdata_link_sha256_iops);
if (IS_ERR(dent))
goto fail;
- aa_get_proxy(profile->label.proxy);
profile->dents[AAFS_PROF_RAW_HASH] = dent;
}
dent = aafs_create("raw_abi", S_IFLNK | 0444, dir,
- profile->label.proxy, NULL, NULL,
+ &profile->label.proxy->count, NULL, NULL,
&rawdata_link_abi_iops);
if (IS_ERR(dent))
goto fail;
- aa_get_proxy(profile->label.proxy);
profile->dents[AAFS_PROF_RAW_ABI] = dent;

dent = aafs_create("raw_data", S_IFLNK | 0444, dir,
- profile->label.proxy, NULL, NULL,
+ &profile->label.proxy->count, NULL, NULL,
&rawdata_link_data_iops);
if (IS_ERR(dent))
goto fail;
- aa_get_proxy(profile->label.proxy);
profile->dents[AAFS_PROF_RAW_DATA] = dent;
}
#endif /*CONFIG_SECURITY_APPARMOR_EXPORT_BINARY */
@@ -1813,13 +1877,13 @@ static int ns_mkdir_op(struct mnt_idmap *idmap, struct inode *dir,
int error;

label = begin_current_label_crit_section();
- error = aa_may_manage_policy(current_cred(), label, NULL,
+ error = aa_may_manage_policy(current_cred(), label, NULL, NULL,
AA_MAY_LOAD_POLICY);
end_current_label_crit_section(label);
if (error)
return error;

- parent = aa_get_ns(dir->i_private);
+ parent = get_ns_common_ref(dir->i_private);
AA_BUG(d_inode(ns_subns_dir(parent)) != dir);

/* we have to unlock and then relock to get locking order right
@@ -1863,13 +1927,13 @@ static int ns_rmdir_op(struct inode *dir, struct dentry *dentry)
int error;

label = begin_current_label_crit_section();
- error = aa_may_manage_policy(current_cred(), label, NULL,
+ error = aa_may_manage_policy(current_cred(), label, NULL, NULL,
AA_MAY_LOAD_POLICY);
end_current_label_crit_section(label);
if (error)
return error;

- parent = aa_get_ns(dir->i_private);
+ parent = get_ns_common_ref(dir->i_private);
/* rmdir calls the generic securityfs functions to remove files
* from the apparmor dir. It is up to the apparmor ns locking
* to avoid races.
@@ -1939,27 +2003,6 @@ void __aafs_ns_rmdir(struct aa_ns *ns)

__aa_fs_list_remove_rawdata(ns);

- if (ns_subns_dir(ns)) {
- sub = d_inode(ns_subns_dir(ns))->i_private;
- aa_put_ns(sub);
- }
- if (ns_subload(ns)) {
- sub = d_inode(ns_subload(ns))->i_private;
- aa_put_ns(sub);
- }
- if (ns_subreplace(ns)) {
- sub = d_inode(ns_subreplace(ns))->i_private;
- aa_put_ns(sub);
- }
- if (ns_subremove(ns)) {
- sub = d_inode(ns_subremove(ns))->i_private;
- aa_put_ns(sub);
- }
- if (ns_subrevision(ns)) {
- sub = d_inode(ns_subrevision(ns))->i_private;
- aa_put_ns(sub);
- }
-
for (i = AAFS_NS_SIZEOF - 1; i >= 0; --i) {
aafs_remove(ns->dents[i]);
ns->dents[i] = NULL;
@@ -1984,40 +2027,40 @@ static int __aafs_ns_mkdir_entries(struct aa_ns *ns, struct dentry *dir)
return PTR_ERR(dent);
ns_subdata_dir(ns) = dent;

- dent = aafs_create_file("revision", 0444, dir, ns,
+ dent = aafs_create_file("revision", 0444, dir,
+ &ns->unconfined->label.count,
&aa_fs_ns_revision_fops);
if (IS_ERR(dent))
return PTR_ERR(dent);
- aa_get_ns(ns);
ns_subrevision(ns) = dent;

- dent = aafs_create_file(".load", 0640, dir, ns,
- &aa_fs_profile_load);
+ dent = aafs_create_file(".load", 0640, dir,
+ &ns->unconfined->label.count,
+ &aa_fs_profile_load);
if (IS_ERR(dent))
return PTR_ERR(dent);
- aa_get_ns(ns);
ns_subload(ns) = dent;

- dent = aafs_create_file(".replace", 0640, dir, ns,
- &aa_fs_profile_replace);
+ dent = aafs_create_file(".replace", 0640, dir,
+ &ns->unconfined->label.count,
+ &aa_fs_profile_replace);
if (IS_ERR(dent))
return PTR_ERR(dent);
- aa_get_ns(ns);
ns_subreplace(ns) = dent;

- dent = aafs_create_file(".remove", 0640, dir, ns,
- &aa_fs_profile_remove);
+ dent = aafs_create_file(".remove", 0640, dir,
+ &ns->unconfined->label.count,
+ &aa_fs_profile_remove);
if (IS_ERR(dent))
return PTR_ERR(dent);
- aa_get_ns(ns);
ns_subremove(ns) = dent;

/* use create_dentry so we can supply private data */
- dent = aafs_create("namespaces", S_IFDIR | 0755, dir, ns, NULL, NULL,
- &ns_dir_inode_operations);
+ dent = aafs_create("namespaces", S_IFDIR | 0755, dir,
+ &ns->unconfined->label.count,
+ NULL, NULL, &ns_dir_inode_operations);
if (IS_ERR(dent))
return PTR_ERR(dent);
- aa_get_ns(ns);
ns_subns_dir(ns) = dent;

return 0;
diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h
index 2a72e6b17d68..5aca0f612f8f 100644
--- a/security/apparmor/include/label.h
+++ b/security/apparmor/include/label.h
@@ -101,7 +101,7 @@ enum label_flags {

struct aa_label;
struct aa_proxy {
- struct kref count;
+ struct aa_common_ref count;
struct aa_label __rcu *label;
};

@@ -121,7 +121,7 @@ struct label_it {
* @ent: set of profiles for label, actual size determined by @size
*/
struct aa_label {
- struct kref count;
+ struct aa_common_ref count;
struct rb_node node;
struct rcu_head rcu;
struct aa_proxy *proxy;
@@ -373,7 +373,7 @@ int aa_label_match(struct aa_profile *profile, struct aa_ruleset *rules,
*/
static inline struct aa_label *__aa_get_label(struct aa_label *l)
{
- if (l && kref_get_unless_zero(&l->count))
+ if (l && kref_get_unless_zero(&l->count.count))
return l;

return NULL;
@@ -382,7 +382,7 @@ static inline struct aa_label *__aa_get_label(struct aa_label *l)
static inline struct aa_label *aa_get_label(struct aa_label *l)
{
if (l)
- kref_get(&(l->count));
+ kref_get(&(l->count.count));

return l;
}
@@ -402,7 +402,7 @@ static inline struct aa_label *aa_get_label_rcu(struct aa_label __rcu **l)
rcu_read_lock();
do {
c = rcu_dereference(*l);
- } while (c && !kref_get_unless_zero(&c->count));
+ } while (c && !kref_get_unless_zero(&c->count.count));
rcu_read_unlock();

return c;
@@ -442,7 +442,7 @@ static inline struct aa_label *aa_get_newest_label(struct aa_label *l)
static inline void aa_put_label(struct aa_label *l)
{
if (l)
- kref_put(&l->count, aa_label_kref);
+ kref_put(&l->count.count, aa_label_kref);
}

@@ -452,7 +452,7 @@ void aa_proxy_kref(struct kref *kref);
static inline struct aa_proxy *aa_get_proxy(struct aa_proxy *proxy)
{
if (proxy)
- kref_get(&(proxy->count));
+ kref_get(&(proxy->count.count));

return proxy;
}
@@ -460,7 +460,7 @@ static inline struct aa_proxy *aa_get_proxy(struct aa_proxy *proxy)
static inline void aa_put_proxy(struct aa_proxy *proxy)
{
if (proxy)
- kref_put(&proxy->count, aa_proxy_kref);
+ kref_put(&proxy->count.count, aa_proxy_kref);
}

void __aa_proxy_redirect(struct aa_label *orig, struct aa_label *new);
diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h
index 1ec00113a056..e76470c8b84c 100644
--- a/security/apparmor/include/lib.h
+++ b/security/apparmor/include/lib.h
@@ -71,6 +71,18 @@ void aa_info_message(const char *str);
/* Security blob offsets */
extern struct lsm_blob_sizes apparmor_blob_sizes;

+enum reftype {
+ REF_NS,
+ REF_PROXY,
+ REF_RAWDATA,
+};
+
+/* common reference count used by data the shows up in aafs */
+struct aa_common_ref {
+ struct kref count;
+ enum reftype reftype;
+};
+
/**
* aa_strneq - compare null terminated @str to a non null terminated substring
* @str: a null terminated string
diff --git a/security/apparmor/include/match.h b/security/apparmor/include/match.h
index dfc93631b43d..14c0401f97c1 100644
--- a/security/apparmor/include/match.h
+++ b/security/apparmor/include/match.h
@@ -183,6 +183,7 @@ static inline void aa_put_dfa(struct aa_dfa *dfa)
#define MATCH_FLAG_DIFF_ENCODE 0x80000000
#define MARK_DIFF_ENCODE 0x40000000
#define MATCH_FLAG_OOB_TRANSITION 0x20000000
+#define MARK_DIFF_ENCODE_VERIFIED 0x10000000
#define MATCH_FLAGS_MASK 0xff000000
#define MATCH_FLAGS_VALID (MATCH_FLAG_DIFF_ENCODE | MATCH_FLAG_OOB_TRANSITION)
#define MATCH_FLAGS_INVALID (MATCH_FLAGS_MASK & ~MATCH_FLAGS_VALID)
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 75088cc310b6..dbd6af6034f1 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -329,7 +329,7 @@ static inline aa_state_t ANY_RULE_MEDIATES(struct list_head *head,
static inline struct aa_profile *aa_get_profile(struct aa_profile *p)
{
if (p)
- kref_get(&(p->label.count));
+ kref_get(&(p->label.count.count));

return p;
}
@@ -343,7 +343,7 @@ static inline struct aa_profile *aa_get_profile(struct aa_profile *p)
*/
static inline struct aa_profile *aa_get_profile_not0(struct aa_profile *p)
{
- if (p && kref_get_unless_zero(&p->label.count))
+ if (p && kref_get_unless_zero(&p->label.count.count))
return p;

return NULL;
@@ -363,7 +363,7 @@ static inline struct aa_profile *aa_get_profile_rcu(struct aa_profile __rcu **p)
rcu_read_lock();
do {
c = rcu_dereference(*p);
- } while (c && !kref_get_unless_zero(&c->label.count));
+ } while (c && !kref_get_unless_zero(&c->label.count.count));
rcu_read_unlock();

return c;
@@ -376,7 +376,7 @@ static inline struct aa_profile *aa_get_profile_rcu(struct aa_profile __rcu **p)
static inline void aa_put_profile(struct aa_profile *p)
{
if (p)
- kref_put(&p->label.count, aa_label_kref);
+ kref_put(&p->label.count.count, aa_label_kref);
}

static inline int AUDIT_MODE(struct aa_profile *profile)
@@ -393,7 +393,7 @@ bool aa_policy_admin_capable(const struct cred *subj_cred,
struct aa_label *label, struct aa_ns *ns);
int aa_may_manage_policy(const struct cred *subj_cred,
struct aa_label *label, struct aa_ns *ns,
- u32 mask);
+ const struct cred *ocred, u32 mask);
bool aa_current_policy_view_capable(struct aa_ns *ns);
bool aa_current_policy_admin_capable(struct aa_ns *ns);

diff --git a/security/apparmor/include/policy_ns.h b/security/apparmor/include/policy_ns.h
index d646070fd966..cc6e84151812 100644
--- a/security/apparmor/include/policy_ns.h
+++ b/security/apparmor/include/policy_ns.h
@@ -18,6 +18,8 @@
#include "label.h"
#include "policy.h"

+/* Match max depth of user namespaces */
+#define MAX_NS_DEPTH 32

/* struct aa_ns_acct - accounting of profiles in namespace
* @max_size: maximum space allowed for all profiles in namespace
diff --git a/security/apparmor/include/policy_unpack.h b/security/apparmor/include/policy_unpack.h
index a6f4611ee50c..e5a95dc4da1f 100644
--- a/security/apparmor/include/policy_unpack.h
+++ b/security/apparmor/include/policy_unpack.h
@@ -87,17 +87,29 @@ struct aa_ext {
u32 version;
};

-/*
- * struct aa_loaddata - buffer of policy raw_data set
+/* struct aa_loaddata - buffer of policy raw_data set
+ * @count: inode/filesystem refcount - use aa_get_i_loaddata()
+ * @pcount: profile refcount - use aa_get_profile_loaddata()
+ * @list: list the loaddata is on
+ * @work: used to do a delayed cleanup
+ * @dents: refs to dents created in aafs
+ * @ns: the namespace this loaddata was loaded into
+ * @name:
+ * @size: the size of the data that was loaded
+ * @compressed_size: the size of the data when it is compressed
+ * @revision: unique revision count that this data was loaded as
+ * @abi: the abi number the loaddata uses
+ * @hash: a hash of the loaddata, used to help dedup data
*
- * there is no loaddata ref for being on ns list, nor a ref from
- * d_inode(@dentry) when grab a ref from these, @ns->lock must be held
- * && __aa_get_loaddata() needs to be used, and the return value
- * checked, if NULL the loaddata is already being reaped and should be
- * considered dead.
+ * There is no loaddata ref for being on ns->rawdata_list, so
+ * @ns->lock must be held when walking the list. Dentries and
+ * inode opens hold refs on @count; profiles hold refs on @pcount.
+ * When the last @pcount drops, do_ploaddata_rmfs() removes the
+ * fs entries and drops the associated @count ref.
*/
struct aa_loaddata {
- struct kref count;
+ struct aa_common_ref count;
+ struct kref pcount;
struct list_head list;
struct work_struct work;
struct dentry *dents[AAFS_LOADDATA_NDENTS];
@@ -119,50 +131,53 @@ struct aa_loaddata {
int aa_unpack(struct aa_loaddata *udata, struct list_head *lh, const char **ns);

/**
- * __aa_get_loaddata - get a reference count to uncounted data reference
+ * aa_get_loaddata - get a reference count from a counted data reference
* @data: reference to get a count on
*
- * Returns: pointer to reference OR NULL if race is lost and reference is
- * being repeated.
- * Requires: @data->ns->lock held, and the return code MUST be checked
- *
- * Use only from inode->i_private and @data->list found references
+ * Returns: pointer to reference
+ * Requires: @data to have a valid reference count on it. It is a bug
+ * if the race to reap can be encountered when it is used.
*/
static inline struct aa_loaddata *
-__aa_get_loaddata(struct aa_loaddata *data)
+aa_get_i_loaddata(struct aa_loaddata *data)
{
- if (data && kref_get_unless_zero(&(data->count)))
- return data;

- return NULL;
+ if (data)
+ kref_get(&(data->count.count));
+ return data;
}

+
/**
- * aa_get_loaddata - get a reference count from a counted data reference
+ * aa_get_profile_loaddata - get a profile reference count on loaddata
* @data: reference to get a count on
*
- * Returns: point to reference
- * Requires: @data to have a valid reference count on it. It is a bug
- * if the race to reap can be encountered when it is used.
+ * Returns: pointer to reference
+ * Requires: @data to have a valid reference count on it.
*/
static inline struct aa_loaddata *
-aa_get_loaddata(struct aa_loaddata *data)
+aa_get_profile_loaddata(struct aa_loaddata *data)
{
- struct aa_loaddata *tmp = __aa_get_loaddata(data);
-
- AA_BUG(data && !tmp);
-
- return tmp;
+ if (data)
+ kref_get(&(data->pcount));
+ return data;
}

void __aa_loaddata_update(struct aa_loaddata *data, long revision);
bool aa_rawdata_eq(struct aa_loaddata *l, struct aa_loaddata *r);
void aa_loaddata_kref(struct kref *kref);
+void aa_ploaddata_kref(struct kref *kref);
struct aa_loaddata *aa_loaddata_alloc(size_t size);
-static inline void aa_put_loaddata(struct aa_loaddata *data)
+static inline void aa_put_i_loaddata(struct aa_loaddata *data)
+{
+ if (data)
+ kref_put(&data->count.count, aa_loaddata_kref);
+}
+
+static inline void aa_put_profile_loaddata(struct aa_loaddata *data)
{
if (data)
- kref_put(&data->count, aa_loaddata_kref);
+ kref_put(&data->pcount, aa_ploaddata_kref);
}

#if IS_ENABLED(CONFIG_KUNIT)
diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index af25ca6b6b83..3bcc45437b44 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -52,7 +52,8 @@ static void free_proxy(struct aa_proxy *proxy)

void aa_proxy_kref(struct kref *kref)
{
- struct aa_proxy *proxy = container_of(kref, struct aa_proxy, count);
+ struct aa_proxy *proxy = container_of(kref, struct aa_proxy,
+ count.count);

free_proxy(proxy);
}
@@ -63,7 +64,8 @@ struct aa_proxy *aa_alloc_proxy(struct aa_label *label, gfp_t gfp)

new = kzalloc(sizeof(struct aa_proxy), gfp);
if (new) {
- kref_init(&new->count);
+ kref_init(&new->count.count);
+ new->count.reftype = REF_PROXY;
rcu_assign_pointer(new->label, aa_get_label(label));
}
return new;
@@ -371,7 +373,8 @@ static void label_free_rcu(struct rcu_head *head)

void aa_label_kref(struct kref *kref)
{
- struct aa_label *label = container_of(kref, struct aa_label, count);
+ struct aa_label *label = container_of(kref, struct aa_label,
+ count.count);
struct aa_ns *ns = labels_ns(label);

if (!ns) {
@@ -408,7 +411,8 @@ bool aa_label_init(struct aa_label *label, int size, gfp_t gfp)

label->size = size; /* doesn't include null */
label->vec[size] = NULL; /* null terminate */
- kref_init(&label->count);
+ kref_init(&label->count.count);
+ label->count.reftype = REF_NS; /* for aafs purposes */
RB_CLEAR_NODE(&label->node);

return true;
diff --git a/security/apparmor/match.c b/security/apparmor/match.c
index fae26953619a..4e3ada0e7461 100644
--- a/security/apparmor/match.c
+++ b/security/apparmor/match.c
@@ -160,9 +160,10 @@ static int verify_dfa(struct aa_dfa *dfa)
if (state_count == 0)
goto out;
for (i = 0; i < state_count; i++) {
- if (!(BASE_TABLE(dfa)[i] & MATCH_FLAG_DIFF_ENCODE) &&
- (DEFAULT_TABLE(dfa)[i] >= state_count))
+ if (DEFAULT_TABLE(dfa)[i] >= state_count) {
+ pr_err("AppArmor DFA default state out of bounds");
goto out;
+ }
if (BASE_TABLE(dfa)[i] & MATCH_FLAGS_INVALID) {
pr_err("AppArmor DFA state with invalid match flags");
goto out;
@@ -201,16 +202,31 @@ static int verify_dfa(struct aa_dfa *dfa)
size_t j, k;

for (j = i;
- (BASE_TABLE(dfa)[j] & MATCH_FLAG_DIFF_ENCODE) &&
- !(BASE_TABLE(dfa)[j] & MARK_DIFF_ENCODE);
+ ((BASE_TABLE(dfa)[j] & MATCH_FLAG_DIFF_ENCODE) &&
+ !(BASE_TABLE(dfa)[j] & MARK_DIFF_ENCODE_VERIFIED));
j = k) {
+ if (BASE_TABLE(dfa)[j] & MARK_DIFF_ENCODE)
+ /* loop in current chain */
+ goto out;
k = DEFAULT_TABLE(dfa)[j];
if (j == k)
+ /* self loop */
goto out;
- if (k < j)
- break; /* already verified */
BASE_TABLE(dfa)[j] |= MARK_DIFF_ENCODE;
}
+ /* move mark to verified */
+ for (j = i;
+ (BASE_TABLE(dfa)[j] & MATCH_FLAG_DIFF_ENCODE);
+ j = k) {
+ k = DEFAULT_TABLE(dfa)[j];
+ if (j < i)
+ /* jumps to state/chain that has been
+ * verified
+ */
+ break;
+ BASE_TABLE(dfa)[j] &= ~MARK_DIFF_ENCODE;
+ BASE_TABLE(dfa)[j] |= MARK_DIFF_ENCODE_VERIFIED;
+ }
}
error = 0;

@@ -408,13 +424,18 @@ aa_state_t aa_dfa_match_len(struct aa_dfa *dfa, aa_state_t start,
if (dfa->tables[YYTD_ID_EC]) {
/* Equivalence class table defined */
u8 *equiv = EQUIV_TABLE(dfa);
- for (; len; len--)
- match_char(state, def, base, next, check,
- equiv[(u8) *str++]);
+ for (; len; len--) {
+ u8 c = equiv[(u8) *str];
+
+ match_char(state, def, base, next, check, c);
+ str++;
+ }
} else {
/* default is direct to next state */
- for (; len; len--)
- match_char(state, def, base, next, check, (u8) *str++);
+ for (; len; len--) {
+ match_char(state, def, base, next, check, (u8) *str);
+ str++;
+ }
}

return state;
@@ -448,13 +469,18 @@ aa_state_t aa_dfa_match(struct aa_dfa *dfa, aa_state_t start, const char *str)
/* Equivalence class table defined */
u8 *equiv = EQUIV_TABLE(dfa);
/* default is direct to next state */
- while (*str)
- match_char(state, def, base, next, check,
- equiv[(u8) *str++]);
+ while (*str) {
+ u8 c = equiv[(u8) *str];
+
+ match_char(state, def, base, next, check, c);
+ str++;
+ }
} else {
/* default is direct to next state */
- while (*str)
- match_char(state, def, base, next, check, (u8) *str++);
+ while (*str) {
+ match_char(state, def, base, next, check, (u8) *str);
+ str++;
+ }
}

return state;
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 105706abf281..b5ae0314b384 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -184,19 +184,43 @@ static void __list_remove_profile(struct aa_profile *profile)
}

/**
- * __remove_profile - remove old profile, and children
- * @profile: profile to be replaced (NOT NULL)
+ * __remove_profile - remove profile, and children
+ * @profile: profile to be removed (NOT NULL)
*
* Requires: namespace list lock be held, or list not be shared
*/
static void __remove_profile(struct aa_profile *profile)
{
+ struct aa_profile *curr, *to_remove;
+
AA_BUG(!profile);
AA_BUG(!profile->ns);
AA_BUG(!mutex_is_locked(&profile->ns->lock));

/* release any children lists first */
- __aa_profile_list_release(&profile->base.profiles);
+ if (!list_empty(&profile->base.profiles)) {
+ curr = list_first_entry(&profile->base.profiles, struct aa_profile, base.list);
+
+ while (curr != profile) {
+
+ while (!list_empty(&curr->base.profiles))
+ curr = list_first_entry(&curr->base.profiles,
+ struct aa_profile, base.list);
+
+ to_remove = curr;
+ if (!list_is_last(&to_remove->base.list,
+ &aa_deref_parent(curr)->base.profiles))
+ curr = list_next_entry(to_remove, base.list);
+ else
+ curr = aa_deref_parent(curr);
+
+ /* released by free_profile */
+ aa_label_remove(&to_remove->label);
+ __aafs_profile_rmdir(to_remove);
+ __list_remove_profile(to_remove);
+ }
+ }
+
/* released by free_profile */
aa_label_remove(&profile->label);
__aafs_profile_rmdir(profile);
@@ -314,7 +338,7 @@ void aa_free_profile(struct aa_profile *profile)
}

kfree_sensitive(profile->hash);
- aa_put_loaddata(profile->rawdata);
+ aa_put_profile_loaddata(profile->rawdata);
aa_label_destroy(&profile->label);

kfree_sensitive(profile);
@@ -870,17 +894,44 @@ bool aa_current_policy_admin_capable(struct aa_ns *ns)
return res;
}

+static bool is_subset_of_obj_privilege(const struct cred *cred,
+ struct aa_label *label,
+ const struct cred *ocred)
+{
+ if (cred == ocred)
+ return true;
+
+ if (!aa_label_is_subset(label, cred_label(ocred)))
+ return false;
+ /* don't allow crossing userns for now */
+ if (cred->user_ns != ocred->user_ns)
+ return false;
+ if (!cap_issubset(cred->cap_inheritable, ocred->cap_inheritable))
+ return false;
+ if (!cap_issubset(cred->cap_permitted, ocred->cap_permitted))
+ return false;
+ if (!cap_issubset(cred->cap_effective, ocred->cap_effective))
+ return false;
+ if (!cap_issubset(cred->cap_bset, ocred->cap_bset))
+ return false;
+ if (!cap_issubset(cred->cap_ambient, ocred->cap_ambient))
+ return false;
+ return true;
+}
+
+
/**
* aa_may_manage_policy - can the current task manage policy
* @subj_cred: subjects cred
* @label: label to check if it can manage policy
* @ns: namespace being managed by @label (may be NULL if @label's ns)
+ * @ocred: object cred if request is coming from an open object
* @mask: contains the policy manipulation operation being done
*
* Returns: 0 if the task is allowed to manipulate policy else error
*/
int aa_may_manage_policy(const struct cred *subj_cred, struct aa_label *label,
- struct aa_ns *ns, u32 mask)
+ struct aa_ns *ns, const struct cred *ocred, u32 mask)
{
const char *op;

@@ -896,6 +947,11 @@ int aa_may_manage_policy(const struct cred *subj_cred, struct aa_label *label,
return audit_policy(label, op, NULL, NULL, "policy_locked",
-EACCES);

+ if (ocred && !is_subset_of_obj_privilege(subj_cred, label, ocred))
+ return audit_policy(label, op, NULL, NULL,
+ "not privileged for target profile",
+ -EACCES);
+
if (!aa_policy_admin_capable(subj_cred, label, ns))
return audit_policy(label, op, NULL, NULL, "not policy admin",
-EACCES);
@@ -1067,7 +1123,7 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label,
LIST_HEAD(lh);

op = mask & AA_MAY_REPLACE_POLICY ? OP_PROF_REPL : OP_PROF_LOAD;
- aa_get_loaddata(udata);
+ aa_get_profile_loaddata(udata);
/* released below */
error = aa_unpack(udata, &lh, &ns_name);
if (error)
@@ -1094,6 +1150,7 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label,
goto fail;
}
ns_name = ent->ns_name;
+ ent->ns_name = NULL;
} else
count++;
}
@@ -1118,10 +1175,10 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label,
if (aa_rawdata_eq(rawdata_ent, udata)) {
struct aa_loaddata *tmp;

- tmp = __aa_get_loaddata(rawdata_ent);
+ tmp = aa_get_profile_loaddata(rawdata_ent);
/* check we didn't fail the race */
if (tmp) {
- aa_put_loaddata(udata);
+ aa_put_profile_loaddata(udata);
udata = tmp;
break;
}
@@ -1134,7 +1191,7 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label,
struct aa_profile *p;

if (aa_g_export_binary)
- ent->new->rawdata = aa_get_loaddata(udata);
+ ent->new->rawdata = aa_get_profile_loaddata(udata);
error = __lookup_replace(ns, ent->new->base.hname,
!(mask & AA_MAY_REPLACE_POLICY),
&ent->old, &info);
@@ -1267,7 +1324,7 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label,

out:
aa_put_ns(ns);
- aa_put_loaddata(udata);
+ aa_put_profile_loaddata(udata);
kfree(ns_name);

if (error)
diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c
index 1f02cfe1d974..06c0bde97a63 100644
--- a/security/apparmor/policy_ns.c
+++ b/security/apparmor/policy_ns.c
@@ -223,6 +223,8 @@ static struct aa_ns *__aa_create_ns(struct aa_ns *parent, const char *name,
AA_BUG(!name);
AA_BUG(!mutex_is_locked(&parent->lock));

+ if (parent->level > MAX_NS_DEPTH)
+ return ERR_PTR(-ENOSPC);
ns = alloc_ns(parent->base.hname, name);
if (!ns)
return ERR_PTR(-ENOMEM);
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 8f7d6ff5aef6..46420587bcd5 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -108,34 +108,48 @@ bool aa_rawdata_eq(struct aa_loaddata *l, struct aa_loaddata *r)
return memcmp(l->data, r->data, r->compressed_size ?: r->size) == 0;
}

+static void do_loaddata_free(struct aa_loaddata *d)
+{
+ kfree_sensitive(d->hash);
+ kfree_sensitive(d->name);
+ kvfree(d->data);
+ kfree_sensitive(d);
+}
+
+void aa_loaddata_kref(struct kref *kref)
+{
+ struct aa_loaddata *d = container_of(kref, struct aa_loaddata,
+ count.count);
+
+ do_loaddata_free(d);
+}
+
/*
* need to take the ns mutex lock which is NOT safe most places that
* put_loaddata is called, so we have to delay freeing it
*/
-static void do_loaddata_free(struct work_struct *work)
+static void do_ploaddata_rmfs(struct work_struct *work)
{
struct aa_loaddata *d = container_of(work, struct aa_loaddata, work);
struct aa_ns *ns = aa_get_ns(d->ns);

if (ns) {
mutex_lock_nested(&ns->lock, ns->level);
+ /* remove fs ref to loaddata */
__aa_fs_remove_rawdata(d);
mutex_unlock(&ns->lock);
aa_put_ns(ns);
}
-
- kfree_sensitive(d->hash);
- kfree_sensitive(d->name);
- kvfree(d->data);
- kfree_sensitive(d);
+ /* called by dropping last pcount, so drop its associated icount */
+ aa_put_i_loaddata(d);
}

-void aa_loaddata_kref(struct kref *kref)
+void aa_ploaddata_kref(struct kref *kref)
{
- struct aa_loaddata *d = container_of(kref, struct aa_loaddata, count);
+ struct aa_loaddata *d = container_of(kref, struct aa_loaddata, pcount);

if (d) {
- INIT_WORK(&d->work, do_loaddata_free);
+ INIT_WORK(&d->work, do_ploaddata_rmfs);
schedule_work(&d->work);
}
}
@@ -152,7 +166,9 @@ struct aa_loaddata *aa_loaddata_alloc(size_t size)
kfree(d);
return ERR_PTR(-ENOMEM);
}
- kref_init(&d->count);
+ kref_init(&d->count.count);
+ d->count.reftype = REF_RAWDATA;
+ kref_init(&d->pcount);
INIT_LIST_HEAD(&d->list);

return d;
@@ -762,7 +778,17 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy,
if (!aa_unpack_u32(e, &pdb->start[AA_CLASS_FILE], "dfa_start")) {
/* default start state for xmatch and file dfa */
pdb->start[AA_CLASS_FILE] = DFA_START;
- } /* setup class index */
+ }
+
+ size_t state_count = pdb->dfa->tables[YYTD_ID_BASE]->td_lolen;
+
+ if (pdb->start[0] >= state_count ||
+ pdb->start[AA_CLASS_FILE] >= state_count) {
+ *info = "invalid dfa start state";
+ goto fail;
+ }
+
+ /* setup class index */
for (i = AA_CLASS_FILE + 1; i <= AA_CLASS_LAST; i++) {
pdb->start[i] = aa_dfa_next(pdb->dfa, pdb->start[0],
i);
@@ -1132,7 +1158,6 @@ static int verify_header(struct aa_ext *e, int required, const char **ns)
{
int error = -EPROTONOSUPPORT;
const char *name = NULL;
- *ns = NULL;

/* get the interface version */
if (!aa_unpack_u32(e, &e->version, "version")) {
diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c
index 52e00332defe..c9e5b1d6b0ab 100644
--- a/security/integrity/ima/ima_kexec.c
+++ b/security/integrity/ima/ima_kexec.c
@@ -12,35 +12,62 @@
#include <linux/kexec.h>
#include <linux/of.h>
#include <linux/ima.h>
+#include <linux/mm.h>
+#include <linux/overflow.h>
+#include <linux/reboot.h>
+#include <asm/page.h>
#include "ima.h"

#ifdef CONFIG_IMA_KEXEC
+static bool ima_kexec_update_registered;
+static struct seq_file ima_kexec_file;
+static void *ima_kexec_buffer;
+
+static void ima_free_kexec_file_buf(struct seq_file *sf)
+{
+ vfree(sf->buf);
+ sf->buf = NULL;
+ sf->size = 0;
+ sf->read_pos = 0;
+ sf->count = 0;
+}
+
+static int ima_alloc_kexec_file_buf(size_t segment_size)
+{
+ ima_free_kexec_file_buf(&ima_kexec_file);
+
+ /* segment size can't change between kexec load and execute */
+ ima_kexec_file.buf = vmalloc(segment_size);
+ if (!ima_kexec_file.buf)
+ return -ENOMEM;
+
+ ima_kexec_file.size = segment_size;
+ ima_kexec_file.read_pos = 0;
+ ima_kexec_file.count = sizeof(struct ima_kexec_hdr); /* reserved space */
+
+ return 0;
+}
+
static int ima_dump_measurement_list(unsigned long *buffer_size, void **buffer,
unsigned long segment_size)
{
struct ima_queue_entry *qe;
- struct seq_file file;
struct ima_kexec_hdr khdr;
int ret = 0;

/* segment size can't change between kexec load and execute */
- file.buf = vmalloc(segment_size);
- if (!file.buf) {
- ret = -ENOMEM;
- goto out;
+ if (!ima_kexec_file.buf) {
+ pr_err("Kexec file buf not allocated\n");
+ return -EINVAL;
}

- file.file = NULL;
- file.size = segment_size;
- file.read_pos = 0;
- file.count = sizeof(khdr); /* reserved space */
-
memset(&khdr, 0, sizeof(khdr));
khdr.version = 1;
- list_for_each_entry_rcu(qe, &ima_measurements, later) {
- if (file.count < file.size) {
+ /* This is an append-only list, no need to hold the RCU read lock */
+ list_for_each_entry_rcu(qe, &ima_measurements, later, true) {
+ if (ima_kexec_file.count < ima_kexec_file.size) {
khdr.count++;
- ima_measurements_show(&file, qe);
+ ima_measurements_show(&ima_kexec_file, qe);
} else {
ret = -EINVAL;
break;
@@ -54,23 +81,22 @@ static int ima_dump_measurement_list(unsigned long *buffer_size, void **buffer,
* fill in reserved space with some buffer details
* (eg. version, buffer size, number of measurements)
*/
- khdr.buffer_size = file.count;
+ khdr.buffer_size = ima_kexec_file.count;
if (ima_canonical_fmt) {
khdr.version = cpu_to_le16(khdr.version);
khdr.count = cpu_to_le64(khdr.count);
khdr.buffer_size = cpu_to_le64(khdr.buffer_size);
}
- memcpy(file.buf, &khdr, sizeof(khdr));
+ memcpy(ima_kexec_file.buf, &khdr, sizeof(khdr));

print_hex_dump_debug("ima dump: ", DUMP_PREFIX_NONE, 16, 1,
- file.buf, file.count < 100 ? file.count : 100,
+ ima_kexec_file.buf, ima_kexec_file.count < 100 ?
+ ima_kexec_file.count : 100,
true);

- *buffer_size = file.count;
- *buffer = file.buf;
+ *buffer_size = ima_kexec_file.count;
+ *buffer = ima_kexec_file.buf;
out:
- if (ret == -EINVAL)
- vfree(file.buf);
return ret;
}

@@ -109,6 +135,12 @@ void ima_add_kexec_buffer(struct kimage *image)
return;
}

+ ret = ima_alloc_kexec_file_buf(kexec_segment_size);
+ if (ret < 0) {
+ pr_err("Not enough memory for the kexec measurement buffer.\n");
+ return;
+ }
+
ima_dump_measurement_list(&kexec_buffer_size, &kexec_buffer,
kexec_segment_size);
if (!kexec_buffer) {
@@ -133,6 +165,49 @@ void ima_add_kexec_buffer(struct kimage *image)
kexec_dprintk("kexec measurement buffer for the loaded kernel at 0x%lx.\n",
kbuf.mem);
}
+
+/*
+ * Called during kexec execute so that IMA can update the measurement list.
+ */
+static int ima_update_kexec_buffer(struct notifier_block *self,
+ unsigned long action, void *data)
+{
+ return NOTIFY_OK;
+}
+
+static struct notifier_block update_buffer_nb = {
+ .notifier_call = ima_update_kexec_buffer,
+ .priority = INT_MIN
+};
+
+/*
+ * Create a mapping for the source pages that contain the IMA buffer
+ * so we can update it later.
+ */
+void ima_kexec_post_load(struct kimage *image)
+{
+ if (ima_kexec_buffer) {
+ kimage_unmap_segment(ima_kexec_buffer);
+ ima_kexec_buffer = NULL;
+ }
+
+ if (!image->ima_buffer_addr)
+ return;
+
+ ima_kexec_buffer = kimage_map_segment(image,
+ image->ima_buffer_addr,
+ image->ima_buffer_size);
+ if (!ima_kexec_buffer) {
+ pr_err("Could not map measurements buffer.\n");
+ return;
+ }
+
+ if (!ima_kexec_update_registered) {
+ register_reboot_notifier(&update_buffer_nb);
+ ima_kexec_update_registered = true;
+ }
+}
+
#endif /* IMA_KEXEC */

/*
@@ -165,3 +240,36 @@ void __init ima_load_kexec_buffer(void)
pr_debug("Error restoring the measurement list: %d\n", rc);
}
}
+
+/*
+ * ima_validate_range - verify a physical buffer lies in addressable RAM
+ * @phys: physical start address of the buffer from previous kernel
+ * @size: size of the buffer
+ *
+ * On success return 0. On failure returns -EINVAL so callers can skip
+ * restoring.
+ */
+int ima_validate_range(phys_addr_t phys, size_t size)
+{
+ unsigned long start_pfn, end_pfn;
+ phys_addr_t end_phys;
+
+ if (check_add_overflow(phys, (phys_addr_t)size - 1, &end_phys))
+ return -EINVAL;
+
+ start_pfn = PHYS_PFN(phys);
+ end_pfn = PHYS_PFN(end_phys);
+
+#ifdef CONFIG_X86
+ if (!pfn_range_is_mapped(start_pfn, end_pfn))
+#else
+ if (!page_is_ram(start_pfn) || !page_is_ram(end_pfn))
+#endif
+ {
+ pr_warn("IMA: previous kernel measurement buffer %pa (size 0x%zx) lies outside available memory\n",
+ &phys, size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c
index 7823f71012a8..2a936f43fad2 100644
--- a/sound/pci/hda/cs35l56_hda.c
+++ b/sound/pci/hda/cs35l56_hda.c
@@ -180,7 +180,7 @@ static int cs35l56_hda_mixer_info(struct snd_kcontrol *kcontrol,
static int cs35l56_hda_mixer_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct cs35l56_hda *cs35l56 = (struct cs35l56_hda *)kcontrol->private_data;
+ struct cs35l56_hda *cs35l56 = snd_kcontrol_chip(kcontrol);
unsigned int reg_val;
int i;

@@ -202,7 +202,7 @@ static int cs35l56_hda_mixer_get(struct snd_kcontrol *kcontrol,
static int cs35l56_hda_mixer_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct cs35l56_hda *cs35l56 = (struct cs35l56_hda *)kcontrol->private_data;
+ struct cs35l56_hda *cs35l56 = snd_kcontrol_chip(kcontrol);
unsigned int item = ucontrol->value.enumerated.item[0];
bool changed;

@@ -231,7 +231,7 @@ static int cs35l56_hda_posture_info(struct snd_kcontrol *kcontrol,
static int cs35l56_hda_posture_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct cs35l56_hda *cs35l56 = (struct cs35l56_hda *)kcontrol->private_data;
+ struct cs35l56_hda *cs35l56 = snd_kcontrol_chip(kcontrol);
unsigned int pos;
int ret;

@@ -249,8 +249,8 @@ static int cs35l56_hda_posture_get(struct snd_kcontrol *kcontrol,
static int cs35l56_hda_posture_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct cs35l56_hda *cs35l56 = (struct cs35l56_hda *)kcontrol->private_data;
- unsigned long pos = ucontrol->value.integer.value[0];
+ struct cs35l56_hda *cs35l56 = snd_kcontrol_chip(kcontrol);
+ long pos = ucontrol->value.integer.value[0];
bool changed;
int ret;

@@ -298,7 +298,7 @@ static int cs35l56_hda_vol_info(struct snd_kcontrol *kcontrol,
static int cs35l56_hda_vol_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct cs35l56_hda *cs35l56 = (struct cs35l56_hda *)kcontrol->private_data;
+ struct cs35l56_hda *cs35l56 = snd_kcontrol_chip(kcontrol);
unsigned int raw_vol;
int vol;
int ret;
@@ -324,7 +324,7 @@ static int cs35l56_hda_vol_get(struct snd_kcontrol *kcontrol,
static int cs35l56_hda_vol_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
- struct cs35l56_hda *cs35l56 = (struct cs35l56_hda *)kcontrol->private_data;
+ struct cs35l56_hda *cs35l56 = snd_kcontrol_chip(kcontrol);
long vol = ucontrol->value.integer.value[0];
unsigned int raw_vol;
bool changed;
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 482e801a496a..b7c9eba9236d 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -308,6 +308,7 @@ enum {
CXT_PINCFG_SWS_JS201D,
CXT_PINCFG_TOP_SPEAKER,
CXT_FIXUP_HP_A_U,
+ CXT_FIXUP_ACER_SWIFT_HP,
};

/* for hda_fixup_thinkpad_acpi() */
@@ -1024,6 +1025,14 @@ static const struct hda_fixup cxt_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = cxt_fixup_hp_a_u,
},
+ [CXT_FIXUP_ACER_SWIFT_HP] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x16, 0x0321403f }, /* Headphone */
+ { 0x19, 0x40f001f0 }, /* Mic */
+ { }
+ },
+ },
};

static const struct hda_quirk cxt5045_fixups[] = {
@@ -1073,6 +1082,7 @@ static const struct hda_quirk cxt5066_fixups[] = {
SND_PCI_QUIRK(0x1025, 0x0543, "Acer Aspire One 522", CXT_FIXUP_STEREO_DMIC),
SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_ASPIRE_DMIC),
SND_PCI_QUIRK(0x1025, 0x054f, "Acer Aspire 4830T", CXT_FIXUP_ASPIRE_DMIC),
+ SND_PCI_QUIRK(0x1025, 0x136d, "Acer Swift SF314", CXT_FIXUP_ACER_SWIFT_HP),
SND_PCI_QUIRK(0x103c, 0x8079, "HP EliteBook 840 G3", CXT_FIXUP_HP_DOCK),
SND_PCI_QUIRK(0x103c, 0x807C, "HP EliteBook 820 G3", CXT_FIXUP_HP_DOCK),
SND_PCI_QUIRK(0x103c, 0x80FD, "HP ProBook 640 G2", CXT_FIXUP_HP_DOCK),
@@ -1081,6 +1091,7 @@ static const struct hda_quirk cxt5066_fixups[] = {
SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE),
SND_PCI_QUIRK(0x103c, 0x822e, "HP ProBook 440 G4", CXT_FIXUP_MUTE_LED_GPIO),
SND_PCI_QUIRK(0x103c, 0x8231, "HP ProBook 450 G4", CXT_FIXUP_MUTE_LED_GPIO),
+ SND_PCI_QUIRK(0x103c, 0x826b, "HP ZBook Studio G4", CXT_FIXUP_MUTE_LED_GPIO),
SND_PCI_QUIRK(0x103c, 0x828c, "HP EliteBook 840 G4", CXT_FIXUP_HP_DOCK),
SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 85178a0303a5..c13def0f1e1a 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -11295,7 +11295,9 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x144d, 0xc872, "Samsung Galaxy Book2 Pro (NP950XEE)", ALC298_FIXUP_SAMSUNG_AMP_V2_2_AMPS),
SND_PCI_QUIRK(0x144d, 0xc886, "Samsung Galaxy Book3 Pro (NP964XFG)", ALC298_FIXUP_SAMSUNG_AMP_V2_4_AMPS),
SND_PCI_QUIRK(0x144d, 0xc1ca, "Samsung Galaxy Book3 Pro 360 (NP960QFG)", ALC298_FIXUP_SAMSUNG_AMP_V2_4_AMPS),
+ SND_PCI_QUIRK(0x144d, 0xc1cb, "Samsung Galaxy Book3 Pro 360 (NP965QFG)", ALC298_FIXUP_SAMSUNG_AMP_V2_4_AMPS),
SND_PCI_QUIRK(0x144d, 0xc1cc, "Samsung Galaxy Book3 Ultra (NT960XFH)", ALC298_FIXUP_SAMSUNG_AMP_V2_4_AMPS),
+ SND_PCI_QUIRK(0x1458, 0x900e, "Gigabyte G5 KF5 (2023)", ALC2XX_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC),
diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c
index cb94c2cad221..9d22613f71e2 100644
--- a/sound/usb/endpoint.c
+++ b/sound/usb/endpoint.c
@@ -160,8 +160,8 @@ int snd_usb_endpoint_implicit_feedback_sink(struct snd_usb_endpoint *ep)
* This won't be used for implicit feedback which takes the packet size
* returned from the sync source
*/
-static int slave_next_packet_size(struct snd_usb_endpoint *ep,
- unsigned int avail)
+static int synced_next_packet_size(struct snd_usb_endpoint *ep,
+ unsigned int avail)
{
unsigned long flags;
unsigned int phase;
@@ -230,7 +230,7 @@ int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep,
}

if (ep->sync_source)
- return slave_next_packet_size(ep, avail);
+ return synced_next_packet_size(ep, avail);
else
return next_packet_size(ep, avail);
}
@@ -1399,6 +1399,9 @@ int snd_usb_endpoint_set_params(struct snd_usb_audio *chip,
goto unlock;
}

+ ep->packsize[0] = min(ep->packsize[0], ep->maxframesize);
+ ep->packsize[1] = min(ep->packsize[1], ep->maxframesize);
+
/* calculate the frequency in 16.16 format */
ep->freqm = ep->freqn;
ep->freqshift = INT_MIN;
diff --git a/sound/usb/mixer_scarlett2.c b/sound/usb/mixer_scarlett2.c
index f6292c4b8d21..124284010417 100644
--- a/sound/usb/mixer_scarlett2.c
+++ b/sound/usb/mixer_scarlett2.c
@@ -1294,8 +1294,6 @@ struct scarlett2_data {
struct snd_kcontrol *mux_ctls[SCARLETT2_MUX_MAX];
struct snd_kcontrol *mix_ctls[SCARLETT2_MIX_MAX];
struct snd_kcontrol *compressor_ctls[SCARLETT2_COMPRESSOR_CTLS_MAX];
- struct snd_kcontrol *precomp_flt_ctls[SCARLETT2_PRECOMP_FLT_CTLS_MAX];
- struct snd_kcontrol *peq_flt_ctls[SCARLETT2_PEQ_FLT_CTLS_MAX];
struct snd_kcontrol *precomp_flt_switch_ctls[SCARLETT2_DSP_SWITCH_MAX];
struct snd_kcontrol *peq_flt_switch_ctls[SCARLETT2_DSP_SWITCH_MAX];
struct snd_kcontrol *direct_monitor_ctl;
@@ -3415,8 +3413,7 @@ static int scarlett2_update_autogain(struct usb_mixer_interface *mixer)
private->autogain_status[i] =
private->num_autogain_status_texts - 1;

-
- for (int i = 0; i < SCARLETT2_AG_TARGET_COUNT; i++)
+ for (i = 0; i < SCARLETT2_AG_TARGET_COUNT; i++)
if (scarlett2_has_config_item(private,
scarlett2_ag_target_configs[i])) {
err = scarlett2_usb_get_config(
@@ -3427,7 +3424,7 @@ static int scarlett2_update_autogain(struct usb_mixer_interface *mixer)
}

/* convert from negative dBFS as used by the device */
- for (int i = 0; i < SCARLETT2_AG_TARGET_COUNT; i++)
+ for (i = 0; i < SCARLETT2_AG_TARGET_COUNT; i++)
private->ag_targets[i] = -ag_target_values[i];

return 0;
@@ -5595,8 +5592,7 @@ static int scarlett2_update_filter_values(struct usb_mixer_interface *mixer)

err = scarlett2_usb_get_config(
mixer, SCARLETT2_CONFIG_PEQ_FLT_SWITCH,
- info->dsp_input_count * info->peq_flt_count,
- private->peq_flt_switch);
+ info->dsp_input_count, private->peq_flt_switch);
if (err < 0)
return err;

@@ -6794,7 +6790,7 @@ static int scarlett2_add_dsp_ctls(struct usb_mixer_interface *mixer, int i)
err = scarlett2_add_new_ctl(
mixer, &scarlett2_precomp_flt_ctl,
i * info->precomp_flt_count + j,
- 1, s, &private->precomp_flt_switch_ctls[j]);
+ 1, s, NULL);
if (err < 0)
return err;
}
@@ -6804,7 +6800,7 @@ static int scarlett2_add_dsp_ctls(struct usb_mixer_interface *mixer, int i)
err = scarlett2_add_new_ctl(
mixer, &scarlett2_peq_flt_ctl,
i * info->peq_flt_count + j,
- 1, s, &private->peq_flt_switch_ctls[j]);
+ 1, s, NULL);
if (err < 0)
return err;
}
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 947467112409..41752b819746 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -2408,7 +2408,7 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
VENDOR_FLG(0x07fd, /* MOTU */
QUIRK_FLAG_VALIDATE_RATES),
VENDOR_FLG(0x1235, /* Focusrite Novation */
- QUIRK_FLAG_VALIDATE_RATES),
+ 0),
VENDOR_FLG(0x1511, /* AURALiC */
QUIRK_FLAG_DSD_RAW),
VENDOR_FLG(0x152a, /* Thesycon devices */
diff --git a/sound/usb/validate.c b/sound/usb/validate.c
index 4bb4893f6e74..f62b7cc041dc 100644
--- a/sound/usb/validate.c
+++ b/sound/usb/validate.c
@@ -281,7 +281,7 @@ static const struct usb_desc_validator audio_validators[] = {
/* UAC_VERSION_2, UAC2_SAMPLE_RATE_CONVERTER: not implemented yet */

/* UAC3 */
- FIXED(UAC_VERSION_2, UAC_HEADER, struct uac3_ac_header_descriptor),
+ FIXED(UAC_VERSION_3, UAC_HEADER, struct uac3_ac_header_descriptor),
FIXED(UAC_VERSION_3, UAC_INPUT_TERMINAL,
struct uac3_input_terminal_descriptor),
FIXED(UAC_VERSION_3, UAC_OUTPUT_TERMINAL,
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index 61931c4926fd..12b0f2ee5665 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -333,8 +333,10 @@ class LinuxSourceTree:
return self.validate_config(build_dir)

def run_kernel(self, args: Optional[List[str]]=None, build_dir: str='', filter_glob: str='', filter: str='', filter_action: Optional[str]=None, timeout: Optional[int]=None) -> Iterator[str]:
- if not args:
- args = []
+ # Copy to avoid mutating the caller-supplied list. exec_tests() reuses
+ # the same args across repeated run_kernel() calls (e.g. --run_isolated),
+ # so appending to the original would accumulate stale flags on each call.
+ args = list(args) if args else []
if filter_glob:
args.append('kunit.filter_glob=' + filter_glob)
if filter:
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 2beb7327e53f..70e5d0abe87f 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -477,6 +477,32 @@ class LinuxSourceTreeTest(unittest.TestCase):
with open(kunit_kernel.get_outfile_path(build_dir), 'rt') as outfile:
self.assertEqual(outfile.read(), 'hi\nbye\n', msg='Missing some output')

+ def test_run_kernel_args_not_mutated(self):
+ """Verify run_kernel() copies args so callers can reuse them."""
+ start_calls = []
+
+ def fake_start(start_args, unused_build_dir):
+ start_calls.append(list(start_args))
+ return subprocess.Popen(['printf', 'KTAP version 1\n'],
+ text=True, stdout=subprocess.PIPE)
+
+ with tempfile.TemporaryDirectory('') as build_dir:
+ tree = kunit_kernel.LinuxSourceTree(build_dir,
+ kunitconfig_paths=[os.devnull])
+ with mock.patch.object(tree._ops, 'start', side_effect=fake_start), \
+ mock.patch.object(kunit_kernel.subprocess, 'call'):
+ kernel_args = ['mem=1G']
+ for _ in tree.run_kernel(args=kernel_args, build_dir=build_dir,
+ filter_glob='suite.test1'):
+ pass
+ for _ in tree.run_kernel(args=kernel_args, build_dir=build_dir,
+ filter_glob='suite.test2'):
+ pass
+ self.assertEqual(kernel_args, ['mem=1G'],
+ 'run_kernel() should not modify caller args')
+ self.assertIn('kunit.filter_glob=suite.test1', start_calls[0])
+ self.assertIn('kunit.filter_glob=suite.test2', start_calls[1])
+
def test_build_reconfig_no_config(self):
with tempfile.TemporaryDirectory('') as build_dir:
with open(kunit_kernel.get_kunitconfig_path(build_dir), 'w') as f:
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 265654ec48b9..097bd51e14ca 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -349,8 +349,8 @@ static void sve2_sigill(void)

static void sve2p1_sigill(void)
{
- /* BFADD Z0.H, Z0.H, Z0.H */
- asm volatile(".inst 0x65000000" : : : "z0");
+ /* LD1Q {Z0.Q}, P0/Z, [Z0.D, X0] */
+ asm volatile(".inst 0xC400A000" : : : "z0");
}

static void sveaes_sigill(void)
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 666c9fde76da..a4e5b8613bab 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -69,6 +69,15 @@

#include "kselftest.h"

+static inline void __kselftest_memset_safe(void *s, int c, size_t n)
+{
+ if (n > 0)
+ memset(s, c, n);
+}
+
+#define KSELFTEST_PRIO_TEST_F 20000
+#define KSELFTEST_PRIO_XFAIL 20001
+
#define TEST_TIMEOUT_DEFAULT 30

/* Utilities exposed to the test definitions */
@@ -418,7 +427,7 @@
self = mmap(NULL, sizeof(*self), PROT_READ | PROT_WRITE, \
MAP_SHARED | MAP_ANONYMOUS, -1, 0); \
} else { \
- memset(&self_private, 0, sizeof(self_private)); \
+ __kselftest_memset_safe(&self_private, 0, sizeof(self_private)); \
self = &self_private; \
} \
} \
@@ -459,7 +468,7 @@
__test_check_assert(_metadata); \
} \
static struct __test_metadata *_##fixture_name##_##test_name##_object; \
- static void __attribute__((constructor)) \
+ static void __attribute__((constructor(KSELFTEST_PRIO_TEST_F))) \
_register_##fixture_name##_##test_name(void) \
{ \
struct __test_metadata *object = mmap(NULL, sizeof(*object), \
@@ -873,7 +882,7 @@ struct __test_xfail {
.fixture = &_##fixture_name##_fixture_object, \
.variant = &_##fixture_name##_##variant_name##_object, \
}; \
- static void __attribute__((constructor)) \
+ static void __attribute__((constructor(KSELFTEST_PRIO_XFAIL))) \
_register_##fixture_name##_##variant_name##_##test_name##_xfail(void) \
{ \
_##fixture_name##_##variant_name##_##test_name##_xfail.test = \
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 631dd9889321..91271cfb950f 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -2384,6 +2384,19 @@ remove_tests()
chk_rst_nr 0 0
fi

+ # signal+subflow with limits, remove
+ if reset "remove signal+subflow with limits"; then
+ pm_nl_set_limits $ns1 0 0
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,subflow
+ pm_nl_set_limits $ns2 0 0
+ addr_nr_ns1=-1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ chk_add_nr 1 1
+ chk_rm_nr 1 0 invert
+ chk_rst_nr 0 0
+ fi
+
# addresses remove
if reset "remove addresses"; then
pm_nl_set_limits $ns1 3 3
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 8fa77c8e9b65..8c71b18fdad3 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -226,10 +226,13 @@ run_test()
for dev in ns2eth1 ns2eth2; do
tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1
done
- tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1
- tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2
- tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1
- tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2
+
+ # keep the queued pkts number low, or the RTT estimator will see
+ # increasing latency over time.
+ tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1 limit 50
+ tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2 limit 50
+ tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 limit 50
+ tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 limit 50

# time is measured in ms, account for transfer size, aggregated link speed
# and header overhead (10%)