Re: Linux 6.1.45

From: Greg Kroah-Hartman
Date: Fri Aug 11 2023 - 07:08:44 EST


diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index bbc80eff03f9..b3c8ac6a2c38 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -141,6 +141,10 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | MMU-500 | #841119,826419 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | MMU-600 | #1076982,1209401| N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | MMU-700 | #2268618,2812531| N/A |
++----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
| Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 |
+----------------+-----------------+-----------------+-----------------------------+
diff --git a/Makefile b/Makefile
index 612f3d83629b..82c958299e98 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 6
PATCHLEVEL = 1
-SUBLEVEL = 44
+SUBLEVEL = 45
EXTRAVERSION =
NAME = Curry Ramen

diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
index 48424e459f12..15b5651b88d0 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
@@ -128,7 +128,7 @@ &i2c1 {
status = "okay";
clock-frequency = <100000>;
i2c-sda-falling-time-ns = <890>; /* hcnt */
- i2c-sdl-falling-time-ns = <890>; /* lcnt */
+ i2c-scl-falling-time-ns = <890>; /* lcnt */

adc@14 {
compatible = "lltc,ltc2497";
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
index 847a7c01f5af..fcf640de90b6 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
@@ -141,7 +141,7 @@ &i2c2 {
status = "okay";
clock-frequency = <100000>;
i2c-sda-falling-time-ns = <890>; /* hcnt */
- i2c-sdl-falling-time-ns = <890>; /* lcnt */
+ i2c-scl-falling-time-ns = <890>; /* lcnt */

adc@14 {
compatible = "lltc,ltc2497";
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts b/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts
index 4a3df2b77b0b..6720ddf59783 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts
@@ -141,7 +141,7 @@ can0: can@0 {
};

&gpio1 {
- gpio-line-names = "nINT_ETHPHY", "LED_RED", "WDOG_INT", "X_RTC_INT",
+ gpio-line-names = "", "LED_RED", "WDOG_INT", "X_RTC_INT",
"", "", "", "RESET_ETHPHY",
"CAN_nINT", "CAN_EN", "nENABLE_FLATLINK", "",
"USB_OTG_VBUS_EN", "", "LED_GREEN", "LED_BLUE";
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi
index 995b44efb1b6..9d9b103c79c7 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi
@@ -111,7 +111,7 @@ som_flash: flash@0 {
};

&gpio1 {
- gpio-line-names = "nINT_ETHPHY", "", "WDOG_INT", "X_RTC_INT",
+ gpio-line-names = "", "", "WDOG_INT", "X_RTC_INT",
"", "", "", "RESET_ETHPHY",
"", "", "nENABLE_FLATLINK";
};
@@ -210,7 +210,7 @@ regulator-state-mem {
};
};

- reg_vdd_gpu: buck3 {
+ reg_vdd_vpu: buck3 {
regulator-always-on;
regulator-boot-on;
regulator-max-microvolt = <1000000>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts
index 8e861b920d09..7c9b60f4da92 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts
@@ -559,6 +559,10 @@ &pcie0 {
status = "okay";
};

+&disp_blk_ctrl {
+ status = "disabled";
+};
+
&pgc_mipi {
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts
index a67771d02146..46a07dfc0086 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts
@@ -617,6 +617,10 @@ &pcie0 {
status = "okay";
};

+&disp_blk_ctrl {
+ status = "disabled";
+};
+
&pgc_mipi {
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
index d053ef302fb8..faafefe562e4 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
@@ -351,7 +351,7 @@ MX8MN_IOMUXC_ENET_TXC_ENET1_RGMII_TXC 0x1f
MX8MN_IOMUXC_ENET_RXC_ENET1_RGMII_RXC 0x91
MX8MN_IOMUXC_ENET_RX_CTL_ENET1_RGMII_RX_CTL 0x91
MX8MN_IOMUXC_ENET_TX_CTL_ENET1_RGMII_TX_CTL 0x1f
- MX8MN_IOMUXC_GPIO1_IO09_GPIO1_IO9 0x19
+ MX8MN_IOMUXC_GPIO1_IO09_GPIO1_IO9 0x159
>;
};

diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
index 4724ed0cbff9..bf8f02c1535c 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
@@ -756,7 +756,7 @@ pgc_vpu: power-domain@6 {
<&clk IMX8MQ_SYS1_PLL_800M>,
<&clk IMX8MQ_VPU_PLL>;
assigned-clock-rates = <600000000>,
- <600000000>,
+ <300000000>,
<800000000>,
<0>;
};
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 59aaf2e68833..356036babd09 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -634,7 +634,7 @@ static void fpsimd_to_sve(struct task_struct *task)
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;

- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
return;

vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
@@ -660,7 +660,7 @@ static void sve_to_fpsimd(struct task_struct *task)
unsigned int i;
__uint128_t const *p;

- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
return;

vl = thread_get_cur_vl(&task->thread);
@@ -791,7 +791,8 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;

- if (!test_tsk_thread_flag(task, TIF_SVE))
+ if (!test_tsk_thread_flag(task, TIF_SVE) &&
+ !thread_sm_enabled(&task->thread))
return;

vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
@@ -863,7 +864,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
*/
task->thread.svcr &= ~(SVCR_SM_MASK |
SVCR_ZA_MASK);
- clear_thread_flag(TIF_SME);
+ clear_tsk_thread_flag(task, TIF_SME);
free_sme = true;
}
}
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 92bc9a2d702c..f19f020ccff9 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -937,11 +937,13 @@ static int sve_set_common(struct task_struct *target,
/*
* Ensure target->thread.sve_state is up to date with target's
* FPSIMD regs, so that a short copyin leaves trailing
- * registers unmodified. Always enable SVE even if going into
- * streaming mode.
+ * registers unmodified. Only enable SVE if we are
+ * configuring normal SVE, a system with streaming SVE may not
+ * have normal SVE.
*/
fpsimd_sync_to_sve(target);
- set_tsk_thread_flag(target, TIF_SVE);
+ if (type == ARM64_VEC_SVE)
+ set_tsk_thread_flag(target, TIF_SVE);

BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
start = SVE_PT_SVE_OFFSET;
diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
index 46c31fb8748d..30a12d208687 100644
--- a/arch/powerpc/include/asm/word-at-a-time.h
+++ b/arch/powerpc/include/asm/word-at-a-time.h
@@ -34,7 +34,7 @@ static inline long find_zero(unsigned long mask)
return leading_zero_bits >> 3;
}

-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
{
unsigned long rhs = val | c->low_bits;
*data = rhs;
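
The return-type change keeps the powerpc variant in line with the generic
<asm-generic/word-at-a-time.h> header, which hands back the full per-byte
mask rather than a bool truncated to 0/1. As a rough standalone sketch
(editor's illustration of the underlying bit trick, not the kernel header
itself), each zero byte of a little-endian 64-bit word yields 0x80 in the
corresponding byte of the result:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ONES	0x0101010101010101ULL
#define HIGHS	0x8080808080808080ULL

static uint64_t has_zero_mask(uint64_t v)
{
	/* Bytes above the first NUL may also be flagged by borrow
	 * propagation; find_zero()-style users only consume the
	 * lowest set bit, which is always exact. */
	return (v - ONES) & ~v & HIGHS;
}

int main(void)
{
	uint64_t w;

	memcpy(&w, "abc\0defg", 8);
	printf("%016llx\n", (unsigned long long)has_zero_mask(w));
	/* prints 0000000080000000: the NUL is in byte 3 */
	return 0;
}
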
diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S
index d031093bc436..6f9c2dea905b 100644
--- a/arch/powerpc/kernel/trace/ftrace_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S
@@ -33,6 +33,9 @@
* and then arrange for the ftrace function to be called.
*/
.macro ftrace_regs_entry allregs
+ /* Create a minimal stack frame for representing B */
+ PPC_STLU r1, -STACK_FRAME_MIN_SIZE(r1)
+
/* Create our stack frame + pt_regs */
PPC_STLU r1,-SWITCH_FRAME_SIZE(r1)

@@ -42,7 +45,7 @@

#ifdef CONFIG_PPC64
/* Save the original return address in A's stack frame */
- std r0, LRSAVE+SWITCH_FRAME_SIZE(r1)
+ std r0, LRSAVE+SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE(r1)
/* Ok to continue? */
lbz r3, PACA_FTRACE_ENABLED(r13)
cmpdi r3, 0
@@ -77,6 +80,8 @@
mflr r7
/* Save it as pt_regs->nip */
PPC_STL r7, _NIP(r1)
+ /* Also save it in B's stackframe header for proper unwind */
+ PPC_STL r7, LRSAVE+SWITCH_FRAME_SIZE(r1)
/* Save the read LR in pt_regs->link */
PPC_STL r0, _LINK(r1)

@@ -142,7 +147,7 @@
#endif

/* Pop our stack frame */
- addi r1, r1, SWITCH_FRAME_SIZE
+ addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE

#ifdef CONFIG_LIVEPATCH_64
/* Based on the cmpd above, if the NIP was altered handle livepatch */
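
The net effect of these hunks on the stack is easier to see as a sketch
(editor's illustration; the names follow the macros used above):

/*
 * Stack after ftrace_regs_entry, growing downward:
 *
 *	... A's (the traced function's) frame ...
 *	---------------------------------------  <- r1 on entry
 *	dummy frame standing in for B		  (STACK_FRAME_MIN_SIZE)
 *	---------------------------------------
 *	pt_regs / switch frame			  (SWITCH_FRAME_SIZE)
 *	---------------------------------------  <- r1 while ftrace runs
 *
 * Every offset that was SWITCH_FRAME_SIZE-relative therefore grows by
 * STACK_FRAME_MIN_SIZE, and the exit path pops both frames with a
 * single addi of SWITCH_FRAME_SIZE + STACK_FRAME_MIN_SIZE.
 */
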
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index fe1b83020e0d..0ec5b45b1e86 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -314,8 +314,7 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
start = ALIGN_DOWN(start, page_size);
if (altmap) {
alt_start = altmap->base_pfn;
- alt_end = altmap->base_pfn + altmap->reserve +
- altmap->free + altmap->alloc + altmap->align;
+ alt_end = altmap->base_pfn + altmap->reserve + altmap->free;
}

pr_debug("vmemmap_free %lx...%lx\n", start, end);
diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
index 4d141e2c132e..2ea7f208f0e7 100644
--- a/arch/s390/kernel/sthyi.c
+++ b/arch/s390/kernel/sthyi.c
@@ -459,9 +459,9 @@ static int sthyi_update_cache(u64 *rc)
*
* Fills the destination with system information returned by the STHYI
* instruction. The data is generated by emulation or execution of STHYI,
- * if available. The return value is the condition code that would be
- * returned, the rc parameter is the return code which is passed in
- * register R2 + 1.
+ * if available. The return value is either a negative error value or
+ * the condition code that would be returned; the rc parameter is the
+ * return code which is passed in register R2 + 1.
*/
int sthyi_fill(void *dst, u64 *rc)
{
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index ee7478a60144..b37bb960bfaf 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -389,8 +389,8 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
*/
int handle_sthyi(struct kvm_vcpu *vcpu)
{
- int reg1, reg2, r = 0;
- u64 code, addr, cc = 0, rc = 0;
+ int reg1, reg2, cc = 0, r = 0;
+ u64 code, addr, rc = 0;
struct sthyi_sctns *sctns = NULL;

if (!test_kvm_facility(vcpu->kvm, 74))
@@ -421,7 +421,10 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
return -ENOMEM;

cc = sthyi_fill(sctns, &rc);
-
+ if (cc < 0) {
+ free_page((unsigned long)sctns);
+ return cc;
+ }
out:
if (!cc) {
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 85863b9c9e68..189ae92de4d0 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -14,6 +14,7 @@
#include <asm/apic.h>
#include <asm/desc.h>
#include <asm/sev.h>
+#include <asm/ibt.h>
#include <asm/hypervisor.h>
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
@@ -467,6 +468,26 @@ void __init hyperv_init(void)
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
}

+ /*
+ * Some versions of Hyper-V that provide IBT in guest VMs have a bug
+ * in that there's no ENDBR64 instruction at the entry to the
+ * hypercall page. Because hypercalls are invoked via an indirect call
+ * to the hypercall page, all hypercall attempts fail when IBT is
+ * enabled, and Linux panics. For such buggy versions, disable IBT.
+ *
+ * Fixed versions of Hyper-V always provide ENDBR64 on the hypercall
+ * page, so if future Linux kernel versions enable IBT for 32-bit
+ * builds, additional hypercall page hackery will be required here
+ * to provide an ENDBR32.
+ */
+#ifdef CONFIG_X86_KERNEL_IBT
+ if (cpu_feature_enabled(X86_FEATURE_IBT) &&
+ *(u32 *)hv_hypercall_pg != gen_endbr()) {
+ setup_clear_cpu_cap(X86_FEATURE_IBT);
+ pr_warn("Hyper-V: Disabling IBT because of Hyper-V bug\n");
+ }
+#endif
+
/*
* hyperv_init() is called before LAPIC is initialized: see
* apic_intr_mode_init() -> x86_platform.apic_post_init() and
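
The check boils down to comparing the first 32-bit word of the hypercall
page with the ENDBR64 encoding (bytes F3 0F 1E FA, i.e. 0xfa1e0ff3 when
read as a little-endian u32, which is what gen_endbr() expands to). A
minimal standalone sketch of the same test, as an editor's illustration:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* ENDBR64 encodes as F3 0F 1E FA; read back as a little-endian
 * 32-bit value that is 0xfa1e0ff3. */
static bool starts_with_endbr64(const void *entry)
{
	uint32_t insn;

	memcpy(&insn, entry, sizeof(insn));
	return insn == 0xfa1e0ff3u;
}
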
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index e721b8426c24..b122708792c4 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -476,4 +476,5 @@

/* BUG word 2 */
#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
+#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c13e4ff8ec70..45bf26862b99 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -801,10 +801,12 @@ extern u16 get_llc_id(unsigned int cpu);
extern u32 amd_get_nodes_per_socket(void);
extern u32 amd_get_highest_perf(void);
extern bool cpu_has_ibpb_brtype_microcode(void);
+extern void amd_clear_divider(void);
#else
static inline u32 amd_get_nodes_per_socket(void) { return 0; }
static inline u32 amd_get_highest_perf(void) { return 0; }
static inline bool cpu_has_ibpb_brtype_microcode(void) { return false; }
+static inline void amd_clear_divider(void) { }
#endif

#define for_each_possible_hypervisor_cpuid_base(function) \
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7f0cf4a959c0..43910eb55b2e 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -75,6 +75,10 @@ static const int amd_zenbleed[] =
AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf),
AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf));

+static const int amd_div0[] =
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf),
+ AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf));
+
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
{
int osvw_id = *erratum++;
@@ -1115,6 +1119,11 @@ static void init_amd(struct cpuinfo_x86 *c)
check_null_seg_clears_base(c);

zenbleed_check(c);
+
+ if (cpu_has_amd_erratum(c, amd_div0)) {
+ pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
+ setup_force_cpu_bug(X86_BUG_DIV0);
+ }
}

#ifdef CONFIG_X86_32
@@ -1275,3 +1284,13 @@ void amd_check_microcode(void)
{
on_each_cpu(zenbleed_check_cpu, NULL, 1);
}
+
+/*
+ * Issue a DIV 0/1 insn to clear any division data from previous DIV
+ * operations.
+ */
+void noinstr amd_clear_divider(void)
+{
+ asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0)
+ :: "a" (0), "d" (0), "r" (1));
+}
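
The ALTERNATIVE site is patched in only on CPUs flagged X86_BUG_DIV0:
dividing 0 by 1 is architecturally a no-op (quotient 0, remainder 0, no
#DE possible) but overwrites the divide unit's internal state. A hedged
standalone sketch of the same scrub, without the kernel's runtime
patching:

/*
 * Editor's sketch: DIVL divides EDX:EAX by the operand, so with
 * EDX:EAX = 0 and a divisor of 1 it cannot fault, yet it flushes any
 * stale data left in the divider by a previous DIV.
 */
static inline void clear_divider_state(void)
{
	unsigned int lo = 0, hi = 0;

	asm volatile("divl %2" : "+a" (lo), "+d" (hi) : "r" (1u));
}
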
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c0a5a4f225d9..7e8795d8b0f1 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -206,6 +206,8 @@ DEFINE_IDTENTRY(exc_divide_error)
{
do_error_trap(regs, 0, "divide error", X86_TRAP_DE, SIGFPE,
FPE_INTDIV, error_get_trap_addr(regs));
+
+ amd_clear_divider();
}

DEFINE_IDTENTRY(exc_overflow)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index daec0321cd76..74ef3da54536 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3676,7 +3676,7 @@ static int rbd_lock(struct rbd_device *rbd_dev)
ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
RBD_LOCK_NAME, CEPH_CLS_LOCK_EXCLUSIVE, cookie,
RBD_LOCK_TAG, "", 0);
- if (ret)
+ if (ret && ret != -EEXIST)
return ret;

__rbd_lock(rbd_dev, cookie);
@@ -3879,7 +3879,7 @@ static struct ceph_locker *get_lock_owner_info(struct rbd_device *rbd_dev)
&rbd_dev->header_oloc, RBD_LOCK_NAME,
&lock_type, &lock_tag, &lockers, &num_lockers);
if (ret) {
- rbd_warn(rbd_dev, "failed to retrieve lockers: %d", ret);
+ rbd_warn(rbd_dev, "failed to get header lockers: %d", ret);
return ERR_PTR(ret);
}

@@ -3941,8 +3941,10 @@ static int find_watcher(struct rbd_device *rbd_dev,
ret = ceph_osdc_list_watchers(osdc, &rbd_dev->header_oid,
&rbd_dev->header_oloc, &watchers,
&num_watchers);
- if (ret)
+ if (ret) {
+ rbd_warn(rbd_dev, "failed to get watchers: %d", ret);
return ret;
+ }

sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie);
for (i = 0; i < num_watchers; i++) {
@@ -3986,8 +3988,12 @@ static int rbd_try_lock(struct rbd_device *rbd_dev)
locker = refreshed_locker = NULL;

ret = rbd_lock(rbd_dev);
- if (ret != -EBUSY)
+ if (!ret)
+ goto out;
+ if (ret != -EBUSY) {
+ rbd_warn(rbd_dev, "failed to lock header: %d", ret);
goto out;
+ }

/* determine if the current lock holder is still alive */
locker = get_lock_owner_info(rbd_dev);
@@ -4090,11 +4096,8 @@ static int rbd_try_acquire_lock(struct rbd_device *rbd_dev)

ret = rbd_try_lock(rbd_dev);
if (ret < 0) {
- rbd_warn(rbd_dev, "failed to lock header: %d", ret);
- if (ret == -EBLOCKLISTED)
- goto out;
-
- ret = 1; /* request lock anyway */
+ rbd_warn(rbd_dev, "failed to acquire lock: %d", ret);
+ goto out;
}
if (ret > 0) {
up_write(&rbd_dev->lock_rwsem);
@@ -6628,12 +6631,11 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
cancel_delayed_work_sync(&rbd_dev->lock_dwork);
if (!ret)
ret = -ETIMEDOUT;
- }

- if (ret) {
- rbd_warn(rbd_dev, "failed to acquire exclusive lock: %ld", ret);
- return ret;
+ rbd_warn(rbd_dev, "failed to acquire lock: %ld", ret);
}
+ if (ret)
+ return ret;

/*
* The lock may have been released by now, unless automatic lock
diff --git a/drivers/clk/imx/clk-imx93.c b/drivers/clk/imx/clk-imx93.c
index 5e3d299190c8..61d9f9bf86e6 100644
--- a/drivers/clk/imx/clk-imx93.c
+++ b/drivers/clk/imx/clk-imx93.c
@@ -288,7 +288,7 @@ static int imx93_clocks_probe(struct platform_device *pdev)
anatop_base = devm_of_iomap(dev, np, 0, NULL);
of_node_put(np);
if (WARN_ON(IS_ERR(anatop_base))) {
- ret = PTR_ERR(base);
+ ret = PTR_ERR(anatop_base);
goto unregister_hws;
}

diff --git a/drivers/firmware/arm_scmi/mailbox.c b/drivers/firmware/arm_scmi/mailbox.c
index a455f3c0e98b..25d31dfdad15 100644
--- a/drivers/firmware/arm_scmi/mailbox.c
+++ b/drivers/firmware/arm_scmi/mailbox.c
@@ -106,8 +106,10 @@ static int mailbox_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
return -ENOMEM;

shmem = of_parse_phandle(cdev->of_node, "shmem", idx);
- if (!of_device_is_compatible(shmem, "arm,scmi-shmem"))
+ if (!of_device_is_compatible(shmem, "arm,scmi-shmem")) {
+ of_node_put(shmem);
return -ENXIO;
+ }

ret = of_address_to_resource(shmem, 0, &res);
of_node_put(shmem);
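
Both SCMI fixes follow the usual OF refcount discipline: of_parse_phandle()
returns the node with its refcount raised, so every exit path, including
the early compatible-check failure, must drop it with of_node_put() (a
no-op for NULL). A minimal sketch of the resulting pattern, mirroring the
hunk above (map_shmem_resource is a made-up helper name):

#include <linux/of.h>
#include <linux/of_address.h>

static int map_shmem_resource(struct device_node *parent,
			      struct resource *res)
{
	struct device_node *np = of_parse_phandle(parent, "shmem", 0);
	int ret;

	if (!of_device_is_compatible(np, "arm,scmi-shmem")) {
		of_node_put(np);	/* drop the phandle reference */
		return -ENXIO;
	}

	ret = of_address_to_resource(np, 0, res);
	of_node_put(np);		/* done with the node either way */
	return ret;
}
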
diff --git a/drivers/firmware/arm_scmi/smc.c b/drivers/firmware/arm_scmi/smc.c
index 87a7b13cf868..ac0bd51ef16a 100644
--- a/drivers/firmware/arm_scmi/smc.c
+++ b/drivers/firmware/arm_scmi/smc.c
@@ -23,6 +23,7 @@
/**
* struct scmi_smc - Structure representing a SCMI smc transport
*
+ * @irq: An optional IRQ for completion
* @cinfo: SCMI channel info
* @shmem: Transmit/Receive shared memory area
* @shmem_lock: Lock to protect access to Tx/Rx shared memory area.
@@ -33,6 +34,7 @@
*/

struct scmi_smc {
+ int irq;
struct scmi_chan_info *cinfo;
struct scmi_shared_mem __iomem *shmem;
/* Protect access to shmem area */
@@ -106,7 +108,7 @@ static int smc_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
struct resource res;
struct device_node *np;
u32 func_id;
- int ret, irq;
+ int ret;

if (!tx)
return -ENODEV;
@@ -116,8 +118,10 @@ static int smc_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
return -ENOMEM;

np = of_parse_phandle(cdev->of_node, "shmem", 0);
- if (!of_device_is_compatible(np, "arm,scmi-shmem"))
+ if (!of_device_is_compatible(np, "arm,scmi-shmem")) {
+ of_node_put(np);
return -ENXIO;
+ }

ret = of_address_to_resource(np, 0, &res);
of_node_put(np);
@@ -142,11 +146,10 @@ static int smc_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
* completion of a message is signaled by an interrupt rather than by
* the return of the SMC call.
*/
- irq = of_irq_get_byname(cdev->of_node, "a2p");
- if (irq > 0) {
- ret = devm_request_irq(dev, irq, smc_msg_done_isr,
- IRQF_NO_SUSPEND,
- dev_name(dev), scmi_info);
+ scmi_info->irq = of_irq_get_byname(cdev->of_node, "a2p");
+ if (scmi_info->irq > 0) {
+ ret = request_irq(scmi_info->irq, smc_msg_done_isr,
+ IRQF_NO_SUSPEND, dev_name(dev), scmi_info);
if (ret) {
dev_err(dev, "failed to setup SCMI smc irq\n");
return ret;
@@ -168,6 +171,10 @@ static int smc_chan_free(int id, void *p, void *data)
struct scmi_chan_info *cinfo = p;
struct scmi_smc *scmi_info = cinfo->transport_info;

+ /* Ignore any possible further reception on the IRQ path */
+ if (scmi_info->irq > 0)
+ free_irq(scmi_info->irq, scmi_info);
+
cinfo->transport_info = NULL;
scmi_info->cinfo = NULL;

diff --git a/drivers/firmware/smccc/soc_id.c b/drivers/firmware/smccc/soc_id.c
index 890eb454599a..1990263fbba0 100644
--- a/drivers/firmware/smccc/soc_id.c
+++ b/drivers/firmware/smccc/soc_id.c
@@ -34,7 +34,6 @@ static struct soc_device_attribute *soc_dev_attr;

static int __init smccc_soc_init(void)
{
- struct arm_smccc_res res;
int soc_id_rev, soc_id_version;
static char soc_id_str[20], soc_id_rev_str[12];
static char soc_id_jep106_id_str[12];
@@ -49,13 +48,13 @@ static int __init smccc_soc_init(void)
}

if (soc_id_version < 0) {
- pr_err("ARCH_SOC_ID(0) returned error: %lx\n", res.a0);
+ pr_err("Invalid SoC Version: %x\n", soc_id_version);
return -EINVAL;
}

soc_id_rev = arm_smccc_get_soc_id_revision();
if (soc_id_rev < 0) {
- pr_err("ARCH_SOC_ID(1) returned error: %lx\n", res.a0);
+ pr_err("Invalid SoC Revision: %x\n", soc_id_rev);
return -EINVAL;
}

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index b81b77a9efa6..9b97fa39d47a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -101,39 +101,97 @@ void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev)
}
}

+static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev,
+ struct vram_usagebyfirmware_v2_1 *fw_usage, int *usage_bytes)
+{
+ uint32_t start_addr, fw_size, drv_size;
+
+ start_addr = le32_to_cpu(fw_usage->start_address_in_kb);
+ fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb);
+ drv_size = le16_to_cpu(fw_usage->used_by_driver_in_kb);
+
+ DRM_DEBUG("atom firmware v2_1 requested %08x %dkb fw %dkb drv\n",
+ start_addr,
+ fw_size,
+ drv_size);
+
+ if ((start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
+ (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
+ ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
+ /* Firmware requests VRAM reservation for SR-IOV */
+ adev->mman.fw_vram_usage_start_offset = (start_addr &
+ (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+ adev->mman.fw_vram_usage_size = fw_size << 10;
+ /* Use the default scratch size */
+ *usage_bytes = 0;
+ } else {
+ *usage_bytes = drv_size << 10;
+ }
+ return 0;
+}
+
+static int amdgpu_atomfirmware_allocate_fb_v2_2(struct amdgpu_device *adev,
+ struct vram_usagebyfirmware_v2_2 *fw_usage, int *usage_bytes)
+{
+ uint32_t fw_start_addr, fw_size, drv_start_addr, drv_size;
+
+ fw_start_addr = le32_to_cpu(fw_usage->fw_region_start_address_in_kb);
+ fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb);
+
+ drv_start_addr = le32_to_cpu(fw_usage->driver_region0_start_address_in_kb);
+ drv_size = le32_to_cpu(fw_usage->used_by_driver_region0_in_kb);
+
+ DRM_DEBUG("atom requested fw start at %08x %dkb and drv start at %08x %dkb\n",
+ fw_start_addr,
+ fw_size,
+ drv_start_addr,
+ drv_size);
+
+ if ((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << 30)) == 0) {
+ /* Firmware requests VRAM reservation for SR-IOV */
+ adev->mman.fw_vram_usage_start_offset = (fw_start_addr &
+ (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+ adev->mman.fw_vram_usage_size = fw_size << 10;
+ }
+
+ if ((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << 30)) == 0) {
+ /* Driver requests VRAM reservation for SR-IOV */
+ adev->mman.drv_vram_usage_start_offset = (drv_start_addr &
+ (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+ adev->mman.drv_vram_usage_size = drv_size << 10;
+ }
+
+ *usage_bytes = 0;
+ return 0;
+}
+
int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
{
struct atom_context *ctx = adev->mode_info.atom_context;
int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
vram_usagebyfirmware);
- struct vram_usagebyfirmware_v2_1 *firmware_usage;
- uint32_t start_addr, size;
+ struct vram_usagebyfirmware_v2_1 *fw_usage_v2_1;
+ struct vram_usagebyfirmware_v2_2 *fw_usage_v2_2;
uint16_t data_offset;
+ uint8_t frev, crev;
int usage_bytes = 0;

- if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) {
- firmware_usage = (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
- DRM_DEBUG("atom firmware requested %08x %dkb fw %dkb drv\n",
- le32_to_cpu(firmware_usage->start_address_in_kb),
- le16_to_cpu(firmware_usage->used_by_firmware_in_kb),
- le16_to_cpu(firmware_usage->used_by_driver_in_kb));
-
- start_addr = le32_to_cpu(firmware_usage->start_address_in_kb);
- size = le16_to_cpu(firmware_usage->used_by_firmware_in_kb);
-
- if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
- (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
- ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
- /* Firmware request VRAM reservation for SR-IOV */
- adev->mman.fw_vram_usage_start_offset = (start_addr &
- (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
- adev->mman.fw_vram_usage_size = size << 10;
- /* Use the default scratch size */
- usage_bytes = 0;
- } else {
- usage_bytes = le16_to_cpu(firmware_usage->used_by_driver_in_kb) << 10;
+ if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {
+ if (frev == 2 && crev == 1) {
+ fw_usage_v2_1 =
+ (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
+ amdgpu_atomfirmware_allocate_fb_v2_1(adev,
+ fw_usage_v2_1,
+ &usage_bytes);
+ } else if (frev >= 2 && crev >= 2) {
+ fw_usage_v2_2 =
+ (struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset);
+ amdgpu_atomfirmware_allocate_fb_v2_2(adev,
+ fw_usage_v2_2,
+ &usage_bytes);
}
}
+
ctx->scratch_size_bytes = 0;
if (usage_bytes == 0)
usage_bytes = 20 * 1024;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index ad8cb9e6d1ab..0ee7c935fba1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -347,17 +347,16 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
* @adev: amdgpu device object
* @offset: offset of the BO
* @size: size of the BO
- * @domain: where to place it
* @bo_ptr: used to initialize BOs in structures
* @cpu_addr: optional CPU address mapping
*
- * Creates a kernel BO at a specific offset in the address space of the domain.
+ * Creates a kernel BO at a specific offset in VRAM.
*
* Returns:
* 0 on success, negative error code otherwise.
*/
int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
- uint64_t offset, uint64_t size, uint32_t domain,
+ uint64_t offset, uint64_t size,
struct amdgpu_bo **bo_ptr, void **cpu_addr)
{
struct ttm_operation_ctx ctx = { false, false };
@@ -367,8 +366,9 @@ int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
offset &= PAGE_MASK;
size = ALIGN(size, PAGE_SIZE);

- r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, domain, bo_ptr,
- NULL, cpu_addr);
+ r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL,
+ cpu_addr);
if (r)
return r;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 147b79c10cbb..93207badf83f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -284,7 +284,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
u32 domain, struct amdgpu_bo **bo_ptr,
u64 *gpu_addr, void **cpu_addr);
int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
- uint64_t offset, uint64_t size, uint32_t domain,
+ uint64_t offset, uint64_t size,
struct amdgpu_bo **bo_ptr, void **cpu_addr);
int amdgpu_bo_create_user(struct amdgpu_device *adev,
struct amdgpu_bo_param *bp,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index b64938ed8cb6..10469f20a10c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1537,6 +1537,23 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
NULL, &adev->mman.fw_vram_usage_va);
}

+/*
+ * Driver Reservation functions
+ */
+/**
+ * amdgpu_ttm_drv_reserve_vram_fini - free drv reserved vram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * free drv reserved vram if it has been reserved.
+ */
+static void amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->mman.drv_vram_usage_reserved_bo,
+ NULL,
+ NULL);
+}
+
/**
* amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
*
@@ -1558,11 +1575,34 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
return amdgpu_bo_create_kernel_at(adev,
adev->mman.fw_vram_usage_start_offset,
adev->mman.fw_vram_usage_size,
- AMDGPU_GEM_DOMAIN_VRAM,
&adev->mman.fw_vram_usage_reserved_bo,
&adev->mman.fw_vram_usage_va);
}

+/**
+ * amdgpu_ttm_drv_reserve_vram_init - create bo vram reservation from driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * create bo vram reservation from drv.
+ */
+static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
+{
+ uint64_t vram_size = adev->gmc.visible_vram_size;
+
+ adev->mman.drv_vram_usage_reserved_bo = NULL;
+
+ if (adev->mman.drv_vram_usage_size == 0 ||
+ adev->mman.drv_vram_usage_size > vram_size)
+ return 0;
+
+ return amdgpu_bo_create_kernel_at(adev,
+ adev->mman.drv_vram_usage_start_offset,
+ adev->mman.drv_vram_usage_size,
+ &adev->mman.drv_vram_usage_reserved_bo,
+ NULL);
+}
+
/*
* Memoy training reservation functions
*/
@@ -1585,14 +1625,15 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
return 0;
}

-static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
+static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev,
+ uint32_t reserve_size)
{
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;

memset(ctx, 0, sizeof(*ctx));

ctx->c2p_train_data_offset =
- ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M);
+ ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M);
ctx->p2c_train_data_offset =
(adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
ctx->train_data_size =
@@ -1610,9 +1651,10 @@ static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
*/
static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
{
- int ret;
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
bool mem_train_support = false;
+ uint32_t reserve_size = 0;
+ int ret;

if (!amdgpu_sriov_vf(adev)) {
if (amdgpu_atomfirmware_mem_training_supported(adev))
@@ -1628,18 +1670,18 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
* Otherwise, fallback to legacy approach to check and reserve tmr block for ip
* discovery data and G6 memory training data respectively
*/
- adev->mman.discovery_tmr_size =
- amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
- if (!adev->mman.discovery_tmr_size)
- adev->mman.discovery_tmr_size = DISCOVERY_TMR_OFFSET;
+ if (adev->bios)
+ reserve_size =
+ amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
+ if (!reserve_size)
+ reserve_size = DISCOVERY_TMR_OFFSET;

if (mem_train_support) {
/* reserve vram for mem train according to TMR location */
- amdgpu_ttm_training_data_block_init(adev);
+ amdgpu_ttm_training_data_block_init(adev, reserve_size);
ret = amdgpu_bo_create_kernel_at(adev,
ctx->c2p_train_data_offset,
ctx->train_data_size,
- AMDGPU_GEM_DOMAIN_VRAM,
&ctx->c2p_bo,
NULL);
if (ret) {
@@ -1651,14 +1693,14 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
}

ret = amdgpu_bo_create_kernel_at(adev,
- adev->gmc.real_vram_size - adev->mman.discovery_tmr_size,
- adev->mman.discovery_tmr_size,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->mman.discovery_memory,
+ adev->gmc.real_vram_size - reserve_size,
+ reserve_size,
+ &adev->mman.fw_reserved_memory,
NULL);
if (ret) {
DRM_ERROR("alloc tmr failed(%d)!\n", ret);
- amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
+ NULL, NULL);
return ret;
}

@@ -1730,6 +1772,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
return r;
}

+ /*
+ * The reserved VRAM for the driver must be pinned to the specified
+ * place in VRAM, so reserve it early.
+ */
+ r = amdgpu_ttm_drv_reserve_vram_init(adev);
+ if (r)
+ return r;
+
/*
* only NAVI10 and onwards ASIC support for IP discovery.
* If IP discovery enabled, a block of memory should be
@@ -1746,21 +1796,18 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
* avoid display artifacts while transitioning between pre-OS
* and driver. */
r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size,
- AMDGPU_GEM_DOMAIN_VRAM,
&adev->mman.stolen_vga_memory,
NULL);
if (r)
return r;
r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
adev->mman.stolen_extended_size,
- AMDGPU_GEM_DOMAIN_VRAM,
&adev->mman.stolen_extended_memory,
NULL);
if (r)
return r;
r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset,
adev->mman.stolen_reserved_size,
- AMDGPU_GEM_DOMAIN_VRAM,
&adev->mman.stolen_reserved_memory,
NULL);
if (r)
@@ -1847,14 +1894,16 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
/* return the stolen vga memory back to VRAM */
amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
- /* return the IP Discovery TMR memory back to VRAM */
- amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
+ /* return the FW reserved memory back to VRAM */
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
+ NULL);
if (adev->mman.stolen_reserved_size)
amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
NULL, NULL);
amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
&adev->mman.sdma_access_ptr);
amdgpu_ttm_fw_reserve_vram_fini(adev);
+ amdgpu_ttm_drv_reserve_vram_fini(adev);

if (drm_dev_enter(adev_to_drm(adev), &idx)) {

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index a37207011a69..0fefa5e3a524 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -78,7 +78,8 @@ struct amdgpu_mman {
/* discovery */
uint8_t *discovery_bin;
uint32_t discovery_tmr_size;
- struct amdgpu_bo *discovery_memory;
+ /* fw reserved memory */
+ struct amdgpu_bo *fw_reserved_memory;

/* firmware VRAM reservation */
u64 fw_vram_usage_start_offset;
@@ -86,6 +87,11 @@ struct amdgpu_mman {
struct amdgpu_bo *fw_vram_usage_reserved_bo;
void *fw_vram_usage_va;

+ /* driver VRAM reservation */
+ u64 drv_vram_usage_start_offset;
+ u64 drv_vram_usage_size;
+ struct amdgpu_bo *drv_vram_usage_reserved_bo;
+
/* PAGE_SIZE'd BO for process memory r/w over SDMA. */
struct amdgpu_bo *sdma_access_bo;
void *sdma_access_ptr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index c73abe54d974..81549f1edfe0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -391,7 +391,6 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
*/
if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
AMDGPU_GPU_PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
&bo, NULL))
DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 9b2915764306..86e07cc1d3dc 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -351,6 +351,19 @@ static inline bool is_dc_timing_adjust_needed(struct dm_crtc_state *old_state,
return false;
}

+static inline void reverse_planes_order(struct dc_surface_update *array_of_surface_update,
+ int planes_count)
+{
+ int i, j;
+ struct dc_surface_update surface_updates_temp;
+
+ for (i = 0, j = planes_count - 1; i < j; i++, j--) {
+ surface_updates_temp = array_of_surface_update[i];
+ array_of_surface_update[i] = array_of_surface_update[j];
+ array_of_surface_update[j] = surface_updates_temp;
+ }
+}
+
/**
* update_planes_and_stream_adapter() - Send planes to be updated in DC
*
@@ -367,6 +380,8 @@ static inline bool update_planes_and_stream_adapter(struct dc *dc,
struct dc_stream_update *stream_update,
struct dc_surface_update *array_of_surface_update)
{
+ reverse_planes_order(array_of_surface_update, planes_count);
+
/*
* Previous frame finished and HW is ready for optimization.
*/
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 5d53e54ebe90..c2c6c4587a5c 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -2092,6 +2092,7 @@ static enum dc_status enable_link_dp_mst(
struct pipe_ctx *pipe_ctx)
{
struct dc_link *link = pipe_ctx->stream->link;
+ unsigned char mstm_cntl;

/* sink signal type after MST branch is MST. Multiple MST sinks
* share one link. Link DP PHY is enable or training only once.
@@ -2100,7 +2101,9 @@ static enum dc_status enable_link_dp_mst(
return DC_OK;

/* clear payload table */
- dm_helpers_dp_mst_clear_payload_allocation_table(link->ctx, link);
+ core_link_read_dpcd(link, DP_MSTM_CTRL, &mstm_cntl, 1);
+ if (mstm_cntl & DP_MST_EN)
+ dm_helpers_dp_mst_clear_payload_allocation_table(link->ctx, link);

/* to make sure the pending down rep can be processed
* before enabling the link
diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h
index ff855cb21d3f..bbe1337a8cee 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -705,20 +705,65 @@ struct atom_gpio_pin_lut_v2_1
};


-/*
- ***************************************************************************
- Data Table vram_usagebyfirmware structure
- ***************************************************************************
-*/
+/*
+ * VBIOS/PRE-OS always reserves a FB region at the top of the frame buffer. The driver should
+ * not write to that region. The driver can allocate its own reservation region as long as it
+ * does not overlap the firmware's reservation region.
+ * if (pre-NV1X) atom data table firmwareInfoTable version < 3.3:
+ * in this case, atom data table vram_usagebyfirmwareTable version always <= 2.1
+ * if VBIOS/UEFI GOP is posted:
+ * VBIOS/UEFIGOP update used_by_firmware_in_kb = total reserved size by VBIOS
+ * update start_address_in_kb = total_mem_size_in_kb - used_by_firmware_in_kb;
+ * ( total_mem_size_in_kb = reg(CONFIG_MEMSIZE)<<10)
+ * driver can allocate driver reservation region under firmware reservation,
+ * used_by_driver_in_kb = driver reservation size
+ * driver reservation start address = (start_address_in_kb - used_by_driver_in_kb)
+ * Comment1[hchan]: There is only one reservation at the beginning of the FB reserved by
+ * host driver. Host driver would overwrite the table with the following
+ * used_by_firmware_in_kb = total reserved size for pf-vf info exchange and
+ * set the SRIOV_MSG_SHARE_RESERVATION mask; start_address_in_kb = 0
+ * else there is no VBIOS reservation region:
+ * driver must allocate driver reservation region at top of FB.
+ * driver set used_by_driver_in_kb = driver reservation size
+ * driver reservation start address = (total_mem_size_in_kb - used_by_driver_in_kb)
+ * same as Comment1
+ * else (NV1X and after):
+ * if VBIOS/UEFI GOP is posted:
+ * VBIOS/UEFIGOP update:
+ * used_by_firmware_in_kb = atom_firmware_Info_v3_3.fw_reserved_size_in_kb;
+ * start_address_in_kb = total_mem_size_in_kb - used_by_firmware_in_kb;
+ * (total_mem_size_in_kb = reg(CONFIG_MEMSIZE)<<10)
+ * if vram_usagebyfirmwareTable version <= 2.1:
+ * driver can allocate driver reservation region under firmware reservation,
+ * driver set used_by_driver_in_kb = driver reservation size
+ * driver reservation start address = start_address_in_kb - used_by_driver_in_kb
+ * same as Comment1
+ * else driver can:
+ * allocate its reservation anywhere as long as it does not overlap the pre-OS FW reservation area
+ * set used_by_driver_region0_in_kb = driver reservation size
+ * set driver_region0_start_address_in_kb = driver reservation region start address
+ * Comment2[hchan]: Host driver can set used_by_firmware_in_kb and start_address_in_kb to
+ * zero, as the reservation for the VF doesn’t exist. The host driver should also
+ * update atom_firmware_Info table to remove the same VBIOS reservation as well.
+ */

struct vram_usagebyfirmware_v2_1
{
- struct atom_common_table_header table_header;
- uint32_t start_address_in_kb;
- uint16_t used_by_firmware_in_kb;
- uint16_t used_by_driver_in_kb;
+ struct atom_common_table_header table_header;
+ uint32_t start_address_in_kb;
+ uint16_t used_by_firmware_in_kb;
+ uint16_t used_by_driver_in_kb;
};

+struct vram_usagebyfirmware_v2_2 {
+ struct atom_common_table_header table_header;
+ uint32_t fw_region_start_address_in_kb;
+ uint16_t used_by_firmware_in_kb;
+ uint16_t reserved;
+ uint32_t driver_region0_start_address_in_kb;
+ uint32_t used_by_driver_region0_in_kb;
+ uint32_t reserved32[7];
+};

/*
***************************************************************************
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index da9b995b54c8..96e679a176e9 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -7123,8 +7123,6 @@ static void intel_update_crtc(struct intel_atomic_state *state,

intel_fbc_update(state, crtc);

- drm_WARN_ON(&i915->drm, !intel_display_power_is_enabled(i915, POWER_DOMAIN_DC_OFF));
-
if (!modeset &&
(new_crtc_state->uapi.color_mgmt_changed ||
new_crtc_state->update_pipe))
@@ -7501,28 +7499,8 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
drm_atomic_helper_wait_for_dependencies(&state->base);
drm_dp_mst_atomic_wait_for_dependencies(&state->base);

- /*
- * During full modesets we write a lot of registers, wait
- * for PLLs, etc. Doing that while DC states are enabled
- * is not a good idea.
- *
- * During fastsets and other updates we also need to
- * disable DC states due to the following scenario:
- * 1. DC5 exit and PSR exit happen
- * 2. Some or all _noarm() registers are written
- * 3. Due to some long delay PSR is re-entered
- * 4. DC5 entry -> DMC saves the already written new
- * _noarm() registers and the old not yet written
- * _arm() registers
- * 5. DC5 exit -> DMC restores a mixture of old and
- * new register values and arms the update
- * 6. PSR exit -> hardware latches a mixture of old and
- * new register values -> corrupted frame, or worse
- * 7. New _arm() registers are finally written
- * 8. Hardware finally latches a complete set of new
- * register values, and subsequent frames will be OK again
- */
- wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_DC_OFF);
+ if (state->modeset)
+ wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET);

intel_atomic_prepare_plane_clear_colors(state);

@@ -7661,8 +7639,8 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
* the culprit.
*/
intel_uncore_arm_unclaimed_mmio_detection(&dev_priv->uncore);
+ intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET, wakeref);
}
- intel_display_power_put(dev_priv, POWER_DOMAIN_DC_OFF, wakeref);
intel_runtime_pm_put(&dev_priv->runtime_pm, state->wakeref);

/*
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index e49fa6fa6aee..b2838732ac93 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -256,8 +256,8 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)

if (!HAS_FLAT_CCS(rq->engine->i915)) {
/* hsdes: 1809175790 */
- cs = gen12_emit_aux_table_inv(rq->engine->gt,
- cs, GEN12_GFX_CCS_AUX_NV);
+ cs = gen12_emit_aux_table_inv(rq->engine->gt, cs,
+ GEN12_CCS_AUX_INV);
}

*cs++ = preparser_disable(false);
@@ -317,10 +317,10 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
if (aux_inv) { /* hsdes: 1809175790 */
if (rq->engine->class == VIDEO_DECODE_CLASS)
cs = gen12_emit_aux_table_inv(rq->engine->gt,
- cs, GEN12_VD0_AUX_NV);
+ cs, GEN12_VD0_AUX_INV);
else
cs = gen12_emit_aux_table_inv(rq->engine->gt,
- cs, GEN12_VE0_AUX_NV);
+ cs, GEN12_VE0_AUX_INV);
}

if (mode & EMIT_INVALIDATE)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 2275ee47da95..dd006563cc81 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -301,9 +301,11 @@
#define GEN8_PRIVATE_PAT_HI _MMIO(0x40e0 + 4)
#define GEN10_PAT_INDEX(index) _MMIO(0x40e0 + (index) * 4)
#define BSD_HWS_PGA_GEN7 _MMIO(0x4180)
-#define GEN12_GFX_CCS_AUX_NV _MMIO(0x4208)
-#define GEN12_VD0_AUX_NV _MMIO(0x4218)
-#define GEN12_VD1_AUX_NV _MMIO(0x4228)
+
+#define GEN12_CCS_AUX_INV _MMIO(0x4208)
+#define GEN12_VD0_AUX_INV _MMIO(0x4218)
+#define GEN12_VE0_AUX_INV _MMIO(0x4238)
+#define GEN12_BCS0_AUX_INV _MMIO(0x4248)

#define GEN8_RTCR _MMIO(0x4260)
#define GEN8_M1TCR _MMIO(0x4264)
@@ -311,14 +313,12 @@
#define GEN8_BTCR _MMIO(0x426c)
#define GEN8_VTCR _MMIO(0x4270)

-#define GEN12_VD2_AUX_NV _MMIO(0x4298)
-#define GEN12_VD3_AUX_NV _MMIO(0x42a8)
-#define GEN12_VE0_AUX_NV _MMIO(0x4238)
-
#define BLT_HWS_PGA_GEN7 _MMIO(0x4280)

-#define GEN12_VE1_AUX_NV _MMIO(0x42b8)
+#define GEN12_VD2_AUX_INV _MMIO(0x4298)
+#define GEN12_CCS0_AUX_INV _MMIO(0x42c8)
#define AUX_INV REG_BIT(0)
+
#define VEBOX_HWS_PGA_GEN7 _MMIO(0x4380)

#define GEN12_AUX_ERR_DBG _MMIO(0x43f4)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 3955292483a6..137e41e37ea5 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1299,7 +1299,7 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
/* hsdes: 1809175790 */
if (!HAS_FLAT_CCS(ce->engine->i915))
cs = gen12_emit_aux_table_inv(ce->engine->gt,
- cs, GEN12_GFX_CCS_AUX_NV);
+ cs, GEN12_CCS_AUX_INV);

/* Wa_16014892111 */
if (IS_DG2(ce->engine->i915))
@@ -1326,10 +1326,10 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
if (!HAS_FLAT_CCS(ce->engine->i915)) {
if (ce->engine->class == VIDEO_DECODE_CLASS)
cs = gen12_emit_aux_table_inv(ce->engine->gt,
- cs, GEN12_VD0_AUX_NV);
+ cs, GEN12_VD0_AUX_INV);
else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS)
cs = gen12_emit_aux_table_inv(ce->engine->gt,
- cs, GEN12_VE0_AUX_NV);
+ cs, GEN12_VE0_AUX_INV);
}

return cs;
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 8ef93889061a..5ec293011d99 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -449,8 +449,11 @@ int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
}
} while (unlikely(is_barrier(active)));

- if (!__i915_active_fence_set(active, fence))
+ fence = __i915_active_fence_set(active, fence);
+ if (!fence)
__i915_active_acquire(ref);
+ else
+ dma_fence_put(fence);

out:
i915_active_release(ref);
@@ -469,13 +472,9 @@ __i915_active_set_fence(struct i915_active *ref,
return NULL;
}

- rcu_read_lock();
prev = __i915_active_fence_set(active, fence);
- if (prev)
- prev = dma_fence_get_rcu(prev);
- else
+ if (!prev)
__i915_active_acquire(ref);
- rcu_read_unlock();

return prev;
}
@@ -1019,10 +1018,11 @@ void i915_request_add_active_barriers(struct i915_request *rq)
*
* Records the new @fence as the last active fence along its timeline in
* this active tracker, moving the tracking callbacks from the previous
- * fence onto this one. Returns the previous fence (if not already completed),
- * which the caller must ensure is executed before the new fence. To ensure
- * that the order of fences within the timeline of the i915_active_fence is
- * understood, it should be locked by the caller.
+ * fence onto this one. Gets and returns a reference to the previous fence
+ * (if not already completed), which the caller must put after making sure
+ * that it is executed before the new fence. To ensure that the order of
+ * fences within the timeline of the i915_active_fence is understood, it
+ * should be locked by the caller.
*/
struct dma_fence *
__i915_active_fence_set(struct i915_active_fence *active,
@@ -1031,7 +1031,23 @@ __i915_active_fence_set(struct i915_active_fence *active,
struct dma_fence *prev;
unsigned long flags;

- if (fence == rcu_access_pointer(active->fence))
+ /*
+ * In case of fences embedded in i915_requests, their memory is
+ * SLAB_TYPESAFE_BY_RCU, so it can be reused right after release
+ * by new requests. Then, there is a risk of passing back a pointer
+ * to a new, completely unrelated fence that reuses the same memory
+ * while tracked under a different active tracker. Combined with i915
+ * perf open/close operations that build await dependencies between
+ * engine kernel context requests and user requests from different
+ * timelines, this can lead to dependency loops and infinite waits.
+ *
+ * As a countermeasure, we try to get a reference to the active->fence
+ * first, so if we succeed and pass it back to our user then it is not
+ * released and potentially reused by an unrelated request before the
+ * user has a chance to set up an await dependency on it.
+ */
+ prev = i915_active_fence_get(active);
+ if (fence == prev)
return fence;

GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
@@ -1040,27 +1056,56 @@ __i915_active_fence_set(struct i915_active_fence *active,
* Consider that we have two threads arriving (A and B), with
* C already resident as the active->fence.
*
- * A does the xchg first, and so it sees C or NULL depending
- * on the timing of the interrupt handler. If it is NULL, the
- * previous fence must have been signaled and we know that
- * we are first on the timeline. If it is still present,
- * we acquire the lock on that fence and serialise with the interrupt
- * handler, in the process removing it from any future interrupt
- * callback. A will then wait on C before executing (if present).
- *
- * As B is second, it sees A as the previous fence and so waits for
- * it to complete its transition and takes over the occupancy for
- * itself -- remembering that it needs to wait on A before executing.
+ * Both A and B have got a reference to C or NULL, depending on the
+ * timing of the interrupt handler. Let's assume that if A has got C
+ * then it has locked C first (before B).
*
* Note the strong ordering of the timeline also provides consistent
* nesting rules for the fence->lock; the inner lock is always the
* older lock.
*/
spin_lock_irqsave(fence->lock, flags);
- prev = xchg(__active_fence_slot(active), fence);
- if (prev) {
- GEM_BUG_ON(prev == fence);
+ if (prev)
spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+
+ /*
+ * A does the cmpxchg first, and so it sees C or NULL, as before, or
+ * something else, depending on the timing of other threads and/or
+ * interrupt handler. If not the same as before then A unlocks C if
+ * applicable and retries, starting from an attempt to get a new
+ * active->fence. Meanwhile, B follows the same path as A.
+ * Once A succeeds with cmpxchg, B fails again, retries, gets A from
+ * active->fence, locks it as soon as A completes, and possibly
+ * succeeds with cmpxchg.
+ */
+ while (cmpxchg(__active_fence_slot(active), prev, fence) != prev) {
+ if (prev) {
+ spin_unlock(prev->lock);
+ dma_fence_put(prev);
+ }
+ spin_unlock_irqrestore(fence->lock, flags);
+
+ prev = i915_active_fence_get(active);
+ GEM_BUG_ON(prev == fence);
+
+ spin_lock_irqsave(fence->lock, flags);
+ if (prev)
+ spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+ }
+
+ /*
+ * If prev is NULL then the previous fence must have been signaled
+ * and we know that we are first on the timeline. If it is still
+ * present then, having the lock on that fence already acquired, we
+ * serialise with the interrupt handler, in the process of removing it
+ * from any future interrupt callback. A will then wait on C before
+ * executing (if present).
+ *
+ * As B is second, it sees A as the previous fence and so waits for
+ * it to complete its transition and takes over the occupancy for
+ * itself -- remembering that it needs to wait on A before executing.
+ */
+ if (prev) {
__list_del_entry(&active->cb.node);
spin_unlock(prev->lock); /* serialise with prev->cb_list */
}
@@ -1077,11 +1122,7 @@ int i915_active_fence_set(struct i915_active_fence *active,
int err = 0;

/* Must maintain timeline ordering wrt previous active requests */
- rcu_read_lock();
fence = __i915_active_fence_set(active, &rq->fence);
- if (fence) /* but the previous fence may not belong to that timeline! */
- fence = dma_fence_get_rcu(fence);
- rcu_read_unlock();
if (fence) {
err = i915_request_await_dma_fence(rq, fence);
dma_fence_put(fence);
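
Stripped of the fence and IRQ-lock details, the new scheme is the classic
refcount-then-cmpxchg slot replacement: take a reference to the current
occupant before publishing the new one, and retry if the slot changed in
between, so the pointer handed back can never be recycled memory. An
editor's sketch with hypothetical obj_get()/obj_put() helpers standing in
for the fence reference helpers:

#include <stdatomic.h>
#include <stdlib.h>

struct obj { atomic_int refs; };

static struct obj *obj_get(struct obj *o)
{
	if (o)
		atomic_fetch_add(&o->refs, 1);
	return o;
}

static void obj_put(struct obj *o)
{
	if (o && atomic_fetch_sub(&o->refs, 1) == 1)
		free(o);
}

/* Returns the previous occupant with a reference the caller must put. */
static struct obj *slot_replace(_Atomic(struct obj *) *slot,
				struct obj *fence)
{
	for (;;) {
		struct obj *prev = obj_get(atomic_load(slot));
		struct obj *expected = prev;

		if (atomic_compare_exchange_strong(slot, &expected, fence))
			return prev;
		obj_put(prev);	/* raced with another writer: retry */
	}
}
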
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 62fad16a55e8..803cd2ad4deb 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1647,6 +1647,11 @@ __i915_request_ensure_parallel_ordering(struct i915_request *rq,

request_to_parent(rq)->parallel.last_rq = i915_request_get(rq);

+ /*
+ * Users have to put the reference to the returned request,
+ * potentially taken by __i915_active_fence_set(), when it is
+ * no longer needed.
+ */
return to_request(__i915_active_fence_set(&timeline->last_request,
&rq->fence));
}
@@ -1693,6 +1698,10 @@ __i915_request_ensure_ordering(struct i915_request *rq,
0);
}

+ /*
+ * Users have to put the reference to prev, potentially taken
+ * by __i915_active_fence_set(), when it is no longer needed.
+ */
return prev;
}

@@ -1736,6 +1745,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
prev = __i915_request_ensure_ordering(rq, timeline);
else
prev = __i915_request_ensure_parallel_ordering(rq, timeline);
+ if (prev)
+ i915_request_put(prev);

/*
* Make sure that no request gazumped us - if it was allocated after
diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c
index 5f26090b0c98..89585b31b985 100644
--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
+++ b/drivers/gpu/drm/imx/ipuv3-crtc.c
@@ -310,7 +310,7 @@ static void ipu_crtc_mode_set_nofb(struct drm_crtc *crtc)
dev_warn(ipu_crtc->dev, "8-pixel align hactive %d -> %d\n",
sig_cfg.mode.hactive, new_hactive);

- sig_cfg.mode.hfront_porch = new_hactive - sig_cfg.mode.hactive;
+ sig_cfg.mode.hfront_porch -= new_hactive - sig_cfg.mode.hactive;
sig_cfg.mode.hactive = new_hactive;
}

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index f7aeeee6f526..db332de134f1 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -552,7 +552,8 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,

if (bo->pin_count) {
*locked = false;
- *busy = false;
+ if (busy)
+ *busy = false;
return false;
}

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index d4d8bfee9feb..db33dc87f69e 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -882,6 +882,12 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
{
int index;

+ if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
+ (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
+ arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
+ cmds->num = 0;
+ }
+
if (cmds->num == CMDQ_BATCH_ENTRIES) {
arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
cmds->num = 0;
@@ -3410,6 +3416,44 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
return 0;
}

+#define IIDR_IMPLEMENTER_ARM 0x43b
+#define IIDR_PRODUCTID_ARM_MMU_600 0x483
+#define IIDR_PRODUCTID_ARM_MMU_700 0x487
+
+static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
+{
+ u32 reg;
+ unsigned int implementer, productid, variant, revision;
+
+ reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
+ implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
+ productid = FIELD_GET(IIDR_PRODUCTID, reg);
+ variant = FIELD_GET(IIDR_VARIANT, reg);
+ revision = FIELD_GET(IIDR_REVISION, reg);
+
+ switch (implementer) {
+ case IIDR_IMPLEMENTER_ARM:
+ switch (productid) {
+ case IIDR_PRODUCTID_ARM_MMU_600:
+ /* Arm erratum 1076982 */
+ if (variant == 0 && revision <= 2)
+ smmu->features &= ~ARM_SMMU_FEAT_SEV;
+ /* Arm erratum 1209401 */
+ if (variant < 2)
+ smmu->features &= ~ARM_SMMU_FEAT_NESTING;
+ break;
+ case IIDR_PRODUCTID_ARM_MMU_700:
+ /* Arm erratum 2812531 */
+ smmu->features &= ~ARM_SMMU_FEAT_BTM;
+ smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
+ /* Arm errata 2268618, 2812531 */
+ smmu->features &= ~ARM_SMMU_FEAT_NESTING;
+ break;
+ }
+ break;
+ }
+}
+
static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
{
u32 reg;
@@ -3615,6 +3659,12 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)

smmu->ias = max(smmu->ias, smmu->oas);

+ if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
+ (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
+ smmu->features |= ARM_SMMU_FEAT_NESTING;
+
+ arm_smmu_device_iidr_probe(smmu);
+
if (arm_smmu_sva_supported(smmu))
smmu->features |= ARM_SMMU_FEAT_SVA;

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index cd48590ada30..d0b207cae107 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -69,6 +69,12 @@
#define IDR5_VAX GENMASK(11, 10)
#define IDR5_VAX_52_BIT 1

+#define ARM_SMMU_IIDR 0x18
+#define IIDR_PRODUCTID GENMASK(31, 20)
+#define IIDR_VARIANT GENMASK(19, 16)
+#define IIDR_REVISION GENMASK(15, 12)
+#define IIDR_IMPLEMENTER GENMASK(11, 0)
+
#define ARM_SMMU_CR0 0x20
#define CR0_ATSCHK (1 << 4)
#define CR0_CMDQEN (1 << 3)
@@ -639,11 +645,13 @@ struct arm_smmu_device {
#define ARM_SMMU_FEAT_BTM (1 << 16)
#define ARM_SMMU_FEAT_SVA (1 << 17)
#define ARM_SMMU_FEAT_E2H (1 << 18)
+#define ARM_SMMU_FEAT_NESTING (1 << 19)
u32 features;

#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
#define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
#define ARM_SMMU_OPT_MSIPOLL (1 << 2)
+#define ARM_SMMU_OPT_CMDQ_FORCE_SYNC (1 << 3)
u32 options;

struct arm_smmu_cmdq cmdq;
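For reference, the IIDR layout added above packs four fields into one
32-bit ID register; GENMASK()/FIELD_GET() from <linux/bitfield.h> do
the extraction. A minimal sketch (constants from the hunk, the helper
name is hypothetical):

        #include <linux/bits.h>
        #include <linux/bitfield.h>

        #define IIDR_PRODUCTID          GENMASK(31, 20)
        #define IIDR_IMPLEMENTER        GENMASK(11, 0)

        static bool iidr_is_arm_mmu_700(u32 iidr)
        {
                /* implementer 0x43b is Arm, product 0x487 is MMU-700 */
                return FIELD_GET(IIDR_IMPLEMENTER, iidr) == 0x43b &&
                       FIELD_GET(IIDR_PRODUCTID, iidr) == 0x487;
        }
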
diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c
index c0331b268010..fe391de1aba3 100644
--- a/drivers/isdn/hardware/mISDN/hfcpci.c
+++ b/drivers/isdn/hardware/mISDN/hfcpci.c
@@ -839,7 +839,7 @@ hfcpci_fill_fifo(struct bchannel *bch)
*z1t = cpu_to_le16(new_z1); /* now send data */
if (bch->tx_idx < bch->tx_skb->len)
return;
- dev_kfree_skb(bch->tx_skb);
+ dev_kfree_skb_any(bch->tx_skb);
if (get_next_bframe(bch))
goto next_t_frame;
return;
@@ -895,7 +895,7 @@ hfcpci_fill_fifo(struct bchannel *bch)
}
bz->za[new_f1].z1 = cpu_to_le16(new_z1); /* for next buffer */
bz->f1 = new_f1; /* next frame */
- dev_kfree_skb(bch->tx_skb);
+ dev_kfree_skb_any(bch->tx_skb);
get_next_bframe(bch);
}

@@ -1119,7 +1119,7 @@ tx_birq(struct bchannel *bch)
if (bch->tx_skb && bch->tx_idx < bch->tx_skb->len)
hfcpci_fill_fifo(bch);
else {
- dev_kfree_skb(bch->tx_skb);
+ dev_kfree_skb_any(bch->tx_skb);
if (get_next_bframe(bch))
hfcpci_fill_fifo(bch);
}
@@ -2277,7 +2277,7 @@ _hfcpci_softirq(struct device *dev, void *unused)
return 0;

if (hc->hw.int_m2 & HFCPCI_IRQ_ENABLE) {
- spin_lock(&hc->lock);
+ spin_lock_irq(&hc->lock);
bch = Sel_BCS(hc, hc->hw.bswapped ? 2 : 1);
if (bch && bch->state == ISDN_P_B_RAW) { /* B1 rx&tx */
main_rec_hfcpci(bch);
@@ -2288,7 +2288,7 @@ _hfcpci_softirq(struct device *dev, void *unused)
main_rec_hfcpci(bch);
tx_birq(bch);
}
- spin_unlock(&hc->lock);
+ spin_unlock_irq(&hc->lock);
}
return 0;
}
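For context on the dev_kfree_skb() conversions above: dev_kfree_skb()
is consume_skb(), which must not run in hard-IRQ context, while these
hfcpci paths are reachable from the card's interrupt handler.
dev_kfree_skb_any() checks the context at run time. A minimal sketch:

        #include <linux/skbuff.h>

        /* may be called from hard IRQ, timer or process context */
        static void finish_tx_frame(struct sk_buff *skb)
        {
                /* selects dev_kfree_skb_irq() when in hard IRQ or with
                 * IRQs disabled, plain dev_kfree_skb() otherwise */
                dev_kfree_skb_any(skb);
        }
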
diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c
index b3cc427100a2..636e65328bb3 100644
--- a/drivers/mtd/nand/raw/fsl_upm.c
+++ b/drivers/mtd/nand/raw/fsl_upm.c
@@ -135,7 +135,7 @@ static int fun_exec_op(struct nand_chip *chip, const struct nand_operation *op,
unsigned int i;
int ret;

- if (op->cs > NAND_MAX_CHIPS)
+ if (op->cs >= NAND_MAX_CHIPS)
return -EINVAL;

if (check_only)
diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c
index 029a2a302aa6..ac4947f72047 100644
--- a/drivers/mtd/nand/raw/meson_nand.c
+++ b/drivers/mtd/nand/raw/meson_nand.c
@@ -1184,7 +1184,6 @@ static int meson_nand_attach_chip(struct nand_chip *nand)
struct meson_nfc *nfc = nand_get_controller_data(nand);
struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
struct mtd_info *mtd = nand_to_mtd(nand);
- int nsectors = mtd->writesize / 1024;
int ret;

if (!mtd->name) {
@@ -1202,7 +1201,7 @@ static int meson_nand_attach_chip(struct nand_chip *nand)
nand->options |= NAND_NO_SUBPAGE_WRITE;

ret = nand_ecc_choose_conf(nand, nfc->data->ecc_caps,
- mtd->oobsize - 2 * nsectors);
+ mtd->oobsize - 2);
if (ret) {
dev_err(nfc->dev, "failed to ECC init\n");
return -EINVAL;
diff --git a/drivers/mtd/nand/raw/omap_elm.c b/drivers/mtd/nand/raw/omap_elm.c
index 4796a48e1012..22d37fc37e98 100644
--- a/drivers/mtd/nand/raw/omap_elm.c
+++ b/drivers/mtd/nand/raw/omap_elm.c
@@ -177,17 +177,17 @@ static void elm_load_syndrome(struct elm_info *info,
switch (info->bch_type) {
case BCH8_ECC:
/* syndrome fragment 0 = ecc[9-12B] */
- val = cpu_to_be32(*(u32 *) &ecc[9]);
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[9]);
elm_write_reg(info, offset, val);

/* syndrome fragment 1 = ecc[5-8B] */
offset += 4;
- val = cpu_to_be32(*(u32 *) &ecc[5]);
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[5]);
elm_write_reg(info, offset, val);

/* syndrome fragment 2 = ecc[1-4B] */
offset += 4;
- val = cpu_to_be32(*(u32 *) &ecc[1]);
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[1]);
elm_write_reg(info, offset, val);

/* syndrome fragment 3 = ecc[0B] */
@@ -197,35 +197,35 @@ static void elm_load_syndrome(struct elm_info *info,
break;
case BCH4_ECC:
/* syndrome fragment 0 = ecc[20-52b] bits */
- val = (cpu_to_be32(*(u32 *) &ecc[3]) >> 4) |
+ val = ((__force u32)cpu_to_be32(*(u32 *)&ecc[3]) >> 4) |
((ecc[2] & 0xf) << 28);
elm_write_reg(info, offset, val);

/* syndrome fragment 1 = ecc[0-20b] bits */
offset += 4;
- val = cpu_to_be32(*(u32 *) &ecc[0]) >> 12;
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[0]) >> 12;
elm_write_reg(info, offset, val);
break;
case BCH16_ECC:
- val = cpu_to_be32(*(u32 *) &ecc[22]);
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[22]);
elm_write_reg(info, offset, val);
offset += 4;
- val = cpu_to_be32(*(u32 *) &ecc[18]);
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[18]);
elm_write_reg(info, offset, val);
offset += 4;
- val = cpu_to_be32(*(u32 *) &ecc[14]);
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[14]);
elm_write_reg(info, offset, val);
offset += 4;
- val = cpu_to_be32(*(u32 *) &ecc[10]);
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[10]);
elm_write_reg(info, offset, val);
offset += 4;
- val = cpu_to_be32(*(u32 *) &ecc[6]);
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[6]);
elm_write_reg(info, offset, val);
offset += 4;
- val = cpu_to_be32(*(u32 *) &ecc[2]);
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[2]);
elm_write_reg(info, offset, val);
offset += 4;
- val = cpu_to_be32(*(u32 *) &ecc[0]) >> 16;
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[0]) >> 16;
elm_write_reg(info, offset, val);
break;
default:
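On the (__force u32) casts above: cpu_to_be32() returns a bitwise
__be32, and assigning that to the plain u32 consumed by elm_write_reg()
makes sparse warn about mixing restricted types. The byte swap itself
is intended - the ELM consumes the syndrome big-endian - so __force
marks the reinterpretation as deliberate without changing the generated
code. A minimal sketch:

        #include <linux/types.h>
        #include <asm/byteorder.h>

        static u32 syndrome_fragment(const u8 *ecc)
        {
                /* deliberate __be32 -> u32 reinterpretation; the value
                 * really is stored big-endian in the register */
                return (__force u32)cpu_to_be32(*(const u32 *)ecc);
        }
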
diff --git a/drivers/mtd/nand/raw/rockchip-nand-controller.c b/drivers/mtd/nand/raw/rockchip-nand-controller.c
index f133985cc053..c9c4e9ffcae1 100644
--- a/drivers/mtd/nand/raw/rockchip-nand-controller.c
+++ b/drivers/mtd/nand/raw/rockchip-nand-controller.c
@@ -562,9 +562,10 @@ static int rk_nfc_write_page_raw(struct nand_chip *chip, const u8 *buf,
* BBM OOB1 OOB2 OOB3 |......| PA0 PA1 PA2 PA3
*
* The rk_nfc_ooblayout_free() function already has reserved
- * these 4 bytes with:
+ * these 4 bytes together with 2 bytes for BBM
+ * by reducing its length:
*
- * oob_region->offset = NFC_SYS_DATA_SIZE + 2;
+ * oob_region->length = rknand->metadata_size - NFC_SYS_DATA_SIZE - 2;
*/
if (!i)
memcpy(rk_nfc_oob_ptr(chip, i),
@@ -597,7 +598,7 @@ static int rk_nfc_write_page_hwecc(struct nand_chip *chip, const u8 *buf,
int pages_per_blk = mtd->erasesize / mtd->writesize;
int ret = 0, i, boot_rom_mode = 0;
dma_addr_t dma_data, dma_oob;
- u32 reg;
+ u32 tmp;
u8 *oob;

nand_prog_page_begin_op(chip, page, 0, NULL, 0);
@@ -624,6 +625,13 @@ static int rk_nfc_write_page_hwecc(struct nand_chip *chip, const u8 *buf,
*
* 0xFF 0xFF 0xFF 0xFF | BBM OOB1 OOB2 OOB3 | ...
*
+ * The code here just swaps the first 4 bytes with the last
+ * 4 bytes without losing any data.
+ *
+ * The chip->oob_poi data layout:
+ *
+ * BBM OOB1 OOB2 OOB3 |......| PA0 PA1 PA2 PA3
+ *
* Configure the ECC algorithm supported by the boot ROM.
*/
if ((page < (pages_per_blk * rknand->boot_blks)) &&
@@ -634,21 +642,17 @@ static int rk_nfc_write_page_hwecc(struct nand_chip *chip, const u8 *buf,
}

for (i = 0; i < ecc->steps; i++) {
- if (!i) {
- reg = 0xFFFFFFFF;
- } else {
+ if (!i)
+ oob = chip->oob_poi + (ecc->steps - 1) * NFC_SYS_DATA_SIZE;
+ else
oob = chip->oob_poi + (i - 1) * NFC_SYS_DATA_SIZE;
- reg = oob[0] | oob[1] << 8 | oob[2] << 16 |
- oob[3] << 24;
- }

- if (!i && boot_rom_mode)
- reg = (page & (pages_per_blk - 1)) * 4;
+ tmp = oob[0] | oob[1] << 8 | oob[2] << 16 | oob[3] << 24;

if (nfc->cfg->type == NFC_V9)
- nfc->oob_buf[i] = reg;
+ nfc->oob_buf[i] = tmp;
else
- nfc->oob_buf[i * (oob_step / 4)] = reg;
+ nfc->oob_buf[i * (oob_step / 4)] = tmp;
}

dma_data = dma_map_single(nfc->dev, (void *)nfc->page_buf,
@@ -811,12 +815,17 @@ static int rk_nfc_read_page_hwecc(struct nand_chip *chip, u8 *buf, int oob_on,
goto timeout_err;
}

- for (i = 1; i < ecc->steps; i++) {
- oob = chip->oob_poi + (i - 1) * NFC_SYS_DATA_SIZE;
+ for (i = 0; i < ecc->steps; i++) {
+ if (!i)
+ oob = chip->oob_poi + (ecc->steps - 1) * NFC_SYS_DATA_SIZE;
+ else
+ oob = chip->oob_poi + (i - 1) * NFC_SYS_DATA_SIZE;
+
if (nfc->cfg->type == NFC_V9)
tmp = nfc->oob_buf[i];
else
tmp = nfc->oob_buf[i * (oob_step / 4)];
+
*oob++ = (u8)tmp;
*oob++ = (u8)(tmp >> 8);
*oob++ = (u8)(tmp >> 16);
@@ -933,12 +942,8 @@ static int rk_nfc_ooblayout_free(struct mtd_info *mtd, int section,
if (section)
return -ERANGE;

- /*
- * The beginning of the OOB area stores the reserved data for the NFC,
- * the size of the reserved data is NFC_SYS_DATA_SIZE bytes.
- */
oob_region->length = rknand->metadata_size - NFC_SYS_DATA_SIZE - 2;
- oob_region->offset = NFC_SYS_DATA_SIZE + 2;
+ oob_region->offset = 2;

return 0;
}
diff --git a/drivers/mtd/nand/spi/toshiba.c b/drivers/mtd/nand/spi/toshiba.c
index 7380b1ebaccd..a80427c13121 100644
--- a/drivers/mtd/nand/spi/toshiba.c
+++ b/drivers/mtd/nand/spi/toshiba.c
@@ -73,7 +73,7 @@ static int tx58cxgxsxraix_ecc_get_status(struct spinand_device *spinand,
{
struct nand_device *nand = spinand_to_nand(spinand);
u8 mbf = 0;
- struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, &mbf);
+ struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, spinand->scratchbuf);

switch (status & STATUS_ECC_MASK) {
case STATUS_ECC_NO_BITFLIPS:
@@ -92,7 +92,7 @@ static int tx58cxgxsxraix_ecc_get_status(struct spinand_device *spinand,
if (spi_mem_exec_op(spinand->spimem, &op))
return nanddev_get_ecc_conf(nand)->strength;

- mbf >>= 4;
+ mbf = *(spinand->scratchbuf) >> 4;

if (WARN_ON(mbf > nanddev_get_ecc_conf(nand)->strength || !mbf))
return nanddev_get_ecc_conf(nand)->strength;
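The tx58cxgxsxraix change above exists because the old code pointed the
SPI read at an on-stack byte; SPI controllers may DMA into the RX
buffer, and stack memory is not DMA-safe (with CONFIG_VMAP_STACK it may
not even be physically contiguous). The read has to land in heap memory
such as the driver's scratchbuf. A minimal sketch of the pattern:

        u8 *buf = spinand->scratchbuf;  /* kmalloc'ed, DMA-safe */
        struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, buf);

        if (spi_mem_exec_op(spinand->spimem, &op))
                return nanddev_get_ecc_conf(nand)->strength;
        mbf = *buf >> 4;                /* copy out after the transfer */
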
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index cde253d27bd0..72374b066f64 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -1436,7 +1436,9 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
if (IS_ERR(priv->clk))
return PTR_ERR(priv->clk);

- clk_prepare_enable(priv->clk);
+ ret = clk_prepare_enable(priv->clk);
+ if (ret)
+ return ret;

priv->clk_mdiv = devm_clk_get_optional(&pdev->dev, "sw_switch_mdiv");
if (IS_ERR(priv->clk_mdiv)) {
@@ -1444,7 +1446,9 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
goto out_clk;
}

- clk_prepare_enable(priv->clk_mdiv);
+ ret = clk_prepare_enable(priv->clk_mdiv);
+ if (ret)
+ goto out_clk;

ret = bcm_sf2_sw_rst(priv);
if (ret) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 6469fb8a42a8..969db3c45d17 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -721,17 +721,24 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)

static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
struct bnxt_rx_ring_info *rxr,
+ unsigned int *offset,
gfp_t gfp)
{
struct device *dev = &bp->pdev->dev;
struct page *page;

- page = page_pool_dev_alloc_pages(rxr->page_pool);
+ if (PAGE_SIZE > BNXT_RX_PAGE_SIZE) {
+ page = page_pool_dev_alloc_frag(rxr->page_pool, offset,
+ BNXT_RX_PAGE_SIZE);
+ } else {
+ page = page_pool_dev_alloc_pages(rxr->page_pool);
+ *offset = 0;
+ }
if (!page)
return NULL;

- *mapping = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, bp->rx_dir,
- DMA_ATTR_WEAK_ORDERING);
+ *mapping = dma_map_page_attrs(dev, page, *offset, BNXT_RX_PAGE_SIZE,
+ bp->rx_dir, DMA_ATTR_WEAK_ORDERING);
if (dma_mapping_error(dev, *mapping)) {
page_pool_recycle_direct(rxr->page_pool, page);
return NULL;
@@ -771,15 +778,16 @@ int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
dma_addr_t mapping;

if (BNXT_RX_PAGE_MODE(bp)) {
+ unsigned int offset;
struct page *page =
- __bnxt_alloc_rx_page(bp, &mapping, rxr, gfp);
+ __bnxt_alloc_rx_page(bp, &mapping, rxr, &offset, gfp);

if (!page)
return -ENOMEM;

mapping += bp->rx_dma_offset;
rx_buf->data = page;
- rx_buf->data_ptr = page_address(page) + bp->rx_offset;
+ rx_buf->data_ptr = page_address(page) + offset + bp->rx_offset;
} else {
u8 *data = __bnxt_alloc_rx_frag(bp, &mapping, gfp);

@@ -839,7 +847,7 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp,
unsigned int offset = 0;

if (BNXT_RX_PAGE_MODE(bp)) {
- page = __bnxt_alloc_rx_page(bp, &mapping, rxr, gfp);
+ page = __bnxt_alloc_rx_page(bp, &mapping, rxr, &offset, gfp);

if (!page)
return -ENOMEM;
@@ -986,15 +994,15 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp,
return NULL;
}
dma_addr -= bp->rx_dma_offset;
- dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
- DMA_ATTR_WEAK_ORDERING);
- skb = build_skb(page_address(page), PAGE_SIZE);
+ dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, BNXT_RX_PAGE_SIZE,
+ bp->rx_dir, DMA_ATTR_WEAK_ORDERING);
+ skb = build_skb(data_ptr - bp->rx_offset, BNXT_RX_PAGE_SIZE);
if (!skb) {
page_pool_recycle_direct(rxr->page_pool, page);
return NULL;
}
skb_mark_for_recycle(skb);
- skb_reserve(skb, bp->rx_dma_offset);
+ skb_reserve(skb, bp->rx_offset);
__skb_put(skb, len);

return skb;
@@ -1020,8 +1028,8 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
return NULL;
}
dma_addr -= bp->rx_dma_offset;
- dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
- DMA_ATTR_WEAK_ORDERING);
+ dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, BNXT_RX_PAGE_SIZE,
+ bp->rx_dir, DMA_ATTR_WEAK_ORDERING);

if (unlikely(!payload))
payload = eth_get_headlen(bp->dev, data_ptr, len);
@@ -1034,7 +1042,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,

skb_mark_for_recycle(skb);
off = (void *)data_ptr - page_address(page);
- skb_add_rx_frag(skb, 0, page, off, len, PAGE_SIZE);
+ skb_add_rx_frag(skb, 0, page, off, len, BNXT_RX_PAGE_SIZE);
memcpy(skb->data - NET_IP_ALIGN, data_ptr - NET_IP_ALIGN,
payload + NET_IP_ALIGN);

@@ -1169,7 +1177,7 @@ static struct sk_buff *bnxt_rx_agg_pages_skb(struct bnxt *bp,

skb->data_len += total_frag_len;
skb->len += total_frag_len;
- skb->truesize += PAGE_SIZE * agg_bufs;
+ skb->truesize += BNXT_RX_PAGE_SIZE * agg_bufs;
return skb;
}

@@ -2972,8 +2980,8 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr)
rx_buf->data = NULL;
if (BNXT_RX_PAGE_MODE(bp)) {
mapping -= bp->rx_dma_offset;
- dma_unmap_page_attrs(&pdev->dev, mapping, PAGE_SIZE,
- bp->rx_dir,
+ dma_unmap_page_attrs(&pdev->dev, mapping,
+ BNXT_RX_PAGE_SIZE, bp->rx_dir,
DMA_ATTR_WEAK_ORDERING);
page_pool_recycle_direct(rxr->page_pool, data);
} else {
@@ -3241,6 +3249,8 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp,
pp.nid = dev_to_node(&bp->pdev->dev);
pp.dev = &bp->pdev->dev;
pp.dma_dir = DMA_BIDIRECTIONAL;
+ if (PAGE_SIZE > BNXT_RX_PAGE_SIZE)
+ pp.flags |= PP_FLAG_PAGE_FRAG;

rxr->page_pool = page_pool_create(&pp);
if (IS_ERR(rxr->page_pool)) {
@@ -4017,26 +4027,29 @@ void bnxt_set_ring_params(struct bnxt *bp)
*/
int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
{
+ struct net_device *dev = bp->dev;
+
if (page_mode) {
bp->flags &= ~BNXT_FLAG_AGG_RINGS;
bp->flags |= BNXT_FLAG_RX_PAGE_MODE;

- if (bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) {
+ if (bp->xdp_prog->aux->xdp_has_frags)
+ dev->max_mtu = min_t(u16, bp->max_mtu, BNXT_MAX_MTU);
+ else
+ dev->max_mtu =
+ min_t(u16, bp->max_mtu, BNXT_MAX_PAGE_MODE_MTU);
+ if (dev->mtu > BNXT_MAX_PAGE_MODE_MTU) {
bp->flags |= BNXT_FLAG_JUMBO;
bp->rx_skb_func = bnxt_rx_multi_page_skb;
- bp->dev->max_mtu =
- min_t(u16, bp->max_mtu, BNXT_MAX_MTU);
} else {
bp->flags |= BNXT_FLAG_NO_AGG_RINGS;
bp->rx_skb_func = bnxt_rx_page_skb;
- bp->dev->max_mtu =
- min_t(u16, bp->max_mtu, BNXT_MAX_PAGE_MODE_MTU);
}
bp->rx_dir = DMA_BIDIRECTIONAL;
/* Disable LRO or GRO_HW */
- netdev_update_features(bp->dev);
+ netdev_update_features(dev);
} else {
- bp->dev->max_mtu = bp->max_mtu;
+ dev->max_mtu = bp->max_mtu;
bp->flags &= ~BNXT_FLAG_RX_PAGE_MODE;
bp->rx_dir = DMA_FROM_DEVICE;
bp->rx_skb_func = bnxt_rx_skb;
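The bnxt changes above switch large-page systems to sub-page RX
buffers: when BNXT_RX_PAGE_SIZE is smaller than PAGE_SIZE, the pool is
created with PP_FLAG_PAGE_FRAG and each buffer becomes a fragment at
some offset within a shared page, which the DMA mapping and skb
accounting then have to honour. A minimal page_pool sketch (pool size
and fragment size hypothetical):

        #include <linux/dma-mapping.h>
        #include <linux/sizes.h>
        #include <net/page_pool.h>

        struct page_pool_params pp = {
                .flags          = PP_FLAG_PAGE_FRAG, /* sub-page buffers */
                .pool_size      = 1024,
                .nid            = NUMA_NO_NODE,
                .dma_dir        = DMA_BIDIRECTIONAL,
        };
        struct page_pool *pool = page_pool_create(&pp);
        unsigned int offset;
        struct page *page;

        /* hands back a page plus the offset of a 16K fragment in it */
        page = page_pool_dev_alloc_frag(pool, &offset, SZ_16K);
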
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 36d5202c0aee..aa56db138d6b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -180,8 +180,8 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
u16 cons, u8 *data_ptr, unsigned int len,
struct xdp_buff *xdp)
{
+ u32 buflen = BNXT_RX_PAGE_SIZE;
struct bnxt_sw_rx_bd *rx_buf;
- u32 buflen = PAGE_SIZE;
struct pci_dev *pdev;
dma_addr_t mapping;
u32 offset;
@@ -297,7 +297,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
rx_buf = &rxr->rx_buf_ring[cons];
mapping = rx_buf->mapping - bp->rx_dma_offset;
dma_unmap_page_attrs(&pdev->dev, mapping,
- PAGE_SIZE, bp->rx_dir,
+ BNXT_RX_PAGE_SIZE, bp->rx_dir,
DMA_ATTR_WEAK_ORDERING);

/* if we are unable to allocate a new buffer, abort and reuse */
@@ -478,7 +478,7 @@ bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags,
}
xdp_update_skb_shared_info(skb, num_frags,
sinfo->xdp_frags_size,
- PAGE_SIZE * sinfo->nr_frags,
+ BNXT_RX_PAGE_SIZE * sinfo->nr_frags,
xdp_buff_is_frag_pfmemalloc(xdp));
return skb;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 8f77088900e9..a771e597795d 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -8777,6 +8777,7 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_pf *pf = np->vsi->back;
+ bool locked = false;
int err;

switch (type) {
@@ -8786,10 +8787,27 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
ice_setup_tc_block_cb,
np, np, true);
case TC_SETUP_QDISC_MQPRIO:
+ if (pf->adev) {
+ mutex_lock(&pf->adev_mutex);
+ device_lock(&pf->adev->dev);
+ locked = true;
+ if (pf->adev->dev.driver) {
+ netdev_err(netdev, "Cannot change qdisc when RDMA is active\n");
+ err = -EBUSY;
+ goto adev_unlock;
+ }
+ }
+
/* setup traffic classifier for receive side */
mutex_lock(&pf->tc_mutex);
err = ice_setup_tc_mqprio_qdisc(netdev, type_data);
mutex_unlock(&pf->tc_mutex);
+
+adev_unlock:
+ if (locked) {
+ device_unlock(&pf->adev->dev);
+ mutex_unlock(&pf->adev_mutex);
+ }
return err;
default:
return -EOPNOTSUPP;
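The mqprio hunk above uses the conditional-lock pattern: the RDMA
auxiliary device may not exist, so the locks are only taken when it
does, and a `locked` flag keeps the single unlock path honest. The same
pattern in isolation:

        bool locked = false;
        int err = 0;

        if (pf->adev) {                 /* resource may be absent */
                mutex_lock(&pf->adev_mutex);
                device_lock(&pf->adev->dev);
                locked = true;
        }

        /* ... work that must not race with the aux driver ... */

        if (locked) {                   /* unlock mirrors the lock */
                device_unlock(&pf->adev->dev);
                mutex_unlock(&pf->adev_mutex);
        }
        return err;
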
diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 2b9335cb4bb3..8537578e1cf1 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -1302,11 +1302,10 @@ static int korina_probe(struct platform_device *pdev)
else if (of_get_ethdev_address(pdev->dev.of_node, dev) < 0)
eth_hw_addr_random(dev);

- clk = devm_clk_get_optional(&pdev->dev, "mdioclk");
+ clk = devm_clk_get_optional_enabled(&pdev->dev, "mdioclk");
if (IS_ERR(clk))
return PTR_ERR(clk);
if (clk) {
- clk_prepare_enable(clk);
lp->mii_clock_freq = clk_get_rate(clk);
} else {
lp->mii_clock_freq = 200000000; /* max possible input clk */
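devm_clk_get_optional_enabled(), used above, folds three steps into one
managed call: look the clock up, treat "not present" as NULL rather
than an error, and prepare+enable it with automatic disable/unprepare
and put on driver detach - which is why the explicit
clk_prepare_enable() call could be dropped. A minimal sketch (the
fallback constant is hypothetical):

        #include <linux/clk.h>

        struct clk *clk;
        unsigned long freq;

        clk = devm_clk_get_optional_enabled(&pdev->dev, "mdioclk");
        if (IS_ERR(clk))
                return PTR_ERR(clk);    /* a real lookup/enable error */
        if (clk)
                freq = clk_get_rate(clk);
        else
                freq = FALLBACK_HZ;     /* clock simply not wired up */
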
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
index 59470d99f522..a37dbbda8de3 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
@@ -702,7 +702,8 @@ static int prestera_fw_get(struct prestera_fw *fw)

err = request_firmware_direct(&fw->bin, fw_path, fw->dev.dev);
if (err) {
- if (ver_maj == PRESTERA_SUPP_FW_MAJ_VER) {
+ if (ver_maj != PRESTERA_PREV_FW_MAJ_VER ||
+ ver_min != PRESTERA_PREV_FW_MIN_VER) {
ver_maj = PRESTERA_PREV_FW_MAJ_VER;
ver_min = PRESTERA_PREV_FW_MIN_VER;

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 6859f1c1a831..c4a84f0a3b73 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -58,7 +58,9 @@ static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x)

trailer_len = alen + plen + 2;

- pskb_trim(skb, skb->len - trailer_len);
+ ret = pskb_trim(skb, skb->len - trailer_len);
+ if (unlikely(ret))
+ return ret;
if (skb->protocol == htons(ETH_P_IP)) {
ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len);
ip_send_check(ipv4hdr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
index 5b658a5588c6..6ecf0bf2366a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
@@ -160,6 +160,7 @@ static int macsec_fs_tx_create_crypto_table_groups(struct mlx5e_flow_table *ft)

if (!in) {
kfree(ft->g);
+ ft->g = NULL;
return -ENOMEM;
}

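The ft->g = NULL addition above is double-free hardening: the caller's
error path frees ft->g as well, and since kfree(NULL) is a no-op,
clearing the pointer after the first free makes the second one
harmless. In isolation:

        if (!in) {
                kfree(ft->g);
                ft->g = NULL;   /* kfree(NULL) is a no-op, so a later
                                 * cleanup pass cannot double-free */
                return -ENOMEM;
        }
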
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index 0ae1865086ff..dc0a0a27ac84 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -136,6 +136,16 @@ static void arfs_del_rules(struct mlx5e_flow_steering *fs);

int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs)
{
+ /* Moving to switchdev mode, fs->arfs is freed by mlx5e_nic_profile
+ * cleanup_rx callback and it is not recreated when
+ * mlx5e_uplink_rep_profile is loaded as mlx5e_create_flow_steering()
+ * is not called by the uplink_rep profile init_rx callback. Thus, if
+ * ntuple is set, moving to switchdev flow will enter this function
+ * with fs->arfs nullified.
+ */
+ if (!mlx5e_fs_get_arfs(fs))
+ return 0;
+
arfs_del_rules(fs);

return arfs_disable(fs);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 9bd1a93a512d..bd895ef341a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -912,7 +912,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
if (err) {
mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
- return err;
+ goto err_rx_res_free;
}

err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
@@ -946,6 +946,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
mlx5e_rx_res_destroy(priv->rx_res);
err_close_drop_rq:
mlx5e_close_drop_rq(&priv->drop_rq);
+err_rx_res_free:
mlx5e_rx_res_free(priv->rx_res);
priv->rx_res = NULL;
err_free_fs:
@@ -1039,6 +1040,10 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
return err;
}

+ err = mlx5e_rep_neigh_init(rpriv);
+ if (err)
+ goto err_neigh_init;
+
if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
err = mlx5e_init_uplink_rep_tx(rpriv);
if (err)
@@ -1055,6 +1060,8 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
if (rpriv->rep->vport == MLX5_VPORT_UPLINK)
mlx5e_cleanup_uplink_rep_tx(rpriv);
err_init_tx:
+ mlx5e_rep_neigh_cleanup(rpriv);
+err_neigh_init:
mlx5e_destroy_tises(priv);
return err;
}
@@ -1068,22 +1075,17 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv)
if (rpriv->rep->vport == MLX5_VPORT_UPLINK)
mlx5e_cleanup_uplink_rep_tx(rpriv);

+ mlx5e_rep_neigh_cleanup(rpriv);
mlx5e_destroy_tises(priv);
}

static void mlx5e_rep_enable(struct mlx5e_priv *priv)
{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
-
mlx5e_set_netdev_mtu_boundaries(priv);
- mlx5e_rep_neigh_init(rpriv);
}

static void mlx5e_rep_disable(struct mlx5e_priv *priv)
{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
-
- mlx5e_rep_neigh_cleanup(rpriv);
}

static int mlx5e_update_rep_rx(struct mlx5e_priv *priv)
@@ -1118,7 +1120,6 @@ static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event

static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
u16 max_mtu;
@@ -1138,7 +1139,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
mlx5_notifier_register(mdev, &priv->events_nb);
mlx5e_dcbnl_initialize(priv);
mlx5e_dcbnl_init_app(priv);
- mlx5e_rep_neigh_init(rpriv);
mlx5e_rep_bridge_init(priv);

netdev->wanted_features |= NETIF_F_HW_TC;
@@ -1153,7 +1153,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)

static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_core_dev *mdev = priv->mdev;

rtnl_lock();
@@ -1163,7 +1162,6 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
rtnl_unlock();

mlx5e_rep_bridge_cleanup(priv);
- mlx5e_rep_neigh_cleanup(rpriv);
mlx5e_dcbnl_delete_app(priv);
mlx5_notifier_unregister(mdev, &priv->events_nb);
mlx5e_rep_tc_disable(priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index a0242dc15741..e112b5685b02 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -1061,7 +1061,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
if (!mlx5_core_is_sf(dev))
clear_rmap(dev);
- mlx5_irq_table_destroy(dev);
+ mlx5_irq_table_free_irqs(dev);
mutex_unlock(&table->lock);
}

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index d53749248fa0..e6674118bc42 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -860,7 +860,7 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root,
struct fs_node *iter = list_entry(start, struct fs_node, list);
struct mlx5_flow_table *ft = NULL;

- if (!root || root->type == FS_TYPE_PRIO_CHAINS)
+ if (!root)
return NULL;

list_for_each_advance_continue(iter, &root->children, reverse) {
@@ -876,20 +876,42 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root,
return ft;
}

-/* If reverse is false then return the first flow table in next priority of
- * prio in the tree, else return the last flow table in the previous priority
- * of prio in the tree.
+static struct fs_node *find_prio_chains_parent(struct fs_node *parent,
+ struct fs_node **child)
+{
+ struct fs_node *node = NULL;
+
+ while (parent && parent->type != FS_TYPE_PRIO_CHAINS) {
+ node = parent;
+ parent = parent->parent;
+ }
+
+ if (child)
+ *child = node;
+
+ return parent;
+}
+
+/* If reverse is false then return the first flow table next to the passed node
+ * in the tree, else return the last flow table before the node in the tree.
+ * If skip is true, skip the flow tables in the same prio_chains prio.
*/
-static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse)
+static struct mlx5_flow_table *find_closest_ft(struct fs_node *node, bool reverse,
+ bool skip)
{
+ struct fs_node *prio_chains_parent = NULL;
struct mlx5_flow_table *ft = NULL;
struct fs_node *curr_node;
struct fs_node *parent;

- parent = prio->node.parent;
- curr_node = &prio->node;
+ if (skip)
+ prio_chains_parent = find_prio_chains_parent(node, NULL);
+ parent = node->parent;
+ curr_node = node;
while (!ft && parent) {
- ft = find_closest_ft_recursive(parent, &curr_node->list, reverse);
+ if (parent != prio_chains_parent)
+ ft = find_closest_ft_recursive(parent, &curr_node->list,
+ reverse);
curr_node = parent;
parent = curr_node->parent;
}
@@ -897,15 +919,15 @@ static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool revers
}

/* Assuming all the tree is locked by mutex chain lock */
-static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio)
+static struct mlx5_flow_table *find_next_chained_ft(struct fs_node *node)
{
- return find_closest_ft(prio, false);
+ return find_closest_ft(node, false, true);
}

/* Assuming all the tree is locked by mutex chain lock */
-static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio)
+static struct mlx5_flow_table *find_prev_chained_ft(struct fs_node *node)
{
- return find_closest_ft(prio, true);
+ return find_closest_ft(node, true, true);
}

static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft,
@@ -917,7 +939,7 @@ static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft,
next_ns = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
fs_get_obj(prio, next_ns ? ft->ns->node.parent : ft->node.parent);

- return find_next_chained_ft(prio);
+ return find_next_chained_ft(&prio->node);
}

static int connect_fts_in_prio(struct mlx5_core_dev *dev,
@@ -941,21 +963,55 @@ static int connect_fts_in_prio(struct mlx5_core_dev *dev,
return 0;
}

+static struct mlx5_flow_table *find_closet_ft_prio_chains(struct fs_node *node,
+ struct fs_node *parent,
+ struct fs_node **child,
+ bool reverse)
+{
+ struct mlx5_flow_table *ft;
+
+ ft = find_closest_ft(node, reverse, false);
+
+ if (ft && parent == find_prio_chains_parent(&ft->node, child))
+ return ft;
+
+ return NULL;
+}
+
/* Connect flow tables from previous priority of prio to ft */
static int connect_prev_fts(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
struct fs_prio *prio)
{
+ struct fs_node *prio_parent, *parent = NULL, *child, *node;
struct mlx5_flow_table *prev_ft;
+ int err = 0;
+
+ prio_parent = find_prio_chains_parent(&prio->node, &child);
+
+ /* return directly if not under the first sub ns of prio_chains prio */
+ if (prio_parent && !list_is_first(&child->list, &prio_parent->children))
+ return 0;

- prev_ft = find_prev_chained_ft(prio);
- if (prev_ft) {
+ prev_ft = find_prev_chained_ft(&prio->node);
+ while (prev_ft) {
struct fs_prio *prev_prio;

fs_get_obj(prev_prio, prev_ft->node.parent);
- return connect_fts_in_prio(dev, prev_prio, ft);
+ err = connect_fts_in_prio(dev, prev_prio, ft);
+ if (err)
+ break;
+
+ if (!parent) {
+ parent = find_prio_chains_parent(&prev_prio->node, &child);
+ if (!parent)
+ break;
+ }
+
+ node = child;
+ prev_ft = find_closet_ft_prio_chains(node, parent, &child, true);
}
- return 0;
+ return err;
}

static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
@@ -1094,7 +1150,7 @@ static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table
if (err)
return err;

- next_ft = first_ft ? first_ft : find_next_chained_ft(prio);
+ next_ft = first_ft ? first_ft : find_next_chained_ft(&prio->node);
err = connect_fwd_rules(dev, ft, next_ft);
if (err)
return err;
@@ -1169,7 +1225,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa

tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
next_ft = unmanaged ? ft_attr->next_ft :
- find_next_chained_ft(fs_prio);
+ find_next_chained_ft(&fs_prio->node);
ft->def_miss_action = ns->def_miss_action;
ft->ns = ns;
err = root->cmds->create_flow_table(root, ft, ft_attr, next_ft);
@@ -2157,13 +2213,20 @@ EXPORT_SYMBOL(mlx5_del_flow_rules);
/* Assuming prio->node.children(flow tables) is sorted by level */
static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
{
+ struct fs_node *prio_parent, *child;
struct fs_prio *prio;

fs_get_obj(prio, ft->node.parent);

if (!list_is_last(&ft->node.list, &prio->node.children))
return list_next_entry(ft, node.list);
- return find_next_chained_ft(prio);
+
+ prio_parent = find_prio_chains_parent(&prio->node, &child);
+
+ if (prio_parent && list_is_first(&child->list, &prio_parent->children))
+ return find_closest_ft(&prio->node, false, false);
+
+ return find_next_chained_ft(&prio->node);
}

static int update_root_ft_destroy(struct mlx5_flow_table *ft)
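find_prio_chains_parent() above is a plain parent-pointer climb that
also reports the last node visited below the match. The same walk,
reduced to its skeleton:

        struct node {
                struct node *parent;
                int type;
        };

        static struct node *typed_ancestor(struct node *n, int type,
                                           struct node **child)
        {
                struct node *last = NULL;

                while (n && n->type != type) {
                        last = n;       /* direct child of the match */
                        n = n->parent;
                }
                if (child)
                        *child = last;
                return n;               /* NULL when nothing matches */
        }
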
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
index 23cb63fa4588..2e728e4e81fa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -14,6 +14,7 @@ int mlx5_irq_table_init(struct mlx5_core_dev *dev);
void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
int mlx5_irq_table_create(struct mlx5_core_dev *dev);
void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
+void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev);
int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table);
int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table);
struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 662f1d55e30e..5e0f7d96aac5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -591,6 +591,24 @@ static void irq_pools_destroy(struct mlx5_irq_table *table)
irq_pool_free(table->pf_pool);
}

+static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
+{
+ struct mlx5_irq *irq;
+ unsigned long index;
+
+ xa_for_each(&pool->irqs, index, irq)
+ free_irq(irq->irqn, &irq->nh);
+}
+
+static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
+{
+ if (table->sf_ctrl_pool) {
+ mlx5_irq_pool_free_irqs(table->sf_comp_pool);
+ mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
+ }
+ mlx5_irq_pool_free_irqs(table->pf_pool);
+}
+
/* irq_table API */

int mlx5_irq_table_init(struct mlx5_core_dev *dev)
@@ -670,6 +688,17 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
pci_free_irq_vectors(dev->pdev);
}

+void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_table *table = dev->priv.irq_table;
+
+ if (mlx5_core_is_sf(dev))
+ return;
+
+ mlx5_irq_pools_free_irqs(table);
+ pci_free_irq_vectors(dev->pdev);
+}
+
int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
{
if (table->sf_comp_pool)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index 84364691a379..d7b1a230b59e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -538,11 +538,12 @@ int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,

err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
if (err)
- return err;
+ goto err_free_in;

*reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id);
- kvfree(in);

+err_free_in:
+ kvfree(in);
return err;
}

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
index f8682356d0cf..94d4f9413ab7 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
@@ -193,6 +193,22 @@ void qed_hw_remove(struct qed_dev *cdev);
*/
struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn);

+/**
+ * qed_ptt_acquire_context(): Allocate a PTT window honoring the context
+ * atomicity.
+ *
+ * @p_hwfn: HW device data.
+ * @is_atomic: Hint from the caller - whether the function may sleep.
+ *
+ * Context: The function must not sleep when is_atomic == true.
+ * Return: struct qed_ptt.
+ *
+ * Should be called at the entry point to the driver
+ * (at the beginning of an exported function).
+ */
+struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn,
+ bool is_atomic);
+
/**
* qed_ptt_release(): Release PTT Window.
*
diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
index 3764190b948e..04602ac94708 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
@@ -693,13 +693,14 @@ static void _qed_fcoe_get_pstats(struct qed_hwfn *p_hwfn,
}

static int qed_fcoe_get_stats(struct qed_hwfn *p_hwfn,
- struct qed_fcoe_stats *p_stats)
+ struct qed_fcoe_stats *p_stats,
+ bool is_atomic)
{
struct qed_ptt *p_ptt;

memset(p_stats, 0, sizeof(*p_stats));

- p_ptt = qed_ptt_acquire(p_hwfn);
+ p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic);

if (!p_ptt) {
DP_ERR(p_hwfn, "Failed to acquire ptt\n");
@@ -973,19 +974,27 @@ static int qed_fcoe_destroy_conn(struct qed_dev *cdev,
QED_SPQ_MODE_EBLOCK, NULL);
}

+static int qed_fcoe_stats_context(struct qed_dev *cdev,
+ struct qed_fcoe_stats *stats,
+ bool is_atomic)
+{
+ return qed_fcoe_get_stats(QED_AFFIN_HWFN(cdev), stats, is_atomic);
+}
+
static int qed_fcoe_stats(struct qed_dev *cdev, struct qed_fcoe_stats *stats)
{
- return qed_fcoe_get_stats(QED_AFFIN_HWFN(cdev), stats);
+ return qed_fcoe_stats_context(cdev, stats, false);
}

void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
- struct qed_mcp_fcoe_stats *stats)
+ struct qed_mcp_fcoe_stats *stats,
+ bool is_atomic)
{
struct qed_fcoe_stats proto_stats;

/* Retrieve FW statistics */
memset(&proto_stats, 0, sizeof(proto_stats));
- if (qed_fcoe_stats(cdev, &proto_stats)) {
+ if (qed_fcoe_stats_context(cdev, &proto_stats, is_atomic)) {
DP_VERBOSE(cdev, QED_MSG_STORAGE,
"Failed to collect FCoE statistics\n");
return;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
index 19c85adf4ceb..214e8299ecb4 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
@@ -28,8 +28,20 @@ int qed_fcoe_alloc(struct qed_hwfn *p_hwfn);
void qed_fcoe_setup(struct qed_hwfn *p_hwfn);

void qed_fcoe_free(struct qed_hwfn *p_hwfn);
+/**
+ * qed_get_protocol_stats_fcoe(): Fills the provided struct
+ * with FCoE statistics.
+ *
+ * @cdev: Qed dev pointer.
+ * @stats: Points to struct that will be filled with statistics.
+ * @is_atomic: Hint from the caller - whether the function may sleep.
+ *
+ * Context: The function must not sleep when is_atomic == true.
+ * Return: Void.
+ */
void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
- struct qed_mcp_fcoe_stats *stats);
+ struct qed_mcp_fcoe_stats *stats,
+ bool is_atomic);
#else /* CONFIG_QED_FCOE */
static inline int qed_fcoe_alloc(struct qed_hwfn *p_hwfn)
{
@@ -40,7 +52,8 @@ static inline void qed_fcoe_setup(struct qed_hwfn *p_hwfn) {}
static inline void qed_fcoe_free(struct qed_hwfn *p_hwfn) {}

static inline void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
- struct qed_mcp_fcoe_stats *stats)
+ struct qed_mcp_fcoe_stats *stats,
+ bool is_atomic)
{
}
#endif /* CONFIG_QED_FCOE */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
index 554f30b0cfd5..6263f847b6b9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
@@ -23,7 +23,10 @@
#include "qed_reg_addr.h"
#include "qed_sriov.h"

-#define QED_BAR_ACQUIRE_TIMEOUT 1000
+#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT 1000
+#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP 1000
+#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT 100000
+#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY 10

/* Invalid values */
#define QED_BAR_INVALID_OFFSET (cpu_to_le32(-1))
@@ -84,12 +87,22 @@ void qed_ptt_pool_free(struct qed_hwfn *p_hwfn)
}

struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn)
+{
+ return qed_ptt_acquire_context(p_hwfn, false);
+}
+
+struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn, bool is_atomic)
{
struct qed_ptt *p_ptt;
- unsigned int i;
+ unsigned int i, count;
+
+ if (is_atomic)
+ count = QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT;
+ else
+ count = QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT;

/* Take the free PTT from the list */
- for (i = 0; i < QED_BAR_ACQUIRE_TIMEOUT; i++) {
+ for (i = 0; i < count; i++) {
spin_lock_bh(&p_hwfn->p_ptt_pool->lock);

if (!list_empty(&p_hwfn->p_ptt_pool->free_list)) {
@@ -105,7 +118,12 @@ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn)
}

spin_unlock_bh(&p_hwfn->p_ptt_pool->lock);
- usleep_range(1000, 2000);
+
+ if (is_atomic)
+ udelay(QED_BAR_ACQUIRE_TIMEOUT_UDELAY);
+ else
+ usleep_range(QED_BAR_ACQUIRE_TIMEOUT_USLEEP,
+ QED_BAR_ACQUIRE_TIMEOUT_USLEEP * 2);
}

DP_NOTICE(p_hwfn, "PTT acquire timeout - failed to allocate PTT\n");
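The new timeout constants above keep the total PTT-acquire timeout at
roughly one second in both modes: 1000 iterations of ~1000us when
sleeping is allowed versus 100000 iterations of 10us busy-waiting when
it is not. The context split itself is the standard one:

        #include <linux/delay.h>

        if (is_atomic)
                udelay(10);             /* busy-wait; legal in atomic context */
        else
                usleep_range(1000, 2000); /* sleeps; kinder to the CPU */
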
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
index 511ab214eb9c..980e7289b481 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -999,13 +999,14 @@ static void _qed_iscsi_get_pstats(struct qed_hwfn *p_hwfn,
}

static int qed_iscsi_get_stats(struct qed_hwfn *p_hwfn,
- struct qed_iscsi_stats *stats)
+ struct qed_iscsi_stats *stats,
+ bool is_atomic)
{
struct qed_ptt *p_ptt;

memset(stats, 0, sizeof(*stats));

- p_ptt = qed_ptt_acquire(p_hwfn);
+ p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic);
if (!p_ptt) {
DP_ERR(p_hwfn, "Failed to acquire ptt\n");
return -EAGAIN;
@@ -1336,9 +1337,16 @@ static int qed_iscsi_destroy_conn(struct qed_dev *cdev,
QED_SPQ_MODE_EBLOCK, NULL);
}

+static int qed_iscsi_stats_context(struct qed_dev *cdev,
+ struct qed_iscsi_stats *stats,
+ bool is_atomic)
+{
+ return qed_iscsi_get_stats(QED_AFFIN_HWFN(cdev), stats, is_atomic);
+}
+
static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats *stats)
{
- return qed_iscsi_get_stats(QED_AFFIN_HWFN(cdev), stats);
+ return qed_iscsi_stats_context(cdev, stats, false);
}

static int qed_iscsi_change_mac(struct qed_dev *cdev,
@@ -1358,13 +1366,14 @@ static int qed_iscsi_change_mac(struct qed_dev *cdev,
}

void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
- struct qed_mcp_iscsi_stats *stats)
+ struct qed_mcp_iscsi_stats *stats,
+ bool is_atomic)
{
struct qed_iscsi_stats proto_stats;

/* Retrieve FW statistics */
memset(&proto_stats, 0, sizeof(proto_stats));
- if (qed_iscsi_stats(cdev, &proto_stats)) {
+ if (qed_iscsi_stats_context(cdev, &proto_stats, is_atomic)) {
DP_VERBOSE(cdev, QED_MSG_STORAGE,
"Failed to collect ISCSI statistics\n");
return;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
index dec2b00259d4..974cb8d26608 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
@@ -39,11 +39,14 @@ void qed_iscsi_free(struct qed_hwfn *p_hwfn);
*
* @cdev: Qed dev pointer.
* @stats: Points to struct that will be filled with statistics.
+ * @is_atomic: Hint from the caller - whether the function may sleep.
*
+ * Context: The function must not sleep when is_atomic == true.
* Return: Void.
*/
void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
- struct qed_mcp_iscsi_stats *stats);
+ struct qed_mcp_iscsi_stats *stats,
+ bool is_atomic);
#else /* IS_ENABLED(CONFIG_QED_ISCSI) */
static inline int qed_iscsi_alloc(struct qed_hwfn *p_hwfn)
{
@@ -56,7 +59,8 @@ static inline void qed_iscsi_free(struct qed_hwfn *p_hwfn) {}

static inline void
qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
- struct qed_mcp_iscsi_stats *stats) {}
+ struct qed_mcp_iscsi_stats *stats,
+ bool is_atomic) {}
#endif /* IS_ENABLED(CONFIG_QED_ISCSI) */

#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 7776d3bdd459..970b9aabbc3d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -1863,7 +1863,8 @@ static void __qed_get_vport_stats(struct qed_hwfn *p_hwfn,
}

static void _qed_get_vport_stats(struct qed_dev *cdev,
- struct qed_eth_stats *stats)
+ struct qed_eth_stats *stats,
+ bool is_atomic)
{
u8 fw_vport = 0;
int i;
@@ -1872,10 +1873,11 @@ static void _qed_get_vport_stats(struct qed_dev *cdev,

for_each_hwfn(cdev, i) {
struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
- struct qed_ptt *p_ptt = IS_PF(cdev) ? qed_ptt_acquire(p_hwfn)
- : NULL;
+ struct qed_ptt *p_ptt;
bool b_get_port_stats;

+ p_ptt = IS_PF(cdev) ? qed_ptt_acquire_context(p_hwfn, is_atomic)
+ : NULL;
if (IS_PF(cdev)) {
/* The main vport index is relative first */
if (qed_fw_vport(p_hwfn, 0, &fw_vport)) {
@@ -1900,6 +1902,13 @@ static void _qed_get_vport_stats(struct qed_dev *cdev,
}

void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
+{
+ qed_get_vport_stats_context(cdev, stats, false);
+}
+
+void qed_get_vport_stats_context(struct qed_dev *cdev,
+ struct qed_eth_stats *stats,
+ bool is_atomic)
{
u32 i;

@@ -1908,7 +1917,7 @@ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
return;
}

- _qed_get_vport_stats(cdev, stats);
+ _qed_get_vport_stats(cdev, stats, is_atomic);

if (!cdev->reset_stats)
return;
@@ -1960,7 +1969,7 @@ void qed_reset_vport_stats(struct qed_dev *cdev)
if (!cdev->reset_stats) {
DP_INFO(cdev, "Reset stats not allocated\n");
} else {
- _qed_get_vport_stats(cdev, cdev->reset_stats);
+ _qed_get_vport_stats(cdev, cdev->reset_stats, false);
cdev->reset_stats->common.link_change_count = 0;
}
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h
index a538cf478c14..2d2f82c785ad 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h
@@ -249,8 +249,32 @@ qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
enum spq_mode comp_mode,
struct qed_spq_comp_cb *p_comp_data);

+/**
+ * qed_get_vport_stats(): Fills the provided struct
+ * with vport statistics.
+ *
+ * @cdev: Qed dev pointer.
+ * @stats: Points to struct that will be filled with statistics.
+ *
+ * Return: Void.
+ */
void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats);

+/**
+ * qed_get_vport_stats_context(): Fills the provided struct
+ * with vport statistics.
+ *
+ * @cdev: Qed dev pointer.
+ * @stats: Points to struct that will be filled with statistics.
+ * @is_atomic: Hint from the caller - whether the function may sleep.
+ *
+ * Context: The function must not sleep when is_atomic == true.
+ * Return: Void.
+ */
+void qed_get_vport_stats_context(struct qed_dev *cdev,
+ struct qed_eth_stats *stats,
+ bool is_atomic);
+
void qed_reset_vport_stats(struct qed_dev *cdev);

/**
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index c91898be7c03..25d9c254288b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -3101,7 +3101,7 @@ void qed_get_protocol_stats(struct qed_dev *cdev,

switch (type) {
case QED_MCP_LAN_STATS:
- qed_get_vport_stats(cdev, &eth_stats);
+ qed_get_vport_stats_context(cdev, &eth_stats, true);
stats->lan_stats.ucast_rx_pkts =
eth_stats.common.rx_ucast_pkts;
stats->lan_stats.ucast_tx_pkts =
@@ -3109,10 +3109,10 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
stats->lan_stats.fcs_err = -1;
break;
case QED_MCP_FCOE_STATS:
- qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats);
+ qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats, true);
break;
case QED_MCP_ISCSI_STATS:
- qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats);
+ qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats, true);
break;
default:
DP_VERBOSE(cdev, QED_MSG_SP,
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index 9b46579b5a10..b130e978366c 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -1851,6 +1851,17 @@ static int netsec_of_probe(struct platform_device *pdev,
return err;
}

+ /*
+ * SynQuacer is physically configured with TX and RX delays,
+ * but the standard firmware has long claimed otherwise, so
+ * ignore what it reports.
+ */
+ if (of_machine_is_compatible("socionext,developer-box") &&
+ priv->phy_interface != PHY_INTERFACE_MODE_RGMII_ID) {
+ dev_warn(&pdev->dev, "Outdated firmware reports incorrect PHY mode, overriding\n");
+ priv->phy_interface = PHY_INTERFACE_MODE_RGMII_ID;
+ }
+
priv->phy_np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
if (!priv->phy_np) {
dev_err(&pdev->dev, "missing required property 'phy-handle'\n");
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 1066420d6a83..6bf5e341c3c1 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1568,12 +1568,16 @@ static int temac_probe(struct platform_device *pdev)
}

/* Error handle returned DMA RX and TX interrupts */
- if (lp->rx_irq < 0)
- return dev_err_probe(&pdev->dev, lp->rx_irq,
+ if (lp->rx_irq <= 0) {
+ rc = lp->rx_irq ?: -EINVAL;
+ return dev_err_probe(&pdev->dev, rc,
"could not get DMA RX irq\n");
- if (lp->tx_irq < 0)
- return dev_err_probe(&pdev->dev, lp->tx_irq,
+ }
+ if (lp->tx_irq <= 0) {
+ rc = lp->tx_irq ?: -EINVAL;
+ return dev_err_probe(&pdev->dev, rc,
"could not get DMA TX irq\n");
+ }

if (temac_np) {
/* Retrieve the MAC address */
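On `lp->rx_irq ?: -EINVAL` above: the IRQ lookup can yield 0 for "not
found" as well as a negative errno, and dev_err_probe() needs a real
error code. The GNU `?:` extension evaluates its left operand once and
keeps it when non-zero, so 0 maps to -EINVAL while genuine errnos pass
through:

        if (lp->rx_irq <= 0) {
                /* a ?: b  ==  a ? a : b, with a evaluated once */
                rc = lp->rx_irq ?: -EINVAL;
                return dev_err_probe(&pdev->dev, rc,
                                     "could not get DMA RX irq\n");
        }
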
diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c
index 510ff2dc8999..cd81dd916c29 100644
--- a/drivers/net/ipa/ipa_table.c
+++ b/drivers/net/ipa/ipa_table.c
@@ -311,16 +311,15 @@ static int ipa_filter_reset(struct ipa *ipa, bool modem)
if (ret)
return ret;

- ret = ipa_filter_reset_table(ipa, IPA_MEM_V4_FILTER_HASHED, modem);
- if (ret)
+ ret = ipa_filter_reset_table(ipa, IPA_MEM_V6_FILTER, modem);
+ if (ret || !ipa_table_hash_support(ipa))
return ret;

- ret = ipa_filter_reset_table(ipa, IPA_MEM_V6_FILTER, modem);
+ ret = ipa_filter_reset_table(ipa, IPA_MEM_V4_FILTER_HASHED, modem);
if (ret)
return ret;
- ret = ipa_filter_reset_table(ipa, IPA_MEM_V6_FILTER_HASHED, modem);

- return ret;
+ return ipa_filter_reset_table(ipa, IPA_MEM_V6_FILTER_HASHED, modem);
}

/* The AP routes and modem routes are each contiguous within the
@@ -329,11 +328,12 @@ static int ipa_filter_reset(struct ipa *ipa, bool modem)
* */
static int ipa_route_reset(struct ipa *ipa, bool modem)
{
+ bool hash_support = ipa_table_hash_support(ipa);
struct gsi_trans *trans;
u16 first;
u16 count;

- trans = ipa_cmd_trans_alloc(ipa, 4);
+ trans = ipa_cmd_trans_alloc(ipa, hash_support ? 4 : 2);
if (!trans) {
dev_err(&ipa->pdev->dev,
"no transaction for %s route reset\n",
@@ -350,12 +350,14 @@ static int ipa_route_reset(struct ipa *ipa, bool modem)
}

ipa_table_reset_add(trans, false, first, count, IPA_MEM_V4_ROUTE);
- ipa_table_reset_add(trans, false, first, count,
- IPA_MEM_V4_ROUTE_HASHED);
-
ipa_table_reset_add(trans, false, first, count, IPA_MEM_V6_ROUTE);
- ipa_table_reset_add(trans, false, first, count,
- IPA_MEM_V6_ROUTE_HASHED);
+
+ if (hash_support) {
+ ipa_table_reset_add(trans, false, first, count,
+ IPA_MEM_V4_ROUTE_HASHED);
+ ipa_table_reset_add(trans, false, first, count,
+ IPA_MEM_V6_ROUTE_HASHED);
+ }

gsi_trans_commit_wait(trans);

diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 3c468ef8f245..8c010857e6d7 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -533,7 +533,7 @@ static int tap_open(struct inode *inode, struct file *file)
q->sock.state = SS_CONNECTED;
q->sock.file = file;
q->sock.ops = &tap_socket_ops;
- sock_init_data_uid(&q->sock, &q->sk, inode->i_uid);
+ sock_init_data_uid(&q->sock, &q->sk, current_fsuid());
q->sk.sk_write_space = tap_sock_write_space;
q->sk.sk_destruct = tap_sock_destruct;
q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 7c8db8f6f661..228f5f9ef1dd 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -3457,7 +3457,7 @@ static int tun_chr_open(struct inode *inode, struct file * file)
tfile->socket.file = file;
tfile->socket.ops = &tun_socket_ops;

- sock_init_data_uid(&tfile->socket, &tfile->sk, inode->i_uid);
+ sock_init_data_uid(&tfile->socket, &tfile->sk, current_fsuid());

tfile->sk.sk_write_space = tun_sock_write_space;
tfile->sk.sk_sndbuf = INT_MAX;
diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index fb5f59d0d55d..f07bfe56ec87 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -618,9 +618,23 @@ static const struct usb_device_id products[] = {
.match_flags = USB_DEVICE_ID_MATCH_INT_INFO
| USB_DEVICE_ID_MATCH_DEVICE,
.idVendor = 0x04DD,
+ .idProduct = 0x8005, /* A-300 */
+ ZAURUS_FAKE_INTERFACE,
+ .driver_info = 0,
+}, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+ .idVendor = 0x04DD,
.idProduct = 0x8006, /* B-500/SL-5600 */
ZAURUS_MASTER_INTERFACE,
.driver_info = 0,
+}, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+ .idVendor = 0x04DD,
+ .idProduct = 0x8006, /* B-500/SL-5600 */
+ ZAURUS_FAKE_INTERFACE,
+ .driver_info = 0,
}, {
.match_flags = USB_DEVICE_ID_MATCH_INT_INFO
| USB_DEVICE_ID_MATCH_DEVICE,
@@ -628,6 +642,13 @@ static const struct usb_device_id products[] = {
.idProduct = 0x8007, /* C-700 */
ZAURUS_MASTER_INTERFACE,
.driver_info = 0,
+}, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+ .idVendor = 0x04DD,
+ .idProduct = 0x8007, /* C-700 */
+ ZAURUS_FAKE_INTERFACE,
+ .driver_info = 0,
}, {
.match_flags = USB_DEVICE_ID_MATCH_INT_INFO
| USB_DEVICE_ID_MATCH_DEVICE,
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 64a9a80b2309..405e588f8a3a 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1770,6 +1770,10 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
} else if (!info->in || !info->out)
status = usbnet_get_endpoints (dev, udev);
else {
+ u8 ep_addrs[3] = {
+ info->in + USB_DIR_IN, info->out + USB_DIR_OUT, 0
+ };
+
dev->in = usb_rcvbulkpipe (xdev, info->in);
dev->out = usb_sndbulkpipe (xdev, info->out);
if (!(info->flags & FLAG_NO_SETINT))
@@ -1779,6 +1783,8 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
else
status = 0;

+ if (status == 0 && !usb_check_bulk_endpoints(udev, ep_addrs))
+ status = -EINVAL;
}
if (status >= 0 && dev->status)
status = init_status (dev, udev);
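The ep_addrs check above defends against devices whose descriptors do
not actually provide the bulk endpoints that the static driver_info
promises. usb_check_bulk_endpoints() takes a zero-terminated array of
endpoint addresses and succeeds only if every one exists on the
interface and is bulk:

        u8 ep_addrs[3] = {
                info->in + USB_DIR_IN,  /* RX bulk, e.g. 0x81 */
                info->out + USB_DIR_OUT,/* TX bulk; USB_DIR_OUT is 0 */
                0                       /* array terminator */
        };

        if (!usb_check_bulk_endpoints(udev, ep_addrs))
                status = -EINVAL;       /* endpoints missing or not bulk */
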
diff --git a/drivers/net/usb/zaurus.c b/drivers/net/usb/zaurus.c
index 7984f2157d22..df3617c4c44e 100644
--- a/drivers/net/usb/zaurus.c
+++ b/drivers/net/usb/zaurus.c
@@ -289,9 +289,23 @@ static const struct usb_device_id products [] = {
.match_flags = USB_DEVICE_ID_MATCH_INT_INFO
| USB_DEVICE_ID_MATCH_DEVICE,
.idVendor = 0x04DD,
+ .idProduct = 0x8005, /* A-300 */
+ ZAURUS_FAKE_INTERFACE,
+ .driver_info = (unsigned long)&bogus_mdlm_info,
+}, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+ .idVendor = 0x04DD,
.idProduct = 0x8006, /* B-500/SL-5600 */
ZAURUS_MASTER_INTERFACE,
.driver_info = ZAURUS_PXA_INFO,
+}, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+ .idVendor = 0x04DD,
+ .idProduct = 0x8006, /* B-500/SL-5600 */
+ ZAURUS_FAKE_INTERFACE,
+ .driver_info = (unsigned long)&bogus_mdlm_info,
}, {
.match_flags = USB_DEVICE_ID_MATCH_INT_INFO
| USB_DEVICE_ID_MATCH_DEVICE,
@@ -299,6 +313,13 @@ static const struct usb_device_id products [] = {
.idProduct = 0x8007, /* C-700 */
ZAURUS_MASTER_INTERFACE,
.driver_info = ZAURUS_PXA_INFO,
+}, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+ .idVendor = 0x04DD,
+ .idProduct = 0x8007, /* C-700 */
+ ZAURUS_FAKE_INTERFACE,
+ .driver_info = (unsigned long)&bogus_mdlm_info,
}, {
.match_flags = USB_DEVICE_ID_MATCH_INT_INFO
| USB_DEVICE_ID_MATCH_DEVICE,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
index 6dbaaf95ee38..2092aa373ab3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
@@ -123,12 +123,12 @@ mt7615_eeprom_parse_hw_band_cap(struct mt7615_dev *dev)
case MT_EE_5GHZ:
dev->mphy.cap.has_5ghz = true;
break;
- case MT_EE_2GHZ:
- dev->mphy.cap.has_2ghz = true;
- break;
case MT_EE_DBDC:
dev->dbdc_support = true;
fallthrough;
+ case MT_EE_2GHZ:
+ dev->mphy.cap.has_2ghz = true;
+ break;
default:
dev->mphy.cap.has_2ghz = true;
dev->mphy.cap.has_5ghz = true;
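
The mt7615 hunk is purely a case-ordering fix: MT_EE_DBDC keeps its
fallthrough, but the 2 GHz case moves below it, so a DBDC EEPROM value now
enables dbdc_support plus the 2 GHz capability instead of falling through to
the default branch and claiming both bands. A small self-contained
demonstration of the corrected control flow (the enum values here are
stand-ins, not the driver's EEPROM encoding):

#include <stdbool.h>
#include <stdio.h>

enum { MT_EE_5GHZ = 1, MT_EE_2GHZ = 2, MT_EE_DBDC = 3 }; /* stand-in values */

static void parse_band_cap(int val, bool *has_2ghz, bool *has_5ghz, bool *dbdc)
{
    switch (val) {
    case MT_EE_5GHZ:
        *has_5ghz = true;
        break;
    case MT_EE_DBDC:
        *dbdc = true;
        /* fall through: DBDC now lands on the 2 GHz case, not default */
    case MT_EE_2GHZ:
        *has_2ghz = true;
        break;
    default:
        *has_2ghz = true;
        *has_5ghz = true;
        break;
    }
}

int main(void)
{
    bool g2 = false, g5 = false, dbdc = false;

    parse_band_cap(MT_EE_DBDC, &g2, &g5, &dbdc);
    printf("dbdc=%d 2ghz=%d 5ghz=%d\n", dbdc, g2, g5); /* dbdc=1 2ghz=1 5ghz=0 */
    return 0;
}
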
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 1d195429753d..613eab729704 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -716,7 +716,6 @@ struct qeth_card_info {
u16 chid;
u8 ids_valid:1; /* cssid,iid,chid */
u8 dev_addr_is_registered:1;
- u8 open_when_online:1;
u8 promisc_mode:1;
u8 use_v1_blkt:1;
u8 is_vm_nic:1;
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 8bd9fd51208c..ae4b6d24bc90 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5371,8 +5371,6 @@ int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc,
qeth_clear_ipacmd_list(card);

rtnl_lock();
- card->info.open_when_online = card->dev->flags & IFF_UP;
- dev_close(card->dev);
netif_device_detach(card->dev);
netif_carrier_off(card->dev);
rtnl_unlock();
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index c6ded3fdd715..9ef2118fc7a2 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -2387,9 +2387,12 @@ static int qeth_l2_set_online(struct qeth_card *card, bool carrier_ok)
qeth_enable_hw_features(dev);
qeth_l2_enable_brport_features(card);

- if (card->info.open_when_online) {
- card->info.open_when_online = 0;
- dev_open(dev, NULL);
+ if (netif_running(dev)) {
+ local_bh_disable();
+ napi_schedule(&card->napi);
+ /* kick-start the NAPI softirq: */
+ local_bh_enable();
+ qeth_l2_set_rx_mode(dev);
}
rtnl_unlock();
}
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index d8487a10cd55..c0f30cefec10 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2017,9 +2017,11 @@ static int qeth_l3_set_online(struct qeth_card *card, bool carrier_ok)
netif_device_attach(dev);
qeth_enable_hw_features(dev);

- if (card->info.open_when_online) {
- card->info.open_when_online = 0;
- dev_open(dev, NULL);
+ if (netif_running(dev)) {
+ local_bh_disable();
+ napi_schedule(&card->napi);
+ /* kick-start the NAPI softirq: */
+ local_bh_enable();
}
rtnl_unlock();
}
diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index 77917b339870..a64def01d824 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -534,8 +534,7 @@ static void zfcp_fc_adisc_handler(void *data)

/* re-init to undo drop from zfcp_fc_adisc() */
port->d_id = ntoh24(adisc_resp->adisc_port_id);
- /* port is good, unblock rport without going through erp */
- zfcp_scsi_schedule_rport_register(port);
+ /* port is still good, nothing to do */
out:
atomic_andnot(ZFCP_STATUS_PORT_LINK_TEST, &port->status);
put_device(&port->dev);
@@ -595,9 +594,6 @@ void zfcp_fc_link_test_work(struct work_struct *work)
int retval;

set_worker_desc("zadisc%16llx", port->wwpn); /* < WORKER_DESC_LEN=24 */
- get_device(&port->dev);
- port->rport_task = RPORT_DEL;
- zfcp_scsi_rport_work(&port->rport_work);

/* only issue one test command at one time per port */
if (atomic_read(&port->status) & ZFCP_STATUS_PORT_LINK_TEST)
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 5284f9a0b826..54a1b8514f04 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -365,6 +365,7 @@ static void storvsc_on_channel_callback(void *context);
#define STORVSC_FC_MAX_LUNS_PER_TARGET 255
#define STORVSC_FC_MAX_TARGETS 128
#define STORVSC_FC_MAX_CHANNELS 8
+#define STORVSC_FC_MAX_XFER_SIZE ((u32)(512 * 1024))

#define STORVSC_IDE_MAX_LUNS_PER_TARGET 64
#define STORVSC_IDE_MAX_TARGETS 1
@@ -2002,6 +2003,9 @@ static int storvsc_probe(struct hv_device *device,
* protecting it from any weird value.
*/
max_xfer_bytes = round_down(stor_device->max_transfer_bytes, HV_HYP_PAGE_SIZE);
+ if (is_fc)
+ max_xfer_bytes = min(max_xfer_bytes, STORVSC_FC_MAX_XFER_SIZE);
+
/* max_hw_sectors_kb */
host->max_sectors = max_xfer_bytes >> 9;
/*
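
The storvsc change caps Fibre Channel transfers at 512 KiB before max_sectors
is derived. Worked through with a hypothetical host-reported limit of 8 MiB:
round_down() to the 4 KiB hypervisor page size leaves 8 MiB, the FC clamp cuts
it to 512 KiB, and the shift by 9 converts bytes to 512-byte sectors:

#include <stdio.h>

#define HV_HYP_PAGE_SIZE         4096u
#define STORVSC_FC_MAX_XFER_SIZE ((unsigned int)(512 * 1024))
#define round_down(x, y)         ((x) & ~((y) - 1)) /* y: power of two */
#define min(a, b)                ((a) < (b) ? (a) : (b))

int main(void)
{
    unsigned int reported = 8u * 1024 * 1024; /* hypothetical host limit */
    unsigned int max_xfer = round_down(reported, HV_HYP_PAGE_SIZE);

    max_xfer = min(max_xfer, STORVSC_FC_MAX_XFER_SIZE);
    printf("max_sectors = %u\n", max_xfer >> 9); /* 1024 */
    return 0;
}
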
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index d93e8735ab1f..d7aad5e8ee37 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -494,12 +494,16 @@ static void fragment_free_space(struct btrfs_block_group *block_group)
* used yet since their free space will be released as soon as the transaction
* commits.
*/
-u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end)
+int add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end,
+ u64 *total_added_ret)
{
struct btrfs_fs_info *info = block_group->fs_info;
- u64 extent_start, extent_end, size, total_added = 0;
+ u64 extent_start, extent_end, size;
int ret;

+ if (total_added_ret)
+ *total_added_ret = 0;
+
while (start < end) {
ret = find_first_extent_bit(&info->excluded_extents, start,
&extent_start, &extent_end,
@@ -512,10 +516,12 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end
start = extent_end + 1;
} else if (extent_start > start && extent_start < end) {
size = extent_start - start;
- total_added += size;
ret = btrfs_add_free_space_async_trimmed(block_group,
start, size);
- BUG_ON(ret); /* -ENOMEM or logic error */
+ if (ret)
+ return ret;
+ if (total_added_ret)
+ *total_added_ret += size;
start = extent_end + 1;
} else {
break;
@@ -524,13 +530,15 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end

if (start < end) {
size = end - start;
- total_added += size;
ret = btrfs_add_free_space_async_trimmed(block_group, start,
size);
- BUG_ON(ret); /* -ENOMEM or logic error */
+ if (ret)
+ return ret;
+ if (total_added_ret)
+ *total_added_ret += size;
}

- return total_added;
+ return 0;
}

static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
@@ -637,8 +645,13 @@ static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)

if (key.type == BTRFS_EXTENT_ITEM_KEY ||
key.type == BTRFS_METADATA_ITEM_KEY) {
- total_found += add_new_free_space(block_group, last,
- key.objectid);
+ u64 space_added;
+
+ ret = add_new_free_space(block_group, last, key.objectid,
+ &space_added);
+ if (ret)
+ goto out;
+ total_found += space_added;
if (key.type == BTRFS_METADATA_ITEM_KEY)
last = key.objectid +
fs_info->nodesize;
@@ -653,11 +666,10 @@ static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
}
path->slots[0]++;
}
- ret = 0;
-
- total_found += add_new_free_space(block_group, last,
- block_group->start + block_group->length);

+ ret = add_new_free_space(block_group, last,
+ block_group->start + block_group->length,
+ NULL);
out:
btrfs_free_path(path);
return ret;
@@ -2101,9 +2113,11 @@ static int read_one_block_group(struct btrfs_fs_info *info,
btrfs_free_excluded_extents(cache);
} else if (cache->used == 0) {
cache->cached = BTRFS_CACHE_FINISHED;
- add_new_free_space(cache, cache->start,
- cache->start + cache->length);
+ ret = add_new_free_space(cache, cache->start,
+ cache->start + cache->length, NULL);
btrfs_free_excluded_extents(cache);
+ if (ret)
+ goto error;
}

ret = btrfs_add_block_group_cache(info, cache);
@@ -2529,9 +2543,12 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
return ERR_PTR(ret);
}

- add_new_free_space(cache, chunk_offset, chunk_offset + size);
-
+ ret = add_new_free_space(cache, chunk_offset, chunk_offset + size, NULL);
btrfs_free_excluded_extents(cache);
+ if (ret) {
+ btrfs_put_block_group(cache);
+ return ERR_PTR(ret);
+ }

/*
* Ensure the corresponding space_info object is created and
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 8fb14b99a1d1..0a3d38682358 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -284,8 +284,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait);
void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
struct btrfs_caching_control *btrfs_get_caching_control(
struct btrfs_block_group *cache);
-u64 add_new_free_space(struct btrfs_block_group *block_group,
- u64 start, u64 end);
+int add_new_free_space(struct btrfs_block_group *block_group,
+ u64 start, u64 end, u64 *total_added_ret);
struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
struct btrfs_fs_info *fs_info,
const u64 chunk_offset);
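
Across these btrfs hunks, add_new_free_space() switches from returning the
accumulated size (with BUG_ON() on allocation failure) to returning 0 or a
negative errno, with the size reported through an optional out-pointer so
callers that do not need it can pass NULL. A generic sketch of that calling
convention, with a hypothetical validation standing in for the real failure
paths:

#include <errno.h>
#include <stdio.h>

/* Hypothetical example of the new convention: return 0 or -errno and
 * report the size through an optional out-pointer. */
static int add_range(unsigned long long start, unsigned long long end,
                     unsigned long long *total_added_ret)
{
    if (total_added_ret)
        *total_added_ret = 0;

    if (end < start)
        return -EINVAL; /* propagated to the caller, no BUG_ON() */

    if (total_added_ret)
        *total_added_ret += end - start;
    return 0;
}

int main(void)
{
    unsigned long long added;

    if (!add_range(100, 164, &added))
        printf("added %llu bytes\n", added); /* 64 */
    return add_range(100, 164, NULL);        /* callers may pass NULL */
}
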
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index a07450f64abb..a207db932226 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1510,9 +1510,13 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
if (prev_bit == 0 && bit == 1) {
extent_start = offset;
} else if (prev_bit == 1 && bit == 0) {
- total_found += add_new_free_space(block_group,
- extent_start,
- offset);
+ u64 space_added;
+
+ ret = add_new_free_space(block_group, extent_start,
+ offset, &space_added);
+ if (ret)
+ goto out;
+ total_found += space_added;
if (total_found > CACHING_CTL_WAKE_UP) {
total_found = 0;
wake_up(&caching_ctl->wait);
@@ -1524,8 +1528,9 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
}
}
if (prev_bit == 1) {
- total_found += add_new_free_space(block_group, extent_start,
- end);
+ ret = add_new_free_space(block_group, extent_start, end, NULL);
+ if (ret)
+ goto out;
extent_count++;
}

@@ -1564,6 +1569,8 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
end = block_group->start + block_group->length;

while (1) {
+ u64 space_added;
+
ret = btrfs_next_item(root, path);
if (ret < 0)
goto out;
@@ -1578,8 +1585,11 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
ASSERT(key.objectid < end && key.objectid + key.offset <= end);

- total_found += add_new_free_space(block_group, key.objectid,
- key.objectid + key.offset);
+ ret = add_new_free_space(block_group, key.objectid,
+ key.objectid + key.offset, &space_added);
+ if (ret)
+ goto out;
+ total_found += space_added;
if (total_found > CACHING_CTL_WAKE_UP) {
total_found = 0;
wake_up(&caching_ctl->wait);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 1989c8deea55..dcabe2783edf 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4758,7 +4758,7 @@ static void delayed_work(struct work_struct *work)

dout("mdsc delayed_work\n");

- if (mdsc->stopping)
+ if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
return;

mutex_lock(&mdsc->mutex);
@@ -4937,7 +4937,7 @@ void send_flush_mdlog(struct ceph_mds_session *s)
void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
{
dout("pre_umount\n");
- mdsc->stopping = 1;
+ mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN;

ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);
ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 18b026b1ac63..9a80658f4167 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -380,6 +380,11 @@ struct cap_wait {
int want;
};

+enum {
+ CEPH_MDSC_STOPPING_BEGIN = 1,
+ CEPH_MDSC_STOPPING_FLUSHED = 2,
+};
+
/*
* mds client state
*/
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 3fc48b43cab0..a5f52013314d 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1374,6 +1374,16 @@ static void ceph_kill_sb(struct super_block *s)
ceph_mdsc_pre_umount(fsc->mdsc);
flush_fs_workqueues(fsc);

+ /*
+ * Though the kill_anon_super() will finally trigger the
+ * sync_filesystem() anyway, we still need to do it here
+ * and then bump the stage of shutdown to stop the work
+ * queue as early as possible.
+ */
+ sync_filesystem(s);
+
+ fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
+
kill_anon_super(s);

fsc->client->extra_mon_dispatch = NULL;
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 533e612b6a48..361f3c29897e 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -989,10 +989,11 @@ static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be,
struct z_erofs_bvec *bvec)
{
struct z_erofs_bvec_item *item;
+ unsigned int pgnr;

- if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK)) {
- unsigned int pgnr;
-
+ if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK) &&
+ (bvec->end == PAGE_SIZE ||
+ bvec->offset + bvec->end == be->pcl->length)) {
pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT;
DBG_BUGON(pgnr >= be->nr_pages);
if (!be->decompressed_pages[pgnr]) {
diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c
index 9f42f25fab92..e918decb3735 100644
--- a/fs/exfat/balloc.c
+++ b/fs/exfat/balloc.c
@@ -69,7 +69,7 @@ static int exfat_allocate_bitmap(struct super_block *sb,
}
sbi->map_sectors = ((need_map_size - 1) >>
(sb->s_blocksize_bits)) + 1;
- sbi->vol_amap = kmalloc_array(sbi->map_sectors,
+ sbi->vol_amap = kvmalloc_array(sbi->map_sectors,
sizeof(struct buffer_head *), GFP_KERNEL);
if (!sbi->vol_amap)
return -ENOMEM;
@@ -84,7 +84,7 @@ static int exfat_allocate_bitmap(struct super_block *sb,
while (j < i)
brelse(sbi->vol_amap[j++]);

- kfree(sbi->vol_amap);
+ kvfree(sbi->vol_amap);
sbi->vol_amap = NULL;
return -EIO;
}
@@ -138,7 +138,7 @@ void exfat_free_bitmap(struct exfat_sb_info *sbi)
for (i = 0; i < sbi->map_sectors; i++)
__brelse(sbi->vol_amap[i]);

- kfree(sbi->vol_amap);
+ kvfree(sbi->vol_amap);
}

int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync)
diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index 15c4f901be36..51b03b0dd5f7 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -34,6 +34,7 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb,
{
int i;
struct exfat_entry_set_cache *es;
+ unsigned int uni_len = 0, len;

es = exfat_get_dentry_set(sb, p_dir, entry, ES_ALL_ENTRIES);
if (!es)
@@ -52,7 +53,10 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb,
if (exfat_get_entry_type(ep) != TYPE_EXTEND)
break;

- exfat_extract_uni_name(ep, uniname);
+ len = exfat_extract_uni_name(ep, uniname);
+ uni_len += len;
+ if (len != EXFAT_FILE_NAME_LEN || uni_len >= MAX_NAME_LENGTH)
+ break;
uniname += EXFAT_FILE_NAME_LEN;
}

@@ -210,7 +214,10 @@ static void exfat_free_namebuf(struct exfat_dentry_namebuf *nb)
exfat_init_namebuf(nb);
}

-/* skip iterating emit_dots when dir is empty */
+/*
+ * Before calling dir_emit*(), sbi->s_lock should be released
+ * because page fault can occur in dir_emit*().
+ */
#define ITER_POS_FILLED_DOTS (2)
static int exfat_iterate(struct file *file, struct dir_context *ctx)
{
@@ -225,11 +232,10 @@ static int exfat_iterate(struct file *file, struct dir_context *ctx)
int err = 0, fake_offset = 0;

exfat_init_namebuf(nb);
- mutex_lock(&EXFAT_SB(sb)->s_lock);

cpos = ctx->pos;
if (!dir_emit_dots(file, ctx))
- goto unlock;
+ goto out;

if (ctx->pos == ITER_POS_FILLED_DOTS) {
cpos = 0;
@@ -241,16 +247,18 @@ static int exfat_iterate(struct file *file, struct dir_context *ctx)
/* name buffer should be allocated before use */
err = exfat_alloc_namebuf(nb);
if (err)
- goto unlock;
+ goto out;
get_new:
+ mutex_lock(&EXFAT_SB(sb)->s_lock);
+
if (ei->flags == ALLOC_NO_FAT_CHAIN && cpos >= i_size_read(inode))
goto end_of_dir;

err = exfat_readdir(inode, &cpos, &de);
if (err) {
/*
- * At least we tried to read a sector. Move cpos to next sector
- * position (should be aligned).
+ * At least we tried to read a sector.
+ * Move cpos to next sector position (should be aligned).
*/
if (err == -EIO) {
cpos += 1 << (sb->s_blocksize_bits);
@@ -273,16 +281,10 @@ static int exfat_iterate(struct file *file, struct dir_context *ctx)
inum = iunique(sb, EXFAT_ROOT_INO);
}

- /*
- * Before calling dir_emit(), sb_lock should be released.
- * Because page fault can occur in dir_emit() when the size
- * of buffer given from user is larger than one page size.
- */
mutex_unlock(&EXFAT_SB(sb)->s_lock);
if (!dir_emit(ctx, nb->lfn, strlen(nb->lfn), inum,
(de.attr & ATTR_SUBDIR) ? DT_DIR : DT_REG))
- goto out_unlocked;
- mutex_lock(&EXFAT_SB(sb)->s_lock);
+ goto out;
ctx->pos = cpos;
goto get_new;

@@ -290,9 +292,8 @@ static int exfat_iterate(struct file *file, struct dir_context *ctx)
if (!cpos && fake_offset)
cpos = ITER_POS_FILLED_DOTS;
ctx->pos = cpos;
-unlock:
mutex_unlock(&EXFAT_SB(sb)->s_lock);
-out_unlocked:
+out:
/*
* To improve performance, free namebuf after unlocking sb_lock.
* If namebuf is not allocated, this function does nothing
@@ -1027,7 +1028,8 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei,
if (entry_type == TYPE_EXTEND) {
unsigned short entry_uniname[16], unichar;

- if (step != DIRENT_STEP_NAME) {
+ if (step != DIRENT_STEP_NAME ||
+ name_len >= MAX_NAME_LENGTH) {
step = DIRENT_STEP_FILE;
continue;
}
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index dc5dcb78bc27..2c95916aead8 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -70,10 +70,7 @@ struct mb_cache;
* second extended-fs super-block data in memory
*/
struct ext2_sb_info {
- unsigned long s_frag_size; /* Size of a fragment in bytes */
- unsigned long s_frags_per_block;/* Number of fragments per block */
unsigned long s_inodes_per_block;/* Number of inodes per block */
- unsigned long s_frags_per_group;/* Number of fragments in a group */
unsigned long s_blocks_per_group;/* Number of blocks in a group */
unsigned long s_inodes_per_group;/* Number of inodes in a group */
unsigned long s_itb_per_group; /* Number of inode table blocks per group */
@@ -188,15 +185,6 @@ static inline struct ext2_sb_info *EXT2_SB(struct super_block *sb)
#define EXT2_INODE_SIZE(s) (EXT2_SB(s)->s_inode_size)
#define EXT2_FIRST_INO(s) (EXT2_SB(s)->s_first_ino)

-/*
- * Macro-instructions used to manage fragments
- */
-#define EXT2_MIN_FRAG_SIZE 1024
-#define EXT2_MAX_FRAG_SIZE 4096
-#define EXT2_MIN_FRAG_LOG_SIZE 10
-#define EXT2_FRAG_SIZE(s) (EXT2_SB(s)->s_frag_size)
-#define EXT2_FRAGS_PER_BLOCK(s) (EXT2_SB(s)->s_frags_per_block)
-
/*
* Structure of a blocks group descriptor
*/
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 3feea4b31fa7..99b26fe20d17 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -668,10 +668,9 @@ static int ext2_setup_super (struct super_block * sb,
es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT);
le16_add_cpu(&es->s_mnt_count, 1);
if (test_opt (sb, DEBUG))
- ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, "
+ ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, gc=%lu, "
"bpg=%lu, ipg=%lu, mo=%04lx]",
EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize,
- sbi->s_frag_size,
sbi->s_groups_count,
EXT2_BLOCKS_PER_GROUP(sb),
EXT2_INODES_PER_GROUP(sb),
@@ -1012,14 +1011,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
}
}

- sbi->s_frag_size = EXT2_MIN_FRAG_SIZE <<
- le32_to_cpu(es->s_log_frag_size);
- if (sbi->s_frag_size == 0)
- goto cantfind_ext2;
- sbi->s_frags_per_block = sb->s_blocksize / sbi->s_frag_size;
-
sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
- sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);

sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb);
@@ -1045,11 +1037,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}

- if (sb->s_blocksize != sbi->s_frag_size) {
+ if (es->s_log_frag_size != es->s_log_block_size) {
ext2_msg(sb, KERN_ERR,
- "error: fragsize %lu != blocksize %lu"
- "(not supported yet)",
- sbi->s_frag_size, sb->s_blocksize);
+ "error: fragsize log %u != blocksize log %u",
+ le32_to_cpu(es->s_log_frag_size), sb->s_blocksize_bits);
goto failed_mount;
}

@@ -1066,12 +1057,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_blocks_per_group, sbi->s_inodes_per_group + 3);
goto failed_mount;
}
- if (sbi->s_frags_per_group > sb->s_blocksize * 8) {
- ext2_msg(sb, KERN_ERR,
- "error: #fragments per group too big: %lu",
- sbi->s_frags_per_group);
- goto failed_mount;
- }
if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
sbi->s_inodes_per_group > sb->s_blocksize * 8) {
ext2_msg(sb, KERN_ERR,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4d1e48c676fa..78f39a78de29 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3431,7 +3431,6 @@ static inline bool __is_valid_data_blkaddr(block_t blkaddr)
* file.c
*/
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
-void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock);
int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
int f2fs_truncate(struct inode *inode);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 7b94f047cbf7..3ce6da4fac9c 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -628,11 +628,6 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
dn->ofs_in_node, nr_free);
}

-void f2fs_truncate_data_blocks(struct dnode_of_data *dn)
-{
- f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode));
-}
-
static int truncate_partial_data_page(struct inode *inode, u64 from,
bool cache_only)
{
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index a010b4bc36d2..9fe502485930 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -923,6 +923,7 @@ static int truncate_node(struct dnode_of_data *dn)

static int truncate_dnode(struct dnode_of_data *dn)
{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct page *page;
int err;

@@ -930,16 +931,25 @@ static int truncate_dnode(struct dnode_of_data *dn)
return 1;

/* get direct node */
- page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid);
+ page = f2fs_get_node_page(sbi, dn->nid);
if (PTR_ERR(page) == -ENOENT)
return 1;
else if (IS_ERR(page))
return PTR_ERR(page);

+ if (IS_INODE(page) || ino_of_node(page) != dn->inode->i_ino) {
+ f2fs_err(sbi, "incorrect node reference, ino: %lu, nid: %u, ino_of_node: %u",
+ dn->inode->i_ino, dn->nid, ino_of_node(page));
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_handle_error(sbi, ERROR_INVALID_NODE_REFERENCE);
+ f2fs_put_page(page, 1);
+ return -EFSCORRUPTED;
+ }
+
/* Make dnode_of_data for parameter */
dn->node_page = page;
dn->ofs_in_node = 0;
- f2fs_truncate_data_blocks(dn);
+ f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode));
err = truncate_node(dn);
if (err) {
f2fs_put_page(page, 1);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index b6dad389fa14..ff47aad636e5 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1347,6 +1347,12 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
return -EINVAL;
}

+ if ((f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb)) &&
+ test_opt(sbi, FLUSH_MERGE)) {
+ f2fs_err(sbi, "FLUSH_MERGE not compatible with readonly mode");
+ return -EINVAL;
+ }
+
if (f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) {
f2fs_err(sbi, "Allow to mount readonly mode only");
return -EROFS;
@@ -1933,8 +1939,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",inline_dentry");
else
seq_puts(seq, ",noinline_dentry");
- if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
+ if (test_opt(sbi, FLUSH_MERGE))
seq_puts(seq, ",flush_merge");
+ else
+ seq_puts(seq, ",noflush_merge");
if (test_opt(sbi, NOBARRIER))
seq_puts(seq, ",nobarrier");
if (test_opt(sbi, FASTBOOT))
@@ -2032,9 +2040,22 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
return 0;
}

-static void default_options(struct f2fs_sb_info *sbi)
+static void default_options(struct f2fs_sb_info *sbi, bool remount)
{
/* init some FS parameters */
+ if (!remount) {
+ set_opt(sbi, READ_EXTENT_CACHE);
+ clear_opt(sbi, DISABLE_CHECKPOINT);
+
+ if (f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi))
+ set_opt(sbi, DISCARD);
+
+ if (f2fs_sb_has_blkzoned(sbi))
+ F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_SECTION;
+ else
+ F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_BLOCK;
+ }
+
if (f2fs_sb_has_readonly(sbi))
F2FS_OPTION(sbi).active_logs = NR_CURSEG_RO_TYPE;
else
@@ -2057,22 +2078,16 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, INLINE_XATTR);
set_opt(sbi, INLINE_DATA);
set_opt(sbi, INLINE_DENTRY);
- set_opt(sbi, READ_EXTENT_CACHE);
set_opt(sbi, NOHEAP);
- clear_opt(sbi, DISABLE_CHECKPOINT);
set_opt(sbi, MERGE_CHECKPOINT);
F2FS_OPTION(sbi).unusable_cap = 0;
sbi->sb->s_flags |= SB_LAZYTIME;
- set_opt(sbi, FLUSH_MERGE);
- if (f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi))
- set_opt(sbi, DISCARD);
- if (f2fs_sb_has_blkzoned(sbi)) {
+ if (!f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb))
+ set_opt(sbi, FLUSH_MERGE);
+ if (f2fs_sb_has_blkzoned(sbi))
F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
- F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_SECTION;
- } else {
+ else
F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE;
- F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_BLOCK;
- }

#ifdef CONFIG_F2FS_FS_XATTR
set_opt(sbi, XATTR_USER);
@@ -2244,7 +2259,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
clear_sbi_flag(sbi, SBI_NEED_SB_WRITE);
}

- default_options(sbi);
+ default_options(sbi, true);

/* parse mount options */
err = parse_options(sb, data, true);
@@ -4141,7 +4156,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid,
sizeof(raw_super->uuid));

- default_options(sbi);
+ default_options(sbi, false);
/* parse mount options */
options = kstrdup((const char *)data, GFP_KERNEL);
if (data && !options) {
diff --git a/fs/file.c b/fs/file.c
index 35c62b54c9d6..dbca26ef7a01 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -1036,12 +1036,28 @@ unsigned long __fdget_raw(unsigned int fd)
return __fget_light(fd, 0);
}

+/*
+ * Try to avoid f_pos locking. We only need it if the
+ * file is marked for FMODE_ATOMIC_POS, and it can be
+ * accessed multiple ways.
+ *
+ * Always do it for directories, because pidfd_getfd()
+ * can make a file accessible even if it otherwise would
+ * not be, and for directories this is a correctness
+ * issue, not a "POSIX requirement".
+ */
+static inline bool file_needs_f_pos_lock(struct file *file)
+{
+ return (file->f_mode & FMODE_ATOMIC_POS) &&
+ (file_count(file) > 1 || S_ISDIR(file_inode(file)->i_mode));
+}
+
unsigned long __fdget_pos(unsigned int fd)
{
unsigned long v = __fdget(fd);
struct file *file = (struct file *)(v & ~3);

- if (file && (file->f_mode & FMODE_ATOMIC_POS)) {
+ if (file && file_needs_f_pos_lock(file)) {
v |= FDPUT_POS_UNLOCK;
mutex_lock(&file->f_pos_lock);
}
diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c
index c0c6bcbc8c05..81c22df27c72 100644
--- a/fs/ntfs3/attrlist.c
+++ b/fs/ntfs3/attrlist.c
@@ -52,7 +52,7 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr)

if (!attr->non_res) {
lsize = le32_to_cpu(attr->res.data_size);
- le = kmalloc(al_aligned(lsize), GFP_NOFS);
+ le = kmalloc(al_aligned(lsize), GFP_NOFS | __GFP_NOWARN);
if (!le) {
err = -ENOMEM;
goto out;
@@ -80,7 +80,7 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr)
if (err < 0)
goto out;

- le = kmalloc(al_aligned(lsize), GFP_NOFS);
+ le = kmalloc(al_aligned(lsize), GFP_NOFS | __GFP_NOWARN);
if (!le) {
err = -ENOMEM;
goto out;
diff --git a/fs/open.c b/fs/open.c
index 9541430ec5b3..51dc46620d03 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1233,7 +1233,7 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
lookup_flags |= LOOKUP_IN_ROOT;
if (how->resolve & RESOLVE_CACHED) {
/* Don't bother even trying for create/truncate/tmpfile open */
- if (flags & (O_TRUNC | O_CREAT | O_TMPFILE))
+ if (flags & (O_TRUNC | O_CREAT | __O_TMPFILE))
return -EAGAIN;
lookup_flags |= LOOKUP_CACHED;
}
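
The one-character-looking change in fs/open.c is a flags subtlety: in the UAPI
headers O_TMPFILE is defined as (__O_TMPFILE | O_DIRECTORY), so testing
flags & O_TMPFILE also matches a plain O_DIRECTORY open, which made openat2()
with RESOLVE_CACHED spuriously return -EAGAIN for directory opens. Checking
__O_TMPFILE matches only genuine tmpfile opens. A demonstration using the
asm-generic octal values (a few architectures define different numbers):

#include <stdio.h>

/* asm-generic UAPI values; some architectures use different numbers. */
#define O_DIRECTORY   00200000
#define __O_TMPFILE  020000000
#define O_TMPFILE    (__O_TMPFILE | O_DIRECTORY)

int main(void)
{
    int flags = O_DIRECTORY; /* ordinary directory open, no tmpfile */

    /* old check: nonzero, so RESOLVE_CACHED wrongly got -EAGAIN */
    printf("flags & O_TMPFILE   = %#o\n", flags & O_TMPFILE);
    /* new check: zero, directory opens pass through */
    printf("flags & __O_TMPFILE = %#o\n", flags & __O_TMPFILE);
    return 0;
}
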
diff --git a/fs/super.c b/fs/super.c
index 7c140ee60c54..d138332e57a9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -904,6 +904,7 @@ int reconfigure_super(struct fs_context *fc)
struct super_block *sb = fc->root->d_sb;
int retval;
bool remount_ro = false;
+ bool remount_rw = false;
bool force = fc->sb_flags & SB_FORCE;

if (fc->sb_flags_mask & ~MS_RMT_MASK)
@@ -921,7 +922,7 @@ int reconfigure_super(struct fs_context *fc)
bdev_read_only(sb->s_bdev))
return -EACCES;
#endif
-
+ remount_rw = !(fc->sb_flags & SB_RDONLY) && sb_rdonly(sb);
remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb);
}

@@ -951,6 +952,14 @@ int reconfigure_super(struct fs_context *fc)
if (retval)
return retval;
}
+ } else if (remount_rw) {
+ /*
+ * We set s_readonly_remount here to protect the filesystem's
+ * reconfigure code from writes from userspace until
+ * reconfigure finishes.
+ */
+ sb->s_readonly_remount = 1;
+ smp_wmb();
}

if (fc->ops->reconfigure) {
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index 3b8567564e7e..9925cfe57159 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -145,6 +145,10 @@ static int alloc_branch(struct inode *inode,
*/
parent = block_to_cpu(SYSV_SB(inode->i_sb), branch[n-1].key);
bh = sb_getblk(inode->i_sb, parent);
+ if (!bh) {
+ sysv_free_block(inode->i_sb, branch[n].key);
+ break;
+ }
lock_buffer(bh);
memset(bh->b_data, 0, blocksize);
branch[n].bh = bh;
diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h
index 20c93f08c993..95a1d214108a 100644
--- a/include/asm-generic/word-at-a-time.h
+++ b/include/asm-generic/word-at-a-time.h
@@ -38,7 +38,7 @@ static inline long find_zero(unsigned long mask)
return (mask >> 8) ? byte : byte + 1;
}

-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
{
unsigned long rhs = val | c->low_bits;
*data = rhs;
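
has_zero() here returns the zero-byte detection mask itself, not a truncated
bool; the prototype change makes the generic header agree with the
architecture variants whose callers consume the full mask. The underlying
trick is the classic word-at-a-time zero-byte test. A simplified little-endian
sketch, assuming a 64-bit unsigned long (the kernel version threads the mask
through struct word_at_a_time and uses a multiply-based find_zero(); this one
uses a GCC builtin instead):

#include <stdio.h>
#include <string.h>

#define ONE_BYTES 0x0101010101010101UL
#define HIGH_BITS 0x8080808080808080UL

/* Nonzero iff some byte of v is zero; the result is a per-byte flag
 * mask, which is why the return type must not be bool. */
static unsigned long has_zero(unsigned long v)
{
    return (v - ONE_BYTES) & ~v & HIGH_BITS;
}

/* Byte index of the first zero (little-endian layout, GCC builtin). */
static unsigned int find_zero(unsigned long mask)
{
    return (unsigned int)__builtin_ctzl(mask) / 8;
}

int main(void)
{
    unsigned long word;

    memcpy(&word, "abc\0defg", sizeof(word)); /* 8 bytes, NUL at index 3 */
    if (has_zero(word))
        printf("first zero byte at index %u\n", find_zero(has_zero(word)));
    return 0;
}
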
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index ee0d75d9a302..77055b239165 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -104,6 +104,7 @@ enum f2fs_error {
ERROR_INCONSISTENT_SIT,
ERROR_CORRUPTED_VERITY_XATTR,
ERROR_CORRUPTED_XATTR,
+ ERROR_INVALID_NODE_REFERENCE,
ERROR_MAX,
};

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 51857117ac09..c8ef3b881f03 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -107,11 +107,12 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)

static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
{
- if (!sk->sk_mark &&
- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
+ u32 mark = READ_ONCE(sk->sk_mark);
+
+ if (!mark && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
return skb->mark;

- return sk->sk_mark;
+ return mark;
}

static inline int inet_request_bound_dev_if(const struct sock *sk,
diff --git a/include/net/ip.h b/include/net/ip.h
index 83a1a9bc3ceb..530e7257e438 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -93,7 +93,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
{
ipcm_init(ipcm);

- ipcm->sockc.mark = inet->sk.sk_mark;
+ ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark);
ipcm->sockc.tsflags = inet->sk.sk_tsflags;
ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
ipcm->addr = inet->inet_saddr;
diff --git a/include/net/route.h b/include/net/route.h
index fe00b0a2e475..af8431b25f80 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -171,7 +171,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi
__be16 dport, __be16 sport,
__u8 proto, __u8 tos, int oif)
{
- flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos,
+ flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos,
RT_SCOPE_UNIVERSE, proto,
sk ? inet_sk_flowi_flags(sk) : 0,
daddr, saddr, dport, sport, sock_net_uid(net, sk));
@@ -304,7 +304,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst,
if (inet_sk(sk)->transparent)
flow_flags |= FLOWI_FLAG_ANYSRC;

- flowi4_init_output(fl4, oif, sk->sk_mark, ip_sock_rt_tos(sk),
+ flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk),
ip_sock_rt_scope(sk), protocol, flow_flags, dst,
src, dport, sport, sk->sk_uid);
}
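
These three header hunks share one theme that continues below in net/core and
net/ipv4: sk->sk_mark is written via setsockopt(SO_MARK) while lockless paths
read it concurrently, so the accesses get READ_ONCE()/WRITE_ONCE() annotations
to prevent load/store tearing and to tell KCSAN the race is intentional. A
userspace model of what the macros do (the kernel's versions add type checking
on top of this):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

/* Same shape as the kernel macros: a single volatile access the
 * compiler may neither tear nor re-load (GNU C __typeof__). */
#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)       (*(volatile const __typeof__(x) *)&(x))

static uint32_t sk_mark; /* stand-in for sk->sk_mark */

static void *setsockopt_side(void *arg)
{
    (void)arg;
    WRITE_ONCE(sk_mark, 42); /* SO_MARK under the socket lock */
    return NULL;
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, setsockopt_side, NULL);
    /* lockless reader: observes the old or the new value, never a
     * torn mix of the two */
    printf("mark=%u\n", READ_ONCE(sk_mark));
    pthread_join(t, NULL);
    return 0;
}
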
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 03bcc1ef0d61..a46ec889acb7 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -548,12 +548,12 @@ static inline void vxlan_flag_attr_error(int attrtype,
}

static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh,
- int hash,
+ u32 hash,
struct vxlan_rdst *rdst)
{
struct fib_nh_common *nhc;

- nhc = nexthop_path_fdb_result(nh, hash);
+ nhc = nexthop_path_fdb_result(nh, hash >> 1);
if (unlikely(!nhc))
return false;

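
The vxlan change widens the FDB nexthop hash to u32 and passes hash >> 1 down.
skb_get_hash() produces a full 32-bit value, while the nexthop selection path
takes a signed int and expects a nonnegative value; a hash with the top bit
set goes negative through the implicit conversion, and C's signed modulo then
yields a negative index. A small demonstration of why the shift matters:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t hash = 0xdeadbeefu; /* top bit set, as skb_get_hash() may return */
    int as_int = (int)hash;      /* wraps negative on two's complement */
    int num_paths = 4;

    printf("as_int %% %d      = %d\n", num_paths, as_int % num_paths);
    printf("(hash >> 1) %% %d = %d\n", num_paths,
           (int)(hash >> 1) % num_paths);
    return 0;
}

The first result is negative (-1 here); shifting the top bit away keeps the
selector in range.
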
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index f091153bc854..ed8e9deae284 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2349,12 +2349,21 @@ int io_run_task_work_sig(struct io_ring_ctx *ctx)
return 0;
}

+static bool current_pending_io(void)
+{
+ struct io_uring_task *tctx = current->io_uring;
+
+ if (!tctx)
+ return false;
+ return percpu_counter_read_positive(&tctx->inflight);
+}
+
/* when returns >0, the caller should retry */
static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq,
ktime_t *timeout)
{
- int token, ret;
+ int io_wait, ret;
unsigned long check_cq;

/* make sure we run task_work before checking for signals */
@@ -2372,15 +2381,17 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
}

/*
- * Use io_schedule_prepare/finish, so cpufreq can take into account
- * that the task is waiting for IO - turns out to be important for low
- * QD IO.
+ * Mark us as being in io_wait if we have pending requests, so cpufreq
+ * can take into account that the task is waiting for IO - turns out
+ * to be important for low QD IO.
*/
- token = io_schedule_prepare();
+ io_wait = current->in_iowait;
+ if (current_pending_io())
+ current->in_iowait = 1;
ret = 1;
if (!schedule_hrtimeout(timeout, HRTIMER_MODE_ABS))
ret = -ETIME;
- io_schedule_finish(token);
+ current->in_iowait = io_wait;
return ret;
}

diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 4c6a5666541c..b0cf05ebcbcc 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -545,7 +545,7 @@ int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
goto add;
}

- tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+ tail = data_race(ctx->cached_cq_tail) - atomic_read(&ctx->cq_timeouts);
timeout->target_seq = tail + off;

/* Update the last seq here in case io_flush_timeouts() hasn't.
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 09141351d545..08a8e8102728 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -26,6 +26,7 @@
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/capability.h>
+#include <linux/completion.h>
#include <trace/events/xdp.h>
#include <linux/btf_ids.h>

@@ -71,6 +72,7 @@ struct bpf_cpu_map_entry {
struct rcu_head rcu;

struct work_struct kthread_stop_wq;
+ struct completion kthread_running;
};

struct bpf_cpu_map {
@@ -134,11 +136,17 @@ static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
* invoked cpu_map_kthread_stop(). Catch any broken behaviour
* gracefully and warn once.
*/
- struct xdp_frame *xdpf;
+ void *ptr;

- while ((xdpf = ptr_ring_consume(ring)))
- if (WARN_ON_ONCE(xdpf))
- xdp_return_frame(xdpf);
+ while ((ptr = ptr_ring_consume(ring))) {
+ WARN_ON_ONCE(1);
+ if (unlikely(__ptr_test_bit(0, &ptr))) {
+ __ptr_clear_bit(0, &ptr);
+ kfree_skb(ptr);
+ continue;
+ }
+ xdp_return_frame(ptr);
+ }
}

static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
@@ -158,7 +166,6 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
static void cpu_map_kthread_stop(struct work_struct *work)
{
struct bpf_cpu_map_entry *rcpu;
- int err;

rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq);

@@ -168,14 +175,7 @@ static void cpu_map_kthread_stop(struct work_struct *work)
rcu_barrier();

/* kthread_stop will wake_up_process and wait for it to complete */
- err = kthread_stop(rcpu->kthread);
- if (err) {
- /* kthread_stop may be called before cpu_map_kthread_run
- * is executed, so we need to release the memory related
- * to rcpu.
- */
- put_cpu_map_entry(rcpu);
- }
+ kthread_stop(rcpu->kthread);
}

static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
@@ -303,11 +303,11 @@ static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
return nframes;
}

-
static int cpu_map_kthread_run(void *data)
{
struct bpf_cpu_map_entry *rcpu = data;

+ complete(&rcpu->kthread_running);
set_current_state(TASK_INTERRUPTIBLE);

/* When kthread gives stop order, then rcpu have been disconnected
@@ -472,6 +472,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
goto free_ptr_ring;

/* Setup kthread */
+ init_completion(&rcpu->kthread_running);
rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
"cpumap/%d/map:%d", cpu,
map->id);
@@ -485,6 +486,12 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
kthread_bind(rcpu->kthread, cpu);
wake_up_process(rcpu->kthread);

+ /* Make sure the kthread is running, so kthread_stop() will not
+ * stop the kthread prematurely and all pending frames or skbs
+ * will be handled by the kthread before kthread_stop() returns.
+ */
+ wait_for_completion(&rcpu->kthread_running);
+
return rcpu;

free_prog:
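
The cpumap fix closes a startup race: kthread_stop() issued before the thread
function has ever run makes the threadfn never execute, so frames queued for
the kthread would leak (the removed err handling papered over this by freeing
the entry). Waiting on a completion that the kthread fires first thing
guarantees the thread is live before the map entry is published. The same
pattern in a userspace sketch, with a POSIX semaphore standing in for
struct completion:

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

static sem_t running; /* userspace stand-in for struct completion */

static void *worker(void *arg)
{
    (void)arg;
    sem_post(&running); /* like complete(&rcpu->kthread_running) */
    /* ... drain queued work until told to stop ... */
    return NULL;
}

int main(void)
{
    pthread_t t;

    sem_init(&running, 0, 0);
    pthread_create(&t, NULL, worker, NULL);
    sem_wait(&running); /* like wait_for_completion(): worker is live */
    puts("worker running; a later stop request cannot arrive too early");
    pthread_join(t, NULL);
    return 0;
}
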
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 934332b3eb54..db1065daabb6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1133,6 +1133,11 @@ static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
return 0;
}

+static int perf_mux_hrtimer_restart_ipi(void *arg)
+{
+ return perf_mux_hrtimer_restart(arg);
+}
+
void perf_pmu_disable(struct pmu *pmu)
{
int *count = this_cpu_ptr(pmu->pmu_disable_count);
@@ -11155,8 +11160,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);

- cpu_function_call(cpu,
- (remote_function_f)perf_mux_hrtimer_restart, cpuctx);
+ cpu_function_call(cpu, perf_mux_hrtimer_restart_ipi, cpuctx);
}
cpus_read_unlock();
mutex_unlock(&mux_interval_mutex);
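
The perf change replaces a function-pointer cast with a real wrapper:
cpu_function_call() wants an int (*)(void *), and calling
perf_mux_hrtimer_restart() through a cast pointer of a different type is
undefined behaviour in C and trips control-flow-integrity checking. The
wrapper has the exact expected signature and converts the argument inside.
The shape of the problem and the fix in miniature:

#include <stdio.h>

typedef int (*remote_function_f)(void *);

/* Stand-in for cpu_function_call(): only accepts the exact type. */
static int dispatch(remote_function_f fn, void *arg)
{
    return fn(arg);
}

struct cpuctx { int interval_ms; };

static int mux_hrtimer_restart(struct cpuctx *ctx)
{
    return ctx->interval_ms;
}

/* Wrapper with the exact expected prototype; the void * to struct
 * pointer conversion happens inside instead of via a pointer cast. */
static int mux_hrtimer_restart_ipi(void *arg)
{
    return mux_hrtimer_restart(arg);
}

int main(void)
{
    struct cpuctx ctx = { .interval_ms = 4 };

    /* dispatch((remote_function_f)mux_hrtimer_restart, &ctx) is UB */
    printf("%d\n", dispatch(mux_hrtimer_restart_ipi, &ctx));
    return 0;
}
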
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 1642548892a8..ad04390883ad 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -662,8 +662,7 @@ static DEFINE_PER_CPU(int, bpf_trace_nest_level);
BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
u64, flags, void *, data, u64, size)
{
- struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds);
- int nest_level = this_cpu_inc_return(bpf_trace_nest_level);
+ struct bpf_trace_sample_data *sds;
struct perf_raw_record raw = {
.frag = {
.size = size,
@@ -671,7 +670,11 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
},
};
struct perf_sample_data *sd;
- int err;
+ int nest_level, err;
+
+ preempt_disable();
+ sds = this_cpu_ptr(&bpf_trace_sds);
+ nest_level = this_cpu_inc_return(bpf_trace_nest_level);

if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
err = -EBUSY;
@@ -690,9 +693,9 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
sd->sample_flags |= PERF_SAMPLE_RAW;

err = __bpf_perf_event_output(regs, map, flags, sd);
-
out:
this_cpu_dec(bpf_trace_nest_level);
+ preempt_enable();
return err;
}

@@ -717,7 +720,6 @@ static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
- int nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
struct perf_raw_frag frag = {
.copy = ctx_copy,
.size = ctx_size,
@@ -734,8 +736,12 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
};
struct perf_sample_data *sd;
struct pt_regs *regs;
+ int nest_level;
u64 ret;

+ preempt_disable();
+ nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
+
if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
ret = -EBUSY;
goto out;
@@ -751,6 +757,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
ret = __bpf_perf_event_output(regs, map, flags, sd);
out:
this_cpu_dec(bpf_event_output_nest_level);
+ preempt_enable();
return ret;
}

diff --git a/lib/Makefile b/lib/Makefile
index 59bd7c2f793a..5ffe72ec9979 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -81,8 +81,14 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
obj-$(CONFIG_TEST_DYNAMIC_DEBUG) += test_dynamic_debug.o
obj-$(CONFIG_TEST_PRINTF) += test_printf.o
obj-$(CONFIG_TEST_SCANF) += test_scanf.o
+
obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o
+ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_KASAN),yy)
+# FIXME: Clang breaks test_bitmap_const_eval when KASAN and GCOV are enabled
+GCOV_PROFILE_test_bitmap.o := n
+endif
+
obj-$(CONFIG_TEST_UUID) += test_uuid.o
obj-$(CONFIG_TEST_XARRAY) += test_xarray.o
obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index c46736210363..dacb80c22c4f 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -498,6 +498,15 @@ static void debug_print_object(struct debug_obj *obj, char *msg)
const struct debug_obj_descr *descr = obj->descr;
static int limit;

+ /*
+ * Don't report if lookup_object_or_alloc() by the current thread
+ * failed because lookup_object_or_alloc()/debug_objects_oom() by a
+ * concurrent thread turned off debug_objects_enabled and cleared
+ * the hash buckets.
+ */
+ if (!debug_objects_enabled)
+ return;
+
if (limit < 5 && descr != descr_test) {
void *hint = descr->debug_hint ?
descr->debug_hint(obj->object) : NULL;
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index a8005ad3bd58..37a9108c4f58 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -1149,6 +1149,10 @@ static void __init test_bitmap_print_buf(void)
}
}

+/*
+ * FIXME: Clang breaks compile-time evaluations when KASAN and GCOV are enabled.
+ * To work around it, GCOV is force-disabled in Makefile for this configuration.
+ */
static void __init test_bitmap_const_eval(void)
{
DECLARE_BITMAP(bitmap, BITS_PER_LONG);
@@ -1174,11 +1178,7 @@ static void __init test_bitmap_const_eval(void)
* the compiler is fixed.
*/
bitmap_clear(bitmap, 0, BITS_PER_LONG);
-#if defined(__s390__) && defined(__clang__)
- if (!const_test_bit(7, bitmap))
-#else
if (!test_bit(7, bitmap))
-#endif
bitmap_set(bitmap, 5, 2);

/* Equals to `unsigned long bitopvar = BIT(20)` */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3e8f1ad0fe9d..67b6d8238b3e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3165,12 +3165,12 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
* accumulating over a page of vmstat data or when pgdat or idx
* changes.
*/
- if (stock->cached_objcg != objcg) {
+ if (READ_ONCE(stock->cached_objcg) != objcg) {
old = drain_obj_stock(stock);
obj_cgroup_get(objcg);
stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
- stock->cached_objcg = objcg;
+ WRITE_ONCE(stock->cached_objcg, objcg);
stock->cached_pgdat = pgdat;
} else if (stock->cached_pgdat != pgdat) {
/* Flush the existing cached vmstat data */
@@ -3224,7 +3224,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
local_lock_irqsave(&memcg_stock.stock_lock, flags);

stock = this_cpu_ptr(&memcg_stock);
- if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
+ if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) {
stock->nr_bytes -= nr_bytes;
ret = true;
}
@@ -3236,7 +3236,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)

static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
{
- struct obj_cgroup *old = stock->cached_objcg;
+ struct obj_cgroup *old = READ_ONCE(stock->cached_objcg);

if (!old)
return NULL;
@@ -3289,7 +3289,7 @@ static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
stock->cached_pgdat = NULL;
}

- stock->cached_objcg = NULL;
+ WRITE_ONCE(stock->cached_objcg, NULL);
/*
* The `old' object needs to be released by the caller via
* obj_cgroup_put() outside of memcg_stock_pcp::stock_lock.
@@ -3300,10 +3300,11 @@ static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
struct mem_cgroup *root_memcg)
{
+ struct obj_cgroup *objcg = READ_ONCE(stock->cached_objcg);
struct mem_cgroup *memcg;

- if (stock->cached_objcg) {
- memcg = obj_cgroup_memcg(stock->cached_objcg);
+ if (objcg) {
+ memcg = obj_cgroup_memcg(objcg);
if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
return true;
}
@@ -3322,10 +3323,10 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
local_lock_irqsave(&memcg_stock.stock_lock, flags);

stock = this_cpu_ptr(&memcg_stock);
- if (stock->cached_objcg != objcg) { /* reset if necessary */
+ if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */
old = drain_obj_stock(stock);
obj_cgroup_get(objcg);
- stock->cached_objcg = objcg;
+ WRITE_ONCE(stock->cached_objcg, objcg);
stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
allow_uncharge = true; /* Allow uncharge when objcg changes */
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index eebe256104bc..947ca580bb9a 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -46,6 +46,7 @@ static const struct proto_ops l2cap_sock_ops;
static void l2cap_sock_init(struct sock *sk, struct sock *parent);
static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
int proto, gfp_t prio, int kern);
+static void l2cap_sock_cleanup_listen(struct sock *parent);

bool l2cap_is_socket(struct socket *sock)
{
@@ -1415,6 +1416,7 @@ static int l2cap_sock_release(struct socket *sock)
if (!sk)
return 0;

+ l2cap_sock_cleanup_listen(sk);
bt_sock_unlink(&l2cap_sk_list, sk);

err = l2cap_sock_shutdown(sock, SHUT_RDWR);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 4e4f1e4bc265..c22bb06b450e 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -3334,17 +3334,24 @@ static int linger_reg_commit_wait(struct ceph_osd_linger_request *lreq)
int ret;

dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
- ret = wait_for_completion_interruptible(&lreq->reg_commit_wait);
+ ret = wait_for_completion_killable(&lreq->reg_commit_wait);
return ret ?: lreq->reg_commit_error;
}

-static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq)
+static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq,
+ unsigned long timeout)
{
- int ret;
+ long left;

dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
- ret = wait_for_completion_interruptible(&lreq->notify_finish_wait);
- return ret ?: lreq->notify_finish_error;
+ left = wait_for_completion_killable_timeout(&lreq->notify_finish_wait,
+ ceph_timeout_jiffies(timeout));
+ if (left <= 0)
+ left = left ?: -ETIMEDOUT;
+ else
+ left = lreq->notify_finish_error; /* completed */
+
+ return left;
}

/*
@@ -4896,7 +4903,8 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
linger_submit(lreq);
ret = linger_reg_commit_wait(lreq);
if (!ret)
- ret = linger_notify_finish_wait(lreq);
+ ret = linger_notify_finish_wait(lreq,
+ msecs_to_jiffies(2 * timeout * MSEC_PER_SEC));
else
dout("lreq %p failed to initiate notify %d\n", lreq, ret);

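
The ceph change swaps an interruptible wait for a killable one with a timeout.
wait_for_completion_killable_timeout() returns the remaining jiffies
(positive) on completion, 0 on timeout, and -ERESTARTSYS if a fatal signal
arrived; the GNU `left ?: -ETIMEDOUT` expression (a ?: b means a ? a : b) maps
the timeout case to -ETIMEDOUT while passing the signal error through. The
mapping in isolation (userspace, -EINTR standing in for -ERESTARTSYS, GNU C
extension required):

#include <errno.h>
#include <stdio.h>

/* Map the wait result (>0 done, 0 timed out, <0 fatal signal) to an
 * error code; `a ?: b` is GNU C for `a ? a : b`. */
static long map_wait_result(long left, long finish_error)
{
    if (left <= 0)
        return left ?: -ETIMEDOUT;
    return finish_error; /* completed within the timeout */
}

int main(void)
{
    printf("timeout: %ld, killed: %ld, done: %ld\n",
           map_wait_result(0, 0),      /* -ETIMEDOUT */
           map_wait_result(-EINTR, 0), /* error preserved */
           map_wait_result(5, 0));     /* notify_finish_error (0) */
    return 0;
}
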
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 94374d529ea4..ad01b1bea52e 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -531,8 +531,11 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
return ERR_PTR(-EPERM);

nla_for_each_nested(nla, nla_stgs, rem) {
- if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
+ if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) {
+ if (nla_len(nla) != sizeof(u32))
+ return ERR_PTR(-EINVAL);
nr_maps++;
+ }
}

diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5625ed30a06f..2758b3f7c021 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -5030,13 +5030,17 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (br_spec) {
nla_for_each_nested(attr, br_spec, rem) {
- if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
+ if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) {
if (nla_len(attr) < sizeof(flags))
return -EINVAL;

have_flags = true;
flags = nla_get_u16(attr);
- break;
+ }
+
+ if (nla_type(attr) == IFLA_BRIDGE_MODE) {
+ if (nla_len(attr) < sizeof(u16))
+ return -EINVAL;
}
}
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 0c1baa5517f1..3b5304f084ef 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -800,7 +800,7 @@ EXPORT_SYMBOL(sock_no_linger);
void sock_set_priority(struct sock *sk, u32 priority)
{
lock_sock(sk);
- sk->sk_priority = priority;
+ WRITE_ONCE(sk->sk_priority, priority);
release_sock(sk);
}
EXPORT_SYMBOL(sock_set_priority);
@@ -977,7 +977,7 @@ EXPORT_SYMBOL(sock_set_rcvbuf);
static void __sock_set_mark(struct sock *sk, u32 val)
{
if (val != sk->sk_mark) {
- sk->sk_mark = val;
+ WRITE_ONCE(sk->sk_mark, val);
sk_dst_reset(sk);
}
}
@@ -996,7 +996,7 @@ static void sock_release_reserved_memory(struct sock *sk, int bytes)
bytes = round_down(bytes, PAGE_SIZE);

WARN_ON(bytes > sk->sk_reserved_mem);
- sk->sk_reserved_mem -= bytes;
+ WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem - bytes);
sk_mem_reclaim(sk);
}

@@ -1033,7 +1033,8 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
}
sk->sk_forward_alloc += pages << PAGE_SHIFT;

- sk->sk_reserved_mem += pages << PAGE_SHIFT;
+ WRITE_ONCE(sk->sk_reserved_mem,
+ sk->sk_reserved_mem + (pages << PAGE_SHIFT));

return 0;
}
@@ -1202,7 +1203,7 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
if ((val >= 0 && val <= 6) ||
sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
- sk->sk_priority = val;
+ WRITE_ONCE(sk->sk_priority, val);
else
ret = -EPERM;
break;
@@ -1425,7 +1426,8 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
cmpxchg(&sk->sk_pacing_status,
SK_PACING_NONE,
SK_PACING_NEEDED);
- sk->sk_max_pacing_rate = ulval;
+ /* Pairs with READ_ONCE() from sk_getsockopt() */
+ WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
break;
}
@@ -1520,7 +1522,9 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
}
if ((u8)val == SOCK_TXREHASH_DEFAULT)
val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
- /* Paired with READ_ONCE() in tcp_rtx_synack() */
+ /* Paired with READ_ONCE() in tcp_rtx_synack()
+ * and sk_getsockopt().
+ */
WRITE_ONCE(sk->sk_txrehash, (u8)val);
break;

@@ -1620,11 +1624,11 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;

case SO_SNDBUF:
- v.val = sk->sk_sndbuf;
+ v.val = READ_ONCE(sk->sk_sndbuf);
break;

case SO_RCVBUF:
- v.val = sk->sk_rcvbuf;
+ v.val = READ_ONCE(sk->sk_rcvbuf);
break;

case SO_REUSEADDR:
@@ -1666,7 +1670,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;

case SO_PRIORITY:
- v.val = sk->sk_priority;
+ v.val = READ_ONCE(sk->sk_priority);
break;

case SO_LINGER:
@@ -1713,7 +1717,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;

case SO_RCVLOWAT:
- v.val = sk->sk_rcvlowat;
+ v.val = READ_ONCE(sk->sk_rcvlowat);
break;

case SO_SNDLOWAT:
@@ -1792,7 +1796,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
return security_socket_getpeersec_stream(sock, optval.user, optlen.user, len);

case SO_MARK:
- v.val = sk->sk_mark;
+ v.val = READ_ONCE(sk->sk_mark);
break;

case SO_RCVMARK:
@@ -1811,7 +1815,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
if (!sock->ops->set_peek_off)
return -EOPNOTSUPP;

- v.val = sk->sk_peek_off;
+ v.val = READ_ONCE(sk->sk_peek_off);
break;
case SO_NOFCS:
v.val = sock_flag(sk, SOCK_NOFCS);
@@ -1841,7 +1845,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,

#ifdef CONFIG_NET_RX_BUSY_POLL
case SO_BUSY_POLL:
- v.val = sk->sk_ll_usec;
+ v.val = READ_ONCE(sk->sk_ll_usec);
break;
case SO_PREFER_BUSY_POLL:
v.val = READ_ONCE(sk->sk_prefer_busy_poll);
@@ -1849,12 +1853,14 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
#endif

case SO_MAX_PACING_RATE:
+ /* The READ_ONCE() pairs with the WRITE_ONCE() in sk_setsockopt() */
if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
lv = sizeof(v.ulval);
- v.ulval = sk->sk_max_pacing_rate;
+ v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
} else {
/* 32bit version */
- v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
+ v.val = min_t(unsigned long, ~0U,
+ READ_ONCE(sk->sk_max_pacing_rate));
}
break;

@@ -1922,11 +1928,12 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;

case SO_RESERVE_MEM:
- v.val = sk->sk_reserved_mem;
+ v.val = READ_ONCE(sk->sk_reserved_mem);
break;

case SO_TXREHASH:
- v.val = sk->sk_txrehash;
+ /* Paired with WRITE_ONCE() in sk_setsockopt() */
+ v.val = READ_ONCE(sk->sk_txrehash);
break;

default:
@@ -3112,7 +3119,7 @@ EXPORT_SYMBOL(__sk_mem_reclaim);

int sk_set_peek_off(struct sock *sk, int val)
{
- sk->sk_peek_off = val;
+ WRITE_ONCE(sk->sk_peek_off, val);
return 0;
}
EXPORT_SYMBOL_GPL(sk_set_peek_off);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index d38267201892..c84e5073c0b6 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -117,7 +117,6 @@ static void sock_map_sk_acquire(struct sock *sk)
__acquires(&sk->sk_lock.slock)
{
lock_sock(sk);
- preempt_disable();
rcu_read_lock();
}

@@ -125,7 +124,6 @@ static void sock_map_sk_release(struct sock *sk)
__releases(&sk->sk_lock.slock)
{
rcu_read_unlock();
- preempt_enable();
release_sock(sk);
}

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index dc4fb699b56c..d2981e89d363 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -946,7 +946,7 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlmsghdr *nlh,
return -EOPNOTSUPP;

ret = nla_parse_nested_deprecated(data, DCB_BCN_ATTR_MAX,
- tb[DCB_ATTR_BCN], dcbnl_pfc_up_nest,
+ tb[DCB_ATTR_BCN], dcbnl_bcn_nest,
NULL);
if (ret)
return ret;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index c0fd8f5f3b94..b51ce6f8ceba 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -237,8 +237,8 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass,
- sk->sk_priority);
+ err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt,
+ np->tclass, sk->sk_priority);
rcu_read_unlock();
err = net_xmit_eval(err);
}
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index b812eb36f0e3..f7426926a104 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -150,7 +150,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
}
#endif

- if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark))
+ if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, READ_ONCE(sk->sk_mark)))
goto errout;

if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
@@ -799,7 +799,7 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
entry.ifindex = sk->sk_bound_dev_if;
entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
if (sk_fullsock(sk))
- entry.mark = sk->sk_mark;
+ entry.mark = READ_ONCE(sk->sk_mark);
else if (sk->sk_state == TCP_NEW_SYN_RECV)
entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
else if (sk->sk_state == TCP_TIME_WAIT)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7b4ab545c06e..acfe58d2f1dd 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -182,9 +182,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
ip_options_build(skb, &opt->opt, daddr, rt);
}

- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
if (!skb->mark)
- skb->mark = sk->sk_mark;
+ skb->mark = READ_ONCE(sk->sk_mark);

/* Send it out. */
return ip_local_out(net, skb->sk, skb);
@@ -526,8 +526,8 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
skb_shinfo(skb)->gso_segs ?: 1);

/* TODO : should we use skb->sk here instead of sk ? */
- skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->priority = READ_ONCE(sk->sk_priority);
+ skb->mark = READ_ONCE(sk->sk_mark);

res = ip_local_out(net, sk, skb);
rcu_read_unlock();
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index a7fd035b5b4f..63aa52becd88 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -591,7 +591,7 @@ void __ip_sock_set_tos(struct sock *sk, int val)
}
if (inet_sk(sk)->tos != val) {
inet_sk(sk)->tos = val;
- sk->sk_priority = rt_tos2priority(val);
+ WRITE_ONCE(sk->sk_priority, rt_tos2priority(val));
sk_dst_reset(sk);
}
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 86197634dcf5..639aa5abda9d 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -346,7 +346,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
goto error;
skb_reserve(skb, hlen);

- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark;
skb->tstamp = sockc->transmit_time;
skb_dst_set(skb, &rt->dst);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index cd1fa9f70f1a..51bd9a50a1d1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -518,7 +518,7 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
const struct inet_sock *inet = inet_sk(sk);

oif = sk->sk_bound_dev_if;
- mark = sk->sk_mark;
+ mark = READ_ONCE(sk->sk_mark);
tos = ip_sock_rt_tos(sk);
scope = ip_sock_rt_scope(sk);
prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
@@ -552,7 +552,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
inet_opt = rcu_dereference(inet->inet_opt);
if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr;
- flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+ flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark),
ip_sock_rt_tos(sk) & IPTOS_RT_MASK,
ip_sock_rt_scope(sk),
inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9a8d59e9303a..08921b96f972 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -931,9 +931,9 @@ static void tcp_v4_send_ack(const struct sock *sk,
ctl_sk = this_cpu_read(ipv4_tcp_sk);
sock_net_set(ctl_sk, net);
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
- inet_twsk(sk)->tw_mark : sk->sk_mark;
+ inet_twsk(sk)->tw_mark : READ_ONCE(sk->sk_mark);
ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
- inet_twsk(sk)->tw_priority : sk->sk_priority;
+ inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority);
transmit_time = tcp_transmit_time(sk);
ip_send_unicast_reply(ctl_sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt,
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 82f4575f9cd9..99ac5efe244d 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -40,7 +40,7 @@ struct tcp_fastopen_metrics {

struct tcp_metrics_block {
struct tcp_metrics_block __rcu *tcpm_next;
- possible_net_t tcpm_net;
+ struct net *tcpm_net;
struct inetpeer_addr tcpm_saddr;
struct inetpeer_addr tcpm_daddr;
unsigned long tcpm_stamp;
@@ -51,34 +51,38 @@ struct tcp_metrics_block {
struct rcu_head rcu_head;
};

-static inline struct net *tm_net(struct tcp_metrics_block *tm)
+static inline struct net *tm_net(const struct tcp_metrics_block *tm)
{
- return read_pnet(&tm->tcpm_net);
+ /* Paired with the WRITE_ONCE() in tcpm_new() */
+ return READ_ONCE(tm->tcpm_net);
}

static bool tcp_metric_locked(struct tcp_metrics_block *tm,
enum tcp_metric_index idx)
{
- return tm->tcpm_lock & (1 << idx);
+ /* Paired with WRITE_ONCE() in tcpm_suck_dst() */
+ return READ_ONCE(tm->tcpm_lock) & (1 << idx);
}

-static u32 tcp_metric_get(struct tcp_metrics_block *tm,
+static u32 tcp_metric_get(const struct tcp_metrics_block *tm,
enum tcp_metric_index idx)
{
- return tm->tcpm_vals[idx];
+ /* Paired with WRITE_ONCE() in tcp_metric_set() */
+ return READ_ONCE(tm->tcpm_vals[idx]);
}

static void tcp_metric_set(struct tcp_metrics_block *tm,
enum tcp_metric_index idx,
u32 val)
{
- tm->tcpm_vals[idx] = val;
+ /* Paired with READ_ONCE() in tcp_metric_get() */
+ WRITE_ONCE(tm->tcpm_vals[idx], val);
}

static bool addr_same(const struct inetpeer_addr *a,
const struct inetpeer_addr *b)
{
- return inetpeer_addr_cmp(a, b) == 0;
+ return (a->family == b->family) && !inetpeer_addr_cmp(a, b);
}

struct tcpm_hash_bucket {
@@ -89,6 +93,7 @@ static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly;
static unsigned int tcp_metrics_hash_log __read_mostly;

static DEFINE_SPINLOCK(tcp_metrics_lock);
+static DEFINE_SEQLOCK(fastopen_seqlock);

static void tcpm_suck_dst(struct tcp_metrics_block *tm,
const struct dst_entry *dst,
@@ -97,7 +102,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
u32 msval;
u32 val;

- tm->tcpm_stamp = jiffies;
+ WRITE_ONCE(tm->tcpm_stamp, jiffies);

val = 0;
if (dst_metric_locked(dst, RTAX_RTT))
@@ -110,30 +115,42 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
val |= 1 << TCP_METRIC_CWND;
if (dst_metric_locked(dst, RTAX_REORDERING))
val |= 1 << TCP_METRIC_REORDERING;
- tm->tcpm_lock = val;
+ /* Paired with READ_ONCE() in tcp_metric_locked() */
+ WRITE_ONCE(tm->tcpm_lock, val);

msval = dst_metric_raw(dst, RTAX_RTT);
- tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
+ tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC);

msval = dst_metric_raw(dst, RTAX_RTTVAR);
- tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
- tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
- tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
- tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
+ tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC);
+ tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
+ dst_metric_raw(dst, RTAX_SSTHRESH));
+ tcp_metric_set(tm, TCP_METRIC_CWND,
+ dst_metric_raw(dst, RTAX_CWND));
+ tcp_metric_set(tm, TCP_METRIC_REORDERING,
+ dst_metric_raw(dst, RTAX_REORDERING));
if (fastopen_clear) {
+ write_seqlock(&fastopen_seqlock);
tm->tcpm_fastopen.mss = 0;
tm->tcpm_fastopen.syn_loss = 0;
tm->tcpm_fastopen.try_exp = 0;
tm->tcpm_fastopen.cookie.exp = false;
tm->tcpm_fastopen.cookie.len = 0;
+ write_sequnlock(&fastopen_seqlock);
}
}

#define TCP_METRICS_TIMEOUT (60 * 60 * HZ)

-static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
+static void tcpm_check_stamp(struct tcp_metrics_block *tm,
+ const struct dst_entry *dst)
{
- if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
+ unsigned long limit;
+
+ if (!tm)
+ return;
+ limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT;
+ if (unlikely(time_after(jiffies, limit)))
tcpm_suck_dst(tm, dst, false);
}

@@ -174,20 +191,23 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
oldest = deref_locked(tcp_metrics_hash[hash].chain);
for (tm = deref_locked(oldest->tcpm_next); tm;
tm = deref_locked(tm->tcpm_next)) {
- if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
+ if (time_before(READ_ONCE(tm->tcpm_stamp),
+ READ_ONCE(oldest->tcpm_stamp)))
oldest = tm;
}
tm = oldest;
} else {
- tm = kmalloc(sizeof(*tm), GFP_ATOMIC);
+ tm = kzalloc(sizeof(*tm), GFP_ATOMIC);
if (!tm)
goto out_unlock;
}
- write_pnet(&tm->tcpm_net, net);
+ /* Paired with the READ_ONCE() in tm_net() */
+ WRITE_ONCE(tm->tcpm_net, net);
+
tm->tcpm_saddr = *saddr;
tm->tcpm_daddr = *daddr;

- tcpm_suck_dst(tm, dst, true);
+ tcpm_suck_dst(tm, dst, reclaim);

if (likely(!reclaim)) {
tm->tcpm_next = tcp_metrics_hash[hash].chain;
@@ -434,7 +454,7 @@ void tcp_update_metrics(struct sock *sk)
tp->reordering);
}
}
- tm->tcpm_stamp = jiffies;
+ WRITE_ONCE(tm->tcpm_stamp, jiffies);
out_unlock:
rcu_read_unlock();
}
@@ -539,8 +559,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
return ret;
}

-static DEFINE_SEQLOCK(fastopen_seqlock);
-
void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie)
{
@@ -647,7 +665,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
}

if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE,
- jiffies - tm->tcpm_stamp,
+ jiffies - READ_ONCE(tm->tcpm_stamp),
TCP_METRICS_ATTR_PAD) < 0)
goto nla_put_failure;

@@ -658,7 +676,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
if (!nest)
goto nla_put_failure;
for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
- u32 val = tm->tcpm_vals[i];
+ u32 val = tcp_metric_get(tm, i);

if (!val)
continue;
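
With fastopen_seqlock moved up and taken in tcpm_suck_dst(), the fastopen
block is now cleared under the seqlock's writer side. The reader side (as
in tcp_fastopen_cache_get()) retries until it observes a consistent
snapshot. Roughly, assuming the struct layout in this file and eliding the
surrounding function:

#include <linux/seqlock.h>

/* Sketch of a seqlock reader for the fastopen metrics; the loop
 * re-reads whenever a writer raced with it.
 */
static u16 demo_fastopen_mss(struct tcp_metrics_block *tm)
{
	unsigned int seq;
	u16 mss;

	do {
		seq = read_seqbegin(&fastopen_seqlock);
		mss = tm->tcpm_fastopen.mss;
	} while (read_seqretry(&fastopen_seqlock, seq));

	return mss;
}
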
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index facdc78a43e5..27fb5479988a 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1073,7 +1073,7 @@ static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
And all this only to mangle msg->im6_msgtype and
to set msg->im6_mbz to "mbz" :-)
*/
- skb_push(skb, -skb_network_offset(pkt));
+ __skb_pull(skb, skb_network_offset(pkt));

skb_push(skb, sizeof(*msg));
skb_reset_transport_header(skb);
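
skb_push() takes an unsigned length, so pushing by a negated network
offset wraps to a huge value and trips the skb underflow sanity checks;
pulling by the positive offset is the safe way to advance skb->data. A
userspace model of the wrap (the offset value is hypothetical):

#include <stdio.h>

int main(void)
{
	int network_offset = 40;                 /* hypothetical offset */
	unsigned int push_len = -network_offset; /* wraps to 4294967256 */

	printf("skb_push() length would be %u\n", push_len);
	return 0;
}
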
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 4651aaf70db4..4d5a27dd9a4b 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -120,7 +120,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)

ipcm6_init_sk(&ipc6, np);
ipc6.sockc.tsflags = sk->sk_tsflags;
- ipc6.sockc.mark = sk->sk_mark;
+ ipc6.sockc.mark = READ_ONCE(sk->sk_mark);

fl6.flowi6_oif = oif;

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 33852fc38ad9..df3abd9e5237 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -612,7 +612,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb_reserve(skb, hlen);

skb->protocol = htons(ETH_P_IPV6);
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark;
skb->tstamp = sockc->transmit_time;

@@ -772,12 +772,12 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
*/
memset(&fl6, 0, sizeof(fl6));

- fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_uid = sk->sk_uid;

ipcm6_init(&ipc6);
ipc6.sockc.tsflags = sk->sk_tsflags;
- ipc6.sockc.mark = sk->sk_mark;
+ ipc6.sockc.mark = fl6.flowi6_mark;

if (sin6) {
if (addr_len < SIN6_LEN_RFC2133)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0b060cb8681f..960ab43a49c4 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2952,7 +2952,8 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
if (!oif && skb->dev)
oif = l3mdev_master_ifindex(skb->dev);

- ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
+ ip6_update_pmtu(skb, sock_net(sk), mtu, oif, READ_ONCE(sk->sk_mark),
+ sk->sk_uid);

dst = __sk_dst_get(sk);
if (!dst || !dst->obsolete ||
@@ -3173,8 +3174,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)

void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
- ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
- sk->sk_uid);
+ ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if,
+ READ_ONCE(sk->sk_mark), sk->sk_uid);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d9253aa764fa..4bdd356bb5c4 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -567,8 +567,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
- tclass, sk->sk_priority);
+ err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
+ opt, tclass, sk->sk_priority);
rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -943,7 +943,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
if (sk->sk_state == TCP_TIME_WAIT)
mark = inet_twsk(sk)->tw_mark;
else
- mark = sk->sk_mark;
+ mark = READ_ONCE(sk->sk_mark);
skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
}
if (txhash) {
@@ -1132,7 +1132,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
- ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
+ ipv6_get_dsfield(ipv6_hdr(skb)), 0,
+ READ_ONCE(sk->sk_priority),
READ_ONCE(tcp_rsk(req)->txhash));
}

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 04f1d696503c..27348172b25b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -622,7 +622,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type == NDISC_REDIRECT) {
if (tunnel) {
ip6_redirect(skb, sock_net(sk), inet6_iif(skb),
- sk->sk_mark, sk->sk_uid);
+ READ_ONCE(sk->sk_mark), sk->sk_uid);
} else {
ip6_sk_redirect(skb, sk);
}
@@ -1350,7 +1350,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipcm6_init(&ipc6);
ipc6.gso_size = READ_ONCE(up->gso_size);
ipc6.sockc.tsflags = sk->sk_tsflags;
- ipc6.sockc.mark = sk->sk_mark;
+ ipc6.sockc.mark = READ_ONCE(sk->sk_mark);

/* destination address check */
if (sin6) {
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 5137ea1861ce..bce4132b0a5c 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -519,7 +519,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
/* Get and verify the address */
memset(&fl6, 0, sizeof(fl6));

- fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_uid = sk->sk_uid;

ipcm6_init(&ipc6);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 696ba398d699..937bd4c55615 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -102,7 +102,7 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in
break;
case SO_MARK:
if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) {
- ssk->sk_mark = sk->sk_mark;
+ WRITE_ONCE(ssk->sk_mark, sk->sk_mark);
sk_dst_reset(ssk);
}
break;
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 49a5348a6a14..777561b71fcb 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -107,7 +107,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
break;
case NFT_SOCKET_MARK:
if (sk_fullsock(sk)) {
- *dest = sk->sk_mark;
+ *dest = READ_ONCE(sk->sk_mark);
} else {
regs->verdict.code = NFT_BREAK;
return;
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 7013f55f05d1..76e01f292aaf 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -77,7 +77,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,

if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent && sk_fullsock(sk))
- pskb->mark = sk->sk_mark;
+ pskb->mark = READ_ONCE(sk->sk_mark);

if (sk != skb->sk)
sock_gen_put(sk);
@@ -138,7 +138,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)

if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent && sk_fullsock(sk))
- pskb->mark = sk->sk_mark;
+ pskb->mark = READ_ONCE(sk->sk_mark);

if (sk != skb->sk)
sock_gen_put(sk);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 6ab9d5b54338..168106840073 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2052,8 +2052,8 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,

skb->protocol = proto;
skb->dev = dev;
- skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->priority = READ_ONCE(sk->sk_priority);
+ skb->mark = READ_ONCE(sk->sk_mark);
skb->tstamp = sockc.transmit_time;

skb_setup_tx_timestamp(skb, sockc.tsflags);
@@ -2575,8 +2575,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,

skb->protocol = proto;
skb->dev = dev;
- skb->priority = po->sk.sk_priority;
- skb->mark = po->sk.sk_mark;
+ skb->priority = READ_ONCE(po->sk.sk_priority);
+ skb->mark = READ_ONCE(po->sk.sk_mark);
skb->tstamp = sockc->transmit_time;
skb_setup_tx_timestamp(skb, sockc->tsflags);
skb_zcopy_set_nouarg(skb, ph.raw);
@@ -2978,7 +2978,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
goto out_unlock;

sockcm_init(&sockc, sk);
- sockc.mark = sk->sk_mark;
+ sockc.mark = READ_ONCE(sk->sk_mark);
if (msg->msg_controllen) {
err = sock_cmsg_send(sk, msg, &sockc);
if (unlikely(err))
@@ -3052,7 +3052,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)

skb->protocol = proto;
skb->dev = dev;
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc.mark;
skb->tstamp = sockc.transmit_time;

diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 1212b057b129..6160ef7d646a 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -265,7 +265,6 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
return -ENOBUFS;

fnew->id = f->id;
- fnew->res = f->res;
fnew->ifindex = f->ifindex;
fnew->tp = f->tp;

diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 9e43b929d4ca..306188bf2d1f 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -511,7 +511,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (fold) {
f->id = fold->id;
f->iif = fold->iif;
- f->res = fold->res;
f->handle = fold->handle;

f->tp = fold->tp;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 1280736a7b92..ba93e2a6bdbb 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -824,7 +824,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,

new->ifindex = n->ifindex;
new->fshift = n->fshift;
- new->res = n->res;
new->flags = n->flags;
RCU_INIT_POINTER(new->ht_down, ht);

@@ -1022,18 +1021,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}

+ /* At this point, we need to derive the new handle that will be used to
+ * uniquely map the identity of this table match entry. The
+ * identity of the entry that we need to construct is 32 bits made of:
+ * htid(12b):bucketid(8b):node/entryid(12b)
+ *
+ * At this point _we have the table (ht)_ in which we will insert this
+ * entry. We carry the table's id in the variable "htid".
+ * Note that earlier code picked the ht selection either by a) the user
+ * providing the htid via the TCA_U32_HASH attribute, or b) when no such
+ * attribute is passed, defaulting to the root ht at ID
+ * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0.
+ * If OTOH the user passed us the htid, they may also pass a bucketid of
+ * choice; 0 is fine. For example, a user htid of 0x[600][01][000]
+ * indicates hash bucket ID 1. Rule: the entry/node ID _cannot_ be
+ * passed via the htid, so even if it was non-zero it will be ignored.
+ *
+ * We may also have a handle, if the user passed one. The handle also
+ * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b).
+ * Rule: the bucketid in the handle is ignored even if one was passed;
+ * rather, the value in "htid" is always assumed to carry the bucketid.
+ */
if (handle) {
+ /* Rule: the htid embedded in the handle must match the table id in "htid" */
if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch");
return -EINVAL;
}
- handle = htid | TC_U32_NODE(handle);
- err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle,
- GFP_KERNEL);
- if (err)
- return err;
- } else
+ /* Ok, so far we have a valid htid(12b):bucketid(8b), but we
+ * need to finalize the table entry identification with the last
+ * part - the node/entryid(12b). Rule: the nodeid _cannot be 0_
+ * for entries. Rule: a nodeid of 0 is reserved only for tables
+ * (see the earlier code which processes the TC_U32_DIVISOR
+ * attribute).
+ * Rule: the nodeid can only be derived from the handle (and not
+ * the htid).
+ * Rule: if the handle specifies zero for the nodeid (for example
+ * 0x60000000), then pick a new nodeid from the pool of IDs this
+ * hash table has been allocating from.
+ * If OTOH it is specified (e.g. the user passed a handle such as
+ * 0x60000123), then we use it to generate our final handle, which
+ * uniquely identifies the match entry.
+ */
+ if (!TC_U32_NODE(handle)) {
+ handle = gen_new_kid(ht, htid);
+ } else {
+ handle = htid | TC_U32_NODE(handle);
+ err = idr_alloc_u32(&ht->handle_idr, NULL, &handle,
+ handle, GFP_KERNEL);
+ if (err)
+ return err;
+ }
+ } else {
+ /* The user did not give us a handle; let's just generate one
+ * from the table's pool of nodeids.
+ */
handle = gen_new_kid(ht, htid);
+ }

if (tb[TCA_U32_SEL] == NULL) {
NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
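
The handle layout described in the new comment can be checked with the
uapi macros (masks quoted from include/uapi/linux/pkt_cls.h as recalled;
verify against the header). A worked example:

#include <stdio.h>
#include <stdint.h>

#define TC_U32_HTID(h)	((h) & 0xFFF00000)
#define TC_U32_HASH(h)	(((h) >> 12) & 0xFF)	/* the bucketid */
#define TC_U32_NODE(h)	((h) & 0xFFF)

int main(void)
{
	uint32_t handle = 0x60001123; /* htid 0x600, bucket 0x01, node 0x123 */

	printf("htid   0x%08x\n", TC_U32_HTID(handle));	/* 0x60000000 */
	printf("bucket 0x%02x\n", TC_U32_HASH(handle));	/* 0x01 */
	printf("node   0x%03x\n", TC_U32_NODE(handle));	/* 0x123 */
	return 0;
}
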
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index a274a9332f33..8d5eebb2dd1b 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -769,6 +769,11 @@ static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
[TCA_TAPRIO_TC_ENTRY_MAX_SDU] = { .type = NLA_U32 },
};

+static struct netlink_range_validation_signed taprio_cycle_time_range = {
+ .min = 0,
+ .max = INT_MAX,
+};
+
static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_PRIOMAP] = {
.len = sizeof(struct tc_mqprio_qopt)
@@ -777,7 +782,8 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 },
[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED },
[TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 },
- [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 },
+ [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] =
+ NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range),
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
[TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 },
[TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 },
@@ -913,6 +919,11 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
return -EINVAL;
}

+ if (cycle < 0 || cycle > INT_MAX) {
+ NL_SET_ERR_MSG(extack, "'cycle_time' is too big");
+ return -EINVAL;
+ }
+
new->cycle_time = cycle;
}

@@ -1110,7 +1121,7 @@ static void setup_txtime(struct taprio_sched *q,
struct sched_gate_list *sched, ktime_t base)
{
struct sched_entry *entry;
- u32 interval = 0;
+ u64 interval = 0;

list_for_each_entry(entry, &sched->entries, list) {
entry->next_txtime = ktime_add_ns(base, interval);
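
The new netlink range policy plus the explicit check in
parse_taprio_schedule() reject cycle times outside [0, INT_MAX]; the
selftest added at the end of this mail probes exactly 2^32 ns. One
plausible failure mode for such values, shown as a plain C model (the
narrowing is assumed for illustration, not quoted from taprio):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int64_t cycle = 4294967296LL;	/* 2^32 ns, as in the selftest */
	uint32_t narrowed = (uint32_t)cycle;

	printf("narrowed cycle_time = %u\n", narrowed);	/* prints 0 */
	return 0;
}
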
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 02d1daae7739..5ae0a54a823b 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -447,7 +447,7 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
nsk->sk_rcvbuf = osk->sk_rcvbuf;
nsk->sk_sndtimeo = osk->sk_sndtimeo;
nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
- nsk->sk_mark = osk->sk_mark;
+ nsk->sk_mark = READ_ONCE(osk->sk_mark);
nsk->sk_priority = osk->sk_priority;
nsk->sk_rcvlowat = osk->sk_rcvlowat;
nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5b19b6c53a2c..78fa620a6398 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -779,7 +779,7 @@ static int unix_set_peek_off(struct sock *sk, int val)
if (mutex_lock_interruptible(&u->iolock))
return -EINTR;

- sk->sk_peek_off = val;
+ WRITE_ONCE(sk->sk_peek_off, val);
mutex_unlock(&u->iolock);

return 0;
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index efe9283e9893..e5c1510c098f 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -643,7 +643,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,

ret = cfg80211_calc_short_ssid(ies, &ssid_elem, &s_ssid_tmp);
if (ret)
- return ret;
+ return 0;

/* RNR IE may contain more than one NEIGHBOR_AP_INFO */
while (pos + sizeof(*ap_info) <= end) {
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 371d269d22fa..22bf10ffbf2d 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -504,7 +504,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,

skb->dev = dev;
skb->priority = xs->sk.sk_priority;
- skb->mark = xs->sk.sk_mark;
+ skb->mark = READ_ONCE(xs->sk.sk_mark);
skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
skb->destructor = xsk_destruct_skb;

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7b1b93584bdb..e65de78cb61b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2174,7 +2174,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,

match = xfrm_selector_match(&pol->selector, fl, family);
if (match) {
- if ((sk->sk_mark & pol->mark.m) != pol->mark.v ||
+ if ((READ_ONCE(sk->sk_mark) & pol->mark.m) != pol->mark.v ||
pol->if_id != if_id) {
pol = NULL;
goto out;
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index c48bc284214a..fdb4e11df3bd 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -9,5 +9,6 @@
#include <linux/slab.h>

/* `bindgen` gets confused at certain things. */
+const size_t BINDINGS_ARCH_SLAB_MINALIGN = ARCH_SLAB_MINALIGN;
const gfp_t BINDINGS_GFP_KERNEL = GFP_KERNEL;
const gfp_t BINDINGS___GFP_ZERO = __GFP_ZERO;
diff --git a/rust/kernel/allocator.rs b/rust/kernel/allocator.rs
index 397a3dd57a9b..9363b527be66 100644
--- a/rust/kernel/allocator.rs
+++ b/rust/kernel/allocator.rs
@@ -9,6 +9,36 @@ use crate::bindings;

struct KernelAllocator;

+/// Calls `krealloc` with a proper size to alloc a new object aligned to `new_layout`'s alignment.
+///
+/// # Safety
+///
+/// - `ptr` can be either null or a pointer which has been allocated by this allocator.
+/// - `new_layout` must have a non-zero size.
+unsafe fn krealloc_aligned(ptr: *mut u8, new_layout: Layout, flags: bindings::gfp_t) -> *mut u8 {
+ // Customized layouts from `Layout::from_size_align()` can have size < align, so pad first.
+ let layout = new_layout.pad_to_align();
+
+ let mut size = layout.size();
+
+ if layout.align() > bindings::BINDINGS_ARCH_SLAB_MINALIGN {
+ // The alignment requirement exceeds the slab guarantee, thus try to enlarge the size
+ // to use the "power-of-two" size/alignment guarantee (see comments in `kmalloc()` for
+ // more information).
+ //
+ // Note that `layout.size()` (after padding) is guaranteed to be a multiple of
+ // `layout.align()`, so `next_power_of_two` gives enough alignment guarantee.
+ size = size.next_power_of_two();
+ }
+
+ // SAFETY:
+ // - `ptr` is either null or a pointer returned from a previous `k{re}alloc()` by the
+ // function safety requirement.
+ // - `size` is greater than 0 since it's either a `layout.size()` (which cannot be zero
+ // according to the function safety requirement) or a result from `next_power_of_two()`.
+ unsafe { bindings::krealloc(ptr as *const core::ffi::c_void, size, flags) as *mut u8 }
+}
+
unsafe impl GlobalAlloc for KernelAllocator {
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
// `krealloc()` is used instead of `kmalloc()` because the latter is
@@ -30,10 +60,20 @@ static ALLOCATOR: KernelAllocator = KernelAllocator;
// to extract the object file that has them from the archive. For the moment,
// let's generate them ourselves instead.
//
+// Note: Although these are *safe* functions, they are called by the compiler
+// with parameters that obey the same `GlobalAlloc` function safety
+// requirements: size and align should form a valid layout, and size is
+// greater than 0.
+//
// Note that `#[no_mangle]` implies exported too, nowadays.
#[no_mangle]
-fn __rust_alloc(size: usize, _align: usize) -> *mut u8 {
- unsafe { bindings::krealloc(core::ptr::null(), size, bindings::GFP_KERNEL) as *mut u8 }
+fn __rust_alloc(size: usize, align: usize) -> *mut u8 {
+ // SAFETY: See assumption above.
+ let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
+
+ // SAFETY: `ptr::null_mut()` is null, per assumption above the size of `layout` is greater
+ // than 0.
+ unsafe { krealloc_aligned(ptr::null_mut(), layout, bindings::GFP_KERNEL) }
}

#[no_mangle]
@@ -42,23 +82,27 @@ fn __rust_dealloc(ptr: *mut u8, _size: usize, _align: usize) {
}

#[no_mangle]
-fn __rust_realloc(ptr: *mut u8, _old_size: usize, _align: usize, new_size: usize) -> *mut u8 {
- unsafe {
- bindings::krealloc(
- ptr as *const core::ffi::c_void,
- new_size,
- bindings::GFP_KERNEL,
- ) as *mut u8
- }
+fn __rust_realloc(ptr: *mut u8, _old_size: usize, align: usize, new_size: usize) -> *mut u8 {
+ // SAFETY: See assumption above.
+ let new_layout = unsafe { Layout::from_size_align_unchecked(new_size, align) };
+
+ // SAFETY: Per assumption above, `ptr` is allocated by `__rust_*` before, and the size of
+ // `new_layout` is greater than 0.
+ unsafe { krealloc_aligned(ptr, new_layout, bindings::GFP_KERNEL) }
}

#[no_mangle]
-fn __rust_alloc_zeroed(size: usize, _align: usize) -> *mut u8 {
+fn __rust_alloc_zeroed(size: usize, align: usize) -> *mut u8 {
+ // SAFETY: See assumption above.
+ let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
+
+ // SAFETY: `ptr::null_mut()` is null, per assumption above the size of `layout` is greater
+ // than 0.
unsafe {
- bindings::krealloc(
- core::ptr::null(),
- size,
+ krealloc_aligned(
+ ptr::null_mut(),
+ layout,
bindings::GFP_KERNEL | bindings::__GFP_ZERO,
- ) as *mut u8
+ )
}
}
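
The sizing rule in krealloc_aligned() leans on kmalloc's bucket
behaviour: power-of-two-sized allocations come back naturally aligned to
their size. A C model of the computation (constants are hypothetical; the
real code compares against ARCH_SLAB_MINALIGN):

#include <stddef.h>

#define DEMO_SLAB_MINALIGN 8	/* stand-in for ARCH_SLAB_MINALIGN */

static size_t next_pow2(size_t n)
{
	size_t p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

/* Mirror of the Rust logic: padding already applied, then enlarge when
 * the requested alignment exceeds the slab minimum.
 */
static size_t alloc_size_for(size_t padded_size, size_t align)
{
	if (align > DEMO_SLAB_MINALIGN)
		padded_size = next_pow2(padded_size);
	return padded_size;
}

int main(void)
{
	/* e.g. size 96, align 32 -> allocate 128 bytes */
	return alloc_size_for(96, 32) == 128 ? 0 : 1;
}
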
diff --git a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
index 00d2e0e2e0c2..319f36ebb9a4 100644
--- a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
+++ b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
@@ -4,6 +4,12 @@

set -e

+# skip if there's no gcc
+if ! [ -x "$(command -v gcc)" ]; then
+ echo "failed: no gcc compiler"
+ exit 2
+fi
+
temp_dir=$(mktemp -d /tmp/perf-uprobe-different-cu-sh.XXXXXXXXXX)

cleanup()
@@ -11,7 +17,7 @@ cleanup()
trap - EXIT TERM INT
if [[ "${temp_dir}" =~ ^/tmp/perf-uprobe-different-cu-sh.*$ ]]; then
echo "--- Cleaning up ---"
- perf probe -x ${temp_dir}/testfile -d foo
+ perf probe -x ${temp_dir}/testfile -d foo || true
rm -f "${temp_dir}/"*
rmdir "${temp_dir}"
fi
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index 4177f9507bbe..b736a5169aad 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -32,9 +32,17 @@
#include "../kselftest.h"
#include "rseq.h"

-static const ptrdiff_t *libc_rseq_offset_p;
-static const unsigned int *libc_rseq_size_p;
-static const unsigned int *libc_rseq_flags_p;
+/*
+ * Define weak versions to play nice with binaries that are statically linked
+ * against a libc that doesn't support registering its own rseq.
+ */
+__weak ptrdiff_t __rseq_offset;
+__weak unsigned int __rseq_size;
+__weak unsigned int __rseq_flags;
+
+static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset;
+static const unsigned int *libc_rseq_size_p = &__rseq_size;
+static const unsigned int *libc_rseq_flags_p = &__rseq_flags;

/* Offset from the thread pointer to the rseq area. */
ptrdiff_t rseq_offset;
@@ -108,9 +116,17 @@ int rseq_unregister_current_thread(void)
static __attribute__((constructor))
void rseq_init(void)
{
- libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
- libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
- libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
+ /*
+ * If the libc's registered rseq size isn't already valid, it may be
+ * because the binary is dynamically linked and not necessarily due to
+ * libc not having registered a restartable sequence. Try to find the
+ * symbols if that's the case.
+ */
+ if (!*libc_rseq_size_p) {
+ libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
+ libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
+ libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
+ }
if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
*libc_rseq_size_p != 0) {
/* rseq registration owned by glibc */
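
The weak __rseq_* definitions give statically linked binaries a usable
(zero-initialized) fallback, while the dlsym() path remains for
dynamically linked ones, where the local weak symbols may hide glibc's. A
compact userspace model of just the size probe (demo names; link with
-ldl on older glibc):

#define _GNU_SOURCE
#include <dlfcn.h>

__attribute__((weak)) unsigned int __rseq_size;

static const unsigned int *demo_rseq_size_p = &__rseq_size;

static __attribute__((constructor)) void demo_rseq_init(void)
{
	/* Zero may just mean "dynamically linked"; try dlsym() before
	 * concluding that libc did not register rseq.
	 */
	if (!*demo_rseq_size_p) {
		const unsigned int *p = dlsym(RTLD_NEXT, "__rseq_size");

		if (p)
			demo_rseq_size_p = p;
	}
}
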
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
index a44455372646..08d4861c2e78 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
@@ -131,5 +131,30 @@
"teardown": [
"echo \"1\" > /sys/bus/netdevsim/del_device"
]
+ },
+ {
+ "id": "3e1e",
+ "name": "Add taprio Qdisc with an invalid cycle-time",
+ "category": [
+ "qdisc",
+ "taprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 clockid CLOCK_TAI cycle-time 4294967296 || /bin/true",
+ "$IP link set dev $ETH up",
+ "$IP addr add 10.10.10.10/24 dev $ETH"
+ ],
+ "cmdUnderTest": "/bin/true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc taprio 1: root refcnt",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
}
]